// compose-filter.h // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Copyright 2005-2010 Google, Inc. // Author: riley@google.com (Michael Riley) // // \file // Classes for filtering the composition matches, e.g. for correct epsilon // handling. #ifndef FST_LIB_COMPOSE_FILTER_H__ #define FST_LIB_COMPOSE_FILTER_H__ #include #include // For optional argument declarations #include namespace fst { // COMPOSITION FILTER STATE - this represents the state of // the composition filter. It has the form: // // class FilterState { // public: // // Required constructors // FilterState(); // FilterState(const FilterState &f); // // An invalid filter state. // static const FilterState NoState(); // // Maps state to integer for hashing. // size_t Hash() const; // // Equality of filter states. // bool operator==(const FilterState &f) const; // // Inequality of filter states. // bool operator!=(const FilterState &f) const; // // Assignment to filter states. // FilterState& operator=(const FilterState& f); // }; // Filter state that is a signed integral type. template class IntegerFilterState { public: IntegerFilterState() : state_(kNoStateId) {} explicit IntegerFilterState(T s) : state_(s) {} static const IntegerFilterState NoState() { return IntegerFilterState(); } size_t Hash() const { return static_cast(state_); } bool operator==(const IntegerFilterState &f) const { return state_ == f.state_; } bool operator!=(const IntegerFilterState &f) const { return state_ != f.state_; } T GetState() const { return state_; } void SetState(T state) { state_ = state; } private: T state_; }; typedef IntegerFilterState CharFilterState; typedef IntegerFilterState ShortFilterState; typedef IntegerFilterState IntFilterState; // Filter state that is a weight (class). template class WeightFilterState { public: WeightFilterState() : weight_(W::Zero()) {} explicit WeightFilterState(W w) : weight_(w) {} static const WeightFilterState NoState() { return WeightFilterState(); } size_t Hash() const { return weight_.Hash(); } bool operator==(const WeightFilterState &f) const { return weight_ == f.weight_; } bool operator!=(const WeightFilterState &f) const { return weight_ != f.weight_; } W GetWeight() const { return weight_; } void SetWeight(W w) { weight_ = w; } private: W weight_; }; // Filter state that is the combination of two filter states. template class PairFilterState { public: PairFilterState() : f1_(F1::NoState()), f2_(F2::NoState()) {} PairFilterState(const F1 &f1, const F2 &f2) : f1_(f1), f2_(f2) {} static const PairFilterState NoState() { return PairFilterState(); } size_t Hash() const { size_t h1 = f1_.Hash(); size_t h2 = f2_.Hash(); const int lshift = 5; const int rshift = CHAR_BIT * sizeof(size_t) - 5; return h1 << lshift ^ h1 >> rshift ^ h2; } bool operator==(const PairFilterState &f) const { return f1_ == f.f1_ && f2_ == f.f2_; } bool operator!=(const PairFilterState &f) const { return f1_ != f.f1_ || f2_ != f.f2_; } const F1 &GetState1() const { return f1_; } const F2 &GetState2() const { return f2_; } void SetState(const F1 &f1, const F2 &f2) { f1_ = f1; f2_ = f2; } private: F1 f1_; F2 f2_; }; // COMPOSITION FILTERS - these determine which matches are allowed to // proceed. The filter's state is represented by the type // ComposeFilter::FilterState. The basic filters handle correct // epsilon matching. Their interface is: // // template // class ComposeFilter { // public: // typedef typename M1::FST1 FST1; // typedef typename M1::FST2 FST2; // typedef typename FST1::Arc Arc; // typedef ... FilterState; // typedef ... Matcher1; // typedef ... Matcher2; // // // Required constructors. // ComposeFilter(const FST1 &fst1, const FST2 &fst2, // // M1 *matcher1 = 0, M2 *matcher2 = 0); // // If safe=true, the copy is thread-safe. See Fst<>::Copy() // // for further doc. // ComposeFilter(const ComposeFilter &filter, // // bool safe = false); // // Return start state of filter. // FilterState Start() const; // // Specifies current composition state. // void SetState(StateId s1, StateId s2, const FilterState &f); // // // Apply filter at current composition state to these transitions. // // If an arc label to be matched is kNolabel, then that side // // does not consume a symbol. Returns the new filter state or, // // if disallowed, FilterState::NoState(). The filter is permitted to // // modify its inputs, e.g. for optimizations. // FilterState FilterArc(Arc *arc1, Arc *arc2) const; // // Apply filter at current composition state to these final weights // // (cf. superfinal transitions). The filter may modify its inputs, // // e.g. for optimizations. // void FilterFinal(Weight *final1, Weight *final2) const; // // // Return resp matchers. Ownership stays with filter. These // // methods allow the filter to access and possibly modify // // the composition matchers (useful e.g. with lookahead). // Matcher1 *GetMatcher1(); // Matcher2 *GetMatcher2(); // // // This specifies how the filter affects the composition result // // properties. It takes as argument the properties that would // // apply with a trivial composition fitler. // uint64 Properties(uint64 props) const; // }; // This filter requires epsilons on FST1 to be read before epsilons on FST2. template class SequenceComposeFilter { public: typedef typename M1::FST FST1; typedef typename M2::FST FST2; typedef typename FST1::Arc Arc; typedef CharFilterState FilterState; typedef M1 Matcher1; typedef M2 Matcher2; typedef typename Arc::StateId StateId; typedef typename Arc::Label Label; typedef typename Arc::Weight Weight; SequenceComposeFilter(const FST1 &fst1, const FST2 &fst2, M1 *matcher1 = 0, M2 *matcher2 = 0) : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)), matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)), fst1_(matcher1_->GetFst()), s1_(kNoStateId), s2_(kNoStateId), f_(kNoStateId) {} SequenceComposeFilter(const SequenceComposeFilter &filter, bool safe = false) : matcher1_(filter.matcher1_->Copy(safe)), matcher2_(filter.matcher2_->Copy(safe)), fst1_(matcher1_->GetFst()), s1_(kNoStateId), s2_(kNoStateId), f_(kNoStateId) {} ~SequenceComposeFilter() { delete matcher1_; delete matcher2_; } FilterState Start() const { return FilterState(0); } void SetState(StateId s1, StateId s2, const FilterState &f) { if (s1_ == s1 && s2_ == s2 && f == f_) return; s1_ = s1; s2_ = s2; f_ = f; size_t na1 = internal::NumArcs(fst1_, s1); size_t ne1 = internal::NumOutputEpsilons(fst1_, s1); bool fin1 = internal::Final(fst1_, s1) != Weight::Zero(); alleps1_ = na1 == ne1 && !fin1; noeps1_ = ne1 == 0; } FilterState FilterArc(Arc *arc1, Arc *arc2) const { if (arc1->olabel == kNoLabel) return alleps1_ ? FilterState::NoState() : noeps1_ ? FilterState(0) : FilterState(1); else if (arc2->ilabel == kNoLabel) return f_ != FilterState(0) ? FilterState::NoState() : FilterState(0); else return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0); } void FilterFinal(Weight *, Weight *) const {} // Return resp matchers. Ownership stays with filter. Matcher1 *GetMatcher1() { return matcher1_; } Matcher2 *GetMatcher2() { return matcher2_; } uint64 Properties(uint64 props) const { return props; } private: Matcher1 *matcher1_; Matcher2 *matcher2_; const FST1 &fst1_; StateId s1_; // Current fst1_ state; StateId s2_; // Current fst2_ state; FilterState f_; // Current filter state bool alleps1_; // Only epsilons (and non-final) leaving s1_? bool noeps1_; // No epsilons leaving s1_? void operator=(const SequenceComposeFilter &); // disallow }; // This filter requires epsilons on FST2 to be read before epsilons on FST1. template class AltSequenceComposeFilter { public: typedef typename M1::FST FST1; typedef typename M2::FST FST2; typedef typename FST1::Arc Arc; typedef CharFilterState FilterState; typedef M1 Matcher1; typedef M2 Matcher2; typedef typename Arc::StateId StateId; typedef typename Arc::Label Label; typedef typename Arc::Weight Weight; AltSequenceComposeFilter(const FST1 &fst1, const FST2 &fst2, M1 *matcher1 = 0, M2 *matcher2 = 0) : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)), matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)), fst2_(matcher2_->GetFst()), s1_(kNoStateId), s2_(kNoStateId), f_(kNoStateId) {} AltSequenceComposeFilter(const AltSequenceComposeFilter &filter, bool safe = false) : matcher1_(filter.matcher1_->Copy(safe)), matcher2_(filter.matcher2_->Copy(safe)), fst2_(matcher2_->GetFst()), s1_(kNoStateId), s2_(kNoStateId), f_(kNoStateId) {} ~AltSequenceComposeFilter() { delete matcher1_; delete matcher2_; } FilterState Start() const { return FilterState(0); } void SetState(StateId s1, StateId s2, const FilterState &f) { if (s1_ == s1 && s2_ == s2 && f == f_) return; s1_ = s1; s2_ = s2; f_ = f; size_t na2 = internal::NumArcs(fst2_, s2); size_t ne2 = internal::NumInputEpsilons(fst2_, s2); bool fin2 = internal::Final(fst2_, s2) != Weight::Zero(); alleps2_ = na2 == ne2 && !fin2; noeps2_ = ne2 == 0; } FilterState FilterArc(Arc *arc1, Arc *arc2) const { if (arc2->ilabel == kNoLabel) return alleps2_ ? FilterState::NoState() : noeps2_ ? FilterState(0) : FilterState(1); else if (arc1->olabel == kNoLabel) return f_ == FilterState(1) ? FilterState::NoState() : FilterState(0); else return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0); } void FilterFinal(Weight *, Weight *) const {} // Return resp matchers. Ownership stays with filter. Matcher1 *GetMatcher1() { return matcher1_; } Matcher2 *GetMatcher2() { return matcher2_; } uint64 Properties(uint64 props) const { return props; } private: Matcher1 *matcher1_; Matcher2 *matcher2_; const FST2 &fst2_; StateId s1_; // Current fst1_ state; StateId s2_; // Current fst2_ state; FilterState f_; // Current filter state bool alleps2_; // Only epsilons (and non-final) leaving s2_? bool noeps2_; // No epsilons leaving s2_? void operator=(const AltSequenceComposeFilter &); // disallow }; // This filter requires epsilons on FST1 to be matched with epsilons on FST2 // whenever possible. template class MatchComposeFilter { public: typedef typename M1::FST FST1; typedef typename M2::FST FST2; typedef typename FST1::Arc Arc; typedef CharFilterState FilterState; typedef M1 Matcher1; typedef M2 Matcher2; typedef typename Arc::StateId StateId; typedef typename Arc::Label Label; typedef typename Arc::Weight Weight; MatchComposeFilter(const FST1 &fst1, const FST2 &fst2, M1 *matcher1 = 0, M2 *matcher2 = 0) : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)), matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)), fst1_(matcher1_->GetFst()), fst2_(matcher2_->GetFst()), s1_(kNoStateId), s2_(kNoStateId), f_(kNoStateId) {} MatchComposeFilter(const MatchComposeFilter &filter, bool safe = false) : matcher1_(filter.matcher1_->Copy(safe)), matcher2_(filter.matcher2_->Copy(safe)), fst1_(matcher1_->GetFst()), fst2_(matcher2_->GetFst()), s1_(kNoStateId), s2_(kNoStateId), f_(kNoStateId) {} ~MatchComposeFilter() { delete matcher1_; delete matcher2_; } FilterState Start() const { return FilterState(0); } void SetState(StateId s1, StateId s2, const FilterState &f) { if (s1_ == s1 && s2_ == s2 && f == f_) return; s1_ = s1; s2_ = s2; f_ = f; size_t na1 = internal::NumArcs(fst1_, s1); size_t ne1 = internal::NumOutputEpsilons(fst1_, s1); bool f1 = internal::Final(fst1_, s1) != Weight::Zero(); alleps1_ = na1 == ne1 && !f1; noeps1_ = ne1 == 0; size_t na2 = internal::NumArcs(fst2_, s2); size_t ne2 = internal::NumInputEpsilons(fst2_, s2); bool f2 = internal::Final(fst2_, s2) != Weight::Zero(); alleps2_ = na2 == ne2 && !f2; noeps2_ = ne2 == 0; } FilterState FilterArc(Arc *arc1, Arc *arc2) const { if (arc2->ilabel == kNoLabel) // Epsilon on Fst1 return f_ == FilterState(0) ? (noeps2_ ? FilterState(0) : (alleps2_ ? FilterState::NoState(): FilterState(1))) : (f_ == FilterState(1) ? FilterState(1) : FilterState::NoState()); else if (arc1->olabel == kNoLabel) // Epsilon on Fst2 return f_ == FilterState(0) ? (noeps1_ ? FilterState(0) : (alleps1_ ? FilterState::NoState() : FilterState(2))) : (f_ == FilterState(2) ? FilterState(2) : FilterState::NoState()); else if (arc1->olabel == 0) // Epsilon on both return f_ == FilterState(0) ? FilterState(0) : FilterState::NoState(); else // Both are non-epsilons return FilterState(0); } void FilterFinal(Weight *, Weight *) const {} // Return resp matchers. Ownership stays with filter. Matcher1 *GetMatcher1() { return matcher1_; } Matcher2 *GetMatcher2() { return matcher2_; } uint64 Properties(uint64 props) const { return props; } private: Matcher1 *matcher1_; Matcher2 *matcher2_; const FST1 &fst1_; const FST2 &fst2_; StateId s1_; // Current fst1_ state; StateId s2_; // Current fst2_ state; FilterState f_; // Current filter state ID bool alleps1_, alleps2_; // Only epsilons (and non-final) leaving s1, s2? bool noeps1_, noeps2_; // No epsilons leaving s1, s2? void operator=(const MatchComposeFilter &); // disallow }; // This filter works with the MultiEpsMatcher to determine if // 'multi-epsilons' are preserved in the composition output // (rather than rewritten as 0) and ensures correct properties. template class MultiEpsFilter { public: typedef typename F::FST1 FST1; typedef typename F::FST2 FST2; typedef typename F::Arc Arc; typedef typename F::Matcher1 Matcher1; typedef typename F::Matcher2 Matcher2; typedef typename F::FilterState FilterState; typedef MultiEpsFilter Filter; typedef typename Arc::StateId StateId; typedef typename Arc::Label Label; typedef typename Arc::Weight Weight; MultiEpsFilter(const FST1 &fst1, const FST2 &fst2, Matcher1 *matcher1 = 0, Matcher2 *matcher2 = 0, bool keep_multi_eps = false) : filter_(fst1, fst2, matcher1, matcher2), keep_multi_eps_(keep_multi_eps) {} MultiEpsFilter(const Filter &filter, bool safe = false) : filter_(filter.filter_, safe), keep_multi_eps_(filter.keep_multi_eps_) {} FilterState Start() const { return filter_.Start(); } void SetState(StateId s1, StateId s2, const FilterState &f) { return filter_.SetState(s1, s2, f); } FilterState FilterArc(Arc *arc1, Arc *arc2) const { FilterState f = filter_.FilterArc(arc1, arc2); if (keep_multi_eps_) { if (arc1->olabel == kNoLabel) arc1->ilabel = arc2->ilabel; if (arc2->ilabel == kNoLabel) arc2->olabel = arc1->olabel; } return f; } void FilterFinal(Weight *w1, Weight *w2) const { return filter_.FilterFinal(w1, w2); } // Return resp matchers. Ownership stays with filter. Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); } Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); } uint64 Properties(uint64 iprops) const { uint64 oprops = filter_.Properties(iprops); return oprops & kILabelInvariantProperties & kOLabelInvariantProperties; } private: F filter_; bool keep_multi_eps_; }; } // namespace fst #endif // FST_LIB_COMPOSE_FILTER_H__