// state-table.h // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Copyright 2005-2010 Google, Inc. // Author: riley@google.com (Michael Riley) // // \file // Classes for representing the mapping between state tuples and state Ids. #ifndef FST_LIB_STATE_TABLE_H__ #define FST_LIB_STATE_TABLE_H__ #include using std::deque; #include using std::vector; #include #include namespace fst { // STATE TABLES - these determine the bijective mapping between state // tuples (e.g. in composition triples of two FST states and a // composition filter state) and their corresponding state IDs. // They are classes, templated on state tuples, of the form: // // template // class StateTable { // public: // typedef typename T StateTuple; // // // Required constructors. // StateTable(); // // // Lookup state ID by tuple. If it doesn't exist, then add it. // StateId FindState(const StateTuple &); // // Lookup state tuple by state ID. // const StateTuple &Tuple(StateId) const; // // # of stored tuples. // StateId Size() const; // }; // // A state tuple has the form: // // template // struct StateTuple { // typedef typename S StateId; // // // Required constructors. // StateTuple(); // StateTuple(const StateTuple &); // }; // An implementation using a hash map for the tuple to state ID mapping. // The state tuple T must have == defined. H is the hash function. template class HashStateTable : public HashBiTable { public: typedef T StateTuple; typedef typename StateTuple::StateId StateId; using HashBiTable::FindId; using HashBiTable::FindEntry; using HashBiTable::Size; HashStateTable() : HashBiTable() {} // Reserves space for table_size elements. explicit HashStateTable(size_t table_size) : HashBiTable(table_size) {} StateId FindState(const StateTuple &tuple) { return FindId(tuple); } const StateTuple &Tuple(StateId s) const { return FindEntry(s); } }; // An implementation using a hash map for the tuple to state ID mapping. // The state tuple T must have == defined. H is the hash function. template class CompactHashStateTable : public CompactHashBiTable { public: typedef T StateTuple; typedef typename StateTuple::StateId StateId; using CompactHashBiTable::FindId; using CompactHashBiTable::FindEntry; using CompactHashBiTable::Size; CompactHashStateTable() : CompactHashBiTable() {} // Reserves space for 'table_size' elements. explicit CompactHashStateTable(size_t table_size) : CompactHashBiTable(table_size) {} StateId FindState(const StateTuple &tuple) { return FindId(tuple); } const StateTuple &Tuple(StateId s) const { return FindEntry(s); } }; // An implementation using a vector for the tuple to state mapping. // It is passed a function object FP that should fingerprint tuples // uniquely to an integer that can used as a vector index. Normally, // VectorStateTable constructs the FP object. The user can instead // pass in this object; in that case, VectorStateTable takes its // ownership. template class VectorStateTable : public VectorBiTable { public: typedef T StateTuple; typedef typename StateTuple::StateId StateId; using VectorBiTable::FindId; using VectorBiTable::FindEntry; using VectorBiTable::Size; using VectorBiTable::Fingerprint; // Reserves space for 'table_size' elements. explicit VectorStateTable(FP *fp = 0, size_t table_size = 0) : VectorBiTable(fp, table_size) {} StateId FindState(const StateTuple &tuple) { return FindId(tuple); } const StateTuple &Tuple(StateId s) const { return FindEntry(s); } }; // An implementation using a vector and a compact hash table. The // selecting functor S returns true for tuples to be hashed in the // vector. The fingerprinting functor FP returns a unique fingerprint // for each tuple to be hashed in the vector (these need to be // suitable for indexing in a vector). The hash functor H is used when // hashing tuple into the compact hash table. template class VectorHashStateTable : public VectorHashBiTable { public: typedef T StateTuple; typedef typename StateTuple::StateId StateId; using VectorHashBiTable::FindId; using VectorHashBiTable::FindEntry; using VectorHashBiTable::Size; using VectorHashBiTable::Selector; using VectorHashBiTable::Fingerprint; using VectorHashBiTable::Hash; VectorHashStateTable(S *s, FP *fp, H *h, size_t vector_size = 0, size_t tuple_size = 0) : VectorHashBiTable( s, fp, h, vector_size, tuple_size) {} StateId FindState(const StateTuple &tuple) { return FindId(tuple); } const StateTuple &Tuple(StateId s) const { return FindEntry(s); } }; // An implementation using a hash map for the tuple to state ID // mapping. This version permits erasing of states. The state tuple T // must have == defined and its default constructor must produce a // tuple that will never be seen. F is the hash function. template class ErasableStateTable : public ErasableBiTable { public: typedef T StateTuple; typedef typename StateTuple::StateId StateId; using ErasableBiTable::FindId; using ErasableBiTable::FindEntry; using ErasableBiTable::Size; using ErasableBiTable::Erase; ErasableStateTable() : ErasableBiTable() {} StateId FindState(const StateTuple &tuple) { return FindId(tuple); } const StateTuple &Tuple(StateId s) const { return FindEntry(s); } }; // // COMPOSITION STATE TUPLES AND TABLES // // The composition state table has the form: // // template // class ComposeStateTable { // public: // typedef A Arc; // typedef F FilterState; // typedef typename A::StateId StateId; // typedef ComposeStateTuple StateTuple; // // // Required constructors. Copy constructor does not copy state. // ComposeStateTable(const Fst &fst1, const Fst &fst2); // ComposeStateTable(const ComposeStateTable &table); // // Lookup state ID by tuple. If it doesn't exist, then add it. // StateId FindState(const StateTuple &); // // Lookup state tuple by state ID. // const StateTuple &Tuple(StateId) const; // // # of stored tuples. // StateId Size() const; // // Return true if error encountered // bool Error() const; // }; // Represents the composition state. template struct ComposeStateTuple { typedef S StateId; typedef F FilterState; ComposeStateTuple() : state_id1(kNoStateId), state_id2(kNoStateId), filter_state(FilterState::NoState()) {} ComposeStateTuple(StateId s1, StateId s2, const FilterState &f) : state_id1(s1), state_id2(s2), filter_state(f) {} StateId state_id1; // State Id on fst1 StateId state_id2; // State Id on fst2 FilterState filter_state; // State of composition filter }; // Equality of composition state tuples. template inline bool operator==(const ComposeStateTuple& x, const ComposeStateTuple& y) { if (&x == &y) return true; return x.state_id1 == y.state_id1 && x.state_id2 == y.state_id2 && x.filter_state == y.filter_state; } // Hashing of composition state tuples. template class ComposeHash { public: size_t operator()(const ComposeStateTuple& t) const { return t.state_id1 + t.state_id2 * kPrime0 + t.filter_state.Hash() * kPrime1; } private: static const size_t kPrime0; static const size_t kPrime1; }; template const size_t ComposeHash::kPrime0 = 7853; template const size_t ComposeHash::kPrime1 = 7867; // A HashStateTable over composition tuples. template , ComposeHash > > class GenericComposeStateTable : public H { public: typedef A Arc; typedef typename A::StateId StateId; typedef F FilterState; typedef ComposeStateTuple StateTuple; GenericComposeStateTable(const Fst &fst1, const Fst &fst2) {} // Reserves space for 'table_size' elements. GenericComposeStateTable(const Fst &fst1, const Fst &fst2, size_t table_size) : H(table_size) {} bool Error() const { return false; } private: void operator=(const GenericComposeStateTable &table); // disallow }; // Fingerprint for general composition tuples. template class ComposeFingerprint { public: typedef S StateId; typedef F FilterState; typedef ComposeStateTuple StateTuple; // Required but suboptimal constructor. ComposeFingerprint() : mult1_(8192), mult2_(8192) { LOG(WARNING) << "TupleFingerprint: # of FST states should be provided."; } // Constructor is provided the sizes of the input FSTs ComposeFingerprint(StateId nstates1, StateId nstates2) : mult1_(nstates1), mult2_(nstates1 * nstates2) { } size_t operator()(const StateTuple &tuple) { return tuple.state_id1 + tuple.state_id2 * mult1_ + tuple.filter_state.Hash() * mult2_; } private: ssize_t mult1_; ssize_t mult2_; }; // Useful when the first composition state determines the tuple. template class ComposeState1Fingerprint { public: typedef S StateId; typedef F FilterState; typedef ComposeStateTuple StateTuple; size_t operator()(const StateTuple &tuple) { return tuple.state_id1; } }; // Useful when the second composition state determines the tuple. template class ComposeState2Fingerprint { public: typedef S StateId; typedef F FilterState; typedef ComposeStateTuple StateTuple; size_t operator()(const StateTuple &tuple) { return tuple.state_id2; } }; // A VectorStateTable over composition tuples. This can be used when // the product of number of states in FST1 and FST2 (and the // composition filter state hash) is manageable. If the FSTs are not // expanded Fsts, they will first have their states counted. template class ProductComposeStateTable : public VectorStateTable, ComposeFingerprint > { public: typedef A Arc; typedef typename A::StateId StateId; typedef F FilterState; typedef ComposeStateTuple StateTuple; typedef VectorStateTable > StateTable; // Reserves space for 'table_size' elements. ProductComposeStateTable(const Fst &fst1, const Fst &fst2, size_t table_size = 0) : StateTable(new ComposeFingerprint(CountStates(fst1), CountStates(fst2)), table_size) {} ProductComposeStateTable(const ProductComposeStateTable &table) : StateTable(new ComposeFingerprint(table.Fingerprint())) {} bool Error() const { return false; } private: void operator=(const ProductComposeStateTable &table); // disallow }; // A VectorStateTable over composition tuples. This can be used when // FST1 is a string (satisfies kStringProperties) and FST2 is // epsilon-free and deterministic. It should be used with a // composition filter that creates at most one filter state per tuple // under these conditions (e.g. SequenceComposeFilter or // MatchComposeFilter). template class StringDetComposeStateTable : public VectorStateTable, ComposeState1Fingerprint > { public: typedef A Arc; typedef typename A::StateId StateId; typedef F FilterState; typedef ComposeStateTuple StateTuple; typedef VectorStateTable > StateTable; StringDetComposeStateTable(const Fst &fst1, const Fst &fst2) : error_(false) { uint64 props1 = kString; uint64 props2 = kIDeterministic | kNoIEpsilons; if (fst1.Properties(props1, true) != props1 || fst2.Properties(props2, true) != props2) { FSTERROR() << "StringDetComposeStateTable: fst1 not a string or" << " fst2 not input deterministic and epsilon-free"; error_ = true; } } StringDetComposeStateTable(const StringDetComposeStateTable &table) : StateTable(table), error_(table.error_) {} bool Error() const { return error_; } private: bool error_; void operator=(const StringDetComposeStateTable &table); // disallow }; // A VectorStateTable over composition tuples. This can be used when // FST2 is a string (satisfies kStringProperties) and FST1 is // epsilon-free and deterministic. It should be used with a // composition filter that creates at most one filter state per tuple // under these conditions (e.g. SequenceComposeFilter or // MatchComposeFilter). template class DetStringComposeStateTable : public VectorStateTable, ComposeState2Fingerprint > { public: typedef A Arc; typedef typename A::StateId StateId; typedef F FilterState; typedef ComposeStateTuple StateTuple; typedef VectorStateTable > StateTable; DetStringComposeStateTable(const Fst &fst1, const Fst &fst2) :error_(false) { uint64 props1 = kODeterministic | kNoOEpsilons; uint64 props2 = kString; if (fst1.Properties(props1, true) != props1 || fst2.Properties(props2, true) != props2) { FSTERROR() << "StringDetComposeStateTable: fst2 not a string or" << " fst1 not output deterministic and epsilon-free"; error_ = true; } } DetStringComposeStateTable(const DetStringComposeStateTable &table) : StateTable(table), error_(table.error_) {} bool Error() const { return error_; } private: bool error_; void operator=(const DetStringComposeStateTable &table); // disallow }; // An ErasableStateTable over composition tuples. The Erase(StateId) method // can be called if the user either is sure that composition will never return // to that tuple or doesn't care that if it does, it is assigned a new // state ID. template class ErasableComposeStateTable : public ErasableStateTable, ComposeHash > { public: typedef A Arc; typedef typename A::StateId StateId; typedef F FilterState; typedef ComposeStateTuple StateTuple; ErasableComposeStateTable(const Fst &fst1, const Fst &fst2) {} bool Error() const { return false; } private: void operator=(const ErasableComposeStateTable &table); // disallow }; } // namespace fst #endif // FST_LIB_STATE_TABLE_H__