From 96a32415ab43377cf1575bd3f4f2980f58028209 Mon Sep 17 00:00:00 2001 From: Determinant Date: Fri, 14 Aug 2015 11:51:42 +0800 Subject: add implementation for kaldi io (by ymz) --- .../src/tools/openfst/include/fst/state-table.h | 481 +++++++++++++++++++++ 1 file changed, 481 insertions(+) create mode 100644 kaldi_io/src/tools/openfst/include/fst/state-table.h (limited to 'kaldi_io/src/tools/openfst/include/fst/state-table.h') diff --git a/kaldi_io/src/tools/openfst/include/fst/state-table.h b/kaldi_io/src/tools/openfst/include/fst/state-table.h new file mode 100644 index 0000000..d8107a1 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/state-table.h @@ -0,0 +1,481 @@ +// state-table.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: riley@google.com (Michael Riley) +// +// \file +// Classes for representing the mapping between state tuples and state Ids. + +#ifndef FST_LIB_STATE_TABLE_H__ +#define FST_LIB_STATE_TABLE_H__ + +#include +using std::deque; +#include +using std::vector; + +#include +#include + + +namespace fst { + +// STATE TABLES - these determine the bijective mapping between state +// tuples (e.g. in composition triples of two FST states and a +// composition filter state) and their corresponding state IDs. +// They are classes, templated on state tuples, of the form: +// +// template +// class StateTable { +// public: +// typedef typename T StateTuple; +// +// // Required constructors. +// StateTable(); +// +// // Lookup state ID by tuple. If it doesn't exist, then add it. +// StateId FindState(const StateTuple &); +// // Lookup state tuple by state ID. +// const StateTuple &Tuple(StateId) const; +// // # of stored tuples. +// StateId Size() const; +// }; +// +// A state tuple has the form: +// +// template +// struct StateTuple { +// typedef typename S StateId; +// +// // Required constructors. +// StateTuple(); +// StateTuple(const StateTuple &); +// }; + + +// An implementation using a hash map for the tuple to state ID mapping. +// The state tuple T must have == defined. H is the hash function. +template +class HashStateTable : public HashBiTable { + public: + typedef T StateTuple; + typedef typename StateTuple::StateId StateId; + using HashBiTable::FindId; + using HashBiTable::FindEntry; + using HashBiTable::Size; + + HashStateTable() : HashBiTable() {} + + // Reserves space for table_size elements. + explicit HashStateTable(size_t table_size) + : HashBiTable(table_size) {} + + StateId FindState(const StateTuple &tuple) { return FindId(tuple); } + const StateTuple &Tuple(StateId s) const { return FindEntry(s); } +}; + + +// An implementation using a hash map for the tuple to state ID mapping. +// The state tuple T must have == defined. H is the hash function. +template +class CompactHashStateTable + : public CompactHashBiTable { + public: + typedef T StateTuple; + typedef typename StateTuple::StateId StateId; + using CompactHashBiTable::FindId; + using CompactHashBiTable::FindEntry; + using CompactHashBiTable::Size; + + CompactHashStateTable() : CompactHashBiTable() {} + + // Reserves space for 'table_size' elements. + explicit CompactHashStateTable(size_t table_size) + : CompactHashBiTable(table_size) {} + + StateId FindState(const StateTuple &tuple) { return FindId(tuple); } + const StateTuple &Tuple(StateId s) const { return FindEntry(s); } +}; + +// An implementation using a vector for the tuple to state mapping. +// It is passed a function object FP that should fingerprint tuples +// uniquely to an integer that can used as a vector index. Normally, +// VectorStateTable constructs the FP object. The user can instead +// pass in this object; in that case, VectorStateTable takes its +// ownership. +template +class VectorStateTable + : public VectorBiTable { + public: + typedef T StateTuple; + typedef typename StateTuple::StateId StateId; + using VectorBiTable::FindId; + using VectorBiTable::FindEntry; + using VectorBiTable::Size; + using VectorBiTable::Fingerprint; + + // Reserves space for 'table_size' elements. + explicit VectorStateTable(FP *fp = 0, size_t table_size = 0) + : VectorBiTable(fp, table_size) {} + + StateId FindState(const StateTuple &tuple) { return FindId(tuple); } + const StateTuple &Tuple(StateId s) const { return FindEntry(s); } +}; + + +// An implementation using a vector and a compact hash table. The +// selecting functor S returns true for tuples to be hashed in the +// vector. The fingerprinting functor FP returns a unique fingerprint +// for each tuple to be hashed in the vector (these need to be +// suitable for indexing in a vector). The hash functor H is used when +// hashing tuple into the compact hash table. +template +class VectorHashStateTable + : public VectorHashBiTable { + public: + typedef T StateTuple; + typedef typename StateTuple::StateId StateId; + using VectorHashBiTable::FindId; + using VectorHashBiTable::FindEntry; + using VectorHashBiTable::Size; + using VectorHashBiTable::Selector; + using VectorHashBiTable::Fingerprint; + using VectorHashBiTable::Hash; + + VectorHashStateTable(S *s, FP *fp, H *h, + size_t vector_size = 0, + size_t tuple_size = 0) + : VectorHashBiTable( + s, fp, h, vector_size, tuple_size) {} + + StateId FindState(const StateTuple &tuple) { return FindId(tuple); } + const StateTuple &Tuple(StateId s) const { return FindEntry(s); } +}; + + +// An implementation using a hash map for the tuple to state ID +// mapping. This version permits erasing of states. The state tuple T +// must have == defined and its default constructor must produce a +// tuple that will never be seen. F is the hash function. +template +class ErasableStateTable : public ErasableBiTable { + public: + typedef T StateTuple; + typedef typename StateTuple::StateId StateId; + using ErasableBiTable::FindId; + using ErasableBiTable::FindEntry; + using ErasableBiTable::Size; + using ErasableBiTable::Erase; + + ErasableStateTable() : ErasableBiTable() {} + StateId FindState(const StateTuple &tuple) { return FindId(tuple); } + const StateTuple &Tuple(StateId s) const { return FindEntry(s); } +}; + +// +// COMPOSITION STATE TUPLES AND TABLES +// +// The composition state table has the form: +// +// template +// class ComposeStateTable { +// public: +// typedef A Arc; +// typedef F FilterState; +// typedef typename A::StateId StateId; +// typedef ComposeStateTuple StateTuple; +// +// // Required constructors. Copy constructor does not copy state. +// ComposeStateTable(const Fst &fst1, const Fst &fst2); +// ComposeStateTable(const ComposeStateTable &table); +// // Lookup state ID by tuple. If it doesn't exist, then add it. +// StateId FindState(const StateTuple &); +// // Lookup state tuple by state ID. +// const StateTuple &Tuple(StateId) const; +// // # of stored tuples. +// StateId Size() const; +// // Return true if error encountered +// bool Error() const; +// }; + +// Represents the composition state. +template +struct ComposeStateTuple { + typedef S StateId; + typedef F FilterState; + + ComposeStateTuple() + : state_id1(kNoStateId), state_id2(kNoStateId), + filter_state(FilterState::NoState()) {} + + ComposeStateTuple(StateId s1, StateId s2, const FilterState &f) + : state_id1(s1), state_id2(s2), filter_state(f) {} + + StateId state_id1; // State Id on fst1 + StateId state_id2; // State Id on fst2 + FilterState filter_state; // State of composition filter +}; + +// Equality of composition state tuples. +template +inline bool operator==(const ComposeStateTuple& x, + const ComposeStateTuple& y) { + if (&x == &y) + return true; + return x.state_id1 == y.state_id1 && + x.state_id2 == y.state_id2 && + x.filter_state == y.filter_state; +} + + +// Hashing of composition state tuples. +template +class ComposeHash { + public: + size_t operator()(const ComposeStateTuple& t) const { + return t.state_id1 + t.state_id2 * kPrime0 + + t.filter_state.Hash() * kPrime1; + } + private: + static const size_t kPrime0; + static const size_t kPrime1; +}; + +template +const size_t ComposeHash::kPrime0 = 7853; + +template +const size_t ComposeHash::kPrime1 = 7867; + + +// A HashStateTable over composition tuples. +template , + ComposeHash > > +class GenericComposeStateTable : public H { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef F FilterState; + typedef ComposeStateTuple StateTuple; + + GenericComposeStateTable(const Fst &fst1, const Fst &fst2) {} + + // Reserves space for 'table_size' elements. + GenericComposeStateTable(const Fst &fst1, const Fst &fst2, + size_t table_size) : H(table_size) {} + + bool Error() const { return false; } + + private: + void operator=(const GenericComposeStateTable &table); // disallow +}; + + +// Fingerprint for general composition tuples. +template +class ComposeFingerprint { + public: + typedef S StateId; + typedef F FilterState; + typedef ComposeStateTuple StateTuple; + + // Required but suboptimal constructor. + ComposeFingerprint() : mult1_(8192), mult2_(8192) { + LOG(WARNING) << "TupleFingerprint: # of FST states should be provided."; + } + + // Constructor is provided the sizes of the input FSTs + ComposeFingerprint(StateId nstates1, StateId nstates2) + : mult1_(nstates1), mult2_(nstates1 * nstates2) { } + + size_t operator()(const StateTuple &tuple) { + return tuple.state_id1 + tuple.state_id2 * mult1_ + + tuple.filter_state.Hash() * mult2_; + } + + private: + ssize_t mult1_; + ssize_t mult2_; +}; + + +// Useful when the first composition state determines the tuple. +template +class ComposeState1Fingerprint { + public: + typedef S StateId; + typedef F FilterState; + typedef ComposeStateTuple StateTuple; + + size_t operator()(const StateTuple &tuple) { return tuple.state_id1; } +}; + + +// Useful when the second composition state determines the tuple. +template +class ComposeState2Fingerprint { + public: + typedef S StateId; + typedef F FilterState; + typedef ComposeStateTuple StateTuple; + + size_t operator()(const StateTuple &tuple) { return tuple.state_id2; } +}; + + +// A VectorStateTable over composition tuples. This can be used when +// the product of number of states in FST1 and FST2 (and the +// composition filter state hash) is manageable. If the FSTs are not +// expanded Fsts, they will first have their states counted. +template +class ProductComposeStateTable : public +VectorStateTable, + ComposeFingerprint > { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef F FilterState; + typedef ComposeStateTuple StateTuple; + typedef VectorStateTable > StateTable; + + // Reserves space for 'table_size' elements. + ProductComposeStateTable(const Fst &fst1, const Fst &fst2, + size_t table_size = 0) + : StateTable(new ComposeFingerprint(CountStates(fst1), + CountStates(fst2)), + table_size) {} + + ProductComposeStateTable(const ProductComposeStateTable &table) + : StateTable(new ComposeFingerprint(table.Fingerprint())) {} + + bool Error() const { return false; } + + private: + void operator=(const ProductComposeStateTable &table); // disallow +}; + +// A VectorStateTable over composition tuples. This can be used when +// FST1 is a string (satisfies kStringProperties) and FST2 is +// epsilon-free and deterministic. It should be used with a +// composition filter that creates at most one filter state per tuple +// under these conditions (e.g. SequenceComposeFilter or +// MatchComposeFilter). +template +class StringDetComposeStateTable : public +VectorStateTable, + ComposeState1Fingerprint > { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef F FilterState; + typedef ComposeStateTuple StateTuple; + typedef VectorStateTable > StateTable; + + StringDetComposeStateTable(const Fst &fst1, const Fst &fst2) + : error_(false) { + uint64 props1 = kString; + uint64 props2 = kIDeterministic | kNoIEpsilons; + if (fst1.Properties(props1, true) != props1 || + fst2.Properties(props2, true) != props2) { + FSTERROR() << "StringDetComposeStateTable: fst1 not a string or" + << " fst2 not input deterministic and epsilon-free"; + error_ = true; + } + } + + StringDetComposeStateTable(const StringDetComposeStateTable &table) + : StateTable(table), error_(table.error_) {} + + bool Error() const { return error_; } + + private: + bool error_; + + void operator=(const StringDetComposeStateTable &table); // disallow +}; + + +// A VectorStateTable over composition tuples. This can be used when +// FST2 is a string (satisfies kStringProperties) and FST1 is +// epsilon-free and deterministic. It should be used with a +// composition filter that creates at most one filter state per tuple +// under these conditions (e.g. SequenceComposeFilter or +// MatchComposeFilter). +template +class DetStringComposeStateTable : public +VectorStateTable, + ComposeState2Fingerprint > { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef F FilterState; + typedef ComposeStateTuple StateTuple; + typedef VectorStateTable > StateTable; + + DetStringComposeStateTable(const Fst &fst1, const Fst &fst2) + :error_(false) { + uint64 props1 = kODeterministic | kNoOEpsilons; + uint64 props2 = kString; + if (fst1.Properties(props1, true) != props1 || + fst2.Properties(props2, true) != props2) { + FSTERROR() << "StringDetComposeStateTable: fst2 not a string or" + << " fst1 not output deterministic and epsilon-free"; + error_ = true; + } + } + + DetStringComposeStateTable(const DetStringComposeStateTable &table) + : StateTable(table), error_(table.error_) {} + + bool Error() const { return error_; } + + private: + bool error_; + + void operator=(const DetStringComposeStateTable &table); // disallow +}; + + +// An ErasableStateTable over composition tuples. The Erase(StateId) method +// can be called if the user either is sure that composition will never return +// to that tuple or doesn't care that if it does, it is assigned a new +// state ID. +template +class ErasableComposeStateTable : public +ErasableStateTable, + ComposeHash > { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef F FilterState; + typedef ComposeStateTuple StateTuple; + + ErasableComposeStateTable(const Fst &fst1, const Fst &fst2) {} + + bool Error() const { return false; } + + private: + void operator=(const ErasableComposeStateTable &table); // disallow +}; + +} // namespace fst + +#endif // FST_LIB_STATE_TABLE_H__ -- cgit v1.2.3-70-g09d2