diff options
author | Ted Yin <[email protected]> | 2015-08-14 17:42:26 +0800 |
---|---|---|
committer | Ted Yin <[email protected]> | 2015-08-14 17:42:26 +0800 |
commit | c3cffb58b9921d78753336421b52b9ffdaa5515c (patch) | |
tree | bfea20e97c200cf734021e3756d749c892e658a4 /kaldi_io/src/tools/openfst/include/fst/encode.h | |
parent | 10cce5f6a5c9e2f8e00d5a2a4d87c9cb7c26bf4c (diff) | |
parent | dfdd17afc2e984ec6c32ea01290f5c76309a456a (diff) |
Merge pull request #2 from yimmon/master
remove needless files
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/encode.h')
-rw-r--r-- | kaldi_io/src/tools/openfst/include/fst/encode.h | 599 |
1 files changed, 0 insertions, 599 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/encode.h b/kaldi_io/src/tools/openfst/include/fst/encode.h deleted file mode 100644 index 08b84cb..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/encode.h +++ /dev/null @@ -1,599 +0,0 @@ -// encode.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Johan Schalkwyk) -// -// \file -// Class to encode and decoder an fst. - -#ifndef FST_LIB_ENCODE_H__ -#define FST_LIB_ENCODE_H__ - -#include <climits> -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <string> -#include <vector> -using std::vector; - -#include <fst/arc-map.h> -#include <fst/rmfinalepsilon.h> - - -namespace fst { - -static const uint32 kEncodeLabels = 0x0001; -static const uint32 kEncodeWeights = 0x0002; -static const uint32 kEncodeFlags = 0x0003; // All non-internal flags - -static const uint32 kEncodeHasISymbols = 0x0004; // For internal use -static const uint32 kEncodeHasOSymbols = 0x0008; // For internal use - -enum EncodeType { ENCODE = 1, DECODE = 2 }; - -// Identifies stream data as an encode table (and its endianity) -static const int32 kEncodeMagicNumber = 2129983209; - - -// The following class encapsulates implementation details for the -// encoding and decoding of label/weight tuples used for encoding -// and decoding of Fsts. The EncodeTable is bidirectional. I.E it -// stores both the Tuple of encode labels and weights to a unique -// label, and the reverse. -template <class A> class EncodeTable { - public: - typedef typename A::Label Label; - typedef typename A::Weight Weight; - - // Encoded data consists of arc input/output labels and arc weight - struct Tuple { - Tuple() {} - Tuple(Label ilabel_, Label olabel_, Weight weight_) - : ilabel(ilabel_), olabel(olabel_), weight(weight_) {} - Tuple(const Tuple& tuple) - : ilabel(tuple.ilabel), olabel(tuple.olabel), weight(tuple.weight) {} - - Label ilabel; - Label olabel; - Weight weight; - }; - - // Comparison object for hashing EncodeTable Tuple(s). - class TupleEqual { - public: - bool operator()(const Tuple* x, const Tuple* y) const { - return (x->ilabel == y->ilabel && - x->olabel == y->olabel && - x->weight == y->weight); - } - }; - - // Hash function for EncodeTabe Tuples. Based on the encode flags - // we either hash the labels, weights or combination of them. - class TupleKey { - public: - TupleKey() - : encode_flags_(kEncodeLabels | kEncodeWeights) {} - - TupleKey(const TupleKey& key) - : encode_flags_(key.encode_flags_) {} - - explicit TupleKey(uint32 encode_flags) - : encode_flags_(encode_flags) {} - - size_t operator()(const Tuple* x) const { - size_t hash = x->ilabel; - const int lshift = 5; - const int rshift = CHAR_BIT * sizeof(size_t) - 5; - if (encode_flags_ & kEncodeLabels) - hash = hash << lshift ^ hash >> rshift ^ x->olabel; - if (encode_flags_ & kEncodeWeights) - hash = hash << lshift ^ hash >> rshift ^ x->weight.Hash(); - return hash; - } - - private: - int32 encode_flags_; - }; - - typedef unordered_map<const Tuple*, - Label, - TupleKey, - TupleEqual> EncodeHash; - - explicit EncodeTable(uint32 encode_flags) - : flags_(encode_flags), - encode_hash_(1024, TupleKey(encode_flags)), - isymbols_(0), osymbols_(0) {} - - ~EncodeTable() { - for (size_t i = 0; i < encode_tuples_.size(); ++i) { - delete encode_tuples_[i]; - } - delete isymbols_; - delete osymbols_; - } - - // Given an arc encode either input/ouptut labels or input/costs or both - Label Encode(const A &arc) { - const Tuple tuple(arc.ilabel, - flags_ & kEncodeLabels ? arc.olabel : 0, - flags_ & kEncodeWeights ? arc.weight : Weight::One()); - typename EncodeHash::const_iterator it = encode_hash_.find(&tuple); - if (it == encode_hash_.end()) { - encode_tuples_.push_back(new Tuple(tuple)); - encode_hash_[encode_tuples_.back()] = encode_tuples_.size(); - return encode_tuples_.size(); - } else { - return it->second; - } - } - - // Given an arc, look up its encoded label. Returns kNoLabel if not found. - Label GetLabel(const A &arc) const { - const Tuple tuple(arc.ilabel, - flags_ & kEncodeLabels ? arc.olabel : 0, - flags_ & kEncodeWeights ? arc.weight : Weight::One()); - typename EncodeHash::const_iterator it = encode_hash_.find(&tuple); - if (it == encode_hash_.end()) { - return kNoLabel; - } else { - return it->second; - } - } - - // Given an encode arc Label decode back to input/output labels and costs - const Tuple* Decode(Label key) const { - if (key < 1 || key > encode_tuples_.size()) { - LOG(ERROR) << "EncodeTable::Decode: unknown decode key: " << key; - return 0; - } - return encode_tuples_[key - 1]; - } - - size_t Size() const { return encode_tuples_.size(); } - - bool Write(ostream &strm, const string &source) const; - - static EncodeTable<A> *Read(istream &strm, const string &source); - - const uint32 flags() const { return flags_ & kEncodeFlags; } - - int RefCount() const { return ref_count_.count(); } - int IncrRefCount() { return ref_count_.Incr(); } - int DecrRefCount() { return ref_count_.Decr(); } - - - SymbolTable *InputSymbols() const { return isymbols_; } - - SymbolTable *OutputSymbols() const { return osymbols_; } - - void SetInputSymbols(const SymbolTable* syms) { - if (isymbols_) delete isymbols_; - if (syms) { - isymbols_ = syms->Copy(); - flags_ |= kEncodeHasISymbols; - } else { - isymbols_ = 0; - flags_ &= ~kEncodeHasISymbols; - } - } - - void SetOutputSymbols(const SymbolTable* syms) { - if (osymbols_) delete osymbols_; - if (syms) { - osymbols_ = syms->Copy(); - flags_ |= kEncodeHasOSymbols; - } else { - osymbols_ = 0; - flags_ &= ~kEncodeHasOSymbols; - } - } - - private: - uint32 flags_; - vector<Tuple*> encode_tuples_; - EncodeHash encode_hash_; - RefCounter ref_count_; - SymbolTable *isymbols_; // Pre-encoded ilabel symbol table - SymbolTable *osymbols_; // Pre-encoded olabel symbol table - - DISALLOW_COPY_AND_ASSIGN(EncodeTable); -}; - -template <class A> inline -bool EncodeTable<A>::Write(ostream &strm, const string &source) const { - WriteType(strm, kEncodeMagicNumber); - WriteType(strm, flags_); - int64 size = encode_tuples_.size(); - WriteType(strm, size); - for (size_t i = 0; i < size; ++i) { - const Tuple* tuple = encode_tuples_[i]; - WriteType(strm, tuple->ilabel); - WriteType(strm, tuple->olabel); - tuple->weight.Write(strm); - } - - if (flags_ & kEncodeHasISymbols) - isymbols_->Write(strm); - - if (flags_ & kEncodeHasOSymbols) - osymbols_->Write(strm); - - strm.flush(); - if (!strm) { - LOG(ERROR) << "EncodeTable::Write: write failed: " << source; - return false; - } - return true; -} - -template <class A> inline -EncodeTable<A> *EncodeTable<A>::Read(istream &strm, const string &source) { - int32 magic_number = 0; - ReadType(strm, &magic_number); - if (magic_number != kEncodeMagicNumber) { - LOG(ERROR) << "EncodeTable::Read: Bad encode table header: " << source; - return 0; - } - uint32 flags; - ReadType(strm, &flags); - EncodeTable<A> *table = new EncodeTable<A>(flags); - - int64 size; - ReadType(strm, &size); - if (!strm) { - LOG(ERROR) << "EncodeTable::Read: read failed: " << source; - return 0; - } - - for (size_t i = 0; i < size; ++i) { - Tuple* tuple = new Tuple(); - ReadType(strm, &tuple->ilabel); - ReadType(strm, &tuple->olabel); - tuple->weight.Read(strm); - if (!strm) { - LOG(ERROR) << "EncodeTable::Read: read failed: " << source; - return 0; - } - table->encode_tuples_.push_back(tuple); - table->encode_hash_[table->encode_tuples_.back()] = - table->encode_tuples_.size(); - } - - if (flags & kEncodeHasISymbols) - table->isymbols_ = SymbolTable::Read(strm, source); - - if (flags & kEncodeHasOSymbols) - table->osymbols_ = SymbolTable::Read(strm, source); - - return table; -} - - -// A mapper to encode/decode weighted transducers. Encoding of an -// Fst is useful for performing classical determinization or minimization -// on a weighted transducer by treating it as an unweighted acceptor over -// encoded labels. -// -// The Encode mapper stores the encoding in a local hash table (EncodeTable) -// This table is shared (and reference counted) between the encoder and -// decoder. A decoder has read only access to the EncodeTable. -// -// The EncodeMapper allows on the fly encoding of the machine. As the -// EncodeTable is generated the same table may by used to decode the machine -// on the fly. For example in the following sequence of operations -// -// Encode -> Determinize -> Decode -// -// we will use the encoding table generated during the encode step in the -// decode, even though the encoding is not complete. -// -template <class A> class EncodeMapper { - typedef typename A::Weight Weight; - typedef typename A::Label Label; - public: - EncodeMapper(uint32 flags, EncodeType type) - : flags_(flags), - type_(type), - table_(new EncodeTable<A>(flags)), - error_(false) {} - - EncodeMapper(const EncodeMapper& mapper) - : flags_(mapper.flags_), - type_(mapper.type_), - table_(mapper.table_), - error_(false) { - table_->IncrRefCount(); - } - - // Copy constructor but setting the type, typically to DECODE - EncodeMapper(const EncodeMapper& mapper, EncodeType type) - : flags_(mapper.flags_), - type_(type), - table_(mapper.table_), - error_(mapper.error_) { - table_->IncrRefCount(); - } - - ~EncodeMapper() { - if (!table_->DecrRefCount()) delete table_; - } - - A operator()(const A &arc); - - MapFinalAction FinalAction() const { - return (type_ == ENCODE && (flags_ & kEncodeWeights)) ? - MAP_REQUIRE_SUPERFINAL : MAP_NO_SUPERFINAL; - } - - MapSymbolsAction InputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; } - - MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;} - - uint64 Properties(uint64 inprops) { - uint64 outprops = inprops; - if (error_) outprops |= kError; - - uint64 mask = kFstProperties; - if (flags_ & kEncodeLabels) - mask &= kILabelInvariantProperties & kOLabelInvariantProperties; - if (flags_ & kEncodeWeights) - mask &= kILabelInvariantProperties & kWeightInvariantProperties & - (type_ == ENCODE ? kAddSuperFinalProperties : - kRmSuperFinalProperties); - - return outprops & mask; - } - - const uint32 flags() const { return flags_; } - const EncodeType type() const { return type_; } - const EncodeTable<A> &table() const { return *table_; } - - bool Write(ostream &strm, const string& source) { - return table_->Write(strm, source); - } - - bool Write(const string& filename) { - ofstream strm(filename.c_str(), ofstream::out | ofstream::binary); - if (!strm) { - LOG(ERROR) << "EncodeMap: Can't open file: " << filename; - return false; - } - return Write(strm, filename); - } - - static EncodeMapper<A> *Read(istream &strm, - const string& source, - EncodeType type = ENCODE) { - EncodeTable<A> *table = EncodeTable<A>::Read(strm, source); - return table ? new EncodeMapper(table->flags(), type, table) : 0; - } - - static EncodeMapper<A> *Read(const string& filename, - EncodeType type = ENCODE) { - ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); - if (!strm) { - LOG(ERROR) << "EncodeMap: Can't open file: " << filename; - return NULL; - } - return Read(strm, filename, type); - } - - SymbolTable *InputSymbols() const { return table_->InputSymbols(); } - - SymbolTable *OutputSymbols() const { return table_->OutputSymbols(); } - - void SetInputSymbols(const SymbolTable* syms) { - table_->SetInputSymbols(syms); - } - - void SetOutputSymbols(const SymbolTable* syms) { - table_->SetOutputSymbols(syms); - } - - private: - uint32 flags_; - EncodeType type_; - EncodeTable<A>* table_; - bool error_; - - explicit EncodeMapper(uint32 flags, EncodeType type, EncodeTable<A> *table) - : flags_(flags), type_(type), table_(table) {} - void operator=(const EncodeMapper &); // Disallow. -}; - -template <class A> inline -A EncodeMapper<A>::operator()(const A &arc) { - if (type_ == ENCODE) { // labels and/or weights to single label - if ((arc.nextstate == kNoStateId && !(flags_ & kEncodeWeights)) || - (arc.nextstate == kNoStateId && (flags_ & kEncodeWeights) && - arc.weight == Weight::Zero())) { - return arc; - } else { - Label label = table_->Encode(arc); - return A(label, - flags_ & kEncodeLabels ? label : arc.olabel, - flags_ & kEncodeWeights ? Weight::One() : arc.weight, - arc.nextstate); - } - } else { // type_ == DECODE - if (arc.nextstate == kNoStateId) { - return arc; - } else { - if (arc.ilabel == 0) return arc; - if (flags_ & kEncodeLabels && arc.ilabel != arc.olabel) { - FSTERROR() << "EncodeMapper: Label-encoded arc has different " - "input and output labels"; - error_ = true; - } - if (flags_ & kEncodeWeights && arc.weight != Weight::One()) { - FSTERROR() << - "EncodeMapper: Weight-encoded arc has non-trivial weight"; - error_ = true; - } - const typename EncodeTable<A>::Tuple* tuple = table_->Decode(arc.ilabel); - if (!tuple) { - FSTERROR() << "EncodeMapper: decode failed"; - error_ = true; - return A(kNoLabel, kNoLabel, Weight::NoWeight(), arc.nextstate); - } else { - return A(tuple->ilabel, - flags_ & kEncodeLabels ? tuple->olabel : arc.olabel, - flags_ & kEncodeWeights ? tuple->weight : arc.weight, - arc.nextstate); - } - } - } -} - - -// Complexity: O(nstates + narcs) -template<class A> inline -void Encode(MutableFst<A> *fst, EncodeMapper<A>* mapper) { - mapper->SetInputSymbols(fst->InputSymbols()); - mapper->SetOutputSymbols(fst->OutputSymbols()); - ArcMap(fst, mapper); -} - -template<class A> inline -void Decode(MutableFst<A>* fst, const EncodeMapper<A>& mapper) { - ArcMap(fst, EncodeMapper<A>(mapper, DECODE)); - RmFinalEpsilon(fst); - fst->SetInputSymbols(mapper.InputSymbols()); - fst->SetOutputSymbols(mapper.OutputSymbols()); -} - - -// On the fly label and/or weight encoding of input Fst -// -// Complexity: -// - Constructor: O(1) -// - Traversal: O(nstates_visited + narcs_visited), assuming constant -// time to visit an input state or arc. -template <class A> -class EncodeFst : public ArcMapFst<A, A, EncodeMapper<A> > { - public: - typedef A Arc; - typedef EncodeMapper<A> C; - typedef ArcMapFstImpl< A, A, EncodeMapper<A> > Impl; - using ImplToFst<Impl>::GetImpl; - - EncodeFst(const Fst<A> &fst, EncodeMapper<A>* encoder) - : ArcMapFst<A, A, C>(fst, encoder, ArcMapFstOptions()) { - encoder->SetInputSymbols(fst.InputSymbols()); - encoder->SetOutputSymbols(fst.OutputSymbols()); - } - - EncodeFst(const Fst<A> &fst, const EncodeMapper<A>& encoder) - : ArcMapFst<A, A, C>(fst, encoder, ArcMapFstOptions()) {} - - // See Fst<>::Copy() for doc. - EncodeFst(const EncodeFst<A> &fst, bool copy = false) - : ArcMapFst<A, A, C>(fst, copy) {} - - // Get a copy of this EncodeFst. See Fst<>::Copy() for further doc. - virtual EncodeFst<A> *Copy(bool safe = false) const { - if (safe) { - FSTERROR() << "EncodeFst::Copy(true): not allowed."; - GetImpl()->SetProperties(kError, kError); - } - return new EncodeFst(*this); - } -}; - - -// On the fly label and/or weight encoding of input Fst -// -// Complexity: -// - Constructor: O(1) -// - Traversal: O(nstates_visited + narcs_visited), assuming constant -// time to visit an input state or arc. -template <class A> -class DecodeFst : public ArcMapFst<A, A, EncodeMapper<A> > { - public: - typedef A Arc; - typedef EncodeMapper<A> C; - typedef ArcMapFstImpl< A, A, EncodeMapper<A> > Impl; - using ImplToFst<Impl>::GetImpl; - - DecodeFst(const Fst<A> &fst, const EncodeMapper<A>& encoder) - : ArcMapFst<A, A, C>(fst, - EncodeMapper<A>(encoder, DECODE), - ArcMapFstOptions()) { - GetImpl()->SetInputSymbols(encoder.InputSymbols()); - GetImpl()->SetOutputSymbols(encoder.OutputSymbols()); - } - - // See Fst<>::Copy() for doc. - DecodeFst(const DecodeFst<A> &fst, bool safe = false) - : ArcMapFst<A, A, C>(fst, safe) {} - - // Get a copy of this DecodeFst. See Fst<>::Copy() for further doc. - virtual DecodeFst<A> *Copy(bool safe = false) const { - return new DecodeFst(*this, safe); - } -}; - - -// Specialization for EncodeFst. -template <class A> -class StateIterator< EncodeFst<A> > - : public StateIterator< ArcMapFst<A, A, EncodeMapper<A> > > { - public: - explicit StateIterator(const EncodeFst<A> &fst) - : StateIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst) {} -}; - - -// Specialization for EncodeFst. -template <class A> -class ArcIterator< EncodeFst<A> > - : public ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > > { - public: - ArcIterator(const EncodeFst<A> &fst, typename A::StateId s) - : ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst, s) {} -}; - - -// Specialization for DecodeFst. -template <class A> -class StateIterator< DecodeFst<A> > - : public StateIterator< ArcMapFst<A, A, EncodeMapper<A> > > { - public: - explicit StateIterator(const DecodeFst<A> &fst) - : StateIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst) {} -}; - - -// Specialization for DecodeFst. -template <class A> -class ArcIterator< DecodeFst<A> > - : public ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > > { - public: - ArcIterator(const DecodeFst<A> &fst, typename A::StateId s) - : ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst, s) {} -}; - - -// Useful aliases when using StdArc. -typedef EncodeFst<StdArc> StdEncodeFst; - -typedef DecodeFst<StdArc> StdDecodeFst; - -} // namespace fst - -#endif // FST_LIB_ENCODE_H__ |