diff options
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/compact-fst.h')
-rw-r--r-- | kaldi_io/src/tools/openfst/include/fst/compact-fst.h | 1438 |
1 files changed, 0 insertions, 1438 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/compact-fst.h b/kaldi_io/src/tools/openfst/include/fst/compact-fst.h deleted file mode 100644 index 6db3317..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/compact-fst.h +++ /dev/null @@ -1,1438 +0,0 @@ -// compact-fst.h - - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// FST Class for memory-efficient representation of common types of -// FSTs: linear automata, acceptors, unweighted FSTs, ... - -#ifndef FST_LIB_COMPACT_FST_H__ -#define FST_LIB_COMPACT_FST_H__ - -#include <iterator> -#include <utility> -using std::pair; using std::make_pair; -#include <vector> -using std::vector; - -#include <fst/cache.h> -#include <fst/expanded-fst.h> -#include <fst/fst-decl.h> // For optional argument declarations -#include <fst/mapped-file.h> -#include <fst/matcher.h> -#include <fst/test-properties.h> -#include <fst/util.h> - - -namespace fst { - -struct CompactFstOptions : public CacheOptions { - // CompactFst default caching behaviour is to do no caching. Most - // compactors are cheap and therefore we save memory by not doing - // caching. - CompactFstOptions() : CacheOptions(true, 0) {} - CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {} -}; - -// Compactor Interface - class determinies how arcs and final weights -// are compacted and expanded. -// -// Final weights are treated as transitions to the superfinal state, -// i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId. -// -// There are two types of compactors: -// -// * Fixed out-degree compactors: 'compactor.Size()' returns a -// positive integer 's'. An FST can be compacted by this compactor -// only if each state has exactly 's' outgoing transitions (counting a -// non-Zero() final weight as a transition). A typical example is a -// compactor for string FSTs, i.e. 's == 1'. -// -// * Variable out-degree compactors: 'compactor.Size() == -1'. There -// are no out-degree restrictions for these compactors. -// -// -// class Compactor { -// public: -// // Element is the type of the compacted transitions. -// typedef ... Element; -// // Return the compacted representation of a transition 'arc' -// // at a state 's'. -// Element Compact(StateId s, const Arc &arc); -// // Return the transition at state 's' represented by the compacted -// // transition 'e'. -// Arc Expand(StateId s, const Element &e); -// // Return -1 for variable out-degree compactors, and the mandatory -// // out-degree otherwise. -// ssize_t Size(); -// // Test whether 'fst' can be compacted by this compactor. -// bool Compatible(const Fst<A> &fst); -// // Return the properties that are always true for an fst -// // compacted using this compactor -// uint64 Properties(); -// // Return a string identifying the type of compactor. -// static const string &Type(); -// // Write a compactor to a file. -// bool Write(ostream &strm); -// // Read a compactor from a file. -// static Compactor *Read(istream &strm); -// // Default constructor (optional, see comment below). -// Compactor(); -// }; -// -// The default constructor is only required for FST_REGISTER to work -// (i.e. enabling Convert() and the command-line utilities to work -// with this new compactor). However, a default constructor always -// needs to be specify for this code to compile, but one can have it -// simply raised an error when called: -// -// Compactor::Compactor() { -// FSTERROR() << "Compactor: no default constructor"; -// } - - -// Implementation data for Compact Fst, which can shared between otherwise -// independent copies. -// -// The implementation contains two arrays: 'states_' and 'compacts_'. -// -// For fixed out-degree compactors, the 'states_' array is unallocated. -// The 'compacts_' contains the compacted transitions. Its size is -// 'ncompacts_'. The outgoing transitions at a given state are stored -// consecutively. For a given state 's', its 'compactor.Size()' outgoing -// transitions (including superfinal transition when 's' is final), are -// stored in position ['s*compactor.Size()', '(s+1)*compactor_.Size()'). -// -// For variable out-degree compactors, the states_ array has size -// 'nstates_ + 1' and contains pointers to positions into 'compacts_'. -// For a given state 's', the compacted transitions of 's' are -// stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'. -// By convention, 'states_[nstates_] == ncompacts_'. -// -// In both cases, the superfinal transitons (when 's' is final, i.e. -// 'Final(s) != Weight::Zero()') is stored first. -// -// The unsigned type U is used to represent indices into the compacts_ -// array. -template <class E, class U> -class CompactFstData { - public: - typedef E CompactElement; - typedef U Unsigned; - - CompactFstData() - : states_region_(0), - compacts_region_(0), - states_(0), - compacts_(0), - nstates_(0), - ncompacts_(0), - narcs_(0), - start_(kNoStateId), - error_(false) {} - - template <class A, class Compactor> - CompactFstData(const Fst<A> &fst, const Compactor &compactor); - - template <class Iterator, class Compactor> - CompactFstData(const Iterator &begin, const Iterator &end, - const Compactor &compactor); - - ~CompactFstData() { - if (states_region_ == NULL) { - delete [] states_; - } - delete states_region_; - if (compacts_region_ == NULL) { - delete [] compacts_; - } - delete compacts_region_; - } - - template <class Compactor> - static CompactFstData<E, U> *Read(istream &strm, - const FstReadOptions &opts, - const FstHeader &hdr, - const Compactor &compactor); - - bool Write(ostream &strm, const FstWriteOptions &opts) const; - - Unsigned States(ssize_t i) const { return states_[i]; } - const CompactElement &Compacts(size_t i) const { return compacts_[i]; } - size_t NumStates() const { return nstates_; } - size_t NumCompacts() const { return ncompacts_; } - size_t NumArcs() const { return narcs_; } - ssize_t Start() const { return start_; } - - int RefCount() const { return ref_count_.count(); } - int IncrRefCount() { return ref_count_.Incr(); } - int DecrRefCount() { return ref_count_.Decr(); } - - bool Error() const { return error_; } - - private: - MappedFile *states_region_; - MappedFile *compacts_region_; - Unsigned *states_; - CompactElement *compacts_; - size_t nstates_; - size_t ncompacts_; - size_t narcs_; - ssize_t start_; - RefCounter ref_count_; - bool error_; -}; - -template <class E, class U> -template <class A, class C> -CompactFstData<E, U>::CompactFstData(const Fst<A> &fst, const C &compactor) - : states_region_(0), - compacts_region_(0), - states_(0), - compacts_(0), - nstates_(0), - ncompacts_(0), - narcs_(0), - start_(kNoStateId), - error_(false) { - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - start_ = fst.Start(); - // Count # of states and arcs. - StateId nfinals = 0; - for (StateIterator< Fst<A> > siter(fst); - !siter.Done(); - siter.Next()) { - ++nstates_; - StateId s = siter.Value(); - for (ArcIterator< Fst<A> > aiter(fst, s); - !aiter.Done(); - aiter.Next()) - ++narcs_; - if (fst.Final(s) != Weight::Zero()) ++nfinals; - } - if (compactor.Size() == -1) { - states_ = new Unsigned[nstates_ + 1]; - ncompacts_ = narcs_ + nfinals; - compacts_ = new CompactElement[ncompacts_]; - states_[nstates_] = ncompacts_; - } else { - states_ = 0; - ncompacts_ = nstates_ * compactor.Size(); - if ((narcs_ + nfinals) != ncompacts_) { - FSTERROR() << "CompactFstData: compactor incompatible with fst"; - error_ = true; - return; - } - compacts_ = new CompactElement[ncompacts_]; - } - size_t pos = 0, fpos = 0; - for (StateId s = 0; s < nstates_; ++s) { - fpos = pos; - if (compactor.Size() == -1) - states_[s] = pos; - if (fst.Final(s) != Weight::Zero()) - compacts_[pos++] = compactor.Compact(s, A(kNoLabel, kNoLabel, - fst.Final(s), kNoStateId)); - for (ArcIterator< Fst<A> > aiter(fst, s); - !aiter.Done(); - aiter.Next()) { - compacts_[pos++] = compactor.Compact(s, aiter.Value()); - } - if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) { - FSTERROR() << "CompactFstData: compactor incompatible with fst"; - error_ = true; - return; - } - } - if (pos != ncompacts_) { - FSTERROR() << "CompactFstData: compactor incompatible with fst"; - error_ = true; - return; - } -} - -template <class E, class U> -template <class Iterator, class C> -CompactFstData<E, U>::CompactFstData(const Iterator &begin, - const Iterator &end, - const C &compactor) - : states_region_(0), - compacts_region_(0), - states_(0), - compacts_(0), - nstates_(0), - ncompacts_(0), - narcs_(0), - start_(kNoStateId), - error_(false) { - typedef typename C::Arc Arc; - typedef typename Arc::Weight Weight; - if (compactor.Size() != -1) { - ncompacts_ = distance(begin, end); - if (compactor.Size() == 1) { - // For strings, allow implicit final weight. - // Empty input is the empty string. - if (ncompacts_ == 0) { - ++ncompacts_; - } else { - Arc arc = compactor.Expand(ncompacts_ - 1, - *(begin + (ncompacts_ - 1))); - if (arc.ilabel != kNoLabel) - ++ncompacts_; - } - } - if (ncompacts_ % compactor.Size()) { - FSTERROR() << "CompactFstData: size of input container incompatible" - << " with compactor"; - error_ = true; - return; - } - if (ncompacts_ == 0) - return; - start_ = 0; - nstates_ = ncompacts_ / compactor.Size(); - compacts_ = new CompactElement[ncompacts_]; - size_t i = 0; - Iterator it = begin; - for(; it != end; ++it, ++i){ - compacts_[i] = *it; - if (compactor.Expand(i, *it).ilabel != kNoLabel) - ++narcs_; - } - if (i < ncompacts_) - compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel, - Weight::One(), kNoStateId)); - } else { - if (distance(begin, end) == 0) - return; - // Count # of states, arcs and compacts. - Iterator it = begin; - for(size_t i = 0; it != end; ++it, ++i) { - Arc arc = compactor.Expand(i, *it); - if (arc.ilabel != kNoLabel) { - ++narcs_; - ++ncompacts_; - } else { - ++nstates_; - if (arc.weight != Weight::Zero()) - ++ncompacts_; - } - } - start_ = 0; - compacts_ = new CompactElement[ncompacts_]; - states_ = new Unsigned[nstates_ + 1]; - states_[nstates_] = ncompacts_; - size_t i = 0, s = 0; - for(it = begin; it != end; ++it) { - Arc arc = compactor.Expand(i, *it); - if (arc.ilabel != kNoLabel) { - compacts_[i++] = *it; - } else { - states_[s++] = i; - if (arc.weight != Weight::Zero()) - compacts_[i++] = *it; - } - } - if ((s != nstates_) || (i != ncompacts_)) { - FSTERROR() << "CompactFstData: ill-formed input container"; - error_ = true; - return; - } - } -} - -template <class E, class U> -template <class C> -CompactFstData<E, U> *CompactFstData<E, U>::Read( - istream &strm, - const FstReadOptions &opts, - const FstHeader &hdr, - const C &compactor) { - CompactFstData<E, U> *data = new CompactFstData<E, U>(); - data->start_ = hdr.Start(); - data->nstates_ = hdr.NumStates(); - data->narcs_ = hdr.NumArcs(); - - if (compactor.Size() == -1) { - if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { - LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source; - delete data; - return 0; - } - size_t b = (data->nstates_ + 1) * sizeof(Unsigned); - data->states_region_ = MappedFile::Map(&strm, opts, b); - if (!strm || data->states_region_ == NULL) { - LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source; - delete data; - return 0; - } - data->states_ = static_cast<Unsigned *>( - data->states_region_->mutable_data()); - } else { - data->states_ = 0; - } - data->ncompacts_ = compactor.Size() == -1 - ? data->states_[data->nstates_] - : data->nstates_ * compactor.Size(); - if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { - LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source; - delete data; - return 0; - } - size_t b = data->ncompacts_ * sizeof(CompactElement); - data->compacts_region_ = MappedFile::Map(&strm, opts, b); - if (!strm || data->compacts_region_ == NULL) { - LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source; - delete data; - return 0; - } - data->compacts_ = static_cast<CompactElement *>( - data->compacts_region_->mutable_data()); - return data; -} - -template<class E, class U> -bool CompactFstData<E, U>::Write(ostream &strm, - const FstWriteOptions &opts) const { - if (states_) { - if (opts.align && !AlignOutput(strm)) { - LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; - return false; - } - strm.write(reinterpret_cast<char *>(states_), - (nstates_ + 1) * sizeof(Unsigned)); - } - if (opts.align && !AlignOutput(strm)) { - LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; - return false; - } - strm.write(reinterpret_cast<char *>(compacts_), - ncompacts_ * sizeof(CompactElement)); - - strm.flush(); - if (!strm) { - LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source; - return false; - } - return true; -} - -template <class A, class C, class U> class CompactFst; -template <class F, class G> void Cast(const F &, G *); - -// Implementation class for CompactFst, which contains CompactFstData -// and Fst cache. -template <class A, class C, class U> -class CompactFstImpl : public CacheImpl<A> { - public: - using FstImpl<A>::SetType; - using FstImpl<A>::SetProperties; - using FstImpl<A>::Properties; - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - using FstImpl<A>::WriteHeader; - - using CacheImpl<A>::PushArc; - using CacheImpl<A>::HasArcs; - using CacheImpl<A>::HasFinal; - using CacheImpl<A>::HasStart; - using CacheImpl<A>::SetArcs; - using CacheImpl<A>::SetFinal; - using CacheImpl<A>::SetStart; - - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef C Compactor; - typedef typename C::Element CompactElement; - typedef U Unsigned; - - CompactFstImpl() - : CacheImpl<A>(CompactFstOptions()), - compactor_(0), - own_compactor_(false), - data_(0) { - string type = "compact"; - if (sizeof(U) != sizeof(uint32)) { - string size; - Int64ToStr(8 * sizeof(U), &size); - type += size; - } - type += "_"; - type += C::Type(); - SetType(type); - SetProperties(kNullProperties | kStaticProperties); - } - - CompactFstImpl(const Fst<Arc> &fst, const C &compactor, - const CompactFstOptions &opts) - : CacheImpl<A>(opts), - compactor_(new C(compactor)), - own_compactor_(true), - data_(0) { - Init(fst); - } - - CompactFstImpl(const Fst<Arc> &fst, C *compactor, - const CompactFstOptions &opts) - : CacheImpl<A>(opts), - compactor_(compactor), - own_compactor_(false), - data_(0) { - Init(fst); - } - - template <class Iterator> - CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor, - const CompactFstOptions &opts) - : CacheImpl<A>(opts), - compactor_(new C(compactor)), - own_compactor_(true), - data_(0) { - Init(b, e); - } - - template <class Iterator> - CompactFstImpl(const Iterator &b, const Iterator &e, C *compactor, - const CompactFstOptions &opts) - : CacheImpl<A>(opts), - compactor_(compactor), - own_compactor_(false), - data_(0) { - Init(b, e); - } - - CompactFstImpl(const CompactFstImpl<A, C, U> &impl) - : CacheImpl<A>(impl), - compactor_(new C(*impl.compactor_)), - own_compactor_(true), - data_(impl.data_) { - if (data_) - data_->IncrRefCount(); - SetType(impl.Type()); - SetProperties(impl.Properties()); - SetInputSymbols(impl.InputSymbols()); - SetOutputSymbols(impl.OutputSymbols()); - } - - ~CompactFstImpl(){ - if (own_compactor_) - delete compactor_; - if (data_ && !data_->DecrRefCount()) - delete data_; - } - - StateId Start() { - if (!HasStart()) { - SetStart(data_->Start()); - } - return CacheImpl<A>::Start(); - } - - Weight Final(StateId s) { - if (HasFinal(s)) - return CacheImpl<A>::Final(s); - Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId); - if ((compactor_->Size() != -1) || - (data_->States(s) != data_->States(s + 1))) - arc = ComputeArc(s, - compactor_->Size() == -1 - ? data_->States(s) - : s * compactor_->Size()); - return arc.ilabel == kNoLabel ? arc.weight : Weight::Zero(); - } - - StateId NumStates() const { - if (Properties(kError)) return 0; - return data_->NumStates(); - } - - size_t NumArcs(StateId s) { - if (HasArcs(s)) - return CacheImpl<A>::NumArcs(s); - Unsigned i, num_arcs; - if (compactor_->Size() == -1) { - i = data_->States(s); - num_arcs = data_->States(s + 1) - i; - } else { - i = s * compactor_->Size(); - num_arcs = compactor_->Size(); - } - if (num_arcs > 0) { - const A &arc = ComputeArc(s, i, kArcILabelValue); - if (arc.ilabel == kNoStateId) { - --num_arcs; - } - } - return num_arcs; - } - - size_t NumInputEpsilons(StateId s) { - if (!HasArcs(s) && !Properties(kILabelSorted)) - Expand(s); - if (HasArcs(s)) - return CacheImpl<A>::NumInputEpsilons(s); - return CountEpsilons(s, false); - } - - size_t NumOutputEpsilons(StateId s) { - if (!HasArcs(s) && !Properties(kOLabelSorted)) - Expand(s); - if (HasArcs(s)) - return CacheImpl<A>::NumOutputEpsilons(s); - return CountEpsilons(s, true); - } - - size_t CountEpsilons(StateId s, bool output_epsilons) { - size_t begin = compactor_->Size() == -1 ? - data_->States(s) : s * compactor_->Size(); - size_t end = compactor_->Size() == -1 ? - data_->States(s + 1) : (s + 1) * compactor_->Size(); - size_t num_eps = 0; - for (size_t i = begin; i < end; ++i) { - const A &arc = ComputeArc( - s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue); - const typename A::Label &label = - (output_epsilons ? arc.olabel : arc.ilabel); - if (label == kNoLabel) - continue; - else if (label > 0) - break; - ++num_eps; - } - return num_eps; - } - - static CompactFstImpl<A, C, U> *Read(istream &strm, - const FstReadOptions &opts) { - CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>(); - FstHeader hdr; - if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { - delete impl; - return 0; - } - - // Ensures compatibility - if (hdr.Version() == kAlignedFileVersion) - hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED); - - impl->compactor_ = C::Read(strm); - if (!impl->compactor_) { - delete impl; - return 0; - } - impl->own_compactor_ = true; - impl->data_ = CompactFstData<CompactElement, U>::Read(strm, opts, hdr, - *impl->compactor_); - if (!impl->data_) { - delete impl; - return 0; - } - return impl; - } - - bool Write(ostream &strm, const FstWriteOptions &opts) const { - FstHeader hdr; - hdr.SetStart(data_->Start()); - hdr.SetNumStates(data_->NumStates()); - hdr.SetNumArcs(data_->NumArcs()); - - // Ensures compatibility - int file_version = opts.align ? kAlignedFileVersion : kFileVersion; - WriteHeader(strm, opts, file_version, &hdr); - compactor_->Write(strm); - return data_->Write(strm, opts); - } - - // Provide information needed for generic state iterator - void InitStateIterator(StateIteratorData<A> *data) const { - data->base = 0; - data->nstates = data_->NumStates(); - } - - void InitArcIterator(StateId s, ArcIteratorData<A> *data) { - if (!HasArcs(s)) - Expand(s); - CacheImpl<A>::InitArcIterator(s, data); - } - - Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const { - return compactor_->Expand(s, data_->Compacts(i), f); - } - - void Expand(StateId s) { - size_t begin = compactor_->Size() == -1 ? - data_->States(s) : s * compactor_->Size(); - size_t end = compactor_->Size() == -1 ? - data_->States(s + 1) : (s + 1) * compactor_->Size(); - for (size_t i = begin; i < end; ++i) { - const Arc &arc = ComputeArc(s, i); - if (arc.ilabel == kNoLabel) - SetFinal(s, arc.weight); - else - PushArc(s, arc); - } - if (!HasFinal(s)) - SetFinal(s, Weight::Zero()); - SetArcs(s); - } - - template <class Iterator> - void SetCompactElements(const Iterator &b, const Iterator &e) { - if (data_ && !data_->DecrRefCount()) - delete data_; - data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_); - } - - C *GetCompactor() const { return compactor_; } - CompactFstData<CompactElement, U> *Data() const { return data_; } - - // Properties always true of this Fst class - static const uint64 kStaticProperties = kExpanded; - - protected: - template <class B, class D> - explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl) - : CacheImpl<A>(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())), - compactor_(new C(*impl.GetCompactor())), - own_compactor_(true), - data_(impl.Data()) { - if (data_) - data_->IncrRefCount(); - SetType(impl.Type()); - SetProperties(impl.Properties()); - SetInputSymbols(impl.InputSymbols()); - SetOutputSymbols(impl.OutputSymbols()); - } - - private: - friend class CompactFst<A, C, U>; // allow access during write. - - void Init(const Fst<Arc> &fst) { - string type = "compact"; - if (sizeof(U) != sizeof(uint32)) { - string size; - Int64ToStr(8 * sizeof(U), &size); - type += size; - } - type += "_"; - type += compactor_->Type(); - SetType(type); - SetInputSymbols(fst.InputSymbols()); - SetOutputSymbols(fst.OutputSymbols()); - data_ = new CompactFstData<CompactElement, U>(fst, *compactor_); - if (data_->Error()) - SetProperties(kError, kError); - uint64 copy_properties = fst.Properties(kCopyProperties, true); - if ((copy_properties & kError) || !compactor_->Compatible(fst)) { - FSTERROR() << "CompactFstImpl: input fst incompatible with compactor"; - SetProperties(kError, kError); - return; - } - SetProperties(copy_properties | kStaticProperties); - } - - template <class Iterator> - void Init(const Iterator &b, const Iterator &e) { - string type = "compact"; - if (sizeof(U) != sizeof(uint32)) { - string size; - Int64ToStr(8 * sizeof(U), &size); - type += size; - } - type += "_"; - type += compactor_->Type(); - SetType(type); - SetProperties(kStaticProperties | compactor_->Properties()); - data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_); - if (data_->Error()) - SetProperties(kError, kError); - } - - // Current unaligned file format version - static const int kFileVersion = 2; - // Current aligned file format version - static const int kAlignedFileVersion = 1; - // Minimum file format version supported - static const int kMinFileVersion = 1; - - C *compactor_; - bool own_compactor_; - CompactFstData<CompactElement, U> *data_; -}; - -template <class A, class C, class U> -const uint64 CompactFstImpl<A, C, U>::kStaticProperties; -template <class A, class C, class U> -const int CompactFstImpl<A, C, U>::kFileVersion; -template <class A, class C, class U> -const int CompactFstImpl<A, C, U>::kAlignedFileVersion; -template <class A, class C, class U> -const int CompactFstImpl<A, C, U>::kMinFileVersion; - - -// CompactFst. This class attaches interface to implementation and -// handles reference counting, delegating most methods to -// ImplToExpandedFst. The unsigned type U is used to represent indices -// into the compact arc array (uint32 by default, declared in -// fst-decl.h). -template <class A, class C, class U> -class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > { - public: - friend class StateIterator< CompactFst<A, C, U> >; - friend class ArcIterator< CompactFst<A, C, U> >; - template <class F, class G> void friend Cast(const F &, G *); - - typedef A Arc; - typedef typename A::StateId StateId; - typedef CompactFstImpl<A, C, U> Impl; - typedef CacheState<A> State; - typedef U Unsigned; - - CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {} - - explicit CompactFst(const Fst<A> &fst, const C &compactor = C(), - const CompactFstOptions &opts = CompactFstOptions()) - : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {} - - CompactFst(const Fst<A> &fst, C *compactor, - const CompactFstOptions &opts = CompactFstOptions()) - : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {} - - // The following 2 constructors take as input two iterators delimiting - // a set of (already) compacted transitions, starting with the - // transitions out of the initial state. The format of the input - // differs for fixed out-degree and variable out-degree compactors. - // - // - For fixed out-degree compactors, the final weight (encoded as a - // compacted transition) needs to be given only for final - // states. All strings (compactor of size 1) will be assume to be - // terminated by a final state even when the final state is not - // implicitely given. - // - // - For variable out-degree compactors, the final weight (encoded - // as a compacted transition) needs to be given for all states and - // must appeared first in the list (for state s, final weight of s, - // followed by outgoing transitons in s). - // - // These 2 constructors allows the direct construction of a CompactFst - // without first creating a more memory hungry 'regular' FST. This - // is useful when memory usage is severely constrained. - template <class Iterator> - explicit CompactFst(const Iterator &begin, const Iterator &end, - const C &compactor = C(), - const CompactFstOptions &opts = CompactFstOptions()) - : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {} - - template <class Iterator> - CompactFst(const Iterator &begin, const Iterator &end, - C *compactor, const CompactFstOptions &opts = CompactFstOptions()) - : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {} - - // See Fst<>::Copy() for doc. - CompactFst(const CompactFst<A, C, U> &fst, bool safe = false) - : ImplToExpandedFst<Impl>(fst, safe) {} - - // Get a copy of this CompactFst. See Fst<>::Copy() for further doc. - virtual CompactFst<A, C, U> *Copy(bool safe = false) const { - return new CompactFst<A, C, U>(*this, safe); - } - - // Read a CompactFst from an input stream; return NULL on error - static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) { - Impl* impl = Impl::Read(strm, opts); - return impl ? new CompactFst<A, C, U>(impl) : 0; - } - - // Read a CompactFst from a file; return NULL on error - // Empty filename reads from standard input - static CompactFst<A, C, U> *Read(const string &filename) { - Impl* impl = ImplToExpandedFst<Impl>::Read(filename); - return impl ? new CompactFst<A, C, U>(impl) : 0; - } - - virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { - return GetImpl()->Write(strm, opts); - } - - virtual bool Write(const string &filename) const { - return Fst<A>::WriteFile(filename); - } - - template <class F> - static bool WriteFst(const F &fst, const C &compactor, ostream &strm, - const FstWriteOptions &opts); - - virtual void InitStateIterator(StateIteratorData<A> *data) const { - GetImpl()->InitStateIterator(data); - } - - virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { - GetImpl()->InitArcIterator(s, data); - } - - virtual MatcherBase<A> *InitMatcher(MatchType match_type) const { - return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type); - } - - template <class Iterator> - void SetCompactElements(const Iterator &b, const Iterator &e) { - GetImpl()->SetCompactElements(b, e); - } - - private: - CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {} - - // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); } - - void SetImpl(Impl *impl, bool own_impl = false) { - ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl); - } - - // Use overloading to extract the type of the argument. - static Impl* GetImplIfCompactFst(const CompactFst<A, C, U> &compact_fst) { - return compact_fst.GetImpl(); - } - - // This does not give privileged treatment to subclasses of CompactFst. - template<typename NonCompactFst> - static Impl* GetImplIfCompactFst(const NonCompactFst& fst) { - return NULL; - } - - void operator=(const CompactFst<A, C, U> &fst); // disallow -}; - -// Writes Fst in Compact format, potentially with a pass over the machine -// before writing to compute the number of states and arcs. -// -template <class A, class C, class U> -template <class F> -bool CompactFst<A, C, U>::WriteFst(const F &fst, - const C &compactor, - ostream &strm, - const FstWriteOptions &opts) { - typedef U Unsigned; - typedef typename C::Element CompactElement; - typedef typename A::Weight Weight; - int file_version = opts.align ? - CompactFstImpl<A, C, U>::kAlignedFileVersion : - CompactFstImpl<A, C, U>::kFileVersion; - size_t num_arcs = -1, num_states = -1, num_compacts = -1; - C first_pass_compactor = compactor; - if (Impl* impl = GetImplIfCompactFst(fst)) { - num_arcs = impl->Data()->NumArcs(); - num_states = impl->Data()->NumStates(); - num_compacts = impl->Data()->NumCompacts(); - first_pass_compactor = *impl->GetCompactor(); - } else { - // A first pass is needed to compute the state of the compactor, which - // is saved ahead of the rest of the data structures. This unfortunately - // means forcing a complete double compaction when writing in this format. - // TODO(allauzen): eliminate mutable state from compactors. - num_arcs = 0; - num_states = 0; - for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { - const StateId s = siter.Value(); - ++num_states; - if (fst.Final(s) != Weight::Zero()) { - first_pass_compactor.Compact( - s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId)); - } - for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) { - ++num_arcs; - first_pass_compactor.Compact(s, aiter.Value()); - } - } - } - FstHeader hdr; - hdr.SetStart(fst.Start()); - hdr.SetNumStates(num_states); - hdr.SetNumArcs(num_arcs); - string type = "compact"; - if (sizeof(U) != sizeof(uint32)) { - string size; - Int64ToStr(8 * sizeof(U), &size); - type += size; - } - type += "_"; - type += C::Type(); - uint64 copy_properties = fst.Properties(kCopyProperties, true); - if ((copy_properties & kError) || !compactor.Compatible(fst)) { - LOG(ERROR) << "fst incompatible with compactor"; - return false; - } - uint64 properties = copy_properties | - CompactFstImpl<A, C, U>::kStaticProperties; - FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties, - &hdr); - first_pass_compactor.Write(strm); - if (first_pass_compactor.Size() == -1) { - if (opts.align && !AlignOutput(strm)) { - LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; - return false; - } - Unsigned compacts = 0; - for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { - const StateId s = siter.Value(); - strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts)); - if (fst.Final(s) != Weight::Zero()) { - ++compacts; - } - compacts += fst.NumArcs(s); - } - strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts)); - } - if (opts.align && !AlignOutput(strm)) { - LOG(ERROR) << "Could not align file during write after writing states"; - } - C second_pass_compactor = compactor; - CompactElement element; - for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { - const StateId s = siter.Value(); - if (fst.Final(s) != Weight::Zero()) { - element = second_pass_compactor.Compact( - s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId)); - strm.write(reinterpret_cast<const char *>(&element), sizeof(element)); - } - for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) { - element = second_pass_compactor.Compact(s, aiter.Value()); - strm.write(reinterpret_cast<const char *>(&element), sizeof(element)); - } - } - strm.flush(); - if (!strm) { - LOG(ERROR) << "CompactFst write failed: " << opts.source; - return false; - } - return true; -} - - -// Specialization for CompactFst; see generic version in fst.h -// for sample usage (but use the CompactFst type!). This version -// should inline. -template <class A, class C, class U> -class StateIterator< CompactFst<A, C, U> > { - public: - typedef typename A::StateId StateId; - - explicit StateIterator(const CompactFst<A, C, U> &fst) - : nstates_(fst.GetImpl()->NumStates()), s_(0) {} - - bool Done() const { return s_ >= nstates_; } - - StateId Value() const { return s_; } - - void Next() { ++s_; } - - void Reset() { s_ = 0; } - - private: - StateId nstates_; - StateId s_; - - DISALLOW_COPY_AND_ASSIGN(StateIterator); -}; - -// Specialization for CompactFst. -// Never caches, always iterates over the underlying compact elements. -template <class A, class C, class U> -class ArcIterator< CompactFst<A, C, U> > { - public: - typedef typename A::StateId StateId; - typedef typename C::Element CompactElement; - - ArcIterator(const CompactFst<A, C, U> &fst, StateId s) - : compactor_(fst.GetImpl()->GetCompactor()), state_(s), compacts_(0), - pos_(0), flags_(kArcValueFlags) { - - const CompactFstData<CompactElement, U> *data = fst.GetImpl()->Data(); - size_t offset; - if (compactor_->Size() == -1) { // Variable out-degree compactor - offset = data->States(s); - num_arcs_ = data->States(s + 1) - offset; - } else { // Fixed out-degree compactor - offset = s * compactor_->Size(); - num_arcs_ = compactor_->Size(); - } - if (num_arcs_ > 0) { - compacts_ = &(data->Compacts(offset)); - arc_ = compactor_->Expand(s, *compacts_, kArcILabelValue); - if (arc_.ilabel == kNoStateId) { - ++compacts_; - --num_arcs_; - } - } - } - - ~ArcIterator() {} - - bool Done() const { return pos_ >= num_arcs_; } - - const A& Value() const { - arc_ = compactor_->Expand(state_, compacts_[pos_], flags_); - return arc_; - } - - void Next() { ++pos_; } - - size_t Position() const { return pos_; } - - void Reset() { pos_ = 0; } - - void Seek(size_t pos) { pos_ = pos; } - - uint32 Flags() const { return flags_; } - - void SetFlags(uint32 f, uint32 m) { - flags_ &= ~m; - flags_ |= (f & kArcValueFlags); - } - - private: - C *compactor_; - StateId state_; - const CompactElement *compacts_; - size_t pos_; - size_t num_arcs_; - mutable A arc_; - uint32 flags_; - - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - -// // Specialization for CompactFst. -// // This is an optionally caching arc iterator. -// // TODO(allauzen): implements the kArcValueFlags, the current -// // implementation only implements the kArcNoCache flag. -// template <class A, class C, class U> -// class ArcIterator< CompactFst<A, C, U> > { -// public: -// typedef typename A::StateId StateId; - -// ArcIterator(const CompactFst<A, C, U> &fst, StateId s) -// : fst_(fst), state_(s), pos_(0), num_arcs_(0), offset_(0), -// flags_(kArcValueFlags) { -// cache_data_.ref_count = 0; - -// if (fst_.GetImpl()->HasArcs(state_)) { -// fst_.GetImpl()->InitArcIterator(s, &cache_data_); -// num_arcs_ = cache_data_.narcs; -// return; -// } - -// const C *compactor = fst_.GetImpl()->GetCompactor(); -// const CompactFstData<A, C, U> *data = fst_.GetImpl()->Data(); -// if (compactor->Size() == -1) { // Variable out-degree compactor -// offset_ = data->States(s); -// num_arcs_ = data->States(s + 1) - offset_; -// } else { // Fixed out-degree compactor -// offset_ = s * compactor->Size(); -// num_arcs_ = compactor->Size(); -// } -// if (num_arcs_ > 0) { -// const A &arc = fst_.GetImpl()->ComputeArc(s, offset_); -// if (arc.ilabel == kNoStateId) { -// ++offset_; -// --num_arcs_; -// } -// } -// } - - -// ~ArcIterator() { -// if (cache_data_.ref_count) -// --(*cache_data_.ref_count); -// } - -// bool Done() const { return pos_ >= num_arcs_; } - -// const A& Value() const { -// if (cache_data_.ref_count == 0) { -// if (flags_ & kArcNoCache) { -// arc_ = fst_.GetImpl()->ComputeArc(state_, pos_ + offset_); -// return arc_; -// } else { -// fst_.GetImpl()->InitArcIterator(state_, &cache_data_); -// } -// } -// return cache_data_.arcs[pos_]; -// } - -// void Next() { ++pos_; } - -// size_t Position() const { return pos_; } - -// void Reset() { pos_ = 0; } - -// void Seek(size_t pos) { pos_ = pos; } - -// uint32 Flags() const { return flags_; } - -// void SetFlags(uint32 f, uint32 m) { -// flags_ &= ~m; -// flags_ |= f; - -// if (!(flags_ & kArcNoCache) && cache_data_.ref_count == 0) -// fst_.GetImpl()->InitArcIterator(state_, &cache_data_); -// } - -// private: -// mutable const CompactFst<A, C, U> &fst_; -// StateId state_; -// size_t pos_; -// size_t num_arcs_; -// size_t offset_; -// uint32 flags_; -// mutable A arc_; -// mutable ArcIteratorData<A> cache_data_; - -// DISALLOW_COPY_AND_ASSIGN(ArcIterator); -// }; - - -// -// Utility Compactors -// - -// Compactor for unweighted string FSTs -template <class A> -class StringCompactor { - public: - typedef A Arc; - typedef typename A::Label Element; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - Element Compact(StateId s, const A &arc) const { return arc.ilabel; } - - Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { - return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId); - } - - ssize_t Size() const { return 1; } - - uint64 Properties() const { - return kString | kAcceptor | kUnweighted; - } - - bool Compatible(const Fst<A> &fst) const { - uint64 props = Properties(); - return fst.Properties(props, true) == props; - } - - static const string &Type() { - static const string type = "string"; - return type; - } - - bool Write(ostream &strm) const { return true; } - - static StringCompactor *Read(istream &strm) { - return new StringCompactor; - } -}; - - -// Compactor for weighted string FSTs -template <class A> -class WeightedStringCompactor { - public: - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - typedef pair<Label, Weight> Element; - - Element Compact(StateId s, const A &arc) const { - return make_pair(arc.ilabel, arc.weight); - } - - Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { - return Arc(p.first, p.first, p.second, - p.first != kNoLabel ? s + 1 : kNoStateId); - } - - ssize_t Size() const { return 1;} - - uint64 Properties() const { - return kString | kAcceptor; - } - - bool Compatible(const Fst<A> &fst) const { - uint64 props = Properties(); - return fst.Properties(props, true) == props; - } - - static const string &Type() { - static const string type = "weighted_string"; - return type; - } - - bool Write(ostream &strm) const { return true; } - - static WeightedStringCompactor *Read(istream &strm) { - return new WeightedStringCompactor; - } -}; - - -// Compactor for unweighted acceptor FSTs -template <class A> -class UnweightedAcceptorCompactor { - public: - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - typedef pair<Label, StateId> Element; - - Element Compact(StateId s, const A &arc) const { - return make_pair(arc.ilabel, arc.nextstate); - } - - Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { - return Arc(p.first, p.first, Weight::One(), p.second); - } - - ssize_t Size() const { return -1;} - - uint64 Properties() const { - return kAcceptor | kUnweighted; - } - - bool Compatible(const Fst<A> &fst) const { - uint64 props = Properties(); - return fst.Properties(props, true) == props; - } - - static const string &Type() { - static const string type = "unweighted_acceptor"; - return type; - } - - bool Write(ostream &strm) const { return true; } - - static UnweightedAcceptorCompactor *Read(istream &istrm) { - return new UnweightedAcceptorCompactor; - } -}; - - -// Compactor for weighted acceptor FSTs -template <class A> -class AcceptorCompactor { - public: - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - typedef pair< pair<Label, Weight>, StateId > Element; - - Element Compact(StateId s, const A &arc) const { - return make_pair(make_pair(arc.ilabel, arc.weight), arc.nextstate); - } - - Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { - return Arc(p.first.first, p.first.first, p.first.second, p.second); - } - - ssize_t Size() const { return -1;} - - uint64 Properties() const { - return kAcceptor; - } - - bool Compatible(const Fst<A> &fst) const { - uint64 props = Properties(); - return fst.Properties(props, true) == props; - } - - static const string &Type() { - static const string type = "acceptor"; - return type; - } - - bool Write(ostream &strm) const { return true; } - - static AcceptorCompactor *Read(istream &strm) { - return new AcceptorCompactor; - } -}; - - -// Compactor for unweighted FSTs -template <class A> -class UnweightedCompactor { - public: - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - typedef pair< pair<Label, Label>, StateId > Element; - - Element Compact(StateId s, const A &arc) const { - return make_pair(make_pair(arc.ilabel, arc.olabel), arc.nextstate); - } - - Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { - return Arc(p.first.first, p.first.second, Weight::One(), p.second); - } - - ssize_t Size() const { return -1; } - - uint64 Properties() const { - return kUnweighted; - } - - bool Compatible(const Fst<A> &fst) const { - uint64 props = Properties(); - return fst.Properties(props, true) == props; - } - - static const string &Type() { - static const string type = "unweighted"; - return type; - } - - bool Write(ostream &strm) const { return true; } - - static UnweightedCompactor *Read(istream &strm) { - return new UnweightedCompactor; - } -}; - - -// Uselful aliases when using StdArc -typedef CompactFst< StdArc, StringCompactor<StdArc> > -StdCompactStringFst; -typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> > -StdCompactWeightedStringFst; -typedef CompactFst<StdArc, AcceptorCompactor<StdArc> > -StdCompactAcceptorFst; -typedef CompactFst<StdArc, UnweightedCompactor<StdArc> > -StdCompactUnweightedFst; -typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > -StdCompactUnweightedAcceptorFst; - -} // namespace fst - -#endif // FST_LIB_COMPACT_FST_H__ |