summaryrefslogtreecommitdiff
path: root/kaldi_io/src/tools/openfst/include/fst/compact-fst.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/compact-fst.h')
-rw-r--r--kaldi_io/src/tools/openfst/include/fst/compact-fst.h1438
1 files changed, 1438 insertions, 0 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/compact-fst.h b/kaldi_io/src/tools/openfst/include/fst/compact-fst.h
new file mode 100644
index 0000000..6db3317
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/compact-fst.h
@@ -0,0 +1,1438 @@
+// compact-fst.h
+
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: allauzen@google.com (Cyril Allauzen)
+//
+// \file
+// FST Class for memory-efficient representation of common types of
+// FSTs: linear automata, acceptors, unweighted FSTs, ...
+
+#ifndef FST_LIB_COMPACT_FST_H__
+#define FST_LIB_COMPACT_FST_H__
+
+#include <iterator>
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/cache.h>
+#include <fst/expanded-fst.h>
+#include <fst/fst-decl.h> // For optional argument declarations
+#include <fst/mapped-file.h>
+#include <fst/matcher.h>
+#include <fst/test-properties.h>
+#include <fst/util.h>
+
+
+namespace fst {
+
+struct CompactFstOptions : public CacheOptions {
+ // CompactFst default caching behaviour is to do no caching. Most
+ // compactors are cheap and therefore we save memory by not doing
+ // caching.
+ CompactFstOptions() : CacheOptions(true, 0) {}
+ CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {}
+};
+
+// Compactor Interface - class determinies how arcs and final weights
+// are compacted and expanded.
+//
+// Final weights are treated as transitions to the superfinal state,
+// i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId.
+//
+// There are two types of compactors:
+//
+// * Fixed out-degree compactors: 'compactor.Size()' returns a
+// positive integer 's'. An FST can be compacted by this compactor
+// only if each state has exactly 's' outgoing transitions (counting a
+// non-Zero() final weight as a transition). A typical example is a
+// compactor for string FSTs, i.e. 's == 1'.
+//
+// * Variable out-degree compactors: 'compactor.Size() == -1'. There
+// are no out-degree restrictions for these compactors.
+//
+//
+// class Compactor {
+// public:
+// // Element is the type of the compacted transitions.
+// typedef ... Element;
+// // Return the compacted representation of a transition 'arc'
+// // at a state 's'.
+// Element Compact(StateId s, const Arc &arc);
+// // Return the transition at state 's' represented by the compacted
+// // transition 'e'.
+// Arc Expand(StateId s, const Element &e);
+// // Return -1 for variable out-degree compactors, and the mandatory
+// // out-degree otherwise.
+// ssize_t Size();
+// // Test whether 'fst' can be compacted by this compactor.
+// bool Compatible(const Fst<A> &fst);
+// // Return the properties that are always true for an fst
+// // compacted using this compactor
+// uint64 Properties();
+// // Return a string identifying the type of compactor.
+// static const string &Type();
+// // Write a compactor to a file.
+// bool Write(ostream &strm);
+// // Read a compactor from a file.
+// static Compactor *Read(istream &strm);
+// // Default constructor (optional, see comment below).
+// Compactor();
+// };
+//
+// The default constructor is only required for FST_REGISTER to work
+// (i.e. enabling Convert() and the command-line utilities to work
+// with this new compactor). However, a default constructor always
+// needs to be specify for this code to compile, but one can have it
+// simply raised an error when called:
+//
+// Compactor::Compactor() {
+// FSTERROR() << "Compactor: no default constructor";
+// }
+
+
+// Implementation data for Compact Fst, which can shared between otherwise
+// independent copies.
+//
+// The implementation contains two arrays: 'states_' and 'compacts_'.
+//
+// For fixed out-degree compactors, the 'states_' array is unallocated.
+// The 'compacts_' contains the compacted transitions. Its size is
+// 'ncompacts_'. The outgoing transitions at a given state are stored
+// consecutively. For a given state 's', its 'compactor.Size()' outgoing
+// transitions (including superfinal transition when 's' is final), are
+// stored in position ['s*compactor.Size()', '(s+1)*compactor_.Size()').
+//
+// For variable out-degree compactors, the states_ array has size
+// 'nstates_ + 1' and contains pointers to positions into 'compacts_'.
+// For a given state 's', the compacted transitions of 's' are
+// stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'.
+// By convention, 'states_[nstates_] == ncompacts_'.
+//
+// In both cases, the superfinal transitons (when 's' is final, i.e.
+// 'Final(s) != Weight::Zero()') is stored first.
+//
+// The unsigned type U is used to represent indices into the compacts_
+// array.
+template <class E, class U>
+class CompactFstData {
+ public:
+ typedef E CompactElement;
+ typedef U Unsigned;
+
+ CompactFstData()
+ : states_region_(0),
+ compacts_region_(0),
+ states_(0),
+ compacts_(0),
+ nstates_(0),
+ ncompacts_(0),
+ narcs_(0),
+ start_(kNoStateId),
+ error_(false) {}
+
+ template <class A, class Compactor>
+ CompactFstData(const Fst<A> &fst, const Compactor &compactor);
+
+ template <class Iterator, class Compactor>
+ CompactFstData(const Iterator &begin, const Iterator &end,
+ const Compactor &compactor);
+
+ ~CompactFstData() {
+ if (states_region_ == NULL) {
+ delete [] states_;
+ }
+ delete states_region_;
+ if (compacts_region_ == NULL) {
+ delete [] compacts_;
+ }
+ delete compacts_region_;
+ }
+
+ template <class Compactor>
+ static CompactFstData<E, U> *Read(istream &strm,
+ const FstReadOptions &opts,
+ const FstHeader &hdr,
+ const Compactor &compactor);
+
+ bool Write(ostream &strm, const FstWriteOptions &opts) const;
+
+ Unsigned States(ssize_t i) const { return states_[i]; }
+ const CompactElement &Compacts(size_t i) const { return compacts_[i]; }
+ size_t NumStates() const { return nstates_; }
+ size_t NumCompacts() const { return ncompacts_; }
+ size_t NumArcs() const { return narcs_; }
+ ssize_t Start() const { return start_; }
+
+ int RefCount() const { return ref_count_.count(); }
+ int IncrRefCount() { return ref_count_.Incr(); }
+ int DecrRefCount() { return ref_count_.Decr(); }
+
+ bool Error() const { return error_; }
+
+ private:
+ MappedFile *states_region_;
+ MappedFile *compacts_region_;
+ Unsigned *states_;
+ CompactElement *compacts_;
+ size_t nstates_;
+ size_t ncompacts_;
+ size_t narcs_;
+ ssize_t start_;
+ RefCounter ref_count_;
+ bool error_;
+};
+
+template <class E, class U>
+template <class A, class C>
+CompactFstData<E, U>::CompactFstData(const Fst<A> &fst, const C &compactor)
+ : states_region_(0),
+ compacts_region_(0),
+ states_(0),
+ compacts_(0),
+ nstates_(0),
+ ncompacts_(0),
+ narcs_(0),
+ start_(kNoStateId),
+ error_(false) {
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+ start_ = fst.Start();
+ // Count # of states and arcs.
+ StateId nfinals = 0;
+ for (StateIterator< Fst<A> > siter(fst);
+ !siter.Done();
+ siter.Next()) {
+ ++nstates_;
+ StateId s = siter.Value();
+ for (ArcIterator< Fst<A> > aiter(fst, s);
+ !aiter.Done();
+ aiter.Next())
+ ++narcs_;
+ if (fst.Final(s) != Weight::Zero()) ++nfinals;
+ }
+ if (compactor.Size() == -1) {
+ states_ = new Unsigned[nstates_ + 1];
+ ncompacts_ = narcs_ + nfinals;
+ compacts_ = new CompactElement[ncompacts_];
+ states_[nstates_] = ncompacts_;
+ } else {
+ states_ = 0;
+ ncompacts_ = nstates_ * compactor.Size();
+ if ((narcs_ + nfinals) != ncompacts_) {
+ FSTERROR() << "CompactFstData: compactor incompatible with fst";
+ error_ = true;
+ return;
+ }
+ compacts_ = new CompactElement[ncompacts_];
+ }
+ size_t pos = 0, fpos = 0;
+ for (StateId s = 0; s < nstates_; ++s) {
+ fpos = pos;
+ if (compactor.Size() == -1)
+ states_[s] = pos;
+ if (fst.Final(s) != Weight::Zero())
+ compacts_[pos++] = compactor.Compact(s, A(kNoLabel, kNoLabel,
+ fst.Final(s), kNoStateId));
+ for (ArcIterator< Fst<A> > aiter(fst, s);
+ !aiter.Done();
+ aiter.Next()) {
+ compacts_[pos++] = compactor.Compact(s, aiter.Value());
+ }
+ if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) {
+ FSTERROR() << "CompactFstData: compactor incompatible with fst";
+ error_ = true;
+ return;
+ }
+ }
+ if (pos != ncompacts_) {
+ FSTERROR() << "CompactFstData: compactor incompatible with fst";
+ error_ = true;
+ return;
+ }
+}
+
+template <class E, class U>
+template <class Iterator, class C>
+CompactFstData<E, U>::CompactFstData(const Iterator &begin,
+ const Iterator &end,
+ const C &compactor)
+ : states_region_(0),
+ compacts_region_(0),
+ states_(0),
+ compacts_(0),
+ nstates_(0),
+ ncompacts_(0),
+ narcs_(0),
+ start_(kNoStateId),
+ error_(false) {
+ typedef typename C::Arc Arc;
+ typedef typename Arc::Weight Weight;
+ if (compactor.Size() != -1) {
+ ncompacts_ = distance(begin, end);
+ if (compactor.Size() == 1) {
+ // For strings, allow implicit final weight.
+ // Empty input is the empty string.
+ if (ncompacts_ == 0) {
+ ++ncompacts_;
+ } else {
+ Arc arc = compactor.Expand(ncompacts_ - 1,
+ *(begin + (ncompacts_ - 1)));
+ if (arc.ilabel != kNoLabel)
+ ++ncompacts_;
+ }
+ }
+ if (ncompacts_ % compactor.Size()) {
+ FSTERROR() << "CompactFstData: size of input container incompatible"
+ << " with compactor";
+ error_ = true;
+ return;
+ }
+ if (ncompacts_ == 0)
+ return;
+ start_ = 0;
+ nstates_ = ncompacts_ / compactor.Size();
+ compacts_ = new CompactElement[ncompacts_];
+ size_t i = 0;
+ Iterator it = begin;
+ for(; it != end; ++it, ++i){
+ compacts_[i] = *it;
+ if (compactor.Expand(i, *it).ilabel != kNoLabel)
+ ++narcs_;
+ }
+ if (i < ncompacts_)
+ compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel,
+ Weight::One(), kNoStateId));
+ } else {
+ if (distance(begin, end) == 0)
+ return;
+ // Count # of states, arcs and compacts.
+ Iterator it = begin;
+ for(size_t i = 0; it != end; ++it, ++i) {
+ Arc arc = compactor.Expand(i, *it);
+ if (arc.ilabel != kNoLabel) {
+ ++narcs_;
+ ++ncompacts_;
+ } else {
+ ++nstates_;
+ if (arc.weight != Weight::Zero())
+ ++ncompacts_;
+ }
+ }
+ start_ = 0;
+ compacts_ = new CompactElement[ncompacts_];
+ states_ = new Unsigned[nstates_ + 1];
+ states_[nstates_] = ncompacts_;
+ size_t i = 0, s = 0;
+ for(it = begin; it != end; ++it) {
+ Arc arc = compactor.Expand(i, *it);
+ if (arc.ilabel != kNoLabel) {
+ compacts_[i++] = *it;
+ } else {
+ states_[s++] = i;
+ if (arc.weight != Weight::Zero())
+ compacts_[i++] = *it;
+ }
+ }
+ if ((s != nstates_) || (i != ncompacts_)) {
+ FSTERROR() << "CompactFstData: ill-formed input container";
+ error_ = true;
+ return;
+ }
+ }
+}
+
+template <class E, class U>
+template <class C>
+CompactFstData<E, U> *CompactFstData<E, U>::Read(
+ istream &strm,
+ const FstReadOptions &opts,
+ const FstHeader &hdr,
+ const C &compactor) {
+ CompactFstData<E, U> *data = new CompactFstData<E, U>();
+ data->start_ = hdr.Start();
+ data->nstates_ = hdr.NumStates();
+ data->narcs_ = hdr.NumArcs();
+
+ if (compactor.Size() == -1) {
+ if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
+ LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
+ delete data;
+ return 0;
+ }
+ size_t b = (data->nstates_ + 1) * sizeof(Unsigned);
+ data->states_region_ = MappedFile::Map(&strm, opts, b);
+ if (!strm || data->states_region_ == NULL) {
+ LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
+ delete data;
+ return 0;
+ }
+ data->states_ = static_cast<Unsigned *>(
+ data->states_region_->mutable_data());
+ } else {
+ data->states_ = 0;
+ }
+ data->ncompacts_ = compactor.Size() == -1
+ ? data->states_[data->nstates_]
+ : data->nstates_ * compactor.Size();
+ if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
+ LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
+ delete data;
+ return 0;
+ }
+ size_t b = data->ncompacts_ * sizeof(CompactElement);
+ data->compacts_region_ = MappedFile::Map(&strm, opts, b);
+ if (!strm || data->compacts_region_ == NULL) {
+ LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
+ delete data;
+ return 0;
+ }
+ data->compacts_ = static_cast<CompactElement *>(
+ data->compacts_region_->mutable_data());
+ return data;
+}
+
+template<class E, class U>
+bool CompactFstData<E, U>::Write(ostream &strm,
+ const FstWriteOptions &opts) const {
+ if (states_) {
+ if (opts.align && !AlignOutput(strm)) {
+ LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
+ return false;
+ }
+ strm.write(reinterpret_cast<char *>(states_),
+ (nstates_ + 1) * sizeof(Unsigned));
+ }
+ if (opts.align && !AlignOutput(strm)) {
+ LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
+ return false;
+ }
+ strm.write(reinterpret_cast<char *>(compacts_),
+ ncompacts_ * sizeof(CompactElement));
+
+ strm.flush();
+ if (!strm) {
+ LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source;
+ return false;
+ }
+ return true;
+}
+
+template <class A, class C, class U> class CompactFst;
+template <class F, class G> void Cast(const F &, G *);
+
+// Implementation class for CompactFst, which contains CompactFstData
+// and Fst cache.
+template <class A, class C, class U>
+class CompactFstImpl : public CacheImpl<A> {
+ public:
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::Properties;
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+ using FstImpl<A>::WriteHeader;
+
+ using CacheImpl<A>::PushArc;
+ using CacheImpl<A>::HasArcs;
+ using CacheImpl<A>::HasFinal;
+ using CacheImpl<A>::HasStart;
+ using CacheImpl<A>::SetArcs;
+ using CacheImpl<A>::SetFinal;
+ using CacheImpl<A>::SetStart;
+
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef C Compactor;
+ typedef typename C::Element CompactElement;
+ typedef U Unsigned;
+
+ CompactFstImpl()
+ : CacheImpl<A>(CompactFstOptions()),
+ compactor_(0),
+ own_compactor_(false),
+ data_(0) {
+ string type = "compact";
+ if (sizeof(U) != sizeof(uint32)) {
+ string size;
+ Int64ToStr(8 * sizeof(U), &size);
+ type += size;
+ }
+ type += "_";
+ type += C::Type();
+ SetType(type);
+ SetProperties(kNullProperties | kStaticProperties);
+ }
+
+ CompactFstImpl(const Fst<Arc> &fst, const C &compactor,
+ const CompactFstOptions &opts)
+ : CacheImpl<A>(opts),
+ compactor_(new C(compactor)),
+ own_compactor_(true),
+ data_(0) {
+ Init(fst);
+ }
+
+ CompactFstImpl(const Fst<Arc> &fst, C *compactor,
+ const CompactFstOptions &opts)
+ : CacheImpl<A>(opts),
+ compactor_(compactor),
+ own_compactor_(false),
+ data_(0) {
+ Init(fst);
+ }
+
+ template <class Iterator>
+ CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor,
+ const CompactFstOptions &opts)
+ : CacheImpl<A>(opts),
+ compactor_(new C(compactor)),
+ own_compactor_(true),
+ data_(0) {
+ Init(b, e);
+ }
+
+ template <class Iterator>
+ CompactFstImpl(const Iterator &b, const Iterator &e, C *compactor,
+ const CompactFstOptions &opts)
+ : CacheImpl<A>(opts),
+ compactor_(compactor),
+ own_compactor_(false),
+ data_(0) {
+ Init(b, e);
+ }
+
+ CompactFstImpl(const CompactFstImpl<A, C, U> &impl)
+ : CacheImpl<A>(impl),
+ compactor_(new C(*impl.compactor_)),
+ own_compactor_(true),
+ data_(impl.data_) {
+ if (data_)
+ data_->IncrRefCount();
+ SetType(impl.Type());
+ SetProperties(impl.Properties());
+ SetInputSymbols(impl.InputSymbols());
+ SetOutputSymbols(impl.OutputSymbols());
+ }
+
+ ~CompactFstImpl(){
+ if (own_compactor_)
+ delete compactor_;
+ if (data_ && !data_->DecrRefCount())
+ delete data_;
+ }
+
+ StateId Start() {
+ if (!HasStart()) {
+ SetStart(data_->Start());
+ }
+ return CacheImpl<A>::Start();
+ }
+
+ Weight Final(StateId s) {
+ if (HasFinal(s))
+ return CacheImpl<A>::Final(s);
+ Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
+ if ((compactor_->Size() != -1) ||
+ (data_->States(s) != data_->States(s + 1)))
+ arc = ComputeArc(s,
+ compactor_->Size() == -1
+ ? data_->States(s)
+ : s * compactor_->Size());
+ return arc.ilabel == kNoLabel ? arc.weight : Weight::Zero();
+ }
+
+ StateId NumStates() const {
+ if (Properties(kError)) return 0;
+ return data_->NumStates();
+ }
+
+ size_t NumArcs(StateId s) {
+ if (HasArcs(s))
+ return CacheImpl<A>::NumArcs(s);
+ Unsigned i, num_arcs;
+ if (compactor_->Size() == -1) {
+ i = data_->States(s);
+ num_arcs = data_->States(s + 1) - i;
+ } else {
+ i = s * compactor_->Size();
+ num_arcs = compactor_->Size();
+ }
+ if (num_arcs > 0) {
+ const A &arc = ComputeArc(s, i, kArcILabelValue);
+ if (arc.ilabel == kNoStateId) {
+ --num_arcs;
+ }
+ }
+ return num_arcs;
+ }
+
+ size_t NumInputEpsilons(StateId s) {
+ if (!HasArcs(s) && !Properties(kILabelSorted))
+ Expand(s);
+ if (HasArcs(s))
+ return CacheImpl<A>::NumInputEpsilons(s);
+ return CountEpsilons(s, false);
+ }
+
+ size_t NumOutputEpsilons(StateId s) {
+ if (!HasArcs(s) && !Properties(kOLabelSorted))
+ Expand(s);
+ if (HasArcs(s))
+ return CacheImpl<A>::NumOutputEpsilons(s);
+ return CountEpsilons(s, true);
+ }
+
+ size_t CountEpsilons(StateId s, bool output_epsilons) {
+ size_t begin = compactor_->Size() == -1 ?
+ data_->States(s) : s * compactor_->Size();
+ size_t end = compactor_->Size() == -1 ?
+ data_->States(s + 1) : (s + 1) * compactor_->Size();
+ size_t num_eps = 0;
+ for (size_t i = begin; i < end; ++i) {
+ const A &arc = ComputeArc(
+ s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue);
+ const typename A::Label &label =
+ (output_epsilons ? arc.olabel : arc.ilabel);
+ if (label == kNoLabel)
+ continue;
+ else if (label > 0)
+ break;
+ ++num_eps;
+ }
+ return num_eps;
+ }
+
+ static CompactFstImpl<A, C, U> *Read(istream &strm,
+ const FstReadOptions &opts) {
+ CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>();
+ FstHeader hdr;
+ if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
+ delete impl;
+ return 0;
+ }
+
+ // Ensures compatibility
+ if (hdr.Version() == kAlignedFileVersion)
+ hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
+
+ impl->compactor_ = C::Read(strm);
+ if (!impl->compactor_) {
+ delete impl;
+ return 0;
+ }
+ impl->own_compactor_ = true;
+ impl->data_ = CompactFstData<CompactElement, U>::Read(strm, opts, hdr,
+ *impl->compactor_);
+ if (!impl->data_) {
+ delete impl;
+ return 0;
+ }
+ return impl;
+ }
+
+ bool Write(ostream &strm, const FstWriteOptions &opts) const {
+ FstHeader hdr;
+ hdr.SetStart(data_->Start());
+ hdr.SetNumStates(data_->NumStates());
+ hdr.SetNumArcs(data_->NumArcs());
+
+ // Ensures compatibility
+ int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
+ WriteHeader(strm, opts, file_version, &hdr);
+ compactor_->Write(strm);
+ return data_->Write(strm, opts);
+ }
+
+ // Provide information needed for generic state iterator
+ void InitStateIterator(StateIteratorData<A> *data) const {
+ data->base = 0;
+ data->nstates = data_->NumStates();
+ }
+
+ void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+ if (!HasArcs(s))
+ Expand(s);
+ CacheImpl<A>::InitArcIterator(s, data);
+ }
+
+ Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const {
+ return compactor_->Expand(s, data_->Compacts(i), f);
+ }
+
+ void Expand(StateId s) {
+ size_t begin = compactor_->Size() == -1 ?
+ data_->States(s) : s * compactor_->Size();
+ size_t end = compactor_->Size() == -1 ?
+ data_->States(s + 1) : (s + 1) * compactor_->Size();
+ for (size_t i = begin; i < end; ++i) {
+ const Arc &arc = ComputeArc(s, i);
+ if (arc.ilabel == kNoLabel)
+ SetFinal(s, arc.weight);
+ else
+ PushArc(s, arc);
+ }
+ if (!HasFinal(s))
+ SetFinal(s, Weight::Zero());
+ SetArcs(s);
+ }
+
+ template <class Iterator>
+ void SetCompactElements(const Iterator &b, const Iterator &e) {
+ if (data_ && !data_->DecrRefCount())
+ delete data_;
+ data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
+ }
+
+ C *GetCompactor() const { return compactor_; }
+ CompactFstData<CompactElement, U> *Data() const { return data_; }
+
+ // Properties always true of this Fst class
+ static const uint64 kStaticProperties = kExpanded;
+
+ protected:
+ template <class B, class D>
+ explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl)
+ : CacheImpl<A>(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())),
+ compactor_(new C(*impl.GetCompactor())),
+ own_compactor_(true),
+ data_(impl.Data()) {
+ if (data_)
+ data_->IncrRefCount();
+ SetType(impl.Type());
+ SetProperties(impl.Properties());
+ SetInputSymbols(impl.InputSymbols());
+ SetOutputSymbols(impl.OutputSymbols());
+ }
+
+ private:
+ friend class CompactFst<A, C, U>; // allow access during write.
+
+ void Init(const Fst<Arc> &fst) {
+ string type = "compact";
+ if (sizeof(U) != sizeof(uint32)) {
+ string size;
+ Int64ToStr(8 * sizeof(U), &size);
+ type += size;
+ }
+ type += "_";
+ type += compactor_->Type();
+ SetType(type);
+ SetInputSymbols(fst.InputSymbols());
+ SetOutputSymbols(fst.OutputSymbols());
+ data_ = new CompactFstData<CompactElement, U>(fst, *compactor_);
+ if (data_->Error())
+ SetProperties(kError, kError);
+ uint64 copy_properties = fst.Properties(kCopyProperties, true);
+ if ((copy_properties & kError) || !compactor_->Compatible(fst)) {
+ FSTERROR() << "CompactFstImpl: input fst incompatible with compactor";
+ SetProperties(kError, kError);
+ return;
+ }
+ SetProperties(copy_properties | kStaticProperties);
+ }
+
+ template <class Iterator>
+ void Init(const Iterator &b, const Iterator &e) {
+ string type = "compact";
+ if (sizeof(U) != sizeof(uint32)) {
+ string size;
+ Int64ToStr(8 * sizeof(U), &size);
+ type += size;
+ }
+ type += "_";
+ type += compactor_->Type();
+ SetType(type);
+ SetProperties(kStaticProperties | compactor_->Properties());
+ data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
+ if (data_->Error())
+ SetProperties(kError, kError);
+ }
+
+ // Current unaligned file format version
+ static const int kFileVersion = 2;
+ // Current aligned file format version
+ static const int kAlignedFileVersion = 1;
+ // Minimum file format version supported
+ static const int kMinFileVersion = 1;
+
+ C *compactor_;
+ bool own_compactor_;
+ CompactFstData<CompactElement, U> *data_;
+};
+
+template <class A, class C, class U>
+const uint64 CompactFstImpl<A, C, U>::kStaticProperties;
+template <class A, class C, class U>
+const int CompactFstImpl<A, C, U>::kFileVersion;
+template <class A, class C, class U>
+const int CompactFstImpl<A, C, U>::kAlignedFileVersion;
+template <class A, class C, class U>
+const int CompactFstImpl<A, C, U>::kMinFileVersion;
+
+
+// CompactFst. This class attaches interface to implementation and
+// handles reference counting, delegating most methods to
+// ImplToExpandedFst. The unsigned type U is used to represent indices
+// into the compact arc array (uint32 by default, declared in
+// fst-decl.h).
+template <class A, class C, class U>
+class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > {
+ public:
+ friend class StateIterator< CompactFst<A, C, U> >;
+ friend class ArcIterator< CompactFst<A, C, U> >;
+ template <class F, class G> void friend Cast(const F &, G *);
+
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef CompactFstImpl<A, C, U> Impl;
+ typedef CacheState<A> State;
+ typedef U Unsigned;
+
+ CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {}
+
+ explicit CompactFst(const Fst<A> &fst, const C &compactor = C(),
+ const CompactFstOptions &opts = CompactFstOptions())
+ : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
+
+ CompactFst(const Fst<A> &fst, C *compactor,
+ const CompactFstOptions &opts = CompactFstOptions())
+ : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
+
+ // The following 2 constructors take as input two iterators delimiting
+ // a set of (already) compacted transitions, starting with the
+ // transitions out of the initial state. The format of the input
+ // differs for fixed out-degree and variable out-degree compactors.
+ //
+ // - For fixed out-degree compactors, the final weight (encoded as a
+ // compacted transition) needs to be given only for final
+ // states. All strings (compactor of size 1) will be assume to be
+ // terminated by a final state even when the final state is not
+ // implicitely given.
+ //
+ // - For variable out-degree compactors, the final weight (encoded
+ // as a compacted transition) needs to be given for all states and
+ // must appeared first in the list (for state s, final weight of s,
+ // followed by outgoing transitons in s).
+ //
+ // These 2 constructors allows the direct construction of a CompactFst
+ // without first creating a more memory hungry 'regular' FST. This
+ // is useful when memory usage is severely constrained.
+ template <class Iterator>
+ explicit CompactFst(const Iterator &begin, const Iterator &end,
+ const C &compactor = C(),
+ const CompactFstOptions &opts = CompactFstOptions())
+ : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
+
+ template <class Iterator>
+ CompactFst(const Iterator &begin, const Iterator &end,
+ C *compactor, const CompactFstOptions &opts = CompactFstOptions())
+ : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
+
+ // See Fst<>::Copy() for doc.
+ CompactFst(const CompactFst<A, C, U> &fst, bool safe = false)
+ : ImplToExpandedFst<Impl>(fst, safe) {}
+
+ // Get a copy of this CompactFst. See Fst<>::Copy() for further doc.
+ virtual CompactFst<A, C, U> *Copy(bool safe = false) const {
+ return new CompactFst<A, C, U>(*this, safe);
+ }
+
+ // Read a CompactFst from an input stream; return NULL on error
+ static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) {
+ Impl* impl = Impl::Read(strm, opts);
+ return impl ? new CompactFst<A, C, U>(impl) : 0;
+ }
+
+ // Read a CompactFst from a file; return NULL on error
+ // Empty filename reads from standard input
+ static CompactFst<A, C, U> *Read(const string &filename) {
+ Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
+ return impl ? new CompactFst<A, C, U>(impl) : 0;
+ }
+
+ virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
+ return GetImpl()->Write(strm, opts);
+ }
+
+ virtual bool Write(const string &filename) const {
+ return Fst<A>::WriteFile(filename);
+ }
+
+ template <class F>
+ static bool WriteFst(const F &fst, const C &compactor, ostream &strm,
+ const FstWriteOptions &opts);
+
+ virtual void InitStateIterator(StateIteratorData<A> *data) const {
+ GetImpl()->InitStateIterator(data);
+ }
+
+ virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+ GetImpl()->InitArcIterator(s, data);
+ }
+
+ virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
+ return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type);
+ }
+
+ template <class Iterator>
+ void SetCompactElements(const Iterator &b, const Iterator &e) {
+ GetImpl()->SetCompactElements(b, e);
+ }
+
+ private:
+ CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
+
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }
+
+ void SetImpl(Impl *impl, bool own_impl = false) {
+ ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
+ }
+
+ // Use overloading to extract the type of the argument.
+ static Impl* GetImplIfCompactFst(const CompactFst<A, C, U> &compact_fst) {
+ return compact_fst.GetImpl();
+ }
+
+ // This does not give privileged treatment to subclasses of CompactFst.
+ template<typename NonCompactFst>
+ static Impl* GetImplIfCompactFst(const NonCompactFst& fst) {
+ return NULL;
+ }
+
+ void operator=(const CompactFst<A, C, U> &fst); // disallow
+};
+
+// Writes Fst in Compact format, potentially with a pass over the machine
+// before writing to compute the number of states and arcs.
+//
+template <class A, class C, class U>
+template <class F>
+bool CompactFst<A, C, U>::WriteFst(const F &fst,
+ const C &compactor,
+ ostream &strm,
+ const FstWriteOptions &opts) {
+ typedef U Unsigned;
+ typedef typename C::Element CompactElement;
+ typedef typename A::Weight Weight;
+ int file_version = opts.align ?
+ CompactFstImpl<A, C, U>::kAlignedFileVersion :
+ CompactFstImpl<A, C, U>::kFileVersion;
+ size_t num_arcs = -1, num_states = -1, num_compacts = -1;
+ C first_pass_compactor = compactor;
+ if (Impl* impl = GetImplIfCompactFst(fst)) {
+ num_arcs = impl->Data()->NumArcs();
+ num_states = impl->Data()->NumStates();
+ num_compacts = impl->Data()->NumCompacts();
+ first_pass_compactor = *impl->GetCompactor();
+ } else {
+ // A first pass is needed to compute the state of the compactor, which
+ // is saved ahead of the rest of the data structures. This unfortunately
+ // means forcing a complete double compaction when writing in this format.
+ // TODO(allauzen): eliminate mutable state from compactors.
+ num_arcs = 0;
+ num_states = 0;
+ for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
+ const StateId s = siter.Value();
+ ++num_states;
+ if (fst.Final(s) != Weight::Zero()) {
+ first_pass_compactor.Compact(
+ s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
+ }
+ for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
+ ++num_arcs;
+ first_pass_compactor.Compact(s, aiter.Value());
+ }
+ }
+ }
+ FstHeader hdr;
+ hdr.SetStart(fst.Start());
+ hdr.SetNumStates(num_states);
+ hdr.SetNumArcs(num_arcs);
+ string type = "compact";
+ if (sizeof(U) != sizeof(uint32)) {
+ string size;
+ Int64ToStr(8 * sizeof(U), &size);
+ type += size;