summaryrefslogtreecommitdiff
path: root/kaldi_io/src/tools/openfst/include/fst/compact-fst.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/compact-fst.h')
-rw-r--r--kaldi_io/src/tools/openfst/include/fst/compact-fst.h1438
1 files changed, 0 insertions, 1438 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/compact-fst.h b/kaldi_io/src/tools/openfst/include/fst/compact-fst.h
deleted file mode 100644
index 6db3317..0000000
--- a/kaldi_io/src/tools/openfst/include/fst/compact-fst.h
+++ /dev/null
@@ -1,1438 +0,0 @@
-// compact-fst.h
-
-
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Copyright 2005-2010 Google, Inc.
-// Author: allauzen@google.com (Cyril Allauzen)
-//
-// \file
-// FST Class for memory-efficient representation of common types of
-// FSTs: linear automata, acceptors, unweighted FSTs, ...
-
-#ifndef FST_LIB_COMPACT_FST_H__
-#define FST_LIB_COMPACT_FST_H__
-
-#include <iterator>
-#include <utility>
-using std::pair; using std::make_pair;
-#include <vector>
-using std::vector;
-
-#include <fst/cache.h>
-#include <fst/expanded-fst.h>
-#include <fst/fst-decl.h> // For optional argument declarations
-#include <fst/mapped-file.h>
-#include <fst/matcher.h>
-#include <fst/test-properties.h>
-#include <fst/util.h>
-
-
-namespace fst {
-
-struct CompactFstOptions : public CacheOptions {
- // CompactFst default caching behaviour is to do no caching. Most
- // compactors are cheap and therefore we save memory by not doing
- // caching.
- CompactFstOptions() : CacheOptions(true, 0) {}
- CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {}
-};
-
-// Compactor Interface - class determinies how arcs and final weights
-// are compacted and expanded.
-//
-// Final weights are treated as transitions to the superfinal state,
-// i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId.
-//
-// There are two types of compactors:
-//
-// * Fixed out-degree compactors: 'compactor.Size()' returns a
-// positive integer 's'. An FST can be compacted by this compactor
-// only if each state has exactly 's' outgoing transitions (counting a
-// non-Zero() final weight as a transition). A typical example is a
-// compactor for string FSTs, i.e. 's == 1'.
-//
-// * Variable out-degree compactors: 'compactor.Size() == -1'. There
-// are no out-degree restrictions for these compactors.
-//
-//
-// class Compactor {
-// public:
-// // Element is the type of the compacted transitions.
-// typedef ... Element;
-// // Return the compacted representation of a transition 'arc'
-// // at a state 's'.
-// Element Compact(StateId s, const Arc &arc);
-// // Return the transition at state 's' represented by the compacted
-// // transition 'e'.
-// Arc Expand(StateId s, const Element &e);
-// // Return -1 for variable out-degree compactors, and the mandatory
-// // out-degree otherwise.
-// ssize_t Size();
-// // Test whether 'fst' can be compacted by this compactor.
-// bool Compatible(const Fst<A> &fst);
-// // Return the properties that are always true for an fst
-// // compacted using this compactor
-// uint64 Properties();
-// // Return a string identifying the type of compactor.
-// static const string &Type();
-// // Write a compactor to a file.
-// bool Write(ostream &strm);
-// // Read a compactor from a file.
-// static Compactor *Read(istream &strm);
-// // Default constructor (optional, see comment below).
-// Compactor();
-// };
-//
-// The default constructor is only required for FST_REGISTER to work
-// (i.e. enabling Convert() and the command-line utilities to work
-// with this new compactor). However, a default constructor always
-// needs to be specify for this code to compile, but one can have it
-// simply raised an error when called:
-//
-// Compactor::Compactor() {
-// FSTERROR() << "Compactor: no default constructor";
-// }
-
-
-// Implementation data for Compact Fst, which can shared between otherwise
-// independent copies.
-//
-// The implementation contains two arrays: 'states_' and 'compacts_'.
-//
-// For fixed out-degree compactors, the 'states_' array is unallocated.
-// The 'compacts_' contains the compacted transitions. Its size is
-// 'ncompacts_'. The outgoing transitions at a given state are stored
-// consecutively. For a given state 's', its 'compactor.Size()' outgoing
-// transitions (including superfinal transition when 's' is final), are
-// stored in position ['s*compactor.Size()', '(s+1)*compactor_.Size()').
-//
-// For variable out-degree compactors, the states_ array has size
-// 'nstates_ + 1' and contains pointers to positions into 'compacts_'.
-// For a given state 's', the compacted transitions of 's' are
-// stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'.
-// By convention, 'states_[nstates_] == ncompacts_'.
-//
-// In both cases, the superfinal transitons (when 's' is final, i.e.
-// 'Final(s) != Weight::Zero()') is stored first.
-//
-// The unsigned type U is used to represent indices into the compacts_
-// array.
-template <class E, class U>
-class CompactFstData {
- public:
- typedef E CompactElement;
- typedef U Unsigned;
-
- CompactFstData()
- : states_region_(0),
- compacts_region_(0),
- states_(0),
- compacts_(0),
- nstates_(0),
- ncompacts_(0),
- narcs_(0),
- start_(kNoStateId),
- error_(false) {}
-
- template <class A, class Compactor>
- CompactFstData(const Fst<A> &fst, const Compactor &compactor);
-
- template <class Iterator, class Compactor>
- CompactFstData(const Iterator &begin, const Iterator &end,
- const Compactor &compactor);
-
- ~CompactFstData() {
- if (states_region_ == NULL) {
- delete [] states_;
- }
- delete states_region_;
- if (compacts_region_ == NULL) {
- delete [] compacts_;
- }
- delete compacts_region_;
- }
-
- template <class Compactor>
- static CompactFstData<E, U> *Read(istream &strm,
- const FstReadOptions &opts,
- const FstHeader &hdr,
- const Compactor &compactor);
-
- bool Write(ostream &strm, const FstWriteOptions &opts) const;
-
- Unsigned States(ssize_t i) const { return states_[i]; }
- const CompactElement &Compacts(size_t i) const { return compacts_[i]; }
- size_t NumStates() const { return nstates_; }
- size_t NumCompacts() const { return ncompacts_; }
- size_t NumArcs() const { return narcs_; }
- ssize_t Start() const { return start_; }
-
- int RefCount() const { return ref_count_.count(); }
- int IncrRefCount() { return ref_count_.Incr(); }
- int DecrRefCount() { return ref_count_.Decr(); }
-
- bool Error() const { return error_; }
-
- private:
- MappedFile *states_region_;
- MappedFile *compacts_region_;
- Unsigned *states_;
- CompactElement *compacts_;
- size_t nstates_;
- size_t ncompacts_;
- size_t narcs_;
- ssize_t start_;
- RefCounter ref_count_;
- bool error_;
-};
-
-template <class E, class U>
-template <class A, class C>
-CompactFstData<E, U>::CompactFstData(const Fst<A> &fst, const C &compactor)
- : states_region_(0),
- compacts_region_(0),
- states_(0),
- compacts_(0),
- nstates_(0),
- ncompacts_(0),
- narcs_(0),
- start_(kNoStateId),
- error_(false) {
- typedef typename A::StateId StateId;
- typedef typename A::Weight Weight;
- start_ = fst.Start();
- // Count # of states and arcs.
- StateId nfinals = 0;
- for (StateIterator< Fst<A> > siter(fst);
- !siter.Done();
- siter.Next()) {
- ++nstates_;
- StateId s = siter.Value();
- for (ArcIterator< Fst<A> > aiter(fst, s);
- !aiter.Done();
- aiter.Next())
- ++narcs_;
- if (fst.Final(s) != Weight::Zero()) ++nfinals;
- }
- if (compactor.Size() == -1) {
- states_ = new Unsigned[nstates_ + 1];
- ncompacts_ = narcs_ + nfinals;
- compacts_ = new CompactElement[ncompacts_];
- states_[nstates_] = ncompacts_;
- } else {
- states_ = 0;
- ncompacts_ = nstates_ * compactor.Size();
- if ((narcs_ + nfinals) != ncompacts_) {
- FSTERROR() << "CompactFstData: compactor incompatible with fst";
- error_ = true;
- return;
- }
- compacts_ = new CompactElement[ncompacts_];
- }
- size_t pos = 0, fpos = 0;
- for (StateId s = 0; s < nstates_; ++s) {
- fpos = pos;
- if (compactor.Size() == -1)
- states_[s] = pos;
- if (fst.Final(s) != Weight::Zero())
- compacts_[pos++] = compactor.Compact(s, A(kNoLabel, kNoLabel,
- fst.Final(s), kNoStateId));
- for (ArcIterator< Fst<A> > aiter(fst, s);
- !aiter.Done();
- aiter.Next()) {
- compacts_[pos++] = compactor.Compact(s, aiter.Value());
- }
- if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) {
- FSTERROR() << "CompactFstData: compactor incompatible with fst";
- error_ = true;
- return;
- }
- }
- if (pos != ncompacts_) {
- FSTERROR() << "CompactFstData: compactor incompatible with fst";
- error_ = true;
- return;
- }
-}
-
-template <class E, class U>
-template <class Iterator, class C>
-CompactFstData<E, U>::CompactFstData(const Iterator &begin,
- const Iterator &end,
- const C &compactor)
- : states_region_(0),
- compacts_region_(0),
- states_(0),
- compacts_(0),
- nstates_(0),
- ncompacts_(0),
- narcs_(0),
- start_(kNoStateId),
- error_(false) {
- typedef typename C::Arc Arc;
- typedef typename Arc::Weight Weight;
- if (compactor.Size() != -1) {
- ncompacts_ = distance(begin, end);
- if (compactor.Size() == 1) {
- // For strings, allow implicit final weight.
- // Empty input is the empty string.
- if (ncompacts_ == 0) {
- ++ncompacts_;
- } else {
- Arc arc = compactor.Expand(ncompacts_ - 1,
- *(begin + (ncompacts_ - 1)));
- if (arc.ilabel != kNoLabel)
- ++ncompacts_;
- }
- }
- if (ncompacts_ % compactor.Size()) {
- FSTERROR() << "CompactFstData: size of input container incompatible"
- << " with compactor";
- error_ = true;
- return;
- }
- if (ncompacts_ == 0)
- return;
- start_ = 0;
- nstates_ = ncompacts_ / compactor.Size();
- compacts_ = new CompactElement[ncompacts_];
- size_t i = 0;
- Iterator it = begin;
- for(; it != end; ++it, ++i){
- compacts_[i] = *it;
- if (compactor.Expand(i, *it).ilabel != kNoLabel)
- ++narcs_;
- }
- if (i < ncompacts_)
- compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel,
- Weight::One(), kNoStateId));
- } else {
- if (distance(begin, end) == 0)
- return;
- // Count # of states, arcs and compacts.
- Iterator it = begin;
- for(size_t i = 0; it != end; ++it, ++i) {
- Arc arc = compactor.Expand(i, *it);
- if (arc.ilabel != kNoLabel) {
- ++narcs_;
- ++ncompacts_;
- } else {
- ++nstates_;
- if (arc.weight != Weight::Zero())
- ++ncompacts_;
- }
- }
- start_ = 0;
- compacts_ = new CompactElement[ncompacts_];
- states_ = new Unsigned[nstates_ + 1];
- states_[nstates_] = ncompacts_;
- size_t i = 0, s = 0;
- for(it = begin; it != end; ++it) {
- Arc arc = compactor.Expand(i, *it);
- if (arc.ilabel != kNoLabel) {
- compacts_[i++] = *it;
- } else {
- states_[s++] = i;
- if (arc.weight != Weight::Zero())
- compacts_[i++] = *it;
- }
- }
- if ((s != nstates_) || (i != ncompacts_)) {
- FSTERROR() << "CompactFstData: ill-formed input container";
- error_ = true;
- return;
- }
- }
-}
-
-template <class E, class U>
-template <class C>
-CompactFstData<E, U> *CompactFstData<E, U>::Read(
- istream &strm,
- const FstReadOptions &opts,
- const FstHeader &hdr,
- const C &compactor) {
- CompactFstData<E, U> *data = new CompactFstData<E, U>();
- data->start_ = hdr.Start();
- data->nstates_ = hdr.NumStates();
- data->narcs_ = hdr.NumArcs();
-
- if (compactor.Size() == -1) {
- if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
- LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
- delete data;
- return 0;
- }
- size_t b = (data->nstates_ + 1) * sizeof(Unsigned);
- data->states_region_ = MappedFile::Map(&strm, opts, b);
- if (!strm || data->states_region_ == NULL) {
- LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
- delete data;
- return 0;
- }
- data->states_ = static_cast<Unsigned *>(
- data->states_region_->mutable_data());
- } else {
- data->states_ = 0;
- }
- data->ncompacts_ = compactor.Size() == -1
- ? data->states_[data->nstates_]
- : data->nstates_ * compactor.Size();
- if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) {
- LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source;
- delete data;
- return 0;
- }
- size_t b = data->ncompacts_ * sizeof(CompactElement);
- data->compacts_region_ = MappedFile::Map(&strm, opts, b);
- if (!strm || data->compacts_region_ == NULL) {
- LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source;
- delete data;
- return 0;
- }
- data->compacts_ = static_cast<CompactElement *>(
- data->compacts_region_->mutable_data());
- return data;
-}
-
-template<class E, class U>
-bool CompactFstData<E, U>::Write(ostream &strm,
- const FstWriteOptions &opts) const {
- if (states_) {
- if (opts.align && !AlignOutput(strm)) {
- LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
- return false;
- }
- strm.write(reinterpret_cast<char *>(states_),
- (nstates_ + 1) * sizeof(Unsigned));
- }
- if (opts.align && !AlignOutput(strm)) {
- LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source;
- return false;
- }
- strm.write(reinterpret_cast<char *>(compacts_),
- ncompacts_ * sizeof(CompactElement));
-
- strm.flush();
- if (!strm) {
- LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source;
- return false;
- }
- return true;
-}
-
-template <class A, class C, class U> class CompactFst;
-template <class F, class G> void Cast(const F &, G *);
-
-// Implementation class for CompactFst, which contains CompactFstData
-// and Fst cache.
-template <class A, class C, class U>
-class CompactFstImpl : public CacheImpl<A> {
- public:
- using FstImpl<A>::SetType;
- using FstImpl<A>::SetProperties;
- using FstImpl<A>::Properties;
- using FstImpl<A>::SetInputSymbols;
- using FstImpl<A>::SetOutputSymbols;
- using FstImpl<A>::WriteHeader;
-
- using CacheImpl<A>::PushArc;
- using CacheImpl<A>::HasArcs;
- using CacheImpl<A>::HasFinal;
- using CacheImpl<A>::HasStart;
- using CacheImpl<A>::SetArcs;
- using CacheImpl<A>::SetFinal;
- using CacheImpl<A>::SetStart;
-
- typedef A Arc;
- typedef typename A::Weight Weight;
- typedef typename A::StateId StateId;
- typedef C Compactor;
- typedef typename C::Element CompactElement;
- typedef U Unsigned;
-
- CompactFstImpl()
- : CacheImpl<A>(CompactFstOptions()),
- compactor_(0),
- own_compactor_(false),
- data_(0) {
- string type = "compact";
- if (sizeof(U) != sizeof(uint32)) {
- string size;
- Int64ToStr(8 * sizeof(U), &size);
- type += size;
- }
- type += "_";
- type += C::Type();
- SetType(type);
- SetProperties(kNullProperties | kStaticProperties);
- }
-
- CompactFstImpl(const Fst<Arc> &fst, const C &compactor,
- const CompactFstOptions &opts)
- : CacheImpl<A>(opts),
- compactor_(new C(compactor)),
- own_compactor_(true),
- data_(0) {
- Init(fst);
- }
-
- CompactFstImpl(const Fst<Arc> &fst, C *compactor,
- const CompactFstOptions &opts)
- : CacheImpl<A>(opts),
- compactor_(compactor),
- own_compactor_(false),
- data_(0) {
- Init(fst);
- }
-
- template <class Iterator>
- CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor,
- const CompactFstOptions &opts)
- : CacheImpl<A>(opts),
- compactor_(new C(compactor)),
- own_compactor_(true),
- data_(0) {
- Init(b, e);
- }
-
- template <class Iterator>
- CompactFstImpl(const Iterator &b, const Iterator &e, C *compactor,
- const CompactFstOptions &opts)
- : CacheImpl<A>(opts),
- compactor_(compactor),
- own_compactor_(false),
- data_(0) {
- Init(b, e);
- }
-
- CompactFstImpl(const CompactFstImpl<A, C, U> &impl)
- : CacheImpl<A>(impl),
- compactor_(new C(*impl.compactor_)),
- own_compactor_(true),
- data_(impl.data_) {
- if (data_)
- data_->IncrRefCount();
- SetType(impl.Type());
- SetProperties(impl.Properties());
- SetInputSymbols(impl.InputSymbols());
- SetOutputSymbols(impl.OutputSymbols());
- }
-
- ~CompactFstImpl(){
- if (own_compactor_)
- delete compactor_;
- if (data_ && !data_->DecrRefCount())
- delete data_;
- }
-
- StateId Start() {
- if (!HasStart()) {
- SetStart(data_->Start());
- }
- return CacheImpl<A>::Start();
- }
-
- Weight Final(StateId s) {
- if (HasFinal(s))
- return CacheImpl<A>::Final(s);
- Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId);
- if ((compactor_->Size() != -1) ||
- (data_->States(s) != data_->States(s + 1)))
- arc = ComputeArc(s,
- compactor_->Size() == -1
- ? data_->States(s)
- : s * compactor_->Size());
- return arc.ilabel == kNoLabel ? arc.weight : Weight::Zero();
- }
-
- StateId NumStates() const {
- if (Properties(kError)) return 0;
- return data_->NumStates();
- }
-
- size_t NumArcs(StateId s) {
- if (HasArcs(s))
- return CacheImpl<A>::NumArcs(s);
- Unsigned i, num_arcs;
- if (compactor_->Size() == -1) {
- i = data_->States(s);
- num_arcs = data_->States(s + 1) - i;
- } else {
- i = s * compactor_->Size();
- num_arcs = compactor_->Size();
- }
- if (num_arcs > 0) {
- const A &arc = ComputeArc(s, i, kArcILabelValue);
- if (arc.ilabel == kNoStateId) {
- --num_arcs;
- }
- }
- return num_arcs;
- }
-
- size_t NumInputEpsilons(StateId s) {
- if (!HasArcs(s) && !Properties(kILabelSorted))
- Expand(s);
- if (HasArcs(s))
- return CacheImpl<A>::NumInputEpsilons(s);
- return CountEpsilons(s, false);
- }
-
- size_t NumOutputEpsilons(StateId s) {
- if (!HasArcs(s) && !Properties(kOLabelSorted))
- Expand(s);
- if (HasArcs(s))
- return CacheImpl<A>::NumOutputEpsilons(s);
- return CountEpsilons(s, true);
- }
-
- size_t CountEpsilons(StateId s, bool output_epsilons) {
- size_t begin = compactor_->Size() == -1 ?
- data_->States(s) : s * compactor_->Size();
- size_t end = compactor_->Size() == -1 ?
- data_->States(s + 1) : (s + 1) * compactor_->Size();
- size_t num_eps = 0;
- for (size_t i = begin; i < end; ++i) {
- const A &arc = ComputeArc(
- s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue);
- const typename A::Label &label =
- (output_epsilons ? arc.olabel : arc.ilabel);
- if (label == kNoLabel)
- continue;
- else if (label > 0)
- break;
- ++num_eps;
- }
- return num_eps;
- }
-
- static CompactFstImpl<A, C, U> *Read(istream &strm,
- const FstReadOptions &opts) {
- CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>();
- FstHeader hdr;
- if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) {
- delete impl;
- return 0;
- }
-
- // Ensures compatibility
- if (hdr.Version() == kAlignedFileVersion)
- hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED);
-
- impl->compactor_ = C::Read(strm);
- if (!impl->compactor_) {
- delete impl;
- return 0;
- }
- impl->own_compactor_ = true;
- impl->data_ = CompactFstData<CompactElement, U>::Read(strm, opts, hdr,
- *impl->compactor_);
- if (!impl->data_) {
- delete impl;
- return 0;
- }
- return impl;
- }
-
- bool Write(ostream &strm, const FstWriteOptions &opts) const {
- FstHeader hdr;
- hdr.SetStart(data_->Start());
- hdr.SetNumStates(data_->NumStates());
- hdr.SetNumArcs(data_->NumArcs());
-
- // Ensures compatibility
- int file_version = opts.align ? kAlignedFileVersion : kFileVersion;
- WriteHeader(strm, opts, file_version, &hdr);
- compactor_->Write(strm);
- return data_->Write(strm, opts);
- }
-
- // Provide information needed for generic state iterator
- void InitStateIterator(StateIteratorData<A> *data) const {
- data->base = 0;
- data->nstates = data_->NumStates();
- }
-
- void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
- if (!HasArcs(s))
- Expand(s);
- CacheImpl<A>::InitArcIterator(s, data);
- }
-
- Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const {
- return compactor_->Expand(s, data_->Compacts(i), f);
- }
-
- void Expand(StateId s) {
- size_t begin = compactor_->Size() == -1 ?
- data_->States(s) : s * compactor_->Size();
- size_t end = compactor_->Size() == -1 ?
- data_->States(s + 1) : (s + 1) * compactor_->Size();
- for (size_t i = begin; i < end; ++i) {
- const Arc &arc = ComputeArc(s, i);
- if (arc.ilabel == kNoLabel)
- SetFinal(s, arc.weight);
- else
- PushArc(s, arc);
- }
- if (!HasFinal(s))
- SetFinal(s, Weight::Zero());
- SetArcs(s);
- }
-
- template <class Iterator>
- void SetCompactElements(const Iterator &b, const Iterator &e) {
- if (data_ && !data_->DecrRefCount())
- delete data_;
- data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
- }
-
- C *GetCompactor() const { return compactor_; }
- CompactFstData<CompactElement, U> *Data() const { return data_; }
-
- // Properties always true of this Fst class
- static const uint64 kStaticProperties = kExpanded;
-
- protected:
- template <class B, class D>
- explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl)
- : CacheImpl<A>(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())),
- compactor_(new C(*impl.GetCompactor())),
- own_compactor_(true),
- data_(impl.Data()) {
- if (data_)
- data_->IncrRefCount();
- SetType(impl.Type());
- SetProperties(impl.Properties());
- SetInputSymbols(impl.InputSymbols());
- SetOutputSymbols(impl.OutputSymbols());
- }
-
- private:
- friend class CompactFst<A, C, U>; // allow access during write.
-
- void Init(const Fst<Arc> &fst) {
- string type = "compact";
- if (sizeof(U) != sizeof(uint32)) {
- string size;
- Int64ToStr(8 * sizeof(U), &size);
- type += size;
- }
- type += "_";
- type += compactor_->Type();
- SetType(type);
- SetInputSymbols(fst.InputSymbols());
- SetOutputSymbols(fst.OutputSymbols());
- data_ = new CompactFstData<CompactElement, U>(fst, *compactor_);
- if (data_->Error())
- SetProperties(kError, kError);
- uint64 copy_properties = fst.Properties(kCopyProperties, true);
- if ((copy_properties & kError) || !compactor_->Compatible(fst)) {
- FSTERROR() << "CompactFstImpl: input fst incompatible with compactor";
- SetProperties(kError, kError);
- return;
- }
- SetProperties(copy_properties | kStaticProperties);
- }
-
- template <class Iterator>
- void Init(const Iterator &b, const Iterator &e) {
- string type = "compact";
- if (sizeof(U) != sizeof(uint32)) {
- string size;
- Int64ToStr(8 * sizeof(U), &size);
- type += size;
- }
- type += "_";
- type += compactor_->Type();
- SetType(type);
- SetProperties(kStaticProperties | compactor_->Properties());
- data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_);
- if (data_->Error())
- SetProperties(kError, kError);
- }
-
- // Current unaligned file format version
- static const int kFileVersion = 2;
- // Current aligned file format version
- static const int kAlignedFileVersion = 1;
- // Minimum file format version supported
- static const int kMinFileVersion = 1;
-
- C *compactor_;
- bool own_compactor_;
- CompactFstData<CompactElement, U> *data_;
-};
-
-template <class A, class C, class U>
-const uint64 CompactFstImpl<A, C, U>::kStaticProperties;
-template <class A, class C, class U>
-const int CompactFstImpl<A, C, U>::kFileVersion;
-template <class A, class C, class U>
-const int CompactFstImpl<A, C, U>::kAlignedFileVersion;
-template <class A, class C, class U>
-const int CompactFstImpl<A, C, U>::kMinFileVersion;
-
-
-// CompactFst. This class attaches interface to implementation and
-// handles reference counting, delegating most methods to
-// ImplToExpandedFst. The unsigned type U is used to represent indices
-// into the compact arc array (uint32 by default, declared in
-// fst-decl.h).
-template <class A, class C, class U>
-class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > {
- public:
- friend class StateIterator< CompactFst<A, C, U> >;
- friend class ArcIterator< CompactFst<A, C, U> >;
- template <class F, class G> void friend Cast(const F &, G *);
-
- typedef A Arc;
- typedef typename A::StateId StateId;
- typedef CompactFstImpl<A, C, U> Impl;
- typedef CacheState<A> State;
- typedef U Unsigned;
-
- CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {}
-
- explicit CompactFst(const Fst<A> &fst, const C &compactor = C(),
- const CompactFstOptions &opts = CompactFstOptions())
- : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
-
- CompactFst(const Fst<A> &fst, C *compactor,
- const CompactFstOptions &opts = CompactFstOptions())
- : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {}
-
- // The following 2 constructors take as input two iterators delimiting
- // a set of (already) compacted transitions, starting with the
- // transitions out of the initial state. The format of the input
- // differs for fixed out-degree and variable out-degree compactors.
- //
- // - For fixed out-degree compactors, the final weight (encoded as a
- // compacted transition) needs to be given only for final
- // states. All strings (compactor of size 1) will be assume to be
- // terminated by a final state even when the final state is not
- // implicitely given.
- //
- // - For variable out-degree compactors, the final weight (encoded
- // as a compacted transition) needs to be given for all states and
- // must appeared first in the list (for state s, final weight of s,
- // followed by outgoing transitons in s).
- //
- // These 2 constructors allows the direct construction of a CompactFst
- // without first creating a more memory hungry 'regular' FST. This
- // is useful when memory usage is severely constrained.
- template <class Iterator>
- explicit CompactFst(const Iterator &begin, const Iterator &end,
- const C &compactor = C(),
- const CompactFstOptions &opts = CompactFstOptions())
- : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
-
- template <class Iterator>
- CompactFst(const Iterator &begin, const Iterator &end,
- C *compactor, const CompactFstOptions &opts = CompactFstOptions())
- : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {}
-
- // See Fst<>::Copy() for doc.
- CompactFst(const CompactFst<A, C, U> &fst, bool safe = false)
- : ImplToExpandedFst<Impl>(fst, safe) {}
-
- // Get a copy of this CompactFst. See Fst<>::Copy() for further doc.
- virtual CompactFst<A, C, U> *Copy(bool safe = false) const {
- return new CompactFst<A, C, U>(*this, safe);
- }
-
- // Read a CompactFst from an input stream; return NULL on error
- static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) {
- Impl* impl = Impl::Read(strm, opts);
- return impl ? new CompactFst<A, C, U>(impl) : 0;
- }
-
- // Read a CompactFst from a file; return NULL on error
- // Empty filename reads from standard input
- static CompactFst<A, C, U> *Read(const string &filename) {
- Impl* impl = ImplToExpandedFst<Impl>::Read(filename);
- return impl ? new CompactFst<A, C, U>(impl) : 0;
- }
-
- virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
- return GetImpl()->Write(strm, opts);
- }
-
- virtual bool Write(const string &filename) const {
- return Fst<A>::WriteFile(filename);
- }
-
- template <class F>
- static bool WriteFst(const F &fst, const C &compactor, ostream &strm,
- const FstWriteOptions &opts);
-
- virtual void InitStateIterator(StateIteratorData<A> *data) const {
- GetImpl()->InitStateIterator(data);
- }
-
- virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
- GetImpl()->InitArcIterator(s, data);
- }
-
- virtual MatcherBase<A> *InitMatcher(MatchType match_type) const {
- return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type);
- }
-
- template <class Iterator>
- void SetCompactElements(const Iterator &b, const Iterator &e) {
- GetImpl()->SetCompactElements(b, e);
- }
-
- private:
- CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}
-
- // Makes visible to friends.
- Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); }
-
- void SetImpl(Impl *impl, bool own_impl = false) {
- ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl);
- }
-
- // Use overloading to extract the type of the argument.
- static Impl* GetImplIfCompactFst(const CompactFst<A, C, U> &compact_fst) {
- return compact_fst.GetImpl();
- }
-
- // This does not give privileged treatment to subclasses of CompactFst.
- template<typename NonCompactFst>
- static Impl* GetImplIfCompactFst(const NonCompactFst& fst) {
- return NULL;
- }
-
- void operator=(const CompactFst<A, C, U> &fst); // disallow
-};
-
-// Writes Fst in Compact format, potentially with a pass over the machine
-// before writing to compute the number of states and arcs.
-//
-template <class A, class C, class U>
-template <class F>
-bool CompactFst<A, C, U>::WriteFst(const F &fst,
- const C &compactor,
- ostream &strm,
- const FstWriteOptions &opts) {
- typedef U Unsigned;
- typedef typename C::Element CompactElement;
- typedef typename A::Weight Weight;
- int file_version = opts.align ?
- CompactFstImpl<A, C, U>::kAlignedFileVersion :
- CompactFstImpl<A, C, U>::kFileVersion;
- size_t num_arcs = -1, num_states = -1, num_compacts = -1;
- C first_pass_compactor = compactor;
- if (Impl* impl = GetImplIfCompactFst(fst)) {
- num_arcs = impl->Data()->NumArcs();
- num_states = impl->Data()->NumStates();
- num_compacts = impl->Data()->NumCompacts();
- first_pass_compactor = *impl->GetCompactor();
- } else {
- // A first pass is needed to compute the state of the compactor, which
- // is saved ahead of the rest of the data structures. This unfortunately
- // means forcing a complete double compaction when writing in this format.
- // TODO(allauzen): eliminate mutable state from compactors.
- num_arcs = 0;
- num_states = 0;
- for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) {
- const StateId s = siter.Value();
- ++num_states;
- if (fst.Final(s) != Weight::Zero()) {
- first_pass_compactor.Compact(
- s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId));
- }
- for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) {
- ++num_arcs;
- first_pass_compactor.Compact(s, aiter.Value());
- }
- }
- }
- FstHeader hdr;
- hdr.SetStart(fst.Start());
- hdr.SetNumStates(num_states);
- hdr.SetNumArcs(num_arcs);
- string type = "compact";
- if (sizeof(U) != sizeof(uint32)) {
- string size;
- Int64ToStr(8 * sizeof(U), &size);
- type += size;
- }
- type += "_";
- type += C::Type();
- uint64 copy_properties = fst.Properties(kCopyProperties, true);
- if ((copy_properties & kError) || !compactor.Compatible(fst)) {
- LOG(ERROR) << "fst incompatible with compactor";
- return false;
- }
- uint64 properties = copy_properties |
- CompactFstImpl<A, C, U>::kStaticProperties;
- FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties,
- &hdr);
- first_pass_compactor.Write(strm);