From 96a32415ab43377cf1575bd3f4f2980f58028209 Mon Sep 17 00:00:00 2001 From: Determinant Date: Fri, 14 Aug 2015 11:51:42 +0800 Subject: add implementation for kaldi io (by ymz) --- .../src/tools/openfst/include/fst/compact-fst.h | 1438 ++++++++++++++++++++ 1 file changed, 1438 insertions(+) create mode 100644 kaldi_io/src/tools/openfst/include/fst/compact-fst.h (limited to 'kaldi_io/src/tools/openfst/include/fst/compact-fst.h') diff --git a/kaldi_io/src/tools/openfst/include/fst/compact-fst.h b/kaldi_io/src/tools/openfst/include/fst/compact-fst.h new file mode 100644 index 0000000..6db3317 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/compact-fst.h @@ -0,0 +1,1438 @@ +// compact-fst.h + + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: allauzen@google.com (Cyril Allauzen) +// +// \file +// FST Class for memory-efficient representation of common types of +// FSTs: linear automata, acceptors, unweighted FSTs, ... + +#ifndef FST_LIB_COMPACT_FST_H__ +#define FST_LIB_COMPACT_FST_H__ + +#include +#include +using std::pair; using std::make_pair; +#include +using std::vector; + +#include +#include +#include // For optional argument declarations +#include +#include +#include +#include + + +namespace fst { + +struct CompactFstOptions : public CacheOptions { + // CompactFst default caching behaviour is to do no caching. Most + // compactors are cheap and therefore we save memory by not doing + // caching. + CompactFstOptions() : CacheOptions(true, 0) {} + CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {} +}; + +// Compactor Interface - class determinies how arcs and final weights +// are compacted and expanded. +// +// Final weights are treated as transitions to the superfinal state, +// i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId. +// +// There are two types of compactors: +// +// * Fixed out-degree compactors: 'compactor.Size()' returns a +// positive integer 's'. An FST can be compacted by this compactor +// only if each state has exactly 's' outgoing transitions (counting a +// non-Zero() final weight as a transition). A typical example is a +// compactor for string FSTs, i.e. 's == 1'. +// +// * Variable out-degree compactors: 'compactor.Size() == -1'. There +// are no out-degree restrictions for these compactors. +// +// +// class Compactor { +// public: +// // Element is the type of the compacted transitions. +// typedef ... Element; +// // Return the compacted representation of a transition 'arc' +// // at a state 's'. +// Element Compact(StateId s, const Arc &arc); +// // Return the transition at state 's' represented by the compacted +// // transition 'e'. +// Arc Expand(StateId s, const Element &e); +// // Return -1 for variable out-degree compactors, and the mandatory +// // out-degree otherwise. +// ssize_t Size(); +// // Test whether 'fst' can be compacted by this compactor. +// bool Compatible(const Fst &fst); +// // Return the properties that are always true for an fst +// // compacted using this compactor +// uint64 Properties(); +// // Return a string identifying the type of compactor. +// static const string &Type(); +// // Write a compactor to a file. +// bool Write(ostream &strm); +// // Read a compactor from a file. +// static Compactor *Read(istream &strm); +// // Default constructor (optional, see comment below). +// Compactor(); +// }; +// +// The default constructor is only required for FST_REGISTER to work +// (i.e. enabling Convert() and the command-line utilities to work +// with this new compactor). However, a default constructor always +// needs to be specify for this code to compile, but one can have it +// simply raised an error when called: +// +// Compactor::Compactor() { +// FSTERROR() << "Compactor: no default constructor"; +// } + + +// Implementation data for Compact Fst, which can shared between otherwise +// independent copies. +// +// The implementation contains two arrays: 'states_' and 'compacts_'. +// +// For fixed out-degree compactors, the 'states_' array is unallocated. +// The 'compacts_' contains the compacted transitions. Its size is +// 'ncompacts_'. The outgoing transitions at a given state are stored +// consecutively. For a given state 's', its 'compactor.Size()' outgoing +// transitions (including superfinal transition when 's' is final), are +// stored in position ['s*compactor.Size()', '(s+1)*compactor_.Size()'). +// +// For variable out-degree compactors, the states_ array has size +// 'nstates_ + 1' and contains pointers to positions into 'compacts_'. +// For a given state 's', the compacted transitions of 's' are +// stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'. +// By convention, 'states_[nstates_] == ncompacts_'. +// +// In both cases, the superfinal transitons (when 's' is final, i.e. +// 'Final(s) != Weight::Zero()') is stored first. +// +// The unsigned type U is used to represent indices into the compacts_ +// array. +template +class CompactFstData { + public: + typedef E CompactElement; + typedef U Unsigned; + + CompactFstData() + : states_region_(0), + compacts_region_(0), + states_(0), + compacts_(0), + nstates_(0), + ncompacts_(0), + narcs_(0), + start_(kNoStateId), + error_(false) {} + + template + CompactFstData(const Fst &fst, const Compactor &compactor); + + template + CompactFstData(const Iterator &begin, const Iterator &end, + const Compactor &compactor); + + ~CompactFstData() { + if (states_region_ == NULL) { + delete [] states_; + } + delete states_region_; + if (compacts_region_ == NULL) { + delete [] compacts_; + } + delete compacts_region_; + } + + template + static CompactFstData *Read(istream &strm, + const FstReadOptions &opts, + const FstHeader &hdr, + const Compactor &compactor); + + bool Write(ostream &strm, const FstWriteOptions &opts) const; + + Unsigned States(ssize_t i) const { return states_[i]; } + const CompactElement &Compacts(size_t i) const { return compacts_[i]; } + size_t NumStates() const { return nstates_; } + size_t NumCompacts() const { return ncompacts_; } + size_t NumArcs() const { return narcs_; } + ssize_t Start() const { return start_; } + + int RefCount() const { return ref_count_.count(); } + int IncrRefCount() { return ref_count_.Incr(); } + int DecrRefCount() { return ref_count_.Decr(); } + + bool Error() const { return error_; } + + private: + MappedFile *states_region_; + MappedFile *compacts_region_; + Unsigned *states_; + CompactElement *compacts_; + size_t nstates_; + size_t ncompacts_; + size_t narcs_; + ssize_t start_; + RefCounter ref_count_; + bool error_; +}; + +template +template +CompactFstData::CompactFstData(const Fst &fst, const C &compactor) + : states_region_(0), + compacts_region_(0), + states_(0), + compacts_(0), + nstates_(0), + ncompacts_(0), + narcs_(0), + start_(kNoStateId), + error_(false) { + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + start_ = fst.Start(); + // Count # of states and arcs. + StateId nfinals = 0; + for (StateIterator< Fst > siter(fst); + !siter.Done(); + siter.Next()) { + ++nstates_; + StateId s = siter.Value(); + for (ArcIterator< Fst > aiter(fst, s); + !aiter.Done(); + aiter.Next()) + ++narcs_; + if (fst.Final(s) != Weight::Zero()) ++nfinals; + } + if (compactor.Size() == -1) { + states_ = new Unsigned[nstates_ + 1]; + ncompacts_ = narcs_ + nfinals; + compacts_ = new CompactElement[ncompacts_]; + states_[nstates_] = ncompacts_; + } else { + states_ = 0; + ncompacts_ = nstates_ * compactor.Size(); + if ((narcs_ + nfinals) != ncompacts_) { + FSTERROR() << "CompactFstData: compactor incompatible with fst"; + error_ = true; + return; + } + compacts_ = new CompactElement[ncompacts_]; + } + size_t pos = 0, fpos = 0; + for (StateId s = 0; s < nstates_; ++s) { + fpos = pos; + if (compactor.Size() == -1) + states_[s] = pos; + if (fst.Final(s) != Weight::Zero()) + compacts_[pos++] = compactor.Compact(s, A(kNoLabel, kNoLabel, + fst.Final(s), kNoStateId)); + for (ArcIterator< Fst > aiter(fst, s); + !aiter.Done(); + aiter.Next()) { + compacts_[pos++] = compactor.Compact(s, aiter.Value()); + } + if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) { + FSTERROR() << "CompactFstData: compactor incompatible with fst"; + error_ = true; + return; + } + } + if (pos != ncompacts_) { + FSTERROR() << "CompactFstData: compactor incompatible with fst"; + error_ = true; + return; + } +} + +template +template +CompactFstData::CompactFstData(const Iterator &begin, + const Iterator &end, + const C &compactor) + : states_region_(0), + compacts_region_(0), + states_(0), + compacts_(0), + nstates_(0), + ncompacts_(0), + narcs_(0), + start_(kNoStateId), + error_(false) { + typedef typename C::Arc Arc; + typedef typename Arc::Weight Weight; + if (compactor.Size() != -1) { + ncompacts_ = distance(begin, end); + if (compactor.Size() == 1) { + // For strings, allow implicit final weight. + // Empty input is the empty string. + if (ncompacts_ == 0) { + ++ncompacts_; + } else { + Arc arc = compactor.Expand(ncompacts_ - 1, + *(begin + (ncompacts_ - 1))); + if (arc.ilabel != kNoLabel) + ++ncompacts_; + } + } + if (ncompacts_ % compactor.Size()) { + FSTERROR() << "CompactFstData: size of input container incompatible" + << " with compactor"; + error_ = true; + return; + } + if (ncompacts_ == 0) + return; + start_ = 0; + nstates_ = ncompacts_ / compactor.Size(); + compacts_ = new CompactElement[ncompacts_]; + size_t i = 0; + Iterator it = begin; + for(; it != end; ++it, ++i){ + compacts_[i] = *it; + if (compactor.Expand(i, *it).ilabel != kNoLabel) + ++narcs_; + } + if (i < ncompacts_) + compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel, + Weight::One(), kNoStateId)); + } else { + if (distance(begin, end) == 0) + return; + // Count # of states, arcs and compacts. + Iterator it = begin; + for(size_t i = 0; it != end; ++it, ++i) { + Arc arc = compactor.Expand(i, *it); + if (arc.ilabel != kNoLabel) { + ++narcs_; + ++ncompacts_; + } else { + ++nstates_; + if (arc.weight != Weight::Zero()) + ++ncompacts_; + } + } + start_ = 0; + compacts_ = new CompactElement[ncompacts_]; + states_ = new Unsigned[nstates_ + 1]; + states_[nstates_] = ncompacts_; + size_t i = 0, s = 0; + for(it = begin; it != end; ++it) { + Arc arc = compactor.Expand(i, *it); + if (arc.ilabel != kNoLabel) { + compacts_[i++] = *it; + } else { + states_[s++] = i; + if (arc.weight != Weight::Zero()) + compacts_[i++] = *it; + } + } + if ((s != nstates_) || (i != ncompacts_)) { + FSTERROR() << "CompactFstData: ill-formed input container"; + error_ = true; + return; + } + } +} + +template +template +CompactFstData *CompactFstData::Read( + istream &strm, + const FstReadOptions &opts, + const FstHeader &hdr, + const C &compactor) { + CompactFstData *data = new CompactFstData(); + data->start_ = hdr.Start(); + data->nstates_ = hdr.NumStates(); + data->narcs_ = hdr.NumArcs(); + + if (compactor.Size() == -1) { + if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { + LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source; + delete data; + return 0; + } + size_t b = (data->nstates_ + 1) * sizeof(Unsigned); + data->states_region_ = MappedFile::Map(&strm, opts, b); + if (!strm || data->states_region_ == NULL) { + LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source; + delete data; + return 0; + } + data->states_ = static_cast( + data->states_region_->mutable_data()); + } else { + data->states_ = 0; + } + data->ncompacts_ = compactor.Size() == -1 + ? data->states_[data->nstates_] + : data->nstates_ * compactor.Size(); + if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { + LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source; + delete data; + return 0; + } + size_t b = data->ncompacts_ * sizeof(CompactElement); + data->compacts_region_ = MappedFile::Map(&strm, opts, b); + if (!strm || data->compacts_region_ == NULL) { + LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source; + delete data; + return 0; + } + data->compacts_ = static_cast( + data->compacts_region_->mutable_data()); + return data; +} + +template +bool CompactFstData::Write(ostream &strm, + const FstWriteOptions &opts) const { + if (states_) { + if (opts.align && !AlignOutput(strm)) { + LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; + return false; + } + strm.write(reinterpret_cast(states_), + (nstates_ + 1) * sizeof(Unsigned)); + } + if (opts.align && !AlignOutput(strm)) { + LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; + return false; + } + strm.write(reinterpret_cast(compacts_), + ncompacts_ * sizeof(CompactElement)); + + strm.flush(); + if (!strm) { + LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source; + return false; + } + return true; +} + +template class CompactFst; +template void Cast(const F &, G *); + +// Implementation class for CompactFst, which contains CompactFstData +// and Fst cache. +template +class CompactFstImpl : public CacheImpl { + public: + using FstImpl::SetType; + using FstImpl::SetProperties; + using FstImpl::Properties; + using FstImpl::SetInputSymbols; + using FstImpl::SetOutputSymbols; + using FstImpl::WriteHeader; + + using CacheImpl::PushArc; + using CacheImpl::HasArcs; + using CacheImpl::HasFinal; + using CacheImpl::HasStart; + using CacheImpl::SetArcs; + using CacheImpl::SetFinal; + using CacheImpl::SetStart; + + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef C Compactor; + typedef typename C::Element CompactElement; + typedef U Unsigned; + + CompactFstImpl() + : CacheImpl(CompactFstOptions()), + compactor_(0), + own_compactor_(false), + data_(0) { + string type = "compact"; + if (sizeof(U) != sizeof(uint32)) { + string size; + Int64ToStr(8 * sizeof(U), &size); + type += size; + } + type += "_"; + type += C::Type(); + SetType(type); + SetProperties(kNullProperties | kStaticProperties); + } + + CompactFstImpl(const Fst &fst, const C &compactor, + const CompactFstOptions &opts) + : CacheImpl(opts), + compactor_(new C(compactor)), + own_compactor_(true), + data_(0) { + Init(fst); + } + + CompactFstImpl(const Fst &fst, C *compactor, + const CompactFstOptions &opts) + : CacheImpl(opts), + compactor_(compactor), + own_compactor_(false), + data_(0) { + Init(fst); + } + + template + CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor, + const CompactFstOptions &opts) + : CacheImpl(opts), + compactor_(new C(compactor)), + own_compactor_(true), + data_(0) { + Init(b, e); + } + + template + CompactFstImpl(const Iterator &b, const Iterator &e, C *compactor, + const CompactFstOptions &opts) + : CacheImpl(opts), + compactor_(compactor), + own_compactor_(false), + data_(0) { + Init(b, e); + } + + CompactFstImpl(const CompactFstImpl &impl) + : CacheImpl(impl), + compactor_(new C(*impl.compactor_)), + own_compactor_(true), + data_(impl.data_) { + if (data_) + data_->IncrRefCount(); + SetType(impl.Type()); + SetProperties(impl.Properties()); + SetInputSymbols(impl.InputSymbols()); + SetOutputSymbols(impl.OutputSymbols()); + } + + ~CompactFstImpl(){ + if (own_compactor_) + delete compactor_; + if (data_ && !data_->DecrRefCount()) + delete data_; + } + + StateId Start() { + if (!HasStart()) { + SetStart(data_->Start()); + } + return CacheImpl::Start(); + } + + Weight Final(StateId s) { + if (HasFinal(s)) + return CacheImpl::Final(s); + Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId); + if ((compactor_->Size() != -1) || + (data_->States(s) != data_->States(s + 1))) + arc = ComputeArc(s, + compactor_->Size() == -1 + ? data_->States(s) + : s * compactor_->Size()); + return arc.ilabel == kNoLabel ? arc.weight : Weight::Zero(); + } + + StateId NumStates() const { + if (Properties(kError)) return 0; + return data_->NumStates(); + } + + size_t NumArcs(StateId s) { + if (HasArcs(s)) + return CacheImpl::NumArcs(s); + Unsigned i, num_arcs; + if (compactor_->Size() == -1) { + i = data_->States(s); + num_arcs = data_->States(s + 1) - i; + } else { + i = s * compactor_->Size(); + num_arcs = compactor_->Size(); + } + if (num_arcs > 0) { + const A &arc = ComputeArc(s, i, kArcILabelValue); + if (arc.ilabel == kNoStateId) { + --num_arcs; + } + } + return num_arcs; + } + + size_t NumInputEpsilons(StateId s) { + if (!HasArcs(s) && !Properties(kILabelSorted)) + Expand(s); + if (HasArcs(s)) + return CacheImpl::NumInputEpsilons(s); + return CountEpsilons(s, false); + } + + size_t NumOutputEpsilons(StateId s) { + if (!HasArcs(s) && !Properties(kOLabelSorted)) + Expand(s); + if (HasArcs(s)) + return CacheImpl::NumOutputEpsilons(s); + return CountEpsilons(s, true); + } + + size_t CountEpsilons(StateId s, bool output_epsilons) { + size_t begin = compactor_->Size() == -1 ? + data_->States(s) : s * compactor_->Size(); + size_t end = compactor_->Size() == -1 ? + data_->States(s + 1) : (s + 1) * compactor_->Size(); + size_t num_eps = 0; + for (size_t i = begin; i < end; ++i) { + const A &arc = ComputeArc( + s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue); + const typename A::Label &label = + (output_epsilons ? arc.olabel : arc.ilabel); + if (label == kNoLabel) + continue; + else if (label > 0) + break; + ++num_eps; + } + return num_eps; + } + + static CompactFstImpl *Read(istream &strm, + const FstReadOptions &opts) { + CompactFstImpl *impl = new CompactFstImpl(); + FstHeader hdr; + if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { + delete impl; + return 0; + } + + // Ensures compatibility + if (hdr.Version() == kAlignedFileVersion) + hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED); + + impl->compactor_ = C::Read(strm); + if (!impl->compactor_) { + delete impl; + return 0; + } + impl->own_compactor_ = true; + impl->data_ = CompactFstData::Read(strm, opts, hdr, + *impl->compactor_); + if (!impl->data_) { + delete impl; + return 0; + } + return impl; + } + + bool Write(ostream &strm, const FstWriteOptions &opts) const { + FstHeader hdr; + hdr.SetStart(data_->Start()); + hdr.SetNumStates(data_->NumStates()); + hdr.SetNumArcs(data_->NumArcs()); + + // Ensures compatibility + int file_version = opts.align ? kAlignedFileVersion : kFileVersion; + WriteHeader(strm, opts, file_version, &hdr); + compactor_->Write(strm); + return data_->Write(strm, opts); + } + + // Provide information needed for generic state iterator + void InitStateIterator(StateIteratorData *data) const { + data->base = 0; + data->nstates = data_->NumStates(); + } + + void InitArcIterator(StateId s, ArcIteratorData *data) { + if (!HasArcs(s)) + Expand(s); + CacheImpl::InitArcIterator(s, data); + } + + Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const { + return compactor_->Expand(s, data_->Compacts(i), f); + } + + void Expand(StateId s) { + size_t begin = compactor_->Size() == -1 ? + data_->States(s) : s * compactor_->Size(); + size_t end = compactor_->Size() == -1 ? + data_->States(s + 1) : (s + 1) * compactor_->Size(); + for (size_t i = begin; i < end; ++i) { + const Arc &arc = ComputeArc(s, i); + if (arc.ilabel == kNoLabel) + SetFinal(s, arc.weight); + else + PushArc(s, arc); + } + if (!HasFinal(s)) + SetFinal(s, Weight::Zero()); + SetArcs(s); + } + + template + void SetCompactElements(const Iterator &b, const Iterator &e) { + if (data_ && !data_->DecrRefCount()) + delete data_; + data_ = new CompactFstData(b, e, *compactor_); + } + + C *GetCompactor() const { return compactor_; } + CompactFstData *Data() const { return data_; } + + // Properties always true of this Fst class + static const uint64 kStaticProperties = kExpanded; + + protected: + template + explicit CompactFstImpl(const CompactFstImpl &impl) + : CacheImpl(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())), + compactor_(new C(*impl.GetCompactor())), + own_compactor_(true), + data_(impl.Data()) { + if (data_) + data_->IncrRefCount(); + SetType(impl.Type()); + SetProperties(impl.Properties()); + SetInputSymbols(impl.InputSymbols()); + SetOutputSymbols(impl.OutputSymbols()); + } + + private: + friend class CompactFst; // allow access during write. + + void Init(const Fst &fst) { + string type = "compact"; + if (sizeof(U) != sizeof(uint32)) { + string size; + Int64ToStr(8 * sizeof(U), &size); + type += size; + } + type += "_"; + type += compactor_->Type(); + SetType(type); + SetInputSymbols(fst.InputSymbols()); + SetOutputSymbols(fst.OutputSymbols()); + data_ = new CompactFstData(fst, *compactor_); + if (data_->Error()) + SetProperties(kError, kError); + uint64 copy_properties = fst.Properties(kCopyProperties, true); + if ((copy_properties & kError) || !compactor_->Compatible(fst)) { + FSTERROR() << "CompactFstImpl: input fst incompatible with compactor"; + SetProperties(kError, kError); + return; + } + SetProperties(copy_properties | kStaticProperties); + } + + template + void Init(const Iterator &b, const Iterator &e) { + string type = "compact"; + if (sizeof(U) != sizeof(uint32)) { + string size; + Int64ToStr(8 * sizeof(U), &size); + type += size; + } + type += "_"; + type += compactor_->Type(); + SetType(type); + SetProperties(kStaticProperties | compactor_->Properties()); + data_ = new CompactFstData(b, e, *compactor_); + if (data_->Error()) + SetProperties(kError, kError); + } + + // Current unaligned file format version + static const int kFileVersion = 2; + // Current aligned file format version + static const int kAlignedFileVersion = 1; + // Minimum file format version supported + static const int kMinFileVersion = 1; + + C *compactor_; + bool own_compactor_; + CompactFstData *data_; +}; + +template +const uint64 CompactFstImpl::kStaticProperties; +template +const int CompactFstImpl::kFileVersion; +template +const int CompactFstImpl::kAlignedFileVersion; +template +const int CompactFstImpl::kMinFileVersion; + + +// CompactFst. This class attaches interface to implementation and +// handles reference counting, delegating most methods to +// ImplToExpandedFst. The unsigned type U is used to represent indices +// into the compact arc array (uint32 by default, declared in +// fst-decl.h). +template +class CompactFst : public ImplToExpandedFst< CompactFstImpl > { + public: + friend class StateIterator< CompactFst >; + friend class ArcIterator< CompactFst >; + template void friend Cast(const F &, G *); + + typedef A Arc; + typedef typename A::StateId StateId; + typedef CompactFstImpl Impl; + typedef CacheState State; + typedef U Unsigned; + + CompactFst() : ImplToExpandedFst(new Impl()) {} + + explicit CompactFst(const Fst &fst, const C &compactor = C(), + const CompactFstOptions &opts = CompactFstOptions()) + : ImplToExpandedFst(new Impl(fst, compactor, opts)) {} + + CompactFst(const Fst &fst, C *compactor, + const CompactFstOptions &opts = CompactFstOptions()) + : ImplToExpandedFst(new Impl(fst, compactor, opts)) {} + + // The following 2 constructors take as input two iterators delimiting + // a set of (already) compacted transitions, starting with the + // transitions out of the initial state. The format of the input + // differs for fixed out-degree and variable out-degree compactors. + // + // - For fixed out-degree compactors, the final weight (encoded as a + // compacted transition) needs to be given only for final + // states. All strings (compactor of size 1) will be assume to be + // terminated by a final state even when the final state is not + // implicitely given. + // + // - For variable out-degree compactors, the final weight (encoded + // as a compacted transition) needs to be given for all states and + // must appeared first in the list (for state s, final weight of s, + // followed by outgoing transitons in s). + // + // These 2 constructors allows the direct construction of a CompactFst + // without first creating a more memory hungry 'regular' FST. This + // is useful when memory usage is severely constrained. + template + explicit CompactFst(const Iterator &begin, const Iterator &end, + const C &compactor = C(), + const CompactFstOptions &opts = CompactFstOptions()) + : ImplToExpandedFst(new Impl(begin, end, compactor, opts)) {} + + template + CompactFst(const Iterator &begin, const Iterator &end, + C *compactor, const CompactFstOptions &opts = CompactFstOptions()) + : ImplToExpandedFst(new Impl(begin, end, compactor, opts)) {} + + // See Fst<>::Copy() for doc. + CompactFst(const CompactFst &fst, bool safe = false) + : ImplToExpandedFst(fst, safe) {} + + // Get a copy of this CompactFst. See Fst<>::Copy() for further doc. + virtual CompactFst *Copy(bool safe = false) const { + return new CompactFst(*this, safe); + } + + // Read a CompactFst from an input stream; return NULL on error + static CompactFst *Read(istream &strm, const FstReadOptions &opts) { + Impl* impl = Impl::Read(strm, opts); + return impl ? new CompactFst(impl) : 0; + } + + // Read a CompactFst from a file; return NULL on error + // Empty filename reads from standard input + static CompactFst *Read(const string &filename) { + Impl* impl = ImplToExpandedFst::Read(filename); + return impl ? new CompactFst(impl) : 0; + } + + virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { + return GetImpl()->Write(strm, opts); + } + + virtual bool Write(const string &filename) const { + return Fst::WriteFile(filename); + } + + template + static bool WriteFst(const F &fst, const C &compactor, ostream &strm, + const FstWriteOptions &opts); + + virtual void InitStateIterator(StateIteratorData *data) const { + GetImpl()->InitStateIterator(data); + } + + virtual void InitArcIterator(StateId s, ArcIteratorData *data) const { + GetImpl()->InitArcIterator(s, data); + } + + virtual MatcherBase *InitMatcher(MatchType match_type) const { + return new SortedMatcher >(*this, match_type); + } + + template + void SetCompactElements(const Iterator &b, const Iterator &e) { + GetImpl()->SetCompactElements(b, e); + } + + private: + CompactFst(Impl *impl) : ImplToExpandedFst(impl) {} + + // Makes visible to friends. + Impl *GetImpl() const { return ImplToFst >::GetImpl(); } + + void SetImpl(Impl *impl, bool own_impl = false) { + ImplToFst< Impl, ExpandedFst >::SetImpl(impl, own_impl); + } + + // Use overloading to extract the type of the argument. + static Impl* GetImplIfCompactFst(const CompactFst &compact_fst) { + return compact_fst.GetImpl(); + } + + // This does not give privileged treatment to subclasses of CompactFst. + template + static Impl* GetImplIfCompactFst(const NonCompactFst& fst) { + return NULL; + } + + void operator=(const CompactFst &fst); // disallow +}; + +// Writes Fst in Compact format, potentially with a pass over the machine +// before writing to compute the number of states and arcs. +// +template +template +bool CompactFst::WriteFst(const F &fst, + const C &compactor, + ostream &strm, + const FstWriteOptions &opts) { + typedef U Unsigned; + typedef typename C::Element CompactElement; + typedef typename A::Weight Weight; + int file_version = opts.align ? + CompactFstImpl::kAlignedFileVersion : + CompactFstImpl::kFileVersion; + size_t num_arcs = -1, num_states = -1, num_compacts = -1; + C first_pass_compactor = compactor; + if (Impl* impl = GetImplIfCompactFst(fst)) { + num_arcs = impl->Data()->NumArcs(); + num_states = impl->Data()->NumStates(); + num_compacts = impl->Data()->NumCompacts(); + first_pass_compactor = *impl->GetCompactor(); + } else { + // A first pass is needed to compute the state of the compactor, which + // is saved ahead of the rest of the data structures. This unfortunately + // means forcing a complete double compaction when writing in this format. + // TODO(allauzen): eliminate mutable state from compactors. + num_arcs = 0; + num_states = 0; + for (StateIterator siter(fst); !siter.Done(); siter.Next()) { + const StateId s = siter.Value(); + ++num_states; + if (fst.Final(s) != Weight::Zero()) { + first_pass_compactor.Compact( + s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId)); + } + for (ArcIterator aiter(fst, s); !aiter.Done(); aiter.Next()) { + ++num_arcs; + first_pass_compactor.Compact(s, aiter.Value()); + } + } + } + FstHeader hdr; + hdr.SetStart(fst.Start()); + hdr.SetNumStates(num_states); + hdr.SetNumArcs(num_arcs); + string type = "compact"; + if (sizeof(U) != sizeof(uint32)) { + string size; + Int64ToStr(8 * sizeof(U), &size); + type += size; + } + type += "_"; + type += C::Type(); + uint64 copy_properties = fst.Properties(kCopyProperties, true); + if ((copy_properties & kError) || !compactor.Compatible(fst)) { + LOG(ERROR) << "fst incompatible with compactor"; + return false; + } + uint64 properties = copy_properties | + CompactFstImpl::kStaticProperties; + FstImpl::WriteFstHeader(fst, strm, opts, file_version, type, properties, + &hdr); + first_pass_compactor.Write(strm); + if (first_pass_compactor.Size() == -1) { + if (opts.align && !AlignOutput(strm)) { + LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; + return false; + } + Unsigned compacts = 0; + for (StateIterator siter(fst); !siter.Done(); siter.Next()) { + const StateId s = siter.Value(); + strm.write(reinterpret_cast(&compacts), sizeof(compacts)); + if (fst.Final(s) != Weight::Zero()) { + ++compacts; + } + compacts += fst.NumArcs(s); + } + strm.write(reinterpret_cast(&compacts), sizeof(compacts)); + } + if (opts.align && !AlignOutput(strm)) { + LOG(ERROR) << "Could not align file during write after writing states"; + } + C second_pass_compactor = compactor; + CompactElement element; + for (StateIterator siter(fst); !siter.Done(); siter.Next()) { + const StateId s = siter.Value(); + if (fst.Final(s) != Weight::Zero()) { + element = second_pass_compactor.Compact( + s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId)); + strm.write(reinterpret_cast(&element), sizeof(element)); + } + for (ArcIterator aiter(fst, s); !aiter.Done(); aiter.Next()) { + element = second_pass_compactor.Compact(s, aiter.Value()); + strm.write(reinterpret_cast(&element), sizeof(element)); + } + } + strm.flush(); + if (!strm) { + LOG(ERROR) << "CompactFst write failed: " << opts.source; + return false; + } + return true; +} + + +// Specialization for CompactFst; see generic version in fst.h +// for sample usage (but use the CompactFst type!). This version +// should inline. +template +class StateIterator< CompactFst > { + public: + typedef typename A::StateId StateId; + + explicit StateIterator(const CompactFst &fst) + : nstates_(fst.GetImpl()->NumStates()), s_(0) {} + + bool Done() const { return s_ >= nstates_; } + + StateId Value() const { return s_; } + + void Next() { ++s_; } + + void Reset() { s_ = 0; } + + private: + StateId nstates_; + StateId s_; + + DISALLOW_COPY_AND_ASSIGN(StateIterator); +}; + +// Specialization for CompactFst. +// Never caches, always iterates over the underlying compact elements. +template +class ArcIterator< CompactFst > { + public: + typedef typename A::StateId StateId; + typedef typename C::Element CompactElement; + + ArcIterator(const CompactFst &fst, StateId s) + : compactor_(fst.GetImpl()->GetCompactor()), state_(s), compacts_(0), + pos_(0), flags_(kArcValueFlags) { + + const CompactFstData *data = fst.GetImpl()->Data(); + size_t offset; + if (compactor_->Size() == -1) { // Variable out-degree compactor + offset = data->States(s); + num_arcs_ = data->States(s + 1) - offset; + } else { // Fixed out-degree compactor + offset = s * compactor_->Size(); + num_arcs_ = compactor_->Size(); + } + if (num_arcs_ > 0) { + compacts_ = &(data->Compacts(offset)); + arc_ = compactor_->Expand(s, *compacts_, kArcILabelValue); + if (arc_.ilabel == kNoStateId) { + ++compacts_; + --num_arcs_; + } + } + } + + ~ArcIterator() {} + + bool Done() const { return pos_ >= num_arcs_; } + + const A& Value() const { + arc_ = compactor_->Expand(state_, compacts_[pos_], flags_); + return arc_; + } + + void Next() { ++pos_; } + + size_t Position() const { return pos_; } + + void Reset() { pos_ = 0; } + + void Seek(size_t pos) { pos_ = pos; } + + uint32 Flags() const { return flags_; } + + void SetFlags(uint32 f, uint32 m) { + flags_ &= ~m; + flags_ |= (f & kArcValueFlags); + } + + private: + C *compactor_; + StateId state_; + const CompactElement *compacts_; + size_t pos_; + size_t num_arcs_; + mutable A arc_; + uint32 flags_; + + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + +// // Specialization for CompactFst. +// // This is an optionally caching arc iterator. +// // TODO(allauzen): implements the kArcValueFlags, the current +// // implementation only implements the kArcNoCache flag. +// template +// class ArcIterator< CompactFst > { +// public: +// typedef typename A::StateId StateId; + +// ArcIterator(const CompactFst &fst, StateId s) +// : fst_(fst), state_(s), pos_(0), num_arcs_(0), offset_(0), +// flags_(kArcValueFlags) { +// cache_data_.ref_count = 0; + +// if (fst_.GetImpl()->HasArcs(state_)) { +// fst_.GetImpl()->InitArcIterator(s, &cache_data_); +// num_arcs_ = cache_data_.narcs; +// return; +// } + +// const C *compactor = fst_.GetImpl()->GetCompactor(); +// const CompactFstData *data = fst_.GetImpl()->Data(); +// if (compactor->Size() == -1) { // Variable out-degree compactor +// offset_ = data->States(s); +// num_arcs_ = data->States(s + 1) - offset_; +// } else { // Fixed out-degree compactor +// offset_ = s * compactor->Size(); +// num_arcs_ = compactor->Size(); +// } +// if (num_arcs_ > 0) { +// const A &arc = fst_.GetImpl()->ComputeArc(s, offset_); +// if (arc.ilabel == kNoStateId) { +// ++offset_; +// --num_arcs_; +// } +// } +// } + + +// ~ArcIterator() { +// if (cache_data_.ref_count) +// --(*cache_data_.ref_count); +// } + +// bool Done() const { return pos_ >= num_arcs_; } + +// const A& Value() const { +// if (cache_data_.ref_count == 0) { +// if (flags_ & kArcNoCache) { +// arc_ = fst_.GetImpl()->ComputeArc(state_, pos_ + offset_); +// return arc_; +// } else { +// fst_.GetImpl()->InitArcIterator(state_, &cache_data_); +// } +// } +// return cache_data_.arcs[pos_]; +// } + +// void Next() { ++pos_; } + +// size_t Position() const { return pos_; } + +// void Reset() { pos_ = 0; } + +// void Seek(size_t pos) { pos_ = pos; } + +// uint32 Flags() const { return flags_; } + +// void SetFlags(uint32 f, uint32 m) { +// flags_ &= ~m; +// flags_ |= f; + +// if (!(flags_ & kArcNoCache) && cache_data_.ref_count == 0) +// fst_.GetImpl()->InitArcIterator(state_, &cache_data_); +// } + +// private: +// mutable const CompactFst &fst_; +// StateId state_; +// size_t pos_; +// size_t num_arcs_; +// size_t offset_; +// uint32 flags_; +// mutable A arc_; +// mutable ArcIteratorData cache_data_; + +// DISALLOW_COPY_AND_ASSIGN(ArcIterator); +// }; + + +// +// Utility Compactors +// + +// Compactor for unweighted string FSTs +template +class StringCompactor { + public: + typedef A Arc; + typedef typename A::Label Element; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + Element Compact(StateId s, const A &arc) const { return arc.ilabel; } + + Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { + return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId); + } + + ssize_t Size() const { return 1; } + + uint64 Properties() const { + return kString | kAcceptor | kUnweighted; + } + + bool Compatible(const Fst &fst) const { + uint64 props = Properties(); + return fst.Properties(props, true) == props; + } + + static const string &Type() { + static const string type = "string"; + return type; + } + + bool Write(ostream &strm) const { return true; } + + static StringCompactor *Read(istream &strm) { + return new StringCompactor; + } +}; + + +// Compactor for weighted string FSTs +template +class WeightedStringCompactor { + public: + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + typedef pair Element; + + Element Compact(StateId s, const A &arc) const { + return make_pair(arc.ilabel, arc.weight); + } + + Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { + return Arc(p.first, p.first, p.second, + p.first != kNoLabel ? s + 1 : kNoStateId); + } + + ssize_t Size() const { return 1;} + + uint64 Properties() const { + return kString | kAcceptor; + } + + bool Compatible(const Fst &fst) const { + uint64 props = Properties(); + return fst.Properties(props, true) == props; + } + + static const string &Type() { + static const string type = "weighted_string"; + return type; + } + + bool Write(ostream &strm) const { return true; } + + static WeightedStringCompactor *Read(istream &strm) { + return new WeightedStringCompactor; + } +}; + + +// Compactor for unweighted acceptor FSTs +template +class UnweightedAcceptorCompactor { + public: + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + typedef pair Element; + + Element Compact(StateId s, const A &arc) const { + return make_pair(arc.ilabel, arc.nextstate); + } + + Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { + return Arc(p.first, p.first, Weight::One(), p.second); + } + + ssize_t Size() const { return -1;} + + uint64 Properties() const { + return kAcceptor | kUnweighted; + } + + bool Compatible(const Fst &fst) const { + uint64 props = Properties(); + return fst.Properties(props, true) == props; + } + + static const string &Type() { + static const string type = "unweighted_acceptor"; + return type; + } + + bool Write(ostream &strm) const { return true; } + + static UnweightedAcceptorCompactor *Read(istream &istrm) { + return new UnweightedAcceptorCompactor; + } +}; + + +// Compactor for weighted acceptor FSTs +template +class AcceptorCompactor { + public: + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + typedef pair< pair, StateId > Element; + + Element Compact(StateId s, const A &arc) const { + return make_pair(make_pair(arc.ilabel, arc.weight), arc.nextstate); + } + + Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { + return Arc(p.first.first, p.first.first, p.first.second, p.second); + } + + ssize_t Size() const { return -1;} + + uint64 Properties() const { + return kAcceptor; + } + + bool Compatible(const Fst &fst) const { + uint64 props = Properties(); + return fst.Properties(props, true) == props; + } + + static const string &Type() { + static const string type = "acceptor"; + return type; + } + + bool Write(ostream &strm) const { return true; } + + static AcceptorCompactor *Read(istream &strm) { + return new AcceptorCompactor; + } +}; + + +// Compactor for unweighted FSTs +template +class UnweightedCompactor { + public: + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + typedef pair< pair, StateId > Element; + + Element Compact(StateId s, const A &arc) const { + return make_pair(make_pair(arc.ilabel, arc.olabel), arc.nextstate); + } + + Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { + return Arc(p.first.first, p.first.second, Weight::One(), p.second); + } + + ssize_t Size() const { return -1; } + + uint64 Properties() const { + return kUnweighted; + } + + bool Compatible(const Fst &fst) const { + uint64 props = Properties(); + return fst.Properties(props, true) == props; + } + + static const string &Type() { + static const string type = "unweighted"; + return type; + } + + bool Write(ostream &strm) const { return true; } + + static UnweightedCompactor *Read(istream &strm) { + return new UnweightedCompactor; + } +}; + + +// Uselful aliases when using StdArc +typedef CompactFst< StdArc, StringCompactor > +StdCompactStringFst; +typedef CompactFst< StdArc, WeightedStringCompactor > +StdCompactWeightedStringFst; +typedef CompactFst > +StdCompactAcceptorFst; +typedef CompactFst > +StdCompactUnweightedFst; +typedef CompactFst > +StdCompactUnweightedAcceptorFst; + +} // namespace fst + +#endif // FST_LIB_COMPACT_FST_H__ -- cgit v1.2.3