diff options
author | Ted Yin <[email protected]> | 2015-08-14 17:42:26 +0800 |
---|---|---|
committer | Ted Yin <[email protected]> | 2015-08-14 17:42:26 +0800 |
commit | c3cffb58b9921d78753336421b52b9ffdaa5515c (patch) | |
tree | bfea20e97c200cf734021e3756d749c892e658a4 /kaldi_io/src/tools/openfst/include/fst/determinize.h | |
parent | 10cce5f6a5c9e2f8e00d5a2a4d87c9cb7c26bf4c (diff) | |
parent | dfdd17afc2e984ec6c32ea01290f5c76309a456a (diff) |
Merge pull request #2 from yimmon/master
remove needless files
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/determinize.h')
-rw-r--r-- | kaldi_io/src/tools/openfst/include/fst/determinize.h | 1015 |
1 files changed, 0 insertions, 1015 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/determinize.h b/kaldi_io/src/tools/openfst/include/fst/determinize.h deleted file mode 100644 index 9ff8723..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/determinize.h +++ /dev/null @@ -1,1015 +0,0 @@ -// determinize.h - - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Functions and classes to determinize an FST. - -#ifndef FST_LIB_DETERMINIZE_H__ -#define FST_LIB_DETERMINIZE_H__ - -#include <algorithm> -#include <climits> -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <map> -#include <fst/slist.h> -#include <string> -#include <vector> -using std::vector; - -#include <fst/arc-map.h> -#include <fst/cache.h> -#include <fst/bi-table.h> -#include <fst/factor-weight.h> -#include <fst/prune.h> -#include <fst/test-properties.h> - - -namespace fst { - -// -// COMMON DIVISORS - these are used in determinization to compute -// the transition weights. In the simplest case, it is just the same -// as the semiring Plus(). However, other choices permit more efficient -// determinization when the output contains strings. -// - -// The default common divisor uses the semiring Plus. -template <class W> -class DefaultCommonDivisor { - public: - typedef W Weight; - - W operator()(const W &w1, const W &w2) const { return Plus(w1, w2); } -}; - - -// The label common divisor for a (left) string semiring selects a -// single letter common prefix or the empty string. This is used in -// the determinization of output strings so that at most a single -// letter will appear in the output of a transtion. -template <typename L, StringType S> -class LabelCommonDivisor { - public: - typedef StringWeight<L, S> Weight; - - Weight operator()(const Weight &w1, const Weight &w2) const { - StringWeightIterator<L, S> iter1(w1); - StringWeightIterator<L, S> iter2(w2); - - if (!(StringWeight<L, S>::Properties() & kLeftSemiring)) { - FSTERROR() << "LabelCommonDivisor: Weight needs to be left semiring"; - return Weight::NoWeight(); - } else if (w1.Size() == 0 || w2.Size() == 0) { - return Weight::One(); - } else if (w1 == Weight::Zero()) { - return Weight(iter2.Value()); - } else if (w2 == Weight::Zero()) { - return Weight(iter1.Value()); - } else if (iter1.Value() == iter2.Value()) { - return Weight(iter1.Value()); - } else { - return Weight::One(); - } - } -}; - - -// The gallic common divisor uses the label common divisor on the -// string component and the template argument D common divisor on the -// weight component, which defaults to the default common divisor. -template <class L, class W, StringType S, class D = DefaultCommonDivisor<W> > -class GallicCommonDivisor { - public: - typedef GallicWeight<L, W, S> Weight; - - Weight operator()(const Weight &w1, const Weight &w2) const { - return Weight(label_common_divisor_(w1.Value1(), w2.Value1()), - weight_common_divisor_(w1.Value2(), w2.Value2())); - } - - private: - LabelCommonDivisor<L, S> label_common_divisor_; - D weight_common_divisor_; -}; - - -// Represents an element in a subset -template <class A> -struct DeterminizeElement { - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - DeterminizeElement() {} - - DeterminizeElement(StateId s, Weight w) : state_id(s), weight(w) {} - - bool operator==(const DeterminizeElement<A> & element) const { - return state_id == element.state_id && weight == element.weight; - } - - bool operator<(const DeterminizeElement<A> & element) const { - return state_id < element.state_id || - (state_id == element.state_id && weight == element.weight); - } - - StateId state_id; // Input state Id - Weight weight; // Residual weight -}; - - -// -// DETERMINIZE FILTERS - these can be used in determinization to compute -// transformations on the subsets prior to their being added as destination -// states. The filter operates on a map between a label and the -// corresponding destination subsets. The possibly modified map is -// then used to construct the destination states for arcs exiting state 's'. -// It must define the ordered map type LabelMap and have a default -// and copy constructor. - -// A determinize filter that does not modify its input. -template <class Arc> -struct IdentityDeterminizeFilter { - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef slist< DeterminizeElement<Arc> > Subset; - typedef map<Label, Subset*> LabelMap; - - static uint64 Properties(uint64 props) { return props; } - - void operator()(StateId s, LabelMap *label_map) {} -}; - - -// -// DETERMINIZATION STATE TABLES -// -// The determiziation state table has the form: -// -// template <class Arc> -// class DeterminizeStateTable { -// public: -// typedef typename Arc::StateId StateId; -// typedef DeterminizeElement<Arc> Element; -// typedef slist<Element> Subset; -// -// // Required constuctor -// DeterminizeStateTable(); -// -// // Required copy constructor that does not copy state -// DeterminizeStateTable(const DeterminizeStateTable<A,P> &table); -// -// // Lookup state ID by subset (not depending of the element order). -// // If it doesn't exist, then add it. FindState takes -// // ownership of the subset argument (so that it doesn't have to -// // copy it if it creates a new state). -// StateId FindState(Subset *subset); -// -// // Lookup subset by ID. -// const Subset *FindSubset(StateId id) const; -// }; -// - -// The default determinization state table based on the -// compact hash bi-table. -template <class Arc> -class DefaultDeterminizeStateTable { - public: - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - typedef DeterminizeElement<Arc> Element; - typedef slist<Element> Subset; - - explicit DefaultDeterminizeStateTable(size_t table_size = 0) - : table_size_(table_size), - subsets_(table_size_, new SubsetKey(), new SubsetEqual(&elements_)) { } - - DefaultDeterminizeStateTable(const DefaultDeterminizeStateTable<Arc> &table) - : table_size_(table.table_size_), - subsets_(table_size_, new SubsetKey(), new SubsetEqual(&elements_)) { } - - ~DefaultDeterminizeStateTable() { - for (StateId s = 0; s < subsets_.Size(); ++s) - delete subsets_.FindEntry(s); - } - - // Finds the state corresponding to a subset. Only creates a new - // state if the subset is not found. FindState takes ownership of - // the subset argument (so that it doesn't have to copy it if it - // creates a new state). - StateId FindState(Subset *subset) { - StateId ns = subsets_.Size(); - StateId s = subsets_.FindId(subset); - if (s != ns) delete subset; // subset found - return s; - } - - const Subset* FindSubset(StateId s) { return subsets_.FindEntry(s); } - - private: - // Comparison object for hashing Subset(s). Subsets are not sorted in this - // implementation, so ordering must not be assumed in the equivalence - // test. - class SubsetEqual { - public: - SubsetEqual() { // needed for compilation but should never be called - FSTERROR() << "SubsetEqual: default constructor not implemented"; - } - - // Constructor takes vector needed to check equality. See immediately - // below for constraints on it. - explicit SubsetEqual(vector<Element *> *elements) - : elements_(elements) {} - - // At each call to operator(), the elements_ vector should contain - // only NULLs. When this operator returns, elements_ will still - // have this property. - bool operator()(Subset* subset1, Subset* subset2) const { - if (!subset1 && !subset2) - return true; - if ((subset1 && !subset2) || (!subset1 && subset2)) - return false; - - if (subset1->size() != subset2->size()) - return false; - - // Loads first subset elements in element vector. - for (typename Subset::iterator iter1 = subset1->begin(); - iter1 != subset1->end(); - ++iter1) { - Element &element1 = *iter1; - while (elements_->size() <= element1.state_id) - elements_->push_back(0); - (*elements_)[element1.state_id] = &element1; - } - - // Checks second subset matches first via element vector. - for (typename Subset::iterator iter2 = subset2->begin(); - iter2 != subset2->end(); - ++iter2) { - Element &element2 = *iter2; - while (elements_->size() <= element2.state_id) - elements_->push_back(0); - Element *element1 = (*elements_)[element2.state_id]; - if (!element1 || element1->weight != element2.weight) { - // Mismatch found. Resets element vector before returning false. - for (typename Subset::iterator iter1 = subset1->begin(); - iter1 != subset1->end(); - ++iter1) - (*elements_)[iter1->state_id] = 0; - return false; - } else { - (*elements_)[element2.state_id] = 0; // Clears entry - } - } - return true; - } - private: - vector<Element *> *elements_; - }; - - // Hash function for Subset to Fst states. Subset elements are not - // sorted in this implementation, so the hash must be invariant - // under subset reordering. - class SubsetKey { - public: - size_t operator()(const Subset* subset) const { - size_t hash = 0; - if (subset) { - for (typename Subset::const_iterator iter = subset->begin(); - iter != subset->end(); - ++iter) { - const Element &element = *iter; - int lshift = element.state_id % (CHAR_BIT * sizeof(size_t) - 1) + 1; - int rshift = CHAR_BIT * sizeof(size_t) - lshift; - size_t n = element.state_id; - hash ^= n << lshift ^ n >> rshift ^ element.weight.Hash(); - } - } - return hash; - } - }; - - size_t table_size_; - - typedef CompactHashBiTable<StateId, Subset *, - SubsetKey, SubsetEqual, HS_STL> SubsetTable; - - SubsetTable subsets_; - vector<Element *> elements_; - - void operator=(const DefaultDeterminizeStateTable<Arc> &); // disallow -}; - -// Options for finite-state transducer determinization templated on -// the arc type, common divisor, the determinization filter and the -// state table. DeterminizeFst takes ownership of the determinization -// filter and state table if provided. -template <class Arc, - class D = DefaultCommonDivisor<typename Arc::Weight>, - class F = IdentityDeterminizeFilter<Arc>, - class T = DefaultDeterminizeStateTable<Arc> > -struct DeterminizeFstOptions : CacheOptions { - typedef typename Arc::Label Label; - float delta; // Quantization delta for subset weights - Label subsequential_label; // Label used for residual final output - // when producing subsequential transducers. - F *filter; // Determinization filter - T *state_table; // Determinization state table - - explicit DeterminizeFstOptions(const CacheOptions &opts, - float del = kDelta, Label lab = 0, - F *filt = 0, - T *table = 0) - : CacheOptions(opts), delta(del), subsequential_label(lab), - filter(filt), state_table(table) {} - - explicit DeterminizeFstOptions(float del = kDelta, Label lab = 0, - F *filt = 0, T *table = 0) - : delta(del), subsequential_label(lab), filter(filt), - state_table(table) {} -}; - -// Implementation of delayed DeterminizeFst. This base class is -// common to the variants that implement acceptor and transducer -// determinization. -template <class A> -class DeterminizeFstImplBase : public CacheImpl<A> { - public: - using FstImpl<A>::SetType; - using FstImpl<A>::SetProperties; - using FstImpl<A>::Properties; - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - - using CacheBaseImpl< CacheState<A> >::HasStart; - using CacheBaseImpl< CacheState<A> >::HasFinal; - using CacheBaseImpl< CacheState<A> >::HasArcs; - using CacheBaseImpl< CacheState<A> >::SetFinal; - using CacheBaseImpl< CacheState<A> >::SetStart; - - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef CacheState<A> State; - - template <class D, class F, class T> - DeterminizeFstImplBase(const Fst<A> &fst, - const DeterminizeFstOptions<A, D, F, T> &opts) - : CacheImpl<A>(opts), fst_(fst.Copy()) { - SetType("determinize"); - uint64 iprops = fst.Properties(kFstProperties, false); - uint64 dprops = DeterminizeProperties(iprops, - opts.subsequential_label != 0); - SetProperties(F::Properties(dprops), kCopyProperties); - SetInputSymbols(fst.InputSymbols()); - SetOutputSymbols(fst.OutputSymbols()); - } - - DeterminizeFstImplBase(const DeterminizeFstImplBase<A> &impl) - : CacheImpl<A>(impl), - fst_(impl.fst_->Copy(true)) { - SetType("determinize"); - SetProperties(impl.Properties(), kCopyProperties); - SetInputSymbols(impl.InputSymbols()); - SetOutputSymbols(impl.OutputSymbols()); - } - - virtual ~DeterminizeFstImplBase() { delete fst_; } - - virtual DeterminizeFstImplBase<A> *Copy() = 0; - - StateId Start() { - if (!HasStart()) { - StateId start = ComputeStart(); - if (start != kNoStateId) { - SetStart(start); - } - } - return CacheImpl<A>::Start(); - } - - Weight Final(StateId s) { - if (!HasFinal(s)) { - Weight final = ComputeFinal(s); - SetFinal(s, final); - } - return CacheImpl<A>::Final(s); - } - - virtual void Expand(StateId s) = 0; - - size_t NumArcs(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumArcs(s); - } - - size_t NumInputEpsilons(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumInputEpsilons(s); - } - - size_t NumOutputEpsilons(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumOutputEpsilons(s); - } - - void InitArcIterator(StateId s, ArcIteratorData<A> *data) { - if (!HasArcs(s)) - Expand(s); - CacheImpl<A>::InitArcIterator(s, data); - } - - virtual StateId ComputeStart() = 0; - - virtual Weight ComputeFinal(StateId s) = 0; - - const Fst<A> &GetFst() const { return *fst_; } - - private: - const Fst<A> *fst_; // Input Fst - - void operator=(const DeterminizeFstImplBase<A> &); // disallow -}; - - -// Implementation of delayed determinization for weighted acceptors. -// It is templated on the arc type A and the common divisor D. -template <class A, class D, class F, class T> -class DeterminizeFsaImpl : public DeterminizeFstImplBase<A> { - public: - using FstImpl<A>::SetProperties; - using DeterminizeFstImplBase<A>::GetFst; - using DeterminizeFstImplBase<A>::SetArcs; - - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef DeterminizeElement<A> Element; - typedef slist<Element> Subset; - typedef typename F::LabelMap LabelMap; - - DeterminizeFsaImpl(const Fst<A> &fst, - const vector<Weight> *in_dist, vector<Weight> *out_dist, - const DeterminizeFstOptions<A, D, F, T> &opts) - : DeterminizeFstImplBase<A>(fst, opts), - delta_(opts.delta), - in_dist_(in_dist), - out_dist_(out_dist), - filter_(opts.filter ? opts.filter : new F()), - state_table_(opts.state_table ? opts.state_table : new T()) { - if (!fst.Properties(kAcceptor, true)) { - FSTERROR() << "DeterminizeFst: argument not an acceptor"; - SetProperties(kError, kError); - } - if (!(Weight::Properties() & kLeftSemiring)) { - FSTERROR() << "DeterminizeFst: Weight needs to be left distributive: " - << Weight::Type(); - SetProperties(kError, kError); - } - if (out_dist_) - out_dist_->clear(); - } - - DeterminizeFsaImpl(const DeterminizeFsaImpl<A, D, F, T> &impl) - : DeterminizeFstImplBase<A>(impl), - delta_(impl.delta_), - in_dist_(0), - out_dist_(0), - filter_(new F(*impl.filter_)), - state_table_(new T(*impl.state_table_)) { - if (impl.out_dist_) { - FSTERROR() << "DeterminizeFsaImpl: cannot copy with out_dist vector"; - SetProperties(kError, kError); - } - } - - virtual ~DeterminizeFsaImpl() { - delete filter_; - delete state_table_; - } - - virtual DeterminizeFsaImpl<A, D, F, T> *Copy() { - return new DeterminizeFsaImpl<A, D, F, T>(*this); - } - - uint64 Properties() const { return Properties(kFstProperties); } - - // Set error if found; return FST impl properties. - uint64 Properties(uint64 mask) const { - if ((mask & kError) && (GetFst().Properties(kError, false))) - SetProperties(kError, kError); - return FstImpl<A>::Properties(mask); - } - - virtual StateId ComputeStart() { - StateId s = GetFst().Start(); - if (s == kNoStateId) - return kNoStateId; - Element element(s, Weight::One()); - Subset *subset = new Subset; - subset->push_front(element); - return FindState(subset); - } - - virtual Weight ComputeFinal(StateId s) { - const Subset *subset = state_table_->FindSubset(s); - Weight final = Weight::Zero(); - for (typename Subset::const_iterator siter = subset->begin(); - siter != subset->end(); - ++siter) { - const Element &element = *siter; - final = Plus(final, Times(element.weight, - GetFst().Final(element.state_id))); - if (!final.Member()) - SetProperties(kError, kError); - } - return final; - } - - StateId FindState(Subset *subset) { - StateId s = state_table_->FindState(subset); - if (in_dist_ && out_dist_->size() <= s) - out_dist_->push_back(ComputeDistance(subset)); - return s; - } - - // Compute distance from a state to the final states in the DFA - // given the distances in the NFA. - Weight ComputeDistance(const Subset *subset) { - Weight outd = Weight::Zero(); - for (typename Subset::const_iterator siter = subset->begin(); - siter != subset->end(); ++siter) { - const Element &element = *siter; - Weight ind = element.state_id < in_dist_->size() ? - (*in_dist_)[element.state_id] : Weight::Zero(); - outd = Plus(outd, Times(element.weight, ind)); - } - return outd; - } - - // Computes the outgoing transitions from a state, creating new destination - // states as needed. - virtual void Expand(StateId s) { - - LabelMap label_map; - LabelSubsets(s, &label_map); - - for (typename LabelMap::iterator liter = label_map.begin(); - liter != label_map.end(); - ++liter) - AddArc(s, liter->first, liter->second); - SetArcs(s); - } - - private: - // Constructs destination subsets per label. At return, subset - // element weights include the input automaton label weights and the - // subsets may contain duplicate states. - void LabelSubsets(StateId s, LabelMap *label_map) { - const Subset *src_subset = state_table_->FindSubset(s); - - for (typename Subset::const_iterator siter = src_subset->begin(); - siter != src_subset->end(); - ++siter) { - const Element &src_element = *siter; - for (ArcIterator< Fst<A> > aiter(GetFst(), src_element.state_id); - !aiter.Done(); - aiter.Next()) { - const A &arc = aiter.Value(); - Element dest_element(arc.nextstate, - Times(src_element.weight, arc.weight)); - - // The LabelMap may be a e.g. multimap with more complex - // determinization filters, so we insert efficiently w/o using []. - typename LabelMap::iterator liter = label_map->lower_bound(arc.ilabel); - Subset* dest_subset; - if (liter == label_map->end() || liter->first != arc.ilabel) { - dest_subset = new Subset; - label_map->insert(liter, make_pair(arc.ilabel, dest_subset)); - } else { - dest_subset = liter->second; - } - - dest_subset->push_front(dest_element); - } - } - // Applies the determinization filter - (*filter_)(s, label_map); - } - - // Adds an arc from state S to the destination state associated - // with subset DEST_SUBSET (as created by LabelSubsets). - void AddArc(StateId s, Label label, Subset *dest_subset) { - A arc; - arc.ilabel = label; - arc.olabel = label; - arc.weight = Weight::Zero(); - - typename Subset::iterator oiter; - for (typename Subset::iterator diter = dest_subset->begin(); - diter != dest_subset->end();) { - Element &dest_element = *diter; - // Computes label weight. - arc.weight = common_divisor_(arc.weight, dest_element.weight); - - while (elements_.size() <= dest_element.state_id) - elements_.push_back(0); - Element *matching_element = elements_[dest_element.state_id]; - if (matching_element) { - // Found duplicate state: sums state weight and deletes dup. - matching_element->weight = Plus(matching_element->weight, - dest_element.weight); - if (!matching_element->weight.Member()) - SetProperties(kError, kError); - ++diter; - dest_subset->erase_after(oiter); - } else { - // Saves element so we can check for duplicate for this state. - elements_[dest_element.state_id] = &dest_element; - oiter = diter; - ++diter; - } - } - - // Divides out label weight from destination subset elements. - // Quantizes to ensure comparisons are effective. - // Clears element vector. - for (typename Subset::iterator diter = dest_subset->begin(); - diter != dest_subset->end(); - ++diter) { - Element &dest_element = *diter; - dest_element.weight = Divide(dest_element.weight, arc.weight, - DIVIDE_LEFT); - dest_element.weight = dest_element.weight.Quantize(delta_); - elements_[dest_element.state_id] = 0; - } - - arc.nextstate = FindState(dest_subset); - CacheImpl<A>::PushArc(s, arc); - } - - float delta_; // Quantization delta for subset weights - const vector<Weight> *in_dist_; // Distance to final NFA states - vector<Weight> *out_dist_; // Distance to final DFA states - - D common_divisor_; - F *filter_; - T *state_table_; - - vector<Element *> elements_; - - void operator=(const DeterminizeFsaImpl<A, D, F, T> &); // disallow -}; - - -// Implementation of delayed determinization for transducers. -// Transducer determinization is implemented by mapping the input to -// the Gallic semiring as an acceptor whose weights contain the output -// strings and using acceptor determinization above to determinize -// that acceptor. -template <class A, StringType S, class D, class F, class T> -class DeterminizeFstImpl : public DeterminizeFstImplBase<A> { - public: - using FstImpl<A>::SetProperties; - using DeterminizeFstImplBase<A>::GetFst; - using CacheBaseImpl< CacheState<A> >::GetCacheGc; - using CacheBaseImpl< CacheState<A> >::GetCacheLimit; - - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - typedef ToGallicMapper<A, S> ToMapper; - typedef FromGallicMapper<A, S> FromMapper; - - typedef typename ToMapper::ToArc ToArc; - typedef ArcMapFst<A, ToArc, ToMapper> ToFst; - typedef ArcMapFst<ToArc, A, FromMapper> FromFst; - - typedef GallicCommonDivisor<Label, Weight, S, D> CommonDivisor; - typedef GallicFactor<Label, Weight, S> FactorIterator; - - DeterminizeFstImpl(const Fst<A> &fst, - const DeterminizeFstOptions<A, D, F, T> &opts) - : DeterminizeFstImplBase<A>(fst, opts), - delta_(opts.delta), - subsequential_label_(opts.subsequential_label) { - Init(GetFst()); - } - - DeterminizeFstImpl(const DeterminizeFstImpl<A, S, D, F, T> &impl) - : DeterminizeFstImplBase<A>(impl), - delta_(impl.delta_), - subsequential_label_(impl.subsequential_label_) { - Init(GetFst()); - } - - ~DeterminizeFstImpl() { delete from_fst_; } - - virtual DeterminizeFstImpl<A, S, D, F, T> *Copy() { - return new DeterminizeFstImpl<A, S, D, F, T>(*this); - } - - uint64 Properties() const { return Properties(kFstProperties); } - - // Set error if found; return FST impl properties. - uint64 Properties(uint64 mask) const { - if ((mask & kError) && (GetFst().Properties(kError, false) || - from_fst_->Properties(kError, false))) - SetProperties(kError, kError); - return FstImpl<A>::Properties(mask); - } - - virtual StateId ComputeStart() { return from_fst_->Start(); } - - virtual Weight ComputeFinal(StateId s) { return from_fst_->Final(s); } - - virtual void Expand(StateId s) { - for (ArcIterator<FromFst> aiter(*from_fst_, s); - !aiter.Done(); - aiter.Next()) - CacheImpl<A>::PushArc(s, aiter.Value()); - CacheImpl<A>::SetArcs(s); - } - - private: - // Initialization of transducer determinization implementation, which - // is defined after DeterminizeFst since it calls it. - void Init(const Fst<A> &fst); - - float delta_; - Label subsequential_label_; - FromFst *from_fst_; - - void operator=(const DeterminizeFstImpl<A, S, D, F, T> &); // disallow -}; - - -// Determinizes a weighted transducer. This version is a delayed -// Fst. The result will be an equivalent FST that has the property -// that no state has two transitions with the same input label. -// For this algorithm, epsilon transitions are treated as regular -// symbols (cf. RmEpsilon). -// -// The transducer must be functional. The weights must be (weakly) -// left divisible (valid for TropicalWeight and LogWeight for instance) -// and be zero-sum-free if for all a,b: (Plus(a, b) = 0 => a = b = 0. -// -// Complexity: -// - Determinizable: exponential (polynomial in the size of the output) -// - Non-determinizable) does not terminate -// -// The determinizable automata include all unweighted and all acyclic input. -// -// References: -// - Mehryar Mohri, "Finite-State Transducers in Language and Speech -// Processing". Computational Linguistics, 23:2, 1997. -// -// This class attaches interface to implementation and handles -// reference counting, delegating most methods to ImplToFst. -template <class A> -class DeterminizeFst : public ImplToFst< DeterminizeFstImplBase<A> > { - public: - friend class ArcIterator< DeterminizeFst<A> >; - friend class StateIterator< DeterminizeFst<A> >; - template <class B, StringType S, class D, class F, class T> - friend class DeterminizeFstImpl; - - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef CacheState<A> State; - typedef DeterminizeFstImplBase<A> Impl; - - using ImplToFst<Impl>::SetImpl; - - explicit DeterminizeFst(const Fst<A> &fst) { - typedef DefaultCommonDivisor<Weight> D; - typedef IdentityDeterminizeFilter<A> F; - typedef DefaultDeterminizeStateTable<A> T; - DeterminizeFstOptions<A, D, F, T> opts; - if (fst.Properties(kAcceptor, true)) { - // Calls implementation for acceptors. - SetImpl(new DeterminizeFsaImpl<A, D, F, T>(fst, 0, 0, opts)); - } else { - // Calls implementation for transducers. - SetImpl(new - DeterminizeFstImpl<A, STRING_LEFT_RESTRICT, D, F, T>(fst, opts)); - } - } - - template <class D, class F, class T> - DeterminizeFst(const Fst<A> &fst, - const DeterminizeFstOptions<A, D, F, T> &opts) { - if (fst.Properties(kAcceptor, true)) { - // Calls implementation for acceptors. - SetImpl(new DeterminizeFsaImpl<A, D, F, T>(fst, 0, 0, opts)); - } else { - // Calls implementation for transducers. - SetImpl(new - DeterminizeFstImpl<A, STRING_LEFT_RESTRICT, D, F, T>(fst, opts)); - } - } - - // This acceptor-only version additionally computes the distance to - // final states in the output if provided with those distances for the - // input. Useful for e.g. unique N-shortest paths. - template <class D, class F, class T> - DeterminizeFst(const Fst<A> &fst, - const vector<Weight> *in_dist, vector<Weight> *out_dist, - const DeterminizeFstOptions<A, D, F, T> &opts) { - if (!fst.Properties(kAcceptor, true)) { - FSTERROR() << "DeterminizeFst:" - << " distance to final states computed for acceptors only"; - GetImpl()->SetProperties(kError, kError); - } - SetImpl(new DeterminizeFsaImpl<A, D, F, T>(fst, in_dist, out_dist, opts)); - } - - // See Fst<>::Copy() for doc. - DeterminizeFst(const DeterminizeFst<A> &fst, bool safe = false) { - if (safe) - SetImpl(fst.GetImpl()->Copy()); - else - SetImpl(fst.GetImpl(), false); - } - - // Get a copy of this DeterminizeFst. See Fst<>::Copy() for further doc. - virtual DeterminizeFst<A> *Copy(bool safe = false) const { - return new DeterminizeFst<A>(*this, safe); - } - - virtual inline void InitStateIterator(StateIteratorData<A> *data) const; - - virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { - GetImpl()->InitArcIterator(s, data); - } - - private: - // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } - - void operator=(const DeterminizeFst<A> &fst); // Disallow -}; - - -// Initialization of transducer determinization implementation. which -// is defined after DeterminizeFst since it calls it. -template <class A, StringType S, class D, class F, class T> -void DeterminizeFstImpl<A, S, D, F, T>::Init(const Fst<A> &fst) { - // Mapper to an acceptor. - ToFst to_fst(fst, ToMapper()); - - // Determinizes acceptor. - // This recursive call terminates since it passes the common divisor - // to a private constructor. - CacheOptions copts(GetCacheGc(), GetCacheLimit()); - DeterminizeFstOptions<ToArc, CommonDivisor> dopts(copts, delta_); - // Uses acceptor-only constructor to avoid template recursion - DeterminizeFst<ToArc> det_fsa(to_fst, 0, 0, dopts); - - // Mapper back to transducer. - FactorWeightOptions<ToArc> fopts(CacheOptions(true, 0), delta_, - kFactorFinalWeights, - subsequential_label_, - subsequential_label_); - FactorWeightFst<ToArc, FactorIterator> factored_fst(det_fsa, fopts); - from_fst_ = new FromFst(factored_fst, FromMapper(subsequential_label_)); -} - - -// Specialization for DeterminizeFst. -template <class A> -class StateIterator< DeterminizeFst<A> > - : public CacheStateIterator< DeterminizeFst<A> > { - public: - explicit StateIterator(const DeterminizeFst<A> &fst) - : CacheStateIterator< DeterminizeFst<A> >(fst, fst.GetImpl()) {} -}; - - -// Specialization for DeterminizeFst. -template <class A> -class ArcIterator< DeterminizeFst<A> > - : public CacheArcIterator< DeterminizeFst<A> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const DeterminizeFst<A> &fst, StateId s) - : CacheArcIterator< DeterminizeFst<A> >(fst.GetImpl(), s) { - if (!fst.GetImpl()->HasArcs(s)) - fst.GetImpl()->Expand(s); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - - -template <class A> inline -void DeterminizeFst<A>::InitStateIterator(StateIteratorData<A> *data) const -{ - data->base = new StateIterator< DeterminizeFst<A> >(*this); -} - - -// Useful aliases when using StdArc. -typedef DeterminizeFst<StdArc> StdDeterminizeFst; - - -template <class Arc> -struct DeterminizeOptions { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - typedef typename Arc::Label Label; - - float delta; // Quantization delta for subset weights. - Weight weight_threshold; // Pruning weight threshold. - StateId state_threshold; // Pruning state threshold. - Label subsequential_label; // Label used for residual final output - // when producing subsequential transducers. - - explicit DeterminizeOptions(float d = kDelta, Weight w = Weight::Zero(), - StateId n = kNoStateId, Label l = 0) - : delta(d), weight_threshold(w), state_threshold(n), - subsequential_label(l) {} -}; - - -// Determinizes a weighted transducer. This version writes the -// determinized Fst to an output MutableFst. The result will be an -// equivalent FST that has the property that no state has two -// transitions with the same input label. For this algorithm, epsilon -// transitions are treated as regular symbols (cf. RmEpsilon). -// -// The transducer must be functional. The weights must be (weakly) -// left divisible (valid for TropicalWeight and LogWeight). -// -// Complexity: -// - Determinizable: exponential (polynomial in the size of the output) -// - Non-determinizable: does not terminate -// -// The determinizable automata include all unweighted and all acyclic input. -// -// References: -// - Mehryar Mohri, "Finite-State Transducers in Language and Speech -// Processing". Computational Linguistics, 23:2, 1997. -template <class Arc> -void Determinize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst, - const DeterminizeOptions<Arc> &opts - = DeterminizeOptions<Arc>()) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - - DeterminizeFstOptions<Arc> nopts; - nopts.delta = opts.delta; - nopts.subsequential_label = opts.subsequential_label; - - nopts.gc_limit = 0; // Cache only the last state for fastest copy. - - if (opts.weight_threshold != Weight::Zero() || - opts.state_threshold != kNoStateId) { - if (ifst.Properties(kAcceptor, false)) { - vector<Weight> idistance, odistance; - ShortestDistance(ifst, &idistance, true); - DeterminizeFst<Arc> dfst(ifst, &idistance, &odistance, nopts); - PruneOptions< Arc, AnyArcFilter<Arc> > popts(opts.weight_threshold, - opts.state_threshold, - AnyArcFilter<Arc>(), - &odistance); - Prune(dfst, ofst, popts); - } else { - *ofst = DeterminizeFst<Arc>(ifst, nopts); - Prune(ofst, opts.weight_threshold, opts.state_threshold); - } - } else { - *ofst = DeterminizeFst<Arc>(ifst, nopts); - } -} - - -} // namespace fst - -#endif // FST_LIB_DETERMINIZE_H__ |