summaryrefslogtreecommitdiff
path: root/kaldi_io/src/tools/openfst/include/fst/determinize.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/determinize.h')
-rw-r--r--kaldi_io/src/tools/openfst/include/fst/determinize.h1015
1 files changed, 1015 insertions, 0 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/determinize.h b/kaldi_io/src/tools/openfst/include/fst/determinize.h
new file mode 100644
index 0000000..9ff8723
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/determinize.h
@@ -0,0 +1,1015 @@
+// determinize.h
+
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Functions and classes to determinize an FST.
+
+#ifndef FST_LIB_DETERMINIZE_H__
+#define FST_LIB_DETERMINIZE_H__
+
+#include <algorithm>
+#include <climits>
+#include <tr1/unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <map>
+#include <fst/slist.h>
+#include <string>
+#include <vector>
+using std::vector;
+
+#include <fst/arc-map.h>
+#include <fst/cache.h>
+#include <fst/bi-table.h>
+#include <fst/factor-weight.h>
+#include <fst/prune.h>
+#include <fst/test-properties.h>
+
+
+namespace fst {
+
+//
+// COMMON DIVISORS - these are used in determinization to compute
+// the transition weights. In the simplest case, it is just the same
+// as the semiring Plus(). However, other choices permit more efficient
+// determinization when the output contains strings.
+//
+
+// The default common divisor uses the semiring Plus.
+template <class W>
+class DefaultCommonDivisor {
+ public:
+ typedef W Weight;
+
+ W operator()(const W &w1, const W &w2) const { return Plus(w1, w2); }
+};
+
+
+// The label common divisor for a (left) string semiring selects a
+// single letter common prefix or the empty string. This is used in
+// the determinization of output strings so that at most a single
+// letter will appear in the output of a transtion.
+template <typename L, StringType S>
+class LabelCommonDivisor {
+ public:
+ typedef StringWeight<L, S> Weight;
+
+ Weight operator()(const Weight &w1, const Weight &w2) const {
+ StringWeightIterator<L, S> iter1(w1);
+ StringWeightIterator<L, S> iter2(w2);
+
+ if (!(StringWeight<L, S>::Properties() & kLeftSemiring)) {
+ FSTERROR() << "LabelCommonDivisor: Weight needs to be left semiring";
+ return Weight::NoWeight();
+ } else if (w1.Size() == 0 || w2.Size() == 0) {
+ return Weight::One();
+ } else if (w1 == Weight::Zero()) {
+ return Weight(iter2.Value());
+ } else if (w2 == Weight::Zero()) {
+ return Weight(iter1.Value());
+ } else if (iter1.Value() == iter2.Value()) {
+ return Weight(iter1.Value());
+ } else {
+ return Weight::One();
+ }
+ }
+};
+
+
+// The gallic common divisor uses the label common divisor on the
+// string component and the template argument D common divisor on the
+// weight component, which defaults to the default common divisor.
+template <class L, class W, StringType S, class D = DefaultCommonDivisor<W> >
+class GallicCommonDivisor {
+ public:
+ typedef GallicWeight<L, W, S> Weight;
+
+ Weight operator()(const Weight &w1, const Weight &w2) const {
+ return Weight(label_common_divisor_(w1.Value1(), w2.Value1()),
+ weight_common_divisor_(w1.Value2(), w2.Value2()));
+ }
+
+ private:
+ LabelCommonDivisor<L, S> label_common_divisor_;
+ D weight_common_divisor_;
+};
+
+
+// Represents an element in a subset
+template <class A>
+struct DeterminizeElement {
+ typedef typename A::StateId StateId;
+ typedef typename A::Weight Weight;
+
+ DeterminizeElement() {}
+
+ DeterminizeElement(StateId s, Weight w) : state_id(s), weight(w) {}
+
+ bool operator==(const DeterminizeElement<A> & element) const {
+ return state_id == element.state_id && weight == element.weight;
+ }
+
+ bool operator<(const DeterminizeElement<A> & element) const {
+ return state_id < element.state_id ||
+ (state_id == element.state_id && weight == element.weight);
+ }
+
+ StateId state_id; // Input state Id
+ Weight weight; // Residual weight
+};
+
+
+//
+// DETERMINIZE FILTERS - these can be used in determinization to compute
+// transformations on the subsets prior to their being added as destination
+// states. The filter operates on a map between a label and the
+// corresponding destination subsets. The possibly modified map is
+// then used to construct the destination states for arcs exiting state 's'.
+// It must define the ordered map type LabelMap and have a default
+// and copy constructor.
+
+// A determinize filter that does not modify its input.
+template <class Arc>
+struct IdentityDeterminizeFilter {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef slist< DeterminizeElement<Arc> > Subset;
+ typedef map<Label, Subset*> LabelMap;
+
+ static uint64 Properties(uint64 props) { return props; }
+
+ void operator()(StateId s, LabelMap *label_map) {}
+};
+
+
+//
+// DETERMINIZATION STATE TABLES
+//
+// The determiziation state table has the form:
+//
+// template <class Arc>
+// class DeterminizeStateTable {
+// public:
+// typedef typename Arc::StateId StateId;
+// typedef DeterminizeElement<Arc> Element;
+// typedef slist<Element> Subset;
+//
+// // Required constuctor
+// DeterminizeStateTable();
+//
+// // Required copy constructor that does not copy state
+// DeterminizeStateTable(const DeterminizeStateTable<A,P> &table);
+//
+// // Lookup state ID by subset (not depending of the element order).
+// // If it doesn't exist, then add it. FindState takes
+// // ownership of the subset argument (so that it doesn't have to
+// // copy it if it creates a new state).
+// StateId FindState(Subset *subset);
+//
+// // Lookup subset by ID.
+// const Subset *FindSubset(StateId id) const;
+// };
+//
+
+// The default determinization state table based on the
+// compact hash bi-table.
+template <class Arc>
+class DefaultDeterminizeStateTable {
+ public:
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Label Label;
+ typedef typename Arc::Weight Weight;
+ typedef DeterminizeElement<Arc> Element;
+ typedef slist<Element> Subset;
+
+ explicit DefaultDeterminizeStateTable(size_t table_size = 0)
+ : table_size_(table_size),
+ subsets_(table_size_, new SubsetKey(), new SubsetEqual(&elements_)) { }
+
+ DefaultDeterminizeStateTable(const DefaultDeterminizeStateTable<Arc> &table)
+ : table_size_(table.table_size_),
+ subsets_(table_size_, new SubsetKey(), new SubsetEqual(&elements_)) { }
+
+ ~DefaultDeterminizeStateTable() {
+ for (StateId s = 0; s < subsets_.Size(); ++s)
+ delete subsets_.FindEntry(s);
+ }
+
+ // Finds the state corresponding to a subset. Only creates a new
+ // state if the subset is not found. FindState takes ownership of
+ // the subset argument (so that it doesn't have to copy it if it
+ // creates a new state).
+ StateId FindState(Subset *subset) {
+ StateId ns = subsets_.Size();
+ StateId s = subsets_.FindId(subset);
+ if (s != ns) delete subset; // subset found
+ return s;
+ }
+
+ const Subset* FindSubset(StateId s) { return subsets_.FindEntry(s); }
+
+ private:
+ // Comparison object for hashing Subset(s). Subsets are not sorted in this
+ // implementation, so ordering must not be assumed in the equivalence
+ // test.
+ class SubsetEqual {
+ public:
+ SubsetEqual() { // needed for compilation but should never be called
+ FSTERROR() << "SubsetEqual: default constructor not implemented";
+ }
+
+ // Constructor takes vector needed to check equality. See immediately
+ // below for constraints on it.
+ explicit SubsetEqual(vector<Element *> *elements)
+ : elements_(elements) {}
+
+ // At each call to operator(), the elements_ vector should contain
+ // only NULLs. When this operator returns, elements_ will still
+ // have this property.
+ bool operator()(Subset* subset1, Subset* subset2) const {
+ if (!subset1 && !subset2)
+ return true;
+ if ((subset1 && !subset2) || (!subset1 && subset2))
+ return false;
+
+ if (subset1->size() != subset2->size())
+ return false;
+
+ // Loads first subset elements in element vector.
+ for (typename Subset::iterator iter1 = subset1->begin();
+ iter1 != subset1->end();
+ ++iter1) {
+ Element &element1 = *iter1;
+ while (elements_->size() <= element1.state_id)
+ elements_->push_back(0);
+ (*elements_)[element1.state_id] = &element1;
+ }
+
+ // Checks second subset matches first via element vector.
+ for (typename Subset::iterator iter2 = subset2->begin();
+ iter2 != subset2->end();
+ ++iter2) {
+ Element &element2 = *iter2;
+ while (elements_->size() <= element2.state_id)
+ elements_->push_back(0);
+ Element *element1 = (*elements_)[element2.state_id];
+ if (!element1 || element1->weight != element2.weight) {
+ // Mismatch found. Resets element vector before returning false.
+ for (typename Subset::iterator iter1 = subset1->begin();
+ iter1 != subset1->end();
+ ++iter1)
+ (*elements_)[iter1->state_id] = 0;
+ return false;
+ } else {
+ (*elements_)[element2.state_id] = 0; // Clears entry
+ }
+ }
+ return true;
+ }
+ private:
+ vector<Element *> *elements_;
+ };
+
+ // Hash function for Subset to Fst states. Subset elements are not
+ // sorted in this implementation, so the hash must be invariant
+ // under subset reordering.
+ class SubsetKey {
+ public:
+ size_t operator()(const Subset* subset) const {
+ size_t hash = 0;
+ if (subset) {
+ for (typename Subset::const_iterator iter = subset->begin();
+ iter != subset->end();
+ ++iter) {
+ const Element &element = *iter;
+ int lshift = element.state_id % (CHAR_BIT * sizeof(size_t) - 1) + 1;
+ int rshift = CHAR_BIT * sizeof(size_t) - lshift;
+ size_t n = element.state_id;
+ hash ^= n << lshift ^ n >> rshift ^ element.weight.Hash();
+ }
+ }
+ return hash;
+ }
+ };
+
+ size_t table_size_;
+
+ typedef CompactHashBiTable<StateId, Subset *,
+ SubsetKey, SubsetEqual, HS_STL> SubsetTable;
+
+ SubsetTable subsets_;
+ vector<Element *> elements_;
+
+ void operator=(const DefaultDeterminizeStateTable<Arc> &); // disallow
+};
+
+// Options for finite-state transducer determinization templated on
+// the arc type, common divisor, the determinization filter and the
+// state table. DeterminizeFst takes ownership of the determinization
+// filter and state table if provided.
+template <class Arc,
+ class D = DefaultCommonDivisor<typename Arc::Weight>,
+ class F = IdentityDeterminizeFilter<Arc>,
+ class T = DefaultDeterminizeStateTable<Arc> >
+struct DeterminizeFstOptions : CacheOptions {
+ typedef typename Arc::Label Label;
+ float delta; // Quantization delta for subset weights
+ Label subsequential_label; // Label used for residual final output
+ // when producing subsequential transducers.
+ F *filter; // Determinization filter
+ T *state_table; // Determinization state table
+
+ explicit DeterminizeFstOptions(const CacheOptions &opts,
+ float del = kDelta, Label lab = 0,
+ F *filt = 0,
+ T *table = 0)
+ : CacheOptions(opts), delta(del), subsequential_label(lab),
+ filter(filt), state_table(table) {}
+
+ explicit DeterminizeFstOptions(float del = kDelta, Label lab = 0,
+ F *filt = 0, T *table = 0)
+ : delta(del), subsequential_label(lab), filter(filt),
+ state_table(table) {}
+};
+
+// Implementation of delayed DeterminizeFst. This base class is
+// common to the variants that implement acceptor and transducer
+// determinization.
+template <class A>
+class DeterminizeFstImplBase : public CacheImpl<A> {
+ public:
+ using FstImpl<A>::SetType;
+ using FstImpl<A>::SetProperties;
+ using FstImpl<A>::Properties;
+ using FstImpl<A>::SetInputSymbols;
+ using FstImpl<A>::SetOutputSymbols;
+
+ using CacheBaseImpl< CacheState<A> >::HasStart;
+ using CacheBaseImpl< CacheState<A> >::HasFinal;
+ using CacheBaseImpl< CacheState<A> >::HasArcs;
+ using CacheBaseImpl< CacheState<A> >::SetFinal;
+ using CacheBaseImpl< CacheState<A> >::SetStart;
+
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef CacheState<A> State;
+
+ template <class D, class F, class T>
+ DeterminizeFstImplBase(const Fst<A> &fst,
+ const DeterminizeFstOptions<A, D, F, T> &opts)
+ : CacheImpl<A>(opts), fst_(fst.Copy()) {
+ SetType("determinize");
+ uint64 iprops = fst.Properties(kFstProperties, false);
+ uint64 dprops = DeterminizeProperties(iprops,
+ opts.subsequential_label != 0);
+ SetProperties(F::Properties(dprops), kCopyProperties);
+ SetInputSymbols(fst.InputSymbols());
+ SetOutputSymbols(fst.OutputSymbols());
+ }
+
+ DeterminizeFstImplBase(const DeterminizeFstImplBase<A> &impl)
+ : CacheImpl<A>(impl),
+ fst_(impl.fst_->Copy(true)) {
+ SetType("determinize");
+ SetProperties(impl.Properties(), kCopyProperties);
+ SetInputSymbols(impl.InputSymbols());
+ SetOutputSymbols(impl.OutputSymbols());
+ }
+
+ virtual ~DeterminizeFstImplBase() { delete fst_; }
+
+ virtual DeterminizeFstImplBase<A> *Copy() = 0;
+
+ StateId Start() {
+ if (!HasStart()) {
+ StateId start = ComputeStart();
+ if (start != kNoStateId) {
+ SetStart(start);
+ }
+ }
+ return CacheImpl<A>::Start();
+ }
+
+ Weight Final(StateId s) {
+ if (!HasFinal(s)) {
+ Weight final = ComputeFinal(s);
+ SetFinal(s, final);
+ }
+ return CacheImpl<A>::Final(s);
+ }
+
+ virtual void Expand(StateId s) = 0;
+
+ size_t NumArcs(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumArcs(s);
+ }
+
+ size_t NumInputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumInputEpsilons(s);
+ }
+
+ size_t NumOutputEpsilons(StateId s) {
+ if (!HasArcs(s))
+ Expand(s);
+ return CacheImpl<A>::NumOutputEpsilons(s);
+ }
+
+ void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
+ if (!HasArcs(s))
+ Expand(s);
+ CacheImpl<A>::InitArcIterator(s, data);
+ }
+
+ virtual StateId ComputeStart() = 0;
+
+ virtual Weight ComputeFinal(StateId s) = 0;
+
+ const Fst<A> &GetFst() const { return *fst_; }
+
+ private:
+ const Fst<A> *fst_; // Input Fst
+
+ void operator=(const DeterminizeFstImplBase<A> &); // disallow
+};
+
+
+// Implementation of delayed determinization for weighted acceptors.
+// It is templated on the arc type A and the common divisor D.
+template <class A, class D, class F, class T>
+class DeterminizeFsaImpl : public DeterminizeFstImplBase<A> {
+ public:
+ using FstImpl<A>::SetProperties;
+ using DeterminizeFstImplBase<A>::GetFst;
+ using DeterminizeFstImplBase<A>::SetArcs;
+
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef DeterminizeElement<A> Element;
+ typedef slist<Element> Subset;
+ typedef typename F::LabelMap LabelMap;
+
+ DeterminizeFsaImpl(const Fst<A> &fst,
+ const vector<Weight> *in_dist, vector<Weight> *out_dist,
+ const DeterminizeFstOptions<A, D, F, T> &opts)
+ : DeterminizeFstImplBase<A>(fst, opts),
+ delta_(opts.delta),
+ in_dist_(in_dist),
+ out_dist_(out_dist),
+ filter_(opts.filter ? opts.filter : new F()),
+ state_table_(opts.state_table ? opts.state_table : new T()) {
+ if (!fst.Properties(kAcceptor, true)) {
+ FSTERROR() << "DeterminizeFst: argument not an acceptor";
+ SetProperties(kError, kError);
+ }
+ if (!(Weight::Properties() & kLeftSemiring)) {
+ FSTERROR() << "DeterminizeFst: Weight needs to be left distributive: "
+ << Weight::Type();
+ SetProperties(kError, kError);
+ }
+ if (out_dist_)
+ out_dist_->clear();
+ }
+
+ DeterminizeFsaImpl(const DeterminizeFsaImpl<A, D, F, T> &impl)
+ : DeterminizeFstImplBase<A>(impl),
+ delta_(impl.delta_),
+ in_dist_(0),
+ out_dist_(0),
+ filter_(new F(*impl.filter_)),
+ state_table_(new T(*impl.state_table_)) {
+ if (impl.out_dist_) {
+ FSTERROR() << "DeterminizeFsaImpl: cannot copy with out_dist vector";
+ SetProperties(kError, kError);
+ }
+ }
+
+ virtual ~DeterminizeFsaImpl() {
+ delete filter_;
+ delete state_table_;
+ }
+
+ virtual DeterminizeFsaImpl<A, D, F, T> *Copy() {
+ return new DeterminizeFsaImpl<A, D, F, T>(*this);
+ }
+
+ uint64 Properties() const { return Properties(kFstProperties); }
+
+ // Set error if found; return FST impl properties.
+ uint64 Properties(uint64 mask) const {
+ if ((mask & kError) && (GetFst().Properties(kError, false)))
+ SetProperties(kError, kError);
+ return FstImpl<A>::Properties(mask);
+ }
+
+ virtual StateId ComputeStart() {
+ StateId s = GetFst().Start();
+ if (s == kNoStateId)
+ return kNoStateId;
+ Element element(s, Weight::One());
+ Subset *subset = new Subset;
+ subset->push_front(element);
+ return FindState(subset);
+ }
+
+ virtual Weight ComputeFinal(StateId s) {
+ const Subset *subset = state_table_->FindSubset(s);
+ Weight final = Weight::Zero();
+ for (typename Subset::const_iterator siter = subset->begin();
+ siter != subset->end();
+ ++siter) {
+ const Element &element = *siter;
+ final = Plus(final, Times(element.weight,
+ GetFst().Final(element.state_id)));
+ if (!final.Member())
+ SetProperties(kError, kError);
+ }
+ return final;
+ }
+
+ StateId FindState(Subset *subset) {
+ StateId s = state_table_->FindState(subset);
+ if (in_dist_ && out_dist_->size() <= s)
+ out_dist_->push_back(ComputeDistance(subset));
+ return s;
+ }
+
+ // Compute distance from a state to the final states in the DFA
+ // given the distances in the NFA.
+ Weight ComputeDistance(const Subset *subset) {
+ Weight outd = Weight::Zero();
+ for (typename Subset::const_iterator siter = subset->begin();
+ siter != subset->end(); ++siter) {
+ const Element &element = *siter;
+ Weight ind = element.state_id < in_dist_->size() ?
+ (*in_dist_)[element.state_id] : Weight::Zero();
+ outd = Plus(outd, Times(element.weight, ind));
+ }
+ return outd;
+ }
+
+ // Computes the outgoing transitions from a state, creating new destination
+ // states as needed.
+ virtual void Expand(StateId s) {
+
+ LabelMap label_map;
+ LabelSubsets(s, &label_map);
+
+ for (typename LabelMap::iterator liter = label_map.begin();
+ liter != label_map.end();
+ ++liter)
+ AddArc(s, liter->first, liter->second);
+ SetArcs(s);
+ }
+
+ private:
+ // Constructs destination subsets per label. At return, subset
+ // element weights include the input automaton label weights and the
+ // subsets may contain duplicate states.
+ void LabelSubsets(StateId s, LabelMap *label_map) {
+ const Subset *src_subset = state_table_->FindSubset(s);
+
+ for (typename Subset::const_iterator siter = src_subset->begin();
+ siter != src_subset->end();
+ ++siter) {
+ const Element &src_element = *siter;
+ for (ArcIterator< Fst<A> > aiter(GetFst(), src_element.state_id);
+ !aiter.Done();
+ aiter.Next()) {
+ const A &arc = aiter.Value();
+ Element dest_element(arc.nextstate,
+ Times(src_element.weight, arc.weight));
+
+ // The LabelMap may be a e.g. multimap with more complex
+ // determinization filters, so we insert efficiently w/o using [].
+ typename LabelMap::iterator liter = label_map->lower_bound(arc.ilabel);
+ Subset* dest_subset;
+ if (liter == label_map->end() || liter->first != arc.ilabel) {
+ dest_subset = new Subset;
+ label_map->insert(liter, make_pair(arc.ilabel, dest_subset));
+ } else {
+ dest_subset = liter->second;
+ }
+
+ dest_subset->push_front(dest_element);
+ }
+ }
+ // Applies the determinization filter
+ (*filter_)(s, label_map);
+ }
+
+ // Adds an arc from state S to the destination state associated
+ // with subset DEST_SUBSET (as created by LabelSubsets).
+ void AddArc(StateId s, Label label, Subset *dest_subset) {
+ A arc;
+ arc.ilabel = label;
+ arc.olabel = label;
+ arc.weight = Weight::Zero();
+
+ typename Subset::iterator oiter;
+ for (typename Subset::iterator diter = dest_subset->begin();
+ diter != dest_subset->end();) {
+ Element &dest_element = *diter;
+ // Computes label weight.
+ arc.weight = common_divisor_(arc.weight, dest_element.weight);
+
+ while (elements_.size() <= dest_element.state_id)
+ elements_.push_back(0);
+ Element *matching_element = elements_[dest_element.state_id];
+ if (matching_element) {
+ // Found duplicate state: sums state weight and deletes dup.
+ matching_element->weight = Plus(matching_element->weight,
+ dest_element.weight);
+ if (!matching_element->weight.Member())
+ SetProperties(kError, kError);
+ ++diter;
+ dest_subset->erase_after(oiter);
+ } else {
+ // Saves element so we can check for duplicate for this state.
+ elements_[dest_element.state_id] = &dest_element;
+ oiter = diter;
+ ++diter;
+ }
+ }
+
+ // Divides out label weight from destination subset elements.
+ // Quantizes to ensure comparisons are effective.
+ // Clears element vector.
+ for (typename Subset::iterator diter = dest_subset->begin();
+ diter != dest_subset->end();
+ ++diter) {
+ Element &dest_element = *diter;
+ dest_element.weight = Divide(dest_element.weight, arc.weight,
+ DIVIDE_LEFT);
+ dest_element.weight = dest_element.weight.Quantize(delta_);
+ elements_[dest_element.state_id] = 0;
+ }
+
+ arc.nextstate = FindState(dest_subset);
+ CacheImpl<A>::PushArc(s, arc);
+ }
+
+ float delta_; // Quantization delta for subset weights
+ const vector<Weight> *in_dist_; // Distance to final NFA states
+ vector<Weight> *out_dist_; // Distance to final DFA states
+
+ D common_divisor_;
+ F *filter_;
+ T *state_table_;
+
+ vector<Element *> elements_;
+
+ void operator=(const DeterminizeFsaImpl<A, D, F, T> &); // disallow
+};
+
+
+// Implementation of delayed determinization for transducers.
+// Transducer determinization is implemented by mapping the input to
+// the Gallic semiring as an acceptor whose weights contain the output
+// strings and using acceptor determinization above to determinize
+// that acceptor.
+template <class A, StringType S, class D, class F, class T>
+class DeterminizeFstImpl : public DeterminizeFstImplBase<A> {
+ public:
+ using FstImpl<A>::SetProperties;
+ using DeterminizeFstImplBase<A>::GetFst;
+ using CacheBaseImpl< CacheState<A> >::GetCacheGc;
+ using CacheBaseImpl< CacheState<A> >::GetCacheLimit;
+
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+
+ typedef ToGallicMapper<A, S> ToMapper;
+ typedef FromGallicMapper<A, S> FromMapper;
+
+ typedef typename ToMapper::ToArc ToArc;
+ typedef ArcMapFst<A, ToArc, ToMapper> ToFst;
+ typedef ArcMapFst<ToArc, A, FromMapper> FromFst;
+
+ typedef GallicCommonDivisor<Label, Weight, S, D> CommonDivisor;
+ typedef GallicFactor<Label, Weight, S> FactorIterator;
+
+ DeterminizeFstImpl(const Fst<A> &fst,
+ const DeterminizeFstOptions<A, D, F, T> &opts)
+ : DeterminizeFstImplBase<A>(fst, opts),
+ delta_(opts.delta),
+ subsequential_label_(opts.subsequential_label) {
+ Init(GetFst());
+ }
+
+ DeterminizeFstImpl(const DeterminizeFstImpl<A, S, D, F, T> &impl)
+ : DeterminizeFstImplBase<A>(impl),
+ delta_(impl.delta_),
+ subsequential_label_(impl.subsequential_label_) {
+ Init(GetFst());
+ }
+
+ ~DeterminizeFstImpl() { delete from_fst_; }
+
+ virtual DeterminizeFstImpl<A, S, D, F, T> *Copy() {
+ return new DeterminizeFstImpl<A, S, D, F, T>(*this);
+ }
+
+ uint64 Properties() const { return Properties(kFstProperties); }
+
+ // Set error if found; return FST impl properties.
+ uint64 Properties(uint64 mask) const {
+ if ((mask & kError) && (GetFst().Properties(kError, false) ||
+ from_fst_->Properties(kError, false)))
+ SetProperties(kError, kError);
+ return FstImpl<A>::Properties(mask);
+ }
+
+ virtual StateId ComputeStart() { return from_fst_->Start(); }
+
+ virtual Weight ComputeFinal(StateId s) { return from_fst_->Final(s); }
+
+ virtual void Expand(StateId s) {
+ for (ArcIterator<FromFst> aiter(*from_fst_, s);
+ !aiter.Done();
+ aiter.Next())
+ CacheImpl<A>::PushArc(s, aiter.Value());
+ CacheImpl<A>::SetArcs(s);
+ }
+
+ private:
+ // Initialization of transducer determinization implementation, which
+ // is defined after DeterminizeFst since it calls it.
+ void Init(const Fst<A> &fst);
+
+ float delta_;
+ Label subsequential_label_;
+ FromFst *from_fst_;
+
+ void operator=(const DeterminizeFstImpl<A, S, D, F, T> &); // disallow
+};
+
+
+// Determinizes a weighted transducer. This version is a delayed
+// Fst. The result will be an equivalent FST that has the property
+// that no state has two transitions with the same input label.
+// For this algorithm, epsilon transitions are treated as regular
+// symbols (cf. RmEpsilon).
+//
+// The transducer must be functional. The weights must be (weakly)
+// left divisible (valid for TropicalWeight and LogWeight for instance)
+// and be zero-sum-free if for all a,b: (Plus(a, b) = 0 => a = b = 0.
+//
+// Complexity:
+// - Determinizable: exponential (polynomial in the size of the output)
+// - Non-determinizable) does not terminate
+//
+// The determinizable automata include all unweighted and all acyclic input.
+//
+// References:
+// - Mehryar Mohri, "Finite-State Transducers in Language and Speech
+// Processing". Computational Linguistics, 23:2, 1997.
+//
+// This class attaches interface to implementation and handles
+// reference counting, delegating most methods to ImplToFst.
+template <class A>
+class DeterminizeFst : public ImplToFst< DeterminizeFstImplBase<A> > {
+ public:
+ friend class ArcIterator< DeterminizeFst<A> >;
+ friend class StateIterator< DeterminizeFst<A> >;
+ template <class B, StringType S, class D, class F, class T>
+ friend class DeterminizeFstImpl;
+
+ typedef A Arc;
+ typedef typename A::Weight Weight;
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef CacheState<A> State;
+ typedef DeterminizeFstImplBase<A> Impl;
+
+ using ImplToFst<Impl>::SetImpl;
+
+ explicit DeterminizeFst(const Fst<A> &fst) {
+ typedef DefaultCommonDivisor<Weight> D;
+ typedef IdentityDeterminizeFilter<A> F;
+ typedef DefaultDeterminizeStateTable<A> T;
+ DeterminizeFstOptions<A, D, F, T> opts;
+ if (fst.Properties(kAcceptor, true)) {
+ // Calls implementation for acceptors.
+ SetImpl(new DeterminizeFsaImpl<A, D, F, T>(fst, 0, 0, opts));
+ } else {
+ // Calls implementation for transducers.
+ SetImpl(new
+ DeterminizeFstImpl<A, STRING_LEFT_RESTRICT, D, F, T>(fst, opts));
+ }
+ }
+
+ template <class D, class F, class T>
+ DeterminizeFst(const Fst<A> &fst,
+ const DeterminizeFstOptions<A, D, F, T> &opts) {
+ if (fst.Properties(kAcceptor, true)) {
+ // Calls implementation for acceptors.
+ SetImpl(new DeterminizeFsaImpl<A, D, F, T>(fst, 0, 0, opts));
+ } else {
+ // Calls implementation for transducers.
+ SetImpl(new
+ DeterminizeFstImpl<A, STRING_LEFT_RESTRICT, D, F, T>(fst, opts));
+ }
+ }
+
+ // This acceptor-only version additionally computes the distance to
+ // final states in the output if provided with those distances for the
+ // input. Useful for e.g. unique N-shortest paths.
+ template <class D, class F, class T>
+ DeterminizeFst(const Fst<A> &fst,
+ const vector<Weight> *in_dist, vector<Weight> *out_dist,
+ const DeterminizeFstOptions<A, D, F, T> &opts) {
+ if (!fst.Properties(kAcceptor, true)) {
+ FSTERROR() << "DeterminizeFst:"
+ << " distance to final states computed for acceptors only";
+ GetImpl()->SetProperties(kError, kError);
+ }
+ SetImpl(new DeterminizeFsaImpl<A, D, F, T>(fst, in_dist, out_dist, opts));
+ }
+
+ // See Fst<>::Copy() for doc.
+ DeterminizeFst(const DeterminizeFst<A> &fst, bool safe = false) {
+ if (safe)
+ SetImpl(fst.GetImpl()->Copy());
+ else
+ SetImpl(fst.GetImpl(), false);
+ }
+
+ // Get a copy of this DeterminizeFst. See Fst<>::Copy() for further doc.
+ virtual DeterminizeFst<A> *Copy(bool safe = false) const {
+ return new DeterminizeFst<A>(*this, safe);
+ }
+
+ virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
+
+ virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
+ GetImpl()->InitArcIterator(s, data);
+ }
+
+ private:
+ // Makes visible to friends.
+ Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
+
+ void operator=(const DeterminizeFst<A> &fst); // Disallow
+};
+
+
+// Initialization of transducer determinization implementation. which
+// is defined after DeterminizeFst since it calls it.
+template <class A, StringType S, class D, class F, class T>
+void DeterminizeFstImpl<A, S, D, F, T>::Init(const Fst<A> &fst) {
+ // Mapper to an acceptor.
+ ToFst to_fst(fst, ToMapper());
+
+ // Determinizes acceptor.
+ // This recursive call terminates since it passes the common divisor
+ // to a private constructor.
+ CacheOptions copts(GetCacheGc(), GetCacheLimit());
+ DeterminizeFstOptions<ToArc, CommonDivisor> dopts(copts, delta_);
+ // Uses acceptor-only constructor to avoid template recursion
+ DeterminizeFst<ToArc> det_fsa(to_fst, 0, 0, dopts);
+
+ // Mapper back to transducer.
+ FactorWeightOptions<ToArc> fopts(CacheOptions(true, 0), delta_,
+ kFactorFinalWeights,
+ subsequential_label_,
+ subsequential_label_);
+ FactorWeightFst<ToArc, FactorIterator> factored_fst(det_fsa, fopts);
+ from_fst_ = new FromFst(factored_fst, FromMapper(subsequential_label_));
+}
+
+
+// Specialization for DeterminizeFst.
+template <class A>
+class StateIterator< DeterminizeFst<A> >
+ : public CacheStateIterator< DeterminizeFst<A> > {
+ public:
+ explicit StateIterator(const DeterminizeFst<A> &fst)
+ : CacheStateIterator< DeterminizeFst<A> >(fst, fst.GetImpl()) {}
+};
+
+
+// Specialization for DeterminizeFst.
+template <class A>
+class ArcIterator< DeterminizeFst<A> >
+ : public CacheArcIterator< DeterminizeFst<A> > {
+ public:
+ typedef typename A::StateId StateId;
+
+ ArcIterator(const DeterminizeFst<A> &fst, StateId s)
+ : CacheArcIterator< DeterminizeFst<A> >(fst.GetImpl(), s) {
+ if (!fst.GetImpl()->HasArcs(s))
+ fst.GetImpl()->Expand(s);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ArcIterator);
+};
+
+
+template <class A> inline
+void DeterminizeFst<A>::InitStateIterator(StateIteratorData<A> *data) const
+{
+ data->base = new StateIterator< DeterminizeFst<A> >(*this);
+}
+
+
+// Useful aliases when using StdArc.
+typedef DeterminizeFst<StdArc> StdDeterminizeFst;
+
+
+template <class Arc>
+struct DeterminizeOptions {
+ typedef typename Arc::StateId StateId;
+ typedef typename Arc::Weight Weight;
+ typedef typename Arc::Label Label;
+
+ float delta; // Quantization delta for subset weights.
+ Weight weight_threshold; // Pruning weight threshold.
+ StateId state_threshold; // Pruning state threshold.
+ Label subsequential_label; // Label used for residual final output
+ // when producing subsequential transducers.
+
+ explicit DeterminizeOptions(float d = kDelta, Weight w = Weight::Zero(),
+ StateId n = kNoSta