summaryrefslogtreecommitdiff
path: root/kaldi_io/src/tools/openfst/include/fst/synchronize.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/synchronize.h')
-rw-r--r--kaldi_io/src/tools/openfst/include/fst/synchronize.h457
1 files changed, 0 insertions, 457 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/synchronize.h b/kaldi_io/src/tools/openfst/include/fst/synchronize.h
deleted file mode 100644
index 9582926..0000000
--- a/kaldi_io/src/tools/openfst/include/fst/synchronize.h
+++ /dev/null
@@ -1,457 +0,0 @@
-// synchronize.h
-
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Copyright 2005-2010 Google, Inc.
-// Author: [email protected] (Cyril Allauzen)
-//
-// \file
-// Synchronize an FST with bounded delay.
-
-#ifndef FST_LIB_SYNCHRONIZE_H__
-#define FST_LIB_SYNCHRONIZE_H__
-
-#include <algorithm>
-#include <tr1/unordered_map>
-using std::tr1::unordered_map;
-using std::tr1::unordered_multimap;
-#include <tr1/unordered_set>
-using std::tr1::unordered_set;
-using std::tr1::unordered_multiset;
-#include <string>
-#include <utility>
-using std::pair; using std::make_pair;
-#include <vector>
-using std::vector;
-
-#include <fst/cache.h>
-#include <fst/test-properties.h>
-
-
-namespace fst {
-
-typedef CacheOptions SynchronizeFstOptions;
-
-
-// Implementation class for SynchronizeFst
-template <class A>
-class SynchronizeFstImpl
- : public CacheImpl<A> {
- public:
- using FstImpl<A>::SetType;
- using FstImpl<A>::SetProperties;
- using FstImpl<A>::SetInputSymbols;
- using FstImpl<A>::SetOutputSymbols;
-
- using CacheBaseImpl< CacheState<A> >::PushArc;
- using CacheBaseImpl< CacheState<A> >::HasArcs;
- using CacheBaseImpl< CacheState<A> >::HasFinal;
- using CacheBaseImpl< CacheState<A> >::HasStart;
- using CacheBaseImpl< CacheState<A> >::SetArcs;
- using CacheBaseImpl< CacheState<A> >::SetFinal;
- using CacheBaseImpl< CacheState<A> >::SetStart;
-
- typedef A Arc;
- typedef typename A::Label Label;
- typedef typename A::Weight Weight;
- typedef typename A::StateId StateId;
-
- typedef basic_string<Label> String;
-
- struct Element {
- Element() {}
-
- Element(StateId s, const String *i, const String *o)
- : state(s), istring(i), ostring(o) {}
-
- StateId state; // Input state Id
- const String *istring; // Residual input labels
- const String *ostring; // Residual output labels
- // Residual strings are represented by const pointers to
- // basic_string<Label> and are stored in a hash_set. The pointed
- // memory is owned by the hash_set string_set_.
- };
-
- SynchronizeFstImpl(const Fst<A> &fst, const SynchronizeFstOptions &opts)
- : CacheImpl<A>(opts), fst_(fst.Copy()) {
- SetType("synchronize");
- uint64 props = fst.Properties(kFstProperties, false);
- SetProperties(SynchronizeProperties(props), kCopyProperties);
-
- SetInputSymbols(fst.InputSymbols());
- SetOutputSymbols(fst.OutputSymbols());
- }
-
- SynchronizeFstImpl(const SynchronizeFstImpl &impl)
- : CacheImpl<A>(impl),
- fst_(impl.fst_->Copy(true)) {
- SetType("synchronize");
- SetProperties(impl.Properties(), kCopyProperties);
- SetInputSymbols(impl.InputSymbols());
- SetOutputSymbols(impl.OutputSymbols());
- }
-
- ~SynchronizeFstImpl() {
- delete fst_;
- // Extract pointers from the hash set
- vector<const String*> strings;
- typename StringSet::iterator it = string_set_.begin();
- for (; it != string_set_.end(); ++it)
- strings.push_back(*it);
- // Free the extracted pointers
- for (size_t i = 0; i < strings.size(); ++i)
- delete strings[i];
- }
-
- StateId Start() {
- if (!HasStart()) {
- StateId s = fst_->Start();
- if (s == kNoStateId)
- return kNoStateId;
- const String *empty = FindString(new String());
- StateId start = FindState(Element(fst_->Start(), empty, empty));
- SetStart(start);
- }
- return CacheImpl<A>::Start();
- }
-
- Weight Final(StateId s) {
- if (!HasFinal(s)) {
- const Element &e = elements_[s];
- Weight w = e.state == kNoStateId ? Weight::One() : fst_->Final(e.state);
- if ((w != Weight::Zero()) && (e.istring)->empty() && (e.ostring)->empty())
- SetFinal(s, w);
- else
- SetFinal(s, Weight::Zero());
- }
- return CacheImpl<A>::Final(s);
- }
-
- size_t NumArcs(StateId s) {
- if (!HasArcs(s))
- Expand(s);
- return CacheImpl<A>::NumArcs(s);
- }
-
- size_t NumInputEpsilons(StateId s) {
- if (!HasArcs(s))
- Expand(s);
- return CacheImpl<A>::NumInputEpsilons(s);
- }
-
- size_t NumOutputEpsilons(StateId s) {
- if (!HasArcs(s))
- Expand(s);
- return CacheImpl<A>::NumOutputEpsilons(s);
- }
-
- uint64 Properties() const { return Properties(kFstProperties); }
-
- // Set error if found; return FST impl properties.
- uint64 Properties(uint64 mask) const {
- if ((mask & kError) && fst_->Properties(kError, false))
- SetProperties(kError, kError);
- return FstImpl<Arc>::Properties(mask);
- }
-
- void InitArcIterator(StateId s, ArcIteratorData<A> *data) {
- if (!HasArcs(s))
- Expand(s);
- CacheImpl<A>::InitArcIterator(s, data);
- }
-
- // Returns the first character of the string obtained by
- // concatenating s and l.
- Label Car(const String *s, Label l = 0) const {
- if (!s->empty())
- return (*s)[0];
- else
- return l;
- }
-
- // Computes the residual string obtained by removing the first
- // character in the concatenation of s and l.
- const String *Cdr(const String *s, Label l = 0) {
- String *r = new String();
- for (int i = 1; i < s->size(); ++i)
- r->push_back((*s)[i]);
- if (l && !(s->empty())) r->push_back(l);
- return FindString(r);
- }
-
- // Computes the concatenation of s and l.
- const String *Concat(const String *s, Label l = 0) {
- String *r = new String();
- for (int i = 0; i < s->size(); ++i)
- r->push_back((*s)[i]);
- if (l) r->push_back(l);
- return FindString(r);
- }
-
- // Tests if the concatenation of s and l is empty
- bool Empty(const String *s, Label l = 0) const {
- if (s->empty())
- return l == 0;
- else
- return false;
- }
-
- // Finds the string pointed by s in the hash set. Transfers the
- // pointer ownership to the hash set.
- const String *FindString(const String *s) {
- typename StringSet::iterator it = string_set_.find(s);
- if (it != string_set_.end()) {
- delete s;
- return (*it);
- } else {
- string_set_.insert(s);
- return s;
- }
- }
-
- // Finds state corresponding to an element. Creates new state
- // if element not found.
- StateId FindState(const Element &e) {
- typename ElementMap::iterator eit = element_map_.find(e);
- if (eit != element_map_.end()) {
- return (*eit).second;
- } else {
- StateId s = elements_.size();
- elements_.push_back(e);
- element_map_.insert(pair<const Element, StateId>(e, s));
- return s;
- }
- }
-
-
- // Computes the outgoing transitions from a state, creating new destination
- // states as needed.
- void Expand(StateId s) {
- Element e = elements_[s];
-
- if (e.state != kNoStateId)
- for (ArcIterator< Fst<A> > ait(*fst_, e.state);
- !ait.Done();
- ait.Next()) {
- const A &arc = ait.Value();
- if (!Empty(e.istring, arc.ilabel) && !Empty(e.ostring, arc.olabel)) {
- const String *istring = Cdr(e.istring, arc.ilabel);
- const String *ostring = Cdr(e.ostring, arc.olabel);
- StateId d = FindState(Element(arc.nextstate, istring, ostring));
- PushArc(s, Arc(Car(e.istring, arc.ilabel),
- Car(e.ostring, arc.olabel), arc.weight, d));
- } else {
- const String *istring = Concat(e.istring, arc.ilabel);
- const String *ostring = Concat(e.ostring, arc.olabel);
- StateId d = FindState(Element(arc.nextstate, istring, ostring));
- PushArc(s, Arc(0 , 0, arc.weight, d));
- }
- }
-
- Weight w = e.state == kNoStateId ? Weight::One() : fst_->Final(e.state);
- if ((w != Weight::Zero()) &&
- ((e.istring)->size() + (e.ostring)->size() > 0)) {
- const String *istring = Cdr(e.istring);
- const String *ostring = Cdr(e.ostring);
- StateId d = FindState(Element(kNoStateId, istring, ostring));
- PushArc(s, Arc(Car(e.istring), Car(e.ostring), w, d));
- }
- SetArcs(s);
- }
-
- private:
- // Equality function for Elements, assume strings have been hashed.
- class ElementEqual {
- public:
- bool operator()(const Element &x, const Element &y) const {
- return x.state == y.state &&
- x.istring == y.istring &&
- x.ostring == y.ostring;
- }
- };
-
- // Hash function for Elements to Fst states.
- class ElementKey {
- public:
- size_t operator()(const Element &x) const {
- size_t key = x.state;
- key = (key << 1) ^ (x.istring)->size();
- for (size_t i = 0; i < (x.istring)->size(); ++i)
- key = (key << 1) ^ (*x.istring)[i];
- key = (key << 1) ^ (x.ostring)->size();
- for (size_t i = 0; i < (x.ostring)->size(); ++i)
- key = (key << 1) ^ (*x.ostring)[i];
- return key;
- }
- };
-
- // Equality function for strings
- class StringEqual {
- public:
- bool operator()(const String * const &x, const String * const &y) const {
- if (x->size() != y->size()) return false;
- for (size_t i = 0; i < x->size(); ++i)
- if ((*x)[i] != (*y)[i]) return false;
- return true;
- }
- };
-
- // Hash function for set of strings
- class StringKey{
- public:
- size_t operator()(const String * const & x) const {
- size_t key = x->size();
- for (size_t i = 0; i < x->size(); ++i)
- key = (key << 1) ^ (*x)[i];
- return key;
- }
- };
-
-
- typedef unordered_map<Element, StateId, ElementKey, ElementEqual> ElementMap;
- typedef unordered_set<const String*, StringKey, StringEqual> StringSet;
-
- const Fst<A> *fst_;
- vector<Element> elements_; // mapping Fst state to Elements
- ElementMap element_map_; // mapping Elements to Fst state
- StringSet string_set_;
-
- void operator=(const SynchronizeFstImpl<A> &); // disallow
-};
-
-
-// Synchronizes a transducer. This version is a delayed Fst. The
-// result will be an equivalent FST that has the property that during
-// the traversal of a path, the delay is either zero or strictly
-// increasing, where the delay is the difference between the number of
-// non-epsilon output labels and input labels along the path.
-//
-// For the algorithm to terminate, the input transducer must have
-// bounded delay, i.e., the delay of every cycle must be zero.
-//
-// Complexity:
-// - A has bounded delay: exponential
-// - A does not have bounded delay: does not terminate
-//
-// References:
-// - Mehryar Mohri. Edit-Distance of Weighted Automata: General
-// Definitions and Algorithms, International Journal of Computer
-// Science, 14(6): 957-982 (2003).
-//
-// This class attaches interface to implementation and handles
-// reference counting, delegating most methods to ImplToFst.
-template <class A>
-class SynchronizeFst : public ImplToFst< SynchronizeFstImpl<A> > {
- public:
- friend class ArcIterator< SynchronizeFst<A> >;
- friend class StateIterator< SynchronizeFst<A> >;
-
- typedef A Arc;
- typedef typename A::Weight Weight;
- typedef typename A::StateId StateId;
- typedef CacheState<A> State;
- typedef SynchronizeFstImpl<A> Impl;
-
- SynchronizeFst(const Fst<A> &fst)
- : ImplToFst<Impl>(new Impl(fst, SynchronizeFstOptions())) {}
-
- SynchronizeFst(const Fst<A> &fst, const SynchronizeFstOptions &opts)
- : ImplToFst<Impl>(new Impl(fst, opts)) {}
-
- // See Fst<>::Copy() for doc.
- SynchronizeFst(const SynchronizeFst<A> &fst, bool safe = false)
- : ImplToFst<Impl>(fst, safe) {}
-
- // Get a copy of this SynchronizeFst. See Fst<>::Copy() for further doc.
- virtual SynchronizeFst<A> *Copy(bool safe = false) const {
- return new SynchronizeFst<A>(*this, safe);
- }
-
- virtual inline void InitStateIterator(StateIteratorData<A> *data) const;
-
- virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const {
- GetImpl()->InitArcIterator(s, data);
- }
-
- private:
- // Makes visible to friends.
- Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }
-
- void operator=(const SynchronizeFst<A> &fst); // Disallow
-};
-
-
-// Specialization for SynchronizeFst.
-template<class A>
-class StateIterator< SynchronizeFst<A> >
- : public CacheStateIterator< SynchronizeFst<A> > {
- public:
- explicit StateIterator(const SynchronizeFst<A> &fst)
- : CacheStateIterator< SynchronizeFst<A> >(fst, fst.GetImpl()) {}
-};
-
-
-// Specialization for SynchronizeFst.
-template <class A>
-class ArcIterator< SynchronizeFst<A> >
- : public CacheArcIterator< SynchronizeFst<A> > {
- public:
- typedef typename A::StateId StateId;
-
- ArcIterator(const SynchronizeFst<A> &fst, StateId s)
- : CacheArcIterator< SynchronizeFst<A> >(fst.GetImpl(), s) {
- if (!fst.GetImpl()->HasArcs(s))
- fst.GetImpl()->Expand(s);
- }
-
- private:
- DISALLOW_COPY_AND_ASSIGN(ArcIterator);
-};
-
-
-template <class A> inline
-void SynchronizeFst<A>::InitStateIterator(StateIteratorData<A> *data) const
-{
- data->base = new StateIterator< SynchronizeFst<A> >(*this);
-}
-
-
-
-// Synchronizes a transducer. This version writes the synchronized
-// result to a MutableFst. The result will be an equivalent FST that
-// has the property that during the traversal of a path, the delay is
-// either zero or strictly increasing, where the delay is the
-// difference between the number of non-epsilon output labels and
-// input labels along the path.
-//
-// For the algorithm to terminate, the input transducer must have
-// bounded delay, i.e., the delay of every cycle must be zero.
-//
-// Complexity:
-// - A has bounded delay: exponential
-// - A does not have bounded delay: does not terminate
-//
-// References:
-// - Mehryar Mohri. Edit-Distance of Weighted Automata: General
-// Definitions and Algorithms, International Journal of Computer
-// Science, 14(6): 957-982 (2003).
-template<class Arc>
-void Synchronize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst) {
- SynchronizeFstOptions opts;
- opts.gc_limit = 0; // Cache only the last state for fastest copy.
- *ofst = SynchronizeFst<Arc>(ifst, opts);
-}
-
-} // namespace fst
-
-#endif // FST_LIB_SYNCHRONIZE_H__