diff options
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/equivalent.h')
-rw-r--r-- | kaldi_io/src/tools/openfst/include/fst/equivalent.h | 275 |
1 files changed, 275 insertions, 0 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/equivalent.h b/kaldi_io/src/tools/openfst/include/fst/equivalent.h new file mode 100644 index 0000000..e28fea1 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/equivalent.h @@ -0,0 +1,275 @@ +// equivalent.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Wojciech Skut) +// +// \file Functions and classes to determine the equivalence of two +// FSTs. + +#ifndef FST_LIB_EQUIVALENT_H__ +#define FST_LIB_EQUIVALENT_H__ + +#include <algorithm> +#include <deque> +using std::deque; +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <utility> +using std::pair; using std::make_pair; +#include <vector> +using std::vector; + +#include <fst/encode.h> +#include <fst/push.h> +#include <fst/union-find.h> +#include <fst/vector-fst.h> + + +namespace fst { + +// Traits-like struct holding utility functions/typedefs/constants for +// the equivalence algorithm. +// +// Encoding device: in order to make the statesets of the two acceptors +// disjoint, we map Arc::StateId on the type MappedId. The states of +// the first acceptor are mapped on odd numbers (s -> 2s + 1), and +// those of the second one on even numbers (s -> 2s + 2). The number 0 +// is reserved for an implicit (non-final) 'dead state' (required for +// the correct treatment of non-coaccessible states; kNoStateId is +// mapped to kDeadState for both acceptors). The union-find algorithm +// operates on the mapped IDs. +template <class Arc> +struct EquivalenceUtil { + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + typedef StateId MappedId; // ID for an equivalence class. + + // MappedId for an implicit dead state. + static const MappedId kDeadState = 0; + + // MappedId for lookup failure. + static const MappedId kInvalidId = -1; + + // Maps state ID to the representative of the corresponding + // equivalence class. The parameter 'which_fst' takes the values 1 + // and 2, identifying the input FST. + static MappedId MapState(StateId s, int32 which_fst) { + return + (kNoStateId == s) + ? + kDeadState + : + (static_cast<MappedId>(s) << 1) + which_fst; + } + // Maps set ID to State ID. + static StateId UnMapState(MappedId id) { + return static_cast<StateId>((--id) >> 1); + } + // Convenience function: checks if state with MappedId 's' is final + // in acceptor 'fa'. + static bool IsFinal(const Fst<Arc> &fa, MappedId s) { + return + (kDeadState == s) ? + false : (fa.Final(UnMapState(s)) != Weight::Zero()); + } + // Convenience function: returns the representative of 'id' in 'sets', + // creating a new set if needed. + static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) { + MappedId repr = sets->FindSet(id); + if (repr != kInvalidId) { + return repr; + } else { + sets->MakeSet(id); + return id; + } + } +}; + +template <class Arc> const +typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kDeadState; + +template <class Arc> const +typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kInvalidId; + + +// Equivalence checking algorithm: determines if the two FSTs +// <code>fst1</code> and <code>fst2</code> are equivalent. The input +// FSTs must be deterministic input-side epsilon-free acceptors, +// unweighted or with weights over a left semiring. Two acceptors are +// considered equivalent if they accept exactly the same set of +// strings (with the same weights). +// +// The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and +// Analysis of Computer Programs") successively constructs sets of +// states that can be reached by the same prefixes, starting with a +// set containing the start states of both acceptors. A disjoint tree +// forest (the union-find algorithm) is used to represent the sets of +// states. The algorithm returns 'false' if one of the constructed +// sets contains both final and non-final states. Returns optional error +// value (when FLAGS_error_fatal = false). +// +// Complexity: quasi-linear, i.e. O(n G(n)), where +// n = |S1| + |S2| is the number of states in both acceptors +// G(n) is a very slowly growing function that can be approximated +// by 4 by all practical purposes. +// +template <class Arc> +bool Equivalent(const Fst<Arc> &fst1, + const Fst<Arc> &fst2, + double delta = kDelta, bool *error = 0) { + typedef typename Arc::Weight Weight; + if (error) *error = false; + + // Check that the symbol table are compatible + if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) || + !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) { + FSTERROR() << "Equivalent: input/output symbol tables of 1st argument " + << "do not match input/output symbol tables of 2nd argument"; + if (error) *error = true; + return false; + } + // Check properties first: + uint64 props = kNoEpsilons | kIDeterministic | kAcceptor; + if (fst1.Properties(props, true) != props) { + FSTERROR() << "Equivalent: first argument not an" + << " epsilon-free deterministic acceptor"; + if (error) *error = true; + return false; + } + if (fst2.Properties(props, true) != props) { + FSTERROR() << "Equivalent: second argument not an" + << " epsilon-free deterministic acceptor"; + if (error) *error = true; + return false; + } + + if ((fst1.Properties(kUnweighted , true) != kUnweighted) + || (fst2.Properties(kUnweighted , true) != kUnweighted)) { + VectorFst<Arc> efst1(fst1); + VectorFst<Arc> efst2(fst2); + Push(&efst1, REWEIGHT_TO_INITIAL, delta); + Push(&efst2, REWEIGHT_TO_INITIAL, delta); + ArcMap(&efst1, QuantizeMapper<Arc>(delta)); + ArcMap(&efst2, QuantizeMapper<Arc>(delta)); + EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE); + ArcMap(&efst1, &mapper); + ArcMap(&efst2, &mapper); + return Equivalent(efst1, efst2); + } + + // Convenience typedefs: + typedef typename Arc::StateId StateId; + typedef EquivalenceUtil<Arc> Util; + typedef typename Util::MappedId MappedId; + enum { FST1 = 1, FST2 = 2 }; // Required by Util::MapState(...) + + MappedId s1 = Util::MapState(fst1.Start(), FST1); + MappedId s2 = Util::MapState(fst2.Start(), FST2); + + // The union-find structure. + UnionFind<MappedId> eq_classes(1000, Util::kInvalidId); + + // Initialize the union-find structure. + eq_classes.MakeSet(s1); + eq_classes.MakeSet(s2); + + // Data structure for the (partial) acceptor transition function of + // fst1 and fst2: input labels mapped to pairs of MappedId's + // representing destination states of the corresponding arcs in fst1 + // and fst2, respectively. + typedef + unordered_map<typename Arc::Label, pair<MappedId, MappedId> > + Label2StatePairMap; + + Label2StatePairMap arc_pairs; + + // Pairs of MappedId's to be processed, organized in a queue. + deque<pair<MappedId, MappedId> > q; + + bool ret = true; + // Early return if the start states differ w.r.t. being final. + if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) { + ret = false; + } + + // Main loop: explores the two acceptors in a breadth-first manner, + // updating the equivalence relation on the statesets. Loop + // invariant: each block of states contains either final states only + // or non-final states only. + for (q.push_back(make_pair(s1, s2)); ret && !q.empty(); q.pop_front()) { + s1 = q.front().first; + s2 = q.front().second; + + // Representatives of the equivalence classes of s1/s2. + MappedId rep1 = Util::FindSet(&eq_classes, s1); + MappedId rep2 = Util::FindSet(&eq_classes, s2); + + if (rep1 != rep2) { + eq_classes.Union(rep1, rep2); + arc_pairs.clear(); + + // Copy outgoing arcs starting at s1 into the hashtable. + if (Util::kDeadState != s1) { + ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1)); + for (; !arc_iter.Done(); arc_iter.Next()) { + const Arc &arc = arc_iter.Value(); + if (arc.weight != Weight::Zero()) { // Zero-weight arcs + // are treated as + // non-exisitent. + arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1); + } + } + } + // Copy outgoing arcs starting at s2 into the hashtable. + if (Util::kDeadState != s2) { + ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2)); + for (; !arc_iter.Done(); arc_iter.Next()) { + const Arc &arc = arc_iter.Value(); + if (arc.weight != Weight::Zero()) { // Zero-weight arcs + // are treated as + // non-existent. + arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2); + } + } + } + // Iterate through the hashtable and process pairs of target + // states. + for (typename Label2StatePairMap::const_iterator + arc_iter = arc_pairs.begin(); + arc_iter != arc_pairs.end(); + ++arc_iter) { + const pair<MappedId, MappedId> &p = arc_iter->second; + if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) { + // Detected inconsistency: return false. + ret = false; + break; + } + q.push_back(p); + } + } + } + + if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) { + if (error) *error = true; + return false; + } + + return ret; +} + +} // namespace fst + +#endif // FST_LIB_EQUIVALENT_H__ |