summaryrefslogtreecommitdiff
path: root/kaldi_io/src/tools/openfst/include/fst/minimize.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/minimize.h')
-rw-r--r--kaldi_io/src/tools/openfst/include/fst/minimize.h591
1 files changed, 0 insertions, 591 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/minimize.h b/kaldi_io/src/tools/openfst/include/fst/minimize.h
deleted file mode 100644
index 6e9dd3d..0000000
--- a/kaldi_io/src/tools/openfst/include/fst/minimize.h
+++ /dev/null
@@ -1,591 +0,0 @@
-// minimize.h
-// minimize.h
-
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// Copyright 2005-2010 Google, Inc.
-// Author: [email protected] (Johan Schalkwyk)
-//
-// \file Functions and classes to minimize a finite state acceptor
-//
-
-#ifndef FST_LIB_MINIMIZE_H__
-#define FST_LIB_MINIMIZE_H__
-
-#include <cmath>
-
-#include <algorithm>
-#include <map>
-#include <queue>
-#include <vector>
-using std::vector;
-
-#include <fst/arcsort.h>
-#include <fst/connect.h>
-#include <fst/dfs-visit.h>
-#include <fst/encode.h>
-#include <fst/factor-weight.h>
-#include <fst/fst.h>
-#include <fst/mutable-fst.h>
-#include <fst/partition.h>
-#include <fst/push.h>
-#include <fst/queue.h>
-#include <fst/reverse.h>
-#include <fst/state-map.h>
-
-
-namespace fst {
-
-// comparator for creating partition based on sorting on
-// - states
-// - final weight
-// - out degree,
-// - (input label, output label, weight, destination_block)
-template <class A>
-class StateComparator {
- public:
- typedef typename A::StateId StateId;
- typedef typename A::Weight Weight;
-
- static const uint32 kCompareFinal = 0x00000001;
- static const uint32 kCompareOutDegree = 0x00000002;
- static const uint32 kCompareArcs = 0x00000004;
- static const uint32 kCompareAll = 0x00000007;
-
- StateComparator(const Fst<A>& fst,
- const Partition<typename A::StateId>& partition,
- uint32 flags = kCompareAll)
- : fst_(fst), partition_(partition), flags_(flags) {}
-
- // compare state x with state y based on sort criteria
- bool operator()(const StateId x, const StateId y) const {
- // check for final state equivalence
- if (flags_ & kCompareFinal) {
- const size_t xfinal = fst_.Final(x).Hash();
- const size_t yfinal = fst_.Final(y).Hash();
- if (xfinal < yfinal) return true;
- else if (xfinal > yfinal) return false;
- }
-
- if (flags_ & kCompareOutDegree) {
- // check for # arcs
- if (fst_.NumArcs(x) < fst_.NumArcs(y)) return true;
- if (fst_.NumArcs(x) > fst_.NumArcs(y)) return false;
-
- if (flags_ & kCompareArcs) {
- // # arcs are equal, check for arc match
- for (ArcIterator<Fst<A> > aiter1(fst_, x), aiter2(fst_, y);
- !aiter1.Done() && !aiter2.Done(); aiter1.Next(), aiter2.Next()) {
- const A& arc1 = aiter1.Value();
- const A& arc2 = aiter2.Value();
- if (arc1.ilabel < arc2.ilabel) return true;
- if (arc1.ilabel > arc2.ilabel) return false;
-
- if (partition_.class_id(arc1.nextstate) <
- partition_.class_id(arc2.nextstate)) return true;
- if (partition_.class_id(arc1.nextstate) >
- partition_.class_id(arc2.nextstate)) return false;
- }
- }
- }
-
- return false;
- }
-
- private:
- const Fst<A>& fst_;
- const Partition<typename A::StateId>& partition_;
- const uint32 flags_;
-};
-
-template <class A> const uint32 StateComparator<A>::kCompareFinal;
-template <class A> const uint32 StateComparator<A>::kCompareOutDegree;
-template <class A> const uint32 StateComparator<A>::kCompareArcs;
-template <class A> const uint32 StateComparator<A>::kCompareAll;
-
-
-// Computes equivalence classes for cyclic Fsts. For cyclic minimization
-// we use the classic HopCroft minimization algorithm, which is of
-//
-// O(E)log(N),
-//
-// where E is the number of edges in the machine and N is number of states.
-//
-// The following paper describes the original algorithm
-// An N Log N algorithm for minimizing states in a finite automaton
-// by John HopCroft, January 1971
-//
-template <class A, class Queue>
-class CyclicMinimizer {
- public:
- typedef typename A::Label Label;
- typedef typename A::StateId StateId;
- typedef typename A::StateId ClassId;
- typedef typename A::Weight Weight;
- typedef ReverseArc<A> RevA;
-
- CyclicMinimizer(const ExpandedFst<A>& fst):
- // tell the Partition data-member to expect multiple repeated
- // calls to SplitOn with the same element if we are non-deterministic.
- P_(fst.Properties(kIDeterministic, true) == 0) {
- if(fst.Properties(kIDeterministic, true) == 0)
- CHECK(Weight::Properties() & kIdempotent); // this minimization
- // algorithm for non-deterministic FSTs can only work with idempotent
- // semirings.
- Initialize(fst);
- Compute(fst);
- }
-
- ~CyclicMinimizer() {
- delete aiter_queue_;
- }
-
- const Partition<StateId>& partition() const {
- return P_;
- }
-
- // helper classes
- private:
- typedef ArcIterator<Fst<RevA> > ArcIter;
- class ArcIterCompare {
- public:
- ArcIterCompare(const Partition<StateId>& partition)
- : partition_(partition) {}
-
- ArcIterCompare(const ArcIterCompare& comp)
- : partition_(comp.partition_) {}
-
- // compare two iterators based on there input labels, and proto state
- // (partition class Ids)
- bool operator()(const ArcIter* x, const ArcIter* y) const {
- const RevA& xarc = x->Value();
- const RevA& yarc = y->Value();
- return (xarc.ilabel > yarc.ilabel);
- }
-
- private:
- const Partition<StateId>& partition_;
- };
-
- typedef priority_queue<ArcIter*, vector<ArcIter*>, ArcIterCompare>
- ArcIterQueue;
-
- // helper methods
- private:
- // prepartitions the space into equivalence classes with
- // same final weight
- // same # arcs per state
- // same outgoing arcs
- void PrePartition(const Fst<A>& fst) {
- VLOG(5) << "PrePartition";
-
- typedef map<StateId, StateId, StateComparator<A> > EquivalenceMap;
- StateComparator<A> comp(fst, P_, StateComparator<A>::kCompareFinal);
- EquivalenceMap equiv_map(comp);
-
- StateIterator<Fst<A> > siter(fst);
- StateId class_id = P_.AddClass();
- P_.Add(siter.Value(), class_id);
- equiv_map[siter.Value()] = class_id;
- L_.Enqueue(class_id);
- for (siter.Next(); !siter.Done(); siter.Next()) {
- StateId s = siter.Value();
- typename EquivalenceMap::const_iterator it = equiv_map.find(s);
- if (it == equiv_map.end()) {
- class_id = P_.AddClass();
- P_.Add(s, class_id);
- equiv_map[s] = class_id;
- L_.Enqueue(class_id);
- } else {
- P_.Add(s, it->second);
- equiv_map[s] = it->second;
- }
- }
-
- VLOG(5) << "Initial Partition: " << P_.num_classes();
- }
-
- // - Create inverse transition Tr_ = rev(fst)
- // - loop over states in fst and split on final, creating two blocks
- // in the partition corresponding to final, non-final
- void Initialize(const Fst<A>& fst) {
- // construct Tr
- Reverse(fst, &Tr_);
- ILabelCompare<RevA> ilabel_comp;
- ArcSort(&Tr_, ilabel_comp);
-
- // initial split (F, S - F)
- P_.Initialize(Tr_.NumStates() - 1);
-
- // prep partition
- PrePartition(fst);
-
- // allocate arc iterator queue
- ArcIterCompare comp(P_);
- aiter_queue_ = new ArcIterQueue(comp);
- }
-
- // partition all classes with destination C
- void Split(ClassId C) {
- // Prep priority queue. Open arc iterator for each state in C, and
- // insert into priority queue.
- for (PartitionIterator<StateId> siter(P_, C);
- !siter.Done(); siter.Next()) {
- StateId s = siter.Value();
- if (Tr_.NumArcs(s + 1))
- aiter_queue_->push(new ArcIterator<Fst<RevA> >(Tr_, s + 1));
- }
-
- // Now pop arc iterator from queue, split entering equivalence class
- // re-insert updated iterator into queue.
- Label prev_label = -1;
- while (!aiter_queue_->empty()) {
- ArcIterator<Fst<RevA> >* aiter = aiter_queue_->top();
- aiter_queue_->pop();
- if (aiter->Done()) {
- delete aiter;
- continue;
- }
-
- const RevA& arc = aiter->Value();
- StateId from_state = aiter->Value().nextstate - 1;
- Label from_label = arc.ilabel;
- if (prev_label != from_label)
- P_.FinalizeSplit(&L_);
-
- StateId from_class = P_.class_id(from_state);
- if (P_.class_size(from_class) > 1)
- P_.SplitOn(from_state);
-
- prev_label = from_label;
- aiter->Next();
- if (aiter->Done())
- delete aiter;
- else
- aiter_queue_->push(aiter);
- }
- P_.FinalizeSplit(&L_);
- }
-
- // Main loop for hopcroft minimization.
- void Compute(const Fst<A>& fst) {
- // process active classes (FIFO, or FILO)
- while (!L_.Empty()) {
- ClassId C = L_.Head();
- L_.Dequeue();
-
- // split on C, all labels in C
- Split(C);
- }
- }
-
- // helper data
- private:
- // Partioning of states into equivalence classes
- Partition<StateId> P_;
-
- // L = set of active classes to be processed in partition P
- Queue L_;
-
- // reverse transition function
- VectorFst<RevA> Tr_;
-
- // Priority queue of open arc iterators for all states in the 'splitter'
- // equivalence class
- ArcIterQueue* aiter_queue_;
-};
-
-
-// Computes equivalence classes for acyclic Fsts. The implementation details
-// for this algorithms is documented by the following paper.
-//
-// Minimization of acyclic deterministic automata in linear time
-// Dominque Revuz
-//
-// Complexity O(|E|)
-//
-template <class A>
-class AcyclicMinimizer {
- public:
- typedef typename A::Label Label;
- typedef typename A::StateId StateId;
- typedef typename A::StateId ClassId;
- typedef typename A::Weight Weight;
-
- AcyclicMinimizer(const ExpandedFst<A>& fst):
- // tell the Partition data-member to expect multiple repeated
- // calls to SplitOn with the same element if we are non-deterministic.
- partition_(fst.Properties(kIDeterministic, true) == 0) {
- if(fst.Properties(kIDeterministic, true) == 0)
- CHECK(Weight::Properties() & kIdempotent); // minimization for
- // non-deterministic FSTs can only work with idempotent semirings.
- Initialize(fst);
- Refine(fst);
- }
-
- const Partition<StateId>& partition() {
- return partition_;
- }
-
- // helper classes
- private:
- // DFS visitor to compute the height (distance) to final state.
- class HeightVisitor {
- public:
- HeightVisitor() : max_height_(0), num_states_(0) { }
-
- // invoked before dfs visit
- void InitVisit(const Fst<A>& fst) {}
-
- // invoked when state is discovered (2nd arg is DFS tree root)
- bool InitState(StateId s, StateId root) {
- // extend height array and initialize height (distance) to 0
- for (size_t i = height_.size(); i <= s; ++i)
- height_.push_back(-1);
-
- if (s >= num_states_) num_states_ = s + 1;
- return true;
- }
-
- // invoked when tree arc examined (to undiscoverted state)
- bool TreeArc(StateId s, const A& arc) {
- return true;
- }
-
- // invoked when back arc examined (to unfinished state)
- bool BackArc(StateId s, const A& arc) {
- return true;
- }
-
- // invoked when forward or cross arc examined (to finished state)
- bool ForwardOrCrossArc(StateId s, const A& arc) {
- if (height_[arc.nextstate] + 1 > height_[s])
- height_[s] = height_[arc.nextstate] + 1;
- return true;
- }
-
- // invoked when state finished (parent is kNoStateId for tree root)
- void FinishState(StateId s, StateId parent, const A* parent_arc) {
- if (height_[s] == -1) height_[s] = 0;
- StateId h = height_[s] + 1;
- if (parent >= 0) {
- if (h > height_[parent]) height_[parent] = h;
- if (h > max_height_) max_height_ = h;
- }
- }
-
- // invoked after DFS visit
- void FinishVisit() {}
-
- size_t max_height() const { return max_height_; }
-
- const vector<StateId>& height() const { return height_; }
-
- const size_t num_states() const { return num_states_; }
-
- private:
- vector<StateId> height_;
- size_t max_height_;
- size_t num_states_;
- };
-
- // helper methods
- private:
- // cluster states according to height (distance to final state)
- void Initialize(const Fst<A>& fst) {
- // compute height (distance to final state)
- HeightVisitor hvisitor;
- DfsVisit(fst, &hvisitor);
-
- // create initial partition based on height
- partition_.Initialize(hvisitor.num_states());
- partition_.AllocateClasses(hvisitor.max_height() + 1);
- const vector<StateId>& hstates = hvisitor.height();
- for (size_t s = 0; s < hstates.size(); ++s)
- partition_.Add(s, hstates[s]);
- }
-
- // refine states based on arc sort (out degree, arc equivalence)
- void Refine(const Fst<A>& fst) {
- typedef map<StateId, StateId, StateComparator<A> > EquivalenceMap;
- StateComparator<A> comp(fst, partition_);
-
- // start with tail (height = 0)
- size_t height = partition_.num_classes();
- for (size_t h = 0; h < height; ++h) {
- EquivalenceMap equiv_classes(comp);
-
- // sort states within equivalence class
- PartitionIterator<StateId> siter(partition_, h);
- equiv_classes[siter.Value()] = h;
- for (siter.Next(); !siter.Done(); siter.Next()) {
- const StateId s = siter.Value();
- typename EquivalenceMap::const_iterator it = equiv_classes.find(s);
- if (it == equiv_classes.end())
- equiv_classes[s] = partition_.AddClass();
- else
- equiv_classes[s] = it->second;
- }
-
- // create refined partition
- for (siter.Reset(); !siter.Done();) {
- const StateId s = siter.Value();
- const StateId old_class = partition_.class_id(s);
- const StateId new_class = equiv_classes[s];
-
- // a move operation can invalidate the iterator, so
- // we first update the iterator to the next element
- // before we move the current element out of the list
- siter.Next();
- if (old_class != new_class)
- partition_.Move(s, new_class);
- }
- }
- }
-
- private:
- Partition<StateId> partition_;
-};
-
-
-// Given a partition and a mutable fst, merge states of Fst inplace
-// (i.e. destructively). Merging works by taking the first state in
-// a class of the partition to be the representative state for the class.
-// Each arc is then reconnected to this state. All states in the class
-// are merged by adding there arcs to the representative state.
-template <class A>
-void MergeStates(
- const Partition<typename A::StateId>& partition, MutableFst<A>* fst) {
- typedef typename A::StateId StateId;
-
- vector<StateId> state_map(partition.num_classes());
- for (size_t i = 0; i < partition.num_classes(); ++i) {
- PartitionIterator<StateId> siter(partition, i);
- state_map[i] = siter.Value(); // first state in partition;
- }
-
- // relabel destination states
- for (size_t c = 0; c < partition.num_classes(); ++c) {
- for (PartitionIterator<StateId> siter(partition, c);
- !siter.Done(); siter.Next()) {
- StateId s = siter.Value();
- for (MutableArcIterator<MutableFst<A> > aiter(fst, s);
- !aiter.Done(); aiter.Next()) {
- A arc = aiter.Value();
- arc.nextstate = state_map[partition.class_id(arc.nextstate)];
-
- if (s == state_map[c]) // first state just set destination
- aiter.SetValue(arc);
- else
- fst->AddArc(state_map[c], arc);
- }
- }
- }
- fst->SetStart(state_map[partition.class_id(fst->Start())]);
-
- Connect(fst);
-}
-
-template <class A>
-void AcceptorMinimize(MutableFst<A>* fst) {
- typedef typename A::StateId StateId;
- if (!(fst->Properties(kAcceptor | kUnweighted, true))) {
- FSTERROR() << "FST is not an unweighted acceptor";
- fst->SetProperties(kError, kError);
- return;
- }
-
- // connect fst before minimization, handles disconnected states
- Connect(fst);
- if (fst->NumStates() == 0) return;
-
- if (fst->Properties(kAcyclic, true)) {
- // Acyclic minimization (revuz)
- VLOG(2) << "Acyclic Minimization";
- ArcSort(fst, ILabelCompare<A>());
- AcyclicMinimizer<A> minimizer(*fst);
- MergeStates(minimizer.partition(), fst);
-
- } else {
- // Cyclic minimizaton (hopcroft)
- VLOG(2) << "Cyclic Minimization";
- CyclicMinimizer<A, LifoQueue<StateId> > minimizer(*fst);
- MergeStates(minimizer.partition(), fst);
- }
-
- // Merge in appropriate semiring
- ArcUniqueMapper<A> mapper(*fst);
- StateMap(fst, mapper);
-}
-
-
-// In place minimization of deterministic weighted automata and transducers.
-// For transducers, then the 'sfst' argument is not null, the algorithm
-// produces a compact factorization of the minimal transducer.
-//
-// In the acyclic case, we use an algorithm from Dominique Revuz that
-// is linear in the number of arcs (edges) in the machine.
-// Complexity = O(E)
-//
-// In the cyclic case, we use the classical hopcroft minimization.
-// Complexity = O(|E|log(|N|)
-//
-template <class A>
-void Minimize(MutableFst<A>* fst,
- MutableFst<A>* sfst = 0,
- float delta = kDelta) {
- uint64 props = fst->Properties(kAcceptor | kWeighted | kUnweighted, true);
-
- if (!(props & kAcceptor)) { // weighted transducer
- VectorFst< GallicArc<A, STRING_LEFT> > gfst;
- ArcMap(*fst, &gfst, ToGallicMapper<A, STRING_LEFT>());
- fst->DeleteStates();
- gfst.SetProperties(kAcceptor, kAcceptor);
- Push(&gfst, REWEIGHT_TO_INITIAL, delta);
- ArcMap(&gfst, QuantizeMapper< GallicArc<A, STRING_LEFT> >(delta));
- EncodeMapper< GallicArc<A, STRING_LEFT> >
- encoder(kEncodeLabels | kEncodeWeights, ENCODE);
- Encode(&gfst, &encoder);
- AcceptorMinimize(&gfst);
- Decode(&gfst, encoder);
-
- if (sfst == 0) {
- FactorWeightFst< GallicArc<A, STRING_LEFT>,
- GallicFactor<typename A::Label,
- typename A::Weight, STRING_LEFT> > fwfst(gfst);
- SymbolTable *osyms = fst->OutputSymbols() ?
- fst->OutputSymbols()->Copy() : 0;
- ArcMap(fwfst, fst, FromGallicMapper<A, STRING_LEFT>());
- fst->SetOutputSymbols(osyms);
- delete osyms;
- } else {
- sfst->SetOutputSymbols(fst->OutputSymbols());
- GallicToNewSymbolsMapper<A, STRING_LEFT> mapper(sfst);
- ArcMap(gfst, fst, &mapper);
- fst->SetOutputSymbols(sfst->InputSymbols());
- }
- } else if (props & kWeighted) { // weighted acceptor
- Push(fst, REWEIGHT_TO_INITIAL, delta);
- ArcMap(fst, QuantizeMapper<A>(delta));
- EncodeMapper<A> encoder(kEncodeLabels | kEncodeWeights, ENCODE);
- Encode(fst, &encoder);
- AcceptorMinimize(fst);
- Decode(fst, encoder);
- } else { // unweighted acceptor
- AcceptorMinimize(fst);
- }
-}
-
-} // namespace fst
-
-#endif // FST_LIB_MINIMIZE_H__