// minimize.h
// minimize.h
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: johans@google.com (Johan Schalkwyk)
//
// \file Functions and classes to minimize a finite state acceptor
//
#ifndef FST_LIB_MINIMIZE_H__
#define FST_LIB_MINIMIZE_H__
#include <cmath>
#include <algorithm>
#include <map>
#include <queue>
#include <vector>
using std::vector;
#include <fst/arcsort.h>
#include <fst/connect.h>
#include <fst/dfs-visit.h>
#include <fst/encode.h>
#include <fst/factor-weight.h>
#include <fst/fst.h>
#include <fst/mutable-fst.h>
#include <fst/partition.h>
#include <fst/push.h>
#include <fst/queue.h>
#include <fst/reverse.h>
#include <fst/state-map.h>
namespace fst {
// comparator for creating partition based on sorting on
// - states
// - final weight
// - out degree,
// - (input label, output label, weight, destination_block)
template <class A>
class StateComparator {
public:
typedef typename A::StateId StateId;
typedef typename A::Weight Weight;
static const uint32 kCompareFinal = 0x00000001;
static const uint32 kCompareOutDegree = 0x00000002;
static const uint32 kCompareArcs = 0x00000004;
static const uint32 kCompareAll = 0x00000007;
StateComparator(const Fst<A>& fst,
const Partition<typename A::StateId>& partition,
uint32 flags = kCompareAll)
: fst_(fst), partition_(partition), flags_(flags) {}
// compare state x with state y based on sort criteria
bool operator()(const StateId x, const StateId y) const {
// check for final state equivalence
if (flags_ & kCompareFinal) {
const size_t xfinal = fst_.Final(x).Hash();
const size_t yfinal = fst_.Final(y).Hash();
if (xfinal < yfinal) return true;
else if (xfinal > yfinal) return false;
}
if (flags_ & kCompareOutDegree) {
// check for # arcs
if (fst_.NumArcs(x) < fst_.NumArcs(y)) return true;
if (fst_.NumArcs(x) > fst_.NumArcs(y)) return false;
if (flags_ & kCompareArcs) {
// # arcs are equal, check for arc match
for (ArcIterator<Fst<A> > aiter1(fst_, x), aiter2(fst_, y);
!aiter1.Done() && !aiter2.Done(); aiter1.Next(), aiter2.Next()) {
const A& arc1 = aiter1.Value();
const A& arc2 = aiter2.Value();
if (arc1.ilabel < arc2.ilabel) return true;
if (arc1.ilabel > arc2.ilabel) return false;
if (partition_.class_id(arc1.nextstate) <
partition_.class_id(arc2.nextstate)) return true;
if (partition_.class_id(arc1.nextstate) >
partition_.class_id(arc2.nextstate)) return false;
}
}
}
return false;
}
private:
const Fst<A>& fst_;
const Partition<typename A::StateId>& partition_;
const uint32 flags_;
};
template <class A> const uint32 StateComparator<A>::kCompareFinal;
template <class A> const uint32 StateComparator<A>::kCompareOutDegree;
template <class A> const uint32 StateComparator<A>::kCompareArcs;
template <class A> const uint32 StateComparator<A>::kCompareAll;
// Computes equivalence classes for cyclic Fsts. For cyclic minimization
// we use the classic HopCroft minimization algorithm, which is of
//
// O(E)log(N),
//
// where E is the number of edges in the machine and N is number of states.
//
// The following paper describes the original algorithm
// An N Log N algorithm for minimizing states in a finite automaton
// by John HopCroft, January 1971
//
template <class A, class Queue>
class CyclicMinimizer {
public:
typedef typename A::Label Label;
typedef typename A::StateId StateId;
typedef typename A::StateId ClassId;
typedef typename A::Weight Weight;
typedef ReverseArc<A> RevA;
CyclicMinimizer(const ExpandedFst<A>& fst):
// tell the Partition data-member to expect multiple repeated
// calls to SplitOn with the same element if we are non-deterministic.
P_(fst.Properties(kIDeterministic, true) == 0) {
if(fst.Properties(kIDeterministic, true) == 0)
CHECK(Weight::Properties() & kIdempotent); // this minimization
// algorithm for non-deterministic FSTs can only work with idempotent
// semirings.
Initialize(fst);
Compute(fst);
}
~CyclicMinimizer() {
delete aiter_queue_;
}
const Partition<StateId>& partition() const {
return P_;
}
// helper classes
private:
typedef ArcIterator<Fst<RevA> > ArcIter;
class ArcIterCompare {
public:
ArcIterCompare(const Partition<StateId>& partition)
: partition_(partition) {}
ArcIterCompare(const ArcIterCompare& comp)
: partition_(comp.partition_) {}
// compare two iterators based on there input labels, and proto state
// (partition class Ids)
bool operator()(const ArcIter* x, const ArcIter* y) const {
const RevA& xarc = x->Value();
const RevA& yarc = y->Value();
return (xarc.ilabel > yarc.ilabel);
}
private:
const Partition<StateId>& partition_;
};
typedef priority_queue<ArcIter*, vector<ArcIter*>, ArcIterCompare>
ArcIterQueue;
// helper methods
private:
// prepartitions the space into equivalence classes with
// same final weight
// same # arcs per state
// same outgoing arcs
void PrePartition(const Fst<A>& fst) {
VLOG(5) << "PrePartition";
typedef map<StateId, StateId, StateComparator<A> > EquivalenceMap;
StateComparator<A> comp(fst, P_, StateComparator<A>::kCompareFinal);
EquivalenceMap equiv_map(comp);
StateIterator<Fst<A> > siter(fst);
StateId class_id = P_.AddClass();
P_.Add(siter.Value(), class_id);
equiv_map[siter.Value()] = class_id;
L_.Enqueue(class_id);
for (siter.Next(); !siter.Done(); siter.Next()) {
StateId s = siter.Value();
typename EquivalenceMap::const_iterator it = equiv_map.find(s);
if (it == equiv_map.end()) {
class_id = P_.AddClass();
P_.Add(s, class_id);
equiv_