// determinize.h
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: riley@google.com (Michael Riley)
//
// \file
// Functions and classes to determinize an FST.
#ifndef FST_LIB_DETERMINIZE_H__
#define FST_LIB_DETERMINIZE_H__
#include <algorithm>
#include <climits>
#include <tr1/unordered_map>
using std::tr1::unordered_map;
using std::tr1::unordered_multimap;
#include <map>
#include <fst/slist.h>
#include <string>
#include <vector>
using std::vector;
#include <fst/arc-map.h>
#include <fst/cache.h>
#include <fst/bi-table.h>
#include <fst/factor-weight.h>
#include <fst/prune.h>
#include <fst/test-properties.h>
namespace fst {
//
// COMMON DIVISORS - these are used in determinization to compute
// the transition weights. In the simplest case, it is just the same
// as the semiring Plus(). However, other choices permit more efficient
// determinization when the output contains strings.
//
// The default common divisor uses the semiring Plus.
template <class W>
class DefaultCommonDivisor {
public:
typedef W Weight;
W operator()(const W &w1, const W &w2) const { return Plus(w1, w2); }
};
// The label common divisor for a (left) string semiring selects a
// single letter common prefix or the empty string. This is used in
// the determinization of output strings so that at most a single
// letter will appear in the output of a transtion.
template <typename L, StringType S>
class LabelCommonDivisor {
public:
typedef StringWeight<L, S> Weight;
Weight operator()(const Weight &w1, const Weight &w2) const {
StringWeightIterator<L, S> iter1(w1);
StringWeightIterator<L, S> iter2(w2);
if (!(StringWeight<L, S>::Properties() & kLeftSemiring)) {
FSTERROR() << "LabelCommonDivisor: Weight needs to be left semiring";
return Weight::NoWeight();
} else if (w1.Size() == 0 || w2.Size() == 0) {
return Weight::One();
} else if (w1 == Weight::Zero()) {
return Weight(iter2.Value());
} else if (w2 == Weight::Zero()) {
return Weight(iter1.Value());
} else if (iter1.Value() == iter2.Value()) {
return Weight(iter1.Value());
} else {
return Weight::One();
}
}
};
// The gallic common divisor uses the label common divisor on the
// string component and the template argument D common divisor on the
// weight component, which defaults to the default common divisor.
template <class L, class W, StringType S, class D = DefaultCommonDivisor<W> >
class GallicCommonDivisor {
public:
typedef GallicWeight<L, W, S> Weight;
Weight operator()(const Weight &w1, const Weight &w2) const {
return Weight(label_common_divisor_(w1.Value1(), w2.Value1()),
weight_common_divisor_(w1.Value2(), w2.Value2()));
}
private:
LabelCommonDivisor<L, S> label_common_divisor_;
D weight_common_divisor_;
};
// Represents an element in a subset
template <class A>
struct DeterminizeElement {
typedef typename A::StateId StateId;
typedef typename A::Weight Weight;
DeterminizeElement() {}
DeterminizeElement(StateId s, Weight w) : state_id(s), weight(w) {}
bool operator==(const DeterminizeElement<A> & element) const {
return state_id == element.state_id && weight == element.weight;
}
bool operator<(const DeterminizeElement<A> & element) const {
return state_id < element.state_id ||
(state_id == element.state_id && weight == element.weight);
}
StateId state_id; // Input state Id
Weight weight; // Residual weight
};
//
// DETERMINIZE FILTERS - these can be used in determinization to compute
// transformations on the subsets prior to their being added as destination
// states. The filter operates on a map between a label and the
// corresponding destination subsets. The possibly modified map is
// then used to construct the destination states for arcs exiting state 's'.
// It must define the ordered map type LabelMap and have a default
// and copy constructor.
// A determinize filter that does not modify its input.
template <class Arc>
struct IdentityDeterminizeFilter {
typedef typename Arc::StateId StateId;
typedef typename Arc::Label Label;
typedef slist< DeterminizeElement<Arc> > Subset;
typedef map<Label, Subset*> LabelMap;
static uint64 Properties(uint64 props) { return props; }
void operator()(StateId s, LabelMap *label_map) {}
};
//
// DETERMINIZATION STATE TABLES
//
// The determiziation state table has the form:
//