summaryrefslogtreecommitdiff
path: root/kaldi_io/src/tools/openfst/include/fst/label-reachable.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/label-reachable.h')
-rw-r--r--kaldi_io/src/tools/openfst/include/fst/label-reachable.h565
1 files changed, 565 insertions, 0 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/label-reachable.h b/kaldi_io/src/tools/openfst/include/fst/label-reachable.h
new file mode 100644
index 0000000..af06eef
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/label-reachable.h
@@ -0,0 +1,565 @@
+// label_reachable.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: riley@google.com (Michael Riley)
+//
+// \file
+// Class to determine if a non-epsilon label can be read as the
+// first non-epsilon symbol along some path from a given state.
+
+
+#ifndef FST_LIB_LABEL_REACHABLE_H__
+#define FST_LIB_LABEL_REACHABLE_H__
+
+#include <tr1/unordered_map>
+using std::tr1::unordered_map;
+using std::tr1::unordered_multimap;
+#include <vector>
+using std::vector;
+
+#include <fst/accumulator.h>
+#include <fst/arcsort.h>
+#include <fst/interval-set.h>
+#include <fst/state-reachable.h>
+#include <fst/vector-fst.h>
+
+
+namespace fst {
+
+// Stores shareable data for label reachable class copies: the
+// per-state interval sets, the label -> index relabeling map and the
+// index used for the final (kNoLabel) label. The object carries its
+// own reference counter so that several owners can share one instance.
+template <typename L>
+class LabelReachableData {
+ public:
+  typedef L Label;
+  typedef typename IntervalSet<L>::Interval Interval;
+
+  // reach_input: true if input labels are considered, false for output
+  // labels. keep_relabel_data: whether Write() also stores the
+  // relabeling map.
+  explicit LabelReachableData(bool reach_input, bool keep_relabel_data = true)
+      : reach_input_(reach_input),
+        keep_relabel_data_(keep_relabel_data),
+        have_relabel_data_(true),
+        final_label_(kNoLabel) {}
+
+  ~LabelReachableData() {}
+
+  // Input (true) or output (false) labels considered?
+  bool ReachInput() const { return reach_input_; }
+
+  // Mutable access to the per-state interval sets.
+  vector< IntervalSet<L> > *IntervalSets() { return &isets_; }
+
+  // Mutable access to the relabeling map. Reports an error through
+  // FSTERROR() when the map was not loaded by Read(), but still
+  // returns the (then empty) map.
+  unordered_map<L, L> *Label2Index() {
+    if (!have_relabel_data_)
+      FSTERROR() << "LabelReachableData: no relabeling data";
+    return &label2index_;
+  }
+
+  // Returns the index assigned to kNoLabel. While the cached value is
+  // still kNoLabel it is looked up in the relabeling map; note that
+  // this lookup uses operator[] and therefore inserts an entry for
+  // kNoLabel if none exists yet.
+  Label FinalLabel() {
+    if (final_label_ == kNoLabel)
+      final_label_ = label2index_[kNoLabel];
+    return final_label_;
+  }
+
+  // Reads an object written by Write() and returns it in a newly
+  // allocated instance. The relabeling map is only present in the
+  // stream (and only read) when keep_relabel_data_ was set on writing.
+  static LabelReachableData<L> *Read(istream &istrm) {
+    LabelReachableData<L> *data = new LabelReachableData<L>();
+
+    ReadType(istrm, &data->reach_input_);
+    ReadType(istrm, &data->keep_relabel_data_);
+    data->have_relabel_data_ = data->keep_relabel_data_;
+    if (data->keep_relabel_data_)
+      ReadType(istrm, &data->label2index_);
+    ReadType(istrm, &data->final_label_);
+    ReadType(istrm, &data->isets_);
+    return data;
+  }
+
+  // Serializes the object in the field order expected by Read().
+  // Always returns true.
+  bool Write(ostream &ostrm) {
+    WriteType(ostrm, reach_input_);
+    WriteType(ostrm, keep_relabel_data_);
+    if (keep_relabel_data_)
+      WriteType(ostrm, label2index_);
+    WriteType(ostrm, FinalLabel());
+    WriteType(ostrm, isets_);
+    return true;
+  }
+
+  // Reference counting, forwarded to the embedded counter.
+  int RefCount() const { return ref_count_.count(); }
+  int IncrRefCount() { return ref_count_.Incr(); }
+  int DecrRefCount() { return ref_count_.Decr(); }
+
+ private:
+  // Used by Read() only. Every field gets a defined value so that a
+  // short or failed read never leaves Read() branching on (or
+  // returning) indeterminate members.
+  LabelReachableData()
+      : reach_input_(false),
+        keep_relabel_data_(true),
+        have_relabel_data_(true),
+        final_label_(kNoLabel) {}
+
+  bool reach_input_;                  // Input or output labels considered?
+  bool keep_relabel_data_;            // Save label2index_ to file?
+  bool have_relabel_data_;            // Using label2index_?
+  Label final_label_;                 // Final label
+  RefCounter ref_count_;              // Reference count.
+  unordered_map<L, L> label2index_;   // Finds index for a label.
+  vector<IntervalSet <L> > isets_;    // Interval sets per state.
+
+  DISALLOW_COPY_AND_ASSIGN(LabelReachableData);
+};
+
+
+// Tests reachability of labels from a given state. If reach_input =
+// true, then input labels are considered, o.w. output labels are
+// considered. To test for reachability from a state s, first do
+// SetState(s). Then a label l can be reached from state s of FST f
+// iff Reach(r) is true where r = Relabel(l). The relabeling is
+// required to ensure a compact representation of the reachable
+// labels.
+
+// The whole FST can be relabeled instead with Relabel(&f,
+// reach_input) so that the test Reach(r) applies directly to the
+// labels of the transformed FST f. The relabeled FST will also be
+// sorted appropriately for composition.
+//
+// Reachability of a final state from state s (via an epsilon path)
+// can be tested with ReachFinal();
+//
+// Reachability can also be tested on the set of labels specified by
+// an arc iterator, useful for FST composition. In particular,
+// Reach(aiter, ...) is true if labels on the input (output) side of
+// the transitions of the arc iterator, when iter_input is true
+// (false), can be reached from the state s. The iterator labels must
+// have already been relabeled.
+//
+// With the arc iterator test of reachability, the begin position, end
+// position and accumulated arc weight of the matches can be
+// returned. The optional template argument controls how reachable arc
+// weights are accumulated. The default uses the semiring
+// Plus(). Alternative ones can be used to distribute the weights in
+// composition in various ways.
+template <class A, class S = DefaultAccumulator<A> >
+class LabelReachable {
+ public:
+ // A is the arc type; S is the accumulator type used to sum the
+ // weights of reachable arcs (see accumulator_).
+ typedef A Arc;
+ typedef typename A::StateId StateId;
+ typedef typename A::Label Label;
+ typedef typename A::Weight Weight;
+ // Interval type of the per-state label interval sets.
+ typedef typename IntervalSet<Label>::Interval Interval;
+
+ // Computes the reachability data for 'fst'. If reach_input is true
+ // input labels are considered, otherwise output labels. If 's' is
+ // non-null it is used as the accumulator (and deleted by the
+ // destructor); otherwise a default-constructed S is allocated.
+ // keep_relabel_data is forwarded to the shared data object.
+ LabelReachable(const Fst<A> &fst, bool reach_input, S *s = 0,
+                bool keep_relabel_data = true)
+     : fst_(new VectorFst<Arc>(fst)),
+       s_(kNoStateId),
+       reach_begin_(-1),               // No match recorded yet.
+       reach_end_(-1),
+       reach_weight_(Weight::Zero()),
+       data_(new LabelReachableData<Label>(reach_input, keep_relabel_data)),
+       accumulator_(s ? s : new S()),
+       ncalls_(0),
+       nintervals_(0),
+       error_(false) {
+   // Number of states of the input FST, taken before TransformFst()
+   // adds its auxiliary states.
+   StateId ins = fst_->NumStates();
+   TransformFst();
+   FindIntervals(ins);
+   // The working copy is only needed during construction; clear the
+   // pointer so it is never left dangling.
+   delete fst_;
+   fst_ = 0;
+ }
+
+ // Wraps already computed reachability data. The data object is
+ // shared: its reference count is incremented here and decremented by
+ // the destructor. If 's' is non-null it is used as the accumulator
+ // (and deleted by the destructor); otherwise a default-constructed S
+ // is allocated.
+ explicit LabelReachable(LabelReachableData<Label> *data, S *s = 0)
+     : fst_(0),
+       s_(kNoStateId),
+       reach_begin_(-1),               // No match recorded yet.
+       reach_end_(-1),
+       reach_weight_(Weight::Zero()),
+       data_(data),
+       accumulator_(s ? s : new S()),
+       ncalls_(0),
+       nintervals_(0),
+       error_(false) {
+   data_->IncrRefCount();
+ }
+
+ // Copy constructor. The reachability data is shared with 'reachable'
+ // (reference count incremented), the accumulator is copy-constructed,
+ // and the error flag is inherited. The current state, the match
+ // results and the call statistics start out fresh.
+ LabelReachable(const LabelReachable<A, S> &reachable)
+     : fst_(0),
+       s_(kNoStateId),
+       reach_begin_(-1),               // No match recorded yet.
+       reach_end_(-1),
+       reach_weight_(Weight::Zero()),
+       data_(reachable.data_),
+       accumulator_(new S(*reachable.accumulator_)),
+       ncalls_(0),
+       nintervals_(0),
+       error_(reachable.error_) {
+   data_->IncrRefCount();
+ }
+
+ // Releases this object's reference on the shared data (deleting the
+ // data when the count drops to zero) and deletes the accumulator.
+ // If Reach(aiter, ...) was called at least once, call statistics are
+ // logged at verbosity level 2.
+ ~LabelReachable() {
+   const bool last_owner = (data_->DecrRefCount() == 0);
+   if (last_owner)
+     delete data_;
+   delete accumulator_;
+
+   if (!(ncalls_ > 0))
+     return;
+   VLOG(2) << "# of calls: " << ncalls_;
+   VLOG(2) << "# of intervals/call: " << (nintervals_ / ncalls_);
+ }
+
+ // Maps 'label' to its compact index. Epsilon (0) is returned
+ // unchanged, as is any label once the error flag is set. A label not
+ // seen before is added to the relabeling map and assigned the index
+ // (map size after insertion) + 1.
+ Label Relabel(Label label) {
+   if (error_ || label == 0)
+     return label;
+   unordered_map<Label, Label> *index_map = data_->Label2Index();
+   // operator[] creates a zero-valued entry for an unseen label.
+   Label &index = (*index_map)[label];
+   if (index == 0)
+     index = index_map->size() + 1;
+   return index;
+ }
+
+ // Relabels all arcs of 'fst' with Relabel(Label): the input labels
+ // if relabel_input is true, the output labels otherwise. Afterwards
+ // the arcs are sorted on the relabeled side and the symbol table of
+ // that side is removed.
+ void Relabel(MutableFst<Arc> *fst, bool relabel_input) {
+   for (StateIterator< MutableFst<Arc> > states(*fst);
+        !states.Done(); states.Next()) {
+     const StateId state = states.Value();
+     for (MutableArcIterator< MutableFst<Arc> > arcs(fst, state);
+          !arcs.Done(); arcs.Next()) {
+       Arc updated = arcs.Value();
+       // Pick the side to rewrite; the other side is left untouched.
+       Label &side = relabel_input ? updated.ilabel : updated.olabel;
+       side = Relabel(side);
+       arcs.SetValue(updated);
+     }
+   }
+
+   if (!relabel_input) {
+     ArcSort(fst, OLabelCompare<Arc>());
+     fst->SetOutputSymbols(0);
+     return;
+   }
+   ArcSort(fst, ILabelCompare<Arc>());
+   fst->SetInputSymbols(0);
+ }
+
+ // Returns relabeling pairs (cf. relabel.h::Relabel()) in '*pairs',
+ // which is cleared first. The entry whose index equals the final
+ // label is omitted.
+ // If 'avoid_collisions' is true, extra pairs are added to
+ // ensure no collisions when relabeling automata that have
+ // labels unseen here.
+ void RelabelPairs(vector<pair<Label, Label> > *pairs,
+                   bool avoid_collisions = false) {
+   pairs->clear();
+   unordered_map<Label, Label> &label2index = *data_->Label2Index();
+   if (label2index.empty())
+     return;  // Nothing to map.
+
+   // FinalLabel() may insert a kNoLabel entry into the relabeling map
+   // the first time it is called. Resolve it once, before any
+   // iterator into the map exists, so that the insertion cannot
+   // invalidate an iterator in use.
+   const Label final_label = data_->FinalLabel();
+
+   // Maps labels to their new values in [1, label2index().size()]
+   for (typename unordered_map<Label, Label>::const_iterator
+            it = label2index.begin(); it != label2index.end(); ++it) {
+     if (it->second != final_label)
+       pairs->push_back(pair<Label, Label>(it->first, it->second));
+   }
+
+   if (!avoid_collisions)
+     return;
+
+   // Ensures any label in [1, label2index().size()] is mapped either
+   // by the above step or to label2index().size() + 1 (to avoid
+   // collisions).
+   const size_t num_labels = label2index.size();
+   const Label unused_label = static_cast<Label>(num_labels + 1);
+   for (size_t i = 1; i <= num_labels; ++i) {
+     const Label label = static_cast<Label>(i);
+     typename unordered_map<Label, Label>::const_iterator
+         it = label2index.find(label);
+     if (it == label2index.end() || it->second == final_label)
+       pairs->push_back(pair<Label, Label>(label, unused_label));
+   }
+ }
+
+ // Selects 's' as the state whose reachability is queried. When
+ // 'aiter_s' is given, it is passed on to the accumulator as the
+ // state of the arc iterator later handed to Reach(); an accumulator
+ // error raises this object's error flag.
+ void SetState(StateId s, StateId aiter_s = kNoStateId) {
+   s_ = s;
+   if (aiter_s == kNoStateId)
+     return;
+   accumulator_->SetState(aiter_s);
+   const bool accumulator_failed = accumulator_->Error();
+   if (accumulator_failed)
+     error_ = true;
+ }
+
+ // Is 'label' a member of the current state's interval set?
+ // The label must already be relabeled (see the Relabel methods).
+ // Epsilon (0) is never reachable, and nothing is once the error flag
+ // is set.
+ bool Reach(Label label) {
+   if (error_ || label == 0)
+     return false;
+   return (*data_->IntervalSets())[s_].Member(label);
+ }
+
+ // Is the final label a member of the current state's interval set,
+ // i.e. can a final state be reached (via epsilon transitions) from
+ // this state? Always false once the error flag is set.
+ bool ReachFinal() {
+   if (error_)
+     return false;
+   vector< IntervalSet<Label> > *interval_sets = data_->IntervalSets();
+   const Label final_label = data_->FinalLabel();
+   return (*interval_sets)[s_].Member(final_label);
+ }
+
+ // Hands the secondary FST (the one iterated over in
+ // Reach(Iterator, ...)) to the accumulator. 'copy' is forwarded as
+ // is; set it when 'fst' is a copy of the FST passed in the previous
+ // call, to avoid unnecessary updates. An accumulator error raises
+ // this object's error flag.
+ template <class F>
+ void ReachInit(const F &fst, bool copy = false) {
+   accumulator_->Init(fst, copy);
+   const bool accumulator_failed = accumulator_->Error();
+   if (accumulator_failed)
+     error_ = true;
+ }
+
+ // Can reach any arc iterator label between iterator positions
+ // aiter_begin and aiter_end? If aiter_input = true, then iterator
+ // input labels are considered, o.w. output labels are considered.
+ // Arc iterator labels must be transformed by the Relabel methods
+ // above. If compute_weight is true, user may call ReachWeight().
+ //
+ // Returns true iff at least one arc in the range matched. Afterwards
+ // ReachBegin() is the position of the first matching arc and
+ // ReachEnd() one past the last matching arc (both -1 if nothing
+ // matched). Returns false immediately when the error flag is set.
+ // The iterator's flags are restored before returning; its position
+ // is not.
+ template <class Iterator>
+ bool Reach(Iterator *aiter, ssize_t aiter_begin,
+ ssize_t aiter_end, bool aiter_input, bool compute_weight) {
+ if (error_) return false;
+ vector< IntervalSet<Label> > &isets = *data_->IntervalSets();
+ const vector<Interval> *intervals = isets[s_].Intervals();
+ // Statistics reported by the destructor.
+ ++ncalls_;
+ nintervals_ += intervals->size();
+
+ // Reset the results of any previous call.
+ reach_begin_ = -1;
+ reach_end_ = -1;
+ reach_weight_ = Weight::Zero();
+
+ uint32 flags = aiter->Flags(); // save flags to restore them on exit
+ aiter->SetFlags(kArcNoCache, kArcNoCache); // make caching optional
+ aiter->Seek(aiter_begin);
+
+ // Pick the cheaper strategy: test every arc when the arc range is
+ // small relative to the number of intervals, otherwise search the
+ // arc range once per interval.
+ if (2 * (aiter_end - aiter_begin) < intervals->size()) {
+ // Check each arc against intervals.
+ // Set arc iterator flags to only compute the ilabel or olabel values,
+ // since they are the only values required for most of the arcs processed.
+ aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue,
+ kArcValueFlags);
+ // Last label found reachable; lets a run of arcs with the same
+ // label skip the repeated membership test.
+ Label reach_label = kNoLabel;
+ for (ssize_t aiter_pos = aiter_begin;
+ aiter_pos < aiter_end; aiter->Next(), ++aiter_pos) {
+ const A &arc = aiter->Value();
+ Label label = aiter_input ? arc.ilabel : arc.olabel;
+ if (label == reach_label || Reach(label)) {
+ reach_label = label;
+ if (reach_begin_ < 0)
+ reach_begin_ = aiter_pos;
+ reach_end_ = aiter_pos + 1;
+ if (compute_weight) {
+ if (!(aiter->Flags() & kArcWeightValue)) {
+ // If the 'arc.weight' wasn't computed by the call
+ // to 'aiter->Value()' above, we need to call
+ // 'aiter->Value()' again after having set the arc iterator
+ // flags to compute the arc weight value.
+ aiter->SetFlags(kArcWeightValue, kArcValueFlags);
+ const A &arcb = aiter->Value();
+ // Call the accumulator.
+ reach_weight_ = accumulator_->Sum(reach_weight_, arcb.weight);
+ // Only ilabel or olabel required to process the following
+ // arcs.
+ aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue,
+ kArcValueFlags);
+ } else {
+ // Call the accumulator.
+ reach_weight_ = accumulator_->Sum(reach_weight_, arc.weight);
+ }
+ }
+ }
+ }
+ } else {
+ // Check each interval against arcs
+ // NOTE(review): this branch binary-searches the arc range, so it
+ // presumably requires the arcs to be sorted on the examined label
+ // and the intervals to be in increasing order (each search starts
+ // where the previous one ended) -- confirm with callers.
+ ssize_t begin_low, end_low = aiter_begin;
+ for (typename vector<Interval>::const_iterator
+ iiter = intervals->begin();
+ iiter != intervals->end(); ++iiter) {
+ // [begin_low, end_low) is the run of arcs found by searching for
+ // the interval's begin and end labels.
+ begin_low = LowerBound(aiter, end_low, aiter_end,
+ aiter_input, iiter->begin);
+ end_low = LowerBound(aiter, begin_low, aiter_end,
+ aiter_input, iiter->end);
+ if (end_low - begin_low > 0) {
+ if (reach_begin_ < 0)
+ reach_begin_ = begin_low;
+ reach_end_ = end_low;
+ if (compute_weight) {
+ // Weights are needed now; let the accumulator sum the whole
+ // matching run.
+ aiter->SetFlags(kArcWeightValue, kArcValueFlags);
+ reach_weight_ = accumulator_->Sum(reach_weight_, aiter,
+ begin_low, end_low);
+ }
+ }
+ }
+ }
+
+ aiter->SetFlags(flags, kArcFlags); // restore original flag values
+ return reach_begin_ >= 0;
+ }
+
+ // Iterator position of the first matching arc found by the last
+ // Reach(aiter, ...) call.
+ ssize_t ReachBegin() const {
+   return reach_begin_;
+ }
+
+ // Iterator position one past the last matching arc found by the
+ // last Reach(aiter, ...) call.
+ ssize_t ReachEnd() const {
+   return reach_end_;
+ }
+
+ // Accumulated weight of the matching arcs. Only meaningful when
+ // compute_weight was true in the Reach() call.
+ Weight ReachWeight() const {
+   return reach_weight_;
+ }
+
+ // Read-only view of the relabeling map held by the shared data.
+ // Excludes the epsilon (0) label but includes kNoLabel, which is
+ // used internally for super-final transitions.
+ const unordered_map<Label, Label>& Label2Index() const {
+   const unordered_map<Label, Label> *index_map = data_->Label2Index();
+   return *index_map;
+ }
+
+ // Returns the shared reachability data; the reference count is not
+ // changed by this call.
+ LabelReachableData<Label> *GetData() const {
+   return data_;
+ }
+
+ bool Error() const { return error_ || accumulator_->Error(); }
+
+ private:
+ // Rewrites the working copy 'fst_' for the state reachability
+ // computation:
+ //  * every arc whose label (input or output side, per ReachInput())
+ //    is non-epsilon is redirected to a new final state specific to
+ //    that label;
+ //  * every original final state loses its final weight and instead
+ //    gets a kNoLabel arc carrying that weight to a new
+ //    kNoLabel-specific final state;
+ //  * a new super-initial state gets an epsilon arc to every state
+ //    with zero in-degree.
+ void TransformFst() {
+   const StateId num_orig_states = fst_->NumStates();
+   StateId num_states = num_orig_states;  // Grows as label states are assigned.
+   const bool reach_input = data_->ReachInput();
+
+   vector<ssize_t> indeg(num_orig_states, 0);
+
+   for (StateId s = 0; s < num_orig_states; ++s) {
+     // Redirects labeled arcs to new final states.
+     for (MutableArcIterator< VectorFst<Arc> > aiter(fst_, s);
+          !aiter.Done();
+          aiter.Next()) {
+       Arc arc = aiter.Value();
+       const Label label = reach_input ? arc.ilabel : arc.olabel;
+       if (label != 0) {
+         arc.nextstate = LabelState(label, &num_states, &indeg);
+         aiter.SetValue(arc);
+       }
+       ++indeg[arc.nextstate];  // Finds in-degrees for next step.
+     }
+
+     // Redirects final weights to new final state.
+     const Weight final_weight = fst_->Final(s);
+     if (final_weight != Weight::Zero()) {
+       const StateId dest = LabelState(kNoLabel, &num_states, &indeg);
+       fst_->AddArc(s, Arc(kNoLabel, kNoLabel, final_weight, dest));
+       ++indeg[dest];  // Finds in-degrees for next step.
+
+       fst_->SetFinal(s, Weight::Zero());
+     }
+   }
+
+   // Add new final states to Fst.
+   while (fst_->NumStates() < num_states)
+     fst_->SetFinal(fst_->AddState(), Weight::One());
+
+   // Creates a super-initial state for all states with zero in-degree.
+   const StateId super_initial = fst_->AddState();
+   fst_->SetStart(super_initial);
+   for (StateId s = 0; s < super_initial; ++s) {
+     if (indeg[s] != 0)
+       continue;
+     fst_->AddArc(super_initial, Arc(0, 0, Weight::One(), s));
+   }
+ }
+
+ // Helper for TransformFst(): returns the state assigned to 'label'
+ // in label2state_. A label seen for the first time is assigned the
+ // state id *next_state, which is then advanced, and a zero in-degree
+ // entry is appended to *indeg for it.
+ StateId LabelState(Label label, StateId *next_state,
+                    vector<ssize_t> *indeg) {
+   typename unordered_map<Label, StateId>::const_iterator
+       it = label2state_.find(label);
+   if (it != label2state_.end())
+     return it->second;
+   const StateId state = *next_state;
+   label2state_[label] = state;
+   indeg->push_back(0);
+   ++*next_state;
+   return state;
+ }
+
+ // Runs the state reachability computation on the transformed FST and
+ // stores the result in the shared data: the interval sets of the
+ // first 'ins' states, and a label -> index map derived from the
+ // label-specific states recorded in label2state_ (which is cleared
+ // afterwards). Sets the error flag and returns early if the
+ // reachability computation reports an error. Logs interval
+ // statistics at verbosity levels 2 and 3.
+ void FindIntervals(StateId ins) {
+   StateReachable<A, Label> reachable(*fst_);
+   if (reachable.Error()) {
+     error_ = true;
+     return;
+   }
+
+   // Keep only the interval sets of the original states.
+   vector< IntervalSet<Label> > &isets = *data_->IntervalSets();
+   isets = reachable.IntervalSets();
+   isets.resize(ins);
+
+   // A label's index is the index of its label-specific state.
+   vector<Label> &state2index = reachable.State2Index();
+   unordered_map<Label, Label> &label2index = *data_->Label2Index();
+   typename unordered_map<Label, StateId>::const_iterator
+       entry = label2state_.begin();
+   while (entry != label2state_.end()) {
+     label2index[entry->first] = state2index[entry->second];
+     ++entry;
+   }
+   label2state_.clear();
+
+   // Statistics, for logging only.
+   double total_intervals = 0;
+   ssize_t multi_interval_states = 0;
+   for (ssize_t s = 0; s < ins; ++s) {
+     total_intervals += isets[s].Size();
+     if (!(isets[s].Size() > 1))
+       continue;
+     ++multi_interval_states;
+     VLOG(3) << "state: " << s << " # of intervals: " << isets[s].Size();
+   }
+   VLOG(2) << "# of states: " << ins;
+   VLOG(2) << "# of intervals: " << total_intervals;
+   VLOG(2) << "# of intervals/state: " << total_intervals/ins;
+   VLOG(2) << "# of non-interval states: " << multi_interval_states;
+ }
+
+ // Binary search over the arc positions [aiter_begin, aiter_end) for
+ // 'match_label' on the input (aiter_input = true) or output side.
+ // Returns the position of the first arc carrying match_label if one
+ // is found; otherwise the position at which the search converged.
+ // On return the iterator is positioned at the returned position and
+ // all arc value flags are enabled again.
+ // NOTE(review): as a binary search this presumably requires the arcs
+ // in the range to be sorted on the examined label, in which case the
+ // result is the first position whose label is >= match_label --
+ // confirm with callers.
+ template <class Iterator>
+ ssize_t LowerBound(Iterator *aiter, ssize_t aiter_begin,
+ ssize_t aiter_end, bool aiter_input,
+ Label match_label) const {
+ // Only need to compute the ilabel or olabel of arcs when
+ // performing the binary search.
+ aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue,
+ kArcValueFlags);
+ ssize_t low = aiter_begin;
+ ssize_t high = aiter_end;
+ while (low < high) {
+ ssize_t mid = (low + high) / 2;
+ aiter->Seek(mid);
+ Label label = aiter_input ?
+ aiter->Value().ilabel : aiter->Value().olabel;
+ if (label > match_label) {
+ high = mid;
+ } else if (label < match_label) {
+ low = mid + 1;
+ } else {
+ // Find first matching label (when non-deterministic)
+ // Walk backwards from mid until the preceding arc has a different
+ // label or the lower end of the search window is reached.
+ for (ssize_t i = mid; i > low; --i) {
+ aiter->Seek(i - 1);
+ label = aiter_input ? aiter->Value().ilabel : aiter->Value().olabel;
+ if (label != match_label) {
+ aiter->Seek(i);
+ aiter->SetFlags(kArcValueFlags, kArcValueFlags);
+ return i;
+ }
+ }
+ // Every arc from low up to mid carries match_label.
+ aiter->SetFlags(kArcValueFlags, kArcValueFlags);
+ return low;
+ }
+ }
+ // No arc with match_label in the range.
+ aiter->Seek(low);
+ aiter->SetFlags(kArcValueFlags, kArcValueFlags);
+ return low;
+ }
+
+ // Working copy of the input FST. Only allocated by the constructor
+ // taking an FST, which also deletes it before returning; the other
+ // constructors set it to 0.
+ VectorFst<Arc> *fst_;
+ StateId s_; // Current state
+ // Filled by TransformFst() and cleared by FindIntervals().
+ unordered_map<Label, StateId> label2state_; // Finds final state for a label
+
+ // Results of the last Reach(aiter, ...) call.
+ ssize_t reach_begin_; // Iterator pos of first match
+ ssize_t reach_end_; // Iterator pos after last match
+ Weight reach_weight_; // Gives weight sum of arc iterator
+ // arcs with reachable labels.
+ LabelReachableData<Label> *data_; // Shareable data between copies
+ // Deleted by the destructor, whether it was passed in or allocated
+ // by a constructor.
+ S *accumulator_; // Sums arc weights
+
+ // Statistics logged by the destructor: number of Reach(aiter, ...)
+ // calls, and the number of intervals of the current state summed
+ // over those calls.
+ double ncalls_;
+ double nintervals_;
+ // Set when the state reachability computation or the accumulator
+ // reports an error, or inherited from the copied object.
+ bool error_;
+
+ void operator=(const LabelReachable<A, S> &); // Disallow
+};
+
+} // namespace fst
+
+#endif // FST_LIB_LABEL_REACHABLE_H__