diff options
author | Ted Yin <[email protected]> | 2015-08-14 17:42:26 +0800 |
---|---|---|
committer | Ted Yin <[email protected]> | 2015-08-14 17:42:26 +0800 |
commit | c3cffb58b9921d78753336421b52b9ffdaa5515c (patch) | |
tree | bfea20e97c200cf734021e3756d749c892e658a4 /kaldi_io/src/tools/openfst/include/fst/label-reachable.h | |
parent | 10cce5f6a5c9e2f8e00d5a2a4d87c9cb7c26bf4c (diff) | |
parent | dfdd17afc2e984ec6c32ea01290f5c76309a456a (diff) |
Merge pull request #2 from yimmon/master
remove needless files
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/label-reachable.h')
-rw-r--r-- | kaldi_io/src/tools/openfst/include/fst/label-reachable.h | 565 |
1 files changed, 0 insertions, 565 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/label-reachable.h b/kaldi_io/src/tools/openfst/include/fst/label-reachable.h deleted file mode 100644 index af06eef..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/label-reachable.h +++ /dev/null @@ -1,565 +0,0 @@ -// label_reachable.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Class to determine if a non-epsilon label can be read as the -// first non-epsilon symbol along some path from a given state. - - -#ifndef FST_LIB_LABEL_REACHABLE_H__ -#define FST_LIB_LABEL_REACHABLE_H__ - -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <vector> -using std::vector; - -#include <fst/accumulator.h> -#include <fst/arcsort.h> -#include <fst/interval-set.h> -#include <fst/state-reachable.h> -#include <fst/vector-fst.h> - - -namespace fst { - -// Stores shareable data for label reachable class copies. -template <typename L> -class LabelReachableData { - public: - typedef L Label; - typedef typename IntervalSet<L>::Interval Interval; - - explicit LabelReachableData(bool reach_input, bool keep_relabel_data = true) - : reach_input_(reach_input), - keep_relabel_data_(keep_relabel_data), - have_relabel_data_(true), - final_label_(kNoLabel) {} - - ~LabelReachableData() {} - - bool ReachInput() const { return reach_input_; } - - vector< IntervalSet<L> > *IntervalSets() { return &isets_; } - - unordered_map<L, L> *Label2Index() { - if (!have_relabel_data_) - FSTERROR() << "LabelReachableData: no relabeling data"; - return &label2index_; - } - - Label FinalLabel() { - if (final_label_ == kNoLabel) - final_label_ = label2index_[kNoLabel]; - return final_label_; - } - - static LabelReachableData<L> *Read(istream &istrm) { - LabelReachableData<L> *data = new LabelReachableData<L>(); - - ReadType(istrm, &data->reach_input_); - ReadType(istrm, &data->keep_relabel_data_); - data->have_relabel_data_ = data->keep_relabel_data_; - if (data->keep_relabel_data_) - ReadType(istrm, &data->label2index_); - ReadType(istrm, &data->final_label_); - ReadType(istrm, &data->isets_); - return data; - } - - bool Write(ostream &ostrm) { - WriteType(ostrm, reach_input_); - WriteType(ostrm, keep_relabel_data_); - if (keep_relabel_data_) - WriteType(ostrm, label2index_); - WriteType(ostrm, FinalLabel()); - WriteType(ostrm, isets_); - return true; - } - - int RefCount() const { return ref_count_.count(); } - int IncrRefCount() { return ref_count_.Incr(); } - int DecrRefCount() { return ref_count_.Decr(); } - - private: - LabelReachableData() {} - - bool reach_input_; // Input or output labels considered? - bool keep_relabel_data_; // Save label2index_ to file? - bool have_relabel_data_; // Using label2index_? - Label final_label_; // Final label - RefCounter ref_count_; // Reference count. - unordered_map<L, L> label2index_; // Finds index for a label. - vector<IntervalSet <L> > isets_; // Interval sets per state. - - DISALLOW_COPY_AND_ASSIGN(LabelReachableData); -}; - - -// Tests reachability of labels from a given state. If reach_input = -// true, then input labels are considered, o.w. output labels are -// considered. To test for reachability from a state s, first do -// SetState(s). Then a label l can be reached from state s of FST f -// iff Reach(r) is true where r = Relabel(l). The relabeling is -// required to ensure a compact representation of the reachable -// labels. - -// The whole FST can be relabeled instead with Relabel(&f, -// reach_input) so that the test Reach(r) applies directly to the -// labels of the transformed FST f. The relabeled FST will also be -// sorted appropriately for composition. -// -// Reachablity of a final state from state s (via an epsilon path) -// can be tested with ReachFinal(); -// -// Reachability can also be tested on the set of labels specified by -// an arc iterator, useful for FST composition. In particular, -// Reach(aiter, ...) is true if labels on the input (output) side of -// the transitions of the arc iterator, when iter_input is true -// (false), can be reached from the state s. The iterator labels must -// have already been relabeled. -// -// With the arc iterator test of reachability, the begin position, end -// position and accumulated arc weight of the matches can be -// returned. The optional template argument controls how reachable arc -// weights are accumulated. The default uses the semiring -// Plus(). Alternative ones can be used to distribute the weights in -// composition in various ways. -template <class A, class S = DefaultAccumulator<A> > -class LabelReachable { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename IntervalSet<Label>::Interval Interval; - - LabelReachable(const Fst<A> &fst, bool reach_input, S *s = 0, - bool keep_relabel_data = true) - : fst_(new VectorFst<Arc>(fst)), - s_(kNoStateId), - data_(new LabelReachableData<Label>(reach_input, keep_relabel_data)), - accumulator_(s ? s : new S()), - ncalls_(0), - nintervals_(0), - error_(false) { - StateId ins = fst_->NumStates(); - TransformFst(); - FindIntervals(ins); - delete fst_; - } - - explicit LabelReachable(LabelReachableData<Label> *data, S *s = 0) - : fst_(0), - s_(kNoStateId), - data_(data), - accumulator_(s ? s : new S()), - ncalls_(0), - nintervals_(0), - error_(false) { - data_->IncrRefCount(); - } - - LabelReachable(const LabelReachable<A, S> &reachable) : - fst_(0), - s_(kNoStateId), - data_(reachable.data_), - accumulator_(new S(*reachable.accumulator_)), - ncalls_(0), - nintervals_(0), - error_(reachable.error_) { - data_->IncrRefCount(); - } - - ~LabelReachable() { - if (!data_->DecrRefCount()) - delete data_; - delete accumulator_; - if (ncalls_ > 0) { - VLOG(2) << "# of calls: " << ncalls_; - VLOG(2) << "# of intervals/call: " << (nintervals_ / ncalls_); - } - } - - // Relabels w.r.t labels that give compact label sets. - Label Relabel(Label label) { - if (label == 0 || error_) - return label; - unordered_map<Label, Label> &label2index = *data_->Label2Index(); - Label &relabel = label2index[label]; - if (!relabel) // Add new label - relabel = label2index.size() + 1; - return relabel; - } - - // Relabels Fst w.r.t to labels that give compact label sets. - void Relabel(MutableFst<Arc> *fst, bool relabel_input) { - for (StateIterator< MutableFst<Arc> > siter(*fst); - !siter.Done(); siter.Next()) { - StateId s = siter.Value(); - for (MutableArcIterator< MutableFst<Arc> > aiter(fst, s); - !aiter.Done(); - aiter.Next()) { - Arc arc = aiter.Value(); - if (relabel_input) - arc.ilabel = Relabel(arc.ilabel); - else - arc.olabel = Relabel(arc.olabel); - aiter.SetValue(arc); - } - } - if (relabel_input) { - ArcSort(fst, ILabelCompare<Arc>()); - fst->SetInputSymbols(0); - } else { - ArcSort(fst, OLabelCompare<Arc>()); - fst->SetOutputSymbols(0); - } - } - - // Returns relabeling pairs (cf. relabel.h::Relabel()). - // If 'avoid_collisions' is true, extra pairs are added to - // ensure no collisions when relabeling automata that have - // labels unseen here. - void RelabelPairs(vector<pair<Label, Label> > *pairs, - bool avoid_collisions = false) { - pairs->clear(); - unordered_map<Label, Label> &label2index = *data_->Label2Index(); - // Maps labels to their new values in [1, label2index().size()] - for (typename unordered_map<Label, Label>::const_iterator - it = label2index.begin(); it != label2index.end(); ++it) - if (it->second != data_->FinalLabel()) - pairs->push_back(pair<Label, Label>(it->first, it->second)); - if (avoid_collisions) { - // Ensures any label in [1, label2index().size()] is mapped either - // by the above step or to label2index() + 1 (to avoid collisions). - for (int i = 1; i <= label2index.size(); ++i) { - typename unordered_map<Label, Label>::const_iterator - it = label2index.find(i); - if (it == label2index.end() || it->second == data_->FinalLabel()) - pairs->push_back(pair<Label, Label>(i, label2index.size() + 1)); - } - } - } - - // Set current state. Optionally set state associated - // with arc iterator to be passed to Reach. - void SetState(StateId s, StateId aiter_s = kNoStateId) { - s_ = s; - if (aiter_s != kNoStateId) { - accumulator_->SetState(aiter_s); - if (accumulator_->Error()) error_ = true; - } - } - - // Can reach this label from current state? - // Original labels must be transformed by the Relabel methods above. - bool Reach(Label label) { - if (label == 0 || error_) - return false; - vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); - return isets[s_].Member(label); - - } - - // Can reach final state (via epsilon transitions) from this state? - bool ReachFinal() { - if (error_) return false; - vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); - return isets[s_].Member(data_->FinalLabel()); - } - - // Initialize with secondary FST to be used with Reach(Iterator,...). - // If copy is true, then 'fst' is a copy of the FST used in the - // previous call to this method (useful to avoid unnecessary updates). - template <class F> - void ReachInit(const F &fst, bool copy = false) { - accumulator_->Init(fst, copy); - if (accumulator_->Error()) error_ = true; - } - - // Can reach any arc iterator label between iterator positions - // aiter_begin and aiter_end? If aiter_input = true, then iterator - // input labels are considered, o.w. output labels are considered. - // Arc iterator labels must be transformed by the Relabel methods - // above. If compute_weight is true, user may call ReachWeight(). - template <class Iterator> - bool Reach(Iterator *aiter, ssize_t aiter_begin, - ssize_t aiter_end, bool aiter_input, bool compute_weight) { - if (error_) return false; - vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); - const vector<Interval> *intervals = isets[s_].Intervals(); - ++ncalls_; - nintervals_ += intervals->size(); - - reach_begin_ = -1; - reach_end_ = -1; - reach_weight_ = Weight::Zero(); - - uint32 flags = aiter->Flags(); // save flags to restore them on exit - aiter->SetFlags(kArcNoCache, kArcNoCache); // make caching optional - aiter->Seek(aiter_begin); - - if (2 * (aiter_end - aiter_begin) < intervals->size()) { - // Check each arc against intervals. - // Set arc iterator flags to only compute the ilabel or olabel values, - // since they are the only values required for most of the arcs processed. - aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue, - kArcValueFlags); - Label reach_label = kNoLabel; - for (ssize_t aiter_pos = aiter_begin; - aiter_pos < aiter_end; aiter->Next(), ++aiter_pos) { - const A &arc = aiter->Value(); - Label label = aiter_input ? arc.ilabel : arc.olabel; - if (label == reach_label || Reach(label)) { - reach_label = label; - if (reach_begin_ < 0) - reach_begin_ = aiter_pos; - reach_end_ = aiter_pos + 1; - if (compute_weight) { - if (!(aiter->Flags() & kArcWeightValue)) { - // If the 'arc.weight' wasn't computed by the call - // to 'aiter->Value()' above, we need to call - // 'aiter->Value()' again after having set the arc iterator - // flags to compute the arc weight value. - aiter->SetFlags(kArcWeightValue, kArcValueFlags); - const A &arcb = aiter->Value(); - // Call the accumulator. - reach_weight_ = accumulator_->Sum(reach_weight_, arcb.weight); - // Only ilabel or olabel required to process the following - // arcs. - aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue, - kArcValueFlags); - } else { - // Call the accumulator. - reach_weight_ = accumulator_->Sum(reach_weight_, arc.weight); - } - } - } - } - } else { - // Check each interval against arcs - ssize_t begin_low, end_low = aiter_begin; - for (typename vector<Interval>::const_iterator - iiter = intervals->begin(); - iiter != intervals->end(); ++iiter) { - begin_low = LowerBound(aiter, end_low, aiter_end, - aiter_input, iiter->begin); - end_low = LowerBound(aiter, begin_low, aiter_end, - aiter_input, iiter->end); - if (end_low - begin_low > 0) { - if (reach_begin_ < 0) - reach_begin_ = begin_low; - reach_end_ = end_low; - if (compute_weight) { - aiter->SetFlags(kArcWeightValue, kArcValueFlags); - reach_weight_ = accumulator_->Sum(reach_weight_, aiter, - begin_low, end_low); - } - } - } - } - - aiter->SetFlags(flags, kArcFlags); // restore original flag values - return reach_begin_ >= 0; - } - - // Returns iterator position of first matching arc. - ssize_t ReachBegin() const { return reach_begin_; } - - // Returns iterator position one past last matching arc. - ssize_t ReachEnd() const { return reach_end_; } - - // Return the sum of the weights for matching arcs. - // Valid only if compute_weight was true in Reach() call. - Weight ReachWeight() const { return reach_weight_; } - - // Access to the relabeling map. Excludes epsilon (0) label but - // includes kNoLabel that is used internally for super-final - // transitons. - const unordered_map<Label, Label>& Label2Index() const { - return *data_->Label2Index(); - } - - LabelReachableData<Label> *GetData() const { return data_; } - - bool Error() const { return error_ || accumulator_->Error(); } - - private: - // Redirects labeled arcs (input or output labels determined by - // ReachInput()) to new label-specific final states. Each original - // final state is redirected via a transition labeled with kNoLabel - // to a new kNoLabel-specific final state. Creates super-initial - // state for all states with zero in-degree. - void TransformFst() { - StateId ins = fst_->NumStates(); - StateId ons = ins; - - vector<ssize_t> indeg(ins, 0); - - // Redirects labeled arcs to new final states. - for (StateId s = 0; s < ins; ++s) { - for (MutableArcIterator< VectorFst<Arc> > aiter(fst_, s); - !aiter.Done(); - aiter.Next()) { - Arc arc = aiter.Value(); - Label label = data_->ReachInput() ? arc.ilabel : arc.olabel; - if (label) { - if (label2state_.find(label) == label2state_.end()) { - label2state_[label] = ons; - indeg.push_back(0); - ++ons; - } - arc.nextstate = label2state_[label]; - aiter.SetValue(arc); - } - ++indeg[arc.nextstate]; // Finds in-degrees for next step. - } - - // Redirects final weights to new final state. - Weight final = fst_->Final(s); - if (final != Weight::Zero()) { - if (label2state_.find(kNoLabel) == label2state_.end()) { - label2state_[kNoLabel] = ons; - indeg.push_back(0); - ++ons; - } - Arc arc(kNoLabel, kNoLabel, final, label2state_[kNoLabel]); - fst_->AddArc(s, arc); - ++indeg[arc.nextstate]; // Finds in-degrees for next step. - - fst_->SetFinal(s, Weight::Zero()); - } - } - - // Add new final states to Fst. - while (fst_->NumStates() < ons) { - StateId s = fst_->AddState(); - fst_->SetFinal(s, Weight::One()); - } - - // Creates a super-initial state for all states with zero in-degree. - StateId start = fst_->AddState(); - fst_->SetStart(start); - for (StateId s = 0; s < start; ++s) { - if (indeg[s] == 0) { - Arc arc(0, 0, Weight::One(), s); - fst_->AddArc(start, arc); - } - } - } - - void FindIntervals(StateId ins) { - StateReachable<A, Label> state_reachable(*fst_); - if (state_reachable.Error()) { - error_ = true; - return; - } - - vector<Label> &state2index = state_reachable.State2Index(); - vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); - isets = state_reachable.IntervalSets(); - isets.resize(ins); - - unordered_map<Label, Label> &label2index = *data_->Label2Index(); - for (typename unordered_map<Label, StateId>::const_iterator - it = label2state_.begin(); - it != label2state_.end(); - ++it) { - Label l = it->first; - StateId s = it->second; - Label i = state2index[s]; - label2index[l] = i; - } - label2state_.clear(); - - double nintervals = 0; - ssize_t non_intervals = 0; - for (ssize_t s = 0; s < ins; ++s) { - nintervals += isets[s].Size(); - if (isets[s].Size() > 1) { - ++non_intervals; - VLOG(3) << "state: " << s << " # of intervals: " << isets[s].Size(); - } - } - VLOG(2) << "# of states: " << ins; - VLOG(2) << "# of intervals: " << nintervals; - VLOG(2) << "# of intervals/state: " << nintervals/ins; - VLOG(2) << "# of non-interval states: " << non_intervals; - } - - template <class Iterator> - ssize_t LowerBound(Iterator *aiter, ssize_t aiter_begin, - ssize_t aiter_end, bool aiter_input, - Label match_label) const { - // Only need to compute the ilabel or olabel of arcs when - // performing the binary search. - aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue, - kArcValueFlags); - ssize_t low = aiter_begin; - ssize_t high = aiter_end; - while (low < high) { - ssize_t mid = (low + high) / 2; - aiter->Seek(mid); - Label label = aiter_input ? - aiter->Value().ilabel : aiter->Value().olabel; - if (label > match_label) { - high = mid; - } else if (label < match_label) { - low = mid + 1; - } else { - // Find first matching label (when non-deterministic) - for (ssize_t i = mid; i > low; --i) { - aiter->Seek(i - 1); - label = aiter_input ? aiter->Value().ilabel : aiter->Value().olabel; - if (label != match_label) { - aiter->Seek(i); - aiter->SetFlags(kArcValueFlags, kArcValueFlags); - return i; - } - } - aiter->SetFlags(kArcValueFlags, kArcValueFlags); - return low; - } - } - aiter->Seek(low); - aiter->SetFlags(kArcValueFlags, kArcValueFlags); - return low; - } - - VectorFst<Arc> *fst_; - StateId s_; // Current state - unordered_map<Label, StateId> label2state_; // Finds final state for a label - - ssize_t reach_begin_; // Iterator pos of first match - ssize_t reach_end_; // Iterator pos after last match - Weight reach_weight_; // Gives weight sum of arc iterator - // arcs with reachable labels. - LabelReachableData<Label> *data_; // Shareable data between copies - S *accumulator_; // Sums arc weights - - double ncalls_; - double nintervals_; - bool error_; - - void operator=(const LabelReachable<A, S> &); // Disallow -}; - -} // namespace fst - -#endif // FST_LIB_LABEL_REACHABLE_H__ |