diff options
author | Ted Yin <[email protected]> | 2015-08-14 17:42:26 +0800 |
---|---|---|
committer | Ted Yin <[email protected]> | 2015-08-14 17:42:26 +0800 |
commit | c3cffb58b9921d78753336421b52b9ffdaa5515c (patch) | |
tree | bfea20e97c200cf734021e3756d749c892e658a4 /kaldi_io/src/tools/openfst | |
parent | 10cce5f6a5c9e2f8e00d5a2a4d87c9cb7c26bf4c (diff) | |
parent | dfdd17afc2e984ec6c32ea01290f5c76309a456a (diff) |
Merge pull request #2 from yimmon/master
remove needless files
Diffstat (limited to 'kaldi_io/src/tools/openfst')
165 files changed, 0 insertions, 46166 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/accumulator.h b/kaldi_io/src/tools/openfst/include/fst/accumulator.h deleted file mode 100644 index 81d1847..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/accumulator.h +++ /dev/null @@ -1,745 +0,0 @@ -// accumulator.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Classes to accumulate arc weights. Useful for weight lookahead. - -#ifndef FST_LIB_ACCUMULATOR_H__ -#define FST_LIB_ACCUMULATOR_H__ - -#include <algorithm> -#include <functional> -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <vector> -using std::vector; - -#include <fst/arcfilter.h> -#include <fst/arcsort.h> -#include <fst/dfs-visit.h> -#include <fst/expanded-fst.h> -#include <fst/replace.h> - -namespace fst { - -// This class accumulates arc weights using the semiring Plus(). 
-template <class A> -class DefaultAccumulator { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - DefaultAccumulator() {} - - DefaultAccumulator(const DefaultAccumulator<A> &acc) {} - - void Init(const Fst<A>& fst, bool copy = false) {} - - void SetState(StateId) {} - - Weight Sum(Weight w, Weight v) { - return Plus(w, v); - } - - template <class ArcIterator> - Weight Sum(Weight w, ArcIterator *aiter, ssize_t begin, - ssize_t end) { - Weight sum = w; - aiter->Seek(begin); - for (ssize_t pos = begin; pos < end; aiter->Next(), ++pos) - sum = Plus(sum, aiter->Value().weight); - return sum; - } - - bool Error() const { return false; } - - private: - void operator=(const DefaultAccumulator<A> &); // Disallow -}; - - -// This class accumulates arc weights using the log semiring Plus() -// assuming an arc weight has a WeightConvert specialization to -// and from log64 weights. -template <class A> -class LogAccumulator { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - LogAccumulator() {} - - LogAccumulator(const LogAccumulator<A> &acc) {} - - void Init(const Fst<A>& fst, bool copy = false) {} - - void SetState(StateId) {} - - Weight Sum(Weight w, Weight v) { - return LogPlus(w, v); - } - - template <class ArcIterator> - Weight Sum(Weight w, ArcIterator *aiter, ssize_t begin, - ssize_t end) { - Weight sum = w; - aiter->Seek(begin); - for (ssize_t pos = begin; pos < end; aiter->Next(), ++pos) - sum = LogPlus(sum, aiter->Value().weight); - return sum; - } - - bool Error() const { return false; } - - private: - double LogPosExp(double x) { return log(1.0F + exp(-x)); } - - Weight LogPlus(Weight w, Weight v) { - double f1 = to_log_weight_(w).Value(); - double f2 = to_log_weight_(v).Value(); - if (f1 > f2) - return to_weight_(f2 - LogPosExp(f1 - f2)); - else - return to_weight_(f1 - LogPosExp(f2 - f1)); - } - - WeightConvert<Weight, Log64Weight> to_log_weight_; - 
WeightConvert<Log64Weight, Weight> to_weight_; - - void operator=(const LogAccumulator<A> &); // Disallow -}; - - -// Stores shareable data for fast log accumulator copies. -class FastLogAccumulatorData { - public: - FastLogAccumulatorData() {} - - vector<double> *Weights() { return &weights_; } - vector<ssize_t> *WeightPositions() { return &weight_positions_; } - double *WeightEnd() { return &(weights_[weights_.size() - 1]); }; - int RefCount() const { return ref_count_.count(); } - int IncrRefCount() { return ref_count_.Incr(); } - int DecrRefCount() { return ref_count_.Decr(); } - - private: - // Cummulative weight per state for all states s.t. # of arcs > - // arc_limit_ with arcs in order. Special first element per state - // being Log64Weight::Zero(); - vector<double> weights_; - // Maps from state to corresponding beginning weight position in - // weights_. Position -1 means no pre-computed weights for that - // state. - vector<ssize_t> weight_positions_; - RefCounter ref_count_; // Reference count. - - DISALLOW_COPY_AND_ASSIGN(FastLogAccumulatorData); -}; - - -// This class accumulates arc weights using the log semiring Plus() -// assuming an arc weight has a WeightConvert specialization to and -// from log64 weights. The member function Init(fst) has to be called -// to setup pre-computed weight information. 
-template <class A> -class FastLogAccumulator { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - explicit FastLogAccumulator(ssize_t arc_limit = 20, ssize_t arc_period = 10) - : arc_limit_(arc_limit), - arc_period_(arc_period), - data_(new FastLogAccumulatorData()), - error_(false) {} - - FastLogAccumulator(const FastLogAccumulator<A> &acc) - : arc_limit_(acc.arc_limit_), - arc_period_(acc.arc_period_), - data_(acc.data_), - error_(acc.error_) { - data_->IncrRefCount(); - } - - ~FastLogAccumulator() { - if (!data_->DecrRefCount()) - delete data_; - } - - void SetState(StateId s) { - vector<double> &weights = *data_->Weights(); - vector<ssize_t> &weight_positions = *data_->WeightPositions(); - - if (weight_positions.size() <= s) { - FSTERROR() << "FastLogAccumulator::SetState: invalid state id."; - error_ = true; - return; - } - - ssize_t pos = weight_positions[s]; - if (pos >= 0) - state_weights_ = &(weights[pos]); - else - state_weights_ = 0; - } - - Weight Sum(Weight w, Weight v) { - return LogPlus(w, v); - } - - template <class ArcIterator> - Weight Sum(Weight w, ArcIterator *aiter, ssize_t begin, - ssize_t end) { - if (error_) return Weight::NoWeight(); - Weight sum = w; - // Finds begin and end of pre-stored weights - ssize_t index_begin = -1, index_end = -1; - ssize_t stored_begin = end, stored_end = end; - if (state_weights_ != 0) { - index_begin = begin > 0 ? 
(begin - 1)/ arc_period_ + 1 : 0; - index_end = end / arc_period_; - stored_begin = index_begin * arc_period_; - stored_end = index_end * arc_period_; - } - // Computes sum before pre-stored weights - if (begin < stored_begin) { - ssize_t pos_end = min(stored_begin, end); - aiter->Seek(begin); - for (ssize_t pos = begin; pos < pos_end; aiter->Next(), ++pos) - sum = LogPlus(sum, aiter->Value().weight); - } - // Computes sum between pre-stored weights - if (stored_begin < stored_end) { - sum = LogPlus(sum, LogMinus(state_weights_[index_end], - state_weights_[index_begin])); - } - // Computes sum after pre-stored weights - if (stored_end < end) { - ssize_t pos_start = max(stored_begin, stored_end); - aiter->Seek(pos_start); - for (ssize_t pos = pos_start; pos < end; aiter->Next(), ++pos) - sum = LogPlus(sum, aiter->Value().weight); - } - return sum; - } - - template <class F> - void Init(const F &fst, bool copy = false) { - if (copy) - return; - vector<double> &weights = *data_->Weights(); - vector<ssize_t> &weight_positions = *data_->WeightPositions(); - if (!weights.empty() || arc_limit_ < arc_period_) { - FSTERROR() << "FastLogAccumulator: initialization error."; - error_ = true; - return; - } - weight_positions.reserve(CountStates(fst)); - - ssize_t weight_position = 0; - for(StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { - StateId s = siter.Value(); - if (fst.NumArcs(s) >= arc_limit_) { - double sum = FloatLimits<double>::PosInfinity(); - weight_positions.push_back(weight_position); - weights.push_back(sum); - ++weight_position; - ssize_t narcs = 0; - for(ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) { - const A &arc = aiter.Value(); - sum = LogPlus(sum, arc.weight); - // Stores cumulative weight distribution per arc_period_. 
- if (++narcs % arc_period_ == 0) { - weights.push_back(sum); - ++weight_position; - } - } - } else { - weight_positions.push_back(-1); - } - } - } - - bool Error() const { return error_; } - - private: - double LogPosExp(double x) { - return x == FloatLimits<double>::PosInfinity() ? - 0.0 : log(1.0F + exp(-x)); - } - - double LogMinusExp(double x) { - return x == FloatLimits<double>::PosInfinity() ? - 0.0 : log(1.0F - exp(-x)); - } - - Weight LogPlus(Weight w, Weight v) { - double f1 = to_log_weight_(w).Value(); - double f2 = to_log_weight_(v).Value(); - if (f1 > f2) - return to_weight_(f2 - LogPosExp(f1 - f2)); - else - return to_weight_(f1 - LogPosExp(f2 - f1)); - } - - double LogPlus(double f1, Weight v) { - double f2 = to_log_weight_(v).Value(); - if (f1 == FloatLimits<double>::PosInfinity()) - return f2; - else if (f1 > f2) - return f2 - LogPosExp(f1 - f2); - else - return f1 - LogPosExp(f2 - f1); - } - - Weight LogMinus(double f1, double f2) { - if (f1 >= f2) { - FSTERROR() << "FastLogAcumulator::LogMinus: f1 >= f2 with f1 = " << f1 - << " and f2 = " << f2; - error_ = true; - return Weight::NoWeight(); - } - if (f2 == FloatLimits<double>::PosInfinity()) - return to_weight_(f1); - else - return to_weight_(f1 - LogMinusExp(f2 - f1)); - } - - WeightConvert<Weight, Log64Weight> to_log_weight_; - WeightConvert<Log64Weight, Weight> to_weight_; - - ssize_t arc_limit_; // Minimum # of arcs to pre-compute state - ssize_t arc_period_; // Save cumulative weights per 'arc_period_'. - bool init_; // Cumulative weights initialized? - FastLogAccumulatorData *data_; - double *state_weights_; - bool error_; - - void operator=(const FastLogAccumulator<A> &); // Disallow -}; - - -// Stores shareable data for cache log accumulator copies. -// All copies share the same cache. 
-template <class A> -class CacheLogAccumulatorData { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - CacheLogAccumulatorData(bool gc, size_t gc_limit) - : cache_gc_(gc), cache_limit_(gc_limit), cache_size_(0) {} - - ~CacheLogAccumulatorData() { - for(typename unordered_map<StateId, CacheState>::iterator it = cache_.begin(); - it != cache_.end(); - ++it) - delete it->second.weights; - } - - bool CacheDisabled() const { return cache_gc_ && cache_limit_ == 0; } - - vector<double> *GetWeights(StateId s) { - typename unordered_map<StateId, CacheState>::iterator it = cache_.find(s); - if (it != cache_.end()) { - it->second.recent = true; - return it->second.weights; - } else { - return 0; - } - } - - void AddWeights(StateId s, vector<double> *weights) { - if (cache_gc_ && cache_size_ >= cache_limit_) - GC(false); - cache_.insert(make_pair(s, CacheState(weights, true))); - if (cache_gc_) - cache_size_ += weights->capacity() * sizeof(double); - } - - int RefCount() const { return ref_count_.count(); } - int IncrRefCount() { return ref_count_.Incr(); } - int DecrRefCount() { return ref_count_.Decr(); } - - private: - // Cached information for a given state. - struct CacheState { - vector<double>* weights; // Accumulated weights for this state. - bool recent; // Has this state been accessed since last GC? - - CacheState(vector<double> *w, bool r) : weights(w), recent(r) {} - }; - - // Garbage collect: Delete from cache states that have not been - // accessed since the last GC ('free_recent = false') until - // 'cache_size_' is 2/3 of 'cache_limit_'. If it does not free enough - // memory, start deleting recently accessed states. 
- void GC(bool free_recent) { - size_t cache_target = (2 * cache_limit_)/3 + 1; - typename unordered_map<StateId, CacheState>::iterator it = cache_.begin(); - while (it != cache_.end() && cache_size_ > cache_target) { - CacheState &cs = it->second; - if (free_recent || !cs.recent) { - cache_size_ -= cs.weights->capacity() * sizeof(double); - delete cs.weights; - cache_.erase(it++); - } else { - cs.recent = false; - ++it; - } - } - if (!free_recent && cache_size_ > cache_target) - GC(true); - } - - unordered_map<StateId, CacheState> cache_; // Cache - bool cache_gc_; // Enable garbage collection - size_t cache_limit_; // # of bytes cached - size_t cache_size_; // # of bytes allowed before GC - RefCounter ref_count_; - - DISALLOW_COPY_AND_ASSIGN(CacheLogAccumulatorData); -}; - -// This class accumulates arc weights using the log semiring Plus() -// has a WeightConvert specialization to and from log64 weights. It -// is similar to the FastLogAccumator. However here, the accumulated -// weights are pre-computed and stored only for the states that are -// visited. The member function Init(fst) has to be called to setup -// this accumulator. -template <class A> -class CacheLogAccumulator { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - explicit CacheLogAccumulator(ssize_t arc_limit = 10, bool gc = false, - size_t gc_limit = 10 * 1024 * 1024) - : arc_limit_(arc_limit), fst_(0), data_( - new CacheLogAccumulatorData<A>(gc, gc_limit)), s_(kNoStateId), - error_(false) {} - - CacheLogAccumulator(const CacheLogAccumulator<A> &acc) - : arc_limit_(acc.arc_limit_), fst_(acc.fst_ ? acc.fst_->Copy() : 0), - data_(acc.data_), s_(kNoStateId), error_(acc.error_) { - data_->IncrRefCount(); - } - - ~CacheLogAccumulator() { - if (fst_) - delete fst_; - if (!data_->DecrRefCount()) - delete data_; - } - - // Arg 'arc_limit' specifies minimum # of arcs to pre-compute state. 
- void Init(const Fst<A> &fst, bool copy = false) { - if (copy) { - delete fst_; - } else if (fst_) { - FSTERROR() << "CacheLogAccumulator: initialization error."; - error_ = true; - return; - } - fst_ = fst.Copy(); - } - - void SetState(StateId s, int depth = 0) { - if (s == s_) - return; - s_ = s; - - if (data_->CacheDisabled() || error_) { - weights_ = 0; - return; - } - - if (!fst_) { - FSTERROR() << "CacheLogAccumulator::SetState: incorrectly initialized."; - error_ = true; - weights_ = 0; - return; - } - - weights_ = data_->GetWeights(s); - if ((weights_ == 0) && (fst_->NumArcs(s) >= arc_limit_)) { - weights_ = new vector<double>; - weights_->reserve(fst_->NumArcs(s) + 1); - weights_->push_back(FloatLimits<double>::PosInfinity()); - data_->AddWeights(s, weights_); - } - } - - Weight Sum(Weight w, Weight v) { - return LogPlus(w, v); - } - - template <class Iterator> - Weight Sum(Weight w, Iterator *aiter, ssize_t begin, - ssize_t end) { - if (weights_ == 0) { - Weight sum = w; - aiter->Seek(begin); - for (ssize_t pos = begin; pos < end; aiter->Next(), ++pos) - sum = LogPlus(sum, aiter->Value().weight); - return sum; - } else { - if (weights_->size() <= end) - for (aiter->Seek(weights_->size() - 1); - weights_->size() <= end; - aiter->Next()) - weights_->push_back(LogPlus(weights_->back(), - aiter->Value().weight)); - return LogPlus(w, LogMinus((*weights_)[end], (*weights_)[begin])); - } - } - - template <class Iterator> - size_t LowerBound(double w, Iterator *aiter) { - if (weights_ != 0) { - return lower_bound(weights_->begin() + 1, - weights_->end(), - w, - std::greater<double>()) - - weights_->begin() - 1; - } else { - size_t n = 0; - double x = FloatLimits<double>::PosInfinity(); - for(aiter->Reset(); !aiter->Done(); aiter->Next(), ++n) { - x = LogPlus(x, aiter->Value().weight); - if (x < w) break; - } - return n; - } - } - - bool Error() const { return error_; } - - private: - double LogPosExp(double x) { - return x == FloatLimits<double>::PosInfinity() ? 
- 0.0 : log(1.0F + exp(-x)); - } - - double LogMinusExp(double x) { - return x == FloatLimits<double>::PosInfinity() ? - 0.0 : log(1.0F - exp(-x)); - } - - Weight LogPlus(Weight w, Weight v) { - double f1 = to_log_weight_(w).Value(); - double f2 = to_log_weight_(v).Value(); - if (f1 > f2) - return to_weight_(f2 - LogPosExp(f1 - f2)); - else - return to_weight_(f1 - LogPosExp(f2 - f1)); - } - - double LogPlus(double f1, Weight v) { - double f2 = to_log_weight_(v).Value(); - if (f1 == FloatLimits<double>::PosInfinity()) - return f2; - else if (f1 > f2) - return f2 - LogPosExp(f1 - f2); - else - return f1 - LogPosExp(f2 - f1); - } - - Weight LogMinus(double f1, double f2) { - if (f1 >= f2) { - FSTERROR() << "CacheLogAcumulator::LogMinus: f1 >= f2 with f1 = " << f1 - << " and f2 = " << f2; - error_ = true; - return Weight::NoWeight(); - } - if (f2 == FloatLimits<double>::PosInfinity()) - return to_weight_(f1); - else - return to_weight_(f1 - LogMinusExp(f2 - f1)); - } - - WeightConvert<Weight, Log64Weight> to_log_weight_; - WeightConvert<Log64Weight, Weight> to_weight_; - - ssize_t arc_limit_; // Minimum # of arcs to cache a state - vector<double> *weights_; // Accumulated weights for cur. state - const Fst<A>* fst_; // Input fst - CacheLogAccumulatorData<A> *data_; // Cache data - StateId s_; // Current state - bool error_; - - void operator=(const CacheLogAccumulator<A> &); // Disallow -}; - - -// Stores shareable data for replace accumulator copies. 
-template <class Accumulator, class T> -class ReplaceAccumulatorData { - public: - typedef typename Accumulator::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef T StateTable; - typedef typename T::StateTuple StateTuple; - - ReplaceAccumulatorData() : state_table_(0) {} - - ReplaceAccumulatorData(const vector<Accumulator*> &accumulators) - : state_table_(0), accumulators_(accumulators) {} - - ~ReplaceAccumulatorData() { - for (size_t i = 0; i < fst_array_.size(); ++i) - delete fst_array_[i]; - for (size_t i = 0; i < accumulators_.size(); ++i) - delete accumulators_[i]; - } - - void Init(const vector<pair<Label, const Fst<Arc>*> > &fst_tuples, - const StateTable *state_table) { - state_table_ = state_table; - accumulators_.resize(fst_tuples.size()); - for (size_t i = 0; i < accumulators_.size(); ++i) { - if (!accumulators_[i]) - accumulators_[i] = new Accumulator; - accumulators_[i]->Init(*(fst_tuples[i].second)); - fst_array_.push_back(fst_tuples[i].second->Copy()); - } - } - - const StateTuple &GetTuple(StateId s) const { - return state_table_->Tuple(s); - } - - Accumulator *GetAccumulator(size_t i) { return accumulators_[i]; } - - const Fst<Arc> *GetFst(size_t i) const { return fst_array_[i]; } - - int RefCount() const { return ref_count_.count(); } - int IncrRefCount() { return ref_count_.Incr(); } - int DecrRefCount() { return ref_count_.Decr(); } - - private: - const T * state_table_; - vector<Accumulator*> accumulators_; - vector<const Fst<Arc>*> fst_array_; - RefCounter ref_count_; - - DISALLOW_COPY_AND_ASSIGN(ReplaceAccumulatorData); -}; - -// This class accumulates weights in a ReplaceFst. The 'Init' method -// takes as input the argument used to build the ReplaceFst and the -// ReplaceFst state table. It uses accumulators of type 'Accumulator' -// in the underlying FSTs. 
-template <class Accumulator, - class T = DefaultReplaceStateTable<typename Accumulator::Arc> > -class ReplaceAccumulator { - public: - typedef typename Accumulator::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - typedef T StateTable; - typedef typename T::StateTuple StateTuple; - - ReplaceAccumulator() - : init_(false), data_(new ReplaceAccumulatorData<Accumulator, T>()), - error_(false) {} - - ReplaceAccumulator(const vector<Accumulator*> &accumulators) - : init_(false), - data_(new ReplaceAccumulatorData<Accumulator, T>(accumulators)), - error_(false) {} - - ReplaceAccumulator(const ReplaceAccumulator<Accumulator, T> &acc) - : init_(acc.init_), data_(acc.data_), error_(acc.error_) { - if (!init_) - FSTERROR() << "ReplaceAccumulator: can't copy unintialized accumulator"; - data_->IncrRefCount(); - } - - ~ReplaceAccumulator() { - if (!data_->DecrRefCount()) - delete data_; - } - - // Does not take ownership of the state table, the state table - // is own by the ReplaceFst - void Init(const vector<pair<Label, const Fst<Arc>*> > &fst_tuples, - const StateTable *state_table) { - init_ = true; - data_->Init(fst_tuples, state_table); - } - - void SetState(StateId s) { - if (!init_) { - FSTERROR() << "ReplaceAccumulator::SetState: incorrectly initialized."; - error_ = true; - return; - } - StateTuple tuple = data_->GetTuple(s); - fst_id_ = tuple.fst_id - 1; // Replace FST ID is 1-based - data_->GetAccumulator(fst_id_)->SetState(tuple.fst_state); - if ((tuple.prefix_id != 0) && - (data_->GetFst(fst_id_)->Final(tuple.fst_state) != Weight::Zero())) { - offset_ = 1; - offset_weight_ = data_->GetFst(fst_id_)->Final(tuple.fst_state); - } else { - offset_ = 0; - offset_weight_ = Weight::Zero(); - } - } - - Weight Sum(Weight w, Weight v) { - if (error_) return Weight::NoWeight(); - return data_->GetAccumulator(fst_id_)->Sum(w, v); - } - - template <class ArcIterator> - Weight Sum(Weight w, 
ArcIterator *aiter, ssize_t begin, - ssize_t end) { - if (error_) return Weight::NoWeight(); - Weight sum = begin == end ? Weight::Zero() - : data_->GetAccumulator(fst_id_)->Sum( - w, aiter, begin ? begin - offset_ : 0, end - offset_); - if (begin == 0 && end != 0 && offset_ > 0) - sum = Sum(offset_weight_, sum); - return sum; - } - - bool Error() const { return error_; } - - private: - bool init_; - ReplaceAccumulatorData<Accumulator, T> *data_; - Label fst_id_; - size_t offset_; - Weight offset_weight_; - bool error_; - - void operator=(const ReplaceAccumulator<Accumulator, T> &); // Disallow -}; - -} // namespace fst - -#endif // FST_LIB_ACCUMULATOR_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/add-on.h b/kaldi_io/src/tools/openfst/include/fst/add-on.h deleted file mode 100644 index ee21a93..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/add-on.h +++ /dev/null @@ -1,306 +0,0 @@ -// add-on.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Fst implementation class to attach an arbitrary object with a -// read/write method to an FST and its file rep. The FST is given a -// new type name. - -#ifndef FST_LIB_ADD_ON_FST_H__ -#define FST_LIB_ADD_ON_FST_H__ - -#include <stddef.h> -#include <string> - -#include <fst/fst.h> - - -namespace fst { - -// Identifies stream data as an add-on fst. 
-static const int32 kAddOnMagicNumber = 446681434; - - -// -// Some useful add-on objects. -// - -// Nothing to save. -class NullAddOn { - public: - NullAddOn() {} - - static NullAddOn *Read(istream &istrm) { - return new NullAddOn(); - }; - - bool Write(ostream &ostrm) const { return true; } - - int RefCount() const { return ref_count_.count(); } - int IncrRefCount() { return ref_count_.Incr(); } - int DecrRefCount() { return ref_count_.Decr(); } - - private: - RefCounter ref_count_; - - DISALLOW_COPY_AND_ASSIGN(NullAddOn); -}; - - -// Create a new add-on from a pair of add-ons. -template <class A1, class A2> -class AddOnPair { - public: - // Argument reference count incremented. - AddOnPair(A1 *a1, A2 *a2) - : a1_(a1), a2_(a2) { - if (a1_) - a1_->IncrRefCount(); - if (a2_) - a2_->IncrRefCount(); - } - - ~AddOnPair() { - if (a1_ && !a1_->DecrRefCount()) - delete a1_; - if (a2_ && !a2_->DecrRefCount()) - delete a2_; - } - - A1 *First() const { return a1_; } - A2 *Second() const { return a2_; } - - static AddOnPair<A1, A2> *Read(istream &istrm) { - A1 *a1 = 0; - bool have_addon1 = false; - ReadType(istrm, &have_addon1); - if (have_addon1) - a1 = A1::Read(istrm); - - A2 *a2 = 0; - bool have_addon2 = false; - ReadType(istrm, &have_addon2); - if (have_addon2) - a2 = A2::Read(istrm); - - AddOnPair<A1, A2> *a = new AddOnPair<A1, A2>(a1, a2); - if (a1) - a1->DecrRefCount(); - if (a2) - a2->DecrRefCount(); - return a; - }; - - bool Write(ostream &ostrm) const { - bool have_addon1 = a1_; - WriteType(ostrm, have_addon1); - if (have_addon1) - a1_->Write(ostrm); - bool have_addon2 = a2_; - WriteType(ostrm, have_addon2); - if (have_addon2) - a2_->Write(ostrm); - return true; - } - - int RefCount() const { return ref_count_.count(); } - - int IncrRefCount() { - return ref_count_.Incr(); - } - - int DecrRefCount() { - return ref_count_.Decr(); - } - - private: - A1 *a1_; - A2 *a2_; - RefCounter ref_count_; - - DISALLOW_COPY_AND_ASSIGN(AddOnPair); -}; - - -// Add to an Fst F a 
type T object. T must have a 'T* Read(istream &)', -// a 'bool Write(ostream &)' method, and 'int RecCount(), 'int IncrRefCount()' -// and 'int DecrRefCount()' methods (e.g. 'MatcherData' in matcher-fst.h). -// The result is a new Fst implemenation with type name 'type'. -template<class F, class T> -class AddOnImpl : public FstImpl<typename F::Arc> { - public: - typedef typename F::Arc Arc; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - typedef typename Arc::StateId StateId; - - using FstImpl<Arc>::SetType; - using FstImpl<Arc>::SetProperties; - using FstImpl<Arc>::WriteHeader; - - // If 't' is non-zero, its reference count is incremented. - AddOnImpl(const F &fst, const string &type, T *t = 0) - : fst_(fst), t_(t) { - SetType(type); - SetProperties(fst_.Properties(kFstProperties, false)); - if (t_) - t_->IncrRefCount(); - } - - // If 't' is non-zero, its reference count is incremented. - AddOnImpl(const Fst<Arc> &fst, const string &type, T *t = 0) - : fst_(fst), t_(t) { - SetType(type); - SetProperties(fst_.Properties(kFstProperties, false)); - if (t_) - t_->IncrRefCount(); - } - - AddOnImpl(const AddOnImpl<F, T> &impl) - : fst_(impl.fst_), t_(impl.t_) { - SetType(impl.Type()); - SetProperties(fst_.Properties(kCopyProperties, false)); - if (t_) - t_->IncrRefCount(); - } - - ~AddOnImpl() { - if (t_ && !t_->DecrRefCount()) - delete t_; - } - - StateId Start() const { return fst_.Start(); } - Weight Final(StateId s) const { return fst_.Final(s); } - size_t NumArcs(StateId s) const { return fst_.NumArcs(s); } - - size_t NumInputEpsilons(StateId s) const { - return fst_.NumInputEpsilons(s); - } - - size_t NumOutputEpsilons(StateId s) const { - return fst_.NumOutputEpsilons(s); - } - - size_t NumStates() const { return fst_.NumStates(); } - - static AddOnImpl<F, T> *Read(istream &strm, const FstReadOptions &opts) { - FstReadOptions nopts(opts); - FstHeader hdr; - if (!nopts.header) { - hdr.Read(strm, nopts.source); - nopts.header = &hdr; - 
} - AddOnImpl<F, T> *impl = new AddOnImpl<F, T>(nopts.header->FstType()); - if (!impl->ReadHeader(strm, nopts, kMinFileVersion, &hdr)) - return 0; - delete impl; // Used here only for checking types. - - int32 magic_number = 0; - ReadType(strm, &magic_number); // Ensures this is an add-on Fst. - if (magic_number != kAddOnMagicNumber) { - LOG(ERROR) << "AddOnImpl::Read: Bad add-on header: " << nopts.source; - return 0; - } - - FstReadOptions fopts(opts); - fopts.header = 0; // Contained header was written out. - F *fst = F::Read(strm, fopts); - if (!fst) - return 0; - - T *t = 0; - bool have_addon = false; - ReadType(strm, &have_addon); - if (have_addon) { // Read add-on object if present. - t = T::Read(strm); - if (!t) - return 0; - } - impl = new AddOnImpl<F, T>(*fst, nopts.header->FstType(), t); - delete fst; - if (t) - t->DecrRefCount(); - return impl; - } - - bool Write(ostream &strm, const FstWriteOptions &opts) const { - FstHeader hdr; - FstWriteOptions nopts(opts); - nopts.write_isymbols = false; // Let contained FST hold any symbols. - nopts.write_osymbols = false; - WriteHeader(strm, nopts, kFileVersion, &hdr); - WriteType(strm, kAddOnMagicNumber); // Ensures this is an add-on Fst. - FstWriteOptions fopts(opts); - fopts.write_header = true; // Force writing contained header. - if (!fst_.Write(strm, fopts)) - return false; - bool have_addon = t_; - WriteType(strm, have_addon); - if (have_addon) // Write add-on object if present. - t_->Write(strm); - return true; - } - - void InitStateIterator(StateIteratorData<Arc> *data) const { - fst_.InitStateIterator(data); - } - - void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { - fst_.InitArcIterator(s, data); - } - - F &GetFst() { return fst_; } - - const F &GetFst() const { return fst_; } - - T *GetAddOn() const { return t_; } - - // If 't' is non-zero, its reference count is incremented. 
- void SetAddOn(T *t) { - if (t == t_) - return; - if (t_ && !t_->DecrRefCount()) - delete t_; - t_ = t; - if (t_) - t_->IncrRefCount(); - } - - private: - explicit AddOnImpl(const string &type) : t_(0) { - SetType(type); - SetProperties(kExpanded); - } - - // Current file format version - static const int kFileVersion = 1; - // Minimum file format version supported - static const int kMinFileVersion = 1; - - F fst_; - T *t_; - - void operator=(const AddOnImpl<F, T> &fst); // Disallow -}; - -template <class F, class T> const int AddOnImpl<F, T>::kFileVersion; -template <class F, class T> const int AddOnImpl<F, T>::kMinFileVersion; - - -} // namespace fst - -#endif // FST_LIB_ADD_ON_FST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/arc-map.h b/kaldi_io/src/tools/openfst/include/fst/arc-map.h deleted file mode 100644 index 914f81c..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/arc-map.h +++ /dev/null @@ -1,1146 +0,0 @@ -// arc-map.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Class to map over/transform arcs e.g., change semirings or -// implement project/invert. Consider using when operation does -// not change the number of arcs (except possibly superfinal arcs). 
- -#ifndef FST_LIB_ARC_MAP_H__ -#define FST_LIB_ARC_MAP_H__ - -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <string> -#include <utility> -using std::pair; using std::make_pair; - -#include <fst/cache.h> -#include <fst/mutable-fst.h> - - -namespace fst { - -// This determines how final weights are mapped. -enum MapFinalAction { - // A final weight is mapped into a final weight. An error - // is raised if this is not possible. - MAP_NO_SUPERFINAL, - - // A final weight is mapped to an arc to the superfinal state - // when the result cannot be represented as a final weight. - // The superfinal state will be added only if it is needed. - MAP_ALLOW_SUPERFINAL, - - // A final weight is mapped to an arc to the superfinal state - // unless the result can be represented as a final weight of weight - // Zero(). The superfinal state is always added (if the input is - // not the empty Fst). - MAP_REQUIRE_SUPERFINAL -}; - -// This determines how symbol tables are mapped. -enum MapSymbolsAction { - // Symbols should be cleared in the result by the map. - MAP_CLEAR_SYMBOLS, - - // Symbols should be copied from the input FST by the map. - MAP_COPY_SYMBOLS, - - // Symbols should not be modified in the result by the map itself. - // (They may set by the mapper). - MAP_NOOP_SYMBOLS -}; - -// ArcMapper Interface - class determinies how arcs and final weights -// are mapped. Useful for implementing operations that do not change -// the number of arcs (expect possibly superfinal arcs). -// -// class ArcMapper { -// public: -// typedef A FromArc; -// typedef B ToArc; -// -// // Maps an arc type A to arc type B. -// B operator()(const A &arc); -// // Specifies final action the mapper requires (see above). -// // The mapper will be passed final weights as arcs of the -// // form A(0, 0, weight, kNoStateId). -// MapFinalAction FinalAction() const; -// // Specifies input symbol table action the mapper requires (see above). 
//   MapSymbolsAction InputSymbolsAction() const;
//   // Specifies output symbol table action the mapper requires (see above).
//   MapSymbolsAction OutputSymbolsAction() const;
//   // This specifies the known properties of an Fst mapped by this
//   // mapper. It takes as argument the input Fst's known properties.
//   uint64 Properties(uint64 props) const;
// };
//
// The ArcMap functions and classes below will use the FinalAction()
// method of the mapper to determine how to treat final weights,
// e.g. whether to add a superfinal state. They will use the Properties()
// method to set the result Fst properties.
//
// We include various map versions below. One dimension of
// variation is whether the mapping mutates its input, writes to a
// new result Fst, or is an on-the-fly Fst. Another dimension is how
// we pass the mapper. We allow passing the mapper by pointer
// for cases where we need to change the state of the user's mapper.
// This is the case with the encode mapper, which is reused during
// decoding. We also include map versions that pass the mapper
// by value or const reference when this suffices.


// Maps an arc type A using a mapper function object C, passed
// by pointer. This version modifies its Fst input.
-template<class A, class C> -void ArcMap(MutableFst<A> *fst, C* mapper) { - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) - fst->SetInputSymbols(0); - - if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) - fst->SetOutputSymbols(0); - - if (fst->Start() == kNoStateId) - return; - - uint64 props = fst->Properties(kFstProperties, false); - - MapFinalAction final_action = mapper->FinalAction(); - StateId superfinal = kNoStateId; - if (final_action == MAP_REQUIRE_SUPERFINAL) { - superfinal = fst->AddState(); - fst->SetFinal(superfinal, Weight::One()); - } - - for (StateId s = 0; s < fst->NumStates(); ++s) { - for (MutableArcIterator< MutableFst<A> > aiter(fst, s); - !aiter.Done(); aiter.Next()) { - const A &arc = aiter.Value(); - aiter.SetValue((*mapper)(arc)); - } - - switch (final_action) { - case MAP_NO_SUPERFINAL: - default: { - A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId)); - if (final_arc.ilabel != 0 || final_arc.olabel != 0) { - FSTERROR() << "ArcMap: non-zero arc labels for superfinal arc"; - fst->SetProperties(kError, kError); - } - - fst->SetFinal(s, final_arc.weight); - break; - } - case MAP_ALLOW_SUPERFINAL: { - if (s != superfinal) { - A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId)); - if (final_arc.ilabel != 0 || final_arc.olabel != 0) { - // Add a superfinal state if not already done. 
- if (superfinal == kNoStateId) { - superfinal = fst->AddState(); - fst->SetFinal(superfinal, Weight::One()); - } - final_arc.nextstate = superfinal; - fst->AddArc(s, final_arc); - fst->SetFinal(s, Weight::Zero()); - } else { - fst->SetFinal(s, final_arc.weight); - } - break; - } - } - case MAP_REQUIRE_SUPERFINAL: { - if (s != superfinal) { - A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId)); - if (final_arc.ilabel != 0 || final_arc.olabel != 0 || - final_arc.weight != Weight::Zero()) - fst->AddArc(s, A(final_arc.ilabel, final_arc.olabel, - final_arc.weight, superfinal)); - fst->SetFinal(s, Weight::Zero()); - } - break; - } - } - } - fst->SetProperties(mapper->Properties(props), kFstProperties); -} - - -// Maps an arc type A using a mapper function object C, passed -// by value. This version modifies its Fst input. -template<class A, class C> -void ArcMap(MutableFst<A> *fst, C mapper) { - ArcMap(fst, &mapper); -} - - -// Maps an arc type A to an arc type B using mapper function -// object C, passed by pointer. This version writes the mapped -// input Fst to an output MutableFst. 
-template<class A, class B, class C> -void ArcMap(const Fst<A> &ifst, MutableFst<B> *ofst, C* mapper) { - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - ofst->DeleteStates(); - - if (mapper->InputSymbolsAction() == MAP_COPY_SYMBOLS) - ofst->SetInputSymbols(ifst.InputSymbols()); - else if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) - ofst->SetInputSymbols(0); - - if (mapper->OutputSymbolsAction() == MAP_COPY_SYMBOLS) - ofst->SetOutputSymbols(ifst.OutputSymbols()); - else if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) - ofst->SetOutputSymbols(0); - - uint64 iprops = ifst.Properties(kCopyProperties, false); - - if (ifst.Start() == kNoStateId) { - if (iprops & kError) ofst->SetProperties(kError, kError); - return; - } - - MapFinalAction final_action = mapper->FinalAction(); - if (ifst.Properties(kExpanded, false)) { - ofst->ReserveStates(CountStates(ifst) + - final_action == MAP_NO_SUPERFINAL ? 0 : 1); - } - - // Add all states. - for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next()) - ofst->AddState(); - - StateId superfinal = kNoStateId; - if (final_action == MAP_REQUIRE_SUPERFINAL) { - superfinal = ofst->AddState(); - ofst->SetFinal(superfinal, B::Weight::One()); - } - for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next()) { - StateId s = siter.Value(); - if (s == ifst.Start()) - ofst->SetStart(s); - - ofst->ReserveArcs(s, ifst.NumArcs(s)); - for (ArcIterator< Fst<A> > aiter(ifst, s); !aiter.Done(); aiter.Next()) - ofst->AddArc(s, (*mapper)(aiter.Value())); - - switch (final_action) { - case MAP_NO_SUPERFINAL: - default: { - B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId)); - if (final_arc.ilabel != 0 || final_arc.olabel != 0) { - FSTERROR() << "ArcMap: non-zero arc labels for superfinal arc"; - ofst->SetProperties(kError, kError); - } - ofst->SetFinal(s, final_arc.weight); - break; - } - case MAP_ALLOW_SUPERFINAL: { - B final_arc = (*mapper)(A(0, 0, ifst.Final(s), 
kNoStateId)); - if (final_arc.ilabel != 0 || final_arc.olabel != 0) { - // Add a superfinal state if not already done. - if (superfinal == kNoStateId) { - superfinal = ofst->AddState(); - ofst->SetFinal(superfinal, B::Weight::One()); - } - final_arc.nextstate = superfinal; - ofst->AddArc(s, final_arc); - ofst->SetFinal(s, B::Weight::Zero()); - } else { - ofst->SetFinal(s, final_arc.weight); - } - break; - } - case MAP_REQUIRE_SUPERFINAL: { - B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId)); - if (final_arc.ilabel != 0 || final_arc.olabel != 0 || - final_arc.weight != B::Weight::Zero()) - ofst->AddArc(s, B(final_arc.ilabel, final_arc.olabel, - final_arc.weight, superfinal)); - ofst->SetFinal(s, B::Weight::Zero()); - break; - } - } - } - uint64 oprops = ofst->Properties(kFstProperties, false); - ofst->SetProperties(mapper->Properties(iprops) | oprops, kFstProperties); -} - -// Maps an arc type A to an arc type B using mapper function -// object C, passed by value. This version writes the mapped input -// Fst to an output MutableFst. -template<class A, class B, class C> -void ArcMap(const Fst<A> &ifst, MutableFst<B> *ofst, C mapper) { - ArcMap(ifst, ofst, &mapper); -} - - -struct ArcMapFstOptions : public CacheOptions { - // ArcMapFst default caching behaviour is to do no caching. Most - // mappers are cheap and therefore we save memory by not doing - // caching. - ArcMapFstOptions() : CacheOptions(true, 0) {} - ArcMapFstOptions(const CacheOptions& opts) : CacheOptions(opts) {} -}; - - -template <class A, class B, class C> class ArcMapFst; - -// Implementation of delayed ArcMapFst. 
// Lazily computes the image of an Fst<A> under mapper C, yielding arcs of
// type B. Output state ids equal input state ids, except that ids at or
// after the superfinal state (when there is one) are shifted up by one;
// FindIState()/FindOState() convert between the two numberings.
template <class A, class B, class C>
class ArcMapFstImpl : public CacheImpl<B> {
 public:
  using FstImpl<B>::SetType;
  using FstImpl<B>::SetProperties;
  using FstImpl<B>::SetInputSymbols;
  using FstImpl<B>::SetOutputSymbols;

  using VectorFstBaseImpl<typename CacheImpl<B>::State>::NumStates;

  using CacheImpl<B>::PushArc;
  using CacheImpl<B>::HasArcs;
  using CacheImpl<B>::HasFinal;
  using CacheImpl<B>::HasStart;
  using CacheImpl<B>::SetArcs;
  using CacheImpl<B>::SetFinal;
  using CacheImpl<B>::SetStart;

  friend class StateIterator< ArcMapFst<A, B, C> >;

  typedef B Arc;
  typedef typename B::Weight Weight;
  typedef typename B::StateId StateId;

  // Takes a private copy of the mapper; the copy is deleted with the impl.
  ArcMapFstImpl(const Fst<A> &fst, const C &mapper,
                 const ArcMapFstOptions& opts)
      : CacheImpl<B>(opts),
        fst_(fst.Copy()),
        mapper_(new C(mapper)),
        own_mapper_(true),
        superfinal_(kNoStateId),
        nstates_(0) {
    Init();
  }

  // Uses the caller's mapper object directly; it is not deleted here.
  ArcMapFstImpl(const Fst<A> &fst, C *mapper,
                 const ArcMapFstOptions& opts)
      : CacheImpl<B>(opts),
        fst_(fst.Copy()),
        mapper_(mapper),
        own_mapper_(false),
        superfinal_(kNoStateId),
        nstates_(0) {
    Init();
  }

  // Copy: the input Fst is copied with Copy(true) and the mapper is always
  // duplicated, so the new impl owns its mapper even if 'impl' did not.
  ArcMapFstImpl(const ArcMapFstImpl<A, B, C> &impl)
      : CacheImpl<B>(impl),
        fst_(impl.fst_->Copy(true)),
        mapper_(new C(*impl.mapper_)),
        own_mapper_(true),
        superfinal_(kNoStateId),
        nstates_(0) {
    Init();
  }

  ~ArcMapFstImpl() {
    delete fst_;
    if (own_mapper_) delete mapper_;
  }

  StateId Start() {
    if (!HasStart())
      SetStart(FindOState(fst_->Start()));
    return CacheImpl<B>::Start();
  }

  // Computes (and caches) the final weight of output state s according to
  // the final action chosen in Init().
  Weight Final(StateId s) {
    if (!HasFinal(s)) {
      switch (final_action_) {
        case MAP_NO_SUPERFINAL:
        default: {
          B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
                                        kNoStateId));
          if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
            FSTERROR() << "ArcMapFst: non-zero arc labels for superfinal arc";
            SetProperties(kError, kError);
          }
          SetFinal(s, final_arc.weight);
          break;
        }
        case MAP_ALLOW_SUPERFINAL: {
          if (s == superfinal_) {
            SetFinal(s, Weight::One());
          } else {
            B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
                                          kNoStateId));
            // A mapped final weight that carries labels is emitted as an
            // arc to the superfinal state by Expand(), so the state itself
            // becomes non-final.
            if (final_arc.ilabel == 0 && final_arc.olabel == 0)
              SetFinal(s, final_arc.weight);
            else
              SetFinal(s, Weight::Zero());
          }
          break;
        }
        case MAP_REQUIRE_SUPERFINAL: {
          SetFinal(s, s == superfinal_ ? Weight::One() : Weight::Zero());
          break;
        }
      }
    }
    return CacheImpl<B>::Final(s);
  }

  size_t NumArcs(StateId s) {
    if (!HasArcs(s))
      Expand(s);
    return CacheImpl<B>::NumArcs(s);
  }

  size_t NumInputEpsilons(StateId s) {
    if (!HasArcs(s))
      Expand(s);
    return CacheImpl<B>::NumInputEpsilons(s);
  }

  size_t NumOutputEpsilons(StateId s) {
    if (!HasArcs(s))
      Expand(s);
    return CacheImpl<B>::NumOutputEpsilons(s);
  }

  uint64 Properties() const { return Properties(kFstProperties); }

  // Set error if found; return FST impl properties.
  uint64 Properties(uint64 mask) const {
    if ((mask & kError) && (fst_->Properties(kError, false) ||
                            (mapper_->Properties(0) & kError)))
      SetProperties(kError, kError);
    return FstImpl<Arc>::Properties(mask);
  }

  void InitArcIterator(StateId s, ArcIteratorData<B> *data) {
    if (!HasArcs(s))
      Expand(s);
    CacheImpl<B>::InitArcIterator(s, data);
  }

  // Computes and caches the outgoing arcs of output state s.
  void Expand(StateId s) {
    // The superfinal state has no outgoing arcs.
    if (s == superfinal_) { SetArcs(s); return; }

    // Map the arcs of the corresponding input state, renumbering each
    // destination into the output numbering before calling the mapper.
    for (ArcIterator< Fst<A> > aiter(*fst_, FindIState(s));
         !aiter.Done(); aiter.Next()) {
      A aarc(aiter.Value());
      aarc.nextstate = FindOState(aarc.nextstate);
      const B& barc = (*mapper_)(aarc);
      PushArc(s, barc);
    }

    // Check for superfinal arcs.
    if (!HasFinal(s) || Final(s) == Weight::Zero())
      switch (final_action_) {
        case MAP_NO_SUPERFINAL:
        default:
          break;
        case MAP_ALLOW_SUPERFINAL: {
          B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
                                        kNoStateId));
          if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
            // The superfinal state is allocated on first use and takes the
            // next unused output state id.
            if (superfinal_ == kNoStateId)
              superfinal_ = nstates_++;
            final_arc.nextstate = superfinal_;
            PushArc(s, final_arc);
          }
          break;
        }
        case MAP_REQUIRE_SUPERFINAL: {
          B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
                                        kNoStateId));
          if (final_arc.ilabel != 0 || final_arc.olabel != 0 ||
              final_arc.weight != B::Weight::Zero())
            PushArc(s, B(final_arc.ilabel, final_arc.olabel,
                         final_arc.weight, superfinal_));
          break;
        }
      }
    SetArcs(s);
  }

 private:
  // Shared constructor tail: sets the Fst type, applies the mapper's
  // symbol-table actions and chooses the final action and properties.
  void Init() {
    SetType("map");

    if (mapper_->InputSymbolsAction() == MAP_COPY_SYMBOLS)
      SetInputSymbols(fst_->InputSymbols());
    else if (mapper_->InputSymbolsAction() == MAP_CLEAR_SYMBOLS)
      SetInputSymbols(0);

    if (mapper_->OutputSymbolsAction() == MAP_COPY_SYMBOLS)
      SetOutputSymbols(fst_->OutputSymbols());
    else if (mapper_->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS)
      SetOutputSymbols(0);

    if (fst_->Start() == kNoStateId) {
      // Empty input: never add a superfinal state.
      final_action_ = MAP_NO_SUPERFINAL;
      SetProperties(kNullProperties);
    } else {
      final_action_ = mapper_->FinalAction();
      uint64 props = fst_->Properties(kCopyProperties, false);
      SetProperties(mapper_->Properties(props));
      // A required superfinal state always gets output id 0, which shifts
      // every input state id up by one.
      if (final_action_ == MAP_REQUIRE_SUPERFINAL)
        superfinal_ = 0;
    }
  }

  // Maps from output state to input state.
  StateId FindIState(StateId s) {
    if (superfinal_ == kNoStateId || s < superfinal_)
      return s;
    else
      return s - 1;
  }

  // Maps from input state to output state.
  // Also grows nstates_ so that it stays above every output id handed out.
  StateId FindOState(StateId is) {
    StateId os;
    if (superfinal_ == kNoStateId || is < superfinal_)
      os = is;
    else
      os = is + 1;

    if (os >= nstates_)
      nstates_ = os + 1;

    return os;
  }


  const Fst<A> *fst_;            // Private copy of the input Fst (owned).
  C*   mapper_;                  // Mapper; owned only if own_mapper_.
  bool own_mapper_;
  MapFinalAction final_action_;

  StateId superfinal_;           // Output id of superfinal state, or kNoStateId.
  StateId nstates_;              // One past the largest output id seen so far.

  void operator=(const ArcMapFstImpl<A, B, C> &);  // disallow
};


// Maps an arc type A to an arc type B using Mapper function object
// C. This version is a delayed Fst.
template <class A, class B, class C>
class ArcMapFst : public ImplToFst< ArcMapFstImpl<A, B, C> > {
 public:
  friend class ArcIterator< ArcMapFst<A, B, C> >;
  friend class StateIterator< ArcMapFst<A, B, C> >;

  typedef B Arc;
  typedef typename B::Weight Weight;
  typedef typename B::StateId StateId;
  typedef CacheState<B> State;
  typedef ArcMapFstImpl<A, B, C> Impl;

  ArcMapFst(const Fst<A> &fst, const C &mapper, const ArcMapFstOptions& opts)
      : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {}

  ArcMapFst(const Fst<A> &fst, C* mapper, const ArcMapFstOptions& opts)
      : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {}

  ArcMapFst(const Fst<A> &fst, const C &mapper)
      : ImplToFst<Impl>(new Impl(fst, mapper, ArcMapFstOptions())) {}

  ArcMapFst(const Fst<A> &fst, C* mapper)
      : ImplToFst<Impl>(new Impl(fst, mapper, ArcMapFstOptions())) {}

  // See Fst<>::Copy() for doc.
  ArcMapFst(const ArcMapFst<A, B, C> &fst, bool safe = false)
    : ImplToFst<Impl>(fst, safe) {}

  // Get a copy of this ArcMapFst. See Fst<>::Copy() for further doc.
  virtual ArcMapFst<A, B, C> *Copy(bool safe = false) const {
    return new ArcMapFst<A, B, C>(*this, safe);
  }

  virtual inline void InitStateIterator(StateIteratorData<B> *data) const;

  virtual void InitArcIterator(StateId s, ArcIteratorData<B> *data) const {
    GetImpl()->InitArcIterator(s, data);
  }

 private:
  // Makes visible to friends.
  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }

  void operator=(const ArcMapFst<A, B, C> &fst);  // disallow
};


// Specialization for ArcMapFst.
// Walks the input Fst's states and, when a superfinal state is required
// (or found to be needed under MAP_ALLOW_SUPERFINAL), yields one extra
// state after the input states are exhausted.
template<class A, class B, class C>
class StateIterator< ArcMapFst<A, B, C> > : public StateIteratorBase<B> {
 public:
  typedef typename B::StateId StateId;

  explicit StateIterator(const ArcMapFst<A, B, C> &fst)
      : impl_(fst.GetImpl()), siter_(*impl_->fst_), s_(0),
        superfinal_(impl_->final_action_ == MAP_REQUIRE_SUPERFINAL)
  { CheckSuperfinal(); }

  bool Done() const { return siter_.Done() && !superfinal_; }

  StateId Value() const { return s_; }

  void Next() {
    ++s_;
    if (!siter_.Done()) {
      siter_.Next();
      CheckSuperfinal();
    }
    else if (superfinal_)
      superfinal_ = false;
  }

  void Reset() {
    s_ = 0;
    siter_.Reset();
    superfinal_ = impl_->final_action_ == MAP_REQUIRE_SUPERFINAL;
    CheckSuperfinal();
  }

 private:
  // This allows base-class virtual access to non-virtual derived-
  // class members of the same name. It makes the derived class more
  // efficient to use but unsafe to further derive.
  bool Done_() const { return Done(); }
  StateId Value_() const { return Value(); }
  void Next_() { Next(); }
  void Reset_() { Reset(); }

  // Under MAP_ALLOW_SUPERFINAL, records that a superfinal state exists as
  // soon as the current state's mapped final weight carries a label.
  void CheckSuperfinal() {
    if (impl_->final_action_ != MAP_ALLOW_SUPERFINAL || superfinal_)
      return;
    if (!siter_.Done()) {
      B final_arc = (*impl_->mapper_)(A(0, 0, impl_->fst_->Final(s_),
                                           kNoStateId));
      if (final_arc.ilabel != 0 || final_arc.olabel != 0)
        superfinal_ = true;
    }
  }

  const ArcMapFstImpl<A, B, C> *impl_;
  StateIterator< Fst<A> > siter_;
  StateId s_;
  bool superfinal_;    // true if there is a superfinal state and not done

  DISALLOW_COPY_AND_ASSIGN(StateIterator);
};


// Specialization for ArcMapFst.
-template <class A, class B, class C> -class ArcIterator< ArcMapFst<A, B, C> > - : public CacheArcIterator< ArcMapFst<A, B, C> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const ArcMapFst<A, B, C> &fst, StateId s) - : CacheArcIterator< ArcMapFst<A, B, C> >(fst.GetImpl(), s) { - if (!fst.GetImpl()->HasArcs(s)) - fst.GetImpl()->Expand(s); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - -template <class A, class B, class C> inline -void ArcMapFst<A, B, C>::InitStateIterator(StateIteratorData<B> *data) - const { - data->base = new StateIterator< ArcMapFst<A, B, C> >(*this); -} - - -// -// Utility Mappers -// - -// Mapper that returns its input. -template <class A> -struct IdentityArcMapper { - typedef A FromArc; - typedef A ToArc; - - A operator()(const A &arc) const { return arc; } - - MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } - - MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } - - MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} - - uint64 Properties(uint64 props) const { return props; } -}; - - -// Mapper that returns its input with final states redirected to -// a single super-final state. -template <class A> -struct SuperFinalMapper { - typedef A FromArc; - typedef A ToArc; - - A operator()(const A &arc) const { return arc; } - - MapFinalAction FinalAction() const { return MAP_REQUIRE_SUPERFINAL; } - - MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } - - MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} - - uint64 Properties(uint64 props) const { - return props & kAddSuperFinalProperties; - } -}; - - -// Mapper that leaves labels and nextstate unchanged and constructs a new weight -// from the underlying value of the arc weight. Requires that there is a -// WeightConvert class specialization that converts the weights. 
template <class A, class B>
class WeightConvertMapper {
 public:
  typedef A FromArc;
  typedef B ToArc;
  typedef typename FromArc::Weight FromWeight;
  typedef typename ToArc::Weight ToWeight;

  // Copies labels and destination; only the weight is converted.
  ToArc operator()(const FromArc &arc) const {
    return ToArc(arc.ilabel, arc.olabel,
                 convert_weight_(arc.weight), arc.nextstate);
  }

  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }

  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }

  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;}

  uint64 Properties(uint64 props) const { return props; }

 private:
  WeightConvert<FromWeight, ToWeight> convert_weight_;
};

// Non-precision-changing weight conversions.
// Consider using more efficient Cast (fst.h) instead.
typedef WeightConvertMapper<StdArc, LogArc> StdToLogMapper;
typedef WeightConvertMapper<LogArc, StdArc> LogToStdMapper;

// Precision-changing weight conversions.
typedef WeightConvertMapper<StdArc, Log64Arc> StdToLog64Mapper;
typedef WeightConvertMapper<LogArc, Log64Arc> LogToLog64Mapper;
typedef WeightConvertMapper<Log64Arc, StdArc> Log64ToStdMapper;
typedef WeightConvertMapper<Log64Arc, LogArc> Log64ToLogMapper;

// Mapper from A to GallicArc<A>.
// The output label of each arc is moved into the string component of the
// Gallic weight; the resulting arc carries the input label on both sides.
template <class A, StringType S = STRING_LEFT>
struct ToGallicMapper {
  typedef A FromArc;
  typedef GallicArc<A, S> ToArc;

  typedef StringWeight<typename A::Label, S> SW;
  typedef typename A::Weight AW;
  typedef typename GallicArc<A, S>::Weight GW;

  ToArc operator()(const A &arc) const {
    // 'Super-final' arc.
    if (arc.nextstate == kNoStateId && arc.weight != AW::Zero())
      return ToArc(0, 0, GW(SW::One(), arc.weight), kNoStateId);
    // 'Super-non-final' arc.
    else if (arc.nextstate == kNoStateId)
      return ToArc(0, 0, GW(SW::Zero(), arc.weight), kNoStateId);
    // Epsilon label.
    else if (arc.olabel == 0)
      return ToArc(arc.ilabel, arc.ilabel,
                   GW(SW::One(), arc.weight), arc.nextstate);
    // Regular label.
    else
      return ToArc(arc.ilabel, arc.ilabel,
                   GW(SW(arc.olabel), arc.weight), arc.nextstate);
  }

  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }

  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }

  MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;}

  uint64 Properties(uint64 props) const {
    return ProjectProperties(props, true) & kWeightInvariantProperties;
  }
};


// Mapper from GallicArc<A> to A.
// Only string weights of length 0 or 1 can be turned back into an output
// label; anything else is reported through FSTERROR() and the kError
// property returned by Properties().
template <class A, StringType S = STRING_LEFT>
struct FromGallicMapper {
  typedef GallicArc<A, S> FromArc;
  typedef A ToArc;

  typedef typename A::Label Label;
  typedef StringWeight<Label, S> SW;
  typedef typename A::Weight AW;
  typedef typename GallicArc<A, S>::Weight GW;

  // superfinal_label is used as the input label of arcs created from
  // final weights whose string component is a non-epsilon label.
  FromGallicMapper(Label superfinal_label = 0)
      : superfinal_label_(superfinal_label), error_(false) {}

  A operator()(const FromArc &arc) const {
    // 'Super-non-final' arc.
    if (arc.nextstate == kNoStateId && arc.weight == GW::Zero())
      return A(arc.ilabel, 0, AW::Zero(), kNoStateId);

    SW w1 = arc.weight.Value1();
    AW w2 = arc.weight.Value2();
    StringWeightIterator<Label, S> iter1(w1);

    // An empty string maps to epsilon (0); a single label maps to itself.
    Label l = w1.Size() == 1 ? iter1.Value() : 0;

    if (l == kStringInfinity || l == kStringBad ||
        arc.ilabel != arc.olabel || w1.Size() > 1) {
      FSTERROR() << "FromGallicMapper: unrepesentable weight";
      error_ = true;
    }

    // Final-weight pseudo-arc that needs an output label: give it the
    // configured superfinal input label.
    if (arc.ilabel == 0 && l != 0 && arc.nextstate == kNoStateId)
      return A(superfinal_label_, l, w2, arc.nextstate);
    else
      return A(arc.ilabel, l, w2, arc.nextstate);
  }

  MapFinalAction FinalAction() const { return MAP_ALLOW_SUPERFINAL; }

  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }

  MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;}

  uint64 Properties(uint64 inprops) const {
    uint64 outprops = inprops & kOLabelInvariantProperties &
        kWeightInvariantProperties & kAddSuperFinalProperties;
    if (error_)
      outprops |= kError;
    return outprops;
  }

 private:
  Label superfinal_label_;
  mutable bool error_;   // Set from the const operator() on bad input.
};


// Mapper from GallicArc<A> to A.
// Each distinct non-empty string weight is assigned a fresh output label
// (1, 2, ...). As a side effect the mapper fills the Fst given to the
// constructor with, for every new label, a cycle through its single
// start/final state whose first arc reads the new label and whose arcs
// emit the labels of the string weight in order.
template <class A, StringType S = STRING_LEFT>
struct GallicToNewSymbolsMapper {
  typedef GallicArc<A, S> FromArc;
  typedef A ToArc;

  typedef typename A::StateId StateId;
  typedef typename A::Label Label;
  typedef StringWeight<Label, S> SW;
  typedef typename A::Weight AW;
  typedef typename GallicArc<A, S>::Weight GW;

  // Resets *fst to a single state that is both start and final. If the
  // Fst has output symbols, a new input symbol table named
  // "<output table name>_from_gallic" is installed on it.
  GallicToNewSymbolsMapper(MutableFst<ToArc> *fst)
      : fst_(fst), lmax_(0), osymbols_(fst->OutputSymbols()),
        isymbols_(0), error_(false) {
    fst_->DeleteStates();
    state_ = fst_->AddState();
    fst_->SetStart(state_);
    fst_->SetFinal(state_, AW::One());
    if (osymbols_) {
      string name = osymbols_->Name() + "_from_gallic";
      fst_->SetInputSymbols(new SymbolTable(name));
      isymbols_ = fst_->MutableInputSymbols();
      isymbols_->AddSymbol(osymbols_->Find((int64) 0), 0);
    } else {
      fst_->SetInputSymbols(0);
    }
  }

  A operator()(const FromArc &arc) {
    // 'Super-non-final' arc.
    if (arc.nextstate == kNoStateId && arc.weight == GW::Zero())
      return A(arc.ilabel, 0, AW::Zero(), kNoStateId);

    SW w1 = arc.weight.Value1();
    AW w2 = arc.weight.Value2();
    Label l;

    if (w1.Size() == 0) {
      l = 0;
    } else {
      typename Map::iterator miter = map_.find(w1);
      if (miter != map_.end()) {
        // String weight seen before: reuse its label.
        l = (*miter).second;
      } else {
        l = ++lmax_;
        map_.insert(pair<const SW, Label>(w1, l));
        StringWeightIterator<Label, S> iter1(w1);
        StateId n;
        string s;
        // Lay down one arc per label of the string; the last arc returns
        // to state_, intermediate arcs go to freshly added states. Only
        // the first arc carries the new label on its input side.
        for(size_t i = 0, p = state_;
            i < w1.Size();
            ++i, iter1.Next(), p = n) {
          n = i == w1.Size() - 1 ? state_ : fst_->AddState();
          fst_->AddArc(p, ToArc(i ? 0 : l, iter1.Value(), AW::One(), n));
          if (isymbols_) {
            // The new symbol's name joins the component symbols with '_'.
            if (i) s = s + "_";
            s = s + osymbols_->Find(iter1.Value());
          }
        }
        if (isymbols_)
          isymbols_->AddSymbol(s, l);
      }
    }

    if (l == kStringInfinity || l == kStringBad || arc.ilabel != arc.olabel) {
      FSTERROR() << "GallicToNewSymbolMapper: unrepesentable weight";
      error_ = true;
    }

    return A(arc.ilabel, l, w2, arc.nextstate);
  }

  MapFinalAction FinalAction() const { return MAP_ALLOW_SUPERFINAL; }

  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }

  MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; }

  uint64 Properties(uint64 inprops) const {
    uint64 outprops = inprops & kOLabelInvariantProperties &
        kWeightInvariantProperties & kAddSuperFinalProperties;
    if (error_)
      outprops |= kError;
    return outprops;
  }

 private:
  // Hash functor for string weights, used as the map's hasher.
  class StringKey {
   public:
    size_t operator()(const SW &x) const {
      return x.Hash();
    }
  };

  typedef unordered_map<SW, Label, StringKey> Map;

  MutableFst<ToArc> *fst_;        // Fst receiving the label expansions.
  Map map_;                       // String weight -> assigned label.
  Label lmax_;                    // Largest label assigned so far.
  StateId state_;                 // The start/final state of *fst_.
  const SymbolTable *osymbols_;
  SymbolTable *isymbols_;
  mutable bool error_;

  DISALLOW_COPY_AND_ASSIGN(GallicToNewSymbolsMapper);
};


// Mapper to add a constant to all weights.
-template <class A> -struct PlusMapper { - typedef A FromArc; - typedef A ToArc; - typedef typename A::Weight Weight; - - explicit PlusMapper(Weight w) : weight_(w) {} - - A operator()(const A &arc) const { - if (arc.weight == Weight::Zero()) - return arc; - Weight w = Plus(arc.weight, weight_); - return A(arc.ilabel, arc.olabel, w, arc.nextstate); - } - - MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } - - MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } - - MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} - - uint64 Properties(uint64 props) const { - return props & kWeightInvariantProperties; - } - - private: - - - - Weight weight_; -}; - - -// Mapper to (right) multiply a constant to all weights. -template <class A> -struct TimesMapper { - typedef A FromArc; - typedef A ToArc; - typedef typename A::Weight Weight; - - explicit TimesMapper(Weight w) : weight_(w) {} - - A operator()(const A &arc) const { - if (arc.weight == Weight::Zero()) - return arc; - Weight w = Times(arc.weight, weight_); - return A(arc.ilabel, arc.olabel, w, arc.nextstate); - } - - MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } - - MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } - - MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} - - uint64 Properties(uint64 props) const { - return props & kWeightInvariantProperties; - } - - private: - Weight weight_; -}; - - -// Mapper to reciprocate all non-Zero() weights. 
-template <class A> -struct InvertWeightMapper { - typedef A FromArc; - typedef A ToArc; - typedef typename A::Weight Weight; - - A operator()(const A &arc) const { - if (arc.weight == Weight::Zero()) - return arc; - Weight w = Divide(Weight::One(), arc.weight); - return A(arc.ilabel, arc.olabel, w, arc.nextstate); - } - - MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } - - MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } - - MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} - - uint64 Properties(uint64 props) const { - return props & kWeightInvariantProperties; - } -}; - - -// Mapper to map all non-Zero() weights to One(). -template <class A, class B = A> -struct RmWeightMapper { - typedef A FromArc; - typedef B ToArc; - typedef typename FromArc::Weight FromWeight; - typedef typename ToArc::Weight ToWeight; - - B operator()(const A &arc) const { - ToWeight w = arc.weight != FromWeight::Zero() ? - ToWeight::One() : ToWeight::Zero(); - return B(arc.ilabel, arc.olabel, w, arc.nextstate); - } - - MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } - - MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } - - MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} - - uint64 Properties(uint64 props) const { - return (props & kWeightInvariantProperties) | kUnweighted; - } -}; - - -// Mapper to quantize all weights. 
template <class A, class B = A>
struct QuantizeMapper {
  typedef A FromArc;
  typedef B ToArc;
  typedef typename FromArc::Weight FromWeight;
  typedef typename ToArc::Weight ToWeight;

  // Uses the library-wide default quantization step kDelta.
  QuantizeMapper() : delta_(kDelta) {}

  explicit QuantizeMapper(float d) : delta_(d) {}

  // Labels and destination are copied; the weight is replaced by
  // arc.weight.Quantize(delta_).
  B operator()(const A &arc) const {
    ToWeight w = arc.weight.Quantize(delta_);
    return B(arc.ilabel, arc.olabel, w, arc.nextstate);
  }

  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }

  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }

  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;}

  uint64 Properties(uint64 props) const {
    return props & kWeightInvariantProperties;
  }

 private:
  float delta_;  // Quantization step passed to Weight::Quantize().
};


// Mapper from A to B under the assumption:
//    B::Weight = A::Weight::ReverseWeight
//    B::Label == A::Label
//    B::StateId == A::StateId
// The weight is reversed, while the label and nextstate are preserved
// in the mapping.
template <class A, class B>
struct ReverseWeightMapper {
  typedef A FromArc;
  typedef B ToArc;

  B operator()(const A &arc) const {
    return B(arc.ilabel, arc.olabel, arc.weight.Reverse(), arc.nextstate);
  }

  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }

  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }

  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;}

  uint64 Properties(uint64 props) const { return props; }
};

}  // namespace fst

#endif  // FST_LIB_ARC_MAP_H__
diff --git a/kaldi_io/src/tools/openfst/include/fst/arc.h b/kaldi_io/src/tools/openfst/include/fst/arc.h
deleted file mode 100644
index 5f4014b..0000000
--- a/kaldi_io/src/tools/openfst/include/fst/arc.h
+++ /dev/null
@@ -1,307 +0,0 @@
// arc.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: [email protected] (Michael Riley)
//
// \file
//
// Commonly used Fst arc types.

#ifndef FST_LIB_ARC_H__
#define FST_LIB_ARC_H__

#include <string>


#include <fst/expectation-weight.h>
#include <fst/float-weight.h>
#include <fst/lexicographic-weight.h>
#include <fst/power-weight.h>
#include <fst/product-weight.h>
#include <fst/signed-log-weight.h>
#include <fst/sparse-power-weight.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <fst/string-weight.h>


namespace fst {

// Generic arc with int labels and state ids, parameterized on the weight
// type W.
template <class W>
class ArcTpl {
 public:
  typedef W Weight;
  typedef int Label;
  typedef int StateId;

  ArcTpl(Label i, Label o, const Weight& w, StateId s)
      : ilabel(i), olabel(o), weight(w), nextstate(s) {}

  // Note: no member initializers, so the int fields are left
  // uninitialized by this constructor.
  ArcTpl() {}

  // Arc type name: the weight's type name, except that "tropical" is
  // reported as "standard".
  static const string &Type(void) {
    static const string type =
        (Weight::Type() == "tropical") ? "standard" : Weight::Type();
    return type;
  }

  Label ilabel;       // Transition input label
  Label olabel;       // Transition output label
  Weight weight;      // Transition weight
  StateId nextstate;  // Transition destination state
};

typedef ArcTpl<TropicalWeight> StdArc;
typedef ArcTpl<LogWeight> LogArc;
typedef ArcTpl<Log64Weight> Log64Arc;
typedef ArcTpl<SignedLogWeight> SignedLogArc;
typedef ArcTpl<SignedLog64Weight> SignedLog64Arc;
typedef ArcTpl<MinMaxWeight> MinMaxArc;


// Arc with integer labels and state Ids and string weights.
-template <StringType S = STRING_LEFT> -class StringArc { - public: - typedef int Label; - typedef StringWeight<int, S> Weight; - typedef int StateId; - - StringArc(Label i, Label o, Weight w, StateId s) - : ilabel(i), olabel(o), weight(w), nextstate(s) {} - - StringArc() {} - - static const string &Type() { // Arc type name - static const string type = - S == STRING_LEFT ? "standard_string" : - (S == STRING_RIGHT ? "right_standard_string" : - (S == STRING_LEFT_RESTRICT ? "restricted_string" : - "right_restricted_string")); - return type; - } - - Label ilabel; // Transition input label - Label olabel; // Transition output label - Weight weight; // Transition weight - StateId nextstate; // Transition destination state -}; - - -// Arc with label and state Id type the same as template arg and with -// weights over the Gallic semiring w.r.t the output labels and weights of A. -template <class A, StringType S = STRING_LEFT> -struct GallicArc { - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef GallicWeight<Label, typename A::Weight, S> Weight; - - GallicArc() {} - - GallicArc(Label i, Label o, Weight w, StateId s) - : ilabel(i), olabel(o), weight(w), nextstate(s) {} - - GallicArc(const A &arc) - : ilabel(arc.ilabel), olabel(arc.ilabel), - weight(arc.olabel, arc.weight), nextstate(arc.nextstate) {} - - static const string &Type() { // Arc type name - static const string type = - (S == STRING_LEFT ? "gallic_" : - (S == STRING_RIGHT ? "right_gallic_" : - (S == STRING_LEFT_RESTRICT ? "restricted_gallic_" : - "right_restricted_gallic_"))) + A::Type(); - return type; - } - - Label ilabel; // Transition input label - Label olabel; // Transition output label - Weight weight; // Transition weight - StateId nextstate; // Transition destination state -}; - - -// Arc with the reverse of the weight found in its template arg. 
-template <class A> struct ReverseArc { - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::Weight AWeight; - typedef typename AWeight::ReverseWeight Weight; - typedef typename A::StateId StateId; - - ReverseArc(Label i, Label o, Weight w, StateId s) - : ilabel(i), olabel(o), weight(w), nextstate(s) {} - - ReverseArc() {} - - static const string &Type() { // Arc type name - static const string type = "reverse_" + Arc::Type(); - return type; - } - - Label ilabel; // Transition input label - Label olabel; // Transition output label - Weight weight; // Transition weight - StateId nextstate; // Transition destination state -}; - - -// Arc with integer labels and state Ids and lexicographic weights. -template<class W1, class W2> -struct LexicographicArc { - typedef int Label; - typedef LexicographicWeight<W1, W2> Weight; - typedef int StateId; - - LexicographicArc(Label i, Label o, Weight w, StateId s) - : ilabel(i), olabel(o), weight(w), nextstate(s) {} - - LexicographicArc() {} - - static const string &Type() { // Arc type name - static const string type = Weight::Type(); - return type; - } - - Label ilabel; // Transition input label - Label olabel; // Transition output label - Weight weight; // Transition weight - StateId nextstate; // Transition destination state -}; - - -// Arc with integer labels and state Ids and product weights. 
-template<class W1, class W2> -struct ProductArc { - typedef int Label; - typedef ProductWeight<W1, W2> Weight; - typedef int StateId; - - ProductArc(Label i, Label o, Weight w, StateId s) - : ilabel(i), olabel(o), weight(w), nextstate(s) {} - - ProductArc() {} - - static const string &Type() { // Arc type name - static const string type = Weight::Type(); - return type; - } - - Label ilabel; // Transition input label - Label olabel; // Transition output label - Weight weight; // Transition weight - StateId nextstate; // Transition destination state -}; - - -// Arc with label and state Id type the same as first template arg and with -// weights over the n-th cartesian power of the weight type of the -// template arg. -template <class A, unsigned int n> -struct PowerArc { - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef PowerWeight<typename A::Weight, n> Weight; - - PowerArc() {} - - PowerArc(Label i, Label o, Weight w, StateId s) - : ilabel(i), olabel(o), weight(w), nextstate(s) {} - - static const string &Type() { // Arc type name - static string type; - if (type.empty()) { - string power; - Int64ToStr(n, &power); - type = A::Type() + "_^" + power; - } - return type; - } - - Label ilabel; // Transition input label - Label olabel; // Transition output label - Weight weight; // Transition weight - StateId nextstate; // Transition destination state -}; - - -// Arc with label and state Id type the same as first template arg and with -// weights over the arbitrary cartesian power of the weight type. 
-template <class A, class K = int> -struct SparsePowerArc { - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef SparsePowerWeight<typename A::Weight, K> Weight; - - SparsePowerArc() {} - - SparsePowerArc(Label i, Label o, Weight w, StateId s) - : ilabel(i), olabel(o), weight(w), nextstate(s) {} - - static const string &Type() { // Arc type name - static string type; - if (type.empty()) { type = A::Type() + "_^n"; } - if(sizeof(K) != sizeof(uint32)) { - string size; - Int64ToStr(8 * sizeof(K), &size); - type += "_" + size; - } - return type; - } - - Label ilabel; // Transition input label - Label olabel; // Transition output label - Weight weight; // Transition weight - StateId nextstate; // Transition destination state -}; - - -// Arc with label and state Id type the same as first template arg and with -// expectation weight over the first template arg weight type and the -// second template arg. -template <class A, class X2> -struct ExpectationArc { - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight X1; - typedef ExpectationWeight<X1, X2> Weight; - - ExpectationArc() {} - - ExpectationArc(Label i, Label o, Weight w, StateId s) - : ilabel(i), olabel(o), weight(w), nextstate(s) {} - - static const string &Type() { // Arc type name - static string type; - if (type.empty()) { - type = "expectation_" + A::Type() + "_" + X2::Type(); - } - return type; - } - - Label ilabel; // Transition input label - Label olabel; // Transition output label - Weight weight; // Transition weight - StateId nextstate; // Transition destination state -}; - -} // namespace fst - -#endif // FST_LIB_ARC_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/arcfilter.h b/kaldi_io/src/tools/openfst/include/fst/arcfilter.h deleted file mode 100644 index 179dc2c..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/arcfilter.h +++ /dev/null @@ -1,99 +0,0 @@ -// arcfilter.h - -// 
Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Function objects to restrict which arcs are traversed in an FST. - -#ifndef FST_LIB_ARCFILTER_H__ -#define FST_LIB_ARCFILTER_H__ - - -#include <fst/fst.h> -#include <fst/util.h> - - -namespace fst { - -// True for all arcs. -template <class A> -class AnyArcFilter { -public: - bool operator()(const A &arc) const { return true; } -}; - - -// True for (input/output) epsilon arcs. -template <class A> -class EpsilonArcFilter { -public: - bool operator()(const A &arc) const { - return arc.ilabel == 0 && arc.olabel == 0; - } -}; - - -// True for input epsilon arcs. -template <class A> -class InputEpsilonArcFilter { -public: - bool operator()(const A &arc) const { - return arc.ilabel == 0; - } -}; - - -// True for output epsilon arcs. -template <class A> -class OutputEpsilonArcFilter { -public: - bool operator()(const A &arc) const { - return arc.olabel == 0; - } -}; - - -// True if specified labels match (don't match) when keep_match is -// true (false). -template <class A> -class MultiLabelArcFilter { -public: - typedef typename A::Label Label; - - MultiLabelArcFilter(bool match_input = true, bool keep_match = true) - : match_input_(match_input), - keep_match_(keep_match) {} - - - bool operator()(const A &arc) const { - Label label = match_input_ ? 
arc.ilabel : arc.olabel; - bool match = labels_.Find(label) != labels_.End(); - return keep_match_ ? match : !match; - } - - void AddLabel(Label label) { - labels_.Insert(label); - } - -private: - CompactSet<Label, kNoLabel> labels_; - bool match_input_; - bool keep_match_; -}; - -} // namespace fst - -#endif // FST_LIB_ARCFILTER_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/arcsort.h b/kaldi_io/src/tools/openfst/include/fst/arcsort.h deleted file mode 100644 index 37a51dc..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/arcsort.h +++ /dev/null @@ -1,217 +0,0 @@ -// arcsort.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Functions and classes to sort arcs in an FST. - -#ifndef FST_LIB_ARCSORT_H__ -#define FST_LIB_ARCSORT_H__ - -#include <algorithm> -#include <string> -#include <vector> -using std::vector; - -#include <fst/cache.h> -#include <fst/state-map.h> -#include <fst/test-properties.h> - - -namespace fst { - -template <class Arc, class Compare> -class ArcSortMapper { - public: - typedef Arc FromArc; - typedef Arc ToArc; - - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - - ArcSortMapper(const Fst<Arc> &fst, const Compare &comp) - : fst_(fst), comp_(comp), i_(0) {} - - // Allows updating Fst argument; pass only if changed. 
- ArcSortMapper(const ArcSortMapper<Arc, Compare> &mapper, - const Fst<Arc> *fst = 0) - : fst_(fst ? *fst : mapper.fst_), comp_(mapper.comp_), i_(0) {} - - StateId Start() { return fst_.Start(); } - Weight Final(StateId s) const { return fst_.Final(s); } - - void SetState(StateId s) { - i_ = 0; - arcs_.clear(); - arcs_.reserve(fst_.NumArcs(s)); - for (ArcIterator< Fst<Arc> > aiter(fst_, s); !aiter.Done(); aiter.Next()) - arcs_.push_back(aiter.Value()); - sort(arcs_.begin(), arcs_.end(), comp_); - } - - bool Done() const { return i_ >= arcs_.size(); } - const Arc &Value() const { return arcs_[i_]; } - void Next() { ++i_; } - - MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } - MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; } - uint64 Properties(uint64 props) const { return comp_.Properties(props); } - - private: - const Fst<Arc> &fst_; - const Compare &comp_; - vector<Arc> arcs_; - ssize_t i_; // current arc position - - void operator=(const ArcSortMapper<Arc, Compare> &); // disallow -}; - - -// Sorts the arcs in an FST according to function object 'comp' of -// type Compare. This version modifies its input. Comparison function -// objects ILabelCompare and OLabelCompare are provived by the -// library. In general, Compare must meet the requirements for an STL -// sort comparision function object. It must also have a member -// Properties(uint64) that specifies the known properties of the -// sorted FST; it takes as argument the input FST's known properties -// before the sort. -// -// Complexity: -// - Time: O(V D log D) -// - Space: O(D) -// where V = # of states and D = maximum out-degree. -template<class Arc, class Compare> -void ArcSort(MutableFst<Arc> *fst, Compare comp) { - ArcSortMapper<Arc, Compare> mapper(*fst, comp); - StateMap(fst, mapper); -} - -typedef CacheOptions ArcSortFstOptions; - -// Sorts the arcs in an FST according to function object 'comp' of -// type Compare. This version is a delayed Fst. 
Comparsion function -// objects ILabelCompare and OLabelCompare are provided by the -// library. In general, Compare must meet the requirements for an STL -// comparision function object (e.g. as used for STL sort). It must -// also have a member Properties(uint64) that specifies the known -// properties of the sorted FST; it takes as argument the input FST's -// known properties. -// -// Complexity: -// - Time: O(v d log d) -// - Space: O(d) -// where v = # of states visited, d = maximum out-degree of states -// visited. Constant time and space to visit an input state is assumed -// and exclusive of caching. -template <class A, class C> -class ArcSortFst : public StateMapFst<A, A, ArcSortMapper<A, C> > { - using StateMapFst<A, A, ArcSortMapper<A, C> >::GetImpl; - public: - typedef A Arc; - typedef typename Arc::StateId StateId; - typedef ArcSortMapper<A, C> M; - - ArcSortFst(const Fst<A> &fst, const C &comp) - : StateMapFst<A, A, M>(fst, ArcSortMapper<A, C>(fst, comp)) {} - - ArcSortFst(const Fst<A> &fst, const C &comp, const ArcSortFstOptions &opts) - : StateMapFst<A, A, M>(fst, ArcSortMapper<A, C>(fst, comp), opts) {} - - // See Fst<>::Copy() for doc. - ArcSortFst(const ArcSortFst<A, C> &fst, bool safe = false) - : StateMapFst<A, A, M>(fst, safe) {} - - // Get a copy of this ArcSortFst. See Fst<>::Copy() for further doc. - virtual ArcSortFst<A, C> *Copy(bool safe = false) const { - return new ArcSortFst(*this, safe); - } - - virtual size_t NumArcs(StateId s) const { - return GetImpl()->GetFst().NumArcs(s); - } - - virtual size_t NumInputEpsilons(StateId s) const { - return GetImpl()->GetFst().NumInputEpsilons(s); - } - - virtual size_t NumOutputEpsilons(StateId s) const { - return GetImpl()->GetFst().NumOutputEpsilons(s); - } -}; - - -// Specialization for ArcSortFst. 
-template <class A, class C> -class StateIterator< ArcSortFst<A, C> > - : public StateIterator< StateMapFst<A, A, ArcSortMapper<A, C> > > { - public: - explicit StateIterator(const ArcSortFst<A, C> &fst) - : StateIterator< StateMapFst<A, A, ArcSortMapper<A, C> > >(fst) {} -}; - - -// Specialization for ArcSortFst. -template <class A, class C> -class ArcIterator< ArcSortFst<A, C> > - : public ArcIterator< StateMapFst<A, A, ArcSortMapper<A, C> > > { - public: - ArcIterator(const ArcSortFst<A, C> &fst, typename A::StateId s) - : ArcIterator< StateMapFst<A, A, ArcSortMapper<A, C> > >(fst, s) {} -}; - - -// Compare class for comparing input labels of arcs. -template<class A> class ILabelCompare { - public: - bool operator() (A arc1, A arc2) const { - return arc1.ilabel < arc2.ilabel; - } - - uint64 Properties(uint64 props) const { - return (props & kArcSortProperties) | kILabelSorted | - (props & kAcceptor ? kOLabelSorted : 0); - } -}; - - -// Compare class for comparing output labels of arcs. -template<class A> class OLabelCompare { - public: - bool operator() (const A &arc1, const A &arc2) const { - return arc1.olabel < arc2.olabel; - } - - uint64 Properties(uint64 props) const { - return (props & kArcSortProperties) | kOLabelSorted | - (props & kAcceptor ? kILabelSorted : 0); - } -}; - - -// Useful aliases when using StdArc. 
-template<class C> class StdArcSortFst : public ArcSortFst<StdArc, C> { - public: - typedef StdArc Arc; - typedef C Compare; -}; - -typedef ILabelCompare<StdArc> StdILabelCompare; - -typedef OLabelCompare<StdArc> StdOLabelCompare; - -} // namespace fst - -#endif // FST_LIB_ARCSORT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/bi-table.h b/kaldi_io/src/tools/openfst/include/fst/bi-table.h deleted file mode 100644 index d220ce4..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/bi-table.h +++ /dev/null @@ -1,532 +0,0 @@ -// bi-table.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Classes for representing a bijective mapping between an arbitrary entry -// of type T and a signed integral ID. - -#ifndef FST_LIB_BI_TABLE_H__ -#define FST_LIB_BI_TABLE_H__ - -#include <deque> -using std::deque; -#include <functional> -#include <vector> -using std::vector; - -#include <tr1/unordered_set> -using std::tr1::unordered_set; -using std::tr1::unordered_multiset; - -namespace fst { - -// BI TABLES - these determine a bijective mapping between an -// arbitrary entry of type T and an signed integral ID of type I. The IDs are -// allocated starting from 0 in order. -// -// template <class I, class T> -// class BiTable { -// public: -// -// // Required constructors. -// BiTable(); -// -// // Lookup integer ID from entry. 
//   // If it doesn't exist and 'insert'
//   // is true, then add it. Otherwise return -1.
//   I FindId(const T &entry, bool insert = true);
//   // Lookup entry from integer ID.
//   const T &FindEntry(I) const;
//   // # of stored entries.
//   I Size() const;
// };

// An implementation using a hash map for the entry to ID mapping.
// H is the hash function and E is the equality function.
// If passed to the constructor, ownership is given to this class.

template <class I, class T, class H, class E = std::equal_to<T> >
class HashBiTable {
 public:
  // Reserves space for 'table_size' elements.
  explicit HashBiTable(size_t table_size = 0, H *h = 0, E *e = 0)
      : hash_func_(h),
        hash_equal_(e),
        entry2id_(table_size, (h ? *h : H()), (e ? *e : E())) {
    if (table_size)
      id2entry_.reserve(table_size);
  }

  // Deep copy: the functors owned by 'table', if any, are cloned.
  HashBiTable(const HashBiTable<I, T, H, E> &table)
      : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0),
        hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0),
        entry2id_(table.entry2id_.begin(), table.entry2id_.end(),
                  table.entry2id_.size(),
                  (hash_func_ ? *hash_func_ : H()),
                  (hash_equal_ ? *hash_equal_ : E())),
        id2entry_(table.id2entry_) { }

  ~HashBiTable() {
    delete hash_func_;
    delete hash_equal_;
  }

  // Returns the ID of 'entry'. An unknown entry is assigned the next ID
  // when 'insert' is true; otherwise -1 is returned.
  I FindId(const T &entry, bool insert = true) {
    if (!insert) {
      // Pure lookup. Going through operator[] here would add a placeholder
      // element to the map for every entry that is not found.
      typename unordered_map<T, I, H, E>::const_iterator it =
          entry2id_.find(entry);
      if (it == entry2id_.end() || it->second == 0)
        return -1;
      return it->second - 1;
    }
    I &id_ref = entry2id_[entry];
    if (id_ref == 0) {  // T not found; store and assign it a new ID.
      id2entry_.push_back(entry);
      id_ref = id2entry_.size();
    }
    return id_ref - 1;  // NB: id_ref = ID + 1
  }

  // Returns the entry with ID 's'; 's' is not range-checked.
  const T &FindEntry(I s) const {
    return id2entry_[s];
  }

  I Size() const { return id2entry_.size(); }

 private:
  H *hash_func_;
  E *hash_equal_;
  unordered_map<T, I, H, E> entry2id_;  // Maps an entry to its ID + 1.
  vector<T> id2entry_;                  // Maps an ID to its entry.

  void operator=(const HashBiTable<I, T, H, E> &table);  // disallow
};


// Enables alternative hash set representations below.
// typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType;
typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType;

// Default hash set is STL hash_set
template<class K, class H, class E, HSType>
struct HashSet : public unordered_set<K, H, E> {
  HashSet(size_t n = 0, const H &h = H(), const E &e = E())
      : unordered_set<K, H, E>(n, h, e) { }

  // Deliberately does nothing; hides the base class rehash().
  void rehash(size_t n) { }
};


// An implementation using a hash set for the entry to ID mapping.
// The hash set holds 'keys' which are either the ID or kCurrentKey.
// These keys can be mapped to entries either by looking up in the
// entry vector or, if kCurrentKey, in current_entry_ member. The hash
// and key equality functions map to entries first. H
// is the hash function and E is the equality function. If passed to
// the constructor, ownership is given to this class.
template <class I, class T, class H,
          class E = std::equal_to<T>, HSType HS = HS_DENSE>
class CompactHashBiTable {
 public:
  friend class HashFunc;
  friend class HashEqual;

  // Reserves space for 'table_size' elements. When 'h' or 'e' is null a
  // default-constructed functor is allocated, because HashFunc and
  // HashEqual dereference these pointers unconditionally.
  explicit CompactHashBiTable(size_t table_size = 0, H *h = 0, E *e = 0)
      : hash_func_(h ? h : new H()),
        hash_equal_(e ? e : new E()),
        compact_hash_func_(*this),
        compact_hash_equal_(*this),
        keys_(table_size, compact_hash_func_, compact_hash_equal_),
        current_entry_(0) {
    if (table_size)
      id2entry_.reserve(table_size);
  }

  // Deep copy. The keys are re-inserted in the body, after id2entry_ has
  // been copied, because hashing a key reads the entry it stands for.
  CompactHashBiTable(const CompactHashBiTable<I, T, H, E, HS> &table)
      : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : new H()),
        hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : new E()),
        compact_hash_func_(*this),
        compact_hash_equal_(*this),
        keys_(table.keys_.size(), compact_hash_func_, compact_hash_equal_),
        id2entry_(table.id2entry_),
        current_entry_(0) {
    keys_.insert(table.keys_.begin(), table.keys_.end());
  }

  ~CompactHashBiTable() {
    delete hash_func_;
    delete hash_equal_;
  }

  // Returns the ID of 'entry'. An unknown entry is assigned the next ID
  // when 'insert' is true; otherwise -1 is returned.
  I FindId(const T &entry, bool insert = true) {
    // kCurrentKey stands for *current_entry_ while hashing and comparing.
    current_entry_ = &entry;
    typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
    if (it != keys_.end())
      return *it;
    if (!insert)
      return -1;
    // T not found; store and assign it a new ID.
    I key = id2entry_.size();
    id2entry_.push_back(entry);
    keys_.insert(key);
    return key;
  }

  // Returns the entry with ID 's'; 's' is not range-checked.
  const T &FindEntry(I s) const { return id2entry_[s]; }

  I Size() const { return id2entry_.size(); }

  // Clear content. With argument, erases last n IDs.
  void Clear(ssize_t n = -1) {
    if (n < 0 || static_cast<size_t>(n) > id2entry_.size())
      n = id2entry_.size();
    while (n-- > 0) {
      I key = id2entry_.size() - 1;
      keys_.erase(key);
      id2entry_.pop_back();
    }
    keys_.rehash(0);
  }

 private:
  static const I kCurrentKey;    // -1
  static const I kEmptyKey;      // -2
  static const I kDeletedKey;    // -3

  // Hashes a key by hashing the entry it stands for. Keys below
  // kCurrentKey do not stand for an entry and hash to 0.
  class HashFunc {
   public:
    HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {}

    size_t operator()(I k) const {
      if (k >= kCurrentKey) {
        return (*ht_->hash_func_)(ht_->Key2Entry(k));
      } else {
        return 0;
      }
    }

   private:
    const CompactHashBiTable *ht_;
  };

  // Compares two keys by comparing the entries they stand for; when either
  // key is below kCurrentKey the keys themselves are compared.
  class HashEqual {
   public:
    HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {}

    bool operator()(I k1, I k2) const {
      if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
        return (*ht_->hash_equal_)(ht_->Key2Entry(k1), ht_->Key2Entry(k2));
      } else {
        return k1 == k2;
      }
    }
   private:
    const CompactHashBiTable *ht_;
  };

  typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet;

  const T &Key2Entry(I k) const {
    if (k == kCurrentKey)
      return *current_entry_;
    else
      return id2entry_[k];
  }

  H *hash_func_;
  E *hash_equal_;
  HashFunc compact_hash_func_;
  HashEqual compact_hash_equal_;
  KeyHashSet keys_;
  vector<T> id2entry_;
  const T *current_entry_;  // Entry being looked up by FindId().

  void operator=(const CompactHashBiTable<I, T, H, E, HS> &table);  // disallow
};


template <class I, class T, class H, class E, HSType HS>
const I CompactHashBiTable<I, T, H, E, HS>::kCurrentKey = -1;

template <class I, class T, class H, class E, HSType HS>
const I CompactHashBiTable<I, T, H, E, HS>::kEmptyKey = -2;

template <class I, class T, class H, class E, HSType HS>
const I CompactHashBiTable<I, T, H, E, HS>::kDeletedKey = -3;


// An implementation using a vector for the entry to ID mapping.
// It is passed a function object FP that should fingerprint entries
// uniquely to an integer that can be used as a vector index. Normally,
// VectorBiTable constructs the FP object. The user can instead
// pass in this object; in that case, VectorBiTable takes its
// ownership.
template <class I, class T, class FP>
class VectorBiTable {
 public:
  // Reserves space for 'table_size' elements.
  explicit VectorBiTable(FP *fp = 0, size_t table_size = 0)
      : fp_(fp ? fp : new FP()) {
    if (table_size)
      id2entry_.reserve(table_size);
  }

  VectorBiTable(const VectorBiTable<I, T, FP> &table)
      : fp_(table.fp_ ? new FP(*table.fp_) : new FP()),
        fp2id_(table.fp2id_),
        id2entry_(table.id2entry_) { }

  ~VectorBiTable() { delete fp_; }

  // Returns the ID of 'entry'. An unknown entry is assigned the next ID
  // when 'insert' is true; otherwise -1 is returned.
  I FindId(const T &entry, bool insert = true) {
    const size_t fp = (*fp_)(entry);
    if (fp >= fp2id_.size()) {
      if (!insert)
        return -1;  // Beyond the table: unknown; a lookup must not grow it.
      fp2id_.resize(fp + 1);
    }
    I &id_ref = fp2id_[fp];
    if (id_ref == 0) {  // T not found
      if (insert) {     // store and assign it a new ID
        id2entry_.push_back(entry);
        id_ref = id2entry_.size();
      } else {
        return -1;
      }
    }
    return id_ref - 1;  // NB: id_ref = ID + 1
  }

  // Returns the entry with ID 's'; 's' is not range-checked.
  const T &FindEntry(I s) const { return id2entry_[s]; }

  I Size() const { return id2entry_.size(); }

  const FP &Fingerprint() const { return *fp_; }

 private:
  FP *fp_;
  vector<I> fp2id_;     // Maps a fingerprint to ID + 1 (0 means unknown).
  vector<T> id2entry_;  // Maps an ID to its entry.

  void operator=(const VectorBiTable<I, T, FP> &table);  // disallow
};


// An implementation using a vector and a compact hash table. The
// selecting functor S returns true for entries to be hashed in the
// vector. The fingerprinting functor FP returns a unique fingerprint
// for each entry to be hashed in the vector (these need to be
// suitable for indexing in a vector). The hash functor H is used
// when hashing entry into the compact hash table. If passed to the
// constructor, ownership is given to this class.
template <class I, class T, class S, class FP, class H, HSType HS = HS_DENSE>
class VectorHashBiTable {
 public:
  friend class HashFunc;
  friend class HashEqual;

  // 's' is required and is dereferenced on every FindId() call; 'fp' and
  // 'h' are default-constructed when null.
  explicit VectorHashBiTable(S *s, FP *fp = 0, H *h = 0,
                             size_t vector_size = 0,
                             size_t entry_size = 0)
      : selector_(s),
        fp_(fp ? fp : new FP()),
        h_(h ? h : new H()),
        hash_func_(*this),
        hash_equal_(*this),
        keys_(0, hash_func_, hash_equal_),
        current_entry_(0) {
    if (vector_size)
      fp2id_.reserve(vector_size);
    if (entry_size)
      id2entry_.reserve(entry_size);
  }

  // Deep copy. The selector is cloned from *table.selector_; the previous
  // code read 'table.s_', which is not a member of this class.
  VectorHashBiTable(const VectorHashBiTable<I, T, S, FP, H, HS> &table)
      : selector_(new S(*table.selector_)),
        fp_(table.fp_ ? new FP(*table.fp_) : new FP()),
        h_(table.h_ ? new H(*table.h_) : new H()),
        id2entry_(table.id2entry_),
        fp2id_(table.fp2id_),
        hash_func_(*this),
        hash_equal_(*this),
        keys_(table.keys_.size(), hash_func_, hash_equal_),
        current_entry_(0) {
    keys_.insert(table.keys_.begin(), table.keys_.end());
  }

  ~VectorHashBiTable() {
    delete selector_;
    delete fp_;
    delete h_;
  }

  // Returns the ID of 'entry'. An unknown entry is assigned the next ID
  // when 'insert' is true; otherwise -1 is returned.
  I FindId(const T &entry, bool insert = true) {
    if ((*selector_)(entry)) {  // Use the vector if 'selector_(entry) == true'
      const size_t fp = (*fp_)(entry);
      if (fp2id_.size() <= fp) {
        if (!insert)
          return -1;  // Beyond the table: unknown; a lookup must not grow it.
        fp2id_.resize(fp + 1, 0);
      }
      if (fp2id_[fp] == 0) {         // T not found
        if (insert) {                // store and assign it a new ID
          id2entry_.push_back(entry);
          fp2id_[fp] = id2entry_.size();
        } else {
          return -1;
        }
      }
      return fp2id_[fp] - 1;  // NB: assoc_value = ID + 1
    } else {  // Use the hash table otherwise.
      // kCurrentKey stands for *current_entry_ while hashing and comparing.
      current_entry_ = &entry;
      typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey);
      if (it == keys_.end()) {
        if (insert) {
          I key = id2entry_.size();
          id2entry_.push_back(entry);
          keys_.insert(key);
          return key;
        } else {
          return -1;
        }
      } else {
        return *it;
      }
    }
  }

  // Returns the entry with ID 's'; 's' is not range-checked.
  const T &FindEntry(I s) const {
    return id2entry_[s];
  }

  I Size() const { return id2entry_.size(); }

  const S &Selector() const { return *selector_; }

  const FP &Fingerprint() const { return *fp_; }

  const H &Hash() const { return *h_; }

 private:
  static const I kCurrentKey;  // -1
  static const I kEmptyKey;    // -2

  // Hashes a key by hashing the entry it stands for. Keys below
  // kCurrentKey do not stand for an entry and hash to 0.
  class HashFunc {
   public:
    HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {}

    size_t operator()(I k) const {
      if (k >= kCurrentKey) {
        return (*(ht_->h_))(ht_->Key2Entry(k));
      } else {
        return 0;
      }
    }
   private:
    const VectorHashBiTable *ht_;
  };

  // Compares two keys by comparing the entries they stand for with ==;
  // when either key is below kCurrentKey the keys themselves are compared.
  class HashEqual {
   public:
    HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {}

    bool operator()(I k1, I k2) const {
      if (k1 >= kCurrentKey && k2 >= kCurrentKey) {
        return ht_->Key2Entry(k1) == ht_->Key2Entry(k2);
      } else {
        return k1 == k2;
      }
    }
   private:
    const VectorHashBiTable *ht_;
  };

  typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet;

  const T &Key2Entry(I k) const {
    if (k == kCurrentKey)
      return *current_entry_;
    else
      return id2entry_[k];
  }

  S *selector_;  // Returns true if entry hashed into vector
  FP *fp_;       // Fingerprint used when hashing entry into vector
  H *h_;         // Hash function used when hashing entry into hash_set

  vector<T> id2entry_;  // Maps state IDs to entry
  vector<I> fp2id_;     // Maps entry fingerprints to IDs

  // Compact implementation of the hash table mapping entries to
  // state IDs using the hash function 'h_'
  HashFunc hash_func_;
  HashEqual hash_equal_;
  KeyHashSet keys_;
  const T *current_entry_;  // Entry being looked up by FindId().

  // disallow
  void operator=(const VectorHashBiTable<I, T, S, FP, H, HS> &table);
};

template <class I, class T, class S, class FP, class H, HSType HS>
const I VectorHashBiTable<I, T, S, FP, H, HS>::kCurrentKey = -1;

// Matches the value documented at the declaration and the value used by
// CompactHashBiTable; the definition previously said -3.
template <class I, class T, class S, class FP, class H, HSType HS>
const I VectorHashBiTable<I, T, S, FP, H, HS>::kEmptyKey = -2;


// An implementation using a hash map for the entry to ID
// mapping. This version permits erasing of arbitrary states. The
// entry T must have == defined and its default constructor must
// produce an entry that will never be seen. F is the hash function.
-template <class I, class T, class F> -class ErasableBiTable { - public: - ErasableBiTable() : first_(0) {} - - I FindId(const T &entry, bool insert = true) { - I &id_ref = entry2id_[entry]; - if (id_ref == 0) { // T not found - if (insert) { // store and assign it a new ID - id2entry_.push_back(entry); - id_ref = id2entry_.size() + first_; - } else { - return -1; - } - } - return id_ref - 1; // NB: id_ref = ID + 1 - } - - const T &FindEntry(I s) const { return id2entry_[s - first_]; } - - I Size() const { return id2entry_.size(); } - - void Erase(I s) { - T &entry = id2entry_[s - first_]; - typename unordered_map<T, I, F>::iterator it = - entry2id_.find(entry); - entry2id_.erase(it); - id2entry_[s - first_] = empty_entry_; - while (!id2entry_.empty() && id2entry_.front() == empty_entry_) { - id2entry_.pop_front(); - ++first_; - } - } - - private: - unordered_map<T, I, F> entry2id_; - deque<T> id2entry_; - const T empty_entry_; - I first_; // I of first element in the deque; - - // disallow - void operator=(const ErasableBiTable<I, T, F> &table); //disallow -}; - -} // namespace fst - -#endif // FST_LIB_BI_TABLE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/cache.h b/kaldi_io/src/tools/openfst/include/fst/cache.h deleted file mode 100644 index 7c96fe1..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/cache.h +++ /dev/null @@ -1,861 +0,0 @@ -// cache.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Michael Riley) -// -// \file -// An Fst implementation that caches FST elements of a delayed -// computation. - -#ifndef FST_LIB_CACHE_H__ -#define FST_LIB_CACHE_H__ - -#include <vector> -using std::vector; -#include <list> - -#include <fst/vector-fst.h> - - -DECLARE_bool(fst_default_cache_gc); -DECLARE_int64(fst_default_cache_gc_limit); - -namespace fst { - -struct CacheOptions { - bool gc; // enable GC - size_t gc_limit; // # of bytes allowed before GC - - CacheOptions(bool g, size_t l) : gc(g), gc_limit(l) {} - CacheOptions() - : gc(FLAGS_fst_default_cache_gc), - gc_limit(FLAGS_fst_default_cache_gc_limit) {} -}; - -// A CacheStateAllocator allocates and frees CacheStates -// template <class S> -// struct CacheStateAllocator { -// S *Allocate(StateId s); -// void Free(S *state, StateId s); -// }; -// - -// A simple allocator class, can be overridden as needed, -// maintains a single entry cache. -template <class S> -struct DefaultCacheStateAllocator { - typedef typename S::Arc::StateId StateId; - - DefaultCacheStateAllocator() : mru_(NULL) { } - - ~DefaultCacheStateAllocator() { - delete mru_; - } - - S *Allocate(StateId s) { - if (mru_) { - S *state = mru_; - mru_ = NULL; - state->Reset(); - return state; - } - return new S(); - } - - void Free(S *state, StateId s) { - if (mru_) { - delete mru_; - } - mru_ = state; - } - - private: - S *mru_; -}; - -// VectorState but additionally has a flags data member (see -// CacheState below). This class is used to cache FST elements with -// the flags used to indicate what has been cached. Use HasStart() -// HasFinal(), and HasArcs() to determine if cached and SetStart(), -// SetFinal(), AddArc(), (or PushArc() and SetArcs()) to cache. Note -// you must set the final weight even if the state is non-final to -// mark it as cached. If the 'gc' option is 'false', cached items have -// the extent of the FST - minimizing computation. 
If the 'gc' option -// is 'true', garbage collection of states (not in use in an arc -// iterator and not 'protected') is performed, in a rough -// approximation of LRU order, when 'gc_limit' bytes is reached - -// controlling memory use. When 'gc_limit' is 0, special optimizations -// apply - minimizing memory use. - -template <class S, class C = DefaultCacheStateAllocator<S> > -class CacheBaseImpl : public VectorFstBaseImpl<S> { - public: - typedef S State; - typedef C Allocator; - typedef typename State::Arc Arc; - typedef typename Arc::Weight Weight; - typedef typename Arc::StateId StateId; - - using FstImpl<Arc>::Type; - using FstImpl<Arc>::Properties; - using FstImpl<Arc>::SetProperties; - using VectorFstBaseImpl<State>::NumStates; - using VectorFstBaseImpl<State>::Start; - using VectorFstBaseImpl<State>::AddState; - using VectorFstBaseImpl<State>::SetState; - using VectorFstBaseImpl<State>::ReserveStates; - - explicit CacheBaseImpl(C *allocator = 0) - : cache_start_(false), nknown_states_(0), min_unexpanded_state_id_(0), - cache_first_state_id_(kNoStateId), cache_first_state_(0), - cache_gc_(FLAGS_fst_default_cache_gc), cache_size_(0), - cache_limit_(FLAGS_fst_default_cache_gc_limit > kMinCacheLimit || - FLAGS_fst_default_cache_gc_limit == 0 ? - FLAGS_fst_default_cache_gc_limit : kMinCacheLimit), - protect_(false) { - allocator_ = allocator ? allocator : new C(); - } - - explicit CacheBaseImpl(const CacheOptions &opts, C *allocator = 0) - : cache_start_(false), nknown_states_(0), - min_unexpanded_state_id_(0), cache_first_state_id_(kNoStateId), - cache_first_state_(0), cache_gc_(opts.gc), cache_size_(0), - cache_limit_(opts.gc_limit > kMinCacheLimit || opts.gc_limit == 0 ? - opts.gc_limit : kMinCacheLimit), - protect_(false) { - allocator_ = allocator ? allocator : new C(); - } - - // Preserve gc parameters. If preserve_cache true, also preserves - // cache data. 
- CacheBaseImpl(const CacheBaseImpl<S, C> &impl, bool preserve_cache = false) - : VectorFstBaseImpl<S>(), cache_start_(false), nknown_states_(0), - min_unexpanded_state_id_(0), cache_first_state_id_(kNoStateId), - cache_first_state_(0), cache_gc_(impl.cache_gc_), cache_size_(0), - cache_limit_(impl.cache_limit_), - protect_(impl.protect_) { - allocator_ = new C(); - if (preserve_cache) { - cache_start_ = impl.cache_start_; - nknown_states_ = impl.nknown_states_; - expanded_states_ = impl.expanded_states_; - min_unexpanded_state_id_ = impl.min_unexpanded_state_id_; - if (impl.cache_first_state_id_ != kNoStateId) { - cache_first_state_id_ = impl.cache_first_state_id_; - cache_first_state_ = allocator_->Allocate(cache_first_state_id_); - *cache_first_state_ = *impl.cache_first_state_; - } - cache_states_ = impl.cache_states_; - cache_size_ = impl.cache_size_; - ReserveStates(impl.NumStates()); - for (StateId s = 0; s < impl.NumStates(); ++s) { - const S *state = - static_cast<const VectorFstBaseImpl<S> &>(impl).GetState(s); - if (state) { - S *copied_state = allocator_->Allocate(s); - *copied_state = *state; - AddState(copied_state); - } else { - AddState(0); - } - } - VectorFstBaseImpl<S>::SetStart(impl.Start()); - } - } - - ~CacheBaseImpl() { - allocator_->Free(cache_first_state_, cache_first_state_id_); - delete allocator_; - } - - // Gets a state from its ID; state must exist. - const S *GetState(StateId s) const { - if (s == cache_first_state_id_) - return cache_first_state_; - else - return VectorFstBaseImpl<S>::GetState(s); - } - - // Gets a state from its ID; state must exist. - S *GetState(StateId s) { - if (s == cache_first_state_id_) - return cache_first_state_; - else - return VectorFstBaseImpl<S>::GetState(s); - } - - // Gets a state from its ID; return 0 if it doesn't exist. 
- const S *CheckState(StateId s) const { - if (s == cache_first_state_id_) - return cache_first_state_; - else if (s < NumStates()) - return VectorFstBaseImpl<S>::GetState(s); - else - return 0; - } - - // Gets a state from its ID; add it if necessary. - S *ExtendState(StateId s); - - void SetStart(StateId s) { - VectorFstBaseImpl<S>::SetStart(s); - cache_start_ = true; - if (s >= nknown_states_) - nknown_states_ = s + 1; - } - - void SetFinal(StateId s, Weight w) { - S *state = ExtendState(s); - state->final = w; - state->flags |= kCacheFinal | kCacheRecent | kCacheModified; - } - - // AddArc adds a single arc to state s and does incremental cache - // book-keeping. For efficiency, prefer PushArc and SetArcs below - // when possible. - void AddArc(StateId s, const Arc &arc) { - S *state = ExtendState(s); - state->arcs.push_back(arc); - if (arc.ilabel == 0) { - ++state->niepsilons; - } - if (arc.olabel == 0) { - ++state->noepsilons; - } - const Arc *parc = state->arcs.empty() ? 0 : &(state->arcs.back()); - SetProperties(AddArcProperties(Properties(), s, arc, parc)); - state->flags |= kCacheModified; - if (cache_gc_ && s != cache_first_state_id_ && - !(state->flags & kCacheProtect)) { - cache_size_ += sizeof(Arc); - if (cache_size_ > cache_limit_) - GC(s, false); - } - } - - // Adds a single arc to state s but delays cache book-keeping. - // SetArcs must be called when all PushArc calls at a state are - // complete. Do not mix with calls to AddArc. - void PushArc(StateId s, const Arc &arc) { - S *state = ExtendState(s); - state->arcs.push_back(arc); - } - - // Marks arcs of state s as cached and does cache book-keeping after all - // calls to PushArc have been completed. Do not mix with calls to AddArc. 
- void SetArcs(StateId s) { - S *state = ExtendState(s); - vector<Arc> &arcs = state->arcs; - state->niepsilons = state->noepsilons = 0; - for (size_t a = 0; a < arcs.size(); ++a) { - const Arc &arc = arcs[a]; - if (arc.nextstate >= nknown_states_) - nknown_states_ = arc.nextstate + 1; - if (arc.ilabel == 0) - ++state->niepsilons; - if (arc.olabel == 0) - ++state->noepsilons; - } - ExpandedState(s); - state->flags |= kCacheArcs | kCacheRecent | kCacheModified; - if (cache_gc_ && s != cache_first_state_id_ && - !(state->flags & kCacheProtect)) { - cache_size_ += arcs.capacity() * sizeof(Arc); - if (cache_size_ > cache_limit_) - GC(s, false); - } - }; - - void ReserveArcs(StateId s, size_t n) { - S *state = ExtendState(s); - state->arcs.reserve(n); - } - - void DeleteArcs(StateId s, size_t n) { - S *state = ExtendState(s); - const vector<Arc> &arcs = state->arcs; - for (size_t i = 0; i < n; ++i) { - size_t j = arcs.size() - i - 1; - if (arcs[j].ilabel == 0) - --state->niepsilons; - if (arcs[j].olabel == 0) - --state->noepsilons; - } - - state->arcs.resize(arcs.size() - n); - SetProperties(DeleteArcsProperties(Properties())); - state->flags |= kCacheModified; - if (cache_gc_ && s != cache_first_state_id_ && - !(state->flags & kCacheProtect)) { - cache_size_ -= n * sizeof(Arc); - } - } - - void DeleteArcs(StateId s) { - S *state = ExtendState(s); - size_t n = state->arcs.size(); - state->niepsilons = 0; - state->noepsilons = 0; - state->arcs.clear(); - SetProperties(DeleteArcsProperties(Properties())); - state->flags |= kCacheModified; - if (cache_gc_ && s != cache_first_state_id_ && - !(state->flags & kCacheProtect)) { - cache_size_ -= n * sizeof(Arc); - } - } - - void DeleteStates(const vector<StateId> &dstates) { - size_t old_num_states = NumStates(); - vector<StateId> newid(old_num_states, 0); - for (size_t i = 0; i < dstates.size(); ++i) - newid[dstates[i]] = kNoStateId; - StateId nstates = 0; - for (StateId s = 0; s < old_num_states; ++s) { - if (newid[s] != 
kNoStateId) { - newid[s] = nstates; - ++nstates; - } - } - // just for states_.resize(), does unnecessary walk. - VectorFstBaseImpl<S>::DeleteStates(dstates); - SetProperties(DeleteStatesProperties(Properties())); - // Update list of cached states. - typename list<StateId>::iterator siter = cache_states_.begin(); - while (siter != cache_states_.end()) { - if (newid[*siter] != kNoStateId) { - *siter = newid[*siter]; - ++siter; - } else { - cache_states_.erase(siter++); - } - } - } - - void DeleteStates() { - cache_states_.clear(); - allocator_->Free(cache_first_state_, cache_first_state_id_); - for (int s = 0; s < NumStates(); ++s) { - allocator_->Free(VectorFstBaseImpl<S>::GetState(s), s); - SetState(s, 0); - } - nknown_states_ = 0; - min_unexpanded_state_id_ = 0; - cache_first_state_id_ = kNoStateId; - cache_first_state_ = 0; - cache_size_ = 0; - cache_start_ = false; - VectorFstBaseImpl<State>::DeleteStates(); - SetProperties(DeleteAllStatesProperties(Properties(), - kExpanded | kMutable)); - } - - // Is the start state cached? - bool HasStart() const { - if (!cache_start_ && Properties(kError)) - cache_start_ = true; - return cache_start_; - } - - // Is the final weight of state s cached? - bool HasFinal(StateId s) const { - const S *state = CheckState(s); - if (state && state->flags & kCacheFinal) { - state->flags |= kCacheRecent; - return true; - } else { - return false; - } - } - - // Are arcs of state s cached? 
- bool HasArcs(StateId s) const { - const S *state = CheckState(s); - if (state && state->flags & kCacheArcs) { - state->flags |= kCacheRecent; - return true; - } else { - return false; - } - } - - Weight Final(StateId s) const { - const S *state = GetState(s); - return state->final; - } - - size_t NumArcs(StateId s) const { - const S *state = GetState(s); - return state->arcs.size(); - } - - size_t NumInputEpsilons(StateId s) const { - const S *state = GetState(s); - return state->niepsilons; - } - - size_t NumOutputEpsilons(StateId s) const { - const S *state = GetState(s); - return state->noepsilons; - } - - // Provides information needed for generic arc iterator. - void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { - const S *state = GetState(s); - data->base = 0; - data->narcs = state->arcs.size(); - data->arcs = data->narcs > 0 ? &(state->arcs[0]) : 0; - data->ref_count = &(state->ref_count); - ++(*data->ref_count); - } - - // Number of known states. - StateId NumKnownStates() const { return nknown_states_; } - - // Update number of known states taking in account the existence of state s. - void UpdateNumKnownStates(StateId s) { - if (s >= nknown_states_) - nknown_states_ = s + 1; - } - - // Find the mininum never-expanded state Id - StateId MinUnexpandedState() const { - while (min_unexpanded_state_id_ < expanded_states_.size() && - expanded_states_[min_unexpanded_state_id_]) - ++min_unexpanded_state_id_; - return min_unexpanded_state_id_; - } - - // Removes from cache_states_ and uncaches (not referenced-counted - // or protected) states that have not been accessed since the last - // GC until at most cache_fraction * cache_limit_ bytes are cached. - // If that fails to free enough, recurs uncaching recently visited - // states as well. If still unable to free enough memory, then - // widens cache_limit_ to fulfill condition. 
- void GC(StateId current, bool free_recent, float cache_fraction = 0.666); - - // Setc/clears GC protection: if true, new states are protected - // from garbage collection. - void GCProtect(bool on) { protect_ = on; } - - void ExpandedState(StateId s) { - if (s < min_unexpanded_state_id_) - return; - while (expanded_states_.size() <= s) - expanded_states_.push_back(false); - expanded_states_[s] = true; - } - - C *GetAllocator() const { - return allocator_; - } - - // Caching on/off switch, limit and size accessors. - bool GetCacheGc() const { return cache_gc_; } - size_t GetCacheLimit() const { return cache_limit_; } - size_t GetCacheSize() const { return cache_size_; } - - private: - static const size_t kMinCacheLimit = 8096; // Minimum (non-zero) cache limit - - static const uint32 kCacheFinal = 0x0001; // Final weight has been cached - static const uint32 kCacheArcs = 0x0002; // Arcs have been cached - static const uint32 kCacheRecent = 0x0004; // Mark as visited since GC - static const uint32 kCacheProtect = 0x0008; // Mark state as GC protected - - public: - static const uint32 kCacheModified = 0x0010; // Mark state as modified - static const uint32 kCacheFlags = kCacheFinal | kCacheArcs | kCacheRecent - | kCacheProtect | kCacheModified; - - private: - C *allocator_; // used to allocate new states - mutable bool cache_start_; // Is the start state cached? 
- StateId nknown_states_; // # of known states - vector<bool> expanded_states_; // states that have been expanded - mutable StateId min_unexpanded_state_id_; // minimum never-expanded state Id - StateId cache_first_state_id_; // First cached state id - S *cache_first_state_; // First cached state - list<StateId> cache_states_; // list of currently cached states - bool cache_gc_; // enable GC - size_t cache_size_; // # of bytes cached - size_t cache_limit_; // # of bytes allowed before GC - bool protect_; // Protect new states from GC - - void operator=(const CacheBaseImpl<S, C> &impl); // disallow -}; - -// Gets a state from its ID; add it if necessary. -template <class S, class C> -S *CacheBaseImpl<S, C>::ExtendState(typename S::Arc::StateId s) { - // If 'protect_' true and a new state, protects from garbage collection. - if (s == cache_first_state_id_) { - return cache_first_state_; // Return 1st cached state - } else if (cache_limit_ == 0 && cache_first_state_id_ == kNoStateId) { - cache_first_state_id_ = s; // Remember 1st cached state - cache_first_state_ = allocator_->Allocate(s); - if (protect_) cache_first_state_->flags |= kCacheProtect; - return cache_first_state_; - } else if (cache_first_state_id_ != kNoStateId && - cache_first_state_->ref_count == 0 && - !(cache_first_state_->flags & kCacheProtect)) { - // With Default allocator, the Free and Allocate will reuse the same S*. 
- allocator_->Free(cache_first_state_, cache_first_state_id_); - cache_first_state_id_ = s; - cache_first_state_ = allocator_->Allocate(s); - if (protect_) cache_first_state_->flags |= kCacheProtect; - return cache_first_state_; // Return 1st cached state - } else { - while (NumStates() <= s) // Add state to main cache - AddState(0); - S *state = VectorFstBaseImpl<S>::GetState(s); - if (!state) { - state = allocator_->Allocate(s); - if (protect_) state->flags |= kCacheProtect; - SetState(s, state); - if (cache_first_state_id_ != kNoStateId) { // Forget 1st cached state - while (NumStates() <= cache_first_state_id_) - AddState(0); - SetState(cache_first_state_id_, cache_first_state_); - if (cache_gc_ && !(cache_first_state_->flags & kCacheProtect)) { - cache_states_.push_back(cache_first_state_id_); - cache_size_ += sizeof(S) + - cache_first_state_->arcs.capacity() * sizeof(Arc); - } - cache_limit_ = kMinCacheLimit; - cache_first_state_id_ = kNoStateId; - cache_first_state_ = 0; - } - if (cache_gc_ && !protect_) { - cache_states_.push_back(s); - cache_size_ += sizeof(S); - if (cache_size_ > cache_limit_) - GC(s, false); - } - } - return state; - } -} - -// Removes from cache_states_ and uncaches (not referenced-counted or -// protected) states that have not been accessed since the last GC -// until at most cache_fraction * cache_limit_ bytes are cached. If -// that fails to free enough, recurs uncaching recently visited states -// as well. If still unable to free enough memory, then widens cache_limit_ -// to fulfill condition. 
-template <class S, class C> -void CacheBaseImpl<S, C>::GC(typename S::Arc::StateId current, - bool free_recent, float cache_fraction) { - if (!cache_gc_) - return; - VLOG(2) << "CacheImpl: Enter GC: object = " << Type() << "(" << this - << "), free recently cached = " << free_recent - << ", cache size = " << cache_size_ - << ", cache frac = " << cache_fraction - << ", cache limit = " << cache_limit_ << "\n"; - typename list<StateId>::iterator siter = cache_states_.begin(); - - size_t cache_target = cache_fraction * cache_limit_; - while (siter != cache_states_.end()) { - StateId s = *siter; - S* state = VectorFstBaseImpl<S>::GetState(s); - if (cache_size_ > cache_target && state->ref_count == 0 && - (free_recent || !(state->flags & kCacheRecent)) && s != current) { - cache_size_ -= sizeof(S) + state->arcs.capacity() * sizeof(Arc); - allocator_->Free(state, s); - SetState(s, 0); - cache_states_.erase(siter++); - } else { - state->flags &= ~kCacheRecent; - ++siter; - } - } - if (!free_recent && cache_size_ > cache_target) { // recurses on recent - GC(current, true); - } else if (cache_target > 0) { // widens cache limit - while (cache_size_ > cache_target) { - cache_limit_ *= 2; - cache_target *= 2; - } - } else if (cache_size_ > 0) { - FSTERROR() << "CacheImpl:GC: Unable to free all cached states"; - } - VLOG(2) << "CacheImpl: Exit GC: object = " << Type() << "(" << this - << "), free recently cached = " << free_recent - << ", cache size = " << cache_size_ - << ", cache frac = " << cache_fraction - << ", cache limit = " << cache_limit_ << "\n"; -} - -template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheFinal; -template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheArcs; -template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheRecent; -template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheModified; -template <class S, class C> const size_t CacheBaseImpl<S, C>::kMinCacheLimit; - -// Arcs implemented by an STL 
vector per state. Similar to VectorState -// but adds flags and ref count to keep track of what has been cached. -template <class A> -struct CacheState { - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - CacheState() : final(Weight::Zero()), flags(0), ref_count(0) {} - - void Reset() { - flags = 0; - ref_count = 0; - arcs.resize(0); - } - - Weight final; // Final weight - vector<A> arcs; // Arcs represenation - size_t niepsilons; // # of input epsilons - size_t noepsilons; // # of output epsilons - mutable uint32 flags; - mutable int ref_count; -}; - -// A CacheBaseImpl with a commonly used CacheState. -template <class A> -class CacheImpl : public CacheBaseImpl< CacheState<A> > { - public: - typedef CacheState<A> State; - - CacheImpl() {} - - explicit CacheImpl(const CacheOptions &opts) - : CacheBaseImpl< CacheState<A> >(opts) {} - - CacheImpl(const CacheImpl<A> &impl, bool preserve_cache = false) - : CacheBaseImpl<State>(impl, preserve_cache) {} - - private: - void operator=(const CacheImpl<State> &impl); // disallow -}; - - -// Use this to make a state iterator for a CacheBaseImpl-derived Fst, -// which must have type 'State' defined. Note this iterator only -// returns those states reachable from the initial state, so consider -// implementing a class-specific one. 
-template <class F> -class CacheStateIterator : public StateIteratorBase<typename F::Arc> { - public: - typedef typename F::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename F::State State; - typedef CacheBaseImpl<State> Impl; - - CacheStateIterator(const F &fst, Impl *impl) - : fst_(fst), impl_(impl), s_(0) { - fst_.Start(); // force start state - } - - bool Done() const { - if (s_ < impl_->NumKnownStates()) - return false; - if (s_ < impl_->NumKnownStates()) - return false; - for (StateId u = impl_->MinUnexpandedState(); - u < impl_->NumKnownStates(); - u = impl_->MinUnexpandedState()) { - // force state expansion - ArcIterator<F> aiter(fst_, u); - aiter.SetFlags(kArcValueFlags, kArcValueFlags | kArcNoCache); - for (; !aiter.Done(); aiter.Next()) - impl_->UpdateNumKnownStates(aiter.Value().nextstate); - impl_->ExpandedState(u); - if (s_ < impl_->NumKnownStates()) - return false; - } - return true; - } - - StateId Value() const { return s_; } - - void Next() { ++s_; } - - void Reset() { s_ = 0; } - - private: - // This allows base class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. - virtual bool Done_() const { return Done(); } - virtual StateId Value_() const { return Value(); } - virtual void Next_() { Next(); } - virtual void Reset_() { Reset(); } - - const F &fst_; - Impl *impl_; - StateId s_; -}; - - -// Use this to make an arc iterator for a CacheBaseImpl-derived Fst, -// which must have types 'Arc' and 'State' defined. 
-template <class F, - class C = DefaultCacheStateAllocator<CacheState<typename F::Arc> > > -class CacheArcIterator { - public: - typedef typename F::Arc Arc; - typedef typename F::State State; - typedef typename Arc::StateId StateId; - typedef CacheBaseImpl<State, C> Impl; - - CacheArcIterator(Impl *impl, StateId s) : i_(0) { - state_ = impl->ExtendState(s); - ++state_->ref_count; - } - - ~CacheArcIterator() { --state_->ref_count; } - - bool Done() const { return i_ >= state_->arcs.size(); } - - const Arc& Value() const { return state_->arcs[i_]; } - - void Next() { ++i_; } - - size_t Position() const { return i_; } - - void Reset() { i_ = 0; } - - void Seek(size_t a) { i_ = a; } - - uint32 Flags() const { - return kArcValueFlags; - } - - void SetFlags(uint32 flags, uint32 mask) {} - - private: - const State *state_; - size_t i_; - - DISALLOW_COPY_AND_ASSIGN(CacheArcIterator); -}; - -// Use this to make a mutable arc iterator for a CacheBaseImpl-derived Fst, -// which must have types 'Arc' and 'State' defined. -template <class F, - class C = DefaultCacheStateAllocator<CacheState<typename F::Arc> > > -class CacheMutableArcIterator - : public MutableArcIteratorBase<typename F::Arc> { - public: - typedef typename F::State State; - typedef typename F::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - typedef CacheBaseImpl<State, C> Impl; - - // You will need to call MutateCheck() in the constructor. 
- CacheMutableArcIterator(Impl *impl, StateId s) : i_(0), s_(s), impl_(impl) { - state_ = impl_->ExtendState(s_); - ++state_->ref_count; - }; - - ~CacheMutableArcIterator() { - --state_->ref_count; - } - - bool Done() const { return i_ >= state_->arcs.size(); } - - const Arc& Value() const { return state_->arcs[i_]; } - - void Next() { ++i_; } - - size_t Position() const { return i_; } - - void Reset() { i_ = 0; } - - void Seek(size_t a) { i_ = a; } - - void SetValue(const Arc& arc) { - state_->flags |= CacheBaseImpl<State, C>::kCacheModified; - uint64 properties = impl_->Properties(); - Arc& oarc = state_->arcs[i_]; - if (oarc.ilabel != oarc.olabel) - properties &= ~kNotAcceptor; - if (oarc.ilabel == 0) { - --state_->niepsilons; - properties &= ~kIEpsilons; - if (oarc.olabel == 0) - properties &= ~kEpsilons; - } - if (oarc.olabel == 0) { - --state_->noepsilons; - properties &= ~kOEpsilons; - } - if (oarc.weight != Weight::Zero() && oarc.weight != Weight::One()) - properties &= ~kWeighted; - oarc = arc; - if (arc.ilabel != arc.olabel) { - properties |= kNotAcceptor; - properties &= ~kAcceptor; - } - if (arc.ilabel == 0) { - ++state_->niepsilons; - properties |= kIEpsilons; - properties &= ~kNoIEpsilons; - if (arc.olabel == 0) { - properties |= kEpsilons; - properties &= ~kNoEpsilons; - } - } - if (arc.olabel == 0) { - ++state_->noepsilons; - properties |= kOEpsilons; - properties &= ~kNoOEpsilons; - } - if (arc.weight != Weight::Zero() && arc.weight != Weight::One()) { - properties |= kWeighted; - properties &= ~kUnweighted; - } - properties &= kSetArcProperties | kAcceptor | kNotAcceptor | - kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | - kOEpsilons | kNoOEpsilons | kWeighted | kUnweighted; - impl_->SetProperties(properties); - } - - uint32 Flags() const { - return kArcValueFlags; - } - - void SetFlags(uint32 f, uint32 m) {} - - private: - virtual bool Done_() const { return Done(); } - virtual const Arc& Value_() const { return Value(); } - virtual void 
Next_() { Next(); } - virtual size_t Position_() const { return Position(); } - virtual void Reset_() { Reset(); } - virtual void Seek_(size_t a) { Seek(a); } - virtual void SetValue_(const Arc &a) { SetValue(a); } - uint32 Flags_() const { return Flags(); } - void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); } - - size_t i_; - StateId s_; - Impl *impl_; - State *state_; - - DISALLOW_COPY_AND_ASSIGN(CacheMutableArcIterator); -}; - -} // namespace fst - -#endif // FST_LIB_CACHE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/closure.h b/kaldi_io/src/tools/openfst/include/fst/closure.h deleted file mode 100644 index 541562b..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/closure.h +++ /dev/null @@ -1,155 +0,0 @@ -// closure.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Functions and classes to compute the concatenative closure of an Fst. - -#ifndef FST_LIB_CLOSURE_H__ -#define FST_LIB_CLOSURE_H__ - -#include <vector> -using std::vector; -#include <algorithm> - -#include <fst/mutable-fst.h> -#include <fst/rational.h> - - -namespace fst { - -// Computes the concatenative closure. This version modifies its -// MutableFst input. If FST transduces string x to y with weight a, -// then the closure transduces x to y with weight a, xx to yy with -// weight Times(a, a), xxx to yyy with with Times(Times(a, a), a), -// etc. 
If closure_type == CLOSURE_STAR, then the empty string is -// transduced to itself with weight Weight::One() as well. -// -// Complexity: -// - Time: O(V) -// - Space: O(V) -// where V = # of states. -template<class Arc> -void Closure(MutableFst<Arc> *fst, ClosureType closure_type) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - uint64 props = fst->Properties(kFstProperties, false); - StateId start = fst->Start(); - for (StateIterator< MutableFst<Arc> > siter(*fst); - !siter.Done(); - siter.Next()) { - StateId s = siter.Value(); - Weight final = fst->Final(s); - if (final != Weight::Zero()) - fst->AddArc(s, Arc(0, 0, final, start)); - } - if (closure_type == CLOSURE_STAR) { - fst->ReserveStates(fst->NumStates() + 1); - StateId nstart = fst->AddState(); - fst->SetStart(nstart); - fst->SetFinal(nstart, Weight::One()); - if (start != kNoLabel) - fst->AddArc(nstart, Arc(0, 0, Weight::One(), start)); - } - fst->SetProperties(ClosureProperties(props, closure_type == CLOSURE_STAR), - kFstProperties); -} - -// Computes the concatenative closure. This version modifies its -// RationalFst input. -template<class Arc> -void Closure(RationalFst<Arc> *fst, ClosureType closure_type) { - fst->GetImpl()->AddClosure(closure_type); -} - - -struct ClosureFstOptions : RationalFstOptions { - ClosureType type; - - ClosureFstOptions(const RationalFstOptions &opts, ClosureType t) - : RationalFstOptions(opts), type(t) {} - explicit ClosureFstOptions(ClosureType t) : type(t) {} - ClosureFstOptions() : type(CLOSURE_STAR) {} -}; - - -// Computes the concatenative closure. This version is a delayed -// Fst. If FST transduces string x to y with weight a, then the -// closure transduces x to y with weight a, xx to yy with weight -// Times(a, a), xxx to yyy with weight Times(Times(a, a), a), etc. If -// closure_type == CLOSURE_STAR, then The empty string is transduced -// to itself with weight Weight::One() as well. 
-// -// Complexity: -// - Time: O(v) -// - Space: O(v) -// where v = # of states visited. Constant time and space to visit an -// input state or arc is assumed and exclusive of caching. -template <class A> -class ClosureFst : public RationalFst<A> { - public: - using ImplToFst< RationalFstImpl<A> >::GetImpl; - - typedef A Arc; - - ClosureFst(const Fst<A> &fst, ClosureType closure_type) { - GetImpl()->InitClosure(fst, closure_type); - } - - ClosureFst(const Fst<A> &fst, const ClosureFstOptions &opts) - : RationalFst<A>(opts) { - GetImpl()->InitClosure(fst, opts.type); - } - - // See Fst<>::Copy() for doc. - ClosureFst(const ClosureFst<A> &fst, bool safe = false) - : RationalFst<A>(fst, safe) {} - - // Get a copy of this ClosureFst. See Fst<>::Copy() for further doc. - virtual ClosureFst<A> *Copy(bool safe = false) const { - return new ClosureFst<A>(*this, safe); - } -}; - - -// Specialization for ClosureFst. -template <class A> -class StateIterator< ClosureFst<A> > : public StateIterator< RationalFst<A> > { - public: - explicit StateIterator(const ClosureFst<A> &fst) - : StateIterator< RationalFst<A> >(fst) {} -}; - - -// Specialization for ClosureFst. -template <class A> -class ArcIterator< ClosureFst<A> > : public ArcIterator< RationalFst<A> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const ClosureFst<A> &fst, StateId s) - : ArcIterator< RationalFst<A> >(fst, s) {} -}; - - -// Useful alias when using StdArc. -typedef ClosureFst<StdArc> StdClosureFst; - -} // namespace fst - -#endif // FST_LIB_CLOSURE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/compact-fst.h b/kaldi_io/src/tools/openfst/include/fst/compact-fst.h deleted file mode 100644 index 6db3317..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/compact-fst.h +++ /dev/null @@ -1,1438 +0,0 @@ -// compact-fst.h - - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// FST Class for memory-efficient representation of common types of -// FSTs: linear automata, acceptors, unweighted FSTs, ... - -#ifndef FST_LIB_COMPACT_FST_H__ -#define FST_LIB_COMPACT_FST_H__ - -#include <iterator> -#include <utility> -using std::pair; using std::make_pair; -#include <vector> -using std::vector; - -#include <fst/cache.h> -#include <fst/expanded-fst.h> -#include <fst/fst-decl.h> // For optional argument declarations -#include <fst/mapped-file.h> -#include <fst/matcher.h> -#include <fst/test-properties.h> -#include <fst/util.h> - - -namespace fst { - -struct CompactFstOptions : public CacheOptions { - // CompactFst default caching behaviour is to do no caching. Most - // compactors are cheap and therefore we save memory by not doing - // caching. - CompactFstOptions() : CacheOptions(true, 0) {} - CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {} -}; - -// Compactor Interface - class determinies how arcs and final weights -// are compacted and expanded. -// -// Final weights are treated as transitions to the superfinal state, -// i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId. -// -// There are two types of compactors: -// -// * Fixed out-degree compactors: 'compactor.Size()' returns a -// positive integer 's'. An FST can be compacted by this compactor -// only if each state has exactly 's' outgoing transitions (counting a -// non-Zero() final weight as a transition). 
A typical example is a -// compactor for string FSTs, i.e. 's == 1'. -// -// * Variable out-degree compactors: 'compactor.Size() == -1'. There -// are no out-degree restrictions for these compactors. -// -// -// class Compactor { -// public: -// // Element is the type of the compacted transitions. -// typedef ... Element; -// // Return the compacted representation of a transition 'arc' -// // at a state 's'. -// Element Compact(StateId s, const Arc &arc); -// // Return the transition at state 's' represented by the compacted -// // transition 'e'. -// Arc Expand(StateId s, const Element &e); -// // Return -1 for variable out-degree compactors, and the mandatory -// // out-degree otherwise. -// ssize_t Size(); -// // Test whether 'fst' can be compacted by this compactor. -// bool Compatible(const Fst<A> &fst); -// // Return the properties that are always true for an fst -// // compacted using this compactor -// uint64 Properties(); -// // Return a string identifying the type of compactor. -// static const string &Type(); -// // Write a compactor to a file. -// bool Write(ostream &strm); -// // Read a compactor from a file. -// static Compactor *Read(istream &strm); -// // Default constructor (optional, see comment below). -// Compactor(); -// }; -// -// The default constructor is only required for FST_REGISTER to work -// (i.e. enabling Convert() and the command-line utilities to work -// with this new compactor). However, a default constructor always -// needs to be specify for this code to compile, but one can have it -// simply raised an error when called: -// -// Compactor::Compactor() { -// FSTERROR() << "Compactor: no default constructor"; -// } - - -// Implementation data for Compact Fst, which can shared between otherwise -// independent copies. -// -// The implementation contains two arrays: 'states_' and 'compacts_'. -// -// For fixed out-degree compactors, the 'states_' array is unallocated. -// The 'compacts_' contains the compacted transitions. 
Its size is -// 'ncompacts_'. The outgoing transitions at a given state are stored -// consecutively. For a given state 's', its 'compactor.Size()' outgoing -// transitions (including superfinal transition when 's' is final), are -// stored in position ['s*compactor.Size()', '(s+1)*compactor_.Size()'). -// -// For variable out-degree compactors, the states_ array has size -// 'nstates_ + 1' and contains pointers to positions into 'compacts_'. -// For a given state 's', the compacted transitions of 's' are -// stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'. -// By convention, 'states_[nstates_] == ncompacts_'. -// -// In both cases, the superfinal transitons (when 's' is final, i.e. -// 'Final(s) != Weight::Zero()') is stored first. -// -// The unsigned type U is used to represent indices into the compacts_ -// array. -template <class E, class U> -class CompactFstData { - public: - typedef E CompactElement; - typedef U Unsigned; - - CompactFstData() - : states_region_(0), - compacts_region_(0), - states_(0), - compacts_(0), - nstates_(0), - ncompacts_(0), - narcs_(0), - start_(kNoStateId), - error_(false) {} - - template <class A, class Compactor> - CompactFstData(const Fst<A> &fst, const Compactor &compactor); - - template <class Iterator, class Compactor> - CompactFstData(const Iterator &begin, const Iterator &end, - const Compactor &compactor); - - ~CompactFstData() { - if (states_region_ == NULL) { - delete [] states_; - } - delete states_region_; - if (compacts_region_ == NULL) { - delete [] compacts_; - } - delete compacts_region_; - } - - template <class Compactor> - static CompactFstData<E, U> *Read(istream &strm, - const FstReadOptions &opts, - const FstHeader &hdr, - const Compactor &compactor); - - bool Write(ostream &strm, const FstWriteOptions &opts) const; - - Unsigned States(ssize_t i) const { return states_[i]; } - const CompactElement &Compacts(size_t i) const { return compacts_[i]; } - size_t NumStates() const { return 
nstates_; } - size_t NumCompacts() const { return ncompacts_; } - size_t NumArcs() const { return narcs_; } - ssize_t Start() const { return start_; } - - int RefCount() const { return ref_count_.count(); } - int IncrRefCount() { return ref_count_.Incr(); } - int DecrRefCount() { return ref_count_.Decr(); } - - bool Error() const { return error_; } - - private: - MappedFile *states_region_; - MappedFile *compacts_region_; - Unsigned *states_; - CompactElement *compacts_; - size_t nstates_; - size_t ncompacts_; - size_t narcs_; - ssize_t start_; - RefCounter ref_count_; - bool error_; -}; - -template <class E, class U> -template <class A, class C> -CompactFstData<E, U>::CompactFstData(const Fst<A> &fst, const C &compactor) - : states_region_(0), - compacts_region_(0), - states_(0), - compacts_(0), - nstates_(0), - ncompacts_(0), - narcs_(0), - start_(kNoStateId), - error_(false) { - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - start_ = fst.Start(); - // Count # of states and arcs. 
- StateId nfinals = 0; - for (StateIterator< Fst<A> > siter(fst); - !siter.Done(); - siter.Next()) { - ++nstates_; - StateId s = siter.Value(); - for (ArcIterator< Fst<A> > aiter(fst, s); - !aiter.Done(); - aiter.Next()) - ++narcs_; - if (fst.Final(s) != Weight::Zero()) ++nfinals; - } - if (compactor.Size() == -1) { - states_ = new Unsigned[nstates_ + 1]; - ncompacts_ = narcs_ + nfinals; - compacts_ = new CompactElement[ncompacts_]; - states_[nstates_] = ncompacts_; - } else { - states_ = 0; - ncompacts_ = nstates_ * compactor.Size(); - if ((narcs_ + nfinals) != ncompacts_) { - FSTERROR() << "CompactFstData: compactor incompatible with fst"; - error_ = true; - return; - } - compacts_ = new CompactElement[ncompacts_]; - } - size_t pos = 0, fpos = 0; - for (StateId s = 0; s < nstates_; ++s) { - fpos = pos; - if (compactor.Size() == -1) - states_[s] = pos; - if (fst.Final(s) != Weight::Zero()) - compacts_[pos++] = compactor.Compact(s, A(kNoLabel, kNoLabel, - fst.Final(s), kNoStateId)); - for (ArcIterator< Fst<A> > aiter(fst, s); - !aiter.Done(); - aiter.Next()) { - compacts_[pos++] = compactor.Compact(s, aiter.Value()); - } - if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) { - FSTERROR() << "CompactFstData: compactor incompatible with fst"; - error_ = true; - return; - } - } - if (pos != ncompacts_) { - FSTERROR() << "CompactFstData: compactor incompatible with fst"; - error_ = true; - return; - } -} - -template <class E, class U> -template <class Iterator, class C> -CompactFstData<E, U>::CompactFstData(const Iterator &begin, - const Iterator &end, - const C &compactor) - : states_region_(0), - compacts_region_(0), - states_(0), - compacts_(0), - nstates_(0), - ncompacts_(0), - narcs_(0), - start_(kNoStateId), - error_(false) { - typedef typename C::Arc Arc; - typedef typename Arc::Weight Weight; - if (compactor.Size() != -1) { - ncompacts_ = distance(begin, end); - if (compactor.Size() == 1) { - // For strings, allow implicit final weight. 
- // Empty input is the empty string. - if (ncompacts_ == 0) { - ++ncompacts_; - } else { - Arc arc = compactor.Expand(ncompacts_ - 1, - *(begin + (ncompacts_ - 1))); - if (arc.ilabel != kNoLabel) - ++ncompacts_; - } - } - if (ncompacts_ % compactor.Size()) { - FSTERROR() << "CompactFstData: size of input container incompatible" - << " with compactor"; - error_ = true; - return; - } - if (ncompacts_ == 0) - return; - start_ = 0; - nstates_ = ncompacts_ / compactor.Size(); - compacts_ = new CompactElement[ncompacts_]; - size_t i = 0; - Iterator it = begin; - for(; it != end; ++it, ++i){ - compacts_[i] = *it; - if (compactor.Expand(i, *it).ilabel != kNoLabel) - ++narcs_; - } - if (i < ncompacts_) - compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel, - Weight::One(), kNoStateId)); - } else { - if (distance(begin, end) == 0) - return; - // Count # of states, arcs and compacts. - Iterator it = begin; - for(size_t i = 0; it != end; ++it, ++i) { - Arc arc = compactor.Expand(i, *it); - if (arc.ilabel != kNoLabel) { - ++narcs_; - ++ncompacts_; - } else { - ++nstates_; - if (arc.weight != Weight::Zero()) - ++ncompacts_; - } - } - start_ = 0; - compacts_ = new CompactElement[ncompacts_]; - states_ = new Unsigned[nstates_ + 1]; - states_[nstates_] = ncompacts_; - size_t i = 0, s = 0; - for(it = begin; it != end; ++it) { - Arc arc = compactor.Expand(i, *it); - if (arc.ilabel != kNoLabel) { - compacts_[i++] = *it; - } else { - states_[s++] = i; - if (arc.weight != Weight::Zero()) - compacts_[i++] = *it; - } - } - if ((s != nstates_) || (i != ncompacts_)) { - FSTERROR() << "CompactFstData: ill-formed input container"; - error_ = true; - return; - } - } -} - -template <class E, class U> -template <class C> -CompactFstData<E, U> *CompactFstData<E, U>::Read( - istream &strm, - const FstReadOptions &opts, - const FstHeader &hdr, - const C &compactor) { - CompactFstData<E, U> *data = new CompactFstData<E, U>(); - data->start_ = hdr.Start(); - data->nstates_ = hdr.NumStates(); 
- data->narcs_ = hdr.NumArcs(); - - if (compactor.Size() == -1) { - if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { - LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source; - delete data; - return 0; - } - size_t b = (data->nstates_ + 1) * sizeof(Unsigned); - data->states_region_ = MappedFile::Map(&strm, opts, b); - if (!strm || data->states_region_ == NULL) { - LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source; - delete data; - return 0; - } - data->states_ = static_cast<Unsigned *>( - data->states_region_->mutable_data()); - } else { - data->states_ = 0; - } - data->ncompacts_ = compactor.Size() == -1 - ? data->states_[data->nstates_] - : data->nstates_ * compactor.Size(); - if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { - LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source; - delete data; - return 0; - } - size_t b = data->ncompacts_ * sizeof(CompactElement); - data->compacts_region_ = MappedFile::Map(&strm, opts, b); - if (!strm || data->compacts_region_ == NULL) { - LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source; - delete data; - return 0; - } - data->compacts_ = static_cast<CompactElement *>( - data->compacts_region_->mutable_data()); - return data; -} - -template<class E, class U> -bool CompactFstData<E, U>::Write(ostream &strm, - const FstWriteOptions &opts) const { - if (states_) { - if (opts.align && !AlignOutput(strm)) { - LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; - return false; - } - strm.write(reinterpret_cast<char *>(states_), - (nstates_ + 1) * sizeof(Unsigned)); - } - if (opts.align && !AlignOutput(strm)) { - LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; - return false; - } - strm.write(reinterpret_cast<char *>(compacts_), - ncompacts_ * sizeof(CompactElement)); - - strm.flush(); - if (!strm) { - LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source; - return false; - } - return true; 
-} - -template <class A, class C, class U> class CompactFst; -template <class F, class G> void Cast(const F &, G *); - -// Implementation class for CompactFst, which contains CompactFstData -// and Fst cache. -template <class A, class C, class U> -class CompactFstImpl : public CacheImpl<A> { - public: - using FstImpl<A>::SetType; - using FstImpl<A>::SetProperties; - using FstImpl<A>::Properties; - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - using FstImpl<A>::WriteHeader; - - using CacheImpl<A>::PushArc; - using CacheImpl<A>::HasArcs; - using CacheImpl<A>::HasFinal; - using CacheImpl<A>::HasStart; - using CacheImpl<A>::SetArcs; - using CacheImpl<A>::SetFinal; - using CacheImpl<A>::SetStart; - - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef C Compactor; - typedef typename C::Element CompactElement; - typedef U Unsigned; - - CompactFstImpl() - : CacheImpl<A>(CompactFstOptions()), - compactor_(0), - own_compactor_(false), - data_(0) { - string type = "compact"; - if (sizeof(U) != sizeof(uint32)) { - string size; - Int64ToStr(8 * sizeof(U), &size); - type += size; - } - type += "_"; - type += C::Type(); - SetType(type); - SetProperties(kNullProperties | kStaticProperties); - } - - CompactFstImpl(const Fst<Arc> &fst, const C &compactor, - const CompactFstOptions &opts) - : CacheImpl<A>(opts), - compactor_(new C(compactor)), - own_compactor_(true), - data_(0) { - Init(fst); - } - - CompactFstImpl(const Fst<Arc> &fst, C *compactor, - const CompactFstOptions &opts) - : CacheImpl<A>(opts), - compactor_(compactor), - own_compactor_(false), - data_(0) { - Init(fst); - } - - template <class Iterator> - CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor, - const CompactFstOptions &opts) - : CacheImpl<A>(opts), - compactor_(new C(compactor)), - own_compactor_(true), - data_(0) { - Init(b, e); - } - - template <class Iterator> - CompactFstImpl(const Iterator &b, const 
Iterator &e, C *compactor, - const CompactFstOptions &opts) - : CacheImpl<A>(opts), - compactor_(compactor), - own_compactor_(false), - data_(0) { - Init(b, e); - } - - CompactFstImpl(const CompactFstImpl<A, C, U> &impl) - : CacheImpl<A>(impl), - compactor_(new C(*impl.compactor_)), - own_compactor_(true), - data_(impl.data_) { - if (data_) - data_->IncrRefCount(); - SetType(impl.Type()); - SetProperties(impl.Properties()); - SetInputSymbols(impl.InputSymbols()); - SetOutputSymbols(impl.OutputSymbols()); - } - - ~CompactFstImpl(){ - if (own_compactor_) - delete compactor_; - if (data_ && !data_->DecrRefCount()) - delete data_; - } - - StateId Start() { - if (!HasStart()) { - SetStart(data_->Start()); - } - return CacheImpl<A>::Start(); - } - - Weight Final(StateId s) { - if (HasFinal(s)) - return CacheImpl<A>::Final(s); - Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId); - if ((compactor_->Size() != -1) || - (data_->States(s) != data_->States(s + 1))) - arc = ComputeArc(s, - compactor_->Size() == -1 - ? data_->States(s) - : s * compactor_->Size()); - return arc.ilabel == kNoLabel ? 
arc.weight : Weight::Zero(); - } - - StateId NumStates() const { - if (Properties(kError)) return 0; - return data_->NumStates(); - } - - size_t NumArcs(StateId s) { - if (HasArcs(s)) - return CacheImpl<A>::NumArcs(s); - Unsigned i, num_arcs; - if (compactor_->Size() == -1) { - i = data_->States(s); - num_arcs = data_->States(s + 1) - i; - } else { - i = s * compactor_->Size(); - num_arcs = compactor_->Size(); - } - if (num_arcs > 0) { - const A &arc = ComputeArc(s, i, kArcILabelValue); - if (arc.ilabel == kNoStateId) { - --num_arcs; - } - } - return num_arcs; - } - - size_t NumInputEpsilons(StateId s) { - if (!HasArcs(s) && !Properties(kILabelSorted)) - Expand(s); - if (HasArcs(s)) - return CacheImpl<A>::NumInputEpsilons(s); - return CountEpsilons(s, false); - } - - size_t NumOutputEpsilons(StateId s) { - if (!HasArcs(s) && !Properties(kOLabelSorted)) - Expand(s); - if (HasArcs(s)) - return CacheImpl<A>::NumOutputEpsilons(s); - return CountEpsilons(s, true); - } - - size_t CountEpsilons(StateId s, bool output_epsilons) { - size_t begin = compactor_->Size() == -1 ? - data_->States(s) : s * compactor_->Size(); - size_t end = compactor_->Size() == -1 ? - data_->States(s + 1) : (s + 1) * compactor_->Size(); - size_t num_eps = 0; - for (size_t i = begin; i < end; ++i) { - const A &arc = ComputeArc( - s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue); - const typename A::Label &label = - (output_epsilons ? 
arc.olabel : arc.ilabel); - if (label == kNoLabel) - continue; - else if (label > 0) - break; - ++num_eps; - } - return num_eps; - } - - static CompactFstImpl<A, C, U> *Read(istream &strm, - const FstReadOptions &opts) { - CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>(); - FstHeader hdr; - if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { - delete impl; - return 0; - } - - // Ensures compatibility - if (hdr.Version() == kAlignedFileVersion) - hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED); - - impl->compactor_ = C::Read(strm); - if (!impl->compactor_) { - delete impl; - return 0; - } - impl->own_compactor_ = true; - impl->data_ = CompactFstData<CompactElement, U>::Read(strm, opts, hdr, - *impl->compactor_); - if (!impl->data_) { - delete impl; - return 0; - } - return impl; - } - - bool Write(ostream &strm, const FstWriteOptions &opts) const { - FstHeader hdr; - hdr.SetStart(data_->Start()); - hdr.SetNumStates(data_->NumStates()); - hdr.SetNumArcs(data_->NumArcs()); - - // Ensures compatibility - int file_version = opts.align ? kAlignedFileVersion : kFileVersion; - WriteHeader(strm, opts, file_version, &hdr); - compactor_->Write(strm); - return data_->Write(strm, opts); - } - - // Provide information needed for generic state iterator - void InitStateIterator(StateIteratorData<A> *data) const { - data->base = 0; - data->nstates = data_->NumStates(); - } - - void InitArcIterator(StateId s, ArcIteratorData<A> *data) { - if (!HasArcs(s)) - Expand(s); - CacheImpl<A>::InitArcIterator(s, data); - } - - Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const { - return compactor_->Expand(s, data_->Compacts(i), f); - } - - void Expand(StateId s) { - size_t begin = compactor_->Size() == -1 ? - data_->States(s) : s * compactor_->Size(); - size_t end = compactor_->Size() == -1 ? 
- data_->States(s + 1) : (s + 1) * compactor_->Size(); - for (size_t i = begin; i < end; ++i) { - const Arc &arc = ComputeArc(s, i); - if (arc.ilabel == kNoLabel) - SetFinal(s, arc.weight); - else - PushArc(s, arc); - } - if (!HasFinal(s)) - SetFinal(s, Weight::Zero()); - SetArcs(s); - } - - template <class Iterator> - void SetCompactElements(const Iterator &b, const Iterator &e) { - if (data_ && !data_->DecrRefCount()) - delete data_; - data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_); - } - - C *GetCompactor() const { return compactor_; } - CompactFstData<CompactElement, U> *Data() const { return data_; } - - // Properties always true of this Fst class - static const uint64 kStaticProperties = kExpanded; - - protected: - template <class B, class D> - explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl) - : CacheImpl<A>(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())), - compactor_(new C(*impl.GetCompactor())), - own_compactor_(true), - data_(impl.Data()) { - if (data_) - data_->IncrRefCount(); - SetType(impl.Type()); - SetProperties(impl.Properties()); - SetInputSymbols(impl.InputSymbols()); - SetOutputSymbols(impl.OutputSymbols()); - } - - private: - friend class CompactFst<A, C, U>; // allow access during write. 
- - void Init(const Fst<Arc> &fst) { - string type = "compact"; - if (sizeof(U) != sizeof(uint32)) { - string size; - Int64ToStr(8 * sizeof(U), &size); - type += size; - } - type += "_"; - type += compactor_->Type(); - SetType(type); - SetInputSymbols(fst.InputSymbols()); - SetOutputSymbols(fst.OutputSymbols()); - data_ = new CompactFstData<CompactElement, U>(fst, *compactor_); - if (data_->Error()) - SetProperties(kError, kError); - uint64 copy_properties = fst.Properties(kCopyProperties, true); - if ((copy_properties & kError) || !compactor_->Compatible(fst)) { - FSTERROR() << "CompactFstImpl: input fst incompatible with compactor"; - SetProperties(kError, kError); - return; - } - SetProperties(copy_properties | kStaticProperties); - } - - template <class Iterator> - void Init(const Iterator &b, const Iterator &e) { - string type = "compact"; - if (sizeof(U) != sizeof(uint32)) { - string size; - Int64ToStr(8 * sizeof(U), &size); - type += size; - } - type += "_"; - type += compactor_->Type(); - SetType(type); - SetProperties(kStaticProperties | compactor_->Properties()); - data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_); - if (data_->Error()) - SetProperties(kError, kError); - } - - // Current unaligned file format version - static const int kFileVersion = 2; - // Current aligned file format version - static const int kAlignedFileVersion = 1; - // Minimum file format version supported - static const int kMinFileVersion = 1; - - C *compactor_; - bool own_compactor_; - CompactFstData<CompactElement, U> *data_; -}; - -template <class A, class C, class U> -const uint64 CompactFstImpl<A, C, U>::kStaticProperties; -template <class A, class C, class U> -const int CompactFstImpl<A, C, U>::kFileVersion; -template <class A, class C, class U> -const int CompactFstImpl<A, C, U>::kAlignedFileVersion; -template <class A, class C, class U> -const int CompactFstImpl<A, C, U>::kMinFileVersion; - - -// CompactFst. 
This class attaches interface to implementation and -// handles reference counting, delegating most methods to -// ImplToExpandedFst. The unsigned type U is used to represent indices -// into the compact arc array (uint32 by default, declared in -// fst-decl.h). -template <class A, class C, class U> -class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > { - public: - friend class StateIterator< CompactFst<A, C, U> >; - friend class ArcIterator< CompactFst<A, C, U> >; - template <class F, class G> void friend Cast(const F &, G *); - - typedef A Arc; - typedef typename A::StateId StateId; - typedef CompactFstImpl<A, C, U> Impl; - typedef CacheState<A> State; - typedef U Unsigned; - - CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {} - - explicit CompactFst(const Fst<A> &fst, const C &compactor = C(), - const CompactFstOptions &opts = CompactFstOptions()) - : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {} - - CompactFst(const Fst<A> &fst, C *compactor, - const CompactFstOptions &opts = CompactFstOptions()) - : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {} - - // The following 2 constructors take as input two iterators delimiting - // a set of (already) compacted transitions, starting with the - // transitions out of the initial state. The format of the input - // differs for fixed out-degree and variable out-degree compactors. - // - // - For fixed out-degree compactors, the final weight (encoded as a - // compacted transition) needs to be given only for final - // states. All strings (compactor of size 1) will be assume to be - // terminated by a final state even when the final state is not - // implicitely given. - // - // - For variable out-degree compactors, the final weight (encoded - // as a compacted transition) needs to be given for all states and - // must appeared first in the list (for state s, final weight of s, - // followed by outgoing transitons in s). 
- // - // These 2 constructors allows the direct construction of a CompactFst - // without first creating a more memory hungry 'regular' FST. This - // is useful when memory usage is severely constrained. - template <class Iterator> - explicit CompactFst(const Iterator &begin, const Iterator &end, - const C &compactor = C(), - const CompactFstOptions &opts = CompactFstOptions()) - : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {} - - template <class Iterator> - CompactFst(const Iterator &begin, const Iterator &end, - C *compactor, const CompactFstOptions &opts = CompactFstOptions()) - : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {} - - // See Fst<>::Copy() for doc. - CompactFst(const CompactFst<A, C, U> &fst, bool safe = false) - : ImplToExpandedFst<Impl>(fst, safe) {} - - // Get a copy of this CompactFst. See Fst<>::Copy() for further doc. - virtual CompactFst<A, C, U> *Copy(bool safe = false) const { - return new CompactFst<A, C, U>(*this, safe); - } - - // Read a CompactFst from an input stream; return NULL on error - static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) { - Impl* impl = Impl::Read(strm, opts); - return impl ? new CompactFst<A, C, U>(impl) : 0; - } - - // Read a CompactFst from a file; return NULL on error - // Empty filename reads from standard input - static CompactFst<A, C, U> *Read(const string &filename) { - Impl* impl = ImplToExpandedFst<Impl>::Read(filename); - return impl ? 
new CompactFst<A, C, U>(impl) : 0; - } - - virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { - return GetImpl()->Write(strm, opts); - } - - virtual bool Write(const string &filename) const { - return Fst<A>::WriteFile(filename); - } - - template <class F> - static bool WriteFst(const F &fst, const C &compactor, ostream &strm, - const FstWriteOptions &opts); - - virtual void InitStateIterator(StateIteratorData<A> *data) const { - GetImpl()->InitStateIterator(data); - } - - virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { - GetImpl()->InitArcIterator(s, data); - } - - virtual MatcherBase<A> *InitMatcher(MatchType match_type) const { - return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type); - } - - template <class Iterator> - void SetCompactElements(const Iterator &b, const Iterator &e) { - GetImpl()->SetCompactElements(b, e); - } - - private: - CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {} - - // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); } - - void SetImpl(Impl *impl, bool own_impl = false) { - ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl); - } - - // Use overloading to extract the type of the argument. - static Impl* GetImplIfCompactFst(const CompactFst<A, C, U> &compact_fst) { - return compact_fst.GetImpl(); - } - - // This does not give privileged treatment to subclasses of CompactFst. - template<typename NonCompactFst> - static Impl* GetImplIfCompactFst(const NonCompactFst& fst) { - return NULL; - } - - void operator=(const CompactFst<A, C, U> &fst); // disallow -}; - -// Writes Fst in Compact format, potentially with a pass over the machine -// before writing to compute the number of states and arcs. 
-// -template <class A, class C, class U> -template <class F> -bool CompactFst<A, C, U>::WriteFst(const F &fst, - const C &compactor, - ostream &strm, - const FstWriteOptions &opts) { - typedef U Unsigned; - typedef typename C::Element CompactElement; - typedef typename A::Weight Weight; - int file_version = opts.align ? - CompactFstImpl<A, C, U>::kAlignedFileVersion : - CompactFstImpl<A, C, U>::kFileVersion; - size_t num_arcs = -1, num_states = -1, num_compacts = -1; - C first_pass_compactor = compactor; - if (Impl* impl = GetImplIfCompactFst(fst)) { - num_arcs = impl->Data()->NumArcs(); - num_states = impl->Data()->NumStates(); - num_compacts = impl->Data()->NumCompacts(); - first_pass_compactor = *impl->GetCompactor(); - } else { - // A first pass is needed to compute the state of the compactor, which - // is saved ahead of the rest of the data structures. This unfortunately - // means forcing a complete double compaction when writing in this format. - // TODO(allauzen): eliminate mutable state from compactors. 
- num_arcs = 0; - num_states = 0; - for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { - const StateId s = siter.Value(); - ++num_states; - if (fst.Final(s) != Weight::Zero()) { - first_pass_compactor.Compact( - s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId)); - } - for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) { - ++num_arcs; - first_pass_compactor.Compact(s, aiter.Value()); - } - } - } - FstHeader hdr; - hdr.SetStart(fst.Start()); - hdr.SetNumStates(num_states); - hdr.SetNumArcs(num_arcs); - string type = "compact"; - if (sizeof(U) != sizeof(uint32)) { - string size; - Int64ToStr(8 * sizeof(U), &size); - type += size; - } - type += "_"; - type += C::Type(); - uint64 copy_properties = fst.Properties(kCopyProperties, true); - if ((copy_properties & kError) || !compactor.Compatible(fst)) { - LOG(ERROR) << "fst incompatible with compactor"; - return false; - } - uint64 properties = copy_properties | - CompactFstImpl<A, C, U>::kStaticProperties; - FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties, - &hdr); - first_pass_compactor.Write(strm); - if (first_pass_compactor.Size() == -1) { - if (opts.align && !AlignOutput(strm)) { - LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; - return false; - } - Unsigned compacts = 0; - for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { - const StateId s = siter.Value(); - strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts)); - if (fst.Final(s) != Weight::Zero()) { - ++compacts; - } - compacts += fst.NumArcs(s); - } - strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts)); - } - if (opts.align && !AlignOutput(strm)) { - LOG(ERROR) << "Could not align file during write after writing states"; - } - C second_pass_compactor = compactor; - CompactElement element; - for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { - const StateId s = siter.Value(); - if (fst.Final(s) != Weight::Zero()) { - 
element = second_pass_compactor.Compact( - s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId)); - strm.write(reinterpret_cast<const char *>(&element), sizeof(element)); - } - for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) { - element = second_pass_compactor.Compact(s, aiter.Value()); - strm.write(reinterpret_cast<const char *>(&element), sizeof(element)); - } - } - strm.flush(); - if (!strm) { - LOG(ERROR) << "CompactFst write failed: " << opts.source; - return false; - } - return true; -} - - -// Specialization for CompactFst; see generic version in fst.h -// for sample usage (but use the CompactFst type!). This version -// should inline. -template <class A, class C, class U> -class StateIterator< CompactFst<A, C, U> > { - public: - typedef typename A::StateId StateId; - - explicit StateIterator(const CompactFst<A, C, U> &fst) - : nstates_(fst.GetImpl()->NumStates()), s_(0) {} - - bool Done() const { return s_ >= nstates_; } - - StateId Value() const { return s_; } - - void Next() { ++s_; } - - void Reset() { s_ = 0; } - - private: - StateId nstates_; - StateId s_; - - DISALLOW_COPY_AND_ASSIGN(StateIterator); -}; - -// Specialization for CompactFst. -// Never caches, always iterates over the underlying compact elements. 
-template <class A, class C, class U> -class ArcIterator< CompactFst<A, C, U> > { - public: - typedef typename A::StateId StateId; - typedef typename C::Element CompactElement; - - ArcIterator(const CompactFst<A, C, U> &fst, StateId s) - : compactor_(fst.GetImpl()->GetCompactor()), state_(s), compacts_(0), - pos_(0), flags_(kArcValueFlags) { - - const CompactFstData<CompactElement, U> *data = fst.GetImpl()->Data(); - size_t offset; - if (compactor_->Size() == -1) { // Variable out-degree compactor - offset = data->States(s); - num_arcs_ = data->States(s + 1) - offset; - } else { // Fixed out-degree compactor - offset = s * compactor_->Size(); - num_arcs_ = compactor_->Size(); - } - if (num_arcs_ > 0) { - compacts_ = &(data->Compacts(offset)); - arc_ = compactor_->Expand(s, *compacts_, kArcILabelValue); - if (arc_.ilabel == kNoStateId) { - ++compacts_; - --num_arcs_; - } - } - } - - ~ArcIterator() {} - - bool Done() const { return pos_ >= num_arcs_; } - - const A& Value() const { - arc_ = compactor_->Expand(state_, compacts_[pos_], flags_); - return arc_; - } - - void Next() { ++pos_; } - - size_t Position() const { return pos_; } - - void Reset() { pos_ = 0; } - - void Seek(size_t pos) { pos_ = pos; } - - uint32 Flags() const { return flags_; } - - void SetFlags(uint32 f, uint32 m) { - flags_ &= ~m; - flags_ |= (f & kArcValueFlags); - } - - private: - C *compactor_; - StateId state_; - const CompactElement *compacts_; - size_t pos_; - size_t num_arcs_; - mutable A arc_; - uint32 flags_; - - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - -// // Specialization for CompactFst. -// // This is an optionally caching arc iterator. -// // TODO(allauzen): implements the kArcValueFlags, the current -// // implementation only implements the kArcNoCache flag. 
-// template <class A, class C, class U> -// class ArcIterator< CompactFst<A, C, U> > { -// public: -// typedef typename A::StateId StateId; - -// ArcIterator(const CompactFst<A, C, U> &fst, StateId s) -// : fst_(fst), state_(s), pos_(0), num_arcs_(0), offset_(0), -// flags_(kArcValueFlags) { -// cache_data_.ref_count = 0; - -// if (fst_.GetImpl()->HasArcs(state_)) { -// fst_.GetImpl()->InitArcIterator(s, &cache_data_); -// num_arcs_ = cache_data_.narcs; -// return; -// } - -// const C *compactor = fst_.GetImpl()->GetCompactor(); -// const CompactFstData<A, C, U> *data = fst_.GetImpl()->Data(); -// if (compactor->Size() == -1) { // Variable out-degree compactor -// offset_ = data->States(s); -// num_arcs_ = data->States(s + 1) - offset_; -// } else { // Fixed out-degree compactor -// offset_ = s * compactor->Size(); -// num_arcs_ = compactor->Size(); -// } -// if (num_arcs_ > 0) { -// const A &arc = fst_.GetImpl()->ComputeArc(s, offset_); -// if (arc.ilabel == kNoStateId) { -// ++offset_; -// --num_arcs_; -// } -// } -// } - - -// ~ArcIterator() { -// if (cache_data_.ref_count) -// --(*cache_data_.ref_count); -// } - -// bool Done() const { return pos_ >= num_arcs_; } - -// const A& Value() const { -// if (cache_data_.ref_count == 0) { -// if (flags_ & kArcNoCache) { -// arc_ = fst_.GetImpl()->ComputeArc(state_, pos_ + offset_); -// return arc_; -// } else { -// fst_.GetImpl()->InitArcIterator(state_, &cache_data_); -// } -// } -// return cache_data_.arcs[pos_]; -// } - -// void Next() { ++pos_; } - -// size_t Position() const { return pos_; } - -// void Reset() { pos_ = 0; } - -// void Seek(size_t pos) { pos_ = pos; } - -// uint32 Flags() const { return flags_; } - -// void SetFlags(uint32 f, uint32 m) { -// flags_ &= ~m; -// flags_ |= f; - -// if (!(flags_ & kArcNoCache) && cache_data_.ref_count == 0) -// fst_.GetImpl()->InitArcIterator(state_, &cache_data_); -// } - -// private: -// mutable const CompactFst<A, C, U> &fst_; -// StateId state_; -// size_t pos_; 
-// size_t num_arcs_; -// size_t offset_; -// uint32 flags_; -// mutable A arc_; -// mutable ArcIteratorData<A> cache_data_; - -// DISALLOW_COPY_AND_ASSIGN(ArcIterator); -// }; - - -// -// Utility Compactors -// - -// Compactor for unweighted string FSTs -template <class A> -class StringCompactor { - public: - typedef A Arc; - typedef typename A::Label Element; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - Element Compact(StateId s, const A &arc) const { return arc.ilabel; } - - Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { - return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId); - } - - ssize_t Size() const { return 1; } - - uint64 Properties() const { - return kString | kAcceptor | kUnweighted; - } - - bool Compatible(const Fst<A> &fst) const { - uint64 props = Properties(); - return fst.Properties(props, true) == props; - } - - static const string &Type() { - static const string type = "string"; - return type; - } - - bool Write(ostream &strm) const { return true; } - - static StringCompactor *Read(istream &strm) { - return new StringCompactor; - } -}; - - -// Compactor for weighted string FSTs -template <class A> -class WeightedStringCompactor { - public: - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - typedef pair<Label, Weight> Element; - - Element Compact(StateId s, const A &arc) const { - return make_pair(arc.ilabel, arc.weight); - } - - Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { - return Arc(p.first, p.first, p.second, - p.first != kNoLabel ? 
s + 1 : kNoStateId); - } - - ssize_t Size() const { return 1;} - - uint64 Properties() const { - return kString | kAcceptor; - } - - bool Compatible(const Fst<A> &fst) const { - uint64 props = Properties(); - return fst.Properties(props, true) == props; - } - - static const string &Type() { - static const string type = "weighted_string"; - return type; - } - - bool Write(ostream &strm) const { return true; } - - static WeightedStringCompactor *Read(istream &strm) { - return new WeightedStringCompactor; - } -}; - - -// Compactor for unweighted acceptor FSTs -template <class A> -class UnweightedAcceptorCompactor { - public: - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - typedef pair<Label, StateId> Element; - - Element Compact(StateId s, const A &arc) const { - return make_pair(arc.ilabel, arc.nextstate); - } - - Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { - return Arc(p.first, p.first, Weight::One(), p.second); - } - - ssize_t Size() const { return -1;} - - uint64 Properties() const { - return kAcceptor | kUnweighted; - } - - bool Compatible(const Fst<A> &fst) const { - uint64 props = Properties(); - return fst.Properties(props, true) == props; - } - - static const string &Type() { - static const string type = "unweighted_acceptor"; - return type; - } - - bool Write(ostream &strm) const { return true; } - - static UnweightedAcceptorCompactor *Read(istream &istrm) { - return new UnweightedAcceptorCompactor; - } -}; - - -// Compactor for weighted acceptor FSTs -template <class A> -class AcceptorCompactor { - public: - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - typedef pair< pair<Label, Weight>, StateId > Element; - - Element Compact(StateId s, const A &arc) const { - return make_pair(make_pair(arc.ilabel, arc.weight), arc.nextstate); - } - - Arc Expand(StateId s, const 
Element &p, uint32 f = kArcValueFlags) const { - return Arc(p.first.first, p.first.first, p.first.second, p.second); - } - - ssize_t Size() const { return -1;} - - uint64 Properties() const { - return kAcceptor; - } - - bool Compatible(const Fst<A> &fst) const { - uint64 props = Properties(); - return fst.Properties(props, true) == props; - } - - static const string &Type() { - static const string type = "acceptor"; - return type; - } - - bool Write(ostream &strm) const { return true; } - - static AcceptorCompactor *Read(istream &strm) { - return new AcceptorCompactor; - } -}; - - -// Compactor for unweighted FSTs -template <class A> -class UnweightedCompactor { - public: - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - typedef pair< pair<Label, Label>, StateId > Element; - - Element Compact(StateId s, const A &arc) const { - return make_pair(make_pair(arc.ilabel, arc.olabel), arc.nextstate); - } - - Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { - return Arc(p.first.first, p.first.second, Weight::One(), p.second); - } - - ssize_t Size() const { return -1; } - - uint64 Properties() const { - return kUnweighted; - } - - bool Compatible(const Fst<A> &fst) const { - uint64 props = Properties(); - return fst.Properties(props, true) == props; - } - - static const string &Type() { - static const string type = "unweighted"; - return type; - } - - bool Write(ostream &strm) const { return true; } - - static UnweightedCompactor *Read(istream &strm) { - return new UnweightedCompactor; - } -}; - - -// Uselful aliases when using StdArc -typedef CompactFst< StdArc, StringCompactor<StdArc> > -StdCompactStringFst; -typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> > -StdCompactWeightedStringFst; -typedef CompactFst<StdArc, AcceptorCompactor<StdArc> > -StdCompactAcceptorFst; -typedef CompactFst<StdArc, UnweightedCompactor<StdArc> > -StdCompactUnweightedFst; 
-typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > -StdCompactUnweightedAcceptorFst; - -} // namespace fst - -#endif // FST_LIB_COMPACT_FST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/compat.h b/kaldi_io/src/tools/openfst/include/fst/compat.h deleted file mode 100644 index 3b5275d..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/compat.h +++ /dev/null @@ -1,131 +0,0 @@ -// compat.h -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Author: [email protected] (Michael Riley) -// -// \file -// Google compatibility declarations and inline definitions. 
- -#ifndef FST_LIB_COMPAT_H__ -#define FST_LIB_COMPAT_H__ - -#include <dlfcn.h> - -#include <climits> -#include <cstdlib> -#include <cstring> -#include <iostream> -#include <string> -#include <vector> - -// Makes copy constructor and operator= private -#define DISALLOW_COPY_AND_ASSIGN(type) \ - type(const type&); \ - void operator=(const type&) - -#include <fst/config.h> -#include <fst/types.h> -#include <fst/lock.h> -#include <fst/flags.h> -#include <fst/log.h> -#include <fst/icu.h> - -using std::cin; -using std::cout; -using std::cerr; -using std::endl; -using std::string; - -void FailedNewHandler(); - -namespace fst { - -using namespace std; - -void SplitToVector(char *line, const char *delim, - std::vector<char *> *vec, bool omit_empty_strings); - -// Downcasting -template<typename To, typename From> -inline To down_cast(From* f) { - return static_cast<To>(f); -} - -// Bitcasting -template <class Dest, class Source> -inline Dest bit_cast(const Source& source) { - // Compile time assertion: sizeof(Dest) == sizeof(Source) - // A compile error here means your Dest and Source have different sizes. - typedef char VerifySizesAreEqual [sizeof(Dest) == sizeof(Source) ? 
1 : - -1]; - Dest dest; - memcpy(&dest, &source, sizeof(dest)); - return dest; -} - -// Check sums -class CheckSummer { - public: - CheckSummer() : count_(0) { - check_sum_.resize(kCheckSumLength, '\0'); - } - - void Reset() { - count_ = 0; - for (int i = 0; i < kCheckSumLength; ++i) - check_sum_[i] = '\0'; - } - - void Update(void const *data, int size) { - const char *p = reinterpret_cast<const char *>(data); - for (int i = 0; i < size; ++i) - check_sum_[(count_++) % kCheckSumLength] ^= p[i]; - } - - void Update(string const &data) { - for (int i = 0; i < data.size(); ++i) - check_sum_[(count_++) % kCheckSumLength] ^= data[i]; - } - - string Digest() { - return check_sum_; - } - - private: - static const int kCheckSumLength = 32; - int count_; - string check_sum_; - - DISALLOW_COPY_AND_ASSIGN(CheckSummer); -}; - -} // namespace fst - - -// Define missing hash functions if needed -#ifndef HAVE_STD__TR1__HASH_LONG_LONG_UNSIGNED_ -namespace std { -namespace tr1 { - -template <class T> class hash; - -template<> struct hash<uint64> { - size_t operator()(uint64 x) const { return x; } -}; - -} -} -#endif // HAVE_STD__TR1__HASH_LONG_LONG_UNSIGNED_ - -#endif // FST_LIB_COMPAT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/complement.h b/kaldi_io/src/tools/openfst/include/fst/complement.h deleted file mode 100644 index dacf396..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/complement.h +++ /dev/null @@ -1,338 +0,0 @@ -// complement.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Class to complement an Fst. - -#ifndef FST_LIB_COMPLEMENT_H__ -#define FST_LIB_COMPLEMENT_H__ - -#include <algorithm> -#include <string> -#include <vector> -using std::vector; - -#include <fst/fst.h> -#include <fst/test-properties.h> - - -namespace fst { - -template <class A> class ComplementFst; - -// Implementation of delayed ComplementFst. The algorithm used -// completes the (deterministic) FSA and then exchanges final and -// non-final states. Completion, i.e. ensuring that all labels can be -// read from every state, is accomplished by using RHO labels, which -// match all labels that are otherwise not found leaving a state. The -// first state in the output is reserved to be a new state that is the -// destination of all RHO labels. Each remaining output state s -// corresponds to input state s - 1. The first arc in the output at -// these states is the rho label, the remaining arcs correspond to the -// input arcs. 
-template <class A> -class ComplementFstImpl : public FstImpl<A> { - public: - using FstImpl<A>::SetType; - using FstImpl<A>::SetProperties; - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - - friend class StateIterator< ComplementFst<A> >; - friend class ArcIterator< ComplementFst<A> >; - - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - explicit ComplementFstImpl(const Fst<A> &fst) : fst_(fst.Copy()) { - SetType("complement"); - uint64 props = fst.Properties(kILabelSorted, false); - SetProperties(ComplementProperties(props), kCopyProperties); - SetInputSymbols(fst.InputSymbols()); - SetOutputSymbols(fst.OutputSymbols()); - } - - ComplementFstImpl(const ComplementFstImpl<A> &impl) - : fst_(impl.fst_->Copy()) { - SetType("complement"); - SetProperties(impl.Properties(), kCopyProperties); - SetInputSymbols(impl.InputSymbols()); - SetOutputSymbols(impl.OutputSymbols()); - } - - ~ComplementFstImpl() { delete fst_; } - - StateId Start() const { - if (Properties(kError)) - return kNoStateId; - - StateId start = fst_->Start(); - if (start != kNoStateId) - return start + 1; - else - return 0; - } - - // Exchange final and non-final states; make rho destination state final. - Weight Final(StateId s) const { - if (s == 0 || fst_->Final(s - 1) == Weight::Zero()) - return Weight::One(); - else - return Weight::Zero(); - } - - size_t NumArcs(StateId s) const { - if (s == 0) - return 1; - else - return fst_->NumArcs(s - 1) + 1; - } - - size_t NumInputEpsilons(StateId s) const { - return s == 0 ? 0 : fst_->NumInputEpsilons(s - 1); - } - - size_t NumOutputEpsilons(StateId s) const { - return s == 0 ? 0 : fst_->NumOutputEpsilons(s - 1); - } - - - uint64 Properties() const { return Properties(kFstProperties); } - - // Set error if found; return FST impl properties. 
- uint64 Properties(uint64 mask) const { - if ((mask & kError) && fst_->Properties(kError, false)) - SetProperties(kError, kError); - return FstImpl<Arc>::Properties(mask); - } - - - private: - const Fst<A> *fst_; - - void operator=(const ComplementFstImpl<A> &fst); // Disallow -}; - - -// Complements an automaton. This is a library-internal operation that -// introduces a (negative) 'rho' label; use Difference/DifferenceFst in -// user code, which will not see this label. This version is a delayed Fst. -// -// This class attaches interface to implementation and handles -// reference counting, delegating most methods to ImplToFst. -template <class A> -class ComplementFst : public ImplToFst< ComplementFstImpl<A> > { - public: - friend class StateIterator< ComplementFst<A> >; - friend class ArcIterator< ComplementFst<A> >; - - using ImplToFst< ComplementFstImpl<A> >::GetImpl; - - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef ComplementFstImpl<A> Impl; - - explicit ComplementFst(const Fst<A> &fst) - : ImplToFst<Impl>(new Impl(fst)) { - uint64 props = kUnweighted | kNoEpsilons | kIDeterministic | kAcceptor; - if (fst.Properties(props, true) != props) { - FSTERROR() << "ComplementFst: argument not an unweighted " - << "epsilon-free deterministic acceptor"; - GetImpl()->SetProperties(kError, kError); - } - } - - // See Fst<>::Copy() for doc. - ComplementFst(const ComplementFst<A> &fst, bool safe = false) - : ImplToFst<Impl>(fst, safe) {} - - // Get a copy of this ComplementFst. See Fst<>::Copy() for further doc. - virtual ComplementFst<A> *Copy(bool safe = false) const { - return new ComplementFst<A>(*this, safe); - } - - virtual inline void InitStateIterator(StateIteratorData<A> *data) const; - - virtual inline void InitArcIterator(StateId s, - ArcIteratorData<A> *data) const; - - // Label that represents the rho transition. 
- // We use a negative value, which is thus private to the library and - // which will preserve FST label sort order. - static const Label kRhoLabel = -2; - private: - // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } - - void operator=(const ComplementFst<A> &fst); // disallow -}; - -template <class A> const typename A::Label ComplementFst<A>::kRhoLabel; - - -// Specialization for ComplementFst. -template <class A> -class StateIterator< ComplementFst<A> > : public StateIteratorBase<A> { - public: - typedef typename A::StateId StateId; - typedef typename A::Label Label; - - explicit StateIterator(const ComplementFst<A> &fst) - : siter_(*fst.GetImpl()->fst_), s_(0) { - } - - bool Done() const { return s_ > 0 && siter_.Done(); } - - StateId Value() const { return s_; } - - void Next() { - if (s_ != 0) - siter_.Next(); - ++s_; - } - - void Reset() { - siter_.Reset(); - s_ = 0; - } - - private: - // This allows base class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. - virtual bool Done_() const { return Done(); } - virtual StateId Value_() const { return Value(); } - virtual void Next_() { Next(); } - virtual void Reset_() { Reset(); } - - StateIterator< Fst<A> > siter_; - StateId s_; - - DISALLOW_COPY_AND_ASSIGN(StateIterator); -}; - - -// Specialization for ComplementFst. 
-template <class A> -class ArcIterator< ComplementFst<A> > : public ArcIteratorBase<A> { - public: - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - - ArcIterator(const ComplementFst<A> &fst, StateId s) - : aiter_(0), s_(s), pos_(0) { - if (s_ != 0) - aiter_ = new ArcIterator< Fst<A> >(*fst.GetImpl()->fst_, s - 1); - } - - virtual ~ArcIterator() { delete aiter_; } - - bool Done() const { - if (s_ != 0) - return pos_ > 0 && aiter_->Done(); - else - return pos_ > 0; - } - - // Adds the rho label to the rho destination state. - const A& Value() const { - if (pos_ == 0) { - arc_.ilabel = arc_.olabel = ComplementFst<A>::kRhoLabel; - arc_.weight = Weight::One(); - arc_.nextstate = 0; - } else { - arc_ = aiter_->Value(); - ++arc_.nextstate; - } - return arc_; - } - - void Next() { - if (s_ != 0 && pos_ > 0) - aiter_->Next(); - ++pos_; - } - - size_t Position() const { - return pos_; - } - - void Reset() { - if (s_ != 0) - aiter_->Reset(); - pos_ = 0; - } - - void Seek(size_t a) { - if (s_ != 0) { - if (a == 0) { - aiter_->Reset(); - } else { - aiter_->Seek(a - 1); - } - } - pos_ = a; - } - - uint32 Flags() const { - return kArcValueFlags; - } - - void SetFlags(uint32 f, uint32 m) {} - - private: - // This allows base class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. 
- virtual bool Done_() const { return Done(); } - virtual const A& Value_() const { return Value(); } - virtual void Next_() { Next(); } - virtual size_t Position_() const { return Position(); } - virtual void Reset_() { Reset(); } - virtual void Seek_(size_t a) { Seek(a); } - uint32 Flags_() const { return Flags(); } - void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); } - - ArcIterator< Fst<A> > *aiter_; - StateId s_; - size_t pos_; - mutable A arc_; - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - - -template <class A> inline void -ComplementFst<A>::InitStateIterator(StateIteratorData<A> *data) const { - data->base = new StateIterator< ComplementFst<A> >(*this); -} - -template <class A> inline void -ComplementFst<A>::InitArcIterator(StateId s, ArcIteratorData<A> *data) const { - data->base = new ArcIterator< ComplementFst<A> >(*this, s); -} - - -// Useful alias when using StdArc. -typedef ComplementFst<StdArc> StdComplementFst; - -} // namespace fst - -#endif // FST_LIB_COMPLEMENT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/compose-filter.h b/kaldi_io/src/tools/openfst/include/fst/compose-filter.h deleted file mode 100644 index 6bf7736..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/compose-filter.h +++ /dev/null @@ -1,542 +0,0 @@ -// compose-filter.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Michael Riley) -// -// \file -// Classes for filtering the composition matches, e.g. for correct epsilon -// handling. - -#ifndef FST_LIB_COMPOSE_FILTER_H__ -#define FST_LIB_COMPOSE_FILTER_H__ - -#include <fst/fst.h> -#include <fst/fst-decl.h> // For optional argument declarations -#include <fst/matcher.h> - - -namespace fst { - - -// COMPOSITION FILTER STATE - this represents the state of -// the composition filter. It has the form: -// -// class FilterState { -// public: -// // Required constructors -// FilterState(); -// FilterState(const FilterState &f); -// // An invalid filter state. -// static const FilterState NoState(); -// // Maps state to integer for hashing. -// size_t Hash() const; -// // Equality of filter states. -// bool operator==(const FilterState &f) const; -// // Inequality of filter states. -// bool operator!=(const FilterState &f) const; -// // Assignment to filter states. -// FilterState& operator=(const FilterState& f); -// }; - - -// Filter state that is a signed integral type. -template <typename T> -class IntegerFilterState { - public: - IntegerFilterState() : state_(kNoStateId) {} - explicit IntegerFilterState(T s) : state_(s) {} - - static const IntegerFilterState NoState() { return IntegerFilterState(); } - - size_t Hash() const { return static_cast<size_t>(state_); } - - bool operator==(const IntegerFilterState &f) const { - return state_ == f.state_; - } - - bool operator!=(const IntegerFilterState &f) const { - return state_ != f.state_; - } - - T GetState() const { return state_; } - - void SetState(T state) { state_ = state; } - -private: - T state_; -}; - -typedef IntegerFilterState<signed char> CharFilterState; -typedef IntegerFilterState<short> ShortFilterState; -typedef IntegerFilterState<int> IntFilterState; - - -// Filter state that is a weight (class). 
-template <class W> -class WeightFilterState { - public: - WeightFilterState() : weight_(W::Zero()) {} - explicit WeightFilterState(W w) : weight_(w) {} - - static const WeightFilterState NoState() { return WeightFilterState(); } - - size_t Hash() const { return weight_.Hash(); } - - bool operator==(const WeightFilterState &f) const { - return weight_ == f.weight_; - } - - bool operator!=(const WeightFilterState &f) const { - return weight_ != f.weight_; - } - - W GetWeight() const { return weight_; } - - void SetWeight(W w) { weight_ = w; } - -private: - W weight_; -}; - - -// Filter state that is the combination of two filter states. -template <class F1, class F2> -class PairFilterState { - public: - PairFilterState() : f1_(F1::NoState()), f2_(F2::NoState()) {} - - PairFilterState(const F1 &f1, const F2 &f2) : f1_(f1), f2_(f2) {} - - static const PairFilterState NoState() { return PairFilterState(); } - - size_t Hash() const { - size_t h1 = f1_.Hash(); - size_t h2 = f2_.Hash(); - const int lshift = 5; - const int rshift = CHAR_BIT * sizeof(size_t) - 5; - return h1 << lshift ^ h1 >> rshift ^ h2; - } - - bool operator==(const PairFilterState &f) const { - return f1_ == f.f1_ && f2_ == f.f2_; - } - - bool operator!=(const PairFilterState &f) const { - return f1_ != f.f1_ || f2_ != f.f2_; - } - - const F1 &GetState1() const { return f1_; } - const F2 &GetState2() const { return f2_; } - - void SetState(const F1 &f1, const F2 &f2) { - f1_ = f1; - f2_ = f2; - } - -private: - F1 f1_; - F2 f2_; -}; - - -// COMPOSITION FILTERS - these determine which matches are allowed to -// proceed. The filter's state is represented by the type -// ComposeFilter::FilterState. The basic filters handle correct -// epsilon matching. Their interface is: -// -// template <class M1, class M2> -// class ComposeFilter { -// public: -// typedef typename M1::FST1 FST1; -// typedef typename M1::FST2 FST2; -// typedef typename FST1::Arc Arc; -// typedef ... FilterState; -// typedef ... 
Matcher1; -// typedef ... Matcher2; -// -// // Required constructors. -// ComposeFilter(const FST1 &fst1, const FST2 &fst2, -// // M1 *matcher1 = 0, M2 *matcher2 = 0); -// // If safe=true, the copy is thread-safe. See Fst<>::Copy() -// // for further doc. -// ComposeFilter(const ComposeFilter<M1, M2> &filter, -// // bool safe = false); -// // Return start state of filter. -// FilterState Start() const; -// // Specifies current composition state. -// void SetState(StateId s1, StateId s2, const FilterState &f); -// -// // Apply filter at current composition state to these transitions. -// // If an arc label to be matched is kNolabel, then that side -// // does not consume a symbol. Returns the new filter state or, -// // if disallowed, FilterState::NoState(). The filter is permitted to -// // modify its inputs, e.g. for optimizations. -// FilterState FilterArc(Arc *arc1, Arc *arc2) const; - -// // Apply filter at current composition state to these final weights -// // (cf. superfinal transitions). The filter may modify its inputs, -// // e.g. for optimizations. -// void FilterFinal(Weight *final1, Weight *final2) const; -// -// // Return resp matchers. Ownership stays with filter. These -// // methods allow the filter to access and possibly modify -// // the composition matchers (useful e.g. with lookahead). -// Matcher1 *GetMatcher1(); -// Matcher2 *GetMatcher2(); -// -// // This specifies how the filter affects the composition result -// // properties. It takes as argument the properties that would -// // apply with a trivial composition fitler. -// uint64 Properties(uint64 props) const; -// }; - -// This filter requires epsilons on FST1 to be read before epsilons on FST2. 
-template <class M1, class M2> -class SequenceComposeFilter { - public: - typedef typename M1::FST FST1; - typedef typename M2::FST FST2; - typedef typename FST1::Arc Arc; - typedef CharFilterState FilterState; - typedef M1 Matcher1; - typedef M2 Matcher2; - - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - SequenceComposeFilter(const FST1 &fst1, const FST2 &fst2, - M1 *matcher1 = 0, M2 *matcher2 = 0) - : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)), - matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)), - fst1_(matcher1_->GetFst()), - s1_(kNoStateId), - s2_(kNoStateId), - f_(kNoStateId) {} - - SequenceComposeFilter(const SequenceComposeFilter<M1, M2> &filter, - bool safe = false) - : matcher1_(filter.matcher1_->Copy(safe)), - matcher2_(filter.matcher2_->Copy(safe)), - fst1_(matcher1_->GetFst()), - s1_(kNoStateId), - s2_(kNoStateId), - f_(kNoStateId) {} - - ~SequenceComposeFilter() { - delete matcher1_; - delete matcher2_; - } - - FilterState Start() const { return FilterState(0); } - - void SetState(StateId s1, StateId s2, const FilterState &f) { - if (s1_ == s1 && s2_ == s2 && f == f_) - return; - s1_ = s1; - s2_ = s2; - f_ = f; - size_t na1 = internal::NumArcs(fst1_, s1); - size_t ne1 = internal::NumOutputEpsilons(fst1_, s1); - bool fin1 = internal::Final(fst1_, s1) != Weight::Zero(); - alleps1_ = na1 == ne1 && !fin1; - noeps1_ = ne1 == 0; - } - - FilterState FilterArc(Arc *arc1, Arc *arc2) const { - if (arc1->olabel == kNoLabel) - return alleps1_ ? FilterState::NoState() : - noeps1_ ? FilterState(0) : FilterState(1); - else if (arc2->ilabel == kNoLabel) - return f_ != FilterState(0) ? FilterState::NoState() : FilterState(0); - else - return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0); - } - - void FilterFinal(Weight *, Weight *) const {} - - // Return resp matchers. Ownership stays with filter. 
- Matcher1 *GetMatcher1() { return matcher1_; } - Matcher2 *GetMatcher2() { return matcher2_; } - - uint64 Properties(uint64 props) const { return props; } - - private: - Matcher1 *matcher1_; - Matcher2 *matcher2_; - const FST1 &fst1_; - StateId s1_; // Current fst1_ state; - StateId s2_; // Current fst2_ state; - FilterState f_; // Current filter state - bool alleps1_; // Only epsilons (and non-final) leaving s1_? - bool noeps1_; // No epsilons leaving s1_? - - void operator=(const SequenceComposeFilter<M1, M2> &); // disallow -}; - - -// This filter requires epsilons on FST2 to be read before epsilons on FST1. -template <class M1, class M2> -class AltSequenceComposeFilter { - public: - typedef typename M1::FST FST1; - typedef typename M2::FST FST2; - typedef typename FST1::Arc Arc; - typedef CharFilterState FilterState; - typedef M1 Matcher1; - typedef M2 Matcher2; - - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - AltSequenceComposeFilter(const FST1 &fst1, const FST2 &fst2, - M1 *matcher1 = 0, M2 *matcher2 = 0) - : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)), - matcher2_(matcher2 ? 
matcher2 : new M2(fst2, MATCH_INPUT)), - fst2_(matcher2_->GetFst()), - s1_(kNoStateId), - s2_(kNoStateId), - f_(kNoStateId) {} - - AltSequenceComposeFilter(const AltSequenceComposeFilter<M1, M2> &filter, - bool safe = false) - : matcher1_(filter.matcher1_->Copy(safe)), - matcher2_(filter.matcher2_->Copy(safe)), - fst2_(matcher2_->GetFst()), - s1_(kNoStateId), - s2_(kNoStateId), - f_(kNoStateId) {} - - ~AltSequenceComposeFilter() { - delete matcher1_; - delete matcher2_; - } - - FilterState Start() const { return FilterState(0); } - - void SetState(StateId s1, StateId s2, const FilterState &f) { - if (s1_ == s1 && s2_ == s2 && f == f_) - return; - s1_ = s1; - s2_ = s2; - f_ = f; - size_t na2 = internal::NumArcs(fst2_, s2); - size_t ne2 = internal::NumInputEpsilons(fst2_, s2); - bool fin2 = internal::Final(fst2_, s2) != Weight::Zero(); - alleps2_ = na2 == ne2 && !fin2; - noeps2_ = ne2 == 0; - } - - FilterState FilterArc(Arc *arc1, Arc *arc2) const { - if (arc2->ilabel == kNoLabel) - return alleps2_ ? FilterState::NoState() : - noeps2_ ? FilterState(0) : FilterState(1); - else if (arc1->olabel == kNoLabel) - return f_ == FilterState(1) ? FilterState::NoState() : FilterState(0); - else - return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0); - } - - void FilterFinal(Weight *, Weight *) const {} - - // Return resp matchers. Ownership stays with filter. - Matcher1 *GetMatcher1() { return matcher1_; } - Matcher2 *GetMatcher2() { return matcher2_; } - - uint64 Properties(uint64 props) const { return props; } - - private: - Matcher1 *matcher1_; - Matcher2 *matcher2_; - const FST2 &fst2_; - StateId s1_; // Current fst1_ state; - StateId s2_; // Current fst2_ state; - FilterState f_; // Current filter state - bool alleps2_; // Only epsilons (and non-final) leaving s2_? - bool noeps2_; // No epsilons leaving s2_? 
- -void operator=(const AltSequenceComposeFilter<M1, M2> &); // disallow -}; - - -// This filter requires epsilons on FST1 to be matched with epsilons on FST2 -// whenever possible. -template <class M1, class M2> -class MatchComposeFilter { - public: - typedef typename M1::FST FST1; - typedef typename M2::FST FST2; - typedef typename FST1::Arc Arc; - typedef CharFilterState FilterState; - typedef M1 Matcher1; - typedef M2 Matcher2; - - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - MatchComposeFilter(const FST1 &fst1, const FST2 &fst2, - M1 *matcher1 = 0, M2 *matcher2 = 0) - : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)), - matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)), - fst1_(matcher1_->GetFst()), - fst2_(matcher2_->GetFst()), - s1_(kNoStateId), - s2_(kNoStateId), - f_(kNoStateId) {} - - MatchComposeFilter(const MatchComposeFilter<M1, M2> &filter, - bool safe = false) - : matcher1_(filter.matcher1_->Copy(safe)), - matcher2_(filter.matcher2_->Copy(safe)), - fst1_(matcher1_->GetFst()), - fst2_(matcher2_->GetFst()), - s1_(kNoStateId), - s2_(kNoStateId), - f_(kNoStateId) {} - - ~MatchComposeFilter() { - delete matcher1_; - delete matcher2_; - } - - FilterState Start() const { return FilterState(0); } - - void SetState(StateId s1, StateId s2, const FilterState &f) { - if (s1_ == s1 && s2_ == s2 && f == f_) - return; - s1_ = s1; - s2_ = s2; - f_ = f; - size_t na1 = internal::NumArcs(fst1_, s1); - size_t ne1 = internal::NumOutputEpsilons(fst1_, s1); - bool f1 = internal::Final(fst1_, s1) != Weight::Zero(); - alleps1_ = na1 == ne1 && !f1; - noeps1_ = ne1 == 0; - size_t na2 = internal::NumArcs(fst2_, s2); - size_t ne2 = internal::NumInputEpsilons(fst2_, s2); - bool f2 = internal::Final(fst2_, s2) != Weight::Zero(); - alleps2_ = na2 == ne2 && !f2; - noeps2_ = ne2 == 0; - } - - FilterState FilterArc(Arc *arc1, Arc *arc2) const { - if (arc2->ilabel == kNoLabel) // Epsilon on 
Fst1 - return f_ == FilterState(0) ? - (noeps2_ ? FilterState(0) : - (alleps2_ ? FilterState::NoState(): FilterState(1))) : - (f_ == FilterState(1) ? FilterState(1) : FilterState::NoState()); - else if (arc1->olabel == kNoLabel) // Epsilon on Fst2 - return f_ == FilterState(0) ? - (noeps1_ ? FilterState(0) : - (alleps1_ ? FilterState::NoState() : FilterState(2))) : - (f_ == FilterState(2) ? FilterState(2) : FilterState::NoState()); - else if (arc1->olabel == 0) // Epsilon on both - return f_ == FilterState(0) ? FilterState(0) : FilterState::NoState(); - else // Both are non-epsilons - return FilterState(0); - } - - void FilterFinal(Weight *, Weight *) const {} - - // Return resp matchers. Ownership stays with filter. - Matcher1 *GetMatcher1() { return matcher1_; } - Matcher2 *GetMatcher2() { return matcher2_; } - - uint64 Properties(uint64 props) const { return props; } - - private: - Matcher1 *matcher1_; - Matcher2 *matcher2_; - const FST1 &fst1_; - const FST2 &fst2_; - StateId s1_; // Current fst1_ state; - StateId s2_; // Current fst2_ state; - FilterState f_; // Current filter state ID - bool alleps1_, alleps2_; // Only epsilons (and non-final) leaving s1, s2? - bool noeps1_, noeps2_; // No epsilons leaving s1, s2? - - void operator=(const MatchComposeFilter<M1, M2> &); // disallow -}; - - -// This filter works with the MultiEpsMatcher to determine if -// 'multi-epsilons' are preserved in the composition output -// (rather than rewritten as 0) and ensures correct properties. 
-template <class F> -class MultiEpsFilter { - public: - typedef typename F::FST1 FST1; - typedef typename F::FST2 FST2; - typedef typename F::Arc Arc; - typedef typename F::Matcher1 Matcher1; - typedef typename F::Matcher2 Matcher2; - typedef typename F::FilterState FilterState; - typedef MultiEpsFilter<F> Filter; - - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - MultiEpsFilter(const FST1 &fst1, const FST2 &fst2, - Matcher1 *matcher1 = 0, Matcher2 *matcher2 = 0, - bool keep_multi_eps = false) - : filter_(fst1, fst2, matcher1, matcher2), - keep_multi_eps_(keep_multi_eps) {} - - MultiEpsFilter(const Filter &filter, bool safe = false) - : filter_(filter.filter_, safe), - keep_multi_eps_(filter.keep_multi_eps_) {} - - FilterState Start() const { return filter_.Start(); } - - void SetState(StateId s1, StateId s2, const FilterState &f) { - return filter_.SetState(s1, s2, f); - } - - FilterState FilterArc(Arc *arc1, Arc *arc2) const { - FilterState f = filter_.FilterArc(arc1, arc2); - if (keep_multi_eps_) { - if (arc1->olabel == kNoLabel) - arc1->ilabel = arc2->ilabel; - if (arc2->ilabel == kNoLabel) - arc2->olabel = arc1->olabel; - } - return f; - } - - void FilterFinal(Weight *w1, Weight *w2) const { - return filter_.FilterFinal(w1, w2); - } - - // Return resp matchers. Ownership stays with filter. 
- Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); } - Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); } - - uint64 Properties(uint64 iprops) const { - uint64 oprops = filter_.Properties(iprops); - return oprops & kILabelInvariantProperties & kOLabelInvariantProperties; - } - - private: - F filter_; - bool keep_multi_eps_; -}; - -} // namespace fst - - -#endif // FST_LIB_COMPOSE_FILTER_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/compose.h b/kaldi_io/src/tools/openfst/include/fst/compose.h deleted file mode 100644 index db5ea3a..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/compose.h +++ /dev/null @@ -1,728 +0,0 @@ -// compose.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Class to compute the composition of two FSTs - -#ifndef FST_LIB_COMPOSE_H__ -#define FST_LIB_COMPOSE_H__ - -#include <algorithm> -#include <string> -#include <vector> -using std::vector; - -#include <fst/cache.h> -#include <fst/compose-filter.h> -#include <fst/lookahead-filter.h> -#include <fst/matcher.h> -#include <fst/state-table.h> -#include <fst/test-properties.h> - - -namespace fst { - -// Delayed composition options templated on the arc type, the matcher, -// the composition filter, and the composition state table. By -// default, the matchers, filter, and state table are constructed by -// composition. 
If set below, the user can instead pass in these -// objects; in that case, ComposeFst takes their ownership. This -// version controls composition implemented between generic Fst<Arc> -// types and a shared matcher type M for Fst<Arc>. This should be -// adequate for most applications, giving a reasonable tradeoff -// between efficiency and code sharing (but see ComposeFstImplOptions). -template <class A, - class M = Matcher<Fst<A> >, - class F = SequenceComposeFilter<M>, - class T = GenericComposeStateTable<A, typename F::FilterState> > -struct ComposeFstOptions : public CacheOptions { - M *matcher1; // FST1 matcher (see matcher.h) - M *matcher2; // FST2 matcher - F *filter; // Composition filter (see compose-filter.h) - T *state_table; // Composition state table (see compose-state-table.h) - - explicit ComposeFstOptions(const CacheOptions &opts, - M *mat1 = 0, M *mat2 = 0, - F *filt = 0, T *sttable= 0) - : CacheOptions(opts), matcher1(mat1), matcher2(mat2), - filter(filt), state_table(sttable) {} - - ComposeFstOptions() : matcher1(0), matcher2(0), filter(0), state_table(0) {} -}; - - -// Delayed composition options templated on the two matcher types, the -// composition filter, and the composition state table. By default, -// the matchers, filter, and state table are constructed by -// composition. If set below, the user can instead pass in these -// objects; in that case, ComposeFst takes their ownership. This -// version controls composition implemented using arbitrary matchers -// (of the same Arc type but otherwise arbitrary Fst type). The user -// must ensure the matchers are compatible. These options permit the -// most efficient use, but shares the least code. This is for advanced -// use only in the most demanding or specialized applications that can -// benefit from it (o.w. prefer ComposeFstOptions). 
-template <class M1, class M2, - class F = SequenceComposeFilter<M1, M2>, - class T = GenericComposeStateTable<typename M1::Arc, - typename F::FilterState> > -struct ComposeFstImplOptions : public CacheOptions { - M1 *matcher1; // FST1 matcher (see matcher.h) - M2 *matcher2; // FST2 matcher - F *filter; // Composition filter (see compose-filter.h) - T *state_table; // Composition state table (see compose-state-table.h) - - explicit ComposeFstImplOptions(const CacheOptions &opts, - M1 *mat1 = 0, M2 *mat2 = 0, - F *filt = 0, T *sttable= 0) - : CacheOptions(opts), matcher1(mat1), matcher2(mat2), - filter(filt), state_table(sttable) {} - - ComposeFstImplOptions() - : matcher1(0), matcher2(0), filter(0), state_table(0) {} -}; - - -// Implementation of delayed composition. This base class is -// common to the variants with different matchers, composition filters -// and state tables. -template <class A> -class ComposeFstImplBase : public CacheImpl<A> { - public: - using FstImpl<A>::SetType; - using FstImpl<A>::SetProperties; - using FstImpl<A>::Properties; - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - - using CacheBaseImpl< CacheState<A> >::HasStart; - using CacheBaseImpl< CacheState<A> >::HasFinal; - using CacheBaseImpl< CacheState<A> >::HasArcs; - using CacheBaseImpl< CacheState<A> >::SetFinal; - using CacheBaseImpl< CacheState<A> >::SetStart; - - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef CacheState<A> State; - - ComposeFstImplBase(const Fst<A> &fst1, const Fst<A> &fst2, - const CacheOptions &opts) - : CacheImpl<A>(opts) { - VLOG(2) << "ComposeFst(" << this << "): Begin"; - SetType("compose"); - - if (!CompatSymbols(fst2.InputSymbols(), fst1.OutputSymbols())) { - FSTERROR() << "ComposeFst: output symbol table of 1st argument " - << "does not match input symbol table of 2nd argument"; - SetProperties(kError, kError); - } - - SetInputSymbols(fst1.InputSymbols()); - 
SetOutputSymbols(fst2.OutputSymbols()); - } - - ComposeFstImplBase(const ComposeFstImplBase<A> &impl) - : CacheImpl<A>(impl, true) { - SetProperties(impl.Properties(), kCopyProperties); - SetInputSymbols(impl.InputSymbols()); - SetOutputSymbols(impl.OutputSymbols()); - } - - virtual ComposeFstImplBase<A> *Copy() = 0; - - virtual ~ComposeFstImplBase() {} - - StateId Start() { - if (!HasStart()) { - StateId start = ComputeStart(); - if (start != kNoStateId) { - SetStart(start); - } - } - return CacheImpl<A>::Start(); - } - - Weight Final(StateId s) { - if (!HasFinal(s)) { - Weight final = ComputeFinal(s); - SetFinal(s, final); - } - return CacheImpl<A>::Final(s); - } - - virtual void Expand(StateId s) = 0; - - size_t NumArcs(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumArcs(s); - } - - size_t NumInputEpsilons(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumInputEpsilons(s); - } - - size_t NumOutputEpsilons(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumOutputEpsilons(s); - } - - void InitArcIterator(StateId s, ArcIteratorData<A> *data) { - if (!HasArcs(s)) - Expand(s); - CacheImpl<A>::InitArcIterator(s, data); - } - - protected: - virtual StateId ComputeStart() = 0; - virtual Weight ComputeFinal(StateId s) = 0; -}; - - -// Implementaion of delayed composition templated on the matchers (see -// matcher.h), composition filter (see compose-filter-inl.h) and -// the composition state table (see compose-state-table.h). 
-template <class M1, class M2, class F, class T> -class ComposeFstImpl : public ComposeFstImplBase<typename M1::Arc> { - typedef typename M1::FST FST1; - typedef typename M2::FST FST2; - typedef typename M1::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - typedef typename F::FilterState FilterState; - typedef typename F::Matcher1 Matcher1; - typedef typename F::Matcher2 Matcher2; - - using CacheBaseImpl<CacheState<Arc> >::SetArcs; - using FstImpl<Arc>::SetType; - using FstImpl<Arc>::SetProperties; - - typedef ComposeStateTuple<StateId, FilterState> StateTuple; - - public: - ComposeFstImpl(const FST1 &fst1, const FST2 &fst2, - const ComposeFstImplOptions<M1, M2, F, T> &opts); - - ComposeFstImpl(const ComposeFstImpl<M1, M2, F, T> &impl) - : ComposeFstImplBase<Arc>(impl), - filter_(new F(*impl.filter_, true)), - matcher1_(filter_->GetMatcher1()), - matcher2_(filter_->GetMatcher2()), - fst1_(matcher1_->GetFst()), - fst2_(matcher2_->GetFst()), - state_table_(new T(*impl.state_table_)), - match_type_(impl.match_type_) {} - - ~ComposeFstImpl() { - VLOG(2) << "ComposeFst(" << this - << "): End: # of visited states: " << state_table_->Size(); - - delete filter_; - delete state_table_; - } - - virtual ComposeFstImpl<M1, M2, F, T> *Copy() { - return new ComposeFstImpl<M1, M2, F, T>(*this); - } - - uint64 Properties() const { return Properties(kFstProperties); } - - // Set error if found; return FST impl properties. - uint64 Properties(uint64 mask) const { - if ((mask & kError) && - (fst1_.Properties(kError, false) || - fst2_.Properties(kError, false) || - (matcher1_->Properties(0) & kError) || - (matcher2_->Properties(0) & kError) | - (filter_->Properties(0) & kError) || - state_table_->Error())) { - SetProperties(kError, kError); - } - return FstImpl<Arc>::Properties(mask); - } - - // Arranges it so that the first arg to OrderedExpand is the Fst - // that will be matched on. 
- void Expand(StateId s) { - const StateTuple &tuple = state_table_->Tuple(s); - StateId s1 = tuple.state_id1; - StateId s2 = tuple.state_id2; - filter_->SetState(s1, s2, tuple.filter_state); - if (match_type_ == MATCH_OUTPUT || - (match_type_ == MATCH_BOTH && - internal::NumArcs(fst1_, s1) > internal::NumArcs(fst2_, s2))) - OrderedExpand(s, fst1_, s1, fst2_, s2, matcher1_, false); - else - OrderedExpand(s, fst2_, s2, fst1_, s1, matcher2_, true); - } - - const FST1 &GetFst1() { return fst1_; } - const FST2 &GetFst2() { return fst2_; } - M1 *GetMatcher1() { return matcher1_; } - M2 *GetMatcher2() { return matcher2_; } - F *GetFilter() { return filter_; } - T *GetStateTable() { return state_table_; } - - private: - // This does that actual matching of labels in the composition. The - // arguments are ordered so matching is called on state 'sa' of - // 'fsta' for each arc leaving state 'sb' of 'fstb'. The 'match_input' arg - // determines whether the input or output label of arcs at 'sb' is - // the one to match on. - template <class FST, class Matcher> - void OrderedExpand(StateId s, const Fst<Arc> &, StateId sa, - const FST &fstb, StateId sb, - Matcher *matchera, bool match_input) { - matchera->SetState(sa); - - // First process non-consuming symbols (e.g., epsilons) on FSTA. - Arc loop(match_input ? 0 : kNoLabel, match_input ? kNoLabel : 0, - Weight::One(), sb); - MatchArc(s, matchera, loop, match_input); - - // Then process matches on FSTB. - for (ArcIterator<FST> iterb(fstb, sb); !iterb.Done(); iterb.Next()) - MatchArc(s, matchera, iterb.Value(), match_input); - - SetArcs(s); - } - - // Matches a single transition from 'fstb' against 'fata' at 's'. - template <class Matcher> - void MatchArc(StateId s, Matcher *matchera, - const Arc &arc, bool match_input) { - if (matchera->Find(match_input ? 
arc.olabel : arc.ilabel)) { - for (; !matchera->Done(); matchera->Next()) { - Arc arca = matchera->Value(); - Arc arcb = arc; - if (match_input) { - const FilterState &f = filter_->FilterArc(&arcb, &arca); - if (f != FilterState::NoState()) - AddArc(s, arcb, arca, f); - } else { - const FilterState &f = filter_->FilterArc(&arca, &arcb); - if (f != FilterState::NoState()) - AddArc(s, arca, arcb, f); - } - } - } - } - - // Add a matching transition at 's'. - void AddArc(StateId s, const Arc &arc1, const Arc &arc2, - const FilterState &f) { - StateTuple tuple(arc1.nextstate, arc2.nextstate, f); - Arc oarc(arc1.ilabel, arc2.olabel, Times(arc1.weight, arc2.weight), - state_table_->FindState(tuple)); - CacheImpl<Arc>::PushArc(s, oarc); - } - - StateId ComputeStart() { - StateId s1 = fst1_.Start(); - if (s1 == kNoStateId) - return kNoStateId; - - StateId s2 = fst2_.Start(); - if (s2 == kNoStateId) - return kNoStateId; - - const FilterState &f = filter_->Start(); - StateTuple tuple(s1, s2, f); - return state_table_->FindState(tuple); - } - - Weight ComputeFinal(StateId s) { - const StateTuple &tuple = state_table_->Tuple(s); - StateId s1 = tuple.state_id1; - Weight final1 = internal::Final(fst1_, s1); - if (final1 == Weight::Zero()) - return final1; - - StateId s2 = tuple.state_id2; - Weight final2 = internal::Final(fst2_, s2); - if (final2 == Weight::Zero()) - return final2; - - filter_->SetState(s1, s2, tuple.filter_state); - filter_->FilterFinal(&final1, &final2); - return Times(final1, final2); - } - - // Identifies and verifies the capabilities of the matcher to be used for - // composition. 
- void SetMatchType(); - - F *filter_; - Matcher1 *matcher1_; - Matcher2 *matcher2_; - const FST1 &fst1_; - const FST2 &fst2_; - T *state_table_; - - MatchType match_type_; - - void operator=(const ComposeFstImpl<M1, M2, F, T> &); // disallow -}; - -template <class M1, class M2, class F, class T> inline -ComposeFstImpl<M1, M2, F, T>::ComposeFstImpl( - const FST1 &fst1, const FST2 &fst2, - const ComposeFstImplOptions<M1, M2, F, T> &opts) - : ComposeFstImplBase<Arc>(fst1, fst2, opts), - filter_(opts.filter ? opts.filter : - new F(fst1, fst2, opts.matcher1, opts.matcher2)), - matcher1_(filter_->GetMatcher1()), - matcher2_(filter_->GetMatcher2()), - fst1_(matcher1_->GetFst()), - fst2_(matcher2_->GetFst()), - state_table_(opts.state_table ? opts.state_table : - new T(fst1_, fst2_)) { - SetMatchType(); - if (match_type_ == MATCH_NONE) - SetProperties(kError, kError); - VLOG(2) << "ComposeFst(" << this << "): Match type: " - << (match_type_ == MATCH_OUTPUT ? "output" : - (match_type_ == MATCH_INPUT ? "input" : - (match_type_ == MATCH_BOTH ? "both" : - (match_type_ == MATCH_NONE ? 
"none" : "unknown")))); - - uint64 fprops1 = fst1.Properties(kFstProperties, false); - uint64 fprops2 = fst2.Properties(kFstProperties, false); - uint64 mprops1 = matcher1_->Properties(fprops1); - uint64 mprops2 = matcher2_->Properties(fprops2); - uint64 cprops = ComposeProperties(mprops1, mprops2); - SetProperties(filter_->Properties(cprops), kCopyProperties); - if (state_table_->Error()) SetProperties(kError, kError); - VLOG(2) << "ComposeFst(" << this << "): Initialized"; -} - -template <class M1, class M2, class F, class T> -void ComposeFstImpl<M1, M2, F, T>::SetMatchType() { - MatchType type1 = matcher1_->Type(false); - MatchType type2 = matcher2_->Type(false); - uint32 flags1 = matcher1_->Flags(); - uint32 flags2 = matcher2_->Flags(); - if (flags1 & flags2 & kRequireMatch) { - FSTERROR() << "ComposeFst: only one argument can require matching."; - match_type_ = MATCH_NONE; - } else if (flags1 & kRequireMatch) { - if (matcher1_->Type(true) != MATCH_OUTPUT) { - FSTERROR() << "ComposeFst: 1st argument requires matching but cannot."; - match_type_ = MATCH_NONE; - } - match_type_ = MATCH_OUTPUT; - } else if (flags2 & kRequireMatch) { - if (matcher2_->Type(true) != MATCH_INPUT) { - FSTERROR() << "ComposeFst: 2nd argument requires matching but cannot."; - match_type_ = MATCH_NONE; - } - match_type_ = MATCH_INPUT; - } else if (flags1 & flags2 & kPreferMatch && - type1 == MATCH_OUTPUT && type2 == MATCH_INPUT) { - match_type_ = MATCH_BOTH; - } else if (flags1 & kPreferMatch && type1 == MATCH_OUTPUT) { - match_type_ = MATCH_OUTPUT; - } else if (flags2 & kPreferMatch && type2 == MATCH_INPUT) { - match_type_ = MATCH_INPUT; - } else if (type1 == MATCH_OUTPUT && type2 == MATCH_INPUT) { - match_type_ = MATCH_BOTH; - } else if (type1 == MATCH_OUTPUT) { - match_type_ = MATCH_OUTPUT; - } else if (type2 == MATCH_INPUT) { - match_type_ = MATCH_INPUT; - } else if (flags1 & kPreferMatch && matcher1_->Type(true) == MATCH_OUTPUT) { - match_type_ = MATCH_OUTPUT; - } else if (flags2 & 
kPreferMatch && matcher2_->Type(true) == MATCH_INPUT) { - match_type_ = MATCH_INPUT; - } else if (matcher1_->Type(true) == MATCH_OUTPUT) { - match_type_ = MATCH_OUTPUT; - } else if (matcher2_->Type(true) == MATCH_INPUT) { - match_type_ = MATCH_INPUT; - } else { - FSTERROR() << "ComposeFst: 1st argument cannot match on output labels " - << "and 2nd argument cannot match on input labels (sort?)."; - match_type_ = MATCH_NONE; - } -} - - -// Computes the composition of two transducers. This version is a -// delayed Fst. If FST1 transduces string x to y with weight a and FST2 -// transduces y to z with weight b, then their composition transduces -// string x to z with weight Times(x, z). -// -// The output labels of the first transducer or the input labels of -// the second transducer must be sorted (with the default matcher). -// The weights need to form a commutative semiring (valid for -// TropicalWeight and LogWeight). -// -// Complexity: -// Assuming the first FST is unsorted and the second is sorted: -// - Time: O(v1 v2 d1 (log d2 + m2)), -// - Space: O(v1 v2) -// where vi = # of states visited, di = maximum out-degree, and mi the -// maximum multiplicity of the states visited for the ith -// FST. Constant time and space to visit an input state or arc is -// assumed and exclusive of caching. -// -// Caveats: -// - ComposeFst does not trim its output (since it is a delayed operation). -// - The efficiency of composition can be strongly affected by several factors: -// - the choice of which tnansducer is sorted - prefer sorting the FST -// that has the greater average out-degree. -// - the amount of non-determinism -// - the presence and location of epsilon transitions - avoid epsilon -// transitions on the output side of the first transducer or -// the input side of the second transducer or prefer placing -// them later in a path since they delay matching and can -// introduce non-coaccessible states and transitions. 
-// -// This class attaches interface to implementation and handles -// reference counting, delegating most methods to ImplToFst. -template <class A> -class ComposeFst : public ImplToFst< ComposeFstImplBase<A> > { - public: - friend class ArcIterator< ComposeFst<A> >; - friend class StateIterator< ComposeFst<A> >; - - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef CacheState<A> State; - typedef ComposeFstImplBase<A> Impl; - - using ImplToFst<Impl>::SetImpl; - - // Compose specifying only caching options. - ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2, - const CacheOptions &opts = CacheOptions()) - : ImplToFst<Impl>(CreateBase(fst1, fst2, opts)) {} - - // Compose specifying one shared matcher type M. Requires input - // Fsts and matcher FST type (M::FST) be Fst<A>. Recommended for - // best code-sharing and matcher compatiblity. - template <class M, class F, class T> - ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2, - const ComposeFstOptions<A, M, F, T> &opts) - : ImplToFst<Impl>(CreateBase1(fst1, fst2, opts)) {} - - // Compose specifying two matcher types M1 and M2. Requires input - // Fsts (of the same Arc type but o.w. arbitrary) match the - // corresponding matcher FST types (M1::FST, M2::FST). Recommended - // only for advanced use in demanding or specialized applications - // due to potential code bloat and matcher incompatibilities. - template <class M1, class M2, class F, class T> - ComposeFst(const typename M1::FST &fst1, const typename M2::FST &fst2, - const ComposeFstImplOptions<M1, M2, F, T> &opts) - : ImplToFst<Impl>(CreateBase2(fst1, fst2, opts)) {} - - // See Fst<>::Copy() for doc. - ComposeFst(const ComposeFst<A> &fst, bool safe = false) { - if (safe) - SetImpl(fst.GetImpl()->Copy()); - else - SetImpl(fst.GetImpl(), false); - } - - // Get a copy of this ComposeFst. See Fst<>::Copy() for further doc. 
- virtual ComposeFst<A> *Copy(bool safe = false) const { - return new ComposeFst<A>(*this, safe); - } - - virtual inline void InitStateIterator(StateIteratorData<A> *data) const; - - virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { - GetImpl()->InitArcIterator(s, data); - } - - protected: - ComposeFst() {} - - // Create compose implementation specifying two matcher types. - template <class M1, class M2, class F, class T> - static Impl *CreateBase2( - const typename M1::FST &fst1, const typename M2::FST &fst2, - const ComposeFstImplOptions<M1, M2, F, T> &opts) { - Impl *impl = new ComposeFstImpl<M1, M2, F, T>(fst1, fst2, opts); - if (!(Weight::Properties() & kCommutative)) { - int64 props1 = fst1.Properties(kUnweighted, true); - int64 props2 = fst2.Properties(kUnweighted, true); - if (!(props1 & kUnweighted) && !(props2 & kUnweighted)) { - FSTERROR() << "ComposeFst: Weights must be a commutative semiring: " - << Weight::Type(); - impl->SetProperties(kError, kError); - } - } - return impl; - } - - // Create compose implementation specifying one matcher type. - // Requires input Fsts and matcher FST type (M::FST) be Fst<A> - template <class M, class F, class T> - static Impl *CreateBase1(const Fst<A> &fst1, const Fst<A> &fst2, - const ComposeFstOptions<A, M, F, T> &opts) { - ComposeFstImplOptions<M, M, F, T> nopts(opts, opts.matcher1, opts.matcher2, - opts.filter, opts.state_table); - return CreateBase2(fst1, fst2, nopts); - } - - // Create compose implementation specifying no matcher type. 
- static Impl *CreateBase(const Fst<A> &fst1, const Fst<A> &fst2, - const CacheOptions &opts) { - switch (LookAheadMatchType(fst1, fst2)) { // Check for lookahead matchers - default: - case MATCH_NONE: { // Default composition (no look-ahead) - VLOG(2) << "ComposeFst: Default composition (no look-ahead)"; - ComposeFstOptions<Arc> nopts(opts); - return CreateBase1(fst1, fst2, nopts); - } - case MATCH_OUTPUT: { // Lookahead on fst1 - VLOG(2) << "ComposeFst: Lookahead on fst1"; - typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::FstMatcher M; - typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::ComposeFilter F; - ComposeFstOptions<Arc, M, F> nopts(opts); - return CreateBase1(fst1, fst2, nopts); - } - case MATCH_INPUT: { // Lookahead on fst2 - VLOG(2) << "ComposeFst: Lookahead on fst2"; - typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::FstMatcher M; - typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::ComposeFilter F; - ComposeFstOptions<Arc, M, F> nopts(opts); - return CreateBase1(fst1, fst2, nopts); - } - } - } - - private: - // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } - - void operator=(const ComposeFst<A> &fst); // disallow -}; - - -// Specialization for ComposeFst. -template<class A> -class StateIterator< ComposeFst<A> > - : public CacheStateIterator< ComposeFst<A> > { - public: - explicit StateIterator(const ComposeFst<A> &fst) - : CacheStateIterator< ComposeFst<A> >(fst, fst.GetImpl()) {} -}; - - -// Specialization for ComposeFst. 
-template <class A> -class ArcIterator< ComposeFst<A> > - : public CacheArcIterator< ComposeFst<A> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const ComposeFst<A> &fst, StateId s) - : CacheArcIterator< ComposeFst<A> >(fst.GetImpl(), s) { - if (!fst.GetImpl()->HasArcs(s)) - fst.GetImpl()->Expand(s); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - -template <class A> inline -void ComposeFst<A>::InitStateIterator(StateIteratorData<A> *data) const { - data->base = new StateIterator< ComposeFst<A> >(*this); -} - -// Useful alias when using StdArc. -typedef ComposeFst<StdArc> StdComposeFst; - -enum ComposeFilter { AUTO_FILTER, SEQUENCE_FILTER, ALT_SEQUENCE_FILTER, - MATCH_FILTER }; - -struct ComposeOptions { - bool connect; // Connect output - ComposeFilter filter_type; // Which pre-defined filter to use - - ComposeOptions(bool c, ComposeFilter ft = AUTO_FILTER) - : connect(c), filter_type(ft) {} - ComposeOptions() : connect(true), filter_type(AUTO_FILTER) {} -}; - -// Computes the composition of two transducers. This version writes -// the composed FST into a MurableFst. If FST1 transduces string x to -// y with weight a and FST2 transduces y to z with weight b, then -// their composition transduces string x to z with weight -// Times(x, z). -// -// The output labels of the first transducer or the input labels of -// the second transducer must be sorted. The weights need to form a -// commutative semiring (valid for TropicalWeight and LogWeight). -// -// Complexity: -// Assuming the first FST is unsorted and the second is sorted: -// - Time: O(V1 V2 D1 (log D2 + M2)), -// - Space: O(V1 V2 D1 M2) -// where Vi = # of states, Di = maximum out-degree, and Mi is -// the maximum multiplicity for the ith FST. -// -// Caveats: -// - Compose trims its output. 
-// - The efficiency of composition can be strongly affected by several factors: -// - the choice of which tnansducer is sorted - prefer sorting the FST -// that has the greater average out-degree. -// - the amount of non-determinism -// - the presence and location of epsilon transitions - avoid epsilon -// transitions on the output side of the first transducer or -// the input side of the second transducer or prefer placing -// them later in a path since they delay matching and can -// introduce non-coaccessible states and transitions. -template<class Arc> -void Compose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2, - MutableFst<Arc> *ofst, - const ComposeOptions &opts = ComposeOptions()) { - typedef Matcher< Fst<Arc> > M; - - if (opts.filter_type == AUTO_FILTER) { - CacheOptions nopts; - nopts.gc_limit = 0; // Cache only the last state for fastest copy. - *ofst = ComposeFst<Arc>(ifst1, ifst2, nopts); - } else if (opts.filter_type == SEQUENCE_FILTER) { - ComposeFstOptions<Arc> copts; - copts.gc_limit = 0; // Cache only the last state for fastest copy. - *ofst = ComposeFst<Arc>(ifst1, ifst2, copts); - } else if (opts.filter_type == ALT_SEQUENCE_FILTER) { - ComposeFstOptions<Arc, M, AltSequenceComposeFilter<M> > copts; - copts.gc_limit = 0; // Cache only the last state for fastest copy. - *ofst = ComposeFst<Arc>(ifst1, ifst2, copts); - } else if (opts.filter_type == MATCH_FILTER) { - ComposeFstOptions<Arc, M, MatchComposeFilter<M> > copts; - copts.gc_limit = 0; // Cache only the last state for fastest copy. 
- *ofst = ComposeFst<Arc>(ifst1, ifst2, copts); - } - - if (opts.connect) - Connect(ofst); -} - -} // namespace fst - -#endif // FST_LIB_COMPOSE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/concat.h b/kaldi_io/src/tools/openfst/include/fst/concat.h deleted file mode 100644 index 8500d50..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/concat.h +++ /dev/null @@ -1,246 +0,0 @@ -// concat.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Functions and classes to compute the concat of two FSTs. - -#ifndef FST_LIB_CONCAT_H__ -#define FST_LIB_CONCAT_H__ - -#include <vector> -using std::vector; -#include <algorithm> - -#include <fst/mutable-fst.h> -#include <fst/rational.h> - - -namespace fst { - -// Computes the concatenation (product) of two FSTs. If FST1 -// transduces string x to y with weight a and FST2 transduces string w -// to v with weight b, then their concatenation transduces string xw -// to yv with Times(a, b). -// -// This version modifies its MutableFst argument (in first position). -// -// Complexity: -// - Time: O(V1 + V2 + E2) -// - Space: O(V1 + V2 + E2) -// where Vi = # of states and Ei = # of arcs of the ith FST. 
-// -template<class Arc> -void Concat(MutableFst<Arc> *fst1, const Fst<Arc> &fst2) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - // TODO(riley): restore when voice actions issues fixed - // Check that the symbol table are compatible - if (!CompatSymbols(fst1->InputSymbols(), fst2.InputSymbols()) || - !CompatSymbols(fst1->OutputSymbols(), fst2.OutputSymbols())) { - LOG(ERROR) << "Concat: input/output symbol tables of 1st argument " - << "do not match input/output symbol tables of 2nd argument"; - // fst1->SetProperties(kError, kError); - // return; - } - - uint64 props1 = fst1->Properties(kFstProperties, false); - uint64 props2 = fst2.Properties(kFstProperties, false); - - StateId start1 = fst1->Start(); - if (start1 == kNoStateId) { - if (props2 & kError) fst1->SetProperties(kError, kError); - return; - } - - StateId numstates1 = fst1->NumStates(); - if (fst2.Properties(kExpanded, false)) - fst1->ReserveStates(numstates1 + CountStates(fst2)); - - for (StateIterator< Fst<Arc> > siter2(fst2); - !siter2.Done(); - siter2.Next()) { - StateId s1 = fst1->AddState(); - StateId s2 = siter2.Value(); - fst1->SetFinal(s1, fst2.Final(s2)); - fst1->ReserveArcs(s1, fst2.NumArcs(s2)); - for (ArcIterator< Fst<Arc> > aiter(fst2, s2); - !aiter.Done(); - aiter.Next()) { - Arc arc = aiter.Value(); - arc.nextstate += numstates1; - fst1->AddArc(s1, arc); - } - } - - StateId start2 = fst2.Start(); - for (StateId s1 = 0; s1 < numstates1; ++s1) { - Weight final = fst1->Final(s1); - if (final != Weight::Zero()) { - fst1->SetFinal(s1, Weight::Zero()); - if (start2 != kNoStateId) - fst1->AddArc(s1, Arc(0, 0, final, start2 + numstates1)); - } - } - if (start2 != kNoStateId) - fst1->SetProperties(ConcatProperties(props1, props2), kFstProperties); -} - -// Computes the concatentation of two FSTs. This version modifies its -// MutableFst argument (in second position). 
-// -// Complexity: -// - Time: O(V1 + E1) -// - Space: O(V1 + E1) -// where Vi = # of states and Ei = # of arcs of the ith FST. -// -template<class Arc> -void Concat(const Fst<Arc> &fst1, MutableFst<Arc> *fst2) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - // Check that the symbol table are compatible - if (!CompatSymbols(fst1.InputSymbols(), fst2->InputSymbols()) || - !CompatSymbols(fst1.OutputSymbols(), fst2->OutputSymbols())) { - LOG(ERROR) << "Concat: input/output symbol tables of 1st argument " - << "do not match input/output symbol tables of 2nd argument"; - // fst2->SetProperties(kError, kError); - // return; - } - - uint64 props1 = fst1.Properties(kFstProperties, false); - uint64 props2 = fst2->Properties(kFstProperties, false); - - StateId start2 = fst2->Start(); - if (start2 == kNoStateId) { - if (props1 & kError) fst2->SetProperties(kError, kError); - return; - } - - StateId numstates2 = fst2->NumStates(); - if (fst1.Properties(kExpanded, false)) - fst2->ReserveStates(numstates2 + CountStates(fst1)); - - for (StateIterator< Fst<Arc> > siter(fst1); - !siter.Done(); - siter.Next()) { - StateId s1 = siter.Value(); - StateId s2 = fst2->AddState(); - Weight final = fst1.Final(s1); - fst2->ReserveArcs(s2, fst1.NumArcs(s1) + (final != Weight::Zero() ? 1 : 0)); - if (final != Weight::Zero()) - fst2->AddArc(s2, Arc(0, 0, final, start2)); - for (ArcIterator< Fst<Arc> > aiter(fst1, s1); - !aiter.Done(); - aiter.Next()) { - Arc arc = aiter.Value(); - arc.nextstate += numstates2; - fst2->AddArc(s2, arc); - } - } - StateId start1 = fst1.Start(); - fst2->SetStart(start1 == kNoStateId ? fst2->AddState() : start1 + numstates2); - if (start1 != kNoStateId) - fst2->SetProperties(ConcatProperties(props1, props2), kFstProperties); -} - - -// Computes the concatentation of two FSTs. This version modifies its -// RationalFst input (in first position). 
-template<class Arc> -void Concat(RationalFst<Arc> *fst1, const Fst<Arc> &fst2) { - fst1->GetImpl()->AddConcat(fst2, true); -} - -// Computes the concatentation of two FSTs. This version modifies its -// RationalFst input (in second position). -template<class Arc> -void Concat(const Fst<Arc> &fst1, RationalFst<Arc> *fst2) { - fst2->GetImpl()->AddConcat(fst1, false); -} - -typedef RationalFstOptions ConcatFstOptions; - - -// Computes the concatenation (product) of two FSTs; this version is a -// delayed Fst. If FST1 transduces string x to y with weight a and FST2 -// transduces string w to v with weight b, then their concatenation -// transduces string xw to yv with Times(a, b). -// -// Complexity: -// - Time: O(v1 + e1 + v2 + e2), -// - Space: O(v1 + v2) -// where vi = # of states visited and ei = # of arcs visited of the -// ith FST. Constant time and space to visit an input state or arc is -// assumed and exclusive of caching. -template <class A> -class ConcatFst : public RationalFst<A> { - public: - using ImplToFst< RationalFstImpl<A> >::GetImpl; - - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - ConcatFst(const Fst<A> &fst1, const Fst<A> &fst2) { - GetImpl()->InitConcat(fst1, fst2); - } - - ConcatFst(const Fst<A> &fst1, const Fst<A> &fst2, - const ConcatFstOptions &opts) : RationalFst<A>(opts) { - GetImpl()->InitConcat(fst1, fst2); - } - - // See Fst<>::Copy() for doc. - ConcatFst(const ConcatFst<A> &fst, bool safe = false) - : RationalFst<A>(fst, safe) {} - - // Get a copy of this ConcatFst. See Fst<>::Copy() for further doc. - virtual ConcatFst<A> *Copy(bool safe = false) const { - return new ConcatFst<A>(*this, safe); - } -}; - - -// Specialization for ConcatFst. -template <class A> -class StateIterator< ConcatFst<A> > : public StateIterator< RationalFst<A> > { - public: - explicit StateIterator(const ConcatFst<A> &fst) - : StateIterator< RationalFst<A> >(fst) {} -}; - - -// Specialization for ConcatFst. 
-template <class A> -class ArcIterator< ConcatFst<A> > : public ArcIterator< RationalFst<A> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const ConcatFst<A> &fst, StateId s) - : ArcIterator< RationalFst<A> >(fst, s) {} -}; - - -// Useful alias when using StdArc. -typedef ConcatFst<StdArc> StdConcatFst; - -} // namespace fst - -#endif // FST_LIB_CONCAT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/config.h b/kaldi_io/src/tools/openfst/include/fst/config.h deleted file mode 100644 index 046b49c..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/config.h +++ /dev/null @@ -1,12 +0,0 @@ -/* src/include/fst/config.h. Generated from config.h.in by configure. */ -// OpenFst config file - -/* Define to 1 if you have the ICU library. */ -/* #undef HAVE_ICU */ - -/* Define to 1 if the system has the type `std::tr1::hash<long long - unsigned>'. */ -#define HAVE_STD__TR1__HASH_LONG_LONG_UNSIGNED_ 1 - -/* Define to 1 if the system has the type `__gnu_cxx::slist<int>'. */ -#define HAVE___GNU_CXX__SLIST_INT_ 1 diff --git a/kaldi_io/src/tools/openfst/include/fst/connect.h b/kaldi_io/src/tools/openfst/include/fst/connect.h deleted file mode 100644 index 427808c..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/connect.h +++ /dev/null @@ -1,319 +0,0 @@ -// connect.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Michael Riley) -// -// \file -// Classes and functions to remove unsuccessful paths from an Fst. - -#ifndef FST_LIB_CONNECT_H__ -#define FST_LIB_CONNECT_H__ - -#include <vector> -using std::vector; - -#include <fst/dfs-visit.h> -#include <fst/union-find.h> -#include <fst/mutable-fst.h> - - -namespace fst { - -// Finds and returns connected components. Use with Visit(). -template <class A> -class CcVisitor { - public: - typedef A Arc; - typedef typename Arc::Weight Weight; - typedef typename A::StateId StateId; - - // cc[i]: connected component number for state i. - CcVisitor(vector<StateId> *cc) - : comps_(new UnionFind<StateId>(0, kNoStateId)), - cc_(cc), - nstates_(0) { } - - // comps: connected components equiv classes. - CcVisitor(UnionFind<StateId> *comps) - : comps_(comps), - cc_(0), - nstates_(0) { } - - ~CcVisitor() { - if (cc_) // own comps_? - delete comps_; - } - - void InitVisit(const Fst<A> &fst) { } - - bool InitState(StateId s, StateId root) { - ++nstates_; - if (comps_->FindSet(s) == kNoStateId) - comps_->MakeSet(s); - return true; - } - - bool WhiteArc(StateId s, const A &arc) { - comps_->MakeSet(arc.nextstate); - comps_->Union(s, arc.nextstate); - return true; - } - - bool GreyArc(StateId s, const A &arc) { - comps_->Union(s, arc.nextstate); - return true; - } - - bool BlackArc(StateId s, const A &arc) { - comps_->Union(s, arc.nextstate); - return true; - } - - void FinishState(StateId s) { } - - void FinishVisit() { - if (cc_) - GetCcVector(cc_); - } - - // cc[i]: connected component number for state i. - // Returns number of components. 
- int GetCcVector(vector<StateId> *cc) { - cc->clear(); - cc->resize(nstates_, kNoStateId); - StateId ncomp = 0; - for (StateId i = 0; i < nstates_; ++i) { - StateId rep = comps_->FindSet(i); - StateId &comp = (*cc)[rep]; - if (comp == kNoStateId) { - comp = ncomp; - ++ncomp; - } - (*cc)[i] = comp; - } - return ncomp; - } - - private: - UnionFind<StateId> *comps_; // Components - vector<StateId> *cc_; // State's cc number - StateId nstates_; // State count -}; - - -// Finds and returns strongly-connected components, accessible and -// coaccessible states and related properties. Uses Tarjan's single -// DFS SCC algorithm (see Aho, et al, "Design and Analysis of Computer -// Algorithms", 189pp). Use with DfsVisit(); -template <class A> -class SccVisitor { - public: - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - // scc[i]: strongly-connected component number for state i. - // SCC numbers will be in topological order for acyclic input. - // access[i]: accessibility of state i. - // coaccess[i]: coaccessibility of state i. - // Any of above can be NULL. - // props: related property bits (cyclicity, initial cyclicity, - // accessibility, coaccessibility) set/cleared (o.w. unchanged). 
- SccVisitor(vector<StateId> *scc, vector<bool> *access, - vector<bool> *coaccess, uint64 *props) - : scc_(scc), access_(access), coaccess_(coaccess), props_(props) {} - SccVisitor(uint64 *props) - : scc_(0), access_(0), coaccess_(0), props_(props) {} - - void InitVisit(const Fst<A> &fst); - - bool InitState(StateId s, StateId root); - - bool TreeArc(StateId s, const A &arc) { return true; } - - bool BackArc(StateId s, const A &arc) { - StateId t = arc.nextstate; - if ((*dfnumber_)[t] < (*lowlink_)[s]) - (*lowlink_)[s] = (*dfnumber_)[t]; - if ((*coaccess_)[t]) - (*coaccess_)[s] = true; - *props_ |= kCyclic; - *props_ &= ~kAcyclic; - if (arc.nextstate == start_) { - *props_ |= kInitialCyclic; - *props_ &= ~kInitialAcyclic; - } - return true; - } - - bool ForwardOrCrossArc(StateId s, const A &arc) { - StateId t = arc.nextstate; - if ((*dfnumber_)[t] < (*dfnumber_)[s] /* cross edge */ && - (*onstack_)[t] && (*dfnumber_)[t] < (*lowlink_)[s]) - (*lowlink_)[s] = (*dfnumber_)[t]; - if ((*coaccess_)[t]) - (*coaccess_)[s] = true; - return true; - } - - void FinishState(StateId s, StateId p, const A *); - - void FinishVisit() { - // Numbers SCC's in topological order when acyclic. 
- if (scc_) - for (StateId i = 0; i < scc_->size(); ++i) - (*scc_)[i] = nscc_ - 1 - (*scc_)[i]; - if (coaccess_internal_) - delete coaccess_; - delete dfnumber_; - delete lowlink_; - delete onstack_; - delete scc_stack_; - } - - private: - vector<StateId> *scc_; // State's scc number - vector<bool> *access_; // State's accessibility - vector<bool> *coaccess_; // State's coaccessibility - uint64 *props_; - const Fst<A> *fst_; - StateId start_; - StateId nstates_; // State count - StateId nscc_; // SCC count - bool coaccess_internal_; - vector<StateId> *dfnumber_; // state discovery times - vector<StateId> *lowlink_; // lowlink[s] == dfnumber[s] => SCC root - vector<bool> *onstack_; // is a state on the SCC stack - vector<StateId> *scc_stack_; // SCC stack (w/ random access) -}; - -template <class A> inline -void SccVisitor<A>::InitVisit(const Fst<A> &fst) { - if (scc_) - scc_->clear(); - if (access_) - access_->clear(); - if (coaccess_) { - coaccess_->clear(); - coaccess_internal_ = false; - } else { - coaccess_ = new vector<bool>; - coaccess_internal_ = true; - } - *props_ |= kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible; - *props_ &= ~(kCyclic | kInitialCyclic | kNotAccessible | kNotCoAccessible); - fst_ = &fst; - start_ = fst.Start(); - nstates_ = 0; - nscc_ = 0; - dfnumber_ = new vector<StateId>; - lowlink_ = new vector<StateId>; - onstack_ = new vector<bool>; - scc_stack_ = new vector<StateId>; -} - -template <class A> inline -bool SccVisitor<A>::InitState(StateId s, StateId root) { - scc_stack_->push_back(s); - while (dfnumber_->size() <= s) { - if (scc_) - scc_->push_back(-1); - if (access_) - access_->push_back(false); - coaccess_->push_back(false); - dfnumber_->push_back(-1); - lowlink_->push_back(-1); - onstack_->push_back(false); - } - (*dfnumber_)[s] = nstates_; - (*lowlink_)[s] = nstates_; - (*onstack_)[s] = true; - if (root == start_) { - if (access_) - (*access_)[s] = true; - } else { - if (access_) - (*access_)[s] = false; - *props_ |= 
kNotAccessible; - *props_ &= ~kAccessible; - } - ++nstates_; - return true; -} - -template <class A> inline -void SccVisitor<A>::FinishState(StateId s, StateId p, const A *) { - if (fst_->Final(s) != Weight::Zero()) - (*coaccess_)[s] = true; - if ((*dfnumber_)[s] == (*lowlink_)[s]) { // root of new SCC - bool scc_coaccess = false; - size_t i = scc_stack_->size(); - StateId t; - do { - t = (*scc_stack_)[--i]; - if ((*coaccess_)[t]) - scc_coaccess = true; - } while (s != t); - do { - t = scc_stack_->back(); - if (scc_) - (*scc_)[t] = nscc_; - if (scc_coaccess) - (*coaccess_)[t] = true; - (*onstack_)[t] = false; - scc_stack_->pop_back(); - } while (s != t); - if (!scc_coaccess) { - *props_ |= kNotCoAccessible; - *props_ &= ~kCoAccessible; - } - ++nscc_; - } - if (p != kNoStateId) { - if ((*coaccess_)[s]) - (*coaccess_)[p] = true; - if ((*lowlink_)[s] < (*lowlink_)[p]) - (*lowlink_)[p] = (*lowlink_)[s]; - } -} - - -// Trims an FST, removing states and arcs that are not on successful -// paths. This version modifies its input. -// -// Complexity: -// - Time: O(V + E) -// - Space: O(V + E) -// where V = # of states and E = # of arcs. 
-template<class Arc> -void Connect(MutableFst<Arc> *fst) { - typedef typename Arc::StateId StateId; - - vector<bool> access; - vector<bool> coaccess; - uint64 props = 0; - SccVisitor<Arc> scc_visitor(0, &access, &coaccess, &props); - DfsVisit(*fst, &scc_visitor); - vector<StateId> dstates; - for (StateId s = 0; s < access.size(); ++s) - if (!access[s] || !coaccess[s]) - dstates.push_back(s); - fst->DeleteStates(dstates); - fst->SetProperties(kAccessible | kCoAccessible, kAccessible | kCoAccessible); -} - -} // namespace fst - -#endif // FST_LIB_CONNECT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/const-fst.h b/kaldi_io/src/tools/openfst/include/fst/const-fst.h deleted file mode 100644 index e6e85af..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/const-fst.h +++ /dev/null @@ -1,497 +0,0 @@ -// const-fst.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Simple concrete immutable FST whose states and arcs are each stored -// in single arrays. 
- -#ifndef FST_LIB_CONST_FST_H__ -#define FST_LIB_CONST_FST_H__ - -#include <string> -#include <vector> -using std::vector; - -#include <fst/expanded-fst.h> -#include <fst/fst-decl.h> // For optional argument declarations -#include <fst/mapped-file.h> -#include <fst/test-properties.h> -#include <fst/util.h> - - -namespace fst { - -template <class A, class U> class ConstFst; -template <class F, class G> void Cast(const F &, G *); - -// States and arcs each implemented by single arrays, templated on the -// Arc definition. The unsigned type U is used to represent indices into -// the arc array. -template <class A, class U> -class ConstFstImpl : public FstImpl<A> { - public: - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - using FstImpl<A>::SetType; - using FstImpl<A>::SetProperties; - using FstImpl<A>::Properties; - - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef U Unsigned; - - ConstFstImpl() - : states_region_(0), arcs_region_(0), states_(0), arcs_(0), nstates_(0), - narcs_(0), start_(kNoStateId) { - string type = "const"; - if (sizeof(U) != sizeof(uint32)) { - string size; - Int64ToStr(8 * sizeof(U), &size); - type += size; - } - SetType(type); - SetProperties(kNullProperties | kStaticProperties); - } - - explicit ConstFstImpl(const Fst<A> &fst); - - ~ConstFstImpl() { - delete arcs_region_; - delete states_region_; - } - - StateId Start() const { return start_; } - - Weight Final(StateId s) const { return states_[s].final; } - - StateId NumStates() const { return nstates_; } - - size_t NumArcs(StateId s) const { return states_[s].narcs; } - - size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; } - - size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; } - - static ConstFstImpl<A, U> *Read(istream &strm, const FstReadOptions &opts); - - A *Arcs(StateId s) { return arcs_ + states_[s].pos; } - - // Provide information needed for generic state 
iterator - void InitStateIterator(StateIteratorData<A> *data) const { - data->base = 0; - data->nstates = nstates_; - } - - // Provide information needed for the generic arc iterator - void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { - data->base = 0; - data->arcs = arcs_ + states_[s].pos; - data->narcs = states_[s].narcs; - data->ref_count = 0; - } - - private: - friend class ConstFst<A, U>; // Allow finding narcs_, nstates_ during Write - - // States implemented by array *states_ below, arcs by (single) *arcs_. - struct State { - Weight final; // Final weight - Unsigned pos; // Start of state's arcs in *arcs_ - Unsigned narcs; // Number of arcs (per state) - Unsigned niepsilons; // # of input epsilons - Unsigned noepsilons; // # of output epsilons - State() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {} - }; - - // Properties always true of this Fst class - static const uint64 kStaticProperties = kExpanded; - // Current unaligned file format version. The unaligned version was added and - // made the default since the aligned version does not work on pipes. 
- static const int kFileVersion = 2; - // Current aligned file format version - static const int kAlignedFileVersion = 1; - // Minimum file format version supported - static const int kMinFileVersion = 1; - - MappedFile *states_region_; // Mapped file for states - MappedFile *arcs_region_; // Mapped file for arcs - State *states_; // States represenation - A *arcs_; // Arcs representation - StateId nstates_; // Number of states - size_t narcs_; // Number of arcs (per FST) - StateId start_; // Initial state - - DISALLOW_COPY_AND_ASSIGN(ConstFstImpl); -}; - -template <class A, class U> -const uint64 ConstFstImpl<A, U>::kStaticProperties; -template <class A, class U> -const int ConstFstImpl<A, U>::kFileVersion; -template <class A, class U> -const int ConstFstImpl<A, U>::kAlignedFileVersion; -template <class A, class U> -const int ConstFstImpl<A, U>::kMinFileVersion; - - -template<class A, class U> -ConstFstImpl<A, U>::ConstFstImpl(const Fst<A> &fst) : nstates_(0), narcs_(0) { - string type = "const"; - if (sizeof(U) != sizeof(uint32)) { - string size; - Int64ToStr(sizeof(U) * 8, &size); - type += size; - } - SetType(type); - SetInputSymbols(fst.InputSymbols()); - SetOutputSymbols(fst.OutputSymbols()); - start_ = fst.Start(); - - // Count # of states and arcs. 
- for (StateIterator< Fst<A> > siter(fst); - !siter.Done(); - siter.Next()) { - ++nstates_; - StateId s = siter.Value(); - for (ArcIterator< Fst<A> > aiter(fst, s); - !aiter.Done(); - aiter.Next()) - ++narcs_; - } - states_region_ = MappedFile::Allocate(nstates_ * sizeof(*states_)); - arcs_region_ = MappedFile::Allocate(narcs_ * sizeof(*arcs_)); - states_ = reinterpret_cast<State*>(states_region_->mutable_data()); - arcs_ = reinterpret_cast<A*>(arcs_region_->mutable_data()); - size_t pos = 0; - for (StateId s = 0; s < nstates_; ++s) { - states_[s].final = fst.Final(s); - states_[s].pos = pos; - states_[s].narcs = 0; - states_[s].niepsilons = 0; - states_[s].noepsilons = 0; - for (ArcIterator< Fst<A> > aiter(fst, s); - !aiter.Done(); - aiter.Next()) { - const A &arc = aiter.Value(); - ++states_[s].narcs; - if (arc.ilabel == 0) - ++states_[s].niepsilons; - if (arc.olabel == 0) - ++states_[s].noepsilons; - arcs_[pos++] = arc; - } - } - SetProperties(fst.Properties(kCopyProperties, true) | kStaticProperties); -} - - -template<class A, class U> -ConstFstImpl<A, U> *ConstFstImpl<A, U>::Read(istream &strm, - const FstReadOptions &opts) { - ConstFstImpl<A, U> *impl = new ConstFstImpl<A, U>; - FstHeader hdr; - if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { - delete impl; - return 0; - } - impl->start_ = hdr.Start(); - impl->nstates_ = hdr.NumStates(); - impl->narcs_ = hdr.NumArcs(); - - // Ensures compatibility - if (hdr.Version() == kAlignedFileVersion) - hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED); - - if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { - LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source; - delete impl; - return 0; - } - - size_t b = impl->nstates_ * sizeof(typename ConstFstImpl<A, U>::State); - impl->states_region_ = MappedFile::Map(&strm, opts, b); - if (!strm || impl->states_region_ == NULL) { - LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source; - delete impl; - return 0; - } - 
impl->states_ = reinterpret_cast<State*>( - impl->states_region_->mutable_data()); - if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { - LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source; - delete impl; - return 0; - } - - b = impl->narcs_ * sizeof(A); - impl->arcs_region_ = MappedFile::Map(&strm, opts, b); - if (!strm || impl->arcs_region_ == NULL) { - LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source; - delete impl; - return 0; - } - impl->arcs_ = reinterpret_cast<A*>(impl->arcs_region_->mutable_data()); - return impl; -} - -// Simple concrete immutable FST. This class attaches interface to -// implementation and handles reference counting, delegating most -// methods to ImplToExpandedFst. The unsigned type U is used to -// represent indices into the arc array (uint32 by default, declared -// in fst-decl.h). -template <class A, class U> -class ConstFst : public ImplToExpandedFst< ConstFstImpl<A, U> > { - public: - friend class StateIterator< ConstFst<A, U> >; - friend class ArcIterator< ConstFst<A, U> >; - template <class F, class G> void friend Cast(const F &, G *); - - typedef A Arc; - typedef typename A::StateId StateId; - typedef ConstFstImpl<A, U> Impl; - typedef U Unsigned; - - ConstFst() : ImplToExpandedFst<Impl>(new Impl()) {} - - explicit ConstFst(const Fst<A> &fst) - : ImplToExpandedFst<Impl>(new Impl(fst)) {} - - ConstFst(const ConstFst<A, U> &fst) : ImplToExpandedFst<Impl>(fst) {} - - // Get a copy of this ConstFst. See Fst<>::Copy() for further doc. - virtual ConstFst<A, U> *Copy(bool safe = false) const { - return new ConstFst<A, U>(*this); - } - - // Read a ConstFst from an input stream; return NULL on error - static ConstFst<A, U> *Read(istream &strm, const FstReadOptions &opts) { - Impl* impl = Impl::Read(strm, opts); - return impl ? 
new ConstFst<A, U>(impl) : 0; - } - - // Read a ConstFst from a file; return NULL on error - // Empty filename reads from standard input - static ConstFst<A, U> *Read(const string &filename) { - Impl* impl = ImplToExpandedFst<Impl>::Read(filename); - return impl ? new ConstFst<A, U>(impl) : 0; - } - - virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { - return WriteFst(*this, strm, opts); - } - - virtual bool Write(const string &filename) const { - return Fst<A>::WriteFile(filename); - } - - template <class F> - static bool WriteFst(const F &fst, ostream &strm, - const FstWriteOptions &opts); - - virtual void InitStateIterator(StateIteratorData<Arc> *data) const { - GetImpl()->InitStateIterator(data); - } - - virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { - GetImpl()->InitArcIterator(s, data); - } - - private: - explicit ConstFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {} - - // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); } - - void SetImpl(Impl *impl, bool own_impl = true) { - ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl); - } - - // Use overloading to extract the type of the argument. - static Impl* GetImplIfConstFst(const ConstFst &const_fst) { - return const_fst.GetImpl(); - } - - // Note that this does not give privileged treatment to subtypes of ConstFst. - template<typename NonConstFst> - static Impl* GetImplIfConstFst(const NonConstFst& fst) { - return NULL; - } - - void operator=(const ConstFst<A, U> &fst); // disallow -}; - -// Writes Fst in Const format, potentially with a pass over the machine -// before writing to compute number of states and arcs. -// -template <class A, class U> -template <class F> -bool ConstFst<A, U>::WriteFst(const F &fst, ostream &strm, - const FstWriteOptions &opts) { - int file_version = opts.align ? 
ConstFstImpl<A, U>::kAlignedFileVersion : - ConstFstImpl<A, U>::kFileVersion; - size_t num_arcs = -1, num_states = -1; - size_t start_offset = 0; - bool update_header = true; - if (Impl* impl = GetImplIfConstFst(fst)) { - num_arcs = impl->narcs_; - num_states = impl->nstates_; - update_header = false; - } else if ((start_offset = strm.tellp()) == -1) { - // precompute values needed for header when we cannot seek to rewrite it. - num_arcs = 0; - num_states = 0; - for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { - num_arcs += fst.NumArcs(siter.Value()); - ++num_states; - } - update_header = false; - } - FstHeader hdr; - hdr.SetStart(fst.Start()); - hdr.SetNumStates(num_states); - hdr.SetNumArcs(num_arcs); - string type = "const"; - if (sizeof(U) != sizeof(uint32)) { - string size; - Int64ToStr(8 * sizeof(U), &size); - type += size; - } - uint64 properties = fst.Properties(kCopyProperties, true) | - ConstFstImpl<A, U>::kStaticProperties; - FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties, - &hdr); - if (opts.align && !AlignOutput(strm)) { - LOG(ERROR) << "Could not align file during write after header"; - return false; - } - size_t pos = 0, states = 0; - typename ConstFstImpl<A, U>::State state; - for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { - state.final = fst.Final(siter.Value()); - state.pos = pos; - state.narcs = fst.NumArcs(siter.Value()); - state.niepsilons = fst.NumInputEpsilons(siter.Value()); - state.noepsilons = fst.NumOutputEpsilons(siter.Value()); - strm.write(reinterpret_cast<const char *>(&state), sizeof(state)); - pos += state.narcs; - ++states; - } - hdr.SetNumStates(states); - hdr.SetNumArcs(pos); - if (opts.align && !AlignOutput(strm)) { - LOG(ERROR) << "Could not align file during write after writing states"; - } - for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { - StateId s = siter.Value(); - for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) { - const A &arc 
= aiter.Value(); - strm.write(reinterpret_cast<const char *>(&arc), sizeof(arc)); - } - } - strm.flush(); - if (!strm) { - LOG(ERROR) << "ConstFst Write write failed: " << opts.source; - return false; - } - if (update_header) { - return FstImpl<A>::UpdateFstHeader(fst, strm, opts, file_version, type, - properties, &hdr, start_offset); - } else { - if (hdr.NumStates() != num_states) { - LOG(ERROR) << "Inconsistent number of states observed during write"; - return false; - } - if (hdr.NumArcs() != num_arcs) { - LOG(ERROR) << "Inconsistent number of arcs observed during write"; - return false; - } - } - return true; -} - -// Specialization for ConstFst; see generic version in fst.h -// for sample usage (but use the ConstFst type!). This version -// should inline. -template <class A, class U> -class StateIterator< ConstFst<A, U> > { - public: - typedef typename A::StateId StateId; - - explicit StateIterator(const ConstFst<A, U> &fst) - : nstates_(fst.GetImpl()->NumStates()), s_(0) {} - - bool Done() const { return s_ >= nstates_; } - - StateId Value() const { return s_; } - - void Next() { ++s_; } - - void Reset() { s_ = 0; } - - private: - StateId nstates_; - StateId s_; - - DISALLOW_COPY_AND_ASSIGN(StateIterator); -}; - - -// Specialization for ConstFst; see generic version in fst.h -// for sample usage (but use the ConstFst type!). This version -// should inline. 
-template <class A, class U> -class ArcIterator< ConstFst<A, U> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const ConstFst<A, U> &fst, StateId s) - : arcs_(fst.GetImpl()->Arcs(s)), - narcs_(fst.GetImpl()->NumArcs(s)), i_(0) {} - - bool Done() const { return i_ >= narcs_; } - - const A& Value() const { return arcs_[i_]; } - - void Next() { ++i_; } - - size_t Position() const { return i_; } - - void Reset() { i_ = 0; } - - void Seek(size_t a) { i_ = a; } - - uint32 Flags() const { - return kArcValueFlags; - } - - void SetFlags(uint32 f, uint32 m) {} - - private: - const A *arcs_; - size_t narcs_; - size_t i_; - - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - -// A useful alias when using StdArc. -typedef ConstFst<StdArc> StdConstFst; - -} // namespace fst - -#endif // FST_LIB_CONST_FST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/determinize.h b/kaldi_io/src/tools/openfst/include/fst/determinize.h deleted file mode 100644 index 9ff8723..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/determinize.h +++ /dev/null @@ -1,1015 +0,0 @@ -// determinize.h - - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Functions and classes to determinize an FST. 
- -#ifndef FST_LIB_DETERMINIZE_H__ -#define FST_LIB_DETERMINIZE_H__ - -#include <algorithm> -#include <climits> -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <map> -#include <fst/slist.h> -#include <string> -#include <vector> -using std::vector; - -#include <fst/arc-map.h> -#include <fst/cache.h> -#include <fst/bi-table.h> -#include <fst/factor-weight.h> -#include <fst/prune.h> -#include <fst/test-properties.h> - - -namespace fst { - -// -// COMMON DIVISORS - these are used in determinization to compute -// the transition weights. In the simplest case, it is just the same -// as the semiring Plus(). However, other choices permit more efficient -// determinization when the output contains strings. -// - -// The default common divisor uses the semiring Plus. -template <class W> -class DefaultCommonDivisor { - public: - typedef W Weight; - - W operator()(const W &w1, const W &w2) const { return Plus(w1, w2); } -}; - - -// The label common divisor for a (left) string semiring selects a -// single letter common prefix or the empty string. This is used in -// the determinization of output strings so that at most a single -// letter will appear in the output of a transtion. 
-template <typename L, StringType S> -class LabelCommonDivisor { - public: - typedef StringWeight<L, S> Weight; - - Weight operator()(const Weight &w1, const Weight &w2) const { - StringWeightIterator<L, S> iter1(w1); - StringWeightIterator<L, S> iter2(w2); - - if (!(StringWeight<L, S>::Properties() & kLeftSemiring)) { - FSTERROR() << "LabelCommonDivisor: Weight needs to be left semiring"; - return Weight::NoWeight(); - } else if (w1.Size() == 0 || w2.Size() == 0) { - return Weight::One(); - } else if (w1 == Weight::Zero()) { - return Weight(iter2.Value()); - } else if (w2 == Weight::Zero()) { - return Weight(iter1.Value()); - } else if (iter1.Value() == iter2.Value()) { - return Weight(iter1.Value()); - } else { - return Weight::One(); - } - } -}; - - -// The gallic common divisor uses the label common divisor on the -// string component and the template argument D common divisor on the -// weight component, which defaults to the default common divisor. -template <class L, class W, StringType S, class D = DefaultCommonDivisor<W> > -class GallicCommonDivisor { - public: - typedef GallicWeight<L, W, S> Weight; - - Weight operator()(const Weight &w1, const Weight &w2) const { - return Weight(label_common_divisor_(w1.Value1(), w2.Value1()), - weight_common_divisor_(w1.Value2(), w2.Value2())); - } - - private: - LabelCommonDivisor<L, S> label_common_divisor_; - D weight_common_divisor_; -}; - - -// Represents an element in a subset -template <class A> -struct DeterminizeElement { - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - DeterminizeElement() {} - - DeterminizeElement(StateId s, Weight w) : state_id(s), weight(w) {} - - bool operator==(const DeterminizeElement<A> & element) const { - return state_id == element.state_id && weight == element.weight; - } - - bool operator<(const DeterminizeElement<A> & element) const { - return state_id < element.state_id || - (state_id == element.state_id && weight == element.weight); - } - - StateId 
state_id; // Input state Id - Weight weight; // Residual weight -}; - - -// -// DETERMINIZE FILTERS - these can be used in determinization to compute -// transformations on the subsets prior to their being added as destination -// states. The filter operates on a map between a label and the -// corresponding destination subsets. The possibly modified map is -// then used to construct the destination states for arcs exiting state 's'. -// It must define the ordered map type LabelMap and have a default -// and copy constructor. - -// A determinize filter that does not modify its input. -template <class Arc> -struct IdentityDeterminizeFilter { - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef slist< DeterminizeElement<Arc> > Subset; - typedef map<Label, Subset*> LabelMap; - - static uint64 Properties(uint64 props) { return props; } - - void operator()(StateId s, LabelMap *label_map) {} -}; - - -// -// DETERMINIZATION STATE TABLES -// -// The determiziation state table has the form: -// -// template <class Arc> -// class DeterminizeStateTable { -// public: -// typedef typename Arc::StateId StateId; -// typedef DeterminizeElement<Arc> Element; -// typedef slist<Element> Subset; -// -// // Required constuctor -// DeterminizeStateTable(); -// -// // Required copy constructor that does not copy state -// DeterminizeStateTable(const DeterminizeStateTable<A,P> &table); -// -// // Lookup state ID by subset (not depending of the element order). -// // If it doesn't exist, then add it. FindState takes -// // ownership of the subset argument (so that it doesn't have to -// // copy it if it creates a new state). -// StateId FindState(Subset *subset); -// -// // Lookup subset by ID. -// const Subset *FindSubset(StateId id) const; -// }; -// - -// The default determinization state table based on the -// compact hash bi-table. 
-template <class Arc> -class DefaultDeterminizeStateTable { - public: - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - typedef DeterminizeElement<Arc> Element; - typedef slist<Element> Subset; - - explicit DefaultDeterminizeStateTable(size_t table_size = 0) - : table_size_(table_size), - subsets_(table_size_, new SubsetKey(), new SubsetEqual(&elements_)) { } - - DefaultDeterminizeStateTable(const DefaultDeterminizeStateTable<Arc> &table) - : table_size_(table.table_size_), - subsets_(table_size_, new SubsetKey(), new SubsetEqual(&elements_)) { } - - ~DefaultDeterminizeStateTable() { - for (StateId s = 0; s < subsets_.Size(); ++s) - delete subsets_.FindEntry(s); - } - - // Finds the state corresponding to a subset. Only creates a new - // state if the subset is not found. FindState takes ownership of - // the subset argument (so that it doesn't have to copy it if it - // creates a new state). - StateId FindState(Subset *subset) { - StateId ns = subsets_.Size(); - StateId s = subsets_.FindId(subset); - if (s != ns) delete subset; // subset found - return s; - } - - const Subset* FindSubset(StateId s) { return subsets_.FindEntry(s); } - - private: - // Comparison object for hashing Subset(s). Subsets are not sorted in this - // implementation, so ordering must not be assumed in the equivalence - // test. - class SubsetEqual { - public: - SubsetEqual() { // needed for compilation but should never be called - FSTERROR() << "SubsetEqual: default constructor not implemented"; - } - - // Constructor takes vector needed to check equality. See immediately - // below for constraints on it. - explicit SubsetEqual(vector<Element *> *elements) - : elements_(elements) {} - - // At each call to operator(), the elements_ vector should contain - // only NULLs. When this operator returns, elements_ will still - // have this property. 
- bool operator()(Subset* subset1, Subset* subset2) const { - if (!subset1 && !subset2) - return true; - if ((subset1 && !subset2) || (!subset1 && subset2)) - return false; - - if (subset1->size() != subset2->size()) - return false; - - // Loads first subset elements in element vector. - for (typename Subset::iterator iter1 = subset1->begin(); - iter1 != subset1->end(); - ++iter1) { - Element &element1 = *iter1; - while (elements_->size() <= element1.state_id) - elements_->push_back(0); - (*elements_)[element1.state_id] = &element1; - } - - // Checks second subset matches first via element vector. - for (typename Subset::iterator iter2 = subset2->begin(); - iter2 != subset2->end(); - ++iter2) { - Element &element2 = *iter2; - while (elements_->size() <= element2.state_id) - elements_->push_back(0); - Element *element1 = (*elements_)[element2.state_id]; - if (!element1 || element1->weight != element2.weight) { - // Mismatch found. Resets element vector before returning false. - for (typename Subset::iterator iter1 = subset1->begin(); - iter1 != subset1->end(); - ++iter1) - (*elements_)[iter1->state_id] = 0; - return false; - } else { - (*elements_)[element2.state_id] = 0; // Clears entry - } - } - return true; - } - private: - vector<Element *> *elements_; - }; - - // Hash function for Subset to Fst states. Subset elements are not - // sorted in this implementation, so the hash must be invariant - // under subset reordering. 
- class SubsetKey { - public: - size_t operator()(const Subset* subset) const { - size_t hash = 0; - if (subset) { - for (typename Subset::const_iterator iter = subset->begin(); - iter != subset->end(); - ++iter) { - const Element &element = *iter; - int lshift = element.state_id % (CHAR_BIT * sizeof(size_t) - 1) + 1; - int rshift = CHAR_BIT * sizeof(size_t) - lshift; - size_t n = element.state_id; - hash ^= n << lshift ^ n >> rshift ^ element.weight.Hash(); - } - } - return hash; - } - }; - - size_t table_size_; - - typedef CompactHashBiTable<StateId, Subset *, - SubsetKey, SubsetEqual, HS_STL> SubsetTable; - - SubsetTable subsets_; - vector<Element *> elements_; - - void operator=(const DefaultDeterminizeStateTable<Arc> &); // disallow -}; - -// Options for finite-state transducer determinization templated on -// the arc type, common divisor, the determinization filter and the -// state table. DeterminizeFst takes ownership of the determinization -// filter and state table if provided. -template <class Arc, - class D = DefaultCommonDivisor<typename Arc::Weight>, - class F = IdentityDeterminizeFilter<Arc>, - class T = DefaultDeterminizeStateTable<Arc> > -struct DeterminizeFstOptions : CacheOptions { - typedef typename Arc::Label Label; - float delta; // Quantization delta for subset weights - Label subsequential_label; // Label used for residual final output - // when producing subsequential transducers. - F *filter; // Determinization filter - T *state_table; // Determinization state table - - explicit DeterminizeFstOptions(const CacheOptions &opts, - float del = kDelta, Label lab = 0, - F *filt = 0, - T *table = 0) - : CacheOptions(opts), delta(del), subsequential_label(lab), - filter(filt), state_table(table) {} - - explicit DeterminizeFstOptions(float del = kDelta, Label lab = 0, - F *filt = 0, T *table = 0) - : delta(del), subsequential_label(lab), filter(filt), - state_table(table) {} -}; - -// Implementation of delayed DeterminizeFst. 
This base class is -// common to the variants that implement acceptor and transducer -// determinization. -template <class A> -class DeterminizeFstImplBase : public CacheImpl<A> { - public: - using FstImpl<A>::SetType; - using FstImpl<A>::SetProperties; - using FstImpl<A>::Properties; - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - - using CacheBaseImpl< CacheState<A> >::HasStart; - using CacheBaseImpl< CacheState<A> >::HasFinal; - using CacheBaseImpl< CacheState<A> >::HasArcs; - using CacheBaseImpl< CacheState<A> >::SetFinal; - using CacheBaseImpl< CacheState<A> >::SetStart; - - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef CacheState<A> State; - - template <class D, class F, class T> - DeterminizeFstImplBase(const Fst<A> &fst, - const DeterminizeFstOptions<A, D, F, T> &opts) - : CacheImpl<A>(opts), fst_(fst.Copy()) { - SetType("determinize"); - uint64 iprops = fst.Properties(kFstProperties, false); - uint64 dprops = DeterminizeProperties(iprops, - opts.subsequential_label != 0); - SetProperties(F::Properties(dprops), kCopyProperties); - SetInputSymbols(fst.InputSymbols()); - SetOutputSymbols(fst.OutputSymbols()); - } - - DeterminizeFstImplBase(const DeterminizeFstImplBase<A> &impl) - : CacheImpl<A>(impl), - fst_(impl.fst_->Copy(true)) { - SetType("determinize"); - SetProperties(impl.Properties(), kCopyProperties); - SetInputSymbols(impl.InputSymbols()); - SetOutputSymbols(impl.OutputSymbols()); - } - - virtual ~DeterminizeFstImplBase() { delete fst_; } - - virtual DeterminizeFstImplBase<A> *Copy() = 0; - - StateId Start() { - if (!HasStart()) { - StateId start = ComputeStart(); - if (start != kNoStateId) { - SetStart(start); - } - } - return CacheImpl<A>::Start(); - } - - Weight Final(StateId s) { - if (!HasFinal(s)) { - Weight final = ComputeFinal(s); - SetFinal(s, final); - } - return CacheImpl<A>::Final(s); - } - - virtual void Expand(StateId s) = 0; - - 
size_t NumArcs(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumArcs(s); - } - - size_t NumInputEpsilons(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumInputEpsilons(s); - } - - size_t NumOutputEpsilons(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumOutputEpsilons(s); - } - - void InitArcIterator(StateId s, ArcIteratorData<A> *data) { - if (!HasArcs(s)) - Expand(s); - CacheImpl<A>::InitArcIterator(s, data); - } - - virtual StateId ComputeStart() = 0; - - virtual Weight ComputeFinal(StateId s) = 0; - - const Fst<A> &GetFst() const { return *fst_; } - - private: - const Fst<A> *fst_; // Input Fst - - void operator=(const DeterminizeFstImplBase<A> &); // disallow -}; - - -// Implementation of delayed determinization for weighted acceptors. -// It is templated on the arc type A and the common divisor D. -template <class A, class D, class F, class T> -class DeterminizeFsaImpl : public DeterminizeFstImplBase<A> { - public: - using FstImpl<A>::SetProperties; - using DeterminizeFstImplBase<A>::GetFst; - using DeterminizeFstImplBase<A>::SetArcs; - - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef DeterminizeElement<A> Element; - typedef slist<Element> Subset; - typedef typename F::LabelMap LabelMap; - - DeterminizeFsaImpl(const Fst<A> &fst, - const vector<Weight> *in_dist, vector<Weight> *out_dist, - const DeterminizeFstOptions<A, D, F, T> &opts) - : DeterminizeFstImplBase<A>(fst, opts), - delta_(opts.delta), - in_dist_(in_dist), - out_dist_(out_dist), - filter_(opts.filter ? opts.filter : new F()), - state_table_(opts.state_table ? 
opts.state_table : new T()) { - if (!fst.Properties(kAcceptor, true)) { - FSTERROR() << "DeterminizeFst: argument not an acceptor"; - SetProperties(kError, kError); - } - if (!(Weight::Properties() & kLeftSemiring)) { - FSTERROR() << "DeterminizeFst: Weight needs to be left distributive: " - << Weight::Type(); - SetProperties(kError, kError); - } - if (out_dist_) - out_dist_->clear(); - } - - DeterminizeFsaImpl(const DeterminizeFsaImpl<A, D, F, T> &impl) - : DeterminizeFstImplBase<A>(impl), - delta_(impl.delta_), - in_dist_(0), - out_dist_(0), - filter_(new F(*impl.filter_)), - state_table_(new T(*impl.state_table_)) { - if (impl.out_dist_) { - FSTERROR() << "DeterminizeFsaImpl: cannot copy with out_dist vector"; - SetProperties(kError, kError); - } - } - - virtual ~DeterminizeFsaImpl() { - delete filter_; - delete state_table_; - } - - virtual DeterminizeFsaImpl<A, D, F, T> *Copy() { - return new DeterminizeFsaImpl<A, D, F, T>(*this); - } - - uint64 Properties() const { return Properties(kFstProperties); } - - // Set error if found; return FST impl properties. 
- uint64 Properties(uint64 mask) const { - if ((mask & kError) && (GetFst().Properties(kError, false))) - SetProperties(kError, kError); - return FstImpl<A>::Properties(mask); - } - - virtual StateId ComputeStart() { - StateId s = GetFst().Start(); - if (s == kNoStateId) - return kNoStateId; - Element element(s, Weight::One()); - Subset *subset = new Subset; - subset->push_front(element); - return FindState(subset); - } - - virtual Weight ComputeFinal(StateId s) { - const Subset *subset = state_table_->FindSubset(s); - Weight final = Weight::Zero(); - for (typename Subset::const_iterator siter = subset->begin(); - siter != subset->end(); - ++siter) { - const Element &element = *siter; - final = Plus(final, Times(element.weight, - GetFst().Final(element.state_id))); - if (!final.Member()) - SetProperties(kError, kError); - } - return final; - } - - StateId FindState(Subset *subset) { - StateId s = state_table_->FindState(subset); - if (in_dist_ && out_dist_->size() <= s) - out_dist_->push_back(ComputeDistance(subset)); - return s; - } - - // Compute distance from a state to the final states in the DFA - // given the distances in the NFA. - Weight ComputeDistance(const Subset *subset) { - Weight outd = Weight::Zero(); - for (typename Subset::const_iterator siter = subset->begin(); - siter != subset->end(); ++siter) { - const Element &element = *siter; - Weight ind = element.state_id < in_dist_->size() ? - (*in_dist_)[element.state_id] : Weight::Zero(); - outd = Plus(outd, Times(element.weight, ind)); - } - return outd; - } - - // Computes the outgoing transitions from a state, creating new destination - // states as needed. - virtual void Expand(StateId s) { - - LabelMap label_map; - LabelSubsets(s, &label_map); - - for (typename LabelMap::iterator liter = label_map.begin(); - liter != label_map.end(); - ++liter) - AddArc(s, liter->first, liter->second); - SetArcs(s); - } - - private: - // Constructs destination subsets per label. 
At return, subset - // element weights include the input automaton label weights and the - // subsets may contain duplicate states. - void LabelSubsets(StateId s, LabelMap *label_map) { - const Subset *src_subset = state_table_->FindSubset(s); - - for (typename Subset::const_iterator siter = src_subset->begin(); - siter != src_subset->end(); - ++siter) { - const Element &src_element = *siter; - for (ArcIterator< Fst<A> > aiter(GetFst(), src_element.state_id); - !aiter.Done(); - aiter.Next()) { - const A &arc = aiter.Value(); - Element dest_element(arc.nextstate, - Times(src_element.weight, arc.weight)); - - // The LabelMap may be a e.g. multimap with more complex - // determinization filters, so we insert efficiently w/o using []. - typename LabelMap::iterator liter = label_map->lower_bound(arc.ilabel); - Subset* dest_subset; - if (liter == label_map->end() || liter->first != arc.ilabel) { - dest_subset = new Subset; - label_map->insert(liter, make_pair(arc.ilabel, dest_subset)); - } else { - dest_subset = liter->second; - } - - dest_subset->push_front(dest_element); - } - } - // Applies the determinization filter - (*filter_)(s, label_map); - } - - // Adds an arc from state S to the destination state associated - // with subset DEST_SUBSET (as created by LabelSubsets). - void AddArc(StateId s, Label label, Subset *dest_subset) { - A arc; - arc.ilabel = label; - arc.olabel = label; - arc.weight = Weight::Zero(); - - typename Subset::iterator oiter; - for (typename Subset::iterator diter = dest_subset->begin(); - diter != dest_subset->end();) { - Element &dest_element = *diter; - // Computes label weight. - arc.weight = common_divisor_(arc.weight, dest_element.weight); - - while (elements_.size() <= dest_element.state_id) - elements_.push_back(0); - Element *matching_element = elements_[dest_element.state_id]; - if (matching_element) { - // Found duplicate state: sums state weight and deletes dup. 
- matching_element->weight = Plus(matching_element->weight, - dest_element.weight); - if (!matching_element->weight.Member()) - SetProperties(kError, kError); - ++diter; - dest_subset->erase_after(oiter); - } else { - // Saves element so we can check for duplicate for this state. - elements_[dest_element.state_id] = &dest_element; - oiter = diter; - ++diter; - } - } - - // Divides out label weight from destination subset elements. - // Quantizes to ensure comparisons are effective. - // Clears element vector. - for (typename Subset::iterator diter = dest_subset->begin(); - diter != dest_subset->end(); - ++diter) { - Element &dest_element = *diter; - dest_element.weight = Divide(dest_element.weight, arc.weight, - DIVIDE_LEFT); - dest_element.weight = dest_element.weight.Quantize(delta_); - elements_[dest_element.state_id] = 0; - } - - arc.nextstate = FindState(dest_subset); - CacheImpl<A>::PushArc(s, arc); - } - - float delta_; // Quantization delta for subset weights - const vector<Weight> *in_dist_; // Distance to final NFA states - vector<Weight> *out_dist_; // Distance to final DFA states - - D common_divisor_; - F *filter_; - T *state_table_; - - vector<Element *> elements_; - - void operator=(const DeterminizeFsaImpl<A, D, F, T> &); // disallow -}; - - -// Implementation of delayed determinization for transducers. -// Transducer determinization is implemented by mapping the input to -// the Gallic semiring as an acceptor whose weights contain the output -// strings and using acceptor determinization above to determinize -// that acceptor. 
-template <class A, StringType S, class D, class F, class T> -class DeterminizeFstImpl : public DeterminizeFstImplBase<A> { - public: - using FstImpl<A>::SetProperties; - using DeterminizeFstImplBase<A>::GetFst; - using CacheBaseImpl< CacheState<A> >::GetCacheGc; - using CacheBaseImpl< CacheState<A> >::GetCacheLimit; - - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - typedef ToGallicMapper<A, S> ToMapper; - typedef FromGallicMapper<A, S> FromMapper; - - typedef typename ToMapper::ToArc ToArc; - typedef ArcMapFst<A, ToArc, ToMapper> ToFst; - typedef ArcMapFst<ToArc, A, FromMapper> FromFst; - - typedef GallicCommonDivisor<Label, Weight, S, D> CommonDivisor; - typedef GallicFactor<Label, Weight, S> FactorIterator; - - DeterminizeFstImpl(const Fst<A> &fst, - const DeterminizeFstOptions<A, D, F, T> &opts) - : DeterminizeFstImplBase<A>(fst, opts), - delta_(opts.delta), - subsequential_label_(opts.subsequential_label) { - Init(GetFst()); - } - - DeterminizeFstImpl(const DeterminizeFstImpl<A, S, D, F, T> &impl) - : DeterminizeFstImplBase<A>(impl), - delta_(impl.delta_), - subsequential_label_(impl.subsequential_label_) { - Init(GetFst()); - } - - ~DeterminizeFstImpl() { delete from_fst_; } - - virtual DeterminizeFstImpl<A, S, D, F, T> *Copy() { - return new DeterminizeFstImpl<A, S, D, F, T>(*this); - } - - uint64 Properties() const { return Properties(kFstProperties); } - - // Set error if found; return FST impl properties. 
- uint64 Properties(uint64 mask) const { - if ((mask & kError) && (GetFst().Properties(kError, false) || - from_fst_->Properties(kError, false))) - SetProperties(kError, kError); - return FstImpl<A>::Properties(mask); - } - - virtual StateId ComputeStart() { return from_fst_->Start(); } - - virtual Weight ComputeFinal(StateId s) { return from_fst_->Final(s); } - - virtual void Expand(StateId s) { - for (ArcIterator<FromFst> aiter(*from_fst_, s); - !aiter.Done(); - aiter.Next()) - CacheImpl<A>::PushArc(s, aiter.Value()); - CacheImpl<A>::SetArcs(s); - } - - private: - // Initialization of transducer determinization implementation, which - // is defined after DeterminizeFst since it calls it. - void Init(const Fst<A> &fst); - - float delta_; - Label subsequential_label_; - FromFst *from_fst_; - - void operator=(const DeterminizeFstImpl<A, S, D, F, T> &); // disallow -}; - - -// Determinizes a weighted transducer. This version is a delayed -// Fst. The result will be an equivalent FST that has the property -// that no state has two transitions with the same input label. -// For this algorithm, epsilon transitions are treated as regular -// symbols (cf. RmEpsilon). -// -// The transducer must be functional. The weights must be (weakly) -// left divisible (valid for TropicalWeight and LogWeight for instance) -// and be zero-sum-free if for all a,b: (Plus(a, b) = 0 => a = b = 0. -// -// Complexity: -// - Determinizable: exponential (polynomial in the size of the output) -// - Non-determinizable) does not terminate -// -// The determinizable automata include all unweighted and all acyclic input. -// -// References: -// - Mehryar Mohri, "Finite-State Transducers in Language and Speech -// Processing". Computational Linguistics, 23:2, 1997. -// -// This class attaches interface to implementation and handles -// reference counting, delegating most methods to ImplToFst. 
-template <class A> -class DeterminizeFst : public ImplToFst< DeterminizeFstImplBase<A> > { - public: - friend class ArcIterator< DeterminizeFst<A> >; - friend class StateIterator< DeterminizeFst<A> >; - template <class B, StringType S, class D, class F, class T> - friend class DeterminizeFstImpl; - - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef CacheState<A> State; - typedef DeterminizeFstImplBase<A> Impl; - - using ImplToFst<Impl>::SetImpl; - - explicit DeterminizeFst(const Fst<A> &fst) { - typedef DefaultCommonDivisor<Weight> D; - typedef IdentityDeterminizeFilter<A> F; - typedef DefaultDeterminizeStateTable<A> T; - DeterminizeFstOptions<A, D, F, T> opts; - if (fst.Properties(kAcceptor, true)) { - // Calls implementation for acceptors. - SetImpl(new DeterminizeFsaImpl<A, D, F, T>(fst, 0, 0, opts)); - } else { - // Calls implementation for transducers. - SetImpl(new - DeterminizeFstImpl<A, STRING_LEFT_RESTRICT, D, F, T>(fst, opts)); - } - } - - template <class D, class F, class T> - DeterminizeFst(const Fst<A> &fst, - const DeterminizeFstOptions<A, D, F, T> &opts) { - if (fst.Properties(kAcceptor, true)) { - // Calls implementation for acceptors. - SetImpl(new DeterminizeFsaImpl<A, D, F, T>(fst, 0, 0, opts)); - } else { - // Calls implementation for transducers. - SetImpl(new - DeterminizeFstImpl<A, STRING_LEFT_RESTRICT, D, F, T>(fst, opts)); - } - } - - // This acceptor-only version additionally computes the distance to - // final states in the output if provided with those distances for the - // input. Useful for e.g. unique N-shortest paths. 
- template <class D, class F, class T> - DeterminizeFst(const Fst<A> &fst, - const vector<Weight> *in_dist, vector<Weight> *out_dist, - const DeterminizeFstOptions<A, D, F, T> &opts) { - if (!fst.Properties(kAcceptor, true)) { - FSTERROR() << "DeterminizeFst:" - << " distance to final states computed for acceptors only"; - GetImpl()->SetProperties(kError, kError); - } - SetImpl(new DeterminizeFsaImpl<A, D, F, T>(fst, in_dist, out_dist, opts)); - } - - // See Fst<>::Copy() for doc. - DeterminizeFst(const DeterminizeFst<A> &fst, bool safe = false) { - if (safe) - SetImpl(fst.GetImpl()->Copy()); - else - SetImpl(fst.GetImpl(), false); - } - - // Get a copy of this DeterminizeFst. See Fst<>::Copy() for further doc. - virtual DeterminizeFst<A> *Copy(bool safe = false) const { - return new DeterminizeFst<A>(*this, safe); - } - - virtual inline void InitStateIterator(StateIteratorData<A> *data) const; - - virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { - GetImpl()->InitArcIterator(s, data); - } - - private: - // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } - - void operator=(const DeterminizeFst<A> &fst); // Disallow -}; - - -// Initialization of transducer determinization implementation. which -// is defined after DeterminizeFst since it calls it. -template <class A, StringType S, class D, class F, class T> -void DeterminizeFstImpl<A, S, D, F, T>::Init(const Fst<A> &fst) { - // Mapper to an acceptor. - ToFst to_fst(fst, ToMapper()); - - // Determinizes acceptor. - // This recursive call terminates since it passes the common divisor - // to a private constructor. - CacheOptions copts(GetCacheGc(), GetCacheLimit()); - DeterminizeFstOptions<ToArc, CommonDivisor> dopts(copts, delta_); - // Uses acceptor-only constructor to avoid template recursion - DeterminizeFst<ToArc> det_fsa(to_fst, 0, 0, dopts); - - // Mapper back to transducer. 
- FactorWeightOptions<ToArc> fopts(CacheOptions(true, 0), delta_, - kFactorFinalWeights, - subsequential_label_, - subsequential_label_); - FactorWeightFst<ToArc, FactorIterator> factored_fst(det_fsa, fopts); - from_fst_ = new FromFst(factored_fst, FromMapper(subsequential_label_)); -} - - -// Specialization for DeterminizeFst. -template <class A> -class StateIterator< DeterminizeFst<A> > - : public CacheStateIterator< DeterminizeFst<A> > { - public: - explicit StateIterator(const DeterminizeFst<A> &fst) - : CacheStateIterator< DeterminizeFst<A> >(fst, fst.GetImpl()) {} -}; - - -// Specialization for DeterminizeFst. -template <class A> -class ArcIterator< DeterminizeFst<A> > - : public CacheArcIterator< DeterminizeFst<A> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const DeterminizeFst<A> &fst, StateId s) - : CacheArcIterator< DeterminizeFst<A> >(fst.GetImpl(), s) { - if (!fst.GetImpl()->HasArcs(s)) - fst.GetImpl()->Expand(s); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - - -template <class A> inline -void DeterminizeFst<A>::InitStateIterator(StateIteratorData<A> *data) const -{ - data->base = new StateIterator< DeterminizeFst<A> >(*this); -} - - -// Useful aliases when using StdArc. -typedef DeterminizeFst<StdArc> StdDeterminizeFst; - - -template <class Arc> -struct DeterminizeOptions { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - typedef typename Arc::Label Label; - - float delta; // Quantization delta for subset weights. - Weight weight_threshold; // Pruning weight threshold. - StateId state_threshold; // Pruning state threshold. - Label subsequential_label; // Label used for residual final output - // when producing subsequential transducers. 
- - explicit DeterminizeOptions(float d = kDelta, Weight w = Weight::Zero(), - StateId n = kNoStateId, Label l = 0) - : delta(d), weight_threshold(w), state_threshold(n), - subsequential_label(l) {} -}; - - -// Determinizes a weighted transducer. This version writes the -// determinized Fst to an output MutableFst. The result will be an -// equivalent FST that has the property that no state has two -// transitions with the same input label. For this algorithm, epsilon -// transitions are treated as regular symbols (cf. RmEpsilon). -// -// The transducer must be functional. The weights must be (weakly) -// left divisible (valid for TropicalWeight and LogWeight). -// -// Complexity: -// - Determinizable: exponential (polynomial in the size of the output) -// - Non-determinizable: does not terminate -// -// The determinizable automata include all unweighted and all acyclic input. -// -// References: -// - Mehryar Mohri, "Finite-State Transducers in Language and Speech -// Processing". Computational Linguistics, 23:2, 1997. -template <class Arc> -void Determinize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst, - const DeterminizeOptions<Arc> &opts - = DeterminizeOptions<Arc>()) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - - DeterminizeFstOptions<Arc> nopts; - nopts.delta = opts.delta; - nopts.subsequential_label = opts.subsequential_label; - - nopts.gc_limit = 0; // Cache only the last state for fastest copy. 
- - if (opts.weight_threshold != Weight::Zero() || - opts.state_threshold != kNoStateId) { - if (ifst.Properties(kAcceptor, false)) { - vector<Weight> idistance, odistance; - ShortestDistance(ifst, &idistance, true); - DeterminizeFst<Arc> dfst(ifst, &idistance, &odistance, nopts); - PruneOptions< Arc, AnyArcFilter<Arc> > popts(opts.weight_threshold, - opts.state_threshold, - AnyArcFilter<Arc>(), - &odistance); - Prune(dfst, ofst, popts); - } else { - *ofst = DeterminizeFst<Arc>(ifst, nopts); - Prune(ofst, opts.weight_threshold, opts.state_threshold); - } - } else { - *ofst = DeterminizeFst<Arc>(ifst, nopts); - } -} - - -} // namespace fst - -#endif // FST_LIB_DETERMINIZE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/dfs-visit.h b/kaldi_io/src/tools/openfst/include/fst/dfs-visit.h deleted file mode 100644 index 4d93a39..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/dfs-visit.h +++ /dev/null @@ -1,205 +0,0 @@ -// dfs-visit.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Depth-first search visitation. See visit.h for more general -// search queue disciplines. - -#ifndef FST_LIB_DFS_VISIT_H__ -#define FST_LIB_DFS_VISIT_H__ - -#include <stack> -#include <vector> -using std::vector; - -#include <fst/arcfilter.h> -#include <fst/fst.h> - - -namespace fst { - -// Visitor Interface - class determines actions taken during a Dfs. 
-// If any of the boolean member functions return false, the DFS is -// aborted by first calling FinishState() on all currently grey states -// and then calling FinishVisit(). -// -// Note this is similar to the more general visitor interface in visit.h -// except that FinishState returns additional information appropriate only for -// a DFS and some methods names here are better suited to a DFS. -// -// template <class Arc> -// class Visitor { -// public: -// typedef typename Arc::StateId StateId; -// -// Visitor(T *return_data); -// // Invoked before DFS visit -// void InitVisit(const Fst<Arc> &fst); -// // Invoked when state discovered (2nd arg is DFS tree root) -// bool InitState(StateId s, StateId root); -// // Invoked when tree arc examined (to white/undiscovered state) -// bool TreeArc(StateId s, const Arc &a); -// // Invoked when back arc examined (to grey/unfinished state) -// bool BackArc(StateId s, const Arc &a); -// // Invoked when forward or cross arc examined (to black/finished state) -// bool ForwardOrCrossArc(StateId s, const Arc &a); -// // Invoked when state finished (PARENT is kNoStateID and ARC == NULL -// // when S is tree root) -// void FinishState(StateId s, StateId parent, const Arc *parent_arc); -// // Invoked after DFS visit -// void FinishVisit(); -// }; - -// An Fst state's DFS status -const int kDfsWhite = 0; // Undiscovered -const int kDfsGrey = 1; // Discovered & unfinished -const int kDfsBlack = 2; // Finished - -// An Fst state's DFS stack state -template <class Arc> -struct DfsState { - typedef typename Arc::StateId StateId; - - DfsState(const Fst<Arc> &fst, StateId s): state_id(s), arc_iter(fst, s) {} - - StateId state_id; // Fst state ... - ArcIterator< Fst<Arc> > arc_iter; // and its corresponding arcs -}; - - -// Performs depth-first visitation. Visitor class argument determines -// actions and contains any return data. ArcFilter determines arcs -// that are considered. 
-// -// Note this is similar to Visit() in visit.h called with a LIFO -// queue except this version has a Visitor class specialized and -// augmented for a DFS. -template <class Arc, class V, class ArcFilter> -void DfsVisit(const Fst<Arc> &fst, V *visitor, ArcFilter filter) { - typedef typename Arc::StateId StateId; - - visitor->InitVisit(fst); - - StateId start = fst.Start(); - if (start == kNoStateId) { - visitor->FinishVisit(); - return; - } - - vector<char> state_color; // Fst state DFS status - stack<DfsState<Arc> *> state_stack; // DFS execution stack - - StateId nstates = start + 1; // # of known states in general case - bool expanded = false; - if (fst.Properties(kExpanded, false)) { // tests if expanded case, then - nstates = CountStates(fst); // uses ExpandedFst::NumStates(). - expanded = true; - } - - state_color.resize(nstates, kDfsWhite); - StateIterator< Fst<Arc> > siter(fst); - - // Continue DFS while true - bool dfs = true; - - // Iterate over trees in DFS forest. - for (StateId root = start; dfs && root < nstates;) { - state_color[root] = kDfsGrey; - state_stack.push(new DfsState<Arc>(fst, root)); - dfs = visitor->InitState(root, root); - while (!state_stack.empty()) { - DfsState<Arc> *dfs_state = state_stack.top(); - StateId s = dfs_state->state_id; - if (s >= state_color.size()) { - nstates = s + 1; - state_color.resize(nstates, kDfsWhite); - } - ArcIterator< Fst<Arc> > &aiter = dfs_state->arc_iter; - if (!dfs || aiter.Done()) { - state_color[s] = kDfsBlack; - delete dfs_state; - state_stack.pop(); - if (!state_stack.empty()) { - DfsState<Arc> *parent_state = state_stack.top(); - StateId p = parent_state->state_id; - ArcIterator< Fst<Arc> > &piter = parent_state->arc_iter; - visitor->FinishState(s, p, &piter.Value()); - piter.Next(); - } else { - visitor->FinishState(s, kNoStateId, 0); - } - continue; - } - const Arc &arc = aiter.Value(); - if (arc.nextstate >= state_color.size()) { - nstates = arc.nextstate + 1; - state_color.resize(nstates, 
kDfsWhite); - } - if (!filter(arc)) { - aiter.Next(); - continue; - } - int next_color = state_color[arc.nextstate]; - switch (next_color) { - default: - case kDfsWhite: - dfs = visitor->TreeArc(s, arc); - if (!dfs) break; - state_color[arc.nextstate] = kDfsGrey; - state_stack.push(new DfsState<Arc>(fst, arc.nextstate)); - dfs = visitor->InitState(arc.nextstate, root); - break; - case kDfsGrey: - dfs = visitor->BackArc(s, arc); - aiter.Next(); - break; - case kDfsBlack: - dfs = visitor->ForwardOrCrossArc(s, arc); - aiter.Next(); - break; - } - } - - // Find next tree root - for (root = root == start ? 0 : root + 1; - root < nstates && state_color[root] != kDfsWhite; - ++root) { - } - - // Check for a state beyond the largest known state - if (!expanded && root == nstates) { - for (; !siter.Done(); siter.Next()) { - if (siter.Value() == nstates) { - ++nstates; - state_color.push_back(kDfsWhite); - break; - } - } - } - } - visitor->FinishVisit(); -} - - -template <class Arc, class V> -void DfsVisit(const Fst<Arc> &fst, V *visitor) { - DfsVisit(fst, visitor, AnyArcFilter<Arc>()); -} - -} // namespace fst - -#endif // FST_LIB_DFS_VISIT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/difference.h b/kaldi_io/src/tools/openfst/include/fst/difference.h deleted file mode 100644 index 8a3306f..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/difference.h +++ /dev/null @@ -1,189 +0,0 @@ -// difference.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Class to compute the difference between two FSAs - -#ifndef FST_LIB_DIFFERENCE_H__ -#define FST_LIB_DIFFERENCE_H__ - -#include <vector> -using std::vector; -#include <algorithm> - -#include <fst/cache.h> -#include <fst/compose.h> -#include <fst/complement.h> - - -namespace fst { - -template <class A, - class M = Matcher<Fst<A> >, - class F = SequenceComposeFilter<M>, - class T = GenericComposeStateTable<A, typename F::FilterState> > -struct DifferenceFstOptions : public ComposeFstOptions<A, M, F, T> { - explicit DifferenceFstOptions(const CacheOptions &opts, - M *mat1 = 0, M *mat2 = 0, - F *filt = 0, T *sttable= 0) - : ComposeFstOptions<A, M, F, T>(mat1, mat2, filt, sttable) { } - - DifferenceFstOptions() {} -}; - -// Computes the difference between two FSAs. This version is a delayed -// Fst. Only strings that are in the first automaton but not in second -// are retained in the result. -// -// The first argument must be an acceptor; the second argument must be -// an unweighted, epsilon-free, deterministic acceptor. One of the -// arguments must be label-sorted. -// -// Complexity: same as ComposeFst. -// -// Caveats: same as ComposeFst. -template <class A> -class DifferenceFst : public ComposeFst<A> { - public: - using ImplToFst< ComposeFstImplBase<A> >::SetImpl; - using ImplToFst< ComposeFstImplBase<A> >::GetImpl; - - using ComposeFst<A>::CreateBase1; - - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - // A - B = A ^ B'. 
- DifferenceFst(const Fst<A> &fst1, const Fst<A> &fst2, - const CacheOptions &opts = CacheOptions()) { - typedef RhoMatcher< Matcher<Fst<A> > > R; - - ComplementFst<A> cfst(fst2); - ComposeFstOptions<A, R> copts(CacheOptions(), - new R(fst1, MATCH_NONE), - new R(cfst, MATCH_INPUT, - ComplementFst<A>::kRhoLabel)); - SetImpl(CreateBase1(fst1, cfst, copts)); - - if (!fst1.Properties(kAcceptor, true)) { - FSTERROR() << "DifferenceFst: 1st argument not an acceptor"; - GetImpl()->SetProperties(kError, kError); - } - } - - template <class M, class F, class T> - DifferenceFst(const Fst<A> &fst1, const Fst<A> &fst2, - const DifferenceFstOptions<A, M, F, T> &opts) { - typedef RhoMatcher<M> R; - - ComplementFst<A> cfst(fst2); - ComposeFstOptions<A, R> copts(opts); - copts.matcher1 = new R(fst1, MATCH_NONE, kNoLabel, MATCHER_REWRITE_ALWAYS, - opts.matcher1); - copts.matcher2 = new R(cfst, MATCH_INPUT, ComplementFst<A>::kRhoLabel, - MATCHER_REWRITE_ALWAYS, opts.matcher2); - - SetImpl(CreateBase1(fst1, cfst, copts)); - - if (!fst1.Properties(kAcceptor, true)) { - FSTERROR() << "DifferenceFst: 1st argument not an acceptor"; - GetImpl()->SetProperties(kError, kError); - } - } - - // See Fst<>::Copy() for doc. - DifferenceFst(const DifferenceFst<A> &fst, bool safe = false) - : ComposeFst<A>(fst, safe) {} - - // Get a copy of this DifferenceFst. See Fst<>::Copy() for further doc. - virtual DifferenceFst<A> *Copy(bool safe = false) const { - return new DifferenceFst<A>(*this, safe); - } -}; - - -// Specialization for DifferenceFst. -template <class A> -class StateIterator< DifferenceFst<A> > - : public StateIterator< ComposeFst<A> > { - public: - explicit StateIterator(const DifferenceFst<A> &fst) - : StateIterator< ComposeFst<A> >(fst) {} -}; - - -// Specialization for DifferenceFst. 
-template <class A> -class ArcIterator< DifferenceFst<A> > - : public ArcIterator< ComposeFst<A> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const DifferenceFst<A> &fst, StateId s) - : ArcIterator< ComposeFst<A> >(fst, s) {} -}; - -// Useful alias when using StdArc. -typedef DifferenceFst<StdArc> StdDifferenceFst; - - -typedef ComposeOptions DifferenceOptions; - - -// Computes the difference between two FSAs. This version is writes -// the difference to an output MutableFst. Only strings that are in -// the first automaton but not in second are retained in the result. -// -// The first argument must be an acceptor; the second argument must be -// an unweighted, epsilon-free, deterministic acceptor. One of the -// arguments must be label-sorted. -// -// Complexity: same as Compose. -// -// Caveats: same as Compose. -template<class Arc> -void Difference(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2, - MutableFst<Arc> *ofst, - const DifferenceOptions &opts = DifferenceOptions()) { - typedef Matcher< Fst<Arc> > M; - - if (opts.filter_type == AUTO_FILTER) { - CacheOptions nopts; - nopts.gc_limit = 0; // Cache only the last state for fastest copy. - *ofst = DifferenceFst<Arc>(ifst1, ifst2, nopts); - } else if (opts.filter_type == SEQUENCE_FILTER) { - DifferenceFstOptions<Arc> dopts; - dopts.gc_limit = 0; // Cache only the last state for fastest copy. - *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts); - } else if (opts.filter_type == ALT_SEQUENCE_FILTER) { - DifferenceFstOptions<Arc, M, AltSequenceComposeFilter<M> > dopts; - dopts.gc_limit = 0; // Cache only the last state for fastest copy. - *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts); - } else if (opts.filter_type == MATCH_FILTER) { - DifferenceFstOptions<Arc, M, MatchComposeFilter<M> > dopts; - dopts.gc_limit = 0; // Cache only the last state for fastest copy. 
- *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts); - } - - if (opts.connect) - Connect(ofst); -} - -} // namespace fst - -#endif // FST_LIB_DIFFERENCE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/edit-fst.h b/kaldi_io/src/tools/openfst/include/fst/edit-fst.h deleted file mode 100644 index bd33b9d..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/edit-fst.h +++ /dev/null @@ -1,779 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Dan Bikel) -// -// An \ref Fst implementation that allows non-destructive edit operations on an -// existing fst. - -#ifndef FST_LIB_EDIT_FST_H_ -#define FST_LIB_EDIT_FST_H_ - -#include <vector> -using std::vector; - -#include <fst/cache.h> - -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; - -namespace fst { - -// The EditFst class enables non-destructive edit operations on a wrapped -// ExpandedFst. The implementation uses copy-on-write semantics at the node -// level: if a user has an underlying fst on which he or she wants to perform a -// relatively small number of edits (read: mutations), then this implementation -// will copy the edited node to an internal MutableFst and perform any edits in -// situ on that copied node. 
This class supports all the methods of MutableFst -// except for DeleteStates(const vector<StateId> &); thus, new nodes may also be -// added, and one may add transitions from existing nodes of the wrapped fst to -// new nodes. -// -// N.B.: The documentation for Fst::Copy(true) says that its behavior is -// undefined if invoked on an fst that has already been accessed. This class -// requires that the Fst implementation it wraps provides consistent, reliable -// behavior when its Copy(true) method is invoked, where consistent means -// the graph structure, graph properties and state numbering and do not change. -// VectorFst and CompactFst, for example, are both well-behaved in this regard. - -// The EditFstData class is a container for all mutable data for EditFstImpl; -// also, this class provides most of the actual implementation of what EditFst -// does (that is, most of EditFstImpl's methods delegate to methods in this, the -// EditFstData class). Instances of this class are reference-counted and can be -// shared between otherwise independent EditFstImpl instances. This scheme -// allows EditFstImpl to implement the thread-safe, copy-on-write semantics -// required by Fst::Copy(true). 
-// -// template parameters: -// A the type of arc to use -// WrappedFstT the type of fst wrapped by the EditFst instance that -// this EditFstData instance is backing -// MutableFstT the type of mutable fst to use internally for edited states; -// crucially, MutableFstT::Copy(false) *must* yield an fst that is -// thread-safe for reading (VectorFst, for example, has this property) -template <typename A, - typename WrappedFstT = ExpandedFst<A>, - typename MutableFstT = VectorFst<A> > -class EditFstData { - public: - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef typename unordered_map<StateId, StateId>::const_iterator - IdMapIterator; - typedef typename unordered_map<StateId, Weight>::const_iterator - FinalWeightIterator; - - - EditFstData() : num_new_states_(0) { - SetEmptyAndDeleteKeysForInternalMaps(); - } - - EditFstData(const EditFstData &other) : - edits_(other.edits_), - external_to_internal_ids_(other.external_to_internal_ids_), - edited_final_weights_(other.edited_final_weights_), - num_new_states_(other.num_new_states_) { - } - - ~EditFstData() { - } - - static EditFstData<A, WrappedFstT, MutableFstT> *Read(istream &strm, - const FstReadOptions &opts); - - bool Write(ostream &strm, const FstWriteOptions &opts) const { - // Serialize all private data members of this class. - FstWriteOptions edits_opts(opts); - edits_opts.write_header = true; // Force writing contained header. 
- edits_.Write(strm, edits_opts); - WriteType(strm, external_to_internal_ids_); - WriteType(strm, edited_final_weights_); - WriteType(strm, num_new_states_); - if (!strm) { - LOG(ERROR) << "EditFstData::Write: write failed: " << opts.source; - return false; - } - return true; - } - - int RefCount() const { return ref_count_.count(); } - int IncrRefCount() { return ref_count_.Incr(); } - int DecrRefCount() { return ref_count_.Decr(); } - - StateId NumNewStates() const { - return num_new_states_; - } - - // accessor methods for the fst holding edited states - StateId EditedStart() const { - return edits_.Start(); - } - - Weight Final(StateId s, const WrappedFstT *wrapped) const { - FinalWeightIterator final_weight_it = GetFinalWeightIterator(s); - if (final_weight_it == NotInFinalWeightMap()) { - IdMapIterator it = GetEditedIdMapIterator(s); - return it == NotInEditedMap() ? - wrapped->Final(s) : edits_.Final(it->second); - } - else { - return final_weight_it->second; - } - } - - size_t NumArcs(StateId s, const WrappedFstT *wrapped) const { - IdMapIterator it = GetEditedIdMapIterator(s); - return it == NotInEditedMap() ? - wrapped->NumArcs(s) : edits_.NumArcs(it->second); - } - - size_t NumInputEpsilons(StateId s, const WrappedFstT *wrapped) const { - IdMapIterator it = GetEditedIdMapIterator(s); - return it == NotInEditedMap() ? - wrapped->NumInputEpsilons(s) : - edits_.NumInputEpsilons(it->second); - } - - size_t NumOutputEpsilons(StateId s, const WrappedFstT *wrapped) const { - IdMapIterator it = GetEditedIdMapIterator(s); - return it == NotInEditedMap() ? - wrapped->NumOutputEpsilons(s) : - edits_.NumOutputEpsilons(it->second); - } - - void SetEditedProperties(uint64 props, uint64 mask) { - edits_.SetProperties(props, mask); - } - - // non-const MutableFst operations - - // Sets the start state for this fst. - void SetStart(StateId s) { - edits_.SetStart(s); - } - - // Sets the final state for this fst. 
- Weight SetFinal(StateId s, Weight w, const WrappedFstT *wrapped) { - Weight old_weight = Final(s, wrapped); - IdMapIterator it = GetEditedIdMapIterator(s); - // if we haven't already edited state s, don't add it to edited_ (which can - // be expensive if s has many transitions); just use the - // edited_final_weights_ map - if (it == NotInEditedMap()) { - edited_final_weights_[s] = w; - } - else { - edits_.SetFinal(GetEditableInternalId(s, wrapped), w); - } - return old_weight; - } - - // Adds a new state to this fst, initially with no arcs. - StateId AddState(StateId curr_num_states) { - StateId internal_state_id = edits_.AddState(); - StateId external_state_id = curr_num_states; - external_to_internal_ids_[external_state_id] = internal_state_id; - num_new_states_++; - return external_state_id; - } - - // Adds the specified arc to the specified state of this fst. - const A *AddArc(StateId s, const Arc &arc, const WrappedFstT *wrapped) { - StateId internal_id = GetEditableInternalId(s, wrapped); - - size_t num_arcs = edits_.NumArcs(internal_id); - ArcIterator<MutableFstT> arc_it(edits_, internal_id); - const A *prev_arc = NULL; - if (num_arcs > 0) { - // grab the final arc associated with this state in edits_ - arc_it.Seek(num_arcs - 1); - prev_arc = &(arc_it.Value()); - } - edits_.AddArc(internal_id, arc); - return prev_arc; - } - - void DeleteStates() { - edits_.DeleteStates(); - num_new_states_ = 0; - external_to_internal_ids_.clear(); - edited_final_weights_.clear(); - } - - // Removes all but the first n outgoing arcs of the specified state. - void DeleteArcs(StateId s, size_t n, const WrappedFstT *wrapped) { - edits_.DeleteArcs(GetEditableInternalId(s, wrapped), n); - } - - // Removes all outgoing arcs from the specified state. - void DeleteArcs(StateId s, const WrappedFstT *wrapped) { - edits_.DeleteArcs(GetEditableInternalId(s, wrapped)); - } - - // end methods for non-const MutableFst operations - - // Provides information for the generic arc iterator. 
- void InitArcIterator(StateId s, ArcIteratorData<Arc> *data, - const WrappedFstT *wrapped) const { - IdMapIterator id_map_it = GetEditedIdMapIterator(s); - if (id_map_it == NotInEditedMap()) { - VLOG(3) << "EditFstData::InitArcIterator: iterating on state " - << s << " of original fst"; - wrapped->InitArcIterator(s, data); - } else { - VLOG(2) << "EditFstData::InitArcIterator: iterating on edited state " - << s << " (internal state id: " << id_map_it->second << ")"; - edits_.InitArcIterator(id_map_it->second, data); - } - } - - // Provides information for the generic mutable arc iterator. - void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data, - const WrappedFstT *wrapped) { - data->base = - new MutableArcIterator<MutableFstT>(&edits_, - GetEditableInternalId(s, wrapped)); - } - - // Prints out the map from external to internal state id's (for debugging - // purposes). - void PrintMap() { - for (IdMapIterator map_it = external_to_internal_ids_.begin(); - map_it != NotInEditedMap(); ++map_it) { - LOG(INFO) << "(external,internal)=(" - << map_it->first << "," << map_it->second << ")"; - } - } - - - private: - void SetEmptyAndDeleteKeysForInternalMaps() { - } - - // Returns the iterator of the map from external to internal state id's - // of edits_ for the specified external state id. - IdMapIterator GetEditedIdMapIterator(StateId s) const { - return external_to_internal_ids_.find(s); - } - IdMapIterator NotInEditedMap() const { - return external_to_internal_ids_.end(); - } - - FinalWeightIterator GetFinalWeightIterator(StateId s) const { - return edited_final_weights_.find(s); - } - FinalWeightIterator NotInFinalWeightMap() const { - return edited_final_weights_.end(); - } - - // Returns the internal state id of the specified external id if the state has - // already been made editable, or else copies the state from wrapped_ - // to edits_ and returns the state id of the newly editable state in edits_. 
- // - // \return makes the specified state editable if it isn't already and returns - // its state id in edits_ - StateId GetEditableInternalId(StateId s, const WrappedFstT *wrapped) { - IdMapIterator id_map_it = GetEditedIdMapIterator(s); - if (id_map_it == NotInEditedMap()) { - StateId new_internal_id = edits_.AddState(); - VLOG(2) << "EditFstData::GetEditableInternalId: editing state " << s - << " of original fst; new internal state id:" << new_internal_id; - external_to_internal_ids_[s] = new_internal_id; - for (ArcIterator< Fst<A> > arc_iterator(*wrapped, s); - !arc_iterator.Done(); - arc_iterator.Next()) { - edits_.AddArc(new_internal_id, arc_iterator.Value()); - } - // copy the final weight - FinalWeightIterator final_weight_it = GetFinalWeightIterator(s); - if (final_weight_it == NotInFinalWeightMap()) { - edits_.SetFinal(new_internal_id, wrapped->Final(s)); - } else { - edits_.SetFinal(new_internal_id, final_weight_it->second); - edited_final_weights_.erase(s); - } - return new_internal_id; - } else { - return id_map_it->second; - } - } - - // A mutable fst (by default, a VectorFst) to contain new states, and/or - // copies of states from a wrapped ExpandedFst that have been modified in - // some way. - MutableFstT edits_; - // A mapping from external state id's to the internal id's of states that - // appear in edits_. - unordered_map<StateId, StateId> external_to_internal_ids_; - // A mapping from external state id's to final state weights assigned to - // those states. The states in this map are *only* those whose final weight - // has been modified; if any other part of the state has been modified, - // the entire state is copied to edits_, and all modifications reside there. - unordered_map<StateId, Weight> edited_final_weights_; - // The number of new states added to this mutable fst impl, which is <= the - // number of states in edits_ (since edits_ contains both edited *and* new - // states). 
- StateId num_new_states_; - RefCounter ref_count_; -}; - -// EditFstData method implementations: just the Read method. -template <typename A, typename WrappedFstT, typename MutableFstT> -EditFstData<A, WrappedFstT, MutableFstT> * -EditFstData<A, WrappedFstT, MutableFstT>::Read(istream &strm, - const FstReadOptions &opts) { - EditFstData<A, WrappedFstT, MutableFstT> *data = - new EditFstData<A, WrappedFstT, MutableFstT>(); - // next read in MutabelFstT machine that stores edits - FstReadOptions edits_opts(opts); - edits_opts.header = 0; // Contained header was written out, so read it in. - - // Because our internal representation of edited states is a solid object - // of type MutableFstT (defaults to VectorFst<A>) and not a pointer, - // and because the static Read method allocates a new object on the heap, - // we need to call Read, check if there was a failure, use - // MutableFstT::operator= to assign the object (not the pointer) to the - // edits_ data member (which will increase the ref count by 1 on the impl) - // and, finally, delete the heap-allocated object. - MutableFstT *edits = MutableFstT::Read(strm, edits_opts); - if (!edits) { - return 0; - } - data->edits_ = *edits; - delete edits; - // finally, read in rest of private data members - ReadType(strm, &data->external_to_internal_ids_); - ReadType(strm, &data->edited_final_weights_); - ReadType(strm, &data->num_new_states_); - if (!strm) { - LOG(ERROR) << "EditFst::Read: read failed: " << opts.source; - return 0; - } - return data; -} - -// This class enables non-destructive edit operations on a wrapped ExpandedFst. -// The implementation uses copy-on-write semantics at the node level: if a user -// has an underlying fst on which he or she wants to perform a relatively small -// number of edits (read: mutations), then this implementation will copy the -// edited node to an internal MutableFst and perform any edits in situ on that -// copied node. 
This class supports all the methods of MutableFst except for -// DeleteStates(const vector<StateId> &); thus, new nodes may also be added, and -// one may add transitions from existing nodes of the wrapped fst to new nodes. -// -// template parameters: -// A the type of arc to use -// WrappedFstT the type of fst wrapped by the EditFst instance that -// this EditFstImpl instance is backing -// MutableFstT the type of mutable fst to use internally for edited states; -// crucially, MutableFstT::Copy(false) *must* yield an fst that is -// thread-safe for reading (VectorFst, for example, has this property) -template <typename A, - typename WrappedFstT = ExpandedFst<A>, - typename MutableFstT = VectorFst<A> > -class EditFstImpl : public FstImpl<A> { - public: - using FstImpl<A>::SetProperties; - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - using FstImpl<A>::WriteHeader; - - typedef A Arc; - typedef typename Arc::Weight Weight; - typedef typename Arc::StateId StateId; - - // Constructs an editable fst implementation with no states. Effectively, - // this initially-empty fst will in every way mimic the behavior of - // a VectorFst--more precisely, a VectorFstImpl instance--but with slightly - // slower performance (by a constant factor), due to the fact that - // this class maintains a mapping between external state id's and - // their internal equivalents. - EditFstImpl() { - FstImpl<A>::SetType("edit"); - wrapped_ = new MutableFstT(); - InheritPropertiesFromWrapped(); - data_ = new EditFstData<A, WrappedFstT, MutableFstT>(); - } - - // Wraps the specified ExpandedFst. This constructor requires that the - // specified Fst is an ExpandedFst instance. This requirement is only enforced - // at runtime. (See below for the reason.) - // - // This library uses the pointer-to-implementation or "PIMPL" design pattern. 
- // In particular, to make it convenient to bind an implementation class to its - // interface, there are a pair of template "binder" classes, one for immutable - // and one for mutable fst's (ImplToFst and ImplToMutableFst, respectively). - // As it happens, the API for the ImplToMutableFst<I,F> class requires that - // the implementation class--the template parameter "I"--have a constructor - // taking a const Fst<A> reference. Accordingly, the constructor here must - // perform a static_cast to the WrappedFstT type required by EditFst and - // therefore EditFstImpl. - explicit EditFstImpl(const Fst<A> &wrapped) - : wrapped_(static_cast<WrappedFstT *>(wrapped.Copy())) { - FstImpl<A>::SetType("edit"); - - data_ = new EditFstData<A, WrappedFstT, MutableFstT>(); - // have edits_ inherit all properties from wrapped_ - data_->SetEditedProperties(wrapped_->Properties(kFstProperties, false), - kFstProperties); - InheritPropertiesFromWrapped(); - } - - // A copy constructor for this implementation class, used to implement - // the Copy() method of the Fst interface. - EditFstImpl(const EditFstImpl &impl) - : FstImpl<A>(), - wrapped_(static_cast<WrappedFstT *>(impl.wrapped_->Copy(true))), - data_(impl.data_) { - data_->IncrRefCount(); - SetProperties(impl.Properties()); - } - - ~EditFstImpl() { - delete wrapped_; - if (!data_->DecrRefCount()) { - delete data_; - } - } - - // const Fst/ExpandedFst operations, declared in the Fst and ExpandedFst - // interfaces - StateId Start() const { - StateId edited_start = data_->EditedStart(); - return edited_start == kNoStateId ? 
wrapped_->Start() : edited_start; - } - - Weight Final(StateId s) const { - return data_->Final(s, wrapped_); - } - - size_t NumArcs(StateId s) const { - return data_->NumArcs(s, wrapped_); - } - - size_t NumInputEpsilons(StateId s) const { - return data_->NumInputEpsilons(s, wrapped_); - } - - size_t NumOutputEpsilons(StateId s) const { - return data_->NumOutputEpsilons(s, wrapped_); - } - - StateId NumStates() const { - return wrapped_->NumStates() + data_->NumNewStates(); - } - - static EditFstImpl<A, WrappedFstT, MutableFstT> * - Read(istream &strm, - const FstReadOptions &opts); - - bool Write(ostream &strm, const FstWriteOptions &opts) const { - FstHeader hdr; - hdr.SetStart(Start()); - hdr.SetNumStates(NumStates()); - FstWriteOptions header_opts(opts); - header_opts.write_isymbols = false; // Let contained FST hold any symbols. - header_opts.write_osymbols = false; - WriteHeader(strm, header_opts, kFileVersion, &hdr); - - // First, serialize wrapped fst to stream. - FstWriteOptions wrapped_opts(opts); - wrapped_opts.write_header = true; // Force writing contained header. - wrapped_->Write(strm, wrapped_opts); - - data_->Write(strm, opts); - - strm.flush(); - if (!strm) { - LOG(ERROR) << "EditFst::Write: write failed: " << opts.source; - return false; - } - return true; - } - // end const Fst operations - - // non-const MutableFst operations - - // Sets the start state for this fst. - void SetStart(StateId s) { - MutateCheck(); - data_->SetStart(s); - SetProperties(SetStartProperties(FstImpl<A>::Properties())); - } - - // Sets the final state for this fst. - void SetFinal(StateId s, Weight w) { - MutateCheck(); - Weight old_weight = data_->SetFinal(s, w, wrapped_); - SetProperties(SetFinalProperties(FstImpl<A>::Properties(), old_weight, w)); - } - - // Adds a new state to this fst, initially with no arcs. 
- StateId AddState() { - MutateCheck(); - SetProperties(AddStateProperties(FstImpl<A>::Properties())); - return data_->AddState(NumStates()); - } - - // Adds the specified arc to the specified state of this fst. - void AddArc(StateId s, const Arc &arc) { - MutateCheck(); - const A *prev_arc = data_->AddArc(s, arc, wrapped_); - SetProperties(AddArcProperties(FstImpl<A>::Properties(), s, arc, prev_arc)); - } - - void DeleteStates(const vector<StateId>& dstates) { - FSTERROR() << ": EditFstImpl::DeleteStates(const std::vector<StateId>&): " - << " not implemented"; - SetProperties(kError, kError); - } - - // Deletes all states in this fst. - void DeleteStates(); - - // Removes all but the first n outgoing arcs of the specified state. - void DeleteArcs(StateId s, size_t n) { - MutateCheck(); - data_->DeleteArcs(s, n, wrapped_); - SetProperties(DeleteArcsProperties(FstImpl<A>::Properties())); - } - - // Removes all outgoing arcs from the specified state. - void DeleteArcs(StateId s) { - MutateCheck(); - data_->DeleteArcs(s, wrapped_); - SetProperties(DeleteArcsProperties(FstImpl<A>::Properties())); - } - - void ReserveStates(StateId s) { - } - - void ReserveArcs(StateId s, size_t n) { - } - - // end non-const MutableFst operations - - // Provides information for the generic state iterator. - void InitStateIterator(StateIteratorData<Arc> *data) const { - data->base = 0; - data->nstates = NumStates(); - } - - // Provides information for the generic arc iterator. - void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { - data_->InitArcIterator(s, data, wrapped_); - } - - // Provides information for the generic mutable arc iterator. 
- void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data) { - MutateCheck(); - data_->InitMutableArcIterator(s, data, wrapped_); - } - - private: - typedef typename unordered_map<StateId, StateId>::const_iterator - IdMapIterator; - typedef typename unordered_map<StateId, Weight>::const_iterator - FinalWeightIterator; - // Properties always true of this Fst class - static const uint64 kStaticProperties = kExpanded | kMutable; - // Current file format version - static const int kFileVersion = 2; - // Minimum file format version supported - static const int kMinFileVersion = 2; - - // Causes this fst to inherit all the properties from its wrapped fst, except - // for the two properties that always apply to EditFst instances: kExpanded - // and kMutable. - void InheritPropertiesFromWrapped() { - SetProperties(wrapped_->Properties(kCopyProperties, false) | - kStaticProperties); - SetInputSymbols(wrapped_->InputSymbols()); - SetOutputSymbols(wrapped_->OutputSymbols()); - } - - // This method ensures that any operations that alter the mutable data - // portion of this EditFstImpl cause the data_ member to be copied when its - // reference count is greater than 1. Note that this method is distinct from - // MutableFst::Mutate, which gets invoked whenever one of the basic mutation - // methods defined in MutableFst is invoked, such as SetInputSymbols. - // The MutateCheck here in EditFstImpl is invoked whenever one of the - // mutating methods specifically related to the types of edits provided - // by EditFst is performed, such as changing an arc of an existing state - // of the wrapped fst via a MutableArcIterator, or adding a new state via - // AddState(). - void MutateCheck() { - if (data_->RefCount() > 1) { - EditFstData<A, WrappedFstT, MutableFstT> *data_copy = - new EditFstData<A, WrappedFstT, MutableFstT>(*data_); - if (data_ && !data_->DecrRefCount()) { - delete data_; - } - data_ = data_copy; - } - } - - // The fst that this fst wraps. 
The purpose of this class is to enable - // non-destructive edits on this wrapped fst. - const WrappedFstT *wrapped_; - // The mutable data for this EditFst instance, with delegates for all the - // methods that can mutate data. - EditFstData<A, WrappedFstT, MutableFstT> *data_; -}; - -template <typename A, typename WrappedFstT, typename MutableFstT> -const uint64 EditFstImpl<A, WrappedFstT, MutableFstT>::kStaticProperties; - -// EditFstImpl IMPLEMENTATION STARTS HERE - -template<typename A, typename WrappedFstT, typename MutableFstT> -inline void EditFstImpl<A, WrappedFstT, MutableFstT>::DeleteStates() { - data_->DeleteStates(); - delete wrapped_; - // we are deleting all states, so just forget about pointer to wrapped_ - // and do what default constructor does: set wrapped_ to a new VectorFst - wrapped_ = new MutableFstT(); - uint64 newProps = DeleteAllStatesProperties(FstImpl<A>::Properties(), - kStaticProperties); - FstImpl<A>::SetProperties(newProps); -} - -template <typename A, typename WrappedFstT, typename MutableFstT> -EditFstImpl<A, WrappedFstT, MutableFstT> * -EditFstImpl<A, WrappedFstT, MutableFstT>::Read(istream &strm, - const FstReadOptions &opts) { - EditFstImpl<A, WrappedFstT, MutableFstT> *impl = new EditFstImpl(); - FstHeader hdr; - if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { - return 0; - } - impl->SetStart(hdr.Start()); - - // first, read in wrapped fst - FstReadOptions wrapped_opts(opts); - wrapped_opts.header = 0; // Contained header was written out, so read it in. - Fst<A> *wrapped_fst = Fst<A>::Read(strm, wrapped_opts); - if (!wrapped_fst) { - return 0; - } - impl->wrapped_ = static_cast<WrappedFstT *>(wrapped_fst); - - impl->data_ = EditFstData<A, WrappedFstT, MutableFstT>::Read(strm, opts); - - if (!impl->data_) { - delete wrapped_fst; - return 0; - } - - return impl; -} - -// END EditFstImpl IMPLEMENTATION - -// Concrete, editable FST. This class attaches interface to implementation. 
-template <typename A, - typename WrappedFstT = ExpandedFst<A>, - typename MutableFstT = VectorFst<A> > -class EditFst : - public ImplToMutableFst< EditFstImpl<A, WrappedFstT, MutableFstT> > { - public: - friend class MutableArcIterator< EditFst<A, WrappedFstT, MutableFstT> >; - - typedef A Arc; - typedef typename A::StateId StateId; - typedef EditFstImpl<A, WrappedFstT, MutableFstT> Impl; - - EditFst() : ImplToMutableFst<Impl>(new Impl()) {} - - explicit EditFst(const Fst<A> &fst) : - ImplToMutableFst<Impl>(new Impl(fst)) {} - - explicit EditFst(const WrappedFstT &fst) : - ImplToMutableFst<Impl>(new Impl(fst)) {} - - // See Fst<>::Copy() for doc. - EditFst(const EditFst<A, WrappedFstT, MutableFstT> &fst, bool safe = false) : - ImplToMutableFst<Impl>(fst, safe) {} - - virtual ~EditFst() {} - - // Get a copy of this EditFst. See Fst<>::Copy() for further doc. - virtual EditFst<A, WrappedFstT, MutableFstT> *Copy(bool safe = false) const { - return new EditFst<A, WrappedFstT, MutableFstT>(*this, safe); - } - - EditFst<A, WrappedFstT, MutableFstT> & - operator=(const EditFst<A, WrappedFstT, MutableFstT> &fst) { - SetImpl(fst.GetImpl(), false); - return *this; - } - - virtual EditFst<A, WrappedFstT, MutableFstT> &operator=(const Fst<A> &fst) { - if (this != &fst) { - SetImpl(new Impl(fst)); - } - return *this; - } - - // Read an EditFst from an input stream; return NULL on error. - static EditFst<A, WrappedFstT, MutableFstT> * - Read(istream &strm, - const FstReadOptions &opts) { - Impl* impl = Impl::Read(strm, opts); - return impl ? new EditFst<A>(impl) : 0; - } - - // Read an EditFst from a file; return NULL on error. - // Empty filename reads from standard input. - static EditFst<A, WrappedFstT, MutableFstT> *Read(const string &filename) { - Impl* impl = ImplToExpandedFst<Impl, MutableFst<A> >::Read(filename); - return impl ? 
new EditFst<A, WrappedFstT, MutableFstT>(impl) : 0; - } - - virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { - return GetImpl()->Write(strm, opts); - } - - virtual bool Write(const string &filename) const { - return Fst<A>::WriteFile(filename); - } - - virtual void InitStateIterator(StateIteratorData<Arc> *data) const { - GetImpl()->InitStateIterator(data); - } - - virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { - GetImpl()->InitArcIterator(s, data); - } - - virtual - void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data) { - GetImpl()->InitMutableArcIterator(s, data); - } - private: - explicit EditFst(Impl *impl) : ImplToMutableFst<Impl>(impl) {} - - // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst< Impl, MutableFst<A> >::GetImpl(); } - - void SetImpl(Impl *impl, bool own_impl = true) { - ImplToFst< Impl, MutableFst<A> >::SetImpl(impl, own_impl); - } -}; - -} // namespace fst - -#endif // FST_LIB_EDIT_FST_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/encode.h b/kaldi_io/src/tools/openfst/include/fst/encode.h deleted file mode 100644 index 08b84cb..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/encode.h +++ /dev/null @@ -1,599 +0,0 @@ -// encode.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Johan Schalkwyk) -// -// \file -// Class to encode and decoder an fst. 
- -#ifndef FST_LIB_ENCODE_H__ -#define FST_LIB_ENCODE_H__ - -#include <climits> -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <string> -#include <vector> -using std::vector; - -#include <fst/arc-map.h> -#include <fst/rmfinalepsilon.h> - - -namespace fst { - -static const uint32 kEncodeLabels = 0x0001; -static const uint32 kEncodeWeights = 0x0002; -static const uint32 kEncodeFlags = 0x0003; // All non-internal flags - -static const uint32 kEncodeHasISymbols = 0x0004; // For internal use -static const uint32 kEncodeHasOSymbols = 0x0008; // For internal use - -enum EncodeType { ENCODE = 1, DECODE = 2 }; - -// Identifies stream data as an encode table (and its endianity) -static const int32 kEncodeMagicNumber = 2129983209; - - -// The following class encapsulates implementation details for the -// encoding and decoding of label/weight tuples used for encoding -// and decoding of Fsts. The EncodeTable is bidirectional. I.E it -// stores both the Tuple of encode labels and weights to a unique -// label, and the reverse. -template <class A> class EncodeTable { - public: - typedef typename A::Label Label; - typedef typename A::Weight Weight; - - // Encoded data consists of arc input/output labels and arc weight - struct Tuple { - Tuple() {} - Tuple(Label ilabel_, Label olabel_, Weight weight_) - : ilabel(ilabel_), olabel(olabel_), weight(weight_) {} - Tuple(const Tuple& tuple) - : ilabel(tuple.ilabel), olabel(tuple.olabel), weight(tuple.weight) {} - - Label ilabel; - Label olabel; - Weight weight; - }; - - // Comparison object for hashing EncodeTable Tuple(s). - class TupleEqual { - public: - bool operator()(const Tuple* x, const Tuple* y) const { - return (x->ilabel == y->ilabel && - x->olabel == y->olabel && - x->weight == y->weight); - } - }; - - // Hash function for EncodeTabe Tuples. Based on the encode flags - // we either hash the labels, weights or combination of them. 
- class TupleKey { - public: - TupleKey() - : encode_flags_(kEncodeLabels | kEncodeWeights) {} - - TupleKey(const TupleKey& key) - : encode_flags_(key.encode_flags_) {} - - explicit TupleKey(uint32 encode_flags) - : encode_flags_(encode_flags) {} - - size_t operator()(const Tuple* x) const { - size_t hash = x->ilabel; - const int lshift = 5; - const int rshift = CHAR_BIT * sizeof(size_t) - 5; - if (encode_flags_ & kEncodeLabels) - hash = hash << lshift ^ hash >> rshift ^ x->olabel; - if (encode_flags_ & kEncodeWeights) - hash = hash << lshift ^ hash >> rshift ^ x->weight.Hash(); - return hash; - } - - private: - int32 encode_flags_; - }; - - typedef unordered_map<const Tuple*, - Label, - TupleKey, - TupleEqual> EncodeHash; - - explicit EncodeTable(uint32 encode_flags) - : flags_(encode_flags), - encode_hash_(1024, TupleKey(encode_flags)), - isymbols_(0), osymbols_(0) {} - - ~EncodeTable() { - for (size_t i = 0; i < encode_tuples_.size(); ++i) { - delete encode_tuples_[i]; - } - delete isymbols_; - delete osymbols_; - } - - // Given an arc encode either input/ouptut labels or input/costs or both - Label Encode(const A &arc) { - const Tuple tuple(arc.ilabel, - flags_ & kEncodeLabels ? arc.olabel : 0, - flags_ & kEncodeWeights ? arc.weight : Weight::One()); - typename EncodeHash::const_iterator it = encode_hash_.find(&tuple); - if (it == encode_hash_.end()) { - encode_tuples_.push_back(new Tuple(tuple)); - encode_hash_[encode_tuples_.back()] = encode_tuples_.size(); - return encode_tuples_.size(); - } else { - return it->second; - } - } - - // Given an arc, look up its encoded label. Returns kNoLabel if not found. - Label GetLabel(const A &arc) const { - const Tuple tuple(arc.ilabel, - flags_ & kEncodeLabels ? arc.olabel : 0, - flags_ & kEncodeWeights ? 
arc.weight : Weight::One()); - typename EncodeHash::const_iterator it = encode_hash_.find(&tuple); - if (it == encode_hash_.end()) { - return kNoLabel; - } else { - return it->second; - } - } - - // Given an encode arc Label decode back to input/output labels and costs - const Tuple* Decode(Label key) const { - if (key < 1 || key > encode_tuples_.size()) { - LOG(ERROR) << "EncodeTable::Decode: unknown decode key: " << key; - return 0; - } - return encode_tuples_[key - 1]; - } - - size_t Size() const { return encode_tuples_.size(); } - - bool Write(ostream &strm, const string &source) const; - - static EncodeTable<A> *Read(istream &strm, const string &source); - - const uint32 flags() const { return flags_ & kEncodeFlags; } - - int RefCount() const { return ref_count_.count(); } - int IncrRefCount() { return ref_count_.Incr(); } - int DecrRefCount() { return ref_count_.Decr(); } - - - SymbolTable *InputSymbols() const { return isymbols_; } - - SymbolTable *OutputSymbols() const { return osymbols_; } - - void SetInputSymbols(const SymbolTable* syms) { - if (isymbols_) delete isymbols_; - if (syms) { - isymbols_ = syms->Copy(); - flags_ |= kEncodeHasISymbols; - } else { - isymbols_ = 0; - flags_ &= ~kEncodeHasISymbols; - } - } - - void SetOutputSymbols(const SymbolTable* syms) { - if (osymbols_) delete osymbols_; - if (syms) { - osymbols_ = syms->Copy(); - flags_ |= kEncodeHasOSymbols; - } else { - osymbols_ = 0; - flags_ &= ~kEncodeHasOSymbols; - } - } - - private: - uint32 flags_; - vector<Tuple*> encode_tuples_; - EncodeHash encode_hash_; - RefCounter ref_count_; - SymbolTable *isymbols_; // Pre-encoded ilabel symbol table - SymbolTable *osymbols_; // Pre-encoded olabel symbol table - - DISALLOW_COPY_AND_ASSIGN(EncodeTable); -}; - -template <class A> inline -bool EncodeTable<A>::Write(ostream &strm, const string &source) const { - WriteType(strm, kEncodeMagicNumber); - WriteType(strm, flags_); - int64 size = encode_tuples_.size(); - WriteType(strm, size); - for 
(size_t i = 0; i < size; ++i) { - const Tuple* tuple = encode_tuples_[i]; - WriteType(strm, tuple->ilabel); - WriteType(strm, tuple->olabel); - tuple->weight.Write(strm); - } - - if (flags_ & kEncodeHasISymbols) - isymbols_->Write(strm); - - if (flags_ & kEncodeHasOSymbols) - osymbols_->Write(strm); - - strm.flush(); - if (!strm) { - LOG(ERROR) << "EncodeTable::Write: write failed: " << source; - return false; - } - return true; -} - -template <class A> inline -EncodeTable<A> *EncodeTable<A>::Read(istream &strm, const string &source) { - int32 magic_number = 0; - ReadType(strm, &magic_number); - if (magic_number != kEncodeMagicNumber) { - LOG(ERROR) << "EncodeTable::Read: Bad encode table header: " << source; - return 0; - } - uint32 flags; - ReadType(strm, &flags); - EncodeTable<A> *table = new EncodeTable<A>(flags); - - int64 size; - ReadType(strm, &size); - if (!strm) { - LOG(ERROR) << "EncodeTable::Read: read failed: " << source; - return 0; - } - - for (size_t i = 0; i < size; ++i) { - Tuple* tuple = new Tuple(); - ReadType(strm, &tuple->ilabel); - ReadType(strm, &tuple->olabel); - tuple->weight.Read(strm); - if (!strm) { - LOG(ERROR) << "EncodeTable::Read: read failed: " << source; - return 0; - } - table->encode_tuples_.push_back(tuple); - table->encode_hash_[table->encode_tuples_.back()] = - table->encode_tuples_.size(); - } - - if (flags & kEncodeHasISymbols) - table->isymbols_ = SymbolTable::Read(strm, source); - - if (flags & kEncodeHasOSymbols) - table->osymbols_ = SymbolTable::Read(strm, source); - - return table; -} - - -// A mapper to encode/decode weighted transducers. Encoding of an -// Fst is useful for performing classical determinization or minimization -// on a weighted transducer by treating it as an unweighted acceptor over -// encoded labels. -// -// The Encode mapper stores the encoding in a local hash table (EncodeTable) -// This table is shared (and reference counted) between the encoder and -// decoder. 
A decoder has read only access to the EncodeTable. -// -// The EncodeMapper allows on the fly encoding of the machine. As the -// EncodeTable is generated the same table may by used to decode the machine -// on the fly. For example in the following sequence of operations -// -// Encode -> Determinize -> Decode -// -// we will use the encoding table generated during the encode step in the -// decode, even though the encoding is not complete. -// -template <class A> class EncodeMapper { - typedef typename A::Weight Weight; - typedef typename A::Label Label; - public: - EncodeMapper(uint32 flags, EncodeType type) - : flags_(flags), - type_(type), - table_(new EncodeTable<A>(flags)), - error_(false) {} - - EncodeMapper(const EncodeMapper& mapper) - : flags_(mapper.flags_), - type_(mapper.type_), - table_(mapper.table_), - error_(false) { - table_->IncrRefCount(); - } - - // Copy constructor but setting the type, typically to DECODE - EncodeMapper(const EncodeMapper& mapper, EncodeType type) - : flags_(mapper.flags_), - type_(type), - table_(mapper.table_), - error_(mapper.error_) { - table_->IncrRefCount(); - } - - ~EncodeMapper() { - if (!table_->DecrRefCount()) delete table_; - } - - A operator()(const A &arc); - - MapFinalAction FinalAction() const { - return (type_ == ENCODE && (flags_ & kEncodeWeights)) ? - MAP_REQUIRE_SUPERFINAL : MAP_NO_SUPERFINAL; - } - - MapSymbolsAction InputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; } - - MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;} - - uint64 Properties(uint64 inprops) { - uint64 outprops = inprops; - if (error_) outprops |= kError; - - uint64 mask = kFstProperties; - if (flags_ & kEncodeLabels) - mask &= kILabelInvariantProperties & kOLabelInvariantProperties; - if (flags_ & kEncodeWeights) - mask &= kILabelInvariantProperties & kWeightInvariantProperties & - (type_ == ENCODE ? 
kAddSuperFinalProperties : - kRmSuperFinalProperties); - - return outprops & mask; - } - - const uint32 flags() const { return flags_; } - const EncodeType type() const { return type_; } - const EncodeTable<A> &table() const { return *table_; } - - bool Write(ostream &strm, const string& source) { - return table_->Write(strm, source); - } - - bool Write(const string& filename) { - ofstream strm(filename.c_str(), ofstream::out | ofstream::binary); - if (!strm) { - LOG(ERROR) << "EncodeMap: Can't open file: " << filename; - return false; - } - return Write(strm, filename); - } - - static EncodeMapper<A> *Read(istream &strm, - const string& source, - EncodeType type = ENCODE) { - EncodeTable<A> *table = EncodeTable<A>::Read(strm, source); - return table ? new EncodeMapper(table->flags(), type, table) : 0; - } - - static EncodeMapper<A> *Read(const string& filename, - EncodeType type = ENCODE) { - ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); - if (!strm) { - LOG(ERROR) << "EncodeMap: Can't open file: " << filename; - return NULL; - } - return Read(strm, filename, type); - } - - SymbolTable *InputSymbols() const { return table_->InputSymbols(); } - - SymbolTable *OutputSymbols() const { return table_->OutputSymbols(); } - - void SetInputSymbols(const SymbolTable* syms) { - table_->SetInputSymbols(syms); - } - - void SetOutputSymbols(const SymbolTable* syms) { - table_->SetOutputSymbols(syms); - } - - private: - uint32 flags_; - EncodeType type_; - EncodeTable<A>* table_; - bool error_; - - explicit EncodeMapper(uint32 flags, EncodeType type, EncodeTable<A> *table) - : flags_(flags), type_(type), table_(table) {} - void operator=(const EncodeMapper &); // Disallow. 
-}; - -template <class A> inline -A EncodeMapper<A>::operator()(const A &arc) { - if (type_ == ENCODE) { // labels and/or weights to single label - if ((arc.nextstate == kNoStateId && !(flags_ & kEncodeWeights)) || - (arc.nextstate == kNoStateId && (flags_ & kEncodeWeights) && - arc.weight == Weight::Zero())) { - return arc; - } else { - Label label = table_->Encode(arc); - return A(label, - flags_ & kEncodeLabels ? label : arc.olabel, - flags_ & kEncodeWeights ? Weight::One() : arc.weight, - arc.nextstate); - } - } else { // type_ == DECODE - if (arc.nextstate == kNoStateId) { - return arc; - } else { - if (arc.ilabel == 0) return arc; - if (flags_ & kEncodeLabels && arc.ilabel != arc.olabel) { - FSTERROR() << "EncodeMapper: Label-encoded arc has different " - "input and output labels"; - error_ = true; - } - if (flags_ & kEncodeWeights && arc.weight != Weight::One()) { - FSTERROR() << - "EncodeMapper: Weight-encoded arc has non-trivial weight"; - error_ = true; - } - const typename EncodeTable<A>::Tuple* tuple = table_->Decode(arc.ilabel); - if (!tuple) { - FSTERROR() << "EncodeMapper: decode failed"; - error_ = true; - return A(kNoLabel, kNoLabel, Weight::NoWeight(), arc.nextstate); - } else { - return A(tuple->ilabel, - flags_ & kEncodeLabels ? tuple->olabel : arc.olabel, - flags_ & kEncodeWeights ? 
tuple->weight : arc.weight, - arc.nextstate); - } - } - } -} - - -// Complexity: O(nstates + narcs) -template<class A> inline -void Encode(MutableFst<A> *fst, EncodeMapper<A>* mapper) { - mapper->SetInputSymbols(fst->InputSymbols()); - mapper->SetOutputSymbols(fst->OutputSymbols()); - ArcMap(fst, mapper); -} - -template<class A> inline -void Decode(MutableFst<A>* fst, const EncodeMapper<A>& mapper) { - ArcMap(fst, EncodeMapper<A>(mapper, DECODE)); - RmFinalEpsilon(fst); - fst->SetInputSymbols(mapper.InputSymbols()); - fst->SetOutputSymbols(mapper.OutputSymbols()); -} - - -// On the fly label and/or weight encoding of input Fst -// -// Complexity: -// - Constructor: O(1) -// - Traversal: O(nstates_visited + narcs_visited), assuming constant -// time to visit an input state or arc. -template <class A> -class EncodeFst : public ArcMapFst<A, A, EncodeMapper<A> > { - public: - typedef A Arc; - typedef EncodeMapper<A> C; - typedef ArcMapFstImpl< A, A, EncodeMapper<A> > Impl; - using ImplToFst<Impl>::GetImpl; - - EncodeFst(const Fst<A> &fst, EncodeMapper<A>* encoder) - : ArcMapFst<A, A, C>(fst, encoder, ArcMapFstOptions()) { - encoder->SetInputSymbols(fst.InputSymbols()); - encoder->SetOutputSymbols(fst.OutputSymbols()); - } - - EncodeFst(const Fst<A> &fst, const EncodeMapper<A>& encoder) - : ArcMapFst<A, A, C>(fst, encoder, ArcMapFstOptions()) {} - - // See Fst<>::Copy() for doc. - EncodeFst(const EncodeFst<A> &fst, bool copy = false) - : ArcMapFst<A, A, C>(fst, copy) {} - - // Get a copy of this EncodeFst. See Fst<>::Copy() for further doc. - virtual EncodeFst<A> *Copy(bool safe = false) const { - if (safe) { - FSTERROR() << "EncodeFst::Copy(true): not allowed."; - GetImpl()->SetProperties(kError, kError); - } - return new EncodeFst(*this); - } -}; - - -// On the fly label and/or weight encoding of input Fst -// -// Complexity: -// - Constructor: O(1) -// - Traversal: O(nstates_visited + narcs_visited), assuming constant -// time to visit an input state or arc. 
-template <class A> -class DecodeFst : public ArcMapFst<A, A, EncodeMapper<A> > { - public: - typedef A Arc; - typedef EncodeMapper<A> C; - typedef ArcMapFstImpl< A, A, EncodeMapper<A> > Impl; - using ImplToFst<Impl>::GetImpl; - - DecodeFst(const Fst<A> &fst, const EncodeMapper<A>& encoder) - : ArcMapFst<A, A, C>(fst, - EncodeMapper<A>(encoder, DECODE), - ArcMapFstOptions()) { - GetImpl()->SetInputSymbols(encoder.InputSymbols()); - GetImpl()->SetOutputSymbols(encoder.OutputSymbols()); - } - - // See Fst<>::Copy() for doc. - DecodeFst(const DecodeFst<A> &fst, bool safe = false) - : ArcMapFst<A, A, C>(fst, safe) {} - - // Get a copy of this DecodeFst. See Fst<>::Copy() for further doc. - virtual DecodeFst<A> *Copy(bool safe = false) const { - return new DecodeFst(*this, safe); - } -}; - - -// Specialization for EncodeFst. -template <class A> -class StateIterator< EncodeFst<A> > - : public StateIterator< ArcMapFst<A, A, EncodeMapper<A> > > { - public: - explicit StateIterator(const EncodeFst<A> &fst) - : StateIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst) {} -}; - - -// Specialization for EncodeFst. -template <class A> -class ArcIterator< EncodeFst<A> > - : public ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > > { - public: - ArcIterator(const EncodeFst<A> &fst, typename A::StateId s) - : ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst, s) {} -}; - - -// Specialization for DecodeFst. -template <class A> -class StateIterator< DecodeFst<A> > - : public StateIterator< ArcMapFst<A, A, EncodeMapper<A> > > { - public: - explicit StateIterator(const DecodeFst<A> &fst) - : StateIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst) {} -}; - - -// Specialization for DecodeFst. -template <class A> -class ArcIterator< DecodeFst<A> > - : public ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > > { - public: - ArcIterator(const DecodeFst<A> &fst, typename A::StateId s) - : ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst, s) {} -}; - - -// Useful aliases when using StdArc. 
-typedef EncodeFst<StdArc> StdEncodeFst; - -typedef DecodeFst<StdArc> StdDecodeFst; - -} // namespace fst - -#endif // FST_LIB_ENCODE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/epsnormalize.h b/kaldi_io/src/tools/openfst/include/fst/epsnormalize.h deleted file mode 100644 index 9d178b1..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/epsnormalize.h +++ /dev/null @@ -1,73 +0,0 @@ -// epsnormalize.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// Function that implements epsilon normalization. - -#ifndef FST_LIB_EPSNORMALIZE_H__ -#define FST_LIB_EPSNORMALIZE_H__ - -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; - - -#include <fst/factor-weight.h> -#include <fst/invert.h> -#include <fst/arc-map.h> -#include <fst/rmepsilon.h> - - -namespace fst { - -enum EpsNormalizeType {EPS_NORM_INPUT, EPS_NORM_OUTPUT}; - -// Returns an equivalent FST that is epsilon-normalized. An acceptor is -// epsilon-normalized if it is epsilon-removed. A transducer is input -// epsilon-normalized if additionally if on each path any epsilon input -// label follows all non-epsilon input labels. Output epsilon-normalized -// is defined similarly. -// -// The input FST needs to be functional. -// -// References: -// - Mehryar Mohri. 
"Generic epsilon-removal and input epsilon-normalization -// algorithms for weighted transducers", International Journal of Computer -// Science, 13(1): 129-143, 2002. -template <class Arc> -void EpsNormalize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst, - EpsNormalizeType type = EPS_NORM_INPUT) { - VectorFst< GallicArc<Arc, STRING_RIGHT_RESTRICT> > gfst; - if (type == EPS_NORM_INPUT) - ArcMap(ifst, &gfst, ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>()); - else // type == EPS_NORM_OUTPUT - ArcMap(InvertFst<Arc>(ifst), &gfst, - ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>()); - RmEpsilon(&gfst); - FactorWeightFst< GallicArc<Arc, STRING_RIGHT_RESTRICT>, - GallicFactor<typename Arc::Label, - typename Arc::Weight, STRING_RIGHT_RESTRICT> > - fwfst(gfst); - ArcMap(fwfst, ofst, FromGallicMapper<Arc, STRING_RIGHT_RESTRICT>()); - ofst->SetOutputSymbols(ifst.OutputSymbols()); - if(type == EPS_NORM_OUTPUT) - Invert(ofst); -} - -} // namespace fst - -#endif // FST_LIB_EPSNORMALIZE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/equal.h b/kaldi_io/src/tools/openfst/include/fst/equal.h deleted file mode 100644 index 33be198..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/equal.h +++ /dev/null @@ -1,124 +0,0 @@ -// test.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Function to test equality of two Fsts. 
- -#ifndef FST_LIB_EQUAL_H__ -#define FST_LIB_EQUAL_H__ - -#include <fst/fst.h> - - -namespace fst { - -// Tests if two Fsts have the same states and arcs in the same order. -template<class Arc> -bool Equal(const Fst<Arc> &fst1, const Fst<Arc> &fst2, float delta = kDelta) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - - if (fst1.Start() != fst2.Start()) { - VLOG(1) << "Equal: mismatched start states"; - return false; - } - - StateIterator< Fst<Arc> > siter1(fst1); - StateIterator< Fst<Arc> > siter2(fst2); - - while (!siter1.Done() || !siter2.Done()) { - if (siter1.Done() || siter2.Done()) { - VLOG(1) << "Equal: mismatched # of states"; - return false; - } - StateId s1 = siter1.Value(); - StateId s2 = siter2.Value(); - if (s1 != s2) { - VLOG(1) << "Equal: mismatched states:" - << ", state1 = " << s1 - << ", state2 = " << s2; - return false; - } - Weight final1 = fst1.Final(s1); - Weight final2 = fst2.Final(s2); - if (!ApproxEqual(final1, final2, delta)) { - VLOG(1) << "Equal: mismatched final weights:" - << " state = " << s1 - << ", final1 = " << final1 - << ", final2 = " << final2; - return false; - } - ArcIterator< Fst<Arc> > aiter1(fst1, s1); - ArcIterator< Fst<Arc> > aiter2(fst2, s2); - for (size_t a = 0; !aiter1.Done() || !aiter2.Done(); ++a) { - if (aiter1.Done() || aiter2.Done()) { - VLOG(1) << "Equal: mismatched # of arcs" - << " state = " << s1; - return false; - } - Arc arc1 = aiter1.Value(); - Arc arc2 = aiter2.Value(); - if (arc1.ilabel != arc2.ilabel) { - VLOG(1) << "Equal: mismatched arc input labels:" - << " state = " << s1 - << ", arc = " << a - << ", ilabel1 = " << arc1.ilabel - << ", ilabel2 = " << arc2.ilabel; - return false; - } else if (arc1.olabel != arc2.olabel) { - VLOG(1) << "Equal: mismatched arc output labels:" - << " state = " << s1 - << ", arc = " << a - << ", olabel1 = " << arc1.olabel - << ", olabel2 = " << arc2.olabel; - return false; - } else if (!ApproxEqual(arc1.weight, arc2.weight, delta)) { - 
VLOG(1) << "Equal: mismatched arc weights:" - << " state = " << s1 - << ", arc = " << a - << ", weight1 = " << arc1.weight - << ", weight2 = " << arc2.weight; - return false; - } else if (arc1.nextstate != arc2.nextstate) { - VLOG(1) << "Equal: mismatched input label:" - << " state = " << s1 - << ", arc = " << a - << ", nextstate1 = " << arc1.nextstate - << ", nextstate2 = " << arc2.nextstate; - return false; - } - aiter1.Next(); - aiter2.Next(); - - } - // Sanity checks: should never fail - if (fst1.NumArcs(s1) != fst2.NumArcs(s2) || - fst1.NumInputEpsilons(s1) != fst2.NumInputEpsilons(s2) || - fst1.NumOutputEpsilons(s1) != fst2.NumOutputEpsilons(s2)) { - FSTERROR() << "Equal: inconsistent arc/epsilon counts"; - } - - siter1.Next(); - siter2.Next(); - } - return true; -} - -} // namespace fst - - -#endif // FST_LIB_EQUAL_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/equivalent.h b/kaldi_io/src/tools/openfst/include/fst/equivalent.h deleted file mode 100644 index e28fea1..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/equivalent.h +++ /dev/null @@ -1,275 +0,0 @@ -// equivalent.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Wojciech Skut) -// -// \file Functions and classes to determine the equivalence of two -// FSTs. 
- -#ifndef FST_LIB_EQUIVALENT_H__ -#define FST_LIB_EQUIVALENT_H__ - -#include <algorithm> -#include <deque> -using std::deque; -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <utility> -using std::pair; using std::make_pair; -#include <vector> -using std::vector; - -#include <fst/encode.h> -#include <fst/push.h> -#include <fst/union-find.h> -#include <fst/vector-fst.h> - - -namespace fst { - -// Traits-like struct holding utility functions/typedefs/constants for -// the equivalence algorithm. -// -// Encoding device: in order to make the statesets of the two acceptors -// disjoint, we map Arc::StateId on the type MappedId. The states of -// the first acceptor are mapped on odd numbers (s -> 2s + 1), and -// those of the second one on even numbers (s -> 2s + 2). The number 0 -// is reserved for an implicit (non-final) 'dead state' (required for -// the correct treatment of non-coaccessible states; kNoStateId is -// mapped to kDeadState for both acceptors). The union-find algorithm -// operates on the mapped IDs. -template <class Arc> -struct EquivalenceUtil { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - typedef StateId MappedId; // ID for an equivalence class. - - // MappedId for an implicit dead state. - static const MappedId kDeadState = 0; - - // MappedId for lookup failure. - static const MappedId kInvalidId = -1; - - // Maps state ID to the representative of the corresponding - // equivalence class. The parameter 'which_fst' takes the values 1 - // and 2, identifying the input FST. - static MappedId MapState(StateId s, int32 which_fst) { - return - (kNoStateId == s) - ? - kDeadState - : - (static_cast<MappedId>(s) << 1) + which_fst; - } - // Maps set ID to State ID. - static StateId UnMapState(MappedId id) { - return static_cast<StateId>((--id) >> 1); - } - // Convenience function: checks if state with MappedId 's' is final - // in acceptor 'fa'. 
- static bool IsFinal(const Fst<Arc> &fa, MappedId s) { - return - (kDeadState == s) ? - false : (fa.Final(UnMapState(s)) != Weight::Zero()); - } - // Convenience function: returns the representative of 'id' in 'sets', - // creating a new set if needed. - static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) { - MappedId repr = sets->FindSet(id); - if (repr != kInvalidId) { - return repr; - } else { - sets->MakeSet(id); - return id; - } - } -}; - -template <class Arc> const -typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kDeadState; - -template <class Arc> const -typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kInvalidId; - - -// Equivalence checking algorithm: determines if the two FSTs -// <code>fst1</code> and <code>fst2</code> are equivalent. The input -// FSTs must be deterministic input-side epsilon-free acceptors, -// unweighted or with weights over a left semiring. Two acceptors are -// considered equivalent if they accept exactly the same set of -// strings (with the same weights). -// -// The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and -// Analysis of Computer Programs") successively constructs sets of -// states that can be reached by the same prefixes, starting with a -// set containing the start states of both acceptors. A disjoint tree -// forest (the union-find algorithm) is used to represent the sets of -// states. The algorithm returns 'false' if one of the constructed -// sets contains both final and non-final states. Returns optional error -// value (when FLAGS_error_fatal = false). -// -// Complexity: quasi-linear, i.e. O(n G(n)), where -// n = |S1| + |S2| is the number of states in both acceptors -// G(n) is a very slowly growing function that can be approximated -// by 4 by all practical purposes. 
-// -template <class Arc> -bool Equivalent(const Fst<Arc> &fst1, - const Fst<Arc> &fst2, - double delta = kDelta, bool *error = 0) { - typedef typename Arc::Weight Weight; - if (error) *error = false; - - // Check that the symbol table are compatible - if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) || - !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) { - FSTERROR() << "Equivalent: input/output symbol tables of 1st argument " - << "do not match input/output symbol tables of 2nd argument"; - if (error) *error = true; - return false; - } - // Check properties first: - uint64 props = kNoEpsilons | kIDeterministic | kAcceptor; - if (fst1.Properties(props, true) != props) { - FSTERROR() << "Equivalent: first argument not an" - << " epsilon-free deterministic acceptor"; - if (error) *error = true; - return false; - } - if (fst2.Properties(props, true) != props) { - FSTERROR() << "Equivalent: second argument not an" - << " epsilon-free deterministic acceptor"; - if (error) *error = true; - return false; - } - - if ((fst1.Properties(kUnweighted , true) != kUnweighted) - || (fst2.Properties(kUnweighted , true) != kUnweighted)) { - VectorFst<Arc> efst1(fst1); - VectorFst<Arc> efst2(fst2); - Push(&efst1, REWEIGHT_TO_INITIAL, delta); - Push(&efst2, REWEIGHT_TO_INITIAL, delta); - ArcMap(&efst1, QuantizeMapper<Arc>(delta)); - ArcMap(&efst2, QuantizeMapper<Arc>(delta)); - EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE); - ArcMap(&efst1, &mapper); - ArcMap(&efst2, &mapper); - return Equivalent(efst1, efst2); - } - - // Convenience typedefs: - typedef typename Arc::StateId StateId; - typedef EquivalenceUtil<Arc> Util; - typedef typename Util::MappedId MappedId; - enum { FST1 = 1, FST2 = 2 }; // Required by Util::MapState(...) - - MappedId s1 = Util::MapState(fst1.Start(), FST1); - MappedId s2 = Util::MapState(fst2.Start(), FST2); - - // The union-find structure. 
- UnionFind<MappedId> eq_classes(1000, Util::kInvalidId); - - // Initialize the union-find structure. - eq_classes.MakeSet(s1); - eq_classes.MakeSet(s2); - - // Data structure for the (partial) acceptor transition function of - // fst1 and fst2: input labels mapped to pairs of MappedId's - // representing destination states of the corresponding arcs in fst1 - // and fst2, respectively. - typedef - unordered_map<typename Arc::Label, pair<MappedId, MappedId> > - Label2StatePairMap; - - Label2StatePairMap arc_pairs; - - // Pairs of MappedId's to be processed, organized in a queue. - deque<pair<MappedId, MappedId> > q; - - bool ret = true; - // Early return if the start states differ w.r.t. being final. - if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) { - ret = false; - } - - // Main loop: explores the two acceptors in a breadth-first manner, - // updating the equivalence relation on the statesets. Loop - // invariant: each block of states contains either final states only - // or non-final states only. - for (q.push_back(make_pair(s1, s2)); ret && !q.empty(); q.pop_front()) { - s1 = q.front().first; - s2 = q.front().second; - - // Representatives of the equivalence classes of s1/s2. - MappedId rep1 = Util::FindSet(&eq_classes, s1); - MappedId rep2 = Util::FindSet(&eq_classes, s2); - - if (rep1 != rep2) { - eq_classes.Union(rep1, rep2); - arc_pairs.clear(); - - // Copy outgoing arcs starting at s1 into the hashtable. - if (Util::kDeadState != s1) { - ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1)); - for (; !arc_iter.Done(); arc_iter.Next()) { - const Arc &arc = arc_iter.Value(); - if (arc.weight != Weight::Zero()) { // Zero-weight arcs - // are treated as - // non-exisitent. - arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1); - } - } - } - // Copy outgoing arcs starting at s2 into the hashtable. 
- if (Util::kDeadState != s2) { - ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2)); - for (; !arc_iter.Done(); arc_iter.Next()) { - const Arc &arc = arc_iter.Value(); - if (arc.weight != Weight::Zero()) { // Zero-weight arcs - // are treated as - // non-existent. - arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2); - } - } - } - // Iterate through the hashtable and process pairs of target - // states. - for (typename Label2StatePairMap::const_iterator - arc_iter = arc_pairs.begin(); - arc_iter != arc_pairs.end(); - ++arc_iter) { - const pair<MappedId, MappedId> &p = arc_iter->second; - if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) { - // Detected inconsistency: return false. - ret = false; - break; - } - q.push_back(p); - } - } - } - - if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) { - if (error) *error = true; - return false; - } - - return ret; -} - -} // namespace fst - -#endif // FST_LIB_EQUIVALENT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/expanded-fst.h b/kaldi_io/src/tools/openfst/include/fst/expanded-fst.h deleted file mode 100644 index 676ceb3..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/expanded-fst.h +++ /dev/null @@ -1,189 +0,0 @@ -// expanded-fst.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Michael Riley) -// -// \file -// Generic FST augmented with state count - interface class definition. -// - -#ifndef FST_LIB_EXPANDED_FST_H__ -#define FST_LIB_EXPANDED_FST_H__ - -#include <sys/types.h> -#include <string> - -#include <fst/fst.h> - - -namespace fst { - -// A generic FST plus state count. -template <class A> -class ExpandedFst : public Fst<A> { - public: - typedef A Arc; - typedef typename A::StateId StateId; - - virtual StateId NumStates() const = 0; // State count - - // Get a copy of this ExpandedFst. See Fst<>::Copy() for further doc. - virtual ExpandedFst<A> *Copy(bool safe = false) const = 0; - - // Read an ExpandedFst from an input stream; return NULL on error. - static ExpandedFst<A> *Read(istream &strm, const FstReadOptions &opts) { - FstReadOptions ropts(opts); - FstHeader hdr; - // Use the caller-supplied header when there is one; otherwise read it - // from the stream and pass it on to the reader through ropts. - if (ropts.header) - hdr = *opts.header; - else { - if (!hdr.Read(strm, opts.source)) - return 0; - ropts.header = &hdr; - } - if (!(hdr.Properties() & kExpanded)) { - LOG(ERROR) << "ExpandedFst::Read: Not an ExpandedFst: " << ropts.source; - return 0; - } - // Look up the reader registered for the FST type named in the header. - FstRegister<A> *registr = FstRegister<A>::GetRegister(); - const typename FstRegister<A>::Reader reader = - registr->GetReader(hdr.FstType()); - if (!reader) { - LOG(ERROR) << "ExpandedFst::Read: Unknown FST type \"" << hdr.FstType() - << "\" (arc type = \"" << A::Type() - << "\"): " << ropts.source; - return 0; - } - Fst<A> *fst = reader(strm, ropts); - if (!fst) return 0; - // The reader returns a plain Fst<A> *; it is downcast without a runtime - // check (NOTE(review): presumably justified by the kExpanded test above). - return static_cast<ExpandedFst<A> *>(fst); - } - - // Read an ExpandedFst from a file; return NULL on error. - // Empty filename reads from standard input. 
- static ExpandedFst<A> *Read(const string &filename) { - if (!filename.empty()) { - ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); - if (!strm) { - LOG(ERROR) << "ExpandedFst::Read: Can't open file: " << filename; - return 0; - } - return Read(strm, FstReadOptions(filename)); - } else { - return Read(cin, FstReadOptions("standard input")); - } - } -}; - - -namespace internal { - -// ExpandedFst<A> case - abstract methods. -// Each overload below forwards to the ExpandedFst member function of the -// same name. -template <class A> inline -typename A::Weight Final(const ExpandedFst<A> &fst, typename A::StateId s) { - return fst.Final(s); -} - -template <class A> inline -ssize_t NumArcs(const ExpandedFst<A> &fst, typename A::StateId s) { - return fst.NumArcs(s); -} - -template <class A> inline -ssize_t NumInputEpsilons(const ExpandedFst<A> &fst, typename A::StateId s) { - return fst.NumInputEpsilons(s); -} - -template <class A> inline -ssize_t NumOutputEpsilons(const ExpandedFst<A> &fst, typename A::StateId s) { - return fst.NumOutputEpsilons(s); -} - -} // namespace internal - - -// A useful alias when using StdArc. -typedef ExpandedFst<StdArc> StdExpandedFst; - - -// This is a helper class template useful for attaching an ExpandedFst -// interface to its implementation, handling reference counting. It -// delegates to ImplToFst the handling of the Fst interface methods. 
-template < class I, class F = ExpandedFst<typename I::Arc> > -class ImplToExpandedFst : public ImplToFst<I, F> { - public: - typedef typename I::Arc Arc; - typedef typename Arc::Weight Weight; - typedef typename Arc::StateId StateId; - - using ImplToFst<I, F>::GetImpl; - - virtual StateId NumStates() const { return GetImpl()->NumStates(); } - - protected: - ImplToExpandedFst() : ImplToFst<I, F>() {} - - ImplToExpandedFst(I *impl) : ImplToFst<I, F>(impl) {} - - ImplToExpandedFst(const ImplToExpandedFst<I, F> &fst) - : ImplToFst<I, F>(fst) {} - - ImplToExpandedFst(const ImplToExpandedFst<I, F> &fst, bool safe) - : ImplToFst<I, F>(fst, safe) {} - - // Read FST implementation from a file; return NULL on error. - // Empty filename reads from standard input. - // Files are opened in binary mode and parsing is delegated to I::Read(). - static I *Read(const string &filename) { - if (!filename.empty()) { - ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); - if (!strm) { - LOG(ERROR) << "ExpandedFst::Read: Can't open file: " << filename; - return 0; - } - return I::Read(strm, FstReadOptions(filename)); - } else { - return I::Read(cin, FstReadOptions("standard input")); - } - } - - private: - // Disallow - ImplToExpandedFst<I, F> &operator=(const ImplToExpandedFst<I, F> &fst); - - // Assignment from a generic Fst copies nothing: it reports an error and - // sets the kError property on the implementation. - ImplToExpandedFst<I, F> &operator=(const Fst<Arc> &fst) { - FSTERROR() << "ImplToExpandedFst: Assignment operator disallowed"; - GetImpl()->SetProperties(kError, kError); - return *this; - } -}; - -// Function to return the number of states in an FST, counting them -// if necessary. -// When the kExpanded property is set, NumStates() is used directly; -// otherwise the states are counted with a StateIterator. 
-template <class Arc> -typename Arc::StateId CountStates(const Fst<Arc> &fst) { - if (fst.Properties(kExpanded, false)) { - const ExpandedFst<Arc> *efst = static_cast<const ExpandedFst<Arc> *>(&fst); - return efst->NumStates(); - } else { - typename Arc::StateId nstates = 0; - for (StateIterator< Fst<Arc> > siter(fst); !siter.Done(); siter.Next()) - ++nstates; - return nstates; - } -} - -} // namespace fst - -#endif // FST_LIB_EXPANDED_FST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/expectation-weight.h b/kaldi_io/src/tools/openfst/include/fst/expectation-weight.h deleted file mode 100644 index 5226cad..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/expectation-weight.h +++ /dev/null @@ -1,142 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Kasturi Rangan Raghavan) -// Inspiration: [email protected] (Masha Maria Shugrina) -// \file -// Expectation semiring as described by Jason Eisner: -// See: doi=10.1.1.22.9398 -// Multiplex semiring operations and identities: -// One: <One, Zero> -// Zero: <Zero, Zero> -// Plus: <a1, b1> + <a2, b2> = < (a1 + a2) , (b1 + b2) > -// Times: <a1, b1> * <a2, b2> = < (a1 * a2) , [(a1 * b2) + (a2 * b1)] > -// Division: Undefined (currently) -// -// Usually used to store the pair <probability, random_variable> so that -// ShortestDistance[Fst<ArcTpl<ExpectationWeight<P, V> > >] -// == < PosteriorProbability, Expected_Value[V] > - -#ifndef FST_LIB_EXPECTATION_WEIGHT_H_ -#define FST_LIB_EXPECTATION_WEIGHT_H_ - -#include<string> - -#include <fst/pair-weight.h> - - -namespace fst { - -// X1 is usually a probability weight like LogWeight -// X2 is usually a random variable or vector -// see SignedLogWeight or SparsePowerWeight -// -// If X1 is distinct from X2, it is required that there is an external -// product between X1 and X2 and if both semriring are commutative, or -// left or right semirings, then result must have those properties. 
-template <class X1, class X2> -class ExpectationWeight : public PairWeight<X1, X2> { - public: - using PairWeight<X1, X2>::Value1; - using PairWeight<X1, X2>::Value2; - - using PairWeight<X1, X2>::Reverse; - using PairWeight<X1, X2>::Quantize; - using PairWeight<X1, X2>::Member; - - typedef X1 W1; - typedef X2 W2; - - typedef ExpectationWeight<typename X1::ReverseWeight, - typename X2::ReverseWeight> ReverseWeight; - - ExpectationWeight() : PairWeight<X1, X2>(Zero()) { } - - ExpectationWeight(const ExpectationWeight<X1, X2>& w) - : PairWeight<X1, X2> (w) { } - - ExpectationWeight(const PairWeight<X1, X2>& w) - : PairWeight<X1, X2> (w) { } - - ExpectationWeight(const X1& x1, const X2& x2) - : PairWeight<X1, X2>(x1, x2) { } - - static const ExpectationWeight<X1, X2> &Zero() { - static const ExpectationWeight<X1, X2> zero(X1::Zero(), X2::Zero()); - return zero; - } - - static const ExpectationWeight<X1, X2> &One() { - static const ExpectationWeight<X1, X2> one(X1::One(), X2::Zero()); - return one; - } - - static const ExpectationWeight<X1, X2> &NoWeight() { - static const ExpectationWeight<X1, X2> no_weight(X1::NoWeight(), - X2::NoWeight()); - return no_weight; - } - - static const string &Type() { - static const string type = "expectation_" + X1::Type() + "_" + X2::Type(); - return type; - } - - PairWeight<X1, X2> Quantize(float delta = kDelta) const { - return PairWeight<X1, X2>::Quantize(); - } - - ReverseWeight Reverse() const { - return PairWeight<X1, X2>::Reverse(); - } - - bool Member() const { - return PairWeight<X1, X2>::Member(); - } - - static uint64 Properties() { - uint64 props1 = W1::Properties(); - uint64 props2 = W2::Properties(); - return props1 & props2 & (kLeftSemiring | kRightSemiring | - kCommutative | kIdempotent); - } -}; - -template <class X1, class X2> -inline ExpectationWeight<X1, X2> Plus(const ExpectationWeight<X1, X2> &w, - const ExpectationWeight<X1, X2> &v) { - return ExpectationWeight<X1, X2>(Plus(w.Value1(), v.Value1()), - 
Plus(w.Value2(), v.Value2())); -} - - -template <class X1, class X2> -inline ExpectationWeight<X1, X2> Times(const ExpectationWeight<X1, X2> &w, - const ExpectationWeight<X1, X2> &v) { - return ExpectationWeight<X1, X2>(Times(w.Value1(), v.Value1()), - Plus(Times(w.Value1(), v.Value2()), - Times(w.Value2(), v.Value1()))); -} - -template <class X1, class X2> -inline ExpectationWeight<X1, X2> Divide(const ExpectationWeight<X1, X2> &w, - const ExpectationWeight<X1, X2> &v, - DivideType typ = DIVIDE_ANY) { - FSTERROR() << "ExpectationWeight::Divide: not implemented"; - return ExpectationWeight<X1, X2>::NoWeight(); -} - -} // namespace fst - -#endif // FST_LIB_EXPECTATION_WEIGHT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/compile-strings.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/compile-strings.h deleted file mode 100644 index ca247db..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/extensions/far/compile-strings.h +++ /dev/null @@ -1,304 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Authors: [email protected] (Cyril Allauzen) -// [email protected] (Terry Tai) -// [email protected] (Jake Ratkiewicz) - - -#ifndef FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_ -#define FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_ - -#include <libgen.h> -#include <string> -#include <vector> -using std::vector; - -#include <fst/extensions/far/far.h> -#include <fst/string.h> - -namespace fst { - -// Construct a reader that provides FSTs from a file (stream) either on a -// line-by-line basis or on a per-stream basis. Note that the freshly -// constructed reader is already set to the first input. -// -// Sample Usage: -// for (StringReader<Arc> reader(...); !reader.Done(); reader.Next()) { -// Fst *fst = reader.GetVectorFst(); -// } -template <class A> -class StringReader { - public: - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename StringCompiler<A>::TokenType TokenType; - - enum EntryType { LINE = 1, FILE = 2 }; - - StringReader(istream &istrm, - const string &source, - EntryType entry_type, - TokenType token_type, - bool allow_negative_labels, - const SymbolTable *syms = 0, - Label unknown_label = kNoStateId) - : nline_(0), strm_(istrm), source_(source), entry_type_(entry_type), - token_type_(token_type), symbols_(syms), done_(false), - compiler_(token_type, syms, unknown_label, allow_negative_labels) { - Next(); // Initialize the reader to the first input. - } - - bool Done() { - return done_; - } - - void Next() { - VLOG(1) << "Processing source " << source_ << " at line " << nline_; - if (!strm_) { // We're done if we have no more input. - done_ = true; - return; - } - if (entry_type_ == LINE) { - getline(strm_, content_); - ++nline_; - } else { - content_.clear(); - string line; - while (getline(strm_, line)) { - ++nline_; - content_.append(line); - content_.append("\n"); - } - } - if (!strm_ && content_.empty()) // We're also done if we read off all the - done_ = true; // whitespace at the end of a file. 
- } - - VectorFst<A> *GetVectorFst(bool keep_symbols = false) { - VectorFst<A> *fst = new VectorFst<A>; - if (keep_symbols) { - fst->SetInputSymbols(symbols_); - fst->SetOutputSymbols(symbols_); - } - if (compiler_(content_, fst)) { - return fst; - } else { - delete fst; - return NULL; - } - } - - CompactFst<A, StringCompactor<A> > *GetCompactFst(bool keep_symbols = false) { - CompactFst<A, StringCompactor<A> > *fst; - if (keep_symbols) { - VectorFst<A> tmp; - tmp.SetInputSymbols(symbols_); - tmp.SetOutputSymbols(symbols_); - fst = new CompactFst<A, StringCompactor<A> >(tmp); - } else { - fst = new CompactFst<A, StringCompactor<A> >; - } - if (compiler_(content_, fst)) { - return fst; - } else { - delete fst; - return NULL; - } - } - - private: - size_t nline_; - istream &strm_; - string source_; - EntryType entry_type_; - TokenType token_type_; - const SymbolTable *symbols_; - bool done_; - StringCompiler<A> compiler_; - string content_; // The actual content of the input stream's next FST. - - DISALLOW_COPY_AND_ASSIGN(StringReader); -}; - -// Compute the minimal length required to encode each line number as a decimal -// number. 
-int KeySize(const char *filename); - -template <class Arc> -void FarCompileStrings(const vector<string> &in_fnames, - const string &out_fname, - const string &fst_type, - const FarType &far_type, - int32 generate_keys, - FarEntryType fet, - FarTokenType tt, - const string &symbols_fname, - const string &unknown_symbol, - bool keep_symbols, - bool initial_symbols, - bool allow_negative_labels, - bool file_list_input, - const string &key_prefix, - const string &key_suffix) { - typename StringReader<Arc>::EntryType entry_type; - if (fet == FET_LINE) { - entry_type = StringReader<Arc>::LINE; - } else if (fet == FET_FILE) { - entry_type = StringReader<Arc>::FILE; - } else { - FSTERROR() << "FarCompileStrings: unknown entry type"; - return; - } - - typename StringCompiler<Arc>::TokenType token_type; - if (tt == FTT_SYMBOL) { - token_type = StringCompiler<Arc>::SYMBOL; - } else if (tt == FTT_BYTE) { - token_type = StringCompiler<Arc>::BYTE; - } else if (tt == FTT_UTF8) { - token_type = StringCompiler<Arc>::UTF8; - } else { - FSTERROR() << "FarCompileStrings: unknown token type"; - return; - } - - bool compact; - if (fst_type.empty() || (fst_type == "vector")) { - compact = false; - } else if (fst_type == "compact") { - compact = true; - } else { - FSTERROR() << "FarCompileStrings: unknown fst type: " - << fst_type; - return; - } - - const SymbolTable *syms = 0; - typename Arc::Label unknown_label = kNoLabel; - if (!symbols_fname.empty()) { - SymbolTableTextOptions opts; - opts.allow_negative = allow_negative_labels; - syms = SymbolTable::ReadText(symbols_fname, opts); - if (!syms) { - FSTERROR() << "FarCompileStrings: error reading symbol table: " - << symbols_fname; - return; - } - if (!unknown_symbol.empty()) { - unknown_label = syms->Find(unknown_symbol); - if (unknown_label == kNoLabel) { - FSTERROR() << "FarCompileStrings: unknown label \"" << unknown_label - << "\" missing from symbol table: " << symbols_fname; - return; - } - } - } - - FarWriter<Arc> *far_writer = 
- FarWriter<Arc>::Create(out_fname, far_type); - if (!far_writer) return; - - vector<string> inputs; - if (file_list_input) { - for (int i = 1; i < in_fnames.size(); ++i) { - istream *istrm = in_fnames.empty() ? &cin : - new ifstream(in_fnames[i].c_str()); - string str; - while (getline(*istrm, str)) - inputs.push_back(str); - if (!in_fnames.empty()) - delete istrm; - } - } else { - inputs = in_fnames; - } - - for (int i = 0, n = 0; i < inputs.size(); ++i) { - if (generate_keys == 0 && inputs[i].empty()) { - FSTERROR() << "FarCompileStrings: read from a file instead of stdin or" - << " set the --generate_keys flags."; - delete far_writer; - delete syms; - return; - } - int key_size = generate_keys ? generate_keys : - (entry_type == StringReader<Arc>::FILE ? 1 : - KeySize(inputs[i].c_str())); - istream *istrm = inputs[i].empty() ? &cin : - new ifstream(inputs[i].c_str()); - - bool keep_syms = keep_symbols; - for (StringReader<Arc> reader( - *istrm, inputs[i].empty() ? "stdin" : inputs[i], - entry_type, token_type, allow_negative_labels, - syms, unknown_label); - !reader.Done(); - reader.Next()) { - ++n; - const Fst<Arc> *fst; - if (compact) - fst = reader.GetCompactFst(keep_syms); - else - fst = reader.GetVectorFst(keep_syms); - if (initial_symbols) - keep_syms = false; - if (!fst) { - FSTERROR() << "FarCompileStrings: compiling string number " << n - << " in file " << inputs[i] << " failed with token_type = " - << (tt == FTT_BYTE ? "byte" : - (tt == FTT_UTF8 ? "utf8" : - (tt == FTT_SYMBOL ? "symbol" : "unknown"))) - << " and entry_type = " - << (fet == FET_LINE ? "line" : - (fet == FET_FILE ? 
"file" : "unknown")); - delete far_writer; - delete syms; - if (!inputs[i].empty()) delete istrm; - return; - } - ostringstream keybuf; - keybuf.width(key_size); - keybuf.fill('0'); - keybuf << n; - string key; - if (generate_keys > 0) { - key = keybuf.str(); - } else { - char* filename = new char[inputs[i].size() + 1]; - strcpy(filename, inputs[i].c_str()); - key = basename(filename); - if (entry_type != StringReader<Arc>::FILE) { - key += "-"; - key += keybuf.str(); - } - delete[] filename; - } - far_writer->Add(key_prefix + key + key_suffix, *fst); - delete fst; - } - if (generate_keys == 0) - n = 0; - if (!inputs[i].empty()) - delete istrm; - } - - delete far_writer; -} - -} // namespace fst - - -#endif // FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/create.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/create.h deleted file mode 100644 index edb31e7..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/extensions/far/create.h +++ /dev/null @@ -1,87 +0,0 @@ -// create-main.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// Modified: [email protected] (Jake Ratkiewicz) to use new dispatch -// -// \file -// Creates a finite-state archive from component FSTs. Includes -// helper function for farcreate.cc that templates the main on the arc -// type to support multiple and extensible arc types. 
-// - -#ifndef FST_EXTENSIONS_FAR_CREATE_H__ -#define FST_EXTENSIONS_FAR_CREATE_H__ - -#include <libgen.h> -#include <string> -#include <vector> -using std::vector; - -#include <fst/extensions/far/far.h> - -namespace fst { - -template <class Arc> -void FarCreate(const vector<string> &in_fnames, - const string &out_fname, - const int32 generate_keys, - const bool file_list_input, - const FarType &far_type, - const string &key_prefix, - const string &key_suffix) { - FarWriter<Arc> *far_writer = - FarWriter<Arc>::Create(out_fname, far_type); - if (!far_writer) return; - - vector<string> inputs; - if (file_list_input) { - for (int i = 1; i < in_fnames.size(); ++i) { - ifstream istrm(in_fnames[i].c_str()); - string str; - while (getline(istrm, str)) - inputs.push_back(str); - } - } else { - inputs = in_fnames; - } - - for (int i = 0; i < inputs.size(); ++i) { - Fst<Arc> *ifst = Fst<Arc>::Read(inputs[i]); - if (!ifst) return; - string key; - if (generate_keys > 0) { - ostringstream keybuf; - keybuf.width(generate_keys); - keybuf.fill('0'); - keybuf << i + 1; - key = keybuf.str(); - } else { - char* filename = new char[inputs[i].size() + 1]; - strcpy(filename, inputs[i].c_str()); - key = basename(filename); - delete[] filename; - } - - far_writer->Add(key_prefix + key + key_suffix, *ifst); - delete ifst; - } - - delete far_writer; -} - -} // namespace fst - -#endif // FST_EXTENSIONS_FAR_CREATE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/equal.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/equal.h deleted file mode 100644 index be82e2d..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/extensions/far/equal.h +++ /dev/null @@ -1,99 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) - -#ifndef FST_EXTENSIONS_FAR_EQUAL_H_ -#define FST_EXTENSIONS_FAR_EQUAL_H_ - -#include <string> - -#include <fst/extensions/far/far.h> -#include <fst/equal.h> - -namespace fst { - -// Compares two FST archives entry by entry, starting at begin_key (when -// given) and stopping with success once both current keys sort after -// end_key (when given). Returns false if either archive cannot be opened, -// if keys differ, if two FSTs are not Equal() within delta, or if one -// archive has entries left over. -template <class Arc> -bool FarEqual(const string &filename1, - const string &filename2, - float delta = kDelta, - const string &begin_key = string(), - const string &end_key = string()) { - - FarReader<Arc> *reader1 = FarReader<Arc>::Open(filename1); - FarReader<Arc> *reader2 = FarReader<Arc>::Open(filename2); - if (!reader1 || !reader2) { - delete reader1; - delete reader2; - VLOG(1) << "FarEqual: cannot open input Far file(s)"; - return false; - } - - if (!begin_key.empty()) { - bool find_begin1 = reader1->Find(begin_key); - bool find_begin2 = reader2->Find(begin_key); - if (!find_begin1 || !find_begin2) { - bool ret = !find_begin1 && !find_begin2; - if (!ret) { - VLOG(1) << "FarEqual: key \"" << begin_key << "\" missing from " - << (find_begin1 ? 
"second" : "first") << " archive."; - } - delete reader1; - delete reader2; - return ret; - } - } - - for(; !reader1->Done() && !reader2->Done(); - reader1->Next(), reader2->Next()) { - const string key1 = reader1->GetKey(); - const string key2 = reader2->GetKey(); - if (!end_key.empty() && end_key < key1 && end_key < key2) { - delete reader1; - delete reader2; - return true; - } - if (key1 != key2) { - VLOG(1) << "FarEqual: mismatched keys \"" - << key1 << "\" <> \"" << key2 << "\"."; - delete reader1; - delete reader2; - return false; - } - if (!Equal(reader1->GetFst(), reader2->GetFst(), delta)) { - VLOG(1) << "FarEqual: Fsts for key \"" << key1 << "\" are not equal."; - delete reader1; - delete reader2; - return false; - } - } - - if (!reader1->Done() || !reader2->Done()) { - // The reported key comes from whichever reader still has entries, so it - // is missing from the archive whose reader is already done. - VLOG(1) << "FarEqual: key \"" - << (reader1->Done() ? reader2->GetKey() : reader1->GetKey()) - << "\" missing from " << (reader1->Done() ? "first" : "second") - << " archive."; - delete reader1; - delete reader2; - return false; - } - - delete reader1; - delete reader2; - return true; -} - -} // namespace fst - -#endif // FST_EXTENSIONS_FAR_EQUAL_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/extract.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/extract.h deleted file mode 100644 index 95866de..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/extensions/far/extract.h +++ /dev/null @@ -1,140 +0,0 @@ -// extract-main.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// Modified: [email protected] (Jake Ratkiewicz) to use the new arc-dispatch - -// \file -// Extracts component FSTs from an finite-state archive. -// - -#ifndef FST_EXTENSIONS_FAR_EXTRACT_H__ -#define FST_EXTENSIONS_FAR_EXTRACT_H__ - -#include <string> -#include <vector> -using std::vector; - -#include <fst/extensions/far/far.h> - -namespace fst { - -template<class Arc> -inline void FarWriteFst(const Fst<Arc>* fst, string key, - string* okey, int* nrep, - const int32 &generate_filenames, int i, - const string &filename_prefix, - const string &filename_suffix) { - if (key == *okey) - ++*nrep; - else - *nrep = 0; - - *okey = key; - - string ofilename; - if (generate_filenames) { - ostringstream tmp; - tmp.width(generate_filenames); - tmp.fill('0'); - tmp << i; - ofilename = tmp.str(); - } else { - if (*nrep > 0) { - ostringstream tmp; - tmp << '.' << nrep; - key.append(tmp.str().data(), tmp.str().size()); - } - ofilename = key; - } - fst->Write(filename_prefix + ofilename + filename_suffix); -} - -template<class Arc> -void FarExtract(const vector<string> &ifilenames, - const int32 &generate_filenames, - const string &keys, - const string &key_separator, - const string &range_delimiter, - const string &filename_prefix, - const string &filename_suffix) { - FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames); - if (!far_reader) return; - - string okey; - int nrep = 0; - - vector<char *> key_vector; - // User has specified a set of fsts to extract, where some of the "fsts" could - // be ranges. 
- if (!keys.empty()) { - char *keys_cstr = new char[keys.size()+1]; - strcpy(keys_cstr, keys.c_str()); - SplitToVector(keys_cstr, key_separator.c_str(), &key_vector, true); - int i = 0; - for (int k = 0; k < key_vector.size(); ++k, ++i) { - string key = string(key_vector[k]); - char *key_cstr = new char[key.size()+1]; - strcpy(key_cstr, key.c_str()); - vector<char *> range_vector; - SplitToVector(key_cstr, range_delimiter.c_str(), &range_vector, false); - if (range_vector.size() == 1) { // Not a range - if (!far_reader->Find(key)) { - LOG(ERROR) << "FarExtract: Cannot find key: " << key; - return; - } - const Fst<Arc> &fst = far_reader->GetFst(); - FarWriteFst(&fst, key, &okey, &nrep, generate_filenames, i, - filename_prefix, filename_suffix); - } else if (range_vector.size() == 2) { // A legal range - string begin_key = string(range_vector[0]); - string end_key = string(range_vector[1]); - if (begin_key.empty() || end_key.empty()) { - LOG(ERROR) << "FarExtract: Illegal range specification: " << key; - return; - } - if (!far_reader->Find(begin_key)) { - LOG(ERROR) << "FarExtract: Cannot find key: " << begin_key; - return; - } - for ( ; !far_reader->Done(); far_reader->Next(), ++i) { - string ikey = far_reader->GetKey(); - if (end_key < ikey) break; - const Fst<Arc> &fst = far_reader->GetFst(); - FarWriteFst(&fst, ikey, &okey, &nrep, generate_filenames, i, - filename_prefix, filename_suffix); - } - } else { - LOG(ERROR) << "FarExtract: Illegal range specification: " << key; - return; - } - delete key_cstr; - } - delete keys_cstr; - return; - } - // Nothing specified: extract everything. 
- for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) { - string key = far_reader->GetKey(); - const Fst<Arc> &fst = far_reader->GetFst(); - FarWriteFst(&fst, key, &okey, &nrep, generate_filenames, i, - filename_prefix, filename_suffix); - } - return; -} - -} // namespace fst - -#endif // FST_EXTENSIONS_FAR_EXTRACT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/far.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/far.h deleted file mode 100644 index acce76e..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/extensions/far/far.h +++ /dev/null @@ -1,532 +0,0 @@ -// far.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Finite-State Transducer (FST) archive classes. 
-// - -#ifndef FST_EXTENSIONS_FAR_FAR_H__ -#define FST_EXTENSIONS_FAR_FAR_H__ - -#include <fst/extensions/far/stlist.h> -#include <fst/extensions/far/sttable.h> -#include <fst/fst.h> -#include <fst/vector-fst.h> - -namespace fst { - -enum FarEntryType { FET_LINE, FET_FILE }; -enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 }; - -inline bool IsFst(const string &filename) { - ifstream strm(filename.c_str()); - if (!strm) - return false; - return IsFstHeader(strm, filename); -} - -// FST archive header class -class FarHeader { - public: - const string &FarType() const { return fartype_; } - const string &ArcType() const { return arctype_; } - - bool Read(const string &filename) { - FstHeader fsthdr; - if (filename.empty()) { - // Header reading unsupported on stdin. Assumes STList and StdArc. - fartype_ = "stlist"; - arctype_ = "standard"; - return true; - } else if (IsSTTable(filename)) { // Check if STTable - ReadSTTableHeader(filename, &fsthdr); - fartype_ = "sttable"; - arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); - return true; - } else if (IsSTList(filename)) { // Check if STList - ReadSTListHeader(filename, &fsthdr); - fartype_ = "sttable"; - arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); - return true; - } else if (IsFst(filename)) { // Check if Fst - ifstream istrm(filename.c_str()); - fsthdr.Read(istrm, filename); - fartype_ = "fst"; - arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); - return true; - } - return false; - } - - private: - string fartype_; - string arctype_; -}; - -enum FarType { - FAR_DEFAULT = 0, - FAR_STTABLE = 1, - FAR_STLIST = 2, - FAR_FST = 3, -}; - -// This class creates an archive of FSTs. -template <class A> -class FarWriter { - public: - typedef A Arc; - - // Creates a new (empty) FST archive; returns NULL on error. - static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT); - - // Adds an FST to the end of an archive. 
Keys must be non-empty and - // in lexicographic order. FSTs must have a suitable write method. - virtual void Add(const string &key, const Fst<A> &fst) = 0; - - virtual FarType Type() const = 0; - - virtual bool Error() const = 0; - - virtual ~FarWriter() {} - - protected: - FarWriter() {} - - private: - DISALLOW_COPY_AND_ASSIGN(FarWriter); -}; - - -// This class iterates through an existing archive of FSTs. -template <class A> -class FarReader { - public: - typedef A Arc; - - // Opens an existing FST archive in a single file; returns NULL on error. - // Sets current position to the beginning of the archive. - static FarReader *Open(const string &filename); - - // Opens an existing FST archive in multiple files; returns NULL on error. - // Sets current position to the beginning of the archive. - static FarReader *Open(const vector<string> &filenames); - - // Resets current position to beginning of archive. - virtual void Reset() = 0; - - // Sets current position to first entry >= key. Returns true if a match. - virtual bool Find(const string &key) = 0; - - // Current position at end of archive? - virtual bool Done() const = 0; - - // Move current position to next FST. - virtual void Next() = 0; - - // Returns key at the current position. This reference is invalidated if - // the current position in the archive is changed. - virtual const string &GetKey() const = 0; - - // Returns FST at the current position. This reference is invalidated if - // the current position in the archive is changed. 
- virtual const Fst<A> &GetFst() const = 0; - - virtual FarType Type() const = 0; - - virtual bool Error() const = 0; - - virtual ~FarReader() {} - - protected: - FarReader() {} - - private: - DISALLOW_COPY_AND_ASSIGN(FarReader); -}; - - -template <class A> -class FstWriter { - public: - void operator()(ostream &strm, const Fst<A> &fst) const { - fst.Write(strm, FstWriteOptions()); - } -}; - - -template <class A> -class STTableFarWriter : public FarWriter<A> { - public: - typedef A Arc; - - static STTableFarWriter *Create(const string &filename) { - STTableWriter<Fst<A>, FstWriter<A> > *writer = - STTableWriter<Fst<A>, FstWriter<A> >::Create(filename); - return new STTableFarWriter(writer); - } - - void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); } - - FarType Type() const { return FAR_STTABLE; } - - bool Error() const { return writer_->Error(); } - - ~STTableFarWriter() { delete writer_; } - - private: - explicit STTableFarWriter(STTableWriter<Fst<A>, FstWriter<A> > *writer) - : writer_(writer) {} - - private: - STTableWriter<Fst<A>, FstWriter<A> > *writer_; - - DISALLOW_COPY_AND_ASSIGN(STTableFarWriter); -}; - - -template <class A> -class STListFarWriter : public FarWriter<A> { - public: - typedef A Arc; - - static STListFarWriter *Create(const string &filename) { - STListWriter<Fst<A>, FstWriter<A> > *writer = - STListWriter<Fst<A>, FstWriter<A> >::Create(filename); - return new STListFarWriter(writer); - } - - void Add(const string &key, const Fst<A> &fst) { writer_->Add(key, fst); } - - FarType Type() const { return FAR_STLIST; } - - bool Error() const { return writer_->Error(); } - - ~STListFarWriter() { delete writer_; } - - private: - explicit STListFarWriter(STListWriter<Fst<A>, FstWriter<A> > *writer) - : writer_(writer) {} - - private: - STListWriter<Fst<A>, FstWriter<A> > *writer_; - - DISALLOW_COPY_AND_ASSIGN(STListFarWriter); -}; - - -template <class A> -class FstFarWriter : public FarWriter<A> { - public: - typedef A Arc; - - 
explicit FstFarWriter(const string &filename) - : filename_(filename), error_(false), written_(false) {} - - static FstFarWriter *Create(const string &filename) { - return new FstFarWriter(filename); - } - - void Add(const string &key, const Fst<A> &fst) { - if (written_) { - LOG(WARNING) << "FstFarWriter::Add: only one Fst supported," - << " subsequent entries discarded."; - } else { - error_ = !fst.Write(filename_); - written_ = true; - } - } - - FarType Type() const { return FAR_FST; } - - bool Error() const { return error_; } - - ~FstFarWriter() {} - - private: - string filename_; - bool error_; - bool written_; - - DISALLOW_COPY_AND_ASSIGN(FstFarWriter); -}; - - -template <class A> -FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) { - switch(type) { - case FAR_DEFAULT: - if (filename.empty()) - return STListFarWriter<A>::Create(filename); - case FAR_STTABLE: - return STTableFarWriter<A>::Create(filename); - break; - case FAR_STLIST: - return STListFarWriter<A>::Create(filename); - break; - case FAR_FST: - return FstFarWriter<A>::Create(filename); - break; - default: - LOG(ERROR) << "FarWriter::Create: unknown far type"; - return 0; - } -} - - -template <class A> -class FstReader { - public: - Fst<A> *operator()(istream &strm) const { - return Fst<A>::Read(strm, FstReadOptions()); - } -}; - - -template <class A> -class STTableFarReader : public FarReader<A> { - public: - typedef A Arc; - - static STTableFarReader *Open(const string &filename) { - STTableReader<Fst<A>, FstReader<A> > *reader = - STTableReader<Fst<A>, FstReader<A> >::Open(filename); - // TODO: error check - return new STTableFarReader(reader); - } - - static STTableFarReader *Open(const vector<string> &filenames) { - STTableReader<Fst<A>, FstReader<A> > *reader = - STTableReader<Fst<A>, FstReader<A> >::Open(filenames); - // TODO: error check - return new STTableFarReader(reader); - } - - void Reset() { reader_->Reset(); } - - bool Find(const string &key) { return 
reader_->Find(key); } - - bool Done() const { return reader_->Done(); } - - void Next() { return reader_->Next(); } - - const string &GetKey() const { return reader_->GetKey(); } - - const Fst<A> &GetFst() const { return reader_->GetEntry(); } - - FarType Type() const { return FAR_STTABLE; } - - bool Error() const { return reader_->Error(); } - - ~STTableFarReader() { delete reader_; } - - private: - explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader) - : reader_(reader) {} - - private: - STTableReader<Fst<A>, FstReader<A> > *reader_; - - DISALLOW_COPY_AND_ASSIGN(STTableFarReader); -}; - - -template <class A> -class STListFarReader : public FarReader<A> { - public: - typedef A Arc; - - static STListFarReader *Open(const string &filename) { - STListReader<Fst<A>, FstReader<A> > *reader = - STListReader<Fst<A>, FstReader<A> >::Open(filename); - // TODO: error check - return new STListFarReader(reader); - } - - static STListFarReader *Open(const vector<string> &filenames) { - STListReader<Fst<A>, FstReader<A> > *reader = - STListReader<Fst<A>, FstReader<A> >::Open(filenames); - // TODO: error check - return new STListFarReader(reader); - } - - void Reset() { reader_->Reset(); } - - bool Find(const string &key) { return reader_->Find(key); } - - bool Done() const { return reader_->Done(); } - - void Next() { return reader_->Next(); } - - const string &GetKey() const { return reader_->GetKey(); } - - const Fst<A> &GetFst() const { return reader_->GetEntry(); } - - FarType Type() const { return FAR_STLIST; } - - bool Error() const { return reader_->Error(); } - - ~STListFarReader() { delete reader_; } - - private: - explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader) - : reader_(reader) {} - - private: - STListReader<Fst<A>, FstReader<A> > *reader_; - - DISALLOW_COPY_AND_ASSIGN(STListFarReader); -}; - -template <class A> -class FstFarReader : public FarReader<A> { - public: - typedef A Arc; - - static FstFarReader *Open(const string 
&filename) { - vector<string> filenames; - filenames.push_back(filename); - return new FstFarReader<A>(filenames); - } - - static FstFarReader *Open(const vector<string> &filenames) { - return new FstFarReader<A>(filenames); - } - - FstFarReader(const vector<string> &filenames) - : keys_(filenames), has_stdin_(false), pos_(0), fst_(0), error_(false) { - sort(keys_.begin(), keys_.end()); - streams_.resize(keys_.size(), 0); - for (size_t i = 0; i < keys_.size(); ++i) { - if (keys_[i].empty()) { - if (!has_stdin_) { - streams_[i] = &cin; - //sources_[i] = "stdin"; - has_stdin_ = true; - } else { - FSTERROR() << "FstFarReader::FstFarReader: stdin should only " - << "appear once in the input file list."; - error_ = true; - return; - } - } else { - streams_[i] = new ifstream( - keys_[i].c_str(), ifstream::in | ifstream::binary); - } - } - if (pos_ >= keys_.size()) return; - ReadFst(); - } - - void Reset() { - if (has_stdin_) { - FSTERROR() << "FstFarReader::Reset: operation not supported on stdin"; - error_ = true; - return; - } - pos_ = 0; - ReadFst(); - } - - bool Find(const string &key) { - if (has_stdin_) { - FSTERROR() << "FstFarReader::Find: operation not supported on stdin"; - error_ = true; - return false; - } - pos_ = 0;//TODO - ReadFst(); - return true; - } - - bool Done() const { return error_ || pos_ >= keys_.size(); } - - void Next() { - ++pos_; - ReadFst(); - } - - const string &GetKey() const { - return keys_[pos_]; - } - - const Fst<A> &GetFst() const { - return *fst_; - } - - FarType Type() const { return FAR_FST; } - - bool Error() const { return error_; } - - ~FstFarReader() { - if (fst_) delete fst_; - for (size_t i = 0; i < keys_.size(); ++i) - delete streams_[i]; - } - - private: - void ReadFst() { - if (fst_) delete fst_; - if (pos_ >= keys_.size()) return; - streams_[pos_]->seekg(0); - fst_ = Fst<A>::Read(*streams_[pos_], FstReadOptions()); - if (!fst_) { - FSTERROR() << "FstFarReader: error reading Fst from: " << keys_[pos_]; - error_ = true; - } 
- } - - private: - vector<string> keys_; - vector<istream*> streams_; - bool has_stdin_; - size_t pos_; - mutable Fst<A> *fst_; - mutable bool error_; - - DISALLOW_COPY_AND_ASSIGN(FstFarReader); -}; - -template <class A> -FarReader<A> *FarReader<A>::Open(const string &filename) { - if (filename.empty()) - return STListFarReader<A>::Open(filename); - else if (IsSTTable(filename)) - return STTableFarReader<A>::Open(filename); - else if (IsSTList(filename)) - return STListFarReader<A>::Open(filename); - else if (IsFst(filename)) - return FstFarReader<A>::Open(filename); - return 0; -} - - -template <class A> -FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) { - if (!filenames.empty() && filenames[0].empty()) - return STListFarReader<A>::Open(filenames); - else if (!filenames.empty() && IsSTTable(filenames[0])) - return STTableFarReader<A>::Open(filenames); - else if (!filenames.empty() && IsSTList(filenames[0])) - return STListFarReader<A>::Open(filenames); - else if (!filenames.empty() && IsFst(filenames[0])) - return FstFarReader<A>::Open(filenames); - return 0; -} - -} // namespace fst - -#endif // FST_EXTENSIONS_FAR_FAR_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/farlib.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/farlib.h deleted file mode 100644 index 91ba224..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/extensions/far/farlib.h +++ /dev/null @@ -1,31 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -// A finite-state archive (FAR) is used to store an indexable collection of -// FSTs in a single file. Utilities are provided to create FARs from FSTs, -// to iterate over FARs, and to extract specific FSTs from FARs. - -#ifndef FST_EXTENSIONS_FAR_FARLIB_H_ -#define FST_EXTENSIONS_FAR_FARLIB_H_ - -#include <fst/extensions/far/far.h> -#include <fst/extensions/far/compile-strings.h> -#include <fst/extensions/far/create.h> -#include <fst/extensions/far/extract.h> -#include <fst/extensions/far/info.h> -#include <fst/extensions/far/print-strings.h> - -#endif // FST_EXTENSIONS_FAR_FARLIB_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/farscript.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/farscript.h deleted file mode 100644 index cfd9167..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/extensions/far/farscript.h +++ /dev/null @@ -1,273 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -// Convenience file for including all of the FAR operations, -// or registering them for new arc types. 
- -#ifndef FST_EXTENSIONS_FAR_FARSCRIPT_H_ -#define FST_EXTENSIONS_FAR_FARSCRIPT_H_ - -#include <vector> -using std::vector; -#include <string> - -#include <fst/script/arg-packs.h> -#include <fst/extensions/far/compile-strings.h> -#include <fst/extensions/far/create.h> -#include <fst/extensions/far/equal.h> -#include <fst/extensions/far/extract.h> -#include <fst/extensions/far/info.h> -#include <fst/extensions/far/print-strings.h> -#include <fst/extensions/far/far.h> - -#include <fst/types.h> - -namespace fst { -namespace script { - -// Note: it is safe to pass these strings as references because -// this struct is only used to pass them deeper in the call graph. -// Be sure you understand why this is so before using this struct -// for anything else! -struct FarCompileStringsArgs { - const vector<string> &in_fnames; - const string &out_fname; - const string &fst_type; - const FarType &far_type; - const int32 generate_keys; - const FarEntryType fet; - const FarTokenType tt; - const string &symbols_fname; - const string &unknown_symbol; - const bool keep_symbols; - const bool initial_symbols; - const bool allow_negative_labels; - const bool file_list_input; - const string &key_prefix; - const string &key_suffix; - - FarCompileStringsArgs(const vector<string> &in_fnames, - const string &out_fname, - const string &fst_type, - const FarType &far_type, - int32 generate_keys, - FarEntryType fet, - FarTokenType tt, - const string &symbols_fname, - const string &unknown_symbol, - bool keep_symbols, - bool initial_symbols, - bool allow_negative_labels, - bool file_list_input, - const string &key_prefix, - const string &key_suffix) : - in_fnames(in_fnames), out_fname(out_fname), fst_type(fst_type), - far_type(far_type), generate_keys(generate_keys), fet(fet), - tt(tt), symbols_fname(symbols_fname), unknown_symbol(unknown_symbol), - keep_symbols(keep_symbols), initial_symbols(initial_symbols), - allow_negative_labels(allow_negative_labels), - file_list_input(file_list_input), 
key_prefix(key_prefix), - key_suffix(key_suffix) { } -}; - -template <class Arc> -void FarCompileStrings(FarCompileStringsArgs *args) { - fst::FarCompileStrings<Arc>( - args->in_fnames, args->out_fname, args->fst_type, args->far_type, - args->generate_keys, args->fet, args->tt, args->symbols_fname, - args->unknown_symbol, args->keep_symbols, args->initial_symbols, - args->allow_negative_labels, args->file_list_input, - args->key_prefix, args->key_suffix); -} - -void FarCompileStrings( - const vector<string> &in_fnames, - const string &out_fname, - const string &arc_type, - const string &fst_type, - const FarType &far_type, - int32 generate_keys, - FarEntryType fet, - FarTokenType tt, - const string &symbols_fname, - const string &unknown_symbol, - bool keep_symbols, - bool initial_symbols, - bool allow_negative_labels, - bool file_list_input, - const string &key_prefix, - const string &key_suffix); - - -// Note: it is safe to pass these strings as references because -// this struct is only used to pass them deeper in the call graph. -// Be sure you understand why this is so before using this struct -// for anything else! 
-struct FarCreateArgs { - const vector<string> &in_fnames; - const string &out_fname; - const int32 generate_keys; - const bool file_list_input; - const FarType &far_type; - const string &key_prefix; - const string &key_suffix; - - FarCreateArgs( - const vector<string> &in_fnames, const string &out_fname, - const int32 generate_keys, const bool file_list_input, - const FarType &far_type, const string &key_prefix, - const string &key_suffix) - : in_fnames(in_fnames), out_fname(out_fname), - generate_keys(generate_keys), file_list_input(file_list_input), - far_type(far_type), key_prefix(key_prefix), key_suffix(key_suffix) { } -}; - -template<class Arc> -void FarCreate(FarCreateArgs *args) { - fst::FarCreate<Arc>(args->in_fnames, args->out_fname, args->generate_keys, - args->file_list_input, args->far_type, - args->key_prefix, args->key_suffix); -} - -void FarCreate(const vector<string> &in_fnames, - const string &out_fname, - const string &arc_type, - const int32 generate_keys, - const bool file_list_input, - const FarType &far_type, - const string &key_prefix, - const string &key_suffix); - - -typedef args::Package<const string &, const string &, float, - const string &, const string &> FarEqualInnerArgs; -typedef args::WithReturnValue<bool, FarEqualInnerArgs> FarEqualArgs; - -template <class Arc> -void FarEqual(FarEqualArgs *args) { - args->retval = fst::FarEqual<Arc>( - args->args.arg1, args->args.arg2, args->args.arg3, - args->args.arg4, args->args.arg5); -} - -bool FarEqual(const string &filename1, - const string &filename2, - const string &arc_type, - float delta = kDelta, - const string &begin_key = string(), - const string &end_key = string()); - - -typedef args::Package<const vector<string> &, int32, - const string&, const string&, const string&, - const string&, const string&> FarExtractArgs; - -template<class Arc> -void FarExtract(FarExtractArgs *args) { - fst::FarExtract<Arc>( - args->arg1, args->arg2, args->arg3, args->arg4, args->arg5, args->arg6, - 
args->arg7); -} - -void FarExtract(const vector<string> &ifilenames, - const string &arc_type, - int32 generate_filenames, - const string &keys, - const string &key_separator, - const string &range_delimiter, - const string &filename_prefix, - const string &filename_suffix); - -typedef args::Package<const vector<string> &, const string &, - const string &, const bool> FarInfoArgs; - -template <class Arc> -void FarInfo(FarInfoArgs *args) { - fst::FarInfo<Arc>(args->arg1, args->arg2, args->arg3, args->arg4); -} - -void FarInfo(const vector<string> &filenames, - const string &arc_type, - const string &begin_key, - const string &end_key, - const bool list_fsts); - -struct FarPrintStringsArgs { - const vector<string> &ifilenames; - const FarEntryType entry_type; - const FarTokenType token_type; - const string &begin_key; - const string &end_key; - const bool print_key; - const bool print_weight; - const string &symbols_fname; - const bool initial_symbols; - const int32 generate_filenames; - const string &filename_prefix; - const string &filename_suffix; - - FarPrintStringsArgs( - const vector<string> &ifilenames, const FarEntryType entry_type, - const FarTokenType token_type, const string &begin_key, - const string &end_key, const bool print_key, const bool print_weight, - const string &symbols_fname, const bool initial_symbols, - const int32 generate_filenames, - const string &filename_prefix, const string &filename_suffix) : - ifilenames(ifilenames), entry_type(entry_type), token_type(token_type), - begin_key(begin_key), end_key(end_key), - print_key(print_key), print_weight(print_weight), - symbols_fname(symbols_fname), initial_symbols(initial_symbols), - generate_filenames(generate_filenames), filename_prefix(filename_prefix), - filename_suffix(filename_suffix) { } -}; - -template <class Arc> -void FarPrintStrings(FarPrintStringsArgs *args) { - fst::FarPrintStrings<Arc>( - args->ifilenames, args->entry_type, args->token_type, - args->begin_key, args->end_key, 
args->print_key, args->print_weight, - args->symbols_fname, args->initial_symbols, args->generate_filenames, - args->filename_prefix, args->filename_suffix); -} - - -void FarPrintStrings(const vector<string> &ifilenames, - const string &arc_type, - const FarEntryType entry_type, - const FarTokenType token_type, - const string &begin_key, - const string &end_key, - const bool print_key, - const bool print_weight, - const string &symbols_fname, - const bool initial_symbols, - const int32 generate_filenames, - const string &filename_prefix, - const string &filename_suffix); - -} // namespace script -} // namespace fst - - -#define REGISTER_FST_FAR_OPERATIONS(ArcType) \ - REGISTER_FST_OPERATION(FarCompileStrings, ArcType, FarCompileStringsArgs); \ - REGISTER_FST_OPERATION(FarCreate, ArcType, FarCreateArgs); \ - REGISTER_FST_OPERATION(FarEqual, ArcType, FarEqualArgs); \ - REGISTER_FST_OPERATION(FarExtract, ArcType, FarExtractArgs); \ - REGISTER_FST_OPERATION(FarInfo, ArcType, FarInfoArgs); \ - REGISTER_FST_OPERATION(FarPrintStrings, ArcType, FarPrintStringsArgs) - -#endif // FST_EXTENSIONS_FAR_FARSCRIPT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/info.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/info.h deleted file mode 100644 index 100fe68..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/extensions/far/info.h +++ /dev/null @@ -1,128 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// Modified: [email protected] (Jake Ratkiewicz) - -#ifndef FST_EXTENSIONS_FAR_INFO_H_ -#define FST_EXTENSIONS_FAR_INFO_H_ - -#include <iomanip> -#include <set> -#include <string> -#include <vector> -using std::vector; - -#include <fst/extensions/far/far.h> -#include <fst/extensions/far/main.h> // For FarTypeToString - -namespace fst { - -template <class Arc> -void CountStatesAndArcs(const Fst<Arc> &fst, size_t *nstate, size_t *narc) { - StateIterator<Fst<Arc> > siter(fst); - for (; !siter.Done(); siter.Next(), ++(*nstate)) { - ArcIterator<Fst<Arc> > aiter(fst, siter.Value()); - for (; !aiter.Done(); aiter.Next(), ++(*narc)) {} - } -} - -struct KeyInfo { - string key; - string type; - size_t nstate; - size_t narc; - - KeyInfo(string k, string t, int64 ns = 0, int64 na = 0) - : key(k), type(t), nstate(ns), narc(na) {} -}; - -template <class Arc> -void FarInfo(const vector<string> &filenames, const string &begin_key, - const string &end_key, const bool list_fsts) { - FarReader<Arc> *far_reader = FarReader<Arc>::Open(filenames); - if (!far_reader) return; - - if (!begin_key.empty()) - far_reader->Find(begin_key); - - vector<KeyInfo> *infos = list_fsts ? 
new vector<KeyInfo>() : 0; - size_t nfst = 0, nstate = 0, narc = 0; - set<string> fst_types; - for (; !far_reader->Done(); far_reader->Next()) { - string key = far_reader->GetKey(); - if (!end_key.empty() && end_key < key) - break; - ++nfst; - const Fst<Arc> &fst = far_reader->GetFst(); - fst_types.insert(fst.Type()); - if (infos) { - KeyInfo info(key, fst.Type()); - CountStatesAndArcs(fst, &info.nstate, &info.narc); - nstate += info.nstate; - nstate += info.narc; - infos->push_back(info); - } else { - CountStatesAndArcs(fst, &nstate, &narc); - } - } - - if (!infos) { - cout << std::left << setw(50) << "far type" - << FarTypeToString(far_reader->Type()) << endl; - cout << std::left << setw(50) << "arc type" << Arc::Type() << endl; - cout << std::left << setw(50) << "fst type"; - for (set<string>::const_iterator iter = fst_types.begin(); - iter != fst_types.end(); - ++iter) { - if (iter != fst_types.begin()) - cout << ","; - cout << *iter; - } - cout << endl; - cout << std::left << setw(50) << "# of FSTs" << nfst << endl; - cout << std::left << setw(50) << "total # of states" << nstate << endl; - cout << std::left << setw(50) << "total # of arcs" << narc << endl; - - } else { - int wkey = 10, wtype = 10, wnstate = 16, wnarc = 16; - for (size_t i = 0; i < infos->size(); ++i) { - const KeyInfo &info = (*infos)[i]; - if (info.key.size() + 2 > wkey) - wkey = info.key.size() + 2; - if (info.type.size() + 2 > wtype) - wtype = info.type.size() + 2; - if (ceil(log10(info.nstate)) + 2 > wnstate) - wnstate = ceil(log10(info.nstate)) + 2; - if (ceil(log10(info.narc)) + 2 > wnarc) - wnarc = ceil(log10(info.narc)) + 2; - } - - cout << std::left << setw(wkey) << "key" << setw(wtype) << "type" - << std::right << setw(wnstate) << "# of states" - << setw(wnarc) << "# of arcs" << endl; - - for (size_t i = 0; i < infos->size(); ++i) { - const KeyInfo &info = (*infos)[i]; - cout << std::left << setw(wkey) << info.key << setw(wtype) << info.type - << std::right << setw(wnstate) << 
info.nstate - << setw(wnarc) << info.narc << endl; - } - } -} - -} // namespace fst - - -#endif // FST_EXTENSIONS_FAR_INFO_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/main.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/main.h deleted file mode 100644 index 00ccfef..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/extensions/far/main.h +++ /dev/null @@ -1,43 +0,0 @@ -// main.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Classes and functions for registering and invoking Far main -// functions that support multiple and extensible arc types. - -#ifndef FST_EXTENSIONS_FAR_MAIN_H__ -#define FST_EXTENSIONS_FAR_MAIN_H__ - -#include <fst/extensions/far/far.h> - -namespace fst { - -FarEntryType StringToFarEntryType(const string &s); -FarTokenType StringToFarTokenType(const string &s); - -// Return the 'FarType' value corresponding to a far type name. -FarType FarTypeFromString(const string &str); - -// Return the textual name corresponding to a 'FarType;. 
-string FarTypeToString(FarType type); - -string LoadArcTypeFromFar(const string& far_fname); -string LoadArcTypeFromFst(const string& far_fname); - -} // namespace fst - -#endif // FST_EXTENSIONS_FAR_MAIN_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/print-strings.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/print-strings.h deleted file mode 100644 index dcc7351..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/extensions/far/print-strings.h +++ /dev/null @@ -1,138 +0,0 @@ -// printstrings-main.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// Modified by: [email protected] (Jake Ratkiewicz) -// -// \file -// Output as strings the string FSTs in a finite-state archive. 
- -#ifndef FST_EXTENSIONS_FAR_PRINT_STRINGS_H__ -#define FST_EXTENSIONS_FAR_PRINT_STRINGS_H__ - -#include <string> -#include <vector> -using std::vector; - -#include <fst/extensions/far/far.h> -#include <fst/shortest-distance.h> -#include <fst/string.h> - -DECLARE_string(far_field_separator); - -namespace fst { - -template <class Arc> -void FarPrintStrings( - const vector<string> &ifilenames, const FarEntryType entry_type, - const FarTokenType far_token_type, const string &begin_key, - const string &end_key, const bool print_key, const bool print_weight, - const string &symbols_fname, const bool initial_symbols, - const int32 generate_filenames, - const string &filename_prefix, const string &filename_suffix) { - - typename StringPrinter<Arc>::TokenType token_type; - if (far_token_type == FTT_SYMBOL) { - token_type = StringPrinter<Arc>::SYMBOL; - } else if (far_token_type == FTT_BYTE) { - token_type = StringPrinter<Arc>::BYTE; - } else if (far_token_type == FTT_UTF8) { - token_type = StringPrinter<Arc>::UTF8; - } else { - FSTERROR() << "FarPrintStrings: unknown token type"; - return; - } - - const SymbolTable *syms = 0; - if (!symbols_fname.empty()) { - // allow negative flag? 
- SymbolTableTextOptions opts; - opts.allow_negative = true; - syms = SymbolTable::ReadText(symbols_fname, opts); - if (!syms) { - FSTERROR() << "FarPrintStrings: error reading symbol table: " - << symbols_fname; - return; - } - } - - FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames); - if (!far_reader) return; - - if (!begin_key.empty()) - far_reader->Find(begin_key); - - string okey; - int nrep = 0; - for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) { - string key = far_reader->GetKey(); - if (!end_key.empty() && end_key < key) - break; - if (okey == key) - ++nrep; - else - nrep = 0; - okey = key; - - const Fst<Arc> &fst = far_reader->GetFst(); - if (i == 1 && initial_symbols && syms == 0 && fst.InputSymbols() != 0) - syms = fst.InputSymbols()->Copy(); - string str; - VLOG(2) << "Handling key: " << key; - StringPrinter<Arc> string_printer( - token_type, syms ? syms : fst.InputSymbols()); - string_printer(fst, &str); - - if (entry_type == FET_LINE) { - if (print_key) - cout << key << FLAGS_far_field_separator[0]; - cout << str; - if (print_weight) - cout << FLAGS_far_field_separator[0] << ShortestDistance(fst); - cout << endl; - } else if (entry_type == FET_FILE) { - stringstream sstrm; - if (generate_filenames) { - sstrm.fill('0'); - sstrm << std::right << setw(generate_filenames) << i; - } else { - sstrm << key; - if (nrep > 0) - sstrm << "." 
<< nrep; - } - - string filename; - filename = filename_prefix + sstrm.str() + filename_suffix; - - ofstream ostrm(filename.c_str()); - if (!ostrm) { - FSTERROR() << "FarPrintStrings: Can't open file:" << filename; - delete syms; - delete far_reader; - return; - } - ostrm << str; - if (token_type == StringPrinter<Arc>::SYMBOL) - ostrm << "\n"; - } - } - delete syms; -} - - - -} // namespace fst - -#endif // FST_EXTENSIONS_FAR_PRINT_STRINGS_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/stlist.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/stlist.h deleted file mode 100644 index ff3d98b..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/extensions/far/stlist.h +++ /dev/null @@ -1,305 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// A generic (string,type) list file format. -// -// This is a stripped-down version of STTable that does -// not support the Find() operation but that does support -// reading/writting from standard in/out. 
- -#ifndef FST_EXTENSIONS_FAR_STLIST_H_ -#define FST_EXTENSIONS_FAR_STLIST_H_ - -#include <iostream> -#include <fstream> -#include <sstream> -#include <fst/util.h> - -#include <algorithm> -#include <functional> -#include <queue> -#include <string> -#include <utility> -using std::pair; using std::make_pair; -#include <vector> -using std::vector; - -namespace fst { - -static const int32 kSTListMagicNumber = 5656924; -static const int32 kSTListFileVersion = 1; - -// String-type list writing class for object of type 'T' using functor 'W' -// to write an object of type 'T' from a stream. 'W' must conform to the -// following interface: -// -// struct Writer { -// void operator()(ostream &, const T &) const; -// }; -// -template <class T, class W> -class STListWriter { - public: - typedef T EntryType; - typedef W EntryWriter; - - explicit STListWriter(const string filename) - : stream_( - filename.empty() ? &cout : - new ofstream(filename.c_str(), ofstream::out | ofstream::binary)), - error_(false) { - WriteType(*stream_, kSTListMagicNumber); - WriteType(*stream_, kSTListFileVersion); - if (!stream_) { - FSTERROR() << "STListWriter::STListWriter: error writing to file: " - << filename; - error_ = true; - } - } - - static STListWriter<T, W> *Create(const string &filename) { - return new STListWriter<T, W>(filename); - } - - void Add(const string &key, const T &t) { - if (key == "") { - FSTERROR() << "STListWriter::Add: key empty: " << key; - error_ = true; - } else if (key < last_key_) { - FSTERROR() << "STListWriter::Add: key disorder: " << key; - error_ = true; - } - if (error_) return; - last_key_ = key; - WriteType(*stream_, key); - entry_writer_(*stream_, t); - } - - bool Error() const { return error_; } - - ~STListWriter() { - WriteType(*stream_, string()); - if (stream_ != &cout) - delete stream_; - } - - private: - EntryWriter entry_writer_; // Write functor for 'EntryType' - ostream *stream_; // Output stream - string last_key_; // Last key - bool error_; - - 
DISALLOW_COPY_AND_ASSIGN(STListWriter); -}; - - -// String-type list reading class for object of type 'T' using functor 'R' -// to read an object of type 'T' form a stream. 'R' must conform to the -// following interface: -// -// struct Reader { -// T *operator()(istream &) const; -// }; -// -template <class T, class R> -class STListReader { - public: - typedef T EntryType; - typedef R EntryReader; - - explicit STListReader(const vector<string> &filenames) - : sources_(filenames), entry_(0), error_(false) { - streams_.resize(filenames.size(), 0); - bool has_stdin = false; - for (size_t i = 0; i < filenames.size(); ++i) { - if (filenames[i].empty()) { - if (!has_stdin) { - streams_[i] = &cin; - sources_[i] = "stdin"; - has_stdin = true; - } else { - FSTERROR() << "STListReader::STListReader: stdin should only " - << "appear once in the input file list."; - error_ = true; - return; - } - } else { - streams_[i] = new ifstream( - filenames[i].c_str(), ifstream::in | ifstream::binary); - } - int32 magic_number = 0, file_version = 0; - ReadType(*streams_[i], &magic_number); - ReadType(*streams_[i], &file_version); - if (magic_number != kSTListMagicNumber) { - FSTERROR() << "STListReader::STListReader: wrong file type: " - << filenames[i]; - error_ = true; - return; - } - if (file_version != kSTListFileVersion) { - FSTERROR() << "STListReader::STListReader: wrong file version: " - << filenames[i]; - error_ = true; - return; - } - string key; - ReadType(*streams_[i], &key); - if (!key.empty()) - heap_.push(make_pair(key, i)); - if (!*streams_[i]) { - FSTERROR() << "STListReader: error reading file: " << sources_[i]; - error_ = true; - return; - } - } - if (heap_.empty()) return; - size_t current = heap_.top().second; - entry_ = entry_reader_(*streams_[current]); - if (!entry_ || !*streams_[current]) { - FSTERROR() << "STListReader: error reading entry for key: " - << heap_.top().first << ", file: " << sources_[current]; - error_ = true; - } - } - - ~STListReader() { - for 
(size_t i = 0; i < streams_.size(); ++i) { - if (streams_[i] != &cin) - delete streams_[i]; - } - if (entry_) - delete entry_; - } - - static STListReader<T, R> *Open(const string &filename) { - vector<string> filenames; - filenames.push_back(filename); - return new STListReader<T, R>(filenames); - } - - static STListReader<T, R> *Open(const vector<string> &filenames) { - return new STListReader<T, R>(filenames); - } - - void Reset() { - FSTERROR() - << "STListReader::Reset: stlist does not support reset operation"; - error_ = true; - } - - bool Find(const string &key) { - FSTERROR() - << "STListReader::Find: stlist does not support find operation"; - error_ = true; - return false; - } - - bool Done() const { - return error_ || heap_.empty(); - } - - void Next() { - if (error_) return; - size_t current = heap_.top().second; - string key; - heap_.pop(); - ReadType(*(streams_[current]), &key); - if (!*streams_[current]) { - FSTERROR() << "STListReader: error reading file: " - << sources_[current]; - error_ = true; - return; - } - if (!key.empty()) - heap_.push(make_pair(key, current)); - - if(!heap_.empty()) { - current = heap_.top().second; - if (entry_) - delete entry_; - entry_ = entry_reader_(*streams_[current]); - if (!entry_ || !*streams_[current]) { - FSTERROR() << "STListReader: error reading entry for key: " - << heap_.top().first << ", file: " << sources_[current]; - error_ = true; - } - } - } - - const string &GetKey() const { - return heap_.top().first; - } - - const EntryType &GetEntry() const { - return *entry_; - } - - bool Error() const { return error_; } - - private: - EntryReader entry_reader_; // Read functor for 'EntryType' - vector<istream*> streams_; // Input streams - vector<string> sources_; // and corresponding file names - priority_queue< - pair<string, size_t>, vector<pair<string, size_t> >, - greater<pair<string, size_t> > > heap_; // (Key, stream id) heap - mutable EntryType *entry_; // Pointer to the currently read entry - bool error_; - 
- DISALLOW_COPY_AND_ASSIGN(STListReader); -}; - - -// String-type list header reading function template on the entry header -// type 'H' having a member function: -// Read(istream &strm, const string &filename); -// Checks that 'filename' is an STList and call the H::Read() on the last -// entry in the STList. -// Does not support reading from stdin. -template <class H> -bool ReadSTListHeader(const string &filename, H *header) { - if (filename.empty()) { - LOG(ERROR) << "ReadSTListHeader: reading header not supported on stdin"; - return false; - } - ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); - int32 magic_number = 0, file_version = 0; - ReadType(strm, &magic_number); - ReadType(strm, &file_version); - if (magic_number != kSTListMagicNumber) { - LOG(ERROR) << "ReadSTListHeader: wrong file type: " << filename; - return false; - } - if (file_version != kSTListFileVersion) { - LOG(ERROR) << "ReadSTListHeader: wrong file version: " << filename; - return false; - } - string key; - ReadType(strm, &key); - header->Read(strm, filename + ":" + key); - if (!strm) { - LOG(ERROR) << "ReadSTListHeader: error reading file: " << filename; - return false; - } - return true; -} - -bool IsSTList(const string &filename); - -} // namespace fst - -#endif // FST_EXTENSIONS_FAR_STLIST_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/sttable.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/sttable.h deleted file mode 100644 index 3ce0a4b..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/extensions/far/sttable.h +++ /dev/null @@ -1,371 +0,0 @@ -// sttable.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// A generic string-to-type table file format -// -// This is not meant as a generalization of SSTable. This is more of -// a simple replacement for SSTable in order to provide an open-source -// implementation of the FAR format for the external version of the -// FST Library. - -#ifndef FST_EXTENSIONS_FAR_STTABLE_H_ -#define FST_EXTENSIONS_FAR_STTABLE_H_ - -#include <algorithm> -#include <iostream> -#include <fstream> -#include <sstream> -#include <fst/util.h> - -namespace fst { - -static const int32 kSTTableMagicNumber = 2125656924; -static const int32 kSTTableFileVersion = 1; - -// String-to-type table writing class for object of type 'T' using functor 'W' -// to write an object of type 'T' from a stream. 
'W' must conform to the -// following interface: -// -// struct Writer { -// void operator()(ostream &, const T &) const; -// }; -// -template <class T, class W> -class STTableWriter { - public: - typedef T EntryType; - typedef W EntryWriter; - - explicit STTableWriter(const string &filename) - : stream_(filename.c_str(), ofstream::out | ofstream::binary), - error_(false) { - WriteType(stream_, kSTTableMagicNumber); - WriteType(stream_, kSTTableFileVersion); - if (!stream_) { - FSTERROR() << "STTableWriter::STTableWriter: error writing to file: " - << filename; - error_=true; - } - } - - static STTableWriter<T, W> *Create(const string &filename) { - if (filename.empty()) { - LOG(ERROR) << "STTableWriter: writing to standard out unsupported."; - return 0; - } - return new STTableWriter<T, W>(filename); - } - - void Add(const string &key, const T &t) { - if (key == "") { - FSTERROR() << "STTableWriter::Add: key empty: " << key; - error_ = true; - } else if (key < last_key_) { - FSTERROR() << "STTableWriter::Add: key disorder: " << key; - error_ = true; - } - if (error_) return; - last_key_ = key; - positions_.push_back(stream_.tellp()); - WriteType(stream_, key); - entry_writer_(stream_, t); - } - - bool Error() const { return error_; } - - ~STTableWriter() { - WriteType(stream_, positions_); - WriteType(stream_, static_cast<int64>(positions_.size())); - } - - private: - EntryWriter entry_writer_; // Write functor for 'EntryType' - ofstream stream_; // Output stream - vector<int64> positions_; // Position in file of each key-entry pair - string last_key_; // Last key - bool error_; - - DISALLOW_COPY_AND_ASSIGN(STTableWriter); -}; - - -// String-to-type table reading class for object of type 'T' using functor 'R' -// to read an object of type 'T' form a stream. 
'R' must conform to the -// following interface: -// -// struct Reader { -// T *operator()(istream &) const; -// }; -// -template <class T, class R> -class STTableReader { - public: - typedef T EntryType; - typedef R EntryReader; - - explicit STTableReader(const vector<string> &filenames) - : sources_(filenames), entry_(0), error_(false) { - compare_ = new Compare(&keys_); - keys_.resize(filenames.size()); - streams_.resize(filenames.size(), 0); - positions_.resize(filenames.size()); - for (size_t i = 0; i < filenames.size(); ++i) { - streams_[i] = new ifstream( - filenames[i].c_str(), ifstream::in | ifstream::binary); - int32 magic_number = 0, file_version = 0; - ReadType(*streams_[i], &magic_number); - ReadType(*streams_[i], &file_version); - if (magic_number != kSTTableMagicNumber) { - FSTERROR() << "STTableReader::STTableReader: wrong file type: " - << filenames[i]; - error_ = true; - return; - } - if (file_version != kSTTableFileVersion) { - FSTERROR() << "STTableReader::STTableReader: wrong file version: " - << filenames[i]; - error_ = true; - return; - } - int64 num_entries; - streams_[i]->seekg(-static_cast<int>(sizeof(int64)), ios_base::end); - ReadType(*streams_[i], &num_entries); - streams_[i]->seekg(-static_cast<int>(sizeof(int64)) * - (num_entries + 1), ios_base::end); - positions_[i].resize(num_entries); - for (size_t j = 0; (j < num_entries) && (*streams_[i]); ++j) - ReadType(*streams_[i], &(positions_[i][j])); - streams_[i]->seekg(positions_[i][0]); - if (!*streams_[i]) { - FSTERROR() << "STTableReader::STTableReader: error reading file: " - << filenames[i]; - error_ = true; - return; - } - - } - MakeHeap(); - } - - ~STTableReader() { - for (size_t i = 0; i < streams_.size(); ++i) - delete streams_[i]; - delete compare_; - if (entry_) - delete entry_; - } - - static STTableReader<T, R> *Open(const string &filename) { - if (filename.empty()) { - LOG(ERROR) << "STTableReader: reading from standard in not supported"; - return 0; - } - vector<string> 
filenames; - filenames.push_back(filename); - return new STTableReader<T, R>(filenames); - } - - static STTableReader<T, R> *Open(const vector<string> &filenames) { - return new STTableReader<T, R>(filenames); - } - - void Reset() { - if (error_) return; - for (size_t i = 0; i < streams_.size(); ++i) - streams_[i]->seekg(positions_[i].front()); - MakeHeap(); - } - - bool Find(const string &key) { - if (error_) return false; - for (size_t i = 0; i < streams_.size(); ++i) - LowerBound(i, key); - MakeHeap(); - return keys_[current_] == key; - } - - bool Done() const { return error_ || heap_.empty(); } - - void Next() { - if (error_) return; - if (streams_[current_]->tellg() <= positions_[current_].back()) { - ReadType(*(streams_[current_]), &(keys_[current_])); - if (!*streams_[current_]) { - FSTERROR() << "STTableReader: error reading file: " - << sources_[current_]; - error_ = true; - return; - } - push_heap(heap_.begin(), heap_.end(), *compare_); - } else { - heap_.pop_back(); - } - if (!heap_.empty()) - PopHeap(); - } - - const string &GetKey() const { - return keys_[current_]; - } - - const EntryType &GetEntry() const { - return *entry_; - } - - bool Error() const { return error_; } - - private: - // Comparison functor used to compare stream IDs in the heap - struct Compare { - Compare(const vector<string> *keys) : keys_(keys) {} - - bool operator()(size_t i, size_t j) const { - return (*keys_)[i] > (*keys_)[j]; - }; - - private: - const vector<string> *keys_; - }; - - // Position the stream with ID 'id' at the position corresponding - // to the lower bound for key 'find_key' - void LowerBound(size_t id, const string &find_key) { - ifstream *strm = streams_[id]; - const vector<int64> &positions = positions_[id]; - size_t low = 0, high = positions.size() - 1; - - while (low < high) { - size_t mid = (low + high)/2; - strm->seekg(positions[mid]); - string key; - ReadType(*strm, &key); - if (key > find_key) { - high = mid; - } else if (key < find_key) { - low = mid + 
1; - } else { - for (size_t i = mid; i > low; --i) { - strm->seekg(positions[i - 1]); - ReadType(*strm, &key); - if (key != find_key) { - strm->seekg(positions[i]); - return; - } - } - strm->seekg(positions[low]); - return; - } - } - strm->seekg(positions[low]); - } - - // Add all streams to the heap - void MakeHeap() { - heap_.clear(); - for (size_t i = 0; i < streams_.size(); ++i) { - ReadType(*streams_[i], &(keys_[i])); - if (!*streams_[i]) { - FSTERROR() << "STTableReader: error reading file: " << sources_[i]; - error_ = true; - return; - } - heap_.push_back(i); - } - make_heap(heap_.begin(), heap_.end(), *compare_); - PopHeap(); - } - - // Position the stream with the lowest key at the top - // of the heap, set 'current_' to the ID of that stream - // and read the current entry from that stream - void PopHeap() { - pop_heap(heap_.begin(), heap_.end(), *compare_); - current_ = heap_.back(); - if (entry_) - delete entry_; - entry_ = entry_reader_(*streams_[current_]); - if (!entry_) - error_ = true; - if (!*streams_[current_]) { - FSTERROR() << "STTableReader: error reading entry for key: " - << keys_[current_] << ", file: " << sources_[current_]; - error_ = true; - } - } - - - EntryReader entry_reader_; // Read functor for 'EntryType' - vector<ifstream*> streams_; // Input streams - vector<string> sources_; // and corresponding file names - vector<vector<int64> > positions_; // Index of positions for each stream - vector<string> keys_; // Lowest unread key for each stream - vector<int64> heap_; // Heap containing ID of streams with unread keys - int64 current_; // Id of current stream to be read - Compare *compare_; // Functor comparing stream IDs for the heap - mutable EntryType *entry_; // Pointer to the currently read entry - bool error_; - - DISALLOW_COPY_AND_ASSIGN(STTableReader); -}; - - -// String-to-type table header reading function template on the entry header -// type 'H' having a member function: -// Read(istream &strm, const string &filename); -// 
Checks that 'filename' is an STTable and call the H::Read() on the last -// entry in the STTable. -template <class H> -bool ReadSTTableHeader(const string &filename, H *header) { - ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); - int32 magic_number = 0, file_version = 0; - ReadType(strm, &magic_number); - ReadType(strm, &file_version); - if (magic_number != kSTTableMagicNumber) { - LOG(ERROR) << "ReadSTTableHeader: wrong file type: " << filename; - return false; - } - if (file_version != kSTTableFileVersion) { - LOG(ERROR) << "ReadSTTableHeader: wrong file version: " << filename; - return false; - } - int64 i = -1; - strm.seekg(-static_cast<int>(sizeof(int64)), ios_base::end); - ReadType(strm, &i); // Read number of entries - if (!strm) { - LOG(ERROR) << "ReadSTTableHeader: error reading file: " << filename; - return false; - } - if (i == 0) return true; // No entry header to read - strm.seekg(-2 * static_cast<int>(sizeof(int64)), ios_base::end); - ReadType(strm, &i); // Read position for last entry in file - strm.seekg(i); - string key; - ReadType(strm, &key); - header->Read(strm, filename + ":" + key); - if (!strm) { - LOG(ERROR) << "ReadSTTableHeader: error reading file: " << filename; - return false; - } - return true; -} - -bool IsSTTable(const string &filename); - -} // namespace fst - -#endif // FST_EXTENSIONS_FAR_STTABLE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/bitmap-index.h b/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/bitmap-index.h deleted file mode 100644 index f5a5ba7..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/bitmap-index.h +++ /dev/null @@ -1,183 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jeffrey Sorensen) - -#ifndef FST_EXTENSIONS_NGRAM_BITMAP_INDEX_H_ -#define FST_EXTENSIONS_NGRAM_BITMAP_INDEX_H_ - -#include <vector> -using std::vector; - -#include <fst/compat.h> - -// This class is a bitstring storage class with an index that allows -// seeking to the Nth set or clear bit in time O(Log(N)) where N is -// the length of the bit vector. In addition, it allows counting set or -// clear bits over ranges in constant time. -// -// This is accomplished by maintaining an "secondary" index of limited -// size in bits that maintains a running count of the number of bits set -// in each block of bitmap data. A block is defined as the number of -// uint64 values that can fit in the secondary index before an overflow -// occurs. -// -// To handle overflows, a "primary" index containing a running count of -// bits set in each block is created using the type uint64. 
- -namespace fst { - -class BitmapIndex { - public: - static size_t StorageSize(size_t size) { - return ((size + kStorageBlockMask) >> kStorageLogBitSize); - } - - BitmapIndex() : bits_(NULL), size_(0) { } - - bool Get(size_t index) const { - return (bits_[index >> kStorageLogBitSize] & - (kOne << (index & kStorageBlockMask))) != 0; - } - - static void Set(uint64* bits, size_t index) { - bits[index >> kStorageLogBitSize] |= (kOne << (index & kStorageBlockMask)); - } - - static void Clear(uint64* bits, size_t index) { - bits[index >> kStorageLogBitSize] &= ~(kOne << (index & kStorageBlockMask)); - } - - size_t Bits() const { - return size_; - } - - size_t ArraySize() const { - return StorageSize(size_); - } - - // Returns the number of one bits in the bitmap - size_t GetOnesCount() const { - return primary_index_[primary_index_size() - 1]; - } - - // Returns the number of one bits in positions 0 to limit - 1. - // REQUIRES: limit <= Bits() - size_t Rank1(size_t end) const; - - // Returns the number of one bits in the range start to end - 1. - // REQUIRES: limit <= Bits() - size_t GetOnesCountInRange(size_t start, size_t end) const { - return Rank1(end) - Rank1(start); - } - - // Returns the number of zero bits in positions 0 to limit - 1. - // REQUIRES: limit <= Bits() - size_t Rank0(size_t end) const { - return end - Rank1(end); - } - - // Returns the number of zero bits in the range start to end - 1. - // REQUIRES: limit <= Bits() - size_t GetZeroesCountInRange(size_t start, size_t end) const { - return end - start - GetOnesCountInRange(start, end); - } - - // Return true if any bit between begin inclusive and end exclusive - // is set. 0 <= begin <= end <= Bits() is required. 
- // - bool TestRange(size_t start, size_t end) const { - return Rank1(end) > Rank1(start); - } - - // Returns the offset to the nth set bit (zero based) - // or Bits() if index >= number of ones - size_t Select1(size_t bit_index) const; - - // Returns the offset to the nth clear bit (zero based) - // or Bits() if index > number of - size_t Select0(size_t bit_index) const; - - // Rebuilds from index for the associated Bitmap, should be called - // whenever changes have been made to the Bitmap or else behavior - // of the indexed bitmap methods will be undefined. - void BuildIndex(const uint64 *bits, size_t size); - - // the secondary index accumulates counts until it can possibly overflow - // this constant computes the number of uint64 units that can fit into - // units the size of uint16. - static const uint64 kOne = 1; - static const uint32 kStorageBitSize = 64; - static const uint32 kStorageLogBitSize = 6; - static const uint32 kSecondaryBlockSize = ((1 << 16) - 1) - >> kStorageLogBitSize; - - private: - static const uint32 kStorageBlockMask = kStorageBitSize - 1; - - // returns, from the index, the count of ones up to array_index - size_t get_index_ones_count(size_t array_index) const; - - // because the indexes, both primary and secondary, contain a running - // count of the population of one bits contained in [0,i), there is - // no reason to have an element in the zeroth position as this value would - // necessarily be zero. (The bits are indexed in a zero based way.) Thus - // we don't store the 0th element in either index. Both of the following - // functions, if greater than 0, must be decremented by one before retreiving - // the value from the corresponding array. 
- // returns the 1 + the block that contains the bitindex in question - // the inverted version works the same but looks for zeros using an inverted - // view of the index - size_t find_primary_block(size_t bit_index) const; - - size_t find_inverted_primary_block(size_t bit_index) const; - - // similarly, the secondary index (which resets its count to zero at - // the end of every kSecondaryBlockSize entries) does not store the element - // at 0. Note that the rem_bit_index parameter is the number of bits - // within the secondary block, after the bits accounted for by the primary - // block have been removed (i.e. the remaining bits) And, because we - // reset to zero with each new block, there is no need to store those - // actual zeros. - // returns 1 + the secondary block that contains the bitindex in question - size_t find_secondary_block(size_t block, size_t rem_bit_index) const; - - size_t find_inverted_secondary_block(size_t block, size_t rem_bit_index) - const; - - // We create a primary index based upon the number of secondary index - // blocks. The primary index uses fields wide enough to accomodate any - // index of the bitarray so cannot overflow - // The primary index is the actual running - // count of one bits set for all blocks (and, thus, all uint64s). - size_t primary_index_size() const { - return (ArraySize() + kSecondaryBlockSize - 1) / kSecondaryBlockSize; - } - - const uint64* bits_; - size_t size_; - - // The primary index contains the running popcount of all blocks - // which means the nth value contains the popcounts of - // [0,n*kSecondaryBlockSize], however, the 0th element is omitted. - vector<uint32> primary_index_; - // The secondary index contains the running popcount of the associated - // bitmap. It is the same length (in units of uint16) as the - // bitmap's map is in units of uint64s. 
- vector<uint16> secondary_index_; -}; - -} // end namespace fst - -#endif // FST_EXTENSIONS_NGRAM_BITMAP_INDEX_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/ngram-fst.h b/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/ngram-fst.h deleted file mode 100644 index d113fb3..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/ngram-fst.h +++ /dev/null @@ -1,934 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jeffrey Sorensen) -// -#ifndef FST_EXTENSIONS_NGRAM_NGRAM_FST_H_ -#define FST_EXTENSIONS_NGRAM_NGRAM_FST_H_ - -#include <stddef.h> -#include <string.h> -#include <algorithm> -#include <string> -#include <vector> -using std::vector; - -#include <fst/compat.h> -#include <fst/fstlib.h> -#include <fst/mapped-file.h> -#include <fst/extensions/ngram/bitmap-index.h> - -// NgramFst implements a n-gram language model based upon the LOUDS data -// structure. Please refer to "Unary Data Strucutres for Language Models" -// http://research.google.com/pubs/archive/37218.pdf - -namespace fst { -template <class A> class NGramFst; -template <class A> class NGramFstMatcher; - -// Instance data containing mutable state for bookkeeping repeated access to -// the same state. 
-template <class A> -struct NGramFstInst { - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - StateId state_; - size_t num_futures_; - size_t offset_; - size_t node_; - StateId node_state_; - vector<Label> context_; - StateId context_state_; - NGramFstInst() - : state_(kNoStateId), node_state_(kNoStateId), - context_state_(kNoStateId) { } -}; - -// Implementation class for LOUDS based NgramFst interface -template <class A> -class NGramFstImpl : public FstImpl<A> { - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - using FstImpl<A>::SetType; - using FstImpl<A>::WriteHeader; - - friend class ArcIterator<NGramFst<A> >; - friend class NGramFstMatcher<A>; - - public: - using FstImpl<A>::InputSymbols; - using FstImpl<A>::SetProperties; - using FstImpl<A>::Properties; - - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - NGramFstImpl() : data_region_(0), data_(0), owned_(false) { - SetType("ngram"); - SetInputSymbols(NULL); - SetOutputSymbols(NULL); - SetProperties(kStaticProperties); - } - - NGramFstImpl(const Fst<A> &fst, vector<StateId>* order_out); - - ~NGramFstImpl() { - if (owned_) { - delete [] data_; - } - delete data_region_; - } - - static NGramFstImpl<A>* Read(istream &strm, // NOLINT - const FstReadOptions &opts) { - NGramFstImpl<A>* impl = new NGramFstImpl(); - FstHeader hdr; - if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) return 0; - uint64 num_states, num_futures, num_final; - const size_t offset = sizeof(num_states) + sizeof(num_futures) + - sizeof(num_final); - // Peek at num_states and num_futures to see how much more needs to be read. 
- strm.read(reinterpret_cast<char *>(&num_states), sizeof(num_states)); - strm.read(reinterpret_cast<char *>(&num_futures), sizeof(num_futures)); - strm.read(reinterpret_cast<char *>(&num_final), sizeof(num_final)); - size_t size = Storage(num_states, num_futures, num_final); - MappedFile *data_region = MappedFile::Allocate(size); - char *data = reinterpret_cast<char *>(data_region->mutable_data()); - // Copy num_states, num_futures and num_final back into data. - memcpy(data, reinterpret_cast<char *>(&num_states), sizeof(num_states)); - memcpy(data + sizeof(num_states), reinterpret_cast<char *>(&num_futures), - sizeof(num_futures)); - memcpy(data + sizeof(num_states) + sizeof(num_futures), - reinterpret_cast<char *>(&num_final), sizeof(num_final)); - strm.read(data + offset, size - offset); - if (!strm) { - delete impl; - return NULL; - } - impl->Init(data, false, data_region); - return impl; - } - - bool Write(ostream &strm, // NOLINT - const FstWriteOptions &opts) const { - FstHeader hdr; - hdr.SetStart(Start()); - hdr.SetNumStates(num_states_); - WriteHeader(strm, opts, kFileVersion, &hdr); - strm.write(data_, Storage(num_states_, num_futures_, num_final_)); - return strm; - } - - StateId Start() const { - return 1; - } - - Weight Final(StateId state) const { - if (final_index_.Get(state)) { - return final_probs_[final_index_.Rank1(state)]; - } else { - return Weight::Zero(); - } - } - - size_t NumArcs(StateId state, NGramFstInst<A> *inst = NULL) const { - if (inst == NULL) { - const size_t next_zero = future_index_.Select0(state + 1); - const size_t this_zero = future_index_.Select0(state); - return next_zero - this_zero - 1; - } - SetInstFuture(state, inst); - return inst->num_futures_ + ((state == 0) ? 0 : 1); - } - - size_t NumInputEpsilons(StateId state) const { - // State 0 has no parent, thus no backoff. 
- if (state == 0) return 0; - return 1; - } - - size_t NumOutputEpsilons(StateId state) const { - return NumInputEpsilons(state); - } - - StateId NumStates() const { - return num_states_; - } - - void InitStateIterator(StateIteratorData<A>* data) const { - data->base = 0; - data->nstates = num_states_; - } - - static size_t Storage(uint64 num_states, uint64 num_futures, - uint64 num_final) { - uint64 b64; - Weight weight; - Label label; - size_t offset = sizeof(num_states) + sizeof(num_futures) + - sizeof(num_final); - offset += sizeof(b64) * ( - BitmapIndex::StorageSize(num_states * 2 + 1) + - BitmapIndex::StorageSize(num_futures + num_states + 1) + - BitmapIndex::StorageSize(num_states)); - offset += (num_states + 1) * sizeof(label) + num_futures * sizeof(label); - // Pad for alignemnt, see - // http://en.wikipedia.org/wiki/Data_structure_alignment#Computing_padding - offset = (offset + sizeof(weight) - 1) & ~(sizeof(weight) - 1); - offset += (num_states + 1) * sizeof(weight) + num_final * sizeof(weight) + - (num_futures + 1) * sizeof(weight); - return offset; - } - - void SetInstFuture(StateId state, NGramFstInst<A> *inst) const { - if (inst->state_ != state) { - inst->state_ = state; - const size_t next_zero = future_index_.Select0(state + 1); - const size_t this_zero = future_index_.Select0(state); - inst->num_futures_ = next_zero - this_zero - 1; - inst->offset_ = future_index_.Rank1(future_index_.Select0(state) + 1); - } - } - - void SetInstNode(NGramFstInst<A> *inst) const { - if (inst->node_state_ != inst->state_) { - inst->node_state_ = inst->state_; - inst->node_ = context_index_.Select1(inst->state_); - } - } - - void SetInstContext(NGramFstInst<A> *inst) const { - SetInstNode(inst); - if (inst->context_state_ != inst->state_) { - inst->context_state_ = inst->state_; - inst->context_.clear(); - size_t node = inst->node_; - while (node != 0) { - inst->context_.push_back(context_words_[context_index_.Rank1(node)]); - node = 
context_index_.Select1(context_index_.Rank0(node) - 1); - } - } - } - - // Access to the underlying representation - const char* GetData(size_t* data_size) const { - *data_size = Storage(num_states_, num_futures_, num_final_); - return data_; - } - - void Init(const char* data, bool owned, MappedFile *file = 0); - - const vector<Label> &GetContext(StateId s, NGramFstInst<A> *inst) const { - SetInstFuture(s, inst); - SetInstContext(inst); - return inst->context_; - } - - private: - StateId Transition(const vector<Label> &context, Label future) const; - - // Properties always true for this Fst class. - static const uint64 kStaticProperties = kAcceptor | kIDeterministic | - kODeterministic | kEpsilons | kIEpsilons | kOEpsilons | kILabelSorted | - kOLabelSorted | kWeighted | kCyclic | kInitialAcyclic | kNotTopSorted | - kAccessible | kCoAccessible | kNotString | kExpanded; - // Current file format version. - static const int kFileVersion = 4; - // Minimum file format version supported. - static const int kMinFileVersion = 4; - - MappedFile *data_region_; - const char* data_; - bool owned_; // True if we own data_ - uint64 num_states_, num_futures_, num_final_; - size_t root_num_children_; - const Label *root_children_; - size_t root_first_child_; - // borrowed references - const uint64 *context_, *future_, *final_; - const Label *context_words_, *future_words_; - const Weight *backoff_, *final_probs_, *future_probs_; - BitmapIndex context_index_; - BitmapIndex future_index_; - BitmapIndex final_index_; - - void operator=(const NGramFstImpl<A> &); // Disallow -}; - -template<typename A> -NGramFstImpl<A>::NGramFstImpl(const Fst<A> &fst, vector<StateId>* order_out) - : data_region_(0), data_(0), owned_(false) { - typedef A Arc; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - typedef typename Arc::StateId StateId; - SetType("ngram"); - SetInputSymbols(fst.InputSymbols()); - SetOutputSymbols(fst.OutputSymbols()); - 
SetProperties(kStaticProperties); - - // Check basic requirements for an OpenGRM language model Fst. - int64 props = kAcceptor | kIDeterministic | kIEpsilons | kILabelSorted; - if (fst.Properties(props, true) != props) { - FSTERROR() << "NGramFst only accepts OpenGRM langauge models as input"; - SetProperties(kError, kError); - return; - } - - int64 num_states = CountStates(fst); - Label* context = new Label[num_states]; - - // Find the unigram state by starting from the start state, following - // epsilons. - StateId unigram = fst.Start(); - while (1) { - if (unigram == kNoStateId) { - FSTERROR() << "Could not identify unigram state."; - SetProperties(kError, kError); - return; - } - ArcIterator<Fst<A> > aiter(fst, unigram); - if (aiter.Done()) { - LOG(WARNING) << "Unigram state " << unigram << " has no arcs."; - break; - } - if (aiter.Value().ilabel != 0) break; - unigram = aiter.Value().nextstate; - } - - // Each state's context is determined by the subtree it is under from the - // unigram state. - queue<pair<StateId, Label> > label_queue; - vector<bool> visited(num_states); - // Force an epsilon link to the start state. - label_queue.push(make_pair(fst.Start(), 0)); - for (ArcIterator<Fst<A> > aiter(fst, unigram); - !aiter.Done(); aiter.Next()) { - label_queue.push(make_pair(aiter.Value().nextstate, aiter.Value().ilabel)); - } - // investigate states in breadth first fashion to assign context words. 
- while (!label_queue.empty()) { - pair<StateId, Label> &now = label_queue.front(); - if (!visited[now.first]) { - context[now.first] = now.second; - visited[now.first] = true; - for (ArcIterator<Fst<A> > aiter(fst, now.first); - !aiter.Done(); aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel != 0) { - label_queue.push(make_pair(arc.nextstate, now.second)); - } - } - } - label_queue.pop(); - } - visited.clear(); - - // The arc from the start state should be assigned an epsilon to put it - // in front of the all other labels (which makes Start state 1 after - // unigram which is state 0). - context[fst.Start()] = 0; - - // Build the tree of contexts fst by reversing the epsilon arcs from fst. - VectorFst<Arc> context_fst; - uint64 num_final = 0; - for (int i = 0; i < num_states; ++i) { - if (fst.Final(i) != Weight::Zero()) { - ++num_final; - } - context_fst.SetFinal(context_fst.AddState(), fst.Final(i)); - } - context_fst.SetStart(unigram); - context_fst.SetInputSymbols(fst.InputSymbols()); - context_fst.SetOutputSymbols(fst.OutputSymbols()); - int64 num_context_arcs = 0; - int64 num_futures = 0; - for (StateIterator<Fst<A> > siter(fst); !siter.Done(); siter.Next()) { - const StateId &state = siter.Value(); - num_futures += fst.NumArcs(state) - fst.NumInputEpsilons(state); - ArcIterator<Fst<A> > aiter(fst, state); - if (!aiter.Done()) { - const Arc &arc = aiter.Value(); - // this arc goes from state to arc.nextstate, so create an arc from - // arc.nextstate to state to reverse it. 
- if (arc.ilabel == 0) { - context_fst.AddArc(arc.nextstate, Arc(context[state], context[state], - arc.weight, state)); - num_context_arcs++; - } - } - } - if (num_context_arcs != context_fst.NumStates() - 1) { - FSTERROR() << "Number of contexts arcs != number of states - 1"; - SetProperties(kError, kError); - return; - } - if (context_fst.NumStates() != num_states) { - FSTERROR() << "Number of contexts != number of states"; - SetProperties(kError, kError); - return; - } - int64 context_props = context_fst.Properties(kIDeterministic | - kILabelSorted, true); - if (!(context_props & kIDeterministic)) { - FSTERROR() << "Input fst is not structured properly"; - SetProperties(kError, kError); - return; - } - if (!(context_props & kILabelSorted)) { - ArcSort(&context_fst, ILabelCompare<Arc>()); - } - - delete [] context; - - uint64 b64; - Weight weight; - Label label = kNoLabel; - const size_t storage = Storage(num_states, num_futures, num_final); - MappedFile *data_region = MappedFile::Allocate(storage); - char *data = reinterpret_cast<char *>(data_region->mutable_data()); - memset(data, 0, storage); - size_t offset = 0; - memcpy(data + offset, reinterpret_cast<char *>(&num_states), - sizeof(num_states)); - offset += sizeof(num_states); - memcpy(data + offset, reinterpret_cast<char *>(&num_futures), - sizeof(num_futures)); - offset += sizeof(num_futures); - memcpy(data + offset, reinterpret_cast<char *>(&num_final), - sizeof(num_final)); - offset += sizeof(num_final); - uint64* context_bits = reinterpret_cast<uint64*>(data + offset); - offset += BitmapIndex::StorageSize(num_states * 2 + 1) * sizeof(b64); - uint64* future_bits = reinterpret_cast<uint64*>(data + offset); - offset += - BitmapIndex::StorageSize(num_futures + num_states + 1) * sizeof(b64); - uint64* final_bits = reinterpret_cast<uint64*>(data + offset); - offset += BitmapIndex::StorageSize(num_states) * sizeof(b64); - Label* context_words = reinterpret_cast<Label*>(data + offset); - offset += (num_states + 
1) * sizeof(label); - Label* future_words = reinterpret_cast<Label*>(data + offset); - offset += num_futures * sizeof(label); - offset = (offset + sizeof(weight) - 1) & ~(sizeof(weight) - 1); - Weight* backoff = reinterpret_cast<Weight*>(data + offset); - offset += (num_states + 1) * sizeof(weight); - Weight* final_probs = reinterpret_cast<Weight*>(data + offset); - offset += num_final * sizeof(weight); - Weight* future_probs = reinterpret_cast<Weight*>(data + offset); - int64 context_arc = 0, future_arc = 0, context_bit = 0, future_bit = 0, - final_bit = 0; - - // pseudo-root bits - BitmapIndex::Set(context_bits, context_bit++); - ++context_bit; - context_words[context_arc] = label; - backoff[context_arc] = Weight::Zero(); - context_arc++; - - ++future_bit; - if (order_out) { - order_out->clear(); - order_out->resize(num_states); - } - - queue<StateId> context_q; - context_q.push(context_fst.Start()); - StateId state_number = 0; - while (!context_q.empty()) { - const StateId &state = context_q.front(); - if (order_out) { - (*order_out)[state] = state_number; - } - - const Weight &final = context_fst.Final(state); - if (final != Weight::Zero()) { - BitmapIndex::Set(final_bits, state_number); - final_probs[final_bit] = final; - ++final_bit; - } - - for (ArcIterator<VectorFst<A> > aiter(context_fst, state); - !aiter.Done(); aiter.Next()) { - const Arc &arc = aiter.Value(); - context_words[context_arc] = arc.ilabel; - backoff[context_arc] = arc.weight; - ++context_arc; - BitmapIndex::Set(context_bits, context_bit++); - context_q.push(arc.nextstate); - } - ++context_bit; - - for (ArcIterator<Fst<A> > aiter(fst, state); !aiter.Done(); aiter.Next()) { - const Arc &arc = aiter.Value(); - if (arc.ilabel != 0) { - future_words[future_arc] = arc.ilabel; - future_probs[future_arc] = arc.weight; - ++future_arc; - BitmapIndex::Set(future_bits, future_bit++); - } - } - ++future_bit; - ++state_number; - context_q.pop(); - } - - if ((state_number != num_states) || - (context_bit 
!= num_states * 2 + 1) || - (context_arc != num_states) || - (future_arc != num_futures) || - (future_bit != num_futures + num_states + 1) || - (final_bit != num_final)) { - FSTERROR() << "Structure problems detected during construction"; - SetProperties(kError, kError); - return; - } - - Init(data, false, data_region); -} - -template<typename A> -inline void NGramFstImpl<A>::Init(const char* data, bool owned, - MappedFile *data_region) { - if (owned_) { - delete [] data_; - } - delete data_region_; - data_region_ = data_region; - owned_ = owned; - data_ = data; - size_t offset = 0; - num_states_ = *(reinterpret_cast<const uint64*>(data_ + offset)); - offset += sizeof(num_states_); - num_futures_ = *(reinterpret_cast<const uint64*>(data_ + offset)); - offset += sizeof(num_futures_); - num_final_ = *(reinterpret_cast<const uint64*>(data_ + offset)); - offset += sizeof(num_final_); - uint64 bits; - size_t context_bits = num_states_ * 2 + 1; - size_t future_bits = num_futures_ + num_states_ + 1; - context_ = reinterpret_cast<const uint64*>(data_ + offset); - offset += BitmapIndex::StorageSize(context_bits) * sizeof(bits); - future_ = reinterpret_cast<const uint64*>(data_ + offset); - offset += BitmapIndex::StorageSize(future_bits) * sizeof(bits); - final_ = reinterpret_cast<const uint64*>(data_ + offset); - offset += BitmapIndex::StorageSize(num_states_) * sizeof(bits); - context_words_ = reinterpret_cast<const Label*>(data_ + offset); - offset += (num_states_ + 1) * sizeof(*context_words_); - future_words_ = reinterpret_cast<const Label*>(data_ + offset); - offset += num_futures_ * sizeof(*future_words_); - offset = (offset + sizeof(*backoff_) - 1) & ~(sizeof(*backoff_) - 1); - backoff_ = reinterpret_cast<const Weight*>(data_ + offset); - offset += (num_states_ + 1) * sizeof(*backoff_); - final_probs_ = reinterpret_cast<const Weight*>(data_ + offset); - offset += num_final_ * sizeof(*final_probs_); - future_probs_ = reinterpret_cast<const Weight*>(data_ + offset); - 
- context_index_.BuildIndex(context_, context_bits); - future_index_.BuildIndex(future_, future_bits); - final_index_.BuildIndex(final_, num_states_); - - const size_t node_rank = context_index_.Rank1(0); - root_first_child_ = context_index_.Select0(node_rank) + 1; - if (context_index_.Get(root_first_child_) == false) { - FSTERROR() << "Missing unigrams"; - SetProperties(kError, kError); - return; - } - const size_t last_child = context_index_.Select0(node_rank + 1) - 1; - root_num_children_ = last_child - root_first_child_ + 1; - root_children_ = context_words_ + context_index_.Rank1(root_first_child_); -} - -template<typename A> -inline typename A::StateId NGramFstImpl<A>::Transition( - const vector<Label> &context, Label future) const { - size_t num_children = root_num_children_; - const Label *children = root_children_; - const Label *loc = lower_bound(children, children + num_children, future); - if (loc == children + num_children || *loc != future) { - return context_index_.Rank1(0); - } - size_t node = root_first_child_ + loc - children; - size_t node_rank = context_index_.Rank1(node); - size_t first_child = context_index_.Select0(node_rank) + 1; - if (context_index_.Get(first_child) == false) { - return context_index_.Rank1(node); - } - size_t last_child = context_index_.Select0(node_rank + 1) - 1; - num_children = last_child - first_child + 1; - for (int word = context.size() - 1; word >= 0; --word) { - children = context_words_ + context_index_.Rank1(first_child); - loc = lower_bound(children, children + last_child - first_child + 1, - context[word]); - if (loc == children + last_child - first_child + 1 || - *loc != context[word]) { - break; - } - node = first_child + loc - children; - node_rank = context_index_.Rank1(node); - first_child = context_index_.Select0(node_rank) + 1; - if (context_index_.Get(first_child) == false) break; - last_child = context_index_.Select0(node_rank + 1) - 1; - } - return context_index_.Rank1(node); -} - 
-/*****************************************************************************/ -template<class A> -class NGramFst : public ImplToExpandedFst<NGramFstImpl<A> > { - friend class ArcIterator<NGramFst<A> >; - friend class NGramFstMatcher<A>; - - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef NGramFstImpl<A> Impl; - - explicit NGramFst(const Fst<A> &dst) - : ImplToExpandedFst<Impl>(new Impl(dst, NULL)) {} - - NGramFst(const Fst<A> &fst, vector<StateId>* order_out) - : ImplToExpandedFst<Impl>(new Impl(fst, order_out)) {} - - // Because the NGramFstImpl is a const stateless data structure, there - // is never a need to do anything beside copy the reference. - NGramFst(const NGramFst<A> &fst, bool safe = false) - : ImplToExpandedFst<Impl>(fst, false) {} - - NGramFst() : ImplToExpandedFst<Impl>(new Impl()) {} - - // Non-standard constructor to initialize NGramFst directly from data. - NGramFst(const char* data, bool owned) : ImplToExpandedFst<Impl>(new Impl()) { - GetImpl()->Init(data, owned, NULL); - } - - // Get method that gets the data associated with Init(). - const char* GetData(size_t* data_size) const { - return GetImpl()->GetData(data_size); - } - - const vector<Label> GetContext(StateId s) const { - return GetImpl()->GetContext(s, &inst_); - } - - virtual size_t NumArcs(StateId s) const { - return GetImpl()->NumArcs(s, &inst_); - } - - virtual NGramFst<A>* Copy(bool safe = false) const { - return new NGramFst(*this, safe); - } - - static NGramFst<A>* Read(istream &strm, const FstReadOptions &opts) { - Impl* impl = Impl::Read(strm, opts); - return impl ? 
new NGramFst<A>(impl) : 0; - } - - static NGramFst<A>* Read(const string &filename) { - if (!filename.empty()) { - ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); - if (!strm) { - LOG(ERROR) << "NGramFst::Read: Can't open file: " << filename; - return 0; - } - return Read(strm, FstReadOptions(filename)); - } else { - return Read(cin, FstReadOptions("standard input")); - } - } - - virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { - return GetImpl()->Write(strm, opts); - } - - virtual bool Write(const string &filename) const { - return Fst<A>::WriteFile(filename); - } - - virtual inline void InitStateIterator(StateIteratorData<A>* data) const { - GetImpl()->InitStateIterator(data); - } - - virtual inline void InitArcIterator( - StateId s, ArcIteratorData<A>* data) const; - - virtual MatcherBase<A>* InitMatcher(MatchType match_type) const { - return new NGramFstMatcher<A>(*this, match_type); - } - - private: - explicit NGramFst(Impl* impl) : ImplToExpandedFst<Impl>(impl) {} - - Impl* GetImpl() const { - return - ImplToExpandedFst<Impl, ExpandedFst<A> >::GetImpl(); - } - - void SetImpl(Impl* impl, bool own_impl = true) { - ImplToExpandedFst<Impl, Fst<A> >::SetImpl(impl, own_impl); - } - - mutable NGramFstInst<A> inst_; -}; - -template <class A> inline void -NGramFst<A>::InitArcIterator(StateId s, ArcIteratorData<A>* data) const { - GetImpl()->SetInstFuture(s, &inst_); - GetImpl()->SetInstNode(&inst_); - data->base = new ArcIterator<NGramFst<A> >(*this, s); -} - -/*****************************************************************************/ -template <class A> -class NGramFstMatcher : public MatcherBase<A> { - public: - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - NGramFstMatcher(const NGramFst<A> &fst, MatchType match_type) - : fst_(fst), inst_(fst.inst_), match_type_(match_type), - current_loop_(false), - loop_(kNoLabel, 0, A::Weight::One(), 
kNoStateId) { - if (match_type_ == MATCH_OUTPUT) { - swap(loop_.ilabel, loop_.olabel); - } - } - - NGramFstMatcher(const NGramFstMatcher<A> &matcher, bool safe = false) - : fst_(matcher.fst_), inst_(matcher.inst_), - match_type_(matcher.match_type_), current_loop_(false), - loop_(kNoLabel, 0, A::Weight::One(), kNoStateId) { - if (match_type_ == MATCH_OUTPUT) { - swap(loop_.ilabel, loop_.olabel); - } - } - - virtual NGramFstMatcher<A>* Copy(bool safe = false) const { - return new NGramFstMatcher<A>(*this, safe); - } - - virtual MatchType Type(bool test) const { - return match_type_; - } - - virtual const Fst<A> &GetFst() const { - return fst_; - } - - virtual uint64 Properties(uint64 props) const { - return props; - } - - private: - virtual void SetState_(StateId s) { - fst_.GetImpl()->SetInstFuture(s, &inst_); - current_loop_ = false; - } - - virtual bool Find_(Label label) { - const Label nolabel = kNoLabel; - done_ = true; - if (label == 0 || label == nolabel) { - if (label == 0) { - current_loop_ = true; - loop_.nextstate = inst_.state_; - } - // The unigram state has no epsilon arc. 
- if (inst_.state_ != 0) { - arc_.ilabel = arc_.olabel = 0; - fst_.GetImpl()->SetInstNode(&inst_); - arc_.nextstate = fst_.GetImpl()->context_index_.Rank1( - fst_.GetImpl()->context_index_.Select1( - fst_.GetImpl()->context_index_.Rank0(inst_.node_) - 1)); - arc_.weight = fst_.GetImpl()->backoff_[inst_.state_]; - done_ = false; - } - } else { - const Label *start = fst_.GetImpl()->future_words_ + inst_.offset_; - const Label *end = start + inst_.num_futures_; - const Label* search = lower_bound(start, end, label); - if (search != end && *search == label) { - size_t state = search - start; - arc_.ilabel = arc_.olabel = label; - arc_.weight = fst_.GetImpl()->future_probs_[inst_.offset_ + state]; - fst_.GetImpl()->SetInstContext(&inst_); - arc_.nextstate = fst_.GetImpl()->Transition(inst_.context_, label); - done_ = false; - } - } - return !Done_(); - } - - virtual bool Done_() const { - return !current_loop_ && done_; - } - - virtual const Arc& Value_() const { - return (current_loop_) ? loop_ : arc_; - } - - virtual void Next_() { - if (current_loop_) { - current_loop_ = false; - } else { - done_ = true; - } - } - - const NGramFst<A>& fst_; - NGramFstInst<A> inst_; - MatchType match_type_; // Supplied by caller - bool done_; - Arc arc_; - bool current_loop_; // Current arc is the implicit loop - Arc loop_; -}; - -/*****************************************************************************/ -template<class A> -class ArcIterator<NGramFst<A> > : public ArcIteratorBase<A> { - public: - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - ArcIterator(const NGramFst<A> &fst, StateId state) - : lazy_(~0), impl_(fst.GetImpl()), i_(0), flags_(kArcValueFlags) { - inst_ = fst.inst_; - impl_->SetInstFuture(state, &inst_); - impl_->SetInstNode(&inst_); - } - - bool Done() const { - return i_ >= ((inst_.node_ == 0) ? 
inst_.num_futures_ : - inst_.num_futures_ + 1); - } - - const Arc &Value() const { - bool eps = (inst_.node_ != 0 && i_ == 0); - StateId state = (inst_.node_ == 0) ? i_ : i_ - 1; - if (flags_ & lazy_ & (kArcILabelValue | kArcOLabelValue)) { - arc_.ilabel = - arc_.olabel = eps ? 0 : impl_->future_words_[inst_.offset_ + state]; - lazy_ &= ~(kArcILabelValue | kArcOLabelValue); - } - if (flags_ & lazy_ & kArcNextStateValue) { - if (eps) { - arc_.nextstate = impl_->context_index_.Rank1( - impl_->context_index_.Select1( - impl_->context_index_.Rank0(inst_.node_) - 1)); - } else { - if (lazy_ & kArcNextStateValue) { - impl_->SetInstContext(&inst_); // first time only. - } - arc_.nextstate = - impl_->Transition(inst_.context_, - impl_->future_words_[inst_.offset_ + state]); - } - lazy_ &= ~kArcNextStateValue; - } - if (flags_ & lazy_ & kArcWeightValue) { - arc_.weight = eps ? impl_->backoff_[inst_.state_] : - impl_->future_probs_[inst_.offset_ + state]; - lazy_ &= ~kArcWeightValue; - } - return arc_; - } - - void Next() { - ++i_; - lazy_ = ~0; - } - - size_t Position() const { return i_; } - - void Reset() { - i_ = 0; - lazy_ = ~0; - } - - void Seek(size_t a) { - if (i_ != a) { - i_ = a; - lazy_ = ~0; - } - } - - uint32 Flags() const { - return flags_; - } - - void SetFlags(uint32 f, uint32 m) { - flags_ &= ~m; - flags_ |= (f & kArcValueFlags); - } - - private: - virtual bool Done_() const { return Done(); } - virtual const Arc& Value_() const { return Value(); } - virtual void Next_() { Next(); } - virtual size_t Position_() const { return Position(); } - virtual void Reset_() { Reset(); } - virtual void Seek_(size_t a) { Seek(a); } - uint32 Flags_() const { return Flags(); } - void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); } - - mutable Arc arc_; - mutable uint32 lazy_; - const NGramFstImpl<A> *impl_; - mutable NGramFstInst<A> inst_; - - size_t i_; - uint32 flags_; - - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - 
-/*****************************************************************************/ -// Specialization for NGramFst; see generic version in fst.h -// for sample usage (but use the ProdLmFst type!). This version -// should inline. -template <class A> -class StateIterator<NGramFst<A> > : public StateIteratorBase<A> { - public: - typedef typename A::StateId StateId; - - explicit StateIterator(const NGramFst<A> &fst) - : s_(0), num_states_(fst.NumStates()) { } - - bool Done() const { return s_ >= num_states_; } - StateId Value() const { return s_; } - void Next() { ++s_; } - void Reset() { s_ = 0; } - - private: - virtual bool Done_() const { return Done(); } - virtual StateId Value_() const { return Value(); } - virtual void Next_() { Next(); } - virtual void Reset_() { Reset(); } - - StateId s_, num_states_; - - DISALLOW_COPY_AND_ASSIGN(StateIterator); -}; -} // namespace fst -#endif // FST_EXTENSIONS_NGRAM_NGRAM_FST_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/nthbit.h b/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/nthbit.h deleted file mode 100644 index d4a9a5a..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/nthbit.h +++ /dev/null @@ -1,46 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jeffrey Sorensen) -// [email protected] (Doug Rohde) - -#ifndef FST_EXTENSIONS_NGRAM_NTHBIT_H_ -#define FST_EXTENSIONS_NGRAM_NTHBIT_H_ - -#include <fst/types.h> - -extern uint32 nth_bit_bit_offset[]; - -inline uint32 nth_bit(uint64 v, uint32 r) { - uint32 shift = 0; - uint32 c = __builtin_popcount(v & 0xffffffff); - uint32 mask = -(r > c); - r -= c & mask; - shift += (32 & mask); - - c = __builtin_popcount((v >> shift) & 0xffff); - mask = -(r > c); - r -= c & mask; - shift += (16 & mask); - - c = __builtin_popcount((v >> shift) & 0xff); - mask = -(r > c); - r -= c & mask; - shift += (8 & mask); - - return shift + ((nth_bit_bit_offset[(v >> shift) & 0xff] >> - ((r - 1) << 2)) & 0xf); -} - -#endif // FST_EXTENSIONS_NGRAM_NTHBIT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/factor-weight.h b/kaldi_io/src/tools/openfst/include/fst/factor-weight.h deleted file mode 100644 index 685155c..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/factor-weight.h +++ /dev/null @@ -1,475 +0,0 @@ -// factor-weight.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// Classes to factor weights in an FST. 
- -#ifndef FST_LIB_FACTOR_WEIGHT_H__ -#define FST_LIB_FACTOR_WEIGHT_H__ - -#include <algorithm> -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <string> -#include <utility> -using std::pair; using std::make_pair; -#include <vector> -using std::vector; - -#include <fst/cache.h> -#include <fst/test-properties.h> - - -namespace fst { - -const uint32 kFactorFinalWeights = 0x00000001; -const uint32 kFactorArcWeights = 0x00000002; - -template <class Arc> -struct FactorWeightOptions : CacheOptions { - typedef typename Arc::Label Label; - float delta; - uint32 mode; // factor arc weights and/or final weights - Label final_ilabel; // input label of arc created when factoring final w's - Label final_olabel; // output label of arc created when factoring final w's - - FactorWeightOptions(const CacheOptions &opts, float d, - uint32 m = kFactorArcWeights | kFactorFinalWeights, - Label il = 0, Label ol = 0) - : CacheOptions(opts), delta(d), mode(m), final_ilabel(il), - final_olabel(ol) {} - - explicit FactorWeightOptions( - float d, uint32 m = kFactorArcWeights | kFactorFinalWeights, - Label il = 0, Label ol = 0) - : delta(d), mode(m), final_ilabel(il), final_olabel(ol) {} - - FactorWeightOptions(uint32 m = kFactorArcWeights | kFactorFinalWeights, - Label il = 0, Label ol = 0) - : delta(kDelta), mode(m), final_ilabel(il), final_olabel(ol) {} -}; - - -// A factor iterator takes as argument a weight w and returns a -// sequence of pairs of weights (xi,yi) such that the sum of the -// products xi times yi is equal to w. If w is fully factored, -// the iterator should return nothing. -// -// template <class W> -// class FactorIterator { -// public: -// FactorIterator(W w); -// bool Done() const; -// void Next(); -// pair<W, W> Value() const; -// void Reset(); -// } - - -// Factor trivially. 
-template <class W> -class IdentityFactor { - public: - IdentityFactor(const W &w) {} - bool Done() const { return true; } - void Next() {} - pair<W, W> Value() const { return make_pair(W::One(), W::One()); } // unused - void Reset() {} -}; - - -// Factor a StringWeight w as 'ab' where 'a' is a label. -template <typename L, StringType S = STRING_LEFT> -class StringFactor { - public: - StringFactor(const StringWeight<L, S> &w) - : weight_(w), done_(w.Size() <= 1) {} - - bool Done() const { return done_; } - - void Next() { done_ = true; } - - pair< StringWeight<L, S>, StringWeight<L, S> > Value() const { - StringWeightIterator<L, S> iter(weight_); - StringWeight<L, S> w1(iter.Value()); - StringWeight<L, S> w2; - for (iter.Next(); !iter.Done(); iter.Next()) - w2.PushBack(iter.Value()); - return make_pair(w1, w2); - } - - void Reset() { done_ = weight_.Size() <= 1; } - - private: - StringWeight<L, S> weight_; - bool done_; -}; - - -// Factor a GallicWeight using StringFactor. -template <class L, class W, StringType S = STRING_LEFT> -class GallicFactor { - public: - GallicFactor(const GallicWeight<L, W, S> &w) - : weight_(w), done_(w.Value1().Size() <= 1) {} - - bool Done() const { return done_; } - - void Next() { done_ = true; } - - pair< GallicWeight<L, W, S>, GallicWeight<L, W, S> > Value() const { - StringFactor<L, S> iter(weight_.Value1()); - GallicWeight<L, W, S> w1(iter.Value().first, weight_.Value2()); - GallicWeight<L, W, S> w2(iter.Value().second, W::One()); - return make_pair(w1, w2); - } - - void Reset() { done_ = weight_.Value1().Size() <= 1; } - - private: - GallicWeight<L, W, S> weight_; - bool done_; -}; - - -// Implementation class for FactorWeight -template <class A, class F> -class FactorWeightFstImpl - : public CacheImpl<A> { - public: - using FstImpl<A>::SetType; - using FstImpl<A>::SetProperties; - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - - using CacheBaseImpl< CacheState<A> >::PushArc; - using CacheBaseImpl< 
CacheState<A> >::HasStart; - using CacheBaseImpl< CacheState<A> >::HasFinal; - using CacheBaseImpl< CacheState<A> >::HasArcs; - using CacheBaseImpl< CacheState<A> >::SetArcs; - using CacheBaseImpl< CacheState<A> >::SetFinal; - using CacheBaseImpl< CacheState<A> >::SetStart; - - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef F FactorIterator; - - struct Element { - Element() {} - - Element(StateId s, Weight w) : state(s), weight(w) {} - - StateId state; // Input state Id - Weight weight; // Residual weight - }; - - FactorWeightFstImpl(const Fst<A> &fst, const FactorWeightOptions<A> &opts) - : CacheImpl<A>(opts), - fst_(fst.Copy()), - delta_(opts.delta), - mode_(opts.mode), - final_ilabel_(opts.final_ilabel), - final_olabel_(opts.final_olabel) { - SetType("factor_weight"); - uint64 props = fst.Properties(kFstProperties, false); - SetProperties(FactorWeightProperties(props), kCopyProperties); - - SetInputSymbols(fst.InputSymbols()); - SetOutputSymbols(fst.OutputSymbols()); - - if (mode_ == 0) - LOG(WARNING) << "FactorWeightFst: factor mode is set to 0: " - << "factoring neither arc weights nor final weights."; - } - - FactorWeightFstImpl(const FactorWeightFstImpl<A, F> &impl) - : CacheImpl<A>(impl), - fst_(impl.fst_->Copy(true)), - delta_(impl.delta_), - mode_(impl.mode_), - final_ilabel_(impl.final_ilabel_), - final_olabel_(impl.final_olabel_) { - SetType("factor_weight"); - SetProperties(impl.Properties(), kCopyProperties); - SetInputSymbols(impl.InputSymbols()); - SetOutputSymbols(impl.OutputSymbols()); - } - - ~FactorWeightFstImpl() { - delete fst_; - } - - StateId Start() { - if (!HasStart()) { - StateId s = fst_->Start(); - if (s == kNoStateId) - return kNoStateId; - StateId start = FindState(Element(fst_->Start(), Weight::One())); - SetStart(start); - } - return CacheImpl<A>::Start(); - } - - Weight Final(StateId s) { - if (!HasFinal(s)) { - const Element &e = elements_[s]; 
- // TODO: fix so cast is unnecessary - Weight w = e.state == kNoStateId - ? e.weight - : (Weight) Times(e.weight, fst_->Final(e.state)); - FactorIterator f(w); - if (!(mode_ & kFactorFinalWeights) || f.Done()) - SetFinal(s, w); - else - SetFinal(s, Weight::Zero()); - } - return CacheImpl<A>::Final(s); - } - - size_t NumArcs(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumArcs(s); - } - - size_t NumInputEpsilons(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumInputEpsilons(s); - } - - size_t NumOutputEpsilons(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumOutputEpsilons(s); - } - - uint64 Properties() const { return Properties(kFstProperties); } - - // Set error if found; return FST impl properties. - uint64 Properties(uint64 mask) const { - if ((mask & kError) && fst_->Properties(kError, false)) - SetProperties(kError, kError); - return FstImpl<Arc>::Properties(mask); - } - - void InitArcIterator(StateId s, ArcIteratorData<A> *data) { - if (!HasArcs(s)) - Expand(s); - CacheImpl<A>::InitArcIterator(s, data); - } - - - // Find state corresponding to an element. Create new state - // if element not found. - StateId FindState(const Element &e) { - if (!(mode_ & kFactorArcWeights) && e.weight == Weight::One()) { - while (unfactored_.size() <= e.state) - unfactored_.push_back(kNoStateId); - if (unfactored_[e.state] == kNoStateId) { - unfactored_[e.state] = elements_.size(); - elements_.push_back(e); - } - return unfactored_[e.state]; - } else { - typename ElementMap::iterator eit = element_map_.find(e); - if (eit != element_map_.end()) { - return (*eit).second; - } else { - StateId s = elements_.size(); - elements_.push_back(e); - element_map_.insert(pair<const Element, StateId>(e, s)); - return s; - } - } - } - - // Computes the outgoing transitions from a state, creating new destination - // states as needed. 
- void Expand(StateId s) { - Element e = elements_[s]; - if (e.state != kNoStateId) { - for (ArcIterator< Fst<A> > ait(*fst_, e.state); - !ait.Done(); - ait.Next()) { - const A &arc = ait.Value(); - Weight w = Times(e.weight, arc.weight); - FactorIterator fit(w); - if (!(mode_ & kFactorArcWeights) || fit.Done()) { - StateId d = FindState(Element(arc.nextstate, Weight::One())); - PushArc(s, Arc(arc.ilabel, arc.olabel, w, d)); - } else { - for (; !fit.Done(); fit.Next()) { - const pair<Weight, Weight> &p = fit.Value(); - StateId d = FindState(Element(arc.nextstate, - p.second.Quantize(delta_))); - PushArc(s, Arc(arc.ilabel, arc.olabel, p.first, d)); - } - } - } - } - - if ((mode_ & kFactorFinalWeights) && - ((e.state == kNoStateId) || - (fst_->Final(e.state) != Weight::Zero()))) { - Weight w = e.state == kNoStateId - ? e.weight - : Times(e.weight, fst_->Final(e.state)); - for (FactorIterator fit(w); - !fit.Done(); - fit.Next()) { - const pair<Weight, Weight> &p = fit.Value(); - StateId d = FindState(Element(kNoStateId, - p.second.Quantize(delta_))); - PushArc(s, Arc(final_ilabel_, final_olabel_, p.first, d)); - } - } - SetArcs(s); - } - - private: - static const size_t kPrime = 7853; - - // Equality function for Elements, assume weights have been quantized. - class ElementEqual { - public: - bool operator()(const Element &x, const Element &y) const { - return x.state == y.state && x.weight == y.weight; - } - }; - - // Hash function for Elements to Fst states. 
- class ElementKey { - public: - size_t operator()(const Element &x) const { - return static_cast<size_t>(x.state * kPrime + x.weight.Hash()); - } - private: - }; - - typedef unordered_map<Element, StateId, ElementKey, ElementEqual> ElementMap; - - const Fst<A> *fst_; - float delta_; - uint32 mode_; // factoring arc and/or final weights - Label final_ilabel_; // ilabel of arc created when factoring final w's - Label final_olabel_; // olabel of arc created when factoring final w's - vector<Element> elements_; // mapping Fst state to Elements - ElementMap element_map_; // mapping Elements to Fst state - // mapping between old/new 'StateId' for states that do not need to - // be factored when 'mode_' is '0' or 'kFactorFinalWeights' - vector<StateId> unfactored_; - - void operator=(const FactorWeightFstImpl<A, F> &); // disallow -}; - -template <class A, class F> const size_t FactorWeightFstImpl<A, F>::kPrime; - - -// FactorWeightFst takes as template parameter a FactorIterator as -// defined above. The result of weight factoring is a transducer -// equivalent to the input whose path weights have been factored -// according to the FactorIterator. States and transitions will be -// added as necessary. The algorithm is a generalization to arbitrary -// weights of the second step of the input epsilon-normalization -// algorithm due to Mohri, "Generic epsilon-removal and input -// epsilon-normalization algorithms for weighted transducers", -// International Journal of Computer Science 13(1): 129-143 (2002). -// -// This class attaches interface to implementation and handles -// reference counting, delegating most methods to ImplToFst. 
-template <class A, class F> -class FactorWeightFst : public ImplToFst< FactorWeightFstImpl<A, F> > { - public: - friend class ArcIterator< FactorWeightFst<A, F> >; - friend class StateIterator< FactorWeightFst<A, F> >; - - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef CacheState<A> State; - typedef FactorWeightFstImpl<A, F> Impl; - - FactorWeightFst(const Fst<A> &fst) - : ImplToFst<Impl>(new Impl(fst, FactorWeightOptions<A>())) {} - - FactorWeightFst(const Fst<A> &fst, const FactorWeightOptions<A> &opts) - : ImplToFst<Impl>(new Impl(fst, opts)) {} - - // See Fst<>::Copy() for doc. - FactorWeightFst(const FactorWeightFst<A, F> &fst, bool copy) - : ImplToFst<Impl>(fst, copy) {} - - // Get a copy of this FactorWeightFst. See Fst<>::Copy() for further doc. - virtual FactorWeightFst<A, F> *Copy(bool copy = false) const { - return new FactorWeightFst<A, F>(*this, copy); - } - - virtual inline void InitStateIterator(StateIteratorData<A> *data) const; - - virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { - GetImpl()->InitArcIterator(s, data); - } - - private: - // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } - - void operator=(const FactorWeightFst<A, F> &fst); // Disallow -}; - - -// Specialization for FactorWeightFst. -template<class A, class F> -class StateIterator< FactorWeightFst<A, F> > - : public CacheStateIterator< FactorWeightFst<A, F> > { - public: - explicit StateIterator(const FactorWeightFst<A, F> &fst) - : CacheStateIterator< FactorWeightFst<A, F> >(fst, fst.GetImpl()) {} -}; - - -// Specialization for FactorWeightFst. 
-template <class A, class F> -class ArcIterator< FactorWeightFst<A, F> > - : public CacheArcIterator< FactorWeightFst<A, F> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const FactorWeightFst<A, F> &fst, StateId s) - : CacheArcIterator< FactorWeightFst<A, F> >(fst.GetImpl(), s) { - if (!fst.GetImpl()->HasArcs(s)) - fst.GetImpl()->Expand(s); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - -template <class A, class F> inline -void FactorWeightFst<A, F>::InitStateIterator(StateIteratorData<A> *data) const -{ - data->base = new StateIterator< FactorWeightFst<A, F> >(*this); -} - - -} // namespace fst - -#endif // FST_LIB_FACTOR_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/flags.h b/kaldi_io/src/tools/openfst/include/fst/flags.h deleted file mode 100644 index b3bb66c..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/flags.h +++ /dev/null @@ -1,242 +0,0 @@ -// flags.h -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Author: [email protected] (Michael Riley) -// -// \file -// Google-style flag handling declarations and inline definitions. 
- -#ifndef FST_LIB_FLAGS_H__ -#define FST_LIB_FLAGS_H__ - -#include <iostream> -#include <map> -#include <set> -#include <sstream> -#include <string> - -#include <fst/types.h> -#include <fst/lock.h> - -using std::string; - -// -// FLAGS USAGE: -// -// Definition example: -// -// DEFINE_int32(length, 0, "length"); -// -// This defines variable FLAGS_length, initialized to 0. -// -// Declaration example: -// -// DECLARE_int32(length); -// -// SET_FLAGS() can be used to set flags from the command line -// using, for example, '--length=2'. -// -// ShowUsage() can be used to print out command and flag usage. -// - -#define DECLARE_bool(name) extern bool FLAGS_ ## name -#define DECLARE_string(name) extern string FLAGS_ ## name -#define DECLARE_int32(name) extern int32 FLAGS_ ## name -#define DECLARE_int64(name) extern int64 FLAGS_ ## name -#define DECLARE_double(name) extern double FLAGS_ ## name - -template <typename T> -struct FlagDescription { - FlagDescription(T *addr, const char *doc, const char *type, - const char *file, const T val) - : address(addr), - doc_string(doc), - type_name(type), - file_name(file), - default_value(val) {} - - T *address; - const char *doc_string; - const char *type_name; - const char *file_name; - const T default_value; -}; - -template <typename T> -class FlagRegister { - public: - static FlagRegister<T> *GetRegister() { - fst::FstOnceInit(®ister_init_, &FlagRegister<T>::Init); - return register_; - } - - const FlagDescription<T> &GetFlagDescription(const string &name) const { - fst::MutexLock l(register_lock_); - typename std::map< string, FlagDescription<T> >::const_iterator it = - flag_table_.find(name); - return it != flag_table_.end() ? 
it->second : 0; - } - void SetDescription(const string &name, - const FlagDescription<T> &desc) { - fst::MutexLock l(register_lock_); - flag_table_.insert(make_pair(name, desc)); - } - - bool SetFlag(const string &val, bool *address) const { - if (val == "true" || val == "1" || val.empty()) { - *address = true; - return true; - } else if (val == "false" || val == "0") { - *address = false; - return true; - } - else { - return false; - } - } - bool SetFlag(const string &val, string *address) const { - *address = val; - return true; - } - bool SetFlag(const string &val, int32 *address) const { - char *p = 0; - *address = strtol(val.c_str(), &p, 0); - return !val.empty() && *p == '\0'; - } - bool SetFlag(const string &val, int64 *address) const { - char *p = 0; - *address = strtoll(val.c_str(), &p, 0); - return !val.empty() && *p == '\0'; - } - bool SetFlag(const string &val, double *address) const { - char *p = 0; - *address = strtod(val.c_str(), &p); - return !val.empty() && *p == '\0'; - } - - bool SetFlag(const string &arg, const string &val) const { - for (typename std::map< string, FlagDescription<T> >::const_iterator it = - flag_table_.begin(); - it != flag_table_.end(); - ++it) { - const string &name = it->first; - const FlagDescription<T> &desc = it->second; - if (arg == name) - return SetFlag(val, desc.address); - } - return false; - } - - void GetUsage(std::set< std::pair<string, string> > *usage_set) const { - for (typename std::map< string, - FlagDescription<T> >::const_iterator it = - flag_table_.begin(); - it != flag_table_.end(); - ++it) { - const string &name = it->first; - const FlagDescription<T> &desc = it->second; - string usage = " --" + name; - usage += ": type = "; - usage += desc.type_name; - usage += ", default = "; - usage += GetDefault(desc.default_value) + "\n "; - usage += desc.doc_string; - usage_set->insert(make_pair(desc.file_name, usage)); - } - } - - private: - static void Init() { - register_lock_ = new fst::Mutex; - register_ = new 
FlagRegister<T>; - } - - std::map< string, FlagDescription<T> > flag_table_; - - string GetDefault(bool default_value) const { - return default_value ? "true" : "false"; - } - - string GetDefault(const string &default_value) const { - return "\"" + default_value + "\""; - } - - template<typename V> string GetDefault(const V& default_value) const { - std::ostringstream strm; - strm << default_value; - return strm.str(); - } - - static fst::FstOnceType register_init_; // ensures only called once - static fst::Mutex* register_lock_; // multithreading lock - static FlagRegister<T> *register_; -}; - -template <class T> -fst::FstOnceType FlagRegister<T>::register_init_ = fst::FST_ONCE_INIT; - -template <class T> -fst::Mutex *FlagRegister<T>::register_lock_ = 0; - -template <class T> -FlagRegister<T> *FlagRegister<T>::register_ = 0; - - -template <typename T> -class FlagRegisterer { - public: - FlagRegisterer(const string &name, const FlagDescription<T> &desc) { - FlagRegister<T> *registr = FlagRegister<T>::GetRegister(); - registr->SetDescription(name, desc); - } - - private: - DISALLOW_COPY_AND_ASSIGN(FlagRegisterer); -}; - - -#define DEFINE_VAR(type, name, value, doc) \ - type FLAGS_ ## name = value; \ - static FlagRegisterer<type> \ - name ## _flags_registerer(#name, FlagDescription<type>(&FLAGS_ ## name, \ - doc, \ - #type, \ - __FILE__, \ - value)) - -#define DEFINE_bool(name, value, doc) DEFINE_VAR(bool, name, value, doc) -#define DEFINE_string(name, value, doc) \ - DEFINE_VAR(string, name, value, doc) -#define DEFINE_int32(name, value, doc) DEFINE_VAR(int32, name, value, doc) -#define DEFINE_int64(name, value, doc) DEFINE_VAR(int64, name, value, doc) -#define DEFINE_double(name, value, doc) DEFINE_VAR(double, name, value, doc) - - -// Temporary directory -DECLARE_string(tmpdir); - -void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags, - const char *src = ""); - -#define SET_FLAGS(usage, argc, argv, rmflags) \ -SetFlags(usage, argc, argv, 
rmflags, __FILE__) - -// Deprecated - for backward compatibility -inline void InitFst(const char *usage, int *argc, char ***argv, bool rmflags) { - return SetFlags(usage, argc, argv, rmflags); -} - -void ShowUsage(bool long_usage = true); - -#endif // FST_LIB_FLAGS_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/float-weight.h b/kaldi_io/src/tools/openfst/include/fst/float-weight.h deleted file mode 100644 index eb22638..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/float-weight.h +++ /dev/null @@ -1,601 +0,0 @@ -// float-weight.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Float weight set and associated semiring operation definitions. 
-// - -#ifndef FST_LIB_FLOAT_WEIGHT_H__ -#define FST_LIB_FLOAT_WEIGHT_H__ - -#include <limits> -#include <climits> -#include <sstream> -#include <string> - -#include <fst/util.h> -#include <fst/weight.h> - - -namespace fst { - -// numeric limits class -template <class T> -class FloatLimits { - public: - static const T PosInfinity() { - static const T pos_infinity = numeric_limits<T>::infinity(); - return pos_infinity; - } - - static const T NegInfinity() { - static const T neg_infinity = -PosInfinity(); - return neg_infinity; - } - - static const T NumberBad() { - static const T number_bad = numeric_limits<T>::quiet_NaN(); - return number_bad; - } - -}; - -// weight class to be templated on floating-points types -template <class T = float> -class FloatWeightTpl { - public: - FloatWeightTpl() {} - - FloatWeightTpl(T f) : value_(f) {} - - FloatWeightTpl(const FloatWeightTpl<T> &w) : value_(w.value_) {} - - FloatWeightTpl<T> &operator=(const FloatWeightTpl<T> &w) { - value_ = w.value_; - return *this; - } - - istream &Read(istream &strm) { - return ReadType(strm, &value_); - } - - ostream &Write(ostream &strm) const { - return WriteType(strm, value_); - } - - size_t Hash() const { - union { - T f; - size_t s; - } u; - u.s = 0; - u.f = value_; - return u.s; - } - - const T &Value() const { return value_; } - - protected: - void SetValue(const T &f) { value_ = f; } - - inline static string GetPrecisionString() { - int64 size = sizeof(T); - if (size == sizeof(float)) return ""; - size *= CHAR_BIT; - - string result; - Int64ToStr(size, &result); - return result; - } - - private: - T value_; -}; - -// Single-precision float weight -typedef FloatWeightTpl<float> FloatWeight; - -template <class T> -inline bool operator==(const FloatWeightTpl<T> &w1, - const FloatWeightTpl<T> &w2) { - // Volatile qualifier thwarts over-aggressive compiler optimizations - // that lead to problems esp. with NaturalLess(). 
- volatile T v1 = w1.Value(); - volatile T v2 = w2.Value(); - return v1 == v2; -} - -inline bool operator==(const FloatWeightTpl<double> &w1, - const FloatWeightTpl<double> &w2) { - return operator==<double>(w1, w2); -} - -inline bool operator==(const FloatWeightTpl<float> &w1, - const FloatWeightTpl<float> &w2) { - return operator==<float>(w1, w2); -} - -template <class T> -inline bool operator!=(const FloatWeightTpl<T> &w1, - const FloatWeightTpl<T> &w2) { - return !(w1 == w2); -} - -inline bool operator!=(const FloatWeightTpl<double> &w1, - const FloatWeightTpl<double> &w2) { - return operator!=<double>(w1, w2); -} - -inline bool operator!=(const FloatWeightTpl<float> &w1, - const FloatWeightTpl<float> &w2) { - return operator!=<float>(w1, w2); -} - -template <class T> -inline bool ApproxEqual(const FloatWeightTpl<T> &w1, - const FloatWeightTpl<T> &w2, - float delta = kDelta) { - return w1.Value() <= w2.Value() + delta && w2.Value() <= w1.Value() + delta; -} - -template <class T> -inline ostream &operator<<(ostream &strm, const FloatWeightTpl<T> &w) { - if (w.Value() == FloatLimits<T>::PosInfinity()) - return strm << "Infinity"; - else if (w.Value() == FloatLimits<T>::NegInfinity()) - return strm << "-Infinity"; - else if (w.Value() != w.Value()) // Fails for NaN - return strm << "BadNumber"; - else - return strm << w.Value(); -} - -template <class T> -inline istream &operator>>(istream &strm, FloatWeightTpl<T> &w) { - string s; - strm >> s; - if (s == "Infinity") { - w = FloatWeightTpl<T>(FloatLimits<T>::PosInfinity()); - } else if (s == "-Infinity") { - w = FloatWeightTpl<T>(FloatLimits<T>::NegInfinity()); - } else { - char *p; - T f = strtod(s.c_str(), &p); - if (p < s.c_str() + s.size()) - strm.clear(std::ios::badbit); - else - w = FloatWeightTpl<T>(f); - } - return strm; -} - - -// Tropical semiring: (min, +, inf, 0) -template <class T> -class TropicalWeightTpl : public FloatWeightTpl<T> { - public: - using FloatWeightTpl<T>::Value; - - typedef 
TropicalWeightTpl<T> ReverseWeight; - - TropicalWeightTpl() : FloatWeightTpl<T>() {} - - TropicalWeightTpl(T f) : FloatWeightTpl<T>(f) {} - - TropicalWeightTpl(const TropicalWeightTpl<T> &w) : FloatWeightTpl<T>(w) {} - - static const TropicalWeightTpl<T> Zero() { - return TropicalWeightTpl<T>(FloatLimits<T>::PosInfinity()); } - - static const TropicalWeightTpl<T> One() { - return TropicalWeightTpl<T>(0.0F); } - - static const TropicalWeightTpl<T> NoWeight() { - return TropicalWeightTpl<T>(FloatLimits<T>::NumberBad()); } - - static const string &Type() { - static const string type = "tropical" + - FloatWeightTpl<T>::GetPrecisionString(); - return type; - } - - bool Member() const { - // First part fails for IEEE NaN - return Value() == Value() && Value() != FloatLimits<T>::NegInfinity(); - } - - TropicalWeightTpl<T> Quantize(float delta = kDelta) const { - if (Value() == FloatLimits<T>::NegInfinity() || - Value() == FloatLimits<T>::PosInfinity() || - Value() != Value()) - return *this; - else - return TropicalWeightTpl<T>(floor(Value()/delta + 0.5F) * delta); - } - - TropicalWeightTpl<T> Reverse() const { return *this; } - - static uint64 Properties() { - return kLeftSemiring | kRightSemiring | kCommutative | - kPath | kIdempotent; - } -}; - -// Single precision tropical weight -typedef TropicalWeightTpl<float> TropicalWeight; - -template <class T> -inline TropicalWeightTpl<T> Plus(const TropicalWeightTpl<T> &w1, - const TropicalWeightTpl<T> &w2) { - if (!w1.Member() || !w2.Member()) - return TropicalWeightTpl<T>::NoWeight(); - return w1.Value() < w2.Value() ? 
w1 : w2; -} - -inline TropicalWeightTpl<float> Plus(const TropicalWeightTpl<float> &w1, - const TropicalWeightTpl<float> &w2) { - return Plus<float>(w1, w2); -} - -inline TropicalWeightTpl<double> Plus(const TropicalWeightTpl<double> &w1, - const TropicalWeightTpl<double> &w2) { - return Plus<double>(w1, w2); -} - -template <class T> -inline TropicalWeightTpl<T> Times(const TropicalWeightTpl<T> &w1, - const TropicalWeightTpl<T> &w2) { - if (!w1.Member() || !w2.Member()) - return TropicalWeightTpl<T>::NoWeight(); - T f1 = w1.Value(), f2 = w2.Value(); - if (f1 == FloatLimits<T>::PosInfinity()) - return w1; - else if (f2 == FloatLimits<T>::PosInfinity()) - return w2; - else - return TropicalWeightTpl<T>(f1 + f2); -} - -inline TropicalWeightTpl<float> Times(const TropicalWeightTpl<float> &w1, - const TropicalWeightTpl<float> &w2) { - return Times<float>(w1, w2); -} - -inline TropicalWeightTpl<double> Times(const TropicalWeightTpl<double> &w1, - const TropicalWeightTpl<double> &w2) { - return Times<double>(w1, w2); -} - -template <class T> -inline TropicalWeightTpl<T> Divide(const TropicalWeightTpl<T> &w1, - const TropicalWeightTpl<T> &w2, - DivideType typ = DIVIDE_ANY) { - if (!w1.Member() || !w2.Member()) - return TropicalWeightTpl<T>::NoWeight(); - T f1 = w1.Value(), f2 = w2.Value(); - if (f2 == FloatLimits<T>::PosInfinity()) - return FloatLimits<T>::NumberBad(); - else if (f1 == FloatLimits<T>::PosInfinity()) - return FloatLimits<T>::PosInfinity(); - else - return TropicalWeightTpl<T>(f1 - f2); -} - -inline TropicalWeightTpl<float> Divide(const TropicalWeightTpl<float> &w1, - const TropicalWeightTpl<float> &w2, - DivideType typ = DIVIDE_ANY) { - return Divide<float>(w1, w2, typ); -} - -inline TropicalWeightTpl<double> Divide(const TropicalWeightTpl<double> &w1, - const TropicalWeightTpl<double> &w2, - DivideType typ = DIVIDE_ANY) { - return Divide<double>(w1, w2, typ); -} - - -// Log semiring: (log(e^-x + e^y), +, inf, 0) -template <class T> -class LogWeightTpl : 
public FloatWeightTpl<T> { - public: - using FloatWeightTpl<T>::Value; - - typedef LogWeightTpl ReverseWeight; - - LogWeightTpl() : FloatWeightTpl<T>() {} - - LogWeightTpl(T f) : FloatWeightTpl<T>(f) {} - - LogWeightTpl(const LogWeightTpl<T> &w) : FloatWeightTpl<T>(w) {} - - static const LogWeightTpl<T> Zero() { - return LogWeightTpl<T>(FloatLimits<T>::PosInfinity()); - } - - static const LogWeightTpl<T> One() { - return LogWeightTpl<T>(0.0F); - } - - static const LogWeightTpl<T> NoWeight() { - return LogWeightTpl<T>(FloatLimits<T>::NumberBad()); } - - static const string &Type() { - static const string type = "log" + FloatWeightTpl<T>::GetPrecisionString(); - return type; - } - - bool Member() const { - // First part fails for IEEE NaN - return Value() == Value() && Value() != FloatLimits<T>::NegInfinity(); - } - - LogWeightTpl<T> Quantize(float delta = kDelta) const { - if (Value() == FloatLimits<T>::NegInfinity() || - Value() == FloatLimits<T>::PosInfinity() || - Value() != Value()) - return *this; - else - return LogWeightTpl<T>(floor(Value()/delta + 0.5F) * delta); - } - - LogWeightTpl<T> Reverse() const { return *this; } - - static uint64 Properties() { - return kLeftSemiring | kRightSemiring | kCommutative; - } -}; - -// Single-precision log weight -typedef LogWeightTpl<float> LogWeight; -// Double-precision log weight -typedef LogWeightTpl<double> Log64Weight; - -template <class T> -inline T LogExp(T x) { return log(1.0F + exp(-x)); } - -template <class T> -inline LogWeightTpl<T> Plus(const LogWeightTpl<T> &w1, - const LogWeightTpl<T> &w2) { - T f1 = w1.Value(), f2 = w2.Value(); - if (f1 == FloatLimits<T>::PosInfinity()) - return w2; - else if (f2 == FloatLimits<T>::PosInfinity()) - return w1; - else if (f1 > f2) - return LogWeightTpl<T>(f2 - LogExp(f1 - f2)); - else - return LogWeightTpl<T>(f1 - LogExp(f2 - f1)); -} - -inline LogWeightTpl<float> Plus(const LogWeightTpl<float> &w1, - const LogWeightTpl<float> &w2) { - return Plus<float>(w1, w2); -} - 
-inline LogWeightTpl<double> Plus(const LogWeightTpl<double> &w1, - const LogWeightTpl<double> &w2) { - return Plus<double>(w1, w2); -} - -template <class T> -inline LogWeightTpl<T> Times(const LogWeightTpl<T> &w1, - const LogWeightTpl<T> &w2) { - if (!w1.Member() || !w2.Member()) - return LogWeightTpl<T>::NoWeight(); - T f1 = w1.Value(), f2 = w2.Value(); - if (f1 == FloatLimits<T>::PosInfinity()) - return w1; - else if (f2 == FloatLimits<T>::PosInfinity()) - return w2; - else - return LogWeightTpl<T>(f1 + f2); -} - -inline LogWeightTpl<float> Times(const LogWeightTpl<float> &w1, - const LogWeightTpl<float> &w2) { - return Times<float>(w1, w2); -} - -inline LogWeightTpl<double> Times(const LogWeightTpl<double> &w1, - const LogWeightTpl<double> &w2) { - return Times<double>(w1, w2); -} - -template <class T> -inline LogWeightTpl<T> Divide(const LogWeightTpl<T> &w1, - const LogWeightTpl<T> &w2, - DivideType typ = DIVIDE_ANY) { - if (!w1.Member() || !w2.Member()) - return LogWeightTpl<T>::NoWeight(); - T f1 = w1.Value(), f2 = w2.Value(); - if (f2 == FloatLimits<T>::PosInfinity()) - return FloatLimits<T>::NumberBad(); - else if (f1 == FloatLimits<T>::PosInfinity()) - return FloatLimits<T>::PosInfinity(); - else - return LogWeightTpl<T>(f1 - f2); -} - -inline LogWeightTpl<float> Divide(const LogWeightTpl<float> &w1, - const LogWeightTpl<float> &w2, - DivideType typ = DIVIDE_ANY) { - return Divide<float>(w1, w2, typ); -} - -inline LogWeightTpl<double> Divide(const LogWeightTpl<double> &w1, - const LogWeightTpl<double> &w2, - DivideType typ = DIVIDE_ANY) { - return Divide<double>(w1, w2, typ); -} - -// MinMax semiring: (min, max, inf, -inf) -template <class T> -class MinMaxWeightTpl : public FloatWeightTpl<T> { - public: - using FloatWeightTpl<T>::Value; - - typedef MinMaxWeightTpl<T> ReverseWeight; - - MinMaxWeightTpl() : FloatWeightTpl<T>() {} - - MinMaxWeightTpl(T f) : FloatWeightTpl<T>(f) {} - - MinMaxWeightTpl(const MinMaxWeightTpl<T> &w) : FloatWeightTpl<T>(w) {} - - 
static const MinMaxWeightTpl<T> Zero() { - return MinMaxWeightTpl<T>(FloatLimits<T>::PosInfinity()); - } - - static const MinMaxWeightTpl<T> One() { - return MinMaxWeightTpl<T>(FloatLimits<T>::NegInfinity()); - } - - static const MinMaxWeightTpl<T> NoWeight() { - return MinMaxWeightTpl<T>(FloatLimits<T>::NumberBad()); } - - static const string &Type() { - static const string type = "minmax" + - FloatWeightTpl<T>::GetPrecisionString(); - return type; - } - - bool Member() const { - // Fails for IEEE NaN - return Value() == Value(); - } - - MinMaxWeightTpl<T> Quantize(float delta = kDelta) const { - // If one of infinities, or a NaN - if (Value() == FloatLimits<T>::NegInfinity() || - Value() == FloatLimits<T>::PosInfinity() || - Value() != Value()) - return *this; - else - return MinMaxWeightTpl<T>(floor(Value()/delta + 0.5F) * delta); - } - - MinMaxWeightTpl<T> Reverse() const { return *this; } - - static uint64 Properties() { - return kLeftSemiring | kRightSemiring | kCommutative | kIdempotent | kPath; - } -}; - -// Single-precision min-max weight -typedef MinMaxWeightTpl<float> MinMaxWeight; - -// Min -template <class T> -inline MinMaxWeightTpl<T> Plus( - const MinMaxWeightTpl<T> &w1, const MinMaxWeightTpl<T> &w2) { - if (!w1.Member() || !w2.Member()) - return MinMaxWeightTpl<T>::NoWeight(); - return w1.Value() < w2.Value() ? w1 : w2; -} - -inline MinMaxWeightTpl<float> Plus( - const MinMaxWeightTpl<float> &w1, const MinMaxWeightTpl<float> &w2) { - return Plus<float>(w1, w2); -} - -inline MinMaxWeightTpl<double> Plus( - const MinMaxWeightTpl<double> &w1, const MinMaxWeightTpl<double> &w2) { - return Plus<double>(w1, w2); -} - -// Max -template <class T> -inline MinMaxWeightTpl<T> Times( - const MinMaxWeightTpl<T> &w1, const MinMaxWeightTpl<T> &w2) { - if (!w1.Member() || !w2.Member()) - return MinMaxWeightTpl<T>::NoWeight(); - return w1.Value() >= w2.Value() ? 
w1 : w2; -} - -inline MinMaxWeightTpl<float> Times( - const MinMaxWeightTpl<float> &w1, const MinMaxWeightTpl<float> &w2) { - return Times<float>(w1, w2); -} - -inline MinMaxWeightTpl<double> Times( - const MinMaxWeightTpl<double> &w1, const MinMaxWeightTpl<double> &w2) { - return Times<double>(w1, w2); -} - -// Defined only for special cases -template <class T> -inline MinMaxWeightTpl<T> Divide(const MinMaxWeightTpl<T> &w1, - const MinMaxWeightTpl<T> &w2, - DivideType typ = DIVIDE_ANY) { - if (!w1.Member() || !w2.Member()) - return MinMaxWeightTpl<T>::NoWeight(); - // min(w1, x) = w2, w1 >= w2 => min(w1, x) = w2, x = w2 - return w1.Value() >= w2.Value() ? w1 : FloatLimits<T>::NumberBad(); -} - -inline MinMaxWeightTpl<float> Divide(const MinMaxWeightTpl<float> &w1, - const MinMaxWeightTpl<float> &w2, - DivideType typ = DIVIDE_ANY) { - return Divide<float>(w1, w2, typ); -} - -inline MinMaxWeightTpl<double> Divide(const MinMaxWeightTpl<double> &w1, - const MinMaxWeightTpl<double> &w2, - DivideType typ = DIVIDE_ANY) { - return Divide<double>(w1, w2, typ); -} - -// -// WEIGHT CONVERTER SPECIALIZATIONS. 
-// - -// Convert to tropical -template <> -struct WeightConvert<LogWeight, TropicalWeight> { - TropicalWeight operator()(LogWeight w) const { return w.Value(); } -}; - -template <> -struct WeightConvert<Log64Weight, TropicalWeight> { - TropicalWeight operator()(Log64Weight w) const { return w.Value(); } -}; - -// Convert to log -template <> -struct WeightConvert<TropicalWeight, LogWeight> { - LogWeight operator()(TropicalWeight w) const { return w.Value(); } -}; - -template <> -struct WeightConvert<Log64Weight, LogWeight> { - LogWeight operator()(Log64Weight w) const { return w.Value(); } -}; - -// Convert to log64 -template <> -struct WeightConvert<TropicalWeight, Log64Weight> { - Log64Weight operator()(TropicalWeight w) const { return w.Value(); } -}; - -template <> -struct WeightConvert<LogWeight, Log64Weight> { - Log64Weight operator()(LogWeight w) const { return w.Value(); } -}; - -} // namespace fst - -#endif // FST_LIB_FLOAT_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/fst-decl.h b/kaldi_io/src/tools/openfst/include/fst/fst-decl.h deleted file mode 100644 index f27ded8..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/fst-decl.h +++ /dev/null @@ -1,124 +0,0 @@ -// fst-decl.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// This file contains declarations of classes in the Fst template library. 
-// - -#ifndef FST_LIB_FST_DECL_H__ -#define FST_LIB_FST_DECL_H__ - -#include <fst/types.h> - -namespace fst { - -class SymbolTable; -class SymbolTableIterator; - -template <class W> class FloatWeightTpl; -template <class W> class TropicalWeightTpl; -template <class W> class LogWeightTpl; -template <class W> class MinMaxWeightTpl; - -typedef FloatWeightTpl<float> FloatWeight; -typedef TropicalWeightTpl<float> TropicalWeight; -typedef LogWeightTpl<float> LogWeight; -typedef MinMaxWeightTpl<float> MinMaxWeight; - -template <class W> class ArcTpl; -typedef ArcTpl<TropicalWeight> StdArc; -typedef ArcTpl<LogWeight> LogArc; - -template <class A, class C, class U = uint32> class CompactFst; -template <class A, class U = uint32> class ConstFst; -template <class A, class W, class M> class EditFst; -template <class A> class ExpandedFst; -template <class A> class Fst; -template <class A> class MutableFst; -template <class A> class VectorFst; - -template <class A, class C> class ArcSortFst; -template <class A> class ClosureFst; -template <class A> class ComposeFst; -template <class A> class ConcatFst; -template <class A> class DeterminizeFst; -template <class A> class DifferenceFst; -template <class A> class IntersectFst; -template <class A> class InvertFst; -template <class A, class B, class C> class ArcMapFst; -template <class A> class ProjectFst; -template <class A, class B, class S> class RandGenFst; -template <class A> class RelabelFst; -template <class A, class T> class ReplaceFst; -template <class A> class RmEpsilonFst; -template <class A> class UnionFst; - -template <class T, class Compare, bool max> class Heap; - -template <class A> class AcceptorCompactor; -template <class A> class StringCompactor; -template <class A> class UnweightedAcceptorCompactor; -template <class A> class UnweightedCompactor; -template <class A> class WeightedStringCompactor; - -template <class A, class P> class DefaultReplaceStateTable; - -typedef CompactFst<StdArc, AcceptorCompactor<StdArc> > 
-StdCompactAcceptorFst; -typedef CompactFst< StdArc, StringCompactor<StdArc> > -StdCompactStringFst; -typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > -StdCompactUnweightedAcceptorFst; -typedef CompactFst<StdArc, UnweightedCompactor<StdArc> > -StdCompactUnweightedFst; -typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> > -StdCompactWeightedStringFst; -typedef ConstFst<StdArc> StdConstFst; -typedef ExpandedFst<StdArc> StdExpandedFst; -typedef Fst<StdArc> StdFst; -typedef MutableFst<StdArc> StdMutableFst; -typedef VectorFst<StdArc> StdVectorFst; - - -template <class C> class StdArcSortFst; -typedef ClosureFst<StdArc> StdClosureFst; -typedef ComposeFst<StdArc> StdComposeFst; -typedef ConcatFst<StdArc> StdConcatFst; -typedef DeterminizeFst<StdArc> StdDeterminizeFst; -typedef DifferenceFst<StdArc> StdDifferenceFst; -typedef IntersectFst<StdArc> StdIntersectFst; -typedef InvertFst<StdArc> StdInvertFst; -typedef ProjectFst<StdArc> StdProjectFst; -typedef RelabelFst<StdArc> StdRelabelFst; -typedef ReplaceFst<StdArc, DefaultReplaceStateTable<StdArc, ssize_t> > -StdReplaceFst; -typedef RmEpsilonFst<StdArc> StdRmEpsilonFst; -typedef UnionFst<StdArc> StdUnionFst; - -template <typename T> class IntegerFilterState; -typedef IntegerFilterState<signed char> CharFilterState; -typedef IntegerFilterState<short> ShortFilterState; -typedef IntegerFilterState<int> IntFilterState; - -template <class F> class Matcher; -template <class M1, class M2 = M1> class SequenceComposeFilter; -template <class M1, class M2 = M1> class AltSequenceComposeFilter; -template <class M1, class M2 = M1> class MatchComposeFilter; - -} // namespace fst - -#endif // FST_LIB_FST_DECL_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/fst.h b/kaldi_io/src/tools/openfst/include/fst/fst.h deleted file mode 100644 index 150fc4e..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/fst.h +++ /dev/null @@ -1,949 +0,0 @@ -// fst.h - -// Licensed under the Apache License, Version 2.0 (the 
"License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Finite-State Transducer (FST) - abstract base class definition, -// state and arc iterator interface, and suggested base implementation. -// - -#ifndef FST_LIB_FST_H__ -#define FST_LIB_FST_H__ - -#include <stddef.h> -#include <sys/types.h> -#include <cmath> -#include <string> - -#include <fst/compat.h> -#include <fst/types.h> - -#include <fst/arc.h> -#include <fst/properties.h> -#include <fst/register.h> -#include <iostream> -#include <fstream> -#include <sstream> -#include <fst/symbol-table.h> -#include <fst/util.h> - - -DECLARE_bool(fst_align); - -namespace fst { - -bool IsFstHeader(istream &, const string &); - -class FstHeader; -template <class A> class StateIteratorData; -template <class A> class ArcIteratorData; -template <class A> class MatcherBase; - -struct FstReadOptions { - // FileReadMode(s) are advisory, there are many conditions than prevent a - // file from being mapped, READ mode will be selected in these cases with - // a warning indicating why it was chosen. - enum FileReadMode { READ, MAP }; - - string source; // Where you're reading from - const FstHeader *header; // Pointer to Fst header. If non-zero, use - // this info (don't read a stream header) - const SymbolTable* isymbols; // Pointer to input symbols. If non-zero, use - // this info (read and skip stream isymbols) - const SymbolTable* osymbols; // Pointer to output symbols. 
If non-zero, use - // this info (read and skip stream osymbols) - FileReadMode mode; // Read or map files (advisory, if possible) - - explicit FstReadOptions(const string& src = "<unspecified>", - const FstHeader *hdr = 0, - const SymbolTable* isym = 0, - const SymbolTable* osym = 0); - - explicit FstReadOptions(const string& src, - const SymbolTable* isym, - const SymbolTable* osym = 0); - - // Helper function to convert strings FileReadModes into their enum value. - static FileReadMode ReadMode(const string &mode); -}; - -struct FstWriteOptions { - string source; // Where you're writing to - bool write_header; // Write the header? - bool write_isymbols; // Write input symbols? - bool write_osymbols; // Write output symbols? - bool align; // Write data aligned where appropriate; - // this may fail on pipes - - explicit FstWriteOptions(const string& src = "<unspecifed>", - bool hdr = true, bool isym = true, - bool osym = true, bool alig = FLAGS_fst_align) - : source(src), write_header(hdr), - write_isymbols(isym), write_osymbols(osym), align(alig) {} -}; - -// -// Fst HEADER CLASS -// -// This is the recommended Fst file header representation. 
-// -class FstHeader { - public: - enum { - HAS_ISYMBOLS = 0x1, // Has input symbol table - HAS_OSYMBOLS = 0x2, // Has output symbol table - IS_ALIGNED = 0x4, // Memory-aligned (where appropriate) - } Flags; - - FstHeader() : version_(0), flags_(0), properties_(0), start_(-1), - numstates_(0), numarcs_(0) {} - const string &FstType() const { return fsttype_; } - const string &ArcType() const { return arctype_; } - int32 Version() const { return version_; } - int32 GetFlags() const { return flags_; } - uint64 Properties() const { return properties_; } - int64 Start() const { return start_; } - int64 NumStates() const { return numstates_; } - int64 NumArcs() const { return numarcs_; } - - void SetFstType(const string& type) { fsttype_ = type; } - void SetArcType(const string& type) { arctype_ = type; } - void SetVersion(int32 version) { version_ = version; } - void SetFlags(int32 flags) { flags_ = flags; } - void SetProperties(uint64 properties) { properties_ = properties; } - void SetStart(int64 start) { start_ = start; } - void SetNumStates(int64 numstates) { numstates_ = numstates; } - void SetNumArcs(int64 numarcs) { numarcs_ = numarcs; } - - bool Read(istream &strm, const string &source, bool rewind = false); - bool Write(ostream &strm, const string &source) const; - - private: - - string fsttype_; // E.g. "vector" - string arctype_; // E.g. "standard" - int32 version_; // Type version # - int32 flags_; // File format bits - uint64 properties_; // FST property bits - int64 start_; // Start state - int64 numstates_; // # of states - int64 numarcs_; // # of arcs -}; - - -// Specifies matcher action. -enum MatchType { MATCH_INPUT, // Match input label. - MATCH_OUTPUT, // Match output label. - MATCH_BOTH, // Match input or output label. - MATCH_NONE, // Match nothing. - MATCH_UNKNOWN }; // Match type unknown. 
- -// -// Fst INTERFACE CLASS DEFINITION -// - -// A generic FST, templated on the arc definition, with -// common-demoninator methods (use StateIterator and ArcIterator to -// iterate over its states and arcs). -template <class A> -class Fst { - public: - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - virtual ~Fst() {} - - virtual StateId Start() const = 0; // Initial state - - virtual Weight Final(StateId) const = 0; // State's final weight - - virtual size_t NumArcs(StateId) const = 0; // State's arc count - - virtual size_t NumInputEpsilons(StateId) - const = 0; // State's input epsilon count - - virtual size_t NumOutputEpsilons(StateId) - const = 0; // State's output epsilon count - - // If test=false, return stored properties bits for mask (some poss. unknown) - // If test=true, return property bits for mask (computing o.w. unknown) - virtual uint64 Properties(uint64 mask, bool test) - const = 0; // Property bits - - virtual const string& Type() const = 0; // Fst type name - - // Get a copy of this Fst. The copying behaves as follows: - // - // (1) The copying is constant time if safe = false or if safe = true - // and is on an otherwise unaccessed Fst. - // - // (2) If safe = true, the copy is thread-safe in that the original - // and copy can be safely accessed (but not necessarily mutated) by - // separate threads. For some Fst types, 'Copy(true)' should only be - // called on an Fst that has not otherwise been accessed. Its behavior - // is undefined otherwise. - // - // (3) If a MutableFst is copied and then mutated, then the original is - // unmodified and vice versa (often by a copy-on-write on the initial - // mutation, which may not be constant time). 
- virtual Fst<A> *Copy(bool safe = false) const = 0; - - // Read an Fst from an input stream; returns NULL on error - static Fst<A> *Read(istream &strm, const FstReadOptions &opts) { - FstReadOptions ropts(opts); - FstHeader hdr; - if (ropts.header) - hdr = *opts.header; - else { - if (!hdr.Read(strm, opts.source)) - return 0; - ropts.header = &hdr; - } - FstRegister<A> *registr = FstRegister<A>::GetRegister(); - const typename FstRegister<A>::Reader reader = - registr->GetReader(hdr.FstType()); - if (!reader) { - LOG(ERROR) << "Fst::Read: Unknown FST type \"" << hdr.FstType() - << "\" (arc type = \"" << A::Type() - << "\"): " << ropts.source; - return 0; - } - return reader(strm, ropts); - }; - - // Read an Fst from a file; return NULL on error - // Empty filename reads from standard input - static Fst<A> *Read(const string &filename) { - if (!filename.empty()) { - ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); - if (!strm) { - LOG(ERROR) << "Fst::Read: Can't open file: " << filename; - return 0; - } - return Read(strm, FstReadOptions(filename)); - } else { - return Read(cin, FstReadOptions("standard input")); - } - } - - // Write an Fst to an output stream; return false on error - virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { - LOG(ERROR) << "Fst::Write: No write stream method for " << Type() - << " Fst type"; - return false; - } - - // Write an Fst to a file; return false on error - // Empty filename writes to standard output - virtual bool Write(const string &filename) const { - LOG(ERROR) << "Fst::Write: No write filename method for " << Type() - << " Fst type"; - return false; - } - - // Return input label symbol table; return NULL if not specified - virtual const SymbolTable* InputSymbols() const = 0; - - // Return output label symbol table; return NULL if not specified - virtual const SymbolTable* OutputSymbols() const = 0; - - // For generic state iterator construction; not normally called - // directly by users. 
- virtual void InitStateIterator(StateIteratorData<A> *) const = 0; - - // For generic arc iterator construction; not normally called - // directly by users. - virtual void InitArcIterator(StateId s, ArcIteratorData<A> *) const = 0; - - // For generic matcher construction; not normally called - // directly by users. - virtual MatcherBase<A> *InitMatcher(MatchType match_type) const; - - protected: - bool WriteFile(const string &filename) const { - if (!filename.empty()) { - ofstream strm(filename.c_str(), ofstream::out | ofstream::binary); - if (!strm) { - LOG(ERROR) << "Fst::Write: Can't open file: " << filename; - return false; - } - return Write(strm, FstWriteOptions(filename)); - } else { - return Write(cout, FstWriteOptions("standard output")); - } - } -}; - - -// -// STATE and ARC ITERATOR DEFINITIONS -// - -// State iterator interface templated on the Arc definition; used -// for StateIterator specializations returned by the InitStateIterator -// Fst method. -template <class A> -class StateIteratorBase { - public: - typedef A Arc; - typedef typename A::StateId StateId; - - virtual ~StateIteratorBase() {} - - bool Done() const { return Done_(); } // End of iterator? - StateId Value() const { return Value_(); } // Current state (when !Done) - void Next() { Next_(); } // Advance to next state (when !Done) - void Reset() { Reset_(); } // Return to initial condition - - private: - // This allows base class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. - virtual bool Done_() const = 0; - virtual StateId Value_() const = 0; - virtual void Next_() = 0; - virtual void Reset_() = 0; -}; - - -// StateIterator initialization data - -template <class A> struct StateIteratorData { - StateIteratorBase<A> *base; // Specialized iterator if non-zero - typename A::StateId nstates; // O.w. 
total # of states -}; - - -// Generic state iterator, templated on the FST definition -// - a wrapper around pointer to specific one. -// Here is a typical use: \code -// for (StateIterator<StdFst> siter(fst); -// !siter.Done(); -// siter.Next()) { -// StateId s = siter.Value(); -// ... -// } \endcode -template <class F> -class StateIterator { - public: - typedef F FST; - typedef typename F::Arc Arc; - typedef typename Arc::StateId StateId; - - explicit StateIterator(const F &fst) : s_(0) { - fst.InitStateIterator(&data_); - } - - ~StateIterator() { if (data_.base) delete data_.base; } - - bool Done() const { - return data_.base ? data_.base->Done() : s_ >= data_.nstates; - } - - StateId Value() const { return data_.base ? data_.base->Value() : s_; } - - void Next() { - if (data_.base) - data_.base->Next(); - else - ++s_; - } - - void Reset() { - if (data_.base) - data_.base->Reset(); - else - s_ = 0; - } - - private: - StateIteratorData<Arc> data_; - StateId s_; - - DISALLOW_COPY_AND_ASSIGN(StateIterator); -}; - - -// Flags to control the behavior on an arc iterator: -static const uint32 kArcILabelValue = 0x0001; // Value() gives valid ilabel -static const uint32 kArcOLabelValue = 0x0002; // " " " olabel -static const uint32 kArcWeightValue = 0x0004; // " " " weight -static const uint32 kArcNextStateValue = 0x0008; // " " " nextstate -static const uint32 kArcNoCache = 0x0010; // No need to cache arcs - -static const uint32 kArcValueFlags = - kArcILabelValue | kArcOLabelValue | - kArcWeightValue | kArcNextStateValue; - -static const uint32 kArcFlags = kArcValueFlags | kArcNoCache; - - -// Arc iterator interface, templated on the Arc definition; used -// for Arc iterator specializations that are returned by the InitArcIterator -// Fst method. -template <class A> -class ArcIteratorBase { - public: - typedef A Arc; - typedef typename A::StateId StateId; - - virtual ~ArcIteratorBase() {} - - bool Done() const { return Done_(); } // End of iterator? 
- const A& Value() const { return Value_(); } // Current arc (when !Done) - void Next() { Next_(); } // Advance to next arc (when !Done) - size_t Position() const { return Position_(); } // Return current position - void Reset() { Reset_(); } // Return to initial condition - void Seek(size_t a) { Seek_(a); } // Random arc access by position - uint32 Flags() const { return Flags_(); } // Return current behavorial flags - void SetFlags(uint32 flags, uint32 mask) { // Set behavorial flags - SetFlags_(flags, mask); - } - - private: - // This allows base class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. - virtual bool Done_() const = 0; - virtual const A& Value_() const = 0; - virtual void Next_() = 0; - virtual size_t Position_() const = 0; - virtual void Reset_() = 0; - virtual void Seek_(size_t a) = 0; - virtual uint32 Flags_() const = 0; - virtual void SetFlags_(uint32 flags, uint32 mask) = 0; -}; - - -// ArcIterator initialization data -template <class A> struct ArcIteratorData { - ArcIteratorBase<A> *base; // Specialized iterator if non-zero - const A *arcs; // O.w. arcs pointer - size_t narcs; // ... and arc count - int *ref_count; // ... and reference count if non-zero -}; - - -// Generic arc iterator, templated on the FST definition -// - a wrapper around pointer to specific one. -// Here is a typical use: \code -// for (ArcIterator<StdFst> aiter(fst, s)); -// !aiter.Done(); -// aiter.Next()) { -// StdArc &arc = aiter.Value(); -// ... 
-// } \endcode -template <class F> -class ArcIterator { - public: - typedef F FST; - typedef typename F::Arc Arc; - typedef typename Arc::StateId StateId; - - ArcIterator(const F &fst, StateId s) : i_(0) { - fst.InitArcIterator(s, &data_); - } - - explicit ArcIterator(const ArcIteratorData<Arc> &data) : data_(data), i_(0) { - if (data_.ref_count) - ++(*data_.ref_count); - } - - ~ArcIterator() { - if (data_.base) - delete data_.base; - else if (data_.ref_count) - --(*data_.ref_count); - } - - bool Done() const { - return data_.base ? data_.base->Done() : i_ >= data_.narcs; - } - - const Arc& Value() const { - return data_.base ? data_.base->Value() : data_.arcs[i_]; - } - - void Next() { - if (data_.base) - data_.base->Next(); - else - ++i_; - } - - void Reset() { - if (data_.base) - data_.base->Reset(); - else - i_ = 0; - } - - void Seek(size_t a) { - if (data_.base) - data_.base->Seek(a); - else - i_ = a; - } - - size_t Position() const { - return data_.base ? data_.base->Position() : i_; - } - - uint32 Flags() const { - if (data_.base) - return data_.base->Flags(); - else - return kArcValueFlags; - } - - void SetFlags(uint32 flags, uint32 mask) { - if (data_.base) - data_.base->SetFlags(flags, mask); - } - - private: - ArcIteratorData<Arc> data_; - size_t i_; - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - -// -// MATCHER DEFINITIONS -// - -template <class A> -MatcherBase<A> *Fst<A>::InitMatcher(MatchType match_type) const { - return 0; // Use the default matcher -} - - -// -// FST ACCESSORS - Useful functions in high-performance cases. -// - -namespace internal { - -// General case - requires non-abstract, 'final' methods. Use for inlining. 
-template <class F> inline -typename F::Arc::Weight Final(const F &fst, typename F::Arc::StateId s) { - return fst.F::Final(s); -} - -template <class F> inline -ssize_t NumArcs(const F &fst, typename F::Arc::StateId s) { - return fst.F::NumArcs(s); -} - -template <class F> inline -ssize_t NumInputEpsilons(const F &fst, typename F::Arc::StateId s) { - return fst.F::NumInputEpsilons(s); -} - -template <class F> inline -ssize_t NumOutputEpsilons(const F &fst, typename F::Arc::StateId s) { - return fst.F::NumOutputEpsilons(s); -} - - -// Fst<A> case - abstract methods. -template <class A> inline -typename A::Weight Final(const Fst<A> &fst, typename A::StateId s) { - return fst.Final(s); -} - -template <class A> inline -ssize_t NumArcs(const Fst<A> &fst, typename A::StateId s) { - return fst.NumArcs(s); -} - -template <class A> inline -ssize_t NumInputEpsilons(const Fst<A> &fst, typename A::StateId s) { - return fst.NumInputEpsilons(s); -} - -template <class A> inline -ssize_t NumOutputEpsilons(const Fst<A> &fst, typename A::StateId s) { - return fst.NumOutputEpsilons(s); -} - -} // namespace internal - -// A useful alias when using StdArc. -typedef Fst<StdArc> StdFst; - - -// -// CONSTANT DEFINITIONS -// - -const int kNoStateId = -1; // Not a valid state ID -const int kNoLabel = -1; // Not a valid label - -// -// Fst IMPLEMENTATION BASE -// -// This is the recommended Fst implementation base class. It will -// handle reference counts, property bits, type information and symbols. -// - -template <class A> class FstImpl { - public: - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - FstImpl() - : properties_(0), type_("null"), isymbols_(0), osymbols_(0) {} - - FstImpl(const FstImpl<A> &impl) - : properties_(impl.properties_), type_(impl.type_), - isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : 0), - osymbols_(impl.osymbols_ ? 
impl.osymbols_->Copy() : 0) {} - - virtual ~FstImpl() { - delete isymbols_; - delete osymbols_; - } - - const string& Type() const { return type_; } - - void SetType(const string &type) { type_ = type; } - - virtual uint64 Properties() const { return properties_; } - - virtual uint64 Properties(uint64 mask) const { return properties_ & mask; } - - void SetProperties(uint64 props) { - properties_ &= kError; // kError can't be cleared - properties_ |= props; - } - - void SetProperties(uint64 props, uint64 mask) { - properties_ &= ~mask | kError; // kError can't be cleared - properties_ |= props & mask; - } - - // Allows (only) setting error bit on const FST impls - void SetProperties(uint64 props, uint64 mask) const { - if (mask != kError) - FSTERROR() << "FstImpl::SetProperties() const: can only set kError"; - properties_ |= kError; - } - - const SymbolTable* InputSymbols() const { return isymbols_; } - - const SymbolTable* OutputSymbols() const { return osymbols_; } - - SymbolTable* InputSymbols() { return isymbols_; } - - SymbolTable* OutputSymbols() { return osymbols_; } - - void SetInputSymbols(const SymbolTable* isyms) { - if (isymbols_) delete isymbols_; - isymbols_ = isyms ? isyms->Copy() : 0; - } - - void SetOutputSymbols(const SymbolTable* osyms) { - if (osymbols_) delete osymbols_; - osymbols_ = osyms ? osyms->Copy() : 0; - } - - int RefCount() const { - return ref_count_.count(); - } - - int IncrRefCount() { - return ref_count_.Incr(); - } - - int DecrRefCount() { - return ref_count_.Decr(); - } - - // Read-in header and symbols from input stream, initialize Fst, and - // return the header. If opts.header is non-null, skip read-in and - // use the option value. If opts.[io]symbols is non-null, read-in - // (if present), but use the option value. - bool ReadHeader(istream &strm, const FstReadOptions& opts, - int min_version, FstHeader *hdr); - - // Write-out header and symbols from output stream. - // If a opts.header is false, skip writing header. 
- // If opts.[io]symbols is false, skip writing those symbols. - // This method is needed for Impl's that implement Write methods. - void WriteHeader(ostream &strm, const FstWriteOptions& opts, - int version, FstHeader *hdr) const { - if (opts.write_header) { - hdr->SetFstType(type_); - hdr->SetArcType(A::Type()); - hdr->SetVersion(version); - hdr->SetProperties(properties_); - int32 file_flags = 0; - if (isymbols_ && opts.write_isymbols) - file_flags |= FstHeader::HAS_ISYMBOLS; - if (osymbols_ && opts.write_osymbols) - file_flags |= FstHeader::HAS_OSYMBOLS; - if (opts.align) - file_flags |= FstHeader::IS_ALIGNED; - hdr->SetFlags(file_flags); - hdr->Write(strm, opts.source); - } - if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm); - if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm); - } - - // Write-out header and symbols to output stream. - // If a opts.header is false, skip writing header. - // If opts.[io]symbols is false, skip writing those symbols. - // type is the Fst type being written. - // This method is used in the cross-type serialization methods Fst::WriteFst. 
- static void WriteFstHeader(const Fst<A> &fst, ostream &strm, - const FstWriteOptions& opts, int version, - const string &type, uint64 properties, - FstHeader *hdr) { - if (opts.write_header) { - hdr->SetFstType(type); - hdr->SetArcType(A::Type()); - hdr->SetVersion(version); - hdr->SetProperties(properties); - int32 file_flags = 0; - if (fst.InputSymbols() && opts.write_isymbols) - file_flags |= FstHeader::HAS_ISYMBOLS; - if (fst.OutputSymbols() && opts.write_osymbols) - file_flags |= FstHeader::HAS_OSYMBOLS; - if (opts.align) - file_flags |= FstHeader::IS_ALIGNED; - hdr->SetFlags(file_flags); - hdr->Write(strm, opts.source); - } - if (fst.InputSymbols() && opts.write_isymbols) { - fst.InputSymbols()->Write(strm); - } - if (fst.OutputSymbols() && opts.write_osymbols) { - fst.OutputSymbols()->Write(strm); - } - } - - // In serialization routines where the header cannot be written until after - // the machine has been serialized, this routine can be called to seek to - // the beginning of the file an rewrite the header with updated fields. - // It repositions the file pointer back at the end of the file. - // returns true on success, false on failure. 
- static bool UpdateFstHeader(const Fst<A> &fst, ostream &strm, - const FstWriteOptions& opts, int version, - const string &type, uint64 properties, - FstHeader *hdr, size_t header_offset) { - strm.seekp(header_offset); - if (!strm) { - LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source; - return false; - } - WriteFstHeader(fst, strm, opts, version, type, properties, hdr); - if (!strm) { - LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source; - return false; - } - strm.seekp(0, ios_base::end); - if (!strm) { - LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source; - return false; - } - return true; - } - - protected: - mutable uint64 properties_; // Property bits - - private: - string type_; // Unique name of Fst class - SymbolTable *isymbols_; // Ilabel symbol table - SymbolTable *osymbols_; // Olabel symbol table - RefCounter ref_count_; // Reference count - - void operator=(const FstImpl<A> &impl); // disallow -}; - -template <class A> inline -bool FstImpl<A>::ReadHeader(istream &strm, const FstReadOptions& opts, - int min_version, FstHeader *hdr) { - if (opts.header) - *hdr = *opts.header; - else if (!hdr->Read(strm, opts.source)) - return false; - - if (FLAGS_v >= 2) { - LOG(INFO) << "FstImpl::ReadHeader: source: " << opts.source - << ", fst_type: " << hdr->FstType() - << ", arc_type: " << A::Type() - << ", version: " << hdr->Version() - << ", flags: " << hdr->GetFlags(); - } - - if (hdr->FstType() != type_) { - LOG(ERROR) << "FstImpl::ReadHeader: Fst not of type \"" << type_ - << "\": " << opts.source; - return false; - } - if (hdr->ArcType() != A::Type()) { - LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type \"" << A::Type() - << "\": " << opts.source; - return false; - } - if (hdr->Version() < min_version) { - LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_ - << " Fst version: " << opts.source; - return false; - } - properties_ = hdr->Properties(); - if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS) - 
isymbols_ = SymbolTable::Read(strm, opts.source); - if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS) - osymbols_ =SymbolTable::Read(strm, opts.source); - - if (opts.isymbols) { - delete isymbols_; - isymbols_ = opts.isymbols->Copy(); - } - if (opts.osymbols) { - delete osymbols_; - osymbols_ = opts.osymbols->Copy(); - } - return true; -} - - -template<class Arc> -uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known); - - -// This is a helper class template useful for attaching an Fst interface to -// its implementation, handling reference counting. -template < class I, class F = Fst<typename I::Arc> > -class ImplToFst : public F { - public: - typedef typename I::Arc Arc; - typedef typename Arc::Weight Weight; - typedef typename Arc::StateId StateId; - - virtual ~ImplToFst() { if (!impl_->DecrRefCount()) delete impl_; } - - virtual StateId Start() const { return impl_->Start(); } - - virtual Weight Final(StateId s) const { return impl_->Final(s); } - - virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); } - - virtual size_t NumInputEpsilons(StateId s) const { - return impl_->NumInputEpsilons(s); - } - - virtual size_t NumOutputEpsilons(StateId s) const { - return impl_->NumOutputEpsilons(s); - } - - virtual uint64 Properties(uint64 mask, bool test) const { - if (test) { - uint64 knownprops, testprops = TestProperties(*this, mask, &knownprops); - impl_->SetProperties(testprops, knownprops); - return testprops & mask; - } else { - return impl_->Properties(mask); - } - } - - virtual const string& Type() const { return impl_->Type(); } - - virtual const SymbolTable* InputSymbols() const { - return impl_->InputSymbols(); - } - - virtual const SymbolTable* OutputSymbols() const { - return impl_->OutputSymbols(); - } - - protected: - ImplToFst() : impl_(0) {} - - ImplToFst(I *impl) : impl_(impl) {} - - ImplToFst(const ImplToFst<I, F> &fst) { - impl_ = fst.impl_; - impl_->IncrRefCount(); - } - - // This constructor presumes there is a copy 
constructor for the - // implementation. - ImplToFst(const ImplToFst<I, F> &fst, bool safe) { - if (safe) { - impl_ = new I(*(fst.impl_)); - } else { - impl_ = fst.impl_; - impl_->IncrRefCount(); - } - } - - I *GetImpl() const { return impl_; } - - // Change Fst implementation pointer. If 'own_impl' is true, - // ownership of the input implementation is given to this - // object; otherwise, the input implementation's reference count - // should be incremented. - void SetImpl(I *impl, bool own_impl = true) { - if (!own_impl) - impl->IncrRefCount(); - if (impl_ && !impl_->DecrRefCount()) delete impl_; - impl_ = impl; - } - - private: - // Disallow - ImplToFst<I, F> &operator=(const ImplToFst<I, F> &fst); - - ImplToFst<I, F> &operator=(const Fst<Arc> &fst) { - FSTERROR() << "ImplToFst: Assignment operator disallowed"; - GetImpl()->SetProperties(kError, kError); - return *this; - } - - I *impl_; -}; - - -// Converts FSTs by casting their implementations, where this makes -// sense (which excludes implementations with weight-dependent virtual -// methods). Must be a friend of the Fst classes involved (currently -// the concrete Fsts: VectorFst, ConstFst, CompactFst). 
-template<class F, class G> void Cast(const F &ifst, G *ofst) { - ofst->SetImpl(reinterpret_cast<typename G::Impl *>(ifst.GetImpl()), false); -} - -// Fst Serialization -template <class A> -void FstToString(const Fst<A> &fst, string *result) { - ostringstream ostrm; - fst.Write(ostrm, FstWriteOptions("FstToString")); - *result = ostrm.str(); -} - -template <class A> -Fst<A> *StringToFst(const string &s) { - istringstream istrm(s); - return Fst<A>::Read(istrm, FstReadOptions("StringToFst")); -} - -} // namespace fst - -#endif // FST_LIB_FST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/fstlib.h b/kaldi_io/src/tools/openfst/include/fst/fstlib.h deleted file mode 100644 index de5976d..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/fstlib.h +++ /dev/null @@ -1,153 +0,0 @@ -// fstlib.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \page FstLib FST - Weighted Finite State Transducers -// This is a library for constructing, combining, optimizing, and -// searching "weighted finite-state transducers" (FSTs). Weighted -// finite-state transducers are automata where each transition has an -// input label, an output label, and a weight. The more familiar -// finite-state acceptor is represented as a transducer with each -// transition's input and output the same. 
Finite-state acceptors -// are used to represent sets of strings (specifically, "regular" or -// "rational sets"); finite-state transducers are used to represent -// binary relations between pairs of strings (specifically, "rational -// transductions"). The weights can be used to represent the cost of -// taking a particular transition. -// -// In this library, the transducers are templated on the Arc -// (transition) definition, which allows changing the label, weight, -// and state ID sets. Labels and state IDs are restricted to signed -// integral types but the weight can be an arbitrary type whose -// members satisfy certain algebraic ("semiring") properties. -// -// For more information, see the FST Library Wiki page: -// http://wiki.corp.google.com/twiki/bin/view/Main/FstLibrary - -// \file -// This convenience file includes all other FST inl.h files. -// - -#ifndef FST_LIB_FSTLIB_H__ -#define FST_LIB_FSTLIB_H__ - - -// Abstract FST classes -#include <fst/fst.h> -#include <fst/expanded-fst.h> -#include <fst/mutable-fst.h> - -// Concrete FST classes -#include <fst/compact-fst.h> -#include <fst/const-fst.h> -#include <fst/edit-fst.h> -#include <fst/vector-fst.h> - -// FST algorithms and delayed FST classes -#include <fst/arcsort.h> -#include <fst/arc-map.h> -#include <fst/closure.h> -#include <fst/compose.h> -#include <fst/concat.h> -#include <fst/connect.h> -#include <fst/determinize.h> -#include <fst/difference.h> -#include <fst/encode.h> -#include <fst/epsnormalize.h> -#include <fst/equal.h> -#include <fst/equivalent.h> -#include <fst/factor-weight.h> -#include <fst/intersect.h> -#include <fst/invert.h> -#include <fst/map.h> -#include <fst/minimize.h> -#include <fst/project.h> -#include <fst/prune.h> -#include <fst/push.h> -#include <fst/randequivalent.h> -#include <fst/randgen.h> -#include <fst/rational.h> -#include <fst/relabel.h> -#include <fst/replace.h> -#include <fst/replace-util.h> -#include <fst/reverse.h> -#include <fst/reweight.h> -#include 
<fst/rmepsilon.h> -#include <fst/rmfinalepsilon.h> -#include <fst/shortest-distance.h> -#include <fst/shortest-path.h> -#include <fst/statesort.h> -#include <fst/state-map.h> -#include <fst/synchronize.h> -#include <fst/topsort.h> -#include <fst/union.h> -#include <fst/verify.h> -#include <fst/visit.h> - -// Weights -#include <fst/weight.h> -#include <fst/expectation-weight.h> -#include <fst/float-weight.h> -#include <fst/lexicographic-weight.h> -#include <fst/pair-weight.h> -#include <fst/power-weight.h> -#include <fst/product-weight.h> -#include <fst/random-weight.h> -#include <fst/signed-log-weight.h> -#include <fst/sparse-power-weight.h> -#include <fst/sparse-tuple-weight.h> -#include <fst/string-weight.h> -#include <fst/tuple-weight.h> - -// Auxiliary classes for composition -#include <fst/compose-filter.h> -#include <fst/lookahead-filter.h> -#include <fst/lookahead-matcher.h> -#include <fst/matcher-fst.h> -#include <fst/matcher.h> -#include <fst/state-table.h> - -// Data structures -#include <fst/heap.h> -#include <fst/interval-set.h> -#include <fst/queue.h> -#include <fst/union-find.h> - -// Miscellaneous -#include <fst/accumulator.h> -#include <fst/add-on.h> -#include <fst/arc.h> -#include <fst/arcfilter.h> -#include <fst/cache.h> -#include <fst/complement.h> -#include <fst/dfs-visit.h> -#include <fst/generic-register.h> -#include <fst/label-reachable.h> -#include <fst/partition.h> -#include <fst/properties.h> -#include <fst/register.h> -#include <fst/state-reachable.h> -#include <iostream> -#include <fstream> -#include <sstream> -#include <fst/string.h> -#include <fst/symbol-table.h> -#include <fst/symbol-table-ops.h> -#include <fst/test-properties.h> -#include <fst/util.h> - - -#endif // FST_LIB_FSTLIB_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/generic-register.h b/kaldi_io/src/tools/openfst/include/fst/generic-register.h deleted file mode 100644 index 4f8b512..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/generic-register.h +++ 
/dev/null @@ -1,159 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_LIB_GENERIC_REGISTER_H_ -#define FST_LIB_GENERIC_REGISTER_H_ - -#include <map> -#include <string> - -#include <fst/compat.h> -#include <fst/types.h> - -// Generic class representing a globally-stored correspondence between -// objects of KeyType and EntryType. -// KeyType must: -// a) be such as can be stored as a key in a map<> -// b) be concatenable with a const char* with the + operator -// (or you must subclass and redefine LoadEntryFromSharedObject) -// EntryType must be default constructible. -// -// The third template parameter should be the type of a subclass of this class -// (think CRTP). This is to allow GetRegister() to instantiate and return -// an object of the appropriate type. 
- -namespace fst { - -template<class KeyType, class EntryType, class RegisterType> -class GenericRegister { - public: - typedef KeyType Key; - typedef EntryType Entry; - - static RegisterType *GetRegister() { - FstOnceInit(®ister_init_, - &RegisterType::Init); - - return register_; - } - - void SetEntry(const KeyType &key, - const EntryType &entry) { - MutexLock l(register_lock_); - - register_table_.insert(make_pair(key, entry)); - } - - EntryType GetEntry(const KeyType &key) const { - const EntryType *entry = LookupEntry(key); - if (entry) { - return *entry; - } else { - return LoadEntryFromSharedObject(key); - } - } - - virtual ~GenericRegister() { } - - protected: - // Override this if you want to be able to load missing definitions from - // shared object files. - virtual EntryType LoadEntryFromSharedObject(const KeyType &key) const { - string so_filename = ConvertKeyToSoFilename(key); - - void *handle = dlopen(so_filename.c_str(), RTLD_LAZY); - if (handle == 0) { - LOG(ERROR) << "GenericRegister::GetEntry : " << dlerror(); - return EntryType(); - } - - // We assume that the DSO constructs a static object in its global - // scope that does the registration. Thus we need only load it, not - // call any methods. - const EntryType *entry = this->LookupEntry(key); - if (entry == 0) { - LOG(ERROR) << "GenericRegister::GetEntry : " - << "lookup failed in shared object: " << so_filename; - return EntryType(); - } - return *entry; - } - - // Override this to define how to turn a key into an SO filename. 
- virtual string ConvertKeyToSoFilename(const KeyType& key) const = 0; - - virtual const EntryType *LookupEntry( - const KeyType &key) const { - MutexLock l(register_lock_); - - typename RegisterMapType::const_iterator it = register_table_.find(key); - - if (it != register_table_.end()) { - return &it->second; - } else { - return 0; - } - } - - private: - typedef map<KeyType, EntryType> RegisterMapType; - - static void Init() { - register_lock_ = new Mutex; - register_ = new RegisterType; - } - - static FstOnceType register_init_; - static Mutex *register_lock_; - static RegisterType *register_; - - RegisterMapType register_table_; -}; - -template<class KeyType, class EntryType, class RegisterType> -FstOnceType GenericRegister<KeyType, EntryType, - RegisterType>::register_init_ = FST_ONCE_INIT; - -template<class KeyType, class EntryType, class RegisterType> -Mutex *GenericRegister<KeyType, EntryType, RegisterType>::register_lock_ = 0; - -template<class KeyType, class EntryType, class RegisterType> -RegisterType *GenericRegister<KeyType, EntryType, RegisterType>::register_ = 0; - -// -// GENERIC REGISTRATION -// - -// Generic register-er class capable of creating new register entries in the -// given RegisterType template parameter. This type must define types Key -// and Entry, and have appropriate static GetRegister() and instance -// SetEntry() functions. An easy way to accomplish this is to have RegisterType -// be the type of a subclass of GenericRegister. 
-template<class RegisterType> -class GenericRegisterer { - public: - typedef typename RegisterType::Key Key; - typedef typename RegisterType::Entry Entry; - - GenericRegisterer(Key key, Entry entry) { - RegisterType *reg = RegisterType::GetRegister(); - reg->SetEntry(key, entry); - } -}; - -} // namespace fst - -#endif // FST_LIB_GENERIC_REGISTER_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/heap.h b/kaldi_io/src/tools/openfst/include/fst/heap.h deleted file mode 100644 index a7affbd..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/heap.h +++ /dev/null @@ -1,206 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// All Rights Reserved. -// Author: Johan Schalkwyk ([email protected]) -// -// \file -// Implementation of a heap as in STL, but allows tracking positions -// in heap using a key. The key can be used to do an in-place update of -// values in the heap. - -#ifndef FST_LIB_HEAP_H__ -#define FST_LIB_HEAP_H__ - -#include <vector> -using std::vector; -#include <functional> - -#include <fst/compat.h> -namespace fst { - -// -// \class Heap -// \brief A templated heap implementation that support in-place update -// of values. -// -// The templated heap implementation is a little different from the -// STL priority_queue and the *_heap operations in STL. This heap -// supports indexing of values in the heap via an associated key. 
-// -// Each value is internally associated with a key which is returned -// to the calling functions on heap insert. This key can be used -// to later update the specific value in the heap. -// -// \param T the element type of the hash, can be POD, Data or Ptr to Data -// \param Compare Comparison class for determiningg min-heapness. -// \param whether heap top should be max or min element w.r.t. Compare -// - -static const int kNoKey = -1; -template <class T, class Compare, bool max> -class Heap { - public: - - // Initialize with a specific comparator - Heap(Compare comp) : comp_(comp), size_(0) { } - - // Create a heap with initial size of internal arrays of 0 - Heap() : size_(0) { } - - ~Heap() { } - - // Insert a value into the heap - int Insert(const T& val) { - if (size_ < A_.size()) { - A_[size_] = val; - pos_[key_[size_]] = size_; - } else { - A_.push_back(val); - pos_.push_back(size_); - key_.push_back(size_); - } - - ++size_; - return Insert(val, size_ - 1); - } - - // Update a value at position given by the key. The pos array is first - // indexed by the key. The position gives the position in the heap array. - // Once we have the position we can then use the standard heap operations - // to calculate the parent and child positions. - void Update(int key, const T& val) { - int i = pos_[key]; - if (Better(val, A_[Parent(i)])) { - Insert(val, i); - } else { - A_[i] = val; - Heapify(i); - } - } - - // Return the greatest (max=true) / least (max=false) value w.r.t. - // from the heap. - T Pop() { - T top = A_[0]; - - Swap(0, size_-1); - size_--; - Heapify(0); - return top; - } - - // Return the greatest (max=true) / least (max=false) value w.r.t. - // comp object from the heap. 
- T Top() const { - return A_[0]; - } - - // Check if the heap is empty - bool Empty() const { - return size_ == 0; - } - - void Clear() { - size_ = 0; - } - - - // - // The following protected routines are used in a supportive role - // for managing the heap and keeping the heap properties. - // - private: - // Compute left child of parent - int Left(int i) { - return 2*(i+1)-1; // 0 -> 1, 1 -> 3 - } - - // Compute right child of parent - int Right(int i) { - return 2*(i+1); // 0 -> 2, 1 -> 4 - } - - // Given a child compute parent - int Parent(int i) { - return (i-1)/2; // 1 -> 0, 2 -> 0, 3 -> 1, 4-> 1 - } - - // Swap a child, parent. Use to move element up/down tree. - // Note a little tricky here. When we swap we need to swap: - // the value - // the associated keys - // the position of the value in the heap - void Swap(int j, int k) { - int tkey = key_[j]; - pos_[key_[j] = key_[k]] = j; - pos_[key_[k] = tkey] = k; - - T val = A_[j]; - A_[j] = A_[k]; - A_[k] = val; - } - - // Returns the greater (max=true) / least (max=false) of two - // elements. - bool Better(const T& x, const T& y) { - return max ? comp_(y, x) : comp_(x, y); - } - - // Heapify subtree rooted at index i. 
- void Heapify(int i) { - int l = Left(i); - int r = Right(i); - int largest; - - if (l < size_ && Better(A_[l], A_[i]) ) - largest = l; - else - largest = i; - - if (r < size_ && Better(A_[r], A_[largest]) ) - largest = r; - - if (largest != i) { - Swap(i, largest); - Heapify(largest); - } - } - - - // Insert (update) element at subtree rooted at index i - int Insert(const T& val, int i) { - int p; - while (i > 0 && !Better(A_[p = Parent(i)], val)) { - Swap(i, p); - i = p; - } - - return key_[i]; - } - - private: - Compare comp_; - - vector<int> pos_; - vector<int> key_; - vector<T> A_; - int size_; - - // DISALLOW_COPY_AND_ASSIGN(Heap); -}; - -} // namespace fst - -#endif // FST_LIB_HEAP_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/icu.h b/kaldi_io/src/tools/openfst/include/fst/icu.h deleted file mode 100644 index 3947716..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/icu.h +++ /dev/null @@ -1,116 +0,0 @@ -// icu.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jeffrey Sorensen) -// [email protected] (Fredrik Roubert) -// -// This library implements an unrestricted Thompson/Pike UTF-8 parser and -// serializer. UTF-8 is a restricted subset of this byte stream encoding. See -// http://en.wikipedia.org/wiki/UTF-8 for a good description of the encoding -// details. 
- -#ifndef FST_LIB_ICU_H_ -#define FST_LIB_ICU_H_ - -#include <iostream> -#include <fstream> -#include <sstream> - -namespace fst { - -template <class Label> -bool UTF8StringToLabels(const string &str, vector<Label> *labels) { - const char *data = str.data(); - size_t length = str.size(); - for (int i = 0; i < length; /* no update */) { - int c = data[i++] & 0xff; - if ((c & 0x80) == 0) { - labels->push_back(c); - } else { - if ((c & 0xc0) == 0x80) { - LOG(ERROR) << "UTF8StringToLabels: continuation byte as lead byte"; - return false; - } - int count = (c >= 0xc0) + (c >= 0xe0) + (c >= 0xf0) + (c >= 0xf8) + - (c >= 0xfc); - int code = c & ((1 << (6 - count)) - 1); - while (count != 0) { - if (i == length) { - LOG(ERROR) << "UTF8StringToLabels: truncated utf-8 byte sequence"; - return false; - } - char cb = data[i++]; - if ((cb & 0xc0) != 0x80) { - LOG(ERROR) << "UTF8StringToLabels: missing/invalid continuation byte"; - return false; - } - code = (code << 6) | (cb & 0x3f); - count--; - } - if (code < 0) { - // This should not be able to happen. 
- LOG(ERROR) << "UTF8StringToLabels: Invalid character found: " << c; - return false; - } - labels->push_back(code); - } - } - return true; -} - -template <class Label> -bool LabelsToUTF8String(const vector<Label> &labels, string *str) { - ostringstream ostr; - for (size_t i = 0; i < labels.size(); ++i) { - int32_t code = labels[i]; - if (code < 0) { - LOG(ERROR) << "LabelsToUTF8String: Invalid character found: " << code; - return false; - } else if (code < 0x80) { - ostr << static_cast<char>(code); - } else if (code < 0x800) { - ostr << static_cast<char>((code >> 6) | 0xc0); - ostr << static_cast<char>((code & 0x3f) | 0x80); - } else if (code < 0x10000) { - ostr << static_cast<char>((code >> 12) | 0xe0); - ostr << static_cast<char>(((code >> 6) & 0x3f) | 0x80); - ostr << static_cast<char>((code & 0x3f) | 0x80); - } else if (code < 0x200000) { - ostr << static_cast<char>((code >> 18) | 0xf0); - ostr << static_cast<char>(((code >> 12) & 0x3f) | 0x80); - ostr << static_cast<char>(((code >> 6) & 0x3f) | 0x80); - ostr << static_cast<char>((code & 0x3f) | 0x80); - } else if (code < 0x4000000) { - ostr << static_cast<char>((code >> 24) | 0xf8); - ostr << static_cast<char>(((code >> 18) & 0x3f) | 0x80); - ostr << static_cast<char>(((code >> 12) & 0x3f) | 0x80); - ostr << static_cast<char>(((code >> 6) & 0x3f) | 0x80); - ostr << static_cast<char>((code & 0x3f) | 0x80); - } else { - ostr << static_cast<char>((code >> 30) | 0xfc); - ostr << static_cast<char>(((code >> 24) & 0x3f) | 0x80); - ostr << static_cast<char>(((code >> 18) & 0x3f) | 0x80); - ostr << static_cast<char>(((code >> 12) & 0x3f) | 0x80); - ostr << static_cast<char>(((code >> 6) & 0x3f) | 0x80); - ostr << static_cast<char>((code & 0x3f) | 0x80); - } - } - *str = ostr.str(); - return true; -} - -} // namespace fst - -#endif // FST_LIB_ICU_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/intersect.h b/kaldi_io/src/tools/openfst/include/fst/intersect.h deleted file mode 100644 index f46116f..0000000 --- 
a/kaldi_io/src/tools/openfst/include/fst/intersect.h +++ /dev/null @@ -1,172 +0,0 @@ -// intersect.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Class to compute the intersection of two FSAs - -#ifndef FST_LIB_INTERSECT_H__ -#define FST_LIB_INTERSECT_H__ - -#include <algorithm> -#include <vector> -using std::vector; - -#include <fst/cache.h> -#include <fst/compose.h> - - -namespace fst { - -template <class A, - class M = Matcher<Fst<A> >, - class F = SequenceComposeFilter<M>, - class T = GenericComposeStateTable<A, typename F::FilterState> > -struct IntersectFstOptions : public ComposeFstOptions<A, M, F, T> { - explicit IntersectFstOptions(const CacheOptions &opts, - M *mat1 = 0, M *mat2 = 0, - F *filt = 0, T *sttable= 0) - : ComposeFstOptions<A, M, F, T>(opts, mat1, mat2, filt, sttable) { } - - IntersectFstOptions() {} -}; - -// Computes the intersection (Hadamard product) of two FSAs. This -// version is a delayed Fst. Only strings that are in both automata -// are retained in the result. -// -// The two arguments must be acceptors. One of the arguments must be -// label-sorted. -// -// Complexity: same as ComposeFst. -// -// Caveats: same as ComposeFst. 
-template <class A> -class IntersectFst : public ComposeFst<A> { - public: - using ComposeFst<A>::CreateBase; - using ComposeFst<A>::CreateBase1; - using ComposeFst<A>::Properties; - using ImplToFst< ComposeFstImplBase<A> >::GetImpl; - using ImplToFst< ComposeFstImplBase<A> >::SetImpl; - - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - IntersectFst(const Fst<A> &fst1, const Fst<A> &fst2, - const CacheOptions opts = CacheOptions()) { - bool acceptors = fst1.Properties(kAcceptor, true) && - fst2.Properties(kAcceptor, true); - SetImpl(CreateBase(fst1, fst2, opts)); - if (!acceptors) { - FSTERROR() << "IntersectFst: input FSTs are not acceptors"; - GetImpl()->SetProperties(kError); - } - } - - template <class M, class F, class T> - IntersectFst(const Fst<A> &fst1, const Fst<A> &fst2, - const IntersectFstOptions<A, M, F, T> &opts) { - bool acceptors = fst1.Properties(kAcceptor, true) && - fst2.Properties(kAcceptor, true); - SetImpl(CreateBase1(fst1, fst2, opts)); - if (!acceptors) { - FSTERROR() << "IntersectFst: input FSTs are not acceptors"; - GetImpl()->SetProperties(kError); - } - } - - // See Fst<>::Copy() for doc. - IntersectFst(const IntersectFst<A> &fst, bool safe = false) : - ComposeFst<A>(fst, safe) {} - - // Get a copy of this IntersectFst. See Fst<>::Copy() for further doc. - virtual IntersectFst<A> *Copy(bool safe = false) const { - return new IntersectFst<A>(*this, safe); - } -}; - - -// Specialization for IntersectFst. -template <class A> -class StateIterator< IntersectFst<A> > - : public StateIterator< ComposeFst<A> > { - public: - explicit StateIterator(const IntersectFst<A> &fst) - : StateIterator< ComposeFst<A> >(fst) {} -}; - - -// Specialization for IntersectFst. 
-template <class A> -class ArcIterator< IntersectFst<A> > - : public ArcIterator< ComposeFst<A> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const IntersectFst<A> &fst, StateId s) - : ArcIterator< ComposeFst<A> >(fst, s) {} -}; - -// Useful alias when using StdArc. -typedef IntersectFst<StdArc> StdIntersectFst; - - -typedef ComposeOptions IntersectOptions; - - -// Computes the intersection (Hadamard product) of two FSAs. This -// version writes the intersection to an output MurableFst. Only -// strings that are in both automata are retained in the result. -// -// The two arguments must be acceptors. One of the arguments must be -// label-sorted. -// -// Complexity: same as Compose. -// -// Caveats: same as Compose. -template<class Arc> -void Intersect(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2, - MutableFst<Arc> *ofst, - const IntersectOptions &opts = IntersectOptions()) { - typedef Matcher< Fst<Arc> > M; - - if (opts.filter_type == AUTO_FILTER) { - CacheOptions nopts; - nopts.gc_limit = 0; // Cache only the last state for fastest copy. - *ofst = IntersectFst<Arc>(ifst1, ifst2, nopts); - } else if (opts.filter_type == SEQUENCE_FILTER) { - IntersectFstOptions<Arc> iopts; - iopts.gc_limit = 0; // Cache only the last state for fastest copy. - *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts); - } else if (opts.filter_type == ALT_SEQUENCE_FILTER) { - IntersectFstOptions<Arc, M, AltSequenceComposeFilter<M> > iopts; - iopts.gc_limit = 0; // Cache only the last state for fastest copy. - *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts); - } else if (opts.filter_type == MATCH_FILTER) { - IntersectFstOptions<Arc, M, MatchComposeFilter<M> > iopts; - iopts.gc_limit = 0; // Cache only the last state for fastest copy. 
- *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts); - } - - if (opts.connect) - Connect(ofst); -} - -} // namespace fst - -#endif // FST_LIB_INTERSECT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/interval-set.h b/kaldi_io/src/tools/openfst/include/fst/interval-set.h deleted file mode 100644 index 58cad44..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/interval-set.h +++ /dev/null @@ -1,381 +0,0 @@ -// interval-set.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Class to represent and operate on sets of intervals. - -#ifndef FST_LIB_INTERVAL_SET_H__ -#define FST_LIB_INTERVAL_SET_H__ - -#include <iostream> -#include <vector> -using std::vector; - - -#include <fst/util.h> - - -namespace fst { - -// Stores and operates on a set of half-open integral intervals [a,b) -// of signed integers of type T. 
-template <typename T> -class IntervalSet { - public: - struct Interval { - T begin_; - T end_; - - Interval() : begin_(-1), end_(-1) {} - - Interval(T b, T e) : begin_(b), end_(e) {} - - bool operator<(const Interval &i) const { - return begin_ < i.begin_ || (begin_ == i.begin_ && end_ > i.end_); - } - - bool operator==(const Interval &i) const { - return begin_ == i.begin_ && end_ == i.end_; - } - - bool operator!=(const Interval &i) const { - return begin_ != i.begin_ || end_ != i.end_; - } - - istream &Read(istream &strm) { - T n; - ReadType(strm, &n); - begin_ = n; - ReadType(strm, &n); - end_ = n; - return strm; - } - - ostream &Write(ostream &strm) const { - T n = begin_; - WriteType(strm, n); - n = end_; - WriteType(strm, n); - return strm; - } - }; - - IntervalSet() : count_(-1) {} - - // Returns the interval set as a vector. - vector<Interval> *Intervals() { return &intervals_; } - - const vector<Interval> *Intervals() const { return &intervals_; } - - bool Empty() const { return intervals_.empty(); } - - T Size() const { return intervals_.size(); } - - // Number of points in the intervals (undefined if not normalized). - T Count() const { return count_; } - - void Clear() { - intervals_.clear(); - count_ = 0; - } - - // Adds an interval set to the set. The result may not be normalized. - void Union(const IntervalSet<T> &iset) { - const vector<Interval> *intervals = iset.Intervals(); - for (typename vector<Interval>::const_iterator it = intervals->begin(); - it != intervals->end(); ++it) - intervals_.push_back(*it); - } - - // Requires intervals be normalized. - bool Member(T value) const { - Interval interval(value, value); - typename vector<Interval>::const_iterator lb = - lower_bound(intervals_.begin(), intervals_.end(), interval); - if (lb == intervals_.begin()) - return false; - return (--lb)->end_ > value; - } - - // Requires intervals be normalized. 
- bool operator==(const IntervalSet<T>& iset) const { - return *(iset.Intervals()) == intervals_; - } - - // Requires intervals be normalized. - bool operator!=(const IntervalSet<T>& iset) const { - return *(iset.Intervals()) != intervals_; - } - - bool Singleton() const { - return intervals_.size() == 1 && - intervals_[0].begin_ + 1 == intervals_[0].end_; - } - - - // Sorts; collapses overlapping and adjacent interals; sets count. - void Normalize(); - - // Intersects an interval set with the set. Requires intervals be - // normalized. The result is normalized. - void Intersect(const IntervalSet<T> &iset, IntervalSet<T> *oset) const; - - // Complements the set w.r.t [0, maxval). Requires intervals be - // normalized. The result is normalized. - void Complement(T maxval, IntervalSet<T> *oset) const; - - // Subtract an interval set from the set. Requires intervals be - // normalized. The result is normalized. - void Difference(const IntervalSet<T> &iset, IntervalSet<T> *oset) const; - - // Determines if an interval set overlaps with the set. Requires - // intervals be normalized. - bool Overlaps(const IntervalSet<T> &iset) const; - - // Determines if an interval set overlaps with the set but neither - // is contained in the other. Requires intervals be normalized. - bool StrictlyOverlaps(const IntervalSet<T> &iset) const; - - // Determines if an interval set is contained within the set. Requires - // intervals be normalized. - bool Contains(const IntervalSet<T> &iset) const; - - istream &Read(istream &strm) { - ReadType(strm, &intervals_); - return ReadType(strm, &count_); - } - - ostream &Write(ostream &strm) const { - WriteType(strm, intervals_); - return WriteType(strm, count_); - } - - private: - vector<Interval> intervals_; - T count_; -}; - -// Sorts; collapses overlapping and adjacent interavls; sets count. 
-template <typename T> -void IntervalSet<T>::Normalize() { - sort(intervals_.begin(), intervals_.end()); - - count_ = 0; - T size = 0; - for (T i = 0; i < intervals_.size(); ++i) { - Interval &inti = intervals_[i]; - if (inti.begin_ == inti.end_) - continue; - for (T j = i + 1; j < intervals_.size(); ++j) { - Interval &intj = intervals_[j]; - if (intj.begin_ > inti.end_) - break; - if (intj.end_ > inti.end_) - inti.end_ = intj.end_; - ++i; - } - count_ += inti.end_ - inti.begin_; - intervals_[size++] = inti; - } - intervals_.resize(size); -} - -// Intersects an interval set with the set. Requires intervals be normalized. -// The result is normalized. -template <typename T> -void IntervalSet<T>::Intersect(const IntervalSet<T> &iset, - IntervalSet<T> *oset) const { - const vector<Interval> *iintervals = iset.Intervals(); - vector<Interval> *ointervals = oset->Intervals(); - typename vector<Interval>::const_iterator it1 = intervals_.begin(); - typename vector<Interval>::const_iterator it2 = iintervals->begin(); - - ointervals->clear(); - oset->count_ = 0; - - while (it1 != intervals_.end() && it2 != iintervals->end()) { - if (it1->end_ <= it2->begin_) { - ++it1; - } else if (it2->end_ <= it1->begin_) { - ++it2; - } else { - Interval interval; - interval.begin_ = max(it1->begin_, it2->begin_); - interval.end_ = min(it1->end_, it2->end_); - ointervals->push_back(interval); - oset->count_ += interval.end_ - interval.begin_; - if (it1->end_ < it2->end_) - ++it1; - else - ++it2; - } - } -} - -// Complements the set w.r.t [0, maxval). Requires intervals be normalized. -// The result is normalized. 
-template <typename T> -void IntervalSet<T>::Complement(T maxval, IntervalSet<T> *oset) const { - vector<Interval> *ointervals = oset->Intervals(); - ointervals->clear(); - oset->count_ = 0; - - Interval interval; - interval.begin_ = 0; - for (typename vector<Interval>::const_iterator it = intervals_.begin(); - it != intervals_.end(); - ++it) { - interval.end_ = min(it->begin_, maxval); - if (interval.begin_ < interval.end_) { - ointervals->push_back(interval); - oset->count_ += interval.end_ - interval.begin_; - } - interval.begin_ = it->end_; - } - interval.end_ = maxval; - if (interval.begin_ < interval.end_) { - ointervals->push_back(interval); - oset->count_ += interval.end_ - interval.begin_; - } -} - -// Subtract an interval set from the set. Requires intervals be normalized. -// The result is normalized. -template <typename T> -void IntervalSet<T>::Difference(const IntervalSet<T> &iset, - IntervalSet<T> *oset) const { - if (intervals_.empty()) { - oset->Intervals()->clear(); - oset->count_ = 0; - } else { - IntervalSet<T> cset; - iset.Complement(intervals_.back().end_, &cset); - Intersect(cset, oset); - } -} - -// Determines if an interval set overlaps with the set. Requires -// intervals be normalized. -template <typename T> -bool IntervalSet<T>::Overlaps(const IntervalSet<T> &iset) const { - const vector<Interval> *intervals = iset.Intervals(); - typename vector<Interval>::const_iterator it1 = intervals_.begin(); - typename vector<Interval>::const_iterator it2 = intervals->begin(); - - while (it1 != intervals_.end() && it2 != intervals->end()) { - if (it1->end_ <= it2->begin_) { - ++it1; - } else if (it2->end_ <= it1->begin_) { - ++it2; - } else { - return true; - } - } - return false; -} - -// Determines if an interval set overlaps with the set but neither -// is contained in the other. Requires intervals be normalized. 
-template <typename T> -bool IntervalSet<T>::StrictlyOverlaps(const IntervalSet<T> &iset) const { - const vector<Interval> *intervals = iset.Intervals(); - typename vector<Interval>::const_iterator it1 = intervals_.begin(); - typename vector<Interval>::const_iterator it2 = intervals->begin(); - bool only1 = false; // point in intervals_ but not intervals - bool only2 = false; // point in intervals but not intervals_ - bool overlap = false; // point in both intervals_ and intervals - - while (it1 != intervals_.end() && it2 != intervals->end()) { - if (it1->end_ <= it2->begin_) { // no overlap - it1 first - only1 = true; - ++it1; - } else if (it2->end_ <= it1->begin_) { // no overlap - it2 first - only2 = true; - ++it2; - } else if (it2->begin_ == it1->begin_ && it2->end_ == it1->end_) { // equals - overlap = true; - ++it1; - ++it2; - } else if (it2->begin_ <= it1->begin_ && it2->end_ >= it1->end_) { // 1 c 2 - only2 = true; - overlap = true; - ++it1; - } else if (it1->begin_ <= it2->begin_ && it1->end_ >= it2->end_) { // 2 c 1 - only1 = true; - overlap = true; - ++it2; - } else { // strict overlap - only1 = true; - only2 = true; - overlap = true; - } - if (only1 == true && only2 == true && overlap == true) - return true; - } - if (it1 != intervals_.end()) - only1 = true; - if (it2 != intervals->end()) - only2 = true; - - return only1 == true && only2 == true && overlap == true; -} - -// Determines if an interval set is contained within the set. Requires -// intervals be normalized. 
-template <typename T> -bool IntervalSet<T>::Contains(const IntervalSet<T> &iset) const { - if (iset.Count() > Count()) - return false; - - const vector<Interval> *intervals = iset.Intervals(); - typename vector<Interval>::const_iterator it1 = intervals_.begin(); - typename vector<Interval>::const_iterator it2 = intervals->begin(); - - while (it1 != intervals_.end() && it2 != intervals->end()) { - if (it1->end_ <= it2->begin_) { // no overlap - it1 first - ++it1; - } else if (it2->begin_ < it1->begin_ || it2->end_ > it1->end_) { // no C - return false; - } else if (it2->end_ == it1->end_) { - ++it1; - ++it2; - } else { - ++it2; - } - } - return it2 == intervals->end(); -} - -template <typename T> -ostream &operator<<(ostream &strm, const IntervalSet<T> &s) { - typedef typename IntervalSet<T>::Interval Interval; - const vector<Interval> *intervals = s.Intervals(); - strm << "{"; - for (typename vector<Interval>::const_iterator it = intervals->begin(); - it != intervals->end(); - ++it) { - if (it != intervals->begin()) - strm << ","; - strm << "[" << it->begin_ << "," << it->end_ << ")"; - } - strm << "}"; - return strm; -} - -} // namespace fst - -#endif // FST_LIB_INTERVAL_SET_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/invert.h b/kaldi_io/src/tools/openfst/include/fst/invert.h deleted file mode 100644 index bc83a5d..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/invert.h +++ /dev/null @@ -1,125 +0,0 @@ -// invert.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Functions and classes to invert an Fst. - -#ifndef FST_LIB_INVERT_H__ -#define FST_LIB_INVERT_H__ - -#include <fst/arc-map.h> -#include <fst/mutable-fst.h> - - -namespace fst { - -// Mapper to implement inversion of an arc. -template <class A> struct InvertMapper { - InvertMapper() {} - - A operator()(const A &arc) { - return A(arc.olabel, arc.ilabel, arc.weight, arc.nextstate); - } - - MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } - - MapSymbolsAction InputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; } - - MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;} - - uint64 Properties(uint64 props) { return InvertProperties(props); } -}; - - -// Inverts the transduction corresponding to an FST by exchanging the -// FST's input and output labels. This version modifies its input. -// -// Complexity: -// - Time: O(V + E) -// - Space: O(1) -// where V = # of states and E = # of arcs. -template<class Arc> inline -void Invert(MutableFst<Arc> *fst) { - SymbolTable *input = fst->InputSymbols() ? fst->InputSymbols()->Copy() : 0; - SymbolTable *output = fst->OutputSymbols() ? fst->OutputSymbols()->Copy() : 0; - ArcMap(fst, InvertMapper<Arc>()); - fst->SetInputSymbols(output); - fst->SetOutputSymbols(input); - delete input; - delete output; -} - - -// Inverts the transduction corresponding to an FST by exchanging the -// FST's input and output labels. This version is a delayed Fst. -// -// Complexity: -// - Time: O(v + e) -// - Space: O(1) -// where v = # of states visited, e = # of arcs visited. Constant -// time and to visit an input state or arc is assumed and exclusive -// of caching. 
-template <class A> -class InvertFst : public ArcMapFst<A, A, InvertMapper<A> > { - public: - typedef A Arc; - typedef InvertMapper<A> C; - typedef ArcMapFstImpl< A, A, InvertMapper<A> > Impl; - using ImplToFst<Impl>::GetImpl; - - explicit InvertFst(const Fst<A> &fst) : ArcMapFst<A, A, C>(fst, C()) { - GetImpl()->SetOutputSymbols(fst.InputSymbols()); - GetImpl()->SetInputSymbols(fst.OutputSymbols()); - } - - // See Fst<>::Copy() for doc. - InvertFst(const InvertFst<A> &fst, bool safe = false) - : ArcMapFst<A, A, C>(fst, safe) {} - - // Get a copy of this InvertFst. See Fst<>::Copy() for further doc. - virtual InvertFst<A> *Copy(bool safe = false) const { - return new InvertFst(*this, safe); - } -}; - - -// Specialization for InvertFst. -template <class A> -class StateIterator< InvertFst<A> > - : public StateIterator< ArcMapFst<A, A, InvertMapper<A> > > { - public: - explicit StateIterator(const InvertFst<A> &fst) - : StateIterator< ArcMapFst<A, A, InvertMapper<A> > >(fst) {} -}; - - -// Specialization for InvertFst. -template <class A> -class ArcIterator< InvertFst<A> > - : public ArcIterator< ArcMapFst<A, A, InvertMapper<A> > > { - public: - ArcIterator(const InvertFst<A> &fst, typename A::StateId s) - : ArcIterator< ArcMapFst<A, A, InvertMapper<A> > >(fst, s) {} -}; - - -// Useful alias when using StdArc. -typedef InvertFst<StdArc> StdInvertFst; - -} // namespace fst - -#endif // FST_LIB_INVERT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/label-reachable.h b/kaldi_io/src/tools/openfst/include/fst/label-reachable.h deleted file mode 100644 index af06eef..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/label-reachable.h +++ /dev/null @@ -1,565 +0,0 @@ -// label_reachable.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Class to determine if a non-epsilon label can be read as the -// first non-epsilon symbol along some path from a given state. - - -#ifndef FST_LIB_LABEL_REACHABLE_H__ -#define FST_LIB_LABEL_REACHABLE_H__ - -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <vector> -using std::vector; - -#include <fst/accumulator.h> -#include <fst/arcsort.h> -#include <fst/interval-set.h> -#include <fst/state-reachable.h> -#include <fst/vector-fst.h> - - -namespace fst { - -// Stores shareable data for label reachable class copies. 
-template <typename L> -class LabelReachableData { - public: - typedef L Label; - typedef typename IntervalSet<L>::Interval Interval; - - explicit LabelReachableData(bool reach_input, bool keep_relabel_data = true) - : reach_input_(reach_input), - keep_relabel_data_(keep_relabel_data), - have_relabel_data_(true), - final_label_(kNoLabel) {} - - ~LabelReachableData() {} - - bool ReachInput() const { return reach_input_; } - - vector< IntervalSet<L> > *IntervalSets() { return &isets_; } - - unordered_map<L, L> *Label2Index() { - if (!have_relabel_data_) - FSTERROR() << "LabelReachableData: no relabeling data"; - return &label2index_; - } - - Label FinalLabel() { - if (final_label_ == kNoLabel) - final_label_ = label2index_[kNoLabel]; - return final_label_; - } - - static LabelReachableData<L> *Read(istream &istrm) { - LabelReachableData<L> *data = new LabelReachableData<L>(); - - ReadType(istrm, &data->reach_input_); - ReadType(istrm, &data->keep_relabel_data_); - data->have_relabel_data_ = data->keep_relabel_data_; - if (data->keep_relabel_data_) - ReadType(istrm, &data->label2index_); - ReadType(istrm, &data->final_label_); - ReadType(istrm, &data->isets_); - return data; - } - - bool Write(ostream &ostrm) { - WriteType(ostrm, reach_input_); - WriteType(ostrm, keep_relabel_data_); - if (keep_relabel_data_) - WriteType(ostrm, label2index_); - WriteType(ostrm, FinalLabel()); - WriteType(ostrm, isets_); - return true; - } - - int RefCount() const { return ref_count_.count(); } - int IncrRefCount() { return ref_count_.Incr(); } - int DecrRefCount() { return ref_count_.Decr(); } - - private: - LabelReachableData() {} - - bool reach_input_; // Input or output labels considered? - bool keep_relabel_data_; // Save label2index_ to file? - bool have_relabel_data_; // Using label2index_? - Label final_label_; // Final label - RefCounter ref_count_; // Reference count. - unordered_map<L, L> label2index_; // Finds index for a label. 
- vector<IntervalSet <L> > isets_; // Interval sets per state. - - DISALLOW_COPY_AND_ASSIGN(LabelReachableData); -}; - - -// Tests reachability of labels from a given state. If reach_input = -// true, then input labels are considered, o.w. output labels are -// considered. To test for reachability from a state s, first do -// SetState(s). Then a label l can be reached from state s of FST f -// iff Reach(r) is true where r = Relabel(l). The relabeling is -// required to ensure a compact representation of the reachable -// labels. - -// The whole FST can be relabeled instead with Relabel(&f, -// reach_input) so that the test Reach(r) applies directly to the -// labels of the transformed FST f. The relabeled FST will also be -// sorted appropriately for composition. -// -// Reachablity of a final state from state s (via an epsilon path) -// can be tested with ReachFinal(); -// -// Reachability can also be tested on the set of labels specified by -// an arc iterator, useful for FST composition. In particular, -// Reach(aiter, ...) is true if labels on the input (output) side of -// the transitions of the arc iterator, when iter_input is true -// (false), can be reached from the state s. The iterator labels must -// have already been relabeled. -// -// With the arc iterator test of reachability, the begin position, end -// position and accumulated arc weight of the matches can be -// returned. The optional template argument controls how reachable arc -// weights are accumulated. The default uses the semiring -// Plus(). Alternative ones can be used to distribute the weights in -// composition in various ways. 
-template <class A, class S = DefaultAccumulator<A> > -class LabelReachable { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename IntervalSet<Label>::Interval Interval; - - LabelReachable(const Fst<A> &fst, bool reach_input, S *s = 0, - bool keep_relabel_data = true) - : fst_(new VectorFst<Arc>(fst)), - s_(kNoStateId), - data_(new LabelReachableData<Label>(reach_input, keep_relabel_data)), - accumulator_(s ? s : new S()), - ncalls_(0), - nintervals_(0), - error_(false) { - StateId ins = fst_->NumStates(); - TransformFst(); - FindIntervals(ins); - delete fst_; - } - - explicit LabelReachable(LabelReachableData<Label> *data, S *s = 0) - : fst_(0), - s_(kNoStateId), - data_(data), - accumulator_(s ? s : new S()), - ncalls_(0), - nintervals_(0), - error_(false) { - data_->IncrRefCount(); - } - - LabelReachable(const LabelReachable<A, S> &reachable) : - fst_(0), - s_(kNoStateId), - data_(reachable.data_), - accumulator_(new S(*reachable.accumulator_)), - ncalls_(0), - nintervals_(0), - error_(reachable.error_) { - data_->IncrRefCount(); - } - - ~LabelReachable() { - if (!data_->DecrRefCount()) - delete data_; - delete accumulator_; - if (ncalls_ > 0) { - VLOG(2) << "# of calls: " << ncalls_; - VLOG(2) << "# of intervals/call: " << (nintervals_ / ncalls_); - } - } - - // Relabels w.r.t labels that give compact label sets. - Label Relabel(Label label) { - if (label == 0 || error_) - return label; - unordered_map<Label, Label> &label2index = *data_->Label2Index(); - Label &relabel = label2index[label]; - if (!relabel) // Add new label - relabel = label2index.size() + 1; - return relabel; - } - - // Relabels Fst w.r.t to labels that give compact label sets. 
- void Relabel(MutableFst<Arc> *fst, bool relabel_input) { - for (StateIterator< MutableFst<Arc> > siter(*fst); - !siter.Done(); siter.Next()) { - StateId s = siter.Value(); - for (MutableArcIterator< MutableFst<Arc> > aiter(fst, s); - !aiter.Done(); - aiter.Next()) { - Arc arc = aiter.Value(); - if (relabel_input) - arc.ilabel = Relabel(arc.ilabel); - else - arc.olabel = Relabel(arc.olabel); - aiter.SetValue(arc); - } - } - if (relabel_input) { - ArcSort(fst, ILabelCompare<Arc>()); - fst->SetInputSymbols(0); - } else { - ArcSort(fst, OLabelCompare<Arc>()); - fst->SetOutputSymbols(0); - } - } - - // Returns relabeling pairs (cf. relabel.h::Relabel()). - // If 'avoid_collisions' is true, extra pairs are added to - // ensure no collisions when relabeling automata that have - // labels unseen here. - void RelabelPairs(vector<pair<Label, Label> > *pairs, - bool avoid_collisions = false) { - pairs->clear(); - unordered_map<Label, Label> &label2index = *data_->Label2Index(); - // Maps labels to their new values in [1, label2index().size()] - for (typename unordered_map<Label, Label>::const_iterator - it = label2index.begin(); it != label2index.end(); ++it) - if (it->second != data_->FinalLabel()) - pairs->push_back(pair<Label, Label>(it->first, it->second)); - if (avoid_collisions) { - // Ensures any label in [1, label2index().size()] is mapped either - // by the above step or to label2index() + 1 (to avoid collisions). - for (int i = 1; i <= label2index.size(); ++i) { - typename unordered_map<Label, Label>::const_iterator - it = label2index.find(i); - if (it == label2index.end() || it->second == data_->FinalLabel()) - pairs->push_back(pair<Label, Label>(i, label2index.size() + 1)); - } - } - } - - // Set current state. Optionally set state associated - // with arc iterator to be passed to Reach. 
- void SetState(StateId s, StateId aiter_s = kNoStateId) { - s_ = s; - if (aiter_s != kNoStateId) { - accumulator_->SetState(aiter_s); - if (accumulator_->Error()) error_ = true; - } - } - - // Can reach this label from current state? - // Original labels must be transformed by the Relabel methods above. - bool Reach(Label label) { - if (label == 0 || error_) - return false; - vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); - return isets[s_].Member(label); - - } - - // Can reach final state (via epsilon transitions) from this state? - bool ReachFinal() { - if (error_) return false; - vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); - return isets[s_].Member(data_->FinalLabel()); - } - - // Initialize with secondary FST to be used with Reach(Iterator,...). - // If copy is true, then 'fst' is a copy of the FST used in the - // previous call to this method (useful to avoid unnecessary updates). - template <class F> - void ReachInit(const F &fst, bool copy = false) { - accumulator_->Init(fst, copy); - if (accumulator_->Error()) error_ = true; - } - - // Can reach any arc iterator label between iterator positions - // aiter_begin and aiter_end? If aiter_input = true, then iterator - // input labels are considered, o.w. output labels are considered. - // Arc iterator labels must be transformed by the Relabel methods - // above. If compute_weight is true, user may call ReachWeight(). 
- template <class Iterator> - bool Reach(Iterator *aiter, ssize_t aiter_begin, - ssize_t aiter_end, bool aiter_input, bool compute_weight) { - if (error_) return false; - vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); - const vector<Interval> *intervals = isets[s_].Intervals(); - ++ncalls_; - nintervals_ += intervals->size(); - - reach_begin_ = -1; - reach_end_ = -1; - reach_weight_ = Weight::Zero(); - - uint32 flags = aiter->Flags(); // save flags to restore them on exit - aiter->SetFlags(kArcNoCache, kArcNoCache); // make caching optional - aiter->Seek(aiter_begin); - - if (2 * (aiter_end - aiter_begin) < intervals->size()) { - // Check each arc against intervals. - // Set arc iterator flags to only compute the ilabel or olabel values, - // since they are the only values required for most of the arcs processed. - aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue, - kArcValueFlags); - Label reach_label = kNoLabel; - for (ssize_t aiter_pos = aiter_begin; - aiter_pos < aiter_end; aiter->Next(), ++aiter_pos) { - const A &arc = aiter->Value(); - Label label = aiter_input ? arc.ilabel : arc.olabel; - if (label == reach_label || Reach(label)) { - reach_label = label; - if (reach_begin_ < 0) - reach_begin_ = aiter_pos; - reach_end_ = aiter_pos + 1; - if (compute_weight) { - if (!(aiter->Flags() & kArcWeightValue)) { - // If the 'arc.weight' wasn't computed by the call - // to 'aiter->Value()' above, we need to call - // 'aiter->Value()' again after having set the arc iterator - // flags to compute the arc weight value. - aiter->SetFlags(kArcWeightValue, kArcValueFlags); - const A &arcb = aiter->Value(); - // Call the accumulator. - reach_weight_ = accumulator_->Sum(reach_weight_, arcb.weight); - // Only ilabel or olabel required to process the following - // arcs. - aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue, - kArcValueFlags); - } else { - // Call the accumulator. 
- reach_weight_ = accumulator_->Sum(reach_weight_, arc.weight); - } - } - } - } - } else { - // Check each interval against arcs - ssize_t begin_low, end_low = aiter_begin; - for (typename vector<Interval>::const_iterator - iiter = intervals->begin(); - iiter != intervals->end(); ++iiter) { - begin_low = LowerBound(aiter, end_low, aiter_end, - aiter_input, iiter->begin); - end_low = LowerBound(aiter, begin_low, aiter_end, - aiter_input, iiter->end); - if (end_low - begin_low > 0) { - if (reach_begin_ < 0) - reach_begin_ = begin_low; - reach_end_ = end_low; - if (compute_weight) { - aiter->SetFlags(kArcWeightValue, kArcValueFlags); - reach_weight_ = accumulator_->Sum(reach_weight_, aiter, - begin_low, end_low); - } - } - } - } - - aiter->SetFlags(flags, kArcFlags); // restore original flag values - return reach_begin_ >= 0; - } - - // Returns iterator position of first matching arc. - ssize_t ReachBegin() const { return reach_begin_; } - - // Returns iterator position one past last matching arc. - ssize_t ReachEnd() const { return reach_end_; } - - // Return the sum of the weights for matching arcs. - // Valid only if compute_weight was true in Reach() call. - Weight ReachWeight() const { return reach_weight_; } - - // Access to the relabeling map. Excludes epsilon (0) label but - // includes kNoLabel that is used internally for super-final - // transitons. - const unordered_map<Label, Label>& Label2Index() const { - return *data_->Label2Index(); - } - - LabelReachableData<Label> *GetData() const { return data_; } - - bool Error() const { return error_ || accumulator_->Error(); } - - private: - // Redirects labeled arcs (input or output labels determined by - // ReachInput()) to new label-specific final states. Each original - // final state is redirected via a transition labeled with kNoLabel - // to a new kNoLabel-specific final state. Creates super-initial - // state for all states with zero in-degree. 
- void TransformFst() { - StateId ins = fst_->NumStates(); - StateId ons = ins; - - vector<ssize_t> indeg(ins, 0); - - // Redirects labeled arcs to new final states. - for (StateId s = 0; s < ins; ++s) { - for (MutableArcIterator< VectorFst<Arc> > aiter(fst_, s); - !aiter.Done(); - aiter.Next()) { - Arc arc = aiter.Value(); - Label label = data_->ReachInput() ? arc.ilabel : arc.olabel; - if (label) { - if (label2state_.find(label) == label2state_.end()) { - label2state_[label] = ons; - indeg.push_back(0); - ++ons; - } - arc.nextstate = label2state_[label]; - aiter.SetValue(arc); - } - ++indeg[arc.nextstate]; // Finds in-degrees for next step. - } - - // Redirects final weights to new final state. - Weight final = fst_->Final(s); - if (final != Weight::Zero()) { - if (label2state_.find(kNoLabel) == label2state_.end()) { - label2state_[kNoLabel] = ons; - indeg.push_back(0); - ++ons; - } - Arc arc(kNoLabel, kNoLabel, final, label2state_[kNoLabel]); - fst_->AddArc(s, arc); - ++indeg[arc.nextstate]; // Finds in-degrees for next step. - - fst_->SetFinal(s, Weight::Zero()); - } - } - - // Add new final states to Fst. - while (fst_->NumStates() < ons) { - StateId s = fst_->AddState(); - fst_->SetFinal(s, Weight::One()); - } - - // Creates a super-initial state for all states with zero in-degree. 
- StateId start = fst_->AddState(); - fst_->SetStart(start); - for (StateId s = 0; s < start; ++s) { - if (indeg[s] == 0) { - Arc arc(0, 0, Weight::One(), s); - fst_->AddArc(start, arc); - } - } - } - - void FindIntervals(StateId ins) { - StateReachable<A, Label> state_reachable(*fst_); - if (state_reachable.Error()) { - error_ = true; - return; - } - - vector<Label> &state2index = state_reachable.State2Index(); - vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); - isets = state_reachable.IntervalSets(); - isets.resize(ins); - - unordered_map<Label, Label> &label2index = *data_->Label2Index(); - for (typename unordered_map<Label, StateId>::const_iterator - it = label2state_.begin(); - it != label2state_.end(); - ++it) { - Label l = it->first; - StateId s = it->second; - Label i = state2index[s]; - label2index[l] = i; - } - label2state_.clear(); - - double nintervals = 0; - ssize_t non_intervals = 0; - for (ssize_t s = 0; s < ins; ++s) { - nintervals += isets[s].Size(); - if (isets[s].Size() > 1) { - ++non_intervals; - VLOG(3) << "state: " << s << " # of intervals: " << isets[s].Size(); - } - } - VLOG(2) << "# of states: " << ins; - VLOG(2) << "# of intervals: " << nintervals; - VLOG(2) << "# of intervals/state: " << nintervals/ins; - VLOG(2) << "# of non-interval states: " << non_intervals; - } - - template <class Iterator> - ssize_t LowerBound(Iterator *aiter, ssize_t aiter_begin, - ssize_t aiter_end, bool aiter_input, - Label match_label) const { - // Only need to compute the ilabel or olabel of arcs when - // performing the binary search. - aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue, - kArcValueFlags); - ssize_t low = aiter_begin; - ssize_t high = aiter_end; - while (low < high) { - ssize_t mid = (low + high) / 2; - aiter->Seek(mid); - Label label = aiter_input ? 
- aiter->Value().ilabel : aiter->Value().olabel; - if (label > match_label) { - high = mid; - } else if (label < match_label) { - low = mid + 1; - } else { - // Find first matching label (when non-deterministic) - for (ssize_t i = mid; i > low; --i) { - aiter->Seek(i - 1); - label = aiter_input ? aiter->Value().ilabel : aiter->Value().olabel; - if (label != match_label) { - aiter->Seek(i); - aiter->SetFlags(kArcValueFlags, kArcValueFlags); - return i; - } - } - aiter->SetFlags(kArcValueFlags, kArcValueFlags); - return low; - } - } - aiter->Seek(low); - aiter->SetFlags(kArcValueFlags, kArcValueFlags); - return low; - } - - VectorFst<Arc> *fst_; - StateId s_; // Current state - unordered_map<Label, StateId> label2state_; // Finds final state for a label - - ssize_t reach_begin_; // Iterator pos of first match - ssize_t reach_end_; // Iterator pos after last match - Weight reach_weight_; // Gives weight sum of arc iterator - // arcs with reachable labels. - LabelReachableData<Label> *data_; // Shareable data between copies - S *accumulator_; // Sums arc weights - - double ncalls_; - double nintervals_; - bool error_; - - void operator=(const LabelReachable<A, S> &); // Disallow -}; - -} // namespace fst - -#endif // FST_LIB_LABEL_REACHABLE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/lexicographic-weight.h b/kaldi_io/src/tools/openfst/include/fst/lexicographic-weight.h deleted file mode 100644 index 4b55c50..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/lexicographic-weight.h +++ /dev/null @@ -1,151 +0,0 @@ -// lexicographic-weight.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Richard Sproat) -// -// \file -// Lexicographic weight set and associated semiring operation definitions. -// -// A lexicographic weight is a sequence of weights, each of which must have the -// path property and Times() must be (strongly) cancellative -// (for all a,b,c != Zero(): Times(c, a) = Times(c, b) => a = b, -// Times(a, c) = Times(b, c) => a = b). -// The + operation on two weights a and b is the lexicographically -// prior of a and b. - -#ifndef FST_LIB_LEXICOGRAPHIC_WEIGHT_H__ -#define FST_LIB_LEXICOGRAPHIC_WEIGHT_H__ - -#include <string> - -#include <fst/pair-weight.h> -#include <fst/weight.h> - - -namespace fst { - -template<class W1, class W2> -class LexicographicWeight : public PairWeight<W1, W2> { - public: - using PairWeight<W1, W2>::Value1; - using PairWeight<W1, W2>::Value2; - using PairWeight<W1, W2>::SetValue1; - using PairWeight<W1, W2>::SetValue2; - using PairWeight<W1, W2>::Zero; - using PairWeight<W1, W2>::One; - using PairWeight<W1, W2>::NoWeight; - using PairWeight<W1, W2>::Quantize; - using PairWeight<W1, W2>::Reverse; - - typedef LexicographicWeight<typename W1::ReverseWeight, - typename W2::ReverseWeight> - ReverseWeight; - - LexicographicWeight() {} - - LexicographicWeight(const PairWeight<W1, W2>& w) - : PairWeight<W1, W2>(w) {} - - LexicographicWeight(W1 w1, W2 w2) : PairWeight<W1, W2>(w1, w2) { - uint64 props = kPath; - if ((W1::Properties() & props) != props) { - FSTERROR() << "LexicographicWeight must " - << "have the path property: " << 
W1::Type(); - SetValue1(W1::NoWeight()); - } - if ((W2::Properties() & props) != props) { - FSTERROR() << "LexicographicWeight must " - << "have the path property: " << W2::Type(); - SetValue2(W2::NoWeight()); - } - } - - static const LexicographicWeight<W1, W2> &Zero() { - static const LexicographicWeight<W1, W2> zero(PairWeight<W1, W2>::Zero()); - return zero; - } - - static const LexicographicWeight<W1, W2> &One() { - static const LexicographicWeight<W1, W2> one(PairWeight<W1, W2>::One()); - return one; - } - - static const LexicographicWeight<W1, W2> &NoWeight() { - static const LexicographicWeight<W1, W2> no_weight( - PairWeight<W1, W2>::NoWeight()); - return no_weight; - } - - static const string &Type() { - static const string type = W1::Type() + "_LT_" + W2::Type(); - return type; - } - - bool Member() const { - if (!Value1().Member() || !Value2().Member()) return false; - // Lexicographic weights cannot mix zeroes and non-zeroes. - if (Value1() == W1::Zero() && Value2() == W2::Zero()) return true; - if (Value1() != W1::Zero() && Value2() != W2::Zero()) return true; - return false; - } - - LexicographicWeight<W1, W2> Quantize(float delta = kDelta) const { - return PairWeight<W1, W2>::Quantize(); - } - - ReverseWeight Reverse() const { - return PairWeight<W1, W2>::Reverse(); - } - - static uint64 Properties() { - uint64 props1 = W1::Properties(); - uint64 props2 = W2::Properties(); - return props1 & props2 & (kLeftSemiring | kRightSemiring | kPath | - kIdempotent | kCommutative); - } -}; - -template <class W1, class W2> -inline LexicographicWeight<W1, W2> Plus(const LexicographicWeight<W1, W2> &w, - const LexicographicWeight<W1, W2> &v) { - if (!w.Member() || !v.Member()) - return LexicographicWeight<W1, W2>::NoWeight(); - NaturalLess<W1> less1; - NaturalLess<W2> less2; - if (less1(w.Value1(), v.Value1())) return w; - if (less1(v.Value1(), w.Value1())) return v; - if (less2(w.Value2(), v.Value2())) return w; - if (less2(v.Value2(), w.Value2())) return v; - 
return w; -} - -template <class W1, class W2> -inline LexicographicWeight<W1, W2> Times(const LexicographicWeight<W1, W2> &w, - const LexicographicWeight<W1, W2> &v) { - return LexicographicWeight<W1, W2>(Times(w.Value1(), v.Value1()), - Times(w.Value2(), v.Value2())); -} - -template <class W1, class W2> -inline LexicographicWeight<W1, W2> Divide(const LexicographicWeight<W1, W2> &w, - const LexicographicWeight<W1, W2> &v, - DivideType typ = DIVIDE_ANY) { - return LexicographicWeight<W1, W2>(Divide(w.Value1(), v.Value1(), typ), - Divide(w.Value2(), v.Value2(), typ)); -} - -} // namespace fst - -#endif // FST_LIB_LEXICOGRAPHIC_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/lock.h b/kaldi_io/src/tools/openfst/include/fst/lock.h deleted file mode 100644 index 58cb22a..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/lock.h +++ /dev/null @@ -1,100 +0,0 @@ -// lock.h -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Author: [email protected] (Michael Riley) -// -// \file -// Google-compatibility locking declarations and inline definitions -// -// Classes and functions here are no-ops (by design); proper locking requires -// actual implementation. 
- -#ifndef FST_LIB_LOCK_H__ -#define FST_LIB_LOCK_H__ - -#include <fst/compat.h> // for DISALLOW_COPY_AND_ASSIGN - -namespace fst { - -using namespace std; - -// -// Single initialization - single-thread implementation -// - -typedef int FstOnceType; - -static const int FST_ONCE_INIT = 1; - -inline int FstOnceInit(FstOnceType *once, void (*init)(void)) { - if (*once) - (*init)(); - *once = 0; - return 0; -} - -// -// Thread locking - single-thread (non-)implementation -// - -class Mutex { - public: - Mutex() {} - - private: - DISALLOW_COPY_AND_ASSIGN(Mutex); -}; - -class MutexLock { - public: - MutexLock(Mutex *) {} - - private: - DISALLOW_COPY_AND_ASSIGN(MutexLock); -}; - -class ReaderMutexLock { - public: - ReaderMutexLock(Mutex *) {} - - private: - DISALLOW_COPY_AND_ASSIGN(ReaderMutexLock); -}; - -// Reference counting - single-thread implementation -class RefCounter { - public: - RefCounter() : count_(1) {} - - int count() const { return count_; } - -// below lines are modifications of openfst for multi-thrads support, -// from tools/extras/openfst_gcc41up.patch, applied by tools/Makefile, -// applicable to gcc 4.1 or above - // int Incr() const { return ++count_; } - // int Decr() const { return --count_; } - - int Incr() const { return __sync_add_and_fetch(&count_, 1); } - int Decr() const { return __sync_sub_and_fetch(&count_, 1); } -// end modifications - - private: - mutable int count_; - - DISALLOW_COPY_AND_ASSIGN(RefCounter); -}; - -} // namespace fst - -#endif // FST_LIB_LOCK_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/log.h b/kaldi_io/src/tools/openfst/include/fst/log.h deleted file mode 100644 index d1492cd..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/log.h +++ /dev/null @@ -1,66 +0,0 @@ -// log.h -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Author: [email protected] (Michael Riley) -// -// \file -// Google-style logging declarations and inline definitions. - -#ifndef FST_LIB_LOG_H__ -#define FST_LIB_LOG_H__ - -#include <cassert> -#include <iostream> -#include <string> - -#include <fst/types.h> -#include <fst/flags.h> - -using std::string; - -DECLARE_int32(v); - -class LogMessage { - public: - LogMessage(const string &type) : fatal_(type == "FATAL") { - std::cerr << type << ": "; - } - ~LogMessage() { - std::cerr << std::endl; - if(fatal_) - exit(1); - } - std::ostream &stream() { return std::cerr; } - - private: - bool fatal_; -}; - -#define LOG(type) LogMessage(#type).stream() -#define VLOG(level) if ((level) <= FLAGS_v) LOG(INFO) - -// Checks -inline void CHECK(bool x) { assert(x); } - -#define CHECK_EQ(x, y) CHECK((x) == (y)) -#define CHECK_LT(x, y) CHECK((x) < (y)) -#define CHECK_GT(x, y) CHECK((x) > (y)) -#define CHECK_LE(x, y) CHECK((x) <= (y)) -#define CHECK_GE(x, y) CHECK((x) >= (y)) -#define CHECK_NE(x, y) CHECK((x) != (y)) - -// Ports -#define ATTRIBUTE_DEPRECATED __attribute__((deprecated)) - -#endif // FST_LIB_LOG_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/lookahead-filter.h b/kaldi_io/src/tools/openfst/include/fst/lookahead-filter.h deleted file mode 100644 index e11c1bb..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/lookahead-filter.h +++ /dev/null @@ -1,698 +0,0 @@ -// lookahead-filter.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Composition filters to support lookahead matchers, useful for improving -// composition efficiency with certain inputs. - -#ifndef FST_LIB_LOOKAHEAD_FILTER_H__ -#define FST_LIB_LOOKAHEAD_FILTER_H__ - -#include <vector> -using std::vector; - -#include <fst/fst.h> -#include <fst/lookahead-matcher.h> - - -namespace fst { - -// Identifies and verifies the capabilities of the matcher to be used for -// lookahead with the composition filters below. This version is passed -// the matchers. -template <class M1, class M2> -MatchType LookAheadMatchType(const M1 &m1, const M2 &m2) { - MatchType type1 = m1.Type(false); - MatchType type2 = m2.Type(false); - if (type1 == MATCH_OUTPUT && - m1.Flags() & kOutputLookAheadMatcher) - return MATCH_OUTPUT; - else if (type2 == MATCH_INPUT && - m2.Flags() & kInputLookAheadMatcher) - return MATCH_INPUT; - else if (m1.Flags() & kOutputLookAheadMatcher && - m1.Type(true) == MATCH_OUTPUT) - return MATCH_OUTPUT; - else if (m2.Flags() & kInputLookAheadMatcher && - m2.Type(true) == MATCH_INPUT) - return MATCH_INPUT; - else - return MATCH_NONE; -} - -// Identifies and verifies the capabilities of the matcher to be used for -// lookahead with the composition filters below. This version uses the -// Fst's default matchers. 
-template <class Arc> -MatchType LookAheadMatchType(const Fst<Arc> &fst1, const Fst<Arc> &fst2) { - LookAheadMatcher< Fst <Arc> > matcher1(fst1, MATCH_OUTPUT); - LookAheadMatcher< Fst <Arc> > matcher2(fst2, MATCH_INPUT); - return LookAheadMatchType(matcher1, matcher2); -} - -// -// LookAheadSelector - a helper class for selecting among possibly -// distinct FST and matcher types w/o using a common base class. This -// lets us avoid virtual function calls. -// - -// Stores and returns the appropriate FST and matcher for lookahead. -// It is templated on the matcher types. General case has no methods -// since not currently supported. -template <class M1, class M2, MatchType MT> -class LookAheadSelector { -}; - -// Stores and returns the appropriate FST and matcher for lookahead. -// Specialized for two matchers of same type with the (match) 'type' -// arg determining which is used for lookahead. -template <class M, MatchType MT> -class LookAheadSelector<M, M, MT> { - public: - typedef typename M::Arc Arc; - typedef typename M::FST F; - - LookAheadSelector(M *lmatcher1, M *lmatcher2, MatchType type) - : lmatcher1_(lmatcher1->Copy()), - lmatcher2_(lmatcher2->Copy()), - type_(type) {} - - LookAheadSelector(const LookAheadSelector<M, M, MT> &selector) - : lmatcher1_(selector.lmatcher1_->Copy()), - lmatcher2_(selector.lmatcher2_->Copy()), - type_(selector.type_) {} - - ~LookAheadSelector() { - delete lmatcher1_; - delete lmatcher2_; - } - - const F &GetFst() const { - return type_ == MATCH_OUTPUT ? lmatcher2_->GetFst() : - lmatcher1_->GetFst(); - } - - M *GetMatcher() const { - return type_ == MATCH_OUTPUT ? lmatcher1_ : lmatcher2_; - } - - private: - M *lmatcher1_; - M *lmatcher2_; - MatchType type_; - - void operator=(const LookAheadSelector<M, M, MT> &); // disallow -}; - -// Stores and returns the appropriate FST and matcher for lookahead. -// Specialized for lookahead on input labels. 
-template <class M1, class M2> -class LookAheadSelector<M1, M2, MATCH_INPUT> { - public: - typedef typename M1::FST F1; - - LookAheadSelector(M1 *lmatcher1, M2 *lmatcher2, MatchType) - : fst_(lmatcher1->GetFst().Copy()), - lmatcher_(lmatcher2->Copy()) {} - - LookAheadSelector(const LookAheadSelector<M1, M2, MATCH_INPUT> &selector) - : fst_(selector.fst_->Copy()), - lmatcher_(selector.lmatcher_->Copy()) {} - - ~LookAheadSelector() { - delete lmatcher_; - delete fst_; - } - - const F1 &GetFst() const { return *fst_; } - - M2 *GetMatcher() const { return lmatcher_; } - - private: - const F1 *fst_; - M2 *lmatcher_; - - void operator=(const LookAheadSelector<M1, M2, MATCH_INPUT> &); // disallow -}; - - -// Stores and returns the appropriate FST and matcher for lookahead. -// Specialized for lookahead on output labels. -template <class M1, class M2> -class LookAheadSelector<M1, M2, MATCH_OUTPUT> { - public: - typedef typename M2::FST F2; - - LookAheadSelector(M1 *lmatcher1, M2 *lmatcher2, MatchType) - : fst_(lmatcher2->GetFst().Copy()), - lmatcher_(lmatcher1->Copy()) {} - - LookAheadSelector(const LookAheadSelector<M1, M2, MATCH_OUTPUT> &selector) - : fst_(selector.fst_->Copy()), - lmatcher_(selector.lmatcher_->Copy()) {} - - ~LookAheadSelector() { - delete lmatcher_; - delete fst_; - } - - const F2 &GetFst() const { return *fst_; } - - M1 *GetMatcher() const { return lmatcher_; } - - private: - const F2 *fst_; - M1 *lmatcher_; - - void operator=(const LookAheadSelector<M1, M2, MATCH_OUTPUT> &); // disallow -}; - -// This filter uses a lookahead matcher in FilterArc(arc1, arc2) to -// examine the future of the composition state (arc1.nextstate, -// arc2.nextstate), blocking moving forward when its determined to be -// non-coaccessible. It is templated on an underlying filter, -// typically the epsilon filter. Which matcher is the lookahead -// matcher is determined by the template argument MT unless it is -// MATCH_BOTH. 
In that case, both matcher arguments must be lookahead -// matchers of the same type and one will be selected by -// LookAheadMatchType() based on their capability. -template <class F, - class M1 = LookAheadMatcher<typename F::FST1>, - class M2 = M1, - MatchType MT = MATCH_BOTH> -class LookAheadComposeFilter { - public: - typedef typename F::FST1 FST1; - typedef typename F::FST2 FST2; - typedef typename F::Arc Arc; - typedef typename F::Matcher1 Matcher1; - typedef typename F::Matcher2 Matcher2; - typedef typename F::FilterState FilterState; - typedef LookAheadComposeFilter<F, M1, M2, MT> Filter; - - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - LookAheadComposeFilter(const FST1 &fst1, const FST2 &fst2, - M1 *matcher1, M2 *matcher2) - : filter_(fst1, fst2, matcher1, matcher2), - lookahead_type_(MT == MATCH_BOTH ? - LookAheadMatchType(*filter_.GetMatcher1(), - *filter_.GetMatcher2()) : MT), - selector_(filter_.GetMatcher1(), filter_.GetMatcher2(), - lookahead_type_), - flags_(lookahead_type_ == MATCH_OUTPUT ? 
- filter_.GetMatcher1()->Flags() : - filter_.GetMatcher2()->Flags()) { - if (lookahead_type_ == MATCH_NONE) { - FSTERROR() << "LookAheadComposeFilter: 1st argument cannot " - << "match/look-ahead on output labels and 2nd argument " - << "cannot match/look-ahead on input labels."; - } - selector_.GetMatcher()->InitLookAheadFst(selector_.GetFst()); - } - - LookAheadComposeFilter(const LookAheadComposeFilter<F, M1, M2, MT> &filter, - bool safe = false) - : filter_(filter.filter_, safe), - lookahead_type_(filter.lookahead_type_), - selector_(filter_.GetMatcher1(), filter_.GetMatcher2(), - lookahead_type_), - flags_(filter.flags_) { - selector_.GetMatcher()->InitLookAheadFst(selector_.GetFst(), true); - } - - FilterState Start() const { - return filter_.Start(); - } - - void SetState(StateId s1, StateId s2, const FilterState &f) { - filter_.SetState(s1, s2, f); - } - - FilterState FilterArc(Arc *arc1, Arc *arc2) const { - lookahead_arc_ = false; - - const FilterState &f = filter_.FilterArc(arc1, arc2); - if (f == FilterState::NoState()) - return FilterState::NoState(); - - return LookAheadOutput() ? LookAheadFilterArc(arc1, arc2, f) : - LookAheadFilterArc(arc2, arc1, f); - } - - void FilterFinal(Weight *weight1, Weight *weight2) const { - filter_.FilterFinal(weight1, weight2); - } - - // Return resp matchers. Ownership stays with filter. 
- Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); } - Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); } - - const LookAheadSelector<Matcher1, Matcher2, MT> &Selector() const { - return selector_; - } - - uint64 Properties(uint64 inprops) const { - uint64 outprops = filter_.Properties(inprops); - if (lookahead_type_ == MATCH_NONE) - outprops |= kError; - return outprops; - } - - uint32 LookAheadFlags() const { return flags_; } - - bool LookAheadArc() const { return lookahead_arc_; } - - bool LookAheadOutput() const { - if (MT == MATCH_OUTPUT) - return true; - else if (MT == MATCH_INPUT) - return false; - else if (lookahead_type_ == MATCH_OUTPUT) - return true; - else - return false; - } - - private: - FilterState LookAheadFilterArc(Arc *arca, Arc *arcb, - const FilterState &f) const { - Label &labela = LookAheadOutput() ? arca->olabel : arca->ilabel; - - if (labela != 0 && !(flags_ & kLookAheadNonEpsilons)) - return f; - if (labela == 0 && !(flags_ & kLookAheadEpsilons)) - return f; - - lookahead_arc_ = true; - selector_.GetMatcher()->SetState(arca->nextstate); - - return selector_.GetMatcher()->LookAheadFst(selector_.GetFst(), - arcb->nextstate) ? f : - FilterState::NoState(); - } - - F filter_; // Underlying filter - MatchType lookahead_type_; // Lookahead match type - LookAheadSelector<Matcher1, Matcher2, MT> selector_; - uint32 flags_; // Lookahead flags - mutable bool lookahead_arc_; // Look-ahead performed at last FilterArc()? - - void operator=(const LookAheadComposeFilter<F, M1, M2> &); // disallow -}; - - -// This filter adds weight-pushing to a lookahead composition filter -// using the LookAheadWeight() method of matcher argument. It is -// templated on an underlying lookahead filter, typically the basic -// lookahead filter. Weight-pushing in composition brings weights -// forward as much as possible based on the lookahead information. 
-template <class F, - class M1 = LookAheadMatcher<typename F::FST1>, - class M2 = M1, - MatchType MT = MATCH_BOTH> -class PushWeightsComposeFilter { - public: - typedef typename F::FST1 FST1; - typedef typename F::FST2 FST2; - typedef typename F::Arc Arc; - typedef typename F::Matcher1 Matcher1; - typedef typename F::Matcher2 Matcher2; - typedef typename F::FilterState FilterState1; - typedef WeightFilterState<typename Arc::Weight> FilterState2; - typedef PairFilterState<FilterState1, FilterState2> FilterState; - - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - PushWeightsComposeFilter(const FST1 &fst1, const FST2 &fst2, - M1 *matcher1, M2 *matcher2) - : filter_(fst1, fst2, matcher1, matcher2), - f_(FilterState::NoState()) {} - - PushWeightsComposeFilter(const PushWeightsComposeFilter<F, M1, M2, MT> - &filter, - bool safe = false) - : filter_(filter.filter_, safe), - f_(FilterState::NoState()) {} - - FilterState Start() const { - return FilterState(filter_.Start(), FilterState2(Weight::One())); - } - - void SetState(StateId s1, StateId s2, const FilterState &f) { - f_ = f; - filter_.SetState(s1, s2, f.GetState1()); - } - - FilterState FilterArc(Arc *arc1, Arc *arc2) const { - const FilterState1 &f1 = filter_.FilterArc(arc1, arc2); - if (f1 == FilterState1::NoState()) - return FilterState::NoState(); - - if (!(LookAheadFlags() & kLookAheadWeight)) - return FilterState(f1, FilterState2(Weight::One())); - - const Weight &lweight = filter_.LookAheadArc() ? 
- Selector().GetMatcher()->LookAheadWeight() : Weight::One(); - const FilterState2 &f2 = f_.GetState2(); - const Weight &fweight = f2.GetWeight(); - - arc2->weight = Divide(Times(arc2->weight, lweight), fweight); - return FilterState(f1, FilterState2(lweight)); - } - - void FilterFinal(Weight *weight1, Weight *weight2) const { - filter_.FilterFinal(weight1, weight2); - if (!(LookAheadFlags() & kLookAheadWeight) || *weight1 == Weight::Zero()) - return; - - const FilterState2 &f2 = f_.GetState2(); - const Weight &fweight = f2.GetWeight(); - *weight1 = Divide(*weight1, fweight); - } - // Return resp matchers. Ownership states with filter. - Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); } - Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); } - - const LookAheadSelector<Matcher1, Matcher2, MT> &Selector() const { - return filter_.Selector(); - } - - uint32 LookAheadFlags() const { return filter_.LookAheadFlags(); } - bool LookAheadArc() const { return filter_.LookAheadArc(); } - bool LookAheadOutput() const { return filter_.LookAheadOutput(); } - - uint64 Properties(uint64 props) const { - return filter_.Properties(props) & kWeightInvariantProperties; - } - - private: - F filter_; // Underlying filter - FilterState f_; // Current filter state - - void operator=(const PushWeightsComposeFilter<F, M1, M2, MT> &); // disallow -}; - -// This filter adds label-pushing to a lookahead composition filter -// using the LookAheadPrefix() method of the matcher argument. It is -// templated on an underlying filter, typically the basic lookahead -// or weight-pushing lookahead filter. Label-pushing in composition -// matches labels as early as possible based on the lookahead -// information. 
-template <class F, - class M1 = LookAheadMatcher<typename F::FST1>, - class M2 = M1, - MatchType MT = MATCH_BOTH> -class PushLabelsComposeFilter { - public: - typedef typename F::FST1 FST1; - typedef typename F::FST2 FST2; - typedef typename F::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - typedef MultiEpsMatcher<typename F::Matcher1> Matcher1; - typedef MultiEpsMatcher<typename F::Matcher2> Matcher2; - typedef typename F::FilterState FilterState1; - typedef IntegerFilterState<typename Arc::Label> FilterState2; - typedef PairFilterState<FilterState1, FilterState2> FilterState; - - PushLabelsComposeFilter(const FST1 &fst1, const FST2 &fst2, - M1 *matcher1, M2 *matcher2) - : filter_(fst1, fst2, matcher1, matcher2), - f_(FilterState::NoState()), - fst1_(filter_.GetMatcher1()->GetFst()), - fst2_(filter_.GetMatcher2()->GetFst()), - matcher1_(fst1_, MATCH_OUTPUT, - filter_.LookAheadOutput() ? kMultiEpsList : kMultiEpsLoop, - filter_.GetMatcher1(), - false), - matcher2_(fst2_, MATCH_INPUT, - filter_.LookAheadOutput() ? kMultiEpsLoop : kMultiEpsList, - filter_.GetMatcher2(), - false) {} - - PushLabelsComposeFilter(const PushLabelsComposeFilter<F, M1, M2, MT> &filter, - bool safe = false) - : filter_(filter.filter_, safe), - f_(FilterState::NoState()), - fst1_(filter_.GetMatcher1()->GetFst()), - fst2_(filter_.GetMatcher2()->GetFst()), - matcher1_(fst1_, MATCH_OUTPUT, - filter_.LookAheadOutput() ? kMultiEpsList : kMultiEpsLoop, - filter_.GetMatcher1(), - false), - matcher2_(fst2_, MATCH_INPUT, - filter_.LookAheadOutput() ? 
kMultiEpsLoop : kMultiEpsList, - filter_.GetMatcher2(), - false) { - } - - FilterState Start() const { - return FilterState(filter_.Start(), FilterState2(kNoLabel)); - } - - void SetState(StateId s1, StateId s2, const FilterState &f) { - f_ = f; - filter_.SetState(s1, s2, f.GetState1()); - if (!(LookAheadFlags() & kLookAheadPrefix)) - return; - - narcsa_ = LookAheadOutput() ? internal::NumArcs(fst1_, s1) - : internal::NumArcs(fst2_, s2); - - const FilterState2 &f2 = f_.GetState2(); - const Label &flabel = f2.GetState(); - - GetMatcher1()->ClearMultiEpsLabels(); - GetMatcher2()->ClearMultiEpsLabels(); - if (flabel != kNoLabel) { // Have a lookahead label? - GetMatcher1()->AddMultiEpsLabel(flabel); // Yes, make it a multi-epsilon - GetMatcher2()->AddMultiEpsLabel(flabel); // label so that it matches the - } // implicit epsilon arc to be - } // modified below when pushing. - - FilterState FilterArc(Arc *arc1, Arc *arc2) const { - if (!(LookAheadFlags() & kLookAheadPrefix)) - return FilterState(filter_.FilterArc(arc1, arc2), - FilterState2(kNoLabel)); - - const FilterState2 &f2 = f_.GetState2(); - const Label &flabel = f2.GetState(); - if (flabel != kNoLabel) // Have a lookahead label? - return LookAheadOutput() ? PushedLabelFilterArc(arc1, arc2, flabel) : - PushedLabelFilterArc(arc2, arc1, flabel); - - const FilterState1 &f1 = filter_.FilterArc(arc1, arc2); - if (f1 == FilterState1::NoState()) - return FilterState::NoState(); - - if (!filter_.LookAheadArc()) - return FilterState(f1, FilterState2(kNoLabel)); - - return LookAheadOutput() ? 
PushLabelFilterArc(arc1, arc2, f1) : - PushLabelFilterArc(arc2, arc1, f1); - } - - void FilterFinal(Weight *weight1, Weight *weight2) const { - filter_.FilterFinal(weight1, weight2); - if (!(LookAheadFlags() & kLookAheadPrefix) || - *weight1 == Weight::Zero()) - return; - - const FilterState2 &f2 = f_.GetState2(); - const Label &flabel = f2.GetState(); - if (flabel != kNoLabel) - *weight1 = Weight::Zero(); - } - - // Return resp matchers. Ownership states with filter. - Matcher1 *GetMatcher1() { return &matcher1_; } - Matcher2 *GetMatcher2() { return &matcher2_; } - - uint64 Properties(uint64 iprops) const { - uint64 oprops = filter_.Properties(iprops); - if (LookAheadOutput()) - return oprops & kOLabelInvariantProperties; - else - return oprops & kILabelInvariantProperties; - } - - private: - const LookAheadSelector<typename F::Matcher1, typename F::Matcher2, MT> - &Selector() const { - return filter_.Selector(); - } - - // Consumes an already pushed label. - FilterState PushedLabelFilterArc(Arc *arca, Arc *arcb, - Label flabel) const { - Label &labela = LookAheadOutput() ? arca->olabel : arca->ilabel; - const Label &labelb = LookAheadOutput() ? arcb->ilabel : arcb->olabel; - - if (labelb != kNoLabel) { - return FilterState::NoState(); // Block non- (multi-) epsilon label - } else if (labela == flabel) { - labela = 0; // Convert match to multi-eps to eps - return Start(); - } else if (labela == 0) { - if (narcsa_ == 1) - return f_; // Take eps; keep state w/ label - Selector().GetMatcher()->SetState(arca->nextstate); - if (Selector().GetMatcher()->LookAheadLabel(flabel)) - return f_; // Take eps; keep state w/ label - else - return FilterState::NoState(); // Block non-coaccessible path - } else { - return FilterState::NoState(); // Block mismatch to multi-eps label - } - } - - // Pushes a label forward when possible. - FilterState PushLabelFilterArc(Arc *arca, Arc *arcb, - const FilterState1 &f1) const { - Label &labela = LookAheadOutput() ? 
arca->olabel : arca->ilabel; - const Label &labelb = LookAheadOutput() ? arcb->olabel : arcb->ilabel; - - if (labelb != 0) // No place to push. - return FilterState(f1, FilterState2(kNoLabel)); - if (labela != 0 && // Wrong lookahead prefix type? - LookAheadFlags() & kLookAheadNonEpsilonPrefix) - return FilterState(f1, FilterState2(kNoLabel)); - - Arc larc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId); - - if (Selector().GetMatcher()->LookAheadPrefix(&larc)) { // Have prefix arc? - labela = LookAheadOutput() ? larc.ilabel : larc.olabel; - arcb->ilabel = larc.ilabel; // Yes, go forward on that arc, - arcb->olabel = larc.olabel; // thus pushing the label. - arcb->weight = Times(arcb->weight, larc.weight); - arcb->nextstate = larc.nextstate; - return FilterState(f1, FilterState2(labela)); - } else { - return FilterState(f1, FilterState2(kNoLabel)); - } - } - - uint32 LookAheadFlags() const { return filter_.LookAheadFlags(); } - bool LookAheadArc() const { return filter_.LookAheadArc(); } - bool LookAheadOutput() const { return filter_.LookAheadOutput(); } - - F filter_; // Underlying filter - FilterState f_ ; // Current filter state - const FST1 &fst1_; - const FST2 &fst2_; - Matcher1 matcher1_; // Multi-epsilon matcher for fst1 - Matcher2 matcher2_; // Multi-epsilon matcher for fst2 - ssize_t narcsa_; // Number of arcs leaving look-ahead match FST - - void operator=(const PushLabelsComposeFilter<F, M1, M2, MT> &); // disallow -}; - -// -// CONVENIENCE CLASS useful for setting up composition with a default -// look-ahead matcher and filter. -// - -template <class A, MatchType type> // MATCH_NONE -class DefaultLookAhead { - public: - typedef Matcher< Fst<A> > M; - typedef SequenceComposeFilter<M> ComposeFilter; - typedef M FstMatcher; -}; - -// Specializes for MATCH_INPUT to allow lookahead. 
-template <class A> -class DefaultLookAhead<A, MATCH_INPUT> { - public: - typedef LookAheadMatcher< Fst<A> > M; - typedef SequenceComposeFilter<M> SF; - typedef LookAheadComposeFilter<SF, M> ComposeFilter; - typedef M FstMatcher; -}; - -// Specializes for MATCH_OUTPUT to allow lookahead. -template <class A> -class DefaultLookAhead<A, MATCH_OUTPUT> { - public: - typedef LookAheadMatcher< Fst<A> > M; - typedef AltSequenceComposeFilter<M> SF; - typedef LookAheadComposeFilter<SF, M> ComposeFilter; - typedef M FstMatcher; -}; - -// Specializes for StdArc to allow weight and label pushing. -template <> -class DefaultLookAhead<StdArc, MATCH_INPUT> { - public: - typedef StdArc A; - typedef LookAheadMatcher< Fst<A> > M; - typedef SequenceComposeFilter<M> SF; - typedef LookAheadComposeFilter<SF, M> LF; - typedef PushWeightsComposeFilter<LF, M> WF; - typedef PushLabelsComposeFilter<WF, M> ComposeFilter; - typedef M FstMatcher; -}; - -// Specializes for StdArc to allow weight and label pushing. -template <> -class DefaultLookAhead<StdArc, MATCH_OUTPUT> { - public: - typedef StdArc A; - typedef LookAheadMatcher< Fst<A> > M; - typedef AltSequenceComposeFilter<M> SF; - typedef LookAheadComposeFilter<SF, M> LF; - typedef PushWeightsComposeFilter<LF, M> WF; - typedef PushLabelsComposeFilter<WF, M> ComposeFilter; - typedef M FstMatcher; -}; - -// Specializes for LogArc to allow weight and label pushing. -template <> -class DefaultLookAhead<LogArc, MATCH_INPUT> { - public: - typedef LogArc A; - typedef LookAheadMatcher< Fst<A> > M; - typedef SequenceComposeFilter<M> SF; - typedef LookAheadComposeFilter<SF, M> LF; - typedef PushWeightsComposeFilter<LF, M> WF; - typedef PushLabelsComposeFilter<WF, M> ComposeFilter; - typedef M FstMatcher; -}; - -// Specializes for LogArc to allow weight and label pushing. 
-template <> -class DefaultLookAhead<LogArc, MATCH_OUTPUT> { - public: - typedef LogArc A; - typedef LookAheadMatcher< Fst<A> > M; - typedef AltSequenceComposeFilter<M> SF; - typedef LookAheadComposeFilter<SF, M> LF; - typedef PushWeightsComposeFilter<LF, M> WF; - typedef PushLabelsComposeFilter<WF, M> ComposeFilter; - typedef M FstMatcher; -}; - -} // namespace fst - -#endif // FST_LIB_LOOKAHEAD_FILTER_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/lookahead-matcher.h b/kaldi_io/src/tools/openfst/include/fst/lookahead-matcher.h deleted file mode 100644 index f927d65..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/lookahead-matcher.h +++ /dev/null @@ -1,812 +0,0 @@ -// lookahead-matcher.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Classes to add lookahead to FST matchers, useful e.g. for improving -// composition efficiency with certain inputs. 
- -#ifndef FST_LIB_LOOKAHEAD_MATCHER_H__ -#define FST_LIB_LOOKAHEAD_MATCHER_H__ - -#include <fst/add-on.h> -#include <fst/const-fst.h> -#include <fst/fst.h> -#include <fst/label-reachable.h> -#include <fst/matcher.h> - - -DECLARE_string(save_relabel_ipairs); -DECLARE_string(save_relabel_opairs); - -namespace fst { - -// LOOKAHEAD MATCHERS - these have the interface of Matchers (see -// matcher.h) and these additional methods: -// -// template <class F> -// class LookAheadMatcher { -// public: -// typedef F FST; -// typedef F::Arc Arc; -// typedef typename Arc::StateId StateId; -// typedef typename Arc::Label Label; -// typedef typename Arc::Weight Weight; -// -// // Required constructors. -// LookAheadMatcher(const F &fst, MatchType match_type); -// // If safe=true, the copy is thread-safe (except the lookahead Fst is -// // preserved). See Fst<>::Cop() for further doc. -// LookAheadMatcher(const LookAheadMatcher &matcher, bool safe = false); -// -// Below are methods for looking ahead for a match to a label and -// more generally, to a rational set. Each returns false if there is -// definitely not a match and returns true if there possibly is a -// match. - -// // LABEL LOOKAHEAD: Can 'label' be read from the current matcher state -// // after possibly following epsilon transitions? -// bool LookAheadLabel(Label label) const; -// -// // RATIONAL LOOKAHEAD: The next methods allow looking ahead for an -// // arbitrary rational set of strings, specified by an FST and a state -// // from which to begin the matching. If the lookahead FST is a -// // transducer, this looks on the side different from the matcher -// // 'match_type' (cf. composition). -// -// // Are there paths P from 's' in the lookahead FST that can be read from -// // the cur. matcher state? -// bool LookAheadFst(const Fst<Arc>& fst, StateId s); -// -// // Gives an estimate of the combined weight of the paths P in the -// // lookahead and matcher FSTs for the last call to LookAheadFst. 
-// // A trivial implementation returns Weight::One(). Non-trivial -// // implementations are useful for weight-pushing in composition. -// Weight LookAheadWeight() const; -// -// // Is there is a single non-epsilon arc found in the lookahead FST -// // that begins P (after possibly following any epsilons) in the last -// // call LookAheadFst? If so, return true and copy it to '*arc', o.w. -// // return false. A trivial implementation returns false. Non-trivial -// // implementations are useful for label-pushing in composition. -// bool LookAheadPrefix(Arc *arc); -// -// // Optionally pre-specifies the lookahead FST that will be passed -// // to LookAheadFst() for possible precomputation. If copy is true, -// // then 'fst' is a copy of the FST used in the previous call to -// // this method (useful to avoid unnecessary updates). -// void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false); -// -// }; - -// -// LOOK-AHEAD FLAGS (see also kMatcherFlags in matcher.h): -// -// Matcher is a lookahead matcher when 'match_type' is MATCH_INPUT. -const uint32 kInputLookAheadMatcher = 0x00000010; - -// Matcher is a lookahead matcher when 'match_type' is MATCH_OUTPUT. -const uint32 kOutputLookAheadMatcher = 0x00000020; - -// A non-trivial implementation of LookAheadWeight() method defined and -// should be used? -const uint32 kLookAheadWeight = 0x00000040; - -// A non-trivial implementation of LookAheadPrefix() method defined and -// should be used? -const uint32 kLookAheadPrefix = 0x00000080; - -// Look-ahead of matcher FST non-epsilon arcs? -const uint32 kLookAheadNonEpsilons = 0x00000100; - -// Look-ahead of matcher FST epsilon arcs? -const uint32 kLookAheadEpsilons = 0x00000200; - -// Ignore epsilon paths for the lookahead prefix? Note this gives -// correct results in composition only with an appropriate composition -// filter since it depends on the filter blocking the ignored paths. 
-const uint32 kLookAheadNonEpsilonPrefix = 0x00000400; - -// For LabelLookAheadMatcher, save relabeling data to file -const uint32 kLookAheadKeepRelabelData = 0x00000800; - -// Flags used for lookahead matchers. -const uint32 kLookAheadFlags = 0x00000ff0; - -// LookAhead Matcher interface, templated on the Arc definition; used -// for lookahead matcher specializations that are returned by the -// InitMatcher() Fst method. -template <class A> -class LookAheadMatcherBase : public MatcherBase<A> { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - - LookAheadMatcherBase() - : weight_(Weight::One()), - prefix_arc_(kNoLabel, kNoLabel, Weight::One(), kNoStateId) {} - - virtual ~LookAheadMatcherBase() {} - - bool LookAheadLabel(Label label) const { return LookAheadLabel_(label); } - - bool LookAheadFst(const Fst<Arc> &fst, StateId s) { - return LookAheadFst_(fst, s); - } - - Weight LookAheadWeight() const { return weight_; } - - bool LookAheadPrefix(Arc *arc) const { - if (prefix_arc_.nextstate != kNoStateId) { - *arc = prefix_arc_; - return true; - } else { - return false; - } - } - - virtual void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) = 0; - - protected: - void SetLookAheadWeight(const Weight &w) { weight_ = w; } - - void SetLookAheadPrefix(const Arc &arc) { prefix_arc_ = arc; } - - void ClearLookAheadPrefix() { prefix_arc_.nextstate = kNoStateId; } - - private: - virtual bool LookAheadLabel_(Label label) const = 0; - virtual bool LookAheadFst_(const Fst<Arc> &fst, - StateId s) = 0; // This must set l.a. weight and - // prefix if non-trivial. - Weight weight_; // Look-ahead weight - Arc prefix_arc_; // Look-ahead prefix arc -}; - - -// Don't really lookahead, just declare future looks good regardless. 
-template <class M> -class TrivialLookAheadMatcher - : public LookAheadMatcherBase<typename M::FST::Arc> { - public: - typedef typename M::FST FST; - typedef typename M::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - TrivialLookAheadMatcher(const FST &fst, MatchType match_type) - : matcher_(fst, match_type) {} - - TrivialLookAheadMatcher(const TrivialLookAheadMatcher<M> &lmatcher, - bool safe = false) - : matcher_(lmatcher.matcher_, safe) {} - - // General matcher methods - TrivialLookAheadMatcher<M> *Copy(bool safe = false) const { - return new TrivialLookAheadMatcher<M>(*this, safe); - } - - MatchType Type(bool test) const { return matcher_.Type(test); } - void SetState(StateId s) { return matcher_.SetState(s); } - bool Find(Label label) { return matcher_.Find(label); } - bool Done() const { return matcher_.Done(); } - const Arc& Value() const { return matcher_.Value(); } - void Next() { matcher_.Next(); } - virtual const FST &GetFst() const { return matcher_.GetFst(); } - uint64 Properties(uint64 props) const { return matcher_.Properties(props); } - uint32 Flags() const { - return matcher_.Flags() | kInputLookAheadMatcher | kOutputLookAheadMatcher; - } - - // Look-ahead methods. - bool LookAheadLabel(Label label) const { return true; } - bool LookAheadFst(const Fst<Arc> &fst, StateId s) {return true; } - Weight LookAheadWeight() const { return Weight::One(); } - bool LookAheadPrefix(Arc *arc) const { return false; } - void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) {} - - private: - // This allows base class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. 
- virtual void SetState_(StateId s) { SetState(s); } - virtual bool Find_(Label label) { return Find(label); } - virtual bool Done_() const { return Done(); } - virtual const Arc& Value_() const { return Value(); } - virtual void Next_() { Next(); } - - bool LookAheadLabel_(Label l) const { return LookAheadLabel(l); } - - bool LookAheadFst_(const Fst<Arc> &fst, StateId s) { - return LookAheadFst(fst, s); - } - - Weight LookAheadWeight_() const { return LookAheadWeight(); } - bool LookAheadPrefix_(Arc *arc) const { return LookAheadPrefix(arc); } - - M matcher_; -}; - -// Look-ahead of one transition. Template argument F accepts flags to -// control behavior. -template <class M, uint32 F = kLookAheadNonEpsilons | kLookAheadEpsilons | - kLookAheadWeight | kLookAheadPrefix> -class ArcLookAheadMatcher - : public LookAheadMatcherBase<typename M::FST::Arc> { - public: - typedef typename M::FST FST; - typedef typename M::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - typedef NullAddOn MatcherData; - - using LookAheadMatcherBase<Arc>::LookAheadWeight; - using LookAheadMatcherBase<Arc>::SetLookAheadPrefix; - using LookAheadMatcherBase<Arc>::SetLookAheadWeight; - using LookAheadMatcherBase<Arc>::ClearLookAheadPrefix; - - ArcLookAheadMatcher(const FST &fst, MatchType match_type, - MatcherData *data = 0) - : matcher_(fst, match_type), - fst_(matcher_.GetFst()), - lfst_(0), - s_(kNoStateId) {} - - ArcLookAheadMatcher(const ArcLookAheadMatcher<M, F> &lmatcher, - bool safe = false) - : matcher_(lmatcher.matcher_, safe), - fst_(matcher_.GetFst()), - lfst_(lmatcher.lfst_), - s_(kNoStateId) {} - - // General matcher methods - ArcLookAheadMatcher<M, F> *Copy(bool safe = false) const { - return new ArcLookAheadMatcher<M, F>(*this, safe); - } - - MatchType Type(bool test) const { return matcher_.Type(test); } - - void SetState(StateId s) { - s_ = s; - matcher_.SetState(s); - } - - bool Find(Label label) { 
return matcher_.Find(label); } - bool Done() const { return matcher_.Done(); } - const Arc& Value() const { return matcher_.Value(); } - void Next() { matcher_.Next(); } - const FST &GetFst() const { return fst_; } - uint64 Properties(uint64 props) const { return matcher_.Properties(props); } - uint32 Flags() const { - return matcher_.Flags() | kInputLookAheadMatcher | - kOutputLookAheadMatcher | F; - } - - // Writable matcher methods - MatcherData *GetData() const { return 0; } - - // Look-ahead methods. - bool LookAheadLabel(Label label) const { return matcher_.Find(label); } - - // Checks if there is a matching (possibly super-final) transition - // at (s_, s). - bool LookAheadFst(const Fst<Arc> &fst, StateId s); - - void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) { - lfst_ = &fst; - } - - private: - // This allows base class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. 
- virtual void SetState_(StateId s) { SetState(s); } - virtual bool Find_(Label label) { return Find(label); } - virtual bool Done_() const { return Done(); } - virtual const Arc& Value_() const { return Value(); } - virtual void Next_() { Next(); } - - bool LookAheadLabel_(Label l) const { return LookAheadLabel(l); } - bool LookAheadFst_(const Fst<Arc> &fst, StateId s) { - return LookAheadFst(fst, s); - } - - mutable M matcher_; - const FST &fst_; // Matcher FST - const Fst<Arc> *lfst_; // Look-ahead FST - StateId s_; // Matcher state -}; - -template <class M, uint32 F> -bool ArcLookAheadMatcher<M, F>::LookAheadFst(const Fst<Arc> &fst, StateId s) { - if (&fst != lfst_) - InitLookAheadFst(fst); - - bool ret = false; - ssize_t nprefix = 0; - if (F & kLookAheadWeight) - SetLookAheadWeight(Weight::Zero()); - if (F & kLookAheadPrefix) - ClearLookAheadPrefix(); - if (fst_.Final(s_) != Weight::Zero() && - lfst_->Final(s) != Weight::Zero()) { - if (!(F & (kLookAheadWeight | kLookAheadPrefix))) - return true; - ++nprefix; - if (F & kLookAheadWeight) - SetLookAheadWeight(Plus(LookAheadWeight(), - Times(fst_.Final(s_), lfst_->Final(s)))); - ret = true; - } - if (matcher_.Find(kNoLabel)) { - if (!(F & (kLookAheadWeight | kLookAheadPrefix))) - return true; - ++nprefix; - if (F & kLookAheadWeight) - for (; !matcher_.Done(); matcher_.Next()) - SetLookAheadWeight(Plus(LookAheadWeight(), matcher_.Value().weight)); - ret = true; - } - for (ArcIterator< Fst<Arc> > aiter(*lfst_, s); - !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - Label label = kNoLabel; - switch (matcher_.Type(false)) { - case MATCH_INPUT: - label = arc.olabel; - break; - case MATCH_OUTPUT: - label = arc.ilabel; - break; - default: - FSTERROR() << "ArcLookAheadMatcher::LookAheadFst: bad match type"; - return true; - } - if (label == 0) { - if (!(F & (kLookAheadWeight | kLookAheadPrefix))) - return true; - if (!(F & kLookAheadNonEpsilonPrefix)) - ++nprefix; - if (F & kLookAheadWeight) - 
SetLookAheadWeight(Plus(LookAheadWeight(), arc.weight)); - ret = true; - } else if (matcher_.Find(label)) { - if (!(F & (kLookAheadWeight | kLookAheadPrefix))) - return true; - for (; !matcher_.Done(); matcher_.Next()) { - ++nprefix; - if (F & kLookAheadWeight) - SetLookAheadWeight(Plus(LookAheadWeight(), - Times(arc.weight, - matcher_.Value().weight))); - if ((F & kLookAheadPrefix) && nprefix == 1) - SetLookAheadPrefix(arc); - } - ret = true; - } - } - if (F & kLookAheadPrefix) { - if (nprefix == 1) - SetLookAheadWeight(Weight::One()); // Avoids double counting. - else - ClearLookAheadPrefix(); - } - return ret; -} - - -// Template argument F accepts flags to control behavior. -// It must include precisely one of KInputLookAheadMatcher or -// KOutputLookAheadMatcher. -template <class M, uint32 F = kLookAheadEpsilons | kLookAheadWeight | - kLookAheadPrefix | kLookAheadNonEpsilonPrefix | - kLookAheadKeepRelabelData, - class S = DefaultAccumulator<typename M::Arc> > -class LabelLookAheadMatcher - : public LookAheadMatcherBase<typename M::FST::Arc> { - public: - typedef typename M::FST FST; - typedef typename M::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - typedef LabelReachableData<Label> MatcherData; - - using LookAheadMatcherBase<Arc>::LookAheadWeight; - using LookAheadMatcherBase<Arc>::SetLookAheadPrefix; - using LookAheadMatcherBase<Arc>::SetLookAheadWeight; - using LookAheadMatcherBase<Arc>::ClearLookAheadPrefix; - - LabelLookAheadMatcher(const FST &fst, MatchType match_type, - MatcherData *data = 0, S *s = 0) - : matcher_(fst, match_type), - lfst_(0), - label_reachable_(0), - s_(kNoStateId), - error_(false) { - if (!(F & (kInputLookAheadMatcher | kOutputLookAheadMatcher))) { - FSTERROR() << "LabelLookaheadMatcher: bad matcher flags: " << F; - error_ = true; - } - bool reach_input = match_type == MATCH_INPUT; - if (data) { - if (reach_input == data->ReachInput()) - 
label_reachable_ = new LabelReachable<Arc, S>(data, s); - } else if ((reach_input && (F & kInputLookAheadMatcher)) || - (!reach_input && (F & kOutputLookAheadMatcher))) { - label_reachable_ = new LabelReachable<Arc, S>( - fst, reach_input, s, F & kLookAheadKeepRelabelData); - } - } - - LabelLookAheadMatcher(const LabelLookAheadMatcher<M, F, S> &lmatcher, - bool safe = false) - : matcher_(lmatcher.matcher_, safe), - lfst_(lmatcher.lfst_), - label_reachable_( - lmatcher.label_reachable_ ? - new LabelReachable<Arc, S>(*lmatcher.label_reachable_) : 0), - s_(kNoStateId), - error_(lmatcher.error_) {} - - ~LabelLookAheadMatcher() { - delete label_reachable_; - } - - // General matcher methods - LabelLookAheadMatcher<M, F, S> *Copy(bool safe = false) const { - return new LabelLookAheadMatcher<M, F, S>(*this, safe); - } - - MatchType Type(bool test) const { return matcher_.Type(test); } - - void SetState(StateId s) { - if (s_ == s) - return; - s_ = s; - match_set_state_ = false; - reach_set_state_ = false; - } - - bool Find(Label label) { - if (!match_set_state_) { - matcher_.SetState(s_); - match_set_state_ = true; - } - return matcher_.Find(label); - } - - bool Done() const { return matcher_.Done(); } - const Arc& Value() const { return matcher_.Value(); } - void Next() { matcher_.Next(); } - const FST &GetFst() const { return matcher_.GetFst(); } - - uint64 Properties(uint64 inprops) const { - uint64 outprops = matcher_.Properties(inprops); - if (error_ || (label_reachable_ && label_reachable_->Error())) - outprops |= kError; - return outprops; - } - - uint32 Flags() const { - if (label_reachable_ && label_reachable_->GetData()->ReachInput()) - return matcher_.Flags() | F | kInputLookAheadMatcher; - else if (label_reachable_ && !label_reachable_->GetData()->ReachInput()) - return matcher_.Flags() | F | kOutputLookAheadMatcher; - else - return matcher_.Flags(); - } - - // Writable matcher methods - MatcherData *GetData() const { - return label_reachable_ ? 
label_reachable_->GetData() : 0; - }; - - // Look-ahead methods. - bool LookAheadLabel(Label label) const { - if (label == 0) - return true; - - if (label_reachable_) { - if (!reach_set_state_) { - label_reachable_->SetState(s_); - reach_set_state_ = true; - } - return label_reachable_->Reach(label); - } else { - return true; - } - } - - // Checks if there is a matching (possibly super-final) transition - // at (s_, s). - template <class L> - bool LookAheadFst(const L &fst, StateId s); - - void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) { - lfst_ = &fst; - if (label_reachable_) - label_reachable_->ReachInit(fst, copy); - } - - template <class L> - void InitLookAheadFst(const L& fst, bool copy = false) { - lfst_ = static_cast<const Fst<Arc> *>(&fst); - if (label_reachable_) - label_reachable_->ReachInit(fst, copy); - } - - private: - // This allows base class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. - virtual void SetState_(StateId s) { SetState(s); } - virtual bool Find_(Label label) { return Find(label); } - virtual bool Done_() const { return Done(); } - virtual const Arc& Value_() const { return Value(); } - virtual void Next_() { Next(); } - - bool LookAheadLabel_(Label l) const { return LookAheadLabel(l); } - bool LookAheadFst_(const Fst<Arc> &fst, StateId s) { - return LookAheadFst(fst, s); - } - - mutable M matcher_; - const Fst<Arc> *lfst_; // Look-ahead FST - LabelReachable<Arc, S> *label_reachable_; // Label reachability info - StateId s_; // Matcher state - bool match_set_state_; // matcher_.SetState called? - mutable bool reach_set_state_; // reachable_.SetState called? 
- bool error_; -}; - -template <class M, uint32 F, class S> -template <class L> inline -bool LabelLookAheadMatcher<M, F, S>::LookAheadFst(const L &fst, StateId s) { - if (static_cast<const Fst<Arc> *>(&fst) != lfst_) - InitLookAheadFst(fst); - - SetLookAheadWeight(Weight::One()); - ClearLookAheadPrefix(); - - if (!label_reachable_) - return true; - - label_reachable_->SetState(s_, s); - reach_set_state_ = true; - - bool compute_weight = F & kLookAheadWeight; - bool compute_prefix = F & kLookAheadPrefix; - - bool reach_input = Type(false) == MATCH_OUTPUT; - ArcIterator<L> aiter(fst, s); - bool reach_arc = label_reachable_->Reach(&aiter, 0, - internal::NumArcs(*lfst_, s), - reach_input, compute_weight); - Weight lfinal = internal::Final(*lfst_, s); - bool reach_final = lfinal != Weight::Zero() && label_reachable_->ReachFinal(); - if (reach_arc) { - ssize_t begin = label_reachable_->ReachBegin(); - ssize_t end = label_reachable_->ReachEnd(); - if (compute_prefix && end - begin == 1 && !reach_final) { - aiter.Seek(begin); - SetLookAheadPrefix(aiter.Value()); - compute_weight = false; - } else if (compute_weight) { - SetLookAheadWeight(label_reachable_->ReachWeight()); - } - } - if (reach_final && compute_weight) - SetLookAheadWeight(reach_arc ? - Plus(LookAheadWeight(), lfinal) : lfinal); - - return reach_arc || reach_final; -} - - -// Label-lookahead relabeling class. -template <class A> -class LabelLookAheadRelabeler { - public: - typedef typename A::Label Label; - typedef LabelReachableData<Label> MatcherData; - typedef AddOnPair<MatcherData, MatcherData> D; - - // Relabels matcher Fst - initialization function object. - template <typename I> - LabelLookAheadRelabeler(I **impl); - - // Relabels arbitrary Fst. Class L should be a label-lookahead Fst. - template <class L> - static void Relabel(MutableFst<A> *fst, const L &mfst, - bool relabel_input) { - typename L::Impl *impl = mfst.GetImpl(); - D *data = impl->GetAddOn(); - LabelReachable<A> reachable(data->First() ? 
- data->First() : data->Second()); - reachable.Relabel(fst, relabel_input); - } - - // Returns relabeling pairs (cf. relabel.h::Relabel()). - // Class L should be a label-lookahead Fst. - // If 'avoid_collisions' is true, extra pairs are added to - // ensure no collisions when relabeling automata that have - // labels unseen here. - template <class L> - static void RelabelPairs(const L &mfst, vector<pair<Label, Label> > *pairs, - bool avoid_collisions = false) { - typename L::Impl *impl = mfst.GetImpl(); - D *data = impl->GetAddOn(); - LabelReachable<A> reachable(data->First() ? - data->First() : data->Second()); - reachable.RelabelPairs(pairs, avoid_collisions); - } -}; - -template <class A> -template <typename I> inline -LabelLookAheadRelabeler<A>::LabelLookAheadRelabeler(I **impl) { - Fst<A> &fst = (*impl)->GetFst(); - D *data = (*impl)->GetAddOn(); - const string name = (*impl)->Type(); - bool is_mutable = fst.Properties(kMutable, false); - MutableFst<A> *mfst = 0; - if (is_mutable) { - mfst = static_cast<MutableFst<A> *>(&fst); - } else { - mfst = new VectorFst<A>(fst); - data->IncrRefCount(); - delete *impl; - } - if (data->First()) { // reach_input - LabelReachable<A> reachable(data->First()); - reachable.Relabel(mfst, true); - if (!FLAGS_save_relabel_ipairs.empty()) { - vector<pair<Label, Label> > pairs; - reachable.RelabelPairs(&pairs, true); - WriteLabelPairs(FLAGS_save_relabel_ipairs, pairs); - } - } else { - LabelReachable<A> reachable(data->Second()); - reachable.Relabel(mfst, false); - if (!FLAGS_save_relabel_opairs.empty()) { - vector<pair<Label, Label> > pairs; - reachable.RelabelPairs(&pairs, true); - WriteLabelPairs(FLAGS_save_relabel_opairs, pairs); - } - } - if (!is_mutable) { - *impl = new I(*mfst, name); - (*impl)->SetAddOn(data); - delete mfst; - data->DecrRefCount(); - } -} - - -// Generic lookahead matcher, templated on the FST definition -// - a wrapper around pointer to specific one. 
-template <class F> -class LookAheadMatcher { - public: - typedef F FST; - typedef typename F::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - typedef LookAheadMatcherBase<Arc> LBase; - - LookAheadMatcher(const F &fst, MatchType match_type) { - base_ = fst.InitMatcher(match_type); - if (!base_) - base_ = new SortedMatcher<F>(fst, match_type); - lookahead_ = false; - } - - LookAheadMatcher(const LookAheadMatcher<F> &matcher, bool safe = false) { - base_ = matcher.base_->Copy(safe); - lookahead_ = matcher.lookahead_; - } - - ~LookAheadMatcher() { delete base_; } - - // General matcher methods - LookAheadMatcher<F> *Copy(bool safe = false) const { - return new LookAheadMatcher<F>(*this, safe); - } - - MatchType Type(bool test) const { return base_->Type(test); } - void SetState(StateId s) { base_->SetState(s); } - bool Find(Label label) { return base_->Find(label); } - bool Done() const { return base_->Done(); } - const Arc& Value() const { return base_->Value(); } - void Next() { base_->Next(); } - const F &GetFst() const { return static_cast<const F &>(base_->GetFst()); } - - uint64 Properties(uint64 props) const { return base_->Properties(props); } - - uint32 Flags() const { return base_->Flags(); } - - // Look-ahead methods - bool LookAheadLabel(Label label) const { - if (LookAheadCheck()) { - LBase *lbase = static_cast<LBase *>(base_); - return lbase->LookAheadLabel(label); - } else { - return true; - } - } - - bool LookAheadFst(const Fst<Arc> &fst, StateId s) { - if (LookAheadCheck()) { - LBase *lbase = static_cast<LBase *>(base_); - return lbase->LookAheadFst(fst, s); - } else { - return true; - } - } - - Weight LookAheadWeight() const { - if (LookAheadCheck()) { - LBase *lbase = static_cast<LBase *>(base_); - return lbase->LookAheadWeight(); - } else { - return Weight::One(); - } - } - - bool LookAheadPrefix(Arc *arc) const { - if (LookAheadCheck()) { - LBase *lbase = 
static_cast<LBase *>(base_); - return lbase->LookAheadPrefix(arc); - } else { - return false; - } - } - - void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) { - if (LookAheadCheck()) { - LBase *lbase = static_cast<LBase *>(base_); - lbase->InitLookAheadFst(fst, copy); - } - } - - private: - bool LookAheadCheck() const { - if (!lookahead_) { - lookahead_ = base_->Flags() & - (kInputLookAheadMatcher | kOutputLookAheadMatcher); - if (!lookahead_) { - FSTERROR() << "LookAheadMatcher: No look-ahead matcher defined"; - } - } - return lookahead_; - } - - MatcherBase<Arc> *base_; - mutable bool lookahead_; - - void operator=(const LookAheadMatcher<Arc> &); // disallow -}; - -} // namespace fst - -#endif // FST_LIB_LOOKAHEAD_MATCHER_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/map.h b/kaldi_io/src/tools/openfst/include/fst/map.h deleted file mode 100644 index 419cac4..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/map.h +++ /dev/null @@ -1,121 +0,0 @@ -// map.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Compatability file for old-style Map() functions and MapFst class -// that have been renamed to ArcMap (cf. StateMap). 
- -#ifndef FST_LIB_MAP_H__ -#define FST_LIB_MAP_H__ - - -#include <fst/arc-map.h> - - -namespace fst { - -template<class A, class C> -void Map(MutableFst<A> *fst, C* mapper) { - ArcMap(fst, mapper); -} - -template<class A, class C> -void Map(MutableFst<A> *fst, C mapper) { - ArcMap(fst, mapper); -} - -template<class A, class B, class C> -void Map(const Fst<A> &ifst, MutableFst<B> *ofst, C* mapper) { - ArcMap(ifst, ofst, mapper); -} - -template<class A, class B, class C> -void Map(const Fst<A> &ifst, MutableFst<B> *ofst, C mapper) { - ArcMap(ifst, ofst, mapper); -} - -typedef ArcMapFstOptions MapFstOptions; - -template <class A, class B, class C> -class MapFst : public ArcMapFst<A, B, C> { - public: - typedef B Arc; - typedef typename B::Weight Weight; - typedef typename B::StateId StateId; - typedef CacheState<B> State; - - MapFst(const Fst<A> &fst, const C &mapper, const MapFstOptions& opts) - : ArcMapFst<A, B, C>(fst, mapper, opts) {} - - MapFst(const Fst<A> &fst, C* mapper, const MapFstOptions& opts) - : ArcMapFst<A, B, C>(fst, mapper, opts) {} - - MapFst(const Fst<A> &fst, const C &mapper) - : ArcMapFst<A, B, C>(fst, mapper) {} - - MapFst(const Fst<A> &fst, C* mapper) : ArcMapFst<A, B, C>(fst, mapper) {} - - // See Fst<>::Copy() for doc. - MapFst(const ArcMapFst<A, B, C> &fst, bool safe = false) - : ArcMapFst<A, B, C>(fst, safe) {} - - // Get a copy of this MapFst. See Fst<>::Copy() for further doc. -virtual MapFst<A, B, C> *Copy(bool safe = false) const { - return new MapFst(*this, safe); - } -}; - - -// Specialization for MapFst. -template <class A, class B, class C> -class StateIterator< MapFst<A, B, C> > - : public StateIterator< ArcMapFst<A, B, C> > { - public: - explicit StateIterator(const ArcMapFst<A, B, C> &fst) - : StateIterator< ArcMapFst<A, B, C> >(fst) {} -}; - - -// Specialization for MapFst. 
-template <class A, class B, class C> -class ArcIterator< MapFst<A, B, C> > - : public ArcIterator< ArcMapFst<A, B, C> > { - public: - ArcIterator(const ArcMapFst<A, B, C> &fst, typename A::StateId s) - : ArcIterator< ArcMapFst<A, B, C> >(fst, s) {} -}; - - -template <class A> -struct IdentityMapper { - typedef A FromArc; - typedef A ToArc; - - A operator()(const A &arc) const { return arc; } - - MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } - - MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } - - MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} - - uint64 Properties(uint64 props) const { return props; } -}; - -} // namespace fst - -#endif // FST_LIB_MAP_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/mapped-file.h b/kaldi_io/src/tools/openfst/include/fst/mapped-file.h deleted file mode 100644 index d61bc14..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/mapped-file.h +++ /dev/null @@ -1,83 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jeffrey Sorensen) - -#ifndef FST_LIB_MAPPED_FILE_H_ -#define FST_LIB_MAPPED_FILE_H_ - -#include <unistd.h> -#include <sys/mman.h> - -#include <fst/fst.h> -#include <iostream> -#include <fstream> -#include <sstream> - -DECLARE_int32(fst_arch_alignment); // defined in mapped-file.h - -namespace fst { - -// A memory region is a simple abstraction for allocated memory or data from -// mmap'ed files. If mmap equals NULL, then data represents an owned region of -// size bytes. Otherwise, mmap and size refer to the mapping and data is a -// casted pointer to a region contained within [mmap, mmap + size). -// If size is 0, then mmap refers and data refer to a block of memory managed -// externally by some other allocator. -struct MemoryRegion { - void *data; - void *mmap; - size_t size; -}; - -class MappedFile { - public: - virtual ~MappedFile(); - - void* mutable_data() const { - return reinterpret_cast<void*>(region_.data); - } - - const void* data() const { - return reinterpret_cast<void*>(region_.data); - } - - // Returns a MappedFile object that contains the contents of the input - // stream s starting from the current file position with size bytes. - // The file name must also be provided in the FstReadOptions as opts.source - // or else mapping will fail. If mapping is not possible, then a MappedFile - // object with a new[]'ed block of memory will be created. - static MappedFile* Map(istream* s, const FstReadOptions& opts, size_t size); - - // Creates a MappedFile object with a new[]'ed block of memory of size. - // RECOMMENDED FOR INTERNAL USE ONLY, may change in future releases. - static MappedFile* Allocate(size_t size); - - // Creates a MappedFile object pointing to a borrowed reference to data. - // This block of memory is not owned by the MappedFile object and will not - // be freed. - // RECOMMENDED FOR INTERNAL USE ONLY, may change in future releases. 
- static MappedFile* Borrow(void *data); - - static const int kArchAlignment; - - private: - explicit MappedFile(const MemoryRegion &region); - - MemoryRegion region_; - DISALLOW_COPY_AND_ASSIGN(MappedFile); -}; -} // namespace fst - -#endif // FST_LIB_MAPPED_FILE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/matcher-fst.h b/kaldi_io/src/tools/openfst/include/fst/matcher-fst.h deleted file mode 100644 index 73e64ad..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/matcher-fst.h +++ /dev/null @@ -1,359 +0,0 @@ -// matcher-fst.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Class to add a matcher to an FST. - -#ifndef FST_LIB_MATCHER_FST_FST_H__ -#define FST_LIB_MATCHER_FST_FST_H__ - -#include <fst/add-on.h> -#include <fst/const-fst.h> -#include <fst/lookahead-matcher.h> - - -namespace fst { - -// WRITABLE MATCHERS - these have the interface of Matchers (see -// matcher.h) and these additional methods: -// -// template <class F> -// class Matcher { -// public: -// typedef ... MatcherData; // Initialization data -// ... -// // Constructor with additional argument for external initialization -// // data; matcher increments its reference count on construction and -// // decrements the reference count, and if 0 deletes, on destruction. 
-// Matcher(const F &fst, MatchType type, MatcherData *data); -// -// // Returns pointer to initialization data that can be -// // passed to a Matcher constructor. -// MatcherData *GetData() const; -// }; - -// The matcher initialization data class must have the form: -// class MatcherData { -// public: -// // Required copy constructor. -// MatcherData(const MatcherData &); -// // -// // Required I/O methods. -// static MatcherData *Read(istream &istrm); -// bool Write(ostream &ostrm); -// -// // Required reference counting. -// int RefCount() const; -// int IncrRefCount(); -// int DecrRefCount(); -// }; - -// Default MatcherFst initializer - does nothing. -template <class M> -class NullMatcherFstInit { - public: - typedef AddOnPair<typename M::MatcherData, typename M::MatcherData> D; - typedef AddOnImpl<typename M::FST, D> Impl; - NullMatcherFstInit(Impl **) {} -}; - -// Class to add a matcher M to an Fst F. Creates a new Fst of type name N. -// Optional function object I can be used to initialize the Fst. -template <class F, class M, const char* N, - class I = NullMatcherFstInit<M> > -class MatcherFst - : public ImplToExpandedFst< - AddOnImpl<F, - AddOnPair<typename M::MatcherData, - typename M::MatcherData> > > { - public: - friend class StateIterator< MatcherFst<F, M, N, I> >; - friend class ArcIterator< MatcherFst<F, M, N, I> >; - - typedef F FST; - typedef M FstMatcher; - typedef typename F::Arc Arc; - typedef typename Arc::StateId StateId; - typedef AddOnPair<typename M::MatcherData, typename M::MatcherData> D; - typedef AddOnImpl<F, D> Impl; - - MatcherFst() : ImplToExpandedFst<Impl>(new Impl(F(), N)) {} - - explicit MatcherFst(const F &fst) - : ImplToExpandedFst<Impl>(CreateImpl(fst, N)) {} - - explicit MatcherFst(const Fst<Arc> &fst) - : ImplToExpandedFst<Impl>(CreateImpl(fst, N)) {} - - // See Fst<>::Copy() for doc. 
- MatcherFst(const MatcherFst<F, M, N, I> &fst, bool safe = false) - : ImplToExpandedFst<Impl>(fst, safe) {} - - // Get a copy of this MatcherFst. See Fst<>::Copy() for further doc. - virtual MatcherFst<F, M, N, I> *Copy(bool safe = false) const { - return new MatcherFst<F, M, N, I>(*this, safe); - } - - // Read a MatcherFst from an input stream; return NULL on error - static MatcherFst<F, M, N, I> *Read(istream &strm, - const FstReadOptions &opts) { - Impl *impl = Impl::Read(strm, opts); - return impl ? new MatcherFst<F, M, N, I>(impl) : 0; - } - - // Read a MatcherFst from a file; return NULL on error - // Empty filename reads from standard input - static MatcherFst<F, M, N, I> *Read(const string &filename) { - Impl *impl = ImplToExpandedFst<Impl>::Read(filename); - return impl ? new MatcherFst<F, M, N, I>(impl) : 0; - } - - virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { - return GetImpl()->Write(strm, opts); - } - - virtual bool Write(const string &filename) const { - return Fst<Arc>::WriteFile(filename); - } - - virtual void InitStateIterator(StateIteratorData<Arc> *data) const { - return GetImpl()->InitStateIterator(data); - } - - virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { - return GetImpl()->InitArcIterator(s, data); - } - - virtual M *InitMatcher(MatchType match_type) const { - return new M(GetFst(), match_type, GetData(match_type)); - } - - // Allows access to MatcherFst components. - Impl *GetImpl() const { - return ImplToFst<Impl, ExpandedFst<Arc> >::GetImpl(); - } - - F& GetFst() const { return GetImpl()->GetFst(); } - - typename M::MatcherData *GetData(MatchType match_type) const { - D *data = GetImpl()->GetAddOn(); - return match_type == MATCH_INPUT ? 
data->First() : data->Second(); - } - - private: - static Impl *CreateImpl(const F &fst, const string &name) { - M imatcher(fst, MATCH_INPUT); - M omatcher(fst, MATCH_OUTPUT); - D *data = new D(imatcher.GetData(), omatcher.GetData()); - Impl *impl = new Impl(fst, name); - impl->SetAddOn(data); - I init(&impl); - data->DecrRefCount(); - return impl; - } - - static Impl *CreateImpl(const Fst<Arc> &fst, const string &name) { - F ffst(fst); - return CreateImpl(ffst, name); - } - - explicit MatcherFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {} - - // Makes visible to friends. - void SetImpl(Impl *impl, bool own_impl = true) { - ImplToFst< Impl, ExpandedFst<Arc> >::SetImpl(impl, own_impl); - } - - void operator=(const MatcherFst<F, M, N, I> &fst); // disallow -}; - - -// Specialization fo MatcherFst. -template <class F, class M, const char* N, class I> -class StateIterator< MatcherFst<F, M, N, I> > : public StateIterator<F> { - public: - explicit StateIterator(const MatcherFst<F, M, N, I> &fst) : - StateIterator<F>(fst.GetImpl()->GetFst()) {} -}; - - -// Specialization for MatcherFst. 
-template <class F, class M, const char* N, class I> -class ArcIterator< MatcherFst<F, M, N, I> > : public ArcIterator<F> { - public: - ArcIterator(const MatcherFst<F, M, N, I> &fst, typename F::Arc::StateId s) - : ArcIterator<F>(fst.GetImpl()->GetFst(), s) {} -}; - - -// Specialization for MatcherFst -template <class F, class M, const char* N, class I> -class Matcher< MatcherFst<F, M, N, I> > { - public: - typedef MatcherFst<F, M, N, I> FST; - typedef typename F::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - - Matcher(const FST &fst, MatchType match_type) { - matcher_ = fst.InitMatcher(match_type); - } - - Matcher(const Matcher<FST> &matcher) { - matcher_ = matcher.matcher_->Copy(); - } - - ~Matcher() { delete matcher_; } - - Matcher<FST> *Copy() const { - return new Matcher<FST>(*this); - } - - MatchType Type(bool test) const { return matcher_->Type(test); } - void SetState(StateId s) { matcher_->SetState(s); } - bool Find(Label label) { return matcher_->Find(label); } - bool Done() const { return matcher_->Done(); } - const Arc& Value() const { return matcher_->Value(); } - void Next() { matcher_->Next(); } - uint64 Properties(uint64 props) const { return matcher_->Properties(props); } - uint32 Flags() const { return matcher_->Flags(); } - - private: - M *matcher_; - - void operator=(const Matcher<Arc> &); // disallow -}; - - -// Specialization for MatcherFst -template <class F, class M, const char* N, class I> -class LookAheadMatcher< MatcherFst<F, M, N, I> > { - public: - typedef MatcherFst<F, M, N, I> FST; - typedef typename F::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - LookAheadMatcher(const FST &fst, MatchType match_type) { - matcher_ = fst.InitMatcher(match_type); - } - - LookAheadMatcher(const LookAheadMatcher<FST> &matcher, bool safe = false) { - matcher_ = matcher.matcher_->Copy(safe); - } - - ~LookAheadMatcher() { delete 
matcher_; } - - // General matcher methods - LookAheadMatcher<FST> *Copy(bool safe = false) const { - return new LookAheadMatcher<FST>(*this, safe); - } - - MatchType Type(bool test) const { return matcher_->Type(test); } - void SetState(StateId s) { matcher_->SetState(s); } - bool Find(Label label) { return matcher_->Find(label); } - bool Done() const { return matcher_->Done(); } - const Arc& Value() const { return matcher_->Value(); } - void Next() { matcher_->Next(); } - const FST &GetFst() const { return matcher_->GetFst(); } - uint64 Properties(uint64 props) const { return matcher_->Properties(props); } - uint32 Flags() const { return matcher_->Flags(); } - - // Look-ahead methods - bool LookAheadLabel(Label label) const { - return matcher_->LookAheadLabel(label); - } - - bool LookAheadFst(const Fst<Arc> &fst, StateId s) { - return matcher_->LookAheadFst(fst, s); - } - - Weight LookAheadWeight() const { return matcher_->LookAheadWeight(); } - - bool LookAheadPrefix(Arc *arc) const { - return matcher_->LookAheadPrefix(arc); - } - - void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) { - matcher_->InitLookAheadFst(fst, copy); - } - - private: - M *matcher_; - - void operator=(const LookAheadMatcher<FST> &); // disallow -}; - -// -// Useful aliases when using StdArc and LogArc. 
-// - -// Arc look-ahead matchers -extern const char arc_lookahead_fst_type[]; - -typedef MatcherFst<ConstFst<StdArc>, - ArcLookAheadMatcher<SortedMatcher<ConstFst<StdArc> > >, - arc_lookahead_fst_type> StdArcLookAheadFst; - -typedef MatcherFst<ConstFst<LogArc>, - ArcLookAheadMatcher<SortedMatcher<ConstFst<LogArc> > >, - arc_lookahead_fst_type> LogArcLookAheadFst; - - -// Label look-ahead matchers -extern const char ilabel_lookahead_fst_type[]; -extern const char olabel_lookahead_fst_type[]; - -static const uint32 ilabel_lookahead_flags = kInputLookAheadMatcher | - kLookAheadWeight | kLookAheadPrefix | - kLookAheadEpsilons | kLookAheadNonEpsilonPrefix; -static const uint32 olabel_lookahead_flags = kOutputLookAheadMatcher | - kLookAheadWeight | kLookAheadPrefix | - kLookAheadEpsilons | kLookAheadNonEpsilonPrefix; - -typedef MatcherFst<ConstFst<StdArc>, - LabelLookAheadMatcher<SortedMatcher<ConstFst<StdArc> >, - ilabel_lookahead_flags, - FastLogAccumulator<StdArc> >, - ilabel_lookahead_fst_type, - LabelLookAheadRelabeler<StdArc> > StdILabelLookAheadFst; - -typedef MatcherFst<ConstFst<LogArc>, - LabelLookAheadMatcher<SortedMatcher<ConstFst<LogArc> >, - ilabel_lookahead_flags, - FastLogAccumulator<LogArc> >, - ilabel_lookahead_fst_type, - LabelLookAheadRelabeler<LogArc> > LogILabelLookAheadFst; - -typedef MatcherFst<ConstFst<StdArc>, - LabelLookAheadMatcher<SortedMatcher<ConstFst<StdArc> >, - olabel_lookahead_flags, - FastLogAccumulator<StdArc> >, - olabel_lookahead_fst_type, - LabelLookAheadRelabeler<StdArc> > StdOLabelLookAheadFst; - -typedef MatcherFst<ConstFst<LogArc>, - LabelLookAheadMatcher<SortedMatcher<ConstFst<LogArc> >, - olabel_lookahead_flags, - FastLogAccumulator<LogArc> >, - olabel_lookahead_fst_type, - LabelLookAheadRelabeler<LogArc> > LogOLabelLookAheadFst; - -} // namespace fst - -#endif // FST_LIB_MATCHER_FST_FST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/matcher.h b/kaldi_io/src/tools/openfst/include/fst/matcher.h deleted file mode 
100644 index 89ed9be..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/matcher.h +++ /dev/null @@ -1,1205 +0,0 @@ -// matcher.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Classes to allow matching labels leaving FST states. - -#ifndef FST_LIB_MATCHER_H__ -#define FST_LIB_MATCHER_H__ - -#include <algorithm> -#include <set> - -#include <fst/mutable-fst.h> // for all internal FST accessors - - -namespace fst { - -// MATCHERS - these can find and iterate through requested labels at -// FST states. In the simplest form, these are just some associative -// map or search keyed on labels. More generally, they may -// implement matching special labels that represent sets of labels -// such as 'sigma' (all), 'rho' (rest), or 'phi' (fail). -// The Matcher interface is: -// -// template <class F> -// class Matcher { -// public: -// typedef F FST; -// typedef F::Arc Arc; -// typedef typename Arc::StateId StateId; -// typedef typename Arc::Label Label; -// typedef typename Arc::Weight Weight; -// -// // Required constructors. -// Matcher(const F &fst, MatchType type); -// // If safe=true, the copy is thread-safe. See Fst<>::Copy() -// // for further doc. -// Matcher(const Matcher &matcher, bool safe = false); -// -// // If safe=true, the copy is thread-safe. See Fst<>::Copy() -// // for further doc. 
-// Matcher<F> *Copy(bool safe = false) const; -// -// // Returns the match type that can be provided (depending on -// // compatibility of the input FST). It is either -// // the requested match type, MATCH_NONE, or MATCH_UNKNOWN. -// // If 'test' is false, a constant time test is performed, but -// // MATCH_UNKNOWN may be returned. If 'test' is true, -// // a definite answer is returned, but may involve more costly -// // computation (e.g., visiting the Fst). -// MatchType Type(bool test) const; -// // Specifies the current state. -// void SetState(StateId s); -// -// // This finds matches to a label at the current state. -// // Returns true if a match found. kNoLabel matches any -// // 'non-consuming' transitions, e.g., epsilon transitions, -// // which do not require a matching symbol. -// bool Find(Label label); -// // These iterate through any matches found: -// bool Done() const; // No more matches. -// const A& Value() const; // Current arc (when !Done) -// void Next(); // Advance to next arc (when !Done) -// // Initially and after SetState() the iterator methods -// // have undefined behavior until Find() is called. -// -// // Return matcher FST. -// const F& GetFst() const; -// // This specifies the known Fst properties as viewed from this -// // matcher. It takes as argument the input Fst's known properties. -// uint64 Properties(uint64 props) const; -// }; - -// -// MATCHER FLAGS (see also kLookAheadFlags in lookahead-matcher.h) -// -// Matcher prefers being used as the matching side in composition. -const uint32 kPreferMatch = 0x00000001; - -// Matcher needs to be used as the matching side in composition. -const uint32 kRequireMatch = 0x00000002; - -// Flags used for basic matchers (see also lookahead.h). -const uint32 kMatcherFlags = kPreferMatch | kRequireMatch; - -// Matcher interface, templated on the Arc definition; used -// for matcher specializations that are returned by the -// InitMatcher Fst method. 
-template <class A> -class MatcherBase { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - - virtual ~MatcherBase() {} - - virtual MatcherBase<A> *Copy(bool safe = false) const = 0; - virtual MatchType Type(bool test) const = 0; - void SetState(StateId s) { SetState_(s); } - bool Find(Label label) { return Find_(label); } - bool Done() const { return Done_(); } - const A& Value() const { return Value_(); } - void Next() { Next_(); } - virtual const Fst<A> &GetFst() const = 0; - virtual uint64 Properties(uint64 props) const = 0; - virtual uint32 Flags() const { return 0; } - private: - virtual void SetState_(StateId s) = 0; - virtual bool Find_(Label label) = 0; - virtual bool Done_() const = 0; - virtual const A& Value_() const = 0; - virtual void Next_() = 0; -}; - - -// A matcher that expects sorted labels on the side to be matched. -// If match_type == MATCH_INPUT, epsilons match the implicit self loop -// Arc(kNoLabel, 0, Weight::One(), current_state) as well as any -// actual epsilon transitions. If match_type == MATCH_OUTPUT, then -// Arc(0, kNoLabel, Weight::One(), current_state) is instead matched. -template <class F> -class SortedMatcher : public MatcherBase<typename F::Arc> { - public: - typedef F FST; - typedef typename F::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - // Labels >= binary_label will be searched for by binary search, - // o.w. linear search is used. 
- SortedMatcher(const F &fst, MatchType match_type, - Label binary_label = 1) - : fst_(fst.Copy()), - s_(kNoStateId), - aiter_(0), - match_type_(match_type), - binary_label_(binary_label), - match_label_(kNoLabel), - narcs_(0), - loop_(kNoLabel, 0, Weight::One(), kNoStateId), - error_(false) { - switch(match_type_) { - case MATCH_INPUT: - case MATCH_NONE: - break; - case MATCH_OUTPUT: - swap(loop_.ilabel, loop_.olabel); - break; - default: - FSTERROR() << "SortedMatcher: bad match type"; - match_type_ = MATCH_NONE; - error_ = true; - } - } - - SortedMatcher(const SortedMatcher<F> &matcher, bool safe = false) - : fst_(matcher.fst_->Copy(safe)), - s_(kNoStateId), - aiter_(0), - match_type_(matcher.match_type_), - binary_label_(matcher.binary_label_), - match_label_(kNoLabel), - narcs_(0), - loop_(matcher.loop_), - error_(matcher.error_) {} - - virtual ~SortedMatcher() { - if (aiter_) - delete aiter_; - delete fst_; - } - - virtual SortedMatcher<F> *Copy(bool safe = false) const { - return new SortedMatcher<F>(*this, safe); - } - - virtual MatchType Type(bool test) const { - if (match_type_ == MATCH_NONE) - return match_type_; - - uint64 true_prop = match_type_ == MATCH_INPUT ? - kILabelSorted : kOLabelSorted; - uint64 false_prop = match_type_ == MATCH_INPUT ? 
- kNotILabelSorted : kNotOLabelSorted; - uint64 props = fst_->Properties(true_prop | false_prop, test); - - if (props & true_prop) - return match_type_; - else if (props & false_prop) - return MATCH_NONE; - else - return MATCH_UNKNOWN; - } - - void SetState(StateId s) { - if (s_ == s) - return; - s_ = s; - if (match_type_ == MATCH_NONE) { - FSTERROR() << "SortedMatcher: bad match type"; - error_ = true; - } - if (aiter_) - delete aiter_; - aiter_ = new ArcIterator<F>(*fst_, s); - aiter_->SetFlags(kArcNoCache, kArcNoCache); - narcs_ = internal::NumArcs(*fst_, s); - loop_.nextstate = s; - } - - bool Find(Label match_label) { - exact_match_ = true; - if (error_) { - current_loop_ = false; - match_label_ = kNoLabel; - return false; - } - current_loop_ = match_label == 0; - match_label_ = match_label == kNoLabel ? 0 : match_label; - if (Search()) { - return true; - } else { - return current_loop_; - } - } - - // Positions matcher to the first position where inserting - // match_label would maintain the sort order. - void LowerBound(Label match_label) { - exact_match_ = false; - current_loop_ = false; - if (error_) { - match_label_ = kNoLabel; - return; - } - match_label_ = match_label; - Search(); - } - - // After Find(), returns false if no more exact matches. - // After LowerBound(), returns false if no more arcs. - bool Done() const { - if (current_loop_) - return false; - if (aiter_->Done()) - return true; - if (!exact_match_) - return false; - aiter_->SetFlags( - match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue, - kArcValueFlags); - Label label = match_type_ == MATCH_INPUT ? 
- aiter_->Value().ilabel : aiter_->Value().olabel; - return label != match_label_; - } - - const Arc& Value() const { - if (current_loop_) { - return loop_; - } - aiter_->SetFlags(kArcValueFlags, kArcValueFlags); - return aiter_->Value(); - } - - void Next() { - if (current_loop_) - current_loop_ = false; - else - aiter_->Next(); - } - - virtual const F &GetFst() const { return *fst_; } - - virtual uint64 Properties(uint64 inprops) const { - uint64 outprops = inprops; - if (error_) outprops |= kError; - return outprops; - } - - size_t Position() const { return aiter_ ? aiter_->Position() : 0; } - - private: - virtual void SetState_(StateId s) { SetState(s); } - virtual bool Find_(Label label) { return Find(label); } - virtual bool Done_() const { return Done(); } - virtual const Arc& Value_() const { return Value(); } - virtual void Next_() { Next(); } - - bool Search(); - - const F *fst_; - StateId s_; // Current state - ArcIterator<F> *aiter_; // Iterator for current state - MatchType match_type_; // Type of match to perform - Label binary_label_; // Least label for binary search - Label match_label_; // Current label to be matched - size_t narcs_; // Current state arc count - Arc loop_; // For non-consuming symbols - bool current_loop_; // Current arc is the implicit loop - bool exact_match_; // Exact match or lower bound? - bool error_; // Error encountered - - void operator=(const SortedMatcher<F> &); // Disallow -}; - -// Returns true iff match to match_label_. Positions arc iterator at -// lower bound regardless. -template <class F> inline -bool SortedMatcher<F>::Search() { - aiter_->SetFlags( - match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue, - kArcValueFlags); - if (match_label_ >= binary_label_) { - // Binary search for match. - size_t low = 0; - size_t high = narcs_; - while (low < high) { - size_t mid = (low + high) / 2; - aiter_->Seek(mid); - Label label = match_type_ == MATCH_INPUT ? 
- aiter_->Value().ilabel : aiter_->Value().olabel; - if (label > match_label_) { - high = mid; - } else if (label < match_label_) { - low = mid + 1; - } else { - // find first matching label (when non-determinism) - for (size_t i = mid; i > low; --i) { - aiter_->Seek(i - 1); - label = match_type_ == MATCH_INPUT ? aiter_->Value().ilabel : - aiter_->Value().olabel; - if (label != match_label_) { - aiter_->Seek(i); - return true; - } - } - return true; - } - } - aiter_->Seek(low); - return false; - } else { - // Linear search for match. - for (aiter_->Reset(); !aiter_->Done(); aiter_->Next()) { - Label label = match_type_ == MATCH_INPUT ? - aiter_->Value().ilabel : aiter_->Value().olabel; - if (label == match_label_) { - return true; - } - if (label > match_label_) - break; - } - return false; - } -} - - -// Specifies whether during matching we rewrite both the input and output sides. -enum MatcherRewriteMode { - MATCHER_REWRITE_AUTO = 0, // Rewrites both sides iff acceptor. - MATCHER_REWRITE_ALWAYS, - MATCHER_REWRITE_NEVER -}; - - -// For any requested label that doesn't match at a state, this matcher -// considers all transitions that match the label 'rho_label' (rho = -// 'rest'). Each such rho transition found is returned with the -// rho_label rewritten as the requested label (both sides if an -// acceptor, or if 'rewrite_both' is true and both input and output -// labels of the found transition are 'rho_label'). If 'rho_label' is -// kNoLabel, this special matching is not done. RhoMatcher is -// templated itself on a matcher, which is used to perform the -// underlying matching. By default, the underlying matcher is -// constructed by RhoMatcher. The user can instead pass in this -// object; in that case, RhoMatcher takes its ownership. 
-template <class M> -class RhoMatcher : public MatcherBase<typename M::Arc> { - public: - typedef typename M::FST FST; - typedef typename M::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - RhoMatcher(const FST &fst, - MatchType match_type, - Label rho_label = kNoLabel, - MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO, - M *matcher = 0) - : matcher_(matcher ? matcher : new M(fst, match_type)), - match_type_(match_type), - rho_label_(rho_label), - error_(false) { - if (match_type == MATCH_BOTH) { - FSTERROR() << "RhoMatcher: bad match type"; - match_type_ = MATCH_NONE; - error_ = true; - } - if (rho_label == 0) { - FSTERROR() << "RhoMatcher: 0 cannot be used as rho_label"; - rho_label_ = kNoLabel; - error_ = true; - } - - if (rewrite_mode == MATCHER_REWRITE_AUTO) - rewrite_both_ = fst.Properties(kAcceptor, true); - else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) - rewrite_both_ = true; - else - rewrite_both_ = false; - } - - RhoMatcher(const RhoMatcher<M> &matcher, bool safe = false) - : matcher_(new M(*matcher.matcher_, safe)), - match_type_(matcher.match_type_), - rho_label_(matcher.rho_label_), - rewrite_both_(matcher.rewrite_both_), - error_(matcher.error_) {} - - virtual ~RhoMatcher() { - delete matcher_; - } - - virtual RhoMatcher<M> *Copy(bool safe = false) const { - return new RhoMatcher<M>(*this, safe); - } - - virtual MatchType Type(bool test) const { return matcher_->Type(test); } - - void SetState(StateId s) { - matcher_->SetState(s); - has_rho_ = rho_label_ != kNoLabel; - } - - bool Find(Label match_label) { - if (match_label == rho_label_ && rho_label_ != kNoLabel) { - FSTERROR() << "RhoMatcher::Find: bad label (rho)"; - error_ = true; - return false; - } - if (matcher_->Find(match_label)) { - rho_match_ = kNoLabel; - return true; - } else if (has_rho_ && match_label != 0 && match_label != kNoLabel && - (has_rho_ = matcher_->Find(rho_label_))) { - rho_match_ = 
match_label; - return true; - } else { - return false; - } - } - - bool Done() const { return matcher_->Done(); } - - const Arc& Value() const { - if (rho_match_ == kNoLabel) { - return matcher_->Value(); - } else { - rho_arc_ = matcher_->Value(); - if (rewrite_both_) { - if (rho_arc_.ilabel == rho_label_) - rho_arc_.ilabel = rho_match_; - if (rho_arc_.olabel == rho_label_) - rho_arc_.olabel = rho_match_; - } else if (match_type_ == MATCH_INPUT) { - rho_arc_.ilabel = rho_match_; - } else { - rho_arc_.olabel = rho_match_; - } - return rho_arc_; - } - } - - void Next() { matcher_->Next(); } - - virtual const FST &GetFst() const { return matcher_->GetFst(); } - - virtual uint64 Properties(uint64 props) const; - - virtual uint32 Flags() const { - if (rho_label_ == kNoLabel || match_type_ == MATCH_NONE) - return matcher_->Flags(); - return matcher_->Flags() | kRequireMatch; - } - - private: - virtual void SetState_(StateId s) { SetState(s); } - virtual bool Find_(Label label) { return Find(label); } - virtual bool Done_() const { return Done(); } - virtual const Arc& Value_() const { return Value(); } - virtual void Next_() { Next(); } - - M *matcher_; - MatchType match_type_; // Type of match requested - Label rho_label_; // Label that represents the rho transition - bool rewrite_both_; // Rewrite both sides when both are 'rho_label_' - bool has_rho_; // Are there possibly rhos at the current state? 
- Label rho_match_; // Current label that matches rho transition - mutable Arc rho_arc_; // Arc to return when rho match - bool error_; // Error encountered - - void operator=(const RhoMatcher<M> &); // Disallow -}; - -template <class M> inline -uint64 RhoMatcher<M>::Properties(uint64 inprops) const { - uint64 outprops = matcher_->Properties(inprops); - if (error_) outprops |= kError; - - if (match_type_ == MATCH_NONE) { - return outprops; - } else if (match_type_ == MATCH_INPUT) { - if (rewrite_both_) { - return outprops & ~(kODeterministic | kNonODeterministic | kString | - kILabelSorted | kNotILabelSorted | - kOLabelSorted | kNotOLabelSorted); - } else { - return outprops & ~(kODeterministic | kAcceptor | kString | - kILabelSorted | kNotILabelSorted); - } - } else if (match_type_ == MATCH_OUTPUT) { - if (rewrite_both_) { - return outprops & ~(kIDeterministic | kNonIDeterministic | kString | - kILabelSorted | kNotILabelSorted | - kOLabelSorted | kNotOLabelSorted); - } else { - return outprops & ~(kIDeterministic | kAcceptor | kString | - kOLabelSorted | kNotOLabelSorted); - } - } else { - // Shouldn't ever get here. - FSTERROR() << "RhoMatcher:: bad match type: " << match_type_; - return 0; - } -} - - -// For any requested label, this matcher considers all transitions -// that match the label 'sigma_label' (sigma = "any"), and this in -// additions to transitions with the requested label. Each such sigma -// transition found is returned with the sigma_label rewritten as the -// requested label (both sides if an acceptor, or if 'rewrite_both' is -// true and both input and output labels of the found transition are -// 'sigma_label'). If 'sigma_label' is kNoLabel, this special -// matching is not done. SigmaMatcher is templated itself on a -// matcher, which is used to perform the underlying matching. By -// default, the underlying matcher is constructed by SigmaMatcher. 
-// The user can instead pass in this object; in that case, -// SigmaMatcher takes its ownership. -template <class M> -class SigmaMatcher : public MatcherBase<typename M::Arc> { - public: - typedef typename M::FST FST; - typedef typename M::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - SigmaMatcher(const FST &fst, - MatchType match_type, - Label sigma_label = kNoLabel, - MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO, - M *matcher = 0) - : matcher_(matcher ? matcher : new M(fst, match_type)), - match_type_(match_type), - sigma_label_(sigma_label), - error_(false) { - if (match_type == MATCH_BOTH) { - FSTERROR() << "SigmaMatcher: bad match type"; - match_type_ = MATCH_NONE; - error_ = true; - } - if (sigma_label == 0) { - FSTERROR() << "SigmaMatcher: 0 cannot be used as sigma_label"; - sigma_label_ = kNoLabel; - error_ = true; - } - - if (rewrite_mode == MATCHER_REWRITE_AUTO) - rewrite_both_ = fst.Properties(kAcceptor, true); - else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) - rewrite_both_ = true; - else - rewrite_both_ = false; - } - - SigmaMatcher(const SigmaMatcher<M> &matcher, bool safe = false) - : matcher_(new M(*matcher.matcher_, safe)), - match_type_(matcher.match_type_), - sigma_label_(matcher.sigma_label_), - rewrite_both_(matcher.rewrite_both_), - error_(matcher.error_) {} - - virtual ~SigmaMatcher() { - delete matcher_; - } - - virtual SigmaMatcher<M> *Copy(bool safe = false) const { - return new SigmaMatcher<M>(*this, safe); - } - - virtual MatchType Type(bool test) const { return matcher_->Type(test); } - - void SetState(StateId s) { - matcher_->SetState(s); - has_sigma_ = - sigma_label_ != kNoLabel ? 
matcher_->Find(sigma_label_) : false; - } - - bool Find(Label match_label) { - match_label_ = match_label; - if (match_label == sigma_label_ && sigma_label_ != kNoLabel) { - FSTERROR() << "SigmaMatcher::Find: bad label (sigma)"; - error_ = true; - return false; - } - if (matcher_->Find(match_label)) { - sigma_match_ = kNoLabel; - return true; - } else if (has_sigma_ && match_label != 0 && match_label != kNoLabel && - matcher_->Find(sigma_label_)) { - sigma_match_ = match_label; - return true; - } else { - return false; - } - } - - bool Done() const { - return matcher_->Done(); - } - - const Arc& Value() const { - if (sigma_match_ == kNoLabel) { - return matcher_->Value(); - } else { - sigma_arc_ = matcher_->Value(); - if (rewrite_both_) { - if (sigma_arc_.ilabel == sigma_label_) - sigma_arc_.ilabel = sigma_match_; - if (sigma_arc_.olabel == sigma_label_) - sigma_arc_.olabel = sigma_match_; - } else if (match_type_ == MATCH_INPUT) { - sigma_arc_.ilabel = sigma_match_; - } else { - sigma_arc_.olabel = sigma_match_; - } - return sigma_arc_; - } - } - - void Next() { - matcher_->Next(); - if (matcher_->Done() && has_sigma_ && (sigma_match_ == kNoLabel) && - (match_label_ > 0)) { - matcher_->Find(sigma_label_); - sigma_match_ = match_label_; - } - } - - virtual const FST &GetFst() const { return matcher_->GetFst(); } - - virtual uint64 Properties(uint64 props) const; - - virtual uint32 Flags() const { - if (sigma_label_ == kNoLabel || match_type_ == MATCH_NONE) - return matcher_->Flags(); - // kRequireMatch temporarily disabled until issues - // in //speech/gaudi/annotation/util/denorm are resolved. 
- // return matcher_->Flags() | kRequireMatch; - return matcher_->Flags(); - } - -private: - virtual void SetState_(StateId s) { SetState(s); } - virtual bool Find_(Label label) { return Find(label); } - virtual bool Done_() const { return Done(); } - virtual const Arc& Value_() const { return Value(); } - virtual void Next_() { Next(); } - - M *matcher_; - MatchType match_type_; // Type of match requested - Label sigma_label_; // Label that represents the sigma transition - bool rewrite_both_; // Rewrite both sides when both are 'sigma_label_' - bool has_sigma_; // Are there sigmas at the current state? - Label sigma_match_; // Current label that matches sigma transition - mutable Arc sigma_arc_; // Arc to return when sigma match - Label match_label_; // Label being matched - bool error_; // Error encountered - - void operator=(const SigmaMatcher<M> &); // disallow -}; - -template <class M> inline -uint64 SigmaMatcher<M>::Properties(uint64 inprops) const { - uint64 outprops = matcher_->Properties(inprops); - if (error_) outprops |= kError; - - if (match_type_ == MATCH_NONE) { - return outprops; - } else if (rewrite_both_) { - return outprops & ~(kIDeterministic | kNonIDeterministic | - kODeterministic | kNonODeterministic | - kILabelSorted | kNotILabelSorted | - kOLabelSorted | kNotOLabelSorted | - kString); - } else if (match_type_ == MATCH_INPUT) { - return outprops & ~(kIDeterministic | kNonIDeterministic | - kODeterministic | kNonODeterministic | - kILabelSorted | kNotILabelSorted | - kString | kAcceptor); - } else if (match_type_ == MATCH_OUTPUT) { - return outprops & ~(kIDeterministic | kNonIDeterministic | - kODeterministic | kNonODeterministic | - kOLabelSorted | kNotOLabelSorted | - kString | kAcceptor); - } else { - // Shouldn't ever get here. 
- FSTERROR() << "SigmaMatcher:: bad match type: " << match_type_; - return 0; - } -} - - -// For any requested label that doesn't match at a state, this matcher -// considers the *unique* transition that matches the label 'phi_label' -// (phi = 'fail'), and recursively looks for a match at its -// destination. When 'phi_loop' is true, if no match is found but a -// phi self-loop is found, then the phi transition found is returned -// with the phi_label rewritten as the requested label (both sides if -// an acceptor, or if 'rewrite_both' is true and both input and output -// labels of the found transition are 'phi_label'). If 'phi_label' is -// kNoLabel, this special matching is not done. PhiMatcher is -// templated itself on a matcher, which is used to perform the -// underlying matching. By default, the underlying matcher is -// constructed by PhiMatcher. The user can instead pass in this -// object; in that case, PhiMatcher takes its ownership. -// Warning: phi non-determinism not supported (for simplicity). -template <class M> -class PhiMatcher : public MatcherBase<typename M::Arc> { - public: - typedef typename M::FST FST; - typedef typename M::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - PhiMatcher(const FST &fst, - MatchType match_type, - Label phi_label = kNoLabel, - bool phi_loop = true, - MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO, - M *matcher = 0) - : matcher_(matcher ? 
matcher : new M(fst, match_type)), - match_type_(match_type), - phi_label_(phi_label), - state_(kNoStateId), - phi_loop_(phi_loop), - error_(false) { - if (match_type == MATCH_BOTH) { - FSTERROR() << "PhiMatcher: bad match type"; - match_type_ = MATCH_NONE; - error_ = true; - } - - if (rewrite_mode == MATCHER_REWRITE_AUTO) - rewrite_both_ = fst.Properties(kAcceptor, true); - else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) - rewrite_both_ = true; - else - rewrite_both_ = false; - } - - PhiMatcher(const PhiMatcher<M> &matcher, bool safe = false) - : matcher_(new M(*matcher.matcher_, safe)), - match_type_(matcher.match_type_), - phi_label_(matcher.phi_label_), - rewrite_both_(matcher.rewrite_both_), - state_(kNoStateId), - phi_loop_(matcher.phi_loop_), - error_(matcher.error_) {} - - virtual ~PhiMatcher() { - delete matcher_; - } - - virtual PhiMatcher<M> *Copy(bool safe = false) const { - return new PhiMatcher<M>(*this, safe); - } - - virtual MatchType Type(bool test) const { return matcher_->Type(test); } - - void SetState(StateId s) { - matcher_->SetState(s); - state_ = s; - has_phi_ = phi_label_ != kNoLabel; - } - - bool Find(Label match_label); - - bool Done() const { return matcher_->Done(); } - - const Arc& Value() const { - if ((phi_match_ == kNoLabel) && (phi_weight_ == Weight::One())) { - return matcher_->Value(); - } else if (phi_match_ == 0) { // Virtual epsilon loop - phi_arc_ = Arc(kNoLabel, 0, Weight::One(), state_); - if (match_type_ == MATCH_OUTPUT) - swap(phi_arc_.ilabel, phi_arc_.olabel); - return phi_arc_; - } else { - phi_arc_ = matcher_->Value(); - phi_arc_.weight = Times(phi_weight_, phi_arc_.weight); - if (phi_match_ != kNoLabel) { // Phi loop match - if (rewrite_both_) { - if (phi_arc_.ilabel == phi_label_) - phi_arc_.ilabel = phi_match_; - if (phi_arc_.olabel == phi_label_) - phi_arc_.olabel = phi_match_; - } else if (match_type_ == MATCH_INPUT) { - phi_arc_.ilabel = phi_match_; - } else { - phi_arc_.olabel = phi_match_; - } - } - return 
phi_arc_; - } - } - - void Next() { matcher_->Next(); } - - virtual const FST &GetFst() const { return matcher_->GetFst(); } - - virtual uint64 Properties(uint64 props) const; - - virtual uint32 Flags() const { - if (phi_label_ == kNoLabel || match_type_ == MATCH_NONE) - return matcher_->Flags(); - return matcher_->Flags() | kRequireMatch; - } - -private: - virtual void SetState_(StateId s) { SetState(s); } - virtual bool Find_(Label label) { return Find(label); } - virtual bool Done_() const { return Done(); } - virtual const Arc& Value_() const { return Value(); } - virtual void Next_() { Next(); } - - M *matcher_; - MatchType match_type_; // Type of match requested - Label phi_label_; // Label that represents the phi transition - bool rewrite_both_; // Rewrite both sides when both are 'phi_label_' - bool has_phi_; // Are there possibly phis at the current state? - Label phi_match_; // Current label that matches phi loop - mutable Arc phi_arc_; // Arc to return - StateId state_; // State where looking for matches - Weight phi_weight_; // Product of the weights of phi transitions taken - bool phi_loop_; // When true, phi self-loop are allowed and treated - // as rho (required for Aho-Corasick) - bool error_; // Error encountered - - void operator=(const PhiMatcher<M> &); // disallow -}; - -template <class M> inline -bool PhiMatcher<M>::Find(Label match_label) { - if (match_label == phi_label_ && phi_label_ != kNoLabel && phi_label_ != 0) { - FSTERROR() << "PhiMatcher::Find: bad label (phi): " << phi_label_; - error_ = true; - return false; - } - matcher_->SetState(state_); - phi_match_ = kNoLabel; - phi_weight_ = Weight::One(); - if (phi_label_ == 0) { // When 'phi_label_ == 0', - if (match_label == kNoLabel) // there are no more true epsilon arcs, - return false; - if (match_label == 0) { // but virtual eps loop need to be returned - if (!matcher_->Find(kNoLabel)) { - return matcher_->Find(0); - } else { - phi_match_ = 0; - return true; - } - } - } - if 
(!has_phi_ || match_label == 0 || match_label == kNoLabel) - return matcher_->Find(match_label); - StateId state = state_; - while (!matcher_->Find(match_label)) { - // Look for phi transition (if phi_label_ == 0, we need to look - // for -1 to avoid getting the virtual self-loop) - if (!matcher_->Find(phi_label_ == 0 ? -1 : phi_label_)) - return false; - if (phi_loop_ && matcher_->Value().nextstate == state) { - phi_match_ = match_label; - return true; - } - phi_weight_ = Times(phi_weight_, matcher_->Value().weight); - state = matcher_->Value().nextstate; - matcher_->Next(); - if (!matcher_->Done()) { - FSTERROR() << "PhiMatcher: phi non-determinism not supported"; - error_ = true; - } - matcher_->SetState(state); - } - return true; -} - -template <class M> inline -uint64 PhiMatcher<M>::Properties(uint64 inprops) const { - uint64 outprops = matcher_->Properties(inprops); - if (error_) outprops |= kError; - - if (match_type_ == MATCH_NONE) { - return outprops; - } else if (match_type_ == MATCH_INPUT) { - if (phi_label_ == 0) { - outprops &= ~kEpsilons | ~kIEpsilons | ~kOEpsilons; - outprops |= kNoEpsilons | kNoIEpsilons; - } - if (rewrite_both_) { - return outprops & ~(kODeterministic | kNonODeterministic | kString | - kILabelSorted | kNotILabelSorted | - kOLabelSorted | kNotOLabelSorted); - } else { - return outprops & ~(kODeterministic | kAcceptor | kString | - kILabelSorted | kNotILabelSorted | - kOLabelSorted | kNotOLabelSorted); - } - } else if (match_type_ == MATCH_OUTPUT) { - if (phi_label_ == 0) { - outprops &= ~kEpsilons | ~kIEpsilons | ~kOEpsilons; - outprops |= kNoEpsilons | kNoOEpsilons; - } - if (rewrite_both_) { - return outprops & ~(kIDeterministic | kNonIDeterministic | kString | - kILabelSorted | kNotILabelSorted | - kOLabelSorted | kNotOLabelSorted); - } else { - return outprops & ~(kIDeterministic | kAcceptor | kString | - kILabelSorted | kNotILabelSorted | - kOLabelSorted | kNotOLabelSorted); - } - } else { - // Shouldn't ever get here. 
- FSTERROR() << "PhiMatcher:: bad match type: " << match_type_; - return 0; - } -} - - -// -// MULTI-EPS MATCHER FLAGS -// - -// Return multi-epsilon arcs for Find(kNoLabel). -const uint32 kMultiEpsList = 0x00000001; - -// Return a kNolabel loop for Find(multi_eps). -const uint32 kMultiEpsLoop = 0x00000002; - -// MultiEpsMatcher: allows treating multiple non-0 labels as -// non-consuming labels in addition to 0 that is always -// non-consuming. Precise behavior controlled by 'flags' argument. By -// default, the underlying matcher is constructed by -// MultiEpsMatcher. The user can instead pass in this object; in that -// case, MultiEpsMatcher takes its ownership iff 'own_matcher' is -// true. -template <class M> -class MultiEpsMatcher { - public: - typedef typename M::FST FST; - typedef typename M::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - MultiEpsMatcher(const FST &fst, MatchType match_type, - uint32 flags = (kMultiEpsLoop | kMultiEpsList), - M *matcher = 0, bool own_matcher = true) - : matcher_(matcher ? matcher : new M(fst, match_type)), - flags_(flags), - own_matcher_(matcher ? 
own_matcher : true) { - if (match_type == MATCH_INPUT) { - loop_.ilabel = kNoLabel; - loop_.olabel = 0; - } else { - loop_.ilabel = 0; - loop_.olabel = kNoLabel; - } - loop_.weight = Weight::One(); - loop_.nextstate = kNoStateId; - } - - MultiEpsMatcher(const MultiEpsMatcher<M> &matcher, bool safe = false) - : matcher_(new M(*matcher.matcher_, safe)), - flags_(matcher.flags_), - own_matcher_(true), - multi_eps_labels_(matcher.multi_eps_labels_), - loop_(matcher.loop_) { - loop_.nextstate = kNoStateId; - } - - ~MultiEpsMatcher() { - if (own_matcher_) - delete matcher_; - } - - MultiEpsMatcher<M> *Copy(bool safe = false) const { - return new MultiEpsMatcher<M>(*this, safe); - } - - MatchType Type(bool test) const { return matcher_->Type(test); } - - void SetState(StateId s) { - matcher_->SetState(s); - loop_.nextstate = s; - } - - bool Find(Label match_label); - - bool Done() const { - return done_; - } - - const Arc& Value() const { - return current_loop_ ? loop_ : matcher_->Value(); - } - - void Next() { - if (!current_loop_) { - matcher_->Next(); - done_ = matcher_->Done(); - if (done_ && multi_eps_iter_ != multi_eps_labels_.End()) { - ++multi_eps_iter_; - while ((multi_eps_iter_ != multi_eps_labels_.End()) && - !matcher_->Find(*multi_eps_iter_)) - ++multi_eps_iter_; - if (multi_eps_iter_ != multi_eps_labels_.End()) - done_ = false; - else - done_ = !matcher_->Find(kNoLabel); - - } - } else { - done_ = true; - } - } - - const FST &GetFst() const { return matcher_->GetFst(); } - - uint64 Properties(uint64 props) const { return matcher_->Properties(props); } - - uint32 Flags() const { return matcher_->Flags(); } - - void AddMultiEpsLabel(Label label) { - if (label == 0) { - FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0"; - } else { - multi_eps_labels_.Insert(label); - } - } - - void RemoveMultiEpsLabel(Label label) { - if (label == 0) { - FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0"; - } else { - multi_eps_labels_.Erase(label); - } - } - - void 
ClearMultiEpsLabels() { - multi_eps_labels_.Clear(); - } - -private: - M *matcher_; - uint32 flags_; - bool own_matcher_; // Does this class delete the matcher? - - // Multi-eps label set - CompactSet<Label, kNoLabel> multi_eps_labels_; - typename CompactSet<Label, kNoLabel>::const_iterator multi_eps_iter_; - - bool current_loop_; // Current arc is the implicit loop - mutable Arc loop_; // For non-consuming symbols - bool done_; // Matching done - - void operator=(const MultiEpsMatcher<M> &); // Disallow -}; - -template <class M> inline -bool MultiEpsMatcher<M>::Find(Label match_label) { - multi_eps_iter_ = multi_eps_labels_.End(); - current_loop_ = false; - bool ret; - if (match_label == 0) { - ret = matcher_->Find(0); - } else if (match_label == kNoLabel) { - if (flags_ & kMultiEpsList) { - // return all non-consuming arcs (incl. epsilon) - multi_eps_iter_ = multi_eps_labels_.Begin(); - while ((multi_eps_iter_ != multi_eps_labels_.End()) && - !matcher_->Find(*multi_eps_iter_)) - ++multi_eps_iter_; - if (multi_eps_iter_ != multi_eps_labels_.End()) - ret = true; - else - ret = matcher_->Find(kNoLabel); - } else { - // return all epsilon arcs - ret = matcher_->Find(kNoLabel); - } - } else if ((flags_ & kMultiEpsLoop) && - multi_eps_labels_.Find(match_label) != multi_eps_labels_.End()) { - // return 'implicit' loop - current_loop_ = true; - ret = true; - } else { - ret = matcher_->Find(match_label); - } - done_ = !ret; - return ret; -} - - -// Generic matcher, templated on the FST definition -// - a wrapper around pointer to specific one. -// Here is a typical use: \code -// Matcher<StdFst> matcher(fst, MATCH_INPUT); -// matcher.SetState(state); -// if (matcher.Find(label)) -// for (; !matcher.Done(); matcher.Next()) { -// StdArc &arc = matcher.Value(); -// ... 
-// } \endcode -template <class F> -class Matcher { - public: - typedef F FST; - typedef typename F::Arc Arc; - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - Matcher(const F &fst, MatchType match_type) { - base_ = fst.InitMatcher(match_type); - if (!base_) - base_ = new SortedMatcher<F>(fst, match_type); - } - - Matcher(const Matcher<F> &matcher, bool safe = false) { - base_ = matcher.base_->Copy(safe); - } - - // Takes ownership of the provided matcher - Matcher(MatcherBase<Arc>* base_matcher) { base_ = base_matcher; } - - ~Matcher() { delete base_; } - - Matcher<F> *Copy(bool safe = false) const { - return new Matcher<F>(*this, safe); - } - - MatchType Type(bool test) const { return base_->Type(test); } - void SetState(StateId s) { base_->SetState(s); } - bool Find(Label label) { return base_->Find(label); } - bool Done() const { return base_->Done(); } - const Arc& Value() const { return base_->Value(); } - void Next() { base_->Next(); } - const F &GetFst() const { return static_cast<const F &>(base_->GetFst()); } - uint64 Properties(uint64 props) const { return base_->Properties(props); } - uint32 Flags() const { return base_->Flags() & kMatcherFlags; } - - private: - MatcherBase<Arc> *base_; - - void operator=(const Matcher<Arc> &); // disallow -}; - -} // namespace fst - - - -#endif // FST_LIB_MATCHER_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/minimize.h b/kaldi_io/src/tools/openfst/include/fst/minimize.h deleted file mode 100644 index 6e9dd3d..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/minimize.h +++ /dev/null @@ -1,591 +0,0 @@ -// minimize.h -// minimize.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Johan Schalkwyk) -// -// \file Functions and classes to minimize a finite state acceptor -// - -#ifndef FST_LIB_MINIMIZE_H__ -#define FST_LIB_MINIMIZE_H__ - -#include <cmath> - -#include <algorithm> -#include <map> -#include <queue> -#include <vector> -using std::vector; - -#include <fst/arcsort.h> -#include <fst/connect.h> -#include <fst/dfs-visit.h> -#include <fst/encode.h> -#include <fst/factor-weight.h> -#include <fst/fst.h> -#include <fst/mutable-fst.h> -#include <fst/partition.h> -#include <fst/push.h> -#include <fst/queue.h> -#include <fst/reverse.h> -#include <fst/state-map.h> - - -namespace fst { - -// comparator for creating partition based on sorting on -// - states -// - final weight -// - out degree, -// - (input label, output label, weight, destination_block) -template <class A> -class StateComparator { - public: - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - static const uint32 kCompareFinal = 0x00000001; - static const uint32 kCompareOutDegree = 0x00000002; - static const uint32 kCompareArcs = 0x00000004; - static const uint32 kCompareAll = 0x00000007; - - StateComparator(const Fst<A>& fst, - const Partition<typename A::StateId>& partition, - uint32 flags = kCompareAll) - : fst_(fst), partition_(partition), flags_(flags) {} - - // compare state x with state y based on sort criteria - bool operator()(const StateId x, const StateId y) const { - // check for final state equivalence - if (flags_ & kCompareFinal) { - const size_t 
xfinal = fst_.Final(x).Hash(); - const size_t yfinal = fst_.Final(y).Hash(); - if (xfinal < yfinal) return true; - else if (xfinal > yfinal) return false; - } - - if (flags_ & kCompareOutDegree) { - // check for # arcs - if (fst_.NumArcs(x) < fst_.NumArcs(y)) return true; - if (fst_.NumArcs(x) > fst_.NumArcs(y)) return false; - - if (flags_ & kCompareArcs) { - // # arcs are equal, check for arc match - for (ArcIterator<Fst<A> > aiter1(fst_, x), aiter2(fst_, y); - !aiter1.Done() && !aiter2.Done(); aiter1.Next(), aiter2.Next()) { - const A& arc1 = aiter1.Value(); - const A& arc2 = aiter2.Value(); - if (arc1.ilabel < arc2.ilabel) return true; - if (arc1.ilabel > arc2.ilabel) return false; - - if (partition_.class_id(arc1.nextstate) < - partition_.class_id(arc2.nextstate)) return true; - if (partition_.class_id(arc1.nextstate) > - partition_.class_id(arc2.nextstate)) return false; - } - } - } - - return false; - } - - private: - const Fst<A>& fst_; - const Partition<typename A::StateId>& partition_; - const uint32 flags_; -}; - -template <class A> const uint32 StateComparator<A>::kCompareFinal; -template <class A> const uint32 StateComparator<A>::kCompareOutDegree; -template <class A> const uint32 StateComparator<A>::kCompareArcs; -template <class A> const uint32 StateComparator<A>::kCompareAll; - - -// Computes equivalence classes for cyclic Fsts. For cyclic minimization -// we use the classic HopCroft minimization algorithm, which is of -// -// O(E)log(N), -// -// where E is the number of edges in the machine and N is number of states. 
-// -// The following paper describes the original algorithm -// An N Log N algorithm for minimizing states in a finite automaton -// by John HopCroft, January 1971 -// -template <class A, class Queue> -class CyclicMinimizer { - public: - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::StateId ClassId; - typedef typename A::Weight Weight; - typedef ReverseArc<A> RevA; - - CyclicMinimizer(const ExpandedFst<A>& fst): - // tell the Partition data-member to expect multiple repeated - // calls to SplitOn with the same element if we are non-deterministic. - P_(fst.Properties(kIDeterministic, true) == 0) { - if(fst.Properties(kIDeterministic, true) == 0) - CHECK(Weight::Properties() & kIdempotent); // this minimization - // algorithm for non-deterministic FSTs can only work with idempotent - // semirings. - Initialize(fst); - Compute(fst); - } - - ~CyclicMinimizer() { - delete aiter_queue_; - } - - const Partition<StateId>& partition() const { - return P_; - } - - // helper classes - private: - typedef ArcIterator<Fst<RevA> > ArcIter; - class ArcIterCompare { - public: - ArcIterCompare(const Partition<StateId>& partition) - : partition_(partition) {} - - ArcIterCompare(const ArcIterCompare& comp) - : partition_(comp.partition_) {} - - // compare two iterators based on there input labels, and proto state - // (partition class Ids) - bool operator()(const ArcIter* x, const ArcIter* y) const { - const RevA& xarc = x->Value(); - const RevA& yarc = y->Value(); - return (xarc.ilabel > yarc.ilabel); - } - - private: - const Partition<StateId>& partition_; - }; - - typedef priority_queue<ArcIter*, vector<ArcIter*>, ArcIterCompare> - ArcIterQueue; - - // helper methods - private: - // prepartitions the space into equivalence classes with - // same final weight - // same # arcs per state - // same outgoing arcs - void PrePartition(const Fst<A>& fst) { - VLOG(5) << "PrePartition"; - - typedef map<StateId, StateId, StateComparator<A> > 
EquivalenceMap; - StateComparator<A> comp(fst, P_, StateComparator<A>::kCompareFinal); - EquivalenceMap equiv_map(comp); - - StateIterator<Fst<A> > siter(fst); - StateId class_id = P_.AddClass(); - P_.Add(siter.Value(), class_id); - equiv_map[siter.Value()] = class_id; - L_.Enqueue(class_id); - for (siter.Next(); !siter.Done(); siter.Next()) { - StateId s = siter.Value(); - typename EquivalenceMap::const_iterator it = equiv_map.find(s); - if (it == equiv_map.end()) { - class_id = P_.AddClass(); - P_.Add(s, class_id); - equiv_map[s] = class_id; - L_.Enqueue(class_id); - } else { - P_.Add(s, it->second); - equiv_map[s] = it->second; - } - } - - VLOG(5) << "Initial Partition: " << P_.num_classes(); - } - - // - Create inverse transition Tr_ = rev(fst) - // - loop over states in fst and split on final, creating two blocks - // in the partition corresponding to final, non-final - void Initialize(const Fst<A>& fst) { - // construct Tr - Reverse(fst, &Tr_); - ILabelCompare<RevA> ilabel_comp; - ArcSort(&Tr_, ilabel_comp); - - // initial split (F, S - F) - P_.Initialize(Tr_.NumStates() - 1); - - // prep partition - PrePartition(fst); - - // allocate arc iterator queue - ArcIterCompare comp(P_); - aiter_queue_ = new ArcIterQueue(comp); - } - - // partition all classes with destination C - void Split(ClassId C) { - // Prep priority queue. Open arc iterator for each state in C, and - // insert into priority queue. - for (PartitionIterator<StateId> siter(P_, C); - !siter.Done(); siter.Next()) { - StateId s = siter.Value(); - if (Tr_.NumArcs(s + 1)) - aiter_queue_->push(new ArcIterator<Fst<RevA> >(Tr_, s + 1)); - } - - // Now pop arc iterator from queue, split entering equivalence class - // re-insert updated iterator into queue. 
- Label prev_label = -1; - while (!aiter_queue_->empty()) { - ArcIterator<Fst<RevA> >* aiter = aiter_queue_->top(); - aiter_queue_->pop(); - if (aiter->Done()) { - delete aiter; - continue; - } - - const RevA& arc = aiter->Value(); - StateId from_state = aiter->Value().nextstate - 1; - Label from_label = arc.ilabel; - if (prev_label != from_label) - P_.FinalizeSplit(&L_); - - StateId from_class = P_.class_id(from_state); - if (P_.class_size(from_class) > 1) - P_.SplitOn(from_state); - - prev_label = from_label; - aiter->Next(); - if (aiter->Done()) - delete aiter; - else - aiter_queue_->push(aiter); - } - P_.FinalizeSplit(&L_); - } - - // Main loop for hopcroft minimization. - void Compute(const Fst<A>& fst) { - // process active classes (FIFO, or FILO) - while (!L_.Empty()) { - ClassId C = L_.Head(); - L_.Dequeue(); - - // split on C, all labels in C - Split(C); - } - } - - // helper data - private: - // Partioning of states into equivalence classes - Partition<StateId> P_; - - // L = set of active classes to be processed in partition P - Queue L_; - - // reverse transition function - VectorFst<RevA> Tr_; - - // Priority queue of open arc iterators for all states in the 'splitter' - // equivalence class - ArcIterQueue* aiter_queue_; -}; - - -// Computes equivalence classes for acyclic Fsts. The implementation details -// for this algorithms is documented by the following paper. -// -// Minimization of acyclic deterministic automata in linear time -// Dominque Revuz -// -// Complexity O(|E|) -// -template <class A> -class AcyclicMinimizer { - public: - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::StateId ClassId; - typedef typename A::Weight Weight; - - AcyclicMinimizer(const ExpandedFst<A>& fst): - // tell the Partition data-member to expect multiple repeated - // calls to SplitOn with the same element if we are non-deterministic. 
- partition_(fst.Properties(kIDeterministic, true) == 0) { - if(fst.Properties(kIDeterministic, true) == 0) - CHECK(Weight::Properties() & kIdempotent); // minimization for - // non-deterministic FSTs can only work with idempotent semirings. - Initialize(fst); - Refine(fst); - } - - const Partition<StateId>& partition() { - return partition_; - } - - // helper classes - private: - // DFS visitor to compute the height (distance) to final state. - class HeightVisitor { - public: - HeightVisitor() : max_height_(0), num_states_(0) { } - - // invoked before dfs visit - void InitVisit(const Fst<A>& fst) {} - - // invoked when state is discovered (2nd arg is DFS tree root) - bool InitState(StateId s, StateId root) { - // extend height array and initialize height (distance) to 0 - for (size_t i = height_.size(); i <= s; ++i) - height_.push_back(-1); - - if (s >= num_states_) num_states_ = s + 1; - return true; - } - - // invoked when tree arc examined (to undiscoverted state) - bool TreeArc(StateId s, const A& arc) { - return true; - } - - // invoked when back arc examined (to unfinished state) - bool BackArc(StateId s, const A& arc) { - return true; - } - - // invoked when forward or cross arc examined (to finished state) - bool ForwardOrCrossArc(StateId s, const A& arc) { - if (height_[arc.nextstate] + 1 > height_[s]) - height_[s] = height_[arc.nextstate] + 1; - return true; - } - - // invoked when state finished (parent is kNoStateId for tree root) - void FinishState(StateId s, StateId parent, const A* parent_arc) { - if (height_[s] == -1) height_[s] = 0; - StateId h = height_[s] + 1; - if (parent >= 0) { - if (h > height_[parent]) height_[parent] = h; - if (h > max_height_) max_height_ = h; - } - } - - // invoked after DFS visit - void FinishVisit() {} - - size_t max_height() const { return max_height_; } - - const vector<StateId>& height() const { return height_; } - - const size_t num_states() const { return num_states_; } - - private: - vector<StateId> height_; - 
size_t max_height_; - size_t num_states_; - }; - - // helper methods - private: - // cluster states according to height (distance to final state) - void Initialize(const Fst<A>& fst) { - // compute height (distance to final state) - HeightVisitor hvisitor; - DfsVisit(fst, &hvisitor); - - // create initial partition based on height - partition_.Initialize(hvisitor.num_states()); - partition_.AllocateClasses(hvisitor.max_height() + 1); - const vector<StateId>& hstates = hvisitor.height(); - for (size_t s = 0; s < hstates.size(); ++s) - partition_.Add(s, hstates[s]); - } - - // refine states based on arc sort (out degree, arc equivalence) - void Refine(const Fst<A>& fst) { - typedef map<StateId, StateId, StateComparator<A> > EquivalenceMap; - StateComparator<A> comp(fst, partition_); - - // start with tail (height = 0) - size_t height = partition_.num_classes(); - for (size_t h = 0; h < height; ++h) { - EquivalenceMap equiv_classes(comp); - - // sort states within equivalence class - PartitionIterator<StateId> siter(partition_, h); - equiv_classes[siter.Value()] = h; - for (siter.Next(); !siter.Done(); siter.Next()) { - const StateId s = siter.Value(); - typename EquivalenceMap::const_iterator it = equiv_classes.find(s); - if (it == equiv_classes.end()) - equiv_classes[s] = partition_.AddClass(); - else - equiv_classes[s] = it->second; - } - - // create refined partition - for (siter.Reset(); !siter.Done();) { - const StateId s = siter.Value(); - const StateId old_class = partition_.class_id(s); - const StateId new_class = equiv_classes[s]; - - // a move operation can invalidate the iterator, so - // we first update the iterator to the next element - // before we move the current element out of the list - siter.Next(); - if (old_class != new_class) - partition_.Move(s, new_class); - } - } - } - - private: - Partition<StateId> partition_; -}; - - -// Given a partition and a mutable fst, merge states of Fst inplace -// (i.e. destructively). 
Merging works by taking the first state in -// a class of the partition to be the representative state for the class. -// Each arc is then reconnected to this state. All states in the class -// are merged by adding there arcs to the representative state. -template <class A> -void MergeStates( - const Partition<typename A::StateId>& partition, MutableFst<A>* fst) { - typedef typename A::StateId StateId; - - vector<StateId> state_map(partition.num_classes()); - for (size_t i = 0; i < partition.num_classes(); ++i) { - PartitionIterator<StateId> siter(partition, i); - state_map[i] = siter.Value(); // first state in partition; - } - - // relabel destination states - for (size_t c = 0; c < partition.num_classes(); ++c) { - for (PartitionIterator<StateId> siter(partition, c); - !siter.Done(); siter.Next()) { - StateId s = siter.Value(); - for (MutableArcIterator<MutableFst<A> > aiter(fst, s); - !aiter.Done(); aiter.Next()) { - A arc = aiter.Value(); - arc.nextstate = state_map[partition.class_id(arc.nextstate)]; - - if (s == state_map[c]) // first state just set destination - aiter.SetValue(arc); - else - fst->AddArc(state_map[c], arc); - } - } - } - fst->SetStart(state_map[partition.class_id(fst->Start())]); - - Connect(fst); -} - -template <class A> -void AcceptorMinimize(MutableFst<A>* fst) { - typedef typename A::StateId StateId; - if (!(fst->Properties(kAcceptor | kUnweighted, true))) { - FSTERROR() << "FST is not an unweighted acceptor"; - fst->SetProperties(kError, kError); - return; - } - - // connect fst before minimization, handles disconnected states - Connect(fst); - if (fst->NumStates() == 0) return; - - if (fst->Properties(kAcyclic, true)) { - // Acyclic minimization (revuz) - VLOG(2) << "Acyclic Minimization"; - ArcSort(fst, ILabelCompare<A>()); - AcyclicMinimizer<A> minimizer(*fst); - MergeStates(minimizer.partition(), fst); - - } else { - // Cyclic minimizaton (hopcroft) - VLOG(2) << "Cyclic Minimization"; - CyclicMinimizer<A, LifoQueue<StateId> > 
minimizer(*fst); - MergeStates(minimizer.partition(), fst); - } - - // Merge in appropriate semiring - ArcUniqueMapper<A> mapper(*fst); - StateMap(fst, mapper); -} - - -// In place minimization of deterministic weighted automata and transducers. -// For transducers, then the 'sfst' argument is not null, the algorithm -// produces a compact factorization of the minimal transducer. -// -// In the acyclic case, we use an algorithm from Dominique Revuz that -// is linear in the number of arcs (edges) in the machine. -// Complexity = O(E) -// -// In the cyclic case, we use the classical hopcroft minimization. -// Complexity = O(|E|log(|N|) -// -template <class A> -void Minimize(MutableFst<A>* fst, - MutableFst<A>* sfst = 0, - float delta = kDelta) { - uint64 props = fst->Properties(kAcceptor | kWeighted | kUnweighted, true); - - if (!(props & kAcceptor)) { // weighted transducer - VectorFst< GallicArc<A, STRING_LEFT> > gfst; - ArcMap(*fst, &gfst, ToGallicMapper<A, STRING_LEFT>()); - fst->DeleteStates(); - gfst.SetProperties(kAcceptor, kAcceptor); - Push(&gfst, REWEIGHT_TO_INITIAL, delta); - ArcMap(&gfst, QuantizeMapper< GallicArc<A, STRING_LEFT> >(delta)); - EncodeMapper< GallicArc<A, STRING_LEFT> > - encoder(kEncodeLabels | kEncodeWeights, ENCODE); - Encode(&gfst, &encoder); - AcceptorMinimize(&gfst); - Decode(&gfst, encoder); - - if (sfst == 0) { - FactorWeightFst< GallicArc<A, STRING_LEFT>, - GallicFactor<typename A::Label, - typename A::Weight, STRING_LEFT> > fwfst(gfst); - SymbolTable *osyms = fst->OutputSymbols() ? 
- fst->OutputSymbols()->Copy() : 0; - ArcMap(fwfst, fst, FromGallicMapper<A, STRING_LEFT>()); - fst->SetOutputSymbols(osyms); - delete osyms; - } else { - sfst->SetOutputSymbols(fst->OutputSymbols()); - GallicToNewSymbolsMapper<A, STRING_LEFT> mapper(sfst); - ArcMap(gfst, fst, &mapper); - fst->SetOutputSymbols(sfst->InputSymbols()); - } - } else if (props & kWeighted) { // weighted acceptor - Push(fst, REWEIGHT_TO_INITIAL, delta); - ArcMap(fst, QuantizeMapper<A>(delta)); - EncodeMapper<A> encoder(kEncodeLabels | kEncodeWeights, ENCODE); - Encode(fst, &encoder); - AcceptorMinimize(fst); - Decode(fst, encoder); - } else { // unweighted acceptor - AcceptorMinimize(fst); - } -} - -} // namespace fst - -#endif // FST_LIB_MINIMIZE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/mutable-fst.h b/kaldi_io/src/tools/openfst/include/fst/mutable-fst.h deleted file mode 100644 index 09eb237..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/mutable-fst.h +++ /dev/null @@ -1,378 +0,0 @@ -// mutable-fst.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Expanded FST augmented with mutators - interface class definition -// and mutable arc iterator interface. 
-// - -#ifndef FST_LIB_MUTABLE_FST_H__ -#define FST_LIB_MUTABLE_FST_H__ - -#include <stddef.h> -#include <sys/types.h> -#include <string> -#include <vector> -using std::vector; - -#include <fst/expanded-fst.h> - - -namespace fst { - -template <class A> class MutableArcIteratorData; - -// An expanded FST plus mutators (use MutableArcIterator to modify arcs). -template <class A> -class MutableFst : public ExpandedFst<A> { - public: - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - virtual MutableFst<A> &operator=(const Fst<A> &fst) = 0; - - MutableFst<A> &operator=(const MutableFst<A> &fst) { - return operator=(static_cast<const Fst<A> &>(fst)); - } - - virtual void SetStart(StateId) = 0; // Set the initial state - virtual void SetFinal(StateId, Weight) = 0; // Set a state's final weight - virtual void SetProperties(uint64 props, - uint64 mask) = 0; // Set property bits wrt mask - - virtual StateId AddState() = 0; // Add a state, return its ID - virtual void AddArc(StateId, const A &arc) = 0; // Add an arc to state - - virtual void DeleteStates(const vector<StateId>&) = 0; // Delete some states - virtual void DeleteStates() = 0; // Delete all states - virtual void DeleteArcs(StateId, size_t n) = 0; // Delete some arcs at state - virtual void DeleteArcs(StateId) = 0; // Delete all arcs at state - - virtual void ReserveStates(StateId n) { } // Optional, best effort only. - virtual void ReserveArcs(StateId s, size_t n) { } // Optional, Best effort. 
- - // Return input label symbol table; return NULL if not specified - virtual const SymbolTable* InputSymbols() const = 0; - // Return output label symbol table; return NULL if not specified - virtual const SymbolTable* OutputSymbols() const = 0; - - // Return input label symbol table; return NULL if not specified - virtual SymbolTable* MutableInputSymbols() = 0; - // Return output label symbol table; return NULL if not specified - virtual SymbolTable* MutableOutputSymbols() = 0; - - // Set input label symbol table; NULL signifies not unspecified - virtual void SetInputSymbols(const SymbolTable* isyms) = 0; - // Set output label symbol table; NULL signifies not unspecified - virtual void SetOutputSymbols(const SymbolTable* osyms) = 0; - - // Get a copy of this MutableFst. See Fst<>::Copy() for further doc. - virtual MutableFst<A> *Copy(bool safe = false) const = 0; - - // Read an MutableFst from an input stream; return NULL on error. - static MutableFst<A> *Read(istream &strm, const FstReadOptions &opts) { - FstReadOptions ropts(opts); - FstHeader hdr; - if (ropts.header) - hdr = *opts.header; - else { - if (!hdr.Read(strm, opts.source)) - return 0; - ropts.header = &hdr; - } - if (!(hdr.Properties() & kMutable)) { - LOG(ERROR) << "MutableFst::Read: Not an MutableFst: " << ropts.source; - return 0; - } - FstRegister<A> *registr = FstRegister<A>::GetRegister(); - const typename FstRegister<A>::Reader reader = - registr->GetReader(hdr.FstType()); - if (!reader) { - LOG(ERROR) << "MutableFst::Read: Unknown FST type \"" << hdr.FstType() - << "\" (arc type = \"" << A::Type() - << "\"): " << ropts.source; - return 0; - } - Fst<A> *fst = reader(strm, ropts); - if (!fst) return 0; - return static_cast<MutableFst<A> *>(fst); - } - - // Read a MutableFst from a file; return NULL on error. - // Empty filename reads from standard input. If 'convert' is true, - // convert to a mutable FST of type 'convert_type' if file is - // a non-mutable FST. 
- static MutableFst<A> *Read(const string &filename, bool convert = false, - const string &convert_type = "vector") { - if (convert == false) { - if (!filename.empty()) { - ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); - if (!strm) { - LOG(ERROR) << "MutableFst::Read: Can't open file: " << filename; - return 0; - } - return Read(strm, FstReadOptions(filename)); - } else { - return Read(cin, FstReadOptions("standard input")); - } - } else { // Converts to 'convert_type' if not mutable. - Fst<A> *ifst = Fst<A>::Read(filename); - if (!ifst) return 0; - if (ifst->Properties(kMutable, false)) { - return static_cast<MutableFst *>(ifst); - } else { - Fst<A> *ofst = Convert(*ifst, convert_type); - delete ifst; - if (!ofst) return 0; - if (!ofst->Properties(kMutable, false)) - LOG(ERROR) << "MutableFst: bad convert type: " << convert_type; - return static_cast<MutableFst *>(ofst); - } - } - } - - // For generic mutuble arc iterator construction; not normally called - // directly by users. - virtual void InitMutableArcIterator(StateId s, - MutableArcIteratorData<A> *) = 0; -}; - -// Mutable arc iterator interface, templated on the Arc definition; used -// for mutable Arc iterator specializations that are returned by -// the InitMutableArcIterator MutableFst method. -template <class A> -class MutableArcIteratorBase : public ArcIteratorBase<A> { - public: - typedef A Arc; - - void SetValue(const A &arc) { SetValue_(arc); } // Set current arc's content - - private: - virtual void SetValue_(const A &arc) = 0; -}; - -template <class A> -struct MutableArcIteratorData { - MutableArcIteratorBase<A> *base; // Specific iterator -}; - -// Generic mutable arc iterator, templated on the FST definition -// - a wrapper around pointer to specific one. -// Here is a typical use: \code -// for (MutableArcIterator<StdFst> aiter(&fst, s)); -// !aiter.Done(); -// aiter.Next()) { -// StdArc arc = aiter.Value(); -// arc.ilabel = 7; -// aiter.SetValue(arc); -// ... 
-// } \endcode -// This version requires function calls. -template <class F> -class MutableArcIterator { - public: - typedef F FST; - typedef typename F::Arc Arc; - typedef typename Arc::StateId StateId; - - MutableArcIterator(F *fst, StateId s) { - fst->InitMutableArcIterator(s, &data_); - } - ~MutableArcIterator() { delete data_.base; } - - bool Done() const { return data_.base->Done(); } - const Arc& Value() const { return data_.base->Value(); } - void Next() { data_.base->Next(); } - size_t Position() const { return data_.base->Position(); } - void Reset() { data_.base->Reset(); } - void Seek(size_t a) { data_.base->Seek(a); } - void SetValue(const Arc &a) { data_.base->SetValue(a); } - uint32 Flags() const { return data_.base->Flags(); } - void SetFlags(uint32 f, uint32 m) { - return data_.base->SetFlags(f, m); - } - - private: - MutableArcIteratorData<Arc> data_; - DISALLOW_COPY_AND_ASSIGN(MutableArcIterator); -}; - - -namespace internal { - -// MutableFst<A> case - abstract methods. -template <class A> inline -typename A::Weight Final(const MutableFst<A> &fst, typename A::StateId s) { - return fst.Final(s); -} - -template <class A> inline -ssize_t NumArcs(const MutableFst<A> &fst, typename A::StateId s) { - return fst.NumArcs(s); -} - -template <class A> inline -ssize_t NumInputEpsilons(const MutableFst<A> &fst, typename A::StateId s) { - return fst.NumInputEpsilons(s); -} - -template <class A> inline -ssize_t NumOutputEpsilons(const MutableFst<A> &fst, typename A::StateId s) { - return fst.NumOutputEpsilons(s); -} - -} // namespace internal - - -// A useful alias when using StdArc. -typedef MutableFst<StdArc> StdMutableFst; - - -// This is a helper class template useful for attaching a MutableFst -// interface to its implementation, handling reference counting and -// copy-on-write. 
-template <class I, class F = MutableFst<typename I::Arc> > -class ImplToMutableFst : public ImplToExpandedFst<I, F> { - public: - typedef typename I::Arc Arc; - typedef typename Arc::Weight Weight; - typedef typename Arc::StateId StateId; - - using ImplToFst<I, F>::GetImpl; - using ImplToFst<I, F>::SetImpl; - - virtual void SetStart(StateId s) { - MutateCheck(); - GetImpl()->SetStart(s); - } - - virtual void SetFinal(StateId s, Weight w) { - MutateCheck(); - GetImpl()->SetFinal(s, w); - } - - virtual void SetProperties(uint64 props, uint64 mask) { - // Can skip mutate check if extrinsic properties don't change, - // since it is then safe to update all (shallow) copies - uint64 exprops = kExtrinsicProperties & mask; - if (GetImpl()->Properties(exprops) != (props & exprops)) - MutateCheck(); - GetImpl()->SetProperties(props, mask); - } - - virtual StateId AddState() { - MutateCheck(); - return GetImpl()->AddState(); - } - - virtual void AddArc(StateId s, const Arc &arc) { - MutateCheck(); - GetImpl()->AddArc(s, arc); - } - - virtual void DeleteStates(const vector<StateId> &dstates) { - MutateCheck(); - GetImpl()->DeleteStates(dstates); - } - - virtual void DeleteStates() { - MutateCheck(); - GetImpl()->DeleteStates(); - } - - virtual void DeleteArcs(StateId s, size_t n) { - MutateCheck(); - GetImpl()->DeleteArcs(s, n); - } - - virtual void DeleteArcs(StateId s) { - MutateCheck(); - GetImpl()->DeleteArcs(s); - } - - virtual void ReserveStates(StateId s) { - MutateCheck(); - GetImpl()->ReserveStates(s); - } - - virtual void ReserveArcs(StateId s, size_t n) { - MutateCheck(); - GetImpl()->ReserveArcs(s, n); - } - - virtual const SymbolTable* InputSymbols() const { - return GetImpl()->InputSymbols(); - } - - virtual const SymbolTable* OutputSymbols() const { - return GetImpl()->OutputSymbols(); - } - - virtual SymbolTable* MutableInputSymbols() { - MutateCheck(); - return GetImpl()->InputSymbols(); - } - - virtual SymbolTable* MutableOutputSymbols() { - MutateCheck(); - 
return GetImpl()->OutputSymbols(); - } - - virtual void SetInputSymbols(const SymbolTable* isyms) { - MutateCheck(); - GetImpl()->SetInputSymbols(isyms); - } - - virtual void SetOutputSymbols(const SymbolTable* osyms) { - MutateCheck(); - GetImpl()->SetOutputSymbols(osyms); - } - - protected: - ImplToMutableFst() : ImplToExpandedFst<I, F>() {} - - ImplToMutableFst(I *impl) : ImplToExpandedFst<I, F>(impl) {} - - - ImplToMutableFst(const ImplToMutableFst<I, F> &fst) - : ImplToExpandedFst<I, F>(fst) {} - - ImplToMutableFst(const ImplToMutableFst<I, F> &fst, bool safe) - : ImplToExpandedFst<I, F>(fst, safe) {} - - void MutateCheck() { - // Copy on write - if (GetImpl()->RefCount() > 1) - SetImpl(new I(*this)); - } - - private: - // Disallow - ImplToMutableFst<I, F> &operator=(const ImplToMutableFst<I, F> &fst); - - ImplToMutableFst<I, F> &operator=(const Fst<Arc> &fst) { - FSTERROR() << "ImplToMutableFst: Assignment operator disallowed"; - GetImpl()->SetProperties(kError, kError); - return *this; - } -}; - - -} // namespace fst - -#endif // FST_LIB_MUTABLE_FST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/pair-weight.h b/kaldi_io/src/tools/openfst/include/fst/pair-weight.h deleted file mode 100644 index 7d8aa11..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/pair-weight.h +++ /dev/null @@ -1,280 +0,0 @@ -// pair-weight.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Masha Maria Shugrina) -// -// \file -// Pair weight templated base class for weight classes that -// contain two weights (e.g. Product, Lexicographic) - -#ifndef FST_LIB_PAIR_WEIGHT_H_ -#define FST_LIB_PAIR_WEIGHT_H_ - -#include <climits> -#include <stack> -#include <string> - -#include <fst/weight.h> - - -DECLARE_string(fst_weight_parentheses); -DECLARE_string(fst_weight_separator); - -namespace fst { - -template<class W1, class W2> class PairWeight; -template <class W1, class W2> -istream &operator>>(istream &strm, PairWeight<W1, W2> &w); - -template<class W1, class W2> -class PairWeight { - public: - friend istream &operator>><W1, W2>(istream&, PairWeight<W1, W2>&); - - typedef PairWeight<typename W1::ReverseWeight, - typename W2::ReverseWeight> - ReverseWeight; - - PairWeight() {} - - PairWeight(const PairWeight& w) : value1_(w.value1_), value2_(w.value2_) {} - - PairWeight(W1 w1, W2 w2) : value1_(w1), value2_(w2) {} - - static const PairWeight<W1, W2> &Zero() { - static const PairWeight<W1, W2> zero(W1::Zero(), W2::Zero()); - return zero; - } - - static const PairWeight<W1, W2> &One() { - static const PairWeight<W1, W2> one(W1::One(), W2::One()); - return one; - } - - static const PairWeight<W1, W2> &NoWeight() { - static const PairWeight<W1, W2> no_weight(W1::NoWeight(), W2::NoWeight()); - return no_weight; - } - - istream &Read(istream &strm) { - value1_.Read(strm); - return value2_.Read(strm); - } - - ostream &Write(ostream &strm) const { - value1_.Write(strm); - return value2_.Write(strm); - } - - PairWeight<W1, W2> &operator=(const PairWeight<W1, W2> &w) { - value1_ = w.Value1(); - value2_ = w.Value2(); - return *this; - } - - bool Member() const { return value1_.Member() && value2_.Member(); } - - size_t Hash() const { - size_t h1 = value1_.Hash(); - size_t h2 = value2_.Hash(); - const int lshift = 5; - const int rshift = CHAR_BIT * sizeof(size_t) - 5; - return h1 << lshift ^ h1 >> rshift ^ h2; - } - - PairWeight<W1, W2> 
Quantize(float delta = kDelta) const { - return PairWeight<W1, W2>(value1_.Quantize(delta), - value2_.Quantize(delta)); - } - - ReverseWeight Reverse() const { - return ReverseWeight(value1_.Reverse(), value2_.Reverse()); - } - - const W1& Value1() const { return value1_; } - - const W2& Value2() const { return value2_; } - - protected: - void SetValue1(const W1 &w) { value1_ = w; } - void SetValue2(const W2 &w) { value2_ = w; } - - // Reads PairWeight when there are not parentheses around pair terms - inline static istream &ReadNoParen( - istream &strm, PairWeight<W1, W2>& w, char separator) { - int c; - do { - c = strm.get(); - } while (isspace(c)); - - string s1; - while (c != separator) { - if (c == EOF) { - strm.clear(std::ios::badbit); - return strm; - } - s1 += c; - c = strm.get(); - } - istringstream strm1(s1); - W1 w1 = W1::Zero(); - strm1 >> w1; - - // read second element - W2 w2 = W2::Zero(); - strm >> w2; - - w = PairWeight<W1, W2>(w1, w2); - return strm; - } - - // Reads PairWeight when there are parentheses around pair terms - inline static istream &ReadWithParen( - istream &strm, PairWeight<W1, W2>& w, - char separator, char open_paren, char close_paren) { - int c; - do { - c = strm.get(); - } while (isspace(c)); - if (c != open_paren) { - FSTERROR() << " is fst_weight_parentheses flag set correcty? 
"; - strm.clear(std::ios::failbit); - return strm; - } - c = strm.get(); - - // read first element - stack<int> parens; - string s1; - while (c != separator || !parens.empty()) { - if (c == EOF) { - strm.clear(std::ios::badbit); - return strm; - } - s1 += c; - // if parens encountered before separator, they must be matched - if (c == open_paren) { - parens.push(1); - } else if (c == close_paren) { - // Fail for mismatched parens - if (parens.empty()) { - strm.clear(std::ios::failbit); - return strm; - } - parens.pop(); - } - c = strm.get(); - } - istringstream strm1(s1); - W1 w1 = W1::Zero(); - strm1 >> w1; - - // read second element - string s2; - c = strm.get(); - while (c != EOF) { - s2 += c; - c = strm.get(); - } - if (s2.empty() || (s2[s2.size() - 1] != close_paren)) { - FSTERROR() << " is fst_weight_parentheses flag set correcty? "; - strm.clear(std::ios::failbit); - return strm; - } - - s2.erase(s2.size() - 1, 1); - istringstream strm2(s2); - W2 w2 = W2::Zero(); - strm2 >> w2; - - w = PairWeight<W1, W2>(w1, w2); - return strm; - } - - private: - W1 value1_; - W2 value2_; - -}; - -template <class W1, class W2> -inline bool operator==(const PairWeight<W1, W2> &w, - const PairWeight<W1, W2> &v) { - return w.Value1() == v.Value1() && w.Value2() == v.Value2(); -} - -template <class W1, class W2> -inline bool operator!=(const PairWeight<W1, W2> &w1, - const PairWeight<W1, W2> &w2) { - return w1.Value1() != w2.Value1() || w1.Value2() != w2.Value2(); -} - - -template <class W1, class W2> -inline bool ApproxEqual(const PairWeight<W1, W2> &w1, - const PairWeight<W1, W2> &w2, - float delta = kDelta) { - return ApproxEqual(w1.Value1(), w2.Value1(), delta) && - ApproxEqual(w1.Value2(), w2.Value2(), delta); -} - -template <class W1, class W2> -inline ostream &operator<<(ostream &strm, const PairWeight<W1, W2> &w) { - if(FLAGS_fst_weight_separator.size() != 1) { - FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1"; - strm.clear(std::ios::badbit); - return 
strm; - } - char separator = FLAGS_fst_weight_separator[0]; - if (FLAGS_fst_weight_parentheses.empty()) - return strm << w.Value1() << separator << w.Value2(); - - if (FLAGS_fst_weight_parentheses.size() != 2) { - FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2"; - strm.clear(std::ios::badbit); - return strm; - } - char open_paren = FLAGS_fst_weight_parentheses[0]; - char close_paren = FLAGS_fst_weight_parentheses[1]; - return strm << open_paren << w.Value1() << separator - << w.Value2() << close_paren ; -} - -template <class W1, class W2> -inline istream &operator>>(istream &strm, PairWeight<W1, W2> &w) { - if(FLAGS_fst_weight_separator.size() != 1) { - FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1"; - strm.clear(std::ios::badbit); - return strm; - } - char separator = FLAGS_fst_weight_separator[0]; - bool read_parens = !FLAGS_fst_weight_parentheses.empty(); - if (read_parens) { - if (FLAGS_fst_weight_parentheses.size() != 2) { - FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2"; - strm.clear(std::ios::badbit); - return strm; - } - return PairWeight<W1, W2>::ReadWithParen( - strm, w, separator, FLAGS_fst_weight_parentheses[0], - FLAGS_fst_weight_parentheses[1]); - } else { - return PairWeight<W1, W2>::ReadNoParen(strm, w, separator); - } -} - -} // namespace fst - -#endif // FST_LIB_PAIR_WEIGHT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/partition.h b/kaldi_io/src/tools/openfst/include/fst/partition.h deleted file mode 100644 index 40b849a..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/partition.h +++ /dev/null @@ -1,305 +0,0 @@ -// partition.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Johan Schalkwyk) -// -// \file Functions and classes to create a partition of states -// - -#ifndef FST_LIB_PARTITION_H__ -#define FST_LIB_PARTITION_H__ - -#include <vector> -using std::vector; -#include <algorithm> - - -#include <fst/queue.h> - - - -namespace fst { - -template <typename T> class PartitionIterator; - -// \class Partition -// \brief Defines a partitioning of states. Typically used to represent -// equivalence classes for Fst operations like minimization. -// -template <typename T> -class Partition { - friend class PartitionIterator<T>; - - struct Element { - Element() : value(0), next(0), prev(0) {} - Element(T v) : value(v), next(0), prev(0) {} - - T value; - Element* next; - Element* prev; - }; - - public: - Partition(bool allow_repeated_split): - allow_repeated_split_(allow_repeated_split) {} - - Partition(bool allow_repeated_split, T num_states): - allow_repeated_split_(allow_repeated_split) { - Initialize(num_states); - } - - ~Partition() { - for (size_t i = 0; i < elements_.size(); ++i) - delete elements_[i]; - } - - // Create an empty partition for num_states. At initialization time - // all elements are not assigned to a class (i.e class_index = -1). - // Initialize just creates num_states of elements. All element - // operations are then done by simply disconnecting the element from - // it current class and placing it at the head of the next class. 
- void Initialize(size_t num_states) { - for (size_t i = 0; i < elements_.size(); ++i) - delete elements_[i]; - elements_.clear(); - classes_.clear(); - class_index_.clear(); - - elements_.resize(num_states); - class_index_.resize(num_states, -1); - class_size_.reserve(num_states); - for (size_t i = 0; i < num_states; ++i) - elements_[i] = new Element(i); - num_states_ = num_states; - } - - // Add a class, resize classes_ and class_size_ resource by 1. - size_t AddClass() { - size_t num_classes = classes_.size(); - classes_.resize(num_classes + 1, 0); - class_size_.resize(num_classes + 1, 0); - class_split_.resize(num_classes + 1, 0); - split_size_.resize(num_classes + 1, 0); - return num_classes; - } - - void AllocateClasses(T num_classes) { - size_t n = classes_.size() + num_classes; - classes_.resize(n, 0); - class_size_.resize(n, 0); - class_split_.resize(n, 0); - split_size_.resize(n, 0); - } - - // Add element_id to class_id. The Add method is used to initialize - // partition. Once elements have been added to a class, you need to - // use the Move() method move an element from once class to another. - void Add(T element_id, T class_id) { - Element* element = elements_[element_id]; - - if (classes_[class_id]) - classes_[class_id]->prev = element; - element->next = classes_[class_id]; - element->prev = 0; - classes_[class_id] = element; - - class_index_[element_id] = class_id; - class_size_[class_id]++; - } - - // Move and element_id to class_id. 
Disconnects (removes) element - // from it current class and - void Move(T element_id, T class_id) { - T old_class_id = class_index_[element_id]; - - Element* element = elements_[element_id]; - if (element->next) element->next->prev = element->prev; - if (element->prev) element->prev->next = element->next; - else classes_[old_class_id] = element->next; - - Add(element_id, class_id); - class_size_[old_class_id]--; - } - - // split class on the element_id - void SplitOn(T element_id) { - T class_id = class_index_[element_id]; - if (class_size_[class_id] == 1) return; - - // first time class is split - if (split_size_[class_id] == 0) { - visited_classes_.push_back(class_id); - class_split_[class_id] = classes_[class_id]; - } - // increment size of split (set of element at head of chain) - split_size_[class_id]++; - - // update split point - if (class_split_[class_id] != 0 - && class_split_[class_id] == elements_[element_id]) - class_split_[class_id] = elements_[element_id]->next; - - // move to head of chain in same class - Move(element_id, class_id); - } - - // Finalize class_id, split if required, and update class_splits, - // class indices of the newly created class. Returns the new_class id - // or -1 if no new class was created. - T SplitRefine(T class_id) { - - Element* split_el = class_split_[class_id]; - // only split if necessary - //if (class_size_[class_id] == split_size_[class_id]) { - if(split_el == NULL) { // we split on everything... - split_size_[class_id] = 0; - return -1; - } else { - T new_class = AddClass(); - - if(allow_repeated_split_) { // split_size_ is possibly - // inaccurate, so work it out exactly. 
- size_t split_count; Element *e; - for(split_count=0,e=classes_[class_id]; - e != split_el; split_count++, e=e->next); - split_size_[class_id] = split_count; - } - size_t remainder = class_size_[class_id] - split_size_[class_id]; - if (remainder < split_size_[class_id]) { // add smaller - classes_[new_class] = split_el; - split_el->prev->next = 0; - split_el->prev = 0; - class_size_[class_id] = split_size_[class_id]; - class_size_[new_class] = remainder; - } else { - classes_[new_class] = classes_[class_id]; - class_size_[class_id] = remainder; - class_size_[new_class] = split_size_[class_id]; - split_el->prev->next = 0; - split_el->prev = 0; - classes_[class_id] = split_el; - } - - // update class index for element in new class - for (Element* el = classes_[new_class]; el; el = el->next) - class_index_[el->value] = new_class; - - class_split_[class_id] = 0; - split_size_[class_id] = 0; - - return new_class; - } - } - - // Once all states have been processed for a particular class C, we - // can finalize the split. FinalizeSplit() will update each block in the - // partition, create new once and update the queue of active classes - // that require further refinement. 
- template <class Queue> - void FinalizeSplit(Queue* L) { - for (size_t i = 0; i < visited_classes_.size(); ++i) { - T new_class = SplitRefine(visited_classes_[i]); - if (new_class != -1 && L) - L->Enqueue(new_class); - } - visited_classes_.clear(); - } - - - const T class_id(T element_id) const { - return class_index_[element_id]; - } - - const vector<T>& class_sizes() const { - return class_size_; - } - - const size_t class_size(T class_id) const { - return class_size_[class_id]; - } - - const T num_classes() const { - return classes_.size(); - } - - - private: - int num_states_; - - // container of all elements (owner of ptrs) - vector<Element*> elements_; - - // linked list of elements belonging to class - vector<Element*> classes_; - - // pointer to split point for each class - vector<Element*> class_split_; - - // class index of element - vector<T> class_index_; - - // class sizes - vector<T> class_size_; - - // size of split for each class - // in the nondeterministic case, split_size_ is actually an upper - // bound on the size of split for each class. - vector<T> split_size_; - - // set of visited classes to be used in split refine - vector<T> visited_classes_; - - // true if input fst was deterministic: we can make - // certain assumptions in this case that speed up the algorithm. 
- bool allow_repeated_split_; -}; - - -// iterate over members of a class in a partition -template <typename T> -class PartitionIterator { - typedef typename Partition<T>::Element Element; - public: - PartitionIterator(const Partition<T>& partition, T class_id) - : p_(partition), - element_(p_.classes_[class_id]), - class_id_(class_id) {} - - bool Done() { - return (element_ == 0); - } - - const T Value() { - return (element_->value); - } - - void Next() { - element_ = element_->next; - } - - void Reset() { - element_ = p_.classes_[class_id_]; - } - - private: - const Partition<T>& p_; - - const Element* element_; - - T class_id_; -}; -} // namespace fst - -#endif // FST_LIB_PARTITION_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/power-weight.h b/kaldi_io/src/tools/openfst/include/fst/power-weight.h deleted file mode 100644 index 256928d..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/power-weight.h +++ /dev/null @@ -1,159 +0,0 @@ -// power-weight.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// Cartesian power weight semiring operation definitions. 
- -#ifndef FST_LIB_POWER_WEIGHT_H__ -#define FST_LIB_POWER_WEIGHT_H__ - -#include <fst/tuple-weight.h> -#include <fst/weight.h> - - -namespace fst { - -// Cartesian power semiring: W ^ n -// Forms: -// - a left semimodule when W is a left semiring, -// - a right semimodule when W is a right semiring, -// - a bisemimodule when W is a semiring, -// the free semimodule of rank n over W -// The Times operation is overloaded to provide the -// left and right scalar products. -template <class W, unsigned int n> -class PowerWeight : public TupleWeight<W, n> { - public: - using TupleWeight<W, n>::Zero; - using TupleWeight<W, n>::One; - using TupleWeight<W, n>::NoWeight; - using TupleWeight<W, n>::Quantize; - using TupleWeight<W, n>::Reverse; - - typedef PowerWeight<typename W::ReverseWeight, n> ReverseWeight; - - PowerWeight() {} - - PowerWeight(const TupleWeight<W, n> &w) : TupleWeight<W, n>(w) {} - - template <class Iterator> - PowerWeight(Iterator begin, Iterator end) : TupleWeight<W, n>(begin, end) {} - - static const PowerWeight<W, n> &Zero() { - static const PowerWeight<W, n> zero(TupleWeight<W, n>::Zero()); - return zero; - } - - static const PowerWeight<W, n> &One() { - static const PowerWeight<W, n> one(TupleWeight<W, n>::One()); - return one; - } - - static const PowerWeight<W, n> &NoWeight() { - static const PowerWeight<W, n> no_weight(TupleWeight<W, n>::NoWeight()); - return no_weight; - } - - static const string &Type() { - static string type; - if (type.empty()) { - string power; - Int64ToStr(n, &power); - type = W::Type() + "_^" + power; - } - return type; - } - - static uint64 Properties() { - uint64 props = W::Properties(); - return props & (kLeftSemiring | kRightSemiring | - kCommutative | kIdempotent); - } - - PowerWeight<W, n> Quantize(float delta = kDelta) const { - return TupleWeight<W, n>::Quantize(delta); - } - - ReverseWeight Reverse() const { - return TupleWeight<W, n>::Reverse(); - } -}; - - -// Semiring plus operation -template <class W, 
unsigned int n> -inline PowerWeight<W, n> Plus(const PowerWeight<W, n> &w1, - const PowerWeight<W, n> &w2) { - PowerWeight<W, n> w; - for (size_t i = 0; i < n; ++i) - w.SetValue(i, Plus(w1.Value(i), w2.Value(i))); - return w; -} - -// Semiring times operation -template <class W, unsigned int n> -inline PowerWeight<W, n> Times(const PowerWeight<W, n> &w1, - const PowerWeight<W, n> &w2) { - PowerWeight<W, n> w; - for (size_t i = 0; i < n; ++i) - w.SetValue(i, Times(w1.Value(i), w2.Value(i))); - return w; -} - -// Semiring divide operation -template <class W, unsigned int n> -inline PowerWeight<W, n> Divide(const PowerWeight<W, n> &w1, - const PowerWeight<W, n> &w2, - DivideType type = DIVIDE_ANY) { - PowerWeight<W, n> w; - for (size_t i = 0; i < n; ++i) - w.SetValue(i, Divide(w1.Value(i), w2.Value(i), type)); - return w; -} - -// Semimodule left scalar product -template <class W, unsigned int n> -inline PowerWeight<W, n> Times(const W &s, const PowerWeight<W, n> &w) { - PowerWeight<W, n> sw; - for (size_t i = 0; i < n; ++i) - sw.SetValue(i, Times(s, w.Value(i))); - return w; -} - -// Semimodule right scalar product -template <class W, unsigned int n> -inline PowerWeight<W, n> Times(const PowerWeight<W, n> &w, const W &s) { - PowerWeight<W, n> ws; - for (size_t i = 0; i < n; ++i) - ws.SetValue(i, Times(w.Value(i), s)); - return w; -} - -// Semimodule dot product -template <class W, unsigned int n> -inline W DotProduct(const PowerWeight<W, n> &w1, - const PowerWeight<W, n> &w2) { - W w = W::Zero(); - for (size_t i = 0; i < n; ++i) - w = Plus(w, Times(w1.Value(i), w2.Value(i))); - return w; -} - - -} // namespace fst - -#endif // FST_LIB_POWER_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/product-weight.h b/kaldi_io/src/tools/openfst/include/fst/product-weight.h deleted file mode 100644 index 16dede8..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/product-weight.h +++ /dev/null @@ -1,115 +0,0 @@ -// product-weight.h - -// Licensed under the Apache 
License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Product weight set and associated semiring operation definitions. - -#ifndef FST_LIB_PRODUCT_WEIGHT_H__ -#define FST_LIB_PRODUCT_WEIGHT_H__ - -#include <stack> -#include <string> - -#include <fst/pair-weight.h> -#include <fst/weight.h> - - -namespace fst { - -// Product semiring: W1 * W2 -template<class W1, class W2> -class ProductWeight : public PairWeight<W1, W2> { - public: - using PairWeight<W1, W2>::Zero; - using PairWeight<W1, W2>::One; - using PairWeight<W1, W2>::NoWeight; - using PairWeight<W1, W2>::Quantize; - using PairWeight<W1, W2>::Reverse; - - typedef ProductWeight<typename W1::ReverseWeight, typename W2::ReverseWeight> - ReverseWeight; - - ProductWeight() {} - - ProductWeight(const PairWeight<W1, W2>& w) : PairWeight<W1, W2>(w) {} - - ProductWeight(W1 w1, W2 w2) : PairWeight<W1, W2>(w1, w2) {} - - static const ProductWeight<W1, W2> &Zero() { - static const ProductWeight<W1, W2> zero(PairWeight<W1, W2>::Zero()); - return zero; - } - - static const ProductWeight<W1, W2> &One() { - static const ProductWeight<W1, W2> one(PairWeight<W1, W2>::One()); - return one; - } - - static const ProductWeight<W1, W2> &NoWeight() { - static const ProductWeight<W1, W2> no_weight( - PairWeight<W1, W2>::NoWeight()); - return no_weight; - } - - static const string &Type() { - static const string type = W1::Type() + "_X_" + W2::Type(); - return 
type; - } - - static uint64 Properties() { - uint64 props1 = W1::Properties(); - uint64 props2 = W2::Properties(); - return props1 & props2 & (kLeftSemiring | kRightSemiring | - kCommutative | kIdempotent); - } - - ProductWeight<W1, W2> Quantize(float delta = kDelta) const { - return PairWeight<W1, W2>::Quantize(delta); - } - - ReverseWeight Reverse() const { - return PairWeight<W1, W2>::Reverse(); - } - - -}; - -template <class W1, class W2> -inline ProductWeight<W1, W2> Plus(const ProductWeight<W1, W2> &w, - const ProductWeight<W1, W2> &v) { - return ProductWeight<W1, W2>(Plus(w.Value1(), v.Value1()), - Plus(w.Value2(), v.Value2())); -} - -template <class W1, class W2> -inline ProductWeight<W1, W2> Times(const ProductWeight<W1, W2> &w, - const ProductWeight<W1, W2> &v) { - return ProductWeight<W1, W2>(Times(w.Value1(), v.Value1()), - Times(w.Value2(), v.Value2())); -} - -template <class W1, class W2> -inline ProductWeight<W1, W2> Divide(const ProductWeight<W1, W2> &w, - const ProductWeight<W1, W2> &v, - DivideType typ = DIVIDE_ANY) { - return ProductWeight<W1, W2>(Divide(w.Value1(), v.Value1(), typ), - Divide(w.Value2(), v.Value2(), typ)); -} - -} // namespace fst - -#endif // FST_LIB_PRODUCT_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/project.h b/kaldi_io/src/tools/openfst/include/fst/project.h deleted file mode 100644 index 07946c3..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/project.h +++ /dev/null @@ -1,148 +0,0 @@ -// project.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Functions and classes to project an Fst on to its domain or range. - -#ifndef FST_LIB_PROJECT_H__ -#define FST_LIB_PROJECT_H__ - -#include <fst/arc-map.h> -#include <fst/mutable-fst.h> - - -namespace fst { - -// This specifies whether to project on input or output. -enum ProjectType { PROJECT_INPUT = 1, PROJECT_OUTPUT = 2 }; - - -// Mapper to implement projection per arc. -template <class A> class ProjectMapper { - public: - explicit ProjectMapper(ProjectType project_type) - : project_type_(project_type) {} - - A operator()(const A &arc) { - typename A::Label label = project_type_ == PROJECT_INPUT - ? arc.ilabel : arc.olabel; - return A(label, label, arc.weight, arc.nextstate); - } - - MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } - - MapSymbolsAction InputSymbolsAction() const { - return project_type_ == PROJECT_INPUT ? MAP_COPY_SYMBOLS : - MAP_CLEAR_SYMBOLS; - } - - MapSymbolsAction OutputSymbolsAction() const { - return project_type_ == PROJECT_OUTPUT ? MAP_COPY_SYMBOLS : - MAP_CLEAR_SYMBOLS; - } - - uint64 Properties(uint64 props) { - return ProjectProperties(props, project_type_ == PROJECT_INPUT); - } - - - private: - ProjectType project_type_; -}; - - -// Projects an FST onto its domain or range by either copying each arcs' -// input label to the output label or vice versa. This version modifies -// its input. -// -// Complexity: -// - Time: O(V + E) -// - Space: O(1) -// where V = # of states and E = # of arcs. 
-template<class Arc> inline -void Project(MutableFst<Arc> *fst, ProjectType project_type) { - ArcMap(fst, ProjectMapper<Arc>(project_type)); - if (project_type == PROJECT_INPUT) - fst->SetOutputSymbols(fst->InputSymbols()); - if (project_type == PROJECT_OUTPUT) - fst->SetInputSymbols(fst->OutputSymbols()); -} - - -// Projects an FST onto its domain or range by either copying each arc's -// input label to the output label or vice versa. This version is a delayed -// Fst. -// -// Complexity: -// - Time: O(v + e) -// - Space: O(1) -// where v = # of states visited, e = # of arcs visited. Constant -// time and to visit an input state or arc is assumed and exclusive -// of caching. -template <class A> -class ProjectFst : public ArcMapFst<A, A, ProjectMapper<A> > { - public: - typedef A Arc; - typedef ProjectMapper<A> C; - typedef ArcMapFstImpl< A, A, ProjectMapper<A> > Impl; - using ImplToFst<Impl>::GetImpl; - - ProjectFst(const Fst<A> &fst, ProjectType project_type) - : ArcMapFst<A, A, C>(fst, C(project_type)) { - if (project_type == PROJECT_INPUT) - GetImpl()->SetOutputSymbols(fst.InputSymbols()); - if (project_type == PROJECT_OUTPUT) - GetImpl()->SetInputSymbols(fst.OutputSymbols()); - } - - // See Fst<>::Copy() for doc. - ProjectFst(const ProjectFst<A> &fst, bool safe = false) - : ArcMapFst<A, A, C>(fst, safe) {} - - // Get a copy of this ProjectFst. See Fst<>::Copy() for further doc. - virtual ProjectFst<A> *Copy(bool safe = false) const { - return new ProjectFst(*this, safe); - } -}; - - -// Specialization for ProjectFst. -template <class A> -class StateIterator< ProjectFst<A> > - : public StateIterator< ArcMapFst<A, A, ProjectMapper<A> > > { - public: - explicit StateIterator(const ProjectFst<A> &fst) - : StateIterator< ArcMapFst<A, A, ProjectMapper<A> > >(fst) {} -}; - - -// Specialization for ProjectFst. 
-template <class A> -class ArcIterator< ProjectFst<A> > - : public ArcIterator< ArcMapFst<A, A, ProjectMapper<A> > > { - public: - ArcIterator(const ProjectFst<A> &fst, typename A::StateId s) - : ArcIterator< ArcMapFst<A, A, ProjectMapper<A> > >(fst, s) {} -}; - - -// Useful alias when using StdArc. -typedef ProjectFst<StdArc> StdProjectFst; - -} // namespace fst - -#endif // FST_LIB_PROJECT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/properties.h b/kaldi_io/src/tools/openfst/include/fst/properties.h deleted file mode 100644 index 8fab16f..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/properties.h +++ /dev/null @@ -1,460 +0,0 @@ -// properties.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: Michael Riley <[email protected]> -// \file -// FST property bits. - -#ifndef FST_LIB_PROPERTIES_H__ -#define FST_LIB_PROPERTIES_H__ - -#include <sys/types.h> -#include <vector> -using std::vector; - -#include <fst/compat.h> - -namespace fst { - -// The property bits here assert facts about an FST. If individual -// bits are added, then the composite properties below, the property -// functions and property names in properties.cc, and -// TestProperties() in test-properties.h should be updated. - -// -// BINARY PROPERTIES -// -// For each property below, there is a single bit. If it is set, -// the property is true. If it is not set, the property is false. 
-// - -// The Fst is an ExpandedFst -const uint64 kExpanded = 0x0000000000000001ULL; - -// The Fst is a MutableFst -const uint64 kMutable = 0x0000000000000002ULL; - -// An error was detected while constructing/using the FST -const uint64 kError = 0x0000000000000004ULL; - -// -// TRINARY PROPERTIES -// -// For each of these properties below there is a pair of property bits -// - one positive and one negative. If the positive bit is set, the -// property is true. If the negative bit is set, the property is -// false. If neither is set, the property has unknown value. Both -// should never be simultaneously set. The individual positive and -// negative bit pairs should be adjacent with the positive bit -// at an odd and lower position. - -// ilabel == olabel for each arc -const uint64 kAcceptor = 0x0000000000010000ULL; -// ilabel != olabel for some arc -const uint64 kNotAcceptor = 0x0000000000020000ULL; - -// ilabels unique leaving each state -const uint64 kIDeterministic = 0x0000000000040000ULL; -// ilabels not unique leaving some state -const uint64 kNonIDeterministic = 0x0000000000080000ULL; - -// olabels unique leaving each state -const uint64 kODeterministic = 0x0000000000100000ULL; -// olabels not unique leaving some state -const uint64 kNonODeterministic = 0x0000000000200000ULL; - -// FST has input/output epsilons -const uint64 kEpsilons = 0x0000000000400000ULL; -// FST has no input/output epsilons -const uint64 kNoEpsilons = 0x0000000000800000ULL; - -// FST has input epsilons -const uint64 kIEpsilons = 0x0000000001000000ULL; -// FST has no input epsilons -const uint64 kNoIEpsilons = 0x0000000002000000ULL; - -// FST has output epsilons -const uint64 kOEpsilons = 0x0000000004000000ULL; -// FST has no output epsilons -const uint64 kNoOEpsilons = 0x0000000008000000ULL; - -// ilabels sorted wrt < for each state -const uint64 kILabelSorted = 0x0000000010000000ULL; -// ilabels not sorted wrt < for some state -const uint64 kNotILabelSorted = 0x0000000020000000ULL; - 
-// olabels sorted wrt < for each state -const uint64 kOLabelSorted = 0x0000000040000000ULL; -// olabels not sorted wrt < for some state -const uint64 kNotOLabelSorted = 0x0000000080000000ULL; - -// Non-trivial arc or final weights -const uint64 kWeighted = 0x0000000100000000ULL; -// Only trivial arc and final weights -const uint64 kUnweighted = 0x0000000200000000ULL; - -// FST has cycles -const uint64 kCyclic = 0x0000000400000000ULL; -// FST has no cycles -const uint64 kAcyclic = 0x0000000800000000ULL; - -// FST has cycles containing the initial state -const uint64 kInitialCyclic = 0x0000001000000000ULL; -// FST has no cycles containing the initial state -const uint64 kInitialAcyclic = 0x0000002000000000ULL; - -// FST is topologically sorted -const uint64 kTopSorted = 0x0000004000000000ULL; -// FST is not topologically sorted -const uint64 kNotTopSorted = 0x0000008000000000ULL; - -// All states reachable from the initial state -const uint64 kAccessible = 0x0000010000000000ULL; -// Not all states reachable from the initial state -const uint64 kNotAccessible = 0x0000020000000000ULL; - -// All states can reach a final state -const uint64 kCoAccessible = 0x0000040000000000ULL; -// Not all states can reach a final state -const uint64 kNotCoAccessible = 0x0000080000000000ULL; - -// If NumStates() > 0, then state 0 is initial, state NumStates()-1 is -// final, there is a transition from each non-final state i to -// state i+1, and there are no other transitions. 
-const uint64 kString = 0x0000100000000000ULL; - -// Not a string FST -const uint64 kNotString = 0x0000200000000000ULL; - -// -// COMPOSITE PROPERTIES -// - -// Properties of an empty machine -const uint64 kNullProperties - = kAcceptor | kIDeterministic | kODeterministic | kNoEpsilons | - kNoIEpsilons | kNoOEpsilons | kILabelSorted | kOLabelSorted | - kUnweighted | kAcyclic | kInitialAcyclic | kTopSorted | - kAccessible | kCoAccessible | kString; - -// Properties that are preserved when an FST is copied -const uint64 kCopyProperties - = kError | kAcceptor | kNotAcceptor | kIDeterministic | kNonIDeterministic | - kODeterministic | kNonODeterministic | kEpsilons | kNoEpsilons | - kIEpsilons | kNoIEpsilons | kOEpsilons | kNoOEpsilons | - kILabelSorted | kNotILabelSorted | kOLabelSorted | - kNotOLabelSorted | kWeighted | kUnweighted | kCyclic | kAcyclic | - kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted | - kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible | - kString | kNotString; - -// Properites that are intrinsic to the FST -const uint64 kIntrinsicProperties - = kExpanded | kMutable | kAcceptor | kNotAcceptor | kIDeterministic | - kNonIDeterministic | kODeterministic | kNonODeterministic | - kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons | - kNoOEpsilons | kILabelSorted | kNotILabelSorted | kOLabelSorted | - kNotOLabelSorted | kWeighted | kUnweighted | kCyclic | kAcyclic | - kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted | - kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible | - kString | kNotString; - -// Properites that are (potentially) extrinsic to the FST -const uint64 kExtrinsicProperties = kError; - -// Properties that are preserved when an FST start state is set -const uint64 kSetStartProperties - = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | - kIDeterministic | kNonIDeterministic | kODeterministic | - kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons | - 
kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted | - kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kWeighted | - kUnweighted | kCyclic | kAcyclic | kTopSorted | kNotTopSorted | - kCoAccessible | kNotCoAccessible; - -// Properties that are preserved when an FST final weight is set -const uint64 kSetFinalProperties - = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | - kIDeterministic | kNonIDeterministic | kODeterministic | - kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons | - kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted | - kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kCyclic | - kAcyclic | kInitialCyclic | kInitialAcyclic | kTopSorted | - kNotTopSorted | kAccessible | kNotAccessible; - -// Properties that are preserved when an FST state is added -const uint64 kAddStateProperties - = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | - kIDeterministic | kNonIDeterministic | kODeterministic | - kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons | - kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted | - kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kWeighted | - kUnweighted | kCyclic | kAcyclic | kInitialCyclic | - kInitialAcyclic | kTopSorted | kNotTopSorted | kNotAccessible | - kNotCoAccessible | kNotString; - -// Properties that are preserved when an FST arc is added -const uint64 kAddArcProperties = kExpanded | kMutable | kError | kNotAcceptor | - kNonIDeterministic | kNonODeterministic | kEpsilons | kIEpsilons | - kOEpsilons | kNotILabelSorted | kNotOLabelSorted | kWeighted | - kCyclic | kInitialCyclic | kNotTopSorted | kAccessible | kCoAccessible; - -// Properties that are preserved when an FST arc is set -const uint64 kSetArcProperties = kExpanded | kMutable | kError; - -// Properties that are preserved when FST states are deleted -const uint64 kDeleteStatesProperties - = kExpanded | kMutable | kError | kAcceptor | kIDeterministic | - kODeterministic | kNoEpsilons | 
kNoIEpsilons | kNoOEpsilons | - kILabelSorted | kOLabelSorted | kUnweighted | kAcyclic | - kInitialAcyclic | kTopSorted; - -// Properties that are preserved when FST arcs are deleted -const uint64 kDeleteArcsProperties - = kExpanded | kMutable | kError | kAcceptor | kIDeterministic | - kODeterministic | kNoEpsilons | kNoIEpsilons | kNoOEpsilons | - kILabelSorted | kOLabelSorted | kUnweighted | kAcyclic | - kInitialAcyclic | kTopSorted | kNotAccessible | kNotCoAccessible; - -// Properties that are preserved when an FST's states are reordered -const uint64 kStateSortProperties = kExpanded | kMutable | kError | kAcceptor | - kNotAcceptor | kIDeterministic | kNonIDeterministic | - kODeterministic | kNonODeterministic | kEpsilons | kNoEpsilons | - kIEpsilons | kNoIEpsilons | kOEpsilons | kNoOEpsilons | - kILabelSorted | kNotILabelSorted | kOLabelSorted | kNotOLabelSorted - | kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic | - kInitialAcyclic | kAccessible | kNotAccessible | kCoAccessible | - kNotCoAccessible; - -// Properties that are preserved when an FST's arcs are reordered -const uint64 kArcSortProperties = - kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kIDeterministic | - kNonIDeterministic | kODeterministic | kNonODeterministic | - kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons | - kNoOEpsilons | kWeighted | kUnweighted | kCyclic | kAcyclic | - kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted | - kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible | - kString | kNotString; - -// Properties that are preserved when an FST's input labels are changed. 
-const uint64 kILabelInvariantProperties = - kExpanded | kMutable | kError | kODeterministic | kNonODeterministic | - kOEpsilons | kNoOEpsilons | kOLabelSorted | kNotOLabelSorted | - kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic | - kInitialAcyclic | kTopSorted | kNotTopSorted | kAccessible | - kNotAccessible | kCoAccessible | kNotCoAccessible | kString | kNotString; - -// Properties that are preserved when an FST's output labels are changed. -const uint64 kOLabelInvariantProperties = - kExpanded | kMutable | kError | kIDeterministic | kNonIDeterministic | - kIEpsilons | kNoIEpsilons | kILabelSorted | kNotILabelSorted | - kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic | - kInitialAcyclic | kTopSorted | kNotTopSorted | kAccessible | - kNotAccessible | kCoAccessible | kNotCoAccessible | kString | kNotString; - -// Properties that are preserved when an FST's weights are changed. -// This assumes that the set of states that are non-final is not changed. -const uint64 kWeightInvariantProperties = - kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kIDeterministic | - kNonIDeterministic | kODeterministic | kNonODeterministic | - kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons | - kNoOEpsilons | kILabelSorted | kNotILabelSorted | kOLabelSorted | - kNotOLabelSorted | kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic | - kTopSorted | kNotTopSorted | kAccessible | kNotAccessible | kCoAccessible | - kNotCoAccessible | kString | kNotString; - -// Properties that are preserved when a superfinal state is added -// and an FSTs final weights are directed to it via new transitions. 
-const uint64 kAddSuperFinalProperties = kExpanded | kMutable | kError | - kAcceptor | kNotAcceptor | kNonIDeterministic | kNonODeterministic | - kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted | kNotOLabelSorted | - kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic | - kInitialAcyclic | kNotTopSorted | kNotAccessible | kCoAccessible | - kNotCoAccessible | kNotString; - -// Properties that are preserved when a superfinal state is removed -// and the epsilon transitions directed to it are made final weights. -const uint64 kRmSuperFinalProperties = kExpanded | kMutable | kError | - kAcceptor | kNotAcceptor | kIDeterministic | kODeterministic | - kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kILabelSorted | kOLabelSorted | - kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic | - kInitialAcyclic | kTopSorted | kAccessible | kCoAccessible | - kNotCoAccessible | kString; - -// All binary properties -const uint64 kBinaryProperties = 0x0000000000000007ULL; - -// All trinary properties -const uint64 kTrinaryProperties = 0x00003fffffff0000ULL; - -// -// COMPUTED PROPERTIES -// - -// 1st bit of trinary properties -const uint64 kPosTrinaryProperties = - kTrinaryProperties & 0x5555555555555555ULL; - -// 2nd bit of trinary properties -const uint64 kNegTrinaryProperties = - kTrinaryProperties & 0xaaaaaaaaaaaaaaaaULL; - -// All properties -const uint64 kFstProperties = kBinaryProperties | kTrinaryProperties; - -// -// PROPERTY FUNCTIONS and STRING NAMES (defined in properties.cc) -// - -// Below are functions for getting property bit vectors when executing -// mutating fst operations. 
-inline uint64 SetStartProperties(uint64 inprops); -template <typename Weight> -uint64 SetFinalProperties(uint64 inprops, Weight old_weight, - Weight new_weight); -inline uint64 AddStateProperties(uint64 inprops); -template <typename A> -uint64 AddArcProperties(uint64 inprops, typename A::StateId s, const A &arc, - const A *prev_arc); -inline uint64 DeleteStatesProperties(uint64 inprops); -inline uint64 DeleteAllStatesProperties(uint64 inprops, uint64 staticProps); -inline uint64 DeleteArcsProperties(uint64 inprops); - -uint64 ClosureProperties(uint64 inprops, bool star, bool delayed = false); -uint64 ComplementProperties(uint64 inprops); -uint64 ComposeProperties(uint64 inprops1, uint64 inprops2); -uint64 ConcatProperties(uint64 inprops1, uint64 inprops2, - bool delayed = false); -uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label); -uint64 FactorWeightProperties(uint64 inprops); -uint64 InvertProperties(uint64 inprops); -uint64 ProjectProperties(uint64 inprops, bool project_input); -uint64 RandGenProperties(uint64 inprops, bool weighted); -uint64 RelabelProperties(uint64 inprops); -uint64 ReplaceProperties(const vector<uint64>& inprops, - ssize_t root, - bool epsilon_on_replace, - bool no_empty_fst); -uint64 ReverseProperties(uint64 inprops); -uint64 ReweightProperties(uint64 inprops); -uint64 RmEpsilonProperties(uint64 inprops, bool delayed = false); -uint64 ShortestPathProperties(uint64 props); -uint64 SynchronizeProperties(uint64 inprops); -uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed = false); - -// Definitions of inlined functions. 
- -uint64 SetStartProperties(uint64 inprops) { - uint64 outprops = inprops & kSetStartProperties; - if (inprops & kAcyclic) { - outprops |= kInitialAcyclic; - } - return outprops; -} - -uint64 AddStateProperties(uint64 inprops) { - return inprops & kAddStateProperties; -} - -uint64 DeleteStatesProperties(uint64 inprops) { - return inprops & kDeleteStatesProperties; -} - -uint64 DeleteAllStatesProperties(uint64 inprops, uint64 staticprops) { - uint64 outprops = inprops & kError; - return outprops | kNullProperties | staticprops; -} - -uint64 DeleteArcsProperties(uint64 inprops) { - return inprops & kDeleteArcsProperties; -} - -// Definitions of template functions. - -// -template <typename Weight> -uint64 SetFinalProperties(uint64 inprops, Weight old_weight, - Weight new_weight) { - uint64 outprops = inprops; - if (old_weight != Weight::Zero() && old_weight != Weight::One()) { - outprops &= ~kWeighted; - } - if (new_weight != Weight::Zero() && new_weight != Weight::One()) { - outprops |= kWeighted; - outprops &= ~kUnweighted; - } - outprops &= kSetFinalProperties | kWeighted | kUnweighted; - return outprops; -} - -/// Gets the properties for the MutableFst::AddArc method. 
-/// -/// \param inprops the current properties of the fst -/// \param s the id of the state to which an arc is being added -/// \param arc the arc being added to the state with the specified id -/// \param prev_arc the previously-added (or "last") arc of state s, or NULL if -/// s currently has no arcs -template <typename A> -uint64 AddArcProperties(uint64 inprops, typename A::StateId s, - const A &arc, const A *prev_arc) { - uint64 outprops = inprops; - if (arc.ilabel != arc.olabel) { - outprops |= kNotAcceptor; - outprops &= ~kAcceptor; - } - if (arc.ilabel == 0) { - outprops |= kIEpsilons; - outprops &= ~kNoIEpsilons; - if (arc.olabel == 0) { - outprops |= kEpsilons; - outprops &= ~kNoEpsilons; - } - } - if (arc.olabel == 0) { - outprops |= kOEpsilons; - outprops &= ~kNoOEpsilons; - } - if (prev_arc != 0) { - if (prev_arc->ilabel > arc.ilabel) { - outprops |= kNotILabelSorted; - outprops &= ~kILabelSorted; - } - if (prev_arc->olabel > arc.olabel) { - outprops |= kNotOLabelSorted; - outprops &= ~kOLabelSorted; - } - } - if (arc.weight != A::Weight::Zero() && arc.weight != A::Weight::One()) { - outprops |= kWeighted; - outprops &= ~kUnweighted; - } - if (arc.nextstate <= s) { - outprops |= kNotTopSorted; - outprops &= ~kTopSorted; - } - outprops &= kAddArcProperties | kAcceptor | - kNoEpsilons | kNoIEpsilons | kNoOEpsilons | - kILabelSorted | kOLabelSorted | kUnweighted | kTopSorted; - if (outprops & kTopSorted) { - outprops |= kAcyclic | kInitialAcyclic; - } - return outprops; -} - -extern const char *PropertyNames[]; - -} // namespace fst - -#endif // FST_LIB_PROPERTIES_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/prune.h b/kaldi_io/src/tools/openfst/include/fst/prune.h deleted file mode 100644 index 5ea5b4d..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/prune.h +++ /dev/null @@ -1,339 +0,0 @@ -// prune.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// Functions implementing pruning. - -#ifndef FST_LIB_PRUNE_H__ -#define FST_LIB_PRUNE_H__ - -#include <vector> -using std::vector; - -#include <fst/arcfilter.h> -#include <fst/heap.h> -#include <fst/shortest-distance.h> - - -namespace fst { - -template <class A, class ArcFilter> -class PruneOptions { - public: - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - // Pruning weight threshold. - Weight weight_threshold; - // Pruning state threshold. - StateId state_threshold; - // Arc filter. - ArcFilter filter; - // If non-zero, passes in pre-computed shortest distance to final states. - const vector<Weight> *distance; - // Determines the degree of convergence required when computing shortest - // distances. - float delta; - - explicit PruneOptions(const Weight& w, StateId s, ArcFilter f, - vector<Weight> *d = 0, float e = kDelta) - : weight_threshold(w), - state_threshold(s), - filter(f), - distance(d), - delta(e) {} - private: - PruneOptions(); // disallow -}; - - -template <class S, class W> -class PruneCompare { - public: - typedef S StateId; - typedef W Weight; - - PruneCompare(const vector<Weight> &idistance, - const vector<Weight> &fdistance) - : idistance_(idistance), fdistance_(fdistance) {} - - bool operator()(const StateId x, const StateId y) const { - Weight wx = Times(x < idistance_.size() ? idistance_[x] : Weight::Zero(), - x < fdistance_.size() ? 
fdistance_[x] : Weight::Zero()); - Weight wy = Times(y < idistance_.size() ? idistance_[y] : Weight::Zero(), - y < fdistance_.size() ? fdistance_[y] : Weight::Zero()); - return less_(wx, wy); - } - - private: - const vector<Weight> &idistance_; - const vector<Weight> &fdistance_; - NaturalLess<Weight> less_; -}; - - - -// Pruning algorithm: this version modifies its input and it takes an -// options class as an argment. Delete states and arcs in 'fst' that -// do not belong to a successful path whose weight is no more than -// the weight of the shortest path Times() 'opts.weight_threshold'. -// When 'opts.state_threshold != kNoStateId', the resulting transducer -// will restricted further to have at most 'opts.state_threshold' -// states. Weights need to be commutative and have the path -// property. The weight 'w' of any cycle needs to be bounded, i.e., -// 'Plus(w, W::One()) = One()'. -template <class Arc, class ArcFilter> -void Prune(MutableFst<Arc> *fst, - const PruneOptions<Arc, ArcFilter> &opts) { - typedef typename Arc::Weight Weight; - typedef typename Arc::StateId StateId; - - if ((Weight::Properties() & (kPath | kCommutative)) - != (kPath | kCommutative)) { - FSTERROR() << "Prune: Weight needs to have the path property and" - << " be commutative: " - << Weight::Type(); - fst->SetProperties(kError, kError); - return; - } - StateId ns = fst->NumStates(); - if (ns == 0) return; - vector<Weight> idistance(ns, Weight::Zero()); - vector<Weight> tmp; - if (!opts.distance) { - tmp.reserve(ns); - ShortestDistance(*fst, &tmp, true, opts.delta); - } - const vector<Weight> *fdistance = opts.distance ? 
opts.distance : &tmp; - - if ((opts.state_threshold == 0) || - (fdistance->size() <= fst->Start()) || - ((*fdistance)[fst->Start()] == Weight::Zero())) { - fst->DeleteStates(); - return; - } - PruneCompare<StateId, Weight> compare(idistance, *fdistance); - Heap< StateId, PruneCompare<StateId, Weight>, false> heap(compare); - vector<bool> visited(ns, false); - vector<size_t> enqueued(ns, kNoKey); - vector<StateId> dead; - dead.push_back(fst->AddState()); - NaturalLess<Weight> less; - Weight limit = Times((*fdistance)[fst->Start()], opts.weight_threshold); - - StateId num_visited = 0; - StateId s = fst->Start(); - if (!less(limit, (*fdistance)[s])) { - idistance[s] = Weight::One(); - enqueued[s] = heap.Insert(s); - ++num_visited; - } - - while (!heap.Empty()) { - s = heap.Top(); - heap.Pop(); - enqueued[s] = kNoKey; - visited[s] = true; - if (less(limit, Times(idistance[s], fst->Final(s)))) - fst->SetFinal(s, Weight::Zero()); - for (MutableArcIterator< MutableFst<Arc> > ait(fst, s); - !ait.Done(); - ait.Next()) { - Arc arc = ait.Value(); - if (!opts.filter(arc)) continue; - Weight weight = Times(Times(idistance[s], arc.weight), - arc.nextstate < fdistance->size() - ? 
(*fdistance)[arc.nextstate] - : Weight::Zero()); - if (less(limit, weight)) { - arc.nextstate = dead[0]; - ait.SetValue(arc); - continue; - } - if (less(Times(idistance[s], arc.weight), idistance[arc.nextstate])) - idistance[arc.nextstate] = Times(idistance[s], arc.weight); - if (visited[arc.nextstate]) continue; - if ((opts.state_threshold != kNoStateId) && - (num_visited >= opts.state_threshold)) - continue; - if (enqueued[arc.nextstate] == kNoKey) { - enqueued[arc.nextstate] = heap.Insert(arc.nextstate); - ++num_visited; - } else { - heap.Update(enqueued[arc.nextstate], arc.nextstate); - } - } - } - for (size_t i = 0; i < visited.size(); ++i) - if (!visited[i]) dead.push_back(i); - fst->DeleteStates(dead); -} - - -// Pruning algorithm: this version modifies its input and simply takes -// the pruning threshold as an argument. Delete states and arcs in -// 'fst' that do not belong to a successful path whose weight is no -// more than the weight of the shortest path Times() -// 'weight_threshold'. When 'state_threshold != kNoStateId', the -// resulting transducer will be restricted further to have at most -// 'opts.state_threshold' states. Weights need to be commutative and -// have the path property. The weight 'w' of any cycle needs to be -// bounded, i.e., 'Plus(w, W::One()) = One()'. -template <class Arc> -void Prune(MutableFst<Arc> *fst, - typename Arc::Weight weight_threshold, - typename Arc::StateId state_threshold = kNoStateId, - double delta = kDelta) { - PruneOptions<Arc, AnyArcFilter<Arc> > opts(weight_threshold, state_threshold, - AnyArcFilter<Arc>(), 0, delta); - Prune(fst, opts); -} - - -// Pruning algorithm: this version writes the pruned input Fst to an -// output MutableFst and it takes an options class as an argument. -// 'ofst' contains states and arcs that belong to a successful path in -// 'ifst' whose weight is no more than the weight of the shortest path -// Times() 'opts.weight_threshold'. 
When 'opts.state_threshold != -// kNoStateId', 'ofst' will be restricted further to have at most -// 'opts.state_threshold' states. Weights need to be commutative and -// have the path property. The weight 'w' of any cycle needs to be -// bounded, i.e., 'Plus(w, W::One()) = One()'. -template <class Arc, class ArcFilter> -void Prune(const Fst<Arc> &ifst, - MutableFst<Arc> *ofst, - const PruneOptions<Arc, ArcFilter> &opts) { - typedef typename Arc::Weight Weight; - typedef typename Arc::StateId StateId; - - if ((Weight::Properties() & (kPath | kCommutative)) - != (kPath | kCommutative)) { - FSTERROR() << "Prune: Weight needs to have the path property and" - << " be commutative: " - << Weight::Type(); - ofst->SetProperties(kError, kError); - return; - } - ofst->DeleteStates(); - ofst->SetInputSymbols(ifst.InputSymbols()); - ofst->SetOutputSymbols(ifst.OutputSymbols()); - if (ifst.Start() == kNoStateId) - return; - NaturalLess<Weight> less; - if (less(opts.weight_threshold, Weight::One()) || - (opts.state_threshold == 0)) - return; - vector<Weight> idistance; - vector<Weight> tmp; - if (!opts.distance) - ShortestDistance(ifst, &tmp, true, opts.delta); - const vector<Weight> *fdistance = opts.distance ? opts.distance : &tmp; - - if ((fdistance->size() <= ifst.Start()) || - ((*fdistance)[ifst.Start()] == Weight::Zero())) { - return; - } - PruneCompare<StateId, Weight> compare(idistance, *fdistance); - Heap< StateId, PruneCompare<StateId, Weight>, false> heap(compare); - vector<StateId> copy; - vector<size_t> enqueued; - vector<bool> visited; - - StateId s = ifst.Start(); - Weight limit = Times(s < fdistance->size() ? 
(*fdistance)[s] : Weight::Zero(), - opts.weight_threshold); - while (copy.size() <= s) - copy.push_back(kNoStateId); - copy[s] = ofst->AddState(); - ofst->SetStart(copy[s]); - while (idistance.size() <= s) - idistance.push_back(Weight::Zero()); - idistance[s] = Weight::One(); - while (enqueued.size() <= s) { - enqueued.push_back(kNoKey); - visited.push_back(false); - } - enqueued[s] = heap.Insert(s); - - while (!heap.Empty()) { - s = heap.Top(); - heap.Pop(); - enqueued[s] = kNoKey; - visited[s] = true; - if (!less(limit, Times(idistance[s], ifst.Final(s)))) - ofst->SetFinal(copy[s], ifst.Final(s)); - for (ArcIterator< Fst<Arc> > ait(ifst, s); - !ait.Done(); - ait.Next()) { - const Arc &arc = ait.Value(); - if (!opts.filter(arc)) continue; - Weight weight = Times(Times(idistance[s], arc.weight), - arc.nextstate < fdistance->size() - ? (*fdistance)[arc.nextstate] - : Weight::Zero()); - if (less(limit, weight)) continue; - if ((opts.state_threshold != kNoStateId) && - (ofst->NumStates() >= opts.state_threshold)) - continue; - while (idistance.size() <= arc.nextstate) - idistance.push_back(Weight::Zero()); - if (less(Times(idistance[s], arc.weight), - idistance[arc.nextstate])) - idistance[arc.nextstate] = Times(idistance[s], arc.weight); - while (copy.size() <= arc.nextstate) - copy.push_back(kNoStateId); - if (copy[arc.nextstate] == kNoStateId) - copy[arc.nextstate] = ofst->AddState(); - ofst->AddArc(copy[s], Arc(arc.ilabel, arc.olabel, arc.weight, - copy[arc.nextstate])); - while (enqueued.size() <= arc.nextstate) { - enqueued.push_back(kNoKey); - visited.push_back(false); - } - if (visited[arc.nextstate]) continue; - if (enqueued[arc.nextstate] == kNoKey) - enqueued[arc.nextstate] = heap.Insert(arc.nextstate); - else - heap.Update(enqueued[arc.nextstate], arc.nextstate); - } - } -} - - -// Pruning algorithm: this version writes the pruned input Fst to an -// output MutableFst and simply takes the pruning threshold as an -// argument. 
'ofst' contains states and arcs that belong to a -// successful path in 'ifst' whose weight is no more than -// the weight of the shortest path Times() 'weight_threshold'. When -// 'state_threshold != kNoStateId', 'ofst' will be restricted further -// to have at most 'opts.state_threshold' states. Weights need to be -// commutative and have the path property. The weight 'w' of any cycle -// needs to be bounded, i.e., 'Plus(w, W::One()) = W::One()'. -template <class Arc> -void Prune(const Fst<Arc> &ifst, - MutableFst<Arc> *ofst, - typename Arc::Weight weight_threshold, - typename Arc::StateId state_threshold = kNoStateId, - float delta = kDelta) { - PruneOptions<Arc, AnyArcFilter<Arc> > opts(weight_threshold, state_threshold, - AnyArcFilter<Arc>(), 0, delta); - Prune(ifst, ofst, opts); -} - -} // namespace fst - -#endif // FST_LIB_PRUNE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/push.h b/kaldi_io/src/tools/openfst/include/fst/push.h deleted file mode 100644 index 1f7a8fa..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/push.h +++ /dev/null @@ -1,175 +0,0 @@ -// push.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// Class to reweight/push an FST. 
- -#ifndef FST_LIB_PUSH_H__ -#define FST_LIB_PUSH_H__ - -#include <vector> -using std::vector; - -#include <fst/factor-weight.h> -#include <fst/fst.h> -#include <fst/arc-map.h> -#include <fst/reweight.h> -#include <fst/shortest-distance.h> - - -namespace fst { - -// Private helper functions for Push -namespace internal { - -// Compute the total weight (sum of the weights of all accepting paths) from -// the output of ShortestDistance. 'distance' is the shortest distance from the -// initial state when 'reverse == false' and to the final states when -// 'reverse == true'. -template <class Arc> -typename Arc::Weight ComputeTotalWeight( - const Fst<Arc> &fst, - const vector<typename Arc::Weight> &distance, - bool reverse) { - if (reverse) - return fst.Start() < distance.size() ? - distance[fst.Start()] : Arc::Weight::Zero(); - - typename Arc::Weight sum = Arc::Weight::Zero(); - for (typename Arc::StateId s = 0; s < distance.size(); ++s) - sum = Plus(sum, Times(distance[s], fst.Final(s))); - return sum; -} - -// Divide the weight of every accepting path by 'w'. The weight 'w' is -// divided at the final states if 'at_final == true' and at the -// initial state otherwise. 
-template <class Arc> -void RemoveWeight(MutableFst<Arc> *fst, typename Arc::Weight w, bool at_final) { - if ((w == Arc::Weight::One()) || (w == Arc::Weight::Zero())) - return; - - if (at_final) { - // Remove 'w' from the final states - for (StateIterator< MutableFst<Arc> > sit(*fst); - !sit.Done(); - sit.Next()) - fst->SetFinal(sit.Value(), - Divide(fst->Final(sit.Value()), w, DIVIDE_RIGHT)); - } else { // at_final == false - // Remove 'w' from the initial state - typename Arc::StateId start = fst->Start(); - for (MutableArcIterator<MutableFst<Arc> > ait(fst, start); - !ait.Done(); - ait.Next()) { - Arc arc = ait.Value(); - arc.weight = Divide(arc.weight, w, DIVIDE_LEFT); - ait.SetValue(arc); - } - fst->SetFinal(start, Divide(fst->Final(start), w, DIVIDE_LEFT)); - } -} -} // namespace internal - -// Pushes the weights in FST in the direction defined by TYPE. If -// pushing towards the initial state, the sum of the weight of the -// outgoing transitions and final weight at a non-initial state is -// equal to One() in the resulting machine. If pushing towards the -// final state, the same property holds on the reverse machine. -// -// Weight needs to be left distributive when pushing towards the -// initial state and right distributive when pushing towards the final -// states. 
-template <class Arc> -void Push(MutableFst<Arc> *fst, - ReweightType type, - float delta = kDelta, - bool remove_total_weight = false) { - vector<typename Arc::Weight> distance; - ShortestDistance(*fst, &distance, type == REWEIGHT_TO_INITIAL, delta); - typename Arc::Weight total_weight = Arc::Weight::One(); - if (remove_total_weight) - total_weight = internal::ComputeTotalWeight(*fst, distance, - type == REWEIGHT_TO_INITIAL); - Reweight(fst, distance, type); - if (remove_total_weight) - internal::RemoveWeight(fst, total_weight, type == REWEIGHT_TO_FINAL); -} - -const uint32 kPushWeights = 0x0001; -const uint32 kPushLabels = 0x0002; -const uint32 kPushRemoveTotalWeight = 0x0004; -const uint32 kPushRemoveCommonAffix = 0x0008; - -// OFST obtained from IFST by pushing weights and/or labels according -// to PTYPE in the direction defined by RTYPE. Weight needs to be -// left distributive when pushing weights towards the initial state -// and right distributive when pushing weights towards the final -// states. -template <class Arc, ReweightType rtype> -void Push(const Fst<Arc> &ifst, - MutableFst<Arc> *ofst, - uint32 ptype, - float delta = kDelta) { - - if ((ptype & (kPushWeights | kPushLabels)) == kPushWeights) { - *ofst = ifst; - Push(ofst, rtype, delta, ptype & kPushRemoveTotalWeight); - } else if (ptype & kPushLabels) { - const StringType stype = rtype == REWEIGHT_TO_INITIAL - ? 
STRING_LEFT - : STRING_RIGHT; - vector<typename GallicArc<Arc, stype>::Weight> gdistance; - VectorFst<GallicArc<Arc, stype> > gfst; - ArcMap(ifst, &gfst, ToGallicMapper<Arc, stype>()); - if (ptype & kPushWeights ) { - ShortestDistance(gfst, &gdistance, rtype == REWEIGHT_TO_INITIAL, delta); - } else { - ArcMapFst<Arc, Arc, RmWeightMapper<Arc> > - uwfst(ifst, RmWeightMapper<Arc>()); - ArcMapFst<Arc, GallicArc<Arc, stype>, ToGallicMapper<Arc, stype> > - guwfst(uwfst, ToGallicMapper<Arc, stype>()); - ShortestDistance(guwfst, &gdistance, rtype == REWEIGHT_TO_INITIAL, delta); - } - typename GallicArc<Arc, stype>::Weight total_weight = - GallicArc<Arc, stype>::Weight::One(); - if (ptype & (kPushRemoveTotalWeight | kPushRemoveCommonAffix)) { - total_weight = internal::ComputeTotalWeight( - gfst, gdistance, rtype == REWEIGHT_TO_INITIAL); - total_weight = typename GallicArc<Arc, stype>::Weight( - ptype & kPushRemoveCommonAffix ? total_weight.Value1() - : StringWeight<typename Arc::Label, stype>::One(), - ptype & kPushRemoveTotalWeight ? 
total_weight.Value2() - : Arc::Weight::One()); - } - Reweight(&gfst, gdistance, rtype); - if (ptype & (kPushRemoveTotalWeight | kPushRemoveCommonAffix)) - internal::RemoveWeight(&gfst, total_weight, rtype == REWEIGHT_TO_FINAL); - FactorWeightFst< GallicArc<Arc, stype>, GallicFactor<typename Arc::Label, - typename Arc::Weight, stype> > fwfst(gfst); - ArcMap(fwfst, ofst, FromGallicMapper<Arc, stype>()); - ofst->SetOutputSymbols(ifst.OutputSymbols()); - } else { - LOG(WARNING) << "Push: pushing type is set to 0: " - << "pushing neither labels nor weights."; - *ofst = ifst; - } -} - -} // namespace fst - -#endif /* FST_LIB_PUSH_H_ */ diff --git a/kaldi_io/src/tools/openfst/include/fst/queue.h b/kaldi_io/src/tools/openfst/include/fst/queue.h deleted file mode 100644 index 95a082d..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/queue.h +++ /dev/null @@ -1,938 +0,0 @@ -// queue.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// Functions and classes for various Fst state queues with -// a unified interface. 
- -#ifndef FST_LIB_QUEUE_H__ -#define FST_LIB_QUEUE_H__ - -#include <deque> -using std::deque; -#include <vector> -using std::vector; - -#include <fst/arcfilter.h> -#include <fst/connect.h> -#include <fst/heap.h> -#include <fst/topsort.h> - - -namespace fst { - -// template <class S> -// class Queue { -// public: -// typedef typename S StateId; -// -// // Ctr: may need args (e.g., Fst, comparator) for some queues -// Queue(...); -// // Returns the head of the queue -// StateId Head() const; -// // Inserts a state -// void Enqueue(StateId s); -// // Removes the head of the queue -// void Dequeue(); -// // Updates ordering of state s when weight changes, if necessary -// void Update(StateId s); -// // Does the queue contain no elements? -// bool Empty() const; -// // Remove all states from queue -// void Clear(); -// }; - -// State queue types. -enum QueueType { - TRIVIAL_QUEUE = 0, // Single state queue - FIFO_QUEUE = 1, // First-in, first-out queue - LIFO_QUEUE = 2, // Last-in, first-out queue - SHORTEST_FIRST_QUEUE = 3, // Shortest-first queue - TOP_ORDER_QUEUE = 4, // Topologically-ordered queue - STATE_ORDER_QUEUE = 5, // State-ID ordered queue - SCC_QUEUE = 6, // Component graph top-ordered meta-queue - AUTO_QUEUE = 7, // Auto-selected queue - OTHER_QUEUE = 8 - }; - - -// QueueBase, templated on the StateId, is the base class shared by the -// queues considered by AutoQueue. 
-template <class S> -class QueueBase { - public: - typedef S StateId; - - QueueBase(QueueType type) : queue_type_(type), error_(false) {} - virtual ~QueueBase() {} - StateId Head() const { return Head_(); } - void Enqueue(StateId s) { Enqueue_(s); } - void Dequeue() { Dequeue_(); } - void Update(StateId s) { Update_(s); } - bool Empty() const { return Empty_(); } - void Clear() { Clear_(); } - QueueType Type() { return queue_type_; } - bool Error() const { return error_; } - void SetError(bool error) { error_ = error; } - - private: - // This allows base-class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. - virtual StateId Head_() const = 0; - virtual void Enqueue_(StateId s) = 0; - virtual void Dequeue_() = 0; - virtual void Update_(StateId s) = 0; - virtual bool Empty_() const = 0; - virtual void Clear_() = 0; - - QueueType queue_type_; - bool error_; -}; - - -// Trivial queue discipline, templated on the StateId. You may enqueue -// at most one state at a time. It is used for strongly connected components -// with only one state and no self loops. -template <class S> -class TrivialQueue : public QueueBase<S> { -public: - typedef S StateId; - - TrivialQueue() : QueueBase<S>(TRIVIAL_QUEUE), front_(kNoStateId) {} - StateId Head() const { return front_; } - void Enqueue(StateId s) { front_ = s; } - void Dequeue() { front_ = kNoStateId; } - void Update(StateId s) {} - bool Empty() const { return front_ == kNoStateId; } - void Clear() { front_ = kNoStateId; } - - -private: - // This allows base-class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. 
- virtual StateId Head_() const { return Head(); } - virtual void Enqueue_(StateId s) { Enqueue(s); } - virtual void Dequeue_() { Dequeue(); } - virtual void Update_(StateId s) { Update(s); } - virtual bool Empty_() const { return Empty(); } - virtual void Clear_() { return Clear(); } - - StateId front_; -}; - - -// First-in, first-out queue discipline, templated on the StateId. -template <class S> -class FifoQueue : public QueueBase<S>, public deque<S> { - public: - using deque<S>::back; - using deque<S>::push_front; - using deque<S>::pop_back; - using deque<S>::empty; - using deque<S>::clear; - - typedef S StateId; - - FifoQueue() : QueueBase<S>(FIFO_QUEUE) {} - StateId Head() const { return back(); } - void Enqueue(StateId s) { push_front(s); } - void Dequeue() { pop_back(); } - void Update(StateId s) {} - bool Empty() const { return empty(); } - void Clear() { clear(); } - - private: - // This allows base-class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. - virtual StateId Head_() const { return Head(); } - virtual void Enqueue_(StateId s) { Enqueue(s); } - virtual void Dequeue_() { Dequeue(); } - virtual void Update_(StateId s) { Update(s); } - virtual bool Empty_() const { return Empty(); } - virtual void Clear_() { return Clear(); } -}; - - -// Last-in, first-out queue discipline, templated on the StateId. 
-template <class S> -class LifoQueue : public QueueBase<S>, public deque<S> { - public: - using deque<S>::front; - using deque<S>::push_front; - using deque<S>::pop_front; - using deque<S>::empty; - using deque<S>::clear; - - typedef S StateId; - - LifoQueue() : QueueBase<S>(LIFO_QUEUE) {} - StateId Head() const { return front(); } - void Enqueue(StateId s) { push_front(s); } - void Dequeue() { pop_front(); } - void Update(StateId s) {} - bool Empty() const { return empty(); } - void Clear() { clear(); } - - private: - // This allows base-class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. - virtual StateId Head_() const { return Head(); } - virtual void Enqueue_(StateId s) { Enqueue(s); } - virtual void Dequeue_() { Dequeue(); } - virtual void Update_(StateId s) { Update(s); } - virtual bool Empty_() const { return Empty(); } - virtual void Clear_() { return Clear(); } -}; - - -// Shortest-first queue discipline, templated on the StateId and -// comparison function object. Comparison function object COMP is -// used to compare two StateIds. If a (single) state's order changes, -// it can be reordered in the queue with a call to Update(). -// If 'update == false', call to Update() does not reorder the queue. 
-template <typename S, typename C, bool update = true> -class ShortestFirstQueue : public QueueBase<S> { - public: - typedef S StateId; - typedef C Compare; - - ShortestFirstQueue(C comp) - : QueueBase<S>(SHORTEST_FIRST_QUEUE), heap_(comp) {} - - StateId Head() const { return heap_.Top(); } - - void Enqueue(StateId s) { - if (update) { - for (StateId i = key_.size(); i <= s; ++i) - key_.push_back(kNoKey); - key_[s] = heap_.Insert(s); - } else { - heap_.Insert(s); - } - } - - void Dequeue() { - if (update) - key_[heap_.Pop()] = kNoKey; - else - heap_.Pop(); - } - - void Update(StateId s) { - if (!update) - return; - if (s >= key_.size() || key_[s] == kNoKey) { - Enqueue(s); - } else { - heap_.Update(key_[s], s); - } - } - - bool Empty() const { return heap_.Empty(); } - - void Clear() { - heap_.Clear(); - if (update) key_.clear(); - } - - private: - Heap<S, C, false> heap_; - vector<ssize_t> key_; - - // This allows base-class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. - virtual StateId Head_() const { return Head(); } - virtual void Enqueue_(StateId s) { Enqueue(s); } - virtual void Dequeue_() { Dequeue(); } - virtual void Update_(StateId s) { Update(s); } - virtual bool Empty_() const { return Empty(); } - virtual void Clear_() { return Clear(); } -}; - - -// Given a vector that maps from states to weights and a Less -// comparison function object between weights, this class defines a -// comparison function object between states. 
-template <typename S, typename L> -class StateWeightCompare { - public: - typedef L Less; - typedef typename L::Weight Weight; - typedef S StateId; - - StateWeightCompare(const vector<Weight>& weights, const L &less) - : weights_(weights), less_(less) {} - - bool operator()(const S x, const S y) const { - return less_(weights_[x], weights_[y]); - } - - private: - const vector<Weight>& weights_; - L less_; -}; - - -// Shortest-first queue discipline, templated on the StateId and Weight, is -// specialized to use the weight's natural order for the comparison function. -template <typename S, typename W> -class NaturalShortestFirstQueue : - public ShortestFirstQueue<S, StateWeightCompare<S, NaturalLess<W> > > { - public: - typedef StateWeightCompare<S, NaturalLess<W> > C; - - NaturalShortestFirstQueue(const vector<W> &distance) : - ShortestFirstQueue<S, C>(C(distance, less_)) {} - - private: - NaturalLess<W> less_; -}; - -// Topological-order queue discipline, templated on the StateId. -// States are ordered in the queue topologically. The FST must be acyclic. -template <class S> -class TopOrderQueue : public QueueBase<S> { - public: - typedef S StateId; - - // This constructor computes the top. order. It accepts an arc filter - // to limit the transitions considered in that computation (e.g., only - // the epsilon graph). - template <class Arc, class ArcFilter> - TopOrderQueue(const Fst<Arc> &fst, ArcFilter filter) - : QueueBase<S>(TOP_ORDER_QUEUE), front_(0), back_(kNoStateId), - order_(0), state_(0) { - bool acyclic; - TopOrderVisitor<Arc> top_order_visitor(&order_, &acyclic); - DfsVisit(fst, &top_order_visitor, filter); - if (!acyclic) { - FSTERROR() << "TopOrderQueue: fst is not acyclic."; - QueueBase<S>::SetError(true); - } - state_.resize(order_.size(), kNoStateId); - } - - // This constructor is passed the top. order, useful when we know it - // beforehand. 
- TopOrderQueue(const vector<StateId> &order) - : QueueBase<S>(TOP_ORDER_QUEUE), front_(0), back_(kNoStateId), - order_(order), state_(order.size(), kNoStateId) {} - - StateId Head() const { return state_[front_]; } - - void Enqueue(StateId s) { - if (front_ > back_) front_ = back_ = order_[s]; - else if (order_[s] > back_) back_ = order_[s]; - else if (order_[s] < front_) front_ = order_[s]; - state_[order_[s]] = s; - } - - void Dequeue() { - state_[front_] = kNoStateId; - while ((front_ <= back_) && (state_[front_] == kNoStateId)) ++front_; - } - - void Update(StateId s) {} - - bool Empty() const { return front_ > back_; } - - void Clear() { - for (StateId i = front_; i <= back_; ++i) state_[i] = kNoStateId; - back_ = kNoStateId; - front_ = 0; - } - - private: - StateId front_; - StateId back_; - vector<StateId> order_; - vector<StateId> state_; - - // This allows base-class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. - virtual StateId Head_() const { return Head(); } - virtual void Enqueue_(StateId s) { Enqueue(s); } - virtual void Dequeue_() { Dequeue(); } - virtual void Update_(StateId s) { Update(s); } - virtual bool Empty_() const { return Empty(); } - virtual void Clear_() { return Clear(); } -}; - - -// State order queue discipline, templated on the StateId. -// States are ordered in the queue by state Id. 
-template <class S> -class StateOrderQueue : public QueueBase<S> { -public: - typedef S StateId; - - StateOrderQueue() - : QueueBase<S>(STATE_ORDER_QUEUE), front_(0), back_(kNoStateId) {} - - StateId Head() const { return front_; } - - void Enqueue(StateId s) { - if (front_ > back_) front_ = back_ = s; - else if (s > back_) back_ = s; - else if (s < front_) front_ = s; - while (enqueued_.size() <= s) enqueued_.push_back(false); - enqueued_[s] = true; - } - - void Dequeue() { - enqueued_[front_] = false; - while ((front_ <= back_) && (enqueued_[front_] == false)) ++front_; - } - - void Update(StateId s) {} - - bool Empty() const { return front_ > back_; } - - void Clear() { - for (StateId i = front_; i <= back_; ++i) enqueued_[i] = false; - front_ = 0; - back_ = kNoStateId; - } - -private: - StateId front_; - StateId back_; - vector<bool> enqueued_; - - // This allows base-class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. - virtual StateId Head_() const { return Head(); } - virtual void Enqueue_(StateId s) { Enqueue(s); } - virtual void Dequeue_() { Dequeue(); } - virtual void Update_(StateId s) { Update(s); } - virtual bool Empty_() const { return Empty(); } - virtual void Clear_() { return Clear(); } - -}; - - -// SCC topological-order meta-queue discipline, templated on the StateId S -// and a queue Q, which is used inside each SCC. It visits the SCC's -// of an FST in topological order. Its constructor is passed the queues to -// to use within an SCC. -template <class S, class Q> -class SccQueue : public QueueBase<S> { - public: - typedef S StateId; - typedef Q Queue; - - // Constructor takes a vector specifying the SCC number per state - // and a vector giving the queue to use per SCC number. 
- SccQueue(const vector<StateId> &scc, vector<Queue*> *queue) - : QueueBase<S>(SCC_QUEUE), queue_(queue), scc_(scc), front_(0), - back_(kNoStateId) {} - - StateId Head() const { - while ((front_ <= back_) && - (((*queue_)[front_] && (*queue_)[front_]->Empty()) - || (((*queue_)[front_] == 0) && - ((front_ >= trivial_queue_.size()) - || (trivial_queue_[front_] == kNoStateId))))) - ++front_; - if ((*queue_)[front_]) - return (*queue_)[front_]->Head(); - else - return trivial_queue_[front_]; - } - - void Enqueue(StateId s) { - if (front_ > back_) front_ = back_ = scc_[s]; - else if (scc_[s] > back_) back_ = scc_[s]; - else if (scc_[s] < front_) front_ = scc_[s]; - if ((*queue_)[scc_[s]]) { - (*queue_)[scc_[s]]->Enqueue(s); - } else { - while (trivial_queue_.size() <= scc_[s]) - trivial_queue_.push_back(kNoStateId); - trivial_queue_[scc_[s]] = s; - } - } - - void Dequeue() { - if ((*queue_)[front_]) - (*queue_)[front_]->Dequeue(); - else if (front_ < trivial_queue_.size()) - trivial_queue_[front_] = kNoStateId; - } - - void Update(StateId s) { - if ((*queue_)[scc_[s]]) - (*queue_)[scc_[s]]->Update(s); - } - - bool Empty() const { - if (front_ < back_) // Queue scc # back_ not empty unless back_==front_ - return false; - else if (front_ > back_) - return true; - else if ((*queue_)[front_]) - return (*queue_)[front_]->Empty(); - else - return (front_ >= trivial_queue_.size()) - || (trivial_queue_[front_] == kNoStateId); - } - - void Clear() { - for (StateId i = front_; i <= back_; ++i) - if ((*queue_)[i]) - (*queue_)[i]->Clear(); - else if (i < trivial_queue_.size()) - trivial_queue_[i] = kNoStateId; - front_ = 0; - back_ = kNoStateId; - } - -private: - vector<Queue*> *queue_; - const vector<StateId> &scc_; - mutable StateId front_; - StateId back_; - vector<StateId> trivial_queue_; - - // This allows base-class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. 
- virtual StateId Head_() const { return Head(); } - virtual void Enqueue_(StateId s) { Enqueue(s); } - virtual void Dequeue_() { Dequeue(); } - virtual void Update_(StateId s) { Update(s); } - virtual bool Empty_() const { return Empty(); } - virtual void Clear_() { return Clear(); } - - DISALLOW_COPY_AND_ASSIGN(SccQueue); -}; - - -// Automatic queue discipline, templated on the StateId. It selects a -// queue discipline for a given FST based on its properties. -template <class S> -class AutoQueue : public QueueBase<S> { -public: - typedef S StateId; - - // This constructor takes a state distance vector that, if non-null and if - // the Weight type has the path property, will entertain the - // shortest-first queue using the natural order w.r.t to the distance. - template <class Arc, class ArcFilter> - AutoQueue(const Fst<Arc> &fst, const vector<typename Arc::Weight> *distance, - ArcFilter filter) : QueueBase<S>(AUTO_QUEUE) { - typedef typename Arc::Weight Weight; - typedef StateWeightCompare< StateId, NaturalLess<Weight> > Compare; - - // First check if the FST is known to have these properties. - uint64 props = fst.Properties(kAcyclic | kCyclic | - kTopSorted | kUnweighted, false); - if ((props & kTopSorted) || fst.Start() == kNoStateId) { - queue_ = new StateOrderQueue<StateId>(); - VLOG(2) << "AutoQueue: using state-order discipline"; - } else if (props & kAcyclic) { - queue_ = new TopOrderQueue<StateId>(fst, filter); - VLOG(2) << "AutoQueue: using top-order discipline"; - } else if ((props & kUnweighted) && (Weight::Properties() & kIdempotent)) { - queue_ = new LifoQueue<StateId>(); - VLOG(2) << "AutoQueue: using LIFO discipline"; - } else { - uint64 properties; - // Decompose into strongly-connected components. 
- SccVisitor<Arc> scc_visitor(&scc_, 0, 0, &properties); - DfsVisit(fst, &scc_visitor, filter); - StateId nscc = *max_element(scc_.begin(), scc_.end()) + 1; - vector<QueueType> queue_types(nscc); - NaturalLess<Weight> *less = 0; - Compare *comp = 0; - if (distance && (Weight::Properties() & kPath)) { - less = new NaturalLess<Weight>; - comp = new Compare(*distance, *less); - } - // Find the queue type to use per SCC. - bool unweighted; - bool all_trivial; - SccQueueType(fst, scc_, &queue_types, filter, less, &all_trivial, - &unweighted); - // If unweighted and semiring is idempotent, use lifo queue. - if (unweighted) { - queue_ = new LifoQueue<StateId>(); - VLOG(2) << "AutoQueue: using LIFO discipline"; - delete comp; - delete less; - return; - } - // If all the scc are trivial, FST is acyclic and the scc# gives - // the topological order. - if (all_trivial) { - queue_ = new TopOrderQueue<StateId>(scc_); - VLOG(2) << "AutoQueue: using top-order discipline"; - delete comp; - delete less; - return; - } - VLOG(2) << "AutoQueue: using SCC meta-discipline"; - queues_.resize(nscc); - for (StateId i = 0; i < nscc; ++i) { - switch(queue_types[i]) { - case TRIVIAL_QUEUE: - queues_[i] = 0; - VLOG(3) << "AutoQueue: SCC #" << i - << ": using trivial discipline"; - break; - case SHORTEST_FIRST_QUEUE: - queues_[i] = new ShortestFirstQueue<StateId, Compare, false>(*comp); - VLOG(3) << "AutoQueue: SCC #" << i << - ": using shortest-first discipline"; - break; - case LIFO_QUEUE: - queues_[i] = new LifoQueue<StateId>(); - VLOG(3) << "AutoQueue: SCC #" << i - << ": using LIFO disciplle"; - break; - case FIFO_QUEUE: - default: - queues_[i] = new FifoQueue<StateId>(); - VLOG(3) << "AutoQueue: SCC #" << i - << ": using FIFO disciplle"; - break; - } - } - queue_ = new SccQueue< StateId, QueueBase<StateId> >(scc_, &queues_); - delete comp; - delete less; - } - } - - ~AutoQueue() { - for (StateId i = 0; i < queues_.size(); ++i) - delete queues_[i]; - delete queue_; - } - - StateId Head() 
const { return queue_->Head(); } - - void Enqueue(StateId s) { queue_->Enqueue(s); } - - void Dequeue() { queue_->Dequeue(); } - - void Update(StateId s) { queue_->Update(s); } - - bool Empty() const { return queue_->Empty(); } - - void Clear() { queue_->Clear(); } - - - private: - QueueBase<StateId> *queue_; - vector< QueueBase<StateId>* > queues_; - vector<StateId> scc_; - - template <class Arc, class ArcFilter, class Less> - static void SccQueueType(const Fst<Arc> &fst, - const vector<StateId> &scc, - vector<QueueType> *queue_types, - ArcFilter filter, Less *less, - bool *all_trivial, bool *unweighted); - - // This allows base-class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. - virtual StateId Head_() const { return Head(); } - - virtual void Enqueue_(StateId s) { Enqueue(s); } - - virtual void Dequeue_() { Dequeue(); } - - virtual void Update_(StateId s) { Update(s); } - - virtual bool Empty_() const { return Empty(); } - - virtual void Clear_() { return Clear(); } - - DISALLOW_COPY_AND_ASSIGN(AutoQueue); -}; - - -// Examines the states in an Fst's strongly connected components and -// determines which type of queue to use per SCC. Stores result in -// vector QUEUE_TYPES, which is assumed to have length equal to the -// number of SCCs. An arc filter is used to limit the transitions -// considered (e.g., only the epsilon graph). ALL_TRIVIAL is set -// to true if every queue is the trivial queue. UNWEIGHTED is set to -// true if the semiring is idempotent and all the arc weights are equal to -// Zero() or One(). 
-template <class StateId> -template <class A, class ArcFilter, class Less> -void AutoQueue<StateId>::SccQueueType(const Fst<A> &fst, - const vector<StateId> &scc, - vector<QueueType> *queue_type, - ArcFilter filter, Less *less, - bool *all_trivial, bool *unweighted) { - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - *all_trivial = true; - *unweighted = true; - - for (StateId i = 0; i < queue_type->size(); ++i) - (*queue_type)[i] = TRIVIAL_QUEUE; - - for (StateIterator< Fst<Arc> > sit(fst); !sit.Done(); sit.Next()) { - StateId state = sit.Value(); - for (ArcIterator< Fst<Arc> > ait(fst, state); - !ait.Done(); - ait.Next()) { - const Arc &arc = ait.Value(); - if (!filter(arc)) continue; - if (scc[state] == scc[arc.nextstate]) { - QueueType &type = (*queue_type)[scc[state]]; - if (!less || ((*less)(arc.weight, Weight::One()))) - type = FIFO_QUEUE; - else if ((type == TRIVIAL_QUEUE) || (type == LIFO_QUEUE)) { - if (!(Weight::Properties() & kIdempotent) || - (arc.weight != Weight::Zero() && arc.weight != Weight::One())) - type = SHORTEST_FIRST_QUEUE; - else - type = LIFO_QUEUE; - } - if (type != TRIVIAL_QUEUE) *all_trivial = false; - } - if (!(Weight::Properties() & kIdempotent) || - (arc.weight != Weight::Zero() && arc.weight != Weight::One())) - *unweighted = false; - } - } -} - - -// An A* estimate is a function object that maps from a state ID to a -// an estimate of the shortest distance to the final states. -// The trivial A* estimate is always One(). -template <typename S, typename W> -struct TrivialAStarEstimate { - W operator()(S s) const { return W::One(); } -}; - - -// Given a vector that maps from states to weights representing the -// shortest distance from the initial state, a Less comparison -// function object between weights, and an estimate E of the -// shortest distance to the final states, this class defines a -// comparison function object between states. 
-template <typename S, typename L, typename E> -class AStarWeightCompare { - public: - typedef L Less; - typedef typename L::Weight Weight; - typedef S StateId; - - AStarWeightCompare(const vector<Weight>& weights, const L &less, - const E &estimate) - : weights_(weights), less_(less), estimate_(estimate) {} - - bool operator()(const S x, const S y) const { - Weight wx = Times(weights_[x], estimate_(x)); - Weight wy = Times(weights_[y], estimate_(y)); - return less_(wx, wy); - } - - private: - const vector<Weight>& weights_; - L less_; - const E &estimate_; -}; - - -// A* queue discipline, templated on the StateId, Weight and an -// estimate E of the shortest distance to the final states, is specialized -// to use the weight's natural order for the comparison function. -template <typename S, typename W, typename E> -class NaturalAStarQueue : - public ShortestFirstQueue<S, AStarWeightCompare<S, NaturalLess<W>, E> > { - public: - typedef AStarWeightCompare<S, NaturalLess<W>, E> C; - - NaturalAStarQueue(const vector<W> &distance, const E &estimate) : - ShortestFirstQueue<S, C>(C(distance, less_, estimate)) {} - - private: - NaturalLess<W> less_; -}; - - -// A state equivalence class is a function object that -// maps from a state ID to an equivalence class (state) ID. -// The trivial equivalence class maps a state to itself. -template <typename S> -struct TrivialStateEquivClass { - S operator()(S s) const { return s; } -}; - - -// Distance-based pruning queue discipline: Enqueues a state 's' -// only when its shortest distance (so far), as specified by -// 'distance', is less than (as specified by 'comp') the shortest -// distance Times() the 'threshold' to any state in the same -// equivalence class, as specified by the function object -// 'class_func'. The underlying queue discipline is specified by -// 'queue'. The ownership of 'queue' is given to this class. 
-template <typename Q, typename L, typename C> -class PruneQueue : public QueueBase<typename Q::StateId> { - public: - typedef typename Q::StateId StateId; - typedef typename L::Weight Weight; - - PruneQueue(const vector<Weight> &distance, Q *queue, L comp, - const C &class_func, Weight threshold) - : QueueBase<StateId>(OTHER_QUEUE), - distance_(distance), - queue_(queue), - less_(comp), - class_func_(class_func), - threshold_(threshold) {} - - ~PruneQueue() { delete queue_; } - - StateId Head() const { return queue_->Head(); } - - void Enqueue(StateId s) { - StateId c = class_func_(s); - if (c >= class_distance_.size()) - class_distance_.resize(c + 1, Weight::Zero()); - if (less_(distance_[s], class_distance_[c])) - class_distance_[c] = distance_[s]; - - // Enqueue only if below threshold limit - Weight limit = Times(class_distance_[c], threshold_); - if (less_(distance_[s], limit)) - queue_->Enqueue(s); - } - - void Dequeue() { queue_->Dequeue(); } - - void Update(StateId s) { - StateId c = class_func_(s); - if (less_(distance_[s], class_distance_[c])) - class_distance_[c] = distance_[s]; - queue_->Update(s); - } - - bool Empty() const { return queue_->Empty(); } - void Clear() { queue_->Clear(); } - - private: - // This allows base-class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. - virtual StateId Head_() const { return Head(); } - virtual void Enqueue_(StateId s) { Enqueue(s); } - virtual void Dequeue_() { Dequeue(); } - virtual void Update_(StateId s) { Update(s); } - virtual bool Empty_() const { return Empty(); } - virtual void Clear_() { return Clear(); } - - const vector<Weight> &distance_; // shortest distance to state - Q *queue_; - L less_; - const C &class_func_; // eqv. 
class function object - Weight threshold_; // pruning weight threshold - vector<Weight> class_distance_; // shortest distance to class - - DISALLOW_COPY_AND_ASSIGN(PruneQueue); -}; - - -// Pruning queue discipline (see above) using the weight's natural -// order for the comparison function. The ownership of 'queue' is -// given to this class. -template <typename Q, typename W, typename C> -class NaturalPruneQueue : - public PruneQueue<Q, NaturalLess<W>, C> { - public: - typedef typename Q::StateId StateId; - typedef W Weight; - - NaturalPruneQueue(const vector<W> &distance, Q *queue, - const C &class_func_, Weight threshold) : - PruneQueue<Q, NaturalLess<W>, C>(distance, queue, less_, - class_func_, threshold) {} - - private: - NaturalLess<W> less_; -}; - - -// Filter-based pruning queue discipline: Enqueues a state 's' only -// if allowed by the filter, specified by the function object 'state_filter'. -// The underlying queue discipline is specified by 'queue'. The ownership -// of 'queue' is given to this class. -template <typename Q, typename F> -class FilterQueue : public QueueBase<typename Q::StateId> { - public: - typedef typename Q::StateId StateId; - - FilterQueue(Q *queue, const F &state_filter) - : QueueBase<StateId>(OTHER_QUEUE), - queue_(queue), - state_filter_(state_filter) {} - - ~FilterQueue() { delete queue_; } - - StateId Head() const { return queue_->Head(); } - - // Enqueues only if allowed by state filter. - void Enqueue(StateId s) { - if (state_filter_(s)) { - queue_->Enqueue(s); - } - } - - void Dequeue() { queue_->Dequeue(); } - - void Update(StateId s) {} - bool Empty() const { return queue_->Empty(); } - void Clear() { queue_->Clear(); } - - private: - // This allows base-class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. 
- virtual StateId Head_() const { return Head(); } - virtual void Enqueue_(StateId s) { Enqueue(s); } - virtual void Dequeue_() { Dequeue(); } - virtual void Update_(StateId s) { Update(s); } - virtual bool Empty_() const { return Empty(); } - virtual void Clear_() { return Clear(); } - - Q *queue_; - const F &state_filter_; // Filter to prune states - - DISALLOW_COPY_AND_ASSIGN(FilterQueue); -}; - -} // namespace fst - -#endif diff --git a/kaldi_io/src/tools/openfst/include/fst/randequivalent.h b/kaldi_io/src/tools/openfst/include/fst/randequivalent.h deleted file mode 100644 index 1aaccf7..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/randequivalent.h +++ /dev/null @@ -1,135 +0,0 @@ -// randequivalent.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// Tests if two FSTS are equivalent by checking if random -// strings from one FST are transduced the same by both FSTs. - -#ifndef FST_RANDEQUIVALENT_H__ -#define FST_RANDEQUIVALENT_H__ - -#include <fst/arcsort.h> -#include <fst/compose.h> -#include <fst/project.h> -#include <fst/randgen.h> -#include <fst/shortest-distance.h> -#include <fst/vector-fst.h> - - -namespace fst { - -// Test if two FSTs are equivalent by randomly generating 'num_paths' -// paths (as specified by the RandGenOptions 'opts') in these FSTs. 
-// -// For each randomly generated path, the algorithm computes for each -// of the two FSTs the sum of the weights of all the successful paths -// sharing the same input and output labels as the considered randomly -// generated path and checks that these two values are within -// 'delta'. Returns optional error value (when FLAGS_error_fatal = false). -template<class Arc, class ArcSelector> -bool RandEquivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2, - ssize_t num_paths, float delta, - const RandGenOptions<ArcSelector> &opts, - bool *error = 0) { - typedef typename Arc::Weight Weight; - if (error) *error = false; - - // Check that the symbol table are compatible - if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) || - !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) { - FSTERROR() << "RandEquivalent: input/output symbol tables of 1st " - << "argument do not match input/output symbol tables of 2nd " - << "argument"; - if (error) *error = true; - return false; - } - - ILabelCompare<Arc> icomp; - OLabelCompare<Arc> ocomp; - VectorFst<Arc> sfst1(fst1); - VectorFst<Arc> sfst2(fst2); - Connect(&sfst1); - Connect(&sfst2); - ArcSort(&sfst1, icomp); - ArcSort(&sfst2, icomp); - - bool ret = true; - for (ssize_t n = 0; n < num_paths; ++n) { - VectorFst<Arc> path; - const Fst<Arc> &fst = rand() % 2 ? 
sfst1 : sfst2; - RandGen(fst, &path, opts); - - VectorFst<Arc> ipath(path); - VectorFst<Arc> opath(path); - Project(&ipath, PROJECT_INPUT); - Project(&opath, PROJECT_OUTPUT); - - VectorFst<Arc> cfst1, pfst1; - Compose(ipath, sfst1, &cfst1); - ArcSort(&cfst1, ocomp); - Compose(cfst1, opath, &pfst1); - // Give up if there are epsilon cycles in a non-idempotent semiring - if (!(Weight::Properties() & kIdempotent) && - pfst1.Properties(kCyclic, true)) - continue; - Weight sum1 = ShortestDistance(pfst1); - - VectorFst<Arc> cfst2, pfst2; - Compose(ipath, sfst2, &cfst2); - ArcSort(&cfst2, ocomp); - Compose(cfst2, opath, &pfst2); - // Give up if there are epsilon cycles in a non-idempotent semiring - if (!(Weight::Properties() & kIdempotent) && - pfst2.Properties(kCyclic, true)) - continue; - Weight sum2 = ShortestDistance(pfst2); - - if (!ApproxEqual(sum1, sum2, delta)) { - VLOG(1) << "Sum1 = " << sum1; - VLOG(1) << "Sum2 = " << sum2; - ret = false; - break; - } - } - - if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) { - if (error) *error = true; - return false; - } - - return ret; -} - - -// Test if two FSTs are equivalent by randomly generating 'num_paths' paths -// of length no more than 'path_length' using the seed 'seed' in these FSTs. -// Returns optional error value (when FLAGS_error_fatal = false). 
//
// Builds a UniformArcSelector from 'seed', wraps it together with
// 'path_length' in a RandGenOptions object and forwards to the
// options-based overload of RandEquivalent.
template <class Arc>
bool RandEquivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2,
                    ssize_t num_paths, float delta = kDelta,
                    int seed = time(0), int path_length = INT_MAX,
                    bool *error = 0) {
  UniformArcSelector<Arc> uniform_selector(seed);
  RandGenOptions< UniformArcSelector<Arc> >
      opts(uniform_selector, path_length);
  return RandEquivalent(fst1, fst2, num_paths, delta, opts, error);
}


}  // namespace fst

#endif  // FST_RANDEQUIVALENT_H__

// diff --git a/kaldi_io/src/tools/openfst/include/fst/randgen.h b/kaldi_io/src/tools/openfst/include/fst/randgen.h
// deleted file mode 100644
// index 82ddffa..0000000
// --- a/kaldi_io/src/tools/openfst/include/fst/randgen.h
// +++ /dev/null
// @@ -1,712 +0,0 @@
// randgen.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: [email protected] (Michael Riley)
//
// \file
// Classes and functions to generate random paths through an FST.

#ifndef FST_LIB_RANDGEN_H__
#define FST_LIB_RANDGEN_H__

#include <cmath>
#include <cstdlib>
#include <ctime>
#include <map>

#include <fst/accumulator.h>
#include <fst/cache.h>
#include <fst/dfs-visit.h>
#include <fst/mutable-fst.h>

namespace fst {

//
// ARC SELECTORS - these function objects are used to select a random
// transition to take from an FST's state. They should return a number
// N s.t. 0 <= N <= NumArcs(). If N < NumArcs(), then the N-th
// transition is selected.
If N == NumArcs(), then the final weight at -// that state is selected (i.e., the 'super-final' transition is selected). -// It can be assumed these will not be called unless either there -// are transitions leaving the state and/or the state is final. -// - -// Randomly selects a transition using the uniform distribution. -template <class A> -struct UniformArcSelector { - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - UniformArcSelector(int seed = time(0)) { srand(seed); } - - size_t operator()(const Fst<A> &fst, StateId s) const { - double r = rand()/(RAND_MAX + 1.0); - size_t n = fst.NumArcs(s); - if (fst.Final(s) != Weight::Zero()) - ++n; - return static_cast<size_t>(r * n); - } -}; - - -// Randomly selects a transition w.r.t. the weights treated as negative -// log probabilities after normalizing for the total weight leaving -// the state. Weight::zero transitions are disregarded. -// Assumes Weight::Value() accesses the floating point -// representation of the weight. 
-template <class A> -class LogProbArcSelector { - public: - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - LogProbArcSelector(int seed = time(0)) { srand(seed); } - - size_t operator()(const Fst<A> &fst, StateId s) const { - // Find total weight leaving state - double sum = 0.0; - for (ArcIterator< Fst<A> > aiter(fst, s); !aiter.Done(); - aiter.Next()) { - const A &arc = aiter.Value(); - sum += exp(-to_log_weight_(arc.weight).Value()); - } - sum += exp(-to_log_weight_(fst.Final(s)).Value()); - - double r = rand()/(RAND_MAX + 1.0); - double p = 0.0; - int n = 0; - for (ArcIterator< Fst<A> > aiter(fst, s); !aiter.Done(); - aiter.Next(), ++n) { - const A &arc = aiter.Value(); - p += exp(-to_log_weight_(arc.weight).Value()); - if (p > r * sum) return n; - } - return n; - } - - private: - WeightConvert<Weight, Log64Weight> to_log_weight_; -}; - -// Convenience definitions -typedef LogProbArcSelector<StdArc> StdArcSelector; -typedef LogProbArcSelector<LogArc> LogArcSelector; - - -// Same as LogProbArcSelector but use CacheLogAccumulator to cache -// the cummulative weight computations. 
-template <class A> -class FastLogProbArcSelector : public LogProbArcSelector<A> { - public: - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - using LogProbArcSelector<A>::operator(); - - FastLogProbArcSelector(int seed = time(0)) - : LogProbArcSelector<A>(seed), - seed_(seed) {} - - size_t operator()(const Fst<A> &fst, StateId s, - CacheLogAccumulator<A> *accumulator) const { - accumulator->SetState(s); - ArcIterator< Fst<A> > aiter(fst, s); - // Find total weight leaving state - double sum = to_log_weight_(accumulator->Sum(fst.Final(s), &aiter, 0, - fst.NumArcs(s))).Value(); - double r = -log(rand()/(RAND_MAX + 1.0)); - return accumulator->LowerBound(r + sum, &aiter); - } - - int Seed() const { return seed_; } - private: - int seed_; - WeightConvert<Weight, Log64Weight> to_log_weight_; -}; - -// Random path state info maintained by RandGenFst and passed to samplers. -template <typename A> -struct RandState { - typedef typename A::StateId StateId; - - StateId state_id; // current input FST state - size_t nsamples; // # of samples to be sampled at this state - size_t length; // length of path to this random state - size_t select; // previous sample arc selection - const RandState<A> *parent; // previous random state on this path - - RandState(StateId s, size_t n, size_t l, size_t k, const RandState<A> *p) - : state_id(s), nsamples(n), length(l), select(k), parent(p) {} - - RandState() - : state_id(kNoStateId), nsamples(0), length(0), select(0), parent(0) {} -}; - -// This class, given an arc selector, samples, with raplacement, -// multiple random transitions from an FST's state. This is a generic -// version with a straight-forward use of the arc selector. -// Specializations may be defined for arc selectors for greater -// efficiency or special behavior. 
-template <class A, class S> -class ArcSampler { - public: - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - // The 'max_length' may be interpreted (including ignored) by a - // sampler as it chooses. This generic version interprets this literally. - ArcSampler(const Fst<A> &fst, const S &arc_selector, - int max_length = INT_MAX) - : fst_(fst), - arc_selector_(arc_selector), - max_length_(max_length) {} - - // Allow updating Fst argument; pass only if changed. - ArcSampler(const ArcSampler<A, S> &sampler, const Fst<A> *fst = 0) - : fst_(fst ? *fst : sampler.fst_), - arc_selector_(sampler.arc_selector_), - max_length_(sampler.max_length_) { - Reset(); - } - - // Samples 'rstate.nsamples' from state 'state_id'. The 'rstate.length' is - // the length of the path to 'rstate'. Returns true if samples were - // collected. No samples may be collected if either there are no (including - // 'super-final') transitions leaving that state or if the - // 'max_length' has been deemed reached. Use the iterator members to - // read the samples. The samples will be in their original order. - bool Sample(const RandState<A> &rstate) { - sample_map_.clear(); - if ((fst_.NumArcs(rstate.state_id) == 0 && - fst_.Final(rstate.state_id) == Weight::Zero()) || - rstate.length == max_length_) { - Reset(); - return false; - } - - for (size_t i = 0; i < rstate.nsamples; ++i) - ++sample_map_[arc_selector_(fst_, rstate.state_id)]; - Reset(); - return true; - } - - // More samples? - bool Done() const { return sample_iter_ == sample_map_.end(); } - - // Gets the next sample. - void Next() { ++sample_iter_; } - - // Returns a pair (N, K) where 0 <= N <= NumArcs(s) and 0 < K <= nsamples. - // If N < NumArcs(s), then the N-th transition is specified. - // If N == NumArcs(s), then the final weight at that state is - // specified (i.e., the 'super-final' transition is specified). - // For the specified transition, K repetitions have been sampled. 
- pair<size_t, size_t> Value() const { return *sample_iter_; } - - void Reset() { sample_iter_ = sample_map_.begin(); } - - bool Error() const { return false; } - - private: - const Fst<A> &fst_; - const S &arc_selector_; - int max_length_; - - // Stores (N, K) as described for Value(). - map<size_t, size_t> sample_map_; - map<size_t, size_t>::const_iterator sample_iter_; - - // disallow - ArcSampler<A, S> & operator=(const ArcSampler<A, S> &s); -}; - - -// Specialization for FastLogProbArcSelector. -template <class A> -class ArcSampler<A, FastLogProbArcSelector<A> > { - public: - typedef FastLogProbArcSelector<A> S; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - typedef CacheLogAccumulator<A> C; - - ArcSampler(const Fst<A> &fst, const S &arc_selector, int max_length = INT_MAX) - : fst_(fst), - arc_selector_(arc_selector), - max_length_(max_length), - accumulator_(new C()) { - accumulator_->Init(fst); - } - - ArcSampler(const ArcSampler<A, S> &sampler, const Fst<A> *fst = 0) - : fst_(fst ? 
*fst : sampler.fst_), - arc_selector_(sampler.arc_selector_), - max_length_(sampler.max_length_) { - if (fst) { - accumulator_ = new C(); - accumulator_->Init(*fst); - } else { // shallow copy - accumulator_ = new C(*sampler.accumulator_); - } - } - - ~ArcSampler() { - delete accumulator_; - } - - bool Sample(const RandState<A> &rstate) { - sample_map_.clear(); - if ((fst_.NumArcs(rstate.state_id) == 0 && - fst_.Final(rstate.state_id) == Weight::Zero()) || - rstate.length == max_length_) { - Reset(); - return false; - } - - for (size_t i = 0; i < rstate.nsamples; ++i) - ++sample_map_[arc_selector_(fst_, rstate.state_id, accumulator_)]; - Reset(); - return true; - } - - bool Done() const { return sample_iter_ == sample_map_.end(); } - void Next() { ++sample_iter_; } - pair<size_t, size_t> Value() const { return *sample_iter_; } - void Reset() { sample_iter_ = sample_map_.begin(); } - - bool Error() const { return accumulator_->Error(); } - - private: - const Fst<A> &fst_; - const S &arc_selector_; - int max_length_; - - // Stores (N, K) as described for Value(). - map<size_t, size_t> sample_map_; - map<size_t, size_t>::const_iterator sample_iter_; - C *accumulator_; - - // disallow - ArcSampler<A, S> & operator=(const ArcSampler<A, S> &s); -}; - - -// Options for random path generation with RandGenFst. The template argument -// is an arc sampler, typically class 'ArcSampler' above. Ownership of -// the sampler is taken by RandGenFst. -template <class S> -struct RandGenFstOptions : public CacheOptions { - S *arc_sampler; // How to sample transitions at a state - size_t npath; // # of paths to generate - bool weighted; // Output tree weighted by path count; o.w. - // output unweighted DAG - bool remove_total_weight; // Remove total weight when output is weighted. 
- - RandGenFstOptions(const CacheOptions &copts, S *samp, - size_t n = 1, bool w = true, bool rw = false) - : CacheOptions(copts), - arc_sampler(samp), - npath(n), - weighted(w), - remove_total_weight(rw) {} -}; - - -// Implementation of RandGenFst. -template <class A, class B, class S> -class RandGenFstImpl : public CacheImpl<B> { - public: - using FstImpl<B>::SetType; - using FstImpl<B>::SetProperties; - using FstImpl<B>::SetInputSymbols; - using FstImpl<B>::SetOutputSymbols; - - using CacheBaseImpl< CacheState<B> >::AddArc; - using CacheBaseImpl< CacheState<B> >::HasArcs; - using CacheBaseImpl< CacheState<B> >::HasFinal; - using CacheBaseImpl< CacheState<B> >::HasStart; - using CacheBaseImpl< CacheState<B> >::SetArcs; - using CacheBaseImpl< CacheState<B> >::SetFinal; - using CacheBaseImpl< CacheState<B> >::SetStart; - - typedef B Arc; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - RandGenFstImpl(const Fst<A> &fst, const RandGenFstOptions<S> &opts) - : CacheImpl<B>(opts), - fst_(fst.Copy()), - arc_sampler_(opts.arc_sampler), - npath_(opts.npath), - weighted_(opts.weighted), - remove_total_weight_(opts.remove_total_weight), - superfinal_(kNoLabel) { - SetType("randgen"); - - uint64 props = fst.Properties(kFstProperties, false); - SetProperties(RandGenProperties(props, weighted_), kCopyProperties); - - SetInputSymbols(fst.InputSymbols()); - SetOutputSymbols(fst.OutputSymbols()); - } - - RandGenFstImpl(const RandGenFstImpl &impl) - : CacheImpl<B>(impl), - fst_(impl.fst_->Copy(true)), - arc_sampler_(new S(*impl.arc_sampler_, fst_)), - npath_(impl.npath_), - weighted_(impl.weighted_), - superfinal_(kNoLabel) { - SetType("randgen"); - SetProperties(impl.Properties(), kCopyProperties); - SetInputSymbols(impl.InputSymbols()); - SetOutputSymbols(impl.OutputSymbols()); - } - - ~RandGenFstImpl() { - for (int i = 0; i < state_table_.size(); ++i) - delete state_table_[i]; - delete fst_; - delete arc_sampler_; 
- } - - StateId Start() { - if (!HasStart()) { - StateId s = fst_->Start(); - if (s == kNoStateId) - return kNoStateId; - StateId start = state_table_.size(); - SetStart(start); - RandState<A> *rstate = new RandState<A>(s, npath_, 0, 0, 0); - state_table_.push_back(rstate); - } - return CacheImpl<B>::Start(); - } - - Weight Final(StateId s) { - if (!HasFinal(s)) { - Expand(s); - } - return CacheImpl<B>::Final(s); - } - - size_t NumArcs(StateId s) { - if (!HasArcs(s)) { - Expand(s); - } - return CacheImpl<B>::NumArcs(s); - } - - size_t NumInputEpsilons(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<B>::NumInputEpsilons(s); - } - - size_t NumOutputEpsilons(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<B>::NumOutputEpsilons(s); - } - - uint64 Properties() const { return Properties(kFstProperties); } - - // Set error if found; return FST impl properties. - uint64 Properties(uint64 mask) const { - if ((mask & kError) && - (fst_->Properties(kError, false) || arc_sampler_->Error())) { - SetProperties(kError, kError); - } - return FstImpl<Arc>::Properties(mask); - } - - void InitArcIterator(StateId s, ArcIteratorData<B> *data) { - if (!HasArcs(s)) - Expand(s); - CacheImpl<B>::InitArcIterator(s, data); - } - - // Computes the outgoing transitions from a state, creating new destination - // states as needed. 
- void Expand(StateId s) { - if (s == superfinal_) { - SetFinal(s, Weight::One()); - SetArcs(s); - return; - } - - SetFinal(s, Weight::Zero()); - const RandState<A> &rstate = *state_table_[s]; - arc_sampler_->Sample(rstate); - ArcIterator< Fst<A> > aiter(*fst_, rstate.state_id); - size_t narcs = fst_->NumArcs(rstate.state_id); - for (;!arc_sampler_->Done(); arc_sampler_->Next()) { - const pair<size_t, size_t> &sample_pair = arc_sampler_->Value(); - size_t pos = sample_pair.first; - size_t count = sample_pair.second; - double prob = static_cast<double>(count)/rstate.nsamples; - if (pos < narcs) { // regular transition - aiter.Seek(sample_pair.first); - const A &aarc = aiter.Value(); - Weight weight = weighted_ ? to_weight_(-log(prob)) : Weight::One(); - B barc(aarc.ilabel, aarc.olabel, weight, state_table_.size()); - AddArc(s, barc); - RandState<A> *nrstate = - new RandState<A>(aarc.nextstate, count, rstate.length + 1, - pos, &rstate); - state_table_.push_back(nrstate); - } else { // super-final transition - if (weighted_) { - Weight weight = remove_total_weight_ ? - to_weight_(-log(prob)) : to_weight_(-log(prob * npath_)); - SetFinal(s, weight); - } else { - if (superfinal_ == kNoLabel) { - superfinal_ = state_table_.size(); - RandState<A> *nrstate = new RandState<A>(kNoStateId, 0, 0, 0, 0); - state_table_.push_back(nrstate); - } - for (size_t n = 0; n < count; ++n) { - B barc(0, 0, Weight::One(), superfinal_); - AddArc(s, barc); - } - } - } - } - SetArcs(s); - } - - private: - Fst<A> *fst_; - S *arc_sampler_; - size_t npath_; - vector<RandState<A> *> state_table_; - bool weighted_; - bool remove_total_weight_; - StateId superfinal_; - WeightConvert<Log64Weight, Weight> to_weight_; - - void operator=(const RandGenFstImpl<A, B, S> &); // disallow -}; - - -// Fst class to randomly generate paths through an FST; details controlled -// by RandGenOptionsFst. Output format is a tree weighted by the -// path count. 
-template <class A, class B, class S> -class RandGenFst : public ImplToFst< RandGenFstImpl<A, B, S> > { - public: - friend class ArcIterator< RandGenFst<A, B, S> >; - friend class StateIterator< RandGenFst<A, B, S> >; - typedef B Arc; - typedef S Sampler; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef CacheState<B> State; - typedef RandGenFstImpl<A, B, S> Impl; - - RandGenFst(const Fst<A> &fst, const RandGenFstOptions<S> &opts) - : ImplToFst<Impl>(new Impl(fst, opts)) {} - - // See Fst<>::Copy() for doc. - RandGenFst(const RandGenFst<A, B, S> &fst, bool safe = false) - : ImplToFst<Impl>(fst, safe) {} - - // Get a copy of this RandGenFst. See Fst<>::Copy() for further doc. - virtual RandGenFst<A, B, S> *Copy(bool safe = false) const { - return new RandGenFst<A, B, S>(*this, safe); - } - - virtual inline void InitStateIterator(StateIteratorData<B> *data) const; - - virtual void InitArcIterator(StateId s, ArcIteratorData<B> *data) const { - GetImpl()->InitArcIterator(s, data); - } - - private: - // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } - - void operator=(const RandGenFst<A, B, S> &fst); // Disallow -}; - - - -// Specialization for RandGenFst. -template <class A, class B, class S> -class StateIterator< RandGenFst<A, B, S> > - : public CacheStateIterator< RandGenFst<A, B, S> > { - public: - explicit StateIterator(const RandGenFst<A, B, S> &fst) - : CacheStateIterator< RandGenFst<A, B, S> >(fst, fst.GetImpl()) {} - - private: - DISALLOW_COPY_AND_ASSIGN(StateIterator); -}; - - -// Specialization for RandGenFst. 
-template <class A, class B, class S> -class ArcIterator< RandGenFst<A, B, S> > - : public CacheArcIterator< RandGenFst<A, B, S> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const RandGenFst<A, B, S> &fst, StateId s) - : CacheArcIterator< RandGenFst<A, B, S> >(fst.GetImpl(), s) { - if (!fst.GetImpl()->HasArcs(s)) - fst.GetImpl()->Expand(s); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - - -template <class A, class B, class S> inline -void RandGenFst<A, B, S>::InitStateIterator(StateIteratorData<B> *data) const -{ - data->base = new StateIterator< RandGenFst<A, B, S> >(*this); -} - -// Options for random path generation. -template <class S> -struct RandGenOptions { - const S &arc_selector; // How an arc is selected at a state - int max_length; // Maximum path length - size_t npath; // # of paths to generate - bool weighted; // Output is tree weighted by path count; o.w. - // output unweighted union of paths. - bool remove_total_weight; // Remove total weight when output is weighted. 
- - RandGenOptions(const S &sel, int len = INT_MAX, size_t n = 1, - bool w = false, bool rw = false) - : arc_selector(sel), - max_length(len), - npath(n), - weighted(w), - remove_total_weight(rw) {} -}; - - -template <class IArc, class OArc> -class RandGenVisitor { - public: - typedef typename IArc::Weight Weight; - typedef typename IArc::StateId StateId; - - RandGenVisitor(MutableFst<OArc> *ofst) : ofst_(ofst) {} - - void InitVisit(const Fst<IArc> &ifst) { - ifst_ = &ifst; - - ofst_->DeleteStates(); - ofst_->SetInputSymbols(ifst.InputSymbols()); - ofst_->SetOutputSymbols(ifst.OutputSymbols()); - if (ifst.Properties(kError, false)) - ofst_->SetProperties(kError, kError); - path_.clear(); - } - - bool InitState(StateId s, StateId root) { return true; } - - bool TreeArc(StateId s, const IArc &arc) { - if (ifst_->Final(arc.nextstate) == Weight::Zero()) { - path_.push_back(arc); - } else { - OutputPath(); - } - return true; - } - - bool BackArc(StateId s, const IArc &arc) { - FSTERROR() << "RandGenVisitor: cyclic input"; - ofst_->SetProperties(kError, kError); - return false; - } - - bool ForwardOrCrossArc(StateId s, const IArc &arc) { - OutputPath(); - return true; - } - - void FinishState(StateId s, StateId p, const IArc *) { - if (p != kNoStateId && ifst_->Final(s) == Weight::Zero()) - path_.pop_back(); - } - - void FinishVisit() {} - - private: - void OutputPath() { - if (ofst_->Start() == kNoStateId) { - StateId start = ofst_->AddState(); - ofst_->SetStart(start); - } - - StateId src = ofst_->Start(); - for (size_t i = 0; i < path_.size(); ++i) { - StateId dest = ofst_->AddState(); - OArc arc(path_[i].ilabel, path_[i].olabel, Weight::One(), dest); - ofst_->AddArc(src, arc); - src = dest; - } - ofst_->SetFinal(src, Weight::One()); - } - - const Fst<IArc> *ifst_; - MutableFst<OArc> *ofst_; - vector<OArc> path_; - - DISALLOW_COPY_AND_ASSIGN(RandGenVisitor); -}; - - -// Randomly generate paths through an FST; details controlled by -// RandGenOptions. 
-template<class IArc, class OArc, class Selector> -void RandGen(const Fst<IArc> &ifst, MutableFst<OArc> *ofst, - const RandGenOptions<Selector> &opts) { - typedef ArcSampler<IArc, Selector> Sampler; - typedef RandGenFst<IArc, OArc, Sampler> RandFst; - typedef typename OArc::StateId StateId; - typedef typename OArc::Weight Weight; - - Sampler* arc_sampler = new Sampler(ifst, opts.arc_selector, opts.max_length); - RandGenFstOptions<Sampler> fopts(CacheOptions(true, 0), arc_sampler, - opts.npath, opts.weighted, - opts.remove_total_weight); - RandFst rfst(ifst, fopts); - if (opts.weighted) { - *ofst = rfst; - } else { - RandGenVisitor<IArc, OArc> rand_visitor(ofst); - DfsVisit(rfst, &rand_visitor); - } -} - -// Randomly generate a path through an FST with the uniform distribution -// over the transitions. -template<class IArc, class OArc> -void RandGen(const Fst<IArc> &ifst, MutableFst<OArc> *ofst) { - UniformArcSelector<IArc> uniform_selector; - RandGenOptions< UniformArcSelector<IArc> > opts(uniform_selector); - RandGen(ifst, ofst, opts); -} - -} // namespace fst - -#endif // FST_LIB_RANDGEN_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/random-weight.h b/kaldi_io/src/tools/openfst/include/fst/random-weight.h deleted file mode 100644 index 0ccd95d..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/random-weight.h +++ /dev/null @@ -1,348 +0,0 @@ -// random-weight.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Function objects to generate random weights in various semirings -// for testing purposes. - -#ifndef FST_LIB_RANDOM_WEIGHT_H__ -#define FST_LIB_RANDOM_WEIGHT_H__ - -#include <cstdlib> -#include <ctime> -#include <vector> -using std::vector; - - -#include <fst/float-weight.h> -#include <fst/product-weight.h> -#include <fst/string-weight.h> -#include <fst/lexicographic-weight.h> -#include <fst/power-weight.h> -#include <fst/signed-log-weight.h> -#include <fst/sparse-power-weight.h> - - -namespace fst { - -// The boolean 'allow_zero' below determines whether Zero() and zero -// divisors should be returned in the random weight generation. - -// This function object returns TropicalWeightTpl<T>'s that are random integers -// chosen from [0, kNumRandomWeights). -template <class T> -class TropicalWeightGenerator_ { - public: - typedef TropicalWeightTpl<T> Weight; - - TropicalWeightGenerator_(int seed = time(0), bool allow_zero = true) - : allow_zero_(allow_zero) { - srand(seed); - } - - Weight operator() () const { - int n = rand() % (kNumRandomWeights + allow_zero_); - if (allow_zero_ && n == kNumRandomWeights) - return Weight::Zero(); - - return Weight(static_cast<T>(n)); - } - - private: - // The number of alternative random weights. - static const int kNumRandomWeights = 5; - - bool allow_zero_; // permit Zero() and zero divisors -}; - -template <class T> const int TropicalWeightGenerator_<T>::kNumRandomWeights; - -typedef TropicalWeightGenerator_<float> TropicalWeightGenerator; - - -// This function object returns LogWeightTpl<T>'s that are random integers -// chosen from [0, kNumRandomWeights). 
-template <class T> -class LogWeightGenerator_ { - public: - typedef LogWeightTpl<T> Weight; - - LogWeightGenerator_(int seed = time(0), bool allow_zero = true) - : allow_zero_(allow_zero) { - srand(seed); - } - - Weight operator() () const { - int n = rand() % (kNumRandomWeights + allow_zero_); - if (allow_zero_ && n == kNumRandomWeights) - return Weight::Zero(); - - return Weight(static_cast<T>(n)); - } - - private: - // Number of alternative random weights. - static const int kNumRandomWeights = 5; - - bool allow_zero_; // permit Zero() and zero divisors -}; - -template <class T> const int LogWeightGenerator_<T>::kNumRandomWeights; - -typedef LogWeightGenerator_<float> LogWeightGenerator; - - -// This function object returns MinMaxWeightTpl<T>'s that are random integers -// chosen from (-kNumRandomWeights, kNumRandomWeights) in addition to -// One(), and Zero() if zero is allowed. -template <class T> -class MinMaxWeightGenerator_ { - public: - typedef MinMaxWeightTpl<T> Weight; - - MinMaxWeightGenerator_(int seed = time(0), bool allow_zero = true) - : allow_zero_(allow_zero) { - srand(seed); - } - - Weight operator() () const { - int n = (rand() % (2*kNumRandomWeights + allow_zero_)) - kNumRandomWeights; - if (allow_zero_ && n == kNumRandomWeights) - return Weight::Zero(); - else if (n == -kNumRandomWeights) - return Weight::One(); - - return Weight(static_cast<T>(n)); - } - - private: - // Parameters controlling the number of alternative random weights. 
- static const int kNumRandomWeights = 5; - - bool allow_zero_; // permit Zero() and zero divisors -}; - -template <class T> const int MinMaxWeightGenerator_<T>::kNumRandomWeights; - -typedef MinMaxWeightGenerator_<float> MinMaxWeightGenerator; - - -// This function object returns StringWeights that are random integer -// strings chosen from {1,...,kAlphabetSize}^{0,kMaxStringLength} U { Zero } -template <typename L, StringType S = STRING_LEFT> -class StringWeightGenerator { - public: - typedef StringWeight<L, S> Weight; - - StringWeightGenerator(int seed = time(0), bool allow_zero = true) - : allow_zero_(allow_zero) { - srand(seed); - } - - Weight operator() () const { - int n = rand() % (kMaxStringLength + allow_zero_); - if (allow_zero_ && n == kMaxStringLength) - return Weight::Zero(); - - vector<L> v; - for (int i = 0; i < n; ++i) - v.push_back(rand() % kAlphabetSize + 1); - return Weight(v.begin(), v.end()); - } - - private: - // Alphabet size for random weights. - static const int kAlphabetSize = 5; - // Number of alternative random weights. - static const int kMaxStringLength = 5; - - bool allow_zero_; // permit Zero() and zero -}; - -template <typename L, StringType S> -const int StringWeightGenerator<L, S>::kAlphabetSize; -template <typename L, StringType S> -const int StringWeightGenerator<L, S>::kMaxStringLength; - - -// This function object returns a weight generator over the product of the -// weights (by default) for the generators G1 and G2. 
-template <class G1, class G2, - class W = ProductWeight<typename G1::Weight, typename G2::Weight> > -class ProductWeightGenerator { - public: - typedef typename G1::Weight W1; - typedef typename G2::Weight W2; - typedef W Weight; - - ProductWeightGenerator(int seed = time(0), bool allow_zero = true) - : generator1_(seed, allow_zero), generator2_(seed, allow_zero) {} - - Weight operator() () const { - W1 w1 = generator1_(); - W2 w2 = generator2_(); - return Weight(w1, w2); - } - - private: - G1 generator1_; - G2 generator2_; -}; - - -// This function object returns a weight generator for a lexicographic weight -// composed out of weights for the generators G1 and G2. For lexicographic -// weights, we cannot generate zeroes for the two subweights separately: -// weights are members iff both members are zero or both members are non-zero. -template <class G1, class G2> -class LexicographicWeightGenerator { - public: - typedef typename G1::Weight W1; - typedef typename G2::Weight W2; - typedef LexicographicWeight<W1, W2> Weight; - - LexicographicWeightGenerator(int seed = time(0), bool allow_zero = true) - : generator1_(seed, false), generator2_(seed, false), - allow_zero_(allow_zero) {} - - Weight operator() () const { - if (allow_zero_) { - int n = rand() % (kNumRandomWeights + allow_zero_); - if (n == kNumRandomWeights) - return Weight(W1::Zero(), W2::Zero()); - } - W1 w1 = generator1_(); - W2 w2 = generator2_(); - return Weight(w1, w2); - } - - private: - G1 generator1_; - G2 generator2_; - static const int kNumRandomWeights = 5; - bool allow_zero_; -}; - -template <class G1, class G2> -const int LexicographicWeightGenerator<G1, G2>::kNumRandomWeights; - - -// Product generator of a string weight generator and an -// arbitrary weight generator. 
-template <class L, class G, StringType S = STRING_LEFT> -class GallicWeightGenerator - : public ProductWeightGenerator<StringWeightGenerator<L, S>, G> { - - public: - typedef ProductWeightGenerator<StringWeightGenerator<L, S>, G> PG; - typedef typename G::Weight W; - typedef GallicWeight<L, W, S> Weight; - - GallicWeightGenerator(int seed = time(0), bool allow_zero = true) - : PG(seed, allow_zero) {} - - GallicWeightGenerator(const PG &pg) : PG(pg) {} -}; - -// This function object returms a weight generator over the catersian power -// of rank n of the weights for the generator G. -template <class G, unsigned int n> -class PowerWeightGenerator { - public: - typedef typename G::Weight W; - typedef PowerWeight<W, n> Weight; - - PowerWeightGenerator(int seed = time(0), bool allow_zero = true) - : generator_(seed, allow_zero) {} - - Weight operator()() const { - Weight w; - for (size_t i = 0; i < n; ++i) { - W r = generator_(); - w.SetValue(i, r); - } - return w; - } - - private: - G generator_; -}; - -// This function object returns SignedLogWeightTpl<T>'s that are -// random integers chosen from [0, kNumRandomWeights). -// The sign is randomly chosen as well. -template <class T> -class SignedLogWeightGenerator_ { - public: - typedef SignedLogWeightTpl<T> Weight; - - SignedLogWeightGenerator_(int seed = time(0), bool allow_zero = true) - : allow_zero_(allow_zero) { - srand(seed); - } - - Weight operator() () const { - int m = rand() % 2; - int n = rand() % (kNumRandomWeights + allow_zero_); - - return SignedLogWeightTpl<T>( - (m == 0) ? - TropicalWeight(-1.0) : - TropicalWeight(1.0), - (allow_zero_ && n == kNumRandomWeights) ? - LogWeightTpl<T>::Zero() : - LogWeightTpl<T>(static_cast<T>(n))); - } - - private: - // Number of alternative random weights. 
- static const int kNumRandomWeights = 5; - bool allow_zero_; // permit Zero() and zero divisors -}; - -template <class T> const int SignedLogWeightGenerator_<T>::kNumRandomWeights; - -typedef SignedLogWeightGenerator_<float> SignedLogWeightGenerator; - -// This function object returms a weight generator over the catersian power -// of rank n of the weights for the generator G. -template <class G, class K, unsigned int n> -class SparsePowerWeightGenerator { - public: - typedef typename G::Weight W; - typedef SparsePowerWeight<W, K> Weight; - - SparsePowerWeightGenerator(int seed = time(0), bool allow_zero = true) - : generator_(seed, allow_zero) {} - - Weight operator()() const { - Weight w; - for (size_t i = 1; i <= n; ++i) { - W r = generator_(); - K p = i; - w.Push(p, r, true); - } - return w; - } - - private: - G generator_; -}; - -} // namespace fst - -#endif // FST_LIB_RANDOM_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/rational.h b/kaldi_io/src/tools/openfst/include/fst/rational.h deleted file mode 100644 index 96aa00d..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/rational.h +++ /dev/null @@ -1,330 +0,0 @@ -// rational.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// An Fst implementation and base interface for delayed unions, -// concatenations and closures. 
- -#ifndef FST_LIB_RATIONAL_H__ -#define FST_LIB_RATIONAL_H__ - -#include <algorithm> -#include <string> -#include <vector> -using std::vector; - -#include <fst/mutable-fst.h> -#include <fst/replace.h> -#include <fst/test-properties.h> - - -namespace fst { - -typedef CacheOptions RationalFstOptions; - -// This specifies whether to add the empty string. -enum ClosureType { CLOSURE_STAR = 0, // T* -> add the empty string - CLOSURE_PLUS = 1 }; // T+ -> don't add the empty string - -template <class A> class RationalFst; -template <class A> void Union(RationalFst<A> *fst1, const Fst<A> &fst2); -template <class A> void Concat(RationalFst<A> *fst1, const Fst<A> &fst2); -template <class A> void Concat(const Fst<A> &fst1, RationalFst<A> *fst2); -template <class A> void Closure(RationalFst<A> *fst, ClosureType closure_type); - - -// Implementation class for delayed unions, concatenations and closures. -template<class A> -class RationalFstImpl : public FstImpl<A> { - public: - using FstImpl<A>::SetType; - using FstImpl<A>::SetProperties; - using FstImpl<A>::WriteHeader; - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - - explicit RationalFstImpl(const RationalFstOptions &opts) - : nonterminals_(0), - replace_(0), - replace_options_(opts, 0) { - SetType("rational"); - fst_tuples_.push_back(pair<Label, const Fst<A>*>(0, 0)); - } - - RationalFstImpl(const RationalFstImpl<A> &impl) - : rfst_(impl.rfst_), - nonterminals_(impl.nonterminals_), - - replace_(impl.replace_ ? impl.replace_->Copy(true) : 0), - replace_options_(impl.replace_options_) { - SetType("rational"); - fst_tuples_.reserve(impl.fst_tuples_.size()); - for (size_t i = 0; i < impl.fst_tuples_.size(); ++i) - fst_tuples_.push_back(make_pair(impl.fst_tuples_[i].first, - impl.fst_tuples_[i].second - ? 
impl.fst_tuples_[i].second->Copy(true) - : 0)); - } - - virtual ~RationalFstImpl() { - for (size_t i = 0; i < fst_tuples_.size(); ++i) - if (fst_tuples_[i].second) - delete fst_tuples_[i].second; - if (replace_) - delete replace_; - } - - StateId Start() { return Replace()->Start(); } - - Weight Final(StateId s) { return Replace()->Final(s); } - - size_t NumArcs(StateId s) { return Replace()->NumArcs(s); } - - size_t NumInputEpsilons(StateId s) { - return Replace()->NumInputEpsilons(s); - } - - size_t NumOutputEpsilons(StateId s) { - return Replace()->NumOutputEpsilons(s); - } - - uint64 Properties() const { return Properties(kFstProperties); } - - // Set error if found; return FST impl properties. - uint64 Properties(uint64 mask) const { - if ((mask & kError) && Replace()->Properties(kError, false)) - SetProperties(kError, kError); - return FstImpl<Arc>::Properties(mask); - } - - // Implementation of UnionFst(fst1,fst2) - void InitUnion(const Fst<A> &fst1, const Fst<A> &fst2) { - if (replace_) - delete replace_; - uint64 props1 = fst1.Properties(kFstProperties, false); - uint64 props2 = fst2.Properties(kFstProperties, false); - SetInputSymbols(fst1.InputSymbols()); - SetOutputSymbols(fst1.OutputSymbols()); - rfst_.AddState(); - rfst_.AddState(); - rfst_.SetStart(0); - rfst_.SetFinal(1, Weight::One()); - rfst_.SetInputSymbols(fst1.InputSymbols()); - rfst_.SetOutputSymbols(fst1.OutputSymbols()); - nonterminals_ = 2; - rfst_.AddArc(0, A(0, -1, Weight::One(), 1)); - rfst_.AddArc(0, A(0, -2, Weight::One(), 1)); - fst_tuples_.push_back(make_pair(-1, fst1.Copy())); - fst_tuples_.push_back(make_pair(-2, fst2.Copy())); - SetProperties(UnionProperties(props1, props2, true), kCopyProperties); - } - - // Implementation of ConcatFst(fst1,fst2) - void InitConcat(const Fst<A> &fst1, const Fst<A> &fst2) { - if (replace_) - delete replace_; - uint64 props1 = fst1.Properties(kFstProperties, false); - uint64 props2 = fst2.Properties(kFstProperties, false); - 
SetInputSymbols(fst1.InputSymbols()); - SetOutputSymbols(fst1.OutputSymbols()); - rfst_.AddState(); - rfst_.AddState(); - rfst_.AddState(); - rfst_.SetStart(0); - rfst_.SetFinal(2, Weight::One()); - rfst_.SetInputSymbols(fst1.InputSymbols()); - rfst_.SetOutputSymbols(fst1.OutputSymbols()); - nonterminals_ = 2; - rfst_.AddArc(0, A(0, -1, Weight::One(), 1)); - rfst_.AddArc(1, A(0, -2, Weight::One(), 2)); - fst_tuples_.push_back(make_pair(-1, fst1.Copy())); - fst_tuples_.push_back(make_pair(-2, fst2.Copy())); - SetProperties(ConcatProperties(props1, props2, true), kCopyProperties); - } - - // Implementation of ClosureFst(fst, closure_type) - void InitClosure(const Fst<A> &fst, ClosureType closure_type) { - if (replace_) - delete replace_; - uint64 props = fst.Properties(kFstProperties, false); - SetInputSymbols(fst.InputSymbols()); - SetOutputSymbols(fst.OutputSymbols()); - if (closure_type == CLOSURE_STAR) { - rfst_.AddState(); - rfst_.SetStart(0); - rfst_.SetFinal(0, Weight::One()); - rfst_.AddArc(0, A(0, -1, Weight::One(), 0)); - } else { - rfst_.AddState(); - rfst_.AddState(); - rfst_.SetStart(0); - rfst_.SetFinal(1, Weight::One()); - rfst_.AddArc(0, A(0, -1, Weight::One(), 1)); - rfst_.AddArc(1, A(0, 0, Weight::One(), 0)); - } - rfst_.SetInputSymbols(fst.InputSymbols()); - rfst_.SetOutputSymbols(fst.OutputSymbols()); - fst_tuples_.push_back(make_pair(-1, fst.Copy())); - nonterminals_ = 1; - SetProperties(ClosureProperties(props, closure_type == CLOSURE_STAR, true), - kCopyProperties); - } - - // Implementation of Union(Fst &, RationalFst *) - void AddUnion(const Fst<A> &fst) { - if (replace_) - delete replace_; - uint64 props1 = FstImpl<A>::Properties(); - uint64 props2 = fst.Properties(kFstProperties, false); - VectorFst<A> afst; - afst.AddState(); - afst.AddState(); - afst.SetStart(0); - afst.SetFinal(1, Weight::One()); - ++nonterminals_; - afst.AddArc(0, A(0, -nonterminals_, Weight::One(), 1)); - Union(&rfst_, afst); - 
fst_tuples_.push_back(make_pair(-nonterminals_, fst.Copy())); - SetProperties(UnionProperties(props1, props2, true), kCopyProperties); - } - - // Implementation of Concat(Fst &, RationalFst *) - void AddConcat(const Fst<A> &fst, bool append) { - if (replace_) - delete replace_; - uint64 props1 = FstImpl<A>::Properties(); - uint64 props2 = fst.Properties(kFstProperties, false); - VectorFst<A> afst; - afst.AddState(); - afst.AddState(); - afst.SetStart(0); - afst.SetFinal(1, Weight::One()); - ++nonterminals_; - afst.AddArc(0, A(0, -nonterminals_, Weight::One(), 1)); - if (append) - Concat(&rfst_, afst); - else - Concat(afst, &rfst_); - fst_tuples_.push_back(make_pair(-nonterminals_, fst.Copy())); - SetProperties(ConcatProperties(props1, props2, true), kCopyProperties); - } - - // Implementation of Closure(RationalFst *, closure_type) - void AddClosure(ClosureType closure_type) { - if (replace_) - delete replace_; - uint64 props = FstImpl<A>::Properties(); - Closure(&rfst_, closure_type); - SetProperties(ClosureProperties(props, closure_type == CLOSURE_STAR, true), - kCopyProperties); - } - - // Returns the underlying ReplaceFst. - ReplaceFst<A> *Replace() const { - if (!replace_) { - fst_tuples_[0].second = rfst_.Copy(); - replace_ = new ReplaceFst<A>(fst_tuples_, replace_options_); - } - return replace_; - } - - private: - VectorFst<A> rfst_; // rational topology machine; uses neg. nonterminals - Label nonterminals_; // # of nonterminals used - // Contains the nonterminals and their corresponding FSTs. - mutable vector<pair<Label, const Fst<A>*> > fst_tuples_; - mutable ReplaceFst<A> *replace_; // Underlying ReplaceFst - ReplaceFstOptions<A> replace_options_; // Options for creating 'replace_' - - void operator=(const RationalFstImpl<A> &impl); // disallow -}; - -// Parent class for the delayed rational operations - delayed union, -// concatenation, and closure. 
-// -// This class attaches interface to implementation and handles -// reference counting, delegating most methods to ImplToFst. -template <class A> -class RationalFst : public ImplToFst< RationalFstImpl<A> > { - public: - friend class StateIterator< RationalFst<A> >; - friend class ArcIterator< RationalFst<A> >; - friend void Union<>(RationalFst<A> *fst1, const Fst<A> &fst2); - friend void Concat<>(RationalFst<A> *fst1, const Fst<A> &fst2); - friend void Concat<>(const Fst<A> &fst1, RationalFst<A> *fst2); - friend void Closure<>(RationalFst<A> *fst, ClosureType closure_type); - - typedef A Arc; - typedef typename A::StateId StateId; - typedef RationalFstImpl<A> Impl; - - virtual void InitStateIterator(StateIteratorData<A> *data) const { - GetImpl()->Replace()->InitStateIterator(data); - } - - virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { - GetImpl()->Replace()->InitArcIterator(s, data); - } - - protected: - RationalFst() - : ImplToFst<Impl>(new Impl(RationalFstOptions())) {} - - explicit RationalFst(const RationalFstOptions &opts) - : ImplToFst<Impl>(new Impl(opts)) {} - - // See Fst<>::Copy() for doc. - RationalFst(const RationalFst<A> &fst , bool safe = false) - : ImplToFst<Impl>(fst, safe) {} - - private: - // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } - - void operator=(const RationalFst<A> &fst); // disallow -}; - - -// Specialization for RationalFst. -template <class A> -class StateIterator< RationalFst<A> > - : public StateIterator< ReplaceFst<A> > { - public: - explicit StateIterator(const RationalFst<A> &fst) - : StateIterator< ReplaceFst<A> >(*(fst.GetImpl()->Replace())) {} -}; - - -// Specialization for RationalFst. 
-template <class A> -class ArcIterator< RationalFst<A> > - : public CacheArcIterator< ReplaceFst<A> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const RationalFst<A> &fst, StateId s) - : ArcIterator< ReplaceFst<A> >(*(fst.GetImpl()->Replace()), s) {} -}; - -} // namespace fst - -#endif // FST_LIB_RATIONAL_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/register.h b/kaldi_io/src/tools/openfst/include/fst/register.h deleted file mode 100644 index ea3f4d8..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/register.h +++ /dev/null @@ -1,133 +0,0 @@ -// register.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Michael Riley), [email protected] (Jake Ratkiewicz) -// -// \file -// Classes for registering derived Fsts for generic reading -// - -#ifndef FST_LIB_REGISTER_H__ -#define FST_LIB_REGISTER_H__ - -#include <string> - - -#include <fst/compat.h> -#include <iostream> -#include <fstream> -#include <sstream> -#include <fst/util.h> -#include <fst/generic-register.h> - - -#include <fst/types.h> - -namespace fst { - -template <class A> class Fst; -struct FstReadOptions; - -// This class represents a single entry in a FstRegister -template<class A> -struct FstRegisterEntry { - typedef Fst<A> *(*Reader)(istream &strm, const FstReadOptions &opts); - typedef Fst<A> *(*Converter)(const Fst<A> &fst); - - Reader reader; - Converter converter; - FstRegisterEntry() : reader(0), converter(0) {} - FstRegisterEntry(Reader r, Converter c) : reader(r), converter(c) { } -}; - -// This class maintains the correspondence between a string describing -// an FST type, and its reader and converter. -template<class A> -class FstRegister : public GenericRegister<string, FstRegisterEntry<A>, - FstRegister<A> > { - public: - typedef typename FstRegisterEntry<A>::Reader Reader; - typedef typename FstRegisterEntry<A>::Converter Converter; - - const Reader GetReader(const string &type) const { - return this->GetEntry(type).reader; - } - - const Converter GetConverter(const string &type) const { - return this->GetEntry(type).converter; - } - - protected: - virtual string ConvertKeyToSoFilename(const string& key) const { - string legal_type(key); - - ConvertToLegalCSymbol(&legal_type); - - return legal_type + "-fst.so"; - } -}; - - -// This class registers an Fst type for generic reading and creating. -// The Fst type must have a default constructor and a copy constructor -// from 'Fst<Arc>' for this to work. 
-template <class F> -class FstRegisterer - : public GenericRegisterer<FstRegister<typename F::Arc> > { - public: - typedef typename F::Arc Arc; - typedef typename FstRegister<Arc>::Entry Entry; - typedef typename FstRegister<Arc>::Reader Reader; - - FstRegisterer() : - GenericRegisterer<FstRegister<typename F::Arc> >( - F().Type(), BuildEntry()) { } - - private: - Entry BuildEntry() { - F *(*reader)(istream &strm, - const FstReadOptions &opts) = &F::Read; - - return Entry(reinterpret_cast<Reader>(reader), - &FstRegisterer<F>::Convert); - } - - static Fst<Arc> *Convert(const Fst<Arc> &fst) { return new F(fst); } -}; - - -// Convenience macro to generate static FstRegisterer instance. -#define REGISTER_FST(F, A) \ -static fst::FstRegisterer< F<A> > F ## _ ## A ## _registerer - - -// Converts an fst to type 'type'. -template <class A> -Fst<A> *Convert(const Fst<A> &fst, const string &ftype) { - FstRegister<A> *registr = FstRegister<A>::GetRegister(); - const typename FstRegister<A>::Converter - converter = registr->GetConverter(ftype); - if (!converter) { - string atype = A::Type(); - LOG(ERROR) << "Fst::Convert: Unknown FST type \"" << ftype - << "\" (arc type = \"" << atype << "\")"; - return 0; - } - return converter(fst); -} - -} // namespace fst - -#endif // FST_LIB_REGISTER_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/relabel.h b/kaldi_io/src/tools/openfst/include/fst/relabel.h deleted file mode 100644 index dc675b6..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/relabel.h +++ /dev/null @@ -1,528 +0,0 @@ -// relabel.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Johan Schalkwyk) -// -// \file -// Functions and classes to relabel an Fst (either on input or output) -// -#ifndef FST_LIB_RELABEL_H__ -#define FST_LIB_RELABEL_H__ - -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <string> -#include <utility> -using std::pair; using std::make_pair; -#include <vector> -using std::vector; - -#include <fst/cache.h> -#include <fst/test-properties.h> - - -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; - -namespace fst { - -// -// Relabels either the input labels or output labels. The old to -// new labels are specified using a vector of pair<Label,Label>. -// Any label associations not specified are assumed to be identity -// mapping. -// -// \param fst input fst, must be mutable -// \param ipairs vector of input label pairs indicating old to new mapping -// \param opairs vector of output label pairs indicating old to new mapping -// -template <class A> -void Relabel( - MutableFst<A> *fst, - const vector<pair<typename A::Label, typename A::Label> >& ipairs, - const vector<pair<typename A::Label, typename A::Label> >& opairs) { - typedef typename A::StateId StateId; - typedef typename A::Label Label; - - uint64 props = fst->Properties(kFstProperties, false); - - // construct label to label hash. 
- unordered_map<Label, Label> input_map; - for (size_t i = 0; i < ipairs.size(); ++i) { - input_map[ipairs[i].first] = ipairs[i].second; - } - - unordered_map<Label, Label> output_map; - for (size_t i = 0; i < opairs.size(); ++i) { - output_map[opairs[i].first] = opairs[i].second; - } - - for (StateIterator<MutableFst<A> > siter(*fst); - !siter.Done(); siter.Next()) { - StateId s = siter.Value(); - for (MutableArcIterator<MutableFst<A> > aiter(fst, s); - !aiter.Done(); aiter.Next()) { - A arc = aiter.Value(); - - // relabel input - // only relabel if relabel pair defined - typename unordered_map<Label, Label>::iterator it = - input_map.find(arc.ilabel); - if (it != input_map.end()) { - if (it->second == kNoLabel) { - FSTERROR() << "Input symbol id " << arc.ilabel - << " missing from target vocabulary"; - fst->SetProperties(kError, kError); - return; - } - arc.ilabel = it->second; - } - - // relabel output - it = output_map.find(arc.olabel); - if (it != output_map.end()) { - if (it->second == kNoLabel) { - FSTERROR() << "Output symbol id " << arc.olabel - << " missing from target vocabulary"; - fst->SetProperties(kError, kError); - return; - } - arc.olabel = it->second; - } - - aiter.SetValue(arc); - } - } - - fst->SetProperties(RelabelProperties(props), kFstProperties); -} - -// -// Relabels either the input labels or output labels. The old to -// new labels mappings are specified using an input Symbol set. -// Any label associations not specified are assumed to be identity -// mapping. 
-// -// \param fst input fst, must be mutable -// \param new_isymbols symbol set indicating new mapping of input symbols -// \param new_osymbols symbol set indicating new mapping of output symbols -// -template<class A> -void Relabel(MutableFst<A> *fst, - const SymbolTable* new_isymbols, - const SymbolTable* new_osymbols) { - Relabel(fst, - fst->InputSymbols(), new_isymbols, true, - fst->OutputSymbols(), new_osymbols, true); -} - -template<class A> -void Relabel(MutableFst<A> *fst, - const SymbolTable* old_isymbols, - const SymbolTable* new_isymbols, - bool attach_new_isymbols, - const SymbolTable* old_osymbols, - const SymbolTable* new_osymbols, - bool attach_new_osymbols) { - typedef typename A::StateId StateId; - typedef typename A::Label Label; - - vector<pair<Label, Label> > ipairs; - if (old_isymbols && new_isymbols) { - for (SymbolTableIterator syms_iter(*old_isymbols); !syms_iter.Done(); - syms_iter.Next()) { - string isymbol = syms_iter.Symbol(); - int isymbol_val = syms_iter.Value(); - int new_isymbol_val = new_isymbols->Find(isymbol); - ipairs.push_back(make_pair(isymbol_val, new_isymbol_val)); - } - if (attach_new_isymbols) - fst->SetInputSymbols(new_isymbols); - } - - vector<pair<Label, Label> > opairs; - if (old_osymbols && new_osymbols) { - for (SymbolTableIterator syms_iter(*old_osymbols); !syms_iter.Done(); - syms_iter.Next()) { - string osymbol = syms_iter.Symbol(); - int osymbol_val = syms_iter.Value(); - int new_osymbol_val = new_osymbols->Find(osymbol); - opairs.push_back(make_pair(osymbol_val, new_osymbol_val)); - } - if (attach_new_osymbols) - fst->SetOutputSymbols(new_osymbols); - } - - // call relabel using vector of relabel pairs. - Relabel(fst, ipairs, opairs); -} - - -typedef CacheOptions RelabelFstOptions; - -template <class A> class RelabelFst; - -// -// \class RelabelFstImpl -// \brief Implementation for delayed relabeling -// -// Relabels an FST from one symbol set to another. Relabeling -// can either be on input or output space. 
RelabelFst implements -// a delayed version of the relabel. Arcs are relabeled on the fly -// and not cached. I.e each request is recomputed. -// -template<class A> -class RelabelFstImpl : public CacheImpl<A> { - friend class StateIterator< RelabelFst<A> >; - public: - using FstImpl<A>::SetType; - using FstImpl<A>::SetProperties; - using FstImpl<A>::WriteHeader; - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - - using CacheImpl<A>::PushArc; - using CacheImpl<A>::HasArcs; - using CacheImpl<A>::HasFinal; - using CacheImpl<A>::HasStart; - using CacheImpl<A>::SetArcs; - using CacheImpl<A>::SetFinal; - using CacheImpl<A>::SetStart; - - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef CacheState<A> State; - - RelabelFstImpl(const Fst<A>& fst, - const vector<pair<Label, Label> >& ipairs, - const vector<pair<Label, Label> >& opairs, - const RelabelFstOptions &opts) - : CacheImpl<A>(opts), fst_(fst.Copy()), - relabel_input_(false), relabel_output_(false) { - uint64 props = fst.Properties(kCopyProperties, false); - SetProperties(RelabelProperties(props)); - SetType("relabel"); - - // create input label map - if (ipairs.size() > 0) { - for (size_t i = 0; i < ipairs.size(); ++i) { - input_map_[ipairs[i].first] = ipairs[i].second; - } - relabel_input_ = true; - } - - // create output label map - if (opairs.size() > 0) { - for (size_t i = 0; i < opairs.size(); ++i) { - output_map_[opairs[i].first] = opairs[i].second; - } - relabel_output_ = true; - } - } - - RelabelFstImpl(const Fst<A>& fst, - const SymbolTable* old_isymbols, - const SymbolTable* new_isymbols, - const SymbolTable* old_osymbols, - const SymbolTable* new_osymbols, - const RelabelFstOptions &opts) - : CacheImpl<A>(opts), fst_(fst.Copy()), - relabel_input_(false), relabel_output_(false) { - SetType("relabel"); - - uint64 props = fst.Properties(kCopyProperties, false); - 
SetProperties(RelabelProperties(props)); - SetInputSymbols(old_isymbols); - SetOutputSymbols(old_osymbols); - - if (old_isymbols && new_isymbols && - old_isymbols->LabeledCheckSum() != new_isymbols->LabeledCheckSum()) { - for (SymbolTableIterator syms_iter(*old_isymbols); !syms_iter.Done(); - syms_iter.Next()) { - input_map_[syms_iter.Value()] = new_isymbols->Find(syms_iter.Symbol()); - } - SetInputSymbols(new_isymbols); - relabel_input_ = true; - } - - if (old_osymbols && new_osymbols && - old_osymbols->LabeledCheckSum() != new_osymbols->LabeledCheckSum()) { - for (SymbolTableIterator syms_iter(*old_osymbols); !syms_iter.Done(); - syms_iter.Next()) { - output_map_[syms_iter.Value()] = - new_osymbols->Find(syms_iter.Symbol()); - } - SetOutputSymbols(new_osymbols); - relabel_output_ = true; - } - } - - RelabelFstImpl(const RelabelFstImpl<A>& impl) - : CacheImpl<A>(impl), - fst_(impl.fst_->Copy(true)), - input_map_(impl.input_map_), - output_map_(impl.output_map_), - relabel_input_(impl.relabel_input_), - relabel_output_(impl.relabel_output_) { - SetType("relabel"); - SetProperties(impl.Properties(), kCopyProperties); - SetInputSymbols(impl.InputSymbols()); - SetOutputSymbols(impl.OutputSymbols()); - } - - ~RelabelFstImpl() { delete fst_; } - - StateId Start() { - if (!HasStart()) { - StateId s = fst_->Start(); - SetStart(s); - } - return CacheImpl<A>::Start(); - } - - Weight Final(StateId s) { - if (!HasFinal(s)) { - SetFinal(s, fst_->Final(s)); - } - return CacheImpl<A>::Final(s); - } - - size_t NumArcs(StateId s) { - if (!HasArcs(s)) { - Expand(s); - } - return CacheImpl<A>::NumArcs(s); - } - - size_t NumInputEpsilons(StateId s) { - if (!HasArcs(s)) { - Expand(s); - } - return CacheImpl<A>::NumInputEpsilons(s); - } - - size_t NumOutputEpsilons(StateId s) { - if (!HasArcs(s)) { - Expand(s); - } - return CacheImpl<A>::NumOutputEpsilons(s); - } - - uint64 Properties() const { return Properties(kFstProperties); } - - // Set error if found; return FST impl properties. 
- uint64 Properties(uint64 mask) const { - if ((mask & kError) && fst_->Properties(kError, false)) - SetProperties(kError, kError); - return FstImpl<Arc>::Properties(mask); - } - - void InitArcIterator(StateId s, ArcIteratorData<A>* data) { - if (!HasArcs(s)) { - Expand(s); - } - CacheImpl<A>::InitArcIterator(s, data); - } - - void Expand(StateId s) { - for (ArcIterator<Fst<A> > aiter(*fst_, s); !aiter.Done(); aiter.Next()) { - A arc = aiter.Value(); - - // relabel input - if (relabel_input_) { - typename unordered_map<Label, Label>::iterator it = - input_map_.find(arc.ilabel); - if (it != input_map_.end()) { arc.ilabel = it->second; } - } - - // relabel output - if (relabel_output_) { - typename unordered_map<Label, Label>::iterator it = - output_map_.find(arc.olabel); - if (it != output_map_.end()) { arc.olabel = it->second; } - } - - PushArc(s, arc); - } - SetArcs(s); - } - - - private: - const Fst<A> *fst_; - - unordered_map<Label, Label> input_map_; - unordered_map<Label, Label> output_map_; - bool relabel_input_; - bool relabel_output_; - - void operator=(const RelabelFstImpl<A> &); // disallow -}; - - -// -// \class RelabelFst -// \brief Delayed implementation of arc relabeling -// -// This class attaches interface to implementation and handles -// reference counting, delegating most methods to ImplToFst. 
-template <class A> -class RelabelFst : public ImplToFst< RelabelFstImpl<A> > { - public: - friend class ArcIterator< RelabelFst<A> >; - friend class StateIterator< RelabelFst<A> >; - - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef CacheState<A> State; - typedef RelabelFstImpl<A> Impl; - - RelabelFst(const Fst<A>& fst, - const vector<pair<Label, Label> >& ipairs, - const vector<pair<Label, Label> >& opairs) - : ImplToFst<Impl>(new Impl(fst, ipairs, opairs, RelabelFstOptions())) {} - - RelabelFst(const Fst<A>& fst, - const vector<pair<Label, Label> >& ipairs, - const vector<pair<Label, Label> >& opairs, - const RelabelFstOptions &opts) - : ImplToFst<Impl>(new Impl(fst, ipairs, opairs, opts)) {} - - RelabelFst(const Fst<A>& fst, - const SymbolTable* new_isymbols, - const SymbolTable* new_osymbols) - : ImplToFst<Impl>(new Impl(fst, fst.InputSymbols(), new_isymbols, - fst.OutputSymbols(), new_osymbols, - RelabelFstOptions())) {} - - RelabelFst(const Fst<A>& fst, - const SymbolTable* new_isymbols, - const SymbolTable* new_osymbols, - const RelabelFstOptions &opts) - : ImplToFst<Impl>(new Impl(fst, fst.InputSymbols(), new_isymbols, - fst.OutputSymbols(), new_osymbols, opts)) {} - - RelabelFst(const Fst<A>& fst, - const SymbolTable* old_isymbols, - const SymbolTable* new_isymbols, - const SymbolTable* old_osymbols, - const SymbolTable* new_osymbols) - : ImplToFst<Impl>(new Impl(fst, old_isymbols, new_isymbols, old_osymbols, - new_osymbols, RelabelFstOptions())) {} - - RelabelFst(const Fst<A>& fst, - const SymbolTable* old_isymbols, - const SymbolTable* new_isymbols, - const SymbolTable* old_osymbols, - const SymbolTable* new_osymbols, - const RelabelFstOptions &opts) - : ImplToFst<Impl>(new Impl(fst, old_isymbols, new_isymbols, old_osymbols, - new_osymbols, opts)) {} - - // See Fst<>::Copy() for doc. 
- RelabelFst(const RelabelFst<A> &fst, bool safe = false) - : ImplToFst<Impl>(fst, safe) {} - - // Get a copy of this RelabelFst. See Fst<>::Copy() for further doc. - virtual RelabelFst<A> *Copy(bool safe = false) const { - return new RelabelFst<A>(*this, safe); - } - - virtual void InitStateIterator(StateIteratorData<A> *data) const; - - virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { - return GetImpl()->InitArcIterator(s, data); - } - - private: - // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } - - void operator=(const RelabelFst<A> &fst); // disallow -}; - -// Specialization for RelabelFst. -template<class A> -class StateIterator< RelabelFst<A> > : public StateIteratorBase<A> { - public: - typedef typename A::StateId StateId; - - explicit StateIterator(const RelabelFst<A> &fst) - : impl_(fst.GetImpl()), siter_(*impl_->fst_), s_(0) {} - - bool Done() const { return siter_.Done(); } - - StateId Value() const { return s_; } - - void Next() { - if (!siter_.Done()) { - ++s_; - siter_.Next(); - } - } - - void Reset() { - s_ = 0; - siter_.Reset(); - } - - private: - bool Done_() const { return Done(); } - StateId Value_() const { return Value(); } - void Next_() { Next(); } - void Reset_() { Reset(); } - - const RelabelFstImpl<A> *impl_; - StateIterator< Fst<A> > siter_; - StateId s_; - - DISALLOW_COPY_AND_ASSIGN(StateIterator); -}; - - -// Specialization for RelabelFst. 
-template <class A> -class ArcIterator< RelabelFst<A> > - : public CacheArcIterator< RelabelFst<A> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const RelabelFst<A> &fst, StateId s) - : CacheArcIterator< RelabelFst<A> >(fst.GetImpl(), s) { - if (!fst.GetImpl()->HasArcs(s)) - fst.GetImpl()->Expand(s); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - -template <class A> inline -void RelabelFst<A>::InitStateIterator(StateIteratorData<A> *data) const { - data->base = new StateIterator< RelabelFst<A> >(*this); -} - -// Useful alias when using StdArc. -typedef RelabelFst<StdArc> StdRelabelFst; - -} // namespace fst - -#endif // FST_LIB_RELABEL_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/replace-util.h b/kaldi_io/src/tools/openfst/include/fst/replace-util.h deleted file mode 100644 index d58cb15..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/replace-util.h +++ /dev/null @@ -1,550 +0,0 @@ -// replace-util.h - - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// - -// \file -// Utility classes for the recursive replacement of Fsts (RTNs). 
- -#ifndef FST_LIB_REPLACE_UTIL_H__ -#define FST_LIB_REPLACE_UTIL_H__ - -#include <vector> -using std::vector; -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <tr1/unordered_set> -using std::tr1::unordered_set; -using std::tr1::unordered_multiset; -#include <map> - -#include <fst/connect.h> -#include <fst/mutable-fst.h> -#include <fst/topsort.h> - - -namespace fst { - -template <class Arc> -void Replace(const vector<pair<typename Arc::Label, const Fst<Arc>* > >&, - MutableFst<Arc> *, typename Arc::Label, bool); - - -// Utility class for the recursive replacement of Fsts (RTNs). The -// user provides a set of Label, Fst pairs at construction. These are -// used by methods for testing cyclic dependencies and connectedness -// and doing RTN connection and specific Fst replacement by label or -// for various optimization properties. The modified results can be -// obtained with the GetFstPairs() or GetMutableFstPairs() methods. -template <class Arc> -class ReplaceUtil { - public: - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - typedef typename Arc::StateId StateId; - - typedef pair<Label, const Fst<Arc>*> FstPair; - typedef pair<Label, MutableFst<Arc>*> MutableFstPair; - typedef unordered_map<Label, Label> NonTerminalHash; - - // Constructs from mutable Fsts; Fst ownership given to ReplaceUtil. - ReplaceUtil(const vector<MutableFstPair> &fst_pairs, - Label root_label, bool epsilon_on_replace = false); - - // Constructs from Fsts; Fst ownership retained by caller. - ReplaceUtil(const vector<FstPair> &fst_pairs, - Label root_label, bool epsilon_on_replace = false); - - // Constructs from ReplaceFst internals; ownership retained by caller. 
- ReplaceUtil(const vector<const Fst<Arc> *> &fst_array, - const NonTerminalHash &nonterminal_hash, Label root_fst, - bool epsilon_on_replace = false); - - ~ReplaceUtil() { - for (Label i = 0; i < fst_array_.size(); ++i) - delete fst_array_[i]; - } - - // True if the non-terminal dependencies are cyclic. Cyclic - // dependencies will result in an unexpandable replace fst. - bool CyclicDependencies() const { - GetDependencies(false); - return depprops_ & kCyclic; - } - - // Returns true if no useless Fsts, states or transitions. - bool Connected() const { - GetDependencies(false); - uint64 props = kAccessible | kCoAccessible; - for (Label i = 0; i < fst_array_.size(); ++i) { - if (!fst_array_[i]) - continue; - if (fst_array_[i]->Properties(props, true) != props || !depaccess_[i]) - return false; - } - return true; - } - - // Removes useless Fsts, states and transitions. - void Connect(); - - // Replaces Fsts specified by labels. - // Does nothing if there are cyclic dependencies. - void ReplaceLabels(const vector<Label> &labels); - - // Replaces Fsts that have at most 'nstates' states, 'narcs' arcs and - // 'nnonterm' non-terminals (updating in reverse dependency order). - // Does nothing if there are cyclic dependencies. - void ReplaceBySize(size_t nstates, size_t narcs, size_t nnonterms); - - // Replaces singleton Fsts. - // Does nothing if there are cyclic dependencies. - void ReplaceTrivial() { ReplaceBySize(2, 1, 1); } - - // Replaces non-terminals that have at most 'ninstances' instances - // (updating in dependency order). - // Does nothing if there are cyclic dependencies. - void ReplaceByInstances(size_t ninstances); - - // Replaces non-terminals that have only one instance. - // Does nothing if there are cyclic dependencies. - void ReplaceUnique() { ReplaceByInstances(1); } - - // Returns Label, Fst pairs; Fst ownership retained by ReplaceUtil. 
- void GetFstPairs(vector<FstPair> *fst_pairs); - - // Returns Label, MutableFst pairs; Fst ownership given to caller. - void GetMutableFstPairs(vector<MutableFstPair> *mutable_fst_pairs); - - private: - // Per Fst statistics - struct ReplaceStats { - StateId nstates; // # of states - StateId nfinal; // # of final states - size_t narcs; // # of arcs - Label nnonterms; // # of non-terminals in Fst - size_t nref; // # of non-terminal instances referring to this Fst - - // # of times that ith Fst references this Fst - map<Label, size_t> inref; - // # of times that this Fst references the ith Fst - map<Label, size_t> outref; - - ReplaceStats() - : nstates(0), - nfinal(0), - narcs(0), - nnonterms(0), - nref(0) {} - }; - - // Check Mutable Fsts exist o.w. create them. - void CheckMutableFsts(); - - // Computes the dependency graph of the replace Fsts. - // If 'stats' is true, dependency statistics computed as well. - void GetDependencies(bool stats) const; - - void ClearDependencies() const { - depfst_.DeleteStates(); - stats_.clear(); - depprops_ = 0; - have_stats_ = false; - } - - // Get topological order of dependencies. Returns false with cyclic input. - bool GetTopOrder(const Fst<Arc> &fst, vector<Label> *toporder) const; - - // Update statistics assuming that jth Fst will be replaced. 
- void UpdateStats(Label j); - - Label root_label_; // root non-terminal - Label root_fst_; // root Fst ID - bool epsilon_on_replace_; // see Replace() - vector<const Fst<Arc> *> fst_array_; // Fst per ID - vector<MutableFst<Arc> *> mutable_fst_array_; // MutableFst per ID - vector<Label> nonterminal_array_; // Fst ID to non-terminal - NonTerminalHash nonterminal_hash_; // non-terminal to Fst ID - mutable VectorFst<Arc> depfst_; // Fst ID dependencies - mutable vector<bool> depaccess_; // Fst ID accessibility - mutable uint64 depprops_; // dependency Fst props - mutable bool have_stats_; // have dependency statistics - mutable vector<ReplaceStats> stats_; // Per Fst statistics - DISALLOW_COPY_AND_ASSIGN(ReplaceUtil); -}; - -template <class Arc> -ReplaceUtil<Arc>::ReplaceUtil( - const vector<MutableFstPair> &fst_pairs, - Label root_label, bool epsilon_on_replace) - : root_label_(root_label), - epsilon_on_replace_(epsilon_on_replace), - depprops_(0), - have_stats_(false) { - fst_array_.push_back(0); - mutable_fst_array_.push_back(0); - nonterminal_array_.push_back(kNoLabel); - for (Label i = 0; i < fst_pairs.size(); ++i) { - Label label = fst_pairs[i].first; - MutableFst<Arc> *fst = fst_pairs[i].second; - nonterminal_hash_[label] = fst_array_.size(); - nonterminal_array_.push_back(label); - fst_array_.push_back(fst); - mutable_fst_array_.push_back(fst); - } - root_fst_ = nonterminal_hash_[root_label_]; - if (!root_fst_) - FSTERROR() << "ReplaceUtil: no root FST for label: " << root_label_; -} - -template <class Arc> -ReplaceUtil<Arc>::ReplaceUtil( - const vector<FstPair> &fst_pairs, - Label root_label, bool epsilon_on_replace) - : root_label_(root_label), - epsilon_on_replace_(epsilon_on_replace), - depprops_(0), - have_stats_(false) { - fst_array_.push_back(0); - nonterminal_array_.push_back(kNoLabel); - for (Label i = 0; i < fst_pairs.size(); ++i) { - Label label = fst_pairs[i].first; - const Fst<Arc> *fst = fst_pairs[i].second; - nonterminal_hash_[label] = 
fst_array_.size(); - nonterminal_array_.push_back(label); - fst_array_.push_back(fst->Copy()); - } - root_fst_ = nonterminal_hash_[root_label]; - if (!root_fst_) - FSTERROR() << "ReplaceUtil: no root FST for label: " << root_label_; -} - -template <class Arc> -ReplaceUtil<Arc>::ReplaceUtil( - const vector<const Fst<Arc> *> &fst_array, - const NonTerminalHash &nonterminal_hash, Label root_fst, - bool epsilon_on_replace) - : root_fst_(root_fst), - epsilon_on_replace_(epsilon_on_replace), - nonterminal_array_(fst_array.size()), - nonterminal_hash_(nonterminal_hash), - depprops_(0), - have_stats_(false) { - fst_array_.push_back(0); - for (Label i = 1; i < fst_array.size(); ++i) - fst_array_.push_back(fst_array[i]->Copy()); - for (typename NonTerminalHash::const_iterator it = - nonterminal_hash.begin(); it != nonterminal_hash.end(); ++it) - nonterminal_array_[it->second] = it->first; - root_label_ = nonterminal_array_[root_fst_]; -} - -template <class Arc> -void ReplaceUtil<Arc>::GetDependencies(bool stats) const { - if (depfst_.NumStates() > 0) { - if (stats && !have_stats_) - ClearDependencies(); - else - return; - } - - have_stats_ = stats; - if (have_stats_) - stats_.reserve(fst_array_.size()); - - for (Label i = 0; i < fst_array_.size(); ++i) { - depfst_.AddState(); - depfst_.SetFinal(i, Weight::One()); - if (have_stats_) - stats_.push_back(ReplaceStats()); - } - depfst_.SetStart(root_fst_); - - // An arc from each state (representing the fst) to the - // state representing the fst being replaced - for (Label i = 0; i < fst_array_.size(); ++i) { - const Fst<Arc> *ifst = fst_array_[i]; - if (!ifst) - continue; - for (StateIterator<Fst<Arc> > siter(*ifst); !siter.Done(); siter.Next()) { - StateId s = siter.Value(); - if (have_stats_) { - ++stats_[i].nstates; - if (ifst->Final(s) != Weight::Zero()) - ++stats_[i].nfinal; - } - for (ArcIterator<Fst<Arc> > aiter(*ifst, s); - !aiter.Done(); aiter.Next()) { - if (have_stats_) - ++stats_[i].narcs; - const Arc& arc = 
aiter.Value(); - - typename NonTerminalHash::const_iterator it = - nonterminal_hash_.find(arc.olabel); - if (it != nonterminal_hash_.end()) { - Label j = it->second; - depfst_.AddArc(i, Arc(arc.olabel, arc.olabel, Weight::One(), j)); - if (have_stats_) { - ++stats_[i].nnonterms; - ++stats_[j].nref; - ++stats_[j].inref[i]; - ++stats_[i].outref[j]; - } - } - } - } - } - - // Gets accessibility info - SccVisitor<Arc> scc_visitor(0, &depaccess_, 0, &depprops_); - DfsVisit(depfst_, &scc_visitor); -} - -template <class Arc> -void ReplaceUtil<Arc>::UpdateStats(Label j) { - if (!have_stats_) { - FSTERROR() << "ReplaceUtil::UpdateStats: stats not available"; - return; - } - - if (j == root_fst_) // can't replace root - return; - - typedef typename map<Label, size_t>::iterator Iter; - for (Iter in = stats_[j].inref.begin(); - in != stats_[j].inref.end(); - ++in) { - Label i = in->first; - size_t ni = in->second; - stats_[i].nstates += stats_[j].nstates * ni; - stats_[i].narcs += (stats_[j].narcs + 1) * ni; // narcs - 1 + 2 (eps) - stats_[i].nnonterms += (stats_[j].nnonterms - 1) * ni; - stats_[i].outref.erase(stats_[i].outref.find(j)); - for (Iter out = stats_[j].outref.begin(); - out != stats_[j].outref.end(); - ++out) { - Label k = out->first; - size_t nk = out->second; - stats_[i].outref[k] += ni * nk; - } - } - - for (Iter out = stats_[j].outref.begin(); - out != stats_[j].outref.end(); - ++out) { - Label k = out->first; - size_t nk = out->second; - stats_[k].nref -= nk; - stats_[k].inref.erase(stats_[k].inref.find(j)); - for (Iter in = stats_[j].inref.begin(); - in != stats_[j].inref.end(); - ++in) { - Label i = in->first; - size_t ni = in->second; - stats_[k].inref[i] += ni * nk; - stats_[k].nref += ni * nk; - } - } -} - -template <class Arc> -void ReplaceUtil<Arc>::CheckMutableFsts() { - if (mutable_fst_array_.size() == 0) { - for (Label i = 0; i < fst_array_.size(); ++i) { - if (!fst_array_[i]) { - mutable_fst_array_.push_back(0); - } else { - 
mutable_fst_array_.push_back(new VectorFst<Arc>(*fst_array_[i])); - delete fst_array_[i]; - fst_array_[i] = mutable_fst_array_[i]; - } - } - } -} - -template <class Arc> -void ReplaceUtil<Arc>::Connect() { - CheckMutableFsts(); - uint64 props = kAccessible | kCoAccessible; - for (Label i = 0; i < mutable_fst_array_.size(); ++i) { - if (!mutable_fst_array_[i]) - continue; - if (mutable_fst_array_[i]->Properties(props, false) != props) - fst::Connect(mutable_fst_array_[i]); - } - GetDependencies(false); - for (Label i = 0; i < mutable_fst_array_.size(); ++i) { - MutableFst<Arc> *fst = mutable_fst_array_[i]; - if (fst && !depaccess_[i]) { - delete fst; - fst_array_[i] = 0; - mutable_fst_array_[i] = 0; - } - } - ClearDependencies(); -} - -template <class Arc> -bool ReplaceUtil<Arc>::GetTopOrder(const Fst<Arc> &fst, - vector<Label> *toporder) const { - // Finds topological order of dependencies. - vector<StateId> order; - bool acyclic = false; - - TopOrderVisitor<Arc> top_order_visitor(&order, &acyclic); - DfsVisit(fst, &top_order_visitor); - if (!acyclic) { - LOG(WARNING) << "ReplaceUtil::GetTopOrder: Cyclical label dependencies"; - return false; - } - - toporder->resize(order.size()); - for (Label i = 0; i < order.size(); ++i) - (*toporder)[order[i]] = i; - - return true; -} - -template <class Arc> -void ReplaceUtil<Arc>::ReplaceLabels(const vector<Label> &labels) { - CheckMutableFsts(); - unordered_set<Label> label_set; - for (Label i = 0; i < labels.size(); ++i) - if (labels[i] != root_label_) // can't replace root - label_set.insert(labels[i]); - - // Finds Fst dependencies restricted to the labels requested. 
- GetDependencies(false); - VectorFst<Arc> pfst(depfst_); - for (StateId i = 0; i < pfst.NumStates(); ++i) { - vector<Arc> arcs; - for (ArcIterator< VectorFst<Arc> > aiter(pfst, i); - !aiter.Done(); aiter.Next()) { - const Arc &arc = aiter.Value(); - Label label = nonterminal_array_[arc.nextstate]; - if (label_set.count(label) > 0) - arcs.push_back(arc); - } - pfst.DeleteArcs(i); - for (size_t j = 0; j < arcs.size(); ++j) - pfst.AddArc(i, arcs[j]); - } - - vector<Label> toporder; - if (!GetTopOrder(pfst, &toporder)) { - ClearDependencies(); - return; - } - - // Visits Fsts in reverse topological order of dependencies and - // performs replacements. - for (Label o = toporder.size() - 1; o >= 0; --o) { - vector<FstPair> fst_pairs; - StateId s = toporder[o]; - for (ArcIterator< VectorFst<Arc> > aiter(pfst, s); - !aiter.Done(); aiter.Next()) { - const Arc &arc = aiter.Value(); - Label label = nonterminal_array_[arc.nextstate]; - const Fst<Arc> *fst = fst_array_[arc.nextstate]; - fst_pairs.push_back(make_pair(label, fst)); - } - if (fst_pairs.empty()) - continue; - Label label = nonterminal_array_[s]; - const Fst<Arc> *fst = fst_array_[s]; - fst_pairs.push_back(make_pair(label, fst)); - - Replace(fst_pairs, mutable_fst_array_[s], label, epsilon_on_replace_); - } - ClearDependencies(); -} - -template <class Arc> -void ReplaceUtil<Arc>::ReplaceBySize(size_t nstates, size_t narcs, - size_t nnonterms) { - vector<Label> labels; - GetDependencies(true); - - vector<Label> toporder; - if (!GetTopOrder(depfst_, &toporder)) { - ClearDependencies(); - return; - } - - for (Label o = toporder.size() - 1; o >= 0; --o) { - Label j = toporder[o]; - if (stats_[j].nstates <= nstates && - stats_[j].narcs <= narcs && - stats_[j].nnonterms <= nnonterms) { - labels.push_back(nonterminal_array_[j]); - UpdateStats(j); - } - } - ReplaceLabels(labels); -} - -template <class Arc> -void ReplaceUtil<Arc>::ReplaceByInstances(size_t ninstances) { - vector<Label> labels; - GetDependencies(true); - - 
vector<Label> toporder; - if (!GetTopOrder(depfst_, &toporder)) { - ClearDependencies(); - return; - } - for (Label o = 0; o < toporder.size(); ++o) { - Label j = toporder[o]; - if (stats_[j].nref <= ninstances) { - labels.push_back(nonterminal_array_[j]); - UpdateStats(j); - } - } - ReplaceLabels(labels); -} - -template <class Arc> -void ReplaceUtil<Arc>::GetFstPairs(vector<FstPair> *fst_pairs) { - CheckMutableFsts(); - fst_pairs->clear(); - for (Label i = 0; i < fst_array_.size(); ++i) { - Label label = nonterminal_array_[i]; - const Fst<Arc> *fst = fst_array_[i]; - if (!fst) - continue; - fst_pairs->push_back(make_pair(label, fst)); - } -} - -template <class Arc> -void ReplaceUtil<Arc>::GetMutableFstPairs( - vector<MutableFstPair> *mutable_fst_pairs) { - CheckMutableFsts(); - mutable_fst_pairs->clear(); - for (Label i = 0; i < mutable_fst_array_.size(); ++i) { - Label label = nonterminal_array_[i]; - MutableFst<Arc> *fst = mutable_fst_array_[i]; - if (!fst) - continue; - mutable_fst_pairs->push_back(make_pair(label, fst->Copy())); - } -} - -} // namespace fst - -#endif // FST_LIB_REPLACE_UTIL_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/replace.h b/kaldi_io/src/tools/openfst/include/fst/replace.h deleted file mode 100644 index ef5f6cc..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/replace.h +++ /dev/null @@ -1,1453 +0,0 @@ -// replace.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Johan Schalkwyk) -// -// \file -// Functions and classes for the recursive replacement of Fsts. -// - -#ifndef FST_LIB_REPLACE_H__ -#define FST_LIB_REPLACE_H__ - -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <set> -#include <string> -#include <utility> -using std::pair; using std::make_pair; -#include <vector> -using std::vector; - -#include <fst/cache.h> -#include <fst/expanded-fst.h> -#include <fst/fst.h> -#include <fst/matcher.h> -#include <fst/replace-util.h> -#include <fst/state-table.h> -#include <fst/test-properties.h> - -namespace fst { - -// -// REPLACE STATE TUPLES AND TABLES -// -// The replace state table has the form -// -// template <class A, class P> -// class ReplaceStateTable { -// public: -// typedef A Arc; -// typedef P PrefixId; -// typedef typename A::StateId StateId; -// typedef ReplaceStateTuple<StateId, PrefixId> StateTuple; -// typedef typename A::Label Label; -// -// // Required constructor -// ReplaceStateTable(const vector<pair<Label, const Fst<A>*> > &fst_tuples, -// Label root); -// -// // Required copy constructor that does not copy state -// ReplaceStateTable(const ReplaceStateTable<A,P> &table); -// -// // Lookup state ID by tuple. If it doesn't exist, then add it. -// StateId FindState(const StateTuple &tuple); -// -// // Lookup state tuple by ID. 
-// const StateTuple &Tuple(StateId id) const; -// }; - - -// \struct ReplaceStateTuple -// \brief Tuple of information that uniquely defines a state in replace -template <class S, class P> -struct ReplaceStateTuple { - typedef S StateId; - typedef P PrefixId; - - ReplaceStateTuple() - : prefix_id(-1), fst_id(kNoStateId), fst_state(kNoStateId) {} - - ReplaceStateTuple(PrefixId p, StateId f, StateId s) - : prefix_id(p), fst_id(f), fst_state(s) {} - - PrefixId prefix_id; // index in prefix table - StateId fst_id; // current fst being walked - StateId fst_state; // current state in fst being walked, not to be - // confused with the state_id of the combined fst -}; - - -// Equality of replace state tuples. -template <class S, class P> -inline bool operator==(const ReplaceStateTuple<S, P>& x, - const ReplaceStateTuple<S, P>& y) { - return x.prefix_id == y.prefix_id && - x.fst_id == y.fst_id && - x.fst_state == y.fst_state; -} - - -// \class ReplaceRootSelector -// Functor returning true for tuples corresponding to states in the root FST -template <class S, class P> -class ReplaceRootSelector { - public: - bool operator()(const ReplaceStateTuple<S, P> &tuple) const { - return tuple.prefix_id == 0; - } -}; - - -// \class ReplaceFingerprint -// Fingerprint for general replace state tuples. -template <class S, class P> -class ReplaceFingerprint { - public: - ReplaceFingerprint(const vector<uint64> *size_array) - : cumulative_size_array_(size_array) {} - - uint64 operator()(const ReplaceStateTuple<S, P> &tuple) const { - return tuple.prefix_id * (cumulative_size_array_->back()) + - cumulative_size_array_->at(tuple.fst_id - 1) + - tuple.fst_state; - } - - private: - const vector<uint64> *cumulative_size_array_; -}; - - -// \class ReplaceFstStateFingerprint -// Useful when the fst_state uniquely define the tuple. 
-template <class S, class P> -class ReplaceFstStateFingerprint { - public: - uint64 operator()(const ReplaceStateTuple<S, P>& tuple) const { - return tuple.fst_state; - } -}; - - -// \class ReplaceHash -// A generic hash function for replace state tuples. -template <typename S, typename P> -class ReplaceHash { - public: - size_t operator()(const ReplaceStateTuple<S, P>& t) const { - return t.prefix_id + t.fst_id * kPrime0 + t.fst_state * kPrime1; - } - private: - static const size_t kPrime0; - static const size_t kPrime1; -}; - -template <typename S, typename P> -const size_t ReplaceHash<S, P>::kPrime0 = 7853; - -template <typename S, typename P> -const size_t ReplaceHash<S, P>::kPrime1 = 7867; - -template <class A, class T> class ReplaceFstMatcher; - - -// \class VectorHashReplaceStateTable -// A two-level state table for replace. -// Warning: calls CountStates to compute the number of states of each -// component Fst. -template <class A, class P = ssize_t> -class VectorHashReplaceStateTable { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef P PrefixId; - typedef ReplaceStateTuple<StateId, P> StateTuple; - typedef VectorHashStateTable<ReplaceStateTuple<StateId, P>, - ReplaceRootSelector<StateId, P>, - ReplaceFstStateFingerprint<StateId, P>, - ReplaceFingerprint<StateId, P> > StateTable; - - VectorHashReplaceStateTable( - const vector<pair<Label, const Fst<A>*> > &fst_tuples, - Label root) : root_size_(0) { - cumulative_size_array_.push_back(0); - for (size_t i = 0; i < fst_tuples.size(); ++i) { - if (fst_tuples[i].first == root) { - root_size_ = CountStates(*(fst_tuples[i].second)); - cumulative_size_array_.push_back(cumulative_size_array_.back()); - } else { - cumulative_size_array_.push_back(cumulative_size_array_.back() + - CountStates(*(fst_tuples[i].second))); - } - } - state_table_ = new StateTable( - new ReplaceRootSelector<StateId, P>, - new ReplaceFstStateFingerprint<StateId, P>, - new 
ReplaceFingerprint<StateId, P>(&cumulative_size_array_), - root_size_, - root_size_ + cumulative_size_array_.back()); - } - - VectorHashReplaceStateTable(const VectorHashReplaceStateTable<A, P> &table) - : root_size_(table.root_size_), - cumulative_size_array_(table.cumulative_size_array_) { - state_table_ = new StateTable( - new ReplaceRootSelector<StateId, P>, - new ReplaceFstStateFingerprint<StateId, P>, - new ReplaceFingerprint<StateId, P>(&cumulative_size_array_), - root_size_, - root_size_ + cumulative_size_array_.back()); - } - - ~VectorHashReplaceStateTable() { - delete state_table_; - } - - StateId FindState(const StateTuple &tuple) { - return state_table_->FindState(tuple); - } - - const StateTuple &Tuple(StateId id) const { - return state_table_->Tuple(id); - } - - private: - StateId root_size_; - vector<uint64> cumulative_size_array_; - StateTable *state_table_; -}; - - -// \class DefaultReplaceStateTable -// Default replace state table -template <class A, class P = ssize_t> -class DefaultReplaceStateTable : public CompactHashStateTable< - ReplaceStateTuple<typename A::StateId, P>, - ReplaceHash<typename A::StateId, P> > { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef P PrefixId; - typedef ReplaceStateTuple<StateId, P> StateTuple; - typedef CompactHashStateTable<StateTuple, - ReplaceHash<StateId, PrefixId> > StateTable; - - using StateTable::FindState; - using StateTable::Tuple; - - DefaultReplaceStateTable( - const vector<pair<Label, const Fst<A>*> > &fst_tuples, - Label root) {} - - DefaultReplaceStateTable(const DefaultReplaceStateTable<A, P> &table) - : StateTable() {} -}; - -// -// REPLACE FST CLASS -// - -// By default ReplaceFst will copy the input label of the 'replace arc'. -// For acceptors we do not want this behaviour. Instead we need to -// create an epsilon arc when recursing into the appropriate Fst. -// The 'epsilon_on_replace' option can be used to toggle this behaviour. 
-template <class A, class T = DefaultReplaceStateTable<A> > -struct ReplaceFstOptions : CacheOptions { - int64 root; // root rule for expansion - bool epsilon_on_replace; - bool take_ownership; // take ownership of input Fst(s) - T* state_table; - - ReplaceFstOptions(const CacheOptions &opts, int64 r) - : CacheOptions(opts), - root(r), - epsilon_on_replace(false), - take_ownership(false), - state_table(0) {} - explicit ReplaceFstOptions(int64 r) - : root(r), - epsilon_on_replace(false), - take_ownership(false), - state_table(0) {} - ReplaceFstOptions(int64 r, bool epsilon_replace_arc) - : root(r), - epsilon_on_replace(epsilon_replace_arc), - take_ownership(false), - state_table(0) {} - ReplaceFstOptions() - : root(kNoLabel), - epsilon_on_replace(false), - take_ownership(false), - state_table(0) {} -}; - - -// \class ReplaceFstImpl -// \brief Implementation class for replace class Fst -// -// The replace implementation class supports a dynamic -// expansion of a recursive transition network represented as Fst -// with dynamic replacable arcs. 
-// -template <class A, class T> -class ReplaceFstImpl : public CacheImpl<A> { - friend class ReplaceFstMatcher<A, T>; - - public: - using FstImpl<A>::SetType; - using FstImpl<A>::SetProperties; - using FstImpl<A>::WriteHeader; - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - using FstImpl<A>::InputSymbols; - using FstImpl<A>::OutputSymbols; - - using CacheImpl<A>::PushArc; - using CacheImpl<A>::HasArcs; - using CacheImpl<A>::HasFinal; - using CacheImpl<A>::HasStart; - using CacheImpl<A>::SetArcs; - using CacheImpl<A>::SetFinal; - using CacheImpl<A>::SetStart; - - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef CacheState<A> State; - typedef A Arc; - typedef unordered_map<Label, Label> NonTerminalHash; - - typedef T StateTable; - typedef typename T::PrefixId PrefixId; - typedef ReplaceStateTuple<StateId, PrefixId> StateTuple; - - // constructor for replace class implementation. - // \param fst_tuples array of label/fst tuples, one for each non-terminal - ReplaceFstImpl(const vector< pair<Label, const Fst<A>* > >& fst_tuples, - const ReplaceFstOptions<A, T> &opts) - : CacheImpl<A>(opts), - epsilon_on_replace_(opts.epsilon_on_replace), - state_table_(opts.state_table ? opts.state_table : - new StateTable(fst_tuples, opts.root)) { - - SetType("replace"); - - if (fst_tuples.size() > 0) { - SetInputSymbols(fst_tuples[0].second->InputSymbols()); - SetOutputSymbols(fst_tuples[0].second->OutputSymbols()); - } - - bool all_negative = true; // all nonterminals are negative? - bool dense_range = true; // all nonterminals are positive - // and form a dense range containing 1? 
- for (size_t i = 0; i < fst_tuples.size(); ++i) { - Label nonterminal = fst_tuples[i].first; - if (nonterminal >= 0) - all_negative = false; - if (nonterminal > fst_tuples.size() || nonterminal <= 0) - dense_range = false; - } - - vector<uint64> inprops; - bool all_ilabel_sorted = true; - bool all_olabel_sorted = true; - bool all_non_empty = true; - fst_array_.push_back(0); - for (size_t i = 0; i < fst_tuples.size(); ++i) { - Label label = fst_tuples[i].first; - const Fst<A> *fst = fst_tuples[i].second; - nonterminal_hash_[label] = fst_array_.size(); - nonterminal_set_.insert(label); - fst_array_.push_back(opts.take_ownership ? fst : fst->Copy()); - if (fst->Start() == kNoStateId) - all_non_empty = false; - if(!fst->Properties(kILabelSorted, false)) - all_ilabel_sorted = false; - if(!fst->Properties(kOLabelSorted, false)) - all_olabel_sorted = false; - inprops.push_back(fst->Properties(kCopyProperties, false)); - if (i) { - if (!CompatSymbols(InputSymbols(), fst->InputSymbols())) { - FSTERROR() << "ReplaceFstImpl: input symbols of Fst " << i - << " does not match input symbols of base Fst (0'th fst)"; - SetProperties(kError, kError); - } - if (!CompatSymbols(OutputSymbols(), fst->OutputSymbols())) { - FSTERROR() << "ReplaceFstImpl: output symbols of Fst " << i - << " does not match output symbols of base Fst " - << "(0'th fst)"; - SetProperties(kError, kError); - } - } - } - Label nonterminal = nonterminal_hash_[opts.root]; - if ((nonterminal == 0) && (fst_array_.size() > 1)) { - FSTERROR() << "ReplaceFstImpl: no Fst corresponding to root label '" - << opts.root << "' in the input tuple vector"; - SetProperties(kError, kError); - } - root_ = (nonterminal > 0) ? nonterminal : 1; - - SetProperties(ReplaceProperties(inprops, root_ - 1, epsilon_on_replace_, - all_non_empty)); - // We assume that all terminals are positive. 
The resulting - // ReplaceFst is known to be kILabelSorted when all sub-FSTs are - // kILabelSorted and one of the 3 following conditions is satisfied: - // 1. 'epsilon_on_replace' is false, or - // 2. all non-terminals are negative, or - // 3. all non-terninals are positive and form a dense range containing 1. - if (all_ilabel_sorted && - (!epsilon_on_replace_ || all_negative || dense_range)) - SetProperties(kILabelSorted, kILabelSorted); - // Similarly, the resulting ReplaceFst is known to be - // kOLabelSorted when all sub-FSTs are kOLabelSorted and one of - // the 2 following conditions is satisfied: - // 1. all non-terminals are negative, or - // 2. all non-terninals are positive and form a dense range containing 1. - if (all_olabel_sorted && (all_negative || dense_range)) - SetProperties(kOLabelSorted, kOLabelSorted); - - // Enable optional caching as long as sorted and all non empty. - if (Properties(kILabelSorted | kOLabelSorted) && all_non_empty) - always_cache_ = false; - else - always_cache_ = true; - VLOG(2) << "ReplaceFstImpl::ReplaceFstImpl: always_cache = " - << (always_cache_ ? "true" : "false"); - } - - ReplaceFstImpl(const ReplaceFstImpl& impl) - : CacheImpl<A>(impl), - epsilon_on_replace_(impl.epsilon_on_replace_), - always_cache_(impl.always_cache_), - state_table_(new StateTable(*(impl.state_table_))), - nonterminal_set_(impl.nonterminal_set_), - nonterminal_hash_(impl.nonterminal_hash_), - root_(impl.root_) { - SetType("replace"); - SetProperties(impl.Properties(), kCopyProperties); - SetInputSymbols(impl.InputSymbols()); - SetOutputSymbols(impl.OutputSymbols()); - fst_array_.reserve(impl.fst_array_.size()); - fst_array_.push_back(0); - for (size_t i = 1; i < impl.fst_array_.size(); ++i) { - fst_array_.push_back(impl.fst_array_[i]->Copy(true)); - } - } - - ~ReplaceFstImpl() { - VLOG(2) << "~ReplaceFstImpl: gc = " - << (CacheImpl<A>::GetCacheGc() ? 
"true" : "false") - << ", gc_size = " << CacheImpl<A>::GetCacheSize() - << ", gc_limit = " << CacheImpl<A>::GetCacheLimit(); - - delete state_table_; - for (size_t i = 1; i < fst_array_.size(); ++i) { - delete fst_array_[i]; - } - } - - // Computes the dependency graph of the replace class and returns - // true if the dependencies are cyclic. Cyclic dependencies will result - // in an un-expandable replace fst. - bool CyclicDependencies() const { - ReplaceUtil<A> replace_util(fst_array_, nonterminal_hash_, root_); - return replace_util.CyclicDependencies(); - } - - // Return or compute start state of replace fst - StateId Start() { - if (!HasStart()) { - if (fst_array_.size() == 1) { // no fsts defined for replace - SetStart(kNoStateId); - return kNoStateId; - } else { - const Fst<A>* fst = fst_array_[root_]; - StateId fst_start = fst->Start(); - if (fst_start == kNoStateId) // root Fst is empty - return kNoStateId; - - PrefixId prefix = GetPrefixId(StackPrefix()); - StateId start = state_table_->FindState( - StateTuple(prefix, root_, fst_start)); - SetStart(start); - return start; - } - } else { - return CacheImpl<A>::Start(); - } - } - - // return final weight of state (kInfWeight means state is not final) - Weight Final(StateId s) { - if (!HasFinal(s)) { - const StateTuple& tuple = state_table_->Tuple(s); - const StackPrefix& stack = stackprefix_array_[tuple.prefix_id]; - const Fst<A>* fst = fst_array_[tuple.fst_id]; - StateId fst_state = tuple.fst_state; - - if (fst->Final(fst_state) != Weight::Zero() && stack.Depth() == 0) - SetFinal(s, fst->Final(fst_state)); - else - SetFinal(s, Weight::Zero()); - } - return CacheImpl<A>::Final(s); - } - - size_t NumArcs(StateId s) { - if (HasArcs(s)) { // If state cached, use the cached value. - return CacheImpl<A>::NumArcs(s); - } else if (always_cache_) { // If always caching, expand and cache state. - Expand(s); - return CacheImpl<A>::NumArcs(s); - } else { // Otherwise compute the number of arcs without expanding. 
- StateTuple tuple = state_table_->Tuple(s); - if (tuple.fst_state == kNoStateId) - return 0; - - const Fst<A>* fst = fst_array_[tuple.fst_id]; - size_t num_arcs = fst->NumArcs(tuple.fst_state); - if (ComputeFinalArc(tuple, 0)) - num_arcs++; - - return num_arcs; - } - } - - // Returns whether a given label is a non terminal - bool IsNonTerminal(Label l) const { - // TODO(allauzen): be smarter and take advantage of - // all_dense or all_negative. - // Use also in ComputeArc, this would require changes to replace - // so that recursing into an empty fst lead to a non co-accessible - // state instead of deleting the arc as done currently. - // Current use correct, since i/olabel sorted iff all_non_empty. - typename NonTerminalHash::const_iterator it = - nonterminal_hash_.find(l); - return it != nonterminal_hash_.end(); - } - - size_t NumInputEpsilons(StateId s) { - if (HasArcs(s)) { - // If state cached, use the cached value. - return CacheImpl<A>::NumInputEpsilons(s); - } else if (always_cache_ || !Properties(kILabelSorted)) { - // If always caching or if the number of input epsilons is too expensive - // to compute without caching (i.e. not ilabel sorted), - // then expand and cache state. - Expand(s); - return CacheImpl<A>::NumInputEpsilons(s); - } else { - // Otherwise, compute the number of input epsilons without caching. - StateTuple tuple = state_table_->Tuple(s); - if (tuple.fst_state == kNoStateId) - return 0; - const Fst<A>* fst = fst_array_[tuple.fst_id]; - size_t num = 0; - if (!epsilon_on_replace_) { - // If epsilon_on_replace is false, all input epsilon arcs - // are also input epsilons arcs in the underlying machine. - fst->NumInputEpsilons(tuple.fst_state); - } else { - // Otherwise, one need to consider that all non-terminal arcs - // in the underlying machine also become input epsilon arc. 
- ArcIterator<Fst<A> > aiter(*fst, tuple.fst_state); - for (; !aiter.Done() && - ((aiter.Value().ilabel == 0) || - IsNonTerminal(aiter.Value().olabel)); - aiter.Next()) - ++num; - } - if (ComputeFinalArc(tuple, 0)) - num++; - return num; - } - } - - size_t NumOutputEpsilons(StateId s) { - if (HasArcs(s)) { - // If state cached, use the cached value. - return CacheImpl<A>::NumOutputEpsilons(s); - } else if(always_cache_ || !Properties(kOLabelSorted)) { - // If always caching or if the number of output epsilons is too expensive - // to compute without caching (i.e. not olabel sorted), - // then expand and cache state. - Expand(s); - return CacheImpl<A>::NumOutputEpsilons(s); - } else { - // Otherwise, compute the number of output epsilons without caching. - StateTuple tuple = state_table_->Tuple(s); - if (tuple.fst_state == kNoStateId) - return 0; - const Fst<A>* fst = fst_array_[tuple.fst_id]; - size_t num = 0; - ArcIterator<Fst<A> > aiter(*fst, tuple.fst_state); - for (; !aiter.Done() && - ((aiter.Value().olabel == 0) || - IsNonTerminal(aiter.Value().olabel)); - aiter.Next()) - ++num; - if (ComputeFinalArc(tuple, 0)) - num++; - return num; - } - } - - uint64 Properties() const { return Properties(kFstProperties); } - - // Set error if found; return FST impl properties. - uint64 Properties(uint64 mask) const { - if (mask & kError) { - for (size_t i = 1; i < fst_array_.size(); ++i) { - if (fst_array_[i]->Properties(kError, false)) - SetProperties(kError, kError); - } - } - return FstImpl<Arc>::Properties(mask); - } - - // return the base arc iterator, if arcs have not been computed yet, - // extend/recurse for new arcs. - void InitArcIterator(StateId s, ArcIteratorData<A> *data) { - if (!HasArcs(s)) - Expand(s); - CacheImpl<A>::InitArcIterator(s, data); - // TODO(allauzen): Set behaviour of generic iterator - // Warning: ArcIterator<ReplaceFst<A> >::InitCache() - // relies on current behaviour. 
- } - - - // Extend current state (walk arcs one level deep) - void Expand(StateId s) { - StateTuple tuple = state_table_->Tuple(s); - - // If local fst is empty - if (tuple.fst_state == kNoStateId) { - SetArcs(s); - return; - } - - ArcIterator< Fst<A> > aiter( - *(fst_array_[tuple.fst_id]), tuple.fst_state); - Arc arc; - - // Create a final arc when needed - if (ComputeFinalArc(tuple, &arc)) - PushArc(s, arc); - - // Expand all arcs leaving the state - for (;!aiter.Done(); aiter.Next()) { - if (ComputeArc(tuple, aiter.Value(), &arc)) - PushArc(s, arc); - } - - SetArcs(s); - } - - void Expand(StateId s, const StateTuple &tuple, - const ArcIteratorData<A> &data) { - // If local fst is empty - if (tuple.fst_state == kNoStateId) { - SetArcs(s); - return; - } - - ArcIterator< Fst<A> > aiter(data); - Arc arc; - - // Create a final arc when needed - if (ComputeFinalArc(tuple, &arc)) - AddArc(s, arc); - - // Expand all arcs leaving the state - for (; !aiter.Done(); aiter.Next()) { - if (ComputeArc(tuple, aiter.Value(), &arc)) - AddArc(s, arc); - } - - SetArcs(s); - } - - // If arcp == 0, only returns if a final arc is required, does not - // actually compute it. 
- bool ComputeFinalArc(const StateTuple &tuple, A* arcp, - uint32 flags = kArcValueFlags) { - const Fst<A>* fst = fst_array_[tuple.fst_id]; - StateId fst_state = tuple.fst_state; - if (fst_state == kNoStateId) - return false; - - // if state is final, pop up stack - const StackPrefix& stack = stackprefix_array_[tuple.prefix_id]; - if (fst->Final(fst_state) != Weight::Zero() && stack.Depth()) { - if (arcp) { - arcp->ilabel = 0; - arcp->olabel = 0; - if (flags & kArcNextStateValue) { - PrefixId prefix_id = PopPrefix(stack); - const PrefixTuple& top = stack.Top(); - arcp->nextstate = state_table_->FindState( - StateTuple(prefix_id, top.fst_id, top.nextstate)); - } - if (flags & kArcWeightValue) - arcp->weight = fst->Final(fst_state); - } - return true; - } else { - return false; - } - } - - // Compute the arc in the replace fst corresponding to a given - // in the underlying machine. Returns false if the underlying arc - // corresponds to no arc in the replace. - bool ComputeArc(const StateTuple &tuple, const A &arc, A* arcp, - uint32 flags = kArcValueFlags) { - if (!epsilon_on_replace_ && - (flags == (flags & (kArcILabelValue | kArcWeightValue)))) { - *arcp = arc; - return true; - } - - if (arc.olabel == 0) { // expand local fst - StateId nextstate = flags & kArcNextStateValue - ? 
state_table_->FindState( - StateTuple(tuple.prefix_id, tuple.fst_id, arc.nextstate)) - : kNoStateId; - *arcp = A(arc.ilabel, arc.olabel, arc.weight, nextstate); - } else { - // check for non terminal - typename NonTerminalHash::const_iterator it = - nonterminal_hash_.find(arc.olabel); - if (it != nonterminal_hash_.end()) { // recurse into non terminal - Label nonterminal = it->second; - const Fst<A>* nt_fst = fst_array_[nonterminal]; - PrefixId nt_prefix = PushPrefix(stackprefix_array_[tuple.prefix_id], - tuple.fst_id, arc.nextstate); - - // if start state is valid replace, else arc is implicitly - // deleted - StateId nt_start = nt_fst->Start(); - if (nt_start != kNoStateId) { - StateId nt_nextstate = flags & kArcNextStateValue - ? state_table_->FindState( - StateTuple(nt_prefix, nonterminal, nt_start)) - : kNoStateId; - Label ilabel = (epsilon_on_replace_) ? 0 : arc.ilabel; - *arcp = A(ilabel, 0, arc.weight, nt_nextstate); - } else { - return false; - } - } else { - StateId nextstate = flags & kArcNextStateValue - ? state_table_->FindState( - StateTuple(tuple.prefix_id, tuple.fst_id, arc.nextstate)) - : kNoStateId; - *arcp = A(arc.ilabel, arc.olabel, arc.weight, nextstate); - } - } - return true; - } - - // Returns the arc iterator flags supported by this Fst. - uint32 ArcIteratorFlags() const { - uint32 flags = kArcValueFlags; - if (!always_cache_) - flags |= kArcNoCache; - return flags; - } - - T* GetStateTable() const { - return state_table_; - } - - const Fst<A>* GetFst(Label fst_id) const { - return fst_array_[fst_id]; - } - - bool EpsilonOnReplace() const { return epsilon_on_replace_; } - - // private helper classes - private: - static const size_t kPrime0; - - // \class PrefixTuple - // \brief Tuple of fst_id and destination state (entry in stack prefix) - struct PrefixTuple { - PrefixTuple(Label f, StateId s) : fst_id(f), nextstate(s) {} - - Label fst_id; - StateId nextstate; - }; - - // \class StackPrefix - // \brief Container for stack prefix. 
- class StackPrefix { - public: - StackPrefix() {} - - // copy constructor - StackPrefix(const StackPrefix& x) : - prefix_(x.prefix_) { - } - - void Push(StateId fst_id, StateId nextstate) { - prefix_.push_back(PrefixTuple(fst_id, nextstate)); - } - - void Pop() { - prefix_.pop_back(); - } - - const PrefixTuple& Top() const { - return prefix_[prefix_.size()-1]; - } - - size_t Depth() const { - return prefix_.size(); - } - - public: - vector<PrefixTuple> prefix_; - }; - - - // \class StackPrefixEqual - // \brief Compare two stack prefix classes for equality - class StackPrefixEqual { - public: - bool operator()(const StackPrefix& x, const StackPrefix& y) const { - if (x.prefix_.size() != y.prefix_.size()) return false; - for (size_t i = 0; i < x.prefix_.size(); ++i) { - if (x.prefix_[i].fst_id != y.prefix_[i].fst_id || - x.prefix_[i].nextstate != y.prefix_[i].nextstate) return false; - } - return true; - } - }; - - // - // \class StackPrefixKey - // \brief Hash function for stack prefix to prefix id - class StackPrefixKey { - public: - size_t operator()(const StackPrefix& x) const { - size_t sum = 0; - for (size_t i = 0; i < x.prefix_.size(); ++i) { - sum += x.prefix_[i].fst_id + x.prefix_[i].nextstate*kPrime0; - } - return sum; - } - }; - - typedef unordered_map<StackPrefix, PrefixId, StackPrefixKey, StackPrefixEqual> - StackPrefixHash; - - // private methods - private: - // hash stack prefix (return unique index into stackprefix array) - PrefixId GetPrefixId(const StackPrefix& prefix) { - typename StackPrefixHash::iterator it = prefix_hash_.find(prefix); - if (it == prefix_hash_.end()) { - PrefixId prefix_id = stackprefix_array_.size(); - stackprefix_array_.push_back(prefix); - prefix_hash_[prefix] = prefix_id; - return prefix_id; - } else { - return it->second; - } - } - - // prefix id after a stack pop - PrefixId PopPrefix(StackPrefix prefix) { - prefix.Pop(); - return GetPrefixId(prefix); - } - - // prefix id after a stack push - PrefixId PushPrefix(StackPrefix 
prefix, Label fst_id, StateId nextstate) { - prefix.Push(fst_id, nextstate); - return GetPrefixId(prefix); - } - - - // private data - private: - // runtime options - bool epsilon_on_replace_; - bool always_cache_; // Optionally caching arc iterator disabled when true - - // state table - StateTable *state_table_; - - // cross index of unique stack prefix - // could potentially have one copy of prefix array - StackPrefixHash prefix_hash_; - vector<StackPrefix> stackprefix_array_; - - set<Label> nonterminal_set_; - NonTerminalHash nonterminal_hash_; - vector<const Fst<A>*> fst_array_; - Label root_; - - void operator=(const ReplaceFstImpl<A, T> &); // disallow -}; - - -template <class A, class T> -const size_t ReplaceFstImpl<A, T>::kPrime0 = 7853; - -// -// \class ReplaceFst -// \brief Recursivively replaces arcs in the root Fst with other Fsts. -// This version is a delayed Fst. -// -// ReplaceFst supports dynamic replacement of arcs in one Fst with -// another Fst. This replacement is recursive. ReplaceFst can be used -// to support a variety of delayed constructions such as recursive -// transition networks, union, or closure. It is constructed with an -// array of Fst(s). One Fst represents the root (or topology) -// machine. The root Fst refers to other Fsts by recursively replacing -// arcs labeled as non-terminals with the matching non-terminal -// Fst. Currently the ReplaceFst uses the output symbols of the arcs -// to determine whether the arc is a non-terminal arc or not. A -// non-terminal can be any label that is not a non-zero terminal label -// in the output alphabet. -// -// Note that the constructor uses a vector of pair<>. These correspond -// to the tuple of non-terminal Label and corresponding Fst. For example -// to implement the closure operation we need 2 Fsts. The first root -// Fst is a single Arc on the start State that self loops, it references -// the particular machine for which we are performing the closure operation. 
-// -// The ReplaceFst class supports an optionally caching arc iterator: -// ArcIterator< ReplaceFst<A> > -// The ReplaceFst need to be built such that it is known to be ilabel -// or olabel sorted (see usage below). -// -// Observe that Matcher<Fst<A> > will use the optionally caching arc -// iterator when available (Fst is ilabel sorted and matching on the -// input, or Fst is olabel sorted and matching on the output). -// In order to obtain the most efficient behaviour, it is recommended -// to set 'epsilon_on_replace' to false (this means constructing acceptors -// as transducers with epsilons on the input side of nonterminal arcs) -// and matching on the input side. -// -// This class attaches interface to implementation and handles -// reference counting, delegating most methods to ImplToFst. -template <class A, class T = DefaultReplaceStateTable<A> > -class ReplaceFst : public ImplToFst< ReplaceFstImpl<A, T> > { - public: - friend class ArcIterator< ReplaceFst<A, T> >; - friend class StateIterator< ReplaceFst<A, T> >; - friend class ReplaceFstMatcher<A, T>; - - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef CacheState<A> State; - typedef ReplaceFstImpl<A, T> Impl; - - using ImplToFst<Impl>::Properties; - - ReplaceFst(const vector<pair<Label, const Fst<A>* > >& fst_array, - Label root) - : ImplToFst<Impl>(new Impl(fst_array, ReplaceFstOptions<A, T>(root))) {} - - ReplaceFst(const vector<pair<Label, const Fst<A>* > >& fst_array, - const ReplaceFstOptions<A, T> &opts) - : ImplToFst<Impl>(new Impl(fst_array, opts)) {} - - // See Fst<>::Copy() for doc. - ReplaceFst(const ReplaceFst<A, T>& fst, bool safe = false) - : ImplToFst<Impl>(fst, safe) {} - - // Get a copy of this ReplaceFst. See Fst<>::Copy() for further doc. 
- virtual ReplaceFst<A, T> *Copy(bool safe = false) const { - return new ReplaceFst<A, T>(*this, safe); - } - - virtual inline void InitStateIterator(StateIteratorData<A> *data) const; - - virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { - GetImpl()->InitArcIterator(s, data); - } - - virtual MatcherBase<A> *InitMatcher(MatchType match_type) const { - if ((GetImpl()->ArcIteratorFlags() & kArcNoCache) && - ((match_type == MATCH_INPUT && Properties(kILabelSorted, false)) || - (match_type == MATCH_OUTPUT && Properties(kOLabelSorted, false)))) { - return new ReplaceFstMatcher<A, T>(*this, match_type); - } - else { - VLOG(2) << "Not using replace matcher"; - return 0; - } - } - - bool CyclicDependencies() const { - return GetImpl()->CyclicDependencies(); - } - - private: - // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } - - void operator=(const ReplaceFst<A> &fst); // disallow -}; - - -// Specialization for ReplaceFst. -template<class A, class T> -class StateIterator< ReplaceFst<A, T> > - : public CacheStateIterator< ReplaceFst<A, T> > { - public: - explicit StateIterator(const ReplaceFst<A, T> &fst) - : CacheStateIterator< ReplaceFst<A, T> >(fst, fst.GetImpl()) {} - - private: - DISALLOW_COPY_AND_ASSIGN(StateIterator); -}; - - -// Specialization for ReplaceFst. -// Implements optional caching. It can be used as follows: -// -// ReplaceFst<A> replace; -// ArcIterator< ReplaceFst<A> > aiter(replace, s); -// // Note: ArcIterator< Fst<A> > is always a caching arc iterator. -// aiter.SetFlags(kArcNoCache, kArcNoCache); -// // Use the arc iterator, no arc will be cached, no state will be expanded. -// // The varied 'kArcValueFlags' can be used to decide which part -// // of arc values needs to be computed. -// aiter.SetFlags(kArcILabelValue, kArcValueFlags); -// // Only want the ilabel for this arc -// aiter.Value(); // Does not compute the destination state. 
-// aiter.Next(); -// aiter.SetFlags(kArcNextStateValue, kArcNextStateValue); -// // Want both ilabel and nextstate for that arc -// aiter.Value(); // Does compute the destination state and inserts it -// // in the replace state table. -// // No Arc has been cached at that point. -// -template <class A, class T> -class ArcIterator< ReplaceFst<A, T> > { - public: - typedef A Arc; - typedef typename A::StateId StateId; - - ArcIterator(const ReplaceFst<A, T> &fst, StateId s) - : fst_(fst), state_(s), pos_(0), offset_(0), flags_(0), arcs_(0), - data_flags_(0), final_flags_(0) { - cache_data_.ref_count = 0; - local_data_.ref_count = 0; - - // If FST does not support optional caching, force caching. - if(!(fst_.GetImpl()->ArcIteratorFlags() & kArcNoCache) && - !(fst_.GetImpl()->HasArcs(state_))) - fst_.GetImpl()->Expand(state_); - - // If state is already cached, use cached arcs array. - if (fst_.GetImpl()->HasArcs(state_)) { - (fst_.GetImpl())->template CacheImpl<A>::InitArcIterator(state_, - &cache_data_); - num_arcs_ = cache_data_.narcs; - arcs_ = cache_data_.arcs; // 'arcs_' is a ptr to the cached arcs. - data_flags_ = kArcValueFlags; // All the arc member values are valid. - } else { // Otherwise delay decision until Value() is called. - tuple_ = fst_.GetImpl()->GetStateTable()->Tuple(state_); - if (tuple_.fst_state == kNoStateId) { - num_arcs_ = 0; - } else { - // The decision to cache or not to cache has been defered - // until Value() or SetFlags() is called. However, the arc - // iterator is set up now to be ready for non-caching in order - // to keep the Value() method simple and efficient. - const Fst<A>* fst = fst_.GetImpl()->GetFst(tuple_.fst_id); - fst->InitArcIterator(tuple_.fst_state, &local_data_); - // 'arcs_' is a pointer to the arcs in the underlying machine. - arcs_ = local_data_.arcs; - // Compute the final arc (but not its destination state) - // if a final arc is required. 
- bool has_final_arc = fst_.GetImpl()->ComputeFinalArc( - tuple_, - &final_arc_, - kArcValueFlags & ~kArcNextStateValue); - // Set the arc value flags that hold for 'final_arc_'. - final_flags_ = kArcValueFlags & ~kArcNextStateValue; - // Compute the number of arcs. - num_arcs_ = local_data_.narcs; - if (has_final_arc) - ++num_arcs_; - // Set the offset between the underlying arc positions and - // the positions in the arc iterator. - offset_ = num_arcs_ - local_data_.narcs; - // Defers the decision to cache or not until Value() or - // SetFlags() is called. - data_flags_ = 0; - } - } - } - - ~ArcIterator() { - if (cache_data_.ref_count) - --(*cache_data_.ref_count); - if (local_data_.ref_count) - --(*local_data_.ref_count); - } - - void ExpandAndCache() const { - // TODO(allauzen): revisit this - // fst_.GetImpl()->Expand(state_, tuple_, local_data_); - // (fst_.GetImpl())->CacheImpl<A>*>::InitArcIterator(state_, - // &cache_data_); - // - fst_.InitArcIterator(state_, &cache_data_); // Expand and cache state. - arcs_ = cache_data_.arcs; // 'arcs_' is a pointer to the cached arcs. - data_flags_ = kArcValueFlags; // All the arc member values are valid. - offset_ = 0; // No offset - - } - - void Init() { - if (flags_ & kArcNoCache) { // If caching is disabled - // 'arcs_' is a pointer to the arcs in the underlying machine. - arcs_ = local_data_.arcs; - // Set the arcs value flags that hold for 'arcs_'. - data_flags_ = kArcWeightValue; - if (!fst_.GetImpl()->EpsilonOnReplace()) - data_flags_ |= kArcILabelValue; - // Set the offset between the underlying arc positions and - // the positions in the arc iterator. - offset_ = num_arcs_ - local_data_.narcs; - } else { // Otherwise, expand and cache - ExpandAndCache(); - } - } - - bool Done() const { return pos_ >= num_arcs_; } - - const A& Value() const { - // If 'data_flags_' was set to 0, non-caching was not requested - if (!data_flags_) { - // TODO(allauzen): revisit this. 
- if (flags_ & kArcNoCache) { - // Should never happen. - FSTERROR() << "ReplaceFst: inconsistent arc iterator flags"; - } - ExpandAndCache(); // Expand and cache. - } - - if (pos_ - offset_ >= 0) { // The requested arc is not the 'final' arc. - const A& arc = arcs_[pos_ - offset_]; - if ((data_flags_ & flags_) == (flags_ & kArcValueFlags)) { - // If the value flags for 'arc' match the recquired value flags - // then return 'arc'. - return arc; - } else { - // Otherwise, compute the corresponding arc on-the-fly. - fst_.GetImpl()->ComputeArc(tuple_, arc, &arc_, flags_ & kArcValueFlags); - return arc_; - } - } else { // The requested arc is the 'final' arc. - if ((final_flags_ & flags_) != (flags_ & kArcValueFlags)) { - // If the arc value flags that hold for the final arc - // do not match the requested value flags, then - // 'final_arc_' needs to be updated. - fst_.GetImpl()->ComputeFinalArc(tuple_, &final_arc_, - flags_ & kArcValueFlags); - final_flags_ = flags_ & kArcValueFlags; - } - return final_arc_; - } - } - - void Next() { ++pos_; } - - size_t Position() const { return pos_; } - - void Reset() { pos_ = 0; } - - void Seek(size_t pos) { pos_ = pos; } - - uint32 Flags() const { return flags_; } - - void SetFlags(uint32 f, uint32 mask) { - // Update the flags taking into account what flags are supported - // by the Fst. - flags_ &= ~mask; - flags_ |= (f & fst_.GetImpl()->ArcIteratorFlags()); - // If non-caching is not requested (and caching has not already - // been performed), then flush 'data_flags_' to request caching - // during the next call to Value(). - if (!(flags_ & kArcNoCache) && data_flags_ != kArcValueFlags) { - if (!fst_.GetImpl()->HasArcs(state_)) - data_flags_ = 0; - } - // If 'data_flags_' has been flushed but non-caching is requested - // before calling Value(), then set up the iterator for non-caching. 
- if ((f & kArcNoCache) && (!data_flags_)) - Init(); - } - - private: - const ReplaceFst<A, T> &fst_; // Reference to the FST - StateId state_; // State in the FST - mutable typename T::StateTuple tuple_; // Tuple corresponding to state_ - - ssize_t pos_; // Current position - mutable ssize_t offset_; // Offset between position in iterator and in arcs_ - ssize_t num_arcs_; // Number of arcs at state_ - uint32 flags_; // Behavorial flags for the arc iterator - mutable Arc arc_; // Memory to temporarily store computed arcs - - mutable ArcIteratorData<Arc> cache_data_; // Arc iterator data in cache - mutable ArcIteratorData<Arc> local_data_; // Arc iterator data in local fst - - mutable const A* arcs_; // Array of arcs - mutable uint32 data_flags_; // Arc value flags valid for data in arcs_ - mutable Arc final_arc_; // Final arc (when required) - mutable uint32 final_flags_; // Arc value flags valid for final_arc_ - - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - - -template <class A, class T> -class ReplaceFstMatcher : public MatcherBase<A> { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef MultiEpsMatcher<Matcher<Fst<A> > > LocalMatcher; - - ReplaceFstMatcher(const ReplaceFst<A, T> &fst, fst::MatchType match_type) - : fst_(fst), - impl_(fst_.GetImpl()), - s_(fst::kNoStateId), - match_type_(match_type), - current_loop_(false), - final_arc_(false), - loop_(fst::kNoLabel, 0, A::Weight::One(), fst::kNoStateId) { - if (match_type_ == fst::MATCH_OUTPUT) - swap(loop_.ilabel, loop_.olabel); - InitMatchers(); - } - - ReplaceFstMatcher(const ReplaceFstMatcher<A, T> &matcher, bool safe = false) - : fst_(matcher.fst_), - impl_(fst_.GetImpl()), - s_(fst::kNoStateId), - match_type_(matcher.match_type_), - current_loop_(false), - loop_(fst::kNoLabel, 0, A::Weight::One(), fst::kNoStateId) { - if (match_type_ == fst::MATCH_OUTPUT) - swap(loop_.ilabel, loop_.olabel); - InitMatchers(); - } - - // Create a local matcher for 
each component Fst of replace. - // LocalMatcher is a multi epsilon wrapper matcher. MultiEpsilonMatcher - // is used to match each non-terminal arc, since these non-terminal - // turn into epsilons on recursion. - void InitMatchers() { - const vector<const Fst<A>*>& fst_array = impl_->fst_array_; - matcher_.resize(fst_array.size(), 0); - for (size_t i = 0; i < fst_array.size(); ++i) { - if (fst_array[i]) { - matcher_[i] = - new LocalMatcher(*fst_array[i], match_type_, kMultiEpsList); - - typename set<Label>::iterator it = impl_->nonterminal_set_.begin(); - for (; it != impl_->nonterminal_set_.end(); ++it) { - matcher_[i]->AddMultiEpsLabel(*it); - } - } - } - } - - virtual ReplaceFstMatcher<A, T> *Copy(bool safe = false) const { - return new ReplaceFstMatcher<A, T>(*this, safe); - } - - virtual ~ReplaceFstMatcher() { - for (size_t i = 0; i < matcher_.size(); ++i) - delete matcher_[i]; - } - - virtual MatchType Type(bool test) const { - if (match_type_ == MATCH_NONE) - return match_type_; - - uint64 true_prop = match_type_ == MATCH_INPUT ? - kILabelSorted : kOLabelSorted; - uint64 false_prop = match_type_ == MATCH_INPUT ? - kNotILabelSorted : kNotOLabelSorted; - uint64 props = fst_.Properties(true_prop | false_prop, test); - - if (props & true_prop) - return match_type_; - else if (props & false_prop) - return MATCH_NONE; - else - return MATCH_UNKNOWN; - } - - virtual const Fst<A> &GetFst() const { - return fst_; - } - - virtual uint64 Properties(uint64 props) const { - return props; - } - - private: - // Set the sate from which our matching happens. - virtual void SetState_(StateId s) { - if (s_ == s) return; - - s_ = s; - tuple_ = impl_->GetStateTable()->Tuple(s_); - if (tuple_.fst_state == kNoStateId) { - done_ = true; - return; - } - // Get current matcher. 
Used for non epsilon matching - current_matcher_ = matcher_[tuple_.fst_id]; - current_matcher_->SetState(tuple_.fst_state); - loop_.nextstate = s_; - - final_arc_ = false; - } - - // Search for label, from previous set state. If label == 0, first - // hallucinate and epsilon loop, else use the underlying matcher to - // search for the label or epsilons. - // - Note since the ReplaceFST recursion on non-terminal arcs causes - // epsilon transitions to be created we use the MultiEpsilonMatcher - // to search for possible matches of non terminals. - // - If the component Fst reaches a final state we also need to add - // the exiting final arc. - virtual bool Find_(Label label) { - bool found = false; - label_ = label; - if (label_ == 0 || label_ == kNoLabel) { - // Compute loop directly, saving Replace::ComputeArc - if (label_ == 0) { - current_loop_ = true; - found = true; - } - // Search for matching multi epsilons - final_arc_ = impl_->ComputeFinalArc(tuple_, 0); - found = current_matcher_->Find(kNoLabel) || final_arc_ || found; - } else { - // Search on sub machine directly using sub machine matcher. 
- found = current_matcher_->Find(label_); - } - return found; - } - - virtual bool Done_() const { - return !current_loop_ && !final_arc_ && current_matcher_->Done(); - } - - virtual const Arc& Value_() const { - if (current_loop_) { - return loop_; - } - if (final_arc_) { - impl_->ComputeFinalArc(tuple_, &arc_); - return arc_; - } - const Arc& component_arc = current_matcher_->Value(); - impl_->ComputeArc(tuple_, component_arc, &arc_); - return arc_; - } - - virtual void Next_() { - if (current_loop_) { - current_loop_ = false; - return; - } - if (final_arc_) { - final_arc_ = false; - return; - } - current_matcher_->Next(); - } - - const ReplaceFst<A, T>& fst_; - ReplaceFstImpl<A, T> *impl_; - LocalMatcher* current_matcher_; - vector<LocalMatcher*> matcher_; - - StateId s_; // Current state - Label label_; // Current label - - MatchType match_type_; // Supplied by caller - mutable bool done_; - mutable bool current_loop_; // Current arc is the implicit loop - mutable bool final_arc_; // Current arc for exiting recursion - mutable typename T::StateTuple tuple_; // Tuple corresponding to state_ - mutable Arc arc_; - Arc loop_; -}; - -template <class A, class T> inline -void ReplaceFst<A, T>::InitStateIterator(StateIteratorData<A> *data) const { - data->base = new StateIterator< ReplaceFst<A, T> >(*this); -} - -typedef ReplaceFst<StdArc> StdReplaceFst; - - -// // Recursivively replaces arcs in the root Fst with other Fsts. -// This version writes the result of replacement to an output MutableFst. -// -// Replace supports replacement of arcs in one Fst with another -// Fst. This replacement is recursive. Replace takes an array of -// Fst(s). One Fst represents the root (or topology) machine. The root -// Fst refers to other Fsts by recursively replacing arcs labeled as -// non-terminals with the matching non-terminal Fst. Currently Replace -// uses the output symbols of the arcs to determine whether the arc is -// a non-terminal arc or not. 
A non-terminal can be any label that is -// not a non-zero terminal label in the output alphabet. Note that -// input argument is a vector of pair<>. These correspond to the tuple -// of non-terminal Label and corresponding Fst. -template<class Arc> -void Replace(const vector<pair<typename Arc::Label, - const Fst<Arc>* > >& ifst_array, - MutableFst<Arc> *ofst, typename Arc::Label root, - bool epsilon_on_replace) { - ReplaceFstOptions<Arc> opts(root, epsilon_on_replace); - opts.gc_limit = 0; // Cache only the last state for fastest copy. - *ofst = ReplaceFst<Arc>(ifst_array, opts); -} - -template<class Arc> -void Replace(const vector<pair<typename Arc::Label, - const Fst<Arc>* > >& ifst_array, - MutableFst<Arc> *ofst, typename Arc::Label root) { - Replace(ifst_array, ofst, root, false); -} - -} // namespace fst - -#endif // FST_LIB_REPLACE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/reverse.h b/kaldi_io/src/tools/openfst/include/fst/reverse.h deleted file mode 100644 index 4d4c75c..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/reverse.h +++ /dev/null @@ -1,91 +0,0 @@ -// reverse.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Functions and classes to sort arcs in an FST. 
- -#ifndef FST_LIB_REVERSE_H__ -#define FST_LIB_REVERSE_H__ - -#include <algorithm> -#include <vector> -using std::vector; - -#include <fst/cache.h> - - -namespace fst { - -// Reverses an FST. The reversed result is written to an output -// MutableFst. If A transduces string x to y with weight a, then the -// reverse of A transduces the reverse of x to the reverse of y with -// weight a.Reverse(). -// -// Typically, a = a.Reverse() and Arc = RevArc (e.g. for -// TropicalWeight or LogWeight). In general, e.g. when the weights -// only form a left or right semiring, the output arc type must match -// the input arc type except having the reversed Weight type. -template<class Arc, class RevArc> -void Reverse(const Fst<Arc> &ifst, MutableFst<RevArc> *ofst) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - typedef typename RevArc::Weight RevWeight; - - ofst->DeleteStates(); - ofst->SetInputSymbols(ifst.InputSymbols()); - ofst->SetOutputSymbols(ifst.OutputSymbols()); - if (ifst.Properties(kExpanded, false)) - ofst->ReserveStates(CountStates(ifst) + 1); - StateId istart = ifst.Start(); - StateId ostart = ofst->AddState(); - ofst->SetStart(ostart); - - for (StateIterator< Fst<Arc> > siter(ifst); - !siter.Done(); - siter.Next()) { - StateId is = siter.Value(); - StateId os = is + 1; - while (ofst->NumStates() <= os) - ofst->AddState(); - if (is == istart) - ofst->SetFinal(os, RevWeight::One()); - - Weight final = ifst.Final(is); - if (final != Weight::Zero()) { - RevArc oarc(0, 0, final.Reverse(), os); - ofst->AddArc(0, oarc); - } - - for (ArcIterator< Fst<Arc> > aiter(ifst, is); - !aiter.Done(); - aiter.Next()) { - const Arc &iarc = aiter.Value(); - RevArc oarc(iarc.ilabel, iarc.olabel, iarc.weight.Reverse(), os); - StateId nos = iarc.nextstate + 1; - while (ofst->NumStates() <= nos) - ofst->AddState(); - ofst->AddArc(nos, oarc); - } - } - uint64 iprops = ifst.Properties(kCopyProperties, false); - uint64 oprops = 
ofst->Properties(kFstProperties, false); - ofst->SetProperties(ReverseProperties(iprops) | oprops, kFstProperties); -} - -} // namespace fst - -#endif // FST_LIB_REVERSE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/reweight.h b/kaldi_io/src/tools/openfst/include/fst/reweight.h deleted file mode 100644 index c051c2a..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/reweight.h +++ /dev/null @@ -1,146 +0,0 @@ -// reweight.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// Function to reweight an FST. - -#ifndef FST_LIB_REWEIGHT_H__ -#define FST_LIB_REWEIGHT_H__ - -#include <vector> -using std::vector; - -#include <fst/mutable-fst.h> - - -namespace fst { - -enum ReweightType { REWEIGHT_TO_INITIAL, REWEIGHT_TO_FINAL }; - -// Reweight FST according to the potentials defined by the POTENTIAL -// vector in the direction defined by TYPE. Weight needs to be left -// distributive when reweighting towards the initial state and right -// distributive when reweighting towards the final states. -// -// An arc of weight w, with an origin state of potential p and -// destination state of potential q, is reweighted by p\wq when -// reweighting towards the initial state and by pw/q when reweighting -// towards the final states. 
-template <class Arc> -void Reweight(MutableFst<Arc> *fst, - const vector<typename Arc::Weight> &potential, - ReweightType type) { - typedef typename Arc::Weight Weight; - - if (fst->NumStates() == 0) - return; - - if (type == REWEIGHT_TO_FINAL && !(Weight::Properties() & kRightSemiring)) { - FSTERROR() << "Reweight: Reweighting to the final states requires " - << "Weight to be right distributive: " - << Weight::Type(); - fst->SetProperties(kError, kError); - return; - } - - if (type == REWEIGHT_TO_INITIAL && !(Weight::Properties() & kLeftSemiring)) { - FSTERROR() << "Reweight: Reweighting to the initial state requires " - << "Weight to be left distributive: " - << Weight::Type(); - fst->SetProperties(kError, kError); - return; - } - - StateIterator< MutableFst<Arc> > sit(*fst); - for (; !sit.Done(); sit.Next()) { - typename Arc::StateId state = sit.Value(); - if (state == potential.size()) - break; - typename Arc::Weight weight = potential[state]; - if (weight != Weight::Zero()) { - for (MutableArcIterator< MutableFst<Arc> > ait(fst, state); - !ait.Done(); - ait.Next()) { - Arc arc = ait.Value(); - if (arc.nextstate >= potential.size()) - continue; - typename Arc::Weight nextweight = potential[arc.nextstate]; - if (nextweight == Weight::Zero()) - continue; - if (type == REWEIGHT_TO_INITIAL) - arc.weight = Divide(Times(arc.weight, nextweight), weight, - DIVIDE_LEFT); - if (type == REWEIGHT_TO_FINAL) - arc.weight = Divide(Times(weight, arc.weight), nextweight, - DIVIDE_RIGHT); - ait.SetValue(arc); - } - if (type == REWEIGHT_TO_INITIAL) - fst->SetFinal(state, Divide(fst->Final(state), weight, DIVIDE_LEFT)); - } - if (type == REWEIGHT_TO_FINAL) - fst->SetFinal(state, Times(weight, fst->Final(state))); - } - - // This handles elements past the end of the potentials array. 
- for (; !sit.Done(); sit.Next()) { - typename Arc::StateId state = sit.Value(); - if (type == REWEIGHT_TO_FINAL) - fst->SetFinal(state, Times(Weight::Zero(), fst->Final(state))); - } - - typename Arc::Weight startweight = fst->Start() < potential.size() ? - potential[fst->Start()] : Weight::Zero(); - if ((startweight != Weight::One()) && (startweight != Weight::Zero())) { - if (fst->Properties(kInitialAcyclic, true) & kInitialAcyclic) { - typename Arc::StateId state = fst->Start(); - for (MutableArcIterator< MutableFst<Arc> > ait(fst, state); - !ait.Done(); - ait.Next()) { - Arc arc = ait.Value(); - if (type == REWEIGHT_TO_INITIAL) - arc.weight = Times(startweight, arc.weight); - else - arc.weight = Times( - Divide(Weight::One(), startweight, DIVIDE_RIGHT), - arc.weight); - ait.SetValue(arc); - } - if (type == REWEIGHT_TO_INITIAL) - fst->SetFinal(state, Times(startweight, fst->Final(state))); - else - fst->SetFinal(state, Times(Divide(Weight::One(), startweight, - DIVIDE_RIGHT), - fst->Final(state))); - } else { - typename Arc::StateId state = fst->AddState(); - Weight w = type == REWEIGHT_TO_INITIAL ? startweight : - Divide(Weight::One(), startweight, DIVIDE_RIGHT); - Arc arc(0, 0, w, fst->Start()); - fst->AddArc(state, arc); - fst->SetStart(state); - } - } - - fst->SetProperties(ReweightProperties( - fst->Properties(kFstProperties, false)), - kFstProperties); -} - -} // namespace fst - -#endif // FST_LIB_REWEIGHT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/rmepsilon.h b/kaldi_io/src/tools/openfst/include/fst/rmepsilon.h deleted file mode 100644 index 89b8178..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/rmepsilon.h +++ /dev/null @@ -1,600 +0,0 @@ -// rmepsilon.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// Functions and classes that implemement epsilon-removal. - -#ifndef FST_LIB_RMEPSILON_H__ -#define FST_LIB_RMEPSILON_H__ - -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <fst/slist.h> -#include <stack> -#include <string> -#include <utility> -using std::pair; using std::make_pair; -#include <vector> -using std::vector; - -#include <fst/arcfilter.h> -#include <fst/cache.h> -#include <fst/connect.h> -#include <fst/factor-weight.h> -#include <fst/invert.h> -#include <fst/prune.h> -#include <fst/queue.h> -#include <fst/shortest-distance.h> -#include <fst/topsort.h> - - -namespace fst { - -template <class Arc, class Queue> -class RmEpsilonOptions - : public ShortestDistanceOptions<Arc, Queue, EpsilonArcFilter<Arc> > { - public: - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - - bool connect; // Connect output - Weight weight_threshold; // Pruning weight threshold. - StateId state_threshold; // Pruning state threshold. - - explicit RmEpsilonOptions(Queue *q, float d = kDelta, bool c = true, - Weight w = Weight::Zero(), - StateId n = kNoStateId) - : ShortestDistanceOptions< Arc, Queue, EpsilonArcFilter<Arc> >( - q, EpsilonArcFilter<Arc>(), kNoStateId, d), - connect(c), weight_threshold(w), state_threshold(n) {} - private: - RmEpsilonOptions(); // disallow -}; - -// Computation state of the epsilon-removal algorithm. 
-template <class Arc, class Queue> -class RmEpsilonState { - public: - typedef typename Arc::Label Label; - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - - RmEpsilonState(const Fst<Arc> &fst, - vector<Weight> *distance, - const RmEpsilonOptions<Arc, Queue> &opts) - : fst_(fst), distance_(distance), sd_state_(fst_, distance, opts, true), - expand_id_(0) {} - - // Compute arcs and final weight for state 's' - void Expand(StateId s); - - // Returns arcs of expanded state. - vector<Arc> &Arcs() { return arcs_; } - - // Returns final weight of expanded state. - const Weight &Final() const { return final_; } - - // Return true if an error has occured. - bool Error() const { return sd_state_.Error(); } - - private: - static const size_t kPrime0 = 7853; - static const size_t kPrime1 = 7867; - - struct Element { - Label ilabel; - Label olabel; - StateId nextstate; - - Element() {} - - Element(Label i, Label o, StateId s) - : ilabel(i), olabel(o), nextstate(s) {} - }; - - class ElementKey { - public: - size_t operator()(const Element& e) const { - return static_cast<size_t>(e.nextstate + - e.ilabel * kPrime0 + - e.olabel * kPrime1); - } - - private: - }; - - class ElementEqual { - public: - bool operator()(const Element &e1, const Element &e2) const { - return (e1.ilabel == e2.ilabel) && (e1.olabel == e2.olabel) - && (e1.nextstate == e2.nextstate); - } - }; - - typedef unordered_map<Element, pair<StateId, size_t>, - ElementKey, ElementEqual> ElementMap; - - const Fst<Arc> &fst_; - // Distance from state being expanded in epsilon-closure. - vector<Weight> *distance_; - // Shortest distance algorithm computation state. - ShortestDistanceState<Arc, Queue, EpsilonArcFilter<Arc> > sd_state_; - // Maps an element 'e' to a pair 'p' corresponding to a position - // in the arcs vector of the state being expanded. 'e' corresponds - // to the position 'p.second' in the 'arcs_' vector if 'p.first' is - // equal to the state being expanded. 
- ElementMap element_map_; - EpsilonArcFilter<Arc> eps_filter_; - stack<StateId> eps_queue_; // Queue used to visit the epsilon-closure - vector<bool> visited_; // '[i] = true' if state 'i' has been visited - slist<StateId> visited_states_; // List of visited states - vector<Arc> arcs_; // Arcs of state being expanded - Weight final_; // Final weight of state being expanded - StateId expand_id_; // Unique ID for each call to Expand - - DISALLOW_COPY_AND_ASSIGN(RmEpsilonState); -}; - -template <class Arc, class Queue> -const size_t RmEpsilonState<Arc, Queue>::kPrime0; -template <class Arc, class Queue> -const size_t RmEpsilonState<Arc, Queue>::kPrime1; - - -template <class Arc, class Queue> -void RmEpsilonState<Arc,Queue>::Expand(typename Arc::StateId source) { - final_ = Weight::Zero(); - arcs_.clear(); - sd_state_.ShortestDistance(source); - if (sd_state_.Error()) - return; - eps_queue_.push(source); - - while (!eps_queue_.empty()) { - StateId state = eps_queue_.top(); - eps_queue_.pop(); - - while (visited_.size() <= state) visited_.push_back(false); - if (visited_[state]) continue; - visited_[state] = true; - visited_states_.push_front(state); - - for (ArcIterator< Fst<Arc> > ait(fst_, state); - !ait.Done(); - ait.Next()) { - Arc arc = ait.Value(); - arc.weight = Times((*distance_)[state], arc.weight); - - if (eps_filter_(arc)) { - while (visited_.size() <= arc.nextstate) - visited_.push_back(false); - if (!visited_[arc.nextstate]) - eps_queue_.push(arc.nextstate); - } else { - Element element(arc.ilabel, arc.olabel, arc.nextstate); - typename ElementMap::iterator it = element_map_.find(element); - if (it == element_map_.end()) { - element_map_.insert( - pair<Element, pair<StateId, size_t> > - (element, pair<StateId, size_t>(expand_id_, arcs_.size()))); - arcs_.push_back(arc); - } else { - if (((*it).second).first == expand_id_) { - Weight &w = arcs_[((*it).second).second].weight; - w = Plus(w, arc.weight); - } else { - ((*it).second).first = expand_id_; - 
((*it).second).second = arcs_.size(); - arcs_.push_back(arc); - } - } - } - } - final_ = Plus(final_, Times((*distance_)[state], fst_.Final(state))); - } - - while (!visited_states_.empty()) { - visited_[visited_states_.front()] = false; - visited_states_.pop_front(); - } - ++expand_id_; -} - -// Removes epsilon-transitions (when both the input and output label -// are an epsilon) from a transducer. The result will be an equivalent -// FST that has no such epsilon transitions. This version modifies -// its input. It allows fine control via the options argument; see -// below for a simpler interface. -// -// The vector 'distance' will be used to hold the shortest distances -// during the epsilon-closure computation. The state queue discipline -// and convergence delta are taken in the options argument. -template <class Arc, class Queue> -void RmEpsilon(MutableFst<Arc> *fst, - vector<typename Arc::Weight> *distance, - const RmEpsilonOptions<Arc, Queue> &opts) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - typedef typename Arc::Label Label; - - if (fst->Start() == kNoStateId) { - return; - } - - // 'noneps_in[s]' will be set to true iff 's' admits a non-epsilon - // incoming transition or is the start state. - vector<bool> noneps_in(fst->NumStates(), false); - noneps_in[fst->Start()] = true; - for (StateId i = 0; i < fst->NumStates(); ++i) { - for (ArcIterator<Fst<Arc> > aiter(*fst, i); - !aiter.Done(); - aiter.Next()) { - if (aiter.Value().ilabel != 0 || aiter.Value().olabel != 0) - noneps_in[aiter.Value().nextstate] = true; - } - } - - // States sorted in topological order when (acyclic) or generic - // topological order (cyclic). 
- vector<StateId> states; - states.reserve(fst->NumStates()); - - if (fst->Properties(kTopSorted, false) & kTopSorted) { - for (StateId i = 0; i < fst->NumStates(); i++) - states.push_back(i); - } else if (fst->Properties(kAcyclic, false) & kAcyclic) { - vector<StateId> order; - bool acyclic; - TopOrderVisitor<Arc> top_order_visitor(&order, &acyclic); - DfsVisit(*fst, &top_order_visitor, EpsilonArcFilter<Arc>()); - // Sanity check: should be acyclic if property bit is set. - if(!acyclic) { - FSTERROR() << "RmEpsilon: inconsistent acyclic property bit"; - fst->SetProperties(kError, kError); - return; - } - states.resize(order.size()); - for (StateId i = 0; i < order.size(); i++) - states[order[i]] = i; - } else { - uint64 props; - vector<StateId> scc; - SccVisitor<Arc> scc_visitor(&scc, 0, 0, &props); - DfsVisit(*fst, &scc_visitor, EpsilonArcFilter<Arc>()); - vector<StateId> first(scc.size(), kNoStateId); - vector<StateId> next(scc.size(), kNoStateId); - for (StateId i = 0; i < scc.size(); i++) { - if (first[scc[i]] != kNoStateId) - next[i] = first[scc[i]]; - first[scc[i]] = i; - } - for (StateId i = 0; i < first.size(); i++) - for (StateId j = first[i]; j != kNoStateId; j = next[j]) - states.push_back(j); - } - - RmEpsilonState<Arc, Queue> - rmeps_state(*fst, distance, opts); - - while (!states.empty()) { - StateId state = states.back(); - states.pop_back(); - if (!noneps_in[state]) - continue; - rmeps_state.Expand(state); - fst->SetFinal(state, rmeps_state.Final()); - fst->DeleteArcs(state); - vector<Arc> &arcs = rmeps_state.Arcs(); - fst->ReserveArcs(state, arcs.size()); - while (!arcs.empty()) { - fst->AddArc(state, arcs.back()); - arcs.pop_back(); - } - } - - for (StateId s = 0; s < fst->NumStates(); ++s) { - if (!noneps_in[s]) - fst->DeleteArcs(s); - } - - if(rmeps_state.Error()) - fst->SetProperties(kError, kError); - fst->SetProperties( - RmEpsilonProperties(fst->Properties(kFstProperties, false)), - kFstProperties); - - if (opts.weight_threshold != 
Weight::Zero() || - opts.state_threshold != kNoStateId) - Prune(fst, opts.weight_threshold, opts.state_threshold); - if (opts.connect && (opts.weight_threshold == Weight::Zero() || - opts.state_threshold != kNoStateId)) - Connect(fst); -} - -// Removes epsilon-transitions (when both the input and output label -// are an epsilon) from a transducer. The result will be an equivalent -// FST that has no such epsilon transitions. This version modifies its -// input. It has a simplified interface; see above for a version that -// allows finer control. -// -// Complexity: -// - Time: -// - Unweighted: O(V2 + V E) -// - Acyclic: O(V2 + V E) -// - Tropical semiring: O(V2 log V + V E) -// - General: exponential -// - Space: O(V E) -// where V = # of states visited, E = # of arcs. -// -// References: -// - Mehryar Mohri. Generic Epsilon-Removal and Input -// Epsilon-Normalization Algorithms for Weighted Transducers, -// "International Journal of Computer Science", 13(1):129-143 (2002). -template <class Arc> -void RmEpsilon(MutableFst<Arc> *fst, - bool connect = true, - typename Arc::Weight weight_threshold = Arc::Weight::Zero(), - typename Arc::StateId state_threshold = kNoStateId, - float delta = kDelta) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - typedef typename Arc::Label Label; - - vector<Weight> distance; - AutoQueue<StateId> state_queue(*fst, &distance, EpsilonArcFilter<Arc>()); - RmEpsilonOptions<Arc, AutoQueue<StateId> > - opts(&state_queue, delta, connect, weight_threshold, state_threshold); - - RmEpsilon(fst, &distance, opts); -} - - -struct RmEpsilonFstOptions : CacheOptions { - float delta; - - RmEpsilonFstOptions(const CacheOptions &opts, float delta = kDelta) - : CacheOptions(opts), delta(delta) {} - - explicit RmEpsilonFstOptions(float delta = kDelta) : delta(delta) {} -}; - - -// Implementation of delayed RmEpsilonFst. 
-template <class A> -class RmEpsilonFstImpl : public CacheImpl<A> { - public: - using FstImpl<A>::SetType; - using FstImpl<A>::SetProperties; - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - - using CacheBaseImpl< CacheState<A> >::PushArc; - using CacheBaseImpl< CacheState<A> >::HasArcs; - using CacheBaseImpl< CacheState<A> >::HasFinal; - using CacheBaseImpl< CacheState<A> >::HasStart; - using CacheBaseImpl< CacheState<A> >::SetArcs; - using CacheBaseImpl< CacheState<A> >::SetFinal; - using CacheBaseImpl< CacheState<A> >::SetStart; - - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef CacheState<A> State; - - RmEpsilonFstImpl(const Fst<A>& fst, const RmEpsilonFstOptions &opts) - : CacheImpl<A>(opts), - fst_(fst.Copy()), - delta_(opts.delta), - rmeps_state_( - *fst_, - &distance_, - RmEpsilonOptions<A, FifoQueue<StateId> >(&queue_, delta_, false)) { - SetType("rmepsilon"); - uint64 props = fst.Properties(kFstProperties, false); - SetProperties(RmEpsilonProperties(props, true), kCopyProperties); - SetInputSymbols(fst.InputSymbols()); - SetOutputSymbols(fst.OutputSymbols()); - } - - RmEpsilonFstImpl(const RmEpsilonFstImpl &impl) - : CacheImpl<A>(impl), - fst_(impl.fst_->Copy(true)), - delta_(impl.delta_), - rmeps_state_( - *fst_, - &distance_, - RmEpsilonOptions<A, FifoQueue<StateId> >(&queue_, delta_, false)) { - SetType("rmepsilon"); - SetProperties(impl.Properties(), kCopyProperties); - SetInputSymbols(impl.InputSymbols()); - SetOutputSymbols(impl.OutputSymbols()); - } - - ~RmEpsilonFstImpl() { - delete fst_; - } - - StateId Start() { - if (!HasStart()) { - SetStart(fst_->Start()); - } - return CacheImpl<A>::Start(); - } - - Weight Final(StateId s) { - if (!HasFinal(s)) { - Expand(s); - } - return CacheImpl<A>::Final(s); - } - - size_t NumArcs(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumArcs(s); - } - - size_t NumInputEpsilons(StateId s) { - 
if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumInputEpsilons(s); - } - - size_t NumOutputEpsilons(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumOutputEpsilons(s); - } - - uint64 Properties() const { return Properties(kFstProperties); } - - // Set error if found; return FST impl properties. - uint64 Properties(uint64 mask) const { - if ((mask & kError) && - (fst_->Properties(kError, false) || rmeps_state_.Error())) - SetProperties(kError, kError); - return FstImpl<A>::Properties(mask); - } - - void InitArcIterator(StateId s, ArcIteratorData<A> *data) { - if (!HasArcs(s)) - Expand(s); - CacheImpl<A>::InitArcIterator(s, data); - } - - void Expand(StateId s) { - rmeps_state_.Expand(s); - SetFinal(s, rmeps_state_.Final()); - vector<A> &arcs = rmeps_state_.Arcs(); - while (!arcs.empty()) { - PushArc(s, arcs.back()); - arcs.pop_back(); - } - SetArcs(s); - } - - private: - const Fst<A> *fst_; - float delta_; - vector<Weight> distance_; - FifoQueue<StateId> queue_; - RmEpsilonState<A, FifoQueue<StateId> > rmeps_state_; - - void operator=(const RmEpsilonFstImpl<A> &); // disallow -}; - - -// Removes epsilon-transitions (when both the input and output label -// are an epsilon) from a transducer. The result will be an equivalent -// FST that has no such epsilon transitions. This version is a -// delayed Fst. -// -// Complexity: -// - Time: -// - Unweighted: O(v^2 + v e) -// - General: exponential -// - Space: O(v e) -// where v = # of states visited, e = # of arcs visited. Constant time -// to visit an input state or arc is assumed and exclusive of caching. -// -// References: -// - Mehryar Mohri. Generic Epsilon-Removal and Input -// Epsilon-Normalization Algorithms for Weighted Transducers, -// "International Journal of Computer Science", 13(1):129-143 (2002). -// -// This class attaches interface to implementation and handles -// reference counting, delegating most methods to ImplToFst. 
-template <class A> -class RmEpsilonFst : public ImplToFst< RmEpsilonFstImpl<A> > { - public: - friend class ArcIterator< RmEpsilonFst<A> >; - friend class StateIterator< RmEpsilonFst<A> >; - - typedef A Arc; - typedef typename A::StateId StateId; - typedef CacheState<A> State; - typedef RmEpsilonFstImpl<A> Impl; - - RmEpsilonFst(const Fst<A> &fst) - : ImplToFst<Impl>(new Impl(fst, RmEpsilonFstOptions())) {} - - RmEpsilonFst(const Fst<A> &fst, const RmEpsilonFstOptions &opts) - : ImplToFst<Impl>(new Impl(fst, opts)) {} - - // See Fst<>::Copy() for doc. - RmEpsilonFst(const RmEpsilonFst<A> &fst, bool safe = false) - : ImplToFst<Impl>(fst, safe) {} - - // Get a copy of this RmEpsilonFst. See Fst<>::Copy() for further doc. - virtual RmEpsilonFst<A> *Copy(bool safe = false) const { - return new RmEpsilonFst<A>(*this, safe); - } - - virtual inline void InitStateIterator(StateIteratorData<A> *data) const; - - virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { - GetImpl()->InitArcIterator(s, data); - } - - private: - // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } - - void operator=(const RmEpsilonFst<A> &fst); // disallow -}; - -// Specialization for RmEpsilonFst. -template<class A> -class StateIterator< RmEpsilonFst<A> > - : public CacheStateIterator< RmEpsilonFst<A> > { - public: - explicit StateIterator(const RmEpsilonFst<A> &fst) - : CacheStateIterator< RmEpsilonFst<A> >(fst, fst.GetImpl()) {} -}; - - -// Specialization for RmEpsilonFst. 
-template <class A> -class ArcIterator< RmEpsilonFst<A> > - : public CacheArcIterator< RmEpsilonFst<A> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const RmEpsilonFst<A> &fst, StateId s) - : CacheArcIterator< RmEpsilonFst<A> >(fst.GetImpl(), s) { - if (!fst.GetImpl()->HasArcs(s)) - fst.GetImpl()->Expand(s); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - - -template <class A> inline -void RmEpsilonFst<A>::InitStateIterator(StateIteratorData<A> *data) const { - data->base = new StateIterator< RmEpsilonFst<A> >(*this); -} - - -// Useful alias when using StdArc. -typedef RmEpsilonFst<StdArc> StdRmEpsilonFst; - -} // namespace fst - -#endif // FST_LIB_RMEPSILON_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/rmfinalepsilon.h b/kaldi_io/src/tools/openfst/include/fst/rmfinalepsilon.h deleted file mode 100644 index eb0f937..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/rmfinalepsilon.h +++ /dev/null @@ -1,107 +0,0 @@ -// rmfinalepsilon.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Johan Schalkwyk) -// -// \file -// Function to remove of final states that have epsilon only input arcs. 
- -#ifndef FST_LIB_RMFINALEPSILON_H__ -#define FST_LIB_RMFINALEPSILON_H__ - -#include <tr1/unordered_set> -using std::tr1::unordered_set; -using std::tr1::unordered_multiset; -#include <vector> -using std::vector; - -#include <fst/connect.h> -#include <fst/mutable-fst.h> - - -namespace fst { - -template<class A> -void RmFinalEpsilon(MutableFst<A>* fst) { - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - // Determine the coaccesibility of states. - vector<bool> access; - vector<bool> coaccess; - uint64 props = 0; - SccVisitor<A> scc_visitor(0, &access, &coaccess, &props); - DfsVisit(*fst, &scc_visitor); - - // Find potential list of removable final states. These are final states - // that have no outgoing transitions or final states that have a - // non-coaccessible future. Complexity O(S) - unordered_set<StateId> finals; - for (StateIterator<Fst<A> > siter(*fst); !siter.Done(); siter.Next()) { - StateId s = siter.Value(); - if (fst->Final(s) != Weight::Zero()) { - bool future_coaccess = false; - for (ArcIterator<Fst<A> > aiter(*fst, s); !aiter.Done(); aiter.Next()) { - const A& arc = aiter.Value(); - if (coaccess[arc.nextstate]) { - future_coaccess = true; - break; - } - } - if (!future_coaccess) { - finals.insert(s); - } - } - } - - // Move the final weight. 
Complexity O(E) - vector<A> arcs; - for (StateIterator<Fst<A> > siter(*fst); !siter.Done(); siter.Next()) { - StateId s = siter.Value(); - Weight w(fst->Final(s)); - - arcs.clear(); - for (ArcIterator<Fst<A> > aiter(*fst, s); !aiter.Done(); aiter.Next()) { - const A& arc = aiter.Value(); - // is next state in the list of finals - if (finals.find(arc.nextstate) != finals.end()) { - // sum up all epsilon arcs - if (arc.ilabel == 0 && arc.olabel == 0) { - w = Plus(Times(fst->Final(arc.nextstate), arc.weight), w); - } else { - arcs.push_back(arc); - } - } else { - arcs.push_back(arc); - } - } - - // If some arcs (epsilon arcs) were deleted, delete all - // arcs and add back only the non epsilon arcs - if (arcs.size() < fst->NumArcs(s)) { - fst->DeleteArcs(s); - fst->SetFinal(s, w); - for (size_t i = 0; i < arcs.size(); ++i) { - fst->AddArc(s, arcs[i]); - } - } - } - - Connect(fst); -} - -} // namespace fst - -#endif // FST_LIB_RMFINALEPSILON_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/arcsort.h b/kaldi_io/src/tools/openfst/include/fst/script/arcsort.h deleted file mode 100644 index 4277332..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/arcsort.h +++ /dev/null @@ -1,49 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_ARCSORT_H_ -#define FST_SCRIPT_ARCSORT_H_ - -#include <fst/arcsort.h> -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> - -namespace fst { -namespace script { - -enum ArcSortType { ILABEL_COMPARE, OLABEL_COMPARE }; - -typedef args::Package<MutableFstClass*, const ArcSortType> ArcSortArgs; - -template<class Arc> -void ArcSort(ArcSortArgs *args) { - MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>(); - - if (args->arg2 == ILABEL_COMPARE) { - ILabelCompare<Arc> icomp; - ArcSort(fst, icomp); - } else { // OLABEL_COMPARE - OLabelCompare<Arc> ocomp; - ArcSort(fst, ocomp); - } -} - -void ArcSort(MutableFstClass *ofst, ArcSortType sort_type); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_ARCSORT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/arg-packs.h b/kaldi_io/src/tools/openfst/include/fst/script/arg-packs.h deleted file mode 100644 index 8ebf8d8..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/arg-packs.h +++ /dev/null @@ -1,240 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -// Convenience templates for defining arg packs for the FstClass operations. 
- -// See operation-templates.h for a discussion about why these are needed; the -// short story is that all FstClass operations must be implemented by a version -// that takes one argument, most likely a struct bundling all the -// logical arguments together. These template structs provide convenient ways -// to specify these bundles (e.g. by means of appropriate typedefs). - -// The ArgPack template is sufficient for bundling together all the args for -// a particular function. The function is assumed to be void-returning. If -// you want a space for a return value, use the WithReturnValue template -// as follows: - -// WithReturnValue<bool, ArgPack<...> > - -#ifndef FST_SCRIPT_ARG_PACKS_H_ -#define FST_SCRIPT_ARG_PACKS_H_ - -namespace fst { -namespace script { -namespace args { - -// Sentinel value that means "no arg here." -class none_type { }; - -// Base arg pack template class. Specializations follow that allow -// fewer numbers of arguments (down to 2). If the maximum number of arguments -// increases, you will need to change three things: -// 1) Add more template parameters to this template -// 2) Add more specializations to allow fewer numbers of parameters than -// the new max. -// 3) Add extra none_types to all existing specializations to fill -// the new slots. 
- - -// 9 args (max) -template<class T1, - class T2 = none_type, - class T3 = none_type, - class T4 = none_type, - class T5 = none_type, - class T6 = none_type, - class T7 = none_type, - class T8 = none_type, - class T9 = none_type> -struct Package { - T1 arg1; - T2 arg2; - T3 arg3; - T4 arg4; - T5 arg5; - T6 arg6; - T7 arg7; - T8 arg8; - T9 arg9; - - Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, - T7 arg7, T8 arg8, T9 arg9) : - arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5), - arg6(arg6), arg7(arg7), arg8(arg8), arg9(arg9) { } -}; - -// 8 args -template<class T1, - class T2, - class T3, - class T4, - class T5, - class T6, - class T7, - class T8> -struct Package<T1, T2, T3, T4, T5, T6, T7, T8, none_type> { - T1 arg1; - T2 arg2; - T3 arg3; - T4 arg4; - T5 arg5; - T6 arg6; - T7 arg7; - T8 arg8; - - Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, - T7 arg7, T8 arg8) : - arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5), - arg6(arg6), arg7(arg7), arg8(arg8) { } -}; - -// 7 args -template<class T1, - class T2, - class T3, - class T4, - class T5, - class T6, - class T7> -struct Package<T1, T2, T3, T4, T5, T6, T7, - none_type, none_type> { - T1 arg1; - T2 arg2; - T3 arg3; - T4 arg4; - T5 arg5; - T6 arg6; - T7 arg7; - - Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, - T7 arg7) : - arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5), - arg6(arg6), arg7(arg7) { } -}; - -// 6 args -template<class T1, - class T2, - class T3, - class T4, - class T5, - class T6> -struct Package<T1, T2, T3, T4, T5, T6, none_type, - none_type, none_type> { - T1 arg1; - T2 arg2; - T3 arg3; - T4 arg4; - T5 arg5; - T6 arg6; - - Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6) : - arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5), - arg6(arg6) { } -}; - -// 5 args -template<class T1, - class T2, - class T3, - class T4, - class T5> -struct Package<T1, T2, T3, T4, T5, none_type, none_type, - none_type, 
none_type> { - T1 arg1; - T2 arg2; - T3 arg3; - T4 arg4; - T5 arg5; - - Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5) : - arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5) { } -}; - -// 4 args -template<class T1, - class T2, - class T3, - class T4> -struct Package<T1, T2, T3, T4, none_type, none_type, - none_type, none_type, none_type> { - T1 arg1; - T2 arg2; - T3 arg3; - T4 arg4; - - Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4) : - arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4) { } -}; - -// 3 args -template<class T1, - class T2, - class T3> -struct Package<T1, T2, T3, none_type, none_type, - none_type, none_type, none_type, - none_type> { - T1 arg1; - T2 arg2; - T3 arg3; - - Package(T1 arg1, T2 arg2, T3 arg3) : - arg1(arg1), arg2(arg2), arg3(arg3) { } -}; - -// 2 args (minimum) -template<class T1, - class T2> -struct Package<T1, T2, none_type, none_type, - none_type, none_type, none_type, - none_type, none_type> { - T1 arg1; - T2 arg2; - - Package(T1 arg1, T2 arg2) : - arg1(arg1), arg2(arg2) { } -}; - -// Tack this on to an existing arg pack to add a return value. -// The syntax for accessing the args is then slightly more stilted, -// as you must do an extra member access (since the args are stored -// as a member of this class). -// The alternative is to declare another slew of templates for functions -// that return a value, analogous to the above. - -template<class Retval, class ArgPackage> -struct WithReturnValue { - Retval retval; - const ArgPackage &args; - - explicit WithReturnValue(const ArgPackage &args) : args(args) { } -}; - -// We don't want to store a reference to a reference, if ArgPackage is -// already some reference type. 
-template<class Retval, class ArgPackage> -struct WithReturnValue<Retval, ArgPackage&> { - Retval retval; - const ArgPackage &args; - - explicit WithReturnValue(const ArgPackage &args) : args(args) { } -}; - -} // namespace args -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_ARG_PACKS_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/closure.h b/kaldi_io/src/tools/openfst/include/fst/script/closure.h deleted file mode 100644 index 93b5ec3..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/closure.h +++ /dev/null @@ -1,41 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_CLOSURE_H_ -#define FST_SCRIPT_CLOSURE_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/closure.h> - -namespace fst { -namespace script { - -typedef args::Package<MutableFstClass*, const ClosureType> ClosureArgs; - -template<class Arc> -void Closure(ClosureArgs *args) { - MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>(); - - Closure(fst, args->arg2); -} - -void Closure(MutableFstClass *ofst, ClosureType closure_type); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_CLOSURE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/compile-impl.h b/kaldi_io/src/tools/openfst/include/fst/script/compile-impl.h deleted file mode 100644 index 68f37c3..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/compile-impl.h +++ /dev/null @@ -1,216 +0,0 @@ -// compile.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Class to to compile a binary Fst from textual input. 
- -#ifndef FST_SCRIPT_COMPILE_IMPL_H_ -#define FST_SCRIPT_COMPILE_IMPL_H_ - -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <sstream> -#include <string> -#include <vector> -using std::vector; - -#include <iostream> -#include <fstream> -#include <sstream> -#include <fst/fst.h> -#include <fst/util.h> -#include <fst/vector-fst.h> - -DECLARE_string(fst_field_separator); - -namespace fst { - -// Compile a binary Fst from textual input, helper class for fstcompile.cc -// WARNING: Stand-alone use of this class not recommended, most code should -// read/write using the binary format which is much more efficient. -template <class A> class FstCompiler { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - - // WARNING: use of 'allow_negative_labels = true' not recommended; may - // cause conflicts - FstCompiler(istream &istrm, const string &source, - const SymbolTable *isyms, const SymbolTable *osyms, - const SymbolTable *ssyms, bool accep, bool ikeep, - bool okeep, bool nkeep, bool allow_negative_labels = false) - : nline_(0), source_(source), - isyms_(isyms), osyms_(osyms), ssyms_(ssyms), - nstates_(0), keep_state_numbering_(nkeep), - allow_negative_labels_(allow_negative_labels) { - char line[kLineLen]; - while (istrm.getline(line, kLineLen)) { - ++nline_; - vector<char *> col; - string separator = FLAGS_fst_field_separator + "\n"; - SplitToVector(line, separator.c_str(), &col, true); - if (col.size() == 0 || col[0][0] == '\0') // empty line - continue; - if (col.size() > 5 || - (col.size() > 4 && accep) || - (col.size() == 3 && !accep)) { - FSTERROR() << "FstCompiler: Bad number of columns, source = " - << source_ - << ", line = " << nline_; - fst_.SetProperties(kError, kError); - return; - } - StateId s = StrToStateId(col[0]); - while (s >= fst_.NumStates()) - fst_.AddState(); - if (nline_ == 1) - fst_.SetStart(s); - - Arc 
arc; - StateId d = s; - switch (col.size()) { - case 1: - fst_.SetFinal(s, Weight::One()); - break; - case 2: - fst_.SetFinal(s, StrToWeight(col[1], true)); - break; - case 3: - arc.nextstate = d = StrToStateId(col[1]); - arc.ilabel = StrToILabel(col[2]); - arc.olabel = arc.ilabel; - arc.weight = Weight::One(); - fst_.AddArc(s, arc); - break; - case 4: - arc.nextstate = d = StrToStateId(col[1]); - arc.ilabel = StrToILabel(col[2]); - if (accep) { - arc.olabel = arc.ilabel; - arc.weight = StrToWeight(col[3], false); - } else { - arc.olabel = StrToOLabel(col[3]); - arc.weight = Weight::One(); - } - fst_.AddArc(s, arc); - break; - case 5: - arc.nextstate = d = StrToStateId(col[1]); - arc.ilabel = StrToILabel(col[2]); - arc.olabel = StrToOLabel(col[3]); - arc.weight = StrToWeight(col[4], false); - fst_.AddArc(s, arc); - } - while (d >= fst_.NumStates()) - fst_.AddState(); - } - if (ikeep) - fst_.SetInputSymbols(isyms); - if (okeep) - fst_.SetOutputSymbols(osyms); - } - - const VectorFst<A> &Fst() const { - return fst_; - } - - private: - // Maximum line length in text file. 
- static const int kLineLen = 8096; - - int64 StrToId(const char *s, const SymbolTable *syms, - const char *name, bool allow_negative = false) const { - int64 n = 0; - - if (syms) { - n = syms->Find(s); - if (n == -1 || (!allow_negative && n < 0)) { - FSTERROR() << "FstCompiler: Symbol \"" << s - << "\" is not mapped to any integer " << name - << ", symbol table = " << syms->Name() - << ", source = " << source_ << ", line = " << nline_; - fst_.SetProperties(kError, kError); - } - } else { - char *p; - n = strtoll(s, &p, 10); - if (p < s + strlen(s) || (!allow_negative && n < 0)) { - FSTERROR() << "FstCompiler: Bad " << name << " integer = \"" << s - << "\", source = " << source_ << ", line = " << nline_; - fst_.SetProperties(kError, kError); - } - } - return n; - } - - StateId StrToStateId(const char *s) { - StateId n = StrToId(s, ssyms_, "state ID"); - - if (keep_state_numbering_) - return n; - - // remap state IDs to make dense set - typename unordered_map<StateId, StateId>::const_iterator it = states_.find(n); - if (it == states_.end()) { - states_[n] = nstates_; - return nstates_++; - } else { - return it->second; - } - } - - StateId StrToILabel(const char *s) const { - return StrToId(s, isyms_, "arc ilabel", allow_negative_labels_); - } - - StateId StrToOLabel(const char *s) const { - return StrToId(s, osyms_, "arc olabel", allow_negative_labels_); - } - - Weight StrToWeight(const char *s, bool allow_zero) const { - Weight w; - istringstream strm(s); - strm >> w; - if (!strm || (!allow_zero && w == Weight::Zero())) { - FSTERROR() << "FstCompiler: Bad weight = \"" << s - << "\", source = " << source_ << ", line = " << nline_; - fst_.SetProperties(kError, kError); - w = Weight::NoWeight(); - } - return w; - } - - mutable VectorFst<A> fst_; - size_t nline_; - string source_; // text FST source name - const SymbolTable *isyms_; // ilabel symbol table - const SymbolTable *osyms_; // olabel symbol table - const SymbolTable *ssyms_; // slabel symbol table - 
unordered_map<StateId, StateId> states_; // state ID map - StateId nstates_; // number of seen states - bool keep_state_numbering_; - bool allow_negative_labels_; // not recommended; may cause conflicts - - DISALLOW_COPY_AND_ASSIGN(FstCompiler); -}; - -} // namespace fst - -#endif // FST_SCRIPT_COMPILE_IMPL_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/compile.h b/kaldi_io/src/tools/openfst/include/fst/script/compile.h deleted file mode 100644 index bb6ea56..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/compile.h +++ /dev/null @@ -1,92 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_COMPILE_H_ -#define FST_SCRIPT_COMPILE_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/script/compile-impl.h> - -namespace fst { -namespace script { - -// Note: it is safe to pass these strings as references because -// this struct is only used to pass them deeper in the call graph. -// Be sure you understand why this is so before using this struct -// for anything else! 
-struct FstCompileArgs { - fst::istream &istrm; - const string &source; - const string &dest; - const string &fst_type; - const fst::SymbolTable *isyms; - const fst::SymbolTable *osyms; - const fst::SymbolTable *ssyms; - const bool accep; - const bool ikeep; - const bool okeep; - const bool nkeep; - const bool allow_negative_labels; - - FstCompileArgs(istream &istrm, const string &source, const string &dest, - const string &fst_type, const fst::SymbolTable *isyms, - const fst::SymbolTable *osyms, - const fst::SymbolTable *ssyms, - bool accep, bool ikeep, bool okeep, bool nkeep, - bool allow_negative_labels = false) : - istrm(istrm), source(source), dest(dest), fst_type(fst_type), - isyms(isyms), osyms(osyms), ssyms(ssyms), accep(accep), ikeep(ikeep), - okeep(okeep), nkeep(nkeep), - allow_negative_labels(allow_negative_labels) { } -}; - -template<class Arc> -void CompileFst(FstCompileArgs *args) { - using fst::FstCompiler; - using fst::Convert; - using fst::Fst; - - FstCompiler<Arc> fstcompiler(args->istrm, args->source, args->isyms, - args->osyms, args->ssyms, - args->accep, args->ikeep, - args->okeep, args->nkeep, - args->allow_negative_labels); - - const Fst<Arc> *fst = &fstcompiler.Fst(); - if (args->fst_type != "vector") { - fst = Convert<Arc>(*fst, args->fst_type); - if (!fst) { - FSTERROR() << "Failed to convert FST to desired type: " - << args->fst_type; - return; - } - } - - fst->Write(args->dest); -} - -void CompileFst(istream &istrm, const string &source, const string &dest, - const string &fst_type, const string &arc_type, - const SymbolTable *isyms, - const SymbolTable *osyms, const SymbolTable *ssyms, - bool accep, bool ikeep, bool okeep, bool nkeep, - bool allow_negative_labels); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_COMPILE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/compose.h b/kaldi_io/src/tools/openfst/include/fst/script/compose.h deleted file mode 100644 index 96375f7..0000000 --- 
a/kaldi_io/src/tools/openfst/include/fst/script/compose.h +++ /dev/null @@ -1,63 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_COMPOSE_H_ -#define FST_SCRIPT_COMPOSE_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/compose.h> - -namespace fst { -namespace script { - -typedef args::Package<const FstClass&, const FstClass&, - MutableFstClass*, ComposeFilter> ComposeArgs1; - -template<class Arc> -void Compose(ComposeArgs1 *args) { - const Fst<Arc> &ifst1 = *(args->arg1.GetFst<Arc>()); - const Fst<Arc> &ifst2 = *(args->arg2.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>(); - - Compose(ifst1, ifst2, ofst, args->arg4); -} - -typedef fst::ComposeOptions ComposeOptions; - -typedef args::Package<const FstClass&, const FstClass&, - MutableFstClass*, const ComposeOptions &> ComposeArgs2; - -template<class Arc> -void Compose(ComposeArgs2 *args) { - const Fst<Arc> &ifst1 = *(args->arg1.GetFst<Arc>()); - const Fst<Arc> &ifst2 = *(args->arg2.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>(); - - Compose(ifst1, ifst2, ofst, args->arg4); -} - -void Compose(const FstClass &ifst1, const FstClass &ifst2, - MutableFstClass *ofst, - const ComposeOptions &opts = fst::script::ComposeOptions()); - -void Compose(const FstClass &ifst1, const FstClass &ifst2, - MutableFstClass 
*ofst, ComposeFilter compose_filter); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_COMPOSE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/concat.h b/kaldi_io/src/tools/openfst/include/fst/script/concat.h deleted file mode 100644 index 46c4407..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/concat.h +++ /dev/null @@ -1,54 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_CONCAT_H_ -#define FST_SCRIPT_CONCAT_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/concat.h> - -namespace fst { -namespace script { - -typedef args::Package<MutableFstClass*, const FstClass&> ConcatArgs1; -typedef args::Package<const FstClass&, MutableFstClass*> ConcatArgs2; - -template<class Arc> -void Concat(ConcatArgs1 *args) { - MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>(); - const Fst<Arc> &ifst = *(args->arg2.GetFst<Arc>()); - - Concat(ofst, ifst); -} - -template<class Arc> -void Concat(ConcatArgs2 *args) { - const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); - - Concat(ifst, ofst); -} - -void Concat(MutableFstClass *ofst, const FstClass &ifst); -void Concat(const FstClass &ifst, MutableFstClass *ofst); - -} // namespace script -} // namespace fst - - - -#endif // FST_SCRIPT_CONCAT_H_ diff --git 
a/kaldi_io/src/tools/openfst/include/fst/script/connect.h b/kaldi_io/src/tools/openfst/include/fst/script/connect.h deleted file mode 100644 index 19c4390..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/connect.h +++ /dev/null @@ -1,45 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_CONNECT_H_ -#define FST_SCRIPT_CONNECT_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/dfs-visit.h> -#include <fst/connect.h> - -namespace fst { -namespace script { - -// This function confuses SWIG, because both versions have the same args -#ifndef SWIG -template<class Arc> -void Connect(MutableFstClass *fst) { - MutableFst<Arc> *typed_fst = fst->GetMutableFst<Arc>(); - - Connect(typed_fst); -} -#endif - -void Connect(MutableFstClass *fst); - -} // namespace script -} // namespace fst - - - -#endif // FST_SCRIPT_CONNECT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/convert.h b/kaldi_io/src/tools/openfst/include/fst/script/convert.h deleted file mode 100644 index 4a3ce6b..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/convert.h +++ /dev/null @@ -1,49 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_CONVERT_H_ -#define FST_SCRIPT_CONVERT_H_ - -#include <string> - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> - -namespace fst { -namespace script { - -typedef args::Package<const FstClass&, const string&> ConvertInnerArgs; -typedef args::WithReturnValue<FstClass*, ConvertInnerArgs> ConvertArgs; - -template<class Arc> -void Convert(ConvertArgs *args) { - const Fst<Arc> &fst = *(args->args.arg1.GetFst<Arc>()); - const string &new_type = args->args.arg2; - - Fst<Arc> *result = Convert(fst, new_type); - args->retval = new FstClass(*result); - delete result; -} - -#ifdef SWIG -%newobject Convert; -#endif -FstClass *Convert(const FstClass& f, const string &new_type); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_CONVERT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/decode.h b/kaldi_io/src/tools/openfst/include/fst/script/decode.h deleted file mode 100644 index 1064ad5..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/decode.h +++ /dev/null @@ -1,46 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_DECODE_H_ -#define FST_SCRIPT_DECODE_H_ - -#include <string> - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/encode.h> - -namespace fst { -namespace script { - -typedef args::Package<MutableFstClass*, const string&> DecodeArgs; - -template<class Arc> -void Decode(DecodeArgs *args) { - MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>(); - - EncodeMapper<Arc> *decoder = EncodeMapper<Arc>::Read(args->arg2, DECODE); - Decode(ofst, *decoder); - - delete decoder; -} - -void Decode(MutableFstClass *fst, const string &coder_fname); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_DECODE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/determinize.h b/kaldi_io/src/tools/openfst/include/fst/script/determinize.h deleted file mode 100644 index 38fd7ad..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/determinize.h +++ /dev/null @@ -1,68 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_DETERMINIZE_H_ -#define FST_SCRIPT_DETERMINIZE_H_ - -#include <fst/determinize.h> -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/script/weight-class.h> - -namespace fst { -namespace script { - -struct DeterminizeOptions { - float delta; - WeightClass weight_threshold; - int64 state_threshold; - int64 subsequential_label; - - explicit DeterminizeOptions(float d = fst::kDelta, - WeightClass w = - fst::script::WeightClass::Zero(), - int64 n = fst::kNoStateId, int64 l = 0) - : delta(d), weight_threshold(w), state_threshold(n), - subsequential_label(l) {} -}; - -typedef args::Package<const FstClass&, MutableFstClass*, - const DeterminizeOptions &> DeterminizeArgs; - -template<class Arc> -void Determinize(DeterminizeArgs *args) { - const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); - const DeterminizeOptions &opts = args->arg3; - - fst::DeterminizeOptions<Arc> detargs; - detargs.delta = opts.delta; - detargs.weight_threshold = - *(opts.weight_threshold.GetWeight<typename Arc::Weight>()); - detargs.state_threshold = opts.state_threshold; - detargs.subsequential_label = opts.subsequential_label; - - Determinize(ifst, ofst, detargs); -} - -void Determinize(const FstClass &ifst, MutableFstClass *ofst, - const DeterminizeOptions &opts = - fst::script::DeterminizeOptions()); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_DETERMINIZE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/difference.h b/kaldi_io/src/tools/openfst/include/fst/script/difference.h deleted file mode 100644 index 76490d4..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/difference.h +++ /dev/null @@ -1,67 +0,0 @@ - -// Licensed under the Apache 
License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_DIFFERENCE_H_ -#define FST_SCRIPT_DIFFERENCE_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/script/compose.h> // for ComposeFilter -#include <fst/difference.h> - -namespace fst { -namespace script { - -typedef args::Package<const FstClass&, const FstClass&, - MutableFstClass*, ComposeFilter> DifferenceArgs1; - -template<class Arc> -void Difference(DifferenceArgs1 *args) { - const Fst<Arc> &ifst1 = *(args->arg1.GetFst<Arc>()); - const Fst<Arc> &ifst2 = *(args->arg2.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>(); - - Difference(ifst1, ifst2, ofst, args->arg4); -} - -typedef args::Package<const FstClass&, const FstClass&, - MutableFstClass*, const ComposeOptions &> DifferenceArgs2; - -template<class Arc> -void Difference(DifferenceArgs2 *args) { - const Fst<Arc> &ifst1 = *(args->arg1.GetFst<Arc>()); - const Fst<Arc> &ifst2 = *(args->arg2.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>(); - - Difference(ifst1, ifst2, ofst, args->arg4); -} - - -void Difference(const FstClass &ifst1, const FstClass &ifst2, - MutableFstClass *ofst, - ComposeFilter compose_filter); - -void Difference(const FstClass &ifst1, const FstClass &ifst2, - MutableFstClass *ofst, - const ComposeOptions &opts = fst::script::ComposeOptions()); - - -} // namespace script -} // 
namespace fst - - - -#endif // FST_SCRIPT_DIFFERENCE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/disambiguate.h b/kaldi_io/src/tools/openfst/include/fst/script/disambiguate.h deleted file mode 100644 index e42a9c2..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/disambiguate.h +++ /dev/null @@ -1,68 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_DISAMBIGUATE_H_ -#define FST_SCRIPT_DISAMBIGUATE_H_ - -#include <fst/disambiguate.h> -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/script/weight-class.h> - -namespace fst { -namespace script { - -struct DisambiguateOptions { - float delta; - WeightClass weight_threshold; - int64 state_threshold; - int64 subsequential_label; - - explicit DisambiguateOptions(float d = fst::kDelta, - WeightClass w = - fst::script::WeightClass::Zero(), - int64 n = fst::kNoStateId, int64 l = 0) - : delta(d), weight_threshold(w), state_threshold(n), - subsequential_label(l) {} -}; - -typedef args::Package<const FstClass&, MutableFstClass*, - const DisambiguateOptions &> DisambiguateArgs; - -template<class Arc> -void Disambiguate(DisambiguateArgs *args) { - const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); - const DisambiguateOptions &opts = args->arg3; - - fst::DisambiguateOptions<Arc> detargs; 
- detargs.delta = opts.delta; - detargs.weight_threshold = - *(opts.weight_threshold.GetWeight<typename Arc::Weight>()); - detargs.state_threshold = opts.state_threshold; - detargs.subsequential_label = opts.subsequential_label; - - Disambiguate(ifst, ofst, detargs); -} - -void Disambiguate(const FstClass &ifst, MutableFstClass *ofst, - const DisambiguateOptions &opts = - fst::script::DisambiguateOptions()); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_DISAMBIGUATE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/draw-impl.h b/kaldi_io/src/tools/openfst/include/fst/script/draw-impl.h deleted file mode 100644 index 893e258..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/draw-impl.h +++ /dev/null @@ -1,234 +0,0 @@ -// draw.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// Class to draw a binary FST by producing a text file in dot format, -// helper class to fstdraw.cc - -#ifndef FST_SCRIPT_DRAW_IMPL_H_ -#define FST_SCRIPT_DRAW_IMPL_H_ - -#include <sstream> -#include <string> - -#include <fst/script/fst-class.h> -#include <fst/fst.h> -#include <fst/util.h> - -namespace fst { - -// Print a binary Fst in the dot textual format, helper class for fstdraw.cc -// WARNING: Stand-alone use not recommend. 
-template <class A> class FstDrawer { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - - FstDrawer(const Fst<A> &fst, - const SymbolTable *isyms, - const SymbolTable *osyms, - const SymbolTable *ssyms, - bool accep, - string title, - float width, - float height, - bool portrait, - bool vertical, - float ranksep, - float nodesep, - int fontsize, - int precision, - bool show_weight_one) - : fst_(fst), isyms_(isyms), osyms_(osyms), ssyms_(ssyms), - accep_(accep && fst.Properties(kAcceptor, true)), ostrm_(0), - title_(title), width_(width), height_(height), portrait_(portrait), - vertical_(vertical), ranksep_(ranksep), nodesep_(nodesep), - fontsize_(fontsize), precision_(precision), - show_weight_one_(show_weight_one) {} - - // Draw Fst to an output buffer (or stdout if buf = 0) - void Draw(ostream *strm, const string &dest) { - ostrm_ = strm; - dest_ = dest; - StateId start = fst_.Start(); - if (start == kNoStateId) - return; - - PrintString("digraph FST {\n"); - if (vertical_) - PrintString("rankdir = BT;\n"); - else - PrintString("rankdir = LR;\n"); - PrintString("size = \""); - Print(width_); - PrintString(","); - Print(height_); - PrintString("\";\n"); - if (!dest_.empty()) - PrintString("label = \"" + title_ + "\";\n"); - PrintString("center = 1;\n"); - if (portrait_) - PrintString("orientation = Portrait;\n"); - else - PrintString("orientation = Landscape;\n"); - PrintString("ranksep = \""); - Print(ranksep_); - PrintString("\";\n"); - PrintString("nodesep = \""); - Print(nodesep_); - PrintString("\";\n"); - // initial state first - DrawState(start); - for (StateIterator< Fst<A> > siter(fst_); - !siter.Done(); - siter.Next()) { - StateId s = siter.Value(); - if (s != start) - DrawState(s); - } - PrintString("}\n"); - } - - private: - // Maximum line length in text file. 
- static const int kLineLen = 8096; - - void PrintString(const string &s) const { - *ostrm_ << s; - } - - // Escapes backslash and double quote if these occur in the string. Dot will - // not deal gracefully with these if they are not escaped. - inline void EscapeChars(const string &s, string* ns) const { - const char* c = s.c_str(); - while (*c) { - if (*c == '\\' || *c == '"') ns->push_back('\\'); - ns->push_back(*c); - ++c; - } - } - - void PrintId(int64 id, const SymbolTable *syms, - const char *name) const { - if (syms) { - string symbol = syms->Find(id); - if (symbol == "") { - FSTERROR() << "FstDrawer: Integer " << id - << " is not mapped to any textual symbol" - << ", symbol table = " << syms->Name() - << ", destination = " << dest_; - symbol = "?"; - } - string nsymbol; - EscapeChars(symbol, &nsymbol); - PrintString(nsymbol); - } else { - string idstr; - Int64ToStr(id, &idstr); - PrintString(idstr); - } - } - - void PrintStateId(StateId s) const { - PrintId(s, ssyms_, "state ID"); - } - - void PrintILabel(Label l) const { - PrintId(l, isyms_, "arc input label"); - } - - void PrintOLabel(Label l) const { - PrintId(l, osyms_, "arc output label"); - } - - template <class T> - void Print(T t) const { - *ostrm_ << t; - } - - void DrawState(StateId s) const { - Print(s); - PrintString(" [label = \""); - PrintStateId(s); - Weight final = fst_.Final(s); - if (final != Weight::Zero()) { - if (show_weight_one_ || (final != Weight::One())) { - PrintString("/"); - Print(final); - } - PrintString("\", shape = doublecircle,"); - } else { - PrintString("\", shape = circle,"); - } - if (s == fst_.Start()) - PrintString(" style = bold,"); - else - PrintString(" style = solid,"); - PrintString(" fontsize = "); - Print(fontsize_); - PrintString("]\n"); - for (ArcIterator< Fst<A> > aiter(fst_, s); - !aiter.Done(); - aiter.Next()) { - Arc arc = aiter.Value(); - PrintString("\t"); - Print(s); - PrintString(" -> "); - Print(arc.nextstate); - PrintString(" [label = \""); - 
PrintILabel(arc.ilabel); - if (!accep_) { - PrintString(":"); - PrintOLabel(arc.olabel); - } - if (show_weight_one_ || (arc.weight != Weight::One())) { - PrintString("/"); - Print(arc.weight); - } - PrintString("\", fontsize = "); - Print(fontsize_); - PrintString("];\n"); - } - } - - const Fst<A> &fst_; - const SymbolTable *isyms_; // ilabel symbol table - const SymbolTable *osyms_; // olabel symbol table - const SymbolTable *ssyms_; // slabel symbol table - bool accep_; // print as acceptor when possible - ostream *ostrm_; // drawn FST destination - string dest_; // drawn FST destination name - - string title_; - float width_; - float height_; - bool portrait_; - bool vertical_; - float ranksep_; - float nodesep_; - int fontsize_; - int precision_; - bool show_weight_one_; - - DISALLOW_COPY_AND_ASSIGN(FstDrawer); -}; - -} // namespace fst - -#endif // FST_SCRIPT_DRAW_IMPL_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/draw.h b/kaldi_io/src/tools/openfst/include/fst/script/draw.h deleted file mode 100644 index 2b66373..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/draw.h +++ /dev/null @@ -1,114 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_DRAW_H_ -#define FST_SCRIPT_DRAW_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/script/draw-impl.h> -#include <iostream> -#include <fstream> -#include <sstream> - -namespace fst { -namespace script { - -// Note: it is safe to pass these strings as references because -// this struct is only used to pass them deeper in the call graph. -// Be sure you understand why this is so before using this struct -// for anything else! -struct FstDrawerArgs { - const FstClass &fst; - const SymbolTable *isyms; - const SymbolTable *osyms; - const SymbolTable *ssyms; - const bool accep; - const string& title; - const float width; - const float height; - const bool portrait; - const bool vertical; - const float ranksep; - const float nodesep; - const int fontsize; - const int precision; - const bool show_weight_one; - ostream *ostrm; - const string &dest; - - FstDrawerArgs(const FstClass &fst, - const SymbolTable *isyms, - const SymbolTable *osyms, - const SymbolTable *ssyms, - bool accep, - const string &title, - float width, - float height, - bool portrait, - bool vertical, - float ranksep, - float nodesep, - int fontsize, - int precision, - bool show_weight_one, - ostream *ostrm, - const string &dest) : - fst(fst), isyms(isyms), osyms(osyms), ssyms(ssyms), accep(accep), - title(title), width(width), height(height), portrait(portrait), - vertical(vertical), ranksep(ranksep), nodesep(nodesep), - fontsize(fontsize), precision(precision), - show_weight_one(show_weight_one), ostrm(ostrm), dest(dest) { } -}; - - -template<class Arc> -void DrawFst(FstDrawerArgs *args) { - const Fst<Arc> &fst = *(args->fst.GetFst<Arc>()); - - FstDrawer<Arc> fstdrawer(fst, args->isyms, args->osyms, args->ssyms, - args->accep, args->title, args->width, - args->height, args->portrait, - args->vertical, args->ranksep, - args->nodesep, args->fontsize, - args->precision, args->show_weight_one); - 
fstdrawer.Draw(args->ostrm, args->dest); -} - -void DrawFst(const FstClass &fst, - const SymbolTable *isyms, - const SymbolTable *osyms, - const SymbolTable *ssyms, - bool accep, - const string &title, - float width, - float height, - bool portrait, - bool vertical, - float ranksep, - float nodesep, - int fontsize, - int precision, - bool show_weight_one, - ostream *ostrm, - const string &dest); - -} // namespace script -} // namespace fst - - - -#endif // FST_SCRIPT_DRAW_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/encode.h b/kaldi_io/src/tools/openfst/include/fst/script/encode.h deleted file mode 100644 index dc1a290..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/encode.h +++ /dev/null @@ -1,58 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_ENCODE_H_ -#define FST_SCRIPT_ENCODE_H_ - -#include <string> - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/encode.h> - -namespace fst { -namespace script { - -typedef args::Package<MutableFstClass*, uint32, bool, - const string &> EncodeArgs; - -template<class Arc> -void Encode(EncodeArgs *args) { - MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>(); - bool reuse_encoder = args->arg3; - const string &coder_fname = args->arg4; - uint32 flags = args->arg2; - - EncodeMapper<Arc> *encoder = reuse_encoder - ? 
EncodeMapper<Arc>::Read(coder_fname, ENCODE) - : new EncodeMapper<Arc>(flags, ENCODE); - - Encode(ofst, encoder); - if (!args->arg3) - encoder->Write(coder_fname); - - delete encoder; -} - -void Encode(MutableFstClass *fst, uint32 flags, bool reuse_encoder, - const string &coder_fname); - -} // namespace script -} // namespace fst - - - -#endif // FST_SCRIPT_ENCODE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/epsnormalize.h b/kaldi_io/src/tools/openfst/include/fst/script/epsnormalize.h deleted file mode 100644 index 50b12da..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/epsnormalize.h +++ /dev/null @@ -1,44 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_EPSNORMALIZE_H_ -#define FST_SCRIPT_EPSNORMALIZE_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/epsnormalize.h> - -namespace fst { -namespace script { - -typedef args::Package<const FstClass&, MutableFstClass*, - EpsNormalizeType> EpsNormalizeArgs; - -template<class Arc> -void EpsNormalize(EpsNormalizeArgs *args) { - const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); - - EpsNormalize(ifst, ofst, args->arg3); -} - -void EpsNormalize(const FstClass &ifst, MutableFstClass *ofst, - EpsNormalizeType norm_type = EPS_NORM_INPUT); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_EPSNORMALIZE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/equal.h b/kaldi_io/src/tools/openfst/include/fst/script/equal.h deleted file mode 100644 index 9fb2d3c..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/equal.h +++ /dev/null @@ -1,45 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_EQUAL_H_ -#define FST_SCRIPT_EQUAL_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/equal.h> - -namespace fst { -namespace script { - -typedef args::Package<const FstClass&, const FstClass&, float> EqualInnerArgs; -typedef args::WithReturnValue<bool, EqualInnerArgs> EqualArgs; - -template<class Arc> -void Equal(EqualArgs *args) { - const Fst<Arc> &fst1 = *(args->args.arg1.GetFst<Arc>()); - const Fst<Arc> &fst2 = *(args->args.arg2.GetFst<Arc>()); - - args->retval = Equal(fst1, fst2, args->args.arg3); -} - -bool Equal(const FstClass &fst1, const FstClass &fst2, - float delta = kDelta); - -} // namespace script -} // namespace fst - - -#endif // FST_SCRIPT_EQUAL_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/equivalent.h b/kaldi_io/src/tools/openfst/include/fst/script/equivalent.h deleted file mode 100644 index 43460c6..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/equivalent.h +++ /dev/null @@ -1,47 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_EQUIVALENT_H_ -#define FST_SCRIPT_EQUIVALENT_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/equivalent.h> - -namespace fst { -namespace script { - -typedef args::Package<const FstClass &, const FstClass &, - float> EquivalentInnerArgs; -typedef args::WithReturnValue<bool, EquivalentInnerArgs> EquivalentArgs; - -template<class Arc> -void Equivalent(EquivalentArgs *args) { - const Fst<Arc> &fst1 = *(args->args.arg1.GetFst<Arc>()); - const Fst<Arc> &fst2 = *(args->args.arg2.GetFst<Arc>()); - - args->retval = Equivalent(fst1, fst2, args->args.arg3); -} - -bool Equivalent(const FstClass &fst1, const FstClass &fst2, - float delta = kDelta); - -} // namespace script -} // namespace fst - - - -#endif // FST_SCRIPT_EQUIVALENT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/fst-class.h b/kaldi_io/src/tools/openfst/include/fst/script/fst-class.h deleted file mode 100644 index fe2cf53..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/fst-class.h +++ /dev/null @@ -1,382 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_FST_CLASS_H_ -#define FST_SCRIPT_FST_CLASS_H_ - -#include <string> - -#include <fst/fst.h> -#include <fst/mutable-fst.h> -#include <fst/vector-fst.h> -#include <iostream> -#include <fstream> -#include <sstream> - -// Classes to support "boxing" all existing types of FST arcs in a single -// FstClass which hides the arc types. This allows clients to load -// and work with FSTs without knowing the arc type. - -// These classes are only recommended for use in high-level scripting -// applications. Most users should use the lower-level templated versions -// corresponding to these classes. - -namespace fst { -namespace script { - -// -// Abstract base class defining the set of functionalities implemented -// in all impls, and passed through by all bases Below FstClassBase -// the class hierarchy bifurcates; FstClassImplBase serves as the base -// class for all implementations (of which FstClassImpl is currently -// the only one) and FstClass serves as the base class for all -// interfaces. -// -class FstClassBase { - public: - virtual const string &ArcType() const = 0; - virtual const string &FstType() const = 0; - virtual const string &WeightType() const = 0; - virtual const SymbolTable *InputSymbols() const = 0; - virtual const SymbolTable *OutputSymbols() const = 0; - virtual bool Write(const string& fname) const = 0; - virtual bool Write(ostream &ostr, const FstWriteOptions &opts) const = 0; - virtual uint64 Properties(uint64 mask, bool test) const = 0; - virtual ~FstClassBase() { } -}; - -class FstClassImplBase : public FstClassBase { - public: - virtual FstClassImplBase *Copy() = 0; - virtual void SetInputSymbols(SymbolTable *is) = 0; - virtual void SetOutputSymbols(SymbolTable *is) = 0; - virtual ~FstClassImplBase() { } -}; - - -// -// CONTAINER CLASS -// Wraps an Fst<Arc>, hiding its arc type. Whether this Fst<Arc> -// pointer refers to a special kind of FST (e.g. 
a MutableFst) is -// known by the type of interface class that owns the pointer to this -// container. -// - -template<class Arc> -class FstClassImpl : public FstClassImplBase { - public: - explicit FstClassImpl(Fst<Arc> *impl, - bool should_own = false) : - impl_(should_own ? impl : impl->Copy()) { } - - explicit FstClassImpl(const Fst<Arc> &impl) : impl_(impl.Copy()) { } - - virtual const string &ArcType() const { - return Arc::Type(); - } - - virtual const string &FstType() const { - return impl_->Type(); - } - - virtual const string &WeightType() const { - return Arc::Weight::Type(); - } - - virtual const SymbolTable *InputSymbols() const { - return impl_->InputSymbols(); - } - - virtual const SymbolTable *OutputSymbols() const { - return impl_->OutputSymbols(); - } - - // Warning: calling this method casts the FST to a mutable FST. - virtual void SetInputSymbols(SymbolTable *is) { - static_cast<MutableFst<Arc> *>(impl_)->SetInputSymbols(is); - } - - // Warning: calling this method casts the FST to a mutable FST. 
- virtual void SetOutputSymbols(SymbolTable *os) { - static_cast<MutableFst<Arc> *>(impl_)->SetOutputSymbols(os); - } - - virtual bool Write(const string &fname) const { - return impl_->Write(fname); - } - - virtual bool Write(ostream &ostr, const FstWriteOptions &opts) const { - return impl_->Write(ostr, opts); - } - - virtual uint64 Properties(uint64 mask, bool test) const { - return impl_->Properties(mask, test); - } - - virtual ~FstClassImpl() { delete impl_; } - - Fst<Arc> *GetImpl() const { return impl_; } - - Fst<Arc> *GetImpl() { return impl_; } - - virtual FstClassImpl *Copy() { - return new FstClassImpl<Arc>(impl_); - } - - private: - Fst<Arc> *impl_; -}; - -// -// BASE CLASS DEFINITIONS -// - -class MutableFstClass; - -class FstClass : public FstClassBase { - public: - template<class Arc> - static FstClass *Read(istream &stream, - const FstReadOptions &opts) { - if (!opts.header) { - FSTERROR() << "FstClass::Read: options header not specified"; - return 0; - } - const FstHeader &hdr = *opts.header; - - if (hdr.Properties() & kMutable) { - return ReadTypedFst<MutableFstClass, MutableFst<Arc> >(stream, opts); - } else { - return ReadTypedFst<FstClass, Fst<Arc> >(stream, opts); - } - } - - FstClass() : impl_(NULL) { - } - - template<class Arc> - explicit FstClass(const Fst<Arc> &fst) : impl_(new FstClassImpl<Arc>(fst)) { - } - - FstClass(const FstClass &other) : impl_(other.impl_->Copy()) { } - - FstClass &operator=(const FstClass &other) { - delete impl_; - impl_ = other.impl_->Copy(); - return *this; - } - - static FstClass *Read(const string &fname); - - static FstClass *Read(istream &istr, const string &source); - - virtual const string &ArcType() const { - return impl_->ArcType(); - } - - virtual const string& FstType() const { - return impl_->FstType(); - } - - virtual const SymbolTable *InputSymbols() const { - return impl_->InputSymbols(); - } - - virtual const SymbolTable *OutputSymbols() const { - return impl_->OutputSymbols(); - } - - virtual 
const string& WeightType() const { - return impl_->WeightType(); - } - - virtual bool Write(const string &fname) const { - return impl_->Write(fname); - } - - virtual bool Write(ostream &ostr, const FstWriteOptions &opts) const { - return impl_->Write(ostr, opts); - } - - virtual uint64 Properties(uint64 mask, bool test) const { - return impl_->Properties(mask, test); - } - - template<class Arc> - const Fst<Arc> *GetFst() const { - if (Arc::Type() != ArcType()) { - return NULL; - } else { - FstClassImpl<Arc> *typed_impl = static_cast<FstClassImpl<Arc> *>(impl_); - return typed_impl->GetImpl(); - } - } - - virtual ~FstClass() { delete impl_; } - - // These methods are required by IO registration - template<class Arc> - static FstClassImplBase *Convert(const FstClass &other) { - LOG(ERROR) << "Doesn't make sense to convert any class to type FstClass."; - return 0; - } - - template<class Arc> - static FstClassImplBase *Create() { - LOG(ERROR) << "Doesn't make sense to create an FstClass with a " - << "particular arc type."; - return 0; - } - - - protected: - explicit FstClass(FstClassImplBase *impl) : impl_(impl) { } - - // Generic template method for reading an arc-templated FST of type - // UnderlyingT, and returning it wrapped as FstClassT, with appropriate - // error checking. Called from arc-templated Read() static methods. 
- template<class FstClassT, class UnderlyingT> - static FstClassT* ReadTypedFst(istream &stream, - const FstReadOptions &opts) { - UnderlyingT *u = UnderlyingT::Read(stream, opts); - if (!u) { - return 0; - } else { - FstClassT *r = new FstClassT(*u); - delete u; - return r; - } - } - - FstClassImplBase *GetImpl() const { return impl_; } - - FstClassImplBase *GetImpl() { return impl_; } - -// friend ostream &operator<<(ostream&, const FstClass&); - - private: - FstClassImplBase *impl_; -}; - -// -// Specific types of FstClass with special properties -// - -class MutableFstClass : public FstClass { - public: - template<class Arc> - explicit MutableFstClass(const MutableFst<Arc> &fst) : - FstClass(fst) { } - - template<class Arc> - MutableFst<Arc> *GetMutableFst() { - Fst<Arc> *fst = const_cast<Fst<Arc> *>(this->GetFst<Arc>()); - MutableFst<Arc> *mfst = static_cast<MutableFst<Arc> *>(fst); - - return mfst; - } - - template<class Arc> - static MutableFstClass *Read(istream &stream, - const FstReadOptions &opts) { - MutableFst<Arc> *mfst = MutableFst<Arc>::Read(stream, opts); - if (!mfst) { - return 0; - } else { - MutableFstClass *retval = new MutableFstClass(*mfst); - delete mfst; - return retval; - } - } - - virtual bool Write(const string &fname) const { - return GetImpl()->Write(fname); - } - - virtual bool Write(ostream &ostr, const FstWriteOptions &opts) const { - return GetImpl()->Write(ostr, opts); - } - - static MutableFstClass *Read(const string &fname, bool convert = false); - - virtual void SetInputSymbols(SymbolTable *is) { - GetImpl()->SetInputSymbols(is); - } - - virtual void SetOutputSymbols(SymbolTable *os) { - GetImpl()->SetOutputSymbols(os); - } - - // These methods are required by IO registration - template<class Arc> - static FstClassImplBase *Convert(const FstClass &other) { - LOG(ERROR) << "Doesn't make sense to convert any class to type " - << "MutableFstClass."; - return 0; - } - - template<class Arc> - static FstClassImplBase *Create() { - 
LOG(ERROR) << "Doesn't make sense to create a MutableFstClass with a " - << "particular arc type."; - return 0; - } - - protected: - explicit MutableFstClass(FstClassImplBase *impl) : FstClass(impl) { } -}; - - -class VectorFstClass : public MutableFstClass { - public: - explicit VectorFstClass(const FstClass &other); - explicit VectorFstClass(const string &arc_type); - - template<class Arc> - explicit VectorFstClass(const VectorFst<Arc> &fst) : - MutableFstClass(fst) { } - - template<class Arc> - static VectorFstClass *Read(istream &stream, - const FstReadOptions &opts) { - VectorFst<Arc> *vfst = VectorFst<Arc>::Read(stream, opts); - if (!vfst) { - return 0; - } else { - VectorFstClass *retval = new VectorFstClass(*vfst); - delete vfst; - return retval; - } - } - - static VectorFstClass *Read(const string &fname); - - // Converter / creator for known arc types - template<class Arc> - static FstClassImplBase *Convert(const FstClass &other) { - return new FstClassImpl<Arc>(new VectorFst<Arc>( - *other.GetFst<Arc>()), true); - } - - template<class Arc> - static FstClassImplBase *Create() { - return new FstClassImpl<Arc>(new VectorFst<Arc>(), true); - } -}; - -} // namespace script -} // namespace fst -#endif // FST_SCRIPT_FST_CLASS_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/fstscript-decl.h b/kaldi_io/src/tools/openfst/include/fst/script/fstscript-decl.h deleted file mode 100644 index fee813e..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/fstscript-decl.h +++ /dev/null @@ -1,35 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -// Forward declarations for the FST and FST-script classes. - -#ifndef FST_SCRIPT_FSTSCRIPT_DECL_H_ -#define FST_SCRIPT_FSTSCRIPT_DECL_H_ - -#include <fst/fst-decl.h> - -namespace fst { -namespace script { - -class FstClass; -class MutableFstClass; -class VectorFstClass; -class WeightClass; - -} // namespace script -} // namespace fst; - -#endif // FST_SCRIPT_FSTSCRIPT_DECL_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/fstscript.h b/kaldi_io/src/tools/openfst/include/fst/script/fstscript.h deleted file mode 100644 index 90e1e75..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/fstscript.h +++ /dev/null @@ -1,154 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -// Convenience file that includes all FstScript functionality - -#ifndef FST_SCRIPT_FSTSCRIPT_H_ -#define FST_SCRIPT_FSTSCRIPT_H_ - -// Major classes -#include <fst/script/fst-class.h> -#include <fst/script/weight-class.h> -#include <fst/script/text-io.h> - -// Templates like Operation< >, Apply< > -#include <fst/script/script-impl.h> - -// Operations -#include <fst/script/arcsort.h> -#include <fst/script/closure.h> -#include <fst/script/compile.h> -#include <fst/script/compose.h> -#include <fst/script/concat.h> -#include <fst/script/connect.h> -#include <fst/script/convert.h> -#include <fst/script/decode.h> -#include <fst/script/determinize.h> -#include <fst/script/difference.h> -#include <fst/script/draw.h> -#include <fst/script/encode.h> -#include <fst/script/epsnormalize.h> -#include <fst/script/equal.h> -#include <fst/script/equivalent.h> -#include <fst/script/info.h> -#include <fst/script/intersect.h> -#include <fst/script/invert.h> -#include <fst/script/map.h> -#include <fst/script/minimize.h> -#include <fst/script/print.h> -#include <fst/script/project.h> -#include <fst/script/prune.h> -#include <fst/script/push.h> -#include <fst/script/randequivalent.h> -#include <fst/script/randgen.h> -#include <fst/script/relabel.h> -#include <fst/script/replace.h> -#include <fst/script/reverse.h> -#include <fst/script/reweight.h> -#include <fst/script/rmepsilon.h> -#include <fst/script/shortest-distance.h> -#include <fst/script/shortest-path.h> -#include <fst/script/symbols.h> -#include <fst/script/synchronize.h> -#include <fst/script/topsort.h> -#include <fst/script/union.h> -#include <fst/script/verify.h> - -// -// REGISTER OPERATIONS -// - - -// This class is necessary because registering each of the operations -// separately overfills the stack, as there's so many of them. 
-namespace fst { -namespace script { -template<class Arc> -class AllFstOperationsRegisterer { - public: - AllFstOperationsRegisterer() { - RegisterBatch1(); - RegisterBatch2(); - } - - private: - void RegisterBatch1() { - REGISTER_FST_OPERATION(ArcSort, Arc, ArcSortArgs); - REGISTER_FST_OPERATION(Closure, Arc, ClosureArgs); - REGISTER_FST_OPERATION(CompileFst, Arc, FstCompileArgs); - REGISTER_FST_OPERATION(Compose, Arc, ComposeArgs1); - REGISTER_FST_OPERATION(Compose, Arc, ComposeArgs2); - REGISTER_FST_OPERATION(Concat, Arc, ConcatArgs1); - REGISTER_FST_OPERATION(Concat, Arc, ConcatArgs2); - REGISTER_FST_OPERATION(Connect, Arc, MutableFstClass); - REGISTER_FST_OPERATION(Convert, Arc, ConvertArgs); - REGISTER_FST_OPERATION(Decode, Arc, DecodeArgs); - REGISTER_FST_OPERATION(Determinize, Arc, DeterminizeArgs); - REGISTER_FST_OPERATION(Difference, Arc, DifferenceArgs1); - REGISTER_FST_OPERATION(Difference, Arc, DifferenceArgs2); - REGISTER_FST_OPERATION(DrawFst, Arc, FstDrawerArgs); - REGISTER_FST_OPERATION(Encode, Arc, EncodeArgs); - REGISTER_FST_OPERATION(EpsNormalize, Arc, EpsNormalizeArgs); - REGISTER_FST_OPERATION(Equal, Arc, EqualArgs); - REGISTER_FST_OPERATION(Equivalent, Arc, EquivalentArgs); - REGISTER_FST_OPERATION(PrintFstInfo, Arc, InfoArgs); - REGISTER_FST_OPERATION(Intersect, Arc, IntersectArgs1); - REGISTER_FST_OPERATION(Intersect, Arc, IntersectArgs2); - REGISTER_FST_OPERATION(Invert, Arc, MutableFstClass); - REGISTER_FST_OPERATION(Map, Arc, MapArgs); - REGISTER_FST_OPERATION(Minimize, Arc, MinimizeArgs); - } - - void RegisterBatch2() { - REGISTER_FST_OPERATION(PrintFst, Arc, FstPrinterArgs); - REGISTER_FST_OPERATION(Project, Arc, ProjectArgs); - REGISTER_FST_OPERATION(Prune, Arc, PruneArgs1); - REGISTER_FST_OPERATION(Prune, Arc, PruneArgs2); - REGISTER_FST_OPERATION(Prune, Arc, PruneArgs3); - REGISTER_FST_OPERATION(Prune, Arc, PruneArgs4); - REGISTER_FST_OPERATION(Push, Arc, PushArgs1); - REGISTER_FST_OPERATION(Push, Arc, PushArgs2); - 
REGISTER_FST_OPERATION(RandEquivalent, Arc, RandEquivalentArgs1); - REGISTER_FST_OPERATION(RandEquivalent, Arc, RandEquivalentArgs2); - REGISTER_FST_OPERATION(RandGen, Arc, RandGenArgs); - REGISTER_FST_OPERATION(Relabel, Arc, RelabelArgs1); - REGISTER_FST_OPERATION(Relabel, Arc, RelabelArgs2); - REGISTER_FST_OPERATION(Relabel, Arc, RelabelArgs3); - REGISTER_FST_OPERATION(Replace, Arc, ReplaceArgs); - REGISTER_FST_OPERATION(Reverse, Arc, ReverseArgs); - REGISTER_FST_OPERATION(Reweight, Arc, ReweightArgs); - REGISTER_FST_OPERATION(RmEpsilon, Arc, RmEpsilonArgs1); - REGISTER_FST_OPERATION(RmEpsilon, Arc, RmEpsilonArgs2); - REGISTER_FST_OPERATION(RmEpsilon, Arc, RmEpsilonArgs3); - REGISTER_FST_OPERATION(ShortestDistance, Arc, ShortestDistanceArgs1); - REGISTER_FST_OPERATION(ShortestDistance, Arc, ShortestDistanceArgs2); - REGISTER_FST_OPERATION(ShortestDistance, Arc, ShortestDistanceArgs3); - REGISTER_FST_OPERATION(ShortestPath, Arc, ShortestPathArgs1); - REGISTER_FST_OPERATION(ShortestPath, Arc, ShortestPathArgs2); - REGISTER_FST_OPERATION(Synchronize, Arc, SynchronizeArgs); - REGISTER_FST_OPERATION(TopSort, Arc, TopSortArgs); - REGISTER_FST_OPERATION(Union, Arc, UnionArgs); - REGISTER_FST_OPERATION(Verify, Arc, VerifyArgs); - } -}; -} // namespace script -} // namespace fst - - -#define REGISTER_FST_OPERATIONS(Arc) \ - AllFstOperationsRegisterer<Arc> register_all_fst_operations ## Arc; - -#endif // FST_SCRIPT_FSTSCRIPT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/info-impl.h b/kaldi_io/src/tools/openfst/include/fst/script/info-impl.h deleted file mode 100644 index 408fbcd..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/info-impl.h +++ /dev/null @@ -1,325 +0,0 @@ -// info.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Class to compute various information about FSTs, helper class for fstinfo.cc - -#ifndef FST_SCRIPT_INFO_IMPL_H_ -#define FST_SCRIPT_INFO_IMPL_H_ - -#include <string> -#include <vector> -using std::vector; - -#include <fst/connect.h> -#include <fst/dfs-visit.h> -#include <fst/fst.h> -#include <fst/lookahead-matcher.h> -#include <fst/matcher.h> -#include <fst/queue.h> -#include <fst/test-properties.h> -#include <fst/verify.h> -#include <fst/visit.h> - -namespace fst { - -// Compute various information about FSTs, helper class for fstinfo.cc. -// WARNING: Stand-alone use of this class is not recommended, most code -// should call directly the relevant library functions: Fst<A>::NumStates, -// Fst<A>::NumArcs, TestProperties, ... -template <class A> class FstInfo { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - - // When info_type is "short" (or "auto" and not an ExpandedFst) - // then only minimal info is computed and can be requested. - FstInfo(const Fst<A> &fst, bool test_properties, - const string &arc_filter_type = "any", - string info_type = "auto", bool verify = true) - : fst_type_(fst.Type()), - input_symbols_(fst.InputSymbols() ? - fst.InputSymbols()->Name() : "none"), - output_symbols_(fst.OutputSymbols() ? 
- fst.OutputSymbols()->Name() : "none"), - nstates_(0), narcs_(0), start_(kNoStateId), nfinal_(0), - nepsilons_(0), niepsilons_(0), noepsilons_(0), - naccess_(0), ncoaccess_(0), nconnect_(0), ncc_(0), nscc_(0), - input_match_type_(MATCH_NONE), output_match_type_(MATCH_NONE), - input_lookahead_(false), output_lookahead_(false), - properties_(0), arc_filter_type_(arc_filter_type), long_info_(true) { - if (info_type == "long") { - long_info_ = true; - } else if (info_type == "short") { - long_info_ = false; - } else if (info_type == "auto") { - long_info_ = fst.Properties(kExpanded, false); - } else { - FSTERROR() << "Bad info type: " << info_type; - return; - } - - if (!long_info_) - return; - - // If the FST is not sane, we return. - if (verify && !Verify(fst)) { - FSTERROR() << "FstInfo: Verify: FST not well-formed."; - return; - } - - start_ = fst.Start(); - properties_ = fst.Properties(kFstProperties, test_properties); - - for (StateIterator< Fst<A> > siter(fst); - !siter.Done(); - siter.Next()) { - ++nstates_; - StateId s = siter.Value(); - if (fst.Final(s) != Weight::Zero()) - ++nfinal_; - for (ArcIterator< Fst<A> > aiter(fst, s); - !aiter.Done(); - aiter.Next()) { - const A &arc = aiter.Value(); - ++narcs_; - if (arc.ilabel == 0 && arc.olabel == 0) - ++nepsilons_; - if (arc.ilabel == 0) - ++niepsilons_; - if (arc.olabel == 0) - ++noepsilons_; - } - } - - { - vector<StateId> cc; - CcVisitor<Arc> cc_visitor(&cc); - FifoQueue<StateId> fifo_queue; - if (arc_filter_type == "any") { - Visit(fst, &cc_visitor, &fifo_queue); - } else if (arc_filter_type == "epsilon") { - Visit(fst, &cc_visitor, &fifo_queue, EpsilonArcFilter<Arc>()); - } else if (arc_filter_type == "iepsilon") { - Visit(fst, &cc_visitor, &fifo_queue, InputEpsilonArcFilter<Arc>()); - } else if (arc_filter_type == "oepsilon") { - Visit(fst, &cc_visitor, &fifo_queue, OutputEpsilonArcFilter<Arc>()); - } else { - FSTERROR() << "Bad arc filter type: " << arc_filter_type; - return; - } - - for (StateId s = 0; 
s < cc.size(); ++s) { - if (cc[s] >= ncc_) - ncc_ = cc[s] + 1; - } - } - - { - vector<StateId> scc; - vector<bool> access, coaccess; - uint64 props = 0; - SccVisitor<Arc> scc_visitor(&scc, &access, &coaccess, &props); - if (arc_filter_type == "any") { - DfsVisit(fst, &scc_visitor); - } else if (arc_filter_type == "epsilon") { - DfsVisit(fst, &scc_visitor, EpsilonArcFilter<Arc>()); - } else if (arc_filter_type == "iepsilon") { - DfsVisit(fst, &scc_visitor, InputEpsilonArcFilter<Arc>()); - } else if (arc_filter_type == "oepsilon") { - DfsVisit(fst, &scc_visitor, OutputEpsilonArcFilter<Arc>()); - } else { - FSTERROR() << "Bad arc filter type: " << arc_filter_type; - return; - } - - for (StateId s = 0; s < scc.size(); ++s) { - if (access[s]) - ++naccess_; - if (coaccess[s]) - ++ncoaccess_; - if (access[s] && coaccess[s]) - ++nconnect_; - if (scc[s] >= nscc_) - nscc_ = scc[s] + 1; - } - } - - LookAheadMatcher< Fst<A> > imatcher(fst, MATCH_INPUT); - input_match_type_ = imatcher.Type(test_properties); - input_lookahead_ = imatcher.Flags() & kInputLookAheadMatcher; - - LookAheadMatcher< Fst<A> > omatcher(fst, MATCH_OUTPUT); - output_match_type_ = omatcher.Type(test_properties); - output_lookahead_ = omatcher.Flags() & kOutputLookAheadMatcher; - } - - // Short info - const string& FstType() const { return fst_type_; } - const string& ArcType() const { return A::Type(); } - const string& InputSymbols() const { return input_symbols_; } - const string& OutputSymbols() const { return output_symbols_; } - const bool LongInfo() const { return long_info_; } - const string& ArcFilterType() const { return arc_filter_type_; } - - // Long info - MatchType InputMatchType() const { CheckLong(); return input_match_type_; } - MatchType OutputMatchType() const { CheckLong(); return output_match_type_; } - bool InputLookAhead() const { CheckLong(); return input_lookahead_; } - bool OutputLookAhead() const { CheckLong(); return output_lookahead_; } - int64 NumStates() const { CheckLong(); 
return nstates_; } - int64 NumArcs() const { CheckLong(); return narcs_; } - int64 Start() const { CheckLong(); return start_; } - int64 NumFinal() const { CheckLong(); return nfinal_; } - int64 NumEpsilons() const { CheckLong(); return nepsilons_; } - int64 NumInputEpsilons() const { CheckLong(); return niepsilons_; } - int64 NumOutputEpsilons() const { CheckLong(); return noepsilons_; } - int64 NumAccessible() const { CheckLong(); return naccess_; } - int64 NumCoAccessible() const { CheckLong(); return ncoaccess_; } - int64 NumConnected() const { CheckLong(); return nconnect_; } - int64 NumCc() const { CheckLong(); return ncc_; } - int64 NumScc() const { CheckLong(); return nscc_; } - uint64 Properties() const { CheckLong(); return properties_; } - - private: - void CheckLong() const { - if (!long_info_) - FSTERROR() << "FstInfo: method only available with long info version"; - } - - string fst_type_; - string input_symbols_; - string output_symbols_; - int64 nstates_; - int64 narcs_; - int64 start_; - int64 nfinal_; - int64 nepsilons_; - int64 niepsilons_; - int64 noepsilons_; - int64 naccess_; - int64 ncoaccess_; - int64 nconnect_; - int64 ncc_; - int64 nscc_; - MatchType input_match_type_; - MatchType output_match_type_; - bool input_lookahead_; - bool output_lookahead_; - uint64 properties_; - string arc_filter_type_; - bool long_info_; - DISALLOW_COPY_AND_ASSIGN(FstInfo); -}; - -template <class A> -void PrintFstInfo(const FstInfo<A> &fstinfo, bool pipe = false) { - ostream &os = pipe ? 
cerr : cout; - - ios_base::fmtflags old = os.setf(ios::left); - os.width(50); - os << "fst type" << fstinfo.FstType() << endl; - os.width(50); - os << "arc type" << fstinfo.ArcType() << endl; - os.width(50); - os << "input symbol table" << fstinfo.InputSymbols() << endl; - os.width(50); - os << "output symbol table" << fstinfo.OutputSymbols() << endl; - - if (!fstinfo.LongInfo()) { - os.setf(old); - return; - } - - os.width(50); - os << "# of states" << fstinfo.NumStates() << endl; - os.width(50); - os << "# of arcs" << fstinfo.NumArcs() << endl; - os.width(50); - os << "initial state" << fstinfo.Start() << endl; - os.width(50); - os << "# of final states" << fstinfo.NumFinal() << endl; - os.width(50); - os << "# of input/output epsilons" << fstinfo.NumEpsilons() << endl; - os.width(50); - os << "# of input epsilons" << fstinfo.NumInputEpsilons() << endl; - os.width(50); - os << "# of output epsilons" << fstinfo.NumOutputEpsilons() << endl; - os.width(50); - - string arc_type = ""; - if (fstinfo.ArcFilterType() == "epsilon") - arc_type = "epsilon "; - else if (fstinfo.ArcFilterType() == "iepsilon") - arc_type = "input-epsilon "; - else if (fstinfo.ArcFilterType() == "oepsilon") - arc_type = "output-epsilon "; - - string accessible_label = "# of " + arc_type + "accessible states"; - os.width(50); - os << accessible_label << fstinfo.NumAccessible() << endl; - string coaccessible_label = "# of " + arc_type + "coaccessible states"; - os.width(50); - os << coaccessible_label << fstinfo.NumCoAccessible() << endl; - string connected_label = "# of " + arc_type + "connected states"; - os.width(50); - os << connected_label << fstinfo.NumConnected() << endl; - string numcc_label = "# of " + arc_type + "connected components"; - os.width(50); - os << numcc_label << fstinfo.NumCc() << endl; - string numscc_label = "# of " + arc_type + "strongly conn components"; - os.width(50); - os << numscc_label << fstinfo.NumScc() << endl; - - os.width(50); - os << "input matcher" - << 
(fstinfo.InputMatchType() == MATCH_INPUT ? 'y' : - fstinfo.InputMatchType() == MATCH_NONE ? 'n' : '?') << endl; - os.width(50); - os << "output matcher" - << (fstinfo.OutputMatchType() == MATCH_OUTPUT ? 'y' : - fstinfo.OutputMatchType() == MATCH_NONE ? 'n' : '?') << endl; - os.width(50); - os << "input lookahead" - << (fstinfo.InputLookAhead() ? 'y' : 'n') << endl; - os.width(50); - os << "output lookahead" - << (fstinfo.OutputLookAhead() ? 'y' : 'n') << endl; - - uint64 prop = 1; - for (int i = 0; i < 64; ++i, prop <<= 1) { - if (prop & kBinaryProperties) { - char value = 'n'; - if (fstinfo.Properties() & prop) value = 'y'; - os.width(50); - os << PropertyNames[i] << value << endl; - } else if (prop & kPosTrinaryProperties) { - char value = '?'; - if (fstinfo.Properties() & prop) value = 'y'; - else if (fstinfo.Properties() & prop << 1) value = 'n'; - os.width(50); - os << PropertyNames[i] << value << endl; - } - } - os.setf(old); -} - -} // namespace fst - -#endif // FST_SCRIPT_INFO_IMPL_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/info.h b/kaldi_io/src/tools/openfst/include/fst/script/info.h deleted file mode 100644 index f434bd5..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/info.h +++ /dev/null @@ -1,48 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_INFO_H_ -#define FST_SCRIPT_INFO_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/script/info-impl.h> - -namespace fst { -namespace script { - -typedef args::Package<const FstClass&, bool, const string&, - const string&, bool, bool> InfoArgs; - -template<class Arc> -void PrintFstInfo(InfoArgs *args) { - const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>()); - FstInfo<Arc> fstinfo(fst, args->arg2, args->arg3, - args->arg4, args->arg5); - PrintFstInfo(fstinfo, args->arg6); - - if (args->arg6) - fst.Write(""); -} - -void PrintFstInfo(const FstClass &f, bool test_properties, - const string &arc_filter, const string &info_type, - bool pipe, bool verify); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_INFO_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/intersect.h b/kaldi_io/src/tools/openfst/include/fst/script/intersect.h deleted file mode 100644 index 8011024..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/intersect.h +++ /dev/null @@ -1,65 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_INTERSECT_H_ -#define FST_SCRIPT_INTERSECT_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/intersect.h> -#include <fst/script/compose.h> // for ComposeOptions, ComposeFilter - -namespace fst { -namespace script { - -typedef args::Package<const FstClass&, const FstClass&, - MutableFstClass*, ComposeFilter> IntersectArgs1; - -template<class Arc> -void Intersect(IntersectArgs1 *args) { - const Fst<Arc> &ifst1 = *(args->arg1.GetFst<Arc>()); - const Fst<Arc> &ifst2 = *(args->arg2.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>(); - - Intersect(ifst1, ifst2, ofst, args->arg4); -} - -typedef args::Package<const FstClass&, const FstClass&, - MutableFstClass*, const ComposeOptions &> IntersectArgs2; - -template<class Arc> -void Intersect(IntersectArgs2 *args) { - const Fst<Arc> &ifst1 = *(args->arg1.GetFst<Arc>()); - const Fst<Arc> &ifst2 = *(args->arg2.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>(); - - Intersect(ifst1, ifst2, ofst, args->arg4); -} - -void Intersect(const FstClass &ifst1, const FstClass &ifst2, - MutableFstClass *ofst, - ComposeFilter compose_filter); - -void Intersect(const FstClass &ifst, const FstClass &ifst2, - MutableFstClass *ofst, - const ComposeOptions &opts = fst::script::ComposeOptions()); - -} // namespace script -} // namespace fst - - - -#endif // FST_SCRIPT_INTERSECT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/invert.h b/kaldi_io/src/tools/openfst/include/fst/script/invert.h deleted file mode 100644 index 1befd9f..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/invert.h +++ /dev/null @@ -1,43 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_INVERT_H_ -#define FST_SCRIPT_INVERT_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/invert.h> - -namespace fst { -namespace script { - -// The following confuses swig, because it has the same arguments -// as the non-templated version -#ifndef SWIG -template<class Arc> -void Invert(MutableFstClass *fst) { - MutableFst<Arc> *typed_fst = fst->GetMutableFst<Arc>(); - - Invert(typed_fst); -} -#endif - -void Invert(MutableFstClass *fst); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_INVERT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/map.h b/kaldi_io/src/tools/openfst/include/fst/script/map.h deleted file mode 100644 index 3caaa9f..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/map.h +++ /dev/null @@ -1,123 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_MAP_H_ -#define FST_SCRIPT_MAP_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/script/weight-class.h> -#include <fst/arc-map.h> -#include <fst/state-map.h> - -namespace fst { -namespace script { - -template <class M> -Fst<typename M::ToArc> *ArcMap(const Fst<typename M::FromArc> &fst, - const M &mapper) { - typedef typename M::ToArc ToArc; - VectorFst<ToArc> *ofst = new VectorFst<ToArc>; - ArcMap(fst, ofst, mapper); - return ofst; -} - -template <class M> -Fst<typename M::ToArc> *StateMap(const Fst<typename M::FromArc> &fst, - const M &mapper) { - typedef typename M::ToArc ToArc; - VectorFst<ToArc> *ofst = new VectorFst<ToArc>; - StateMap(fst, ofst, mapper); - return ofst; -} - -enum MapType { ARC_SUM_MAPPER, IDENTITY_MAPPER, INVERT_MAPPER, PLUS_MAPPER, - QUANTIZE_MAPPER, RMWEIGHT_MAPPER, SUPERFINAL_MAPPER, - TIMES_MAPPER, TO_LOG_MAPPER, TO_LOG64_MAPPER, TO_STD_MAPPER }; - -typedef args::Package<const FstClass&, MapType, float, - const WeightClass &> MapInnerArgs; -typedef args::WithReturnValue<FstClass*, MapInnerArgs> MapArgs; - -template <class Arc> -void Map(MapArgs *args) { - const Fst<Arc> &ifst = *(args->args.arg1.GetFst<Arc>()); - MapType map_type = args->args.arg2; - float delta = args->args.arg3; - typename Arc::Weight w = *(args->args.arg4.GetWeight<typename Arc::Weight>()); - - Fst<Arc> *fst = NULL; - Fst<LogArc> *lfst = NULL; - Fst<Log64Arc> *l64fst = NULL; - Fst<StdArc> *sfst = NULL; - if (map_type == ARC_SUM_MAPPER) { - args->retval = new FstClass(*(fst = - script::StateMap(ifst, ArcSumMapper<Arc>(ifst)))); - } else if (map_type == IDENTITY_MAPPER) { - args->retval = new FstClass(*(fst = - script::ArcMap(ifst, IdentityArcMapper<Arc>()))); - } else if (map_type == INVERT_MAPPER) { - args->retval = new FstClass(*(fst = - script::ArcMap(ifst, InvertWeightMapper<Arc>()))); - } else if (map_type == PLUS_MAPPER) { - args->retval = new 
FstClass(*(fst = - script::ArcMap(ifst, PlusMapper<Arc>(w)))); - } else if (map_type == QUANTIZE_MAPPER) { - args->retval = new FstClass(*(fst = - script::ArcMap(ifst, QuantizeMapper<Arc>(delta)))); - } else if (map_type == RMWEIGHT_MAPPER) { - args->retval = new FstClass(*(fst = - script::ArcMap(ifst, RmWeightMapper<Arc>()))); - } else if (map_type == SUPERFINAL_MAPPER) { - args->retval = new FstClass(*(fst = - script::ArcMap(ifst, SuperFinalMapper<Arc>()))); - } else if (map_type == TIMES_MAPPER) { - args->retval = new FstClass(*(fst = - script::ArcMap(ifst, TimesMapper<Arc>(w)))); - } else if (map_type == TO_LOG_MAPPER) { - args->retval = new FstClass(*(lfst = - script::ArcMap(ifst, WeightConvertMapper<Arc, LogArc>()))); - } else if (map_type == TO_LOG64_MAPPER) { - args->retval = new FstClass(*(l64fst = - script::ArcMap(ifst, WeightConvertMapper<Arc, Log64Arc>()))); - } else if (map_type == TO_STD_MAPPER) { - args->retval = new FstClass(*(sfst = - script::ArcMap(ifst, WeightConvertMapper<Arc, StdArc>()))); - } else { - FSTERROR() << "Error: unknown/unsupported mapper type: " - << map_type; - VectorFst<Arc> *ofst = new VectorFst<Arc>; - ofst->SetProperties(kError, kError); - args->retval = new FstClass(*(fst =ofst)); - } - delete sfst; - delete l64fst; - delete lfst; - delete fst; -} - - -#ifdef SWIG -%newobject Map; -#endif -FstClass *Map(const FstClass& f, MapType map_type, - float delta = fst::kDelta, - const WeightClass &w = fst::script::WeightClass::Zero()); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_MAP_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/minimize.h b/kaldi_io/src/tools/openfst/include/fst/script/minimize.h deleted file mode 100644 index f250d03..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/minimize.h +++ /dev/null @@ -1,45 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_MINIMIZE_H_ -#define FST_SCRIPT_MINIMIZE_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/minimize.h> - -namespace fst { -namespace script { - -typedef args::Package<MutableFstClass*, MutableFstClass*, float> MinimizeArgs; - -template<class Arc> -void Minimize(MinimizeArgs *args) { - MutableFst<Arc> *ofst1 = args->arg1->GetMutableFst<Arc>(); - MutableFst<Arc> *ofst2 = args->arg2 ? args->arg2->GetMutableFst<Arc>() : 0; - - Minimize(ofst1, ofst2, args->arg3); -} - -void Minimize(MutableFstClass *ofst1, MutableFstClass *ofst2 = 0, - float delta = kDelta); - -} // namespace script -} // namespace fst - - - -#endif // FST_SCRIPT_MINIMIZE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/print-impl.h b/kaldi_io/src/tools/openfst/include/fst/script/print-impl.h deleted file mode 100644 index 1433a29..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/print-impl.h +++ /dev/null @@ -1,149 +0,0 @@ -// print.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Stand-alone class to print out binary FSTs in the AT&T format, -// helper class for fstprint.cc - -#ifndef FST_SCRIPT_PRINT_IMPL_H_ -#define FST_SCRIPT_PRINT_IMPL_H_ - -#include <sstream> -#include <string> - -#include <fst/fst.h> -#include <fst/util.h> - -DECLARE_string(fst_field_separator); - -namespace fst { - -// Print a binary Fst in textual format, helper class for fstprint.cc -// WARNING: Stand-alone use of this class not recommended, most code should -// read/write using the binary format which is much more efficient. -template <class A> class FstPrinter { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - - FstPrinter(const Fst<A> &fst, - const SymbolTable *isyms, - const SymbolTable *osyms, - const SymbolTable *ssyms, - bool accep, - bool show_weight_one) - : fst_(fst), isyms_(isyms), osyms_(osyms), ssyms_(ssyms), - accep_(accep && fst.Properties(kAcceptor, true)), ostrm_(0), - show_weight_one_(show_weight_one) {} - - // Print Fst to an output stream - void Print(ostream *ostrm, const string &dest) { - ostrm_ = ostrm; - dest_ = dest; - StateId start = fst_.Start(); - if (start == kNoStateId) - return; - // initial state first - PrintState(start); - for (StateIterator< Fst<A> > siter(fst_); - !siter.Done(); - siter.Next()) { - StateId s = siter.Value(); - if (s != start) - PrintState(s); - } - } - - private: - // Maximum line length in text file. 
- static const int kLineLen = 8096; - - void PrintId(int64 id, const SymbolTable *syms, - const char *name) const { - if (syms) { - string symbol = syms->Find(id); - if (symbol == "") { - FSTERROR() << "FstPrinter: Integer " << id - << " is not mapped to any textual symbol" - << ", symbol table = " << syms->Name() - << ", destination = " << dest_; - symbol = "?"; - } - *ostrm_ << symbol; - } else { - *ostrm_ << id; - } - } - - void PrintStateId(StateId s) const { - PrintId(s, ssyms_, "state ID"); - } - - void PrintILabel(Label l) const { - PrintId(l, isyms_, "arc input label"); - } - - void PrintOLabel(Label l) const { - PrintId(l, osyms_, "arc output label"); - } - - void PrintState(StateId s) const { - bool output = false; - for (ArcIterator< Fst<A> > aiter(fst_, s); - !aiter.Done(); - aiter.Next()) { - Arc arc = aiter.Value(); - PrintStateId(s); - *ostrm_ << FLAGS_fst_field_separator[0]; - PrintStateId(arc.nextstate); - *ostrm_ << FLAGS_fst_field_separator[0]; - PrintILabel(arc.ilabel); - if (!accep_) { - *ostrm_ << FLAGS_fst_field_separator[0]; - PrintOLabel(arc.olabel); - } - if (show_weight_one_ || arc.weight != Weight::One()) - *ostrm_ << FLAGS_fst_field_separator[0] << arc.weight; - *ostrm_ << "\n"; - output = true; - } - Weight final = fst_.Final(s); - if (final != Weight::Zero() || !output) { - PrintStateId(s); - if (show_weight_one_ || final != Weight::One()) { - *ostrm_ << FLAGS_fst_field_separator[0] << final; - } - *ostrm_ << "\n"; - } - } - - const Fst<A> &fst_; - const SymbolTable *isyms_; // ilabel symbol table - const SymbolTable *osyms_; // olabel symbol table - const SymbolTable *ssyms_; // slabel symbol table - bool accep_; // print as acceptor when possible - ostream *ostrm_; // text FST destination - string dest_; // text FST destination name - bool show_weight_one_; // print weights equal to Weight::One() - DISALLOW_COPY_AND_ASSIGN(FstPrinter); -}; - -} // namespace fst - -#endif // FST_SCRIPT_PRINT_IMPL_H_ diff --git 
a/kaldi_io/src/tools/openfst/include/fst/script/print.h b/kaldi_io/src/tools/openfst/include/fst/script/print.h deleted file mode 100644 index f82b19b..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/print.h +++ /dev/null @@ -1,86 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_PRINT_H_ -#define FST_SCRIPT_PRINT_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/script/print-impl.h> - -namespace fst { -namespace script { - -// Note: it is safe to pass these strings as references because -// this struct is only used to pass them deeper in the call graph. -// Be sure you understand why this is so before using this struct -// for anything else! 
-struct FstPrinterArgs { - const FstClass &fst; - const SymbolTable *isyms; - const SymbolTable *osyms; - const SymbolTable *ssyms; - const bool accept; - const bool show_weight_one; - ostream *ostrm; - const string &dest; - - FstPrinterArgs(const FstClass &fst, - const SymbolTable *isyms, - const SymbolTable *osyms, - const SymbolTable *ssyms, - bool accept, - bool show_weight_one, - ostream *ostrm, - const string &dest) : - fst(fst), isyms(isyms), osyms(osyms), ssyms(ssyms), accept(accept), - show_weight_one(show_weight_one), ostrm(ostrm), dest(dest) { } -}; - -template<class Arc> -void PrintFst(FstPrinterArgs *args) { - const Fst<Arc> &fst = *(args->fst.GetFst<Arc>()); - - fst::FstPrinter<Arc> fstprinter(fst, args->isyms, args->osyms, - args->ssyms, args->accept, - args->show_weight_one); - fstprinter.Print(args->ostrm, args->dest); -} - -void PrintFst(const FstClass &fst, ostream &ostrm, const string &dest, - const SymbolTable *isyms, - const SymbolTable *osyms, - const SymbolTable *ssyms, - bool accept, bool show_weight_one); - - -// Below are two printing methods with useful defaults for a few of -// the fst printer arguments. -template <class Arc> -void PrintFst(const Fst<Arc> &fst, ostream &os, const string dest = "", - const SymbolTable *isyms = NULL, - const SymbolTable *osyms = NULL, - const SymbolTable *ssyms = NULL) { - fst::FstPrinter<Arc> fstprinter(fst, isyms, osyms, ssyms, true, true); - fstprinter.Print(&os, dest); -} - -} // namespace script -} // namespace fst - - - -#endif // FST_SCRIPT_PRINT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/project.h b/kaldi_io/src/tools/openfst/include/fst/script/project.h deleted file mode 100644 index 12ee890..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/project.h +++ /dev/null @@ -1,43 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_PROJECT_H_ -#define FST_SCRIPT_PROJECT_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/project.h> // for ProjectType - -namespace fst { -namespace script { - -typedef args::Package<MutableFstClass*, ProjectType> ProjectArgs; - -template<class Arc> -void Project(ProjectArgs *args) { - MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>(); - - Project(ofst, args->arg2); -} - -void Project(MutableFstClass *ofst, ProjectType project_type); - -} // namespace script -} // namespace fst - - - -#endif // FST_SCRIPT_PROJECT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/prune.h b/kaldi_io/src/tools/openfst/include/fst/script/prune.h deleted file mode 100644 index 7118ff1..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/prune.h +++ /dev/null @@ -1,153 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_PRUNE_H_ -#define FST_SCRIPT_PRUNE_H_ - -#include <vector> -using std::vector; - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/script/weight-class.h> -#include <fst/prune.h> -#include <fst/arcfilter.h> - -namespace fst { -namespace script { - -struct PruneOptions { - WeightClass weight_threshold; - int64 state_threshold; - const vector<WeightClass> *distance; - float delta; - - explicit PruneOptions(const WeightClass& w, int64 s, - vector<WeightClass> *d = 0, float e = kDelta) - : weight_threshold(w), - state_threshold(s), - distance(d), - delta(e) {} - private: - PruneOptions(); // disallow -}; - -// converts a script::PruneOptions into a fst::PruneOptions. -// Notes: -// If the original opts.distance is not NULL, a new distance will be -// created with new; it's the client's responsibility to delete this. - -template<class A> -fst::PruneOptions<A, AnyArcFilter<A> > ConvertPruneOptions( - const PruneOptions &opts) { - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - Weight weight_threshold = *(opts.weight_threshold.GetWeight<Weight>()); - StateId state_threshold = opts.state_threshold; - vector<Weight> *distance = 0; - - if (opts.distance) { - distance = new vector<Weight>(opts.distance->size()); - for (unsigned i = 0; i < opts.distance->size(); ++i) { - (*distance)[i] = *((*opts.distance)[i].GetWeight<Weight>()); - } - } - - return fst::PruneOptions<A, AnyArcFilter<A> >( - weight_threshold, state_threshold, AnyArcFilter<A>(), distance, - opts.delta); -} - -// 1 -typedef args::Package<MutableFstClass *, const PruneOptions &> PruneArgs1; - -template<class Arc> -void Prune(PruneArgs1 *args) { - MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>(); - - typedef typename Arc::Weight Weight; - typedef typename Arc::StateId StateId; - - fst::PruneOptions<Arc, AnyArcFilter<Arc> > opts = - ConvertPruneOptions<Arc>(args->arg2); - 
Prune(ofst, opts); - delete opts.distance; -} - -// 2 -typedef args::Package<const FstClass &, MutableFstClass *, - const PruneOptions &> PruneArgs2; - -template<class Arc> -void Prune(PruneArgs2 *args) { - const Fst<Arc>& ifst = *(args->arg1.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); - - fst::PruneOptions<Arc, AnyArcFilter<Arc> > opts = - ConvertPruneOptions<Arc>(args->arg3); - Prune(ifst, ofst, opts); - delete opts.distance; -} - -// 3 -typedef args::Package<const FstClass &, - MutableFstClass *, - const WeightClass &, int64, float> PruneArgs3; - -template<class Arc> -void Prune(PruneArgs3 *args) { - const Fst<Arc>& ifst = *(args->arg1.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); - typename Arc::Weight w = *(args->arg3.GetWeight<typename Arc::Weight>()); - - Prune(ifst, ofst, w, args->arg4, args->arg5); -} - -// 4 -typedef args::Package<MutableFstClass *, const WeightClass&, - int64, float> PruneArgs4; -template<class Arc> -void Prune(PruneArgs4 *args) { - MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>(); - typename Arc::Weight w = *(args->arg2.GetWeight<typename Arc::Weight>()); - Prune(fst, w, args->arg3, args->arg4); -} - - -// 1 -void Prune(MutableFstClass *fst, const PruneOptions &opts); - -// 2 -void Prune(const FstClass &ifst, MutableFstClass *fst, - const PruneOptions &opts); - -// 3 -void Prune(const FstClass &ifst, MutableFstClass *ofst, - const WeightClass &weight_threshold, - int64 state_threshold = kNoStateId, - float delta = kDelta); - -// 4 -void Prune(MutableFstClass *fst, const WeightClass& weight_threshold, - int64 state_threshold, float delta); - -} // namespace script -} // namespace fst - - - -#endif // FST_SCRIPT_PRUNE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/push.h b/kaldi_io/src/tools/openfst/include/fst/script/push.h deleted file mode 100644 index cebd655..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/push.h +++ /dev/null @@ -1,70 
+0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_PUSH_H_ -#define FST_SCRIPT_PUSH_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/push.h> - -namespace fst { -namespace script { - -// 1 -typedef args::Package<MutableFstClass*, ReweightType, float, bool> PushArgs1; - -template<class Arc> -void Push(PushArgs1 *args) { - MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>(); - - if (args->arg2 == REWEIGHT_TO_FINAL) { - fst::Push(ofst, REWEIGHT_TO_FINAL, args->arg3, args->arg4); - } else { - fst::Push(ofst, REWEIGHT_TO_INITIAL, args->arg3, args->arg4); - } -} - -// 2 -typedef args::Package<const FstClass &, MutableFstClass *, uint32, - ReweightType, float> PushArgs2; - -template<class Arc> -void Push(PushArgs2 *args) { - const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); - - if (args->arg4 == REWEIGHT_TO_FINAL) { - fst::Push<Arc, REWEIGHT_TO_FINAL>(ifst, ofst, args->arg3, args->arg5); - } else { - fst::Push<Arc, REWEIGHT_TO_INITIAL>(ifst, ofst, args->arg3, args->arg5); - } -} - -// 1 -void Push(MutableFstClass *ofst, ReweightType type, float delta = kDelta, - bool remove_total_weight = false); - -// 2 -void Push(const FstClass &ifst, MutableFstClass *ofst, uint32 flags, - ReweightType dir, float delta); - -} // namespace script -} // namespace 
fst - - - -#endif // FST_SCRIPT_PUSH_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/randequivalent.h b/kaldi_io/src/tools/openfst/include/fst/script/randequivalent.h deleted file mode 100644 index b929683..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/randequivalent.h +++ /dev/null @@ -1,105 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_RANDEQUIVALENT_H_ -#define FST_SCRIPT_RANDEQUIVALENT_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/script/randgen.h> // for RandArcSelection -#include <fst/randequivalent.h> - -namespace fst { -namespace script { - -// 1 -typedef args::Package<const FstClass&, const FstClass&, - int32, float, int, int> RandEquivalentInnerArgs1; -typedef args::WithReturnValue<bool, - RandEquivalentInnerArgs1> RandEquivalentArgs1; - -template<class Arc> -void RandEquivalent(RandEquivalentArgs1 *args) { - const Fst<Arc> &fst1 = *(args->args.arg1.GetFst<Arc>()); - const Fst<Arc> &fst2 = *(args->args.arg2.GetFst<Arc>()); - - args->retval = RandEquivalent(fst1, fst2, args->args.arg3, args->args.arg4, - args->args.arg5, args->args.arg6); -} - -// 2 -typedef args::Package<const FstClass &, const FstClass &, int32, - ssize_t, float, - const RandGenOptions<RandArcSelection> &> - RandEquivalentInnerArgs2; - -typedef args::WithReturnValue<bool, - 
RandEquivalentInnerArgs2> RandEquivalentArgs2; - -template<class Arc> -void RandEquivalent(RandEquivalentArgs2 *args) { - const Fst<Arc> &fst1 = *(args->args.arg1.GetFst<Arc>()); - const Fst<Arc> &fst2 = *(args->args.arg2.GetFst<Arc>()); - const RandGenOptions<RandArcSelection> &opts = args->args.arg6; - int32 seed = args->args.arg3; - - if (opts.arc_selector == UNIFORM_ARC_SELECTOR) { - UniformArcSelector<Arc> arc_selector(seed); - RandGenOptions< UniformArcSelector<Arc> > - ropts(arc_selector, opts.max_length, opts.npath); - - args->retval = RandEquivalent(fst1, fst2, args->args.arg4, - args->args.arg5, ropts); - } else if (opts.arc_selector == FAST_LOG_PROB_ARC_SELECTOR) { - FastLogProbArcSelector<Arc> arc_selector(seed); - RandGenOptions< FastLogProbArcSelector<Arc> > - ropts(arc_selector, opts.max_length, opts.npath); - - args->retval = RandEquivalent(fst1, fst2, args->args.arg4, - args->args.arg5, ropts); - } else { - LogProbArcSelector<Arc> arc_selector(seed); - RandGenOptions< LogProbArcSelector<Arc> > - ropts(arc_selector, opts.max_length, opts.npath); - args->retval = RandEquivalent(fst1, fst2, args->args.arg4, - args->args.arg5, ropts); - } -} - - -// 1 -bool RandEquivalent(const FstClass &fst1, - const FstClass &fst2, - int32 seed = time(0), - ssize_t num_paths = 1, - float delta = fst::kDelta, - int path_length = INT_MAX); - -// 2 -bool RandEquivalent(const FstClass &fst1, - const FstClass &fst2, - int32 seed, - ssize_t num_paths, - float delta, - const fst::RandGenOptions< - fst::script::RandArcSelection> &opts); - -} // namespace script -} // namespace fst - - - -#endif // FST_SCRIPT_RANDEQUIVALENT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/randgen.h b/kaldi_io/src/tools/openfst/include/fst/script/randgen.h deleted file mode 100644 index 817f9c1..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/randgen.h +++ /dev/null @@ -1,76 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not 
use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_RANDGEN_H_ -#define FST_SCRIPT_RANDGEN_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/randgen.h> - -namespace fst { -namespace script { - -enum RandArcSelection { - UNIFORM_ARC_SELECTOR, - LOG_PROB_ARC_SELECTOR, - FAST_LOG_PROB_ARC_SELECTOR -}; - -typedef args::Package<const FstClass &, MutableFstClass*, int32, - const RandGenOptions<RandArcSelection> &> RandGenArgs; - -template<class Arc> -void RandGen(RandGenArgs *args) { - const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); - int32 seed = args->arg3; - const RandGenOptions<RandArcSelection> &opts = args->arg4; - - if (opts.arc_selector == UNIFORM_ARC_SELECTOR) { - UniformArcSelector<Arc> arc_selector(seed); - RandGenOptions< UniformArcSelector<Arc> > - ropts(arc_selector, opts.max_length, - opts.npath, opts.weighted); - RandGen(ifst, ofst, ropts); - } else if (opts.arc_selector == FAST_LOG_PROB_ARC_SELECTOR) { - FastLogProbArcSelector<Arc> arc_selector(seed); - RandGenOptions< FastLogProbArcSelector<Arc> > - ropts(arc_selector, opts.max_length, - opts.npath, opts.weighted); - RandGen(ifst, ofst, ropts); - } else { - LogProbArcSelector<Arc> arc_selector(seed); - RandGenOptions< LogProbArcSelector<Arc> > - ropts(arc_selector, opts.max_length, - opts.npath, opts.weighted); - RandGen(ifst, ofst, ropts); - } -} - - -// Client-facing 
prototype -void RandGen(const FstClass &ifst, MutableFstClass *ofst, int32 seed = time(0), - const RandGenOptions<RandArcSelection> &opts = - fst::RandGenOptions<fst::script::RandArcSelection>( - fst::script::UNIFORM_ARC_SELECTOR)); - -} // namespace script -} // namespace fst - - - -#endif // FST_SCRIPT_RANDGEN_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/register.h b/kaldi_io/src/tools/openfst/include/fst/script/register.h deleted file mode 100644 index 03e0e36..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/register.h +++ /dev/null @@ -1,120 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_REGISTER_H_ -#define FST_SCRIPT_REGISTER_H_ - -#include <string> - -#include <fst/generic-register.h> -#include <fst/script/fst-class.h> -#include <fst/script/weight-class.h> - -// Holds methods and classes responsible for maintaining -// the register for FstClass arc types. - -namespace fst { -namespace script { - -// -// Registers for reading and converting various kinds of FST classes. -// - -// This class definition is to avoid a nested class definition inside -// the IORegistration struct. 
-template<class Reader, class Creator, class Converter> -struct FstClassRegEntry { - Reader reader; - Creator creator; - Converter converter; - - FstClassRegEntry(Reader r, Creator cr, Converter co) : - reader(r), creator(cr), converter(co) { } - FstClassRegEntry() : reader(0), creator(0), converter(0) { } -}; - -template<class Reader, class Creator, class Converter> -class FstClassIORegister - : public GenericRegister<string, - FstClassRegEntry<Reader, Creator, Converter>, - FstClassIORegister<Reader, Creator, - Converter> > { - public: - Reader GetReader(const string &arc_type) const { - return this->GetEntry(arc_type).reader; - } - - Creator GetCreator(const string &arc_type) const { - return this->GetEntry(arc_type).creator; - } - - Converter GetConverter(const string &arc_type) const { - return this->GetEntry(arc_type).converter; - } - - protected: - virtual string ConvertKeyToSoFilename( - const string& key) const { - string legal_type(key); - ConvertToLegalCSymbol(&legal_type); - - return legal_type + "-arc.so"; - } -}; - -// -// Struct containing everything needed to register a particular type -// of FST class (e.g. 
a plain FstClass, or a MutableFstClass, etc) -// -template<class FstClassType> -struct IORegistration { - typedef FstClassType *(*Reader)(istream &stream, - const FstReadOptions &opts); - - typedef FstClassImplBase *(*Creator)(); - typedef FstClassImplBase *(*Converter)(const FstClass &other); - - typedef FstClassRegEntry<Reader, Creator, Converter> Entry; - - // FST class Register - typedef FstClassIORegister<Reader, Creator, Converter> Register; - - // FST class Register-er - typedef GenericRegisterer<FstClassIORegister<Reader, Creator, Converter> > - Registerer; -}; - - -// -// REGISTRATION MACROS -// - -#define REGISTER_FST_CLASS(Class, Arc) \ - static IORegistration<Class>::Registerer Class ## _ ## Arc ## _registerer( \ - Arc::Type(), \ - IORegistration<Class>::Entry(Class::Read<Arc>, \ - Class::Create<Arc>, \ - Class::Convert<Arc>)) - -#define REGISTER_FST_CLASSES(Arc) \ - REGISTER_FST_CLASS(FstClass, Arc); \ - REGISTER_FST_CLASS(MutableFstClass, Arc); \ - REGISTER_FST_CLASS(VectorFstClass, Arc); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_REGISTER_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/relabel.h b/kaldi_io/src/tools/openfst/include/fst/script/relabel.h deleted file mode 100644 index 6bbb4c5..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/relabel.h +++ /dev/null @@ -1,102 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_RELABEL_H_ -#define FST_SCRIPT_RELABEL_H_ - -#include <utility> -using std::pair; using std::make_pair; -#include <algorithm> -#include <vector> -using std::vector; - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/relabel.h> - -namespace fst { -namespace script { - -// 1 -typedef args::Package<MutableFstClass *, - const SymbolTable *, const SymbolTable *, bool, - const SymbolTable *, const SymbolTable *, - bool> RelabelArgs1; - -template<class Arc> -void Relabel(RelabelArgs1 *args) { - MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>(); - - Relabel(ofst, args->arg2, args->arg3, args->arg4, - args->arg5, args->arg6, args->arg7); -} - -// 2 -typedef args::Package<MutableFstClass*, - const vector<pair<int64, int64> > &, - const vector<pair<int64, int64> > > RelabelArgs2; - -template<class Arc> -void Relabel(RelabelArgs2 *args) { - MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>(); - - // In case int64 is not the same as Arc::Label, - // copy the reassignments - typedef typename Arc::Label Label; - - vector<pair<Label, Label> > converted_ipairs(args->arg2.size()); - copy(args->arg2.begin(), args->arg2.end(), converted_ipairs.begin()); - - vector<pair<Label, Label> > converted_opairs(args->arg3.size()); - copy(args->arg3.begin(), args->arg3.end(), converted_opairs.begin()); - - Relabel(ofst, converted_ipairs, converted_opairs); -} - -// 3 -typedef args::Package<MutableFstClass*, const SymbolTable*, - const SymbolTable*> RelabelArgs3; -template<class Arc> -void Relabel(args::Package<MutableFstClass*, const SymbolTable*, - const SymbolTable*> *args) { - MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>(); - Relabel(fst, args->arg2, args->arg3); -} - - -// 1 -void Relabel(MutableFstClass *ofst, - const SymbolTable *old_isyms, const SymbolTable *relabel_isyms, - bool attach_new_isyms, - const SymbolTable *old_osyms, const SymbolTable *relabel_osyms, - 
bool attch_new_osyms); - -// 2 -void Relabel(MutableFstClass *ofst, - const vector<pair<int64, int64> > &ipairs, - const vector<pair<int64, int64> > &opairs); - - -// 3 -void Relabel(MutableFstClass *fst, - const SymbolTable *new_isymbols, - const SymbolTable *new_osymbols); - - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_RELABEL_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/replace.h b/kaldi_io/src/tools/openfst/include/fst/script/replace.h deleted file mode 100644 index 5eaf5bf..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/replace.h +++ /dev/null @@ -1,62 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_REPLACE_H_ -#define FST_SCRIPT_REPLACE_H_ - -#include <utility> -using std::pair; using std::make_pair; -#include <vector> -using std::vector; - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/replace.h> - -namespace fst { -namespace script { - -typedef args::Package<const vector<pair<int64, const FstClass *> > &, - MutableFstClass *, const int64, bool> ReplaceArgs; - -template<class Arc> -void Replace(ReplaceArgs *args) { - // Now that we know the arc type, we construct a vector of - // pair<real label, real fst> that the real Replace will use - const vector<pair<int64, const FstClass *> >& untyped_tuples = - args->arg1; - - vector<pair<typename Arc::Label, const Fst<Arc> *> > fst_tuples( - untyped_tuples.size()); - - for (unsigned i = 0; i < untyped_tuples.size(); ++i) { - fst_tuples[i].first = untyped_tuples[i].first; // convert label - fst_tuples[i].second = untyped_tuples[i].second->GetFst<Arc>(); - } - - MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); - - Replace(fst_tuples, ofst, args->arg3, args->arg4); -} - -void Replace(const vector<pair<int64, const FstClass *> > &tuples, - MutableFstClass *ofst, const int64 &root, - bool epsilon_on_replace = false); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_REPLACE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/reverse.h b/kaldi_io/src/tools/openfst/include/fst/script/reverse.h deleted file mode 100644 index 3930875..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/reverse.h +++ /dev/null @@ -1,42 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_REVERSE_H_ -#define FST_SCRIPT_REVERSE_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/reverse.h> - -namespace fst { -namespace script { - -typedef args::Package<const FstClass &, MutableFstClass *> ReverseArgs; - -template<class Arc> -void Reverse(ReverseArgs *args) { - const Fst<Arc> &fst1 = *(args->arg1.GetFst<Arc>()); - MutableFst<Arc> *fst2 = args->arg2->GetMutableFst<Arc>(); - - Reverse(fst1, fst2); -} - -void Reverse(const FstClass &fst1, MutableFstClass *fst2); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_REVERSE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/reweight.h b/kaldi_io/src/tools/openfst/include/fst/script/reweight.h deleted file mode 100644 index 7bce839..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/reweight.h +++ /dev/null @@ -1,53 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
-// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_REWEIGHT_H_ -#define FST_SCRIPT_REWEIGHT_H_ - -#include <vector> -using std::vector; - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/script/weight-class.h> -#include <fst/reweight.h> - -namespace fst { -namespace script { - -typedef args::Package<MutableFstClass *, const vector<WeightClass> &, - ReweightType> ReweightArgs; - -template<class Arc> -void Reweight(ReweightArgs *args) { - MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>(); - typedef typename Arc::Weight Weight; - vector<Weight> potentials(args->arg2.size()); - - for (unsigned i = 0; i < args->arg2.size(); ++i) { - potentials[i] = *(args->arg2[i].GetWeight<Weight>()); - } - - Reweight(fst, potentials, args->arg3); -} - -void Reweight(MutableFstClass *fst, const vector<WeightClass> &potential, - ReweightType reweight_type); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_REWEIGHT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/rmepsilon.h b/kaldi_io/src/tools/openfst/include/fst/script/rmepsilon.h deleted file mode 100644 index 62fed03..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/rmepsilon.h +++ /dev/null @@ -1,211 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_RMEPSILON_H_ -#define FST_SCRIPT_RMEPSILON_H_ - -#include <vector> -using std::vector; - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/script/weight-class.h> -#include <fst/script/shortest-distance.h> // for ShortestDistanceOptions -#include <fst/rmepsilon.h> -#include <fst/queue.h> - -// the following is necessary, or SWIG complains mightily about -// shortestdistanceoptions not being defined before being used as a base. -#ifdef SWIG -%include "nlp/fst/script/shortest-distance.h" -#endif - - -namespace fst { -namespace script { - -// -// OPTIONS -// - -struct RmEpsilonOptions : public fst::script::ShortestDistanceOptions { - bool connect; - WeightClass weight_threshold; - int64 state_threshold; - - RmEpsilonOptions(QueueType qt = AUTO_QUEUE, float d = kDelta, bool c = true, - WeightClass w = fst::script::WeightClass::Zero(), - int64 n = kNoStateId) - : ShortestDistanceOptions(qt, EPSILON_ARC_FILTER, - kNoStateId, d), - connect(c), weight_threshold(w), state_threshold(n) { } -}; - - -// -// TEMPLATES -// - -// this function takes care of transforming a script-land RmEpsilonOptions -// into a lib-land RmEpsilonOptions -template<class Arc> -void RmEpsilonHelper(MutableFst<Arc> *fst, - vector<typename Arc::Weight> *distance, - const RmEpsilonOptions &opts) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - - typename Arc::Weight weight_thresh = - *(opts.weight_threshold.GetWeight<Weight>()); - - switch (opts.queue_type) { - case AUTO_QUEUE: { - AutoQueue<StateId> queue(*fst, distance, EpsilonArcFilter<Arc>()); - fst::RmEpsilonOptions<Arc, AutoQueue<StateId> > ropts( - &queue, opts.delta, opts.connect, weight_thresh, - opts.state_threshold); - RmEpsilon(fst, distance, ropts); - break; - } - case FIFO_QUEUE: { - FifoQueue<StateId> queue; - fst::RmEpsilonOptions<Arc, FifoQueue<StateId> > ropts( - &queue, opts.delta, opts.connect, 
weight_thresh, - opts.state_threshold); - RmEpsilon(fst, distance, ropts); - break; - } - case LIFO_QUEUE: { - LifoQueue<StateId> queue; - fst::RmEpsilonOptions<Arc, LifoQueue<StateId> > ropts( - &queue, opts.delta, opts.connect, weight_thresh, - opts.state_threshold); - RmEpsilon(fst, distance, ropts); - break; - } - case SHORTEST_FIRST_QUEUE: { - NaturalShortestFirstQueue<StateId, Weight> queue(*distance); - fst::RmEpsilonOptions<Arc, NaturalShortestFirstQueue<StateId, - Weight> > ropts( - &queue, opts.delta, opts.connect, weight_thresh, - opts.state_threshold); - RmEpsilon(fst, distance, ropts); - break; - } - case STATE_ORDER_QUEUE: { - StateOrderQueue<StateId> queue; - fst::RmEpsilonOptions<Arc, StateOrderQueue<StateId> > ropts( - &queue, opts.delta, opts.connect, weight_thresh, - opts.state_threshold); - RmEpsilon(fst, distance, ropts); - break; - } - case TOP_ORDER_QUEUE: { - TopOrderQueue<StateId> queue(*fst, EpsilonArcFilter<Arc>()); - fst::RmEpsilonOptions<Arc, TopOrderQueue<StateId> > ropts( - &queue, opts.delta, opts.connect, weight_thresh, - opts.state_threshold); - RmEpsilon(fst, distance, ropts); - break; - } - default: - FSTERROR() << "Unknown or unsupported queue type: " << opts.queue_type; - fst->SetProperties(kError, kError); - } -} - -// 1 -typedef args::Package<const FstClass &, MutableFstClass *, - bool, const RmEpsilonOptions &> RmEpsilonArgs1; - -template<class Arc> -void RmEpsilon(RmEpsilonArgs1 *args) { - const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); - vector<typename Arc::Weight> distance; - bool reverse = args->arg3; - - if (reverse) { - VectorFst<Arc> rfst; - Reverse(ifst, &rfst); - RmEpsilonHelper(&rfst, &distance, args->arg4); - Reverse(rfst, ofst); - } else { - *ofst = ifst; - } - RmEpsilonHelper(ofst, &distance, args->arg4); -} - -// 2 -typedef args::Package<MutableFstClass *, bool, - const WeightClass, int64, - float> RmEpsilonArgs2; - -template<class Arc> -void 
RmEpsilon(RmEpsilonArgs2 *args) { - MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>(); - typename Arc::Weight w = *(args->arg3.GetWeight<typename Arc::Weight>()); - - RmEpsilon(fst, args->arg2, w, args->arg4, args->arg5); -} - -// 3 -typedef args::Package<MutableFstClass *, vector<WeightClass> *, - const RmEpsilonOptions &> RmEpsilonArgs3; - -template<class Arc> -void RmEpsilon(RmEpsilonArgs3 *args) { - MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>(); - const RmEpsilonOptions &opts = args->arg3; - - vector<typename Arc::Weight> weights; - - RmEpsilonHelper(fst, &weights, opts); - - // Copy the weights back - args->arg2->resize(weights.size()); - for (unsigned i = 0; i < weights.size(); ++i) { - (*args->arg2)[i] = WeightClass(weights[i]); - } -} - -// -// PROTOTYPES -// - -// 1 -void RmEpsilon(const FstClass &ifst, MutableFstClass *ofst, - bool reverse = false, - const RmEpsilonOptions& opts = - fst::script::RmEpsilonOptions()); - -// 2 -void RmEpsilon(MutableFstClass *arc, bool connect = true, - const WeightClass &weight_threshold = - fst::script::WeightClass::Zero(), - int64 state_threshold = fst::kNoStateId, - float delta = fst::kDelta); - -// 3 -void RmEpsilon(MutableFstClass *fst, vector<WeightClass> *distance, - const RmEpsilonOptions &opts); - - -} // namespace script -} // namespace fst - - -#endif // FST_SCRIPT_RMEPSILON_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/script-impl.h b/kaldi_io/src/tools/openfst/include/fst/script/script-impl.h deleted file mode 100644 index 452c7c5..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/script-impl.h +++ /dev/null @@ -1,206 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -// This file defines the registration mechanism for new operations. -// These operations are designed to enable scripts to work with FST classes -// at a high level. - -// If you have a new arc type and want these operations to work with FSTs -// with that arc type, see below for the registration steps -// you must take. - -// These methods are only recommended for use in high-level scripting -// applications. Most users should use the lower-level templated versions -// corresponding to these. - -// If you have a new arc type you'd like these operations to work with, -// use the REGISTER_FST_OPERATIONS macro defined in fstcsript.h - -// If you have a custom operation you'd like to define, you need four -// components. In the following, assume you want to create a new operation -// with the signature -// -// void Foo(const FstClass &ifst, MutableFstClass *ofst); -// -// You need: -// -// 1) A way to bundle the args that your new Foo operation will take, as -// a single struct. The template structs in arg-packs.h provide a handy -// way to do this. In Foo's case, that might look like this: -// -// typedef args::Package<const FstClass &, -// MutableFstClass *> FooArgs; -// -// Note: this package of args is going to be passed by non-const pointer. -// -// 2) A function template that is able to perform Foo, given the args and -// arc type. 
Yours might look like this: -// -// template<class Arc> -// void Foo(FooArgs *args) { -// // Pull out the actual, arc-templated FSTs -// const Fst<Arc> &ifst = args->arg1.GetFst<Arc>(); -// MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); -// -// // actually perform foo on ifst and ofst... -// } -// -// 3) a client-facing function for your operation. This would look like -// the following: -// -// void Foo(const FstClass &ifst, MutableFstClass *ofst) { -// // Check that the arc types of the FSTs match -// if (!ArcTypesMatch(ifst, *ofst, "Foo")) return; -// // package the args -// FooArgs args(ifst, ofst); -// // Finally, call the operation -// Apply<Operation<FooArgs> >("Foo", ifst->ArcType(), &args); -// } -// -// The Apply<> function template takes care of the link between 2 and 3, -// provided you also have: -// -// 4) A registration for your new operation, on the arc types you care about. -// This can be provided easily by the REGISTER_FST_OPERATION macro in -// operations.h: -// -// REGISTER_FST_OPERATION(Foo, StdArc, FooArgs); -// REGISTER_FST_OPERATION(Foo, MyArc, FooArgs); -// // .. etc -// -// -// That's it! Now when you call Foo(const FstClass &, MutableFstClass *), -// it dispatches (in #3) via the Apply<> function to the correct -// instantiation of the template function in #2. -// - - -#ifndef FST_SCRIPT_SCRIPT_IMPL_H_ -#define FST_SCRIPT_SCRIPT_IMPL_H_ - -// -// This file contains general-purpose templates which are used in the -// implementation of the operations. -// - -#include <utility> -using std::pair; using std::make_pair; -#include <string> - -#include <fst/script/fst-class.h> -#include <fst/generic-register.h> -#include <fst/script/arg-packs.h> - -#include <fst/types.h> - -namespace fst { -namespace script { - -// -// A generic register for operations with various kinds of signatures. -// Needed since every function signature requires a new registration class. 
-// The pair<string, string> is understood to be the operation name and arc -// type; subclasses (or typedefs) need only provide the operation signature. -// - -template<class OperationSignature> -class GenericOperationRegister - : public GenericRegister<pair<string, string>, - OperationSignature, - GenericOperationRegister<OperationSignature> > { - public: - void RegisterOperation(const string &operation_name, - const string &arc_type, - OperationSignature op) { - this->SetEntry(make_pair(operation_name, arc_type), op); - } - - OperationSignature GetOperation( - const string &operation_name, const string &arc_type) { - return this->GetEntry(make_pair(operation_name, arc_type)); - } - - protected: - virtual string ConvertKeyToSoFilename( - const pair<string, string>& key) const { - // Just use the old-style FST for now. - string legal_type(key.second); // the arc type - ConvertToLegalCSymbol(&legal_type); - - return legal_type + "-arc.so"; - } -}; - - -// Operation package - everything you need to register a new type of operation - -// The ArgPack should be the type that's passed into each wrapped function - -// for instance, it might be a struct containing all the args. -// It's always passed by pointer, so const members should be used to enforce -// constness where it's needed. Return values should be implemented as a -// member of ArgPack as well. - -template<class ArgPack> -struct Operation { - typedef ArgPack Args; - typedef void (*OpType)(ArgPack *args); - - // The register (hash) type - typedef GenericOperationRegister<OpType> Register; - - // The register-er type - typedef GenericRegisterer<Register> Registerer; -}; - - -// Macro for registering new types of operations. 
- -#define REGISTER_FST_OPERATION(Op, Arc, ArgPack) \ - static fst::script::Operation<ArgPack>::Registerer \ - arc_dispatched_operation_ ## ArgPack ## Op ## Arc ## _registerer( \ - make_pair(#Op, Arc::Type()), Op<Arc>) - - -// -// Template function to apply an operation by name -// - -template<class OpReg> -void Apply(const string &op_name, const string &arc_type, - typename OpReg::Args *args) { - typename OpReg::Register *reg = OpReg::Register::GetRegister(); - - typename OpReg::OpType op = reg->GetOperation(op_name, arc_type); - - if (op == 0) { - FSTERROR() << "No operation found for \"" << op_name << "\" on " - << "arc type " << arc_type; - return; - } - - op(args); -} - - -// Helper that logs to ERROR if the arc types of a and b don't match. -// The op_name is also printed. -bool ArcTypesMatch(const FstClass &a, const FstClass &b, - const string &op_name); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_SCRIPT_IMPL_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/shortest-distance.h b/kaldi_io/src/tools/openfst/include/fst/script/shortest-distance.h deleted file mode 100644 index 5fc2976..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/shortest-distance.h +++ /dev/null @@ -1,250 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_SHORTEST_DISTANCE_H_ -#define FST_SCRIPT_SHORTEST_DISTANCE_H_ - -#include <vector> -using std::vector; - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/script/weight-class.h> -#include <fst/script/prune.h> // for ArcFilterType -#include <fst/queue.h> // for QueueType -#include <fst/shortest-distance.h> - -namespace fst { -namespace script { - -enum ArcFilterType { ANY_ARC_FILTER, EPSILON_ARC_FILTER, - INPUT_EPSILON_ARC_FILTER, OUTPUT_EPSILON_ARC_FILTER }; - -// See nlp/fst/lib/shortest-distance.h for the template options class -// that this one shadows -struct ShortestDistanceOptions { - const QueueType queue_type; - const ArcFilterType arc_filter_type; - const int64 source; - const float delta; - const bool first_path; - - ShortestDistanceOptions(QueueType qt, ArcFilterType aft, int64 s, - float d) - : queue_type(qt), arc_filter_type(aft), source(s), delta(d), - first_path(false) { } -}; - - - -// 1 -typedef args::Package<const FstClass &, vector<WeightClass> *, - const ShortestDistanceOptions &> ShortestDistanceArgs1; - -template<class Queue, class Arc, class ArcFilter> -struct QueueConstructor { - // template<class Arc, class ArcFilter> - static Queue *Construct(const Fst<Arc> &, - const vector<typename Arc::Weight> *) { - return new Queue(); - } -}; - -// Specializations to deal with AutoQueue, NaturalShortestFirstQueue, -// and TopOrderQueue's different constructors -template<class Arc, class ArcFilter> -struct QueueConstructor<AutoQueue<typename Arc::StateId>, Arc, ArcFilter> { - // template<class Arc, class ArcFilter> - static AutoQueue<typename Arc::StateId> *Construct( - const Fst<Arc> &fst, - const vector<typename Arc::Weight> *distance) { - return new AutoQueue<typename Arc::StateId>(fst, distance, ArcFilter()); - } -}; - -template<class Arc, class ArcFilter> -struct QueueConstructor<NaturalShortestFirstQueue<typename Arc::StateId, - typename 
Arc::Weight>, - Arc, ArcFilter> { - // template<class Arc, class ArcFilter> - static NaturalShortestFirstQueue<typename Arc::StateId, typename Arc::Weight> - *Construct(const Fst<Arc> &fst, - const vector<typename Arc::Weight> *distance) { - return new NaturalShortestFirstQueue<typename Arc::StateId, - typename Arc::Weight>(*distance); - } -}; - -template<class Arc, class ArcFilter> -struct QueueConstructor<TopOrderQueue<typename Arc::StateId>, Arc, ArcFilter> { - // template<class Arc, class ArcFilter> - static TopOrderQueue<typename Arc::StateId> *Construct( - const Fst<Arc> &fst, const vector<typename Arc::Weight> *weights) { - return new TopOrderQueue<typename Arc::StateId>(fst, ArcFilter()); - } -}; - - -template<class Arc, class Queue> -void ShortestDistanceHelper(ShortestDistanceArgs1 *args) { - const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>()); - const ShortestDistanceOptions &opts = args->arg3; - - vector<typename Arc::Weight> weights; - - switch (opts.arc_filter_type) { - case ANY_ARC_FILTER: { - Queue *queue = - QueueConstructor<Queue, Arc, AnyArcFilter<Arc> >::Construct( - fst, &weights); - fst::ShortestDistanceOptions<Arc, Queue, AnyArcFilter<Arc> > sdopts( - queue, AnyArcFilter<Arc>(), opts.source, opts.delta); - ShortestDistance(fst, &weights, sdopts); - delete queue; - break; - } - case EPSILON_ARC_FILTER: { - Queue *queue = - QueueConstructor<Queue, Arc, AnyArcFilter<Arc> >::Construct( - fst, &weights); - fst::ShortestDistanceOptions<Arc, Queue, - EpsilonArcFilter<Arc> > sdopts( - queue, EpsilonArcFilter<Arc>(), opts.source, opts.delta); - ShortestDistance(fst, &weights, sdopts); - delete queue; - break; - } - case INPUT_EPSILON_ARC_FILTER: { - Queue *queue = - QueueConstructor<Queue, Arc, InputEpsilonArcFilter<Arc> >::Construct( - fst, &weights); - fst::ShortestDistanceOptions<Arc, Queue, - InputEpsilonArcFilter<Arc> > sdopts( - queue, InputEpsilonArcFilter<Arc>(), opts.source, opts.delta); - ShortestDistance(fst, &weights, sdopts); - delete 
queue; - break; - } - case OUTPUT_EPSILON_ARC_FILTER: { - Queue *queue = - QueueConstructor<Queue, Arc, - OutputEpsilonArcFilter<Arc> >::Construct( - fst, &weights); - fst::ShortestDistanceOptions<Arc, Queue, - OutputEpsilonArcFilter<Arc> > sdopts( - queue, OutputEpsilonArcFilter<Arc>(), opts.source, opts.delta); - ShortestDistance(fst, &weights, sdopts); - delete queue; - break; - } - } - - // Copy the weights back - args->arg2->resize(weights.size()); - for (unsigned i = 0; i < weights.size(); ++i) { - (*args->arg2)[i] = WeightClass(weights[i]); - } -} - -template<class Arc> -void ShortestDistance(ShortestDistanceArgs1 *args) { - const ShortestDistanceOptions &opts = args->arg3; - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - - // Must consider (opts.queue_type x opts.filter_type) options - switch (opts.queue_type) { - default: - FSTERROR() << "Unknown queue type." << opts.queue_type; - - case AUTO_QUEUE: - ShortestDistanceHelper<Arc, AutoQueue<StateId> >(args); - return; - - case FIFO_QUEUE: - ShortestDistanceHelper<Arc, FifoQueue<StateId> >(args); - return; - - case LIFO_QUEUE: - ShortestDistanceHelper<Arc, LifoQueue<StateId> >(args); - return; - - case SHORTEST_FIRST_QUEUE: - ShortestDistanceHelper<Arc, - NaturalShortestFirstQueue<StateId, Weight> >(args); - return; - - case STATE_ORDER_QUEUE: - ShortestDistanceHelper<Arc, StateOrderQueue<StateId> >(args); - return; - - case TOP_ORDER_QUEUE: - ShortestDistanceHelper<Arc, TopOrderQueue<StateId> >(args); - return; - } -} - -// 2 -typedef args::Package<const FstClass&, vector<WeightClass>*, - bool, double> ShortestDistanceArgs2; - -template<class Arc> -void ShortestDistance(ShortestDistanceArgs2 *args) { - const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>()); - vector<typename Arc::Weight> distance; - - ShortestDistance(fst, &distance, args->arg3, args->arg4); - - // convert the typed weights back into weightclass - vector<WeightClass> *retval = args->arg2; - 
retval->resize(distance.size()); - - for (unsigned i = 0; i < distance.size(); ++i) { - (*retval)[i] = WeightClass(distance[i]); - } -} - -// 3 -typedef args::WithReturnValue<WeightClass, - const FstClass &> ShortestDistanceArgs3; - -template<class Arc> -void ShortestDistance(ShortestDistanceArgs3 *args) { - const Fst<Arc> &fst = *(args->args.GetFst<Arc>()); - - args->retval = WeightClass(ShortestDistance(fst)); -} - - -// 1 -void ShortestDistance(const FstClass &fst, vector<WeightClass> *distance, - const ShortestDistanceOptions &opts); - -// 2 -void ShortestDistance(const FstClass &ifst, vector<WeightClass> *distance, - bool reverse = false, double delta = fst::kDelta); - -#ifndef SWIG -// 3 -WeightClass ShortestDistance(const FstClass &ifst); -#endif - -} // namespace script -} // namespace fst - - - -#endif // FST_SCRIPT_SHORTEST_DISTANCE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/shortest-path.h b/kaldi_io/src/tools/openfst/include/fst/script/shortest-path.h deleted file mode 100644 index b3a3eb9..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/shortest-path.h +++ /dev/null @@ -1,190 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_SHORTEST_PATH_H_ -#define FST_SCRIPT_SHORTEST_PATH_H_ - -#include <vector> -using std::vector; - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/script/weight-class.h> -#include <fst/shortest-path.h> -#include <fst/script/shortest-distance.h> // for ShortestDistanceOptions - -namespace fst { -namespace script { - -struct ShortestPathOptions - : public fst::script::ShortestDistanceOptions { - const size_t nshortest; - const bool unique; - const bool has_distance; - const bool first_path; - const WeightClass weight_threshold; - const int64 state_threshold; - - ShortestPathOptions(QueueType qt, size_t n = 1, - bool u = false, bool hasdist = false, - float d = fst::kDelta, bool fp = false, - WeightClass w = fst::script::WeightClass::Zero(), - int64 s = fst::kNoStateId) - : ShortestDistanceOptions(qt, ANY_ARC_FILTER, kNoStateId, d), - nshortest(n), unique(u), has_distance(hasdist), first_path(fp), - weight_threshold(w), state_threshold(s) { } -}; - -typedef args::Package<const FstClass &, MutableFstClass *, - vector<WeightClass> *, const ShortestPathOptions &> - ShortestPathArgs1; - - -template<class Arc> -void ShortestPath(ShortestPathArgs1 *args) { - const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); - const ShortestPathOptions &opts = args->arg4; - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - typedef AnyArcFilter<Arc> ArcFilter; - - vector<typename Arc::Weight> weights; - typename Arc::Weight weight_threshold = - *(opts.weight_threshold.GetWeight<Weight>()); - - switch (opts.queue_type) { - case AUTO_QUEUE: { - typedef AutoQueue<StateId> Queue; - Queue *queue = QueueConstructor<Queue, Arc, - ArcFilter>::Construct(ifst, &weights); - fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts( - queue, ArcFilter(), opts.nshortest, opts.unique, - opts.has_distance, 
opts.delta, opts.first_path, - weight_threshold, opts.state_threshold); - ShortestPath(ifst, ofst, &weights, spopts); - delete queue; - return; - } - case FIFO_QUEUE: { - typedef FifoQueue<StateId> Queue; - Queue *queue = QueueConstructor<Queue, Arc, - ArcFilter>::Construct(ifst, &weights); - fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts( - queue, ArcFilter(), opts.nshortest, opts.unique, - opts.has_distance, opts.delta, opts.first_path, - weight_threshold, opts.state_threshold); - ShortestPath(ifst, ofst, &weights, spopts); - delete queue; - return; - } - case LIFO_QUEUE: { - typedef LifoQueue<StateId> Queue; - Queue *queue = QueueConstructor<Queue, Arc, - ArcFilter >::Construct(ifst, &weights); - fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts( - queue, ArcFilter(), opts.nshortest, opts.unique, - opts.has_distance, opts.delta, opts.first_path, - weight_threshold, opts.state_threshold); - ShortestPath(ifst, ofst, &weights, spopts); - delete queue; - return; - } - case SHORTEST_FIRST_QUEUE: { - typedef NaturalShortestFirstQueue<StateId, Weight> Queue; - Queue *queue = QueueConstructor<Queue, Arc, - ArcFilter>::Construct(ifst, &weights); - fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts( - queue, ArcFilter(), opts.nshortest, opts.unique, - opts.has_distance, opts.delta, opts.first_path, - weight_threshold, opts.state_threshold); - ShortestPath(ifst, ofst, &weights, spopts); - delete queue; - return; - } - case STATE_ORDER_QUEUE: { - typedef StateOrderQueue<StateId> Queue; - Queue *queue = QueueConstructor<Queue, Arc, - ArcFilter>::Construct(ifst, &weights); - fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts( - queue, ArcFilter(), opts.nshortest, opts.unique, - opts.has_distance, opts.delta, opts.first_path, - weight_threshold, opts.state_threshold); - ShortestPath(ifst, ofst, &weights, spopts); - delete queue; - return; - } - case TOP_ORDER_QUEUE: { - typedef TopOrderQueue<StateId> Queue; - Queue *queue = QueueConstructor<Queue, Arc, - 
ArcFilter>::Construct(ifst, &weights); - fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts( - queue, ArcFilter(), opts.nshortest, opts.unique, - opts.has_distance, opts.delta, opts.first_path, - weight_threshold, opts.state_threshold); - ShortestPath(ifst, ofst, &weights, spopts); - delete queue; - return; - } - default: - FSTERROR() << "Unknown queue type: " << opts.queue_type; - ofst->SetProperties(kError, kError); - } - - // Copy the weights back - args->arg3->resize(weights.size()); - for (unsigned i = 0; i < weights.size(); ++i) { - (*args->arg3)[i] = WeightClass(weights[i]); - } -} - -// 2 -typedef args::Package<const FstClass &, MutableFstClass *, - size_t, bool, bool, WeightClass, - int64> ShortestPathArgs2; - -template<class Arc> -void ShortestPath(ShortestPathArgs2 *args) { - const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); - typename Arc::Weight weight_threshold = - *(args->arg6.GetWeight<typename Arc::Weight>()); - - ShortestPath(ifst, ofst, args->arg3, args->arg4, args->arg5, - weight_threshold, args->arg7); -} - - -// 1 -void ShortestPath(const FstClass &ifst, MutableFstClass *ofst, - vector<WeightClass> *distance, - const ShortestPathOptions &opts); - - -// 2 -void ShortestPath(const FstClass &ifst, MutableFstClass *ofst, - size_t n = 1, bool unique = false, - bool first_path = false, - WeightClass weight_threshold = - fst::script::WeightClass::Zero(), - int64 state_threshold = fst::kNoStateId); - -} // namespace script -} // namespace fst - - - -#endif // FST_SCRIPT_SHORTEST_PATH_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/symbols.h b/kaldi_io/src/tools/openfst/include/fst/script/symbols.h deleted file mode 100644 index 927600a..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/symbols.h +++ /dev/null @@ -1,20 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_SYMBOLS_H_ -#define FST_SCRIPT_SYMBOLS_H_ - -#endif // FST_SCRIPT_SYMBOLS_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/synchronize.h b/kaldi_io/src/tools/openfst/include/fst/script/synchronize.h deleted file mode 100644 index 3c0c905..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/synchronize.h +++ /dev/null @@ -1,42 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_SYNCHRONIZE_H_ -#define FST_SCRIPT_SYNCHRONIZE_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/synchronize.h> - -namespace fst { -namespace script { - -typedef args::Package<const FstClass &, MutableFstClass *> SynchronizeArgs; - -template<class Arc> -void Synchronize(SynchronizeArgs *args) { - const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); - MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); - - Synchronize(ifst, ofst); -} - -void Synchronize(const FstClass &ifst, MutableFstClass *ofst); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_SYNCHRONIZE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/text-io.h b/kaldi_io/src/tools/openfst/include/fst/script/text-io.h deleted file mode 100644 index d97a007..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/text-io.h +++ /dev/null @@ -1,51 +0,0 @@ -// text-io.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// Modified: [email protected] (Jake Ratkiewicz) to work with generic WeightClass -// -// \file -// Utilities for reading and writing textual strings representing -// states, labels, and weights and files specifying label-label pairs -// and potentials (state-weight pairs). 
-// - -#ifndef FST_SCRIPT_TEXT_IO_H__ -#define FST_SCRIPT_TEXT_IO_H__ - -#include <string> -#include <vector> -using std::vector; - - -#include <iostream> -#include <fstream> -#include <sstream> -#include <fst/script/weight-class.h> - -namespace fst { -namespace script { - -bool ReadPotentials(const string &weight_type, - const string& filename, - vector<WeightClass>* potential); - -bool WritePotentials(const string& filename, - const vector<WeightClass>& potential); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_TEXT_IO_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/topsort.h b/kaldi_io/src/tools/openfst/include/fst/script/topsort.h deleted file mode 100644 index 4e27e48..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/topsort.h +++ /dev/null @@ -1,40 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_TOPSORT_H_ -#define FST_SCRIPT_TOPSORT_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/topsort.h> - -namespace fst { -namespace script { - -typedef args::WithReturnValue<bool, MutableFstClass*> TopSortArgs; - -template<class Arc> -void TopSort(TopSortArgs *args) { - MutableFst<Arc> *fst = args->args->GetMutableFst<Arc>(); - args->retval = TopSort(fst); -} - -bool TopSort(MutableFstClass *fst); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_TOPSORT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/union.h b/kaldi_io/src/tools/openfst/include/fst/script/union.h deleted file mode 100644 index 780e484..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/union.h +++ /dev/null @@ -1,42 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jake Ratkiewicz) - -#ifndef FST_SCRIPT_UNION_H_ -#define FST_SCRIPT_UNION_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/union.h> - -namespace fst { -namespace script { - -typedef args::Package<MutableFstClass *, const FstClass &> UnionArgs; - -template<class Arc> -void Union(UnionArgs *args) { - MutableFst<Arc> *fst1 = args->arg1->GetMutableFst<Arc>(); - const Fst<Arc> &fst2 = *(args->arg2.GetFst<Arc>()); - - Union(fst1, fst2); -} - -void Union(MutableFstClass *fst1, const FstClass &fst2); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_UNION_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/verify.h b/kaldi_io/src/tools/openfst/include/fst/script/verify.h deleted file mode 100644 index 6904003..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/verify.h +++ /dev/null @@ -1,40 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jeffrey Sorensen) - -#ifndef FST_SCRIPT_VERIFY_H_ -#define FST_SCRIPT_VERIFY_H_ - -#include <fst/script/arg-packs.h> -#include <fst/script/fst-class.h> -#include <fst/verify.h> - -namespace fst { -namespace script { - -typedef args::WithReturnValue<bool, const FstClass *> VerifyArgs; - -template<class Arc> -void Verify(VerifyArgs *args) { - const Fst<Arc> *fst = args->args->GetFst<Arc>(); - args->retval = Verify(*fst); -} - -bool Verify(const FstClass &fst1); - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_VERIFY_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/weight-class.h b/kaldi_io/src/tools/openfst/include/fst/script/weight-class.h deleted file mode 100644 index b9f7ddf..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/script/weight-class.h +++ /dev/null @@ -1,223 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Jake Ratkiewicz) - -// Represents a generic weight in an FST -- that is, represents a specific -// type of weight underneath while hiding that type from a client. 
- - -#ifndef FST_SCRIPT_WEIGHT_CLASS_H_ -#define FST_SCRIPT_WEIGHT_CLASS_H_ - -#include <string> - -#include <fst/generic-register.h> -#include <fst/util.h> - -namespace fst { -namespace script { - -class WeightImplBase { - public: - virtual WeightImplBase *Copy() const = 0; - virtual void Print(ostream *o) const = 0; - virtual const string &Type() const = 0; - virtual string to_string() const = 0; - virtual bool operator == (const WeightImplBase &other) const = 0; - virtual ~WeightImplBase() { } -}; - -template<class W> -struct WeightClassImpl : public WeightImplBase { - W weight; - - explicit WeightClassImpl(const W& weight) : weight(weight) { } - - virtual WeightClassImpl<W> *Copy() const { - return new WeightClassImpl<W>(weight); - } - - virtual const string &Type() const { return W::Type(); } - - virtual void Print(ostream *o) const { - *o << weight; - } - - virtual string to_string() const { - string str; - WeightToStr(weight, &str); - return str; - } - - virtual bool operator == (const WeightImplBase &other) const { - if (Type() != other.Type()) { - return false; - } else { - const WeightClassImpl<W> *typed_other = - static_cast<const WeightClassImpl<W> *>(&other); - - return typed_other->weight == weight; - } - } -}; - - -class WeightClass { - public: - WeightClass() : element_type_(ZERO), impl_(0) { } - - template<class W> - explicit WeightClass(const W& weight) - : element_type_(OTHER), impl_(new WeightClassImpl<W>(weight)) { } - - WeightClass(const string &weight_type, const string &weight_str); - - WeightClass(const WeightClass &other) : - element_type_(other.element_type_), - impl_(other.impl_ ? other.impl_->Copy() : 0) { } - - WeightClass &operator = (const WeightClass &other) { - if (impl_) delete impl_; - impl_ = other.impl_ ? 
other.impl_->Copy() : 0; - element_type_ = other.element_type_; - return *this; - } - - template<class W> - const W* GetWeight() const; - - string to_string() const { - switch (element_type_) { - case ZERO: - return "ZERO"; - case ONE: - return "ONE"; - default: - case OTHER: - return impl_->to_string(); - } - } - - bool operator == (const WeightClass &other) const { - return element_type_ == other.element_type_ && - ((impl_ && other.impl_ && (*impl_ == *other.impl_)) || - (impl_ == 0 && other.impl_ == 0)); - } - - static const WeightClass &Zero() { - static WeightClass w(ZERO); - - return w; - } - - static const WeightClass &One() { - static WeightClass w(ONE); - - return w; - } - - const string &Type() const { - if (impl_) return impl_->Type(); - static const string no_type = "none"; - return no_type; - } - - - ~WeightClass() { if (impl_) delete impl_; } - private: - enum ElementType { ZERO, ONE, OTHER }; - ElementType element_type_; - - WeightImplBase *impl_; - - explicit WeightClass(ElementType et) : element_type_(et), impl_(0) { } - - friend ostream &operator << (ostream &o, const WeightClass &c); -}; - -template<class W> -const W* WeightClass::GetWeight() const { - // We need to store zero and one as statics, because the weight type - // W might return them as temporaries. We're returning a pointer, - // and it won't do to get the address of a temporary. - static const W zero = W::Zero(); - static const W one = W::One(); - - if (element_type_ == ZERO) { - return &zero; - } else if (element_type_ == ONE) { - return &one; - } else { - if (W::Type() != impl_->Type()) { - return NULL; - } else { - WeightClassImpl<W> *typed_impl = - static_cast<WeightClassImpl<W> *>(impl_); - return &typed_impl->weight; - } - } -} - -// -// Registration for generic weight types. 
-// - -typedef WeightImplBase* (*StrToWeightImplBaseT)(const string &str, - const string &src, - size_t nline); - -template<class W> -WeightImplBase* StrToWeightImplBase(const string &str, - const string &src, size_t nline) { - return new WeightClassImpl<W>(StrToWeight<W>(str, src, nline)); -} - -// The following confuses swig, and doesn't need to be wrapped anyway. -#ifndef SWIG -ostream& operator << (ostream &o, const WeightClass &c); - -class WeightClassRegister : public GenericRegister<string, - StrToWeightImplBaseT, - WeightClassRegister> { - protected: - virtual string ConvertKeyToSoFilename(const string &key) const { - return key + ".so"; - } -}; - -typedef GenericRegisterer<WeightClassRegister> WeightClassRegisterer; -#endif - -// internal version, needs to be called by wrapper in order for -// macro args to expand -#define REGISTER_FST_WEIGHT__(Weight, line) \ - static WeightClassRegisterer weight_registerer ## _ ## line( \ - Weight::Type(), \ - StrToWeightImplBase<Weight>) - -// This layer is where __FILE__ and __LINE__ are expanded -#define REGISTER_FST_WEIGHT_EXPANDER(Weight, line) \ - REGISTER_FST_WEIGHT__(Weight, line) - -// -// Macro for registering new weight types. Clients call this. -// -#define REGISTER_FST_WEIGHT(Weight) \ - REGISTER_FST_WEIGHT_EXPANDER(Weight, __LINE__) - -} // namespace script -} // namespace fst - -#endif // FST_SCRIPT_WEIGHT_CLASS_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/shortest-distance.h b/kaldi_io/src/tools/openfst/include/fst/shortest-distance.h deleted file mode 100644 index ec47a14..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/shortest-distance.h +++ /dev/null @@ -1,348 +0,0 @@ -// shortest-distance.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// Functions and classes to find shortest distance in an FST. - -#ifndef FST_LIB_SHORTEST_DISTANCE_H__ -#define FST_LIB_SHORTEST_DISTANCE_H__ - -#include <deque> -using std::deque; -#include <vector> -using std::vector; - -#include <fst/arcfilter.h> -#include <fst/cache.h> -#include <fst/queue.h> -#include <fst/reverse.h> -#include <fst/test-properties.h> - - -namespace fst { - -template <class Arc, class Queue, class ArcFilter> -struct ShortestDistanceOptions { - typedef typename Arc::StateId StateId; - - Queue *state_queue; // Queue discipline used; owned by caller - ArcFilter arc_filter; // Arc filter (e.g., limit to only epsilon graph) - StateId source; // If kNoStateId, use the Fst's initial state - float delta; // Determines the degree of convergence required - bool first_path; // For a semiring with the path property (o.w. - // undefined), compute the shortest-distances along - // along the first path to a final state found - // by the algorithm. That path is the shortest-path - // only if the FST has a unique final state (or all - // the final states have the same final weight), the - // queue discipline is shortest-first and all the - // weights in the FST are between One() and Zero() - // according to NaturalLess. 
- - ShortestDistanceOptions(Queue *q, ArcFilter filt, StateId src = kNoStateId, - float d = kDelta) - : state_queue(q), arc_filter(filt), source(src), delta(d), - first_path(false) {} -}; - - -// Computation state of the shortest-distance algorithm. Reusable -// information is maintained across calls to member function -// ShortestDistance(source) when 'retain' is true for improved -// efficiency when calling multiple times from different source states -// (e.g., in epsilon removal). Contrary to usual conventions, 'fst' -// may not be freed before this class. Vector 'distance' should not be -// modified by the user between these calls. -// The Error() method returns true if an error was encountered. -template<class Arc, class Queue, class ArcFilter> -class ShortestDistanceState { - public: - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - - ShortestDistanceState( - const Fst<Arc> &fst, - vector<Weight> *distance, - const ShortestDistanceOptions<Arc, Queue, ArcFilter> &opts, - bool retain) - : fst_(fst), distance_(distance), state_queue_(opts.state_queue), - arc_filter_(opts.arc_filter), delta_(opts.delta), - first_path_(opts.first_path), retain_(retain), source_id_(0), - error_(false) { - distance_->clear(); - } - - ~ShortestDistanceState() {} - - void ShortestDistance(StateId source); - - bool Error() const { return error_; } - - private: - const Fst<Arc> &fst_; - vector<Weight> *distance_; - Queue *state_queue_; - ArcFilter arc_filter_; - float delta_; - bool first_path_; - bool retain_; // Retain and reuse information across calls - - vector<Weight> rdistance_; // Relaxation distance. - vector<bool> enqueued_; // Is state enqueued? - vector<StateId> sources_; // Source ID for ith state in 'distance_', - // 'rdistance_', and 'enqueued_' if retained. - StateId source_id_; // Unique ID characterizing each call to SD - - bool error_; -}; - -// Compute the shortest distance. 
If 'source' is kNoStateId, use -// the initial state of the Fst. -template <class Arc, class Queue, class ArcFilter> -void ShortestDistanceState<Arc, Queue, ArcFilter>::ShortestDistance( - StateId source) { - if (fst_.Start() == kNoStateId) { - if (fst_.Properties(kError, false)) error_ = true; - return; - } - - if (!(Weight::Properties() & kRightSemiring)) { - FSTERROR() << "ShortestDistance: Weight needs to be right distributive: " - << Weight::Type(); - error_ = true; - return; - } - - if (first_path_ && !(Weight::Properties() & kPath)) { - FSTERROR() << "ShortestDistance: first_path option disallowed when " - << "Weight does not have the path property: " - << Weight::Type(); - error_ = true; - return; - } - - state_queue_->Clear(); - - if (!retain_) { - distance_->clear(); - rdistance_.clear(); - enqueued_.clear(); - } - - if (source == kNoStateId) - source = fst_.Start(); - - while (distance_->size() <= source) { - distance_->push_back(Weight::Zero()); - rdistance_.push_back(Weight::Zero()); - enqueued_.push_back(false); - } - if (retain_) { - while (sources_.size() <= source) - sources_.push_back(kNoStateId); - sources_[source] = source_id_; - } - (*distance_)[source] = Weight::One(); - rdistance_[source] = Weight::One(); - enqueued_[source] = true; - - state_queue_->Enqueue(source); - - while (!state_queue_->Empty()) { - StateId s = state_queue_->Head(); - state_queue_->Dequeue(); - while (distance_->size() <= s) { - distance_->push_back(Weight::Zero()); - rdistance_.push_back(Weight::Zero()); - enqueued_.push_back(false); - } - if (first_path_ && (fst_.Final(s) != Weight::Zero())) - break; - enqueued_[s] = false; - Weight r = rdistance_[s]; - rdistance_[s] = Weight::Zero(); - for (ArcIterator< Fst<Arc> > aiter(fst_, s); - !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - if (!arc_filter_(arc)) - continue; - while (distance_->size() <= arc.nextstate) { - distance_->push_back(Weight::Zero()); - rdistance_.push_back(Weight::Zero()); - 
enqueued_.push_back(false); - } - if (retain_) { - while (sources_.size() <= arc.nextstate) - sources_.push_back(kNoStateId); - if (sources_[arc.nextstate] != source_id_) { - (*distance_)[arc.nextstate] = Weight::Zero(); - rdistance_[arc.nextstate] = Weight::Zero(); - enqueued_[arc.nextstate] = false; - sources_[arc.nextstate] = source_id_; - } - } - Weight &nd = (*distance_)[arc.nextstate]; - Weight &nr = rdistance_[arc.nextstate]; - Weight w = Times(r, arc.weight); - if (!ApproxEqual(nd, Plus(nd, w), delta_)) { - nd = Plus(nd, w); - nr = Plus(nr, w); - if (!nd.Member() || !nr.Member()) { - error_ = true; - return; - } - if (!enqueued_[arc.nextstate]) { - state_queue_->Enqueue(arc.nextstate); - enqueued_[arc.nextstate] = true; - } else { - state_queue_->Update(arc.nextstate); - } - } - } - } - ++source_id_; - if (fst_.Properties(kError, false)) error_ = true; -} - - -// Shortest-distance algorithm: this version allows fine control -// via the options argument. See below for a simpler interface. -// -// This computes the shortest distance from the 'opts.source' state to -// each visited state S and stores the value in the 'distance' vector. -// An unvisited state S has distance Zero(), which will be stored in -// the 'distance' vector if S is less than the maximum visited state. -// The state queue discipline, arc filter, and convergence delta are -// taken in the options argument. -// The 'distance' vector will contain a unique element for which -// Member() is false if an error was encountered. -// -// The weights must must be right distributive and k-closed (i.e., 1 + -// x + x^2 + ... + x^(k +1) = 1 + x + x^2 + ... + x^k). -// -// The algorithm is from Mohri, "Semiring Framweork and Algorithms for -// Shortest-Distance Problems", Journal of Automata, Languages and -// Combinatorics 7(3):321-350, 2002. The complexity of algorithm -// depends on the properties of the semiring and the queue discipline -// used. Refer to the paper for more details. 
-template<class Arc, class Queue, class ArcFilter> -void ShortestDistance( - const Fst<Arc> &fst, - vector<typename Arc::Weight> *distance, - const ShortestDistanceOptions<Arc, Queue, ArcFilter> &opts) { - - ShortestDistanceState<Arc, Queue, ArcFilter> - sd_state(fst, distance, opts, false); - sd_state.ShortestDistance(opts.source); - if (sd_state.Error()) { - distance->clear(); - distance->resize(1, Arc::Weight::NoWeight()); - } -} - -// Shortest-distance algorithm: simplified interface. See above for a -// version that allows finer control. -// -// If 'reverse' is false, this computes the shortest distance from the -// initial state to each state S and stores the value in the -// 'distance' vector. If 'reverse' is true, this computes the shortest -// distance from each state to the final states. An unvisited state S -// has distance Zero(), which will be stored in the 'distance' vector -// if S is less than the maximum visited state. The state queue -// discipline is automatically-selected. -// The 'distance' vector will contain a unique element for which -// Member() is false if an error was encountered. -// -// The weights must must be right (left) distributive if reverse is -// false (true) and k-closed (i.e., 1 + x + x^2 + ... + x^(k +1) = 1 + -// x + x^2 + ... + x^k). -// -// The algorithm is from Mohri, "Semiring Framweork and Algorithms for -// Shortest-Distance Problems", Journal of Automata, Languages and -// Combinatorics 7(3):321-350, 2002. The complexity of algorithm -// depends on the properties of the semiring and the queue discipline -// used. Refer to the paper for more details. 
-template<class Arc> -void ShortestDistance(const Fst<Arc> &fst, - vector<typename Arc::Weight> *distance, - bool reverse = false, - float delta = kDelta) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - - if (!reverse) { - AnyArcFilter<Arc> arc_filter; - AutoQueue<StateId> state_queue(fst, distance, arc_filter); - ShortestDistanceOptions< Arc, AutoQueue<StateId>, AnyArcFilter<Arc> > - opts(&state_queue, arc_filter); - opts.delta = delta; - ShortestDistance(fst, distance, opts); - } else { - typedef ReverseArc<Arc> ReverseArc; - typedef typename ReverseArc::Weight ReverseWeight; - AnyArcFilter<ReverseArc> rarc_filter; - VectorFst<ReverseArc> rfst; - Reverse(fst, &rfst); - vector<ReverseWeight> rdistance; - AutoQueue<StateId> state_queue(rfst, &rdistance, rarc_filter); - ShortestDistanceOptions< ReverseArc, AutoQueue<StateId>, - AnyArcFilter<ReverseArc> > - ropts(&state_queue, rarc_filter); - ropts.delta = delta; - ShortestDistance(rfst, &rdistance, ropts); - distance->clear(); - if (rdistance.size() == 1 && !rdistance[0].Member()) { - distance->resize(1, Arc::Weight::NoWeight()); - return; - } - while (distance->size() < rdistance.size() - 1) - distance->push_back(rdistance[distance->size() + 1].Reverse()); - } -} - - -// Return the sum of the weight of all successful paths in an FST, i.e., -// the shortest-distance from the initial state to the final states. -// Returns a weight such that Member() is false if an error was encountered. 
-template <class Arc> -typename Arc::Weight ShortestDistance(const Fst<Arc> &fst, float delta = kDelta) { - typedef typename Arc::Weight Weight; - typedef typename Arc::StateId StateId; - vector<Weight> distance; - if (Weight::Properties() & kRightSemiring) { - ShortestDistance(fst, &distance, false, delta); - if (distance.size() == 1 && !distance[0].Member()) - return Arc::Weight::NoWeight(); - Weight sum = Weight::Zero(); - for (StateId s = 0; s < distance.size(); ++s) - sum = Plus(sum, Times(distance[s], fst.Final(s))); - return sum; - } else { - ShortestDistance(fst, &distance, true, delta); - StateId s = fst.Start(); - if (distance.size() == 1 && !distance[0].Member()) - return Arc::Weight::NoWeight(); - return s != kNoStateId && s < distance.size() ? - distance[s] : Weight::Zero(); - } -} - - -} // namespace fst - -#endif // FST_LIB_SHORTEST_DISTANCE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/shortest-path.h b/kaldi_io/src/tools/openfst/include/fst/shortest-path.h deleted file mode 100644 index 9cd13d9..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/shortest-path.h +++ /dev/null @@ -1,501 +0,0 @@ -// shortest-path.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// Functions to find shortest paths in an FST. 
- -#ifndef FST_LIB_SHORTEST_PATH_H__ -#define FST_LIB_SHORTEST_PATH_H__ - -#include <functional> -#include <utility> -using std::pair; using std::make_pair; -#include <vector> -using std::vector; - -#include <fst/cache.h> -#include <fst/determinize.h> -#include <fst/queue.h> -#include <fst/shortest-distance.h> -#include <fst/test-properties.h> - - -namespace fst { - -template <class Arc, class Queue, class ArcFilter> -struct ShortestPathOptions - : public ShortestDistanceOptions<Arc, Queue, ArcFilter> { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - size_t nshortest; // return n-shortest paths - bool unique; // only return paths with distinct input strings - bool has_distance; // distance vector already contains the - // shortest distance from the initial state - bool first_path; // Single shortest path stops after finding the first - // path to a final state. That path is the shortest path - // only when using the ShortestFirstQueue and - // only when all the weights in the FST are between - // One() and Zero() according to NaturalLess. - Weight weight_threshold; // pruning weight threshold. - StateId state_threshold; // pruning state threshold. - - ShortestPathOptions(Queue *q, ArcFilter filt, size_t n = 1, bool u = false, - bool hasdist = false, float d = kDelta, - bool fp = false, Weight w = Weight::Zero(), - StateId s = kNoStateId) - : ShortestDistanceOptions<Arc, Queue, ArcFilter>(q, filt, kNoStateId, d), - nshortest(n), unique(u), has_distance(hasdist), first_path(fp), - weight_threshold(w), state_threshold(s) {} -}; - - -// Shortest-path algorithm: normally not called directly; prefer -// 'ShortestPath' below with n=1. 'ofst' contains the shortest path in -// 'ifst'. 'distance' returns the shortest distances from the source -// state to each state in 'ifst'. 'opts' is used to specify options -// such as the queue discipline, the arc filter and delta. -// -// The shortest path is the lowest weight path w.r.t. 
the natural -// semiring order. -// -// The weights need to be right distributive and have the path (kPath) -// property. -template<class Arc, class Queue, class ArcFilter> -void SingleShortestPath(const Fst<Arc> &ifst, - MutableFst<Arc> *ofst, - vector<typename Arc::Weight> *distance, - ShortestPathOptions<Arc, Queue, ArcFilter> &opts) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - - ofst->DeleteStates(); - ofst->SetInputSymbols(ifst.InputSymbols()); - ofst->SetOutputSymbols(ifst.OutputSymbols()); - - if (ifst.Start() == kNoStateId) { - if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError); - return; - } - - vector<bool> enqueued; - vector<StateId> parent; - vector<Arc> arc_parent; - - Queue *state_queue = opts.state_queue; - StateId source = opts.source == kNoStateId ? ifst.Start() : opts.source; - Weight f_distance = Weight::Zero(); - StateId f_parent = kNoStateId; - - distance->clear(); - state_queue->Clear(); - if (opts.nshortest != 1) { - FSTERROR() << "SingleShortestPath: for nshortest > 1, use ShortestPath" - << " instead"; - ofst->SetProperties(kError, kError); - return; - } - if (opts.weight_threshold != Weight::Zero() || - opts.state_threshold != kNoStateId) { - FSTERROR() << - "SingleShortestPath: weight and state thresholds not applicable"; - ofst->SetProperties(kError, kError); - return; - } - if ((Weight::Properties() & (kPath | kRightSemiring)) - != (kPath | kRightSemiring)) { - FSTERROR() << "SingleShortestPath: Weight needs to have the path" - << " property and be right distributive: " << Weight::Type(); - ofst->SetProperties(kError, kError); - return; - } - while (distance->size() < source) { - distance->push_back(Weight::Zero()); - enqueued.push_back(false); - parent.push_back(kNoStateId); - arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId)); - } - distance->push_back(Weight::One()); - parent.push_back(kNoStateId); - arc_parent.push_back(Arc(kNoLabel, kNoLabel, 
Weight::Zero(), kNoStateId)); - state_queue->Enqueue(source); - enqueued.push_back(true); - - while (!state_queue->Empty()) { - StateId s = state_queue->Head(); - state_queue->Dequeue(); - enqueued[s] = false; - Weight sd = (*distance)[s]; - if (ifst.Final(s) != Weight::Zero()) { - Weight w = Times(sd, ifst.Final(s)); - if (f_distance != Plus(f_distance, w)) { - f_distance = Plus(f_distance, w); - f_parent = s; - } - if (!f_distance.Member()) { - ofst->SetProperties(kError, kError); - return; - } - if (opts.first_path) - break; - } - for (ArcIterator< Fst<Arc> > aiter(ifst, s); - !aiter.Done(); - aiter.Next()) { - const Arc &arc = aiter.Value(); - while (distance->size() <= arc.nextstate) { - distance->push_back(Weight::Zero()); - enqueued.push_back(false); - parent.push_back(kNoStateId); - arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(), - kNoStateId)); - } - Weight &nd = (*distance)[arc.nextstate]; - Weight w = Times(sd, arc.weight); - if (nd != Plus(nd, w)) { - nd = Plus(nd, w); - if (!nd.Member()) { - ofst->SetProperties(kError, kError); - return; - } - parent[arc.nextstate] = s; - arc_parent[arc.nextstate] = arc; - if (!enqueued[arc.nextstate]) { - state_queue->Enqueue(arc.nextstate); - enqueued[arc.nextstate] = true; - } else { - state_queue->Update(arc.nextstate); - } - } - } - } - - StateId s_p = kNoStateId, d_p = kNoStateId; - for (StateId s = f_parent, d = kNoStateId; - s != kNoStateId; - d = s, s = parent[s]) { - d_p = s_p; - s_p = ofst->AddState(); - if (d == kNoStateId) { - ofst->SetFinal(s_p, ifst.Final(f_parent)); - } else { - arc_parent[d].nextstate = d_p; - ofst->AddArc(s_p, arc_parent[d]); - } - } - ofst->SetStart(s_p); - if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError); - ofst->SetProperties( - ShortestPathProperties(ofst->Properties(kFstProperties, false)), - kFstProperties); -} - - -template <class S, class W> -class ShortestPathCompare { - public: - typedef S StateId; - typedef W Weight; - typedef 
pair<StateId, Weight> Pair; - - ShortestPathCompare(const vector<Pair>& pairs, - const vector<Weight>& distance, - StateId sfinal, float d) - : pairs_(pairs), distance_(distance), superfinal_(sfinal), delta_(d) {} - - bool operator()(const StateId x, const StateId y) const { - const Pair &px = pairs_[x]; - const Pair &py = pairs_[y]; - Weight dx = px.first == superfinal_ ? Weight::One() : - px.first < distance_.size() ? distance_[px.first] : Weight::Zero(); - Weight dy = py.first == superfinal_ ? Weight::One() : - py.first < distance_.size() ? distance_[py.first] : Weight::Zero(); - Weight wx = Times(dx, px.second); - Weight wy = Times(dy, py.second); - // Penalize complete paths to ensure correct results with inexact weights. - // This forms a strict weak order so long as ApproxEqual(a, b) => - // ApproxEqual(a, c) for all c s.t. less_(a, c) && less_(c, b). - if (px.first == superfinal_ && py.first != superfinal_) { - return less_(wy, wx) || ApproxEqual(wx, wy, delta_); - } else if (py.first == superfinal_ && px.first != superfinal_) { - return less_(wy, wx) && !ApproxEqual(wx, wy, delta_); - } else { - return less_(wy, wx); - } - } - - private: - const vector<Pair> &pairs_; - const vector<Weight> &distance_; - StateId superfinal_; - float delta_; - NaturalLess<Weight> less_; -}; - - -// N-Shortest-path algorithm: implements the core n-shortest path -// algorithm. The output is built REVERSED. See below for versions with -// more options and not reversed. -// -// 'ofst' contains the REVERSE of 'n'-shortest paths in 'ifst'. -// 'distance' must contain the shortest distance from each state to a final -// state in 'ifst'. 'delta' is the convergence delta. -// -// The n-shortest paths are the n-lowest weight paths w.r.t. the -// natural semiring order. The single path that can be read from the -// ith of at most n transitions leaving the initial state of 'ofst' is -// the ith shortest path. 
Disregarding the initial state and initial -// transitions, the n-shortest paths, in fact, form a tree rooted at -// the single final state. -// -// The weights need to be left and right distributive (kSemiring) and -// have the path (kPath) property. -// -// The algorithm is from Mohri and Riley, "An Efficient Algorithm for -// the n-best-strings problem", ICSLP 2002. The algorithm relies on -// the shortest-distance algorithm. There are some issues with the -// pseudo-code as written in the paper (viz., line 11). -// -// IMPLEMENTATION NOTE: The input fst 'ifst' can be a delayed fst and -// and at any state in its expansion the values of distance vector need only -// be defined at that time for the states that are known to exist. -template<class Arc, class RevArc> -void NShortestPath(const Fst<RevArc> &ifst, - MutableFst<Arc> *ofst, - const vector<typename Arc::Weight> &distance, - size_t n, - float delta = kDelta, - typename Arc::Weight weight_threshold = Arc::Weight::Zero(), - typename Arc::StateId state_threshold = kNoStateId) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - typedef pair<StateId, Weight> Pair; - typedef typename RevArc::Weight RevWeight; - - if (n <= 0) return; - if ((Weight::Properties() & (kPath | kSemiring)) != (kPath | kSemiring)) { - FSTERROR() << "NShortestPath: Weight needs to have the " - << "path property and be distributive: " - << Weight::Type(); - ofst->SetProperties(kError, kError); - return; - } - ofst->DeleteStates(); - ofst->SetInputSymbols(ifst.InputSymbols()); - ofst->SetOutputSymbols(ifst.OutputSymbols()); - // Each state in 'ofst' corresponds to a path with weight w from the - // initial state of 'ifst' to a state s in 'ifst', that can be - // characterized by a pair (s,w). The vector 'pairs' maps each - // state in 'ofst' to the corresponding pair maps states in OFST to - // the corresponding pair (s,w). 
- vector<Pair> pairs; - // The supefinal state is denoted by -1, 'compare' knows that the - // distance from 'superfinal' to the final state is 'Weight::One()', - // hence 'distance[superfinal]' is not needed. - StateId superfinal = -1; - ShortestPathCompare<StateId, Weight> - compare(pairs, distance, superfinal, delta); - vector<StateId> heap; - // 'r[s + 1]', 's' state in 'fst', is the number of states in 'ofst' - // which corresponding pair contains 's' ,i.e. , it is number of - // paths computed so far to 's'. Valid for 's == -1' (superfinal). - vector<int> r; - NaturalLess<Weight> less; - if (ifst.Start() == kNoStateId || - distance.size() <= ifst.Start() || - distance[ifst.Start()] == Weight::Zero() || - less(weight_threshold, Weight::One()) || - state_threshold == 0) { - if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError); - return; - } - ofst->SetStart(ofst->AddState()); - StateId final = ofst->AddState(); - ofst->SetFinal(final, Weight::One()); - while (pairs.size() <= final) - pairs.push_back(Pair(kNoStateId, Weight::Zero())); - pairs[final] = Pair(ifst.Start(), Weight::One()); - heap.push_back(final); - Weight limit = Times(distance[ifst.Start()], weight_threshold); - - while (!heap.empty()) { - pop_heap(heap.begin(), heap.end(), compare); - StateId state = heap.back(); - Pair p = pairs[state]; - heap.pop_back(); - Weight d = p.first == superfinal ? Weight::One() : - p.first < distance.size() ? 
distance[p.first] : Weight::Zero(); - - if (less(limit, Times(d, p.second)) || - (state_threshold != kNoStateId && - ofst->NumStates() >= state_threshold)) - continue; - - while (r.size() <= p.first + 1) r.push_back(0); - ++r[p.first + 1]; - if (p.first == superfinal) - ofst->AddArc(ofst->Start(), Arc(0, 0, Weight::One(), state)); - if ((p.first == superfinal) && (r[p.first + 1] == n)) break; - if (r[p.first + 1] > n) continue; - if (p.first == superfinal) continue; - - for (ArcIterator< Fst<RevArc> > aiter(ifst, p.first); - !aiter.Done(); - aiter.Next()) { - const RevArc &rarc = aiter.Value(); - Arc arc(rarc.ilabel, rarc.olabel, rarc.weight.Reverse(), rarc.nextstate); - Weight w = Times(p.second, arc.weight); - StateId next = ofst->AddState(); - pairs.push_back(Pair(arc.nextstate, w)); - arc.nextstate = state; - ofst->AddArc(next, arc); - heap.push_back(next); - push_heap(heap.begin(), heap.end(), compare); - } - - Weight finalw = ifst.Final(p.first).Reverse(); - if (finalw != Weight::Zero()) { - Weight w = Times(p.second, finalw); - StateId next = ofst->AddState(); - pairs.push_back(Pair(superfinal, w)); - ofst->AddArc(next, Arc(0, 0, finalw, state)); - heap.push_back(next); - push_heap(heap.begin(), heap.end(), compare); - } - } - Connect(ofst); - if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError); - ofst->SetProperties( - ShortestPathProperties(ofst->Properties(kFstProperties, false)), - kFstProperties); -} - - -// N-Shortest-path algorithm: this version allow fine control -// via the options argument. See below for a simpler interface. -// -// 'ofst' contains the n-shortest paths in 'ifst'. 'distance' returns -// the shortest distances from the source state to each state in -// 'ifst'. 'opts' is used to specify options such as the number of -// paths to return, whether they need to have distinct input -// strings, the queue discipline, the arc filter and the convergence -// delta. 
-// -// The n-shortest paths are the n-lowest weight paths w.r.t. the -// natural semiring order. The single path that can be read from the -// ith of at most n transitions leaving the initial state of 'ofst' is -// the ith shortest path. Disregarding the initial state and initial -// transitions, The n-shortest paths, in fact, form a tree rooted at -// the single final state. - -// The weights need to be right distributive and have the path (kPath) -// property. They need to be left distributive as well for nshortest -// > 1. -// -// The algorithm is from Mohri and Riley, "An Efficient Algorithm for -// the n-best-strings problem", ICSLP 2002. The algorithm relies on -// the shortest-distance algorithm. There are some issues with the -// pseudo-code as written in the paper (viz., line 11). -template<class Arc, class Queue, class ArcFilter> -void ShortestPath(const Fst<Arc> &ifst, MutableFst<Arc> *ofst, - vector<typename Arc::Weight> *distance, - ShortestPathOptions<Arc, Queue, ArcFilter> &opts) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - typedef ReverseArc<Arc> ReverseArc; - - size_t n = opts.nshortest; - if (n == 1) { - SingleShortestPath(ifst, ofst, distance, opts); - return; - } - if (n <= 0) return; - if ((Weight::Properties() & (kPath | kSemiring)) != (kPath | kSemiring)) { - FSTERROR() << "ShortestPath: n-shortest: Weight needs to have the " - << "path property and be distributive: " - << Weight::Type(); - ofst->SetProperties(kError, kError); - return; - } - if (!opts.has_distance) { - ShortestDistance(ifst, distance, opts); - if (distance->size() == 1 && !(*distance)[0].Member()) { - ofst->SetProperties(kError, kError); - return; - } - } - // Algorithm works on the reverse of 'fst' : 'rfst', 'distance' is - // the distance to the final state in 'rfst', 'ofst' is built as the - // reverse of the tree of n-shortest path in 'rfst'. 
- VectorFst<ReverseArc> rfst; - Reverse(ifst, &rfst); - Weight d = Weight::Zero(); - for (ArcIterator< VectorFst<ReverseArc> > aiter(rfst, 0); - !aiter.Done(); aiter.Next()) { - const ReverseArc &arc = aiter.Value(); - StateId s = arc.nextstate - 1; - if (s < distance->size()) - d = Plus(d, Times(arc.weight.Reverse(), (*distance)[s])); - } - distance->insert(distance->begin(), d); - - if (!opts.unique) { - NShortestPath(rfst, ofst, *distance, n, opts.delta, - opts.weight_threshold, opts.state_threshold); - } else { - vector<Weight> ddistance; - DeterminizeFstOptions<ReverseArc> dopts(opts.delta); - DeterminizeFst<ReverseArc> dfst(rfst, distance, &ddistance, dopts); - NShortestPath(dfst, ofst, ddistance, n, opts.delta, - opts.weight_threshold, opts.state_threshold); - } - distance->erase(distance->begin()); -} - - -// Shortest-path algorithm: simplified interface. See above for a -// version that allows finer control. -// -// 'ofst' contains the 'n'-shortest paths in 'ifst'. The queue -// discipline is automatically selected. When 'unique' == true, only -// paths with distinct input labels are returned. -// -// The n-shortest paths are the n-lowest weight paths w.r.t. the -// natural semiring order. The single path that can be read from the -// ith of at most n transitions leaving the initial state of 'ofst' is -// the ith best path. -// -// The weights need to be right distributive and have the path -// (kPath) property. 
-template<class Arc> -void ShortestPath(const Fst<Arc> &ifst, MutableFst<Arc> *ofst, - size_t n = 1, bool unique = false, - bool first_path = false, - typename Arc::Weight weight_threshold = Arc::Weight::Zero(), - typename Arc::StateId state_threshold = kNoStateId) { - vector<typename Arc::Weight> distance; - AnyArcFilter<Arc> arc_filter; - AutoQueue<typename Arc::StateId> state_queue(ifst, &distance, arc_filter); - ShortestPathOptions< Arc, AutoQueue<typename Arc::StateId>, - AnyArcFilter<Arc> > opts(&state_queue, arc_filter, n, unique, false, - kDelta, first_path, weight_threshold, - state_threshold); - ShortestPath(ifst, ofst, &distance, opts); -} - -} // namespace fst - -#endif // FST_LIB_SHORTEST_PATH_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/signed-log-weight.h b/kaldi_io/src/tools/openfst/include/fst/signed-log-weight.h deleted file mode 100644 index 61adefb..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/signed-log-weight.h +++ /dev/null @@ -1,367 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Kasturi Rangan Raghavan) -// \file -// LogWeight along with sign information that represents the value X in the -// linear domain as <sign(X), -ln(|X|)> -// The sign is a TropicalWeight: -// positive, TropicalWeight.Value() > 0.0, recommended value 1.0 -// negative, TropicalWeight.Value() <= 0.0, recommended value -1.0 - -#ifndef FST_LIB_SIGNED_LOG_WEIGHT_H_ -#define FST_LIB_SIGNED_LOG_WEIGHT_H_ - -#include <fst/float-weight.h> -#include <fst/pair-weight.h> - - -namespace fst { -template <class T> -class SignedLogWeightTpl - : public PairWeight<TropicalWeight, LogWeightTpl<T> > { - public: - typedef TropicalWeight X1; - typedef LogWeightTpl<T> X2; - using PairWeight<X1, X2>::Value1; - using PairWeight<X1, X2>::Value2; - - using PairWeight<X1, X2>::Reverse; - using PairWeight<X1, X2>::Quantize; - using PairWeight<X1, X2>::Member; - - typedef SignedLogWeightTpl<T> ReverseWeight; - - SignedLogWeightTpl() : PairWeight<X1, X2>() {} - - SignedLogWeightTpl(const SignedLogWeightTpl<T>& w) - : PairWeight<X1, X2> (w) { } - - SignedLogWeightTpl(const PairWeight<X1, X2>& w) - : PairWeight<X1, X2> (w) { } - - SignedLogWeightTpl(const X1& x1, const X2& x2) - : PairWeight<X1, X2>(x1, x2) { } - - static const SignedLogWeightTpl<T> &Zero() { - static const SignedLogWeightTpl<T> zero(X1(1.0), X2::Zero()); - return zero; - } - - static const SignedLogWeightTpl<T> &One() { - static const SignedLogWeightTpl<T> one(X1(1.0), X2::One()); - return one; - } - - static const SignedLogWeightTpl<T> &NoWeight() { - static const SignedLogWeightTpl<T> no_weight(X1(1.0), X2::NoWeight()); - return no_weight; - } - - static const string &Type() { - static const string type = "signed_log_" + X1::Type() + "_" + X2::Type(); - return type; - } - - ProductWeight<X1, X2> Quantize(float delta = kDelta) const { - return PairWeight<X1, X2>::Quantize(); - } - - ReverseWeight Reverse() const { - return PairWeight<X1, X2>::Reverse(); - } - - bool Member() const { - return 
PairWeight<X1, X2>::Member(); - } - - static uint64 Properties() { - // not idempotent nor path - return kLeftSemiring | kRightSemiring | kCommutative; - } - - size_t Hash() const { - size_t h1; - if (Value2() == X2::Zero() || Value1().Value() > 0.0) - h1 = TropicalWeight(1.0).Hash(); - else - h1 = TropicalWeight(-1.0).Hash(); - size_t h2 = Value2().Hash(); - const int lshift = 5; - const int rshift = CHAR_BIT * sizeof(size_t) - 5; - return h1 << lshift ^ h1 >> rshift ^ h2; - } -}; - -template <class T> -inline SignedLogWeightTpl<T> Plus(const SignedLogWeightTpl<T> &w1, - const SignedLogWeightTpl<T> &w2) { - if (!w1.Member() || !w2.Member()) - return SignedLogWeightTpl<T>::NoWeight(); - bool s1 = w1.Value1().Value() > 0.0; - bool s2 = w2.Value1().Value() > 0.0; - T f1 = w1.Value2().Value(); - T f2 = w2.Value2().Value(); - if (f1 == FloatLimits<T>::PosInfinity()) - return w2; - else if (f2 == FloatLimits<T>::PosInfinity()) - return w1; - else if (f1 == f2) { - if (s1 == s2) - return SignedLogWeightTpl<T>(w1.Value1(), (f2 - log(2.0F))); - else - return SignedLogWeightTpl<T>::Zero(); - } else if (f1 > f2) { - if (s1 == s2) { - return SignedLogWeightTpl<T>( - w1.Value1(), (f2 - log(1.0F + exp(f2 - f1)))); - } else { - return SignedLogWeightTpl<T>( - w2.Value1(), (f2 - log(1.0F - exp(f2 - f1)))); - } - } else { - if (s2 == s1) { - return SignedLogWeightTpl<T>( - w2.Value1(), (f1 - log(1.0F + exp(f1 - f2)))); - } else { - return SignedLogWeightTpl<T>( - w1.Value1(), (f1 - log(1.0F - exp(f1 - f2)))); - } - } -} - -template <class T> -inline SignedLogWeightTpl<T> Minus(const SignedLogWeightTpl<T> &w1, - const SignedLogWeightTpl<T> &w2) { - SignedLogWeightTpl<T> minus_w2(-w2.Value1().Value(), w2.Value2()); - return Plus(w1, minus_w2); -} - -template <class T> -inline SignedLogWeightTpl<T> Times(const SignedLogWeightTpl<T> &w1, - const SignedLogWeightTpl<T> &w2) { - if (!w1.Member() || !w2.Member()) - return SignedLogWeightTpl<T>::NoWeight(); - bool s1 = w1.Value1().Value() 
> 0.0; - bool s2 = w2.Value1().Value() > 0.0; - T f1 = w1.Value2().Value(); - T f2 = w2.Value2().Value(); - if (s1 == s2) - return SignedLogWeightTpl<T>(TropicalWeight(1.0), (f1 + f2)); - else - return SignedLogWeightTpl<T>(TropicalWeight(-1.0), (f1 + f2)); -} - -template <class T> -inline SignedLogWeightTpl<T> Divide(const SignedLogWeightTpl<T> &w1, - const SignedLogWeightTpl<T> &w2, - DivideType typ = DIVIDE_ANY) { - if (!w1.Member() || !w2.Member()) - return SignedLogWeightTpl<T>::NoWeight(); - bool s1 = w1.Value1().Value() > 0.0; - bool s2 = w2.Value1().Value() > 0.0; - T f1 = w1.Value2().Value(); - T f2 = w2.Value2().Value(); - if (f2 == FloatLimits<T>::PosInfinity()) - return SignedLogWeightTpl<T>(TropicalWeight(1.0), - FloatLimits<T>::NumberBad()); - else if (f1 == FloatLimits<T>::PosInfinity()) - return SignedLogWeightTpl<T>(TropicalWeight(1.0), - FloatLimits<T>::PosInfinity()); - else if (s1 == s2) - return SignedLogWeightTpl<T>(TropicalWeight(1.0), (f1 - f2)); - else - return SignedLogWeightTpl<T>(TropicalWeight(-1.0), (f1 - f2)); -} - -template <class T> -inline bool ApproxEqual(const SignedLogWeightTpl<T> &w1, - const SignedLogWeightTpl<T> &w2, - float delta = kDelta) { - bool s1 = w1.Value1().Value() > 0.0; - bool s2 = w2.Value1().Value() > 0.0; - if (s1 == s2) { - return ApproxEqual(w1.Value2(), w2.Value2(), delta); - } else { - return w1.Value2() == LogWeightTpl<T>::Zero() - && w2.Value2() == LogWeightTpl<T>::Zero(); - } -} - -template <class T> -inline bool operator==(const SignedLogWeightTpl<T> &w1, - const SignedLogWeightTpl<T> &w2) { - bool s1 = w1.Value1().Value() > 0.0; - bool s2 = w2.Value1().Value() > 0.0; - if (s1 == s2) - return w1.Value2() == w2.Value2(); - else - return (w1.Value2() == LogWeightTpl<T>::Zero()) && - (w2.Value2() == LogWeightTpl<T>::Zero()); -} - - -// Single-precision signed-log weight -typedef SignedLogWeightTpl<float> SignedLogWeight; -// Double-precision signed-log weight -typedef SignedLogWeightTpl<double> 
SignedLog64Weight; - -// -// WEIGHT CONVERTER SPECIALIZATIONS. -// - -template <class W1, class W2> -bool SignedLogConvertCheck(W1 w) { - if (w.Value1().Value() < 0.0) { - FSTERROR() << "WeightConvert: can't convert weight from \"" - << W1::Type() << "\" to \"" << W2::Type(); - return false; - } - return true; -} - -// Convert to tropical -template <> -struct WeightConvert<SignedLogWeight, TropicalWeight> { - TropicalWeight operator()(SignedLogWeight w) const { - if (!SignedLogConvertCheck<SignedLogWeight, TropicalWeight>(w)) - return TropicalWeight::NoWeight(); - return w.Value2().Value(); - } -}; - -template <> -struct WeightConvert<SignedLog64Weight, TropicalWeight> { - TropicalWeight operator()(SignedLog64Weight w) const { - if (!SignedLogConvertCheck<SignedLog64Weight, TropicalWeight>(w)) - return TropicalWeight::NoWeight(); - return w.Value2().Value(); - } -}; - -// Convert to log -template <> -struct WeightConvert<SignedLogWeight, LogWeight> { - LogWeight operator()(SignedLogWeight w) const { - if (!SignedLogConvertCheck<SignedLogWeight, LogWeight>(w)) - return LogWeight::NoWeight(); - return w.Value2().Value(); - } -}; - -template <> -struct WeightConvert<SignedLog64Weight, LogWeight> { - LogWeight operator()(SignedLog64Weight w) const { - if (!SignedLogConvertCheck<SignedLog64Weight, LogWeight>(w)) - return LogWeight::NoWeight(); - return w.Value2().Value(); - } -}; - -// Convert to log64 -template <> -struct WeightConvert<SignedLogWeight, Log64Weight> { - Log64Weight operator()(SignedLogWeight w) const { - if (!SignedLogConvertCheck<SignedLogWeight, Log64Weight>(w)) - return Log64Weight::NoWeight(); - return w.Value2().Value(); - } -}; - -template <> -struct WeightConvert<SignedLog64Weight, Log64Weight> { - Log64Weight operator()(SignedLog64Weight w) const { - if (!SignedLogConvertCheck<SignedLog64Weight, Log64Weight>(w)) - return Log64Weight::NoWeight(); - return w.Value2().Value(); - } -}; - -// Convert to signed log -template <> -struct 
WeightConvert<TropicalWeight, SignedLogWeight> { - SignedLogWeight operator()(TropicalWeight w) const { - TropicalWeight x1 = 1.0; - LogWeight x2 = w.Value(); - return SignedLogWeight(x1, x2); - } -}; - -template <> -struct WeightConvert<LogWeight, SignedLogWeight> { - SignedLogWeight operator()(LogWeight w) const { - TropicalWeight x1 = 1.0; - LogWeight x2 = w.Value(); - return SignedLogWeight(x1, x2); - } -}; - -template <> -struct WeightConvert<Log64Weight, SignedLogWeight> { - SignedLogWeight operator()(Log64Weight w) const { - TropicalWeight x1 = 1.0; - LogWeight x2 = w.Value(); - return SignedLogWeight(x1, x2); - } -}; - -template <> -struct WeightConvert<SignedLog64Weight, SignedLogWeight> { - SignedLogWeight operator()(SignedLog64Weight w) const { - TropicalWeight x1 = w.Value1(); - LogWeight x2 = w.Value2().Value(); - return SignedLogWeight(x1, x2); - } -}; - -// Convert to signed log64 -template <> -struct WeightConvert<TropicalWeight, SignedLog64Weight> { - SignedLog64Weight operator()(TropicalWeight w) const { - TropicalWeight x1 = 1.0; - Log64Weight x2 = w.Value(); - return SignedLog64Weight(x1, x2); - } -}; - -template <> -struct WeightConvert<LogWeight, SignedLog64Weight> { - SignedLog64Weight operator()(LogWeight w) const { - TropicalWeight x1 = 1.0; - Log64Weight x2 = w.Value(); - return SignedLog64Weight(x1, x2); - } -}; - -template <> -struct WeightConvert<Log64Weight, SignedLog64Weight> { - SignedLog64Weight operator()(Log64Weight w) const { - TropicalWeight x1 = 1.0; - Log64Weight x2 = w.Value(); - return SignedLog64Weight(x1, x2); - } -}; - -template <> -struct WeightConvert<SignedLogWeight, SignedLog64Weight> { - SignedLog64Weight operator()(SignedLogWeight w) const { - TropicalWeight x1 = w.Value1(); - Log64Weight x2 = w.Value2().Value(); - return SignedLog64Weight(x1, x2); - } -}; - -} // namespace fst - -#endif // FST_LIB_SIGNED_LOG_WEIGHT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/slist.h 
b/kaldi_io/src/tools/openfst/include/fst/slist.h deleted file mode 100644 index b800522..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/slist.h +++ /dev/null @@ -1,61 +0,0 @@ -// slist.h -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Author: [email protected] (Michael Riley) -// -// \file -// Includes slist definition or defines in terms of STL list as a fallback. - -#ifndef FST_LIB_SLIST_H__ -#define FST_LIB_SLIST_H__ - -#include <fst/config.h> - -#ifdef HAVE___GNU_CXX__SLIST_INT_ - -#include <ext/slist> - -namespace fst { - -using __gnu_cxx::slist; - -} - -#else - -#include <list> - -namespace fst { - -using std::list; - -template <typename T> class slist : public list<T> { - public: - typedef typename list<T>::iterator iterator; - typedef typename list<T>::const_iterator const_iterator; - - using list<T>::erase; - - iterator erase_after(iterator pos) { - iterator npos = pos; - erase(++npos); - return pos; - } -}; - -} // namespace fst - -#endif // HAVE___GNU_CXX__SLIST_INT_ - -#endif // FST_LIB_SLIST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/sparse-power-weight.h b/kaldi_io/src/tools/openfst/include/fst/sparse-power-weight.h deleted file mode 100644 index a1ff56a..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/sparse-power-weight.h +++ /dev/null @@ -1,225 +0,0 @@ -// sparse-power-weight.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the 
License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Kasturi Rangan Raghavan) -// Inspiration: [email protected] (Cyril Allauzen) -// -// \file -// Cartesian power weight semiring operation definitions. -// Uses SparseTupleWeight as underlying representation. - -#ifndef FST_LIB_SPARSE_POWER_WEIGHT_H__ -#define FST_LIB_SPARSE_POWER_WEIGHT_H__ - -#include<string> - -#include <fst/sparse-tuple-weight.h> -#include <fst/weight.h> - - -namespace fst { - -// Below SparseTupleWeight*Mapper are used in conjunction with -// SparseTupleWeightMap to compute the respective semiring operations -template<class W, class K> -struct SparseTupleWeightPlusMapper { - W Map(const K& k, const W& v1, const W& v2) const { - return Plus(v1, v2); - } -}; - -template<class W, class K> -struct SparseTupleWeightTimesMapper { - W Map(const K& k, const W& v1, const W& v2) const { - return Times(v1, v2); - } -}; - -template<class W, class K> -struct SparseTupleWeightDivideMapper { - SparseTupleWeightDivideMapper(DivideType divide_type) { - divide_type_ = divide_type; - } - W Map(const K& k, const W& v1, const W& v2) const { - return Divide(v1, v2, divide_type_); - } - DivideType divide_type_; -}; - -template<class W, class K> -struct SparseTupleWeightApproxMapper { - SparseTupleWeightApproxMapper(float delta) { delta_ = delta; } - W Map(const K& k, const W& v1, const W& v2) const { - return ApproxEqual(v1, v2, delta_) ? 
W::One() : W::Zero(); - } - float delta_; -}; - -// Sparse cartesian power semiring: W ^ n -// Forms: -// - a left semimodule when W is a left semiring, -// - a right semimodule when W is a right semiring, -// - a bisemimodule when W is a semiring, -// the free semimodule of rank n over W -// The Times operation is overloaded to provide the -// left and right scalar products. -// K is the key value type. kNoKey(-1) is reserved for internal use -template <class W, class K = int> -class SparsePowerWeight : public SparseTupleWeight<W, K> { - public: - using SparseTupleWeight<W, K>::Zero; - using SparseTupleWeight<W, K>::One; - using SparseTupleWeight<W, K>::NoWeight; - using SparseTupleWeight<W, K>::Quantize; - using SparseTupleWeight<W, K>::Reverse; - - typedef SparsePowerWeight<typename W::ReverseWeight, K> ReverseWeight; - - SparsePowerWeight() {} - - SparsePowerWeight(const SparseTupleWeight<W, K> &w) : - SparseTupleWeight<W, K>(w) { } - - template <class Iterator> - SparsePowerWeight(Iterator begin, Iterator end) : - SparseTupleWeight<W, K>(begin, end) { } - - SparsePowerWeight(const K &key, const W &w) : - SparseTupleWeight<W, K>(key, w) { } - - static const SparsePowerWeight<W, K> &Zero() { - static const SparsePowerWeight<W, K> zero(SparseTupleWeight<W, K>::Zero()); - return zero; - } - - static const SparsePowerWeight<W, K> &One() { - static const SparsePowerWeight<W, K> one(SparseTupleWeight<W, K>::One()); - return one; - } - - static const SparsePowerWeight<W, K> &NoWeight() { - static const SparsePowerWeight<W, K> no_weight( - SparseTupleWeight<W, K>::NoWeight()); - return no_weight; - } - - // Overide this: Overwrite the Type method to reflect the key type - // if using non-default key type. 
- static const string &Type() { - static string type; - if(type.empty()) { - type = W::Type() + "_^n"; - if(sizeof(K) != sizeof(uint32)) { - string size; - Int64ToStr(8 * sizeof(K), &size); - type += "_" + size; - } - } - return type; - } - - static uint64 Properties() { - uint64 props = W::Properties(); - return props & (kLeftSemiring | kRightSemiring | - kCommutative | kIdempotent); - } - - SparsePowerWeight<W, K> Quantize(float delta = kDelta) const { - return SparseTupleWeight<W, K>::Quantize(delta); - } - - ReverseWeight Reverse() const { - return SparseTupleWeight<W, K>::Reverse(); - } -}; - -// Semimodule plus operation -template <class W, class K> -inline SparsePowerWeight<W, K> Plus(const SparsePowerWeight<W, K> &w1, - const SparsePowerWeight<W, K> &w2) { - SparsePowerWeight<W, K> ret; - SparseTupleWeightPlusMapper<W, K> operator_mapper; - SparseTupleWeightMap(&ret, w1, w2, operator_mapper); - return ret; -} - -// Semimodule times operation -template <class W, class K> -inline SparsePowerWeight<W, K> Times(const SparsePowerWeight<W, K> &w1, - const SparsePowerWeight<W, K> &w2) { - SparsePowerWeight<W, K> ret; - SparseTupleWeightTimesMapper<W, K> operator_mapper; - SparseTupleWeightMap(&ret, w1, w2, operator_mapper); - return ret; -} - -// Semimodule divide operation -template <class W, class K> -inline SparsePowerWeight<W, K> Divide(const SparsePowerWeight<W, K> &w1, - const SparsePowerWeight<W, K> &w2, - DivideType type = DIVIDE_ANY) { - SparsePowerWeight<W, K> ret; - SparseTupleWeightDivideMapper<W, K> operator_mapper(type); - SparseTupleWeightMap(&ret, w1, w2, operator_mapper); - return ret; -} - -// Semimodule dot product -template <class W, class K> -inline const W& DotProduct(const SparsePowerWeight<W, K> &w1, - const SparsePowerWeight<W, K> &w2) { - const SparsePowerWeight<W, K>& product = Times(w1, w2); - W ret(W::Zero()); - for (SparseTupleWeightIterator<W, K> it(product); !it.Done(); it.Next()) { - ret = Plus(ret, it.Value().second); - } - return 
ret; -} - -template <class W, class K> -inline bool ApproxEqual(const SparsePowerWeight<W, K> &w1, - const SparsePowerWeight<W, K> &w2, - float delta = kDelta) { - SparseTupleWeight<W, K> ret; - SparseTupleWeightApproxMapper<W, K> operator_mapper(kDelta); - SparseTupleWeightMap(&ret, w1, w2, operator_mapper); - return ret == SparsePowerWeight<W, K>::One(); -} - -template <class W, class K> -inline SparsePowerWeight<W, K> Times(const W &k, - const SparsePowerWeight<W, K> &w2) { - SparsePowerWeight<W, K> w1(k); - return Times(w1, w2); -} - -template <class W, class K> -inline SparsePowerWeight<W, K> Times(const SparsePowerWeight<W, K> &w1, - const W &k) { - SparsePowerWeight<W, K> w2(k); - return Times(w1, w2); -} - -template <class W, class K> -inline SparsePowerWeight<W, K> Divide(const SparsePowerWeight<W, K> &w1, - const W &k, - DivideType divide_type = DIVIDE_ANY) { - SparsePowerWeight<W, K> w2(k); - return Divide(w1, w2, divide_type); -} - -} // namespace fst - -#endif // FST_LIB_SPARSE_POWER_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/sparse-tuple-weight.h b/kaldi_io/src/tools/openfst/include/fst/sparse-tuple-weight.h deleted file mode 100644 index c12ef4f..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/sparse-tuple-weight.h +++ /dev/null @@ -1,640 +0,0 @@ -// sparse-tuple-weight.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Kasturi Rangan Raghavan) -// Inspiration: [email protected] (Cyril Allauzen) -// \file -// Sparse version of tuple-weight, based on tuple-weight.h -// Internally stores sparse key, value pairs in linked list -// Default value elemnt is the assumed value of unset keys -// Internal singleton implementation that stores first key, -// value pair as a initialized member variable to avoide -// unnecessary allocation on heap. -// Use SparseTupleWeightIterator to iterate through the key,value pairs -// Note: this does NOT iterate through the default value. -// -// Sparse tuple weight set operation definitions. - -#ifndef FST_LIB_SPARSE_TUPLE_WEIGHT_H__ -#define FST_LIB_SPARSE_TUPLE_WEIGHT_H__ - -#include<string> -#include<list> -#include<stack> -#include<tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; - -#include <fst/weight.h> - - -DECLARE_string(fst_weight_parentheses); -DECLARE_string(fst_weight_separator); - -namespace fst { - -template <class W, class K> class SparseTupleWeight; - -template<class W, class K> -class SparseTupleWeightIterator; - -template <class W, class K> -istream &operator>>(istream &strm, SparseTupleWeight<W, K> &w); - -// Arbitrary dimension tuple weight, stored as a sorted linked-list -// W is any weight class, -// K is the key value type. 
kNoKey(-1) is reserved for internal use -template <class W, class K = int> -class SparseTupleWeight { - public: - typedef pair<K, W> Pair; - typedef SparseTupleWeight<typename W::ReverseWeight, K> ReverseWeight; - - const static K kNoKey = -1; - SparseTupleWeight() { - Init(); - } - - template <class Iterator> - SparseTupleWeight(Iterator begin, Iterator end) { - Init(); - // Assumes input iterator is sorted - for (Iterator it = begin; it != end; ++it) - Push(*it); - } - - - SparseTupleWeight(const K& key, const W &w) { - Init(); - Push(key, w); - } - - SparseTupleWeight(const W &w) { - Init(w); - } - - SparseTupleWeight(const SparseTupleWeight<W, K> &w) { - Init(w.DefaultValue()); - SetDefaultValue(w.DefaultValue()); - for (SparseTupleWeightIterator<W, K> it(w); !it.Done(); it.Next()) { - Push(it.Value()); - } - } - - static const SparseTupleWeight<W, K> &Zero() { - static SparseTupleWeight<W, K> zero; - return zero; - } - - static const SparseTupleWeight<W, K> &One() { - static SparseTupleWeight<W, K> one(W::One()); - return one; - } - - static const SparseTupleWeight<W, K> &NoWeight() { - static SparseTupleWeight<W, K> no_weight(W::NoWeight()); - return no_weight; - } - - istream &Read(istream &strm) { - ReadType(strm, &default_); - ReadType(strm, &first_); - return ReadType(strm, &rest_); - } - - ostream &Write(ostream &strm) const { - WriteType(strm, default_); - WriteType(strm, first_); - return WriteType(strm, rest_); - } - - SparseTupleWeight<W, K> &operator=(const SparseTupleWeight<W, K> &w) { - if (this == &w) return *this; // check for w = w - Init(w.DefaultValue()); - for (SparseTupleWeightIterator<W, K> it(w); !it.Done(); it.Next()) { - Push(it.Value()); - } - return *this; - } - - bool Member() const { - if (!DefaultValue().Member()) return false; - for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) { - if (!it.Value().second.Member()) return false; - } - return true; - } - - // Assumes H() function exists for the hash of the key 
value - size_t Hash() const { - uint64 h = 0; - std::tr1::hash<K> H; - for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) { - h = 5 * h + H(it.Value().first); - h = 13 * h + it.Value().second.Hash(); - } - return size_t(h); - } - - SparseTupleWeight<W, K> Quantize(float delta = kDelta) const { - SparseTupleWeight<W, K> w; - for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) { - w.Push(it.Value().first, it.Value().second.Quantize(delta)); - } - return w; - } - - ReverseWeight Reverse() const { - SparseTupleWeight<W, K> w; - for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) { - w.Push(it.Value().first, it.Value().second.Reverse()); - } - return w; - } - - // Common initializer among constructors. - void Init() { - Init(W::Zero()); - } - - void Init(const W& default_value) { - first_.first = kNoKey; - /* initialized to the reserved key value */ - default_ = default_value; - rest_.clear(); - } - - size_t Size() const { - if (first_.first == kNoKey) - return 0; - else - return rest_.size() + 1; - } - - inline void Push(const K &k, const W &w, bool default_value_check = true) { - Push(make_pair(k, w), default_value_check); - } - - inline void Push(const Pair &p, bool default_value_check = true) { - if (default_value_check && p.second == default_) return; - if (first_.first == kNoKey) { - first_ = p; - } else { - rest_.push_back(p); - } - } - - void SetDefaultValue(const W& val) { default_ = val; } - - const W& DefaultValue() const { return default_; } - - protected: - static istream& ReadNoParen( - istream&, SparseTupleWeight<W, K>&, char separator); - - static istream& ReadWithParen( - istream&, SparseTupleWeight<W, K>&, - char separator, char open_paren, char close_paren); - - private: - // Assumed default value of uninitialized keys, by default W::Zero() - W default_; - - // Key values pairs are first stored in first_, then fill rest_ - // this way we can avoid dynamic allocation in the common case - // 
where the weight is a single key,val pair. - Pair first_; - list<Pair> rest_; - - friend istream &operator>><W, K>(istream&, SparseTupleWeight<W, K>&); - friend class SparseTupleWeightIterator<W, K>; -}; - -template<class W, class K> -class SparseTupleWeightIterator { - public: - typedef typename SparseTupleWeight<W, K>::Pair Pair; - typedef typename list<Pair>::const_iterator const_iterator; - typedef typename list<Pair>::iterator iterator; - - explicit SparseTupleWeightIterator(const SparseTupleWeight<W, K>& w) - : first_(w.first_), rest_(w.rest_), init_(true), - iter_(rest_.begin()) {} - - bool Done() const { - if (init_) - return first_.first == SparseTupleWeight<W, K>::kNoKey; - else - return iter_ == rest_.end(); - } - - const Pair& Value() const { return init_ ? first_ : *iter_; } - - void Next() { - if (init_) - init_ = false; - else - ++iter_; - } - - void Reset() { - init_ = true; - iter_ = rest_.begin(); - } - - private: - const Pair &first_; - const list<Pair> & rest_; - bool init_; // in the initialized state? - typename list<Pair>::const_iterator iter_; - - DISALLOW_COPY_AND_ASSIGN(SparseTupleWeightIterator); -}; - -template<class W, class K, class M> -inline void SparseTupleWeightMap( - SparseTupleWeight<W, K>* ret, - const SparseTupleWeight<W, K>& w1, - const SparseTupleWeight<W, K>& w2, - const M& operator_mapper) { - SparseTupleWeightIterator<W, K> w1_it(w1); - SparseTupleWeightIterator<W, K> w2_it(w2); - const W& v1_def = w1.DefaultValue(); - const W& v2_def = w2.DefaultValue(); - ret->SetDefaultValue(operator_mapper.Map(0, v1_def, v2_def)); - while (!w1_it.Done() || !w2_it.Done()) { - const K& k1 = (w1_it.Done()) ? w2_it.Value().first : w1_it.Value().first; - const K& k2 = (w2_it.Done()) ? w1_it.Value().first : w2_it.Value().first; - const W& v1 = (w1_it.Done()) ? v1_def : w1_it.Value().second; - const W& v2 = (w2_it.Done()) ? 
v2_def : w2_it.Value().second; - if (k1 == k2) { - ret->Push(k1, operator_mapper.Map(k1, v1, v2)); - if (!w1_it.Done()) w1_it.Next(); - if (!w2_it.Done()) w2_it.Next(); - } else if (k1 < k2) { - ret->Push(k1, operator_mapper.Map(k1, v1, v2_def)); - w1_it.Next(); - } else { - ret->Push(k2, operator_mapper.Map(k2, v1_def, v2)); - w2_it.Next(); - } - } -} - -template <class W, class K> -inline bool operator==(const SparseTupleWeight<W, K> &w1, - const SparseTupleWeight<W, K> &w2) { - const W& v1_def = w1.DefaultValue(); - const W& v2_def = w2.DefaultValue(); - if (v1_def != v2_def) return false; - - SparseTupleWeightIterator<W, K> w1_it(w1); - SparseTupleWeightIterator<W, K> w2_it(w2); - while (!w1_it.Done() || !w2_it.Done()) { - const K& k1 = (w1_it.Done()) ? w2_it.Value().first : w1_it.Value().first; - const K& k2 = (w2_it.Done()) ? w1_it.Value().first : w2_it.Value().first; - const W& v1 = (w1_it.Done()) ? v1_def : w1_it.Value().second; - const W& v2 = (w2_it.Done()) ? v2_def : w2_it.Value().second; - if (k1 == k2) { - if (v1 != v2) return false; - if (!w1_it.Done()) w1_it.Next(); - if (!w2_it.Done()) w2_it.Next(); - } else if (k1 < k2) { - if (v1 != v2_def) return false; - w1_it.Next(); - } else { - if (v1_def != v2) return false; - w2_it.Next(); - } - } - return true; -} - -template <class W, class K> -inline bool operator!=(const SparseTupleWeight<W, K> &w1, - const SparseTupleWeight<W, K> &w2) { - return !(w1 == w2); -} - -template <class W, class K> -inline ostream &operator<<(ostream &strm, const SparseTupleWeight<W, K> &w) { - if(FLAGS_fst_weight_separator.size() != 1) { - FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1"; - strm.clear(std::ios::badbit); - return strm; - } - char separator = FLAGS_fst_weight_separator[0]; - bool write_parens = false; - if (!FLAGS_fst_weight_parentheses.empty()) { - if (FLAGS_fst_weight_parentheses.size() != 2) { - FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2"; - 
strm.clear(std::ios::badbit); - return strm; - } - write_parens = true; - } - - if (write_parens) - strm << FLAGS_fst_weight_parentheses[0]; - - strm << w.DefaultValue(); - strm << separator; - - size_t n = w.Size(); - strm << n; - strm << separator; - - for (SparseTupleWeightIterator<W, K> it(w); !it.Done(); it.Next()) { - strm << it.Value().first; - strm << separator; - strm << it.Value().second; - strm << separator; - } - - if (write_parens) - strm << FLAGS_fst_weight_parentheses[1]; - - return strm; -} - -template <class W, class K> -inline istream &operator>>(istream &strm, SparseTupleWeight<W, K> &w) { - if(FLAGS_fst_weight_separator.size() != 1) { - FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1"; - strm.clear(std::ios::badbit); - return strm; - } - char separator = FLAGS_fst_weight_separator[0]; - - if (!FLAGS_fst_weight_parentheses.empty()) { - if (FLAGS_fst_weight_parentheses.size() != 2) { - FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2"; - strm.clear(std::ios::badbit); - return strm; - } - return SparseTupleWeight<W, K>::ReadWithParen( - strm, w, separator, FLAGS_fst_weight_parentheses[0], - FLAGS_fst_weight_parentheses[1]); - } else { - return SparseTupleWeight<W, K>::ReadNoParen(strm, w, separator); - } -} - -// Reads SparseTupleWeight when there are no parentheses around tuple terms -template <class W, class K> -inline istream& SparseTupleWeight<W, K>::ReadNoParen( - istream &strm, - SparseTupleWeight<W, K> &w, - char separator) { - int c; - size_t n; - - do { - c = strm.get(); - } while (isspace(c)); - - - { // Read default weight - W default_value; - string s; - while (c != separator) { - if (c == EOF) { - strm.clear(std::ios::badbit); - return strm; - } - s += c; - c = strm.get(); - } - istringstream sstrm(s); - sstrm >> default_value; - w.SetDefaultValue(default_value); - } - - c = strm.get(); - - { // Read n - string s; - while (c != separator) { - if (c == EOF) { - strm.clear(std::ios::badbit); - 
return strm; - } - s += c; - c = strm.get(); - } - istringstream sstrm(s); - sstrm >> n; - } - - // Read n elements - for (size_t i = 0; i < n; ++i) { - // discard separator - c = strm.get(); - K p; - W r; - - { // read key - string s; - while (c != separator) { - if (c == EOF) { - strm.clear(std::ios::badbit); - return strm; - } - s += c; - c = strm.get(); - } - istringstream sstrm(s); - sstrm >> p; - } - - c = strm.get(); - - { // read weight - string s; - while (c != separator) { - if (c == EOF) { - strm.clear(std::ios::badbit); - return strm; - } - s += c; - c = strm.get(); - } - istringstream sstrm(s); - sstrm >> r; - } - - w.Push(p, r); - } - - c = strm.get(); - if (c != separator) { - strm.clear(std::ios::badbit); - } - - return strm; -} - -// Reads SparseTupleWeight when there are parentheses around tuple terms -template <class W, class K> -inline istream& SparseTupleWeight<W, K>::ReadWithParen( - istream &strm, - SparseTupleWeight<W, K> &w, - char separator, - char open_paren, - char close_paren) { - int c; - size_t n; - - do { - c = strm.get(); - } while (isspace(c)); - - if (c != open_paren) { - FSTERROR() << "is fst_weight_parentheses flag set correcty? 
"; - strm.clear(std::ios::badbit); - return strm; - } - - c = strm.get(); - - { // Read weight - W default_value; - stack<int> parens; - string s; - while (c != separator || !parens.empty()) { - if (c == EOF) { - strm.clear(std::ios::badbit); - return strm; - } - s += c; - // If parens encountered before separator, they must be matched - if (c == open_paren) { - parens.push(1); - } else if (c == close_paren) { - // Fail for mismatched parens - if (parens.empty()) { - strm.clear(std::ios::failbit); - return strm; - } - parens.pop(); - } - c = strm.get(); - } - istringstream sstrm(s); - sstrm >> default_value; - w.SetDefaultValue(default_value); - } - - c = strm.get(); - - { // Read n - string s; - while (c != separator) { - if (c == EOF) { - strm.clear(std::ios::badbit); - return strm; - } - s += c; - c = strm.get(); - } - istringstream sstrm(s); - sstrm >> n; - } - - // Read n elements - for (size_t i = 0; i < n; ++i) { - // discard separator - c = strm.get(); - K p; - W r; - - { // Read key - stack<int> parens; - string s; - while (c != separator || !parens.empty()) { - if (c == EOF) { - strm.clear(std::ios::badbit); - return strm; - } - s += c; - // If parens encountered before separator, they must be matched - if (c == open_paren) { - parens.push(1); - } else if (c == close_paren) { - // Fail for mismatched parens - if (parens.empty()) { - strm.clear(std::ios::failbit); - return strm; - } - parens.pop(); - } - c = strm.get(); - } - istringstream sstrm(s); - sstrm >> p; - } - - c = strm.get(); - - { // Read weight - stack<int> parens; - string s; - while (c != separator || !parens.empty()) { - if (c == EOF) { - strm.clear(std::ios::badbit); - return strm; - } - s += c; - // If parens encountered before separator, they must be matched - if (c == open_paren) { - parens.push(1); - } else if (c == close_paren) { - // Fail for mismatched parens - if (parens.empty()) { - strm.clear(std::ios::failbit); - return strm; - } - parens.pop(); - } - c = strm.get(); - } - 
istringstream sstrm(s); - sstrm >> r; - } - - w.Push(p, r); - } - - if (c != separator) { - FSTERROR() << " separator expected, not found! "; - strm.clear(std::ios::badbit); - return strm; - } - - c = strm.get(); - if (c != close_paren) { - FSTERROR() << " is fst_weight_parentheses flag set correcty? "; - strm.clear(std::ios::badbit); - return strm; - } - - return strm; -} - - - -} // namespace fst - -#endif // FST_LIB_SPARSE_TUPLE_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/state-map.h b/kaldi_io/src/tools/openfst/include/fst/state-map.h deleted file mode 100644 index 9d6db74..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/state-map.h +++ /dev/null @@ -1,605 +0,0 @@ -// map.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Class to map over/transform states e.g., sort transitions -// Consider using when operation does not change the number of states. - -#ifndef FST_LIB_STATE_MAP_H__ -#define FST_LIB_STATE_MAP_H__ - -#include <algorithm> -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <string> -#include <utility> -using std::pair; using std::make_pair; - -#include <fst/cache.h> -#include <fst/arc-map.h> -#include <fst/mutable-fst.h> - - -namespace fst { - -// StateMapper Interface - class determinies how states are mapped. 
-// Useful for implementing operations that do not change the number of states. -// -// class StateMapper { -// public: -// typedef A FromArc; -// typedef B ToArc; -// -// // Typical constructor -// StateMapper(const Fst<A> &fst); -// // Required copy constructor that allows updating Fst argument; -// // pass only if relevant and changed. -// StateMapper(const StateMapper &mapper, const Fst<A> *fst = 0); -// -// // Specifies initial state of result -// B::StateId Start() const; -// // Specifies state's final weight in result -// B::Weight Final(B::StateId s) const; -// -// // These methods iterate through a state's arcs in result -// // Specifies state to iterate over -// void SetState(B::StateId s); -// // End of arcs? -// bool Done() const; -// // Current arc - -// const B &Value() const; -// // Advance to next arc (when !Done) -// void Next(); -// -// // Specifies input symbol table action the mapper requires (see above). -// MapSymbolsAction InputSymbolsAction() const; -// // Specifies output symbol table action the mapper requires (see above). -// MapSymbolsAction OutputSymbolsAction() const; -// // This specifies the known properties of an Fst mapped by this -// // mapper. It takes as argument the input Fst's known properties. -// uint64 Properties(uint64 props) const; -// }; -// -// We include a various state map versions below. One dimension of -// variation is whether the mapping mutates its input, writes to a -// new result Fst, or is an on-the-fly Fst. Another dimension is how -// we pass the mapper. We allow passing the mapper by pointer -// for cases that we need to change the state of the user's mapper. -// We also include map versions that pass the mapper -// by value or const reference when this suffices. - -// Maps an arc type A using a mapper function object C, passed -// by pointer. This version modifies its Fst input. 
-template<class A, class C> -void StateMap(MutableFst<A> *fst, C* mapper) { - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) - fst->SetInputSymbols(0); - - if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) - fst->SetOutputSymbols(0); - - if (fst->Start() == kNoStateId) - return; - - uint64 props = fst->Properties(kFstProperties, false); - - fst->SetStart(mapper->Start()); - - for (StateId s = 0; s < fst->NumStates(); ++s) { - mapper->SetState(s); - fst->DeleteArcs(s); - for (; !mapper->Done(); mapper->Next()) - fst->AddArc(s, mapper->Value()); - fst->SetFinal(s, mapper->Final(s)); - } - - fst->SetProperties(mapper->Properties(props), kFstProperties); -} - -// Maps an arc type A using a mapper function object C, passed -// by value. This version modifies its Fst input. -template<class A, class C> -void StateMap(MutableFst<A> *fst, C mapper) { - StateMap(fst, &mapper); -} - - -// Maps an arc type A to an arc type B using mapper function -// object C, passed by pointer. This version writes the mapped -// input Fst to an output MutableFst. -template<class A, class B, class C> -void StateMap(const Fst<A> &ifst, MutableFst<B> *ofst, C* mapper) { - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - ofst->DeleteStates(); - - if (mapper->InputSymbolsAction() == MAP_COPY_SYMBOLS) - ofst->SetInputSymbols(ifst.InputSymbols()); - else if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) - ofst->SetInputSymbols(0); - - if (mapper->OutputSymbolsAction() == MAP_COPY_SYMBOLS) - ofst->SetOutputSymbols(ifst.OutputSymbols()); - else if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) - ofst->SetOutputSymbols(0); - - uint64 iprops = ifst.Properties(kCopyProperties, false); - - if (ifst.Start() == kNoStateId) { - if (iprops & kError) ofst->SetProperties(kError, kError); - return; - } - - // Add all states. 
- if (ifst.Properties(kExpanded, false)) - ofst->ReserveStates(CountStates(ifst)); - for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next()) - ofst->AddState(); - - ofst->SetStart(mapper->Start()); - - for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next()) { - StateId s = siter.Value(); - mapper->SetState(s); - for (; !mapper->Done(); mapper->Next()) - ofst->AddArc(s, mapper->Value()); - ofst->SetFinal(s, mapper->Final(s)); - } - - uint64 oprops = ofst->Properties(kFstProperties, false); - ofst->SetProperties(mapper->Properties(iprops) | oprops, kFstProperties); -} - -// Maps an arc type A to an arc type B using mapper function -// object C, passed by value. This version writes the mapped input -// Fst to an output MutableFst. -template<class A, class B, class C> -void StateMap(const Fst<A> &ifst, MutableFst<B> *ofst, C mapper) { - StateMap(ifst, ofst, &mapper); -} - -typedef CacheOptions StateMapFstOptions; - -template <class A, class B, class C> class StateMapFst; - -// Implementation of delayed StateMapFst. 
-template <class A, class B, class C> -class StateMapFstImpl : public CacheImpl<B> { - public: - using FstImpl<B>::SetType; - using FstImpl<B>::SetProperties; - using FstImpl<B>::SetInputSymbols; - using FstImpl<B>::SetOutputSymbols; - - using VectorFstBaseImpl<typename CacheImpl<B>::State>::NumStates; - - using CacheImpl<B>::PushArc; - using CacheImpl<B>::HasArcs; - using CacheImpl<B>::HasFinal; - using CacheImpl<B>::HasStart; - using CacheImpl<B>::SetArcs; - using CacheImpl<B>::SetFinal; - using CacheImpl<B>::SetStart; - - friend class StateIterator< StateMapFst<A, B, C> >; - - typedef B Arc; - typedef typename B::Weight Weight; - typedef typename B::StateId StateId; - - StateMapFstImpl(const Fst<A> &fst, const C &mapper, - const StateMapFstOptions& opts) - : CacheImpl<B>(opts), - fst_(fst.Copy()), - mapper_(new C(mapper, fst_)), - own_mapper_(true) { - Init(); - } - - StateMapFstImpl(const Fst<A> &fst, C *mapper, - const StateMapFstOptions& opts) - : CacheImpl<B>(opts), - fst_(fst.Copy()), - mapper_(mapper), - own_mapper_(false) { - Init(); - } - - StateMapFstImpl(const StateMapFstImpl<A, B, C> &impl) - : CacheImpl<B>(impl), - fst_(impl.fst_->Copy(true)), - mapper_(new C(*impl.mapper_, fst_)), - own_mapper_(true) { - Init(); - } - - ~StateMapFstImpl() { - delete fst_; - if (own_mapper_) delete mapper_; - } - - StateId Start() { - if (!HasStart()) - SetStart(mapper_->Start()); - return CacheImpl<B>::Start(); - } - - Weight Final(StateId s) { - if (!HasFinal(s)) - SetFinal(s, mapper_->Final(s)); - return CacheImpl<B>::Final(s); - } - - size_t NumArcs(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<B>::NumArcs(s); - } - - size_t NumInputEpsilons(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<B>::NumInputEpsilons(s); - } - - size_t NumOutputEpsilons(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<B>::NumOutputEpsilons(s); - } - - void InitStateIterator(StateIteratorData<A> *data) const { - 
fst_->InitStateIterator(data); - } - - void InitArcIterator(StateId s, ArcIteratorData<B> *data) { - if (!HasArcs(s)) - Expand(s); - CacheImpl<B>::InitArcIterator(s, data); - } - - uint64 Properties() const { return Properties(kFstProperties); } - - // Set error if found; return FST impl properties. - uint64 Properties(uint64 mask) const { - if ((mask & kError) && (fst_->Properties(kError, false) || - (mapper_->Properties(0) & kError))) - SetProperties(kError, kError); - return FstImpl<Arc>::Properties(mask); - } - - void Expand(StateId s) { - // Add exiting arcs. - for (mapper_->SetState(s); !mapper_->Done(); mapper_->Next()) - PushArc(s, mapper_->Value()); - SetArcs(s); - } - - const Fst<A> &GetFst() const { - return *fst_; - } - - private: - void Init() { - SetType("statemap"); - - if (mapper_->InputSymbolsAction() == MAP_COPY_SYMBOLS) - SetInputSymbols(fst_->InputSymbols()); - else if (mapper_->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) - SetInputSymbols(0); - - if (mapper_->OutputSymbolsAction() == MAP_COPY_SYMBOLS) - SetOutputSymbols(fst_->OutputSymbols()); - else if (mapper_->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) - SetOutputSymbols(0); - - uint64 props = fst_->Properties(kCopyProperties, false); - SetProperties(mapper_->Properties(props)); - } - - const Fst<A> *fst_; - C* mapper_; - bool own_mapper_; - - void operator=(const StateMapFstImpl<A, B, C> &); // disallow -}; - - -// Maps an arc type A to an arc type B using Mapper function object -// C. This version is a delayed Fst. 
-template <class A, class B, class C> -class StateMapFst : public ImplToFst< StateMapFstImpl<A, B, C> > { - public: - friend class ArcIterator< StateMapFst<A, B, C> >; - - typedef B Arc; - typedef typename B::Weight Weight; - typedef typename B::StateId StateId; - typedef CacheState<B> State; - typedef StateMapFstImpl<A, B, C> Impl; - - StateMapFst(const Fst<A> &fst, const C &mapper, - const StateMapFstOptions& opts) - : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {} - - StateMapFst(const Fst<A> &fst, C* mapper, const StateMapFstOptions& opts) - : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {} - - StateMapFst(const Fst<A> &fst, const C &mapper) - : ImplToFst<Impl>(new Impl(fst, mapper, StateMapFstOptions())) {} - - StateMapFst(const Fst<A> &fst, C* mapper) - : ImplToFst<Impl>(new Impl(fst, mapper, StateMapFstOptions())) {} - - // See Fst<>::Copy() for doc. - StateMapFst(const StateMapFst<A, B, C> &fst, bool safe = false) - : ImplToFst<Impl>(fst, safe) {} - - // Get a copy of this StateMapFst. See Fst<>::Copy() for further doc. - virtual StateMapFst<A, B, C> *Copy(bool safe = false) const { - return new StateMapFst<A, B, C>(*this, safe); - } - - virtual void InitStateIterator(StateIteratorData<A> *data) const { - GetImpl()->InitStateIterator(data); - } - - virtual void InitArcIterator(StateId s, ArcIteratorData<B> *data) const { - GetImpl()->InitArcIterator(s, data); - } - - protected: - Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } - - private: - void operator=(const StateMapFst<A, B, C> &fst); // disallow -}; - - -// Specialization for StateMapFst. 
-template <class A, class B, class C> -class ArcIterator< StateMapFst<A, B, C> > - : public CacheArcIterator< StateMapFst<A, B, C> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const StateMapFst<A, B, C> &fst, StateId s) - : CacheArcIterator< StateMapFst<A, B, C> >(fst.GetImpl(), s) { - if (!fst.GetImpl()->HasArcs(s)) - fst.GetImpl()->Expand(s); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - -// -// Utility Mappers -// - -// Mapper that returns its input. -template <class A> -class IdentityStateMapper { - public: - typedef A FromArc; - typedef A ToArc; - - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - explicit IdentityStateMapper(const Fst<A> &fst) : fst_(fst), aiter_(0) {} - - // Allows updating Fst argument; pass only if changed. - IdentityStateMapper(const IdentityStateMapper<A> &mapper, - const Fst<A> *fst = 0) - : fst_(fst ? *fst : mapper.fst_), aiter_(0) {} - - ~IdentityStateMapper() { delete aiter_; } - - StateId Start() const { return fst_.Start(); } - - Weight Final(StateId s) const { return fst_.Final(s); } - - void SetState(StateId s) { - if (aiter_) delete aiter_; - aiter_ = new ArcIterator< Fst<A> >(fst_, s); - } - - bool Done() const { return aiter_->Done(); } - const A &Value() const { return aiter_->Value(); } - void Next() { aiter_->Next(); } - - MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } - MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} - - uint64 Properties(uint64 props) const { return props; } - - private: - const Fst<A> &fst_; - ArcIterator< Fst<A> > *aiter_; -}; - -template <class A> -class ArcSumMapper { - public: - typedef A FromArc; - typedef A ToArc; - - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - explicit ArcSumMapper(const Fst<A> &fst) : fst_(fst), i_(0) {} - - // Allows updating Fst argument; pass only if changed. 
- ArcSumMapper(const ArcSumMapper<A> &mapper, - const Fst<A> *fst = 0) - : fst_(fst ? *fst : mapper.fst_), i_(0) {} - - StateId Start() const { return fst_.Start(); } - Weight Final(StateId s) const { return fst_.Final(s); } - - void SetState(StateId s) { - i_ = 0; - arcs_.clear(); - arcs_.reserve(fst_.NumArcs(s)); - for (ArcIterator<Fst<A> > aiter(fst_, s); !aiter.Done(); aiter.Next()) - arcs_.push_back(aiter.Value()); - - // First sorts the exiting arcs by input label, output label - // and destination state and then sums weights of arcs with - // the same input label, output label, and destination state. - sort(arcs_.begin(), arcs_.end(), comp_); - size_t narcs = 0; - for (size_t i = 0; i < arcs_.size(); ++i) { - if (narcs > 0 && equal_(arcs_[i], arcs_[narcs - 1])) { - arcs_[narcs - 1].weight = Plus(arcs_[narcs - 1].weight, - arcs_[i].weight); - } else { - arcs_[narcs++] = arcs_[i]; - } - } - arcs_.resize(narcs); - } - - bool Done() const { return i_ >= arcs_.size(); } - const A &Value() const { return arcs_[i_]; } - void Next() { ++i_; } - - MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } - MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; } - - uint64 Properties(uint64 props) const { - return props & kArcSortProperties & - kDeleteArcsProperties & kWeightInvariantProperties; - } - - private: - struct Compare { - bool operator()(const A& x, const A& y) { - if (x.ilabel < y.ilabel) return true; - if (x.ilabel > y.ilabel) return false; - if (x.olabel < y.olabel) return true; - if (x.olabel > y.olabel) return false; - if (x.nextstate < y.nextstate) return true; - if (x.nextstate > y.nextstate) return false; - return false; - } - }; - - struct Equal { - bool operator()(const A& x, const A& y) { - return (x.ilabel == y.ilabel && - x.olabel == y.olabel && - x.nextstate == y.nextstate); - } - }; - - const Fst<A> &fst_; - Compare comp_; - Equal equal_; - vector<A> arcs_; - ssize_t i_; // current arc position - - void 
operator=(const ArcSumMapper<A> &); // disallow -}; - -template <class A> -class ArcUniqueMapper { - public: - typedef A FromArc; - typedef A ToArc; - - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - explicit ArcUniqueMapper(const Fst<A> &fst) : fst_(fst), i_(0) {} - - // Allows updating Fst argument; pass only if changed. - ArcUniqueMapper(const ArcUniqueMapper<A> &mapper, - const Fst<A> *fst = 0) - : fst_(fst ? *fst : mapper.fst_), i_(0) {} - - StateId Start() const { return fst_.Start(); } - Weight Final(StateId s) const { return fst_.Final(s); } - - void SetState(StateId s) { - i_ = 0; - arcs_.clear(); - arcs_.reserve(fst_.NumArcs(s)); - for (ArcIterator<Fst<A> > aiter(fst_, s); !aiter.Done(); aiter.Next()) - arcs_.push_back(aiter.Value()); - - // First sorts the exiting arcs by input label, output label - // and destination state and then uniques identical arcs - sort(arcs_.begin(), arcs_.end(), comp_); - typename vector<A>::iterator unique_end = - unique(arcs_.begin(), arcs_.end(), equal_); - arcs_.resize(unique_end - arcs_.begin()); - } - - bool Done() const { return i_ >= arcs_.size(); } - const A &Value() const { return arcs_[i_]; } - void Next() { ++i_; } - - MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } - MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; } - - uint64 Properties(uint64 props) const { - return props & kArcSortProperties & kDeleteArcsProperties; - } - - private: - struct Compare { - bool operator()(const A& x, const A& y) { - if (x.ilabel < y.ilabel) return true; - if (x.ilabel > y.ilabel) return false; - if (x.olabel < y.olabel) return true; - if (x.olabel > y.olabel) return false; - if (x.nextstate < y.nextstate) return true; - if (x.nextstate > y.nextstate) return false; - return false; - } - }; - - struct Equal { - bool operator()(const A& x, const A& y) { - return (x.ilabel == y.ilabel && - x.olabel == y.olabel && - x.nextstate == y.nextstate && - x.weight 
== y.weight); - } - }; - - const Fst<A> &fst_; - Compare comp_; - Equal equal_; - vector<A> arcs_; - ssize_t i_; // current arc position - - void operator=(const ArcUniqueMapper<A> &); // disallow -}; - - -} // namespace fst - -#endif // FST_LIB_STATE_MAP_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/state-reachable.h b/kaldi_io/src/tools/openfst/include/fst/state-reachable.h deleted file mode 100644 index 6d0c971..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/state-reachable.h +++ /dev/null @@ -1,198 +0,0 @@ -// state-reachable.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Class to determine whether a given (final) state can be reached from some -// other given state. - -#ifndef FST_LIB_STATE_REACHABLE_H__ -#define FST_LIB_STATE_REACHABLE_H__ - -#include <vector> -using std::vector; - -#include <fst/dfs-visit.h> -#include <fst/fst.h> -#include <fst/interval-set.h> - - -namespace fst { - -// Computes the (final) states reachable from a given state in an FST. -// After this visitor has been called, a final state f can be reached -// from a state s iff (*isets)[s].Member(state2index[f]) is true, where -// (*isets[s]) is a set of half-open inteval of final state indices -// and state2index[f] maps from a final state to its index. -// -// If state2index is empty, it is filled-in with suitable indices. 
-// If it is non-empty, those indices are used; in this case, the -// final states must have out-degree 0. -template <class A, typename I = typename A::StateId> -class IntervalReachVisitor { - public: - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename IntervalSet<I>::Interval Interval; - - IntervalReachVisitor(const Fst<A> &fst, - vector< IntervalSet<I> > *isets, - vector<I> *state2index) - : fst_(fst), - isets_(isets), - state2index_(state2index), - index_(state2index->empty() ? 1 : -1), - error_(false) { - isets_->clear(); - } - - void InitVisit(const Fst<A> &fst) { error_ = false; } - - bool InitState(StateId s, StateId r) { - while (isets_->size() <= s) - isets_->push_back(IntervalSet<Label>()); - while (state2index_->size() <= s) - state2index_->push_back(-1); - - if (fst_.Final(s) != Weight::Zero()) { - // Create tree interval - vector<Interval> *intervals = (*isets_)[s].Intervals(); - if (index_ < 0) { // Use state2index_ map to set index - if (fst_.NumArcs(s) > 0) { - FSTERROR() << "IntervalReachVisitor: state2index map must be empty " - << "for this FST"; - error_ = true; - return false; - } - I index = (*state2index_)[s]; - if (index < 0) { - FSTERROR() << "IntervalReachVisitor: state2index map incomplete"; - error_ = true; - return false; - } - intervals->push_back(Interval(index, index + 1)); - } else { // Use pre-order index - intervals->push_back(Interval(index_, index_ + 1)); - (*state2index_)[s] = index_++; - } - } - return true; - } - - bool TreeArc(StateId s, const A &arc) { - return true; - } - - bool BackArc(StateId s, const A &arc) { - FSTERROR() << "IntervalReachVisitor: cyclic input"; - error_ = true; - return false; - } - - bool ForwardOrCrossArc(StateId s, const A &arc) { - // Non-tree interval - (*isets_)[s].Union((*isets_)[arc.nextstate]); - return true; - } - - void FinishState(StateId s, StateId p, const A *arc) { - if (index_ >= 0 && fst_.Final(s) != 
Weight::Zero()) { - vector<Interval> *intervals = (*isets_)[s].Intervals(); - (*intervals)[0].end = index_; // Update tree interval end - } - (*isets_)[s].Normalize(); - if (p != kNoStateId) - (*isets_)[p].Union((*isets_)[s]); // Propagate intervals to parent - } - - void FinishVisit() {} - - bool Error() const { return error_; } - - private: - const Fst<A> &fst_; - vector< IntervalSet<I> > *isets_; - vector<I> *state2index_; - I index_; - bool error_; -}; - - -// Tests reachability of final states from a given state. To test for -// reachability from a state s, first do SetState(s). Then a final -// state f can be reached from state s of FST iff Reach(f) is true. -template <class A, typename I = typename A::StateId> -class StateReachable { - public: - typedef A Arc; - typedef I Index; - typedef typename A::StateId StateId; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename IntervalSet<I>::Interval Interval; - - StateReachable(const Fst<A> &fst) - : error_(false) { - IntervalReachVisitor<Arc> reach_visitor(fst, &isets_, &state2index_); - DfsVisit(fst, &reach_visitor); - if (reach_visitor.Error()) error_ = true; - } - - StateReachable(const StateReachable<A> &reachable) { - FSTERROR() << "Copy constructor for state reachable class " - << "not yet implemented."; - error_ = true; - } - - // Set current state. - void SetState(StateId s) { s_ = s; } - - // Can reach this label from current state? - bool Reach(StateId s) { - if (s >= state2index_.size()) - return false; - - I i = state2index_[s]; - if (i < 0) { - FSTERROR() << "StateReachable: state non-final: " << s; - error_ = true; - return false; - } - return isets_[s_].Member(i); - } - - // Access to the state-to-index mapping. Unassigned states have index -1. - vector<I> &State2Index() { return state2index_; } - - // Access to the interval sets. These specify the reachability - // to the final states as intervals of the final state indices. 
- const vector< IntervalSet<I> > &IntervalSets() { return isets_; } - - bool Error() const { return error_; } - - private: - StateId s_; // Current state - vector< IntervalSet<I> > isets_; // Interval sets per state - vector<I> state2index_; // Finds index for a final state - bool error_; - - void operator=(const StateReachable<A> &); // Disallow -}; - -} // namespace fst - -#endif // FST_LIB_STATE_REACHABLE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/state-table.h b/kaldi_io/src/tools/openfst/include/fst/state-table.h deleted file mode 100644 index d8107a1..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/state-table.h +++ /dev/null @@ -1,481 +0,0 @@ -// state-table.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Classes for representing the mapping between state tuples and state Ids. - -#ifndef FST_LIB_STATE_TABLE_H__ -#define FST_LIB_STATE_TABLE_H__ - -#include <deque> -using std::deque; -#include <vector> -using std::vector; - -#include <fst/bi-table.h> -#include <fst/expanded-fst.h> - - -namespace fst { - -// STATE TABLES - these determine the bijective mapping between state -// tuples (e.g. in composition triples of two FST states and a -// composition filter state) and their corresponding state IDs. 
-// They are classes, templated on state tuples, of the form: -// -// template <class T> -// class StateTable { -// public: -// typedef typename T StateTuple; -// -// // Required constructors. -// StateTable(); -// -// // Lookup state ID by tuple. If it doesn't exist, then add it. -// StateId FindState(const StateTuple &); -// // Lookup state tuple by state ID. -// const StateTuple<StateId> &Tuple(StateId) const; -// // # of stored tuples. -// StateId Size() const; -// }; -// -// A state tuple has the form: -// -// template <class S> -// struct StateTuple { -// typedef typename S StateId; -// -// // Required constructors. -// StateTuple(); -// StateTuple(const StateTuple &); -// }; - - -// An implementation using a hash map for the tuple to state ID mapping. -// The state tuple T must have == defined. H is the hash function. -template <class T, class H> -class HashStateTable : public HashBiTable<typename T::StateId, T, H> { - public: - typedef T StateTuple; - typedef typename StateTuple::StateId StateId; - using HashBiTable<StateId, T, H>::FindId; - using HashBiTable<StateId, T, H>::FindEntry; - using HashBiTable<StateId, T, H>::Size; - - HashStateTable() : HashBiTable<StateId, T, H>() {} - - // Reserves space for table_size elements. - explicit HashStateTable(size_t table_size) - : HashBiTable<StateId, T, H>(table_size) {} - - StateId FindState(const StateTuple &tuple) { return FindId(tuple); } - const StateTuple &Tuple(StateId s) const { return FindEntry(s); } -}; - - -// An implementation using a hash map for the tuple to state ID mapping. -// The state tuple T must have == defined. H is the hash function. 
-template <class T, class H> -class CompactHashStateTable - : public CompactHashBiTable<typename T::StateId, T, H> { - public: - typedef T StateTuple; - typedef typename StateTuple::StateId StateId; - using CompactHashBiTable<StateId, T, H>::FindId; - using CompactHashBiTable<StateId, T, H>::FindEntry; - using CompactHashBiTable<StateId, T, H>::Size; - - CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {} - - // Reserves space for 'table_size' elements. - explicit CompactHashStateTable(size_t table_size) - : CompactHashBiTable<StateId, T, H>(table_size) {} - - StateId FindState(const StateTuple &tuple) { return FindId(tuple); } - const StateTuple &Tuple(StateId s) const { return FindEntry(s); } -}; - -// An implementation using a vector for the tuple to state mapping. -// It is passed a function object FP that should fingerprint tuples -// uniquely to an integer that can used as a vector index. Normally, -// VectorStateTable constructs the FP object. The user can instead -// pass in this object; in that case, VectorStateTable takes its -// ownership. -template <class T, class FP> -class VectorStateTable - : public VectorBiTable<typename T::StateId, T, FP> { - public: - typedef T StateTuple; - typedef typename StateTuple::StateId StateId; - using VectorBiTable<StateId, T, FP>::FindId; - using VectorBiTable<StateId, T, FP>::FindEntry; - using VectorBiTable<StateId, T, FP>::Size; - using VectorBiTable<StateId, T, FP>::Fingerprint; - - // Reserves space for 'table_size' elements. - explicit VectorStateTable(FP *fp = 0, size_t table_size = 0) - : VectorBiTable<StateId, T, FP>(fp, table_size) {} - - StateId FindState(const StateTuple &tuple) { return FindId(tuple); } - const StateTuple &Tuple(StateId s) const { return FindEntry(s); } -}; - - -// An implementation using a vector and a compact hash table. The -// selecting functor S returns true for tuples to be hashed in the -// vector. 
The fingerprinting functor FP returns a unique fingerprint -// for each tuple to be hashed in the vector (these need to be -// suitable for indexing in a vector). The hash functor H is used when -// hashing tuple into the compact hash table. -template <class T, class S, class FP, class H> -class VectorHashStateTable - : public VectorHashBiTable<typename T::StateId, T, S, FP, H> { - public: - typedef T StateTuple; - typedef typename StateTuple::StateId StateId; - using VectorHashBiTable<StateId, T, S, FP, H>::FindId; - using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry; - using VectorHashBiTable<StateId, T, S, FP, H>::Size; - using VectorHashBiTable<StateId, T, S, FP, H>::Selector; - using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint; - using VectorHashBiTable<StateId, T, S, FP, H>::Hash; - - VectorHashStateTable(S *s, FP *fp, H *h, - size_t vector_size = 0, - size_t tuple_size = 0) - : VectorHashBiTable<StateId, T, S, FP, H>( - s, fp, h, vector_size, tuple_size) {} - - StateId FindState(const StateTuple &tuple) { return FindId(tuple); } - const StateTuple &Tuple(StateId s) const { return FindEntry(s); } -}; - - -// An implementation using a hash map for the tuple to state ID -// mapping. This version permits erasing of states. The state tuple T -// must have == defined and its default constructor must produce a -// tuple that will never be seen. F is the hash function. 
-template <class T, class F> -class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> { - public: - typedef T StateTuple; - typedef typename StateTuple::StateId StateId; - using ErasableBiTable<StateId, T, F>::FindId; - using ErasableBiTable<StateId, T, F>::FindEntry; - using ErasableBiTable<StateId, T, F>::Size; - using ErasableBiTable<StateId, T, F>::Erase; - - ErasableStateTable() : ErasableBiTable<StateId, T, F>() {} - StateId FindState(const StateTuple &tuple) { return FindId(tuple); } - const StateTuple &Tuple(StateId s) const { return FindEntry(s); } -}; - -// -// COMPOSITION STATE TUPLES AND TABLES -// -// The composition state table has the form: -// -// template <class A, class F> -// class ComposeStateTable { -// public: -// typedef A Arc; -// typedef F FilterState; -// typedef typename A::StateId StateId; -// typedef ComposeStateTuple<StateId> StateTuple; -// -// // Required constructors. Copy constructor does not copy state. -// ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2); -// ComposeStateTable(const ComposeStateTable<A, F> &table); -// // Lookup state ID by tuple. If it doesn't exist, then add it. -// StateId FindState(const StateTuple &); -// // Lookup state tuple by state ID. -// const StateTuple<StateId> &Tuple(StateId) const; -// // # of stored tuples. -// StateId Size() const; -// // Return true if error encountered -// bool Error() const; -// }; - -// Represents the composition state. 
-template <typename S, typename F> -struct ComposeStateTuple { - typedef S StateId; - typedef F FilterState; - - ComposeStateTuple() - : state_id1(kNoStateId), state_id2(kNoStateId), - filter_state(FilterState::NoState()) {} - - ComposeStateTuple(StateId s1, StateId s2, const FilterState &f) - : state_id1(s1), state_id2(s2), filter_state(f) {} - - StateId state_id1; // State Id on fst1 - StateId state_id2; // State Id on fst2 - FilterState filter_state; // State of composition filter -}; - -// Equality of composition state tuples. -template <typename S, typename F> -inline bool operator==(const ComposeStateTuple<S, F>& x, - const ComposeStateTuple<S, F>& y) { - if (&x == &y) - return true; - return x.state_id1 == y.state_id1 && - x.state_id2 == y.state_id2 && - x.filter_state == y.filter_state; -} - - -// Hashing of composition state tuples. -template <typename S, typename F> -class ComposeHash { - public: - size_t operator()(const ComposeStateTuple<S, F>& t) const { - return t.state_id1 + t.state_id2 * kPrime0 + - t.filter_state.Hash() * kPrime1; - } - private: - static const size_t kPrime0; - static const size_t kPrime1; -}; - -template <typename S, typename F> -const size_t ComposeHash<S, F>::kPrime0 = 7853; - -template <typename S, typename F> -const size_t ComposeHash<S, F>::kPrime1 = 7867; - - -// A HashStateTable over composition tuples. -template <typename A, - typename F, - typename H = - CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>, - ComposeHash<typename A::StateId, F> > > -class GenericComposeStateTable : public H { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef F FilterState; - typedef ComposeStateTuple<StateId, F> StateTuple; - - GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {} - - // Reserves space for 'table_size' elements. 
- GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2, - size_t table_size) : H(table_size) {} - - bool Error() const { return false; } - - private: - void operator=(const GenericComposeStateTable<A, F> &table); // disallow -}; - - -// Fingerprint for general composition tuples. -template <typename S, typename F> -class ComposeFingerprint { - public: - typedef S StateId; - typedef F FilterState; - typedef ComposeStateTuple<S, F> StateTuple; - - // Required but suboptimal constructor. - ComposeFingerprint() : mult1_(8192), mult2_(8192) { - LOG(WARNING) << "TupleFingerprint: # of FST states should be provided."; - } - - // Constructor is provided the sizes of the input FSTs - ComposeFingerprint(StateId nstates1, StateId nstates2) - : mult1_(nstates1), mult2_(nstates1 * nstates2) { } - - size_t operator()(const StateTuple &tuple) { - return tuple.state_id1 + tuple.state_id2 * mult1_ + - tuple.filter_state.Hash() * mult2_; - } - - private: - ssize_t mult1_; - ssize_t mult2_; -}; - - -// Useful when the first composition state determines the tuple. -template <typename S, typename F> -class ComposeState1Fingerprint { - public: - typedef S StateId; - typedef F FilterState; - typedef ComposeStateTuple<S, F> StateTuple; - - size_t operator()(const StateTuple &tuple) { return tuple.state_id1; } -}; - - -// Useful when the second composition state determines the tuple. -template <typename S, typename F> -class ComposeState2Fingerprint { - public: - typedef S StateId; - typedef F FilterState; - typedef ComposeStateTuple<S, F> StateTuple; - - size_t operator()(const StateTuple &tuple) { return tuple.state_id2; } -}; - - -// A VectorStateTable over composition tuples. This can be used when -// the product of number of states in FST1 and FST2 (and the -// composition filter state hash) is manageable. If the FSTs are not -// expanded Fsts, they will first have their states counted. 
-template <typename A, typename F> -class ProductComposeStateTable : public -VectorStateTable<ComposeStateTuple<typename A::StateId, F>, - ComposeFingerprint<typename A::StateId, F> > { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef F FilterState; - typedef ComposeStateTuple<StateId, F> StateTuple; - typedef VectorStateTable<StateTuple, - ComposeFingerprint<StateId, F> > StateTable; - - // Reserves space for 'table_size' elements. - ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2, - size_t table_size = 0) - : StateTable(new ComposeFingerprint<StateId, F>(CountStates(fst1), - CountStates(fst2)), - table_size) {} - - ProductComposeStateTable(const ProductComposeStateTable<A, F> &table) - : StateTable(new ComposeFingerprint<StateId, F>(table.Fingerprint())) {} - - bool Error() const { return false; } - - private: - void operator=(const ProductComposeStateTable<A, F> &table); // disallow -}; - -// A VectorStateTable over composition tuples. This can be used when -// FST1 is a string (satisfies kStringProperties) and FST2 is -// epsilon-free and deterministic. It should be used with a -// composition filter that creates at most one filter state per tuple -// under these conditions (e.g. SequenceComposeFilter or -// MatchComposeFilter). 
-template <typename A, typename F> -class StringDetComposeStateTable : public -VectorStateTable<ComposeStateTuple<typename A::StateId, F>, - ComposeState1Fingerprint<typename A::StateId, F> > { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef F FilterState; - typedef ComposeStateTuple<StateId, F> StateTuple; - typedef VectorStateTable<StateTuple, - ComposeState1Fingerprint<StateId, F> > StateTable; - - StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) - : error_(false) { - uint64 props1 = kString; - uint64 props2 = kIDeterministic | kNoIEpsilons; - if (fst1.Properties(props1, true) != props1 || - fst2.Properties(props2, true) != props2) { - FSTERROR() << "StringDetComposeStateTable: fst1 not a string or" - << " fst2 not input deterministic and epsilon-free"; - error_ = true; - } - } - - StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table) - : StateTable(table), error_(table.error_) {} - - bool Error() const { return error_; } - - private: - bool error_; - - void operator=(const StringDetComposeStateTable<A, F> &table); // disallow -}; - - -// A VectorStateTable over composition tuples. This can be used when -// FST2 is a string (satisfies kStringProperties) and FST1 is -// epsilon-free and deterministic. It should be used with a -// composition filter that creates at most one filter state per tuple -// under these conditions (e.g. SequenceComposeFilter or -// MatchComposeFilter). 
-template <typename A, typename F> -class DetStringComposeStateTable : public -VectorStateTable<ComposeStateTuple<typename A::StateId, F>, - ComposeState2Fingerprint<typename A::StateId, F> > { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef F FilterState; - typedef ComposeStateTuple<StateId, F> StateTuple; - typedef VectorStateTable<StateTuple, - ComposeState2Fingerprint<StateId, F> > StateTable; - - DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) - :error_(false) { - uint64 props1 = kODeterministic | kNoOEpsilons; - uint64 props2 = kString; - if (fst1.Properties(props1, true) != props1 || - fst2.Properties(props2, true) != props2) { - FSTERROR() << "StringDetComposeStateTable: fst2 not a string or" - << " fst1 not output deterministic and epsilon-free"; - error_ = true; - } - } - - DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table) - : StateTable(table), error_(table.error_) {} - - bool Error() const { return error_; } - - private: - bool error_; - - void operator=(const DetStringComposeStateTable<A, F> &table); // disallow -}; - - -// An ErasableStateTable over composition tuples. The Erase(StateId) method -// can be called if the user either is sure that composition will never return -// to that tuple or doesn't care that if it does, it is assigned a new -// state ID. 
-template <typename A, typename F> -class ErasableComposeStateTable : public -ErasableStateTable<ComposeStateTuple<typename A::StateId, F>, - ComposeHash<typename A::StateId, F> > { - public: - typedef A Arc; - typedef typename A::StateId StateId; - typedef F FilterState; - typedef ComposeStateTuple<StateId, F> StateTuple; - - ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {} - - bool Error() const { return false; } - - private: - void operator=(const ErasableComposeStateTable<A, F> &table); // disallow -}; - -} // namespace fst - -#endif // FST_LIB_STATE_TABLE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/statesort.h b/kaldi_io/src/tools/openfst/include/fst/statesort.h deleted file mode 100644 index 6f827f4..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/statesort.h +++ /dev/null @@ -1,97 +0,0 @@ -// statesort.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Function to sort states of an Fst. - -#ifndef FST_LIB_STATESORT_H__ -#define FST_LIB_STATESORT_H__ - -#include <vector> -using std::vector; -#include <algorithm> - -#include <fst/mutable-fst.h> - - -namespace fst { - -// Sorts the input states of an FST, modifying it. ORDER[i] gives the -// the state Id after sorting that corresponds to state Id i before -// sorting. ORDER must be a permutation of FST's states ID sequence: -// (0, 1, 2, ..., fst->NumStates() - 1). 
-template <class Arc> -void StateSort(MutableFst<Arc> *fst, - const vector<typename Arc::StateId> &order) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Weight Weight; - - if (order.size() != fst->NumStates()) { - FSTERROR() << "StateSort: bad order vector size: " << order.size(); - fst->SetProperties(kError, kError); - return; - } - - if (fst->Start() == kNoStateId) - return; - - uint64 props = fst->Properties(kStateSortProperties, false); - - vector<bool> done(order.size(), false); - vector<Arc> arcsa, arcsb; - vector<Arc> *arcs1 = &arcsa, *arcs2 = &arcsb; - - fst->SetStart(order[fst->Start()]); - - for (StateIterator< MutableFst<Arc> > siter(*fst); - !siter.Done(); - siter.Next()) { - StateId s1 = siter.Value(), s2; - if (done[s1]) - continue; - Weight final1 = fst->Final(s1), final2 = Weight::Zero(); - arcs1->clear(); - for (ArcIterator< MutableFst<Arc> > aiter(*fst, s1); - !aiter.Done(); - aiter.Next()) - arcs1->push_back(aiter.Value()); - for (; !done[s1]; s1 = s2, final1 = final2, swap(arcs1, arcs2)) { - s2 = order[s1]; - if (!done[s2]) { - final2 = fst->Final(s2); - arcs2->clear(); - for (ArcIterator< MutableFst<Arc> > aiter(*fst, s2); - !aiter.Done(); - aiter.Next()) - arcs2->push_back(aiter.Value()); - } - fst->SetFinal(s2, final1); - fst->DeleteArcs(s2); - for (size_t i = 0; i < arcs1->size(); ++i) { - Arc arc = (*arcs1)[i]; - arc.nextstate = order[arc.nextstate]; - fst->AddArc(s2, arc); - } - done[s1] = true; - } - } - fst->SetProperties(props, kFstProperties); -} - -} // namespace fst - -#endif // FST_LIB_STATESORT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/string-weight.h b/kaldi_io/src/tools/openfst/include/fst/string-weight.h deleted file mode 100644 index 1beeb33..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/string-weight.h +++ /dev/null @@ -1,560 +0,0 @@ -// string-weight.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the 
License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// String weight set and associated semiring operation definitions. - -#ifndef FST_LIB_STRING_WEIGHT_H__ -#define FST_LIB_STRING_WEIGHT_H__ - -#include <list> -#include <string> - -#include <fst/product-weight.h> -#include <fst/weight.h> - -namespace fst { - -const int kStringInfinity = -1; // Label for the infinite string -const int kStringBad = -2; // Label for a non-string -const char kStringSeparator = '_'; // Label separator in strings - -// Determines whether to use left or right string semiring. Includes -// restricted versions that signal an error if proper prefixes -// (suffixes) would otherwise be returned by Plus, useful with various -// algorithms that require functional transducer input with the -// string semirings. -enum StringType { STRING_LEFT = 0, STRING_RIGHT = 1 , - STRING_LEFT_RESTRICT = 2, STRING_RIGHT_RESTRICT }; - -#define REVERSE_STRING_TYPE(S) \ - ((S) == STRING_LEFT ? STRING_RIGHT : \ - ((S) == STRING_RIGHT ? STRING_LEFT : \ - ((S) == STRING_LEFT_RESTRICT ? 
STRING_RIGHT_RESTRICT : \ - STRING_LEFT_RESTRICT))) - -template <typename L, StringType S = STRING_LEFT> -class StringWeight; - -template <typename L, StringType S = STRING_LEFT> -class StringWeightIterator; - -template <typename L, StringType S = STRING_LEFT> -class StringWeightReverseIterator; - -template <typename L, StringType S> -bool operator==(const StringWeight<L, S> &, const StringWeight<L, S> &); - - -// String semiring: (longest_common_prefix/suffix, ., Infinity, Epsilon) -template <typename L, StringType S> -class StringWeight { - public: - typedef L Label; - typedef StringWeight<L, REVERSE_STRING_TYPE(S)> ReverseWeight; - - friend class StringWeightIterator<L, S>; - friend class StringWeightReverseIterator<L, S>; - friend bool operator==<>(const StringWeight<L, S> &, - const StringWeight<L, S> &); - - StringWeight() { Init(); } - - template <typename Iter> - StringWeight(const Iter &begin, const Iter &end) { - Init(); - for (Iter iter = begin; iter != end; ++iter) - PushBack(*iter); - } - - explicit StringWeight(L l) { Init(); PushBack(l); } - - static const StringWeight<L, S> &Zero() { - static const StringWeight<L, S> zero(kStringInfinity); - return zero; - } - - static const StringWeight<L, S> &One() { - static const StringWeight<L, S> one; - return one; - } - - static const StringWeight<L, S> &NoWeight() { - static const StringWeight<L, S> no_weight(kStringBad); - return no_weight; - } - - static const string &Type() { - static const string type = - S == STRING_LEFT ? "string" : - (S == STRING_RIGHT ? "right_string" : - (S == STRING_LEFT_RESTRICT ? "restricted_string" : - "right_restricted_string")); - return type; - } - - bool Member() const; - - istream &Read(istream &strm); - - ostream &Write(ostream &strm) const; - - size_t Hash() const; - - StringWeight<L, S> Quantize(float delta = kDelta) const { - return *this; - } - - ReverseWeight Reverse() const; - - static uint64 Properties() { - return (S == STRING_LEFT || S == STRING_LEFT_RESTRICT ? 
- kLeftSemiring : kRightSemiring) | kIdempotent; - } - - // NB: This needs to be uncommented only if default fails for this impl. - // StringWeight<L, S> &operator=(const StringWeight<L, S> &w); - - // These operations combined with the StringWeightIterator and - // StringWeightReverseIterator provide the access and mutation of - // the string internal elements. - - // Common initializer among constructors. - void Init() { first_ = 0; } - - // Clear existing StringWeight. - void Clear() { first_ = 0; rest_.clear(); } - - size_t Size() const { return first_ ? rest_.size() + 1 : 0; } - - void PushFront(L l) { - if (first_) - rest_.push_front(first_); - first_ = l; - } - - void PushBack(L l) { - if (!first_) - first_ = l; - else - rest_.push_back(l); - } - - private: - L first_; // first label in string (0 if empty) - list<L> rest_; // remaining labels in string -}; - - -// Traverses string in forward direction. -template <typename L, StringType S> -class StringWeightIterator { - public: - explicit StringWeightIterator(const StringWeight<L, S>& w) - : first_(w.first_), rest_(w.rest_), init_(true), - iter_(rest_.begin()) {} - - bool Done() const { - if (init_) return first_ == 0; - else return iter_ == rest_.end(); - } - - const L& Value() const { return init_ ? first_ : *iter_; } - - void Next() { - if (init_) init_ = false; - else ++iter_; - } - - void Reset() { - init_ = true; - iter_ = rest_.begin(); - } - - private: - const L &first_; - const list<L> &rest_; - bool init_; // in the initialized state? - typename list<L>::const_iterator iter_; - - DISALLOW_COPY_AND_ASSIGN(StringWeightIterator); -}; - - -// Traverses string in backward direction. 
-template <typename L, StringType S> -class StringWeightReverseIterator { - public: - explicit StringWeightReverseIterator(const StringWeight<L, S>& w) - : first_(w.first_), rest_(w.rest_), fin_(first_ == 0), - iter_(rest_.rbegin()) {} - - bool Done() const { return fin_; } - - const L& Value() const { return iter_ == rest_.rend() ? first_ : *iter_; } - - void Next() { - if (iter_ == rest_.rend()) fin_ = true; - else ++iter_; - } - - void Reset() { - fin_ = false; - iter_ = rest_.rbegin(); - } - - private: - const L &first_; - const list<L> &rest_; - bool fin_; // in the final state? - typename list<L>::const_reverse_iterator iter_; - - DISALLOW_COPY_AND_ASSIGN(StringWeightReverseIterator); -}; - - -// StringWeight member functions follow that require -// StringWeightIterator or StringWeightReverseIterator. - -template <typename L, StringType S> -inline istream &StringWeight<L, S>::Read(istream &strm) { - Clear(); - int32 size; - ReadType(strm, &size); - for (int i = 0; i < size; ++i) { - L label; - ReadType(strm, &label); - PushBack(label); - } - return strm; -} - -template <typename L, StringType S> -inline ostream &StringWeight<L, S>::Write(ostream &strm) const { - int32 size = Size(); - WriteType(strm, size); - for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next()) { - L label = iter.Value(); - WriteType(strm, label); - } - return strm; -} - -template <typename L, StringType S> -inline bool StringWeight<L, S>::Member() const { - if (Size() != 1) - return true; - StringWeightIterator<L, S> iter(*this); - return iter.Value() != kStringBad; -} - -template <typename L, StringType S> -inline typename StringWeight<L, S>::ReverseWeight -StringWeight<L, S>::Reverse() const { - ReverseWeight rw; - for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next()) - rw.PushFront(iter.Value()); - return rw; -} - -template <typename L, StringType S> -inline size_t StringWeight<L, S>::Hash() const { - size_t h = 0; - for (StringWeightIterator<L, S> 
iter(*this); !iter.Done(); iter.Next()) - h ^= h<<1 ^ iter.Value(); - return h; -} - -// NB: This needs to be uncommented only if default fails for this the impl. -// -// template <typename L, StringType S> -// inline StringWeight<L, S> -// &StringWeight<L, S>::operator=(const StringWeight<L, S> &w) { -// if (this != &w) { -// Clear(); -// for (StringWeightIterator<L, S> iter(w); !iter.Done(); iter.Next()) -// PushBack(iter.Value()); -// } -// return *this; -// } - -template <typename L, StringType S> -inline bool operator==(const StringWeight<L, S> &w1, - const StringWeight<L, S> &w2) { - if (w1.Size() != w2.Size()) - return false; - - StringWeightIterator<L, S> iter1(w1); - StringWeightIterator<L, S> iter2(w2); - - for (; !iter1.Done() ; iter1.Next(), iter2.Next()) - if (iter1.Value() != iter2.Value()) - return false; - - return true; -} - -template <typename L, StringType S> -inline bool operator!=(const StringWeight<L, S> &w1, - const StringWeight<L, S> &w2) { - return !(w1 == w2); -} - -template <typename L, StringType S> -inline bool ApproxEqual(const StringWeight<L, S> &w1, - const StringWeight<L, S> &w2, - float delta = kDelta) { - return w1 == w2; -} - -template <typename L, StringType S> -inline ostream &operator<<(ostream &strm, const StringWeight<L, S> &w) { - StringWeightIterator<L, S> iter(w); - if (iter.Done()) - return strm << "Epsilon"; - else if (iter.Value() == kStringInfinity) - return strm << "Infinity"; - else if (iter.Value() == kStringBad) - return strm << "BadString"; - else - for (size_t i = 0; !iter.Done(); ++i, iter.Next()) { - if (i > 0) - strm << kStringSeparator; - strm << iter.Value(); - } - return strm; -} - -template <typename L, StringType S> -inline istream &operator>>(istream &strm, StringWeight<L, S> &w) { - string s; - strm >> s; - if (s == "Infinity") { - w = StringWeight<L, S>::Zero(); - } else if (s == "Epsilon") { - w = StringWeight<L, S>::One(); - } else { - w.Clear(); - char *p = 0; - for (const char *cs = s.c_str(); !p 
|| *p != '\0'; cs = p + 1) { - int l = strtoll(cs, &p, 10); - if (p == cs || (*p != 0 && *p != kStringSeparator)) { - strm.clear(std::ios::badbit); - break; - } - w.PushBack(l); - } - } - return strm; -} - - -// Default is for the restricted left and right semirings. String -// equality is required (for non-Zero() input. This restriction -// is used in e.g. Determinize to ensure functional input. -template <typename L, StringType S> inline StringWeight<L, S> -Plus(const StringWeight<L, S> &w1, - const StringWeight<L, S> &w2) { - if (!w1.Member() || !w2.Member()) - return StringWeight<L, S>::NoWeight(); - if (w1 == StringWeight<L, S>::Zero()) - return w2; - if (w2 == StringWeight<L, S>::Zero()) - return w1; - - if (w1 != w2) { - FSTERROR() << "StringWeight::Plus: unequal arguments " - << "(non-functional FST?)" - << " w1 = " << w1 - << " w2 = " << w2; - return StringWeight<L, S>::NoWeight(); - } - - return w1; -} - - -// Longest common prefix for left string semiring. -template <typename L> inline StringWeight<L, STRING_LEFT> -Plus(const StringWeight<L, STRING_LEFT> &w1, - const StringWeight<L, STRING_LEFT> &w2) { - if (!w1.Member() || !w2.Member()) - return StringWeight<L, STRING_LEFT>::NoWeight(); - if (w1 == StringWeight<L, STRING_LEFT>::Zero()) - return w2; - if (w2 == StringWeight<L, STRING_LEFT>::Zero()) - return w1; - - StringWeight<L, STRING_LEFT> sum; - StringWeightIterator<L, STRING_LEFT> iter1(w1); - StringWeightIterator<L, STRING_LEFT> iter2(w2); - for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value(); - iter1.Next(), iter2.Next()) - sum.PushBack(iter1.Value()); - return sum; -} - - -// Longest common suffix for right string semiring. 
-template <typename L> inline StringWeight<L, STRING_RIGHT> -Plus(const StringWeight<L, STRING_RIGHT> &w1, - const StringWeight<L, STRING_RIGHT> &w2) { - if (!w1.Member() || !w2.Member()) - return StringWeight<L, STRING_RIGHT>::NoWeight(); - if (w1 == StringWeight<L, STRING_RIGHT>::Zero()) - return w2; - if (w2 == StringWeight<L, STRING_RIGHT>::Zero()) - return w1; - - StringWeight<L, STRING_RIGHT> sum; - StringWeightReverseIterator<L, STRING_RIGHT> iter1(w1); - StringWeightReverseIterator<L, STRING_RIGHT> iter2(w2); - for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value(); - iter1.Next(), iter2.Next()) - sum.PushFront(iter1.Value()); - return sum; -} - - -template <typename L, StringType S> -inline StringWeight<L, S> Times(const StringWeight<L, S> &w1, - const StringWeight<L, S> &w2) { - if (!w1.Member() || !w2.Member()) - return StringWeight<L, S>::NoWeight(); - if (w1 == StringWeight<L, S>::Zero() || w2 == StringWeight<L, S>::Zero()) - return StringWeight<L, S>::Zero(); - - StringWeight<L, S> prod(w1); - for (StringWeightIterator<L, S> iter(w2); !iter.Done(); iter.Next()) - prod.PushBack(iter.Value()); - - return prod; -} - - -// Default is for left division in the left string and the -// left restricted string semirings. 
-template <typename L, StringType S> inline StringWeight<L, S> -Divide(const StringWeight<L, S> &w1, - const StringWeight<L, S> &w2, - DivideType typ) { - - if (typ != DIVIDE_LEFT) { - FSTERROR() << "StringWeight::Divide: only left division is defined " - << "for the " << StringWeight<L, S>::Type() << " semiring"; - return StringWeight<L, S>::NoWeight(); - } - - if (!w1.Member() || !w2.Member()) - return StringWeight<L, S>::NoWeight(); - - if (w2 == StringWeight<L, S>::Zero()) - return StringWeight<L, S>(kStringBad); - else if (w1 == StringWeight<L, S>::Zero()) - return StringWeight<L, S>::Zero(); - - StringWeight<L, S> div; - StringWeightIterator<L, S> iter(w1); - for (int i = 0; !iter.Done(); iter.Next(), ++i) { - if (i >= w2.Size()) - div.PushBack(iter.Value()); - } - return div; -} - - -// Right division in the right string semiring. -template <typename L> inline StringWeight<L, STRING_RIGHT> -Divide(const StringWeight<L, STRING_RIGHT> &w1, - const StringWeight<L, STRING_RIGHT> &w2, - DivideType typ) { - - if (typ != DIVIDE_RIGHT) { - FSTERROR() << "StringWeight::Divide: only right division is defined " - << "for the right string semiring"; - return StringWeight<L, STRING_RIGHT>::NoWeight(); - } - - if (!w1.Member() || !w2.Member()) - return StringWeight<L, STRING_RIGHT>::NoWeight(); - - if (w2 == StringWeight<L, STRING_RIGHT>::Zero()) - return StringWeight<L, STRING_RIGHT>(kStringBad); - else if (w1 == StringWeight<L, STRING_RIGHT>::Zero()) - return StringWeight<L, STRING_RIGHT>::Zero(); - - StringWeight<L, STRING_RIGHT> div; - StringWeightReverseIterator<L, STRING_RIGHT> iter(w1); - for (int i = 0; !iter.Done(); iter.Next(), ++i) { - if (i >= w2.Size()) - div.PushFront(iter.Value()); - } - return div; -} - - -// Right division in the right restricted string semiring. 
-template <typename L> inline StringWeight<L, STRING_RIGHT_RESTRICT> -Divide(const StringWeight<L, STRING_RIGHT_RESTRICT> &w1, - const StringWeight<L, STRING_RIGHT_RESTRICT> &w2, - DivideType typ) { - - if (typ != DIVIDE_RIGHT) { - FSTERROR() << "StringWeight::Divide: only right division is defined " - << "for the right restricted string semiring"; - return StringWeight<L, STRING_RIGHT_RESTRICT>::NoWeight(); - } - - if (!w1.Member() || !w2.Member()) - return StringWeight<L, STRING_RIGHT_RESTRICT>::NoWeight(); - - if (w2 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero()) - return StringWeight<L, STRING_RIGHT_RESTRICT>(kStringBad); - else if (w1 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero()) - return StringWeight<L, STRING_RIGHT_RESTRICT>::Zero(); - - StringWeight<L, STRING_RIGHT_RESTRICT> div; - StringWeightReverseIterator<L, STRING_RIGHT_RESTRICT> iter(w1); - for (int i = 0; !iter.Done(); iter.Next(), ++i) { - if (i >= w2.Size()) - div.PushFront(iter.Value()); - } - return div; -} - - -// Product of string weight and an arbitray weight. 
-template <class L, class W, StringType S = STRING_LEFT> -struct GallicWeight : public ProductWeight<StringWeight<L, S>, W> { - typedef GallicWeight<L, typename W::ReverseWeight, REVERSE_STRING_TYPE(S)> - ReverseWeight; - - GallicWeight() {} - - GallicWeight(StringWeight<L, S> w1, W w2) - : ProductWeight<StringWeight<L, S>, W>(w1, w2) {} - - explicit GallicWeight(const string &s, int *nread = 0) - : ProductWeight<StringWeight<L, S>, W>(s, nread) {} - - GallicWeight(const ProductWeight<StringWeight<L, S>, W> &w) - : ProductWeight<StringWeight<L, S>, W>(w) {} -}; - -} // namespace fst - -#endif // FST_LIB_STRING_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/string.h b/kaldi_io/src/tools/openfst/include/fst/string.h deleted file mode 100644 index 9eaf7a3..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/string.h +++ /dev/null @@ -1,271 +0,0 @@ - -// string.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// Utilities to convert strings into FSTs. 
-// - -#ifndef FST_LIB_STRING_H_ -#define FST_LIB_STRING_H_ - -#include <fst/compact-fst.h> -#include <fst/icu.h> -#include <fst/mutable-fst.h> - -DECLARE_string(fst_field_separator); - -namespace fst { - -// Functor compiling a string in an FST -template <class A> -class StringCompiler { - public: - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - - enum TokenType { SYMBOL = 1, BYTE = 2, UTF8 = 3 }; - - StringCompiler(TokenType type, const SymbolTable *syms = 0, - Label unknown_label = kNoLabel, - bool allow_negative = false) - : token_type_(type), syms_(syms), unknown_label_(unknown_label), - allow_negative_(allow_negative) {} - - // Compile string 's' into FST 'fst'. - template <class F> - bool operator()(const string &s, F *fst) const { - vector<Label> labels; - if (!ConvertStringToLabels(s, &labels)) - return false; - Compile(labels, fst); - return true; - } - - template <class F> - bool operator()(const string &s, F *fst, Weight w) const { - vector<Label> labels; - if (!ConvertStringToLabels(s, &labels)) - return false; - Compile(labels, fst, w); - return true; - } - - private: - bool ConvertStringToLabels(const string &str, vector<Label> *labels) const { - labels->clear(); - if (token_type_ == BYTE) { - for (size_t i = 0; i < str.size(); ++i) - labels->push_back(static_cast<unsigned char>(str[i])); - } else if (token_type_ == UTF8) { - return UTF8StringToLabels(str, labels); - } else { - char *c_str = new char[str.size() + 1]; - str.copy(c_str, str.size()); - c_str[str.size()] = 0; - vector<char *> vec; - string separator = "\n" + FLAGS_fst_field_separator; - SplitToVector(c_str, separator.c_str(), &vec, true); - for (size_t i = 0; i < vec.size(); ++i) { - Label label; - if (!ConvertSymbolToLabel(vec[i], &label)) - return false; - labels->push_back(label); - } - delete[] c_str; - } - return true; - } - - void Compile(const vector<Label> &labels, MutableFst<A> *fst, - const Weight &weight = Weight::One()) const { - 
fst->DeleteStates(); - while (fst->NumStates() <= labels.size()) - fst->AddState(); - for (size_t i = 0; i < labels.size(); ++i) - fst->AddArc(i, Arc(labels[i], labels[i], Weight::One(), i + 1)); - fst->SetStart(0); - fst->SetFinal(labels.size(), weight); - } - - template <class Unsigned> - void Compile(const vector<Label> &labels, - CompactFst<A, StringCompactor<A>, Unsigned> *fst) const { - fst->SetCompactElements(labels.begin(), labels.end()); - } - - template <class Unsigned> - void Compile(const vector<Label> &labels, - CompactFst<A, WeightedStringCompactor<A>, Unsigned> *fst, - const Weight &weight = Weight::One()) const { - vector<pair<Label, Weight> > compacts; - compacts.reserve(labels.size()); - for (size_t i = 0; i < labels.size(); ++i) - compacts.push_back(make_pair(labels[i], Weight::One())); - compacts.back().second = weight; - fst->SetCompactElements(compacts.begin(), compacts.end()); - } - - bool ConvertSymbolToLabel(const char *s, Label* output) const { - int64 n; - if (syms_) { - n = syms_->Find(s); - if ((n == -1) && (unknown_label_ != kNoLabel)) - n = unknown_label_; - if (n == -1 || (!allow_negative_ && n < 0)) { - VLOG(1) << "StringCompiler::ConvertSymbolToLabel: Symbol \"" << s - << "\" is not mapped to any integer label, symbol table = " - << syms_->Name(); - return false; - } - } else { - char *p; - n = strtoll(s, &p, 10); - if (p < s + strlen(s) || (!allow_negative_ && n < 0)) { - VLOG(1) << "StringCompiler::ConvertSymbolToLabel: Bad label integer " - << "= \"" << s << "\""; - return false; - } - } - *output = n; - return true; - } - - TokenType token_type_; // Token type: symbol, byte or utf8 encoded - const SymbolTable *syms_; // Symbol table used when token type is symbol - Label unknown_label_; // Label for token missing from symbol table - bool allow_negative_; // Negative labels allowed? - - DISALLOW_COPY_AND_ASSIGN(StringCompiler); -}; - -// Functor to print a string FST as a string. 
-template <class A> -class StringPrinter { - public: - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - enum TokenType { SYMBOL = 1, BYTE = 2, UTF8 = 3 }; - - StringPrinter(TokenType token_type, - const SymbolTable *syms = 0) - : token_type_(token_type), syms_(syms) {} - - // Convert the FST 'fst' into the string 'output' - bool operator()(const Fst<A> &fst, string *output) { - bool is_a_string = FstToLabels(fst); - if (!is_a_string) { - VLOG(1) << "StringPrinter::operator(): Fst is not a string."; - return false; - } - - output->clear(); - - if (token_type_ == SYMBOL) { - stringstream sstrm; - for (size_t i = 0; i < labels_.size(); ++i) { - if (i) - sstrm << *(FLAGS_fst_field_separator.rbegin()); - if (!PrintLabel(labels_[i], sstrm)) - return false; - } - *output = sstrm.str(); - } else if (token_type_ == BYTE) { - output->reserve(labels_.size()); - for (size_t i = 0; i < labels_.size(); ++i) { - output->push_back(labels_[i]); - } - } else if (token_type_ == UTF8) { - return LabelsToUTF8String(labels_, output); - } else { - VLOG(1) << "StringPrinter::operator(): Unknown token type: " - << token_type_; - return false; - } - return true; - } - - private: - bool FstToLabels(const Fst<A> &fst) { - labels_.clear(); - - StateId s = fst.Start(); - if (s == kNoStateId) { - VLOG(2) << "StringPrinter::FstToLabels: Invalid starting state for " - << "string fst."; - return false; - } - - while (fst.Final(s) == Weight::Zero()) { - ArcIterator<Fst<A> > aiter(fst, s); - if (aiter.Done()) { - VLOG(2) << "StringPrinter::FstToLabels: String fst traversal does " - << "not reach final state."; - return false; - } - - const A& arc = aiter.Value(); - labels_.push_back(arc.olabel); - - s = arc.nextstate; - if (s == kNoStateId) { - VLOG(2) << "StringPrinter::FstToLabels: Transition to invalid " - << "state."; - return false; - } - - aiter.Next(); - if (!aiter.Done()) { - VLOG(2) << 
"StringPrinter::FstToLabels: State with multiple " - << "outgoing arcs found."; - return false; - } - } - - return true; - } - - bool PrintLabel(Label lab, ostream& ostrm) { - if (syms_) { - string symbol = syms_->Find(lab); - if (symbol == "") { - VLOG(2) << "StringPrinter::PrintLabel: Integer " << lab << " is not " - << "mapped to any textual symbol, symbol table = " - << syms_->Name(); - return false; - } - ostrm << symbol; - } else { - ostrm << lab; - } - return true; - } - - TokenType token_type_; // Token type: symbol, byte or utf8 encoded - const SymbolTable *syms_; // Symbol table used when token type is symbol - vector<Label> labels_; // Input FST labels. - - DISALLOW_COPY_AND_ASSIGN(StringPrinter); -}; - -} // namespace fst - -#endif // FST_LIB_STRING_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/symbol-table-ops.h b/kaldi_io/src/tools/openfst/include/fst/symbol-table-ops.h deleted file mode 100644 index 1f327da..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/symbol-table-ops.h +++ /dev/null @@ -1,91 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Jeffrey Sorensen) - -#ifndef FST_LIB_SYMBOL_TABLE_OPS_H_ -#define FST_LIB_SYMBOL_TABLE_OPS_H_ - -#include <vector> -using std::vector; -#include <string> -#include <tr1/unordered_set> -using std::tr1::unordered_set; -using std::tr1::unordered_multiset; - - -#include <fst/fst.h> -#include <fst/symbol-table.h> - - -namespace fst { - -// Returns a minimal symbol table containing only symbols referenced by the -// passed fst. Symbols preserve their original numbering, so fst does not -// require relabeling. -template<class Arc> -SymbolTable *PruneSymbolTable(const Fst<Arc> &fst, const SymbolTable &syms, - bool input) { - unordered_set<typename Arc::Label> seen; - seen.insert(0); // Always keep epslion - StateIterator<Fst<Arc> > siter(fst); - for (; !siter.Done(); siter.Next()) { - ArcIterator<Fst<Arc> > aiter(fst, siter.Value()); - for (; !aiter.Done(); aiter.Next()) { - typename Arc::Label sym = (input) ? aiter.Value().ilabel : - aiter.Value().olabel; - seen.insert(sym); - } - } - SymbolTable *pruned = new SymbolTable(syms.Name() + "_pruned"); - for (SymbolTableIterator stiter(syms); !stiter.Done(); stiter.Next()) { - typename Arc::Label label = stiter.Value(); - if (seen.find(label) != seen.end()) { - pruned->AddSymbol(stiter.Symbol(), stiter.Value()); - } - } - return pruned; -} - -// Relabels a symbol table to make it a contiguous mapping. -SymbolTable *CompactSymbolTable(const SymbolTable &syms); - -// Merges two SymbolTables, all symbols from left will be merged into right -// with the same ids. Symbols in right that have conflicting ids with those -// in left will be assigned to value assigned from the left SymbolTable. -// The returned symbol table will never modify symbol assignments from the left -// side, but may do so on the right. If right_relabel_output is non-NULL, it -// will be assigned true if the symbols from the right table needed to be -// reassigned. 
-// A potential use case is to Compose two Fst's that have different symbol -// tables. You can reconcile them in the following way: -// Fst<Arc> a, b; -// bool relabel; -// SymbolTable *bnew = MergeSymbolTable(a.OutputSymbols(), -// b.InputSymbols(), &relabel); -// if (relabel) { -// Relabel(b, bnew, NULL); -// } -// b.SetInputSymbols(bnew); -// delete bnew; -SymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right, - bool *right_relabel_output = 0); - -// Read the symbol table from any Fst::Read()able file, without loading the -// corresponding Fst. Returns NULL if the Fst does not contain a symbol table -// or the symbol table cannot be read. -SymbolTable *FstReadSymbols(const string &filename, bool input); - -} // namespace fst -#endif // FST_LIB_SYMBOL_TABLE_OPS_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/symbol-table.h b/kaldi_io/src/tools/openfst/include/fst/symbol-table.h deleted file mode 100644 index 6eb6c2d..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/symbol-table.h +++ /dev/null @@ -1,537 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// All Rights Reserved. -// -// Author : Johan Schalkwyk -// -// \file -// Classes to provide symbol-to-integer and integer-to-symbol mappings. 
- -#ifndef FST_LIB_SYMBOL_TABLE_H__ -#define FST_LIB_SYMBOL_TABLE_H__ - -#include <cstring> -#include <string> -#include <utility> -using std::pair; using std::make_pair; -#include <vector> -using std::vector; - - -#include <fst/compat.h> -#include <iostream> -#include <fstream> -#include <sstream> - - -#include <map> - -DECLARE_bool(fst_compat_symbols); - -namespace fst { - -// WARNING: Reading via symbol table read options should -// not be used. This is a temporary work around for -// reading symbol ranges of previously stored symbol sets. -struct SymbolTableReadOptions { - SymbolTableReadOptions() { } - - SymbolTableReadOptions(vector<pair<int64, int64> > string_hash_ranges_, - const string& source_) - : string_hash_ranges(string_hash_ranges_), - source(source_) { } - - vector<pair<int64, int64> > string_hash_ranges; - string source; -}; - -struct SymbolTableTextOptions { - SymbolTableTextOptions(); - - bool allow_negative; - string fst_field_separator; -}; - -class SymbolTableImpl { - public: - SymbolTableImpl(const string &name) - : name_(name), - available_key_(0), - dense_key_limit_(0), - check_sum_finalized_(false) {} - - explicit SymbolTableImpl(const SymbolTableImpl& impl) - : name_(impl.name_), - available_key_(0), - dense_key_limit_(0), - check_sum_finalized_(false) { - for (size_t i = 0; i < impl.symbols_.size(); ++i) { - AddSymbol(impl.symbols_[i], impl.Find(impl.symbols_[i])); - } - } - - ~SymbolTableImpl() { - for (size_t i = 0; i < symbols_.size(); ++i) - delete[] symbols_[i]; - } - - // TODO(johans): Add flag to specify whether the symbol - // should be indexed as string or int or both. - int64 AddSymbol(const string& symbol, int64 key); - - int64 AddSymbol(const string& symbol) { - int64 key = Find(symbol); - return (key == -1) ? 
AddSymbol(symbol, available_key_++) : key; - } - - static SymbolTableImpl* ReadText( - istream &strm, const string &name, - const SymbolTableTextOptions &opts = SymbolTableTextOptions()); - - static SymbolTableImpl* Read(istream &strm, - const SymbolTableReadOptions& opts); - - bool Write(ostream &strm) const; - - // - // Return the string associated with the key. If the key is out of - // range (<0, >max), return an empty string. - string Find(int64 key) const { - if (key >=0 && key < dense_key_limit_) - return string(symbols_[key]); - - map<int64, const char*>::const_iterator it = - key_map_.find(key); - if (it == key_map_.end()) { - return ""; - } - return string(it->second); - } - - // - // Return the key associated with the symbol. If the symbol - // does not exists, return SymbolTable::kNoSymbol. - int64 Find(const string& symbol) const { - return Find(symbol.c_str()); - } - - // - // Return the key associated with the symbol. If the symbol - // does not exists, return SymbolTable::kNoSymbol. 
- int64 Find(const char* symbol) const { - map<const char *, int64, StrCmp>::const_iterator it = - symbol_map_.find(symbol); - if (it == symbol_map_.end()) { - return -1; - } - return it->second; - } - - int64 GetNthKey(ssize_t pos) const { - if ((pos < 0) || (pos >= symbols_.size())) return -1; - else return Find(symbols_[pos]); - } - - const string& Name() const { return name_; } - - int IncrRefCount() const { - return ref_count_.Incr(); - } - int DecrRefCount() const { - return ref_count_.Decr(); - } - int RefCount() const { - return ref_count_.count(); - } - - string CheckSum() const { - MaybeRecomputeCheckSum(); - return check_sum_string_; - } - - string LabeledCheckSum() const { - MaybeRecomputeCheckSum(); - return labeled_check_sum_string_; - } - - int64 AvailableKey() const { - return available_key_; - } - - size_t NumSymbols() const { - return symbols_.size(); - } - - private: - // Recomputes the checksums (both of them) if we've had changes since the last - // computation (i.e., if check_sum_finalized_ is false). - // Takes ~2.5 microseconds (dbg) or ~230 nanoseconds (opt) on a 2.67GHz Xeon - // if the checksum is up-to-date (requiring no recomputation). - void MaybeRecomputeCheckSum() const; - - struct StrCmp { - bool operator()(const char *s1, const char *s2) const { - return strcmp(s1, s2) < 0; - } - }; - - string name_; - int64 available_key_; - int64 dense_key_limit_; - vector<const char *> symbols_; - map<int64, const char*> key_map_; - map<const char *, int64, StrCmp> symbol_map_; - - mutable RefCounter ref_count_; - mutable bool check_sum_finalized_; - mutable string check_sum_string_; - mutable string labeled_check_sum_string_; - mutable Mutex check_sum_mutex_; -}; - -// -// \class SymbolTable -// \brief Symbol (string) to int and reverse mapping -// -// The SymbolTable implements the mappings of labels to strings and reverse. 
-// SymbolTables are used to describe the alphabet of the input and output -// labels for arcs in a Finite State Transducer. -// -// SymbolTables are reference counted and can therefore be shared across -// multiple machines. For example a language model grammar G, with a -// SymbolTable for the words in the language model can share this symbol -// table with the lexical representation L o G. -// -class SymbolTable { - public: - static const int64 kNoSymbol = -1; - - // Construct symbol table with an unspecified name. - SymbolTable() : impl_(new SymbolTableImpl("<unspecified>")) {} - - // Construct symbol table with a unique name. - SymbolTable(const string& name) : impl_(new SymbolTableImpl(name)) {} - - // Create a reference counted copy. - SymbolTable(const SymbolTable& table) : impl_(table.impl_) { - impl_->IncrRefCount(); - } - - // Derefence implentation object. When reference count hits 0, delete - // implementation. - virtual ~SymbolTable() { - if (!impl_->DecrRefCount()) delete impl_; - } - - // Copys the implemenation from one symbol table to another. - void operator=(const SymbolTable &st) { - if (impl_ != st.impl_) { - st.impl_->IncrRefCount(); - if (!impl_->DecrRefCount()) delete impl_; - impl_ = st.impl_; - } - } - - // Read an ascii representation of the symbol table from an istream. Pass a - // name to give the resulting SymbolTable. 
- static SymbolTable* ReadText( - istream &strm, const string& name, - const SymbolTableTextOptions &opts = SymbolTableTextOptions()) { - SymbolTableImpl* impl = SymbolTableImpl::ReadText(strm, name, opts); - if (!impl) - return 0; - else - return new SymbolTable(impl); - } - - // read an ascii representation of the symbol table - static SymbolTable* ReadText(const string& filename, - const SymbolTableTextOptions &opts = SymbolTableTextOptions()) { - ifstream strm(filename.c_str(), ifstream::in); - if (!strm) { - LOG(ERROR) << "SymbolTable::ReadText: Can't open file " << filename; - return 0; - } - return ReadText(strm, filename, opts); - } - - - // WARNING: Reading via symbol table read options should - // not be used. This is a temporary work around. - static SymbolTable* Read(istream &strm, - const SymbolTableReadOptions& opts) { - SymbolTableImpl* impl = SymbolTableImpl::Read(strm, opts); - if (!impl) - return 0; - else - return new SymbolTable(impl); - } - - // read a binary dump of the symbol table from a stream - static SymbolTable* Read(istream &strm, const string& source) { - SymbolTableReadOptions opts; - opts.source = source; - return Read(strm, opts); - } - - // read a binary dump of the symbol table - static SymbolTable* Read(const string& filename) { - ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); - if (!strm) { - LOG(ERROR) << "SymbolTable::Read: Can't open file " << filename; - return 0; - } - return Read(strm, filename); - } - - //-------------------------------------------------------- - // Derivable Interface (final) - //-------------------------------------------------------- - // create a reference counted copy - virtual SymbolTable* Copy() const { - return new SymbolTable(*this); - } - - // Add a symbol with given key to table. A symbol table also - // keeps track of the last available key (highest key value in - // the symbol table). 
- virtual int64 AddSymbol(const string& symbol, int64 key) { - MutateCheck(); - return impl_->AddSymbol(symbol, key); - } - - // Add a symbol to the table. The associated value key is automatically - // assigned by the symbol table. - virtual int64 AddSymbol(const string& symbol) { - MutateCheck(); - return impl_->AddSymbol(symbol); - } - - // Add another symbol table to this table. All key values will be offset - // by the current available key (highest key value in the symbol table). - // Note string symbols with the same key value with still have the same - // key value after the symbol table has been merged, but a different - // value. Adding symbol tables do not result in changes in the base table. - virtual void AddTable(const SymbolTable& table); - - // return the name of the symbol table - virtual const string& Name() const { - return impl_->Name(); - } - - // Return the label-agnostic MD5 check-sum for this table. All new symbols - // added to the table will result in an updated checksum. - // DEPRECATED. - virtual string CheckSum() const { - return impl_->CheckSum(); - } - - // Same as CheckSum(), but this returns an label-dependent version. 
- virtual string LabeledCheckSum() const { - return impl_->LabeledCheckSum(); - } - - virtual bool Write(ostream &strm) const { - return impl_->Write(strm); - } - - bool Write(const string& filename) const { - ofstream strm(filename.c_str(), ofstream::out | ofstream::binary); - if (!strm) { - LOG(ERROR) << "SymbolTable::Write: Can't open file " << filename; - return false; - } - return Write(strm); - } - - // Dump an ascii text representation of the symbol table via a stream - virtual bool WriteText( - ostream &strm, - const SymbolTableTextOptions &opts = SymbolTableTextOptions()) const; - - // Dump an ascii text representation of the symbol table - bool WriteText(const string& filename) const { - ofstream strm(filename.c_str()); - if (!strm) { - LOG(ERROR) << "SymbolTable::WriteText: Can't open file " << filename; - return false; - } - return WriteText(strm); - } - - // Return the string associated with the key. If the key is out of - // range (<0, >max), log error and return an empty string. - virtual string Find(int64 key) const { - return impl_->Find(key); - } - - // Return the key associated with the symbol. If the symbol - // does not exists, log error and return SymbolTable::kNoSymbol - virtual int64 Find(const string& symbol) const { - return impl_->Find(symbol); - } - - // Return the key associated with the symbol. 
If the symbol - // does not exists, log error and return SymbolTable::kNoSymbol - virtual int64 Find(const char* symbol) const { - return impl_->Find(symbol); - } - - // Return the current available key (i.e highest key number+1) in - // the symbol table - virtual int64 AvailableKey(void) const { - return impl_->AvailableKey(); - } - - // Return the current number of symbols in table (not necessarily - // equal to AvailableKey()) - virtual size_t NumSymbols(void) const { - return impl_->NumSymbols(); - } - - virtual int64 GetNthKey(ssize_t pos) const { - return impl_->GetNthKey(pos); - } - - private: - explicit SymbolTable(SymbolTableImpl* impl) : impl_(impl) {} - - void MutateCheck() { - // Copy on write - if (impl_->RefCount() > 1) { - impl_->DecrRefCount(); - impl_ = new SymbolTableImpl(*impl_); - } - } - - const SymbolTableImpl* Impl() const { - return impl_; - } - - private: - SymbolTableImpl* impl_; -}; - - -// -// \class SymbolTableIterator -// \brief Iterator class for symbols in a symbol table -class SymbolTableIterator { - public: - SymbolTableIterator(const SymbolTable& table) - : table_(table), - pos_(0), - nsymbols_(table.NumSymbols()), - key_(table.GetNthKey(0)) { } - - ~SymbolTableIterator() { } - - // is iterator done - bool Done(void) { - return (pos_ == nsymbols_); - } - - // return the Value() of the current symbol (int64 key) - int64 Value(void) { - return key_; - } - - // return the string of the current symbol - string Symbol(void) { - return table_.Find(key_); - } - - // advance iterator forward - void Next(void) { - ++pos_; - if (pos_ < nsymbols_) key_ = table_.GetNthKey(pos_); - } - - // reset iterator - void Reset(void) { - pos_ = 0; - key_ = table_.GetNthKey(0); - } - - private: - const SymbolTable& table_; - ssize_t pos_; - size_t nsymbols_; - int64 key_; -}; - - -// Tests compatibilty between two sets of symbol tables -inline bool CompatSymbols(const SymbolTable *syms1, const SymbolTable *syms2, - bool warning = true) { - if 
(!FLAGS_fst_compat_symbols) { - return true; - } else if (!syms1 && !syms2) { - return true; - } else if (syms1 && !syms2) { - if (warning) - LOG(WARNING) << - "CompatSymbols: first symbol table present but second missing"; - return false; - } else if (!syms1 && syms2) { - if (warning) - LOG(WARNING) << - "CompatSymbols: second symbol table present but first missing"; - return false; - } else if (syms1->LabeledCheckSum() != syms2->LabeledCheckSum()) { - if (warning) - LOG(WARNING) << "CompatSymbols: Symbol table check sums do not match"; - return false; - } else { - return true; - } -} - - -// Relabels a symbol table as specified by the input vector of pairs -// (old label, new label). The new symbol table only retains symbols -// for which a relabeling is *explicitely* specified. -// TODO(allauzen): consider adding options to allow for some form -// of implicit identity relabeling. -template <class Label> -SymbolTable *RelabelSymbolTable(const SymbolTable *table, - const vector<pair<Label, Label> > &pairs) { - SymbolTable *new_table = new SymbolTable( - table->Name().empty() ? 
string() : - (string("relabeled_") + table->Name())); - - for (size_t i = 0; i < pairs.size(); ++i) - new_table->AddSymbol(table->Find(pairs[i].first), pairs[i].second); - - return new_table; -} - -// Symbol Table Serialization -inline void SymbolTableToString(const SymbolTable *table, string *result) { - ostringstream ostrm; - table->Write(ostrm); - *result = ostrm.str(); -} - -inline SymbolTable *StringToSymbolTable(const string &s) { - istringstream istrm(s); - return SymbolTable::Read(istrm, SymbolTableReadOptions()); -} - - - -} // namespace fst - -#endif // FST_LIB_SYMBOL_TABLE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/synchronize.h b/kaldi_io/src/tools/openfst/include/fst/synchronize.h deleted file mode 100644 index 9582926..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/synchronize.h +++ /dev/null @@ -1,457 +0,0 @@ -// synchronize.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Cyril Allauzen) -// -// \file -// Synchronize an FST with bounded delay. 
- -#ifndef FST_LIB_SYNCHRONIZE_H__ -#define FST_LIB_SYNCHRONIZE_H__ - -#include <algorithm> -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <tr1/unordered_set> -using std::tr1::unordered_set; -using std::tr1::unordered_multiset; -#include <string> -#include <utility> -using std::pair; using std::make_pair; -#include <vector> -using std::vector; - -#include <fst/cache.h> -#include <fst/test-properties.h> - - -namespace fst { - -typedef CacheOptions SynchronizeFstOptions; - - -// Implementation class for SynchronizeFst -template <class A> -class SynchronizeFstImpl - : public CacheImpl<A> { - public: - using FstImpl<A>::SetType; - using FstImpl<A>::SetProperties; - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - - using CacheBaseImpl< CacheState<A> >::PushArc; - using CacheBaseImpl< CacheState<A> >::HasArcs; - using CacheBaseImpl< CacheState<A> >::HasFinal; - using CacheBaseImpl< CacheState<A> >::HasStart; - using CacheBaseImpl< CacheState<A> >::SetArcs; - using CacheBaseImpl< CacheState<A> >::SetFinal; - using CacheBaseImpl< CacheState<A> >::SetStart; - - typedef A Arc; - typedef typename A::Label Label; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - typedef basic_string<Label> String; - - struct Element { - Element() {} - - Element(StateId s, const String *i, const String *o) - : state(s), istring(i), ostring(o) {} - - StateId state; // Input state Id - const String *istring; // Residual input labels - const String *ostring; // Residual output labels - // Residual strings are represented by const pointers to - // basic_string<Label> and are stored in a hash_set. The pointed - // memory is owned by the hash_set string_set_. 
- }; - - SynchronizeFstImpl(const Fst<A> &fst, const SynchronizeFstOptions &opts) - : CacheImpl<A>(opts), fst_(fst.Copy()) { - SetType("synchronize"); - uint64 props = fst.Properties(kFstProperties, false); - SetProperties(SynchronizeProperties(props), kCopyProperties); - - SetInputSymbols(fst.InputSymbols()); - SetOutputSymbols(fst.OutputSymbols()); - } - - SynchronizeFstImpl(const SynchronizeFstImpl &impl) - : CacheImpl<A>(impl), - fst_(impl.fst_->Copy(true)) { - SetType("synchronize"); - SetProperties(impl.Properties(), kCopyProperties); - SetInputSymbols(impl.InputSymbols()); - SetOutputSymbols(impl.OutputSymbols()); - } - - ~SynchronizeFstImpl() { - delete fst_; - // Extract pointers from the hash set - vector<const String*> strings; - typename StringSet::iterator it = string_set_.begin(); - for (; it != string_set_.end(); ++it) - strings.push_back(*it); - // Free the extracted pointers - for (size_t i = 0; i < strings.size(); ++i) - delete strings[i]; - } - - StateId Start() { - if (!HasStart()) { - StateId s = fst_->Start(); - if (s == kNoStateId) - return kNoStateId; - const String *empty = FindString(new String()); - StateId start = FindState(Element(fst_->Start(), empty, empty)); - SetStart(start); - } - return CacheImpl<A>::Start(); - } - - Weight Final(StateId s) { - if (!HasFinal(s)) { - const Element &e = elements_[s]; - Weight w = e.state == kNoStateId ? 
Weight::One() : fst_->Final(e.state); - if ((w != Weight::Zero()) && (e.istring)->empty() && (e.ostring)->empty()) - SetFinal(s, w); - else - SetFinal(s, Weight::Zero()); - } - return CacheImpl<A>::Final(s); - } - - size_t NumArcs(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumArcs(s); - } - - size_t NumInputEpsilons(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumInputEpsilons(s); - } - - size_t NumOutputEpsilons(StateId s) { - if (!HasArcs(s)) - Expand(s); - return CacheImpl<A>::NumOutputEpsilons(s); - } - - uint64 Properties() const { return Properties(kFstProperties); } - - // Set error if found; return FST impl properties. - uint64 Properties(uint64 mask) const { - if ((mask & kError) && fst_->Properties(kError, false)) - SetProperties(kError, kError); - return FstImpl<Arc>::Properties(mask); - } - - void InitArcIterator(StateId s, ArcIteratorData<A> *data) { - if (!HasArcs(s)) - Expand(s); - CacheImpl<A>::InitArcIterator(s, data); - } - - // Returns the first character of the string obtained by - // concatenating s and l. - Label Car(const String *s, Label l = 0) const { - if (!s->empty()) - return (*s)[0]; - else - return l; - } - - // Computes the residual string obtained by removing the first - // character in the concatenation of s and l. - const String *Cdr(const String *s, Label l = 0) { - String *r = new String(); - for (int i = 1; i < s->size(); ++i) - r->push_back((*s)[i]); - if (l && !(s->empty())) r->push_back(l); - return FindString(r); - } - - // Computes the concatenation of s and l. - const String *Concat(const String *s, Label l = 0) { - String *r = new String(); - for (int i = 0; i < s->size(); ++i) - r->push_back((*s)[i]); - if (l) r->push_back(l); - return FindString(r); - } - - // Tests if the concatenation of s and l is empty - bool Empty(const String *s, Label l = 0) const { - if (s->empty()) - return l == 0; - else - return false; - } - - // Finds the string pointed by s in the hash set. 
Transfers the - // pointer ownership to the hash set. - const String *FindString(const String *s) { - typename StringSet::iterator it = string_set_.find(s); - if (it != string_set_.end()) { - delete s; - return (*it); - } else { - string_set_.insert(s); - return s; - } - } - - // Finds state corresponding to an element. Creates new state - // if element not found. - StateId FindState(const Element &e) { - typename ElementMap::iterator eit = element_map_.find(e); - if (eit != element_map_.end()) { - return (*eit).second; - } else { - StateId s = elements_.size(); - elements_.push_back(e); - element_map_.insert(pair<const Element, StateId>(e, s)); - return s; - } - } - - - // Computes the outgoing transitions from a state, creating new destination - // states as needed. - void Expand(StateId s) { - Element e = elements_[s]; - - if (e.state != kNoStateId) - for (ArcIterator< Fst<A> > ait(*fst_, e.state); - !ait.Done(); - ait.Next()) { - const A &arc = ait.Value(); - if (!Empty(e.istring, arc.ilabel) && !Empty(e.ostring, arc.olabel)) { - const String *istring = Cdr(e.istring, arc.ilabel); - const String *ostring = Cdr(e.ostring, arc.olabel); - StateId d = FindState(Element(arc.nextstate, istring, ostring)); - PushArc(s, Arc(Car(e.istring, arc.ilabel), - Car(e.ostring, arc.olabel), arc.weight, d)); - } else { - const String *istring = Concat(e.istring, arc.ilabel); - const String *ostring = Concat(e.ostring, arc.olabel); - StateId d = FindState(Element(arc.nextstate, istring, ostring)); - PushArc(s, Arc(0 , 0, arc.weight, d)); - } - } - - Weight w = e.state == kNoStateId ? 
Weight::One() : fst_->Final(e.state); - if ((w != Weight::Zero()) && - ((e.istring)->size() + (e.ostring)->size() > 0)) { - const String *istring = Cdr(e.istring); - const String *ostring = Cdr(e.ostring); - StateId d = FindState(Element(kNoStateId, istring, ostring)); - PushArc(s, Arc(Car(e.istring), Car(e.ostring), w, d)); - } - SetArcs(s); - } - - private: - // Equality function for Elements, assume strings have been hashed. - class ElementEqual { - public: - bool operator()(const Element &x, const Element &y) const { - return x.state == y.state && - x.istring == y.istring && - x.ostring == y.ostring; - } - }; - - // Hash function for Elements to Fst states. - class ElementKey { - public: - size_t operator()(const Element &x) const { - size_t key = x.state; - key = (key << 1) ^ (x.istring)->size(); - for (size_t i = 0; i < (x.istring)->size(); ++i) - key = (key << 1) ^ (*x.istring)[i]; - key = (key << 1) ^ (x.ostring)->size(); - for (size_t i = 0; i < (x.ostring)->size(); ++i) - key = (key << 1) ^ (*x.ostring)[i]; - return key; - } - }; - - // Equality function for strings - class StringEqual { - public: - bool operator()(const String * const &x, const String * const &y) const { - if (x->size() != y->size()) return false; - for (size_t i = 0; i < x->size(); ++i) - if ((*x)[i] != (*y)[i]) return false; - return true; - } - }; - - // Hash function for set of strings - class StringKey{ - public: - size_t operator()(const String * const & x) const { - size_t key = x->size(); - for (size_t i = 0; i < x->size(); ++i) - key = (key << 1) ^ (*x)[i]; - return key; - } - }; - - - typedef unordered_map<Element, StateId, ElementKey, ElementEqual> ElementMap; - typedef unordered_set<const String*, StringKey, StringEqual> StringSet; - - const Fst<A> *fst_; - vector<Element> elements_; // mapping Fst state to Elements - ElementMap element_map_; // mapping Elements to Fst state - StringSet string_set_; - - void operator=(const SynchronizeFstImpl<A> &); // disallow -}; - - -// 
Synchronizes a transducer. This version is a delayed Fst. The -// result will be an equivalent FST that has the property that during -// the traversal of a path, the delay is either zero or strictly -// increasing, where the delay is the difference between the number of -// non-epsilon output labels and input labels along the path. -// -// For the algorithm to terminate, the input transducer must have -// bounded delay, i.e., the delay of every cycle must be zero. -// -// Complexity: -// - A has bounded delay: exponential -// - A does not have bounded delay: does not terminate -// -// References: -// - Mehryar Mohri. Edit-Distance of Weighted Automata: General -// Definitions and Algorithms, International Journal of Computer -// Science, 14(6): 957-982 (2003). -// -// This class attaches interface to implementation and handles -// reference counting, delegating most methods to ImplToFst. -template <class A> -class SynchronizeFst : public ImplToFst< SynchronizeFstImpl<A> > { - public: - friend class ArcIterator< SynchronizeFst<A> >; - friend class StateIterator< SynchronizeFst<A> >; - - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - typedef CacheState<A> State; - typedef SynchronizeFstImpl<A> Impl; - - SynchronizeFst(const Fst<A> &fst) - : ImplToFst<Impl>(new Impl(fst, SynchronizeFstOptions())) {} - - SynchronizeFst(const Fst<A> &fst, const SynchronizeFstOptions &opts) - : ImplToFst<Impl>(new Impl(fst, opts)) {} - - // See Fst<>::Copy() for doc. - SynchronizeFst(const SynchronizeFst<A> &fst, bool safe = false) - : ImplToFst<Impl>(fst, safe) {} - - // Get a copy of this SynchronizeFst. See Fst<>::Copy() for further doc. 
- virtual SynchronizeFst<A> *Copy(bool safe = false) const { - return new SynchronizeFst<A>(*this, safe); - } - - virtual inline void InitStateIterator(StateIteratorData<A> *data) const; - - virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { - GetImpl()->InitArcIterator(s, data); - } - - private: - // Makes visible to friends. - Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } - - void operator=(const SynchronizeFst<A> &fst); // Disallow -}; - - -// Specialization for SynchronizeFst. -template<class A> -class StateIterator< SynchronizeFst<A> > - : public CacheStateIterator< SynchronizeFst<A> > { - public: - explicit StateIterator(const SynchronizeFst<A> &fst) - : CacheStateIterator< SynchronizeFst<A> >(fst, fst.GetImpl()) {} -}; - - -// Specialization for SynchronizeFst. -template <class A> -class ArcIterator< SynchronizeFst<A> > - : public CacheArcIterator< SynchronizeFst<A> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const SynchronizeFst<A> &fst, StateId s) - : CacheArcIterator< SynchronizeFst<A> >(fst.GetImpl(), s) { - if (!fst.GetImpl()->HasArcs(s)) - fst.GetImpl()->Expand(s); - } - - private: - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - - -template <class A> inline -void SynchronizeFst<A>::InitStateIterator(StateIteratorData<A> *data) const -{ - data->base = new StateIterator< SynchronizeFst<A> >(*this); -} - - - -// Synchronizes a transducer. This version writes the synchronized -// result to a MutableFst. The result will be an equivalent FST that -// has the property that during the traversal of a path, the delay is -// either zero or strictly increasing, where the delay is the -// difference between the number of non-epsilon output labels and -// input labels along the path. -// -// For the algorithm to terminate, the input transducer must have -// bounded delay, i.e., the delay of every cycle must be zero. 
-// -// Complexity: -// - A has bounded delay: exponential -// - A does not have bounded delay: does not terminate -// -// References: -// - Mehryar Mohri. Edit-Distance of Weighted Automata: General -// Definitions and Algorithms, International Journal of Computer -// Science, 14(6): 957-982 (2003). -template<class Arc> -void Synchronize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst) { - SynchronizeFstOptions opts; - opts.gc_limit = 0; // Cache only the last state for fastest copy. - *ofst = SynchronizeFst<Arc>(ifst, opts); -} - -} // namespace fst - -#endif // FST_LIB_SYNCHRONIZE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/test-properties.h b/kaldi_io/src/tools/openfst/include/fst/test-properties.h deleted file mode 100644 index 80af593..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/test-properties.h +++ /dev/null @@ -1,250 +0,0 @@ -// test-properties.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Functions to manipulate and test property bits - -#ifndef FST_LIB_TEST_PROPERTIES_H__ -#define FST_LIB_TEST_PROPERTIES_H__ - -#include <tr1/unordered_set> -using std::tr1::unordered_set; -using std::tr1::unordered_multiset; - -#include <fst/dfs-visit.h> -#include <fst/connect.h> - - -DECLARE_bool(fst_verify_properties); - -namespace fst { - -// For a binary property, the bit is always returned set. -// For a trinary (i.e. 
two-bit) property, both bits are -// returned set iff either corresponding input bit is set. -inline uint64 KnownProperties(uint64 props) { - return kBinaryProperties | (props & kTrinaryProperties) | - ((props & kPosTrinaryProperties) << 1) | - ((props & kNegTrinaryProperties) >> 1); -} - -// Tests compatibility between two sets of properties -inline bool CompatProperties(uint64 props1, uint64 props2) { - uint64 known_props1 = KnownProperties(props1); - uint64 known_props2 = KnownProperties(props2); - uint64 known_props = known_props1 & known_props2; - uint64 incompat_props = (props1 & known_props) ^ (props2 & known_props); - if (incompat_props) { - uint64 prop = 1; - for (int i = 0; i < 64; ++i, prop <<= 1) - if (prop & incompat_props) - LOG(ERROR) << "CompatProperties: mismatch: " << PropertyNames[i] - << ": props1 = " << (props1 & prop ? "true" : "false") - << ", props2 = " << (props2 & prop ? "true" : "false"); - return false; - } else { - return true; - } -} - -// Computes FST property values defined in properties.h. The value of -// each property indicated in the mask will be determined and returned -// (these will never be unknown here). In the course of determining -// the properties specifically requested in the mask, certain other -// properties may be determined (those with little additional expense) -// and their values will be returned as well. The complete set of -// known properties (whether true or false) determined by this -// operation will be assigned to the the value pointed to by KNOWN. -// If 'use_stored' is true, pre-computed FST properties may be used -// when possible. This routine is seldom called directly; instead it -// is used to implement fst.Properties(mask, true). 
-template<class Arc> -uint64 ComputeProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known, - bool use_stored) { - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - typedef typename Arc::StateId StateId; - - uint64 fst_props = fst.Properties(kFstProperties, false); // Fst-stored - - // Check stored FST properties first if allowed. - if (use_stored) { - uint64 known_props = KnownProperties(fst_props); - // If FST contains required info, return it. - if ((known_props & mask) == mask) { - *known = known_props; - return fst_props; - } - } - - // Compute (trinary) properties explicitly. - - // Initialize with binary properties (already known). - uint64 comp_props = fst_props & kBinaryProperties; - - // Compute these trinary properties with a DFS. We compute only those - // that need a DFS here, since we otherwise would like to avoid a DFS - // since its stack could grow large. - uint64 dfs_props = kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic | - kAccessible | kNotAccessible | - kCoAccessible | kNotCoAccessible; - if (mask & dfs_props) { - SccVisitor<Arc> scc_visitor(&comp_props); - DfsVisit(fst, &scc_visitor); - } - - // Compute any remaining trinary properties via a state and arcs iterations - if (mask & ~(kBinaryProperties | dfs_props)) { - comp_props |= kAcceptor | kNoEpsilons | kNoIEpsilons | kNoOEpsilons | - kILabelSorted | kOLabelSorted | kUnweighted | kTopSorted | kString; - if (mask & (kIDeterministic | kNonIDeterministic)) - comp_props |= kIDeterministic; - if (mask & (kODeterministic | kNonODeterministic)) - comp_props |= kODeterministic; - - unordered_set<Label> *ilabels = 0; - unordered_set<Label> *olabels = 0; - - StateId nfinal = 0; - for (StateIterator< Fst<Arc> > siter(fst); - !siter.Done(); - siter.Next()) { - StateId s = siter.Value(); - - Arc prev_arc; - // Create these only if we need to - if (mask & (kIDeterministic | kNonIDeterministic)) - ilabels = new unordered_set<Label>; - if (mask & (kODeterministic | 
kNonODeterministic)) - olabels = new unordered_set<Label>; - - bool first_arc = true; - for (ArcIterator< Fst<Arc> > aiter(fst, s); - !aiter.Done(); - aiter.Next()) { - const Arc &arc =aiter.Value(); - - if (ilabels && ilabels->find(arc.ilabel) != ilabels->end()) { - comp_props |= kNonIDeterministic; - comp_props &= ~kIDeterministic; - } - if (olabels && olabels->find(arc.olabel) != olabels->end()) { - comp_props |= kNonODeterministic; - comp_props &= ~kODeterministic; - } - if (arc.ilabel != arc.olabel) { - comp_props |= kNotAcceptor; - comp_props &= ~kAcceptor; - } - if (arc.ilabel == 0 && arc.olabel == 0) { - comp_props |= kEpsilons; - comp_props &= ~kNoEpsilons; - } - if (arc.ilabel == 0) { - comp_props |= kIEpsilons; - comp_props &= ~kNoIEpsilons; - } - if (arc.olabel == 0) { - comp_props |= kOEpsilons; - comp_props &= ~kNoOEpsilons; - } - if (!first_arc) { - if (arc.ilabel < prev_arc.ilabel) { - comp_props |= kNotILabelSorted; - comp_props &= ~kILabelSorted; - } - if (arc.olabel < prev_arc.olabel) { - comp_props |= kNotOLabelSorted; - comp_props &= ~kOLabelSorted; - } - } - if (arc.weight != Weight::One() && arc.weight != Weight::Zero()) { - comp_props |= kWeighted; - comp_props &= ~kUnweighted; - } - if (arc.nextstate <= s) { - comp_props |= kNotTopSorted; - comp_props &= ~kTopSorted; - } - if (arc.nextstate != s + 1) { - comp_props |= kNotString; - comp_props &= ~kString; - } - prev_arc = arc; - first_arc = false; - if (ilabels) - ilabels->insert(arc.ilabel); - if (olabels) - olabels->insert(arc.olabel); - } - - if (nfinal > 0) { // final state not last - comp_props |= kNotString; - comp_props &= ~kString; - } - - Weight final = fst.Final(s); - - if (final != Weight::Zero()) { // final state - if (final != Weight::One()) { - comp_props |= kWeighted; - comp_props &= ~kUnweighted; - } - ++nfinal; - } else { // non-final state - if (fst.NumArcs(s) != 1) { - comp_props |= kNotString; - comp_props &= ~kString; - } - } - - delete ilabels; - delete olabels; - } - 
- if (fst.Start() != kNoStateId && fst.Start() != 0) { - comp_props |= kNotString; - comp_props &= ~kString; - } - } - - *known = KnownProperties(comp_props); - return comp_props; -} - -// This is a wrapper around ComputeProperties that will cause a fatal -// error if the stored properties and the computed properties are -// incompatible when 'FLAGS_fst_verify_properties' is true. This -// routine is seldom called directly; instead it is used to implement -// fst.Properties(mask, true). -template<class Arc> -uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known) { - if (FLAGS_fst_verify_properties) { - uint64 stored_props = fst.Properties(kFstProperties, false); - uint64 computed_props = ComputeProperties(fst, mask, known, false); - if (!CompatProperties(stored_props, computed_props)) - LOG(FATAL) << "TestProperties: stored Fst properties incorrect" - << " (stored: props1, computed: props2)"; - return computed_props; - } else { - return ComputeProperties(fst, mask, known, true); - } -} - -} // namespace fst - -#endif // FST_LIB_TEST_PROPERTIES_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/topsort.h b/kaldi_io/src/tools/openfst/include/fst/topsort.h deleted file mode 100644 index 53735e5..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/topsort.h +++ /dev/null @@ -1,112 +0,0 @@ -// topsort.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Michael Riley) -// -// \file -// Topological sort of FSTs - -#ifndef FST_LIB_TOPSORT_H__ -#define FST_LIB_TOPSORT_H__ - -#include <algorithm> -#include <vector> -using std::vector; - - -#include <fst/dfs-visit.h> -#include <fst/fst.h> -#include <fst/statesort.h> - - -namespace fst { - -// DFS visitor class to return topological ordering. -template <class A> -class TopOrderVisitor { - public: - typedef A Arc; - typedef typename A::StateId StateId; - - // If acyclic, ORDER[i] gives the topological position of state Id i; - // otherwise unchanged. ACYCLIC will be true iff the FST has - // no cycles. - TopOrderVisitor(vector<StateId> *order, bool *acyclic) - : order_(order), acyclic_(acyclic) {} - - void InitVisit(const Fst<A> &fst) { - finish_ = new vector<StateId>; - *acyclic_ = true; - } - - bool InitState(StateId s, StateId r) { return true; } - - bool TreeArc(StateId s, const A &arc) { return true; } - - bool BackArc(StateId s, const A &arc) { return (*acyclic_ = false); } - - bool ForwardOrCrossArc(StateId s, const A &arc) { return true; } - - void FinishState(StateId s, StateId p, const A *) { finish_->push_back(s); } - - void FinishVisit() { - if (*acyclic_) { - order_->clear(); - for (StateId s = 0; s < finish_->size(); ++s) - order_->push_back(kNoStateId); - for (StateId s = 0; s < finish_->size(); ++s) - (*order_)[(*finish_)[finish_->size() - s - 1]] = s; - } - delete finish_; - } - - private: - vector<StateId> *order_; - bool *acyclic_; - vector<StateId> *finish_; // states in finishing-time order -}; - - -// Topologically sorts its input if acyclic, modifying it. Otherwise, -// the input is unchanged. When sorted, all transitions are from -// lower to higher state IDs. -// -// Complexity: -// - Time: O(V + E) -// - Space: O(V + E) -// where V = # of states and E = # of arcs. 
-template <class Arc> -bool TopSort(MutableFst<Arc> *fst) { - typedef typename Arc::StateId StateId; - - vector<StateId> order; - bool acyclic; - - TopOrderVisitor<Arc> top_order_visitor(&order, &acyclic); - DfsVisit(*fst, &top_order_visitor); - - if (acyclic) { - StateSort(fst, order); - fst->SetProperties(kAcyclic | kInitialAcyclic | kTopSorted, - kAcyclic | kInitialAcyclic | kTopSorted); - } else { - fst->SetProperties(kCyclic | kNotTopSorted, kCyclic | kNotTopSorted); - } - return acyclic; -} - -} // namespace fst - -#endif // FST_LIB_TOPSORT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/tuple-weight.h b/kaldi_io/src/tools/openfst/include/fst/tuple-weight.h deleted file mode 100644 index 184026c..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/tuple-weight.h +++ /dev/null @@ -1,332 +0,0 @@ -// tuple-weight.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: allauzen@google (Cyril Allauzen) -// -// \file -// Tuple weight set operation definitions. 
- -#ifndef FST_LIB_TUPLE_WEIGHT_H__ -#define FST_LIB_TUPLE_WEIGHT_H__ - -#include <string> -#include <vector> -using std::vector; - -#include <fst/weight.h> - - -DECLARE_string(fst_weight_parentheses); -DECLARE_string(fst_weight_separator); - -namespace fst { - -template<class W, unsigned int n> class TupleWeight; -template <class W, unsigned int n> -istream &operator>>(istream &strm, TupleWeight<W, n> &w); - -// n-tuple weight, element of the n-th catersian power of W -template <class W, unsigned int n> -class TupleWeight { - public: - typedef TupleWeight<typename W::ReverseWeight, n> ReverseWeight; - - TupleWeight() {} - - TupleWeight(const TupleWeight &w) { - for (size_t i = 0; i < n; ++i) - values_[i] = w.values_[i]; - } - - template <class Iterator> - TupleWeight(Iterator begin, Iterator end) { - for (Iterator iter = begin; iter != end; ++iter) - values_[iter - begin] = *iter; - } - - TupleWeight(const W &w) { - for (size_t i = 0; i < n; ++i) - values_[i] = w; - } - - static const TupleWeight<W, n> &Zero() { - static const TupleWeight<W, n> zero(W::Zero()); - return zero; - } - - static const TupleWeight<W, n> &One() { - static const TupleWeight<W, n> one(W::One()); - return one; - } - - static const TupleWeight<W, n> &NoWeight() { - static const TupleWeight<W, n> no_weight(W::NoWeight()); - return no_weight; - } - - static unsigned int Length() { - return n; - } - - istream &Read(istream &strm) { - for (size_t i = 0; i < n; ++i) - values_[i].Read(strm); - return strm; - } - - ostream &Write(ostream &strm) const { - for (size_t i = 0; i < n; ++i) - values_[i].Write(strm); - return strm; - } - - TupleWeight<W, n> &operator=(const TupleWeight<W, n> &w) { - for (size_t i = 0; i < n; ++i) - values_[i] = w.values_[i]; - return *this; - } - - bool Member() const { - bool member = true; - for (size_t i = 0; i < n; ++i) - member = member && values_[i].Member(); - return member; - } - - size_t Hash() const { - uint64 hash = 0; - for (size_t i = 0; i < n; ++i) - hash = 
5 * hash + values_[i].Hash(); - return size_t(hash); - } - - TupleWeight<W, n> Quantize(float delta = kDelta) const { - TupleWeight<W, n> w; - for (size_t i = 0; i < n; ++i) - w.values_[i] = values_[i].Quantize(delta); - return w; - } - - ReverseWeight Reverse() const { - TupleWeight<W, n> w; - for (size_t i = 0; i < n; ++i) - w.values_[i] = values_[i].Reverse(); - return w; - } - - const W& Value(size_t i) const { return values_[i]; } - - void SetValue(size_t i, const W &w) { values_[i] = w; } - - protected: - // Reads TupleWeight when there are no parentheses around tuple terms - inline static istream &ReadNoParen(istream &strm, - TupleWeight<W, n> &w, - char separator) { - int c; - do { - c = strm.get(); - } while (isspace(c)); - - for (size_t i = 0; i < n - 1; ++i) { - string s; - if (i) - c = strm.get(); - while (c != separator) { - if (c == EOF) { - strm.clear(std::ios::badbit); - return strm; - } - s += c; - c = strm.get(); - } - // read (i+1)-th element - istringstream sstrm(s); - W r = W::Zero(); - sstrm >> r; - w.SetValue(i, r); - } - - // read n-th element - W r = W::Zero(); - strm >> r; - w.SetValue(n - 1, r); - - return strm; - } - - // Reads TupleWeight when there are parentheses around tuple terms - inline static istream &ReadWithParen(istream &strm, - TupleWeight<W, n> &w, - char separator, - char open_paren, - char close_paren) { - int c; - do { - c = strm.get(); - } while (isspace(c)); - - if (c != open_paren) { - FSTERROR() << " is fst_weight_parentheses flag set correcty? 
"; - strm.clear(std::ios::badbit); - return strm; - } - - for (size_t i = 0; i < n - 1; ++i) { - // read (i+1)-th element - stack<int> parens; - string s; - c = strm.get(); - while (c != separator || !parens.empty()) { - if (c == EOF) { - strm.clear(std::ios::badbit); - return strm; - } - s += c; - // if parens encountered before separator, they must be matched - if (c == open_paren) { - parens.push(1); - } else if (c == close_paren) { - // Fail for mismatched parens - if (parens.empty()) { - strm.clear(std::ios::failbit); - return strm; - } - parens.pop(); - } - c = strm.get(); - } - istringstream sstrm(s); - W r = W::Zero(); - sstrm >> r; - w.SetValue(i, r); - } - - // read n-th element - string s; - c = strm.get(); - while (c != EOF) { - s += c; - c = strm.get(); - } - if (s.empty() || *s.rbegin() != close_paren) { - FSTERROR() << " is fst_weight_parentheses flag set correcty? "; - strm.clear(std::ios::failbit); - return strm; - } - s.erase(s.size() - 1, 1); - istringstream sstrm(s); - W r = W::Zero(); - sstrm >> r; - w.SetValue(n - 1, r); - - return strm; - } - - - private: - W values_[n]; - - friend istream &operator>><W, n>(istream&, TupleWeight<W, n>&); -}; - -template <class W, unsigned int n> -inline bool operator==(const TupleWeight<W, n> &w1, - const TupleWeight<W, n> &w2) { - bool equal = true; - for (size_t i = 0; i < n; ++i) - equal = equal && (w1.Value(i) == w2.Value(i)); - return equal; -} - -template <class W, unsigned int n> -inline bool operator!=(const TupleWeight<W, n> &w1, - const TupleWeight<W, n> &w2) { - bool not_equal = false; - for (size_t i = 0; (i < n) && !not_equal; ++i) - not_equal = not_equal || (w1.Value(i) != w2.Value(i)); - return not_equal; -} - -template <class W, unsigned int n> -inline bool ApproxEqual(const TupleWeight<W, n> &w1, - const TupleWeight<W, n> &w2, - float delta = kDelta) { - bool approx_equal = true; - for (size_t i = 0; i < n; ++i) - approx_equal = approx_equal && - ApproxEqual(w1.Value(i), w2.Value(i), delta); 
- return approx_equal; -} - -template <class W, unsigned int n> -inline ostream &operator<<(ostream &strm, const TupleWeight<W, n> &w) { - if(FLAGS_fst_weight_separator.size() != 1) { - FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1"; - strm.clear(std::ios::badbit); - return strm; - } - char separator = FLAGS_fst_weight_separator[0]; - bool write_parens = false; - if (!FLAGS_fst_weight_parentheses.empty()) { - if (FLAGS_fst_weight_parentheses.size() != 2) { - FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2"; - strm.clear(std::ios::badbit); - return strm; - } - write_parens = true; - } - - if (write_parens) - strm << FLAGS_fst_weight_parentheses[0]; - for (size_t i = 0; i < n; ++i) { - if(i) - strm << separator; - strm << w.Value(i); - } - if (write_parens) - strm << FLAGS_fst_weight_parentheses[1]; - - return strm; -} - -template <class W, unsigned int n> -inline istream &operator>>(istream &strm, TupleWeight<W, n> &w) { - if(FLAGS_fst_weight_separator.size() != 1) { - FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1"; - strm.clear(std::ios::badbit); - return strm; - } - char separator = FLAGS_fst_weight_separator[0]; - - if (!FLAGS_fst_weight_parentheses.empty()) { - if (FLAGS_fst_weight_parentheses.size() != 2) { - FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2"; - strm.clear(std::ios::badbit); - return strm; - } - return TupleWeight<W, n>::ReadWithParen( - strm, w, separator, FLAGS_fst_weight_parentheses[0], - FLAGS_fst_weight_parentheses[1]); - } else { - return TupleWeight<W, n>::ReadNoParen(strm, w, separator); - } -} - - - -} // namespace fst - -#endif // FST_LIB_TUPLE_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/types.h b/kaldi_io/src/tools/openfst/include/fst/types.h deleted file mode 100644 index 8c4367a..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/types.h +++ /dev/null @@ -1,38 +0,0 @@ -// types.h -// -// Licensed under the Apache License, Version 
2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Author: [email protected] (Michael Riley) -// -// \file -// Various type definitions (mostly for Google compatibility). - -#include <cstdlib> // for ssize_t -#include <stdint.h> // *int*_t - -#include <fst/compat.h> // for DISALLOW_COPY_AND_ASSIGN - -#ifndef FST_LIB_TYPES_H__ -#define FST_LIB_TYPES_H__ - -typedef int8_t int8; -typedef int16_t int16; -typedef int32_t int32; -typedef int64_t int64; - -typedef uint8_t uint8; -typedef uint16_t uint16; -typedef uint32_t uint32; -typedef uint64_t uint64; - -#endif // FST_LIB_TYPES_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/union-find.h b/kaldi_io/src/tools/openfst/include/fst/union-find.h deleted file mode 100644 index c8633e0..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/union-find.h +++ /dev/null @@ -1,110 +0,0 @@ - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Wojciech Skut) -// -// \file Union-Find algorithm for dense sets of non-negative -// integers. Implemented using disjoint tree forests with rank -// heuristics and path compression. - -#ifndef __fst_union_find_inl_h__ -#define __fst_union_find_inl_h__ - -#include <stack> -#include <vector> -using std::vector; -#include <fst/types.h> - -namespace fst { - -// Union-Find algorithm for dense sets of non-negative integers -// (exact type: T). -template <class T> -class UnionFind { - public: - // Ctor: creates a disjoint set forest for the range [0;max). - // 'fail' is a value indicating that an element hasn't been - // initialized using MakeSet(...). The upper bound of the range - // can be reset (increased) using MakeSet(...). - UnionFind(T max, T fail) - : parent_(max, fail), rank_(max), fail_(fail) { } - - // Finds the representative of the set 'item' belongs to. - // Performs path compression if needed. - T FindSet(T item) { - if (item >= parent_.size() - || item == fail_ - || parent_[item] == fail_) return fail_; - - T *p = &parent_[item]; - for (; *p != item; item = *p, p = &parent_[item]) { - exec_stack_.push(p); - } - for (; ! exec_stack_.empty(); exec_stack_.pop()) { - *exec_stack_.top() = *p; - } - return *p; - } - - // Creates the (destructive) union of the sets x and y belong to. - void Union(T x, T y) { - Link(FindSet(x), FindSet(y)); - } - - // Initialization of an element: creates a singleton set containing - // 'item'. The range [0;max) is reset if item >= max. - T MakeSet(T item) { - if (item >= parent_.size()) { - // New value in parent_ should be initialized to fail_ - size_t nitem = item > 0 ? 
2 * item : 2; - parent_.resize(nitem, fail_); - rank_.resize(nitem); - } - parent_[item] = item; - return item; - } - - // Initialization of all elements starting from 0 to max - 1 to distinct sets - void MakeAllSet(T max) { - parent_.resize(max); - for (T item = 0; item < max; ++item) { - parent_[item] = item; - } - } - - private: - vector<T> parent_; // Parent nodes. - vector<int> rank_; // Rank of an element = min. depth in tree. - T fail_; // Value indicating lookup failure. - stack<T*> exec_stack_; // Used for path compression. - - // Links trees rooted in 'x' and 'y'. - void Link(T x, T y) { - if (x == y) return; - - if (rank_[x] > rank_[y]) { - parent_[y] = x; - } else { - parent_[x] = y; - if (rank_[x] == rank_[y]) { - ++rank_[y]; - } - } - } - DISALLOW_COPY_AND_ASSIGN(UnionFind); -}; - -} // namespace fst - -#endif // __fst_union_find_inl_h__ diff --git a/kaldi_io/src/tools/openfst/include/fst/union.h b/kaldi_io/src/tools/openfst/include/fst/union.h deleted file mode 100644 index a2f97fb..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/union.h +++ /dev/null @@ -1,185 +0,0 @@ -// union.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Functions and classes to compute the union of two FSTs. 
- -#ifndef FST_LIB_UNION_H__ -#define FST_LIB_UNION_H__ - -#include <vector> -using std::vector; -#include <algorithm> - -#include <fst/mutable-fst.h> -#include <fst/rational.h> - - -namespace fst { - -// Computes the union (sum) of two FSTs. This version writes the -// union to an output MurableFst. If A transduces string x to y with -// weight a and B transduces string w to v with weight b, then their -// union transduces x to y with weight a and w to v with weight b. -// -// Complexity: -// - Time: (V2 + E2) -// - Space: O(V2 + E2) -// where Vi = # of states and Ei = # of arcs of the ith FST. -template <class Arc> -void Union(MutableFst<Arc> *fst1, const Fst<Arc> &fst2) { - typedef typename Arc::StateId StateId; - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - - // TODO(riley): restore when voice actions issues fixed - // Check that the symbol table are compatible - if (!CompatSymbols(fst1->InputSymbols(), fst2.InputSymbols()) || - !CompatSymbols(fst1->OutputSymbols(), fst2.OutputSymbols())) { - LOG(ERROR) << "Union: input/output symbol tables of 1st argument " - << "do not match input/output symbol tables of 2nd argument"; - // fst1->SetProperties(kError, kError); - // return; - } - - StateId numstates1 = fst1->NumStates(); - bool initial_acyclic1 = fst1->Properties(kInitialAcyclic, true); - uint64 props1 = fst1->Properties(kFstProperties, false); - uint64 props2 = fst2.Properties(kFstProperties, false); - - StateId start2 = fst2.Start(); - if (start2 == kNoStateId) { - if (props2 & kError) fst1->SetProperties(kError, kError); - return; - } - - if (fst2.Properties(kExpanded, false)) { - fst1->ReserveStates( - numstates1 + CountStates(fst2) + (initial_acyclic1 ? 
0 : 1)); - } - - for (StateIterator< Fst<Arc> > siter(fst2); - !siter.Done(); - siter.Next()) { - StateId s1 = fst1->AddState(); - StateId s2 = siter.Value(); - fst1->SetFinal(s1, fst2.Final(s2)); - fst1->ReserveArcs(s1, fst2.NumArcs(s2)); - for (ArcIterator< Fst<Arc> > aiter(fst2, s2); - !aiter.Done(); - aiter.Next()) { - Arc arc = aiter.Value(); - arc.nextstate += numstates1; - fst1->AddArc(s1, arc); - } - } - StateId start1 = fst1->Start(); - if (start1 == kNoStateId) { - fst1->SetStart(start2); - fst1->SetProperties(props2, kCopyProperties); - return; - } - - if (initial_acyclic1) { - fst1->AddArc(start1, Arc(0, 0, Weight::One(), start2 + numstates1)); - } else { - StateId nstart1 = fst1->AddState(); - fst1->SetStart(nstart1); - fst1->AddArc(nstart1, Arc(0, 0, Weight::One(), start1)); - fst1->AddArc(nstart1, Arc(0, 0, Weight::One(), start2 + numstates1)); - } - fst1->SetProperties(UnionProperties(props1, props2), kFstProperties); -} - - -// Computes the union of two FSTs; this version modifies its -// RationalFst argument. -template<class Arc> -void Union(RationalFst<Arc> *fst1, const Fst<Arc> &fst2) { - fst1->GetImpl()->AddUnion(fst2); -} - - -typedef RationalFstOptions UnionFstOptions; - - -// Computes the union (sum) of two FSTs. This version is a delayed -// Fst. If A transduces string x to y with weight a and B transduces -// string w to v with weight b, then their union transduces x to y -// with weight a and w to v with weight b. -// -// Complexity: -// - Time: O(v1 + e1 + v2 + e2) -// - Sapce: O(v1 + v2) -// where vi = # of states visited and ei = # of arcs visited of the -// ith FST. Constant time and space to visit an input state or arc -// is assumed and exclusive of caching. 
-template <class A> -class UnionFst : public RationalFst<A> { - public: - using ImplToFst< RationalFstImpl<A> >::GetImpl; - - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - UnionFst(const Fst<A> &fst1, const Fst<A> &fst2) { - GetImpl()->InitUnion(fst1, fst2); - } - - UnionFst(const Fst<A> &fst1, const Fst<A> &fst2, const UnionFstOptions &opts) - : RationalFst<A>(opts) { - GetImpl()->InitUnion(fst1, fst2); - } - - // See Fst<>::Copy() for doc. - UnionFst(const UnionFst<A> &fst, bool safe = false) - : RationalFst<A>(fst, safe) {} - - // Get a copy of this UnionFst. See Fst<>::Copy() for further doc. - virtual UnionFst<A> *Copy(bool safe = false) const { - return new UnionFst<A>(*this, safe); - } -}; - - -// Specialization for UnionFst. -template <class A> -class StateIterator< UnionFst<A> > : public StateIterator< RationalFst<A> > { - public: - explicit StateIterator(const UnionFst<A> &fst) - : StateIterator< RationalFst<A> >(fst) {} -}; - - -// Specialization for UnionFst. -template <class A> -class ArcIterator< UnionFst<A> > : public ArcIterator< RationalFst<A> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const UnionFst<A> &fst, StateId s) - : ArcIterator< RationalFst<A> >(fst, s) {} -}; - - -// Useful alias when using StdArc. -typedef UnionFst<StdArc> StdUnionFst; - -} // namespace fst - -#endif // FST_LIB_UNION_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/util.h b/kaldi_io/src/tools/openfst/include/fst/util.h deleted file mode 100644 index 57d7c4b..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/util.h +++ /dev/null @@ -1,437 +0,0 @@ -// util.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// FST utility inline definitions. - -#ifndef FST_LIB_UTIL_H__ -#define FST_LIB_UTIL_H__ - -#include <tr1/unordered_map> -using std::tr1::unordered_map; -using std::tr1::unordered_multimap; -#include <tr1/unordered_set> -using std::tr1::unordered_set; -using std::tr1::unordered_multiset; -#include <list> -#include <map> -#include <set> -#include <sstream> -#include <string> -#include <vector> -using std::vector; - - -#include <fst/compat.h> -#include <fst/types.h> - -#include <iostream> -#include <fstream> -#include <sstream> - -// -// UTILITY FOR ERROR HANDLING -// - -DECLARE_bool(fst_error_fatal); - -#define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR)) - -namespace fst { - -// -// UTILITIES FOR TYPE I/O -// - -// Read some types from an input stream. - -// Generic case. -template <typename T> -inline istream &ReadType(istream &strm, T *t) { - return t->Read(strm); -} - -// Fixed size, contiguous memory read. 
-#define READ_POD_TYPE(T) \ -inline istream &ReadType(istream &strm, T *t) { \ - return strm.read(reinterpret_cast<char *>(t), sizeof(T)); \ -} - -READ_POD_TYPE(bool); -READ_POD_TYPE(char); -READ_POD_TYPE(signed char); -READ_POD_TYPE(unsigned char); -READ_POD_TYPE(short); -READ_POD_TYPE(unsigned short); -READ_POD_TYPE(int); -READ_POD_TYPE(unsigned int); -READ_POD_TYPE(long); -READ_POD_TYPE(unsigned long); -READ_POD_TYPE(long long); -READ_POD_TYPE(unsigned long long); -READ_POD_TYPE(float); -READ_POD_TYPE(double); - -// String case. -inline istream &ReadType(istream &strm, string *s) { - s->clear(); - int32 ns = 0; - strm.read(reinterpret_cast<char *>(&ns), sizeof(ns)); - for (int i = 0; i < ns; ++i) { - char c; - strm.read(&c, 1); - *s += c; - } - return strm; -} - -// Pair case. -template <typename S, typename T> -inline istream &ReadType(istream &strm, pair<S, T> *p) { - ReadType(strm, &p->first); - ReadType(strm, &p->second); - return strm; -} - -template <typename S, typename T> -inline istream &ReadType(istream &strm, pair<const S, T> *p) { - ReadType(strm, const_cast<S *>(&p->first)); - ReadType(strm, &p->second); - return strm; -} - -// General case - no-op. -template <typename C> -void StlReserve(C *c, int64 n) {} - -// Specialization for vectors. -template <typename S, typename T> -void StlReserve(vector<S, T> *c, int64 n) { - c->reserve(n); -} - -// STL sequence container. -#define READ_STL_SEQ_TYPE(C) \ -template <typename S, typename T> \ -inline istream &ReadType(istream &strm, C<S, T> *c) { \ - c->clear(); \ - int64 n = 0; \ - strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \ - StlReserve(c, n); \ - for (ssize_t i = 0; i < n; ++i) { \ - typename C<S, T>::value_type value; \ - ReadType(strm, &value); \ - c->insert(c->end(), value); \ - } \ - return strm; \ -} - -READ_STL_SEQ_TYPE(vector); -READ_STL_SEQ_TYPE(list); - -// STL associative container. 
-#define READ_STL_ASSOC_TYPE(C) \ -template <typename S, typename T, typename U> \ -inline istream &ReadType(istream &strm, C<S, T, U> *c) { \ - c->clear(); \ - int64 n = 0; \ - strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \ - for (ssize_t i = 0; i < n; ++i) { \ - typename C<S, T, U>::value_type value; \ - ReadType(strm, &value); \ - c->insert(value); \ - } \ - return strm; \ -} - -READ_STL_ASSOC_TYPE(set); -READ_STL_ASSOC_TYPE(unordered_set); -READ_STL_ASSOC_TYPE(map); -READ_STL_ASSOC_TYPE(unordered_map); - -// Write some types to an output stream. - -// Generic case. -template <typename T> -inline ostream &WriteType(ostream &strm, const T t) { - t.Write(strm); - return strm; -} - -// Fixed size, contiguous memory write. -#define WRITE_POD_TYPE(T) \ -inline ostream &WriteType(ostream &strm, const T t) { \ - return strm.write(reinterpret_cast<const char *>(&t), sizeof(T)); \ -} - -WRITE_POD_TYPE(bool); -WRITE_POD_TYPE(char); -WRITE_POD_TYPE(signed char); -WRITE_POD_TYPE(unsigned char); -WRITE_POD_TYPE(short); -WRITE_POD_TYPE(unsigned short); -WRITE_POD_TYPE(int); -WRITE_POD_TYPE(unsigned int); -WRITE_POD_TYPE(long); -WRITE_POD_TYPE(unsigned long); -WRITE_POD_TYPE(long long); -WRITE_POD_TYPE(unsigned long long); -WRITE_POD_TYPE(float); -WRITE_POD_TYPE(double); - -// String case. -inline ostream &WriteType(ostream &strm, const string &s) { - int32 ns = s.size(); - strm.write(reinterpret_cast<const char *>(&ns), sizeof(ns)); - return strm.write(s.data(), ns); -} - -// Pair case. -template <typename S, typename T> -inline ostream &WriteType(ostream &strm, const pair<S, T> &p) { - WriteType(strm, p.first); - WriteType(strm, p.second); - return strm; -} - -// STL sequence container. 
-#define WRITE_STL_SEQ_TYPE(C) \ -template <typename S, typename T> \ -inline ostream &WriteType(ostream &strm, const C<S, T> &c) { \ - int64 n = c.size(); \ - strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \ - for (typename C<S, T>::const_iterator it = c.begin(); \ - it != c.end(); ++it) \ - WriteType(strm, *it); \ - return strm; \ -} - -WRITE_STL_SEQ_TYPE(vector); -WRITE_STL_SEQ_TYPE(list); - -// STL associative container. -#define WRITE_STL_ASSOC_TYPE(C) \ -template <typename S, typename T, typename U> \ -inline ostream &WriteType(ostream &strm, const C<S, T, U> &c) { \ - int64 n = c.size(); \ - strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \ - for (typename C<S, T, U>::const_iterator it = c.begin(); \ - it != c.end(); ++it) \ - WriteType(strm, *it); \ - return strm; \ -} - -WRITE_STL_ASSOC_TYPE(set); -WRITE_STL_ASSOC_TYPE(unordered_set); -WRITE_STL_ASSOC_TYPE(map); -WRITE_STL_ASSOC_TYPE(unordered_map); - -// Utilities for converting between int64 or Weight and string. - -int64 StrToInt64(const string &s, const string &src, size_t nline, - bool allow_negative, bool *error = 0); - -template <typename Weight> -Weight StrToWeight(const string &s, const string &src, size_t nline) { - Weight w; - istringstream strm(s); - strm >> w; - if (!strm) { - FSTERROR() << "StrToWeight: Bad weight = \"" << s - << "\", source = " << src << ", line = " << nline; - return Weight::NoWeight(); - } - return w; -} - -void Int64ToStr(int64 n, string *s); - -template <typename Weight> -void WeightToStr(Weight w, string *s) { - ostringstream strm; - strm.precision(9); - strm << w; - s->append(strm.str().data(), strm.str().size()); -} - -// Utilities for reading/writing label pairs - -// Returns true on success -template <typename Label> -bool ReadLabelPairs(const string& filename, - vector<pair<Label, Label> >* pairs, - bool allow_negative = false) { - ifstream strm(filename.c_str()); - - if (!strm) { - LOG(ERROR) << "ReadLabelPairs: Can't open file: " << filename; - return 
false; - } - - const int kLineLen = 8096; - char line[kLineLen]; - size_t nline = 0; - - pairs->clear(); - while (strm.getline(line, kLineLen)) { - ++nline; - vector<char *> col; - SplitToVector(line, "\n\t ", &col, true); - if (col.size() == 0 || col[0][0] == '\0') // empty line - continue; - if (col.size() != 2) { - LOG(ERROR) << "ReadLabelPairs: Bad number of columns, " - << "file = " << filename << ", line = " << nline; - return false; - } - - bool err; - Label frmlabel = StrToInt64(col[0], filename, nline, allow_negative, &err); - if (err) return false; - Label tolabel = StrToInt64(col[1], filename, nline, allow_negative, &err); - if (err) return false; - pairs->push_back(make_pair(frmlabel, tolabel)); - } - return true; -} - -// Returns true on success -template <typename Label> -bool WriteLabelPairs(const string& filename, - const vector<pair<Label, Label> >& pairs) { - ostream *strm = &cout; - if (!filename.empty()) { - strm = new ofstream(filename.c_str()); - if (!*strm) { - LOG(ERROR) << "WriteLabelPairs: Can't open file: " << filename; - return false; - } - } - - for (ssize_t n = 0; n < pairs.size(); ++n) - *strm << pairs[n].first << "\t" << pairs[n].second << "\n"; - - if (!*strm) { - LOG(ERROR) << "WriteLabelPairs: Write failed: " - << (filename.empty() ? "standard output" : filename); - return false; - } - if (strm != &cout) - delete strm; - return true; -} - -// Utilities for converting a type name to a legal C symbol. - -void ConvertToLegalCSymbol(string *s); - - -// -// UTILITIES FOR STREAM I/O -// - -bool AlignInput(istream &strm); -bool AlignOutput(ostream &strm); - -// -// UTILITIES FOR PROTOCOL BUFFER I/O -// - - -// An associative container for which testing membership is -// faster than an STL set if members are restricted to an interval -// that excludes most non-members. A 'Key' must have ==, !=, and < defined. -// Element 'NoKey' should be a key that marks an uninitialized key and -// is otherwise unused. 
'Find()' returns an STL const_iterator to the match -// found, otherwise it equals 'End()'. -template <class Key, Key NoKey> -class CompactSet { -public: - typedef typename set<Key>::const_iterator const_iterator; - - CompactSet() - : min_key_(NoKey), - max_key_(NoKey) { } - - CompactSet(const CompactSet<Key, NoKey> &compact_set) - : set_(compact_set.set_), - min_key_(compact_set.min_key_), - max_key_(compact_set.max_key_) { } - - void Insert(Key key) { - set_.insert(key); - if (min_key_ == NoKey || key < min_key_) - min_key_ = key; - if (max_key_ == NoKey || max_key_ < key) - max_key_ = key; - } - - void Erase(Key key) { - set_.erase(key); - if (set_.empty()) { - min_key_ = max_key_ = NoKey; - } else if (key == min_key_) { - ++min_key_; - } else if (key == max_key_) { - --max_key_; - } - } - - void Clear() { - set_.clear(); - min_key_ = max_key_ = NoKey; - } - - const_iterator Find(Key key) const { - if (min_key_ == NoKey || - key < min_key_ || max_key_ < key) - return set_.end(); - else - return set_.find(key); - } - - bool Member(Key key) const { - if (min_key_ == NoKey || key < min_key_ || max_key_ < key) { - return false; // out of range - } else if (min_key_ != NoKey && max_key_ + 1 == min_key_ + set_.size()) { - return true; // dense range - } else { - return set_.find(key) != set_.end(); - } - } - - const_iterator Begin() const { return set_.begin(); } - - const_iterator End() const { return set_.end(); } - - // All stored keys are greater than or equal to this value. - Key LowerBound() const { return min_key_; } - - // All stored keys are less than or equal to this value. 
- Key UpperBound() const { return max_key_; } - -private: - set<Key> set_; - Key min_key_; - Key max_key_; - - void operator=(const CompactSet<Key, NoKey> &); //disallow -}; - -} // namespace fst - -#endif // FST_LIB_UTIL_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/vector-fst.h b/kaldi_io/src/tools/openfst/include/fst/vector-fst.h deleted file mode 100644 index 8b80876..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/vector-fst.h +++ /dev/null @@ -1,731 +0,0 @@ -// vector-fst.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Simple concrete, mutable FST whose states and arcs are stored in STL -// vectors. - -#ifndef FST_LIB_VECTOR_FST_H__ -#define FST_LIB_VECTOR_FST_H__ - -#include <string> -#include <vector> -using std::vector; - -#include <fst/mutable-fst.h> -#include <fst/test-properties.h> - - -namespace fst { - -template <class A> class VectorFst; -template <class F, class G> void Cast(const F &, G *); - - -// States and arcs implemented by STL vectors, templated on the -// State definition. This does not manage the Fst properties. 
-template <class State> -class VectorFstBaseImpl : public FstImpl<typename State::Arc> { - public: - typedef typename State::Arc Arc; - typedef typename Arc::Weight Weight; - typedef typename Arc::StateId StateId; - - VectorFstBaseImpl() : start_(kNoStateId) {} - - ~VectorFstBaseImpl() { - for (StateId s = 0; s < states_.size(); ++s) - delete states_[s]; - } - - StateId Start() const { return start_; } - - Weight Final(StateId s) const { return states_[s]->final; } - - StateId NumStates() const { return states_.size(); } - - size_t NumArcs(StateId s) const { return states_[s]->arcs.size(); } - - void SetStart(StateId s) { start_ = s; } - - void SetFinal(StateId s, Weight w) { states_[s]->final = w; } - - StateId AddState() { - states_.push_back(new State); - return states_.size() - 1; - } - - StateId AddState(State *state) { - states_.push_back(state); - return states_.size() - 1; - } - - void AddArc(StateId s, const Arc &arc) { - states_[s]->arcs.push_back(arc); - } - - void DeleteStates(const vector<StateId>& dstates) { - vector<StateId> newid(states_.size(), 0); - for (size_t i = 0; i < dstates.size(); ++i) - newid[dstates[i]] = kNoStateId; - StateId nstates = 0; - for (StateId s = 0; s < states_.size(); ++s) { - if (newid[s] != kNoStateId) { - newid[s] = nstates; - if (s != nstates) - states_[nstates] = states_[s]; - ++nstates; - } else { - delete states_[s]; - } - } - states_.resize(nstates); - for (StateId s = 0; s < states_.size(); ++s) { - vector<Arc> &arcs = states_[s]->arcs; - size_t narcs = 0; - for (size_t i = 0; i < arcs.size(); ++i) { - StateId t = newid[arcs[i].nextstate]; - if (t != kNoStateId) { - arcs[i].nextstate = t; - if (i != narcs) - arcs[narcs] = arcs[i]; - ++narcs; - } else { - if (arcs[i].ilabel == 0) - --states_[s]->niepsilons; - if (arcs[i].olabel == 0) - --states_[s]->noepsilons; - } - } - arcs.resize(narcs); - } - if (Start() != kNoStateId) - SetStart(newid[Start()]); - } - - void DeleteStates() { - for (StateId s = 0; s < 
states_.size(); ++s) - delete states_[s]; - states_.clear(); - SetStart(kNoStateId); - } - - void DeleteArcs(StateId s, size_t n) { - states_[s]->arcs.resize(states_[s]->arcs.size() - n); - } - - void DeleteArcs(StateId s) { states_[s]->arcs.clear(); } - - State *GetState(StateId s) { return states_[s]; } - - const State *GetState(StateId s) const { return states_[s]; } - - void SetState(StateId s, State *state) { states_[s] = state; } - - void ReserveStates(StateId n) { states_.reserve(n); } - - void ReserveArcs(StateId s, size_t n) { states_[s]->arcs.reserve(n); } - - // Provide information needed for generic state iterator - void InitStateIterator(StateIteratorData<Arc> *data) const { - data->base = 0; - data->nstates = states_.size(); - } - - // Provide information needed for generic arc iterator - void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { - data->base = 0; - data->narcs = states_[s]->arcs.size(); - data->arcs = data->narcs > 0 ? &states_[s]->arcs[0] : 0; - data->ref_count = 0; - } - - private: - vector<State *> states_; // States represenation. - StateId start_; // initial state - - DISALLOW_COPY_AND_ASSIGN(VectorFstBaseImpl); -}; - -// Arcs implemented by an STL vector per state. -template <class A> -struct VectorState { - typedef A Arc; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - VectorState() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {} - - Weight final; // Final weight - vector<A> arcs; // Arcs represenation - size_t niepsilons; // # of input epsilons - size_t noepsilons; // # of output epsilons -}; - -// This is a VectorFstBaseImpl container that holds VectorState's. It -// manages Fst properties and the # of input and output epsilons. 
-template <class A> -class VectorFstImpl : public VectorFstBaseImpl< VectorState<A> > { - public: - using FstImpl<A>::SetInputSymbols; - using FstImpl<A>::SetOutputSymbols; - using FstImpl<A>::SetType; - using FstImpl<A>::SetProperties; - using FstImpl<A>::Properties; - - using VectorFstBaseImpl<VectorState<A> >::Start; - using VectorFstBaseImpl<VectorState<A> >::NumStates; - using VectorFstBaseImpl<VectorState<A> >::GetState; - using VectorFstBaseImpl<VectorState<A> >::ReserveArcs; - - friend class MutableArcIterator< VectorFst<A> >; - - typedef VectorFstBaseImpl< VectorState<A> > BaseImpl; - typedef typename A::Weight Weight; - typedef typename A::StateId StateId; - - VectorFstImpl() { - SetType("vector"); - SetProperties(kNullProperties | kStaticProperties); - } - explicit VectorFstImpl(const Fst<A> &fst); - - static VectorFstImpl<A> *Read(istream &strm, const FstReadOptions &opts); - - size_t NumInputEpsilons(StateId s) const { return GetState(s)->niepsilons; } - - size_t NumOutputEpsilons(StateId s) const { return GetState(s)->noepsilons; } - - void SetStart(StateId s) { - BaseImpl::SetStart(s); - SetProperties(SetStartProperties(Properties())); - } - - void SetFinal(StateId s, Weight w) { - Weight ow = BaseImpl::Final(s); - BaseImpl::SetFinal(s, w); - SetProperties(SetFinalProperties(Properties(), ow, w)); - } - - StateId AddState() { - StateId s = BaseImpl::AddState(); - SetProperties(AddStateProperties(Properties())); - return s; - } - - void AddArc(StateId s, const A &arc) { - VectorState<A> *state = GetState(s); - if (arc.ilabel == 0) { - ++state->niepsilons; - } - if (arc.olabel == 0) { - ++state->noepsilons; - } - - const A *parc = state->arcs.empty() ? 
0 : &(state->arcs.back()); - SetProperties(AddArcProperties(Properties(), s, arc, parc)); - - BaseImpl::AddArc(s, arc); - } - - void DeleteStates(const vector<StateId> &dstates) { - BaseImpl::DeleteStates(dstates); - SetProperties(DeleteStatesProperties(Properties())); - } - - void DeleteStates() { - BaseImpl::DeleteStates(); - SetProperties(DeleteAllStatesProperties(Properties(), - kStaticProperties)); - } - - void DeleteArcs(StateId s, size_t n) { - const vector<A> &arcs = GetState(s)->arcs; - for (size_t i = 0; i < n; ++i) { - size_t j = arcs.size() - i - 1; - if (arcs[j].ilabel == 0) - --GetState(s)->niepsilons; - if (arcs[j].olabel == 0) - --GetState(s)->noepsilons; - } - BaseImpl::DeleteArcs(s, n); - SetProperties(DeleteArcsProperties(Properties())); - } - - void DeleteArcs(StateId s) { - GetState(s)->niepsilons = 0; - GetState(s)->noepsilons = 0; - BaseImpl::DeleteArcs(s); - SetProperties(DeleteArcsProperties(Properties())); - } - - // Properties always true of this Fst class - static const uint64 kStaticProperties = kExpanded | kMutable; - - private: - // Current file format version - static const int kFileVersion = 2; - // Minimum file format version supported - static const int kMinFileVersion = 1; - - DISALLOW_COPY_AND_ASSIGN(VectorFstImpl); -}; - -template <class A> const uint64 VectorFstImpl<A>::kStaticProperties; -template <class A> const int VectorFstImpl<A>::kFileVersion; -template <class A> const int VectorFstImpl<A>::kMinFileVersion; - - -template <class A> -VectorFstImpl<A>::VectorFstImpl(const Fst<A> &fst) { - SetType("vector"); - SetInputSymbols(fst.InputSymbols()); - SetOutputSymbols(fst.OutputSymbols()); - BaseImpl::SetStart(fst.Start()); - if (fst.Properties(kExpanded, false)) - BaseImpl::ReserveStates(CountStates(fst)); - - for (StateIterator< Fst<A> > siter(fst); - !siter.Done(); - siter.Next()) { - StateId s = siter.Value(); - BaseImpl::AddState(); - BaseImpl::SetFinal(s, fst.Final(s)); - ReserveArcs(s, fst.NumArcs(s)); - for 
(ArcIterator< Fst<A> > aiter(fst, s); - !aiter.Done(); - aiter.Next()) { - const A &arc = aiter.Value(); - BaseImpl::AddArc(s, arc); - if (arc.ilabel == 0) - ++GetState(s)->niepsilons; - if (arc.olabel == 0) - ++GetState(s)->noepsilons; - } - } - SetProperties(fst.Properties(kCopyProperties, false) | kStaticProperties); -} - -template <class A> -VectorFstImpl<A> *VectorFstImpl<A>::Read(istream &strm, - const FstReadOptions &opts) { - VectorFstImpl<A> *impl = new VectorFstImpl; - FstHeader hdr; - if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { - delete impl; - return 0; - } - impl->BaseImpl::SetStart(hdr.Start()); - if (hdr.NumStates() != kNoStateId) { - impl->ReserveStates(hdr.NumStates()); - } - - StateId s = 0; - for (;hdr.NumStates() == kNoStateId || s < hdr.NumStates(); ++s) { - typename A::Weight final; - if (!final.Read(strm)) break; - impl->BaseImpl::AddState(); - VectorState<A> *state = impl->GetState(s); - state->final = final; - int64 narcs; - ReadType(strm, &narcs); - if (!strm) { - LOG(ERROR) << "VectorFst::Read: read failed: " << opts.source; - delete impl; - return 0; - } - impl->ReserveArcs(s, narcs); - for (size_t j = 0; j < narcs; ++j) { - A arc; - ReadType(strm, &arc.ilabel); - ReadType(strm, &arc.olabel); - arc.weight.Read(strm); - ReadType(strm, &arc.nextstate); - if (!strm) { - LOG(ERROR) << "VectorFst::Read: read failed: " << opts.source; - delete impl; - return 0; - } - impl->BaseImpl::AddArc(s, arc); - if (arc.ilabel == 0) - ++state->niepsilons; - if (arc.olabel == 0) - ++state->noepsilons; - } - } - if (hdr.NumStates() != kNoStateId && s != hdr.NumStates()) { - LOG(ERROR) << "VectorFst::Read: unexpected end of file: " << opts.source; - delete impl; - return 0; - } - return impl; -} - -// Converts a string into a weight. -template <class W> class WeightFromString { - public: - W operator()(const string &s); -}; - -// Generic case fails. 
-template <class W> inline -W WeightFromString<W>::operator()(const string &s) { - FSTERROR() << "VectorFst::Read: Obsolete file format"; - return W::NoWeight(); -} - -// TropicalWeight version. -template <> inline -TropicalWeight WeightFromString<TropicalWeight>::operator()(const string &s) { - float f; - memcpy(&f, s.data(), sizeof(f)); - return TropicalWeight(f); -} - -// LogWeight version. -template <> inline -LogWeight WeightFromString<LogWeight>::operator()(const string &s) { - float f; - memcpy(&f, s.data(), sizeof(f)); - return LogWeight(f); -} - -// Simple concrete, mutable FST. This class attaches interface to -// implementation and handles reference counting, delegating most -// methods to ImplToMutableFst. Supports additional operations: -// ReserveStates and ReserveArcs (cf. STL vectors). -template <class A> -class VectorFst : public ImplToMutableFst< VectorFstImpl<A> > { - public: - friend class StateIterator< VectorFst<A> >; - friend class ArcIterator< VectorFst<A> >; - friend class MutableArcIterator< VectorFst<A> >; - template <class F, class G> friend void Cast(const F &, G *); - - typedef A Arc; - typedef typename A::StateId StateId; - typedef VectorFstImpl<A> Impl; - - VectorFst() : ImplToMutableFst<Impl>(new Impl) {} - - explicit VectorFst(const Fst<A> &fst) - : ImplToMutableFst<Impl>(new Impl(fst)) {} - - VectorFst(const VectorFst<A> &fst) : ImplToMutableFst<Impl>(fst) {} - - // Get a copy of this VectorFst. See Fst<>::Copy() for further doc. 
- virtual VectorFst<A> *Copy(bool safe = false) const { - return new VectorFst<A>(*this); - } - - VectorFst<A> &operator=(const VectorFst<A> &fst) { - SetImpl(fst.GetImpl(), false); - return *this; - } - - virtual VectorFst<A> &operator=(const Fst<A> &fst) { - if (this != &fst) SetImpl(new Impl(fst)); - return *this; - } - - // Read a VectorFst from an input stream; return NULL on error - static VectorFst<A> *Read(istream &strm, const FstReadOptions &opts) { - Impl* impl = Impl::Read(strm, opts); - return impl ? new VectorFst<A>(impl) : 0; - } - - // Read a VectorFst from a file; return NULL on error - // Empty filename reads from standard input - static VectorFst<A> *Read(const string &filename) { - Impl* impl = ImplToExpandedFst<Impl, MutableFst<A> >::Read(filename); - return impl ? new VectorFst<A>(impl) : 0; - } - - virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { - return WriteFst(*this, strm, opts); - } - - virtual bool Write(const string &filename) const { - return Fst<A>::WriteFile(filename); - } - - template <class F> - static bool WriteFst(const F &fst, ostream &strm, - const FstWriteOptions &opts); - - void ReserveStates(StateId n) { - MutateCheck(); - GetImpl()->ReserveStates(n); - } - - void ReserveArcs(StateId s, size_t n) { - MutateCheck(); - GetImpl()->ReserveArcs(s, n); - } - - virtual void InitStateIterator(StateIteratorData<Arc> *data) const { - GetImpl()->InitStateIterator(data); - } - - virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { - GetImpl()->InitArcIterator(s, data); - } - - virtual inline - void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *); - - private: - explicit VectorFst(Impl *impl) : ImplToMutableFst<Impl>(impl) {} - - // Makes visible to friends. 
- Impl *GetImpl() const { return ImplToFst< Impl, MutableFst<A> >::GetImpl(); } - - void SetImpl(Impl *impl, bool own_impl = true) { - ImplToFst< Impl, MutableFst<A> >::SetImpl(impl, own_impl); - } - - void MutateCheck() { return ImplToMutableFst<Impl>::MutateCheck(); } -}; - -// Specialization for VectorFst; see generic version in fst.h -// for sample usage (but use the VectorFst type!). This version -// should inline. -template <class A> -class StateIterator< VectorFst<A> > { - public: - typedef typename A::StateId StateId; - - explicit StateIterator(const VectorFst<A> &fst) - : nstates_(fst.GetImpl()->NumStates()), s_(0) {} - - bool Done() const { return s_ >= nstates_; } - - StateId Value() const { return s_; } - - void Next() { ++s_; } - - void Reset() { s_ = 0; } - - private: - StateId nstates_; - StateId s_; - - DISALLOW_COPY_AND_ASSIGN(StateIterator); -}; - -// Writes Fst to file, will call CountStates so may involve two passes if -// called from an Fst that is not derived from Expanded. 
-template <class A> -template <class F> -bool VectorFst<A>::WriteFst(const F &fst, ostream &strm, - const FstWriteOptions &opts) { - static const int kFileVersion = 2; - bool update_header = true; - FstHeader hdr; - hdr.SetStart(fst.Start()); - hdr.SetNumStates(kNoStateId); - size_t start_offset = 0; - if (fst.Properties(kExpanded, false) || (start_offset = strm.tellp()) != -1) { - hdr.SetNumStates(CountStates(fst)); - update_header = false; - } - uint64 properties = fst.Properties(kCopyProperties, false) | - VectorFstImpl<A>::kStaticProperties; - FstImpl<A>::WriteFstHeader(fst, strm, opts, kFileVersion, "vector", - properties, &hdr); - StateId num_states = 0; - for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { - typename A::StateId s = siter.Value(); - fst.Final(s).Write(strm); - int64 narcs = fst.NumArcs(s); - WriteType(strm, narcs); - for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) { - const A &arc = aiter.Value(); - WriteType(strm, arc.ilabel); - WriteType(strm, arc.olabel); - arc.weight.Write(strm); - WriteType(strm, arc.nextstate); - } - num_states++; - } - strm.flush(); - if (!strm) { - LOG(ERROR) << "VectorFst::Write: write failed: " << opts.source; - return false; - } - if (update_header) { - hdr.SetNumStates(num_states); - return FstImpl<A>::UpdateFstHeader(fst, strm, opts, kFileVersion, "vector", - properties, &hdr, start_offset); - } else { - if (num_states != hdr.NumStates()) { - LOG(ERROR) << "Inconsistent number of states observed during write"; - return false; - } - } - return true; -} - -// Specialization for VectorFst; see generic version in fst.h -// for sample usage (but use the VectorFst type!). This version -// should inline. 
-template <class A> -class ArcIterator< VectorFst<A> > { - public: - typedef typename A::StateId StateId; - - ArcIterator(const VectorFst<A> &fst, StateId s) - : arcs_(fst.GetImpl()->GetState(s)->arcs), i_(0) {} - - bool Done() const { return i_ >= arcs_.size(); } - - const A& Value() const { return arcs_[i_]; } - - void Next() { ++i_; } - - void Reset() { i_ = 0; } - - void Seek(size_t a) { i_ = a; } - - size_t Position() const { return i_; } - - uint32 Flags() const { - return kArcValueFlags; - } - - void SetFlags(uint32 f, uint32 m) {} - - private: - const vector<A>& arcs_; - size_t i_; - - DISALLOW_COPY_AND_ASSIGN(ArcIterator); -}; - -// Specialization for VectorFst; see generic version in fst.h -// for sample usage (but use the VectorFst type!). This version -// should inline. -template <class A> -class MutableArcIterator< VectorFst<A> > - : public MutableArcIteratorBase<A> { - public: - typedef typename A::StateId StateId; - typedef typename A::Weight Weight; - - MutableArcIterator(VectorFst<A> *fst, StateId s) : i_(0) { - fst->MutateCheck(); - state_ = fst->GetImpl()->GetState(s); - properties_ = &fst->GetImpl()->properties_; - } - - bool Done() const { return i_ >= state_->arcs.size(); } - - const A& Value() const { return state_->arcs[i_]; } - - void Next() { ++i_; } - - size_t Position() const { return i_; } - - void Reset() { i_ = 0; } - - void Seek(size_t a) { i_ = a; } - - void SetValue(const A &arc) { - A& oarc = state_->arcs[i_]; - if (oarc.ilabel != oarc.olabel) - *properties_ &= ~kNotAcceptor; - if (oarc.ilabel == 0) { - --state_->niepsilons; - *properties_ &= ~kIEpsilons; - if (oarc.olabel == 0) - *properties_ &= ~kEpsilons; - } - if (oarc.olabel == 0) { - --state_->noepsilons; - *properties_ &= ~kOEpsilons; - } - if (oarc.weight != Weight::Zero() && oarc.weight != Weight::One()) - *properties_ &= ~kWeighted; - oarc = arc; - if (arc.ilabel != arc.olabel) { - *properties_ |= kNotAcceptor; - *properties_ &= ~kAcceptor; - } - if (arc.ilabel == 0) { - 
++state_->niepsilons; - *properties_ |= kIEpsilons; - *properties_ &= ~kNoIEpsilons; - if (arc.olabel == 0) { - *properties_ |= kEpsilons; - *properties_ &= ~kNoEpsilons; - } - } - if (arc.olabel == 0) { - ++state_->noepsilons; - *properties_ |= kOEpsilons; - *properties_ &= ~kNoOEpsilons; - } - if (arc.weight != Weight::Zero() && arc.weight != Weight::One()) { - *properties_ |= kWeighted; - *properties_ &= ~kUnweighted; - } - *properties_ &= kSetArcProperties | kAcceptor | kNotAcceptor | - kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | - kOEpsilons | kNoOEpsilons | kWeighted | kUnweighted; - } - - uint32 Flags() const { - return kArcValueFlags; - } - - void SetFlags(uint32 f, uint32 m) {} - - - private: - // This allows base-class virtual access to non-virtual derived- - // class members of the same name. It makes the derived class more - // efficient to use but unsafe to further derive. - virtual bool Done_() const { return Done(); } - virtual const A& Value_() const { return Value(); } - virtual void Next_() { Next(); } - virtual size_t Position_() const { return Position(); } - virtual void Reset_() { Reset(); } - virtual void Seek_(size_t a) { Seek(a); } - virtual void SetValue_(const A &a) { SetValue(a); } - uint32 Flags_() const { return Flags(); } - void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); } - - struct VectorState<A> *state_; - uint64 *properties_; - size_t i_; - - DISALLOW_COPY_AND_ASSIGN(MutableArcIterator); -}; - -// Provide information needed for the generic mutable arc iterator -template <class A> inline -void VectorFst<A>::InitMutableArcIterator( - StateId s, MutableArcIteratorData<A> *data) { - data->base = new MutableArcIterator< VectorFst<A> >(this, s); -} - -// A useful alias when using StdArc. 
-typedef VectorFst<StdArc> StdVectorFst; - -} // namespace fst - -#endif // FST_LIB_VECTOR_FST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/verify.h b/kaldi_io/src/tools/openfst/include/fst/verify.h deleted file mode 100644 index 576cfca..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/verify.h +++ /dev/null @@ -1,126 +0,0 @@ -// verify.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// Function to verify an Fst's contents - -#ifndef FST_LIB_VERIFY_H__ -#define FST_LIB_VERIFY_H__ - -#include <fst/fst.h> -#include <fst/test-properties.h> - - -namespace fst { - -// Verifies that an Fst's contents are sane. 
-template<class Arc> -bool Verify(const Fst<Arc> &fst, bool allow_negative_labels = false) { - typedef typename Arc::Label Label; - typedef typename Arc::Weight Weight; - typedef typename Arc::StateId StateId; - - StateId start = fst.Start(); - const SymbolTable *isyms = fst.InputSymbols(); - const SymbolTable *osyms = fst.OutputSymbols(); - - // Count states - StateId ns = 0; - for (StateIterator< Fst<Arc> > siter(fst); - !siter.Done(); - siter.Next()) - ++ns; - - if (start == kNoStateId && ns > 0) { - LOG(ERROR) << "Verify: Fst start state ID unset"; - return false; - } else if (start >= ns) { - LOG(ERROR) << "Verify: Fst start state ID exceeds number of states"; - return false; - } - - for (StateIterator< Fst<Arc> > siter(fst); - !siter.Done(); - siter.Next()) { - StateId s = siter.Value(); - size_t na = 0; - for (ArcIterator< Fst<Arc> > aiter(fst, s); - !aiter.Done(); - aiter.Next()) { - const Arc &arc =aiter.Value(); - if (!allow_negative_labels && arc.ilabel < 0) { - LOG(ERROR) << "Verify: Fst input label ID of arc at position " - << na << " of state " << s << " is negative"; - return false; - } else if (isyms && isyms->Find(arc.ilabel) == "") { - LOG(ERROR) << "Verify: Fst input label ID " << arc.ilabel - << " of arc at position " << na << " of state " << s - << " is missing from input symbol table \"" - << isyms->Name() << "\""; - return false; - } else if (!allow_negative_labels && arc.olabel < 0) { - LOG(ERROR) << "Verify: Fst output label ID of arc at position " - << na << " of state " << s << " is negative"; - return false; - } else if (osyms && osyms->Find(arc.olabel) == "") { - LOG(ERROR) << "Verify: Fst output label ID " << arc.olabel - << " of arc at position " << na << " of state " << s - << " is missing from output symbol table \"" - << osyms->Name() << "\""; - return false; - } else if (!arc.weight.Member() || arc.weight == Weight::Zero()) { - LOG(ERROR) << "Verify: Fst weight of arc at position " - << na << " of state " << s << " is invalid"; - 
return false; - } else if (arc.nextstate < 0) { - LOG(ERROR) << "Verify: Fst destination state ID of arc at position " - << na << " of state " << s << " is negative"; - return false; - } else if (arc.nextstate >= ns) { - LOG(ERROR) << "Verify: Fst destination state ID of arc at position " - << na << " of state " << s - << " exceeds number of states"; - return false; - } - ++na; - } - if (!fst.Final(s).Member()) { - LOG(ERROR) << "Verify: Fst final weight of state " << s << " is invalid"; - return false; - } - } - uint64 fst_props = fst.Properties(kFstProperties, false); - if (fst_props & kError) { - LOG(ERROR) << "Verify: Fst error property is set"; - return false; - } - - uint64 known_props; - uint64 test_props = ComputeProperties(fst, kFstProperties, &known_props, - false); - if (!CompatProperties(fst_props, test_props)) { - LOG(ERROR) << "Verify: stored Fst properties incorrect " - << "(props1 = stored props, props2 = tested)"; - return false; - } else { - return true; - } -} - -} // namespace fst - -#endif // FST_LIB_VERIFY_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/visit.h b/kaldi_io/src/tools/openfst/include/fst/visit.h deleted file mode 100644 index 5f5059a..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/visit.h +++ /dev/null @@ -1,284 +0,0 @@ -// visit.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. 
-// Author: [email protected] (Michael Riley) -// -// \file -// Queue-dependent visitation of finite-state transducers. See also -// dfs-visit.h. - -#ifndef FST_LIB_VISIT_H__ -#define FST_LIB_VISIT_H__ - - -#include <fst/arcfilter.h> -#include <fst/mutable-fst.h> - - -namespace fst { - -// Visitor Interface - class determines actions taken during a visit. -// If any of the boolean member functions return false, the visit is -// aborted by first calling FinishState() on all unfinished (grey) -// states and then calling FinishVisit(). -// -// Note this is more general than the visitor interface in -// dfs-visit.h but lacks some DFS-specific behavior. -// -// template <class Arc> -// class Visitor { -// public: -// typedef typename Arc::StateId StateId; -// -// Visitor(T *return_data); -// // Invoked before visit -// void InitVisit(const Fst<Arc> &fst); -// // Invoked when state discovered (2nd arg is visitation root) -// bool InitState(StateId s, StateId root); -// // Invoked when arc to white/undiscovered state examined -// bool WhiteArc(StateId s, const Arc &a); -// // Invoked when arc to grey/unfinished state examined -// bool GreyArc(StateId s, const Arc &a); -// // Invoked when arc to black/finished state examined -// bool BlackArc(StateId s, const Arc &a); -// // Invoked when state finished. -// void FinishState(StateId s); -// // Invoked after visit -// void FinishVisit(); -// }; - -// Performs queue-dependent visitation. Visitor class argument -// determines actions and contains any return data. ArcFilter -// determines arcs that are considered. -// -// Note this is more general than DfsVisit() in dfs-visit.h but lacks -// some DFS-specific Visitor behavior. 
-template <class Arc, class V, class Q, class ArcFilter> -void Visit(const Fst<Arc> &fst, V *visitor, Q *queue, ArcFilter filter) { - - typedef typename Arc::StateId StateId; - typedef ArcIterator< Fst<Arc> > AIterator; - - visitor->InitVisit(fst); - - StateId start = fst.Start(); - if (start == kNoStateId) { - visitor->FinishVisit(); - return; - } - - // An Fst state's visit color - const unsigned kWhiteState = 0x01; // Undiscovered - const unsigned kGreyState = 0x02; // Discovered & unfinished - const unsigned kBlackState = 0x04; // Finished - - // We destroy an iterator as soon as possible and mark it so - const unsigned kArcIterDone = 0x08; // Arc iterator done and destroyed - - vector<unsigned char> state_status; - vector<AIterator *> arc_iterator; - - StateId nstates = start + 1; // # of known states in general case - bool expanded = false; - if (fst.Properties(kExpanded, false)) { // tests if expanded case, then - nstates = CountStates(fst); // uses ExpandedFst::NumStates(). - expanded = true; - } - - state_status.resize(nstates, kWhiteState); - arc_iterator.resize(nstates); - StateIterator< Fst<Arc> > siter(fst); - - // Continues visit while true - bool visit = true; - - // Iterates over trees in visit forest. - for (StateId root = start; visit && root < nstates;) { - visit = visitor->InitState(root, root); - state_status[root] = kGreyState; - queue->Enqueue(root); - while (!queue->Empty()) { - StateId s = queue->Head(); - if (s >= state_status.size()) { - nstates = s + 1; - state_status.resize(nstates, kWhiteState); - arc_iterator.resize(nstates); - } - // Creates arc iterator if needed. - if (arc_iterator[s] == 0 && !(state_status[s] & kArcIterDone) && visit) - arc_iterator[s] = new AIterator(fst, s); - // Deletes arc iterator if done. 
- AIterator *aiter = arc_iterator[s]; - if ((aiter && aiter->Done()) || !visit) { - delete aiter; - arc_iterator[s] = 0; - state_status[s] |= kArcIterDone; - } - // Dequeues state and marks black if done - if (state_status[s] & kArcIterDone) { - queue->Dequeue(); - visitor->FinishState(s); - state_status[s] = kBlackState; - continue; - } - - const Arc &arc = aiter->Value(); - if (arc.nextstate >= state_status.size()) { - nstates = arc.nextstate + 1; - state_status.resize(nstates, kWhiteState); - arc_iterator.resize(nstates); - } - // Visits respective arc types - if (filter(arc)) { - // Enqueues destination state and marks grey if white - if (state_status[arc.nextstate] == kWhiteState) { - visit = visitor->WhiteArc(s, arc); - if (!visit) continue; - visit = visitor->InitState(arc.nextstate, root); - state_status[arc.nextstate] = kGreyState; - queue->Enqueue(arc.nextstate); - } else if (state_status[arc.nextstate] == kBlackState) { - visit = visitor->BlackArc(s, arc); - } else { - visit = visitor->GreyArc(s, arc); - } - } - aiter->Next(); - // Destroys an iterator ASAP for efficiency. - if (aiter->Done()) { - delete aiter; - arc_iterator[s] = 0; - state_status[s] |= kArcIterDone; - } - } - // Finds next tree root - for (root = root == start ? 0 : root + 1; - root < nstates && state_status[root] != kWhiteState; - ++root) { - } - - // Check for a state beyond the largest known state - if (!expanded && root == nstates) { - for (; !siter.Done(); siter.Next()) { - if (siter.Value() == nstates) { - ++nstates; - state_status.push_back(kWhiteState); - arc_iterator.push_back(0); - break; - } - } - } - } - visitor->FinishVisit(); -} - - -template <class Arc, class V, class Q> -inline void Visit(const Fst<Arc> &fst, V *visitor, Q* queue) { - Visit(fst, visitor, queue, AnyArcFilter<Arc>()); -} - -// Copies input FST to mutable FST following queue order. 
-template <class A> -class CopyVisitor { - public: - typedef A Arc; - typedef typename A::StateId StateId; - - CopyVisitor(MutableFst<Arc> *ofst) : ifst_(0), ofst_(ofst) {} - - void InitVisit(const Fst<A> &ifst) { - ifst_ = &ifst; - ofst_->DeleteStates(); - ofst_->SetStart(ifst_->Start()); - } - - bool InitState(StateId s, StateId) { - while (ofst_->NumStates() <= s) - ofst_->AddState(); - return true; - } - - bool WhiteArc(StateId s, const Arc &arc) { - ofst_->AddArc(s, arc); - return true; - } - - bool GreyArc(StateId s, const Arc &arc) { - ofst_->AddArc(s, arc); - return true; - } - - bool BlackArc(StateId s, const Arc &arc) { - ofst_->AddArc(s, arc); - return true; - } - - void FinishState(StateId s) { - ofst_->SetFinal(s, ifst_->Final(s)); - } - - void FinishVisit() {} - - private: - const Fst<Arc> *ifst_; - MutableFst<Arc> *ofst_; -}; - - -// Visits input FST up to a state limit following queue order. If -// 'access_only' is true, aborts on visiting first state not -// accessible from the initial state. 
-template <class A> -class PartialVisitor { - public: - typedef A Arc; - typedef typename A::StateId StateId; - - explicit PartialVisitor(StateId maxvisit, bool access_only = false) - : maxvisit_(maxvisit), - access_only_(access_only), - start_(kNoStateId) {} - - void InitVisit(const Fst<A> &ifst) { - nvisit_ = 0; - start_ = ifst.Start(); - } - - bool InitState(StateId s, StateId root) { - if (access_only_ && root != start_) - return false; - ++nvisit_; - return nvisit_ <= maxvisit_; - } - - bool WhiteArc(StateId s, const Arc &arc) { return true; } - bool GreyArc(StateId s, const Arc &arc) { return true; } - bool BlackArc(StateId s, const Arc &arc) { return true; } - void FinishState(StateId s) {} - void FinishVisit() {} - - private: - StateId maxvisit_; - bool access_only_; - StateId nvisit_; - StateId start_; - -}; - - -} // namespace fst - -#endif // FST_LIB_VISIT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/weight.h b/kaldi_io/src/tools/openfst/include/fst/weight.h deleted file mode 100644 index 7eb4bb1..0000000 --- a/kaldi_io/src/tools/openfst/include/fst/weight.h +++ /dev/null @@ -1,179 +0,0 @@ -// weight.h - -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Copyright 2005-2010 Google, Inc. -// Author: [email protected] (Michael Riley) -// -// \file -// General weight set and associated semiring operation definitions. 
//
// A semiring is specified by two binary operations Plus and Times and
// two designated elements Zero and One with the following properties:
//   Plus: associative, commutative, and has Zero as its identity.
//   Times: associative and has identity One, distributes w.r.t. Plus, and
//     has Zero as an annihilator:
//          Times(Zero(), a) == Times(a, Zero()) == Zero().
//
// A left semiring distributes on the left; a right semiring is
// similarly defined.
//
// A Weight class must have binary functions Plus and Times and
// static member functions Zero() and One(), and these must form
// (at least) a left or right semiring.
//
// In addition, the following should be defined for a Weight:
//   Member: predicate on set membership.
//   NoWeight: static member function that returns an element that is
//      not a set member; used to signal an error.
//   >>: reads textual representation of a weight.
//   <<: prints textual representation of a weight.
//   Read(istream &strm): reads binary representation of a weight.
//   Write(ostream &strm): writes binary representation of a weight.
//   Hash: maps weight to size_t.
//   ApproxEqual: approximate equality (for inexact weights).
//   Quantize: quantizes w.r.t. delta (for inexact weights).
//   Divide: for all a,b,c s.t. Times(a, b) == c
//     --> b' = Divide(c, a, DIVIDE_LEFT) if a left semiring, b'.Member()
//      and Times(a, b') == c
//     --> a' = Divide(c, b, DIVIDE_RIGHT) if a right semiring, a'.Member()
//      and Times(a', b) == c
//     --> b' = Divide(c, a) = Divide(c, a, DIVIDE_ANY) =
//      Divide(c, a, DIVIDE_LEFT) = Divide(c, a, DIVIDE_RIGHT) if a
//      commutative semiring, b'.Member() and
//      Times(a, b') == Times(b', a) == c
//   ReverseWeight: the type of the corresponding reverse weight.
//     Typically the same type as Weight for a (both left and right) semiring.
//     For the left string semiring, it is the right string semiring.
//   Reverse: a mapping from Weight to ReverseWeight s.t.
-// --> Reverse(Reverse(a)) = a -// --> Reverse(Plus(a, b)) = Plus(Reverse(a), Reverse(b)) -// --> Reverse(Times(a, b)) = Times(Reverse(b), Reverse(a)) -// Typically the identity mapping in a (both left and right) semiring. -// In the left string semiring, it maps to the reverse string -// in the right string semiring. -// Properties: specifies additional properties that hold: -// LeftSemiring: indicates weights form a left semiring. -// RightSemiring: indicates weights form a right semiring. -// Commutative: for all a,b: Times(a,b) == Times(b,a) -// Idempotent: for all a: Plus(a, a) == a. -// Path: for all a, b: Plus(a, b) == a or Plus(a, b) == b. - - -#ifndef FST_LIB_WEIGHT_H__ -#define FST_LIB_WEIGHT_H__ - -#include <cmath> -#include <cctype> -#include <iostream> -#include <sstream> - -#include <fst/compat.h> - -#include <fst/util.h> - - -namespace fst { - -// -// CONSTANT DEFINITIONS -// - -// A representable float near .001 -const float kDelta = 1.0F/1024.0F; - -// For all a,b,c: Times(c, Plus(a,b)) = Plus(Times(c,a), Times(c, b)) -const uint64 kLeftSemiring = 0x0000000000000001ULL; - -// For all a,b,c: Times(Plus(a,b), c) = Plus(Times(a,c), Times(b, c)) -const uint64 kRightSemiring = 0x0000000000000002ULL; - -const uint64 kSemiring = kLeftSemiring | kRightSemiring; - -// For all a,b: Times(a,b) = Times(b,a) -const uint64 kCommutative = 0x0000000000000004ULL; - -// For all a: Plus(a, a) = a -const uint64 kIdempotent = 0x0000000000000008ULL; - -// For all a,b: Plus(a,b) = a or Plus(a,b) = b -const uint64 kPath = 0x0000000000000010ULL; - - -// Determines direction of division. -enum DivideType { DIVIDE_LEFT, // left division - DIVIDE_RIGHT, // right division - DIVIDE_ANY }; // division in a commutative semiring - -// NATURAL ORDER -// -// By definition: -// a <= b iff a + b = a -// The natural order is a negative partial order iff the semiring is -// idempotent. It is trivially monotonic for plus. It is left -// (resp. 
right) monotonic for times iff the semiring is left -// (resp. right) distributive. It is a total order iff the semiring -// has the path property. See Mohri, "Semiring Framework and -// Algorithms for Shortest-Distance Problems", Journal of Automata, -// Languages and Combinatorics 7(3):321-350, 2002. We define the -// strict version of this order below. - -template <class W> -class NaturalLess { - public: - typedef W Weight; - - NaturalLess() { - if (!(W::Properties() & kIdempotent)) { - FSTERROR() << "NaturalLess: Weight type is not idempotent: " - << W::Type(); - } - } - - bool operator()(const W &w1, const W &w2) const { - return (Plus(w1, w2) == w1) && w1 != w2; - } -}; - - -// Power is the iterated product for arbitrary semirings such that -// Power(w, 0) is One() for the semiring, and -// Power(w, n) = Times(Power(w, n-1), w) - -template <class W> -W Power(W w, size_t n) { - W result = W::One(); - for (size_t i = 0; i < n; ++i) { - result = Times(result, w); - } - return result; -} - -// General weight converter - raises error. -template <class W1, class W2> -struct WeightConvert { - W2 operator()(W1 w1) const { - FSTERROR() << "WeightConvert: can't convert weight from \"" - << W1::Type() << "\" to \"" << W2::Type(); - return W2::NoWeight(); - } -}; - -// Specialized weight converter to self. -template <class W> -struct WeightConvert<W, W> { - W operator()(W w) const { return w; } -}; - -} // namespace fst - -#endif // FST_LIB_WEIGHT_H__ |