diff options
Diffstat (limited to 'kaldi_io/src/tools/openfst')
165 files changed, 46166 insertions, 0 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/accumulator.h b/kaldi_io/src/tools/openfst/include/fst/accumulator.h new file mode 100644 index 0000000..81d1847 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/accumulator.h @@ -0,0 +1,745 @@ +// accumulator.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Classes to accumulate arc weights. Useful for weight lookahead. + +#ifndef FST_LIB_ACCUMULATOR_H__ +#define FST_LIB_ACCUMULATOR_H__ + +#include <algorithm> +#include <functional> +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <vector> +using std::vector; + +#include <fst/arcfilter.h> +#include <fst/arcsort.h> +#include <fst/dfs-visit.h> +#include <fst/expanded-fst.h> +#include <fst/replace.h> + +namespace fst { + +// This class accumulates arc weights using the semiring Plus(). 
+template <class A> +class DefaultAccumulator { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + DefaultAccumulator() {} + + DefaultAccumulator(const DefaultAccumulator<A> &acc) {} + + void Init(const Fst<A>& fst, bool copy = false) {} + + void SetState(StateId) {} + + Weight Sum(Weight w, Weight v) { + return Plus(w, v); + } + + template <class ArcIterator> + Weight Sum(Weight w, ArcIterator *aiter, ssize_t begin, + ssize_t end) { + Weight sum = w; + aiter->Seek(begin); + for (ssize_t pos = begin; pos < end; aiter->Next(), ++pos) + sum = Plus(sum, aiter->Value().weight); + return sum; + } + + bool Error() const { return false; } + + private: + void operator=(const DefaultAccumulator<A> &); // Disallow +}; + + +// This class accumulates arc weights using the log semiring Plus() +// assuming an arc weight has a WeightConvert specialization to +// and from log64 weights. +template <class A> +class LogAccumulator { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + LogAccumulator() {} + + LogAccumulator(const LogAccumulator<A> &acc) {} + + void Init(const Fst<A>& fst, bool copy = false) {} + + void SetState(StateId) {} + + Weight Sum(Weight w, Weight v) { + return LogPlus(w, v); + } + + template <class ArcIterator> + Weight Sum(Weight w, ArcIterator *aiter, ssize_t begin, + ssize_t end) { + Weight sum = w; + aiter->Seek(begin); + for (ssize_t pos = begin; pos < end; aiter->Next(), ++pos) + sum = LogPlus(sum, aiter->Value().weight); + return sum; + } + + bool Error() const { return false; } + + private: + double LogPosExp(double x) { return log(1.0F + exp(-x)); } + + Weight LogPlus(Weight w, Weight v) { + double f1 = to_log_weight_(w).Value(); + double f2 = to_log_weight_(v).Value(); + if (f1 > f2) + return to_weight_(f2 - LogPosExp(f1 - f2)); + else + return to_weight_(f1 - LogPosExp(f2 - f1)); + } + + WeightConvert<Weight, Log64Weight> to_log_weight_; + 
WeightConvert<Log64Weight, Weight> to_weight_; + + void operator=(const LogAccumulator<A> &); // Disallow +}; + + +// Stores shareable data for fast log accumulator copies. +class FastLogAccumulatorData { + public: + FastLogAccumulatorData() {} + + vector<double> *Weights() { return &weights_; } + vector<ssize_t> *WeightPositions() { return &weight_positions_; } + double *WeightEnd() { return &(weights_[weights_.size() - 1]); }; + int RefCount() const { return ref_count_.count(); } + int IncrRefCount() { return ref_count_.Incr(); } + int DecrRefCount() { return ref_count_.Decr(); } + + private: + // Cummulative weight per state for all states s.t. # of arcs > + // arc_limit_ with arcs in order. Special first element per state + // being Log64Weight::Zero(); + vector<double> weights_; + // Maps from state to corresponding beginning weight position in + // weights_. Position -1 means no pre-computed weights for that + // state. + vector<ssize_t> weight_positions_; + RefCounter ref_count_; // Reference count. + + DISALLOW_COPY_AND_ASSIGN(FastLogAccumulatorData); +}; + + +// This class accumulates arc weights using the log semiring Plus() +// assuming an arc weight has a WeightConvert specialization to and +// from log64 weights. The member function Init(fst) has to be called +// to setup pre-computed weight information. 
+template <class A> +class FastLogAccumulator { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + explicit FastLogAccumulator(ssize_t arc_limit = 20, ssize_t arc_period = 10) + : arc_limit_(arc_limit), + arc_period_(arc_period), + data_(new FastLogAccumulatorData()), + error_(false) {} + + FastLogAccumulator(const FastLogAccumulator<A> &acc) + : arc_limit_(acc.arc_limit_), + arc_period_(acc.arc_period_), + data_(acc.data_), + error_(acc.error_) { + data_->IncrRefCount(); + } + + ~FastLogAccumulator() { + if (!data_->DecrRefCount()) + delete data_; + } + + void SetState(StateId s) { + vector<double> &weights = *data_->Weights(); + vector<ssize_t> &weight_positions = *data_->WeightPositions(); + + if (weight_positions.size() <= s) { + FSTERROR() << "FastLogAccumulator::SetState: invalid state id."; + error_ = true; + return; + } + + ssize_t pos = weight_positions[s]; + if (pos >= 0) + state_weights_ = &(weights[pos]); + else + state_weights_ = 0; + } + + Weight Sum(Weight w, Weight v) { + return LogPlus(w, v); + } + + template <class ArcIterator> + Weight Sum(Weight w, ArcIterator *aiter, ssize_t begin, + ssize_t end) { + if (error_) return Weight::NoWeight(); + Weight sum = w; + // Finds begin and end of pre-stored weights + ssize_t index_begin = -1, index_end = -1; + ssize_t stored_begin = end, stored_end = end; + if (state_weights_ != 0) { + index_begin = begin > 0 ? 
(begin - 1)/ arc_period_ + 1 : 0; + index_end = end / arc_period_; + stored_begin = index_begin * arc_period_; + stored_end = index_end * arc_period_; + } + // Computes sum before pre-stored weights + if (begin < stored_begin) { + ssize_t pos_end = min(stored_begin, end); + aiter->Seek(begin); + for (ssize_t pos = begin; pos < pos_end; aiter->Next(), ++pos) + sum = LogPlus(sum, aiter->Value().weight); + } + // Computes sum between pre-stored weights + if (stored_begin < stored_end) { + sum = LogPlus(sum, LogMinus(state_weights_[index_end], + state_weights_[index_begin])); + } + // Computes sum after pre-stored weights + if (stored_end < end) { + ssize_t pos_start = max(stored_begin, stored_end); + aiter->Seek(pos_start); + for (ssize_t pos = pos_start; pos < end; aiter->Next(), ++pos) + sum = LogPlus(sum, aiter->Value().weight); + } + return sum; + } + + template <class F> + void Init(const F &fst, bool copy = false) { + if (copy) + return; + vector<double> &weights = *data_->Weights(); + vector<ssize_t> &weight_positions = *data_->WeightPositions(); + if (!weights.empty() || arc_limit_ < arc_period_) { + FSTERROR() << "FastLogAccumulator: initialization error."; + error_ = true; + return; + } + weight_positions.reserve(CountStates(fst)); + + ssize_t weight_position = 0; + for(StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { + StateId s = siter.Value(); + if (fst.NumArcs(s) >= arc_limit_) { + double sum = FloatLimits<double>::PosInfinity(); + weight_positions.push_back(weight_position); + weights.push_back(sum); + ++weight_position; + ssize_t narcs = 0; + for(ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) { + const A &arc = aiter.Value(); + sum = LogPlus(sum, arc.weight); + // Stores cumulative weight distribution per arc_period_. 
+ if (++narcs % arc_period_ == 0) { + weights.push_back(sum); + ++weight_position; + } + } + } else { + weight_positions.push_back(-1); + } + } + } + + bool Error() const { return error_; } + + private: + double LogPosExp(double x) { + return x == FloatLimits<double>::PosInfinity() ? + 0.0 : log(1.0F + exp(-x)); + } + + double LogMinusExp(double x) { + return x == FloatLimits<double>::PosInfinity() ? + 0.0 : log(1.0F - exp(-x)); + } + + Weight LogPlus(Weight w, Weight v) { + double f1 = to_log_weight_(w).Value(); + double f2 = to_log_weight_(v).Value(); + if (f1 > f2) + return to_weight_(f2 - LogPosExp(f1 - f2)); + else + return to_weight_(f1 - LogPosExp(f2 - f1)); + } + + double LogPlus(double f1, Weight v) { + double f2 = to_log_weight_(v).Value(); + if (f1 == FloatLimits<double>::PosInfinity()) + return f2; + else if (f1 > f2) + return f2 - LogPosExp(f1 - f2); + else + return f1 - LogPosExp(f2 - f1); + } + + Weight LogMinus(double f1, double f2) { + if (f1 >= f2) { + FSTERROR() << "FastLogAcumulator::LogMinus: f1 >= f2 with f1 = " << f1 + << " and f2 = " << f2; + error_ = true; + return Weight::NoWeight(); + } + if (f2 == FloatLimits<double>::PosInfinity()) + return to_weight_(f1); + else + return to_weight_(f1 - LogMinusExp(f2 - f1)); + } + + WeightConvert<Weight, Log64Weight> to_log_weight_; + WeightConvert<Log64Weight, Weight> to_weight_; + + ssize_t arc_limit_; // Minimum # of arcs to pre-compute state + ssize_t arc_period_; // Save cumulative weights per 'arc_period_'. + bool init_; // Cumulative weights initialized? + FastLogAccumulatorData *data_; + double *state_weights_; + bool error_; + + void operator=(const FastLogAccumulator<A> &); // Disallow +}; + + +// Stores shareable data for cache log accumulator copies. +// All copies share the same cache. 
+template <class A> +class CacheLogAccumulatorData { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + CacheLogAccumulatorData(bool gc, size_t gc_limit) + : cache_gc_(gc), cache_limit_(gc_limit), cache_size_(0) {} + + ~CacheLogAccumulatorData() { + for(typename unordered_map<StateId, CacheState>::iterator it = cache_.begin(); + it != cache_.end(); + ++it) + delete it->second.weights; + } + + bool CacheDisabled() const { return cache_gc_ && cache_limit_ == 0; } + + vector<double> *GetWeights(StateId s) { + typename unordered_map<StateId, CacheState>::iterator it = cache_.find(s); + if (it != cache_.end()) { + it->second.recent = true; + return it->second.weights; + } else { + return 0; + } + } + + void AddWeights(StateId s, vector<double> *weights) { + if (cache_gc_ && cache_size_ >= cache_limit_) + GC(false); + cache_.insert(make_pair(s, CacheState(weights, true))); + if (cache_gc_) + cache_size_ += weights->capacity() * sizeof(double); + } + + int RefCount() const { return ref_count_.count(); } + int IncrRefCount() { return ref_count_.Incr(); } + int DecrRefCount() { return ref_count_.Decr(); } + + private: + // Cached information for a given state. + struct CacheState { + vector<double>* weights; // Accumulated weights for this state. + bool recent; // Has this state been accessed since last GC? + + CacheState(vector<double> *w, bool r) : weights(w), recent(r) {} + }; + + // Garbage collect: Delete from cache states that have not been + // accessed since the last GC ('free_recent = false') until + // 'cache_size_' is 2/3 of 'cache_limit_'. If it does not free enough + // memory, start deleting recently accessed states. 
+ void GC(bool free_recent) { + size_t cache_target = (2 * cache_limit_)/3 + 1; + typename unordered_map<StateId, CacheState>::iterator it = cache_.begin(); + while (it != cache_.end() && cache_size_ > cache_target) { + CacheState &cs = it->second; + if (free_recent || !cs.recent) { + cache_size_ -= cs.weights->capacity() * sizeof(double); + delete cs.weights; + cache_.erase(it++); + } else { + cs.recent = false; + ++it; + } + } + if (!free_recent && cache_size_ > cache_target) + GC(true); + } + + unordered_map<StateId, CacheState> cache_; // Cache + bool cache_gc_; // Enable garbage collection + size_t cache_limit_; // # of bytes cached + size_t cache_size_; // # of bytes allowed before GC + RefCounter ref_count_; + + DISALLOW_COPY_AND_ASSIGN(CacheLogAccumulatorData); +}; + +// This class accumulates arc weights using the log semiring Plus() +// has a WeightConvert specialization to and from log64 weights. It +// is similar to the FastLogAccumator. However here, the accumulated +// weights are pre-computed and stored only for the states that are +// visited. The member function Init(fst) has to be called to setup +// this accumulator. +template <class A> +class CacheLogAccumulator { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + explicit CacheLogAccumulator(ssize_t arc_limit = 10, bool gc = false, + size_t gc_limit = 10 * 1024 * 1024) + : arc_limit_(arc_limit), fst_(0), data_( + new CacheLogAccumulatorData<A>(gc, gc_limit)), s_(kNoStateId), + error_(false) {} + + CacheLogAccumulator(const CacheLogAccumulator<A> &acc) + : arc_limit_(acc.arc_limit_), fst_(acc.fst_ ? acc.fst_->Copy() : 0), + data_(acc.data_), s_(kNoStateId), error_(acc.error_) { + data_->IncrRefCount(); + } + + ~CacheLogAccumulator() { + if (fst_) + delete fst_; + if (!data_->DecrRefCount()) + delete data_; + } + + // Arg 'arc_limit' specifies minimum # of arcs to pre-compute state. 
+ void Init(const Fst<A> &fst, bool copy = false) { + if (copy) { + delete fst_; + } else if (fst_) { + FSTERROR() << "CacheLogAccumulator: initialization error."; + error_ = true; + return; + } + fst_ = fst.Copy(); + } + + void SetState(StateId s, int depth = 0) { + if (s == s_) + return; + s_ = s; + + if (data_->CacheDisabled() || error_) { + weights_ = 0; + return; + } + + if (!fst_) { + FSTERROR() << "CacheLogAccumulator::SetState: incorrectly initialized."; + error_ = true; + weights_ = 0; + return; + } + + weights_ = data_->GetWeights(s); + if ((weights_ == 0) && (fst_->NumArcs(s) >= arc_limit_)) { + weights_ = new vector<double>; + weights_->reserve(fst_->NumArcs(s) + 1); + weights_->push_back(FloatLimits<double>::PosInfinity()); + data_->AddWeights(s, weights_); + } + } + + Weight Sum(Weight w, Weight v) { + return LogPlus(w, v); + } + + template <class Iterator> + Weight Sum(Weight w, Iterator *aiter, ssize_t begin, + ssize_t end) { + if (weights_ == 0) { + Weight sum = w; + aiter->Seek(begin); + for (ssize_t pos = begin; pos < end; aiter->Next(), ++pos) + sum = LogPlus(sum, aiter->Value().weight); + return sum; + } else { + if (weights_->size() <= end) + for (aiter->Seek(weights_->size() - 1); + weights_->size() <= end; + aiter->Next()) + weights_->push_back(LogPlus(weights_->back(), + aiter->Value().weight)); + return LogPlus(w, LogMinus((*weights_)[end], (*weights_)[begin])); + } + } + + template <class Iterator> + size_t LowerBound(double w, Iterator *aiter) { + if (weights_ != 0) { + return lower_bound(weights_->begin() + 1, + weights_->end(), + w, + std::greater<double>()) + - weights_->begin() - 1; + } else { + size_t n = 0; + double x = FloatLimits<double>::PosInfinity(); + for(aiter->Reset(); !aiter->Done(); aiter->Next(), ++n) { + x = LogPlus(x, aiter->Value().weight); + if (x < w) break; + } + return n; + } + } + + bool Error() const { return error_; } + + private: + double LogPosExp(double x) { + return x == FloatLimits<double>::PosInfinity() ? 
+ 0.0 : log(1.0F + exp(-x)); + } + + double LogMinusExp(double x) { + return x == FloatLimits<double>::PosInfinity() ? + 0.0 : log(1.0F - exp(-x)); + } + + Weight LogPlus(Weight w, Weight v) { + double f1 = to_log_weight_(w).Value(); + double f2 = to_log_weight_(v).Value(); + if (f1 > f2) + return to_weight_(f2 - LogPosExp(f1 - f2)); + else + return to_weight_(f1 - LogPosExp(f2 - f1)); + } + + double LogPlus(double f1, Weight v) { + double f2 = to_log_weight_(v).Value(); + if (f1 == FloatLimits<double>::PosInfinity()) + return f2; + else if (f1 > f2) + return f2 - LogPosExp(f1 - f2); + else + return f1 - LogPosExp(f2 - f1); + } + + Weight LogMinus(double f1, double f2) { + if (f1 >= f2) { + FSTERROR() << "CacheLogAcumulator::LogMinus: f1 >= f2 with f1 = " << f1 + << " and f2 = " << f2; + error_ = true; + return Weight::NoWeight(); + } + if (f2 == FloatLimits<double>::PosInfinity()) + return to_weight_(f1); + else + return to_weight_(f1 - LogMinusExp(f2 - f1)); + } + + WeightConvert<Weight, Log64Weight> to_log_weight_; + WeightConvert<Log64Weight, Weight> to_weight_; + + ssize_t arc_limit_; // Minimum # of arcs to cache a state + vector<double> *weights_; // Accumulated weights for cur. state + const Fst<A>* fst_; // Input fst + CacheLogAccumulatorData<A> *data_; // Cache data + StateId s_; // Current state + bool error_; + + void operator=(const CacheLogAccumulator<A> &); // Disallow +}; + + +// Stores shareable data for replace accumulator copies. 
+template <class Accumulator, class T> +class ReplaceAccumulatorData { + public: + typedef typename Accumulator::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef T StateTable; + typedef typename T::StateTuple StateTuple; + + ReplaceAccumulatorData() : state_table_(0) {} + + ReplaceAccumulatorData(const vector<Accumulator*> &accumulators) + : state_table_(0), accumulators_(accumulators) {} + + ~ReplaceAccumulatorData() { + for (size_t i = 0; i < fst_array_.size(); ++i) + delete fst_array_[i]; + for (size_t i = 0; i < accumulators_.size(); ++i) + delete accumulators_[i]; + } + + void Init(const vector<pair<Label, const Fst<Arc>*> > &fst_tuples, + const StateTable *state_table) { + state_table_ = state_table; + accumulators_.resize(fst_tuples.size()); + for (size_t i = 0; i < accumulators_.size(); ++i) { + if (!accumulators_[i]) + accumulators_[i] = new Accumulator; + accumulators_[i]->Init(*(fst_tuples[i].second)); + fst_array_.push_back(fst_tuples[i].second->Copy()); + } + } + + const StateTuple &GetTuple(StateId s) const { + return state_table_->Tuple(s); + } + + Accumulator *GetAccumulator(size_t i) { return accumulators_[i]; } + + const Fst<Arc> *GetFst(size_t i) const { return fst_array_[i]; } + + int RefCount() const { return ref_count_.count(); } + int IncrRefCount() { return ref_count_.Incr(); } + int DecrRefCount() { return ref_count_.Decr(); } + + private: + const T * state_table_; + vector<Accumulator*> accumulators_; + vector<const Fst<Arc>*> fst_array_; + RefCounter ref_count_; + + DISALLOW_COPY_AND_ASSIGN(ReplaceAccumulatorData); +}; + +// This class accumulates weights in a ReplaceFst. The 'Init' method +// takes as input the argument used to build the ReplaceFst and the +// ReplaceFst state table. It uses accumulators of type 'Accumulator' +// in the underlying FSTs. 
+template <class Accumulator, + class T = DefaultReplaceStateTable<typename Accumulator::Arc> > +class ReplaceAccumulator { + public: + typedef typename Accumulator::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + typedef T StateTable; + typedef typename T::StateTuple StateTuple; + + ReplaceAccumulator() + : init_(false), data_(new ReplaceAccumulatorData<Accumulator, T>()), + error_(false) {} + + ReplaceAccumulator(const vector<Accumulator*> &accumulators) + : init_(false), + data_(new ReplaceAccumulatorData<Accumulator, T>(accumulators)), + error_(false) {} + + ReplaceAccumulator(const ReplaceAccumulator<Accumulator, T> &acc) + : init_(acc.init_), data_(acc.data_), error_(acc.error_) { + if (!init_) + FSTERROR() << "ReplaceAccumulator: can't copy unintialized accumulator"; + data_->IncrRefCount(); + } + + ~ReplaceAccumulator() { + if (!data_->DecrRefCount()) + delete data_; + } + + // Does not take ownership of the state table, the state table + // is own by the ReplaceFst + void Init(const vector<pair<Label, const Fst<Arc>*> > &fst_tuples, + const StateTable *state_table) { + init_ = true; + data_->Init(fst_tuples, state_table); + } + + void SetState(StateId s) { + if (!init_) { + FSTERROR() << "ReplaceAccumulator::SetState: incorrectly initialized."; + error_ = true; + return; + } + StateTuple tuple = data_->GetTuple(s); + fst_id_ = tuple.fst_id - 1; // Replace FST ID is 1-based + data_->GetAccumulator(fst_id_)->SetState(tuple.fst_state); + if ((tuple.prefix_id != 0) && + (data_->GetFst(fst_id_)->Final(tuple.fst_state) != Weight::Zero())) { + offset_ = 1; + offset_weight_ = data_->GetFst(fst_id_)->Final(tuple.fst_state); + } else { + offset_ = 0; + offset_weight_ = Weight::Zero(); + } + } + + Weight Sum(Weight w, Weight v) { + if (error_) return Weight::NoWeight(); + return data_->GetAccumulator(fst_id_)->Sum(w, v); + } + + template <class ArcIterator> + Weight Sum(Weight w, 
ArcIterator *aiter, ssize_t begin, + ssize_t end) { + if (error_) return Weight::NoWeight(); + Weight sum = begin == end ? Weight::Zero() + : data_->GetAccumulator(fst_id_)->Sum( + w, aiter, begin ? begin - offset_ : 0, end - offset_); + if (begin == 0 && end != 0 && offset_ > 0) + sum = Sum(offset_weight_, sum); + return sum; + } + + bool Error() const { return error_; } + + private: + bool init_; + ReplaceAccumulatorData<Accumulator, T> *data_; + Label fst_id_; + size_t offset_; + Weight offset_weight_; + bool error_; + + void operator=(const ReplaceAccumulator<Accumulator, T> &); // Disallow +}; + +} // namespace fst + +#endif // FST_LIB_ACCUMULATOR_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/add-on.h b/kaldi_io/src/tools/openfst/include/fst/add-on.h new file mode 100644 index 0000000..ee21a93 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/add-on.h @@ -0,0 +1,306 @@ +// add-on.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Fst implementation class to attach an arbitrary object with a +// read/write method to an FST and its file rep. The FST is given a +// new type name. + +#ifndef FST_LIB_ADD_ON_FST_H__ +#define FST_LIB_ADD_ON_FST_H__ + +#include <stddef.h> +#include <string> + +#include <fst/fst.h> + + +namespace fst { + +// Identifies stream data as an add-on fst. 
+static const int32 kAddOnMagicNumber = 446681434; + + +// +// Some useful add-on objects. +// + +// Nothing to save. +class NullAddOn { + public: + NullAddOn() {} + + static NullAddOn *Read(istream &istrm) { + return new NullAddOn(); + }; + + bool Write(ostream &ostrm) const { return true; } + + int RefCount() const { return ref_count_.count(); } + int IncrRefCount() { return ref_count_.Incr(); } + int DecrRefCount() { return ref_count_.Decr(); } + + private: + RefCounter ref_count_; + + DISALLOW_COPY_AND_ASSIGN(NullAddOn); +}; + + +// Create a new add-on from a pair of add-ons. +template <class A1, class A2> +class AddOnPair { + public: + // Argument reference count incremented. + AddOnPair(A1 *a1, A2 *a2) + : a1_(a1), a2_(a2) { + if (a1_) + a1_->IncrRefCount(); + if (a2_) + a2_->IncrRefCount(); + } + + ~AddOnPair() { + if (a1_ && !a1_->DecrRefCount()) + delete a1_; + if (a2_ && !a2_->DecrRefCount()) + delete a2_; + } + + A1 *First() const { return a1_; } + A2 *Second() const { return a2_; } + + static AddOnPair<A1, A2> *Read(istream &istrm) { + A1 *a1 = 0; + bool have_addon1 = false; + ReadType(istrm, &have_addon1); + if (have_addon1) + a1 = A1::Read(istrm); + + A2 *a2 = 0; + bool have_addon2 = false; + ReadType(istrm, &have_addon2); + if (have_addon2) + a2 = A2::Read(istrm); + + AddOnPair<A1, A2> *a = new AddOnPair<A1, A2>(a1, a2); + if (a1) + a1->DecrRefCount(); + if (a2) + a2->DecrRefCount(); + return a; + }; + + bool Write(ostream &ostrm) const { + bool have_addon1 = a1_; + WriteType(ostrm, have_addon1); + if (have_addon1) + a1_->Write(ostrm); + bool have_addon2 = a2_; + WriteType(ostrm, have_addon2); + if (have_addon2) + a2_->Write(ostrm); + return true; + } + + int RefCount() const { return ref_count_.count(); } + + int IncrRefCount() { + return ref_count_.Incr(); + } + + int DecrRefCount() { + return ref_count_.Decr(); + } + + private: + A1 *a1_; + A2 *a2_; + RefCounter ref_count_; + + DISALLOW_COPY_AND_ASSIGN(AddOnPair); +}; + + +// Add to an Fst F a 
type T object. T must have a 'T* Read(istream &)', +// a 'bool Write(ostream &)' method, and 'int RecCount(), 'int IncrRefCount()' +// and 'int DecrRefCount()' methods (e.g. 'MatcherData' in matcher-fst.h). +// The result is a new Fst implemenation with type name 'type'. +template<class F, class T> +class AddOnImpl : public FstImpl<typename F::Arc> { + public: + typedef typename F::Arc Arc; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + typedef typename Arc::StateId StateId; + + using FstImpl<Arc>::SetType; + using FstImpl<Arc>::SetProperties; + using FstImpl<Arc>::WriteHeader; + + // If 't' is non-zero, its reference count is incremented. + AddOnImpl(const F &fst, const string &type, T *t = 0) + : fst_(fst), t_(t) { + SetType(type); + SetProperties(fst_.Properties(kFstProperties, false)); + if (t_) + t_->IncrRefCount(); + } + + // If 't' is non-zero, its reference count is incremented. + AddOnImpl(const Fst<Arc> &fst, const string &type, T *t = 0) + : fst_(fst), t_(t) { + SetType(type); + SetProperties(fst_.Properties(kFstProperties, false)); + if (t_) + t_->IncrRefCount(); + } + + AddOnImpl(const AddOnImpl<F, T> &impl) + : fst_(impl.fst_), t_(impl.t_) { + SetType(impl.Type()); + SetProperties(fst_.Properties(kCopyProperties, false)); + if (t_) + t_->IncrRefCount(); + } + + ~AddOnImpl() { + if (t_ && !t_->DecrRefCount()) + delete t_; + } + + StateId Start() const { return fst_.Start(); } + Weight Final(StateId s) const { return fst_.Final(s); } + size_t NumArcs(StateId s) const { return fst_.NumArcs(s); } + + size_t NumInputEpsilons(StateId s) const { + return fst_.NumInputEpsilons(s); + } + + size_t NumOutputEpsilons(StateId s) const { + return fst_.NumOutputEpsilons(s); + } + + size_t NumStates() const { return fst_.NumStates(); } + + static AddOnImpl<F, T> *Read(istream &strm, const FstReadOptions &opts) { + FstReadOptions nopts(opts); + FstHeader hdr; + if (!nopts.header) { + hdr.Read(strm, nopts.source); + nopts.header = &hdr; + 
} + AddOnImpl<F, T> *impl = new AddOnImpl<F, T>(nopts.header->FstType()); + if (!impl->ReadHeader(strm, nopts, kMinFileVersion, &hdr)) + return 0; + delete impl; // Used here only for checking types. + + int32 magic_number = 0; + ReadType(strm, &magic_number); // Ensures this is an add-on Fst. + if (magic_number != kAddOnMagicNumber) { + LOG(ERROR) << "AddOnImpl::Read: Bad add-on header: " << nopts.source; + return 0; + } + + FstReadOptions fopts(opts); + fopts.header = 0; // Contained header was written out. + F *fst = F::Read(strm, fopts); + if (!fst) + return 0; + + T *t = 0; + bool have_addon = false; + ReadType(strm, &have_addon); + if (have_addon) { // Read add-on object if present. + t = T::Read(strm); + if (!t) + return 0; + } + impl = new AddOnImpl<F, T>(*fst, nopts.header->FstType(), t); + delete fst; + if (t) + t->DecrRefCount(); + return impl; + } + + bool Write(ostream &strm, const FstWriteOptions &opts) const { + FstHeader hdr; + FstWriteOptions nopts(opts); + nopts.write_isymbols = false; // Let contained FST hold any symbols. + nopts.write_osymbols = false; + WriteHeader(strm, nopts, kFileVersion, &hdr); + WriteType(strm, kAddOnMagicNumber); // Ensures this is an add-on Fst. + FstWriteOptions fopts(opts); + fopts.write_header = true; // Force writing contained header. + if (!fst_.Write(strm, fopts)) + return false; + bool have_addon = t_; + WriteType(strm, have_addon); + if (have_addon) // Write add-on object if present. + t_->Write(strm); + return true; + } + + void InitStateIterator(StateIteratorData<Arc> *data) const { + fst_.InitStateIterator(data); + } + + void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { + fst_.InitArcIterator(s, data); + } + + F &GetFst() { return fst_; } + + const F &GetFst() const { return fst_; } + + T *GetAddOn() const { return t_; } + + // If 't' is non-zero, its reference count is incremented. 
+ void SetAddOn(T *t) { + if (t == t_) + return; + if (t_ && !t_->DecrRefCount()) + delete t_; + t_ = t; + if (t_) + t_->IncrRefCount(); + } + + private: + explicit AddOnImpl(const string &type) : t_(0) { + SetType(type); + SetProperties(kExpanded); + } + + // Current file format version + static const int kFileVersion = 1; + // Minimum file format version supported + static const int kMinFileVersion = 1; + + F fst_; + T *t_; + + void operator=(const AddOnImpl<F, T> &fst); // Disallow +}; + +template <class F, class T> const int AddOnImpl<F, T>::kFileVersion; +template <class F, class T> const int AddOnImpl<F, T>::kMinFileVersion; + + +} // namespace fst + +#endif // FST_LIB_ADD_ON_FST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/arc-map.h b/kaldi_io/src/tools/openfst/include/fst/arc-map.h new file mode 100644 index 0000000..914f81c --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/arc-map.h @@ -0,0 +1,1146 @@ +// arc-map.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Class to map over/transform arcs e.g., change semirings or +// implement project/invert. Consider using when operation does +// not change the number of arcs (except possibly superfinal arcs). 
+ +#ifndef FST_LIB_ARC_MAP_H__ +#define FST_LIB_ARC_MAP_H__ + +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <string> +#include <utility> +using std::pair; using std::make_pair; + +#include <fst/cache.h> +#include <fst/mutable-fst.h> + + +namespace fst { + +// This determines how final weights are mapped. +enum MapFinalAction { + // A final weight is mapped into a final weight. An error + // is raised if this is not possible. + MAP_NO_SUPERFINAL, + + // A final weight is mapped to an arc to the superfinal state + // when the result cannot be represented as a final weight. + // The superfinal state will be added only if it is needed. + MAP_ALLOW_SUPERFINAL, + + // A final weight is mapped to an arc to the superfinal state + // unless the result can be represented as a final weight of weight + // Zero(). The superfinal state is always added (if the input is + // not the empty Fst). + MAP_REQUIRE_SUPERFINAL +}; + +// This determines how symbol tables are mapped. +enum MapSymbolsAction { + // Symbols should be cleared in the result by the map. + MAP_CLEAR_SYMBOLS, + + // Symbols should be copied from the input FST by the map. + MAP_COPY_SYMBOLS, + + // Symbols should not be modified in the result by the map itself. + // (They may be set by the mapper). + MAP_NOOP_SYMBOLS +}; + +// ArcMapper Interface - class determines how arcs and final weights +// are mapped. Useful for implementing operations that do not change +// the number of arcs (except possibly superfinal arcs). +// +// class ArcMapper { +// public: +// typedef A FromArc; +// typedef B ToArc; +// +// // Maps an arc type A to arc type B. +// B operator()(const A &arc); +// // Specifies final action the mapper requires (see above). +// // The mapper will be passed final weights as arcs of the +// // form A(0, 0, weight, kNoStateId). +// MapFinalAction FinalAction() const; +// // Specifies input symbol table action the mapper requires (see above). 
//   MapSymbolsAction InputSymbolsAction() const;
//   // Specifies output symbol table action the mapper requires (see above).
//   MapSymbolsAction OutputSymbolsAction() const;
//   // This specifies the known properties of an Fst mapped by this
//   // mapper. It takes as argument the input Fst's known properties.
//   uint64 Properties(uint64 props) const;
// };
//
// The ArcMap functions and classes below will use the FinalAction()
// method of the mapper to determine how to treat final weights,
// e.g. whether to add a superfinal state. They will use the Properties()
// method to set the result Fst properties.
//
// We include various map versions below. One dimension of
// variation is whether the mapping mutates its input, writes to a
// new result Fst, or is an on-the-fly Fst. Another dimension is how
// we pass the mapper. We allow passing the mapper by pointer
// for cases that we need to change the state of the user's mapper.
// This is the case with the encode mapper, which is reused during
// decoding. We also include map versions that pass the mapper
// by value or const reference when this suffices.


// Maps an arc type A using a mapper function object C, passed
// by pointer. This version modifies its Fst input.
+template<class A, class C> +void ArcMap(MutableFst<A> *fst, C* mapper) { + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) + fst->SetInputSymbols(0); + + if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) + fst->SetOutputSymbols(0); + + if (fst->Start() == kNoStateId) + return; + + uint64 props = fst->Properties(kFstProperties, false); + + MapFinalAction final_action = mapper->FinalAction(); + StateId superfinal = kNoStateId; + if (final_action == MAP_REQUIRE_SUPERFINAL) { + superfinal = fst->AddState(); + fst->SetFinal(superfinal, Weight::One()); + } + + for (StateId s = 0; s < fst->NumStates(); ++s) { + for (MutableArcIterator< MutableFst<A> > aiter(fst, s); + !aiter.Done(); aiter.Next()) { + const A &arc = aiter.Value(); + aiter.SetValue((*mapper)(arc)); + } + + switch (final_action) { + case MAP_NO_SUPERFINAL: + default: { + A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId)); + if (final_arc.ilabel != 0 || final_arc.olabel != 0) { + FSTERROR() << "ArcMap: non-zero arc labels for superfinal arc"; + fst->SetProperties(kError, kError); + } + + fst->SetFinal(s, final_arc.weight); + break; + } + case MAP_ALLOW_SUPERFINAL: { + if (s != superfinal) { + A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId)); + if (final_arc.ilabel != 0 || final_arc.olabel != 0) { + // Add a superfinal state if not already done. 
+ if (superfinal == kNoStateId) { + superfinal = fst->AddState(); + fst->SetFinal(superfinal, Weight::One()); + } + final_arc.nextstate = superfinal; + fst->AddArc(s, final_arc); + fst->SetFinal(s, Weight::Zero()); + } else { + fst->SetFinal(s, final_arc.weight); + } + break; + } + } + case MAP_REQUIRE_SUPERFINAL: { + if (s != superfinal) { + A final_arc = (*mapper)(A(0, 0, fst->Final(s), kNoStateId)); + if (final_arc.ilabel != 0 || final_arc.olabel != 0 || + final_arc.weight != Weight::Zero()) + fst->AddArc(s, A(final_arc.ilabel, final_arc.olabel, + final_arc.weight, superfinal)); + fst->SetFinal(s, Weight::Zero()); + } + break; + } + } + } + fst->SetProperties(mapper->Properties(props), kFstProperties); +} + + +// Maps an arc type A using a mapper function object C, passed +// by value. This version modifies its Fst input. +template<class A, class C> +void ArcMap(MutableFst<A> *fst, C mapper) { + ArcMap(fst, &mapper); +} + + +// Maps an arc type A to an arc type B using mapper function +// object C, passed by pointer. This version writes the mapped +// input Fst to an output MutableFst. 
+template<class A, class B, class C> +void ArcMap(const Fst<A> &ifst, MutableFst<B> *ofst, C* mapper) { + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + ofst->DeleteStates(); + + if (mapper->InputSymbolsAction() == MAP_COPY_SYMBOLS) + ofst->SetInputSymbols(ifst.InputSymbols()); + else if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) + ofst->SetInputSymbols(0); + + if (mapper->OutputSymbolsAction() == MAP_COPY_SYMBOLS) + ofst->SetOutputSymbols(ifst.OutputSymbols()); + else if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) + ofst->SetOutputSymbols(0); + + uint64 iprops = ifst.Properties(kCopyProperties, false); + + if (ifst.Start() == kNoStateId) { + if (iprops & kError) ofst->SetProperties(kError, kError); + return; + } + + MapFinalAction final_action = mapper->FinalAction(); + if (ifst.Properties(kExpanded, false)) { + ofst->ReserveStates(CountStates(ifst) + + final_action == MAP_NO_SUPERFINAL ? 0 : 1); + } + + // Add all states. + for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next()) + ofst->AddState(); + + StateId superfinal = kNoStateId; + if (final_action == MAP_REQUIRE_SUPERFINAL) { + superfinal = ofst->AddState(); + ofst->SetFinal(superfinal, B::Weight::One()); + } + for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next()) { + StateId s = siter.Value(); + if (s == ifst.Start()) + ofst->SetStart(s); + + ofst->ReserveArcs(s, ifst.NumArcs(s)); + for (ArcIterator< Fst<A> > aiter(ifst, s); !aiter.Done(); aiter.Next()) + ofst->AddArc(s, (*mapper)(aiter.Value())); + + switch (final_action) { + case MAP_NO_SUPERFINAL: + default: { + B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId)); + if (final_arc.ilabel != 0 || final_arc.olabel != 0) { + FSTERROR() << "ArcMap: non-zero arc labels for superfinal arc"; + ofst->SetProperties(kError, kError); + } + ofst->SetFinal(s, final_arc.weight); + break; + } + case MAP_ALLOW_SUPERFINAL: { + B final_arc = (*mapper)(A(0, 0, ifst.Final(s), 
kNoStateId)); + if (final_arc.ilabel != 0 || final_arc.olabel != 0) { + // Add a superfinal state if not already done. + if (superfinal == kNoStateId) { + superfinal = ofst->AddState(); + ofst->SetFinal(superfinal, B::Weight::One()); + } + final_arc.nextstate = superfinal; + ofst->AddArc(s, final_arc); + ofst->SetFinal(s, B::Weight::Zero()); + } else { + ofst->SetFinal(s, final_arc.weight); + } + break; + } + case MAP_REQUIRE_SUPERFINAL: { + B final_arc = (*mapper)(A(0, 0, ifst.Final(s), kNoStateId)); + if (final_arc.ilabel != 0 || final_arc.olabel != 0 || + final_arc.weight != B::Weight::Zero()) + ofst->AddArc(s, B(final_arc.ilabel, final_arc.olabel, + final_arc.weight, superfinal)); + ofst->SetFinal(s, B::Weight::Zero()); + break; + } + } + } + uint64 oprops = ofst->Properties(kFstProperties, false); + ofst->SetProperties(mapper->Properties(iprops) | oprops, kFstProperties); +} + +// Maps an arc type A to an arc type B using mapper function +// object C, passed by value. This version writes the mapped input +// Fst to an output MutableFst. +template<class A, class B, class C> +void ArcMap(const Fst<A> &ifst, MutableFst<B> *ofst, C mapper) { + ArcMap(ifst, ofst, &mapper); +} + + +struct ArcMapFstOptions : public CacheOptions { + // ArcMapFst default caching behaviour is to do no caching. Most + // mappers are cheap and therefore we save memory by not doing + // caching. + ArcMapFstOptions() : CacheOptions(true, 0) {} + ArcMapFstOptions(const CacheOptions& opts) : CacheOptions(opts) {} +}; + + +template <class A, class B, class C> class ArcMapFst; + +// Implementation of delayed ArcMapFst. 
// Template parameters: A is the input arc type, B the output arc type and
// C the mapper type. States, arcs and final weights are computed on demand
// and stored through the CacheImpl<B> base class.
template <class A, class B, class C>
class ArcMapFstImpl : public CacheImpl<B> {
 public:
  using FstImpl<B>::SetType;
  using FstImpl<B>::SetProperties;
  using FstImpl<B>::SetInputSymbols;
  using FstImpl<B>::SetOutputSymbols;

  using VectorFstBaseImpl<typename CacheImpl<B>::State>::NumStates;

  using CacheImpl<B>::PushArc;
  using CacheImpl<B>::HasArcs;
  using CacheImpl<B>::HasFinal;
  using CacheImpl<B>::HasStart;
  using CacheImpl<B>::SetArcs;
  using CacheImpl<B>::SetFinal;
  using CacheImpl<B>::SetStart;

  friend class StateIterator< ArcMapFst<A, B, C> >;

  typedef B Arc;
  typedef typename B::Weight Weight;
  typedef typename B::StateId StateId;

  // Takes the mapper by const reference: a private copy is made and owned
  // (deleted in the destructor).
  ArcMapFstImpl(const Fst<A> &fst, const C &mapper,
                 const ArcMapFstOptions& opts)
      : CacheImpl<B>(opts),
        fst_(fst.Copy()),
        mapper_(new C(mapper)),
        own_mapper_(true),
        superfinal_(kNoStateId),
        nstates_(0) {
    Init();
  }

  // Takes the mapper by pointer: the caller's object is used directly and
  // is not deleted by this class (own_mapper_ is false).
  ArcMapFstImpl(const Fst<A> &fst, C *mapper,
                 const ArcMapFstOptions& opts)
      : CacheImpl<B>(opts),
        fst_(fst.Copy()),
        mapper_(mapper),
        own_mapper_(false),
        superfinal_(kNoStateId),
        nstates_(0) {
    Init();
  }

  // Copy constructor: copies the input FST with Copy(true), always makes
  // and owns a fresh copy of the mapper, and recomputes the superfinal
  // bookkeeping from scratch via Init().
  ArcMapFstImpl(const ArcMapFstImpl<A, B, C> &impl)
      : CacheImpl<B>(impl),
        fst_(impl.fst_->Copy(true)),
        mapper_(new C(*impl.mapper_)),
        own_mapper_(true),
        superfinal_(kNoStateId),
        nstates_(0) {
    Init();
  }

  ~ArcMapFstImpl() {
    delete fst_;
    if (own_mapper_) delete mapper_;
  }

  // Returns the output start state, computing and caching it on first use.
  StateId Start() {
    if (!HasStart())
      SetStart(FindOState(fst_->Start()));
    return CacheImpl<B>::Start();
  }

  // Returns the final weight of output state s, computing and caching it
  // on first use according to final_action_.
  Weight Final(StateId s) {
    if (!HasFinal(s)) {
      switch (final_action_) {
        case MAP_NO_SUPERFINAL:
        default: {
          B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
                                        kNoStateId));
          if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
            FSTERROR() << "ArcMapFst: non-zero arc labels for superfinal arc";
            SetProperties(kError, kError);
          }
          SetFinal(s, final_arc.weight);
          break;
        }
        case MAP_ALLOW_SUPERFINAL: {
          if (s == superfinal_) {
            SetFinal(s, Weight::One());
          } else {
            B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
                                          kNoStateId));
            // A mapped final weight with non-zero labels is emitted as an
            // arc to the superfinal state by Expand(), so the state itself
            // becomes non-final here.
            if (final_arc.ilabel == 0 && final_arc.olabel == 0)
              SetFinal(s, final_arc.weight);
            else
              SetFinal(s, Weight::Zero());
          }
          break;
        }
        case MAP_REQUIRE_SUPERFINAL: {
          SetFinal(s, s == superfinal_ ? Weight::One() : Weight::Zero());
          break;
        }
      }
    }
    return CacheImpl<B>::Final(s);
  }

  size_t NumArcs(StateId s) {
    if (!HasArcs(s))
      Expand(s);
    return CacheImpl<B>::NumArcs(s);
  }

  size_t NumInputEpsilons(StateId s) {
    if (!HasArcs(s))
      Expand(s);
    return CacheImpl<B>::NumInputEpsilons(s);
  }

  size_t NumOutputEpsilons(StateId s) {
    if (!HasArcs(s))
      Expand(s);
    return CacheImpl<B>::NumOutputEpsilons(s);
  }

  uint64 Properties() const { return Properties(kFstProperties); }

  // Set error if found; return FST impl properties.
  uint64 Properties(uint64 mask) const {
    if ((mask & kError) && (fst_->Properties(kError, false) ||
                            (mapper_->Properties(0) & kError)))
      SetProperties(kError, kError);
    return FstImpl<Arc>::Properties(mask);
  }

  void InitArcIterator(StateId s, ArcIteratorData<B> *data) {
    if (!HasArcs(s))
      Expand(s);
    CacheImpl<B>::InitArcIterator(s, data);
  }

  // Computes and caches the arcs leaving output state s: the mapped input
  // arcs, plus an arc to the superfinal state when the final action and
  // the mapped final weight call for one.
  void Expand(StateId s) {
    // Add exiting (outgoing) arcs. The superfinal state has none.
    if (s == superfinal_) { SetArcs(s); return; }

    for (ArcIterator< Fst<A> > aiter(*fst_, FindIState(s));
         !aiter.Done(); aiter.Next()) {
      A aarc(aiter.Value());
      // Destination is translated to the output numbering before mapping.
      aarc.nextstate = FindOState(aarc.nextstate);
      const B& barc = (*mapper_)(aarc);
      PushArc(s, barc);
    }

    // Check for superfinal arcs.
    if (!HasFinal(s) || Final(s) == Weight::Zero())
      switch (final_action_) {
        case MAP_NO_SUPERFINAL:
        default:
          break;
        case MAP_ALLOW_SUPERFINAL: {
          B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
                                        kNoStateId));
          if (final_arc.ilabel != 0 || final_arc.olabel != 0) {
            // The superfinal state is allocated lazily, as the next
            // unused output state id.
            if (superfinal_ == kNoStateId)
              superfinal_ = nstates_++;
            final_arc.nextstate = superfinal_;
            PushArc(s, final_arc);
          }
          break;
        }
        case MAP_REQUIRE_SUPERFINAL: {
          B final_arc = (*mapper_)(A(0, 0, fst_->Final(FindIState(s)),
                                      kNoStateId));
          if (final_arc.ilabel != 0 || final_arc.olabel != 0 ||
              final_arc.weight != B::Weight::Zero())
            PushArc(s, B(final_arc.ilabel, final_arc.olabel,
                         final_arc.weight, superfinal_));
          break;
        }
      }
    SetArcs(s);
  }

 private:
  // Shared constructor tail: sets the FST type, applies the mapper's
  // symbol-table actions, and initializes final_action_, the properties
  // and (for MAP_REQUIRE_SUPERFINAL) the superfinal state id.
  void Init() {
    SetType("map");

    if (mapper_->InputSymbolsAction() == MAP_COPY_SYMBOLS)
      SetInputSymbols(fst_->InputSymbols());
    else if (mapper_->InputSymbolsAction() == MAP_CLEAR_SYMBOLS)
      SetInputSymbols(0);

    if (mapper_->OutputSymbolsAction() == MAP_COPY_SYMBOLS)
      SetOutputSymbols(fst_->OutputSymbols());
    else if (mapper_->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS)
      SetOutputSymbols(0);

    if (fst_->Start() == kNoStateId) {
      // Empty input: no superfinal handling is needed.
      final_action_ = MAP_NO_SUPERFINAL;
      SetProperties(kNullProperties);
    } else {
      final_action_ = mapper_->FinalAction();
      uint64 props = fst_->Properties(kCopyProperties, false);
      SetProperties(mapper_->Properties(props));
      // A required superfinal state takes output id 0; FindIState() and
      // FindOState() shift the remaining ids by one accordingly.
      if (final_action_ == MAP_REQUIRE_SUPERFINAL)
        superfinal_ = 0;
    }
  }

  // Maps from output state to input state.
  StateId FindIState(StateId s) {
    if (superfinal_ == kNoStateId || s < superfinal_)
      return s;
    else
      return s - 1;
  }

  // Maps from input state to output state.
  // Also grows nstates_ so that it stays above every output id handed out.
  StateId FindOState(StateId is) {
    StateId os;
    if (superfinal_ == kNoStateId || is < superfinal_)
      os = is;
    else
      os = is + 1;

    if (os >= nstates_)
      nstates_ = os + 1;

    return os;
  }


  const Fst<A> *fst_;            // Copy of the input FST; deleted in dtor
  C*   mapper_;                  // Mapper; deleted in dtor iff own_mapper_
  bool own_mapper_;
  MapFinalAction final_action_;

  StateId superfinal_;           // Output id of superfinal, or kNoStateId
  StateId nstates_;              // One past the largest output id seen

  void operator=(const ArcMapFstImpl<A, B, C> &);  // disallow
};


// Maps an arc type A to an arc type B using Mapper function object
// C. This version is a delayed Fst.
template <class A, class B, class C>
class ArcMapFst : public ImplToFst< ArcMapFstImpl<A, B, C> > {
 public:
  friend class ArcIterator< ArcMapFst<A, B, C> >;
  friend class StateIterator< ArcMapFst<A, B, C> >;

  typedef B Arc;
  typedef typename B::Weight Weight;
  typedef typename B::StateId StateId;
  typedef CacheState<B> State;
  typedef ArcMapFstImpl<A, B, C> Impl;

  // Constructors taking the mapper by const reference copy it; those
  // taking a pointer use the caller's mapper object directly (see the
  // corresponding ArcMapFstImpl constructors).
  ArcMapFst(const Fst<A> &fst, const C &mapper, const ArcMapFstOptions& opts)
      : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {}

  ArcMapFst(const Fst<A> &fst, C* mapper, const ArcMapFstOptions& opts)
      : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {}

  ArcMapFst(const Fst<A> &fst, const C &mapper)
      : ImplToFst<Impl>(new Impl(fst, mapper, ArcMapFstOptions())) {}

  ArcMapFst(const Fst<A> &fst, C* mapper)
      : ImplToFst<Impl>(new Impl(fst, mapper, ArcMapFstOptions())) {}

  // See Fst<>::Copy() for doc.
  ArcMapFst(const ArcMapFst<A, B, C> &fst, bool safe = false)
    : ImplToFst<Impl>(fst, safe) {}

  // Get a copy of this ArcMapFst. See Fst<>::Copy() for further doc.
  virtual ArcMapFst<A, B, C> *Copy(bool safe = false) const {
    return new ArcMapFst<A, B, C>(*this, safe);
  }

  virtual inline void InitStateIterator(StateIteratorData<B> *data) const;

  virtual void InitArcIterator(StateId s, ArcIteratorData<B> *data) const {
    GetImpl()->InitArcIterator(s, data);
  }

 private:
  // Makes visible to friends.
  Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); }

  void operator=(const ArcMapFst<A, B, C> &fst);  // disallow
};


// Specialization for ArcMapFst.
// Walks the input FST's states with siter_ and reports consecutive output
// ids in s_; one extra id is reported at the end while superfinal_ is set.
template<class A, class B, class C>
class StateIterator< ArcMapFst<A, B, C> > : public StateIteratorBase<B> {
 public:
  typedef typename B::StateId StateId;

  explicit StateIterator(const ArcMapFst<A, B, C> &fst)
      : impl_(fst.GetImpl()), siter_(*impl_->fst_), s_(0),
        superfinal_(impl_->final_action_ == MAP_REQUIRE_SUPERFINAL)
  { CheckSuperfinal(); }

  bool Done() const { return siter_.Done() && !superfinal_; }

  StateId Value() const { return s_; }

  void Next() {
    ++s_;
    if (!siter_.Done()) {
      siter_.Next();
      CheckSuperfinal();
    }
    else if (superfinal_)
      // The extra superfinal position has now been consumed.
      superfinal_ = false;
  }

  void Reset() {
    s_ = 0;
    siter_.Reset();
    superfinal_ = impl_->final_action_ == MAP_REQUIRE_SUPERFINAL;
    CheckSuperfinal();
  }

 private:
  // This allows base-class virtual access to non-virtual derived-
  // class members of the same name. It makes the derived class more
  // efficient to use but unsafe to further derive.
  bool Done_() const { return Done(); }
  StateId Value_() const { return Value(); }
  void Next_() { Next(); }
  void Reset_() { Reset(); }

  // For MAP_ALLOW_SUPERFINAL only: sets superfinal_ as soon as a state is
  // seen whose mapped final weight carries a non-zero label, i.e. one that
  // needs an arc to a superfinal state.
  void CheckSuperfinal() {
    if (impl_->final_action_ != MAP_ALLOW_SUPERFINAL || superfinal_)
      return;
    if (!siter_.Done()) {
      B final_arc = (*impl_->mapper_)(A(0, 0, impl_->fst_->Final(s_),
                                           kNoStateId));
      if (final_arc.ilabel != 0 || final_arc.olabel != 0)
        superfinal_ = true;
    }
  }

  const ArcMapFstImpl<A, B, C> *impl_;
  StateIterator< Fst<A> > siter_;
  StateId s_;
  bool superfinal_;    // true if there is a superfinal state and not done

  DISALLOW_COPY_AND_ASSIGN(StateIterator);
};


// Specialization for ArcMapFst.
+template <class A, class B, class C> +class ArcIterator< ArcMapFst<A, B, C> > + : public CacheArcIterator< ArcMapFst<A, B, C> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const ArcMapFst<A, B, C> &fst, StateId s) + : CacheArcIterator< ArcMapFst<A, B, C> >(fst.GetImpl(), s) { + if (!fst.GetImpl()->HasArcs(s)) + fst.GetImpl()->Expand(s); + } + + private: + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + +template <class A, class B, class C> inline +void ArcMapFst<A, B, C>::InitStateIterator(StateIteratorData<B> *data) + const { + data->base = new StateIterator< ArcMapFst<A, B, C> >(*this); +} + + +// +// Utility Mappers +// + +// Mapper that returns its input. +template <class A> +struct IdentityArcMapper { + typedef A FromArc; + typedef A ToArc; + + A operator()(const A &arc) const { return arc; } + + MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } + + MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + + MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} + + uint64 Properties(uint64 props) const { return props; } +}; + + +// Mapper that returns its input with final states redirected to +// a single super-final state. +template <class A> +struct SuperFinalMapper { + typedef A FromArc; + typedef A ToArc; + + A operator()(const A &arc) const { return arc; } + + MapFinalAction FinalAction() const { return MAP_REQUIRE_SUPERFINAL; } + + MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + + MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} + + uint64 Properties(uint64 props) const { + return props & kAddSuperFinalProperties; + } +}; + + +// Mapper that leaves labels and nextstate unchanged and constructs a new weight +// from the underlying value of the arc weight. Requires that there is a +// WeightConvert class specialization that converts the weights. 
template <class A, class B>
class WeightConvertMapper {
 public:
  typedef A FromArc;
  typedef B ToArc;
  typedef typename FromArc::Weight FromWeight;
  typedef typename ToArc::Weight ToWeight;

  // Copies labels and destination; only the weight is converted.
  ToArc operator()(const FromArc &arc) const {
    return ToArc(arc.ilabel, arc.olabel,
                 convert_weight_(arc.weight), arc.nextstate);
  }

  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }

  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }

  MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;}

  uint64 Properties(uint64 props) const { return props; }

 private:
  WeightConvert<FromWeight, ToWeight> convert_weight_;
};

// Non-precision-changing weight conversions.
// Consider using more efficient Cast (fst.h) instead.
typedef WeightConvertMapper<StdArc, LogArc> StdToLogMapper;
typedef WeightConvertMapper<LogArc, StdArc> LogToStdMapper;

// Precision-changing weight conversions.
typedef WeightConvertMapper<StdArc, Log64Arc> StdToLog64Mapper;
typedef WeightConvertMapper<LogArc, Log64Arc> LogToLog64Mapper;
typedef WeightConvertMapper<Log64Arc, StdArc> Log64ToStdMapper;
typedef WeightConvertMapper<Log64Arc, LogArc> Log64ToLogMapper;

// Mapper from A to GallicArc<A>.
// The output label of the input arc is moved into the string component of
// the Gallic weight; the result arc uses the input label on both sides.
template <class A, StringType S = STRING_LEFT>
struct ToGallicMapper {
  typedef A FromArc;
  typedef GallicArc<A, S> ToArc;

  typedef StringWeight<typename A::Label, S> SW;
  typedef typename A::Weight AW;
  typedef typename GallicArc<A, S>::Weight GW;

  ToArc operator()(const A &arc) const {
    // 'Super-final' arc.
    if (arc.nextstate == kNoStateId && arc.weight != AW::Zero())
      return ToArc(0, 0, GW(SW::One(), arc.weight), kNoStateId);
    // 'Super-non-final' arc.
    else if (arc.nextstate == kNoStateId)
      return ToArc(0, 0, GW(SW::Zero(), arc.weight), kNoStateId);
    // Epsilon label.
    else if (arc.olabel == 0)
      return ToArc(arc.ilabel, arc.ilabel,
                   GW(SW::One(), arc.weight), arc.nextstate);
    // Regular label.
    else
      return ToArc(arc.ilabel, arc.ilabel,
                   GW(SW(arc.olabel), arc.weight), arc.nextstate);
  }

  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }

  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }

  MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;}

  uint64 Properties(uint64 props) const {
    return ProjectProperties(props, true) & kWeightInvariantProperties;
  }
};


// Mapper from GallicArc<A> to A.
// Only string weights of length zero or one can be turned back into a
// single output label; anything else is reported through FSTERROR and the
// kError property.
template <class A, StringType S = STRING_LEFT>
struct FromGallicMapper {
  typedef GallicArc<A, S> FromArc;
  typedef A ToArc;

  typedef typename A::Label Label;
  typedef StringWeight<Label, S> SW;
  typedef typename A::Weight AW;
  typedef typename GallicArc<A, S>::Weight GW;

  // superfinal_label is used as the input label of arcs created for final
  // weights that carry an output label (see operator()).
  FromGallicMapper(Label superfinal_label = 0)
      : superfinal_label_(superfinal_label), error_(false) {}

  A operator()(const FromArc &arc) const {
    // 'Super-non-final' arc.
    if (arc.nextstate == kNoStateId && arc.weight == GW::Zero())
      return A(arc.ilabel, 0, AW::Zero(), kNoStateId);

    SW w1 = arc.weight.Value1();
    AW w2 = arc.weight.Value2();
    StringWeightIterator<Label, S> iter1(w1);

    // A single-label string becomes the output label; otherwise epsilon.
    Label l = w1.Size() == 1 ? iter1.Value() : 0;

    if (l == kStringInfinity || l == kStringBad ||
        arc.ilabel != arc.olabel || w1.Size() > 1) {
      FSTERROR() << "FromGallicMapper: unrepesentable weight";
      error_ = true;
    }

    // Final-weight pseudo-arc that needs an output label: give it the
    // configured superfinal input label.
    if (arc.ilabel == 0 && l != 0 && arc.nextstate == kNoStateId)
      return A(superfinal_label_, l, w2, arc.nextstate);
    else
      return A(arc.ilabel, l, w2, arc.nextstate);
  }

  MapFinalAction FinalAction() const { return MAP_ALLOW_SUPERFINAL; }

  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }

  MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;}

  uint64 Properties(uint64 inprops) const {
    uint64 outprops = inprops & kOLabelInvariantProperties &
        kWeightInvariantProperties & kAddSuperFinalProperties;
    if (error_)
      outprops |= kError;
    return outprops;
  }

 private:
  Label superfinal_label_;
  mutable bool error_;     // Set from the const operator(), hence mutable
};


// Mapper from GallicArc<A> to A.
// Unlike FromGallicMapper, each distinct non-empty string weight is given
// a fresh output label, and a path spelling that string is added to the
// FST passed to the constructor.
template <class A, StringType S = STRING_LEFT>
struct GallicToNewSymbolsMapper {
  typedef GallicArc<A, S> FromArc;
  typedef A ToArc;

  typedef typename A::StateId StateId;
  typedef typename A::Label Label;
  typedef StringWeight<Label, S> SW;
  typedef typename A::Weight AW;
  typedef typename GallicArc<A, S>::Weight GW;

  // fst is emptied and re-initialized with a single state that is both
  // start and final. If it has an output symbol table, a new input symbol
  // table is created for the labels this mapper generates.
  GallicToNewSymbolsMapper(MutableFst<ToArc> *fst)
      : fst_(fst), lmax_(0), osymbols_(fst->OutputSymbols()),
        isymbols_(0), error_(false) {
    fst_->DeleteStates();
    state_ = fst_->AddState();
    fst_->SetStart(state_);
    fst_->SetFinal(state_, AW::One());
    if (osymbols_) {
      string name = osymbols_->Name() + "_from_gallic";
      fst_->SetInputSymbols(new SymbolTable(name));
      isymbols_ = fst_->MutableInputSymbols();
      isymbols_->AddSymbol(osymbols_->Find((int64) 0), 0);
    } else {
      fst_->SetInputSymbols(0);
    }
  }

  A operator()(const FromArc &arc) {
    // 'Super-non-final' arc.
    if (arc.nextstate == kNoStateId && arc.weight == GW::Zero())
      return A(arc.ilabel, 0, AW::Zero(), kNoStateId);

    SW w1 = arc.weight.Value1();
    AW w2 = arc.weight.Value2();
    Label l;

    if (w1.Size() == 0) {
      l = 0;
    } else {
      typename Map::iterator miter = map_.find(w1);
      if (miter != map_.end()) {
        // String seen before: reuse its label.
        l = (*miter).second;
      } else {
        l = ++lmax_;
        map_.insert(pair<const SW, Label>(w1, l));
        StringWeightIterator<Label, S> iter1(w1);
        StateId n;
        string s;
        // Add a cycle through state_ that reads the new label l on its
        // first arc and writes the labels of w1 one per arc.
        for(size_t i = 0, p = state_;
            i < w1.Size();
            ++i, iter1.Next(), p = n) {
          n = i == w1.Size() - 1 ? state_ : fst_->AddState();
          fst_->AddArc(p, ToArc(i ? 0 : l, iter1.Value(), AW::One(), n));
          if (isymbols_) {
            // Symbol name: the component symbols joined by '_'.
            if (i) s = s + "_";
            s = s + osymbols_->Find(iter1.Value());
          }
        }
        if (isymbols_)
          isymbols_->AddSymbol(s, l);
      }
    }

    if (l == kStringInfinity || l == kStringBad || arc.ilabel != arc.olabel) {
      FSTERROR() << "GallicToNewSymbolMapper: unrepesentable weight";
      error_ = true;
    }

    return A(arc.ilabel, l, w2, arc.nextstate);
  }

  MapFinalAction FinalAction() const { return MAP_ALLOW_SUPERFINAL; }

  MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; }

  MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; }

  uint64 Properties(uint64 inprops) const {
    uint64 outprops = inprops & kOLabelInvariantProperties &
        kWeightInvariantProperties & kAddSuperFinalProperties;
    if (error_)
      outprops |= kError;
    return outprops;
  }

 private:
  // Hash functor for string weights, delegating to SW::Hash().
  class StringKey {
   public:
    size_t operator()(const SW &x) const {
      return x.Hash();
    }
  };

  typedef unordered_map<SW, Label, StringKey> Map;

  MutableFst<ToArc> *fst_;       // Receives the label-to-string paths
  Map map_;                      // String weight -> assigned label
  Label lmax_;                   // Largest label assigned so far
  StateId state_;                // The single start/final state of fst_
  const SymbolTable *osymbols_;  // Output symbols of fst_ (may be null)
  SymbolTable *isymbols_;        // New input symbols of fst_ (may be null)
  mutable bool error_;

  DISALLOW_COPY_AND_ASSIGN(GallicToNewSymbolsMapper);
};


// Mapper to add a constant to all weights.
+template <class A> +struct PlusMapper { + typedef A FromArc; + typedef A ToArc; + typedef typename A::Weight Weight; + + explicit PlusMapper(Weight w) : weight_(w) {} + + A operator()(const A &arc) const { + if (arc.weight == Weight::Zero()) + return arc; + Weight w = Plus(arc.weight, weight_); + return A(arc.ilabel, arc.olabel, w, arc.nextstate); + } + + MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } + + MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + + MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} + + uint64 Properties(uint64 props) const { + return props & kWeightInvariantProperties; + } + + private: + + + + Weight weight_; +}; + + +// Mapper to (right) multiply a constant to all weights. +template <class A> +struct TimesMapper { + typedef A FromArc; + typedef A ToArc; + typedef typename A::Weight Weight; + + explicit TimesMapper(Weight w) : weight_(w) {} + + A operator()(const A &arc) const { + if (arc.weight == Weight::Zero()) + return arc; + Weight w = Times(arc.weight, weight_); + return A(arc.ilabel, arc.olabel, w, arc.nextstate); + } + + MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } + + MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + + MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} + + uint64 Properties(uint64 props) const { + return props & kWeightInvariantProperties; + } + + private: + Weight weight_; +}; + + +// Mapper to reciprocate all non-Zero() weights. 
+template <class A> +struct InvertWeightMapper { + typedef A FromArc; + typedef A ToArc; + typedef typename A::Weight Weight; + + A operator()(const A &arc) const { + if (arc.weight == Weight::Zero()) + return arc; + Weight w = Divide(Weight::One(), arc.weight); + return A(arc.ilabel, arc.olabel, w, arc.nextstate); + } + + MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } + + MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + + MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} + + uint64 Properties(uint64 props) const { + return props & kWeightInvariantProperties; + } +}; + + +// Mapper to map all non-Zero() weights to One(). +template <class A, class B = A> +struct RmWeightMapper { + typedef A FromArc; + typedef B ToArc; + typedef typename FromArc::Weight FromWeight; + typedef typename ToArc::Weight ToWeight; + + B operator()(const A &arc) const { + ToWeight w = arc.weight != FromWeight::Zero() ? + ToWeight::One() : ToWeight::Zero(); + return B(arc.ilabel, arc.olabel, w, arc.nextstate); + } + + MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } + + MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + + MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} + + uint64 Properties(uint64 props) const { + return (props & kWeightInvariantProperties) | kUnweighted; + } +}; + + +// Mapper to quantize all weights. 
+template <class A, class B = A> +struct QuantizeMapper { + typedef A FromArc; + typedef B ToArc; + typedef typename FromArc::Weight FromWeight; + typedef typename ToArc::Weight ToWeight; + + QuantizeMapper() : delta_(kDelta) {} + + explicit QuantizeMapper(float d) : delta_(d) {} + + B operator()(const A &arc) const { + ToWeight w = arc.weight.Quantize(delta_); + return B(arc.ilabel, arc.olabel, w, arc.nextstate); + } + + MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } + + MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + + MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} + + uint64 Properties(uint64 props) const { + return props & kWeightInvariantProperties; + } + + private: + float delta_; +}; + + +// Mapper from A to B under the assumption: +// B::Weight = A::Weight::ReverseWeight +// B::Label == A::Label +// B::StateId == A::StateId +// The weight is reversed, while the label and nextstate preserved +// in the mapping. +template <class A, class B> +struct ReverseWeightMapper { + typedef A FromArc; + typedef B ToArc; + + B operator()(const A &arc) const { + return B(arc.ilabel, arc.olabel, arc.weight.Reverse(), arc.nextstate); + } + + MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } + + MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + + MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} + + uint64 Properties(uint64 props) const { return props; } +}; + +} // namespace fst + +#endif // FST_LIB_ARC_MAP_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/arc.h b/kaldi_io/src/tools/openfst/include/fst/arc.h new file mode 100644 index 0000000..5f4014b --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/arc.h @@ -0,0 +1,307 @@ +// arc.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// +// Commonly used Fst arc types. + +#ifndef FST_LIB_ARC_H__ +#define FST_LIB_ARC_H__ + +#include <string> + + +#include <fst/expectation-weight.h> +#include <fst/float-weight.h> +#include <fst/lexicographic-weight.h> +#include <fst/power-weight.h> +#include <fst/product-weight.h> +#include <fst/signed-log-weight.h> +#include <fst/sparse-power-weight.h> +#include <iostream> +#include <fstream> +#include <sstream> +#include <fst/string-weight.h> + + +namespace fst { + +template <class W> +class ArcTpl { + public: + typedef W Weight; + typedef int Label; + typedef int StateId; + + ArcTpl(Label i, Label o, const Weight& w, StateId s) + : ilabel(i), olabel(o), weight(w), nextstate(s) {} + + ArcTpl() {} + + static const string &Type(void) { + static const string type = + (Weight::Type() == "tropical") ? "standard" : Weight::Type(); + return type; + } + + Label ilabel; + Label olabel; + Weight weight; + StateId nextstate; +}; + +typedef ArcTpl<TropicalWeight> StdArc; +typedef ArcTpl<LogWeight> LogArc; +typedef ArcTpl<Log64Weight> Log64Arc; +typedef ArcTpl<SignedLogWeight> SignedLogArc; +typedef ArcTpl<SignedLog64Weight> SignedLog64Arc; +typedef ArcTpl<MinMaxWeight> MinMaxArc; + + +// Arc with integer labels and state Ids and string weights. 
+template <StringType S = STRING_LEFT> +class StringArc { + public: + typedef int Label; + typedef StringWeight<int, S> Weight; + typedef int StateId; + + StringArc(Label i, Label o, Weight w, StateId s) + : ilabel(i), olabel(o), weight(w), nextstate(s) {} + + StringArc() {} + + static const string &Type() { // Arc type name + static const string type = + S == STRING_LEFT ? "standard_string" : + (S == STRING_RIGHT ? "right_standard_string" : + (S == STRING_LEFT_RESTRICT ? "restricted_string" : + "right_restricted_string")); + return type; + } + + Label ilabel; // Transition input label + Label olabel; // Transition output label + Weight weight; // Transition weight + StateId nextstate; // Transition destination state +}; + + +// Arc with label and state Id type the same as template arg and with +// weights over the Gallic semiring w.r.t the output labels and weights of A. +template <class A, StringType S = STRING_LEFT> +struct GallicArc { + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef GallicWeight<Label, typename A::Weight, S> Weight; + + GallicArc() {} + + GallicArc(Label i, Label o, Weight w, StateId s) + : ilabel(i), olabel(o), weight(w), nextstate(s) {} + + GallicArc(const A &arc) + : ilabel(arc.ilabel), olabel(arc.ilabel), + weight(arc.olabel, arc.weight), nextstate(arc.nextstate) {} + + static const string &Type() { // Arc type name + static const string type = + (S == STRING_LEFT ? "gallic_" : + (S == STRING_RIGHT ? "right_gallic_" : + (S == STRING_LEFT_RESTRICT ? "restricted_gallic_" : + "right_restricted_gallic_"))) + A::Type(); + return type; + } + + Label ilabel; // Transition input label + Label olabel; // Transition output label + Weight weight; // Transition weight + StateId nextstate; // Transition destination state +}; + + +// Arc with the reverse of the weight found in its template arg. 
+template <class A> struct ReverseArc { + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::Weight AWeight; + typedef typename AWeight::ReverseWeight Weight; + typedef typename A::StateId StateId; + + ReverseArc(Label i, Label o, Weight w, StateId s) + : ilabel(i), olabel(o), weight(w), nextstate(s) {} + + ReverseArc() {} + + static const string &Type() { // Arc type name + static const string type = "reverse_" + Arc::Type(); + return type; + } + + Label ilabel; // Transition input label + Label olabel; // Transition output label + Weight weight; // Transition weight + StateId nextstate; // Transition destination state +}; + + +// Arc with integer labels and state Ids and lexicographic weights. +template<class W1, class W2> +struct LexicographicArc { + typedef int Label; + typedef LexicographicWeight<W1, W2> Weight; + typedef int StateId; + + LexicographicArc(Label i, Label o, Weight w, StateId s) + : ilabel(i), olabel(o), weight(w), nextstate(s) {} + + LexicographicArc() {} + + static const string &Type() { // Arc type name + static const string type = Weight::Type(); + return type; + } + + Label ilabel; // Transition input label + Label olabel; // Transition output label + Weight weight; // Transition weight + StateId nextstate; // Transition destination state +}; + + +// Arc with integer labels and state Ids and product weights. 
+template<class W1, class W2> +struct ProductArc { + typedef int Label; + typedef ProductWeight<W1, W2> Weight; + typedef int StateId; + + ProductArc(Label i, Label o, Weight w, StateId s) + : ilabel(i), olabel(o), weight(w), nextstate(s) {} + + ProductArc() {} + + static const string &Type() { // Arc type name + static const string type = Weight::Type(); + return type; + } + + Label ilabel; // Transition input label + Label olabel; // Transition output label + Weight weight; // Transition weight + StateId nextstate; // Transition destination state +}; + + +// Arc with label and state Id type the same as first template arg and with +// weights over the n-th cartesian power of the weight type of the +// template arg. +template <class A, unsigned int n> +struct PowerArc { + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef PowerWeight<typename A::Weight, n> Weight; + + PowerArc() {} + + PowerArc(Label i, Label o, Weight w, StateId s) + : ilabel(i), olabel(o), weight(w), nextstate(s) {} + + static const string &Type() { // Arc type name + static string type; + if (type.empty()) { + string power; + Int64ToStr(n, &power); + type = A::Type() + "_^" + power; + } + return type; + } + + Label ilabel; // Transition input label + Label olabel; // Transition output label + Weight weight; // Transition weight + StateId nextstate; // Transition destination state +}; + + +// Arc with label and state Id type the same as first template arg and with +// weights over the arbitrary cartesian power of the weight type. 
+template <class A, class K = int> +struct SparsePowerArc { + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef SparsePowerWeight<typename A::Weight, K> Weight; + + SparsePowerArc() {} + + SparsePowerArc(Label i, Label o, Weight w, StateId s) + : ilabel(i), olabel(o), weight(w), nextstate(s) {} + + static const string &Type() { // Arc type name + static string type; + if (type.empty()) { type = A::Type() + "_^n"; } + if(sizeof(K) != sizeof(uint32)) { + string size; + Int64ToStr(8 * sizeof(K), &size); + type += "_" + size; + } + return type; + } + + Label ilabel; // Transition input label + Label olabel; // Transition output label + Weight weight; // Transition weight + StateId nextstate; // Transition destination state +}; + + +// Arc with label and state Id type the same as first template arg and with +// expectation weight over the first template arg weight type and the +// second template arg. +template <class A, class X2> +struct ExpectationArc { + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight X1; + typedef ExpectationWeight<X1, X2> Weight; + + ExpectationArc() {} + + ExpectationArc(Label i, Label o, Weight w, StateId s) + : ilabel(i), olabel(o), weight(w), nextstate(s) {} + + static const string &Type() { // Arc type name + static string type; + if (type.empty()) { + type = "expectation_" + A::Type() + "_" + X2::Type(); + } + return type; + } + + Label ilabel; // Transition input label + Label olabel; // Transition output label + Weight weight; // Transition weight + StateId nextstate; // Transition destination state +}; + +} // namespace fst + +#endif // FST_LIB_ARC_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/arcfilter.h b/kaldi_io/src/tools/openfst/include/fst/arcfilter.h new file mode 100644 index 0000000..179dc2c --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/arcfilter.h @@ -0,0 +1,99 @@ +// arcfilter.h + +// 
Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Function objects to restrict which arcs are traversed in an FST. + +#ifndef FST_LIB_ARCFILTER_H__ +#define FST_LIB_ARCFILTER_H__ + + +#include <fst/fst.h> +#include <fst/util.h> + + +namespace fst { + +// True for all arcs. +template <class A> +class AnyArcFilter { +public: + bool operator()(const A &arc) const { return true; } +}; + + +// True for (input/output) epsilon arcs. +template <class A> +class EpsilonArcFilter { +public: + bool operator()(const A &arc) const { + return arc.ilabel == 0 && arc.olabel == 0; + } +}; + + +// True for input epsilon arcs. +template <class A> +class InputEpsilonArcFilter { +public: + bool operator()(const A &arc) const { + return arc.ilabel == 0; + } +}; + + +// True for output epsilon arcs. +template <class A> +class OutputEpsilonArcFilter { +public: + bool operator()(const A &arc) const { + return arc.olabel == 0; + } +}; + + +// True if specified labels match (don't match) when keep_match is +// true (false). +template <class A> +class MultiLabelArcFilter { +public: + typedef typename A::Label Label; + + MultiLabelArcFilter(bool match_input = true, bool keep_match = true) + : match_input_(match_input), + keep_match_(keep_match) {} + + + bool operator()(const A &arc) const { + Label label = match_input_ ? 
arc.ilabel : arc.olabel; + bool match = labels_.Find(label) != labels_.End(); + return keep_match_ ? match : !match; + } + + void AddLabel(Label label) { + labels_.Insert(label); + } + +private: + CompactSet<Label, kNoLabel> labels_; + bool match_input_; + bool keep_match_; +}; + +} // namespace fst + +#endif // FST_LIB_ARCFILTER_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/arcsort.h b/kaldi_io/src/tools/openfst/include/fst/arcsort.h new file mode 100644 index 0000000..37a51dc --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/arcsort.h @@ -0,0 +1,217 @@ +// arcsort.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Functions and classes to sort arcs in an FST. + +#ifndef FST_LIB_ARCSORT_H__ +#define FST_LIB_ARCSORT_H__ + +#include <algorithm> +#include <string> +#include <vector> +using std::vector; + +#include <fst/cache.h> +#include <fst/state-map.h> +#include <fst/test-properties.h> + + +namespace fst { + +template <class Arc, class Compare> +class ArcSortMapper { + public: + typedef Arc FromArc; + typedef Arc ToArc; + + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + + ArcSortMapper(const Fst<Arc> &fst, const Compare &comp) + : fst_(fst), comp_(comp), i_(0) {} + + // Allows updating Fst argument; pass only if changed. 
+ ArcSortMapper(const ArcSortMapper<Arc, Compare> &mapper, + const Fst<Arc> *fst = 0) + : fst_(fst ? *fst : mapper.fst_), comp_(mapper.comp_), i_(0) {} + + StateId Start() { return fst_.Start(); } + Weight Final(StateId s) const { return fst_.Final(s); } + + void SetState(StateId s) { + i_ = 0; + arcs_.clear(); + arcs_.reserve(fst_.NumArcs(s)); + for (ArcIterator< Fst<Arc> > aiter(fst_, s); !aiter.Done(); aiter.Next()) + arcs_.push_back(aiter.Value()); + sort(arcs_.begin(), arcs_.end(), comp_); + } + + bool Done() const { return i_ >= arcs_.size(); } + const Arc &Value() const { return arcs_[i_]; } + void Next() { ++i_; } + + MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + uint64 Properties(uint64 props) const { return comp_.Properties(props); } + + private: + const Fst<Arc> &fst_; + const Compare &comp_; + vector<Arc> arcs_; + ssize_t i_; // current arc position + + void operator=(const ArcSortMapper<Arc, Compare> &); // disallow +}; + + +// Sorts the arcs in an FST according to function object 'comp' of +// type Compare. This version modifies its input. Comparison function +// objects ILabelCompare and OLabelCompare are provived by the +// library. In general, Compare must meet the requirements for an STL +// sort comparision function object. It must also have a member +// Properties(uint64) that specifies the known properties of the +// sorted FST; it takes as argument the input FST's known properties +// before the sort. +// +// Complexity: +// - Time: O(V D log D) +// - Space: O(D) +// where V = # of states and D = maximum out-degree. +template<class Arc, class Compare> +void ArcSort(MutableFst<Arc> *fst, Compare comp) { + ArcSortMapper<Arc, Compare> mapper(*fst, comp); + StateMap(fst, mapper); +} + +typedef CacheOptions ArcSortFstOptions; + +// Sorts the arcs in an FST according to function object 'comp' of +// type Compare. This version is a delayed Fst. 
Comparsion function +// objects ILabelCompare and OLabelCompare are provided by the +// library. In general, Compare must meet the requirements for an STL +// comparision function object (e.g. as used for STL sort). It must +// also have a member Properties(uint64) that specifies the known +// properties of the sorted FST; it takes as argument the input FST's +// known properties. +// +// Complexity: +// - Time: O(v d log d) +// - Space: O(d) +// where v = # of states visited, d = maximum out-degree of states +// visited. Constant time and space to visit an input state is assumed +// and exclusive of caching. +template <class A, class C> +class ArcSortFst : public StateMapFst<A, A, ArcSortMapper<A, C> > { + using StateMapFst<A, A, ArcSortMapper<A, C> >::GetImpl; + public: + typedef A Arc; + typedef typename Arc::StateId StateId; + typedef ArcSortMapper<A, C> M; + + ArcSortFst(const Fst<A> &fst, const C &comp) + : StateMapFst<A, A, M>(fst, ArcSortMapper<A, C>(fst, comp)) {} + + ArcSortFst(const Fst<A> &fst, const C &comp, const ArcSortFstOptions &opts) + : StateMapFst<A, A, M>(fst, ArcSortMapper<A, C>(fst, comp), opts) {} + + // See Fst<>::Copy() for doc. + ArcSortFst(const ArcSortFst<A, C> &fst, bool safe = false) + : StateMapFst<A, A, M>(fst, safe) {} + + // Get a copy of this ArcSortFst. See Fst<>::Copy() for further doc. + virtual ArcSortFst<A, C> *Copy(bool safe = false) const { + return new ArcSortFst(*this, safe); + } + + virtual size_t NumArcs(StateId s) const { + return GetImpl()->GetFst().NumArcs(s); + } + + virtual size_t NumInputEpsilons(StateId s) const { + return GetImpl()->GetFst().NumInputEpsilons(s); + } + + virtual size_t NumOutputEpsilons(StateId s) const { + return GetImpl()->GetFst().NumOutputEpsilons(s); + } +}; + + +// Specialization for ArcSortFst. 
+template <class A, class C> +class StateIterator< ArcSortFst<A, C> > + : public StateIterator< StateMapFst<A, A, ArcSortMapper<A, C> > > { + public: + explicit StateIterator(const ArcSortFst<A, C> &fst) + : StateIterator< StateMapFst<A, A, ArcSortMapper<A, C> > >(fst) {} +}; + + +// Specialization for ArcSortFst. +template <class A, class C> +class ArcIterator< ArcSortFst<A, C> > + : public ArcIterator< StateMapFst<A, A, ArcSortMapper<A, C> > > { + public: + ArcIterator(const ArcSortFst<A, C> &fst, typename A::StateId s) + : ArcIterator< StateMapFst<A, A, ArcSortMapper<A, C> > >(fst, s) {} +}; + + +// Compare class for comparing input labels of arcs. +template<class A> class ILabelCompare { + public: + bool operator() (A arc1, A arc2) const { + return arc1.ilabel < arc2.ilabel; + } + + uint64 Properties(uint64 props) const { + return (props & kArcSortProperties) | kILabelSorted | + (props & kAcceptor ? kOLabelSorted : 0); + } +}; + + +// Compare class for comparing output labels of arcs. +template<class A> class OLabelCompare { + public: + bool operator() (const A &arc1, const A &arc2) const { + return arc1.olabel < arc2.olabel; + } + + uint64 Properties(uint64 props) const { + return (props & kArcSortProperties) | kOLabelSorted | + (props & kAcceptor ? kILabelSorted : 0); + } +}; + + +// Useful aliases when using StdArc. 
+template<class C> class StdArcSortFst : public ArcSortFst<StdArc, C> { + public: + typedef StdArc Arc; + typedef C Compare; +}; + +typedef ILabelCompare<StdArc> StdILabelCompare; + +typedef OLabelCompare<StdArc> StdOLabelCompare; + +} // namespace fst + +#endif // FST_LIB_ARCSORT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/bi-table.h b/kaldi_io/src/tools/openfst/include/fst/bi-table.h new file mode 100644 index 0000000..d220ce4 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/bi-table.h @@ -0,0 +1,532 @@ +// bi-table.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Classes for representing a bijective mapping between an arbitrary entry +// of type T and a signed integral ID. + +#ifndef FST_LIB_BI_TABLE_H__ +#define FST_LIB_BI_TABLE_H__ + +#include <deque> +using std::deque; +#include <functional> +#include <vector> +using std::vector; + +#include <tr1/unordered_set> +using std::tr1::unordered_set; +using std::tr1::unordered_multiset; + +namespace fst { + +// BI TABLES - these determine a bijective mapping between an +// arbitrary entry of type T and an signed integral ID of type I. The IDs are +// allocated starting from 0 in order. +// +// template <class I, class T> +// class BiTable { +// public: +// +// // Required constructors. +// BiTable(); +// +// // Lookup integer ID from entry. 
//   // If it doesn't exist and 'insert'
//   // is true, then add it. Otherwise return -1.
//   I FindId(const T &entry, bool insert = true);
//   // Lookup entry from integer ID.
//   const T &FindEntry(I) const;
//   // # of stored entries.
//   I Size() const;
// };

// An implementation using a hash map for the entry to ID mapping.
// H is the hash function and E is the equality function.
// If passed to the constructor, ownership is given to this class.

template <class I, class T, class H, class E = std::equal_to<T> >
class HashBiTable {
 public:
  // Reserves space for 'table_size' elements.
  explicit HashBiTable(size_t table_size = 0, H *h = 0, E *e = 0)
      : hash_func_(h),
        hash_equal_(e),
        entry2id_(table_size, (h ? *h : H()), (e ? *e : E())) {
    if (table_size)
      id2entry_.reserve(table_size);
  }

  // Deep-copies the functors (when present) and both mappings.
  HashBiTable(const HashBiTable<I, T, H, E> &table)
      : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0),
        hash_equal_(table.hash_equal_ ? new E(*table.hash_equal_) : 0),
        entry2id_(table.entry2id_.begin(), table.entry2id_.end(),
                  table.entry2id_.size(),
                  (hash_func_ ? *hash_func_ : H()),
                  (hash_equal_ ? *hash_equal_ : E())),
        id2entry_(table.id2entry_) { }

  ~HashBiTable() {
    delete hash_func_;
    delete hash_equal_;
  }

  // Returns the ID of 'entry'. An unknown entry is assigned the next
  // free ID when 'insert' is true; otherwise -1 is returned.
  I FindId(const T &entry, bool insert = true) {
    if (!insert) {
      // Pure lookup: use find() so that a miss does not leave a
      // placeholder behind in the map, as operator[] would.
      typename unordered_map<T, I, H, E>::iterator it = entry2id_.find(entry);
      if (it == entry2id_.end() || it->second == 0)
        return -1;
      return it->second - 1;  // NB: stored value = ID + 1
    }
    I &id_ref = entry2id_[entry];
    if (id_ref == 0) {  // T not found; store and assign it a new ID
      id2entry_.push_back(entry);
      id_ref = id2entry_.size();
    }
    return id_ref - 1;  // NB: id_ref = ID + 1
  }

  // Returns the entry stored under ID 's'; no bounds check is made.
  const T &FindEntry(I s) const {
    return id2entry_[s];
  }

  I Size() const { return id2entry_.size(); }

 private:
  H *hash_func_;
  E *hash_equal_;
  unordered_map<T, I, H, E> entry2id_;  // entry -> ID + 1 (0 = absent)
  vector<T> id2entry_;                  // ID -> entry

  void operator=(const HashBiTable<I, T, H, E> &table);  // disallow
};


// Enables alternative hash set representations below.
+// typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType; +typedef enum { HS_STL = 0, HS_DENSE = 1, HS_SPARSE = 2 } HSType; + +// Default hash set is STL hash_set +template<class K, class H, class E, HSType> +struct HashSet : public unordered_set<K, H, E> { + HashSet(size_t n = 0, const H &h = H(), const E &e = E()) + : unordered_set<K, H, E>(n, h, e) { } + + void rehash(size_t n) { } +}; + + +// An implementation using a hash set for the entry to ID mapping. +// The hash set holds 'keys' which are either the ID or kCurrentKey. +// These keys can be mapped to entrys either by looking up in the +// entry vector or, if kCurrentKey, in current_entry_ member. The hash +// and key equality functions map to entries first. H +// is the hash function and E is the equality function. If passed to +// the constructor, ownership is given to this class. +template <class I, class T, class H, + class E = std::equal_to<T>, HSType HS = HS_DENSE> +class CompactHashBiTable { + public: + friend class HashFunc; + friend class HashEqual; + + // Reserves space for 'table_size' elements. + explicit CompactHashBiTable(size_t table_size = 0, H *h = 0, E *e = 0) + : hash_func_(h), + hash_equal_(e), + compact_hash_func_(*this), + compact_hash_equal_(*this), + keys_(table_size, compact_hash_func_, compact_hash_equal_) { + if (table_size) + id2entry_.reserve(table_size); + } + + CompactHashBiTable(const CompactHashBiTable<I, T, H, E, HS> &table) + : hash_func_(table.hash_func_ ? new H(*table.hash_func_) : 0), + hash_equal_(table.hash_equal_ ? 
new E(*table.hash_equal_) : 0), + compact_hash_func_(*this), + compact_hash_equal_(*this), + keys_(table.keys_.size(), compact_hash_func_, compact_hash_equal_), + id2entry_(table.id2entry_) { + keys_.insert(table.keys_.begin(), table.keys_.end()); + } + + ~CompactHashBiTable() { + delete hash_func_; + delete hash_equal_; + } + + I FindId(const T &entry, bool insert = true) { + current_entry_ = &entry; + typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); + if (it == keys_.end()) { // T not found + if (insert) { // store and assign it a new ID + I key = id2entry_.size(); + id2entry_.push_back(entry); + keys_.insert(key); + return key; + } else { + return -1; + } + } else { + return *it; + } + } + + const T &FindEntry(I s) const { return id2entry_[s]; } + + I Size() const { return id2entry_.size(); } + + // Clear content. With argument, erases last n IDs. + void Clear(ssize_t n = -1) { + if (n < 0 || n > id2entry_.size()) + n = id2entry_.size(); + while (n-- > 0) { + I key = id2entry_.size() - 1; + keys_.erase(key); + id2entry_.pop_back(); + } + keys_.rehash(0); + } + + private: + static const I kCurrentKey; // -1 + static const I kEmptyKey; // -2 + static const I kDeletedKey; // -3 + + class HashFunc { + public: + HashFunc(const CompactHashBiTable &ht) : ht_(&ht) {} + + size_t operator()(I k) const { + if (k >= kCurrentKey) { + return (*ht_->hash_func_)(ht_->Key2Entry(k)); + } else { + return 0; + } + } + + private: + const CompactHashBiTable *ht_; + }; + + class HashEqual { + public: + HashEqual(const CompactHashBiTable &ht) : ht_(&ht) {} + + bool operator()(I k1, I k2) const { + if (k1 >= kCurrentKey && k2 >= kCurrentKey) { + return (*ht_->hash_equal_)(ht_->Key2Entry(k1), ht_->Key2Entry(k2)); + } else { + return k1 == k2; + } + } + private: + const CompactHashBiTable *ht_; + }; + + typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet; + + const T &Key2Entry(I k) const { + if (k == kCurrentKey) + return *current_entry_; + else + return id2entry_[k]; 
+ } + + H *hash_func_; + E *hash_equal_; + HashFunc compact_hash_func_; + HashEqual compact_hash_equal_; + KeyHashSet keys_; + vector<T> id2entry_; + const T *current_entry_; + + void operator=(const CompactHashBiTable<I, T, H, E, HS> &table); // disallow +}; + + +template <class I, class T, class H, class E, HSType HS> +const I CompactHashBiTable<I, T, H, E, HS>::kCurrentKey = -1; + +template <class I, class T, class H, class E, HSType HS> +const I CompactHashBiTable<I, T, H, E, HS>::kEmptyKey = -2; + +template <class I, class T, class H, class E, HSType HS> +const I CompactHashBiTable<I, T, H, E, HS>::kDeletedKey = -3; + + +// An implementation using a vector for the entry to ID mapping. +// It is passed a function object FP that should fingerprint entries +// uniquely to an integer that can used as a vector index. Normally, +// VectorBiTable constructs the FP object. The user can instead +// pass in this object; in that case, VectorBiTable takes its +// ownership. +template <class I, class T, class FP> +class VectorBiTable { + public: + // Reserves space for 'table_size' elements. + explicit VectorBiTable(FP *fp = 0, size_t table_size = 0) + : fp_(fp ? fp : new FP()) { + if (table_size) + id2entry_.reserve(table_size); + } + + VectorBiTable(const VectorBiTable<I, T, FP> &table) + : fp_(table.fp_ ? 
new FP(*table.fp_) : 0), + fp2id_(table.fp2id_), + id2entry_(table.id2entry_) { } + + ~VectorBiTable() { delete fp_; } + + I FindId(const T &entry, bool insert = true) { + ssize_t fp = (*fp_)(entry); + if (fp >= fp2id_.size()) + fp2id_.resize(fp + 1); + I &id_ref = fp2id_[fp]; + if (id_ref == 0) { // T not found + if (insert) { // store and assign it a new ID + id2entry_.push_back(entry); + id_ref = id2entry_.size(); + } else { + return -1; + } + } + return id_ref - 1; // NB: id_ref = ID + 1 + } + + const T &FindEntry(I s) const { return id2entry_[s]; } + + I Size() const { return id2entry_.size(); } + + const FP &Fingerprint() const { return *fp_; } + + private: + FP *fp_; + vector<I> fp2id_; + vector<T> id2entry_; + + void operator=(const VectorBiTable<I, T, FP> &table); // disallow +}; + + +// An implementation using a vector and a compact hash table. The +// selecting functor S returns true for entries to be hashed in the +// vector. The fingerprinting functor FP returns a unique fingerprint +// for each entry to be hashed in the vector (these need to be +// suitable for indexing in a vector). The hash functor H is used +// when hashing entry into the compact hash table. If passed to the +// constructor, ownership is given to this class. +template <class I, class T, class S, class FP, class H, HSType HS = HS_DENSE> +class VectorHashBiTable { + public: + friend class HashFunc; + friend class HashEqual; + + explicit VectorHashBiTable(S *s, FP *fp = 0, H *h = 0, + size_t vector_size = 0, + size_t entry_size = 0) + : selector_(s), + fp_(fp ? fp : new FP()), + h_(h ? h : new H()), + hash_func_(*this), + hash_equal_(*this), + keys_(0, hash_func_, hash_equal_) { + if (vector_size) + fp2id_.reserve(vector_size); + if (entry_size) + id2entry_.reserve(entry_size); + } + + VectorHashBiTable(const VectorHashBiTable<I, T, S, FP, H, HS> &table) + : selector_(new S(table.s_)), + fp_(table.fp_ ? new FP(*table.fp_) : 0), + h_(table.h_ ? 
new H(*table.h_) : 0), + id2entry_(table.id2entry_), + fp2id_(table.fp2id_), + hash_func_(*this), + hash_equal_(*this), + keys_(table.keys_.size(), hash_func_, hash_equal_) { + keys_.insert(table.keys_.begin(), table.keys_.end()); + } + + ~VectorHashBiTable() { + delete selector_; + delete fp_; + delete h_; + } + + I FindId(const T &entry, bool insert = true) { + if ((*selector_)(entry)) { // Use the vector if 'selector_(entry) == true' + uint64 fp = (*fp_)(entry); + if (fp2id_.size() <= fp) + fp2id_.resize(fp + 1, 0); + if (fp2id_[fp] == 0) { // T not found + if (insert) { // store and assign it a new ID + id2entry_.push_back(entry); + fp2id_[fp] = id2entry_.size(); + } else { + return -1; + } + } + return fp2id_[fp] - 1; // NB: assoc_value = ID + 1 + } else { // Use the hash table otherwise. + current_entry_ = &entry; + typename KeyHashSet::const_iterator it = keys_.find(kCurrentKey); + if (it == keys_.end()) { + if (insert) { + I key = id2entry_.size(); + id2entry_.push_back(entry); + keys_.insert(key); + return key; + } else { + return -1; + } + } else { + return *it; + } + } + } + + const T &FindEntry(I s) const { + return id2entry_[s]; + } + + I Size() const { return id2entry_.size(); } + + const S &Selector() const { return *selector_; } + + const FP &Fingerprint() const { return *fp_; } + + const H &Hash() const { return *h_; } + + private: + static const I kCurrentKey; // -1 + static const I kEmptyKey; // -2 + + class HashFunc { + public: + HashFunc(const VectorHashBiTable &ht) : ht_(&ht) {} + + size_t operator()(I k) const { + if (k >= kCurrentKey) { + return (*(ht_->h_))(ht_->Key2Entry(k)); + } else { + return 0; + } + } + private: + const VectorHashBiTable *ht_; + }; + + class HashEqual { + public: + HashEqual(const VectorHashBiTable &ht) : ht_(&ht) {} + + bool operator()(I k1, I k2) const { + if (k1 >= kCurrentKey && k2 >= kCurrentKey) { + return ht_->Key2Entry(k1) == ht_->Key2Entry(k2); + } else { + return k1 == k2; + } + } + private: + const 
VectorHashBiTable *ht_; + }; + + typedef HashSet<I, HashFunc, HashEqual, HS> KeyHashSet; + + const T &Key2Entry(I k) const { + if (k == kCurrentKey) + return *current_entry_; + else + return id2entry_[k]; + } + + S *selector_; // Returns true if entry hashed into vector + FP *fp_; // Fingerprint used when hashing entry into vector + H *h_; // Hash function used when hashing entry into hash_set + + vector<T> id2entry_; // Maps state IDs to entry + vector<I> fp2id_; // Maps entry fingerprints to IDs + + // Compact implementation of the hash table mapping entrys to + // state IDs using the hash function 'h_' + HashFunc hash_func_; + HashEqual hash_equal_; + KeyHashSet keys_; + const T *current_entry_; + + // disallow + void operator=(const VectorHashBiTable<I, T, S, FP, H, HS> &table); +}; + +template <class I, class T, class S, class FP, class H, HSType HS> +const I VectorHashBiTable<I, T, S, FP, H, HS>::kCurrentKey = -1; + +template <class I, class T, class S, class FP, class H, HSType HS> +const I VectorHashBiTable<I, T, S, FP, H, HS>::kEmptyKey = -3; + + +// An implementation using a hash map for the entry to ID +// mapping. This version permits erasing of arbitrary states. The +// entry T must have == defined and its default constructor must +// produce a entry that will never be seen. F is the hash function. 
+template <class I, class T, class F> +class ErasableBiTable { + public: + ErasableBiTable() : first_(0) {} + + I FindId(const T &entry, bool insert = true) { + I &id_ref = entry2id_[entry]; + if (id_ref == 0) { // T not found + if (insert) { // store and assign it a new ID + id2entry_.push_back(entry); + id_ref = id2entry_.size() + first_; + } else { + return -1; + } + } + return id_ref - 1; // NB: id_ref = ID + 1 + } + + const T &FindEntry(I s) const { return id2entry_[s - first_]; } + + I Size() const { return id2entry_.size(); } + + void Erase(I s) { + T &entry = id2entry_[s - first_]; + typename unordered_map<T, I, F>::iterator it = + entry2id_.find(entry); + entry2id_.erase(it); + id2entry_[s - first_] = empty_entry_; + while (!id2entry_.empty() && id2entry_.front() == empty_entry_) { + id2entry_.pop_front(); + ++first_; + } + } + + private: + unordered_map<T, I, F> entry2id_; + deque<T> id2entry_; + const T empty_entry_; + I first_; // I of first element in the deque; + + // disallow + void operator=(const ErasableBiTable<I, T, F> &table); //disallow +}; + +} // namespace fst + +#endif // FST_LIB_BI_TABLE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/cache.h b/kaldi_io/src/tools/openfst/include/fst/cache.h new file mode 100644 index 0000000..7c96fe1 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/cache.h @@ -0,0 +1,861 @@ +// cache.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Michael Riley) +// +// \file +// An Fst implementation that caches FST elements of a delayed +// computation. + +#ifndef FST_LIB_CACHE_H__ +#define FST_LIB_CACHE_H__ + +#include <vector> +using std::vector; +#include <list> + +#include <fst/vector-fst.h> + + +DECLARE_bool(fst_default_cache_gc); +DECLARE_int64(fst_default_cache_gc_limit); + +namespace fst { + +struct CacheOptions { + bool gc; // enable GC + size_t gc_limit; // # of bytes allowed before GC + + CacheOptions(bool g, size_t l) : gc(g), gc_limit(l) {} + CacheOptions() + : gc(FLAGS_fst_default_cache_gc), + gc_limit(FLAGS_fst_default_cache_gc_limit) {} +}; + +// A CacheStateAllocator allocates and frees CacheStates +// template <class S> +// struct CacheStateAllocator { +// S *Allocate(StateId s); +// void Free(S *state, StateId s); +// }; +// + +// A simple allocator class, can be overridden as needed, +// maintains a single entry cache. +template <class S> +struct DefaultCacheStateAllocator { + typedef typename S::Arc::StateId StateId; + + DefaultCacheStateAllocator() : mru_(NULL) { } + + ~DefaultCacheStateAllocator() { + delete mru_; + } + + S *Allocate(StateId s) { + if (mru_) { + S *state = mru_; + mru_ = NULL; + state->Reset(); + return state; + } + return new S(); + } + + void Free(S *state, StateId s) { + if (mru_) { + delete mru_; + } + mru_ = state; + } + + private: + S *mru_; +}; + +// VectorState but additionally has a flags data member (see +// CacheState below). This class is used to cache FST elements with +// the flags used to indicate what has been cached. Use HasStart() +// HasFinal(), and HasArcs() to determine if cached and SetStart(), +// SetFinal(), AddArc(), (or PushArc() and SetArcs()) to cache. Note +// you must set the final weight even if the state is non-final to +// mark it as cached. If the 'gc' option is 'false', cached items have +// the extent of the FST - minimizing computation. 
If the 'gc' option +// is 'true', garbage collection of states (not in use in an arc +// iterator and not 'protected') is performed, in a rough +// approximation of LRU order, when 'gc_limit' bytes is reached - +// controlling memory use. When 'gc_limit' is 0, special optimizations +// apply - minimizing memory use. + +template <class S, class C = DefaultCacheStateAllocator<S> > +class CacheBaseImpl : public VectorFstBaseImpl<S> { + public: + typedef S State; + typedef C Allocator; + typedef typename State::Arc Arc; + typedef typename Arc::Weight Weight; + typedef typename Arc::StateId StateId; + + using FstImpl<Arc>::Type; + using FstImpl<Arc>::Properties; + using FstImpl<Arc>::SetProperties; + using VectorFstBaseImpl<State>::NumStates; + using VectorFstBaseImpl<State>::Start; + using VectorFstBaseImpl<State>::AddState; + using VectorFstBaseImpl<State>::SetState; + using VectorFstBaseImpl<State>::ReserveStates; + + explicit CacheBaseImpl(C *allocator = 0) + : cache_start_(false), nknown_states_(0), min_unexpanded_state_id_(0), + cache_first_state_id_(kNoStateId), cache_first_state_(0), + cache_gc_(FLAGS_fst_default_cache_gc), cache_size_(0), + cache_limit_(FLAGS_fst_default_cache_gc_limit > kMinCacheLimit || + FLAGS_fst_default_cache_gc_limit == 0 ? + FLAGS_fst_default_cache_gc_limit : kMinCacheLimit), + protect_(false) { + allocator_ = allocator ? allocator : new C(); + } + + explicit CacheBaseImpl(const CacheOptions &opts, C *allocator = 0) + : cache_start_(false), nknown_states_(0), + min_unexpanded_state_id_(0), cache_first_state_id_(kNoStateId), + cache_first_state_(0), cache_gc_(opts.gc), cache_size_(0), + cache_limit_(opts.gc_limit > kMinCacheLimit || opts.gc_limit == 0 ? + opts.gc_limit : kMinCacheLimit), + protect_(false) { + allocator_ = allocator ? allocator : new C(); + } + + // Preserve gc parameters. If preserve_cache true, also preserves + // cache data. 
+ CacheBaseImpl(const CacheBaseImpl<S, C> &impl, bool preserve_cache = false) + : VectorFstBaseImpl<S>(), cache_start_(false), nknown_states_(0), + min_unexpanded_state_id_(0), cache_first_state_id_(kNoStateId), + cache_first_state_(0), cache_gc_(impl.cache_gc_), cache_size_(0), + cache_limit_(impl.cache_limit_), + protect_(impl.protect_) { + allocator_ = new C(); + if (preserve_cache) { + cache_start_ = impl.cache_start_; + nknown_states_ = impl.nknown_states_; + expanded_states_ = impl.expanded_states_; + min_unexpanded_state_id_ = impl.min_unexpanded_state_id_; + if (impl.cache_first_state_id_ != kNoStateId) { + cache_first_state_id_ = impl.cache_first_state_id_; + cache_first_state_ = allocator_->Allocate(cache_first_state_id_); + *cache_first_state_ = *impl.cache_first_state_; + } + cache_states_ = impl.cache_states_; + cache_size_ = impl.cache_size_; + ReserveStates(impl.NumStates()); + for (StateId s = 0; s < impl.NumStates(); ++s) { + const S *state = + static_cast<const VectorFstBaseImpl<S> &>(impl).GetState(s); + if (state) { + S *copied_state = allocator_->Allocate(s); + *copied_state = *state; + AddState(copied_state); + } else { + AddState(0); + } + } + VectorFstBaseImpl<S>::SetStart(impl.Start()); + } + } + + ~CacheBaseImpl() { + allocator_->Free(cache_first_state_, cache_first_state_id_); + delete allocator_; + } + + // Gets a state from its ID; state must exist. + const S *GetState(StateId s) const { + if (s == cache_first_state_id_) + return cache_first_state_; + else + return VectorFstBaseImpl<S>::GetState(s); + } + + // Gets a state from its ID; state must exist. + S *GetState(StateId s) { + if (s == cache_first_state_id_) + return cache_first_state_; + else + return VectorFstBaseImpl<S>::GetState(s); + } + + // Gets a state from its ID; return 0 if it doesn't exist. 
+ const S *CheckState(StateId s) const { + if (s == cache_first_state_id_) + return cache_first_state_; + else if (s < NumStates()) + return VectorFstBaseImpl<S>::GetState(s); + else + return 0; + } + + // Gets a state from its ID; add it if necessary. + S *ExtendState(StateId s); + + void SetStart(StateId s) { + VectorFstBaseImpl<S>::SetStart(s); + cache_start_ = true; + if (s >= nknown_states_) + nknown_states_ = s + 1; + } + + void SetFinal(StateId s, Weight w) { + S *state = ExtendState(s); + state->final = w; + state->flags |= kCacheFinal | kCacheRecent | kCacheModified; + } + + // AddArc adds a single arc to state s and does incremental cache + // book-keeping. For efficiency, prefer PushArc and SetArcs below + // when possible. + void AddArc(StateId s, const Arc &arc) { + S *state = ExtendState(s); + state->arcs.push_back(arc); + if (arc.ilabel == 0) { + ++state->niepsilons; + } + if (arc.olabel == 0) { + ++state->noepsilons; + } + const Arc *parc = state->arcs.empty() ? 0 : &(state->arcs.back()); + SetProperties(AddArcProperties(Properties(), s, arc, parc)); + state->flags |= kCacheModified; + if (cache_gc_ && s != cache_first_state_id_ && + !(state->flags & kCacheProtect)) { + cache_size_ += sizeof(Arc); + if (cache_size_ > cache_limit_) + GC(s, false); + } + } + + // Adds a single arc to state s but delays cache book-keeping. + // SetArcs must be called when all PushArc calls at a state are + // complete. Do not mix with calls to AddArc. + void PushArc(StateId s, const Arc &arc) { + S *state = ExtendState(s); + state->arcs.push_back(arc); + } + + // Marks arcs of state s as cached and does cache book-keeping after all + // calls to PushArc have been completed. Do not mix with calls to AddArc. 
+ void SetArcs(StateId s) { + S *state = ExtendState(s); + vector<Arc> &arcs = state->arcs; + state->niepsilons = state->noepsilons = 0; + for (size_t a = 0; a < arcs.size(); ++a) { + const Arc &arc = arcs[a]; + if (arc.nextstate >= nknown_states_) + nknown_states_ = arc.nextstate + 1; + if (arc.ilabel == 0) + ++state->niepsilons; + if (arc.olabel == 0) + ++state->noepsilons; + } + ExpandedState(s); + state->flags |= kCacheArcs | kCacheRecent | kCacheModified; + if (cache_gc_ && s != cache_first_state_id_ && + !(state->flags & kCacheProtect)) { + cache_size_ += arcs.capacity() * sizeof(Arc); + if (cache_size_ > cache_limit_) + GC(s, false); + } + }; + + void ReserveArcs(StateId s, size_t n) { + S *state = ExtendState(s); + state->arcs.reserve(n); + } + + void DeleteArcs(StateId s, size_t n) { + S *state = ExtendState(s); + const vector<Arc> &arcs = state->arcs; + for (size_t i = 0; i < n; ++i) { + size_t j = arcs.size() - i - 1; + if (arcs[j].ilabel == 0) + --state->niepsilons; + if (arcs[j].olabel == 0) + --state->noepsilons; + } + + state->arcs.resize(arcs.size() - n); + SetProperties(DeleteArcsProperties(Properties())); + state->flags |= kCacheModified; + if (cache_gc_ && s != cache_first_state_id_ && + !(state->flags & kCacheProtect)) { + cache_size_ -= n * sizeof(Arc); + } + } + + void DeleteArcs(StateId s) { + S *state = ExtendState(s); + size_t n = state->arcs.size(); + state->niepsilons = 0; + state->noepsilons = 0; + state->arcs.clear(); + SetProperties(DeleteArcsProperties(Properties())); + state->flags |= kCacheModified; + if (cache_gc_ && s != cache_first_state_id_ && + !(state->flags & kCacheProtect)) { + cache_size_ -= n * sizeof(Arc); + } + } + + void DeleteStates(const vector<StateId> &dstates) { + size_t old_num_states = NumStates(); + vector<StateId> newid(old_num_states, 0); + for (size_t i = 0; i < dstates.size(); ++i) + newid[dstates[i]] = kNoStateId; + StateId nstates = 0; + for (StateId s = 0; s < old_num_states; ++s) { + if (newid[s] != 
kNoStateId) { + newid[s] = nstates; + ++nstates; + } + } + // just for states_.resize(), does unnecessary walk. + VectorFstBaseImpl<S>::DeleteStates(dstates); + SetProperties(DeleteStatesProperties(Properties())); + // Update list of cached states. + typename list<StateId>::iterator siter = cache_states_.begin(); + while (siter != cache_states_.end()) { + if (newid[*siter] != kNoStateId) { + *siter = newid[*siter]; + ++siter; + } else { + cache_states_.erase(siter++); + } + } + } + + void DeleteStates() { + cache_states_.clear(); + allocator_->Free(cache_first_state_, cache_first_state_id_); + for (int s = 0; s < NumStates(); ++s) { + allocator_->Free(VectorFstBaseImpl<S>::GetState(s), s); + SetState(s, 0); + } + nknown_states_ = 0; + min_unexpanded_state_id_ = 0; + cache_first_state_id_ = kNoStateId; + cache_first_state_ = 0; + cache_size_ = 0; + cache_start_ = false; + VectorFstBaseImpl<State>::DeleteStates(); + SetProperties(DeleteAllStatesProperties(Properties(), + kExpanded | kMutable)); + } + + // Is the start state cached? + bool HasStart() const { + if (!cache_start_ && Properties(kError)) + cache_start_ = true; + return cache_start_; + } + + // Is the final weight of state s cached? + bool HasFinal(StateId s) const { + const S *state = CheckState(s); + if (state && state->flags & kCacheFinal) { + state->flags |= kCacheRecent; + return true; + } else { + return false; + } + } + + // Are arcs of state s cached? 
+ bool HasArcs(StateId s) const { + const S *state = CheckState(s); + if (state && state->flags & kCacheArcs) { + state->flags |= kCacheRecent; + return true; + } else { + return false; + } + } + + Weight Final(StateId s) const { + const S *state = GetState(s); + return state->final; + } + + size_t NumArcs(StateId s) const { + const S *state = GetState(s); + return state->arcs.size(); + } + + size_t NumInputEpsilons(StateId s) const { + const S *state = GetState(s); + return state->niepsilons; + } + + size_t NumOutputEpsilons(StateId s) const { + const S *state = GetState(s); + return state->noepsilons; + } + + // Provides information needed for generic arc iterator. + void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { + const S *state = GetState(s); + data->base = 0; + data->narcs = state->arcs.size(); + data->arcs = data->narcs > 0 ? &(state->arcs[0]) : 0; + data->ref_count = &(state->ref_count); + ++(*data->ref_count); + } + + // Number of known states. + StateId NumKnownStates() const { return nknown_states_; } + + // Update number of known states taking in account the existence of state s. + void UpdateNumKnownStates(StateId s) { + if (s >= nknown_states_) + nknown_states_ = s + 1; + } + + // Find the mininum never-expanded state Id + StateId MinUnexpandedState() const { + while (min_unexpanded_state_id_ < expanded_states_.size() && + expanded_states_[min_unexpanded_state_id_]) + ++min_unexpanded_state_id_; + return min_unexpanded_state_id_; + } + + // Removes from cache_states_ and uncaches (not referenced-counted + // or protected) states that have not been accessed since the last + // GC until at most cache_fraction * cache_limit_ bytes are cached. + // If that fails to free enough, recurs uncaching recently visited + // states as well. If still unable to free enough memory, then + // widens cache_limit_ to fulfill condition. 
+ void GC(StateId current, bool free_recent, float cache_fraction = 0.666); + + // Setc/clears GC protection: if true, new states are protected + // from garbage collection. + void GCProtect(bool on) { protect_ = on; } + + void ExpandedState(StateId s) { + if (s < min_unexpanded_state_id_) + return; + while (expanded_states_.size() <= s) + expanded_states_.push_back(false); + expanded_states_[s] = true; + } + + C *GetAllocator() const { + return allocator_; + } + + // Caching on/off switch, limit and size accessors. + bool GetCacheGc() const { return cache_gc_; } + size_t GetCacheLimit() const { return cache_limit_; } + size_t GetCacheSize() const { return cache_size_; } + + private: + static const size_t kMinCacheLimit = 8096; // Minimum (non-zero) cache limit + + static const uint32 kCacheFinal = 0x0001; // Final weight has been cached + static const uint32 kCacheArcs = 0x0002; // Arcs have been cached + static const uint32 kCacheRecent = 0x0004; // Mark as visited since GC + static const uint32 kCacheProtect = 0x0008; // Mark state as GC protected + + public: + static const uint32 kCacheModified = 0x0010; // Mark state as modified + static const uint32 kCacheFlags = kCacheFinal | kCacheArcs | kCacheRecent + | kCacheProtect | kCacheModified; + + private: + C *allocator_; // used to allocate new states + mutable bool cache_start_; // Is the start state cached? 
+ StateId nknown_states_; // # of known states + vector<bool> expanded_states_; // states that have been expanded + mutable StateId min_unexpanded_state_id_; // minimum never-expanded state Id + StateId cache_first_state_id_; // First cached state id + S *cache_first_state_; // First cached state + list<StateId> cache_states_; // list of currently cached states + bool cache_gc_; // enable GC + size_t cache_size_; // # of bytes cached + size_t cache_limit_; // # of bytes allowed before GC + bool protect_; // Protect new states from GC + + void operator=(const CacheBaseImpl<S, C> &impl); // disallow +}; + +// Gets a state from its ID; add it if necessary. +template <class S, class C> +S *CacheBaseImpl<S, C>::ExtendState(typename S::Arc::StateId s) { + // If 'protect_' true and a new state, protects from garbage collection. + if (s == cache_first_state_id_) { + return cache_first_state_; // Return 1st cached state + } else if (cache_limit_ == 0 && cache_first_state_id_ == kNoStateId) { + cache_first_state_id_ = s; // Remember 1st cached state + cache_first_state_ = allocator_->Allocate(s); + if (protect_) cache_first_state_->flags |= kCacheProtect; + return cache_first_state_; + } else if (cache_first_state_id_ != kNoStateId && + cache_first_state_->ref_count == 0 && + !(cache_first_state_->flags & kCacheProtect)) { + // With Default allocator, the Free and Allocate will reuse the same S*. 
+ allocator_->Free(cache_first_state_, cache_first_state_id_); + cache_first_state_id_ = s; + cache_first_state_ = allocator_->Allocate(s); + if (protect_) cache_first_state_->flags |= kCacheProtect; + return cache_first_state_; // Return 1st cached state + } else { + while (NumStates() <= s) // Add state to main cache + AddState(0); + S *state = VectorFstBaseImpl<S>::GetState(s); + if (!state) { + state = allocator_->Allocate(s); + if (protect_) state->flags |= kCacheProtect; + SetState(s, state); + if (cache_first_state_id_ != kNoStateId) { // Forget 1st cached state + while (NumStates() <= cache_first_state_id_) + AddState(0); + SetState(cache_first_state_id_, cache_first_state_); + if (cache_gc_ && !(cache_first_state_->flags & kCacheProtect)) { + cache_states_.push_back(cache_first_state_id_); + cache_size_ += sizeof(S) + + cache_first_state_->arcs.capacity() * sizeof(Arc); + } + cache_limit_ = kMinCacheLimit; + cache_first_state_id_ = kNoStateId; + cache_first_state_ = 0; + } + if (cache_gc_ && !protect_) { + cache_states_.push_back(s); + cache_size_ += sizeof(S); + if (cache_size_ > cache_limit_) + GC(s, false); + } + } + return state; + } +} + +// Removes from cache_states_ and uncaches (not referenced-counted or +// protected) states that have not been accessed since the last GC +// until at most cache_fraction * cache_limit_ bytes are cached. If +// that fails to free enough, recurs uncaching recently visited states +// as well. If still unable to free enough memory, then widens cache_limit_ +// to fulfill condition. 
+template <class S, class C> +void CacheBaseImpl<S, C>::GC(typename S::Arc::StateId current, + bool free_recent, float cache_fraction) { + if (!cache_gc_) + return; + VLOG(2) << "CacheImpl: Enter GC: object = " << Type() << "(" << this + << "), free recently cached = " << free_recent + << ", cache size = " << cache_size_ + << ", cache frac = " << cache_fraction + << ", cache limit = " << cache_limit_ << "\n"; + typename list<StateId>::iterator siter = cache_states_.begin(); + + size_t cache_target = cache_fraction * cache_limit_; + while (siter != cache_states_.end()) { + StateId s = *siter; + S* state = VectorFstBaseImpl<S>::GetState(s); + if (cache_size_ > cache_target && state->ref_count == 0 && + (free_recent || !(state->flags & kCacheRecent)) && s != current) { + cache_size_ -= sizeof(S) + state->arcs.capacity() * sizeof(Arc); + allocator_->Free(state, s); + SetState(s, 0); + cache_states_.erase(siter++); + } else { + state->flags &= ~kCacheRecent; + ++siter; + } + } + if (!free_recent && cache_size_ > cache_target) { // recurses on recent + GC(current, true); + } else if (cache_target > 0) { // widens cache limit + while (cache_size_ > cache_target) { + cache_limit_ *= 2; + cache_target *= 2; + } + } else if (cache_size_ > 0) { + FSTERROR() << "CacheImpl:GC: Unable to free all cached states"; + } + VLOG(2) << "CacheImpl: Exit GC: object = " << Type() << "(" << this + << "), free recently cached = " << free_recent + << ", cache size = " << cache_size_ + << ", cache frac = " << cache_fraction + << ", cache limit = " << cache_limit_ << "\n"; +} + +template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheFinal; +template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheArcs; +template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheRecent; +template <class S, class C> const uint32 CacheBaseImpl<S, C>::kCacheModified; +template <class S, class C> const size_t CacheBaseImpl<S, C>::kMinCacheLimit; + +// Arcs implemented by an STL 
vector per state. Similar to VectorState +// but adds flags and ref count to keep track of what has been cached. +template <class A> +struct CacheState { + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + + CacheState() : final(Weight::Zero()), flags(0), ref_count(0) {} + + void Reset() { + flags = 0; + ref_count = 0; + arcs.resize(0); + } + + Weight final; // Final weight + vector<A> arcs; // Arcs represenation + size_t niepsilons; // # of input epsilons + size_t noepsilons; // # of output epsilons + mutable uint32 flags; + mutable int ref_count; +}; + +// A CacheBaseImpl with a commonly used CacheState. +template <class A> +class CacheImpl : public CacheBaseImpl< CacheState<A> > { + public: + typedef CacheState<A> State; + + CacheImpl() {} + + explicit CacheImpl(const CacheOptions &opts) + : CacheBaseImpl< CacheState<A> >(opts) {} + + CacheImpl(const CacheImpl<A> &impl, bool preserve_cache = false) + : CacheBaseImpl<State>(impl, preserve_cache) {} + + private: + void operator=(const CacheImpl<State> &impl); // disallow +}; + + +// Use this to make a state iterator for a CacheBaseImpl-derived Fst, +// which must have type 'State' defined. Note this iterator only +// returns those states reachable from the initial state, so consider +// implementing a class-specific one. 
+template <class F> +class CacheStateIterator : public StateIteratorBase<typename F::Arc> { + public: + typedef typename F::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename F::State State; + typedef CacheBaseImpl<State> Impl; + + CacheStateIterator(const F &fst, Impl *impl) + : fst_(fst), impl_(impl), s_(0) { + fst_.Start(); // force start state + } + + bool Done() const { + if (s_ < impl_->NumKnownStates()) + return false; + if (s_ < impl_->NumKnownStates()) + return false; + for (StateId u = impl_->MinUnexpandedState(); + u < impl_->NumKnownStates(); + u = impl_->MinUnexpandedState()) { + // force state expansion + ArcIterator<F> aiter(fst_, u); + aiter.SetFlags(kArcValueFlags, kArcValueFlags | kArcNoCache); + for (; !aiter.Done(); aiter.Next()) + impl_->UpdateNumKnownStates(aiter.Value().nextstate); + impl_->ExpandedState(u); + if (s_ < impl_->NumKnownStates()) + return false; + } + return true; + } + + StateId Value() const { return s_; } + + void Next() { ++s_; } + + void Reset() { s_ = 0; } + + private: + // This allows base class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. + virtual bool Done_() const { return Done(); } + virtual StateId Value_() const { return Value(); } + virtual void Next_() { Next(); } + virtual void Reset_() { Reset(); } + + const F &fst_; + Impl *impl_; + StateId s_; +}; + + +// Use this to make an arc iterator for a CacheBaseImpl-derived Fst, +// which must have types 'Arc' and 'State' defined. 
template <class F,
          class C = DefaultCacheStateAllocator<CacheState<typename F::Arc> > >
class CacheArcIterator {
 public:
  typedef typename F::Arc Arc;
  typedef typename F::State State;
  typedef typename Arc::StateId StateId;
  typedef CacheBaseImpl<State, C> Impl;

  // Looks up (creating if necessary) state s in 'impl' and takes a
  // reference on it for the lifetime of the iterator.
  CacheArcIterator(Impl *impl, StateId s) : i_(0) {
    state_ = impl->ExtendState(s);
    ++state_->ref_count;
  }

  // Releases the reference taken by the constructor.
  ~CacheArcIterator() { --state_->ref_count;  }

  bool Done() const { return i_ >= state_->arcs.size(); }

  const Arc& Value() const { return state_->arcs[i_]; }

  void Next() { ++i_; }

  size_t Position() const { return i_; }

  void Reset() { i_ = 0; }

  void Seek(size_t a) { i_ = a; }

  // Always reports kArcValueFlags.
  uint32 Flags() const {
    return kArcValueFlags;
  }

  // Flags cannot be changed; the request is ignored.
  void SetFlags(uint32 flags, uint32 mask) {}

 private:
  const State *state_;  // state being iterated (ref_count is mutable)
  size_t i_;            // current arc position

  DISALLOW_COPY_AND_ASSIGN(CacheArcIterator);
};

// Use this to make a mutable arc iterator for a CacheBaseImpl-derived Fst,
// which must have types 'Arc' and 'State' defined.
template <class F,
          class C = DefaultCacheStateAllocator<CacheState<typename F::Arc> > >
class CacheMutableArcIterator
    : public MutableArcIteratorBase<typename F::Arc> {
 public:
  typedef typename F::State State;
  typedef typename F::Arc Arc;
  typedef typename Arc::StateId StateId;
  typedef typename Arc::Weight Weight;
  typedef CacheBaseImpl<State, C> Impl;

  // You will need to call MutateCheck() in the constructor.
  // Looks up (creating if necessary) state s in 'impl' and takes a
  // reference on it for the lifetime of the iterator.
  CacheMutableArcIterator(Impl *impl, StateId s) : i_(0), s_(s), impl_(impl) {
    state_ = impl_->ExtendState(s_);
    ++state_->ref_count;
  };

  // Releases the reference taken by the constructor.
  ~CacheMutableArcIterator() {
    --state_->ref_count;
  }

  bool Done() const { return i_ >= state_->arcs.size(); }

  const Arc& Value() const { return state_->arcs[i_]; }

  void Next() { ++i_; }

  size_t Position() const { return i_; }

  void Reset() { i_ = 0; }

  void Seek(size_t a) { i_ = a; }

  // Replaces the arc at the current position with 'arc', keeping the
  // state's epsilon counts and the FST properties in step. The statement
  // order matters: the first half retracts what the old arc contributed,
  // the second half (after the assignment) adds what the new arc
  // contributes, and the final mask drops every property bit outside the
  // listed set.
  void SetValue(const Arc& arc) {
    state_->flags |= CacheBaseImpl<State, C>::kCacheModified;
    uint64 properties = impl_->Properties();
    Arc& oarc = state_->arcs[i_];
    // Positive properties that the old arc established may no longer hold.
    if (oarc.ilabel != oarc.olabel)
      properties &= ~kNotAcceptor;
    if (oarc.ilabel == 0) {
      --state_->niepsilons;
      properties &= ~kIEpsilons;
      if (oarc.olabel == 0)
        properties &= ~kEpsilons;
    }
    if (oarc.olabel == 0) {
      --state_->noepsilons;
      properties &= ~kOEpsilons;
    }
    if (oarc.weight != Weight::Zero() && oarc.weight != Weight::One())
      properties &= ~kWeighted;
    oarc = arc;
    // Properties established (and their opposites refuted) by the new arc.
    if (arc.ilabel != arc.olabel) {
      properties |= kNotAcceptor;
      properties &= ~kAcceptor;
    }
    if (arc.ilabel == 0) {
      ++state_->niepsilons;
      properties |= kIEpsilons;
      properties &= ~kNoIEpsilons;
      if (arc.olabel == 0) {
        properties |= kEpsilons;
        properties &= ~kNoEpsilons;
      }
    }
    if (arc.olabel == 0) {
      ++state_->noepsilons;
      properties |= kOEpsilons;
      properties &= ~kNoOEpsilons;
    }
    if (arc.weight != Weight::Zero() && arc.weight != Weight::One()) {
      properties |= kWeighted;
      properties &= ~kUnweighted;
    }
    properties &= kSetArcProperties | kAcceptor | kNotAcceptor |
        kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons |
        kOEpsilons | kNoOEpsilons | kWeighted | kUnweighted;
    impl_->SetProperties(properties);
  }

  // Always reports kArcValueFlags.
  uint32 Flags() const {
    return kArcValueFlags;
  }

  // Flags cannot be changed; the request is ignored.
  void SetFlags(uint32 f, uint32 m) {}

 private:
  // Forwarders from the underscore-suffixed interface to the non-virtual
  // members of the same name above.
  // NOTE(review): Flags_ and SetFlags_ are not spelled 'virtual' here;
  // presumably they override virtuals declared in the base class - confirm.
  virtual bool Done_() const { return Done(); }
  virtual const Arc& Value_() const { return Value(); }
  virtual void Next_() { Next(); }
  virtual size_t Position_() const { return Position(); }
  virtual void Reset_() { Reset(); }
  virtual void Seek_(size_t a) { Seek(a); }
  virtual void SetValue_(const Arc &a) { SetValue(a); }
  uint32 Flags_() const { return Flags(); }
  void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); }

  size_t i_;       // current arc position
  StateId s_;      // ID of the state being iterated
  Impl *impl_;     // cache implementation owning the state (not owned here)
  State *state_;   // state being iterated

  DISALLOW_COPY_AND_ASSIGN(CacheMutableArcIterator);
};

}  // namespace fst

#endif  // FST_LIB_CACHE_H__
diff --git a/kaldi_io/src/tools/openfst/include/fst/closure.h b/kaldi_io/src/tools/openfst/include/fst/closure.h new file mode 100644 index 0000000..541562b --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/closure.h @@ -0,0 +1,155 @@
// closure.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: [email protected] (Michael Riley)
//
// \file
// Functions and classes to compute the concatenative closure of an Fst.

#ifndef FST_LIB_CLOSURE_H__
#define FST_LIB_CLOSURE_H__

#include <vector>
using std::vector;
#include <algorithm>

#include <fst/mutable-fst.h>
#include <fst/rational.h>


namespace fst {

// Computes the concatenative closure. This version modifies its
// MutableFst input. If FST transduces string x to y with weight a,
// then the closure transduces x to y with weight a, xx to yy with
// weight Times(a, a), xxx to yyy with weight Times(Times(a, a), a),
// etc.
If closure_type == CLOSURE_STAR, then the empty string is +// transduced to itself with weight Weight::One() as well. +// +// Complexity: +// - Time: O(V) +// - Space: O(V) +// where V = # of states. +template<class Arc> +void Closure(MutableFst<Arc> *fst, ClosureType closure_type) { + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + uint64 props = fst->Properties(kFstProperties, false); + StateId start = fst->Start(); + for (StateIterator< MutableFst<Arc> > siter(*fst); + !siter.Done(); + siter.Next()) { + StateId s = siter.Value(); + Weight final = fst->Final(s); + if (final != Weight::Zero()) + fst->AddArc(s, Arc(0, 0, final, start)); + } + if (closure_type == CLOSURE_STAR) { + fst->ReserveStates(fst->NumStates() + 1); + StateId nstart = fst->AddState(); + fst->SetStart(nstart); + fst->SetFinal(nstart, Weight::One()); + if (start != kNoLabel) + fst->AddArc(nstart, Arc(0, 0, Weight::One(), start)); + } + fst->SetProperties(ClosureProperties(props, closure_type == CLOSURE_STAR), + kFstProperties); +} + +// Computes the concatenative closure. This version modifies its +// RationalFst input. +template<class Arc> +void Closure(RationalFst<Arc> *fst, ClosureType closure_type) { + fst->GetImpl()->AddClosure(closure_type); +} + + +struct ClosureFstOptions : RationalFstOptions { + ClosureType type; + + ClosureFstOptions(const RationalFstOptions &opts, ClosureType t) + : RationalFstOptions(opts), type(t) {} + explicit ClosureFstOptions(ClosureType t) : type(t) {} + ClosureFstOptions() : type(CLOSURE_STAR) {} +}; + + +// Computes the concatenative closure. This version is a delayed +// Fst. If FST transduces string x to y with weight a, then the +// closure transduces x to y with weight a, xx to yy with weight +// Times(a, a), xxx to yyy with weight Times(Times(a, a), a), etc. If +// closure_type == CLOSURE_STAR, then The empty string is transduced +// to itself with weight Weight::One() as well. 
//
// Complexity:
// - Time: O(v)
// - Space: O(v)
// where v = # of states visited. Constant time and space to visit an
// input state or arc is assumed and exclusive of caching.
template <class A>
class ClosureFst : public RationalFst<A> {
 public:
  using ImplToFst< RationalFstImpl<A> >::GetImpl;

  typedef A Arc;

  // Builds the delayed closure of 'fst' with default RationalFst options.
  ClosureFst(const Fst<A> &fst, ClosureType closure_type) {
    GetImpl()->InitClosure(fst, closure_type);
  }

  // As above; the closure type is taken from opts.type and 'opts' is also
  // passed on to the RationalFst base.
  ClosureFst(const Fst<A> &fst, const ClosureFstOptions &opts)
      : RationalFst<A>(opts) {
    GetImpl()->InitClosure(fst, opts.type);
  }

  // See Fst<>::Copy() for doc.
  ClosureFst(const ClosureFst<A> &fst, bool safe = false)
      : RationalFst<A>(fst, safe) {}

  // Get a copy of this ClosureFst. See Fst<>::Copy() for further doc.
  // The caller owns the returned object.
  virtual ClosureFst<A> *Copy(bool safe = false) const {
    return new ClosureFst<A>(*this, safe);
  }
};


// Specialization for ClosureFst. Forwards to the RationalFst state
// iterator.
template <class A>
class StateIterator< ClosureFst<A> > : public StateIterator< RationalFst<A> > {
 public:
  explicit StateIterator(const ClosureFst<A> &fst)
      : StateIterator< RationalFst<A> >(fst) {}
};


// Specialization for ClosureFst. Forwards to the RationalFst arc
// iterator.
template <class A>
class ArcIterator< ClosureFst<A> > : public ArcIterator< RationalFst<A> > {
 public:
  typedef typename A::StateId StateId;

  ArcIterator(const ClosureFst<A> &fst, StateId s)
      : ArcIterator< RationalFst<A> >(fst, s) {}
};


// Useful alias when using StdArc.
typedef ClosureFst<StdArc> StdClosureFst;

}  // namespace fst

#endif  // FST_LIB_CLOSURE_H__
diff --git a/kaldi_io/src/tools/openfst/include/fst/compact-fst.h b/kaldi_io/src/tools/openfst/include/fst/compact-fst.h new file mode 100644 index 0000000..6db3317 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/compact-fst.h @@ -0,0 +1,1438 @@
// compact-fst.h


// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: [email protected] (Cyril Allauzen)
//
// \file
// FST Class for memory-efficient representation of common types of
// FSTs: linear automata, acceptors, unweighted FSTs, ...

#ifndef FST_LIB_COMPACT_FST_H__
#define FST_LIB_COMPACT_FST_H__

#include <iterator>
#include <utility>
using std::pair; using std::make_pair;
#include <vector>
using std::vector;

#include <fst/cache.h>
#include <fst/expanded-fst.h>
#include <fst/fst-decl.h>  // For optional argument declarations
#include <fst/mapped-file.h>
#include <fst/matcher.h>
#include <fst/test-properties.h>
#include <fst/util.h>


namespace fst {

// Cache options for CompactFst. The default constructor selects GC
// enabled with a byte limit of 0; the second constructor adopts the given
// CacheOptions unchanged and is deliberately usable as an implicit
// conversion.
struct CompactFstOptions : public CacheOptions {
  // CompactFst default caching behaviour is to do no caching. Most
  // compactors are cheap and therefore we save memory by not doing
  // caching.
  CompactFstOptions() : CacheOptions(true, 0) {}
  CompactFstOptions(const CacheOptions &opts) : CacheOptions(opts) {}
};

// Compactor Interface - class determines how arcs and final weights
// are compacted and expanded.
//
// Final weights are treated as transitions to the superfinal state,
// i.e. ilabel = olabel = kNoLabel and nextstate = kNoStateId.
//
// There are two types of compactors:
//
// * Fixed out-degree compactors: 'compactor.Size()' returns a
// positive integer 's'. An FST can be compacted by this compactor
// only if each state has exactly 's' outgoing transitions (counting a
// non-Zero() final weight as a transition).
// A typical example is a
// compactor for string FSTs, i.e. 's == 1'.
//
// * Variable out-degree compactors: 'compactor.Size() == -1'. There
// are no out-degree restrictions for these compactors.
//
//
// class Compactor {
//  public:
//   // Element is the type of the compacted transitions.
//   typedef ... Element;
//   // Return the compacted representation of a transition 'arc'
//   // at a state 's'.
//   Element Compact(StateId s, const Arc &arc);
//   // Return the transition at state 's' represented by the compacted
//   // transition 'e'.
//   Arc Expand(StateId s, const Element &e);
//   // Return -1 for variable out-degree compactors, and the mandatory
//   // out-degree otherwise.
//   ssize_t Size();
//   // Test whether 'fst' can be compacted by this compactor.
//   bool Compatible(const Fst<A> &fst);
//   // Return the properties that are always true for an fst
//   // compacted using this compactor
//   uint64 Properties();
//   // Return a string identifying the type of compactor.
//   static const string &Type();
//   // Write a compactor to a file.
//   bool Write(ostream &strm);
//   // Read a compactor from a file.
//   static Compactor *Read(istream &strm);
//   // Default constructor (optional, see comment below).
//   Compactor();
// };
//
// The default constructor is only required for FST_REGISTER to work
// (i.e. enabling Convert() and the command-line utilities to work
// with this new compactor). However, a default constructor always
// needs to be specified for this code to compile, but one can have it
// simply raise an error when called:
//
// Compactor::Compactor() {
//   FSTERROR() << "Compactor: no default constructor";
// }


// Implementation data for Compact Fst, which can be shared between otherwise
// independent copies.
//
// The implementation contains two arrays: 'states_' and 'compacts_'.
//
// For fixed out-degree compactors, the 'states_' array is unallocated.
// The 'compacts_' contains the compacted transitions.
Its size is +// 'ncompacts_'. The outgoing transitions at a given state are stored +// consecutively. For a given state 's', its 'compactor.Size()' outgoing +// transitions (including superfinal transition when 's' is final), are +// stored in position ['s*compactor.Size()', '(s+1)*compactor_.Size()'). +// +// For variable out-degree compactors, the states_ array has size +// 'nstates_ + 1' and contains pointers to positions into 'compacts_'. +// For a given state 's', the compacted transitions of 's' are +// stored in positions [ 'states_[s]', 'states_[s + 1]' ) in 'compacts_'. +// By convention, 'states_[nstates_] == ncompacts_'. +// +// In both cases, the superfinal transitons (when 's' is final, i.e. +// 'Final(s) != Weight::Zero()') is stored first. +// +// The unsigned type U is used to represent indices into the compacts_ +// array. +template <class E, class U> +class CompactFstData { + public: + typedef E CompactElement; + typedef U Unsigned; + + CompactFstData() + : states_region_(0), + compacts_region_(0), + states_(0), + compacts_(0), + nstates_(0), + ncompacts_(0), + narcs_(0), + start_(kNoStateId), + error_(false) {} + + template <class A, class Compactor> + CompactFstData(const Fst<A> &fst, const Compactor &compactor); + + template <class Iterator, class Compactor> + CompactFstData(const Iterator &begin, const Iterator &end, + const Compactor &compactor); + + ~CompactFstData() { + if (states_region_ == NULL) { + delete [] states_; + } + delete states_region_; + if (compacts_region_ == NULL) { + delete [] compacts_; + } + delete compacts_region_; + } + + template <class Compactor> + static CompactFstData<E, U> *Read(istream &strm, + const FstReadOptions &opts, + const FstHeader &hdr, + const Compactor &compactor); + + bool Write(ostream &strm, const FstWriteOptions &opts) const; + + Unsigned States(ssize_t i) const { return states_[i]; } + const CompactElement &Compacts(size_t i) const { return compacts_[i]; } + size_t NumStates() const { return 
nstates_; } + size_t NumCompacts() const { return ncompacts_; } + size_t NumArcs() const { return narcs_; } + ssize_t Start() const { return start_; } + + int RefCount() const { return ref_count_.count(); } + int IncrRefCount() { return ref_count_.Incr(); } + int DecrRefCount() { return ref_count_.Decr(); } + + bool Error() const { return error_; } + + private: + MappedFile *states_region_; + MappedFile *compacts_region_; + Unsigned *states_; + CompactElement *compacts_; + size_t nstates_; + size_t ncompacts_; + size_t narcs_; + ssize_t start_; + RefCounter ref_count_; + bool error_; +}; + +template <class E, class U> +template <class A, class C> +CompactFstData<E, U>::CompactFstData(const Fst<A> &fst, const C &compactor) + : states_region_(0), + compacts_region_(0), + states_(0), + compacts_(0), + nstates_(0), + ncompacts_(0), + narcs_(0), + start_(kNoStateId), + error_(false) { + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + start_ = fst.Start(); + // Count # of states and arcs. 
+ StateId nfinals = 0; + for (StateIterator< Fst<A> > siter(fst); + !siter.Done(); + siter.Next()) { + ++nstates_; + StateId s = siter.Value(); + for (ArcIterator< Fst<A> > aiter(fst, s); + !aiter.Done(); + aiter.Next()) + ++narcs_; + if (fst.Final(s) != Weight::Zero()) ++nfinals; + } + if (compactor.Size() == -1) { + states_ = new Unsigned[nstates_ + 1]; + ncompacts_ = narcs_ + nfinals; + compacts_ = new CompactElement[ncompacts_]; + states_[nstates_] = ncompacts_; + } else { + states_ = 0; + ncompacts_ = nstates_ * compactor.Size(); + if ((narcs_ + nfinals) != ncompacts_) { + FSTERROR() << "CompactFstData: compactor incompatible with fst"; + error_ = true; + return; + } + compacts_ = new CompactElement[ncompacts_]; + } + size_t pos = 0, fpos = 0; + for (StateId s = 0; s < nstates_; ++s) { + fpos = pos; + if (compactor.Size() == -1) + states_[s] = pos; + if (fst.Final(s) != Weight::Zero()) + compacts_[pos++] = compactor.Compact(s, A(kNoLabel, kNoLabel, + fst.Final(s), kNoStateId)); + for (ArcIterator< Fst<A> > aiter(fst, s); + !aiter.Done(); + aiter.Next()) { + compacts_[pos++] = compactor.Compact(s, aiter.Value()); + } + if ((compactor.Size() != -1) && ((pos - fpos) != compactor.Size())) { + FSTERROR() << "CompactFstData: compactor incompatible with fst"; + error_ = true; + return; + } + } + if (pos != ncompacts_) { + FSTERROR() << "CompactFstData: compactor incompatible with fst"; + error_ = true; + return; + } +} + +template <class E, class U> +template <class Iterator, class C> +CompactFstData<E, U>::CompactFstData(const Iterator &begin, + const Iterator &end, + const C &compactor) + : states_region_(0), + compacts_region_(0), + states_(0), + compacts_(0), + nstates_(0), + ncompacts_(0), + narcs_(0), + start_(kNoStateId), + error_(false) { + typedef typename C::Arc Arc; + typedef typename Arc::Weight Weight; + if (compactor.Size() != -1) { + ncompacts_ = distance(begin, end); + if (compactor.Size() == 1) { + // For strings, allow implicit final weight. 
+ // Empty input is the empty string. + if (ncompacts_ == 0) { + ++ncompacts_; + } else { + Arc arc = compactor.Expand(ncompacts_ - 1, + *(begin + (ncompacts_ - 1))); + if (arc.ilabel != kNoLabel) + ++ncompacts_; + } + } + if (ncompacts_ % compactor.Size()) { + FSTERROR() << "CompactFstData: size of input container incompatible" + << " with compactor"; + error_ = true; + return; + } + if (ncompacts_ == 0) + return; + start_ = 0; + nstates_ = ncompacts_ / compactor.Size(); + compacts_ = new CompactElement[ncompacts_]; + size_t i = 0; + Iterator it = begin; + for(; it != end; ++it, ++i){ + compacts_[i] = *it; + if (compactor.Expand(i, *it).ilabel != kNoLabel) + ++narcs_; + } + if (i < ncompacts_) + compacts_[i] = compactor.Compact(i, Arc(kNoLabel, kNoLabel, + Weight::One(), kNoStateId)); + } else { + if (distance(begin, end) == 0) + return; + // Count # of states, arcs and compacts. + Iterator it = begin; + for(size_t i = 0; it != end; ++it, ++i) { + Arc arc = compactor.Expand(i, *it); + if (arc.ilabel != kNoLabel) { + ++narcs_; + ++ncompacts_; + } else { + ++nstates_; + if (arc.weight != Weight::Zero()) + ++ncompacts_; + } + } + start_ = 0; + compacts_ = new CompactElement[ncompacts_]; + states_ = new Unsigned[nstates_ + 1]; + states_[nstates_] = ncompacts_; + size_t i = 0, s = 0; + for(it = begin; it != end; ++it) { + Arc arc = compactor.Expand(i, *it); + if (arc.ilabel != kNoLabel) { + compacts_[i++] = *it; + } else { + states_[s++] = i; + if (arc.weight != Weight::Zero()) + compacts_[i++] = *it; + } + } + if ((s != nstates_) || (i != ncompacts_)) { + FSTERROR() << "CompactFstData: ill-formed input container"; + error_ = true; + return; + } + } +} + +template <class E, class U> +template <class C> +CompactFstData<E, U> *CompactFstData<E, U>::Read( + istream &strm, + const FstReadOptions &opts, + const FstHeader &hdr, + const C &compactor) { + CompactFstData<E, U> *data = new CompactFstData<E, U>(); + data->start_ = hdr.Start(); + data->nstates_ = hdr.NumStates(); 
+ data->narcs_ = hdr.NumArcs(); + + if (compactor.Size() == -1) { + if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { + LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source; + delete data; + return 0; + } + size_t b = (data->nstates_ + 1) * sizeof(Unsigned); + data->states_region_ = MappedFile::Map(&strm, opts, b); + if (!strm || data->states_region_ == NULL) { + LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source; + delete data; + return 0; + } + data->states_ = static_cast<Unsigned *>( + data->states_region_->mutable_data()); + } else { + data->states_ = 0; + } + data->ncompacts_ = compactor.Size() == -1 + ? data->states_[data->nstates_] + : data->nstates_ * compactor.Size(); + if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { + LOG(ERROR) << "CompactFst::Read: Alignment failed: " << opts.source; + delete data; + return 0; + } + size_t b = data->ncompacts_ * sizeof(CompactElement); + data->compacts_region_ = MappedFile::Map(&strm, opts, b); + if (!strm || data->compacts_region_ == NULL) { + LOG(ERROR) << "CompactFst::Read: Read failed: " << opts.source; + delete data; + return 0; + } + data->compacts_ = static_cast<CompactElement *>( + data->compacts_region_->mutable_data()); + return data; +} + +template<class E, class U> +bool CompactFstData<E, U>::Write(ostream &strm, + const FstWriteOptions &opts) const { + if (states_) { + if (opts.align && !AlignOutput(strm)) { + LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; + return false; + } + strm.write(reinterpret_cast<char *>(states_), + (nstates_ + 1) * sizeof(Unsigned)); + } + if (opts.align && !AlignOutput(strm)) { + LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; + return false; + } + strm.write(reinterpret_cast<char *>(compacts_), + ncompacts_ * sizeof(CompactElement)); + + strm.flush(); + if (!strm) { + LOG(ERROR) << "CompactFst::Write: Write failed: " << opts.source; + return false; + } + return true; 
+} + +template <class A, class C, class U> class CompactFst; +template <class F, class G> void Cast(const F &, G *); + +// Implementation class for CompactFst, which contains CompactFstData +// and Fst cache. +template <class A, class C, class U> +class CompactFstImpl : public CacheImpl<A> { + public: + using FstImpl<A>::SetType; + using FstImpl<A>::SetProperties; + using FstImpl<A>::Properties; + using FstImpl<A>::SetInputSymbols; + using FstImpl<A>::SetOutputSymbols; + using FstImpl<A>::WriteHeader; + + using CacheImpl<A>::PushArc; + using CacheImpl<A>::HasArcs; + using CacheImpl<A>::HasFinal; + using CacheImpl<A>::HasStart; + using CacheImpl<A>::SetArcs; + using CacheImpl<A>::SetFinal; + using CacheImpl<A>::SetStart; + + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef C Compactor; + typedef typename C::Element CompactElement; + typedef U Unsigned; + + CompactFstImpl() + : CacheImpl<A>(CompactFstOptions()), + compactor_(0), + own_compactor_(false), + data_(0) { + string type = "compact"; + if (sizeof(U) != sizeof(uint32)) { + string size; + Int64ToStr(8 * sizeof(U), &size); + type += size; + } + type += "_"; + type += C::Type(); + SetType(type); + SetProperties(kNullProperties | kStaticProperties); + } + + CompactFstImpl(const Fst<Arc> &fst, const C &compactor, + const CompactFstOptions &opts) + : CacheImpl<A>(opts), + compactor_(new C(compactor)), + own_compactor_(true), + data_(0) { + Init(fst); + } + + CompactFstImpl(const Fst<Arc> &fst, C *compactor, + const CompactFstOptions &opts) + : CacheImpl<A>(opts), + compactor_(compactor), + own_compactor_(false), + data_(0) { + Init(fst); + } + + template <class Iterator> + CompactFstImpl(const Iterator &b, const Iterator &e, const C &compactor, + const CompactFstOptions &opts) + : CacheImpl<A>(opts), + compactor_(new C(compactor)), + own_compactor_(true), + data_(0) { + Init(b, e); + } + + template <class Iterator> + CompactFstImpl(const Iterator &b, const 
Iterator &e, C *compactor, + const CompactFstOptions &opts) + : CacheImpl<A>(opts), + compactor_(compactor), + own_compactor_(false), + data_(0) { + Init(b, e); + } + + CompactFstImpl(const CompactFstImpl<A, C, U> &impl) + : CacheImpl<A>(impl), + compactor_(new C(*impl.compactor_)), + own_compactor_(true), + data_(impl.data_) { + if (data_) + data_->IncrRefCount(); + SetType(impl.Type()); + SetProperties(impl.Properties()); + SetInputSymbols(impl.InputSymbols()); + SetOutputSymbols(impl.OutputSymbols()); + } + + ~CompactFstImpl(){ + if (own_compactor_) + delete compactor_; + if (data_ && !data_->DecrRefCount()) + delete data_; + } + + StateId Start() { + if (!HasStart()) { + SetStart(data_->Start()); + } + return CacheImpl<A>::Start(); + } + + Weight Final(StateId s) { + if (HasFinal(s)) + return CacheImpl<A>::Final(s); + Arc arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId); + if ((compactor_->Size() != -1) || + (data_->States(s) != data_->States(s + 1))) + arc = ComputeArc(s, + compactor_->Size() == -1 + ? data_->States(s) + : s * compactor_->Size()); + return arc.ilabel == kNoLabel ? 
arc.weight : Weight::Zero(); + } + + StateId NumStates() const { + if (Properties(kError)) return 0; + return data_->NumStates(); + } + + size_t NumArcs(StateId s) { + if (HasArcs(s)) + return CacheImpl<A>::NumArcs(s); + Unsigned i, num_arcs; + if (compactor_->Size() == -1) { + i = data_->States(s); + num_arcs = data_->States(s + 1) - i; + } else { + i = s * compactor_->Size(); + num_arcs = compactor_->Size(); + } + if (num_arcs > 0) { + const A &arc = ComputeArc(s, i, kArcILabelValue); + if (arc.ilabel == kNoStateId) { + --num_arcs; + } + } + return num_arcs; + } + + size_t NumInputEpsilons(StateId s) { + if (!HasArcs(s) && !Properties(kILabelSorted)) + Expand(s); + if (HasArcs(s)) + return CacheImpl<A>::NumInputEpsilons(s); + return CountEpsilons(s, false); + } + + size_t NumOutputEpsilons(StateId s) { + if (!HasArcs(s) && !Properties(kOLabelSorted)) + Expand(s); + if (HasArcs(s)) + return CacheImpl<A>::NumOutputEpsilons(s); + return CountEpsilons(s, true); + } + + size_t CountEpsilons(StateId s, bool output_epsilons) { + size_t begin = compactor_->Size() == -1 ? + data_->States(s) : s * compactor_->Size(); + size_t end = compactor_->Size() == -1 ? + data_->States(s + 1) : (s + 1) * compactor_->Size(); + size_t num_eps = 0; + for (size_t i = begin; i < end; ++i) { + const A &arc = ComputeArc( + s, i, output_epsilons ? kArcOLabelValue : kArcILabelValue); + const typename A::Label &label = + (output_epsilons ? 
arc.olabel : arc.ilabel); + if (label == kNoLabel) + continue; + else if (label > 0) + break; + ++num_eps; + } + return num_eps; + } + + static CompactFstImpl<A, C, U> *Read(istream &strm, + const FstReadOptions &opts) { + CompactFstImpl<A, C, U> *impl = new CompactFstImpl<A, C, U>(); + FstHeader hdr; + if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { + delete impl; + return 0; + } + + // Ensures compatibility + if (hdr.Version() == kAlignedFileVersion) + hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED); + + impl->compactor_ = C::Read(strm); + if (!impl->compactor_) { + delete impl; + return 0; + } + impl->own_compactor_ = true; + impl->data_ = CompactFstData<CompactElement, U>::Read(strm, opts, hdr, + *impl->compactor_); + if (!impl->data_) { + delete impl; + return 0; + } + return impl; + } + + bool Write(ostream &strm, const FstWriteOptions &opts) const { + FstHeader hdr; + hdr.SetStart(data_->Start()); + hdr.SetNumStates(data_->NumStates()); + hdr.SetNumArcs(data_->NumArcs()); + + // Ensures compatibility + int file_version = opts.align ? kAlignedFileVersion : kFileVersion; + WriteHeader(strm, opts, file_version, &hdr); + compactor_->Write(strm); + return data_->Write(strm, opts); + } + + // Provide information needed for generic state iterator + void InitStateIterator(StateIteratorData<A> *data) const { + data->base = 0; + data->nstates = data_->NumStates(); + } + + void InitArcIterator(StateId s, ArcIteratorData<A> *data) { + if (!HasArcs(s)) + Expand(s); + CacheImpl<A>::InitArcIterator(s, data); + } + + Arc ComputeArc(StateId s, Unsigned i, uint32 f = kArcValueFlags) const { + return compactor_->Expand(s, data_->Compacts(i), f); + } + + void Expand(StateId s) { + size_t begin = compactor_->Size() == -1 ? + data_->States(s) : s * compactor_->Size(); + size_t end = compactor_->Size() == -1 ? 
+ data_->States(s + 1) : (s + 1) * compactor_->Size(); + for (size_t i = begin; i < end; ++i) { + const Arc &arc = ComputeArc(s, i); + if (arc.ilabel == kNoLabel) + SetFinal(s, arc.weight); + else + PushArc(s, arc); + } + if (!HasFinal(s)) + SetFinal(s, Weight::Zero()); + SetArcs(s); + } + + template <class Iterator> + void SetCompactElements(const Iterator &b, const Iterator &e) { + if (data_ && !data_->DecrRefCount()) + delete data_; + data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_); + } + + C *GetCompactor() const { return compactor_; } + CompactFstData<CompactElement, U> *Data() const { return data_; } + + // Properties always true of this Fst class + static const uint64 kStaticProperties = kExpanded; + + protected: + template <class B, class D> + explicit CompactFstImpl(const CompactFstImpl<B, D, U> &impl) + : CacheImpl<A>(CacheOptions(impl.GetCacheGc(), impl.GetCacheLimit())), + compactor_(new C(*impl.GetCompactor())), + own_compactor_(true), + data_(impl.Data()) { + if (data_) + data_->IncrRefCount(); + SetType(impl.Type()); + SetProperties(impl.Properties()); + SetInputSymbols(impl.InputSymbols()); + SetOutputSymbols(impl.OutputSymbols()); + } + + private: + friend class CompactFst<A, C, U>; // allow access during write. 
+ + void Init(const Fst<Arc> &fst) { + string type = "compact"; + if (sizeof(U) != sizeof(uint32)) { + string size; + Int64ToStr(8 * sizeof(U), &size); + type += size; + } + type += "_"; + type += compactor_->Type(); + SetType(type); + SetInputSymbols(fst.InputSymbols()); + SetOutputSymbols(fst.OutputSymbols()); + data_ = new CompactFstData<CompactElement, U>(fst, *compactor_); + if (data_->Error()) + SetProperties(kError, kError); + uint64 copy_properties = fst.Properties(kCopyProperties, true); + if ((copy_properties & kError) || !compactor_->Compatible(fst)) { + FSTERROR() << "CompactFstImpl: input fst incompatible with compactor"; + SetProperties(kError, kError); + return; + } + SetProperties(copy_properties | kStaticProperties); + } + + template <class Iterator> + void Init(const Iterator &b, const Iterator &e) { + string type = "compact"; + if (sizeof(U) != sizeof(uint32)) { + string size; + Int64ToStr(8 * sizeof(U), &size); + type += size; + } + type += "_"; + type += compactor_->Type(); + SetType(type); + SetProperties(kStaticProperties | compactor_->Properties()); + data_ = new CompactFstData<CompactElement, U>(b, e, *compactor_); + if (data_->Error()) + SetProperties(kError, kError); + } + + // Current unaligned file format version + static const int kFileVersion = 2; + // Current aligned file format version + static const int kAlignedFileVersion = 1; + // Minimum file format version supported + static const int kMinFileVersion = 1; + + C *compactor_; + bool own_compactor_; + CompactFstData<CompactElement, U> *data_; +}; + +template <class A, class C, class U> +const uint64 CompactFstImpl<A, C, U>::kStaticProperties; +template <class A, class C, class U> +const int CompactFstImpl<A, C, U>::kFileVersion; +template <class A, class C, class U> +const int CompactFstImpl<A, C, U>::kAlignedFileVersion; +template <class A, class C, class U> +const int CompactFstImpl<A, C, U>::kMinFileVersion; + + +// CompactFst. 
This class attaches interface to implementation and +// handles reference counting, delegating most methods to +// ImplToExpandedFst. The unsigned type U is used to represent indices +// into the compact arc array (uint32 by default, declared in +// fst-decl.h). +template <class A, class C, class U> +class CompactFst : public ImplToExpandedFst< CompactFstImpl<A, C, U> > { + public: + friend class StateIterator< CompactFst<A, C, U> >; + friend class ArcIterator< CompactFst<A, C, U> >; + template <class F, class G> void friend Cast(const F &, G *); + + typedef A Arc; + typedef typename A::StateId StateId; + typedef CompactFstImpl<A, C, U> Impl; + typedef CacheState<A> State; + typedef U Unsigned; + + CompactFst() : ImplToExpandedFst<Impl>(new Impl()) {} + + explicit CompactFst(const Fst<A> &fst, const C &compactor = C(), + const CompactFstOptions &opts = CompactFstOptions()) + : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {} + + CompactFst(const Fst<A> &fst, C *compactor, + const CompactFstOptions &opts = CompactFstOptions()) + : ImplToExpandedFst<Impl>(new Impl(fst, compactor, opts)) {} + + // The following 2 constructors take as input two iterators delimiting + // a set of (already) compacted transitions, starting with the + // transitions out of the initial state. The format of the input + // differs for fixed out-degree and variable out-degree compactors. + // + // - For fixed out-degree compactors, the final weight (encoded as a + // compacted transition) needs to be given only for final + // states. All strings (compactor of size 1) will be assume to be + // terminated by a final state even when the final state is not + // implicitely given. + // + // - For variable out-degree compactors, the final weight (encoded + // as a compacted transition) needs to be given for all states and + // must appeared first in the list (for state s, final weight of s, + // followed by outgoing transitons in s). 
+ // + // These 2 constructors allows the direct construction of a CompactFst + // without first creating a more memory hungry 'regular' FST. This + // is useful when memory usage is severely constrained. + template <class Iterator> + explicit CompactFst(const Iterator &begin, const Iterator &end, + const C &compactor = C(), + const CompactFstOptions &opts = CompactFstOptions()) + : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {} + + template <class Iterator> + CompactFst(const Iterator &begin, const Iterator &end, + C *compactor, const CompactFstOptions &opts = CompactFstOptions()) + : ImplToExpandedFst<Impl>(new Impl(begin, end, compactor, opts)) {} + + // See Fst<>::Copy() for doc. + CompactFst(const CompactFst<A, C, U> &fst, bool safe = false) + : ImplToExpandedFst<Impl>(fst, safe) {} + + // Get a copy of this CompactFst. See Fst<>::Copy() for further doc. + virtual CompactFst<A, C, U> *Copy(bool safe = false) const { + return new CompactFst<A, C, U>(*this, safe); + } + + // Read a CompactFst from an input stream; return NULL on error + static CompactFst<A, C, U> *Read(istream &strm, const FstReadOptions &opts) { + Impl* impl = Impl::Read(strm, opts); + return impl ? new CompactFst<A, C, U>(impl) : 0; + } + + // Read a CompactFst from a file; return NULL on error + // Empty filename reads from standard input + static CompactFst<A, C, U> *Read(const string &filename) { + Impl* impl = ImplToExpandedFst<Impl>::Read(filename); + return impl ? 
new CompactFst<A, C, U>(impl) : 0; + } + + virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { + return GetImpl()->Write(strm, opts); + } + + virtual bool Write(const string &filename) const { + return Fst<A>::WriteFile(filename); + } + + template <class F> + static bool WriteFst(const F &fst, const C &compactor, ostream &strm, + const FstWriteOptions &opts); + + virtual void InitStateIterator(StateIteratorData<A> *data) const { + GetImpl()->InitStateIterator(data); + } + + virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { + GetImpl()->InitArcIterator(s, data); + } + + virtual MatcherBase<A> *InitMatcher(MatchType match_type) const { + return new SortedMatcher<CompactFst<A, C, U> >(*this, match_type); + } + + template <class Iterator> + void SetCompactElements(const Iterator &b, const Iterator &e) { + GetImpl()->SetCompactElements(b, e); + } + + private: + CompactFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {} + + // Makes visible to friends. + Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); } + + void SetImpl(Impl *impl, bool own_impl = false) { + ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl); + } + + // Use overloading to extract the type of the argument. + static Impl* GetImplIfCompactFst(const CompactFst<A, C, U> &compact_fst) { + return compact_fst.GetImpl(); + } + + // This does not give privileged treatment to subclasses of CompactFst. + template<typename NonCompactFst> + static Impl* GetImplIfCompactFst(const NonCompactFst& fst) { + return NULL; + } + + void operator=(const CompactFst<A, C, U> &fst); // disallow +}; + +// Writes Fst in Compact format, potentially with a pass over the machine +// before writing to compute the number of states and arcs. 
+// +template <class A, class C, class U> +template <class F> +bool CompactFst<A, C, U>::WriteFst(const F &fst, + const C &compactor, + ostream &strm, + const FstWriteOptions &opts) { + typedef U Unsigned; + typedef typename C::Element CompactElement; + typedef typename A::Weight Weight; + int file_version = opts.align ? + CompactFstImpl<A, C, U>::kAlignedFileVersion : + CompactFstImpl<A, C, U>::kFileVersion; + size_t num_arcs = -1, num_states = -1, num_compacts = -1; + C first_pass_compactor = compactor; + if (Impl* impl = GetImplIfCompactFst(fst)) { + num_arcs = impl->Data()->NumArcs(); + num_states = impl->Data()->NumStates(); + num_compacts = impl->Data()->NumCompacts(); + first_pass_compactor = *impl->GetCompactor(); + } else { + // A first pass is needed to compute the state of the compactor, which + // is saved ahead of the rest of the data structures. This unfortunately + // means forcing a complete double compaction when writing in this format. + // TODO(allauzen): eliminate mutable state from compactors. 
+ num_arcs = 0; + num_states = 0; + for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { + const StateId s = siter.Value(); + ++num_states; + if (fst.Final(s) != Weight::Zero()) { + first_pass_compactor.Compact( + s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId)); + } + for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) { + ++num_arcs; + first_pass_compactor.Compact(s, aiter.Value()); + } + } + } + FstHeader hdr; + hdr.SetStart(fst.Start()); + hdr.SetNumStates(num_states); + hdr.SetNumArcs(num_arcs); + string type = "compact"; + if (sizeof(U) != sizeof(uint32)) { + string size; + Int64ToStr(8 * sizeof(U), &size); + type += size; + } + type += "_"; + type += C::Type(); + uint64 copy_properties = fst.Properties(kCopyProperties, true); + if ((copy_properties & kError) || !compactor.Compatible(fst)) { + LOG(ERROR) << "fst incompatible with compactor"; + return false; + } + uint64 properties = copy_properties | + CompactFstImpl<A, C, U>::kStaticProperties; + FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties, + &hdr); + first_pass_compactor.Write(strm); + if (first_pass_compactor.Size() == -1) { + if (opts.align && !AlignOutput(strm)) { + LOG(ERROR) << "CompactFst::Write: Alignment failed: " << opts.source; + return false; + } + Unsigned compacts = 0; + for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { + const StateId s = siter.Value(); + strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts)); + if (fst.Final(s) != Weight::Zero()) { + ++compacts; + } + compacts += fst.NumArcs(s); + } + strm.write(reinterpret_cast<const char *>(&compacts), sizeof(compacts)); + } + if (opts.align && !AlignOutput(strm)) { + LOG(ERROR) << "Could not align file during write after writing states"; + } + C second_pass_compactor = compactor; + CompactElement element; + for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { + const StateId s = siter.Value(); + if (fst.Final(s) != Weight::Zero()) { + 
element = second_pass_compactor.Compact( + s, A(kNoLabel, kNoLabel, fst.Final(s), kNoStateId)); + strm.write(reinterpret_cast<const char *>(&element), sizeof(element)); + } + for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) { + element = second_pass_compactor.Compact(s, aiter.Value()); + strm.write(reinterpret_cast<const char *>(&element), sizeof(element)); + } + } + strm.flush(); + if (!strm) { + LOG(ERROR) << "CompactFst write failed: " << opts.source; + return false; + } + return true; +} + + +// Specialization for CompactFst; see generic version in fst.h +// for sample usage (but use the CompactFst type!). This version +// should inline. +template <class A, class C, class U> +class StateIterator< CompactFst<A, C, U> > { + public: + typedef typename A::StateId StateId; + + explicit StateIterator(const CompactFst<A, C, U> &fst) + : nstates_(fst.GetImpl()->NumStates()), s_(0) {} + + bool Done() const { return s_ >= nstates_; } + + StateId Value() const { return s_; } + + void Next() { ++s_; } + + void Reset() { s_ = 0; } + + private: + StateId nstates_; + StateId s_; + + DISALLOW_COPY_AND_ASSIGN(StateIterator); +}; + +// Specialization for CompactFst. +// Never caches, always iterates over the underlying compact elements. 
+template <class A, class C, class U> +class ArcIterator< CompactFst<A, C, U> > { + public: + typedef typename A::StateId StateId; + typedef typename C::Element CompactElement; + + ArcIterator(const CompactFst<A, C, U> &fst, StateId s) + : compactor_(fst.GetImpl()->GetCompactor()), state_(s), compacts_(0), + pos_(0), flags_(kArcValueFlags) { + + const CompactFstData<CompactElement, U> *data = fst.GetImpl()->Data(); + size_t offset; + if (compactor_->Size() == -1) { // Variable out-degree compactor + offset = data->States(s); + num_arcs_ = data->States(s + 1) - offset; + } else { // Fixed out-degree compactor + offset = s * compactor_->Size(); + num_arcs_ = compactor_->Size(); + } + if (num_arcs_ > 0) { + compacts_ = &(data->Compacts(offset)); + arc_ = compactor_->Expand(s, *compacts_, kArcILabelValue); + if (arc_.ilabel == kNoStateId) { + ++compacts_; + --num_arcs_; + } + } + } + + ~ArcIterator() {} + + bool Done() const { return pos_ >= num_arcs_; } + + const A& Value() const { + arc_ = compactor_->Expand(state_, compacts_[pos_], flags_); + return arc_; + } + + void Next() { ++pos_; } + + size_t Position() const { return pos_; } + + void Reset() { pos_ = 0; } + + void Seek(size_t pos) { pos_ = pos; } + + uint32 Flags() const { return flags_; } + + void SetFlags(uint32 f, uint32 m) { + flags_ &= ~m; + flags_ |= (f & kArcValueFlags); + } + + private: + C *compactor_; + StateId state_; + const CompactElement *compacts_; + size_t pos_; + size_t num_arcs_; + mutable A arc_; + uint32 flags_; + + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + +// // Specialization for CompactFst. +// // This is an optionally caching arc iterator. +// // TODO(allauzen): implements the kArcValueFlags, the current +// // implementation only implements the kArcNoCache flag. 
+// template <class A, class C, class U> +// class ArcIterator< CompactFst<A, C, U> > { +// public: +// typedef typename A::StateId StateId; + +// ArcIterator(const CompactFst<A, C, U> &fst, StateId s) +// : fst_(fst), state_(s), pos_(0), num_arcs_(0), offset_(0), +// flags_(kArcValueFlags) { +// cache_data_.ref_count = 0; + +// if (fst_.GetImpl()->HasArcs(state_)) { +// fst_.GetImpl()->InitArcIterator(s, &cache_data_); +// num_arcs_ = cache_data_.narcs; +// return; +// } + +// const C *compactor = fst_.GetImpl()->GetCompactor(); +// const CompactFstData<A, C, U> *data = fst_.GetImpl()->Data(); +// if (compactor->Size() == -1) { // Variable out-degree compactor +// offset_ = data->States(s); +// num_arcs_ = data->States(s + 1) - offset_; +// } else { // Fixed out-degree compactor +// offset_ = s * compactor->Size(); +// num_arcs_ = compactor->Size(); +// } +// if (num_arcs_ > 0) { +// const A &arc = fst_.GetImpl()->ComputeArc(s, offset_); +// if (arc.ilabel == kNoStateId) { +// ++offset_; +// --num_arcs_; +// } +// } +// } + + +// ~ArcIterator() { +// if (cache_data_.ref_count) +// --(*cache_data_.ref_count); +// } + +// bool Done() const { return pos_ >= num_arcs_; } + +// const A& Value() const { +// if (cache_data_.ref_count == 0) { +// if (flags_ & kArcNoCache) { +// arc_ = fst_.GetImpl()->ComputeArc(state_, pos_ + offset_); +// return arc_; +// } else { +// fst_.GetImpl()->InitArcIterator(state_, &cache_data_); +// } +// } +// return cache_data_.arcs[pos_]; +// } + +// void Next() { ++pos_; } + +// size_t Position() const { return pos_; } + +// void Reset() { pos_ = 0; } + +// void Seek(size_t pos) { pos_ = pos; } + +// uint32 Flags() const { return flags_; } + +// void SetFlags(uint32 f, uint32 m) { +// flags_ &= ~m; +// flags_ |= f; + +// if (!(flags_ & kArcNoCache) && cache_data_.ref_count == 0) +// fst_.GetImpl()->InitArcIterator(state_, &cache_data_); +// } + +// private: +// mutable const CompactFst<A, C, U> &fst_; +// StateId state_; +// size_t pos_; 
+// size_t num_arcs_; +// size_t offset_; +// uint32 flags_; +// mutable A arc_; +// mutable ArcIteratorData<A> cache_data_; + +// DISALLOW_COPY_AND_ASSIGN(ArcIterator); +// }; + + +// +// Utility Compactors +// + +// Compactor for unweighted string FSTs +template <class A> +class StringCompactor { + public: + typedef A Arc; + typedef typename A::Label Element; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + Element Compact(StateId s, const A &arc) const { return arc.ilabel; } + + Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { + return Arc(p, p, Weight::One(), p != kNoLabel ? s + 1 : kNoStateId); + } + + ssize_t Size() const { return 1; } + + uint64 Properties() const { + return kString | kAcceptor | kUnweighted; + } + + bool Compatible(const Fst<A> &fst) const { + uint64 props = Properties(); + return fst.Properties(props, true) == props; + } + + static const string &Type() { + static const string type = "string"; + return type; + } + + bool Write(ostream &strm) const { return true; } + + static StringCompactor *Read(istream &strm) { + return new StringCompactor; + } +}; + + +// Compactor for weighted string FSTs +template <class A> +class WeightedStringCompactor { + public: + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + typedef pair<Label, Weight> Element; + + Element Compact(StateId s, const A &arc) const { + return make_pair(arc.ilabel, arc.weight); + } + + Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { + return Arc(p.first, p.first, p.second, + p.first != kNoLabel ? 
s + 1 : kNoStateId); + } + + ssize_t Size() const { return 1;} + + uint64 Properties() const { + return kString | kAcceptor; + } + + bool Compatible(const Fst<A> &fst) const { + uint64 props = Properties(); + return fst.Properties(props, true) == props; + } + + static const string &Type() { + static const string type = "weighted_string"; + return type; + } + + bool Write(ostream &strm) const { return true; } + + static WeightedStringCompactor *Read(istream &strm) { + return new WeightedStringCompactor; + } +}; + + +// Compactor for unweighted acceptor FSTs +template <class A> +class UnweightedAcceptorCompactor { + public: + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + typedef pair<Label, StateId> Element; + + Element Compact(StateId s, const A &arc) const { + return make_pair(arc.ilabel, arc.nextstate); + } + + Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { + return Arc(p.first, p.first, Weight::One(), p.second); + } + + ssize_t Size() const { return -1;} + + uint64 Properties() const { + return kAcceptor | kUnweighted; + } + + bool Compatible(const Fst<A> &fst) const { + uint64 props = Properties(); + return fst.Properties(props, true) == props; + } + + static const string &Type() { + static const string type = "unweighted_acceptor"; + return type; + } + + bool Write(ostream &strm) const { return true; } + + static UnweightedAcceptorCompactor *Read(istream &istrm) { + return new UnweightedAcceptorCompactor; + } +}; + + +// Compactor for weighted acceptor FSTs +template <class A> +class AcceptorCompactor { + public: + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + typedef pair< pair<Label, Weight>, StateId > Element; + + Element Compact(StateId s, const A &arc) const { + return make_pair(make_pair(arc.ilabel, arc.weight), arc.nextstate); + } + + Arc Expand(StateId s, const 
Element &p, uint32 f = kArcValueFlags) const { + return Arc(p.first.first, p.first.first, p.first.second, p.second); + } + + ssize_t Size() const { return -1;} + + uint64 Properties() const { + return kAcceptor; + } + + bool Compatible(const Fst<A> &fst) const { + uint64 props = Properties(); + return fst.Properties(props, true) == props; + } + + static const string &Type() { + static const string type = "acceptor"; + return type; + } + + bool Write(ostream &strm) const { return true; } + + static AcceptorCompactor *Read(istream &strm) { + return new AcceptorCompactor; + } +}; + + +// Compactor for unweighted FSTs +template <class A> +class UnweightedCompactor { + public: + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + typedef pair< pair<Label, Label>, StateId > Element; + + Element Compact(StateId s, const A &arc) const { + return make_pair(make_pair(arc.ilabel, arc.olabel), arc.nextstate); + } + + Arc Expand(StateId s, const Element &p, uint32 f = kArcValueFlags) const { + return Arc(p.first.first, p.first.second, Weight::One(), p.second); + } + + ssize_t Size() const { return -1; } + + uint64 Properties() const { + return kUnweighted; + } + + bool Compatible(const Fst<A> &fst) const { + uint64 props = Properties(); + return fst.Properties(props, true) == props; + } + + static const string &Type() { + static const string type = "unweighted"; + return type; + } + + bool Write(ostream &strm) const { return true; } + + static UnweightedCompactor *Read(istream &strm) { + return new UnweightedCompactor; + } +}; + + +// Uselful aliases when using StdArc +typedef CompactFst< StdArc, StringCompactor<StdArc> > +StdCompactStringFst; +typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> > +StdCompactWeightedStringFst; +typedef CompactFst<StdArc, AcceptorCompactor<StdArc> > +StdCompactAcceptorFst; +typedef CompactFst<StdArc, UnweightedCompactor<StdArc> > +StdCompactUnweightedFst; 
+typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> > +StdCompactUnweightedAcceptorFst; + +} // namespace fst + +#endif // FST_LIB_COMPACT_FST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/compat.h b/kaldi_io/src/tools/openfst/include/fst/compat.h new file mode 100644 index 0000000..3b5275d --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/compat.h @@ -0,0 +1,131 @@ +// compat.h +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Author: [email protected] (Michael Riley) +// +// \file +// Google compatibility declarations and inline definitions. 
+ +#ifndef FST_LIB_COMPAT_H__ +#define FST_LIB_COMPAT_H__ + +#include <dlfcn.h> + +#include <climits> +#include <cstdlib> +#include <cstring> +#include <iostream> +#include <string> +#include <vector> + +// Makes copy constructor and operator= private +#define DISALLOW_COPY_AND_ASSIGN(type) \ + type(const type&); \ + void operator=(const type&) + +#include <fst/config.h> +#include <fst/types.h> +#include <fst/lock.h> +#include <fst/flags.h> +#include <fst/log.h> +#include <fst/icu.h> + +using std::cin; +using std::cout; +using std::cerr; +using std::endl; +using std::string; + +void FailedNewHandler(); + +namespace fst { + +using namespace std; + +void SplitToVector(char *line, const char *delim, + std::vector<char *> *vec, bool omit_empty_strings); + +// Downcasting +template<typename To, typename From> +inline To down_cast(From* f) { + return static_cast<To>(f); +} + +// Bitcasting +template <class Dest, class Source> +inline Dest bit_cast(const Source& source) { + // Compile time assertion: sizeof(Dest) == sizeof(Source) + // A compile error here means your Dest and Source have different sizes. + typedef char VerifySizesAreEqual [sizeof(Dest) == sizeof(Source) ? 
1 : + -1]; + Dest dest; + memcpy(&dest, &source, sizeof(dest)); + return dest; +} + +// Check sums +class CheckSummer { + public: + CheckSummer() : count_(0) { + check_sum_.resize(kCheckSumLength, '\0'); + } + + void Reset() { + count_ = 0; + for (int i = 0; i < kCheckSumLength; ++i) + check_sum_[i] = '\0'; + } + + void Update(void const *data, int size) { + const char *p = reinterpret_cast<const char *>(data); + for (int i = 0; i < size; ++i) + check_sum_[(count_++) % kCheckSumLength] ^= p[i]; + } + + void Update(string const &data) { + for (int i = 0; i < data.size(); ++i) + check_sum_[(count_++) % kCheckSumLength] ^= data[i]; + } + + string Digest() { + return check_sum_; + } + + private: + static const int kCheckSumLength = 32; + int count_; + string check_sum_; + + DISALLOW_COPY_AND_ASSIGN(CheckSummer); +}; + +} // namespace fst + + +// Define missing hash functions if needed +#ifndef HAVE_STD__TR1__HASH_LONG_LONG_UNSIGNED_ +namespace std { +namespace tr1 { + +template <class T> class hash; + +template<> struct hash<uint64> { + size_t operator()(uint64 x) const { return x; } +}; + +} +} +#endif // HAVE_STD__TR1__HASH_LONG_LONG_UNSIGNED_ + +#endif // FST_LIB_COMPAT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/complement.h b/kaldi_io/src/tools/openfst/include/fst/complement.h new file mode 100644 index 0000000..dacf396 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/complement.h @@ -0,0 +1,338 @@ +// complement.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Class to complement an Fst. + +#ifndef FST_LIB_COMPLEMENT_H__ +#define FST_LIB_COMPLEMENT_H__ + +#include <algorithm> +#include <string> +#include <vector> +using std::vector; + +#include <fst/fst.h> +#include <fst/test-properties.h> + + +namespace fst { + +template <class A> class ComplementFst; + +// Implementation of delayed ComplementFst. The algorithm used +// completes the (deterministic) FSA and then exchanges final and +// non-final states. Completion, i.e. ensuring that all labels can be +// read from every state, is accomplished by using RHO labels, which +// match all labels that are otherwise not found leaving a state. The +// first state in the output is reserved to be a new state that is the +// destination of all RHO labels. Each remaining output state s +// corresponds to input state s - 1. The first arc in the output at +// these states is the rho label, the remaining arcs correspond to the +// input arcs. 
+template <class A> +class ComplementFstImpl : public FstImpl<A> { + public: + using FstImpl<A>::SetType; + using FstImpl<A>::SetProperties; + using FstImpl<A>::SetInputSymbols; + using FstImpl<A>::SetOutputSymbols; + + friend class StateIterator< ComplementFst<A> >; + friend class ArcIterator< ComplementFst<A> >; + + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + + explicit ComplementFstImpl(const Fst<A> &fst) : fst_(fst.Copy()) { + SetType("complement"); + uint64 props = fst.Properties(kILabelSorted, false); + SetProperties(ComplementProperties(props), kCopyProperties); + SetInputSymbols(fst.InputSymbols()); + SetOutputSymbols(fst.OutputSymbols()); + } + + ComplementFstImpl(const ComplementFstImpl<A> &impl) + : fst_(impl.fst_->Copy()) { + SetType("complement"); + SetProperties(impl.Properties(), kCopyProperties); + SetInputSymbols(impl.InputSymbols()); + SetOutputSymbols(impl.OutputSymbols()); + } + + ~ComplementFstImpl() { delete fst_; } + + StateId Start() const { + if (Properties(kError)) + return kNoStateId; + + StateId start = fst_->Start(); + if (start != kNoStateId) + return start + 1; + else + return 0; + } + + // Exchange final and non-final states; make rho destination state final. + Weight Final(StateId s) const { + if (s == 0 || fst_->Final(s - 1) == Weight::Zero()) + return Weight::One(); + else + return Weight::Zero(); + } + + size_t NumArcs(StateId s) const { + if (s == 0) + return 1; + else + return fst_->NumArcs(s - 1) + 1; + } + + size_t NumInputEpsilons(StateId s) const { + return s == 0 ? 0 : fst_->NumInputEpsilons(s - 1); + } + + size_t NumOutputEpsilons(StateId s) const { + return s == 0 ? 0 : fst_->NumOutputEpsilons(s - 1); + } + + + uint64 Properties() const { return Properties(kFstProperties); } + + // Set error if found; return FST impl properties. 
+ uint64 Properties(uint64 mask) const { + if ((mask & kError) && fst_->Properties(kError, false)) + SetProperties(kError, kError); + return FstImpl<Arc>::Properties(mask); + } + + + private: + const Fst<A> *fst_; + + void operator=(const ComplementFstImpl<A> &fst); // Disallow +}; + + +// Complements an automaton. This is a library-internal operation that +// introduces a (negative) 'rho' label; use Difference/DifferenceFst in +// user code, which will not see this label. This version is a delayed Fst. +// +// This class attaches interface to implementation and handles +// reference counting, delegating most methods to ImplToFst. +template <class A> +class ComplementFst : public ImplToFst< ComplementFstImpl<A> > { + public: + friend class StateIterator< ComplementFst<A> >; + friend class ArcIterator< ComplementFst<A> >; + + using ImplToFst< ComplementFstImpl<A> >::GetImpl; + + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef ComplementFstImpl<A> Impl; + + explicit ComplementFst(const Fst<A> &fst) + : ImplToFst<Impl>(new Impl(fst)) { + uint64 props = kUnweighted | kNoEpsilons | kIDeterministic | kAcceptor; + if (fst.Properties(props, true) != props) { + FSTERROR() << "ComplementFst: argument not an unweighted " + << "epsilon-free deterministic acceptor"; + GetImpl()->SetProperties(kError, kError); + } + } + + // See Fst<>::Copy() for doc. + ComplementFst(const ComplementFst<A> &fst, bool safe = false) + : ImplToFst<Impl>(fst, safe) {} + + // Get a copy of this ComplementFst. See Fst<>::Copy() for further doc. + virtual ComplementFst<A> *Copy(bool safe = false) const { + return new ComplementFst<A>(*this, safe); + } + + virtual inline void InitStateIterator(StateIteratorData<A> *data) const; + + virtual inline void InitArcIterator(StateId s, + ArcIteratorData<A> *data) const; + + // Label that represents the rho transition. 
+ // We use a negative value, which is thus private to the library and + // which will preserve FST label sort order. + static const Label kRhoLabel = -2; + private: + // Makes visible to friends. + Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } + + void operator=(const ComplementFst<A> &fst); // disallow +}; + +template <class A> const typename A::Label ComplementFst<A>::kRhoLabel; + + +// Specialization for ComplementFst. +template <class A> +class StateIterator< ComplementFst<A> > : public StateIteratorBase<A> { + public: + typedef typename A::StateId StateId; + typedef typename A::Label Label; + + explicit StateIterator(const ComplementFst<A> &fst) + : siter_(*fst.GetImpl()->fst_), s_(0) { + } + + bool Done() const { return s_ > 0 && siter_.Done(); } + + StateId Value() const { return s_; } + + void Next() { + if (s_ != 0) + siter_.Next(); + ++s_; + } + + void Reset() { + siter_.Reset(); + s_ = 0; + } + + private: + // This allows base class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. + virtual bool Done_() const { return Done(); } + virtual StateId Value_() const { return Value(); } + virtual void Next_() { Next(); } + virtual void Reset_() { Reset(); } + + StateIterator< Fst<A> > siter_; + StateId s_; + + DISALLOW_COPY_AND_ASSIGN(StateIterator); +}; + + +// Specialization for ComplementFst. 
+template <class A> +class ArcIterator< ComplementFst<A> > : public ArcIteratorBase<A> { + public: + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + + ArcIterator(const ComplementFst<A> &fst, StateId s) + : aiter_(0), s_(s), pos_(0) { + if (s_ != 0) + aiter_ = new ArcIterator< Fst<A> >(*fst.GetImpl()->fst_, s - 1); + } + + virtual ~ArcIterator() { delete aiter_; } + + bool Done() const { + if (s_ != 0) + return pos_ > 0 && aiter_->Done(); + else + return pos_ > 0; + } + + // Adds the rho label to the rho destination state. + const A& Value() const { + if (pos_ == 0) { + arc_.ilabel = arc_.olabel = ComplementFst<A>::kRhoLabel; + arc_.weight = Weight::One(); + arc_.nextstate = 0; + } else { + arc_ = aiter_->Value(); + ++arc_.nextstate; + } + return arc_; + } + + void Next() { + if (s_ != 0 && pos_ > 0) + aiter_->Next(); + ++pos_; + } + + size_t Position() const { + return pos_; + } + + void Reset() { + if (s_ != 0) + aiter_->Reset(); + pos_ = 0; + } + + void Seek(size_t a) { + if (s_ != 0) { + if (a == 0) { + aiter_->Reset(); + } else { + aiter_->Seek(a - 1); + } + } + pos_ = a; + } + + uint32 Flags() const { + return kArcValueFlags; + } + + void SetFlags(uint32 f, uint32 m) {} + + private: + // This allows base class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. 
+ virtual bool Done_() const { return Done(); } + virtual const A& Value_() const { return Value(); } + virtual void Next_() { Next(); } + virtual size_t Position_() const { return Position(); } + virtual void Reset_() { Reset(); } + virtual void Seek_(size_t a) { Seek(a); } + uint32 Flags_() const { return Flags(); } + void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); } + + ArcIterator< Fst<A> > *aiter_; + StateId s_; + size_t pos_; + mutable A arc_; + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + + +template <class A> inline void +ComplementFst<A>::InitStateIterator(StateIteratorData<A> *data) const { + data->base = new StateIterator< ComplementFst<A> >(*this); +} + +template <class A> inline void +ComplementFst<A>::InitArcIterator(StateId s, ArcIteratorData<A> *data) const { + data->base = new ArcIterator< ComplementFst<A> >(*this, s); +} + + +// Useful alias when using StdArc. +typedef ComplementFst<StdArc> StdComplementFst; + +} // namespace fst + +#endif // FST_LIB_COMPLEMENT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/compose-filter.h b/kaldi_io/src/tools/openfst/include/fst/compose-filter.h new file mode 100644 index 0000000..6bf7736 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/compose-filter.h @@ -0,0 +1,542 @@ +// compose-filter.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Michael Riley) +// +// \file +// Classes for filtering the composition matches, e.g. for correct epsilon +// handling. + +#ifndef FST_LIB_COMPOSE_FILTER_H__ +#define FST_LIB_COMPOSE_FILTER_H__ + +#include <fst/fst.h> +#include <fst/fst-decl.h> // For optional argument declarations +#include <fst/matcher.h> + + +namespace fst { + + +// COMPOSITION FILTER STATE - this represents the state of +// the composition filter. It has the form: +// +// class FilterState { +// public: +// // Required constructors +// FilterState(); +// FilterState(const FilterState &f); +// // An invalid filter state. +// static const FilterState NoState(); +// // Maps state to integer for hashing. +// size_t Hash() const; +// // Equality of filter states. +// bool operator==(const FilterState &f) const; +// // Inequality of filter states. +// bool operator!=(const FilterState &f) const; +// // Assignment to filter states. +// FilterState& operator=(const FilterState& f); +// }; + + +// Filter state that is a signed integral type. +template <typename T> +class IntegerFilterState { + public: + IntegerFilterState() : state_(kNoStateId) {} + explicit IntegerFilterState(T s) : state_(s) {} + + static const IntegerFilterState NoState() { return IntegerFilterState(); } + + size_t Hash() const { return static_cast<size_t>(state_); } + + bool operator==(const IntegerFilterState &f) const { + return state_ == f.state_; + } + + bool operator!=(const IntegerFilterState &f) const { + return state_ != f.state_; + } + + T GetState() const { return state_; } + + void SetState(T state) { state_ = state; } + +private: + T state_; +}; + +typedef IntegerFilterState<signed char> CharFilterState; +typedef IntegerFilterState<short> ShortFilterState; +typedef IntegerFilterState<int> IntFilterState; + + +// Filter state that is a weight (class). 
// A default-constructed state holds W::Zero() and is the invalid state.
template <class W>
class WeightFilterState {
 public:
  WeightFilterState() : weight_(W::Zero()) {}
  explicit WeightFilterState(W w) : weight_(w) {}

  // The invalid state is simply the default-constructed one.
  static const WeightFilterState NoState() {
    return WeightFilterState();
  }

  // Hashing is delegated to the weight.
  size_t Hash() const {
    return weight_.Hash();
  }

  bool operator==(const WeightFilterState &f) const {
    return weight_ == f.weight_;
  }

  bool operator!=(const WeightFilterState &f) const {
    return weight_ != f.weight_;
  }

  W GetWeight() const {
    return weight_;
  }

  void SetWeight(W w) {
    weight_ = w;
  }

 private:
  W weight_;   // Stored weight; W::Zero() marks the invalid state.
};


// Filter state that is the combination of two filter states.
// The invalid pair is the one whose two components are both invalid.
template <class F1, class F2>
class PairFilterState {
 public:
  PairFilterState() : f1_(F1::NoState()), f2_(F2::NoState()) {}

  PairFilterState(const F1 &f1, const F2 &f2) : f1_(f1), f2_(f2) {}

  static const PairFilterState NoState() {
    return PairFilterState();
  }

  // Mixes the first hash (shifted left by 5 and right by the remaining
  // bit width) with the second hash using XOR.
  size_t Hash() const {
    const size_t first_hash = f1_.Hash();
    const size_t second_hash = f2_.Hash();
    const int lshift = 5;
    const int rshift = CHAR_BIT * sizeof(size_t) - 5;
    const size_t mixed = (first_hash << lshift) ^ (first_hash >> rshift);
    return mixed ^ second_hash;
  }

  // Equal iff both components are equal.
  bool operator==(const PairFilterState &f) const {
    if (!(f1_ == f.f1_))
      return false;
    return f2_ == f.f2_;
  }

  // Unequal iff either component differs.
  bool operator!=(const PairFilterState &f) const {
    if (f1_ != f.f1_)
      return true;
    return f2_ != f.f2_;
  }

  const F1 &GetState1() const { return f1_; }
  const F2 &GetState2() const { return f2_; }

  void SetState(const F1 &f1, const F2 &f2) {
    f1_ = f1;
    f2_ = f2;
  }

 private:
  F1 f1_;
  F2 f2_;
};


// COMPOSITION FILTERS - these determine which matches are allowed to
// proceed. The filter's state is represented by the type
// ComposeFilter::FilterState. The basic filters handle correct
// epsilon matching.  Their interface is:
//
// template <class M1, class M2>
// class ComposeFilter {
//  public:
//   typedef typename M1::FST FST1;
//   typedef typename M2::FST FST2;
//   typedef typename FST1::Arc Arc;
//   typedef ... FilterState;
//   typedef ... Matcher1;
//   typedef ... Matcher2;
//
//   // Required constructors.
//   ComposeFilter(const FST1 &fst1, const FST2 &fst2,
//                 M1 *matcher1 = 0, M2 *matcher2 = 0);
//   // If safe=true, the copy is thread-safe. See Fst<>::Copy()
//   // for further doc.
//   ComposeFilter(const ComposeFilter<M1, M2> &filter,
//                 bool safe = false);
//   // Return start state of filter.
//   FilterState Start() const;
//   // Specifies current composition state.
//   void SetState(StateId s1, StateId s2, const FilterState &f);
//
//   // Apply filter at current composition state to these transitions.
//   // If an arc label to be matched is kNoLabel, then that side
//   // does not consume a symbol. Returns the new filter state or,
//   // if disallowed, FilterState::NoState(). The filter is permitted to
//   // modify its inputs, e.g. for optimizations.
//   FilterState FilterArc(Arc *arc1, Arc *arc2) const;
//
//   // Apply filter at current composition state to these final weights
//   // (cf. superfinal transitions). The filter may modify its inputs,
//   // e.g. for optimizations.
//   void FilterFinal(Weight *final1, Weight *final2) const;
//
//   // Return resp matchers. Ownership stays with filter. These
//   // methods allow the filter to access and possibly modify
//   // the composition matchers (useful e.g. with lookahead).
//   Matcher1 *GetMatcher1();
//   Matcher2 *GetMatcher2();
//
//   // This specifies how the filter affects the composition result
//   // properties. It takes as argument the properties that would
//   // apply with a trivial composition filter.
//   uint64 Properties(uint64 props) const;
// };

// This filter requires epsilons on FST1 to be read before epsilons on FST2.
+template <class M1, class M2> +class SequenceComposeFilter { + public: + typedef typename M1::FST FST1; + typedef typename M2::FST FST2; + typedef typename FST1::Arc Arc; + typedef CharFilterState FilterState; + typedef M1 Matcher1; + typedef M2 Matcher2; + + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + SequenceComposeFilter(const FST1 &fst1, const FST2 &fst2, + M1 *matcher1 = 0, M2 *matcher2 = 0) + : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)), + matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)), + fst1_(matcher1_->GetFst()), + s1_(kNoStateId), + s2_(kNoStateId), + f_(kNoStateId) {} + + SequenceComposeFilter(const SequenceComposeFilter<M1, M2> &filter, + bool safe = false) + : matcher1_(filter.matcher1_->Copy(safe)), + matcher2_(filter.matcher2_->Copy(safe)), + fst1_(matcher1_->GetFst()), + s1_(kNoStateId), + s2_(kNoStateId), + f_(kNoStateId) {} + + ~SequenceComposeFilter() { + delete matcher1_; + delete matcher2_; + } + + FilterState Start() const { return FilterState(0); } + + void SetState(StateId s1, StateId s2, const FilterState &f) { + if (s1_ == s1 && s2_ == s2 && f == f_) + return; + s1_ = s1; + s2_ = s2; + f_ = f; + size_t na1 = internal::NumArcs(fst1_, s1); + size_t ne1 = internal::NumOutputEpsilons(fst1_, s1); + bool fin1 = internal::Final(fst1_, s1) != Weight::Zero(); + alleps1_ = na1 == ne1 && !fin1; + noeps1_ = ne1 == 0; + } + + FilterState FilterArc(Arc *arc1, Arc *arc2) const { + if (arc1->olabel == kNoLabel) + return alleps1_ ? FilterState::NoState() : + noeps1_ ? FilterState(0) : FilterState(1); + else if (arc2->ilabel == kNoLabel) + return f_ != FilterState(0) ? FilterState::NoState() : FilterState(0); + else + return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0); + } + + void FilterFinal(Weight *, Weight *) const {} + + // Return resp matchers. Ownership stays with filter. 
+ Matcher1 *GetMatcher1() { return matcher1_; } + Matcher2 *GetMatcher2() { return matcher2_; } + + uint64 Properties(uint64 props) const { return props; } + + private: + Matcher1 *matcher1_; + Matcher2 *matcher2_; + const FST1 &fst1_; + StateId s1_; // Current fst1_ state; + StateId s2_; // Current fst2_ state; + FilterState f_; // Current filter state + bool alleps1_; // Only epsilons (and non-final) leaving s1_? + bool noeps1_; // No epsilons leaving s1_? + + void operator=(const SequenceComposeFilter<M1, M2> &); // disallow +}; + + +// This filter requires epsilons on FST2 to be read before epsilons on FST1. +template <class M1, class M2> +class AltSequenceComposeFilter { + public: + typedef typename M1::FST FST1; + typedef typename M2::FST FST2; + typedef typename FST1::Arc Arc; + typedef CharFilterState FilterState; + typedef M1 Matcher1; + typedef M2 Matcher2; + + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + AltSequenceComposeFilter(const FST1 &fst1, const FST2 &fst2, + M1 *matcher1 = 0, M2 *matcher2 = 0) + : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)), + matcher2_(matcher2 ? 
matcher2 : new M2(fst2, MATCH_INPUT)), + fst2_(matcher2_->GetFst()), + s1_(kNoStateId), + s2_(kNoStateId), + f_(kNoStateId) {} + + AltSequenceComposeFilter(const AltSequenceComposeFilter<M1, M2> &filter, + bool safe = false) + : matcher1_(filter.matcher1_->Copy(safe)), + matcher2_(filter.matcher2_->Copy(safe)), + fst2_(matcher2_->GetFst()), + s1_(kNoStateId), + s2_(kNoStateId), + f_(kNoStateId) {} + + ~AltSequenceComposeFilter() { + delete matcher1_; + delete matcher2_; + } + + FilterState Start() const { return FilterState(0); } + + void SetState(StateId s1, StateId s2, const FilterState &f) { + if (s1_ == s1 && s2_ == s2 && f == f_) + return; + s1_ = s1; + s2_ = s2; + f_ = f; + size_t na2 = internal::NumArcs(fst2_, s2); + size_t ne2 = internal::NumInputEpsilons(fst2_, s2); + bool fin2 = internal::Final(fst2_, s2) != Weight::Zero(); + alleps2_ = na2 == ne2 && !fin2; + noeps2_ = ne2 == 0; + } + + FilterState FilterArc(Arc *arc1, Arc *arc2) const { + if (arc2->ilabel == kNoLabel) + return alleps2_ ? FilterState::NoState() : + noeps2_ ? FilterState(0) : FilterState(1); + else if (arc1->olabel == kNoLabel) + return f_ == FilterState(1) ? FilterState::NoState() : FilterState(0); + else + return arc1->olabel == 0 ? FilterState::NoState() : FilterState(0); + } + + void FilterFinal(Weight *, Weight *) const {} + + // Return resp matchers. Ownership stays with filter. + Matcher1 *GetMatcher1() { return matcher1_; } + Matcher2 *GetMatcher2() { return matcher2_; } + + uint64 Properties(uint64 props) const { return props; } + + private: + Matcher1 *matcher1_; + Matcher2 *matcher2_; + const FST2 &fst2_; + StateId s1_; // Current fst1_ state; + StateId s2_; // Current fst2_ state; + FilterState f_; // Current filter state + bool alleps2_; // Only epsilons (and non-final) leaving s2_? + bool noeps2_; // No epsilons leaving s2_? 
+ +void operator=(const AltSequenceComposeFilter<M1, M2> &); // disallow +}; + + +// This filter requires epsilons on FST1 to be matched with epsilons on FST2 +// whenever possible. +template <class M1, class M2> +class MatchComposeFilter { + public: + typedef typename M1::FST FST1; + typedef typename M2::FST FST2; + typedef typename FST1::Arc Arc; + typedef CharFilterState FilterState; + typedef M1 Matcher1; + typedef M2 Matcher2; + + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + MatchComposeFilter(const FST1 &fst1, const FST2 &fst2, + M1 *matcher1 = 0, M2 *matcher2 = 0) + : matcher1_(matcher1 ? matcher1 : new M1(fst1, MATCH_OUTPUT)), + matcher2_(matcher2 ? matcher2 : new M2(fst2, MATCH_INPUT)), + fst1_(matcher1_->GetFst()), + fst2_(matcher2_->GetFst()), + s1_(kNoStateId), + s2_(kNoStateId), + f_(kNoStateId) {} + + MatchComposeFilter(const MatchComposeFilter<M1, M2> &filter, + bool safe = false) + : matcher1_(filter.matcher1_->Copy(safe)), + matcher2_(filter.matcher2_->Copy(safe)), + fst1_(matcher1_->GetFst()), + fst2_(matcher2_->GetFst()), + s1_(kNoStateId), + s2_(kNoStateId), + f_(kNoStateId) {} + + ~MatchComposeFilter() { + delete matcher1_; + delete matcher2_; + } + + FilterState Start() const { return FilterState(0); } + + void SetState(StateId s1, StateId s2, const FilterState &f) { + if (s1_ == s1 && s2_ == s2 && f == f_) + return; + s1_ = s1; + s2_ = s2; + f_ = f; + size_t na1 = internal::NumArcs(fst1_, s1); + size_t ne1 = internal::NumOutputEpsilons(fst1_, s1); + bool f1 = internal::Final(fst1_, s1) != Weight::Zero(); + alleps1_ = na1 == ne1 && !f1; + noeps1_ = ne1 == 0; + size_t na2 = internal::NumArcs(fst2_, s2); + size_t ne2 = internal::NumInputEpsilons(fst2_, s2); + bool f2 = internal::Final(fst2_, s2) != Weight::Zero(); + alleps2_ = na2 == ne2 && !f2; + noeps2_ = ne2 == 0; + } + + FilterState FilterArc(Arc *arc1, Arc *arc2) const { + if (arc2->ilabel == kNoLabel) // Epsilon on 
Fst1 + return f_ == FilterState(0) ? + (noeps2_ ? FilterState(0) : + (alleps2_ ? FilterState::NoState(): FilterState(1))) : + (f_ == FilterState(1) ? FilterState(1) : FilterState::NoState()); + else if (arc1->olabel == kNoLabel) // Epsilon on Fst2 + return f_ == FilterState(0) ? + (noeps1_ ? FilterState(0) : + (alleps1_ ? FilterState::NoState() : FilterState(2))) : + (f_ == FilterState(2) ? FilterState(2) : FilterState::NoState()); + else if (arc1->olabel == 0) // Epsilon on both + return f_ == FilterState(0) ? FilterState(0) : FilterState::NoState(); + else // Both are non-epsilons + return FilterState(0); + } + + void FilterFinal(Weight *, Weight *) const {} + + // Return resp matchers. Ownership stays with filter. + Matcher1 *GetMatcher1() { return matcher1_; } + Matcher2 *GetMatcher2() { return matcher2_; } + + uint64 Properties(uint64 props) const { return props; } + + private: + Matcher1 *matcher1_; + Matcher2 *matcher2_; + const FST1 &fst1_; + const FST2 &fst2_; + StateId s1_; // Current fst1_ state; + StateId s2_; // Current fst2_ state; + FilterState f_; // Current filter state ID + bool alleps1_, alleps2_; // Only epsilons (and non-final) leaving s1, s2? + bool noeps1_, noeps2_; // No epsilons leaving s1, s2? + + void operator=(const MatchComposeFilter<M1, M2> &); // disallow +}; + + +// This filter works with the MultiEpsMatcher to determine if +// 'multi-epsilons' are preserved in the composition output +// (rather than rewritten as 0) and ensures correct properties. 
+template <class F> +class MultiEpsFilter { + public: + typedef typename F::FST1 FST1; + typedef typename F::FST2 FST2; + typedef typename F::Arc Arc; + typedef typename F::Matcher1 Matcher1; + typedef typename F::Matcher2 Matcher2; + typedef typename F::FilterState FilterState; + typedef MultiEpsFilter<F> Filter; + + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + MultiEpsFilter(const FST1 &fst1, const FST2 &fst2, + Matcher1 *matcher1 = 0, Matcher2 *matcher2 = 0, + bool keep_multi_eps = false) + : filter_(fst1, fst2, matcher1, matcher2), + keep_multi_eps_(keep_multi_eps) {} + + MultiEpsFilter(const Filter &filter, bool safe = false) + : filter_(filter.filter_, safe), + keep_multi_eps_(filter.keep_multi_eps_) {} + + FilterState Start() const { return filter_.Start(); } + + void SetState(StateId s1, StateId s2, const FilterState &f) { + return filter_.SetState(s1, s2, f); + } + + FilterState FilterArc(Arc *arc1, Arc *arc2) const { + FilterState f = filter_.FilterArc(arc1, arc2); + if (keep_multi_eps_) { + if (arc1->olabel == kNoLabel) + arc1->ilabel = arc2->ilabel; + if (arc2->ilabel == kNoLabel) + arc2->olabel = arc1->olabel; + } + return f; + } + + void FilterFinal(Weight *w1, Weight *w2) const { + return filter_.FilterFinal(w1, w2); + } + + // Return resp matchers. Ownership stays with filter. 
+ Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); } + Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); } + + uint64 Properties(uint64 iprops) const { + uint64 oprops = filter_.Properties(iprops); + return oprops & kILabelInvariantProperties & kOLabelInvariantProperties; + } + + private: + F filter_; + bool keep_multi_eps_; +}; + +} // namespace fst + + +#endif // FST_LIB_COMPOSE_FILTER_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/compose.h b/kaldi_io/src/tools/openfst/include/fst/compose.h new file mode 100644 index 0000000..db5ea3a --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/compose.h @@ -0,0 +1,728 @@ +// compose.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Class to compute the composition of two FSTs + +#ifndef FST_LIB_COMPOSE_H__ +#define FST_LIB_COMPOSE_H__ + +#include <algorithm> +#include <string> +#include <vector> +using std::vector; + +#include <fst/cache.h> +#include <fst/compose-filter.h> +#include <fst/lookahead-filter.h> +#include <fst/matcher.h> +#include <fst/state-table.h> +#include <fst/test-properties.h> + + +namespace fst { + +// Delayed composition options templated on the arc type, the matcher, +// the composition filter, and the composition state table. By +// default, the matchers, filter, and state table are constructed by +// composition. 
If set below, the user can instead pass in these +// objects; in that case, ComposeFst takes their ownership. This +// version controls composition implemented between generic Fst<Arc> +// types and a shared matcher type M for Fst<Arc>. This should be +// adequate for most applications, giving a reasonable tradeoff +// between efficiency and code sharing (but see ComposeFstImplOptions). +template <class A, + class M = Matcher<Fst<A> >, + class F = SequenceComposeFilter<M>, + class T = GenericComposeStateTable<A, typename F::FilterState> > +struct ComposeFstOptions : public CacheOptions { + M *matcher1; // FST1 matcher (see matcher.h) + M *matcher2; // FST2 matcher + F *filter; // Composition filter (see compose-filter.h) + T *state_table; // Composition state table (see compose-state-table.h) + + explicit ComposeFstOptions(const CacheOptions &opts, + M *mat1 = 0, M *mat2 = 0, + F *filt = 0, T *sttable= 0) + : CacheOptions(opts), matcher1(mat1), matcher2(mat2), + filter(filt), state_table(sttable) {} + + ComposeFstOptions() : matcher1(0), matcher2(0), filter(0), state_table(0) {} +}; + + +// Delayed composition options templated on the two matcher types, the +// composition filter, and the composition state table. By default, +// the matchers, filter, and state table are constructed by +// composition. If set below, the user can instead pass in these +// objects; in that case, ComposeFst takes their ownership. This +// version controls composition implemented using arbitrary matchers +// (of the same Arc type but otherwise arbitrary Fst type). The user +// must ensure the matchers are compatible. These options permit the +// most efficient use, but shares the least code. This is for advanced +// use only in the most demanding or specialized applications that can +// benefit from it (o.w. prefer ComposeFstOptions). 
+template <class M1, class M2, + class F = SequenceComposeFilter<M1, M2>, + class T = GenericComposeStateTable<typename M1::Arc, + typename F::FilterState> > +struct ComposeFstImplOptions : public CacheOptions { + M1 *matcher1; // FST1 matcher (see matcher.h) + M2 *matcher2; // FST2 matcher + F *filter; // Composition filter (see compose-filter.h) + T *state_table; // Composition state table (see compose-state-table.h) + + explicit ComposeFstImplOptions(const CacheOptions &opts, + M1 *mat1 = 0, M2 *mat2 = 0, + F *filt = 0, T *sttable= 0) + : CacheOptions(opts), matcher1(mat1), matcher2(mat2), + filter(filt), state_table(sttable) {} + + ComposeFstImplOptions() + : matcher1(0), matcher2(0), filter(0), state_table(0) {} +}; + + +// Implementation of delayed composition. This base class is +// common to the variants with different matchers, composition filters +// and state tables. +template <class A> +class ComposeFstImplBase : public CacheImpl<A> { + public: + using FstImpl<A>::SetType; + using FstImpl<A>::SetProperties; + using FstImpl<A>::Properties; + using FstImpl<A>::SetInputSymbols; + using FstImpl<A>::SetOutputSymbols; + + using CacheBaseImpl< CacheState<A> >::HasStart; + using CacheBaseImpl< CacheState<A> >::HasFinal; + using CacheBaseImpl< CacheState<A> >::HasArcs; + using CacheBaseImpl< CacheState<A> >::SetFinal; + using CacheBaseImpl< CacheState<A> >::SetStart; + + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef CacheState<A> State; + + ComposeFstImplBase(const Fst<A> &fst1, const Fst<A> &fst2, + const CacheOptions &opts) + : CacheImpl<A>(opts) { + VLOG(2) << "ComposeFst(" << this << "): Begin"; + SetType("compose"); + + if (!CompatSymbols(fst2.InputSymbols(), fst1.OutputSymbols())) { + FSTERROR() << "ComposeFst: output symbol table of 1st argument " + << "does not match input symbol table of 2nd argument"; + SetProperties(kError, kError); + } + + SetInputSymbols(fst1.InputSymbols()); + 
SetOutputSymbols(fst2.OutputSymbols()); + } + + ComposeFstImplBase(const ComposeFstImplBase<A> &impl) + : CacheImpl<A>(impl, true) { + SetProperties(impl.Properties(), kCopyProperties); + SetInputSymbols(impl.InputSymbols()); + SetOutputSymbols(impl.OutputSymbols()); + } + + virtual ComposeFstImplBase<A> *Copy() = 0; + + virtual ~ComposeFstImplBase() {} + + StateId Start() { + if (!HasStart()) { + StateId start = ComputeStart(); + if (start != kNoStateId) { + SetStart(start); + } + } + return CacheImpl<A>::Start(); + } + + Weight Final(StateId s) { + if (!HasFinal(s)) { + Weight final = ComputeFinal(s); + SetFinal(s, final); + } + return CacheImpl<A>::Final(s); + } + + virtual void Expand(StateId s) = 0; + + size_t NumArcs(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<A>::NumArcs(s); + } + + size_t NumInputEpsilons(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<A>::NumInputEpsilons(s); + } + + size_t NumOutputEpsilons(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<A>::NumOutputEpsilons(s); + } + + void InitArcIterator(StateId s, ArcIteratorData<A> *data) { + if (!HasArcs(s)) + Expand(s); + CacheImpl<A>::InitArcIterator(s, data); + } + + protected: + virtual StateId ComputeStart() = 0; + virtual Weight ComputeFinal(StateId s) = 0; +}; + + +// Implementaion of delayed composition templated on the matchers (see +// matcher.h), composition filter (see compose-filter-inl.h) and +// the composition state table (see compose-state-table.h). 
+template <class M1, class M2, class F, class T> +class ComposeFstImpl : public ComposeFstImplBase<typename M1::Arc> { + typedef typename M1::FST FST1; + typedef typename M2::FST FST2; + typedef typename M1::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + typedef typename F::FilterState FilterState; + typedef typename F::Matcher1 Matcher1; + typedef typename F::Matcher2 Matcher2; + + using CacheBaseImpl<CacheState<Arc> >::SetArcs; + using FstImpl<Arc>::SetType; + using FstImpl<Arc>::SetProperties; + + typedef ComposeStateTuple<StateId, FilterState> StateTuple; + + public: + ComposeFstImpl(const FST1 &fst1, const FST2 &fst2, + const ComposeFstImplOptions<M1, M2, F, T> &opts); + + ComposeFstImpl(const ComposeFstImpl<M1, M2, F, T> &impl) + : ComposeFstImplBase<Arc>(impl), + filter_(new F(*impl.filter_, true)), + matcher1_(filter_->GetMatcher1()), + matcher2_(filter_->GetMatcher2()), + fst1_(matcher1_->GetFst()), + fst2_(matcher2_->GetFst()), + state_table_(new T(*impl.state_table_)), + match_type_(impl.match_type_) {} + + ~ComposeFstImpl() { + VLOG(2) << "ComposeFst(" << this + << "): End: # of visited states: " << state_table_->Size(); + + delete filter_; + delete state_table_; + } + + virtual ComposeFstImpl<M1, M2, F, T> *Copy() { + return new ComposeFstImpl<M1, M2, F, T>(*this); + } + + uint64 Properties() const { return Properties(kFstProperties); } + + // Set error if found; return FST impl properties. + uint64 Properties(uint64 mask) const { + if ((mask & kError) && + (fst1_.Properties(kError, false) || + fst2_.Properties(kError, false) || + (matcher1_->Properties(0) & kError) || + (matcher2_->Properties(0) & kError) | + (filter_->Properties(0) & kError) || + state_table_->Error())) { + SetProperties(kError, kError); + } + return FstImpl<Arc>::Properties(mask); + } + + // Arranges it so that the first arg to OrderedExpand is the Fst + // that will be matched on. 
+ void Expand(StateId s) { + const StateTuple &tuple = state_table_->Tuple(s); + StateId s1 = tuple.state_id1; + StateId s2 = tuple.state_id2; + filter_->SetState(s1, s2, tuple.filter_state); + if (match_type_ == MATCH_OUTPUT || + (match_type_ == MATCH_BOTH && + internal::NumArcs(fst1_, s1) > internal::NumArcs(fst2_, s2))) + OrderedExpand(s, fst1_, s1, fst2_, s2, matcher1_, false); + else + OrderedExpand(s, fst2_, s2, fst1_, s1, matcher2_, true); + } + + const FST1 &GetFst1() { return fst1_; } + const FST2 &GetFst2() { return fst2_; } + M1 *GetMatcher1() { return matcher1_; } + M2 *GetMatcher2() { return matcher2_; } + F *GetFilter() { return filter_; } + T *GetStateTable() { return state_table_; } + + private: + // This does that actual matching of labels in the composition. The + // arguments are ordered so matching is called on state 'sa' of + // 'fsta' for each arc leaving state 'sb' of 'fstb'. The 'match_input' arg + // determines whether the input or output label of arcs at 'sb' is + // the one to match on. + template <class FST, class Matcher> + void OrderedExpand(StateId s, const Fst<Arc> &, StateId sa, + const FST &fstb, StateId sb, + Matcher *matchera, bool match_input) { + matchera->SetState(sa); + + // First process non-consuming symbols (e.g., epsilons) on FSTA. + Arc loop(match_input ? 0 : kNoLabel, match_input ? kNoLabel : 0, + Weight::One(), sb); + MatchArc(s, matchera, loop, match_input); + + // Then process matches on FSTB. + for (ArcIterator<FST> iterb(fstb, sb); !iterb.Done(); iterb.Next()) + MatchArc(s, matchera, iterb.Value(), match_input); + + SetArcs(s); + } + + // Matches a single transition from 'fstb' against 'fata' at 's'. + template <class Matcher> + void MatchArc(StateId s, Matcher *matchera, + const Arc &arc, bool match_input) { + if (matchera->Find(match_input ? 
arc.olabel : arc.ilabel)) { + for (; !matchera->Done(); matchera->Next()) { + Arc arca = matchera->Value(); + Arc arcb = arc; + if (match_input) { + const FilterState &f = filter_->FilterArc(&arcb, &arca); + if (f != FilterState::NoState()) + AddArc(s, arcb, arca, f); + } else { + const FilterState &f = filter_->FilterArc(&arca, &arcb); + if (f != FilterState::NoState()) + AddArc(s, arca, arcb, f); + } + } + } + } + + // Add a matching transition at 's'. + void AddArc(StateId s, const Arc &arc1, const Arc &arc2, + const FilterState &f) { + StateTuple tuple(arc1.nextstate, arc2.nextstate, f); + Arc oarc(arc1.ilabel, arc2.olabel, Times(arc1.weight, arc2.weight), + state_table_->FindState(tuple)); + CacheImpl<Arc>::PushArc(s, oarc); + } + + StateId ComputeStart() { + StateId s1 = fst1_.Start(); + if (s1 == kNoStateId) + return kNoStateId; + + StateId s2 = fst2_.Start(); + if (s2 == kNoStateId) + return kNoStateId; + + const FilterState &f = filter_->Start(); + StateTuple tuple(s1, s2, f); + return state_table_->FindState(tuple); + } + + Weight ComputeFinal(StateId s) { + const StateTuple &tuple = state_table_->Tuple(s); + StateId s1 = tuple.state_id1; + Weight final1 = internal::Final(fst1_, s1); + if (final1 == Weight::Zero()) + return final1; + + StateId s2 = tuple.state_id2; + Weight final2 = internal::Final(fst2_, s2); + if (final2 == Weight::Zero()) + return final2; + + filter_->SetState(s1, s2, tuple.filter_state); + filter_->FilterFinal(&final1, &final2); + return Times(final1, final2); + } + + // Identifies and verifies the capabilities of the matcher to be used for + // composition. 
+ void SetMatchType(); + + F *filter_; + Matcher1 *matcher1_; + Matcher2 *matcher2_; + const FST1 &fst1_; + const FST2 &fst2_; + T *state_table_; + + MatchType match_type_; + + void operator=(const ComposeFstImpl<M1, M2, F, T> &); // disallow +}; + +template <class M1, class M2, class F, class T> inline +ComposeFstImpl<M1, M2, F, T>::ComposeFstImpl( + const FST1 &fst1, const FST2 &fst2, + const ComposeFstImplOptions<M1, M2, F, T> &opts) + : ComposeFstImplBase<Arc>(fst1, fst2, opts), + filter_(opts.filter ? opts.filter : + new F(fst1, fst2, opts.matcher1, opts.matcher2)), + matcher1_(filter_->GetMatcher1()), + matcher2_(filter_->GetMatcher2()), + fst1_(matcher1_->GetFst()), + fst2_(matcher2_->GetFst()), + state_table_(opts.state_table ? opts.state_table : + new T(fst1_, fst2_)) { + SetMatchType(); + if (match_type_ == MATCH_NONE) + SetProperties(kError, kError); + VLOG(2) << "ComposeFst(" << this << "): Match type: " + << (match_type_ == MATCH_OUTPUT ? "output" : + (match_type_ == MATCH_INPUT ? "input" : + (match_type_ == MATCH_BOTH ? "both" : + (match_type_ == MATCH_NONE ? 
"none" : "unknown")))); + + uint64 fprops1 = fst1.Properties(kFstProperties, false); + uint64 fprops2 = fst2.Properties(kFstProperties, false); + uint64 mprops1 = matcher1_->Properties(fprops1); + uint64 mprops2 = matcher2_->Properties(fprops2); + uint64 cprops = ComposeProperties(mprops1, mprops2); + SetProperties(filter_->Properties(cprops), kCopyProperties); + if (state_table_->Error()) SetProperties(kError, kError); + VLOG(2) << "ComposeFst(" << this << "): Initialized"; +} + +template <class M1, class M2, class F, class T> +void ComposeFstImpl<M1, M2, F, T>::SetMatchType() { + MatchType type1 = matcher1_->Type(false); + MatchType type2 = matcher2_->Type(false); + uint32 flags1 = matcher1_->Flags(); + uint32 flags2 = matcher2_->Flags(); + if (flags1 & flags2 & kRequireMatch) { + FSTERROR() << "ComposeFst: only one argument can require matching."; + match_type_ = MATCH_NONE; + } else if (flags1 & kRequireMatch) { + if (matcher1_->Type(true) != MATCH_OUTPUT) { + FSTERROR() << "ComposeFst: 1st argument requires matching but cannot."; + match_type_ = MATCH_NONE; + } + match_type_ = MATCH_OUTPUT; + } else if (flags2 & kRequireMatch) { + if (matcher2_->Type(true) != MATCH_INPUT) { + FSTERROR() << "ComposeFst: 2nd argument requires matching but cannot."; + match_type_ = MATCH_NONE; + } + match_type_ = MATCH_INPUT; + } else if (flags1 & flags2 & kPreferMatch && + type1 == MATCH_OUTPUT && type2 == MATCH_INPUT) { + match_type_ = MATCH_BOTH; + } else if (flags1 & kPreferMatch && type1 == MATCH_OUTPUT) { + match_type_ = MATCH_OUTPUT; + } else if (flags2 & kPreferMatch && type2 == MATCH_INPUT) { + match_type_ = MATCH_INPUT; + } else if (type1 == MATCH_OUTPUT && type2 == MATCH_INPUT) { + match_type_ = MATCH_BOTH; + } else if (type1 == MATCH_OUTPUT) { + match_type_ = MATCH_OUTPUT; + } else if (type2 == MATCH_INPUT) { + match_type_ = MATCH_INPUT; + } else if (flags1 & kPreferMatch && matcher1_->Type(true) == MATCH_OUTPUT) { + match_type_ = MATCH_OUTPUT; + } else if (flags2 & 
kPreferMatch && matcher2_->Type(true) == MATCH_INPUT) { + match_type_ = MATCH_INPUT; + } else if (matcher1_->Type(true) == MATCH_OUTPUT) { + match_type_ = MATCH_OUTPUT; + } else if (matcher2_->Type(true) == MATCH_INPUT) { + match_type_ = MATCH_INPUT; + } else { + FSTERROR() << "ComposeFst: 1st argument cannot match on output labels " + << "and 2nd argument cannot match on input labels (sort?)."; + match_type_ = MATCH_NONE; + } +} + + +// Computes the composition of two transducers. This version is a +// delayed Fst. If FST1 transduces string x to y with weight a and FST2 +// transduces y to z with weight b, then their composition transduces +// string x to z with weight Times(x, z). +// +// The output labels of the first transducer or the input labels of +// the second transducer must be sorted (with the default matcher). +// The weights need to form a commutative semiring (valid for +// TropicalWeight and LogWeight). +// +// Complexity: +// Assuming the first FST is unsorted and the second is sorted: +// - Time: O(v1 v2 d1 (log d2 + m2)), +// - Space: O(v1 v2) +// where vi = # of states visited, di = maximum out-degree, and mi the +// maximum multiplicity of the states visited for the ith +// FST. Constant time and space to visit an input state or arc is +// assumed and exclusive of caching. +// +// Caveats: +// - ComposeFst does not trim its output (since it is a delayed operation). +// - The efficiency of composition can be strongly affected by several factors: +// - the choice of which tnansducer is sorted - prefer sorting the FST +// that has the greater average out-degree. +// - the amount of non-determinism +// - the presence and location of epsilon transitions - avoid epsilon +// transitions on the output side of the first transducer or +// the input side of the second transducer or prefer placing +// them later in a path since they delay matching and can +// introduce non-coaccessible states and transitions. 
+// +// This class attaches interface to implementation and handles +// reference counting, delegating most methods to ImplToFst. +template <class A> +class ComposeFst : public ImplToFst< ComposeFstImplBase<A> > { + public: + friend class ArcIterator< ComposeFst<A> >; + friend class StateIterator< ComposeFst<A> >; + + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef CacheState<A> State; + typedef ComposeFstImplBase<A> Impl; + + using ImplToFst<Impl>::SetImpl; + + // Compose specifying only caching options. + ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2, + const CacheOptions &opts = CacheOptions()) + : ImplToFst<Impl>(CreateBase(fst1, fst2, opts)) {} + + // Compose specifying one shared matcher type M. Requires input + // Fsts and matcher FST type (M::FST) be Fst<A>. Recommended for + // best code-sharing and matcher compatiblity. + template <class M, class F, class T> + ComposeFst(const Fst<A> &fst1, const Fst<A> &fst2, + const ComposeFstOptions<A, M, F, T> &opts) + : ImplToFst<Impl>(CreateBase1(fst1, fst2, opts)) {} + + // Compose specifying two matcher types M1 and M2. Requires input + // Fsts (of the same Arc type but o.w. arbitrary) match the + // corresponding matcher FST types (M1::FST, M2::FST). Recommended + // only for advanced use in demanding or specialized applications + // due to potential code bloat and matcher incompatibilities. + template <class M1, class M2, class F, class T> + ComposeFst(const typename M1::FST &fst1, const typename M2::FST &fst2, + const ComposeFstImplOptions<M1, M2, F, T> &opts) + : ImplToFst<Impl>(CreateBase2(fst1, fst2, opts)) {} + + // See Fst<>::Copy() for doc. + ComposeFst(const ComposeFst<A> &fst, bool safe = false) { + if (safe) + SetImpl(fst.GetImpl()->Copy()); + else + SetImpl(fst.GetImpl(), false); + } + + // Get a copy of this ComposeFst. See Fst<>::Copy() for further doc. 
+ virtual ComposeFst<A> *Copy(bool safe = false) const { + return new ComposeFst<A>(*this, safe); + } + + virtual inline void InitStateIterator(StateIteratorData<A> *data) const; + + virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { + GetImpl()->InitArcIterator(s, data); + } + + protected: + ComposeFst() {} + + // Create compose implementation specifying two matcher types. + template <class M1, class M2, class F, class T> + static Impl *CreateBase2( + const typename M1::FST &fst1, const typename M2::FST &fst2, + const ComposeFstImplOptions<M1, M2, F, T> &opts) { + Impl *impl = new ComposeFstImpl<M1, M2, F, T>(fst1, fst2, opts); + if (!(Weight::Properties() & kCommutative)) { + int64 props1 = fst1.Properties(kUnweighted, true); + int64 props2 = fst2.Properties(kUnweighted, true); + if (!(props1 & kUnweighted) && !(props2 & kUnweighted)) { + FSTERROR() << "ComposeFst: Weights must be a commutative semiring: " + << Weight::Type(); + impl->SetProperties(kError, kError); + } + } + return impl; + } + + // Create compose implementation specifying one matcher type. + // Requires input Fsts and matcher FST type (M::FST) be Fst<A> + template <class M, class F, class T> + static Impl *CreateBase1(const Fst<A> &fst1, const Fst<A> &fst2, + const ComposeFstOptions<A, M, F, T> &opts) { + ComposeFstImplOptions<M, M, F, T> nopts(opts, opts.matcher1, opts.matcher2, + opts.filter, opts.state_table); + return CreateBase2(fst1, fst2, nopts); + } + + // Create compose implementation specifying no matcher type. 
+ static Impl *CreateBase(const Fst<A> &fst1, const Fst<A> &fst2, + const CacheOptions &opts) { + switch (LookAheadMatchType(fst1, fst2)) { // Check for lookahead matchers + default: + case MATCH_NONE: { // Default composition (no look-ahead) + VLOG(2) << "ComposeFst: Default composition (no look-ahead)"; + ComposeFstOptions<Arc> nopts(opts); + return CreateBase1(fst1, fst2, nopts); + } + case MATCH_OUTPUT: { // Lookahead on fst1 + VLOG(2) << "ComposeFst: Lookahead on fst1"; + typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::FstMatcher M; + typedef typename DefaultLookAhead<Arc, MATCH_OUTPUT>::ComposeFilter F; + ComposeFstOptions<Arc, M, F> nopts(opts); + return CreateBase1(fst1, fst2, nopts); + } + case MATCH_INPUT: { // Lookahead on fst2 + VLOG(2) << "ComposeFst: Lookahead on fst2"; + typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::FstMatcher M; + typedef typename DefaultLookAhead<Arc, MATCH_INPUT>::ComposeFilter F; + ComposeFstOptions<Arc, M, F> nopts(opts); + return CreateBase1(fst1, fst2, nopts); + } + } + } + + private: + // Makes visible to friends. + Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } + + void operator=(const ComposeFst<A> &fst); // disallow +}; + + +// Specialization for ComposeFst. +template<class A> +class StateIterator< ComposeFst<A> > + : public CacheStateIterator< ComposeFst<A> > { + public: + explicit StateIterator(const ComposeFst<A> &fst) + : CacheStateIterator< ComposeFst<A> >(fst, fst.GetImpl()) {} +}; + + +// Specialization for ComposeFst. 
+template <class A> +class ArcIterator< ComposeFst<A> > + : public CacheArcIterator< ComposeFst<A> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const ComposeFst<A> &fst, StateId s) + : CacheArcIterator< ComposeFst<A> >(fst.GetImpl(), s) { + if (!fst.GetImpl()->HasArcs(s)) + fst.GetImpl()->Expand(s); + } + + private: + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + +template <class A> inline +void ComposeFst<A>::InitStateIterator(StateIteratorData<A> *data) const { + data->base = new StateIterator< ComposeFst<A> >(*this); +} + +// Useful alias when using StdArc. +typedef ComposeFst<StdArc> StdComposeFst; + +enum ComposeFilter { AUTO_FILTER, SEQUENCE_FILTER, ALT_SEQUENCE_FILTER, + MATCH_FILTER }; + +struct ComposeOptions { + bool connect; // Connect output + ComposeFilter filter_type; // Which pre-defined filter to use + + ComposeOptions(bool c, ComposeFilter ft = AUTO_FILTER) + : connect(c), filter_type(ft) {} + ComposeOptions() : connect(true), filter_type(AUTO_FILTER) {} +}; + +// Computes the composition of two transducers. This version writes +// the composed FST into a MurableFst. If FST1 transduces string x to +// y with weight a and FST2 transduces y to z with weight b, then +// their composition transduces string x to z with weight +// Times(x, z). +// +// The output labels of the first transducer or the input labels of +// the second transducer must be sorted. The weights need to form a +// commutative semiring (valid for TropicalWeight and LogWeight). +// +// Complexity: +// Assuming the first FST is unsorted and the second is sorted: +// - Time: O(V1 V2 D1 (log D2 + M2)), +// - Space: O(V1 V2 D1 M2) +// where Vi = # of states, Di = maximum out-degree, and Mi is +// the maximum multiplicity for the ith FST. +// +// Caveats: +// - Compose trims its output. 
+// - The efficiency of composition can be strongly affected by several factors: +// - the choice of which tnansducer is sorted - prefer sorting the FST +// that has the greater average out-degree. +// - the amount of non-determinism +// - the presence and location of epsilon transitions - avoid epsilon +// transitions on the output side of the first transducer or +// the input side of the second transducer or prefer placing +// them later in a path since they delay matching and can +// introduce non-coaccessible states and transitions. +template<class Arc> +void Compose(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2, + MutableFst<Arc> *ofst, + const ComposeOptions &opts = ComposeOptions()) { + typedef Matcher< Fst<Arc> > M; + + if (opts.filter_type == AUTO_FILTER) { + CacheOptions nopts; + nopts.gc_limit = 0; // Cache only the last state for fastest copy. + *ofst = ComposeFst<Arc>(ifst1, ifst2, nopts); + } else if (opts.filter_type == SEQUENCE_FILTER) { + ComposeFstOptions<Arc> copts; + copts.gc_limit = 0; // Cache only the last state for fastest copy. + *ofst = ComposeFst<Arc>(ifst1, ifst2, copts); + } else if (opts.filter_type == ALT_SEQUENCE_FILTER) { + ComposeFstOptions<Arc, M, AltSequenceComposeFilter<M> > copts; + copts.gc_limit = 0; // Cache only the last state for fastest copy. + *ofst = ComposeFst<Arc>(ifst1, ifst2, copts); + } else if (opts.filter_type == MATCH_FILTER) { + ComposeFstOptions<Arc, M, MatchComposeFilter<M> > copts; + copts.gc_limit = 0; // Cache only the last state for fastest copy. 
+ *ofst = ComposeFst<Arc>(ifst1, ifst2, copts); + } + + if (opts.connect) + Connect(ofst); +} + +} // namespace fst + +#endif // FST_LIB_COMPOSE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/concat.h b/kaldi_io/src/tools/openfst/include/fst/concat.h new file mode 100644 index 0000000..8500d50 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/concat.h @@ -0,0 +1,246 @@ +// concat.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Functions and classes to compute the concat of two FSTs. + +#ifndef FST_LIB_CONCAT_H__ +#define FST_LIB_CONCAT_H__ + +#include <vector> +using std::vector; +#include <algorithm> + +#include <fst/mutable-fst.h> +#include <fst/rational.h> + + +namespace fst { + +// Computes the concatenation (product) of two FSTs. If FST1 +// transduces string x to y with weight a and FST2 transduces string w +// to v with weight b, then their concatenation transduces string xw +// to yv with Times(a, b). +// +// This version modifies its MutableFst argument (in first position). +// +// Complexity: +// - Time: O(V1 + V2 + E2) +// - Space: O(V1 + V2 + E2) +// where Vi = # of states and Ei = # of arcs of the ith FST. 
+// +template<class Arc> +void Concat(MutableFst<Arc> *fst1, const Fst<Arc> &fst2) { + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + // TODO(riley): restore when voice actions issues fixed + // Check that the symbol table are compatible + if (!CompatSymbols(fst1->InputSymbols(), fst2.InputSymbols()) || + !CompatSymbols(fst1->OutputSymbols(), fst2.OutputSymbols())) { + LOG(ERROR) << "Concat: input/output symbol tables of 1st argument " + << "do not match input/output symbol tables of 2nd argument"; + // fst1->SetProperties(kError, kError); + // return; + } + + uint64 props1 = fst1->Properties(kFstProperties, false); + uint64 props2 = fst2.Properties(kFstProperties, false); + + StateId start1 = fst1->Start(); + if (start1 == kNoStateId) { + if (props2 & kError) fst1->SetProperties(kError, kError); + return; + } + + StateId numstates1 = fst1->NumStates(); + if (fst2.Properties(kExpanded, false)) + fst1->ReserveStates(numstates1 + CountStates(fst2)); + + for (StateIterator< Fst<Arc> > siter2(fst2); + !siter2.Done(); + siter2.Next()) { + StateId s1 = fst1->AddState(); + StateId s2 = siter2.Value(); + fst1->SetFinal(s1, fst2.Final(s2)); + fst1->ReserveArcs(s1, fst2.NumArcs(s2)); + for (ArcIterator< Fst<Arc> > aiter(fst2, s2); + !aiter.Done(); + aiter.Next()) { + Arc arc = aiter.Value(); + arc.nextstate += numstates1; + fst1->AddArc(s1, arc); + } + } + + StateId start2 = fst2.Start(); + for (StateId s1 = 0; s1 < numstates1; ++s1) { + Weight final = fst1->Final(s1); + if (final != Weight::Zero()) { + fst1->SetFinal(s1, Weight::Zero()); + if (start2 != kNoStateId) + fst1->AddArc(s1, Arc(0, 0, final, start2 + numstates1)); + } + } + if (start2 != kNoStateId) + fst1->SetProperties(ConcatProperties(props1, props2), kFstProperties); +} + +// Computes the concatentation of two FSTs. This version modifies its +// MutableFst argument (in second position). 
+// +// Complexity: +// - Time: O(V1 + E1) +// - Space: O(V1 + E1) +// where Vi = # of states and Ei = # of arcs of the ith FST. +// +template<class Arc> +void Concat(const Fst<Arc> &fst1, MutableFst<Arc> *fst2) { + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + // Check that the symbol table are compatible + if (!CompatSymbols(fst1.InputSymbols(), fst2->InputSymbols()) || + !CompatSymbols(fst1.OutputSymbols(), fst2->OutputSymbols())) { + LOG(ERROR) << "Concat: input/output symbol tables of 1st argument " + << "do not match input/output symbol tables of 2nd argument"; + // fst2->SetProperties(kError, kError); + // return; + } + + uint64 props1 = fst1.Properties(kFstProperties, false); + uint64 props2 = fst2->Properties(kFstProperties, false); + + StateId start2 = fst2->Start(); + if (start2 == kNoStateId) { + if (props1 & kError) fst2->SetProperties(kError, kError); + return; + } + + StateId numstates2 = fst2->NumStates(); + if (fst1.Properties(kExpanded, false)) + fst2->ReserveStates(numstates2 + CountStates(fst1)); + + for (StateIterator< Fst<Arc> > siter(fst1); + !siter.Done(); + siter.Next()) { + StateId s1 = siter.Value(); + StateId s2 = fst2->AddState(); + Weight final = fst1.Final(s1); + fst2->ReserveArcs(s2, fst1.NumArcs(s1) + (final != Weight::Zero() ? 1 : 0)); + if (final != Weight::Zero()) + fst2->AddArc(s2, Arc(0, 0, final, start2)); + for (ArcIterator< Fst<Arc> > aiter(fst1, s1); + !aiter.Done(); + aiter.Next()) { + Arc arc = aiter.Value(); + arc.nextstate += numstates2; + fst2->AddArc(s2, arc); + } + } + StateId start1 = fst1.Start(); + fst2->SetStart(start1 == kNoStateId ? fst2->AddState() : start1 + numstates2); + if (start1 != kNoStateId) + fst2->SetProperties(ConcatProperties(props1, props2), kFstProperties); +} + + +// Computes the concatentation of two FSTs. This version modifies its +// RationalFst input (in first position). 
+template<class Arc> +void Concat(RationalFst<Arc> *fst1, const Fst<Arc> &fst2) { + fst1->GetImpl()->AddConcat(fst2, true); +} + +// Computes the concatentation of two FSTs. This version modifies its +// RationalFst input (in second position). +template<class Arc> +void Concat(const Fst<Arc> &fst1, RationalFst<Arc> *fst2) { + fst2->GetImpl()->AddConcat(fst1, false); +} + +typedef RationalFstOptions ConcatFstOptions; + + +// Computes the concatenation (product) of two FSTs; this version is a +// delayed Fst. If FST1 transduces string x to y with weight a and FST2 +// transduces string w to v with weight b, then their concatenation +// transduces string xw to yv with Times(a, b). +// +// Complexity: +// - Time: O(v1 + e1 + v2 + e2), +// - Space: O(v1 + v2) +// where vi = # of states visited and ei = # of arcs visited of the +// ith FST. Constant time and space to visit an input state or arc is +// assumed and exclusive of caching. +template <class A> +class ConcatFst : public RationalFst<A> { + public: + using ImplToFst< RationalFstImpl<A> >::GetImpl; + + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + + ConcatFst(const Fst<A> &fst1, const Fst<A> &fst2) { + GetImpl()->InitConcat(fst1, fst2); + } + + ConcatFst(const Fst<A> &fst1, const Fst<A> &fst2, + const ConcatFstOptions &opts) : RationalFst<A>(opts) { + GetImpl()->InitConcat(fst1, fst2); + } + + // See Fst<>::Copy() for doc. + ConcatFst(const ConcatFst<A> &fst, bool safe = false) + : RationalFst<A>(fst, safe) {} + + // Get a copy of this ConcatFst. See Fst<>::Copy() for further doc. + virtual ConcatFst<A> *Copy(bool safe = false) const { + return new ConcatFst<A>(*this, safe); + } +}; + + +// Specialization for ConcatFst. +template <class A> +class StateIterator< ConcatFst<A> > : public StateIterator< RationalFst<A> > { + public: + explicit StateIterator(const ConcatFst<A> &fst) + : StateIterator< RationalFst<A> >(fst) {} +}; + + +// Specialization for ConcatFst. 
+template <class A> +class ArcIterator< ConcatFst<A> > : public ArcIterator< RationalFst<A> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const ConcatFst<A> &fst, StateId s) + : ArcIterator< RationalFst<A> >(fst, s) {} +}; + + +// Useful alias when using StdArc. +typedef ConcatFst<StdArc> StdConcatFst; + +} // namespace fst + +#endif // FST_LIB_CONCAT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/config.h b/kaldi_io/src/tools/openfst/include/fst/config.h new file mode 100644 index 0000000..046b49c --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/config.h @@ -0,0 +1,12 @@ +/* src/include/fst/config.h. Generated from config.h.in by configure. */ +// OpenFst config file + +/* Define to 1 if you have the ICU library. */ +/* #undef HAVE_ICU */ + +/* Define to 1 if the system has the type `std::tr1::hash<long long + unsigned>'. */ +#define HAVE_STD__TR1__HASH_LONG_LONG_UNSIGNED_ 1 + +/* Define to 1 if the system has the type `__gnu_cxx::slist<int>'. */ +#define HAVE___GNU_CXX__SLIST_INT_ 1 diff --git a/kaldi_io/src/tools/openfst/include/fst/connect.h b/kaldi_io/src/tools/openfst/include/fst/connect.h new file mode 100644 index 0000000..427808c --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/connect.h @@ -0,0 +1,319 @@ +// connect.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Michael Riley) +// +// \file +// Classes and functions to remove unsuccessful paths from an Fst. + +#ifndef FST_LIB_CONNECT_H__ +#define FST_LIB_CONNECT_H__ + +#include <vector> +using std::vector; + +#include <fst/dfs-visit.h> +#include <fst/union-find.h> +#include <fst/mutable-fst.h> + + +namespace fst { + +// Finds and returns connected components. Use with Visit(). +template <class A> +class CcVisitor { + public: + typedef A Arc; + typedef typename Arc::Weight Weight; + typedef typename A::StateId StateId; + + // cc[i]: connected component number for state i. + CcVisitor(vector<StateId> *cc) + : comps_(new UnionFind<StateId>(0, kNoStateId)), + cc_(cc), + nstates_(0) { } + + // comps: connected components equiv classes. + CcVisitor(UnionFind<StateId> *comps) + : comps_(comps), + cc_(0), + nstates_(0) { } + + ~CcVisitor() { + if (cc_) // own comps_? + delete comps_; + } + + void InitVisit(const Fst<A> &fst) { } + + bool InitState(StateId s, StateId root) { + ++nstates_; + if (comps_->FindSet(s) == kNoStateId) + comps_->MakeSet(s); + return true; + } + + bool WhiteArc(StateId s, const A &arc) { + comps_->MakeSet(arc.nextstate); + comps_->Union(s, arc.nextstate); + return true; + } + + bool GreyArc(StateId s, const A &arc) { + comps_->Union(s, arc.nextstate); + return true; + } + + bool BlackArc(StateId s, const A &arc) { + comps_->Union(s, arc.nextstate); + return true; + } + + void FinishState(StateId s) { } + + void FinishVisit() { + if (cc_) + GetCcVector(cc_); + } + + // cc[i]: connected component number for state i. + // Returns number of components. 
+ int GetCcVector(vector<StateId> *cc) { + cc->clear(); + cc->resize(nstates_, kNoStateId); + StateId ncomp = 0; + for (StateId i = 0; i < nstates_; ++i) { + StateId rep = comps_->FindSet(i); + StateId &comp = (*cc)[rep]; + if (comp == kNoStateId) { + comp = ncomp; + ++ncomp; + } + (*cc)[i] = comp; + } + return ncomp; + } + + private: + UnionFind<StateId> *comps_; // Components + vector<StateId> *cc_; // State's cc number + StateId nstates_; // State count +}; + + +// Finds and returns strongly-connected components, accessible and +// coaccessible states and related properties. Uses Tarjan's single +// DFS SCC algorithm (see Aho, et al, "Design and Analysis of Computer +// Algorithms", 189pp). Use with DfsVisit(); +template <class A> +class SccVisitor { + public: + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + + // scc[i]: strongly-connected component number for state i. + // SCC numbers will be in topological order for acyclic input. + // access[i]: accessibility of state i. + // coaccess[i]: coaccessibility of state i. + // Any of above can be NULL. + // props: related property bits (cyclicity, initial cyclicity, + // accessibility, coaccessibility) set/cleared (o.w. unchanged). 
+ SccVisitor(vector<StateId> *scc, vector<bool> *access, + vector<bool> *coaccess, uint64 *props) + : scc_(scc), access_(access), coaccess_(coaccess), props_(props) {} + SccVisitor(uint64 *props) + : scc_(0), access_(0), coaccess_(0), props_(props) {} + + void InitVisit(const Fst<A> &fst); + + bool InitState(StateId s, StateId root); + + bool TreeArc(StateId s, const A &arc) { return true; } + + bool BackArc(StateId s, const A &arc) { + StateId t = arc.nextstate; + if ((*dfnumber_)[t] < (*lowlink_)[s]) + (*lowlink_)[s] = (*dfnumber_)[t]; + if ((*coaccess_)[t]) + (*coaccess_)[s] = true; + *props_ |= kCyclic; + *props_ &= ~kAcyclic; + if (arc.nextstate == start_) { + *props_ |= kInitialCyclic; + *props_ &= ~kInitialAcyclic; + } + return true; + } + + bool ForwardOrCrossArc(StateId s, const A &arc) { + StateId t = arc.nextstate; + if ((*dfnumber_)[t] < (*dfnumber_)[s] /* cross edge */ && + (*onstack_)[t] && (*dfnumber_)[t] < (*lowlink_)[s]) + (*lowlink_)[s] = (*dfnumber_)[t]; + if ((*coaccess_)[t]) + (*coaccess_)[s] = true; + return true; + } + + void FinishState(StateId s, StateId p, const A *); + + void FinishVisit() { + // Numbers SCC's in topological order when acyclic. 
+ if (scc_) + for (StateId i = 0; i < scc_->size(); ++i) + (*scc_)[i] = nscc_ - 1 - (*scc_)[i]; + if (coaccess_internal_) + delete coaccess_; + delete dfnumber_; + delete lowlink_; + delete onstack_; + delete scc_stack_; + } + + private: + vector<StateId> *scc_; // State's scc number + vector<bool> *access_; // State's accessibility + vector<bool> *coaccess_; // State's coaccessibility + uint64 *props_; + const Fst<A> *fst_; + StateId start_; + StateId nstates_; // State count + StateId nscc_; // SCC count + bool coaccess_internal_; + vector<StateId> *dfnumber_; // state discovery times + vector<StateId> *lowlink_; // lowlink[s] == dfnumber[s] => SCC root + vector<bool> *onstack_; // is a state on the SCC stack + vector<StateId> *scc_stack_; // SCC stack (w/ random access) +}; + +template <class A> inline +void SccVisitor<A>::InitVisit(const Fst<A> &fst) { + if (scc_) + scc_->clear(); + if (access_) + access_->clear(); + if (coaccess_) { + coaccess_->clear(); + coaccess_internal_ = false; + } else { + coaccess_ = new vector<bool>; + coaccess_internal_ = true; + } + *props_ |= kAcyclic | kInitialAcyclic | kAccessible | kCoAccessible; + *props_ &= ~(kCyclic | kInitialCyclic | kNotAccessible | kNotCoAccessible); + fst_ = &fst; + start_ = fst.Start(); + nstates_ = 0; + nscc_ = 0; + dfnumber_ = new vector<StateId>; + lowlink_ = new vector<StateId>; + onstack_ = new vector<bool>; + scc_stack_ = new vector<StateId>; +} + +template <class A> inline +bool SccVisitor<A>::InitState(StateId s, StateId root) { + scc_stack_->push_back(s); + while (dfnumber_->size() <= s) { + if (scc_) + scc_->push_back(-1); + if (access_) + access_->push_back(false); + coaccess_->push_back(false); + dfnumber_->push_back(-1); + lowlink_->push_back(-1); + onstack_->push_back(false); + } + (*dfnumber_)[s] = nstates_; + (*lowlink_)[s] = nstates_; + (*onstack_)[s] = true; + if (root == start_) { + if (access_) + (*access_)[s] = true; + } else { + if (access_) + (*access_)[s] = false; + *props_ |= 
kNotAccessible; + *props_ &= ~kAccessible; + } + ++nstates_; + return true; +} + +template <class A> inline +void SccVisitor<A>::FinishState(StateId s, StateId p, const A *) { + if (fst_->Final(s) != Weight::Zero()) + (*coaccess_)[s] = true; + if ((*dfnumber_)[s] == (*lowlink_)[s]) { // root of new SCC + bool scc_coaccess = false; + size_t i = scc_stack_->size(); + StateId t; + do { + t = (*scc_stack_)[--i]; + if ((*coaccess_)[t]) + scc_coaccess = true; + } while (s != t); + do { + t = scc_stack_->back(); + if (scc_) + (*scc_)[t] = nscc_; + if (scc_coaccess) + (*coaccess_)[t] = true; + (*onstack_)[t] = false; + scc_stack_->pop_back(); + } while (s != t); + if (!scc_coaccess) { + *props_ |= kNotCoAccessible; + *props_ &= ~kCoAccessible; + } + ++nscc_; + } + if (p != kNoStateId) { + if ((*coaccess_)[s]) + (*coaccess_)[p] = true; + if ((*lowlink_)[s] < (*lowlink_)[p]) + (*lowlink_)[p] = (*lowlink_)[s]; + } +} + + +// Trims an FST, removing states and arcs that are not on successful +// paths. This version modifies its input. +// +// Complexity: +// - Time: O(V + E) +// - Space: O(V + E) +// where V = # of states and E = # of arcs. 
+template<class Arc> +void Connect(MutableFst<Arc> *fst) { + typedef typename Arc::StateId StateId; + + vector<bool> access; + vector<bool> coaccess; + uint64 props = 0; + SccVisitor<Arc> scc_visitor(0, &access, &coaccess, &props); + DfsVisit(*fst, &scc_visitor); + vector<StateId> dstates; + for (StateId s = 0; s < access.size(); ++s) + if (!access[s] || !coaccess[s]) + dstates.push_back(s); + fst->DeleteStates(dstates); + fst->SetProperties(kAccessible | kCoAccessible, kAccessible | kCoAccessible); +} + +} // namespace fst + +#endif // FST_LIB_CONNECT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/const-fst.h b/kaldi_io/src/tools/openfst/include/fst/const-fst.h new file mode 100644 index 0000000..e6e85af --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/const-fst.h @@ -0,0 +1,497 @@ +// const-fst.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Simple concrete immutable FST whose states and arcs are each stored +// in single arrays. 
+ +#ifndef FST_LIB_CONST_FST_H__ +#define FST_LIB_CONST_FST_H__ + +#include <string> +#include <vector> +using std::vector; + +#include <fst/expanded-fst.h> +#include <fst/fst-decl.h> // For optional argument declarations +#include <fst/mapped-file.h> +#include <fst/test-properties.h> +#include <fst/util.h> + + +namespace fst { + +template <class A, class U> class ConstFst; +template <class F, class G> void Cast(const F &, G *); + +// States and arcs each implemented by single arrays, templated on the +// Arc definition. The unsigned type U is used to represent indices into +// the arc array. +template <class A, class U> +class ConstFstImpl : public FstImpl<A> { + public: + using FstImpl<A>::SetInputSymbols; + using FstImpl<A>::SetOutputSymbols; + using FstImpl<A>::SetType; + using FstImpl<A>::SetProperties; + using FstImpl<A>::Properties; + + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef U Unsigned; + + ConstFstImpl() + : states_region_(0), arcs_region_(0), states_(0), arcs_(0), nstates_(0), + narcs_(0), start_(kNoStateId) { + string type = "const"; + if (sizeof(U) != sizeof(uint32)) { + string size; + Int64ToStr(8 * sizeof(U), &size); + type += size; + } + SetType(type); + SetProperties(kNullProperties | kStaticProperties); + } + + explicit ConstFstImpl(const Fst<A> &fst); + + ~ConstFstImpl() { + delete arcs_region_; + delete states_region_; + } + + StateId Start() const { return start_; } + + Weight Final(StateId s) const { return states_[s].final; } + + StateId NumStates() const { return nstates_; } + + size_t NumArcs(StateId s) const { return states_[s].narcs; } + + size_t NumInputEpsilons(StateId s) const { return states_[s].niepsilons; } + + size_t NumOutputEpsilons(StateId s) const { return states_[s].noepsilons; } + + static ConstFstImpl<A, U> *Read(istream &strm, const FstReadOptions &opts); + + A *Arcs(StateId s) { return arcs_ + states_[s].pos; } + + // Provide information needed for generic state 
iterator + void InitStateIterator(StateIteratorData<A> *data) const { + data->base = 0; + data->nstates = nstates_; + } + + // Provide information needed for the generic arc iterator + void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { + data->base = 0; + data->arcs = arcs_ + states_[s].pos; + data->narcs = states_[s].narcs; + data->ref_count = 0; + } + + private: + friend class ConstFst<A, U>; // Allow finding narcs_, nstates_ during Write + + // States implemented by array *states_ below, arcs by (single) *arcs_. + struct State { + Weight final; // Final weight + Unsigned pos; // Start of state's arcs in *arcs_ + Unsigned narcs; // Number of arcs (per state) + Unsigned niepsilons; // # of input epsilons + Unsigned noepsilons; // # of output epsilons + State() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {} + }; + + // Properties always true of this Fst class + static const uint64 kStaticProperties = kExpanded; + // Current unaligned file format version. The unaligned version was added and + // made the default since the aligned version does not work on pipes. 
+ static const int kFileVersion = 2; + // Current aligned file format version + static const int kAlignedFileVersion = 1; + // Minimum file format version supported + static const int kMinFileVersion = 1; + + MappedFile *states_region_; // Mapped file for states + MappedFile *arcs_region_; // Mapped file for arcs + State *states_; // States represenation + A *arcs_; // Arcs representation + StateId nstates_; // Number of states + size_t narcs_; // Number of arcs (per FST) + StateId start_; // Initial state + + DISALLOW_COPY_AND_ASSIGN(ConstFstImpl); +}; + +template <class A, class U> +const uint64 ConstFstImpl<A, U>::kStaticProperties; +template <class A, class U> +const int ConstFstImpl<A, U>::kFileVersion; +template <class A, class U> +const int ConstFstImpl<A, U>::kAlignedFileVersion; +template <class A, class U> +const int ConstFstImpl<A, U>::kMinFileVersion; + + +template<class A, class U> +ConstFstImpl<A, U>::ConstFstImpl(const Fst<A> &fst) : nstates_(0), narcs_(0) { + string type = "const"; + if (sizeof(U) != sizeof(uint32)) { + string size; + Int64ToStr(sizeof(U) * 8, &size); + type += size; + } + SetType(type); + SetInputSymbols(fst.InputSymbols()); + SetOutputSymbols(fst.OutputSymbols()); + start_ = fst.Start(); + + // Count # of states and arcs. 
+ for (StateIterator< Fst<A> > siter(fst); + !siter.Done(); + siter.Next()) { + ++nstates_; + StateId s = siter.Value(); + for (ArcIterator< Fst<A> > aiter(fst, s); + !aiter.Done(); + aiter.Next()) + ++narcs_; + } + states_region_ = MappedFile::Allocate(nstates_ * sizeof(*states_)); + arcs_region_ = MappedFile::Allocate(narcs_ * sizeof(*arcs_)); + states_ = reinterpret_cast<State*>(states_region_->mutable_data()); + arcs_ = reinterpret_cast<A*>(arcs_region_->mutable_data()); + size_t pos = 0; + for (StateId s = 0; s < nstates_; ++s) { + states_[s].final = fst.Final(s); + states_[s].pos = pos; + states_[s].narcs = 0; + states_[s].niepsilons = 0; + states_[s].noepsilons = 0; + for (ArcIterator< Fst<A> > aiter(fst, s); + !aiter.Done(); + aiter.Next()) { + const A &arc = aiter.Value(); + ++states_[s].narcs; + if (arc.ilabel == 0) + ++states_[s].niepsilons; + if (arc.olabel == 0) + ++states_[s].noepsilons; + arcs_[pos++] = arc; + } + } + SetProperties(fst.Properties(kCopyProperties, true) | kStaticProperties); +} + + +template<class A, class U> +ConstFstImpl<A, U> *ConstFstImpl<A, U>::Read(istream &strm, + const FstReadOptions &opts) { + ConstFstImpl<A, U> *impl = new ConstFstImpl<A, U>; + FstHeader hdr; + if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { + delete impl; + return 0; + } + impl->start_ = hdr.Start(); + impl->nstates_ = hdr.NumStates(); + impl->narcs_ = hdr.NumArcs(); + + // Ensures compatibility + if (hdr.Version() == kAlignedFileVersion) + hdr.SetFlags(hdr.GetFlags() | FstHeader::IS_ALIGNED); + + if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { + LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source; + delete impl; + return 0; + } + + size_t b = impl->nstates_ * sizeof(typename ConstFstImpl<A, U>::State); + impl->states_region_ = MappedFile::Map(&strm, opts, b); + if (!strm || impl->states_region_ == NULL) { + LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source; + delete impl; + return 0; + } + 
impl->states_ = reinterpret_cast<State*>( + impl->states_region_->mutable_data()); + if ((hdr.GetFlags() & FstHeader::IS_ALIGNED) && !AlignInput(strm)) { + LOG(ERROR) << "ConstFst::Read: Alignment failed: " << opts.source; + delete impl; + return 0; + } + + b = impl->narcs_ * sizeof(A); + impl->arcs_region_ = MappedFile::Map(&strm, opts, b); + if (!strm || impl->arcs_region_ == NULL) { + LOG(ERROR) << "ConstFst::Read: Read failed: " << opts.source; + delete impl; + return 0; + } + impl->arcs_ = reinterpret_cast<A*>(impl->arcs_region_->mutable_data()); + return impl; +} + +// Simple concrete immutable FST. This class attaches interface to +// implementation and handles reference counting, delegating most +// methods to ImplToExpandedFst. The unsigned type U is used to +// represent indices into the arc array (uint32 by default, declared +// in fst-decl.h). +template <class A, class U> +class ConstFst : public ImplToExpandedFst< ConstFstImpl<A, U> > { + public: + friend class StateIterator< ConstFst<A, U> >; + friend class ArcIterator< ConstFst<A, U> >; + template <class F, class G> void friend Cast(const F &, G *); + + typedef A Arc; + typedef typename A::StateId StateId; + typedef ConstFstImpl<A, U> Impl; + typedef U Unsigned; + + ConstFst() : ImplToExpandedFst<Impl>(new Impl()) {} + + explicit ConstFst(const Fst<A> &fst) + : ImplToExpandedFst<Impl>(new Impl(fst)) {} + + ConstFst(const ConstFst<A, U> &fst) : ImplToExpandedFst<Impl>(fst) {} + + // Get a copy of this ConstFst. See Fst<>::Copy() for further doc. + virtual ConstFst<A, U> *Copy(bool safe = false) const { + return new ConstFst<A, U>(*this); + } + + // Read a ConstFst from an input stream; return NULL on error + static ConstFst<A, U> *Read(istream &strm, const FstReadOptions &opts) { + Impl* impl = Impl::Read(strm, opts); + return impl ? 
new ConstFst<A, U>(impl) : 0; + } + + // Read a ConstFst from a file; return NULL on error + // Empty filename reads from standard input + static ConstFst<A, U> *Read(const string &filename) { + Impl* impl = ImplToExpandedFst<Impl>::Read(filename); + return impl ? new ConstFst<A, U>(impl) : 0; + } + + virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { + return WriteFst(*this, strm, opts); + } + + virtual bool Write(const string &filename) const { + return Fst<A>::WriteFile(filename); + } + + template <class F> + static bool WriteFst(const F &fst, ostream &strm, + const FstWriteOptions &opts); + + virtual void InitStateIterator(StateIteratorData<Arc> *data) const { + GetImpl()->InitStateIterator(data); + } + + virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { + GetImpl()->InitArcIterator(s, data); + } + + private: + explicit ConstFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {} + + // Makes visible to friends. + Impl *GetImpl() const { return ImplToFst<Impl, ExpandedFst<A> >::GetImpl(); } + + void SetImpl(Impl *impl, bool own_impl = true) { + ImplToFst< Impl, ExpandedFst<A> >::SetImpl(impl, own_impl); + } + + // Use overloading to extract the type of the argument. + static Impl* GetImplIfConstFst(const ConstFst &const_fst) { + return const_fst.GetImpl(); + } + + // Note that this does not give privileged treatment to subtypes of ConstFst. + template<typename NonConstFst> + static Impl* GetImplIfConstFst(const NonConstFst& fst) { + return NULL; + } + + void operator=(const ConstFst<A, U> &fst); // disallow +}; + +// Writes Fst in Const format, potentially with a pass over the machine +// before writing to compute number of states and arcs. +// +template <class A, class U> +template <class F> +bool ConstFst<A, U>::WriteFst(const F &fst, ostream &strm, + const FstWriteOptions &opts) { + int file_version = opts.align ? 
ConstFstImpl<A, U>::kAlignedFileVersion : + ConstFstImpl<A, U>::kFileVersion; + size_t num_arcs = -1, num_states = -1; + size_t start_offset = 0; + bool update_header = true; + if (Impl* impl = GetImplIfConstFst(fst)) { + num_arcs = impl->narcs_; + num_states = impl->nstates_; + update_header = false; + } else if ((start_offset = strm.tellp()) == -1) { + // precompute values needed for header when we cannot seek to rewrite it. + num_arcs = 0; + num_states = 0; + for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { + num_arcs += fst.NumArcs(siter.Value()); + ++num_states; + } + update_header = false; + } + FstHeader hdr; + hdr.SetStart(fst.Start()); + hdr.SetNumStates(num_states); + hdr.SetNumArcs(num_arcs); + string type = "const"; + if (sizeof(U) != sizeof(uint32)) { + string size; + Int64ToStr(8 * sizeof(U), &size); + type += size; + } + uint64 properties = fst.Properties(kCopyProperties, true) | + ConstFstImpl<A, U>::kStaticProperties; + FstImpl<A>::WriteFstHeader(fst, strm, opts, file_version, type, properties, + &hdr); + if (opts.align && !AlignOutput(strm)) { + LOG(ERROR) << "Could not align file during write after header"; + return false; + } + size_t pos = 0, states = 0; + typename ConstFstImpl<A, U>::State state; + for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { + state.final = fst.Final(siter.Value()); + state.pos = pos; + state.narcs = fst.NumArcs(siter.Value()); + state.niepsilons = fst.NumInputEpsilons(siter.Value()); + state.noepsilons = fst.NumOutputEpsilons(siter.Value()); + strm.write(reinterpret_cast<const char *>(&state), sizeof(state)); + pos += state.narcs; + ++states; + } + hdr.SetNumStates(states); + hdr.SetNumArcs(pos); + if (opts.align && !AlignOutput(strm)) { + LOG(ERROR) << "Could not align file during write after writing states"; + } + for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { + StateId s = siter.Value(); + for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) { + const A &arc 
= aiter.Value(); + strm.write(reinterpret_cast<const char *>(&arc), sizeof(arc)); + } + } + strm.flush(); + if (!strm) { + LOG(ERROR) << "ConstFst Write write failed: " << opts.source; + return false; + } + if (update_header) { + return FstImpl<A>::UpdateFstHeader(fst, strm, opts, file_version, type, + properties, &hdr, start_offset); + } else { + if (hdr.NumStates() != num_states) { + LOG(ERROR) << "Inconsistent number of states observed during write"; + return false; + } + if (hdr.NumArcs() != num_arcs) { + LOG(ERROR) << "Inconsistent number of arcs observed during write"; + return false; + } + } + return true; +} + +// Specialization for ConstFst; see generic version in fst.h +// for sample usage (but use the ConstFst type!). This version +// should inline. +template <class A, class U> +class StateIterator< ConstFst<A, U> > { + public: + typedef typename A::StateId StateId; + + explicit StateIterator(const ConstFst<A, U> &fst) + : nstates_(fst.GetImpl()->NumStates()), s_(0) {} + + bool Done() const { return s_ >= nstates_; } + + StateId Value() const { return s_; } + + void Next() { ++s_; } + + void Reset() { s_ = 0; } + + private: + StateId nstates_; + StateId s_; + + DISALLOW_COPY_AND_ASSIGN(StateIterator); +}; + + +// Specialization for ConstFst; see generic version in fst.h +// for sample usage (but use the ConstFst type!). This version +// should inline. 
+template <class A, class U> +class ArcIterator< ConstFst<A, U> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const ConstFst<A, U> &fst, StateId s) + : arcs_(fst.GetImpl()->Arcs(s)), + narcs_(fst.GetImpl()->NumArcs(s)), i_(0) {} + + bool Done() const { return i_ >= narcs_; } + + const A& Value() const { return arcs_[i_]; } + + void Next() { ++i_; } + + size_t Position() const { return i_; } + + void Reset() { i_ = 0; } + + void Seek(size_t a) { i_ = a; } + + uint32 Flags() const { + return kArcValueFlags; + } + + void SetFlags(uint32 f, uint32 m) {} + + private: + const A *arcs_; + size_t narcs_; + size_t i_; + + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + +// A useful alias when using StdArc. +typedef ConstFst<StdArc> StdConstFst; + +} // namespace fst + +#endif // FST_LIB_CONST_FST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/determinize.h b/kaldi_io/src/tools/openfst/include/fst/determinize.h new file mode 100644 index 0000000..9ff8723 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/determinize.h @@ -0,0 +1,1015 @@ +// determinize.h + + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Functions and classes to determinize an FST. 

#ifndef FST_LIB_DETERMINIZE_H__
#define FST_LIB_DETERMINIZE_H__

#include <algorithm>
#include <climits>
#include <tr1/unordered_map>
using std::tr1::unordered_map;
using std::tr1::unordered_multimap;
#include <map>
#include <fst/slist.h>
#include <string>
#include <vector>
using std::vector;

#include <fst/arc-map.h>
#include <fst/cache.h>
#include <fst/bi-table.h>
#include <fst/factor-weight.h>
#include <fst/prune.h>
#include <fst/test-properties.h>


namespace fst {

//
// COMMON DIVISORS - these are used in determinization to compute
// the transition weights. In the simplest case, it is just the same
// as the semiring Plus(). However, other choices permit more efficient
// determinization when the output contains strings.
//

// The default common divisor uses the semiring Plus.
template <class W>
class DefaultCommonDivisor {
 public:
  typedef W Weight;

  // Returns Plus(w1, w2).
  W operator()(const W &w1, const W &w2) const { return Plus(w1, w2); }
};


// The label common divisor for a (left) string semiring selects a
// single letter common prefix or the empty string. This is used in
// the determinization of output strings so that at most a single
// letter will appear in the output of a transition.
+template <typename L, StringType S> +class LabelCommonDivisor { + public: + typedef StringWeight<L, S> Weight; + + Weight operator()(const Weight &w1, const Weight &w2) const { + StringWeightIterator<L, S> iter1(w1); + StringWeightIterator<L, S> iter2(w2); + + if (!(StringWeight<L, S>::Properties() & kLeftSemiring)) { + FSTERROR() << "LabelCommonDivisor: Weight needs to be left semiring"; + return Weight::NoWeight(); + } else if (w1.Size() == 0 || w2.Size() == 0) { + return Weight::One(); + } else if (w1 == Weight::Zero()) { + return Weight(iter2.Value()); + } else if (w2 == Weight::Zero()) { + return Weight(iter1.Value()); + } else if (iter1.Value() == iter2.Value()) { + return Weight(iter1.Value()); + } else { + return Weight::One(); + } + } +}; + + +// The gallic common divisor uses the label common divisor on the +// string component and the template argument D common divisor on the +// weight component, which defaults to the default common divisor. +template <class L, class W, StringType S, class D = DefaultCommonDivisor<W> > +class GallicCommonDivisor { + public: + typedef GallicWeight<L, W, S> Weight; + + Weight operator()(const Weight &w1, const Weight &w2) const { + return Weight(label_common_divisor_(w1.Value1(), w2.Value1()), + weight_common_divisor_(w1.Value2(), w2.Value2())); + } + + private: + LabelCommonDivisor<L, S> label_common_divisor_; + D weight_common_divisor_; +}; + + +// Represents an element in a subset +template <class A> +struct DeterminizeElement { + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + DeterminizeElement() {} + + DeterminizeElement(StateId s, Weight w) : state_id(s), weight(w) {} + + bool operator==(const DeterminizeElement<A> & element) const { + return state_id == element.state_id && weight == element.weight; + } + + bool operator<(const DeterminizeElement<A> & element) const { + return state_id < element.state_id || + (state_id == element.state_id && weight == element.weight); + } + + StateId 
state_id; // Input state Id + Weight weight; // Residual weight +}; + + +// +// DETERMINIZE FILTERS - these can be used in determinization to compute +// transformations on the subsets prior to their being added as destination +// states. The filter operates on a map between a label and the +// corresponding destination subsets. The possibly modified map is +// then used to construct the destination states for arcs exiting state 's'. +// It must define the ordered map type LabelMap and have a default +// and copy constructor. + +// A determinize filter that does not modify its input. +template <class Arc> +struct IdentityDeterminizeFilter { + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef slist< DeterminizeElement<Arc> > Subset; + typedef map<Label, Subset*> LabelMap; + + static uint64 Properties(uint64 props) { return props; } + + void operator()(StateId s, LabelMap *label_map) {} +}; + + +// +// DETERMINIZATION STATE TABLES +// +// The determiziation state table has the form: +// +// template <class Arc> +// class DeterminizeStateTable { +// public: +// typedef typename Arc::StateId StateId; +// typedef DeterminizeElement<Arc> Element; +// typedef slist<Element> Subset; +// +// // Required constuctor +// DeterminizeStateTable(); +// +// // Required copy constructor that does not copy state +// DeterminizeStateTable(const DeterminizeStateTable<A,P> &table); +// +// // Lookup state ID by subset (not depending of the element order). +// // If it doesn't exist, then add it. FindState takes +// // ownership of the subset argument (so that it doesn't have to +// // copy it if it creates a new state). +// StateId FindState(Subset *subset); +// +// // Lookup subset by ID. +// const Subset *FindSubset(StateId id) const; +// }; +// + +// The default determinization state table based on the +// compact hash bi-table. 
template <class Arc>
class DefaultDeterminizeStateTable {
 public:
  typedef typename Arc::StateId StateId;
  typedef typename Arc::Label Label;
  typedef typename Arc::Weight Weight;
  typedef DeterminizeElement<Arc> Element;
  typedef slist<Element> Subset;

  // table_size is handed to the underlying bi-table (presumably an
  // initial size hint -- confirm in CompactHashBiTable).
  explicit DefaultDeterminizeStateTable(size_t table_size = 0)
      : table_size_(table_size),
        subsets_(table_size_, new SubsetKey(), new SubsetEqual(&elements_)) { }

  // Copies only the table size; the stored subsets are not copied.
  DefaultDeterminizeStateTable(const DefaultDeterminizeStateTable<Arc> &table)
      : table_size_(table.table_size_),
        subsets_(table_size_, new SubsetKey(), new SubsetEqual(&elements_)) { }

  // Frees every subset the table has taken ownership of.
  ~DefaultDeterminizeStateTable() {
    for (StateId s = 0; s < subsets_.Size(); ++s)
      delete subsets_.FindEntry(s);
  }

  // Finds the state corresponding to a subset. Only creates a new
  // state if the subset is not found. FindState takes ownership of
  // the subset argument (so that it doesn't have to copy it if it
  // creates a new state).
  StateId FindState(Subset *subset) {
    // A newly inserted subset receives the id equal to the old table size.
    StateId ns = subsets_.Size();
    StateId s = subsets_.FindId(subset);
    if (s != ns) delete subset;  // subset found
    return s;
  }

  // Looks up the subset stored under state id s.
  const Subset* FindSubset(StateId s) { return subsets_.FindEntry(s); }

 private:
  // Comparison object for hashing Subset(s). Subsets are not sorted in this
  // implementation, so ordering must not be assumed in the equivalence
  // test.
  class SubsetEqual {
   public:
    SubsetEqual() {  // needed for compilation but should never be called
      FSTERROR() << "SubsetEqual: default constructor not implemented";
    }

    // Constructor takes vector needed to check equality. See immediately
    // below for constraints on it.
    explicit SubsetEqual(vector<Element *> *elements)
        : elements_(elements) {}

    // At each call to operator(), the elements_ vector should contain
    // only NULLs. When this operator returns, elements_ will still
    // have this property.
    bool operator()(Subset* subset1, Subset* subset2) const {
      if (!subset1 && !subset2)
        return true;
      if ((subset1 && !subset2) || (!subset1 && subset2))
        return false;

      if (subset1->size() != subset2->size())
        return false;

      // Loads first subset elements in element vector.
      for (typename Subset::iterator iter1 = subset1->begin();
           iter1 != subset1->end();
           ++iter1) {
        Element &element1 = *iter1;
        // Grow the scratch vector so it can be indexed by state id.
        while (elements_->size() <= element1.state_id)
          elements_->push_back(0);
        (*elements_)[element1.state_id] = &element1;
      }

      // Checks second subset matches first via element vector.
      for (typename Subset::iterator iter2 = subset2->begin();
           iter2 != subset2->end();
           ++iter2) {
        Element &element2 = *iter2;
        while (elements_->size() <= element2.state_id)
          elements_->push_back(0);
        Element *element1 = (*elements_)[element2.state_id];
        if (!element1 || element1->weight != element2.weight) {
          // Mismatch found. Resets element vector before returning false.
          for (typename Subset::iterator iter1 = subset1->begin();
               iter1 != subset1->end();
               ++iter1)
            (*elements_)[iter1->state_id] = 0;
          return false;
        } else {
          (*elements_)[element2.state_id] = 0;  // Clears entry
        }
      }
      return true;
    }
   private:
    // Scratch vector indexed by state id; not owned by this object.
    vector<Element *> *elements_;
  };

  // Hash function for Subset to Fst states. Subset elements are not
  // sorted in this implementation, so the hash must be invariant
  // under subset reordering.
  class SubsetKey {
   public:
    size_t operator()(const Subset* subset) const {
      size_t hash = 0;
      if (subset) {
        for (typename Subset::const_iterator iter = subset->begin();
             iter != subset->end();
             ++iter) {
          const Element &element = *iter;
          // Rotate the state id by an amount derived from the id itself
          // and XOR in the weight hash; XOR-combining the per-element
          // values keeps the result independent of element order.
          int lshift = element.state_id % (CHAR_BIT * sizeof(size_t) - 1) + 1;
          int rshift = CHAR_BIT * sizeof(size_t) - lshift;
          size_t n = element.state_id;
          hash ^= n << lshift ^ n >> rshift ^ element.weight.Hash();
        }
      }
      return hash;
    }
  };

  size_t table_size_;

  typedef CompactHashBiTable<StateId, Subset *,
                             SubsetKey, SubsetEqual, HS_STL>  SubsetTable;

  SubsetTable subsets_;
  // Scratch storage shared with the SubsetEqual instance built above.
  vector<Element *> elements_;

  void operator=(const DefaultDeterminizeStateTable<Arc> &);  // disallow
};

// Options for finite-state transducer determinization templated on
// the arc type, common divisor, the determinization filter and the
// state table.  DeterminizeFst takes ownership of the determinization
// filter and state table if provided.
template <class Arc,
          class D = DefaultCommonDivisor<typename Arc::Weight>,
          class F = IdentityDeterminizeFilter<Arc>,
          class T = DefaultDeterminizeStateTable<Arc> >
struct DeterminizeFstOptions : CacheOptions {
  typedef typename Arc::Label Label;
  float delta;                // Quantization delta for subset weights
  Label subsequential_label;  // Label used for residual final output
                              // when producing subsequential transducers.
  F *filter;                  // Determinization filter
  T *state_table;             // Determinization state table

  // Uses the given cache options; all other settings are optional.
  explicit DeterminizeFstOptions(const CacheOptions &opts,
                                 float del = kDelta, Label lab = 0,
                                 F *filt = 0,
                                 T *table = 0)
      : CacheOptions(opts), delta(del), subsequential_label(lab),
        filter(filt), state_table(table) {}

  // Uses default-constructed cache options.
  explicit DeterminizeFstOptions(float del = kDelta, Label lab = 0,
                                 F *filt = 0, T *table = 0)
      : delta(del), subsequential_label(lab), filter(filt),
        state_table(table) {}
};

// Implementation of delayed DeterminizeFst.
This base class is +// common to the variants that implement acceptor and transducer +// determinization. +template <class A> +class DeterminizeFstImplBase : public CacheImpl<A> { + public: + using FstImpl<A>::SetType; + using FstImpl<A>::SetProperties; + using FstImpl<A>::Properties; + using FstImpl<A>::SetInputSymbols; + using FstImpl<A>::SetOutputSymbols; + + using CacheBaseImpl< CacheState<A> >::HasStart; + using CacheBaseImpl< CacheState<A> >::HasFinal; + using CacheBaseImpl< CacheState<A> >::HasArcs; + using CacheBaseImpl< CacheState<A> >::SetFinal; + using CacheBaseImpl< CacheState<A> >::SetStart; + + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef CacheState<A> State; + + template <class D, class F, class T> + DeterminizeFstImplBase(const Fst<A> &fst, + const DeterminizeFstOptions<A, D, F, T> &opts) + : CacheImpl<A>(opts), fst_(fst.Copy()) { + SetType("determinize"); + uint64 iprops = fst.Properties(kFstProperties, false); + uint64 dprops = DeterminizeProperties(iprops, + opts.subsequential_label != 0); + SetProperties(F::Properties(dprops), kCopyProperties); + SetInputSymbols(fst.InputSymbols()); + SetOutputSymbols(fst.OutputSymbols()); + } + + DeterminizeFstImplBase(const DeterminizeFstImplBase<A> &impl) + : CacheImpl<A>(impl), + fst_(impl.fst_->Copy(true)) { + SetType("determinize"); + SetProperties(impl.Properties(), kCopyProperties); + SetInputSymbols(impl.InputSymbols()); + SetOutputSymbols(impl.OutputSymbols()); + } + + virtual ~DeterminizeFstImplBase() { delete fst_; } + + virtual DeterminizeFstImplBase<A> *Copy() = 0; + + StateId Start() { + if (!HasStart()) { + StateId start = ComputeStart(); + if (start != kNoStateId) { + SetStart(start); + } + } + return CacheImpl<A>::Start(); + } + + Weight Final(StateId s) { + if (!HasFinal(s)) { + Weight final = ComputeFinal(s); + SetFinal(s, final); + } + return CacheImpl<A>::Final(s); + } + + virtual void Expand(StateId s) = 0; + + 
size_t NumArcs(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<A>::NumArcs(s); + } + + size_t NumInputEpsilons(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<A>::NumInputEpsilons(s); + } + + size_t NumOutputEpsilons(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<A>::NumOutputEpsilons(s); + } + + void InitArcIterator(StateId s, ArcIteratorData<A> *data) { + if (!HasArcs(s)) + Expand(s); + CacheImpl<A>::InitArcIterator(s, data); + } + + virtual StateId ComputeStart() = 0; + + virtual Weight ComputeFinal(StateId s) = 0; + + const Fst<A> &GetFst() const { return *fst_; } + + private: + const Fst<A> *fst_; // Input Fst + + void operator=(const DeterminizeFstImplBase<A> &); // disallow +}; + + +// Implementation of delayed determinization for weighted acceptors. +// It is templated on the arc type A and the common divisor D. +template <class A, class D, class F, class T> +class DeterminizeFsaImpl : public DeterminizeFstImplBase<A> { + public: + using FstImpl<A>::SetProperties; + using DeterminizeFstImplBase<A>::GetFst; + using DeterminizeFstImplBase<A>::SetArcs; + + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef DeterminizeElement<A> Element; + typedef slist<Element> Subset; + typedef typename F::LabelMap LabelMap; + + DeterminizeFsaImpl(const Fst<A> &fst, + const vector<Weight> *in_dist, vector<Weight> *out_dist, + const DeterminizeFstOptions<A, D, F, T> &opts) + : DeterminizeFstImplBase<A>(fst, opts), + delta_(opts.delta), + in_dist_(in_dist), + out_dist_(out_dist), + filter_(opts.filter ? opts.filter : new F()), + state_table_(opts.state_table ? 
opts.state_table : new T()) { + if (!fst.Properties(kAcceptor, true)) { + FSTERROR() << "DeterminizeFst: argument not an acceptor"; + SetProperties(kError, kError); + } + if (!(Weight::Properties() & kLeftSemiring)) { + FSTERROR() << "DeterminizeFst: Weight needs to be left distributive: " + << Weight::Type(); + SetProperties(kError, kError); + } + if (out_dist_) + out_dist_->clear(); + } + + DeterminizeFsaImpl(const DeterminizeFsaImpl<A, D, F, T> &impl) + : DeterminizeFstImplBase<A>(impl), + delta_(impl.delta_), + in_dist_(0), + out_dist_(0), + filter_(new F(*impl.filter_)), + state_table_(new T(*impl.state_table_)) { + if (impl.out_dist_) { + FSTERROR() << "DeterminizeFsaImpl: cannot copy with out_dist vector"; + SetProperties(kError, kError); + } + } + + virtual ~DeterminizeFsaImpl() { + delete filter_; + delete state_table_; + } + + virtual DeterminizeFsaImpl<A, D, F, T> *Copy() { + return new DeterminizeFsaImpl<A, D, F, T>(*this); + } + + uint64 Properties() const { return Properties(kFstProperties); } + + // Set error if found; return FST impl properties. 
+ uint64 Properties(uint64 mask) const { + if ((mask & kError) && (GetFst().Properties(kError, false))) + SetProperties(kError, kError); + return FstImpl<A>::Properties(mask); + } + + // Returns the state for the singleton subset holding the input start + // state with weight One(), or kNoStateId if the input has no start state. + virtual StateId ComputeStart() { + StateId s = GetFst().Start(); + if (s == kNoStateId) + return kNoStateId; + Element element(s, Weight::One()); + Subset *subset = new Subset; + subset->push_front(element); + return FindState(subset); + } + + // Sums, over the subset of state S, each element's residual weight times + // the input final weight of its state. Sets kError on a non-member sum. + virtual Weight ComputeFinal(StateId s) { + const Subset *subset = state_table_->FindSubset(s); + Weight final = Weight::Zero(); + for (typename Subset::const_iterator siter = subset->begin(); + siter != subset->end(); + ++siter) { + const Element &element = *siter; + final = Plus(final, Times(element.weight, + GetFst().Final(element.state_id))); + if (!final.Member()) + SetProperties(kError, kError); + } + return final; + } + + // Looks up (or adds) the state for SUBSET through the state table, which + // takes ownership of SUBSET. When both distance vectors were supplied and + // the returned state is not yet covered by out_dist, appends its distance. + // out_dist may legitimately be null (the constructor checks for that), so + // it is tested here as well before being dereferenced. + StateId FindState(Subset *subset) { + StateId s = state_table_->FindState(subset); + if (in_dist_ && out_dist_ && out_dist_->size() <= s) + out_dist_->push_back(ComputeDistance(subset)); + return s; + } + + // Compute distance from a state to the final states in the DFA + // given the distances in the NFA. + Weight ComputeDistance(const Subset *subset) { + Weight outd = Weight::Zero(); + for (typename Subset::const_iterator siter = subset->begin(); + siter != subset->end(); ++siter) { + const Element &element = *siter; + Weight ind = element.state_id < in_dist_->size() ? + (*in_dist_)[element.state_id] : Weight::Zero(); + outd = Plus(outd, Times(element.weight, ind)); + } + return outd; + } + + // Computes the outgoing transitions from a state, creating new destination + // states as needed. + virtual void Expand(StateId s) { + + LabelMap label_map; + LabelSubsets(s, &label_map); + + for (typename LabelMap::iterator liter = label_map.begin(); + liter != label_map.end(); + ++liter) + AddArc(s, liter->first, liter->second); + SetArcs(s); + } + + private: + // Constructs destination subsets per label.
At return, subset + // element weights include the input automaton label weights and the + // subsets may contain duplicate states. + void LabelSubsets(StateId s, LabelMap *label_map) { + const Subset *src_subset = state_table_->FindSubset(s); + + for (typename Subset::const_iterator siter = src_subset->begin(); + siter != src_subset->end(); + ++siter) { + const Element &src_element = *siter; + for (ArcIterator< Fst<A> > aiter(GetFst(), src_element.state_id); + !aiter.Done(); + aiter.Next()) { + const A &arc = aiter.Value(); + Element dest_element(arc.nextstate, + Times(src_element.weight, arc.weight)); + + // The LabelMap may be a e.g. multimap with more complex + // determinization filters, so we insert efficiently w/o using []. + typename LabelMap::iterator liter = label_map->lower_bound(arc.ilabel); + Subset* dest_subset; + if (liter == label_map->end() || liter->first != arc.ilabel) { + dest_subset = new Subset; + label_map->insert(liter, make_pair(arc.ilabel, dest_subset)); + } else { + dest_subset = liter->second; + } + + dest_subset->push_front(dest_element); + } + } + // Applies the determinization filter + (*filter_)(s, label_map); + } + + // Adds an arc from state S to the destination state associated + // with subset DEST_SUBSET (as created by LabelSubsets). + void AddArc(StateId s, Label label, Subset *dest_subset) { + A arc; + arc.ilabel = label; + arc.olabel = label; + arc.weight = Weight::Zero(); + + typename Subset::iterator oiter; + for (typename Subset::iterator diter = dest_subset->begin(); + diter != dest_subset->end();) { + Element &dest_element = *diter; + // Computes label weight. + arc.weight = common_divisor_(arc.weight, dest_element.weight); + + while (elements_.size() <= dest_element.state_id) + elements_.push_back(0); + Element *matching_element = elements_[dest_element.state_id]; + if (matching_element) { + // Found duplicate state: sums state weight and deletes dup. 
+ matching_element->weight = Plus(matching_element->weight, + dest_element.weight); + if (!matching_element->weight.Member()) + SetProperties(kError, kError); + ++diter; + dest_subset->erase_after(oiter); + } else { + // Saves element so we can check for duplicate for this state. + elements_[dest_element.state_id] = &dest_element; + oiter = diter; + ++diter; + } + } + + // Divides out label weight from destination subset elements. + // Quantizes to ensure comparisons are effective. + // Clears element vector. + for (typename Subset::iterator diter = dest_subset->begin(); + diter != dest_subset->end(); + ++diter) { + Element &dest_element = *diter; + dest_element.weight = Divide(dest_element.weight, arc.weight, + DIVIDE_LEFT); + dest_element.weight = dest_element.weight.Quantize(delta_); + elements_[dest_element.state_id] = 0; + } + + arc.nextstate = FindState(dest_subset); + CacheImpl<A>::PushArc(s, arc); + } + + float delta_; // Quantization delta for subset weights + const vector<Weight> *in_dist_; // Distance to final NFA states + vector<Weight> *out_dist_; // Distance to final DFA states + + D common_divisor_; + F *filter_; + T *state_table_; + + vector<Element *> elements_; + + void operator=(const DeterminizeFsaImpl<A, D, F, T> &); // disallow +}; + + +// Implementation of delayed determinization for transducers. +// Transducer determinization is implemented by mapping the input to +// the Gallic semiring as an acceptor whose weights contain the output +// strings and using acceptor determinization above to determinize +// that acceptor. 
+template <class A, StringType S, class D, class F, class T> +class DeterminizeFstImpl : public DeterminizeFstImplBase<A> { + public: + using FstImpl<A>::SetProperties; + using DeterminizeFstImplBase<A>::GetFst; + using CacheBaseImpl< CacheState<A> >::GetCacheGc; + using CacheBaseImpl< CacheState<A> >::GetCacheLimit; + + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + + typedef ToGallicMapper<A, S> ToMapper; + typedef FromGallicMapper<A, S> FromMapper; + + typedef typename ToMapper::ToArc ToArc; + typedef ArcMapFst<A, ToArc, ToMapper> ToFst; + typedef ArcMapFst<ToArc, A, FromMapper> FromFst; + + typedef GallicCommonDivisor<Label, Weight, S, D> CommonDivisor; + typedef GallicFactor<Label, Weight, S> FactorIterator; + + DeterminizeFstImpl(const Fst<A> &fst, + const DeterminizeFstOptions<A, D, F, T> &opts) + : DeterminizeFstImplBase<A>(fst, opts), + delta_(opts.delta), + subsequential_label_(opts.subsequential_label) { + Init(GetFst()); + } + + DeterminizeFstImpl(const DeterminizeFstImpl<A, S, D, F, T> &impl) + : DeterminizeFstImplBase<A>(impl), + delta_(impl.delta_), + subsequential_label_(impl.subsequential_label_) { + Init(GetFst()); + } + + ~DeterminizeFstImpl() { delete from_fst_; } + + virtual DeterminizeFstImpl<A, S, D, F, T> *Copy() { + return new DeterminizeFstImpl<A, S, D, F, T>(*this); + } + + uint64 Properties() const { return Properties(kFstProperties); } + + // Set error if found; return FST impl properties. 
+ uint64 Properties(uint64 mask) const { + if ((mask & kError) && (GetFst().Properties(kError, false) || + from_fst_->Properties(kError, false))) + SetProperties(kError, kError); + return FstImpl<A>::Properties(mask); + } + + virtual StateId ComputeStart() { return from_fst_->Start(); } + + virtual Weight ComputeFinal(StateId s) { return from_fst_->Final(s); } + + virtual void Expand(StateId s) { + for (ArcIterator<FromFst> aiter(*from_fst_, s); + !aiter.Done(); + aiter.Next()) + CacheImpl<A>::PushArc(s, aiter.Value()); + CacheImpl<A>::SetArcs(s); + } + + private: + // Initialization of transducer determinization implementation, which + // is defined after DeterminizeFst since it calls it. + void Init(const Fst<A> &fst); + + float delta_; // Quantization delta passed on to the acceptor determinization + Label subsequential_label_; // Label used for residual final output + FromFst *from_fst_; // Owned: allocated in Init(), deleted in the destructor + + void operator=(const DeterminizeFstImpl<A, S, D, F, T> &); // disallow +}; + + +// Determinizes a weighted transducer. This version is a delayed +// Fst. The result will be an equivalent FST that has the property +// that no state has two transitions with the same input label. +// For this algorithm, epsilon transitions are treated as regular +// symbols (cf. RmEpsilon). +// +// The transducer must be functional. The weights must be (weakly) +// left divisible (valid for TropicalWeight and LogWeight for instance) +// and zero-sum-free, i.e. for all a,b: Plus(a, b) = 0 => a = b = 0. +// +// Complexity: +// - Determinizable: exponential (polynomial in the size of the output) +// - Non-determinizable: does not terminate +// +// The determinizable automata include all unweighted and all acyclic input. +// +// References: +// - Mehryar Mohri, "Finite-State Transducers in Language and Speech +// Processing". Computational Linguistics, 23:2, 1997. +// +// This class attaches interface to implementation and handles +// reference counting, delegating most methods to ImplToFst.
+template <class A> +class DeterminizeFst : public ImplToFst< DeterminizeFstImplBase<A> > { + public: + friend class ArcIterator< DeterminizeFst<A> >; + friend class StateIterator< DeterminizeFst<A> >; + template <class B, StringType S, class D, class F, class T> + friend class DeterminizeFstImpl; + + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef CacheState<A> State; + typedef DeterminizeFstImplBase<A> Impl; + + using ImplToFst<Impl>::SetImpl; + + explicit DeterminizeFst(const Fst<A> &fst) { + typedef DefaultCommonDivisor<Weight> D; + typedef IdentityDeterminizeFilter<A> F; + typedef DefaultDeterminizeStateTable<A> T; + DeterminizeFstOptions<A, D, F, T> opts; + if (fst.Properties(kAcceptor, true)) { + // Calls implementation for acceptors. + SetImpl(new DeterminizeFsaImpl<A, D, F, T>(fst, 0, 0, opts)); + } else { + // Calls implementation for transducers. + SetImpl(new + DeterminizeFstImpl<A, STRING_LEFT_RESTRICT, D, F, T>(fst, opts)); + } + } + + template <class D, class F, class T> + DeterminizeFst(const Fst<A> &fst, + const DeterminizeFstOptions<A, D, F, T> &opts) { + if (fst.Properties(kAcceptor, true)) { + // Calls implementation for acceptors. + SetImpl(new DeterminizeFsaImpl<A, D, F, T>(fst, 0, 0, opts)); + } else { + // Calls implementation for transducers. + SetImpl(new + DeterminizeFstImpl<A, STRING_LEFT_RESTRICT, D, F, T>(fst, opts)); + } + } + + // This acceptor-only version additionally computes the distance to + // final states in the output if provided with those distances for the + // input. Useful for e.g. unique N-shortest paths. 
+ template <class D, class F, class T> + DeterminizeFst(const Fst<A> &fst, + const vector<Weight> *in_dist, vector<Weight> *out_dist, + const DeterminizeFstOptions<A, D, F, T> &opts) { + if (!fst.Properties(kAcceptor, true)) { + FSTERROR() << "DeterminizeFst:" + << " distance to final states computed for acceptors only"; + GetImpl()->SetProperties(kError, kError); + } + SetImpl(new DeterminizeFsaImpl<A, D, F, T>(fst, in_dist, out_dist, opts)); + } + + // See Fst<>::Copy() for doc. + DeterminizeFst(const DeterminizeFst<A> &fst, bool safe = false) { + if (safe) + SetImpl(fst.GetImpl()->Copy()); + else + SetImpl(fst.GetImpl(), false); + } + + // Get a copy of this DeterminizeFst. See Fst<>::Copy() for further doc. + virtual DeterminizeFst<A> *Copy(bool safe = false) const { + return new DeterminizeFst<A>(*this, safe); + } + + virtual inline void InitStateIterator(StateIteratorData<A> *data) const; + + virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { + GetImpl()->InitArcIterator(s, data); + } + + private: + // Makes visible to friends. + Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } + + void operator=(const DeterminizeFst<A> &fst); // Disallow +}; + + +// Initialization of transducer determinization implementation. which +// is defined after DeterminizeFst since it calls it. +template <class A, StringType S, class D, class F, class T> +void DeterminizeFstImpl<A, S, D, F, T>::Init(const Fst<A> &fst) { + // Mapper to an acceptor. + ToFst to_fst(fst, ToMapper()); + + // Determinizes acceptor. + // This recursive call terminates since it passes the common divisor + // to a private constructor. + CacheOptions copts(GetCacheGc(), GetCacheLimit()); + DeterminizeFstOptions<ToArc, CommonDivisor> dopts(copts, delta_); + // Uses acceptor-only constructor to avoid template recursion + DeterminizeFst<ToArc> det_fsa(to_fst, 0, 0, dopts); + + // Mapper back to transducer. 
+ FactorWeightOptions<ToArc> fopts(CacheOptions(true, 0), delta_, + kFactorFinalWeights, + subsequential_label_, + subsequential_label_); + FactorWeightFst<ToArc, FactorIterator> factored_fst(det_fsa, fopts); + from_fst_ = new FromFst(factored_fst, FromMapper(subsequential_label_)); +} + + +// Specialization for DeterminizeFst. +template <class A> +class StateIterator< DeterminizeFst<A> > + : public CacheStateIterator< DeterminizeFst<A> > { + public: + explicit StateIterator(const DeterminizeFst<A> &fst) + : CacheStateIterator< DeterminizeFst<A> >(fst, fst.GetImpl()) {} +}; + + +// Specialization for DeterminizeFst. +template <class A> +class ArcIterator< DeterminizeFst<A> > + : public CacheArcIterator< DeterminizeFst<A> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const DeterminizeFst<A> &fst, StateId s) + : CacheArcIterator< DeterminizeFst<A> >(fst.GetImpl(), s) { + if (!fst.GetImpl()->HasArcs(s)) + fst.GetImpl()->Expand(s); + } + + private: + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + + +template <class A> inline +void DeterminizeFst<A>::InitStateIterator(StateIteratorData<A> *data) const +{ + data->base = new StateIterator< DeterminizeFst<A> >(*this); +} + + +// Useful aliases when using StdArc. +typedef DeterminizeFst<StdArc> StdDeterminizeFst; + + +template <class Arc> +struct DeterminizeOptions { + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + typedef typename Arc::Label Label; + + float delta; // Quantization delta for subset weights. + Weight weight_threshold; // Pruning weight threshold. + StateId state_threshold; // Pruning state threshold. + Label subsequential_label; // Label used for residual final output + // when producing subsequential transducers. 
+ + explicit DeterminizeOptions(float d = kDelta, Weight w = Weight::Zero(), + StateId n = kNoStateId, Label l = 0) + : delta(d), weight_threshold(w), state_threshold(n), + subsequential_label(l) {} +}; + + +// Determinizes a weighted transducer. This version writes the +// determinized Fst to an output MutableFst. The result will be an +// equivalent FST that has the property that no state has two +// transitions with the same input label. For this algorithm, epsilon +// transitions are treated as regular symbols (cf. RmEpsilon). +// +// The transducer must be functional. The weights must be (weakly) +// left divisible (valid for TropicalWeight and LogWeight). +// +// Complexity: +// - Determinizable: exponential (polynomial in the size of the output) +// - Non-determinizable: does not terminate +// +// The determinizable automata include all unweighted and all acyclic input. +// +// References: +// - Mehryar Mohri, "Finite-State Transducers in Language and Speech +// Processing". Computational Linguistics, 23:2, 1997. +template <class Arc> +void Determinize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst, + const DeterminizeOptions<Arc> &opts + = DeterminizeOptions<Arc>()) { + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + + DeterminizeFstOptions<Arc> nopts; + nopts.delta = opts.delta; + nopts.subsequential_label = opts.subsequential_label; + + nopts.gc_limit = 0; // Cache only the last state for fastest copy. 
+ + if (opts.weight_threshold != Weight::Zero() || + opts.state_threshold != kNoStateId) { + if (ifst.Properties(kAcceptor, false)) { + vector<Weight> idistance, odistance; + ShortestDistance(ifst, &idistance, true); + DeterminizeFst<Arc> dfst(ifst, &idistance, &odistance, nopts); + PruneOptions< Arc, AnyArcFilter<Arc> > popts(opts.weight_threshold, + opts.state_threshold, + AnyArcFilter<Arc>(), + &odistance); + Prune(dfst, ofst, popts); + } else { + *ofst = DeterminizeFst<Arc>(ifst, nopts); + Prune(ofst, opts.weight_threshold, opts.state_threshold); + } + } else { + *ofst = DeterminizeFst<Arc>(ifst, nopts); + } +} + + +} // namespace fst + +#endif // FST_LIB_DETERMINIZE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/dfs-visit.h b/kaldi_io/src/tools/openfst/include/fst/dfs-visit.h new file mode 100644 index 0000000..4d93a39 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/dfs-visit.h @@ -0,0 +1,205 @@ +// dfs-visit.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Depth-first search visitation. See visit.h for more general +// search queue disciplines. + +#ifndef FST_LIB_DFS_VISIT_H__ +#define FST_LIB_DFS_VISIT_H__ + +#include <stack> +#include <vector> +using std::vector; + +#include <fst/arcfilter.h> +#include <fst/fst.h> + + +namespace fst { + +// Visitor Interface - class determines actions taken during a Dfs. 
+// If any of the boolean member functions return false, the DFS is +// aborted by first calling FinishState() on all currently grey states +// and then calling FinishVisit(). +// +// Note this is similar to the more general visitor interface in visit.h +// except that FinishState receives additional information appropriate only for +// a DFS and some method names here are better suited to a DFS. +// +// template <class Arc> +// class Visitor { +// public: +// typedef typename Arc::StateId StateId; +// +// Visitor(T *return_data); +// // Invoked before DFS visit +// void InitVisit(const Fst<Arc> &fst); +// // Invoked when state discovered (2nd arg is DFS tree root) +// bool InitState(StateId s, StateId root); +// // Invoked when tree arc examined (to white/undiscovered state) +// bool TreeArc(StateId s, const Arc &a); +// // Invoked when back arc examined (to grey/unfinished state) +// bool BackArc(StateId s, const Arc &a); +// // Invoked when forward or cross arc examined (to black/finished state) +// bool ForwardOrCrossArc(StateId s, const Arc &a); +// // Invoked when state finished (PARENT is kNoStateId and PARENT_ARC == NULL +// // when S is tree root) +// void FinishState(StateId s, StateId parent, const Arc *parent_arc); +// // Invoked after DFS visit +// void FinishVisit(); +// }; + +// An Fst state's DFS status +const int kDfsWhite = 0; // Undiscovered +const int kDfsGrey = 1; // Discovered & unfinished +const int kDfsBlack = 2; // Finished + +// An Fst state's DFS stack state +template <class Arc> +struct DfsState { + typedef typename Arc::StateId StateId; + + DfsState(const Fst<Arc> &fst, StateId s): state_id(s), arc_iter(fst, s) {} + + StateId state_id; // Fst state ... + ArcIterator< Fst<Arc> > arc_iter; // and its corresponding arcs +}; + + +// Performs depth-first visitation. Visitor class argument determines +// actions and contains any return data. ArcFilter determines arcs +// that are considered.
+// +// Note this is similar to Visit() in visit.h called with a LIFO +// queue except this version has a Visitor class specialized and +// augmented for a DFS. +template <class Arc, class V, class ArcFilter> +void DfsVisit(const Fst<Arc> &fst, V *visitor, ArcFilter filter) { + typedef typename Arc::StateId StateId; + + visitor->InitVisit(fst); + + StateId start = fst.Start(); + if (start == kNoStateId) { + visitor->FinishVisit(); + return; + } + + vector<char> state_color; // Fst state DFS status + stack<DfsState<Arc> *> state_stack; // DFS execution stack + + StateId nstates = start + 1; // # of known states in general case + bool expanded = false; + if (fst.Properties(kExpanded, false)) { // tests if expanded case, then + nstates = CountStates(fst); // uses ExpandedFst::NumStates(). + expanded = true; + } + + state_color.resize(nstates, kDfsWhite); + StateIterator< Fst<Arc> > siter(fst); + + // Continue DFS while true + bool dfs = true; + + // Iterate over trees in DFS forest. + for (StateId root = start; dfs && root < nstates;) { + state_color[root] = kDfsGrey; + state_stack.push(new DfsState<Arc>(fst, root)); + dfs = visitor->InitState(root, root); + while (!state_stack.empty()) { + DfsState<Arc> *dfs_state = state_stack.top(); + StateId s = dfs_state->state_id; + if (s >= state_color.size()) { + nstates = s + 1; + state_color.resize(nstates, kDfsWhite); + } + ArcIterator< Fst<Arc> > &aiter = dfs_state->arc_iter; + if (!dfs || aiter.Done()) { + state_color[s] = kDfsBlack; + delete dfs_state; + state_stack.pop(); + if (!state_stack.empty()) { + DfsState<Arc> *parent_state = state_stack.top(); + StateId p = parent_state->state_id; + ArcIterator< Fst<Arc> > &piter = parent_state->arc_iter; + visitor->FinishState(s, p, &piter.Value()); + piter.Next(); + } else { + visitor->FinishState(s, kNoStateId, 0); + } + continue; + } + const Arc &arc = aiter.Value(); + if (arc.nextstate >= state_color.size()) { + nstates = arc.nextstate + 1; + state_color.resize(nstates, 
kDfsWhite); + } + if (!filter(arc)) { + aiter.Next(); + continue; + } + int next_color = state_color[arc.nextstate]; + switch (next_color) { + default: + case kDfsWhite: + dfs = visitor->TreeArc(s, arc); + if (!dfs) break; + state_color[arc.nextstate] = kDfsGrey; + state_stack.push(new DfsState<Arc>(fst, arc.nextstate)); + dfs = visitor->InitState(arc.nextstate, root); + break; + case kDfsGrey: + dfs = visitor->BackArc(s, arc); + aiter.Next(); + break; + case kDfsBlack: + dfs = visitor->ForwardOrCrossArc(s, arc); + aiter.Next(); + break; + } + } + + // Find next tree root + for (root = root == start ? 0 : root + 1; + root < nstates && state_color[root] != kDfsWhite; + ++root) { + } + + // Check for a state beyond the largest known state + if (!expanded && root == nstates) { + for (; !siter.Done(); siter.Next()) { + if (siter.Value() == nstates) { + ++nstates; + state_color.push_back(kDfsWhite); + break; + } + } + } + } + visitor->FinishVisit(); +} + + +template <class Arc, class V> +void DfsVisit(const Fst<Arc> &fst, V *visitor) { + DfsVisit(fst, visitor, AnyArcFilter<Arc>()); +} + +} // namespace fst + +#endif // FST_LIB_DFS_VISIT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/difference.h b/kaldi_io/src/tools/openfst/include/fst/difference.h new file mode 100644 index 0000000..8a3306f --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/difference.h @@ -0,0 +1,189 @@ +// difference.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Class to compute the difference between two FSAs + +#ifndef FST_LIB_DIFFERENCE_H__ +#define FST_LIB_DIFFERENCE_H__ + +#include <vector> +using std::vector; +#include <algorithm> + +#include <fst/cache.h> +#include <fst/compose.h> +#include <fst/complement.h> + + +namespace fst { + +// Options for DifferenceFst. These are ComposeFstOptions under another +// name: the cache options, matchers, filter and state table are all +// forwarded to the base class. +template <class A, + class M = Matcher<Fst<A> >, + class F = SequenceComposeFilter<M>, + class T = GenericComposeStateTable<A, typename F::FilterState> > +struct DifferenceFstOptions : public ComposeFstOptions<A, M, F, T> { + // The cache options must be passed as the first base-class argument; + // omitting them drops OPTS and shifts every later argument by one. + explicit DifferenceFstOptions(const CacheOptions &opts, + M *mat1 = 0, M *mat2 = 0, + F *filt = 0, T *sttable= 0) + : ComposeFstOptions<A, M, F, T>(opts, mat1, mat2, filt, sttable) { } + + DifferenceFstOptions() {} +}; + +// Computes the difference between two FSAs. This version is a delayed +// Fst. Only strings that are in the first automaton but not in second +// are retained in the result. +// +// The first argument must be an acceptor; the second argument must be +// an unweighted, epsilon-free, deterministic acceptor. One of the +// arguments must be label-sorted. +// +// Complexity: same as ComposeFst. +// +// Caveats: same as ComposeFst. +template <class A> +class DifferenceFst : public ComposeFst<A> { + public: + using ImplToFst< ComposeFstImplBase<A> >::SetImpl; + using ImplToFst< ComposeFstImplBase<A> >::GetImpl; + + using ComposeFst<A>::CreateBase1; + + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + + // A - B = A ^ B'.
+ DifferenceFst(const Fst<A> &fst1, const Fst<A> &fst2, + const CacheOptions &opts = CacheOptions()) { + typedef RhoMatcher< Matcher<Fst<A> > > R; + + ComplementFst<A> cfst(fst2); + ComposeFstOptions<A, R> copts(CacheOptions(), + new R(fst1, MATCH_NONE), + new R(cfst, MATCH_INPUT, + ComplementFst<A>::kRhoLabel)); + SetImpl(CreateBase1(fst1, cfst, copts)); + + if (!fst1.Properties(kAcceptor, true)) { + FSTERROR() << "DifferenceFst: 1st argument not an acceptor"; + GetImpl()->SetProperties(kError, kError); + } + } + + template <class M, class F, class T> + DifferenceFst(const Fst<A> &fst1, const Fst<A> &fst2, + const DifferenceFstOptions<A, M, F, T> &opts) { + typedef RhoMatcher<M> R; + + ComplementFst<A> cfst(fst2); + ComposeFstOptions<A, R> copts(opts); + copts.matcher1 = new R(fst1, MATCH_NONE, kNoLabel, MATCHER_REWRITE_ALWAYS, + opts.matcher1); + copts.matcher2 = new R(cfst, MATCH_INPUT, ComplementFst<A>::kRhoLabel, + MATCHER_REWRITE_ALWAYS, opts.matcher2); + + SetImpl(CreateBase1(fst1, cfst, copts)); + + if (!fst1.Properties(kAcceptor, true)) { + FSTERROR() << "DifferenceFst: 1st argument not an acceptor"; + GetImpl()->SetProperties(kError, kError); + } + } + + // See Fst<>::Copy() for doc. + DifferenceFst(const DifferenceFst<A> &fst, bool safe = false) + : ComposeFst<A>(fst, safe) {} + + // Get a copy of this DifferenceFst. See Fst<>::Copy() for further doc. + virtual DifferenceFst<A> *Copy(bool safe = false) const { + return new DifferenceFst<A>(*this, safe); + } +}; + + +// Specialization for DifferenceFst. +template <class A> +class StateIterator< DifferenceFst<A> > + : public StateIterator< ComposeFst<A> > { + public: + explicit StateIterator(const DifferenceFst<A> &fst) + : StateIterator< ComposeFst<A> >(fst) {} +}; + + +// Specialization for DifferenceFst. 
+template <class A> +class ArcIterator< DifferenceFst<A> > + : public ArcIterator< ComposeFst<A> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const DifferenceFst<A> &fst, StateId s) + : ArcIterator< ComposeFst<A> >(fst, s) {} +}; + +// Useful alias when using StdArc. +typedef DifferenceFst<StdArc> StdDifferenceFst; + + +typedef ComposeOptions DifferenceOptions; + + +// Computes the difference between two FSAs. This version is writes +// the difference to an output MutableFst. Only strings that are in +// the first automaton but not in second are retained in the result. +// +// The first argument must be an acceptor; the second argument must be +// an unweighted, epsilon-free, deterministic acceptor. One of the +// arguments must be label-sorted. +// +// Complexity: same as Compose. +// +// Caveats: same as Compose. +template<class Arc> +void Difference(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2, + MutableFst<Arc> *ofst, + const DifferenceOptions &opts = DifferenceOptions()) { + typedef Matcher< Fst<Arc> > M; + + if (opts.filter_type == AUTO_FILTER) { + CacheOptions nopts; + nopts.gc_limit = 0; // Cache only the last state for fastest copy. + *ofst = DifferenceFst<Arc>(ifst1, ifst2, nopts); + } else if (opts.filter_type == SEQUENCE_FILTER) { + DifferenceFstOptions<Arc> dopts; + dopts.gc_limit = 0; // Cache only the last state for fastest copy. + *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts); + } else if (opts.filter_type == ALT_SEQUENCE_FILTER) { + DifferenceFstOptions<Arc, M, AltSequenceComposeFilter<M> > dopts; + dopts.gc_limit = 0; // Cache only the last state for fastest copy. + *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts); + } else if (opts.filter_type == MATCH_FILTER) { + DifferenceFstOptions<Arc, M, MatchComposeFilter<M> > dopts; + dopts.gc_limit = 0; // Cache only the last state for fastest copy. 
+ *ofst = DifferenceFst<Arc>(ifst1, ifst2, dopts); + } + + if (opts.connect) + Connect(ofst); +} + +} // namespace fst + +#endif // FST_LIB_DIFFERENCE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/edit-fst.h b/kaldi_io/src/tools/openfst/include/fst/edit-fst.h new file mode 100644 index 0000000..bd33b9d --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/edit-fst.h @@ -0,0 +1,779 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Dan Bikel) +// +// An \ref Fst implementation that allows non-destructive edit operations on an +// existing fst. + +#ifndef FST_LIB_EDIT_FST_H_ +#define FST_LIB_EDIT_FST_H_ + +#include <vector> +using std::vector; + +#include <fst/cache.h> + +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; + +namespace fst { + +// The EditFst class enables non-destructive edit operations on a wrapped +// ExpandedFst. The implementation uses copy-on-write semantics at the node +// level: if a user has an underlying fst on which he or she wants to perform a +// relatively small number of edits (read: mutations), then this implementation +// will copy the edited node to an internal MutableFst and perform any edits in +// situ on that copied node. 
This class supports all the methods of MutableFst +// except for DeleteStates(const vector<StateId> &); thus, new nodes may also be +// added, and one may add transitions from existing nodes of the wrapped fst to +// new nodes. +// +// N.B.: The documentation for Fst::Copy(true) says that its behavior is +// undefined if invoked on an fst that has already been accessed. This class +// requires that the Fst implementation it wraps provides consistent, reliable +// behavior when its Copy(true) method is invoked, where consistent means +// the graph structure, graph properties and state numbering do not change. +// VectorFst and CompactFst, for example, are both well-behaved in this regard. + +// The EditFstData class is a container for all mutable data for EditFstImpl; +// also, this class provides most of the actual implementation of what EditFst +// does (that is, most of EditFstImpl's methods delegate to methods in this, the +// EditFstData class). Instances of this class are reference-counted and can be +// shared between otherwise independent EditFstImpl instances. This scheme +// allows EditFstImpl to implement the thread-safe, copy-on-write semantics +// required by Fst::Copy(true).
+// +// template parameters: +// A the type of arc to use +// WrappedFstT the type of fst wrapped by the EditFst instance that +// this EditFstData instance is backing +// MutableFstT the type of mutable fst to use internally for edited states; +// crucially, MutableFstT::Copy(false) *must* yield an fst that is +// thread-safe for reading (VectorFst, for example, has this property) +template <typename A, + typename WrappedFstT = ExpandedFst<A>, + typename MutableFstT = VectorFst<A> > +class EditFstData { + public: + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef typename unordered_map<StateId, StateId>::const_iterator + IdMapIterator; + typedef typename unordered_map<StateId, Weight>::const_iterator + FinalWeightIterator; + + + EditFstData() : num_new_states_(0) { + SetEmptyAndDeleteKeysForInternalMaps(); + } + + EditFstData(const EditFstData &other) : + edits_(other.edits_), + external_to_internal_ids_(other.external_to_internal_ids_), + edited_final_weights_(other.edited_final_weights_), + num_new_states_(other.num_new_states_) { + } + + ~EditFstData() { + } + + static EditFstData<A, WrappedFstT, MutableFstT> *Read(istream &strm, + const FstReadOptions &opts); + + bool Write(ostream &strm, const FstWriteOptions &opts) const { + // Serialize all private data members of this class. + FstWriteOptions edits_opts(opts); + edits_opts.write_header = true; // Force writing contained header. 
+ edits_.Write(strm, edits_opts); + WriteType(strm, external_to_internal_ids_); + WriteType(strm, edited_final_weights_); + WriteType(strm, num_new_states_); + if (!strm) { + LOG(ERROR) << "EditFstData::Write: write failed: " << opts.source; + return false; + } + return true; + } + + int RefCount() const { return ref_count_.count(); } + int IncrRefCount() { return ref_count_.Incr(); } + int DecrRefCount() { return ref_count_.Decr(); } + + StateId NumNewStates() const { + return num_new_states_; + } + + // accessor methods for the fst holding edited states + StateId EditedStart() const { + return edits_.Start(); + } + + Weight Final(StateId s, const WrappedFstT *wrapped) const { + FinalWeightIterator final_weight_it = GetFinalWeightIterator(s); + if (final_weight_it == NotInFinalWeightMap()) { + IdMapIterator it = GetEditedIdMapIterator(s); + return it == NotInEditedMap() ? + wrapped->Final(s) : edits_.Final(it->second); + } + else { + return final_weight_it->second; + } + } + + size_t NumArcs(StateId s, const WrappedFstT *wrapped) const { + IdMapIterator it = GetEditedIdMapIterator(s); + return it == NotInEditedMap() ? + wrapped->NumArcs(s) : edits_.NumArcs(it->second); + } + + size_t NumInputEpsilons(StateId s, const WrappedFstT *wrapped) const { + IdMapIterator it = GetEditedIdMapIterator(s); + return it == NotInEditedMap() ? + wrapped->NumInputEpsilons(s) : + edits_.NumInputEpsilons(it->second); + } + + size_t NumOutputEpsilons(StateId s, const WrappedFstT *wrapped) const { + IdMapIterator it = GetEditedIdMapIterator(s); + return it == NotInEditedMap() ? + wrapped->NumOutputEpsilons(s) : + edits_.NumOutputEpsilons(it->second); + } + + void SetEditedProperties(uint64 props, uint64 mask) { + edits_.SetProperties(props, mask); + } + + // non-const MutableFst operations + + // Sets the start state for this fst. + void SetStart(StateId s) { + edits_.SetStart(s); + } + + // Sets the final state for this fst. 
+ Weight SetFinal(StateId s, Weight w, const WrappedFstT *wrapped) { + Weight old_weight = Final(s, wrapped); + IdMapIterator it = GetEditedIdMapIterator(s); + // if we haven't already edited state s, don't add it to edited_ (which can + // be expensive if s has many transitions); just use the + // edited_final_weights_ map + if (it == NotInEditedMap()) { + edited_final_weights_[s] = w; + } + else { + edits_.SetFinal(GetEditableInternalId(s, wrapped), w); + } + return old_weight; + } + + // Adds a new state to this fst, initially with no arcs. + StateId AddState(StateId curr_num_states) { + StateId internal_state_id = edits_.AddState(); + StateId external_state_id = curr_num_states; + external_to_internal_ids_[external_state_id] = internal_state_id; + num_new_states_++; + return external_state_id; + } + + // Adds the specified arc to the specified state of this fst. + const A *AddArc(StateId s, const Arc &arc, const WrappedFstT *wrapped) { + StateId internal_id = GetEditableInternalId(s, wrapped); + + size_t num_arcs = edits_.NumArcs(internal_id); + ArcIterator<MutableFstT> arc_it(edits_, internal_id); + const A *prev_arc = NULL; + if (num_arcs > 0) { + // grab the final arc associated with this state in edits_ + arc_it.Seek(num_arcs - 1); + prev_arc = &(arc_it.Value()); + } + edits_.AddArc(internal_id, arc); + return prev_arc; + } + + void DeleteStates() { + edits_.DeleteStates(); + num_new_states_ = 0; + external_to_internal_ids_.clear(); + edited_final_weights_.clear(); + } + + // Removes all but the first n outgoing arcs of the specified state. + void DeleteArcs(StateId s, size_t n, const WrappedFstT *wrapped) { + edits_.DeleteArcs(GetEditableInternalId(s, wrapped), n); + } + + // Removes all outgoing arcs from the specified state. + void DeleteArcs(StateId s, const WrappedFstT *wrapped) { + edits_.DeleteArcs(GetEditableInternalId(s, wrapped)); + } + + // end methods for non-const MutableFst operations + + // Provides information for the generic arc iterator. 
+ void InitArcIterator(StateId s, ArcIteratorData<Arc> *data, + const WrappedFstT *wrapped) const { + IdMapIterator id_map_it = GetEditedIdMapIterator(s); + if (id_map_it == NotInEditedMap()) { + VLOG(3) << "EditFstData::InitArcIterator: iterating on state " + << s << " of original fst"; + wrapped->InitArcIterator(s, data); + } else { + VLOG(2) << "EditFstData::InitArcIterator: iterating on edited state " + << s << " (internal state id: " << id_map_it->second << ")"; + edits_.InitArcIterator(id_map_it->second, data); + } + } + + // Provides information for the generic mutable arc iterator. + void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data, + const WrappedFstT *wrapped) { + data->base = + new MutableArcIterator<MutableFstT>(&edits_, + GetEditableInternalId(s, wrapped)); + } + + // Prints out the map from external to internal state id's (for debugging + // purposes). + void PrintMap() { + for (IdMapIterator map_it = external_to_internal_ids_.begin(); + map_it != NotInEditedMap(); ++map_it) { + LOG(INFO) << "(external,internal)=(" + << map_it->first << "," << map_it->second << ")"; + } + } + + + private: + void SetEmptyAndDeleteKeysForInternalMaps() { + } + + // Returns the iterator of the map from external to internal state id's + // of edits_ for the specified external state id. + IdMapIterator GetEditedIdMapIterator(StateId s) const { + return external_to_internal_ids_.find(s); + } + IdMapIterator NotInEditedMap() const { + return external_to_internal_ids_.end(); + } + + FinalWeightIterator GetFinalWeightIterator(StateId s) const { + return edited_final_weights_.find(s); + } + FinalWeightIterator NotInFinalWeightMap() const { + return edited_final_weights_.end(); + } + + // Returns the internal state id of the specified external id if the state has + // already been made editable, or else copies the state from wrapped_ + // to edits_ and returns the state id of the newly editable state in edits_. 
+ // + // \return makes the specified state editable if it isn't already and returns + // its state id in edits_ + StateId GetEditableInternalId(StateId s, const WrappedFstT *wrapped) { + IdMapIterator id_map_it = GetEditedIdMapIterator(s); + if (id_map_it == NotInEditedMap()) { + StateId new_internal_id = edits_.AddState(); + VLOG(2) << "EditFstData::GetEditableInternalId: editing state " << s + << " of original fst; new internal state id:" << new_internal_id; + external_to_internal_ids_[s] = new_internal_id; + for (ArcIterator< Fst<A> > arc_iterator(*wrapped, s); + !arc_iterator.Done(); + arc_iterator.Next()) { + edits_.AddArc(new_internal_id, arc_iterator.Value()); + } + // copy the final weight + FinalWeightIterator final_weight_it = GetFinalWeightIterator(s); + if (final_weight_it == NotInFinalWeightMap()) { + edits_.SetFinal(new_internal_id, wrapped->Final(s)); + } else { + edits_.SetFinal(new_internal_id, final_weight_it->second); + edited_final_weights_.erase(s); + } + return new_internal_id; + } else { + return id_map_it->second; + } + } + + // A mutable fst (by default, a VectorFst) to contain new states, and/or + // copies of states from a wrapped ExpandedFst that have been modified in + // some way. + MutableFstT edits_; + // A mapping from external state id's to the internal id's of states that + // appear in edits_. + unordered_map<StateId, StateId> external_to_internal_ids_; + // A mapping from external state id's to final state weights assigned to + // those states. The states in this map are *only* those whose final weight + // has been modified; if any other part of the state has been modified, + // the entire state is copied to edits_, and all modifications reside there. + unordered_map<StateId, Weight> edited_final_weights_; + // The number of new states added to this mutable fst impl, which is <= the + // number of states in edits_ (since edits_ contains both edited *and* new + // states). 
+ StateId num_new_states_; + RefCounter ref_count_; +}; + +// EditFstData method implementations: just the Read method. +template <typename A, typename WrappedFstT, typename MutableFstT> +EditFstData<A, WrappedFstT, MutableFstT> * +EditFstData<A, WrappedFstT, MutableFstT>::Read(istream &strm, + const FstReadOptions &opts) { + EditFstData<A, WrappedFstT, MutableFstT> *data = + new EditFstData<A, WrappedFstT, MutableFstT>(); + // next read in MutabelFstT machine that stores edits + FstReadOptions edits_opts(opts); + edits_opts.header = 0; // Contained header was written out, so read it in. + + // Because our internal representation of edited states is a solid object + // of type MutableFstT (defaults to VectorFst<A>) and not a pointer, + // and because the static Read method allocates a new object on the heap, + // we need to call Read, check if there was a failure, use + // MutableFstT::operator= to assign the object (not the pointer) to the + // edits_ data member (which will increase the ref count by 1 on the impl) + // and, finally, delete the heap-allocated object. + MutableFstT *edits = MutableFstT::Read(strm, edits_opts); + if (!edits) { + return 0; + } + data->edits_ = *edits; + delete edits; + // finally, read in rest of private data members + ReadType(strm, &data->external_to_internal_ids_); + ReadType(strm, &data->edited_final_weights_); + ReadType(strm, &data->num_new_states_); + if (!strm) { + LOG(ERROR) << "EditFst::Read: read failed: " << opts.source; + return 0; + } + return data; +} + +// This class enables non-destructive edit operations on a wrapped ExpandedFst. +// The implementation uses copy-on-write semantics at the node level: if a user +// has an underlying fst on which he or she wants to perform a relatively small +// number of edits (read: mutations), then this implementation will copy the +// edited node to an internal MutableFst and perform any edits in situ on that +// copied node. 
This class supports all the methods of MutableFst except for +// DeleteStates(const vector<StateId> &); thus, new nodes may also be added, and +// one may add transitions from existing nodes of the wrapped fst to new nodes. +// +// template parameters: +// A the type of arc to use +// WrappedFstT the type of fst wrapped by the EditFst instance that +// this EditFstImpl instance is backing +// MutableFstT the type of mutable fst to use internally for edited states; +// crucially, MutableFstT::Copy(false) *must* yield an fst that is +// thread-safe for reading (VectorFst, for example, has this property) +template <typename A, + typename WrappedFstT = ExpandedFst<A>, + typename MutableFstT = VectorFst<A> > +class EditFstImpl : public FstImpl<A> { + public: + using FstImpl<A>::SetProperties; + using FstImpl<A>::SetInputSymbols; + using FstImpl<A>::SetOutputSymbols; + using FstImpl<A>::WriteHeader; + + typedef A Arc; + typedef typename Arc::Weight Weight; + typedef typename Arc::StateId StateId; + + // Constructs an editable fst implementation with no states. Effectively, + // this initially-empty fst will in every way mimic the behavior of + // a VectorFst--more precisely, a VectorFstImpl instance--but with slightly + // slower performance (by a constant factor), due to the fact that + // this class maintains a mapping between external state id's and + // their internal equivalents. + EditFstImpl() { + FstImpl<A>::SetType("edit"); + wrapped_ = new MutableFstT(); + InheritPropertiesFromWrapped(); + data_ = new EditFstData<A, WrappedFstT, MutableFstT>(); + } + + // Wraps the specified ExpandedFst. This constructor requires that the + // specified Fst is an ExpandedFst instance. This requirement is only enforced + // at runtime. (See below for the reason.) + // + // This library uses the pointer-to-implementation or "PIMPL" design pattern. 
+ // In particular, to make it convenient to bind an implementation class to its + // interface, there are a pair of template "binder" classes, one for immutable + // and one for mutable fst's (ImplToFst and ImplToMutableFst, respectively). + // As it happens, the API for the ImplToMutableFst<I,F> class requires that + // the implementation class--the template parameter "I"--have a constructor + // taking a const Fst<A> reference. Accordingly, the constructor here must + // perform a static_cast to the WrappedFstT type required by EditFst and + // therefore EditFstImpl. + explicit EditFstImpl(const Fst<A> &wrapped) + : wrapped_(static_cast<WrappedFstT *>(wrapped.Copy())) { + FstImpl<A>::SetType("edit"); + + data_ = new EditFstData<A, WrappedFstT, MutableFstT>(); + // have edits_ inherit all properties from wrapped_ + data_->SetEditedProperties(wrapped_->Properties(kFstProperties, false), + kFstProperties); + InheritPropertiesFromWrapped(); + } + + // A copy constructor for this implementation class, used to implement + // the Copy() method of the Fst interface. + EditFstImpl(const EditFstImpl &impl) + : FstImpl<A>(), + wrapped_(static_cast<WrappedFstT *>(impl.wrapped_->Copy(true))), + data_(impl.data_) { + data_->IncrRefCount(); + SetProperties(impl.Properties()); + } + + ~EditFstImpl() { + delete wrapped_; + if (!data_->DecrRefCount()) { + delete data_; + } + } + + // const Fst/ExpandedFst operations, declared in the Fst and ExpandedFst + // interfaces + StateId Start() const { + StateId edited_start = data_->EditedStart(); + return edited_start == kNoStateId ? 
wrapped_->Start() : edited_start; + } + + Weight Final(StateId s) const { + return data_->Final(s, wrapped_); + } + + size_t NumArcs(StateId s) const { + return data_->NumArcs(s, wrapped_); + } + + size_t NumInputEpsilons(StateId s) const { + return data_->NumInputEpsilons(s, wrapped_); + } + + size_t NumOutputEpsilons(StateId s) const { + return data_->NumOutputEpsilons(s, wrapped_); + } + + StateId NumStates() const { + return wrapped_->NumStates() + data_->NumNewStates(); + } + + static EditFstImpl<A, WrappedFstT, MutableFstT> * + Read(istream &strm, + const FstReadOptions &opts); + + bool Write(ostream &strm, const FstWriteOptions &opts) const { + FstHeader hdr; + hdr.SetStart(Start()); + hdr.SetNumStates(NumStates()); + FstWriteOptions header_opts(opts); + header_opts.write_isymbols = false; // Let contained FST hold any symbols. + header_opts.write_osymbols = false; + WriteHeader(strm, header_opts, kFileVersion, &hdr); + + // First, serialize wrapped fst to stream. + FstWriteOptions wrapped_opts(opts); + wrapped_opts.write_header = true; // Force writing contained header. + wrapped_->Write(strm, wrapped_opts); + + data_->Write(strm, opts); + + strm.flush(); + if (!strm) { + LOG(ERROR) << "EditFst::Write: write failed: " << opts.source; + return false; + } + return true; + } + // end const Fst operations + + // non-const MutableFst operations + + // Sets the start state for this fst. + void SetStart(StateId s) { + MutateCheck(); + data_->SetStart(s); + SetProperties(SetStartProperties(FstImpl<A>::Properties())); + } + + // Sets the final state for this fst. + void SetFinal(StateId s, Weight w) { + MutateCheck(); + Weight old_weight = data_->SetFinal(s, w, wrapped_); + SetProperties(SetFinalProperties(FstImpl<A>::Properties(), old_weight, w)); + } + + // Adds a new state to this fst, initially with no arcs. 
+ StateId AddState() { + MutateCheck(); + SetProperties(AddStateProperties(FstImpl<A>::Properties())); + return data_->AddState(NumStates()); + } + + // Adds the specified arc to the specified state of this fst. + void AddArc(StateId s, const Arc &arc) { + MutateCheck(); + const A *prev_arc = data_->AddArc(s, arc, wrapped_); + SetProperties(AddArcProperties(FstImpl<A>::Properties(), s, arc, prev_arc)); + } + + void DeleteStates(const vector<StateId>& dstates) { + FSTERROR() << ": EditFstImpl::DeleteStates(const std::vector<StateId>&): " + << " not implemented"; + SetProperties(kError, kError); + } + + // Deletes all states in this fst. + void DeleteStates(); + + // Removes all but the first n outgoing arcs of the specified state. + void DeleteArcs(StateId s, size_t n) { + MutateCheck(); + data_->DeleteArcs(s, n, wrapped_); + SetProperties(DeleteArcsProperties(FstImpl<A>::Properties())); + } + + // Removes all outgoing arcs from the specified state. + void DeleteArcs(StateId s) { + MutateCheck(); + data_->DeleteArcs(s, wrapped_); + SetProperties(DeleteArcsProperties(FstImpl<A>::Properties())); + } + + void ReserveStates(StateId s) { + } + + void ReserveArcs(StateId s, size_t n) { + } + + // end non-const MutableFst operations + + // Provides information for the generic state iterator. + void InitStateIterator(StateIteratorData<Arc> *data) const { + data->base = 0; + data->nstates = NumStates(); + } + + // Provides information for the generic arc iterator. + void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { + data_->InitArcIterator(s, data, wrapped_); + } + + // Provides information for the generic mutable arc iterator. 
+ void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data) { + MutateCheck(); + data_->InitMutableArcIterator(s, data, wrapped_); + } + + private: + typedef typename unordered_map<StateId, StateId>::const_iterator + IdMapIterator; + typedef typename unordered_map<StateId, Weight>::const_iterator + FinalWeightIterator; + // Properties always true of this Fst class + static const uint64 kStaticProperties = kExpanded | kMutable; + // Current file format version + static const int kFileVersion = 2; + // Minimum file format version supported + static const int kMinFileVersion = 2; + + // Causes this fst to inherit all the properties from its wrapped fst, except + // for the two properties that always apply to EditFst instances: kExpanded + // and kMutable. + void InheritPropertiesFromWrapped() { + SetProperties(wrapped_->Properties(kCopyProperties, false) | + kStaticProperties); + SetInputSymbols(wrapped_->InputSymbols()); + SetOutputSymbols(wrapped_->OutputSymbols()); + } + + // This method ensures that any operations that alter the mutable data + // portion of this EditFstImpl cause the data_ member to be copied when its + // reference count is greater than 1. Note that this method is distinct from + // MutableFst::Mutate, which gets invoked whenever one of the basic mutation + // methods defined in MutableFst is invoked, such as SetInputSymbols. + // The MutateCheck here in EditFstImpl is invoked whenever one of the + // mutating methods specifically related to the types of edits provided + // by EditFst is performed, such as changing an arc of an existing state + // of the wrapped fst via a MutableArcIterator, or adding a new state via + // AddState(). + void MutateCheck() { + if (data_->RefCount() > 1) { + EditFstData<A, WrappedFstT, MutableFstT> *data_copy = + new EditFstData<A, WrappedFstT, MutableFstT>(*data_); + if (data_ && !data_->DecrRefCount()) { + delete data_; + } + data_ = data_copy; + } + } + + // The fst that this fst wraps. 
The purpose of this class is to enable + // non-destructive edits on this wrapped fst. + const WrappedFstT *wrapped_; + // The mutable data for this EditFst instance, with delegates for all the + // methods that can mutate data. + EditFstData<A, WrappedFstT, MutableFstT> *data_; +}; + +template <typename A, typename WrappedFstT, typename MutableFstT> +const uint64 EditFstImpl<A, WrappedFstT, MutableFstT>::kStaticProperties; + +// EditFstImpl IMPLEMENTATION STARTS HERE + +template<typename A, typename WrappedFstT, typename MutableFstT> +inline void EditFstImpl<A, WrappedFstT, MutableFstT>::DeleteStates() { + data_->DeleteStates(); + delete wrapped_; + // we are deleting all states, so just forget about pointer to wrapped_ + // and do what default constructor does: set wrapped_ to a new VectorFst + wrapped_ = new MutableFstT(); + uint64 newProps = DeleteAllStatesProperties(FstImpl<A>::Properties(), + kStaticProperties); + FstImpl<A>::SetProperties(newProps); +} + +template <typename A, typename WrappedFstT, typename MutableFstT> +EditFstImpl<A, WrappedFstT, MutableFstT> * +EditFstImpl<A, WrappedFstT, MutableFstT>::Read(istream &strm, + const FstReadOptions &opts) { + EditFstImpl<A, WrappedFstT, MutableFstT> *impl = new EditFstImpl(); + FstHeader hdr; + if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { + return 0; + } + impl->SetStart(hdr.Start()); + + // first, read in wrapped fst + FstReadOptions wrapped_opts(opts); + wrapped_opts.header = 0; // Contained header was written out, so read it in. + Fst<A> *wrapped_fst = Fst<A>::Read(strm, wrapped_opts); + if (!wrapped_fst) { + return 0; + } + impl->wrapped_ = static_cast<WrappedFstT *>(wrapped_fst); + + impl->data_ = EditFstData<A, WrappedFstT, MutableFstT>::Read(strm, opts); + + if (!impl->data_) { + delete wrapped_fst; + return 0; + } + + return impl; +} + +// END EditFstImpl IMPLEMENTATION + +// Concrete, editable FST. This class attaches interface to implementation. 
+template <typename A, + typename WrappedFstT = ExpandedFst<A>, + typename MutableFstT = VectorFst<A> > +class EditFst : + public ImplToMutableFst< EditFstImpl<A, WrappedFstT, MutableFstT> > { + public: + friend class MutableArcIterator< EditFst<A, WrappedFstT, MutableFstT> >; + + typedef A Arc; + typedef typename A::StateId StateId; + typedef EditFstImpl<A, WrappedFstT, MutableFstT> Impl; + + EditFst() : ImplToMutableFst<Impl>(new Impl()) {} + + explicit EditFst(const Fst<A> &fst) : + ImplToMutableFst<Impl>(new Impl(fst)) {} + + explicit EditFst(const WrappedFstT &fst) : + ImplToMutableFst<Impl>(new Impl(fst)) {} + + // See Fst<>::Copy() for doc. + EditFst(const EditFst<A, WrappedFstT, MutableFstT> &fst, bool safe = false) : + ImplToMutableFst<Impl>(fst, safe) {} + + virtual ~EditFst() {} + + // Get a copy of this EditFst. See Fst<>::Copy() for further doc. + virtual EditFst<A, WrappedFstT, MutableFstT> *Copy(bool safe = false) const { + return new EditFst<A, WrappedFstT, MutableFstT>(*this, safe); + } + + EditFst<A, WrappedFstT, MutableFstT> & + operator=(const EditFst<A, WrappedFstT, MutableFstT> &fst) { + SetImpl(fst.GetImpl(), false); + return *this; + } + + virtual EditFst<A, WrappedFstT, MutableFstT> &operator=(const Fst<A> &fst) { + if (this != &fst) { + SetImpl(new Impl(fst)); + } + return *this; + } + + // Read an EditFst from an input stream; return NULL on error. + static EditFst<A, WrappedFstT, MutableFstT> * + Read(istream &strm, + const FstReadOptions &opts) { + Impl* impl = Impl::Read(strm, opts); + return impl ? new EditFst<A>(impl) : 0; + } + + // Read an EditFst from a file; return NULL on error. + // Empty filename reads from standard input. + static EditFst<A, WrappedFstT, MutableFstT> *Read(const string &filename) { + Impl* impl = ImplToExpandedFst<Impl, MutableFst<A> >::Read(filename); + return impl ? 
new EditFst<A, WrappedFstT, MutableFstT>(impl) : 0; + } + + virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { + return GetImpl()->Write(strm, opts); + } + + virtual bool Write(const string &filename) const { + return Fst<A>::WriteFile(filename); + } + + virtual void InitStateIterator(StateIteratorData<Arc> *data) const { + GetImpl()->InitStateIterator(data); + } + + virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { + GetImpl()->InitArcIterator(s, data); + } + + virtual + void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *data) { + GetImpl()->InitMutableArcIterator(s, data); + } + private: + explicit EditFst(Impl *impl) : ImplToMutableFst<Impl>(impl) {} + + // Makes visible to friends. + Impl *GetImpl() const { return ImplToFst< Impl, MutableFst<A> >::GetImpl(); } + + void SetImpl(Impl *impl, bool own_impl = true) { + ImplToFst< Impl, MutableFst<A> >::SetImpl(impl, own_impl); + } +}; + +} // namespace fst + +#endif // FST_LIB_EDIT_FST_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/encode.h b/kaldi_io/src/tools/openfst/include/fst/encode.h new file mode 100644 index 0000000..08b84cb --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/encode.h @@ -0,0 +1,599 @@ +// encode.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Johan Schalkwyk) +// +// \file +// Class to encode and decoder an fst. 
+ +#ifndef FST_LIB_ENCODE_H__ +#define FST_LIB_ENCODE_H__ + +#include <climits> +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <string> +#include <vector> +using std::vector; + +#include <fst/arc-map.h> +#include <fst/rmfinalepsilon.h> + + +namespace fst { + +static const uint32 kEncodeLabels = 0x0001; +static const uint32 kEncodeWeights = 0x0002; +static const uint32 kEncodeFlags = 0x0003; // All non-internal flags + +static const uint32 kEncodeHasISymbols = 0x0004; // For internal use +static const uint32 kEncodeHasOSymbols = 0x0008; // For internal use + +enum EncodeType { ENCODE = 1, DECODE = 2 }; + +// Identifies stream data as an encode table (and its endianity) +static const int32 kEncodeMagicNumber = 2129983209; + + +// The following class encapsulates implementation details for the +// encoding and decoding of label/weight tuples used for encoding +// and decoding of Fsts. The EncodeTable is bidirectional. I.E it +// stores both the Tuple of encode labels and weights to a unique +// label, and the reverse. +template <class A> class EncodeTable { + public: + typedef typename A::Label Label; + typedef typename A::Weight Weight; + + // Encoded data consists of arc input/output labels and arc weight + struct Tuple { + Tuple() {} + Tuple(Label ilabel_, Label olabel_, Weight weight_) + : ilabel(ilabel_), olabel(olabel_), weight(weight_) {} + Tuple(const Tuple& tuple) + : ilabel(tuple.ilabel), olabel(tuple.olabel), weight(tuple.weight) {} + + Label ilabel; + Label olabel; + Weight weight; + }; + + // Comparison object for hashing EncodeTable Tuple(s). + class TupleEqual { + public: + bool operator()(const Tuple* x, const Tuple* y) const { + return (x->ilabel == y->ilabel && + x->olabel == y->olabel && + x->weight == y->weight); + } + }; + + // Hash function for EncodeTabe Tuples. Based on the encode flags + // we either hash the labels, weights or combination of them. 
+ class TupleKey { + public: + TupleKey() + : encode_flags_(kEncodeLabels | kEncodeWeights) {} + + TupleKey(const TupleKey& key) + : encode_flags_(key.encode_flags_) {} + + explicit TupleKey(uint32 encode_flags) + : encode_flags_(encode_flags) {} + + size_t operator()(const Tuple* x) const { + size_t hash = x->ilabel; + const int lshift = 5; + const int rshift = CHAR_BIT * sizeof(size_t) - 5; + if (encode_flags_ & kEncodeLabels) + hash = hash << lshift ^ hash >> rshift ^ x->olabel; + if (encode_flags_ & kEncodeWeights) + hash = hash << lshift ^ hash >> rshift ^ x->weight.Hash(); + return hash; + } + + private: + int32 encode_flags_; + }; + + typedef unordered_map<const Tuple*, + Label, + TupleKey, + TupleEqual> EncodeHash; + + explicit EncodeTable(uint32 encode_flags) + : flags_(encode_flags), + encode_hash_(1024, TupleKey(encode_flags)), + isymbols_(0), osymbols_(0) {} + + ~EncodeTable() { + for (size_t i = 0; i < encode_tuples_.size(); ++i) { + delete encode_tuples_[i]; + } + delete isymbols_; + delete osymbols_; + } + + // Given an arc encode either input/ouptut labels or input/costs or both + Label Encode(const A &arc) { + const Tuple tuple(arc.ilabel, + flags_ & kEncodeLabels ? arc.olabel : 0, + flags_ & kEncodeWeights ? arc.weight : Weight::One()); + typename EncodeHash::const_iterator it = encode_hash_.find(&tuple); + if (it == encode_hash_.end()) { + encode_tuples_.push_back(new Tuple(tuple)); + encode_hash_[encode_tuples_.back()] = encode_tuples_.size(); + return encode_tuples_.size(); + } else { + return it->second; + } + } + + // Given an arc, look up its encoded label. Returns kNoLabel if not found. + Label GetLabel(const A &arc) const { + const Tuple tuple(arc.ilabel, + flags_ & kEncodeLabels ? arc.olabel : 0, + flags_ & kEncodeWeights ? 
arc.weight : Weight::One()); + typename EncodeHash::const_iterator it = encode_hash_.find(&tuple); + if (it == encode_hash_.end()) { + return kNoLabel; + } else { + return it->second; + } + } + + // Given an encode arc Label decode back to input/output labels and costs + const Tuple* Decode(Label key) const { + if (key < 1 || key > encode_tuples_.size()) { + LOG(ERROR) << "EncodeTable::Decode: unknown decode key: " << key; + return 0; + } + return encode_tuples_[key - 1]; + } + + size_t Size() const { return encode_tuples_.size(); } + + bool Write(ostream &strm, const string &source) const; + + static EncodeTable<A> *Read(istream &strm, const string &source); + + const uint32 flags() const { return flags_ & kEncodeFlags; } + + int RefCount() const { return ref_count_.count(); } + int IncrRefCount() { return ref_count_.Incr(); } + int DecrRefCount() { return ref_count_.Decr(); } + + + SymbolTable *InputSymbols() const { return isymbols_; } + + SymbolTable *OutputSymbols() const { return osymbols_; } + + void SetInputSymbols(const SymbolTable* syms) { + if (isymbols_) delete isymbols_; + if (syms) { + isymbols_ = syms->Copy(); + flags_ |= kEncodeHasISymbols; + } else { + isymbols_ = 0; + flags_ &= ~kEncodeHasISymbols; + } + } + + void SetOutputSymbols(const SymbolTable* syms) { + if (osymbols_) delete osymbols_; + if (syms) { + osymbols_ = syms->Copy(); + flags_ |= kEncodeHasOSymbols; + } else { + osymbols_ = 0; + flags_ &= ~kEncodeHasOSymbols; + } + } + + private: + uint32 flags_; + vector<Tuple*> encode_tuples_; + EncodeHash encode_hash_; + RefCounter ref_count_; + SymbolTable *isymbols_; // Pre-encoded ilabel symbol table + SymbolTable *osymbols_; // Pre-encoded olabel symbol table + + DISALLOW_COPY_AND_ASSIGN(EncodeTable); +}; + +template <class A> inline +bool EncodeTable<A>::Write(ostream &strm, const string &source) const { + WriteType(strm, kEncodeMagicNumber); + WriteType(strm, flags_); + int64 size = encode_tuples_.size(); + WriteType(strm, size); + for 
(size_t i = 0; i < size; ++i) { + const Tuple* tuple = encode_tuples_[i]; + WriteType(strm, tuple->ilabel); + WriteType(strm, tuple->olabel); + tuple->weight.Write(strm); + } + + if (flags_ & kEncodeHasISymbols) + isymbols_->Write(strm); + + if (flags_ & kEncodeHasOSymbols) + osymbols_->Write(strm); + + strm.flush(); + if (!strm) { + LOG(ERROR) << "EncodeTable::Write: write failed: " << source; + return false; + } + return true; +} + +template <class A> inline +EncodeTable<A> *EncodeTable<A>::Read(istream &strm, const string &source) { + int32 magic_number = 0; + ReadType(strm, &magic_number); + if (magic_number != kEncodeMagicNumber) { + LOG(ERROR) << "EncodeTable::Read: Bad encode table header: " << source; + return 0; + } + uint32 flags; + ReadType(strm, &flags); + EncodeTable<A> *table = new EncodeTable<A>(flags); + + int64 size; + ReadType(strm, &size); + if (!strm) { + LOG(ERROR) << "EncodeTable::Read: read failed: " << source; + return 0; + } + + for (size_t i = 0; i < size; ++i) { + Tuple* tuple = new Tuple(); + ReadType(strm, &tuple->ilabel); + ReadType(strm, &tuple->olabel); + tuple->weight.Read(strm); + if (!strm) { + LOG(ERROR) << "EncodeTable::Read: read failed: " << source; + return 0; + } + table->encode_tuples_.push_back(tuple); + table->encode_hash_[table->encode_tuples_.back()] = + table->encode_tuples_.size(); + } + + if (flags & kEncodeHasISymbols) + table->isymbols_ = SymbolTable::Read(strm, source); + + if (flags & kEncodeHasOSymbols) + table->osymbols_ = SymbolTable::Read(strm, source); + + return table; +} + + +// A mapper to encode/decode weighted transducers. Encoding of an +// Fst is useful for performing classical determinization or minimization +// on a weighted transducer by treating it as an unweighted acceptor over +// encoded labels. +// +// The Encode mapper stores the encoding in a local hash table (EncodeTable) +// This table is shared (and reference counted) between the encoder and +// decoder. 
A decoder has read only access to the EncodeTable. +// +// The EncodeMapper allows on the fly encoding of the machine. As the +// EncodeTable is generated the same table may by used to decode the machine +// on the fly. For example in the following sequence of operations +// +// Encode -> Determinize -> Decode +// +// we will use the encoding table generated during the encode step in the +// decode, even though the encoding is not complete. +// +template <class A> class EncodeMapper { + typedef typename A::Weight Weight; + typedef typename A::Label Label; + public: + EncodeMapper(uint32 flags, EncodeType type) + : flags_(flags), + type_(type), + table_(new EncodeTable<A>(flags)), + error_(false) {} + + EncodeMapper(const EncodeMapper& mapper) + : flags_(mapper.flags_), + type_(mapper.type_), + table_(mapper.table_), + error_(false) { + table_->IncrRefCount(); + } + + // Copy constructor but setting the type, typically to DECODE + EncodeMapper(const EncodeMapper& mapper, EncodeType type) + : flags_(mapper.flags_), + type_(type), + table_(mapper.table_), + error_(mapper.error_) { + table_->IncrRefCount(); + } + + ~EncodeMapper() { + if (!table_->DecrRefCount()) delete table_; + } + + A operator()(const A &arc); + + MapFinalAction FinalAction() const { + return (type_ == ENCODE && (flags_ & kEncodeWeights)) ? + MAP_REQUIRE_SUPERFINAL : MAP_NO_SUPERFINAL; + } + + MapSymbolsAction InputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; } + + MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;} + + uint64 Properties(uint64 inprops) { + uint64 outprops = inprops; + if (error_) outprops |= kError; + + uint64 mask = kFstProperties; + if (flags_ & kEncodeLabels) + mask &= kILabelInvariantProperties & kOLabelInvariantProperties; + if (flags_ & kEncodeWeights) + mask &= kILabelInvariantProperties & kWeightInvariantProperties & + (type_ == ENCODE ? 
kAddSuperFinalProperties : + kRmSuperFinalProperties); + + return outprops & mask; + } + + const uint32 flags() const { return flags_; } + const EncodeType type() const { return type_; } + const EncodeTable<A> &table() const { return *table_; } + + bool Write(ostream &strm, const string& source) { + return table_->Write(strm, source); + } + + bool Write(const string& filename) { + ofstream strm(filename.c_str(), ofstream::out | ofstream::binary); + if (!strm) { + LOG(ERROR) << "EncodeMap: Can't open file: " << filename; + return false; + } + return Write(strm, filename); + } + + static EncodeMapper<A> *Read(istream &strm, + const string& source, + EncodeType type = ENCODE) { + EncodeTable<A> *table = EncodeTable<A>::Read(strm, source); + return table ? new EncodeMapper(table->flags(), type, table) : 0; + } + + static EncodeMapper<A> *Read(const string& filename, + EncodeType type = ENCODE) { + ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); + if (!strm) { + LOG(ERROR) << "EncodeMap: Can't open file: " << filename; + return NULL; + } + return Read(strm, filename, type); + } + + SymbolTable *InputSymbols() const { return table_->InputSymbols(); } + + SymbolTable *OutputSymbols() const { return table_->OutputSymbols(); } + + void SetInputSymbols(const SymbolTable* syms) { + table_->SetInputSymbols(syms); + } + + void SetOutputSymbols(const SymbolTable* syms) { + table_->SetOutputSymbols(syms); + } + + private: + uint32 flags_; + EncodeType type_; + EncodeTable<A>* table_; + bool error_; + + explicit EncodeMapper(uint32 flags, EncodeType type, EncodeTable<A> *table) + : flags_(flags), type_(type), table_(table) {} + void operator=(const EncodeMapper &); // Disallow. 
+}; + +template <class A> inline +A EncodeMapper<A>::operator()(const A &arc) { + if (type_ == ENCODE) { // labels and/or weights to single label + if ((arc.nextstate == kNoStateId && !(flags_ & kEncodeWeights)) || + (arc.nextstate == kNoStateId && (flags_ & kEncodeWeights) && + arc.weight == Weight::Zero())) { + return arc; + } else { + Label label = table_->Encode(arc); + return A(label, + flags_ & kEncodeLabels ? label : arc.olabel, + flags_ & kEncodeWeights ? Weight::One() : arc.weight, + arc.nextstate); + } + } else { // type_ == DECODE + if (arc.nextstate == kNoStateId) { + return arc; + } else { + if (arc.ilabel == 0) return arc; + if (flags_ & kEncodeLabels && arc.ilabel != arc.olabel) { + FSTERROR() << "EncodeMapper: Label-encoded arc has different " + "input and output labels"; + error_ = true; + } + if (flags_ & kEncodeWeights && arc.weight != Weight::One()) { + FSTERROR() << + "EncodeMapper: Weight-encoded arc has non-trivial weight"; + error_ = true; + } + const typename EncodeTable<A>::Tuple* tuple = table_->Decode(arc.ilabel); + if (!tuple) { + FSTERROR() << "EncodeMapper: decode failed"; + error_ = true; + return A(kNoLabel, kNoLabel, Weight::NoWeight(), arc.nextstate); + } else { + return A(tuple->ilabel, + flags_ & kEncodeLabels ? tuple->olabel : arc.olabel, + flags_ & kEncodeWeights ? 
tuple->weight : arc.weight, + arc.nextstate); + } + } + } +} + + +// Complexity: O(nstates + narcs) +template<class A> inline +void Encode(MutableFst<A> *fst, EncodeMapper<A>* mapper) { + mapper->SetInputSymbols(fst->InputSymbols()); + mapper->SetOutputSymbols(fst->OutputSymbols()); + ArcMap(fst, mapper); +} + +template<class A> inline +void Decode(MutableFst<A>* fst, const EncodeMapper<A>& mapper) { + ArcMap(fst, EncodeMapper<A>(mapper, DECODE)); + RmFinalEpsilon(fst); + fst->SetInputSymbols(mapper.InputSymbols()); + fst->SetOutputSymbols(mapper.OutputSymbols()); +} + + +// On the fly label and/or weight encoding of input Fst +// +// Complexity: +// - Constructor: O(1) +// - Traversal: O(nstates_visited + narcs_visited), assuming constant +// time to visit an input state or arc. +template <class A> +class EncodeFst : public ArcMapFst<A, A, EncodeMapper<A> > { + public: + typedef A Arc; + typedef EncodeMapper<A> C; + typedef ArcMapFstImpl< A, A, EncodeMapper<A> > Impl; + using ImplToFst<Impl>::GetImpl; + + EncodeFst(const Fst<A> &fst, EncodeMapper<A>* encoder) + : ArcMapFst<A, A, C>(fst, encoder, ArcMapFstOptions()) { + encoder->SetInputSymbols(fst.InputSymbols()); + encoder->SetOutputSymbols(fst.OutputSymbols()); + } + + EncodeFst(const Fst<A> &fst, const EncodeMapper<A>& encoder) + : ArcMapFst<A, A, C>(fst, encoder, ArcMapFstOptions()) {} + + // See Fst<>::Copy() for doc. + EncodeFst(const EncodeFst<A> &fst, bool copy = false) + : ArcMapFst<A, A, C>(fst, copy) {} + + // Get a copy of this EncodeFst. See Fst<>::Copy() for further doc. + virtual EncodeFst<A> *Copy(bool safe = false) const { + if (safe) { + FSTERROR() << "EncodeFst::Copy(true): not allowed."; + GetImpl()->SetProperties(kError, kError); + } + return new EncodeFst(*this); + } +}; + + +// On the fly label and/or weight encoding of input Fst +// +// Complexity: +// - Constructor: O(1) +// - Traversal: O(nstates_visited + narcs_visited), assuming constant +// time to visit an input state or arc. 
+template <class A> +class DecodeFst : public ArcMapFst<A, A, EncodeMapper<A> > { + public: + typedef A Arc; + typedef EncodeMapper<A> C; + typedef ArcMapFstImpl< A, A, EncodeMapper<A> > Impl; + using ImplToFst<Impl>::GetImpl; + + DecodeFst(const Fst<A> &fst, const EncodeMapper<A>& encoder) + : ArcMapFst<A, A, C>(fst, + EncodeMapper<A>(encoder, DECODE), + ArcMapFstOptions()) { + GetImpl()->SetInputSymbols(encoder.InputSymbols()); + GetImpl()->SetOutputSymbols(encoder.OutputSymbols()); + } + + // See Fst<>::Copy() for doc. + DecodeFst(const DecodeFst<A> &fst, bool safe = false) + : ArcMapFst<A, A, C>(fst, safe) {} + + // Get a copy of this DecodeFst. See Fst<>::Copy() for further doc. + virtual DecodeFst<A> *Copy(bool safe = false) const { + return new DecodeFst(*this, safe); + } +}; + + +// Specialization for EncodeFst. +template <class A> +class StateIterator< EncodeFst<A> > + : public StateIterator< ArcMapFst<A, A, EncodeMapper<A> > > { + public: + explicit StateIterator(const EncodeFst<A> &fst) + : StateIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst) {} +}; + + +// Specialization for EncodeFst. +template <class A> +class ArcIterator< EncodeFst<A> > + : public ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > > { + public: + ArcIterator(const EncodeFst<A> &fst, typename A::StateId s) + : ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst, s) {} +}; + + +// Specialization for DecodeFst. +template <class A> +class StateIterator< DecodeFst<A> > + : public StateIterator< ArcMapFst<A, A, EncodeMapper<A> > > { + public: + explicit StateIterator(const DecodeFst<A> &fst) + : StateIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst) {} +}; + + +// Specialization for DecodeFst. +template <class A> +class ArcIterator< DecodeFst<A> > + : public ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > > { + public: + ArcIterator(const DecodeFst<A> &fst, typename A::StateId s) + : ArcIterator< ArcMapFst<A, A, EncodeMapper<A> > >(fst, s) {} +}; + + +// Useful aliases when using StdArc. 
+typedef EncodeFst<StdArc> StdEncodeFst; + +typedef DecodeFst<StdArc> StdDecodeFst; + +} // namespace fst + +#endif // FST_LIB_ENCODE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/epsnormalize.h b/kaldi_io/src/tools/openfst/include/fst/epsnormalize.h new file mode 100644 index 0000000..9d178b1 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/epsnormalize.h @@ -0,0 +1,73 @@ +// epsnormalize.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) +// +// \file +// Function that implements epsilon normalization. + +#ifndef FST_LIB_EPSNORMALIZE_H__ +#define FST_LIB_EPSNORMALIZE_H__ + +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; + + +#include <fst/factor-weight.h> +#include <fst/invert.h> +#include <fst/arc-map.h> +#include <fst/rmepsilon.h> + + +namespace fst { + +enum EpsNormalizeType {EPS_NORM_INPUT, EPS_NORM_OUTPUT}; + +// Returns an equivalent FST that is epsilon-normalized. An acceptor is +// epsilon-normalized if it is epsilon-removed. A transducer is input +// epsilon-normalized if additionally if on each path any epsilon input +// label follows all non-epsilon input labels. Output epsilon-normalized +// is defined similarly. +// +// The input FST needs to be functional. +// +// References: +// - Mehryar Mohri. 
"Generic epsilon-removal and input epsilon-normalization +// algorithms for weighted transducers", International Journal of Computer +// Science, 13(1): 129-143, 2002. +template <class Arc> +void EpsNormalize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst, + EpsNormalizeType type = EPS_NORM_INPUT) { + VectorFst< GallicArc<Arc, STRING_RIGHT_RESTRICT> > gfst; + if (type == EPS_NORM_INPUT) + ArcMap(ifst, &gfst, ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>()); + else // type == EPS_NORM_OUTPUT + ArcMap(InvertFst<Arc>(ifst), &gfst, + ToGallicMapper<Arc, STRING_RIGHT_RESTRICT>()); + RmEpsilon(&gfst); + FactorWeightFst< GallicArc<Arc, STRING_RIGHT_RESTRICT>, + GallicFactor<typename Arc::Label, + typename Arc::Weight, STRING_RIGHT_RESTRICT> > + fwfst(gfst); + ArcMap(fwfst, ofst, FromGallicMapper<Arc, STRING_RIGHT_RESTRICT>()); + ofst->SetOutputSymbols(ifst.OutputSymbols()); + if(type == EPS_NORM_OUTPUT) + Invert(ofst); +} + +} // namespace fst + +#endif // FST_LIB_EPSNORMALIZE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/equal.h b/kaldi_io/src/tools/openfst/include/fst/equal.h new file mode 100644 index 0000000..33be198 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/equal.h @@ -0,0 +1,124 @@ +// test.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Function to test equality of two Fsts. 
+ +#ifndef FST_LIB_EQUAL_H__ +#define FST_LIB_EQUAL_H__ + +#include <fst/fst.h> + + +namespace fst { + +// Tests if two Fsts have the same states and arcs in the same order. +template<class Arc> +bool Equal(const Fst<Arc> &fst1, const Fst<Arc> &fst2, float delta = kDelta) { + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + + if (fst1.Start() != fst2.Start()) { + VLOG(1) << "Equal: mismatched start states"; + return false; + } + + StateIterator< Fst<Arc> > siter1(fst1); + StateIterator< Fst<Arc> > siter2(fst2); + + while (!siter1.Done() || !siter2.Done()) { + if (siter1.Done() || siter2.Done()) { + VLOG(1) << "Equal: mismatched # of states"; + return false; + } + StateId s1 = siter1.Value(); + StateId s2 = siter2.Value(); + if (s1 != s2) { + VLOG(1) << "Equal: mismatched states:" + << ", state1 = " << s1 + << ", state2 = " << s2; + return false; + } + Weight final1 = fst1.Final(s1); + Weight final2 = fst2.Final(s2); + if (!ApproxEqual(final1, final2, delta)) { + VLOG(1) << "Equal: mismatched final weights:" + << " state = " << s1 + << ", final1 = " << final1 + << ", final2 = " << final2; + return false; + } + ArcIterator< Fst<Arc> > aiter1(fst1, s1); + ArcIterator< Fst<Arc> > aiter2(fst2, s2); + for (size_t a = 0; !aiter1.Done() || !aiter2.Done(); ++a) { + if (aiter1.Done() || aiter2.Done()) { + VLOG(1) << "Equal: mismatched # of arcs" + << " state = " << s1; + return false; + } + Arc arc1 = aiter1.Value(); + Arc arc2 = aiter2.Value(); + if (arc1.ilabel != arc2.ilabel) { + VLOG(1) << "Equal: mismatched arc input labels:" + << " state = " << s1 + << ", arc = " << a + << ", ilabel1 = " << arc1.ilabel + << ", ilabel2 = " << arc2.ilabel; + return false; + } else if (arc1.olabel != arc2.olabel) { + VLOG(1) << "Equal: mismatched arc output labels:" + << " state = " << s1 + << ", arc = " << a + << ", olabel1 = " << arc1.olabel + << ", olabel2 = " << arc2.olabel; + return false; + } else if (!ApproxEqual(arc1.weight, arc2.weight, delta)) { + 
VLOG(1) << "Equal: mismatched arc weights:" + << " state = " << s1 + << ", arc = " << a + << ", weight1 = " << arc1.weight + << ", weight2 = " << arc2.weight; + return false; + } else if (arc1.nextstate != arc2.nextstate) { + VLOG(1) << "Equal: mismatched input label:" + << " state = " << s1 + << ", arc = " << a + << ", nextstate1 = " << arc1.nextstate + << ", nextstate2 = " << arc2.nextstate; + return false; + } + aiter1.Next(); + aiter2.Next(); + + } + // Sanity checks: should never fail + if (fst1.NumArcs(s1) != fst2.NumArcs(s2) || + fst1.NumInputEpsilons(s1) != fst2.NumInputEpsilons(s2) || + fst1.NumOutputEpsilons(s1) != fst2.NumOutputEpsilons(s2)) { + FSTERROR() << "Equal: inconsistent arc/epsilon counts"; + } + + siter1.Next(); + siter2.Next(); + } + return true; +} + +} // namespace fst + + +#endif // FST_LIB_EQUAL_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/equivalent.h b/kaldi_io/src/tools/openfst/include/fst/equivalent.h new file mode 100644 index 0000000..e28fea1 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/equivalent.h @@ -0,0 +1,275 @@ +// equivalent.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Wojciech Skut) +// +// \file Functions and classes to determine the equivalence of two +// FSTs. 
+ +#ifndef FST_LIB_EQUIVALENT_H__ +#define FST_LIB_EQUIVALENT_H__ + +#include <algorithm> +#include <deque> +using std::deque; +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <utility> +using std::pair; using std::make_pair; +#include <vector> +using std::vector; + +#include <fst/encode.h> +#include <fst/push.h> +#include <fst/union-find.h> +#include <fst/vector-fst.h> + + +namespace fst { + +// Traits-like struct holding utility functions/typedefs/constants for +// the equivalence algorithm. +// +// Encoding device: in order to make the statesets of the two acceptors +// disjoint, we map Arc::StateId on the type MappedId. The states of +// the first acceptor are mapped on odd numbers (s -> 2s + 1), and +// those of the second one on even numbers (s -> 2s + 2). The number 0 +// is reserved for an implicit (non-final) 'dead state' (required for +// the correct treatment of non-coaccessible states; kNoStateId is +// mapped to kDeadState for both acceptors). The union-find algorithm +// operates on the mapped IDs. +template <class Arc> +struct EquivalenceUtil { + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + typedef StateId MappedId; // ID for an equivalence class. + + // MappedId for an implicit dead state. + static const MappedId kDeadState = 0; + + // MappedId for lookup failure. + static const MappedId kInvalidId = -1; + + // Maps state ID to the representative of the corresponding + // equivalence class. The parameter 'which_fst' takes the values 1 + // and 2, identifying the input FST. + static MappedId MapState(StateId s, int32 which_fst) { + return + (kNoStateId == s) + ? + kDeadState + : + (static_cast<MappedId>(s) << 1) + which_fst; + } + // Maps set ID to State ID. + static StateId UnMapState(MappedId id) { + return static_cast<StateId>((--id) >> 1); + } + // Convenience function: checks if state with MappedId 's' is final + // in acceptor 'fa'. 
+ static bool IsFinal(const Fst<Arc> &fa, MappedId s) { + return + (kDeadState == s) ? + false : (fa.Final(UnMapState(s)) != Weight::Zero()); + } + // Convenience function: returns the representative of 'id' in 'sets', + // creating a new set if needed. + static MappedId FindSet(UnionFind<MappedId> *sets, MappedId id) { + MappedId repr = sets->FindSet(id); + if (repr != kInvalidId) { + return repr; + } else { + sets->MakeSet(id); + return id; + } + } +}; + +template <class Arc> const +typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kDeadState; + +template <class Arc> const +typename EquivalenceUtil<Arc>::MappedId EquivalenceUtil<Arc>::kInvalidId; + + +// Equivalence checking algorithm: determines if the two FSTs +// <code>fst1</code> and <code>fst2</code> are equivalent. The input +// FSTs must be deterministic input-side epsilon-free acceptors, +// unweighted or with weights over a left semiring. Two acceptors are +// considered equivalent if they accept exactly the same set of +// strings (with the same weights). +// +// The algorithm (cf. Aho, Hopcroft and Ullman, "The Design and +// Analysis of Computer Programs") successively constructs sets of +// states that can be reached by the same prefixes, starting with a +// set containing the start states of both acceptors. A disjoint tree +// forest (the union-find algorithm) is used to represent the sets of +// states. The algorithm returns 'false' if one of the constructed +// sets contains both final and non-final states. Returns optional error +// value (when FLAGS_error_fatal = false). +// +// Complexity: quasi-linear, i.e. O(n G(n)), where +// n = |S1| + |S2| is the number of states in both acceptors +// G(n) is a very slowly growing function that can be approximated +// by 4 by all practical purposes. 
+// +template <class Arc> +bool Equivalent(const Fst<Arc> &fst1, + const Fst<Arc> &fst2, + double delta = kDelta, bool *error = 0) { + typedef typename Arc::Weight Weight; + if (error) *error = false; + + // Check that the symbol table are compatible + if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) || + !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) { + FSTERROR() << "Equivalent: input/output symbol tables of 1st argument " + << "do not match input/output symbol tables of 2nd argument"; + if (error) *error = true; + return false; + } + // Check properties first: + uint64 props = kNoEpsilons | kIDeterministic | kAcceptor; + if (fst1.Properties(props, true) != props) { + FSTERROR() << "Equivalent: first argument not an" + << " epsilon-free deterministic acceptor"; + if (error) *error = true; + return false; + } + if (fst2.Properties(props, true) != props) { + FSTERROR() << "Equivalent: second argument not an" + << " epsilon-free deterministic acceptor"; + if (error) *error = true; + return false; + } + + if ((fst1.Properties(kUnweighted , true) != kUnweighted) + || (fst2.Properties(kUnweighted , true) != kUnweighted)) { + VectorFst<Arc> efst1(fst1); + VectorFst<Arc> efst2(fst2); + Push(&efst1, REWEIGHT_TO_INITIAL, delta); + Push(&efst2, REWEIGHT_TO_INITIAL, delta); + ArcMap(&efst1, QuantizeMapper<Arc>(delta)); + ArcMap(&efst2, QuantizeMapper<Arc>(delta)); + EncodeMapper<Arc> mapper(kEncodeWeights|kEncodeLabels, ENCODE); + ArcMap(&efst1, &mapper); + ArcMap(&efst2, &mapper); + return Equivalent(efst1, efst2); + } + + // Convenience typedefs: + typedef typename Arc::StateId StateId; + typedef EquivalenceUtil<Arc> Util; + typedef typename Util::MappedId MappedId; + enum { FST1 = 1, FST2 = 2 }; // Required by Util::MapState(...) + + MappedId s1 = Util::MapState(fst1.Start(), FST1); + MappedId s2 = Util::MapState(fst2.Start(), FST2); + + // The union-find structure. 
+ UnionFind<MappedId> eq_classes(1000, Util::kInvalidId); + + // Initialize the union-find structure. + eq_classes.MakeSet(s1); + eq_classes.MakeSet(s2); + + // Data structure for the (partial) acceptor transition function of + // fst1 and fst2: input labels mapped to pairs of MappedId's + // representing destination states of the corresponding arcs in fst1 + // and fst2, respectively. + typedef + unordered_map<typename Arc::Label, pair<MappedId, MappedId> > + Label2StatePairMap; + + Label2StatePairMap arc_pairs; + + // Pairs of MappedId's to be processed, organized in a queue. + deque<pair<MappedId, MappedId> > q; + + bool ret = true; + // Early return if the start states differ w.r.t. being final. + if (Util::IsFinal(fst1, s1) != Util::IsFinal(fst2, s2)) { + ret = false; + } + + // Main loop: explores the two acceptors in a breadth-first manner, + // updating the equivalence relation on the statesets. Loop + // invariant: each block of states contains either final states only + // or non-final states only. + for (q.push_back(make_pair(s1, s2)); ret && !q.empty(); q.pop_front()) { + s1 = q.front().first; + s2 = q.front().second; + + // Representatives of the equivalence classes of s1/s2. + MappedId rep1 = Util::FindSet(&eq_classes, s1); + MappedId rep2 = Util::FindSet(&eq_classes, s2); + + if (rep1 != rep2) { + eq_classes.Union(rep1, rep2); + arc_pairs.clear(); + + // Copy outgoing arcs starting at s1 into the hashtable. + if (Util::kDeadState != s1) { + ArcIterator<Fst<Arc> > arc_iter(fst1, Util::UnMapState(s1)); + for (; !arc_iter.Done(); arc_iter.Next()) { + const Arc &arc = arc_iter.Value(); + if (arc.weight != Weight::Zero()) { // Zero-weight arcs + // are treated as + // non-exisitent. + arc_pairs[arc.ilabel].first = Util::MapState(arc.nextstate, FST1); + } + } + } + // Copy outgoing arcs starting at s2 into the hashtable. 
+ if (Util::kDeadState != s2) { + ArcIterator<Fst<Arc> > arc_iter(fst2, Util::UnMapState(s2)); + for (; !arc_iter.Done(); arc_iter.Next()) { + const Arc &arc = arc_iter.Value(); + if (arc.weight != Weight::Zero()) { // Zero-weight arcs + // are treated as + // non-existent. + arc_pairs[arc.ilabel].second = Util::MapState(arc.nextstate, FST2); + } + } + } + // Iterate through the hashtable and process pairs of target + // states. + for (typename Label2StatePairMap::const_iterator + arc_iter = arc_pairs.begin(); + arc_iter != arc_pairs.end(); + ++arc_iter) { + const pair<MappedId, MappedId> &p = arc_iter->second; + if (Util::IsFinal(fst1, p.first) != Util::IsFinal(fst2, p.second)) { + // Detected inconsistency: return false. + ret = false; + break; + } + q.push_back(p); + } + } + } + + if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) { + if (error) *error = true; + return false; + } + + return ret; +} + +} // namespace fst + +#endif // FST_LIB_EQUIVALENT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/expanded-fst.h b/kaldi_io/src/tools/openfst/include/fst/expanded-fst.h new file mode 100644 index 0000000..676ceb3 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/expanded-fst.h @@ -0,0 +1,189 @@ +// expanded-fst.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Michael Riley) +// +// \file +// Generic FST augmented with state count - interface class definition. +// + +#ifndef FST_LIB_EXPANDED_FST_H__ +#define FST_LIB_EXPANDED_FST_H__ + +#include <sys/types.h> +#include <string> + +#include <fst/fst.h> + + +namespace fst { + +// A generic FST plus state count. +template <class A> +class ExpandedFst : public Fst<A> { + public: + typedef A Arc; + typedef typename A::StateId StateId; + + virtual StateId NumStates() const = 0; // State count + + // Get a copy of this ExpandedFst. See Fst<>::Copy() for further doc. + virtual ExpandedFst<A> *Copy(bool safe = false) const = 0; + + // Read an ExpandedFst from an input stream; return NULL on error. + static ExpandedFst<A> *Read(istream &strm, const FstReadOptions &opts) { + FstReadOptions ropts(opts); + FstHeader hdr; + if (ropts.header) + hdr = *opts.header; + else { + if (!hdr.Read(strm, opts.source)) + return 0; + ropts.header = &hdr; + } + if (!(hdr.Properties() & kExpanded)) { + LOG(ERROR) << "ExpandedFst::Read: Not an ExpandedFst: " << ropts.source; + return 0; + } + FstRegister<A> *registr = FstRegister<A>::GetRegister(); + const typename FstRegister<A>::Reader reader = + registr->GetReader(hdr.FstType()); + if (!reader) { + LOG(ERROR) << "ExpandedFst::Read: Unknown FST type \"" << hdr.FstType() + << "\" (arc type = \"" << A::Type() + << "\"): " << ropts.source; + return 0; + } + Fst<A> *fst = reader(strm, ropts); + if (!fst) return 0; + return static_cast<ExpandedFst<A> *>(fst); + } + + // Read an ExpandedFst from a file; return NULL on error. + // Empty filename reads from standard input. 
+ static ExpandedFst<A> *Read(const string &filename) { + if (!filename.empty()) { + ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); + if (!strm) { + LOG(ERROR) << "ExpandedFst::Read: Can't open file: " << filename; + return 0; + } + return Read(strm, FstReadOptions(filename)); + } else { + return Read(cin, FstReadOptions("standard input")); + } + } +}; + + +namespace internal { + +// ExpandedFst<A> case - abstract methods. +template <class A> inline +typename A::Weight Final(const ExpandedFst<A> &fst, typename A::StateId s) { + return fst.Final(s); +} + +template <class A> inline +ssize_t NumArcs(const ExpandedFst<A> &fst, typename A::StateId s) { + return fst.NumArcs(s); +} + +template <class A> inline +ssize_t NumInputEpsilons(const ExpandedFst<A> &fst, typename A::StateId s) { + return fst.NumInputEpsilons(s); +} + +template <class A> inline +ssize_t NumOutputEpsilons(const ExpandedFst<A> &fst, typename A::StateId s) { + return fst.NumOutputEpsilons(s); +} + +} // namespace internal + + +// A useful alias when using StdArc. +typedef ExpandedFst<StdArc> StdExpandedFst; + + +// This is a helper class template useful for attaching an ExpandedFst +// interface to its implementation, handling reference counting. It +// delegates to ImplToFst the handling of the Fst interface methods. 
+template < class I, class F = ExpandedFst<typename I::Arc> > +class ImplToExpandedFst : public ImplToFst<I, F> { + public: + typedef typename I::Arc Arc; + typedef typename Arc::Weight Weight; + typedef typename Arc::StateId StateId; + + using ImplToFst<I, F>::GetImpl; + + virtual StateId NumStates() const { return GetImpl()->NumStates(); } + + protected: + ImplToExpandedFst() : ImplToFst<I, F>() {} + + ImplToExpandedFst(I *impl) : ImplToFst<I, F>(impl) {} + + ImplToExpandedFst(const ImplToExpandedFst<I, F> &fst) + : ImplToFst<I, F>(fst) {} + + ImplToExpandedFst(const ImplToExpandedFst<I, F> &fst, bool safe) + : ImplToFst<I, F>(fst, safe) {} + + // Read FST implementation from a file; return NULL on error. + // Empty filename reads from standard input. + static I *Read(const string &filename) { + if (!filename.empty()) { + ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); + if (!strm) { + LOG(ERROR) << "ExpandedFst::Read: Can't open file: " << filename; + return 0; + } + return I::Read(strm, FstReadOptions(filename)); + } else { + return I::Read(cin, FstReadOptions("standard input")); + } + } + + private: + // Disallow + ImplToExpandedFst<I, F> &operator=(const ImplToExpandedFst<I, F> &fst); + + ImplToExpandedFst<I, F> &operator=(const Fst<Arc> &fst) { + FSTERROR() << "ImplToExpandedFst: Assignment operator disallowed"; + GetImpl()->SetProperties(kError, kError); + return *this; + } +}; + +// Function to return the number of states in an FST, counting them +// if necessary. 
+template <class Arc> +typename Arc::StateId CountStates(const Fst<Arc> &fst) { + if (fst.Properties(kExpanded, false)) { + const ExpandedFst<Arc> *efst = static_cast<const ExpandedFst<Arc> *>(&fst); + return efst->NumStates(); + } else { + typename Arc::StateId nstates = 0; + for (StateIterator< Fst<Arc> > siter(fst); !siter.Done(); siter.Next()) + ++nstates; + return nstates; + } +} + +} // namespace fst + +#endif // FST_LIB_EXPANDED_FST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/expectation-weight.h b/kaldi_io/src/tools/openfst/include/fst/expectation-weight.h new file mode 100644 index 0000000..5226cad --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/expectation-weight.h @@ -0,0 +1,142 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
// Author: [email protected] (Kasturi Rangan Raghavan)
// Inspiration: [email protected] (Masha Maria Shugrina)
// \file
// Expectation semiring as described by Jason Eisner:
// See: doi=10.1.1.22.9398
// Multiplex semiring operations and identities:
//    One: <One, Zero>
//    Zero: <Zero, Zero>
//    Plus: <a1, b1> + <a2, b2> = < (a1 + a2) , (b1 + b2) >
//    Times: <a1, b1> * <a2, b2> = < (a1 * a2) , [(a1 * b2) + (a2 * b1)] >
//    Division: Undefined (currently)
//
// Usually used to store the pair <probability, random_variable> so that
// ShortestDistance[Fst<ArcTpl<ExpectationWeight<P, V> > >]
//    == < PosteriorProbability, Expected_Value[V] >

#ifndef FST_LIB_EXPECTATION_WEIGHT_H_
#define FST_LIB_EXPECTATION_WEIGHT_H_

#include<string>

#include <fst/pair-weight.h>


namespace fst {

// X1 is usually a probability weight like LogWeight
// X2 is usually a random variable or vector
//    see SignedLogWeight or SparsePowerWeight
//
// If X1 is distinct from X2, it is required that there is an external
// product between X1 and X2, and if both semirings are commutative, or
// left or right semirings, then the result must have those properties.
+template <class X1, class X2> +class ExpectationWeight : public PairWeight<X1, X2> { + public: + using PairWeight<X1, X2>::Value1; + using PairWeight<X1, X2>::Value2; + + using PairWeight<X1, X2>::Reverse; + using PairWeight<X1, X2>::Quantize; + using PairWeight<X1, X2>::Member; + + typedef X1 W1; + typedef X2 W2; + + typedef ExpectationWeight<typename X1::ReverseWeight, + typename X2::ReverseWeight> ReverseWeight; + + ExpectationWeight() : PairWeight<X1, X2>(Zero()) { } + + ExpectationWeight(const ExpectationWeight<X1, X2>& w) + : PairWeight<X1, X2> (w) { } + + ExpectationWeight(const PairWeight<X1, X2>& w) + : PairWeight<X1, X2> (w) { } + + ExpectationWeight(const X1& x1, const X2& x2) + : PairWeight<X1, X2>(x1, x2) { } + + static const ExpectationWeight<X1, X2> &Zero() { + static const ExpectationWeight<X1, X2> zero(X1::Zero(), X2::Zero()); + return zero; + } + + static const ExpectationWeight<X1, X2> &One() { + static const ExpectationWeight<X1, X2> one(X1::One(), X2::Zero()); + return one; + } + + static const ExpectationWeight<X1, X2> &NoWeight() { + static const ExpectationWeight<X1, X2> no_weight(X1::NoWeight(), + X2::NoWeight()); + return no_weight; + } + + static const string &Type() { + static const string type = "expectation_" + X1::Type() + "_" + X2::Type(); + return type; + } + + PairWeight<X1, X2> Quantize(float delta = kDelta) const { + return PairWeight<X1, X2>::Quantize(); + } + + ReverseWeight Reverse() const { + return PairWeight<X1, X2>::Reverse(); + } + + bool Member() const { + return PairWeight<X1, X2>::Member(); + } + + static uint64 Properties() { + uint64 props1 = W1::Properties(); + uint64 props2 = W2::Properties(); + return props1 & props2 & (kLeftSemiring | kRightSemiring | + kCommutative | kIdempotent); + } +}; + +template <class X1, class X2> +inline ExpectationWeight<X1, X2> Plus(const ExpectationWeight<X1, X2> &w, + const ExpectationWeight<X1, X2> &v) { + return ExpectationWeight<X1, X2>(Plus(w.Value1(), v.Value1()), + 
Plus(w.Value2(), v.Value2())); +} + + +template <class X1, class X2> +inline ExpectationWeight<X1, X2> Times(const ExpectationWeight<X1, X2> &w, + const ExpectationWeight<X1, X2> &v) { + return ExpectationWeight<X1, X2>(Times(w.Value1(), v.Value1()), + Plus(Times(w.Value1(), v.Value2()), + Times(w.Value2(), v.Value1()))); +} + +template <class X1, class X2> +inline ExpectationWeight<X1, X2> Divide(const ExpectationWeight<X1, X2> &w, + const ExpectationWeight<X1, X2> &v, + DivideType typ = DIVIDE_ANY) { + FSTERROR() << "ExpectationWeight::Divide: not implemented"; + return ExpectationWeight<X1, X2>::NoWeight(); +} + +} // namespace fst + +#endif // FST_LIB_EXPECTATION_WEIGHT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/compile-strings.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/compile-strings.h new file mode 100644 index 0000000..ca247db --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/extensions/far/compile-strings.h @@ -0,0 +1,304 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Authors: [email protected] (Cyril Allauzen) +// [email protected] (Terry Tai) +// [email protected] (Jake Ratkiewicz) + + +#ifndef FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_ +#define FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_ + +#include <libgen.h> +#include <string> +#include <vector> +using std::vector; + +#include <fst/extensions/far/far.h> +#include <fst/string.h> + +namespace fst { + +// Construct a reader that provides FSTs from a file (stream) either on a +// line-by-line basis or on a per-stream basis. Note that the freshly +// constructed reader is already set to the first input. +// +// Sample Usage: +// for (StringReader<Arc> reader(...); !reader.Done(); reader.Next()) { +// Fst *fst = reader.GetVectorFst(); +// } +template <class A> +class StringReader { + public: + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename StringCompiler<A>::TokenType TokenType; + + enum EntryType { LINE = 1, FILE = 2 }; + + StringReader(istream &istrm, + const string &source, + EntryType entry_type, + TokenType token_type, + bool allow_negative_labels, + const SymbolTable *syms = 0, + Label unknown_label = kNoStateId) + : nline_(0), strm_(istrm), source_(source), entry_type_(entry_type), + token_type_(token_type), symbols_(syms), done_(false), + compiler_(token_type, syms, unknown_label, allow_negative_labels) { + Next(); // Initialize the reader to the first input. + } + + bool Done() { + return done_; + } + + void Next() { + VLOG(1) << "Processing source " << source_ << " at line " << nline_; + if (!strm_) { // We're done if we have no more input. + done_ = true; + return; + } + if (entry_type_ == LINE) { + getline(strm_, content_); + ++nline_; + } else { + content_.clear(); + string line; + while (getline(strm_, line)) { + ++nline_; + content_.append(line); + content_.append("\n"); + } + } + if (!strm_ && content_.empty()) // We're also done if we read off all the + done_ = true; // whitespace at the end of a file. 
+ } + + VectorFst<A> *GetVectorFst(bool keep_symbols = false) { + VectorFst<A> *fst = new VectorFst<A>; + if (keep_symbols) { + fst->SetInputSymbols(symbols_); + fst->SetOutputSymbols(symbols_); + } + if (compiler_(content_, fst)) { + return fst; + } else { + delete fst; + return NULL; + } + } + + CompactFst<A, StringCompactor<A> > *GetCompactFst(bool keep_symbols = false) { + CompactFst<A, StringCompactor<A> > *fst; + if (keep_symbols) { + VectorFst<A> tmp; + tmp.SetInputSymbols(symbols_); + tmp.SetOutputSymbols(symbols_); + fst = new CompactFst<A, StringCompactor<A> >(tmp); + } else { + fst = new CompactFst<A, StringCompactor<A> >; + } + if (compiler_(content_, fst)) { + return fst; + } else { + delete fst; + return NULL; + } + } + + private: + size_t nline_; + istream &strm_; + string source_; + EntryType entry_type_; + TokenType token_type_; + const SymbolTable *symbols_; + bool done_; + StringCompiler<A> compiler_; + string content_; // The actual content of the input stream's next FST. + + DISALLOW_COPY_AND_ASSIGN(StringReader); +}; + +// Compute the minimal length required to encode each line number as a decimal +// number. 
+int KeySize(const char *filename); + +template <class Arc> +void FarCompileStrings(const vector<string> &in_fnames, + const string &out_fname, + const string &fst_type, + const FarType &far_type, + int32 generate_keys, + FarEntryType fet, + FarTokenType tt, + const string &symbols_fname, + const string &unknown_symbol, + bool keep_symbols, + bool initial_symbols, + bool allow_negative_labels, + bool file_list_input, + const string &key_prefix, + const string &key_suffix) { + typename StringReader<Arc>::EntryType entry_type; + if (fet == FET_LINE) { + entry_type = StringReader<Arc>::LINE; + } else if (fet == FET_FILE) { + entry_type = StringReader<Arc>::FILE; + } else { + FSTERROR() << "FarCompileStrings: unknown entry type"; + return; + } + + typename StringCompiler<Arc>::TokenType token_type; + if (tt == FTT_SYMBOL) { + token_type = StringCompiler<Arc>::SYMBOL; + } else if (tt == FTT_BYTE) { + token_type = StringCompiler<Arc>::BYTE; + } else if (tt == FTT_UTF8) { + token_type = StringCompiler<Arc>::UTF8; + } else { + FSTERROR() << "FarCompileStrings: unknown token type"; + return; + } + + bool compact; + if (fst_type.empty() || (fst_type == "vector")) { + compact = false; + } else if (fst_type == "compact") { + compact = true; + } else { + FSTERROR() << "FarCompileStrings: unknown fst type: " + << fst_type; + return; + } + + const SymbolTable *syms = 0; + typename Arc::Label unknown_label = kNoLabel; + if (!symbols_fname.empty()) { + SymbolTableTextOptions opts; + opts.allow_negative = allow_negative_labels; + syms = SymbolTable::ReadText(symbols_fname, opts); + if (!syms) { + FSTERROR() << "FarCompileStrings: error reading symbol table: " + << symbols_fname; + return; + } + if (!unknown_symbol.empty()) { + unknown_label = syms->Find(unknown_symbol); + if (unknown_label == kNoLabel) { + FSTERROR() << "FarCompileStrings: unknown label \"" << unknown_label + << "\" missing from symbol table: " << symbols_fname; + return; + } + } + } + + FarWriter<Arc> *far_writer = 
+ FarWriter<Arc>::Create(out_fname, far_type); + if (!far_writer) return; + + vector<string> inputs; + if (file_list_input) { + for (int i = 1; i < in_fnames.size(); ++i) { + istream *istrm = in_fnames.empty() ? &cin : + new ifstream(in_fnames[i].c_str()); + string str; + while (getline(*istrm, str)) + inputs.push_back(str); + if (!in_fnames.empty()) + delete istrm; + } + } else { + inputs = in_fnames; + } + + for (int i = 0, n = 0; i < inputs.size(); ++i) { + if (generate_keys == 0 && inputs[i].empty()) { + FSTERROR() << "FarCompileStrings: read from a file instead of stdin or" + << " set the --generate_keys flags."; + delete far_writer; + delete syms; + return; + } + int key_size = generate_keys ? generate_keys : + (entry_type == StringReader<Arc>::FILE ? 1 : + KeySize(inputs[i].c_str())); + istream *istrm = inputs[i].empty() ? &cin : + new ifstream(inputs[i].c_str()); + + bool keep_syms = keep_symbols; + for (StringReader<Arc> reader( + *istrm, inputs[i].empty() ? "stdin" : inputs[i], + entry_type, token_type, allow_negative_labels, + syms, unknown_label); + !reader.Done(); + reader.Next()) { + ++n; + const Fst<Arc> *fst; + if (compact) + fst = reader.GetCompactFst(keep_syms); + else + fst = reader.GetVectorFst(keep_syms); + if (initial_symbols) + keep_syms = false; + if (!fst) { + FSTERROR() << "FarCompileStrings: compiling string number " << n + << " in file " << inputs[i] << " failed with token_type = " + << (tt == FTT_BYTE ? "byte" : + (tt == FTT_UTF8 ? "utf8" : + (tt == FTT_SYMBOL ? "symbol" : "unknown"))) + << " and entry_type = " + << (fet == FET_LINE ? "line" : + (fet == FET_FILE ? 
"file" : "unknown")); + delete far_writer; + delete syms; + if (!inputs[i].empty()) delete istrm; + return; + } + ostringstream keybuf; + keybuf.width(key_size); + keybuf.fill('0'); + keybuf << n; + string key; + if (generate_keys > 0) { + key = keybuf.str(); + } else { + char* filename = new char[inputs[i].size() + 1]; + strcpy(filename, inputs[i].c_str()); + key = basename(filename); + if (entry_type != StringReader<Arc>::FILE) { + key += "-"; + key += keybuf.str(); + } + delete[] filename; + } + far_writer->Add(key_prefix + key + key_suffix, *fst); + delete fst; + } + if (generate_keys == 0) + n = 0; + if (!inputs[i].empty()) + delete istrm; + } + + delete far_writer; +} + +} // namespace fst + + +#endif // FST_EXTENSIONS_FAR_COMPILE_STRINGS_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/create.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/create.h new file mode 100644 index 0000000..edb31e7 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/extensions/far/create.h @@ -0,0 +1,87 @@ +// create-main.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// Modified: [email protected] (Jake Ratkiewicz) to use new dispatch +// +// \file +// Creates a finite-state archive from component FSTs. Includes +// helper function for farcreate.cc that templates the main on the arc +// type to support multiple and extensible arc types. 
+// + +#ifndef FST_EXTENSIONS_FAR_CREATE_H__ +#define FST_EXTENSIONS_FAR_CREATE_H__ + +#include <libgen.h> +#include <string> +#include <vector> +using std::vector; + +#include <fst/extensions/far/far.h> + +namespace fst { + +template <class Arc> +void FarCreate(const vector<string> &in_fnames, + const string &out_fname, + const int32 generate_keys, + const bool file_list_input, + const FarType &far_type, + const string &key_prefix, + const string &key_suffix) { + FarWriter<Arc> *far_writer = + FarWriter<Arc>::Create(out_fname, far_type); + if (!far_writer) return; + + vector<string> inputs; + if (file_list_input) { + for (int i = 1; i < in_fnames.size(); ++i) { + ifstream istrm(in_fnames[i].c_str()); + string str; + while (getline(istrm, str)) + inputs.push_back(str); + } + } else { + inputs = in_fnames; + } + + for (int i = 0; i < inputs.size(); ++i) { + Fst<Arc> *ifst = Fst<Arc>::Read(inputs[i]); + if (!ifst) return; + string key; + if (generate_keys > 0) { + ostringstream keybuf; + keybuf.width(generate_keys); + keybuf.fill('0'); + keybuf << i + 1; + key = keybuf.str(); + } else { + char* filename = new char[inputs[i].size() + 1]; + strcpy(filename, inputs[i].c_str()); + key = basename(filename); + delete[] filename; + } + + far_writer->Add(key_prefix + key + key_suffix, *ifst); + delete ifst; + } + + delete far_writer; +} + +} // namespace fst + +#endif // FST_EXTENSIONS_FAR_CREATE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/equal.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/equal.h new file mode 100644 index 0000000..be82e2d --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/extensions/far/equal.h @@ -0,0 +1,99 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) + +#ifndef FST_EXTENSIONS_FAR_EQUAL_H_ +#define FST_EXTENSIONS_FAR_EQUAL_H_ + +#include <string> + +#include <fst/extensions/far/far.h> +#include <fst/equal.h> + +namespace fst { + +template <class Arc> +bool FarEqual(const string &filename1, + const string &filename2, + float delta = kDelta, + const string &begin_key = string(), + const string &end_key = string()) { + + FarReader<Arc> *reader1 = FarReader<Arc>::Open(filename1); + FarReader<Arc> *reader2 = FarReader<Arc>::Open(filename2); + if (!reader1 || !reader2) { + delete reader1; + delete reader2; + VLOG(1) << "FarEqual: cannot open input Far file(s)"; + return false; + } + + if (!begin_key.empty()) { + bool find_begin1 = reader1->Find(begin_key); + bool find_begin2 = reader2->Find(begin_key); + if (!find_begin1 || !find_begin2) { + bool ret = !find_begin1 && !find_begin2; + if (!ret) { + VLOG(1) << "FarEqual: key \"" << begin_key << "\" missing from " + << (find_begin1 ? 
"second" : "first") << " archive."; + } + delete reader1; + delete reader2; + return ret; + } + } + + for(; !reader1->Done() && !reader2->Done(); + reader1->Next(), reader2->Next()) { + const string key1 = reader1->GetKey(); + const string key2 = reader2->GetKey(); + if (!end_key.empty() && end_key < key1 && end_key < key2) { + delete reader1; + delete reader2; + return true; + } + if (key1 != key2) { + VLOG(1) << "FarEqual: mismatched keys \"" + << key1 << "\" <> \"" << key2 << "\"."; + delete reader1; + delete reader2; + return false; + } + if (!Equal(reader1->GetFst(), reader2->GetFst(), delta)) { + VLOG(1) << "FarEqual: Fsts for key \"" << key1 << "\" are not equal."; + delete reader1; + delete reader2; + return false; + } + } + + if (!reader1->Done() || !reader2->Done()) { + VLOG(1) << "FarEqual: key \"" + << (reader1->Done() ? reader2->GetKey() : reader1->GetKey()) + << "\" missing form " << (reader2->Done() ? "first" : "second") + << " archive."; + delete reader1; + delete reader2; + return false; + } + + delete reader1; + delete reader2; + return true; +} + +} // namespace fst + +#endif // FST_EXTENSIONS_FAR_EQUAL_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/extract.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/extract.h new file mode 100644 index 0000000..95866de --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/extensions/far/extract.h @@ -0,0 +1,140 @@ +// extract-main.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// Modified: [email protected] (Jake Ratkiewicz) to use the new arc-dispatch + +// \file +// Extracts component FSTs from an finite-state archive. +// + +#ifndef FST_EXTENSIONS_FAR_EXTRACT_H__ +#define FST_EXTENSIONS_FAR_EXTRACT_H__ + +#include <string> +#include <vector> +using std::vector; + +#include <fst/extensions/far/far.h> + +namespace fst { + +template<class Arc> +inline void FarWriteFst(const Fst<Arc>* fst, string key, + string* okey, int* nrep, + const int32 &generate_filenames, int i, + const string &filename_prefix, + const string &filename_suffix) { + if (key == *okey) + ++*nrep; + else + *nrep = 0; + + *okey = key; + + string ofilename; + if (generate_filenames) { + ostringstream tmp; + tmp.width(generate_filenames); + tmp.fill('0'); + tmp << i; + ofilename = tmp.str(); + } else { + if (*nrep > 0) { + ostringstream tmp; + tmp << '.' << nrep; + key.append(tmp.str().data(), tmp.str().size()); + } + ofilename = key; + } + fst->Write(filename_prefix + ofilename + filename_suffix); +} + +template<class Arc> +void FarExtract(const vector<string> &ifilenames, + const int32 &generate_filenames, + const string &keys, + const string &key_separator, + const string &range_delimiter, + const string &filename_prefix, + const string &filename_suffix) { + FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames); + if (!far_reader) return; + + string okey; + int nrep = 0; + + vector<char *> key_vector; + // User has specified a set of fsts to extract, where some of the "fsts" could + // be ranges. 
+ if (!keys.empty()) { + char *keys_cstr = new char[keys.size()+1]; + strcpy(keys_cstr, keys.c_str()); + SplitToVector(keys_cstr, key_separator.c_str(), &key_vector, true); + int i = 0; + for (int k = 0; k < key_vector.size(); ++k, ++i) { + string key = string(key_vector[k]); + char *key_cstr = new char[key.size()+1]; + strcpy(key_cstr, key.c_str()); + vector<char *> range_vector; + SplitToVector(key_cstr, range_delimiter.c_str(), &range_vector, false); + if (range_vector.size() == 1) { // Not a range + if (!far_reader->Find(key)) { + LOG(ERROR) << "FarExtract: Cannot find key: " << key; + return; + } + const Fst<Arc> &fst = far_reader->GetFst(); + FarWriteFst(&fst, key, &okey, &nrep, generate_filenames, i, + filename_prefix, filename_suffix); + } else if (range_vector.size() == 2) { // A legal range + string begin_key = string(range_vector[0]); + string end_key = string(range_vector[1]); + if (begin_key.empty() || end_key.empty()) { + LOG(ERROR) << "FarExtract: Illegal range specification: " << key; + return; + } + if (!far_reader->Find(begin_key)) { + LOG(ERROR) << "FarExtract: Cannot find key: " << begin_key; + return; + } + for ( ; !far_reader->Done(); far_reader->Next(), ++i) { + string ikey = far_reader->GetKey(); + if (end_key < ikey) break; + const Fst<Arc> &fst = far_reader->GetFst(); + FarWriteFst(&fst, ikey, &okey, &nrep, generate_filenames, i, + filename_prefix, filename_suffix); + } + } else { + LOG(ERROR) << "FarExtract: Illegal range specification: " << key; + return; + } + delete key_cstr; + } + delete keys_cstr; + return; + } + // Nothing specified: extract everything. 
+ for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) { + string key = far_reader->GetKey(); + const Fst<Arc> &fst = far_reader->GetFst(); + FarWriteFst(&fst, key, &okey, &nrep, generate_filenames, i, + filename_prefix, filename_suffix); + } + return; +} + +} // namespace fst + +#endif // FST_EXTENSIONS_FAR_EXTRACT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/far.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/far.h new file mode 100644 index 0000000..acce76e --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/extensions/far/far.h @@ -0,0 +1,532 @@ +// far.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Finite-State Transducer (FST) archive classes. 
+// + +#ifndef FST_EXTENSIONS_FAR_FAR_H__ +#define FST_EXTENSIONS_FAR_FAR_H__ + +#include <fst/extensions/far/stlist.h> +#include <fst/extensions/far/sttable.h> +#include <fst/fst.h> +#include <fst/vector-fst.h> + +namespace fst { + +enum FarEntryType { FET_LINE, FET_FILE }; +enum FarTokenType { FTT_SYMBOL, FTT_BYTE, FTT_UTF8 }; + +inline bool IsFst(const string &filename) { + ifstream strm(filename.c_str()); + if (!strm) + return false; + return IsFstHeader(strm, filename); +} + +// FST archive header class +class FarHeader { + public: + const string &FarType() const { return fartype_; } + const string &ArcType() const { return arctype_; } + + bool Read(const string &filename) { + FstHeader fsthdr; + if (filename.empty()) { + // Header reading unsupported on stdin. Assumes STList and StdArc. + fartype_ = "stlist"; + arctype_ = "standard"; + return true; + } else if (IsSTTable(filename)) { // Check if STTable + ReadSTTableHeader(filename, &fsthdr); + fartype_ = "sttable"; + arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); + return true; + } else if (IsSTList(filename)) { // Check if STList + ReadSTListHeader(filename, &fsthdr); + fartype_ = "sttable"; + arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); + return true; + } else if (IsFst(filename)) { // Check if Fst + ifstream istrm(filename.c_str()); + fsthdr.Read(istrm, filename); + fartype_ = "fst"; + arctype_ = fsthdr.ArcType().empty() ? "unknown" : fsthdr.ArcType(); + return true; + } + return false; + } + + private: + string fartype_; + string arctype_; +}; + +enum FarType { + FAR_DEFAULT = 0, + FAR_STTABLE = 1, + FAR_STLIST = 2, + FAR_FST = 3, +}; + +// This class creates an archive of FSTs. +template <class A> +class FarWriter { + public: + typedef A Arc; + + // Creates a new (empty) FST archive; returns NULL on error. + static FarWriter *Create(const string &filename, FarType type = FAR_DEFAULT); + + // Adds an FST to the end of an archive. 
// This class iterates through an existing archive of FSTs.
//
// It is an abstract interface: the concrete readers in this file derive
// from it and the static Open() functions select one of them.
template <class A>
class FarReader {
 public:
  typedef A Arc;

  // Opens an existing FST archive in a single file; returns NULL on error.
  // Sets current position to the beginning of the archive.
  static FarReader *Open(const string &filename);

  // Opens an existing FST archive in multiple files; returns NULL on error.
  // Sets current position to the beginning of the archive.
  static FarReader *Open(const vector<string> &filenames);

  // Resets current position to beginning of archive.
  virtual void Reset() = 0;

  // Sets current position to first entry >= key.  Returns true if a match.
  virtual bool Find(const string &key) = 0;

  // Current position at end of archive?
  virtual bool Done() const = 0;

  // Move current position to next FST.
  virtual void Next() = 0;

  // Returns key at the current position. This reference is invalidated if
  // the current position in the archive is changed.
  virtual const string &GetKey() const = 0;

  // Returns FST at the current position. This reference is invalidated if
  // the current position in the archive is changed.
  virtual const Fst<A> &GetFst() const = 0;

  // Returns the FarType value identifying the concrete archive format.
  virtual FarType Type() const = 0;

  // Returns true if the reader has recorded an error.
  virtual bool Error() const = 0;

  virtual ~FarReader() {}

 protected:
  // Only derived readers may construct the base.
  FarReader() {}

 private:
  DISALLOW_COPY_AND_ASSIGN(FarReader);
};
explicit FstFarWriter(const string &filename) + : filename_(filename), error_(false), written_(false) {} + + static FstFarWriter *Create(const string &filename) { + return new FstFarWriter(filename); + } + + void Add(const string &key, const Fst<A> &fst) { + if (written_) { + LOG(WARNING) << "FstFarWriter::Add: only one Fst supported," + << " subsequent entries discarded."; + } else { + error_ = !fst.Write(filename_); + written_ = true; + } + } + + FarType Type() const { return FAR_FST; } + + bool Error() const { return error_; } + + ~FstFarWriter() {} + + private: + string filename_; + bool error_; + bool written_; + + DISALLOW_COPY_AND_ASSIGN(FstFarWriter); +}; + + +template <class A> +FarWriter<A> *FarWriter<A>::Create(const string &filename, FarType type) { + switch(type) { + case FAR_DEFAULT: + if (filename.empty()) + return STListFarWriter<A>::Create(filename); + case FAR_STTABLE: + return STTableFarWriter<A>::Create(filename); + break; + case FAR_STLIST: + return STListFarWriter<A>::Create(filename); + break; + case FAR_FST: + return FstFarWriter<A>::Create(filename); + break; + default: + LOG(ERROR) << "FarWriter::Create: unknown far type"; + return 0; + } +} + + +template <class A> +class FstReader { + public: + Fst<A> *operator()(istream &strm) const { + return Fst<A>::Read(strm, FstReadOptions()); + } +}; + + +template <class A> +class STTableFarReader : public FarReader<A> { + public: + typedef A Arc; + + static STTableFarReader *Open(const string &filename) { + STTableReader<Fst<A>, FstReader<A> > *reader = + STTableReader<Fst<A>, FstReader<A> >::Open(filename); + // TODO: error check + return new STTableFarReader(reader); + } + + static STTableFarReader *Open(const vector<string> &filenames) { + STTableReader<Fst<A>, FstReader<A> > *reader = + STTableReader<Fst<A>, FstReader<A> >::Open(filenames); + // TODO: error check + return new STTableFarReader(reader); + } + + void Reset() { reader_->Reset(); } + + bool Find(const string &key) { return 
reader_->Find(key); } + + bool Done() const { return reader_->Done(); } + + void Next() { return reader_->Next(); } + + const string &GetKey() const { return reader_->GetKey(); } + + const Fst<A> &GetFst() const { return reader_->GetEntry(); } + + FarType Type() const { return FAR_STTABLE; } + + bool Error() const { return reader_->Error(); } + + ~STTableFarReader() { delete reader_; } + + private: + explicit STTableFarReader(STTableReader<Fst<A>, FstReader<A> > *reader) + : reader_(reader) {} + + private: + STTableReader<Fst<A>, FstReader<A> > *reader_; + + DISALLOW_COPY_AND_ASSIGN(STTableFarReader); +}; + + +template <class A> +class STListFarReader : public FarReader<A> { + public: + typedef A Arc; + + static STListFarReader *Open(const string &filename) { + STListReader<Fst<A>, FstReader<A> > *reader = + STListReader<Fst<A>, FstReader<A> >::Open(filename); + // TODO: error check + return new STListFarReader(reader); + } + + static STListFarReader *Open(const vector<string> &filenames) { + STListReader<Fst<A>, FstReader<A> > *reader = + STListReader<Fst<A>, FstReader<A> >::Open(filenames); + // TODO: error check + return new STListFarReader(reader); + } + + void Reset() { reader_->Reset(); } + + bool Find(const string &key) { return reader_->Find(key); } + + bool Done() const { return reader_->Done(); } + + void Next() { return reader_->Next(); } + + const string &GetKey() const { return reader_->GetKey(); } + + const Fst<A> &GetFst() const { return reader_->GetEntry(); } + + FarType Type() const { return FAR_STLIST; } + + bool Error() const { return reader_->Error(); } + + ~STListFarReader() { delete reader_; } + + private: + explicit STListFarReader(STListReader<Fst<A>, FstReader<A> > *reader) + : reader_(reader) {} + + private: + STListReader<Fst<A>, FstReader<A> > *reader_; + + DISALLOW_COPY_AND_ASSIGN(STListFarReader); +}; + +template <class A> +class FstFarReader : public FarReader<A> { + public: + typedef A Arc; + + static FstFarReader *Open(const string 
&filename) { + vector<string> filenames; + filenames.push_back(filename); + return new FstFarReader<A>(filenames); + } + + static FstFarReader *Open(const vector<string> &filenames) { + return new FstFarReader<A>(filenames); + } + + FstFarReader(const vector<string> &filenames) + : keys_(filenames), has_stdin_(false), pos_(0), fst_(0), error_(false) { + sort(keys_.begin(), keys_.end()); + streams_.resize(keys_.size(), 0); + for (size_t i = 0; i < keys_.size(); ++i) { + if (keys_[i].empty()) { + if (!has_stdin_) { + streams_[i] = &cin; + //sources_[i] = "stdin"; + has_stdin_ = true; + } else { + FSTERROR() << "FstFarReader::FstFarReader: stdin should only " + << "appear once in the input file list."; + error_ = true; + return; + } + } else { + streams_[i] = new ifstream( + keys_[i].c_str(), ifstream::in | ifstream::binary); + } + } + if (pos_ >= keys_.size()) return; + ReadFst(); + } + + void Reset() { + if (has_stdin_) { + FSTERROR() << "FstFarReader::Reset: operation not supported on stdin"; + error_ = true; + return; + } + pos_ = 0; + ReadFst(); + } + + bool Find(const string &key) { + if (has_stdin_) { + FSTERROR() << "FstFarReader::Find: operation not supported on stdin"; + error_ = true; + return false; + } + pos_ = 0;//TODO + ReadFst(); + return true; + } + + bool Done() const { return error_ || pos_ >= keys_.size(); } + + void Next() { + ++pos_; + ReadFst(); + } + + const string &GetKey() const { + return keys_[pos_]; + } + + const Fst<A> &GetFst() const { + return *fst_; + } + + FarType Type() const { return FAR_FST; } + + bool Error() const { return error_; } + + ~FstFarReader() { + if (fst_) delete fst_; + for (size_t i = 0; i < keys_.size(); ++i) + delete streams_[i]; + } + + private: + void ReadFst() { + if (fst_) delete fst_; + if (pos_ >= keys_.size()) return; + streams_[pos_]->seekg(0); + fst_ = Fst<A>::Read(*streams_[pos_], FstReadOptions()); + if (!fst_) { + FSTERROR() << "FstFarReader: error reading Fst from: " << keys_[pos_]; + error_ = true; + } 
+ } + + private: + vector<string> keys_; + vector<istream*> streams_; + bool has_stdin_; + size_t pos_; + mutable Fst<A> *fst_; + mutable bool error_; + + DISALLOW_COPY_AND_ASSIGN(FstFarReader); +}; + +template <class A> +FarReader<A> *FarReader<A>::Open(const string &filename) { + if (filename.empty()) + return STListFarReader<A>::Open(filename); + else if (IsSTTable(filename)) + return STTableFarReader<A>::Open(filename); + else if (IsSTList(filename)) + return STListFarReader<A>::Open(filename); + else if (IsFst(filename)) + return FstFarReader<A>::Open(filename); + return 0; +} + + +template <class A> +FarReader<A> *FarReader<A>::Open(const vector<string> &filenames) { + if (!filenames.empty() && filenames[0].empty()) + return STListFarReader<A>::Open(filenames); + else if (!filenames.empty() && IsSTTable(filenames[0])) + return STTableFarReader<A>::Open(filenames); + else if (!filenames.empty() && IsSTList(filenames[0])) + return STListFarReader<A>::Open(filenames); + else if (!filenames.empty() && IsFst(filenames[0])) + return FstFarReader<A>::Open(filenames); + return 0; +} + +} // namespace fst + +#endif // FST_EXTENSIONS_FAR_FAR_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/farlib.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/farlib.h new file mode 100644 index 0000000..91ba224 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/extensions/far/farlib.h @@ -0,0 +1,31 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +// A finite-state archive (FAR) is used to store an indexable collection of +// FSTs in a single file. Utilities are provided to create FARs from FSTs, +// to iterate over FARs, and to extract specific FSTs from FARs. + +#ifndef FST_EXTENSIONS_FAR_FARLIB_H_ +#define FST_EXTENSIONS_FAR_FARLIB_H_ + +#include <fst/extensions/far/far.h> +#include <fst/extensions/far/compile-strings.h> +#include <fst/extensions/far/create.h> +#include <fst/extensions/far/extract.h> +#include <fst/extensions/far/info.h> +#include <fst/extensions/far/print-strings.h> + +#endif // FST_EXTENSIONS_FAR_FARLIB_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/farscript.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/farscript.h new file mode 100644 index 0000000..cfd9167 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/extensions/far/farscript.h @@ -0,0 +1,273 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +// Convenience file for including all of the FAR operations, +// or registering them for new arc types. 
+ +#ifndef FST_EXTENSIONS_FAR_FARSCRIPT_H_ +#define FST_EXTENSIONS_FAR_FARSCRIPT_H_ + +#include <vector> +using std::vector; +#include <string> + +#include <fst/script/arg-packs.h> +#include <fst/extensions/far/compile-strings.h> +#include <fst/extensions/far/create.h> +#include <fst/extensions/far/equal.h> +#include <fst/extensions/far/extract.h> +#include <fst/extensions/far/info.h> +#include <fst/extensions/far/print-strings.h> +#include <fst/extensions/far/far.h> + +#include <fst/types.h> + +namespace fst { +namespace script { + +// Note: it is safe to pass these strings as references because +// this struct is only used to pass them deeper in the call graph. +// Be sure you understand why this is so before using this struct +// for anything else! +struct FarCompileStringsArgs { + const vector<string> &in_fnames; + const string &out_fname; + const string &fst_type; + const FarType &far_type; + const int32 generate_keys; + const FarEntryType fet; + const FarTokenType tt; + const string &symbols_fname; + const string &unknown_symbol; + const bool keep_symbols; + const bool initial_symbols; + const bool allow_negative_labels; + const bool file_list_input; + const string &key_prefix; + const string &key_suffix; + + FarCompileStringsArgs(const vector<string> &in_fnames, + const string &out_fname, + const string &fst_type, + const FarType &far_type, + int32 generate_keys, + FarEntryType fet, + FarTokenType tt, + const string &symbols_fname, + const string &unknown_symbol, + bool keep_symbols, + bool initial_symbols, + bool allow_negative_labels, + bool file_list_input, + const string &key_prefix, + const string &key_suffix) : + in_fnames(in_fnames), out_fname(out_fname), fst_type(fst_type), + far_type(far_type), generate_keys(generate_keys), fet(fet), + tt(tt), symbols_fname(symbols_fname), unknown_symbol(unknown_symbol), + keep_symbols(keep_symbols), initial_symbols(initial_symbols), + allow_negative_labels(allow_negative_labels), + file_list_input(file_list_input), 
key_prefix(key_prefix), + key_suffix(key_suffix) { } +}; + +template <class Arc> +void FarCompileStrings(FarCompileStringsArgs *args) { + fst::FarCompileStrings<Arc>( + args->in_fnames, args->out_fname, args->fst_type, args->far_type, + args->generate_keys, args->fet, args->tt, args->symbols_fname, + args->unknown_symbol, args->keep_symbols, args->initial_symbols, + args->allow_negative_labels, args->file_list_input, + args->key_prefix, args->key_suffix); +} + +void FarCompileStrings( + const vector<string> &in_fnames, + const string &out_fname, + const string &arc_type, + const string &fst_type, + const FarType &far_type, + int32 generate_keys, + FarEntryType fet, + FarTokenType tt, + const string &symbols_fname, + const string &unknown_symbol, + bool keep_symbols, + bool initial_symbols, + bool allow_negative_labels, + bool file_list_input, + const string &key_prefix, + const string &key_suffix); + + +// Note: it is safe to pass these strings as references because +// this struct is only used to pass them deeper in the call graph. +// Be sure you understand why this is so before using this struct +// for anything else! 
+struct FarCreateArgs { + const vector<string> &in_fnames; + const string &out_fname; + const int32 generate_keys; + const bool file_list_input; + const FarType &far_type; + const string &key_prefix; + const string &key_suffix; + + FarCreateArgs( + const vector<string> &in_fnames, const string &out_fname, + const int32 generate_keys, const bool file_list_input, + const FarType &far_type, const string &key_prefix, + const string &key_suffix) + : in_fnames(in_fnames), out_fname(out_fname), + generate_keys(generate_keys), file_list_input(file_list_input), + far_type(far_type), key_prefix(key_prefix), key_suffix(key_suffix) { } +}; + +template<class Arc> +void FarCreate(FarCreateArgs *args) { + fst::FarCreate<Arc>(args->in_fnames, args->out_fname, args->generate_keys, + args->file_list_input, args->far_type, + args->key_prefix, args->key_suffix); +} + +void FarCreate(const vector<string> &in_fnames, + const string &out_fname, + const string &arc_type, + const int32 generate_keys, + const bool file_list_input, + const FarType &far_type, + const string &key_prefix, + const string &key_suffix); + + +typedef args::Package<const string &, const string &, float, + const string &, const string &> FarEqualInnerArgs; +typedef args::WithReturnValue<bool, FarEqualInnerArgs> FarEqualArgs; + +template <class Arc> +void FarEqual(FarEqualArgs *args) { + args->retval = fst::FarEqual<Arc>( + args->args.arg1, args->args.arg2, args->args.arg3, + args->args.arg4, args->args.arg5); +} + +bool FarEqual(const string &filename1, + const string &filename2, + const string &arc_type, + float delta = kDelta, + const string &begin_key = string(), + const string &end_key = string()); + + +typedef args::Package<const vector<string> &, int32, + const string&, const string&, const string&, + const string&, const string&> FarExtractArgs; + +template<class Arc> +void FarExtract(FarExtractArgs *args) { + fst::FarExtract<Arc>( + args->arg1, args->arg2, args->arg3, args->arg4, args->arg5, args->arg6, + 
args->arg7); +} + +void FarExtract(const vector<string> &ifilenames, + const string &arc_type, + int32 generate_filenames, + const string &keys, + const string &key_separator, + const string &range_delimiter, + const string &filename_prefix, + const string &filename_suffix); + +typedef args::Package<const vector<string> &, const string &, + const string &, const bool> FarInfoArgs; + +template <class Arc> +void FarInfo(FarInfoArgs *args) { + fst::FarInfo<Arc>(args->arg1, args->arg2, args->arg3, args->arg4); +} + +void FarInfo(const vector<string> &filenames, + const string &arc_type, + const string &begin_key, + const string &end_key, + const bool list_fsts); + +struct FarPrintStringsArgs { + const vector<string> &ifilenames; + const FarEntryType entry_type; + const FarTokenType token_type; + const string &begin_key; + const string &end_key; + const bool print_key; + const bool print_weight; + const string &symbols_fname; + const bool initial_symbols; + const int32 generate_filenames; + const string &filename_prefix; + const string &filename_suffix; + + FarPrintStringsArgs( + const vector<string> &ifilenames, const FarEntryType entry_type, + const FarTokenType token_type, const string &begin_key, + const string &end_key, const bool print_key, const bool print_weight, + const string &symbols_fname, const bool initial_symbols, + const int32 generate_filenames, + const string &filename_prefix, const string &filename_suffix) : + ifilenames(ifilenames), entry_type(entry_type), token_type(token_type), + begin_key(begin_key), end_key(end_key), + print_key(print_key), print_weight(print_weight), + symbols_fname(symbols_fname), initial_symbols(initial_symbols), + generate_filenames(generate_filenames), filename_prefix(filename_prefix), + filename_suffix(filename_suffix) { } +}; + +template <class Arc> +void FarPrintStrings(FarPrintStringsArgs *args) { + fst::FarPrintStrings<Arc>( + args->ifilenames, args->entry_type, args->token_type, + args->begin_key, args->end_key, 
args->print_key, args->print_weight, + args->symbols_fname, args->initial_symbols, args->generate_filenames, + args->filename_prefix, args->filename_suffix); +} + + +void FarPrintStrings(const vector<string> &ifilenames, + const string &arc_type, + const FarEntryType entry_type, + const FarTokenType token_type, + const string &begin_key, + const string &end_key, + const bool print_key, + const bool print_weight, + const string &symbols_fname, + const bool initial_symbols, + const int32 generate_filenames, + const string &filename_prefix, + const string &filename_suffix); + +} // namespace script +} // namespace fst + + +#define REGISTER_FST_FAR_OPERATIONS(ArcType) \ + REGISTER_FST_OPERATION(FarCompileStrings, ArcType, FarCompileStringsArgs); \ + REGISTER_FST_OPERATION(FarCreate, ArcType, FarCreateArgs); \ + REGISTER_FST_OPERATION(FarEqual, ArcType, FarEqualArgs); \ + REGISTER_FST_OPERATION(FarExtract, ArcType, FarExtractArgs); \ + REGISTER_FST_OPERATION(FarInfo, ArcType, FarInfoArgs); \ + REGISTER_FST_OPERATION(FarPrintStrings, ArcType, FarPrintStringsArgs) + +#endif // FST_EXTENSIONS_FAR_FARSCRIPT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/info.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/info.h new file mode 100644 index 0000000..100fe68 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/extensions/far/info.h @@ -0,0 +1,128 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) +// Modified: [email protected] (Jake Ratkiewicz) + +#ifndef FST_EXTENSIONS_FAR_INFO_H_ +#define FST_EXTENSIONS_FAR_INFO_H_ + +#include <iomanip> +#include <set> +#include <string> +#include <vector> +using std::vector; + +#include <fst/extensions/far/far.h> +#include <fst/extensions/far/main.h> // For FarTypeToString + +namespace fst { + +template <class Arc> +void CountStatesAndArcs(const Fst<Arc> &fst, size_t *nstate, size_t *narc) { + StateIterator<Fst<Arc> > siter(fst); + for (; !siter.Done(); siter.Next(), ++(*nstate)) { + ArcIterator<Fst<Arc> > aiter(fst, siter.Value()); + for (; !aiter.Done(); aiter.Next(), ++(*narc)) {} + } +} + +struct KeyInfo { + string key; + string type; + size_t nstate; + size_t narc; + + KeyInfo(string k, string t, int64 ns = 0, int64 na = 0) + : key(k), type(t), nstate(ns), narc(na) {} +}; + +template <class Arc> +void FarInfo(const vector<string> &filenames, const string &begin_key, + const string &end_key, const bool list_fsts) { + FarReader<Arc> *far_reader = FarReader<Arc>::Open(filenames); + if (!far_reader) return; + + if (!begin_key.empty()) + far_reader->Find(begin_key); + + vector<KeyInfo> *infos = list_fsts ? 
new vector<KeyInfo>() : 0; + size_t nfst = 0, nstate = 0, narc = 0; + set<string> fst_types; + for (; !far_reader->Done(); far_reader->Next()) { + string key = far_reader->GetKey(); + if (!end_key.empty() && end_key < key) + break; + ++nfst; + const Fst<Arc> &fst = far_reader->GetFst(); + fst_types.insert(fst.Type()); + if (infos) { + KeyInfo info(key, fst.Type()); + CountStatesAndArcs(fst, &info.nstate, &info.narc); + nstate += info.nstate; + nstate += info.narc; + infos->push_back(info); + } else { + CountStatesAndArcs(fst, &nstate, &narc); + } + } + + if (!infos) { + cout << std::left << setw(50) << "far type" + << FarTypeToString(far_reader->Type()) << endl; + cout << std::left << setw(50) << "arc type" << Arc::Type() << endl; + cout << std::left << setw(50) << "fst type"; + for (set<string>::const_iterator iter = fst_types.begin(); + iter != fst_types.end(); + ++iter) { + if (iter != fst_types.begin()) + cout << ","; + cout << *iter; + } + cout << endl; + cout << std::left << setw(50) << "# of FSTs" << nfst << endl; + cout << std::left << setw(50) << "total # of states" << nstate << endl; + cout << std::left << setw(50) << "total # of arcs" << narc << endl; + + } else { + int wkey = 10, wtype = 10, wnstate = 16, wnarc = 16; + for (size_t i = 0; i < infos->size(); ++i) { + const KeyInfo &info = (*infos)[i]; + if (info.key.size() + 2 > wkey) + wkey = info.key.size() + 2; + if (info.type.size() + 2 > wtype) + wtype = info.type.size() + 2; + if (ceil(log10(info.nstate)) + 2 > wnstate) + wnstate = ceil(log10(info.nstate)) + 2; + if (ceil(log10(info.narc)) + 2 > wnarc) + wnarc = ceil(log10(info.narc)) + 2; + } + + cout << std::left << setw(wkey) << "key" << setw(wtype) << "type" + << std::right << setw(wnstate) << "# of states" + << setw(wnarc) << "# of arcs" << endl; + + for (size_t i = 0; i < infos->size(); ++i) { + const KeyInfo &info = (*infos)[i]; + cout << std::left << setw(wkey) << info.key << setw(wtype) << info.type + << std::right << setw(wnstate) << 
info.nstate + << setw(wnarc) << info.narc << endl; + } + } +} + +} // namespace fst + + +#endif // FST_EXTENSIONS_FAR_INFO_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/main.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/main.h new file mode 100644 index 0000000..00ccfef --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/extensions/far/main.h @@ -0,0 +1,43 @@ +// main.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Classes and functions for registering and invoking Far main +// functions that support multiple and extensible arc types. + +#ifndef FST_EXTENSIONS_FAR_MAIN_H__ +#define FST_EXTENSIONS_FAR_MAIN_H__ + +#include <fst/extensions/far/far.h> + +namespace fst { + +FarEntryType StringToFarEntryType(const string &s); +FarTokenType StringToFarTokenType(const string &s); + +// Return the 'FarType' value corresponding to a far type name. +FarType FarTypeFromString(const string &str); + +// Return the textual name corresponding to a 'FarType;. 
+string FarTypeToString(FarType type); + +string LoadArcTypeFromFar(const string& far_fname); +string LoadArcTypeFromFst(const string& far_fname); + +} // namespace fst + +#endif // FST_EXTENSIONS_FAR_MAIN_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/print-strings.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/print-strings.h new file mode 100644 index 0000000..dcc7351 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/extensions/far/print-strings.h @@ -0,0 +1,138 @@ +// printstrings-main.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) +// Modified by: [email protected] (Jake Ratkiewicz) +// +// \file +// Output as strings the string FSTs in a finite-state archive. 
+ +#ifndef FST_EXTENSIONS_FAR_PRINT_STRINGS_H__ +#define FST_EXTENSIONS_FAR_PRINT_STRINGS_H__ + +#include <string> +#include <vector> +using std::vector; + +#include <fst/extensions/far/far.h> +#include <fst/shortest-distance.h> +#include <fst/string.h> + +DECLARE_string(far_field_separator); + +namespace fst { + +template <class Arc> +void FarPrintStrings( + const vector<string> &ifilenames, const FarEntryType entry_type, + const FarTokenType far_token_type, const string &begin_key, + const string &end_key, const bool print_key, const bool print_weight, + const string &symbols_fname, const bool initial_symbols, + const int32 generate_filenames, + const string &filename_prefix, const string &filename_suffix) { + + typename StringPrinter<Arc>::TokenType token_type; + if (far_token_type == FTT_SYMBOL) { + token_type = StringPrinter<Arc>::SYMBOL; + } else if (far_token_type == FTT_BYTE) { + token_type = StringPrinter<Arc>::BYTE; + } else if (far_token_type == FTT_UTF8) { + token_type = StringPrinter<Arc>::UTF8; + } else { + FSTERROR() << "FarPrintStrings: unknown token type"; + return; + } + + const SymbolTable *syms = 0; + if (!symbols_fname.empty()) { + // allow negative flag? 
+ SymbolTableTextOptions opts; + opts.allow_negative = true; + syms = SymbolTable::ReadText(symbols_fname, opts); + if (!syms) { + FSTERROR() << "FarPrintStrings: error reading symbol table: " + << symbols_fname; + return; + } + } + + FarReader<Arc> *far_reader = FarReader<Arc>::Open(ifilenames); + if (!far_reader) return; + + if (!begin_key.empty()) + far_reader->Find(begin_key); + + string okey; + int nrep = 0; + for (int i = 1; !far_reader->Done(); far_reader->Next(), ++i) { + string key = far_reader->GetKey(); + if (!end_key.empty() && end_key < key) + break; + if (okey == key) + ++nrep; + else + nrep = 0; + okey = key; + + const Fst<Arc> &fst = far_reader->GetFst(); + if (i == 1 && initial_symbols && syms == 0 && fst.InputSymbols() != 0) + syms = fst.InputSymbols()->Copy(); + string str; + VLOG(2) << "Handling key: " << key; + StringPrinter<Arc> string_printer( + token_type, syms ? syms : fst.InputSymbols()); + string_printer(fst, &str); + + if (entry_type == FET_LINE) { + if (print_key) + cout << key << FLAGS_far_field_separator[0]; + cout << str; + if (print_weight) + cout << FLAGS_far_field_separator[0] << ShortestDistance(fst); + cout << endl; + } else if (entry_type == FET_FILE) { + stringstream sstrm; + if (generate_filenames) { + sstrm.fill('0'); + sstrm << std::right << setw(generate_filenames) << i; + } else { + sstrm << key; + if (nrep > 0) + sstrm << "." 
<< nrep; + } + + string filename; + filename = filename_prefix + sstrm.str() + filename_suffix; + + ofstream ostrm(filename.c_str()); + if (!ostrm) { + FSTERROR() << "FarPrintStrings: Can't open file:" << filename; + delete syms; + delete far_reader; + return; + } + ostrm << str; + if (token_type == StringPrinter<Arc>::SYMBOL) + ostrm << "\n"; + } + } + delete syms; +} + + + +} // namespace fst + +#endif // FST_EXTENSIONS_FAR_PRINT_STRINGS_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/stlist.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/stlist.h new file mode 100644 index 0000000..ff3d98b --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/extensions/far/stlist.h @@ -0,0 +1,305 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) +// +// \file +// A generic (string,type) list file format. +// +// This is a stripped-down version of STTable that does +// not support the Find() operation but that does support +// reading/writting from standard in/out. 
+ +#ifndef FST_EXTENSIONS_FAR_STLIST_H_ +#define FST_EXTENSIONS_FAR_STLIST_H_ + +#include <iostream> +#include <fstream> +#include <sstream> +#include <fst/util.h> + +#include <algorithm> +#include <functional> +#include <queue> +#include <string> +#include <utility> +using std::pair; using std::make_pair; +#include <vector> +using std::vector; + +namespace fst { + +static const int32 kSTListMagicNumber = 5656924; +static const int32 kSTListFileVersion = 1; + +// String-type list writing class for object of type 'T' using functor 'W' +// to write an object of type 'T' from a stream. 'W' must conform to the +// following interface: +// +// struct Writer { +// void operator()(ostream &, const T &) const; +// }; +// +template <class T, class W> +class STListWriter { + public: + typedef T EntryType; + typedef W EntryWriter; + + explicit STListWriter(const string filename) + : stream_( + filename.empty() ? &cout : + new ofstream(filename.c_str(), ofstream::out | ofstream::binary)), + error_(false) { + WriteType(*stream_, kSTListMagicNumber); + WriteType(*stream_, kSTListFileVersion); + if (!stream_) { + FSTERROR() << "STListWriter::STListWriter: error writing to file: " + << filename; + error_ = true; + } + } + + static STListWriter<T, W> *Create(const string &filename) { + return new STListWriter<T, W>(filename); + } + + void Add(const string &key, const T &t) { + if (key == "") { + FSTERROR() << "STListWriter::Add: key empty: " << key; + error_ = true; + } else if (key < last_key_) { + FSTERROR() << "STListWriter::Add: key disorder: " << key; + error_ = true; + } + if (error_) return; + last_key_ = key; + WriteType(*stream_, key); + entry_writer_(*stream_, t); + } + + bool Error() const { return error_; } + + ~STListWriter() { + WriteType(*stream_, string()); + if (stream_ != &cout) + delete stream_; + } + + private: + EntryWriter entry_writer_; // Write functor for 'EntryType' + ostream *stream_; // Output stream + string last_key_; // Last key + bool error_; + + 
DISALLOW_COPY_AND_ASSIGN(STListWriter); +}; + + +// String-type list reading class for object of type 'T' using functor 'R' +// to read an object of type 'T' form a stream. 'R' must conform to the +// following interface: +// +// struct Reader { +// T *operator()(istream &) const; +// }; +// +template <class T, class R> +class STListReader { + public: + typedef T EntryType; + typedef R EntryReader; + + explicit STListReader(const vector<string> &filenames) + : sources_(filenames), entry_(0), error_(false) { + streams_.resize(filenames.size(), 0); + bool has_stdin = false; + for (size_t i = 0; i < filenames.size(); ++i) { + if (filenames[i].empty()) { + if (!has_stdin) { + streams_[i] = &cin; + sources_[i] = "stdin"; + has_stdin = true; + } else { + FSTERROR() << "STListReader::STListReader: stdin should only " + << "appear once in the input file list."; + error_ = true; + return; + } + } else { + streams_[i] = new ifstream( + filenames[i].c_str(), ifstream::in | ifstream::binary); + } + int32 magic_number = 0, file_version = 0; + ReadType(*streams_[i], &magic_number); + ReadType(*streams_[i], &file_version); + if (magic_number != kSTListMagicNumber) { + FSTERROR() << "STListReader::STListReader: wrong file type: " + << filenames[i]; + error_ = true; + return; + } + if (file_version != kSTListFileVersion) { + FSTERROR() << "STListReader::STListReader: wrong file version: " + << filenames[i]; + error_ = true; + return; + } + string key; + ReadType(*streams_[i], &key); + if (!key.empty()) + heap_.push(make_pair(key, i)); + if (!*streams_[i]) { + FSTERROR() << "STListReader: error reading file: " << sources_[i]; + error_ = true; + return; + } + } + if (heap_.empty()) return; + size_t current = heap_.top().second; + entry_ = entry_reader_(*streams_[current]); + if (!entry_ || !*streams_[current]) { + FSTERROR() << "STListReader: error reading entry for key: " + << heap_.top().first << ", file: " << sources_[current]; + error_ = true; + } + } + + ~STListReader() { + for 
(size_t i = 0; i < streams_.size(); ++i) { + if (streams_[i] != &cin) + delete streams_[i]; + } + if (entry_) + delete entry_; + } + + static STListReader<T, R> *Open(const string &filename) { + vector<string> filenames; + filenames.push_back(filename); + return new STListReader<T, R>(filenames); + } + + static STListReader<T, R> *Open(const vector<string> &filenames) { + return new STListReader<T, R>(filenames); + } + + void Reset() { + FSTERROR() + << "STListReader::Reset: stlist does not support reset operation"; + error_ = true; + } + + bool Find(const string &key) { + FSTERROR() + << "STListReader::Find: stlist does not support find operation"; + error_ = true; + return false; + } + + bool Done() const { + return error_ || heap_.empty(); + } + + void Next() { + if (error_) return; + size_t current = heap_.top().second; + string key; + heap_.pop(); + ReadType(*(streams_[current]), &key); + if (!*streams_[current]) { + FSTERROR() << "STListReader: error reading file: " + << sources_[current]; + error_ = true; + return; + } + if (!key.empty()) + heap_.push(make_pair(key, current)); + + if(!heap_.empty()) { + current = heap_.top().second; + if (entry_) + delete entry_; + entry_ = entry_reader_(*streams_[current]); + if (!entry_ || !*streams_[current]) { + FSTERROR() << "STListReader: error reading entry for key: " + << heap_.top().first << ", file: " << sources_[current]; + error_ = true; + } + } + } + + const string &GetKey() const { + return heap_.top().first; + } + + const EntryType &GetEntry() const { + return *entry_; + } + + bool Error() const { return error_; } + + private: + EntryReader entry_reader_; // Read functor for 'EntryType' + vector<istream*> streams_; // Input streams + vector<string> sources_; // and corresponding file names + priority_queue< + pair<string, size_t>, vector<pair<string, size_t> >, + greater<pair<string, size_t> > > heap_; // (Key, stream id) heap + mutable EntryType *entry_; // Pointer to the currently read entry + bool error_; + 
+ DISALLOW_COPY_AND_ASSIGN(STListReader); +}; + + +// String-type list header reading function template on the entry header +// type 'H' having a member function: +// Read(istream &strm, const string &filename); +// Checks that 'filename' is an STList and call the H::Read() on the last +// entry in the STList. +// Does not support reading from stdin. +template <class H> +bool ReadSTListHeader(const string &filename, H *header) { + if (filename.empty()) { + LOG(ERROR) << "ReadSTListHeader: reading header not supported on stdin"; + return false; + } + ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); + int32 magic_number = 0, file_version = 0; + ReadType(strm, &magic_number); + ReadType(strm, &file_version); + if (magic_number != kSTListMagicNumber) { + LOG(ERROR) << "ReadSTListHeader: wrong file type: " << filename; + return false; + } + if (file_version != kSTListFileVersion) { + LOG(ERROR) << "ReadSTListHeader: wrong file version: " << filename; + return false; + } + string key; + ReadType(strm, &key); + header->Read(strm, filename + ":" + key); + if (!strm) { + LOG(ERROR) << "ReadSTListHeader: error reading file: " << filename; + return false; + } + return true; +} + +bool IsSTList(const string &filename); + +} // namespace fst + +#endif // FST_EXTENSIONS_FAR_STLIST_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/far/sttable.h b/kaldi_io/src/tools/openfst/include/fst/extensions/far/sttable.h new file mode 100644 index 0000000..3ce0a4b --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/extensions/far/sttable.h @@ -0,0 +1,371 @@ +// sttable.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) +// +// \file +// A generic string-to-type table file format +// +// This is not meant as a generalization of SSTable. This is more of +// a simple replacement for SSTable in order to provide an open-source +// implementation of the FAR format for the external version of the +// FST Library. + +#ifndef FST_EXTENSIONS_FAR_STTABLE_H_ +#define FST_EXTENSIONS_FAR_STTABLE_H_ + +#include <algorithm> +#include <iostream> +#include <fstream> +#include <sstream> +#include <fst/util.h> + +namespace fst { + +static const int32 kSTTableMagicNumber = 2125656924; +static const int32 kSTTableFileVersion = 1; + +// String-to-type table writing class for object of type 'T' using functor 'W' +// to write an object of type 'T' from a stream. 
'W' must conform to the +// following interface: +// +// struct Writer { +// void operator()(ostream &, const T &) const; +// }; +// +template <class T, class W> +class STTableWriter { + public: + typedef T EntryType; + typedef W EntryWriter; + + explicit STTableWriter(const string &filename) + : stream_(filename.c_str(), ofstream::out | ofstream::binary), + error_(false) { + WriteType(stream_, kSTTableMagicNumber); + WriteType(stream_, kSTTableFileVersion); + if (!stream_) { + FSTERROR() << "STTableWriter::STTableWriter: error writing to file: " + << filename; + error_=true; + } + } + + static STTableWriter<T, W> *Create(const string &filename) { + if (filename.empty()) { + LOG(ERROR) << "STTableWriter: writing to standard out unsupported."; + return 0; + } + return new STTableWriter<T, W>(filename); + } + + void Add(const string &key, const T &t) { + if (key == "") { + FSTERROR() << "STTableWriter::Add: key empty: " << key; + error_ = true; + } else if (key < last_key_) { + FSTERROR() << "STTableWriter::Add: key disorder: " << key; + error_ = true; + } + if (error_) return; + last_key_ = key; + positions_.push_back(stream_.tellp()); + WriteType(stream_, key); + entry_writer_(stream_, t); + } + + bool Error() const { return error_; } + + ~STTableWriter() { + WriteType(stream_, positions_); + WriteType(stream_, static_cast<int64>(positions_.size())); + } + + private: + EntryWriter entry_writer_; // Write functor for 'EntryType' + ofstream stream_; // Output stream + vector<int64> positions_; // Position in file of each key-entry pair + string last_key_; // Last key + bool error_; + + DISALLOW_COPY_AND_ASSIGN(STTableWriter); +}; + + +// String-to-type table reading class for object of type 'T' using functor 'R' +// to read an object of type 'T' form a stream. 
'R' must conform to the +// following interface: +// +// struct Reader { +// T *operator()(istream &) const; +// }; +// +template <class T, class R> +class STTableReader { + public: + typedef T EntryType; + typedef R EntryReader; + + explicit STTableReader(const vector<string> &filenames) + : sources_(filenames), entry_(0), error_(false) { + compare_ = new Compare(&keys_); + keys_.resize(filenames.size()); + streams_.resize(filenames.size(), 0); + positions_.resize(filenames.size()); + for (size_t i = 0; i < filenames.size(); ++i) { + streams_[i] = new ifstream( + filenames[i].c_str(), ifstream::in | ifstream::binary); + int32 magic_number = 0, file_version = 0; + ReadType(*streams_[i], &magic_number); + ReadType(*streams_[i], &file_version); + if (magic_number != kSTTableMagicNumber) { + FSTERROR() << "STTableReader::STTableReader: wrong file type: " + << filenames[i]; + error_ = true; + return; + } + if (file_version != kSTTableFileVersion) { + FSTERROR() << "STTableReader::STTableReader: wrong file version: " + << filenames[i]; + error_ = true; + return; + } + int64 num_entries; + streams_[i]->seekg(-static_cast<int>(sizeof(int64)), ios_base::end); + ReadType(*streams_[i], &num_entries); + streams_[i]->seekg(-static_cast<int>(sizeof(int64)) * + (num_entries + 1), ios_base::end); + positions_[i].resize(num_entries); + for (size_t j = 0; (j < num_entries) && (*streams_[i]); ++j) + ReadType(*streams_[i], &(positions_[i][j])); + streams_[i]->seekg(positions_[i][0]); + if (!*streams_[i]) { + FSTERROR() << "STTableReader::STTableReader: error reading file: " + << filenames[i]; + error_ = true; + return; + } + + } + MakeHeap(); + } + + ~STTableReader() { + for (size_t i = 0; i < streams_.size(); ++i) + delete streams_[i]; + delete compare_; + if (entry_) + delete entry_; + } + + static STTableReader<T, R> *Open(const string &filename) { + if (filename.empty()) { + LOG(ERROR) << "STTableReader: reading from standard in not supported"; + return 0; + } + vector<string> 
filenames; + filenames.push_back(filename); + return new STTableReader<T, R>(filenames); + } + + static STTableReader<T, R> *Open(const vector<string> &filenames) { + return new STTableReader<T, R>(filenames); + } + + void Reset() { + if (error_) return; + for (size_t i = 0; i < streams_.size(); ++i) + streams_[i]->seekg(positions_[i].front()); + MakeHeap(); + } + + bool Find(const string &key) { + if (error_) return false; + for (size_t i = 0; i < streams_.size(); ++i) + LowerBound(i, key); + MakeHeap(); + return keys_[current_] == key; + } + + bool Done() const { return error_ || heap_.empty(); } + + void Next() { + if (error_) return; + if (streams_[current_]->tellg() <= positions_[current_].back()) { + ReadType(*(streams_[current_]), &(keys_[current_])); + if (!*streams_[current_]) { + FSTERROR() << "STTableReader: error reading file: " + << sources_[current_]; + error_ = true; + return; + } + push_heap(heap_.begin(), heap_.end(), *compare_); + } else { + heap_.pop_back(); + } + if (!heap_.empty()) + PopHeap(); + } + + const string &GetKey() const { + return keys_[current_]; + } + + const EntryType &GetEntry() const { + return *entry_; + } + + bool Error() const { return error_; } + + private: + // Comparison functor used to compare stream IDs in the heap + struct Compare { + Compare(const vector<string> *keys) : keys_(keys) {} + + bool operator()(size_t i, size_t j) const { + return (*keys_)[i] > (*keys_)[j]; + }; + + private: + const vector<string> *keys_; + }; + + // Position the stream with ID 'id' at the position corresponding + // to the lower bound for key 'find_key' + void LowerBound(size_t id, const string &find_key) { + ifstream *strm = streams_[id]; + const vector<int64> &positions = positions_[id]; + size_t low = 0, high = positions.size() - 1; + + while (low < high) { + size_t mid = (low + high)/2; + strm->seekg(positions[mid]); + string key; + ReadType(*strm, &key); + if (key > find_key) { + high = mid; + } else if (key < find_key) { + low = mid + 
1; + } else { + for (size_t i = mid; i > low; --i) { + strm->seekg(positions[i - 1]); + ReadType(*strm, &key); + if (key != find_key) { + strm->seekg(positions[i]); + return; + } + } + strm->seekg(positions[low]); + return; + } + } + strm->seekg(positions[low]); + } + + // Add all streams to the heap + void MakeHeap() { + heap_.clear(); + for (size_t i = 0; i < streams_.size(); ++i) { + ReadType(*streams_[i], &(keys_[i])); + if (!*streams_[i]) { + FSTERROR() << "STTableReader: error reading file: " << sources_[i]; + error_ = true; + return; + } + heap_.push_back(i); + } + make_heap(heap_.begin(), heap_.end(), *compare_); + PopHeap(); + } + + // Position the stream with the lowest key at the top + // of the heap, set 'current_' to the ID of that stream + // and read the current entry from that stream + void PopHeap() { + pop_heap(heap_.begin(), heap_.end(), *compare_); + current_ = heap_.back(); + if (entry_) + delete entry_; + entry_ = entry_reader_(*streams_[current_]); + if (!entry_) + error_ = true; + if (!*streams_[current_]) { + FSTERROR() << "STTableReader: error reading entry for key: " + << keys_[current_] << ", file: " << sources_[current_]; + error_ = true; + } + } + + + EntryReader entry_reader_; // Read functor for 'EntryType' + vector<ifstream*> streams_; // Input streams + vector<string> sources_; // and corresponding file names + vector<vector<int64> > positions_; // Index of positions for each stream + vector<string> keys_; // Lowest unread key for each stream + vector<int64> heap_; // Heap containing ID of streams with unread keys + int64 current_; // Id of current stream to be read + Compare *compare_; // Functor comparing stream IDs for the heap + mutable EntryType *entry_; // Pointer to the currently read entry + bool error_; + + DISALLOW_COPY_AND_ASSIGN(STTableReader); +}; + + +// String-to-type table header reading function template on the entry header +// type 'H' having a member function: +// Read(istream &strm, const string &filename); +// 
Checks that 'filename' is an STTable and call the H::Read() on the last +// entry in the STTable. +template <class H> +bool ReadSTTableHeader(const string &filename, H *header) { + ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); + int32 magic_number = 0, file_version = 0; + ReadType(strm, &magic_number); + ReadType(strm, &file_version); + if (magic_number != kSTTableMagicNumber) { + LOG(ERROR) << "ReadSTTableHeader: wrong file type: " << filename; + return false; + } + if (file_version != kSTTableFileVersion) { + LOG(ERROR) << "ReadSTTableHeader: wrong file version: " << filename; + return false; + } + int64 i = -1; + strm.seekg(-static_cast<int>(sizeof(int64)), ios_base::end); + ReadType(strm, &i); // Read number of entries + if (!strm) { + LOG(ERROR) << "ReadSTTableHeader: error reading file: " << filename; + return false; + } + if (i == 0) return true; // No entry header to read + strm.seekg(-2 * static_cast<int>(sizeof(int64)), ios_base::end); + ReadType(strm, &i); // Read position for last entry in file + strm.seekg(i); + string key; + ReadType(strm, &key); + header->Read(strm, filename + ":" + key); + if (!strm) { + LOG(ERROR) << "ReadSTTableHeader: error reading file: " << filename; + return false; + } + return true; +} + +bool IsSTTable(const string &filename); + +} // namespace fst + +#endif // FST_EXTENSIONS_FAR_STTABLE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/bitmap-index.h b/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/bitmap-index.h new file mode 100644 index 0000000..f5a5ba7 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/bitmap-index.h @@ -0,0 +1,183 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jeffrey Sorensen) + +#ifndef FST_EXTENSIONS_NGRAM_BITMAP_INDEX_H_ +#define FST_EXTENSIONS_NGRAM_BITMAP_INDEX_H_ + +#include <vector> +using std::vector; + +#include <fst/compat.h> + +// This class is a bitstring storage class with an index that allows +// seeking to the Nth set or clear bit in time O(Log(N)) where N is +// the length of the bit vector. In addition, it allows counting set or +// clear bits over ranges in constant time. +// +// This is accomplished by maintaining an "secondary" index of limited +// size in bits that maintains a running count of the number of bits set +// in each block of bitmap data. A block is defined as the number of +// uint64 values that can fit in the secondary index before an overflow +// occurs. +// +// To handle overflows, a "primary" index containing a running count of +// bits set in each block is created using the type uint64. 
+ +namespace fst { + +class BitmapIndex { + public: + static size_t StorageSize(size_t size) { + return ((size + kStorageBlockMask) >> kStorageLogBitSize); + } + + BitmapIndex() : bits_(NULL), size_(0) { } + + bool Get(size_t index) const { + return (bits_[index >> kStorageLogBitSize] & + (kOne << (index & kStorageBlockMask))) != 0; + } + + static void Set(uint64* bits, size_t index) { + bits[index >> kStorageLogBitSize] |= (kOne << (index & kStorageBlockMask)); + } + + static void Clear(uint64* bits, size_t index) { + bits[index >> kStorageLogBitSize] &= ~(kOne << (index & kStorageBlockMask)); + } + + size_t Bits() const { + return size_; + } + + size_t ArraySize() const { + return StorageSize(size_); + } + + // Returns the number of one bits in the bitmap + size_t GetOnesCount() const { + return primary_index_[primary_index_size() - 1]; + } + + // Returns the number of one bits in positions 0 to limit - 1. + // REQUIRES: limit <= Bits() + size_t Rank1(size_t end) const; + + // Returns the number of one bits in the range start to end - 1. + // REQUIRES: limit <= Bits() + size_t GetOnesCountInRange(size_t start, size_t end) const { + return Rank1(end) - Rank1(start); + } + + // Returns the number of zero bits in positions 0 to limit - 1. + // REQUIRES: limit <= Bits() + size_t Rank0(size_t end) const { + return end - Rank1(end); + } + + // Returns the number of zero bits in the range start to end - 1. + // REQUIRES: limit <= Bits() + size_t GetZeroesCountInRange(size_t start, size_t end) const { + return end - start - GetOnesCountInRange(start, end); + } + + // Return true if any bit between begin inclusive and end exclusive + // is set. 0 <= begin <= end <= Bits() is required. 
+ // + bool TestRange(size_t start, size_t end) const { + return Rank1(end) > Rank1(start); + } + + // Returns the offset to the nth set bit (zero based) + // or Bits() if index >= number of ones + size_t Select1(size_t bit_index) const; + + // Returns the offset to the nth clear bit (zero based) + // or Bits() if index > number of + size_t Select0(size_t bit_index) const; + + // Rebuilds from index for the associated Bitmap, should be called + // whenever changes have been made to the Bitmap or else behavior + // of the indexed bitmap methods will be undefined. + void BuildIndex(const uint64 *bits, size_t size); + + // the secondary index accumulates counts until it can possibly overflow + // this constant computes the number of uint64 units that can fit into + // units the size of uint16. + static const uint64 kOne = 1; + static const uint32 kStorageBitSize = 64; + static const uint32 kStorageLogBitSize = 6; + static const uint32 kSecondaryBlockSize = ((1 << 16) - 1) + >> kStorageLogBitSize; + + private: + static const uint32 kStorageBlockMask = kStorageBitSize - 1; + + // returns, from the index, the count of ones up to array_index + size_t get_index_ones_count(size_t array_index) const; + + // because the indexes, both primary and secondary, contain a running + // count of the population of one bits contained in [0,i), there is + // no reason to have an element in the zeroth position as this value would + // necessarily be zero. (The bits are indexed in a zero based way.) Thus + // we don't store the 0th element in either index. Both of the following + // functions, if greater than 0, must be decremented by one before retreiving + // the value from the corresponding array. 
+ // returns the 1 + the block that contains the bitindex in question + // the inverted version works the same but looks for zeros using an inverted + // view of the index + size_t find_primary_block(size_t bit_index) const; + + size_t find_inverted_primary_block(size_t bit_index) const; + + // similarly, the secondary index (which resets its count to zero at + // the end of every kSecondaryBlockSize entries) does not store the element + // at 0. Note that the rem_bit_index parameter is the number of bits + // within the secondary block, after the bits accounted for by the primary + // block have been removed (i.e. the remaining bits) And, because we + // reset to zero with each new block, there is no need to store those + // actual zeros. + // returns 1 + the secondary block that contains the bitindex in question + size_t find_secondary_block(size_t block, size_t rem_bit_index) const; + + size_t find_inverted_secondary_block(size_t block, size_t rem_bit_index) + const; + + // We create a primary index based upon the number of secondary index + // blocks. The primary index uses fields wide enough to accomodate any + // index of the bitarray so cannot overflow + // The primary index is the actual running + // count of one bits set for all blocks (and, thus, all uint64s). + size_t primary_index_size() const { + return (ArraySize() + kSecondaryBlockSize - 1) / kSecondaryBlockSize; + } + + const uint64* bits_; + size_t size_; + + // The primary index contains the running popcount of all blocks + // which means the nth value contains the popcounts of + // [0,n*kSecondaryBlockSize], however, the 0th element is omitted. + vector<uint32> primary_index_; + // The secondary index contains the running popcount of the associated + // bitmap. It is the same length (in units of uint16) as the + // bitmap's map is in units of uint64s. 
+ vector<uint16> secondary_index_; +}; + +} // end namespace fst + +#endif // FST_EXTENSIONS_NGRAM_BITMAP_INDEX_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/ngram-fst.h b/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/ngram-fst.h new file mode 100644 index 0000000..d113fb3 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/ngram-fst.h @@ -0,0 +1,934 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jeffrey Sorensen) +// +#ifndef FST_EXTENSIONS_NGRAM_NGRAM_FST_H_ +#define FST_EXTENSIONS_NGRAM_NGRAM_FST_H_ + +#include <stddef.h> +#include <string.h> +#include <algorithm> +#include <string> +#include <vector> +using std::vector; + +#include <fst/compat.h> +#include <fst/fstlib.h> +#include <fst/mapped-file.h> +#include <fst/extensions/ngram/bitmap-index.h> + +// NgramFst implements a n-gram language model based upon the LOUDS data +// structure. Please refer to "Unary Data Strucutres for Language Models" +// http://research.google.com/pubs/archive/37218.pdf + +namespace fst { +template <class A> class NGramFst; +template <class A> class NGramFstMatcher; + +// Instance data containing mutable state for bookkeeping repeated access to +// the same state. 
+template <class A> +struct NGramFstInst { + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + StateId state_; + size_t num_futures_; + size_t offset_; + size_t node_; + StateId node_state_; + vector<Label> context_; + StateId context_state_; + NGramFstInst() + : state_(kNoStateId), node_state_(kNoStateId), + context_state_(kNoStateId) { } +}; + +// Implementation class for LOUDS based NgramFst interface +template <class A> +class NGramFstImpl : public FstImpl<A> { + using FstImpl<A>::SetInputSymbols; + using FstImpl<A>::SetOutputSymbols; + using FstImpl<A>::SetType; + using FstImpl<A>::WriteHeader; + + friend class ArcIterator<NGramFst<A> >; + friend class NGramFstMatcher<A>; + + public: + using FstImpl<A>::InputSymbols; + using FstImpl<A>::SetProperties; + using FstImpl<A>::Properties; + + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + NGramFstImpl() : data_region_(0), data_(0), owned_(false) { + SetType("ngram"); + SetInputSymbols(NULL); + SetOutputSymbols(NULL); + SetProperties(kStaticProperties); + } + + NGramFstImpl(const Fst<A> &fst, vector<StateId>* order_out); + + ~NGramFstImpl() { + if (owned_) { + delete [] data_; + } + delete data_region_; + } + + static NGramFstImpl<A>* Read(istream &strm, // NOLINT + const FstReadOptions &opts) { + NGramFstImpl<A>* impl = new NGramFstImpl(); + FstHeader hdr; + if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) return 0; + uint64 num_states, num_futures, num_final; + const size_t offset = sizeof(num_states) + sizeof(num_futures) + + sizeof(num_final); + // Peek at num_states and num_futures to see how much more needs to be read. 
+ strm.read(reinterpret_cast<char *>(&num_states), sizeof(num_states)); + strm.read(reinterpret_cast<char *>(&num_futures), sizeof(num_futures)); + strm.read(reinterpret_cast<char *>(&num_final), sizeof(num_final)); + size_t size = Storage(num_states, num_futures, num_final); + MappedFile *data_region = MappedFile::Allocate(size); + char *data = reinterpret_cast<char *>(data_region->mutable_data()); + // Copy num_states, num_futures and num_final back into data. + memcpy(data, reinterpret_cast<char *>(&num_states), sizeof(num_states)); + memcpy(data + sizeof(num_states), reinterpret_cast<char *>(&num_futures), + sizeof(num_futures)); + memcpy(data + sizeof(num_states) + sizeof(num_futures), + reinterpret_cast<char *>(&num_final), sizeof(num_final)); + strm.read(data + offset, size - offset); + if (!strm) { + delete impl; + return NULL; + } + impl->Init(data, false, data_region); + return impl; + } + + bool Write(ostream &strm, // NOLINT + const FstWriteOptions &opts) const { + FstHeader hdr; + hdr.SetStart(Start()); + hdr.SetNumStates(num_states_); + WriteHeader(strm, opts, kFileVersion, &hdr); + strm.write(data_, Storage(num_states_, num_futures_, num_final_)); + return strm; + } + + StateId Start() const { + return 1; + } + + Weight Final(StateId state) const { + if (final_index_.Get(state)) { + return final_probs_[final_index_.Rank1(state)]; + } else { + return Weight::Zero(); + } + } + + size_t NumArcs(StateId state, NGramFstInst<A> *inst = NULL) const { + if (inst == NULL) { + const size_t next_zero = future_index_.Select0(state + 1); + const size_t this_zero = future_index_.Select0(state); + return next_zero - this_zero - 1; + } + SetInstFuture(state, inst); + return inst->num_futures_ + ((state == 0) ? 0 : 1); + } + + size_t NumInputEpsilons(StateId state) const { + // State 0 has no parent, thus no backoff. 
+ if (state == 0) return 0; + return 1; + } + + size_t NumOutputEpsilons(StateId state) const { + return NumInputEpsilons(state); + } + + StateId NumStates() const { + return num_states_; + } + + void InitStateIterator(StateIteratorData<A>* data) const { + data->base = 0; + data->nstates = num_states_; + } + + static size_t Storage(uint64 num_states, uint64 num_futures, + uint64 num_final) { + uint64 b64; + Weight weight; + Label label; + size_t offset = sizeof(num_states) + sizeof(num_futures) + + sizeof(num_final); + offset += sizeof(b64) * ( + BitmapIndex::StorageSize(num_states * 2 + 1) + + BitmapIndex::StorageSize(num_futures + num_states + 1) + + BitmapIndex::StorageSize(num_states)); + offset += (num_states + 1) * sizeof(label) + num_futures * sizeof(label); + // Pad for alignemnt, see + // http://en.wikipedia.org/wiki/Data_structure_alignment#Computing_padding + offset = (offset + sizeof(weight) - 1) & ~(sizeof(weight) - 1); + offset += (num_states + 1) * sizeof(weight) + num_final * sizeof(weight) + + (num_futures + 1) * sizeof(weight); + return offset; + } + + void SetInstFuture(StateId state, NGramFstInst<A> *inst) const { + if (inst->state_ != state) { + inst->state_ = state; + const size_t next_zero = future_index_.Select0(state + 1); + const size_t this_zero = future_index_.Select0(state); + inst->num_futures_ = next_zero - this_zero - 1; + inst->offset_ = future_index_.Rank1(future_index_.Select0(state) + 1); + } + } + + void SetInstNode(NGramFstInst<A> *inst) const { + if (inst->node_state_ != inst->state_) { + inst->node_state_ = inst->state_; + inst->node_ = context_index_.Select1(inst->state_); + } + } + + void SetInstContext(NGramFstInst<A> *inst) const { + SetInstNode(inst); + if (inst->context_state_ != inst->state_) { + inst->context_state_ = inst->state_; + inst->context_.clear(); + size_t node = inst->node_; + while (node != 0) { + inst->context_.push_back(context_words_[context_index_.Rank1(node)]); + node = 
context_index_.Select1(context_index_.Rank0(node) - 1); + } + } + } + + // Access to the underlying representation + const char* GetData(size_t* data_size) const { + *data_size = Storage(num_states_, num_futures_, num_final_); + return data_; + } + + void Init(const char* data, bool owned, MappedFile *file = 0); + + const vector<Label> &GetContext(StateId s, NGramFstInst<A> *inst) const { + SetInstFuture(s, inst); + SetInstContext(inst); + return inst->context_; + } + + private: + StateId Transition(const vector<Label> &context, Label future) const; + + // Properties always true for this Fst class. + static const uint64 kStaticProperties = kAcceptor | kIDeterministic | + kODeterministic | kEpsilons | kIEpsilons | kOEpsilons | kILabelSorted | + kOLabelSorted | kWeighted | kCyclic | kInitialAcyclic | kNotTopSorted | + kAccessible | kCoAccessible | kNotString | kExpanded; + // Current file format version. + static const int kFileVersion = 4; + // Minimum file format version supported. + static const int kMinFileVersion = 4; + + MappedFile *data_region_; + const char* data_; + bool owned_; // True if we own data_ + uint64 num_states_, num_futures_, num_final_; + size_t root_num_children_; + const Label *root_children_; + size_t root_first_child_; + // borrowed references + const uint64 *context_, *future_, *final_; + const Label *context_words_, *future_words_; + const Weight *backoff_, *final_probs_, *future_probs_; + BitmapIndex context_index_; + BitmapIndex future_index_; + BitmapIndex final_index_; + + void operator=(const NGramFstImpl<A> &); // Disallow +}; + +template<typename A> +NGramFstImpl<A>::NGramFstImpl(const Fst<A> &fst, vector<StateId>* order_out) + : data_region_(0), data_(0), owned_(false) { + typedef A Arc; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + typedef typename Arc::StateId StateId; + SetType("ngram"); + SetInputSymbols(fst.InputSymbols()); + SetOutputSymbols(fst.OutputSymbols()); + 
SetProperties(kStaticProperties); + + // Check basic requirements for an OpenGRM language model Fst. + int64 props = kAcceptor | kIDeterministic | kIEpsilons | kILabelSorted; + if (fst.Properties(props, true) != props) { + FSTERROR() << "NGramFst only accepts OpenGRM langauge models as input"; + SetProperties(kError, kError); + return; + } + + int64 num_states = CountStates(fst); + Label* context = new Label[num_states]; + + // Find the unigram state by starting from the start state, following + // epsilons. + StateId unigram = fst.Start(); + while (1) { + if (unigram == kNoStateId) { + FSTERROR() << "Could not identify unigram state."; + SetProperties(kError, kError); + return; + } + ArcIterator<Fst<A> > aiter(fst, unigram); + if (aiter.Done()) { + LOG(WARNING) << "Unigram state " << unigram << " has no arcs."; + break; + } + if (aiter.Value().ilabel != 0) break; + unigram = aiter.Value().nextstate; + } + + // Each state's context is determined by the subtree it is under from the + // unigram state. + queue<pair<StateId, Label> > label_queue; + vector<bool> visited(num_states); + // Force an epsilon link to the start state. + label_queue.push(make_pair(fst.Start(), 0)); + for (ArcIterator<Fst<A> > aiter(fst, unigram); + !aiter.Done(); aiter.Next()) { + label_queue.push(make_pair(aiter.Value().nextstate, aiter.Value().ilabel)); + } + // investigate states in breadth first fashion to assign context words. 
+ while (!label_queue.empty()) { + pair<StateId, Label> &now = label_queue.front(); + if (!visited[now.first]) { + context[now.first] = now.second; + visited[now.first] = true; + for (ArcIterator<Fst<A> > aiter(fst, now.first); + !aiter.Done(); aiter.Next()) { + const Arc &arc = aiter.Value(); + if (arc.ilabel != 0) { + label_queue.push(make_pair(arc.nextstate, now.second)); + } + } + } + label_queue.pop(); + } + visited.clear(); + + // The arc from the start state should be assigned an epsilon to put it + // in front of the all other labels (which makes Start state 1 after + // unigram which is state 0). + context[fst.Start()] = 0; + + // Build the tree of contexts fst by reversing the epsilon arcs from fst. + VectorFst<Arc> context_fst; + uint64 num_final = 0; + for (int i = 0; i < num_states; ++i) { + if (fst.Final(i) != Weight::Zero()) { + ++num_final; + } + context_fst.SetFinal(context_fst.AddState(), fst.Final(i)); + } + context_fst.SetStart(unigram); + context_fst.SetInputSymbols(fst.InputSymbols()); + context_fst.SetOutputSymbols(fst.OutputSymbols()); + int64 num_context_arcs = 0; + int64 num_futures = 0; + for (StateIterator<Fst<A> > siter(fst); !siter.Done(); siter.Next()) { + const StateId &state = siter.Value(); + num_futures += fst.NumArcs(state) - fst.NumInputEpsilons(state); + ArcIterator<Fst<A> > aiter(fst, state); + if (!aiter.Done()) { + const Arc &arc = aiter.Value(); + // this arc goes from state to arc.nextstate, so create an arc from + // arc.nextstate to state to reverse it. 
+ if (arc.ilabel == 0) { + context_fst.AddArc(arc.nextstate, Arc(context[state], context[state], + arc.weight, state)); + num_context_arcs++; + } + } + } + if (num_context_arcs != context_fst.NumStates() - 1) { + FSTERROR() << "Number of contexts arcs != number of states - 1"; + SetProperties(kError, kError); + return; + } + if (context_fst.NumStates() != num_states) { + FSTERROR() << "Number of contexts != number of states"; + SetProperties(kError, kError); + return; + } + int64 context_props = context_fst.Properties(kIDeterministic | + kILabelSorted, true); + if (!(context_props & kIDeterministic)) { + FSTERROR() << "Input fst is not structured properly"; + SetProperties(kError, kError); + return; + } + if (!(context_props & kILabelSorted)) { + ArcSort(&context_fst, ILabelCompare<Arc>()); + } + + delete [] context; + + uint64 b64; + Weight weight; + Label label = kNoLabel; + const size_t storage = Storage(num_states, num_futures, num_final); + MappedFile *data_region = MappedFile::Allocate(storage); + char *data = reinterpret_cast<char *>(data_region->mutable_data()); + memset(data, 0, storage); + size_t offset = 0; + memcpy(data + offset, reinterpret_cast<char *>(&num_states), + sizeof(num_states)); + offset += sizeof(num_states); + memcpy(data + offset, reinterpret_cast<char *>(&num_futures), + sizeof(num_futures)); + offset += sizeof(num_futures); + memcpy(data + offset, reinterpret_cast<char *>(&num_final), + sizeof(num_final)); + offset += sizeof(num_final); + uint64* context_bits = reinterpret_cast<uint64*>(data + offset); + offset += BitmapIndex::StorageSize(num_states * 2 + 1) * sizeof(b64); + uint64* future_bits = reinterpret_cast<uint64*>(data + offset); + offset += + BitmapIndex::StorageSize(num_futures + num_states + 1) * sizeof(b64); + uint64* final_bits = reinterpret_cast<uint64*>(data + offset); + offset += BitmapIndex::StorageSize(num_states) * sizeof(b64); + Label* context_words = reinterpret_cast<Label*>(data + offset); + offset += (num_states + 
1) * sizeof(label); + Label* future_words = reinterpret_cast<Label*>(data + offset); + offset += num_futures * sizeof(label); + offset = (offset + sizeof(weight) - 1) & ~(sizeof(weight) - 1); + Weight* backoff = reinterpret_cast<Weight*>(data + offset); + offset += (num_states + 1) * sizeof(weight); + Weight* final_probs = reinterpret_cast<Weight*>(data + offset); + offset += num_final * sizeof(weight); + Weight* future_probs = reinterpret_cast<Weight*>(data + offset); + int64 context_arc = 0, future_arc = 0, context_bit = 0, future_bit = 0, + final_bit = 0; + + // pseudo-root bits + BitmapIndex::Set(context_bits, context_bit++); + ++context_bit; + context_words[context_arc] = label; + backoff[context_arc] = Weight::Zero(); + context_arc++; + + ++future_bit; + if (order_out) { + order_out->clear(); + order_out->resize(num_states); + } + + queue<StateId> context_q; + context_q.push(context_fst.Start()); + StateId state_number = 0; + while (!context_q.empty()) { + const StateId &state = context_q.front(); + if (order_out) { + (*order_out)[state] = state_number; + } + + const Weight &final = context_fst.Final(state); + if (final != Weight::Zero()) { + BitmapIndex::Set(final_bits, state_number); + final_probs[final_bit] = final; + ++final_bit; + } + + for (ArcIterator<VectorFst<A> > aiter(context_fst, state); + !aiter.Done(); aiter.Next()) { + const Arc &arc = aiter.Value(); + context_words[context_arc] = arc.ilabel; + backoff[context_arc] = arc.weight; + ++context_arc; + BitmapIndex::Set(context_bits, context_bit++); + context_q.push(arc.nextstate); + } + ++context_bit; + + for (ArcIterator<Fst<A> > aiter(fst, state); !aiter.Done(); aiter.Next()) { + const Arc &arc = aiter.Value(); + if (arc.ilabel != 0) { + future_words[future_arc] = arc.ilabel; + future_probs[future_arc] = arc.weight; + ++future_arc; + BitmapIndex::Set(future_bits, future_bit++); + } + } + ++future_bit; + ++state_number; + context_q.pop(); + } + + if ((state_number != num_states) || + (context_bit 
!= num_states * 2 + 1) || + (context_arc != num_states) || + (future_arc != num_futures) || + (future_bit != num_futures + num_states + 1) || + (final_bit != num_final)) { + FSTERROR() << "Structure problems detected during construction"; + SetProperties(kError, kError); + return; + } + + Init(data, false, data_region); +} + +template<typename A> +inline void NGramFstImpl<A>::Init(const char* data, bool owned, + MappedFile *data_region) { + if (owned_) { + delete [] data_; + } + delete data_region_; + data_region_ = data_region; + owned_ = owned; + data_ = data; + size_t offset = 0; + num_states_ = *(reinterpret_cast<const uint64*>(data_ + offset)); + offset += sizeof(num_states_); + num_futures_ = *(reinterpret_cast<const uint64*>(data_ + offset)); + offset += sizeof(num_futures_); + num_final_ = *(reinterpret_cast<const uint64*>(data_ + offset)); + offset += sizeof(num_final_); + uint64 bits; + size_t context_bits = num_states_ * 2 + 1; + size_t future_bits = num_futures_ + num_states_ + 1; + context_ = reinterpret_cast<const uint64*>(data_ + offset); + offset += BitmapIndex::StorageSize(context_bits) * sizeof(bits); + future_ = reinterpret_cast<const uint64*>(data_ + offset); + offset += BitmapIndex::StorageSize(future_bits) * sizeof(bits); + final_ = reinterpret_cast<const uint64*>(data_ + offset); + offset += BitmapIndex::StorageSize(num_states_) * sizeof(bits); + context_words_ = reinterpret_cast<const Label*>(data_ + offset); + offset += (num_states_ + 1) * sizeof(*context_words_); + future_words_ = reinterpret_cast<const Label*>(data_ + offset); + offset += num_futures_ * sizeof(*future_words_); + offset = (offset + sizeof(*backoff_) - 1) & ~(sizeof(*backoff_) - 1); + backoff_ = reinterpret_cast<const Weight*>(data_ + offset); + offset += (num_states_ + 1) * sizeof(*backoff_); + final_probs_ = reinterpret_cast<const Weight*>(data_ + offset); + offset += num_final_ * sizeof(*final_probs_); + future_probs_ = reinterpret_cast<const Weight*>(data_ + offset); + 
+ context_index_.BuildIndex(context_, context_bits); + future_index_.BuildIndex(future_, future_bits); + final_index_.BuildIndex(final_, num_states_); + + const size_t node_rank = context_index_.Rank1(0); + root_first_child_ = context_index_.Select0(node_rank) + 1; + if (context_index_.Get(root_first_child_) == false) { + FSTERROR() << "Missing unigrams"; + SetProperties(kError, kError); + return; + } + const size_t last_child = context_index_.Select0(node_rank + 1) - 1; + root_num_children_ = last_child - root_first_child_ + 1; + root_children_ = context_words_ + context_index_.Rank1(root_first_child_); +} + +template<typename A> +inline typename A::StateId NGramFstImpl<A>::Transition( + const vector<Label> &context, Label future) const { + size_t num_children = root_num_children_; + const Label *children = root_children_; + const Label *loc = lower_bound(children, children + num_children, future); + if (loc == children + num_children || *loc != future) { + return context_index_.Rank1(0); + } + size_t node = root_first_child_ + loc - children; + size_t node_rank = context_index_.Rank1(node); + size_t first_child = context_index_.Select0(node_rank) + 1; + if (context_index_.Get(first_child) == false) { + return context_index_.Rank1(node); + } + size_t last_child = context_index_.Select0(node_rank + 1) - 1; + num_children = last_child - first_child + 1; + for (int word = context.size() - 1; word >= 0; --word) { + children = context_words_ + context_index_.Rank1(first_child); + loc = lower_bound(children, children + last_child - first_child + 1, + context[word]); + if (loc == children + last_child - first_child + 1 || + *loc != context[word]) { + break; + } + node = first_child + loc - children; + node_rank = context_index_.Rank1(node); + first_child = context_index_.Select0(node_rank) + 1; + if (context_index_.Get(first_child) == false) break; + last_child = context_index_.Select0(node_rank + 1) - 1; + } + return context_index_.Rank1(node); +} + 
+/*****************************************************************************/ +template<class A> +class NGramFst : public ImplToExpandedFst<NGramFstImpl<A> > { + friend class ArcIterator<NGramFst<A> >; + friend class NGramFstMatcher<A>; + + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef NGramFstImpl<A> Impl; + + explicit NGramFst(const Fst<A> &dst) + : ImplToExpandedFst<Impl>(new Impl(dst, NULL)) {} + + NGramFst(const Fst<A> &fst, vector<StateId>* order_out) + : ImplToExpandedFst<Impl>(new Impl(fst, order_out)) {} + + // Because the NGramFstImpl is a const stateless data structure, there + // is never a need to do anything beside copy the reference. + NGramFst(const NGramFst<A> &fst, bool safe = false) + : ImplToExpandedFst<Impl>(fst, false) {} + + NGramFst() : ImplToExpandedFst<Impl>(new Impl()) {} + + // Non-standard constructor to initialize NGramFst directly from data. + NGramFst(const char* data, bool owned) : ImplToExpandedFst<Impl>(new Impl()) { + GetImpl()->Init(data, owned, NULL); + } + + // Get method that gets the data associated with Init(). + const char* GetData(size_t* data_size) const { + return GetImpl()->GetData(data_size); + } + + const vector<Label> GetContext(StateId s) const { + return GetImpl()->GetContext(s, &inst_); + } + + virtual size_t NumArcs(StateId s) const { + return GetImpl()->NumArcs(s, &inst_); + } + + virtual NGramFst<A>* Copy(bool safe = false) const { + return new NGramFst(*this, safe); + } + + static NGramFst<A>* Read(istream &strm, const FstReadOptions &opts) { + Impl* impl = Impl::Read(strm, opts); + return impl ? 
new NGramFst<A>(impl) : 0; + } + + static NGramFst<A>* Read(const string &filename) { + if (!filename.empty()) { + ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); + if (!strm) { + LOG(ERROR) << "NGramFst::Read: Can't open file: " << filename; + return 0; + } + return Read(strm, FstReadOptions(filename)); + } else { + return Read(cin, FstReadOptions("standard input")); + } + } + + virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { + return GetImpl()->Write(strm, opts); + } + + virtual bool Write(const string &filename) const { + return Fst<A>::WriteFile(filename); + } + + virtual inline void InitStateIterator(StateIteratorData<A>* data) const { + GetImpl()->InitStateIterator(data); + } + + virtual inline void InitArcIterator( + StateId s, ArcIteratorData<A>* data) const; + + virtual MatcherBase<A>* InitMatcher(MatchType match_type) const { + return new NGramFstMatcher<A>(*this, match_type); + } + + private: + explicit NGramFst(Impl* impl) : ImplToExpandedFst<Impl>(impl) {} + + Impl* GetImpl() const { + return + ImplToExpandedFst<Impl, ExpandedFst<A> >::GetImpl(); + } + + void SetImpl(Impl* impl, bool own_impl = true) { + ImplToExpandedFst<Impl, Fst<A> >::SetImpl(impl, own_impl); + } + + mutable NGramFstInst<A> inst_; +}; + +template <class A> inline void +NGramFst<A>::InitArcIterator(StateId s, ArcIteratorData<A>* data) const { + GetImpl()->SetInstFuture(s, &inst_); + GetImpl()->SetInstNode(&inst_); + data->base = new ArcIterator<NGramFst<A> >(*this, s); +} + +/*****************************************************************************/ +template <class A> +class NGramFstMatcher : public MatcherBase<A> { + public: + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + NGramFstMatcher(const NGramFst<A> &fst, MatchType match_type) + : fst_(fst), inst_(fst.inst_), match_type_(match_type), + current_loop_(false), + loop_(kNoLabel, 0, A::Weight::One(), 
kNoStateId) { + if (match_type_ == MATCH_OUTPUT) { + swap(loop_.ilabel, loop_.olabel); + } + } + + NGramFstMatcher(const NGramFstMatcher<A> &matcher, bool safe = false) + : fst_(matcher.fst_), inst_(matcher.inst_), + match_type_(matcher.match_type_), current_loop_(false), + loop_(kNoLabel, 0, A::Weight::One(), kNoStateId) { + if (match_type_ == MATCH_OUTPUT) { + swap(loop_.ilabel, loop_.olabel); + } + } + + virtual NGramFstMatcher<A>* Copy(bool safe = false) const { + return new NGramFstMatcher<A>(*this, safe); + } + + virtual MatchType Type(bool test) const { + return match_type_; + } + + virtual const Fst<A> &GetFst() const { + return fst_; + } + + virtual uint64 Properties(uint64 props) const { + return props; + } + + private: + virtual void SetState_(StateId s) { + fst_.GetImpl()->SetInstFuture(s, &inst_); + current_loop_ = false; + } + + virtual bool Find_(Label label) { + const Label nolabel = kNoLabel; + done_ = true; + if (label == 0 || label == nolabel) { + if (label == 0) { + current_loop_ = true; + loop_.nextstate = inst_.state_; + } + // The unigram state has no epsilon arc. 
+ if (inst_.state_ != 0) { + arc_.ilabel = arc_.olabel = 0; + fst_.GetImpl()->SetInstNode(&inst_); + arc_.nextstate = fst_.GetImpl()->context_index_.Rank1( + fst_.GetImpl()->context_index_.Select1( + fst_.GetImpl()->context_index_.Rank0(inst_.node_) - 1)); + arc_.weight = fst_.GetImpl()->backoff_[inst_.state_]; + done_ = false; + } + } else { + const Label *start = fst_.GetImpl()->future_words_ + inst_.offset_; + const Label *end = start + inst_.num_futures_; + const Label* search = lower_bound(start, end, label); + if (search != end && *search == label) { + size_t state = search - start; + arc_.ilabel = arc_.olabel = label; + arc_.weight = fst_.GetImpl()->future_probs_[inst_.offset_ + state]; + fst_.GetImpl()->SetInstContext(&inst_); + arc_.nextstate = fst_.GetImpl()->Transition(inst_.context_, label); + done_ = false; + } + } + return !Done_(); + } + + virtual bool Done_() const { + return !current_loop_ && done_; + } + + virtual const Arc& Value_() const { + return (current_loop_) ? loop_ : arc_; + } + + virtual void Next_() { + if (current_loop_) { + current_loop_ = false; + } else { + done_ = true; + } + } + + const NGramFst<A>& fst_; + NGramFstInst<A> inst_; + MatchType match_type_; // Supplied by caller + bool done_; + Arc arc_; + bool current_loop_; // Current arc is the implicit loop + Arc loop_; +}; + +/*****************************************************************************/ +template<class A> +class ArcIterator<NGramFst<A> > : public ArcIteratorBase<A> { + public: + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + ArcIterator(const NGramFst<A> &fst, StateId state) + : lazy_(~0), impl_(fst.GetImpl()), i_(0), flags_(kArcValueFlags) { + inst_ = fst.inst_; + impl_->SetInstFuture(state, &inst_); + impl_->SetInstNode(&inst_); + } + + bool Done() const { + return i_ >= ((inst_.node_ == 0) ? 
inst_.num_futures_ : + inst_.num_futures_ + 1); + } + + const Arc &Value() const { + bool eps = (inst_.node_ != 0 && i_ == 0); + StateId state = (inst_.node_ == 0) ? i_ : i_ - 1; + if (flags_ & lazy_ & (kArcILabelValue | kArcOLabelValue)) { + arc_.ilabel = + arc_.olabel = eps ? 0 : impl_->future_words_[inst_.offset_ + state]; + lazy_ &= ~(kArcILabelValue | kArcOLabelValue); + } + if (flags_ & lazy_ & kArcNextStateValue) { + if (eps) { + arc_.nextstate = impl_->context_index_.Rank1( + impl_->context_index_.Select1( + impl_->context_index_.Rank0(inst_.node_) - 1)); + } else { + if (lazy_ & kArcNextStateValue) { + impl_->SetInstContext(&inst_); // first time only. + } + arc_.nextstate = + impl_->Transition(inst_.context_, + impl_->future_words_[inst_.offset_ + state]); + } + lazy_ &= ~kArcNextStateValue; + } + if (flags_ & lazy_ & kArcWeightValue) { + arc_.weight = eps ? impl_->backoff_[inst_.state_] : + impl_->future_probs_[inst_.offset_ + state]; + lazy_ &= ~kArcWeightValue; + } + return arc_; + } + + void Next() { + ++i_; + lazy_ = ~0; + } + + size_t Position() const { return i_; } + + void Reset() { + i_ = 0; + lazy_ = ~0; + } + + void Seek(size_t a) { + if (i_ != a) { + i_ = a; + lazy_ = ~0; + } + } + + uint32 Flags() const { + return flags_; + } + + void SetFlags(uint32 f, uint32 m) { + flags_ &= ~m; + flags_ |= (f & kArcValueFlags); + } + + private: + virtual bool Done_() const { return Done(); } + virtual const Arc& Value_() const { return Value(); } + virtual void Next_() { Next(); } + virtual size_t Position_() const { return Position(); } + virtual void Reset_() { Reset(); } + virtual void Seek_(size_t a) { Seek(a); } + uint32 Flags_() const { return Flags(); } + void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); } + + mutable Arc arc_; + mutable uint32 lazy_; + const NGramFstImpl<A> *impl_; + mutable NGramFstInst<A> inst_; + + size_t i_; + uint32 flags_; + + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + 
+/*****************************************************************************/ +// Specialization for NGramFst; see generic version in fst.h +// for sample usage (but use the ProdLmFst type!). This version +// should inline. +template <class A> +class StateIterator<NGramFst<A> > : public StateIteratorBase<A> { + public: + typedef typename A::StateId StateId; + + explicit StateIterator(const NGramFst<A> &fst) + : s_(0), num_states_(fst.NumStates()) { } + + bool Done() const { return s_ >= num_states_; } + StateId Value() const { return s_; } + void Next() { ++s_; } + void Reset() { s_ = 0; } + + private: + virtual bool Done_() const { return Done(); } + virtual StateId Value_() const { return Value(); } + virtual void Next_() { Next(); } + virtual void Reset_() { Reset(); } + + StateId s_, num_states_; + + DISALLOW_COPY_AND_ASSIGN(StateIterator); +}; +} // namespace fst +#endif // FST_EXTENSIONS_NGRAM_NGRAM_FST_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/nthbit.h b/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/nthbit.h new file mode 100644 index 0000000..d4a9a5a --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/extensions/ngram/nthbit.h @@ -0,0 +1,46 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jeffrey Sorensen) +// [email protected] (Doug Rohde) + +#ifndef FST_EXTENSIONS_NGRAM_NTHBIT_H_ +#define FST_EXTENSIONS_NGRAM_NTHBIT_H_ + +#include <fst/types.h> + +extern uint32 nth_bit_bit_offset[]; + +inline uint32 nth_bit(uint64 v, uint32 r) { + uint32 shift = 0; + uint32 c = __builtin_popcount(v & 0xffffffff); + uint32 mask = -(r > c); + r -= c & mask; + shift += (32 & mask); + + c = __builtin_popcount((v >> shift) & 0xffff); + mask = -(r > c); + r -= c & mask; + shift += (16 & mask); + + c = __builtin_popcount((v >> shift) & 0xff); + mask = -(r > c); + r -= c & mask; + shift += (8 & mask); + + return shift + ((nth_bit_bit_offset[(v >> shift) & 0xff] >> + ((r - 1) << 2)) & 0xf); +} + +#endif // FST_EXTENSIONS_NGRAM_NTHBIT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/factor-weight.h b/kaldi_io/src/tools/openfst/include/fst/factor-weight.h new file mode 100644 index 0000000..685155c --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/factor-weight.h @@ -0,0 +1,475 @@ +// factor-weight.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) +// +// \file +// Classes to factor weights in an FST. 
+ +#ifndef FST_LIB_FACTOR_WEIGHT_H__ +#define FST_LIB_FACTOR_WEIGHT_H__ + +#include <algorithm> +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <string> +#include <utility> +using std::pair; using std::make_pair; +#include <vector> +using std::vector; + +#include <fst/cache.h> +#include <fst/test-properties.h> + + +namespace fst { + +const uint32 kFactorFinalWeights = 0x00000001; +const uint32 kFactorArcWeights = 0x00000002; + +template <class Arc> +struct FactorWeightOptions : CacheOptions { + typedef typename Arc::Label Label; + float delta; + uint32 mode; // factor arc weights and/or final weights + Label final_ilabel; // input label of arc created when factoring final w's + Label final_olabel; // output label of arc created when factoring final w's + + FactorWeightOptions(const CacheOptions &opts, float d, + uint32 m = kFactorArcWeights | kFactorFinalWeights, + Label il = 0, Label ol = 0) + : CacheOptions(opts), delta(d), mode(m), final_ilabel(il), + final_olabel(ol) {} + + explicit FactorWeightOptions( + float d, uint32 m = kFactorArcWeights | kFactorFinalWeights, + Label il = 0, Label ol = 0) + : delta(d), mode(m), final_ilabel(il), final_olabel(ol) {} + + FactorWeightOptions(uint32 m = kFactorArcWeights | kFactorFinalWeights, + Label il = 0, Label ol = 0) + : delta(kDelta), mode(m), final_ilabel(il), final_olabel(ol) {} +}; + + +// A factor iterator takes as argument a weight w and returns a +// sequence of pairs of weights (xi,yi) such that the sum of the +// products xi times yi is equal to w. If w is fully factored, +// the iterator should return nothing. +// +// template <class W> +// class FactorIterator { +// public: +// FactorIterator(W w); +// bool Done() const; +// void Next(); +// pair<W, W> Value() const; +// void Reset(); +// } + + +// Factor trivially. 
+template <class W> +class IdentityFactor { + public: + IdentityFactor(const W &w) {} + bool Done() const { return true; } + void Next() {} + pair<W, W> Value() const { return make_pair(W::One(), W::One()); } // unused + void Reset() {} +}; + + +// Factor a StringWeight w as 'ab' where 'a' is a label. +template <typename L, StringType S = STRING_LEFT> +class StringFactor { + public: + StringFactor(const StringWeight<L, S> &w) + : weight_(w), done_(w.Size() <= 1) {} + + bool Done() const { return done_; } + + void Next() { done_ = true; } + + pair< StringWeight<L, S>, StringWeight<L, S> > Value() const { + StringWeightIterator<L, S> iter(weight_); + StringWeight<L, S> w1(iter.Value()); + StringWeight<L, S> w2; + for (iter.Next(); !iter.Done(); iter.Next()) + w2.PushBack(iter.Value()); + return make_pair(w1, w2); + } + + void Reset() { done_ = weight_.Size() <= 1; } + + private: + StringWeight<L, S> weight_; + bool done_; +}; + + +// Factor a GallicWeight using StringFactor. +template <class L, class W, StringType S = STRING_LEFT> +class GallicFactor { + public: + GallicFactor(const GallicWeight<L, W, S> &w) + : weight_(w), done_(w.Value1().Size() <= 1) {} + + bool Done() const { return done_; } + + void Next() { done_ = true; } + + pair< GallicWeight<L, W, S>, GallicWeight<L, W, S> > Value() const { + StringFactor<L, S> iter(weight_.Value1()); + GallicWeight<L, W, S> w1(iter.Value().first, weight_.Value2()); + GallicWeight<L, W, S> w2(iter.Value().second, W::One()); + return make_pair(w1, w2); + } + + void Reset() { done_ = weight_.Value1().Size() <= 1; } + + private: + GallicWeight<L, W, S> weight_; + bool done_; +}; + + +// Implementation class for FactorWeight +template <class A, class F> +class FactorWeightFstImpl + : public CacheImpl<A> { + public: + using FstImpl<A>::SetType; + using FstImpl<A>::SetProperties; + using FstImpl<A>::SetInputSymbols; + using FstImpl<A>::SetOutputSymbols; + + using CacheBaseImpl< CacheState<A> >::PushArc; + using CacheBaseImpl< 
CacheState<A> >::HasStart; + using CacheBaseImpl< CacheState<A> >::HasFinal; + using CacheBaseImpl< CacheState<A> >::HasArcs; + using CacheBaseImpl< CacheState<A> >::SetArcs; + using CacheBaseImpl< CacheState<A> >::SetFinal; + using CacheBaseImpl< CacheState<A> >::SetStart; + + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef F FactorIterator; + + struct Element { + Element() {} + + Element(StateId s, Weight w) : state(s), weight(w) {} + + StateId state; // Input state Id + Weight weight; // Residual weight + }; + + FactorWeightFstImpl(const Fst<A> &fst, const FactorWeightOptions<A> &opts) + : CacheImpl<A>(opts), + fst_(fst.Copy()), + delta_(opts.delta), + mode_(opts.mode), + final_ilabel_(opts.final_ilabel), + final_olabel_(opts.final_olabel) { + SetType("factor_weight"); + uint64 props = fst.Properties(kFstProperties, false); + SetProperties(FactorWeightProperties(props), kCopyProperties); + + SetInputSymbols(fst.InputSymbols()); + SetOutputSymbols(fst.OutputSymbols()); + + if (mode_ == 0) + LOG(WARNING) << "FactorWeightFst: factor mode is set to 0: " + << "factoring neither arc weights nor final weights."; + } + + FactorWeightFstImpl(const FactorWeightFstImpl<A, F> &impl) + : CacheImpl<A>(impl), + fst_(impl.fst_->Copy(true)), + delta_(impl.delta_), + mode_(impl.mode_), + final_ilabel_(impl.final_ilabel_), + final_olabel_(impl.final_olabel_) { + SetType("factor_weight"); + SetProperties(impl.Properties(), kCopyProperties); + SetInputSymbols(impl.InputSymbols()); + SetOutputSymbols(impl.OutputSymbols()); + } + + ~FactorWeightFstImpl() { + delete fst_; + } + + StateId Start() { + if (!HasStart()) { + StateId s = fst_->Start(); + if (s == kNoStateId) + return kNoStateId; + StateId start = FindState(Element(fst_->Start(), Weight::One())); + SetStart(start); + } + return CacheImpl<A>::Start(); + } + + Weight Final(StateId s) { + if (!HasFinal(s)) { + const Element &e = elements_[s]; 
+ // TODO: fix so cast is unnecessary + Weight w = e.state == kNoStateId + ? e.weight + : (Weight) Times(e.weight, fst_->Final(e.state)); + FactorIterator f(w); + if (!(mode_ & kFactorFinalWeights) || f.Done()) + SetFinal(s, w); + else + SetFinal(s, Weight::Zero()); + } + return CacheImpl<A>::Final(s); + } + + size_t NumArcs(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<A>::NumArcs(s); + } + + size_t NumInputEpsilons(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<A>::NumInputEpsilons(s); + } + + size_t NumOutputEpsilons(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<A>::NumOutputEpsilons(s); + } + + uint64 Properties() const { return Properties(kFstProperties); } + + // Set error if found; return FST impl properties. + uint64 Properties(uint64 mask) const { + if ((mask & kError) && fst_->Properties(kError, false)) + SetProperties(kError, kError); + return FstImpl<Arc>::Properties(mask); + } + + void InitArcIterator(StateId s, ArcIteratorData<A> *data) { + if (!HasArcs(s)) + Expand(s); + CacheImpl<A>::InitArcIterator(s, data); + } + + + // Find state corresponding to an element. Create new state + // if element not found. + StateId FindState(const Element &e) { + if (!(mode_ & kFactorArcWeights) && e.weight == Weight::One()) { + while (unfactored_.size() <= e.state) + unfactored_.push_back(kNoStateId); + if (unfactored_[e.state] == kNoStateId) { + unfactored_[e.state] = elements_.size(); + elements_.push_back(e); + } + return unfactored_[e.state]; + } else { + typename ElementMap::iterator eit = element_map_.find(e); + if (eit != element_map_.end()) { + return (*eit).second; + } else { + StateId s = elements_.size(); + elements_.push_back(e); + element_map_.insert(pair<const Element, StateId>(e, s)); + return s; + } + } + } + + // Computes the outgoing transitions from a state, creating new destination + // states as needed. 
+ void Expand(StateId s) { + Element e = elements_[s]; + if (e.state != kNoStateId) { + for (ArcIterator< Fst<A> > ait(*fst_, e.state); + !ait.Done(); + ait.Next()) { + const A &arc = ait.Value(); + Weight w = Times(e.weight, arc.weight); + FactorIterator fit(w); + if (!(mode_ & kFactorArcWeights) || fit.Done()) { + StateId d = FindState(Element(arc.nextstate, Weight::One())); + PushArc(s, Arc(arc.ilabel, arc.olabel, w, d)); + } else { + for (; !fit.Done(); fit.Next()) { + const pair<Weight, Weight> &p = fit.Value(); + StateId d = FindState(Element(arc.nextstate, + p.second.Quantize(delta_))); + PushArc(s, Arc(arc.ilabel, arc.olabel, p.first, d)); + } + } + } + } + + if ((mode_ & kFactorFinalWeights) && + ((e.state == kNoStateId) || + (fst_->Final(e.state) != Weight::Zero()))) { + Weight w = e.state == kNoStateId + ? e.weight + : Times(e.weight, fst_->Final(e.state)); + for (FactorIterator fit(w); + !fit.Done(); + fit.Next()) { + const pair<Weight, Weight> &p = fit.Value(); + StateId d = FindState(Element(kNoStateId, + p.second.Quantize(delta_))); + PushArc(s, Arc(final_ilabel_, final_olabel_, p.first, d)); + } + } + SetArcs(s); + } + + private: + static const size_t kPrime = 7853; + + // Equality function for Elements, assume weights have been quantized. + class ElementEqual { + public: + bool operator()(const Element &x, const Element &y) const { + return x.state == y.state && x.weight == y.weight; + } + }; + + // Hash function for Elements to Fst states. 
+ class ElementKey { + public: + size_t operator()(const Element &x) const { + return static_cast<size_t>(x.state * kPrime + x.weight.Hash()); + } + private: + }; + + typedef unordered_map<Element, StateId, ElementKey, ElementEqual> ElementMap; + + const Fst<A> *fst_; + float delta_; + uint32 mode_; // factoring arc and/or final weights + Label final_ilabel_; // ilabel of arc created when factoring final w's + Label final_olabel_; // olabel of arc created when factoring final w's + vector<Element> elements_; // mapping Fst state to Elements + ElementMap element_map_; // mapping Elements to Fst state + // mapping between old/new 'StateId' for states that do not need to + // be factored when 'mode_' is '0' or 'kFactorFinalWeights' + vector<StateId> unfactored_; + + void operator=(const FactorWeightFstImpl<A, F> &); // disallow +}; + +template <class A, class F> const size_t FactorWeightFstImpl<A, F>::kPrime; + + +// FactorWeightFst takes as template parameter a FactorIterator as +// defined above. The result of weight factoring is a transducer +// equivalent to the input whose path weights have been factored +// according to the FactorIterator. States and transitions will be +// added as necessary. The algorithm is a generalization to arbitrary +// weights of the second step of the input epsilon-normalization +// algorithm due to Mohri, "Generic epsilon-removal and input +// epsilon-normalization algorithms for weighted transducers", +// International Journal of Computer Science 13(1): 129-143 (2002). +// +// This class attaches interface to implementation and handles +// reference counting, delegating most methods to ImplToFst. 
+template <class A, class F> +class FactorWeightFst : public ImplToFst< FactorWeightFstImpl<A, F> > { + public: + friend class ArcIterator< FactorWeightFst<A, F> >; + friend class StateIterator< FactorWeightFst<A, F> >; + + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef CacheState<A> State; + typedef FactorWeightFstImpl<A, F> Impl; + + FactorWeightFst(const Fst<A> &fst) + : ImplToFst<Impl>(new Impl(fst, FactorWeightOptions<A>())) {} + + FactorWeightFst(const Fst<A> &fst, const FactorWeightOptions<A> &opts) + : ImplToFst<Impl>(new Impl(fst, opts)) {} + + // See Fst<>::Copy() for doc. + FactorWeightFst(const FactorWeightFst<A, F> &fst, bool copy) + : ImplToFst<Impl>(fst, copy) {} + + // Get a copy of this FactorWeightFst. See Fst<>::Copy() for further doc. + virtual FactorWeightFst<A, F> *Copy(bool copy = false) const { + return new FactorWeightFst<A, F>(*this, copy); + } + + virtual inline void InitStateIterator(StateIteratorData<A> *data) const; + + virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { + GetImpl()->InitArcIterator(s, data); + } + + private: + // Makes visible to friends. + Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } + + void operator=(const FactorWeightFst<A, F> &fst); // Disallow +}; + + +// Specialization for FactorWeightFst. +template<class A, class F> +class StateIterator< FactorWeightFst<A, F> > + : public CacheStateIterator< FactorWeightFst<A, F> > { + public: + explicit StateIterator(const FactorWeightFst<A, F> &fst) + : CacheStateIterator< FactorWeightFst<A, F> >(fst, fst.GetImpl()) {} +}; + + +// Specialization for FactorWeightFst. 
+template <class A, class F> +class ArcIterator< FactorWeightFst<A, F> > + : public CacheArcIterator< FactorWeightFst<A, F> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const FactorWeightFst<A, F> &fst, StateId s) + : CacheArcIterator< FactorWeightFst<A, F> >(fst.GetImpl(), s) { + if (!fst.GetImpl()->HasArcs(s)) + fst.GetImpl()->Expand(s); + } + + private: + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + +template <class A, class F> inline +void FactorWeightFst<A, F>::InitStateIterator(StateIteratorData<A> *data) const +{ + data->base = new StateIterator< FactorWeightFst<A, F> >(*this); +} + + +} // namespace fst + +#endif // FST_LIB_FACTOR_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/flags.h b/kaldi_io/src/tools/openfst/include/fst/flags.h new file mode 100644 index 0000000..b3bb66c --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/flags.h @@ -0,0 +1,242 @@ +// flags.h +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Author: [email protected] (Michael Riley) +// +// \file +// Google-style flag handling declarations and inline definitions. 
+ +#ifndef FST_LIB_FLAGS_H__ +#define FST_LIB_FLAGS_H__ + +#include <iostream> +#include <map> +#include <set> +#include <sstream> +#include <string> + +#include <fst/types.h> +#include <fst/lock.h> + +using std::string; + +// +// FLAGS USAGE: +// +// Definition example: +// +// DEFINE_int32(length, 0, "length"); +// +// This defines variable FLAGS_length, initialized to 0. +// +// Declaration example: +// +// DECLARE_int32(length); +// +// SET_FLAGS() can be used to set flags from the command line +// using, for example, '--length=2'. +// +// ShowUsage() can be used to print out command and flag usage. +// + +#define DECLARE_bool(name) extern bool FLAGS_ ## name +#define DECLARE_string(name) extern string FLAGS_ ## name +#define DECLARE_int32(name) extern int32 FLAGS_ ## name +#define DECLARE_int64(name) extern int64 FLAGS_ ## name +#define DECLARE_double(name) extern double FLAGS_ ## name + +template <typename T> +struct FlagDescription { + FlagDescription(T *addr, const char *doc, const char *type, + const char *file, const T val) + : address(addr), + doc_string(doc), + type_name(type), + file_name(file), + default_value(val) {} + + T *address; + const char *doc_string; + const char *type_name; + const char *file_name; + const T default_value; +}; + +template <typename T> +class FlagRegister { + public: + static FlagRegister<T> *GetRegister() { + fst::FstOnceInit(®ister_init_, &FlagRegister<T>::Init); + return register_; + } + + const FlagDescription<T> &GetFlagDescription(const string &name) const { + fst::MutexLock l(register_lock_); + typename std::map< string, FlagDescription<T> >::const_iterator it = + flag_table_.find(name); + return it != flag_table_.end() ? 
it->second : 0; + } + void SetDescription(const string &name, + const FlagDescription<T> &desc) { + fst::MutexLock l(register_lock_); + flag_table_.insert(make_pair(name, desc)); + } + + bool SetFlag(const string &val, bool *address) const { + if (val == "true" || val == "1" || val.empty()) { + *address = true; + return true; + } else if (val == "false" || val == "0") { + *address = false; + return true; + } + else { + return false; + } + } + bool SetFlag(const string &val, string *address) const { + *address = val; + return true; + } + bool SetFlag(const string &val, int32 *address) const { + char *p = 0; + *address = strtol(val.c_str(), &p, 0); + return !val.empty() && *p == '\0'; + } + bool SetFlag(const string &val, int64 *address) const { + char *p = 0; + *address = strtoll(val.c_str(), &p, 0); + return !val.empty() && *p == '\0'; + } + bool SetFlag(const string &val, double *address) const { + char *p = 0; + *address = strtod(val.c_str(), &p); + return !val.empty() && *p == '\0'; + } + + bool SetFlag(const string &arg, const string &val) const { + for (typename std::map< string, FlagDescription<T> >::const_iterator it = + flag_table_.begin(); + it != flag_table_.end(); + ++it) { + const string &name = it->first; + const FlagDescription<T> &desc = it->second; + if (arg == name) + return SetFlag(val, desc.address); + } + return false; + } + + void GetUsage(std::set< std::pair<string, string> > *usage_set) const { + for (typename std::map< string, + FlagDescription<T> >::const_iterator it = + flag_table_.begin(); + it != flag_table_.end(); + ++it) { + const string &name = it->first; + const FlagDescription<T> &desc = it->second; + string usage = " --" + name; + usage += ": type = "; + usage += desc.type_name; + usage += ", default = "; + usage += GetDefault(desc.default_value) + "\n "; + usage += desc.doc_string; + usage_set->insert(make_pair(desc.file_name, usage)); + } + } + + private: + static void Init() { + register_lock_ = new fst::Mutex; + register_ = new 
FlagRegister<T>; + } + + std::map< string, FlagDescription<T> > flag_table_; + + string GetDefault(bool default_value) const { + return default_value ? "true" : "false"; + } + + string GetDefault(const string &default_value) const { + return "\"" + default_value + "\""; + } + + template<typename V> string GetDefault(const V& default_value) const { + std::ostringstream strm; + strm << default_value; + return strm.str(); + } + + static fst::FstOnceType register_init_; // ensures only called once + static fst::Mutex* register_lock_; // multithreading lock + static FlagRegister<T> *register_; +}; + +template <class T> +fst::FstOnceType FlagRegister<T>::register_init_ = fst::FST_ONCE_INIT; + +template <class T> +fst::Mutex *FlagRegister<T>::register_lock_ = 0; + +template <class T> +FlagRegister<T> *FlagRegister<T>::register_ = 0; + + +template <typename T> +class FlagRegisterer { + public: + FlagRegisterer(const string &name, const FlagDescription<T> &desc) { + FlagRegister<T> *registr = FlagRegister<T>::GetRegister(); + registr->SetDescription(name, desc); + } + + private: + DISALLOW_COPY_AND_ASSIGN(FlagRegisterer); +}; + + +#define DEFINE_VAR(type, name, value, doc) \ + type FLAGS_ ## name = value; \ + static FlagRegisterer<type> \ + name ## _flags_registerer(#name, FlagDescription<type>(&FLAGS_ ## name, \ + doc, \ + #type, \ + __FILE__, \ + value)) + +#define DEFINE_bool(name, value, doc) DEFINE_VAR(bool, name, value, doc) +#define DEFINE_string(name, value, doc) \ + DEFINE_VAR(string, name, value, doc) +#define DEFINE_int32(name, value, doc) DEFINE_VAR(int32, name, value, doc) +#define DEFINE_int64(name, value, doc) DEFINE_VAR(int64, name, value, doc) +#define DEFINE_double(name, value, doc) DEFINE_VAR(double, name, value, doc) + + +// Temporary directory +DECLARE_string(tmpdir); + +void SetFlags(const char *usage, int *argc, char ***argv, bool remove_flags, + const char *src = ""); + +#define SET_FLAGS(usage, argc, argv, rmflags) \ +SetFlags(usage, argc, argv, 
rmflags, __FILE__) + +// Deprecated - for backward compatibility +inline void InitFst(const char *usage, int *argc, char ***argv, bool rmflags) { + return SetFlags(usage, argc, argv, rmflags); +} + +void ShowUsage(bool long_usage = true); + +#endif // FST_LIB_FLAGS_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/float-weight.h b/kaldi_io/src/tools/openfst/include/fst/float-weight.h new file mode 100644 index 0000000..eb22638 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/float-weight.h @@ -0,0 +1,601 @@ +// float-weight.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Float weight set and associated semiring operation definitions. 
+// + +#ifndef FST_LIB_FLOAT_WEIGHT_H__ +#define FST_LIB_FLOAT_WEIGHT_H__ + +#include <limits> +#include <climits> +#include <sstream> +#include <string> + +#include <fst/util.h> +#include <fst/weight.h> + + +namespace fst { + +// numeric limits class +template <class T> +class FloatLimits { + public: + static const T PosInfinity() { + static const T pos_infinity = numeric_limits<T>::infinity(); + return pos_infinity; + } + + static const T NegInfinity() { + static const T neg_infinity = -PosInfinity(); + return neg_infinity; + } + + static const T NumberBad() { + static const T number_bad = numeric_limits<T>::quiet_NaN(); + return number_bad; + } + +}; + +// weight class to be templated on floating-points types +template <class T = float> +class FloatWeightTpl { + public: + FloatWeightTpl() {} + + FloatWeightTpl(T f) : value_(f) {} + + FloatWeightTpl(const FloatWeightTpl<T> &w) : value_(w.value_) {} + + FloatWeightTpl<T> &operator=(const FloatWeightTpl<T> &w) { + value_ = w.value_; + return *this; + } + + istream &Read(istream &strm) { + return ReadType(strm, &value_); + } + + ostream &Write(ostream &strm) const { + return WriteType(strm, value_); + } + + size_t Hash() const { + union { + T f; + size_t s; + } u; + u.s = 0; + u.f = value_; + return u.s; + } + + const T &Value() const { return value_; } + + protected: + void SetValue(const T &f) { value_ = f; } + + inline static string GetPrecisionString() { + int64 size = sizeof(T); + if (size == sizeof(float)) return ""; + size *= CHAR_BIT; + + string result; + Int64ToStr(size, &result); + return result; + } + + private: + T value_; +}; + +// Single-precision float weight +typedef FloatWeightTpl<float> FloatWeight; + +template <class T> +inline bool operator==(const FloatWeightTpl<T> &w1, + const FloatWeightTpl<T> &w2) { + // Volatile qualifier thwarts over-aggressive compiler optimizations + // that lead to problems esp. with NaturalLess(). 
+ volatile T v1 = w1.Value(); + volatile T v2 = w2.Value(); + return v1 == v2; +} + +inline bool operator==(const FloatWeightTpl<double> &w1, + const FloatWeightTpl<double> &w2) { + return operator==<double>(w1, w2); +} + +inline bool operator==(const FloatWeightTpl<float> &w1, + const FloatWeightTpl<float> &w2) { + return operator==<float>(w1, w2); +} + +template <class T> +inline bool operator!=(const FloatWeightTpl<T> &w1, + const FloatWeightTpl<T> &w2) { + return !(w1 == w2); +} + +inline bool operator!=(const FloatWeightTpl<double> &w1, + const FloatWeightTpl<double> &w2) { + return operator!=<double>(w1, w2); +} + +inline bool operator!=(const FloatWeightTpl<float> &w1, + const FloatWeightTpl<float> &w2) { + return operator!=<float>(w1, w2); +} + +template <class T> +inline bool ApproxEqual(const FloatWeightTpl<T> &w1, + const FloatWeightTpl<T> &w2, + float delta = kDelta) { + return w1.Value() <= w2.Value() + delta && w2.Value() <= w1.Value() + delta; +} + +template <class T> +inline ostream &operator<<(ostream &strm, const FloatWeightTpl<T> &w) { + if (w.Value() == FloatLimits<T>::PosInfinity()) + return strm << "Infinity"; + else if (w.Value() == FloatLimits<T>::NegInfinity()) + return strm << "-Infinity"; + else if (w.Value() != w.Value()) // Fails for NaN + return strm << "BadNumber"; + else + return strm << w.Value(); +} + +template <class T> +inline istream &operator>>(istream &strm, FloatWeightTpl<T> &w) { + string s; + strm >> s; + if (s == "Infinity") { + w = FloatWeightTpl<T>(FloatLimits<T>::PosInfinity()); + } else if (s == "-Infinity") { + w = FloatWeightTpl<T>(FloatLimits<T>::NegInfinity()); + } else { + char *p; + T f = strtod(s.c_str(), &p); + if (p < s.c_str() + s.size()) + strm.clear(std::ios::badbit); + else + w = FloatWeightTpl<T>(f); + } + return strm; +} + + +// Tropical semiring: (min, +, inf, 0) +template <class T> +class TropicalWeightTpl : public FloatWeightTpl<T> { + public: + using FloatWeightTpl<T>::Value; + + typedef 
TropicalWeightTpl<T> ReverseWeight; + + TropicalWeightTpl() : FloatWeightTpl<T>() {} + + TropicalWeightTpl(T f) : FloatWeightTpl<T>(f) {} + + TropicalWeightTpl(const TropicalWeightTpl<T> &w) : FloatWeightTpl<T>(w) {} + + static const TropicalWeightTpl<T> Zero() { + return TropicalWeightTpl<T>(FloatLimits<T>::PosInfinity()); } + + static const TropicalWeightTpl<T> One() { + return TropicalWeightTpl<T>(0.0F); } + + static const TropicalWeightTpl<T> NoWeight() { + return TropicalWeightTpl<T>(FloatLimits<T>::NumberBad()); } + + static const string &Type() { + static const string type = "tropical" + + FloatWeightTpl<T>::GetPrecisionString(); + return type; + } + + bool Member() const { + // First part fails for IEEE NaN + return Value() == Value() && Value() != FloatLimits<T>::NegInfinity(); + } + + TropicalWeightTpl<T> Quantize(float delta = kDelta) const { + if (Value() == FloatLimits<T>::NegInfinity() || + Value() == FloatLimits<T>::PosInfinity() || + Value() != Value()) + return *this; + else + return TropicalWeightTpl<T>(floor(Value()/delta + 0.5F) * delta); + } + + TropicalWeightTpl<T> Reverse() const { return *this; } + + static uint64 Properties() { + return kLeftSemiring | kRightSemiring | kCommutative | + kPath | kIdempotent; + } +}; + +// Single precision tropical weight +typedef TropicalWeightTpl<float> TropicalWeight; + +template <class T> +inline TropicalWeightTpl<T> Plus(const TropicalWeightTpl<T> &w1, + const TropicalWeightTpl<T> &w2) { + if (!w1.Member() || !w2.Member()) + return TropicalWeightTpl<T>::NoWeight(); + return w1.Value() < w2.Value() ? 
w1 : w2; +} + +inline TropicalWeightTpl<float> Plus(const TropicalWeightTpl<float> &w1, + const TropicalWeightTpl<float> &w2) { + return Plus<float>(w1, w2); +} + +inline TropicalWeightTpl<double> Plus(const TropicalWeightTpl<double> &w1, + const TropicalWeightTpl<double> &w2) { + return Plus<double>(w1, w2); +} + +template <class T> +inline TropicalWeightTpl<T> Times(const TropicalWeightTpl<T> &w1, + const TropicalWeightTpl<T> &w2) { + if (!w1.Member() || !w2.Member()) + return TropicalWeightTpl<T>::NoWeight(); + T f1 = w1.Value(), f2 = w2.Value(); + if (f1 == FloatLimits<T>::PosInfinity()) + return w1; + else if (f2 == FloatLimits<T>::PosInfinity()) + return w2; + else + return TropicalWeightTpl<T>(f1 + f2); +} + +inline TropicalWeightTpl<float> Times(const TropicalWeightTpl<float> &w1, + const TropicalWeightTpl<float> &w2) { + return Times<float>(w1, w2); +} + +inline TropicalWeightTpl<double> Times(const TropicalWeightTpl<double> &w1, + const TropicalWeightTpl<double> &w2) { + return Times<double>(w1, w2); +} + +template <class T> +inline TropicalWeightTpl<T> Divide(const TropicalWeightTpl<T> &w1, + const TropicalWeightTpl<T> &w2, + DivideType typ = DIVIDE_ANY) { + if (!w1.Member() || !w2.Member()) + return TropicalWeightTpl<T>::NoWeight(); + T f1 = w1.Value(), f2 = w2.Value(); + if (f2 == FloatLimits<T>::PosInfinity()) + return FloatLimits<T>::NumberBad(); + else if (f1 == FloatLimits<T>::PosInfinity()) + return FloatLimits<T>::PosInfinity(); + else + return TropicalWeightTpl<T>(f1 - f2); +} + +inline TropicalWeightTpl<float> Divide(const TropicalWeightTpl<float> &w1, + const TropicalWeightTpl<float> &w2, + DivideType typ = DIVIDE_ANY) { + return Divide<float>(w1, w2, typ); +} + +inline TropicalWeightTpl<double> Divide(const TropicalWeightTpl<double> &w1, + const TropicalWeightTpl<double> &w2, + DivideType typ = DIVIDE_ANY) { + return Divide<double>(w1, w2, typ); +} + + +// Log semiring: (log(e^-x + e^y), +, inf, 0) +template <class T> +class LogWeightTpl : 
public FloatWeightTpl<T> { + public: + using FloatWeightTpl<T>::Value; + + typedef LogWeightTpl ReverseWeight; + + LogWeightTpl() : FloatWeightTpl<T>() {} + + LogWeightTpl(T f) : FloatWeightTpl<T>(f) {} + + LogWeightTpl(const LogWeightTpl<T> &w) : FloatWeightTpl<T>(w) {} + + static const LogWeightTpl<T> Zero() { + return LogWeightTpl<T>(FloatLimits<T>::PosInfinity()); + } + + static const LogWeightTpl<T> One() { + return LogWeightTpl<T>(0.0F); + } + + static const LogWeightTpl<T> NoWeight() { + return LogWeightTpl<T>(FloatLimits<T>::NumberBad()); } + + static const string &Type() { + static const string type = "log" + FloatWeightTpl<T>::GetPrecisionString(); + return type; + } + + bool Member() const { + // First part fails for IEEE NaN + return Value() == Value() && Value() != FloatLimits<T>::NegInfinity(); + } + + LogWeightTpl<T> Quantize(float delta = kDelta) const { + if (Value() == FloatLimits<T>::NegInfinity() || + Value() == FloatLimits<T>::PosInfinity() || + Value() != Value()) + return *this; + else + return LogWeightTpl<T>(floor(Value()/delta + 0.5F) * delta); + } + + LogWeightTpl<T> Reverse() const { return *this; } + + static uint64 Properties() { + return kLeftSemiring | kRightSemiring | kCommutative; + } +}; + +// Single-precision log weight +typedef LogWeightTpl<float> LogWeight; +// Double-precision log weight +typedef LogWeightTpl<double> Log64Weight; + +template <class T> +inline T LogExp(T x) { return log(1.0F + exp(-x)); } + +template <class T> +inline LogWeightTpl<T> Plus(const LogWeightTpl<T> &w1, + const LogWeightTpl<T> &w2) { + T f1 = w1.Value(), f2 = w2.Value(); + if (f1 == FloatLimits<T>::PosInfinity()) + return w2; + else if (f2 == FloatLimits<T>::PosInfinity()) + return w1; + else if (f1 > f2) + return LogWeightTpl<T>(f2 - LogExp(f1 - f2)); + else + return LogWeightTpl<T>(f1 - LogExp(f2 - f1)); +} + +inline LogWeightTpl<float> Plus(const LogWeightTpl<float> &w1, + const LogWeightTpl<float> &w2) { + return Plus<float>(w1, w2); +} + 
+inline LogWeightTpl<double> Plus(const LogWeightTpl<double> &w1, + const LogWeightTpl<double> &w2) { + return Plus<double>(w1, w2); +} + +template <class T> +inline LogWeightTpl<T> Times(const LogWeightTpl<T> &w1, + const LogWeightTpl<T> &w2) { + if (!w1.Member() || !w2.Member()) + return LogWeightTpl<T>::NoWeight(); + T f1 = w1.Value(), f2 = w2.Value(); + if (f1 == FloatLimits<T>::PosInfinity()) + return w1; + else if (f2 == FloatLimits<T>::PosInfinity()) + return w2; + else + return LogWeightTpl<T>(f1 + f2); +} + +inline LogWeightTpl<float> Times(const LogWeightTpl<float> &w1, + const LogWeightTpl<float> &w2) { + return Times<float>(w1, w2); +} + +inline LogWeightTpl<double> Times(const LogWeightTpl<double> &w1, + const LogWeightTpl<double> &w2) { + return Times<double>(w1, w2); +} + +template <class T> +inline LogWeightTpl<T> Divide(const LogWeightTpl<T> &w1, + const LogWeightTpl<T> &w2, + DivideType typ = DIVIDE_ANY) { + if (!w1.Member() || !w2.Member()) + return LogWeightTpl<T>::NoWeight(); + T f1 = w1.Value(), f2 = w2.Value(); + if (f2 == FloatLimits<T>::PosInfinity()) + return FloatLimits<T>::NumberBad(); + else if (f1 == FloatLimits<T>::PosInfinity()) + return FloatLimits<T>::PosInfinity(); + else + return LogWeightTpl<T>(f1 - f2); +} + +inline LogWeightTpl<float> Divide(const LogWeightTpl<float> &w1, + const LogWeightTpl<float> &w2, + DivideType typ = DIVIDE_ANY) { + return Divide<float>(w1, w2, typ); +} + +inline LogWeightTpl<double> Divide(const LogWeightTpl<double> &w1, + const LogWeightTpl<double> &w2, + DivideType typ = DIVIDE_ANY) { + return Divide<double>(w1, w2, typ); +} + +// MinMax semiring: (min, max, inf, -inf) +template <class T> +class MinMaxWeightTpl : public FloatWeightTpl<T> { + public: + using FloatWeightTpl<T>::Value; + + typedef MinMaxWeightTpl<T> ReverseWeight; + + MinMaxWeightTpl() : FloatWeightTpl<T>() {} + + MinMaxWeightTpl(T f) : FloatWeightTpl<T>(f) {} + + MinMaxWeightTpl(const MinMaxWeightTpl<T> &w) : FloatWeightTpl<T>(w) {} + + 
static const MinMaxWeightTpl<T> Zero() { + return MinMaxWeightTpl<T>(FloatLimits<T>::PosInfinity()); + } + + static const MinMaxWeightTpl<T> One() { + return MinMaxWeightTpl<T>(FloatLimits<T>::NegInfinity()); + } + + static const MinMaxWeightTpl<T> NoWeight() { + return MinMaxWeightTpl<T>(FloatLimits<T>::NumberBad()); } + + static const string &Type() { + static const string type = "minmax" + + FloatWeightTpl<T>::GetPrecisionString(); + return type; + } + + bool Member() const { + // Fails for IEEE NaN + return Value() == Value(); + } + + MinMaxWeightTpl<T> Quantize(float delta = kDelta) const { + // If one of infinities, or a NaN + if (Value() == FloatLimits<T>::NegInfinity() || + Value() == FloatLimits<T>::PosInfinity() || + Value() != Value()) + return *this; + else + return MinMaxWeightTpl<T>(floor(Value()/delta + 0.5F) * delta); + } + + MinMaxWeightTpl<T> Reverse() const { return *this; } + + static uint64 Properties() { + return kLeftSemiring | kRightSemiring | kCommutative | kIdempotent | kPath; + } +}; + +// Single-precision min-max weight +typedef MinMaxWeightTpl<float> MinMaxWeight; + +// Min +template <class T> +inline MinMaxWeightTpl<T> Plus( + const MinMaxWeightTpl<T> &w1, const MinMaxWeightTpl<T> &w2) { + if (!w1.Member() || !w2.Member()) + return MinMaxWeightTpl<T>::NoWeight(); + return w1.Value() < w2.Value() ? w1 : w2; +} + +inline MinMaxWeightTpl<float> Plus( + const MinMaxWeightTpl<float> &w1, const MinMaxWeightTpl<float> &w2) { + return Plus<float>(w1, w2); +} + +inline MinMaxWeightTpl<double> Plus( + const MinMaxWeightTpl<double> &w1, const MinMaxWeightTpl<double> &w2) { + return Plus<double>(w1, w2); +} + +// Max +template <class T> +inline MinMaxWeightTpl<T> Times( + const MinMaxWeightTpl<T> &w1, const MinMaxWeightTpl<T> &w2) { + if (!w1.Member() || !w2.Member()) + return MinMaxWeightTpl<T>::NoWeight(); + return w1.Value() >= w2.Value() ? 
w1 : w2; +} + +inline MinMaxWeightTpl<float> Times( + const MinMaxWeightTpl<float> &w1, const MinMaxWeightTpl<float> &w2) { + return Times<float>(w1, w2); +} + +inline MinMaxWeightTpl<double> Times( + const MinMaxWeightTpl<double> &w1, const MinMaxWeightTpl<double> &w2) { + return Times<double>(w1, w2); +} + +// Defined only for special cases +template <class T> +inline MinMaxWeightTpl<T> Divide(const MinMaxWeightTpl<T> &w1, + const MinMaxWeightTpl<T> &w2, + DivideType typ = DIVIDE_ANY) { + if (!w1.Member() || !w2.Member()) + return MinMaxWeightTpl<T>::NoWeight(); + // min(w1, x) = w2, w1 >= w2 => min(w1, x) = w2, x = w2 + return w1.Value() >= w2.Value() ? w1 : FloatLimits<T>::NumberBad(); +} + +inline MinMaxWeightTpl<float> Divide(const MinMaxWeightTpl<float> &w1, + const MinMaxWeightTpl<float> &w2, + DivideType typ = DIVIDE_ANY) { + return Divide<float>(w1, w2, typ); +} + +inline MinMaxWeightTpl<double> Divide(const MinMaxWeightTpl<double> &w1, + const MinMaxWeightTpl<double> &w2, + DivideType typ = DIVIDE_ANY) { + return Divide<double>(w1, w2, typ); +} + +// +// WEIGHT CONVERTER SPECIALIZATIONS. 
+// + +// Convert to tropical +template <> +struct WeightConvert<LogWeight, TropicalWeight> { + TropicalWeight operator()(LogWeight w) const { return w.Value(); } +}; + +template <> +struct WeightConvert<Log64Weight, TropicalWeight> { + TropicalWeight operator()(Log64Weight w) const { return w.Value(); } +}; + +// Convert to log +template <> +struct WeightConvert<TropicalWeight, LogWeight> { + LogWeight operator()(TropicalWeight w) const { return w.Value(); } +}; + +template <> +struct WeightConvert<Log64Weight, LogWeight> { + LogWeight operator()(Log64Weight w) const { return w.Value(); } +}; + +// Convert to log64 +template <> +struct WeightConvert<TropicalWeight, Log64Weight> { + Log64Weight operator()(TropicalWeight w) const { return w.Value(); } +}; + +template <> +struct WeightConvert<LogWeight, Log64Weight> { + Log64Weight operator()(LogWeight w) const { return w.Value(); } +}; + +} // namespace fst + +#endif // FST_LIB_FLOAT_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/fst-decl.h b/kaldi_io/src/tools/openfst/include/fst/fst-decl.h new file mode 100644 index 0000000..f27ded8 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/fst-decl.h @@ -0,0 +1,124 @@ +// fst-decl.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// This file contains declarations of classes in the Fst template library. 
//

#ifndef FST_LIB_FST_DECL_H__
#define FST_LIB_FST_DECL_H__

#include <fst/types.h>

// This header contains declarations only: forward declarations of the
// library's class templates and the conventional Std* typedefs built on
// StdArc.  No class is defined here.

namespace fst {

class SymbolTable;
class SymbolTableIterator;

// Weight class templates, parameterized on the underlying value type.
template <class W> class FloatWeightTpl;
template <class W> class TropicalWeightTpl;
template <class W> class LogWeightTpl;
template <class W> class MinMaxWeightTpl;

// Single-precision (float) instantiations of the weight templates.
typedef FloatWeightTpl<float> FloatWeight;
typedef TropicalWeightTpl<float> TropicalWeight;
typedef LogWeightTpl<float> LogWeight;
typedef MinMaxWeightTpl<float> MinMaxWeight;

// Arc template, parameterized on the weight type, and its two stock
// instantiations.
template <class W> class ArcTpl;
typedef ArcTpl<TropicalWeight> StdArc;
typedef ArcTpl<LogWeight> LogArc;

// FST interface and concrete container templates.
template <class A, class C, class U = uint32> class CompactFst;
template <class A, class U = uint32> class ConstFst;
template <class A, class W, class M> class EditFst;
template <class A> class ExpandedFst;
template <class A> class Fst;
template <class A> class MutableFst;
template <class A> class VectorFst;

// FST templates named after the operation they represent.
template <class A, class C> class ArcSortFst;
template <class A> class ClosureFst;
template <class A> class ComposeFst;
template <class A> class ConcatFst;
template <class A> class DeterminizeFst;
template <class A> class DifferenceFst;
template <class A> class IntersectFst;
template <class A> class InvertFst;
template <class A, class B, class C> class ArcMapFst;
template <class A> class ProjectFst;
template <class A, class B, class S> class RandGenFst;
template <class A> class RelabelFst;
template <class A, class T> class ReplaceFst;
template <class A> class RmEpsilonFst;
template <class A> class UnionFst;

template <class T, class Compare, bool max> class Heap;

// Compactor templates; used below as the second argument of CompactFst.
template <class A> class AcceptorCompactor;
template <class A> class StringCompactor;
template <class A> class UnweightedAcceptorCompactor;
template <class A> class UnweightedCompactor;
template <class A> class WeightedStringCompactor;

template <class A, class P> class DefaultReplaceStateTable;

// StdArc instantiations of the container templates.
typedef CompactFst<StdArc, AcceptorCompactor<StdArc> >
StdCompactAcceptorFst;
typedef CompactFst< StdArc, StringCompactor<StdArc> >
StdCompactStringFst;
typedef CompactFst<StdArc, UnweightedAcceptorCompactor<StdArc> >
StdCompactUnweightedAcceptorFst;
typedef CompactFst<StdArc, UnweightedCompactor<StdArc> >
StdCompactUnweightedFst;
typedef CompactFst< StdArc, WeightedStringCompactor<StdArc> >
StdCompactWeightedStringFst;
typedef ConstFst<StdArc> StdConstFst;
typedef ExpandedFst<StdArc> StdExpandedFst;
typedef Fst<StdArc> StdFst;
typedef MutableFst<StdArc> StdMutableFst;
typedef VectorFst<StdArc> StdVectorFst;


// StdArc instantiations of the operation templates.  StdArcSortFst is
// declared as a template of its own (on the comparator C) rather than as a
// typedef.
template <class C> class StdArcSortFst;
typedef ClosureFst<StdArc> StdClosureFst;
typedef ComposeFst<StdArc> StdComposeFst;
typedef ConcatFst<StdArc> StdConcatFst;
typedef DeterminizeFst<StdArc> StdDeterminizeFst;
typedef DifferenceFst<StdArc> StdDifferenceFst;
typedef IntersectFst<StdArc> StdIntersectFst;
typedef InvertFst<StdArc> StdInvertFst;
typedef ProjectFst<StdArc> StdProjectFst;
typedef RelabelFst<StdArc> StdRelabelFst;
typedef ReplaceFst<StdArc, DefaultReplaceStateTable<StdArc, ssize_t> >
StdReplaceFst;
typedef RmEpsilonFst<StdArc> StdRmEpsilonFst;
typedef UnionFst<StdArc> StdUnionFst;

// Filter state holding a single integer of type T, with three fixed-width
// instantiations.
template <typename T> class IntegerFilterState;
typedef IntegerFilterState<signed char> CharFilterState;
typedef IntegerFilterState<short> ShortFilterState;
typedef IntegerFilterState<int> IntFilterState;

// Matcher and compose-filter templates; the second matcher type of each
// filter defaults to the first.
template <class F> class Matcher;
template <class M1, class M2 = M1> class SequenceComposeFilter;
template <class M1, class M2 = M1> class AltSequenceComposeFilter;
template <class M1, class M2 = M1> class MatchComposeFilter;

}  // namespace fst

#endif  // FST_LIB_FST_DECL_H__
diff --git a/kaldi_io/src/tools/openfst/include/fst/fst.h b/kaldi_io/src/tools/openfst/include/fst/fst.h new file mode 100644 index 0000000..150fc4e --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/fst.h @@ -0,0 +1,949 @@
// fst.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: [email protected] (Michael Riley)
//
// \file
// Finite-State Transducer (FST) - abstract base class definition,
// state and arc iterator interface, and suggested base implementation.
//

#ifndef FST_LIB_FST_H__
#define FST_LIB_FST_H__

#include <stddef.h>
#include <sys/types.h>
#include <cmath>
#include <string>

#include <fst/compat.h>
#include <fst/types.h>

#include <fst/arc.h>
#include <fst/properties.h>
#include <fst/register.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <fst/symbol-table.h>
#include <fst/util.h>


DECLARE_bool(fst_align);

namespace fst {

bool IsFstHeader(istream &, const string &);

class FstHeader;
template <class A> class StateIteratorData;
template <class A> class ArcIteratorData;
template <class A> class MatcherBase;

// Options passed to the Fst read routines.  Both constructors and ReadMode()
// are only declared here; their definitions are not part of this header.
struct FstReadOptions {
  // FileReadMode(s) are advisory; there are many conditions that prevent a
  // file from being mapped. READ mode will be selected in these cases with
  // a warning indicating why it was chosen.
  enum FileReadMode { READ, MAP };

  string source;                // Where you're reading from
  const FstHeader *header;      // Pointer to Fst header. If non-zero, use
                                // this info (don't read a stream header)
  const SymbolTable* isymbols;  // Pointer to input symbols. If non-zero, use
                                // this info (read and skip stream isymbols)
  const SymbolTable* osymbols;  // Pointer to output symbols. If non-zero, use
                                // this info (read and skip stream osymbols)
  FileReadMode mode;            // Read or map files (advisory, if possible)

  // Every argument is optional; the pointer arguments default to 0.
  explicit FstReadOptions(const string& src = "<unspecified>",
                          const FstHeader *hdr = 0,
                          const SymbolTable* isym = 0,
                          const SymbolTable* osym = 0);

  // Overload taking symbol tables but no header argument.
  explicit FstReadOptions(const string& src,
                          const SymbolTable* isym,
                          const SymbolTable* osym = 0);

  // Helper function to convert a FileReadMode string into its enum value.
  static FileReadMode ReadMode(const string &mode);
};

// Options passed to the Fst write routines.
struct FstWriteOptions {
  string source;                 // Where you're writing to
  bool write_header;             // Write the header?
  bool write_isymbols;           // Write input symbols?
  bool write_osymbols;           // Write output symbols?
  bool align;                    // Write data aligned where appropriate;
                                 // this may fail on pipes

  // By default the header and both symbol tables are written, and alignment
  // follows the fst_align flag declared above.
  // NOTE(review): the default source label below is spelled "<unspecifed>",
  // whereas FstReadOptions uses "<unspecified>".  Left untouched because it
  // is a runtime string.
  explicit FstWriteOptions(const string& src = "<unspecifed>",
                           bool hdr = true, bool isym = true,
                           bool osym = true, bool alig = FLAGS_fst_align)
      : source(src), write_header(hdr),
        write_isymbols(isym), write_osymbols(osym), align(alig) {}
};

//
// Fst HEADER CLASS
//
// This is the recommended Fst file header representation.
//
// The header records the Fst type name, the arc type name, a version
// number, file-format flag bits, property bits, the start state and the
// state/arc counts.  Read() and Write() are only declared here.
class FstHeader {
 public:
  // Bit values stored in flags_ (see GetFlags()/SetFlags()).
  // Note that 'Flags' is declared after the closing brace of an unnamed
  // enum, so it is a data member of that enum type and not a type name.
  // The constructor below does not initialize it.
  enum {
    HAS_ISYMBOLS = 0x1,          // Has input symbol table
    HAS_OSYMBOLS = 0x2,          // Has output symbol table
    IS_ALIGNED   = 0x4,          // Memory-aligned (where appropriate)
  } Flags;

  // Numeric fields start at zero except the start state, which is -1.
  // The two type strings start empty.
  FstHeader() : version_(0), flags_(0), properties_(0), start_(-1),
                numstates_(0), numarcs_(0) {}

  // Accessors.
  const string &FstType() const { return fsttype_; }
  const string &ArcType() const { return arctype_; }
  int32 Version() const { return version_; }
  int32 GetFlags() const { return flags_; }
  uint64 Properties() const { return properties_; }
  int64 Start() const { return start_; }
  int64 NumStates() const { return numstates_; }
  int64 NumArcs() const { return numarcs_; }

  // Mutators; each one overwrites the corresponding field.
  void SetFstType(const string& type) { fsttype_ = type; }
  void SetArcType(const string& type) { arctype_ = type; }
  void SetVersion(int32 version) { version_ = version; }
  void SetFlags(int32 flags) { flags_ = flags; }
  void SetProperties(uint64 properties) { properties_ = properties; }
  void SetStart(int64 start) { start_ = start; }
  void SetNumStates(int64 numstates) { numstates_ = numstates; }
  void SetNumArcs(int64 numarcs) { numarcs_ = numarcs; }

  // Stream (de)serialization.  'source' names the stream; elsewhere in this
  // file the same kind of value is used in log messages.
  // NOTE(review): the meaning of 'rewind' is not visible here -- presumably
  // it restores the stream position after reading; confirm in the definition.
  bool Read(istream &strm, const string &source, bool rewind = false);
  bool Write(ostream &strm, const string &source) const;

 private:

  string fsttype_;                   // E.g. "vector"
  string arctype_;                   // E.g. "standard"
  int32 version_;                    // Type version #
  int32 flags_;                      // File format bits
  uint64 properties_;                // FST property bits
  int64 start_;                      // Start state
  int64 numstates_;                  // # of states
  int64 numarcs_;                    // # of arcs
};


// Specifies matcher action.
enum MatchType { MATCH_INPUT,      // Match input label.
                 MATCH_OUTPUT,     // Match output label.
                 MATCH_BOTH,       // Match input or output label.
                 MATCH_NONE,       // Match nothing.
                 MATCH_UNKNOWN };  // Match type unknown.

//
// Fst INTERFACE CLASS DEFINITION
//

// A generic FST, templated on the arc definition, with
// common-denominator methods (use StateIterator and ArcIterator to
// iterate over its states and arcs).
template <class A>
class Fst {
 public:
  typedef A Arc;
  typedef typename A::Weight Weight;
  typedef typename A::StateId StateId;

  virtual ~Fst() {}

  virtual StateId Start() const = 0;          // Initial state

  virtual Weight Final(StateId) const = 0;    // State's final weight

  virtual size_t NumArcs(StateId) const = 0;  // State's arc count

  virtual size_t NumInputEpsilons(StateId)
      const = 0;                              // State's input epsilon count

  virtual size_t NumOutputEpsilons(StateId)
      const = 0;                              // State's output epsilon count

  // If test=false, return stored properties bits for mask (some poss. unknown)
  // If test=true, return property bits for mask (computing o.w. unknown)
  virtual uint64 Properties(uint64 mask, bool test)
      const = 0;                              // Property bits

  virtual const string& Type() const = 0;     // Fst type name

  // Get a copy of this Fst. The copying behaves as follows:
  //
  // (1) The copying is constant time if safe = false or if safe = true
  // and is on an otherwise unaccessed Fst.
  //
  // (2) If safe = true, the copy is thread-safe in that the original
  // and copy can be safely accessed (but not necessarily mutated) by
  // separate threads. For some Fst types, 'Copy(true)' should only be
  // called on an Fst that has not otherwise been accessed. Its behavior
  // is undefined otherwise.
  //
  // (3) If a MutableFst is copied and then mutated, then the original is
  // unmodified and vice versa (often by a copy-on-write on the initial
  // mutation, which may not be constant time).
  virtual Fst<A> *Copy(bool safe = false) const = 0;

  // Read an Fst from an input stream; returns NULL on error.
  // The header comes from opts.header when that is non-zero; otherwise it is
  // read from the stream and passed on to the reader through a local copy of
  // the options.  The reader itself is looked up in FstRegister<A> by the
  // header's Fst type name.
  static Fst<A> *Read(istream &strm, const FstReadOptions &opts) {
    FstReadOptions ropts(opts);
    FstHeader hdr;
    if (ropts.header)
      hdr = *opts.header;
    else {
      if (!hdr.Read(strm, opts.source))
        return 0;
      // Hand the freshly read header to the reader so that it does not try
      // to read one from the stream again.
      ropts.header = &hdr;
    }
    FstRegister<A> *registr = FstRegister<A>::GetRegister();
    const typename FstRegister<A>::Reader reader =
      registr->GetReader(hdr.FstType());
    if (!reader) {
      LOG(ERROR) << "Fst::Read: Unknown FST type \"" << hdr.FstType()
                 << "\" (arc type = \"" << A::Type()
                 << "\"): " << ropts.source;
      return 0;
    }
    return reader(strm, ropts);
  };

  // Read an Fst from a file; return NULL on error
  // Empty filename reads from standard input
  static Fst<A> *Read(const string &filename) {
    if (!filename.empty()) {
      ifstream strm(filename.c_str(), ifstream::in | ifstream::binary);
      if (!strm) {
        LOG(ERROR) << "Fst::Read: Can't open file: " << filename;
        return 0;
      }
      return Read(strm, FstReadOptions(filename));
    } else {
      return Read(cin, FstReadOptions("standard input"));
    }
  }

  // Write an Fst to an output stream; return false on error.
  // This base version writes nothing: it logs an error and returns false.
  virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
    LOG(ERROR) << "Fst::Write: No write stream method for " << Type()
               << " Fst type";
    return false;
  }

  // Write an Fst to a file; return false on error
  // Empty filename writes to standard output.
  // This base version writes nothing: it logs an error and returns false.
  virtual bool Write(const string &filename) const {
    LOG(ERROR) << "Fst::Write: No write filename method for " << Type()
               << " Fst type";
    return false;
  }

  // Return input label symbol table; return NULL if not specified
  virtual const SymbolTable* InputSymbols() const = 0;

  // Return output label symbol table; return NULL if not specified
  virtual const SymbolTable* OutputSymbols() const = 0;

  // For generic state iterator construction; not normally called
  // directly by users.
  virtual void InitStateIterator(StateIteratorData<A> *) const = 0;

  // For generic arc iterator construction; not normally called
  // directly by users.
  virtual void InitArcIterator(StateId s, ArcIteratorData<A> *) const = 0;

  // For generic matcher construction; not normally called
  // directly by users.
  virtual MatcherBase<A> *InitMatcher(MatchType match_type) const;

 protected:
  // Helper that opens 'filename' in binary mode (or uses standard output
  // when the name is empty) and forwards to the stream Write() overload.
  bool WriteFile(const string &filename) const {
    if (!filename.empty()) {
      ofstream strm(filename.c_str(), ofstream::out | ofstream::binary);
      if (!strm) {
        LOG(ERROR) << "Fst::Write: Can't open file: " << filename;
        return false;
      }
      return Write(strm, FstWriteOptions(filename));
    } else {
      return Write(cout, FstWriteOptions("standard output"));
    }
  }
};


//
// STATE and ARC ITERATOR DEFINITIONS
//

// State iterator interface templated on the Arc definition; used
// for StateIterator specializations returned by the InitStateIterator
// Fst method.
template <class A>
class StateIteratorBase {
 public:
  typedef A Arc;
  typedef typename A::StateId StateId;

  virtual ~StateIteratorBase() {}

  bool Done() const { return Done_(); }       // End of iterator?
  StateId Value() const { return Value_(); }  // Current state (when !Done)
  void Next() { Next_(); }      // Advance to next state (when !Done)
  void Reset() { Reset_(); }    // Return to initial condition

 private:
  // This allows base class virtual access to non-virtual derived-
  // class members of the same name. It makes the derived class more
  // efficient to use but unsafe to further derive.
  virtual bool Done_() const = 0;
  virtual StateId Value_() const = 0;
  virtual void Next_() = 0;
  virtual void Reset_() = 0;
};


// StateIterator initialization data

template <class A> struct StateIteratorData {
  StateIteratorBase<A> *base;   // Specialized iterator if non-zero
  typename A::StateId nstates;  // O.w. total # of states
};


// Generic state iterator, templated on the FST definition
// - a wrapper around pointer to specific one.
// Here is a typical use: \code
//   for (StateIterator<StdFst> siter(fst);
//        !siter.Done();
//        siter.Next()) {
//     StateId s = siter.Value();
//     ...
//   } \endcode
template <class F>
class StateIterator {
 public:
  typedef F FST;
  typedef typename F::Arc Arc;
  typedef typename Arc::StateId StateId;

  // data_ is filled in by the FST; it is not initialized before this call.
  explicit StateIterator(const F &fst) : s_(0) {
    fst.InitStateIterator(&data_);
  }

  // A specialized iterator supplied by the FST is deleted here.
  ~StateIterator() { if (data_.base) delete data_.base; }

  // Without a specialized iterator, states are the integers
  // 0 .. data_.nstates - 1 and s_ is the current one.
  bool Done() const {
    return data_.base ? data_.base->Done() : s_ >= data_.nstates;
  }

  StateId Value() const { return data_.base ? data_.base->Value() : s_; }

  void Next() {
    if (data_.base)
      data_.base->Next();
    else
      ++s_;
  }

  void Reset() {
    if (data_.base)
      data_.base->Reset();
    else
      s_ = 0;
  }

 private:
  StateIteratorData<Arc> data_;
  StateId s_;

  DISALLOW_COPY_AND_ASSIGN(StateIterator);
};


// Flags to control the behavior on an arc iterator:
static const uint32 kArcILabelValue    = 0x0001;  // Value() gives valid ilabel
static const uint32 kArcOLabelValue    = 0x0002;  //  "       "     "    olabel
static const uint32 kArcWeightValue    = 0x0004;  //  "       "     "    weight
static const uint32 kArcNextStateValue = 0x0008;  //  "       "     "    nextstate
static const uint32 kArcNoCache   = 0x0010;       // No need to cache arcs

// All four "Value() gives valid ..." bits.
static const uint32 kArcValueFlags =
                  kArcILabelValue | kArcOLabelValue |
                  kArcWeightValue | kArcNextStateValue;

// Every arc iterator flag defined above.
static const uint32 kArcFlags = kArcValueFlags | kArcNoCache;


// Arc iterator interface, templated on the Arc definition; used
// for Arc iterator specializations that are returned by the InitArcIterator
// Fst method.
template <class A>
class ArcIteratorBase {
 public:
  typedef A Arc;
  typedef typename A::StateId StateId;

  virtual ~ArcIteratorBase() {}

  bool Done() const { return Done_(); }            // End of iterator?
  const A& Value() const { return Value_(); }      // Current arc (when !Done)
  void Next() { Next_(); }           // Advance to next arc (when !Done)
  size_t Position() const { return Position_(); }  // Return current position
  void Reset() { Reset_(); }         // Return to initial condition
  void Seek(size_t a) { Seek_(a); }  // Random arc access by position
  uint32 Flags() const { return Flags_(); }  // Return current behavioral flags
  void SetFlags(uint32 flags, uint32 mask) {  // Set behavioral flags
    SetFlags_(flags, mask);
  }

 private:
  // This allows base class virtual access to non-virtual derived-
  // class members of the same name. It makes the derived class more
  // efficient to use but unsafe to further derive.
  virtual bool Done_() const = 0;
  virtual const A& Value_() const = 0;
  virtual void Next_() = 0;
  virtual size_t Position_() const = 0;
  virtual void Reset_() = 0;
  virtual void Seek_(size_t a) = 0;
  virtual uint32 Flags_() const = 0;
  virtual void SetFlags_(uint32 flags, uint32 mask) = 0;
};


// ArcIterator initialization data
template <class A> struct ArcIteratorData {
  ArcIteratorBase<A> *base;  // Specialized iterator if non-zero
  const A *arcs;             // O.w. arcs pointer
  size_t narcs;              // ... and arc count
  int *ref_count;            // ... and reference count if non-zero
};


// Generic arc iterator, templated on the FST definition
// - a wrapper around pointer to specific one.
// Here is a typical use: \code
//   for (ArcIterator<StdFst> aiter(fst, s);
//        !aiter.Done();
//         aiter.Next()) {
//     const StdArc &arc = aiter.Value();
//     ...
+// } \endcode +template <class F> +class ArcIterator { + public: + typedef F FST; + typedef typename F::Arc Arc; + typedef typename Arc::StateId StateId; + + ArcIterator(const F &fst, StateId s) : i_(0) { + fst.InitArcIterator(s, &data_); + } + + explicit ArcIterator(const ArcIteratorData<Arc> &data) : data_(data), i_(0) { + if (data_.ref_count) + ++(*data_.ref_count); + } + + ~ArcIterator() { + if (data_.base) + delete data_.base; + else if (data_.ref_count) + --(*data_.ref_count); + } + + bool Done() const { + return data_.base ? data_.base->Done() : i_ >= data_.narcs; + } + + const Arc& Value() const { + return data_.base ? data_.base->Value() : data_.arcs[i_]; + } + + void Next() { + if (data_.base) + data_.base->Next(); + else + ++i_; + } + + void Reset() { + if (data_.base) + data_.base->Reset(); + else + i_ = 0; + } + + void Seek(size_t a) { + if (data_.base) + data_.base->Seek(a); + else + i_ = a; + } + + size_t Position() const { + return data_.base ? data_.base->Position() : i_; + } + + uint32 Flags() const { + if (data_.base) + return data_.base->Flags(); + else + return kArcValueFlags; + } + + void SetFlags(uint32 flags, uint32 mask) { + if (data_.base) + data_.base->SetFlags(flags, mask); + } + + private: + ArcIteratorData<Arc> data_; + size_t i_; + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + +// +// MATCHER DEFINITIONS +// + +template <class A> +MatcherBase<A> *Fst<A>::InitMatcher(MatchType match_type) const { + return 0; // Use the default matcher +} + + +// +// FST ACCESSORS - Useful functions in high-performance cases. +// + +namespace internal { + +// General case - requires non-abstract, 'final' methods. Use for inlining. 
+template <class F> inline +typename F::Arc::Weight Final(const F &fst, typename F::Arc::StateId s) { + return fst.F::Final(s); +} + +template <class F> inline +ssize_t NumArcs(const F &fst, typename F::Arc::StateId s) { + return fst.F::NumArcs(s); +} + +template <class F> inline +ssize_t NumInputEpsilons(const F &fst, typename F::Arc::StateId s) { + return fst.F::NumInputEpsilons(s); +} + +template <class F> inline +ssize_t NumOutputEpsilons(const F &fst, typename F::Arc::StateId s) { + return fst.F::NumOutputEpsilons(s); +} + + +// Fst<A> case - abstract methods. +template <class A> inline +typename A::Weight Final(const Fst<A> &fst, typename A::StateId s) { + return fst.Final(s); +} + +template <class A> inline +ssize_t NumArcs(const Fst<A> &fst, typename A::StateId s) { + return fst.NumArcs(s); +} + +template <class A> inline +ssize_t NumInputEpsilons(const Fst<A> &fst, typename A::StateId s) { + return fst.NumInputEpsilons(s); +} + +template <class A> inline +ssize_t NumOutputEpsilons(const Fst<A> &fst, typename A::StateId s) { + return fst.NumOutputEpsilons(s); +} + +} // namespace internal + +// A useful alias when using StdArc. +typedef Fst<StdArc> StdFst; + + +// +// CONSTANT DEFINITIONS +// + +const int kNoStateId = -1; // Not a valid state ID +const int kNoLabel = -1; // Not a valid label + +// +// Fst IMPLEMENTATION BASE +// +// This is the recommended Fst implementation base class. It will +// handle reference counts, property bits, type information and symbols. +// + +template <class A> class FstImpl { + public: + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + + FstImpl() + : properties_(0), type_("null"), isymbols_(0), osymbols_(0) {} + + FstImpl(const FstImpl<A> &impl) + : properties_(impl.properties_), type_(impl.type_), + isymbols_(impl.isymbols_ ? impl.isymbols_->Copy() : 0), + osymbols_(impl.osymbols_ ? 
impl.osymbols_->Copy() : 0) {} + + virtual ~FstImpl() { + delete isymbols_; + delete osymbols_; + } + + const string& Type() const { return type_; } + + void SetType(const string &type) { type_ = type; } + + virtual uint64 Properties() const { return properties_; } + + virtual uint64 Properties(uint64 mask) const { return properties_ & mask; } + + void SetProperties(uint64 props) { + properties_ &= kError; // kError can't be cleared + properties_ |= props; + } + + void SetProperties(uint64 props, uint64 mask) { + properties_ &= ~mask | kError; // kError can't be cleared + properties_ |= props & mask; + } + + // Allows (only) setting error bit on const FST impls + void SetProperties(uint64 props, uint64 mask) const { + if (mask != kError) + FSTERROR() << "FstImpl::SetProperties() const: can only set kError"; + properties_ |= kError; + } + + const SymbolTable* InputSymbols() const { return isymbols_; } + + const SymbolTable* OutputSymbols() const { return osymbols_; } + + SymbolTable* InputSymbols() { return isymbols_; } + + SymbolTable* OutputSymbols() { return osymbols_; } + + void SetInputSymbols(const SymbolTable* isyms) { + if (isymbols_) delete isymbols_; + isymbols_ = isyms ? isyms->Copy() : 0; + } + + void SetOutputSymbols(const SymbolTable* osyms) { + if (osymbols_) delete osymbols_; + osymbols_ = osyms ? osyms->Copy() : 0; + } + + int RefCount() const { + return ref_count_.count(); + } + + int IncrRefCount() { + return ref_count_.Incr(); + } + + int DecrRefCount() { + return ref_count_.Decr(); + } + + // Read-in header and symbols from input stream, initialize Fst, and + // return the header. If opts.header is non-null, skip read-in and + // use the option value. If opts.[io]symbols is non-null, read-in + // (if present), but use the option value. + bool ReadHeader(istream &strm, const FstReadOptions& opts, + int min_version, FstHeader *hdr); + + // Write-out header and symbols from output stream. + // If a opts.header is false, skip writing header. 
+ // If opts.[io]symbols is false, skip writing those symbols. + // This method is needed for Impl's that implement Write methods. + void WriteHeader(ostream &strm, const FstWriteOptions& opts, + int version, FstHeader *hdr) const { + if (opts.write_header) { + hdr->SetFstType(type_); + hdr->SetArcType(A::Type()); + hdr->SetVersion(version); + hdr->SetProperties(properties_); + int32 file_flags = 0; + if (isymbols_ && opts.write_isymbols) + file_flags |= FstHeader::HAS_ISYMBOLS; + if (osymbols_ && opts.write_osymbols) + file_flags |= FstHeader::HAS_OSYMBOLS; + if (opts.align) + file_flags |= FstHeader::IS_ALIGNED; + hdr->SetFlags(file_flags); + hdr->Write(strm, opts.source); + } + if (isymbols_ && opts.write_isymbols) isymbols_->Write(strm); + if (osymbols_ && opts.write_osymbols) osymbols_->Write(strm); + } + + // Write-out header and symbols to output stream. + // If a opts.header is false, skip writing header. + // If opts.[io]symbols is false, skip writing those symbols. + // type is the Fst type being written. + // This method is used in the cross-type serialization methods Fst::WriteFst. 
+ static void WriteFstHeader(const Fst<A> &fst, ostream &strm, + const FstWriteOptions& opts, int version, + const string &type, uint64 properties, + FstHeader *hdr) { + if (opts.write_header) { + hdr->SetFstType(type); + hdr->SetArcType(A::Type()); + hdr->SetVersion(version); + hdr->SetProperties(properties); + int32 file_flags = 0; + if (fst.InputSymbols() && opts.write_isymbols) + file_flags |= FstHeader::HAS_ISYMBOLS; + if (fst.OutputSymbols() && opts.write_osymbols) + file_flags |= FstHeader::HAS_OSYMBOLS; + if (opts.align) + file_flags |= FstHeader::IS_ALIGNED; + hdr->SetFlags(file_flags); + hdr->Write(strm, opts.source); + } + if (fst.InputSymbols() && opts.write_isymbols) { + fst.InputSymbols()->Write(strm); + } + if (fst.OutputSymbols() && opts.write_osymbols) { + fst.OutputSymbols()->Write(strm); + } + } + + // In serialization routines where the header cannot be written until after + // the machine has been serialized, this routine can be called to seek to + // the beginning of the file an rewrite the header with updated fields. + // It repositions the file pointer back at the end of the file. + // returns true on success, false on failure. 
+ static bool UpdateFstHeader(const Fst<A> &fst, ostream &strm, + const FstWriteOptions& opts, int version, + const string &type, uint64 properties, + FstHeader *hdr, size_t header_offset) { + strm.seekp(header_offset); + if (!strm) { + LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source; + return false; + } + WriteFstHeader(fst, strm, opts, version, type, properties, hdr); + if (!strm) { + LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source; + return false; + } + strm.seekp(0, ios_base::end); + if (!strm) { + LOG(ERROR) << "Fst::UpdateFstHeader: write failed: " << opts.source; + return false; + } + return true; + } + + protected: + mutable uint64 properties_; // Property bits + + private: + string type_; // Unique name of Fst class + SymbolTable *isymbols_; // Ilabel symbol table + SymbolTable *osymbols_; // Olabel symbol table + RefCounter ref_count_; // Reference count + + void operator=(const FstImpl<A> &impl); // disallow +}; + +template <class A> inline +bool FstImpl<A>::ReadHeader(istream &strm, const FstReadOptions& opts, + int min_version, FstHeader *hdr) { + if (opts.header) + *hdr = *opts.header; + else if (!hdr->Read(strm, opts.source)) + return false; + + if (FLAGS_v >= 2) { + LOG(INFO) << "FstImpl::ReadHeader: source: " << opts.source + << ", fst_type: " << hdr->FstType() + << ", arc_type: " << A::Type() + << ", version: " << hdr->Version() + << ", flags: " << hdr->GetFlags(); + } + + if (hdr->FstType() != type_) { + LOG(ERROR) << "FstImpl::ReadHeader: Fst not of type \"" << type_ + << "\": " << opts.source; + return false; + } + if (hdr->ArcType() != A::Type()) { + LOG(ERROR) << "FstImpl::ReadHeader: Arc not of type \"" << A::Type() + << "\": " << opts.source; + return false; + } + if (hdr->Version() < min_version) { + LOG(ERROR) << "FstImpl::ReadHeader: Obsolete " << type_ + << " Fst version: " << opts.source; + return false; + } + properties_ = hdr->Properties(); + if (hdr->GetFlags() & FstHeader::HAS_ISYMBOLS) + 
isymbols_ = SymbolTable::Read(strm, opts.source); + if (hdr->GetFlags() & FstHeader::HAS_OSYMBOLS) + osymbols_ =SymbolTable::Read(strm, opts.source); + + if (opts.isymbols) { + delete isymbols_; + isymbols_ = opts.isymbols->Copy(); + } + if (opts.osymbols) { + delete osymbols_; + osymbols_ = opts.osymbols->Copy(); + } + return true; +} + + +template<class Arc> +uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known); + + +// This is a helper class template useful for attaching an Fst interface to +// its implementation, handling reference counting. +template < class I, class F = Fst<typename I::Arc> > +class ImplToFst : public F { + public: + typedef typename I::Arc Arc; + typedef typename Arc::Weight Weight; + typedef typename Arc::StateId StateId; + + virtual ~ImplToFst() { if (!impl_->DecrRefCount()) delete impl_; } + + virtual StateId Start() const { return impl_->Start(); } + + virtual Weight Final(StateId s) const { return impl_->Final(s); } + + virtual size_t NumArcs(StateId s) const { return impl_->NumArcs(s); } + + virtual size_t NumInputEpsilons(StateId s) const { + return impl_->NumInputEpsilons(s); + } + + virtual size_t NumOutputEpsilons(StateId s) const { + return impl_->NumOutputEpsilons(s); + } + + virtual uint64 Properties(uint64 mask, bool test) const { + if (test) { + uint64 knownprops, testprops = TestProperties(*this, mask, &knownprops); + impl_->SetProperties(testprops, knownprops); + return testprops & mask; + } else { + return impl_->Properties(mask); + } + } + + virtual const string& Type() const { return impl_->Type(); } + + virtual const SymbolTable* InputSymbols() const { + return impl_->InputSymbols(); + } + + virtual const SymbolTable* OutputSymbols() const { + return impl_->OutputSymbols(); + } + + protected: + ImplToFst() : impl_(0) {} + + ImplToFst(I *impl) : impl_(impl) {} + + ImplToFst(const ImplToFst<I, F> &fst) { + impl_ = fst.impl_; + impl_->IncrRefCount(); + } + + // This constructor presumes there is a copy 
constructor for the + // implementation. + ImplToFst(const ImplToFst<I, F> &fst, bool safe) { + if (safe) { + impl_ = new I(*(fst.impl_)); + } else { + impl_ = fst.impl_; + impl_->IncrRefCount(); + } + } + + I *GetImpl() const { return impl_; } + + // Change Fst implementation pointer. If 'own_impl' is true, + // ownership of the input implementation is given to this + // object; otherwise, the input implementation's reference count + // should be incremented. + void SetImpl(I *impl, bool own_impl = true) { + if (!own_impl) + impl->IncrRefCount(); + if (impl_ && !impl_->DecrRefCount()) delete impl_; + impl_ = impl; + } + + private: + // Disallow + ImplToFst<I, F> &operator=(const ImplToFst<I, F> &fst); + + ImplToFst<I, F> &operator=(const Fst<Arc> &fst) { + FSTERROR() << "ImplToFst: Assignment operator disallowed"; + GetImpl()->SetProperties(kError, kError); + return *this; + } + + I *impl_; +}; + + +// Converts FSTs by casting their implementations, where this makes +// sense (which excludes implementations with weight-dependent virtual +// methods). Must be a friend of the Fst classes involved (currently +// the concrete Fsts: VectorFst, ConstFst, CompactFst). 
+template<class F, class G> void Cast(const F &ifst, G *ofst) { + ofst->SetImpl(reinterpret_cast<typename G::Impl *>(ifst.GetImpl()), false); +} + +// Fst Serialization +template <class A> +void FstToString(const Fst<A> &fst, string *result) { + ostringstream ostrm; + fst.Write(ostrm, FstWriteOptions("FstToString")); + *result = ostrm.str(); +} + +template <class A> +Fst<A> *StringToFst(const string &s) { + istringstream istrm(s); + return Fst<A>::Read(istrm, FstReadOptions("StringToFst")); +} + +} // namespace fst + +#endif // FST_LIB_FST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/fstlib.h b/kaldi_io/src/tools/openfst/include/fst/fstlib.h new file mode 100644 index 0000000..de5976d --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/fstlib.h @@ -0,0 +1,153 @@ +// fstlib.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \page FstLib FST - Weighted Finite State Transducers +// This is a library for constructing, combining, optimizing, and +// searching "weighted finite-state transducers" (FSTs). Weighted +// finite-state transducers are automata where each transition has an +// input label, an output label, and a weight. The more familiar +// finite-state acceptor is represented as a transducer with each +// transition's input and output the same. 
Finite-state acceptors +// are used to represent sets of strings (specifically, "regular" or +// "rational sets"); finite-state transducers are used to represent +// binary relations between pairs of strings (specifically, "rational +// transductions"). The weights can be used to represent the cost of +// taking a particular transition. +// +// In this library, the transducers are templated on the Arc +// (transition) definition, which allows changing the label, weight, +// and state ID sets. Labels and state IDs are restricted to signed +// integral types but the weight can be an arbitrary type whose +// members satisfy certain algebraic ("semiring") properties. +// +// For more information, see the FST Library Wiki page: +// http://wiki.corp.google.com/twiki/bin/view/Main/FstLibrary + +// \file +// This convenience file includes all other FST inl.h files. +// + +#ifndef FST_LIB_FSTLIB_H__ +#define FST_LIB_FSTLIB_H__ + + +// Abstract FST classes +#include <fst/fst.h> +#include <fst/expanded-fst.h> +#include <fst/mutable-fst.h> + +// Concrete FST classes +#include <fst/compact-fst.h> +#include <fst/const-fst.h> +#include <fst/edit-fst.h> +#include <fst/vector-fst.h> + +// FST algorithms and delayed FST classes +#include <fst/arcsort.h> +#include <fst/arc-map.h> +#include <fst/closure.h> +#include <fst/compose.h> +#include <fst/concat.h> +#include <fst/connect.h> +#include <fst/determinize.h> +#include <fst/difference.h> +#include <fst/encode.h> +#include <fst/epsnormalize.h> +#include <fst/equal.h> +#include <fst/equivalent.h> +#include <fst/factor-weight.h> +#include <fst/intersect.h> +#include <fst/invert.h> +#include <fst/map.h> +#include <fst/minimize.h> +#include <fst/project.h> +#include <fst/prune.h> +#include <fst/push.h> +#include <fst/randequivalent.h> +#include <fst/randgen.h> +#include <fst/rational.h> +#include <fst/relabel.h> +#include <fst/replace.h> +#include <fst/replace-util.h> +#include <fst/reverse.h> +#include <fst/reweight.h> +#include 
<fst/rmepsilon.h> +#include <fst/rmfinalepsilon.h> +#include <fst/shortest-distance.h> +#include <fst/shortest-path.h> +#include <fst/statesort.h> +#include <fst/state-map.h> +#include <fst/synchronize.h> +#include <fst/topsort.h> +#include <fst/union.h> +#include <fst/verify.h> +#include <fst/visit.h> + +// Weights +#include <fst/weight.h> +#include <fst/expectation-weight.h> +#include <fst/float-weight.h> +#include <fst/lexicographic-weight.h> +#include <fst/pair-weight.h> +#include <fst/power-weight.h> +#include <fst/product-weight.h> +#include <fst/random-weight.h> +#include <fst/signed-log-weight.h> +#include <fst/sparse-power-weight.h> +#include <fst/sparse-tuple-weight.h> +#include <fst/string-weight.h> +#include <fst/tuple-weight.h> + +// Auxiliary classes for composition +#include <fst/compose-filter.h> +#include <fst/lookahead-filter.h> +#include <fst/lookahead-matcher.h> +#include <fst/matcher-fst.h> +#include <fst/matcher.h> +#include <fst/state-table.h> + +// Data structures +#include <fst/heap.h> +#include <fst/interval-set.h> +#include <fst/queue.h> +#include <fst/union-find.h> + +// Miscellaneous +#include <fst/accumulator.h> +#include <fst/add-on.h> +#include <fst/arc.h> +#include <fst/arcfilter.h> +#include <fst/cache.h> +#include <fst/complement.h> +#include <fst/dfs-visit.h> +#include <fst/generic-register.h> +#include <fst/label-reachable.h> +#include <fst/partition.h> +#include <fst/properties.h> +#include <fst/register.h> +#include <fst/state-reachable.h> +#include <iostream> +#include <fstream> +#include <sstream> +#include <fst/string.h> +#include <fst/symbol-table.h> +#include <fst/symbol-table-ops.h> +#include <fst/test-properties.h> +#include <fst/util.h> + + +#endif // FST_LIB_FSTLIB_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/generic-register.h b/kaldi_io/src/tools/openfst/include/fst/generic-register.h new file mode 100644 index 0000000..4f8b512 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/generic-register.h 
@@ -0,0 +1,159 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_LIB_GENERIC_REGISTER_H_ +#define FST_LIB_GENERIC_REGISTER_H_ + +#include <map> +#include <string> + +#include <fst/compat.h> +#include <fst/types.h> + +// Generic class representing a globally-stored correspondence between +// objects of KeyType and EntryType. +// KeyType must: +// a) be such as can be stored as a key in a map<> +// b) be concatenable with a const char* with the + operator +// (or you must subclass and redefine LoadEntryFromSharedObject) +// EntryType must be default constructible. +// +// The third template parameter should be the type of a subclass of this class +// (think CRTP). This is to allow GetRegister() to instantiate and return +// an object of the appropriate type. 
+ +namespace fst { + +template<class KeyType, class EntryType, class RegisterType> +class GenericRegister { + public: + typedef KeyType Key; + typedef EntryType Entry; + + static RegisterType *GetRegister() { + FstOnceInit(®ister_init_, + &RegisterType::Init); + + return register_; + } + + void SetEntry(const KeyType &key, + const EntryType &entry) { + MutexLock l(register_lock_); + + register_table_.insert(make_pair(key, entry)); + } + + EntryType GetEntry(const KeyType &key) const { + const EntryType *entry = LookupEntry(key); + if (entry) { + return *entry; + } else { + return LoadEntryFromSharedObject(key); + } + } + + virtual ~GenericRegister() { } + + protected: + // Override this if you want to be able to load missing definitions from + // shared object files. + virtual EntryType LoadEntryFromSharedObject(const KeyType &key) const { + string so_filename = ConvertKeyToSoFilename(key); + + void *handle = dlopen(so_filename.c_str(), RTLD_LAZY); + if (handle == 0) { + LOG(ERROR) << "GenericRegister::GetEntry : " << dlerror(); + return EntryType(); + } + + // We assume that the DSO constructs a static object in its global + // scope that does the registration. Thus we need only load it, not + // call any methods. + const EntryType *entry = this->LookupEntry(key); + if (entry == 0) { + LOG(ERROR) << "GenericRegister::GetEntry : " + << "lookup failed in shared object: " << so_filename; + return EntryType(); + } + return *entry; + } + + // Override this to define how to turn a key into an SO filename. 
+ virtual string ConvertKeyToSoFilename(const KeyType& key) const = 0; + + virtual const EntryType *LookupEntry( + const KeyType &key) const { + MutexLock l(register_lock_); + + typename RegisterMapType::const_iterator it = register_table_.find(key); + + if (it != register_table_.end()) { + return &it->second; + } else { + return 0; + } + } + + private: + typedef map<KeyType, EntryType> RegisterMapType; + + static void Init() { + register_lock_ = new Mutex; + register_ = new RegisterType; + } + + static FstOnceType register_init_; + static Mutex *register_lock_; + static RegisterType *register_; + + RegisterMapType register_table_; +}; + +template<class KeyType, class EntryType, class RegisterType> +FstOnceType GenericRegister<KeyType, EntryType, + RegisterType>::register_init_ = FST_ONCE_INIT; + +template<class KeyType, class EntryType, class RegisterType> +Mutex *GenericRegister<KeyType, EntryType, RegisterType>::register_lock_ = 0; + +template<class KeyType, class EntryType, class RegisterType> +RegisterType *GenericRegister<KeyType, EntryType, RegisterType>::register_ = 0; + +// +// GENERIC REGISTRATION +// + +// Generic register-er class capable of creating new register entries in the +// given RegisterType template parameter. This type must define types Key +// and Entry, and have appropriate static GetRegister() and instance +// SetEntry() functions. An easy way to accomplish this is to have RegisterType +// be the type of a subclass of GenericRegister. 
// Helper whose constructor adds one (key, entry) pair to RegisterType's
// register.  RegisterType must provide the Key and Entry typedefs, a static
// GetRegister() returning a pointer to the register object, and a
// SetEntry(key, entry) member on that object.
template<class RegisterType>
class GenericRegisterer {
 public:
  typedef typename RegisterType::Key Key;
  typedef typename RegisterType::Entry Entry;

  // Fetches the register and stores the pair in it.
  GenericRegisterer(Key key, Entry entry) {
    RegisterType::GetRegister()->SetEntry(key, entry);
  }
};
//
// \class Heap
// \brief A templated binary heap that supports in-place update of values.
//
// Each value is internally associated with a key which is returned
// to the calling functions on heap insert. This key can be used
// to later update the specific value in the heap.
//
// \param T the element type of the heap, can be POD, Data or Ptr to Data
// \param Compare Comparison class for determining min-heapness.
// \param max whether heap top should be max or min element w.r.t. Compare
//

static const int kNoKey = -1;

template <class T, class Compare, bool max>
class Heap {
 public:

  // Initialize with a specific comparator
  Heap(Compare comp) : comp_(comp), size_(0) { }

  // Create a heap with initial size of internal arrays of 0
  Heap() : size_(0) { }

  ~Heap() { }

  // Insert a value into the heap.  Returns the key that identifies this
  // value in later calls to Update().  Slots left over from earlier Pop() or
  // Clear() calls are reused together with their keys.
  int Insert(const T& val) {
    if (size_ < static_cast<int>(A_.size())) {
      A_[size_] = val;
      pos_[key_[size_]] = size_;
    } else {
      A_.push_back(val);
      pos_.push_back(size_);
      key_.push_back(size_);
    }

    ++size_;
    return Insert(val, size_ - 1);
  }

  // Update a value at position given by the key. The pos array is first
  // indexed by the key. The position gives the position in the heap array.
  // Once we have the position we can then use the standard heap operations
  // to calculate the parent and child positions.
  void Update(int key, const T& val) {
    int i = pos_[key];
    // Compare against the parent before overwriting the slot: for the root,
    // Parent(0) == 0, so this compares against the old root value.
    const bool is_better = Better(val, A_[Parent(i)]);
    // The new value must be stored before sifting in either direction.
    // Previously the sift-up path never wrote it, so the stale value moved
    // up the tree and the new one was dropped.
    A_[i] = val;
    if (is_better) {
      Insert(val, i);
    } else {
      Heapify(i);
    }
  }

  // Remove and return the greatest (max=true) / least (max=false) value
  // w.r.t. the comp object.  The heap must not be empty; no check is made.
  T Pop() {
    T top = A_[0];

    Swap(0, size_-1);
    size_--;
    Heapify(0);
    return top;
  }

  // Return the greatest (max=true) / least (max=false) value w.r.t.
  // comp object from the heap.  The heap must not be empty; no check is made.
  T Top() const {
    return A_[0];
  }

  // Check if the heap is empty
  bool Empty() const {
    return size_ == 0;
  }

  // Logically empties the heap.  The internal arrays keep their contents so
  // their slots and keys can be reused by Insert().
  void Clear() {
    size_ = 0;
  }


  //
  // The following private routines are used in a supportive role
  // for managing the heap and keeping the heap properties.
  //
 private:
  // Compute left child of parent
  int Left(int i) {
    return 2*(i+1)-1;   // 0 -> 1, 1 -> 3
  }

  // Compute right child of parent
  int Right(int i) {
    return 2*(i+1);     // 0 -> 2, 1 -> 4
  }

  // Given a child compute parent
  int Parent(int i) {
    return (i-1)/2;     // 0 -> 0, 1 -> 0, 2 -> 0, 3 -> 1, 4 -> 1
  }

  // Swap a child, parent. Use to move element up/down tree.
  // Note a little tricky here. When we swap we need to swap:
  //   the value
  //   the associated keys
  //   the position of the value in the heap
  void Swap(int j, int k) {
    int tkey = key_[j];
    pos_[key_[j] = key_[k]] = j;
    pos_[key_[k] = tkey]    = k;

    T val  = A_[j];
    A_[j]  = A_[k];
    A_[k]  = val;
  }

  // Returns true if x should be closer to the top than y: comp_(y, x) when
  // max=true, comp_(x, y) when max=false.
  bool Better(const T& x, const T& y) {
    return max ? comp_(y, x) : comp_(x, y);
  }

  // Heapify subtree rooted at index i (sift the element at i down).
  void Heapify(int i) {
    int l = Left(i);
    int r = Right(i);
    int largest;

    if (l < size_ && Better(A_[l], A_[i]) )
      largest = l;
    else
      largest = i;

    if (r < size_ && Better(A_[r], A_[largest]) )
      largest = r;

    if (largest != i) {
      Swap(i, largest);
      Heapify(largest);
    }
  }


  // Sift the element at index i up towards the root.  The caller must
  // already have stored val in A_[i]; this routine only moves it.  Returns
  // the key of the element that ends up at the final position.
  int Insert(const T& val, int i) {
    int p;
    while (i > 0 && !Better(A_[p = Parent(i)], val)) {
      Swap(i, p);
      i = p;
    }

    return key_[i];
  }

 private:
  Compare comp_;

  std::vector<int> pos_;   // key -> position in A_
  std::vector<int> key_;   // position in A_ -> key
  std::vector<T>   A_;     // heap-ordered values
  int  size_;              // number of live elements in A_

  // DISALLOW_COPY_AND_ASSIGN(Heap);
};
+ +#ifndef FST_LIB_ICU_H_ +#define FST_LIB_ICU_H_ + +#include <iostream> +#include <fstream> +#include <sstream> + +namespace fst { + +template <class Label> +bool UTF8StringToLabels(const string &str, vector<Label> *labels) { + const char *data = str.data(); + size_t length = str.size(); + for (int i = 0; i < length; /* no update */) { + int c = data[i++] & 0xff; + if ((c & 0x80) == 0) { + labels->push_back(c); + } else { + if ((c & 0xc0) == 0x80) { + LOG(ERROR) << "UTF8StringToLabels: continuation byte as lead byte"; + return false; + } + int count = (c >= 0xc0) + (c >= 0xe0) + (c >= 0xf0) + (c >= 0xf8) + + (c >= 0xfc); + int code = c & ((1 << (6 - count)) - 1); + while (count != 0) { + if (i == length) { + LOG(ERROR) << "UTF8StringToLabels: truncated utf-8 byte sequence"; + return false; + } + char cb = data[i++]; + if ((cb & 0xc0) != 0x80) { + LOG(ERROR) << "UTF8StringToLabels: missing/invalid continuation byte"; + return false; + } + code = (code << 6) | (cb & 0x3f); + count--; + } + if (code < 0) { + // This should not be able to happen. 
+ LOG(ERROR) << "UTF8StringToLabels: Invalid character found: " << c; + return false; + } + labels->push_back(code); + } + } + return true; +} + +template <class Label> +bool LabelsToUTF8String(const vector<Label> &labels, string *str) { + ostringstream ostr; + for (size_t i = 0; i < labels.size(); ++i) { + int32_t code = labels[i]; + if (code < 0) { + LOG(ERROR) << "LabelsToUTF8String: Invalid character found: " << code; + return false; + } else if (code < 0x80) { + ostr << static_cast<char>(code); + } else if (code < 0x800) { + ostr << static_cast<char>((code >> 6) | 0xc0); + ostr << static_cast<char>((code & 0x3f) | 0x80); + } else if (code < 0x10000) { + ostr << static_cast<char>((code >> 12) | 0xe0); + ostr << static_cast<char>(((code >> 6) & 0x3f) | 0x80); + ostr << static_cast<char>((code & 0x3f) | 0x80); + } else if (code < 0x200000) { + ostr << static_cast<char>((code >> 18) | 0xf0); + ostr << static_cast<char>(((code >> 12) & 0x3f) | 0x80); + ostr << static_cast<char>(((code >> 6) & 0x3f) | 0x80); + ostr << static_cast<char>((code & 0x3f) | 0x80); + } else if (code < 0x4000000) { + ostr << static_cast<char>((code >> 24) | 0xf8); + ostr << static_cast<char>(((code >> 18) & 0x3f) | 0x80); + ostr << static_cast<char>(((code >> 12) & 0x3f) | 0x80); + ostr << static_cast<char>(((code >> 6) & 0x3f) | 0x80); + ostr << static_cast<char>((code & 0x3f) | 0x80); + } else { + ostr << static_cast<char>((code >> 30) | 0xfc); + ostr << static_cast<char>(((code >> 24) & 0x3f) | 0x80); + ostr << static_cast<char>(((code >> 18) & 0x3f) | 0x80); + ostr << static_cast<char>(((code >> 12) & 0x3f) | 0x80); + ostr << static_cast<char>(((code >> 6) & 0x3f) | 0x80); + ostr << static_cast<char>((code & 0x3f) | 0x80); + } + } + *str = ostr.str(); + return true; +} + +} // namespace fst + +#endif // FST_LIB_ICU_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/intersect.h b/kaldi_io/src/tools/openfst/include/fst/intersect.h new file mode 100644 index 0000000..f46116f --- 
/dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/intersect.h @@ -0,0 +1,172 @@ +// intersect.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Class to compute the intersection of two FSAs + +#ifndef FST_LIB_INTERSECT_H__ +#define FST_LIB_INTERSECT_H__ + +#include <algorithm> +#include <vector> +using std::vector; + +#include <fst/cache.h> +#include <fst/compose.h> + + +namespace fst { + +template <class A, + class M = Matcher<Fst<A> >, + class F = SequenceComposeFilter<M>, + class T = GenericComposeStateTable<A, typename F::FilterState> > +struct IntersectFstOptions : public ComposeFstOptions<A, M, F, T> { + explicit IntersectFstOptions(const CacheOptions &opts, + M *mat1 = 0, M *mat2 = 0, + F *filt = 0, T *sttable= 0) + : ComposeFstOptions<A, M, F, T>(opts, mat1, mat2, filt, sttable) { } + + IntersectFstOptions() {} +}; + +// Computes the intersection (Hadamard product) of two FSAs. This +// version is a delayed Fst. Only strings that are in both automata +// are retained in the result. +// +// The two arguments must be acceptors. One of the arguments must be +// label-sorted. +// +// Complexity: same as ComposeFst. +// +// Caveats: same as ComposeFst. 
+template <class A> +class IntersectFst : public ComposeFst<A> { + public: + using ComposeFst<A>::CreateBase; + using ComposeFst<A>::CreateBase1; + using ComposeFst<A>::Properties; + using ImplToFst< ComposeFstImplBase<A> >::GetImpl; + using ImplToFst< ComposeFstImplBase<A> >::SetImpl; + + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + + IntersectFst(const Fst<A> &fst1, const Fst<A> &fst2, + const CacheOptions opts = CacheOptions()) { + bool acceptors = fst1.Properties(kAcceptor, true) && + fst2.Properties(kAcceptor, true); + SetImpl(CreateBase(fst1, fst2, opts)); + if (!acceptors) { + FSTERROR() << "IntersectFst: input FSTs are not acceptors"; + GetImpl()->SetProperties(kError); + } + } + + template <class M, class F, class T> + IntersectFst(const Fst<A> &fst1, const Fst<A> &fst2, + const IntersectFstOptions<A, M, F, T> &opts) { + bool acceptors = fst1.Properties(kAcceptor, true) && + fst2.Properties(kAcceptor, true); + SetImpl(CreateBase1(fst1, fst2, opts)); + if (!acceptors) { + FSTERROR() << "IntersectFst: input FSTs are not acceptors"; + GetImpl()->SetProperties(kError); + } + } + + // See Fst<>::Copy() for doc. + IntersectFst(const IntersectFst<A> &fst, bool safe = false) : + ComposeFst<A>(fst, safe) {} + + // Get a copy of this IntersectFst. See Fst<>::Copy() for further doc. + virtual IntersectFst<A> *Copy(bool safe = false) const { + return new IntersectFst<A>(*this, safe); + } +}; + + +// Specialization for IntersectFst. +template <class A> +class StateIterator< IntersectFst<A> > + : public StateIterator< ComposeFst<A> > { + public: + explicit StateIterator(const IntersectFst<A> &fst) + : StateIterator< ComposeFst<A> >(fst) {} +}; + + +// Specialization for IntersectFst. 
+template <class A> +class ArcIterator< IntersectFst<A> > + : public ArcIterator< ComposeFst<A> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const IntersectFst<A> &fst, StateId s) + : ArcIterator< ComposeFst<A> >(fst, s) {} +}; + +// Useful alias when using StdArc. +typedef IntersectFst<StdArc> StdIntersectFst; + + +typedef ComposeOptions IntersectOptions; + + +// Computes the intersection (Hadamard product) of two FSAs. This +// version writes the intersection to an output MurableFst. Only +// strings that are in both automata are retained in the result. +// +// The two arguments must be acceptors. One of the arguments must be +// label-sorted. +// +// Complexity: same as Compose. +// +// Caveats: same as Compose. +template<class Arc> +void Intersect(const Fst<Arc> &ifst1, const Fst<Arc> &ifst2, + MutableFst<Arc> *ofst, + const IntersectOptions &opts = IntersectOptions()) { + typedef Matcher< Fst<Arc> > M; + + if (opts.filter_type == AUTO_FILTER) { + CacheOptions nopts; + nopts.gc_limit = 0; // Cache only the last state for fastest copy. + *ofst = IntersectFst<Arc>(ifst1, ifst2, nopts); + } else if (opts.filter_type == SEQUENCE_FILTER) { + IntersectFstOptions<Arc> iopts; + iopts.gc_limit = 0; // Cache only the last state for fastest copy. + *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts); + } else if (opts.filter_type == ALT_SEQUENCE_FILTER) { + IntersectFstOptions<Arc, M, AltSequenceComposeFilter<M> > iopts; + iopts.gc_limit = 0; // Cache only the last state for fastest copy. + *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts); + } else if (opts.filter_type == MATCH_FILTER) { + IntersectFstOptions<Arc, M, MatchComposeFilter<M> > iopts; + iopts.gc_limit = 0; // Cache only the last state for fastest copy. 
+ *ofst = IntersectFst<Arc>(ifst1, ifst2, iopts); + } + + if (opts.connect) + Connect(ofst); +} + +} // namespace fst + +#endif // FST_LIB_INTERSECT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/interval-set.h b/kaldi_io/src/tools/openfst/include/fst/interval-set.h new file mode 100644 index 0000000..58cad44 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/interval-set.h @@ -0,0 +1,381 @@ +// interval-set.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Class to represent and operate on sets of intervals. + +#ifndef FST_LIB_INTERVAL_SET_H__ +#define FST_LIB_INTERVAL_SET_H__ + +#include <iostream> +#include <vector> +using std::vector; + + +#include <fst/util.h> + + +namespace fst { + +// Stores and operates on a set of half-open integral intervals [a,b) +// of signed integers of type T. 
// A set of half-open intervals [begin, end) over a signed integral type T,
// kept in a vector.  Most operations require the set to be normalized
// (sorted, with overlapping and adjacent intervals merged); see Normalize().
template <typename T>
class IntervalSet {
 public:
  // One half-open interval [begin_, end_).
  struct Interval {
    T begin_;
    T end_;

    Interval() : begin_(-1), end_(-1) {}

    Interval(T b, T e) : begin_(b), end_(e) {}

    // Ascending by begin_; for equal begin_ the interval with the larger
    // end_ comes first.
    bool operator<(const Interval &i) const {
      if (begin_ != i.begin_)
        return begin_ < i.begin_;
      return end_ > i.end_;
    }

    bool operator==(const Interval &i) const {
      return begin_ == i.begin_ && end_ == i.end_;
    }

    bool operator!=(const Interval &i) const {
      return begin_ != i.begin_ || end_ != i.end_;
    }

    // Reads begin_ then end_ with ReadType().
    std::istream &Read(std::istream &strm) {
      T value;
      ReadType(strm, &value);
      begin_ = value;
      ReadType(strm, &value);
      end_ = value;
      return strm;
    }

    // Writes begin_ then end_ with WriteType().
    std::ostream &Write(std::ostream &strm) const {
      T value = begin_;
      WriteType(strm, value);
      value = end_;
      WriteType(strm, value);
      return strm;
    }
  };

  IntervalSet() : count_(-1) {}

  // Returns the interval set as a vector.
  std::vector<Interval> *Intervals() { return &intervals_; }

  const std::vector<Interval> *Intervals() const { return &intervals_; }

  bool Empty() const { return intervals_.empty(); }

  // Number of stored intervals.
  T Size() const { return intervals_.size(); }

  // Number of points in the intervals (undefined if not normalized).
  T Count() const { return count_; }

  void Clear() {
    intervals_.clear();
    count_ = 0;
  }

  // Adds an interval set to the set. The result may not be normalized.
  void Union(const IntervalSet<T> &iset) {
    const std::vector<Interval> &extra = *iset.Intervals();
    intervals_.insert(intervals_.end(), extra.begin(), extra.end());
  }

  // Requires intervals be normalized.
  bool Member(T value) const {
    // Because of the ordering above, the empty probe [value, value) sorts
    // after every interval that begins at or before value.
    const Interval probe(value, value);
    typename std::vector<Interval>::const_iterator pos =
        std::lower_bound(intervals_.begin(), intervals_.end(), probe);
    if (pos == intervals_.begin())
      return false;
    --pos;
    return pos->end_ > value;
  }

  // Requires intervals be normalized.
  bool operator==(const IntervalSet<T>& iset) const {
    return *(iset.Intervals()) == intervals_;
  }

  // Requires intervals be normalized.
  bool operator!=(const IntervalSet<T>& iset) const {
    return *(iset.Intervals()) != intervals_;
  }

  // True if the set is exactly one interval containing exactly one point.
  bool Singleton() const {
    if (intervals_.size() != 1)
      return false;
    return intervals_[0].begin_ + 1 == intervals_[0].end_;
  }


  // Sorts; collapses overlapping and adjacent intervals; sets count.
  void Normalize();

  // Intersects an interval set with the set. Requires intervals be
  // normalized. The result is normalized.
  void Intersect(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;

  // Complements the set w.r.t [0, maxval). Requires intervals be
  // normalized. The result is normalized.
  void Complement(T maxval, IntervalSet<T> *oset) const;

  // Subtract an interval set from the set. Requires intervals be
  // normalized. The result is normalized.
  void Difference(const IntervalSet<T> &iset, IntervalSet<T> *oset) const;

  // Determines if an interval set overlaps with the set. Requires
  // intervals be normalized.
  bool Overlaps(const IntervalSet<T> &iset) const;

  // Determines if an interval set overlaps with the set but neither
  // is contained in the other. Requires intervals be normalized.
  bool StrictlyOverlaps(const IntervalSet<T> &iset) const;

  // Determines if an interval set is contained within the set. Requires
  // intervals be normalized.
  bool Contains(const IntervalSet<T> &iset) const;

  std::istream &Read(std::istream &strm) {
    ReadType(strm, &intervals_);
    return ReadType(strm, &count_);
  }

  std::ostream &Write(std::ostream &strm) const {
    WriteType(strm, intervals_);
    return WriteType(strm, count_);
  }

 private:
  std::vector<Interval> intervals_;
  T count_;
};

// Sorts; collapses overlapping and adjacent intervals; sets count.
template <typename T>
void IntervalSet<T>::Normalize() {
  std::sort(intervals_.begin(), intervals_.end());

  count_ = 0;
  const T total = intervals_.size();
  T kept = 0;   // merged intervals written back so far
  T next = 0;   // next unread input interval
  while (next < total) {
    Interval merged = intervals_[next++];
    if (merged.begin_ == merged.end_)
      continue;  // empty interval contributes nothing
    // Absorb every following interval that starts no later than the
    // current end (overlapping or adjacent).
    while (next < total && intervals_[next].begin_ <= merged.end_) {
      if (intervals_[next].end_ > merged.end_)
        merged.end_ = intervals_[next].end_;
      ++next;
    }
    count_ += merged.end_ - merged.begin_;
    intervals_[kept++] = merged;
  }
  intervals_.resize(kept);
}

// Intersects an interval set with the set. Requires intervals be normalized.
// The result is normalized.
template <typename T>
void IntervalSet<T>::Intersect(const IntervalSet<T> &iset,
                               IntervalSet<T> *oset) const {
  typedef typename std::vector<Interval>::const_iterator Iter;
  const std::vector<Interval> &theirs = *iset.Intervals();
  std::vector<Interval> &result = *oset->Intervals();
  Iter mine_it = intervals_.begin();
  Iter theirs_it = theirs.begin();

  result.clear();
  oset->count_ = 0;

  while (mine_it != intervals_.end() && theirs_it != theirs.end()) {
    if (mine_it->end_ <= theirs_it->begin_) {
      ++mine_it;
      continue;
    }
    if (theirs_it->end_ <= mine_it->begin_) {
      ++theirs_it;
      continue;
    }
    // The two current intervals share at least one point.
    const Interval common(std::max(mine_it->begin_, theirs_it->begin_),
                          std::min(mine_it->end_, theirs_it->end_));
    result.push_back(common);
    oset->count_ += common.end_ - common.begin_;
    // Advance whichever interval finishes first.
    if (mine_it->end_ < theirs_it->end_)
      ++mine_it;
    else
      ++theirs_it;
  }
}

// Complements the set w.r.t [0, maxval). Requires intervals be normalized.
// The result is normalized.
template <typename T>
void IntervalSet<T>::Complement(T maxval, IntervalSet<T> *oset) const {
  typedef typename std::vector<Interval>::const_iterator Iter;
  std::vector<Interval> &result = *oset->Intervals();
  result.clear();
  oset->count_ = 0;

  T gap_begin = 0;  // start of the gap preceding the next interval
  for (Iter it = intervals_.begin(); it != intervals_.end(); ++it) {
    const T gap_end = std::min(it->begin_, maxval);
    if (gap_begin < gap_end) {
      result.push_back(Interval(gap_begin, gap_end));
      oset->count_ += gap_end - gap_begin;
    }
    gap_begin = it->end_;
  }
  // Trailing gap between the last interval and maxval.
  if (gap_begin < maxval) {
    result.push_back(Interval(gap_begin, maxval));
    oset->count_ += maxval - gap_begin;
  }
}

// Subtract an interval set from the set. Requires intervals be normalized.
// The result is normalized.
template <typename T>
void IntervalSet<T>::Difference(const IntervalSet<T> &iset,
                                IntervalSet<T> *oset) const {
  if (intervals_.empty()) {
    oset->Intervals()->clear();
    oset->count_ = 0;
    return;
  }
  // A \ B is computed as A intersected with the complement of B, where the
  // complement is taken up to the end of A's last interval.
  IntervalSet<T> complement;
  iset.Complement(intervals_.back().end_, &complement);
  Intersect(complement, oset);
}

// Determines if an interval set overlaps with the set. Requires
// intervals be normalized.
template <typename T>
bool IntervalSet<T>::Overlaps(const IntervalSet<T> &iset) const {
  typedef typename std::vector<Interval>::const_iterator Iter;
  const std::vector<Interval> &theirs = *iset.Intervals();
  Iter mine_it = intervals_.begin();
  Iter theirs_it = theirs.begin();

  while (mine_it != intervals_.end() && theirs_it != theirs.end()) {
    if (mine_it->end_ <= theirs_it->begin_)
      ++mine_it;
    else if (theirs_it->end_ <= mine_it->begin_)
      ++theirs_it;
    else
      return true;
  }
  return false;
}

// Determines if an interval set overlaps with the set but neither
// is contained in the other. Requires intervals be normalized.
template <typename T>
bool IntervalSet<T>::StrictlyOverlaps(const IntervalSet<T> &iset) const {
  typedef typename std::vector<Interval>::const_iterator Iter;
  const std::vector<Interval> &theirs = *iset.Intervals();
  Iter mine_it = intervals_.begin();
  Iter theirs_it = theirs.begin();
  bool mine_only = false;    // some point is in this set but not in iset
  bool theirs_only = false;  // some point is in iset but not in this set
  bool shared = false;       // some point is in both sets

  while (mine_it != intervals_.end() && theirs_it != theirs.end()) {
    if (mine_it->end_ <= theirs_it->begin_) {
      // Disjoint, this set's interval comes first.
      mine_only = true;
      ++mine_it;
    } else if (theirs_it->end_ <= mine_it->begin_) {
      // Disjoint, iset's interval comes first.
      theirs_only = true;
      ++theirs_it;
    } else if (theirs_it->begin_ == mine_it->begin_ &&
               theirs_it->end_ == mine_it->end_) {
      // Identical intervals.
      shared = true;
      ++mine_it;
      ++theirs_it;
    } else if (theirs_it->begin_ <= mine_it->begin_ &&
               theirs_it->end_ >= mine_it->end_) {
      // This set's interval lies inside iset's interval.
      theirs_only = true;
      shared = true;
      ++mine_it;
    } else if (mine_it->begin_ <= theirs_it->begin_ &&
               mine_it->end_ >= theirs_it->end_) {
      // iset's interval lies inside this set's interval.
      mine_only = true;
      shared = true;
      ++theirs_it;
    } else {
      // Partial overlap: all three conditions hold at once.
      return true;
    }
    if (mine_only && theirs_only && shared)
      return true;
  }
  // Leftover intervals on either side are points unique to that side.
  if (mine_it != intervals_.end())
    mine_only = true;
  if (theirs_it != theirs.end())
    theirs_only = true;

  return mine_only && theirs_only && shared;
}

// Determines if an interval set is contained within the set. Requires
// intervals be normalized.
template <typename T>
bool IntervalSet<T>::Contains(const IntervalSet<T> &iset) const {
  // A set with more points cannot be contained.
  if (iset.Count() > Count())
    return false;

  typedef typename std::vector<Interval>::const_iterator Iter;
  const std::vector<Interval> &theirs = *iset.Intervals();
  Iter mine_it = intervals_.begin();
  Iter theirs_it = theirs.begin();

  while (mine_it != intervals_.end() && theirs_it != theirs.end()) {
    if (mine_it->end_ <= theirs_it->begin_) {
      // This set's interval ends before iset's begins; try the next one.
      ++mine_it;
      continue;
    }
    if (theirs_it->begin_ < mine_it->begin_ ||
        theirs_it->end_ > mine_it->end_)
      return false;  // iset's interval sticks out of this set's interval
    if (theirs_it->end_ == mine_it->end_)
      ++mine_it;
    ++theirs_it;
  }
  return theirs_it == theirs.end();
}

// Prints the set as "{[b1,e1),[b2,e2),...}".
template <typename T>
std::ostream &operator<<(std::ostream &strm, const IntervalSet<T> &s) {
  typedef typename IntervalSet<T>::Interval Interval;
  typedef typename std::vector<Interval>::size_type Index;
  const std::vector<Interval> &intervals = *s.Intervals();
  strm << "{";
  for (Index i = 0; i < intervals.size(); ++i) {
    if (i != 0)
      strm << ",";
    strm << "[" << intervals[i].begin_ << "," << intervals[i].end_ << ")";
  }
  strm << "}";
  return strm;
}
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Functions and classes to invert an Fst. + +#ifndef FST_LIB_INVERT_H__ +#define FST_LIB_INVERT_H__ + +#include <fst/arc-map.h> +#include <fst/mutable-fst.h> + + +namespace fst { + +// Mapper to implement inversion of an arc. +template <class A> struct InvertMapper { + InvertMapper() {} + + A operator()(const A &arc) { + return A(arc.olabel, arc.ilabel, arc.weight, arc.nextstate); + } + + MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } + + MapSymbolsAction InputSymbolsAction() const { return MAP_CLEAR_SYMBOLS; } + + MapSymbolsAction OutputSymbolsAction() const { return MAP_CLEAR_SYMBOLS;} + + uint64 Properties(uint64 props) { return InvertProperties(props); } +}; + + +// Inverts the transduction corresponding to an FST by exchanging the +// FST's input and output labels. This version modifies its input. +// +// Complexity: +// - Time: O(V + E) +// - Space: O(1) +// where V = # of states and E = # of arcs. +template<class Arc> inline +void Invert(MutableFst<Arc> *fst) { + SymbolTable *input = fst->InputSymbols() ? fst->InputSymbols()->Copy() : 0; + SymbolTable *output = fst->OutputSymbols() ? fst->OutputSymbols()->Copy() : 0; + ArcMap(fst, InvertMapper<Arc>()); + fst->SetInputSymbols(output); + fst->SetOutputSymbols(input); + delete input; + delete output; +} + + +// Inverts the transduction corresponding to an FST by exchanging the +// FST's input and output labels. This version is a delayed Fst. +// +// Complexity: +// - Time: O(v + e) +// - Space: O(1) +// where v = # of states visited, e = # of arcs visited. Constant +// time and to visit an input state or arc is assumed and exclusive +// of caching. 
+template <class A> +class InvertFst : public ArcMapFst<A, A, InvertMapper<A> > { + public: + typedef A Arc; + typedef InvertMapper<A> C; + typedef ArcMapFstImpl< A, A, InvertMapper<A> > Impl; + using ImplToFst<Impl>::GetImpl; + + explicit InvertFst(const Fst<A> &fst) : ArcMapFst<A, A, C>(fst, C()) { + GetImpl()->SetOutputSymbols(fst.InputSymbols()); + GetImpl()->SetInputSymbols(fst.OutputSymbols()); + } + + // See Fst<>::Copy() for doc. + InvertFst(const InvertFst<A> &fst, bool safe = false) + : ArcMapFst<A, A, C>(fst, safe) {} + + // Get a copy of this InvertFst. See Fst<>::Copy() for further doc. + virtual InvertFst<A> *Copy(bool safe = false) const { + return new InvertFst(*this, safe); + } +}; + + +// Specialization for InvertFst. +template <class A> +class StateIterator< InvertFst<A> > + : public StateIterator< ArcMapFst<A, A, InvertMapper<A> > > { + public: + explicit StateIterator(const InvertFst<A> &fst) + : StateIterator< ArcMapFst<A, A, InvertMapper<A> > >(fst) {} +}; + + +// Specialization for InvertFst. +template <class A> +class ArcIterator< InvertFst<A> > + : public ArcIterator< ArcMapFst<A, A, InvertMapper<A> > > { + public: + ArcIterator(const InvertFst<A> &fst, typename A::StateId s) + : ArcIterator< ArcMapFst<A, A, InvertMapper<A> > >(fst, s) {} +}; + + +// Useful alias when using StdArc. +typedef InvertFst<StdArc> StdInvertFst; + +} // namespace fst + +#endif // FST_LIB_INVERT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/label-reachable.h b/kaldi_io/src/tools/openfst/include/fst/label-reachable.h new file mode 100644 index 0000000..af06eef --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/label-reachable.h @@ -0,0 +1,565 @@ +// label_reachable.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Class to determine if a non-epsilon label can be read as the +// first non-epsilon symbol along some path from a given state. + + +#ifndef FST_LIB_LABEL_REACHABLE_H__ +#define FST_LIB_LABEL_REACHABLE_H__ + +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <vector> +using std::vector; + +#include <fst/accumulator.h> +#include <fst/arcsort.h> +#include <fst/interval-set.h> +#include <fst/state-reachable.h> +#include <fst/vector-fst.h> + + +namespace fst { + +// Stores shareable data for label reachable class copies. 
+template <typename L> +class LabelReachableData { + public: + typedef L Label; + typedef typename IntervalSet<L>::Interval Interval; + + explicit LabelReachableData(bool reach_input, bool keep_relabel_data = true) + : reach_input_(reach_input), + keep_relabel_data_(keep_relabel_data), + have_relabel_data_(true), + final_label_(kNoLabel) {} + + ~LabelReachableData() {} + + bool ReachInput() const { return reach_input_; } + + vector< IntervalSet<L> > *IntervalSets() { return &isets_; } + + unordered_map<L, L> *Label2Index() { + if (!have_relabel_data_) + FSTERROR() << "LabelReachableData: no relabeling data"; + return &label2index_; + } + + Label FinalLabel() { + if (final_label_ == kNoLabel) + final_label_ = label2index_[kNoLabel]; + return final_label_; + } + + static LabelReachableData<L> *Read(istream &istrm) { + LabelReachableData<L> *data = new LabelReachableData<L>(); + + ReadType(istrm, &data->reach_input_); + ReadType(istrm, &data->keep_relabel_data_); + data->have_relabel_data_ = data->keep_relabel_data_; + if (data->keep_relabel_data_) + ReadType(istrm, &data->label2index_); + ReadType(istrm, &data->final_label_); + ReadType(istrm, &data->isets_); + return data; + } + + bool Write(ostream &ostrm) { + WriteType(ostrm, reach_input_); + WriteType(ostrm, keep_relabel_data_); + if (keep_relabel_data_) + WriteType(ostrm, label2index_); + WriteType(ostrm, FinalLabel()); + WriteType(ostrm, isets_); + return true; + } + + int RefCount() const { return ref_count_.count(); } + int IncrRefCount() { return ref_count_.Incr(); } + int DecrRefCount() { return ref_count_.Decr(); } + + private: + LabelReachableData() {} + + bool reach_input_; // Input or output labels considered? + bool keep_relabel_data_; // Save label2index_ to file? + bool have_relabel_data_; // Using label2index_? + Label final_label_; // Final label + RefCounter ref_count_; // Reference count. + unordered_map<L, L> label2index_; // Finds index for a label. 
+ vector<IntervalSet <L> > isets_; // Interval sets per state. + + DISALLOW_COPY_AND_ASSIGN(LabelReachableData); +}; + + +// Tests reachability of labels from a given state. If reach_input = +// true, then input labels are considered, o.w. output labels are +// considered. To test for reachability from a state s, first do +// SetState(s). Then a label l can be reached from state s of FST f +// iff Reach(r) is true where r = Relabel(l). The relabeling is +// required to ensure a compact representation of the reachable +// labels. + +// The whole FST can be relabeled instead with Relabel(&f, +// reach_input) so that the test Reach(r) applies directly to the +// labels of the transformed FST f. The relabeled FST will also be +// sorted appropriately for composition. +// +// Reachablity of a final state from state s (via an epsilon path) +// can be tested with ReachFinal(); +// +// Reachability can also be tested on the set of labels specified by +// an arc iterator, useful for FST composition. In particular, +// Reach(aiter, ...) is true if labels on the input (output) side of +// the transitions of the arc iterator, when iter_input is true +// (false), can be reached from the state s. The iterator labels must +// have already been relabeled. +// +// With the arc iterator test of reachability, the begin position, end +// position and accumulated arc weight of the matches can be +// returned. The optional template argument controls how reachable arc +// weights are accumulated. The default uses the semiring +// Plus(). Alternative ones can be used to distribute the weights in +// composition in various ways. 
+template <class A, class S = DefaultAccumulator<A> > +class LabelReachable { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename IntervalSet<Label>::Interval Interval; + + LabelReachable(const Fst<A> &fst, bool reach_input, S *s = 0, + bool keep_relabel_data = true) + : fst_(new VectorFst<Arc>(fst)), + s_(kNoStateId), + data_(new LabelReachableData<Label>(reach_input, keep_relabel_data)), + accumulator_(s ? s : new S()), + ncalls_(0), + nintervals_(0), + error_(false) { + StateId ins = fst_->NumStates(); + TransformFst(); + FindIntervals(ins); + delete fst_; + } + + explicit LabelReachable(LabelReachableData<Label> *data, S *s = 0) + : fst_(0), + s_(kNoStateId), + data_(data), + accumulator_(s ? s : new S()), + ncalls_(0), + nintervals_(0), + error_(false) { + data_->IncrRefCount(); + } + + LabelReachable(const LabelReachable<A, S> &reachable) : + fst_(0), + s_(kNoStateId), + data_(reachable.data_), + accumulator_(new S(*reachable.accumulator_)), + ncalls_(0), + nintervals_(0), + error_(reachable.error_) { + data_->IncrRefCount(); + } + + ~LabelReachable() { + if (!data_->DecrRefCount()) + delete data_; + delete accumulator_; + if (ncalls_ > 0) { + VLOG(2) << "# of calls: " << ncalls_; + VLOG(2) << "# of intervals/call: " << (nintervals_ / ncalls_); + } + } + + // Relabels w.r.t labels that give compact label sets. + Label Relabel(Label label) { + if (label == 0 || error_) + return label; + unordered_map<Label, Label> &label2index = *data_->Label2Index(); + Label &relabel = label2index[label]; + if (!relabel) // Add new label + relabel = label2index.size() + 1; + return relabel; + } + + // Relabels Fst w.r.t to labels that give compact label sets. 
+ void Relabel(MutableFst<Arc> *fst, bool relabel_input) { + for (StateIterator< MutableFst<Arc> > siter(*fst); + !siter.Done(); siter.Next()) { + StateId s = siter.Value(); + for (MutableArcIterator< MutableFst<Arc> > aiter(fst, s); + !aiter.Done(); + aiter.Next()) { + Arc arc = aiter.Value(); + if (relabel_input) + arc.ilabel = Relabel(arc.ilabel); + else + arc.olabel = Relabel(arc.olabel); + aiter.SetValue(arc); + } + } + if (relabel_input) { + ArcSort(fst, ILabelCompare<Arc>()); + fst->SetInputSymbols(0); + } else { + ArcSort(fst, OLabelCompare<Arc>()); + fst->SetOutputSymbols(0); + } + } + + // Returns relabeling pairs (cf. relabel.h::Relabel()). + // If 'avoid_collisions' is true, extra pairs are added to + // ensure no collisions when relabeling automata that have + // labels unseen here. + void RelabelPairs(vector<pair<Label, Label> > *pairs, + bool avoid_collisions = false) { + pairs->clear(); + unordered_map<Label, Label> &label2index = *data_->Label2Index(); + // Maps labels to their new values in [1, label2index().size()] + for (typename unordered_map<Label, Label>::const_iterator + it = label2index.begin(); it != label2index.end(); ++it) + if (it->second != data_->FinalLabel()) + pairs->push_back(pair<Label, Label>(it->first, it->second)); + if (avoid_collisions) { + // Ensures any label in [1, label2index().size()] is mapped either + // by the above step or to label2index() + 1 (to avoid collisions). + for (int i = 1; i <= label2index.size(); ++i) { + typename unordered_map<Label, Label>::const_iterator + it = label2index.find(i); + if (it == label2index.end() || it->second == data_->FinalLabel()) + pairs->push_back(pair<Label, Label>(i, label2index.size() + 1)); + } + } + } + + // Set current state. Optionally set state associated + // with arc iterator to be passed to Reach. 
+ void SetState(StateId s, StateId aiter_s = kNoStateId) { + s_ = s; + if (aiter_s != kNoStateId) { + accumulator_->SetState(aiter_s); + if (accumulator_->Error()) error_ = true; + } + } + + // Can reach this label from current state? + // Original labels must be transformed by the Relabel methods above. + bool Reach(Label label) { + if (label == 0 || error_) + return false; + vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); + return isets[s_].Member(label); + + } + + // Can reach final state (via epsilon transitions) from this state? + bool ReachFinal() { + if (error_) return false; + vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); + return isets[s_].Member(data_->FinalLabel()); + } + + // Initialize with secondary FST to be used with Reach(Iterator,...). + // If copy is true, then 'fst' is a copy of the FST used in the + // previous call to this method (useful to avoid unnecessary updates). + template <class F> + void ReachInit(const F &fst, bool copy = false) { + accumulator_->Init(fst, copy); + if (accumulator_->Error()) error_ = true; + } + + // Can reach any arc iterator label between iterator positions + // aiter_begin and aiter_end? If aiter_input = true, then iterator + // input labels are considered, o.w. output labels are considered. + // Arc iterator labels must be transformed by the Relabel methods + // above. If compute_weight is true, user may call ReachWeight(). 
+ template <class Iterator> + bool Reach(Iterator *aiter, ssize_t aiter_begin, + ssize_t aiter_end, bool aiter_input, bool compute_weight) { + if (error_) return false; + vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); + const vector<Interval> *intervals = isets[s_].Intervals(); + ++ncalls_; + nintervals_ += intervals->size(); + + reach_begin_ = -1; + reach_end_ = -1; + reach_weight_ = Weight::Zero(); + + uint32 flags = aiter->Flags(); // save flags to restore them on exit + aiter->SetFlags(kArcNoCache, kArcNoCache); // make caching optional + aiter->Seek(aiter_begin); + + if (2 * (aiter_end - aiter_begin) < intervals->size()) { + // Check each arc against intervals. + // Set arc iterator flags to only compute the ilabel or olabel values, + // since they are the only values required for most of the arcs processed. + aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue, + kArcValueFlags); + Label reach_label = kNoLabel; + for (ssize_t aiter_pos = aiter_begin; + aiter_pos < aiter_end; aiter->Next(), ++aiter_pos) { + const A &arc = aiter->Value(); + Label label = aiter_input ? arc.ilabel : arc.olabel; + if (label == reach_label || Reach(label)) { + reach_label = label; + if (reach_begin_ < 0) + reach_begin_ = aiter_pos; + reach_end_ = aiter_pos + 1; + if (compute_weight) { + if (!(aiter->Flags() & kArcWeightValue)) { + // If the 'arc.weight' wasn't computed by the call + // to 'aiter->Value()' above, we need to call + // 'aiter->Value()' again after having set the arc iterator + // flags to compute the arc weight value. + aiter->SetFlags(kArcWeightValue, kArcValueFlags); + const A &arcb = aiter->Value(); + // Call the accumulator. + reach_weight_ = accumulator_->Sum(reach_weight_, arcb.weight); + // Only ilabel or olabel required to process the following + // arcs. + aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue, + kArcValueFlags); + } else { + // Call the accumulator. 
+ reach_weight_ = accumulator_->Sum(reach_weight_, arc.weight); + } + } + } + } + } else { + // Check each interval against arcs + ssize_t begin_low, end_low = aiter_begin; + for (typename vector<Interval>::const_iterator + iiter = intervals->begin(); + iiter != intervals->end(); ++iiter) { + begin_low = LowerBound(aiter, end_low, aiter_end, + aiter_input, iiter->begin); + end_low = LowerBound(aiter, begin_low, aiter_end, + aiter_input, iiter->end); + if (end_low - begin_low > 0) { + if (reach_begin_ < 0) + reach_begin_ = begin_low; + reach_end_ = end_low; + if (compute_weight) { + aiter->SetFlags(kArcWeightValue, kArcValueFlags); + reach_weight_ = accumulator_->Sum(reach_weight_, aiter, + begin_low, end_low); + } + } + } + } + + aiter->SetFlags(flags, kArcFlags); // restore original flag values + return reach_begin_ >= 0; + } + + // Returns iterator position of first matching arc. + ssize_t ReachBegin() const { return reach_begin_; } + + // Returns iterator position one past last matching arc. + ssize_t ReachEnd() const { return reach_end_; } + + // Return the sum of the weights for matching arcs. + // Valid only if compute_weight was true in Reach() call. + Weight ReachWeight() const { return reach_weight_; } + + // Access to the relabeling map. Excludes epsilon (0) label but + // includes kNoLabel that is used internally for super-final + // transitons. + const unordered_map<Label, Label>& Label2Index() const { + return *data_->Label2Index(); + } + + LabelReachableData<Label> *GetData() const { return data_; } + + bool Error() const { return error_ || accumulator_->Error(); } + + private: + // Redirects labeled arcs (input or output labels determined by + // ReachInput()) to new label-specific final states. Each original + // final state is redirected via a transition labeled with kNoLabel + // to a new kNoLabel-specific final state. Creates super-initial + // state for all states with zero in-degree. 
+ void TransformFst() { + StateId ins = fst_->NumStates(); + StateId ons = ins; + + vector<ssize_t> indeg(ins, 0); + + // Redirects labeled arcs to new final states. + for (StateId s = 0; s < ins; ++s) { + for (MutableArcIterator< VectorFst<Arc> > aiter(fst_, s); + !aiter.Done(); + aiter.Next()) { + Arc arc = aiter.Value(); + Label label = data_->ReachInput() ? arc.ilabel : arc.olabel; + if (label) { + if (label2state_.find(label) == label2state_.end()) { + label2state_[label] = ons; + indeg.push_back(0); + ++ons; + } + arc.nextstate = label2state_[label]; + aiter.SetValue(arc); + } + ++indeg[arc.nextstate]; // Finds in-degrees for next step. + } + + // Redirects final weights to new final state. + Weight final = fst_->Final(s); + if (final != Weight::Zero()) { + if (label2state_.find(kNoLabel) == label2state_.end()) { + label2state_[kNoLabel] = ons; + indeg.push_back(0); + ++ons; + } + Arc arc(kNoLabel, kNoLabel, final, label2state_[kNoLabel]); + fst_->AddArc(s, arc); + ++indeg[arc.nextstate]; // Finds in-degrees for next step. + + fst_->SetFinal(s, Weight::Zero()); + } + } + + // Add new final states to Fst. + while (fst_->NumStates() < ons) { + StateId s = fst_->AddState(); + fst_->SetFinal(s, Weight::One()); + } + + // Creates a super-initial state for all states with zero in-degree. 
+ StateId start = fst_->AddState(); + fst_->SetStart(start); + for (StateId s = 0; s < start; ++s) { + if (indeg[s] == 0) { + Arc arc(0, 0, Weight::One(), s); + fst_->AddArc(start, arc); + } + } + } + + void FindIntervals(StateId ins) { + StateReachable<A, Label> state_reachable(*fst_); + if (state_reachable.Error()) { + error_ = true; + return; + } + + vector<Label> &state2index = state_reachable.State2Index(); + vector< IntervalSet<Label> > &isets = *data_->IntervalSets(); + isets = state_reachable.IntervalSets(); + isets.resize(ins); + + unordered_map<Label, Label> &label2index = *data_->Label2Index(); + for (typename unordered_map<Label, StateId>::const_iterator + it = label2state_.begin(); + it != label2state_.end(); + ++it) { + Label l = it->first; + StateId s = it->second; + Label i = state2index[s]; + label2index[l] = i; + } + label2state_.clear(); + + double nintervals = 0; + ssize_t non_intervals = 0; + for (ssize_t s = 0; s < ins; ++s) { + nintervals += isets[s].Size(); + if (isets[s].Size() > 1) { + ++non_intervals; + VLOG(3) << "state: " << s << " # of intervals: " << isets[s].Size(); + } + } + VLOG(2) << "# of states: " << ins; + VLOG(2) << "# of intervals: " << nintervals; + VLOG(2) << "# of intervals/state: " << nintervals/ins; + VLOG(2) << "# of non-interval states: " << non_intervals; + } + + template <class Iterator> + ssize_t LowerBound(Iterator *aiter, ssize_t aiter_begin, + ssize_t aiter_end, bool aiter_input, + Label match_label) const { + // Only need to compute the ilabel or olabel of arcs when + // performing the binary search. + aiter->SetFlags(aiter_input ? kArcILabelValue : kArcOLabelValue, + kArcValueFlags); + ssize_t low = aiter_begin; + ssize_t high = aiter_end; + while (low < high) { + ssize_t mid = (low + high) / 2; + aiter->Seek(mid); + Label label = aiter_input ? 
+ aiter->Value().ilabel : aiter->Value().olabel; + if (label > match_label) { + high = mid; + } else if (label < match_label) { + low = mid + 1; + } else { + // Find first matching label (when non-deterministic) + for (ssize_t i = mid; i > low; --i) { + aiter->Seek(i - 1); + label = aiter_input ? aiter->Value().ilabel : aiter->Value().olabel; + if (label != match_label) { + aiter->Seek(i); + aiter->SetFlags(kArcValueFlags, kArcValueFlags); + return i; + } + } + aiter->SetFlags(kArcValueFlags, kArcValueFlags); + return low; + } + } + aiter->Seek(low); + aiter->SetFlags(kArcValueFlags, kArcValueFlags); + return low; + } + + VectorFst<Arc> *fst_; + StateId s_; // Current state + unordered_map<Label, StateId> label2state_; // Finds final state for a label + + ssize_t reach_begin_; // Iterator pos of first match + ssize_t reach_end_; // Iterator pos after last match + Weight reach_weight_; // Gives weight sum of arc iterator + // arcs with reachable labels. + LabelReachableData<Label> *data_; // Shareable data between copies + S *accumulator_; // Sums arc weights + + double ncalls_; + double nintervals_; + bool error_; + + void operator=(const LabelReachable<A, S> &); // Disallow +}; + +} // namespace fst + +#endif // FST_LIB_LABEL_REACHABLE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/lexicographic-weight.h b/kaldi_io/src/tools/openfst/include/fst/lexicographic-weight.h new file mode 100644 index 0000000..4b55c50 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/lexicographic-weight.h @@ -0,0 +1,151 @@ +// lexicographic-weight.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Richard Sproat) +// +// \file +// Lexicographic weight set and associated semiring operation definitions. +// +// A lexicographic weight is a sequence of weights, each of which must have the +// path property and Times() must be (strongly) cancellative +// (for all a,b,c != Zero(): Times(c, a) = Times(c, b) => a = b, +// Times(a, c) = Times(b, c) => a = b). +// The + operation on two weights a and b is the lexicographically +// prior of a and b. + +#ifndef FST_LIB_LEXICOGRAPHIC_WEIGHT_H__ +#define FST_LIB_LEXICOGRAPHIC_WEIGHT_H__ + +#include <string> + +#include <fst/pair-weight.h> +#include <fst/weight.h> + + +namespace fst { + +template<class W1, class W2> +class LexicographicWeight : public PairWeight<W1, W2> { + public: + using PairWeight<W1, W2>::Value1; + using PairWeight<W1, W2>::Value2; + using PairWeight<W1, W2>::SetValue1; + using PairWeight<W1, W2>::SetValue2; + using PairWeight<W1, W2>::Zero; + using PairWeight<W1, W2>::One; + using PairWeight<W1, W2>::NoWeight; + using PairWeight<W1, W2>::Quantize; + using PairWeight<W1, W2>::Reverse; + + typedef LexicographicWeight<typename W1::ReverseWeight, + typename W2::ReverseWeight> + ReverseWeight; + + LexicographicWeight() {} + + LexicographicWeight(const PairWeight<W1, W2>& w) + : PairWeight<W1, W2>(w) {} + + LexicographicWeight(W1 w1, W2 w2) : PairWeight<W1, W2>(w1, w2) { + uint64 props = kPath; + if ((W1::Properties() & props) != props) { + FSTERROR() << "LexicographicWeight must " + << "have the path property: " << 
W1::Type(); + SetValue1(W1::NoWeight()); + } + if ((W2::Properties() & props) != props) { + FSTERROR() << "LexicographicWeight must " + << "have the path property: " << W2::Type(); + SetValue2(W2::NoWeight()); + } + } + + static const LexicographicWeight<W1, W2> &Zero() { + static const LexicographicWeight<W1, W2> zero(PairWeight<W1, W2>::Zero()); + return zero; + } + + static const LexicographicWeight<W1, W2> &One() { + static const LexicographicWeight<W1, W2> one(PairWeight<W1, W2>::One()); + return one; + } + + static const LexicographicWeight<W1, W2> &NoWeight() { + static const LexicographicWeight<W1, W2> no_weight( + PairWeight<W1, W2>::NoWeight()); + return no_weight; + } + + static const string &Type() { + static const string type = W1::Type() + "_LT_" + W2::Type(); + return type; + } + + bool Member() const { + if (!Value1().Member() || !Value2().Member()) return false; + // Lexicographic weights cannot mix zeroes and non-zeroes. + if (Value1() == W1::Zero() && Value2() == W2::Zero()) return true; + if (Value1() != W1::Zero() && Value2() != W2::Zero()) return true; + return false; + } + + LexicographicWeight<W1, W2> Quantize(float delta = kDelta) const { + return PairWeight<W1, W2>::Quantize(); + } + + ReverseWeight Reverse() const { + return PairWeight<W1, W2>::Reverse(); + } + + static uint64 Properties() { + uint64 props1 = W1::Properties(); + uint64 props2 = W2::Properties(); + return props1 & props2 & (kLeftSemiring | kRightSemiring | kPath | + kIdempotent | kCommutative); + } +}; + +template <class W1, class W2> +inline LexicographicWeight<W1, W2> Plus(const LexicographicWeight<W1, W2> &w, + const LexicographicWeight<W1, W2> &v) { + if (!w.Member() || !v.Member()) + return LexicographicWeight<W1, W2>::NoWeight(); + NaturalLess<W1> less1; + NaturalLess<W2> less2; + if (less1(w.Value1(), v.Value1())) return w; + if (less1(v.Value1(), w.Value1())) return v; + if (less2(w.Value2(), v.Value2())) return w; + if (less2(v.Value2(), w.Value2())) return v; + 
return w; +} + +template <class W1, class W2> +inline LexicographicWeight<W1, W2> Times(const LexicographicWeight<W1, W2> &w, + const LexicographicWeight<W1, W2> &v) { + return LexicographicWeight<W1, W2>(Times(w.Value1(), v.Value1()), + Times(w.Value2(), v.Value2())); +} + +template <class W1, class W2> +inline LexicographicWeight<W1, W2> Divide(const LexicographicWeight<W1, W2> &w, + const LexicographicWeight<W1, W2> &v, + DivideType typ = DIVIDE_ANY) { + return LexicographicWeight<W1, W2>(Divide(w.Value1(), v.Value1(), typ), + Divide(w.Value2(), v.Value2(), typ)); +} + +} // namespace fst + +#endif // FST_LIB_LEXICOGRAPHIC_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/lock.h b/kaldi_io/src/tools/openfst/include/fst/lock.h new file mode 100644 index 0000000..58cb22a --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/lock.h @@ -0,0 +1,100 @@ +// lock.h +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Author: [email protected] (Michael Riley) +// +// \file +// Google-compatibility locking declarations and inline definitions +// +// Classes and functions here are no-ops (by design); proper locking requires +// actual implementation. 
+ +#ifndef FST_LIB_LOCK_H__ +#define FST_LIB_LOCK_H__ + +#include <fst/compat.h> // for DISALLOW_COPY_AND_ASSIGN + +namespace fst { + +using namespace std; + +// +// Single initialization - single-thread implementation +// + +typedef int FstOnceType; + +static const int FST_ONCE_INIT = 1; + +inline int FstOnceInit(FstOnceType *once, void (*init)(void)) { + if (*once) + (*init)(); + *once = 0; + return 0; +} + +// +// Thread locking - single-thread (non-)implementation +// + +class Mutex { + public: + Mutex() {} + + private: + DISALLOW_COPY_AND_ASSIGN(Mutex); +}; + +class MutexLock { + public: + MutexLock(Mutex *) {} + + private: + DISALLOW_COPY_AND_ASSIGN(MutexLock); +}; + +class ReaderMutexLock { + public: + ReaderMutexLock(Mutex *) {} + + private: + DISALLOW_COPY_AND_ASSIGN(ReaderMutexLock); +}; + +// Reference counting - single-thread implementation +class RefCounter { + public: + RefCounter() : count_(1) {} + + int count() const { return count_; } + +// below lines are modifications of openfst for multi-thrads support, +// from tools/extras/openfst_gcc41up.patch, applied by tools/Makefile, +// applicable to gcc 4.1 or above + // int Incr() const { return ++count_; } + // int Decr() const { return --count_; } + + int Incr() const { return __sync_add_and_fetch(&count_, 1); } + int Decr() const { return __sync_sub_and_fetch(&count_, 1); } +// end modifications + + private: + mutable int count_; + + DISALLOW_COPY_AND_ASSIGN(RefCounter); +}; + +} // namespace fst + +#endif // FST_LIB_LOCK_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/log.h b/kaldi_io/src/tools/openfst/include/fst/log.h new file mode 100644 index 0000000..d1492cd --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/log.h @@ -0,0 +1,66 @@ +// log.h +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Author: [email protected] (Michael Riley) +// +// \file +// Google-style logging declarations and inline definitions. + +#ifndef FST_LIB_LOG_H__ +#define FST_LIB_LOG_H__ + +#include <cassert> +#include <iostream> +#include <string> + +#include <fst/types.h> +#include <fst/flags.h> + +using std::string; + +DECLARE_int32(v); + +class LogMessage { + public: + LogMessage(const string &type) : fatal_(type == "FATAL") { + std::cerr << type << ": "; + } + ~LogMessage() { + std::cerr << std::endl; + if(fatal_) + exit(1); + } + std::ostream &stream() { return std::cerr; } + + private: + bool fatal_; +}; + +#define LOG(type) LogMessage(#type).stream() +#define VLOG(level) if ((level) <= FLAGS_v) LOG(INFO) + +// Checks +inline void CHECK(bool x) { assert(x); } + +#define CHECK_EQ(x, y) CHECK((x) == (y)) +#define CHECK_LT(x, y) CHECK((x) < (y)) +#define CHECK_GT(x, y) CHECK((x) > (y)) +#define CHECK_LE(x, y) CHECK((x) <= (y)) +#define CHECK_GE(x, y) CHECK((x) >= (y)) +#define CHECK_NE(x, y) CHECK((x) != (y)) + +// Ports +#define ATTRIBUTE_DEPRECATED __attribute__((deprecated)) + +#endif // FST_LIB_LOG_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/lookahead-filter.h b/kaldi_io/src/tools/openfst/include/fst/lookahead-filter.h new file mode 100644 index 0000000..e11c1bb --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/lookahead-filter.h @@ -0,0 +1,698 @@ +// lookahead-filter.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Composition filters to support lookahead matchers, useful for improving +// composition efficiency with certain inputs. + +#ifndef FST_LIB_LOOKAHEAD_FILTER_H__ +#define FST_LIB_LOOKAHEAD_FILTER_H__ + +#include <vector> +using std::vector; + +#include <fst/fst.h> +#include <fst/lookahead-matcher.h> + + +namespace fst { + +// Identifies and verifies the capabilities of the matcher to be used for +// lookahead with the composition filters below. This version is passed +// the matchers. +template <class M1, class M2> +MatchType LookAheadMatchType(const M1 &m1, const M2 &m2) { + MatchType type1 = m1.Type(false); + MatchType type2 = m2.Type(false); + if (type1 == MATCH_OUTPUT && + m1.Flags() & kOutputLookAheadMatcher) + return MATCH_OUTPUT; + else if (type2 == MATCH_INPUT && + m2.Flags() & kInputLookAheadMatcher) + return MATCH_INPUT; + else if (m1.Flags() & kOutputLookAheadMatcher && + m1.Type(true) == MATCH_OUTPUT) + return MATCH_OUTPUT; + else if (m2.Flags() & kInputLookAheadMatcher && + m2.Type(true) == MATCH_INPUT) + return MATCH_INPUT; + else + return MATCH_NONE; +} + +// Identifies and verifies the capabilities of the matcher to be used for +// lookahead with the composition filters below. This version uses the +// Fst's default matchers. 
+template <class Arc> +MatchType LookAheadMatchType(const Fst<Arc> &fst1, const Fst<Arc> &fst2) { + LookAheadMatcher< Fst <Arc> > matcher1(fst1, MATCH_OUTPUT); + LookAheadMatcher< Fst <Arc> > matcher2(fst2, MATCH_INPUT); + return LookAheadMatchType(matcher1, matcher2); +} + +// +// LookAheadSelector - a helper class for selecting among possibly +// distinct FST and matcher types w/o using a common base class. This +// lets us avoid virtual function calls. +// + +// Stores and returns the appropriate FST and matcher for lookahead. +// It is templated on the matcher types. General case has no methods +// since not currently supported. +template <class M1, class M2, MatchType MT> +class LookAheadSelector { +}; + +// Stores and returns the appropriate FST and matcher for lookahead. +// Specialized for two matchers of same type with the (match) 'type' +// arg determining which is used for lookahead. +template <class M, MatchType MT> +class LookAheadSelector<M, M, MT> { + public: + typedef typename M::Arc Arc; + typedef typename M::FST F; + + LookAheadSelector(M *lmatcher1, M *lmatcher2, MatchType type) + : lmatcher1_(lmatcher1->Copy()), + lmatcher2_(lmatcher2->Copy()), + type_(type) {} + + LookAheadSelector(const LookAheadSelector<M, M, MT> &selector) + : lmatcher1_(selector.lmatcher1_->Copy()), + lmatcher2_(selector.lmatcher2_->Copy()), + type_(selector.type_) {} + + ~LookAheadSelector() { + delete lmatcher1_; + delete lmatcher2_; + } + + const F &GetFst() const { + return type_ == MATCH_OUTPUT ? lmatcher2_->GetFst() : + lmatcher1_->GetFst(); + } + + M *GetMatcher() const { + return type_ == MATCH_OUTPUT ? lmatcher1_ : lmatcher2_; + } + + private: + M *lmatcher1_; + M *lmatcher2_; + MatchType type_; + + void operator=(const LookAheadSelector<M, M, MT> &); // disallow +}; + +// Stores and returns the appropriate FST and matcher for lookahead. +// Specialized for lookahead on input labels. 
+template <class M1, class M2> +class LookAheadSelector<M1, M2, MATCH_INPUT> { + public: + typedef typename M1::FST F1; + + LookAheadSelector(M1 *lmatcher1, M2 *lmatcher2, MatchType) + : fst_(lmatcher1->GetFst().Copy()), + lmatcher_(lmatcher2->Copy()) {} + + LookAheadSelector(const LookAheadSelector<M1, M2, MATCH_INPUT> &selector) + : fst_(selector.fst_->Copy()), + lmatcher_(selector.lmatcher_->Copy()) {} + + ~LookAheadSelector() { + delete lmatcher_; + delete fst_; + } + + const F1 &GetFst() const { return *fst_; } + + M2 *GetMatcher() const { return lmatcher_; } + + private: + const F1 *fst_; + M2 *lmatcher_; + + void operator=(const LookAheadSelector<M1, M2, MATCH_INPUT> &); // disallow +}; + + +// Stores and returns the appropriate FST and matcher for lookahead. +// Specialized for lookahead on output labels. +template <class M1, class M2> +class LookAheadSelector<M1, M2, MATCH_OUTPUT> { + public: + typedef typename M2::FST F2; + + LookAheadSelector(M1 *lmatcher1, M2 *lmatcher2, MatchType) + : fst_(lmatcher2->GetFst().Copy()), + lmatcher_(lmatcher1->Copy()) {} + + LookAheadSelector(const LookAheadSelector<M1, M2, MATCH_OUTPUT> &selector) + : fst_(selector.fst_->Copy()), + lmatcher_(selector.lmatcher_->Copy()) {} + + ~LookAheadSelector() { + delete lmatcher_; + delete fst_; + } + + const F2 &GetFst() const { return *fst_; } + + M1 *GetMatcher() const { return lmatcher_; } + + private: + const F2 *fst_; + M1 *lmatcher_; + + void operator=(const LookAheadSelector<M1, M2, MATCH_OUTPUT> &); // disallow +}; + +// This filter uses a lookahead matcher in FilterArc(arc1, arc2) to +// examine the future of the composition state (arc1.nextstate, +// arc2.nextstate), blocking moving forward when its determined to be +// non-coaccessible. It is templated on an underlying filter, +// typically the epsilon filter. Which matcher is the lookahead +// matcher is determined by the template argument MT unless it is +// MATCH_BOTH. 
In that case, both matcher arguments must be lookahead +// matchers of the same type and one will be selected by +// LookAheadMatchType() based on their capability. +template <class F, + class M1 = LookAheadMatcher<typename F::FST1>, + class M2 = M1, + MatchType MT = MATCH_BOTH> +class LookAheadComposeFilter { + public: + typedef typename F::FST1 FST1; + typedef typename F::FST2 FST2; + typedef typename F::Arc Arc; + typedef typename F::Matcher1 Matcher1; + typedef typename F::Matcher2 Matcher2; + typedef typename F::FilterState FilterState; + typedef LookAheadComposeFilter<F, M1, M2, MT> Filter; + + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + LookAheadComposeFilter(const FST1 &fst1, const FST2 &fst2, + M1 *matcher1, M2 *matcher2) + : filter_(fst1, fst2, matcher1, matcher2), + lookahead_type_(MT == MATCH_BOTH ? + LookAheadMatchType(*filter_.GetMatcher1(), + *filter_.GetMatcher2()) : MT), + selector_(filter_.GetMatcher1(), filter_.GetMatcher2(), + lookahead_type_), + flags_(lookahead_type_ == MATCH_OUTPUT ? 
+ filter_.GetMatcher1()->Flags() : + filter_.GetMatcher2()->Flags()) { + if (lookahead_type_ == MATCH_NONE) { + FSTERROR() << "LookAheadComposeFilter: 1st argument cannot " + << "match/look-ahead on output labels and 2nd argument " + << "cannot match/look-ahead on input labels."; + } + selector_.GetMatcher()->InitLookAheadFst(selector_.GetFst()); + } + + LookAheadComposeFilter(const LookAheadComposeFilter<F, M1, M2, MT> &filter, + bool safe = false) + : filter_(filter.filter_, safe), + lookahead_type_(filter.lookahead_type_), + selector_(filter_.GetMatcher1(), filter_.GetMatcher2(), + lookahead_type_), + flags_(filter.flags_) { + selector_.GetMatcher()->InitLookAheadFst(selector_.GetFst(), true); + } + + FilterState Start() const { + return filter_.Start(); + } + + void SetState(StateId s1, StateId s2, const FilterState &f) { + filter_.SetState(s1, s2, f); + } + + FilterState FilterArc(Arc *arc1, Arc *arc2) const { + lookahead_arc_ = false; + + const FilterState &f = filter_.FilterArc(arc1, arc2); + if (f == FilterState::NoState()) + return FilterState::NoState(); + + return LookAheadOutput() ? LookAheadFilterArc(arc1, arc2, f) : + LookAheadFilterArc(arc2, arc1, f); + } + + void FilterFinal(Weight *weight1, Weight *weight2) const { + filter_.FilterFinal(weight1, weight2); + } + + // Return resp matchers. Ownership stays with filter. 
+ Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); } + Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); } + + const LookAheadSelector<Matcher1, Matcher2, MT> &Selector() const { + return selector_; + } + + uint64 Properties(uint64 inprops) const { + uint64 outprops = filter_.Properties(inprops); + if (lookahead_type_ == MATCH_NONE) + outprops |= kError; + return outprops; + } + + uint32 LookAheadFlags() const { return flags_; } + + bool LookAheadArc() const { return lookahead_arc_; } + + bool LookAheadOutput() const { + if (MT == MATCH_OUTPUT) + return true; + else if (MT == MATCH_INPUT) + return false; + else if (lookahead_type_ == MATCH_OUTPUT) + return true; + else + return false; + } + + private: + FilterState LookAheadFilterArc(Arc *arca, Arc *arcb, + const FilterState &f) const { + Label &labela = LookAheadOutput() ? arca->olabel : arca->ilabel; + + if (labela != 0 && !(flags_ & kLookAheadNonEpsilons)) + return f; + if (labela == 0 && !(flags_ & kLookAheadEpsilons)) + return f; + + lookahead_arc_ = true; + selector_.GetMatcher()->SetState(arca->nextstate); + + return selector_.GetMatcher()->LookAheadFst(selector_.GetFst(), + arcb->nextstate) ? f : + FilterState::NoState(); + } + + F filter_; // Underlying filter + MatchType lookahead_type_; // Lookahead match type + LookAheadSelector<Matcher1, Matcher2, MT> selector_; + uint32 flags_; // Lookahead flags + mutable bool lookahead_arc_; // Look-ahead performed at last FilterArc()? + + void operator=(const LookAheadComposeFilter<F, M1, M2> &); // disallow +}; + + +// This filter adds weight-pushing to a lookahead composition filter +// using the LookAheadWeight() method of matcher argument. It is +// templated on an underlying lookahead filter, typically the basic +// lookahead filter. Weight-pushing in composition brings weights +// forward as much as possible based on the lookahead information. 
+template <class F, + class M1 = LookAheadMatcher<typename F::FST1>, + class M2 = M1, + MatchType MT = MATCH_BOTH> +class PushWeightsComposeFilter { + public: + typedef typename F::FST1 FST1; + typedef typename F::FST2 FST2; + typedef typename F::Arc Arc; + typedef typename F::Matcher1 Matcher1; + typedef typename F::Matcher2 Matcher2; + typedef typename F::FilterState FilterState1; + typedef WeightFilterState<typename Arc::Weight> FilterState2; + typedef PairFilterState<FilterState1, FilterState2> FilterState; + + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + PushWeightsComposeFilter(const FST1 &fst1, const FST2 &fst2, + M1 *matcher1, M2 *matcher2) + : filter_(fst1, fst2, matcher1, matcher2), + f_(FilterState::NoState()) {} + + PushWeightsComposeFilter(const PushWeightsComposeFilter<F, M1, M2, MT> + &filter, + bool safe = false) + : filter_(filter.filter_, safe), + f_(FilterState::NoState()) {} + + FilterState Start() const { + return FilterState(filter_.Start(), FilterState2(Weight::One())); + } + + void SetState(StateId s1, StateId s2, const FilterState &f) { + f_ = f; + filter_.SetState(s1, s2, f.GetState1()); + } + + FilterState FilterArc(Arc *arc1, Arc *arc2) const { + const FilterState1 &f1 = filter_.FilterArc(arc1, arc2); + if (f1 == FilterState1::NoState()) + return FilterState::NoState(); + + if (!(LookAheadFlags() & kLookAheadWeight)) + return FilterState(f1, FilterState2(Weight::One())); + + const Weight &lweight = filter_.LookAheadArc() ? 
+ Selector().GetMatcher()->LookAheadWeight() : Weight::One(); + const FilterState2 &f2 = f_.GetState2(); + const Weight &fweight = f2.GetWeight(); + + arc2->weight = Divide(Times(arc2->weight, lweight), fweight); + return FilterState(f1, FilterState2(lweight)); + } + + void FilterFinal(Weight *weight1, Weight *weight2) const { + filter_.FilterFinal(weight1, weight2); + if (!(LookAheadFlags() & kLookAheadWeight) || *weight1 == Weight::Zero()) + return; + + const FilterState2 &f2 = f_.GetState2(); + const Weight &fweight = f2.GetWeight(); + *weight1 = Divide(*weight1, fweight); + } + // Return resp matchers. Ownership states with filter. + Matcher1 *GetMatcher1() { return filter_.GetMatcher1(); } + Matcher2 *GetMatcher2() { return filter_.GetMatcher2(); } + + const LookAheadSelector<Matcher1, Matcher2, MT> &Selector() const { + return filter_.Selector(); + } + + uint32 LookAheadFlags() const { return filter_.LookAheadFlags(); } + bool LookAheadArc() const { return filter_.LookAheadArc(); } + bool LookAheadOutput() const { return filter_.LookAheadOutput(); } + + uint64 Properties(uint64 props) const { + return filter_.Properties(props) & kWeightInvariantProperties; + } + + private: + F filter_; // Underlying filter + FilterState f_; // Current filter state + + void operator=(const PushWeightsComposeFilter<F, M1, M2, MT> &); // disallow +}; + +// This filter adds label-pushing to a lookahead composition filter +// using the LookAheadPrefix() method of the matcher argument. It is +// templated on an underlying filter, typically the basic lookahead +// or weight-pushing lookahead filter. Label-pushing in composition +// matches labels as early as possible based on the lookahead +// information. 
+template <class F, + class M1 = LookAheadMatcher<typename F::FST1>, + class M2 = M1, + MatchType MT = MATCH_BOTH> +class PushLabelsComposeFilter { + public: + typedef typename F::FST1 FST1; + typedef typename F::FST2 FST2; + typedef typename F::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + typedef MultiEpsMatcher<typename F::Matcher1> Matcher1; + typedef MultiEpsMatcher<typename F::Matcher2> Matcher2; + typedef typename F::FilterState FilterState1; + typedef IntegerFilterState<typename Arc::Label> FilterState2; + typedef PairFilterState<FilterState1, FilterState2> FilterState; + + PushLabelsComposeFilter(const FST1 &fst1, const FST2 &fst2, + M1 *matcher1, M2 *matcher2) + : filter_(fst1, fst2, matcher1, matcher2), + f_(FilterState::NoState()), + fst1_(filter_.GetMatcher1()->GetFst()), + fst2_(filter_.GetMatcher2()->GetFst()), + matcher1_(fst1_, MATCH_OUTPUT, + filter_.LookAheadOutput() ? kMultiEpsList : kMultiEpsLoop, + filter_.GetMatcher1(), + false), + matcher2_(fst2_, MATCH_INPUT, + filter_.LookAheadOutput() ? kMultiEpsLoop : kMultiEpsList, + filter_.GetMatcher2(), + false) {} + + PushLabelsComposeFilter(const PushLabelsComposeFilter<F, M1, M2, MT> &filter, + bool safe = false) + : filter_(filter.filter_, safe), + f_(FilterState::NoState()), + fst1_(filter_.GetMatcher1()->GetFst()), + fst2_(filter_.GetMatcher2()->GetFst()), + matcher1_(fst1_, MATCH_OUTPUT, + filter_.LookAheadOutput() ? kMultiEpsList : kMultiEpsLoop, + filter_.GetMatcher1(), + false), + matcher2_(fst2_, MATCH_INPUT, + filter_.LookAheadOutput() ? 
kMultiEpsLoop : kMultiEpsList, + filter_.GetMatcher2(), + false) { + } + + FilterState Start() const { + return FilterState(filter_.Start(), FilterState2(kNoLabel)); + } + + void SetState(StateId s1, StateId s2, const FilterState &f) { + f_ = f; + filter_.SetState(s1, s2, f.GetState1()); + if (!(LookAheadFlags() & kLookAheadPrefix)) + return; + + narcsa_ = LookAheadOutput() ? internal::NumArcs(fst1_, s1) + : internal::NumArcs(fst2_, s2); + + const FilterState2 &f2 = f_.GetState2(); + const Label &flabel = f2.GetState(); + + GetMatcher1()->ClearMultiEpsLabels(); + GetMatcher2()->ClearMultiEpsLabels(); + if (flabel != kNoLabel) { // Have a lookahead label? + GetMatcher1()->AddMultiEpsLabel(flabel); // Yes, make it a multi-epsilon + GetMatcher2()->AddMultiEpsLabel(flabel); // label so that it matches the + } // implicit epsilon arc to be + } // modified below when pushing. + + FilterState FilterArc(Arc *arc1, Arc *arc2) const { + if (!(LookAheadFlags() & kLookAheadPrefix)) + return FilterState(filter_.FilterArc(arc1, arc2), + FilterState2(kNoLabel)); + + const FilterState2 &f2 = f_.GetState2(); + const Label &flabel = f2.GetState(); + if (flabel != kNoLabel) // Have a lookahead label? + return LookAheadOutput() ? PushedLabelFilterArc(arc1, arc2, flabel) : + PushedLabelFilterArc(arc2, arc1, flabel); + + const FilterState1 &f1 = filter_.FilterArc(arc1, arc2); + if (f1 == FilterState1::NoState()) + return FilterState::NoState(); + + if (!filter_.LookAheadArc()) + return FilterState(f1, FilterState2(kNoLabel)); + + return LookAheadOutput() ? 
PushLabelFilterArc(arc1, arc2, f1) : + PushLabelFilterArc(arc2, arc1, f1); + } + + void FilterFinal(Weight *weight1, Weight *weight2) const { + filter_.FilterFinal(weight1, weight2); + if (!(LookAheadFlags() & kLookAheadPrefix) || + *weight1 == Weight::Zero()) + return; + + const FilterState2 &f2 = f_.GetState2(); + const Label &flabel = f2.GetState(); + if (flabel != kNoLabel) + *weight1 = Weight::Zero(); + } + + // Return resp matchers. Ownership states with filter. + Matcher1 *GetMatcher1() { return &matcher1_; } + Matcher2 *GetMatcher2() { return &matcher2_; } + + uint64 Properties(uint64 iprops) const { + uint64 oprops = filter_.Properties(iprops); + if (LookAheadOutput()) + return oprops & kOLabelInvariantProperties; + else + return oprops & kILabelInvariantProperties; + } + + private: + const LookAheadSelector<typename F::Matcher1, typename F::Matcher2, MT> + &Selector() const { + return filter_.Selector(); + } + + // Consumes an already pushed label. + FilterState PushedLabelFilterArc(Arc *arca, Arc *arcb, + Label flabel) const { + Label &labela = LookAheadOutput() ? arca->olabel : arca->ilabel; + const Label &labelb = LookAheadOutput() ? arcb->ilabel : arcb->olabel; + + if (labelb != kNoLabel) { + return FilterState::NoState(); // Block non- (multi-) epsilon label + } else if (labela == flabel) { + labela = 0; // Convert match to multi-eps to eps + return Start(); + } else if (labela == 0) { + if (narcsa_ == 1) + return f_; // Take eps; keep state w/ label + Selector().GetMatcher()->SetState(arca->nextstate); + if (Selector().GetMatcher()->LookAheadLabel(flabel)) + return f_; // Take eps; keep state w/ label + else + return FilterState::NoState(); // Block non-coaccessible path + } else { + return FilterState::NoState(); // Block mismatch to multi-eps label + } + } + + // Pushes a label forward when possible. + FilterState PushLabelFilterArc(Arc *arca, Arc *arcb, + const FilterState1 &f1) const { + Label &labela = LookAheadOutput() ? 
arca->olabel : arca->ilabel; + const Label &labelb = LookAheadOutput() ? arcb->olabel : arcb->ilabel; + + if (labelb != 0) // No place to push. + return FilterState(f1, FilterState2(kNoLabel)); + if (labela != 0 && // Wrong lookahead prefix type? + LookAheadFlags() & kLookAheadNonEpsilonPrefix) + return FilterState(f1, FilterState2(kNoLabel)); + + Arc larc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId); + + if (Selector().GetMatcher()->LookAheadPrefix(&larc)) { // Have prefix arc? + labela = LookAheadOutput() ? larc.ilabel : larc.olabel; + arcb->ilabel = larc.ilabel; // Yes, go forward on that arc, + arcb->olabel = larc.olabel; // thus pushing the label. + arcb->weight = Times(arcb->weight, larc.weight); + arcb->nextstate = larc.nextstate; + return FilterState(f1, FilterState2(labela)); + } else { + return FilterState(f1, FilterState2(kNoLabel)); + } + } + + uint32 LookAheadFlags() const { return filter_.LookAheadFlags(); } + bool LookAheadArc() const { return filter_.LookAheadArc(); } + bool LookAheadOutput() const { return filter_.LookAheadOutput(); } + + F filter_; // Underlying filter + FilterState f_ ; // Current filter state + const FST1 &fst1_; + const FST2 &fst2_; + Matcher1 matcher1_; // Multi-epsilon matcher for fst1 + Matcher2 matcher2_; // Multi-epsilon matcher for fst2 + ssize_t narcsa_; // Number of arcs leaving look-ahead match FST + + void operator=(const PushLabelsComposeFilter<F, M1, M2, MT> &); // disallow +}; + +// +// CONVENIENCE CLASS useful for setting up composition with a default +// look-ahead matcher and filter. +// + +template <class A, MatchType type> // MATCH_NONE +class DefaultLookAhead { + public: + typedef Matcher< Fst<A> > M; + typedef SequenceComposeFilter<M> ComposeFilter; + typedef M FstMatcher; +}; + +// Specializes for MATCH_INPUT to allow lookahead. 
+template <class A> +class DefaultLookAhead<A, MATCH_INPUT> { + public: + typedef LookAheadMatcher< Fst<A> > M; + typedef SequenceComposeFilter<M> SF; + typedef LookAheadComposeFilter<SF, M> ComposeFilter; + typedef M FstMatcher; +}; + +// Specializes for MATCH_OUTPUT to allow lookahead. +template <class A> +class DefaultLookAhead<A, MATCH_OUTPUT> { + public: + typedef LookAheadMatcher< Fst<A> > M; + typedef AltSequenceComposeFilter<M> SF; + typedef LookAheadComposeFilter<SF, M> ComposeFilter; + typedef M FstMatcher; +}; + +// Specializes for StdArc to allow weight and label pushing. +template <> +class DefaultLookAhead<StdArc, MATCH_INPUT> { + public: + typedef StdArc A; + typedef LookAheadMatcher< Fst<A> > M; + typedef SequenceComposeFilter<M> SF; + typedef LookAheadComposeFilter<SF, M> LF; + typedef PushWeightsComposeFilter<LF, M> WF; + typedef PushLabelsComposeFilter<WF, M> ComposeFilter; + typedef M FstMatcher; +}; + +// Specializes for StdArc to allow weight and label pushing. +template <> +class DefaultLookAhead<StdArc, MATCH_OUTPUT> { + public: + typedef StdArc A; + typedef LookAheadMatcher< Fst<A> > M; + typedef AltSequenceComposeFilter<M> SF; + typedef LookAheadComposeFilter<SF, M> LF; + typedef PushWeightsComposeFilter<LF, M> WF; + typedef PushLabelsComposeFilter<WF, M> ComposeFilter; + typedef M FstMatcher; +}; + +// Specializes for LogArc to allow weight and label pushing. +template <> +class DefaultLookAhead<LogArc, MATCH_INPUT> { + public: + typedef LogArc A; + typedef LookAheadMatcher< Fst<A> > M; + typedef SequenceComposeFilter<M> SF; + typedef LookAheadComposeFilter<SF, M> LF; + typedef PushWeightsComposeFilter<LF, M> WF; + typedef PushLabelsComposeFilter<WF, M> ComposeFilter; + typedef M FstMatcher; +}; + +// Specializes for LogArc to allow weight and label pushing. 
+template <> +class DefaultLookAhead<LogArc, MATCH_OUTPUT> { + public: + typedef LogArc A; + typedef LookAheadMatcher< Fst<A> > M; + typedef AltSequenceComposeFilter<M> SF; + typedef LookAheadComposeFilter<SF, M> LF; + typedef PushWeightsComposeFilter<LF, M> WF; + typedef PushLabelsComposeFilter<WF, M> ComposeFilter; + typedef M FstMatcher; +}; + +} // namespace fst + +#endif // FST_LIB_LOOKAHEAD_FILTER_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/lookahead-matcher.h b/kaldi_io/src/tools/openfst/include/fst/lookahead-matcher.h new file mode 100644 index 0000000..f927d65 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/lookahead-matcher.h @@ -0,0 +1,812 @@ +// lookahead-matcher.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Classes to add lookahead to FST matchers, useful e.g. for improving +// composition efficiency with certain inputs. 
+ +#ifndef FST_LIB_LOOKAHEAD_MATCHER_H__ +#define FST_LIB_LOOKAHEAD_MATCHER_H__ + +#include <fst/add-on.h> +#include <fst/const-fst.h> +#include <fst/fst.h> +#include <fst/label-reachable.h> +#include <fst/matcher.h> + + +DECLARE_string(save_relabel_ipairs); +DECLARE_string(save_relabel_opairs); + +namespace fst { + +// LOOKAHEAD MATCHERS - these have the interface of Matchers (see +// matcher.h) and these additional methods: +// +// template <class F> +// class LookAheadMatcher { +// public: +// typedef F FST; +// typedef F::Arc Arc; +// typedef typename Arc::StateId StateId; +// typedef typename Arc::Label Label; +// typedef typename Arc::Weight Weight; +// +// // Required constructors. +// LookAheadMatcher(const F &fst, MatchType match_type); +// // If safe=true, the copy is thread-safe (except the lookahead Fst is +// // preserved). See Fst<>::Copy() for further doc. +// LookAheadMatcher(const LookAheadMatcher &matcher, bool safe = false); +// +// Below are methods for looking ahead for a match to a label and +// more generally, to a rational set. Each returns false if there is +// definitely not a match and returns true if there possibly is a +// match. + +// // LABEL LOOKAHEAD: Can 'label' be read from the current matcher state +// // after possibly following epsilon transitions? +// bool LookAheadLabel(Label label) const; +// +// // RATIONAL LOOKAHEAD: The next methods allow looking ahead for an +// // arbitrary rational set of strings, specified by an FST and a state +// // from which to begin the matching. If the lookahead FST is a +// // transducer, this looks on the side different from the matcher +// // 'match_type' (cf. composition). +// +// // Are there paths P from 's' in the lookahead FST that can be read from +// // the current matcher state? +// bool LookAheadFst(const Fst<Arc>& fst, StateId s); +// +// // Gives an estimate of the combined weight of the paths P in the +// // lookahead and matcher FSTs for the last call to LookAheadFst. 
+// // A trivial implementation returns Weight::One(). Non-trivial +// // implementations are useful for weight-pushing in composition. +// Weight LookAheadWeight() const; +// +// // Is there a single non-epsilon arc found in the lookahead FST +// // that begins P (after possibly following any epsilons) in the last +// // call to LookAheadFst? If so, return true and copy it to '*arc', o.w. +// // return false. A trivial implementation returns false. Non-trivial +// // implementations are useful for label-pushing in composition. +// bool LookAheadPrefix(Arc *arc); +// +// // Optionally pre-specifies the lookahead FST that will be passed +// // to LookAheadFst() for possible precomputation. If copy is true, +// // then 'fst' is a copy of the FST used in the previous call to +// // this method (useful to avoid unnecessary updates). +// void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false); +// +// }; + +// +// LOOK-AHEAD FLAGS (see also kMatcherFlags in matcher.h): +// +// Matcher is a lookahead matcher when 'match_type' is MATCH_INPUT. +const uint32 kInputLookAheadMatcher = 0x00000010; + +// Matcher is a lookahead matcher when 'match_type' is MATCH_OUTPUT. +const uint32 kOutputLookAheadMatcher = 0x00000020; + +// Is a non-trivial implementation of the LookAheadWeight() method defined +// and should it be used? +const uint32 kLookAheadWeight = 0x00000040; + +// Is a non-trivial implementation of the LookAheadPrefix() method defined +// and should it be used? +const uint32 kLookAheadPrefix = 0x00000080; + +// Look-ahead of matcher FST non-epsilon arcs? +const uint32 kLookAheadNonEpsilons = 0x00000100; + +// Look-ahead of matcher FST epsilon arcs? +const uint32 kLookAheadEpsilons = 0x00000200; + +// Ignore epsilon paths for the lookahead prefix? Note this gives +// correct results in composition only with an appropriate composition +// filter since it depends on the filter blocking the ignored paths. 
+const uint32 kLookAheadNonEpsilonPrefix = 0x00000400; + +// For LabelLookAheadMatcher, save relabeling data to file +const uint32 kLookAheadKeepRelabelData = 0x00000800; + +// Flags used for lookahead matchers. +const uint32 kLookAheadFlags = 0x00000ff0; + +// LookAhead Matcher interface, templated on the Arc definition; used +// for lookahead matcher specializations that are returned by the +// InitMatcher() Fst method. +template <class A> +class LookAheadMatcherBase : public MatcherBase<A> { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + + LookAheadMatcherBase() + : weight_(Weight::One()), + prefix_arc_(kNoLabel, kNoLabel, Weight::One(), kNoStateId) {} + + virtual ~LookAheadMatcherBase() {} + + bool LookAheadLabel(Label label) const { return LookAheadLabel_(label); } + + bool LookAheadFst(const Fst<Arc> &fst, StateId s) { + return LookAheadFst_(fst, s); + } + + Weight LookAheadWeight() const { return weight_; } + + bool LookAheadPrefix(Arc *arc) const { + if (prefix_arc_.nextstate != kNoStateId) { + *arc = prefix_arc_; + return true; + } else { + return false; + } + } + + virtual void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) = 0; + + protected: + void SetLookAheadWeight(const Weight &w) { weight_ = w; } + + void SetLookAheadPrefix(const Arc &arc) { prefix_arc_ = arc; } + + void ClearLookAheadPrefix() { prefix_arc_.nextstate = kNoStateId; } + + private: + virtual bool LookAheadLabel_(Label label) const = 0; + virtual bool LookAheadFst_(const Fst<Arc> &fst, + StateId s) = 0; // This must set l.a. weight and + // prefix if non-trivial. + Weight weight_; // Look-ahead weight + Arc prefix_arc_; // Look-ahead prefix arc +}; + + +// Don't really lookahead, just declare future looks good regardless. 
+template <class M> +class TrivialLookAheadMatcher + : public LookAheadMatcherBase<typename M::FST::Arc> { + public: + typedef typename M::FST FST; + typedef typename M::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + TrivialLookAheadMatcher(const FST &fst, MatchType match_type) + : matcher_(fst, match_type) {} + + TrivialLookAheadMatcher(const TrivialLookAheadMatcher<M> &lmatcher, + bool safe = false) + : matcher_(lmatcher.matcher_, safe) {} + + // General matcher methods + TrivialLookAheadMatcher<M> *Copy(bool safe = false) const { + return new TrivialLookAheadMatcher<M>(*this, safe); + } + + MatchType Type(bool test) const { return matcher_.Type(test); } + void SetState(StateId s) { return matcher_.SetState(s); } + bool Find(Label label) { return matcher_.Find(label); } + bool Done() const { return matcher_.Done(); } + const Arc& Value() const { return matcher_.Value(); } + void Next() { matcher_.Next(); } + virtual const FST &GetFst() const { return matcher_.GetFst(); } + uint64 Properties(uint64 props) const { return matcher_.Properties(props); } + uint32 Flags() const { + return matcher_.Flags() | kInputLookAheadMatcher | kOutputLookAheadMatcher; + } + + // Look-ahead methods. + bool LookAheadLabel(Label label) const { return true; } + bool LookAheadFst(const Fst<Arc> &fst, StateId s) {return true; } + Weight LookAheadWeight() const { return Weight::One(); } + bool LookAheadPrefix(Arc *arc) const { return false; } + void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) {} + + private: + // This allows base class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. 
+ virtual void SetState_(StateId s) { SetState(s); } + virtual bool Find_(Label label) { return Find(label); } + virtual bool Done_() const { return Done(); } + virtual const Arc& Value_() const { return Value(); } + virtual void Next_() { Next(); } + + bool LookAheadLabel_(Label l) const { return LookAheadLabel(l); } + + bool LookAheadFst_(const Fst<Arc> &fst, StateId s) { + return LookAheadFst(fst, s); + } + + Weight LookAheadWeight_() const { return LookAheadWeight(); } + bool LookAheadPrefix_(Arc *arc) const { return LookAheadPrefix(arc); } + + M matcher_; +}; + +// Look-ahead of one transition. Template argument F accepts flags to +// control behavior. +template <class M, uint32 F = kLookAheadNonEpsilons | kLookAheadEpsilons | + kLookAheadWeight | kLookAheadPrefix> +class ArcLookAheadMatcher + : public LookAheadMatcherBase<typename M::FST::Arc> { + public: + typedef typename M::FST FST; + typedef typename M::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + typedef NullAddOn MatcherData; + + using LookAheadMatcherBase<Arc>::LookAheadWeight; + using LookAheadMatcherBase<Arc>::SetLookAheadPrefix; + using LookAheadMatcherBase<Arc>::SetLookAheadWeight; + using LookAheadMatcherBase<Arc>::ClearLookAheadPrefix; + + ArcLookAheadMatcher(const FST &fst, MatchType match_type, + MatcherData *data = 0) + : matcher_(fst, match_type), + fst_(matcher_.GetFst()), + lfst_(0), + s_(kNoStateId) {} + + ArcLookAheadMatcher(const ArcLookAheadMatcher<M, F> &lmatcher, + bool safe = false) + : matcher_(lmatcher.matcher_, safe), + fst_(matcher_.GetFst()), + lfst_(lmatcher.lfst_), + s_(kNoStateId) {} + + // General matcher methods + ArcLookAheadMatcher<M, F> *Copy(bool safe = false) const { + return new ArcLookAheadMatcher<M, F>(*this, safe); + } + + MatchType Type(bool test) const { return matcher_.Type(test); } + + void SetState(StateId s) { + s_ = s; + matcher_.SetState(s); + } + + bool Find(Label label) { 
return matcher_.Find(label); } + bool Done() const { return matcher_.Done(); } + const Arc& Value() const { return matcher_.Value(); } + void Next() { matcher_.Next(); } + const FST &GetFst() const { return fst_; } + uint64 Properties(uint64 props) const { return matcher_.Properties(props); } + uint32 Flags() const { + return matcher_.Flags() | kInputLookAheadMatcher | + kOutputLookAheadMatcher | F; + } + + // Writable matcher methods + MatcherData *GetData() const { return 0; } + + // Look-ahead methods. + bool LookAheadLabel(Label label) const { return matcher_.Find(label); } + + // Checks if there is a matching (possibly super-final) transition + // at (s_, s). + bool LookAheadFst(const Fst<Arc> &fst, StateId s); + + void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) { + lfst_ = &fst; + } + + private: + // This allows base class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. 
+ virtual void SetState_(StateId s) { SetState(s); } + virtual bool Find_(Label label) { return Find(label); } + virtual bool Done_() const { return Done(); } + virtual const Arc& Value_() const { return Value(); } + virtual void Next_() { Next(); } + + bool LookAheadLabel_(Label l) const { return LookAheadLabel(l); } + bool LookAheadFst_(const Fst<Arc> &fst, StateId s) { + return LookAheadFst(fst, s); + } + + mutable M matcher_; + const FST &fst_; // Matcher FST + const Fst<Arc> *lfst_; // Look-ahead FST + StateId s_; // Matcher state +}; + +template <class M, uint32 F> +bool ArcLookAheadMatcher<M, F>::LookAheadFst(const Fst<Arc> &fst, StateId s) { + if (&fst != lfst_) + InitLookAheadFst(fst); + + bool ret = false; + ssize_t nprefix = 0; + if (F & kLookAheadWeight) + SetLookAheadWeight(Weight::Zero()); + if (F & kLookAheadPrefix) + ClearLookAheadPrefix(); + if (fst_.Final(s_) != Weight::Zero() && + lfst_->Final(s) != Weight::Zero()) { + if (!(F & (kLookAheadWeight | kLookAheadPrefix))) + return true; + ++nprefix; + if (F & kLookAheadWeight) + SetLookAheadWeight(Plus(LookAheadWeight(), + Times(fst_.Final(s_), lfst_->Final(s)))); + ret = true; + } + if (matcher_.Find(kNoLabel)) { + if (!(F & (kLookAheadWeight | kLookAheadPrefix))) + return true; + ++nprefix; + if (F & kLookAheadWeight) + for (; !matcher_.Done(); matcher_.Next()) + SetLookAheadWeight(Plus(LookAheadWeight(), matcher_.Value().weight)); + ret = true; + } + for (ArcIterator< Fst<Arc> > aiter(*lfst_, s); + !aiter.Done(); + aiter.Next()) { + const Arc &arc = aiter.Value(); + Label label = kNoLabel; + switch (matcher_.Type(false)) { + case MATCH_INPUT: + label = arc.olabel; + break; + case MATCH_OUTPUT: + label = arc.ilabel; + break; + default: + FSTERROR() << "ArcLookAheadMatcher::LookAheadFst: bad match type"; + return true; + } + if (label == 0) { + if (!(F & (kLookAheadWeight | kLookAheadPrefix))) + return true; + if (!(F & kLookAheadNonEpsilonPrefix)) + ++nprefix; + if (F & kLookAheadWeight) + 
SetLookAheadWeight(Plus(LookAheadWeight(), arc.weight)); + ret = true; + } else if (matcher_.Find(label)) { + if (!(F & (kLookAheadWeight | kLookAheadPrefix))) + return true; + for (; !matcher_.Done(); matcher_.Next()) { + ++nprefix; + if (F & kLookAheadWeight) + SetLookAheadWeight(Plus(LookAheadWeight(), + Times(arc.weight, + matcher_.Value().weight))); + if ((F & kLookAheadPrefix) && nprefix == 1) + SetLookAheadPrefix(arc); + } + ret = true; + } + } + if (F & kLookAheadPrefix) { + if (nprefix == 1) + SetLookAheadWeight(Weight::One()); // Avoids double counting. + else + ClearLookAheadPrefix(); + } + return ret; +} + + +// Template argument F accepts flags to control behavior. +// It must include precisely one of KInputLookAheadMatcher or +// KOutputLookAheadMatcher. +template <class M, uint32 F = kLookAheadEpsilons | kLookAheadWeight | + kLookAheadPrefix | kLookAheadNonEpsilonPrefix | + kLookAheadKeepRelabelData, + class S = DefaultAccumulator<typename M::Arc> > +class LabelLookAheadMatcher + : public LookAheadMatcherBase<typename M::FST::Arc> { + public: + typedef typename M::FST FST; + typedef typename M::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + typedef LabelReachableData<Label> MatcherData; + + using LookAheadMatcherBase<Arc>::LookAheadWeight; + using LookAheadMatcherBase<Arc>::SetLookAheadPrefix; + using LookAheadMatcherBase<Arc>::SetLookAheadWeight; + using LookAheadMatcherBase<Arc>::ClearLookAheadPrefix; + + LabelLookAheadMatcher(const FST &fst, MatchType match_type, + MatcherData *data = 0, S *s = 0) + : matcher_(fst, match_type), + lfst_(0), + label_reachable_(0), + s_(kNoStateId), + error_(false) { + if (!(F & (kInputLookAheadMatcher | kOutputLookAheadMatcher))) { + FSTERROR() << "LabelLookaheadMatcher: bad matcher flags: " << F; + error_ = true; + } + bool reach_input = match_type == MATCH_INPUT; + if (data) { + if (reach_input == data->ReachInput()) + 
label_reachable_ = new LabelReachable<Arc, S>(data, s); + } else if ((reach_input && (F & kInputLookAheadMatcher)) || + (!reach_input && (F & kOutputLookAheadMatcher))) { + label_reachable_ = new LabelReachable<Arc, S>( + fst, reach_input, s, F & kLookAheadKeepRelabelData); + } + } + + LabelLookAheadMatcher(const LabelLookAheadMatcher<M, F, S> &lmatcher, + bool safe = false) + : matcher_(lmatcher.matcher_, safe), + lfst_(lmatcher.lfst_), + label_reachable_( + lmatcher.label_reachable_ ? + new LabelReachable<Arc, S>(*lmatcher.label_reachable_) : 0), + s_(kNoStateId), + error_(lmatcher.error_) {} + + ~LabelLookAheadMatcher() { + delete label_reachable_; + } + + // General matcher methods + LabelLookAheadMatcher<M, F, S> *Copy(bool safe = false) const { + return new LabelLookAheadMatcher<M, F, S>(*this, safe); + } + + MatchType Type(bool test) const { return matcher_.Type(test); } + + void SetState(StateId s) { + if (s_ == s) + return; + s_ = s; + match_set_state_ = false; + reach_set_state_ = false; + } + + bool Find(Label label) { + if (!match_set_state_) { + matcher_.SetState(s_); + match_set_state_ = true; + } + return matcher_.Find(label); + } + + bool Done() const { return matcher_.Done(); } + const Arc& Value() const { return matcher_.Value(); } + void Next() { matcher_.Next(); } + const FST &GetFst() const { return matcher_.GetFst(); } + + uint64 Properties(uint64 inprops) const { + uint64 outprops = matcher_.Properties(inprops); + if (error_ || (label_reachable_ && label_reachable_->Error())) + outprops |= kError; + return outprops; + } + + uint32 Flags() const { + if (label_reachable_ && label_reachable_->GetData()->ReachInput()) + return matcher_.Flags() | F | kInputLookAheadMatcher; + else if (label_reachable_ && !label_reachable_->GetData()->ReachInput()) + return matcher_.Flags() | F | kOutputLookAheadMatcher; + else + return matcher_.Flags(); + } + + // Writable matcher methods + MatcherData *GetData() const { + return label_reachable_ ? 
label_reachable_->GetData() : 0; + }; + + // Look-ahead methods. + bool LookAheadLabel(Label label) const { + if (label == 0) + return true; + + if (label_reachable_) { + if (!reach_set_state_) { + label_reachable_->SetState(s_); + reach_set_state_ = true; + } + return label_reachable_->Reach(label); + } else { + return true; + } + } + + // Checks if there is a matching (possibly super-final) transition + // at (s_, s). + template <class L> + bool LookAheadFst(const L &fst, StateId s); + + void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) { + lfst_ = &fst; + if (label_reachable_) + label_reachable_->ReachInit(fst, copy); + } + + template <class L> + void InitLookAheadFst(const L& fst, bool copy = false) { + lfst_ = static_cast<const Fst<Arc> *>(&fst); + if (label_reachable_) + label_reachable_->ReachInit(fst, copy); + } + + private: + // This allows base class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. + virtual void SetState_(StateId s) { SetState(s); } + virtual bool Find_(Label label) { return Find(label); } + virtual bool Done_() const { return Done(); } + virtual const Arc& Value_() const { return Value(); } + virtual void Next_() { Next(); } + + bool LookAheadLabel_(Label l) const { return LookAheadLabel(l); } + bool LookAheadFst_(const Fst<Arc> &fst, StateId s) { + return LookAheadFst(fst, s); + } + + mutable M matcher_; + const Fst<Arc> *lfst_; // Look-ahead FST + LabelReachable<Arc, S> *label_reachable_; // Label reachability info + StateId s_; // Matcher state + bool match_set_state_; // matcher_.SetState called? + mutable bool reach_set_state_; // reachable_.SetState called? 
+ bool error_; +}; + +template <class M, uint32 F, class S> +template <class L> inline +bool LabelLookAheadMatcher<M, F, S>::LookAheadFst(const L &fst, StateId s) { + if (static_cast<const Fst<Arc> *>(&fst) != lfst_) + InitLookAheadFst(fst); + + SetLookAheadWeight(Weight::One()); + ClearLookAheadPrefix(); + + if (!label_reachable_) + return true; + + label_reachable_->SetState(s_, s); + reach_set_state_ = true; + + bool compute_weight = F & kLookAheadWeight; + bool compute_prefix = F & kLookAheadPrefix; + + bool reach_input = Type(false) == MATCH_OUTPUT; + ArcIterator<L> aiter(fst, s); + bool reach_arc = label_reachable_->Reach(&aiter, 0, + internal::NumArcs(*lfst_, s), + reach_input, compute_weight); + Weight lfinal = internal::Final(*lfst_, s); + bool reach_final = lfinal != Weight::Zero() && label_reachable_->ReachFinal(); + if (reach_arc) { + ssize_t begin = label_reachable_->ReachBegin(); + ssize_t end = label_reachable_->ReachEnd(); + if (compute_prefix && end - begin == 1 && !reach_final) { + aiter.Seek(begin); + SetLookAheadPrefix(aiter.Value()); + compute_weight = false; + } else if (compute_weight) { + SetLookAheadWeight(label_reachable_->ReachWeight()); + } + } + if (reach_final && compute_weight) + SetLookAheadWeight(reach_arc ? + Plus(LookAheadWeight(), lfinal) : lfinal); + + return reach_arc || reach_final; +} + + +// Label-lookahead relabeling class. +template <class A> +class LabelLookAheadRelabeler { + public: + typedef typename A::Label Label; + typedef LabelReachableData<Label> MatcherData; + typedef AddOnPair<MatcherData, MatcherData> D; + + // Relabels matcher Fst - initialization function object. + template <typename I> + LabelLookAheadRelabeler(I **impl); + + // Relabels arbitrary Fst. Class L should be a label-lookahead Fst. + template <class L> + static void Relabel(MutableFst<A> *fst, const L &mfst, + bool relabel_input) { + typename L::Impl *impl = mfst.GetImpl(); + D *data = impl->GetAddOn(); + LabelReachable<A> reachable(data->First() ? 
+ data->First() : data->Second()); + reachable.Relabel(fst, relabel_input); + } + + // Returns relabeling pairs (cf. relabel.h::Relabel()). + // Class L should be a label-lookahead Fst. + // If 'avoid_collisions' is true, extra pairs are added to + // ensure no collisions when relabeling automata that have + // labels unseen here. + template <class L> + static void RelabelPairs(const L &mfst, vector<pair<Label, Label> > *pairs, + bool avoid_collisions = false) { + typename L::Impl *impl = mfst.GetImpl(); + D *data = impl->GetAddOn(); + LabelReachable<A> reachable(data->First() ? + data->First() : data->Second()); + reachable.RelabelPairs(pairs, avoid_collisions); + } +}; + +template <class A> +template <typename I> inline +LabelLookAheadRelabeler<A>::LabelLookAheadRelabeler(I **impl) { + Fst<A> &fst = (*impl)->GetFst(); + D *data = (*impl)->GetAddOn(); + const string name = (*impl)->Type(); + bool is_mutable = fst.Properties(kMutable, false); + MutableFst<A> *mfst = 0; + if (is_mutable) { + mfst = static_cast<MutableFst<A> *>(&fst); + } else { + mfst = new VectorFst<A>(fst); + data->IncrRefCount(); + delete *impl; + } + if (data->First()) { // reach_input + LabelReachable<A> reachable(data->First()); + reachable.Relabel(mfst, true); + if (!FLAGS_save_relabel_ipairs.empty()) { + vector<pair<Label, Label> > pairs; + reachable.RelabelPairs(&pairs, true); + WriteLabelPairs(FLAGS_save_relabel_ipairs, pairs); + } + } else { + LabelReachable<A> reachable(data->Second()); + reachable.Relabel(mfst, false); + if (!FLAGS_save_relabel_opairs.empty()) { + vector<pair<Label, Label> > pairs; + reachable.RelabelPairs(&pairs, true); + WriteLabelPairs(FLAGS_save_relabel_opairs, pairs); + } + } + if (!is_mutable) { + *impl = new I(*mfst, name); + (*impl)->SetAddOn(data); + delete mfst; + data->DecrRefCount(); + } +} + + +// Generic lookahead matcher, templated on the FST definition +// - a wrapper around pointer to specific one. 
+template <class F> +class LookAheadMatcher { + public: + typedef F FST; + typedef typename F::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + typedef LookAheadMatcherBase<Arc> LBase; + + LookAheadMatcher(const F &fst, MatchType match_type) { + base_ = fst.InitMatcher(match_type); + if (!base_) + base_ = new SortedMatcher<F>(fst, match_type); + lookahead_ = false; + } + + LookAheadMatcher(const LookAheadMatcher<F> &matcher, bool safe = false) { + base_ = matcher.base_->Copy(safe); + lookahead_ = matcher.lookahead_; + } + + ~LookAheadMatcher() { delete base_; } + + // General matcher methods + LookAheadMatcher<F> *Copy(bool safe = false) const { + return new LookAheadMatcher<F>(*this, safe); + } + + MatchType Type(bool test) const { return base_->Type(test); } + void SetState(StateId s) { base_->SetState(s); } + bool Find(Label label) { return base_->Find(label); } + bool Done() const { return base_->Done(); } + const Arc& Value() const { return base_->Value(); } + void Next() { base_->Next(); } + const F &GetFst() const { return static_cast<const F &>(base_->GetFst()); } + + uint64 Properties(uint64 props) const { return base_->Properties(props); } + + uint32 Flags() const { return base_->Flags(); } + + // Look-ahead methods + bool LookAheadLabel(Label label) const { + if (LookAheadCheck()) { + LBase *lbase = static_cast<LBase *>(base_); + return lbase->LookAheadLabel(label); + } else { + return true; + } + } + + bool LookAheadFst(const Fst<Arc> &fst, StateId s) { + if (LookAheadCheck()) { + LBase *lbase = static_cast<LBase *>(base_); + return lbase->LookAheadFst(fst, s); + } else { + return true; + } + } + + Weight LookAheadWeight() const { + if (LookAheadCheck()) { + LBase *lbase = static_cast<LBase *>(base_); + return lbase->LookAheadWeight(); + } else { + return Weight::One(); + } + } + + bool LookAheadPrefix(Arc *arc) const { + if (LookAheadCheck()) { + LBase *lbase = 
static_cast<LBase *>(base_); + return lbase->LookAheadPrefix(arc); + } else { + return false; + } + } + + void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) { + if (LookAheadCheck()) { + LBase *lbase = static_cast<LBase *>(base_); + lbase->InitLookAheadFst(fst, copy); + } + } + + private: + bool LookAheadCheck() const { + if (!lookahead_) { + lookahead_ = base_->Flags() & + (kInputLookAheadMatcher | kOutputLookAheadMatcher); + if (!lookahead_) { + FSTERROR() << "LookAheadMatcher: No look-ahead matcher defined"; + } + } + return lookahead_; + } + + MatcherBase<Arc> *base_; + mutable bool lookahead_; + + void operator=(const LookAheadMatcher<Arc> &); // disallow +}; + +} // namespace fst + +#endif // FST_LIB_LOOKAHEAD_MATCHER_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/map.h b/kaldi_io/src/tools/openfst/include/fst/map.h new file mode 100644 index 0000000..419cac4 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/map.h @@ -0,0 +1,121 @@ +// map.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Compatability file for old-style Map() functions and MapFst class +// that have been renamed to ArcMap (cf. StateMap). 
+ +#ifndef FST_LIB_MAP_H__ +#define FST_LIB_MAP_H__ + + +#include <fst/arc-map.h> + + +namespace fst { + +template<class A, class C> +void Map(MutableFst<A> *fst, C* mapper) { + ArcMap(fst, mapper); +} + +template<class A, class C> +void Map(MutableFst<A> *fst, C mapper) { + ArcMap(fst, mapper); +} + +template<class A, class B, class C> +void Map(const Fst<A> &ifst, MutableFst<B> *ofst, C* mapper) { + ArcMap(ifst, ofst, mapper); +} + +template<class A, class B, class C> +void Map(const Fst<A> &ifst, MutableFst<B> *ofst, C mapper) { + ArcMap(ifst, ofst, mapper); +} + +typedef ArcMapFstOptions MapFstOptions; + +template <class A, class B, class C> +class MapFst : public ArcMapFst<A, B, C> { + public: + typedef B Arc; + typedef typename B::Weight Weight; + typedef typename B::StateId StateId; + typedef CacheState<B> State; + + MapFst(const Fst<A> &fst, const C &mapper, const MapFstOptions& opts) + : ArcMapFst<A, B, C>(fst, mapper, opts) {} + + MapFst(const Fst<A> &fst, C* mapper, const MapFstOptions& opts) + : ArcMapFst<A, B, C>(fst, mapper, opts) {} + + MapFst(const Fst<A> &fst, const C &mapper) + : ArcMapFst<A, B, C>(fst, mapper) {} + + MapFst(const Fst<A> &fst, C* mapper) : ArcMapFst<A, B, C>(fst, mapper) {} + + // See Fst<>::Copy() for doc. + MapFst(const ArcMapFst<A, B, C> &fst, bool safe = false) + : ArcMapFst<A, B, C>(fst, safe) {} + + // Get a copy of this MapFst. See Fst<>::Copy() for further doc. +virtual MapFst<A, B, C> *Copy(bool safe = false) const { + return new MapFst(*this, safe); + } +}; + + +// Specialization for MapFst. +template <class A, class B, class C> +class StateIterator< MapFst<A, B, C> > + : public StateIterator< ArcMapFst<A, B, C> > { + public: + explicit StateIterator(const ArcMapFst<A, B, C> &fst) + : StateIterator< ArcMapFst<A, B, C> >(fst) {} +}; + + +// Specialization for MapFst. 
+template <class A, class B, class C> +class ArcIterator< MapFst<A, B, C> > + : public ArcIterator< ArcMapFst<A, B, C> > { + public: + ArcIterator(const ArcMapFst<A, B, C> &fst, typename A::StateId s) + : ArcIterator< ArcMapFst<A, B, C> >(fst, s) {} +}; + + +template <class A> +struct IdentityMapper { + typedef A FromArc; + typedef A ToArc; + + A operator()(const A &arc) const { return arc; } + + MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; } + + MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + + MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} + + uint64 Properties(uint64 props) const { return props; } +}; + +} // namespace fst + +#endif // FST_LIB_MAP_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/mapped-file.h b/kaldi_io/src/tools/openfst/include/fst/mapped-file.h new file mode 100644 index 0000000..d61bc14 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/mapped-file.h @@ -0,0 +1,83 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jeffrey Sorensen) + +#ifndef FST_LIB_MAPPED_FILE_H_ +#define FST_LIB_MAPPED_FILE_H_ + +#include <unistd.h> +#include <sys/mman.h> + +#include <fst/fst.h> +#include <iostream> +#include <fstream> +#include <sstream> + +DECLARE_int32(fst_arch_alignment); // defined in mapped-file.h + +namespace fst { + +// A memory region is a simple abstraction for allocated memory or data from +// mmap'ed files. If mmap equals NULL, then data represents an owned region of +// size bytes. Otherwise, mmap and size refer to the mapping and data is a +// casted pointer to a region contained within [mmap, mmap + size). +// If size is 0, then mmap refers and data refer to a block of memory managed +// externally by some other allocator. +struct MemoryRegion { + void *data; + void *mmap; + size_t size; +}; + +class MappedFile { + public: + virtual ~MappedFile(); + + void* mutable_data() const { + return reinterpret_cast<void*>(region_.data); + } + + const void* data() const { + return reinterpret_cast<void*>(region_.data); + } + + // Returns a MappedFile object that contains the contents of the input + // stream s starting from the current file position with size bytes. + // The file name must also be provided in the FstReadOptions as opts.source + // or else mapping will fail. If mapping is not possible, then a MappedFile + // object with a new[]'ed block of memory will be created. + static MappedFile* Map(istream* s, const FstReadOptions& opts, size_t size); + + // Creates a MappedFile object with a new[]'ed block of memory of size. + // RECOMMENDED FOR INTERNAL USE ONLY, may change in future releases. + static MappedFile* Allocate(size_t size); + + // Creates a MappedFile object pointing to a borrowed reference to data. + // This block of memory is not owned by the MappedFile object and will not + // be freed. + // RECOMMENDED FOR INTERNAL USE ONLY, may change in future releases. 
+ static MappedFile* Borrow(void *data); + + static const int kArchAlignment; + + private: + explicit MappedFile(const MemoryRegion ®ion); + + MemoryRegion region_; + DISALLOW_COPY_AND_ASSIGN(MappedFile); +}; +} // namespace fst + +#endif // FST_LIB_MAPPED_FILE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/matcher-fst.h b/kaldi_io/src/tools/openfst/include/fst/matcher-fst.h new file mode 100644 index 0000000..73e64ad --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/matcher-fst.h @@ -0,0 +1,359 @@ +// matcher-fst.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Class to add a matcher to an FST. + +#ifndef FST_LIB_MATCHER_FST_FST_H__ +#define FST_LIB_MATCHER_FST_FST_H__ + +#include <fst/add-on.h> +#include <fst/const-fst.h> +#include <fst/lookahead-matcher.h> + + +namespace fst { + +// WRITABLE MATCHERS - these have the interface of Matchers (see +// matcher.h) and these additional methods: +// +// template <class F> +// class Matcher { +// public: +// typedef ... MatcherData; // Initialization data +// ... +// // Constructor with additional argument for external initialization +// // data; matcher increments its reference count on construction and +// // decrements the reference count, and if 0 deletes, on destruction. 
//   Matcher(const F &fst, MatchType type, MatcherData *data);
//
//   // Returns pointer to initialization data that can be
//   // passed to a Matcher constructor.
//   MatcherData *GetData() const;
// };

// The matcher initialization data class must have the form:
// class MatcherData {
//  public:
//   // Required copy constructor.
//   MatcherData(const MatcherData &);
//   //
//   // Required I/O methods.
//   static MatcherData *Read(istream &istrm);
//   bool Write(ostream &ostrm);
//
//   // Required reference counting.
//   int RefCount() const;
//   int IncrRefCount();
//   int DecrRefCount();
// };

// Default MatcherFst initializer - does nothing.
// It is constructed with the address of the Impl pointer (see
// MatcherFst::CreateImpl) and ignores it.
template <class M>
class NullMatcherFstInit {
 public:
  typedef AddOnPair<typename M::MatcherData, typename M::MatcherData> D;
  typedef AddOnImpl<typename M::FST, D> Impl;
  NullMatcherFstInit(Impl **) {}
};

// Class to add a matcher M to an Fst F. Creates a new Fst of type name N.
// Optional function object I can be used to initialize the Fst.
// The implementation is an AddOnImpl holding the Fst F together with a pair
// of matcher data objects: First() is used for MATCH_INPUT, Second() for
// every other match type (see GetData()).
template <class F, class M, const char* N,
          class I = NullMatcherFstInit<M> >
class MatcherFst
    : public ImplToExpandedFst<
               AddOnImpl<F,
                         AddOnPair<typename M::MatcherData,
                                   typename M::MatcherData> > > {
 public:
  friend class StateIterator< MatcherFst<F, M, N, I> >;
  friend class ArcIterator< MatcherFst<F, M, N, I> >;

  typedef F FST;
  typedef M FstMatcher;
  typedef typename F::Arc Arc;
  typedef typename Arc::StateId StateId;
  typedef AddOnPair<typename M::MatcherData, typename M::MatcherData> D;
  typedef AddOnImpl<F, D> Impl;

  // Wraps a default-constructed F; no matcher data is attached here.
  MatcherFst() : ImplToExpandedFst<Impl>(new Impl(F(), N)) {}

  // Wraps 'fst' and computes the matcher data for both match sides.
  explicit MatcherFst(const F &fst)
      : ImplToExpandedFst<Impl>(CreateImpl(fst, N)) {}

  // As above, after first converting the generic Fst to an F.
  explicit MatcherFst(const Fst<Arc> &fst)
      : ImplToExpandedFst<Impl>(CreateImpl(fst, N)) {}

  // See Fst<>::Copy() for doc.
  MatcherFst(const MatcherFst<F, M, N, I> &fst, bool safe = false)
      : ImplToExpandedFst<Impl>(fst, safe) {}

  // Get a copy of this MatcherFst. See Fst<>::Copy() for further doc.
  virtual MatcherFst<F, M, N, I> *Copy(bool safe = false) const {
    return new MatcherFst<F, M, N, I>(*this, safe);
  }

  // Read a MatcherFst from an input stream; return NULL on error
  static MatcherFst<F, M, N, I> *Read(istream &strm,
                                       const FstReadOptions &opts) {
    Impl *impl = Impl::Read(strm, opts);
    return impl ? new MatcherFst<F, M, N, I>(impl) : 0;
  }

  // Read a MatcherFst from a file; return NULL on error
  // Empty filename reads from standard input
  static MatcherFst<F, M, N, I> *Read(const string &filename) {
    Impl *impl = ImplToExpandedFst<Impl>::Read(filename);
    return impl ? new MatcherFst<F, M, N, I>(impl) : 0;
  }

  // Writes through the implementation's Write().
  virtual bool Write(ostream &strm, const FstWriteOptions &opts) const {
    return GetImpl()->Write(strm, opts);
  }

  // Writes to a file through Fst<Arc>::WriteFile().
  virtual bool Write(const string &filename) const {
    return Fst<Arc>::WriteFile(filename);
  }

  virtual void InitStateIterator(StateIteratorData<Arc> *data) const {
    return GetImpl()->InitStateIterator(data);
  }

  virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const {
    return GetImpl()->InitArcIterator(s, data);
  }

  // Returns a newly allocated matcher M over the wrapped Fst, initialized
  // with the stored data for the requested match side.
  virtual M *InitMatcher(MatchType match_type) const {
    return new M(GetFst(), match_type, GetData(match_type));
  }

  // Allows access to MatcherFst components.
  Impl *GetImpl() const {
    return ImplToFst<Impl, ExpandedFst<Arc> >::GetImpl();
  }

  // Returns the wrapped Fst held by the implementation.
  F& GetFst() const { return GetImpl()->GetFst(); }

  // Returns the add-on's First() element for MATCH_INPUT and its Second()
  // element for any other match type.
  typename M::MatcherData *GetData(MatchType match_type) const {
    D *data = GetImpl()->GetAddOn();
    return match_type == MATCH_INPUT ? data->First() : data->Second();
  }

 private:
  // Builds an implementation for 'fst'.  One matcher per match side is
  // constructed only to obtain its data; both data pointers are packed into
  // a new add-on pair that is attached to a new Impl.  The initializer I
  // then receives the address of the Impl pointer, so it is able to replace
  // the Impl.  Finally the local reference on the pair is dropped.
  // NOTE(review): this presumably relies on SetAddOn() taking its own
  // reference to 'data' - confirm in add-on.h.
  static Impl *CreateImpl(const F &fst, const string &name) {
    M imatcher(fst, MATCH_INPUT);
    M omatcher(fst, MATCH_OUTPUT);
    D *data = new D(imatcher.GetData(), omatcher.GetData());
    Impl *impl = new Impl(fst, name);
    impl->SetAddOn(data);
    I init(&impl);
    data->DecrRefCount();
    return impl;
  }

  // Converts the generic Fst to an F, then builds as above.
  static Impl *CreateImpl(const Fst<Arc> &fst, const string &name) {
    F ffst(fst);
    return CreateImpl(ffst, name);
  }

  // Wraps an already built implementation; used by the Read() factories.
  explicit MatcherFst(Impl *impl) : ImplToExpandedFst<Impl>(impl) {}

  // Makes visible to friends.
  void SetImpl(Impl *impl, bool own_impl = true) {
    ImplToFst< Impl, ExpandedFst<Arc> >::SetImpl(impl, own_impl);
  }

  void operator=(const MatcherFst<F, M, N, I> &fst);  // disallow
};


// Specialization for MatcherFst.
// Iterates over the states of the wrapped Fst F directly.
template <class F, class M, const char* N, class I>
class StateIterator< MatcherFst<F, M, N, I> > : public StateIterator<F> {
 public:
  explicit StateIterator(const MatcherFst<F, M, N, I> &fst) :
      StateIterator<F>(fst.GetImpl()->GetFst()) {}
};


// Specialization for MatcherFst.
+template <class F, class M, const char* N, class I> +class ArcIterator< MatcherFst<F, M, N, I> > : public ArcIterator<F> { + public: + ArcIterator(const MatcherFst<F, M, N, I> &fst, typename F::Arc::StateId s) + : ArcIterator<F>(fst.GetImpl()->GetFst(), s) {} +}; + + +// Specialization for MatcherFst +template <class F, class M, const char* N, class I> +class Matcher< MatcherFst<F, M, N, I> > { + public: + typedef MatcherFst<F, M, N, I> FST; + typedef typename F::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + + Matcher(const FST &fst, MatchType match_type) { + matcher_ = fst.InitMatcher(match_type); + } + + Matcher(const Matcher<FST> &matcher) { + matcher_ = matcher.matcher_->Copy(); + } + + ~Matcher() { delete matcher_; } + + Matcher<FST> *Copy() const { + return new Matcher<FST>(*this); + } + + MatchType Type(bool test) const { return matcher_->Type(test); } + void SetState(StateId s) { matcher_->SetState(s); } + bool Find(Label label) { return matcher_->Find(label); } + bool Done() const { return matcher_->Done(); } + const Arc& Value() const { return matcher_->Value(); } + void Next() { matcher_->Next(); } + uint64 Properties(uint64 props) const { return matcher_->Properties(props); } + uint32 Flags() const { return matcher_->Flags(); } + + private: + M *matcher_; + + void operator=(const Matcher<Arc> &); // disallow +}; + + +// Specialization for MatcherFst +template <class F, class M, const char* N, class I> +class LookAheadMatcher< MatcherFst<F, M, N, I> > { + public: + typedef MatcherFst<F, M, N, I> FST; + typedef typename F::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + LookAheadMatcher(const FST &fst, MatchType match_type) { + matcher_ = fst.InitMatcher(match_type); + } + + LookAheadMatcher(const LookAheadMatcher<FST> &matcher, bool safe = false) { + matcher_ = matcher.matcher_->Copy(safe); + } + + ~LookAheadMatcher() { delete 
matcher_; } + + // General matcher methods + LookAheadMatcher<FST> *Copy(bool safe = false) const { + return new LookAheadMatcher<FST>(*this, safe); + } + + MatchType Type(bool test) const { return matcher_->Type(test); } + void SetState(StateId s) { matcher_->SetState(s); } + bool Find(Label label) { return matcher_->Find(label); } + bool Done() const { return matcher_->Done(); } + const Arc& Value() const { return matcher_->Value(); } + void Next() { matcher_->Next(); } + const FST &GetFst() const { return matcher_->GetFst(); } + uint64 Properties(uint64 props) const { return matcher_->Properties(props); } + uint32 Flags() const { return matcher_->Flags(); } + + // Look-ahead methods + bool LookAheadLabel(Label label) const { + return matcher_->LookAheadLabel(label); + } + + bool LookAheadFst(const Fst<Arc> &fst, StateId s) { + return matcher_->LookAheadFst(fst, s); + } + + Weight LookAheadWeight() const { return matcher_->LookAheadWeight(); } + + bool LookAheadPrefix(Arc *arc) const { + return matcher_->LookAheadPrefix(arc); + } + + void InitLookAheadFst(const Fst<Arc>& fst, bool copy = false) { + matcher_->InitLookAheadFst(fst, copy); + } + + private: + M *matcher_; + + void operator=(const LookAheadMatcher<FST> &); // disallow +}; + +// +// Useful aliases when using StdArc and LogArc. 
//

// Arc look-ahead matchers
// Type-name string used as the MatcherFst name argument below; only
// declared here.
extern const char arc_lookahead_fst_type[];

typedef MatcherFst<ConstFst<StdArc>,
                   ArcLookAheadMatcher<SortedMatcher<ConstFst<StdArc> > >,
                   arc_lookahead_fst_type> StdArcLookAheadFst;

typedef MatcherFst<ConstFst<LogArc>,
                   ArcLookAheadMatcher<SortedMatcher<ConstFst<LogArc> > >,
                   arc_lookahead_fst_type> LogArcLookAheadFst;


// Label look-ahead matchers
// Type-name strings for the input- and output-label variants; only
// declared here.
extern const char ilabel_lookahead_fst_type[];
extern const char olabel_lookahead_fst_type[];

// Flag sets for the label look-ahead aliases below.  The two differ only in
// the first flag (input versus output look-ahead).
static const uint32 ilabel_lookahead_flags = kInputLookAheadMatcher |
    kLookAheadWeight | kLookAheadPrefix |
    kLookAheadEpsilons | kLookAheadNonEpsilonPrefix;
static const uint32 olabel_lookahead_flags = kOutputLookAheadMatcher |
    kLookAheadWeight | kLookAheadPrefix |
    kLookAheadEpsilons | kLookAheadNonEpsilonPrefix;

typedef MatcherFst<ConstFst<StdArc>,
                   LabelLookAheadMatcher<SortedMatcher<ConstFst<StdArc> >,
                                         ilabel_lookahead_flags,
                                         FastLogAccumulator<StdArc> >,
                   ilabel_lookahead_fst_type,
                   LabelLookAheadRelabeler<StdArc> > StdILabelLookAheadFst;

typedef MatcherFst<ConstFst<LogArc>,
                   LabelLookAheadMatcher<SortedMatcher<ConstFst<LogArc> >,
                                         ilabel_lookahead_flags,
                                         FastLogAccumulator<LogArc> >,
                   ilabel_lookahead_fst_type,
                   LabelLookAheadRelabeler<LogArc> > LogILabelLookAheadFst;

typedef MatcherFst<ConstFst<StdArc>,
                   LabelLookAheadMatcher<SortedMatcher<ConstFst<StdArc> >,
                                         olabel_lookahead_flags,
                                         FastLogAccumulator<StdArc> >,
                   olabel_lookahead_fst_type,
                   LabelLookAheadRelabeler<StdArc> > StdOLabelLookAheadFst;

typedef MatcherFst<ConstFst<LogArc>,
                   LabelLookAheadMatcher<SortedMatcher<ConstFst<LogArc> >,
                                         olabel_lookahead_flags,
                                         FastLogAccumulator<LogArc> >,
                   olabel_lookahead_fst_type,
                   LabelLookAheadRelabeler<LogArc> > LogOLabelLookAheadFst;

}  // namespace fst

#endif  // FST_LIB_MATCHER_FST_FST_H__
diff --git a/kaldi_io/src/tools/openfst/include/fst/matcher.h b/kaldi_io/src/tools/openfst/include/fst/matcher.h
new file mode 100644
index 0000000..89ed9be
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/matcher.h
@@ -0,0 +1,1205 @@
// matcher.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: [email protected] (Michael Riley)
//
// \file
// Classes to allow matching labels leaving FST states.

#ifndef FST_LIB_MATCHER_H__
#define FST_LIB_MATCHER_H__

#include <algorithm>
#include <set>

#include <fst/mutable-fst.h>  // for all internal FST accessors


namespace fst {

// MATCHERS - these can find and iterate through requested labels at
// FST states. In the simplest form, these are just some associative
// map or search keyed on labels. More generally, they may
// implement matching special labels that represent sets of labels
// such as 'sigma' (all), 'rho' (rest), or 'phi' (fail).
// The Matcher interface is:
//
// template <class F>
// class Matcher {
//  public:
//   typedef F FST;
//   typedef typename F::Arc Arc;
//   typedef typename Arc::StateId StateId;
//   typedef typename Arc::Label Label;
//   typedef typename Arc::Weight Weight;
//
//   // Required constructors.
//   Matcher(const F &fst, MatchType type);
//   // If safe=true, the copy is thread-safe. See Fst<>::Copy()
//   // for further doc.
//   Matcher(const Matcher &matcher, bool safe = false);
//
//   // If safe=true, the copy is thread-safe. See Fst<>::Copy()
//   // for further doc.
//   Matcher<F> *Copy(bool safe = false) const;
//
//   // Returns the match type that can be provided (depending on
//   // compatibility of the input FST). It is either
//   // the requested match type, MATCH_NONE, or MATCH_UNKNOWN.
//   // If 'test' is false, a constant time test is performed, but
//   // MATCH_UNKNOWN may be returned. If 'test' is true,
//   // a definite answer is returned, but may involve more costly
//   // computation (e.g., visiting the Fst).
//   MatchType Type(bool test) const;
//   // Specifies the current state.
//   void SetState(StateId s);
//
//   // This finds matches to a label at the current state.
//   // Returns true if a match is found. kNoLabel matches any
//   // 'non-consuming' transitions, e.g., epsilon transitions,
//   // which do not require a matching symbol.
//   bool Find(Label label);
//   // These iterate through any matches found:
//   bool Done() const;         // No more matches.
//   const Arc& Value() const;  // Current arc (when !Done)
//   void Next();               // Advance to next arc (when !Done)
//   // Initially and after SetState() the iterator methods
//   // have undefined behavior until Find() is called.
//
//   // Return matcher FST.
//   const F& GetFst() const;
//   // This specifies the known Fst properties as viewed from this
//   // matcher. It takes as argument the input Fst's known properties.
//   uint64 Properties(uint64 props) const;
// };

//
// MATCHER FLAGS (see also kLookAheadFlags in lookahead-matcher.h)
//
// Matcher prefers being used as the matching side in composition.
const uint32 kPreferMatch = 0x00000001;

// Matcher needs to be used as the matching side in composition.
const uint32 kRequireMatch = 0x00000002;

// Flags used for basic matchers (see also lookahead-matcher.h).
const uint32 kMatcherFlags = kPreferMatch | kRequireMatch;

// Matcher interface, templated on the Arc definition; used
// for matcher specializations that are returned by the
// InitMatcher Fst method.
+template <class A> +class MatcherBase { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + + virtual ~MatcherBase() {} + + virtual MatcherBase<A> *Copy(bool safe = false) const = 0; + virtual MatchType Type(bool test) const = 0; + void SetState(StateId s) { SetState_(s); } + bool Find(Label label) { return Find_(label); } + bool Done() const { return Done_(); } + const A& Value() const { return Value_(); } + void Next() { Next_(); } + virtual const Fst<A> &GetFst() const = 0; + virtual uint64 Properties(uint64 props) const = 0; + virtual uint32 Flags() const { return 0; } + private: + virtual void SetState_(StateId s) = 0; + virtual bool Find_(Label label) = 0; + virtual bool Done_() const = 0; + virtual const A& Value_() const = 0; + virtual void Next_() = 0; +}; + + +// A matcher that expects sorted labels on the side to be matched. +// If match_type == MATCH_INPUT, epsilons match the implicit self loop +// Arc(kNoLabel, 0, Weight::One(), current_state) as well as any +// actual epsilon transitions. If match_type == MATCH_OUTPUT, then +// Arc(0, kNoLabel, Weight::One(), current_state) is instead matched. +template <class F> +class SortedMatcher : public MatcherBase<typename F::Arc> { + public: + typedef F FST; + typedef typename F::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + // Labels >= binary_label will be searched for by binary search, + // o.w. linear search is used. 
+ SortedMatcher(const F &fst, MatchType match_type, + Label binary_label = 1) + : fst_(fst.Copy()), + s_(kNoStateId), + aiter_(0), + match_type_(match_type), + binary_label_(binary_label), + match_label_(kNoLabel), + narcs_(0), + loop_(kNoLabel, 0, Weight::One(), kNoStateId), + error_(false) { + switch(match_type_) { + case MATCH_INPUT: + case MATCH_NONE: + break; + case MATCH_OUTPUT: + swap(loop_.ilabel, loop_.olabel); + break; + default: + FSTERROR() << "SortedMatcher: bad match type"; + match_type_ = MATCH_NONE; + error_ = true; + } + } + + SortedMatcher(const SortedMatcher<F> &matcher, bool safe = false) + : fst_(matcher.fst_->Copy(safe)), + s_(kNoStateId), + aiter_(0), + match_type_(matcher.match_type_), + binary_label_(matcher.binary_label_), + match_label_(kNoLabel), + narcs_(0), + loop_(matcher.loop_), + error_(matcher.error_) {} + + virtual ~SortedMatcher() { + if (aiter_) + delete aiter_; + delete fst_; + } + + virtual SortedMatcher<F> *Copy(bool safe = false) const { + return new SortedMatcher<F>(*this, safe); + } + + virtual MatchType Type(bool test) const { + if (match_type_ == MATCH_NONE) + return match_type_; + + uint64 true_prop = match_type_ == MATCH_INPUT ? + kILabelSorted : kOLabelSorted; + uint64 false_prop = match_type_ == MATCH_INPUT ? 
+ kNotILabelSorted : kNotOLabelSorted; + uint64 props = fst_->Properties(true_prop | false_prop, test); + + if (props & true_prop) + return match_type_; + else if (props & false_prop) + return MATCH_NONE; + else + return MATCH_UNKNOWN; + } + + void SetState(StateId s) { + if (s_ == s) + return; + s_ = s; + if (match_type_ == MATCH_NONE) { + FSTERROR() << "SortedMatcher: bad match type"; + error_ = true; + } + if (aiter_) + delete aiter_; + aiter_ = new ArcIterator<F>(*fst_, s); + aiter_->SetFlags(kArcNoCache, kArcNoCache); + narcs_ = internal::NumArcs(*fst_, s); + loop_.nextstate = s; + } + + bool Find(Label match_label) { + exact_match_ = true; + if (error_) { + current_loop_ = false; + match_label_ = kNoLabel; + return false; + } + current_loop_ = match_label == 0; + match_label_ = match_label == kNoLabel ? 0 : match_label; + if (Search()) { + return true; + } else { + return current_loop_; + } + } + + // Positions matcher to the first position where inserting + // match_label would maintain the sort order. + void LowerBound(Label match_label) { + exact_match_ = false; + current_loop_ = false; + if (error_) { + match_label_ = kNoLabel; + return; + } + match_label_ = match_label; + Search(); + } + + // After Find(), returns false if no more exact matches. + // After LowerBound(), returns false if no more arcs. + bool Done() const { + if (current_loop_) + return false; + if (aiter_->Done()) + return true; + if (!exact_match_) + return false; + aiter_->SetFlags( + match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue, + kArcValueFlags); + Label label = match_type_ == MATCH_INPUT ? 
+ aiter_->Value().ilabel : aiter_->Value().olabel; + return label != match_label_; + } + + const Arc& Value() const { + if (current_loop_) { + return loop_; + } + aiter_->SetFlags(kArcValueFlags, kArcValueFlags); + return aiter_->Value(); + } + + void Next() { + if (current_loop_) + current_loop_ = false; + else + aiter_->Next(); + } + + virtual const F &GetFst() const { return *fst_; } + + virtual uint64 Properties(uint64 inprops) const { + uint64 outprops = inprops; + if (error_) outprops |= kError; + return outprops; + } + + size_t Position() const { return aiter_ ? aiter_->Position() : 0; } + + private: + virtual void SetState_(StateId s) { SetState(s); } + virtual bool Find_(Label label) { return Find(label); } + virtual bool Done_() const { return Done(); } + virtual const Arc& Value_() const { return Value(); } + virtual void Next_() { Next(); } + + bool Search(); + + const F *fst_; + StateId s_; // Current state + ArcIterator<F> *aiter_; // Iterator for current state + MatchType match_type_; // Type of match to perform + Label binary_label_; // Least label for binary search + Label match_label_; // Current label to be matched + size_t narcs_; // Current state arc count + Arc loop_; // For non-consuming symbols + bool current_loop_; // Current arc is the implicit loop + bool exact_match_; // Exact match or lower bound? + bool error_; // Error encountered + + void operator=(const SortedMatcher<F> &); // Disallow +}; + +// Returns true iff match to match_label_. Positions arc iterator at +// lower bound regardless. +template <class F> inline +bool SortedMatcher<F>::Search() { + aiter_->SetFlags( + match_type_ == MATCH_INPUT ? kArcILabelValue : kArcOLabelValue, + kArcValueFlags); + if (match_label_ >= binary_label_) { + // Binary search for match. + size_t low = 0; + size_t high = narcs_; + while (low < high) { + size_t mid = (low + high) / 2; + aiter_->Seek(mid); + Label label = match_type_ == MATCH_INPUT ? 
+ aiter_->Value().ilabel : aiter_->Value().olabel; + if (label > match_label_) { + high = mid; + } else if (label < match_label_) { + low = mid + 1; + } else { + // find first matching label (when non-determinism) + for (size_t i = mid; i > low; --i) { + aiter_->Seek(i - 1); + label = match_type_ == MATCH_INPUT ? aiter_->Value().ilabel : + aiter_->Value().olabel; + if (label != match_label_) { + aiter_->Seek(i); + return true; + } + } + return true; + } + } + aiter_->Seek(low); + return false; + } else { + // Linear search for match. + for (aiter_->Reset(); !aiter_->Done(); aiter_->Next()) { + Label label = match_type_ == MATCH_INPUT ? + aiter_->Value().ilabel : aiter_->Value().olabel; + if (label == match_label_) { + return true; + } + if (label > match_label_) + break; + } + return false; + } +} + + +// Specifies whether during matching we rewrite both the input and output sides. +enum MatcherRewriteMode { + MATCHER_REWRITE_AUTO = 0, // Rewrites both sides iff acceptor. + MATCHER_REWRITE_ALWAYS, + MATCHER_REWRITE_NEVER +}; + + +// For any requested label that doesn't match at a state, this matcher +// considers all transitions that match the label 'rho_label' (rho = +// 'rest'). Each such rho transition found is returned with the +// rho_label rewritten as the requested label (both sides if an +// acceptor, or if 'rewrite_both' is true and both input and output +// labels of the found transition are 'rho_label'). If 'rho_label' is +// kNoLabel, this special matching is not done. RhoMatcher is +// templated itself on a matcher, which is used to perform the +// underlying matching. By default, the underlying matcher is +// constructed by RhoMatcher. The user can instead pass in this +// object; in that case, RhoMatcher takes its ownership. 
+template <class M> +class RhoMatcher : public MatcherBase<typename M::Arc> { + public: + typedef typename M::FST FST; + typedef typename M::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + RhoMatcher(const FST &fst, + MatchType match_type, + Label rho_label = kNoLabel, + MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO, + M *matcher = 0) + : matcher_(matcher ? matcher : new M(fst, match_type)), + match_type_(match_type), + rho_label_(rho_label), + error_(false) { + if (match_type == MATCH_BOTH) { + FSTERROR() << "RhoMatcher: bad match type"; + match_type_ = MATCH_NONE; + error_ = true; + } + if (rho_label == 0) { + FSTERROR() << "RhoMatcher: 0 cannot be used as rho_label"; + rho_label_ = kNoLabel; + error_ = true; + } + + if (rewrite_mode == MATCHER_REWRITE_AUTO) + rewrite_both_ = fst.Properties(kAcceptor, true); + else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) + rewrite_both_ = true; + else + rewrite_both_ = false; + } + + RhoMatcher(const RhoMatcher<M> &matcher, bool safe = false) + : matcher_(new M(*matcher.matcher_, safe)), + match_type_(matcher.match_type_), + rho_label_(matcher.rho_label_), + rewrite_both_(matcher.rewrite_both_), + error_(matcher.error_) {} + + virtual ~RhoMatcher() { + delete matcher_; + } + + virtual RhoMatcher<M> *Copy(bool safe = false) const { + return new RhoMatcher<M>(*this, safe); + } + + virtual MatchType Type(bool test) const { return matcher_->Type(test); } + + void SetState(StateId s) { + matcher_->SetState(s); + has_rho_ = rho_label_ != kNoLabel; + } + + bool Find(Label match_label) { + if (match_label == rho_label_ && rho_label_ != kNoLabel) { + FSTERROR() << "RhoMatcher::Find: bad label (rho)"; + error_ = true; + return false; + } + if (matcher_->Find(match_label)) { + rho_match_ = kNoLabel; + return true; + } else if (has_rho_ && match_label != 0 && match_label != kNoLabel && + (has_rho_ = matcher_->Find(rho_label_))) { + rho_match_ = 
match_label; + return true; + } else { + return false; + } + } + + bool Done() const { return matcher_->Done(); } + + const Arc& Value() const { + if (rho_match_ == kNoLabel) { + return matcher_->Value(); + } else { + rho_arc_ = matcher_->Value(); + if (rewrite_both_) { + if (rho_arc_.ilabel == rho_label_) + rho_arc_.ilabel = rho_match_; + if (rho_arc_.olabel == rho_label_) + rho_arc_.olabel = rho_match_; + } else if (match_type_ == MATCH_INPUT) { + rho_arc_.ilabel = rho_match_; + } else { + rho_arc_.olabel = rho_match_; + } + return rho_arc_; + } + } + + void Next() { matcher_->Next(); } + + virtual const FST &GetFst() const { return matcher_->GetFst(); } + + virtual uint64 Properties(uint64 props) const; + + virtual uint32 Flags() const { + if (rho_label_ == kNoLabel || match_type_ == MATCH_NONE) + return matcher_->Flags(); + return matcher_->Flags() | kRequireMatch; + } + + private: + virtual void SetState_(StateId s) { SetState(s); } + virtual bool Find_(Label label) { return Find(label); } + virtual bool Done_() const { return Done(); } + virtual const Arc& Value_() const { return Value(); } + virtual void Next_() { Next(); } + + M *matcher_; + MatchType match_type_; // Type of match requested + Label rho_label_; // Label that represents the rho transition + bool rewrite_both_; // Rewrite both sides when both are 'rho_label_' + bool has_rho_; // Are there possibly rhos at the current state? 
+ Label rho_match_; // Current label that matches rho transition + mutable Arc rho_arc_; // Arc to return when rho match + bool error_; // Error encountered + + void operator=(const RhoMatcher<M> &); // Disallow +}; + +template <class M> inline +uint64 RhoMatcher<M>::Properties(uint64 inprops) const { + uint64 outprops = matcher_->Properties(inprops); + if (error_) outprops |= kError; + + if (match_type_ == MATCH_NONE) { + return outprops; + } else if (match_type_ == MATCH_INPUT) { + if (rewrite_both_) { + return outprops & ~(kODeterministic | kNonODeterministic | kString | + kILabelSorted | kNotILabelSorted | + kOLabelSorted | kNotOLabelSorted); + } else { + return outprops & ~(kODeterministic | kAcceptor | kString | + kILabelSorted | kNotILabelSorted); + } + } else if (match_type_ == MATCH_OUTPUT) { + if (rewrite_both_) { + return outprops & ~(kIDeterministic | kNonIDeterministic | kString | + kILabelSorted | kNotILabelSorted | + kOLabelSorted | kNotOLabelSorted); + } else { + return outprops & ~(kIDeterministic | kAcceptor | kString | + kOLabelSorted | kNotOLabelSorted); + } + } else { + // Shouldn't ever get here. + FSTERROR() << "RhoMatcher:: bad match type: " << match_type_; + return 0; + } +} + + +// For any requested label, this matcher considers all transitions +// that match the label 'sigma_label' (sigma = "any"), and this in +// additions to transitions with the requested label. Each such sigma +// transition found is returned with the sigma_label rewritten as the +// requested label (both sides if an acceptor, or if 'rewrite_both' is +// true and both input and output labels of the found transition are +// 'sigma_label'). If 'sigma_label' is kNoLabel, this special +// matching is not done. SigmaMatcher is templated itself on a +// matcher, which is used to perform the underlying matching. By +// default, the underlying matcher is constructed by SigmaMatcher. 
+// The user can instead pass in this object; in that case, +// SigmaMatcher takes its ownership. +template <class M> +class SigmaMatcher : public MatcherBase<typename M::Arc> { + public: + typedef typename M::FST FST; + typedef typename M::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + SigmaMatcher(const FST &fst, + MatchType match_type, + Label sigma_label = kNoLabel, + MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO, + M *matcher = 0) + : matcher_(matcher ? matcher : new M(fst, match_type)), + match_type_(match_type), + sigma_label_(sigma_label), + error_(false) { + if (match_type == MATCH_BOTH) { + FSTERROR() << "SigmaMatcher: bad match type"; + match_type_ = MATCH_NONE; + error_ = true; + } + if (sigma_label == 0) { + FSTERROR() << "SigmaMatcher: 0 cannot be used as sigma_label"; + sigma_label_ = kNoLabel; + error_ = true; + } + + if (rewrite_mode == MATCHER_REWRITE_AUTO) + rewrite_both_ = fst.Properties(kAcceptor, true); + else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) + rewrite_both_ = true; + else + rewrite_both_ = false; + } + + SigmaMatcher(const SigmaMatcher<M> &matcher, bool safe = false) + : matcher_(new M(*matcher.matcher_, safe)), + match_type_(matcher.match_type_), + sigma_label_(matcher.sigma_label_), + rewrite_both_(matcher.rewrite_both_), + error_(matcher.error_) {} + + virtual ~SigmaMatcher() { + delete matcher_; + } + + virtual SigmaMatcher<M> *Copy(bool safe = false) const { + return new SigmaMatcher<M>(*this, safe); + } + + virtual MatchType Type(bool test) const { return matcher_->Type(test); } + + void SetState(StateId s) { + matcher_->SetState(s); + has_sigma_ = + sigma_label_ != kNoLabel ? 
matcher_->Find(sigma_label_) : false; + } + + bool Find(Label match_label) { + match_label_ = match_label; + if (match_label == sigma_label_ && sigma_label_ != kNoLabel) { + FSTERROR() << "SigmaMatcher::Find: bad label (sigma)"; + error_ = true; + return false; + } + if (matcher_->Find(match_label)) { + sigma_match_ = kNoLabel; + return true; + } else if (has_sigma_ && match_label != 0 && match_label != kNoLabel && + matcher_->Find(sigma_label_)) { + sigma_match_ = match_label; + return true; + } else { + return false; + } + } + + bool Done() const { + return matcher_->Done(); + } + + const Arc& Value() const { + if (sigma_match_ == kNoLabel) { + return matcher_->Value(); + } else { + sigma_arc_ = matcher_->Value(); + if (rewrite_both_) { + if (sigma_arc_.ilabel == sigma_label_) + sigma_arc_.ilabel = sigma_match_; + if (sigma_arc_.olabel == sigma_label_) + sigma_arc_.olabel = sigma_match_; + } else if (match_type_ == MATCH_INPUT) { + sigma_arc_.ilabel = sigma_match_; + } else { + sigma_arc_.olabel = sigma_match_; + } + return sigma_arc_; + } + } + + void Next() { + matcher_->Next(); + if (matcher_->Done() && has_sigma_ && (sigma_match_ == kNoLabel) && + (match_label_ > 0)) { + matcher_->Find(sigma_label_); + sigma_match_ = match_label_; + } + } + + virtual const FST &GetFst() const { return matcher_->GetFst(); } + + virtual uint64 Properties(uint64 props) const; + + virtual uint32 Flags() const { + if (sigma_label_ == kNoLabel || match_type_ == MATCH_NONE) + return matcher_->Flags(); + // kRequireMatch temporarily disabled until issues + // in //speech/gaudi/annotation/util/denorm are resolved. 
+ // return matcher_->Flags() | kRequireMatch; + return matcher_->Flags(); + } + +private: + virtual void SetState_(StateId s) { SetState(s); } + virtual bool Find_(Label label) { return Find(label); } + virtual bool Done_() const { return Done(); } + virtual const Arc& Value_() const { return Value(); } + virtual void Next_() { Next(); } + + M *matcher_; + MatchType match_type_; // Type of match requested + Label sigma_label_; // Label that represents the sigma transition + bool rewrite_both_; // Rewrite both sides when both are 'sigma_label_' + bool has_sigma_; // Are there sigmas at the current state? + Label sigma_match_; // Current label that matches sigma transition + mutable Arc sigma_arc_; // Arc to return when sigma match + Label match_label_; // Label being matched + bool error_; // Error encountered + + void operator=(const SigmaMatcher<M> &); // disallow +}; + +template <class M> inline +uint64 SigmaMatcher<M>::Properties(uint64 inprops) const { + uint64 outprops = matcher_->Properties(inprops); + if (error_) outprops |= kError; + + if (match_type_ == MATCH_NONE) { + return outprops; + } else if (rewrite_both_) { + return outprops & ~(kIDeterministic | kNonIDeterministic | + kODeterministic | kNonODeterministic | + kILabelSorted | kNotILabelSorted | + kOLabelSorted | kNotOLabelSorted | + kString); + } else if (match_type_ == MATCH_INPUT) { + return outprops & ~(kIDeterministic | kNonIDeterministic | + kODeterministic | kNonODeterministic | + kILabelSorted | kNotILabelSorted | + kString | kAcceptor); + } else if (match_type_ == MATCH_OUTPUT) { + return outprops & ~(kIDeterministic | kNonIDeterministic | + kODeterministic | kNonODeterministic | + kOLabelSorted | kNotOLabelSorted | + kString | kAcceptor); + } else { + // Shouldn't ever get here. 
+ FSTERROR() << "SigmaMatcher:: bad match type: " << match_type_; + return 0; + } +} + + +// For any requested label that doesn't match at a state, this matcher +// considers the *unique* transition that matches the label 'phi_label' +// (phi = 'fail'), and recursively looks for a match at its +// destination. When 'phi_loop' is true, if no match is found but a +// phi self-loop is found, then the phi transition found is returned +// with the phi_label rewritten as the requested label (both sides if +// an acceptor, or if 'rewrite_both' is true and both input and output +// labels of the found transition are 'phi_label'). If 'phi_label' is +// kNoLabel, this special matching is not done. PhiMatcher is +// templated itself on a matcher, which is used to perform the +// underlying matching. By default, the underlying matcher is +// constructed by PhiMatcher. The user can instead pass in this +// object; in that case, PhiMatcher takes its ownership. +// Warning: phi non-determinism not supported (for simplicity). +template <class M> +class PhiMatcher : public MatcherBase<typename M::Arc> { + public: + typedef typename M::FST FST; + typedef typename M::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + PhiMatcher(const FST &fst, + MatchType match_type, + Label phi_label = kNoLabel, + bool phi_loop = true, + MatcherRewriteMode rewrite_mode = MATCHER_REWRITE_AUTO, + M *matcher = 0) + : matcher_(matcher ? 
matcher : new M(fst, match_type)), + match_type_(match_type), + phi_label_(phi_label), + state_(kNoStateId), + phi_loop_(phi_loop), + error_(false) { + if (match_type == MATCH_BOTH) { + FSTERROR() << "PhiMatcher: bad match type"; + match_type_ = MATCH_NONE; + error_ = true; + } + + if (rewrite_mode == MATCHER_REWRITE_AUTO) + rewrite_both_ = fst.Properties(kAcceptor, true); + else if (rewrite_mode == MATCHER_REWRITE_ALWAYS) + rewrite_both_ = true; + else + rewrite_both_ = false; + } + + PhiMatcher(const PhiMatcher<M> &matcher, bool safe = false) + : matcher_(new M(*matcher.matcher_, safe)), + match_type_(matcher.match_type_), + phi_label_(matcher.phi_label_), + rewrite_both_(matcher.rewrite_both_), + state_(kNoStateId), + phi_loop_(matcher.phi_loop_), + error_(matcher.error_) {} + + virtual ~PhiMatcher() { + delete matcher_; + } + + virtual PhiMatcher<M> *Copy(bool safe = false) const { + return new PhiMatcher<M>(*this, safe); + } + + virtual MatchType Type(bool test) const { return matcher_->Type(test); } + + void SetState(StateId s) { + matcher_->SetState(s); + state_ = s; + has_phi_ = phi_label_ != kNoLabel; + } + + bool Find(Label match_label); + + bool Done() const { return matcher_->Done(); } + + const Arc& Value() const { + if ((phi_match_ == kNoLabel) && (phi_weight_ == Weight::One())) { + return matcher_->Value(); + } else if (phi_match_ == 0) { // Virtual epsilon loop + phi_arc_ = Arc(kNoLabel, 0, Weight::One(), state_); + if (match_type_ == MATCH_OUTPUT) + swap(phi_arc_.ilabel, phi_arc_.olabel); + return phi_arc_; + } else { + phi_arc_ = matcher_->Value(); + phi_arc_.weight = Times(phi_weight_, phi_arc_.weight); + if (phi_match_ != kNoLabel) { // Phi loop match + if (rewrite_both_) { + if (phi_arc_.ilabel == phi_label_) + phi_arc_.ilabel = phi_match_; + if (phi_arc_.olabel == phi_label_) + phi_arc_.olabel = phi_match_; + } else if (match_type_ == MATCH_INPUT) { + phi_arc_.ilabel = phi_match_; + } else { + phi_arc_.olabel = phi_match_; + } + } + return 
phi_arc_; + } + } + + void Next() { matcher_->Next(); } + + virtual const FST &GetFst() const { return matcher_->GetFst(); } + + virtual uint64 Properties(uint64 props) const; + + virtual uint32 Flags() const { + if (phi_label_ == kNoLabel || match_type_ == MATCH_NONE) + return matcher_->Flags(); + return matcher_->Flags() | kRequireMatch; + } + +private: + virtual void SetState_(StateId s) { SetState(s); } + virtual bool Find_(Label label) { return Find(label); } + virtual bool Done_() const { return Done(); } + virtual const Arc& Value_() const { return Value(); } + virtual void Next_() { Next(); } + + M *matcher_; + MatchType match_type_; // Type of match requested + Label phi_label_; // Label that represents the phi transition + bool rewrite_both_; // Rewrite both sides when both are 'phi_label_' + bool has_phi_; // Are there possibly phis at the current state? + Label phi_match_; // Current label that matches phi loop + mutable Arc phi_arc_; // Arc to return + StateId state_; // State where looking for matches + Weight phi_weight_; // Product of the weights of phi transitions taken + bool phi_loop_; // When true, phi self-loop are allowed and treated + // as rho (required for Aho-Corasick) + bool error_; // Error encountered + + void operator=(const PhiMatcher<M> &); // disallow +}; + +template <class M> inline +bool PhiMatcher<M>::Find(Label match_label) { + if (match_label == phi_label_ && phi_label_ != kNoLabel && phi_label_ != 0) { + FSTERROR() << "PhiMatcher::Find: bad label (phi): " << phi_label_; + error_ = true; + return false; + } + matcher_->SetState(state_); + phi_match_ = kNoLabel; + phi_weight_ = Weight::One(); + if (phi_label_ == 0) { // When 'phi_label_ == 0', + if (match_label == kNoLabel) // there are no more true epsilon arcs, + return false; + if (match_label == 0) { // but virtual eps loop need to be returned + if (!matcher_->Find(kNoLabel)) { + return matcher_->Find(0); + } else { + phi_match_ = 0; + return true; + } + } + } + if 
(!has_phi_ || match_label == 0 || match_label == kNoLabel) + return matcher_->Find(match_label); + StateId state = state_; + while (!matcher_->Find(match_label)) { + // Look for phi transition (if phi_label_ == 0, we need to look + // for -1 to avoid getting the virtual self-loop) + if (!matcher_->Find(phi_label_ == 0 ? -1 : phi_label_)) + return false; + if (phi_loop_ && matcher_->Value().nextstate == state) { + phi_match_ = match_label; + return true; + } + phi_weight_ = Times(phi_weight_, matcher_->Value().weight); + state = matcher_->Value().nextstate; + matcher_->Next(); + if (!matcher_->Done()) { + FSTERROR() << "PhiMatcher: phi non-determinism not supported"; + error_ = true; + } + matcher_->SetState(state); + } + return true; +} + +template <class M> inline +uint64 PhiMatcher<M>::Properties(uint64 inprops) const { + uint64 outprops = matcher_->Properties(inprops); + if (error_) outprops |= kError; + + if (match_type_ == MATCH_NONE) { + return outprops; + } else if (match_type_ == MATCH_INPUT) { + if (phi_label_ == 0) { + outprops &= ~kEpsilons | ~kIEpsilons | ~kOEpsilons; + outprops |= kNoEpsilons | kNoIEpsilons; + } + if (rewrite_both_) { + return outprops & ~(kODeterministic | kNonODeterministic | kString | + kILabelSorted | kNotILabelSorted | + kOLabelSorted | kNotOLabelSorted); + } else { + return outprops & ~(kODeterministic | kAcceptor | kString | + kILabelSorted | kNotILabelSorted | + kOLabelSorted | kNotOLabelSorted); + } + } else if (match_type_ == MATCH_OUTPUT) { + if (phi_label_ == 0) { + outprops &= ~kEpsilons | ~kIEpsilons | ~kOEpsilons; + outprops |= kNoEpsilons | kNoOEpsilons; + } + if (rewrite_both_) { + return outprops & ~(kIDeterministic | kNonIDeterministic | kString | + kILabelSorted | kNotILabelSorted | + kOLabelSorted | kNotOLabelSorted); + } else { + return outprops & ~(kIDeterministic | kAcceptor | kString | + kILabelSorted | kNotILabelSorted | + kOLabelSorted | kNotOLabelSorted); + } + } else { + // Shouldn't ever get here. 
+ FSTERROR() << "PhiMatcher:: bad match type: " << match_type_; + return 0; + } +} + + +// +// MULTI-EPS MATCHER FLAGS +// + +// Return multi-epsilon arcs for Find(kNoLabel). +const uint32 kMultiEpsList = 0x00000001; + +// Return a kNolabel loop for Find(multi_eps). +const uint32 kMultiEpsLoop = 0x00000002; + +// MultiEpsMatcher: allows treating multiple non-0 labels as +// non-consuming labels in addition to 0 that is always +// non-consuming. Precise behavior controlled by 'flags' argument. By +// default, the underlying matcher is constructed by +// MultiEpsMatcher. The user can instead pass in this object; in that +// case, MultiEpsMatcher takes its ownership iff 'own_matcher' is +// true. +template <class M> +class MultiEpsMatcher { + public: + typedef typename M::FST FST; + typedef typename M::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + MultiEpsMatcher(const FST &fst, MatchType match_type, + uint32 flags = (kMultiEpsLoop | kMultiEpsList), + M *matcher = 0, bool own_matcher = true) + : matcher_(matcher ? matcher : new M(fst, match_type)), + flags_(flags), + own_matcher_(matcher ? 
own_matcher : true) { + if (match_type == MATCH_INPUT) { + loop_.ilabel = kNoLabel; + loop_.olabel = 0; + } else { + loop_.ilabel = 0; + loop_.olabel = kNoLabel; + } + loop_.weight = Weight::One(); + loop_.nextstate = kNoStateId; + } + + MultiEpsMatcher(const MultiEpsMatcher<M> &matcher, bool safe = false) + : matcher_(new M(*matcher.matcher_, safe)), + flags_(matcher.flags_), + own_matcher_(true), + multi_eps_labels_(matcher.multi_eps_labels_), + loop_(matcher.loop_) { + loop_.nextstate = kNoStateId; + } + + ~MultiEpsMatcher() { + if (own_matcher_) + delete matcher_; + } + + MultiEpsMatcher<M> *Copy(bool safe = false) const { + return new MultiEpsMatcher<M>(*this, safe); + } + + MatchType Type(bool test) const { return matcher_->Type(test); } + + void SetState(StateId s) { + matcher_->SetState(s); + loop_.nextstate = s; + } + + bool Find(Label match_label); + + bool Done() const { + return done_; + } + + const Arc& Value() const { + return current_loop_ ? loop_ : matcher_->Value(); + } + + void Next() { + if (!current_loop_) { + matcher_->Next(); + done_ = matcher_->Done(); + if (done_ && multi_eps_iter_ != multi_eps_labels_.End()) { + ++multi_eps_iter_; + while ((multi_eps_iter_ != multi_eps_labels_.End()) && + !matcher_->Find(*multi_eps_iter_)) + ++multi_eps_iter_; + if (multi_eps_iter_ != multi_eps_labels_.End()) + done_ = false; + else + done_ = !matcher_->Find(kNoLabel); + + } + } else { + done_ = true; + } + } + + const FST &GetFst() const { return matcher_->GetFst(); } + + uint64 Properties(uint64 props) const { return matcher_->Properties(props); } + + uint32 Flags() const { return matcher_->Flags(); } + + void AddMultiEpsLabel(Label label) { + if (label == 0) { + FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0"; + } else { + multi_eps_labels_.Insert(label); + } + } + + void RemoveMultiEpsLabel(Label label) { + if (label == 0) { + FSTERROR() << "MultiEpsMatcher: Bad multi-eps label: 0"; + } else { + multi_eps_labels_.Erase(label); + } + } + + void 
ClearMultiEpsLabels() { + multi_eps_labels_.Clear(); + } + +private: + M *matcher_; + uint32 flags_; + bool own_matcher_; // Does this class delete the matcher? + + // Multi-eps label set + CompactSet<Label, kNoLabel> multi_eps_labels_; + typename CompactSet<Label, kNoLabel>::const_iterator multi_eps_iter_; + + bool current_loop_; // Current arc is the implicit loop + mutable Arc loop_; // For non-consuming symbols + bool done_; // Matching done + + void operator=(const MultiEpsMatcher<M> &); // Disallow +}; + +template <class M> inline +bool MultiEpsMatcher<M>::Find(Label match_label) { + multi_eps_iter_ = multi_eps_labels_.End(); + current_loop_ = false; + bool ret; + if (match_label == 0) { + ret = matcher_->Find(0); + } else if (match_label == kNoLabel) { + if (flags_ & kMultiEpsList) { + // return all non-consuming arcs (incl. epsilon) + multi_eps_iter_ = multi_eps_labels_.Begin(); + while ((multi_eps_iter_ != multi_eps_labels_.End()) && + !matcher_->Find(*multi_eps_iter_)) + ++multi_eps_iter_; + if (multi_eps_iter_ != multi_eps_labels_.End()) + ret = true; + else + ret = matcher_->Find(kNoLabel); + } else { + // return all epsilon arcs + ret = matcher_->Find(kNoLabel); + } + } else if ((flags_ & kMultiEpsLoop) && + multi_eps_labels_.Find(match_label) != multi_eps_labels_.End()) { + // return 'implicit' loop + current_loop_ = true; + ret = true; + } else { + ret = matcher_->Find(match_label); + } + done_ = !ret; + return ret; +} + + +// Generic matcher, templated on the FST definition +// - a wrapper around pointer to specific one. +// Here is a typical use: \code +// Matcher<StdFst> matcher(fst, MATCH_INPUT); +// matcher.SetState(state); +// if (matcher.Find(label)) +// for (; !matcher.Done(); matcher.Next()) { +// StdArc &arc = matcher.Value(); +// ... 
+// } \endcode +template <class F> +class Matcher { + public: + typedef F FST; + typedef typename F::Arc Arc; + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + Matcher(const F &fst, MatchType match_type) { + base_ = fst.InitMatcher(match_type); + if (!base_) + base_ = new SortedMatcher<F>(fst, match_type); + } + + Matcher(const Matcher<F> &matcher, bool safe = false) { + base_ = matcher.base_->Copy(safe); + } + + // Takes ownership of the provided matcher + Matcher(MatcherBase<Arc>* base_matcher) { base_ = base_matcher; } + + ~Matcher() { delete base_; } + + Matcher<F> *Copy(bool safe = false) const { + return new Matcher<F>(*this, safe); + } + + MatchType Type(bool test) const { return base_->Type(test); } + void SetState(StateId s) { base_->SetState(s); } + bool Find(Label label) { return base_->Find(label); } + bool Done() const { return base_->Done(); } + const Arc& Value() const { return base_->Value(); } + void Next() { base_->Next(); } + const F &GetFst() const { return static_cast<const F &>(base_->GetFst()); } + uint64 Properties(uint64 props) const { return base_->Properties(props); } + uint32 Flags() const { return base_->Flags() & kMatcherFlags; } + + private: + MatcherBase<Arc> *base_; + + void operator=(const Matcher<Arc> &); // disallow +}; + +} // namespace fst + + + +#endif // FST_LIB_MATCHER_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/minimize.h b/kaldi_io/src/tools/openfst/include/fst/minimize.h new file mode 100644 index 0000000..6e9dd3d --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/minimize.h @@ -0,0 +1,591 @@ +// minimize.h +// minimize.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Johan Schalkwyk) +// +// \file Functions and classes to minimize a finite state acceptor +// + +#ifndef FST_LIB_MINIMIZE_H__ +#define FST_LIB_MINIMIZE_H__ + +#include <cmath> + +#include <algorithm> +#include <map> +#include <queue> +#include <vector> +using std::vector; + +#include <fst/arcsort.h> +#include <fst/connect.h> +#include <fst/dfs-visit.h> +#include <fst/encode.h> +#include <fst/factor-weight.h> +#include <fst/fst.h> +#include <fst/mutable-fst.h> +#include <fst/partition.h> +#include <fst/push.h> +#include <fst/queue.h> +#include <fst/reverse.h> +#include <fst/state-map.h> + + +namespace fst { + +// comparator for creating partition based on sorting on +// - states +// - final weight +// - out degree, +// - (input label, output label, weight, destination_block) +template <class A> +class StateComparator { + public: + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + static const uint32 kCompareFinal = 0x00000001; + static const uint32 kCompareOutDegree = 0x00000002; + static const uint32 kCompareArcs = 0x00000004; + static const uint32 kCompareAll = 0x00000007; + + StateComparator(const Fst<A>& fst, + const Partition<typename A::StateId>& partition, + uint32 flags = kCompareAll) + : fst_(fst), partition_(partition), flags_(flags) {} + + // compare state x with state y based on sort criteria + bool operator()(const StateId x, const StateId y) const { + // check for final state equivalence + if (flags_ & kCompareFinal) { + const size_t 
xfinal = fst_.Final(x).Hash(); + const size_t yfinal = fst_.Final(y).Hash(); + if (xfinal < yfinal) return true; + else if (xfinal > yfinal) return false; + } + + if (flags_ & kCompareOutDegree) { + // check for # arcs + if (fst_.NumArcs(x) < fst_.NumArcs(y)) return true; + if (fst_.NumArcs(x) > fst_.NumArcs(y)) return false; + + if (flags_ & kCompareArcs) { + // # arcs are equal, check for arc match + for (ArcIterator<Fst<A> > aiter1(fst_, x), aiter2(fst_, y); + !aiter1.Done() && !aiter2.Done(); aiter1.Next(), aiter2.Next()) { + const A& arc1 = aiter1.Value(); + const A& arc2 = aiter2.Value(); + if (arc1.ilabel < arc2.ilabel) return true; + if (arc1.ilabel > arc2.ilabel) return false; + + if (partition_.class_id(arc1.nextstate) < + partition_.class_id(arc2.nextstate)) return true; + if (partition_.class_id(arc1.nextstate) > + partition_.class_id(arc2.nextstate)) return false; + } + } + } + + return false; + } + + private: + const Fst<A>& fst_; + const Partition<typename A::StateId>& partition_; + const uint32 flags_; +}; + +template <class A> const uint32 StateComparator<A>::kCompareFinal; +template <class A> const uint32 StateComparator<A>::kCompareOutDegree; +template <class A> const uint32 StateComparator<A>::kCompareArcs; +template <class A> const uint32 StateComparator<A>::kCompareAll; + + +// Computes equivalence classes for cyclic Fsts. For cyclic minimization +// we use the classic HopCroft minimization algorithm, which is of +// +// O(E)log(N), +// +// where E is the number of edges in the machine and N is number of states. 
+// +// The following paper describes the original algorithm +// An N Log N algorithm for minimizing states in a finite automaton +// by John HopCroft, January 1971 +// +template <class A, class Queue> +class CyclicMinimizer { + public: + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::StateId ClassId; + typedef typename A::Weight Weight; + typedef ReverseArc<A> RevA; + + CyclicMinimizer(const ExpandedFst<A>& fst): + // tell the Partition data-member to expect multiple repeated + // calls to SplitOn with the same element if we are non-deterministic. + P_(fst.Properties(kIDeterministic, true) == 0) { + if(fst.Properties(kIDeterministic, true) == 0) + CHECK(Weight::Properties() & kIdempotent); // this minimization + // algorithm for non-deterministic FSTs can only work with idempotent + // semirings. + Initialize(fst); + Compute(fst); + } + + ~CyclicMinimizer() { + delete aiter_queue_; + } + + const Partition<StateId>& partition() const { + return P_; + } + + // helper classes + private: + typedef ArcIterator<Fst<RevA> > ArcIter; + class ArcIterCompare { + public: + ArcIterCompare(const Partition<StateId>& partition) + : partition_(partition) {} + + ArcIterCompare(const ArcIterCompare& comp) + : partition_(comp.partition_) {} + + // compare two iterators based on there input labels, and proto state + // (partition class Ids) + bool operator()(const ArcIter* x, const ArcIter* y) const { + const RevA& xarc = x->Value(); + const RevA& yarc = y->Value(); + return (xarc.ilabel > yarc.ilabel); + } + + private: + const Partition<StateId>& partition_; + }; + + typedef priority_queue<ArcIter*, vector<ArcIter*>, ArcIterCompare> + ArcIterQueue; + + // helper methods + private: + // prepartitions the space into equivalence classes with + // same final weight + // same # arcs per state + // same outgoing arcs + void PrePartition(const Fst<A>& fst) { + VLOG(5) << "PrePartition"; + + typedef map<StateId, StateId, StateComparator<A> > 
EquivalenceMap; + StateComparator<A> comp(fst, P_, StateComparator<A>::kCompareFinal); + EquivalenceMap equiv_map(comp); + + StateIterator<Fst<A> > siter(fst); + StateId class_id = P_.AddClass(); + P_.Add(siter.Value(), class_id); + equiv_map[siter.Value()] = class_id; + L_.Enqueue(class_id); + for (siter.Next(); !siter.Done(); siter.Next()) { + StateId s = siter.Value(); + typename EquivalenceMap::const_iterator it = equiv_map.find(s); + if (it == equiv_map.end()) { + class_id = P_.AddClass(); + P_.Add(s, class_id); + equiv_map[s] = class_id; + L_.Enqueue(class_id); + } else { + P_.Add(s, it->second); + equiv_map[s] = it->second; + } + } + + VLOG(5) << "Initial Partition: " << P_.num_classes(); + } + + // - Create inverse transition Tr_ = rev(fst) + // - loop over states in fst and split on final, creating two blocks + // in the partition corresponding to final, non-final + void Initialize(const Fst<A>& fst) { + // construct Tr + Reverse(fst, &Tr_); + ILabelCompare<RevA> ilabel_comp; + ArcSort(&Tr_, ilabel_comp); + + // initial split (F, S - F) + P_.Initialize(Tr_.NumStates() - 1); + + // prep partition + PrePartition(fst); + + // allocate arc iterator queue + ArcIterCompare comp(P_); + aiter_queue_ = new ArcIterQueue(comp); + } + + // partition all classes with destination C + void Split(ClassId C) { + // Prep priority queue. Open arc iterator for each state in C, and + // insert into priority queue. + for (PartitionIterator<StateId> siter(P_, C); + !siter.Done(); siter.Next()) { + StateId s = siter.Value(); + if (Tr_.NumArcs(s + 1)) + aiter_queue_->push(new ArcIterator<Fst<RevA> >(Tr_, s + 1)); + } + + // Now pop arc iterator from queue, split entering equivalence class + // re-insert updated iterator into queue. 
+ Label prev_label = -1; + while (!aiter_queue_->empty()) { + ArcIterator<Fst<RevA> >* aiter = aiter_queue_->top(); + aiter_queue_->pop(); + if (aiter->Done()) { + delete aiter; + continue; + } + + const RevA& arc = aiter->Value(); + StateId from_state = aiter->Value().nextstate - 1; + Label from_label = arc.ilabel; + if (prev_label != from_label) + P_.FinalizeSplit(&L_); + + StateId from_class = P_.class_id(from_state); + if (P_.class_size(from_class) > 1) + P_.SplitOn(from_state); + + prev_label = from_label; + aiter->Next(); + if (aiter->Done()) + delete aiter; + else + aiter_queue_->push(aiter); + } + P_.FinalizeSplit(&L_); + } + + // Main loop for hopcroft minimization. + void Compute(const Fst<A>& fst) { + // process active classes (FIFO, or FILO) + while (!L_.Empty()) { + ClassId C = L_.Head(); + L_.Dequeue(); + + // split on C, all labels in C + Split(C); + } + } + + // helper data + private: + // Partioning of states into equivalence classes + Partition<StateId> P_; + + // L = set of active classes to be processed in partition P + Queue L_; + + // reverse transition function + VectorFst<RevA> Tr_; + + // Priority queue of open arc iterators for all states in the 'splitter' + // equivalence class + ArcIterQueue* aiter_queue_; +}; + + +// Computes equivalence classes for acyclic Fsts. The implementation details +// for this algorithms is documented by the following paper. +// +// Minimization of acyclic deterministic automata in linear time +// Dominque Revuz +// +// Complexity O(|E|) +// +template <class A> +class AcyclicMinimizer { + public: + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::StateId ClassId; + typedef typename A::Weight Weight; + + AcyclicMinimizer(const ExpandedFst<A>& fst): + // tell the Partition data-member to expect multiple repeated + // calls to SplitOn with the same element if we are non-deterministic. 
+ partition_(fst.Properties(kIDeterministic, true) == 0) { + if(fst.Properties(kIDeterministic, true) == 0) + CHECK(Weight::Properties() & kIdempotent); // minimization for + // non-deterministic FSTs can only work with idempotent semirings. + Initialize(fst); + Refine(fst); + } + + const Partition<StateId>& partition() { + return partition_; + } + + // helper classes + private: + // DFS visitor to compute the height (distance) to final state. + class HeightVisitor { + public: + HeightVisitor() : max_height_(0), num_states_(0) { } + + // invoked before dfs visit + void InitVisit(const Fst<A>& fst) {} + + // invoked when state is discovered (2nd arg is DFS tree root) + bool InitState(StateId s, StateId root) { + // extend height array and initialize height (distance) to 0 + for (size_t i = height_.size(); i <= s; ++i) + height_.push_back(-1); + + if (s >= num_states_) num_states_ = s + 1; + return true; + } + + // invoked when tree arc examined (to undiscoverted state) + bool TreeArc(StateId s, const A& arc) { + return true; + } + + // invoked when back arc examined (to unfinished state) + bool BackArc(StateId s, const A& arc) { + return true; + } + + // invoked when forward or cross arc examined (to finished state) + bool ForwardOrCrossArc(StateId s, const A& arc) { + if (height_[arc.nextstate] + 1 > height_[s]) + height_[s] = height_[arc.nextstate] + 1; + return true; + } + + // invoked when state finished (parent is kNoStateId for tree root) + void FinishState(StateId s, StateId parent, const A* parent_arc) { + if (height_[s] == -1) height_[s] = 0; + StateId h = height_[s] + 1; + if (parent >= 0) { + if (h > height_[parent]) height_[parent] = h; + if (h > max_height_) max_height_ = h; + } + } + + // invoked after DFS visit + void FinishVisit() {} + + size_t max_height() const { return max_height_; } + + const vector<StateId>& height() const { return height_; } + + const size_t num_states() const { return num_states_; } + + private: + vector<StateId> height_; + 
size_t max_height_; + size_t num_states_; + }; + + // helper methods + private: + // cluster states according to height (distance to final state) + void Initialize(const Fst<A>& fst) { + // compute height (distance to final state) + HeightVisitor hvisitor; + DfsVisit(fst, &hvisitor); + + // create initial partition based on height + partition_.Initialize(hvisitor.num_states()); + partition_.AllocateClasses(hvisitor.max_height() + 1); + const vector<StateId>& hstates = hvisitor.height(); + for (size_t s = 0; s < hstates.size(); ++s) + partition_.Add(s, hstates[s]); + } + + // refine states based on arc sort (out degree, arc equivalence) + void Refine(const Fst<A>& fst) { + typedef map<StateId, StateId, StateComparator<A> > EquivalenceMap; + StateComparator<A> comp(fst, partition_); + + // start with tail (height = 0) + size_t height = partition_.num_classes(); + for (size_t h = 0; h < height; ++h) { + EquivalenceMap equiv_classes(comp); + + // sort states within equivalence class + PartitionIterator<StateId> siter(partition_, h); + equiv_classes[siter.Value()] = h; + for (siter.Next(); !siter.Done(); siter.Next()) { + const StateId s = siter.Value(); + typename EquivalenceMap::const_iterator it = equiv_classes.find(s); + if (it == equiv_classes.end()) + equiv_classes[s] = partition_.AddClass(); + else + equiv_classes[s] = it->second; + } + + // create refined partition + for (siter.Reset(); !siter.Done();) { + const StateId s = siter.Value(); + const StateId old_class = partition_.class_id(s); + const StateId new_class = equiv_classes[s]; + + // a move operation can invalidate the iterator, so + // we first update the iterator to the next element + // before we move the current element out of the list + siter.Next(); + if (old_class != new_class) + partition_.Move(s, new_class); + } + } + } + + private: + Partition<StateId> partition_; +}; + + +// Given a partition and a mutable fst, merge states of Fst inplace +// (i.e. destructively). 
Merging works by taking the first state in +// a class of the partition to be the representative state for the class. +// Each arc is then reconnected to this state. All states in the class +// are merged by adding there arcs to the representative state. +template <class A> +void MergeStates( + const Partition<typename A::StateId>& partition, MutableFst<A>* fst) { + typedef typename A::StateId StateId; + + vector<StateId> state_map(partition.num_classes()); + for (size_t i = 0; i < partition.num_classes(); ++i) { + PartitionIterator<StateId> siter(partition, i); + state_map[i] = siter.Value(); // first state in partition; + } + + // relabel destination states + for (size_t c = 0; c < partition.num_classes(); ++c) { + for (PartitionIterator<StateId> siter(partition, c); + !siter.Done(); siter.Next()) { + StateId s = siter.Value(); + for (MutableArcIterator<MutableFst<A> > aiter(fst, s); + !aiter.Done(); aiter.Next()) { + A arc = aiter.Value(); + arc.nextstate = state_map[partition.class_id(arc.nextstate)]; + + if (s == state_map[c]) // first state just set destination + aiter.SetValue(arc); + else + fst->AddArc(state_map[c], arc); + } + } + } + fst->SetStart(state_map[partition.class_id(fst->Start())]); + + Connect(fst); +} + +template <class A> +void AcceptorMinimize(MutableFst<A>* fst) { + typedef typename A::StateId StateId; + if (!(fst->Properties(kAcceptor | kUnweighted, true))) { + FSTERROR() << "FST is not an unweighted acceptor"; + fst->SetProperties(kError, kError); + return; + } + + // connect fst before minimization, handles disconnected states + Connect(fst); + if (fst->NumStates() == 0) return; + + if (fst->Properties(kAcyclic, true)) { + // Acyclic minimization (revuz) + VLOG(2) << "Acyclic Minimization"; + ArcSort(fst, ILabelCompare<A>()); + AcyclicMinimizer<A> minimizer(*fst); + MergeStates(minimizer.partition(), fst); + + } else { + // Cyclic minimizaton (hopcroft) + VLOG(2) << "Cyclic Minimization"; + CyclicMinimizer<A, LifoQueue<StateId> > 
minimizer(*fst); + MergeStates(minimizer.partition(), fst); + } + + // Merge in appropriate semiring + ArcUniqueMapper<A> mapper(*fst); + StateMap(fst, mapper); +} + + +// In place minimization of deterministic weighted automata and transducers. +// For transducers, then the 'sfst' argument is not null, the algorithm +// produces a compact factorization of the minimal transducer. +// +// In the acyclic case, we use an algorithm from Dominique Revuz that +// is linear in the number of arcs (edges) in the machine. +// Complexity = O(E) +// +// In the cyclic case, we use the classical hopcroft minimization. +// Complexity = O(|E|log(|N|) +// +template <class A> +void Minimize(MutableFst<A>* fst, + MutableFst<A>* sfst = 0, + float delta = kDelta) { + uint64 props = fst->Properties(kAcceptor | kWeighted | kUnweighted, true); + + if (!(props & kAcceptor)) { // weighted transducer + VectorFst< GallicArc<A, STRING_LEFT> > gfst; + ArcMap(*fst, &gfst, ToGallicMapper<A, STRING_LEFT>()); + fst->DeleteStates(); + gfst.SetProperties(kAcceptor, kAcceptor); + Push(&gfst, REWEIGHT_TO_INITIAL, delta); + ArcMap(&gfst, QuantizeMapper< GallicArc<A, STRING_LEFT> >(delta)); + EncodeMapper< GallicArc<A, STRING_LEFT> > + encoder(kEncodeLabels | kEncodeWeights, ENCODE); + Encode(&gfst, &encoder); + AcceptorMinimize(&gfst); + Decode(&gfst, encoder); + + if (sfst == 0) { + FactorWeightFst< GallicArc<A, STRING_LEFT>, + GallicFactor<typename A::Label, + typename A::Weight, STRING_LEFT> > fwfst(gfst); + SymbolTable *osyms = fst->OutputSymbols() ? 
+ fst->OutputSymbols()->Copy() : 0; + ArcMap(fwfst, fst, FromGallicMapper<A, STRING_LEFT>()); + fst->SetOutputSymbols(osyms); + delete osyms; + } else { + sfst->SetOutputSymbols(fst->OutputSymbols()); + GallicToNewSymbolsMapper<A, STRING_LEFT> mapper(sfst); + ArcMap(gfst, fst, &mapper); + fst->SetOutputSymbols(sfst->InputSymbols()); + } + } else if (props & kWeighted) { // weighted acceptor + Push(fst, REWEIGHT_TO_INITIAL, delta); + ArcMap(fst, QuantizeMapper<A>(delta)); + EncodeMapper<A> encoder(kEncodeLabels | kEncodeWeights, ENCODE); + Encode(fst, &encoder); + AcceptorMinimize(fst); + Decode(fst, encoder); + } else { // unweighted acceptor + AcceptorMinimize(fst); + } +} + +} // namespace fst + +#endif // FST_LIB_MINIMIZE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/mutable-fst.h b/kaldi_io/src/tools/openfst/include/fst/mutable-fst.h new file mode 100644 index 0000000..09eb237 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/mutable-fst.h @@ -0,0 +1,378 @@ +// mutable-fst.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Expanded FST augmented with mutators - interface class definition +// and mutable arc iterator interface. 
+// + +#ifndef FST_LIB_MUTABLE_FST_H__ +#define FST_LIB_MUTABLE_FST_H__ + +#include <stddef.h> +#include <sys/types.h> +#include <string> +#include <vector> +using std::vector; + +#include <fst/expanded-fst.h> + + +namespace fst { + +template <class A> class MutableArcIteratorData; + +// An expanded FST plus mutators (use MutableArcIterator to modify arcs). +template <class A> +class MutableFst : public ExpandedFst<A> { + public: + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + + virtual MutableFst<A> &operator=(const Fst<A> &fst) = 0; + + MutableFst<A> &operator=(const MutableFst<A> &fst) { + return operator=(static_cast<const Fst<A> &>(fst)); + } + + virtual void SetStart(StateId) = 0; // Set the initial state + virtual void SetFinal(StateId, Weight) = 0; // Set a state's final weight + virtual void SetProperties(uint64 props, + uint64 mask) = 0; // Set property bits wrt mask + + virtual StateId AddState() = 0; // Add a state, return its ID + virtual void AddArc(StateId, const A &arc) = 0; // Add an arc to state + + virtual void DeleteStates(const vector<StateId>&) = 0; // Delete some states + virtual void DeleteStates() = 0; // Delete all states + virtual void DeleteArcs(StateId, size_t n) = 0; // Delete some arcs at state + virtual void DeleteArcs(StateId) = 0; // Delete all arcs at state + + virtual void ReserveStates(StateId n) { } // Optional, best effort only. + virtual void ReserveArcs(StateId s, size_t n) { } // Optional, Best effort. 
+ + // Return input label symbol table; return NULL if not specified + virtual const SymbolTable* InputSymbols() const = 0; + // Return output label symbol table; return NULL if not specified + virtual const SymbolTable* OutputSymbols() const = 0; + + // Return input label symbol table; return NULL if not specified + virtual SymbolTable* MutableInputSymbols() = 0; + // Return output label symbol table; return NULL if not specified + virtual SymbolTable* MutableOutputSymbols() = 0; + + // Set input label symbol table; NULL signifies not unspecified + virtual void SetInputSymbols(const SymbolTable* isyms) = 0; + // Set output label symbol table; NULL signifies not unspecified + virtual void SetOutputSymbols(const SymbolTable* osyms) = 0; + + // Get a copy of this MutableFst. See Fst<>::Copy() for further doc. + virtual MutableFst<A> *Copy(bool safe = false) const = 0; + + // Read an MutableFst from an input stream; return NULL on error. + static MutableFst<A> *Read(istream &strm, const FstReadOptions &opts) { + FstReadOptions ropts(opts); + FstHeader hdr; + if (ropts.header) + hdr = *opts.header; + else { + if (!hdr.Read(strm, opts.source)) + return 0; + ropts.header = &hdr; + } + if (!(hdr.Properties() & kMutable)) { + LOG(ERROR) << "MutableFst::Read: Not an MutableFst: " << ropts.source; + return 0; + } + FstRegister<A> *registr = FstRegister<A>::GetRegister(); + const typename FstRegister<A>::Reader reader = + registr->GetReader(hdr.FstType()); + if (!reader) { + LOG(ERROR) << "MutableFst::Read: Unknown FST type \"" << hdr.FstType() + << "\" (arc type = \"" << A::Type() + << "\"): " << ropts.source; + return 0; + } + Fst<A> *fst = reader(strm, ropts); + if (!fst) return 0; + return static_cast<MutableFst<A> *>(fst); + } + + // Read a MutableFst from a file; return NULL on error. + // Empty filename reads from standard input. If 'convert' is true, + // convert to a mutable FST of type 'convert_type' if file is + // a non-mutable FST. 
+ static MutableFst<A> *Read(const string &filename, bool convert = false, + const string &convert_type = "vector") { + if (convert == false) { + if (!filename.empty()) { + ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); + if (!strm) { + LOG(ERROR) << "MutableFst::Read: Can't open file: " << filename; + return 0; + } + return Read(strm, FstReadOptions(filename)); + } else { + return Read(cin, FstReadOptions("standard input")); + } + } else { // Converts to 'convert_type' if not mutable. + Fst<A> *ifst = Fst<A>::Read(filename); + if (!ifst) return 0; + if (ifst->Properties(kMutable, false)) { + return static_cast<MutableFst *>(ifst); + } else { + Fst<A> *ofst = Convert(*ifst, convert_type); + delete ifst; + if (!ofst) return 0; + if (!ofst->Properties(kMutable, false)) + LOG(ERROR) << "MutableFst: bad convert type: " << convert_type; + return static_cast<MutableFst *>(ofst); + } + } + } + + // For generic mutuble arc iterator construction; not normally called + // directly by users. + virtual void InitMutableArcIterator(StateId s, + MutableArcIteratorData<A> *) = 0; +}; + +// Mutable arc iterator interface, templated on the Arc definition; used +// for mutable Arc iterator specializations that are returned by +// the InitMutableArcIterator MutableFst method. +template <class A> +class MutableArcIteratorBase : public ArcIteratorBase<A> { + public: + typedef A Arc; + + void SetValue(const A &arc) { SetValue_(arc); } // Set current arc's content + + private: + virtual void SetValue_(const A &arc) = 0; +}; + +template <class A> +struct MutableArcIteratorData { + MutableArcIteratorBase<A> *base; // Specific iterator +}; + +// Generic mutable arc iterator, templated on the FST definition +// - a wrapper around pointer to specific one. +// Here is a typical use: \code +// for (MutableArcIterator<StdFst> aiter(&fst, s)); +// !aiter.Done(); +// aiter.Next()) { +// StdArc arc = aiter.Value(); +// arc.ilabel = 7; +// aiter.SetValue(arc); +// ... 
+// } \endcode +// This version requires function calls. +template <class F> +class MutableArcIterator { + public: + typedef F FST; + typedef typename F::Arc Arc; + typedef typename Arc::StateId StateId; + + MutableArcIterator(F *fst, StateId s) { + fst->InitMutableArcIterator(s, &data_); + } + ~MutableArcIterator() { delete data_.base; } + + bool Done() const { return data_.base->Done(); } + const Arc& Value() const { return data_.base->Value(); } + void Next() { data_.base->Next(); } + size_t Position() const { return data_.base->Position(); } + void Reset() { data_.base->Reset(); } + void Seek(size_t a) { data_.base->Seek(a); } + void SetValue(const Arc &a) { data_.base->SetValue(a); } + uint32 Flags() const { return data_.base->Flags(); } + void SetFlags(uint32 f, uint32 m) { + return data_.base->SetFlags(f, m); + } + + private: + MutableArcIteratorData<Arc> data_; + DISALLOW_COPY_AND_ASSIGN(MutableArcIterator); +}; + + +namespace internal { + +// MutableFst<A> case - abstract methods. +template <class A> inline +typename A::Weight Final(const MutableFst<A> &fst, typename A::StateId s) { + return fst.Final(s); +} + +template <class A> inline +ssize_t NumArcs(const MutableFst<A> &fst, typename A::StateId s) { + return fst.NumArcs(s); +} + +template <class A> inline +ssize_t NumInputEpsilons(const MutableFst<A> &fst, typename A::StateId s) { + return fst.NumInputEpsilons(s); +} + +template <class A> inline +ssize_t NumOutputEpsilons(const MutableFst<A> &fst, typename A::StateId s) { + return fst.NumOutputEpsilons(s); +} + +} // namespace internal + + +// A useful alias when using StdArc. +typedef MutableFst<StdArc> StdMutableFst; + + +// This is a helper class template useful for attaching a MutableFst +// interface to its implementation, handling reference counting and +// copy-on-write. 
+template <class I, class F = MutableFst<typename I::Arc> > +class ImplToMutableFst : public ImplToExpandedFst<I, F> { + public: + typedef typename I::Arc Arc; + typedef typename Arc::Weight Weight; + typedef typename Arc::StateId StateId; + + using ImplToFst<I, F>::GetImpl; + using ImplToFst<I, F>::SetImpl; + + virtual void SetStart(StateId s) { + MutateCheck(); + GetImpl()->SetStart(s); + } + + virtual void SetFinal(StateId s, Weight w) { + MutateCheck(); + GetImpl()->SetFinal(s, w); + } + + virtual void SetProperties(uint64 props, uint64 mask) { + // Can skip mutate check if extrinsic properties don't change, + // since it is then safe to update all (shallow) copies + uint64 exprops = kExtrinsicProperties & mask; + if (GetImpl()->Properties(exprops) != (props & exprops)) + MutateCheck(); + GetImpl()->SetProperties(props, mask); + } + + virtual StateId AddState() { + MutateCheck(); + return GetImpl()->AddState(); + } + + virtual void AddArc(StateId s, const Arc &arc) { + MutateCheck(); + GetImpl()->AddArc(s, arc); + } + + virtual void DeleteStates(const vector<StateId> &dstates) { + MutateCheck(); + GetImpl()->DeleteStates(dstates); + } + + virtual void DeleteStates() { + MutateCheck(); + GetImpl()->DeleteStates(); + } + + virtual void DeleteArcs(StateId s, size_t n) { + MutateCheck(); + GetImpl()->DeleteArcs(s, n); + } + + virtual void DeleteArcs(StateId s) { + MutateCheck(); + GetImpl()->DeleteArcs(s); + } + + virtual void ReserveStates(StateId s) { + MutateCheck(); + GetImpl()->ReserveStates(s); + } + + virtual void ReserveArcs(StateId s, size_t n) { + MutateCheck(); + GetImpl()->ReserveArcs(s, n); + } + + virtual const SymbolTable* InputSymbols() const { + return GetImpl()->InputSymbols(); + } + + virtual const SymbolTable* OutputSymbols() const { + return GetImpl()->OutputSymbols(); + } + + virtual SymbolTable* MutableInputSymbols() { + MutateCheck(); + return GetImpl()->InputSymbols(); + } + + virtual SymbolTable* MutableOutputSymbols() { + MutateCheck(); + 
return GetImpl()->OutputSymbols(); + } + + virtual void SetInputSymbols(const SymbolTable* isyms) { + MutateCheck(); + GetImpl()->SetInputSymbols(isyms); + } + + virtual void SetOutputSymbols(const SymbolTable* osyms) { + MutateCheck(); + GetImpl()->SetOutputSymbols(osyms); + } + + protected: + ImplToMutableFst() : ImplToExpandedFst<I, F>() {} + + ImplToMutableFst(I *impl) : ImplToExpandedFst<I, F>(impl) {} + + + ImplToMutableFst(const ImplToMutableFst<I, F> &fst) + : ImplToExpandedFst<I, F>(fst) {} + + ImplToMutableFst(const ImplToMutableFst<I, F> &fst, bool safe) + : ImplToExpandedFst<I, F>(fst, safe) {} + + void MutateCheck() { + // Copy on write + if (GetImpl()->RefCount() > 1) + SetImpl(new I(*this)); + } + + private: + // Disallow + ImplToMutableFst<I, F> &operator=(const ImplToMutableFst<I, F> &fst); + + ImplToMutableFst<I, F> &operator=(const Fst<Arc> &fst) { + FSTERROR() << "ImplToMutableFst: Assignment operator disallowed"; + GetImpl()->SetProperties(kError, kError); + return *this; + } +}; + + +} // namespace fst + +#endif // FST_LIB_MUTABLE_FST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/pair-weight.h b/kaldi_io/src/tools/openfst/include/fst/pair-weight.h new file mode 100644 index 0000000..7d8aa11 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/pair-weight.h @@ -0,0 +1,280 @@ +// pair-weight.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Masha Maria Shugrina)
+//
+// \file
+// Pair weight templated base class for weight classes that
+// contain two weights (e.g. Product, Lexicographic)
+
+#ifndef FST_LIB_PAIR_WEIGHT_H_
+#define FST_LIB_PAIR_WEIGHT_H_
+
+#include <climits>
+#include <stack>
+#include <string>
+
+#include <fst/weight.h>
+
+
+DECLARE_string(fst_weight_parentheses);
+DECLARE_string(fst_weight_separator);
+
+namespace fst {
+
+template<class W1, class W2> class PairWeight;
+template <class W1, class W2>
+istream &operator>>(istream &strm, PairWeight<W1, W2> &w);
+
+// Base class holding an ordered pair (W1, W2) of component weights.
+// It supplies storage, component-wise Zero/One/NoWeight, binary and text
+// I/O, hashing, quantization and reversal; the semiring operations
+// themselves are left to derived classes.
+template<class W1, class W2>
+class PairWeight {
+ public:
+  friend istream &operator>><W1, W2>(istream&, PairWeight<W1, W2>&);
+
+  typedef PairWeight<typename W1::ReverseWeight,
+                     typename W2::ReverseWeight>
+  ReverseWeight;
+
+  PairWeight() {}
+
+  PairWeight(const PairWeight& w) : value1_(w.value1_), value2_(w.value2_) {}
+
+  PairWeight(W1 w1, W2 w2) : value1_(w1), value2_(w2) {}
+
+  // Pair of the component Zero() weights.
+  static const PairWeight<W1, W2> &Zero() {
+    static const PairWeight<W1, W2> zero(W1::Zero(), W2::Zero());
+    return zero;
+  }
+
+  // Pair of the component One() weights.
+  static const PairWeight<W1, W2> &One() {
+    static const PairWeight<W1, W2> one(W1::One(), W2::One());
+    return one;
+  }
+
+  // Pair of the component NoWeight() weights.
+  static const PairWeight<W1, W2> &NoWeight() {
+    static const PairWeight<W1, W2> no_weight(W1::NoWeight(), W2::NoWeight());
+    return no_weight;
+  }
+
+  // Reads the first component, then the second; the result of the second
+  // component's Read() is what is returned.
+  istream &Read(istream &strm) {
+    value1_.Read(strm);
+    return value2_.Read(strm);
+  }
+
+  // Writes the first component, then the second; the result of the second
+  // component's Write() is what is returned.
+  ostream &Write(ostream &strm) const {
+    value1_.Write(strm);
+    return value2_.Write(strm);
+  }
+
+  PairWeight<W1, W2> &operator=(const PairWeight<W1, W2> &w) {
+    value1_ = w.value1_;
+    value2_ = w.value2_;
+    return *this;
+  }
+
+  // True only when both components are members of their weight sets.
+  bool Member() const {
+    if (!value1_.Member())
+      return false;
+    return value2_.Member();
+  }
+
+  // Rotates the first component's hash left by five bits and XORs in the
+  // second component's hash.
+  size_t Hash() const {
+    const size_t first_hash = value1_.Hash();
+    const size_t second_hash = value2_.Hash();
+    const int lshift = 5;
+    const int rshift = CHAR_BIT * sizeof(size_t) - 5;
+    const size_t rotated = (first_hash << lshift) ^ (first_hash >> rshift);
+    return rotated ^ second_hash;
+  }
+
+  // Component-wise quantization.
+  PairWeight<W1, W2> Quantize(float delta = kDelta) const {
+    return PairWeight<W1, W2>(value1_.Quantize(delta),
+                              value2_.Quantize(delta));
+  }
+
+  // Component-wise reversal.
+  ReverseWeight Reverse() const {
+    return ReverseWeight(value1_.Reverse(), value2_.Reverse());
+  }
+
+  const W1& Value1() const { return value1_; }
+
+  const W2& Value2() const { return value2_; }
+
+ protected:
+  void SetValue1(const W1 &w) { value1_ = w; }
+  void SetValue2(const W2 &w) { value2_ = w; }
+
+  // Reads a pair written without surrounding parentheses: the text up to
+  // the first separator is parsed as W1, and W2 is then extracted directly
+  // from the stream.  Hitting EOF before the separator sets the stream
+  // state to badbit and leaves w untouched.
+  inline static istream &ReadNoParen(
+      istream &strm, PairWeight<W1, W2>& w, char separator) {
+    // Skip leading whitespace.
+    int ch = strm.get();
+    while (isspace(ch))
+      ch = strm.get();
+
+    // Gather the text of the first element.
+    string first_text;
+    for (; ch != separator; ch = strm.get()) {
+      if (ch == EOF) {
+        strm.clear(std::ios::badbit);
+        return strm;
+      }
+      first_text += ch;
+    }
+    istringstream first_strm(first_text);
+    W1 first = W1::Zero();
+    first_strm >> first;
+
+    // The second element is read straight from the input stream.
+    W2 second = W2::Zero();
+    strm >> second;
+
+    w = PairWeight<W1, W2>(first, second);
+    return strm;
+  }
+
+  // Reads a pair written as <open_paren>W1<separator>W2<close_paren>.
+  // Nested parentheses inside the first element must balance; the second
+  // element is everything up to EOF minus the final close_paren.
+  inline static istream &ReadWithParen(
+      istream &strm, PairWeight<W1, W2>& w,
+      char separator, char open_paren, char close_paren) {
+    // Skip leading whitespace; the first significant character must open
+    // the pair.
+    int ch = strm.get();
+    while (isspace(ch))
+      ch = strm.get();
+    if (ch != open_paren) {
+      FSTERROR() << " is fst_weight_parentheses flag set correcty? ";
+      strm.clear(std::ios::failbit);
+      return strm;
+    }
+
+    // First element: runs to the first separator seen at nesting depth 0.
+    size_t depth = 0;
+    string first_text;
+    for (ch = strm.get(); ch != separator || depth != 0; ch = strm.get()) {
+      if (ch == EOF) {
+        strm.clear(std::ios::badbit);
+        return strm;
+      }
+      first_text += ch;
+      // Parentheses met before the separator have to be matched.
+      if (ch == open_paren) {
+        ++depth;
+      } else if (ch == close_paren) {
+        // A close with nothing open is a mismatch.
+        if (depth == 0) {
+          strm.clear(std::ios::failbit);
+          return strm;
+        }
+        --depth;
+      }
+    }
+    istringstream first_strm(first_text);
+    W1 first = W1::Zero();
+    first_strm >> first;
+
+    // Second element: the remainder of the stream.
+    string second_text;
+    for (ch = strm.get(); ch != EOF; ch = strm.get())
+      second_text += ch;
+    if (second_text.empty() ||
+        (second_text[second_text.size() - 1] != close_paren)) {
+      FSTERROR() << " is fst_weight_parentheses flag set correcty? ";
+      strm.clear(std::ios::failbit);
+      return strm;
+    }
+
+    // Drop the closing parenthesis before parsing.
+    second_text.erase(second_text.size() - 1, 1);
+    istringstream second_strm(second_text);
+    W2 second = W2::Zero();
+    second_strm >> second;
+
+    w = PairWeight<W1, W2>(first, second);
+    return strm;
+  }
+
+ private:
+  W1 value1_;
+  W2 value2_;
+
+};
+
+// Two pairs are equal when both components compare equal.
+template <class W1, class W2>
+inline bool operator==(const PairWeight<W1, W2> &w,
+                       const PairWeight<W1, W2> &v) {
+  return w.Value1() == v.Value1() && w.Value2() == v.Value2();
+}
+
+// Two pairs differ when either component compares unequal.
+template <class W1, class W2>
+inline bool operator!=(const PairWeight<W1, W2> &w1,
+                       const PairWeight<W1, W2> &w2) {
+  return w1.Value1() != w2.Value1() || w1.Value2() != w2.Value2();
+}
+
+
+// Component-wise approximate equality with a shared tolerance.
+template <class W1, class W2>
+inline bool ApproxEqual(const PairWeight<W1, W2> &w1,
+                        const PairWeight<W1, W2> &w2,
+                        float delta = kDelta) {
+  if (!ApproxEqual(w1.Value1(), w2.Value1(), delta))
+    return false;
+  return ApproxEqual(w1.Value2(), w2.Value2(), delta);
+}
+
+// Text output.  The separator flag must hold exactly one character and the
+// parentheses flag must be either empty or exactly two characters;
+// otherwise the stream state is set to badbit and nothing is written.
+template <class W1, class W2>
+inline ostream &operator<<(ostream &strm, const PairWeight<W1, W2> &w) {
+  if (FLAGS_fst_weight_separator.size() != 1) {
+    FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char separator = FLAGS_fst_weight_separator[0];
+
+  // No parentheses configured: plain "W1<sep>W2".
+  if (FLAGS_fst_weight_parentheses.empty())
+    return strm << w.Value1() << separator << w.Value2();
+
+  if (FLAGS_fst_weight_parentheses.size() != 2) {
+    FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char open_paren = FLAGS_fst_weight_parentheses[0];
+  const char close_paren = FLAGS_fst_weight_parentheses[1];
+  return strm << open_paren << w.Value1() << separator
+              << w.Value2() << close_paren ;
+}
+
+// Text input; validates the flags the same way as operator<< and then
+// dispatches to ReadWithParen() or ReadNoParen().
+template <class W1, class W2>
+inline istream &operator>>(istream &strm, PairWeight<W1, W2> &w) {
+  if (FLAGS_fst_weight_separator.size() != 1) {
+    FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  const char separator = FLAGS_fst_weight_separator[0];
+
+  if (FLAGS_fst_weight_parentheses.empty())
+    return PairWeight<W1, W2>::ReadNoParen(strm, w, separator);
+
+  if (FLAGS_fst_weight_parentheses.size() != 2) {
+    FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2";
+    strm.clear(std::ios::badbit);
+    return strm;
+  }
+  return PairWeight<W1, W2>::ReadWithParen(
+      strm, w, separator, FLAGS_fst_weight_parentheses[0],
+      FLAGS_fst_weight_parentheses[1]);
+}
+
+}  // namespace fst
+
+#endif  // FST_LIB_PAIR_WEIGHT_H_
diff --git a/kaldi_io/src/tools/openfst/include/fst/partition.h b/kaldi_io/src/tools/openfst/include/fst/partition.h
new file mode 100644
index 0000000..40b849a
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/partition.h
@@ -0,0 +1,305 @@
+// partition.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: [email protected] (Johan Schalkwyk)
+//
+// \file Functions and classes to create a partition of states
+//
+
+#ifndef FST_LIB_PARTITION_H__
+#define FST_LIB_PARTITION_H__
+
+#include <vector>
+using std::vector;
+#include <algorithm>
+
+
+#include <fst/queue.h>
+
+
+
+namespace fst {
+
+template <typename T> class PartitionIterator;
+
+// \class Partition
+// \brief Defines a partitioning of states. Typically used to represent
+//    equivalence classes for Fst operations like minimization.
+//
+// Each class is a doubly-linked list of Element nodes whose head pointer is
+// stored in classes_.  The Partition owns every Element through the raw
+// pointers in elements_ and deletes them in its destructor.  No copy
+// constructor or assignment operator is defined, so a copied Partition
+// would share (and later double-delete) the same nodes: do not copy it.
+template <typename T>
+class Partition {
+  friend class PartitionIterator<T>;
+
+  // One list node per element; value is the element id.
+  struct Element {
+    Element() : value(0), next(0), prev(0) {}
+    Element(T v) : value(v), next(0), prev(0) {}
+
+    T value;
+    Element* next;
+    Element* prev;
+  };
+
+ public:
+  // Creates a partition with no elements; call Initialize() before use.
+  Partition(bool allow_repeated_split)
+      : num_states_(0),
+        allow_repeated_split_(allow_repeated_split) {}
+
+  // Creates a partition holding num_states unassigned elements.
+  Partition(bool allow_repeated_split, T num_states)
+      : num_states_(0),
+        allow_repeated_split_(allow_repeated_split) {
+    Initialize(num_states);
+  }
+
+  ~Partition() {
+    for (size_t i = 0; i < elements_.size(); ++i)
+      delete elements_[i];
+  }
+
+  // Create an empty partition for num_states. At initialization time
+  // all elements are not assigned to a class (i.e class_index = -1).
+  // Initialize just creates num_states of elements. All element
+  // operations are then done by simply disconnecting the element from
+  // its current class and placing it at the head of the next class.
+  //
+  // Any previous contents are discarded, so the partition can be reused.
+  void Initialize(size_t num_states) {
+    for (size_t i = 0; i < elements_.size(); ++i)
+      delete elements_[i];
+    elements_.clear();
+    classes_.clear();
+    class_index_.clear();
+    // The per-class bookkeeping must be dropped together with classes_:
+    // AddClass()/AllocateClasses() only resize these vectors, so stale
+    // sizes and split pointers (into the nodes deleted above) would
+    // otherwise be picked up by the first classes added after a
+    // re-initialization.
+    class_split_.clear();
+    class_size_.clear();
+    split_size_.clear();
+    visited_classes_.clear();
+
+    elements_.resize(num_states);
+    class_index_.resize(num_states, -1);
+    class_size_.reserve(num_states);
+    for (size_t i = 0; i < num_states; ++i)
+      elements_[i] = new Element(i);
+    num_states_ = num_states;
+  }
+
+  // Add a class: grows classes_, class_size_, class_split_ and split_size_
+  // by one zero-initialized entry and returns the id of the new class.
+  size_t AddClass() {
+    size_t num_classes = classes_.size();
+    classes_.resize(num_classes + 1, 0);
+    class_size_.resize(num_classes + 1, 0);
+    class_split_.resize(num_classes + 1, 0);
+    split_size_.resize(num_classes + 1, 0);
+    return num_classes;
+  }
+
+  // Adds num_classes empty classes in one step.
+  void AllocateClasses(T num_classes) {
+    size_t n = classes_.size() + num_classes;
+    classes_.resize(n, 0);
+    class_size_.resize(n, 0);
+    class_split_.resize(n, 0);
+    split_size_.resize(n, 0);
+  }
+
+  // Add element_id to class_id. The Add method is used to initialize
+  // partition. Once elements have been added to a class, you need to
+  // use the Move() method to move an element from one class to another.
+  // The element is linked in at the head of the class list.
+  void Add(T element_id, T class_id) {
+    Element* element = elements_[element_id];
+
+    if (classes_[class_id])
+      classes_[class_id]->prev = element;
+    element->next = classes_[class_id];
+    element->prev = 0;
+    classes_[class_id] = element;
+
+    class_index_[element_id] = class_id;
+    class_size_[class_id]++;
+  }
+
+  // Move element_id to class_id. Disconnects (removes) the element
+  // from its current class and adds it at the head of class_id.
+  void Move(T element_id, T class_id) {
+    T old_class_id = class_index_[element_id];
+
+    Element* element = elements_[element_id];
+    if (element->next) element->next->prev = element->prev;
+    if (element->prev) element->prev->next = element->next;
+    else classes_[old_class_id] = element->next;
+
+    Add(element_id, class_id);
+    // Add() incremented the target's size; undo the element's old count.
+    class_size_[old_class_id]--;
+  }
+
+  // split class on the element_id: the element is moved to the head of
+  // its own class, ahead of the split point class_split_[class_id].
+  // Singleton classes are left alone.
+  void SplitOn(T element_id) {
+    T class_id = class_index_[element_id];
+    if (class_size_[class_id] == 1) return;
+
+    // first time class is split
+    if (split_size_[class_id] == 0) {
+      visited_classes_.push_back(class_id);
+      class_split_[class_id] = classes_[class_id];
+    }
+    // increment size of split (set of element at head of chain)
+    split_size_[class_id]++;
+
+    // update split point
+    if (class_split_[class_id] != 0
+        && class_split_[class_id] == elements_[element_id])
+      class_split_[class_id] = elements_[element_id]->next;
+
+    // move to head of chain in same class
+    Move(element_id, class_id);
+  }
+
+  // Finalize class_id, split if required, and update class_splits,
+  // class indices of the newly created class. Returns the new_class id
+  // or -1 if no new class was created.
+  T SplitRefine(T class_id) {
+
+    Element* split_el = class_split_[class_id];
+    // only split if necessary
+    //if (class_size_[class_id] == split_size_[class_id]) {
+    if(split_el == NULL) { // we split on everything...
+      split_size_[class_id] = 0;
+      return -1;
+    } else {
+      T new_class = AddClass();
+
+      if(allow_repeated_split_) { // split_size_ is possibly
+        // inaccurate, so work it out exactly.
+        size_t split_count;  Element *e;
+        for(split_count=0,e=classes_[class_id];
+            e != split_el; split_count++, e=e->next);
+        split_size_[class_id] = split_count;
+      }
+      size_t remainder = class_size_[class_id] - split_size_[class_id];
+      // The smaller of the two halves becomes the new class.
+      if (remainder < split_size_[class_id]) {  // add smaller
+        classes_[new_class] = split_el;
+        split_el->prev->next = 0;
+        split_el->prev = 0;
+        class_size_[class_id] = split_size_[class_id];
+        class_size_[new_class] = remainder;
+      } else {
+        classes_[new_class] = classes_[class_id];
+        class_size_[class_id] = remainder;
+        class_size_[new_class] = split_size_[class_id];
+        split_el->prev->next = 0;
+        split_el->prev = 0;
+        classes_[class_id] = split_el;
+      }
+
+      // update class index for element in new class
+      for (Element* el = classes_[new_class]; el; el = el->next)
+        class_index_[el->value] = new_class;
+
+      class_split_[class_id] = 0;
+      split_size_[class_id] = 0;
+
+      return new_class;
+    }
+  }
+
+  // Once all states have been processed for a particular class C, we
+  // can finalize the split. FinalizeSplit() will update each block in the
+  // partition, create new ones and update the queue of active classes
+  // that require further refinement.  L may be null, in which case new
+  // classes are created but not enqueued.
+  template <class Queue>
+  void FinalizeSplit(Queue* L) {
+    for (size_t i = 0; i < visited_classes_.size(); ++i) {
+      T new_class = SplitRefine(visited_classes_[i]);
+      if (new_class != -1 && L)
+        L->Enqueue(new_class);
+    }
+    visited_classes_.clear();
+  }
+
+
+  // Class that element_id currently belongs to (-1 if unassigned).
+  const T class_id(T element_id) const {
+    return class_index_[element_id];
+  }
+
+  const vector<T>& class_sizes() const {
+    return class_size_;
+  }
+
+  const size_t class_size(T class_id) const {
+    return class_size_[class_id];
+  }
+
+  const T num_classes() const {
+    return classes_.size();
+  }
+
+
+ private:
+  // number of elements passed to the last Initialize() (0 before that)
+  int num_states_;
+
+  // container of all elements (owner of ptrs)
+  vector<Element*> elements_;
+
+  // linked list of elements belonging to class
+  vector<Element*> classes_;
+
+  // pointer to split point for each class
+  vector<Element*> class_split_;
+
+  // class index of element
+  vector<T> class_index_;
+
+  // class sizes
+  vector<T> class_size_;
+
+  // size of split for each class
+  // in the nondeterministic case, split_size_ is actually an upper
+  // bound on the size of split for each class.
+  vector<T> split_size_;
+
+  // set of visited classes to be used in split refine
+  vector<T> visited_classes_;
+
+  // When true, SplitRefine() does not trust split_size_ and recounts the
+  // split by walking the class list up to the split point.
+  // NOTE(review): the previous comment read "true if input fst was
+  // deterministic", which appears inverted relative to the split_size_
+  // note above (the recount is only needed when split_size_ may
+  // overcount) -- confirm against the callers.
+  bool allow_repeated_split_;
+};
+
+
+// iterate over members of a class in a partition
+// The iterator walks the class's linked list from its current head; it
+// keeps a reference to the partition, which must outlive it.
+template <typename T>
+class PartitionIterator {
+  typedef typename Partition<T>::Element Element;
+ public:
+  PartitionIterator(const Partition<T>& partition, T class_id)
+      : p_(partition),
+        element_(p_.classes_[class_id]),
+        class_id_(class_id) {}
+
+  bool Done() {
+    return (element_ == 0);
+  }
+
+  const T Value() {
+    return (element_->value);
+  }
+
+  void Next() {
+    element_ = element_->next;
+  }
+
+  // Restarts from the current head of the class.
+  void Reset() {
+    element_ = p_.classes_[class_id_];
+  }
+
+ private:
+  const Partition<T>& p_;
+
+  const Element* element_;
+
+  T class_id_;
+};
+}  // namespace fst
+
+#endif  // FST_LIB_PARTITION_H__
diff --git a/kaldi_io/src/tools/openfst/include/fst/power-weight.h b/kaldi_io/src/tools/openfst/include/fst/power-weight.h
new file mode 100644
index 0000000..256928d
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/power-weight.h
@@ -0,0 +1,159 @@
+// power-weight.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: [email protected] (Cyril Allauzen)
+//
+// \file
+// Cartesian power weight semiring operation definitions.
+
+#ifndef FST_LIB_POWER_WEIGHT_H__
+#define FST_LIB_POWER_WEIGHT_H__
+
+#include <fst/tuple-weight.h>
+#include <fst/weight.h>
+
+
+namespace fst {
+
+// Cartesian power semiring: W ^ n
+// Forms:
+//  - a left semimodule when W is a left semiring,
+//  - a right semimodule when W is a right semiring,
+//  - a bisemimodule when W is a semiring,
+//    the free semimodule of rank n over W
+// The Times operation is overloaded to provide the
+// left and right scalar products.
+template <class W, unsigned int n>
+class PowerWeight : public TupleWeight<W, n> {
+ public:
+  using TupleWeight<W, n>::Zero;
+  using TupleWeight<W, n>::One;
+  using TupleWeight<W, n>::NoWeight;
+  using TupleWeight<W, n>::Quantize;
+  using TupleWeight<W, n>::Reverse;
+
+  typedef PowerWeight<typename W::ReverseWeight, n> ReverseWeight;
+
+  PowerWeight() {}
+
+  PowerWeight(const TupleWeight<W, n> &w) : TupleWeight<W, n>(w) {}
+
+  // Builds the weight from the component range [begin, end).
+  template <class Iterator>
+  PowerWeight(Iterator begin, Iterator end) : TupleWeight<W, n>(begin, end) {}
+
+  static const PowerWeight<W, n> &Zero() {
+    static const PowerWeight<W, n> zero(TupleWeight<W, n>::Zero());
+    return zero;
+  }
+
+  static const PowerWeight<W, n> &One() {
+    static const PowerWeight<W, n> one(TupleWeight<W, n>::One());
+    return one;
+  }
+
+  static const PowerWeight<W, n> &NoWeight() {
+    static const PowerWeight<W, n> no_weight(TupleWeight<W, n>::NoWeight());
+    return no_weight;
+  }
+
+  // Type name: the component type followed by "_^" and the rank n.
+  // The string is built on first use and cached.
+  static const string &Type() {
+    static string type;
+    if (type.empty()) {
+      string power;
+      Int64ToStr(n, &power);
+      type = W::Type() + "_^" + power;
+    }
+    return type;
+  }
+
+  // Only these four properties of W are passed through.
+  static uint64 Properties() {
+    uint64 props = W::Properties();
+    return props & (kLeftSemiring | kRightSemiring |
+                    kCommutative | kIdempotent);
+  }
+
+  PowerWeight<W, n> Quantize(float delta = kDelta) const {
+    return TupleWeight<W, n>::Quantize(delta);
+  }
+
+  ReverseWeight Reverse() const {
+    return TupleWeight<W, n>::Reverse();
+  }
+};
+
+
+// Semiring plus operation (component-wise)
+template <class W, unsigned int n>
+inline PowerWeight<W, n> Plus(const PowerWeight<W, n> &w1,
+                              const PowerWeight<W, n> &w2) {
+  PowerWeight<W, n> w;
+  for (size_t i = 0; i < n; ++i)
+    w.SetValue(i, Plus(w1.Value(i), w2.Value(i)));
+  return w;
+}
+
+// Semiring times operation (component-wise)
+template <class W, unsigned int n>
+inline PowerWeight<W, n> Times(const PowerWeight<W, n> &w1,
+                               const PowerWeight<W, n> &w2) {
+  PowerWeight<W, n> w;
+  for (size_t i = 0; i < n; ++i)
+    w.SetValue(i, Times(w1.Value(i), w2.Value(i)));
+  return w;
+}
+
+// Semiring divide operation (component-wise)
+template <class W, unsigned int n>
+inline PowerWeight<W, n> Divide(const PowerWeight<W, n> &w1,
+                                const PowerWeight<W, n> &w2,
+                                DivideType type = DIVIDE_ANY) {
+  PowerWeight<W, n> w;
+  for (size_t i = 0; i < n; ++i)
+    w.SetValue(i, Divide(w1.Value(i), w2.Value(i), type));
+  return w;
+}
+
+// Semimodule left scalar product: component i of the result is
+// Times(s, w.Value(i)).
+template <class W, unsigned int n>
+inline PowerWeight<W, n> Times(const W &s, const PowerWeight<W, n> &w) {
+  PowerWeight<W, n> sw;
+  for (size_t i = 0; i < n; ++i)
+    sw.SetValue(i, Times(s, w.Value(i)));
+  // Return the scaled weight; returning w here would discard the product
+  // and hand the input back unchanged.
+  return sw;
+}
+
+// Semimodule right scalar product: component i of the result is
+// Times(w.Value(i), s).
+template <class W, unsigned int n>
+inline PowerWeight<W, n> Times(const PowerWeight<W, n> &w, const W &s) {
+  PowerWeight<W, n> ws;
+  for (size_t i = 0; i < n; ++i)
+    ws.SetValue(i, Times(w.Value(i), s));
+  // Return the scaled weight, not the unmodified input.
+  return ws;
+}
+
+// Semimodule dot product: the Plus-sum over i of
+// Times(w1.Value(i), w2.Value(i)), starting from W::Zero().
+template <class W, unsigned int n>
+inline W DotProduct(const PowerWeight<W, n> &w1,
+                    const PowerWeight<W, n> &w2) {
+  W w = W::Zero();
+  for (size_t i = 0; i < n; ++i)
+    w = Plus(w, Times(w1.Value(i), w2.Value(i)));
+  return w;
+}
+
+
+}  // namespace fst
+
+#endif  // FST_LIB_POWER_WEIGHT_H__
diff --git a/kaldi_io/src/tools/openfst/include/fst/product-weight.h b/kaldi_io/src/tools/openfst/include/fst/product-weight.h
new file mode 100644
index 0000000..16dede8
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/product-weight.h
@@ -0,0 +1,115 @@
+// product-weight.h
+
+// Licensed under the Apache 
License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: [email protected] (Michael Riley)
+//
+// \file
+// Product weight set and associated semiring operation definitions.
+
+#ifndef FST_LIB_PRODUCT_WEIGHT_H__
+#define FST_LIB_PRODUCT_WEIGHT_H__
+
+#include <stack>
+#include <string>
+
+#include <fst/pair-weight.h>
+#include <fst/weight.h>
+
+
+namespace fst {
+
+// Product semiring: W1 * W2
+// A PairWeight whose Plus, Times and Divide (defined below) act on the two
+// components independently.
+template<class W1, class W2>
+class ProductWeight : public PairWeight<W1, W2> {
+ public:
+  using PairWeight<W1, W2>::Zero;
+  using PairWeight<W1, W2>::One;
+  using PairWeight<W1, W2>::NoWeight;
+  using PairWeight<W1, W2>::Quantize;
+  using PairWeight<W1, W2>::Reverse;
+
+  typedef ProductWeight<typename W1::ReverseWeight, typename W2::ReverseWeight>
+  ReverseWeight;
+
+  ProductWeight() {}
+
+  ProductWeight(const PairWeight<W1, W2>& w) : PairWeight<W1, W2>(w) {}
+
+  ProductWeight(W1 w1, W2 w2) : PairWeight<W1, W2>(w1, w2) {}
+
+  static const ProductWeight<W1, W2> &Zero() {
+    static const ProductWeight<W1, W2> zero(PairWeight<W1, W2>::Zero());
+    return zero;
+  }
+
+  static const ProductWeight<W1, W2> &One() {
+    static const ProductWeight<W1, W2> one(PairWeight<W1, W2>::One());
+    return one;
+  }
+
+  static const ProductWeight<W1, W2> &NoWeight() {
+    static const ProductWeight<W1, W2> no_weight(
+        PairWeight<W1, W2>::NoWeight());
+    return no_weight;
+  }
+
+  // Type name: the two component type names joined by "_X_".
+  static const string &Type() {
+    static const string type = W1::Type() + "_X_" + W2::Type();
+    return type;
+  }
+
+  // A property holds for the product only when both components have it,
+  // and only these four properties are passed through.
+  static uint64 Properties() {
+    const uint64 first_props = W1::Properties();
+    const uint64 second_props = W2::Properties();
+    const uint64 common_props = first_props & second_props;
+    return common_props & (kLeftSemiring | kRightSemiring |
+                           kCommutative | kIdempotent);
+  }
+
+  ProductWeight<W1, W2> Quantize(float delta = kDelta) const {
+    return PairWeight<W1, W2>::Quantize(delta);
+  }
+
+  ReverseWeight Reverse() const {
+    return PairWeight<W1, W2>::Reverse();
+  }
+
+
+};
+
+// Component-wise Plus.
+template <class W1, class W2>
+inline ProductWeight<W1, W2> Plus(const ProductWeight<W1, W2> &w,
+                                  const ProductWeight<W1, W2> &v) {
+  return ProductWeight<W1, W2>(Plus(w.Value1(), v.Value1()),
+                               Plus(w.Value2(), v.Value2()));
+}
+
+// Component-wise Times.
+template <class W1, class W2>
+inline ProductWeight<W1, W2> Times(const ProductWeight<W1, W2> &w,
+                                   const ProductWeight<W1, W2> &v) {
+  return ProductWeight<W1, W2>(Times(w.Value1(), v.Value1()),
+                               Times(w.Value2(), v.Value2()));
+}
+
+// Component-wise Divide; the same DivideType is applied to both components.
+template <class W1, class W2>
+inline ProductWeight<W1, W2> Divide(const ProductWeight<W1, W2> &w,
+                                    const ProductWeight<W1, W2> &v,
+                                    DivideType typ = DIVIDE_ANY) {
+  return ProductWeight<W1, W2>(Divide(w.Value1(), v.Value1(), typ),
+                               Divide(w.Value2(), v.Value2(), typ));
+}
+
+}  // namespace fst
+
+#endif  // FST_LIB_PRODUCT_WEIGHT_H__
diff --git a/kaldi_io/src/tools/openfst/include/fst/project.h b/kaldi_io/src/tools/openfst/include/fst/project.h
new file mode 100644
index 0000000..07946c3
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/project.h
@@ -0,0 +1,148 @@
+// project.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: [email protected] (Michael Riley)
+//
+// \file
+// Functions and classes to project an Fst on to its domain or range.
+
+#ifndef FST_LIB_PROJECT_H__
+#define FST_LIB_PROJECT_H__
+
+#include <fst/arc-map.h>
+#include <fst/mutable-fst.h>
+
+
+namespace fst {
+
+// This specifies whether to project on input or output.
+enum ProjectType { PROJECT_INPUT = 1, PROJECT_OUTPUT = 2 };
+
+
+// Mapper to implement projection per arc: the label on the projected side
+// is written to both the input and the output of the resulting arc.
+template <class A> class ProjectMapper {
+ public:
+  explicit ProjectMapper(ProjectType project_type)
+      : project_type_(project_type) {}
+
+  A operator()(const A &arc) {
+    const bool from_input = (project_type_ == PROJECT_INPUT);
+    const typename A::Label projected = from_input ? arc.ilabel : arc.olabel;
+    return A(projected, projected, arc.weight, arc.nextstate);
+  }
+
+  MapFinalAction FinalAction() const { return MAP_NO_SUPERFINAL; }
+
+  // Input symbols are copied when projecting on input, cleared otherwise.
+  MapSymbolsAction InputSymbolsAction() const {
+    if (project_type_ == PROJECT_INPUT)
+      return MAP_COPY_SYMBOLS;
+    return MAP_CLEAR_SYMBOLS;
+  }
+
+  // Output symbols are copied when projecting on output, cleared otherwise.
+  MapSymbolsAction OutputSymbolsAction() const {
+    if (project_type_ == PROJECT_OUTPUT)
+      return MAP_COPY_SYMBOLS;
+    return MAP_CLEAR_SYMBOLS;
+  }
+
+  uint64 Properties(uint64 props) {
+    const bool project_input = (project_type_ == PROJECT_INPUT);
+    return ProjectProperties(props, project_input);
+  }
+
+
+ private:
+  ProjectType project_type_;
+};
+
+
+// Projects an FST onto its domain or range by either copying each arc's
+// input label to the output label or vice versa. This version modifies
+// its input.  After the arcs are mapped, the symbol table of the projected
+// side is installed on the other side as well.
+//
+// Complexity:
+// - Time: O(V + E)
+// - Space: O(1)
+// where V = # of states and E = # of arcs.
+template<class Arc> inline
+void Project(MutableFst<Arc> *fst, ProjectType project_type) {
+  ArcMap(fst, ProjectMapper<Arc>(project_type));
+  if (project_type == PROJECT_INPUT) {
+    fst->SetOutputSymbols(fst->InputSymbols());
+  } else if (project_type == PROJECT_OUTPUT) {
+    fst->SetInputSymbols(fst->OutputSymbols());
+  }
+}
+
+
+// Projects an FST onto its domain or range by either copying each arc's
+// input label to the output label or vice versa. This version is a delayed
+// Fst.
+//
+// Complexity:
+// - Time: O(v + e)
+// - Space: O(1)
+// where v = # of states visited, e = # of arcs visited. Constant
+// time to visit an input state or arc is assumed, exclusive
+// of caching.
+template <class A>
+class ProjectFst : public ArcMapFst<A, A, ProjectMapper<A> > {
+ public:
+  typedef A Arc;
+  typedef ProjectMapper<A> C;
+  typedef ArcMapFstImpl< A, A, ProjectMapper<A> > Impl;
+  using ImplToFst<Impl>::GetImpl;
+
+  // Wraps fst; the symbol table of the projected side is taken from the
+  // argument and installed on the opposite side of the implementation.
+  ProjectFst(const Fst<A> &fst, ProjectType project_type)
+      : ArcMapFst<A, A, C>(fst, C(project_type)) {
+    if (project_type == PROJECT_INPUT) {
+      GetImpl()->SetOutputSymbols(fst.InputSymbols());
+    } else if (project_type == PROJECT_OUTPUT) {
+      GetImpl()->SetInputSymbols(fst.OutputSymbols());
+    }
+  }
+
+  // See Fst<>::Copy() for doc.
+  ProjectFst(const ProjectFst<A> &fst, bool safe = false)
+      : ArcMapFst<A, A, C>(fst, safe) {}
+
+  // Get a copy of this ProjectFst. See Fst<>::Copy() for further doc.
+  virtual ProjectFst<A> *Copy(bool safe = false) const {
+    return new ProjectFst(*this, safe);
+  }
+};
+
+
+// Specialization for ProjectFst: forwards to the ArcMapFst state iterator.
+template <class A>
+class StateIterator< ProjectFst<A> >
+    : public StateIterator< ArcMapFst<A, A, ProjectMapper<A> > > {
+ public:
+  explicit StateIterator(const ProjectFst<A> &fst)
+      : StateIterator< ArcMapFst<A, A, ProjectMapper<A> > >(fst) {}
+};
+
+
+// Specialization for ProjectFst: forwards to the ArcMapFst arc iterator.
+template <class A>
+class ArcIterator< ProjectFst<A> >
+    : public ArcIterator< ArcMapFst<A, A, ProjectMapper<A> > > {
+ public:
+  ArcIterator(const ProjectFst<A> &fst, typename A::StateId s)
+      : ArcIterator< ArcMapFst<A, A, ProjectMapper<A> > >(fst, s) {}
+};
+
+
+// Useful alias when using StdArc.
+typedef ProjectFst<StdArc> StdProjectFst;
+
+}  // namespace fst
+
+#endif  // FST_LIB_PROJECT_H__
diff --git a/kaldi_io/src/tools/openfst/include/fst/properties.h b/kaldi_io/src/tools/openfst/include/fst/properties.h
new file mode 100644
index 0000000..8fab16f
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/properties.h
@@ -0,0 +1,460 @@
+// properties.h
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: Michael Riley <[email protected]>
+// \file
+// FST property bits.
+
+#ifndef FST_LIB_PROPERTIES_H__
+#define FST_LIB_PROPERTIES_H__
+
+#include <sys/types.h>
+#include <vector>
+using std::vector;
+
+#include <fst/compat.h>
+
+namespace fst {
+
+// The property bits here assert facts about an FST. If individual
+// bits are added, then the composite properties below, the property
+// functions and property names in properties.cc, and
+// TestProperties() in test-properties.h should be updated.
+
+//
+// BINARY PROPERTIES
+//
+// For each property below, there is a single bit. If it is set,
+// the property is true. If it is not set, the property is false.
+// + +// The Fst is an ExpandedFst +const uint64 kExpanded = 0x0000000000000001ULL; + +// The Fst is a MutableFst +const uint64 kMutable = 0x0000000000000002ULL; + +// An error was detected while constructing/using the FST +const uint64 kError = 0x0000000000000004ULL; + +// +// TRINARY PROPERTIES +// +// For each of these properties below there is a pair of property bits +// - one positive and one negative. If the positive bit is set, the +// property is true. If the negative bit is set, the property is +// false. If neither is set, the property has unknown value. Both +// should never be simultaneously set. The individual positive and +// negative bit pairs should be adjacent with the positive bit +// at an odd and lower position. + +// ilabel == olabel for each arc +const uint64 kAcceptor = 0x0000000000010000ULL; +// ilabel != olabel for some arc +const uint64 kNotAcceptor = 0x0000000000020000ULL; + +// ilabels unique leaving each state +const uint64 kIDeterministic = 0x0000000000040000ULL; +// ilabels not unique leaving some state +const uint64 kNonIDeterministic = 0x0000000000080000ULL; + +// olabels unique leaving each state +const uint64 kODeterministic = 0x0000000000100000ULL; +// olabels not unique leaving some state +const uint64 kNonODeterministic = 0x0000000000200000ULL; + +// FST has input/output epsilons +const uint64 kEpsilons = 0x0000000000400000ULL; +// FST has no input/output epsilons +const uint64 kNoEpsilons = 0x0000000000800000ULL; + +// FST has input epsilons +const uint64 kIEpsilons = 0x0000000001000000ULL; +// FST has no input epsilons +const uint64 kNoIEpsilons = 0x0000000002000000ULL; + +// FST has output epsilons +const uint64 kOEpsilons = 0x0000000004000000ULL; +// FST has no output epsilons +const uint64 kNoOEpsilons = 0x0000000008000000ULL; + +// ilabels sorted wrt < for each state +const uint64 kILabelSorted = 0x0000000010000000ULL; +// ilabels not sorted wrt < for some state +const uint64 kNotILabelSorted = 0x0000000020000000ULL; + 
+// olabels sorted wrt < for each state +const uint64 kOLabelSorted = 0x0000000040000000ULL; +// olabels not sorted wrt < for some state +const uint64 kNotOLabelSorted = 0x0000000080000000ULL; + +// Non-trivial arc or final weights +const uint64 kWeighted = 0x0000000100000000ULL; +// Only trivial arc and final weights +const uint64 kUnweighted = 0x0000000200000000ULL; + +// FST has cycles +const uint64 kCyclic = 0x0000000400000000ULL; +// FST has no cycles +const uint64 kAcyclic = 0x0000000800000000ULL; + +// FST has cycles containing the initial state +const uint64 kInitialCyclic = 0x0000001000000000ULL; +// FST has no cycles containing the initial state +const uint64 kInitialAcyclic = 0x0000002000000000ULL; + +// FST is topologically sorted +const uint64 kTopSorted = 0x0000004000000000ULL; +// FST is not topologically sorted +const uint64 kNotTopSorted = 0x0000008000000000ULL; + +// All states reachable from the initial state +const uint64 kAccessible = 0x0000010000000000ULL; +// Not all states reachable from the initial state +const uint64 kNotAccessible = 0x0000020000000000ULL; + +// All states can reach a final state +const uint64 kCoAccessible = 0x0000040000000000ULL; +// Not all states can reach a final state +const uint64 kNotCoAccessible = 0x0000080000000000ULL; + +// If NumStates() > 0, then state 0 is initial, state NumStates()-1 is +// final, there is a transition from each non-final state i to +// state i+1, and there are no other transitions. 
const uint64 kString =    0x0000100000000000ULL;

// Not a string FST
const uint64 kNotString = 0x0000200000000000ULL;

//
// COMPOSITE PROPERTIES
//
// Each mask below is a bitwise OR of individual property bits. The
// "preserved when ..." masks are meant to be AND-ed with an FST's current
// property bits to keep only the bits that remain valid after the operation.
//

// Properties of an empty machine
const uint64 kNullProperties
  = kAcceptor | kIDeterministic | kODeterministic | kNoEpsilons |
    kNoIEpsilons | kNoOEpsilons | kILabelSorted | kOLabelSorted |
    kUnweighted | kAcyclic | kInitialAcyclic | kTopSorted |
    kAccessible | kCoAccessible | kString;

// Properties that are preserved when an FST is copied
// (note: kExpanded and kMutable are not part of this mask)
const uint64 kCopyProperties
  = kError | kAcceptor | kNotAcceptor | kIDeterministic | kNonIDeterministic |
    kODeterministic | kNonODeterministic | kEpsilons | kNoEpsilons |
    kIEpsilons | kNoIEpsilons | kOEpsilons | kNoOEpsilons |
    kILabelSorted | kNotILabelSorted | kOLabelSorted |
    kNotOLabelSorted | kWeighted | kUnweighted | kCyclic | kAcyclic |
    kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
    kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
    kString | kNotString;

// Properties that are intrinsic to the FST
// (every property bit except kError)
const uint64 kIntrinsicProperties
  = kExpanded | kMutable | kAcceptor | kNotAcceptor | kIDeterministic |
    kNonIDeterministic | kODeterministic | kNonODeterministic |
    kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
    kNoOEpsilons | kILabelSorted | kNotILabelSorted | kOLabelSorted |
    kNotOLabelSorted | kWeighted | kUnweighted | kCyclic | kAcyclic |
    kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
    kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
    kString | kNotString;

// Properties that are (potentially) extrinsic to the FST
const uint64 kExtrinsicProperties = kError;

// Properties that are preserved when an FST start state is set
// (initial-cyclicity, accessibility and string bits are dropped)
const uint64 kSetStartProperties
  = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
    kIDeterministic | kNonIDeterministic | kODeterministic |
    kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
    kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
    kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kWeighted |
    kUnweighted | kCyclic | kAcyclic | kTopSorted | kNotTopSorted |
    kCoAccessible | kNotCoAccessible;

// Properties that are preserved when an FST final weight is set
// (weightedness, co-accessibility and string bits are dropped)
const uint64 kSetFinalProperties
  = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
    kIDeterministic | kNonIDeterministic | kODeterministic |
    kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
    kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
    kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kCyclic |
    kAcyclic | kInitialCyclic | kInitialAcyclic | kTopSorted |
    kNotTopSorted | kAccessible | kNotAccessible;

// Properties that are preserved when an FST state is added
// (of the accessibility/string pairs only the negative bits are kept)
const uint64 kAddStateProperties
  = kExpanded | kMutable | kError | kAcceptor | kNotAcceptor |
    kIDeterministic | kNonIDeterministic | kODeterministic |
    kNonODeterministic | kEpsilons | kNoEpsilons | kIEpsilons |
    kNoIEpsilons | kOEpsilons | kNoOEpsilons | kILabelSorted |
    kNotILabelSorted | kOLabelSorted | kNotOLabelSorted | kWeighted |
    kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
    kInitialAcyclic | kTopSorted | kNotTopSorted | kNotAccessible |
    kNotCoAccessible | kNotString;

// Properties that are preserved when an FST arc is added
const uint64 kAddArcProperties = kExpanded | kMutable | kError | kNotAcceptor |
    kNonIDeterministic | kNonODeterministic | kEpsilons | kIEpsilons |
    kOEpsilons | kNotILabelSorted | kNotOLabelSorted | kWeighted |
    kCyclic | kInitialCyclic | kNotTopSorted | kAccessible | kCoAccessible;

// Properties that are preserved when an FST arc is set
const uint64 kSetArcProperties = kExpanded | kMutable | kError;

// Properties that are preserved when FST states are deleted
const uint64 kDeleteStatesProperties
  = kExpanded | kMutable | kError | kAcceptor | kIDeterministic |
    kODeterministic | kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
    kILabelSorted | kOLabelSorted | kUnweighted | kAcyclic |
    kInitialAcyclic | kTopSorted;

// Properties that are preserved when FST arcs are deleted
const uint64 kDeleteArcsProperties
  = kExpanded | kMutable | kError | kAcceptor | kIDeterministic |
    kODeterministic | kNoEpsilons | kNoIEpsilons | kNoOEpsilons |
    kILabelSorted | kOLabelSorted | kUnweighted | kAcyclic |
    kInitialAcyclic | kTopSorted | kNotAccessible | kNotCoAccessible;

// Properties that are preserved when an FST's states are reordered
// (top-sort and string bits are dropped)
const uint64 kStateSortProperties = kExpanded | kMutable | kError | kAcceptor |
    kNotAcceptor | kIDeterministic | kNonIDeterministic |
    kODeterministic | kNonODeterministic | kEpsilons | kNoEpsilons |
    kIEpsilons | kNoIEpsilons | kOEpsilons | kNoOEpsilons |
    kILabelSorted | kNotILabelSorted | kOLabelSorted | kNotOLabelSorted
    | kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
    kInitialAcyclic | kAccessible | kNotAccessible | kCoAccessible |
    kNotCoAccessible;

// Properties that are preserved when an FST's arcs are reordered
// (the four label-sortedness bits are dropped)
const uint64 kArcSortProperties =
  kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kIDeterministic |
  kNonIDeterministic | kODeterministic | kNonODeterministic |
  kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
  kNoOEpsilons | kWeighted | kUnweighted | kCyclic | kAcyclic |
  kInitialCyclic | kInitialAcyclic | kTopSorted | kNotTopSorted |
  kAccessible | kNotAccessible | kCoAccessible | kNotCoAccessible |
  kString | kNotString;

// Properties that are preserved when an FST's input labels are changed.
const uint64 kILabelInvariantProperties =
  kExpanded | kMutable | kError | kODeterministic | kNonODeterministic |
  kOEpsilons | kNoOEpsilons | kOLabelSorted | kNotOLabelSorted |
  kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
  kInitialAcyclic | kTopSorted | kNotTopSorted | kAccessible |
  kNotAccessible | kCoAccessible | kNotCoAccessible | kString | kNotString;

// Properties that are preserved when an FST's output labels are changed.
// (Mirror image of kILabelInvariantProperties: the input-side bits are kept
// and the output-side ones are dropped.)
const uint64 kOLabelInvariantProperties =
  kExpanded | kMutable | kError | kIDeterministic | kNonIDeterministic |
  kIEpsilons | kNoIEpsilons | kILabelSorted | kNotILabelSorted |
  kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
  kInitialAcyclic | kTopSorted | kNotTopSorted | kAccessible |
  kNotAccessible | kCoAccessible | kNotCoAccessible | kString | kNotString;

// Properties that are preserved when an FST's weights are changed.
// This assumes that the set of states that are non-final is not changed.
// (Only kWeighted and kUnweighted are left out of this mask.)
const uint64 kWeightInvariantProperties =
  kExpanded | kMutable | kError | kAcceptor | kNotAcceptor | kIDeterministic |
  kNonIDeterministic | kODeterministic | kNonODeterministic |
  kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | kOEpsilons |
  kNoOEpsilons | kILabelSorted | kNotILabelSorted | kOLabelSorted |
  kNotOLabelSorted | kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic |
  kTopSorted | kNotTopSorted | kAccessible | kNotAccessible | kCoAccessible |
  kNotCoAccessible | kString | kNotString;

// Properties that are preserved when a superfinal state is added
// and an FST's final weights are directed to it via new transitions.
const uint64 kAddSuperFinalProperties = kExpanded | kMutable | kError |
    kAcceptor | kNotAcceptor | kNonIDeterministic | kNonODeterministic |
    kEpsilons | kIEpsilons | kOEpsilons | kNotILabelSorted | kNotOLabelSorted |
    kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
    kInitialAcyclic | kNotTopSorted | kNotAccessible | kCoAccessible |
    kNotCoAccessible | kNotString;

// Properties that are preserved when a superfinal state is removed
// and the epsilon transitions directed to it are made final weights.
const uint64 kRmSuperFinalProperties = kExpanded | kMutable | kError |
    kAcceptor | kNotAcceptor | kIDeterministic | kODeterministic |
    kNoEpsilons | kNoIEpsilons | kNoOEpsilons | kILabelSorted | kOLabelSorted |
    kWeighted | kUnweighted | kCyclic | kAcyclic | kInitialCyclic |
    kInitialAcyclic | kTopSorted | kAccessible | kCoAccessible |
    kNotCoAccessible | kString;

// All binary properties (the three lowest bits)
const uint64 kBinaryProperties =  0x0000000000000007ULL;

// All trinary properties (bits 16 through 45)
const uint64 kTrinaryProperties = 0x00003fffffff0000ULL;

//
// COMPUTED PROPERTIES
//

// 1st bit of trinary properties
// (0x5555... selects the even-numbered bit positions)
const uint64 kPosTrinaryProperties =
  kTrinaryProperties & 0x5555555555555555ULL;

// 2nd bit of trinary properties
// (0xaaaa... selects the odd-numbered bit positions)
const uint64 kNegTrinaryProperties =
  kTrinaryProperties & 0xaaaaaaaaaaaaaaaaULL;

// All properties
const uint64 kFstProperties = kBinaryProperties | kTrinaryProperties;

//
// PROPERTY FUNCTIONS and STRING NAMES (defined in properties.cc)
//

// Below are functions for getting property bit vectors when executing
// mutating fst operations.
inline uint64 SetStartProperties(uint64 inprops);
template <typename Weight>
uint64 SetFinalProperties(uint64 inprops, Weight old_weight,
                          Weight new_weight);
inline uint64 AddStateProperties(uint64 inprops);
template <typename A>
uint64 AddArcProperties(uint64 inprops, typename A::StateId s, const A &arc,
                           const A *prev_arc);
inline uint64 DeleteStatesProperties(uint64 inprops);
inline uint64 DeleteAllStatesProperties(uint64 inprops, uint64 staticProps);
inline uint64 DeleteArcsProperties(uint64 inprops);

// Property functions for FST operations; only declared in this header.
// Each takes the property bits of the operand FST(s) and returns property
// bits for the result.
uint64 ClosureProperties(uint64 inprops, bool star, bool delayed = false);
uint64 ComplementProperties(uint64 inprops);
uint64 ComposeProperties(uint64 inprops1, uint64 inprops2);
uint64 ConcatProperties(uint64 inprops1, uint64 inprops2,
                        bool delayed = false);
uint64 DeterminizeProperties(uint64 inprops, bool has_subsequential_label);
uint64 FactorWeightProperties(uint64 inprops);
uint64 InvertProperties(uint64 inprops);
uint64 ProjectProperties(uint64 inprops, bool project_input);
uint64 RandGenProperties(uint64 inprops, bool weighted);
uint64 RelabelProperties(uint64 inprops);
uint64 ReplaceProperties(const vector<uint64>& inprops,
                         ssize_t root,
                         bool epsilon_on_replace,
                         bool no_empty_fst);
uint64 ReverseProperties(uint64 inprops);
uint64 ReweightProperties(uint64 inprops);
uint64 RmEpsilonProperties(uint64 inprops, bool delayed = false);
uint64 ShortestPathProperties(uint64 props);
uint64 SynchronizeProperties(uint64 inprops);
uint64 UnionProperties(uint64 inprops1, uint64 inprops2, bool delayed = false);

// Definitions of inlined functions.
+ +uint64 SetStartProperties(uint64 inprops) { + uint64 outprops = inprops & kSetStartProperties; + if (inprops & kAcyclic) { + outprops |= kInitialAcyclic; + } + return outprops; +} + +uint64 AddStateProperties(uint64 inprops) { + return inprops & kAddStateProperties; +} + +uint64 DeleteStatesProperties(uint64 inprops) { + return inprops & kDeleteStatesProperties; +} + +uint64 DeleteAllStatesProperties(uint64 inprops, uint64 staticprops) { + uint64 outprops = inprops & kError; + return outprops | kNullProperties | staticprops; +} + +uint64 DeleteArcsProperties(uint64 inprops) { + return inprops & kDeleteArcsProperties; +} + +// Definitions of template functions. + +// +template <typename Weight> +uint64 SetFinalProperties(uint64 inprops, Weight old_weight, + Weight new_weight) { + uint64 outprops = inprops; + if (old_weight != Weight::Zero() && old_weight != Weight::One()) { + outprops &= ~kWeighted; + } + if (new_weight != Weight::Zero() && new_weight != Weight::One()) { + outprops |= kWeighted; + outprops &= ~kUnweighted; + } + outprops &= kSetFinalProperties | kWeighted | kUnweighted; + return outprops; +} + +/// Gets the properties for the MutableFst::AddArc method. 
+/// +/// \param inprops the current properties of the fst +/// \param s the id of the state to which an arc is being added +/// \param arc the arc being added to the state with the specified id +/// \param prev_arc the previously-added (or "last") arc of state s, or NULL if +/// s currently has no arcs +template <typename A> +uint64 AddArcProperties(uint64 inprops, typename A::StateId s, + const A &arc, const A *prev_arc) { + uint64 outprops = inprops; + if (arc.ilabel != arc.olabel) { + outprops |= kNotAcceptor; + outprops &= ~kAcceptor; + } + if (arc.ilabel == 0) { + outprops |= kIEpsilons; + outprops &= ~kNoIEpsilons; + if (arc.olabel == 0) { + outprops |= kEpsilons; + outprops &= ~kNoEpsilons; + } + } + if (arc.olabel == 0) { + outprops |= kOEpsilons; + outprops &= ~kNoOEpsilons; + } + if (prev_arc != 0) { + if (prev_arc->ilabel > arc.ilabel) { + outprops |= kNotILabelSorted; + outprops &= ~kILabelSorted; + } + if (prev_arc->olabel > arc.olabel) { + outprops |= kNotOLabelSorted; + outprops &= ~kOLabelSorted; + } + } + if (arc.weight != A::Weight::Zero() && arc.weight != A::Weight::One()) { + outprops |= kWeighted; + outprops &= ~kUnweighted; + } + if (arc.nextstate <= s) { + outprops |= kNotTopSorted; + outprops &= ~kTopSorted; + } + outprops &= kAddArcProperties | kAcceptor | + kNoEpsilons | kNoIEpsilons | kNoOEpsilons | + kILabelSorted | kOLabelSorted | kUnweighted | kTopSorted; + if (outprops & kTopSorted) { + outprops |= kAcyclic | kInitialAcyclic; + } + return outprops; +} + +extern const char *PropertyNames[]; + +} // namespace fst + +#endif // FST_LIB_PROPERTIES_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/prune.h b/kaldi_io/src/tools/openfst/include/fst/prune.h new file mode 100644 index 0000000..5ea5b4d --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/prune.h @@ -0,0 +1,339 @@ +// prune.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: [email protected] (Cyril Allauzen)
//
// \file
// Functions implementing pruning.

#ifndef FST_LIB_PRUNE_H__
#define FST_LIB_PRUNE_H__

#include <vector>
using std::vector;

#include <fst/arcfilter.h>
#include <fst/heap.h>
#include <fst/shortest-distance.h>


namespace fst {

// Options bundle for the Prune() overloads that take an options argument.
// All members are public and set once by the constructor.
template <class A, class ArcFilter>
class PruneOptions {
 public:
  typedef typename A::Weight Weight;
  typedef typename A::StateId StateId;

  // Pruning weight threshold.
  Weight weight_threshold;
  // Pruning state threshold.
  StateId state_threshold;
  // Arc filter.
  ArcFilter filter;
  // If non-zero, passes in pre-computed shortest distance to final states.
  // The vector is not owned by this object.
  const vector<Weight> *distance;
  // Determines the degree of convergence required when computing shortest
  // distances.
  float delta;

  explicit PruneOptions(const Weight& w, StateId s, ArcFilter f,
                        vector<Weight> *d = 0, float e = kDelta)
      : weight_threshold(w),
        state_threshold(s),
        filter(f),
        distance(d),
        delta(e) {}
 private:
  PruneOptions();  // disallow
};


// State comparator used as the heap ordering in Prune(). A state's key is
// Times(idistance[state], fdistance[state]), where an index beyond the end
// of either vector counts as Weight::Zero(); keys are compared with
// NaturalLess. Both vectors are held by reference, so they must outlive
// the comparator, and later changes to them are seen by it.
template <class S, class W>
class PruneCompare {
 public:
  typedef S StateId;
  typedef W Weight;

  PruneCompare(const vector<Weight> &idistance,
               const vector<Weight> &fdistance)
      : idistance_(idistance), fdistance_(fdistance) {}

  bool operator()(const StateId x, const StateId y) const {
    Weight wx = Times(x < idistance_.size() ? idistance_[x] : Weight::Zero(),
                      x < fdistance_.size() ? fdistance_[x] : Weight::Zero());
    Weight wy = Times(y < idistance_.size() ? idistance_[y] : Weight::Zero(),
                      y < fdistance_.size() ? fdistance_[y] : Weight::Zero());
    return less_(wx, wy);
  }

 private:
  const vector<Weight> &idistance_;
  const vector<Weight> &fdistance_;
  NaturalLess<Weight> less_;
};



// Pruning algorithm: this version modifies its input and it takes an
// options class as an argument. Delete states and arcs in 'fst' that
// do not belong to a successful path whose weight is no more than
// the weight of the shortest path Times() 'opts.weight_threshold'.
// When 'opts.state_threshold != kNoStateId', the resulting transducer
// will be restricted further to have at most 'opts.state_threshold'
// states. Weights need to be commutative and have the path
// property. The weight 'w' of any cycle needs to be bounded, i.e.,
// 'Plus(w, W::One()) = One()'.
//
// If the weight type lacks the required properties, an error is logged,
// kError is set on 'fst' and nothing else is done.
template <class Arc, class ArcFilter>
void Prune(MutableFst<Arc> *fst,
           const PruneOptions<Arc, ArcFilter> &opts) {
  typedef typename Arc::Weight Weight;
  typedef typename Arc::StateId StateId;

  if ((Weight::Properties() & (kPath | kCommutative))
      != (kPath | kCommutative)) {
    FSTERROR() << "Prune: Weight needs to have the path property and"
               << " be commutative: "
               << Weight::Type();
    fst->SetProperties(kError, kError);
    return;
  }
  StateId ns = fst->NumStates();
  if (ns == 0) return;
  // idistance[s]: best distance found so far from the start state to s.
  vector<Weight> idistance(ns, Weight::Zero());
  // fdistance: distance from each state to the final states; computed here
  // (into tmp) unless the caller supplied it through opts.distance.
  vector<Weight> tmp;
  if (!opts.distance) {
    tmp.reserve(ns);
    ShortestDistance(*fst, &tmp, true, opts.delta);
  }
  const vector<Weight> *fdistance = opts.distance ? opts.distance : &tmp;

  // Nothing can be kept: zero state budget, or the start state has no
  // entry in fdistance or a Zero() distance to the final states.
  if ((opts.state_threshold == 0) ||
      (fdistance->size() <= fst->Start()) ||
      ((*fdistance)[fst->Start()] == Weight::Zero())) {
    fst->DeleteStates();
    return;
  }
  PruneCompare<StateId, Weight> compare(idistance, *fdistance);
  Heap< StateId, PruneCompare<StateId, Weight>, false> heap(compare);
  vector<bool> visited(ns, false);
  // enqueued[s]: heap key of s while it is in the heap, kNoKey otherwise.
  vector<size_t> enqueued(ns, kNoKey);
  // dead[0] is a freshly added state that pruned arcs are redirected to;
  // it is deleted at the end together with every unvisited state.
  vector<StateId> dead;
  dead.push_back(fst->AddState());
  NaturalLess<Weight> less;
  // Any path heavier than 'limit' is pruned.
  Weight limit = Times((*fdistance)[fst->Start()], opts.weight_threshold);

  StateId num_visited = 0;
  StateId s = fst->Start();
  if (!less(limit, (*fdistance)[s])) {
    idistance[s] = Weight::One();
    enqueued[s] = heap.Insert(s);
    ++num_visited;
  }

  while (!heap.Empty()) {
    s = heap.Top();
    heap.Pop();
    enqueued[s] = kNoKey;
    visited[s] = true;
    // Ending at s would exceed the limit: make s non-final.
    if (less(limit, Times(idistance[s], fst->Final(s))))
      fst->SetFinal(s, Weight::Zero());
    for (MutableArcIterator< MutableFst<Arc> > ait(fst, s);
         !ait.Done();
         ait.Next()) {
      Arc arc = ait.Value();
      if (!opts.filter(arc)) continue;
      // Weight of the best complete path through this arc.
      Weight weight = Times(Times(idistance[s], arc.weight),
                            arc.nextstate < fdistance->size()
                            ? (*fdistance)[arc.nextstate]
                            : Weight::Zero());
      if (less(limit, weight)) {
        arc.nextstate = dead[0];
        ait.SetValue(arc);
        continue;
      }
      if (less(Times(idistance[s], arc.weight), idistance[arc.nextstate]))
        idistance[arc.nextstate] = Times(idistance[s], arc.weight);
      if (visited[arc.nextstate]) continue;
      // State budget exhausted: do not enqueue any further states.
      if ((opts.state_threshold != kNoStateId) &&
          (num_visited >= opts.state_threshold))
        continue;
      if (enqueued[arc.nextstate] == kNoKey) {
        enqueued[arc.nextstate] = heap.Insert(arc.nextstate);
        ++num_visited;
      } else {
        heap.Update(enqueued[arc.nextstate], arc.nextstate);
      }
    }
  }
  for (size_t i = 0; i < visited.size(); ++i)
    if (!visited[i]) dead.push_back(i);
  fst->DeleteStates(dead);
}


// Pruning algorithm: this version modifies its input and simply takes
// the pruning threshold as an argument. Delete states and arcs in
// 'fst' that do not belong to a successful path whose weight is no
// more than the weight of the shortest path Times()
// 'weight_threshold'. When 'state_threshold != kNoStateId', the
// resulting transducer will be restricted further to have at most
// 'state_threshold' states. Weights need to be commutative and
// have the path property. The weight 'w' of any cycle needs to be
// bounded, i.e., 'Plus(w, W::One()) = One()'.
//
// Forwards to the options-based overload with an AnyArcFilter and no
// pre-computed distance vector.
template <class Arc>
void Prune(MutableFst<Arc> *fst,
           typename Arc::Weight weight_threshold,
           typename Arc::StateId state_threshold = kNoStateId,
           double delta = kDelta) {
  PruneOptions<Arc, AnyArcFilter<Arc> > opts(weight_threshold, state_threshold,
                                             AnyArcFilter<Arc>(), 0, delta);
  Prune(fst, opts);
}


// Pruning algorithm: this version writes the pruned input Fst to an
// output MutableFst and it takes an options class as an argument.
// 'ofst' contains states and arcs that belong to a successful path in
// 'ifst' whose weight is no more than the weight of the shortest path
// Times() 'opts.weight_threshold'. When 'opts.state_threshold !=
// kNoStateId', 'ofst' will be restricted further to have at most
// 'opts.state_threshold' states. Weights need to be commutative and
// have the path property. The weight 'w' of any cycle needs to be
// bounded, i.e., 'Plus(w, W::One()) = One()'.
//
// 'ofst' is always cleared first and receives the symbol tables of 'ifst'.
// If the weight type lacks the required properties, an error is logged and
// kError is set on 'ofst' instead.
template <class Arc, class ArcFilter>
void Prune(const Fst<Arc> &ifst,
           MutableFst<Arc> *ofst,
           const PruneOptions<Arc, ArcFilter> &opts) {
  typedef typename Arc::Weight Weight;
  typedef typename Arc::StateId StateId;

  if ((Weight::Properties() & (kPath | kCommutative))
      != (kPath | kCommutative)) {
    FSTERROR() << "Prune: Weight needs to have the path property and"
               << " be commutative: "
               << Weight::Type();
    ofst->SetProperties(kError, kError);
    return;
  }
  ofst->DeleteStates();
  ofst->SetInputSymbols(ifst.InputSymbols());
  ofst->SetOutputSymbols(ifst.OutputSymbols());
  if (ifst.Start() == kNoStateId)
    return;
  NaturalLess<Weight> less;
  // A threshold below One() or a zero state budget leaves 'ofst' empty.
  if (less(opts.weight_threshold, Weight::One()) ||
      (opts.state_threshold == 0))
    return;
  // idistance, copy, enqueued and visited are grown on demand below, since
  // the number of states of 'ifst' is not queried up front.
  vector<Weight> idistance;
  vector<Weight> tmp;
  if (!opts.distance)
    ShortestDistance(ifst, &tmp, true, opts.delta);
  const vector<Weight> *fdistance = opts.distance ? opts.distance : &tmp;

  if ((fdistance->size() <= ifst.Start()) ||
      ((*fdistance)[ifst.Start()] == Weight::Zero())) {
    return;
  }
  PruneCompare<StateId, Weight> compare(idistance, *fdistance);
  Heap< StateId, PruneCompare<StateId, Weight>, false> heap(compare);
  // copy[s]: id in 'ofst' of input state s, or kNoStateId if not yet copied.
  vector<StateId> copy;
  vector<size_t> enqueued;
  vector<bool> visited;

  StateId s = ifst.Start();
  Weight limit = Times(s < fdistance->size() ? (*fdistance)[s] : Weight::Zero(),
                       opts.weight_threshold);
  while (copy.size() <= s)
    copy.push_back(kNoStateId);
  copy[s] = ofst->AddState();
  ofst->SetStart(copy[s]);
  while (idistance.size() <= s)
    idistance.push_back(Weight::Zero());
  idistance[s] = Weight::One();
  while (enqueued.size() <= s) {
    enqueued.push_back(kNoKey);
    visited.push_back(false);
  }
  enqueued[s] = heap.Insert(s);

  while (!heap.Empty()) {
    s = heap.Top();
    heap.Pop();
    enqueued[s] = kNoKey;
    visited[s] = true;
    // Copy the final weight only if ending at s stays within the limit.
    if (!less(limit, Times(idistance[s], ifst.Final(s))))
      ofst->SetFinal(copy[s], ifst.Final(s));
    for (ArcIterator< Fst<Arc> > ait(ifst, s);
         !ait.Done();
         ait.Next()) {
      const Arc &arc = ait.Value();
      if (!opts.filter(arc)) continue;
      // Weight of the best complete path through this arc.
      Weight weight = Times(Times(idistance[s], arc.weight),
                            arc.nextstate < fdistance->size()
                            ? (*fdistance)[arc.nextstate]
                            : Weight::Zero());
      if (less(limit, weight)) continue;
      // Once 'ofst' has reached the state budget, every remaining arc is
      // skipped, including arcs to states that were already copied.
      if ((opts.state_threshold != kNoStateId) &&
          (ofst->NumStates() >= opts.state_threshold))
        continue;
      while (idistance.size() <= arc.nextstate)
        idistance.push_back(Weight::Zero());
      if (less(Times(idistance[s], arc.weight),
               idistance[arc.nextstate]))
        idistance[arc.nextstate] = Times(idistance[s], arc.weight);
      while (copy.size() <= arc.nextstate)
        copy.push_back(kNoStateId);
      if (copy[arc.nextstate] == kNoStateId)
        copy[arc.nextstate] = ofst->AddState();
      ofst->AddArc(copy[s], Arc(arc.ilabel, arc.olabel, arc.weight,
                                copy[arc.nextstate]));
      while (enqueued.size() <= arc.nextstate) {
        enqueued.push_back(kNoKey);
        visited.push_back(false);
      }
      if (visited[arc.nextstate]) continue;
      if (enqueued[arc.nextstate] == kNoKey)
        enqueued[arc.nextstate] = heap.Insert(arc.nextstate);
      else
        heap.Update(enqueued[arc.nextstate], arc.nextstate);
    }
  }
}


// Pruning algorithm: this version writes the pruned input Fst to an
// output MutableFst and simply takes the pruning threshold as an
// argument.
'ofst' contains states and arcs that belong to a +// successful path in 'ifst' whose weight is no more than +// the weight of the shortest path Times() 'weight_threshold'. When +// 'state_threshold != kNoStateId', 'ofst' will be restricted further +// to have at most 'opts.state_threshold' states. Weights need to be +// commutative and have the path property. The weight 'w' of any cycle +// needs to be bounded, i.e., 'Plus(w, W::One()) = W::One()'. +template <class Arc> +void Prune(const Fst<Arc> &ifst, + MutableFst<Arc> *ofst, + typename Arc::Weight weight_threshold, + typename Arc::StateId state_threshold = kNoStateId, + float delta = kDelta) { + PruneOptions<Arc, AnyArcFilter<Arc> > opts(weight_threshold, state_threshold, + AnyArcFilter<Arc>(), 0, delta); + Prune(ifst, ofst, opts); +} + +} // namespace fst + +#endif // FST_LIB_PRUNE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/push.h b/kaldi_io/src/tools/openfst/include/fst/push.h new file mode 100644 index 0000000..1f7a8fa --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/push.h @@ -0,0 +1,175 @@ +// push.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) +// +// \file +// Class to reweight/push an FST. 
+ +#ifndef FST_LIB_PUSH_H__ +#define FST_LIB_PUSH_H__ + +#include <vector> +using std::vector; + +#include <fst/factor-weight.h> +#include <fst/fst.h> +#include <fst/arc-map.h> +#include <fst/reweight.h> +#include <fst/shortest-distance.h> + + +namespace fst { + +// Private helper functions for Push +namespace internal { + +// Compute the total weight (sum of the weights of all accepting paths) from +// the output of ShortestDistance. 'distance' is the shortest distance from the +// initial state when 'reverse == false' and to the final states when +// 'reverse == true'. +template <class Arc> +typename Arc::Weight ComputeTotalWeight( + const Fst<Arc> &fst, + const vector<typename Arc::Weight> &distance, + bool reverse) { + if (reverse) + return fst.Start() < distance.size() ? + distance[fst.Start()] : Arc::Weight::Zero(); + + typename Arc::Weight sum = Arc::Weight::Zero(); + for (typename Arc::StateId s = 0; s < distance.size(); ++s) + sum = Plus(sum, Times(distance[s], fst.Final(s))); + return sum; +} + +// Divide the weight of every accepting path by 'w'. The weight 'w' is +// divided at the final states if 'at_final == true' and at the +// initial state otherwise. 
+template <class Arc> +void RemoveWeight(MutableFst<Arc> *fst, typename Arc::Weight w, bool at_final) { + if ((w == Arc::Weight::One()) || (w == Arc::Weight::Zero())) + return; + + if (at_final) { + // Remove 'w' from the final states + for (StateIterator< MutableFst<Arc> > sit(*fst); + !sit.Done(); + sit.Next()) + fst->SetFinal(sit.Value(), + Divide(fst->Final(sit.Value()), w, DIVIDE_RIGHT)); + } else { // at_final == false + // Remove 'w' from the initial state + typename Arc::StateId start = fst->Start(); + for (MutableArcIterator<MutableFst<Arc> > ait(fst, start); + !ait.Done(); + ait.Next()) { + Arc arc = ait.Value(); + arc.weight = Divide(arc.weight, w, DIVIDE_LEFT); + ait.SetValue(arc); + } + fst->SetFinal(start, Divide(fst->Final(start), w, DIVIDE_LEFT)); + } +} +} // namespace internal + +// Pushes the weights in FST in the direction defined by TYPE. If +// pushing towards the initial state, the sum of the weight of the +// outgoing transitions and final weight at a non-initial state is +// equal to One() in the resulting machine. If pushing towards the +// final state, the same property holds on the reverse machine. +// +// Weight needs to be left distributive when pushing towards the +// initial state and right distributive when pushing towards the final +// states. 
// When 'remove_total_weight' is true, the total weight of the machine is
// additionally divided out: at the final states when pushing towards the
// final states, at the initial state otherwise.
template <class Arc>
void Push(MutableFst<Arc> *fst,
          ReweightType type,
          float delta = kDelta,
          bool remove_total_weight = false) {
  vector<typename Arc::Weight> distance;
  ShortestDistance(*fst, &distance, type == REWEIGHT_TO_INITIAL, delta);
  // The total weight must be computed before Reweight() modifies 'fst'.
  typename Arc::Weight total_weight = Arc::Weight::One();
  if (remove_total_weight)
    total_weight = internal::ComputeTotalWeight(*fst, distance,
                                                type == REWEIGHT_TO_INITIAL);
  Reweight(fst, distance, type);
  if (remove_total_weight)
    internal::RemoveWeight(fst, total_weight, type == REWEIGHT_TO_FINAL);
}

// Bit flags for the 'ptype' argument of the two-FST Push() below.
const uint32 kPushWeights = 0x0001;
const uint32 kPushLabels =  0x0002;
const uint32 kPushRemoveTotalWeight = 0x0004;
const uint32 kPushRemoveCommonAffix = 0x0008;

// OFST obtained from IFST by pushing weights and/or labels according
// to PTYPE in the direction defined by RTYPE.  Weight needs to be
// left distributive when pushing weights towards the initial state
// and right distributive when pushing weights towards the final
// states.
//
// Three cases, selected by 'ptype':
//  - kPushWeights without kPushLabels: copy 'ifst' and push in place.
//  - kPushLabels set: map to Gallic arcs, reweight, factor, map back.
//  - neither: log a warning and copy 'ifst' unchanged.
template <class Arc, ReweightType rtype>
void Push(const Fst<Arc> &ifst,
          MutableFst<Arc> *ofst,
          uint32 ptype,
          float delta = kDelta) {

  if ((ptype & (kPushWeights | kPushLabels)) == kPushWeights) {
    *ofst = ifst;
    Push(ofst, rtype, delta, ptype & kPushRemoveTotalWeight);
  } else if (ptype & kPushLabels) {
    const StringType stype = rtype == REWEIGHT_TO_INITIAL
                             ? STRING_LEFT
                             : STRING_RIGHT;
    vector<typename GallicArc<Arc, stype>::Weight> gdistance;
    VectorFst<GallicArc<Arc, stype> > gfst;
    ArcMap(ifst, &gfst, ToGallicMapper<Arc, stype>());
    if (ptype & kPushWeights ) {
      ShortestDistance(gfst, &gdistance, rtype == REWEIGHT_TO_INITIAL, delta);
    } else {
      // Labels only: compute the distances on a copy of 'ifst' whose
      // weights have been removed by RmWeightMapper.
      ArcMapFst<Arc, Arc, RmWeightMapper<Arc> >
        uwfst(ifst, RmWeightMapper<Arc>());
      ArcMapFst<Arc, GallicArc<Arc, stype>, ToGallicMapper<Arc, stype> >
        guwfst(uwfst, ToGallicMapper<Arc, stype>());
      ShortestDistance(guwfst, &gdistance, rtype == REWEIGHT_TO_INITIAL, delta);
    }
    typename GallicArc<Arc, stype>::Weight total_weight =
        GallicArc<Arc, stype>::Weight::One();
    if (ptype & (kPushRemoveTotalWeight | kPushRemoveCommonAffix)) {
      total_weight = internal::ComputeTotalWeight(
          gfst, gdistance, rtype == REWEIGHT_TO_INITIAL);
      // Keep only the requested components of the total: Value1() for the
      // common affix, Value2() for the total weight; the other component
      // is replaced by One() so that dividing by it has no effect.
      total_weight = typename GallicArc<Arc, stype>::Weight(
          ptype & kPushRemoveCommonAffix ? total_weight.Value1()
          : StringWeight<typename Arc::Label, stype>::One(),
          ptype & kPushRemoveTotalWeight ? total_weight.Value2()
          : Arc::Weight::One());
    }
    Reweight(&gfst, gdistance, rtype);
    if (ptype & (kPushRemoveTotalWeight | kPushRemoveCommonAffix))
      internal::RemoveWeight(&gfst, total_weight, rtype == REWEIGHT_TO_FINAL);
    FactorWeightFst< GallicArc<Arc, stype>, GallicFactor<typename Arc::Label,
      typename Arc::Weight, stype> > fwfst(gfst);
    ArcMap(fwfst, ofst, FromGallicMapper<Arc, stype>());
    ofst->SetOutputSymbols(ifst.OutputSymbols());
  } else {
    LOG(WARNING) << "Push: pushing type is set to 0: "
                 << "pushing neither labels nor weights.";
    *ofst = ifst;
  }
}

}  // namespace fst

#endif /* FST_LIB_PUSH_H__ */
diff --git a/kaldi_io/src/tools/openfst/include/fst/queue.h b/kaldi_io/src/tools/openfst/include/fst/queue.h new file mode 100644 index 0000000..95a082d --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/queue.h @@ -0,0 +1,938 @@
// queue.h

// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: [email protected] (Cyril Allauzen)
//
// \file
// Functions and classes for various Fst state queues with
// a unified interface.

#ifndef FST_LIB_QUEUE_H__
#define FST_LIB_QUEUE_H__

#include <deque>
using std::deque;
#include <vector>
using std::vector;

#include <fst/arcfilter.h>
#include <fst/connect.h>
#include <fst/heap.h>
#include <fst/topsort.h>


namespace fst {

// The unified queue interface, shown as a commented-out sketch:
//
// template <class S>
// class Queue {
//  public:
//   typedef S StateId;
//
//   // Ctr: may need args (e.g., Fst, comparator) for some queues
//   Queue(...);
//   // Returns the head of the queue
//   StateId Head() const;
//   // Inserts a state
//   void Enqueue(StateId s);
//   // Removes the head of the queue
//   void Dequeue();
//   // Updates ordering of state s when weight changes, if necessary
//   void Update(StateId s);
//   // Does the queue contain no elements?
//   bool Empty() const;
//   // Remove all states from queue
//   void Clear();
// };

// State queue types.
enum QueueType {
  TRIVIAL_QUEUE = 0,         // Single state queue
  FIFO_QUEUE = 1,            // First-in, first-out queue
  LIFO_QUEUE = 2,            // Last-in, first-out queue
  SHORTEST_FIRST_QUEUE = 3,  // Shortest-first queue
  TOP_ORDER_QUEUE = 4,       // Topologically-ordered queue
  STATE_ORDER_QUEUE = 5,     // State-ID ordered queue
  SCC_QUEUE = 6,             // Component graph top-ordered meta-queue
  AUTO_QUEUE = 7,            // Auto-selected queue
  OTHER_QUEUE = 8            // NOTE(review): presumably any queue type not
                             // listed above -- confirm against callers
 };


// QueueBase, templated on the StateId, is the base class shared by the
// queues considered by AutoQueue.
+template <class S> +class QueueBase { + public: + typedef S StateId; + + QueueBase(QueueType type) : queue_type_(type), error_(false) {} + virtual ~QueueBase() {} + StateId Head() const { return Head_(); } + void Enqueue(StateId s) { Enqueue_(s); } + void Dequeue() { Dequeue_(); } + void Update(StateId s) { Update_(s); } + bool Empty() const { return Empty_(); } + void Clear() { Clear_(); } + QueueType Type() { return queue_type_; } + bool Error() const { return error_; } + void SetError(bool error) { error_ = error; } + + private: + // This allows base-class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. + virtual StateId Head_() const = 0; + virtual void Enqueue_(StateId s) = 0; + virtual void Dequeue_() = 0; + virtual void Update_(StateId s) = 0; + virtual bool Empty_() const = 0; + virtual void Clear_() = 0; + + QueueType queue_type_; + bool error_; +}; + + +// Trivial queue discipline, templated on the StateId. You may enqueue +// at most one state at a time. It is used for strongly connected components +// with only one state and no self loops. +template <class S> +class TrivialQueue : public QueueBase<S> { +public: + typedef S StateId; + + TrivialQueue() : QueueBase<S>(TRIVIAL_QUEUE), front_(kNoStateId) {} + StateId Head() const { return front_; } + void Enqueue(StateId s) { front_ = s; } + void Dequeue() { front_ = kNoStateId; } + void Update(StateId s) {} + bool Empty() const { return front_ == kNoStateId; } + void Clear() { front_ = kNoStateId; } + + +private: + // This allows base-class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. 
+ virtual StateId Head_() const { return Head(); } + virtual void Enqueue_(StateId s) { Enqueue(s); } + virtual void Dequeue_() { Dequeue(); } + virtual void Update_(StateId s) { Update(s); } + virtual bool Empty_() const { return Empty(); } + virtual void Clear_() { return Clear(); } + + StateId front_; +}; + + +// First-in, first-out queue discipline, templated on the StateId. +template <class S> +class FifoQueue : public QueueBase<S>, public deque<S> { + public: + using deque<S>::back; + using deque<S>::push_front; + using deque<S>::pop_back; + using deque<S>::empty; + using deque<S>::clear; + + typedef S StateId; + + FifoQueue() : QueueBase<S>(FIFO_QUEUE) {} + StateId Head() const { return back(); } + void Enqueue(StateId s) { push_front(s); } + void Dequeue() { pop_back(); } + void Update(StateId s) {} + bool Empty() const { return empty(); } + void Clear() { clear(); } + + private: + // This allows base-class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. + virtual StateId Head_() const { return Head(); } + virtual void Enqueue_(StateId s) { Enqueue(s); } + virtual void Dequeue_() { Dequeue(); } + virtual void Update_(StateId s) { Update(s); } + virtual bool Empty_() const { return Empty(); } + virtual void Clear_() { return Clear(); } +}; + + +// Last-in, first-out queue discipline, templated on the StateId. 
+template <class S> +class LifoQueue : public QueueBase<S>, public deque<S> { + public: + using deque<S>::front; + using deque<S>::push_front; + using deque<S>::pop_front; + using deque<S>::empty; + using deque<S>::clear; + + typedef S StateId; + + LifoQueue() : QueueBase<S>(LIFO_QUEUE) {} + StateId Head() const { return front(); } + void Enqueue(StateId s) { push_front(s); } + void Dequeue() { pop_front(); } + void Update(StateId s) {} + bool Empty() const { return empty(); } + void Clear() { clear(); } + + private: + // This allows base-class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. + virtual StateId Head_() const { return Head(); } + virtual void Enqueue_(StateId s) { Enqueue(s); } + virtual void Dequeue_() { Dequeue(); } + virtual void Update_(StateId s) { Update(s); } + virtual bool Empty_() const { return Empty(); } + virtual void Clear_() { return Clear(); } +}; + + +// Shortest-first queue discipline, templated on the StateId and +// comparison function object. Comparison function object COMP is +// used to compare two StateIds. If a (single) state's order changes, +// it can be reordered in the queue with a call to Update(). +// If 'update == false', call to Update() does not reorder the queue. 
+template <typename S, typename C, bool update = true> +class ShortestFirstQueue : public QueueBase<S> { + public: + typedef S StateId; + typedef C Compare; + + ShortestFirstQueue(C comp) + : QueueBase<S>(SHORTEST_FIRST_QUEUE), heap_(comp) {} + + StateId Head() const { return heap_.Top(); } + + void Enqueue(StateId s) { + if (update) { + for (StateId i = key_.size(); i <= s; ++i) + key_.push_back(kNoKey); + key_[s] = heap_.Insert(s); + } else { + heap_.Insert(s); + } + } + + void Dequeue() { + if (update) + key_[heap_.Pop()] = kNoKey; + else + heap_.Pop(); + } + + void Update(StateId s) { + if (!update) + return; + if (s >= key_.size() || key_[s] == kNoKey) { + Enqueue(s); + } else { + heap_.Update(key_[s], s); + } + } + + bool Empty() const { return heap_.Empty(); } + + void Clear() { + heap_.Clear(); + if (update) key_.clear(); + } + + private: + Heap<S, C, false> heap_; + vector<ssize_t> key_; + + // This allows base-class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. + virtual StateId Head_() const { return Head(); } + virtual void Enqueue_(StateId s) { Enqueue(s); } + virtual void Dequeue_() { Dequeue(); } + virtual void Update_(StateId s) { Update(s); } + virtual bool Empty_() const { return Empty(); } + virtual void Clear_() { return Clear(); } +}; + + +// Given a vector that maps from states to weights and a Less +// comparison function object between weights, this class defines a +// comparison function object between states. 
+template <typename S, typename L> +class StateWeightCompare { + public: + typedef L Less; + typedef typename L::Weight Weight; + typedef S StateId; + + StateWeightCompare(const vector<Weight>& weights, const L &less) + : weights_(weights), less_(less) {} + + bool operator()(const S x, const S y) const { + return less_(weights_[x], weights_[y]); + } + + private: + const vector<Weight>& weights_; + L less_; +}; + + +// Shortest-first queue discipline, templated on the StateId and Weight, is +// specialized to use the weight's natural order for the comparison function. +template <typename S, typename W> +class NaturalShortestFirstQueue : + public ShortestFirstQueue<S, StateWeightCompare<S, NaturalLess<W> > > { + public: + typedef StateWeightCompare<S, NaturalLess<W> > C; + + NaturalShortestFirstQueue(const vector<W> &distance) : + ShortestFirstQueue<S, C>(C(distance, less_)) {} + + private: + NaturalLess<W> less_; +}; + +// Topological-order queue discipline, templated on the StateId. +// States are ordered in the queue topologically. The FST must be acyclic. +template <class S> +class TopOrderQueue : public QueueBase<S> { + public: + typedef S StateId; + + // This constructor computes the top. order. It accepts an arc filter + // to limit the transitions considered in that computation (e.g., only + // the epsilon graph). + template <class Arc, class ArcFilter> + TopOrderQueue(const Fst<Arc> &fst, ArcFilter filter) + : QueueBase<S>(TOP_ORDER_QUEUE), front_(0), back_(kNoStateId), + order_(0), state_(0) { + bool acyclic; + TopOrderVisitor<Arc> top_order_visitor(&order_, &acyclic); + DfsVisit(fst, &top_order_visitor, filter); + if (!acyclic) { + FSTERROR() << "TopOrderQueue: fst is not acyclic."; + QueueBase<S>::SetError(true); + } + state_.resize(order_.size(), kNoStateId); + } + + // This constructor is passed the top. order, useful when we know it + // beforehand. 
+ TopOrderQueue(const vector<StateId> &order) + : QueueBase<S>(TOP_ORDER_QUEUE), front_(0), back_(kNoStateId), + order_(order), state_(order.size(), kNoStateId) {} + + StateId Head() const { return state_[front_]; } + + void Enqueue(StateId s) { + if (front_ > back_) front_ = back_ = order_[s]; + else if (order_[s] > back_) back_ = order_[s]; + else if (order_[s] < front_) front_ = order_[s]; + state_[order_[s]] = s; + } + + void Dequeue() { + state_[front_] = kNoStateId; + while ((front_ <= back_) && (state_[front_] == kNoStateId)) ++front_; + } + + void Update(StateId s) {} + + bool Empty() const { return front_ > back_; } + + void Clear() { + for (StateId i = front_; i <= back_; ++i) state_[i] = kNoStateId; + back_ = kNoStateId; + front_ = 0; + } + + private: + StateId front_; + StateId back_; + vector<StateId> order_; + vector<StateId> state_; + + // This allows base-class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. + virtual StateId Head_() const { return Head(); } + virtual void Enqueue_(StateId s) { Enqueue(s); } + virtual void Dequeue_() { Dequeue(); } + virtual void Update_(StateId s) { Update(s); } + virtual bool Empty_() const { return Empty(); } + virtual void Clear_() { return Clear(); } +}; + + +// State order queue discipline, templated on the StateId. +// States are ordered in the queue by state Id. 
+template <class S> +class StateOrderQueue : public QueueBase<S> { +public: + typedef S StateId; + + StateOrderQueue() + : QueueBase<S>(STATE_ORDER_QUEUE), front_(0), back_(kNoStateId) {} + + StateId Head() const { return front_; } + + void Enqueue(StateId s) { + if (front_ > back_) front_ = back_ = s; + else if (s > back_) back_ = s; + else if (s < front_) front_ = s; + while (enqueued_.size() <= s) enqueued_.push_back(false); + enqueued_[s] = true; + } + + void Dequeue() { + enqueued_[front_] = false; + while ((front_ <= back_) && (enqueued_[front_] == false)) ++front_; + } + + void Update(StateId s) {} + + bool Empty() const { return front_ > back_; } + + void Clear() { + for (StateId i = front_; i <= back_; ++i) enqueued_[i] = false; + front_ = 0; + back_ = kNoStateId; + } + +private: + StateId front_; + StateId back_; + vector<bool> enqueued_; + + // This allows base-class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. + virtual StateId Head_() const { return Head(); } + virtual void Enqueue_(StateId s) { Enqueue(s); } + virtual void Dequeue_() { Dequeue(); } + virtual void Update_(StateId s) { Update(s); } + virtual bool Empty_() const { return Empty(); } + virtual void Clear_() { return Clear(); } + +}; + + +// SCC topological-order meta-queue discipline, templated on the StateId S +// and a queue Q, which is used inside each SCC. It visits the SCC's +// of an FST in topological order. Its constructor is passed the queues to +// to use within an SCC. +template <class S, class Q> +class SccQueue : public QueueBase<S> { + public: + typedef S StateId; + typedef Q Queue; + + // Constructor takes a vector specifying the SCC number per state + // and a vector giving the queue to use per SCC number. 
+ SccQueue(const vector<StateId> &scc, vector<Queue*> *queue) + : QueueBase<S>(SCC_QUEUE), queue_(queue), scc_(scc), front_(0), + back_(kNoStateId) {} + + StateId Head() const { + while ((front_ <= back_) && + (((*queue_)[front_] && (*queue_)[front_]->Empty()) + || (((*queue_)[front_] == 0) && + ((front_ >= trivial_queue_.size()) + || (trivial_queue_[front_] == kNoStateId))))) + ++front_; + if ((*queue_)[front_]) + return (*queue_)[front_]->Head(); + else + return trivial_queue_[front_]; + } + + void Enqueue(StateId s) { + if (front_ > back_) front_ = back_ = scc_[s]; + else if (scc_[s] > back_) back_ = scc_[s]; + else if (scc_[s] < front_) front_ = scc_[s]; + if ((*queue_)[scc_[s]]) { + (*queue_)[scc_[s]]->Enqueue(s); + } else { + while (trivial_queue_.size() <= scc_[s]) + trivial_queue_.push_back(kNoStateId); + trivial_queue_[scc_[s]] = s; + } + } + + void Dequeue() { + if ((*queue_)[front_]) + (*queue_)[front_]->Dequeue(); + else if (front_ < trivial_queue_.size()) + trivial_queue_[front_] = kNoStateId; + } + + void Update(StateId s) { + if ((*queue_)[scc_[s]]) + (*queue_)[scc_[s]]->Update(s); + } + + bool Empty() const { + if (front_ < back_) // Queue scc # back_ not empty unless back_==front_ + return false; + else if (front_ > back_) + return true; + else if ((*queue_)[front_]) + return (*queue_)[front_]->Empty(); + else + return (front_ >= trivial_queue_.size()) + || (trivial_queue_[front_] == kNoStateId); + } + + void Clear() { + for (StateId i = front_; i <= back_; ++i) + if ((*queue_)[i]) + (*queue_)[i]->Clear(); + else if (i < trivial_queue_.size()) + trivial_queue_[i] = kNoStateId; + front_ = 0; + back_ = kNoStateId; + } + +private: + vector<Queue*> *queue_; + const vector<StateId> &scc_; + mutable StateId front_; + StateId back_; + vector<StateId> trivial_queue_; + + // This allows base-class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. 
+ virtual StateId Head_() const { return Head(); } + virtual void Enqueue_(StateId s) { Enqueue(s); } + virtual void Dequeue_() { Dequeue(); } + virtual void Update_(StateId s) { Update(s); } + virtual bool Empty_() const { return Empty(); } + virtual void Clear_() { return Clear(); } + + DISALLOW_COPY_AND_ASSIGN(SccQueue); +}; + + +// Automatic queue discipline, templated on the StateId. It selects a +// queue discipline for a given FST based on its properties. +template <class S> +class AutoQueue : public QueueBase<S> { +public: + typedef S StateId; + + // This constructor takes a state distance vector that, if non-null and if + // the Weight type has the path property, will entertain the + // shortest-first queue using the natural order w.r.t to the distance. + template <class Arc, class ArcFilter> + AutoQueue(const Fst<Arc> &fst, const vector<typename Arc::Weight> *distance, + ArcFilter filter) : QueueBase<S>(AUTO_QUEUE) { + typedef typename Arc::Weight Weight; + typedef StateWeightCompare< StateId, NaturalLess<Weight> > Compare; + + // First check if the FST is known to have these properties. + uint64 props = fst.Properties(kAcyclic | kCyclic | + kTopSorted | kUnweighted, false); + if ((props & kTopSorted) || fst.Start() == kNoStateId) { + queue_ = new StateOrderQueue<StateId>(); + VLOG(2) << "AutoQueue: using state-order discipline"; + } else if (props & kAcyclic) { + queue_ = new TopOrderQueue<StateId>(fst, filter); + VLOG(2) << "AutoQueue: using top-order discipline"; + } else if ((props & kUnweighted) && (Weight::Properties() & kIdempotent)) { + queue_ = new LifoQueue<StateId>(); + VLOG(2) << "AutoQueue: using LIFO discipline"; + } else { + uint64 properties; + // Decompose into strongly-connected components. 
+ SccVisitor<Arc> scc_visitor(&scc_, 0, 0, &properties); + DfsVisit(fst, &scc_visitor, filter); + StateId nscc = *max_element(scc_.begin(), scc_.end()) + 1; + vector<QueueType> queue_types(nscc); + NaturalLess<Weight> *less = 0; + Compare *comp = 0; + if (distance && (Weight::Properties() & kPath)) { + less = new NaturalLess<Weight>; + comp = new Compare(*distance, *less); + } + // Find the queue type to use per SCC. + bool unweighted; + bool all_trivial; + SccQueueType(fst, scc_, &queue_types, filter, less, &all_trivial, + &unweighted); + // If unweighted and semiring is idempotent, use lifo queue. + if (unweighted) { + queue_ = new LifoQueue<StateId>(); + VLOG(2) << "AutoQueue: using LIFO discipline"; + delete comp; + delete less; + return; + } + // If all the scc are trivial, FST is acyclic and the scc# gives + // the topological order. + if (all_trivial) { + queue_ = new TopOrderQueue<StateId>(scc_); + VLOG(2) << "AutoQueue: using top-order discipline"; + delete comp; + delete less; + return; + } + VLOG(2) << "AutoQueue: using SCC meta-discipline"; + queues_.resize(nscc); + for (StateId i = 0; i < nscc; ++i) { + switch(queue_types[i]) { + case TRIVIAL_QUEUE: + queues_[i] = 0; + VLOG(3) << "AutoQueue: SCC #" << i + << ": using trivial discipline"; + break; + case SHORTEST_FIRST_QUEUE: + queues_[i] = new ShortestFirstQueue<StateId, Compare, false>(*comp); + VLOG(3) << "AutoQueue: SCC #" << i << + ": using shortest-first discipline"; + break; + case LIFO_QUEUE: + queues_[i] = new LifoQueue<StateId>(); + VLOG(3) << "AutoQueue: SCC #" << i + << ": using LIFO disciplle"; + break; + case FIFO_QUEUE: + default: + queues_[i] = new FifoQueue<StateId>(); + VLOG(3) << "AutoQueue: SCC #" << i + << ": using FIFO disciplle"; + break; + } + } + queue_ = new SccQueue< StateId, QueueBase<StateId> >(scc_, &queues_); + delete comp; + delete less; + } + } + + ~AutoQueue() { + for (StateId i = 0; i < queues_.size(); ++i) + delete queues_[i]; + delete queue_; + } + + StateId Head() 
const { return queue_->Head(); } + + void Enqueue(StateId s) { queue_->Enqueue(s); } + + void Dequeue() { queue_->Dequeue(); } + + void Update(StateId s) { queue_->Update(s); } + + bool Empty() const { return queue_->Empty(); } + + void Clear() { queue_->Clear(); } + + + private: + QueueBase<StateId> *queue_; + vector< QueueBase<StateId>* > queues_; + vector<StateId> scc_; + + template <class Arc, class ArcFilter, class Less> + static void SccQueueType(const Fst<Arc> &fst, + const vector<StateId> &scc, + vector<QueueType> *queue_types, + ArcFilter filter, Less *less, + bool *all_trivial, bool *unweighted); + + // This allows base-class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. + virtual StateId Head_() const { return Head(); } + + virtual void Enqueue_(StateId s) { Enqueue(s); } + + virtual void Dequeue_() { Dequeue(); } + + virtual void Update_(StateId s) { Update(s); } + + virtual bool Empty_() const { return Empty(); } + + virtual void Clear_() { return Clear(); } + + DISALLOW_COPY_AND_ASSIGN(AutoQueue); +}; + + +// Examines the states in an Fst's strongly connected components and +// determines which type of queue to use per SCC. Stores result in +// vector QUEUE_TYPES, which is assumed to have length equal to the +// number of SCCs. An arc filter is used to limit the transitions +// considered (e.g., only the epsilon graph). ALL_TRIVIAL is set +// to true if every queue is the trivial queue. UNWEIGHTED is set to +// true if the semiring is idempotent and all the arc weights are equal to +// Zero() or One(). 
+template <class StateId> +template <class A, class ArcFilter, class Less> +void AutoQueue<StateId>::SccQueueType(const Fst<A> &fst, + const vector<StateId> &scc, + vector<QueueType> *queue_type, + ArcFilter filter, Less *less, + bool *all_trivial, bool *unweighted) { + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + *all_trivial = true; + *unweighted = true; + + for (StateId i = 0; i < queue_type->size(); ++i) + (*queue_type)[i] = TRIVIAL_QUEUE; + + for (StateIterator< Fst<Arc> > sit(fst); !sit.Done(); sit.Next()) { + StateId state = sit.Value(); + for (ArcIterator< Fst<Arc> > ait(fst, state); + !ait.Done(); + ait.Next()) { + const Arc &arc = ait.Value(); + if (!filter(arc)) continue; + if (scc[state] == scc[arc.nextstate]) { + QueueType &type = (*queue_type)[scc[state]]; + if (!less || ((*less)(arc.weight, Weight::One()))) + type = FIFO_QUEUE; + else if ((type == TRIVIAL_QUEUE) || (type == LIFO_QUEUE)) { + if (!(Weight::Properties() & kIdempotent) || + (arc.weight != Weight::Zero() && arc.weight != Weight::One())) + type = SHORTEST_FIRST_QUEUE; + else + type = LIFO_QUEUE; + } + if (type != TRIVIAL_QUEUE) *all_trivial = false; + } + if (!(Weight::Properties() & kIdempotent) || + (arc.weight != Weight::Zero() && arc.weight != Weight::One())) + *unweighted = false; + } + } +} + + +// An A* estimate is a function object that maps from a state ID to a +// an estimate of the shortest distance to the final states. +// The trivial A* estimate is always One(). +template <typename S, typename W> +struct TrivialAStarEstimate { + W operator()(S s) const { return W::One(); } +}; + + +// Given a vector that maps from states to weights representing the +// shortest distance from the initial state, a Less comparison +// function object between weights, and an estimate E of the +// shortest distance to the final states, this class defines a +// comparison function object between states. 
+template <typename S, typename L, typename E> +class AStarWeightCompare { + public: + typedef L Less; + typedef typename L::Weight Weight; + typedef S StateId; + + AStarWeightCompare(const vector<Weight>& weights, const L &less, + const E &estimate) + : weights_(weights), less_(less), estimate_(estimate) {} + + bool operator()(const S x, const S y) const { + Weight wx = Times(weights_[x], estimate_(x)); + Weight wy = Times(weights_[y], estimate_(y)); + return less_(wx, wy); + } + + private: + const vector<Weight>& weights_; + L less_; + const E &estimate_; +}; + + +// A* queue discipline, templated on the StateId, Weight and an +// estimate E of the shortest distance to the final states, is specialized +// to use the weight's natural order for the comparison function. +template <typename S, typename W, typename E> +class NaturalAStarQueue : + public ShortestFirstQueue<S, AStarWeightCompare<S, NaturalLess<W>, E> > { + public: + typedef AStarWeightCompare<S, NaturalLess<W>, E> C; + + NaturalAStarQueue(const vector<W> &distance, const E &estimate) : + ShortestFirstQueue<S, C>(C(distance, less_, estimate)) {} + + private: + NaturalLess<W> less_; +}; + + +// A state equivalence class is a function object that +// maps from a state ID to an equivalence class (state) ID. +// The trivial equivalence class maps a state to itself. +template <typename S> +struct TrivialStateEquivClass { + S operator()(S s) const { return s; } +}; + + +// Distance-based pruning queue discipline: Enqueues a state 's' +// only when its shortest distance (so far), as specified by +// 'distance', is less than (as specified by 'comp') the shortest +// distance Times() the 'threshold' to any state in the same +// equivalence class, as specified by the function object +// 'class_func'. The underlying queue discipline is specified by +// 'queue'. The ownership of 'queue' is given to this class. 
+template <typename Q, typename L, typename C> +class PruneQueue : public QueueBase<typename Q::StateId> { + public: + typedef typename Q::StateId StateId; + typedef typename L::Weight Weight; + + PruneQueue(const vector<Weight> &distance, Q *queue, L comp, + const C &class_func, Weight threshold) + : QueueBase<StateId>(OTHER_QUEUE), + distance_(distance), + queue_(queue), + less_(comp), + class_func_(class_func), + threshold_(threshold) {} + + ~PruneQueue() { delete queue_; } + + StateId Head() const { return queue_->Head(); } + + void Enqueue(StateId s) { + StateId c = class_func_(s); + if (c >= class_distance_.size()) + class_distance_.resize(c + 1, Weight::Zero()); + if (less_(distance_[s], class_distance_[c])) + class_distance_[c] = distance_[s]; + + // Enqueue only if below threshold limit + Weight limit = Times(class_distance_[c], threshold_); + if (less_(distance_[s], limit)) + queue_->Enqueue(s); + } + + void Dequeue() { queue_->Dequeue(); } + + void Update(StateId s) { + StateId c = class_func_(s); + if (less_(distance_[s], class_distance_[c])) + class_distance_[c] = distance_[s]; + queue_->Update(s); + } + + bool Empty() const { return queue_->Empty(); } + void Clear() { queue_->Clear(); } + + private: + // This allows base-class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. + virtual StateId Head_() const { return Head(); } + virtual void Enqueue_(StateId s) { Enqueue(s); } + virtual void Dequeue_() { Dequeue(); } + virtual void Update_(StateId s) { Update(s); } + virtual bool Empty_() const { return Empty(); } + virtual void Clear_() { return Clear(); } + + const vector<Weight> &distance_; // shortest distance to state + Q *queue_; + L less_; + const C &class_func_; // eqv. 
class function object + Weight threshold_; // pruning weight threshold + vector<Weight> class_distance_; // shortest distance to class + + DISALLOW_COPY_AND_ASSIGN(PruneQueue); +}; + + +// Pruning queue discipline (see above) using the weight's natural +// order for the comparison function. The ownership of 'queue' is +// given to this class. +template <typename Q, typename W, typename C> +class NaturalPruneQueue : + public PruneQueue<Q, NaturalLess<W>, C> { + public: + typedef typename Q::StateId StateId; + typedef W Weight; + + NaturalPruneQueue(const vector<W> &distance, Q *queue, + const C &class_func_, Weight threshold) : + PruneQueue<Q, NaturalLess<W>, C>(distance, queue, less_, + class_func_, threshold) {} + + private: + NaturalLess<W> less_; +}; + + +// Filter-based pruning queue discipline: Enqueues a state 's' only +// if allowed by the filter, specified by the function object 'state_filter'. +// The underlying queue discipline is specified by 'queue'. The ownership +// of 'queue' is given to this class. +template <typename Q, typename F> +class FilterQueue : public QueueBase<typename Q::StateId> { + public: + typedef typename Q::StateId StateId; + + FilterQueue(Q *queue, const F &state_filter) + : QueueBase<StateId>(OTHER_QUEUE), + queue_(queue), + state_filter_(state_filter) {} + + ~FilterQueue() { delete queue_; } + + StateId Head() const { return queue_->Head(); } + + // Enqueues only if allowed by state filter. + void Enqueue(StateId s) { + if (state_filter_(s)) { + queue_->Enqueue(s); + } + } + + void Dequeue() { queue_->Dequeue(); } + + void Update(StateId s) {} + bool Empty() const { return queue_->Empty(); } + void Clear() { queue_->Clear(); } + + private: + // This allows base-class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. 
+ virtual StateId Head_() const { return Head(); } + virtual void Enqueue_(StateId s) { Enqueue(s); } + virtual void Dequeue_() { Dequeue(); } + virtual void Update_(StateId s) { Update(s); } + virtual bool Empty_() const { return Empty(); } + virtual void Clear_() { return Clear(); } + + Q *queue_; + const F &state_filter_; // Filter to prune states + + DISALLOW_COPY_AND_ASSIGN(FilterQueue); +}; + +} // namespace fst + +#endif diff --git a/kaldi_io/src/tools/openfst/include/fst/randequivalent.h b/kaldi_io/src/tools/openfst/include/fst/randequivalent.h new file mode 100644 index 0000000..1aaccf7 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/randequivalent.h @@ -0,0 +1,135 @@ +// randequivalent.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) +// +// \file +// Tests if two FSTS are equivalent by checking if random +// strings from one FST are transduced the same by both FSTs. + +#ifndef FST_RANDEQUIVALENT_H__ +#define FST_RANDEQUIVALENT_H__ + +#include <fst/arcsort.h> +#include <fst/compose.h> +#include <fst/project.h> +#include <fst/randgen.h> +#include <fst/shortest-distance.h> +#include <fst/vector-fst.h> + + +namespace fst { + +// Test if two FSTs are equivalent by randomly generating 'num_paths' +// paths (as specified by the RandGenOptions 'opts') in these FSTs. 
+// +// For each randomly generated path, the algorithm computes for each +// of the two FSTs the sum of the weights of all the successful paths +// sharing the same input and output labels as the considered randomly +// generated path and checks that these two values are within +// 'delta'. Returns optional error value (when FLAGS_error_fatal = false). +template<class Arc, class ArcSelector> +bool RandEquivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2, + ssize_t num_paths, float delta, + const RandGenOptions<ArcSelector> &opts, + bool *error = 0) { + typedef typename Arc::Weight Weight; + if (error) *error = false; + + // Check that the symbol table are compatible + if (!CompatSymbols(fst1.InputSymbols(), fst2.InputSymbols()) || + !CompatSymbols(fst1.OutputSymbols(), fst2.OutputSymbols())) { + FSTERROR() << "RandEquivalent: input/output symbol tables of 1st " + << "argument do not match input/output symbol tables of 2nd " + << "argument"; + if (error) *error = true; + return false; + } + + ILabelCompare<Arc> icomp; + OLabelCompare<Arc> ocomp; + VectorFst<Arc> sfst1(fst1); + VectorFst<Arc> sfst2(fst2); + Connect(&sfst1); + Connect(&sfst2); + ArcSort(&sfst1, icomp); + ArcSort(&sfst2, icomp); + + bool ret = true; + for (ssize_t n = 0; n < num_paths; ++n) { + VectorFst<Arc> path; + const Fst<Arc> &fst = rand() % 2 ? 
sfst1 : sfst2; + RandGen(fst, &path, opts); + + VectorFst<Arc> ipath(path); + VectorFst<Arc> opath(path); + Project(&ipath, PROJECT_INPUT); + Project(&opath, PROJECT_OUTPUT); + + VectorFst<Arc> cfst1, pfst1; + Compose(ipath, sfst1, &cfst1); + ArcSort(&cfst1, ocomp); + Compose(cfst1, opath, &pfst1); + // Give up if there are epsilon cycles in a non-idempotent semiring + if (!(Weight::Properties() & kIdempotent) && + pfst1.Properties(kCyclic, true)) + continue; + Weight sum1 = ShortestDistance(pfst1); + + VectorFst<Arc> cfst2, pfst2; + Compose(ipath, sfst2, &cfst2); + ArcSort(&cfst2, ocomp); + Compose(cfst2, opath, &pfst2); + // Give up if there are epsilon cycles in a non-idempotent semiring + if (!(Weight::Properties() & kIdempotent) && + pfst2.Properties(kCyclic, true)) + continue; + Weight sum2 = ShortestDistance(pfst2); + + if (!ApproxEqual(sum1, sum2, delta)) { + VLOG(1) << "Sum1 = " << sum1; + VLOG(1) << "Sum2 = " << sum2; + ret = false; + break; + } + } + + if (fst1.Properties(kError, false) || fst2.Properties(kError, false)) { + if (error) *error = true; + return false; + } + + return ret; +} + + +// Test if two FSTs are equivalent by randomly generating 'num_paths' paths +// of length no more than 'path_length' using the seed 'seed' in these FSTs. +// Returns optional error value (when FLAGS_error_fatal = false). 
+template <class Arc> +bool RandEquivalent(const Fst<Arc> &fst1, const Fst<Arc> &fst2, + ssize_t num_paths, float delta = kDelta, + int seed = time(0), int path_length = INT_MAX, + bool *error = 0) { + UniformArcSelector<Arc> uniform_selector(seed); + RandGenOptions< UniformArcSelector<Arc> > + opts(uniform_selector, path_length); + return RandEquivalent(fst1, fst2, num_paths, delta, opts, error); +} + + +} // namespace fst + +#endif // FST_LIB_RANDEQUIVALENT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/randgen.h b/kaldi_io/src/tools/openfst/include/fst/randgen.h new file mode 100644 index 0000000..82ddffa --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/randgen.h @@ -0,0 +1,712 @@ +// randgen.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Classes and functions to generate random paths through an FST. + +#ifndef FST_LIB_RANDGEN_H__ +#define FST_LIB_RANDGEN_H__ + +#include <cmath> +#include <cstdlib> +#include <ctime> +#include <map> + +#include <fst/accumulator.h> +#include <fst/cache.h> +#include <fst/dfs-visit.h> +#include <fst/mutable-fst.h> + +namespace fst { + +// +// ARC SELECTORS - these function objects are used to select a random +// transition to take from an FST's state. They should return a number +// N s.t. 0 <= N <= NumArcs(). If N < NumArcs(), then the N-th +// transition is selected. 
If N == NumArcs(), then the final weight at +// that state is selected (i.e., the 'super-final' transition is selected). +// It can be assumed these will not be called unless either there +// are transitions leaving the state and/or the state is final. +// + +// Randomly selects a transition using the uniform distribution. +template <class A> +struct UniformArcSelector { + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + UniformArcSelector(int seed = time(0)) { srand(seed); } + + size_t operator()(const Fst<A> &fst, StateId s) const { + double r = rand()/(RAND_MAX + 1.0); + size_t n = fst.NumArcs(s); + if (fst.Final(s) != Weight::Zero()) + ++n; + return static_cast<size_t>(r * n); + } +}; + + +// Randomly selects a transition w.r.t. the weights treated as negative +// log probabilities after normalizing for the total weight leaving +// the state. Weight::zero transitions are disregarded. +// Assumes Weight::Value() accesses the floating point +// representation of the weight. 
+template <class A> +class LogProbArcSelector { + public: + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + LogProbArcSelector(int seed = time(0)) { srand(seed); } + + size_t operator()(const Fst<A> &fst, StateId s) const { + // Find total weight leaving state + double sum = 0.0; + for (ArcIterator< Fst<A> > aiter(fst, s); !aiter.Done(); + aiter.Next()) { + const A &arc = aiter.Value(); + sum += exp(-to_log_weight_(arc.weight).Value()); + } + sum += exp(-to_log_weight_(fst.Final(s)).Value()); + + double r = rand()/(RAND_MAX + 1.0); + double p = 0.0; + int n = 0; + for (ArcIterator< Fst<A> > aiter(fst, s); !aiter.Done(); + aiter.Next(), ++n) { + const A &arc = aiter.Value(); + p += exp(-to_log_weight_(arc.weight).Value()); + if (p > r * sum) return n; + } + return n; + } + + private: + WeightConvert<Weight, Log64Weight> to_log_weight_; +}; + +// Convenience definitions +typedef LogProbArcSelector<StdArc> StdArcSelector; +typedef LogProbArcSelector<LogArc> LogArcSelector; + + +// Same as LogProbArcSelector but use CacheLogAccumulator to cache +// the cummulative weight computations. 
+template <class A> +class FastLogProbArcSelector : public LogProbArcSelector<A> { + public: + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + using LogProbArcSelector<A>::operator(); + + FastLogProbArcSelector(int seed = time(0)) + : LogProbArcSelector<A>(seed), + seed_(seed) {} + + size_t operator()(const Fst<A> &fst, StateId s, + CacheLogAccumulator<A> *accumulator) const { + accumulator->SetState(s); + ArcIterator< Fst<A> > aiter(fst, s); + // Find total weight leaving state + double sum = to_log_weight_(accumulator->Sum(fst.Final(s), &aiter, 0, + fst.NumArcs(s))).Value(); + double r = -log(rand()/(RAND_MAX + 1.0)); + return accumulator->LowerBound(r + sum, &aiter); + } + + int Seed() const { return seed_; } + private: + int seed_; + WeightConvert<Weight, Log64Weight> to_log_weight_; +}; + +// Random path state info maintained by RandGenFst and passed to samplers. +template <typename A> +struct RandState { + typedef typename A::StateId StateId; + + StateId state_id; // current input FST state + size_t nsamples; // # of samples to be sampled at this state + size_t length; // length of path to this random state + size_t select; // previous sample arc selection + const RandState<A> *parent; // previous random state on this path + + RandState(StateId s, size_t n, size_t l, size_t k, const RandState<A> *p) + : state_id(s), nsamples(n), length(l), select(k), parent(p) {} + + RandState() + : state_id(kNoStateId), nsamples(0), length(0), select(0), parent(0) {} +}; + +// This class, given an arc selector, samples, with raplacement, +// multiple random transitions from an FST's state. This is a generic +// version with a straight-forward use of the arc selector. +// Specializations may be defined for arc selectors for greater +// efficiency or special behavior. 
+template <class A, class S> +class ArcSampler { + public: + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + // The 'max_length' may be interpreted (including ignored) by a + // sampler as it chooses. This generic version interprets this literally. + ArcSampler(const Fst<A> &fst, const S &arc_selector, + int max_length = INT_MAX) + : fst_(fst), + arc_selector_(arc_selector), + max_length_(max_length) {} + + // Allow updating Fst argument; pass only if changed. + ArcSampler(const ArcSampler<A, S> &sampler, const Fst<A> *fst = 0) + : fst_(fst ? *fst : sampler.fst_), + arc_selector_(sampler.arc_selector_), + max_length_(sampler.max_length_) { + Reset(); + } + + // Samples 'rstate.nsamples' from state 'state_id'. The 'rstate.length' is + // the length of the path to 'rstate'. Returns true if samples were + // collected. No samples may be collected if either there are no (including + // 'super-final') transitions leaving that state or if the + // 'max_length' has been deemed reached. Use the iterator members to + // read the samples. The samples will be in their original order. + bool Sample(const RandState<A> &rstate) { + sample_map_.clear(); + if ((fst_.NumArcs(rstate.state_id) == 0 && + fst_.Final(rstate.state_id) == Weight::Zero()) || + rstate.length == max_length_) { + Reset(); + return false; + } + + for (size_t i = 0; i < rstate.nsamples; ++i) + ++sample_map_[arc_selector_(fst_, rstate.state_id)]; + Reset(); + return true; + } + + // More samples? + bool Done() const { return sample_iter_ == sample_map_.end(); } + + // Gets the next sample. + void Next() { ++sample_iter_; } + + // Returns a pair (N, K) where 0 <= N <= NumArcs(s) and 0 < K <= nsamples. + // If N < NumArcs(s), then the N-th transition is specified. + // If N == NumArcs(s), then the final weight at that state is + // specified (i.e., the 'super-final' transition is specified). + // For the specified transition, K repetitions have been sampled. 
+ pair<size_t, size_t> Value() const { return *sample_iter_; } + + void Reset() { sample_iter_ = sample_map_.begin(); } + + bool Error() const { return false; } + + private: + const Fst<A> &fst_; + const S &arc_selector_; + int max_length_; + + // Stores (N, K) as described for Value(). + map<size_t, size_t> sample_map_; + map<size_t, size_t>::const_iterator sample_iter_; + + // disallow + ArcSampler<A, S> & operator=(const ArcSampler<A, S> &s); +}; + + +// Specialization for FastLogProbArcSelector. +template <class A> +class ArcSampler<A, FastLogProbArcSelector<A> > { + public: + typedef FastLogProbArcSelector<A> S; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + typedef CacheLogAccumulator<A> C; + + ArcSampler(const Fst<A> &fst, const S &arc_selector, int max_length = INT_MAX) + : fst_(fst), + arc_selector_(arc_selector), + max_length_(max_length), + accumulator_(new C()) { + accumulator_->Init(fst); + } + + ArcSampler(const ArcSampler<A, S> &sampler, const Fst<A> *fst = 0) + : fst_(fst ? 
*fst : sampler.fst_), + arc_selector_(sampler.arc_selector_), + max_length_(sampler.max_length_) { + if (fst) { + accumulator_ = new C(); + accumulator_->Init(*fst); + } else { // shallow copy + accumulator_ = new C(*sampler.accumulator_); + } + } + + ~ArcSampler() { + delete accumulator_; + } + + bool Sample(const RandState<A> &rstate) { + sample_map_.clear(); + if ((fst_.NumArcs(rstate.state_id) == 0 && + fst_.Final(rstate.state_id) == Weight::Zero()) || + rstate.length == max_length_) { + Reset(); + return false; + } + + for (size_t i = 0; i < rstate.nsamples; ++i) + ++sample_map_[arc_selector_(fst_, rstate.state_id, accumulator_)]; + Reset(); + return true; + } + + bool Done() const { return sample_iter_ == sample_map_.end(); } + void Next() { ++sample_iter_; } + pair<size_t, size_t> Value() const { return *sample_iter_; } + void Reset() { sample_iter_ = sample_map_.begin(); } + + bool Error() const { return accumulator_->Error(); } + + private: + const Fst<A> &fst_; + const S &arc_selector_; + int max_length_; + + // Stores (N, K) as described for Value(). + map<size_t, size_t> sample_map_; + map<size_t, size_t>::const_iterator sample_iter_; + C *accumulator_; + + // disallow + ArcSampler<A, S> & operator=(const ArcSampler<A, S> &s); +}; + + +// Options for random path generation with RandGenFst. The template argument +// is an arc sampler, typically class 'ArcSampler' above. Ownership of +// the sampler is taken by RandGenFst. +template <class S> +struct RandGenFstOptions : public CacheOptions { + S *arc_sampler; // How to sample transitions at a state + size_t npath; // # of paths to generate + bool weighted; // Output tree weighted by path count; o.w. + // output unweighted DAG + bool remove_total_weight; // Remove total weight when output is weighted. 
+ + RandGenFstOptions(const CacheOptions &copts, S *samp, + size_t n = 1, bool w = true, bool rw = false) + : CacheOptions(copts), + arc_sampler(samp), + npath(n), + weighted(w), + remove_total_weight(rw) {} +}; + + +// Implementation of RandGenFst. +template <class A, class B, class S> +class RandGenFstImpl : public CacheImpl<B> { + public: + using FstImpl<B>::SetType; + using FstImpl<B>::SetProperties; + using FstImpl<B>::SetInputSymbols; + using FstImpl<B>::SetOutputSymbols; + + using CacheBaseImpl< CacheState<B> >::AddArc; + using CacheBaseImpl< CacheState<B> >::HasArcs; + using CacheBaseImpl< CacheState<B> >::HasFinal; + using CacheBaseImpl< CacheState<B> >::HasStart; + using CacheBaseImpl< CacheState<B> >::SetArcs; + using CacheBaseImpl< CacheState<B> >::SetFinal; + using CacheBaseImpl< CacheState<B> >::SetStart; + + typedef B Arc; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + + RandGenFstImpl(const Fst<A> &fst, const RandGenFstOptions<S> &opts) + : CacheImpl<B>(opts), + fst_(fst.Copy()), + arc_sampler_(opts.arc_sampler), + npath_(opts.npath), + weighted_(opts.weighted), + remove_total_weight_(opts.remove_total_weight), + superfinal_(kNoLabel) { + SetType("randgen"); + + uint64 props = fst.Properties(kFstProperties, false); + SetProperties(RandGenProperties(props, weighted_), kCopyProperties); + + SetInputSymbols(fst.InputSymbols()); + SetOutputSymbols(fst.OutputSymbols()); + } + + RandGenFstImpl(const RandGenFstImpl &impl) + : CacheImpl<B>(impl), + fst_(impl.fst_->Copy(true)), + arc_sampler_(new S(*impl.arc_sampler_, fst_)), + npath_(impl.npath_), + weighted_(impl.weighted_), + superfinal_(kNoLabel) { + SetType("randgen"); + SetProperties(impl.Properties(), kCopyProperties); + SetInputSymbols(impl.InputSymbols()); + SetOutputSymbols(impl.OutputSymbols()); + } + + ~RandGenFstImpl() { + for (int i = 0; i < state_table_.size(); ++i) + delete state_table_[i]; + delete fst_; + delete arc_sampler_; 
+ } + + StateId Start() { + if (!HasStart()) { + StateId s = fst_->Start(); + if (s == kNoStateId) + return kNoStateId; + StateId start = state_table_.size(); + SetStart(start); + RandState<A> *rstate = new RandState<A>(s, npath_, 0, 0, 0); + state_table_.push_back(rstate); + } + return CacheImpl<B>::Start(); + } + + Weight Final(StateId s) { + if (!HasFinal(s)) { + Expand(s); + } + return CacheImpl<B>::Final(s); + } + + size_t NumArcs(StateId s) { + if (!HasArcs(s)) { + Expand(s); + } + return CacheImpl<B>::NumArcs(s); + } + + size_t NumInputEpsilons(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<B>::NumInputEpsilons(s); + } + + size_t NumOutputEpsilons(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<B>::NumOutputEpsilons(s); + } + + uint64 Properties() const { return Properties(kFstProperties); } + + // Set error if found; return FST impl properties. + uint64 Properties(uint64 mask) const { + if ((mask & kError) && + (fst_->Properties(kError, false) || arc_sampler_->Error())) { + SetProperties(kError, kError); + } + return FstImpl<Arc>::Properties(mask); + } + + void InitArcIterator(StateId s, ArcIteratorData<B> *data) { + if (!HasArcs(s)) + Expand(s); + CacheImpl<B>::InitArcIterator(s, data); + } + + // Computes the outgoing transitions from a state, creating new destination + // states as needed. 
+ void Expand(StateId s) { + if (s == superfinal_) { + SetFinal(s, Weight::One()); + SetArcs(s); + return; + } + + SetFinal(s, Weight::Zero()); + const RandState<A> &rstate = *state_table_[s]; + arc_sampler_->Sample(rstate); + ArcIterator< Fst<A> > aiter(*fst_, rstate.state_id); + size_t narcs = fst_->NumArcs(rstate.state_id); + for (;!arc_sampler_->Done(); arc_sampler_->Next()) { + const pair<size_t, size_t> &sample_pair = arc_sampler_->Value(); + size_t pos = sample_pair.first; + size_t count = sample_pair.second; + double prob = static_cast<double>(count)/rstate.nsamples; + if (pos < narcs) { // regular transition + aiter.Seek(sample_pair.first); + const A &aarc = aiter.Value(); + Weight weight = weighted_ ? to_weight_(-log(prob)) : Weight::One(); + B barc(aarc.ilabel, aarc.olabel, weight, state_table_.size()); + AddArc(s, barc); + RandState<A> *nrstate = + new RandState<A>(aarc.nextstate, count, rstate.length + 1, + pos, &rstate); + state_table_.push_back(nrstate); + } else { // super-final transition + if (weighted_) { + Weight weight = remove_total_weight_ ? + to_weight_(-log(prob)) : to_weight_(-log(prob * npath_)); + SetFinal(s, weight); + } else { + if (superfinal_ == kNoLabel) { + superfinal_ = state_table_.size(); + RandState<A> *nrstate = new RandState<A>(kNoStateId, 0, 0, 0, 0); + state_table_.push_back(nrstate); + } + for (size_t n = 0; n < count; ++n) { + B barc(0, 0, Weight::One(), superfinal_); + AddArc(s, barc); + } + } + } + } + SetArcs(s); + } + + private: + Fst<A> *fst_; + S *arc_sampler_; + size_t npath_; + vector<RandState<A> *> state_table_; + bool weighted_; + bool remove_total_weight_; + StateId superfinal_; + WeightConvert<Log64Weight, Weight> to_weight_; + + void operator=(const RandGenFstImpl<A, B, S> &); // disallow +}; + + +// Fst class to randomly generate paths through an FST; details controlled +// by RandGenOptionsFst. Output format is a tree weighted by the +// path count. 
+template <class A, class B, class S> +class RandGenFst : public ImplToFst< RandGenFstImpl<A, B, S> > { + public: + friend class ArcIterator< RandGenFst<A, B, S> >; + friend class StateIterator< RandGenFst<A, B, S> >; + typedef B Arc; + typedef S Sampler; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef CacheState<B> State; + typedef RandGenFstImpl<A, B, S> Impl; + + RandGenFst(const Fst<A> &fst, const RandGenFstOptions<S> &opts) + : ImplToFst<Impl>(new Impl(fst, opts)) {} + + // See Fst<>::Copy() for doc. + RandGenFst(const RandGenFst<A, B, S> &fst, bool safe = false) + : ImplToFst<Impl>(fst, safe) {} + + // Get a copy of this RandGenFst. See Fst<>::Copy() for further doc. + virtual RandGenFst<A, B, S> *Copy(bool safe = false) const { + return new RandGenFst<A, B, S>(*this, safe); + } + + virtual inline void InitStateIterator(StateIteratorData<B> *data) const; + + virtual void InitArcIterator(StateId s, ArcIteratorData<B> *data) const { + GetImpl()->InitArcIterator(s, data); + } + + private: + // Makes visible to friends. + Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } + + void operator=(const RandGenFst<A, B, S> &fst); // Disallow +}; + + + +// Specialization for RandGenFst. +template <class A, class B, class S> +class StateIterator< RandGenFst<A, B, S> > + : public CacheStateIterator< RandGenFst<A, B, S> > { + public: + explicit StateIterator(const RandGenFst<A, B, S> &fst) + : CacheStateIterator< RandGenFst<A, B, S> >(fst, fst.GetImpl()) {} + + private: + DISALLOW_COPY_AND_ASSIGN(StateIterator); +}; + + +// Specialization for RandGenFst. 
+template <class A, class B, class S> +class ArcIterator< RandGenFst<A, B, S> > + : public CacheArcIterator< RandGenFst<A, B, S> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const RandGenFst<A, B, S> &fst, StateId s) + : CacheArcIterator< RandGenFst<A, B, S> >(fst.GetImpl(), s) { + if (!fst.GetImpl()->HasArcs(s)) + fst.GetImpl()->Expand(s); + } + + private: + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + + +template <class A, class B, class S> inline +void RandGenFst<A, B, S>::InitStateIterator(StateIteratorData<B> *data) const +{ + data->base = new StateIterator< RandGenFst<A, B, S> >(*this); +} + +// Options for random path generation. +template <class S> +struct RandGenOptions { + const S &arc_selector; // How an arc is selected at a state + int max_length; // Maximum path length + size_t npath; // # of paths to generate + bool weighted; // Output is tree weighted by path count; o.w. + // output unweighted union of paths. + bool remove_total_weight; // Remove total weight when output is weighted. 
+ + RandGenOptions(const S &sel, int len = INT_MAX, size_t n = 1, + bool w = false, bool rw = false) + : arc_selector(sel), + max_length(len), + npath(n), + weighted(w), + remove_total_weight(rw) {} +}; + + +template <class IArc, class OArc> +class RandGenVisitor { + public: + typedef typename IArc::Weight Weight; + typedef typename IArc::StateId StateId; + + RandGenVisitor(MutableFst<OArc> *ofst) : ofst_(ofst) {} + + void InitVisit(const Fst<IArc> &ifst) { + ifst_ = &ifst; + + ofst_->DeleteStates(); + ofst_->SetInputSymbols(ifst.InputSymbols()); + ofst_->SetOutputSymbols(ifst.OutputSymbols()); + if (ifst.Properties(kError, false)) + ofst_->SetProperties(kError, kError); + path_.clear(); + } + + bool InitState(StateId s, StateId root) { return true; } + + bool TreeArc(StateId s, const IArc &arc) { + if (ifst_->Final(arc.nextstate) == Weight::Zero()) { + path_.push_back(arc); + } else { + OutputPath(); + } + return true; + } + + bool BackArc(StateId s, const IArc &arc) { + FSTERROR() << "RandGenVisitor: cyclic input"; + ofst_->SetProperties(kError, kError); + return false; + } + + bool ForwardOrCrossArc(StateId s, const IArc &arc) { + OutputPath(); + return true; + } + + void FinishState(StateId s, StateId p, const IArc *) { + if (p != kNoStateId && ifst_->Final(s) == Weight::Zero()) + path_.pop_back(); + } + + void FinishVisit() {} + + private: + void OutputPath() { + if (ofst_->Start() == kNoStateId) { + StateId start = ofst_->AddState(); + ofst_->SetStart(start); + } + + StateId src = ofst_->Start(); + for (size_t i = 0; i < path_.size(); ++i) { + StateId dest = ofst_->AddState(); + OArc arc(path_[i].ilabel, path_[i].olabel, Weight::One(), dest); + ofst_->AddArc(src, arc); + src = dest; + } + ofst_->SetFinal(src, Weight::One()); + } + + const Fst<IArc> *ifst_; + MutableFst<OArc> *ofst_; + vector<OArc> path_; + + DISALLOW_COPY_AND_ASSIGN(RandGenVisitor); +}; + + +// Randomly generate paths through an FST; details controlled by +// RandGenOptions. 
+template<class IArc, class OArc, class Selector> +void RandGen(const Fst<IArc> &ifst, MutableFst<OArc> *ofst, + const RandGenOptions<Selector> &opts) { + typedef ArcSampler<IArc, Selector> Sampler; + typedef RandGenFst<IArc, OArc, Sampler> RandFst; + typedef typename OArc::StateId StateId; + typedef typename OArc::Weight Weight; + + Sampler* arc_sampler = new Sampler(ifst, opts.arc_selector, opts.max_length); + RandGenFstOptions<Sampler> fopts(CacheOptions(true, 0), arc_sampler, + opts.npath, opts.weighted, + opts.remove_total_weight); + RandFst rfst(ifst, fopts); + if (opts.weighted) { + *ofst = rfst; + } else { + RandGenVisitor<IArc, OArc> rand_visitor(ofst); + DfsVisit(rfst, &rand_visitor); + } +} + +// Randomly generate a path through an FST with the uniform distribution +// over the transitions. +template<class IArc, class OArc> +void RandGen(const Fst<IArc> &ifst, MutableFst<OArc> *ofst) { + UniformArcSelector<IArc> uniform_selector; + RandGenOptions< UniformArcSelector<IArc> > opts(uniform_selector); + RandGen(ifst, ofst, opts); +} + +} // namespace fst + +#endif // FST_LIB_RANDGEN_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/random-weight.h b/kaldi_io/src/tools/openfst/include/fst/random-weight.h new file mode 100644 index 0000000..0ccd95d --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/random-weight.h @@ -0,0 +1,348 @@ +// random-weight.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Michael Riley) +// +// \file +// Function objects to generate random weights in various semirings +// for testing purposes. + +#ifndef FST_LIB_RANDOM_WEIGHT_H__ +#define FST_LIB_RANDOM_WEIGHT_H__ + +#include <cstdlib> +#include <ctime> +#include <vector> +using std::vector; + + +#include <fst/float-weight.h> +#include <fst/product-weight.h> +#include <fst/string-weight.h> +#include <fst/lexicographic-weight.h> +#include <fst/power-weight.h> +#include <fst/signed-log-weight.h> +#include <fst/sparse-power-weight.h> + + +namespace fst { + +// The boolean 'allow_zero' below determines whether Zero() and zero +// divisors should be returned in the random weight generation. + +// This function object returns TropicalWeightTpl<T>'s that are random integers +// chosen from [0, kNumRandomWeights). +template <class T> +class TropicalWeightGenerator_ { + public: + typedef TropicalWeightTpl<T> Weight; + + TropicalWeightGenerator_(int seed = time(0), bool allow_zero = true) + : allow_zero_(allow_zero) { + srand(seed); + } + + Weight operator() () const { + int n = rand() % (kNumRandomWeights + allow_zero_); + if (allow_zero_ && n == kNumRandomWeights) + return Weight::Zero(); + + return Weight(static_cast<T>(n)); + } + + private: + // The number of alternative random weights. + static const int kNumRandomWeights = 5; + + bool allow_zero_; // permit Zero() and zero divisors +}; + +template <class T> const int TropicalWeightGenerator_<T>::kNumRandomWeights; + +typedef TropicalWeightGenerator_<float> TropicalWeightGenerator; + + +// This function object returns LogWeightTpl<T>'s that are random integers +// chosen from [0, kNumRandomWeights). 
+template <class T> +class LogWeightGenerator_ { + public: + typedef LogWeightTpl<T> Weight; + + LogWeightGenerator_(int seed = time(0), bool allow_zero = true) + : allow_zero_(allow_zero) { + srand(seed); + } + + Weight operator() () const { + int n = rand() % (kNumRandomWeights + allow_zero_); + if (allow_zero_ && n == kNumRandomWeights) + return Weight::Zero(); + + return Weight(static_cast<T>(n)); + } + + private: + // Number of alternative random weights. + static const int kNumRandomWeights = 5; + + bool allow_zero_; // permit Zero() and zero divisors +}; + +template <class T> const int LogWeightGenerator_<T>::kNumRandomWeights; + +typedef LogWeightGenerator_<float> LogWeightGenerator; + + +// This function object returns MinMaxWeightTpl<T>'s that are random integers +// chosen from (-kNumRandomWeights, kNumRandomWeights) in addition to +// One(), and Zero() if zero is allowed. +template <class T> +class MinMaxWeightGenerator_ { + public: + typedef MinMaxWeightTpl<T> Weight; + + MinMaxWeightGenerator_(int seed = time(0), bool allow_zero = true) + : allow_zero_(allow_zero) { + srand(seed); + } + + Weight operator() () const { + int n = (rand() % (2*kNumRandomWeights + allow_zero_)) - kNumRandomWeights; + if (allow_zero_ && n == kNumRandomWeights) + return Weight::Zero(); + else if (n == -kNumRandomWeights) + return Weight::One(); + + return Weight(static_cast<T>(n)); + } + + private: + // Parameters controlling the number of alternative random weights. 
+ static const int kNumRandomWeights = 5; + + bool allow_zero_; // permit Zero() and zero divisors +}; + +template <class T> const int MinMaxWeightGenerator_<T>::kNumRandomWeights; + +typedef MinMaxWeightGenerator_<float> MinMaxWeightGenerator; + + +// This function object returns StringWeights that are random integer +// strings chosen from {1,...,kAlphabetSize}^{0,kMaxStringLength} U { Zero } +template <typename L, StringType S = STRING_LEFT> +class StringWeightGenerator { + public: + typedef StringWeight<L, S> Weight; + + StringWeightGenerator(int seed = time(0), bool allow_zero = true) + : allow_zero_(allow_zero) { + srand(seed); + } + + Weight operator() () const { + int n = rand() % (kMaxStringLength + allow_zero_); + if (allow_zero_ && n == kMaxStringLength) + return Weight::Zero(); + + vector<L> v; + for (int i = 0; i < n; ++i) + v.push_back(rand() % kAlphabetSize + 1); + return Weight(v.begin(), v.end()); + } + + private: + // Alphabet size for random weights. + static const int kAlphabetSize = 5; + // Number of alternative random weights. + static const int kMaxStringLength = 5; + + bool allow_zero_; // permit Zero() and zero +}; + +template <typename L, StringType S> +const int StringWeightGenerator<L, S>::kAlphabetSize; +template <typename L, StringType S> +const int StringWeightGenerator<L, S>::kMaxStringLength; + + +// This function object returns a weight generator over the product of the +// weights (by default) for the generators G1 and G2. 
+template <class G1, class G2, + class W = ProductWeight<typename G1::Weight, typename G2::Weight> > +class ProductWeightGenerator { + public: + typedef typename G1::Weight W1; + typedef typename G2::Weight W2; + typedef W Weight; + + ProductWeightGenerator(int seed = time(0), bool allow_zero = true) + : generator1_(seed, allow_zero), generator2_(seed, allow_zero) {} + + Weight operator() () const { + W1 w1 = generator1_(); + W2 w2 = generator2_(); + return Weight(w1, w2); + } + + private: + G1 generator1_; + G2 generator2_; +}; + + +// This function object returns a weight generator for a lexicographic weight +// composed out of weights for the generators G1 and G2. For lexicographic +// weights, we cannot generate zeroes for the two subweights separately: +// weights are members iff both members are zero or both members are non-zero. +template <class G1, class G2> +class LexicographicWeightGenerator { + public: + typedef typename G1::Weight W1; + typedef typename G2::Weight W2; + typedef LexicographicWeight<W1, W2> Weight; + + LexicographicWeightGenerator(int seed = time(0), bool allow_zero = true) + : generator1_(seed, false), generator2_(seed, false), + allow_zero_(allow_zero) {} + + Weight operator() () const { + if (allow_zero_) { + int n = rand() % (kNumRandomWeights + allow_zero_); + if (n == kNumRandomWeights) + return Weight(W1::Zero(), W2::Zero()); + } + W1 w1 = generator1_(); + W2 w2 = generator2_(); + return Weight(w1, w2); + } + + private: + G1 generator1_; + G2 generator2_; + static const int kNumRandomWeights = 5; + bool allow_zero_; +}; + +template <class G1, class G2> +const int LexicographicWeightGenerator<G1, G2>::kNumRandomWeights; + + +// Product generator of a string weight generator and an +// arbitrary weight generator. 
+template <class L, class G, StringType S = STRING_LEFT> +class GallicWeightGenerator + : public ProductWeightGenerator<StringWeightGenerator<L, S>, G> { + + public: + typedef ProductWeightGenerator<StringWeightGenerator<L, S>, G> PG; + typedef typename G::Weight W; + typedef GallicWeight<L, W, S> Weight; + + GallicWeightGenerator(int seed = time(0), bool allow_zero = true) + : PG(seed, allow_zero) {} + + GallicWeightGenerator(const PG &pg) : PG(pg) {} +}; + +// This function object returms a weight generator over the catersian power +// of rank n of the weights for the generator G. +template <class G, unsigned int n> +class PowerWeightGenerator { + public: + typedef typename G::Weight W; + typedef PowerWeight<W, n> Weight; + + PowerWeightGenerator(int seed = time(0), bool allow_zero = true) + : generator_(seed, allow_zero) {} + + Weight operator()() const { + Weight w; + for (size_t i = 0; i < n; ++i) { + W r = generator_(); + w.SetValue(i, r); + } + return w; + } + + private: + G generator_; +}; + +// This function object returns SignedLogWeightTpl<T>'s that are +// random integers chosen from [0, kNumRandomWeights). +// The sign is randomly chosen as well. +template <class T> +class SignedLogWeightGenerator_ { + public: + typedef SignedLogWeightTpl<T> Weight; + + SignedLogWeightGenerator_(int seed = time(0), bool allow_zero = true) + : allow_zero_(allow_zero) { + srand(seed); + } + + Weight operator() () const { + int m = rand() % 2; + int n = rand() % (kNumRandomWeights + allow_zero_); + + return SignedLogWeightTpl<T>( + (m == 0) ? + TropicalWeight(-1.0) : + TropicalWeight(1.0), + (allow_zero_ && n == kNumRandomWeights) ? + LogWeightTpl<T>::Zero() : + LogWeightTpl<T>(static_cast<T>(n))); + } + + private: + // Number of alternative random weights. 
+ static const int kNumRandomWeights = 5; + bool allow_zero_; // permit Zero() and zero divisors +}; + +template <class T> const int SignedLogWeightGenerator_<T>::kNumRandomWeights; + +typedef SignedLogWeightGenerator_<float> SignedLogWeightGenerator; + +// This function object returms a weight generator over the catersian power +// of rank n of the weights for the generator G. +template <class G, class K, unsigned int n> +class SparsePowerWeightGenerator { + public: + typedef typename G::Weight W; + typedef SparsePowerWeight<W, K> Weight; + + SparsePowerWeightGenerator(int seed = time(0), bool allow_zero = true) + : generator_(seed, allow_zero) {} + + Weight operator()() const { + Weight w; + for (size_t i = 1; i <= n; ++i) { + W r = generator_(); + K p = i; + w.Push(p, r, true); + } + return w; + } + + private: + G generator_; +}; + +} // namespace fst + +#endif // FST_LIB_RANDOM_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/rational.h b/kaldi_io/src/tools/openfst/include/fst/rational.h new file mode 100644 index 0000000..96aa00d --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/rational.h @@ -0,0 +1,330 @@ +// rational.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// An Fst implementation and base interface for delayed unions, +// concatenations and closures. 
+ +#ifndef FST_LIB_RATIONAL_H__ +#define FST_LIB_RATIONAL_H__ + +#include <algorithm> +#include <string> +#include <vector> +using std::vector; + +#include <fst/mutable-fst.h> +#include <fst/replace.h> +#include <fst/test-properties.h> + + +namespace fst { + +typedef CacheOptions RationalFstOptions; + +// This specifies whether to add the empty string. +enum ClosureType { CLOSURE_STAR = 0, // T* -> add the empty string + CLOSURE_PLUS = 1 }; // T+ -> don't add the empty string + +template <class A> class RationalFst; +template <class A> void Union(RationalFst<A> *fst1, const Fst<A> &fst2); +template <class A> void Concat(RationalFst<A> *fst1, const Fst<A> &fst2); +template <class A> void Concat(const Fst<A> &fst1, RationalFst<A> *fst2); +template <class A> void Closure(RationalFst<A> *fst, ClosureType closure_type); + + +// Implementation class for delayed unions, concatenations and closures. +template<class A> +class RationalFstImpl : public FstImpl<A> { + public: + using FstImpl<A>::SetType; + using FstImpl<A>::SetProperties; + using FstImpl<A>::WriteHeader; + using FstImpl<A>::SetInputSymbols; + using FstImpl<A>::SetOutputSymbols; + + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + + explicit RationalFstImpl(const RationalFstOptions &opts) + : nonterminals_(0), + replace_(0), + replace_options_(opts, 0) { + SetType("rational"); + fst_tuples_.push_back(pair<Label, const Fst<A>*>(0, 0)); + } + + RationalFstImpl(const RationalFstImpl<A> &impl) + : rfst_(impl.rfst_), + nonterminals_(impl.nonterminals_), + + replace_(impl.replace_ ? impl.replace_->Copy(true) : 0), + replace_options_(impl.replace_options_) { + SetType("rational"); + fst_tuples_.reserve(impl.fst_tuples_.size()); + for (size_t i = 0; i < impl.fst_tuples_.size(); ++i) + fst_tuples_.push_back(make_pair(impl.fst_tuples_[i].first, + impl.fst_tuples_[i].second + ? 
impl.fst_tuples_[i].second->Copy(true) + : 0)); + } + + virtual ~RationalFstImpl() { + for (size_t i = 0; i < fst_tuples_.size(); ++i) + if (fst_tuples_[i].second) + delete fst_tuples_[i].second; + if (replace_) + delete replace_; + } + + StateId Start() { return Replace()->Start(); } + + Weight Final(StateId s) { return Replace()->Final(s); } + + size_t NumArcs(StateId s) { return Replace()->NumArcs(s); } + + size_t NumInputEpsilons(StateId s) { + return Replace()->NumInputEpsilons(s); + } + + size_t NumOutputEpsilons(StateId s) { + return Replace()->NumOutputEpsilons(s); + } + + uint64 Properties() const { return Properties(kFstProperties); } + + // Set error if found; return FST impl properties. + uint64 Properties(uint64 mask) const { + if ((mask & kError) && Replace()->Properties(kError, false)) + SetProperties(kError, kError); + return FstImpl<Arc>::Properties(mask); + } + + // Implementation of UnionFst(fst1,fst2) + void InitUnion(const Fst<A> &fst1, const Fst<A> &fst2) { + if (replace_) + delete replace_; + uint64 props1 = fst1.Properties(kFstProperties, false); + uint64 props2 = fst2.Properties(kFstProperties, false); + SetInputSymbols(fst1.InputSymbols()); + SetOutputSymbols(fst1.OutputSymbols()); + rfst_.AddState(); + rfst_.AddState(); + rfst_.SetStart(0); + rfst_.SetFinal(1, Weight::One()); + rfst_.SetInputSymbols(fst1.InputSymbols()); + rfst_.SetOutputSymbols(fst1.OutputSymbols()); + nonterminals_ = 2; + rfst_.AddArc(0, A(0, -1, Weight::One(), 1)); + rfst_.AddArc(0, A(0, -2, Weight::One(), 1)); + fst_tuples_.push_back(make_pair(-1, fst1.Copy())); + fst_tuples_.push_back(make_pair(-2, fst2.Copy())); + SetProperties(UnionProperties(props1, props2, true), kCopyProperties); + } + + // Implementation of ConcatFst(fst1,fst2) + void InitConcat(const Fst<A> &fst1, const Fst<A> &fst2) { + if (replace_) + delete replace_; + uint64 props1 = fst1.Properties(kFstProperties, false); + uint64 props2 = fst2.Properties(kFstProperties, false); + 
SetInputSymbols(fst1.InputSymbols()); + SetOutputSymbols(fst1.OutputSymbols()); + rfst_.AddState(); + rfst_.AddState(); + rfst_.AddState(); + rfst_.SetStart(0); + rfst_.SetFinal(2, Weight::One()); + rfst_.SetInputSymbols(fst1.InputSymbols()); + rfst_.SetOutputSymbols(fst1.OutputSymbols()); + nonterminals_ = 2; + rfst_.AddArc(0, A(0, -1, Weight::One(), 1)); + rfst_.AddArc(1, A(0, -2, Weight::One(), 2)); + fst_tuples_.push_back(make_pair(-1, fst1.Copy())); + fst_tuples_.push_back(make_pair(-2, fst2.Copy())); + SetProperties(ConcatProperties(props1, props2, true), kCopyProperties); + } + + // Implementation of ClosureFst(fst, closure_type) + void InitClosure(const Fst<A> &fst, ClosureType closure_type) { + if (replace_) + delete replace_; + uint64 props = fst.Properties(kFstProperties, false); + SetInputSymbols(fst.InputSymbols()); + SetOutputSymbols(fst.OutputSymbols()); + if (closure_type == CLOSURE_STAR) { + rfst_.AddState(); + rfst_.SetStart(0); + rfst_.SetFinal(0, Weight::One()); + rfst_.AddArc(0, A(0, -1, Weight::One(), 0)); + } else { + rfst_.AddState(); + rfst_.AddState(); + rfst_.SetStart(0); + rfst_.SetFinal(1, Weight::One()); + rfst_.AddArc(0, A(0, -1, Weight::One(), 1)); + rfst_.AddArc(1, A(0, 0, Weight::One(), 0)); + } + rfst_.SetInputSymbols(fst.InputSymbols()); + rfst_.SetOutputSymbols(fst.OutputSymbols()); + fst_tuples_.push_back(make_pair(-1, fst.Copy())); + nonterminals_ = 1; + SetProperties(ClosureProperties(props, closure_type == CLOSURE_STAR, true), + kCopyProperties); + } + + // Implementation of Union(Fst &, RationalFst *) + void AddUnion(const Fst<A> &fst) { + if (replace_) + delete replace_; + uint64 props1 = FstImpl<A>::Properties(); + uint64 props2 = fst.Properties(kFstProperties, false); + VectorFst<A> afst; + afst.AddState(); + afst.AddState(); + afst.SetStart(0); + afst.SetFinal(1, Weight::One()); + ++nonterminals_; + afst.AddArc(0, A(0, -nonterminals_, Weight::One(), 1)); + Union(&rfst_, afst); + 
fst_tuples_.push_back(make_pair(-nonterminals_, fst.Copy())); + SetProperties(UnionProperties(props1, props2, true), kCopyProperties); + } + + // Implementation of Concat(Fst &, RationalFst *) + void AddConcat(const Fst<A> &fst, bool append) { + if (replace_) + delete replace_; + uint64 props1 = FstImpl<A>::Properties(); + uint64 props2 = fst.Properties(kFstProperties, false); + VectorFst<A> afst; + afst.AddState(); + afst.AddState(); + afst.SetStart(0); + afst.SetFinal(1, Weight::One()); + ++nonterminals_; + afst.AddArc(0, A(0, -nonterminals_, Weight::One(), 1)); + if (append) + Concat(&rfst_, afst); + else + Concat(afst, &rfst_); + fst_tuples_.push_back(make_pair(-nonterminals_, fst.Copy())); + SetProperties(ConcatProperties(props1, props2, true), kCopyProperties); + } + + // Implementation of Closure(RationalFst *, closure_type) + void AddClosure(ClosureType closure_type) { + if (replace_) + delete replace_; + uint64 props = FstImpl<A>::Properties(); + Closure(&rfst_, closure_type); + SetProperties(ClosureProperties(props, closure_type == CLOSURE_STAR, true), + kCopyProperties); + } + + // Returns the underlying ReplaceFst. + ReplaceFst<A> *Replace() const { + if (!replace_) { + fst_tuples_[0].second = rfst_.Copy(); + replace_ = new ReplaceFst<A>(fst_tuples_, replace_options_); + } + return replace_; + } + + private: + VectorFst<A> rfst_; // rational topology machine; uses neg. nonterminals + Label nonterminals_; // # of nonterminals used + // Contains the nonterminals and their corresponding FSTs. + mutable vector<pair<Label, const Fst<A>*> > fst_tuples_; + mutable ReplaceFst<A> *replace_; // Underlying ReplaceFst + ReplaceFstOptions<A> replace_options_; // Options for creating 'replace_' + + void operator=(const RationalFstImpl<A> &impl); // disallow +}; + +// Parent class for the delayed rational operations - delayed union, +// concatenation, and closure. 
+// +// This class attaches interface to implementation and handles +// reference counting, delegating most methods to ImplToFst. +template <class A> +class RationalFst : public ImplToFst< RationalFstImpl<A> > { + public: + friend class StateIterator< RationalFst<A> >; + friend class ArcIterator< RationalFst<A> >; + friend void Union<>(RationalFst<A> *fst1, const Fst<A> &fst2); + friend void Concat<>(RationalFst<A> *fst1, const Fst<A> &fst2); + friend void Concat<>(const Fst<A> &fst1, RationalFst<A> *fst2); + friend void Closure<>(RationalFst<A> *fst, ClosureType closure_type); + + typedef A Arc; + typedef typename A::StateId StateId; + typedef RationalFstImpl<A> Impl; + + virtual void InitStateIterator(StateIteratorData<A> *data) const { + GetImpl()->Replace()->InitStateIterator(data); + } + + virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { + GetImpl()->Replace()->InitArcIterator(s, data); + } + + protected: + RationalFst() + : ImplToFst<Impl>(new Impl(RationalFstOptions())) {} + + explicit RationalFst(const RationalFstOptions &opts) + : ImplToFst<Impl>(new Impl(opts)) {} + + // See Fst<>::Copy() for doc. + RationalFst(const RationalFst<A> &fst , bool safe = false) + : ImplToFst<Impl>(fst, safe) {} + + private: + // Makes visible to friends. + Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } + + void operator=(const RationalFst<A> &fst); // disallow +}; + + +// Specialization for RationalFst. +template <class A> +class StateIterator< RationalFst<A> > + : public StateIterator< ReplaceFst<A> > { + public: + explicit StateIterator(const RationalFst<A> &fst) + : StateIterator< ReplaceFst<A> >(*(fst.GetImpl()->Replace())) {} +}; + + +// Specialization for RationalFst. 
+template <class A> +class ArcIterator< RationalFst<A> > + : public CacheArcIterator< ReplaceFst<A> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const RationalFst<A> &fst, StateId s) + : ArcIterator< ReplaceFst<A> >(*(fst.GetImpl()->Replace()), s) {} +}; + +} // namespace fst + +#endif // FST_LIB_RATIONAL_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/register.h b/kaldi_io/src/tools/openfst/include/fst/register.h new file mode 100644 index 0000000..ea3f4d8 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/register.h @@ -0,0 +1,133 @@ +// register.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Michael Riley), [email protected] (Jake Ratkiewicz) +// +// \file +// Classes for registering derived Fsts for generic reading +// + +#ifndef FST_LIB_REGISTER_H__ +#define FST_LIB_REGISTER_H__ + +#include <string> + + +#include <fst/compat.h> +#include <iostream> +#include <fstream> +#include <sstream> +#include <fst/util.h> +#include <fst/generic-register.h> + + +#include <fst/types.h> + +namespace fst { + +template <class A> class Fst; +struct FstReadOptions; + +// This class represents a single entry in a FstRegister +template<class A> +struct FstRegisterEntry { + typedef Fst<A> *(*Reader)(istream &strm, const FstReadOptions &opts); + typedef Fst<A> *(*Converter)(const Fst<A> &fst); + + Reader reader; + Converter converter; + FstRegisterEntry() : reader(0), converter(0) {} + FstRegisterEntry(Reader r, Converter c) : reader(r), converter(c) { } +}; + +// This class maintains the correspondence between a string describing +// an FST type, and its reader and converter. +template<class A> +class FstRegister : public GenericRegister<string, FstRegisterEntry<A>, + FstRegister<A> > { + public: + typedef typename FstRegisterEntry<A>::Reader Reader; + typedef typename FstRegisterEntry<A>::Converter Converter; + + const Reader GetReader(const string &type) const { + return this->GetEntry(type).reader; + } + + const Converter GetConverter(const string &type) const { + return this->GetEntry(type).converter; + } + + protected: + virtual string ConvertKeyToSoFilename(const string& key) const { + string legal_type(key); + + ConvertToLegalCSymbol(&legal_type); + + return legal_type + "-fst.so"; + } +}; + + +// This class registers an Fst type for generic reading and creating. +// The Fst type must have a default constructor and a copy constructor +// from 'Fst<Arc>' for this to work. 
+template <class F> +class FstRegisterer + : public GenericRegisterer<FstRegister<typename F::Arc> > { + public: + typedef typename F::Arc Arc; + typedef typename FstRegister<Arc>::Entry Entry; + typedef typename FstRegister<Arc>::Reader Reader; + + FstRegisterer() : + GenericRegisterer<FstRegister<typename F::Arc> >( + F().Type(), BuildEntry()) { } + + private: + Entry BuildEntry() { + F *(*reader)(istream &strm, + const FstReadOptions &opts) = &F::Read; + + return Entry(reinterpret_cast<Reader>(reader), + &FstRegisterer<F>::Convert); + } + + static Fst<Arc> *Convert(const Fst<Arc> &fst) { return new F(fst); } +}; + + +// Convenience macro to generate static FstRegisterer instance. +#define REGISTER_FST(F, A) \ +static fst::FstRegisterer< F<A> > F ## _ ## A ## _registerer + + +// Converts an fst to type 'type'. +template <class A> +Fst<A> *Convert(const Fst<A> &fst, const string &ftype) { + FstRegister<A> *registr = FstRegister<A>::GetRegister(); + const typename FstRegister<A>::Converter + converter = registr->GetConverter(ftype); + if (!converter) { + string atype = A::Type(); + LOG(ERROR) << "Fst::Convert: Unknown FST type \"" << ftype + << "\" (arc type = \"" << atype << "\")"; + return 0; + } + return converter(fst); +} + +} // namespace fst + +#endif // FST_LIB_REGISTER_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/relabel.h b/kaldi_io/src/tools/openfst/include/fst/relabel.h new file mode 100644 index 0000000..dc675b6 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/relabel.h @@ -0,0 +1,528 @@ +// relabel.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Johan Schalkwyk) +// +// \file +// Functions and classes to relabel an Fst (either on input or output) +// +#ifndef FST_LIB_RELABEL_H__ +#define FST_LIB_RELABEL_H__ + +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <string> +#include <utility> +using std::pair; using std::make_pair; +#include <vector> +using std::vector; + +#include <fst/cache.h> +#include <fst/test-properties.h> + + +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; + +namespace fst { + +// +// Relabels either the input labels or output labels. The old to +// new labels are specified using a vector of pair<Label,Label>. +// Any label associations not specified are assumed to be identity +// mapping. +// +// \param fst input fst, must be mutable +// \param ipairs vector of input label pairs indicating old to new mapping +// \param opairs vector of output label pairs indicating old to new mapping +// +template <class A> +void Relabel( + MutableFst<A> *fst, + const vector<pair<typename A::Label, typename A::Label> >& ipairs, + const vector<pair<typename A::Label, typename A::Label> >& opairs) { + typedef typename A::StateId StateId; + typedef typename A::Label Label; + + uint64 props = fst->Properties(kFstProperties, false); + + // construct label to label hash. 
+ unordered_map<Label, Label> input_map; + for (size_t i = 0; i < ipairs.size(); ++i) { + input_map[ipairs[i].first] = ipairs[i].second; + } + + unordered_map<Label, Label> output_map; + for (size_t i = 0; i < opairs.size(); ++i) { + output_map[opairs[i].first] = opairs[i].second; + } + + for (StateIterator<MutableFst<A> > siter(*fst); + !siter.Done(); siter.Next()) { + StateId s = siter.Value(); + for (MutableArcIterator<MutableFst<A> > aiter(fst, s); + !aiter.Done(); aiter.Next()) { + A arc = aiter.Value(); + + // relabel input + // only relabel if relabel pair defined + typename unordered_map<Label, Label>::iterator it = + input_map.find(arc.ilabel); + if (it != input_map.end()) { + if (it->second == kNoLabel) { + FSTERROR() << "Input symbol id " << arc.ilabel + << " missing from target vocabulary"; + fst->SetProperties(kError, kError); + return; + } + arc.ilabel = it->second; + } + + // relabel output + it = output_map.find(arc.olabel); + if (it != output_map.end()) { + if (it->second == kNoLabel) { + FSTERROR() << "Output symbol id " << arc.olabel + << " missing from target vocabulary"; + fst->SetProperties(kError, kError); + return; + } + arc.olabel = it->second; + } + + aiter.SetValue(arc); + } + } + + fst->SetProperties(RelabelProperties(props), kFstProperties); +} + +// +// Relabels either the input labels or output labels. The old to +// new labels mappings are specified using an input Symbol set. +// Any label associations not specified are assumed to be identity +// mapping. 
+// +// \param fst input fst, must be mutable +// \param new_isymbols symbol set indicating new mapping of input symbols +// \param new_osymbols symbol set indicating new mapping of output symbols +// +template<class A> +void Relabel(MutableFst<A> *fst, + const SymbolTable* new_isymbols, + const SymbolTable* new_osymbols) { + Relabel(fst, + fst->InputSymbols(), new_isymbols, true, + fst->OutputSymbols(), new_osymbols, true); +} + +template<class A> +void Relabel(MutableFst<A> *fst, + const SymbolTable* old_isymbols, + const SymbolTable* new_isymbols, + bool attach_new_isymbols, + const SymbolTable* old_osymbols, + const SymbolTable* new_osymbols, + bool attach_new_osymbols) { + typedef typename A::StateId StateId; + typedef typename A::Label Label; + + vector<pair<Label, Label> > ipairs; + if (old_isymbols && new_isymbols) { + for (SymbolTableIterator syms_iter(*old_isymbols); !syms_iter.Done(); + syms_iter.Next()) { + string isymbol = syms_iter.Symbol(); + int isymbol_val = syms_iter.Value(); + int new_isymbol_val = new_isymbols->Find(isymbol); + ipairs.push_back(make_pair(isymbol_val, new_isymbol_val)); + } + if (attach_new_isymbols) + fst->SetInputSymbols(new_isymbols); + } + + vector<pair<Label, Label> > opairs; + if (old_osymbols && new_osymbols) { + for (SymbolTableIterator syms_iter(*old_osymbols); !syms_iter.Done(); + syms_iter.Next()) { + string osymbol = syms_iter.Symbol(); + int osymbol_val = syms_iter.Value(); + int new_osymbol_val = new_osymbols->Find(osymbol); + opairs.push_back(make_pair(osymbol_val, new_osymbol_val)); + } + if (attach_new_osymbols) + fst->SetOutputSymbols(new_osymbols); + } + + // call relabel using vector of relabel pairs. + Relabel(fst, ipairs, opairs); +} + + +typedef CacheOptions RelabelFstOptions; + +template <class A> class RelabelFst; + +// +// \class RelabelFstImpl +// \brief Implementation for delayed relabeling +// +// Relabels an FST from one symbol set to another. Relabeling +// can either be on input or output space. 
RelabelFst implements +// a delayed version of the relabel. Arcs are relabeled on the fly +// and not cached. I.e each request is recomputed. +// +template<class A> +class RelabelFstImpl : public CacheImpl<A> { + friend class StateIterator< RelabelFst<A> >; + public: + using FstImpl<A>::SetType; + using FstImpl<A>::SetProperties; + using FstImpl<A>::WriteHeader; + using FstImpl<A>::SetInputSymbols; + using FstImpl<A>::SetOutputSymbols; + + using CacheImpl<A>::PushArc; + using CacheImpl<A>::HasArcs; + using CacheImpl<A>::HasFinal; + using CacheImpl<A>::HasStart; + using CacheImpl<A>::SetArcs; + using CacheImpl<A>::SetFinal; + using CacheImpl<A>::SetStart; + + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef CacheState<A> State; + + RelabelFstImpl(const Fst<A>& fst, + const vector<pair<Label, Label> >& ipairs, + const vector<pair<Label, Label> >& opairs, + const RelabelFstOptions &opts) + : CacheImpl<A>(opts), fst_(fst.Copy()), + relabel_input_(false), relabel_output_(false) { + uint64 props = fst.Properties(kCopyProperties, false); + SetProperties(RelabelProperties(props)); + SetType("relabel"); + + // create input label map + if (ipairs.size() > 0) { + for (size_t i = 0; i < ipairs.size(); ++i) { + input_map_[ipairs[i].first] = ipairs[i].second; + } + relabel_input_ = true; + } + + // create output label map + if (opairs.size() > 0) { + for (size_t i = 0; i < opairs.size(); ++i) { + output_map_[opairs[i].first] = opairs[i].second; + } + relabel_output_ = true; + } + } + + RelabelFstImpl(const Fst<A>& fst, + const SymbolTable* old_isymbols, + const SymbolTable* new_isymbols, + const SymbolTable* old_osymbols, + const SymbolTable* new_osymbols, + const RelabelFstOptions &opts) + : CacheImpl<A>(opts), fst_(fst.Copy()), + relabel_input_(false), relabel_output_(false) { + SetType("relabel"); + + uint64 props = fst.Properties(kCopyProperties, false); + 
SetProperties(RelabelProperties(props)); + SetInputSymbols(old_isymbols); + SetOutputSymbols(old_osymbols); + + if (old_isymbols && new_isymbols && + old_isymbols->LabeledCheckSum() != new_isymbols->LabeledCheckSum()) { + for (SymbolTableIterator syms_iter(*old_isymbols); !syms_iter.Done(); + syms_iter.Next()) { + input_map_[syms_iter.Value()] = new_isymbols->Find(syms_iter.Symbol()); + } + SetInputSymbols(new_isymbols); + relabel_input_ = true; + } + + if (old_osymbols && new_osymbols && + old_osymbols->LabeledCheckSum() != new_osymbols->LabeledCheckSum()) { + for (SymbolTableIterator syms_iter(*old_osymbols); !syms_iter.Done(); + syms_iter.Next()) { + output_map_[syms_iter.Value()] = + new_osymbols->Find(syms_iter.Symbol()); + } + SetOutputSymbols(new_osymbols); + relabel_output_ = true; + } + } + + RelabelFstImpl(const RelabelFstImpl<A>& impl) + : CacheImpl<A>(impl), + fst_(impl.fst_->Copy(true)), + input_map_(impl.input_map_), + output_map_(impl.output_map_), + relabel_input_(impl.relabel_input_), + relabel_output_(impl.relabel_output_) { + SetType("relabel"); + SetProperties(impl.Properties(), kCopyProperties); + SetInputSymbols(impl.InputSymbols()); + SetOutputSymbols(impl.OutputSymbols()); + } + + ~RelabelFstImpl() { delete fst_; } + + StateId Start() { + if (!HasStart()) { + StateId s = fst_->Start(); + SetStart(s); + } + return CacheImpl<A>::Start(); + } + + Weight Final(StateId s) { + if (!HasFinal(s)) { + SetFinal(s, fst_->Final(s)); + } + return CacheImpl<A>::Final(s); + } + + size_t NumArcs(StateId s) { + if (!HasArcs(s)) { + Expand(s); + } + return CacheImpl<A>::NumArcs(s); + } + + size_t NumInputEpsilons(StateId s) { + if (!HasArcs(s)) { + Expand(s); + } + return CacheImpl<A>::NumInputEpsilons(s); + } + + size_t NumOutputEpsilons(StateId s) { + if (!HasArcs(s)) { + Expand(s); + } + return CacheImpl<A>::NumOutputEpsilons(s); + } + + uint64 Properties() const { return Properties(kFstProperties); } + + // Set error if found; return FST impl properties. 
+ uint64 Properties(uint64 mask) const { + if ((mask & kError) && fst_->Properties(kError, false)) + SetProperties(kError, kError); + return FstImpl<Arc>::Properties(mask); + } + + void InitArcIterator(StateId s, ArcIteratorData<A>* data) { + if (!HasArcs(s)) { + Expand(s); + } + CacheImpl<A>::InitArcIterator(s, data); + } + + void Expand(StateId s) { + for (ArcIterator<Fst<A> > aiter(*fst_, s); !aiter.Done(); aiter.Next()) { + A arc = aiter.Value(); + + // relabel input + if (relabel_input_) { + typename unordered_map<Label, Label>::iterator it = + input_map_.find(arc.ilabel); + if (it != input_map_.end()) { arc.ilabel = it->second; } + } + + // relabel output + if (relabel_output_) { + typename unordered_map<Label, Label>::iterator it = + output_map_.find(arc.olabel); + if (it != output_map_.end()) { arc.olabel = it->second; } + } + + PushArc(s, arc); + } + SetArcs(s); + } + + + private: + const Fst<A> *fst_; + + unordered_map<Label, Label> input_map_; + unordered_map<Label, Label> output_map_; + bool relabel_input_; + bool relabel_output_; + + void operator=(const RelabelFstImpl<A> &); // disallow +}; + + +// +// \class RelabelFst +// \brief Delayed implementation of arc relabeling +// +// This class attaches interface to implementation and handles +// reference counting, delegating most methods to ImplToFst. 
+template <class A> +class RelabelFst : public ImplToFst< RelabelFstImpl<A> > { + public: + friend class ArcIterator< RelabelFst<A> >; + friend class StateIterator< RelabelFst<A> >; + + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef CacheState<A> State; + typedef RelabelFstImpl<A> Impl; + + RelabelFst(const Fst<A>& fst, + const vector<pair<Label, Label> >& ipairs, + const vector<pair<Label, Label> >& opairs) + : ImplToFst<Impl>(new Impl(fst, ipairs, opairs, RelabelFstOptions())) {} + + RelabelFst(const Fst<A>& fst, + const vector<pair<Label, Label> >& ipairs, + const vector<pair<Label, Label> >& opairs, + const RelabelFstOptions &opts) + : ImplToFst<Impl>(new Impl(fst, ipairs, opairs, opts)) {} + + RelabelFst(const Fst<A>& fst, + const SymbolTable* new_isymbols, + const SymbolTable* new_osymbols) + : ImplToFst<Impl>(new Impl(fst, fst.InputSymbols(), new_isymbols, + fst.OutputSymbols(), new_osymbols, + RelabelFstOptions())) {} + + RelabelFst(const Fst<A>& fst, + const SymbolTable* new_isymbols, + const SymbolTable* new_osymbols, + const RelabelFstOptions &opts) + : ImplToFst<Impl>(new Impl(fst, fst.InputSymbols(), new_isymbols, + fst.OutputSymbols(), new_osymbols, opts)) {} + + RelabelFst(const Fst<A>& fst, + const SymbolTable* old_isymbols, + const SymbolTable* new_isymbols, + const SymbolTable* old_osymbols, + const SymbolTable* new_osymbols) + : ImplToFst<Impl>(new Impl(fst, old_isymbols, new_isymbols, old_osymbols, + new_osymbols, RelabelFstOptions())) {} + + RelabelFst(const Fst<A>& fst, + const SymbolTable* old_isymbols, + const SymbolTable* new_isymbols, + const SymbolTable* old_osymbols, + const SymbolTable* new_osymbols, + const RelabelFstOptions &opts) + : ImplToFst<Impl>(new Impl(fst, old_isymbols, new_isymbols, old_osymbols, + new_osymbols, opts)) {} + + // See Fst<>::Copy() for doc. 
+ RelabelFst(const RelabelFst<A> &fst, bool safe = false) + : ImplToFst<Impl>(fst, safe) {} + + // Get a copy of this RelabelFst. See Fst<>::Copy() for further doc. + virtual RelabelFst<A> *Copy(bool safe = false) const { + return new RelabelFst<A>(*this, safe); + } + + virtual void InitStateIterator(StateIteratorData<A> *data) const; + + virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { + return GetImpl()->InitArcIterator(s, data); + } + + private: + // Makes visible to friends. + Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } + + void operator=(const RelabelFst<A> &fst); // disallow +}; + +// Specialization for RelabelFst. +template<class A> +class StateIterator< RelabelFst<A> > : public StateIteratorBase<A> { + public: + typedef typename A::StateId StateId; + + explicit StateIterator(const RelabelFst<A> &fst) + : impl_(fst.GetImpl()), siter_(*impl_->fst_), s_(0) {} + + bool Done() const { return siter_.Done(); } + + StateId Value() const { return s_; } + + void Next() { + if (!siter_.Done()) { + ++s_; + siter_.Next(); + } + } + + void Reset() { + s_ = 0; + siter_.Reset(); + } + + private: + bool Done_() const { return Done(); } + StateId Value_() const { return Value(); } + void Next_() { Next(); } + void Reset_() { Reset(); } + + const RelabelFstImpl<A> *impl_; + StateIterator< Fst<A> > siter_; + StateId s_; + + DISALLOW_COPY_AND_ASSIGN(StateIterator); +}; + + +// Specialization for RelabelFst. 
+template <class A> +class ArcIterator< RelabelFst<A> > + : public CacheArcIterator< RelabelFst<A> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const RelabelFst<A> &fst, StateId s) + : CacheArcIterator< RelabelFst<A> >(fst.GetImpl(), s) { + if (!fst.GetImpl()->HasArcs(s)) + fst.GetImpl()->Expand(s); + } + + private: + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + +template <class A> inline +void RelabelFst<A>::InitStateIterator(StateIteratorData<A> *data) const { + data->base = new StateIterator< RelabelFst<A> >(*this); +} + +// Useful alias when using StdArc. +typedef RelabelFst<StdArc> StdRelabelFst; + +} // namespace fst + +#endif // FST_LIB_RELABEL_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/replace-util.h b/kaldi_io/src/tools/openfst/include/fst/replace-util.h new file mode 100644 index 0000000..d58cb15 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/replace-util.h @@ -0,0 +1,550 @@ +// replace-util.h + + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// + +// \file +// Utility classes for the recursive replacement of Fsts (RTNs). 
+ +#ifndef FST_LIB_REPLACE_UTIL_H__ +#define FST_LIB_REPLACE_UTIL_H__ + +#include <vector> +using std::vector; +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <tr1/unordered_set> +using std::tr1::unordered_set; +using std::tr1::unordered_multiset; +#include <map> + +#include <fst/connect.h> +#include <fst/mutable-fst.h> +#include <fst/topsort.h> + + +namespace fst { + +template <class Arc> +void Replace(const vector<pair<typename Arc::Label, const Fst<Arc>* > >&, + MutableFst<Arc> *, typename Arc::Label, bool); + + +// Utility class for the recursive replacement of Fsts (RTNs). The +// user provides a set of Label, Fst pairs at construction. These are +// used by methods for testing cyclic dependencies and connectedness +// and doing RTN connection and specific Fst replacement by label or +// for various optimization properties. The modified results can be +// obtained with the GetFstPairs() or GetMutableFstPairs() methods. +template <class Arc> +class ReplaceUtil { + public: + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + typedef typename Arc::StateId StateId; + + typedef pair<Label, const Fst<Arc>*> FstPair; + typedef pair<Label, MutableFst<Arc>*> MutableFstPair; + typedef unordered_map<Label, Label> NonTerminalHash; + + // Constructs from mutable Fsts; Fst ownership given to ReplaceUtil. + ReplaceUtil(const vector<MutableFstPair> &fst_pairs, + Label root_label, bool epsilon_on_replace = false); + + // Constructs from Fsts; Fst ownership retained by caller. + ReplaceUtil(const vector<FstPair> &fst_pairs, + Label root_label, bool epsilon_on_replace = false); + + // Constructs from ReplaceFst internals; ownership retained by caller. 
+ ReplaceUtil(const vector<const Fst<Arc> *> &fst_array, + const NonTerminalHash &nonterminal_hash, Label root_fst, + bool epsilon_on_replace = false); + + ~ReplaceUtil() { + for (Label i = 0; i < fst_array_.size(); ++i) + delete fst_array_[i]; + } + + // True if the non-terminal dependencies are cyclic. Cyclic + // dependencies will result in an unexpandable replace fst. + bool CyclicDependencies() const { + GetDependencies(false); + return depprops_ & kCyclic; + } + + // Returns true if no useless Fsts, states or transitions. + bool Connected() const { + GetDependencies(false); + uint64 props = kAccessible | kCoAccessible; + for (Label i = 0; i < fst_array_.size(); ++i) { + if (!fst_array_[i]) + continue; + if (fst_array_[i]->Properties(props, true) != props || !depaccess_[i]) + return false; + } + return true; + } + + // Removes useless Fsts, states and transitions. + void Connect(); + + // Replaces Fsts specified by labels. + // Does nothing if there are cyclic dependencies. + void ReplaceLabels(const vector<Label> &labels); + + // Replaces Fsts that have at most 'nstates' states, 'narcs' arcs and + // 'nnonterm' non-terminals (updating in reverse dependency order). + // Does nothing if there are cyclic dependencies. + void ReplaceBySize(size_t nstates, size_t narcs, size_t nnonterms); + + // Replaces singleton Fsts. + // Does nothing if there are cyclic dependencies. + void ReplaceTrivial() { ReplaceBySize(2, 1, 1); } + + // Replaces non-terminals that have at most 'ninstances' instances + // (updating in dependency order). + // Does nothing if there are cyclic dependencies. + void ReplaceByInstances(size_t ninstances); + + // Replaces non-terminals that have only one instance. + // Does nothing if there are cyclic dependencies. + void ReplaceUnique() { ReplaceByInstances(1); } + + // Returns Label, Fst pairs; Fst ownership retained by ReplaceUtil. 
+ void GetFstPairs(vector<FstPair> *fst_pairs); + + // Returns Label, MutableFst pairs; Fst ownership given to caller. + void GetMutableFstPairs(vector<MutableFstPair> *mutable_fst_pairs); + + private: + // Per Fst statistics + struct ReplaceStats { + StateId nstates; // # of states + StateId nfinal; // # of final states + size_t narcs; // # of arcs + Label nnonterms; // # of non-terminals in Fst + size_t nref; // # of non-terminal instances referring to this Fst + + // # of times that ith Fst references this Fst + map<Label, size_t> inref; + // # of times that this Fst references the ith Fst + map<Label, size_t> outref; + + ReplaceStats() + : nstates(0), + nfinal(0), + narcs(0), + nnonterms(0), + nref(0) {} + }; + + // Check Mutable Fsts exist o.w. create them. + void CheckMutableFsts(); + + // Computes the dependency graph of the replace Fsts. + // If 'stats' is true, dependency statistics computed as well. + void GetDependencies(bool stats) const; + + void ClearDependencies() const { + depfst_.DeleteStates(); + stats_.clear(); + depprops_ = 0; + have_stats_ = false; + } + + // Get topological order of dependencies. Returns false with cyclic input. + bool GetTopOrder(const Fst<Arc> &fst, vector<Label> *toporder) const; + + // Update statistics assuming that jth Fst will be replaced. 
+ void UpdateStats(Label j); + + Label root_label_; // root non-terminal + Label root_fst_; // root Fst ID + bool epsilon_on_replace_; // see Replace() + vector<const Fst<Arc> *> fst_array_; // Fst per ID + vector<MutableFst<Arc> *> mutable_fst_array_; // MutableFst per ID + vector<Label> nonterminal_array_; // Fst ID to non-terminal + NonTerminalHash nonterminal_hash_; // non-terminal to Fst ID + mutable VectorFst<Arc> depfst_; // Fst ID dependencies + mutable vector<bool> depaccess_; // Fst ID accessibility + mutable uint64 depprops_; // dependency Fst props + mutable bool have_stats_; // have dependency statistics + mutable vector<ReplaceStats> stats_; // Per Fst statistics + DISALLOW_COPY_AND_ASSIGN(ReplaceUtil); +}; + +template <class Arc> +ReplaceUtil<Arc>::ReplaceUtil( + const vector<MutableFstPair> &fst_pairs, + Label root_label, bool epsilon_on_replace) + : root_label_(root_label), + epsilon_on_replace_(epsilon_on_replace), + depprops_(0), + have_stats_(false) { + fst_array_.push_back(0); + mutable_fst_array_.push_back(0); + nonterminal_array_.push_back(kNoLabel); + for (Label i = 0; i < fst_pairs.size(); ++i) { + Label label = fst_pairs[i].first; + MutableFst<Arc> *fst = fst_pairs[i].second; + nonterminal_hash_[label] = fst_array_.size(); + nonterminal_array_.push_back(label); + fst_array_.push_back(fst); + mutable_fst_array_.push_back(fst); + } + root_fst_ = nonterminal_hash_[root_label_]; + if (!root_fst_) + FSTERROR() << "ReplaceUtil: no root FST for label: " << root_label_; +} + +template <class Arc> +ReplaceUtil<Arc>::ReplaceUtil( + const vector<FstPair> &fst_pairs, + Label root_label, bool epsilon_on_replace) + : root_label_(root_label), + epsilon_on_replace_(epsilon_on_replace), + depprops_(0), + have_stats_(false) { + fst_array_.push_back(0); + nonterminal_array_.push_back(kNoLabel); + for (Label i = 0; i < fst_pairs.size(); ++i) { + Label label = fst_pairs[i].first; + const Fst<Arc> *fst = fst_pairs[i].second; + nonterminal_hash_[label] = 
fst_array_.size(); + nonterminal_array_.push_back(label); + fst_array_.push_back(fst->Copy()); + } + root_fst_ = nonterminal_hash_[root_label]; + if (!root_fst_) + FSTERROR() << "ReplaceUtil: no root FST for label: " << root_label_; +} + +template <class Arc> +ReplaceUtil<Arc>::ReplaceUtil( + const vector<const Fst<Arc> *> &fst_array, + const NonTerminalHash &nonterminal_hash, Label root_fst, + bool epsilon_on_replace) + : root_fst_(root_fst), + epsilon_on_replace_(epsilon_on_replace), + nonterminal_array_(fst_array.size()), + nonterminal_hash_(nonterminal_hash), + depprops_(0), + have_stats_(false) { + fst_array_.push_back(0); + for (Label i = 1; i < fst_array.size(); ++i) + fst_array_.push_back(fst_array[i]->Copy()); + for (typename NonTerminalHash::const_iterator it = + nonterminal_hash.begin(); it != nonterminal_hash.end(); ++it) + nonterminal_array_[it->second] = it->first; + root_label_ = nonterminal_array_[root_fst_]; +} + +template <class Arc> +void ReplaceUtil<Arc>::GetDependencies(bool stats) const { + if (depfst_.NumStates() > 0) { + if (stats && !have_stats_) + ClearDependencies(); + else + return; + } + + have_stats_ = stats; + if (have_stats_) + stats_.reserve(fst_array_.size()); + + for (Label i = 0; i < fst_array_.size(); ++i) { + depfst_.AddState(); + depfst_.SetFinal(i, Weight::One()); + if (have_stats_) + stats_.push_back(ReplaceStats()); + } + depfst_.SetStart(root_fst_); + + // An arc from each state (representing the fst) to the + // state representing the fst being replaced + for (Label i = 0; i < fst_array_.size(); ++i) { + const Fst<Arc> *ifst = fst_array_[i]; + if (!ifst) + continue; + for (StateIterator<Fst<Arc> > siter(*ifst); !siter.Done(); siter.Next()) { + StateId s = siter.Value(); + if (have_stats_) { + ++stats_[i].nstates; + if (ifst->Final(s) != Weight::Zero()) + ++stats_[i].nfinal; + } + for (ArcIterator<Fst<Arc> > aiter(*ifst, s); + !aiter.Done(); aiter.Next()) { + if (have_stats_) + ++stats_[i].narcs; + const Arc& arc = 
aiter.Value(); + + typename NonTerminalHash::const_iterator it = + nonterminal_hash_.find(arc.olabel); + if (it != nonterminal_hash_.end()) { + Label j = it->second; + depfst_.AddArc(i, Arc(arc.olabel, arc.olabel, Weight::One(), j)); + if (have_stats_) { + ++stats_[i].nnonterms; + ++stats_[j].nref; + ++stats_[j].inref[i]; + ++stats_[i].outref[j]; + } + } + } + } + } + + // Gets accessibility info + SccVisitor<Arc> scc_visitor(0, &depaccess_, 0, &depprops_); + DfsVisit(depfst_, &scc_visitor); +} + +template <class Arc> +void ReplaceUtil<Arc>::UpdateStats(Label j) { + if (!have_stats_) { + FSTERROR() << "ReplaceUtil::UpdateStats: stats not available"; + return; + } + + if (j == root_fst_) // can't replace root + return; + + typedef typename map<Label, size_t>::iterator Iter; + for (Iter in = stats_[j].inref.begin(); + in != stats_[j].inref.end(); + ++in) { + Label i = in->first; + size_t ni = in->second; + stats_[i].nstates += stats_[j].nstates * ni; + stats_[i].narcs += (stats_[j].narcs + 1) * ni; // narcs - 1 + 2 (eps) + stats_[i].nnonterms += (stats_[j].nnonterms - 1) * ni; + stats_[i].outref.erase(stats_[i].outref.find(j)); + for (Iter out = stats_[j].outref.begin(); + out != stats_[j].outref.end(); + ++out) { + Label k = out->first; + size_t nk = out->second; + stats_[i].outref[k] += ni * nk; + } + } + + for (Iter out = stats_[j].outref.begin(); + out != stats_[j].outref.end(); + ++out) { + Label k = out->first; + size_t nk = out->second; + stats_[k].nref -= nk; + stats_[k].inref.erase(stats_[k].inref.find(j)); + for (Iter in = stats_[j].inref.begin(); + in != stats_[j].inref.end(); + ++in) { + Label i = in->first; + size_t ni = in->second; + stats_[k].inref[i] += ni * nk; + stats_[k].nref += ni * nk; + } + } +} + +template <class Arc> +void ReplaceUtil<Arc>::CheckMutableFsts() { + if (mutable_fst_array_.size() == 0) { + for (Label i = 0; i < fst_array_.size(); ++i) { + if (!fst_array_[i]) { + mutable_fst_array_.push_back(0); + } else { + 
mutable_fst_array_.push_back(new VectorFst<Arc>(*fst_array_[i])); + delete fst_array_[i]; + fst_array_[i] = mutable_fst_array_[i]; + } + } + } +} + +template <class Arc> +void ReplaceUtil<Arc>::Connect() { + CheckMutableFsts(); + uint64 props = kAccessible | kCoAccessible; + for (Label i = 0; i < mutable_fst_array_.size(); ++i) { + if (!mutable_fst_array_[i]) + continue; + if (mutable_fst_array_[i]->Properties(props, false) != props) + fst::Connect(mutable_fst_array_[i]); + } + GetDependencies(false); + for (Label i = 0; i < mutable_fst_array_.size(); ++i) { + MutableFst<Arc> *fst = mutable_fst_array_[i]; + if (fst && !depaccess_[i]) { + delete fst; + fst_array_[i] = 0; + mutable_fst_array_[i] = 0; + } + } + ClearDependencies(); +} + +template <class Arc> +bool ReplaceUtil<Arc>::GetTopOrder(const Fst<Arc> &fst, + vector<Label> *toporder) const { + // Finds topological order of dependencies. + vector<StateId> order; + bool acyclic = false; + + TopOrderVisitor<Arc> top_order_visitor(&order, &acyclic); + DfsVisit(fst, &top_order_visitor); + if (!acyclic) { + LOG(WARNING) << "ReplaceUtil::GetTopOrder: Cyclical label dependencies"; + return false; + } + + toporder->resize(order.size()); + for (Label i = 0; i < order.size(); ++i) + (*toporder)[order[i]] = i; + + return true; +} + +template <class Arc> +void ReplaceUtil<Arc>::ReplaceLabels(const vector<Label> &labels) { + CheckMutableFsts(); + unordered_set<Label> label_set; + for (Label i = 0; i < labels.size(); ++i) + if (labels[i] != root_label_) // can't replace root + label_set.insert(labels[i]); + + // Finds Fst dependencies restricted to the labels requested. 
+ GetDependencies(false); + VectorFst<Arc> pfst(depfst_); + for (StateId i = 0; i < pfst.NumStates(); ++i) { + vector<Arc> arcs; + for (ArcIterator< VectorFst<Arc> > aiter(pfst, i); + !aiter.Done(); aiter.Next()) { + const Arc &arc = aiter.Value(); + Label label = nonterminal_array_[arc.nextstate]; + if (label_set.count(label) > 0) + arcs.push_back(arc); + } + pfst.DeleteArcs(i); + for (size_t j = 0; j < arcs.size(); ++j) + pfst.AddArc(i, arcs[j]); + } + + vector<Label> toporder; + if (!GetTopOrder(pfst, &toporder)) { + ClearDependencies(); + return; + } + + // Visits Fsts in reverse topological order of dependencies and + // performs replacements. + for (Label o = toporder.size() - 1; o >= 0; --o) { + vector<FstPair> fst_pairs; + StateId s = toporder[o]; + for (ArcIterator< VectorFst<Arc> > aiter(pfst, s); + !aiter.Done(); aiter.Next()) { + const Arc &arc = aiter.Value(); + Label label = nonterminal_array_[arc.nextstate]; + const Fst<Arc> *fst = fst_array_[arc.nextstate]; + fst_pairs.push_back(make_pair(label, fst)); + } + if (fst_pairs.empty()) + continue; + Label label = nonterminal_array_[s]; + const Fst<Arc> *fst = fst_array_[s]; + fst_pairs.push_back(make_pair(label, fst)); + + Replace(fst_pairs, mutable_fst_array_[s], label, epsilon_on_replace_); + } + ClearDependencies(); +} + +template <class Arc> +void ReplaceUtil<Arc>::ReplaceBySize(size_t nstates, size_t narcs, + size_t nnonterms) { + vector<Label> labels; + GetDependencies(true); + + vector<Label> toporder; + if (!GetTopOrder(depfst_, &toporder)) { + ClearDependencies(); + return; + } + + for (Label o = toporder.size() - 1; o >= 0; --o) { + Label j = toporder[o]; + if (stats_[j].nstates <= nstates && + stats_[j].narcs <= narcs && + stats_[j].nnonterms <= nnonterms) { + labels.push_back(nonterminal_array_[j]); + UpdateStats(j); + } + } + ReplaceLabels(labels); +} + +template <class Arc> +void ReplaceUtil<Arc>::ReplaceByInstances(size_t ninstances) { + vector<Label> labels; + GetDependencies(true); + + 
vector<Label> toporder; + if (!GetTopOrder(depfst_, &toporder)) { + ClearDependencies(); + return; + } + for (Label o = 0; o < toporder.size(); ++o) { + Label j = toporder[o]; + if (stats_[j].nref <= ninstances) { + labels.push_back(nonterminal_array_[j]); + UpdateStats(j); + } + } + ReplaceLabels(labels); +} + +template <class Arc> +void ReplaceUtil<Arc>::GetFstPairs(vector<FstPair> *fst_pairs) { + CheckMutableFsts(); + fst_pairs->clear(); + for (Label i = 0; i < fst_array_.size(); ++i) { + Label label = nonterminal_array_[i]; + const Fst<Arc> *fst = fst_array_[i]; + if (!fst) + continue; + fst_pairs->push_back(make_pair(label, fst)); + } +} + +template <class Arc> +void ReplaceUtil<Arc>::GetMutableFstPairs( + vector<MutableFstPair> *mutable_fst_pairs) { + CheckMutableFsts(); + mutable_fst_pairs->clear(); + for (Label i = 0; i < mutable_fst_array_.size(); ++i) { + Label label = nonterminal_array_[i]; + MutableFst<Arc> *fst = mutable_fst_array_[i]; + if (!fst) + continue; + mutable_fst_pairs->push_back(make_pair(label, fst->Copy())); + } +} + +} // namespace fst + +#endif // FST_LIB_REPLACE_UTIL_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/replace.h b/kaldi_io/src/tools/openfst/include/fst/replace.h new file mode 100644 index 0000000..ef5f6cc --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/replace.h @@ -0,0 +1,1453 @@ +// replace.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Johan Schalkwyk) +// +// \file +// Functions and classes for the recursive replacement of Fsts. +// + +#ifndef FST_LIB_REPLACE_H__ +#define FST_LIB_REPLACE_H__ + +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <set> +#include <string> +#include <utility> +using std::pair; using std::make_pair; +#include <vector> +using std::vector; + +#include <fst/cache.h> +#include <fst/expanded-fst.h> +#include <fst/fst.h> +#include <fst/matcher.h> +#include <fst/replace-util.h> +#include <fst/state-table.h> +#include <fst/test-properties.h> + +namespace fst { + +// +// REPLACE STATE TUPLES AND TABLES +// +// The replace state table has the form +// +// template <class A, class P> +// class ReplaceStateTable { +// public: +// typedef A Arc; +// typedef P PrefixId; +// typedef typename A::StateId StateId; +// typedef ReplaceStateTuple<StateId, PrefixId> StateTuple; +// typedef typename A::Label Label; +// +// // Required constructor +// ReplaceStateTable(const vector<pair<Label, const Fst<A>*> > &fst_tuples, +// Label root); +// +// // Required copy constructor that does not copy state +// ReplaceStateTable(const ReplaceStateTable<A,P> &table); +// +// // Lookup state ID by tuple. If it doesn't exist, then add it. +// StateId FindState(const StateTuple &tuple); +// +// // Lookup state tuple by ID. 
+// const StateTuple &Tuple(StateId id) const; +// }; + + +// \struct ReplaceStateTuple +// \brief Tuple of information that uniquely defines a state in replace +template <class S, class P> +struct ReplaceStateTuple { + typedef S StateId; + typedef P PrefixId; + + ReplaceStateTuple() + : prefix_id(-1), fst_id(kNoStateId), fst_state(kNoStateId) {} + + ReplaceStateTuple(PrefixId p, StateId f, StateId s) + : prefix_id(p), fst_id(f), fst_state(s) {} + + PrefixId prefix_id; // index in prefix table + StateId fst_id; // current fst being walked + StateId fst_state; // current state in fst being walked, not to be + // confused with the state_id of the combined fst +}; + + +// Equality of replace state tuples. +template <class S, class P> +inline bool operator==(const ReplaceStateTuple<S, P>& x, + const ReplaceStateTuple<S, P>& y) { + return x.prefix_id == y.prefix_id && + x.fst_id == y.fst_id && + x.fst_state == y.fst_state; +} + + +// \class ReplaceRootSelector +// Functor returning true for tuples corresponding to states in the root FST +template <class S, class P> +class ReplaceRootSelector { + public: + bool operator()(const ReplaceStateTuple<S, P> &tuple) const { + return tuple.prefix_id == 0; + } +}; + + +// \class ReplaceFingerprint +// Fingerprint for general replace state tuples. +template <class S, class P> +class ReplaceFingerprint { + public: + ReplaceFingerprint(const vector<uint64> *size_array) + : cumulative_size_array_(size_array) {} + + uint64 operator()(const ReplaceStateTuple<S, P> &tuple) const { + return tuple.prefix_id * (cumulative_size_array_->back()) + + cumulative_size_array_->at(tuple.fst_id - 1) + + tuple.fst_state; + } + + private: + const vector<uint64> *cumulative_size_array_; +}; + + +// \class ReplaceFstStateFingerprint +// Useful when the fst_state uniquely define the tuple. 
+template <class S, class P> +class ReplaceFstStateFingerprint { + public: + uint64 operator()(const ReplaceStateTuple<S, P>& tuple) const { + return tuple.fst_state; + } +}; + + +// \class ReplaceHash +// A generic hash function for replace state tuples. +template <typename S, typename P> +class ReplaceHash { + public: + size_t operator()(const ReplaceStateTuple<S, P>& t) const { + return t.prefix_id + t.fst_id * kPrime0 + t.fst_state * kPrime1; + } + private: + static const size_t kPrime0; + static const size_t kPrime1; +}; + +template <typename S, typename P> +const size_t ReplaceHash<S, P>::kPrime0 = 7853; + +template <typename S, typename P> +const size_t ReplaceHash<S, P>::kPrime1 = 7867; + +template <class A, class T> class ReplaceFstMatcher; + + +// \class VectorHashReplaceStateTable +// A two-level state table for replace. +// Warning: calls CountStates to compute the number of states of each +// component Fst. +template <class A, class P = ssize_t> +class VectorHashReplaceStateTable { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef P PrefixId; + typedef ReplaceStateTuple<StateId, P> StateTuple; + typedef VectorHashStateTable<ReplaceStateTuple<StateId, P>, + ReplaceRootSelector<StateId, P>, + ReplaceFstStateFingerprint<StateId, P>, + ReplaceFingerprint<StateId, P> > StateTable; + + VectorHashReplaceStateTable( + const vector<pair<Label, const Fst<A>*> > &fst_tuples, + Label root) : root_size_(0) { + cumulative_size_array_.push_back(0); + for (size_t i = 0; i < fst_tuples.size(); ++i) { + if (fst_tuples[i].first == root) { + root_size_ = CountStates(*(fst_tuples[i].second)); + cumulative_size_array_.push_back(cumulative_size_array_.back()); + } else { + cumulative_size_array_.push_back(cumulative_size_array_.back() + + CountStates(*(fst_tuples[i].second))); + } + } + state_table_ = new StateTable( + new ReplaceRootSelector<StateId, P>, + new ReplaceFstStateFingerprint<StateId, P>, + new 
ReplaceFingerprint<StateId, P>(&cumulative_size_array_), + root_size_, + root_size_ + cumulative_size_array_.back()); + } + + VectorHashReplaceStateTable(const VectorHashReplaceStateTable<A, P> &table) + : root_size_(table.root_size_), + cumulative_size_array_(table.cumulative_size_array_) { + state_table_ = new StateTable( + new ReplaceRootSelector<StateId, P>, + new ReplaceFstStateFingerprint<StateId, P>, + new ReplaceFingerprint<StateId, P>(&cumulative_size_array_), + root_size_, + root_size_ + cumulative_size_array_.back()); + } + + ~VectorHashReplaceStateTable() { + delete state_table_; + } + + StateId FindState(const StateTuple &tuple) { + return state_table_->FindState(tuple); + } + + const StateTuple &Tuple(StateId id) const { + return state_table_->Tuple(id); + } + + private: + StateId root_size_; + vector<uint64> cumulative_size_array_; + StateTable *state_table_; +}; + + +// \class DefaultReplaceStateTable +// Default replace state table +template <class A, class P = ssize_t> +class DefaultReplaceStateTable : public CompactHashStateTable< + ReplaceStateTuple<typename A::StateId, P>, + ReplaceHash<typename A::StateId, P> > { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef P PrefixId; + typedef ReplaceStateTuple<StateId, P> StateTuple; + typedef CompactHashStateTable<StateTuple, + ReplaceHash<StateId, PrefixId> > StateTable; + + using StateTable::FindState; + using StateTable::Tuple; + + DefaultReplaceStateTable( + const vector<pair<Label, const Fst<A>*> > &fst_tuples, + Label root) {} + + DefaultReplaceStateTable(const DefaultReplaceStateTable<A, P> &table) + : StateTable() {} +}; + +// +// REPLACE FST CLASS +// + +// By default ReplaceFst will copy the input label of the 'replace arc'. +// For acceptors we do not want this behaviour. Instead we need to +// create an epsilon arc when recursing into the appropriate Fst. +// The 'epsilon_on_replace' option can be used to toggle this behaviour. 
+template <class A, class T = DefaultReplaceStateTable<A> > +struct ReplaceFstOptions : CacheOptions { + int64 root; // root rule for expansion + bool epsilon_on_replace; + bool take_ownership; // take ownership of input Fst(s) + T* state_table; + + ReplaceFstOptions(const CacheOptions &opts, int64 r) + : CacheOptions(opts), + root(r), + epsilon_on_replace(false), + take_ownership(false), + state_table(0) {} + explicit ReplaceFstOptions(int64 r) + : root(r), + epsilon_on_replace(false), + take_ownership(false), + state_table(0) {} + ReplaceFstOptions(int64 r, bool epsilon_replace_arc) + : root(r), + epsilon_on_replace(epsilon_replace_arc), + take_ownership(false), + state_table(0) {} + ReplaceFstOptions() + : root(kNoLabel), + epsilon_on_replace(false), + take_ownership(false), + state_table(0) {} +}; + + +// \class ReplaceFstImpl +// \brief Implementation class for replace class Fst +// +// The replace implementation class supports a dynamic +// expansion of a recursive transition network represented as Fst +// with dynamic replacable arcs. 
+// +template <class A, class T> +class ReplaceFstImpl : public CacheImpl<A> { + friend class ReplaceFstMatcher<A, T>; + + public: + using FstImpl<A>::SetType; + using FstImpl<A>::SetProperties; + using FstImpl<A>::WriteHeader; + using FstImpl<A>::SetInputSymbols; + using FstImpl<A>::SetOutputSymbols; + using FstImpl<A>::InputSymbols; + using FstImpl<A>::OutputSymbols; + + using CacheImpl<A>::PushArc; + using CacheImpl<A>::HasArcs; + using CacheImpl<A>::HasFinal; + using CacheImpl<A>::HasStart; + using CacheImpl<A>::SetArcs; + using CacheImpl<A>::SetFinal; + using CacheImpl<A>::SetStart; + + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef CacheState<A> State; + typedef A Arc; + typedef unordered_map<Label, Label> NonTerminalHash; + + typedef T StateTable; + typedef typename T::PrefixId PrefixId; + typedef ReplaceStateTuple<StateId, PrefixId> StateTuple; + + // constructor for replace class implementation. + // \param fst_tuples array of label/fst tuples, one for each non-terminal + ReplaceFstImpl(const vector< pair<Label, const Fst<A>* > >& fst_tuples, + const ReplaceFstOptions<A, T> &opts) + : CacheImpl<A>(opts), + epsilon_on_replace_(opts.epsilon_on_replace), + state_table_(opts.state_table ? opts.state_table : + new StateTable(fst_tuples, opts.root)) { + + SetType("replace"); + + if (fst_tuples.size() > 0) { + SetInputSymbols(fst_tuples[0].second->InputSymbols()); + SetOutputSymbols(fst_tuples[0].second->OutputSymbols()); + } + + bool all_negative = true; // all nonterminals are negative? + bool dense_range = true; // all nonterminals are positive + // and form a dense range containing 1? 
+ for (size_t i = 0; i < fst_tuples.size(); ++i) { + Label nonterminal = fst_tuples[i].first; + if (nonterminal >= 0) + all_negative = false; + if (nonterminal > fst_tuples.size() || nonterminal <= 0) + dense_range = false; + } + + vector<uint64> inprops; + bool all_ilabel_sorted = true; + bool all_olabel_sorted = true; + bool all_non_empty = true; + fst_array_.push_back(0); + for (size_t i = 0; i < fst_tuples.size(); ++i) { + Label label = fst_tuples[i].first; + const Fst<A> *fst = fst_tuples[i].second; + nonterminal_hash_[label] = fst_array_.size(); + nonterminal_set_.insert(label); + fst_array_.push_back(opts.take_ownership ? fst : fst->Copy()); + if (fst->Start() == kNoStateId) + all_non_empty = false; + if(!fst->Properties(kILabelSorted, false)) + all_ilabel_sorted = false; + if(!fst->Properties(kOLabelSorted, false)) + all_olabel_sorted = false; + inprops.push_back(fst->Properties(kCopyProperties, false)); + if (i) { + if (!CompatSymbols(InputSymbols(), fst->InputSymbols())) { + FSTERROR() << "ReplaceFstImpl: input symbols of Fst " << i + << " does not match input symbols of base Fst (0'th fst)"; + SetProperties(kError, kError); + } + if (!CompatSymbols(OutputSymbols(), fst->OutputSymbols())) { + FSTERROR() << "ReplaceFstImpl: output symbols of Fst " << i + << " does not match output symbols of base Fst " + << "(0'th fst)"; + SetProperties(kError, kError); + } + } + } + Label nonterminal = nonterminal_hash_[opts.root]; + if ((nonterminal == 0) && (fst_array_.size() > 1)) { + FSTERROR() << "ReplaceFstImpl: no Fst corresponding to root label '" + << opts.root << "' in the input tuple vector"; + SetProperties(kError, kError); + } + root_ = (nonterminal > 0) ? nonterminal : 1; + + SetProperties(ReplaceProperties(inprops, root_ - 1, epsilon_on_replace_, + all_non_empty)); + // We assume that all terminals are positive. 
The resulting + // ReplaceFst is known to be kILabelSorted when all sub-FSTs are + // kILabelSorted and one of the 3 following conditions is satisfied: + // 1. 'epsilon_on_replace' is false, or + // 2. all non-terminals are negative, or + // 3. all non-terninals are positive and form a dense range containing 1. + if (all_ilabel_sorted && + (!epsilon_on_replace_ || all_negative || dense_range)) + SetProperties(kILabelSorted, kILabelSorted); + // Similarly, the resulting ReplaceFst is known to be + // kOLabelSorted when all sub-FSTs are kOLabelSorted and one of + // the 2 following conditions is satisfied: + // 1. all non-terminals are negative, or + // 2. all non-terninals are positive and form a dense range containing 1. + if (all_olabel_sorted && (all_negative || dense_range)) + SetProperties(kOLabelSorted, kOLabelSorted); + + // Enable optional caching as long as sorted and all non empty. + if (Properties(kILabelSorted | kOLabelSorted) && all_non_empty) + always_cache_ = false; + else + always_cache_ = true; + VLOG(2) << "ReplaceFstImpl::ReplaceFstImpl: always_cache = " + << (always_cache_ ? "true" : "false"); + } + + ReplaceFstImpl(const ReplaceFstImpl& impl) + : CacheImpl<A>(impl), + epsilon_on_replace_(impl.epsilon_on_replace_), + always_cache_(impl.always_cache_), + state_table_(new StateTable(*(impl.state_table_))), + nonterminal_set_(impl.nonterminal_set_), + nonterminal_hash_(impl.nonterminal_hash_), + root_(impl.root_) { + SetType("replace"); + SetProperties(impl.Properties(), kCopyProperties); + SetInputSymbols(impl.InputSymbols()); + SetOutputSymbols(impl.OutputSymbols()); + fst_array_.reserve(impl.fst_array_.size()); + fst_array_.push_back(0); + for (size_t i = 1; i < impl.fst_array_.size(); ++i) { + fst_array_.push_back(impl.fst_array_[i]->Copy(true)); + } + } + + ~ReplaceFstImpl() { + VLOG(2) << "~ReplaceFstImpl: gc = " + << (CacheImpl<A>::GetCacheGc() ? 
"true" : "false") + << ", gc_size = " << CacheImpl<A>::GetCacheSize() + << ", gc_limit = " << CacheImpl<A>::GetCacheLimit(); + + delete state_table_; + for (size_t i = 1; i < fst_array_.size(); ++i) { + delete fst_array_[i]; + } + } + + // Computes the dependency graph of the replace class and returns + // true if the dependencies are cyclic. Cyclic dependencies will result + // in an un-expandable replace fst. + bool CyclicDependencies() const { + ReplaceUtil<A> replace_util(fst_array_, nonterminal_hash_, root_); + return replace_util.CyclicDependencies(); + } + + // Return or compute start state of replace fst + StateId Start() { + if (!HasStart()) { + if (fst_array_.size() == 1) { // no fsts defined for replace + SetStart(kNoStateId); + return kNoStateId; + } else { + const Fst<A>* fst = fst_array_[root_]; + StateId fst_start = fst->Start(); + if (fst_start == kNoStateId) // root Fst is empty + return kNoStateId; + + PrefixId prefix = GetPrefixId(StackPrefix()); + StateId start = state_table_->FindState( + StateTuple(prefix, root_, fst_start)); + SetStart(start); + return start; + } + } else { + return CacheImpl<A>::Start(); + } + } + + // return final weight of state (kInfWeight means state is not final) + Weight Final(StateId s) { + if (!HasFinal(s)) { + const StateTuple& tuple = state_table_->Tuple(s); + const StackPrefix& stack = stackprefix_array_[tuple.prefix_id]; + const Fst<A>* fst = fst_array_[tuple.fst_id]; + StateId fst_state = tuple.fst_state; + + if (fst->Final(fst_state) != Weight::Zero() && stack.Depth() == 0) + SetFinal(s, fst->Final(fst_state)); + else + SetFinal(s, Weight::Zero()); + } + return CacheImpl<A>::Final(s); + } + + size_t NumArcs(StateId s) { + if (HasArcs(s)) { // If state cached, use the cached value. + return CacheImpl<A>::NumArcs(s); + } else if (always_cache_) { // If always caching, expand and cache state. + Expand(s); + return CacheImpl<A>::NumArcs(s); + } else { // Otherwise compute the number of arcs without expanding. 
+ StateTuple tuple = state_table_->Tuple(s); + if (tuple.fst_state == kNoStateId) + return 0; + + const Fst<A>* fst = fst_array_[tuple.fst_id]; + size_t num_arcs = fst->NumArcs(tuple.fst_state); + if (ComputeFinalArc(tuple, 0)) + num_arcs++; + + return num_arcs; + } + } + + // Returns whether a given label is a non terminal + bool IsNonTerminal(Label l) const { + // TODO(allauzen): be smarter and take advantage of + // all_dense or all_negative. + // Use also in ComputeArc, this would require changes to replace + // so that recursing into an empty fst lead to a non co-accessible + // state instead of deleting the arc as done currently. + // Current use correct, since i/olabel sorted iff all_non_empty. + typename NonTerminalHash::const_iterator it = + nonterminal_hash_.find(l); + return it != nonterminal_hash_.end(); + } + + size_t NumInputEpsilons(StateId s) { + if (HasArcs(s)) { + // If state cached, use the cached value. + return CacheImpl<A>::NumInputEpsilons(s); + } else if (always_cache_ || !Properties(kILabelSorted)) { + // If always caching or if the number of input epsilons is too expensive + // to compute without caching (i.e. not ilabel sorted), + // then expand and cache state. + Expand(s); + return CacheImpl<A>::NumInputEpsilons(s); + } else { + // Otherwise, compute the number of input epsilons without caching. + StateTuple tuple = state_table_->Tuple(s); + if (tuple.fst_state == kNoStateId) + return 0; + const Fst<A>* fst = fst_array_[tuple.fst_id]; + size_t num = 0; + if (!epsilon_on_replace_) { + // If epsilon_on_replace is false, all input epsilon arcs + // are also input epsilons arcs in the underlying machine. + fst->NumInputEpsilons(tuple.fst_state); + } else { + // Otherwise, one need to consider that all non-terminal arcs + // in the underlying machine also become input epsilon arc. 
+ ArcIterator<Fst<A> > aiter(*fst, tuple.fst_state); + for (; !aiter.Done() && + ((aiter.Value().ilabel == 0) || + IsNonTerminal(aiter.Value().olabel)); + aiter.Next()) + ++num; + } + if (ComputeFinalArc(tuple, 0)) + num++; + return num; + } + } + + size_t NumOutputEpsilons(StateId s) { + if (HasArcs(s)) { + // If state cached, use the cached value. + return CacheImpl<A>::NumOutputEpsilons(s); + } else if(always_cache_ || !Properties(kOLabelSorted)) { + // If always caching or if the number of output epsilons is too expensive + // to compute without caching (i.e. not olabel sorted), + // then expand and cache state. + Expand(s); + return CacheImpl<A>::NumOutputEpsilons(s); + } else { + // Otherwise, compute the number of output epsilons without caching. + StateTuple tuple = state_table_->Tuple(s); + if (tuple.fst_state == kNoStateId) + return 0; + const Fst<A>* fst = fst_array_[tuple.fst_id]; + size_t num = 0; + ArcIterator<Fst<A> > aiter(*fst, tuple.fst_state); + for (; !aiter.Done() && + ((aiter.Value().olabel == 0) || + IsNonTerminal(aiter.Value().olabel)); + aiter.Next()) + ++num; + if (ComputeFinalArc(tuple, 0)) + num++; + return num; + } + } + + uint64 Properties() const { return Properties(kFstProperties); } + + // Set error if found; return FST impl properties. + uint64 Properties(uint64 mask) const { + if (mask & kError) { + for (size_t i = 1; i < fst_array_.size(); ++i) { + if (fst_array_[i]->Properties(kError, false)) + SetProperties(kError, kError); + } + } + return FstImpl<Arc>::Properties(mask); + } + + // return the base arc iterator, if arcs have not been computed yet, + // extend/recurse for new arcs. + void InitArcIterator(StateId s, ArcIteratorData<A> *data) { + if (!HasArcs(s)) + Expand(s); + CacheImpl<A>::InitArcIterator(s, data); + // TODO(allauzen): Set behaviour of generic iterator + // Warning: ArcIterator<ReplaceFst<A> >::InitCache() + // relies on current behaviour. 
+ } + + + // Extend current state (walk arcs one level deep) + void Expand(StateId s) { + StateTuple tuple = state_table_->Tuple(s); + + // If local fst is empty + if (tuple.fst_state == kNoStateId) { + SetArcs(s); + return; + } + + ArcIterator< Fst<A> > aiter( + *(fst_array_[tuple.fst_id]), tuple.fst_state); + Arc arc; + + // Create a final arc when needed + if (ComputeFinalArc(tuple, &arc)) + PushArc(s, arc); + + // Expand all arcs leaving the state + for (;!aiter.Done(); aiter.Next()) { + if (ComputeArc(tuple, aiter.Value(), &arc)) + PushArc(s, arc); + } + + SetArcs(s); + } + + void Expand(StateId s, const StateTuple &tuple, + const ArcIteratorData<A> &data) { + // If local fst is empty + if (tuple.fst_state == kNoStateId) { + SetArcs(s); + return; + } + + ArcIterator< Fst<A> > aiter(data); + Arc arc; + + // Create a final arc when needed + if (ComputeFinalArc(tuple, &arc)) + AddArc(s, arc); + + // Expand all arcs leaving the state + for (; !aiter.Done(); aiter.Next()) { + if (ComputeArc(tuple, aiter.Value(), &arc)) + AddArc(s, arc); + } + + SetArcs(s); + } + + // If arcp == 0, only returns if a final arc is required, does not + // actually compute it. 
+ bool ComputeFinalArc(const StateTuple &tuple, A* arcp, + uint32 flags = kArcValueFlags) { + const Fst<A>* fst = fst_array_[tuple.fst_id]; + StateId fst_state = tuple.fst_state; + if (fst_state == kNoStateId) + return false; + + // if state is final, pop up stack + const StackPrefix& stack = stackprefix_array_[tuple.prefix_id]; + if (fst->Final(fst_state) != Weight::Zero() && stack.Depth()) { + if (arcp) { + arcp->ilabel = 0; + arcp->olabel = 0; + if (flags & kArcNextStateValue) { + PrefixId prefix_id = PopPrefix(stack); + const PrefixTuple& top = stack.Top(); + arcp->nextstate = state_table_->FindState( + StateTuple(prefix_id, top.fst_id, top.nextstate)); + } + if (flags & kArcWeightValue) + arcp->weight = fst->Final(fst_state); + } + return true; + } else { + return false; + } + } + + // Compute the arc in the replace fst corresponding to a given + // in the underlying machine. Returns false if the underlying arc + // corresponds to no arc in the replace. + bool ComputeArc(const StateTuple &tuple, const A &arc, A* arcp, + uint32 flags = kArcValueFlags) { + if (!epsilon_on_replace_ && + (flags == (flags & (kArcILabelValue | kArcWeightValue)))) { + *arcp = arc; + return true; + } + + if (arc.olabel == 0) { // expand local fst + StateId nextstate = flags & kArcNextStateValue + ? 
state_table_->FindState( + StateTuple(tuple.prefix_id, tuple.fst_id, arc.nextstate)) + : kNoStateId; + *arcp = A(arc.ilabel, arc.olabel, arc.weight, nextstate); + } else { + // check for non terminal + typename NonTerminalHash::const_iterator it = + nonterminal_hash_.find(arc.olabel); + if (it != nonterminal_hash_.end()) { // recurse into non terminal + Label nonterminal = it->second; + const Fst<A>* nt_fst = fst_array_[nonterminal]; + PrefixId nt_prefix = PushPrefix(stackprefix_array_[tuple.prefix_id], + tuple.fst_id, arc.nextstate); + + // if start state is valid replace, else arc is implicitly + // deleted + StateId nt_start = nt_fst->Start(); + if (nt_start != kNoStateId) { + StateId nt_nextstate = flags & kArcNextStateValue + ? state_table_->FindState( + StateTuple(nt_prefix, nonterminal, nt_start)) + : kNoStateId; + Label ilabel = (epsilon_on_replace_) ? 0 : arc.ilabel; + *arcp = A(ilabel, 0, arc.weight, nt_nextstate); + } else { + return false; + } + } else { + StateId nextstate = flags & kArcNextStateValue + ? state_table_->FindState( + StateTuple(tuple.prefix_id, tuple.fst_id, arc.nextstate)) + : kNoStateId; + *arcp = A(arc.ilabel, arc.olabel, arc.weight, nextstate); + } + } + return true; + } + + // Returns the arc iterator flags supported by this Fst. + uint32 ArcIteratorFlags() const { + uint32 flags = kArcValueFlags; + if (!always_cache_) + flags |= kArcNoCache; + return flags; + } + + T* GetStateTable() const { + return state_table_; + } + + const Fst<A>* GetFst(Label fst_id) const { + return fst_array_[fst_id]; + } + + bool EpsilonOnReplace() const { return epsilon_on_replace_; } + + // private helper classes + private: + static const size_t kPrime0; + + // \class PrefixTuple + // \brief Tuple of fst_id and destination state (entry in stack prefix) + struct PrefixTuple { + PrefixTuple(Label f, StateId s) : fst_id(f), nextstate(s) {} + + Label fst_id; + StateId nextstate; + }; + + // \class StackPrefix + // \brief Container for stack prefix. 
+ class StackPrefix { + public: + StackPrefix() {} + + // copy constructor + StackPrefix(const StackPrefix& x) : + prefix_(x.prefix_) { + } + + void Push(StateId fst_id, StateId nextstate) { + prefix_.push_back(PrefixTuple(fst_id, nextstate)); + } + + void Pop() { + prefix_.pop_back(); + } + + const PrefixTuple& Top() const { + return prefix_[prefix_.size()-1]; + } + + size_t Depth() const { + return prefix_.size(); + } + + public: + vector<PrefixTuple> prefix_; + }; + + + // \class StackPrefixEqual + // \brief Compare two stack prefix classes for equality + class StackPrefixEqual { + public: + bool operator()(const StackPrefix& x, const StackPrefix& y) const { + if (x.prefix_.size() != y.prefix_.size()) return false; + for (size_t i = 0; i < x.prefix_.size(); ++i) { + if (x.prefix_[i].fst_id != y.prefix_[i].fst_id || + x.prefix_[i].nextstate != y.prefix_[i].nextstate) return false; + } + return true; + } + }; + + // + // \class StackPrefixKey + // \brief Hash function for stack prefix to prefix id + class StackPrefixKey { + public: + size_t operator()(const StackPrefix& x) const { + size_t sum = 0; + for (size_t i = 0; i < x.prefix_.size(); ++i) { + sum += x.prefix_[i].fst_id + x.prefix_[i].nextstate*kPrime0; + } + return sum; + } + }; + + typedef unordered_map<StackPrefix, PrefixId, StackPrefixKey, StackPrefixEqual> + StackPrefixHash; + + // private methods + private: + // hash stack prefix (return unique index into stackprefix array) + PrefixId GetPrefixId(const StackPrefix& prefix) { + typename StackPrefixHash::iterator it = prefix_hash_.find(prefix); + if (it == prefix_hash_.end()) { + PrefixId prefix_id = stackprefix_array_.size(); + stackprefix_array_.push_back(prefix); + prefix_hash_[prefix] = prefix_id; + return prefix_id; + } else { + return it->second; + } + } + + // prefix id after a stack pop + PrefixId PopPrefix(StackPrefix prefix) { + prefix.Pop(); + return GetPrefixId(prefix); + } + + // prefix id after a stack push + PrefixId PushPrefix(StackPrefix 
prefix, Label fst_id, StateId nextstate) { + prefix.Push(fst_id, nextstate); + return GetPrefixId(prefix); + } + + + // private data + private: + // runtime options + bool epsilon_on_replace_; + bool always_cache_; // Optionally caching arc iterator disabled when true + + // state table + StateTable *state_table_; + + // cross index of unique stack prefix + // could potentially have one copy of prefix array + StackPrefixHash prefix_hash_; + vector<StackPrefix> stackprefix_array_; + + set<Label> nonterminal_set_; + NonTerminalHash nonterminal_hash_; + vector<const Fst<A>*> fst_array_; + Label root_; + + void operator=(const ReplaceFstImpl<A, T> &); // disallow +}; + + +template <class A, class T> +const size_t ReplaceFstImpl<A, T>::kPrime0 = 7853; + +// +// \class ReplaceFst +// \brief Recursivively replaces arcs in the root Fst with other Fsts. +// This version is a delayed Fst. +// +// ReplaceFst supports dynamic replacement of arcs in one Fst with +// another Fst. This replacement is recursive. ReplaceFst can be used +// to support a variety of delayed constructions such as recursive +// transition networks, union, or closure. It is constructed with an +// array of Fst(s). One Fst represents the root (or topology) +// machine. The root Fst refers to other Fsts by recursively replacing +// arcs labeled as non-terminals with the matching non-terminal +// Fst. Currently the ReplaceFst uses the output symbols of the arcs +// to determine whether the arc is a non-terminal arc or not. A +// non-terminal can be any label that is not a non-zero terminal label +// in the output alphabet. +// +// Note that the constructor uses a vector of pair<>. These correspond +// to the tuple of non-terminal Label and corresponding Fst. For example +// to implement the closure operation we need 2 Fsts. The first root +// Fst is a single Arc on the start State that self loops, it references +// the particular machine for which we are performing the closure operation. 
+// +// The ReplaceFst class supports an optionally caching arc iterator: +// ArcIterator< ReplaceFst<A> > +// The ReplaceFst need to be built such that it is known to be ilabel +// or olabel sorted (see usage below). +// +// Observe that Matcher<Fst<A> > will use the optionally caching arc +// iterator when available (Fst is ilabel sorted and matching on the +// input, or Fst is olabel sorted and matching on the output). +// In order to obtain the most efficient behaviour, it is recommended +// to set 'epsilon_on_replace' to false (this means constructing acceptors +// as transducers with epsilons on the input side of nonterminal arcs) +// and matching on the input side. +// +// This class attaches interface to implementation and handles +// reference counting, delegating most methods to ImplToFst. +template <class A, class T = DefaultReplaceStateTable<A> > +class ReplaceFst : public ImplToFst< ReplaceFstImpl<A, T> > { + public: + friend class ArcIterator< ReplaceFst<A, T> >; + friend class StateIterator< ReplaceFst<A, T> >; + friend class ReplaceFstMatcher<A, T>; + + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef CacheState<A> State; + typedef ReplaceFstImpl<A, T> Impl; + + using ImplToFst<Impl>::Properties; + + ReplaceFst(const vector<pair<Label, const Fst<A>* > >& fst_array, + Label root) + : ImplToFst<Impl>(new Impl(fst_array, ReplaceFstOptions<A, T>(root))) {} + + ReplaceFst(const vector<pair<Label, const Fst<A>* > >& fst_array, + const ReplaceFstOptions<A, T> &opts) + : ImplToFst<Impl>(new Impl(fst_array, opts)) {} + + // See Fst<>::Copy() for doc. + ReplaceFst(const ReplaceFst<A, T>& fst, bool safe = false) + : ImplToFst<Impl>(fst, safe) {} + + // Get a copy of this ReplaceFst. See Fst<>::Copy() for further doc. 
+ virtual ReplaceFst<A, T> *Copy(bool safe = false) const { + return new ReplaceFst<A, T>(*this, safe); + } + + virtual inline void InitStateIterator(StateIteratorData<A> *data) const; + + virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { + GetImpl()->InitArcIterator(s, data); + } + + virtual MatcherBase<A> *InitMatcher(MatchType match_type) const { + if ((GetImpl()->ArcIteratorFlags() & kArcNoCache) && + ((match_type == MATCH_INPUT && Properties(kILabelSorted, false)) || + (match_type == MATCH_OUTPUT && Properties(kOLabelSorted, false)))) { + return new ReplaceFstMatcher<A, T>(*this, match_type); + } + else { + VLOG(2) << "Not using replace matcher"; + return 0; + } + } + + bool CyclicDependencies() const { + return GetImpl()->CyclicDependencies(); + } + + private: + // Makes visible to friends. + Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } + + void operator=(const ReplaceFst<A> &fst); // disallow +}; + + +// Specialization for ReplaceFst. +template<class A, class T> +class StateIterator< ReplaceFst<A, T> > + : public CacheStateIterator< ReplaceFst<A, T> > { + public: + explicit StateIterator(const ReplaceFst<A, T> &fst) + : CacheStateIterator< ReplaceFst<A, T> >(fst, fst.GetImpl()) {} + + private: + DISALLOW_COPY_AND_ASSIGN(StateIterator); +}; + + +// Specialization for ReplaceFst. +// Implements optional caching. It can be used as follows: +// +// ReplaceFst<A> replace; +// ArcIterator< ReplaceFst<A> > aiter(replace, s); +// // Note: ArcIterator< Fst<A> > is always a caching arc iterator. +// aiter.SetFlags(kArcNoCache, kArcNoCache); +// // Use the arc iterator, no arc will be cached, no state will be expanded. +// // The varied 'kArcValueFlags' can be used to decide which part +// // of arc values needs to be computed. +// aiter.SetFlags(kArcILabelValue, kArcValueFlags); +// // Only want the ilabel for this arc +// aiter.Value(); // Does not compute the destination state. 
+// aiter.Next(); +// aiter.SetFlags(kArcNextStateValue, kArcNextStateValue); +// // Want both ilabel and nextstate for that arc +// aiter.Value(); // Does compute the destination state and inserts it +// // in the replace state table. +// // No Arc has been cached at that point. +// +template <class A, class T> +class ArcIterator< ReplaceFst<A, T> > { + public: + typedef A Arc; + typedef typename A::StateId StateId; + + ArcIterator(const ReplaceFst<A, T> &fst, StateId s) + : fst_(fst), state_(s), pos_(0), offset_(0), flags_(0), arcs_(0), + data_flags_(0), final_flags_(0) { + cache_data_.ref_count = 0; + local_data_.ref_count = 0; + + // If FST does not support optional caching, force caching. + if(!(fst_.GetImpl()->ArcIteratorFlags() & kArcNoCache) && + !(fst_.GetImpl()->HasArcs(state_))) + fst_.GetImpl()->Expand(state_); + + // If state is already cached, use cached arcs array. + if (fst_.GetImpl()->HasArcs(state_)) { + (fst_.GetImpl())->template CacheImpl<A>::InitArcIterator(state_, + &cache_data_); + num_arcs_ = cache_data_.narcs; + arcs_ = cache_data_.arcs; // 'arcs_' is a ptr to the cached arcs. + data_flags_ = kArcValueFlags; // All the arc member values are valid. + } else { // Otherwise delay decision until Value() is called. + tuple_ = fst_.GetImpl()->GetStateTable()->Tuple(state_); + if (tuple_.fst_state == kNoStateId) { + num_arcs_ = 0; + } else { + // The decision to cache or not to cache has been defered + // until Value() or SetFlags() is called. However, the arc + // iterator is set up now to be ready for non-caching in order + // to keep the Value() method simple and efficient. + const Fst<A>* fst = fst_.GetImpl()->GetFst(tuple_.fst_id); + fst->InitArcIterator(tuple_.fst_state, &local_data_); + // 'arcs_' is a pointer to the arcs in the underlying machine. + arcs_ = local_data_.arcs; + // Compute the final arc (but not its destination state) + // if a final arc is required. 
+ bool has_final_arc = fst_.GetImpl()->ComputeFinalArc( + tuple_, + &final_arc_, + kArcValueFlags & ~kArcNextStateValue); + // Set the arc value flags that hold for 'final_arc_'. + final_flags_ = kArcValueFlags & ~kArcNextStateValue; + // Compute the number of arcs. + num_arcs_ = local_data_.narcs; + if (has_final_arc) + ++num_arcs_; + // Set the offset between the underlying arc positions and + // the positions in the arc iterator. + offset_ = num_arcs_ - local_data_.narcs; + // Defers the decision to cache or not until Value() or + // SetFlags() is called. + data_flags_ = 0; + } + } + } + + ~ArcIterator() { + if (cache_data_.ref_count) + --(*cache_data_.ref_count); + if (local_data_.ref_count) + --(*local_data_.ref_count); + } + + void ExpandAndCache() const { + // TODO(allauzen): revisit this + // fst_.GetImpl()->Expand(state_, tuple_, local_data_); + // (fst_.GetImpl())->CacheImpl<A>*>::InitArcIterator(state_, + // &cache_data_); + // + fst_.InitArcIterator(state_, &cache_data_); // Expand and cache state. + arcs_ = cache_data_.arcs; // 'arcs_' is a pointer to the cached arcs. + data_flags_ = kArcValueFlags; // All the arc member values are valid. + offset_ = 0; // No offset + + } + + void Init() { + if (flags_ & kArcNoCache) { // If caching is disabled + // 'arcs_' is a pointer to the arcs in the underlying machine. + arcs_ = local_data_.arcs; + // Set the arcs value flags that hold for 'arcs_'. + data_flags_ = kArcWeightValue; + if (!fst_.GetImpl()->EpsilonOnReplace()) + data_flags_ |= kArcILabelValue; + // Set the offset between the underlying arc positions and + // the positions in the arc iterator. + offset_ = num_arcs_ - local_data_.narcs; + } else { // Otherwise, expand and cache + ExpandAndCache(); + } + } + + bool Done() const { return pos_ >= num_arcs_; } + + const A& Value() const { + // If 'data_flags_' was set to 0, non-caching was not requested + if (!data_flags_) { + // TODO(allauzen): revisit this. 
+ if (flags_ & kArcNoCache) { + // Should never happen. + FSTERROR() << "ReplaceFst: inconsistent arc iterator flags"; + } + ExpandAndCache(); // Expand and cache. + } + + if (pos_ - offset_ >= 0) { // The requested arc is not the 'final' arc. + const A& arc = arcs_[pos_ - offset_]; + if ((data_flags_ & flags_) == (flags_ & kArcValueFlags)) { + // If the value flags for 'arc' match the recquired value flags + // then return 'arc'. + return arc; + } else { + // Otherwise, compute the corresponding arc on-the-fly. + fst_.GetImpl()->ComputeArc(tuple_, arc, &arc_, flags_ & kArcValueFlags); + return arc_; + } + } else { // The requested arc is the 'final' arc. + if ((final_flags_ & flags_) != (flags_ & kArcValueFlags)) { + // If the arc value flags that hold for the final arc + // do not match the requested value flags, then + // 'final_arc_' needs to be updated. + fst_.GetImpl()->ComputeFinalArc(tuple_, &final_arc_, + flags_ & kArcValueFlags); + final_flags_ = flags_ & kArcValueFlags; + } + return final_arc_; + } + } + + void Next() { ++pos_; } + + size_t Position() const { return pos_; } + + void Reset() { pos_ = 0; } + + void Seek(size_t pos) { pos_ = pos; } + + uint32 Flags() const { return flags_; } + + void SetFlags(uint32 f, uint32 mask) { + // Update the flags taking into account what flags are supported + // by the Fst. + flags_ &= ~mask; + flags_ |= (f & fst_.GetImpl()->ArcIteratorFlags()); + // If non-caching is not requested (and caching has not already + // been performed), then flush 'data_flags_' to request caching + // during the next call to Value(). + if (!(flags_ & kArcNoCache) && data_flags_ != kArcValueFlags) { + if (!fst_.GetImpl()->HasArcs(state_)) + data_flags_ = 0; + } + // If 'data_flags_' has been flushed but non-caching is requested + // before calling Value(), then set up the iterator for non-caching. 
+ if ((f & kArcNoCache) && (!data_flags_)) + Init(); + } + + private: + const ReplaceFst<A, T> &fst_; // Reference to the FST + StateId state_; // State in the FST + mutable typename T::StateTuple tuple_; // Tuple corresponding to state_ + + ssize_t pos_; // Current position + mutable ssize_t offset_; // Offset between position in iterator and in arcs_ + ssize_t num_arcs_; // Number of arcs at state_ + uint32 flags_; // Behavorial flags for the arc iterator + mutable Arc arc_; // Memory to temporarily store computed arcs + + mutable ArcIteratorData<Arc> cache_data_; // Arc iterator data in cache + mutable ArcIteratorData<Arc> local_data_; // Arc iterator data in local fst + + mutable const A* arcs_; // Array of arcs + mutable uint32 data_flags_; // Arc value flags valid for data in arcs_ + mutable Arc final_arc_; // Final arc (when required) + mutable uint32 final_flags_; // Arc value flags valid for final_arc_ + + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + + +template <class A, class T> +class ReplaceFstMatcher : public MatcherBase<A> { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef MultiEpsMatcher<Matcher<Fst<A> > > LocalMatcher; + + ReplaceFstMatcher(const ReplaceFst<A, T> &fst, fst::MatchType match_type) + : fst_(fst), + impl_(fst_.GetImpl()), + s_(fst::kNoStateId), + match_type_(match_type), + current_loop_(false), + final_arc_(false), + loop_(fst::kNoLabel, 0, A::Weight::One(), fst::kNoStateId) { + if (match_type_ == fst::MATCH_OUTPUT) + swap(loop_.ilabel, loop_.olabel); + InitMatchers(); + } + + ReplaceFstMatcher(const ReplaceFstMatcher<A, T> &matcher, bool safe = false) + : fst_(matcher.fst_), + impl_(fst_.GetImpl()), + s_(fst::kNoStateId), + match_type_(matcher.match_type_), + current_loop_(false), + loop_(fst::kNoLabel, 0, A::Weight::One(), fst::kNoStateId) { + if (match_type_ == fst::MATCH_OUTPUT) + swap(loop_.ilabel, loop_.olabel); + InitMatchers(); + } + + // Create a local matcher for 
each component Fst of replace. + // LocalMatcher is a multi epsilon wrapper matcher. MultiEpsilonMatcher + // is used to match each non-terminal arc, since these non-terminal + // turn into epsilons on recursion. + void InitMatchers() { + const vector<const Fst<A>*>& fst_array = impl_->fst_array_; + matcher_.resize(fst_array.size(), 0); + for (size_t i = 0; i < fst_array.size(); ++i) { + if (fst_array[i]) { + matcher_[i] = + new LocalMatcher(*fst_array[i], match_type_, kMultiEpsList); + + typename set<Label>::iterator it = impl_->nonterminal_set_.begin(); + for (; it != impl_->nonterminal_set_.end(); ++it) { + matcher_[i]->AddMultiEpsLabel(*it); + } + } + } + } + + virtual ReplaceFstMatcher<A, T> *Copy(bool safe = false) const { + return new ReplaceFstMatcher<A, T>(*this, safe); + } + + virtual ~ReplaceFstMatcher() { + for (size_t i = 0; i < matcher_.size(); ++i) + delete matcher_[i]; + } + + virtual MatchType Type(bool test) const { + if (match_type_ == MATCH_NONE) + return match_type_; + + uint64 true_prop = match_type_ == MATCH_INPUT ? + kILabelSorted : kOLabelSorted; + uint64 false_prop = match_type_ == MATCH_INPUT ? + kNotILabelSorted : kNotOLabelSorted; + uint64 props = fst_.Properties(true_prop | false_prop, test); + + if (props & true_prop) + return match_type_; + else if (props & false_prop) + return MATCH_NONE; + else + return MATCH_UNKNOWN; + } + + virtual const Fst<A> &GetFst() const { + return fst_; + } + + virtual uint64 Properties(uint64 props) const { + return props; + } + + private: + // Set the sate from which our matching happens. + virtual void SetState_(StateId s) { + if (s_ == s) return; + + s_ = s; + tuple_ = impl_->GetStateTable()->Tuple(s_); + if (tuple_.fst_state == kNoStateId) { + done_ = true; + return; + } + // Get current matcher. 
Used for non epsilon matching + current_matcher_ = matcher_[tuple_.fst_id]; + current_matcher_->SetState(tuple_.fst_state); + loop_.nextstate = s_; + + final_arc_ = false; + } + + // Search for label, from previous set state. If label == 0, first + // hallucinate and epsilon loop, else use the underlying matcher to + // search for the label or epsilons. + // - Note since the ReplaceFST recursion on non-terminal arcs causes + // epsilon transitions to be created we use the MultiEpsilonMatcher + // to search for possible matches of non terminals. + // - If the component Fst reaches a final state we also need to add + // the exiting final arc. + virtual bool Find_(Label label) { + bool found = false; + label_ = label; + if (label_ == 0 || label_ == kNoLabel) { + // Compute loop directly, saving Replace::ComputeArc + if (label_ == 0) { + current_loop_ = true; + found = true; + } + // Search for matching multi epsilons + final_arc_ = impl_->ComputeFinalArc(tuple_, 0); + found = current_matcher_->Find(kNoLabel) || final_arc_ || found; + } else { + // Search on sub machine directly using sub machine matcher. 
+ found = current_matcher_->Find(label_); + } + return found; + } + + virtual bool Done_() const { + return !current_loop_ && !final_arc_ && current_matcher_->Done(); + } + + virtual const Arc& Value_() const { + if (current_loop_) { + return loop_; + } + if (final_arc_) { + impl_->ComputeFinalArc(tuple_, &arc_); + return arc_; + } + const Arc& component_arc = current_matcher_->Value(); + impl_->ComputeArc(tuple_, component_arc, &arc_); + return arc_; + } + + virtual void Next_() { + if (current_loop_) { + current_loop_ = false; + return; + } + if (final_arc_) { + final_arc_ = false; + return; + } + current_matcher_->Next(); + } + + const ReplaceFst<A, T>& fst_; + ReplaceFstImpl<A, T> *impl_; + LocalMatcher* current_matcher_; + vector<LocalMatcher*> matcher_; + + StateId s_; // Current state + Label label_; // Current label + + MatchType match_type_; // Supplied by caller + mutable bool done_; + mutable bool current_loop_; // Current arc is the implicit loop + mutable bool final_arc_; // Current arc for exiting recursion + mutable typename T::StateTuple tuple_; // Tuple corresponding to state_ + mutable Arc arc_; + Arc loop_; +}; + +template <class A, class T> inline +void ReplaceFst<A, T>::InitStateIterator(StateIteratorData<A> *data) const { + data->base = new StateIterator< ReplaceFst<A, T> >(*this); +} + +typedef ReplaceFst<StdArc> StdReplaceFst; + + +// // Recursivively replaces arcs in the root Fst with other Fsts. +// This version writes the result of replacement to an output MutableFst. +// +// Replace supports replacement of arcs in one Fst with another +// Fst. This replacement is recursive. Replace takes an array of +// Fst(s). One Fst represents the root (or topology) machine. The root +// Fst refers to other Fsts by recursively replacing arcs labeled as +// non-terminals with the matching non-terminal Fst. Currently Replace +// uses the output symbols of the arcs to determine whether the arc is +// a non-terminal arc or not. 
A non-terminal can be any label that is +// not a non-zero terminal label in the output alphabet. Note that +// input argument is a vector of pair<>. These correspond to the tuple +// of non-terminal Label and corresponding Fst. +template<class Arc> +void Replace(const vector<pair<typename Arc::Label, + const Fst<Arc>* > >& ifst_array, + MutableFst<Arc> *ofst, typename Arc::Label root, + bool epsilon_on_replace) { + ReplaceFstOptions<Arc> opts(root, epsilon_on_replace); + opts.gc_limit = 0; // Cache only the last state for fastest copy. + *ofst = ReplaceFst<Arc>(ifst_array, opts); +} + +template<class Arc> +void Replace(const vector<pair<typename Arc::Label, + const Fst<Arc>* > >& ifst_array, + MutableFst<Arc> *ofst, typename Arc::Label root) { + Replace(ifst_array, ofst, root, false); +} + +} // namespace fst + +#endif // FST_LIB_REPLACE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/reverse.h b/kaldi_io/src/tools/openfst/include/fst/reverse.h new file mode 100644 index 0000000..4d4c75c --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/reverse.h @@ -0,0 +1,91 @@ +// reverse.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Functions and classes to sort arcs in an FST. 
+ +#ifndef FST_LIB_REVERSE_H__ +#define FST_LIB_REVERSE_H__ + +#include <algorithm> +#include <vector> +using std::vector; + +#include <fst/cache.h> + + +namespace fst { + +// Reverses an FST. The reversed result is written to an output +// MutableFst. If A transduces string x to y with weight a, then the +// reverse of A transduces the reverse of x to the reverse of y with +// weight a.Reverse(). +// +// Typically, a = a.Reverse() and Arc = RevArc (e.g. for +// TropicalWeight or LogWeight). In general, e.g. when the weights +// only form a left or right semiring, the output arc type must match +// the input arc type except having the reversed Weight type. +template<class Arc, class RevArc> +void Reverse(const Fst<Arc> &ifst, MutableFst<RevArc> *ofst) { + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + typedef typename RevArc::Weight RevWeight; + + ofst->DeleteStates(); + ofst->SetInputSymbols(ifst.InputSymbols()); + ofst->SetOutputSymbols(ifst.OutputSymbols()); + if (ifst.Properties(kExpanded, false)) + ofst->ReserveStates(CountStates(ifst) + 1); + StateId istart = ifst.Start(); + StateId ostart = ofst->AddState(); + ofst->SetStart(ostart); + + for (StateIterator< Fst<Arc> > siter(ifst); + !siter.Done(); + siter.Next()) { + StateId is = siter.Value(); + StateId os = is + 1; + while (ofst->NumStates() <= os) + ofst->AddState(); + if (is == istart) + ofst->SetFinal(os, RevWeight::One()); + + Weight final = ifst.Final(is); + if (final != Weight::Zero()) { + RevArc oarc(0, 0, final.Reverse(), os); + ofst->AddArc(0, oarc); + } + + for (ArcIterator< Fst<Arc> > aiter(ifst, is); + !aiter.Done(); + aiter.Next()) { + const Arc &iarc = aiter.Value(); + RevArc oarc(iarc.ilabel, iarc.olabel, iarc.weight.Reverse(), os); + StateId nos = iarc.nextstate + 1; + while (ofst->NumStates() <= nos) + ofst->AddState(); + ofst->AddArc(nos, oarc); + } + } + uint64 iprops = ifst.Properties(kCopyProperties, false); + uint64 oprops = 
ofst->Properties(kFstProperties, false); + ofst->SetProperties(ReverseProperties(iprops) | oprops, kFstProperties); +} + +} // namespace fst + +#endif // FST_LIB_REVERSE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/reweight.h b/kaldi_io/src/tools/openfst/include/fst/reweight.h new file mode 100644 index 0000000..c051c2a --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/reweight.h @@ -0,0 +1,146 @@ +// reweight.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) +// +// \file +// Function to reweight an FST. + +#ifndef FST_LIB_REWEIGHT_H__ +#define FST_LIB_REWEIGHT_H__ + +#include <vector> +using std::vector; + +#include <fst/mutable-fst.h> + + +namespace fst { + +enum ReweightType { REWEIGHT_TO_INITIAL, REWEIGHT_TO_FINAL }; + +// Reweight FST according to the potentials defined by the POTENTIAL +// vector in the direction defined by TYPE. Weight needs to be left +// distributive when reweighting towards the initial state and right +// distributive when reweighting towards the final states. +// +// An arc of weight w, with an origin state of potential p and +// destination state of potential q, is reweighted by p\wq when +// reweighting towards the initial state and by pw/q when reweighting +// towards the final states. 
+template <class Arc> +void Reweight(MutableFst<Arc> *fst, + const vector<typename Arc::Weight> &potential, + ReweightType type) { + typedef typename Arc::Weight Weight; + + if (fst->NumStates() == 0) + return; + + if (type == REWEIGHT_TO_FINAL && !(Weight::Properties() & kRightSemiring)) { + FSTERROR() << "Reweight: Reweighting to the final states requires " + << "Weight to be right distributive: " + << Weight::Type(); + fst->SetProperties(kError, kError); + return; + } + + if (type == REWEIGHT_TO_INITIAL && !(Weight::Properties() & kLeftSemiring)) { + FSTERROR() << "Reweight: Reweighting to the initial state requires " + << "Weight to be left distributive: " + << Weight::Type(); + fst->SetProperties(kError, kError); + return; + } + + StateIterator< MutableFst<Arc> > sit(*fst); + for (; !sit.Done(); sit.Next()) { + typename Arc::StateId state = sit.Value(); + if (state == potential.size()) + break; + typename Arc::Weight weight = potential[state]; + if (weight != Weight::Zero()) { + for (MutableArcIterator< MutableFst<Arc> > ait(fst, state); + !ait.Done(); + ait.Next()) { + Arc arc = ait.Value(); + if (arc.nextstate >= potential.size()) + continue; + typename Arc::Weight nextweight = potential[arc.nextstate]; + if (nextweight == Weight::Zero()) + continue; + if (type == REWEIGHT_TO_INITIAL) + arc.weight = Divide(Times(arc.weight, nextweight), weight, + DIVIDE_LEFT); + if (type == REWEIGHT_TO_FINAL) + arc.weight = Divide(Times(weight, arc.weight), nextweight, + DIVIDE_RIGHT); + ait.SetValue(arc); + } + if (type == REWEIGHT_TO_INITIAL) + fst->SetFinal(state, Divide(fst->Final(state), weight, DIVIDE_LEFT)); + } + if (type == REWEIGHT_TO_FINAL) + fst->SetFinal(state, Times(weight, fst->Final(state))); + } + + // This handles elements past the end of the potentials array. 
+ for (; !sit.Done(); sit.Next()) { + typename Arc::StateId state = sit.Value(); + if (type == REWEIGHT_TO_FINAL) + fst->SetFinal(state, Times(Weight::Zero(), fst->Final(state))); + } + + typename Arc::Weight startweight = fst->Start() < potential.size() ? + potential[fst->Start()] : Weight::Zero(); + if ((startweight != Weight::One()) && (startweight != Weight::Zero())) { + if (fst->Properties(kInitialAcyclic, true) & kInitialAcyclic) { + typename Arc::StateId state = fst->Start(); + for (MutableArcIterator< MutableFst<Arc> > ait(fst, state); + !ait.Done(); + ait.Next()) { + Arc arc = ait.Value(); + if (type == REWEIGHT_TO_INITIAL) + arc.weight = Times(startweight, arc.weight); + else + arc.weight = Times( + Divide(Weight::One(), startweight, DIVIDE_RIGHT), + arc.weight); + ait.SetValue(arc); + } + if (type == REWEIGHT_TO_INITIAL) + fst->SetFinal(state, Times(startweight, fst->Final(state))); + else + fst->SetFinal(state, Times(Divide(Weight::One(), startweight, + DIVIDE_RIGHT), + fst->Final(state))); + } else { + typename Arc::StateId state = fst->AddState(); + Weight w = type == REWEIGHT_TO_INITIAL ? startweight : + Divide(Weight::One(), startweight, DIVIDE_RIGHT); + Arc arc(0, 0, w, fst->Start()); + fst->AddArc(state, arc); + fst->SetStart(state); + } + } + + fst->SetProperties(ReweightProperties( + fst->Properties(kFstProperties, false)), + kFstProperties); +} + +} // namespace fst + +#endif // FST_LIB_REWEIGHT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/rmepsilon.h b/kaldi_io/src/tools/openfst/include/fst/rmepsilon.h new file mode 100644 index 0000000..89b8178 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/rmepsilon.h @@ -0,0 +1,600 @@ +// rmepsilon.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) +// +// \file +// Functions and classes that implemement epsilon-removal. + +#ifndef FST_LIB_RMEPSILON_H__ +#define FST_LIB_RMEPSILON_H__ + +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <fst/slist.h> +#include <stack> +#include <string> +#include <utility> +using std::pair; using std::make_pair; +#include <vector> +using std::vector; + +#include <fst/arcfilter.h> +#include <fst/cache.h> +#include <fst/connect.h> +#include <fst/factor-weight.h> +#include <fst/invert.h> +#include <fst/prune.h> +#include <fst/queue.h> +#include <fst/shortest-distance.h> +#include <fst/topsort.h> + + +namespace fst { + +template <class Arc, class Queue> +class RmEpsilonOptions + : public ShortestDistanceOptions<Arc, Queue, EpsilonArcFilter<Arc> > { + public: + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + + bool connect; // Connect output + Weight weight_threshold; // Pruning weight threshold. + StateId state_threshold; // Pruning state threshold. + + explicit RmEpsilonOptions(Queue *q, float d = kDelta, bool c = true, + Weight w = Weight::Zero(), + StateId n = kNoStateId) + : ShortestDistanceOptions< Arc, Queue, EpsilonArcFilter<Arc> >( + q, EpsilonArcFilter<Arc>(), kNoStateId, d), + connect(c), weight_threshold(w), state_threshold(n) {} + private: + RmEpsilonOptions(); // disallow +}; + +// Computation state of the epsilon-removal algorithm. 
+template <class Arc, class Queue> +class RmEpsilonState { + public: + typedef typename Arc::Label Label; + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + + RmEpsilonState(const Fst<Arc> &fst, + vector<Weight> *distance, + const RmEpsilonOptions<Arc, Queue> &opts) + : fst_(fst), distance_(distance), sd_state_(fst_, distance, opts, true), + expand_id_(0) {} + + // Compute arcs and final weight for state 's' + void Expand(StateId s); + + // Returns arcs of expanded state. + vector<Arc> &Arcs() { return arcs_; } + + // Returns final weight of expanded state. + const Weight &Final() const { return final_; } + + // Return true if an error has occured. + bool Error() const { return sd_state_.Error(); } + + private: + static const size_t kPrime0 = 7853; + static const size_t kPrime1 = 7867; + + struct Element { + Label ilabel; + Label olabel; + StateId nextstate; + + Element() {} + + Element(Label i, Label o, StateId s) + : ilabel(i), olabel(o), nextstate(s) {} + }; + + class ElementKey { + public: + size_t operator()(const Element& e) const { + return static_cast<size_t>(e.nextstate + + e.ilabel * kPrime0 + + e.olabel * kPrime1); + } + + private: + }; + + class ElementEqual { + public: + bool operator()(const Element &e1, const Element &e2) const { + return (e1.ilabel == e2.ilabel) && (e1.olabel == e2.olabel) + && (e1.nextstate == e2.nextstate); + } + }; + + typedef unordered_map<Element, pair<StateId, size_t>, + ElementKey, ElementEqual> ElementMap; + + const Fst<Arc> &fst_; + // Distance from state being expanded in epsilon-closure. + vector<Weight> *distance_; + // Shortest distance algorithm computation state. + ShortestDistanceState<Arc, Queue, EpsilonArcFilter<Arc> > sd_state_; + // Maps an element 'e' to a pair 'p' corresponding to a position + // in the arcs vector of the state being expanded. 'e' corresponds + // to the position 'p.second' in the 'arcs_' vector if 'p.first' is + // equal to the state being expanded. 
+ ElementMap element_map_; + EpsilonArcFilter<Arc> eps_filter_; + stack<StateId> eps_queue_; // Queue used to visit the epsilon-closure + vector<bool> visited_; // '[i] = true' if state 'i' has been visited + slist<StateId> visited_states_; // List of visited states + vector<Arc> arcs_; // Arcs of state being expanded + Weight final_; // Final weight of state being expanded + StateId expand_id_; // Unique ID for each call to Expand + + DISALLOW_COPY_AND_ASSIGN(RmEpsilonState); +}; + +template <class Arc, class Queue> +const size_t RmEpsilonState<Arc, Queue>::kPrime0; +template <class Arc, class Queue> +const size_t RmEpsilonState<Arc, Queue>::kPrime1; + + +template <class Arc, class Queue> +void RmEpsilonState<Arc,Queue>::Expand(typename Arc::StateId source) { + final_ = Weight::Zero(); + arcs_.clear(); + sd_state_.ShortestDistance(source); + if (sd_state_.Error()) + return; + eps_queue_.push(source); + + while (!eps_queue_.empty()) { + StateId state = eps_queue_.top(); + eps_queue_.pop(); + + while (visited_.size() <= state) visited_.push_back(false); + if (visited_[state]) continue; + visited_[state] = true; + visited_states_.push_front(state); + + for (ArcIterator< Fst<Arc> > ait(fst_, state); + !ait.Done(); + ait.Next()) { + Arc arc = ait.Value(); + arc.weight = Times((*distance_)[state], arc.weight); + + if (eps_filter_(arc)) { + while (visited_.size() <= arc.nextstate) + visited_.push_back(false); + if (!visited_[arc.nextstate]) + eps_queue_.push(arc.nextstate); + } else { + Element element(arc.ilabel, arc.olabel, arc.nextstate); + typename ElementMap::iterator it = element_map_.find(element); + if (it == element_map_.end()) { + element_map_.insert( + pair<Element, pair<StateId, size_t> > + (element, pair<StateId, size_t>(expand_id_, arcs_.size()))); + arcs_.push_back(arc); + } else { + if (((*it).second).first == expand_id_) { + Weight &w = arcs_[((*it).second).second].weight; + w = Plus(w, arc.weight); + } else { + ((*it).second).first = expand_id_; + 
((*it).second).second = arcs_.size(); + arcs_.push_back(arc); + } + } + } + } + final_ = Plus(final_, Times((*distance_)[state], fst_.Final(state))); + } + + while (!visited_states_.empty()) { + visited_[visited_states_.front()] = false; + visited_states_.pop_front(); + } + ++expand_id_; +} + +// Removes epsilon-transitions (when both the input and output label +// are an epsilon) from a transducer. The result will be an equivalent +// FST that has no such epsilon transitions. This version modifies +// its input. It allows fine control via the options argument; see +// below for a simpler interface. +// +// The vector 'distance' will be used to hold the shortest distances +// during the epsilon-closure computation. The state queue discipline +// and convergence delta are taken in the options argument. +template <class Arc, class Queue> +void RmEpsilon(MutableFst<Arc> *fst, + vector<typename Arc::Weight> *distance, + const RmEpsilonOptions<Arc, Queue> &opts) { + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + typedef typename Arc::Label Label; + + if (fst->Start() == kNoStateId) { + return; + } + + // 'noneps_in[s]' will be set to true iff 's' admits a non-epsilon + // incoming transition or is the start state. + vector<bool> noneps_in(fst->NumStates(), false); + noneps_in[fst->Start()] = true; + for (StateId i = 0; i < fst->NumStates(); ++i) { + for (ArcIterator<Fst<Arc> > aiter(*fst, i); + !aiter.Done(); + aiter.Next()) { + if (aiter.Value().ilabel != 0 || aiter.Value().olabel != 0) + noneps_in[aiter.Value().nextstate] = true; + } + } + + // States sorted in topological order when (acyclic) or generic + // topological order (cyclic). 
+ vector<StateId> states; + states.reserve(fst->NumStates()); + + if (fst->Properties(kTopSorted, false) & kTopSorted) { + for (StateId i = 0; i < fst->NumStates(); i++) + states.push_back(i); + } else if (fst->Properties(kAcyclic, false) & kAcyclic) { + vector<StateId> order; + bool acyclic; + TopOrderVisitor<Arc> top_order_visitor(&order, &acyclic); + DfsVisit(*fst, &top_order_visitor, EpsilonArcFilter<Arc>()); + // Sanity check: should be acyclic if property bit is set. + if(!acyclic) { + FSTERROR() << "RmEpsilon: inconsistent acyclic property bit"; + fst->SetProperties(kError, kError); + return; + } + states.resize(order.size()); + for (StateId i = 0; i < order.size(); i++) + states[order[i]] = i; + } else { + uint64 props; + vector<StateId> scc; + SccVisitor<Arc> scc_visitor(&scc, 0, 0, &props); + DfsVisit(*fst, &scc_visitor, EpsilonArcFilter<Arc>()); + vector<StateId> first(scc.size(), kNoStateId); + vector<StateId> next(scc.size(), kNoStateId); + for (StateId i = 0; i < scc.size(); i++) { + if (first[scc[i]] != kNoStateId) + next[i] = first[scc[i]]; + first[scc[i]] = i; + } + for (StateId i = 0; i < first.size(); i++) + for (StateId j = first[i]; j != kNoStateId; j = next[j]) + states.push_back(j); + } + + RmEpsilonState<Arc, Queue> + rmeps_state(*fst, distance, opts); + + while (!states.empty()) { + StateId state = states.back(); + states.pop_back(); + if (!noneps_in[state]) + continue; + rmeps_state.Expand(state); + fst->SetFinal(state, rmeps_state.Final()); + fst->DeleteArcs(state); + vector<Arc> &arcs = rmeps_state.Arcs(); + fst->ReserveArcs(state, arcs.size()); + while (!arcs.empty()) { + fst->AddArc(state, arcs.back()); + arcs.pop_back(); + } + } + + for (StateId s = 0; s < fst->NumStates(); ++s) { + if (!noneps_in[s]) + fst->DeleteArcs(s); + } + + if(rmeps_state.Error()) + fst->SetProperties(kError, kError); + fst->SetProperties( + RmEpsilonProperties(fst->Properties(kFstProperties, false)), + kFstProperties); + + if (opts.weight_threshold != 
Weight::Zero() || + opts.state_threshold != kNoStateId) + Prune(fst, opts.weight_threshold, opts.state_threshold); + if (opts.connect && (opts.weight_threshold == Weight::Zero() || + opts.state_threshold != kNoStateId)) + Connect(fst); +} + +// Removes epsilon-transitions (when both the input and output label +// are an epsilon) from a transducer. The result will be an equivalent +// FST that has no such epsilon transitions. This version modifies its +// input. It has a simplified interface; see above for a version that +// allows finer control. +// +// Complexity: +// - Time: +// - Unweighted: O(V2 + V E) +// - Acyclic: O(V2 + V E) +// - Tropical semiring: O(V2 log V + V E) +// - General: exponential +// - Space: O(V E) +// where V = # of states visited, E = # of arcs. +// +// References: +// - Mehryar Mohri. Generic Epsilon-Removal and Input +// Epsilon-Normalization Algorithms for Weighted Transducers, +// "International Journal of Computer Science", 13(1):129-143 (2002). +template <class Arc> +void RmEpsilon(MutableFst<Arc> *fst, + bool connect = true, + typename Arc::Weight weight_threshold = Arc::Weight::Zero(), + typename Arc::StateId state_threshold = kNoStateId, + float delta = kDelta) { + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + typedef typename Arc::Label Label; + + vector<Weight> distance; + AutoQueue<StateId> state_queue(*fst, &distance, EpsilonArcFilter<Arc>()); + RmEpsilonOptions<Arc, AutoQueue<StateId> > + opts(&state_queue, delta, connect, weight_threshold, state_threshold); + + RmEpsilon(fst, &distance, opts); +} + + +struct RmEpsilonFstOptions : CacheOptions { + float delta; + + RmEpsilonFstOptions(const CacheOptions &opts, float delta = kDelta) + : CacheOptions(opts), delta(delta) {} + + explicit RmEpsilonFstOptions(float delta = kDelta) : delta(delta) {} +}; + + +// Implementation of delayed RmEpsilonFst. 
+template <class A> +class RmEpsilonFstImpl : public CacheImpl<A> { + public: + using FstImpl<A>::SetType; + using FstImpl<A>::SetProperties; + using FstImpl<A>::SetInputSymbols; + using FstImpl<A>::SetOutputSymbols; + + using CacheBaseImpl< CacheState<A> >::PushArc; + using CacheBaseImpl< CacheState<A> >::HasArcs; + using CacheBaseImpl< CacheState<A> >::HasFinal; + using CacheBaseImpl< CacheState<A> >::HasStart; + using CacheBaseImpl< CacheState<A> >::SetArcs; + using CacheBaseImpl< CacheState<A> >::SetFinal; + using CacheBaseImpl< CacheState<A> >::SetStart; + + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef CacheState<A> State; + + RmEpsilonFstImpl(const Fst<A>& fst, const RmEpsilonFstOptions &opts) + : CacheImpl<A>(opts), + fst_(fst.Copy()), + delta_(opts.delta), + rmeps_state_( + *fst_, + &distance_, + RmEpsilonOptions<A, FifoQueue<StateId> >(&queue_, delta_, false)) { + SetType("rmepsilon"); + uint64 props = fst.Properties(kFstProperties, false); + SetProperties(RmEpsilonProperties(props, true), kCopyProperties); + SetInputSymbols(fst.InputSymbols()); + SetOutputSymbols(fst.OutputSymbols()); + } + + RmEpsilonFstImpl(const RmEpsilonFstImpl &impl) + : CacheImpl<A>(impl), + fst_(impl.fst_->Copy(true)), + delta_(impl.delta_), + rmeps_state_( + *fst_, + &distance_, + RmEpsilonOptions<A, FifoQueue<StateId> >(&queue_, delta_, false)) { + SetType("rmepsilon"); + SetProperties(impl.Properties(), kCopyProperties); + SetInputSymbols(impl.InputSymbols()); + SetOutputSymbols(impl.OutputSymbols()); + } + + ~RmEpsilonFstImpl() { + delete fst_; + } + + StateId Start() { + if (!HasStart()) { + SetStart(fst_->Start()); + } + return CacheImpl<A>::Start(); + } + + Weight Final(StateId s) { + if (!HasFinal(s)) { + Expand(s); + } + return CacheImpl<A>::Final(s); + } + + size_t NumArcs(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<A>::NumArcs(s); + } + + size_t NumInputEpsilons(StateId s) { + 
if (!HasArcs(s)) + Expand(s); + return CacheImpl<A>::NumInputEpsilons(s); + } + + size_t NumOutputEpsilons(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<A>::NumOutputEpsilons(s); + } + + uint64 Properties() const { return Properties(kFstProperties); } + + // Set error if found; return FST impl properties. + uint64 Properties(uint64 mask) const { + if ((mask & kError) && + (fst_->Properties(kError, false) || rmeps_state_.Error())) + SetProperties(kError, kError); + return FstImpl<A>::Properties(mask); + } + + void InitArcIterator(StateId s, ArcIteratorData<A> *data) { + if (!HasArcs(s)) + Expand(s); + CacheImpl<A>::InitArcIterator(s, data); + } + + void Expand(StateId s) { + rmeps_state_.Expand(s); + SetFinal(s, rmeps_state_.Final()); + vector<A> &arcs = rmeps_state_.Arcs(); + while (!arcs.empty()) { + PushArc(s, arcs.back()); + arcs.pop_back(); + } + SetArcs(s); + } + + private: + const Fst<A> *fst_; + float delta_; + vector<Weight> distance_; + FifoQueue<StateId> queue_; + RmEpsilonState<A, FifoQueue<StateId> > rmeps_state_; + + void operator=(const RmEpsilonFstImpl<A> &); // disallow +}; + + +// Removes epsilon-transitions (when both the input and output label +// are an epsilon) from a transducer. The result will be an equivalent +// FST that has no such epsilon transitions. This version is a +// delayed Fst. +// +// Complexity: +// - Time: +// - Unweighted: O(v^2 + v e) +// - General: exponential +// - Space: O(v e) +// where v = # of states visited, e = # of arcs visited. Constant time +// to visit an input state or arc is assumed and exclusive of caching. +// +// References: +// - Mehryar Mohri. Generic Epsilon-Removal and Input +// Epsilon-Normalization Algorithms for Weighted Transducers, +// "International Journal of Computer Science", 13(1):129-143 (2002). +// +// This class attaches interface to implementation and handles +// reference counting, delegating most methods to ImplToFst. 
+template <class A> +class RmEpsilonFst : public ImplToFst< RmEpsilonFstImpl<A> > { + public: + friend class ArcIterator< RmEpsilonFst<A> >; + friend class StateIterator< RmEpsilonFst<A> >; + + typedef A Arc; + typedef typename A::StateId StateId; + typedef CacheState<A> State; + typedef RmEpsilonFstImpl<A> Impl; + + RmEpsilonFst(const Fst<A> &fst) + : ImplToFst<Impl>(new Impl(fst, RmEpsilonFstOptions())) {} + + RmEpsilonFst(const Fst<A> &fst, const RmEpsilonFstOptions &opts) + : ImplToFst<Impl>(new Impl(fst, opts)) {} + + // See Fst<>::Copy() for doc. + RmEpsilonFst(const RmEpsilonFst<A> &fst, bool safe = false) + : ImplToFst<Impl>(fst, safe) {} + + // Get a copy of this RmEpsilonFst. See Fst<>::Copy() for further doc. + virtual RmEpsilonFst<A> *Copy(bool safe = false) const { + return new RmEpsilonFst<A>(*this, safe); + } + + virtual inline void InitStateIterator(StateIteratorData<A> *data) const; + + virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { + GetImpl()->InitArcIterator(s, data); + } + + private: + // Makes visible to friends. + Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } + + void operator=(const RmEpsilonFst<A> &fst); // disallow +}; + +// Specialization for RmEpsilonFst. +template<class A> +class StateIterator< RmEpsilonFst<A> > + : public CacheStateIterator< RmEpsilonFst<A> > { + public: + explicit StateIterator(const RmEpsilonFst<A> &fst) + : CacheStateIterator< RmEpsilonFst<A> >(fst, fst.GetImpl()) {} +}; + + +// Specialization for RmEpsilonFst. 
+template <class A> +class ArcIterator< RmEpsilonFst<A> > + : public CacheArcIterator< RmEpsilonFst<A> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const RmEpsilonFst<A> &fst, StateId s) + : CacheArcIterator< RmEpsilonFst<A> >(fst.GetImpl(), s) { + if (!fst.GetImpl()->HasArcs(s)) + fst.GetImpl()->Expand(s); + } + + private: + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + + +template <class A> inline +void RmEpsilonFst<A>::InitStateIterator(StateIteratorData<A> *data) const { + data->base = new StateIterator< RmEpsilonFst<A> >(*this); +} + + +// Useful alias when using StdArc. +typedef RmEpsilonFst<StdArc> StdRmEpsilonFst; + +} // namespace fst + +#endif // FST_LIB_RMEPSILON_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/rmfinalepsilon.h b/kaldi_io/src/tools/openfst/include/fst/rmfinalepsilon.h new file mode 100644 index 0000000..eb0f937 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/rmfinalepsilon.h @@ -0,0 +1,107 @@ +// rmfinalepsilon.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Johan Schalkwyk) +// +// \file +// Function to remove of final states that have epsilon only input arcs. 
+ +#ifndef FST_LIB_RMFINALEPSILON_H__ +#define FST_LIB_RMFINALEPSILON_H__ + +#include <tr1/unordered_set> +using std::tr1::unordered_set; +using std::tr1::unordered_multiset; +#include <vector> +using std::vector; + +#include <fst/connect.h> +#include <fst/mutable-fst.h> + + +namespace fst { + +template<class A> +void RmFinalEpsilon(MutableFst<A>* fst) { + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + // Determine the coaccesibility of states. + vector<bool> access; + vector<bool> coaccess; + uint64 props = 0; + SccVisitor<A> scc_visitor(0, &access, &coaccess, &props); + DfsVisit(*fst, &scc_visitor); + + // Find potential list of removable final states. These are final states + // that have no outgoing transitions or final states that have a + // non-coaccessible future. Complexity O(S) + unordered_set<StateId> finals; + for (StateIterator<Fst<A> > siter(*fst); !siter.Done(); siter.Next()) { + StateId s = siter.Value(); + if (fst->Final(s) != Weight::Zero()) { + bool future_coaccess = false; + for (ArcIterator<Fst<A> > aiter(*fst, s); !aiter.Done(); aiter.Next()) { + const A& arc = aiter.Value(); + if (coaccess[arc.nextstate]) { + future_coaccess = true; + break; + } + } + if (!future_coaccess) { + finals.insert(s); + } + } + } + + // Move the final weight. 
Complexity O(E) + vector<A> arcs; + for (StateIterator<Fst<A> > siter(*fst); !siter.Done(); siter.Next()) { + StateId s = siter.Value(); + Weight w(fst->Final(s)); + + arcs.clear(); + for (ArcIterator<Fst<A> > aiter(*fst, s); !aiter.Done(); aiter.Next()) { + const A& arc = aiter.Value(); + // is next state in the list of finals + if (finals.find(arc.nextstate) != finals.end()) { + // sum up all epsilon arcs + if (arc.ilabel == 0 && arc.olabel == 0) { + w = Plus(Times(fst->Final(arc.nextstate), arc.weight), w); + } else { + arcs.push_back(arc); + } + } else { + arcs.push_back(arc); + } + } + + // If some arcs (epsilon arcs) were deleted, delete all + // arcs and add back only the non epsilon arcs + if (arcs.size() < fst->NumArcs(s)) { + fst->DeleteArcs(s); + fst->SetFinal(s, w); + for (size_t i = 0; i < arcs.size(); ++i) { + fst->AddArc(s, arcs[i]); + } + } + } + + Connect(fst); +} + +} // namespace fst + +#endif // FST_LIB_RMFINALEPSILON_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/arcsort.h b/kaldi_io/src/tools/openfst/include/fst/script/arcsort.h new file mode 100644 index 0000000..4277332 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/arcsort.h @@ -0,0 +1,49 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_ARCSORT_H_ +#define FST_SCRIPT_ARCSORT_H_ + +#include <fst/arcsort.h> +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> + +namespace fst { +namespace script { + +enum ArcSortType { ILABEL_COMPARE, OLABEL_COMPARE }; + +typedef args::Package<MutableFstClass*, const ArcSortType> ArcSortArgs; + +template<class Arc> +void ArcSort(ArcSortArgs *args) { + MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>(); + + if (args->arg2 == ILABEL_COMPARE) { + ILabelCompare<Arc> icomp; + ArcSort(fst, icomp); + } else { // OLABEL_COMPARE + OLabelCompare<Arc> ocomp; + ArcSort(fst, ocomp); + } +} + +void ArcSort(MutableFstClass *ofst, ArcSortType sort_type); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_ARCSORT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/arg-packs.h b/kaldi_io/src/tools/openfst/include/fst/script/arg-packs.h new file mode 100644 index 0000000..8ebf8d8 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/arg-packs.h @@ -0,0 +1,240 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +// Convenience templates for defining arg packs for the FstClass operations. 
+ +// See operation-templates.h for a discussion about why these are needed; the +// short story is that all FstClass operations must be implemented by a version +// that takes one argument, most likely a struct bundling all the +// logical arguments together. These template structs provide convenient ways +// to specify these bundles (e.g. by means of appropriate typedefs). + +// The ArgPack template is sufficient for bundling together all the args for +// a particular function. The function is assumed to be void-returning. If +// you want a space for a return value, use the WithReturnValue template +// as follows: + +// WithReturnValue<bool, ArgPack<...> > + +#ifndef FST_SCRIPT_ARG_PACKS_H_ +#define FST_SCRIPT_ARG_PACKS_H_ + +namespace fst { +namespace script { +namespace args { + +// Sentinel value that means "no arg here." +class none_type { }; + +// Base arg pack template class. Specializations follow that allow +// fewer numbers of arguments (down to 2). If the maximum number of arguments +// increases, you will need to change three things: +// 1) Add more template parameters to this template +// 2) Add more specializations to allow fewer numbers of parameters than +// the new max. +// 3) Add extra none_types to all existing specializations to fill +// the new slots. 
+ + +// 9 args (max) +template<class T1, + class T2 = none_type, + class T3 = none_type, + class T4 = none_type, + class T5 = none_type, + class T6 = none_type, + class T7 = none_type, + class T8 = none_type, + class T9 = none_type> +struct Package { + T1 arg1; + T2 arg2; + T3 arg3; + T4 arg4; + T5 arg5; + T6 arg6; + T7 arg7; + T8 arg8; + T9 arg9; + + Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, + T7 arg7, T8 arg8, T9 arg9) : + arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5), + arg6(arg6), arg7(arg7), arg8(arg8), arg9(arg9) { } +}; + +// 8 args +template<class T1, + class T2, + class T3, + class T4, + class T5, + class T6, + class T7, + class T8> +struct Package<T1, T2, T3, T4, T5, T6, T7, T8, none_type> { + T1 arg1; + T2 arg2; + T3 arg3; + T4 arg4; + T5 arg5; + T6 arg6; + T7 arg7; + T8 arg8; + + Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, + T7 arg7, T8 arg8) : + arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5), + arg6(arg6), arg7(arg7), arg8(arg8) { } +}; + +// 7 args +template<class T1, + class T2, + class T3, + class T4, + class T5, + class T6, + class T7> +struct Package<T1, T2, T3, T4, T5, T6, T7, + none_type, none_type> { + T1 arg1; + T2 arg2; + T3 arg3; + T4 arg4; + T5 arg5; + T6 arg6; + T7 arg7; + + Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, + T7 arg7) : + arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5), + arg6(arg6), arg7(arg7) { } +}; + +// 6 args +template<class T1, + class T2, + class T3, + class T4, + class T5, + class T6> +struct Package<T1, T2, T3, T4, T5, T6, none_type, + none_type, none_type> { + T1 arg1; + T2 arg2; + T3 arg3; + T4 arg4; + T5 arg5; + T6 arg6; + + Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6) : + arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5), + arg6(arg6) { } +}; + +// 5 args +template<class T1, + class T2, + class T3, + class T4, + class T5> +struct Package<T1, T2, T3, T4, T5, none_type, none_type, + none_type, 
none_type> { + T1 arg1; + T2 arg2; + T3 arg3; + T4 arg4; + T5 arg5; + + Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5) : + arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5) { } +}; + +// 4 args +template<class T1, + class T2, + class T3, + class T4> +struct Package<T1, T2, T3, T4, none_type, none_type, + none_type, none_type, none_type> { + T1 arg1; + T2 arg2; + T3 arg3; + T4 arg4; + + Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4) : + arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4) { } +}; + +// 3 args +template<class T1, + class T2, + class T3> +struct Package<T1, T2, T3, none_type, none_type, + none_type, none_type, none_type, + none_type> { + T1 arg1; + T2 arg2; + T3 arg3; + + Package(T1 arg1, T2 arg2, T3 arg3) : + arg1(arg1), arg2(arg2), arg3(arg3) { } +}; + +// 2 args (minimum) +template<class T1, + class T2> +struct Package<T1, T2, none_type, none_type, + none_type, none_type, none_type, + none_type, none_type> { + T1 arg1; + T2 arg2; + + Package(T1 arg1, T2 arg2) : + arg1(arg1), arg2(arg2) { } +}; + +// Tack this on to an existing arg pack to add a return value. +// The syntax for accessing the args is then slightly more stilted, +// as you must do an extra member access (since the args are stored +// as a member of this class). +// The alternative is to declare another slew of templates for functions +// that return a value, analogous to the above. + +template<class Retval, class ArgPackage> +struct WithReturnValue { + Retval retval; + const ArgPackage &args; + + explicit WithReturnValue(const ArgPackage &args) : args(args) { } +}; + +// We don't want to store a reference to a reference, if ArgPackage is +// already some reference type. 
+template<class Retval, class ArgPackage> +struct WithReturnValue<Retval, ArgPackage&> { + Retval retval; + const ArgPackage &args; + + explicit WithReturnValue(const ArgPackage &args) : args(args) { } +}; + +} // namespace args +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_ARG_PACKS_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/closure.h b/kaldi_io/src/tools/openfst/include/fst/script/closure.h new file mode 100644 index 0000000..93b5ec3 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/closure.h @@ -0,0 +1,41 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_CLOSURE_H_ +#define FST_SCRIPT_CLOSURE_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/closure.h> + +namespace fst { +namespace script { + +typedef args::Package<MutableFstClass*, const ClosureType> ClosureArgs; + +template<class Arc> +void Closure(ClosureArgs *args) { + MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>(); + + Closure(fst, args->arg2); +} + +void Closure(MutableFstClass *ofst, ClosureType closure_type); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_CLOSURE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/compile-impl.h b/kaldi_io/src/tools/openfst/include/fst/script/compile-impl.h new file mode 100644 index 0000000..68f37c3 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/compile-impl.h @@ -0,0 +1,216 @@ +// compile.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Class to to compile a binary Fst from textual input. 
+ +#ifndef FST_SCRIPT_COMPILE_IMPL_H_ +#define FST_SCRIPT_COMPILE_IMPL_H_ + +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <sstream> +#include <string> +#include <vector> +using std::vector; + +#include <iostream> +#include <fstream> +#include <sstream> +#include <fst/fst.h> +#include <fst/util.h> +#include <fst/vector-fst.h> + +DECLARE_string(fst_field_separator); + +namespace fst { + +// Compile a binary Fst from textual input, helper class for fstcompile.cc +// WARNING: Stand-alone use of this class not recommended, most code should +// read/write using the binary format which is much more efficient. +template <class A> class FstCompiler { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + + // WARNING: use of 'allow_negative_labels = true' not recommended; may + // cause conflicts + FstCompiler(istream &istrm, const string &source, + const SymbolTable *isyms, const SymbolTable *osyms, + const SymbolTable *ssyms, bool accep, bool ikeep, + bool okeep, bool nkeep, bool allow_negative_labels = false) + : nline_(0), source_(source), + isyms_(isyms), osyms_(osyms), ssyms_(ssyms), + nstates_(0), keep_state_numbering_(nkeep), + allow_negative_labels_(allow_negative_labels) { + char line[kLineLen]; + while (istrm.getline(line, kLineLen)) { + ++nline_; + vector<char *> col; + string separator = FLAGS_fst_field_separator + "\n"; + SplitToVector(line, separator.c_str(), &col, true); + if (col.size() == 0 || col[0][0] == '\0') // empty line + continue; + if (col.size() > 5 || + (col.size() > 4 && accep) || + (col.size() == 3 && !accep)) { + FSTERROR() << "FstCompiler: Bad number of columns, source = " + << source_ + << ", line = " << nline_; + fst_.SetProperties(kError, kError); + return; + } + StateId s = StrToStateId(col[0]); + while (s >= fst_.NumStates()) + fst_.AddState(); + if (nline_ == 1) + fst_.SetStart(s); + + Arc 
arc; + StateId d = s; + switch (col.size()) { + case 1: + fst_.SetFinal(s, Weight::One()); + break; + case 2: + fst_.SetFinal(s, StrToWeight(col[1], true)); + break; + case 3: + arc.nextstate = d = StrToStateId(col[1]); + arc.ilabel = StrToILabel(col[2]); + arc.olabel = arc.ilabel; + arc.weight = Weight::One(); + fst_.AddArc(s, arc); + break; + case 4: + arc.nextstate = d = StrToStateId(col[1]); + arc.ilabel = StrToILabel(col[2]); + if (accep) { + arc.olabel = arc.ilabel; + arc.weight = StrToWeight(col[3], false); + } else { + arc.olabel = StrToOLabel(col[3]); + arc.weight = Weight::One(); + } + fst_.AddArc(s, arc); + break; + case 5: + arc.nextstate = d = StrToStateId(col[1]); + arc.ilabel = StrToILabel(col[2]); + arc.olabel = StrToOLabel(col[3]); + arc.weight = StrToWeight(col[4], false); + fst_.AddArc(s, arc); + } + while (d >= fst_.NumStates()) + fst_.AddState(); + } + if (ikeep) + fst_.SetInputSymbols(isyms); + if (okeep) + fst_.SetOutputSymbols(osyms); + } + + const VectorFst<A> &Fst() const { + return fst_; + } + + private: + // Maximum line length in text file. 
+ static const int kLineLen = 8096; + + int64 StrToId(const char *s, const SymbolTable *syms, + const char *name, bool allow_negative = false) const { + int64 n = 0; + + if (syms) { + n = syms->Find(s); + if (n == -1 || (!allow_negative && n < 0)) { + FSTERROR() << "FstCompiler: Symbol \"" << s + << "\" is not mapped to any integer " << name + << ", symbol table = " << syms->Name() + << ", source = " << source_ << ", line = " << nline_; + fst_.SetProperties(kError, kError); + } + } else { + char *p; + n = strtoll(s, &p, 10); + if (p < s + strlen(s) || (!allow_negative && n < 0)) { + FSTERROR() << "FstCompiler: Bad " << name << " integer = \"" << s + << "\", source = " << source_ << ", line = " << nline_; + fst_.SetProperties(kError, kError); + } + } + return n; + } + + StateId StrToStateId(const char *s) { + StateId n = StrToId(s, ssyms_, "state ID"); + + if (keep_state_numbering_) + return n; + + // remap state IDs to make dense set + typename unordered_map<StateId, StateId>::const_iterator it = states_.find(n); + if (it == states_.end()) { + states_[n] = nstates_; + return nstates_++; + } else { + return it->second; + } + } + + StateId StrToILabel(const char *s) const { + return StrToId(s, isyms_, "arc ilabel", allow_negative_labels_); + } + + StateId StrToOLabel(const char *s) const { + return StrToId(s, osyms_, "arc olabel", allow_negative_labels_); + } + + Weight StrToWeight(const char *s, bool allow_zero) const { + Weight w; + istringstream strm(s); + strm >> w; + if (!strm || (!allow_zero && w == Weight::Zero())) { + FSTERROR() << "FstCompiler: Bad weight = \"" << s + << "\", source = " << source_ << ", line = " << nline_; + fst_.SetProperties(kError, kError); + w = Weight::NoWeight(); + } + return w; + } + + mutable VectorFst<A> fst_; + size_t nline_; + string source_; // text FST source name + const SymbolTable *isyms_; // ilabel symbol table + const SymbolTable *osyms_; // olabel symbol table + const SymbolTable *ssyms_; // slabel symbol table + 
unordered_map<StateId, StateId> states_; // state ID map + StateId nstates_; // number of seen states + bool keep_state_numbering_; + bool allow_negative_labels_; // not recommended; may cause conflicts + + DISALLOW_COPY_AND_ASSIGN(FstCompiler); +}; + +} // namespace fst + +#endif // FST_SCRIPT_COMPILE_IMPL_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/compile.h b/kaldi_io/src/tools/openfst/include/fst/script/compile.h new file mode 100644 index 0000000..bb6ea56 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/compile.h @@ -0,0 +1,92 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_COMPILE_H_ +#define FST_SCRIPT_COMPILE_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/script/compile-impl.h> + +namespace fst { +namespace script { + +// Note: it is safe to pass these strings as references because +// this struct is only used to pass them deeper in the call graph. +// Be sure you understand why this is so before using this struct +// for anything else! 
+struct FstCompileArgs { + fst::istream &istrm; + const string &source; + const string &dest; + const string &fst_type; + const fst::SymbolTable *isyms; + const fst::SymbolTable *osyms; + const fst::SymbolTable *ssyms; + const bool accep; + const bool ikeep; + const bool okeep; + const bool nkeep; + const bool allow_negative_labels; + + FstCompileArgs(istream &istrm, const string &source, const string &dest, + const string &fst_type, const fst::SymbolTable *isyms, + const fst::SymbolTable *osyms, + const fst::SymbolTable *ssyms, + bool accep, bool ikeep, bool okeep, bool nkeep, + bool allow_negative_labels = false) : + istrm(istrm), source(source), dest(dest), fst_type(fst_type), + isyms(isyms), osyms(osyms), ssyms(ssyms), accep(accep), ikeep(ikeep), + okeep(okeep), nkeep(nkeep), + allow_negative_labels(allow_negative_labels) { } +}; + +template<class Arc> +void CompileFst(FstCompileArgs *args) { + using fst::FstCompiler; + using fst::Convert; + using fst::Fst; + + FstCompiler<Arc> fstcompiler(args->istrm, args->source, args->isyms, + args->osyms, args->ssyms, + args->accep, args->ikeep, + args->okeep, args->nkeep, + args->allow_negative_labels); + + const Fst<Arc> *fst = &fstcompiler.Fst(); + if (args->fst_type != "vector") { + fst = Convert<Arc>(*fst, args->fst_type); + if (!fst) { + FSTERROR() << "Failed to convert FST to desired type: " + << args->fst_type; + return; + } + } + + fst->Write(args->dest); +} + +void CompileFst(istream &istrm, const string &source, const string &dest, + const string &fst_type, const string &arc_type, + const SymbolTable *isyms, + const SymbolTable *osyms, const SymbolTable *ssyms, + bool accep, bool ikeep, bool okeep, bool nkeep, + bool allow_negative_labels); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_COMPILE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/compose.h b/kaldi_io/src/tools/openfst/include/fst/script/compose.h new file mode 100644 index 0000000..96375f7 --- /dev/null +++ 
b/kaldi_io/src/tools/openfst/include/fst/script/compose.h @@ -0,0 +1,63 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_COMPOSE_H_ +#define FST_SCRIPT_COMPOSE_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/compose.h> + +namespace fst { +namespace script { + +typedef args::Package<const FstClass&, const FstClass&, + MutableFstClass*, ComposeFilter> ComposeArgs1; + +template<class Arc> +void Compose(ComposeArgs1 *args) { + const Fst<Arc> &ifst1 = *(args->arg1.GetFst<Arc>()); + const Fst<Arc> &ifst2 = *(args->arg2.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>(); + + Compose(ifst1, ifst2, ofst, args->arg4); +} + +typedef fst::ComposeOptions ComposeOptions; + +typedef args::Package<const FstClass&, const FstClass&, + MutableFstClass*, const ComposeOptions &> ComposeArgs2; + +template<class Arc> +void Compose(ComposeArgs2 *args) { + const Fst<Arc> &ifst1 = *(args->arg1.GetFst<Arc>()); + const Fst<Arc> &ifst2 = *(args->arg2.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>(); + + Compose(ifst1, ifst2, ofst, args->arg4); +} + +void Compose(const FstClass &ifst1, const FstClass &ifst2, + MutableFstClass *ofst, + const ComposeOptions &opts = fst::script::ComposeOptions()); + +void Compose(const FstClass &ifst1, const FstClass &ifst2, + MutableFstClass *ofst, 
ComposeFilter compose_filter); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_COMPOSE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/concat.h b/kaldi_io/src/tools/openfst/include/fst/script/concat.h new file mode 100644 index 0000000..46c4407 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/concat.h @@ -0,0 +1,54 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_CONCAT_H_ +#define FST_SCRIPT_CONCAT_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/concat.h> + +namespace fst { +namespace script { + +typedef args::Package<MutableFstClass*, const FstClass&> ConcatArgs1; +typedef args::Package<const FstClass&, MutableFstClass*> ConcatArgs2; + +template<class Arc> +void Concat(ConcatArgs1 *args) { + MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>(); + const Fst<Arc> &ifst = *(args->arg2.GetFst<Arc>()); + + Concat(ofst, ifst); +} + +template<class Arc> +void Concat(ConcatArgs2 *args) { + const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); + + Concat(ifst, ofst); +} + +void Concat(MutableFstClass *ofst, const FstClass &ifst); +void Concat(const FstClass &ifst, MutableFstClass *ofst); + +} // namespace script +} // namespace fst + + + +#endif // FST_SCRIPT_CONCAT_H_ diff --git 
a/kaldi_io/src/tools/openfst/include/fst/script/connect.h b/kaldi_io/src/tools/openfst/include/fst/script/connect.h new file mode 100644 index 0000000..19c4390 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/connect.h @@ -0,0 +1,45 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_CONNECT_H_ +#define FST_SCRIPT_CONNECT_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/dfs-visit.h> +#include <fst/connect.h> + +namespace fst { +namespace script { + +// This function confuses SWIG, because both versions have the same args +#ifndef SWIG +template<class Arc> +void Connect(MutableFstClass *fst) { + MutableFst<Arc> *typed_fst = fst->GetMutableFst<Arc>(); + + Connect(typed_fst); +} +#endif + +void Connect(MutableFstClass *fst); + +} // namespace script +} // namespace fst + + + +#endif // FST_SCRIPT_CONNECT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/convert.h b/kaldi_io/src/tools/openfst/include/fst/script/convert.h new file mode 100644 index 0000000..4a3ce6b --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/convert.h @@ -0,0 +1,49 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_CONVERT_H_ +#define FST_SCRIPT_CONVERT_H_ + +#include <string> + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> + +namespace fst { +namespace script { + +typedef args::Package<const FstClass&, const string&> ConvertInnerArgs; +typedef args::WithReturnValue<FstClass*, ConvertInnerArgs> ConvertArgs; + +template<class Arc> +void Convert(ConvertArgs *args) { + const Fst<Arc> &fst = *(args->args.arg1.GetFst<Arc>()); + const string &new_type = args->args.arg2; + + Fst<Arc> *result = Convert(fst, new_type); + args->retval = new FstClass(*result); + delete result; +} + +#ifdef SWIG +%newobject Convert; +#endif +FstClass *Convert(const FstClass& f, const string &new_type); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_CONVERT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/decode.h b/kaldi_io/src/tools/openfst/include/fst/script/decode.h new file mode 100644 index 0000000..1064ad5 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/decode.h @@ -0,0 +1,46 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_DECODE_H_ +#define FST_SCRIPT_DECODE_H_ + +#include <string> + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/encode.h> + +namespace fst { +namespace script { + +typedef args::Package<MutableFstClass*, const string&> DecodeArgs; + +template<class Arc> +void Decode(DecodeArgs *args) { + MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>(); + + EncodeMapper<Arc> *decoder = EncodeMapper<Arc>::Read(args->arg2, DECODE); + Decode(ofst, *decoder); + + delete decoder; +} + +void Decode(MutableFstClass *fst, const string &coder_fname); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_DECODE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/determinize.h b/kaldi_io/src/tools/openfst/include/fst/script/determinize.h new file mode 100644 index 0000000..38fd7ad --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/determinize.h @@ -0,0 +1,68 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_DETERMINIZE_H_ +#define FST_SCRIPT_DETERMINIZE_H_ + +#include <fst/determinize.h> +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/script/weight-class.h> + +namespace fst { +namespace script { + +struct DeterminizeOptions { + float delta; + WeightClass weight_threshold; + int64 state_threshold; + int64 subsequential_label; + + explicit DeterminizeOptions(float d = fst::kDelta, + WeightClass w = + fst::script::WeightClass::Zero(), + int64 n = fst::kNoStateId, int64 l = 0) + : delta(d), weight_threshold(w), state_threshold(n), + subsequential_label(l) {} +}; + +typedef args::Package<const FstClass&, MutableFstClass*, + const DeterminizeOptions &> DeterminizeArgs; + +template<class Arc> +void Determinize(DeterminizeArgs *args) { + const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); + const DeterminizeOptions &opts = args->arg3; + + fst::DeterminizeOptions<Arc> detargs; + detargs.delta = opts.delta; + detargs.weight_threshold = + *(opts.weight_threshold.GetWeight<typename Arc::Weight>()); + detargs.state_threshold = opts.state_threshold; + detargs.subsequential_label = opts.subsequential_label; + + Determinize(ifst, ofst, detargs); +} + +void Determinize(const FstClass &ifst, MutableFstClass *ofst, + const DeterminizeOptions &opts = + fst::script::DeterminizeOptions()); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_DETERMINIZE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/difference.h b/kaldi_io/src/tools/openfst/include/fst/script/difference.h new file mode 100644 index 0000000..76490d4 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/difference.h @@ -0,0 +1,67 @@ + +// Licensed under the Apache License, 
Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_DIFFERENCE_H_ +#define FST_SCRIPT_DIFFERENCE_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/script/compose.h> // for ComposeFilter +#include <fst/difference.h> + +namespace fst { +namespace script { + +typedef args::Package<const FstClass&, const FstClass&, + MutableFstClass*, ComposeFilter> DifferenceArgs1; + +template<class Arc> +void Difference(DifferenceArgs1 *args) { + const Fst<Arc> &ifst1 = *(args->arg1.GetFst<Arc>()); + const Fst<Arc> &ifst2 = *(args->arg2.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>(); + + Difference(ifst1, ifst2, ofst, args->arg4); +} + +typedef args::Package<const FstClass&, const FstClass&, + MutableFstClass*, const ComposeOptions &> DifferenceArgs2; + +template<class Arc> +void Difference(DifferenceArgs2 *args) { + const Fst<Arc> &ifst1 = *(args->arg1.GetFst<Arc>()); + const Fst<Arc> &ifst2 = *(args->arg2.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>(); + + Difference(ifst1, ifst2, ofst, args->arg4); +} + + +void Difference(const FstClass &ifst1, const FstClass &ifst2, + MutableFstClass *ofst, + ComposeFilter compose_filter); + +void Difference(const FstClass &ifst1, const FstClass &ifst2, + MutableFstClass *ofst, + const ComposeOptions &opts = fst::script::ComposeOptions()); + + +} // namespace script +} // namespace 
fst + + + +#endif // FST_SCRIPT_DIFFERENCE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/disambiguate.h b/kaldi_io/src/tools/openfst/include/fst/script/disambiguate.h new file mode 100644 index 0000000..e42a9c2 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/disambiguate.h @@ -0,0 +1,68 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_DISAMBIGUATE_H_ +#define FST_SCRIPT_DISAMBIGUATE_H_ + +#include <fst/disambiguate.h> +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/script/weight-class.h> + +namespace fst { +namespace script { + +struct DisambiguateOptions { + float delta; + WeightClass weight_threshold; + int64 state_threshold; + int64 subsequential_label; + + explicit DisambiguateOptions(float d = fst::kDelta, + WeightClass w = + fst::script::WeightClass::Zero(), + int64 n = fst::kNoStateId, int64 l = 0) + : delta(d), weight_threshold(w), state_threshold(n), + subsequential_label(l) {} +}; + +typedef args::Package<const FstClass&, MutableFstClass*, + const DisambiguateOptions &> DisambiguateArgs; + +template<class Arc> +void Disambiguate(DisambiguateArgs *args) { + const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); + const DisambiguateOptions &opts = args->arg3; + + fst::DisambiguateOptions<Arc> detargs; + 
detargs.delta = opts.delta; + detargs.weight_threshold = + *(opts.weight_threshold.GetWeight<typename Arc::Weight>()); + detargs.state_threshold = opts.state_threshold; + detargs.subsequential_label = opts.subsequential_label; + + Disambiguate(ifst, ofst, detargs); +} + +void Disambiguate(const FstClass &ifst, MutableFstClass *ofst, + const DisambiguateOptions &opts = + fst::script::DisambiguateOptions()); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_DISAMBIGUATE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/draw-impl.h b/kaldi_io/src/tools/openfst/include/fst/script/draw-impl.h new file mode 100644 index 0000000..893e258 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/draw-impl.h @@ -0,0 +1,234 @@ +// draw.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) +// +// \file +// Class to draw a binary FST by producing a text file in dot format, +// helper class to fstdraw.cc + +#ifndef FST_SCRIPT_DRAW_IMPL_H_ +#define FST_SCRIPT_DRAW_IMPL_H_ + +#include <sstream> +#include <string> + +#include <fst/script/fst-class.h> +#include <fst/fst.h> +#include <fst/util.h> + +namespace fst { + +// Print a binary Fst in the dot textual format, helper class for fstdraw.cc +// WARNING: Stand-alone use not recommend. 
+template <class A> class FstDrawer { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + + FstDrawer(const Fst<A> &fst, + const SymbolTable *isyms, + const SymbolTable *osyms, + const SymbolTable *ssyms, + bool accep, + string title, + float width, + float height, + bool portrait, + bool vertical, + float ranksep, + float nodesep, + int fontsize, + int precision, + bool show_weight_one) + : fst_(fst), isyms_(isyms), osyms_(osyms), ssyms_(ssyms), + accep_(accep && fst.Properties(kAcceptor, true)), ostrm_(0), + title_(title), width_(width), height_(height), portrait_(portrait), + vertical_(vertical), ranksep_(ranksep), nodesep_(nodesep), + fontsize_(fontsize), precision_(precision), + show_weight_one_(show_weight_one) {} + + // Draw Fst to an output buffer (or stdout if buf = 0) + void Draw(ostream *strm, const string &dest) { + ostrm_ = strm; + dest_ = dest; + StateId start = fst_.Start(); + if (start == kNoStateId) + return; + + PrintString("digraph FST {\n"); + if (vertical_) + PrintString("rankdir = BT;\n"); + else + PrintString("rankdir = LR;\n"); + PrintString("size = \""); + Print(width_); + PrintString(","); + Print(height_); + PrintString("\";\n"); + if (!dest_.empty()) + PrintString("label = \"" + title_ + "\";\n"); + PrintString("center = 1;\n"); + if (portrait_) + PrintString("orientation = Portrait;\n"); + else + PrintString("orientation = Landscape;\n"); + PrintString("ranksep = \""); + Print(ranksep_); + PrintString("\";\n"); + PrintString("nodesep = \""); + Print(nodesep_); + PrintString("\";\n"); + // initial state first + DrawState(start); + for (StateIterator< Fst<A> > siter(fst_); + !siter.Done(); + siter.Next()) { + StateId s = siter.Value(); + if (s != start) + DrawState(s); + } + PrintString("}\n"); + } + + private: + // Maximum line length in text file. 
+ static const int kLineLen = 8096; + + void PrintString(const string &s) const { + *ostrm_ << s; + } + + // Escapes backslash and double quote if these occur in the string. Dot will + // not deal gracefully with these if they are not escaped. + inline void EscapeChars(const string &s, string* ns) const { + const char* c = s.c_str(); + while (*c) { + if (*c == '\\' || *c == '"') ns->push_back('\\'); + ns->push_back(*c); + ++c; + } + } + + void PrintId(int64 id, const SymbolTable *syms, + const char *name) const { + if (syms) { + string symbol = syms->Find(id); + if (symbol == "") { + FSTERROR() << "FstDrawer: Integer " << id + << " is not mapped to any textual symbol" + << ", symbol table = " << syms->Name() + << ", destination = " << dest_; + symbol = "?"; + } + string nsymbol; + EscapeChars(symbol, &nsymbol); + PrintString(nsymbol); + } else { + string idstr; + Int64ToStr(id, &idstr); + PrintString(idstr); + } + } + + void PrintStateId(StateId s) const { + PrintId(s, ssyms_, "state ID"); + } + + void PrintILabel(Label l) const { + PrintId(l, isyms_, "arc input label"); + } + + void PrintOLabel(Label l) const { + PrintId(l, osyms_, "arc output label"); + } + + template <class T> + void Print(T t) const { + *ostrm_ << t; + } + + void DrawState(StateId s) const { + Print(s); + PrintString(" [label = \""); + PrintStateId(s); + Weight final = fst_.Final(s); + if (final != Weight::Zero()) { + if (show_weight_one_ || (final != Weight::One())) { + PrintString("/"); + Print(final); + } + PrintString("\", shape = doublecircle,"); + } else { + PrintString("\", shape = circle,"); + } + if (s == fst_.Start()) + PrintString(" style = bold,"); + else + PrintString(" style = solid,"); + PrintString(" fontsize = "); + Print(fontsize_); + PrintString("]\n"); + for (ArcIterator< Fst<A> > aiter(fst_, s); + !aiter.Done(); + aiter.Next()) { + Arc arc = aiter.Value(); + PrintString("\t"); + Print(s); + PrintString(" -> "); + Print(arc.nextstate); + PrintString(" [label = \""); + 
PrintILabel(arc.ilabel); + if (!accep_) { + PrintString(":"); + PrintOLabel(arc.olabel); + } + if (show_weight_one_ || (arc.weight != Weight::One())) { + PrintString("/"); + Print(arc.weight); + } + PrintString("\", fontsize = "); + Print(fontsize_); + PrintString("];\n"); + } + } + + const Fst<A> &fst_; + const SymbolTable *isyms_; // ilabel symbol table + const SymbolTable *osyms_; // olabel symbol table + const SymbolTable *ssyms_; // slabel symbol table + bool accep_; // print as acceptor when possible + ostream *ostrm_; // drawn FST destination + string dest_; // drawn FST destination name + + string title_; + float width_; + float height_; + bool portrait_; + bool vertical_; + float ranksep_; + float nodesep_; + int fontsize_; + int precision_; + bool show_weight_one_; + + DISALLOW_COPY_AND_ASSIGN(FstDrawer); +}; + +} // namespace fst + +#endif // FST_SCRIPT_DRAW_IMPL_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/draw.h b/kaldi_io/src/tools/openfst/include/fst/script/draw.h new file mode 100644 index 0000000..2b66373 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/draw.h @@ -0,0 +1,114 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_DRAW_H_ +#define FST_SCRIPT_DRAW_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/script/draw-impl.h> +#include <iostream> +#include <fstream> +#include <sstream> + +namespace fst { +namespace script { + +// Note: it is safe to pass these strings as references because +// this struct is only used to pass them deeper in the call graph. +// Be sure you understand why this is so before using this struct +// for anything else! +struct FstDrawerArgs { + const FstClass &fst; + const SymbolTable *isyms; + const SymbolTable *osyms; + const SymbolTable *ssyms; + const bool accep; + const string& title; + const float width; + const float height; + const bool portrait; + const bool vertical; + const float ranksep; + const float nodesep; + const int fontsize; + const int precision; + const bool show_weight_one; + ostream *ostrm; + const string &dest; + + FstDrawerArgs(const FstClass &fst, + const SymbolTable *isyms, + const SymbolTable *osyms, + const SymbolTable *ssyms, + bool accep, + const string &title, + float width, + float height, + bool portrait, + bool vertical, + float ranksep, + float nodesep, + int fontsize, + int precision, + bool show_weight_one, + ostream *ostrm, + const string &dest) : + fst(fst), isyms(isyms), osyms(osyms), ssyms(ssyms), accep(accep), + title(title), width(width), height(height), portrait(portrait), + vertical(vertical), ranksep(ranksep), nodesep(nodesep), + fontsize(fontsize), precision(precision), + show_weight_one(show_weight_one), ostrm(ostrm), dest(dest) { } +}; + + +template<class Arc> +void DrawFst(FstDrawerArgs *args) { + const Fst<Arc> &fst = *(args->fst.GetFst<Arc>()); + + FstDrawer<Arc> fstdrawer(fst, args->isyms, args->osyms, args->ssyms, + args->accep, args->title, args->width, + args->height, args->portrait, + args->vertical, args->ranksep, + args->nodesep, args->fontsize, + args->precision, args->show_weight_one); + 
fstdrawer.Draw(args->ostrm, args->dest); +} + +void DrawFst(const FstClass &fst, + const SymbolTable *isyms, + const SymbolTable *osyms, + const SymbolTable *ssyms, + bool accep, + const string &title, + float width, + float height, + bool portrait, + bool vertical, + float ranksep, + float nodesep, + int fontsize, + int precision, + bool show_weight_one, + ostream *ostrm, + const string &dest); + +} // namespace script +} // namespace fst + + + +#endif // FST_SCRIPT_DRAW_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/encode.h b/kaldi_io/src/tools/openfst/include/fst/script/encode.h new file mode 100644 index 0000000..dc1a290 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/encode.h @@ -0,0 +1,58 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_ENCODE_H_ +#define FST_SCRIPT_ENCODE_H_ + +#include <string> + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/encode.h> + +namespace fst { +namespace script { + +typedef args::Package<MutableFstClass*, uint32, bool, + const string &> EncodeArgs; + +template<class Arc> +void Encode(EncodeArgs *args) { + MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>(); + bool reuse_encoder = args->arg3; + const string &coder_fname = args->arg4; + uint32 flags = args->arg2; + + EncodeMapper<Arc> *encoder = reuse_encoder + ? 
EncodeMapper<Arc>::Read(coder_fname, ENCODE) + : new EncodeMapper<Arc>(flags, ENCODE); + + Encode(ofst, encoder); + if (!args->arg3) + encoder->Write(coder_fname); + + delete encoder; +} + +void Encode(MutableFstClass *fst, uint32 flags, bool reuse_encoder, + const string &coder_fname); + +} // namespace script +} // namespace fst + + + +#endif // FST_SCRIPT_ENCODE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/epsnormalize.h b/kaldi_io/src/tools/openfst/include/fst/script/epsnormalize.h new file mode 100644 index 0000000..50b12da --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/epsnormalize.h @@ -0,0 +1,44 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_EPSNORMALIZE_H_ +#define FST_SCRIPT_EPSNORMALIZE_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/epsnormalize.h> + +namespace fst { +namespace script { + +typedef args::Package<const FstClass&, MutableFstClass*, + EpsNormalizeType> EpsNormalizeArgs; + +template<class Arc> +void EpsNormalize(EpsNormalizeArgs *args) { + const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); + + EpsNormalize(ifst, ofst, args->arg3); +} + +void EpsNormalize(const FstClass &ifst, MutableFstClass *ofst, + EpsNormalizeType norm_type = EPS_NORM_INPUT); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_EPSNORMALIZE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/equal.h b/kaldi_io/src/tools/openfst/include/fst/script/equal.h new file mode 100644 index 0000000..9fb2d3c --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/equal.h @@ -0,0 +1,45 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_EQUAL_H_ +#define FST_SCRIPT_EQUAL_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/equal.h> + +namespace fst { +namespace script { + +typedef args::Package<const FstClass&, const FstClass&, float> EqualInnerArgs; +typedef args::WithReturnValue<bool, EqualInnerArgs> EqualArgs; + +template<class Arc> +void Equal(EqualArgs *args) { + const Fst<Arc> &fst1 = *(args->args.arg1.GetFst<Arc>()); + const Fst<Arc> &fst2 = *(args->args.arg2.GetFst<Arc>()); + + args->retval = Equal(fst1, fst2, args->args.arg3); +} + +bool Equal(const FstClass &fst1, const FstClass &fst2, + float delta = kDelta); + +} // namespace script +} // namespace fst + + +#endif // FST_SCRIPT_EQUAL_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/equivalent.h b/kaldi_io/src/tools/openfst/include/fst/script/equivalent.h new file mode 100644 index 0000000..43460c6 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/equivalent.h @@ -0,0 +1,47 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_EQUIVALENT_H_ +#define FST_SCRIPT_EQUIVALENT_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/equivalent.h> + +namespace fst { +namespace script { + +typedef args::Package<const FstClass &, const FstClass &, + float> EquivalentInnerArgs; +typedef args::WithReturnValue<bool, EquivalentInnerArgs> EquivalentArgs; + +template<class Arc> +void Equivalent(EquivalentArgs *args) { + const Fst<Arc> &fst1 = *(args->args.arg1.GetFst<Arc>()); + const Fst<Arc> &fst2 = *(args->args.arg2.GetFst<Arc>()); + + args->retval = Equivalent(fst1, fst2, args->args.arg3); +} + +bool Equivalent(const FstClass &fst1, const FstClass &fst2, + float delta = kDelta); + +} // namespace script +} // namespace fst + + + +#endif // FST_SCRIPT_EQUIVALENT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/fst-class.h b/kaldi_io/src/tools/openfst/include/fst/script/fst-class.h new file mode 100644 index 0000000..fe2cf53 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/fst-class.h @@ -0,0 +1,382 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_FST_CLASS_H_ +#define FST_SCRIPT_FST_CLASS_H_ + +#include <string> + +#include <fst/fst.h> +#include <fst/mutable-fst.h> +#include <fst/vector-fst.h> +#include <iostream> +#include <fstream> +#include <sstream> + +// Classes to support "boxing" all existing types of FST arcs in a single +// FstClass which hides the arc types. This allows clients to load +// and work with FSTs without knowing the arc type. + +// These classes are only recommended for use in high-level scripting +// applications. Most users should use the lower-level templated versions +// corresponding to these classes. + +namespace fst { +namespace script { + +// +// Abstract base class defining the set of functionalities implemented +// in all impls, and passed through by all bases Below FstClassBase +// the class hierarchy bifurcates; FstClassImplBase serves as the base +// class for all implementations (of which FstClassImpl is currently +// the only one) and FstClass serves as the base class for all +// interfaces. +// +class FstClassBase { + public: + virtual const string &ArcType() const = 0; + virtual const string &FstType() const = 0; + virtual const string &WeightType() const = 0; + virtual const SymbolTable *InputSymbols() const = 0; + virtual const SymbolTable *OutputSymbols() const = 0; + virtual bool Write(const string& fname) const = 0; + virtual bool Write(ostream &ostr, const FstWriteOptions &opts) const = 0; + virtual uint64 Properties(uint64 mask, bool test) const = 0; + virtual ~FstClassBase() { } +}; + +class FstClassImplBase : public FstClassBase { + public: + virtual FstClassImplBase *Copy() = 0; + virtual void SetInputSymbols(SymbolTable *is) = 0; + virtual void SetOutputSymbols(SymbolTable *is) = 0; + virtual ~FstClassImplBase() { } +}; + + +// +// CONTAINER CLASS +// Wraps an Fst<Arc>, hiding its arc type. Whether this Fst<Arc> +// pointer refers to a special kind of FST (e.g. 
a MutableFst) is +// known by the type of interface class that owns the pointer to this +// container. +// + +template<class Arc> +class FstClassImpl : public FstClassImplBase { + public: + explicit FstClassImpl(Fst<Arc> *impl, + bool should_own = false) : + impl_(should_own ? impl : impl->Copy()) { } + + explicit FstClassImpl(const Fst<Arc> &impl) : impl_(impl.Copy()) { } + + virtual const string &ArcType() const { + return Arc::Type(); + } + + virtual const string &FstType() const { + return impl_->Type(); + } + + virtual const string &WeightType() const { + return Arc::Weight::Type(); + } + + virtual const SymbolTable *InputSymbols() const { + return impl_->InputSymbols(); + } + + virtual const SymbolTable *OutputSymbols() const { + return impl_->OutputSymbols(); + } + + // Warning: calling this method casts the FST to a mutable FST. + virtual void SetInputSymbols(SymbolTable *is) { + static_cast<MutableFst<Arc> *>(impl_)->SetInputSymbols(is); + } + + // Warning: calling this method casts the FST to a mutable FST. 
+ virtual void SetOutputSymbols(SymbolTable *os) { + static_cast<MutableFst<Arc> *>(impl_)->SetOutputSymbols(os); + } + + virtual bool Write(const string &fname) const { + return impl_->Write(fname); + } + + virtual bool Write(ostream &ostr, const FstWriteOptions &opts) const { + return impl_->Write(ostr, opts); + } + + virtual uint64 Properties(uint64 mask, bool test) const { + return impl_->Properties(mask, test); + } + + virtual ~FstClassImpl() { delete impl_; } + + Fst<Arc> *GetImpl() const { return impl_; } + + Fst<Arc> *GetImpl() { return impl_; } + + virtual FstClassImpl *Copy() { + return new FstClassImpl<Arc>(impl_); + } + + private: + Fst<Arc> *impl_; +}; + +// +// BASE CLASS DEFINITIONS +// + +class MutableFstClass; + +class FstClass : public FstClassBase { + public: + template<class Arc> + static FstClass *Read(istream &stream, + const FstReadOptions &opts) { + if (!opts.header) { + FSTERROR() << "FstClass::Read: options header not specified"; + return 0; + } + const FstHeader &hdr = *opts.header; + + if (hdr.Properties() & kMutable) { + return ReadTypedFst<MutableFstClass, MutableFst<Arc> >(stream, opts); + } else { + return ReadTypedFst<FstClass, Fst<Arc> >(stream, opts); + } + } + + FstClass() : impl_(NULL) { + } + + template<class Arc> + explicit FstClass(const Fst<Arc> &fst) : impl_(new FstClassImpl<Arc>(fst)) { + } + + FstClass(const FstClass &other) : impl_(other.impl_->Copy()) { } + + FstClass &operator=(const FstClass &other) { + delete impl_; + impl_ = other.impl_->Copy(); + return *this; + } + + static FstClass *Read(const string &fname); + + static FstClass *Read(istream &istr, const string &source); + + virtual const string &ArcType() const { + return impl_->ArcType(); + } + + virtual const string& FstType() const { + return impl_->FstType(); + } + + virtual const SymbolTable *InputSymbols() const { + return impl_->InputSymbols(); + } + + virtual const SymbolTable *OutputSymbols() const { + return impl_->OutputSymbols(); + } + + virtual 
const string& WeightType() const { + return impl_->WeightType(); + } + + virtual bool Write(const string &fname) const { + return impl_->Write(fname); + } + + virtual bool Write(ostream &ostr, const FstWriteOptions &opts) const { + return impl_->Write(ostr, opts); + } + + virtual uint64 Properties(uint64 mask, bool test) const { + return impl_->Properties(mask, test); + } + + template<class Arc> + const Fst<Arc> *GetFst() const { + if (Arc::Type() != ArcType()) { + return NULL; + } else { + FstClassImpl<Arc> *typed_impl = static_cast<FstClassImpl<Arc> *>(impl_); + return typed_impl->GetImpl(); + } + } + + virtual ~FstClass() { delete impl_; } + + // These methods are required by IO registration + template<class Arc> + static FstClassImplBase *Convert(const FstClass &other) { + LOG(ERROR) << "Doesn't make sense to convert any class to type FstClass."; + return 0; + } + + template<class Arc> + static FstClassImplBase *Create() { + LOG(ERROR) << "Doesn't make sense to create an FstClass with a " + << "particular arc type."; + return 0; + } + + + protected: + explicit FstClass(FstClassImplBase *impl) : impl_(impl) { } + + // Generic template method for reading an arc-templated FST of type + // UnderlyingT, and returning it wrapped as FstClassT, with appropriate + // error checking. Called from arc-templated Read() static methods. 
+ template<class FstClassT, class UnderlyingT> + static FstClassT* ReadTypedFst(istream &stream, + const FstReadOptions &opts) { + UnderlyingT *u = UnderlyingT::Read(stream, opts); + if (!u) { + return 0; + } else { + FstClassT *r = new FstClassT(*u); + delete u; + return r; + } + } + + FstClassImplBase *GetImpl() const { return impl_; } + + FstClassImplBase *GetImpl() { return impl_; } + +// friend ostream &operator<<(ostream&, const FstClass&); + + private: + FstClassImplBase *impl_; +}; + +// +// Specific types of FstClass with special properties +// + +class MutableFstClass : public FstClass { + public: + template<class Arc> + explicit MutableFstClass(const MutableFst<Arc> &fst) : + FstClass(fst) { } + + template<class Arc> + MutableFst<Arc> *GetMutableFst() { + Fst<Arc> *fst = const_cast<Fst<Arc> *>(this->GetFst<Arc>()); + MutableFst<Arc> *mfst = static_cast<MutableFst<Arc> *>(fst); + + return mfst; + } + + template<class Arc> + static MutableFstClass *Read(istream &stream, + const FstReadOptions &opts) { + MutableFst<Arc> *mfst = MutableFst<Arc>::Read(stream, opts); + if (!mfst) { + return 0; + } else { + MutableFstClass *retval = new MutableFstClass(*mfst); + delete mfst; + return retval; + } + } + + virtual bool Write(const string &fname) const { + return GetImpl()->Write(fname); + } + + virtual bool Write(ostream &ostr, const FstWriteOptions &opts) const { + return GetImpl()->Write(ostr, opts); + } + + static MutableFstClass *Read(const string &fname, bool convert = false); + + virtual void SetInputSymbols(SymbolTable *is) { + GetImpl()->SetInputSymbols(is); + } + + virtual void SetOutputSymbols(SymbolTable *os) { + GetImpl()->SetOutputSymbols(os); + } + + // These methods are required by IO registration + template<class Arc> + static FstClassImplBase *Convert(const FstClass &other) { + LOG(ERROR) << "Doesn't make sense to convert any class to type " + << "MutableFstClass."; + return 0; + } + + template<class Arc> + static FstClassImplBase *Create() { + 
LOG(ERROR) << "Doesn't make sense to create a MutableFstClass with a " + << "particular arc type."; + return 0; + } + + protected: + explicit MutableFstClass(FstClassImplBase *impl) : FstClass(impl) { } +}; + + +class VectorFstClass : public MutableFstClass { + public: + explicit VectorFstClass(const FstClass &other); + explicit VectorFstClass(const string &arc_type); + + template<class Arc> + explicit VectorFstClass(const VectorFst<Arc> &fst) : + MutableFstClass(fst) { } + + template<class Arc> + static VectorFstClass *Read(istream &stream, + const FstReadOptions &opts) { + VectorFst<Arc> *vfst = VectorFst<Arc>::Read(stream, opts); + if (!vfst) { + return 0; + } else { + VectorFstClass *retval = new VectorFstClass(*vfst); + delete vfst; + return retval; + } + } + + static VectorFstClass *Read(const string &fname); + + // Converter / creator for known arc types + template<class Arc> + static FstClassImplBase *Convert(const FstClass &other) { + return new FstClassImpl<Arc>(new VectorFst<Arc>( + *other.GetFst<Arc>()), true); + } + + template<class Arc> + static FstClassImplBase *Create() { + return new FstClassImpl<Arc>(new VectorFst<Arc>(), true); + } +}; + +} // namespace script +} // namespace fst +#endif // FST_SCRIPT_FST_CLASS_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/fstscript-decl.h b/kaldi_io/src/tools/openfst/include/fst/script/fstscript-decl.h new file mode 100644 index 0000000..fee813e --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/fstscript-decl.h @@ -0,0 +1,35 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +// Forward declarations for the FST and FST-script classes. + +#ifndef FST_SCRIPT_FSTSCRIPT_DECL_H_ +#define FST_SCRIPT_FSTSCRIPT_DECL_H_ + +#include <fst/fst-decl.h> + +namespace fst { +namespace script { + +class FstClass; +class MutableFstClass; +class VectorFstClass; +class WeightClass; + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_FSTSCRIPT_DECL_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/fstscript.h b/kaldi_io/src/tools/openfst/include/fst/script/fstscript.h new file mode 100644 index 0000000..90e1e75 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/fstscript.h @@ -0,0 +1,154 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jake Ratkiewicz) + +// Convenience file that includes all FstScript functionality + +#ifndef FST_SCRIPT_FSTSCRIPT_H_ +#define FST_SCRIPT_FSTSCRIPT_H_ + +// Major classes +#include <fst/script/fst-class.h> +#include <fst/script/weight-class.h> +#include <fst/script/text-io.h> + +// Templates like Operation< >, Apply< > +#include <fst/script/script-impl.h> + +// Operations +#include <fst/script/arcsort.h> +#include <fst/script/closure.h> +#include <fst/script/compile.h> +#include <fst/script/compose.h> +#include <fst/script/concat.h> +#include <fst/script/connect.h> +#include <fst/script/convert.h> +#include <fst/script/decode.h> +#include <fst/script/determinize.h> +#include <fst/script/difference.h> +#include <fst/script/draw.h> +#include <fst/script/encode.h> +#include <fst/script/epsnormalize.h> +#include <fst/script/equal.h> +#include <fst/script/equivalent.h> +#include <fst/script/info.h> +#include <fst/script/intersect.h> +#include <fst/script/invert.h> +#include <fst/script/map.h> +#include <fst/script/minimize.h> +#include <fst/script/print.h> +#include <fst/script/project.h> +#include <fst/script/prune.h> +#include <fst/script/push.h> +#include <fst/script/randequivalent.h> +#include <fst/script/randgen.h> +#include <fst/script/relabel.h> +#include <fst/script/replace.h> +#include <fst/script/reverse.h> +#include <fst/script/reweight.h> +#include <fst/script/rmepsilon.h> +#include <fst/script/shortest-distance.h> +#include <fst/script/shortest-path.h> +#include <fst/script/symbols.h> +#include <fst/script/synchronize.h> +#include <fst/script/topsort.h> +#include <fst/script/union.h> +#include <fst/script/verify.h> + +// +// REGISTER OPERATIONS +// + + +// This class is necessary because registering each of the operations +// separately overfills the stack, as there are so many of them. +// Its constructor runs RegisterBatch1() and then RegisterBatch2(), which +// together issue one REGISTER_FST_OPERATION per operation/argument pack. 
+namespace fst { +namespace script { +template<class Arc> +class AllFstOperationsRegisterer { + public: + AllFstOperationsRegisterer() { + RegisterBatch1(); + RegisterBatch2(); + } + + private: + void RegisterBatch1() { + REGISTER_FST_OPERATION(ArcSort, Arc, ArcSortArgs); + REGISTER_FST_OPERATION(Closure, Arc, ClosureArgs); + REGISTER_FST_OPERATION(CompileFst, Arc, FstCompileArgs); + REGISTER_FST_OPERATION(Compose, Arc, ComposeArgs1); + REGISTER_FST_OPERATION(Compose, Arc, ComposeArgs2); + REGISTER_FST_OPERATION(Concat, Arc, ConcatArgs1); + REGISTER_FST_OPERATION(Concat, Arc, ConcatArgs2); + REGISTER_FST_OPERATION(Connect, Arc, MutableFstClass); + REGISTER_FST_OPERATION(Convert, Arc, ConvertArgs); + REGISTER_FST_OPERATION(Decode, Arc, DecodeArgs); + REGISTER_FST_OPERATION(Determinize, Arc, DeterminizeArgs); + REGISTER_FST_OPERATION(Difference, Arc, DifferenceArgs1); + REGISTER_FST_OPERATION(Difference, Arc, DifferenceArgs2); + REGISTER_FST_OPERATION(DrawFst, Arc, FstDrawerArgs); + REGISTER_FST_OPERATION(Encode, Arc, EncodeArgs); + REGISTER_FST_OPERATION(EpsNormalize, Arc, EpsNormalizeArgs); + REGISTER_FST_OPERATION(Equal, Arc, EqualArgs); + REGISTER_FST_OPERATION(Equivalent, Arc, EquivalentArgs); + REGISTER_FST_OPERATION(PrintFstInfo, Arc, InfoArgs); + REGISTER_FST_OPERATION(Intersect, Arc, IntersectArgs1); + REGISTER_FST_OPERATION(Intersect, Arc, IntersectArgs2); + REGISTER_FST_OPERATION(Invert, Arc, MutableFstClass); + REGISTER_FST_OPERATION(Map, Arc, MapArgs); + REGISTER_FST_OPERATION(Minimize, Arc, MinimizeArgs); + } + + void RegisterBatch2() { + REGISTER_FST_OPERATION(PrintFst, Arc, FstPrinterArgs); + REGISTER_FST_OPERATION(Project, Arc, ProjectArgs); + REGISTER_FST_OPERATION(Prune, Arc, PruneArgs1); + REGISTER_FST_OPERATION(Prune, Arc, PruneArgs2); + REGISTER_FST_OPERATION(Prune, Arc, PruneArgs3); + REGISTER_FST_OPERATION(Prune, Arc, PruneArgs4); + REGISTER_FST_OPERATION(Push, Arc, PushArgs1); + REGISTER_FST_OPERATION(Push, Arc, PushArgs2); + 
REGISTER_FST_OPERATION(RandEquivalent, Arc, RandEquivalentArgs1); + REGISTER_FST_OPERATION(RandEquivalent, Arc, RandEquivalentArgs2); + REGISTER_FST_OPERATION(RandGen, Arc, RandGenArgs); + REGISTER_FST_OPERATION(Relabel, Arc, RelabelArgs1); + REGISTER_FST_OPERATION(Relabel, Arc, RelabelArgs2); + REGISTER_FST_OPERATION(Relabel, Arc, RelabelArgs3); + REGISTER_FST_OPERATION(Replace, Arc, ReplaceArgs); + REGISTER_FST_OPERATION(Reverse, Arc, ReverseArgs); + REGISTER_FST_OPERATION(Reweight, Arc, ReweightArgs); + REGISTER_FST_OPERATION(RmEpsilon, Arc, RmEpsilonArgs1); + REGISTER_FST_OPERATION(RmEpsilon, Arc, RmEpsilonArgs2); + REGISTER_FST_OPERATION(RmEpsilon, Arc, RmEpsilonArgs3); + REGISTER_FST_OPERATION(ShortestDistance, Arc, ShortestDistanceArgs1); + REGISTER_FST_OPERATION(ShortestDistance, Arc, ShortestDistanceArgs2); + REGISTER_FST_OPERATION(ShortestDistance, Arc, ShortestDistanceArgs3); + REGISTER_FST_OPERATION(ShortestPath, Arc, ShortestPathArgs1); + REGISTER_FST_OPERATION(ShortestPath, Arc, ShortestPathArgs2); + REGISTER_FST_OPERATION(Synchronize, Arc, SynchronizeArgs); + REGISTER_FST_OPERATION(TopSort, Arc, TopSortArgs); + REGISTER_FST_OPERATION(Union, Arc, UnionArgs); + REGISTER_FST_OPERATION(Verify, Arc, VerifyArgs); + } +}; +} // namespace script +} // namespace fst + + +// Declares an AllFstOperationsRegisterer<Arc> object whose name is +// register_all_fst_operations with Arc pasted on; constructing that object +// runs both registration batches for Arc. 
+#define REGISTER_FST_OPERATIONS(Arc) \ + AllFstOperationsRegisterer<Arc> register_all_fst_operations ## Arc; + +#endif // FST_SCRIPT_FSTSCRIPT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/info-impl.h b/kaldi_io/src/tools/openfst/include/fst/script/info-impl.h new file mode 100644 index 0000000..408fbcd --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/info-impl.h @@ -0,0 +1,325 @@ +// info-impl.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Class to compute various information about FSTs, helper class for fstinfo.cc + +#ifndef FST_SCRIPT_INFO_IMPL_H_ +#define FST_SCRIPT_INFO_IMPL_H_ + +#include <string> +#include <vector> +using std::vector; + +#include <fst/connect.h> +#include <fst/dfs-visit.h> +#include <fst/fst.h> +#include <fst/lookahead-matcher.h> +#include <fst/matcher.h> +#include <fst/queue.h> +#include <fst/test-properties.h> +#include <fst/verify.h> +#include <fst/visit.h> + +namespace fst { + +// Compute various information about FSTs, helper class for fstinfo.cc. +// WARNING: Stand-alone use of this class is not recommended, most code +// should call directly the relevant library functions: Fst<A>::NumStates, +// Fst<A>::NumArcs, TestProperties, ... +template <class A> class FstInfo { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + + // When info_type is "short" (or "auto" and not an ExpandedFst) + // then only minimal info is computed and can be requested. + FstInfo(const Fst<A> &fst, bool test_properties, + const string &arc_filter_type = "any", + string info_type = "auto", bool verify = true) + : fst_type_(fst.Type()), + input_symbols_(fst.InputSymbols() ? + fst.InputSymbols()->Name() : "none"), + output_symbols_(fst.OutputSymbols() ? 
+ fst.OutputSymbols()->Name() : "none"), + nstates_(0), narcs_(0), start_(kNoStateId), nfinal_(0), + nepsilons_(0), niepsilons_(0), noepsilons_(0), + naccess_(0), ncoaccess_(0), nconnect_(0), ncc_(0), nscc_(0), + input_match_type_(MATCH_NONE), output_match_type_(MATCH_NONE), + input_lookahead_(false), output_lookahead_(false), + properties_(0), arc_filter_type_(arc_filter_type), long_info_(true) { + if (info_type == "long") { + long_info_ = true; + } else if (info_type == "short") { + long_info_ = false; + } else if (info_type == "auto") { + long_info_ = fst.Properties(kExpanded, false); + } else { + FSTERROR() << "Bad info type: " << info_type; + return; + } + + if (!long_info_) + return; + + // If the FST is not sane, we return. + if (verify && !Verify(fst)) { + FSTERROR() << "FstInfo: Verify: FST not well-formed."; + return; + } + + start_ = fst.Start(); + properties_ = fst.Properties(kFstProperties, test_properties); + + for (StateIterator< Fst<A> > siter(fst); + !siter.Done(); + siter.Next()) { + ++nstates_; + StateId s = siter.Value(); + if (fst.Final(s) != Weight::Zero()) + ++nfinal_; + for (ArcIterator< Fst<A> > aiter(fst, s); + !aiter.Done(); + aiter.Next()) { + const A &arc = aiter.Value(); + ++narcs_; + if (arc.ilabel == 0 && arc.olabel == 0) + ++nepsilons_; + if (arc.ilabel == 0) + ++niepsilons_; + if (arc.olabel == 0) + ++noepsilons_; + } + } + + { + vector<StateId> cc; + CcVisitor<Arc> cc_visitor(&cc); + FifoQueue<StateId> fifo_queue; + if (arc_filter_type == "any") { + Visit(fst, &cc_visitor, &fifo_queue); + } else if (arc_filter_type == "epsilon") { + Visit(fst, &cc_visitor, &fifo_queue, EpsilonArcFilter<Arc>()); + } else if (arc_filter_type == "iepsilon") { + Visit(fst, &cc_visitor, &fifo_queue, InputEpsilonArcFilter<Arc>()); + } else if (arc_filter_type == "oepsilon") { + Visit(fst, &cc_visitor, &fifo_queue, OutputEpsilonArcFilter<Arc>()); + } else { + FSTERROR() << "Bad arc filter type: " << arc_filter_type; + return; + } + + for (StateId s = 0; 
s < cc.size(); ++s) { + if (cc[s] >= ncc_) + ncc_ = cc[s] + 1; + } + } + + { + vector<StateId> scc; + vector<bool> access, coaccess; + uint64 props = 0; + SccVisitor<Arc> scc_visitor(&scc, &access, &coaccess, &props); + if (arc_filter_type == "any") { + DfsVisit(fst, &scc_visitor); + } else if (arc_filter_type == "epsilon") { + DfsVisit(fst, &scc_visitor, EpsilonArcFilter<Arc>()); + } else if (arc_filter_type == "iepsilon") { + DfsVisit(fst, &scc_visitor, InputEpsilonArcFilter<Arc>()); + } else if (arc_filter_type == "oepsilon") { + DfsVisit(fst, &scc_visitor, OutputEpsilonArcFilter<Arc>()); + } else { + FSTERROR() << "Bad arc filter type: " << arc_filter_type; + return; + } + + for (StateId s = 0; s < scc.size(); ++s) { + if (access[s]) + ++naccess_; + if (coaccess[s]) + ++ncoaccess_; + if (access[s] && coaccess[s]) + ++nconnect_; + if (scc[s] >= nscc_) + nscc_ = scc[s] + 1; + } + } + + LookAheadMatcher< Fst<A> > imatcher(fst, MATCH_INPUT); + input_match_type_ = imatcher.Type(test_properties); + input_lookahead_ = imatcher.Flags() & kInputLookAheadMatcher; + + LookAheadMatcher< Fst<A> > omatcher(fst, MATCH_OUTPUT); + output_match_type_ = omatcher.Type(test_properties); + output_lookahead_ = omatcher.Flags() & kOutputLookAheadMatcher; + } + + // Short info + const string& FstType() const { return fst_type_; } + const string& ArcType() const { return A::Type(); } + const string& InputSymbols() const { return input_symbols_; } + const string& OutputSymbols() const { return output_symbols_; } + const bool LongInfo() const { return long_info_; } + const string& ArcFilterType() const { return arc_filter_type_; } + + // Long info + MatchType InputMatchType() const { CheckLong(); return input_match_type_; } + MatchType OutputMatchType() const { CheckLong(); return output_match_type_; } + bool InputLookAhead() const { CheckLong(); return input_lookahead_; } + bool OutputLookAhead() const { CheckLong(); return output_lookahead_; } + int64 NumStates() const { CheckLong(); 
return nstates_; } + int64 NumArcs() const { CheckLong(); return narcs_; } + int64 Start() const { CheckLong(); return start_; } + int64 NumFinal() const { CheckLong(); return nfinal_; } + int64 NumEpsilons() const { CheckLong(); return nepsilons_; } + int64 NumInputEpsilons() const { CheckLong(); return niepsilons_; } + int64 NumOutputEpsilons() const { CheckLong(); return noepsilons_; } + int64 NumAccessible() const { CheckLong(); return naccess_; } + int64 NumCoAccessible() const { CheckLong(); return ncoaccess_; } + int64 NumConnected() const { CheckLong(); return nconnect_; } + int64 NumCc() const { CheckLong(); return ncc_; } + int64 NumScc() const { CheckLong(); return nscc_; } + uint64 Properties() const { CheckLong(); return properties_; } + + private: + void CheckLong() const { + if (!long_info_) + FSTERROR() << "FstInfo: method only available with long info version"; + } + + string fst_type_; + string input_symbols_; + string output_symbols_; + int64 nstates_; + int64 narcs_; + int64 start_; + int64 nfinal_; + int64 nepsilons_; + int64 niepsilons_; + int64 noepsilons_; + int64 naccess_; + int64 ncoaccess_; + int64 nconnect_; + int64 ncc_; + int64 nscc_; + MatchType input_match_type_; + MatchType output_match_type_; + bool input_lookahead_; + bool output_lookahead_; + uint64 properties_; + string arc_filter_type_; + bool long_info_; + DISALLOW_COPY_AND_ASSIGN(FstInfo); +}; + +template <class A> +void PrintFstInfo(const FstInfo<A> &fstinfo, bool pipe = false) { + ostream &os = pipe ? 
cerr : cout; + + ios_base::fmtflags old = os.setf(ios::left); + os.width(50); + os << "fst type" << fstinfo.FstType() << endl; + os.width(50); + os << "arc type" << fstinfo.ArcType() << endl; + os.width(50); + os << "input symbol table" << fstinfo.InputSymbols() << endl; + os.width(50); + os << "output symbol table" << fstinfo.OutputSymbols() << endl; + + if (!fstinfo.LongInfo()) { + os.setf(old); + return; + } + + os.width(50); + os << "# of states" << fstinfo.NumStates() << endl; + os.width(50); + os << "# of arcs" << fstinfo.NumArcs() << endl; + os.width(50); + os << "initial state" << fstinfo.Start() << endl; + os.width(50); + os << "# of final states" << fstinfo.NumFinal() << endl; + os.width(50); + os << "# of input/output epsilons" << fstinfo.NumEpsilons() << endl; + os.width(50); + os << "# of input epsilons" << fstinfo.NumInputEpsilons() << endl; + os.width(50); + os << "# of output epsilons" << fstinfo.NumOutputEpsilons() << endl; + os.width(50); + + string arc_type = ""; + if (fstinfo.ArcFilterType() == "epsilon") + arc_type = "epsilon "; + else if (fstinfo.ArcFilterType() == "iepsilon") + arc_type = "input-epsilon "; + else if (fstinfo.ArcFilterType() == "oepsilon") + arc_type = "output-epsilon "; + + string accessible_label = "# of " + arc_type + "accessible states"; + os.width(50); + os << accessible_label << fstinfo.NumAccessible() << endl; + string coaccessible_label = "# of " + arc_type + "coaccessible states"; + os.width(50); + os << coaccessible_label << fstinfo.NumCoAccessible() << endl; + string connected_label = "# of " + arc_type + "connected states"; + os.width(50); + os << connected_label << fstinfo.NumConnected() << endl; + string numcc_label = "# of " + arc_type + "connected components"; + os.width(50); + os << numcc_label << fstinfo.NumCc() << endl; + string numscc_label = "# of " + arc_type + "strongly conn components"; + os.width(50); + os << numscc_label << fstinfo.NumScc() << endl; + + os.width(50); + os << "input matcher" + << 
(fstinfo.InputMatchType() == MATCH_INPUT ? 'y' : + fstinfo.InputMatchType() == MATCH_NONE ? 'n' : '?') << endl; + os.width(50); + os << "output matcher" + << (fstinfo.OutputMatchType() == MATCH_OUTPUT ? 'y' : + fstinfo.OutputMatchType() == MATCH_NONE ? 'n' : '?') << endl; + os.width(50); + os << "input lookahead" + << (fstinfo.InputLookAhead() ? 'y' : 'n') << endl; + os.width(50); + os << "output lookahead" + << (fstinfo.OutputLookAhead() ? 'y' : 'n') << endl; + + uint64 prop = 1; + for (int i = 0; i < 64; ++i, prop <<= 1) { + if (prop & kBinaryProperties) { + char value = 'n'; + if (fstinfo.Properties() & prop) value = 'y'; + os.width(50); + os << PropertyNames[i] << value << endl; + } else if (prop & kPosTrinaryProperties) { + char value = '?'; + if (fstinfo.Properties() & prop) value = 'y'; + else if (fstinfo.Properties() & prop << 1) value = 'n'; + os.width(50); + os << PropertyNames[i] << value << endl; + } + } + os.setf(old); +} + +} // namespace fst + +#endif // FST_SCRIPT_INFO_IMPL_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/info.h b/kaldi_io/src/tools/openfst/include/fst/script/info.h new file mode 100644 index 0000000..f434bd5 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/info.h @@ -0,0 +1,48 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_INFO_H_ +#define FST_SCRIPT_INFO_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/script/info-impl.h> + +namespace fst { +namespace script { + +typedef args::Package<const FstClass&, bool, const string&, + const string&, bool, bool> InfoArgs; + +template<class Arc> +void PrintFstInfo(InfoArgs *args) { + const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>()); + FstInfo<Arc> fstinfo(fst, args->arg2, args->arg3, + args->arg4, args->arg5); + PrintFstInfo(fstinfo, args->arg6); + + if (args->arg6) + fst.Write(""); +} + +void PrintFstInfo(const FstClass &f, bool test_properties, + const string &arc_filter, const string &info_type, + bool pipe, bool verify); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_INFO_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/intersect.h b/kaldi_io/src/tools/openfst/include/fst/script/intersect.h new file mode 100644 index 0000000..8011024 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/intersect.h @@ -0,0 +1,65 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_INTERSECT_H_ +#define FST_SCRIPT_INTERSECT_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/intersect.h> +#include <fst/script/compose.h> // for ComposeOptions, ComposeFilter + +namespace fst { +namespace script { + +typedef args::Package<const FstClass&, const FstClass&, + MutableFstClass*, ComposeFilter> IntersectArgs1; + +template<class Arc> +void Intersect(IntersectArgs1 *args) { + const Fst<Arc> &ifst1 = *(args->arg1.GetFst<Arc>()); + const Fst<Arc> &ifst2 = *(args->arg2.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>(); + + Intersect(ifst1, ifst2, ofst, args->arg4); +} + +typedef args::Package<const FstClass&, const FstClass&, + MutableFstClass*, const ComposeOptions &> IntersectArgs2; + +template<class Arc> +void Intersect(IntersectArgs2 *args) { + const Fst<Arc> &ifst1 = *(args->arg1.GetFst<Arc>()); + const Fst<Arc> &ifst2 = *(args->arg2.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg3->GetMutableFst<Arc>(); + + Intersect(ifst1, ifst2, ofst, args->arg4); +} + +void Intersect(const FstClass &ifst1, const FstClass &ifst2, + MutableFstClass *ofst, + ComposeFilter compose_filter); + +void Intersect(const FstClass &ifst, const FstClass &ifst2, + MutableFstClass *ofst, + const ComposeOptions &opts = fst::script::ComposeOptions()); + +} // namespace script +} // namespace fst + + + +#endif // FST_SCRIPT_INTERSECT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/invert.h b/kaldi_io/src/tools/openfst/include/fst/script/invert.h new file mode 100644 index 0000000..1befd9f --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/invert.h @@ -0,0 +1,43 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_INVERT_H_ +#define FST_SCRIPT_INVERT_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/invert.h> + +namespace fst { +namespace script { + +// The following confuses swig, because it has the same arguments +// as the non-templated version +#ifndef SWIG +template<class Arc> +void Invert(MutableFstClass *fst) { + MutableFst<Arc> *typed_fst = fst->GetMutableFst<Arc>(); + + Invert(typed_fst); +} +#endif + +void Invert(MutableFstClass *fst); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_INVERT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/map.h b/kaldi_io/src/tools/openfst/include/fst/script/map.h new file mode 100644 index 0000000..3caaa9f --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/map.h @@ -0,0 +1,123 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_MAP_H_ +#define FST_SCRIPT_MAP_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/script/weight-class.h> +#include <fst/arc-map.h> +#include <fst/state-map.h> + +namespace fst { +namespace script { + +template <class M> +Fst<typename M::ToArc> *ArcMap(const Fst<typename M::FromArc> &fst, + const M &mapper) { + typedef typename M::ToArc ToArc; + VectorFst<ToArc> *ofst = new VectorFst<ToArc>; + ArcMap(fst, ofst, mapper); + return ofst; +} + +template <class M> +Fst<typename M::ToArc> *StateMap(const Fst<typename M::FromArc> &fst, + const M &mapper) { + typedef typename M::ToArc ToArc; + VectorFst<ToArc> *ofst = new VectorFst<ToArc>; + StateMap(fst, ofst, mapper); + return ofst; +} + +enum MapType { ARC_SUM_MAPPER, IDENTITY_MAPPER, INVERT_MAPPER, PLUS_MAPPER, + QUANTIZE_MAPPER, RMWEIGHT_MAPPER, SUPERFINAL_MAPPER, + TIMES_MAPPER, TO_LOG_MAPPER, TO_LOG64_MAPPER, TO_STD_MAPPER }; + +typedef args::Package<const FstClass&, MapType, float, + const WeightClass &> MapInnerArgs; +typedef args::WithReturnValue<FstClass*, MapInnerArgs> MapArgs; + +template <class Arc> +void Map(MapArgs *args) { + const Fst<Arc> &ifst = *(args->args.arg1.GetFst<Arc>()); + MapType map_type = args->args.arg2; + float delta = args->args.arg3; + typename Arc::Weight w = *(args->args.arg4.GetWeight<typename Arc::Weight>()); + + Fst<Arc> *fst = NULL; + Fst<LogArc> *lfst = NULL; + Fst<Log64Arc> *l64fst = NULL; + Fst<StdArc> *sfst = NULL; + if (map_type == ARC_SUM_MAPPER) { + args->retval = new FstClass(*(fst = + script::StateMap(ifst, ArcSumMapper<Arc>(ifst)))); + } else if (map_type == IDENTITY_MAPPER) { + args->retval = new FstClass(*(fst = + script::ArcMap(ifst, IdentityArcMapper<Arc>()))); + } else if (map_type == INVERT_MAPPER) { + args->retval = new FstClass(*(fst = + script::ArcMap(ifst, InvertWeightMapper<Arc>()))); + } else if (map_type == PLUS_MAPPER) { + args->retval = new 
FstClass(*(fst = + script::ArcMap(ifst, PlusMapper<Arc>(w)))); + } else if (map_type == QUANTIZE_MAPPER) { + args->retval = new FstClass(*(fst = + script::ArcMap(ifst, QuantizeMapper<Arc>(delta)))); + } else if (map_type == RMWEIGHT_MAPPER) { + args->retval = new FstClass(*(fst = + script::ArcMap(ifst, RmWeightMapper<Arc>()))); + } else if (map_type == SUPERFINAL_MAPPER) { + args->retval = new FstClass(*(fst = + script::ArcMap(ifst, SuperFinalMapper<Arc>()))); + } else if (map_type == TIMES_MAPPER) { + args->retval = new FstClass(*(fst = + script::ArcMap(ifst, TimesMapper<Arc>(w)))); + } else if (map_type == TO_LOG_MAPPER) { + args->retval = new FstClass(*(lfst = + script::ArcMap(ifst, WeightConvertMapper<Arc, LogArc>()))); + } else if (map_type == TO_LOG64_MAPPER) { + args->retval = new FstClass(*(l64fst = + script::ArcMap(ifst, WeightConvertMapper<Arc, Log64Arc>()))); + } else if (map_type == TO_STD_MAPPER) { + args->retval = new FstClass(*(sfst = + script::ArcMap(ifst, WeightConvertMapper<Arc, StdArc>()))); + } else { + FSTERROR() << "Error: unknown/unsupported mapper type: " + << map_type; + VectorFst<Arc> *ofst = new VectorFst<Arc>; + ofst->SetProperties(kError, kError); + args->retval = new FstClass(*(fst =ofst)); + } + delete sfst; + delete l64fst; + delete lfst; + delete fst; +} + + +#ifdef SWIG +%newobject Map; +#endif +FstClass *Map(const FstClass& f, MapType map_type, + float delta = fst::kDelta, + const WeightClass &w = fst::script::WeightClass::Zero()); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_MAP_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/minimize.h b/kaldi_io/src/tools/openfst/include/fst/script/minimize.h new file mode 100644 index 0000000..f250d03 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/minimize.h @@ -0,0 +1,45 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_MINIMIZE_H_ +#define FST_SCRIPT_MINIMIZE_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/minimize.h> + +namespace fst { +namespace script { + +typedef args::Package<MutableFstClass*, MutableFstClass*, float> MinimizeArgs; + +template<class Arc> +void Minimize(MinimizeArgs *args) { + MutableFst<Arc> *ofst1 = args->arg1->GetMutableFst<Arc>(); + MutableFst<Arc> *ofst2 = args->arg2 ? args->arg2->GetMutableFst<Arc>() : 0; + + Minimize(ofst1, ofst2, args->arg3); +} + +void Minimize(MutableFstClass *ofst1, MutableFstClass *ofst2 = 0, + float delta = kDelta); + +} // namespace script +} // namespace fst + + + +#endif // FST_SCRIPT_MINIMIZE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/print-impl.h b/kaldi_io/src/tools/openfst/include/fst/script/print-impl.h new file mode 100644 index 0000000..1433a29 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/print-impl.h @@ -0,0 +1,149 @@ +// print.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Stand-alone class to print out binary FSTs in the AT&T format, +// helper class for fstprint.cc + +#ifndef FST_SCRIPT_PRINT_IMPL_H_ +#define FST_SCRIPT_PRINT_IMPL_H_ + +#include <sstream> +#include <string> + +#include <fst/fst.h> +#include <fst/util.h> + +DECLARE_string(fst_field_separator); + +namespace fst { + +// Print a binary Fst in textual format, helper class for fstprint.cc +// WARNING: Stand-alone use of this class not recommended, most code should +// read/write using the binary format which is much more efficient. +template <class A> class FstPrinter { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + + FstPrinter(const Fst<A> &fst, + const SymbolTable *isyms, + const SymbolTable *osyms, + const SymbolTable *ssyms, + bool accep, + bool show_weight_one) + : fst_(fst), isyms_(isyms), osyms_(osyms), ssyms_(ssyms), + accep_(accep && fst.Properties(kAcceptor, true)), ostrm_(0), + show_weight_one_(show_weight_one) {} + + // Print Fst to an output stream + void Print(ostream *ostrm, const string &dest) { + ostrm_ = ostrm; + dest_ = dest; + StateId start = fst_.Start(); + if (start == kNoStateId) + return; + // initial state first + PrintState(start); + for (StateIterator< Fst<A> > siter(fst_); + !siter.Done(); + siter.Next()) { + StateId s = siter.Value(); + if (s != start) + PrintState(s); + } + } + + private: + // Maximum line length in text file. 
+ static const int kLineLen = 8096; + + void PrintId(int64 id, const SymbolTable *syms, + const char *name) const { + if (syms) { + string symbol = syms->Find(id); + if (symbol == "") { + FSTERROR() << "FstPrinter: Integer " << id + << " is not mapped to any textual symbol" + << ", symbol table = " << syms->Name() + << ", destination = " << dest_; + symbol = "?"; + } + *ostrm_ << symbol; + } else { + *ostrm_ << id; + } + } + + void PrintStateId(StateId s) const { + PrintId(s, ssyms_, "state ID"); + } + + void PrintILabel(Label l) const { + PrintId(l, isyms_, "arc input label"); + } + + void PrintOLabel(Label l) const { + PrintId(l, osyms_, "arc output label"); + } + + void PrintState(StateId s) const { + bool output = false; + for (ArcIterator< Fst<A> > aiter(fst_, s); + !aiter.Done(); + aiter.Next()) { + Arc arc = aiter.Value(); + PrintStateId(s); + *ostrm_ << FLAGS_fst_field_separator[0]; + PrintStateId(arc.nextstate); + *ostrm_ << FLAGS_fst_field_separator[0]; + PrintILabel(arc.ilabel); + if (!accep_) { + *ostrm_ << FLAGS_fst_field_separator[0]; + PrintOLabel(arc.olabel); + } + if (show_weight_one_ || arc.weight != Weight::One()) + *ostrm_ << FLAGS_fst_field_separator[0] << arc.weight; + *ostrm_ << "\n"; + output = true; + } + Weight final = fst_.Final(s); + if (final != Weight::Zero() || !output) { + PrintStateId(s); + if (show_weight_one_ || final != Weight::One()) { + *ostrm_ << FLAGS_fst_field_separator[0] << final; + } + *ostrm_ << "\n"; + } + } + + const Fst<A> &fst_; + const SymbolTable *isyms_; // ilabel symbol table + const SymbolTable *osyms_; // olabel symbol table + const SymbolTable *ssyms_; // slabel symbol table + bool accep_; // print as acceptor when possible + ostream *ostrm_; // text FST destination + string dest_; // text FST destination name + bool show_weight_one_; // print weights equal to Weight::One() + DISALLOW_COPY_AND_ASSIGN(FstPrinter); +}; + +} // namespace fst + +#endif // FST_SCRIPT_PRINT_IMPL_H_ diff --git 
a/kaldi_io/src/tools/openfst/include/fst/script/print.h b/kaldi_io/src/tools/openfst/include/fst/script/print.h new file mode 100644 index 0000000..f82b19b --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/print.h @@ -0,0 +1,86 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_PRINT_H_ +#define FST_SCRIPT_PRINT_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/script/print-impl.h> + +namespace fst { +namespace script { + +// Note: it is safe to pass these strings as references because +// this struct is only used to pass them deeper in the call graph. +// Be sure you understand why this is so before using this struct +// for anything else! 
+struct FstPrinterArgs { + const FstClass &fst; + const SymbolTable *isyms; + const SymbolTable *osyms; + const SymbolTable *ssyms; + const bool accept; + const bool show_weight_one; + ostream *ostrm; + const string &dest; + + FstPrinterArgs(const FstClass &fst, + const SymbolTable *isyms, + const SymbolTable *osyms, + const SymbolTable *ssyms, + bool accept, + bool show_weight_one, + ostream *ostrm, + const string &dest) : + fst(fst), isyms(isyms), osyms(osyms), ssyms(ssyms), accept(accept), + show_weight_one(show_weight_one), ostrm(ostrm), dest(dest) { } +}; + +template<class Arc> +void PrintFst(FstPrinterArgs *args) { + const Fst<Arc> &fst = *(args->fst.GetFst<Arc>()); + + fst::FstPrinter<Arc> fstprinter(fst, args->isyms, args->osyms, + args->ssyms, args->accept, + args->show_weight_one); + fstprinter.Print(args->ostrm, args->dest); +} + +void PrintFst(const FstClass &fst, ostream &ostrm, const string &dest, + const SymbolTable *isyms, + const SymbolTable *osyms, + const SymbolTable *ssyms, + bool accept, bool show_weight_one); + + +// Below are two printing methods with useful defaults for a few of +// the fst printer arguments. +template <class Arc> +void PrintFst(const Fst<Arc> &fst, ostream &os, const string dest = "", + const SymbolTable *isyms = NULL, + const SymbolTable *osyms = NULL, + const SymbolTable *ssyms = NULL) { + fst::FstPrinter<Arc> fstprinter(fst, isyms, osyms, ssyms, true, true); + fstprinter.Print(&os, dest); +} + +} // namespace script +} // namespace fst + + + +#endif // FST_SCRIPT_PRINT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/project.h b/kaldi_io/src/tools/openfst/include/fst/script/project.h new file mode 100644 index 0000000..12ee890 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/project.h @@ -0,0 +1,43 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_PROJECT_H_ +#define FST_SCRIPT_PROJECT_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/project.h> // for ProjectType + +namespace fst { +namespace script { + +typedef args::Package<MutableFstClass*, ProjectType> ProjectArgs; + +template<class Arc> +void Project(ProjectArgs *args) { + MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>(); + + Project(ofst, args->arg2); +} + +void Project(MutableFstClass *ofst, ProjectType project_type); + +} // namespace script +} // namespace fst + + + +#endif // FST_SCRIPT_PROJECT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/prune.h b/kaldi_io/src/tools/openfst/include/fst/script/prune.h new file mode 100644 index 0000000..7118ff1 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/prune.h @@ -0,0 +1,153 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_PRUNE_H_ +#define FST_SCRIPT_PRUNE_H_ + +#include <vector> +using std::vector; + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/script/weight-class.h> +#include <fst/prune.h> +#include <fst/arcfilter.h> + +namespace fst { +namespace script { + +struct PruneOptions { + WeightClass weight_threshold; + int64 state_threshold; + const vector<WeightClass> *distance; + float delta; + + explicit PruneOptions(const WeightClass& w, int64 s, + vector<WeightClass> *d = 0, float e = kDelta) + : weight_threshold(w), + state_threshold(s), + distance(d), + delta(e) {} + private: + PruneOptions(); // disallow +}; + +// converts a script::PruneOptions into a fst::PruneOptions. +// Notes: +// If the original opts.distance is not NULL, a new distance will be +// created with new; it's the client's responsibility to delete this. + +template<class A> +fst::PruneOptions<A, AnyArcFilter<A> > ConvertPruneOptions( + const PruneOptions &opts) { + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + + Weight weight_threshold = *(opts.weight_threshold.GetWeight<Weight>()); + StateId state_threshold = opts.state_threshold; + vector<Weight> *distance = 0; + + if (opts.distance) { + distance = new vector<Weight>(opts.distance->size()); + for (unsigned i = 0; i < opts.distance->size(); ++i) { + (*distance)[i] = *((*opts.distance)[i].GetWeight<Weight>()); + } + } + + return fst::PruneOptions<A, AnyArcFilter<A> >( + weight_threshold, state_threshold, AnyArcFilter<A>(), distance, + opts.delta); +} + +// 1 +typedef args::Package<MutableFstClass *, const PruneOptions &> PruneArgs1; + +template<class Arc> +void Prune(PruneArgs1 *args) { + MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>(); + + typedef typename Arc::Weight Weight; + typedef typename Arc::StateId StateId; + + fst::PruneOptions<Arc, AnyArcFilter<Arc> > opts = + ConvertPruneOptions<Arc>(args->arg2); + 
Prune(ofst, opts); + delete opts.distance; +} + +// 2 +typedef args::Package<const FstClass &, MutableFstClass *, + const PruneOptions &> PruneArgs2; + +template<class Arc> +void Prune(PruneArgs2 *args) { + const Fst<Arc>& ifst = *(args->arg1.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); + + fst::PruneOptions<Arc, AnyArcFilter<Arc> > opts = + ConvertPruneOptions<Arc>(args->arg3); + Prune(ifst, ofst, opts); + delete opts.distance; +} + +// 3 +typedef args::Package<const FstClass &, + MutableFstClass *, + const WeightClass &, int64, float> PruneArgs3; + +template<class Arc> +void Prune(PruneArgs3 *args) { + const Fst<Arc>& ifst = *(args->arg1.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); + typename Arc::Weight w = *(args->arg3.GetWeight<typename Arc::Weight>()); + + Prune(ifst, ofst, w, args->arg4, args->arg5); +} + +// 4 +typedef args::Package<MutableFstClass *, const WeightClass&, + int64, float> PruneArgs4; +template<class Arc> +void Prune(PruneArgs4 *args) { + MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>(); + typename Arc::Weight w = *(args->arg2.GetWeight<typename Arc::Weight>()); + Prune(fst, w, args->arg3, args->arg4); +} + + +// 1 +void Prune(MutableFstClass *fst, const PruneOptions &opts); + +// 2 +void Prune(const FstClass &ifst, MutableFstClass *fst, + const PruneOptions &opts); + +// 3 +void Prune(const FstClass &ifst, MutableFstClass *ofst, + const WeightClass &weight_threshold, + int64 state_threshold = kNoStateId, + float delta = kDelta); + +// 4 +void Prune(MutableFstClass *fst, const WeightClass& weight_threshold, + int64 state_threshold, float delta); + +} // namespace script +} // namespace fst + + + +#endif // FST_SCRIPT_PRUNE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/push.h b/kaldi_io/src/tools/openfst/include/fst/script/push.h new file mode 100644 index 0000000..cebd655 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/push.h @@ -0,0 +1,70 @@ 
+ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_PUSH_H_ +#define FST_SCRIPT_PUSH_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/push.h> + +namespace fst { +namespace script { + +// 1 +typedef args::Package<MutableFstClass*, ReweightType, float, bool> PushArgs1; + +template<class Arc> +void Push(PushArgs1 *args) { + MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>(); + + if (args->arg2 == REWEIGHT_TO_FINAL) { + fst::Push(ofst, REWEIGHT_TO_FINAL, args->arg3, args->arg4); + } else { + fst::Push(ofst, REWEIGHT_TO_INITIAL, args->arg3, args->arg4); + } +} + +// 2 +typedef args::Package<const FstClass &, MutableFstClass *, uint32, + ReweightType, float> PushArgs2; + +template<class Arc> +void Push(PushArgs2 *args) { + const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); + + if (args->arg4 == REWEIGHT_TO_FINAL) { + fst::Push<Arc, REWEIGHT_TO_FINAL>(ifst, ofst, args->arg3, args->arg5); + } else { + fst::Push<Arc, REWEIGHT_TO_INITIAL>(ifst, ofst, args->arg3, args->arg5); + } +} + +// 1 +void Push(MutableFstClass *ofst, ReweightType type, float delta = kDelta, + bool remove_total_weight = false); + +// 2 +void Push(const FstClass &ifst, MutableFstClass *ofst, uint32 flags, + ReweightType dir, float delta); + +} // namespace script +} // namespace fst + + 
+ +#endif // FST_SCRIPT_PUSH_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/randequivalent.h b/kaldi_io/src/tools/openfst/include/fst/script/randequivalent.h new file mode 100644 index 0000000..b929683 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/randequivalent.h @@ -0,0 +1,105 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_RANDEQUIVALENT_H_ +#define FST_SCRIPT_RANDEQUIVALENT_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/script/randgen.h> // for RandArcSelection +#include <fst/randequivalent.h> + +namespace fst { +namespace script { + +// 1 +typedef args::Package<const FstClass&, const FstClass&, + int32, float, int, int> RandEquivalentInnerArgs1; +typedef args::WithReturnValue<bool, + RandEquivalentInnerArgs1> RandEquivalentArgs1; + +template<class Arc> +void RandEquivalent(RandEquivalentArgs1 *args) { + const Fst<Arc> &fst1 = *(args->args.arg1.GetFst<Arc>()); + const Fst<Arc> &fst2 = *(args->args.arg2.GetFst<Arc>()); + + args->retval = RandEquivalent(fst1, fst2, args->args.arg3, args->args.arg4, + args->args.arg5, args->args.arg6); +} + +// 2 +typedef args::Package<const FstClass &, const FstClass &, int32, + ssize_t, float, + const RandGenOptions<RandArcSelection> &> + RandEquivalentInnerArgs2; + +typedef args::WithReturnValue<bool, + RandEquivalentInnerArgs2> 
RandEquivalentArgs2; + +template<class Arc> +void RandEquivalent(RandEquivalentArgs2 *args) { + const Fst<Arc> &fst1 = *(args->args.arg1.GetFst<Arc>()); + const Fst<Arc> &fst2 = *(args->args.arg2.GetFst<Arc>()); + const RandGenOptions<RandArcSelection> &opts = args->args.arg6; + int32 seed = args->args.arg3; + + if (opts.arc_selector == UNIFORM_ARC_SELECTOR) { + UniformArcSelector<Arc> arc_selector(seed); + RandGenOptions< UniformArcSelector<Arc> > + ropts(arc_selector, opts.max_length, opts.npath); + + args->retval = RandEquivalent(fst1, fst2, args->args.arg4, + args->args.arg5, ropts); + } else if (opts.arc_selector == FAST_LOG_PROB_ARC_SELECTOR) { + FastLogProbArcSelector<Arc> arc_selector(seed); + RandGenOptions< FastLogProbArcSelector<Arc> > + ropts(arc_selector, opts.max_length, opts.npath); + + args->retval = RandEquivalent(fst1, fst2, args->args.arg4, + args->args.arg5, ropts); + } else { + LogProbArcSelector<Arc> arc_selector(seed); + RandGenOptions< LogProbArcSelector<Arc> > + ropts(arc_selector, opts.max_length, opts.npath); + args->retval = RandEquivalent(fst1, fst2, args->args.arg4, + args->args.arg5, ropts); + } +} + + +// 1 +bool RandEquivalent(const FstClass &fst1, + const FstClass &fst2, + int32 seed = time(0), + ssize_t num_paths = 1, + float delta = fst::kDelta, + int path_length = INT_MAX); + +// 2 +bool RandEquivalent(const FstClass &fst1, + const FstClass &fst2, + int32 seed, + ssize_t num_paths, + float delta, + const fst::RandGenOptions< + fst::script::RandArcSelection> &opts); + +} // namespace script +} // namespace fst + + + +#endif // FST_SCRIPT_RANDEQUIVALENT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/randgen.h b/kaldi_io/src/tools/openfst/include/fst/script/randgen.h new file mode 100644 index 0000000..817f9c1 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/randgen.h @@ -0,0 +1,76 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in 
compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: [email protected] (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_RANDGEN_H_
+#define FST_SCRIPT_RANDGEN_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/randgen.h>
+
+namespace fst {
+namespace script {
+
+// Script-level tag naming which arc selector RandGen() instantiates.
+enum RandArcSelection {
+  UNIFORM_ARC_SELECTOR,
+  LOG_PROB_ARC_SELECTOR,
+  FAST_LOG_PROB_ARC_SELECTOR
+};
+
+// Argument package: (input FST, output FST, seed, script-level options).
+typedef args::Package<const FstClass &, MutableFstClass*, int32,
+                      const RandGenOptions<RandArcSelection> &> RandGenArgs;
+
+// Arc-typed implementation behind the script-level RandGen().
+// Unpacks the argument package, builds the arc selector requested by
+// opts.arc_selector (constructed from the packaged seed), copies
+// max_length / npath / weighted into selector-typed options and forwards to
+// the templated RandGen().  Any selector value other than
+// UNIFORM_ARC_SELECTOR or FAST_LOG_PROB_ARC_SELECTOR is served by
+// LogProbArcSelector.
+template<class Arc>
+void RandGen(RandGenArgs *args) {
+  const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+  int32 seed = args->arg3;
+  const RandGenOptions<RandArcSelection> &script_opts = args->arg4;
+
+  switch (script_opts.arc_selector) {
+    case UNIFORM_ARC_SELECTOR: {
+      typedef UniformArcSelector<Arc> Selector;
+      Selector selector(seed);
+      RandGenOptions<Selector> lib_opts(selector, script_opts.max_length,
+                                        script_opts.npath,
+                                        script_opts.weighted);
+      RandGen(ifst, ofst, lib_opts);
+      break;
+    }
+    case FAST_LOG_PROB_ARC_SELECTOR: {
+      typedef FastLogProbArcSelector<Arc> Selector;
+      Selector selector(seed);
+      RandGenOptions<Selector> lib_opts(selector, script_opts.max_length,
+                                        script_opts.npath,
+                                        script_opts.weighted);
+      RandGen(ifst, ofst, lib_opts);
+      break;
+    }
+    default: {
+      // LOG_PROB_ARC_SELECTOR and every unrecognized value land here.
+      typedef LogProbArcSelector<Arc> Selector;
+      Selector selector(seed);
+      RandGenOptions<Selector> lib_opts(selector, script_opts.max_length,
+                                        script_opts.npath,
+                                        script_opts.weighted);
+      RandGen(ifst, ofst, lib_opts);
+      break;
+    }
+  }
+}
+
+
+// Client-facing prototype
+void RandGen(const FstClass &ifst, MutableFstClass *ofst, int32 seed = time(0),
+             const RandGenOptions<RandArcSelection> &opts =
+             fst::RandGenOptions<fst::script::RandArcSelection>(
+                 fst::script::UNIFORM_ARC_SELECTOR));
+
+}  // namespace script
+}  // namespace fst
+
+
+
+#endif  // FST_SCRIPT_RANDGEN_H_
diff --git a/kaldi_io/src/tools/openfst/include/fst/script/register.h b/kaldi_io/src/tools/openfst/include/fst/script/register.h
new file mode 100644
index 0000000..03e0e36
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/script/register.h
@@ -0,0 +1,120 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: [email protected] (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_REGISTER_H_
+#define FST_SCRIPT_REGISTER_H_
+
+#include <string>
+
+#include <fst/generic-register.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+
+// Holds methods and classes responsible for maintaining
+// the register for FstClass arc types.
+
+namespace fst {
+namespace script {
+
+//
+// Registers for reading and converting various kinds of FST classes.
+//
+
+// This class definition is to avoid a nested class definition inside
+// the IORegistration struct.
+// One register entry: the reader, creator and converter callbacks stored
+// for a single arc type.
+template<class Reader, class Creator, class Converter>
+struct FstClassRegEntry {
+  Reader reader;
+  Creator creator;
+  Converter converter;
+
+  // Empty entry: all three callbacks are initialized from 0.
+  FstClassRegEntry() : reader(0), creator(0), converter(0) { }
+
+  // Entry holding the three supplied callbacks.
+  FstClassRegEntry(Reader r, Creator cr, Converter co)
+      : reader(r), creator(cr), converter(co) { }
+};
+
+// Register keyed by arc-type name whose entries are FstClassRegEntry
+// triples; the accessors below return one callback of the entry found for
+// the given arc type.
+template<class Reader, class Creator, class Converter>
+class FstClassIORegister
+    : public GenericRegister<string,
+                             FstClassRegEntry<Reader, Creator, Converter>,
+                             FstClassIORegister<Reader, Creator,
+                                                Converter> > {
+ public:
+  // Reader callback of the entry stored under arc_type.
+  Reader GetReader(const string &arc_type) const {
+    return this->GetEntry(arc_type).reader;
+  }
+
+  // Creator callback of the entry stored under arc_type.
+  Creator GetCreator(const string &arc_type) const {
+    return this->GetEntry(arc_type).creator;
+  }
+
+  // Converter callback of the entry stored under arc_type.
+  Converter GetConverter(const string &arc_type) const {
+    return this->GetEntry(arc_type).converter;
+  }
+
+ protected:
+  // Maps an arc-type key to a shared-object file name: the key is passed
+  // through ConvertToLegalCSymbol() and suffixed with "-arc.so".
+  virtual string ConvertKeyToSoFilename(
+      const string& key) const {
+    string so_name = key;
+    ConvertToLegalCSymbol(&so_name);
+    so_name += "-arc.so";
+    return so_name;
+  }
+};
+
+//
+// Struct containing everything needed to register a particular type
+// of FST class (e.g. a plain FstClass, or a MutableFstClass, etc)
+//
+template<class FstClassType>
+struct IORegistration {
+  // Callback signatures stored in each register entry.
+  typedef FstClassType *(*Reader)(istream &stream,
+                                  const FstReadOptions &opts);
+  typedef FstClassImplBase *(*Creator)();
+  typedef FstClassImplBase *(*Converter)(const FstClass &other);
+
+  // Entry type holding the three callbacks
+  typedef FstClassRegEntry<Reader, Creator, Converter> Entry;
+
+  // FST class Register
+  typedef FstClassIORegister<Reader, Creator, Converter> Register;
+
+  // FST class Register-er (same type as GenericRegisterer over the
+  // FstClassIORegister instantiation above)
+  typedef GenericRegisterer<Register> Registerer;
+};
+
+
+//
+// REGISTRATION MACROS
+//
+
+// Defines a static Registerer named <Class>_<Arc>_registerer that is
+// constructed from Arc::Type() and an Entry of Class::Read<Arc>,
+// Class::Create<Arc> and Class::Convert<Arc>.
+#define REGISTER_FST_CLASS(Class, Arc)                                  \
+  static IORegistration<Class>::Registerer Class ## _ ## Arc ## _registerer( \
+      Arc::Type(),                                                      \
+      IORegistration<Class>::Entry(Class::Read<Arc>,                    \
+                                   Class::Create<Arc>,                  \
+                                   Class::Convert<Arc>))
+
+// Registers Arc for FstClass, MutableFstClass and VectorFstClass.
+#define REGISTER_FST_CLASSES(Arc)               \
+  REGISTER_FST_CLASS(FstClass, Arc);            \
+  REGISTER_FST_CLASS(MutableFstClass, Arc);     \
+  REGISTER_FST_CLASS(VectorFstClass, Arc);
+
+}  // namespace script
+}  // namespace fst
+
+#endif  // FST_SCRIPT_REGISTER_H_
diff --git a/kaldi_io/src/tools/openfst/include/fst/script/relabel.h b/kaldi_io/src/tools/openfst/include/fst/script/relabel.h
new file mode 100644
index 0000000..6bbb4c5
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/script/relabel.h
@@ -0,0 +1,102 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: [email protected] (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_RELABEL_H_
+#define FST_SCRIPT_RELABEL_H_
+
+#include <utility>
+using std::pair; using std::make_pair;
+#include <algorithm>
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/relabel.h>
+
+namespace fst {
+namespace script {
+
+// 1
+// Argument package mirroring client prototype 1 below:
+// (ofst, old_isyms, relabel_isyms, attach_new_isyms,
+//  old_osyms, relabel_osyms, attach_new_osyms).
+typedef args::Package<MutableFstClass *,
+                      const SymbolTable *, const SymbolTable *, bool,
+                      const SymbolTable *, const SymbolTable *,
+                      bool> RelabelArgs1;
+
+// Arc-typed implementation of overload 1: extracts the typed mutable FST
+// and forwards the six remaining arguments unchanged.
+template<class Arc>
+void Relabel(RelabelArgs1 *args) {
+  MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>();
+
+  Relabel(ofst, args->arg2, args->arg3, args->arg4,
+          args->arg5, args->arg6, args->arg7);
+}
+
+// 2
+// Argument package mirroring client prototype 2 below: (ofst, ipairs,
+// opairs).  Both pair vectors are held by const reference, matching the
+// prototype, so packaging the arguments does not copy either vector.
+typedef args::Package<MutableFstClass*,
+                      const vector<pair<int64, int64> > &,
+                      const vector<pair<int64, int64> > &> RelabelArgs2;
+
+// Arc-typed implementation of overload 2: converts the int64 label pairs to
+// Arc::Label pairs and forwards them.
+template<class Arc>
+void Relabel(RelabelArgs2 *args) {
+  MutableFst<Arc> *ofst = args->arg1->GetMutableFst<Arc>();
+
+  // In case int64 is not the same as Arc::Label,
+  // copy the reassignments
+  typedef typename Arc::Label Label;
+
+  vector<pair<Label, Label> > converted_ipairs(args->arg2.size());
+  std::copy(args->arg2.begin(), args->arg2.end(), converted_ipairs.begin());
+
+  vector<pair<Label, Label> > converted_opairs(args->arg3.size());
+  std::copy(args->arg3.begin(), args->arg3.end(), converted_opairs.begin());
+
+  Relabel(ofst, converted_ipairs, converted_opairs);
+}
+
+// 3
+// Argument package mirroring client prototype 3 below:
+// (fst, new_isymbols, new_osymbols).
+typedef args::Package<MutableFstClass*, const SymbolTable*,
+                      const SymbolTable*> RelabelArgs3;
+
+// Arc-typed implementation of overload 3: extracts the typed mutable FST
+// and forwards both symbol tables.
+template<class Arc>
+void Relabel(RelabelArgs3 *args) {
+  MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>();
+  Relabel(fst, args->arg2, args->arg3);
+}
+
+
+// 1
+void Relabel(MutableFstClass *ofst,
+             const SymbolTable *old_isyms, const SymbolTable *relabel_isyms,
+             bool attach_new_isyms,
+             const SymbolTable *old_osyms, const SymbolTable *relabel_osyms,
+             bool attach_new_osyms);
+
+// 2
+void Relabel(MutableFstClass *ofst,
+             const vector<pair<int64, int64> > &ipairs,
+             const vector<pair<int64, int64> > &opairs);
+
+
+// 3
+void Relabel(MutableFstClass *fst,
+             const SymbolTable *new_isymbols,
+             const SymbolTable *new_osymbols);
+
+
+}  // namespace script
+}  // namespace fst
+
+#endif  // FST_SCRIPT_RELABEL_H_
diff --git a/kaldi_io/src/tools/openfst/include/fst/script/replace.h b/kaldi_io/src/tools/openfst/include/fst/script/replace.h
new file mode 100644
index 0000000..5eaf5bf
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/script/replace.h
@@ -0,0 +1,62 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: [email protected] (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_REPLACE_H_
+#define FST_SCRIPT_REPLACE_H_
+
+#include <utility>
+using std::pair; using std::make_pair;
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/replace.h>
+
+namespace fst {
+namespace script {
+
+// Argument package mirroring the client prototype below:
+// (tuples, ofst, root, epsilon_on_replace).
+typedef args::Package<const vector<pair<int64, const FstClass *> > &,
+                      MutableFstClass *, const int64, bool> ReplaceArgs;
+
+// Arc-typed implementation of Replace(): turns each untyped
+// (int64 label, FstClass*) tuple into a typed (Arc::Label, Fst<Arc>*) tuple
+// and forwards to the templated Replace().
+template<class Arc>
+void Replace(ReplaceArgs *args) {
+  typedef pair<typename Arc::Label, const Fst<Arc> *> TypedTuple;
+
+  // The arc type is known here, so the (label, fst) pairs expected by the
+  // templated Replace can be built from the script-level ones.
+  const vector<pair<int64, const FstClass *> >& untyped = args->arg1;
+  vector<TypedTuple> typed(untyped.size());
+
+  for (unsigned i = 0; i < untyped.size(); ++i) {
+    TypedTuple &dst = typed[i];
+    dst.first = untyped[i].first;  // convert label
+    dst.second = untyped[i].second->GetFst<Arc>();
+  }
+
+  MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+
+  Replace(typed, ofst, args->arg3, args->arg4);
+}
+
+void Replace(const vector<pair<int64, const FstClass *> > &tuples,
+             MutableFstClass *ofst, const int64 &root,
+             bool epsilon_on_replace = false);
+
+}  // namespace script
+}  // namespace fst
+
+#endif  // FST_SCRIPT_REPLACE_H_
diff --git a/kaldi_io/src/tools/openfst/include/fst/script/reverse.h b/kaldi_io/src/tools/openfst/include/fst/script/reverse.h
new file mode 100644
index 0000000..3930875
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/script/reverse.h
@@ -0,0 +1,42 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: [email protected] (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_REVERSE_H_
+#define FST_SCRIPT_REVERSE_H_
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/reverse.h>
+
+namespace fst {
+namespace script {
+
+// Argument package mirroring the client prototype below: (fst1, fst2).
+typedef args::Package<const FstClass &, MutableFstClass *> ReverseArgs;
+
+// Arc-typed implementation of Reverse(): extracts the typed input and the
+// typed mutable output from the package and forwards them to the templated
+// Reverse().
+template<class Arc>
+void Reverse(ReverseArgs *args) {
+  const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+  Reverse(ifst, ofst);
+}
+
+void Reverse(const FstClass &fst1, MutableFstClass *fst2);
+
+}  // namespace script
+}  // namespace fst
+
+#endif  // FST_SCRIPT_REVERSE_H_
diff --git a/kaldi_io/src/tools/openfst/include/fst/script/reweight.h b/kaldi_io/src/tools/openfst/include/fst/script/reweight.h
new file mode 100644
index 0000000..7bce839
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/script/reweight.h
@@ -0,0 +1,53 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: [email protected] (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_REWEIGHT_H_
+#define FST_SCRIPT_REWEIGHT_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/reweight.h>
+
+namespace fst {
+namespace script {
+
+// Argument package mirroring the client prototype below:
+// (fst, potential, reweight_type).
+typedef args::Package<MutableFstClass *, const vector<WeightClass> &,
+                      ReweightType> ReweightArgs;
+
+// Arc-typed implementation of Reweight(): unwraps every WeightClass
+// potential into an Arc::Weight and forwards to the templated Reweight().
+template<class Arc>
+void Reweight(ReweightArgs *args) {
+  typedef typename Arc::Weight Weight;
+
+  MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>();
+  const vector<WeightClass> &packed = args->arg2;
+
+  // One typed potential per packaged WeightClass, in the same order.
+  vector<Weight> potentials(packed.size());
+  for (unsigned i = 0; i < packed.size(); ++i)
+    potentials[i] = *(packed[i].GetWeight<Weight>());
+
+  Reweight(fst, potentials, args->arg3);
+}
+
+void Reweight(MutableFstClass *fst, const vector<WeightClass> &potential,
+              ReweightType reweight_type);
+
+}  // namespace script
+}  // namespace fst
+
+#endif  // FST_SCRIPT_REWEIGHT_H_
diff --git a/kaldi_io/src/tools/openfst/include/fst/script/rmepsilon.h b/kaldi_io/src/tools/openfst/include/fst/script/rmepsilon.h
new file mode 100644
index 0000000..62fed03
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/script/rmepsilon.h
@@ -0,0 +1,211 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: [email protected] (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_RMEPSILON_H_
+#define FST_SCRIPT_RMEPSILON_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/script/shortest-distance.h>  // for ShortestDistanceOptions
+#include <fst/rmepsilon.h>
+#include <fst/queue.h>
+
+// the following is necessary, or SWIG complains mightily about
+// shortestdistanceoptions not being defined before being used as a base.
+#ifdef SWIG
+%include "nlp/fst/script/shortest-distance.h"
+#endif
+
+
+namespace fst {
+namespace script {
+
+//
+// OPTIONS
+//
+
+// Script-level epsilon-removal options.  The ShortestDistanceOptions base is
+// always initialized with EPSILON_ARC_FILTER and kNoStateId as source; the
+// queue type and delta come from the constructor arguments.
+struct RmEpsilonOptions : public fst::script::ShortestDistanceOptions {
+  bool connect;
+  WeightClass weight_threshold;
+  int64 state_threshold;
+
+  RmEpsilonOptions(QueueType qt = AUTO_QUEUE, float d = kDelta, bool c = true,
+                   WeightClass w = fst::script::WeightClass::Zero(),
+                   int64 n = kNoStateId)
+      : ShortestDistanceOptions(qt, EPSILON_ARC_FILTER,
+                                kNoStateId, d),
+        connect(c), weight_threshold(w), state_threshold(n) { }
+};
+
+
+//
+// TEMPLATES
+//
+
+// this function takes care of transforming a script-land RmEpsilonOptions
+// into a lib-land RmEpsilonOptions
+//
+// For each supported opts.queue_type it constructs the matching queue,
+// builds the templated options from (queue, delta, connect, weight
+// threshold, state threshold) and runs the templated RmEpsilon().  An
+// unsupported queue type is reported through FSTERROR() and the FST gets
+// its kError property set.
+template<class Arc>
+void RmEpsilonHelper(MutableFst<Arc> *fst,
+                     vector<typename Arc::Weight> *distance,
+                     const RmEpsilonOptions &opts) {
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  // Typed copy of the script-level weight threshold.
+  Weight threshold = *(opts.weight_threshold.GetWeight<Weight>());
+
+  switch (opts.queue_type) {
+    case AUTO_QUEUE: {
+      typedef AutoQueue<StateId> Queue;
+      Queue queue(*fst, distance, EpsilonArcFilter<Arc>());
+      fst::RmEpsilonOptions<Arc, Queue> lib_opts(
+          &queue, opts.delta, opts.connect, threshold,
+          opts.state_threshold);
+      RmEpsilon(fst, distance, lib_opts);
+      break;
+    }
+    case FIFO_QUEUE: {
+      typedef FifoQueue<StateId> Queue;
+      Queue queue;
+      fst::RmEpsilonOptions<Arc, Queue> lib_opts(
+          &queue, opts.delta, opts.connect, threshold,
+          opts.state_threshold);
+      RmEpsilon(fst, distance, lib_opts);
+      break;
+    }
+    case LIFO_QUEUE: {
+      typedef LifoQueue<StateId> Queue;
+      Queue queue;
+      fst::RmEpsilonOptions<Arc, Queue> lib_opts(
+          &queue, opts.delta, opts.connect, threshold,
+          opts.state_threshold);
+      RmEpsilon(fst, distance, lib_opts);
+      break;
+    }
+    case SHORTEST_FIRST_QUEUE: {
+      typedef NaturalShortestFirstQueue<StateId, Weight> Queue;
+      Queue queue(*distance);
+      fst::RmEpsilonOptions<Arc, Queue> lib_opts(
+          &queue, opts.delta, opts.connect, threshold,
+          opts.state_threshold);
+      RmEpsilon(fst, distance, lib_opts);
+      break;
+    }
+    case STATE_ORDER_QUEUE: {
+      typedef StateOrderQueue<StateId> Queue;
+      Queue queue;
+      fst::RmEpsilonOptions<Arc, Queue> lib_opts(
+          &queue, opts.delta, opts.connect, threshold,
+          opts.state_threshold);
+      RmEpsilon(fst, distance, lib_opts);
+      break;
+    }
+    case TOP_ORDER_QUEUE: {
+      typedef TopOrderQueue<StateId> Queue;
+      Queue queue(*fst, EpsilonArcFilter<Arc>());
+      fst::RmEpsilonOptions<Arc, Queue> lib_opts(
+          &queue, opts.delta, opts.connect, threshold,
+          opts.state_threshold);
+      RmEpsilon(fst, distance, lib_opts);
+      break;
+    }
+    default:
+      FSTERROR() << "Unknown or unsupported queue type: " << opts.queue_type;
+      fst->SetProperties(kError, kError);
+  }
+}
+
+// 1
+// Argument package mirroring client prototype 1 below:
+// (ifst, ofst, reverse, opts).
+typedef args::Package<const FstClass &, MutableFstClass *,
+                      bool, const RmEpsilonOptions &> RmEpsilonArgs1;
+
+// Arc-typed implementation of overload 1.  Without `reverse`, the input is
+// copied into the output and epsilon-removed there.  With `reverse`, a
+// reversed copy is epsilon-removed first and reversed back into the output;
+// the output is then passed through RmEpsilonHelper() as well.
+template<class Arc>
+void RmEpsilon(RmEpsilonArgs1 *args) {
+  const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>());
+  MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+  vector<typename Arc::Weight> distance;
+
+  if (!args->arg3) {
+    *ofst = ifst;
+  } else {
+    VectorFst<Arc> reversed;
+    Reverse(ifst, &reversed);
+    RmEpsilonHelper(&reversed, &distance, args->arg4);
+    Reverse(reversed, ofst);
+  }
+  // Executed on both paths, with the same distance vector.
+  RmEpsilonHelper(ofst, &distance, args->arg4);
+}
+
+// 2
+// Argument package mirroring client prototype 2 below:
+// (fst, connect, weight_threshold, state_threshold, delta).
+typedef args::Package<MutableFstClass *, bool,
+                      const WeightClass, int64,
+                      float> RmEpsilonArgs2;
+
+// Arc-typed implementation of overload 2: unwraps the weight threshold and
+// forwards every argument to the templated RmEpsilon().
+template<class Arc>
+void RmEpsilon(RmEpsilonArgs2 *args) {
+  typedef typename Arc::Weight Weight;
+
+  MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>();
+  Weight threshold = *(args->arg3.GetWeight<Weight>());
+
+  RmEpsilon(fst, args->arg2, threshold, args->arg4, args->arg5);
+}
+
+// 3
+// Argument package mirroring client prototype 3 below:
+// (fst, distance, opts).
+typedef args::Package<MutableFstClass *, vector<WeightClass> *,
+                      const RmEpsilonOptions &> RmEpsilonArgs3;
+
+// Arc-typed implementation of overload 3: runs RmEpsilonHelper() with a
+// typed distance vector and writes that vector back to the caller as
+// WeightClass values.
+template<class Arc>
+void RmEpsilon(RmEpsilonArgs3 *args) {
+  MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>();
+  vector<WeightClass> *out = args->arg2;
+
+  vector<typename Arc::Weight> typed_distance;
+  RmEpsilonHelper(fst, &typed_distance, args->arg3);
+
+  // Copy the weights back
+  out->resize(typed_distance.size());
+  for (unsigned i = 0; i < typed_distance.size(); ++i)
+    (*out)[i] = WeightClass(typed_distance[i]);
+}
+
+//
+// PROTOTYPES
+//
+
+// 1
+void RmEpsilon(const FstClass &ifst, MutableFstClass *ofst,
+               bool reverse = false,
+               const RmEpsilonOptions& opts =
+                 fst::script::RmEpsilonOptions());
+
+// 2
+void RmEpsilon(MutableFstClass *arc, bool connect = true,
+               const WeightClass &weight_threshold =
+                 fst::script::WeightClass::Zero(),
+               int64 state_threshold = fst::kNoStateId,
+               float delta = fst::kDelta);
+
+// 3
+void RmEpsilon(MutableFstClass *fst, vector<WeightClass> *distance,
+               const RmEpsilonOptions &opts);
+
+
+}  // namespace script
+}  // namespace fst
+
+
+#endif  // FST_SCRIPT_RMEPSILON_H_
diff --git a/kaldi_io/src/tools/openfst/include/fst/script/script-impl.h b/kaldi_io/src/tools/openfst/include/fst/script/script-impl.h
new file mode 100644
index 0000000..452c7c5
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/script/script-impl.h
@@ -0,0 +1,206 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: [email protected] (Jake Ratkiewicz)
+
+// This file defines the registration mechanism for new operations.
+// These operations are designed to enable scripts to work with FST classes
+// at a high level.
+
+// If you have a new arc type and want these operations to work with FSTs
+// with that arc type, see below for the registration steps
+// you must take.
+
+// These methods are only recommended for use in high-level scripting
+// applications. Most users should use the lower-level templated versions
+// corresponding to these.
+
+// If you have a new arc type you'd like these operations to work with,
+// use the REGISTER_FST_OPERATIONS macro defined in fstscript.h
+
+// If you have a custom operation you'd like to define, you need four
+// components. In the following, assume you want to create a new operation
+// with the signature
+//
+//    void Foo(const FstClass &ifst, MutableFstClass *ofst);
+//
+//  You need:
+//
+//  1) A way to bundle the args that your new Foo operation will take, as
+//     a single struct. The template structs in arg-packs.h provide a handy
+//     way to do this. In Foo's case, that might look like this:
+//
+//       typedef args::Package<const FstClass &,
+//                             MutableFstClass *> FooArgs;
+//
+//     Note: this package of args is going to be passed by non-const pointer.
+//
+//  2) A function template that is able to perform Foo, given the args and
+//     arc type. Yours might look like this:
+//
+//       template<class Arc>
+//       void Foo(FooArgs *args) {
+//         // Pull out the actual, arc-templated FSTs
+//         const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>());
+//         MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>();
+//
+//         // actually perform foo on ifst and ofst...
+//       }
+//
+//  3) A client-facing function for your operation. This would look like
+//     the following:
+//
+//       void Foo(const FstClass &ifst, MutableFstClass *ofst) {
+//         // Check that the arc types of the FSTs match
+//         if (!ArcTypesMatch(ifst, *ofst, "Foo")) return;
+//         // package the args
+//         FooArgs args(ifst, ofst);
+//         // Finally, call the operation
+//         Apply<Operation<FooArgs> >("Foo", ifst.ArcType(), &args);
+//       }
+//
+//  The Apply<> function template takes care of the link between 2 and 3,
+//  provided you also have:
+//
+//  4) A registration for your new operation, on the arc types you care about.
+//     This can be provided easily by the REGISTER_FST_OPERATION macro
+//     defined later in this file:
+//
+//       REGISTER_FST_OPERATION(Foo, StdArc, FooArgs);
+//       REGISTER_FST_OPERATION(Foo, MyArc, FooArgs);
+//       // .. etc
+//
+//
+//  That's it! Now when you call Foo(const FstClass &, MutableFstClass *),
+//  it dispatches (in #3) via the Apply<> function to the correct
+//  instantiation of the template function in #2.
+//
+
+
+#ifndef FST_SCRIPT_SCRIPT_IMPL_H_
+#define FST_SCRIPT_SCRIPT_IMPL_H_
+
+//
+// This file contains general-purpose templates which are used in the
+// implementation of the operations.
+//
+
+#include <utility>
+using std::pair; using std::make_pair;
+#include <string>
+
+#include <fst/script/fst-class.h>
+#include <fst/generic-register.h>
+#include <fst/script/arg-packs.h>
+
+#include <fst/types.h>
+
+namespace fst {
+namespace script {
+
+//
+// A generic register for operations with various kinds of signatures.
+// Needed since every function signature requires a new registration class.
+// The pair<string, string> is understood to be the operation name and arc
+// type; subclasses (or typedefs) need only provide the operation signature.
+//
+
+template<class OperationSignature>
+class GenericOperationRegister
+    : public GenericRegister<pair<string, string>,
+                             OperationSignature,
+                             GenericOperationRegister<OperationSignature> > {
+ public:
+  // Stores op under the key (operation_name, arc_type).
+  void RegisterOperation(const string &operation_name,
+                         const string &arc_type,
+                         OperationSignature op) {
+    const pair<string, string> key(operation_name, arc_type);
+    this->SetEntry(key, op);
+  }
+
+  // Returns the entry found under the key (operation_name, arc_type).
+  OperationSignature GetOperation(
+      const string &operation_name, const string &arc_type) {
+    const pair<string, string> key(operation_name, arc_type);
+    return this->GetEntry(key);
+  }
+
+ protected:
+  // Maps a key to a shared-object file name.  Only the arc type (the second
+  // element of the key) is used: it is passed through
+  // ConvertToLegalCSymbol() and suffixed with "-arc.so".
+  virtual string ConvertKeyToSoFilename(
+      const pair<string, string>& key) const {
+    // Just use the old-style FST for now.
+    string so_name = key.second;  // the arc type
+    ConvertToLegalCSymbol(&so_name);
+    so_name += "-arc.so";
+    return so_name;
+  }
+};
+
+
+// Operation package - everything you need to register a new type of operation
+
+// The ArgPack should be the type that's passed into each wrapped function -
+// for instance, it might be a struct containing all the args.
+// It's always passed by pointer, so const members should be used to enforce
+// constness where it's needed. Return values should be implemented as a
+// member of ArgPack as well.
+
+template<class ArgPack>
+struct Operation {
+  // The argument package type
+  typedef ArgPack Args;
+
+  // The wrapped function type: takes the package by non-const pointer
+  typedef void (*OpType)(ArgPack *args);
+
+  // The register (hash) type
+  typedef GenericOperationRegister<OpType> Register;
+
+  // The register-er type
+  typedef GenericRegisterer<Register> Registerer;
+};
+
+
+// Macro for registering new types of operations.
+
+// Defines a static Registerer, named from ArgPack, Op and Arc, that is
+// constructed from the key (#Op, Arc::Type()) and the function Op<Arc>.
+#define REGISTER_FST_OPERATION(Op, Arc, ArgPack)                        \
+  static fst::script::Operation<ArgPack>::Registerer                    \
+  arc_dispatched_operation_ ## ArgPack ## Op ## Arc ## _registerer(     \
+      make_pair(#Op, Arc::Type()), Op<Arc>)
+
+
+//
+// Template function to apply an operation by name
+//
+
+// Looks up the function registered for (op_name, arc_type) in OpReg's
+// register and calls it with args.  When the lookup yields 0, an error
+// naming the operation and arc type is reported through FSTERROR() and
+// nothing is called.
+template<class OpReg>
+void Apply(const string &op_name, const string &arc_type,
+           typename OpReg::Args *args) {
+  typename OpReg::Register *registry = OpReg::Register::GetRegister();
+  typename OpReg::OpType fn = registry->GetOperation(op_name, arc_type);
+
+  if (fn != 0) {
+    fn(args);
+  } else {
+    FSTERROR() << "No operation found for \"" << op_name << "\" on "
+               << "arc type " << arc_type;
+  }
+}
+
+
+// Helper that logs to ERROR if the arc types of a and b don't match.
+// The op_name is also printed.
+bool ArcTypesMatch(const FstClass &a, const FstClass &b,
+                   const string &op_name);
+
+}  // namespace script
+}  // namespace fst
+
+#endif  // FST_SCRIPT_SCRIPT_IMPL_H_
diff --git a/kaldi_io/src/tools/openfst/include/fst/script/shortest-distance.h b/kaldi_io/src/tools/openfst/include/fst/script/shortest-distance.h
new file mode 100644
index 0000000..5fc2976
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/script/shortest-distance.h
@@ -0,0 +1,250 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: [email protected] (Jake Ratkiewicz)
+
+#ifndef FST_SCRIPT_SHORTEST_DISTANCE_H_
+#define FST_SCRIPT_SHORTEST_DISTANCE_H_
+
+#include <vector>
+using std::vector;
+
+#include <fst/script/arg-packs.h>
+#include <fst/script/fst-class.h>
+#include <fst/script/weight-class.h>
+#include <fst/script/prune.h>  // for ArcFilterType
+#include <fst/queue.h>  // for QueueType
+#include <fst/shortest-distance.h>
+
+namespace fst {
+namespace script {
+
+// Script-level tag naming which arc filter ShortestDistanceHelper() uses.
+enum ArcFilterType { ANY_ARC_FILTER, EPSILON_ARC_FILTER,
+                     INPUT_EPSILON_ARC_FILTER, OUTPUT_EPSILON_ARC_FILTER };
+
+// Script-level counterpart of the templated fst::ShortestDistanceOptions
+// (used below, from <fst/shortest-distance.h>), which this struct shadows
+// inside namespace fst::script.  first_path is always initialized to false.
+struct ShortestDistanceOptions {
+  const QueueType queue_type;
+  const ArcFilterType arc_filter_type;
+  const int64 source;
+  const float delta;
+  const bool first_path;
+
+  ShortestDistanceOptions(QueueType qt, ArcFilterType aft, int64 s,
+                          float d)
+      : queue_type(qt), arc_filter_type(aft), source(s), delta(d),
+        first_path(false) { }
+};
+
+
+
+// 1
+// Argument package mirroring client prototype 1 below:
+// (fst, distance, opts).
+typedef args::Package<const FstClass &, vector<WeightClass> *,
+                      const ShortestDistanceOptions &> ShortestDistanceArgs1;
+
+// Heap-allocates a Queue for the given FST / distance vector.  The primary
+// template ignores both arguments and default-constructs the queue; the
+// caller owns the returned pointer.
+template<class Queue, class Arc, class ArcFilter>
+struct QueueConstructor {
+  static Queue *Construct(const Fst<Arc> &,
+                          const vector<typename Arc::Weight> *) {
+    return new Queue();
+  }
+};
+
+// Specializations to deal with AutoQueue, NaturalShortestFirstQueue,
+// and TopOrderQueue's different constructors
+
+// AutoQueue is built from the FST, the distance vector and an ArcFilter.
+template<class Arc, class ArcFilter>
+struct QueueConstructor<AutoQueue<typename Arc::StateId>, Arc, ArcFilter> {
+  static AutoQueue<typename Arc::StateId> *Construct(
+      const Fst<Arc> &fst,
+      const vector<typename Arc::Weight> *distance) {
+    return new AutoQueue<typename Arc::StateId>(fst, distance, ArcFilter());
+  }
+};
+
+// NaturalShortestFirstQueue is built from the distance vector only.
+template<class Arc, class ArcFilter>
+struct QueueConstructor<NaturalShortestFirstQueue<typename Arc::StateId,
+                                                  typename Arc::Weight>,
+                        Arc, ArcFilter> {
+  static NaturalShortestFirstQueue<typename Arc::StateId, typename Arc::Weight>
+  *Construct(const Fst<Arc> &fst,
+             const vector<typename Arc::Weight> *distance) {
+    return new NaturalShortestFirstQueue<typename Arc::StateId,
+                                         typename Arc::Weight>(*distance);
+  }
+};
+
+// TopOrderQueue is built from the FST and an ArcFilter.
+template<class Arc, class ArcFilter>
+struct QueueConstructor<TopOrderQueue<typename Arc::StateId>, Arc, ArcFilter> {
+  static TopOrderQueue<typename Arc::StateId> *Construct(
+      const Fst<Arc> &fst, const vector<typename Arc::Weight> *weights) {
+    return new TopOrderQueue<typename Arc::StateId>(fst, ArcFilter());
+  }
+};
+
+
+// Runs the templated ShortestDistance() for one (Queue, ArcFilter)
+// combination: constructs the queue through QueueConstructor, builds the
+// templated options from (queue, filter, opts.source, opts.delta), computes
+// into *weights and deletes the queue.  Queue construction and the options
+// use the same ArcFilter type, so the filter-aware queues (AutoQueue,
+// TopOrderQueue) see the same arcs as the computation itself.
+template<class Arc, class Queue, class ArcFilter>
+void ShortestDistanceWithFilter(const Fst<Arc> &fst,
+                                const ShortestDistanceOptions &opts,
+                                vector<typename Arc::Weight> *weights) {
+  Queue *queue =
+      QueueConstructor<Queue, Arc, ArcFilter>::Construct(fst, weights);
+  fst::ShortestDistanceOptions<Arc, Queue, ArcFilter> sdopts(
+      queue, ArcFilter(), opts.source, opts.delta);
+  ShortestDistance(fst, weights, sdopts);
+  delete queue;
+}
+
+// Arc- and queue-typed implementation of overload 1: dispatches on
+// opts.arc_filter_type, then copies the typed distances back to the caller
+// as WeightClass values.  An arc filter type not listed below computes
+// nothing, so the caller's vector is resized to empty.
+template<class Arc, class Queue>
+void ShortestDistanceHelper(ShortestDistanceArgs1 *args) {
+  const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>());
+  const ShortestDistanceOptions &opts = args->arg3;
+
+  vector<typename Arc::Weight> weights;
+
+  switch (opts.arc_filter_type) {
+    case ANY_ARC_FILTER:
+      ShortestDistanceWithFilter<Arc, Queue, AnyArcFilter<Arc> >(
+          fst, opts, &weights);
+      break;
+    case EPSILON_ARC_FILTER:
+      // The queue is now constructed with EpsilonArcFilter as well; it was
+      // previously constructed with AnyArcFilter while the computation ran
+      // with EpsilonArcFilter.
+      ShortestDistanceWithFilter<Arc, Queue, EpsilonArcFilter<Arc> >(
+          fst, opts, &weights);
+      break;
+    case INPUT_EPSILON_ARC_FILTER:
+      ShortestDistanceWithFilter<Arc, Queue, InputEpsilonArcFilter<Arc> >(
+          fst, opts, &weights);
+      break;
+    case OUTPUT_EPSILON_ARC_FILTER:
+      ShortestDistanceWithFilter<Arc, Queue, OutputEpsilonArcFilter<Arc> >(
+          fst, opts, &weights);
+      break;
+  }
+
+  // Copy the weights back
+  args->arg2->resize(weights.size());
+  for (unsigned i = 0; i < weights.size(); ++i) {
+    (*args->arg2)[i] = WeightClass(weights[i]);
+  }
+}
+
+// Arc-typed implementation of overload 1: dispatches on opts.queue_type to
+// the matching ShortestDistanceHelper instantiation.
+template<class Arc>
+void ShortestDistance(ShortestDistanceArgs1 *args) {
+  const ShortestDistanceOptions &opts = args->arg3;
+  typedef typename Arc::StateId StateId;
+  typedef typename Arc::Weight Weight;
+
+  // Must consider (opts.queue_type x opts.filter_type) options
+  switch (opts.queue_type) {
+    default:
+      FSTERROR() << "Unknown queue type: " << opts.queue_type;
+      // NOTE(review): there is no return here, so after reporting the error
+      // control falls through and the AUTO_QUEUE path runs.  Behavior kept
+      // as found - confirm that this fallback is intended.
+
+    case AUTO_QUEUE:
+      ShortestDistanceHelper<Arc, AutoQueue<StateId> >(args);
+      return;
+
+    case FIFO_QUEUE:
+      ShortestDistanceHelper<Arc, FifoQueue<StateId> >(args);
+      return;
+
+    case LIFO_QUEUE:
+      ShortestDistanceHelper<Arc, LifoQueue<StateId> >(args);
+      return;
+
+    case SHORTEST_FIRST_QUEUE:
+      ShortestDistanceHelper<Arc,
+        NaturalShortestFirstQueue<StateId, Weight> >(args);
+      return;
+
+    case STATE_ORDER_QUEUE:
+      ShortestDistanceHelper<Arc, StateOrderQueue<StateId> >(args);
+      return;
+
+    case TOP_ORDER_QUEUE:
+      ShortestDistanceHelper<Arc, TopOrderQueue<StateId> >(args);
+      return;
+  }
+}
+
+// 2
+// Argument package mirroring client prototype 2 below:
+// (ifst, distance, reverse, delta).
+typedef args::Package<const FstClass&, vector<WeightClass>*,
+                      bool, double> ShortestDistanceArgs2;
+
+// Arc-typed implementation of overload 2: forwards to the templated
+// ShortestDistance() and copies the typed distances back to the caller as
+// WeightClass values.
+template<class Arc>
+void ShortestDistance(ShortestDistanceArgs2 *args) {
+  const Fst<Arc> &fst = *(args->arg1.GetFst<Arc>());
+  vector<typename Arc::Weight> distance;
+
+  ShortestDistance(fst, &distance, args->arg3, args->arg4);
+
+  // convert the typed weights back into weightclass
+  vector<WeightClass> *retval = args->arg2;
+  retval->resize(distance.size());
+
+  for (unsigned i = 0; i < distance.size(); ++i) {
+    (*retval)[i] = WeightClass(distance[i]);
+  }
+}
+
+// 3
+// Argument package for client prototype 3 below: the input FST plus a
+// WeightClass return value.
+typedef args::WithReturnValue<WeightClass,
+                              const FstClass &> ShortestDistanceArgs3;
+
+// Arc-typed implementation of overload 3: stores the result of the
+// single-argument templated ShortestDistance() in args->retval.
+template<class Arc>
+void ShortestDistance(ShortestDistanceArgs3 *args) {
+  const Fst<Arc> &fst = *(args->args.GetFst<Arc>());
+
+  args->retval = WeightClass(ShortestDistance(fst));
+}
+
+
+// 1
+void ShortestDistance(const FstClass &fst, vector<WeightClass> *distance,
+                      const ShortestDistanceOptions &opts);
+
+// 2
+void ShortestDistance(const FstClass &ifst, vector<WeightClass> *distance,
+                      bool reverse = false, double delta = fst::kDelta);
+
+#ifndef SWIG
+// 3
+WeightClass ShortestDistance(const FstClass &ifst);
+#endif
+
+}  // namespace script
+}  // namespace fst
+
+
+
+#endif  // FST_SCRIPT_SHORTEST_DISTANCE_H_
diff --git a/kaldi_io/src/tools/openfst/include/fst/script/shortest-path.h b/kaldi_io/src/tools/openfst/include/fst/script/shortest-path.h
new file mode 100644
index 0000000..b3a3eb9
--- /dev/null
+++ b/kaldi_io/src/tools/openfst/include/fst/script/shortest-path.h
@@ -0,0 +1,190 @@
+
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Copyright 2005-2010 Google, Inc.
+// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_SHORTEST_PATH_H_ +#define FST_SCRIPT_SHORTEST_PATH_H_ + +#include <vector> +using std::vector; + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/script/weight-class.h> +#include <fst/shortest-path.h> +#include <fst/script/shortest-distance.h> // for ShortestDistanceOptions + +namespace fst { +namespace script { + +struct ShortestPathOptions + : public fst::script::ShortestDistanceOptions { + const size_t nshortest; + const bool unique; + const bool has_distance; + const bool first_path; + const WeightClass weight_threshold; + const int64 state_threshold; + + ShortestPathOptions(QueueType qt, size_t n = 1, + bool u = false, bool hasdist = false, + float d = fst::kDelta, bool fp = false, + WeightClass w = fst::script::WeightClass::Zero(), + int64 s = fst::kNoStateId) + : ShortestDistanceOptions(qt, ANY_ARC_FILTER, kNoStateId, d), + nshortest(n), unique(u), has_distance(hasdist), first_path(fp), + weight_threshold(w), state_threshold(s) { } +}; + +typedef args::Package<const FstClass &, MutableFstClass *, + vector<WeightClass> *, const ShortestPathOptions &> + ShortestPathArgs1; + + +template<class Arc> +void ShortestPath(ShortestPathArgs1 *args) { + const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); + const ShortestPathOptions &opts = args->arg4; + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + typedef AnyArcFilter<Arc> ArcFilter; + + vector<typename Arc::Weight> weights; + typename Arc::Weight weight_threshold = + *(opts.weight_threshold.GetWeight<Weight>()); + + switch (opts.queue_type) { + case AUTO_QUEUE: { + typedef AutoQueue<StateId> Queue; + Queue *queue = QueueConstructor<Queue, Arc, + ArcFilter>::Construct(ifst, &weights); + fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts( + queue, ArcFilter(), opts.nshortest, opts.unique, + opts.has_distance, 
opts.delta, opts.first_path, + weight_threshold, opts.state_threshold); + ShortestPath(ifst, ofst, &weights, spopts); + delete queue; + return; + } + case FIFO_QUEUE: { + typedef FifoQueue<StateId> Queue; + Queue *queue = QueueConstructor<Queue, Arc, + ArcFilter>::Construct(ifst, &weights); + fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts( + queue, ArcFilter(), opts.nshortest, opts.unique, + opts.has_distance, opts.delta, opts.first_path, + weight_threshold, opts.state_threshold); + ShortestPath(ifst, ofst, &weights, spopts); + delete queue; + return; + } + case LIFO_QUEUE: { + typedef LifoQueue<StateId> Queue; + Queue *queue = QueueConstructor<Queue, Arc, + ArcFilter >::Construct(ifst, &weights); + fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts( + queue, ArcFilter(), opts.nshortest, opts.unique, + opts.has_distance, opts.delta, opts.first_path, + weight_threshold, opts.state_threshold); + ShortestPath(ifst, ofst, &weights, spopts); + delete queue; + return; + } + case SHORTEST_FIRST_QUEUE: { + typedef NaturalShortestFirstQueue<StateId, Weight> Queue; + Queue *queue = QueueConstructor<Queue, Arc, + ArcFilter>::Construct(ifst, &weights); + fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts( + queue, ArcFilter(), opts.nshortest, opts.unique, + opts.has_distance, opts.delta, opts.first_path, + weight_threshold, opts.state_threshold); + ShortestPath(ifst, ofst, &weights, spopts); + delete queue; + return; + } + case STATE_ORDER_QUEUE: { + typedef StateOrderQueue<StateId> Queue; + Queue *queue = QueueConstructor<Queue, Arc, + ArcFilter>::Construct(ifst, &weights); + fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts( + queue, ArcFilter(), opts.nshortest, opts.unique, + opts.has_distance, opts.delta, opts.first_path, + weight_threshold, opts.state_threshold); + ShortestPath(ifst, ofst, &weights, spopts); + delete queue; + return; + } + case TOP_ORDER_QUEUE: { + typedef TopOrderQueue<StateId> Queue; + Queue *queue = QueueConstructor<Queue, Arc, + 
ArcFilter>::Construct(ifst, &weights); + fst::ShortestPathOptions<Arc, Queue, ArcFilter> spopts( + queue, ArcFilter(), opts.nshortest, opts.unique, + opts.has_distance, opts.delta, opts.first_path, + weight_threshold, opts.state_threshold); + ShortestPath(ifst, ofst, &weights, spopts); + delete queue; + return; + } + default: + FSTERROR() << "Unknown queue type: " << opts.queue_type; + ofst->SetProperties(kError, kError); + } + + // Copy the weights back + args->arg3->resize(weights.size()); + for (unsigned i = 0; i < weights.size(); ++i) { + (*args->arg3)[i] = WeightClass(weights[i]); + } +} + +// 2 +typedef args::Package<const FstClass &, MutableFstClass *, + size_t, bool, bool, WeightClass, + int64> ShortestPathArgs2; + +template<class Arc> +void ShortestPath(ShortestPathArgs2 *args) { + const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); + typename Arc::Weight weight_threshold = + *(args->arg6.GetWeight<typename Arc::Weight>()); + + ShortestPath(ifst, ofst, args->arg3, args->arg4, args->arg5, + weight_threshold, args->arg7); +} + + +// 1 +void ShortestPath(const FstClass &ifst, MutableFstClass *ofst, + vector<WeightClass> *distance, + const ShortestPathOptions &opts); + + +// 2 +void ShortestPath(const FstClass &ifst, MutableFstClass *ofst, + size_t n = 1, bool unique = false, + bool first_path = false, + WeightClass weight_threshold = + fst::script::WeightClass::Zero(), + int64 state_threshold = fst::kNoStateId); + +} // namespace script +} // namespace fst + + + +#endif // FST_SCRIPT_SHORTEST_PATH_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/symbols.h b/kaldi_io/src/tools/openfst/include/fst/script/symbols.h new file mode 100644 index 0000000..927600a --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/symbols.h @@ -0,0 +1,20 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_SYMBOLS_H_ +#define FST_SCRIPT_SYMBOLS_H_ + +#endif // FST_SCRIPT_SYMBOLS_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/synchronize.h b/kaldi_io/src/tools/openfst/include/fst/script/synchronize.h new file mode 100644 index 0000000..3c0c905 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/synchronize.h @@ -0,0 +1,42 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_SYNCHRONIZE_H_ +#define FST_SCRIPT_SYNCHRONIZE_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/synchronize.h> + +namespace fst { +namespace script { + +typedef args::Package<const FstClass &, MutableFstClass *> SynchronizeArgs; + +template<class Arc> +void Synchronize(SynchronizeArgs *args) { + const Fst<Arc> &ifst = *(args->arg1.GetFst<Arc>()); + MutableFst<Arc> *ofst = args->arg2->GetMutableFst<Arc>(); + + Synchronize(ifst, ofst); +} + +void Synchronize(const FstClass &ifst, MutableFstClass *ofst); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_SYNCHRONIZE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/text-io.h b/kaldi_io/src/tools/openfst/include/fst/script/text-io.h new file mode 100644 index 0000000..d97a007 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/text-io.h @@ -0,0 +1,51 @@ +// text-io.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// Modified: [email protected] (Jake Ratkiewicz) to work with generic WeightClass +// +// \file +// Utilities for reading and writing textual strings representing +// states, labels, and weights and files specifying label-label pairs +// and potentials (state-weight pairs). 
+// + +#ifndef FST_SCRIPT_TEXT_IO_H__ +#define FST_SCRIPT_TEXT_IO_H__ + +#include <string> +#include <vector> +using std::vector; + + +#include <iostream> +#include <fstream> +#include <sstream> +#include <fst/script/weight-class.h> + +namespace fst { +namespace script { + +bool ReadPotentials(const string &weight_type, + const string& filename, + vector<WeightClass>* potential); + +bool WritePotentials(const string& filename, + const vector<WeightClass>& potential); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_TEXT_IO_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/topsort.h b/kaldi_io/src/tools/openfst/include/fst/script/topsort.h new file mode 100644 index 0000000..4e27e48 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/topsort.h @@ -0,0 +1,40 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_TOPSORT_H_ +#define FST_SCRIPT_TOPSORT_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/topsort.h> + +namespace fst { +namespace script { + +typedef args::WithReturnValue<bool, MutableFstClass*> TopSortArgs; + +template<class Arc> +void TopSort(TopSortArgs *args) { + MutableFst<Arc> *fst = args->args->GetMutableFst<Arc>(); + args->retval = TopSort(fst); +} + +bool TopSort(MutableFstClass *fst); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_TOPSORT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/union.h b/kaldi_io/src/tools/openfst/include/fst/script/union.h new file mode 100644 index 0000000..780e484 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/union.h @@ -0,0 +1,42 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_UNION_H_ +#define FST_SCRIPT_UNION_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/union.h> + +namespace fst { +namespace script { + +typedef args::Package<MutableFstClass *, const FstClass &> UnionArgs; + +template<class Arc> +void Union(UnionArgs *args) { + MutableFst<Arc> *fst1 = args->arg1->GetMutableFst<Arc>(); + const Fst<Arc> &fst2 = *(args->arg2.GetFst<Arc>()); + + Union(fst1, fst2); +} + +void Union(MutableFstClass *fst1, const FstClass &fst2); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_UNION_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/verify.h b/kaldi_io/src/tools/openfst/include/fst/script/verify.h new file mode 100644 index 0000000..6904003 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/verify.h @@ -0,0 +1,40 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jeffrey Sorensen) + +#ifndef FST_SCRIPT_VERIFY_H_ +#define FST_SCRIPT_VERIFY_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/verify.h> + +namespace fst { +namespace script { + +typedef args::WithReturnValue<bool, const FstClass *> VerifyArgs; + +template<class Arc> +void Verify(VerifyArgs *args) { + const Fst<Arc> *fst = args->args->GetFst<Arc>(); + args->retval = Verify(*fst); +} + +bool Verify(const FstClass &fst1); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_VERIFY_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/weight-class.h b/kaldi_io/src/tools/openfst/include/fst/script/weight-class.h new file mode 100644 index 0000000..b9f7ddf --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/weight-class.h @@ -0,0 +1,223 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Jake Ratkiewicz) + +// Represents a generic weight in an FST -- that is, represents a specific +// type of weight underneath while hiding that type from a client. 
+ + +#ifndef FST_SCRIPT_WEIGHT_CLASS_H_ +#define FST_SCRIPT_WEIGHT_CLASS_H_ + +#include <string> + +#include <fst/generic-register.h> +#include <fst/util.h> + +namespace fst { +namespace script { + +class WeightImplBase { + public: + virtual WeightImplBase *Copy() const = 0; + virtual void Print(ostream *o) const = 0; + virtual const string &Type() const = 0; + virtual string to_string() const = 0; + virtual bool operator == (const WeightImplBase &other) const = 0; + virtual ~WeightImplBase() { } +}; + +template<class W> +struct WeightClassImpl : public WeightImplBase { + W weight; + + explicit WeightClassImpl(const W& weight) : weight(weight) { } + + virtual WeightClassImpl<W> *Copy() const { + return new WeightClassImpl<W>(weight); + } + + virtual const string &Type() const { return W::Type(); } + + virtual void Print(ostream *o) const { + *o << weight; + } + + virtual string to_string() const { + string str; + WeightToStr(weight, &str); + return str; + } + + virtual bool operator == (const WeightImplBase &other) const { + if (Type() != other.Type()) { + return false; + } else { + const WeightClassImpl<W> *typed_other = + static_cast<const WeightClassImpl<W> *>(&other); + + return typed_other->weight == weight; + } + } +}; + + +class WeightClass { + public: + WeightClass() : element_type_(ZERO), impl_(0) { } + + template<class W> + explicit WeightClass(const W& weight) + : element_type_(OTHER), impl_(new WeightClassImpl<W>(weight)) { } + + WeightClass(const string &weight_type, const string &weight_str); + + WeightClass(const WeightClass &other) : + element_type_(other.element_type_), + impl_(other.impl_ ? other.impl_->Copy() : 0) { } + + WeightClass &operator = (const WeightClass &other) { + if (impl_) delete impl_; + impl_ = other.impl_ ? 
other.impl_->Copy() : 0; + element_type_ = other.element_type_; + return *this; + } + + template<class W> + const W* GetWeight() const; + + string to_string() const { + switch (element_type_) { + case ZERO: + return "ZERO"; + case ONE: + return "ONE"; + default: + case OTHER: + return impl_->to_string(); + } + } + + bool operator == (const WeightClass &other) const { + return element_type_ == other.element_type_ && + ((impl_ && other.impl_ && (*impl_ == *other.impl_)) || + (impl_ == 0 && other.impl_ == 0)); + } + + static const WeightClass &Zero() { + static WeightClass w(ZERO); + + return w; + } + + static const WeightClass &One() { + static WeightClass w(ONE); + + return w; + } + + const string &Type() const { + if (impl_) return impl_->Type(); + static const string no_type = "none"; + return no_type; + } + + + ~WeightClass() { if (impl_) delete impl_; } + private: + enum ElementType { ZERO, ONE, OTHER }; + ElementType element_type_; + + WeightImplBase *impl_; + + explicit WeightClass(ElementType et) : element_type_(et), impl_(0) { } + + friend ostream &operator << (ostream &o, const WeightClass &c); +}; + +template<class W> +const W* WeightClass::GetWeight() const { + // We need to store zero and one as statics, because the weight type + // W might return them as temporaries. We're returning a pointer, + // and it won't do to get the address of a temporary. + static const W zero = W::Zero(); + static const W one = W::One(); + + if (element_type_ == ZERO) { + return &zero; + } else if (element_type_ == ONE) { + return &one; + } else { + if (W::Type() != impl_->Type()) { + return NULL; + } else { + WeightClassImpl<W> *typed_impl = + static_cast<WeightClassImpl<W> *>(impl_); + return &typed_impl->weight; + } + } +} + +// +// Registration for generic weight types. 
+// + +typedef WeightImplBase* (*StrToWeightImplBaseT)(const string &str, + const string &src, + size_t nline); + +template<class W> +WeightImplBase* StrToWeightImplBase(const string &str, + const string &src, size_t nline) { + return new WeightClassImpl<W>(StrToWeight<W>(str, src, nline)); +} + +// The following confuses swig, and doesn't need to be wrapped anyway. +#ifndef SWIG +ostream& operator << (ostream &o, const WeightClass &c); + +class WeightClassRegister : public GenericRegister<string, + StrToWeightImplBaseT, + WeightClassRegister> { + protected: + virtual string ConvertKeyToSoFilename(const string &key) const { + return key + ".so"; + } +}; + +typedef GenericRegisterer<WeightClassRegister> WeightClassRegisterer; +#endif + +// internal version, needs to be called by wrapper in order for +// macro args to expand +#define REGISTER_FST_WEIGHT__(Weight, line) \ + static WeightClassRegisterer weight_registerer ## _ ## line( \ + Weight::Type(), \ + StrToWeightImplBase<Weight>) + +// This layer is where __FILE__ and __LINE__ are expanded +#define REGISTER_FST_WEIGHT_EXPANDER(Weight, line) \ + REGISTER_FST_WEIGHT__(Weight, line) + +// +// Macro for registering new weight types. Clients call this. +// +#define REGISTER_FST_WEIGHT(Weight) \ + REGISTER_FST_WEIGHT_EXPANDER(Weight, __LINE__) + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_WEIGHT_CLASS_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/shortest-distance.h b/kaldi_io/src/tools/openfst/include/fst/shortest-distance.h new file mode 100644 index 0000000..ec47a14 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/shortest-distance.h @@ -0,0 +1,348 @@ +// shortest-distance.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) +// +// \file +// Functions and classes to find shortest distance in an FST. + +#ifndef FST_LIB_SHORTEST_DISTANCE_H__ +#define FST_LIB_SHORTEST_DISTANCE_H__ + +#include <deque> +using std::deque; +#include <vector> +using std::vector; + +#include <fst/arcfilter.h> +#include <fst/cache.h> +#include <fst/queue.h> +#include <fst/reverse.h> +#include <fst/test-properties.h> + + +namespace fst { + +template <class Arc, class Queue, class ArcFilter> +struct ShortestDistanceOptions { + typedef typename Arc::StateId StateId; + + Queue *state_queue; // Queue discipline used; owned by caller + ArcFilter arc_filter; // Arc filter (e.g., limit to only epsilon graph) + StateId source; // If kNoStateId, use the Fst's initial state + float delta; // Determines the degree of convergence required + bool first_path; // For a semiring with the path property (o.w. + // undefined), compute the shortest-distances along + // along the first path to a final state found + // by the algorithm. That path is the shortest-path + // only if the FST has a unique final state (or all + // the final states have the same final weight), the + // queue discipline is shortest-first and all the + // weights in the FST are between One() and Zero() + // according to NaturalLess. 
+ + ShortestDistanceOptions(Queue *q, ArcFilter filt, StateId src = kNoStateId, + float d = kDelta) + : state_queue(q), arc_filter(filt), source(src), delta(d), + first_path(false) {} +}; + + +// Computation state of the shortest-distance algorithm. Reusable +// information is maintained across calls to member function +// ShortestDistance(source) when 'retain' is true for improved +// efficiency when calling multiple times from different source states +// (e.g., in epsilon removal). Contrary to usual conventions, 'fst' +// may not be freed before this class. Vector 'distance' should not be +// modified by the user between these calls. +// The Error() method returns true if an error was encountered. +template<class Arc, class Queue, class ArcFilter> +class ShortestDistanceState { + public: + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + + ShortestDistanceState( + const Fst<Arc> &fst, + vector<Weight> *distance, + const ShortestDistanceOptions<Arc, Queue, ArcFilter> &opts, + bool retain) + : fst_(fst), distance_(distance), state_queue_(opts.state_queue), + arc_filter_(opts.arc_filter), delta_(opts.delta), + first_path_(opts.first_path), retain_(retain), source_id_(0), + error_(false) { + distance_->clear(); + } + + ~ShortestDistanceState() {} + + void ShortestDistance(StateId source); + + bool Error() const { return error_; } + + private: + const Fst<Arc> &fst_; + vector<Weight> *distance_; + Queue *state_queue_; + ArcFilter arc_filter_; + float delta_; + bool first_path_; + bool retain_; // Retain and reuse information across calls + + vector<Weight> rdistance_; // Relaxation distance. + vector<bool> enqueued_; // Is state enqueued? + vector<StateId> sources_; // Source ID for ith state in 'distance_', + // 'rdistance_', and 'enqueued_' if retained. + StateId source_id_; // Unique ID characterizing each call to SD + + bool error_; +}; + +// Compute the shortest distance. 
If 'source' is kNoStateId, use +// the initial state of the Fst. +template <class Arc, class Queue, class ArcFilter> +void ShortestDistanceState<Arc, Queue, ArcFilter>::ShortestDistance( + StateId source) { + if (fst_.Start() == kNoStateId) { + if (fst_.Properties(kError, false)) error_ = true; + return; + } + + if (!(Weight::Properties() & kRightSemiring)) { + FSTERROR() << "ShortestDistance: Weight needs to be right distributive: " + << Weight::Type(); + error_ = true; + return; + } + + if (first_path_ && !(Weight::Properties() & kPath)) { + FSTERROR() << "ShortestDistance: first_path option disallowed when " + << "Weight does not have the path property: " + << Weight::Type(); + error_ = true; + return; + } + + state_queue_->Clear(); + + if (!retain_) { + distance_->clear(); + rdistance_.clear(); + enqueued_.clear(); + } + + if (source == kNoStateId) + source = fst_.Start(); + + while (distance_->size() <= source) { + distance_->push_back(Weight::Zero()); + rdistance_.push_back(Weight::Zero()); + enqueued_.push_back(false); + } + if (retain_) { + while (sources_.size() <= source) + sources_.push_back(kNoStateId); + sources_[source] = source_id_; + } + (*distance_)[source] = Weight::One(); + rdistance_[source] = Weight::One(); + enqueued_[source] = true; + + state_queue_->Enqueue(source); + + while (!state_queue_->Empty()) { + StateId s = state_queue_->Head(); + state_queue_->Dequeue(); + while (distance_->size() <= s) { + distance_->push_back(Weight::Zero()); + rdistance_.push_back(Weight::Zero()); + enqueued_.push_back(false); + } + if (first_path_ && (fst_.Final(s) != Weight::Zero())) + break; + enqueued_[s] = false; + Weight r = rdistance_[s]; + rdistance_[s] = Weight::Zero(); + for (ArcIterator< Fst<Arc> > aiter(fst_, s); + !aiter.Done(); + aiter.Next()) { + const Arc &arc = aiter.Value(); + if (!arc_filter_(arc)) + continue; + while (distance_->size() <= arc.nextstate) { + distance_->push_back(Weight::Zero()); + rdistance_.push_back(Weight::Zero()); + 
enqueued_.push_back(false); + } + if (retain_) { + while (sources_.size() <= arc.nextstate) + sources_.push_back(kNoStateId); + if (sources_[arc.nextstate] != source_id_) { + (*distance_)[arc.nextstate] = Weight::Zero(); + rdistance_[arc.nextstate] = Weight::Zero(); + enqueued_[arc.nextstate] = false; + sources_[arc.nextstate] = source_id_; + } + } + Weight &nd = (*distance_)[arc.nextstate]; + Weight &nr = rdistance_[arc.nextstate]; + Weight w = Times(r, arc.weight); + if (!ApproxEqual(nd, Plus(nd, w), delta_)) { + nd = Plus(nd, w); + nr = Plus(nr, w); + if (!nd.Member() || !nr.Member()) { + error_ = true; + return; + } + if (!enqueued_[arc.nextstate]) { + state_queue_->Enqueue(arc.nextstate); + enqueued_[arc.nextstate] = true; + } else { + state_queue_->Update(arc.nextstate); + } + } + } + } + ++source_id_; + if (fst_.Properties(kError, false)) error_ = true; +} + + +// Shortest-distance algorithm: this version allows fine control +// via the options argument. See below for a simpler interface. +// +// This computes the shortest distance from the 'opts.source' state to +// each visited state S and stores the value in the 'distance' vector. +// An unvisited state S has distance Zero(), which will be stored in +// the 'distance' vector if S is less than the maximum visited state. +// The state queue discipline, arc filter, and convergence delta are +// taken in the options argument. +// The 'distance' vector will contain a unique element for which +// Member() is false if an error was encountered. +// +// The weights must must be right distributive and k-closed (i.e., 1 + +// x + x^2 + ... + x^(k +1) = 1 + x + x^2 + ... + x^k). +// +// The algorithm is from Mohri, "Semiring Framweork and Algorithms for +// Shortest-Distance Problems", Journal of Automata, Languages and +// Combinatorics 7(3):321-350, 2002. The complexity of algorithm +// depends on the properties of the semiring and the queue discipline +// used. Refer to the paper for more details. 
+template<class Arc, class Queue, class ArcFilter> +void ShortestDistance( + const Fst<Arc> &fst, + vector<typename Arc::Weight> *distance, + const ShortestDistanceOptions<Arc, Queue, ArcFilter> &opts) { + + ShortestDistanceState<Arc, Queue, ArcFilter> + sd_state(fst, distance, opts, false); + sd_state.ShortestDistance(opts.source); + if (sd_state.Error()) { + distance->clear(); + distance->resize(1, Arc::Weight::NoWeight()); + } +} + +// Shortest-distance algorithm: simplified interface. See above for a +// version that allows finer control. +// +// If 'reverse' is false, this computes the shortest distance from the +// initial state to each state S and stores the value in the +// 'distance' vector. If 'reverse' is true, this computes the shortest +// distance from each state to the final states. An unvisited state S +// has distance Zero(), which will be stored in the 'distance' vector +// if S is less than the maximum visited state. The state queue +// discipline is automatically-selected. +// The 'distance' vector will contain a unique element for which +// Member() is false if an error was encountered. +// +// The weights must must be right (left) distributive if reverse is +// false (true) and k-closed (i.e., 1 + x + x^2 + ... + x^(k +1) = 1 + +// x + x^2 + ... + x^k). +// +// The algorithm is from Mohri, "Semiring Framweork and Algorithms for +// Shortest-Distance Problems", Journal of Automata, Languages and +// Combinatorics 7(3):321-350, 2002. The complexity of algorithm +// depends on the properties of the semiring and the queue discipline +// used. Refer to the paper for more details. 
+template<class Arc> +void ShortestDistance(const Fst<Arc> &fst, + vector<typename Arc::Weight> *distance, + bool reverse = false, + float delta = kDelta) { + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + + if (!reverse) { + AnyArcFilter<Arc> arc_filter; + AutoQueue<StateId> state_queue(fst, distance, arc_filter); + ShortestDistanceOptions< Arc, AutoQueue<StateId>, AnyArcFilter<Arc> > + opts(&state_queue, arc_filter); + opts.delta = delta; + ShortestDistance(fst, distance, opts); + } else { + typedef ReverseArc<Arc> ReverseArc; + typedef typename ReverseArc::Weight ReverseWeight; + AnyArcFilter<ReverseArc> rarc_filter; + VectorFst<ReverseArc> rfst; + Reverse(fst, &rfst); + vector<ReverseWeight> rdistance; + AutoQueue<StateId> state_queue(rfst, &rdistance, rarc_filter); + ShortestDistanceOptions< ReverseArc, AutoQueue<StateId>, + AnyArcFilter<ReverseArc> > + ropts(&state_queue, rarc_filter); + ropts.delta = delta; + ShortestDistance(rfst, &rdistance, ropts); + distance->clear(); + if (rdistance.size() == 1 && !rdistance[0].Member()) { + distance->resize(1, Arc::Weight::NoWeight()); + return; + } + while (distance->size() < rdistance.size() - 1) + distance->push_back(rdistance[distance->size() + 1].Reverse()); + } +} + + +// Return the sum of the weight of all successful paths in an FST, i.e., +// the shortest-distance from the initial state to the final states. +// Returns a weight such that Member() is false if an error was encountered. 
+template <class Arc> +typename Arc::Weight ShortestDistance(const Fst<Arc> &fst, float delta = kDelta) { + typedef typename Arc::Weight Weight; + typedef typename Arc::StateId StateId; + vector<Weight> distance; + if (Weight::Properties() & kRightSemiring) { + ShortestDistance(fst, &distance, false, delta); + if (distance.size() == 1 && !distance[0].Member()) + return Arc::Weight::NoWeight(); + Weight sum = Weight::Zero(); + for (StateId s = 0; s < distance.size(); ++s) + sum = Plus(sum, Times(distance[s], fst.Final(s))); + return sum; + } else { + ShortestDistance(fst, &distance, true, delta); + StateId s = fst.Start(); + if (distance.size() == 1 && !distance[0].Member()) + return Arc::Weight::NoWeight(); + return s != kNoStateId && s < distance.size() ? + distance[s] : Weight::Zero(); + } +} + + +} // namespace fst + +#endif // FST_LIB_SHORTEST_DISTANCE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/shortest-path.h b/kaldi_io/src/tools/openfst/include/fst/shortest-path.h new file mode 100644 index 0000000..9cd13d9 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/shortest-path.h @@ -0,0 +1,501 @@ +// shortest-path.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) +// +// \file +// Functions to find shortest paths in an FST. 
+ +#ifndef FST_LIB_SHORTEST_PATH_H__ +#define FST_LIB_SHORTEST_PATH_H__ + +#include <functional> +#include <utility> +using std::pair; using std::make_pair; +#include <vector> +using std::vector; + +#include <fst/cache.h> +#include <fst/determinize.h> +#include <fst/queue.h> +#include <fst/shortest-distance.h> +#include <fst/test-properties.h> + + +namespace fst { + +template <class Arc, class Queue, class ArcFilter> +struct ShortestPathOptions + : public ShortestDistanceOptions<Arc, Queue, ArcFilter> { + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + size_t nshortest; // return n-shortest paths + bool unique; // only return paths with distinct input strings + bool has_distance; // distance vector already contains the + // shortest distance from the initial state + bool first_path; // Single shortest path stops after finding the first + // path to a final state. That path is the shortest path + // only when using the ShortestFirstQueue and + // only when all the weights in the FST are between + // One() and Zero() according to NaturalLess. + Weight weight_threshold; // pruning weight threshold. + StateId state_threshold; // pruning state threshold. + + ShortestPathOptions(Queue *q, ArcFilter filt, size_t n = 1, bool u = false, + bool hasdist = false, float d = kDelta, + bool fp = false, Weight w = Weight::Zero(), + StateId s = kNoStateId) + : ShortestDistanceOptions<Arc, Queue, ArcFilter>(q, filt, kNoStateId, d), + nshortest(n), unique(u), has_distance(hasdist), first_path(fp), + weight_threshold(w), state_threshold(s) {} +}; + + +// Shortest-path algorithm: normally not called directly; prefer +// 'ShortestPath' below with n=1. 'ofst' contains the shortest path in +// 'ifst'. 'distance' returns the shortest distances from the source +// state to each state in 'ifst'. 'opts' is used to specify options +// such as the queue discipline, the arc filter and delta. +// +// The shortest path is the lowest weight path w.r.t. 
the natural +// semiring order. +// +// The weights need to be right distributive and have the path (kPath) +// property. +template<class Arc, class Queue, class ArcFilter> +void SingleShortestPath(const Fst<Arc> &ifst, + MutableFst<Arc> *ofst, + vector<typename Arc::Weight> *distance, + ShortestPathOptions<Arc, Queue, ArcFilter> &opts) { + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + + ofst->DeleteStates(); + ofst->SetInputSymbols(ifst.InputSymbols()); + ofst->SetOutputSymbols(ifst.OutputSymbols()); + + if (ifst.Start() == kNoStateId) { + if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError); + return; + } + + vector<bool> enqueued; + vector<StateId> parent; + vector<Arc> arc_parent; + + Queue *state_queue = opts.state_queue; + StateId source = opts.source == kNoStateId ? ifst.Start() : opts.source; + Weight f_distance = Weight::Zero(); + StateId f_parent = kNoStateId; + + distance->clear(); + state_queue->Clear(); + if (opts.nshortest != 1) { + FSTERROR() << "SingleShortestPath: for nshortest > 1, use ShortestPath" + << " instead"; + ofst->SetProperties(kError, kError); + return; + } + if (opts.weight_threshold != Weight::Zero() || + opts.state_threshold != kNoStateId) { + FSTERROR() << + "SingleShortestPath: weight and state thresholds not applicable"; + ofst->SetProperties(kError, kError); + return; + } + if ((Weight::Properties() & (kPath | kRightSemiring)) + != (kPath | kRightSemiring)) { + FSTERROR() << "SingleShortestPath: Weight needs to have the path" + << " property and be right distributive: " << Weight::Type(); + ofst->SetProperties(kError, kError); + return; + } + while (distance->size() < source) { + distance->push_back(Weight::Zero()); + enqueued.push_back(false); + parent.push_back(kNoStateId); + arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(), kNoStateId)); + } + distance->push_back(Weight::One()); + parent.push_back(kNoStateId); + arc_parent.push_back(Arc(kNoLabel, kNoLabel, 
Weight::Zero(), kNoStateId)); + state_queue->Enqueue(source); + enqueued.push_back(true); + + while (!state_queue->Empty()) { + StateId s = state_queue->Head(); + state_queue->Dequeue(); + enqueued[s] = false; + Weight sd = (*distance)[s]; + if (ifst.Final(s) != Weight::Zero()) { + Weight w = Times(sd, ifst.Final(s)); + if (f_distance != Plus(f_distance, w)) { + f_distance = Plus(f_distance, w); + f_parent = s; + } + if (!f_distance.Member()) { + ofst->SetProperties(kError, kError); + return; + } + if (opts.first_path) + break; + } + for (ArcIterator< Fst<Arc> > aiter(ifst, s); + !aiter.Done(); + aiter.Next()) { + const Arc &arc = aiter.Value(); + while (distance->size() <= arc.nextstate) { + distance->push_back(Weight::Zero()); + enqueued.push_back(false); + parent.push_back(kNoStateId); + arc_parent.push_back(Arc(kNoLabel, kNoLabel, Weight::Zero(), + kNoStateId)); + } + Weight &nd = (*distance)[arc.nextstate]; + Weight w = Times(sd, arc.weight); + if (nd != Plus(nd, w)) { + nd = Plus(nd, w); + if (!nd.Member()) { + ofst->SetProperties(kError, kError); + return; + } + parent[arc.nextstate] = s; + arc_parent[arc.nextstate] = arc; + if (!enqueued[arc.nextstate]) { + state_queue->Enqueue(arc.nextstate); + enqueued[arc.nextstate] = true; + } else { + state_queue->Update(arc.nextstate); + } + } + } + } + + StateId s_p = kNoStateId, d_p = kNoStateId; + for (StateId s = f_parent, d = kNoStateId; + s != kNoStateId; + d = s, s = parent[s]) { + d_p = s_p; + s_p = ofst->AddState(); + if (d == kNoStateId) { + ofst->SetFinal(s_p, ifst.Final(f_parent)); + } else { + arc_parent[d].nextstate = d_p; + ofst->AddArc(s_p, arc_parent[d]); + } + } + ofst->SetStart(s_p); + if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError); + ofst->SetProperties( + ShortestPathProperties(ofst->Properties(kFstProperties, false)), + kFstProperties); +} + + +template <class S, class W> +class ShortestPathCompare { + public: + typedef S StateId; + typedef W Weight; + typedef 
pair<StateId, Weight> Pair; + + ShortestPathCompare(const vector<Pair>& pairs, + const vector<Weight>& distance, + StateId sfinal, float d) + : pairs_(pairs), distance_(distance), superfinal_(sfinal), delta_(d) {} + + bool operator()(const StateId x, const StateId y) const { + const Pair &px = pairs_[x]; + const Pair &py = pairs_[y]; + Weight dx = px.first == superfinal_ ? Weight::One() : + px.first < distance_.size() ? distance_[px.first] : Weight::Zero(); + Weight dy = py.first == superfinal_ ? Weight::One() : + py.first < distance_.size() ? distance_[py.first] : Weight::Zero(); + Weight wx = Times(dx, px.second); + Weight wy = Times(dy, py.second); + // Penalize complete paths to ensure correct results with inexact weights. + // This forms a strict weak order so long as ApproxEqual(a, b) => + // ApproxEqual(a, c) for all c s.t. less_(a, c) && less_(c, b). + if (px.first == superfinal_ && py.first != superfinal_) { + return less_(wy, wx) || ApproxEqual(wx, wy, delta_); + } else if (py.first == superfinal_ && px.first != superfinal_) { + return less_(wy, wx) && !ApproxEqual(wx, wy, delta_); + } else { + return less_(wy, wx); + } + } + + private: + const vector<Pair> &pairs_; + const vector<Weight> &distance_; + StateId superfinal_; + float delta_; + NaturalLess<Weight> less_; +}; + + +// N-Shortest-path algorithm: implements the core n-shortest path +// algorithm. The output is built REVERSED. See below for versions with +// more options and not reversed. +// +// 'ofst' contains the REVERSE of 'n'-shortest paths in 'ifst'. +// 'distance' must contain the shortest distance from each state to a final +// state in 'ifst'. 'delta' is the convergence delta. +// +// The n-shortest paths are the n-lowest weight paths w.r.t. the +// natural semiring order. The single path that can be read from the +// ith of at most n transitions leaving the initial state of 'ofst' is +// the ith shortest path. 
Disregarding the initial state and initial +// transitions, the n-shortest paths, in fact, form a tree rooted at +// the single final state. +// +// The weights need to be left and right distributive (kSemiring) and +// have the path (kPath) property. +// +// The algorithm is from Mohri and Riley, "An Efficient Algorithm for +// the n-best-strings problem", ICSLP 2002. The algorithm relies on +// the shortest-distance algorithm. There are some issues with the +// pseudo-code as written in the paper (viz., line 11). +// +// IMPLEMENTATION NOTE: The input fst 'ifst' can be a delayed fst and +// and at any state in its expansion the values of distance vector need only +// be defined at that time for the states that are known to exist. +template<class Arc, class RevArc> +void NShortestPath(const Fst<RevArc> &ifst, + MutableFst<Arc> *ofst, + const vector<typename Arc::Weight> &distance, + size_t n, + float delta = kDelta, + typename Arc::Weight weight_threshold = Arc::Weight::Zero(), + typename Arc::StateId state_threshold = kNoStateId) { + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + typedef pair<StateId, Weight> Pair; + typedef typename RevArc::Weight RevWeight; + + if (n <= 0) return; + if ((Weight::Properties() & (kPath | kSemiring)) != (kPath | kSemiring)) { + FSTERROR() << "NShortestPath: Weight needs to have the " + << "path property and be distributive: " + << Weight::Type(); + ofst->SetProperties(kError, kError); + return; + } + ofst->DeleteStates(); + ofst->SetInputSymbols(ifst.InputSymbols()); + ofst->SetOutputSymbols(ifst.OutputSymbols()); + // Each state in 'ofst' corresponds to a path with weight w from the + // initial state of 'ifst' to a state s in 'ifst', that can be + // characterized by a pair (s,w). The vector 'pairs' maps each + // state in 'ofst' to the corresponding pair maps states in OFST to + // the corresponding pair (s,w). 
+ vector<Pair> pairs; + // The supefinal state is denoted by -1, 'compare' knows that the + // distance from 'superfinal' to the final state is 'Weight::One()', + // hence 'distance[superfinal]' is not needed. + StateId superfinal = -1; + ShortestPathCompare<StateId, Weight> + compare(pairs, distance, superfinal, delta); + vector<StateId> heap; + // 'r[s + 1]', 's' state in 'fst', is the number of states in 'ofst' + // which corresponding pair contains 's' ,i.e. , it is number of + // paths computed so far to 's'. Valid for 's == -1' (superfinal). + vector<int> r; + NaturalLess<Weight> less; + if (ifst.Start() == kNoStateId || + distance.size() <= ifst.Start() || + distance[ifst.Start()] == Weight::Zero() || + less(weight_threshold, Weight::One()) || + state_threshold == 0) { + if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError); + return; + } + ofst->SetStart(ofst->AddState()); + StateId final = ofst->AddState(); + ofst->SetFinal(final, Weight::One()); + while (pairs.size() <= final) + pairs.push_back(Pair(kNoStateId, Weight::Zero())); + pairs[final] = Pair(ifst.Start(), Weight::One()); + heap.push_back(final); + Weight limit = Times(distance[ifst.Start()], weight_threshold); + + while (!heap.empty()) { + pop_heap(heap.begin(), heap.end(), compare); + StateId state = heap.back(); + Pair p = pairs[state]; + heap.pop_back(); + Weight d = p.first == superfinal ? Weight::One() : + p.first < distance.size() ? 
distance[p.first] : Weight::Zero(); + + if (less(limit, Times(d, p.second)) || + (state_threshold != kNoStateId && + ofst->NumStates() >= state_threshold)) + continue; + + while (r.size() <= p.first + 1) r.push_back(0); + ++r[p.first + 1]; + if (p.first == superfinal) + ofst->AddArc(ofst->Start(), Arc(0, 0, Weight::One(), state)); + if ((p.first == superfinal) && (r[p.first + 1] == n)) break; + if (r[p.first + 1] > n) continue; + if (p.first == superfinal) continue; + + for (ArcIterator< Fst<RevArc> > aiter(ifst, p.first); + !aiter.Done(); + aiter.Next()) { + const RevArc &rarc = aiter.Value(); + Arc arc(rarc.ilabel, rarc.olabel, rarc.weight.Reverse(), rarc.nextstate); + Weight w = Times(p.second, arc.weight); + StateId next = ofst->AddState(); + pairs.push_back(Pair(arc.nextstate, w)); + arc.nextstate = state; + ofst->AddArc(next, arc); + heap.push_back(next); + push_heap(heap.begin(), heap.end(), compare); + } + + Weight finalw = ifst.Final(p.first).Reverse(); + if (finalw != Weight::Zero()) { + Weight w = Times(p.second, finalw); + StateId next = ofst->AddState(); + pairs.push_back(Pair(superfinal, w)); + ofst->AddArc(next, Arc(0, 0, finalw, state)); + heap.push_back(next); + push_heap(heap.begin(), heap.end(), compare); + } + } + Connect(ofst); + if (ifst.Properties(kError, false)) ofst->SetProperties(kError, kError); + ofst->SetProperties( + ShortestPathProperties(ofst->Properties(kFstProperties, false)), + kFstProperties); +} + + +// N-Shortest-path algorithm: this version allow fine control +// via the options argument. See below for a simpler interface. +// +// 'ofst' contains the n-shortest paths in 'ifst'. 'distance' returns +// the shortest distances from the source state to each state in +// 'ifst'. 'opts' is used to specify options such as the number of +// paths to return, whether they need to have distinct input +// strings, the queue discipline, the arc filter and the convergence +// delta. 
+// +// The n-shortest paths are the n-lowest weight paths w.r.t. the +// natural semiring order. The single path that can be read from the +// ith of at most n transitions leaving the initial state of 'ofst' is +// the ith shortest path. Disregarding the initial state and initial +// transitions, The n-shortest paths, in fact, form a tree rooted at +// the single final state. + +// The weights need to be right distributive and have the path (kPath) +// property. They need to be left distributive as well for nshortest +// > 1. +// +// The algorithm is from Mohri and Riley, "An Efficient Algorithm for +// the n-best-strings problem", ICSLP 2002. The algorithm relies on +// the shortest-distance algorithm. There are some issues with the +// pseudo-code as written in the paper (viz., line 11). +template<class Arc, class Queue, class ArcFilter> +void ShortestPath(const Fst<Arc> &ifst, MutableFst<Arc> *ofst, + vector<typename Arc::Weight> *distance, + ShortestPathOptions<Arc, Queue, ArcFilter> &opts) { + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + typedef ReverseArc<Arc> ReverseArc; + + size_t n = opts.nshortest; + if (n == 1) { + SingleShortestPath(ifst, ofst, distance, opts); + return; + } + if (n <= 0) return; + if ((Weight::Properties() & (kPath | kSemiring)) != (kPath | kSemiring)) { + FSTERROR() << "ShortestPath: n-shortest: Weight needs to have the " + << "path property and be distributive: " + << Weight::Type(); + ofst->SetProperties(kError, kError); + return; + } + if (!opts.has_distance) { + ShortestDistance(ifst, distance, opts); + if (distance->size() == 1 && !(*distance)[0].Member()) { + ofst->SetProperties(kError, kError); + return; + } + } + // Algorithm works on the reverse of 'fst' : 'rfst', 'distance' is + // the distance to the final state in 'rfst', 'ofst' is built as the + // reverse of the tree of n-shortest path in 'rfst'. 
+ VectorFst<ReverseArc> rfst; + Reverse(ifst, &rfst); + Weight d = Weight::Zero(); + for (ArcIterator< VectorFst<ReverseArc> > aiter(rfst, 0); + !aiter.Done(); aiter.Next()) { + const ReverseArc &arc = aiter.Value(); + StateId s = arc.nextstate - 1; + if (s < distance->size()) + d = Plus(d, Times(arc.weight.Reverse(), (*distance)[s])); + } + distance->insert(distance->begin(), d); + + if (!opts.unique) { + NShortestPath(rfst, ofst, *distance, n, opts.delta, + opts.weight_threshold, opts.state_threshold); + } else { + vector<Weight> ddistance; + DeterminizeFstOptions<ReverseArc> dopts(opts.delta); + DeterminizeFst<ReverseArc> dfst(rfst, distance, &ddistance, dopts); + NShortestPath(dfst, ofst, ddistance, n, opts.delta, + opts.weight_threshold, opts.state_threshold); + } + distance->erase(distance->begin()); +} + + +// Shortest-path algorithm: simplified interface. See above for a +// version that allows finer control. +// +// 'ofst' contains the 'n'-shortest paths in 'ifst'. The queue +// discipline is automatically selected. When 'unique' == true, only +// paths with distinct input labels are returned. +// +// The n-shortest paths are the n-lowest weight paths w.r.t. the +// natural semiring order. The single path that can be read from the +// ith of at most n transitions leaving the initial state of 'ofst' is +// the ith best path. +// +// The weights need to be right distributive and have the path +// (kPath) property. 
+template<class Arc> +void ShortestPath(const Fst<Arc> &ifst, MutableFst<Arc> *ofst, + size_t n = 1, bool unique = false, + bool first_path = false, + typename Arc::Weight weight_threshold = Arc::Weight::Zero(), + typename Arc::StateId state_threshold = kNoStateId) { + vector<typename Arc::Weight> distance; + AnyArcFilter<Arc> arc_filter; + AutoQueue<typename Arc::StateId> state_queue(ifst, &distance, arc_filter); + ShortestPathOptions< Arc, AutoQueue<typename Arc::StateId>, + AnyArcFilter<Arc> > opts(&state_queue, arc_filter, n, unique, false, + kDelta, first_path, weight_threshold, + state_threshold); + ShortestPath(ifst, ofst, &distance, opts); +} + +} // namespace fst + +#endif // FST_LIB_SHORTEST_PATH_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/signed-log-weight.h b/kaldi_io/src/tools/openfst/include/fst/signed-log-weight.h new file mode 100644 index 0000000..61adefb --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/signed-log-weight.h @@ -0,0 +1,367 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Kasturi Rangan Raghavan) +// \file +// LogWeight along with sign information that represents the value X in the +// linear domain as <sign(X), -ln(|X|)> +// The sign is a TropicalWeight: +// positive, TropicalWeight.Value() > 0.0, recommended value 1.0 +// negative, TropicalWeight.Value() <= 0.0, recommended value -1.0 + +#ifndef FST_LIB_SIGNED_LOG_WEIGHT_H_ +#define FST_LIB_SIGNED_LOG_WEIGHT_H_ + +#include <fst/float-weight.h> +#include <fst/pair-weight.h> + + +namespace fst { +template <class T> +class SignedLogWeightTpl + : public PairWeight<TropicalWeight, LogWeightTpl<T> > { + public: + typedef TropicalWeight X1; + typedef LogWeightTpl<T> X2; + using PairWeight<X1, X2>::Value1; + using PairWeight<X1, X2>::Value2; + + using PairWeight<X1, X2>::Reverse; + using PairWeight<X1, X2>::Quantize; + using PairWeight<X1, X2>::Member; + + typedef SignedLogWeightTpl<T> ReverseWeight; + + SignedLogWeightTpl() : PairWeight<X1, X2>() {} + + SignedLogWeightTpl(const SignedLogWeightTpl<T>& w) + : PairWeight<X1, X2> (w) { } + + SignedLogWeightTpl(const PairWeight<X1, X2>& w) + : PairWeight<X1, X2> (w) { } + + SignedLogWeightTpl(const X1& x1, const X2& x2) + : PairWeight<X1, X2>(x1, x2) { } + + static const SignedLogWeightTpl<T> &Zero() { + static const SignedLogWeightTpl<T> zero(X1(1.0), X2::Zero()); + return zero; + } + + static const SignedLogWeightTpl<T> &One() { + static const SignedLogWeightTpl<T> one(X1(1.0), X2::One()); + return one; + } + + static const SignedLogWeightTpl<T> &NoWeight() { + static const SignedLogWeightTpl<T> no_weight(X1(1.0), X2::NoWeight()); + return no_weight; + } + + static const string &Type() { + static const string type = "signed_log_" + X1::Type() + "_" + X2::Type(); + return type; + } + + ProductWeight<X1, X2> Quantize(float delta = kDelta) const { + return PairWeight<X1, X2>::Quantize(); + } + + ReverseWeight Reverse() const { + return PairWeight<X1, X2>::Reverse(); + } + + bool Member() const { + return 
PairWeight<X1, X2>::Member(); + } + + static uint64 Properties() { + // not idempotent nor path + return kLeftSemiring | kRightSemiring | kCommutative; + } + + size_t Hash() const { + size_t h1; + if (Value2() == X2::Zero() || Value1().Value() > 0.0) + h1 = TropicalWeight(1.0).Hash(); + else + h1 = TropicalWeight(-1.0).Hash(); + size_t h2 = Value2().Hash(); + const int lshift = 5; + const int rshift = CHAR_BIT * sizeof(size_t) - 5; + return h1 << lshift ^ h1 >> rshift ^ h2; + } +}; + +template <class T> +inline SignedLogWeightTpl<T> Plus(const SignedLogWeightTpl<T> &w1, + const SignedLogWeightTpl<T> &w2) { + if (!w1.Member() || !w2.Member()) + return SignedLogWeightTpl<T>::NoWeight(); + bool s1 = w1.Value1().Value() > 0.0; + bool s2 = w2.Value1().Value() > 0.0; + T f1 = w1.Value2().Value(); + T f2 = w2.Value2().Value(); + if (f1 == FloatLimits<T>::PosInfinity()) + return w2; + else if (f2 == FloatLimits<T>::PosInfinity()) + return w1; + else if (f1 == f2) { + if (s1 == s2) + return SignedLogWeightTpl<T>(w1.Value1(), (f2 - log(2.0F))); + else + return SignedLogWeightTpl<T>::Zero(); + } else if (f1 > f2) { + if (s1 == s2) { + return SignedLogWeightTpl<T>( + w1.Value1(), (f2 - log(1.0F + exp(f2 - f1)))); + } else { + return SignedLogWeightTpl<T>( + w2.Value1(), (f2 - log(1.0F - exp(f2 - f1)))); + } + } else { + if (s2 == s1) { + return SignedLogWeightTpl<T>( + w2.Value1(), (f1 - log(1.0F + exp(f1 - f2)))); + } else { + return SignedLogWeightTpl<T>( + w1.Value1(), (f1 - log(1.0F - exp(f1 - f2)))); + } + } +} + +template <class T> +inline SignedLogWeightTpl<T> Minus(const SignedLogWeightTpl<T> &w1, + const SignedLogWeightTpl<T> &w2) { + SignedLogWeightTpl<T> minus_w2(-w2.Value1().Value(), w2.Value2()); + return Plus(w1, minus_w2); +} + +template <class T> +inline SignedLogWeightTpl<T> Times(const SignedLogWeightTpl<T> &w1, + const SignedLogWeightTpl<T> &w2) { + if (!w1.Member() || !w2.Member()) + return SignedLogWeightTpl<T>::NoWeight(); + bool s1 = w1.Value1().Value() 
> 0.0; + bool s2 = w2.Value1().Value() > 0.0; + T f1 = w1.Value2().Value(); + T f2 = w2.Value2().Value(); + if (s1 == s2) + return SignedLogWeightTpl<T>(TropicalWeight(1.0), (f1 + f2)); + else + return SignedLogWeightTpl<T>(TropicalWeight(-1.0), (f1 + f2)); +} + +template <class T> +inline SignedLogWeightTpl<T> Divide(const SignedLogWeightTpl<T> &w1, + const SignedLogWeightTpl<T> &w2, + DivideType typ = DIVIDE_ANY) { + if (!w1.Member() || !w2.Member()) + return SignedLogWeightTpl<T>::NoWeight(); + bool s1 = w1.Value1().Value() > 0.0; + bool s2 = w2.Value1().Value() > 0.0; + T f1 = w1.Value2().Value(); + T f2 = w2.Value2().Value(); + if (f2 == FloatLimits<T>::PosInfinity()) + return SignedLogWeightTpl<T>(TropicalWeight(1.0), + FloatLimits<T>::NumberBad()); + else if (f1 == FloatLimits<T>::PosInfinity()) + return SignedLogWeightTpl<T>(TropicalWeight(1.0), + FloatLimits<T>::PosInfinity()); + else if (s1 == s2) + return SignedLogWeightTpl<T>(TropicalWeight(1.0), (f1 - f2)); + else + return SignedLogWeightTpl<T>(TropicalWeight(-1.0), (f1 - f2)); +} + +template <class T> +inline bool ApproxEqual(const SignedLogWeightTpl<T> &w1, + const SignedLogWeightTpl<T> &w2, + float delta = kDelta) { + bool s1 = w1.Value1().Value() > 0.0; + bool s2 = w2.Value1().Value() > 0.0; + if (s1 == s2) { + return ApproxEqual(w1.Value2(), w2.Value2(), delta); + } else { + return w1.Value2() == LogWeightTpl<T>::Zero() + && w2.Value2() == LogWeightTpl<T>::Zero(); + } +} + +template <class T> +inline bool operator==(const SignedLogWeightTpl<T> &w1, + const SignedLogWeightTpl<T> &w2) { + bool s1 = w1.Value1().Value() > 0.0; + bool s2 = w2.Value1().Value() > 0.0; + if (s1 == s2) + return w1.Value2() == w2.Value2(); + else + return (w1.Value2() == LogWeightTpl<T>::Zero()) && + (w2.Value2() == LogWeightTpl<T>::Zero()); +} + + +// Single-precision signed-log weight +typedef SignedLogWeightTpl<float> SignedLogWeight; +// Double-precision signed-log weight +typedef SignedLogWeightTpl<double> 
SignedLog64Weight; + +// +// WEIGHT CONVERTER SPECIALIZATIONS. +// + +template <class W1, class W2> +bool SignedLogConvertCheck(W1 w) { + if (w.Value1().Value() < 0.0) { + FSTERROR() << "WeightConvert: can't convert weight from \"" + << W1::Type() << "\" to \"" << W2::Type(); + return false; + } + return true; +} + +// Convert to tropical +template <> +struct WeightConvert<SignedLogWeight, TropicalWeight> { + TropicalWeight operator()(SignedLogWeight w) const { + if (!SignedLogConvertCheck<SignedLogWeight, TropicalWeight>(w)) + return TropicalWeight::NoWeight(); + return w.Value2().Value(); + } +}; + +template <> +struct WeightConvert<SignedLog64Weight, TropicalWeight> { + TropicalWeight operator()(SignedLog64Weight w) const { + if (!SignedLogConvertCheck<SignedLog64Weight, TropicalWeight>(w)) + return TropicalWeight::NoWeight(); + return w.Value2().Value(); + } +}; + +// Convert to log +template <> +struct WeightConvert<SignedLogWeight, LogWeight> { + LogWeight operator()(SignedLogWeight w) const { + if (!SignedLogConvertCheck<SignedLogWeight, LogWeight>(w)) + return LogWeight::NoWeight(); + return w.Value2().Value(); + } +}; + +template <> +struct WeightConvert<SignedLog64Weight, LogWeight> { + LogWeight operator()(SignedLog64Weight w) const { + if (!SignedLogConvertCheck<SignedLog64Weight, LogWeight>(w)) + return LogWeight::NoWeight(); + return w.Value2().Value(); + } +}; + +// Convert to log64 +template <> +struct WeightConvert<SignedLogWeight, Log64Weight> { + Log64Weight operator()(SignedLogWeight w) const { + if (!SignedLogConvertCheck<SignedLogWeight, Log64Weight>(w)) + return Log64Weight::NoWeight(); + return w.Value2().Value(); + } +}; + +template <> +struct WeightConvert<SignedLog64Weight, Log64Weight> { + Log64Weight operator()(SignedLog64Weight w) const { + if (!SignedLogConvertCheck<SignedLog64Weight, Log64Weight>(w)) + return Log64Weight::NoWeight(); + return w.Value2().Value(); + } +}; + +// Convert to signed log +template <> +struct 
WeightConvert<TropicalWeight, SignedLogWeight> { + SignedLogWeight operator()(TropicalWeight w) const { + TropicalWeight x1 = 1.0; + LogWeight x2 = w.Value(); + return SignedLogWeight(x1, x2); + } +}; + +template <> +struct WeightConvert<LogWeight, SignedLogWeight> { + SignedLogWeight operator()(LogWeight w) const { + TropicalWeight x1 = 1.0; + LogWeight x2 = w.Value(); + return SignedLogWeight(x1, x2); + } +}; + +template <> +struct WeightConvert<Log64Weight, SignedLogWeight> { + SignedLogWeight operator()(Log64Weight w) const { + TropicalWeight x1 = 1.0; + LogWeight x2 = w.Value(); + return SignedLogWeight(x1, x2); + } +}; + +template <> +struct WeightConvert<SignedLog64Weight, SignedLogWeight> { + SignedLogWeight operator()(SignedLog64Weight w) const { + TropicalWeight x1 = w.Value1(); + LogWeight x2 = w.Value2().Value(); + return SignedLogWeight(x1, x2); + } +}; + +// Convert to signed log64 +template <> +struct WeightConvert<TropicalWeight, SignedLog64Weight> { + SignedLog64Weight operator()(TropicalWeight w) const { + TropicalWeight x1 = 1.0; + Log64Weight x2 = w.Value(); + return SignedLog64Weight(x1, x2); + } +}; + +template <> +struct WeightConvert<LogWeight, SignedLog64Weight> { + SignedLog64Weight operator()(LogWeight w) const { + TropicalWeight x1 = 1.0; + Log64Weight x2 = w.Value(); + return SignedLog64Weight(x1, x2); + } +}; + +template <> +struct WeightConvert<Log64Weight, SignedLog64Weight> { + SignedLog64Weight operator()(Log64Weight w) const { + TropicalWeight x1 = 1.0; + Log64Weight x2 = w.Value(); + return SignedLog64Weight(x1, x2); + } +}; + +template <> +struct WeightConvert<SignedLogWeight, SignedLog64Weight> { + SignedLog64Weight operator()(SignedLogWeight w) const { + TropicalWeight x1 = w.Value1(); + Log64Weight x2 = w.Value2().Value(); + return SignedLog64Weight(x1, x2); + } +}; + +} // namespace fst + +#endif // FST_LIB_SIGNED_LOG_WEIGHT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/slist.h 
b/kaldi_io/src/tools/openfst/include/fst/slist.h new file mode 100644 index 0000000..b800522 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/slist.h @@ -0,0 +1,61 @@ +// slist.h +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Author: [email protected] (Michael Riley) +// +// \file +// Includes slist definition or defines in terms of STL list as a fallback. + +#ifndef FST_LIB_SLIST_H__ +#define FST_LIB_SLIST_H__ + +#include <fst/config.h> + +#ifdef HAVE___GNU_CXX__SLIST_INT_ + +#include <ext/slist> + +namespace fst { + +using __gnu_cxx::slist; + +} + +#else + +#include <list> + +namespace fst { + +using std::list; + +template <typename T> class slist : public list<T> { + public: + typedef typename list<T>::iterator iterator; + typedef typename list<T>::const_iterator const_iterator; + + using list<T>::erase; + + iterator erase_after(iterator pos) { + iterator npos = pos; + erase(++npos); + return pos; + } +}; + +} // namespace fst + +#endif // HAVE___GNU_CXX__SLIST_INT_ + +#endif // FST_LIB_SLIST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/sparse-power-weight.h b/kaldi_io/src/tools/openfst/include/fst/sparse-power-weight.h new file mode 100644 index 0000000..a1ff56a --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/sparse-power-weight.h @@ -0,0 +1,225 @@ +// sparse-power-weight.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Kasturi Rangan Raghavan) +// Inspiration: [email protected] (Cyril Allauzen) +// +// \file +// Cartesian power weight semiring operation definitions. +// Uses SparseTupleWeight as underlying representation. + +#ifndef FST_LIB_SPARSE_POWER_WEIGHT_H__ +#define FST_LIB_SPARSE_POWER_WEIGHT_H__ + +#include<string> + +#include <fst/sparse-tuple-weight.h> +#include <fst/weight.h> + + +namespace fst { + +// Below SparseTupleWeight*Mapper are used in conjunction with +// SparseTupleWeightMap to compute the respective semiring operations +template<class W, class K> +struct SparseTupleWeightPlusMapper { + W Map(const K& k, const W& v1, const W& v2) const { + return Plus(v1, v2); + } +}; + +template<class W, class K> +struct SparseTupleWeightTimesMapper { + W Map(const K& k, const W& v1, const W& v2) const { + return Times(v1, v2); + } +}; + +template<class W, class K> +struct SparseTupleWeightDivideMapper { + SparseTupleWeightDivideMapper(DivideType divide_type) { + divide_type_ = divide_type; + } + W Map(const K& k, const W& v1, const W& v2) const { + return Divide(v1, v2, divide_type_); + } + DivideType divide_type_; +}; + +template<class W, class K> +struct SparseTupleWeightApproxMapper { + SparseTupleWeightApproxMapper(float delta) { delta_ = delta; } + W Map(const K& k, const W& v1, const W& v2) const { + return ApproxEqual(v1, v2, delta_) ? 
W::One() : W::Zero(); + } + float delta_; +}; + +// Sparse cartesian power semiring: W ^ n +// Forms: +// - a left semimodule when W is a left semiring, +// - a right semimodule when W is a right semiring, +// - a bisemimodule when W is a semiring, +// the free semimodule of rank n over W +// The Times operation is overloaded to provide the +// left and right scalar products. +// K is the key value type. kNoKey(-1) is reserved for internal use +template <class W, class K = int> +class SparsePowerWeight : public SparseTupleWeight<W, K> { + public: + using SparseTupleWeight<W, K>::Zero; + using SparseTupleWeight<W, K>::One; + using SparseTupleWeight<W, K>::NoWeight; + using SparseTupleWeight<W, K>::Quantize; + using SparseTupleWeight<W, K>::Reverse; + + typedef SparsePowerWeight<typename W::ReverseWeight, K> ReverseWeight; + + SparsePowerWeight() {} + + SparsePowerWeight(const SparseTupleWeight<W, K> &w) : + SparseTupleWeight<W, K>(w) { } + + template <class Iterator> + SparsePowerWeight(Iterator begin, Iterator end) : + SparseTupleWeight<W, K>(begin, end) { } + + SparsePowerWeight(const K &key, const W &w) : + SparseTupleWeight<W, K>(key, w) { } + + static const SparsePowerWeight<W, K> &Zero() { + static const SparsePowerWeight<W, K> zero(SparseTupleWeight<W, K>::Zero()); + return zero; + } + + static const SparsePowerWeight<W, K> &One() { + static const SparsePowerWeight<W, K> one(SparseTupleWeight<W, K>::One()); + return one; + } + + static const SparsePowerWeight<W, K> &NoWeight() { + static const SparsePowerWeight<W, K> no_weight( + SparseTupleWeight<W, K>::NoWeight()); + return no_weight; + } + + // Overide this: Overwrite the Type method to reflect the key type + // if using non-default key type. 
+ static const string &Type() { + static string type; + if(type.empty()) { + type = W::Type() + "_^n"; + if(sizeof(K) != sizeof(uint32)) { + string size; + Int64ToStr(8 * sizeof(K), &size); + type += "_" + size; + } + } + return type; + } + + static uint64 Properties() { + uint64 props = W::Properties(); + return props & (kLeftSemiring | kRightSemiring | + kCommutative | kIdempotent); + } + + SparsePowerWeight<W, K> Quantize(float delta = kDelta) const { + return SparseTupleWeight<W, K>::Quantize(delta); + } + + ReverseWeight Reverse() const { + return SparseTupleWeight<W, K>::Reverse(); + } +}; + +// Semimodule plus operation +template <class W, class K> +inline SparsePowerWeight<W, K> Plus(const SparsePowerWeight<W, K> &w1, + const SparsePowerWeight<W, K> &w2) { + SparsePowerWeight<W, K> ret; + SparseTupleWeightPlusMapper<W, K> operator_mapper; + SparseTupleWeightMap(&ret, w1, w2, operator_mapper); + return ret; +} + +// Semimodule times operation +template <class W, class K> +inline SparsePowerWeight<W, K> Times(const SparsePowerWeight<W, K> &w1, + const SparsePowerWeight<W, K> &w2) { + SparsePowerWeight<W, K> ret; + SparseTupleWeightTimesMapper<W, K> operator_mapper; + SparseTupleWeightMap(&ret, w1, w2, operator_mapper); + return ret; +} + +// Semimodule divide operation +template <class W, class K> +inline SparsePowerWeight<W, K> Divide(const SparsePowerWeight<W, K> &w1, + const SparsePowerWeight<W, K> &w2, + DivideType type = DIVIDE_ANY) { + SparsePowerWeight<W, K> ret; + SparseTupleWeightDivideMapper<W, K> operator_mapper(type); + SparseTupleWeightMap(&ret, w1, w2, operator_mapper); + return ret; +} + +// Semimodule dot product +template <class W, class K> +inline const W& DotProduct(const SparsePowerWeight<W, K> &w1, + const SparsePowerWeight<W, K> &w2) { + const SparsePowerWeight<W, K>& product = Times(w1, w2); + W ret(W::Zero()); + for (SparseTupleWeightIterator<W, K> it(product); !it.Done(); it.Next()) { + ret = Plus(ret, it.Value().second); + } + return 
ret; +} + +template <class W, class K> +inline bool ApproxEqual(const SparsePowerWeight<W, K> &w1, + const SparsePowerWeight<W, K> &w2, + float delta = kDelta) { + SparseTupleWeight<W, K> ret; + SparseTupleWeightApproxMapper<W, K> operator_mapper(kDelta); + SparseTupleWeightMap(&ret, w1, w2, operator_mapper); + return ret == SparsePowerWeight<W, K>::One(); +} + +template <class W, class K> +inline SparsePowerWeight<W, K> Times(const W &k, + const SparsePowerWeight<W, K> &w2) { + SparsePowerWeight<W, K> w1(k); + return Times(w1, w2); +} + +template <class W, class K> +inline SparsePowerWeight<W, K> Times(const SparsePowerWeight<W, K> &w1, + const W &k) { + SparsePowerWeight<W, K> w2(k); + return Times(w1, w2); +} + +template <class W, class K> +inline SparsePowerWeight<W, K> Divide(const SparsePowerWeight<W, K> &w1, + const W &k, + DivideType divide_type = DIVIDE_ANY) { + SparsePowerWeight<W, K> w2(k); + return Divide(w1, w2, divide_type); +} + +} // namespace fst + +#endif // FST_LIB_SPARSE_POWER_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/sparse-tuple-weight.h b/kaldi_io/src/tools/openfst/include/fst/sparse-tuple-weight.h new file mode 100644 index 0000000..c12ef4f --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/sparse-tuple-weight.h @@ -0,0 +1,640 @@ +// sparse-tuple-weight.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Kasturi Rangan Raghavan) +// Inspiration: [email protected] (Cyril Allauzen) +// \file +// Sparse version of tuple-weight, based on tuple-weight.h +// Internally stores sparse key, value pairs in linked list +// Default value element is the assumed value of unset keys +// Internal singleton implementation that stores first key, +// value pair as an initialized member variable to avoid +// unnecessary allocation on heap. +// Use SparseTupleWeightIterator to iterate through the key,value pairs +// Note: this does NOT iterate through the default value. +// +// Sparse tuple weight set operation definitions. + +#ifndef FST_LIB_SPARSE_TUPLE_WEIGHT_H__ +#define FST_LIB_SPARSE_TUPLE_WEIGHT_H__ + +#include<string> +#include<list> +#include<stack> +#include<tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; + +#include <fst/weight.h> + + +DECLARE_string(fst_weight_parentheses); +DECLARE_string(fst_weight_separator); + +namespace fst { + +template <class W, class K> class SparseTupleWeight; + +template<class W, class K> +class SparseTupleWeightIterator; + +template <class W, class K> +istream &operator>>(istream &strm, SparseTupleWeight<W, K> &w); + +// Arbitrary dimension tuple weight, stored as a sorted linked-list +// W is any weight class, +// K is the key value type. 
kNoKey(-1) is reserved for internal use +template <class W, class K = int> +class SparseTupleWeight { + public: + typedef pair<K, W> Pair; + typedef SparseTupleWeight<typename W::ReverseWeight, K> ReverseWeight; + + const static K kNoKey = -1; + SparseTupleWeight() { + Init(); + } + + template <class Iterator> + SparseTupleWeight(Iterator begin, Iterator end) { + Init(); + // Assumes input iterator is sorted + for (Iterator it = begin; it != end; ++it) + Push(*it); + } + + + SparseTupleWeight(const K& key, const W &w) { + Init(); + Push(key, w); + } + + SparseTupleWeight(const W &w) { + Init(w); + } + + SparseTupleWeight(const SparseTupleWeight<W, K> &w) { + Init(w.DefaultValue()); + SetDefaultValue(w.DefaultValue()); + for (SparseTupleWeightIterator<W, K> it(w); !it.Done(); it.Next()) { + Push(it.Value()); + } + } + + static const SparseTupleWeight<W, K> &Zero() { + static SparseTupleWeight<W, K> zero; + return zero; + } + + static const SparseTupleWeight<W, K> &One() { + static SparseTupleWeight<W, K> one(W::One()); + return one; + } + + static const SparseTupleWeight<W, K> &NoWeight() { + static SparseTupleWeight<W, K> no_weight(W::NoWeight()); + return no_weight; + } + + istream &Read(istream &strm) { + ReadType(strm, &default_); + ReadType(strm, &first_); + return ReadType(strm, &rest_); + } + + ostream &Write(ostream &strm) const { + WriteType(strm, default_); + WriteType(strm, first_); + return WriteType(strm, rest_); + } + + SparseTupleWeight<W, K> &operator=(const SparseTupleWeight<W, K> &w) { + if (this == &w) return *this; // check for w = w + Init(w.DefaultValue()); + for (SparseTupleWeightIterator<W, K> it(w); !it.Done(); it.Next()) { + Push(it.Value()); + } + return *this; + } + + bool Member() const { + if (!DefaultValue().Member()) return false; + for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) { + if (!it.Value().second.Member()) return false; + } + return true; + } + + // Assumes H() function exists for the hash of the key 
value + size_t Hash() const { + uint64 h = 0; + std::tr1::hash<K> H; + for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) { + h = 5 * h + H(it.Value().first); + h = 13 * h + it.Value().second.Hash(); + } + return size_t(h); + } + + SparseTupleWeight<W, K> Quantize(float delta = kDelta) const { + SparseTupleWeight<W, K> w; + for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) { + w.Push(it.Value().first, it.Value().second.Quantize(delta)); + } + return w; + } + + ReverseWeight Reverse() const { + SparseTupleWeight<W, K> w; + for (SparseTupleWeightIterator<W, K> it(*this); !it.Done(); it.Next()) { + w.Push(it.Value().first, it.Value().second.Reverse()); + } + return w; + } + + // Common initializer among constructors. + void Init() { + Init(W::Zero()); + } + + void Init(const W& default_value) { + first_.first = kNoKey; + /* initialized to the reserved key value */ + default_ = default_value; + rest_.clear(); + } + + size_t Size() const { + if (first_.first == kNoKey) + return 0; + else + return rest_.size() + 1; + } + + inline void Push(const K &k, const W &w, bool default_value_check = true) { + Push(make_pair(k, w), default_value_check); + } + + inline void Push(const Pair &p, bool default_value_check = true) { + if (default_value_check && p.second == default_) return; + if (first_.first == kNoKey) { + first_ = p; + } else { + rest_.push_back(p); + } + } + + void SetDefaultValue(const W& val) { default_ = val; } + + const W& DefaultValue() const { return default_; } + + protected: + static istream& ReadNoParen( + istream&, SparseTupleWeight<W, K>&, char separator); + + static istream& ReadWithParen( + istream&, SparseTupleWeight<W, K>&, + char separator, char open_paren, char close_paren); + + private: + // Assumed default value of uninitialized keys, by default W::Zero() + W default_; + + // Key values pairs are first stored in first_, then fill rest_ + // this way we can avoid dynamic allocation in the common case + // 
where the weight is a single key,val pair. + Pair first_; + list<Pair> rest_; + + friend istream &operator>><W, K>(istream&, SparseTupleWeight<W, K>&); + friend class SparseTupleWeightIterator<W, K>; +}; + +template<class W, class K> +class SparseTupleWeightIterator { + public: + typedef typename SparseTupleWeight<W, K>::Pair Pair; + typedef typename list<Pair>::const_iterator const_iterator; + typedef typename list<Pair>::iterator iterator; + + explicit SparseTupleWeightIterator(const SparseTupleWeight<W, K>& w) + : first_(w.first_), rest_(w.rest_), init_(true), + iter_(rest_.begin()) {} + + bool Done() const { + if (init_) + return first_.first == SparseTupleWeight<W, K>::kNoKey; + else + return iter_ == rest_.end(); + } + + const Pair& Value() const { return init_ ? first_ : *iter_; } + + void Next() { + if (init_) + init_ = false; + else + ++iter_; + } + + void Reset() { + init_ = true; + iter_ = rest_.begin(); + } + + private: + const Pair &first_; + const list<Pair> & rest_; + bool init_; // in the initialized state? + typename list<Pair>::const_iterator iter_; + + DISALLOW_COPY_AND_ASSIGN(SparseTupleWeightIterator); +}; + +template<class W, class K, class M> +inline void SparseTupleWeightMap( + SparseTupleWeight<W, K>* ret, + const SparseTupleWeight<W, K>& w1, + const SparseTupleWeight<W, K>& w2, + const M& operator_mapper) { + SparseTupleWeightIterator<W, K> w1_it(w1); + SparseTupleWeightIterator<W, K> w2_it(w2); + const W& v1_def = w1.DefaultValue(); + const W& v2_def = w2.DefaultValue(); + ret->SetDefaultValue(operator_mapper.Map(0, v1_def, v2_def)); + while (!w1_it.Done() || !w2_it.Done()) { + const K& k1 = (w1_it.Done()) ? w2_it.Value().first : w1_it.Value().first; + const K& k2 = (w2_it.Done()) ? w1_it.Value().first : w2_it.Value().first; + const W& v1 = (w1_it.Done()) ? v1_def : w1_it.Value().second; + const W& v2 = (w2_it.Done()) ? 
v2_def : w2_it.Value().second; + if (k1 == k2) { + ret->Push(k1, operator_mapper.Map(k1, v1, v2)); + if (!w1_it.Done()) w1_it.Next(); + if (!w2_it.Done()) w2_it.Next(); + } else if (k1 < k2) { + ret->Push(k1, operator_mapper.Map(k1, v1, v2_def)); + w1_it.Next(); + } else { + ret->Push(k2, operator_mapper.Map(k2, v1_def, v2)); + w2_it.Next(); + } + } +} + +template <class W, class K> +inline bool operator==(const SparseTupleWeight<W, K> &w1, + const SparseTupleWeight<W, K> &w2) { + const W& v1_def = w1.DefaultValue(); + const W& v2_def = w2.DefaultValue(); + if (v1_def != v2_def) return false; + + SparseTupleWeightIterator<W, K> w1_it(w1); + SparseTupleWeightIterator<W, K> w2_it(w2); + while (!w1_it.Done() || !w2_it.Done()) { + const K& k1 = (w1_it.Done()) ? w2_it.Value().first : w1_it.Value().first; + const K& k2 = (w2_it.Done()) ? w1_it.Value().first : w2_it.Value().first; + const W& v1 = (w1_it.Done()) ? v1_def : w1_it.Value().second; + const W& v2 = (w2_it.Done()) ? v2_def : w2_it.Value().second; + if (k1 == k2) { + if (v1 != v2) return false; + if (!w1_it.Done()) w1_it.Next(); + if (!w2_it.Done()) w2_it.Next(); + } else if (k1 < k2) { + if (v1 != v2_def) return false; + w1_it.Next(); + } else { + if (v1_def != v2) return false; + w2_it.Next(); + } + } + return true; +} + +template <class W, class K> +inline bool operator!=(const SparseTupleWeight<W, K> &w1, + const SparseTupleWeight<W, K> &w2) { + return !(w1 == w2); +} + +template <class W, class K> +inline ostream &operator<<(ostream &strm, const SparseTupleWeight<W, K> &w) { + if(FLAGS_fst_weight_separator.size() != 1) { + FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1"; + strm.clear(std::ios::badbit); + return strm; + } + char separator = FLAGS_fst_weight_separator[0]; + bool write_parens = false; + if (!FLAGS_fst_weight_parentheses.empty()) { + if (FLAGS_fst_weight_parentheses.size() != 2) { + FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2"; + 
strm.clear(std::ios::badbit); + return strm; + } + write_parens = true; + } + + if (write_parens) + strm << FLAGS_fst_weight_parentheses[0]; + + strm << w.DefaultValue(); + strm << separator; + + size_t n = w.Size(); + strm << n; + strm << separator; + + for (SparseTupleWeightIterator<W, K> it(w); !it.Done(); it.Next()) { + strm << it.Value().first; + strm << separator; + strm << it.Value().second; + strm << separator; + } + + if (write_parens) + strm << FLAGS_fst_weight_parentheses[1]; + + return strm; +} + +template <class W, class K> +inline istream &operator>>(istream &strm, SparseTupleWeight<W, K> &w) { + if(FLAGS_fst_weight_separator.size() != 1) { + FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1"; + strm.clear(std::ios::badbit); + return strm; + } + char separator = FLAGS_fst_weight_separator[0]; + + if (!FLAGS_fst_weight_parentheses.empty()) { + if (FLAGS_fst_weight_parentheses.size() != 2) { + FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2"; + strm.clear(std::ios::badbit); + return strm; + } + return SparseTupleWeight<W, K>::ReadWithParen( + strm, w, separator, FLAGS_fst_weight_parentheses[0], + FLAGS_fst_weight_parentheses[1]); + } else { + return SparseTupleWeight<W, K>::ReadNoParen(strm, w, separator); + } +} + +// Reads SparseTupleWeight when there are no parentheses around tuple terms +template <class W, class K> +inline istream& SparseTupleWeight<W, K>::ReadNoParen( + istream &strm, + SparseTupleWeight<W, K> &w, + char separator) { + int c; + size_t n; + + do { + c = strm.get(); + } while (isspace(c)); + + + { // Read default weight + W default_value; + string s; + while (c != separator) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + c = strm.get(); + } + istringstream sstrm(s); + sstrm >> default_value; + w.SetDefaultValue(default_value); + } + + c = strm.get(); + + { // Read n + string s; + while (c != separator) { + if (c == EOF) { + strm.clear(std::ios::badbit); + 
return strm; + } + s += c; + c = strm.get(); + } + istringstream sstrm(s); + sstrm >> n; + } + + // Read n elements + for (size_t i = 0; i < n; ++i) { + // discard separator + c = strm.get(); + K p; + W r; + + { // read key + string s; + while (c != separator) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + c = strm.get(); + } + istringstream sstrm(s); + sstrm >> p; + } + + c = strm.get(); + + { // read weight + string s; + while (c != separator) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + c = strm.get(); + } + istringstream sstrm(s); + sstrm >> r; + } + + w.Push(p, r); + } + + c = strm.get(); + if (c != separator) { + strm.clear(std::ios::badbit); + } + + return strm; +} + +// Reads SparseTupleWeight when there are parentheses around tuple terms +template <class W, class K> +inline istream& SparseTupleWeight<W, K>::ReadWithParen( + istream &strm, + SparseTupleWeight<W, K> &w, + char separator, + char open_paren, + char close_paren) { + int c; + size_t n; + + do { + c = strm.get(); + } while (isspace(c)); + + if (c != open_paren) { + FSTERROR() << "is fst_weight_parentheses flag set correcty? 
"; + strm.clear(std::ios::badbit); + return strm; + } + + c = strm.get(); + + { // Read weight + W default_value; + stack<int> parens; + string s; + while (c != separator || !parens.empty()) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + // If parens encountered before separator, they must be matched + if (c == open_paren) { + parens.push(1); + } else if (c == close_paren) { + // Fail for mismatched parens + if (parens.empty()) { + strm.clear(std::ios::failbit); + return strm; + } + parens.pop(); + } + c = strm.get(); + } + istringstream sstrm(s); + sstrm >> default_value; + w.SetDefaultValue(default_value); + } + + c = strm.get(); + + { // Read n + string s; + while (c != separator) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + c = strm.get(); + } + istringstream sstrm(s); + sstrm >> n; + } + + // Read n elements + for (size_t i = 0; i < n; ++i) { + // discard separator + c = strm.get(); + K p; + W r; + + { // Read key + stack<int> parens; + string s; + while (c != separator || !parens.empty()) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + // If parens encountered before separator, they must be matched + if (c == open_paren) { + parens.push(1); + } else if (c == close_paren) { + // Fail for mismatched parens + if (parens.empty()) { + strm.clear(std::ios::failbit); + return strm; + } + parens.pop(); + } + c = strm.get(); + } + istringstream sstrm(s); + sstrm >> p; + } + + c = strm.get(); + + { // Read weight + stack<int> parens; + string s; + while (c != separator || !parens.empty()) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + // If parens encountered before separator, they must be matched + if (c == open_paren) { + parens.push(1); + } else if (c == close_paren) { + // Fail for mismatched parens + if (parens.empty()) { + strm.clear(std::ios::failbit); + return strm; + } + parens.pop(); + } + c = strm.get(); + } + 
istringstream sstrm(s); + sstrm >> r; + } + + w.Push(p, r); + } + + if (c != separator) { + FSTERROR() << " separator expected, not found! "; + strm.clear(std::ios::badbit); + return strm; + } + + c = strm.get(); + if (c != close_paren) { + FSTERROR() << " is fst_weight_parentheses flag set correcty? "; + strm.clear(std::ios::badbit); + return strm; + } + + return strm; +} + + + +} // namespace fst + +#endif // FST_LIB_SPARSE_TUPLE_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/state-map.h b/kaldi_io/src/tools/openfst/include/fst/state-map.h new file mode 100644 index 0000000..9d6db74 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/state-map.h @@ -0,0 +1,605 @@ +// map.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Class to map over/transform states e.g., sort transitions +// Consider using when operation does not change the number of states. + +#ifndef FST_LIB_STATE_MAP_H__ +#define FST_LIB_STATE_MAP_H__ + +#include <algorithm> +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <string> +#include <utility> +using std::pair; using std::make_pair; + +#include <fst/cache.h> +#include <fst/arc-map.h> +#include <fst/mutable-fst.h> + + +namespace fst { + +// StateMapper Interface - class determinies how states are mapped. 
+// Useful for implementing operations that do not change the number of states. +// +// class StateMapper { +// public: +// typedef A FromArc; +// typedef B ToArc; +// +// // Typical constructor +// StateMapper(const Fst<A> &fst); +// // Required copy constructor that allows updating Fst argument; +// // pass only if relevant and changed. +// StateMapper(const StateMapper &mapper, const Fst<A> *fst = 0); +// +// // Specifies initial state of result +// B::StateId Start() const; +// // Specifies state's final weight in result +// B::Weight Final(B::StateId s) const; +// +// // These methods iterate through a state's arcs in result +// // Specifies state to iterate over +// void SetState(B::StateId s); +// // End of arcs? +// bool Done() const; +// // Current arc + +// const B &Value() const; +// // Advance to next arc (when !Done) +// void Next(); +// +// // Specifies input symbol table action the mapper requires (see arc-map.h). +// MapSymbolsAction InputSymbolsAction() const; +// // Specifies output symbol table action the mapper requires (see arc-map.h). +// MapSymbolsAction OutputSymbolsAction() const; +// // This specifies the known properties of an Fst mapped by this +// // mapper. It takes as argument the input Fst's known properties. +// uint64 Properties(uint64 props) const; +// }; +// +// We include various state map versions below. One dimension of +// variation is whether the mapping mutates its input, writes to a +// new result Fst, or is an on-the-fly Fst. Another dimension is how +// we pass the mapper. We allow passing the mapper by pointer +// for cases where we need to change the state of the user's mapper. +// We also include map versions that pass the mapper +// by value or const reference when this suffices. + +// Maps an arc type A using a mapper function object C, passed +// by pointer. This version modifies its Fst input. 
+template<class A, class C> +void StateMap(MutableFst<A> *fst, C* mapper) { + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) + fst->SetInputSymbols(0); + + if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) + fst->SetOutputSymbols(0); + + if (fst->Start() == kNoStateId) + return; + + uint64 props = fst->Properties(kFstProperties, false); + + fst->SetStart(mapper->Start()); + + for (StateId s = 0; s < fst->NumStates(); ++s) { + mapper->SetState(s); + fst->DeleteArcs(s); + for (; !mapper->Done(); mapper->Next()) + fst->AddArc(s, mapper->Value()); + fst->SetFinal(s, mapper->Final(s)); + } + + fst->SetProperties(mapper->Properties(props), kFstProperties); +} + +// Maps an arc type A using a mapper function object C, passed +// by value. This version modifies its Fst input. +template<class A, class C> +void StateMap(MutableFst<A> *fst, C mapper) { + StateMap(fst, &mapper); +} + + +// Maps an arc type A to an arc type B using mapper function +// object C, passed by pointer. This version writes the mapped +// input Fst to an output MutableFst. +template<class A, class B, class C> +void StateMap(const Fst<A> &ifst, MutableFst<B> *ofst, C* mapper) { + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + ofst->DeleteStates(); + + if (mapper->InputSymbolsAction() == MAP_COPY_SYMBOLS) + ofst->SetInputSymbols(ifst.InputSymbols()); + else if (mapper->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) + ofst->SetInputSymbols(0); + + if (mapper->OutputSymbolsAction() == MAP_COPY_SYMBOLS) + ofst->SetOutputSymbols(ifst.OutputSymbols()); + else if (mapper->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) + ofst->SetOutputSymbols(0); + + uint64 iprops = ifst.Properties(kCopyProperties, false); + + if (ifst.Start() == kNoStateId) { + if (iprops & kError) ofst->SetProperties(kError, kError); + return; + } + + // Add all states. 
+ if (ifst.Properties(kExpanded, false)) + ofst->ReserveStates(CountStates(ifst)); + for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next()) + ofst->AddState(); + + ofst->SetStart(mapper->Start()); + + for (StateIterator< Fst<A> > siter(ifst); !siter.Done(); siter.Next()) { + StateId s = siter.Value(); + mapper->SetState(s); + for (; !mapper->Done(); mapper->Next()) + ofst->AddArc(s, mapper->Value()); + ofst->SetFinal(s, mapper->Final(s)); + } + + uint64 oprops = ofst->Properties(kFstProperties, false); + ofst->SetProperties(mapper->Properties(iprops) | oprops, kFstProperties); +} + +// Maps an arc type A to an arc type B using mapper function +// object C, passed by value. This version writes the mapped input +// Fst to an output MutableFst. +template<class A, class B, class C> +void StateMap(const Fst<A> &ifst, MutableFst<B> *ofst, C mapper) { + StateMap(ifst, ofst, &mapper); +} + +typedef CacheOptions StateMapFstOptions; + +template <class A, class B, class C> class StateMapFst; + +// Implementation of delayed StateMapFst. 
+template <class A, class B, class C> +class StateMapFstImpl : public CacheImpl<B> { + public: + using FstImpl<B>::SetType; + using FstImpl<B>::SetProperties; + using FstImpl<B>::SetInputSymbols; + using FstImpl<B>::SetOutputSymbols; + + using VectorFstBaseImpl<typename CacheImpl<B>::State>::NumStates; + + using CacheImpl<B>::PushArc; + using CacheImpl<B>::HasArcs; + using CacheImpl<B>::HasFinal; + using CacheImpl<B>::HasStart; + using CacheImpl<B>::SetArcs; + using CacheImpl<B>::SetFinal; + using CacheImpl<B>::SetStart; + + friend class StateIterator< StateMapFst<A, B, C> >; + + typedef B Arc; + typedef typename B::Weight Weight; + typedef typename B::StateId StateId; + + StateMapFstImpl(const Fst<A> &fst, const C &mapper, + const StateMapFstOptions& opts) + : CacheImpl<B>(opts), + fst_(fst.Copy()), + mapper_(new C(mapper, fst_)), + own_mapper_(true) { + Init(); + } + + StateMapFstImpl(const Fst<A> &fst, C *mapper, + const StateMapFstOptions& opts) + : CacheImpl<B>(opts), + fst_(fst.Copy()), + mapper_(mapper), + own_mapper_(false) { + Init(); + } + + StateMapFstImpl(const StateMapFstImpl<A, B, C> &impl) + : CacheImpl<B>(impl), + fst_(impl.fst_->Copy(true)), + mapper_(new C(*impl.mapper_, fst_)), + own_mapper_(true) { + Init(); + } + + ~StateMapFstImpl() { + delete fst_; + if (own_mapper_) delete mapper_; + } + + StateId Start() { + if (!HasStart()) + SetStart(mapper_->Start()); + return CacheImpl<B>::Start(); + } + + Weight Final(StateId s) { + if (!HasFinal(s)) + SetFinal(s, mapper_->Final(s)); + return CacheImpl<B>::Final(s); + } + + size_t NumArcs(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<B>::NumArcs(s); + } + + size_t NumInputEpsilons(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<B>::NumInputEpsilons(s); + } + + size_t NumOutputEpsilons(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<B>::NumOutputEpsilons(s); + } + + void InitStateIterator(StateIteratorData<A> *data) const { + 
fst_->InitStateIterator(data); + } + + void InitArcIterator(StateId s, ArcIteratorData<B> *data) { + if (!HasArcs(s)) + Expand(s); + CacheImpl<B>::InitArcIterator(s, data); + } + + uint64 Properties() const { return Properties(kFstProperties); } + + // Set error if found; return FST impl properties. + uint64 Properties(uint64 mask) const { + if ((mask & kError) && (fst_->Properties(kError, false) || + (mapper_->Properties(0) & kError))) + SetProperties(kError, kError); + return FstImpl<Arc>::Properties(mask); + } + + void Expand(StateId s) { + // Add exiting arcs. + for (mapper_->SetState(s); !mapper_->Done(); mapper_->Next()) + PushArc(s, mapper_->Value()); + SetArcs(s); + } + + const Fst<A> &GetFst() const { + return *fst_; + } + + private: + void Init() { + SetType("statemap"); + + if (mapper_->InputSymbolsAction() == MAP_COPY_SYMBOLS) + SetInputSymbols(fst_->InputSymbols()); + else if (mapper_->InputSymbolsAction() == MAP_CLEAR_SYMBOLS) + SetInputSymbols(0); + + if (mapper_->OutputSymbolsAction() == MAP_COPY_SYMBOLS) + SetOutputSymbols(fst_->OutputSymbols()); + else if (mapper_->OutputSymbolsAction() == MAP_CLEAR_SYMBOLS) + SetOutputSymbols(0); + + uint64 props = fst_->Properties(kCopyProperties, false); + SetProperties(mapper_->Properties(props)); + } + + const Fst<A> *fst_; + C* mapper_; + bool own_mapper_; + + void operator=(const StateMapFstImpl<A, B, C> &); // disallow +}; + + +// Maps an arc type A to an arc type B using Mapper function object +// C. This version is a delayed Fst. 
+template <class A, class B, class C> +class StateMapFst : public ImplToFst< StateMapFstImpl<A, B, C> > { + public: + friend class ArcIterator< StateMapFst<A, B, C> >; + + typedef B Arc; + typedef typename B::Weight Weight; + typedef typename B::StateId StateId; + typedef CacheState<B> State; + typedef StateMapFstImpl<A, B, C> Impl; + + StateMapFst(const Fst<A> &fst, const C &mapper, + const StateMapFstOptions& opts) + : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {} + + StateMapFst(const Fst<A> &fst, C* mapper, const StateMapFstOptions& opts) + : ImplToFst<Impl>(new Impl(fst, mapper, opts)) {} + + StateMapFst(const Fst<A> &fst, const C &mapper) + : ImplToFst<Impl>(new Impl(fst, mapper, StateMapFstOptions())) {} + + StateMapFst(const Fst<A> &fst, C* mapper) + : ImplToFst<Impl>(new Impl(fst, mapper, StateMapFstOptions())) {} + + // See Fst<>::Copy() for doc. + StateMapFst(const StateMapFst<A, B, C> &fst, bool safe = false) + : ImplToFst<Impl>(fst, safe) {} + + // Get a copy of this StateMapFst. See Fst<>::Copy() for further doc. + virtual StateMapFst<A, B, C> *Copy(bool safe = false) const { + return new StateMapFst<A, B, C>(*this, safe); + } + + virtual void InitStateIterator(StateIteratorData<A> *data) const { + GetImpl()->InitStateIterator(data); + } + + virtual void InitArcIterator(StateId s, ArcIteratorData<B> *data) const { + GetImpl()->InitArcIterator(s, data); + } + + protected: + Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } + + private: + void operator=(const StateMapFst<A, B, C> &fst); // disallow +}; + + +// Specialization for StateMapFst. 
+template <class A, class B, class C> +class ArcIterator< StateMapFst<A, B, C> > + : public CacheArcIterator< StateMapFst<A, B, C> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const StateMapFst<A, B, C> &fst, StateId s) + : CacheArcIterator< StateMapFst<A, B, C> >(fst.GetImpl(), s) { + if (!fst.GetImpl()->HasArcs(s)) + fst.GetImpl()->Expand(s); + } + + private: + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + +// +// Utility Mappers +// + +// Mapper that returns its input. +template <class A> +class IdentityStateMapper { + public: + typedef A FromArc; + typedef A ToArc; + + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + explicit IdentityStateMapper(const Fst<A> &fst) : fst_(fst), aiter_(0) {} + + // Allows updating Fst argument; pass only if changed. + IdentityStateMapper(const IdentityStateMapper<A> &mapper, + const Fst<A> *fst = 0) + : fst_(fst ? *fst : mapper.fst_), aiter_(0) {} + + ~IdentityStateMapper() { delete aiter_; } + + StateId Start() const { return fst_.Start(); } + + Weight Final(StateId s) const { return fst_.Final(s); } + + void SetState(StateId s) { + if (aiter_) delete aiter_; + aiter_ = new ArcIterator< Fst<A> >(fst_, s); + } + + bool Done() const { return aiter_->Done(); } + const A &Value() const { return aiter_->Value(); } + void Next() { aiter_->Next(); } + + MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS;} + + uint64 Properties(uint64 props) const { return props; } + + private: + const Fst<A> &fst_; + ArcIterator< Fst<A> > *aiter_; +}; + +template <class A> +class ArcSumMapper { + public: + typedef A FromArc; + typedef A ToArc; + + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + explicit ArcSumMapper(const Fst<A> &fst) : fst_(fst), i_(0) {} + + // Allows updating Fst argument; pass only if changed. 
+ ArcSumMapper(const ArcSumMapper<A> &mapper, + const Fst<A> *fst = 0) + : fst_(fst ? *fst : mapper.fst_), i_(0) {} + + StateId Start() const { return fst_.Start(); } + Weight Final(StateId s) const { return fst_.Final(s); } + + void SetState(StateId s) { + i_ = 0; + arcs_.clear(); + arcs_.reserve(fst_.NumArcs(s)); + for (ArcIterator<Fst<A> > aiter(fst_, s); !aiter.Done(); aiter.Next()) + arcs_.push_back(aiter.Value()); + + // First sorts the exiting arcs by input label, output label + // and destination state and then sums weights of arcs with + // the same input label, output label, and destination state. + sort(arcs_.begin(), arcs_.end(), comp_); + size_t narcs = 0; + for (size_t i = 0; i < arcs_.size(); ++i) { + if (narcs > 0 && equal_(arcs_[i], arcs_[narcs - 1])) { + arcs_[narcs - 1].weight = Plus(arcs_[narcs - 1].weight, + arcs_[i].weight); + } else { + arcs_[narcs++] = arcs_[i]; + } + } + arcs_.resize(narcs); + } + + bool Done() const { return i_ >= arcs_.size(); } + const A &Value() const { return arcs_[i_]; } + void Next() { ++i_; } + + MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + + uint64 Properties(uint64 props) const { + return props & kArcSortProperties & + kDeleteArcsProperties & kWeightInvariantProperties; + } + + private: + struct Compare { + bool operator()(const A& x, const A& y) { + if (x.ilabel < y.ilabel) return true; + if (x.ilabel > y.ilabel) return false; + if (x.olabel < y.olabel) return true; + if (x.olabel > y.olabel) return false; + if (x.nextstate < y.nextstate) return true; + if (x.nextstate > y.nextstate) return false; + return false; + } + }; + + struct Equal { + bool operator()(const A& x, const A& y) { + return (x.ilabel == y.ilabel && + x.olabel == y.olabel && + x.nextstate == y.nextstate); + } + }; + + const Fst<A> &fst_; + Compare comp_; + Equal equal_; + vector<A> arcs_; + ssize_t i_; // current arc position + + void 
operator=(const ArcSumMapper<A> &); // disallow +}; + +template <class A> +class ArcUniqueMapper { + public: + typedef A FromArc; + typedef A ToArc; + + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + explicit ArcUniqueMapper(const Fst<A> &fst) : fst_(fst), i_(0) {} + + // Allows updating Fst argument; pass only if changed. + ArcUniqueMapper(const ArcUniqueMapper<A> &mapper, + const Fst<A> *fst = 0) + : fst_(fst ? *fst : mapper.fst_), i_(0) {} + + StateId Start() const { return fst_.Start(); } + Weight Final(StateId s) const { return fst_.Final(s); } + + void SetState(StateId s) { + i_ = 0; + arcs_.clear(); + arcs_.reserve(fst_.NumArcs(s)); + for (ArcIterator<Fst<A> > aiter(fst_, s); !aiter.Done(); aiter.Next()) + arcs_.push_back(aiter.Value()); + + // First sorts the exiting arcs by input label, output label + // and destination state and then uniques identical arcs + sort(arcs_.begin(), arcs_.end(), comp_); + typename vector<A>::iterator unique_end = + unique(arcs_.begin(), arcs_.end(), equal_); + arcs_.resize(unique_end - arcs_.begin()); + } + + bool Done() const { return i_ >= arcs_.size(); } + const A &Value() const { return arcs_[i_]; } + void Next() { ++i_; } + + MapSymbolsAction InputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + MapSymbolsAction OutputSymbolsAction() const { return MAP_COPY_SYMBOLS; } + + uint64 Properties(uint64 props) const { + return props & kArcSortProperties & kDeleteArcsProperties; + } + + private: + struct Compare { + bool operator()(const A& x, const A& y) { + if (x.ilabel < y.ilabel) return true; + if (x.ilabel > y.ilabel) return false; + if (x.olabel < y.olabel) return true; + if (x.olabel > y.olabel) return false; + if (x.nextstate < y.nextstate) return true; + if (x.nextstate > y.nextstate) return false; + return false; + } + }; + + struct Equal { + bool operator()(const A& x, const A& y) { + return (x.ilabel == y.ilabel && + x.olabel == y.olabel && + x.nextstate == y.nextstate && + x.weight 
== y.weight); + } + }; + + const Fst<A> &fst_; + Compare comp_; + Equal equal_; + vector<A> arcs_; + ssize_t i_; // current arc position + + void operator=(const ArcUniqueMapper<A> &); // disallow +}; + + +} // namespace fst + +#endif // FST_LIB_STATE_MAP_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/state-reachable.h b/kaldi_io/src/tools/openfst/include/fst/state-reachable.h new file mode 100644 index 0000000..6d0c971 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/state-reachable.h @@ -0,0 +1,198 @@ +// state-reachable.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Class to determine whether a given (final) state can be reached from some +// other given state. + +#ifndef FST_LIB_STATE_REACHABLE_H__ +#define FST_LIB_STATE_REACHABLE_H__ + +#include <vector> +using std::vector; + +#include <fst/dfs-visit.h> +#include <fst/fst.h> +#include <fst/interval-set.h> + + +namespace fst { + +// Computes the (final) states reachable from a given state in an FST. +// After this visitor has been called, a final state f can be reached +// from a state s iff (*isets)[s].Member(state2index[f]) is true, where +// (*isets[s]) is a set of half-open inteval of final state indices +// and state2index[f] maps from a final state to its index. +// +// If state2index is empty, it is filled-in with suitable indices. 
+// If it is non-empty, those indices are used; in this case, the +// final states must have out-degree 0. +template <class A, typename I = typename A::StateId> +class IntervalReachVisitor { + public: + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename IntervalSet<I>::Interval Interval; + + IntervalReachVisitor(const Fst<A> &fst, + vector< IntervalSet<I> > *isets, + vector<I> *state2index) + : fst_(fst), + isets_(isets), + state2index_(state2index), + index_(state2index->empty() ? 1 : -1), + error_(false) { + isets_->clear(); + } + + void InitVisit(const Fst<A> &fst) { error_ = false; } + + bool InitState(StateId s, StateId r) { + while (isets_->size() <= s) + isets_->push_back(IntervalSet<Label>()); + while (state2index_->size() <= s) + state2index_->push_back(-1); + + if (fst_.Final(s) != Weight::Zero()) { + // Create tree interval + vector<Interval> *intervals = (*isets_)[s].Intervals(); + if (index_ < 0) { // Use state2index_ map to set index + if (fst_.NumArcs(s) > 0) { + FSTERROR() << "IntervalReachVisitor: state2index map must be empty " + << "for this FST"; + error_ = true; + return false; + } + I index = (*state2index_)[s]; + if (index < 0) { + FSTERROR() << "IntervalReachVisitor: state2index map incomplete"; + error_ = true; + return false; + } + intervals->push_back(Interval(index, index + 1)); + } else { // Use pre-order index + intervals->push_back(Interval(index_, index_ + 1)); + (*state2index_)[s] = index_++; + } + } + return true; + } + + bool TreeArc(StateId s, const A &arc) { + return true; + } + + bool BackArc(StateId s, const A &arc) { + FSTERROR() << "IntervalReachVisitor: cyclic input"; + error_ = true; + return false; + } + + bool ForwardOrCrossArc(StateId s, const A &arc) { + // Non-tree interval + (*isets_)[s].Union((*isets_)[arc.nextstate]); + return true; + } + + void FinishState(StateId s, StateId p, const A *arc) { + if (index_ >= 0 && fst_.Final(s) != 
Weight::Zero()) { + vector<Interval> *intervals = (*isets_)[s].Intervals(); + (*intervals)[0].end = index_; // Update tree interval end + } + (*isets_)[s].Normalize(); + if (p != kNoStateId) + (*isets_)[p].Union((*isets_)[s]); // Propagate intervals to parent + } + + void FinishVisit() {} + + bool Error() const { return error_; } + + private: + const Fst<A> &fst_; + vector< IntervalSet<I> > *isets_; + vector<I> *state2index_; + I index_; + bool error_; +}; + + +// Tests reachability of final states from a given state. To test for +// reachability from a state s, first do SetState(s). Then a final +// state f can be reached from state s of FST iff Reach(f) is true. +template <class A, typename I = typename A::StateId> +class StateReachable { + public: + typedef A Arc; + typedef I Index; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename IntervalSet<I>::Interval Interval; + + StateReachable(const Fst<A> &fst) + : error_(false) { + IntervalReachVisitor<Arc> reach_visitor(fst, &isets_, &state2index_); + DfsVisit(fst, &reach_visitor); + if (reach_visitor.Error()) error_ = true; + } + + StateReachable(const StateReachable<A> &reachable) { + FSTERROR() << "Copy constructor for state reachable class " + << "not yet implemented."; + error_ = true; + } + + // Set current state. + void SetState(StateId s) { s_ = s; } + + // Can reach this label from current state? + bool Reach(StateId s) { + if (s >= state2index_.size()) + return false; + + I i = state2index_[s]; + if (i < 0) { + FSTERROR() << "StateReachable: state non-final: " << s; + error_ = true; + return false; + } + return isets_[s_].Member(i); + } + + // Access to the state-to-index mapping. Unassigned states have index -1. + vector<I> &State2Index() { return state2index_; } + + // Access to the interval sets. These specify the reachability + // to the final states as intervals of the final state indices. 
+ const vector< IntervalSet<I> > &IntervalSets() { return isets_; } + + bool Error() const { return error_; } + + private: + StateId s_; // Current state + vector< IntervalSet<I> > isets_; // Interval sets per state + vector<I> state2index_; // Finds index for a final state + bool error_; + + void operator=(const StateReachable<A> &); // Disallow +}; + +} // namespace fst + +#endif // FST_LIB_STATE_REACHABLE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/state-table.h b/kaldi_io/src/tools/openfst/include/fst/state-table.h new file mode 100644 index 0000000..d8107a1 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/state-table.h @@ -0,0 +1,481 @@ +// state-table.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Classes for representing the mapping between state tuples and state Ids. + +#ifndef FST_LIB_STATE_TABLE_H__ +#define FST_LIB_STATE_TABLE_H__ + +#include <deque> +using std::deque; +#include <vector> +using std::vector; + +#include <fst/bi-table.h> +#include <fst/expanded-fst.h> + + +namespace fst { + +// STATE TABLES - these determine the bijective mapping between state +// tuples (e.g. in composition triples of two FST states and a +// composition filter state) and their corresponding state IDs. 
+// They are classes, templated on state tuples, of the form: +// +// template <class T> +// class StateTable { +// public: +// typedef typename T StateTuple; +// +// // Required constructors. +// StateTable(); +// +// // Lookup state ID by tuple. If it doesn't exist, then add it. +// StateId FindState(const StateTuple &); +// // Lookup state tuple by state ID. +// const StateTuple<StateId> &Tuple(StateId) const; +// // # of stored tuples. +// StateId Size() const; +// }; +// +// A state tuple has the form: +// +// template <class S> +// struct StateTuple { +// typedef typename S StateId; +// +// // Required constructors. +// StateTuple(); +// StateTuple(const StateTuple &); +// }; + + +// An implementation using a hash map for the tuple to state ID mapping. +// The state tuple T must have == defined. H is the hash function. +template <class T, class H> +class HashStateTable : public HashBiTable<typename T::StateId, T, H> { + public: + typedef T StateTuple; + typedef typename StateTuple::StateId StateId; + using HashBiTable<StateId, T, H>::FindId; + using HashBiTable<StateId, T, H>::FindEntry; + using HashBiTable<StateId, T, H>::Size; + + HashStateTable() : HashBiTable<StateId, T, H>() {} + + // Reserves space for table_size elements. + explicit HashStateTable(size_t table_size) + : HashBiTable<StateId, T, H>(table_size) {} + + StateId FindState(const StateTuple &tuple) { return FindId(tuple); } + const StateTuple &Tuple(StateId s) const { return FindEntry(s); } +}; + + +// An implementation using a hash map for the tuple to state ID mapping. +// The state tuple T must have == defined. H is the hash function. 
+template <class T, class H> +class CompactHashStateTable + : public CompactHashBiTable<typename T::StateId, T, H> { + public: + typedef T StateTuple; + typedef typename StateTuple::StateId StateId; + using CompactHashBiTable<StateId, T, H>::FindId; + using CompactHashBiTable<StateId, T, H>::FindEntry; + using CompactHashBiTable<StateId, T, H>::Size; + + CompactHashStateTable() : CompactHashBiTable<StateId, T, H>() {} + + // Reserves space for 'table_size' elements. + explicit CompactHashStateTable(size_t table_size) + : CompactHashBiTable<StateId, T, H>(table_size) {} + + StateId FindState(const StateTuple &tuple) { return FindId(tuple); } + const StateTuple &Tuple(StateId s) const { return FindEntry(s); } +}; + +// An implementation using a vector for the tuple to state mapping. +// It is passed a function object FP that should fingerprint tuples +// uniquely to an integer that can used as a vector index. Normally, +// VectorStateTable constructs the FP object. The user can instead +// pass in this object; in that case, VectorStateTable takes its +// ownership. +template <class T, class FP> +class VectorStateTable + : public VectorBiTable<typename T::StateId, T, FP> { + public: + typedef T StateTuple; + typedef typename StateTuple::StateId StateId; + using VectorBiTable<StateId, T, FP>::FindId; + using VectorBiTable<StateId, T, FP>::FindEntry; + using VectorBiTable<StateId, T, FP>::Size; + using VectorBiTable<StateId, T, FP>::Fingerprint; + + // Reserves space for 'table_size' elements. + explicit VectorStateTable(FP *fp = 0, size_t table_size = 0) + : VectorBiTable<StateId, T, FP>(fp, table_size) {} + + StateId FindState(const StateTuple &tuple) { return FindId(tuple); } + const StateTuple &Tuple(StateId s) const { return FindEntry(s); } +}; + + +// An implementation using a vector and a compact hash table. The +// selecting functor S returns true for tuples to be hashed in the +// vector. 
The fingerprinting functor FP returns a unique fingerprint +// for each tuple to be hashed in the vector (these need to be +// suitable for indexing in a vector). The hash functor H is used when +// hashing tuple into the compact hash table. +template <class T, class S, class FP, class H> +class VectorHashStateTable + : public VectorHashBiTable<typename T::StateId, T, S, FP, H> { + public: + typedef T StateTuple; + typedef typename StateTuple::StateId StateId; + using VectorHashBiTable<StateId, T, S, FP, H>::FindId; + using VectorHashBiTable<StateId, T, S, FP, H>::FindEntry; + using VectorHashBiTable<StateId, T, S, FP, H>::Size; + using VectorHashBiTable<StateId, T, S, FP, H>::Selector; + using VectorHashBiTable<StateId, T, S, FP, H>::Fingerprint; + using VectorHashBiTable<StateId, T, S, FP, H>::Hash; + + VectorHashStateTable(S *s, FP *fp, H *h, + size_t vector_size = 0, + size_t tuple_size = 0) + : VectorHashBiTable<StateId, T, S, FP, H>( + s, fp, h, vector_size, tuple_size) {} + + StateId FindState(const StateTuple &tuple) { return FindId(tuple); } + const StateTuple &Tuple(StateId s) const { return FindEntry(s); } +}; + + +// An implementation using a hash map for the tuple to state ID +// mapping. This version permits erasing of states. The state tuple T +// must have == defined and its default constructor must produce a +// tuple that will never be seen. F is the hash function. 
+template <class T, class F> +class ErasableStateTable : public ErasableBiTable<typename T::StateId, T, F> { + public: + typedef T StateTuple; + typedef typename StateTuple::StateId StateId; + using ErasableBiTable<StateId, T, F>::FindId; + using ErasableBiTable<StateId, T, F>::FindEntry; + using ErasableBiTable<StateId, T, F>::Size; + using ErasableBiTable<StateId, T, F>::Erase; + + ErasableStateTable() : ErasableBiTable<StateId, T, F>() {} + StateId FindState(const StateTuple &tuple) { return FindId(tuple); } + const StateTuple &Tuple(StateId s) const { return FindEntry(s); } +}; + +// +// COMPOSITION STATE TUPLES AND TABLES +// +// The composition state table has the form: +// +// template <class A, class F> +// class ComposeStateTable { +// public: +// typedef A Arc; +// typedef F FilterState; +// typedef typename A::StateId StateId; +// typedef ComposeStateTuple<StateId> StateTuple; +// +// // Required constructors. Copy constructor does not copy state. +// ComposeStateTable(const Fst<Arc> &fst1, const Fst<Arc> &fst2); +// ComposeStateTable(const ComposeStateTable<A, F> &table); +// // Lookup state ID by tuple. If it doesn't exist, then add it. +// StateId FindState(const StateTuple &); +// // Lookup state tuple by state ID. +// const StateTuple<StateId> &Tuple(StateId) const; +// // # of stored tuples. +// StateId Size() const; +// // Return true if error encountered +// bool Error() const; +// }; + +// Represents the composition state. 
+template <typename S, typename F> +struct ComposeStateTuple { + typedef S StateId; + typedef F FilterState; + + ComposeStateTuple() + : state_id1(kNoStateId), state_id2(kNoStateId), + filter_state(FilterState::NoState()) {} + + ComposeStateTuple(StateId s1, StateId s2, const FilterState &f) + : state_id1(s1), state_id2(s2), filter_state(f) {} + + StateId state_id1; // State Id on fst1 + StateId state_id2; // State Id on fst2 + FilterState filter_state; // State of composition filter +}; + +// Equality of composition state tuples. +template <typename S, typename F> +inline bool operator==(const ComposeStateTuple<S, F>& x, + const ComposeStateTuple<S, F>& y) { + if (&x == &y) + return true; + return x.state_id1 == y.state_id1 && + x.state_id2 == y.state_id2 && + x.filter_state == y.filter_state; +} + + +// Hashing of composition state tuples. +template <typename S, typename F> +class ComposeHash { + public: + size_t operator()(const ComposeStateTuple<S, F>& t) const { + return t.state_id1 + t.state_id2 * kPrime0 + + t.filter_state.Hash() * kPrime1; + } + private: + static const size_t kPrime0; + static const size_t kPrime1; +}; + +template <typename S, typename F> +const size_t ComposeHash<S, F>::kPrime0 = 7853; + +template <typename S, typename F> +const size_t ComposeHash<S, F>::kPrime1 = 7867; + + +// A HashStateTable over composition tuples. +template <typename A, + typename F, + typename H = + CompactHashStateTable<ComposeStateTuple<typename A::StateId, F>, + ComposeHash<typename A::StateId, F> > > +class GenericComposeStateTable : public H { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef F FilterState; + typedef ComposeStateTuple<StateId, F> StateTuple; + + GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {} + + // Reserves space for 'table_size' elements. 
+ GenericComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2, + size_t table_size) : H(table_size) {} + + bool Error() const { return false; } + + private: + void operator=(const GenericComposeStateTable<A, F> &table); // disallow +}; + + +// Fingerprint for general composition tuples. +template <typename S, typename F> +class ComposeFingerprint { + public: + typedef S StateId; + typedef F FilterState; + typedef ComposeStateTuple<S, F> StateTuple; + + // Required but suboptimal constructor. + ComposeFingerprint() : mult1_(8192), mult2_(8192) { + LOG(WARNING) << "TupleFingerprint: # of FST states should be provided."; + } + + // Constructor is provided the sizes of the input FSTs + ComposeFingerprint(StateId nstates1, StateId nstates2) + : mult1_(nstates1), mult2_(nstates1 * nstates2) { } + + size_t operator()(const StateTuple &tuple) { + return tuple.state_id1 + tuple.state_id2 * mult1_ + + tuple.filter_state.Hash() * mult2_; + } + + private: + ssize_t mult1_; + ssize_t mult2_; +}; + + +// Useful when the first composition state determines the tuple. +template <typename S, typename F> +class ComposeState1Fingerprint { + public: + typedef S StateId; + typedef F FilterState; + typedef ComposeStateTuple<S, F> StateTuple; + + size_t operator()(const StateTuple &tuple) { return tuple.state_id1; } +}; + + +// Useful when the second composition state determines the tuple. +template <typename S, typename F> +class ComposeState2Fingerprint { + public: + typedef S StateId; + typedef F FilterState; + typedef ComposeStateTuple<S, F> StateTuple; + + size_t operator()(const StateTuple &tuple) { return tuple.state_id2; } +}; + + +// A VectorStateTable over composition tuples. This can be used when +// the product of number of states in FST1 and FST2 (and the +// composition filter state hash) is manageable. If the FSTs are not +// expanded Fsts, they will first have their states counted. 
+template <typename A, typename F> +class ProductComposeStateTable : public +VectorStateTable<ComposeStateTuple<typename A::StateId, F>, + ComposeFingerprint<typename A::StateId, F> > { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef F FilterState; + typedef ComposeStateTuple<StateId, F> StateTuple; + typedef VectorStateTable<StateTuple, + ComposeFingerprint<StateId, F> > StateTable; + + // Reserves space for 'table_size' elements. + ProductComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2, + size_t table_size = 0) + : StateTable(new ComposeFingerprint<StateId, F>(CountStates(fst1), + CountStates(fst2)), + table_size) {} + + ProductComposeStateTable(const ProductComposeStateTable<A, F> &table) + : StateTable(new ComposeFingerprint<StateId, F>(table.Fingerprint())) {} + + bool Error() const { return false; } + + private: + void operator=(const ProductComposeStateTable<A, F> &table); // disallow +}; + +// A VectorStateTable over composition tuples. This can be used when +// FST1 is a string (satisfies kStringProperties) and FST2 is +// epsilon-free and deterministic. It should be used with a +// composition filter that creates at most one filter state per tuple +// under these conditions (e.g. SequenceComposeFilter or +// MatchComposeFilter). 
+template <typename A, typename F> +class StringDetComposeStateTable : public +VectorStateTable<ComposeStateTuple<typename A::StateId, F>, + ComposeState1Fingerprint<typename A::StateId, F> > { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef F FilterState; + typedef ComposeStateTuple<StateId, F> StateTuple; + typedef VectorStateTable<StateTuple, + ComposeState1Fingerprint<StateId, F> > StateTable; + + StringDetComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) + : error_(false) { + uint64 props1 = kString; + uint64 props2 = kIDeterministic | kNoIEpsilons; + if (fst1.Properties(props1, true) != props1 || + fst2.Properties(props2, true) != props2) { + FSTERROR() << "StringDetComposeStateTable: fst1 not a string or" + << " fst2 not input deterministic and epsilon-free"; + error_ = true; + } + } + + StringDetComposeStateTable(const StringDetComposeStateTable<A, F> &table) + : StateTable(table), error_(table.error_) {} + + bool Error() const { return error_; } + + private: + bool error_; + + void operator=(const StringDetComposeStateTable<A, F> &table); // disallow +}; + + +// A VectorStateTable over composition tuples. This can be used when +// FST2 is a string (satisfies kStringProperties) and FST1 is +// epsilon-free and deterministic. It should be used with a +// composition filter that creates at most one filter state per tuple +// under these conditions (e.g. SequenceComposeFilter or +// MatchComposeFilter). 
+template <typename A, typename F> +class DetStringComposeStateTable : public +VectorStateTable<ComposeStateTuple<typename A::StateId, F>, + ComposeState2Fingerprint<typename A::StateId, F> > { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef F FilterState; + typedef ComposeStateTuple<StateId, F> StateTuple; + typedef VectorStateTable<StateTuple, + ComposeState2Fingerprint<StateId, F> > StateTable; + + DetStringComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) + :error_(false) { + uint64 props1 = kODeterministic | kNoOEpsilons; + uint64 props2 = kString; + if (fst1.Properties(props1, true) != props1 || + fst2.Properties(props2, true) != props2) { + FSTERROR() << "StringDetComposeStateTable: fst2 not a string or" + << " fst1 not output deterministic and epsilon-free"; + error_ = true; + } + } + + DetStringComposeStateTable(const DetStringComposeStateTable<A, F> &table) + : StateTable(table), error_(table.error_) {} + + bool Error() const { return error_; } + + private: + bool error_; + + void operator=(const DetStringComposeStateTable<A, F> &table); // disallow +}; + + +// An ErasableStateTable over composition tuples. The Erase(StateId) method +// can be called if the user either is sure that composition will never return +// to that tuple or doesn't care that if it does, it is assigned a new +// state ID. 
+template <typename A, typename F> +class ErasableComposeStateTable : public +ErasableStateTable<ComposeStateTuple<typename A::StateId, F>, + ComposeHash<typename A::StateId, F> > { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef F FilterState; + typedef ComposeStateTuple<StateId, F> StateTuple; + + ErasableComposeStateTable(const Fst<A> &fst1, const Fst<A> &fst2) {} + + bool Error() const { return false; } + + private: + void operator=(const ErasableComposeStateTable<A, F> &table); // disallow +}; + +} // namespace fst + +#endif // FST_LIB_STATE_TABLE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/statesort.h b/kaldi_io/src/tools/openfst/include/fst/statesort.h new file mode 100644 index 0000000..6f827f4 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/statesort.h @@ -0,0 +1,97 @@ +// statesort.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Function to sort states of an Fst. + +#ifndef FST_LIB_STATESORT_H__ +#define FST_LIB_STATESORT_H__ + +#include <vector> +using std::vector; +#include <algorithm> + +#include <fst/mutable-fst.h> + + +namespace fst { + +// Sorts the input states of an FST, modifying it. ORDER[i] gives the +// the state Id after sorting that corresponds to state Id i before +// sorting. ORDER must be a permutation of FST's states ID sequence: +// (0, 1, 2, ..., fst->NumStates() - 1). 
+template <class Arc> +void StateSort(MutableFst<Arc> *fst, + const vector<typename Arc::StateId> &order) { + typedef typename Arc::StateId StateId; + typedef typename Arc::Weight Weight; + + if (order.size() != fst->NumStates()) { + FSTERROR() << "StateSort: bad order vector size: " << order.size(); + fst->SetProperties(kError, kError); + return; + } + + if (fst->Start() == kNoStateId) + return; + + uint64 props = fst->Properties(kStateSortProperties, false); + + vector<bool> done(order.size(), false); + vector<Arc> arcsa, arcsb; + vector<Arc> *arcs1 = &arcsa, *arcs2 = &arcsb; + + fst->SetStart(order[fst->Start()]); + + for (StateIterator< MutableFst<Arc> > siter(*fst); + !siter.Done(); + siter.Next()) { + StateId s1 = siter.Value(), s2; + if (done[s1]) + continue; + Weight final1 = fst->Final(s1), final2 = Weight::Zero(); + arcs1->clear(); + for (ArcIterator< MutableFst<Arc> > aiter(*fst, s1); + !aiter.Done(); + aiter.Next()) + arcs1->push_back(aiter.Value()); + for (; !done[s1]; s1 = s2, final1 = final2, swap(arcs1, arcs2)) { + s2 = order[s1]; + if (!done[s2]) { + final2 = fst->Final(s2); + arcs2->clear(); + for (ArcIterator< MutableFst<Arc> > aiter(*fst, s2); + !aiter.Done(); + aiter.Next()) + arcs2->push_back(aiter.Value()); + } + fst->SetFinal(s2, final1); + fst->DeleteArcs(s2); + for (size_t i = 0; i < arcs1->size(); ++i) { + Arc arc = (*arcs1)[i]; + arc.nextstate = order[arc.nextstate]; + fst->AddArc(s2, arc); + } + done[s1] = true; + } + } + fst->SetProperties(props, kFstProperties); +} + +} // namespace fst + +#endif // FST_LIB_STATESORT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/string-weight.h b/kaldi_io/src/tools/openfst/include/fst/string-weight.h new file mode 100644 index 0000000..1beeb33 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/string-weight.h @@ -0,0 +1,560 @@ +// string-weight.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright 2005-2010 Google, Inc.
// Author: [email protected] (Michael Riley)
//
// \file
// String weight set and associated semiring operation definitions.

#ifndef FST_LIB_STRING_WEIGHT_H__
#define FST_LIB_STRING_WEIGHT_H__

#include <list>
#include <string>

#include <fst/product-weight.h>
#include <fst/weight.h>

namespace fst {

const int kStringInfinity = -1;     // Label for the infinite string
const int kStringBad = -2;          // Label for a non-string
const char kStringSeparator = '_';  // Label separator in strings

// Determines whether to use left or right string semiring. Includes
// restricted versions that signal an error if proper prefixes
// (suffixes) would otherwise be returned by Plus, useful with various
// algorithms that require functional transducer input with the
// string semirings.
enum StringType { STRING_LEFT = 0, STRING_RIGHT = 1 ,
                  STRING_LEFT_RESTRICT = 2, STRING_RIGHT_RESTRICT };

// Maps each StringType to its mirror image: LEFT <-> RIGHT and
// LEFT_RESTRICT <-> RIGHT_RESTRICT.
#define REVERSE_STRING_TYPE(S)                                  \
   ((S) == STRING_LEFT ? STRING_RIGHT :                         \
    ((S) == STRING_RIGHT ? STRING_LEFT :                        \
     ((S) == STRING_LEFT_RESTRICT ? STRING_RIGHT_RESTRICT :     \
      STRING_LEFT_RESTRICT)))

template <typename L, StringType S = STRING_LEFT>
class StringWeight;

template <typename L, StringType S = STRING_LEFT>
class StringWeightIterator;

template <typename L, StringType S = STRING_LEFT>
class StringWeightReverseIterator;

template <typename L, StringType S>
bool operator==(const StringWeight<L, S> &,  const StringWeight<L, S> &);


// String semiring: (longest_common_prefix/suffix, ., Infinity, Epsilon)
//
// A weight is a sequence of labels of type L. The first label is stored
// inline in first_ and the remainder in the list rest_; first_ == 0 marks
// the empty string, so label 0 cannot be stored as the first element.
template <typename L, StringType S>
class StringWeight {
 public:
  typedef L Label;
  typedef StringWeight<L, REVERSE_STRING_TYPE(S)> ReverseWeight;

  friend class StringWeightIterator<L, S>;
  friend class StringWeightReverseIterator<L, S>;
  friend bool operator==<>(const StringWeight<L, S> &,
                           const StringWeight<L, S> &);

  // Constructs the empty string.
  StringWeight() { Init(); }

  // Constructs a string from the labels in [begin, end), in order.
  template <typename Iter>
  StringWeight(const Iter &begin, const Iter &end) {
    Init();
    for (Iter iter = begin; iter != end; ++iter)
      PushBack(*iter);
  }

  // Constructs the one-label string "l".
  explicit StringWeight(L l) { Init(); PushBack(l); }

  // Semiring zero: the single label kStringInfinity.
  static const StringWeight<L, S> &Zero() {
    static const StringWeight<L, S> zero(kStringInfinity);
    return zero;
  }

  // Semiring one: the empty string.
  static const StringWeight<L, S> &One() {
    static const StringWeight<L, S> one;
    return one;
  }

  // The non-member weight: the single label kStringBad.
  static const StringWeight<L, S> &NoWeight() {
    static const StringWeight<L, S> no_weight(kStringBad);
    return no_weight;
  }

  // Name of the weight type, selected by the StringType parameter S.
  static const string &Type() {
    static const string type =
        S == STRING_LEFT ? "string" :
        (S == STRING_RIGHT ? "right_string" :
         (S == STRING_LEFT_RESTRICT ? "restricted_string" :
          "right_restricted_string"));
    return type;
  }

  bool Member() const;

  istream &Read(istream &strm);

  ostream &Write(ostream &strm) const;

  size_t Hash() const;

  // Strings are discrete, so quantization returns the weight unchanged;
  // delta is ignored.
  StringWeight<L, S> Quantize(float delta = kDelta) const {
    return *this;
  }

  ReverseWeight Reverse() const;

  // Left variants report kLeftSemiring, right variants kRightSemiring;
  // all variants report kIdempotent.
  static uint64 Properties() {
    return (S == STRING_LEFT || S == STRING_LEFT_RESTRICT ?
            kLeftSemiring : kRightSemiring) | kIdempotent;
  }

  // NB: This needs to be uncommented only if default fails for this impl.
  // StringWeight<L, S> &operator=(const StringWeight<L, S> &w);

  // These operations combined with the StringWeightIterator and
  // StringWeightReverseIterator provide the access and mutation of
  // the string internal elements.

  // Common initializer among constructors.
  void Init() { first_ = 0; }

  // Clear existing StringWeight.
  void Clear() { first_ = 0; rest_.clear(); }

  // Number of labels; 0 when first_ is 0, regardless of rest_.
  size_t Size() const { return first_ ? rest_.size() + 1 : 0; }

  // Prepends l; the previous first label (if any) moves to the front of
  // rest_.
  void PushFront(L l) {
    if (first_)
      rest_.push_front(first_);
    first_ = l;
  }

  // Appends l; on an empty string it becomes the first label.
  void PushBack(L l) {
    if (!first_)
      first_ = l;
    else
      rest_.push_back(l);
  }

 private:
  L first_;         // first label in string (0 if empty)
  list<L> rest_;    // remaining labels in string
};


// Traverses string in forward direction.
//
// Holds references to the weight's internals, so the weight must outlive
// the iterator.
template <typename L, StringType S>
class StringWeightIterator {
 public:
  explicit StringWeightIterator(const StringWeight<L, S>& w)
      : first_(w.first_), rest_(w.rest_), init_(true),
        iter_(rest_.begin()) {}

  // While still on the first label, done only if the string is empty;
  // afterwards, done when rest_ is exhausted.
  bool Done() const {
    if (init_) return first_ == 0;
    else return iter_ == rest_.end();
  }

  const L& Value() const { return init_ ? first_ : *iter_; }

  // The first call leaves the first label without advancing iter_, which
  // already points at the start of rest_.
  void Next() {
    if (init_) init_ = false;
    else  ++iter_;
  }

  void Reset() {
    init_ = true;
    iter_ = rest_.begin();
  }

 private:
  const L &first_;
  const list<L> &rest_;
  bool init_;   // in the initialized state?
  typename list<L>::const_iterator iter_;

  DISALLOW_COPY_AND_ASSIGN(StringWeightIterator);
};


// Traverses string in backward direction.
+template <typename L, StringType S> +class StringWeightReverseIterator { + public: + explicit StringWeightReverseIterator(const StringWeight<L, S>& w) + : first_(w.first_), rest_(w.rest_), fin_(first_ == 0), + iter_(rest_.rbegin()) {} + + bool Done() const { return fin_; } + + const L& Value() const { return iter_ == rest_.rend() ? first_ : *iter_; } + + void Next() { + if (iter_ == rest_.rend()) fin_ = true; + else ++iter_; + } + + void Reset() { + fin_ = false; + iter_ = rest_.rbegin(); + } + + private: + const L &first_; + const list<L> &rest_; + bool fin_; // in the final state? + typename list<L>::const_reverse_iterator iter_; + + DISALLOW_COPY_AND_ASSIGN(StringWeightReverseIterator); +}; + + +// StringWeight member functions follow that require +// StringWeightIterator or StringWeightReverseIterator. + +template <typename L, StringType S> +inline istream &StringWeight<L, S>::Read(istream &strm) { + Clear(); + int32 size; + ReadType(strm, &size); + for (int i = 0; i < size; ++i) { + L label; + ReadType(strm, &label); + PushBack(label); + } + return strm; +} + +template <typename L, StringType S> +inline ostream &StringWeight<L, S>::Write(ostream &strm) const { + int32 size = Size(); + WriteType(strm, size); + for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next()) { + L label = iter.Value(); + WriteType(strm, label); + } + return strm; +} + +template <typename L, StringType S> +inline bool StringWeight<L, S>::Member() const { + if (Size() != 1) + return true; + StringWeightIterator<L, S> iter(*this); + return iter.Value() != kStringBad; +} + +template <typename L, StringType S> +inline typename StringWeight<L, S>::ReverseWeight +StringWeight<L, S>::Reverse() const { + ReverseWeight rw; + for (StringWeightIterator<L, S> iter(*this); !iter.Done(); iter.Next()) + rw.PushFront(iter.Value()); + return rw; +} + +template <typename L, StringType S> +inline size_t StringWeight<L, S>::Hash() const { + size_t h = 0; + for (StringWeightIterator<L, S> 
iter(*this); !iter.Done(); iter.Next()) + h ^= h<<1 ^ iter.Value(); + return h; +} + +// NB: This needs to be uncommented only if default fails for this the impl. +// +// template <typename L, StringType S> +// inline StringWeight<L, S> +// &StringWeight<L, S>::operator=(const StringWeight<L, S> &w) { +// if (this != &w) { +// Clear(); +// for (StringWeightIterator<L, S> iter(w); !iter.Done(); iter.Next()) +// PushBack(iter.Value()); +// } +// return *this; +// } + +template <typename L, StringType S> +inline bool operator==(const StringWeight<L, S> &w1, + const StringWeight<L, S> &w2) { + if (w1.Size() != w2.Size()) + return false; + + StringWeightIterator<L, S> iter1(w1); + StringWeightIterator<L, S> iter2(w2); + + for (; !iter1.Done() ; iter1.Next(), iter2.Next()) + if (iter1.Value() != iter2.Value()) + return false; + + return true; +} + +template <typename L, StringType S> +inline bool operator!=(const StringWeight<L, S> &w1, + const StringWeight<L, S> &w2) { + return !(w1 == w2); +} + +template <typename L, StringType S> +inline bool ApproxEqual(const StringWeight<L, S> &w1, + const StringWeight<L, S> &w2, + float delta = kDelta) { + return w1 == w2; +} + +template <typename L, StringType S> +inline ostream &operator<<(ostream &strm, const StringWeight<L, S> &w) { + StringWeightIterator<L, S> iter(w); + if (iter.Done()) + return strm << "Epsilon"; + else if (iter.Value() == kStringInfinity) + return strm << "Infinity"; + else if (iter.Value() == kStringBad) + return strm << "BadString"; + else + for (size_t i = 0; !iter.Done(); ++i, iter.Next()) { + if (i > 0) + strm << kStringSeparator; + strm << iter.Value(); + } + return strm; +} + +template <typename L, StringType S> +inline istream &operator>>(istream &strm, StringWeight<L, S> &w) { + string s; + strm >> s; + if (s == "Infinity") { + w = StringWeight<L, S>::Zero(); + } else if (s == "Epsilon") { + w = StringWeight<L, S>::One(); + } else { + w.Clear(); + char *p = 0; + for (const char *cs = s.c_str(); !p 
|| *p != '\0'; cs = p + 1) { + int l = strtoll(cs, &p, 10); + if (p == cs || (*p != 0 && *p != kStringSeparator)) { + strm.clear(std::ios::badbit); + break; + } + w.PushBack(l); + } + } + return strm; +} + + +// Default is for the restricted left and right semirings. String +// equality is required (for non-Zero() input. This restriction +// is used in e.g. Determinize to ensure functional input. +template <typename L, StringType S> inline StringWeight<L, S> +Plus(const StringWeight<L, S> &w1, + const StringWeight<L, S> &w2) { + if (!w1.Member() || !w2.Member()) + return StringWeight<L, S>::NoWeight(); + if (w1 == StringWeight<L, S>::Zero()) + return w2; + if (w2 == StringWeight<L, S>::Zero()) + return w1; + + if (w1 != w2) { + FSTERROR() << "StringWeight::Plus: unequal arguments " + << "(non-functional FST?)" + << " w1 = " << w1 + << " w2 = " << w2; + return StringWeight<L, S>::NoWeight(); + } + + return w1; +} + + +// Longest common prefix for left string semiring. +template <typename L> inline StringWeight<L, STRING_LEFT> +Plus(const StringWeight<L, STRING_LEFT> &w1, + const StringWeight<L, STRING_LEFT> &w2) { + if (!w1.Member() || !w2.Member()) + return StringWeight<L, STRING_LEFT>::NoWeight(); + if (w1 == StringWeight<L, STRING_LEFT>::Zero()) + return w2; + if (w2 == StringWeight<L, STRING_LEFT>::Zero()) + return w1; + + StringWeight<L, STRING_LEFT> sum; + StringWeightIterator<L, STRING_LEFT> iter1(w1); + StringWeightIterator<L, STRING_LEFT> iter2(w2); + for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value(); + iter1.Next(), iter2.Next()) + sum.PushBack(iter1.Value()); + return sum; +} + + +// Longest common suffix for right string semiring. 
+template <typename L> inline StringWeight<L, STRING_RIGHT> +Plus(const StringWeight<L, STRING_RIGHT> &w1, + const StringWeight<L, STRING_RIGHT> &w2) { + if (!w1.Member() || !w2.Member()) + return StringWeight<L, STRING_RIGHT>::NoWeight(); + if (w1 == StringWeight<L, STRING_RIGHT>::Zero()) + return w2; + if (w2 == StringWeight<L, STRING_RIGHT>::Zero()) + return w1; + + StringWeight<L, STRING_RIGHT> sum; + StringWeightReverseIterator<L, STRING_RIGHT> iter1(w1); + StringWeightReverseIterator<L, STRING_RIGHT> iter2(w2); + for (; !iter1.Done() && !iter2.Done() && iter1.Value() == iter2.Value(); + iter1.Next(), iter2.Next()) + sum.PushFront(iter1.Value()); + return sum; +} + + +template <typename L, StringType S> +inline StringWeight<L, S> Times(const StringWeight<L, S> &w1, + const StringWeight<L, S> &w2) { + if (!w1.Member() || !w2.Member()) + return StringWeight<L, S>::NoWeight(); + if (w1 == StringWeight<L, S>::Zero() || w2 == StringWeight<L, S>::Zero()) + return StringWeight<L, S>::Zero(); + + StringWeight<L, S> prod(w1); + for (StringWeightIterator<L, S> iter(w2); !iter.Done(); iter.Next()) + prod.PushBack(iter.Value()); + + return prod; +} + + +// Default is for left division in the left string and the +// left restricted string semirings. 
+template <typename L, StringType S> inline StringWeight<L, S> +Divide(const StringWeight<L, S> &w1, + const StringWeight<L, S> &w2, + DivideType typ) { + + if (typ != DIVIDE_LEFT) { + FSTERROR() << "StringWeight::Divide: only left division is defined " + << "for the " << StringWeight<L, S>::Type() << " semiring"; + return StringWeight<L, S>::NoWeight(); + } + + if (!w1.Member() || !w2.Member()) + return StringWeight<L, S>::NoWeight(); + + if (w2 == StringWeight<L, S>::Zero()) + return StringWeight<L, S>(kStringBad); + else if (w1 == StringWeight<L, S>::Zero()) + return StringWeight<L, S>::Zero(); + + StringWeight<L, S> div; + StringWeightIterator<L, S> iter(w1); + for (int i = 0; !iter.Done(); iter.Next(), ++i) { + if (i >= w2.Size()) + div.PushBack(iter.Value()); + } + return div; +} + + +// Right division in the right string semiring. +template <typename L> inline StringWeight<L, STRING_RIGHT> +Divide(const StringWeight<L, STRING_RIGHT> &w1, + const StringWeight<L, STRING_RIGHT> &w2, + DivideType typ) { + + if (typ != DIVIDE_RIGHT) { + FSTERROR() << "StringWeight::Divide: only right division is defined " + << "for the right string semiring"; + return StringWeight<L, STRING_RIGHT>::NoWeight(); + } + + if (!w1.Member() || !w2.Member()) + return StringWeight<L, STRING_RIGHT>::NoWeight(); + + if (w2 == StringWeight<L, STRING_RIGHT>::Zero()) + return StringWeight<L, STRING_RIGHT>(kStringBad); + else if (w1 == StringWeight<L, STRING_RIGHT>::Zero()) + return StringWeight<L, STRING_RIGHT>::Zero(); + + StringWeight<L, STRING_RIGHT> div; + StringWeightReverseIterator<L, STRING_RIGHT> iter(w1); + for (int i = 0; !iter.Done(); iter.Next(), ++i) { + if (i >= w2.Size()) + div.PushFront(iter.Value()); + } + return div; +} + + +// Right division in the right restricted string semiring. 
+template <typename L> inline StringWeight<L, STRING_RIGHT_RESTRICT> +Divide(const StringWeight<L, STRING_RIGHT_RESTRICT> &w1, + const StringWeight<L, STRING_RIGHT_RESTRICT> &w2, + DivideType typ) { + + if (typ != DIVIDE_RIGHT) { + FSTERROR() << "StringWeight::Divide: only right division is defined " + << "for the right restricted string semiring"; + return StringWeight<L, STRING_RIGHT_RESTRICT>::NoWeight(); + } + + if (!w1.Member() || !w2.Member()) + return StringWeight<L, STRING_RIGHT_RESTRICT>::NoWeight(); + + if (w2 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero()) + return StringWeight<L, STRING_RIGHT_RESTRICT>(kStringBad); + else if (w1 == StringWeight<L, STRING_RIGHT_RESTRICT>::Zero()) + return StringWeight<L, STRING_RIGHT_RESTRICT>::Zero(); + + StringWeight<L, STRING_RIGHT_RESTRICT> div; + StringWeightReverseIterator<L, STRING_RIGHT_RESTRICT> iter(w1); + for (int i = 0; !iter.Done(); iter.Next(), ++i) { + if (i >= w2.Size()) + div.PushFront(iter.Value()); + } + return div; +} + + +// Product of string weight and an arbitray weight. 
+template <class L, class W, StringType S = STRING_LEFT> +struct GallicWeight : public ProductWeight<StringWeight<L, S>, W> { + typedef GallicWeight<L, typename W::ReverseWeight, REVERSE_STRING_TYPE(S)> + ReverseWeight; + + GallicWeight() {} + + GallicWeight(StringWeight<L, S> w1, W w2) + : ProductWeight<StringWeight<L, S>, W>(w1, w2) {} + + explicit GallicWeight(const string &s, int *nread = 0) + : ProductWeight<StringWeight<L, S>, W>(s, nread) {} + + GallicWeight(const ProductWeight<StringWeight<L, S>, W> &w) + : ProductWeight<StringWeight<L, S>, W>(w) {} +}; + +} // namespace fst + +#endif // FST_LIB_STRING_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/string.h b/kaldi_io/src/tools/openfst/include/fst/string.h new file mode 100644 index 0000000..9eaf7a3 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/string.h @@ -0,0 +1,271 @@ + +// string.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) +// +// \file +// Utilities to convert strings into FSTs. 
+// + +#ifndef FST_LIB_STRING_H_ +#define FST_LIB_STRING_H_ + +#include <fst/compact-fst.h> +#include <fst/icu.h> +#include <fst/mutable-fst.h> + +DECLARE_string(fst_field_separator); + +namespace fst { + +// Functor compiling a string in an FST +template <class A> +class StringCompiler { + public: + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + + enum TokenType { SYMBOL = 1, BYTE = 2, UTF8 = 3 }; + + StringCompiler(TokenType type, const SymbolTable *syms = 0, + Label unknown_label = kNoLabel, + bool allow_negative = false) + : token_type_(type), syms_(syms), unknown_label_(unknown_label), + allow_negative_(allow_negative) {} + + // Compile string 's' into FST 'fst'. + template <class F> + bool operator()(const string &s, F *fst) const { + vector<Label> labels; + if (!ConvertStringToLabels(s, &labels)) + return false; + Compile(labels, fst); + return true; + } + + template <class F> + bool operator()(const string &s, F *fst, Weight w) const { + vector<Label> labels; + if (!ConvertStringToLabels(s, &labels)) + return false; + Compile(labels, fst, w); + return true; + } + + private: + bool ConvertStringToLabels(const string &str, vector<Label> *labels) const { + labels->clear(); + if (token_type_ == BYTE) { + for (size_t i = 0; i < str.size(); ++i) + labels->push_back(static_cast<unsigned char>(str[i])); + } else if (token_type_ == UTF8) { + return UTF8StringToLabels(str, labels); + } else { + char *c_str = new char[str.size() + 1]; + str.copy(c_str, str.size()); + c_str[str.size()] = 0; + vector<char *> vec; + string separator = "\n" + FLAGS_fst_field_separator; + SplitToVector(c_str, separator.c_str(), &vec, true); + for (size_t i = 0; i < vec.size(); ++i) { + Label label; + if (!ConvertSymbolToLabel(vec[i], &label)) + return false; + labels->push_back(label); + } + delete[] c_str; + } + return true; + } + + void Compile(const vector<Label> &labels, MutableFst<A> *fst, + const Weight &weight = Weight::One()) const { + 
fst->DeleteStates(); + while (fst->NumStates() <= labels.size()) + fst->AddState(); + for (size_t i = 0; i < labels.size(); ++i) + fst->AddArc(i, Arc(labels[i], labels[i], Weight::One(), i + 1)); + fst->SetStart(0); + fst->SetFinal(labels.size(), weight); + } + + template <class Unsigned> + void Compile(const vector<Label> &labels, + CompactFst<A, StringCompactor<A>, Unsigned> *fst) const { + fst->SetCompactElements(labels.begin(), labels.end()); + } + + template <class Unsigned> + void Compile(const vector<Label> &labels, + CompactFst<A, WeightedStringCompactor<A>, Unsigned> *fst, + const Weight &weight = Weight::One()) const { + vector<pair<Label, Weight> > compacts; + compacts.reserve(labels.size()); + for (size_t i = 0; i < labels.size(); ++i) + compacts.push_back(make_pair(labels[i], Weight::One())); + compacts.back().second = weight; + fst->SetCompactElements(compacts.begin(), compacts.end()); + } + + bool ConvertSymbolToLabel(const char *s, Label* output) const { + int64 n; + if (syms_) { + n = syms_->Find(s); + if ((n == -1) && (unknown_label_ != kNoLabel)) + n = unknown_label_; + if (n == -1 || (!allow_negative_ && n < 0)) { + VLOG(1) << "StringCompiler::ConvertSymbolToLabel: Symbol \"" << s + << "\" is not mapped to any integer label, symbol table = " + << syms_->Name(); + return false; + } + } else { + char *p; + n = strtoll(s, &p, 10); + if (p < s + strlen(s) || (!allow_negative_ && n < 0)) { + VLOG(1) << "StringCompiler::ConvertSymbolToLabel: Bad label integer " + << "= \"" << s << "\""; + return false; + } + } + *output = n; + return true; + } + + TokenType token_type_; // Token type: symbol, byte or utf8 encoded + const SymbolTable *syms_; // Symbol table used when token type is symbol + Label unknown_label_; // Label for token missing from symbol table + bool allow_negative_; // Negative labels allowed? + + DISALLOW_COPY_AND_ASSIGN(StringCompiler); +}; + +// Functor to print a string FST as a string. 
+template <class A> +class StringPrinter { + public: + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + enum TokenType { SYMBOL = 1, BYTE = 2, UTF8 = 3 }; + + StringPrinter(TokenType token_type, + const SymbolTable *syms = 0) + : token_type_(token_type), syms_(syms) {} + + // Convert the FST 'fst' into the string 'output' + bool operator()(const Fst<A> &fst, string *output) { + bool is_a_string = FstToLabels(fst); + if (!is_a_string) { + VLOG(1) << "StringPrinter::operator(): Fst is not a string."; + return false; + } + + output->clear(); + + if (token_type_ == SYMBOL) { + stringstream sstrm; + for (size_t i = 0; i < labels_.size(); ++i) { + if (i) + sstrm << *(FLAGS_fst_field_separator.rbegin()); + if (!PrintLabel(labels_[i], sstrm)) + return false; + } + *output = sstrm.str(); + } else if (token_type_ == BYTE) { + output->reserve(labels_.size()); + for (size_t i = 0; i < labels_.size(); ++i) { + output->push_back(labels_[i]); + } + } else if (token_type_ == UTF8) { + return LabelsToUTF8String(labels_, output); + } else { + VLOG(1) << "StringPrinter::operator(): Unknown token type: " + << token_type_; + return false; + } + return true; + } + + private: + bool FstToLabels(const Fst<A> &fst) { + labels_.clear(); + + StateId s = fst.Start(); + if (s == kNoStateId) { + VLOG(2) << "StringPrinter::FstToLabels: Invalid starting state for " + << "string fst."; + return false; + } + + while (fst.Final(s) == Weight::Zero()) { + ArcIterator<Fst<A> > aiter(fst, s); + if (aiter.Done()) { + VLOG(2) << "StringPrinter::FstToLabels: String fst traversal does " + << "not reach final state."; + return false; + } + + const A& arc = aiter.Value(); + labels_.push_back(arc.olabel); + + s = arc.nextstate; + if (s == kNoStateId) { + VLOG(2) << "StringPrinter::FstToLabels: Transition to invalid " + << "state."; + return false; + } + + aiter.Next(); + if (!aiter.Done()) { + VLOG(2) << 
"StringPrinter::FstToLabels: State with multiple " + << "outgoing arcs found."; + return false; + } + } + + return true; + } + + bool PrintLabel(Label lab, ostream& ostrm) { + if (syms_) { + string symbol = syms_->Find(lab); + if (symbol == "") { + VLOG(2) << "StringPrinter::PrintLabel: Integer " << lab << " is not " + << "mapped to any textual symbol, symbol table = " + << syms_->Name(); + return false; + } + ostrm << symbol; + } else { + ostrm << lab; + } + return true; + } + + TokenType token_type_; // Token type: symbol, byte or utf8 encoded + const SymbolTable *syms_; // Symbol table used when token type is symbol + vector<Label> labels_; // Input FST labels. + + DISALLOW_COPY_AND_ASSIGN(StringPrinter); +}; + +} // namespace fst + +#endif // FST_LIB_STRING_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/symbol-table-ops.h b/kaldi_io/src/tools/openfst/include/fst/symbol-table-ops.h new file mode 100644 index 0000000..1f327da --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/symbol-table-ops.h @@ -0,0 +1,91 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Jeffrey Sorensen) + +#ifndef FST_LIB_SYMBOL_TABLE_OPS_H_ +#define FST_LIB_SYMBOL_TABLE_OPS_H_ + +#include <vector> +using std::vector; +#include <string> +#include <tr1/unordered_set> +using std::tr1::unordered_set; +using std::tr1::unordered_multiset; + + +#include <fst/fst.h> +#include <fst/symbol-table.h> + + +namespace fst { + +// Returns a minimal symbol table containing only symbols referenced by the +// passed fst. Symbols preserve their original numbering, so fst does not +// require relabeling. +template<class Arc> +SymbolTable *PruneSymbolTable(const Fst<Arc> &fst, const SymbolTable &syms, + bool input) { + unordered_set<typename Arc::Label> seen; + seen.insert(0); // Always keep epslion + StateIterator<Fst<Arc> > siter(fst); + for (; !siter.Done(); siter.Next()) { + ArcIterator<Fst<Arc> > aiter(fst, siter.Value()); + for (; !aiter.Done(); aiter.Next()) { + typename Arc::Label sym = (input) ? aiter.Value().ilabel : + aiter.Value().olabel; + seen.insert(sym); + } + } + SymbolTable *pruned = new SymbolTable(syms.Name() + "_pruned"); + for (SymbolTableIterator stiter(syms); !stiter.Done(); stiter.Next()) { + typename Arc::Label label = stiter.Value(); + if (seen.find(label) != seen.end()) { + pruned->AddSymbol(stiter.Symbol(), stiter.Value()); + } + } + return pruned; +} + +// Relabels a symbol table to make it a contiguous mapping. +SymbolTable *CompactSymbolTable(const SymbolTable &syms); + +// Merges two SymbolTables, all symbols from left will be merged into right +// with the same ids. Symbols in right that have conflicting ids with those +// in left will be assigned to value assigned from the left SymbolTable. +// The returned symbol table will never modify symbol assignments from the left +// side, but may do so on the right. If right_relabel_output is non-NULL, it +// will be assigned true if the symbols from the right table needed to be +// reassigned. 
+// A potential use case is to Compose two Fst's that have different symbol +// tables. You can reconcile them in the following way: +// Fst<Arc> a, b; +// bool relabel; +// SymbolTable *bnew = MergeSymbolTable(a.OutputSymbols(), +// b.InputSymbols(), &relabel); +// if (relabel) { +// Relabel(b, bnew, NULL); +// } +// b.SetInputSymbols(bnew); +// delete bnew; +SymbolTable *MergeSymbolTable(const SymbolTable &left, const SymbolTable &right, + bool *right_relabel_output = 0); + +// Read the symbol table from any Fst::Read()able file, without loading the +// corresponding Fst. Returns NULL if the Fst does not contain a symbol table +// or the symbol table cannot be read. +SymbolTable *FstReadSymbols(const string &filename, bool input); + +} // namespace fst +#endif // FST_LIB_SYMBOL_TABLE_OPS_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/symbol-table.h b/kaldi_io/src/tools/openfst/include/fst/symbol-table.h new file mode 100644 index 0000000..6eb6c2d --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/symbol-table.h @@ -0,0 +1,537 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// All Rights Reserved. +// +// Author : Johan Schalkwyk +// +// \file +// Classes to provide symbol-to-integer and integer-to-symbol mappings. 
+ +#ifndef FST_LIB_SYMBOL_TABLE_H__ +#define FST_LIB_SYMBOL_TABLE_H__ + +#include <cstring> +#include <string> +#include <utility> +using std::pair; using std::make_pair; +#include <vector> +using std::vector; + + +#include <fst/compat.h> +#include <iostream> +#include <fstream> +#include <sstream> + + +#include <map> + +DECLARE_bool(fst_compat_symbols); + +namespace fst { + +// WARNING: Reading via symbol table read options should +// not be used. This is a temporary work around for +// reading symbol ranges of previously stored symbol sets. +struct SymbolTableReadOptions { + SymbolTableReadOptions() { } + + SymbolTableReadOptions(vector<pair<int64, int64> > string_hash_ranges_, + const string& source_) + : string_hash_ranges(string_hash_ranges_), + source(source_) { } + + vector<pair<int64, int64> > string_hash_ranges; + string source; +}; + +struct SymbolTableTextOptions { + SymbolTableTextOptions(); + + bool allow_negative; + string fst_field_separator; +}; + +class SymbolTableImpl { + public: + SymbolTableImpl(const string &name) + : name_(name), + available_key_(0), + dense_key_limit_(0), + check_sum_finalized_(false) {} + + explicit SymbolTableImpl(const SymbolTableImpl& impl) + : name_(impl.name_), + available_key_(0), + dense_key_limit_(0), + check_sum_finalized_(false) { + for (size_t i = 0; i < impl.symbols_.size(); ++i) { + AddSymbol(impl.symbols_[i], impl.Find(impl.symbols_[i])); + } + } + + ~SymbolTableImpl() { + for (size_t i = 0; i < symbols_.size(); ++i) + delete[] symbols_[i]; + } + + // TODO(johans): Add flag to specify whether the symbol + // should be indexed as string or int or both. + int64 AddSymbol(const string& symbol, int64 key); + + int64 AddSymbol(const string& symbol) { + int64 key = Find(symbol); + return (key == -1) ? 
AddSymbol(symbol, available_key_++) : key; + } + + static SymbolTableImpl* ReadText( + istream &strm, const string &name, + const SymbolTableTextOptions &opts = SymbolTableTextOptions()); + + static SymbolTableImpl* Read(istream &strm, + const SymbolTableReadOptions& opts); + + bool Write(ostream &strm) const; + + // + // Return the string associated with the key. If the key is out of + // range (<0, >max), return an empty string. + string Find(int64 key) const { + if (key >=0 && key < dense_key_limit_) + return string(symbols_[key]); + + map<int64, const char*>::const_iterator it = + key_map_.find(key); + if (it == key_map_.end()) { + return ""; + } + return string(it->second); + } + + // + // Return the key associated with the symbol. If the symbol + // does not exists, return SymbolTable::kNoSymbol. + int64 Find(const string& symbol) const { + return Find(symbol.c_str()); + } + + // + // Return the key associated with the symbol. If the symbol + // does not exists, return SymbolTable::kNoSymbol. 
+ int64 Find(const char* symbol) const { + map<const char *, int64, StrCmp>::const_iterator it = + symbol_map_.find(symbol); + if (it == symbol_map_.end()) { + return -1; + } + return it->second; + } + + int64 GetNthKey(ssize_t pos) const { + if ((pos < 0) || (pos >= symbols_.size())) return -1; + else return Find(symbols_[pos]); + } + + const string& Name() const { return name_; } + + int IncrRefCount() const { + return ref_count_.Incr(); + } + int DecrRefCount() const { + return ref_count_.Decr(); + } + int RefCount() const { + return ref_count_.count(); + } + + string CheckSum() const { + MaybeRecomputeCheckSum(); + return check_sum_string_; + } + + string LabeledCheckSum() const { + MaybeRecomputeCheckSum(); + return labeled_check_sum_string_; + } + + int64 AvailableKey() const { + return available_key_; + } + + size_t NumSymbols() const { + return symbols_.size(); + } + + private: + // Recomputes the checksums (both of them) if we've had changes since the last + // computation (i.e., if check_sum_finalized_ is false). + // Takes ~2.5 microseconds (dbg) or ~230 nanoseconds (opt) on a 2.67GHz Xeon + // if the checksum is up-to-date (requiring no recomputation). + void MaybeRecomputeCheckSum() const; + + struct StrCmp { + bool operator()(const char *s1, const char *s2) const { + return strcmp(s1, s2) < 0; + } + }; + + string name_; + int64 available_key_; + int64 dense_key_limit_; + vector<const char *> symbols_; + map<int64, const char*> key_map_; + map<const char *, int64, StrCmp> symbol_map_; + + mutable RefCounter ref_count_; + mutable bool check_sum_finalized_; + mutable string check_sum_string_; + mutable string labeled_check_sum_string_; + mutable Mutex check_sum_mutex_; +}; + +// +// \class SymbolTable +// \brief Symbol (string) to int and reverse mapping +// +// The SymbolTable implements the mappings of labels to strings and reverse. 
+// SymbolTables are used to describe the alphabet of the input and output +// labels for arcs in a Finite State Transducer. +// +// SymbolTables are reference counted and can therefore be shared across +// multiple machines. For example a language model grammar G, with a +// SymbolTable for the words in the language model can share this symbol +// table with the lexical representation L o G. +// +class SymbolTable { + public: + static const int64 kNoSymbol = -1; + + // Construct symbol table with an unspecified name. + SymbolTable() : impl_(new SymbolTableImpl("<unspecified>")) {} + + // Construct symbol table with a unique name. + SymbolTable(const string& name) : impl_(new SymbolTableImpl(name)) {} + + // Create a reference counted copy. + SymbolTable(const SymbolTable& table) : impl_(table.impl_) { + impl_->IncrRefCount(); + } + + // Derefence implentation object. When reference count hits 0, delete + // implementation. + virtual ~SymbolTable() { + if (!impl_->DecrRefCount()) delete impl_; + } + + // Copys the implemenation from one symbol table to another. + void operator=(const SymbolTable &st) { + if (impl_ != st.impl_) { + st.impl_->IncrRefCount(); + if (!impl_->DecrRefCount()) delete impl_; + impl_ = st.impl_; + } + } + + // Read an ascii representation of the symbol table from an istream. Pass a + // name to give the resulting SymbolTable. 
+ static SymbolTable* ReadText( + istream &strm, const string& name, + const SymbolTableTextOptions &opts = SymbolTableTextOptions()) { + SymbolTableImpl* impl = SymbolTableImpl::ReadText(strm, name, opts); + if (!impl) + return 0; + else + return new SymbolTable(impl); + } + + // read an ascii representation of the symbol table + static SymbolTable* ReadText(const string& filename, + const SymbolTableTextOptions &opts = SymbolTableTextOptions()) { + ifstream strm(filename.c_str(), ifstream::in); + if (!strm) { + LOG(ERROR) << "SymbolTable::ReadText: Can't open file " << filename; + return 0; + } + return ReadText(strm, filename, opts); + } + + + // WARNING: Reading via symbol table read options should + // not be used. This is a temporary work around. + static SymbolTable* Read(istream &strm, + const SymbolTableReadOptions& opts) { + SymbolTableImpl* impl = SymbolTableImpl::Read(strm, opts); + if (!impl) + return 0; + else + return new SymbolTable(impl); + } + + // read a binary dump of the symbol table from a stream + static SymbolTable* Read(istream &strm, const string& source) { + SymbolTableReadOptions opts; + opts.source = source; + return Read(strm, opts); + } + + // read a binary dump of the symbol table + static SymbolTable* Read(const string& filename) { + ifstream strm(filename.c_str(), ifstream::in | ifstream::binary); + if (!strm) { + LOG(ERROR) << "SymbolTable::Read: Can't open file " << filename; + return 0; + } + return Read(strm, filename); + } + + //-------------------------------------------------------- + // Derivable Interface (final) + //-------------------------------------------------------- + // create a reference counted copy + virtual SymbolTable* Copy() const { + return new SymbolTable(*this); + } + + // Add a symbol with given key to table. A symbol table also + // keeps track of the last available key (highest key value in + // the symbol table). 
+ virtual int64 AddSymbol(const string& symbol, int64 key) { + MutateCheck(); + return impl_->AddSymbol(symbol, key); + } + + // Add a symbol to the table. The associated value key is automatically + // assigned by the symbol table. + virtual int64 AddSymbol(const string& symbol) { + MutateCheck(); + return impl_->AddSymbol(symbol); + } + + // Add another symbol table to this table. All key values will be offset + // by the current available key (highest key value in the symbol table). + // Note string symbols with the same key value with still have the same + // key value after the symbol table has been merged, but a different + // value. Adding symbol tables do not result in changes in the base table. + virtual void AddTable(const SymbolTable& table); + + // return the name of the symbol table + virtual const string& Name() const { + return impl_->Name(); + } + + // Return the label-agnostic MD5 check-sum for this table. All new symbols + // added to the table will result in an updated checksum. + // DEPRECATED. + virtual string CheckSum() const { + return impl_->CheckSum(); + } + + // Same as CheckSum(), but this returns an label-dependent version. 
+ virtual string LabeledCheckSum() const { + return impl_->LabeledCheckSum(); + } + + virtual bool Write(ostream &strm) const { + return impl_->Write(strm); + } + + bool Write(const string& filename) const { + ofstream strm(filename.c_str(), ofstream::out | ofstream::binary); + if (!strm) { + LOG(ERROR) << "SymbolTable::Write: Can't open file " << filename; + return false; + } + return Write(strm); + } + + // Dump an ascii text representation of the symbol table via a stream + virtual bool WriteText( + ostream &strm, + const SymbolTableTextOptions &opts = SymbolTableTextOptions()) const; + + // Dump an ascii text representation of the symbol table + bool WriteText(const string& filename) const { + ofstream strm(filename.c_str()); + if (!strm) { + LOG(ERROR) << "SymbolTable::WriteText: Can't open file " << filename; + return false; + } + return WriteText(strm); + } + + // Return the string associated with the key. If the key is out of + // range (<0, >max), log error and return an empty string. + virtual string Find(int64 key) const { + return impl_->Find(key); + } + + // Return the key associated with the symbol. If the symbol + // does not exists, log error and return SymbolTable::kNoSymbol + virtual int64 Find(const string& symbol) const { + return impl_->Find(symbol); + } + + // Return the key associated with the symbol. 
If the symbol + // does not exists, log error and return SymbolTable::kNoSymbol + virtual int64 Find(const char* symbol) const { + return impl_->Find(symbol); + } + + // Return the current available key (i.e highest key number+1) in + // the symbol table + virtual int64 AvailableKey(void) const { + return impl_->AvailableKey(); + } + + // Return the current number of symbols in table (not necessarily + // equal to AvailableKey()) + virtual size_t NumSymbols(void) const { + return impl_->NumSymbols(); + } + + virtual int64 GetNthKey(ssize_t pos) const { + return impl_->GetNthKey(pos); + } + + private: + explicit SymbolTable(SymbolTableImpl* impl) : impl_(impl) {} + + void MutateCheck() { + // Copy on write + if (impl_->RefCount() > 1) { + impl_->DecrRefCount(); + impl_ = new SymbolTableImpl(*impl_); + } + } + + const SymbolTableImpl* Impl() const { + return impl_; + } + + private: + SymbolTableImpl* impl_; +}; + + +// +// \class SymbolTableIterator +// \brief Iterator class for symbols in a symbol table +class SymbolTableIterator { + public: + SymbolTableIterator(const SymbolTable& table) + : table_(table), + pos_(0), + nsymbols_(table.NumSymbols()), + key_(table.GetNthKey(0)) { } + + ~SymbolTableIterator() { } + + // is iterator done + bool Done(void) { + return (pos_ == nsymbols_); + } + + // return the Value() of the current symbol (int64 key) + int64 Value(void) { + return key_; + } + + // return the string of the current symbol + string Symbol(void) { + return table_.Find(key_); + } + + // advance iterator forward + void Next(void) { + ++pos_; + if (pos_ < nsymbols_) key_ = table_.GetNthKey(pos_); + } + + // reset iterator + void Reset(void) { + pos_ = 0; + key_ = table_.GetNthKey(0); + } + + private: + const SymbolTable& table_; + ssize_t pos_; + size_t nsymbols_; + int64 key_; +}; + + +// Tests compatibilty between two sets of symbol tables +inline bool CompatSymbols(const SymbolTable *syms1, const SymbolTable *syms2, + bool warning = true) { + if 
(!FLAGS_fst_compat_symbols) { + return true; + } else if (!syms1 && !syms2) { + return true; + } else if (syms1 && !syms2) { + if (warning) + LOG(WARNING) << + "CompatSymbols: first symbol table present but second missing"; + return false; + } else if (!syms1 && syms2) { + if (warning) + LOG(WARNING) << + "CompatSymbols: second symbol table present but first missing"; + return false; + } else if (syms1->LabeledCheckSum() != syms2->LabeledCheckSum()) { + if (warning) + LOG(WARNING) << "CompatSymbols: Symbol table check sums do not match"; + return false; + } else { + return true; + } +} + + +// Relabels a symbol table as specified by the input vector of pairs +// (old label, new label). The new symbol table only retains symbols +// for which a relabeling is *explicitely* specified. +// TODO(allauzen): consider adding options to allow for some form +// of implicit identity relabeling. +template <class Label> +SymbolTable *RelabelSymbolTable(const SymbolTable *table, + const vector<pair<Label, Label> > &pairs) { + SymbolTable *new_table = new SymbolTable( + table->Name().empty() ? 
string() : + (string("relabeled_") + table->Name())); + + for (size_t i = 0; i < pairs.size(); ++i) + new_table->AddSymbol(table->Find(pairs[i].first), pairs[i].second); + + return new_table; +} + +// Symbol Table Serialization +inline void SymbolTableToString(const SymbolTable *table, string *result) { + ostringstream ostrm; + table->Write(ostrm); + *result = ostrm.str(); +} + +inline SymbolTable *StringToSymbolTable(const string &s) { + istringstream istrm(s); + return SymbolTable::Read(istrm, SymbolTableReadOptions()); +} + + + +} // namespace fst + +#endif // FST_LIB_SYMBOL_TABLE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/synchronize.h b/kaldi_io/src/tools/openfst/include/fst/synchronize.h new file mode 100644 index 0000000..9582926 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/synchronize.h @@ -0,0 +1,457 @@ +// synchronize.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Cyril Allauzen) +// +// \file +// Synchronize an FST with bounded delay. 
+ +#ifndef FST_LIB_SYNCHRONIZE_H__ +#define FST_LIB_SYNCHRONIZE_H__ + +#include <algorithm> +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <tr1/unordered_set> +using std::tr1::unordered_set; +using std::tr1::unordered_multiset; +#include <string> +#include <utility> +using std::pair; using std::make_pair; +#include <vector> +using std::vector; + +#include <fst/cache.h> +#include <fst/test-properties.h> + + +namespace fst { + +typedef CacheOptions SynchronizeFstOptions; + + +// Implementation class for SynchronizeFst +template <class A> +class SynchronizeFstImpl + : public CacheImpl<A> { + public: + using FstImpl<A>::SetType; + using FstImpl<A>::SetProperties; + using FstImpl<A>::SetInputSymbols; + using FstImpl<A>::SetOutputSymbols; + + using CacheBaseImpl< CacheState<A> >::PushArc; + using CacheBaseImpl< CacheState<A> >::HasArcs; + using CacheBaseImpl< CacheState<A> >::HasFinal; + using CacheBaseImpl< CacheState<A> >::HasStart; + using CacheBaseImpl< CacheState<A> >::SetArcs; + using CacheBaseImpl< CacheState<A> >::SetFinal; + using CacheBaseImpl< CacheState<A> >::SetStart; + + typedef A Arc; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + + typedef basic_string<Label> String; + + struct Element { + Element() {} + + Element(StateId s, const String *i, const String *o) + : state(s), istring(i), ostring(o) {} + + StateId state; // Input state Id + const String *istring; // Residual input labels + const String *ostring; // Residual output labels + // Residual strings are represented by const pointers to + // basic_string<Label> and are stored in a hash_set. The pointed + // memory is owned by the hash_set string_set_. 
+ }; + + SynchronizeFstImpl(const Fst<A> &fst, const SynchronizeFstOptions &opts) + : CacheImpl<A>(opts), fst_(fst.Copy()) { + SetType("synchronize"); + uint64 props = fst.Properties(kFstProperties, false); + SetProperties(SynchronizeProperties(props), kCopyProperties); + + SetInputSymbols(fst.InputSymbols()); + SetOutputSymbols(fst.OutputSymbols()); + } + + SynchronizeFstImpl(const SynchronizeFstImpl &impl) + : CacheImpl<A>(impl), + fst_(impl.fst_->Copy(true)) { + SetType("synchronize"); + SetProperties(impl.Properties(), kCopyProperties); + SetInputSymbols(impl.InputSymbols()); + SetOutputSymbols(impl.OutputSymbols()); + } + + ~SynchronizeFstImpl() { + delete fst_; + // Extract pointers from the hash set + vector<const String*> strings; + typename StringSet::iterator it = string_set_.begin(); + for (; it != string_set_.end(); ++it) + strings.push_back(*it); + // Free the extracted pointers + for (size_t i = 0; i < strings.size(); ++i) + delete strings[i]; + } + + StateId Start() { + if (!HasStart()) { + StateId s = fst_->Start(); + if (s == kNoStateId) + return kNoStateId; + const String *empty = FindString(new String()); + StateId start = FindState(Element(fst_->Start(), empty, empty)); + SetStart(start); + } + return CacheImpl<A>::Start(); + } + + Weight Final(StateId s) { + if (!HasFinal(s)) { + const Element &e = elements_[s]; + Weight w = e.state == kNoStateId ? 
Weight::One() : fst_->Final(e.state); + if ((w != Weight::Zero()) && (e.istring)->empty() && (e.ostring)->empty()) + SetFinal(s, w); + else + SetFinal(s, Weight::Zero()); + } + return CacheImpl<A>::Final(s); + } + + size_t NumArcs(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<A>::NumArcs(s); + } + + size_t NumInputEpsilons(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<A>::NumInputEpsilons(s); + } + + size_t NumOutputEpsilons(StateId s) { + if (!HasArcs(s)) + Expand(s); + return CacheImpl<A>::NumOutputEpsilons(s); + } + + uint64 Properties() const { return Properties(kFstProperties); } + + // Set error if found; return FST impl properties. + uint64 Properties(uint64 mask) const { + if ((mask & kError) && fst_->Properties(kError, false)) + SetProperties(kError, kError); + return FstImpl<Arc>::Properties(mask); + } + + void InitArcIterator(StateId s, ArcIteratorData<A> *data) { + if (!HasArcs(s)) + Expand(s); + CacheImpl<A>::InitArcIterator(s, data); + } + + // Returns the first character of the string obtained by + // concatenating s and l. + Label Car(const String *s, Label l = 0) const { + if (!s->empty()) + return (*s)[0]; + else + return l; + } + + // Computes the residual string obtained by removing the first + // character in the concatenation of s and l. + const String *Cdr(const String *s, Label l = 0) { + String *r = new String(); + for (int i = 1; i < s->size(); ++i) + r->push_back((*s)[i]); + if (l && !(s->empty())) r->push_back(l); + return FindString(r); + } + + // Computes the concatenation of s and l. + const String *Concat(const String *s, Label l = 0) { + String *r = new String(); + for (int i = 0; i < s->size(); ++i) + r->push_back((*s)[i]); + if (l) r->push_back(l); + return FindString(r); + } + + // Tests if the concatenation of s and l is empty + bool Empty(const String *s, Label l = 0) const { + if (s->empty()) + return l == 0; + else + return false; + } + + // Finds the string pointed by s in the hash set. 
Transfers the + // pointer ownership to the hash set. + const String *FindString(const String *s) { + typename StringSet::iterator it = string_set_.find(s); + if (it != string_set_.end()) { + delete s; + return (*it); + } else { + string_set_.insert(s); + return s; + } + } + + // Finds state corresponding to an element. Creates new state + // if element not found. + StateId FindState(const Element &e) { + typename ElementMap::iterator eit = element_map_.find(e); + if (eit != element_map_.end()) { + return (*eit).second; + } else { + StateId s = elements_.size(); + elements_.push_back(e); + element_map_.insert(pair<const Element, StateId>(e, s)); + return s; + } + } + + + // Computes the outgoing transitions from a state, creating new destination + // states as needed. + void Expand(StateId s) { + Element e = elements_[s]; + + if (e.state != kNoStateId) + for (ArcIterator< Fst<A> > ait(*fst_, e.state); + !ait.Done(); + ait.Next()) { + const A &arc = ait.Value(); + if (!Empty(e.istring, arc.ilabel) && !Empty(e.ostring, arc.olabel)) { + const String *istring = Cdr(e.istring, arc.ilabel); + const String *ostring = Cdr(e.ostring, arc.olabel); + StateId d = FindState(Element(arc.nextstate, istring, ostring)); + PushArc(s, Arc(Car(e.istring, arc.ilabel), + Car(e.ostring, arc.olabel), arc.weight, d)); + } else { + const String *istring = Concat(e.istring, arc.ilabel); + const String *ostring = Concat(e.ostring, arc.olabel); + StateId d = FindState(Element(arc.nextstate, istring, ostring)); + PushArc(s, Arc(0 , 0, arc.weight, d)); + } + } + + Weight w = e.state == kNoStateId ? 
Weight::One() : fst_->Final(e.state); + if ((w != Weight::Zero()) && + ((e.istring)->size() + (e.ostring)->size() > 0)) { + const String *istring = Cdr(e.istring); + const String *ostring = Cdr(e.ostring); + StateId d = FindState(Element(kNoStateId, istring, ostring)); + PushArc(s, Arc(Car(e.istring), Car(e.ostring), w, d)); + } + SetArcs(s); + } + + private: + // Equality function for Elements, assume strings have been hashed. + class ElementEqual { + public: + bool operator()(const Element &x, const Element &y) const { + return x.state == y.state && + x.istring == y.istring && + x.ostring == y.ostring; + } + }; + + // Hash function for Elements to Fst states. + class ElementKey { + public: + size_t operator()(const Element &x) const { + size_t key = x.state; + key = (key << 1) ^ (x.istring)->size(); + for (size_t i = 0; i < (x.istring)->size(); ++i) + key = (key << 1) ^ (*x.istring)[i]; + key = (key << 1) ^ (x.ostring)->size(); + for (size_t i = 0; i < (x.ostring)->size(); ++i) + key = (key << 1) ^ (*x.ostring)[i]; + return key; + } + }; + + // Equality function for strings + class StringEqual { + public: + bool operator()(const String * const &x, const String * const &y) const { + if (x->size() != y->size()) return false; + for (size_t i = 0; i < x->size(); ++i) + if ((*x)[i] != (*y)[i]) return false; + return true; + } + }; + + // Hash function for set of strings + class StringKey{ + public: + size_t operator()(const String * const & x) const { + size_t key = x->size(); + for (size_t i = 0; i < x->size(); ++i) + key = (key << 1) ^ (*x)[i]; + return key; + } + }; + + + typedef unordered_map<Element, StateId, ElementKey, ElementEqual> ElementMap; + typedef unordered_set<const String*, StringKey, StringEqual> StringSet; + + const Fst<A> *fst_; + vector<Element> elements_; // mapping Fst state to Elements + ElementMap element_map_; // mapping Elements to Fst state + StringSet string_set_; + + void operator=(const SynchronizeFstImpl<A> &); // disallow +}; + + +// 
Synchronizes a transducer. This version is a delayed Fst. The +// result will be an equivalent FST that has the property that during +// the traversal of a path, the delay is either zero or strictly +// increasing, where the delay is the difference between the number of +// non-epsilon output labels and input labels along the path. +// +// For the algorithm to terminate, the input transducer must have +// bounded delay, i.e., the delay of every cycle must be zero. +// +// Complexity: +// - A has bounded delay: exponential +// - A does not have bounded delay: does not terminate +// +// References: +// - Mehryar Mohri. Edit-Distance of Weighted Automata: General +// Definitions and Algorithms, International Journal of Computer +// Science, 14(6): 957-982 (2003). +// +// This class attaches interface to implementation and handles +// reference counting, delegating most methods to ImplToFst. +template <class A> +class SynchronizeFst : public ImplToFst< SynchronizeFstImpl<A> > { + public: + friend class ArcIterator< SynchronizeFst<A> >; + friend class StateIterator< SynchronizeFst<A> >; + + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + typedef CacheState<A> State; + typedef SynchronizeFstImpl<A> Impl; + + SynchronizeFst(const Fst<A> &fst) + : ImplToFst<Impl>(new Impl(fst, SynchronizeFstOptions())) {} + + SynchronizeFst(const Fst<A> &fst, const SynchronizeFstOptions &opts) + : ImplToFst<Impl>(new Impl(fst, opts)) {} + + // See Fst<>::Copy() for doc. + SynchronizeFst(const SynchronizeFst<A> &fst, bool safe = false) + : ImplToFst<Impl>(fst, safe) {} + + // Get a copy of this SynchronizeFst. See Fst<>::Copy() for further doc. 
+ virtual SynchronizeFst<A> *Copy(bool safe = false) const { + return new SynchronizeFst<A>(*this, safe); + } + + virtual inline void InitStateIterator(StateIteratorData<A> *data) const; + + virtual void InitArcIterator(StateId s, ArcIteratorData<A> *data) const { + GetImpl()->InitArcIterator(s, data); + } + + private: + // Makes visible to friends. + Impl *GetImpl() const { return ImplToFst<Impl>::GetImpl(); } + + void operator=(const SynchronizeFst<A> &fst); // Disallow +}; + + +// Specialization for SynchronizeFst. +template<class A> +class StateIterator< SynchronizeFst<A> > + : public CacheStateIterator< SynchronizeFst<A> > { + public: + explicit StateIterator(const SynchronizeFst<A> &fst) + : CacheStateIterator< SynchronizeFst<A> >(fst, fst.GetImpl()) {} +}; + + +// Specialization for SynchronizeFst. +template <class A> +class ArcIterator< SynchronizeFst<A> > + : public CacheArcIterator< SynchronizeFst<A> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const SynchronizeFst<A> &fst, StateId s) + : CacheArcIterator< SynchronizeFst<A> >(fst.GetImpl(), s) { + if (!fst.GetImpl()->HasArcs(s)) + fst.GetImpl()->Expand(s); + } + + private: + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + + +template <class A> inline +void SynchronizeFst<A>::InitStateIterator(StateIteratorData<A> *data) const +{ + data->base = new StateIterator< SynchronizeFst<A> >(*this); +} + + + +// Synchronizes a transducer. This version writes the synchronized +// result to a MutableFst. The result will be an equivalent FST that +// has the property that during the traversal of a path, the delay is +// either zero or strictly increasing, where the delay is the +// difference between the number of non-epsilon output labels and +// input labels along the path. +// +// For the algorithm to terminate, the input transducer must have +// bounded delay, i.e., the delay of every cycle must be zero. 
+// +// Complexity: +// - A has bounded delay: exponential +// - A does not have bounded delay: does not terminate +// +// References: +// - Mehryar Mohri. Edit-Distance of Weighted Automata: General +// Definitions and Algorithms, International Journal of Computer +// Science, 14(6): 957-982 (2003). +template<class Arc> +void Synchronize(const Fst<Arc> &ifst, MutableFst<Arc> *ofst) { + SynchronizeFstOptions opts; + opts.gc_limit = 0; // Cache only the last state for fastest copy. + *ofst = SynchronizeFst<Arc>(ifst, opts); +} + +} // namespace fst + +#endif // FST_LIB_SYNCHRONIZE_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/test-properties.h b/kaldi_io/src/tools/openfst/include/fst/test-properties.h new file mode 100644 index 0000000..80af593 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/test-properties.h @@ -0,0 +1,250 @@ +// test-properties.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Functions to manipulate and test property bits + +#ifndef FST_LIB_TEST_PROPERTIES_H__ +#define FST_LIB_TEST_PROPERTIES_H__ + +#include <tr1/unordered_set> +using std::tr1::unordered_set; +using std::tr1::unordered_multiset; + +#include <fst/dfs-visit.h> +#include <fst/connect.h> + + +DECLARE_bool(fst_verify_properties); + +namespace fst { + +// For a binary property, the bit is always returned set. +// For a trinary (i.e. 
two-bit) property, both bits are +// returned set iff either corresponding input bit is set. +inline uint64 KnownProperties(uint64 props) { + return kBinaryProperties | (props & kTrinaryProperties) | + ((props & kPosTrinaryProperties) << 1) | + ((props & kNegTrinaryProperties) >> 1); +} + +// Tests compatibility between two sets of properties +inline bool CompatProperties(uint64 props1, uint64 props2) { + uint64 known_props1 = KnownProperties(props1); + uint64 known_props2 = KnownProperties(props2); + uint64 known_props = known_props1 & known_props2; + uint64 incompat_props = (props1 & known_props) ^ (props2 & known_props); + if (incompat_props) { + uint64 prop = 1; + for (int i = 0; i < 64; ++i, prop <<= 1) + if (prop & incompat_props) + LOG(ERROR) << "CompatProperties: mismatch: " << PropertyNames[i] + << ": props1 = " << (props1 & prop ? "true" : "false") + << ", props2 = " << (props2 & prop ? "true" : "false"); + return false; + } else { + return true; + } +} + +// Computes FST property values defined in properties.h. The value of +// each property indicated in the mask will be determined and returned +// (these will never be unknown here). In the course of determining +// the properties specifically requested in the mask, certain other +// properties may be determined (those with little additional expense) +// and their values will be returned as well. The complete set of +// known properties (whether true or false) determined by this +// operation will be assigned to the the value pointed to by KNOWN. +// If 'use_stored' is true, pre-computed FST properties may be used +// when possible. This routine is seldom called directly; instead it +// is used to implement fst.Properties(mask, true). 
+template<class Arc> +uint64 ComputeProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known, + bool use_stored) { + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + typedef typename Arc::StateId StateId; + + uint64 fst_props = fst.Properties(kFstProperties, false); // Fst-stored + + // Check stored FST properties first if allowed. + if (use_stored) { + uint64 known_props = KnownProperties(fst_props); + // If FST contains required info, return it. + if ((known_props & mask) == mask) { + *known = known_props; + return fst_props; + } + } + + // Compute (trinary) properties explicitly. + + // Initialize with binary properties (already known). + uint64 comp_props = fst_props & kBinaryProperties; + + // Compute these trinary properties with a DFS. We compute only those + // that need a DFS here, since we otherwise would like to avoid a DFS + // since its stack could grow large. + uint64 dfs_props = kCyclic | kAcyclic | kInitialCyclic | kInitialAcyclic | + kAccessible | kNotAccessible | + kCoAccessible | kNotCoAccessible; + if (mask & dfs_props) { + SccVisitor<Arc> scc_visitor(&comp_props); + DfsVisit(fst, &scc_visitor); + } + + // Compute any remaining trinary properties via a state and arcs iterations + if (mask & ~(kBinaryProperties | dfs_props)) { + comp_props |= kAcceptor | kNoEpsilons | kNoIEpsilons | kNoOEpsilons | + kILabelSorted | kOLabelSorted | kUnweighted | kTopSorted | kString; + if (mask & (kIDeterministic | kNonIDeterministic)) + comp_props |= kIDeterministic; + if (mask & (kODeterministic | kNonODeterministic)) + comp_props |= kODeterministic; + + unordered_set<Label> *ilabels = 0; + unordered_set<Label> *olabels = 0; + + StateId nfinal = 0; + for (StateIterator< Fst<Arc> > siter(fst); + !siter.Done(); + siter.Next()) { + StateId s = siter.Value(); + + Arc prev_arc; + // Create these only if we need to + if (mask & (kIDeterministic | kNonIDeterministic)) + ilabels = new unordered_set<Label>; + if (mask & (kODeterministic | 
kNonODeterministic)) + olabels = new unordered_set<Label>; + + bool first_arc = true; + for (ArcIterator< Fst<Arc> > aiter(fst, s); + !aiter.Done(); + aiter.Next()) { + const Arc &arc =aiter.Value(); + + if (ilabels && ilabels->find(arc.ilabel) != ilabels->end()) { + comp_props |= kNonIDeterministic; + comp_props &= ~kIDeterministic; + } + if (olabels && olabels->find(arc.olabel) != olabels->end()) { + comp_props |= kNonODeterministic; + comp_props &= ~kODeterministic; + } + if (arc.ilabel != arc.olabel) { + comp_props |= kNotAcceptor; + comp_props &= ~kAcceptor; + } + if (arc.ilabel == 0 && arc.olabel == 0) { + comp_props |= kEpsilons; + comp_props &= ~kNoEpsilons; + } + if (arc.ilabel == 0) { + comp_props |= kIEpsilons; + comp_props &= ~kNoIEpsilons; + } + if (arc.olabel == 0) { + comp_props |= kOEpsilons; + comp_props &= ~kNoOEpsilons; + } + if (!first_arc) { + if (arc.ilabel < prev_arc.ilabel) { + comp_props |= kNotILabelSorted; + comp_props &= ~kILabelSorted; + } + if (arc.olabel < prev_arc.olabel) { + comp_props |= kNotOLabelSorted; + comp_props &= ~kOLabelSorted; + } + } + if (arc.weight != Weight::One() && arc.weight != Weight::Zero()) { + comp_props |= kWeighted; + comp_props &= ~kUnweighted; + } + if (arc.nextstate <= s) { + comp_props |= kNotTopSorted; + comp_props &= ~kTopSorted; + } + if (arc.nextstate != s + 1) { + comp_props |= kNotString; + comp_props &= ~kString; + } + prev_arc = arc; + first_arc = false; + if (ilabels) + ilabels->insert(arc.ilabel); + if (olabels) + olabels->insert(arc.olabel); + } + + if (nfinal > 0) { // final state not last + comp_props |= kNotString; + comp_props &= ~kString; + } + + Weight final = fst.Final(s); + + if (final != Weight::Zero()) { // final state + if (final != Weight::One()) { + comp_props |= kWeighted; + comp_props &= ~kUnweighted; + } + ++nfinal; + } else { // non-final state + if (fst.NumArcs(s) != 1) { + comp_props |= kNotString; + comp_props &= ~kString; + } + } + + delete ilabels; + delete olabels; + } + 
+ if (fst.Start() != kNoStateId && fst.Start() != 0) { + comp_props |= kNotString; + comp_props &= ~kString; + } + } + + *known = KnownProperties(comp_props); + return comp_props; +} + +// This is a wrapper around ComputeProperties that will cause a fatal +// error if the stored properties and the computed properties are +// incompatible when 'FLAGS_fst_verify_properties' is true. This +// routine is seldom called directly; instead it is used to implement +// fst.Properties(mask, true). +template<class Arc> +uint64 TestProperties(const Fst<Arc> &fst, uint64 mask, uint64 *known) { + if (FLAGS_fst_verify_properties) { + uint64 stored_props = fst.Properties(kFstProperties, false); + uint64 computed_props = ComputeProperties(fst, mask, known, false); + if (!CompatProperties(stored_props, computed_props)) + LOG(FATAL) << "TestProperties: stored Fst properties incorrect" + << " (stored: props1, computed: props2)"; + return computed_props; + } else { + return ComputeProperties(fst, mask, known, true); + } +} + +} // namespace fst + +#endif // FST_LIB_TEST_PROPERTIES_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/topsort.h b/kaldi_io/src/tools/openfst/include/fst/topsort.h new file mode 100644 index 0000000..53735e5 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/topsort.h @@ -0,0 +1,112 @@ +// topsort.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Michael Riley) +// +// \file +// Topological sort of FSTs + +#ifndef FST_LIB_TOPSORT_H__ +#define FST_LIB_TOPSORT_H__ + +#include <algorithm> +#include <vector> +using std::vector; + + +#include <fst/dfs-visit.h> +#include <fst/fst.h> +#include <fst/statesort.h> + + +namespace fst { + +// DFS visitor class to return topological ordering. +template <class A> +class TopOrderVisitor { + public: + typedef A Arc; + typedef typename A::StateId StateId; + + // If acyclic, ORDER[i] gives the topological position of state Id i; + // otherwise unchanged. ACYCLIC will be true iff the FST has + // no cycles. + TopOrderVisitor(vector<StateId> *order, bool *acyclic) + : order_(order), acyclic_(acyclic) {} + + void InitVisit(const Fst<A> &fst) { + finish_ = new vector<StateId>; + *acyclic_ = true; + } + + bool InitState(StateId s, StateId r) { return true; } + + bool TreeArc(StateId s, const A &arc) { return true; } + + bool BackArc(StateId s, const A &arc) { return (*acyclic_ = false); } + + bool ForwardOrCrossArc(StateId s, const A &arc) { return true; } + + void FinishState(StateId s, StateId p, const A *) { finish_->push_back(s); } + + void FinishVisit() { + if (*acyclic_) { + order_->clear(); + for (StateId s = 0; s < finish_->size(); ++s) + order_->push_back(kNoStateId); + for (StateId s = 0; s < finish_->size(); ++s) + (*order_)[(*finish_)[finish_->size() - s - 1]] = s; + } + delete finish_; + } + + private: + vector<StateId> *order_; + bool *acyclic_; + vector<StateId> *finish_; // states in finishing-time order +}; + + +// Topologically sorts its input if acyclic, modifying it. Otherwise, +// the input is unchanged. When sorted, all transitions are from +// lower to higher state IDs. +// +// Complexity: +// - Time: O(V + E) +// - Space: O(V + E) +// where V = # of states and E = # of arcs. 
+template <class Arc> +bool TopSort(MutableFst<Arc> *fst) { + typedef typename Arc::StateId StateId; + + vector<StateId> order; + bool acyclic; + + TopOrderVisitor<Arc> top_order_visitor(&order, &acyclic); + DfsVisit(*fst, &top_order_visitor); + + if (acyclic) { + StateSort(fst, order); + fst->SetProperties(kAcyclic | kInitialAcyclic | kTopSorted, + kAcyclic | kInitialAcyclic | kTopSorted); + } else { + fst->SetProperties(kCyclic | kNotTopSorted, kCyclic | kNotTopSorted); + } + return acyclic; +} + +} // namespace fst + +#endif // FST_LIB_TOPSORT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/tuple-weight.h b/kaldi_io/src/tools/openfst/include/fst/tuple-weight.h new file mode 100644 index 0000000..184026c --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/tuple-weight.h @@ -0,0 +1,332 @@ +// tuple-weight.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: allauzen@google (Cyril Allauzen) +// +// \file +// Tuple weight set operation definitions. 
+ +#ifndef FST_LIB_TUPLE_WEIGHT_H__ +#define FST_LIB_TUPLE_WEIGHT_H__ + +#include <string> +#include <vector> +using std::vector; + +#include <fst/weight.h> + + +DECLARE_string(fst_weight_parentheses); +DECLARE_string(fst_weight_separator); + +namespace fst { + +template<class W, unsigned int n> class TupleWeight; +template <class W, unsigned int n> +istream &operator>>(istream &strm, TupleWeight<W, n> &w); + +// n-tuple weight, element of the n-th catersian power of W +template <class W, unsigned int n> +class TupleWeight { + public: + typedef TupleWeight<typename W::ReverseWeight, n> ReverseWeight; + + TupleWeight() {} + + TupleWeight(const TupleWeight &w) { + for (size_t i = 0; i < n; ++i) + values_[i] = w.values_[i]; + } + + template <class Iterator> + TupleWeight(Iterator begin, Iterator end) { + for (Iterator iter = begin; iter != end; ++iter) + values_[iter - begin] = *iter; + } + + TupleWeight(const W &w) { + for (size_t i = 0; i < n; ++i) + values_[i] = w; + } + + static const TupleWeight<W, n> &Zero() { + static const TupleWeight<W, n> zero(W::Zero()); + return zero; + } + + static const TupleWeight<W, n> &One() { + static const TupleWeight<W, n> one(W::One()); + return one; + } + + static const TupleWeight<W, n> &NoWeight() { + static const TupleWeight<W, n> no_weight(W::NoWeight()); + return no_weight; + } + + static unsigned int Length() { + return n; + } + + istream &Read(istream &strm) { + for (size_t i = 0; i < n; ++i) + values_[i].Read(strm); + return strm; + } + + ostream &Write(ostream &strm) const { + for (size_t i = 0; i < n; ++i) + values_[i].Write(strm); + return strm; + } + + TupleWeight<W, n> &operator=(const TupleWeight<W, n> &w) { + for (size_t i = 0; i < n; ++i) + values_[i] = w.values_[i]; + return *this; + } + + bool Member() const { + bool member = true; + for (size_t i = 0; i < n; ++i) + member = member && values_[i].Member(); + return member; + } + + size_t Hash() const { + uint64 hash = 0; + for (size_t i = 0; i < n; ++i) + hash = 
5 * hash + values_[i].Hash(); + return size_t(hash); + } + + TupleWeight<W, n> Quantize(float delta = kDelta) const { + TupleWeight<W, n> w; + for (size_t i = 0; i < n; ++i) + w.values_[i] = values_[i].Quantize(delta); + return w; + } + + ReverseWeight Reverse() const { + TupleWeight<W, n> w; + for (size_t i = 0; i < n; ++i) + w.values_[i] = values_[i].Reverse(); + return w; + } + + const W& Value(size_t i) const { return values_[i]; } + + void SetValue(size_t i, const W &w) { values_[i] = w; } + + protected: + // Reads TupleWeight when there are no parentheses around tuple terms + inline static istream &ReadNoParen(istream &strm, + TupleWeight<W, n> &w, + char separator) { + int c; + do { + c = strm.get(); + } while (isspace(c)); + + for (size_t i = 0; i < n - 1; ++i) { + string s; + if (i) + c = strm.get(); + while (c != separator) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + c = strm.get(); + } + // read (i+1)-th element + istringstream sstrm(s); + W r = W::Zero(); + sstrm >> r; + w.SetValue(i, r); + } + + // read n-th element + W r = W::Zero(); + strm >> r; + w.SetValue(n - 1, r); + + return strm; + } + + // Reads TupleWeight when there are parentheses around tuple terms + inline static istream &ReadWithParen(istream &strm, + TupleWeight<W, n> &w, + char separator, + char open_paren, + char close_paren) { + int c; + do { + c = strm.get(); + } while (isspace(c)); + + if (c != open_paren) { + FSTERROR() << " is fst_weight_parentheses flag set correcty? 
"; + strm.clear(std::ios::badbit); + return strm; + } + + for (size_t i = 0; i < n - 1; ++i) { + // read (i+1)-th element + stack<int> parens; + string s; + c = strm.get(); + while (c != separator || !parens.empty()) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + // if parens encountered before separator, they must be matched + if (c == open_paren) { + parens.push(1); + } else if (c == close_paren) { + // Fail for mismatched parens + if (parens.empty()) { + strm.clear(std::ios::failbit); + return strm; + } + parens.pop(); + } + c = strm.get(); + } + istringstream sstrm(s); + W r = W::Zero(); + sstrm >> r; + w.SetValue(i, r); + } + + // read n-th element + string s; + c = strm.get(); + while (c != EOF) { + s += c; + c = strm.get(); + } + if (s.empty() || *s.rbegin() != close_paren) { + FSTERROR() << " is fst_weight_parentheses flag set correcty? "; + strm.clear(std::ios::failbit); + return strm; + } + s.erase(s.size() - 1, 1); + istringstream sstrm(s); + W r = W::Zero(); + sstrm >> r; + w.SetValue(n - 1, r); + + return strm; + } + + + private: + W values_[n]; + + friend istream &operator>><W, n>(istream&, TupleWeight<W, n>&); +}; + +template <class W, unsigned int n> +inline bool operator==(const TupleWeight<W, n> &w1, + const TupleWeight<W, n> &w2) { + bool equal = true; + for (size_t i = 0; i < n; ++i) + equal = equal && (w1.Value(i) == w2.Value(i)); + return equal; +} + +template <class W, unsigned int n> +inline bool operator!=(const TupleWeight<W, n> &w1, + const TupleWeight<W, n> &w2) { + bool not_equal = false; + for (size_t i = 0; (i < n) && !not_equal; ++i) + not_equal = not_equal || (w1.Value(i) != w2.Value(i)); + return not_equal; +} + +template <class W, unsigned int n> +inline bool ApproxEqual(const TupleWeight<W, n> &w1, + const TupleWeight<W, n> &w2, + float delta = kDelta) { + bool approx_equal = true; + for (size_t i = 0; i < n; ++i) + approx_equal = approx_equal && + ApproxEqual(w1.Value(i), w2.Value(i), delta); 
+ return approx_equal; +} + +template <class W, unsigned int n> +inline ostream &operator<<(ostream &strm, const TupleWeight<W, n> &w) { + if(FLAGS_fst_weight_separator.size() != 1) { + FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1"; + strm.clear(std::ios::badbit); + return strm; + } + char separator = FLAGS_fst_weight_separator[0]; + bool write_parens = false; + if (!FLAGS_fst_weight_parentheses.empty()) { + if (FLAGS_fst_weight_parentheses.size() != 2) { + FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2"; + strm.clear(std::ios::badbit); + return strm; + } + write_parens = true; + } + + if (write_parens) + strm << FLAGS_fst_weight_parentheses[0]; + for (size_t i = 0; i < n; ++i) { + if(i) + strm << separator; + strm << w.Value(i); + } + if (write_parens) + strm << FLAGS_fst_weight_parentheses[1]; + + return strm; +} + +template <class W, unsigned int n> +inline istream &operator>>(istream &strm, TupleWeight<W, n> &w) { + if(FLAGS_fst_weight_separator.size() != 1) { + FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1"; + strm.clear(std::ios::badbit); + return strm; + } + char separator = FLAGS_fst_weight_separator[0]; + + if (!FLAGS_fst_weight_parentheses.empty()) { + if (FLAGS_fst_weight_parentheses.size() != 2) { + FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2"; + strm.clear(std::ios::badbit); + return strm; + } + return TupleWeight<W, n>::ReadWithParen( + strm, w, separator, FLAGS_fst_weight_parentheses[0], + FLAGS_fst_weight_parentheses[1]); + } else { + return TupleWeight<W, n>::ReadNoParen(strm, w, separator); + } +} + + + +} // namespace fst + +#endif // FST_LIB_TUPLE_WEIGHT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/types.h b/kaldi_io/src/tools/openfst/include/fst/types.h new file mode 100644 index 0000000..8c4367a --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/types.h @@ -0,0 +1,38 @@ +// types.h +// +// Licensed under the Apache License, Version 2.0 
(the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Author: [email protected] (Michael Riley) +// +// \file +// Various type definitions (mostly for Google compatibility). + +#include <cstdlib> // for ssize_t +#include <stdint.h> // *int*_t + +#include <fst/compat.h> // for DISALLOW_COPY_AND_ASSIGN + +#ifndef FST_LIB_TYPES_H__ +#define FST_LIB_TYPES_H__ + +typedef int8_t int8; +typedef int16_t int16; +typedef int32_t int32; +typedef int64_t int64; + +typedef uint8_t uint8; +typedef uint16_t uint16; +typedef uint32_t uint32; +typedef uint64_t uint64; + +#endif // FST_LIB_TYPES_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/union-find.h b/kaldi_io/src/tools/openfst/include/fst/union-find.h new file mode 100644 index 0000000..c8633e0 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/union-find.h @@ -0,0 +1,110 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Wojciech Skut) +// +// \file Union-Find algorithm for dense sets of non-negative +// integers. Implemented using disjoint tree forests with rank +// heuristics and path compression. + +#ifndef __fst_union_find_inl_h__ +#define __fst_union_find_inl_h__ + +#include <stack> +#include <vector> +using std::vector; +#include <fst/types.h> + +namespace fst { + +// Union-Find algorithm for dense sets of non-negative integers +// (exact type: T). +template <class T> +class UnionFind { + public: + // Ctor: creates a disjoint set forest for the range [0;max). + // 'fail' is a value indicating that an element hasn't been + // initialized using MakeSet(...). The upper bound of the range + // can be reset (increased) using MakeSet(...). + UnionFind(T max, T fail) + : parent_(max, fail), rank_(max), fail_(fail) { } + + // Finds the representative of the set 'item' belongs to. + // Performs path compression if needed. + T FindSet(T item) { + if (item >= parent_.size() + || item == fail_ + || parent_[item] == fail_) return fail_; + + T *p = &parent_[item]; + for (; *p != item; item = *p, p = &parent_[item]) { + exec_stack_.push(p); + } + for (; ! exec_stack_.empty(); exec_stack_.pop()) { + *exec_stack_.top() = *p; + } + return *p; + } + + // Creates the (destructive) union of the sets x and y belong to. + void Union(T x, T y) { + Link(FindSet(x), FindSet(y)); + } + + // Initialization of an element: creates a singleton set containing + // 'item'. The range [0;max) is reset if item >= max. + T MakeSet(T item) { + if (item >= parent_.size()) { + // New value in parent_ should be initialized to fail_ + size_t nitem = item > 0 ? 
2 * item : 2; + parent_.resize(nitem, fail_); + rank_.resize(nitem); + } + parent_[item] = item; + return item; + } + + // Initialization of all elements starting from 0 to max - 1 to distinct sets + void MakeAllSet(T max) { + parent_.resize(max); + for (T item = 0; item < max; ++item) { + parent_[item] = item; + } + } + + private: + vector<T> parent_; // Parent nodes. + vector<int> rank_; // Rank of an element = min. depth in tree. + T fail_; // Value indicating lookup failure. + stack<T*> exec_stack_; // Used for path compression. + + // Links trees rooted in 'x' and 'y'. + void Link(T x, T y) { + if (x == y) return; + + if (rank_[x] > rank_[y]) { + parent_[y] = x; + } else { + parent_[x] = y; + if (rank_[x] == rank_[y]) { + ++rank_[y]; + } + } + } + DISALLOW_COPY_AND_ASSIGN(UnionFind); +}; + +} // namespace fst + +#endif // __fst_union_find_inl_h__ diff --git a/kaldi_io/src/tools/openfst/include/fst/union.h b/kaldi_io/src/tools/openfst/include/fst/union.h new file mode 100644 index 0000000..a2f97fb --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/union.h @@ -0,0 +1,185 @@ +// union.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Functions and classes to compute the union of two FSTs. 
+ +#ifndef FST_LIB_UNION_H__ +#define FST_LIB_UNION_H__ + +#include <vector> +using std::vector; +#include <algorithm> + +#include <fst/mutable-fst.h> +#include <fst/rational.h> + + +namespace fst { + +// Computes the union (sum) of two FSTs. This version writes the +// union to an output MurableFst. If A transduces string x to y with +// weight a and B transduces string w to v with weight b, then their +// union transduces x to y with weight a and w to v with weight b. +// +// Complexity: +// - Time: (V2 + E2) +// - Space: O(V2 + E2) +// where Vi = # of states and Ei = # of arcs of the ith FST. +template <class Arc> +void Union(MutableFst<Arc> *fst1, const Fst<Arc> &fst2) { + typedef typename Arc::StateId StateId; + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + + // TODO(riley): restore when voice actions issues fixed + // Check that the symbol table are compatible + if (!CompatSymbols(fst1->InputSymbols(), fst2.InputSymbols()) || + !CompatSymbols(fst1->OutputSymbols(), fst2.OutputSymbols())) { + LOG(ERROR) << "Union: input/output symbol tables of 1st argument " + << "do not match input/output symbol tables of 2nd argument"; + // fst1->SetProperties(kError, kError); + // return; + } + + StateId numstates1 = fst1->NumStates(); + bool initial_acyclic1 = fst1->Properties(kInitialAcyclic, true); + uint64 props1 = fst1->Properties(kFstProperties, false); + uint64 props2 = fst2.Properties(kFstProperties, false); + + StateId start2 = fst2.Start(); + if (start2 == kNoStateId) { + if (props2 & kError) fst1->SetProperties(kError, kError); + return; + } + + if (fst2.Properties(kExpanded, false)) { + fst1->ReserveStates( + numstates1 + CountStates(fst2) + (initial_acyclic1 ? 
0 : 1)); + } + + for (StateIterator< Fst<Arc> > siter(fst2); + !siter.Done(); + siter.Next()) { + StateId s1 = fst1->AddState(); + StateId s2 = siter.Value(); + fst1->SetFinal(s1, fst2.Final(s2)); + fst1->ReserveArcs(s1, fst2.NumArcs(s2)); + for (ArcIterator< Fst<Arc> > aiter(fst2, s2); + !aiter.Done(); + aiter.Next()) { + Arc arc = aiter.Value(); + arc.nextstate += numstates1; + fst1->AddArc(s1, arc); + } + } + StateId start1 = fst1->Start(); + if (start1 == kNoStateId) { + fst1->SetStart(start2); + fst1->SetProperties(props2, kCopyProperties); + return; + } + + if (initial_acyclic1) { + fst1->AddArc(start1, Arc(0, 0, Weight::One(), start2 + numstates1)); + } else { + StateId nstart1 = fst1->AddState(); + fst1->SetStart(nstart1); + fst1->AddArc(nstart1, Arc(0, 0, Weight::One(), start1)); + fst1->AddArc(nstart1, Arc(0, 0, Weight::One(), start2 + numstates1)); + } + fst1->SetProperties(UnionProperties(props1, props2), kFstProperties); +} + + +// Computes the union of two FSTs; this version modifies its +// RationalFst argument. +template<class Arc> +void Union(RationalFst<Arc> *fst1, const Fst<Arc> &fst2) { + fst1->GetImpl()->AddUnion(fst2); +} + + +typedef RationalFstOptions UnionFstOptions; + + +// Computes the union (sum) of two FSTs. This version is a delayed +// Fst. If A transduces string x to y with weight a and B transduces +// string w to v with weight b, then their union transduces x to y +// with weight a and w to v with weight b. +// +// Complexity: +// - Time: O(v1 + e1 + v2 + e2) +// - Sapce: O(v1 + v2) +// where vi = # of states visited and ei = # of arcs visited of the +// ith FST. Constant time and space to visit an input state or arc +// is assumed and exclusive of caching. 
+template <class A> +class UnionFst : public RationalFst<A> { + public: + using ImplToFst< RationalFstImpl<A> >::GetImpl; + + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + + UnionFst(const Fst<A> &fst1, const Fst<A> &fst2) { + GetImpl()->InitUnion(fst1, fst2); + } + + UnionFst(const Fst<A> &fst1, const Fst<A> &fst2, const UnionFstOptions &opts) + : RationalFst<A>(opts) { + GetImpl()->InitUnion(fst1, fst2); + } + + // See Fst<>::Copy() for doc. + UnionFst(const UnionFst<A> &fst, bool safe = false) + : RationalFst<A>(fst, safe) {} + + // Get a copy of this UnionFst. See Fst<>::Copy() for further doc. + virtual UnionFst<A> *Copy(bool safe = false) const { + return new UnionFst<A>(*this, safe); + } +}; + + +// Specialization for UnionFst. +template <class A> +class StateIterator< UnionFst<A> > : public StateIterator< RationalFst<A> > { + public: + explicit StateIterator(const UnionFst<A> &fst) + : StateIterator< RationalFst<A> >(fst) {} +}; + + +// Specialization for UnionFst. +template <class A> +class ArcIterator< UnionFst<A> > : public ArcIterator< RationalFst<A> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const UnionFst<A> &fst, StateId s) + : ArcIterator< RationalFst<A> >(fst, s) {} +}; + + +// Useful alias when using StdArc. +typedef UnionFst<StdArc> StdUnionFst; + +} // namespace fst + +#endif // FST_LIB_UNION_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/util.h b/kaldi_io/src/tools/openfst/include/fst/util.h new file mode 100644 index 0000000..57d7c4b --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/util.h @@ -0,0 +1,437 @@ +// util.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// FST utility inline definitions. + +#ifndef FST_LIB_UTIL_H__ +#define FST_LIB_UTIL_H__ + +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <tr1/unordered_set> +using std::tr1::unordered_set; +using std::tr1::unordered_multiset; +#include <list> +#include <map> +#include <set> +#include <sstream> +#include <string> +#include <vector> +using std::vector; + + +#include <fst/compat.h> +#include <fst/types.h> + +#include <iostream> +#include <fstream> +#include <sstream> + +// +// UTILITY FOR ERROR HANDLING +// + +DECLARE_bool(fst_error_fatal); + +#define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR)) + +namespace fst { + +// +// UTILITIES FOR TYPE I/O +// + +// Read some types from an input stream. + +// Generic case. +template <typename T> +inline istream &ReadType(istream &strm, T *t) { + return t->Read(strm); +} + +// Fixed size, contiguous memory read. 
+#define READ_POD_TYPE(T) \ +inline istream &ReadType(istream &strm, T *t) { \ + return strm.read(reinterpret_cast<char *>(t), sizeof(T)); \ +} + +READ_POD_TYPE(bool); +READ_POD_TYPE(char); +READ_POD_TYPE(signed char); +READ_POD_TYPE(unsigned char); +READ_POD_TYPE(short); +READ_POD_TYPE(unsigned short); +READ_POD_TYPE(int); +READ_POD_TYPE(unsigned int); +READ_POD_TYPE(long); +READ_POD_TYPE(unsigned long); +READ_POD_TYPE(long long); +READ_POD_TYPE(unsigned long long); +READ_POD_TYPE(float); +READ_POD_TYPE(double); + +// String case. +inline istream &ReadType(istream &strm, string *s) { + s->clear(); + int32 ns = 0; + strm.read(reinterpret_cast<char *>(&ns), sizeof(ns)); + for (int i = 0; i < ns; ++i) { + char c; + strm.read(&c, 1); + *s += c; + } + return strm; +} + +// Pair case. +template <typename S, typename T> +inline istream &ReadType(istream &strm, pair<S, T> *p) { + ReadType(strm, &p->first); + ReadType(strm, &p->second); + return strm; +} + +template <typename S, typename T> +inline istream &ReadType(istream &strm, pair<const S, T> *p) { + ReadType(strm, const_cast<S *>(&p->first)); + ReadType(strm, &p->second); + return strm; +} + +// General case - no-op. +template <typename C> +void StlReserve(C *c, int64 n) {} + +// Specialization for vectors. +template <typename S, typename T> +void StlReserve(vector<S, T> *c, int64 n) { + c->reserve(n); +} + +// STL sequence container. +#define READ_STL_SEQ_TYPE(C) \ +template <typename S, typename T> \ +inline istream &ReadType(istream &strm, C<S, T> *c) { \ + c->clear(); \ + int64 n = 0; \ + strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \ + StlReserve(c, n); \ + for (ssize_t i = 0; i < n; ++i) { \ + typename C<S, T>::value_type value; \ + ReadType(strm, &value); \ + c->insert(c->end(), value); \ + } \ + return strm; \ +} + +READ_STL_SEQ_TYPE(vector); +READ_STL_SEQ_TYPE(list); + +// STL associative container. 
+#define READ_STL_ASSOC_TYPE(C) \ +template <typename S, typename T, typename U> \ +inline istream &ReadType(istream &strm, C<S, T, U> *c) { \ + c->clear(); \ + int64 n = 0; \ + strm.read(reinterpret_cast<char *>(&n), sizeof(n)); \ + for (ssize_t i = 0; i < n; ++i) { \ + typename C<S, T, U>::value_type value; \ + ReadType(strm, &value); \ + c->insert(value); \ + } \ + return strm; \ +} + +READ_STL_ASSOC_TYPE(set); +READ_STL_ASSOC_TYPE(unordered_set); +READ_STL_ASSOC_TYPE(map); +READ_STL_ASSOC_TYPE(unordered_map); + +// Write some types to an output stream. + +// Generic case. +template <typename T> +inline ostream &WriteType(ostream &strm, const T t) { + t.Write(strm); + return strm; +} + +// Fixed size, contiguous memory write. +#define WRITE_POD_TYPE(T) \ +inline ostream &WriteType(ostream &strm, const T t) { \ + return strm.write(reinterpret_cast<const char *>(&t), sizeof(T)); \ +} + +WRITE_POD_TYPE(bool); +WRITE_POD_TYPE(char); +WRITE_POD_TYPE(signed char); +WRITE_POD_TYPE(unsigned char); +WRITE_POD_TYPE(short); +WRITE_POD_TYPE(unsigned short); +WRITE_POD_TYPE(int); +WRITE_POD_TYPE(unsigned int); +WRITE_POD_TYPE(long); +WRITE_POD_TYPE(unsigned long); +WRITE_POD_TYPE(long long); +WRITE_POD_TYPE(unsigned long long); +WRITE_POD_TYPE(float); +WRITE_POD_TYPE(double); + +// String case. +inline ostream &WriteType(ostream &strm, const string &s) { + int32 ns = s.size(); + strm.write(reinterpret_cast<const char *>(&ns), sizeof(ns)); + return strm.write(s.data(), ns); +} + +// Pair case. +template <typename S, typename T> +inline ostream &WriteType(ostream &strm, const pair<S, T> &p) { + WriteType(strm, p.first); + WriteType(strm, p.second); + return strm; +} + +// STL sequence container. 
+#define WRITE_STL_SEQ_TYPE(C) \ +template <typename S, typename T> \ +inline ostream &WriteType(ostream &strm, const C<S, T> &c) { \ + int64 n = c.size(); \ + strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \ + for (typename C<S, T>::const_iterator it = c.begin(); \ + it != c.end(); ++it) \ + WriteType(strm, *it); \ + return strm; \ +} + +WRITE_STL_SEQ_TYPE(vector); +WRITE_STL_SEQ_TYPE(list); + +// STL associative container. +#define WRITE_STL_ASSOC_TYPE(C) \ +template <typename S, typename T, typename U> \ +inline ostream &WriteType(ostream &strm, const C<S, T, U> &c) { \ + int64 n = c.size(); \ + strm.write(reinterpret_cast<char *>(&n), sizeof(n)); \ + for (typename C<S, T, U>::const_iterator it = c.begin(); \ + it != c.end(); ++it) \ + WriteType(strm, *it); \ + return strm; \ +} + +WRITE_STL_ASSOC_TYPE(set); +WRITE_STL_ASSOC_TYPE(unordered_set); +WRITE_STL_ASSOC_TYPE(map); +WRITE_STL_ASSOC_TYPE(unordered_map); + +// Utilities for converting between int64 or Weight and string. + +int64 StrToInt64(const string &s, const string &src, size_t nline, + bool allow_negative, bool *error = 0); + +template <typename Weight> +Weight StrToWeight(const string &s, const string &src, size_t nline) { + Weight w; + istringstream strm(s); + strm >> w; + if (!strm) { + FSTERROR() << "StrToWeight: Bad weight = \"" << s + << "\", source = " << src << ", line = " << nline; + return Weight::NoWeight(); + } + return w; +} + +void Int64ToStr(int64 n, string *s); + +template <typename Weight> +void WeightToStr(Weight w, string *s) { + ostringstream strm; + strm.precision(9); + strm << w; + s->append(strm.str().data(), strm.str().size()); +} + +// Utilities for reading/writing label pairs + +// Returns true on success +template <typename Label> +bool ReadLabelPairs(const string& filename, + vector<pair<Label, Label> >* pairs, + bool allow_negative = false) { + ifstream strm(filename.c_str()); + + if (!strm) { + LOG(ERROR) << "ReadLabelPairs: Can't open file: " << filename; + return 
false; + } + + const int kLineLen = 8096; + char line[kLineLen]; + size_t nline = 0; + + pairs->clear(); + while (strm.getline(line, kLineLen)) { + ++nline; + vector<char *> col; + SplitToVector(line, "\n\t ", &col, true); + if (col.size() == 0 || col[0][0] == '\0') // empty line + continue; + if (col.size() != 2) { + LOG(ERROR) << "ReadLabelPairs: Bad number of columns, " + << "file = " << filename << ", line = " << nline; + return false; + } + + bool err; + Label frmlabel = StrToInt64(col[0], filename, nline, allow_negative, &err); + if (err) return false; + Label tolabel = StrToInt64(col[1], filename, nline, allow_negative, &err); + if (err) return false; + pairs->push_back(make_pair(frmlabel, tolabel)); + } + return true; +} + +// Returns true on success +template <typename Label> +bool WriteLabelPairs(const string& filename, + const vector<pair<Label, Label> >& pairs) { + ostream *strm = &cout; + if (!filename.empty()) { + strm = new ofstream(filename.c_str()); + if (!*strm) { + LOG(ERROR) << "WriteLabelPairs: Can't open file: " << filename; + return false; + } + } + + for (ssize_t n = 0; n < pairs.size(); ++n) + *strm << pairs[n].first << "\t" << pairs[n].second << "\n"; + + if (!*strm) { + LOG(ERROR) << "WriteLabelPairs: Write failed: " + << (filename.empty() ? "standard output" : filename); + return false; + } + if (strm != &cout) + delete strm; + return true; +} + +// Utilities for converting a type name to a legal C symbol. + +void ConvertToLegalCSymbol(string *s); + + +// +// UTILITIES FOR STREAM I/O +// + +bool AlignInput(istream &strm); +bool AlignOutput(ostream &strm); + +// +// UTILITIES FOR PROTOCOL BUFFER I/O +// + + +// An associative container for which testing membership is +// faster than an STL set if members are restricted to an interval +// that excludes most non-members. A 'Key' must have ==, !=, and < defined. +// Element 'NoKey' should be a key that marks an uninitialized key and +// is otherwise unused. 
'Find()' returns an STL const_iterator to the match +// found, otherwise it equals 'End()'. +template <class Key, Key NoKey> +class CompactSet { +public: + typedef typename set<Key>::const_iterator const_iterator; + + CompactSet() + : min_key_(NoKey), + max_key_(NoKey) { } + + CompactSet(const CompactSet<Key, NoKey> &compact_set) + : set_(compact_set.set_), + min_key_(compact_set.min_key_), + max_key_(compact_set.max_key_) { } + + void Insert(Key key) { + set_.insert(key); + if (min_key_ == NoKey || key < min_key_) + min_key_ = key; + if (max_key_ == NoKey || max_key_ < key) + max_key_ = key; + } + + void Erase(Key key) { + set_.erase(key); + if (set_.empty()) { + min_key_ = max_key_ = NoKey; + } else if (key == min_key_) { + ++min_key_; + } else if (key == max_key_) { + --max_key_; + } + } + + void Clear() { + set_.clear(); + min_key_ = max_key_ = NoKey; + } + + const_iterator Find(Key key) const { + if (min_key_ == NoKey || + key < min_key_ || max_key_ < key) + return set_.end(); + else + return set_.find(key); + } + + bool Member(Key key) const { + if (min_key_ == NoKey || key < min_key_ || max_key_ < key) { + return false; // out of range + } else if (min_key_ != NoKey && max_key_ + 1 == min_key_ + set_.size()) { + return true; // dense range + } else { + return set_.find(key) != set_.end(); + } + } + + const_iterator Begin() const { return set_.begin(); } + + const_iterator End() const { return set_.end(); } + + // All stored keys are greater than or equal to this value. + Key LowerBound() const { return min_key_; } + + // All stored keys are less than or equal to this value. 
+ Key UpperBound() const { return max_key_; } + +private: + set<Key> set_; + Key min_key_; + Key max_key_; + + void operator=(const CompactSet<Key, NoKey> &); //disallow +}; + +} // namespace fst + +#endif // FST_LIB_UTIL_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/vector-fst.h b/kaldi_io/src/tools/openfst/include/fst/vector-fst.h new file mode 100644 index 0000000..8b80876 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/vector-fst.h @@ -0,0 +1,731 @@ +// vector-fst.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Simple concrete, mutable FST whose states and arcs are stored in STL +// vectors. + +#ifndef FST_LIB_VECTOR_FST_H__ +#define FST_LIB_VECTOR_FST_H__ + +#include <string> +#include <vector> +using std::vector; + +#include <fst/mutable-fst.h> +#include <fst/test-properties.h> + + +namespace fst { + +template <class A> class VectorFst; +template <class F, class G> void Cast(const F &, G *); + + +// States and arcs implemented by STL vectors, templated on the +// State definition. This does not manage the Fst properties. 
+template <class State> +class VectorFstBaseImpl : public FstImpl<typename State::Arc> { + public: + typedef typename State::Arc Arc; + typedef typename Arc::Weight Weight; + typedef typename Arc::StateId StateId; + + VectorFstBaseImpl() : start_(kNoStateId) {} + + ~VectorFstBaseImpl() { + for (StateId s = 0; s < states_.size(); ++s) + delete states_[s]; + } + + StateId Start() const { return start_; } + + Weight Final(StateId s) const { return states_[s]->final; } + + StateId NumStates() const { return states_.size(); } + + size_t NumArcs(StateId s) const { return states_[s]->arcs.size(); } + + void SetStart(StateId s) { start_ = s; } + + void SetFinal(StateId s, Weight w) { states_[s]->final = w; } + + StateId AddState() { + states_.push_back(new State); + return states_.size() - 1; + } + + StateId AddState(State *state) { + states_.push_back(state); + return states_.size() - 1; + } + + void AddArc(StateId s, const Arc &arc) { + states_[s]->arcs.push_back(arc); + } + + void DeleteStates(const vector<StateId>& dstates) { + vector<StateId> newid(states_.size(), 0); + for (size_t i = 0; i < dstates.size(); ++i) + newid[dstates[i]] = kNoStateId; + StateId nstates = 0; + for (StateId s = 0; s < states_.size(); ++s) { + if (newid[s] != kNoStateId) { + newid[s] = nstates; + if (s != nstates) + states_[nstates] = states_[s]; + ++nstates; + } else { + delete states_[s]; + } + } + states_.resize(nstates); + for (StateId s = 0; s < states_.size(); ++s) { + vector<Arc> &arcs = states_[s]->arcs; + size_t narcs = 0; + for (size_t i = 0; i < arcs.size(); ++i) { + StateId t = newid[arcs[i].nextstate]; + if (t != kNoStateId) { + arcs[i].nextstate = t; + if (i != narcs) + arcs[narcs] = arcs[i]; + ++narcs; + } else { + if (arcs[i].ilabel == 0) + --states_[s]->niepsilons; + if (arcs[i].olabel == 0) + --states_[s]->noepsilons; + } + } + arcs.resize(narcs); + } + if (Start() != kNoStateId) + SetStart(newid[Start()]); + } + + void DeleteStates() { + for (StateId s = 0; s < 
states_.size(); ++s) + delete states_[s]; + states_.clear(); + SetStart(kNoStateId); + } + + void DeleteArcs(StateId s, size_t n) { + states_[s]->arcs.resize(states_[s]->arcs.size() - n); + } + + void DeleteArcs(StateId s) { states_[s]->arcs.clear(); } + + State *GetState(StateId s) { return states_[s]; } + + const State *GetState(StateId s) const { return states_[s]; } + + void SetState(StateId s, State *state) { states_[s] = state; } + + void ReserveStates(StateId n) { states_.reserve(n); } + + void ReserveArcs(StateId s, size_t n) { states_[s]->arcs.reserve(n); } + + // Provide information needed for generic state iterator + void InitStateIterator(StateIteratorData<Arc> *data) const { + data->base = 0; + data->nstates = states_.size(); + } + + // Provide information needed for generic arc iterator + void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { + data->base = 0; + data->narcs = states_[s]->arcs.size(); + data->arcs = data->narcs > 0 ? &states_[s]->arcs[0] : 0; + data->ref_count = 0; + } + + private: + vector<State *> states_; // States represenation. + StateId start_; // initial state + + DISALLOW_COPY_AND_ASSIGN(VectorFstBaseImpl); +}; + +// Arcs implemented by an STL vector per state. +template <class A> +struct VectorState { + typedef A Arc; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + + VectorState() : final(Weight::Zero()), niepsilons(0), noepsilons(0) {} + + Weight final; // Final weight + vector<A> arcs; // Arcs represenation + size_t niepsilons; // # of input epsilons + size_t noepsilons; // # of output epsilons +}; + +// This is a VectorFstBaseImpl container that holds VectorState's. It +// manages Fst properties and the # of input and output epsilons. 
+template <class A> +class VectorFstImpl : public VectorFstBaseImpl< VectorState<A> > { + public: + using FstImpl<A>::SetInputSymbols; + using FstImpl<A>::SetOutputSymbols; + using FstImpl<A>::SetType; + using FstImpl<A>::SetProperties; + using FstImpl<A>::Properties; + + using VectorFstBaseImpl<VectorState<A> >::Start; + using VectorFstBaseImpl<VectorState<A> >::NumStates; + using VectorFstBaseImpl<VectorState<A> >::GetState; + using VectorFstBaseImpl<VectorState<A> >::ReserveArcs; + + friend class MutableArcIterator< VectorFst<A> >; + + typedef VectorFstBaseImpl< VectorState<A> > BaseImpl; + typedef typename A::Weight Weight; + typedef typename A::StateId StateId; + + VectorFstImpl() { + SetType("vector"); + SetProperties(kNullProperties | kStaticProperties); + } + explicit VectorFstImpl(const Fst<A> &fst); + + static VectorFstImpl<A> *Read(istream &strm, const FstReadOptions &opts); + + size_t NumInputEpsilons(StateId s) const { return GetState(s)->niepsilons; } + + size_t NumOutputEpsilons(StateId s) const { return GetState(s)->noepsilons; } + + void SetStart(StateId s) { + BaseImpl::SetStart(s); + SetProperties(SetStartProperties(Properties())); + } + + void SetFinal(StateId s, Weight w) { + Weight ow = BaseImpl::Final(s); + BaseImpl::SetFinal(s, w); + SetProperties(SetFinalProperties(Properties(), ow, w)); + } + + StateId AddState() { + StateId s = BaseImpl::AddState(); + SetProperties(AddStateProperties(Properties())); + return s; + } + + void AddArc(StateId s, const A &arc) { + VectorState<A> *state = GetState(s); + if (arc.ilabel == 0) { + ++state->niepsilons; + } + if (arc.olabel == 0) { + ++state->noepsilons; + } + + const A *parc = state->arcs.empty() ? 
0 : &(state->arcs.back()); + SetProperties(AddArcProperties(Properties(), s, arc, parc)); + + BaseImpl::AddArc(s, arc); + } + + void DeleteStates(const vector<StateId> &dstates) { + BaseImpl::DeleteStates(dstates); + SetProperties(DeleteStatesProperties(Properties())); + } + + void DeleteStates() { + BaseImpl::DeleteStates(); + SetProperties(DeleteAllStatesProperties(Properties(), + kStaticProperties)); + } + + void DeleteArcs(StateId s, size_t n) { + const vector<A> &arcs = GetState(s)->arcs; + for (size_t i = 0; i < n; ++i) { + size_t j = arcs.size() - i - 1; + if (arcs[j].ilabel == 0) + --GetState(s)->niepsilons; + if (arcs[j].olabel == 0) + --GetState(s)->noepsilons; + } + BaseImpl::DeleteArcs(s, n); + SetProperties(DeleteArcsProperties(Properties())); + } + + void DeleteArcs(StateId s) { + GetState(s)->niepsilons = 0; + GetState(s)->noepsilons = 0; + BaseImpl::DeleteArcs(s); + SetProperties(DeleteArcsProperties(Properties())); + } + + // Properties always true of this Fst class + static const uint64 kStaticProperties = kExpanded | kMutable; + + private: + // Current file format version + static const int kFileVersion = 2; + // Minimum file format version supported + static const int kMinFileVersion = 1; + + DISALLOW_COPY_AND_ASSIGN(VectorFstImpl); +}; + +template <class A> const uint64 VectorFstImpl<A>::kStaticProperties; +template <class A> const int VectorFstImpl<A>::kFileVersion; +template <class A> const int VectorFstImpl<A>::kMinFileVersion; + + +template <class A> +VectorFstImpl<A>::VectorFstImpl(const Fst<A> &fst) { + SetType("vector"); + SetInputSymbols(fst.InputSymbols()); + SetOutputSymbols(fst.OutputSymbols()); + BaseImpl::SetStart(fst.Start()); + if (fst.Properties(kExpanded, false)) + BaseImpl::ReserveStates(CountStates(fst)); + + for (StateIterator< Fst<A> > siter(fst); + !siter.Done(); + siter.Next()) { + StateId s = siter.Value(); + BaseImpl::AddState(); + BaseImpl::SetFinal(s, fst.Final(s)); + ReserveArcs(s, fst.NumArcs(s)); + for 
(ArcIterator< Fst<A> > aiter(fst, s); + !aiter.Done(); + aiter.Next()) { + const A &arc = aiter.Value(); + BaseImpl::AddArc(s, arc); + if (arc.ilabel == 0) + ++GetState(s)->niepsilons; + if (arc.olabel == 0) + ++GetState(s)->noepsilons; + } + } + SetProperties(fst.Properties(kCopyProperties, false) | kStaticProperties); +} + +template <class A> +VectorFstImpl<A> *VectorFstImpl<A>::Read(istream &strm, + const FstReadOptions &opts) { + VectorFstImpl<A> *impl = new VectorFstImpl; + FstHeader hdr; + if (!impl->ReadHeader(strm, opts, kMinFileVersion, &hdr)) { + delete impl; + return 0; + } + impl->BaseImpl::SetStart(hdr.Start()); + if (hdr.NumStates() != kNoStateId) { + impl->ReserveStates(hdr.NumStates()); + } + + StateId s = 0; + for (;hdr.NumStates() == kNoStateId || s < hdr.NumStates(); ++s) { + typename A::Weight final; + if (!final.Read(strm)) break; + impl->BaseImpl::AddState(); + VectorState<A> *state = impl->GetState(s); + state->final = final; + int64 narcs; + ReadType(strm, &narcs); + if (!strm) { + LOG(ERROR) << "VectorFst::Read: read failed: " << opts.source; + delete impl; + return 0; + } + impl->ReserveArcs(s, narcs); + for (size_t j = 0; j < narcs; ++j) { + A arc; + ReadType(strm, &arc.ilabel); + ReadType(strm, &arc.olabel); + arc.weight.Read(strm); + ReadType(strm, &arc.nextstate); + if (!strm) { + LOG(ERROR) << "VectorFst::Read: read failed: " << opts.source; + delete impl; + return 0; + } + impl->BaseImpl::AddArc(s, arc); + if (arc.ilabel == 0) + ++state->niepsilons; + if (arc.olabel == 0) + ++state->noepsilons; + } + } + if (hdr.NumStates() != kNoStateId && s != hdr.NumStates()) { + LOG(ERROR) << "VectorFst::Read: unexpected end of file: " << opts.source; + delete impl; + return 0; + } + return impl; +} + +// Converts a string into a weight. +template <class W> class WeightFromString { + public: + W operator()(const string &s); +}; + +// Generic case fails. 
+template <class W> inline +W WeightFromString<W>::operator()(const string &s) { + FSTERROR() << "VectorFst::Read: Obsolete file format"; + return W::NoWeight(); +} + +// TropicalWeight version. +template <> inline +TropicalWeight WeightFromString<TropicalWeight>::operator()(const string &s) { + float f; + memcpy(&f, s.data(), sizeof(f)); + return TropicalWeight(f); +} + +// LogWeight version. +template <> inline +LogWeight WeightFromString<LogWeight>::operator()(const string &s) { + float f; + memcpy(&f, s.data(), sizeof(f)); + return LogWeight(f); +} + +// Simple concrete, mutable FST. This class attaches interface to +// implementation and handles reference counting, delegating most +// methods to ImplToMutableFst. Supports additional operations: +// ReserveStates and ReserveArcs (cf. STL vectors). +template <class A> +class VectorFst : public ImplToMutableFst< VectorFstImpl<A> > { + public: + friend class StateIterator< VectorFst<A> >; + friend class ArcIterator< VectorFst<A> >; + friend class MutableArcIterator< VectorFst<A> >; + template <class F, class G> friend void Cast(const F &, G *); + + typedef A Arc; + typedef typename A::StateId StateId; + typedef VectorFstImpl<A> Impl; + + VectorFst() : ImplToMutableFst<Impl>(new Impl) {} + + explicit VectorFst(const Fst<A> &fst) + : ImplToMutableFst<Impl>(new Impl(fst)) {} + + VectorFst(const VectorFst<A> &fst) : ImplToMutableFst<Impl>(fst) {} + + // Get a copy of this VectorFst. See Fst<>::Copy() for further doc. 
+ virtual VectorFst<A> *Copy(bool safe = false) const { + return new VectorFst<A>(*this); + } + + VectorFst<A> &operator=(const VectorFst<A> &fst) { + SetImpl(fst.GetImpl(), false); + return *this; + } + + virtual VectorFst<A> &operator=(const Fst<A> &fst) { + if (this != &fst) SetImpl(new Impl(fst)); + return *this; + } + + // Read a VectorFst from an input stream; return NULL on error + static VectorFst<A> *Read(istream &strm, const FstReadOptions &opts) { + Impl* impl = Impl::Read(strm, opts); + return impl ? new VectorFst<A>(impl) : 0; + } + + // Read a VectorFst from a file; return NULL on error + // Empty filename reads from standard input + static VectorFst<A> *Read(const string &filename) { + Impl* impl = ImplToExpandedFst<Impl, MutableFst<A> >::Read(filename); + return impl ? new VectorFst<A>(impl) : 0; + } + + virtual bool Write(ostream &strm, const FstWriteOptions &opts) const { + return WriteFst(*this, strm, opts); + } + + virtual bool Write(const string &filename) const { + return Fst<A>::WriteFile(filename); + } + + template <class F> + static bool WriteFst(const F &fst, ostream &strm, + const FstWriteOptions &opts); + + void ReserveStates(StateId n) { + MutateCheck(); + GetImpl()->ReserveStates(n); + } + + void ReserveArcs(StateId s, size_t n) { + MutateCheck(); + GetImpl()->ReserveArcs(s, n); + } + + virtual void InitStateIterator(StateIteratorData<Arc> *data) const { + GetImpl()->InitStateIterator(data); + } + + virtual void InitArcIterator(StateId s, ArcIteratorData<Arc> *data) const { + GetImpl()->InitArcIterator(s, data); + } + + virtual inline + void InitMutableArcIterator(StateId s, MutableArcIteratorData<A> *); + + private: + explicit VectorFst(Impl *impl) : ImplToMutableFst<Impl>(impl) {} + + // Makes visible to friends. 
+ Impl *GetImpl() const { return ImplToFst< Impl, MutableFst<A> >::GetImpl(); } + + void SetImpl(Impl *impl, bool own_impl = true) { + ImplToFst< Impl, MutableFst<A> >::SetImpl(impl, own_impl); + } + + void MutateCheck() { return ImplToMutableFst<Impl>::MutateCheck(); } +}; + +// Specialization for VectorFst; see generic version in fst.h +// for sample usage (but use the VectorFst type!). This version +// should inline. +template <class A> +class StateIterator< VectorFst<A> > { + public: + typedef typename A::StateId StateId; + + explicit StateIterator(const VectorFst<A> &fst) + : nstates_(fst.GetImpl()->NumStates()), s_(0) {} + + bool Done() const { return s_ >= nstates_; } + + StateId Value() const { return s_; } + + void Next() { ++s_; } + + void Reset() { s_ = 0; } + + private: + StateId nstates_; + StateId s_; + + DISALLOW_COPY_AND_ASSIGN(StateIterator); +}; + +// Writes Fst to file, will call CountStates so may involve two passes if +// called from an Fst that is not derived from Expanded. 
+template <class A> +template <class F> +bool VectorFst<A>::WriteFst(const F &fst, ostream &strm, + const FstWriteOptions &opts) { + static const int kFileVersion = 2; + bool update_header = true; + FstHeader hdr; + hdr.SetStart(fst.Start()); + hdr.SetNumStates(kNoStateId); + size_t start_offset = 0; + if (fst.Properties(kExpanded, false) || (start_offset = strm.tellp()) != -1) { + hdr.SetNumStates(CountStates(fst)); + update_header = false; + } + uint64 properties = fst.Properties(kCopyProperties, false) | + VectorFstImpl<A>::kStaticProperties; + FstImpl<A>::WriteFstHeader(fst, strm, opts, kFileVersion, "vector", + properties, &hdr); + StateId num_states = 0; + for (StateIterator<F> siter(fst); !siter.Done(); siter.Next()) { + typename A::StateId s = siter.Value(); + fst.Final(s).Write(strm); + int64 narcs = fst.NumArcs(s); + WriteType(strm, narcs); + for (ArcIterator<F> aiter(fst, s); !aiter.Done(); aiter.Next()) { + const A &arc = aiter.Value(); + WriteType(strm, arc.ilabel); + WriteType(strm, arc.olabel); + arc.weight.Write(strm); + WriteType(strm, arc.nextstate); + } + num_states++; + } + strm.flush(); + if (!strm) { + LOG(ERROR) << "VectorFst::Write: write failed: " << opts.source; + return false; + } + if (update_header) { + hdr.SetNumStates(num_states); + return FstImpl<A>::UpdateFstHeader(fst, strm, opts, kFileVersion, "vector", + properties, &hdr, start_offset); + } else { + if (num_states != hdr.NumStates()) { + LOG(ERROR) << "Inconsistent number of states observed during write"; + return false; + } + } + return true; +} + +// Specialization for VectorFst; see generic version in fst.h +// for sample usage (but use the VectorFst type!). This version +// should inline. 
+template <class A> +class ArcIterator< VectorFst<A> > { + public: + typedef typename A::StateId StateId; + + ArcIterator(const VectorFst<A> &fst, StateId s) + : arcs_(fst.GetImpl()->GetState(s)->arcs), i_(0) {} + + bool Done() const { return i_ >= arcs_.size(); } + + const A& Value() const { return arcs_[i_]; } + + void Next() { ++i_; } + + void Reset() { i_ = 0; } + + void Seek(size_t a) { i_ = a; } + + size_t Position() const { return i_; } + + uint32 Flags() const { + return kArcValueFlags; + } + + void SetFlags(uint32 f, uint32 m) {} + + private: + const vector<A>& arcs_; + size_t i_; + + DISALLOW_COPY_AND_ASSIGN(ArcIterator); +}; + +// Specialization for VectorFst; see generic version in fst.h +// for sample usage (but use the VectorFst type!). This version +// should inline. +template <class A> +class MutableArcIterator< VectorFst<A> > + : public MutableArcIteratorBase<A> { + public: + typedef typename A::StateId StateId; + typedef typename A::Weight Weight; + + MutableArcIterator(VectorFst<A> *fst, StateId s) : i_(0) { + fst->MutateCheck(); + state_ = fst->GetImpl()->GetState(s); + properties_ = &fst->GetImpl()->properties_; + } + + bool Done() const { return i_ >= state_->arcs.size(); } + + const A& Value() const { return state_->arcs[i_]; } + + void Next() { ++i_; } + + size_t Position() const { return i_; } + + void Reset() { i_ = 0; } + + void Seek(size_t a) { i_ = a; } + + void SetValue(const A &arc) { + A& oarc = state_->arcs[i_]; + if (oarc.ilabel != oarc.olabel) + *properties_ &= ~kNotAcceptor; + if (oarc.ilabel == 0) { + --state_->niepsilons; + *properties_ &= ~kIEpsilons; + if (oarc.olabel == 0) + *properties_ &= ~kEpsilons; + } + if (oarc.olabel == 0) { + --state_->noepsilons; + *properties_ &= ~kOEpsilons; + } + if (oarc.weight != Weight::Zero() && oarc.weight != Weight::One()) + *properties_ &= ~kWeighted; + oarc = arc; + if (arc.ilabel != arc.olabel) { + *properties_ |= kNotAcceptor; + *properties_ &= ~kAcceptor; + } + if (arc.ilabel == 0) { + 
++state_->niepsilons; + *properties_ |= kIEpsilons; + *properties_ &= ~kNoIEpsilons; + if (arc.olabel == 0) { + *properties_ |= kEpsilons; + *properties_ &= ~kNoEpsilons; + } + } + if (arc.olabel == 0) { + ++state_->noepsilons; + *properties_ |= kOEpsilons; + *properties_ &= ~kNoOEpsilons; + } + if (arc.weight != Weight::Zero() && arc.weight != Weight::One()) { + *properties_ |= kWeighted; + *properties_ &= ~kUnweighted; + } + *properties_ &= kSetArcProperties | kAcceptor | kNotAcceptor | + kEpsilons | kNoEpsilons | kIEpsilons | kNoIEpsilons | + kOEpsilons | kNoOEpsilons | kWeighted | kUnweighted; + } + + uint32 Flags() const { + return kArcValueFlags; + } + + void SetFlags(uint32 f, uint32 m) {} + + + private: + // This allows base-class virtual access to non-virtual derived- + // class members of the same name. It makes the derived class more + // efficient to use but unsafe to further derive. + virtual bool Done_() const { return Done(); } + virtual const A& Value_() const { return Value(); } + virtual void Next_() { Next(); } + virtual size_t Position_() const { return Position(); } + virtual void Reset_() { Reset(); } + virtual void Seek_(size_t a) { Seek(a); } + virtual void SetValue_(const A &a) { SetValue(a); } + uint32 Flags_() const { return Flags(); } + void SetFlags_(uint32 f, uint32 m) { SetFlags(f, m); } + + struct VectorState<A> *state_; + uint64 *properties_; + size_t i_; + + DISALLOW_COPY_AND_ASSIGN(MutableArcIterator); +}; + +// Provide information needed for the generic mutable arc iterator +template <class A> inline +void VectorFst<A>::InitMutableArcIterator( + StateId s, MutableArcIteratorData<A> *data) { + data->base = new MutableArcIterator< VectorFst<A> >(this, s); +} + +// A useful alias when using StdArc. 
+typedef VectorFst<StdArc> StdVectorFst; + +} // namespace fst + +#endif // FST_LIB_VECTOR_FST_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/verify.h b/kaldi_io/src/tools/openfst/include/fst/verify.h new file mode 100644 index 0000000..576cfca --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/verify.h @@ -0,0 +1,126 @@ +// verify.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// Function to verify an Fst's contents + +#ifndef FST_LIB_VERIFY_H__ +#define FST_LIB_VERIFY_H__ + +#include <fst/fst.h> +#include <fst/test-properties.h> + + +namespace fst { + +// Verifies that an Fst's contents are sane. 
+template<class Arc> +bool Verify(const Fst<Arc> &fst, bool allow_negative_labels = false) { + typedef typename Arc::Label Label; + typedef typename Arc::Weight Weight; + typedef typename Arc::StateId StateId; + + StateId start = fst.Start(); + const SymbolTable *isyms = fst.InputSymbols(); + const SymbolTable *osyms = fst.OutputSymbols(); + + // Count states + StateId ns = 0; + for (StateIterator< Fst<Arc> > siter(fst); + !siter.Done(); + siter.Next()) + ++ns; + + if (start == kNoStateId && ns > 0) { + LOG(ERROR) << "Verify: Fst start state ID unset"; + return false; + } else if (start >= ns) { + LOG(ERROR) << "Verify: Fst start state ID exceeds number of states"; + return false; + } + + for (StateIterator< Fst<Arc> > siter(fst); + !siter.Done(); + siter.Next()) { + StateId s = siter.Value(); + size_t na = 0; + for (ArcIterator< Fst<Arc> > aiter(fst, s); + !aiter.Done(); + aiter.Next()) { + const Arc &arc =aiter.Value(); + if (!allow_negative_labels && arc.ilabel < 0) { + LOG(ERROR) << "Verify: Fst input label ID of arc at position " + << na << " of state " << s << " is negative"; + return false; + } else if (isyms && isyms->Find(arc.ilabel) == "") { + LOG(ERROR) << "Verify: Fst input label ID " << arc.ilabel + << " of arc at position " << na << " of state " << s + << " is missing from input symbol table \"" + << isyms->Name() << "\""; + return false; + } else if (!allow_negative_labels && arc.olabel < 0) { + LOG(ERROR) << "Verify: Fst output label ID of arc at position " + << na << " of state " << s << " is negative"; + return false; + } else if (osyms && osyms->Find(arc.olabel) == "") { + LOG(ERROR) << "Verify: Fst output label ID " << arc.olabel + << " of arc at position " << na << " of state " << s + << " is missing from output symbol table \"" + << osyms->Name() << "\""; + return false; + } else if (!arc.weight.Member() || arc.weight == Weight::Zero()) { + LOG(ERROR) << "Verify: Fst weight of arc at position " + << na << " of state " << s << " is invalid"; + 
return false; + } else if (arc.nextstate < 0) { + LOG(ERROR) << "Verify: Fst destination state ID of arc at position " + << na << " of state " << s << " is negative"; + return false; + } else if (arc.nextstate >= ns) { + LOG(ERROR) << "Verify: Fst destination state ID of arc at position " + << na << " of state " << s + << " exceeds number of states"; + return false; + } + ++na; + } + if (!fst.Final(s).Member()) { + LOG(ERROR) << "Verify: Fst final weight of state " << s << " is invalid"; + return false; + } + } + uint64 fst_props = fst.Properties(kFstProperties, false); + if (fst_props & kError) { + LOG(ERROR) << "Verify: Fst error property is set"; + return false; + } + + uint64 known_props; + uint64 test_props = ComputeProperties(fst, kFstProperties, &known_props, + false); + if (!CompatProperties(fst_props, test_props)) { + LOG(ERROR) << "Verify: stored Fst properties incorrect " + << "(props1 = stored props, props2 = tested)"; + return false; + } else { + return true; + } +} + +} // namespace fst + +#endif // FST_LIB_VERIFY_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/visit.h b/kaldi_io/src/tools/openfst/include/fst/visit.h new file mode 100644 index 0000000..5f5059a --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/visit.h @@ -0,0 +1,284 @@ +// visit.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. 
+// Author: [email protected] (Michael Riley) +// +// \file +// Queue-dependent visitation of finite-state transducers. See also +// dfs-visit.h. + +#ifndef FST_LIB_VISIT_H__ +#define FST_LIB_VISIT_H__ + + +#include <fst/arcfilter.h> +#include <fst/mutable-fst.h> + + +namespace fst { + +// Visitor Interface - class determines actions taken during a visit. +// If any of the boolean member functions return false, the visit is +// aborted by first calling FinishState() on all unfinished (grey) +// states and then calling FinishVisit(). +// +// Note this is more general than the visitor interface in +// dfs-visit.h but lacks some DFS-specific behavior. +// +// template <class Arc> +// class Visitor { +// public: +// typedef typename Arc::StateId StateId; +// +// Visitor(T *return_data); +// // Invoked before visit +// void InitVisit(const Fst<Arc> &fst); +// // Invoked when state discovered (2nd arg is visitation root) +// bool InitState(StateId s, StateId root); +// // Invoked when arc to white/undiscovered state examined +// bool WhiteArc(StateId s, const Arc &a); +// // Invoked when arc to grey/unfinished state examined +// bool GreyArc(StateId s, const Arc &a); +// // Invoked when arc to black/finished state examined +// bool BlackArc(StateId s, const Arc &a); +// // Invoked when state finished. +// void FinishState(StateId s); +// // Invoked after visit +// void FinishVisit(); +// }; + +// Performs queue-dependent visitation. Visitor class argument +// determines actions and contains any return data. ArcFilter +// determines arcs that are considered. +// +// Note this is more general than DfsVisit() in dfs-visit.h but lacks +// some DFS-specific Visitor behavior. 
+template <class Arc, class V, class Q, class ArcFilter> +void Visit(const Fst<Arc> &fst, V *visitor, Q *queue, ArcFilter filter) { + + typedef typename Arc::StateId StateId; + typedef ArcIterator< Fst<Arc> > AIterator; + + visitor->InitVisit(fst); + + StateId start = fst.Start(); + if (start == kNoStateId) { + visitor->FinishVisit(); + return; + } + + // An Fst state's visit color + const unsigned kWhiteState = 0x01; // Undiscovered + const unsigned kGreyState = 0x02; // Discovered & unfinished + const unsigned kBlackState = 0x04; // Finished + + // We destroy an iterator as soon as possible and mark it so + const unsigned kArcIterDone = 0x08; // Arc iterator done and destroyed + + vector<unsigned char> state_status; + vector<AIterator *> arc_iterator; + + StateId nstates = start + 1; // # of known states in general case + bool expanded = false; + if (fst.Properties(kExpanded, false)) { // tests if expanded case, then + nstates = CountStates(fst); // uses ExpandedFst::NumStates(). + expanded = true; + } + + state_status.resize(nstates, kWhiteState); + arc_iterator.resize(nstates); + StateIterator< Fst<Arc> > siter(fst); + + // Continues visit while true + bool visit = true; + + // Iterates over trees in visit forest. + for (StateId root = start; visit && root < nstates;) { + visit = visitor->InitState(root, root); + state_status[root] = kGreyState; + queue->Enqueue(root); + while (!queue->Empty()) { + StateId s = queue->Head(); + if (s >= state_status.size()) { + nstates = s + 1; + state_status.resize(nstates, kWhiteState); + arc_iterator.resize(nstates); + } + // Creates arc iterator if needed. + if (arc_iterator[s] == 0 && !(state_status[s] & kArcIterDone) && visit) + arc_iterator[s] = new AIterator(fst, s); + // Deletes arc iterator if done. 
+ AIterator *aiter = arc_iterator[s]; + if ((aiter && aiter->Done()) || !visit) { + delete aiter; + arc_iterator[s] = 0; + state_status[s] |= kArcIterDone; + } + // Dequeues state and marks black if done + if (state_status[s] & kArcIterDone) { + queue->Dequeue(); + visitor->FinishState(s); + state_status[s] = kBlackState; + continue; + } + + const Arc &arc = aiter->Value(); + if (arc.nextstate >= state_status.size()) { + nstates = arc.nextstate + 1; + state_status.resize(nstates, kWhiteState); + arc_iterator.resize(nstates); + } + // Visits respective arc types + if (filter(arc)) { + // Enqueues destination state and marks grey if white + if (state_status[arc.nextstate] == kWhiteState) { + visit = visitor->WhiteArc(s, arc); + if (!visit) continue; + visit = visitor->InitState(arc.nextstate, root); + state_status[arc.nextstate] = kGreyState; + queue->Enqueue(arc.nextstate); + } else if (state_status[arc.nextstate] == kBlackState) { + visit = visitor->BlackArc(s, arc); + } else { + visit = visitor->GreyArc(s, arc); + } + } + aiter->Next(); + // Destroys an iterator ASAP for efficiency. + if (aiter->Done()) { + delete aiter; + arc_iterator[s] = 0; + state_status[s] |= kArcIterDone; + } + } + // Finds next tree root + for (root = root == start ? 0 : root + 1; + root < nstates && state_status[root] != kWhiteState; + ++root) { + } + + // Check for a state beyond the largest known state + if (!expanded && root == nstates) { + for (; !siter.Done(); siter.Next()) { + if (siter.Value() == nstates) { + ++nstates; + state_status.push_back(kWhiteState); + arc_iterator.push_back(0); + break; + } + } + } + } + visitor->FinishVisit(); +} + + +template <class Arc, class V, class Q> +inline void Visit(const Fst<Arc> &fst, V *visitor, Q* queue) { + Visit(fst, visitor, queue, AnyArcFilter<Arc>()); +} + +// Copies input FST to mutable FST following queue order. 
+template <class A> +class CopyVisitor { + public: + typedef A Arc; + typedef typename A::StateId StateId; + + CopyVisitor(MutableFst<Arc> *ofst) : ifst_(0), ofst_(ofst) {} + + void InitVisit(const Fst<A> &ifst) { + ifst_ = &ifst; + ofst_->DeleteStates(); + ofst_->SetStart(ifst_->Start()); + } + + bool InitState(StateId s, StateId) { + while (ofst_->NumStates() <= s) + ofst_->AddState(); + return true; + } + + bool WhiteArc(StateId s, const Arc &arc) { + ofst_->AddArc(s, arc); + return true; + } + + bool GreyArc(StateId s, const Arc &arc) { + ofst_->AddArc(s, arc); + return true; + } + + bool BlackArc(StateId s, const Arc &arc) { + ofst_->AddArc(s, arc); + return true; + } + + void FinishState(StateId s) { + ofst_->SetFinal(s, ifst_->Final(s)); + } + + void FinishVisit() {} + + private: + const Fst<Arc> *ifst_; + MutableFst<Arc> *ofst_; +}; + + +// Visits input FST up to a state limit following queue order. If +// 'access_only' is true, aborts on visiting first state not +// accessible from the initial state. 
+template <class A> +class PartialVisitor { + public: + typedef A Arc; + typedef typename A::StateId StateId; + + explicit PartialVisitor(StateId maxvisit, bool access_only = false) + : maxvisit_(maxvisit), + access_only_(access_only), + start_(kNoStateId) {} + + void InitVisit(const Fst<A> &ifst) { + nvisit_ = 0; + start_ = ifst.Start(); + } + + bool InitState(StateId s, StateId root) { + if (access_only_ && root != start_) + return false; + ++nvisit_; + return nvisit_ <= maxvisit_; + } + + bool WhiteArc(StateId s, const Arc &arc) { return true; } + bool GreyArc(StateId s, const Arc &arc) { return true; } + bool BlackArc(StateId s, const Arc &arc) { return true; } + void FinishState(StateId s) {} + void FinishVisit() {} + + private: + StateId maxvisit_; + bool access_only_; + StateId nvisit_; + StateId start_; + +}; + + +} // namespace fst + +#endif // FST_LIB_VISIT_H__ diff --git a/kaldi_io/src/tools/openfst/include/fst/weight.h b/kaldi_io/src/tools/openfst/include/fst/weight.h new file mode 100644 index 0000000..7eb4bb1 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/weight.h @@ -0,0 +1,179 @@ +// weight.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: [email protected] (Michael Riley) +// +// \file +// General weight set and associated semiring operation definitions. 
+// +// A semiring is specified by two binary operations Plus and Times and +// two designated elements Zero and One with the following properties: +// Plus: associative, commutative, and has Zero as its identity. +// Times: associative and has identity One, distributes w.r.t. Plus, and +// has Zero as an annihilator: +// Times(Zero(), a) == Times(a, Zero()) = Zero(). +// +// A left semiring distributes on the left; a right semiring is +// similarly defined. +// +// A Weight class must have binary functions =Plus= and =Times= and +// static member functions =Zero()= and =One()= and these must form +// (at least) a left or right semiring. +// +// In addition, the following should be defined for a Weight: +// Member: predicate on set membership. +// NoWeight: static member function that returns an element that is +// not a set member; used to signal an error. +// >>: reads textual representation of a weight. +// <<: prints textual representation of a weight. +// Read(istream &strm): reads binary representation of a weight. +// Write(ostream &strm): writes binary representation of a weight. +// Hash: maps weight to size_t. +// ApproxEqual: approximate equality (for inexact weights) +// Quantize: quantizes wrt delta (for inexact weights) +// Divide: for all a,b,c s.t. Times(a, b) == c +// --> b' = Divide(c, a, DIVIDE_LEFT) if a left semiring, b'.Member() +// and Times(a, b') == c +// --> a' = Divide(c, b, DIVIDE_RIGHT) if a right semiring, a'.Member() +// and Times(a', b) == c +// --> b' = Divide(c, a) = Divide(c, a, DIVIDE_ANY) = +// Divide(c, a, DIVIDE_LEFT) = Divide(c, a, DIVIDE_RIGHT) if a +// commutative semiring, b'.Member() and Times(a, b') = Times(b', a) = c +// ReverseWeight: the type of the corresponding reverse weight. +// Typically the same type as Weight for a (both left and right) semiring. +// For the left string semiring, it is the right string semiring. +// Reverse: a mapping from Weight to ReverseWeight s.t. 
+// --> Reverse(Reverse(a)) = a +// --> Reverse(Plus(a, b)) = Plus(Reverse(a), Reverse(b)) +// --> Reverse(Times(a, b)) = Times(Reverse(b), Reverse(a)) +// Typically the identity mapping in a (both left and right) semiring. +// In the left string semiring, it maps to the reverse string +// in the right string semiring. +// Properties: specifies additional properties that hold: +// LeftSemiring: indicates weights form a left semiring. +// RightSemiring: indicates weights form a right semiring. +// Commutative: for all a,b: Times(a,b) == Times(b,a) +// Idempotent: for all a: Plus(a, a) == a. +// Path: for all a, b: Plus(a, b) == a or Plus(a, b) == b. + + +#ifndef FST_LIB_WEIGHT_H__ +#define FST_LIB_WEIGHT_H__ + +#include <cmath> +#include <cctype> +#include <iostream> +#include <sstream> + +#include <fst/compat.h> + +#include <fst/util.h> + + +namespace fst { + +// +// CONSTANT DEFINITIONS +// + +// A representable float near .001 +const float kDelta = 1.0F/1024.0F; + +// For all a,b,c: Times(c, Plus(a,b)) = Plus(Times(c,a), Times(c, b)) +const uint64 kLeftSemiring = 0x0000000000000001ULL; + +// For all a,b,c: Times(Plus(a,b), c) = Plus(Times(a,c), Times(b, c)) +const uint64 kRightSemiring = 0x0000000000000002ULL; + +const uint64 kSemiring = kLeftSemiring | kRightSemiring; + +// For all a,b: Times(a,b) = Times(b,a) +const uint64 kCommutative = 0x0000000000000004ULL; + +// For all a: Plus(a, a) = a +const uint64 kIdempotent = 0x0000000000000008ULL; + +// For all a,b: Plus(a,b) = a or Plus(a,b) = b +const uint64 kPath = 0x0000000000000010ULL; + + +// Determines direction of division. +enum DivideType { DIVIDE_LEFT, // left division + DIVIDE_RIGHT, // right division + DIVIDE_ANY }; // division in a commutative semiring + +// NATURAL ORDER +// +// By definition: +// a <= b iff a + b = a +// The natural order is a negative partial order iff the semiring is +// idempotent. It is trivially monotonic for plus. It is left +// (resp. 
right) monotonic for times iff the semiring is left +// (resp. right) distributive. It is a total order iff the semiring +// has the path property. See Mohri, "Semiring Framework and +// Algorithms for Shortest-Distance Problems", Journal of Automata, +// Languages and Combinatorics 7(3):321-350, 2002. We define the +// strict version of this order below. + +template <class W> +class NaturalLess { + public: + typedef W Weight; + + NaturalLess() { + if (!(W::Properties() & kIdempotent)) { + FSTERROR() << "NaturalLess: Weight type is not idempotent: " + << W::Type(); + } + } + + bool operator()(const W &w1, const W &w2) const { + return (Plus(w1, w2) == w1) && w1 != w2; + } +}; + + +// Power is the iterated product for arbitrary semirings such that +// Power(w, 0) is One() for the semiring, and +// Power(w, n) = Times(Power(w, n-1), w) + +template <class W> +W Power(W w, size_t n) { + W result = W::One(); + for (size_t i = 0; i < n; ++i) { + result = Times(result, w); + } + return result; +} + +// General weight converter - raises error. +template <class W1, class W2> +struct WeightConvert { + W2 operator()(W1 w1) const { + FSTERROR() << "WeightConvert: can't convert weight from \"" + << W1::Type() << "\" to \"" << W2::Type(); + return W2::NoWeight(); + } +}; + +// Specialized weight converter to self. +template <class W> +struct WeightConvert<W, W> { + W operator()(W w) const { return w; } +}; + +} // namespace fst + +#endif // FST_LIB_WEIGHT_H__ |