From 96a32415ab43377cf1575bd3f4f2980f58028209 Mon Sep 17 00:00:00 2001 From: Determinant Date: Fri, 14 Aug 2015 11:51:42 +0800 Subject: add implementation for kaldi io (by ymz) --- .../openfst/include/fst/sparse-tuple-weight.h | 640 +++++++++++++++++++++ 1 file changed, 640 insertions(+) create mode 100644 kaldi_io/src/tools/openfst/include/fst/sparse-tuple-weight.h (limited to 'kaldi_io/src/tools/openfst/include/fst/sparse-tuple-weight.h') diff --git a/kaldi_io/src/tools/openfst/include/fst/sparse-tuple-weight.h b/kaldi_io/src/tools/openfst/include/fst/sparse-tuple-weight.h new file mode 100644 index 0000000..c12ef4f --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/sparse-tuple-weight.h @@ -0,0 +1,640 @@ +// sparse-tuple-weight.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: krr@google.com (Kasturi Rangan Raghavan) +// Inspiration: allauzen@google.com (Cyril Allauzen) +// \file +// Sparse version of tuple-weight, based on tuple-weight.h +// Internally stores sparse key, value pairs in linked list +// Default value elemnt is the assumed value of unset keys +// Internal singleton implementation that stores first key, +// value pair as a initialized member variable to avoide +// unnecessary allocation on heap. +// Use SparseTupleWeightIterator to iterate through the key,value pairs +// Note: this does NOT iterate through the default value. +// +// Sparse tuple weight set operation definitions. + +#ifndef FST_LIB_SPARSE_TUPLE_WEIGHT_H__ +#define FST_LIB_SPARSE_TUPLE_WEIGHT_H__ + +#include +#include +#include +#include +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; + +#include + + +DECLARE_string(fst_weight_parentheses); +DECLARE_string(fst_weight_separator); + +namespace fst { + +template class SparseTupleWeight; + +template +class SparseTupleWeightIterator; + +template +istream &operator>>(istream &strm, SparseTupleWeight &w); + +// Arbitrary dimension tuple weight, stored as a sorted linked-list +// W is any weight class, +// K is the key value type. kNoKey(-1) is reserved for internal use +template +class SparseTupleWeight { + public: + typedef pair Pair; + typedef SparseTupleWeight ReverseWeight; + + const static K kNoKey = -1; + SparseTupleWeight() { + Init(); + } + + template + SparseTupleWeight(Iterator begin, Iterator end) { + Init(); + // Assumes input iterator is sorted + for (Iterator it = begin; it != end; ++it) + Push(*it); + } + + + SparseTupleWeight(const K& key, const W &w) { + Init(); + Push(key, w); + } + + SparseTupleWeight(const W &w) { + Init(w); + } + + SparseTupleWeight(const SparseTupleWeight &w) { + Init(w.DefaultValue()); + SetDefaultValue(w.DefaultValue()); + for (SparseTupleWeightIterator it(w); !it.Done(); it.Next()) { + Push(it.Value()); + } + } + + static const SparseTupleWeight &Zero() { + static SparseTupleWeight zero; + return zero; + } + + static const SparseTupleWeight &One() { + static SparseTupleWeight one(W::One()); + return one; + } + + static const SparseTupleWeight &NoWeight() { + static SparseTupleWeight no_weight(W::NoWeight()); + return no_weight; + } + + istream &Read(istream &strm) { + ReadType(strm, &default_); + ReadType(strm, &first_); + return ReadType(strm, &rest_); + } + + ostream &Write(ostream &strm) const { + WriteType(strm, default_); + WriteType(strm, first_); + return WriteType(strm, rest_); + } + + SparseTupleWeight &operator=(const SparseTupleWeight &w) { + if (this == &w) return *this; // check for w = w + Init(w.DefaultValue()); + for (SparseTupleWeightIterator it(w); !it.Done(); it.Next()) { + Push(it.Value()); + } + return *this; + } + + bool Member() const { + if (!DefaultValue().Member()) return false; + for (SparseTupleWeightIterator it(*this); !it.Done(); it.Next()) { + if (!it.Value().second.Member()) return false; + } + return true; + } + + // Assumes H() function exists for the hash of the key value + size_t Hash() const { + uint64 h = 0; + std::tr1::hash H; + for (SparseTupleWeightIterator it(*this); !it.Done(); it.Next()) { + h = 5 * h + H(it.Value().first); + h = 13 * h + it.Value().second.Hash(); + } + return size_t(h); + } + + SparseTupleWeight Quantize(float delta = kDelta) const { + SparseTupleWeight w; + for (SparseTupleWeightIterator it(*this); !it.Done(); it.Next()) { + w.Push(it.Value().first, it.Value().second.Quantize(delta)); + } + return w; + } + + ReverseWeight Reverse() const { + SparseTupleWeight w; + for (SparseTupleWeightIterator it(*this); !it.Done(); it.Next()) { + w.Push(it.Value().first, it.Value().second.Reverse()); + } + return w; + } + + // Common initializer among constructors. + void Init() { + Init(W::Zero()); + } + + void Init(const W& default_value) { + first_.first = kNoKey; + /* initialized to the reserved key value */ + default_ = default_value; + rest_.clear(); + } + + size_t Size() const { + if (first_.first == kNoKey) + return 0; + else + return rest_.size() + 1; + } + + inline void Push(const K &k, const W &w, bool default_value_check = true) { + Push(make_pair(k, w), default_value_check); + } + + inline void Push(const Pair &p, bool default_value_check = true) { + if (default_value_check && p.second == default_) return; + if (first_.first == kNoKey) { + first_ = p; + } else { + rest_.push_back(p); + } + } + + void SetDefaultValue(const W& val) { default_ = val; } + + const W& DefaultValue() const { return default_; } + + protected: + static istream& ReadNoParen( + istream&, SparseTupleWeight&, char separator); + + static istream& ReadWithParen( + istream&, SparseTupleWeight&, + char separator, char open_paren, char close_paren); + + private: + // Assumed default value of uninitialized keys, by default W::Zero() + W default_; + + // Key values pairs are first stored in first_, then fill rest_ + // this way we can avoid dynamic allocation in the common case + // where the weight is a single key,val pair. + Pair first_; + list rest_; + + friend istream &operator>>(istream&, SparseTupleWeight&); + friend class SparseTupleWeightIterator; +}; + +template +class SparseTupleWeightIterator { + public: + typedef typename SparseTupleWeight::Pair Pair; + typedef typename list::const_iterator const_iterator; + typedef typename list::iterator iterator; + + explicit SparseTupleWeightIterator(const SparseTupleWeight& w) + : first_(w.first_), rest_(w.rest_), init_(true), + iter_(rest_.begin()) {} + + bool Done() const { + if (init_) + return first_.first == SparseTupleWeight::kNoKey; + else + return iter_ == rest_.end(); + } + + const Pair& Value() const { return init_ ? first_ : *iter_; } + + void Next() { + if (init_) + init_ = false; + else + ++iter_; + } + + void Reset() { + init_ = true; + iter_ = rest_.begin(); + } + + private: + const Pair &first_; + const list & rest_; + bool init_; // in the initialized state? + typename list::const_iterator iter_; + + DISALLOW_COPY_AND_ASSIGN(SparseTupleWeightIterator); +}; + +template +inline void SparseTupleWeightMap( + SparseTupleWeight* ret, + const SparseTupleWeight& w1, + const SparseTupleWeight& w2, + const M& operator_mapper) { + SparseTupleWeightIterator w1_it(w1); + SparseTupleWeightIterator w2_it(w2); + const W& v1_def = w1.DefaultValue(); + const W& v2_def = w2.DefaultValue(); + ret->SetDefaultValue(operator_mapper.Map(0, v1_def, v2_def)); + while (!w1_it.Done() || !w2_it.Done()) { + const K& k1 = (w1_it.Done()) ? w2_it.Value().first : w1_it.Value().first; + const K& k2 = (w2_it.Done()) ? w1_it.Value().first : w2_it.Value().first; + const W& v1 = (w1_it.Done()) ? v1_def : w1_it.Value().second; + const W& v2 = (w2_it.Done()) ? v2_def : w2_it.Value().second; + if (k1 == k2) { + ret->Push(k1, operator_mapper.Map(k1, v1, v2)); + if (!w1_it.Done()) w1_it.Next(); + if (!w2_it.Done()) w2_it.Next(); + } else if (k1 < k2) { + ret->Push(k1, operator_mapper.Map(k1, v1, v2_def)); + w1_it.Next(); + } else { + ret->Push(k2, operator_mapper.Map(k2, v1_def, v2)); + w2_it.Next(); + } + } +} + +template +inline bool operator==(const SparseTupleWeight &w1, + const SparseTupleWeight &w2) { + const W& v1_def = w1.DefaultValue(); + const W& v2_def = w2.DefaultValue(); + if (v1_def != v2_def) return false; + + SparseTupleWeightIterator w1_it(w1); + SparseTupleWeightIterator w2_it(w2); + while (!w1_it.Done() || !w2_it.Done()) { + const K& k1 = (w1_it.Done()) ? w2_it.Value().first : w1_it.Value().first; + const K& k2 = (w2_it.Done()) ? w1_it.Value().first : w2_it.Value().first; + const W& v1 = (w1_it.Done()) ? v1_def : w1_it.Value().second; + const W& v2 = (w2_it.Done()) ? v2_def : w2_it.Value().second; + if (k1 == k2) { + if (v1 != v2) return false; + if (!w1_it.Done()) w1_it.Next(); + if (!w2_it.Done()) w2_it.Next(); + } else if (k1 < k2) { + if (v1 != v2_def) return false; + w1_it.Next(); + } else { + if (v1_def != v2) return false; + w2_it.Next(); + } + } + return true; +} + +template +inline bool operator!=(const SparseTupleWeight &w1, + const SparseTupleWeight &w2) { + return !(w1 == w2); +} + +template +inline ostream &operator<<(ostream &strm, const SparseTupleWeight &w) { + if(FLAGS_fst_weight_separator.size() != 1) { + FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1"; + strm.clear(std::ios::badbit); + return strm; + } + char separator = FLAGS_fst_weight_separator[0]; + bool write_parens = false; + if (!FLAGS_fst_weight_parentheses.empty()) { + if (FLAGS_fst_weight_parentheses.size() != 2) { + FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2"; + strm.clear(std::ios::badbit); + return strm; + } + write_parens = true; + } + + if (write_parens) + strm << FLAGS_fst_weight_parentheses[0]; + + strm << w.DefaultValue(); + strm << separator; + + size_t n = w.Size(); + strm << n; + strm << separator; + + for (SparseTupleWeightIterator it(w); !it.Done(); it.Next()) { + strm << it.Value().first; + strm << separator; + strm << it.Value().second; + strm << separator; + } + + if (write_parens) + strm << FLAGS_fst_weight_parentheses[1]; + + return strm; +} + +template +inline istream &operator>>(istream &strm, SparseTupleWeight &w) { + if(FLAGS_fst_weight_separator.size() != 1) { + FSTERROR() << "FLAGS_fst_weight_separator.size() is not equal to 1"; + strm.clear(std::ios::badbit); + return strm; + } + char separator = FLAGS_fst_weight_separator[0]; + + if (!FLAGS_fst_weight_parentheses.empty()) { + if (FLAGS_fst_weight_parentheses.size() != 2) { + FSTERROR() << "FLAGS_fst_weight_parentheses.size() is not equal to 2"; + strm.clear(std::ios::badbit); + return strm; + } + return SparseTupleWeight::ReadWithParen( + strm, w, separator, FLAGS_fst_weight_parentheses[0], + FLAGS_fst_weight_parentheses[1]); + } else { + return SparseTupleWeight::ReadNoParen(strm, w, separator); + } +} + +// Reads SparseTupleWeight when there are no parentheses around tuple terms +template +inline istream& SparseTupleWeight::ReadNoParen( + istream &strm, + SparseTupleWeight &w, + char separator) { + int c; + size_t n; + + do { + c = strm.get(); + } while (isspace(c)); + + + { // Read default weight + W default_value; + string s; + while (c != separator) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + c = strm.get(); + } + istringstream sstrm(s); + sstrm >> default_value; + w.SetDefaultValue(default_value); + } + + c = strm.get(); + + { // Read n + string s; + while (c != separator) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + c = strm.get(); + } + istringstream sstrm(s); + sstrm >> n; + } + + // Read n elements + for (size_t i = 0; i < n; ++i) { + // discard separator + c = strm.get(); + K p; + W r; + + { // read key + string s; + while (c != separator) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + c = strm.get(); + } + istringstream sstrm(s); + sstrm >> p; + } + + c = strm.get(); + + { // read weight + string s; + while (c != separator) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + c = strm.get(); + } + istringstream sstrm(s); + sstrm >> r; + } + + w.Push(p, r); + } + + c = strm.get(); + if (c != separator) { + strm.clear(std::ios::badbit); + } + + return strm; +} + +// Reads SparseTupleWeight when there are parentheses around tuple terms +template +inline istream& SparseTupleWeight::ReadWithParen( + istream &strm, + SparseTupleWeight &w, + char separator, + char open_paren, + char close_paren) { + int c; + size_t n; + + do { + c = strm.get(); + } while (isspace(c)); + + if (c != open_paren) { + FSTERROR() << "is fst_weight_parentheses flag set correcty? "; + strm.clear(std::ios::badbit); + return strm; + } + + c = strm.get(); + + { // Read weight + W default_value; + stack parens; + string s; + while (c != separator || !parens.empty()) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + // If parens encountered before separator, they must be matched + if (c == open_paren) { + parens.push(1); + } else if (c == close_paren) { + // Fail for mismatched parens + if (parens.empty()) { + strm.clear(std::ios::failbit); + return strm; + } + parens.pop(); + } + c = strm.get(); + } + istringstream sstrm(s); + sstrm >> default_value; + w.SetDefaultValue(default_value); + } + + c = strm.get(); + + { // Read n + string s; + while (c != separator) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + c = strm.get(); + } + istringstream sstrm(s); + sstrm >> n; + } + + // Read n elements + for (size_t i = 0; i < n; ++i) { + // discard separator + c = strm.get(); + K p; + W r; + + { // Read key + stack parens; + string s; + while (c != separator || !parens.empty()) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + // If parens encountered before separator, they must be matched + if (c == open_paren) { + parens.push(1); + } else if (c == close_paren) { + // Fail for mismatched parens + if (parens.empty()) { + strm.clear(std::ios::failbit); + return strm; + } + parens.pop(); + } + c = strm.get(); + } + istringstream sstrm(s); + sstrm >> p; + } + + c = strm.get(); + + { // Read weight + stack parens; + string s; + while (c != separator || !parens.empty()) { + if (c == EOF) { + strm.clear(std::ios::badbit); + return strm; + } + s += c; + // If parens encountered before separator, they must be matched + if (c == open_paren) { + parens.push(1); + } else if (c == close_paren) { + // Fail for mismatched parens + if (parens.empty()) { + strm.clear(std::ios::failbit); + return strm; + } + parens.pop(); + } + c = strm.get(); + } + istringstream sstrm(s); + sstrm >> r; + } + + w.Push(p, r); + } + + if (c != separator) { + FSTERROR() << " separator expected, not found! "; + strm.clear(std::ios::badbit); + return strm; + } + + c = strm.get(); + if (c != close_paren) { + FSTERROR() << " is fst_weight_parentheses flag set correcty? "; + strm.clear(std::ios::badbit); + return strm; + } + + return strm; +} + + + +} // namespace fst + +#endif // FST_LIB_SPARSE_TUPLE_WEIGHT_H__ -- cgit v1.2.3-70-g09d2