From 96a32415ab43377cf1575bd3f4f2980f58028209 Mon Sep 17 00:00:00 2001 From: Determinant Date: Fri, 14 Aug 2015 11:51:42 +0800 Subject: add implementation for kaldi io (by ymz) --- kaldi_io/src/tools/openfst/include/fst/util.h | 437 ++++++++++++++++++++++++++ 1 file changed, 437 insertions(+) create mode 100644 kaldi_io/src/tools/openfst/include/fst/util.h (limited to 'kaldi_io/src/tools/openfst/include/fst/util.h') diff --git a/kaldi_io/src/tools/openfst/include/fst/util.h b/kaldi_io/src/tools/openfst/include/fst/util.h new file mode 100644 index 0000000..57d7c4b --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/util.h @@ -0,0 +1,437 @@ +// util.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: riley@google.com (Michael Riley) +// +// \file +// FST utility inline definitions. + +#ifndef FST_LIB_UTIL_H__ +#define FST_LIB_UTIL_H__ + +#include +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include +using std::tr1::unordered_set; +using std::tr1::unordered_multiset; +#include +#include +#include +#include +#include +#include +using std::vector; + + +#include +#include + +#include +#include +#include + +// +// UTILITY FOR ERROR HANDLING +// + +DECLARE_bool(fst_error_fatal); + +#define FSTERROR() (FLAGS_fst_error_fatal ? LOG(FATAL) : LOG(ERROR)) + +namespace fst { + +// +// UTILITIES FOR TYPE I/O +// + +// Read some types from an input stream. + +// Generic case. +template +inline istream &ReadType(istream &strm, T *t) { + return t->Read(strm); +} + +// Fixed size, contiguous memory read. +#define READ_POD_TYPE(T) \ +inline istream &ReadType(istream &strm, T *t) { \ + return strm.read(reinterpret_cast(t), sizeof(T)); \ +} + +READ_POD_TYPE(bool); +READ_POD_TYPE(char); +READ_POD_TYPE(signed char); +READ_POD_TYPE(unsigned char); +READ_POD_TYPE(short); +READ_POD_TYPE(unsigned short); +READ_POD_TYPE(int); +READ_POD_TYPE(unsigned int); +READ_POD_TYPE(long); +READ_POD_TYPE(unsigned long); +READ_POD_TYPE(long long); +READ_POD_TYPE(unsigned long long); +READ_POD_TYPE(float); +READ_POD_TYPE(double); + +// String case. +inline istream &ReadType(istream &strm, string *s) { + s->clear(); + int32 ns = 0; + strm.read(reinterpret_cast(&ns), sizeof(ns)); + for (int i = 0; i < ns; ++i) { + char c; + strm.read(&c, 1); + *s += c; + } + return strm; +} + +// Pair case. +template +inline istream &ReadType(istream &strm, pair *p) { + ReadType(strm, &p->first); + ReadType(strm, &p->second); + return strm; +} + +template +inline istream &ReadType(istream &strm, pair *p) { + ReadType(strm, const_cast(&p->first)); + ReadType(strm, &p->second); + return strm; +} + +// General case - no-op. +template +void StlReserve(C *c, int64 n) {} + +// Specialization for vectors. +template +void StlReserve(vector *c, int64 n) { + c->reserve(n); +} + +// STL sequence container. +#define READ_STL_SEQ_TYPE(C) \ +template \ +inline istream &ReadType(istream &strm, C *c) { \ + c->clear(); \ + int64 n = 0; \ + strm.read(reinterpret_cast(&n), sizeof(n)); \ + StlReserve(c, n); \ + for (ssize_t i = 0; i < n; ++i) { \ + typename C::value_type value; \ + ReadType(strm, &value); \ + c->insert(c->end(), value); \ + } \ + return strm; \ +} + +READ_STL_SEQ_TYPE(vector); +READ_STL_SEQ_TYPE(list); + +// STL associative container. +#define READ_STL_ASSOC_TYPE(C) \ +template \ +inline istream &ReadType(istream &strm, C *c) { \ + c->clear(); \ + int64 n = 0; \ + strm.read(reinterpret_cast(&n), sizeof(n)); \ + for (ssize_t i = 0; i < n; ++i) { \ + typename C::value_type value; \ + ReadType(strm, &value); \ + c->insert(value); \ + } \ + return strm; \ +} + +READ_STL_ASSOC_TYPE(set); +READ_STL_ASSOC_TYPE(unordered_set); +READ_STL_ASSOC_TYPE(map); +READ_STL_ASSOC_TYPE(unordered_map); + +// Write some types to an output stream. + +// Generic case. +template +inline ostream &WriteType(ostream &strm, const T t) { + t.Write(strm); + return strm; +} + +// Fixed size, contiguous memory write. +#define WRITE_POD_TYPE(T) \ +inline ostream &WriteType(ostream &strm, const T t) { \ + return strm.write(reinterpret_cast(&t), sizeof(T)); \ +} + +WRITE_POD_TYPE(bool); +WRITE_POD_TYPE(char); +WRITE_POD_TYPE(signed char); +WRITE_POD_TYPE(unsigned char); +WRITE_POD_TYPE(short); +WRITE_POD_TYPE(unsigned short); +WRITE_POD_TYPE(int); +WRITE_POD_TYPE(unsigned int); +WRITE_POD_TYPE(long); +WRITE_POD_TYPE(unsigned long); +WRITE_POD_TYPE(long long); +WRITE_POD_TYPE(unsigned long long); +WRITE_POD_TYPE(float); +WRITE_POD_TYPE(double); + +// String case. +inline ostream &WriteType(ostream &strm, const string &s) { + int32 ns = s.size(); + strm.write(reinterpret_cast(&ns), sizeof(ns)); + return strm.write(s.data(), ns); +} + +// Pair case. +template +inline ostream &WriteType(ostream &strm, const pair &p) { + WriteType(strm, p.first); + WriteType(strm, p.second); + return strm; +} + +// STL sequence container. +#define WRITE_STL_SEQ_TYPE(C) \ +template \ +inline ostream &WriteType(ostream &strm, const C &c) { \ + int64 n = c.size(); \ + strm.write(reinterpret_cast(&n), sizeof(n)); \ + for (typename C::const_iterator it = c.begin(); \ + it != c.end(); ++it) \ + WriteType(strm, *it); \ + return strm; \ +} + +WRITE_STL_SEQ_TYPE(vector); +WRITE_STL_SEQ_TYPE(list); + +// STL associative container. +#define WRITE_STL_ASSOC_TYPE(C) \ +template \ +inline ostream &WriteType(ostream &strm, const C &c) { \ + int64 n = c.size(); \ + strm.write(reinterpret_cast(&n), sizeof(n)); \ + for (typename C::const_iterator it = c.begin(); \ + it != c.end(); ++it) \ + WriteType(strm, *it); \ + return strm; \ +} + +WRITE_STL_ASSOC_TYPE(set); +WRITE_STL_ASSOC_TYPE(unordered_set); +WRITE_STL_ASSOC_TYPE(map); +WRITE_STL_ASSOC_TYPE(unordered_map); + +// Utilities for converting between int64 or Weight and string. + +int64 StrToInt64(const string &s, const string &src, size_t nline, + bool allow_negative, bool *error = 0); + +template +Weight StrToWeight(const string &s, const string &src, size_t nline) { + Weight w; + istringstream strm(s); + strm >> w; + if (!strm) { + FSTERROR() << "StrToWeight: Bad weight = \"" << s + << "\", source = " << src << ", line = " << nline; + return Weight::NoWeight(); + } + return w; +} + +void Int64ToStr(int64 n, string *s); + +template +void WeightToStr(Weight w, string *s) { + ostringstream strm; + strm.precision(9); + strm << w; + s->append(strm.str().data(), strm.str().size()); +} + +// Utilities for reading/writing label pairs + +// Returns true on success +template +bool ReadLabelPairs(const string& filename, + vector >* pairs, + bool allow_negative = false) { + ifstream strm(filename.c_str()); + + if (!strm) { + LOG(ERROR) << "ReadLabelPairs: Can't open file: " << filename; + return false; + } + + const int kLineLen = 8096; + char line[kLineLen]; + size_t nline = 0; + + pairs->clear(); + while (strm.getline(line, kLineLen)) { + ++nline; + vector col; + SplitToVector(line, "\n\t ", &col, true); + if (col.size() == 0 || col[0][0] == '\0') // empty line + continue; + if (col.size() != 2) { + LOG(ERROR) << "ReadLabelPairs: Bad number of columns, " + << "file = " << filename << ", line = " << nline; + return false; + } + + bool err; + Label frmlabel = StrToInt64(col[0], filename, nline, allow_negative, &err); + if (err) return false; + Label tolabel = StrToInt64(col[1], filename, nline, allow_negative, &err); + if (err) return false; + pairs->push_back(make_pair(frmlabel, tolabel)); + } + return true; +} + +// Returns true on success +template +bool WriteLabelPairs(const string& filename, + const vector >& pairs) { + ostream *strm = &cout; + if (!filename.empty()) { + strm = new ofstream(filename.c_str()); + if (!*strm) { + LOG(ERROR) << "WriteLabelPairs: Can't open file: " << filename; + return false; + } + } + + for (ssize_t n = 0; n < pairs.size(); ++n) + *strm << pairs[n].first << "\t" << pairs[n].second << "\n"; + + if (!*strm) { + LOG(ERROR) << "WriteLabelPairs: Write failed: " + << (filename.empty() ? "standard output" : filename); + return false; + } + if (strm != &cout) + delete strm; + return true; +} + +// Utilities for converting a type name to a legal C symbol. + +void ConvertToLegalCSymbol(string *s); + + +// +// UTILITIES FOR STREAM I/O +// + +bool AlignInput(istream &strm); +bool AlignOutput(ostream &strm); + +// +// UTILITIES FOR PROTOCOL BUFFER I/O +// + + +// An associative container for which testing membership is +// faster than an STL set if members are restricted to an interval +// that excludes most non-members. A 'Key' must have ==, !=, and < defined. +// Element 'NoKey' should be a key that marks an uninitialized key and +// is otherwise unused. 'Find()' returns an STL const_iterator to the match +// found, otherwise it equals 'End()'. +template +class CompactSet { +public: + typedef typename set::const_iterator const_iterator; + + CompactSet() + : min_key_(NoKey), + max_key_(NoKey) { } + + CompactSet(const CompactSet &compact_set) + : set_(compact_set.set_), + min_key_(compact_set.min_key_), + max_key_(compact_set.max_key_) { } + + void Insert(Key key) { + set_.insert(key); + if (min_key_ == NoKey || key < min_key_) + min_key_ = key; + if (max_key_ == NoKey || max_key_ < key) + max_key_ = key; + } + + void Erase(Key key) { + set_.erase(key); + if (set_.empty()) { + min_key_ = max_key_ = NoKey; + } else if (key == min_key_) { + ++min_key_; + } else if (key == max_key_) { + --max_key_; + } + } + + void Clear() { + set_.clear(); + min_key_ = max_key_ = NoKey; + } + + const_iterator Find(Key key) const { + if (min_key_ == NoKey || + key < min_key_ || max_key_ < key) + return set_.end(); + else + return set_.find(key); + } + + bool Member(Key key) const { + if (min_key_ == NoKey || key < min_key_ || max_key_ < key) { + return false; // out of range + } else if (min_key_ != NoKey && max_key_ + 1 == min_key_ + set_.size()) { + return true; // dense range + } else { + return set_.find(key) != set_.end(); + } + } + + const_iterator Begin() const { return set_.begin(); } + + const_iterator End() const { return set_.end(); } + + // All stored keys are greater than or equal to this value. + Key LowerBound() const { return min_key_; } + + // All stored keys are less than or equal to this value. + Key UpperBound() const { return max_key_; } + +private: + set set_; + Key min_key_; + Key max_key_; + + void operator=(const CompactSet &); //disallow +}; + +} // namespace fst + +#endif // FST_LIB_UTIL_H__ -- cgit v1.2.3-70-g09d2