From 96a32415ab43377cf1575bd3f4f2980f58028209 Mon Sep 17 00:00:00 2001 From: Determinant Date: Fri, 14 Aug 2015 11:51:42 +0800 Subject: add implementation for kaldi io (by ymz) --- .../openfst/include/fst/script/compile-impl.h | 216 +++++++++++++++++++++ 1 file changed, 216 insertions(+) create mode 100644 kaldi_io/src/tools/openfst/include/fst/script/compile-impl.h (limited to 'kaldi_io/src/tools/openfst/include/fst/script/compile-impl.h') diff --git a/kaldi_io/src/tools/openfst/include/fst/script/compile-impl.h b/kaldi_io/src/tools/openfst/include/fst/script/compile-impl.h new file mode 100644 index 0000000..68f37c3 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/compile-impl.h @@ -0,0 +1,216 @@ +// compile.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: riley@google.com (Michael Riley) +// +// \file +// Class to to compile a binary Fst from textual input. + +#ifndef FST_SCRIPT_COMPILE_IMPL_H_ +#define FST_SCRIPT_COMPILE_IMPL_H_ + +#include +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include +#include +#include +using std::vector; + +#include +#include +#include +#include +#include +#include + +DECLARE_string(fst_field_separator); + +namespace fst { + +// Compile a binary Fst from textual input, helper class for fstcompile.cc +// WARNING: Stand-alone use of this class not recommended, most code should +// read/write using the binary format which is much more efficient. +template class FstCompiler { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + + // WARNING: use of 'allow_negative_labels = true' not recommended; may + // cause conflicts + FstCompiler(istream &istrm, const string &source, + const SymbolTable *isyms, const SymbolTable *osyms, + const SymbolTable *ssyms, bool accep, bool ikeep, + bool okeep, bool nkeep, bool allow_negative_labels = false) + : nline_(0), source_(source), + isyms_(isyms), osyms_(osyms), ssyms_(ssyms), + nstates_(0), keep_state_numbering_(nkeep), + allow_negative_labels_(allow_negative_labels) { + char line[kLineLen]; + while (istrm.getline(line, kLineLen)) { + ++nline_; + vector col; + string separator = FLAGS_fst_field_separator + "\n"; + SplitToVector(line, separator.c_str(), &col, true); + if (col.size() == 0 || col[0][0] == '\0') // empty line + continue; + if (col.size() > 5 || + (col.size() > 4 && accep) || + (col.size() == 3 && !accep)) { + FSTERROR() << "FstCompiler: Bad number of columns, source = " + << source_ + << ", line = " << nline_; + fst_.SetProperties(kError, kError); + return; + } + StateId s = StrToStateId(col[0]); + while (s >= fst_.NumStates()) + fst_.AddState(); + if (nline_ == 1) + fst_.SetStart(s); + + Arc arc; + StateId d = s; + switch (col.size()) { + case 1: + fst_.SetFinal(s, Weight::One()); + break; + case 2: + fst_.SetFinal(s, StrToWeight(col[1], true)); + break; + case 3: + arc.nextstate = d = StrToStateId(col[1]); + arc.ilabel = StrToILabel(col[2]); + arc.olabel = arc.ilabel; + arc.weight = Weight::One(); + fst_.AddArc(s, arc); + break; + case 4: + arc.nextstate = d = StrToStateId(col[1]); + arc.ilabel = StrToILabel(col[2]); + if (accep) { + arc.olabel = arc.ilabel; + arc.weight = StrToWeight(col[3], false); + } else { + arc.olabel = StrToOLabel(col[3]); + arc.weight = Weight::One(); + } + fst_.AddArc(s, arc); + break; + case 5: + arc.nextstate = d = StrToStateId(col[1]); + arc.ilabel = StrToILabel(col[2]); + arc.olabel = StrToOLabel(col[3]); + arc.weight = StrToWeight(col[4], false); + fst_.AddArc(s, arc); + } + while (d >= fst_.NumStates()) + fst_.AddState(); + } + if (ikeep) + fst_.SetInputSymbols(isyms); + if (okeep) + fst_.SetOutputSymbols(osyms); + } + + const VectorFst &Fst() const { + return fst_; + } + + private: + // Maximum line length in text file. + static const int kLineLen = 8096; + + int64 StrToId(const char *s, const SymbolTable *syms, + const char *name, bool allow_negative = false) const { + int64 n = 0; + + if (syms) { + n = syms->Find(s); + if (n == -1 || (!allow_negative && n < 0)) { + FSTERROR() << "FstCompiler: Symbol \"" << s + << "\" is not mapped to any integer " << name + << ", symbol table = " << syms->Name() + << ", source = " << source_ << ", line = " << nline_; + fst_.SetProperties(kError, kError); + } + } else { + char *p; + n = strtoll(s, &p, 10); + if (p < s + strlen(s) || (!allow_negative && n < 0)) { + FSTERROR() << "FstCompiler: Bad " << name << " integer = \"" << s + << "\", source = " << source_ << ", line = " << nline_; + fst_.SetProperties(kError, kError); + } + } + return n; + } + + StateId StrToStateId(const char *s) { + StateId n = StrToId(s, ssyms_, "state ID"); + + if (keep_state_numbering_) + return n; + + // remap state IDs to make dense set + typename unordered_map::const_iterator it = states_.find(n); + if (it == states_.end()) { + states_[n] = nstates_; + return nstates_++; + } else { + return it->second; + } + } + + StateId StrToILabel(const char *s) const { + return StrToId(s, isyms_, "arc ilabel", allow_negative_labels_); + } + + StateId StrToOLabel(const char *s) const { + return StrToId(s, osyms_, "arc olabel", allow_negative_labels_); + } + + Weight StrToWeight(const char *s, bool allow_zero) const { + Weight w; + istringstream strm(s); + strm >> w; + if (!strm || (!allow_zero && w == Weight::Zero())) { + FSTERROR() << "FstCompiler: Bad weight = \"" << s + << "\", source = " << source_ << ", line = " << nline_; + fst_.SetProperties(kError, kError); + w = Weight::NoWeight(); + } + return w; + } + + mutable VectorFst fst_; + size_t nline_; + string source_; // text FST source name + const SymbolTable *isyms_; // ilabel symbol table + const SymbolTable *osyms_; // olabel symbol table + const SymbolTable *ssyms_; // slabel symbol table + unordered_map states_; // state ID map + StateId nstates_; // number of seen states + bool keep_state_numbering_; + bool allow_negative_labels_; // not recommended; may cause conflicts + + DISALLOW_COPY_AND_ASSIGN(FstCompiler); +}; + +} // namespace fst + +#endif // FST_SCRIPT_COMPILE_IMPL_H_ -- cgit v1.2.3