diff options
Diffstat (limited to 'kaldi_io/src/tools/openfst/include/fst/script')
51 files changed, 5236 insertions, 0 deletions
diff --git a/kaldi_io/src/tools/openfst/include/fst/script/arcsort.h b/kaldi_io/src/tools/openfst/include/fst/script/arcsort.h new file mode 100644 index 0000000..4277332 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/arcsort.h @@ -0,0 +1,49 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: jpr@google.com (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_ARCSORT_H_ +#define FST_SCRIPT_ARCSORT_H_ + +#include <fst/arcsort.h> +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> + +namespace fst { +namespace script { + +enum ArcSortType { ILABEL_COMPARE, OLABEL_COMPARE }; + +typedef args::Package<MutableFstClass*, const ArcSortType> ArcSortArgs; + +template<class Arc> +void ArcSort(ArcSortArgs *args) { + MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>(); + + if (args->arg2 == ILABEL_COMPARE) { + ILabelCompare<Arc> icomp; + ArcSort(fst, icomp); + } else { // OLABEL_COMPARE + OLabelCompare<Arc> ocomp; + ArcSort(fst, ocomp); + } +} + +void ArcSort(MutableFstClass *ofst, ArcSortType sort_type); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_ARCSORT_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/arg-packs.h b/kaldi_io/src/tools/openfst/include/fst/script/arg-packs.h new file mode 100644 index 0000000..8ebf8d8 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/arg-packs.h @@ -0,0 +1,240 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: jpr@google.com (Jake Ratkiewicz) + +// Convenience templates for defining arg packs for the FstClass operations. + +// See operation-templates.h for a discussion about why these are needed; the +// short story is that all FstClass operations must be implemented by a version +// that takes one argument, most likely a struct bundling all the +// logical arguments together. These template structs provide convenient ways +// to specify these bundles (e.g. by means of appropriate typedefs). + +// The ArgPack template is sufficient for bundling together all the args for +// a particular function. The function is assumed to be void-returning. If +// you want a space for a return value, use the WithReturnValue template +// as follows: + +// WithReturnValue<bool, ArgPack<...> > + +#ifndef FST_SCRIPT_ARG_PACKS_H_ +#define FST_SCRIPT_ARG_PACKS_H_ + +namespace fst { +namespace script { +namespace args { + +// Sentinel value that means "no arg here." +class none_type { }; + +// Base arg pack template class. Specializations follow that allow +// fewer numbers of arguments (down to 2). If the maximum number of arguments +// increases, you will need to change three things: +// 1) Add more template parameters to this template +// 2) Add more specializations to allow fewer numbers of parameters than +// the new max. +// 3) Add extra none_types to all existing specializations to fill +// the new slots. + + +// 9 args (max) +template<class T1, + class T2 = none_type, + class T3 = none_type, + class T4 = none_type, + class T5 = none_type, + class T6 = none_type, + class T7 = none_type, + class T8 = none_type, + class T9 = none_type> +struct Package { + T1 arg1; + T2 arg2; + T3 arg3; + T4 arg4; + T5 arg5; + T6 arg6; + T7 arg7; + T8 arg8; + T9 arg9; + + Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, + T7 arg7, T8 arg8, T9 arg9) : + arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5), + arg6(arg6), arg7(arg7), arg8(arg8), arg9(arg9) { } +}; + +// 8 args +template<class T1, + class T2, + class T3, + class T4, + class T5, + class T6, + class T7, + class T8> +struct Package<T1, T2, T3, T4, T5, T6, T7, T8, none_type> { + T1 arg1; + T2 arg2; + T3 arg3; + T4 arg4; + T5 arg5; + T6 arg6; + T7 arg7; + T8 arg8; + + Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, + T7 arg7, T8 arg8) : + arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5), + arg6(arg6), arg7(arg7), arg8(arg8) { } +}; + +// 7 args +template<class T1, + class T2, + class T3, + class T4, + class T5, + class T6, + class T7> +struct Package<T1, T2, T3, T4, T5, T6, T7, + none_type, none_type> { + T1 arg1; + T2 arg2; + T3 arg3; + T4 arg4; + T5 arg5; + T6 arg6; + T7 arg7; + + Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, + T7 arg7) : + arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5), + arg6(arg6), arg7(arg7) { } +}; + +// 6 args +template<class T1, + class T2, + class T3, + class T4, + class T5, + class T6> +struct Package<T1, T2, T3, T4, T5, T6, none_type, + none_type, none_type> { + T1 arg1; + T2 arg2; + T3 arg3; + T4 arg4; + T5 arg5; + T6 arg6; + + Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6) : + arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5), + arg6(arg6) { } +}; + +// 5 args +template<class T1, + class T2, + class T3, + class T4, + class T5> +struct Package<T1, T2, T3, T4, T5, none_type, none_type, + none_type, none_type> { + T1 arg1; + T2 arg2; + T3 arg3; + T4 arg4; + T5 arg5; + + Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5) : + arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4), arg5(arg5) { } +}; + +// 4 args +template<class T1, + class T2, + class T3, + class T4> +struct Package<T1, T2, T3, T4, none_type, none_type, + none_type, none_type, none_type> { + T1 arg1; + T2 arg2; + T3 arg3; + T4 arg4; + + Package(T1 arg1, T2 arg2, T3 arg3, T4 arg4) : + arg1(arg1), arg2(arg2), arg3(arg3), arg4(arg4) { } +}; + +// 3 args +template<class T1, + class T2, + class T3> +struct Package<T1, T2, T3, none_type, none_type, + none_type, none_type, none_type, + none_type> { + T1 arg1; + T2 arg2; + T3 arg3; + + Package(T1 arg1, T2 arg2, T3 arg3) : + arg1(arg1), arg2(arg2), arg3(arg3) { } +}; + +// 2 args (minimum) +template<class T1, + class T2> +struct Package<T1, T2, none_type, none_type, + none_type, none_type, none_type, + none_type, none_type> { + T1 arg1; + T2 arg2; + + Package(T1 arg1, T2 arg2) : + arg1(arg1), arg2(arg2) { } +}; + +// Tack this on to an existing arg pack to add a return value. +// The syntax for accessing the args is then slightly more stilted, +// as you must do an extra member access (since the args are stored +// as a member of this class). +// The alternative is to declare another slew of templates for functions +// that return a value, analogous to the above. + +template<class Retval, class ArgPackage> +struct WithReturnValue { + Retval retval; + const ArgPackage &args; + + explicit WithReturnValue(const ArgPackage &args) : args(args) { } +}; + +// We don't want to store a reference to a reference, if ArgPackage is +// already some reference type. +template<class Retval, class ArgPackage> +struct WithReturnValue<Retval, ArgPackage&> { + Retval retval; + const ArgPackage &args; + + explicit WithReturnValue(const ArgPackage &args) : args(args) { } +}; + +} // namespace args +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_ARG_PACKS_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/closure.h b/kaldi_io/src/tools/openfst/include/fst/script/closure.h new file mode 100644 index 0000000..93b5ec3 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/closure.h @@ -0,0 +1,41 @@ + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: jpr@google.com (Jake Ratkiewicz) + +#ifndef FST_SCRIPT_CLOSURE_H_ +#define FST_SCRIPT_CLOSURE_H_ + +#include <fst/script/arg-packs.h> +#include <fst/script/fst-class.h> +#include <fst/closure.h> + +namespace fst { +namespace script { + +typedef args::Package<MutableFstClass*, const ClosureType> ClosureArgs; + +template<class Arc> +void Closure(ClosureArgs *args) { + MutableFst<Arc> *fst = args->arg1->GetMutableFst<Arc>(); + + Closure(fst, args->arg2); +} + +void Closure(MutableFstClass *ofst, ClosureType closure_type); + +} // namespace script +} // namespace fst + +#endif // FST_SCRIPT_CLOSURE_H_ diff --git a/kaldi_io/src/tools/openfst/include/fst/script/compile-impl.h b/kaldi_io/src/tools/openfst/include/fst/script/compile-impl.h new file mode 100644 index 0000000..68f37c3 --- /dev/null +++ b/kaldi_io/src/tools/openfst/include/fst/script/compile-impl.h @@ -0,0 +1,216 @@ +// compile.h + +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Copyright 2005-2010 Google, Inc. +// Author: riley@google.com (Michael Riley) +// +// \file +// Class to to compile a binary Fst from textual input. + +#ifndef FST_SCRIPT_COMPILE_IMPL_H_ +#define FST_SCRIPT_COMPILE_IMPL_H_ + +#include <tr1/unordered_map> +using std::tr1::unordered_map; +using std::tr1::unordered_multimap; +#include <sstream> +#include <string> +#include <vector> +using std::vector; + +#include <iostream> +#include <fstream> +#include <sstream> +#include <fst/fst.h> +#include <fst/util.h> +#include <fst/vector-fst.h> + +DECLARE_string(fst_field_separator); + +namespace fst { + +// Compile a binary Fst from textual input, helper class for fstcompile.cc +// WARNING: Stand-alone use of this class not recommended, most code should +// read/write using the binary format which is much more efficient. +template <class A> class FstCompiler { + public: + typedef A Arc; + typedef typename A::StateId StateId; + typedef typename A::Label Label; + typedef typename A::Weight Weight; + + // WARNING: use of 'allow_negative_labels = true' not recommended; may + // cause conflicts + FstCompiler(istream &istrm, const string &source, + const SymbolTable *isyms, const SymbolTable *osyms, + const SymbolTable *ssyms, bool accep, bool ikeep, + bool okeep, bool nkeep, bool allow_negative_labels = false) + : nline_(0), source_(source), + isyms_(isyms), osyms_(osyms), ssyms_(ssyms), + nstates_(0), keep_state_numbering_(nkeep), + allow_negative_labels_(allow_negative_labels) { + char line[kLineLen]; + while (istrm.getline(line, kLineLen)) { + ++nline_; + vector<char *> col; + string separator = FLAGS_fst_field_separator + "\n"; + SplitToVector(line, separator.c_str(), &col, true); + if (col.size() == 0 || col[0][0] == '\0') // empty line + continue; + if (col.size() > 5 || + (col.size() > 4 && accep) || + (col.size() == 3 && !accep)) { + FSTERROR() << "FstCompiler: Bad number of columns, source = " + << source_ + << ", line = " << nline_; + fst_.SetProperties(kError, kError); + return; + } + StateId s = StrToStateId(col[0]); + while (s >= fst_.NumStates()) + fst_.AddState(); + if (nline_ == 1) + fst_.SetStart(s); + + Arc arc; + StateId d = s; + switch (col.size()) { + case 1: + fst_.SetFinal(s, Weight::One()); + break; + case 2: + fst_.SetFinal(s, StrToWeight(col[1], true)); + break; + case 3: + arc.nextstate = d = StrToStateId(col[1]); + arc.ilabel = StrToILabel(col[2]); + arc.olabel = arc.ilabel; + arc.weight = Weight::One(); + fst_.AddArc(s, arc); + break; + case 4: + arc.nextstate = d = StrToStateId(col[1]); + arc.ilabel = StrToILabel(col[2]); + if (accep) { + arc.olabel = arc.ilabel; + arc.weight = StrToWeight(col[3], false); + } else { + arc.olabel = StrToOLabel(col[3]); + arc.weight = Weight::One(); + } + fst_.AddArc(s, arc); + break; + case 5: + arc.nextstate = d = StrToStateId(col[1]); + arc.ilabel = StrToILabel(col[2]); + arc.olabel = StrToOLabel(col[3]); + arc.weight = StrToWeight(col[4], false); + fst_.AddArc(s, arc); + } + while (d >= fst_.NumStates()) + fst_.AddState(); + } + if (ikeep) + fst_.SetInputSymbols(isyms); + if (okeep) + fst_.SetOutputSymbols(osyms); + } + + const VectorFst<A> &Fst() const { + return fst_; + } + + private: + // Maximum line length in text file. + static const int kLineLen = 8096; + + int64 StrToId(const char *s, const SymbolTable *syms, + const char *name, bool allow_negative = false) const { + int64 n = 0; + + if (syms) { + n = syms->Find(s); + if (n == -1 || (!allow_negative && n < 0)) { + FSTERROR() << "FstCompiler: Symbol \"" << s + << "\" is not mapped to any integer " << name + << ", symbol table = " << syms->Name() + << ", source = " << source_ << ", line = " << nline_; + fst_.SetProperties(kError, kError); + } + } else { + char *p; + n = strtoll(s, &p, 10); + if (p < s + strlen(s) || (!allow_negative && n < 0)) { + FSTERROR() << "FstCompiler: Bad " << name << " integer = \"" << s + << "\", source = " << source_ << ", line = " << nline_; + fst_.SetProperties(kError, kError); + } + } + return n; + } + + StateId StrToStateId(const char *s) { + StateId n = StrToId(s, ssyms_, "state ID"); + + if (keep_state_numbering_) + return n; + + // remap state IDs to make dense set + typename unordered_map<StateId, StateId>::const_iterator it = states_.find(n); + if (it == states_.end()) { + states_[n] = nstates_; + return nstates_++; + } else { + return it->second; + } + } + + StateId StrToILabel(const char *s) const { + return StrToId(s, isyms_, "arc ilabel", allow_negative_labels_); + } + + StateId StrToOLabel(const char *s) const { + return StrToId(s, osyms_, "arc olabel", allow_negative_labels_); + } + + Weight StrToWeight(const char *s, bool allow_zero) const { + Weight w; + istringstream strm(s); + strm >> w; + if (!strm || (!allow_zero && w == Weight::Zero())) { + FSTERROR() << "FstCompiler: Bad weight = \"" &l |