add implementation for kaldi io (by ymz)

author: Determinant <ted.sybil@gmail.com> 2015-08-14 11:51:42 +0800
committer: Determinant <ted.sybil@gmail.com> 2015-08-14 11:51:42 +0800
commit: 96a32415ab43377cf1575bd3f4f2980f58028209 (patch)
tree: 30a2d92d73e8f40ac87b79f6f56e227bfc4eea6e /kaldi_io/src/kaldi/hmm/hmm-utils.h
parent: c177a7549bd90670af4b29fa813ddea32cfe0f78 (diff)
1 files changed, 295 insertions, 0 deletions
diff --git a/kaldi_io/src/kaldi/hmm/hmm-utils.h b/kaldi_io/src/kaldi/hmm/hmm-utils.h
new file mode 100644
index 0000000..240f706
--- /dev/null
+++ b/kaldi_io/src/kaldi/hmm/hmm-utils.h
@@ -0,0 +1,295 @@
+// hmm/hmm-utils.h
+
+// Copyright 2009-2011  Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_HMM_HMM_UTILS_H_
+#define KALDI_HMM_HMM_UTILS_H_
+
+#include <cstddef>
+
+#include "hmm/hmm-topology.h"
+#include "hmm/transition-model.h"
+#include "lat/kaldi-lattice.h"
+
+namespace kaldi {
+
+
+/// \defgroup hmm_group_graph Classes and functions for creating FSTs from HMMs
+/// \ingroup hmm_group
+/// @{
+
+/// Configuration class for the GetHTransducer() function; see
+/// \ref hmm_graph_config for context.
+struct HTransducerConfig {
+  /// Transition log-prob scale, see \ref hmm_scale.
+  /// Note this doesn't apply to self-loops; GetHTransducer() does
+  /// not include self-loops.
+  BaseFloat transition_scale;
+
+  /// If true, we are constructing a time-reversed FST: phone-seqs in
+  /// ilabel_info are backwards, and we want to output a backwards version of
+  /// the HMM corresponding to each phone.
+  bool reverse;
+
+  /// Only looked at if reverse == true; if push_weights is also true, we push the
+  /// weights in the reversed FSTs we create for each phone HMM.  This is only safe
+  /// if the HMMs are probabilistic (i.e. not discriminatively trained).
+  bool push_weights;
+
+  /// Delta used if we do push_weights [only relevant if reverse == true
+  /// and push_weights == true].
+  BaseFloat push_delta;
+
+  HTransducerConfig():
+      transition_scale(1.0),
+      reverse(false),
+      push_weights(true),
+      push_delta(0.001)
+  { }
+
+  // Registers the four options above with the options object "po".
+  // NOTE(review): the old comment mentioned a "sym_type" enum to be handled
+  // separately in main(); no such member exists in this struct.
+  void Register (OptionsItf *po) {
+    po->Register("transition-scale", &transition_scale,
+                 "Scale of transition probs (relative to LM)");
+    po->Register("reverse", &reverse,
+                 "Set true to build time-reversed FST.");
+    po->Register("push-weights", &push_weights,
+                 "Push weights (only applicable if reverse == true)");
+    po->Register("push-delta", &push_delta,
+                 "Delta used in pushing weights (only applicable if "
+                 "reverse && push-weights)");
+  }
+};
+
+
+/// Hash functor for keys of the form (int32, vector of int32), as used by HmmCacheType.
+struct HmmCacheHash {
+  std::size_t operator () (const std::pair<int32, std::vector<int32> >&p) const {
+    const std::size_t prime = 103049;  // unsigned math: wraps, no signed-overflow UB.
+    return prime * static_cast<std::size_t>(p.first) + VectorHasher<int32>()(p.second);
+  }
+};
+
+/// HmmCacheType is a map from (central-phone, sequence of pdf-ids) to FST, used
+/// as cache in GetHmmAsFst, as an optimization.
+typedef unordered_map<std::pair<int32, std::vector<int32> >,
+                      fst::VectorFst<fst::StdArc>*,
+                      HmmCacheHash> HmmCacheType;
+
+
+/// Called by GetHTransducer() and probably will not need to be called directly;
+/// it creates the FST corresponding to the phone.  Does not include self-loops;
+/// you have to call AddSelfLoops() for that.  Result owned by caller.
+/// Returns an acceptor (i.e. ilabels, olabels identical) with transition-ids
+/// as the symbols.
+/// For documentation in context, see \ref hmm_graph_get_hmm_as_fst
+///   @param context_window  A vector representing the phonetic context; see
+///            \ref tree_window "here" for explanation.
+///   @param ctx_dep The object that contains the phonetic decision-tree
+///   @param trans_model The transition-model object, which provides
+///         the mappings to transition-ids and also the transition
+///         probabilities.
+///   @param config Configuration object, see \ref HTransducerConfig.
+///   @param cache Object used as a lookaside buffer to save computation;
+///       if it finds that the object it needs is already there, it will
+///       just return a pointer value from "cache"-- note that this means
+///       you have to be careful not to delete things twice.
+
+fst::VectorFst<fst::StdArc> *GetHmmAsFst(
+    std::vector<int32> context_window,
+    const ContextDependencyInterface &ctx_dep,
+    const TransitionModel &trans_model,
+    const HTransducerConfig &config,
+    HmmCacheType *cache = NULL);
+
+/// Included mainly as a form of documentation, not used in any other code
+/// currently.  Creates the FST with self-loops, and with fewer options.
+fst::VectorFst<fst::StdArc>*
+GetHmmAsFstSimple(std::vector<int32> context_window,
+                  const ContextDependencyInterface &ctx_dep,
+                  const TransitionModel &trans_model,
+                  BaseFloat prob_scale);
+
+
+/**
+  * Returns the H transducer; result owned by caller.
+  * See \ref hmm_graph_get_h_transducer.  The H transducer has on the
+  * input transition-ids, and also possibly some disambiguation symbols, which
+  * will be put in disambig_syms_left.  The output side contains the identifiers that
+  * are indexes into "ilabel_info" (these represent phones-in-context or
+  * disambiguation symbols).  The ilabel_info vector allows GetHTransducer to map
+  * from symbols to phones-in-context (i.e. phonetic context windows).  Any
+  * singleton symbols in the ilabel_info vector which are not phones, will be
+  * treated as disambiguation symbols.  [Not all recipes use these].  The output
+  * "disambig_syms_left" will be set to a list of the disambiguation symbols on
+  * the input of the transducer (i.e. same symbol type as whatever is on the
+  * input of the transducer).
+  */
+fst::VectorFst<fst::StdArc>*
+GetHTransducer (const std::vector<std::vector<int32> > &ilabel_info,
+                const ContextDependencyInterface &ctx_dep,
+                const TransitionModel &trans_model,
+                const HTransducerConfig &config,
+                std::vector<int32> *disambig_syms_left);
+
+/**
+  * GetIlabelMapping produces a mapping that's similar to HTK's logical-to-physical
+  * model mapping (i.e. the xwrd.clustered.mlist files).   It groups together
+  * "logical HMMs" (i.e. in our world, phonetic context windows) that share the
+  * same sequence of transition-ids.   This can be used in an
+  * optional graph-creation step that produces a remapped form of CLG that can be
+  * more productively determinized and minimized.  This is used in the command-line program
+  * make-ilabel-transducer.cc.
+  * @param ilabel_info_old [in] The original \ref tree_ilabel "ilabel_info" vector
+  * @param ctx_dep [in] The tree
+  * @param trans_model [in] The transition-model object
+  * @param old2new_map [out] The output; this vector, which is of size equal to the
+  *       number of new labels, is a mapping to the old labels such that we could
+  *       create a vector ilabel_info_new such that
+  *       ilabel_info_new[i] == ilabel_info_old[old2new_map[i]]
+  */
+void GetIlabelMapping (const std::vector<std::vector<int32> > &ilabel_info_old,
+                       const ContextDependencyInterface &ctx_dep,
+                       const TransitionModel &trans_model,
+                       std::vector<int32> *old2new_map);
+
+
+
+/**
+  * For context, see \ref hmm_graph_add_self_loops.  Expands an FST that has been
+  * built without self-loops, and adds the self-loops (it also needs to modify
+  * the probability of the non-self-loop ones, as the graph without self-loops
+  * was created in such a way that it was stochastic).  Note that the
+  * disambig_syms will be empty in some recipes (e.g.  if you already removed
+  * the disambiguation symbols).
+  * @param trans_model [in] Transition model
+  * @param disambig_syms [in] Sorted, uniq list of disambiguation symbols, required
+  *       if the graph contains disambiguation symbols but only needed for sanity checks.
+  * @param self_loop_scale [in] Transition-probability scale for self-loops; c.f.
+  *                    \ref hmm_scale
+  * @param reorder [in] If true, reorders the transitions (see \ref hmm_reorder).
+  * @param  fst [in, out] The FST to be modified.
+  */
+void AddSelfLoops(const TransitionModel &trans_model,
+                  const std::vector<int32> &disambig_syms,  // used as a check only.
+                  BaseFloat self_loop_scale,
+                  bool reorder,  // true->dan-style, false->lukas-style.
+                  fst::VectorFst<fst::StdArc> *fst);
+
+/**
+  * Adds transition-probs, with the supplied
+  * scales (see \ref hmm_scale), to the graph.
+  * Useful if you want to create a graph without transition probs, then possibly
+  * train the model (including the transition probs) but keep the graph fixed,
+  * and add back in the transition probs.  It assumes the fst has transition-ids
+  * on it.  It is not an error if the FST has no states (nothing will be done).
+  * @param trans_model [in] The transition model
+  * @param disambig_syms [in] A list of disambiguation symbols, required if the
+  *                       graph has disambiguation symbols on its input but only
+  *                       used for checks.
+  * @param transition_scale [in] A scale on transition-probabilities apart from
+  *                      those involving self-loops; see \ref hmm_scale.
+  * @param self_loop_scale [in] A scale on self-loop transition probabilities;
+  *                      see \ref hmm_scale.
+  * @param  fst [in, out] The FST to be modified.
+  */
+void AddTransitionProbs(const TransitionModel &trans_model,
+                        const std::vector<int32> &disambig_syms,
+                        BaseFloat transition_scale,
+                        BaseFloat self_loop_scale,
+                        fst::VectorFst<fst::StdArc> *fst);
+
+/**
+   This is as AddTransitionProbs() above, but operates on a Lattice, where
+   it affects the graph part of the weight (the first element
+   of the pair). */
+void AddTransitionProbs(const TransitionModel &trans_model,
+                        BaseFloat transition_scale,
+                        BaseFloat self_loop_scale,
+                        Lattice *lat);
+
+
+/// Returns a transducer from pdfs plus one (input) to transition-ids (output).
+/// Currently of use only for testing.
+fst::VectorFst<fst::StdArc>*
+GetPdfToTransitionIdTransducer(const TransitionModel &trans_model);
+
+/// Converts all transition-ids in the FST to pdfs plus one.
+/// Placeholder: not implemented yet!
+void ConvertTransitionIdsToPdfs(const TransitionModel &trans_model,
+                                const std::vector<int32> &disambig_syms,
+                                fst::VectorFst<fst::StdArc> *fst);
+
+/// @} end "defgroup hmm_group_graph"
+
+/// \addtogroup hmm_group
+/// @{
+
+/// SplitToPhones splits up the TransitionIds in "alignment" into their
+/// individual phones (one vector per instance of a phone).  At output,
+/// the sum of the sizes of the vectors in split_alignment will be the same
+/// as the corresponding sum for "alignment".  The function returns
+/// true on success.  If the alignment appears to be incomplete, e.g.
+/// not ending at the end-state of a phone, it will still break it up into
+/// phones but it will return false.  For more serious errors it will
+/// die or throw an exception.
+/// This function works out by itself whether the graph was created
+/// with "reordering" (dan-style graph), and just does the right thing.
+
+bool SplitToPhones(const TransitionModel &trans_model,
+                   const std::vector<int32> &alignment,
+                   std::vector<std::vector<int32> > *split_alignment);
+
+/// ConvertAlignment converts an alignment that was created using one
+/// model, to another model.  They must use a compatible topology (so we
+/// know the state alignments of the new model).
+/// It returns false if it could not be split to phones (probably
+/// because the alignment was partial), but for other kinds of
+/// error that are more likely a coding error, it will throw
+/// an exception.
+bool ConvertAlignment(const TransitionModel &old_trans_model,
+                      const TransitionModel &new_trans_model,
+                      const ContextDependencyInterface &new_ctx_dep,
+                      const std::vector<int32> &old_alignment,
+                      const std::vector<int32> *phone_map,  // may be NULL
+                      std::vector<int32> *new_alignment);
+
+// ConvertPhnxToProns is only needed in bin/phones-to-prons.cc and
+// isn't closely related with HMMs, but we put it here as there isn't
+// any other obvious place for it and it needs to be tested.
+// This function takes a phone-sequence with word-start and word-end
+// markers in it, and a word-sequence, and outputs the pronunciations
+// "prons"... the format of "prons" is, each element is a vector,
+// where the first element is the word (or zero meaning no word, e.g.
+// for optional silence introduced by the lexicon), and the remaining
+// elements are the phones in the word's pronunciation.
+// It returns false if it encounters a problem of some kind, e.g.
+// if the phone-sequence doesn't seem to have the right number of
+// words in it.
+bool ConvertPhnxToProns(const std::vector<int32> &phnx,
+                        const std::vector<int32> &words,
+                        int32 word_start_sym,
+                        int32 word_end_sym,
+                        std::vector<std::vector<int32> > *prons);
+
+/// @} end "addtogroup hmm_group"
+
+} // end namespace kaldi
+
+
+#endif
author	Determinant <ted.sybil@gmail.com>	2015-08-14 11:51:42 +0800
committer	Determinant <ted.sybil@gmail.com>	2015-08-14 11:51:42 +0800
commit	96a32415ab43377cf1575bd3f4f2980f58028209 (patch)
tree	30a2d92d73e8f40ac87b79f6f56e227bfc4eea6e /kaldi_io/src/kaldi/hmm/hmm-utils.h
parent	c177a7549bd90670af4b29fa813ddea32cfe0f78 (diff)