diff options
Diffstat (limited to 'kaldi_io/src/kaldi/hmm/hmm-utils.h')
-rw-r--r-- | kaldi_io/src/kaldi/hmm/hmm-utils.h | 295 |
1 files changed, 0 insertions, 295 deletions
diff --git a/kaldi_io/src/kaldi/hmm/hmm-utils.h b/kaldi_io/src/kaldi/hmm/hmm-utils.h deleted file mode 100644 index 240f706..0000000 --- a/kaldi_io/src/kaldi/hmm/hmm-utils.h +++ /dev/null @@ -1,295 +0,0 @@ -// hmm/hmm-utils.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_HMM_HMM_UTILS_H_ -#define KALDI_HMM_HMM_UTILS_H_ - -#include "hmm/hmm-topology.h" -#include "hmm/transition-model.h" -#include "lat/kaldi-lattice.h" - -namespace kaldi { - - -/// \defgroup hmm_group_graph Classes and functions for creating FSTs from HMMs -/// \ingroup hmm_group -/// @{ - -/// Configuration class for the GetHTransducer() function; see -/// \ref hmm_graph_config for context. -struct HTransducerConfig { - /// Transition log-prob scale, see \ref hmm_scale. - /// Note this doesn't apply to self-loops; GetHTransducer() does - /// not include self-loops. - BaseFloat transition_scale; - - /// if true, we are constructing time-reversed FST: phone-seqs in ilabel_info - /// are backwards, and we want to output a backwards version of the HMM - /// corresponding to each phone. If reverse == true, - bool reverse; - - /// This variable is only looked at if reverse == true. If reverse == true - /// and push_weights == true, then we push the weights in the reversed FSTs we create for each - /// phone HMM. This is only safe if the HMMs are probabilistic (i.e. not discriminatively - bool push_weights; - - /// delta used if we do push_weights [only relevant if reverse == true - /// and push_weights == true]. - BaseFloat push_delta; - - HTransducerConfig(): - transition_scale(1.0), - reverse(false), - push_weights(true), - push_delta(0.001) - { } - - // Note-- this Register registers the easy-to-register options - // but not the "sym_type" which is an enum and should be handled - // separately in main(). - void Register (OptionsItf *po) { - po->Register("transition-scale", &transition_scale, - "Scale of transition probs (relative to LM)"); - po->Register("reverse", &reverse, - "Set true to build time-reversed FST."); - po->Register("push-weights", &push_weights, - "Push weights (only applicable if reverse == true)"); - po->Register("push-delta", &push_delta, - "Delta used in pushing weights (only applicable if " - "reverse && push-weights"); - } -}; - - -struct HmmCacheHash { - int operator () (const std::pair<int32, std::vector<int32> >&p) const { - VectorHasher<int32> v; - int32 prime = 103049; - return prime*p.first + v(p.second); - } -}; - -/// HmmCacheType is a map from (central-phone, sequence of pdf-ids) to FST, used -/// as cache in GetHmmAsFst, as an optimization. -typedef unordered_map<std::pair<int32, std::vector<int32> >, - fst::VectorFst<fst::StdArc>*, - HmmCacheHash> HmmCacheType; - - -/// Called by GetHTransducer() and probably will not need to be called directly; -/// it creates the FST corresponding to the phone. Does not include self-loops; -/// you have to call AddSelfLoops() for that. Result owned by caller. -/// Returns an acceptor (i.e. ilabels, olabels identical) with transition-ids -/// as the symbols. -/// For documentation in context, see \ref hmm_graph_get_hmm_as_fst -/// @param context_window A vector representing the phonetic context; see -/// \ref tree_window "here" for explanation. -/// @param ctx_dep The object that contains the phonetic decision-tree -/// @param trans_model The transition-model object, which provides -/// the mappings to transition-ids and also the transition -/// probabilities. -/// @param config Configuration object, see \ref HTransducerConfig. -/// @param cache Object used as a lookaside buffer to save computation; -/// if it finds that the object it needs is already there, it will -/// just return a pointer value from "cache"-- not that this means -/// you have to be careful not to delete things twice. - -fst::VectorFst<fst::StdArc> *GetHmmAsFst( - std::vector<int32> context_window, - const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, - const HTransducerConfig &config, - HmmCacheType *cache = NULL); - -/// Included mainly as a form of documentation, not used in any other code -/// currently. Creates the FST with self-loops, and with fewer options. -fst::VectorFst<fst::StdArc>* -GetHmmAsFstSimple(std::vector<int32> context_window, - const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, - BaseFloat prob_scale); - - -/** - * Returns the H tranducer; result owned by caller. - * See \ref hmm_graph_get_h_transducer. The H transducer has on the - * input transition-ids, and also possibly some disambiguation symbols, which - * will be put in disambig_syms. The output side contains the identifiers that - * are indexes into "ilabel_info" (these represent phones-in-context or - * disambiguation symbols). The ilabel_info vector allows GetHTransducer to map - * from symbols to phones-in-context (i.e. phonetic context windows). Any - * singleton symbols in the ilabel_info vector which are not phones, will be - * treated as disambiguation symbols. [Not all recipes use these]. The output - * "disambig_syms_left" will be set to a list of the disambiguation symbols on - * the input of the transducer (i.e. same symbol type as whatever is on the - * input of the transducer - */ -fst::VectorFst<fst::StdArc>* -GetHTransducer (const std::vector<std::vector<int32> > &ilabel_info, - const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, - const HTransducerConfig &config, - std::vector<int32> *disambig_syms_left); - -/** - * GetIlabelMapping produces a mapping that's similar to HTK's logical-to-physical - * model mapping (i.e. the xwrd.clustered.mlist files). It groups together - * "logical HMMs" (i.e. in our world, phonetic context windows) that share the - * same sequence of transition-ids. This can be used in an - * optional graph-creation step that produces a remapped form of CLG that can be - * more productively determinized and minimized. This is used in the command-line program - * make-ilabel-transducer.cc. - * @param ilabel_info_old [in] The original \ref tree_ilabel "ilabel_info" vector - * @param ctx_dep [in] The tree - * @param trans_model [in] The transition-model object - * @param old2new_map [out] The output; this vector, which is of size equal to the - * number of new labels, is a mapping to the old labels such that we could - * create a vector ilabel_info_new such that - * ilabel_info_new[i] == ilabel_info_old[old2new_map[i]] - */ -void GetIlabelMapping (const std::vector<std::vector<int32> > &ilabel_info_old, - const ContextDependencyInterface &ctx_dep, - const TransitionModel &trans_model, - std::vector<int32> *old2new_map); - - - -/** - * For context, see \ref hmm_graph_add_self_loops. Expands an FST that has been - * built without self-loops, and adds the self-loops (it also needs to modify - * the probability of the non-self-loop ones, as the graph without self-loops - * was created in such a way that it was stochastic). Note that the - * disambig_syms will be empty in some recipes (e.g. if you already removed - * the disambiguation symbols). - * @param trans_model [in] Transition model - * @param disambig_syms [in] Sorted, uniq list of disambiguation symbols, required - * if the graph contains disambiguation symbols but only needed for sanity checks. - * @param self_loop_scale [in] Transition-probability scale for self-loops; c.f. - * \ref hmm_scale - * @param reorder [in] If true, reorders the transitions (see \ref hmm_reorder). - * @param fst [in, out] The FST to be modified. - */ -void AddSelfLoops(const TransitionModel &trans_model, - const std::vector<int32> &disambig_syms, // used as a check only. - BaseFloat self_loop_scale, - bool reorder, // true->dan-style, false->lukas-style. - fst::VectorFst<fst::StdArc> *fst); - -/** - * Adds transition-probs, with the supplied - * scales (see \ref hmm_scale), to the graph. - * Useful if you want to create a graph without transition probs, then possibly - * train the model (including the transition probs) but keep the graph fixed, - * and add back in the transition probs. It assumes the fst has transition-ids - * on it. It is not an error if the FST has no states (nothing will be done). - * @param trans_model [in] The transition model - * @param disambig_syms [in] A list of disambiguation symbols, required if the - * graph has disambiguation symbols on its input but only - * used for checks. - * @param transition_scale [in] A scale on transition-probabilities apart from - * those involving self-loops; see \ref hmm_scale. - * @param self_loop_scale [in] A scale on self-loop transition probabilities; - * see \ref hmm_scale. - * @param fst [in, out] The FST to be modified. - */ -void AddTransitionProbs(const TransitionModel &trans_model, - const std::vector<int32> &disambig_syms, - BaseFloat transition_scale, - BaseFloat self_loop_scale, - fst::VectorFst<fst::StdArc> *fst); - -/** - This is as AddSelfLoops(), but operates on a Lattice, where - it affects the graph part of the weight (the first element - of the pair). */ -void AddTransitionProbs(const TransitionModel &trans_model, - BaseFloat transition_scale, - BaseFloat self_loop_scale, - Lattice *lat); - - -/// Returns a transducer from pdfs plus one (input) to transition-ids (output). -/// Currenly of use only for testing. -fst::VectorFst<fst::StdArc>* -GetPdfToTransitionIdTransducer(const TransitionModel &trans_model); - -/// Converts all transition-ids in the FST to pdfs plus one. -/// Placeholder: not implemented yet! -void ConvertTransitionIdsToPdfs(const TransitionModel &trans_model, - const std::vector<int32> &disambig_syms, - fst::VectorFst<fst::StdArc> *fst); - -/// @} end "defgroup hmm_group_graph" - -/// \addtogroup hmm_group -/// @{ - -/// SplitToPhones splits up the TransitionIds in "alignment" into their -/// individual phones (one vector per instance of a phone). At output, -/// the sum of the sizes of the vectors in split_alignment will be the same -/// as the corresponding sum for "alignment". The function returns -/// true on success. If the alignment appears to be incomplete, e.g. -/// not ending at the end-state of a phone, it will still break it up into -/// phones but it will return false. For more serious errors it will -/// die or throw an exception. -/// This function works out by itself whether the graph was created -/// with "reordering" (dan-style graph), and just does the right thing. - -bool SplitToPhones(const TransitionModel &trans_model, - const std::vector<int32> &alignment, - std::vector<std::vector<int32> > *split_alignment); - -/// ConvertAlignment converts an alignment that was created using one -/// model, to another model. They must use a compatible topology (so we -/// know the state alignments of the new model). -/// It returns false if it could not be split to phones (probably -/// because the alignment was partial), but for other kinds of -/// error that are more likely a coding error, it will throw -/// an exception. -bool ConvertAlignment(const TransitionModel &old_trans_model, - const TransitionModel &new_trans_model, - const ContextDependencyInterface &new_ctx_dep, - const std::vector<int32> &old_alignment, - const std::vector<int32> *phone_map, // may be NULL - std::vector<int32> *new_alignment); - -// ConvertPhnxToProns is only needed in bin/phones-to-prons.cc and -// isn't closely related with HMMs, but we put it here as there isn't -// any other obvious place for it and it needs to be tested. -// This function takes a phone-sequence with word-start and word-end -// markers in it, and a word-sequence, and outputs the pronunciations -// "prons"... the format of "prons" is, each element is a vector, -// where the first element is the word (or zero meaning no word, e.g. -// for optional silence introduced by the lexicon), and the remaining -// elements are the phones in the word's pronunciation. -// It returns false if it encounters a problem of some kind, e.g. -// if the phone-sequence doesn't seem to have the right number of -// words in it. -bool ConvertPhnxToProns(const std::vector<int32> &phnx, - const std::vector<int32> &words, - int32 word_start_sym, - int32 word_end_sym, - std::vector<std::vector<int32> > *prons); - -/// @} end "addtogroup hmm_group" - -} // end namespace kaldi - - -#endif |