summaryrefslogtreecommitdiff
path: root/kaldi_io/src/kaldi/hmm
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/kaldi/hmm')
-rw-r--r--kaldi_io/src/kaldi/hmm/hmm-topology.h172
-rw-r--r--kaldi_io/src/kaldi/hmm/hmm-utils.h295
-rw-r--r--kaldi_io/src/kaldi/hmm/posterior.h214
-rw-r--r--kaldi_io/src/kaldi/hmm/transition-model.h345
-rw-r--r--kaldi_io/src/kaldi/hmm/tree-accu.h69
5 files changed, 0 insertions, 1095 deletions
diff --git a/kaldi_io/src/kaldi/hmm/hmm-topology.h b/kaldi_io/src/kaldi/hmm/hmm-topology.h
deleted file mode 100644
index 53ca427..0000000
--- a/kaldi_io/src/kaldi/hmm/hmm-topology.h
+++ /dev/null
@@ -1,172 +0,0 @@
-// hmm/hmm-topology.h
-
-// Copyright 2009-2011 Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_HMM_HMM_TOPOLOGY_H_
-#define KALDI_HMM_HMM_TOPOLOGY_H_
-
-#include "base/kaldi-common.h"
-#include "tree/context-dep.h"
-#include "util/const-integer-set.h"
-
-
-namespace kaldi {
-
-
-/// \addtogroup hmm_group
-/// @{
-
-/*
- // The following would be the text form for the "normal" HMM topology.
- // Note that the first state is the start state, and the final state,
- // which must have no output transitions and must be nonemitting, has
- // an exit probability of one (no other state can have nonzero exit
- // probability; you can treat the transition probability to the final
- // state as an exit probability).
- // Note also that it's valid to omit the "<PdfClass>" entry of the <State>, which
- // will mean we won't have a pdf on that state [non-emitting state]. This is equivalent
- // to setting the <PdfClass> to -1. We do this normally just for the final state.
- // The Topology object can have multiple <TopologyEntry> blocks.
- // This is useful if there are multiple types of topology in the system.
-
- <Topology>
- <TopologyEntry>
- <ForPhones> 1 2 3 4 5 6 7 8 </ForPhones>
- <State> 0 <PdfClass> 0
- <Transition> 0 0.5
- <Transition> 1 0.5
- </State>
- <State> 1 <PdfClass> 1
- <Transition> 1 0.5
- <Transition> 2 0.5
- </State>
- <State> 2 <PdfClass> 2
- <Transition> 2 0.5
- <Transition> 3 0.5
- <Final> 0.5
- </State>
- <State> 3
- </State>
- </TopologyEntry>
- </Topology>
-*/
-
-// kNoPdf is used where a pdf_class or pdf would otherwise be used, to indicate
-// that none is there. Mainly useful in skippable models, but also used
-// for end states.
-// A caveat with nonemitting states is that their out-transitions
-// are not trainable, due to technical issues with the way
-// we decided to accumulate the stats. Any transitions arising from (*)
-// HMM states with "kNoPdf" as the label are second-class transitions:
-// they do not have "transition-states" or "transition-ids" associated
-// with them. They are used to create the FST version of the
-// HMMs, where they lead to epsilon arcs.
-// (*) "arising from" is a bit of a technical term here, due to the way
-// (if reorder == true), we put the transition-id associated with the
-// outward arcs of the state, on the input transition to the state.
-
-/// A constant used in the HmmTopology class as the \ref pdf_class "pdf-class"
-/// kNoPdf, which is used when a HMM-state is nonemitting (has no associated
-/// PDF).
-
-static const int32 kNoPdf = -1;
-
-/// A class for storing topology information for phones. See \ref hmm for context.
-/// This object is sometimes accessed in a file by itself, but more often
-/// as a class member of the TransitionModel class (this is for convenience to reduce
-/// the number of files programs have to access).
-
-class HmmTopology {
- public:
- /// A structure defined inside HmmTopology to represent a HMM state.
- struct HmmState {
- /// The \ref pdf_class pdf-class, typically 0, 1 or 2 (the same as the HMM-state index),
- /// but may be different to enable us to hardwire sharing of state, and may be
- /// equal to \ref kNoPdf == -1 in order to specify nonemitting states (unusual).
- int32 pdf_class;
-
- /// A list of transitions. The first member of each pair is the index of
- /// the next HmmState, and the second is the default transition probability
- /// (before training).
- std::vector<std::pair<int32, BaseFloat> > transitions;
-
- explicit HmmState(int32 p): pdf_class(p) { }
-
- bool operator == (const HmmState &other) const {
- return (pdf_class == other.pdf_class && transitions == other.transitions);
- }
-
- HmmState(): pdf_class(-1) { }
- };
-
- /// TopologyEntry is a typedef that represents the topology of
- /// a single (prototype) phone, as a vector of its HMM states.
- typedef std::vector<HmmState> TopologyEntry;
-
- void Read(std::istream &is, bool binary);
- void Write(std::ostream &os, bool binary) const;
-
- // Checks that the object is valid, and throws an exception otherwise.
- void Check();
-
-
- /// Returns the topology entry (i.e. vector of HmmState) for this phone;
- /// will throw exception if phone not covered by the topology.
- const TopologyEntry &TopologyForPhone(int32 phone) const;
-
- /// Returns the number of \ref pdf_class "pdf-classes" for this phone;
- /// throws exception if phone not covered by this topology.
- int32 NumPdfClasses(int32 phone) const;
-
- /// Returns a reference to a sorted, unique list of phones covered by
- /// the topology (these phones will be positive integers, and usually
- /// contiguous and starting from one but the toolkit doesn't assume
- /// they are contiguous).
- const std::vector<int32> &GetPhones() const { return phones_; };
-
- /// Outputs a vector of int32, indexed by phone, that gives the
- /// number of \ref pdf_class pdf-classes for the phones; this is
- /// used by tree-building code such as BuildTree().
- void GetPhoneToNumPdfClasses(std::vector<int32> *phone2num_pdf_classes) const;
-
- HmmTopology() {}
-
- bool operator == (const HmmTopology &other) const {
- return phones_ == other.phones_ && phone2idx_ == other.phone2idx_
- && entries_ == other.entries_;
- }
- // Allow default assignment operator and copy constructor.
- private:
- std::vector<int32> phones_; // list of all phones we have topology for. Sorted, uniq. no epsilon (zero) phone.
- std::vector<int32> phone2idx_; // map from phones to indexes into the entries vector (or -1 for not present).
- std::vector<TopologyEntry> entries_;
-};
-
-
-/// This function returns a HmmTopology object giving a normal 3-state topology,
-/// covering all phones in the list "phones". This is mainly of use in testing
-/// code.
-HmmTopology GetDefaultTopology(const std::vector<int32> &phones);
-
-/// @} end "addtogroup hmm_group"
-
-
-} // end namespace kaldi
-
-
-#endif
diff --git a/kaldi_io/src/kaldi/hmm/hmm-utils.h b/kaldi_io/src/kaldi/hmm/hmm-utils.h
deleted file mode 100644
index 240f706..0000000
--- a/kaldi_io/src/kaldi/hmm/hmm-utils.h
+++ /dev/null
@@ -1,295 +0,0 @@
-// hmm/hmm-utils.h
-
-// Copyright 2009-2011 Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_HMM_HMM_UTILS_H_
-#define KALDI_HMM_HMM_UTILS_H_
-
-#include "hmm/hmm-topology.h"
-#include "hmm/transition-model.h"
-#include "lat/kaldi-lattice.h"
-
-namespace kaldi {
-
-
-/// \defgroup hmm_group_graph Classes and functions for creating FSTs from HMMs
-/// \ingroup hmm_group
-/// @{
-
-/// Configuration class for the GetHTransducer() function; see
-/// \ref hmm_graph_config for context.
-struct HTransducerConfig {
- /// Transition log-prob scale, see \ref hmm_scale.
- /// Note this doesn't apply to self-loops; GetHTransducer() does
- /// not include self-loops.
- BaseFloat transition_scale;
-
- /// If true, we are constructing a time-reversed FST: phone-seqs in ilabel_info
- /// are backwards, and we want to output a backwards version of the HMM
- /// corresponding to each phone.
- bool reverse;
-
- /// This variable is only looked at if reverse == true. If reverse == true
- /// and push_weights == true, then we push the weights in the reversed FSTs we create for each
- /// phone HMM. This is only safe if the HMMs are probabilistic (i.e. not discriminatively trained).
- bool push_weights;
-
- /// delta used if we do push_weights [only relevant if reverse == true
- /// and push_weights == true].
- BaseFloat push_delta;
-
- HTransducerConfig():
- transition_scale(1.0),
- reverse(false),
- push_weights(true),
- push_delta(0.001)
- { }
-
- // Note-- this Register registers the easy-to-register options
- // but not the "sym_type" which is an enum and should be handled
- // separately in main().
- void Register (OptionsItf *po) {
- po->Register("transition-scale", &transition_scale,
- "Scale of transition probs (relative to LM)");
- po->Register("reverse", &reverse,
- "Set true to build time-reversed FST.");
- po->Register("push-weights", &push_weights,
- "Push weights (only applicable if reverse == true)");
- po->Register("push-delta", &push_delta,
- "Delta used in pushing weights (only applicable if "
- "reverse && push-weights");
- }
-};
-
-
-struct HmmCacheHash {
- int operator () (const std::pair<int32, std::vector<int32> >&p) const {
- VectorHasher<int32> v;
- int32 prime = 103049;
- return prime*p.first + v(p.second);
- }
-};
-
-/// HmmCacheType is a map from (central-phone, sequence of pdf-ids) to FST, used
-/// as cache in GetHmmAsFst, as an optimization.
-typedef unordered_map<std::pair<int32, std::vector<int32> >,
- fst::VectorFst<fst::StdArc>*,
- HmmCacheHash> HmmCacheType;
-
-
-/// Called by GetHTransducer() and probably will not need to be called directly;
-/// it creates the FST corresponding to the phone. Does not include self-loops;
-/// you have to call AddSelfLoops() for that. Result owned by caller.
-/// Returns an acceptor (i.e. ilabels, olabels identical) with transition-ids
-/// as the symbols.
-/// For documentation in context, see \ref hmm_graph_get_hmm_as_fst
-/// @param context_window A vector representing the phonetic context; see
-/// \ref tree_window "here" for explanation.
-/// @param ctx_dep The object that contains the phonetic decision-tree
-/// @param trans_model The transition-model object, which provides
-/// the mappings to transition-ids and also the transition
-/// probabilities.
-/// @param config Configuration object, see \ref HTransducerConfig.
-/// @param cache Object used as a lookaside buffer to save computation;
-/// if it finds that the object it needs is already there, it will
-/// just return a pointer value from "cache"-- note that this means
-/// you have to be careful not to delete things twice.
-
-fst::VectorFst<fst::StdArc> *GetHmmAsFst(
- std::vector<int32> context_window,
- const ContextDependencyInterface &ctx_dep,
- const TransitionModel &trans_model,
- const HTransducerConfig &config,
- HmmCacheType *cache = NULL);
-
-/// Included mainly as a form of documentation, not used in any other code
-/// currently. Creates the FST with self-loops, and with fewer options.
-fst::VectorFst<fst::StdArc>*
-GetHmmAsFstSimple(std::vector<int32> context_window,
- const ContextDependencyInterface &ctx_dep,
- const TransitionModel &trans_model,
- BaseFloat prob_scale);
-
-
-/**
- * Returns the H transducer; result owned by caller.
- * See \ref hmm_graph_get_h_transducer. The H transducer has on the
- * input transition-ids, and also possibly some disambiguation symbols, which
- * will be put in disambig_syms. The output side contains the identifiers that
- * are indexes into "ilabel_info" (these represent phones-in-context or
- * disambiguation symbols). The ilabel_info vector allows GetHTransducer to map
- * from symbols to phones-in-context (i.e. phonetic context windows). Any
- * singleton symbols in the ilabel_info vector which are not phones, will be
- * treated as disambiguation symbols. [Not all recipes use these]. The output
- * "disambig_syms_left" will be set to a list of the disambiguation symbols on
- * the input of the transducer (i.e. same symbol type as whatever is on the
- * input of the transducer).
- */
-fst::VectorFst<fst::StdArc>*
-GetHTransducer (const std::vector<std::vector<int32> > &ilabel_info,
- const ContextDependencyInterface &ctx_dep,
- const TransitionModel &trans_model,
- const HTransducerConfig &config,
- std::vector<int32> *disambig_syms_left);
-
-/**
- * GetIlabelMapping produces a mapping that's similar to HTK's logical-to-physical
- * model mapping (i.e. the xwrd.clustered.mlist files). It groups together
- * "logical HMMs" (i.e. in our world, phonetic context windows) that share the
- * same sequence of transition-ids. This can be used in an
- * optional graph-creation step that produces a remapped form of CLG that can be
- * more productively determinized and minimized. This is used in the command-line program
- * make-ilabel-transducer.cc.
- * @param ilabel_info_old [in] The original \ref tree_ilabel "ilabel_info" vector
- * @param ctx_dep [in] The tree
- * @param trans_model [in] The transition-model object
- * @param old2new_map [out] The output; this vector, which is of size equal to the
- * number of new labels, is a mapping to the old labels such that we could
- * create a vector ilabel_info_new such that
- * ilabel_info_new[i] == ilabel_info_old[old2new_map[i]]
- */
-void GetIlabelMapping (const std::vector<std::vector<int32> > &ilabel_info_old,
- const ContextDependencyInterface &ctx_dep,
- const TransitionModel &trans_model,
- std::vector<int32> *old2new_map);
-
-
-
-/**
- * For context, see \ref hmm_graph_add_self_loops. Expands an FST that has been
- * built without self-loops, and adds the self-loops (it also needs to modify
- * the probability of the non-self-loop ones, as the graph without self-loops
- * was created in such a way that it was stochastic). Note that the
- * disambig_syms will be empty in some recipes (e.g. if you already removed
- * the disambiguation symbols).
- * @param trans_model [in] Transition model
- * @param disambig_syms [in] Sorted, uniq list of disambiguation symbols, required
- * if the graph contains disambiguation symbols but only needed for sanity checks.
- * @param self_loop_scale [in] Transition-probability scale for self-loops; c.f.
- * \ref hmm_scale
- * @param reorder [in] If true, reorders the transitions (see \ref hmm_reorder).
- * @param fst [in, out] The FST to be modified.
- */
-void AddSelfLoops(const TransitionModel &trans_model,
- const std::vector<int32> &disambig_syms, // used as a check only.
- BaseFloat self_loop_scale,
- bool reorder, // true->dan-style, false->lukas-style.
- fst::VectorFst<fst::StdArc> *fst);
-
-/**
- * Adds transition-probs, with the supplied
- * scales (see \ref hmm_scale), to the graph.
- * Useful if you want to create a graph without transition probs, then possibly
- * train the model (including the transition probs) but keep the graph fixed,
- * and add back in the transition probs. It assumes the fst has transition-ids
- * on it. It is not an error if the FST has no states (nothing will be done).
- * @param trans_model [in] The transition model
- * @param disambig_syms [in] A list of disambiguation symbols, required if the
- * graph has disambiguation symbols on its input but only
- * used for checks.
- * @param transition_scale [in] A scale on transition-probabilities apart from
- * those involving self-loops; see \ref hmm_scale.
- * @param self_loop_scale [in] A scale on self-loop transition probabilities;
- * see \ref hmm_scale.
- * @param fst [in, out] The FST to be modified.
- */
-void AddTransitionProbs(const TransitionModel &trans_model,
- const std::vector<int32> &disambig_syms,
- BaseFloat transition_scale,
- BaseFloat self_loop_scale,
- fst::VectorFst<fst::StdArc> *fst);
-
-/**
- This is as AddTransitionProbs(), but operates on a Lattice, where
- it affects the graph part of the weight (the first element
- of the pair). */
-void AddTransitionProbs(const TransitionModel &trans_model,
- BaseFloat transition_scale,
- BaseFloat self_loop_scale,
- Lattice *lat);
-
-
-/// Returns a transducer from pdfs plus one (input) to transition-ids (output).
-/// Currently of use only for testing.
-fst::VectorFst<fst::StdArc>*
-GetPdfToTransitionIdTransducer(const TransitionModel &trans_model);
-
-/// Converts all transition-ids in the FST to pdfs plus one.
-/// Placeholder: not implemented yet!
-void ConvertTransitionIdsToPdfs(const TransitionModel &trans_model,
- const std::vector<int32> &disambig_syms,
- fst::VectorFst<fst::StdArc> *fst);
-
-/// @} end "defgroup hmm_group_graph"
-
-/// \addtogroup hmm_group
-/// @{
-
-/// SplitToPhones splits up the TransitionIds in "alignment" into their
-/// individual phones (one vector per instance of a phone). At output,
-/// the sum of the sizes of the vectors in split_alignment will be the same
-/// as the corresponding sum for "alignment". The function returns
-/// true on success. If the alignment appears to be incomplete, e.g.
-/// not ending at the end-state of a phone, it will still break it up into
-/// phones but it will return false. For more serious errors it will
-/// die or throw an exception.
-/// This function works out by itself whether the graph was created
-/// with "reordering" (dan-style graph), and just does the right thing.
-
-bool SplitToPhones(const TransitionModel &trans_model,
- const std::vector<int32> &alignment,
- std::vector<std::vector<int32> > *split_alignment);
-
-/// ConvertAlignment converts an alignment that was created using one
-/// model, to another model. They must use a compatible topology (so we
-/// know the state alignments of the new model).
-/// It returns false if it could not be split to phones (probably
-/// because the alignment was partial), but for other kinds of
-/// error that are more likely a coding error, it will throw
-/// an exception.
-bool ConvertAlignment(const TransitionModel &old_trans_model,
- const TransitionModel &new_trans_model,
- const ContextDependencyInterface &new_ctx_dep,
- const std::vector<int32> &old_alignment,
- const std::vector<int32> *phone_map, // may be NULL
- std::vector<int32> *new_alignment);
-
-// ConvertPhnxToProns is only needed in bin/phones-to-prons.cc and
-// isn't closely related with HMMs, but we put it here as there isn't
-// any other obvious place for it and it needs to be tested.
-// This function takes a phone-sequence with word-start and word-end
-// markers in it, and a word-sequence, and outputs the pronunciations
-// "prons"... the format of "prons" is, each element is a vector,
-// where the first element is the word (or zero meaning no word, e.g.
-// for optional silence introduced by the lexicon), and the remaining
-// elements are the phones in the word's pronunciation.
-// It returns false if it encounters a problem of some kind, e.g.
-// if the phone-sequence doesn't seem to have the right number of
-// words in it.
-bool ConvertPhnxToProns(const std::vector<int32> &phnx,
- const std::vector<int32> &words,
- int32 word_start_sym,
- int32 word_end_sym,
- std::vector<std::vector<int32> > *prons);
-
-/// @} end "addtogroup hmm_group"
-
-} // end namespace kaldi
-
-
-#endif
diff --git a/kaldi_io/src/kaldi/hmm/posterior.h b/kaldi_io/src/kaldi/hmm/posterior.h
deleted file mode 100644
index be73be9..0000000
--- a/kaldi_io/src/kaldi/hmm/posterior.h
+++ /dev/null
@@ -1,214 +0,0 @@
-// hmm/posterior.h
-
-// Copyright 2009-2011 Microsoft Corporation
-// 2013-2014 Johns Hopkins University (author: Daniel Povey)
-// 2014 Guoguo Chen
-
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_HMM_POSTERIOR_H_
-#define KALDI_HMM_POSTERIOR_H_
-
-#include "base/kaldi-common.h"
-#include "tree/context-dep.h"
-#include "util/const-integer-set.h"
-#include "util/kaldi-table.h"
-#include "hmm/transition-model.h"
-
-
-namespace kaldi {
-
-
-/// \addtogroup posterior_group
-/// @{
-
-/// Posterior is a typedef for storing acoustic-state (actually, transition-id)
-/// posteriors over an utterance. The "int32" is a transition-id, and the BaseFloat
-/// is a probability (typically between zero and one).
-typedef std::vector<std::vector<std::pair<int32, BaseFloat> > > Posterior;
-
-/// GaussPost is a typedef for storing Gaussian-level posteriors for an utterance.
-/// The "int32" is a pdf-id (see the WARNING below), and the Vector<BaseFloat> is
-/// a vector of Gaussian posteriors.
-/// WARNING: We changed "int32" from transition-id to pdf-id, and the change is
-/// applied for all programs using GaussPost. This is for efficiency purpose. We
-/// also changed the name slightly from GauPost to GaussPost to reduce the
-/// chance that the change will go un-noticed in downstream code.
-typedef std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > > GaussPost;
-
-
-// PosteriorHolder is a holder for Posterior, which is
-// std::vector<std::vector<std::pair<int32, BaseFloat> > >
-// This is used for storing posteriors of transition id's for an
-// utterance.
-class PosteriorHolder {
- public:
- typedef Posterior T;
-
- PosteriorHolder() { }
-
- static bool Write(std::ostream &os, bool binary, const T &t);
-
- void Clear() { Posterior tmp; std::swap(tmp, t_); }
-
- // Reads into the holder.
- bool Read(std::istream &is);
-
- // Kaldi objects always have the stream open in binary mode for
- // reading.
- static bool IsReadInBinary() { return true; }
-
- const T &Value() const { return t_; }
-
- private:
- KALDI_DISALLOW_COPY_AND_ASSIGN(PosteriorHolder);
- T t_;
-};
-
-
-// GaussPostHolder is a holder for GaussPost, which is
-// std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > >
-// This is used for storing Gaussian-level posteriors for an utterance
-// (indexed by pdf-id; see the WARNING on the GaussPost typedef).
-class GaussPostHolder {
- public:
- typedef GaussPost T;
-
- GaussPostHolder() { }
-
- static bool Write(std::ostream &os, bool binary, const T &t);
-
- void Clear() { GaussPost tmp; std::swap(tmp, t_); }
-
- // Reads into the holder.
- bool Read(std::istream &is);
-
- // Kaldi objects always have the stream open in binary mode for
- // reading.
- static bool IsReadInBinary() { return true; }
-
- const T &Value() const { return t_; }
-
- private:
- KALDI_DISALLOW_COPY_AND_ASSIGN(GaussPostHolder);
- T t_;
-};
-
-
-// Posterior is a typedef: vector<vector<pair<int32, BaseFloat> > >,
-// representing posteriors over (typically) transition-ids for an
-// utterance.
-typedef TableWriter<PosteriorHolder> PosteriorWriter;
-typedef SequentialTableReader<PosteriorHolder> SequentialPosteriorReader;
-typedef RandomAccessTableReader<PosteriorHolder> RandomAccessPosteriorReader;
-
-
-// typedef std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > > GaussPost;
-typedef TableWriter<GaussPostHolder> GaussPostWriter;
-typedef SequentialTableReader<GaussPostHolder> SequentialGaussPostReader;
-typedef RandomAccessTableReader<GaussPostHolder> RandomAccessGaussPostReader;
-
-
-/// Scales the BaseFloat (weight) element in the posterior entries.
-void ScalePosterior(BaseFloat scale, Posterior *post);
-
-/// Returns the total of all the weights in "post".
-BaseFloat TotalPosterior(const Posterior &post);
-
-/// Returns true if the two lists of pairs have no common .first element.
-bool PosteriorEntriesAreDisjoint(
- const std::vector<std::pair<int32, BaseFloat> > &post_elem1,
- const std::vector<std::pair<int32, BaseFloat> > &post_elem2);
-
-
-/// Merge two sets of posteriors, which must have the same length. If "merge"
-/// is true, it will make a common entry whenever there are duplicated entries,
-/// adding up the weights. If "drop_frames" is true, for frames where the
-/// two sets of posteriors were originally disjoint, makes no entries for that
-/// frame (relates to frame dropping, or drop_frames, see Vesely et al, ICASSP
-/// 2013). Returns the number of frames for which the two posteriors were
-/// disjoint (i.e. no common transition-ids or whatever index we are using).
-int32 MergePosteriors(const Posterior &post1,
- const Posterior &post2,
- bool merge,
- bool drop_frames,
- Posterior *post);
-
-/// Given a vector of log-likelihoods (typically of Gaussians in a GMM
-/// but could be of pdf-ids), a number gselect >= 1 and a minimum posterior
-/// 0 <= min_post < 1, it gets the posterior for each element of log-likes
-/// by applying Softmax(), then prunes the posteriors using "gselect" and
-/// "min_post" (keeping at least one), and outputs the result into
-/// "post_entry", sorted from greatest to least posterior.
-/// Returns the total log-likelihood (the output of calling ApplySoftMax()
-/// on a copy of log_likes).
-BaseFloat VectorToPosteriorEntry(
- const VectorBase<BaseFloat> &log_likes,
- int32 num_gselect,
- BaseFloat min_post,
- std::vector<std::pair<int32, BaseFloat> > *post_entry);
-
-/// Convert an alignment to a posterior (with a scale of 1.0 on
-/// each entry).
-void AlignmentToPosterior(const std::vector<int32> &ali,
- Posterior *post);
-
-/// Sorts posterior entries so that transition-ids with same pdf-id are next to
-/// each other.
-void SortPosteriorByPdfs(const TransitionModel &tmodel,
- Posterior *post);
-
-
-/// Converts a posterior over transition-ids to be a posterior
-/// over pdf-ids.
-void ConvertPosteriorToPdfs(const TransitionModel &tmodel,
- const Posterior &post_in,
- Posterior *post_out);
-
-/// Converts a posterior over transition-ids to be a posterior
-/// over phones.
-void ConvertPosteriorToPhones(const TransitionModel &tmodel,
- const Posterior &post_in,
- Posterior *post_out);
-
-/// Weight any silence phones in the posterior (i.e. any phones
-/// in the set "silence_set") by scale "silence_scale".
-/// The interface was changed in Feb 2014 to do the modification
-/// "in-place" rather than having separate input and output.
-void WeightSilencePost(const TransitionModel &trans_model,
- const ConstIntegerSet<int32> &silence_set,
- BaseFloat silence_scale,
- Posterior *post);
-
-/// This is similar to WeightSilencePost, except that on each frame it
-/// works out the amount by which the overall posterior would be reduced,
-/// and scales down everything on that frame by the same amount. It
-/// has the effect that frames that are mostly silence get down-weighted.
-/// The interface was changed in Feb 2014 to do the modification
-/// "in-place" rather than having separate input and output.
-void WeightSilencePostDistributed(const TransitionModel &trans_model,
- const ConstIntegerSet<int32> &silence_set,
- BaseFloat silence_scale,
- Posterior *post);
-
-/// @} end "addtogroup posterior_group"
-
-
-} // end namespace kaldi
-
-
-#endif
diff --git a/kaldi_io/src/kaldi/hmm/transition-model.h b/kaldi_io/src/kaldi/hmm/transition-model.h
deleted file mode 100644
index ccc4f11..0000000
--- a/kaldi_io/src/kaldi/hmm/transition-model.h
+++ /dev/null
@@ -1,345 +0,0 @@
-// hmm/transition-model.h
-
-// Copyright 2009-2012 Microsoft Corporation
-// Johns Hopkins University (author: Guoguo Chen)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_HMM_TRANSITION_MODEL_H_
-#define KALDI_HMM_TRANSITION_MODEL_H_
-
-#include "base/kaldi-common.h"
-#include "tree/context-dep.h"
-#include "util/const-integer-set.h"
-#include "fst/fst-decl.h" // forward declarations.
-#include "hmm/hmm-topology.h"
-#include "itf/options-itf.h"
-
-namespace kaldi {
-
-/// \addtogroup hmm_group
-/// @{
-
-// The class TransitionModel is a repository for the transition probabilities.
-// It also handles certain integer mappings.
-// The basic model is as follows. Each phone has a HMM topology defined in
-// hmm-topology.h. Each HMM-state of each of these phones has a number of
-// transitions (and final-probs) out of it. Each HMM-state defined in the
-// HmmTopology class has an associated "pdf_class". This gets replaced with
-// an actual pdf-id via the tree. The transition model associates the
-// transition probs with the (phone, HMM-state, pdf-id). We associate with
-// each such triple a transition-state. Each
-// transition-state has a number of associated probabilities to estimate;
-// this depends on the number of transitions/final-probs in the topology for
-// that (phone, HMM-state). Each probability has an associated transition-index.
-// We associate with each (transition-state, transition-index) a unique transition-id.
-// Each individual probability estimated by the transition-model is associated with a
-// transition-id.
-//
-// List of the various types of quantity referred to here and what they mean:
-// phone: a phone index (1, 2, 3 ...)
-// HMM-state: a number (0, 1, 2...) that indexes TopologyEntry (see hmm-topology.h)
-// pdf-id: a number output by the Compute function of ContextDependency (it
-// indexes pdf's). Zero-based.
-// transition-state: the states for which we estimate transition probabilities for transitions
-// out of them. In some topologies, will map one-to-one with pdf-ids.
-// One-based, since it appears on FSTs.
-// transition-index: identifier of a transition (or final-prob) in the HMM. Indexes the
-// "transitions" vector in HmmTopology::HmmState. [if it is out of range,
-// equal to transitions.size(), it refers to the final-prob.]
-// Zero-based.
-// transition-id: identifier of a unique parameter of the TransitionModel.
-// Associated with a (transition-state, transition-index) pair.
-// One-based, since it appears on FSTs.
-//
-// List of the possible mappings TransitionModel can do:
-// (phone, HMM-state, pdf-id) -> transition-state
-// (transition-state, transition-index) -> transition-id
-// Reverse mappings:
-// transition-id -> transition-state
-// transition-id -> transition-index
-// transition-state -> phone
-// transition-state -> HMM-state
-// transition-state -> pdf-id
-//
-// The main things the TransitionModel object can do are:
-// Get initialized (need ContextDependency and HmmTopology objects).
-// Read/write.
-// Update [given a vector of counts indexed by transition-id].
-// Do the various integer mappings mentioned above.
-// Get the probability (or log-probability) associated with a particular transition-id.
-
-
-// Note: this was previously called TransitionUpdateConfig.
-struct MleTransitionUpdateConfig {
- BaseFloat floor;
- BaseFloat mincount;
- bool share_for_pdfs; // If true, share all transition parameters that have the same pdf.
- MleTransitionUpdateConfig(BaseFloat floor = 0.01,
- BaseFloat mincount = 5.0,
- bool share_for_pdfs = false):
- floor(floor), mincount(mincount), share_for_pdfs(share_for_pdfs) {}
-
- void Register (OptionsItf *po) {
- po->Register("transition-floor", &floor,
- "Floor for transition probabilities");
- po->Register("transition-min-count", &mincount,
- "Minimum count required to update transitions from a state");
- po->Register("share-for-pdfs", &share_for_pdfs,
- "If true, share all transition parameters where the states "
- "have the same pdf.");
- }
-};
-
-struct MapTransitionUpdateConfig {
- BaseFloat tau;
- bool share_for_pdfs; // If true, share all transition parameters that have the same pdf.
- MapTransitionUpdateConfig(): tau(5.0), share_for_pdfs(false) { }
-
- void Register (OptionsItf *po) {
- po->Register("transition-tau", &tau, "Tau value for MAP estimation of transition "
- "probabilities.");
- po->Register("share-for-pdfs", &share_for_pdfs,
- "If true, share all transition parameters where the states "
- "have the same pdf.");
- }
-};
-
-class TransitionModel {
-
- public:
- /// Initialize the object [e.g. at the start of training].
- /// The class keeps a copy of the HmmTopology object, but not
- /// the ContextDependency object.
- TransitionModel(const ContextDependency &ctx_dep,
- const HmmTopology &hmm_topo);
-
-
- /// Constructor that takes no arguments: typically used prior to calling Read.
- TransitionModel() { }
-
- void Read(std::istream &is, bool binary); // note, no symbol table: topo object always read/written w/o symbols.
- void Write(std::ostream &os, bool binary) const;
-
-
- /// return reference to HMM-topology object.
- const HmmTopology &GetTopo() const { return topo_; }
-
- /// \name Integer mapping functions
- /// @{
-
- int32 TripleToTransitionState(int32 phone, int32 hmm_state, int32 pdf) const;
- int32 PairToTransitionId(int32 trans_state, int32 trans_index) const;
- int32 TransitionIdToTransitionState(int32 trans_id) const;
- int32 TransitionIdToTransitionIndex(int32 trans_id) const;
- int32 TransitionStateToPhone(int32 trans_state) const;
- int32 TransitionStateToHmmState(int32 trans_state) const;
- int32 TransitionStateToPdf(int32 trans_state) const;
- int32 SelfLoopOf(int32 trans_state) const; // returns the self-loop transition-id, or zero if
- // this state doesn't have a self-loop.
-
- inline int32 TransitionIdToPdf(int32 trans_id) const;
- int32 TransitionIdToPhone(int32 trans_id) const;
- int32 TransitionIdToPdfClass(int32 trans_id) const;
- int32 TransitionIdToHmmState(int32 trans_id) const;
-
- /// @}
-
- bool IsFinal(int32 trans_id) const; // returns true if this trans_id goes to the final state
- // (which is bound to be nonemitting).
- bool IsSelfLoop(int32 trans_id) const; // return true if this trans_id corresponds to a self-loop.
-
- /// Returns the total number of transition-ids (note, these are one-based).
- inline int32 NumTransitionIds() const { return id2state_.size()-1; }
-
- /// Returns the number of transition-indices for a particular transition-state.
- /// Note: "Indices" is the plural of "index". Index is not the same as "id",
- /// here. A transition-index is a zero-based offset into the transitions
- /// out of a particular transition state.
- int32 NumTransitionIndices(int32 trans_state) const;
-
- /// Returns the total number of transition-states (note, these are one-based).
- int32 NumTransitionStates() const { return triples_.size(); }
-
- // NumPdfs() actually returns the highest-numbered pdf we ever saw, plus one.
- // In normal cases this should equal the number of pdfs in the system, but if you
- // initialized this object with fewer than all the phones, and it happens that
- // an unseen phone has the highest-numbered pdf, this might be different.
- int32 NumPdfs() const { return num_pdfs_; }
-
- // This loops over the triples and finds the highest phone index present. If
- // the FST symbol table for the phones is created in the expected way, i.e.:
- // starting from 1 (<eps> is 0) and numbered contiguously till the last phone,
- // this will be the total number of phones.
- int32 NumPhones() const;
-
- /// Returns a sorted, unique list of phones.
- const std::vector<int32> &GetPhones() const { return topo_.GetPhones(); }
-
- // Transition-parameter-getting functions:
- BaseFloat GetTransitionProb(int32 trans_id) const;
- BaseFloat GetTransitionLogProb(int32 trans_id) const;
-
- // The following functions are more specialized functions for getting
- // transition probabilities, that are provided for convenience.
-
- /// Returns the log-probability of a particular non-self-loop transition
- /// after subtracting the probability mass of the self-loop and renormalizing;
- /// will crash if called on a self-loop. Specifically:
- /// for non-self-loops it returns the log of that prob divided by (1 minus
- /// self-loop-prob-for-that-state).
- BaseFloat GetTransitionLogProbIgnoringSelfLoops(int32 trans_id) const;
-
- /// Returns the log-prob of the non-self-loop probability
- /// mass for this transition state. (you can get the self-loop prob, if a self-loop
- /// exists, by calling GetTransitionLogProb(SelfLoopOf(trans_state))).
- BaseFloat GetNonSelfLoopLogProb(int32 trans_state) const;
-
- /// Does Maximum Likelihood estimation. The stats are counts/weights, indexed
- /// by transition-id. This was previously called Update().
- void MleUpdate(const Vector<double> &stats,
- const MleTransitionUpdateConfig &cfg,
- BaseFloat *objf_impr_out,
- BaseFloat *count_out);
-
- /// Does Maximum A Posteriori (MAP) estimation. The stats are counts/weights,
- /// indexed by transition-id.
- void MapUpdate(const Vector<double> &stats,
- const MapTransitionUpdateConfig &cfg,
- BaseFloat *objf_impr_out,
- BaseFloat *count_out);
-
- /// Print will print the transition model in a human-readable way, for purposes of human
- /// inspection. The "occs" are optional (they are indexed by pdf-id).
- void Print(std::ostream &os,
- const std::vector<std::string> &phone_names,
- const Vector<double> *occs = NULL);
-
-
- void InitStats(Vector<double> *stats) const { stats->Resize(NumTransitionIds()+1); }
-
- void Accumulate(BaseFloat prob, int32 trans_id, Vector<double> *stats) const {
- KALDI_ASSERT(trans_id <= NumTransitionIds());
- (*stats)(trans_id) += prob;
- // This is trivial and doesn't require class members, but leaves us more open
- // to design changes than doing it manually.
- }
-
- /// returns true if all the integer class members are identical (but does not
- /// compare the transition probabilities).
- bool Compatible(const TransitionModel &other) const;
-
- private:
- void MleUpdateShared(const Vector<double> &stats,
- const MleTransitionUpdateConfig &cfg,
- BaseFloat *objf_impr_out, BaseFloat *count_out);
- void MapUpdateShared(const Vector<double> &stats,
- const MapTransitionUpdateConfig &cfg,
- BaseFloat *objf_impr_out, BaseFloat *count_out);
- void ComputeTriples(const ContextDependency &ctx_dep); // called from constructor. initializes triples_.
- void ComputeDerived(); // called from constructor and Read function: computes state2id_ and id2state_.
- void ComputeDerivedOfProbs(); // computes quantities derived from log-probs (currently just
- // non_self_loop_log_probs_); called whenever log-probs change.
- void InitializeProbs(); // called from constructor.
- void Check() const;
-
- struct Triple {
- int32 phone;
- int32 hmm_state;
- int32 pdf;
- Triple() { }
- Triple(int32 phone, int32 hmm_state, int32 pdf):
- phone(phone), hmm_state(hmm_state), pdf(pdf) { }
- bool operator < (const Triple &other) const {
- if (phone < other.phone) return true;
- else if (phone > other.phone) return false;
- else if (hmm_state < other.hmm_state) return true;
- else if (hmm_state > other.hmm_state) return false;
- else return pdf < other.pdf;
- }
- bool operator == (const Triple &other) const {
- return (phone == other.phone && hmm_state == other.hmm_state
- && pdf == other.pdf);
- }
- };
-
- HmmTopology topo_;
-
- /// Triples indexed by transition state minus one;
- /// the triples are in sorted order which allows us to do the reverse mapping from
- /// triple to transition state
- std::vector<Triple> triples_;
-
- /// Gives the first transition_id of each transition-state; indexed by
- /// the transition-state. Array indexed 1..num-transition-states+1 (the last one
- /// is needed so we can know the num-transitions of the last transition-state).
- std::vector<int32> state2id_;
-
- /// For each transition-id, the corresponding transition
- /// state (indexed by transition-id).
- std::vector<int32> id2state_;
-
- /// For each transition-id, the corresponding log-prob. Indexed by transition-id.
- Vector<BaseFloat> log_probs_;
-
- /// For each transition-state, the log of (1 - self-loop-prob). Indexed by
- /// transition-state.
- Vector<BaseFloat> non_self_loop_log_probs_;
-
- /// This is actually one plus the highest-numbered pdf we ever got back from the
- /// tree (but the tree numbers pdfs contiguously from zero so this is the number
- /// of pdfs).
- int32 num_pdfs_;
-
-
- DISALLOW_COPY_AND_ASSIGN(TransitionModel);
-
-};
-
-inline int32 TransitionModel::TransitionIdToPdf(int32 trans_id) const {
- // If a lot of time is spent here we may create an extra array
- // to handle this.
- KALDI_ASSERT(static_cast<size_t>(trans_id) < id2state_.size() &&
- "Likely graph/model mismatch (graph built from wrong model?)");
- int32 trans_state = id2state_[trans_id];
- return triples_[trans_state-1].pdf;
-}
-
-/// Works out which pdfs might correspond to the given phones. Will return true
-/// if these pdfs correspond *just* to these phones, false if these pdfs are also
-/// used by other phones.
-/// @param trans_model [in] Transition-model used to work out this information
-/// @param phones [in] A sorted, uniq vector that represents a set of phones
-/// @param pdfs [out] Will be set to a sorted, uniq list of pdf-ids that correspond
-/// to one of this set of phones.
-/// @return Returns true if all of the pdfs output to "pdfs" correspond to phones from
-/// just this set (false if they may be shared with phones outside this set).
-bool GetPdfsForPhones(const TransitionModel &trans_model,
- const std::vector<int32> &phones,
- std::vector<int32> *pdfs);
-
-/// Works out which phones might correspond to the given pdfs. Similar to the
-/// above GetPdfsForPhones(, ,)
-bool GetPhonesForPdfs(const TransitionModel &trans_model,
- const std::vector<int32> &pdfs,
- std::vector<int32> *phones);
-/// @}
-
-
-} // end namespace kaldi
-
-
-#endif
diff --git a/kaldi_io/src/kaldi/hmm/tree-accu.h b/kaldi_io/src/kaldi/hmm/tree-accu.h
deleted file mode 100644
index d571762..0000000
--- a/kaldi_io/src/kaldi/hmm/tree-accu.h
+++ /dev/null
@@ -1,69 +0,0 @@
-// hmm/tree-accu.h
-
-// Copyright 2009-2011 Microsoft Corporation
-// 2013 Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-
-// http://www.apache.org/licenses/LICENSE-2.0
-
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-#ifndef KALDI_HMM_TREE_ACCU_H_
-#define KALDI_HMM_TREE_ACCU_H_
-
-#include <cctype> // For isspace.
-#include <limits>
-#include "base/kaldi-common.h"
-#include "hmm/transition-model.h"
-#include "tree/clusterable-classes.h"
-#include "tree/build-tree-questions.h" // needed for this typedef:
-// typedef std::vector<std::pair<EventVector, Clusterable*> > BuildTreeStatsType;
-
-namespace kaldi {
-
-/// \ingroup tree_group_top
-/// @{
-
-
-/// Accumulates the stats needed for training context-dependency trees (in the
-/// "normal" way). It adds to 'stats' the stats obtained from this file. Any
-/// new GaussClusterable* pointers in "stats" will be allocated with "new".
-
-void AccumulateTreeStats(const TransitionModel &trans_model,
- BaseFloat var_floor,
- int N, // context window size.
- int P, // central position.
- const std::vector<int32> &ci_phones, // sorted
- const std::vector<int32> &alignment,
- const Matrix<BaseFloat> &features,
- const std::vector<int32> *phone_map, // or NULL
- std::map<EventType, GaussClusterable*> *stats);
-
-
-
-/*** Read a mapping from one phone set to another. The phone map file has lines
- of the form <old-phone> <new-phone>, where both entries are integers, usually
- nonzero (but this is not enforced). This program will crash if the input is
- invalid, e.g. there are multiple inconsistent entries for the same old phone.
- The output vector "phone_map" will be indexed by old-phone and will contain
- the corresponding new-phone, or -1 for any entry that was not defined. */
-
-void ReadPhoneMap(std::string phone_map_rxfilename,
- std::vector<int32> *phone_map);
-
-
-
-/// @}
-
-} // end namespace kaldi.
-
-#endif