5 files changed, 1095 insertions, 0 deletions
diff --git a/kaldi_io/src/kaldi/hmm/hmm-topology.h b/kaldi_io/src/kaldi/hmm/hmm-topology.h
new file mode 100644
index 0000000..53ca427
--- /dev/null
+++ b/kaldi_io/src/kaldi/hmm/hmm-topology.h
@@ -0,0 +1,172 @@
+// hmm/hmm-topology.h
+
+// Copyright 2009-2011  Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_HMM_HMM_TOPOLOGY_H_
+#define KALDI_HMM_HMM_TOPOLOGY_H_
+
+#include "base/kaldi-common.h"
+#include "tree/context-dep.h"
+#include "util/const-integer-set.h"
+
+
+namespace kaldi {
+
+
+/// \addtogroup hmm_group
+/// @{
+
+/*
+ // The following would be the text form for the "normal" HMM topology.
+ // Note that the first state is the start state, and the final state,
+ // which must have no output transitions and must be nonemitting, has
+ // an exit probability of one (no other state can have nonzero exit
+ // probability; you can treat the transition probability to the final
+ // state as an exit probability).
+ // Note also that it's valid to omit the "<PdfClass>" entry of the <State>, which
+ // will mean we won't have a pdf on that state [non-emitting state].  This is equivalent
+ // to setting the <PdfClass> to -1.  We do this normally just for the final state.
+ // The Topology object can have multiple <TopologyEntry> blocks.
+ // This is useful if there are multiple types of topology in the system.
+
+ <Topology>
+ <TopologyEntry>
+ <ForPhones> 1 2 3 4 5 6 7 8 </ForPhones>
+ <State> 0 <PdfClass> 0
+ <Transition> 0 0.5
+ <Transition> 1 0.5
+ </State>
+ <State> 1 <PdfClass> 1
+ <Transition> 1 0.5
+ <Transition> 2 0.5
+ </State>
+ <State> 2 <PdfClass> 2
+ <Transition> 2 0.5
+ <Transition> 3 0.5
+ <Final> 0.5
+ </State>
+ <State> 3
+ </State> 
+ </TopologyEntry>
+ </Topology>
+*/
+
+// kNoPdf is used where pdf_class or pdf would be used, to indicate that
+// none is there.  Mainly useful in skippable models, but also used
+// for end states.
+// A caveat with nonemitting states is that their out-transitions
+// are not trainable, due to technical issues with the way
+// we decided to accumulate the stats.  Any transitions arising from (*)
+// HMM states with "kNoPdf" as the label are second-class transitions,
+// They do not have "transition-states" or "transition-ids" associated
+// with them.  They are used to create the FST version of the
+// HMMs, where they lead to epsilon arcs.
+// (*) "arising from" is a bit of a technical term here, due to the way
+// (if reorder == true), we put the transition-id associated with the
+// outward arcs of the state, on the input transition to the state.
+
+/// A constant used in the HmmTopology class as the \ref pdf_class "pdf-class"
+/// kNoPdf, which is used when a HMM-state is nonemitting (has no associated
+/// PDF).
+
+static const int32 kNoPdf = -1;
+
+/// A class for storing topology information for phones.  See  \ref hmm for context.
+/// This object is sometimes accessed in a file by itself, but more often
+/// as a class member of the Transition class (this is for convenience to reduce
+/// the number of files programs have to access).
+
+class HmmTopology {
+ public:
+  /// A structure defined inside HmmTopology to represent a HMM state.
+  struct HmmState {
+    /// The \ref pdf_class pdf-class, typically 0, 1 or 2 (the same as the HMM-state index),
+    /// but may be different to enable us to hardwire sharing of state, and may be
+    /// equal to \ref kNoPdf == -1 in order to specify nonemitting states (unusual).
+    int32 pdf_class;
+
+    /// A list of transitions.  The first member of each pair is the index of
+    /// the next HmmState, and the second is the default transition probability
+    /// (before training).
+    std::vector<std::pair<int32, BaseFloat> > transitions;
+
+    /// Constructors: with an explicit pdf-class, or nonemitting (kNoPdf) by default.
+    explicit HmmState(int32 p): pdf_class(p) { }
+    HmmState(): pdf_class(kNoPdf) { }
+
+    bool operator == (const HmmState &other) const {
+      return (pdf_class == other.pdf_class && transitions == other.transitions);
+    }
+  };
+
+  /// TopologyEntry is a typedef that represents the topology of a single
+  /// (prototype) phone, as a vector of HmmState.
+  typedef std::vector<HmmState> TopologyEntry;
+
+  /// Stream I/O; "binary" selects binary rather than text mode.
+  void Read(std::istream &is, bool binary);
+  void Write(std::ostream &os, bool binary) const;
+
+  /// Checks that the object is valid, and throws an exception otherwise.
+  void Check();
+
+  /// Returns the topology entry (i.e. vector of HmmState) for this phone;
+  /// will throw exception if phone not covered by the topology.
+  const TopologyEntry &TopologyForPhone(int32 phone) const;
+
+  /// Returns the number of \ref pdf_class "pdf-classes" for this phone;
+  /// throws exception if phone not covered by this topology.
+  int32 NumPdfClasses(int32 phone) const;
+
+  /// Returns a reference to a sorted, unique list of phones covered by
+  /// the topology (these phones will be positive integers, and usually
+  /// contiguous and starting from one but the toolkit doesn't assume
+  /// they are contiguous).
+  const std::vector<int32> &GetPhones() const { return phones_; }
+
+  /// Outputs a vector of int32, indexed by phone, that gives the
+  /// number of \ref pdf_class pdf-classes for the phones; this is
+  /// used by tree-building code such as BuildTree().
+  void GetPhoneToNumPdfClasses(std::vector<int32> *phone2num_pdf_classes) const;
+
+  HmmTopology() {}
+
+  bool operator == (const HmmTopology &other) const {
+    return phones_ == other.phones_ && phone2idx_ == other.phone2idx_
+        && entries_ == other.entries_;
+  }
+  // Allow default assignment operator and copy constructor.
+ private:
+  std::vector<int32> phones_;  // list of all phones we have topology for.  Sorted, uniq.  no epsilon (zero) phone.
+  std::vector<int32> phone2idx_;  // map from phones to indexes into the entries vector (or -1 for not present).
+  std::vector<TopologyEntry> entries_;
+};
+
+
+/// This function returns a HmmTopology object giving a normal 3-state topology,
+/// covering all phones in the list "phones".  This is mainly of use in testing
+/// code.
+HmmTopology GetDefaultTopology(const std::vector<int32> &phones);
+
+/// @} end "addtogroup hmm_group"
+
+
+} // end namespace kaldi
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/hmm/hmm-utils.h b/kaldi_io/src/kaldi/hmm/hmm-utils.h
new file mode 100644
index 0000000..240f706
--- /dev/null
+++ b/kaldi_io/src/kaldi/hmm/hmm-utils.h
@@ -0,0 +1,295 @@
+// hmm/hmm-utils.h
+
+// Copyright 2009-2011  Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_HMM_HMM_UTILS_H_
+#define KALDI_HMM_HMM_UTILS_H_
+
+#include "hmm/hmm-topology.h"
+#include "hmm/transition-model.h"
+#include "lat/kaldi-lattice.h"
+
+namespace kaldi {
+
+
+/// \defgroup hmm_group_graph Classes and functions for creating FSTs from HMMs
+/// \ingroup hmm_group
+/// @{
+
+/// Configuration class for the GetHTransducer() function; see
+/// \ref hmm_graph_config for context.
+struct HTransducerConfig {
+  /// Transition log-prob scale, see \ref hmm_scale.
+  /// Note this doesn't apply to self-loops; GetHTransducer() does
+  /// not include self-loops.
+  BaseFloat transition_scale;
+
+  /// If true, we are constructing a time-reversed FST: the phone-seqs in
+  /// ilabel_info are backwards, and we want to output a backwards version
+  /// of the HMM corresponding to each phone.
+  bool reverse;
+
+  /// This variable is only looked at if reverse == true.  If reverse == true and
+  /// push_weights == true, then we push the weights in the reversed FSTs we create for each
+  /// phone HMM.  This is only safe if the HMMs are probabilistic (i.e. not discriminatively trained).
+  bool push_weights;
+
+  /// delta used if we do push_weights [only relevant if reverse == true
+  /// and push_weights == true].
+  BaseFloat push_delta;
+
+  HTransducerConfig():
+      transition_scale(1.0),
+      reverse(false),
+      push_weights(true),
+      push_delta(0.001)
+  { }
+
+  // Registers every option in this struct with "po": transition-scale,
+  // reverse, push-weights and push-delta, each with a short description of
+  // what the option does.
+  void Register (OptionsItf *po) {
+    po->Register("transition-scale", &transition_scale,
+                 "Scale of transition probs (relative to LM)");
+    po->Register("reverse", &reverse,
+                 "Set true to build time-reversed FST.");
+    po->Register("push-weights", &push_weights,
+                 "Push weights (only applicable if reverse == true)");
+    po->Register("push-delta", &push_delta,
+                 "Delta used in pushing weights (only applicable if "
+                 "reverse && push-weights)");
+  }
+};
+
+/// Hash functor for HmmCacheType keys; the product is formed in size_t so it wraps instead of overflowing int32.
+struct HmmCacheHash {
+  int operator () (const std::pair<int32, std::vector<int32> >&p) const {
+    VectorHasher<int32> v;
+    const size_t prime = 103049;
+    return static_cast<int>(prime * static_cast<size_t>(p.first) + v(p.second));
+  }
+};
+
+/// HmmCacheType is a map from (central-phone, sequence of pdf-ids) to FST, used
+/// as cache in GetHmmAsFst, as an optimization.
+typedef unordered_map<std::pair<int32, std::vector<int32> >,
+                      fst::VectorFst<fst::StdArc>*,
+                      HmmCacheHash> HmmCacheType;
+
+
+/// Called by GetHTransducer() and probably will not need to be called directly;
+/// it creates the FST corresponding to the phone.  Does not include self-loops;
+/// you have to call AddSelfLoops() for that.  Result owned by caller.
+/// Returns an acceptor (i.e. ilabels, olabels identical) with transition-ids
+/// as the symbols.
+/// For documentation in context, see \ref hmm_graph_get_hmm_as_fst
+///   @param context_window  A vector representing the phonetic context; see
+///            \ref tree_window "here" for explanation.
+///   @param ctx_dep The object that contains the phonetic decision-tree
+///   @param trans_model The transition-model object, which provides
+///         the mappings to transition-ids and also the transition
+///         probabilities.
+///   @param config Configuration object, see \ref HTransducerConfig.
+///   @param cache Object used as a lookaside buffer to save computation;
+///       if it finds that the object it needs is already there, it will
+///       just return a pointer value from "cache"-- note that this means
+///       you have to be careful not to delete things twice.
+
+fst::VectorFst<fst::StdArc> *GetHmmAsFst(
+    std::vector<int32> context_window,
+    const ContextDependencyInterface &ctx_dep,
+    const TransitionModel &trans_model,
+    const HTransducerConfig &config,
+    HmmCacheType *cache = NULL);
+
+/// Included mainly as a form of documentation, not used in any other code
+/// currently.  Creates the FST with self-loops, and with fewer options.
+fst::VectorFst<fst::StdArc>*
+GetHmmAsFstSimple(std::vector<int32> context_window,
+                  const ContextDependencyInterface &ctx_dep,
+                  const TransitionModel &trans_model,
+                  BaseFloat prob_scale);
+
+
+/**
+  * Returns the H transducer; result owned by caller.
+  * See \ref hmm_graph_get_h_transducer.  The H transducer has on the
+  * input transition-ids, and also possibly some disambiguation symbols, which
+  * will be put in disambig_syms_left.  The output side contains the identifiers
+  * that are indexes into "ilabel_info" (these represent phones-in-context or
+  * disambiguation symbols).  The ilabel_info vector allows GetHTransducer to map
+  * from symbols to phones-in-context (i.e. phonetic context windows).  Any
+  * singleton symbols in the ilabel_info vector which are not phones, will be
+  * treated as disambiguation symbols.  [Not all recipes use these].  The output
+  * "disambig_syms_left" will be set to a list of the disambiguation symbols on
+  * the input of the transducer (i.e. same symbol type as whatever is on the
+  * input of the transducer).
+  */
+fst::VectorFst<fst::StdArc>*
+GetHTransducer (const std::vector<std::vector<int32> > &ilabel_info,
+                const ContextDependencyInterface &ctx_dep,
+                const TransitionModel &trans_model,
+                const HTransducerConfig &config,
+                std::vector<int32> *disambig_syms_left);
+
+/**
+  * GetIlabelMapping produces a mapping that's similar to HTK's logical-to-physical
+  * model mapping (i.e. the xwrd.clustered.mlist files).   It groups together
+  * "logical HMMs" (i.e. in our world, phonetic context windows) that share the
+  * same sequence of transition-ids.   This can be used in an
+  * optional graph-creation step that produces a remapped form of CLG that can be
+  * more productively determinized and minimized.  This is used in the command-line program
+  * make-ilabel-transducer.cc.
+  * @param ilabel_info_old [in] The original \ref tree_ilabel "ilabel_info" vector
+  * @param ctx_dep [in] The tree
+  * @param trans_model [in] The transition-model object
+  * @param old2new_map [out] The output; this vector, which is of size equal to the
+  *       number of new labels, is a mapping to the old labels such that we could
+  *       create a vector ilabel_info_new such that
+  *       ilabel_info_new[i] == ilabel_info_old[old2new_map[i]]
+  */
+void GetIlabelMapping (const std::vector<std::vector<int32> > &ilabel_info_old,
+                       const ContextDependencyInterface &ctx_dep,
+                       const TransitionModel &trans_model,
+                       std::vector<int32> *old2new_map);
+
+
+
+/**
+  * For context, see \ref hmm_graph_add_self_loops.  Expands an FST that has been
+  * built without self-loops, and adds the self-loops (it also needs to modify
+  * the probability of the non-self-loop ones, as the graph without self-loops
+  * was created in such a way that it was stochastic).  Note that the
+  * disambig_syms will be empty in some recipes (e.g.  if you already removed
+  * the disambiguation symbols).
+  * @param trans_model [in] Transition model
+  * @param disambig_syms [in] Sorted, uniq list of disambiguation symbols, required
+  *       if the graph contains disambiguation symbols but only needed for sanity checks.
+  * @param self_loop_scale [in] Transition-probability scale for self-loops; c.f.
+  *                    \ref hmm_scale
+  * @param reorder [in] If true, reorders the transitions (see \ref hmm_reorder).
+  * @param  fst [in, out] The FST to be modified.
+  */
+void AddSelfLoops(const TransitionModel &trans_model,
+                  const std::vector<int32> &disambig_syms,  // used as a check only.
+                  BaseFloat self_loop_scale,
+                  bool reorder,  // true->dan-style, false->lukas-style.
+                  fst::VectorFst<fst::StdArc> *fst);
+
+/**
+  * Adds transition-probs, with the supplied
+  * scales (see \ref hmm_scale), to the graph.
+  * Useful if you want to create a graph without transition probs, then possibly
+  * train the model (including the transition probs) but keep the graph fixed,
+  * and add back in the transition probs.  It assumes the fst has transition-ids
+  * on it.  It is not an error if the FST has no states (nothing will be done).
+  * @param trans_model [in] The transition model
+  * @param disambig_syms [in] A list of disambiguation symbols, required if the
+  *                       graph has disambiguation symbols on its input but only
+  *                       used for checks.
+  * @param transition_scale [in] A scale on transition-probabilities apart from
+  *                      those involving self-loops; see \ref hmm_scale.
+  * @param self_loop_scale [in] A scale on self-loop transition probabilities;
+  *                      see \ref hmm_scale.
+  * @param  fst [in, out] The FST to be modified.
+  */
+void AddTransitionProbs(const TransitionModel &trans_model,
+                        const std::vector<int32> &disambig_syms,
+                        BaseFloat transition_scale,
+                        BaseFloat self_loop_scale,
+                        fst::VectorFst<fst::StdArc> *fst);
+
+/**
+   This is as AddTransitionProbs() above, but operates on a Lattice, where
+   it affects the graph part of the weight (the first element
+   of the pair). */
+void AddTransitionProbs(const TransitionModel &trans_model,
+                        BaseFloat transition_scale,
+                        BaseFloat self_loop_scale,
+                        Lattice *lat);
+
+
+/// Returns a transducer from pdfs plus one (input) to  transition-ids (output).
+/// Currently of use only for testing.
+fst::VectorFst<fst::StdArc>*
+GetPdfToTransitionIdTransducer(const TransitionModel &trans_model);
+
+/// Converts all transition-ids in the FST to pdfs plus one.
+/// Placeholder: not implemented yet!
+void ConvertTransitionIdsToPdfs(const TransitionModel &trans_model,
+                                const std::vector<int32> &disambig_syms,
+                                fst::VectorFst<fst::StdArc> *fst);
+
+/// @} end "defgroup hmm_group_graph"
+
+/// \addtogroup hmm_group
+/// @{
+
+/// SplitToPhones splits up the TransitionIds in "alignment" into their
+/// individual phones (one vector per instance of a phone).  At output,
+/// the sum of the sizes of the vectors in split_alignment will be the same
+/// as the corresponding sum for "alignment".  The function returns
+/// true on success.  If the alignment appears to be incomplete, e.g.
+/// not ending at the end-state of a phone, it will still break it up into
+/// phones but it will return false.  For more serious errors it will
+/// die or throw an exception.
+/// This function works out by itself whether the graph was created
+/// with "reordering" (dan-style graph), and just does the right thing.
+
+bool SplitToPhones(const TransitionModel &trans_model,
+                   const std::vector<int32> &alignment,
+                   std::vector<std::vector<int32> > *split_alignment);
+
+/// ConvertAlignment converts an alignment that was created using one
+/// model, to another model.  They must use a compatible topology (so we
+/// know the state alignments of the new model).
+/// It returns false if it could not be split to phones (probably
+/// because the alignment was partial), but for other kinds of
+/// error that are more likely a coding error, it will throw
+/// an exception.
+bool ConvertAlignment(const TransitionModel &old_trans_model,
+                      const TransitionModel &new_trans_model,
+                      const ContextDependencyInterface &new_ctx_dep,
+                      const std::vector<int32> &old_alignment,
+                      const std::vector<int32> *phone_map,  // may be NULL
+                      std::vector<int32> *new_alignment);
+
+// ConvertPhnxToProns is only needed in bin/phones-to-prons.cc and
+// isn't closely related with HMMs, but we put it here as there isn't
+// any other obvious place for it and it needs to be tested.
+// This function takes a phone-sequence with word-start and word-end
+// markers in it, and a word-sequence, and outputs the pronunciations
+// "prons"... the format of "prons" is, each element is a vector,
+// where the first element is the word (or zero meaning no word, e.g.
+// for optional silence introduced by the lexicon), and the remaining
+// elements are the phones in the word's pronunciation.
+// It returns false if it encounters a problem of some kind, e.g.
+// if the phone-sequence doesn't seem to have the right number of
+// words in it.
+bool ConvertPhnxToProns(const std::vector<int32> &phnx,
+                        const std::vector<int32> &words,
+                        int32 word_start_sym,
+                        int32 word_end_sym,
+                        std::vector<std::vector<int32> > *prons);
+
+/// @} end "addtogroup hmm_group"
+
+} // end namespace kaldi
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/hmm/posterior.h b/kaldi_io/src/kaldi/hmm/posterior.h
new file mode 100644
index 0000000..be73be9
--- /dev/null
+++ b/kaldi_io/src/kaldi/hmm/posterior.h
@@ -0,0 +1,214 @@
+// hmm/posterior.h
+
+// Copyright 2009-2011     Microsoft Corporation
+//           2013-2014     Johns Hopkins University (author: Daniel Povey)
+//                2014     Guoguo Chen
+
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_HMM_POSTERIOR_H_
+#define KALDI_HMM_POSTERIOR_H_
+
+#include "base/kaldi-common.h"
+#include "tree/context-dep.h"
+#include "util/const-integer-set.h"
+#include "util/kaldi-table.h"
+#include "hmm/transition-model.h"
+
+
+namespace kaldi {
+
+
+/// \addtogroup posterior_group
+/// @{
+
+/// Posterior is a typedef for storing acoustic-state (actually, transition-id)
+/// posteriors over an utterance.  The "int32" is a transition-id, and the BaseFloat
+/// is a probability (typically between zero and one).
+typedef std::vector<std::vector<std::pair<int32, BaseFloat> > > Posterior;
+
+/// GaussPost is a typedef for storing Gaussian-level posteriors for an utterance.
+/// The "int32" is a pdf-id (see the WARNING below), and the Vector<BaseFloat> is a vector of
+/// Gaussian posteriors.
+/// WARNING: We changed "int32" from transition-id to pdf-id, and the change is
+/// applied for all programs using GaussPost. This is for efficiency purpose. We
+/// also changed the name slightly from GauPost to GaussPost to reduce the
+/// chance that the change will go un-noticed in downstream code.
+typedef std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > > GaussPost;
+
+
+// PosteriorHolder is a holder for Posterior, which is
+// std::vector<std::vector<std::pair<int32, BaseFloat> > >
+// This is used for storing posteriors of transition id's for an
+// utterance.
+class PosteriorHolder {
+ public:
+  typedef Posterior T;
+
+  PosteriorHolder() {}
+
+  static bool Write(std::ostream &os, bool binary, const T &t);
+
+  // Resets the held object to empty by swapping it with a fresh one.
+  void Clear() { T empty; t_.swap(empty); }
+
+  // Reads from the stream into the holder.
+  bool Read(std::istream &is);
+
+  // Kaldi objects always have the stream open in binary mode for reading.
+  static bool IsReadInBinary() { return true; }
+
+  const T &Value() const { return t_; }
+
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(PosteriorHolder);
+  T t_;
+};
+
+
+// GaussPostHolder is a holder for GaussPost, which is
+// std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > >
+// This is used for storing Gaussian-level posteriors for an
+// utterance.
+class GaussPostHolder {
+ public:
+  typedef GaussPost T;
+
+  GaussPostHolder() {}
+
+  static bool Write(std::ostream &os, bool binary, const T &t);
+
+  // Resets the held object to empty by swapping it with a fresh one.
+  void Clear() { T empty; t_.swap(empty); }
+
+  // Reads from the stream into the holder.
+  bool Read(std::istream &is);
+
+  // Kaldi objects always have the stream open in binary mode for reading.
+  static bool IsReadInBinary() { return true; }
+
+  const T &Value() const { return t_; }
+
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(GaussPostHolder);
+  T t_;
+};
+
+
+// Posterior is a typedef: vector<vector<pair<int32, BaseFloat> > >,
+// representing posteriors over (typically) transition-ids for an
+// utterance.
+typedef TableWriter<PosteriorHolder> PosteriorWriter;
+typedef SequentialTableReader<PosteriorHolder> SequentialPosteriorReader;
+typedef RandomAccessTableReader<PosteriorHolder> RandomAccessPosteriorReader;
+
+
+// typedef std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > > GaussPost;
+typedef TableWriter<GaussPostHolder> GaussPostWriter;
+typedef SequentialTableReader<GaussPostHolder> SequentialGaussPostReader;
+typedef RandomAccessTableReader<GaussPostHolder> RandomAccessGaussPostReader;
+
+
+/// Scales the BaseFloat (weight) element in the posterior entries.
+void ScalePosterior(BaseFloat scale, Posterior *post);
+
+/// Returns the total of all the weights in "post".
+BaseFloat TotalPosterior(const Posterior &post);
+
+/// Returns true if the two lists of pairs have no common .first element.
+bool PosteriorEntriesAreDisjoint(
+    const std::vector<std::pair<int32, BaseFloat> > &post_elem1,
+    const std::vector<std::pair<int32, BaseFloat> > &post_elem2);
+
+
+/// Merge two sets of posteriors, which must have the same length.  If "merge"
+/// is true, it will make a common entry whenever there are duplicated entries,
+/// adding up the weights.  If "drop_frames" is true, for frames where the
+/// two sets of posteriors were originally disjoint, makes no entries for that
+/// frame (relates to frame dropping, or drop_frames, see Vesely et al, ICASSP
+/// 2013).  Returns the number of frames for which the two posteriors were
+/// disjoint (i.e. no common transition-ids or whatever index we are using).
+int32 MergePosteriors(const Posterior &post1,
+                      const Posterior &post2,
+                      bool merge,
+                      bool drop_frames,
+                      Posterior *post);
+
+/// Given a vector of log-likelihoods (typically of Gaussians in a GMM
+/// but could be of pdf-ids), a number gselect >= 1 and a minimum posterior
+/// 0 <= min_post < 1, it gets the posterior for each element of log-likes
+/// by applying Softmax(), then prunes the posteriors using "gselect" and
+/// "min_post" (keeping at least one), and outputs the result into
+/// "post_entry", sorted from greatest to least posterior.
+/// Returns the total log-likelihood (the output of calling ApplySoftMax()
+/// on a copy of log_likes).
+BaseFloat VectorToPosteriorEntry(
+    const VectorBase<BaseFloat> &log_likes,
+    int32 num_gselect,
+    BaseFloat min_post,
+    std::vector<std::pair<int32, BaseFloat> > *post_entry);
+
+/// Convert an alignment to a posterior (with a scale of 1.0 on
+/// each entry).
+void AlignmentToPosterior(const std::vector<int32> &ali,
+                          Posterior *post);
+
+/// Sorts posterior entries so that transition-ids with same pdf-id are next to
+/// each other.
+void SortPosteriorByPdfs(const TransitionModel &tmodel,
+                         Posterior *post);
+
+
+/// Converts a posterior over transition-ids to be a posterior
+/// over pdf-ids.
+void ConvertPosteriorToPdfs(const TransitionModel &tmodel,
+                            const Posterior &post_in,
+                            Posterior *post_out);
+
+/// Converts a posterior over transition-ids to be a posterior
+/// over phones.
+void ConvertPosteriorToPhones(const TransitionModel &tmodel,
+                              const Posterior &post_in,
+                              Posterior *post_out);
+
+/// Weight any silence phones in the posterior (i.e. any phones
+/// in the set "silence_set") by scale "silence_scale".
+/// The interface was changed in Feb 2014 to do the modification
+/// "in-place" rather than having separate input and output.
+void WeightSilencePost(const TransitionModel &trans_model,
+                       const ConstIntegerSet<int32> &silence_set,
+                       BaseFloat silence_scale,
+                       Posterior *post);
+
+/// This is similar to WeightSilencePost, except that on each frame it
+/// works out the amount by which the overall posterior would be reduced,
+/// and scales down everything on that frame by the same amount.  It
+/// has the effect that frames that are mostly silence get down-weighted.
+/// The interface was changed in Feb 2014 to do the modification
+/// "in-place" rather than having separate input and output.
+void WeightSilencePostDistributed(const TransitionModel &trans_model,
+                                  const ConstIntegerSet<int32> &silence_set,
+                                  BaseFloat silence_scale,
+                                  Posterior *post);
+
+/// @} end "addtogroup posterior_group"
+
+
+} // end namespace kaldi
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/hmm/transition-model.h b/kaldi_io/src/kaldi/hmm/transition-model.h
new file mode 100644
index 0000000..ccc4f11
--- /dev/null
+++ b/kaldi_io/src/kaldi/hmm/transition-model.h
@@ -0,0 +1,345 @@
+// hmm/transition-model.h
+
+// Copyright 2009-2012  Microsoft Corporation
+//                      Johns Hopkins University (author: Guoguo Chen)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_HMM_TRANSITION_MODEL_H_
+#define KALDI_HMM_TRANSITION_MODEL_H_
+
+#include "base/kaldi-common.h"
+#include "tree/context-dep.h"
+#include "util/const-integer-set.h"
+#include "fst/fst-decl.h" // forward declarations.
+#include "hmm/hmm-topology.h"
+#include "itf/options-itf.h"
+
+namespace kaldi {
+
+/// \addtogroup hmm_group
+/// @{
+
+// The class TransitionModel is a repository for the transition probabilities.
+// It also handles certain integer mappings.
+// The basic model is as follows.  Each phone has a HMM topology defined in
+// hmm-topology.h.  Each HMM-state of each of these phones has a number of
+// transitions (and final-probs) out of it.  Each HMM-state defined in the
+// HmmTopology class has an associated "pdf_class".  This gets replaced with
+// an actual pdf-id via the tree.  The transition model associates the
+// transition probs with the (phone, HMM-state, pdf-id).  We associate with
+// each such triple a transition-state.  Each
+// transition-state has a number of associated probabilities to estimate;
+// this depends on the number of transitions/final-probs in the topology for
+// that (phone, HMM-state).  Each probability has an associated transition-index.
+// We associate with each (transition-state, transition-index) a unique transition-id.
+// Each individual probability estimated by the transition-model is associated with a
+// transition-id.
+//
+// List of the various types of quantity referred to here and what they mean:
+//           phone:  a phone index (1, 2, 3 ...)
+//       HMM-state:  a number (0, 1, 2...) that indexes TopologyEntry (see hmm-topology.h)
+//          pdf-id:  a number output by the Compute function of ContextDependency (it
+//                   indexes pdf's).  Zero-based.
+// transition-state:  the states for which we estimate transition probabilities for transitions
+//                    out of them.  In some topologies, will map one-to-one with pdf-ids.
+//                    One-based, since it appears on FSTs.
+// transition-index:  identifier of a transition (or final-prob) in the HMM.  Indexes the
+//                    "transitions" vector in HmmTopology::HmmState.  [if it is out of range,
+//                    equal to transitions.size(), it refers to the final-prob.]
+//                    Zero-based.
+//   transition-id:   identifier of a unique parameter of the TransitionModel.
+//                    Associated with a (transition-state, transition-index) pair.
+//                    One-based, since it appears on FSTs.
+//
+// List of the possible mappings TransitionModel can do:
+//             (phone, HMM-state, pdf-id) -> transition-state
+//   (transition-state, transition-index) -> transition-id
+//  Reverse mappings:
+//                        transition-id -> transition-state
+//                        transition-id -> transition-index
+//                     transition-state -> phone
+//                     transition-state -> HMM-state
+//                     transition-state -> pdf-id
+//
+// The main things the TransitionModel object can do are:
+//    Get initialized (need ContextDependency and HmmTopology objects).
+//    Read/write.
+//    Update [given a vector of counts indexed by transition-id].
+//    Do the various integer mappings mentioned above.
+//    Get the probability (or log-probability) associated with a particular transition-id.
+
+
+// Note: this was previously called TransitionUpdateConfig.
+struct MleTransitionUpdateConfig {
+  BaseFloat floor;      // Floor for transition probabilities (option "transition-floor").
+  BaseFloat mincount;   // Minimum count needed to update a state's transitions.
+  bool share_for_pdfs;  // When true, transition parameters with the same pdf are shared.
+  MleTransitionUpdateConfig(BaseFloat floor = 0.01, BaseFloat mincount = 5.0,
+                            bool share_for_pdfs = false)
+      : floor(floor), mincount(mincount), share_for_pdfs(share_for_pdfs) { }
+
+  /// Registers each field with the options object "po" under the names below.
+  void Register(OptionsItf *po) {
+    po->Register("transition-floor", &floor,
+                 "Floor for transition probabilities");
+    po->Register("transition-min-count", &mincount,
+                 "Minimum count required to update transitions from a state");
+    po->Register("share-for-pdfs", &share_for_pdfs,
+                 "If true, share all transition parameters where the states "
+                 "have the same pdf.");
+  }
+};
+
+struct MapTransitionUpdateConfig {
+  BaseFloat tau;        // Tau value for MAP estimation (option "transition-tau").
+  bool share_for_pdfs;  // When true, transition parameters with the same pdf are shared.
+  MapTransitionUpdateConfig() : tau(5.0), share_for_pdfs(false) { }
+
+  void Register(OptionsItf *po) {
+    po->Register("transition-tau", &tau,
+                 "Tau value for MAP estimation of transition probabilities.");
+    po->Register("share-for-pdfs", &share_for_pdfs,
+                 "If true, share all transition parameters where the states "
+                 "have the same pdf.");
+  }
+};
+
+class TransitionModel {
+
+ public:
+  /// Initialize the object [e.g. at the start of training].
+  /// The class keeps a copy of the HmmTopology object, but not
+  /// the ContextDependency object.
+  TransitionModel(const ContextDependency &ctx_dep,
+                  const HmmTopology &hmm_topo);
+
+  /// Constructor that takes no arguments: typically used prior to calling Read.
+  /// num_pdfs_ is zeroed so that NumPdfs() is well-defined before Read is called.
+  TransitionModel(): num_pdfs_(0) { }
+
+  void Read(std::istream &is, bool binary);  // note, no symbol table: topo object always read/written w/o symbols.
+  void Write(std::ostream &os, bool binary) const;
+
+
+  /// return reference to HMM-topology object.
+  const HmmTopology &GetTopo() const { return topo_; }
+
+  /// \name Integer mapping functions
+  /// @{
+
+  int32 TripleToTransitionState(int32 phone, int32 hmm_state, int32 pdf) const;
+  int32 PairToTransitionId(int32 trans_state, int32 trans_index) const;
+  int32 TransitionIdToTransitionState(int32 trans_id) const;
+  int32 TransitionIdToTransitionIndex(int32 trans_id) const;
+  int32 TransitionStateToPhone(int32 trans_state) const;
+  int32 TransitionStateToHmmState(int32 trans_state) const;
+  int32 TransitionStateToPdf(int32 trans_state) const;
+  int32 SelfLoopOf(int32 trans_state) const;  // returns the self-loop transition-id, or zero if
+  // this state doesn't have a self-loop.
+
+  inline int32 TransitionIdToPdf(int32 trans_id) const;
+  int32 TransitionIdToPhone(int32 trans_id) const;
+  int32 TransitionIdToPdfClass(int32 trans_id) const;
+  int32 TransitionIdToHmmState(int32 trans_id) const;
+
+  /// @}
+
+  bool IsFinal(int32 trans_id) const;  // returns true if this trans_id goes to the final state
+  // (which is bound to be nonemitting).
+  bool IsSelfLoop(int32 trans_id) const;  // return true if this trans_id corresponds to a self-loop.
+
+  /// Returns the total number of transition-ids (note, these are one-based).
+  inline int32 NumTransitionIds() const { return static_cast<int32>(id2state_.size()) - 1; }
+
+  /// Returns the number of transition-indices for a particular transition-state.
+  /// Note: "Indices" is the plural of "index".   Index is not the same as "id",
+  /// here.  A transition-index is a zero-based offset into the transitions
+  /// out of a particular transition state.
+  int32 NumTransitionIndices(int32 trans_state) const;
+
+  /// Returns the total number of transition-states (note, these are one-based).
+  int32 NumTransitionStates() const { return triples_.size(); }
+
+  // NumPdfs() actually returns the highest-numbered pdf we ever saw, plus one.
+  // In normal cases this should equal the number of pdfs in the system, but if you
+  // initialized this object with fewer than all the phones, and it happens that
+  // an unseen phone has the highest-numbered pdf, this might be different.
+  int32 NumPdfs() const { return num_pdfs_; }
+
+  // This loops over the triples and finds the highest phone index present. If
+  // the FST symbol table for the phones is created in the expected way, i.e.:
+  // starting from 1 (<eps> is 0) and numbered contiguously till the last phone,
+  // this will be the total number of phones.
+  int32 NumPhones() const;
+
+  /// Returns a sorted, unique list of phones.
+  const std::vector<int32> &GetPhones() const { return topo_.GetPhones(); }
+
+  // Transition-parameter-getting functions:
+  BaseFloat GetTransitionProb(int32 trans_id) const;
+  BaseFloat GetTransitionLogProb(int32 trans_id) const;
+
+  // The following functions are more specialized functions for getting
+  // transition probabilities, that are provided for convenience.
+
+  /// Returns the log-probability of a particular non-self-loop transition
+  /// after subtracting the probability mass of the self-loop and renormalizing;
+  /// will crash if called on a self-loop.  Specifically:
+  /// for non-self-loops it returns the log of that prob divided by (1 minus
+  /// self-loop-prob-for-that-state).
+  BaseFloat GetTransitionLogProbIgnoringSelfLoops(int32 trans_id) const;
+
+  /// Returns the log-prob of the non-self-loop probability
+  /// mass for this transition state. (you can get the self-loop prob, if a self-loop
+  /// exists, by calling GetTransitionLogProb(SelfLoopOf(trans_state))).
+  BaseFloat GetNonSelfLoopLogProb(int32 trans_state) const;
+
+  /// Does Maximum Likelihood estimation.  The stats are counts/weights, indexed
+  /// by transition-id.  This was previously called Update().
+  void MleUpdate(const Vector<double> &stats,
+                 const MleTransitionUpdateConfig &cfg,
+                 BaseFloat *objf_impr_out,
+                 BaseFloat *count_out);
+
+  /// Does Maximum A Posteriori (MAP) estimation.  The stats are counts/weights,
+  /// indexed by transition-id.
+  void MapUpdate(const Vector<double> &stats,
+                 const MapTransitionUpdateConfig &cfg,
+                 BaseFloat *objf_impr_out,
+                 BaseFloat *count_out);
+
+  /// Print will print the transition model in a human-readable way, for purposes of human
+  /// inspection.  The "occs" are optional (they are indexed by pdf-id).
+  void Print(std::ostream &os,
+             const std::vector<std::string> &phone_names,
+             const Vector<double> *occs = NULL);
+
+  /// Sizes "stats" so it can be indexed by transition-id (ids are one-based, hence the +1).
+  void InitStats(Vector<double> *stats) const { stats->Resize(NumTransitionIds()+1); }
+
+  void Accumulate(BaseFloat prob, int32 trans_id, Vector<double> *stats) const {
+    // Also reject negative ids: trans_id is used directly as the index below.
+    KALDI_ASSERT(trans_id >= 0 && trans_id <= NumTransitionIds());
+    (*stats)(trans_id) += prob;
+    // Trivial and needs no class members, but keeps callers open to design changes.
+  }
+
+  /// returns true if all the integer class members are identical (but does not
+  /// compare the transition probabilities).
+  bool Compatible(const TransitionModel &other) const;
+
+ private:
+  void MleUpdateShared(const Vector<double> &stats,
+                       const MleTransitionUpdateConfig &cfg,
+                       BaseFloat *objf_impr_out, BaseFloat *count_out);
+  void MapUpdateShared(const Vector<double> &stats,
+                       const MapTransitionUpdateConfig &cfg,
+                       BaseFloat *objf_impr_out, BaseFloat *count_out);
+  void ComputeTriples(const ContextDependency &ctx_dep);  // called from constructor.  initializes triples_.
+  void ComputeDerived();  // called from constructor and Read function: computes state2id_ and id2state_.
+  void ComputeDerivedOfProbs();  // computes quantities derived from log-probs (currently just
+  // non_self_loop_log_probs_); called whenever log-probs change.
+  void InitializeProbs();  // called from constructor.
+  void Check() const;
+
+  struct Triple {
+    int32 phone;
+    int32 hmm_state;
+    int32 pdf;
+    Triple() { }
+    Triple(int32 phone, int32 hmm_state, int32 pdf):
+        phone(phone), hmm_state(hmm_state), pdf(pdf) { }
+    bool operator < (const Triple &other) const {
+      if (phone < other.phone) return true;
+      else if (phone > other.phone) return false;
+      else if (hmm_state < other.hmm_state) return true;
+      else if (hmm_state > other.hmm_state) return false;
+      else return pdf < other.pdf;
+    }
+    bool operator == (const Triple &other) const {
+      return (phone == other.phone && hmm_state == other.hmm_state
+              && pdf == other.pdf);
+    }
+  };
+
+  HmmTopology topo_;
+
+  /// Triples indexed by transition state minus one;
+  /// the triples are in sorted order which allows us to do the reverse mapping from
+  /// triple to transition state
+  std::vector<Triple> triples_;
+
+  /// Gives the first transition_id of each transition-state; indexed by
+  /// the transition-state.  Array indexed 1..num-transition-states+1 (the last one
+  /// is needed so we can know the num-transitions of the last transition-state).
+  std::vector<int32> state2id_;
+
+  /// For each transition-id, the corresponding transition
+  /// state (indexed by transition-id).
+  std::vector<int32> id2state_;
+
+  /// For each transition-id, the corresponding log-prob.  Indexed by transition-id.
+  Vector<BaseFloat> log_probs_;
+
+  /// For each transition-state, the log of (1 - self-loop-prob).  Indexed by
+  /// transition-state.
+  Vector<BaseFloat> non_self_loop_log_probs_;
+
+  /// This is actually one plus the highest-numbered pdf we ever got back from the
+  /// tree (but the tree numbers pdfs contiguously from zero so this is the number
+  /// of pdfs).
+  int32 num_pdfs_;
+
+
+  DISALLOW_COPY_AND_ASSIGN(TransitionModel);
+
+};
+
+inline int32 TransitionModel::TransitionIdToPdf(int32 trans_id) const {
+  // The look-up goes id -> transition-state -> triple; a dedicated id -> pdf
+  // array could replace it if a lot of time turns out to be spent here.
+  const size_t id_index = static_cast<size_t>(trans_id);
+  KALDI_ASSERT(id_index < id2state_.size() &&
+               "Likely graph/model mismatch (graph built from wrong model?)");
+  return triples_[id2state_[id_index] - 1].pdf;
+}
+
+/// Works out which pdfs might correspond to the given phones.  Will return true
+/// if these pdfs correspond *just* to these phones, false if these pdfs are also
+/// used by other phones.
+/// @param trans_model [in] Transition-model used to work out this information
+/// @param phones [in] A sorted, unique vector that represents a set of phones
+/// @param pdfs [out] Will be set to a sorted, unique list of pdf-ids that correspond
+///                   to one of this set of phones.
+/// @return  Returns true if all of the pdfs output to "pdfs" correspond to phones from
+///          just this set (false if they may be shared with phones outside this set).
+bool GetPdfsForPhones(const TransitionModel &trans_model,
+                      const std::vector<int32> &phones,
+                      std::vector<int32> *pdfs);
+
+/// Works out which phones might correspond to the given pdfs; the counterpart of
+/// GetPdfsForPhones() above, with the roles of phones and pdfs exchanged.
+bool GetPhonesForPdfs(const TransitionModel &trans_model,
+                      const std::vector<int32> &pdfs,
+                      std::vector<int32> *phones);
+/// @}
+
+
+} // end namespace kaldi
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/hmm/tree-accu.h b/kaldi_io/src/kaldi/hmm/tree-accu.h
new file mode 100644
index 0000000..d571762
--- /dev/null
+++ b/kaldi_io/src/kaldi/hmm/tree-accu.h
@@ -0,0 +1,69 @@
+// hmm/tree-accu.h
+
+// Copyright 2009-2011 Microsoft Corporation
+//                2013 Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_HMM_TREE_ACCU_H_
+#define KALDI_HMM_TREE_ACCU_H_
+
+#include <cctype>  // For isspace.
+#include <limits>
+#include "base/kaldi-common.h"
+#include "hmm/transition-model.h"
+#include "tree/clusterable-classes.h"
+#include "tree/build-tree-questions.h" // needed for this typedef:
+// typedef std::vector<std::pair<EventVector, Clusterable*> > BuildTreeStatsType;
+
+namespace kaldi {
+
+/// \ingroup tree_group_top
+/// @{
+
+
+/// Accumulates the stats needed for training context-dependency trees (in the
+/// "normal" way).  It adds to 'stats' the stats obtained from this file.  Any
+/// new GaussClusterable* pointers in "stats" will be allocated with "new";
+/// NOTE(review): presumably the caller owns and must delete them -- confirm.
+void AccumulateTreeStats(const TransitionModel &trans_model,
+                         BaseFloat var_floor,
+                         int N,  // context window size.
+                         int P,  // central position.
+                         const std::vector<int32> &ci_phones,  // sorted
+                         const std::vector<int32> &alignment,
+                         const Matrix<BaseFloat> &features,
+                         const std::vector<int32> *phone_map, // or NULL
+                         std::map<EventType, GaussClusterable*> *stats);
+
+
+
+/*** Read a mapping from one phone set to another.  The phone map file has lines
+ of the form <old-phone> <new-phone>, where both entries are integers, usually
+ nonzero (but this is not enforced).  This function will crash if the input is
+ invalid, e.g. there are multiple inconsistent entries for the same old phone.
+ The output vector "phone_map" will be indexed by old-phone and will contain
+ the corresponding new-phone, or -1 for any entry that was not defined. */
+
+void ReadPhoneMap(std::string phone_map_rxfilename,
+                  std::vector<int32> *phone_map);
+
+
+
+/// @}
+
+}  // end namespace kaldi.
+
+#endif