Merge pull request #2 from yimmon/master

remove needless files
author: Ted Yin <ted.sybil@gmail.com> 2015-08-14 17:42:26 +0800
committer: Ted Yin <ted.sybil@gmail.com> 2015-08-14 17:42:26 +0800
commit: c3cffb58b9921d78753336421b52b9ffdaa5515c (patch)
tree: bfea20e97c200cf734021e3756d749c892e658a4 /kaldi_io/src/kaldi/tree
parent: 10cce5f6a5c9e2f8e00d5a2a4d87c9cb7c26bf4c (diff)
parent: dfdd17afc2e984ec6c32ea01290f5c76309a456a (diff)
8 files changed, 0 insertions, 1771 deletions
diff --git a/kaldi_io/src/kaldi/tree/build-tree-questions.h b/kaldi_io/src/kaldi/tree/build-tree-questions.h
deleted file mode 100644
index a6bcfdd..0000000
--- a/kaldi_io/src/kaldi/tree/build-tree-questions.h
+++ /dev/null
@@ -1,133 +0,0 @@
-// tree/build-tree-questions.h
-
-// Copyright 2009-2011  Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_TREE_BUILD_TREE_QUESTIONS_H_
-#define KALDI_TREE_BUILD_TREE_QUESTIONS_H_
-
-#include "util/stl-utils.h"
-#include "tree/context-dep.h"
-
-namespace kaldi {
-
-
-/// \addtogroup tree_group
-/// @{
-/// Typedef for statistics to build trees.
-typedef std::vector<std::pair<EventType, Clusterable*> > BuildTreeStatsType;
-
-/// Typedef used when we get "all keys" from a set of stats-- used in specifying
-/// which kinds of questions to ask.
-typedef enum { kAllKeysInsistIdentical, kAllKeysIntersection, kAllKeysUnion } AllKeysType;
-
-/// @}
-
-/// \defgroup tree_group_questions Question sets for decision-tree clustering
-///  See \ref tree_internals (and specifically \ref treei_func_questions) for context.
-/// \ingroup tree_group
-/// @{
-
-/// QuestionsForKey is a class used to define the questions for a key,
-/// and also options that allow us to refine the question during tree-building
-/// (i.e. make a question specific to the location in the tree).
-/// The Questions class handles aggregating these options for a set
-/// of different keys.
-struct QuestionsForKey {  // Configuration class associated with a particular key
-  // (of type EventKeyType).  It also contains the questions themselves.
-  std::vector<std::vector<EventValueType> > initial_questions;
-  RefineClustersOptions refine_opts;  // if refine_opts.max_iter == 0,
-  // we just pick from the initial questions.
-  
-  QuestionsForKey(int32 num_iters = 5): refine_opts(num_iters, 2) {
-    // refine_cfg with 5 iters and top-n = 2 (this is no restriction because
-    // RefineClusters called with 2 clusters; would get set to that anyway as
-    // it's the only possible value for 2 clusters).  User has to add questions.
-    // This config won't work as-is, as it has no questions.
-  }
-
-  void Check() const {
-    for (size_t i = 0;i < initial_questions.size();i++) KALDI_ASSERT(IsSorted(initial_questions[i]));
-  }
-
-  void Write(std::ostream &os, bool binary) const;
-  void Read(std::istream &is, bool binary);
-
-  // copy and assign allowed.
-};
-
-/// This class defines, for each EventKeyType, a set of initial questions that
-/// it tries and also a number of iterations for which to refine the questions to increase
-/// likelihood. It is perhaps a bit more than an options class, as it contains the
-/// actual questions.
-class Questions {  // careful, this is a class.
- public:
-  const QuestionsForKey &GetQuestionsOf(EventKeyType key) const {
-    std::map<EventKeyType, size_t>::const_iterator iter;
-    if ( (iter = key_idx_.find(key)) == key_idx_.end()) {
-      KALDI_ERR << "Questions: no options for key "<< key;
-    }
-    size_t idx = iter->second;
-    KALDI_ASSERT(idx < key_options_.size());
-    key_options_[idx]->Check();
-    return *(key_options_[idx]);
-  }
-  void SetQuestionsOf(EventKeyType key, const QuestionsForKey &options_of_key) {
-    options_of_key.Check();
-    if (key_idx_.count(key) == 0) {
-      key_idx_[key] = key_options_.size();
-      key_options_.push_back(new QuestionsForKey());
-      *(key_options_.back()) = options_of_key;
-    } else {
-      size_t idx = key_idx_[key];
-      KALDI_ASSERT(idx < key_options_.size());
-      *(key_options_[idx]) = options_of_key;
-    }
-  }
-  void GetKeysWithQuestions(std::vector<EventKeyType> *keys_out) const {
-    KALDI_ASSERT(keys_out != NULL);
-    CopyMapKeysToVector(key_idx_, keys_out);
-  }
-  const bool HasQuestionsForKey(EventKeyType key) const { return (key_idx_.count(key) != 0); }
-  ~Questions() { kaldi::DeletePointers(&key_options_); }
-
-
-  /// Initializer with arguments.  After using this you would have to set up the config for each key you
-  /// are going to use, or use InitRand().
-  Questions() { }
-
-
-  /// InitRand attempts to generate "reasonable" random questions.  Only
-  /// of use for debugging.  This initializer creates a config that is
-  /// ready to use.
-  /// e.g. num_iters_refine = 0 means just use stated questions (if >1, will use
-  /// different questions at each split of the tree).
-  void InitRand(const BuildTreeStatsType &stats, int32 num_quest, int32 num_iters_refine, AllKeysType all_keys_type);
-
-  void Write(std::ostream &os, bool binary) const;
-  void Read(std::istream &is, bool binary);
- private:
-  std::vector<QuestionsForKey*> key_options_;
-  std::map<EventKeyType, size_t> key_idx_;
-  KALDI_DISALLOW_COPY_AND_ASSIGN(Questions);
-};
-
-/// @}
-
-}// end namespace kaldi
-
-#endif // KALDI_TREE_BUILD_TREE_QUESTIONS_H_
diff --git a/kaldi_io/src/kaldi/tree/build-tree-utils.h b/kaldi_io/src/kaldi/tree/build-tree-utils.h
deleted file mode 100644
index 464fc6b..0000000
--- a/kaldi_io/src/kaldi/tree/build-tree-utils.h
+++ /dev/null
@@ -1,324 +0,0 @@
-// tree/build-tree-utils.h
-
-// Copyright 2009-2011  Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_TREE_BUILD_TREE_UTILS_H_
-#define KALDI_TREE_BUILD_TREE_UTILS_H_
-
-#include "tree/build-tree-questions.h"
-
-// build-tree-questions.h needed for this typedef:
-// typedef std::vector<std::pair<EventType, Clusterable*> > BuildTreeStatsType;
-// and for other #includes.
-
-namespace kaldi {
-
-
-///   \defgroup tree_group_lower Low-level functions for manipulating statistics and event-maps
-///    See \ref tree_internals and specifically \ref treei_func for context.
-///   \ingroup tree_group
-///
-///   @{
-
-
-
-/// This frees the Clusterable* pointers in "stats", where non-NULL, and sets them to NULL.
-/// Does not delete the pointer "stats" itself.
-void DeleteBuildTreeStats(BuildTreeStatsType *stats);
-
-/// Writes BuildTreeStats object.  This works even if pointers are NULL.
-void WriteBuildTreeStats(std::ostream &os, bool binary,
-                         const BuildTreeStatsType &stats);
-
-/// Reads BuildTreeStats object.  The "example" argument must be of the same
-/// type as the stats on disk, and is needed for access to the correct "Read"
-/// function.  It was organized this way for easier extensibility (so adding new
-/// Clusterable derived classes isn't painful)
-void ReadBuildTreeStats(std::istream &is, bool binary,
-                        const Clusterable &example, BuildTreeStatsType *stats);
-
-/// Convenience function e.g. to work out possible values of the phones from just the stats.
-/// Returns true if key was always defined inside the stats.
-/// May be used with and == NULL to find out of key was always defined.
-bool PossibleValues(EventKeyType key, const BuildTreeStatsType &stats,
-                    std::vector<EventValueType> *ans);
-
-
-/// Splits stats according to the EventMap, indexing them at output by the
-/// leaf type.   A utility function.  NOTE-- pointers in stats_out point to
-/// the same memory location as those in stats.  No copying of Clusterable*
-/// objects happens.  Will add to stats in stats_out if non-empty at input.
-/// This function may increase the size of vector stats_out as necessary
-/// to accommodate stats, but will never decrease the size.
-void SplitStatsByMap(const BuildTreeStatsType &stats_in, const EventMap &e,
-                     std::vector<BuildTreeStatsType> *stats_out);
-
-/// SplitStatsByKey splits stats up according to the value of a particular key,
-/// which must be always defined and nonnegative.  Like MapStats.  Pointers to
-/// Clusterable* in stats_out are not newly allocated-- they are the same as the
-/// ones in stats_in.  Generally they will still be owned at stats_in (user can
-/// decide where to allocate ownership).
-void SplitStatsByKey(const BuildTreeStatsType &stats_in, EventKeyType key,
-                     std::vector<BuildTreeStatsType> *stats_out);
-
-
-/// Converts stats from a given context-window (N) and central-position (P) to a
-/// different N and P, by possibly reducing context.  This function does a job
-/// that's quite specific to the "normal" stats format we use.  See \ref
-/// tree_window for background.  This function may delete some keys and change
-/// others, depending on the N and P values.  It expects that at input, all keys
-/// will either be -1 or lie between 0 and oldN-1.  At output, keys will be
-/// either -1 or between 0 and newN-1.
-/// Returns false if we could not convert the stats (e.g. because newN is larger
-/// than oldN).
-bool ConvertStats(int32 oldN, int32 oldP, int32 newN, int32 newP,
-                  BuildTreeStatsType *stats);
-
-
-/// FilterStatsByKey filters the stats according the value of a specified key.
-/// If include_if_present == true, it only outputs the stats whose key is in
-/// "values"; otherwise it only outputs the stats whose key is not in "values".
-/// At input, "values" must be sorted and unique, and all stats in "stats_in"
-/// must have "key" defined.  At output, pointers to Clusterable* in stats_out
-/// are not newly allocated-- they are the same as the ones in stats_in.
-void FilterStatsByKey(const BuildTreeStatsType &stats_in,
-                      EventKeyType key,
-                      std::vector<EventValueType> &values,
-                      bool include_if_present,  // true-> retain only if in "values",
-                      // false-> retain only if not in "values".
-                      BuildTreeStatsType *stats_out);
-
-
-/// Sums stats, or returns NULL stats_in has no non-NULL stats.
-/// Stats are newly allocated, owned by caller.
-Clusterable *SumStats(const BuildTreeStatsType &stats_in);
-
-/// Sums the normalizer [typically, data-count] over the stats.
-BaseFloat SumNormalizer(const BuildTreeStatsType &stats_in);
-
-/// Sums the objective function over the stats.
-BaseFloat SumObjf(const BuildTreeStatsType &stats_in);
-
-
-/// Sum a vector of stats.  Leaves NULL as pointer if no stats available.
-/// The pointers in stats_out are owned by caller.  At output, there may be
-/// NULLs in the vector stats_out.
-void SumStatsVec(const std::vector<BuildTreeStatsType> &stats_in, std::vector<Clusterable*> *stats_out);
-
-/// Cluster the stats given the event map return the total objf given those clusters.
-BaseFloat ObjfGivenMap(const BuildTreeStatsType &stats_in, const EventMap &e);
-
-
-/// FindAllKeys puts in *keys the (sorted, unique) list of all key identities in the stats.
-/// If type == kAllKeysInsistIdentical, it will insist that this set of keys is the same for all the
-///   stats (else exception is thrown).
-/// if type == kAllKeysIntersection, it will return the smallest common set of keys present in
-///   the set of stats
-/// if type== kAllKeysUnion (currently probably not so useful since maps will return "undefined"
-///   if key is not present), it will return the union of all the keys present in the stats.
-void FindAllKeys(const BuildTreeStatsType &stats, AllKeysType keys_type,
-                 std::vector<EventKeyType> *keys);
-
-
-/// @}
-
-
-/**
- \defgroup tree_group_intermediate Intermediate-level functions used in building the tree
-    These functions are are used in top-level tree-building code (\ref tree_group_top); see
-     \ref tree_internals for documentation.
- \ingroup tree_group
- @{
-*/
-
-
-/// Returns a tree with just one node.  Used @ start of tree-building process.
-/// Not really used in current recipes.
-inline EventMap *TrivialTree(int32 *num_leaves) {
-  KALDI_ASSERT(*num_leaves == 0);  // in envisaged usage.
-  return new ConstantEventMap( (*num_leaves)++ );
-}
-
-/// DoTableSplit does a complete split on this key (e.g. might correspond to central phone
-/// (key = P-1), or HMM-state position (key == kPdfClass == -1).  Stats used to work out possible
-/// values of the event. "num_leaves" is used to allocate new leaves.   All stats must have
-/// this key defined, or this function will crash.
-EventMap *DoTableSplit(const EventMap &orig, EventKeyType key,
-                       const BuildTreeStatsType &stats, int32 *num_leaves);
-
-
-/// DoTableSplitMultiple does a complete split on all the keys, in order from keys[0],
-/// keys[1]
-/// and so on.  The stats are used to work out possible values corresponding to the key.
-/// "num_leaves" is used to allocate new leaves.   All stats must have
-/// the keys defined, or this function will crash.
-/// Returns a newly allocated event map.
-EventMap *DoTableSplitMultiple(const EventMap &orig,
-                               const std::vector<EventKeyType> &keys,
-                               const BuildTreeStatsType &stats,
-                               int32 *num_leaves);
-
-
-/// "ClusterEventMapGetMapping" clusters the leaves of the EventMap, with "thresh" a delta-likelihood
-/// threshold to control how many leaves we combine (might be the same as the delta-like
-/// threshold used in splitting.
-// The function returns the #leaves we combined.  The same leaf-ids of the leaves being clustered
-// will be used for the clustered leaves (but other than that there is no special rule which
-// leaf-ids should be used at output).
-// It outputs the mapping for leaves, in "mapping", which may be empty at the start
-// but may also contain mappings for other parts of the tree, which must contain
-// disjoint leaves from this part.  This is so that Cluster can
-// be called multiple times for sub-parts of the tree (with disjoint sets of leaves),
-// e.g. if we want to avoid sharing across phones.  Afterwards you can use Copy function
-// of EventMap to apply the mapping, i.e. call e_in.Copy(mapping) to get the new map.
-// Note that the application of Cluster creates gaps in the leaves.  You should then
-// call RenumberEventMap(e_in.Copy(mapping), num_leaves).
-// *If you only want to cluster a subset of the leaves (e.g. just non-silence, or just
-// a particular phone, do this by providing a set of "stats" that correspond to just
-// this subset of leaves*.  Leaves with no stats will not be clustered.
-// See build-tree.cc for an example of usage.
-int ClusterEventMapGetMapping(const EventMap &e_in, const BuildTreeStatsType &stats,
-                              BaseFloat thresh, std::vector<EventMap*> *mapping);
-
-/// This is as ClusterEventMapGetMapping but a more convenient interface
-/// that exposes less of the internals.  It uses a bottom-up clustering to
-/// combine the leaves, until the log-likelihood decrease from combinging two
-/// leaves exceeds the threshold.
-EventMap *ClusterEventMap(const EventMap &e_in, const BuildTreeStatsType &stats,
-                          BaseFloat thresh, int32 *num_removed);
-
-/// This is as ClusterEventMap, but first splits the stats on the keys specified
-/// in "keys" (e.g. typically keys = [ -1, P ]), and only clusters within the
-/// classes defined by that splitting.
-/// Note-- leaves will be non-consecutive at output, use RenumberEventMap.
-EventMap *ClusterEventMapRestrictedByKeys(const EventMap &e_in,
-                                          const BuildTreeStatsType &stats,
-                                          BaseFloat thresh,
-                                          const std::vector<EventKeyType> &keys,
-                                          int32 *num_removed);
-
-
-/// This version of ClusterEventMapRestricted restricts the clustering to only
-/// allow things that "e_restrict" maps to the same value to be clustered
-/// together.
-EventMap *ClusterEventMapRestrictedByMap(const EventMap &e_in,
-                                         const BuildTreeStatsType &stats,
-                                         BaseFloat thresh,
-                                         const EventMap &e_restrict,
-                                         int32 *num_removed);
-
-
-/// RenumberEventMap [intended to be used after calling ClusterEventMap] renumbers
-/// an EventMap so its leaves are consecutive.
-/// It puts the number of leaves in *num_leaves.  If later you need the mapping of
-/// the leaves, modify the function and add a new argument.
-EventMap *RenumberEventMap(const EventMap &e_in, int32 *num_leaves);
-
-/// This function remaps the event-map leaves using this mapping,
-/// indexed by the number at leaf.
-EventMap *MapEventMapLeaves(const EventMap &e_in,
-                            const std::vector<int32> &mapping);
-
-
-
-/// ShareEventMapLeaves performs a quite specific function that allows us to
-/// generate trees where, for a certain list of phones, and for all states in
-/// the phone, all the pdf's are shared.
-/// Each element of "values" contains a list of phones (may be just one phone),
-/// all states of which we want shared together).  Typically at input, "key" will
-/// equal P, the central-phone position, and "values" will contain just one
-/// list containing the silence phone.
-/// This function renumbers the event map leaves after doing the sharing, to
-/// make the event-map leaves contiguous.
-EventMap *ShareEventMapLeaves(const EventMap &e_in, EventKeyType key,
-                              std::vector<std::vector<EventValueType> > &values,
-                              int32 *num_leaves);
-
-
-
-/// Does a decision-tree split at the leaves of an EventMap.
-/// @param orig [in] The EventMap whose leaves we want to split. [may be either a trivial or a
-///           non-trivial one].
-/// @param stats [in] The statistics for splitting the tree; if you do not want a particular
-///          subset of leaves to be split, make sure the stats corresponding to those leaves
-///          are not present in "stats".
-/// @param qcfg [in] Configuration class that contains initial questions (e.g. sets of phones)
-///          for each key and says whether to refine these questions during tree building.
-/// @param thresh [in] A log-likelihood threshold (e.g. 300) that can be used to
-///           limit the number of leaves; you can use zero and set max_leaves instead.
-/// @param max_leaves [in] Will stop leaves being split after they reach this number.
-/// @param num_leaves [in,out] A pointer used to allocate leaves; always corresponds to the
-///             current number of leaves (is incremented when this is increased).
-/// @param objf_impr_out [out] If non-NULL, will be set to the objective improvement due to splitting
-///           (not normalized by the number of frames).
-/// @param smallest_split_change_out If non-NULL, will be set to the smallest objective-function
-///         improvement that we got from splitting any leaf; useful to provide a threshold
-///         for ClusterEventMap.
-/// @return The EventMap after splitting is returned; pointer is owned by caller.
-EventMap *SplitDecisionTree(const EventMap &orig,
-                            const BuildTreeStatsType &stats,
-                            Questions &qcfg,
-                            BaseFloat thresh,
-                            int32 max_leaves,  // max_leaves<=0 -> no maximum.
-                            int32 *num_leaves,
-                            BaseFloat *objf_impr_out,
-                            BaseFloat *smallest_split_change_out);
-
-/// CreateRandomQuestions will initialize a Questions randomly, in a reasonable
-/// way [for testing purposes, or when hand-designed questions are not available].
-/// e.g. num_quest = 5 might be a reasonable value if num_iters > 0, or num_quest = 20 otherwise.
-void CreateRandomQuestions(const BuildTreeStatsType &stats, int32 num_quest, Questions *cfg_out);
-
-
-/// FindBestSplitForKey is a function used in DoDecisionTreeSplit.
-/// It finds the best split for this key, given these stats.
-/// It will return 0 if the key was not always defined for the stats.
-BaseFloat FindBestSplitForKey(const BuildTreeStatsType &stats,
-                              const Questions &qcfg,
-                              EventKeyType key,
-                              std::vector<EventValueType> *yes_set);
-
-
-/// GetStubMap is used in tree-building functions to get the initial
-/// to-states map, before the decision-tree-building process.  It creates
-/// a simple map that splits on groups of phones.  For the set of phones in
-/// phone_sets[i] it creates either: if share_roots[i] == true, a single
-/// leaf node, or if share_roots[i] == false, separate root nodes for
-/// each HMM-position (it goes up to the highest position for any
-/// phone in the set, although it will warn if you share roots between
-/// phones with different numbers of states, which is a weird thing to
-/// do but should still work.  If any phone is present
-/// in "phone_sets" but "phone2num_pdf_classes" does not map it to a length,
-/// it is an error.  Note that the behaviour of the resulting map is
-/// undefined for phones not present in "phone_sets".
-/// At entry, this function should be called with (*num_leaves == 0).
-/// It will number the leaves starting from (*num_leaves).
-
-EventMap *GetStubMap(int32 P,
-                     const std::vector<std::vector<int32> > &phone_sets,
-                     const std::vector<int32> &phone2num_pdf_classes,
-                     const std::vector<bool> &share_roots,  // indexed by index into phone_sets.
-                     int32 *num_leaves);
-/// Note: GetStubMap with P = 0 can be used to get a standard monophone system.
-
-/// @}
-
-
-}// end namespace kaldi
-
-#endif
diff --git a/kaldi_io/src/kaldi/tree/build-tree.h b/kaldi_io/src/kaldi/tree/build-tree.h
deleted file mode 100644
index 37bb108..0000000
--- a/kaldi_io/src/kaldi/tree/build-tree.h
+++ /dev/null
@@ -1,250 +0,0 @@
-// tree/build-tree.h
-
-// Copyright 2009-2011  Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_TREE_BUILD_TREE_H_
-#define KALDI_TREE_BUILD_TREE_H_
-
-// The file build-tree.h contains outer-level routines used in tree-building
-// and related tasks, that are directly called by the command-line tools.
-
-#include "tree/build-tree-utils.h"
-#include "tree/context-dep.h"
-namespace kaldi {
-
-/// \defgroup tree_group_top Top-level tree-building functions
-/// See \ref tree_internals for context.
-/// \ingroup tree_group
-/// @{
-
-// Note, in tree_group_top we also include AccumulateTreeStats, in
-// ../hmm/tree-accu.h (it has some extra dependencies so we didn't
-// want to include it here).
-
-/**
- *  BuildTree is the normal way to build a set of decision trees.
- *  The sets "phone_sets" dictate how we set up the roots of the decision trees.
- *  each set of phones phone_sets[i] has shared decision-tree roots, and if
- *  the corresponding variable share_roots[i] is true, the root will be shared
- *  for the different HMM-positions in the phone.  All phones in "phone_sets"
- *  should be in the stats (use FixUnseenPhones to ensure this).
- *  if for any i, do_split[i] is false, we will not do any tree splitting for
- *  phones in that set.
- * @param qopts [in] Questions options class, contains questions for each key
- *                   (e.g. each phone position)
- * @param phone_sets [in] Each element of phone_sets is a set of phones whose
- *                 roots are shared together (prior to decision-tree splitting).
- * @param phone2num_pdf_classes [in] A map from phones to the number of
- *                 \ref pdf_class "pdf-classes"
- *                 in the phone (this info is derived from the HmmTopology object)
- * @param share_roots [in] A vector the same size as phone_sets; says for each
- *                phone set whether the root should be shared among all the
- *                pdf-classes or not.
- * @param do_split [in] A vector the same size as phone_sets; says for each
- *                phone set whether decision-tree splitting should be done
- *                 (generally true for non-silence phones).
- * @param stats [in] The statistics used in tree-building.
- * @param thresh [in] Threshold used in decision-tree splitting (e.g. 1000),
- *                   or you may use 0 in which case max_leaves becomes the
- *                    constraint.
- * @param max_leaves [in] Maximum number of leaves it will create; set this
- *                  to a large number if you want to just specify  "thresh".
- * @param cluster_thresh [in] Threshold for clustering leaves after decision-tree
- *                  splitting (only within each phone-set); leaves will be combined
- *                  if log-likelihood change is less than this.  A value about equal
- *                  to "thresh" is suitable
- *                  if thresh != 0; otherwise, zero will mean no clustering is done,
- *                  or a negative value (e.g. -1) sets it to the smallest likelihood
- *                  change seen during the splitting algorithm; this typically causes
- *                  about a 20% reduction in the number of leaves.
- 
- * @param P [in] The central position of the phone context window, e.g. 1 for a
- *                triphone system.
- * @return  Returns a pointer to an EventMap object that is the tree.
-
-*/
-
-EventMap *BuildTree(Questions &qopts,
-                    const std::vector<std::vector<int32> > &phone_sets,
-                    const std::vector<int32> &phone2num_pdf_classes,
-                    const std::vector<bool> &share_roots,
-                    const std::vector<bool> &do_split,
-                    const BuildTreeStatsType &stats,
-                    BaseFloat thresh,
-                    int32 max_leaves,
-                    BaseFloat cluster_thresh,  // typically == thresh.  If negative, use smallest split.
-                    int32 P);
-
-
-/**
- *
- *  BuildTreeTwoLevel builds a two-level tree, useful for example in building tied mixture
- *  systems with multiple codebooks.  It first builds a small tree by splitting to
- *  "max_leaves_first".  It then splits at the leaves of "max_leaves_first" (think of this
- *  as creating multiple little trees at the leaves of the first tree), until the total
- *  number of leaves reaches "max_leaves_second".  It then outputs the second tree, along
- *  with a mapping from the leaf-ids of the second tree to the leaf-ids of the first tree.
- *  Note that the interface is similar to BuildTree, and in fact it calls BuildTree
- *  internally.
- *
- *  The sets "phone_sets" dictate how we set up the roots of the decision trees.
- *  each set of phones phone_sets[i] has shared decision-tree roots, and if
- *  the corresponding variable share_roots[i] is true, the root will be shared
- *  for the different HMM-positions in the phone.  All phones in "phone_sets"
- *  should be in the stats (use FixUnseenPhones to ensure this).
- *  if for any i, do_split[i] is false, we will not do any tree splitting for
- *  phones in that set.
- *
- * @param qopts [in] Questions options class, contains questions for each key
- *                   (e.g. each phone position)
- * @param phone_sets [in] Each element of phone_sets is a set of phones whose
- *                 roots are shared together (prior to decision-tree splitting).
- * @param phone2num_pdf_classes [in] A map from phones to the number of
- *                 \ref pdf_class "pdf-classes"
- *                 in the phone (this info is derived from the HmmTopology object)
- * @param share_roots [in] A vector the same size as phone_sets; says for each
- *                phone set whether the root should be shared among all the
- *                pdf-classes or not.
- * @param do_split [in] A vector the same size as phone_sets; says for each
- *                phone set whether decision-tree splitting should be done
- *                 (generally true for non-silence phones).
- * @param stats [in] The statistics used in tree-building.
- * @param max_leaves_first [in] Maximum number of leaves it will create in first
- *                  level of decision tree. 
- * @param max_leaves_second [in] Maximum number of leaves it will create in second
- *                  level of decision tree.  Must be > max_leaves_first.
- * @param cluster_leaves [in] Boolean value; if true, we post-cluster the leaves produced
- *                  in the second level of decision-tree split; if false, we don't.
- *                  The threshold for post-clustering is the log-like change of the last
- *                  decision-tree split; this typically causes about a 20% reduction in
- *                  the number of leaves.
- * @param P [in]   The central position of the phone context window, e.g. 1 for a
- *                 triphone system.
- * @param leaf_map [out]  Will be set to be a mapping from the leaves of the
- *                 "big" tree to the leaves of the "little" tree, which you can
- *                 view as cluster centers.
- * @return  Returns a pointer to an EventMap object that is the (big) tree.
-
-*/
-
-EventMap *BuildTreeTwoLevel(Questions &qopts,
-                            const std::vector<std::vector<int32> > &phone_sets,
-                            const std::vector<int32> &phone2num_pdf_classes,
-                            const std::vector<bool> &share_roots,
-                            const std::vector<bool> &do_split,
-                            const BuildTreeStatsType &stats,
-                            int32 max_leaves_first,
-                            int32 max_leaves_second,
-                            bool cluster_leaves,
-                            int32 P,
-                            std::vector<int32> *leaf_map);
-
-
-/// GenRandStats generates random statistics of the form used by BuildTree.
-/// It tries to do so in such a way that they mimic "real" stats.  The event keys
-/// and their corresponding values are:
-/// - key == -1 == kPdfClass -> pdf-class, generally corresponds to
-///       zero-based position in HMM (0, 1, 2 .. hmm_lengths[phone]-1)
-/// - key == 0 -> phone-id of left-most context phone.
-/// - key == 1 -> phone-id of one-from-left-most context phone.
-/// - key == P-1 -> phone-id of central phone.
-/// - key == N-1 -> phone-id of right-most context phone.
-/// GenRandStats is useful only for testing but it serves to document the format of
-/// stats used by BuildTreeDefault.
-/// if is_ctx_dep[phone] is set to false, GenRandStats will not define the keys for
-/// other than the P-1'th phone.
-
-/// @param dim [in] dimension of features.
-/// @param num_stats [in] approximate number of separate phones-in-context wanted.
-/// @param N [in] context-size (typically 3)
-/// @param P [in] central-phone position in zero-based numbering (typically 1)
-/// @param phone_ids [in] integer ids of phones
-/// @param hmm_lengths [in] lengths of hmm for phone, indexed by phone.
-/// @param is_ctx_dep [in] boolean array indexed by phone, saying whether each phone
-///     is context dependent.
-/// @param ensure_all_phones_covered [in] Boolean argument: if true, GenRandStats
-///     ensures that every phone is seen at least once in the central position (P).
-/// @param stats_out [out] The statistics that this routine outputs.
-
-void GenRandStats(int32 dim, int32 num_stats, int32 N, int32 P,
-                  const std::vector<int32> &phone_ids,
-                  const std::vector<int32> &hmm_lengths,
-                  const std::vector<bool> &is_ctx_dep,
-                  bool ensure_all_phones_covered,
-                  BuildTreeStatsType *stats_out);
-
-
-/// included here because it's used in some tree-building
-/// calling code.  Reads an OpenFst symbl table,
-/// discards the symbols and outputs the integers
-void ReadSymbolTableAsIntegers(std::string filename,
-                               bool include_eps,
-                               std::vector<int32> *syms);
-
-
-
-/**
- *  Outputs sets of phones that are reasonable for questions
- *  to ask in the tree-building algorithm.  These are obtained by tree
- *  clustering of the phones; for each node in the tree, all the leaves
- *  accessible from that node form one of the sets of phones.
- *    @param stats [in] The statistics as used for normal tree-building.
- *    @param phone_sets_in [in] All the phones, pre-partitioned into sets.
- *       The output sets will be various unions of these sets.  These sets
- *       will normally correspond to "real phones", in cases where the phones
- *       have stress and position markings.
- *    @param all_pdf_classes_in [in] All the \ref pdf_class "pdf-classes"
- *      that we consider for clustering.  In the normal case this is the singleton
- *       set {1}, which means that we only consider the central hmm-position
- *       of the standard 3-state HMM, for clustering purposes.
- *    @param P [in] The central position in the phone context window; normally
- *       1 for triphone system.s
- *    @param questions_out [out] The questions (sets of phones) are output to here.
- **/
-void AutomaticallyObtainQuestions(BuildTreeStatsType &stats,
-                                  const std::vector<std::vector<int32> > &phone_sets_in,
-                                  const std::vector<int32> &all_pdf_classes_in,
-                                  int32 P,
-                                  std::vector<std::vector<int32> > *questions_out);
-
-/// This function clusters the phones (or some initially specified sets of phones)
-/// into sets of phones, using a k-means algorithm.  Useful, for example, in building
-/// simple models for purposes of adaptation.
-
-void KMeansClusterPhones(BuildTreeStatsType &stats,
-                         const std::vector<std::vector<int32> > &phone_sets_in,
-                         const std::vector<int32> &all_pdf_classes_in,
-                         int32 P,
-                         int32 num_classes,
-                         std::vector<std::vector<int32> > *sets_out);
-
-/// Reads the roots file (throws on error).  Format is lines like:
-///  "shared split 1 2 3 4",
-///  "not-shared not-split 5",
-/// and so on.  The numbers are indexes of phones.
-void ReadRootsFile(std::istream &is,
-                   std::vector<std::vector<int32> > *phone_sets,
-                   std::vector<bool> *is_shared_root,
-                   std::vector<bool> *is_split_root);
-
-
-/// @}
-
-}// end namespace kaldi
-
-#endif
diff --git a/kaldi_io/src/kaldi/tree/cluster-utils.h b/kaldi_io/src/kaldi/tree/cluster-utils.h
deleted file mode 100644
index 55583a2..0000000
--- a/kaldi_io/src/kaldi/tree/cluster-utils.h
+++ /dev/null
@@ -1,291 +0,0 @@
-// tree/cluster-utils.h
-
-// Copyright 2012   Arnab Ghoshal
-// Copyright 2009-2011  Microsoft Corporation;  Saarland University
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_TREE_CLUSTER_UTILS_H_
-#define KALDI_TREE_CLUSTER_UTILS_H_
-
-#include <vector>
-#include "matrix/matrix-lib.h"
-#include "itf/clusterable-itf.h"
-
-namespace kaldi {
-
-/// \addtogroup clustering_group_simple
-/// @{
-
-/// Returns the total objective function after adding up all the
-/// statistics in the vector (pointers may be NULL).
-BaseFloat SumClusterableObjf(const std::vector<Clusterable*> &vec);
-
-/// Returns the total normalizer (usually count) of the cluster (pointers may be NULL).
-BaseFloat SumClusterableNormalizer(const std::vector<Clusterable*> &vec);
-
-/// Sums stats (ptrs may be NULL). Returns NULL if no non-NULL stats present.
-Clusterable *SumClusterable(const std::vector<Clusterable*> &vec);
-
-/** Fills in any (NULL) holes in "stats" vector, with empty stats, because
- *  certain algorithms require non-NULL stats.  If "stats" nonempty, requires it
- *  to contain at least one non-NULL pointer that we can call Copy() on.
- */
-void EnsureClusterableVectorNotNull(std::vector<Clusterable*> *stats);
-
-
-/** Given stats and a vector "assignments" of the same size (that maps to
- * cluster indices), sums the stats up into "clusters."  It will add to any
- * stats already present in "clusters" (although typically "clusters" will be
- * empty when called), and it will extend with NULL pointers for any unseen
- * indices. Call EnsureClusterableStatsNotNull afterwards if you want to ensure
- * all non-NULL clusters. Pointer in "clusters" are owned by caller. Pointers in
- * "stats" do not have to be non-NULL.
- */
-void AddToClusters(const std::vector<Clusterable*> &stats,
-                   const std::vector<int32> &assignments,
-                   std::vector<Clusterable*> *clusters);
-
-
-/// AddToClustersOptimized does the same as AddToClusters (it sums up the stats
-/// within each cluster, except it uses the sum of all the stats ("total") to
-/// optimize the computation for speed, if possible.  This will generally only be
-/// a significant speedup in the case where there are just two clusters, which
-/// can happen in algorithms that are doing binary splits; the idea is that we
-/// sum up all the stats in one cluster (the one with the fewest points in it),
-/// and then subtract from the total.
-void AddToClustersOptimized(const std::vector<Clusterable*> &stats,
-                            const std::vector<int32> &assignments,
-                            const Clusterable &total,
-                            std::vector<Clusterable*> *clusters);
-
-/// @} end "addtogroup clustering_group_simple"
-
-/// \addtogroup clustering_group_algo
-/// @{
-
-// Note, in the algorithms below, it is assumed that the input "points" (which
-// is std::vector<Clusterable*>) is all non-NULL.
-
-/** A bottom-up clustering algorithm. There are two parameters that control how
- *  many clusters we get: a "max_merge_thresh" which is a threshold for merging
- *  clusters, and a min_clust which puts a floor on the number of clusters we want. Set
- *  max_merge_thresh = large to use the min_clust only, or min_clust to 0 to use
- *  the max_merge_thresh only.
- *
- *  The algorithm is:
- *  \code
- *      while (num-clusters > min_clust && smallest_merge_cost <= max_merge_thresh)
- *          merge the closest two clusters.
- *  \endcode
- *
- *  @param points [in] Points to be clustered (may not contain NULL pointers)
- *  @param thresh [in] Threshold on cost change from merging clusters; clusters
- *               won't be merged if the cost is more than this
- *  @param min_clust [in] Minimum number of clusters desired; we'll stop merging
- *                  after reaching this number.
- *  @param clusters_out [out] If non-NULL, will be set to a vector of size equal
- *                 to the number of output clusters, containing the clustered
- *                 statistics.  Must be empty when called.
- *  @param assignments_out [out] If non-NULL, will be resized to the number of
- *                 points, and each element is the index of the cluster that point
- *                 was assigned to.
- *  @return Returns the total objf change relative to all clusters being separate, which is
- *    a negative.  Note that this is not the same as what the other clustering algorithms return.
- */
-BaseFloat ClusterBottomUp(const std::vector<Clusterable*> &points,
-                          BaseFloat thresh,
-                          int32 min_clust,
-                          std::vector<Clusterable*> *clusters_out,
-                          std::vector<int32> *assignments_out);
-
-/** This is a bottom-up clustering where the points are pre-clustered in a set
- *  of compartments, such that only points in the same compartment are clustered
- *  together. The compartment and pair of points with the smallest merge cost
- *  is selected and the points are clustered. The result stays in the same
- *  compartment. The code does not merge compartments, and hence assumes that
- *  the number of compartments is smaller than the 'min_clust' option.
- *  The clusters in "clusters_out" are newly allocated and owned by the caller.
- */
-BaseFloat ClusterBottomUpCompartmentalized(
-    const std::vector< std::vector<Clusterable*> > &points, BaseFloat thresh,
-    int32 min_clust, std::vector< std::vector<Clusterable*> > *clusters_out,
-    std::vector< std::vector<int32> > *assignments_out);
-
-
-struct RefineClustersOptions {
-  int32 num_iters;  // must be >= 0.  If zero, does nothing.
-  int32 top_n;  // must be >= 2.
-  RefineClustersOptions() : num_iters(100), top_n(5) {}
-  RefineClustersOptions(int32 num_iters_in, int32 top_n_in)
-      : num_iters(num_iters_in), top_n(top_n_in) {}
-  // include Write and Read functions because this object gets written/read as
-  // part of the QuestionsForKeyOptions class.
-  void Write(std::ostream &os, bool binary) const;
-  void Read(std::istream &is, bool binary);
-};
-
-/** RefineClusters is mainly used internally by other clustering algorithms.
- *
- *  It starts with a given assignment of points to clusters and
- *  keeps trying to improve it by moving points from cluster to cluster, up to
- *  a maximum number of iterations.
- *
- *  "clusters" and "assignments" are both input and output variables, and so
- *  both MUST be non-NULL.
- *
- *  "top_n" (>=2) is a pruning value: more is more exact, fewer is faster. The
- *  algorithm initially finds the "top_n" closest clusters to any given point,
- *  and from that point only consider move to those "top_n" clusters. Since
- *  RefineClusters is called multiple times from ClusterKMeans (for instance),
- *  this is not really a limitation.
- */
-BaseFloat RefineClusters(const std::vector<Clusterable*> &points,
-                         std::vector<Clusterable*> *clusters /*non-NULL*/,
-                         std::vector<int32> *assignments /*non-NULL*/,
-                         RefineClustersOptions cfg = RefineClustersOptions());
-
-struct ClusterKMeansOptions {
-  RefineClustersOptions refine_cfg;
-  int32 num_iters;
-  int32 num_tries;  // if >1, try whole procedure >once and pick best.
-  bool verbose;
-  ClusterKMeansOptions()
-      : refine_cfg(), num_iters(20), num_tries(2), verbose(true)  {}
-};
-
-/** ClusterKMeans is a K-means-like clustering algorithm. It starts with
- *  pseudo-random initialization of points to clusters and uses RefineClusters
- *  to iteratively improve the cluster assignments.  It does this for
- *  multiple iterations and picks the result with the best objective function.
- *
- *
- *  ClusterKMeans implicitly uses Rand(). It will not necessarily return
- *  the same value on different calls.  Use sRand() if you want consistent
- *  results.
- *  The algorithm used in ClusterKMeans is a "k-means-like" algorithm that tries
- *  to be as efficient as possible.  Firstly, since the algorithm it uses
- *  includes random initialization, it tries the whole thing cfg.num_tries times
- *  and picks the one with the best objective function.  Each try, it does as
- *  follows: it randomly initializes points to clusters, and then for
- *  cfg.num_iters iterations it calls RefineClusters().  The options to
- *  RefineClusters() are given by cfg.refine_cfg.  Calling RefineClusters once
- *  will always be at least as good as doing one iteration of reassigning points to
- *  clusters, but will generally be quite a bit better (without taking too
- *  much extra time).
- *
- *  @param points [in]  points to be clustered (must be all non-NULL).
- *  @param num_clust [in] number of clusters requested (it will always return exactly
- *                 this many, or will fail if num_clust > points.size()).
- *  @param clusters_out [out] may be NULL; if non-NULL, should be empty when called.
- *          Will be set to a vector of statistics corresponding to the output clusters.
- *  @param assignments_out [out] may be NULL; if non-NULL, will be set to a vector of
- *             same size as "points", which says for each point which cluster
- *              it is assigned to.
- *  @param cfg [in] configuration class specifying options to the algorithm.
- *  @return Returns the objective function improvement versus everything being
- *     in the same cluster.
- *
- */
-BaseFloat ClusterKMeans(const std::vector<Clusterable*> &points,
-                        int32 num_clust,  // exact number of clusters
-                        std::vector<Clusterable*> *clusters_out,  // may be NULL
-                        std::vector<int32> *assignments_out,  // may be NULL
-                        ClusterKMeansOptions cfg = ClusterKMeansOptions());
-
-struct TreeClusterOptions  {
-  ClusterKMeansOptions kmeans_cfg;
-  int32 branch_factor;
-  BaseFloat thresh;  // Objf change: if >0, may be used to control number of leaves.
-  TreeClusterOptions()
-      : kmeans_cfg(), branch_factor(2), thresh(0) {
-    kmeans_cfg.verbose = false;
-  }
-};
-
-/** TreeCluster is a top-down clustering algorithm, using a binary tree (not
- *  necessarily balanced). Returns objf improvement versus having all points
- *  in one cluster.  The algorithm is:
- *     - Initialize to 1 cluster (tree with 1 node).
- *     - Maintain, for each cluster, a "best-binary-split" (using ClusterKMeans
- *       to do so). Always split the highest scoring cluster, until we can do no
- *       more splits.
- *
- *  @param points [in] Data points to be clustered
- *  @param max_clust  [in] Maximum number of clusters (you will get exactly this number,
- *                if there are at least this many points, except if you set the
- *                cfg.thresh value nonzero, in which case that threshold may limit
- *                the number of clusters.
- *  @param clusters_out [out] If non-NULL, will be set to the a vector whose first
- *                (*num_leaves_out) elements are the leaf clusters, and whose
- *                subsequent elements are the nonleaf nodes in the tree, in
- *                topological order with the root node last.  Must be empty vector
- *                when this function is called.
- *  @param assignments_out [out] If non-NULL, will be set to a vector to a vector the
- *               same size as "points", where assignments[i] is the leaf node index i
- *               to which the i'th point gets clustered.
- *  @param clust_assignments_out [out] If non-NULL, will be set to a vector the same size
- *                as clusters_out  which says for each node (leaf or nonleaf), the
- *                index of its parent.  For the root node (which is last),
- *                assignments_out[i] == i.  For each i, assignments_out[i]>=i, i.e.
- *                any node's parent is higher numbered than itself.  If you don't need
- *                this information, consider using instead the ClusterTopDown function.
- *  @param num_leaves_out [out] If non-NULL, will be set to the number of leaf nodes
- *                in the tree.
- *  @param cfg [in] Configuration object that controls clustering behavior.  Most
- *                 important value is "thresh", which provides an alternative mechanism
- *                 [other than max_clust] to limit the number of leaves.
- */
-BaseFloat TreeCluster(const std::vector<Clusterable*> &points,
-                      int32 max_clust,  // max number of leaf-level clusters.
-                      std::vector<Clusterable*> *clusters_out,
-                      std::vector<int32> *assignments_out,
-                      std::vector<int32> *clust_assignments_out,
-                      int32 *num_leaves_out,
-                      TreeClusterOptions cfg = TreeClusterOptions());
-
-
-/**
- *  A clustering algorithm that internally uses TreeCluster,
- *  but does not give you the information about the structure of the tree.
- *  The "clusters_out" and "assignments_out" may be NULL if the outputs are not
- *  needed.
- *
- *  @param points [in]  points to be clustered (must be all non-NULL).
- *  @param max_clust [in] Maximum number of clusters (you will get exactly this number,
- *                if there are at least this many points, except if you set the
- *                cfg.thresh value nonzero, in which case that threshold may limit
- *                the number of clusters.
- *  @param clusters_out [out] may be NULL; if non-NULL, should be empty when called.
- *           Will be set to a vector of statistics corresponding to the output clusters.
- *  @param assignments_out [out] may be NULL; if non-NULL, will be set to a vector of
- *           same size as "points", which says for each point which cluster
- *            it is assigned to.
- *  @param cfg [in] Configuration object that controls clustering behavior.  Most
- *                important value is "thresh", which provides an alternative mechanism
- *                [other than max_clust] to limit the number of leaves.
-*/
-BaseFloat ClusterTopDown(const std::vector<Clusterable*> &points,
-                         int32 max_clust,  // max number of clusters.
-                         std::vector<Clusterable*> *clusters_out,
-                         std::vector<int32> *assignments_out,
-                         TreeClusterOptions cfg = TreeClusterOptions());
-
-/// @} end of "addtogroup clustering_group_algo"
-
-}  // end namespace kaldi.
-
-#endif  // KALDI_TREE_CLUSTER_UTILS_H_
diff --git a/kaldi_io/src/kaldi/tree/clusterable-classes.h b/kaldi_io/src/kaldi/tree/clusterable-classes.h
deleted file mode 100644
index 817d0c6..0000000
--- a/kaldi_io/src/kaldi/tree/clusterable-classes.h
+++ /dev/null
@@ -1,158 +0,0 @@
-// tree/clusterable-classes.h
-
-// Copyright 2009-2011  Microsoft Corporation;  Saarland University
-//                2014  Daniel Povey
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_TREE_CLUSTERABLE_CLASSES_H_
-#define KALDI_TREE_CLUSTERABLE_CLASSES_H_ 1
-
-#include <string>
-#include "itf/clusterable-itf.h"
-#include "matrix/matrix-lib.h"
-
-namespace kaldi {
-
-// Note: see sgmm/sgmm-clusterable.h for an SGMM-based clusterable
-// class.  We didn't include it here, to avoid adding an extra
-// dependency to this directory.
-
-/// \addtogroup clustering_group
-/// @{
-
-/// ScalarClusterable clusters scalars with x^2 loss.
-class ScalarClusterable: public Clusterable {
- public:
-  ScalarClusterable(): x_(0), x2_(0), count_(0) {}
-  explicit ScalarClusterable(BaseFloat x): x_(x), x2_(x*x), count_(1) {}
-  virtual std::string Type() const { return "scalar"; }
-  virtual BaseFloat Objf() const;
-  virtual void SetZero() { count_ = x_ = x2_ = 0.0; }
-  virtual void Add(const Clusterable &other_in);
-  virtual void Sub(const Clusterable &other_in);
-  virtual Clusterable* Copy() const;
-  virtual BaseFloat Normalizer() const {
-    return static_cast<BaseFloat>(count_);
-  }
-
-  // Function to write data to stream. Will organize input later [more complex]
-  virtual void Write(std::ostream &os, bool binary) const;
-  virtual Clusterable* ReadNew(std::istream &is, bool binary) const;
-
-  std::string Info();  // For debugging.
-  BaseFloat Mean() { return (count_ != 0 ? x_/count_ : 0.0); }
-  private:
-  BaseFloat x_;
-  BaseFloat x2_;
-  BaseFloat count_;
-
-  void Read(std::istream &is, bool binary);
-};
-
-
-/// GaussClusterable wraps Gaussian statistics in a form accessible
-/// to generic clustering algorithms.
-class GaussClusterable: public Clusterable {
- public:
-  GaussClusterable(): count_(0.0), var_floor_(0.0) {}
-  GaussClusterable(int32 dim, BaseFloat var_floor):
-      count_(0.0), stats_(2, dim), var_floor_(var_floor) {}
-
-  GaussClusterable(const Vector<BaseFloat> &x_stats,
-                   const Vector<BaseFloat> &x2_stats,
-                   BaseFloat var_floor, BaseFloat count);
-
-  virtual std::string Type() const {  return "gauss"; }
-  void AddStats(const VectorBase<BaseFloat> &vec, BaseFloat weight = 1.0);
-  virtual BaseFloat Objf() const;
-  virtual void SetZero();
-  virtual void Add(const Clusterable &other_in);
-  virtual void Sub(const Clusterable &other_in);
-  virtual BaseFloat Normalizer() const { return count_; }
-  virtual Clusterable *Copy() const;
-  virtual void Scale(BaseFloat f);
-  virtual void Write(std::ostream &os, bool binary) const;
-  virtual Clusterable *ReadNew(std::istream &is, bool binary) const;
-  virtual ~GaussClusterable() {}
-
-  BaseFloat count() const { return count_; }
-  // The next two functions are not const-correct, because of SubVector.
-  SubVector<double> x_stats() const { return stats_.Row(0); }
-  SubVector<double> x2_stats() const { return stats_.Row(1); }
- private:
-  double count_;
-  Matrix<double> stats_; // two rows: sum, then sum-squared.
-  double var_floor_;  // should be common for all objects created.
-
-  void Read(std::istream &is, bool binary);
-};
-
-/// @} end of "addtogroup clustering_group"
-
-inline void GaussClusterable::SetZero() {
-  count_ = 0;
-  stats_.SetZero();
-}
-
-inline GaussClusterable::GaussClusterable(const Vector<BaseFloat> &x_stats,
-                                          const Vector<BaseFloat> &x2_stats,
-                                          BaseFloat var_floor, BaseFloat count):
-    count_(count), stats_(2, x_stats.Dim()), var_floor_(var_floor) {
-  stats_.Row(0).CopyFromVec(x_stats);
-  stats_.Row(1).CopyFromVec(x2_stats);
-}
-
-
-/// VectorClusterable wraps vectors in a form accessible to generic clustering
-/// algorithms.  Each vector is associated with a weight; these could be 1.0.
-/// The objective function (to be maximized) is the negated sum of squared
-/// distances from the cluster center to each vector, times that vector's
-/// weight.
-class VectorClusterable: public Clusterable {
- public:
-  VectorClusterable(): weight_(0.0), sumsq_(0.0) {}
-
-  VectorClusterable(const Vector<BaseFloat> &vector,
-                    BaseFloat weight);
-
-  virtual std::string Type() const {  return "vector"; }
-  // Objf is negated weighted sum of squared distances.
-  virtual BaseFloat Objf() const;
-  virtual void SetZero() { weight_ = 0.0; sumsq_ = 0.0; stats_.Set(0.0); }
-  virtual void Add(const Clusterable &other_in);
-  virtual void Sub(const Clusterable &other_in);
-  virtual BaseFloat Normalizer() const { return weight_; }
-  virtual Clusterable *Copy() const;
-  virtual void Scale(BaseFloat f);
-  virtual void Write(std::ostream &os, bool binary) const;
-  virtual Clusterable *ReadNew(std::istream &is, bool binary) const;
-  virtual ~VectorClusterable() {}
-
- private:
-  double weight_;  // sum of weights of the source vectors.  Never negative.
-  Vector<double> stats_; // Equals the weighted sum of the source vectors.
-  double sumsq_;  // Equals the sum over all sources, of weight_ * vec.vec,
-                  // where vec = stats_ / weight_.  Used in computing
-                  // the objective function.
-  void Read(std::istream &is, bool binary);
-};
-
-
-
-}  // end namespace kaldi.
-
-#endif  // KALDI_TREE_CLUSTERABLE_CLASSES_H_
diff --git a/kaldi_io/src/kaldi/tree/context-dep.h b/kaldi_io/src/kaldi/tree/context-dep.h
deleted file mode 100644
index 307fcd4..0000000
--- a/kaldi_io/src/kaldi/tree/context-dep.h
+++ /dev/null
@@ -1,166 +0,0 @@
-// tree/context-dep.h
-
-// Copyright 2009-2011  Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_TREE_CONTEXT_DEP_H_
-#define KALDI_TREE_CONTEXT_DEP_H_
-
-#include "itf/context-dep-itf.h"
-#include "tree/event-map.h"
-#include "matrix/matrix-lib.h"
-#include "tree/cluster-utils.h"
-
-/*
-  This header provides the declarations for the class ContextDependency, which inherits
-  from the interface class "ContextDependencyInterface" in itf/context-dep-itf.h.
-  This is basically a wrapper around an EventMap.  The EventMap
-  (tree/event-map.h) declares most of the internals of the class, and the building routines are
-  in build-tree.h which uses build-tree-utils.h, which uses cluster-utils.h . */
-
-
-namespace kaldi {
-
-static const EventKeyType kPdfClass = -1;  // The "name" to which we assign the
-// pdf-class (generally corresponds ot position in the HMM, zero-based);
-// must not be used for any other event.  I.e. the value corresponding to
-// this key is the pdf-class (see hmm-topology.h for explanation of what this is).
-
-
-/* ContextDependency is quite a generic decision tree.
-
-   It does not actually do very much-- all the magic is in the EventMap object.
-   All this class does is to encode the phone context as a sequence of events, and
-   pass this to the EventMap object to turn into what it will interpret as a
-   vector of pdfs.
-
-   Different versions of the ContextDependency class that are written in the future may
-   have slightly different interfaces and pass more stuff in as events, to the
-   EventMap object.
-
-   In order to separate the process of training decision trees from the process
-   of actually using them, we do not put any training code into the ContextDependency class.
- */
-class ContextDependency: public ContextDependencyInterface {
- public:
-  virtual int32 ContextWidth() const { return N_; }
-  virtual int32 CentralPosition() const { return P_; }
-
-
-  /// returns success or failure; outputs pdf to pdf_id
-  virtual bool Compute(const std::vector<int32> &phoneseq,
-                       int32 pdf_class, int32 *pdf_id) const;
-
-  virtual int32 NumPdfs() const {
-    // this routine could be simplified to return to_pdf_->MaxResult()+1.  we're a
-    // bit more paranoid than that.
-    if (!to_pdf_) return 0;
-    EventAnswerType max_result = to_pdf_->MaxResult();
-    if (max_result < 0 ) return 0;
-    else return (int32) max_result+1;
-  }
-  virtual ContextDependencyInterface *Copy() const {
-    return new ContextDependency(N_, P_, to_pdf_->Copy());
-  }
-
-  /// Read context-dependency object from disk; throws on error
-  void Read (std::istream &is, bool binary);
-
-  // Constructor with no arguments; will normally be called
-  // prior to Read()
-  ContextDependency(): N_(0), P_(0), to_pdf_(NULL) { }
-
-  // Constructor takes ownership of pointers.
-  ContextDependency(int32 N, int32 P,
-                    EventMap *to_pdf):
-      N_(N), P_(P), to_pdf_(to_pdf) { }
-  void Write (std::ostream &os, bool binary) const;
-
-  ~ContextDependency() { if (to_pdf_ != NULL) delete to_pdf_; }
-
-  const EventMap &ToPdfMap() const { return *to_pdf_; }
-
-  /// GetPdfInfo returns a vector indexed by pdf-id, saying for each pdf which
-  /// pairs of (phone, pdf-class) it can correspond to.  (Usually just one).
-  /// c.f. hmm/hmm-topology.h for meaning of pdf-class.
-
-  void GetPdfInfo(const std::vector<int32> &phones,  // list of phones
-                  const std::vector<int32> &num_pdf_classes,  // indexed by phone,
-                  std::vector<std::vector<std::pair<int32, int32> > > *pdf_info)
-      const;
-
- private:
-  int32 N_;  //
-  int32 P_;
-  EventMap *to_pdf_;  // owned here.
-
-  KALDI_DISALLOW_COPY_AND_ASSIGN(ContextDependency);
-};
-
-/// GenRandContextDependency is mainly of use for debugging.  Phones must be sorted and uniq
-/// on input.
-/// @param phones [in] A vector of phone id's [must be sorted and uniq].
-/// @param ensure_all_covered [in] boolean argument; if true,  GenRandContextDependency
-///        generates a context-dependency object that "works" for all phones [no gaps].
-/// @param num_pdf_classes [out] outputs a vector indexed by phone, of the number
-///          of pdf classes (e.g. states) for that phone.
-/// @return Returns the a context dependency object.
-ContextDependency *GenRandContextDependency(const std::vector<int32> &phones,
-                                            bool ensure_all_covered,
-                                            std::vector<int32> *num_pdf_classes);
-
-/// GenRandContextDependencyLarge is like GenRandContextDependency but generates a larger tree
-/// with specified N and P for use in "one-time" larger-scale tests.
-ContextDependency *GenRandContextDependencyLarge(const std::vector<int32> &phones,
-                                                 int N, int P,
-                                                 bool ensure_all_covered,
-                                                 std::vector<int32> *num_pdf_classes);
-
-// MonophoneContextDependency() returns a new ContextDependency object that
-// corresponds to a monophone system.
-// The map phone2num_pdf_classes maps from the phone id to the number of
-// pdf-classes we have for that phone (e.g. 3, so the pdf-classes would be
-// 0, 1, 2).
-
-ContextDependency*
-MonophoneContextDependency(const std::vector<int32> phones,
-                           const std::vector<int32> phone2num_pdf_classes);
-
-// MonophoneContextDependencyShared is as MonophoneContextDependency but lets
-// you define classes of phones which share pdfs (e.g. different stress-markers of a single
-// phone.)  Each element of phone_classes is a set of phones that are in that class.
-ContextDependency*
-MonophoneContextDependencyShared(const std::vector<std::vector<int32> > phone_classes,
-                                 const std::vector<int32> phone2num_pdf_classes);
-
-
-// Important note:
-// Statistics for training decision trees will be of type:
-// std::vector<std::pair<EventType, Clusterable*> >
-// We don't make this a typedef as it doesn't add clarity.
-// they will be sorted and unique on the EventType member, which
-// itself is sorted and unique on the name (see event-map.h).
-
-// See build-tree.h for functions relating to actually building the decision trees.
-
-
-
-
-}  // namespace Kaldi
-
-
-#endif
diff --git a/kaldi_io/src/kaldi/tree/event-map.h b/kaldi_io/src/kaldi/tree/event-map.h
deleted file mode 100644
index 07fcc2b..0000000
--- a/kaldi_io/src/kaldi/tree/event-map.h
+++ /dev/null
@@ -1,365 +0,0 @@
-// tree/event-map.h
-
-// Copyright 2009-2011  Microsoft Corporation;  Haihua Xu
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_TREE_EVENT_MAP_H_
-#define KALDI_TREE_EVENT_MAP_H_
-
-#include <vector>
-#include <map>
-#include <algorithm>
-#include "base/kaldi-common.h"
-#include "util/stl-utils.h"
-#include "util/const-integer-set.h"
-
-namespace kaldi {
-
-/// \defgroup event_map_group Event maps
-/// \ingroup tree_group
-/// See \ref tree_internals for overview, and specifically \ref treei_event_map.
-
-
-// Note RE negative values: some of this code will not work if things of type
-// EventValueType are negative.  In particular, TableEventMap can't be used if
-// things of EventValueType are negative, and additionally TableEventMap won't
-// be efficient if things of EventValueType take on extremely large values.  The
-// EventKeyType can be negative though.
-
-/// Things of type EventKeyType can take any value.  The code does not assume they are contiguous.
-/// So values like -1, 1000000 and the like are acceptable.
-typedef int32 EventKeyType;
-
-/// Given current code, things of type EventValueType should generally be nonnegative and in a
-/// reasonably small range (e.g. not one million), as we sometimes construct vectors of the size:
-/// [largest value we saw for this key].  This deficiency may be fixed in future [would require
-/// modifying TableEventMap]
-typedef int32 EventValueType;
-
-/// As far as the event-map code itself is concerned, things of type EventAnswerType may take
-/// any value except kNoAnswer (== -1).  However, some specific uses of EventMap (e.g. in
-/// build-tree-utils.h) assume these quantities are nonnegative.
-typedef int32 EventAnswerType;
-
-typedef std::vector<std::pair<EventKeyType, EventValueType> > EventType;
-// It is required to be sorted and have unique keys-- i.e. functions assume this when called
-// with this type.
-
-inline std::pair<EventKeyType, EventValueType> MakeEventPair (EventKeyType k, EventValueType v) {  
-  return std::pair<EventKeyType, EventValueType>(k, v);
-}
-
-void WriteEventType(std::ostream &os, bool binary, const EventType &vec);
-void ReadEventType(std::istream &is, bool binary, EventType *vec);
-
-std::string EventTypeToString(const EventType &evec);  // so we can print events out in error messages.
-
-struct EventMapVectorHash {  // Hashing object for EventMapVector.  Works for both pointers and references.
-  // Not used in event-map.{h, cc}
-  size_t operator () (const EventType &vec);
-  size_t operator () (const EventType *ptr) { return (*this)(*ptr); }
-};
-struct EventMapVectorEqual {  // Equality object for EventType pointers-- test equality of underlying vector.
-  // Not used in event-map.{h, cc}
-  size_t operator () (const EventType *p1, const EventType *p2) { return (*p1 == *p2); }
-};
-
-
-/// A class that is capable of representing a generic mapping from
-/// EventType (which is a vector of (key, value) pairs) to
-/// EventAnswerType which is just an integer.  See \ref tree_internals
-/// for overview.
-class EventMap {
- public:
-  static void Check(const EventType &event);  // will crash if not sorted and unique on key.
-  static bool Lookup(const EventType &event, EventKeyType key, EventValueType *ans);
-
-  // Maps events to the answer type. input must be sorted.
-  virtual bool Map(const EventType &event, EventAnswerType *ans) const = 0;
-
-  // MultiMap maps a partially specified set of events to the set of answers it might
-  // map to.  It appends these to "ans".  "ans" is
-  // **not guaranteed unique at output** if the
-  // tree contains duplicate answers at leaves -- you should sort & uniq afterwards.
-  // e.g.: SortAndUniq(ans).
-  virtual void MultiMap(const EventType &event, std::vector<EventAnswerType> *ans) const = 0;
-
-  // GetChildren() returns the EventMaps that are immediate children of this
-  // EventMap (if they exist), by putting them in *out.  Useful for
-  // determining the structure of the event map.
-  virtual void GetChildren(std::vector<EventMap*> *out) const = 0;
-
-  // This Copy() does a deep copy of the event map.
-  // If new_leaves is nonempty when it reaches a leaf with value l s.t. new_leaves[l] != NULL,
-  // it replaces it with a copy of that EventMap.  This makes it possible to extend and modify
-  // It's the way we do splits of trees, and clustering of trees.  Think about this carefully, because
-  // the EventMap structure does not support modification of an existing tree.  Do not be tempted
-  // to do this differently, because other kinds of mechanisms would get very messy and unextensible.
-  // Copy() is the only mechanism to modify a tree.  It's similar to a kind of function composition.
-  // Copy() does not take ownership of the pointers in new_leaves (it uses the Copy() function of those
-  // EventMaps).
-  virtual EventMap *Copy(const std::vector<EventMap*> &new_leaves) const = 0;
-  
-  EventMap *Copy() const { std::vector<EventMap*> new_leaves; return Copy(new_leaves); }
-
-  // The function MapValues() is intended to be used to map phone-sets between
-  // different integer representations.  For all the keys in the set
-  // "keys_to_map", it will map the corresponding values using the map
-  // "value_map".  Note: these values are the values in the key->value pairs of
-  // the EventMap, which really correspond to phones in the usual case; they are
-  // not the "answers" of the EventMap which correspond to clustered states.  In
-  // case multiple values are mapped to the same value, it will try to deal with
-  // it gracefully where it can, but will crash if, for example, this would
-  // cause problems with the TableEventMap.  It will also crash if any values
-  // used for keys in "keys_to_map" are not mapped by "value_map".  This
-  // function is not currently used.
-  virtual EventMap *MapValues(
-      const unordered_set<EventKeyType> &keys_to_map,
-      const unordered_map<EventValueType,EventValueType> &value_map) const = 0;
-
-  // The function Prune() is like Copy(), except it removes parts of the tree
-  // that return only -1 (it will return NULL if this EventMap returns only -1).
-  // This is a mechanism to remove parts of the tree-- you would first use the
-  // Copy() function with a vector of EventMap*, and for the parts you don't
-  // want, you'd put a ConstantEventMap with -1; you'd then call
-  // Prune() on the result.  This function is not currently used.
-  virtual EventMap *Prune() const = 0;
-  
-  virtual EventAnswerType MaxResult() const {  // child classes may override this for efficiency; here is basic version.
-    // returns -1 if nothing found.
-    std::vector<EventAnswerType> tmp; EventType empty_event;
-    MultiMap(empty_event, &tmp);
-    if (tmp.empty()) {
-      KALDI_WARN << "EventMap::MaxResult(), empty result";
-      return std::numeric_limits<EventAnswerType>::min();
-    }
-    else { return * std::max_element(tmp.begin(), tmp.end()); }
-  }
-
-  /// Write to stream.
-  virtual void Write(std::ostream &os, bool binary) = 0;
-
-  virtual ~EventMap() {}
-
-  /// a Write function that takes care of NULL pointers.
-  static void Write(std::ostream &os, bool binary, EventMap *emap);
-  /// a Read function that reads an arbitrary EventMap; also
-  /// works for NULL pointers.
-  static EventMap *Read(std::istream &is, bool binary);
-};
-
-
-class ConstantEventMap: public EventMap {
- public:
-  virtual bool Map(const EventType &event, EventAnswerType *ans) const {
-    *ans = answer_;
-    return true;
-  }
-
-  virtual void MultiMap(const EventType &,
-                        std::vector<EventAnswerType> *ans) const {
-     ans->push_back(answer_);
-  }
-
-  virtual void GetChildren(std::vector<EventMap*> *out) const { out->clear(); }
-
-  virtual EventMap *Copy(const std::vector<EventMap*> &new_leaves) const {
-    if (answer_ < 0 || answer_ >= (EventAnswerType)new_leaves.size() ||
-        new_leaves[answer_] == NULL)
-      return new ConstantEventMap(answer_);
-    else return new_leaves[answer_]->Copy();
-  }
-
-  virtual EventMap *MapValues(
-      const unordered_set<EventKeyType> &keys_to_map,
-      const unordered_map<EventValueType,EventValueType> &value_map) const {
-    return new ConstantEventMap(answer_);
-  }
-
-  virtual EventMap *Prune() const {
-    return (answer_ == -1 ? NULL : new ConstantEventMap(answer_));
-  }
-  
-  explicit ConstantEventMap(EventAnswerType answer): answer_(answer) { }
-  
-  virtual void Write(std::ostream &os, bool binary);
-  static ConstantEventMap *Read(std::istream &is, bool binary);
- private:
-  EventAnswerType answer_;
-  KALDI_DISALLOW_COPY_AND_ASSIGN(ConstantEventMap);
-};
-
-class TableEventMap: public EventMap {
- public:
-
-  virtual bool Map(const EventType &event, EventAnswerType *ans) const {
-    EventValueType tmp;   *ans = -1;  // means no answer
-    if (Lookup(event, key_, &tmp) && tmp >= 0
-       && tmp < (EventValueType)table_.size() && table_[tmp] != NULL) {
-      return table_[tmp]->Map(event, ans);
-    }
-    return false;
-  }
-
-  virtual void GetChildren(std::vector<EventMap*> *out) const {
-    out->clear();
-    for (size_t i = 0; i<table_.size(); i++)
-      if (table_[i] != NULL) out->push_back(table_[i]);
-  }
-
-  virtual void MultiMap(const EventType &event, std::vector<EventAnswerType> *ans) const {
-    EventValueType tmp;
-    if (Lookup(event, key_, &tmp)) {
-      if (tmp >= 0 && tmp < (EventValueType)table_.size() && table_[tmp] != NULL)
-        return table_[tmp]->MultiMap(event, ans);
-      // else no answers.
-    } else {  // all answers are possible if no such key.
-      for (size_t i = 0;i < table_.size();i++)
-        if (table_[i] != NULL) table_[i]->MultiMap(event, ans);  // append.
-    }
-  }
-
-  virtual EventMap *Prune() const;
-  
-  virtual EventMap *MapValues(
-      const unordered_set<EventKeyType> &keys_to_map,
-      const unordered_map<EventValueType,EventValueType> &value_map) const;
-  
-  /// Takes ownership of pointers.
-  explicit TableEventMap(EventKeyType key, const std::vector<EventMap*> &table): key_(key), table_(table) {}
-  /// Takes ownership of pointers.
-  explicit TableEventMap(EventKeyType key, const std::map<EventValueType, EventMap*> &map_in);
-  /// This initializer creates a ConstantEventMap for each value in the map.
-  explicit TableEventMap(EventKeyType key, const std::map<EventValueType, EventAnswerType> &map_in);
-
-  virtual void Write(std::ostream &os, bool binary);
-  static TableEventMap *Read(std::istream &is, bool binary);
-
-  virtual EventMap *Copy(const std::vector<EventMap*> &new_leaves) const {
-    std::vector<EventMap*> new_table_(table_.size(), NULL);
-    for (size_t i = 0;i<table_.size();i++) if (table_[i]) new_table_[i]=table_[i]->Copy(new_leaves);
-    return new TableEventMap(key_, new_table_);
-  }
-  virtual ~TableEventMap() {
-    DeletePointers(&table_);
-  }
- private:
-  EventKeyType key_;
-  std::vector<EventMap*> table_;
-  KALDI_DISALLOW_COPY_AND_ASSIGN(TableEventMap);
-};
-
-
-
-
-class SplitEventMap: public EventMap {  // A decision tree [non-leaf] node.
- public:
-
-  virtual bool Map(const EventType &event, EventAnswerType *ans) const {
-    EventValueType value;
-    if (Lookup(event, key_, &value)) {
-      // if (std::binary_search(yes_set_.begin(), yes_set_.end(), value)) {
-      if (yes_set_.count(value)) {
-        return yes_->Map(event, ans);
-      }
-      return no_->Map(event, ans);
-    }
-    return false;
-  }
-
-  virtual void MultiMap(const EventType &event, std::vector<EventAnswerType> *ans) const {
-    EventValueType tmp;
-    if (Lookup(event, key_, &tmp)) {
-      if (std::binary_search(yes_set_.begin(), yes_set_.end(), tmp))
-        yes_->MultiMap(event, ans);
-      else
-        no_->MultiMap(event, ans);
-    } else {  // both yes and no contribute.
-      yes_->MultiMap(event, ans);
-      no_->MultiMap(event, ans);
-    }
-  }
-
-  virtual void GetChildren(std::vector<EventMap*> *out) const {
-    out->clear();
-    out->push_back(yes_);
-    out->push_back(no_);
-  }
-
-  virtual EventMap *Copy(const std::vector<EventMap*> &new_leaves) const {
-    return new SplitEventMap(key_, yes_set_, yes_->Copy(new_leaves), no_->Copy(new_leaves));
-  }
-
-  virtual void Write(std::ostream &os, bool binary);
-  static SplitEventMap *Read(std::istream &is, bool binary);
-
-  virtual EventMap *Prune() const;
-  
-  virtual EventMap *MapValues(
-      const unordered_set<EventKeyType> &keys_to_map,
-      const unordered_map<EventValueType,EventValueType> &value_map) const;
-  
-  virtual ~SplitEventMap() { Destroy(); }
-
-  /// This constructor takes ownership of the "yes" and "no" arguments.
-  SplitEventMap(EventKeyType key, const std::vector<EventValueType> &yes_set,
-                EventMap *yes, EventMap *no): key_(key), yes_set_(yes_set), yes_(yes), no_(no) {
-    KALDI_PARANOID_ASSERT(IsSorted(yes_set));
-    KALDI_ASSERT(yes_ != NULL && no_ != NULL);
-  }
-
-
- private:
-  /// This constructor used in the Copy() function.
-  SplitEventMap(EventKeyType key, const ConstIntegerSet<EventValueType> &yes_set,
-                EventMap *yes, EventMap *no): key_(key), yes_set_(yes_set), yes_(yes), no_(no) {
-    KALDI_ASSERT(yes_ != NULL && no_ != NULL);
-  }
-  void Destroy() {
-    delete yes_; delete no_;
-  }
-  EventKeyType key_;
-  //  std::vector<EventValueType> yes_set_;
-  ConstIntegerSet<EventValueType> yes_set_;  // more efficient Map function.
-  EventMap *yes_;  // owned here.
-  EventMap *no_;  // owned here.
-  SplitEventMap &operator = (const SplitEventMap &other);  // Disallow.
-};
-
-/**
-   This function gets the tree structure of the EventMap "map" in a convenient form.
-   If "map" corresponds to a tree structure (not necessarily binary) with leaves
-   uniquely numbered from 0 to num_leaves-1, then the function will return true,
-   output "num_leaves", and set "parent" to a vector of size equal to the number of
-   nodes in the tree (nonleaf and leaf), where each index corresponds to a node
-   and the leaf indices correspond to the values returned by the EventMap from
-   that leaf; for an index i, parent[i] equals the parent of that node in the tree
-   structure, where parent[i] > i, except for the last (root) node where parent[i] == i.
-   If the EventMap does not have this structure (e.g. if multiple different leaf nodes share
-   the same number), then it will return false.
-*/
-
-bool GetTreeStructure(const EventMap &map,
-                      int32 *num_leaves,
-                      std::vector<int32> *parents);
-
-
-/// @} end "addtogroup event_map_group"
-
-}
-
-#endif
diff --git a/kaldi_io/src/kaldi/tree/tree-renderer.h b/kaldi_io/src/kaldi/tree/tree-renderer.h
deleted file mode 100644
index 5e0b0d8..0000000
--- a/kaldi_io/src/kaldi/tree/tree-renderer.h
+++ /dev/null
@@ -1,84 +0,0 @@
-// tree/tree-renderer.h
-
-// Copyright 2012 Vassil Panayotov
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//  http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_TREE_TREE_RENDERER_H_
-#define KALDI_TREE_TREE_RENDERER_H_
-
-#include "base/kaldi-common.h"
-#include "tree/event-map.h"
-#include "util/common-utils.h"
-#include "hmm/transition-model.h"
-#include "fst/fstlib.h"
-
-namespace kaldi {
-
-// Parses a decision tree file and outputs its description in GraphViz format
-class TreeRenderer {
- public:
-  const static int32 kEdgeWidth; // normal width of the edges and state contours
-  const static int32 kEdgeWidthQuery; // edge and state width when in query
-  const static std::string kEdgeColor; // normal color for states and edges
-  const static std::string kEdgeColorQuery; // edge and state color when in query
-
-  TreeRenderer(std::istream &is, bool binary, std::ostream &os,
-               fst::SymbolTable &phone_syms, bool use_tooltips)
-      : phone_syms_(phone_syms), is_(is), out_(os), binary_(binary),
-        N_(-1), use_tooltips_(use_tooltips), next_id_(0) {}
-
-  // Renders the tree and if the "query" parameter is not NULL
-  // a distinctly colored trace corresponding to the event.
-  void Render(const EventType *query);
-
- private:
-  // Looks-up the next token from the stream and invokes
-  // the appropriate render method to visualize it
-  void RenderSubTree(const EventType *query, int32 id);
-
-  // Renders a leaf node (constant event map)
-  void RenderConstant(const EventType *query, int32 id);
-
-  // Renders a split event map node and the edges to the nodes
-  // representing YES and NO sets
-  void RenderSplit(const EventType *query, int32 id);
-
-  // Renders a table event map node and the edges to its (non-null) children
-  void RenderTable(const EventType *query, int32 id);
-
-  // Makes a comma-separated string from the elements of a set of identifiers
-  // If the identifiers represent phones, their symbolic representations are used
-  std::string MakeEdgeLabel(const EventKeyType &key,
-                            const ConstIntegerSet<EventValueType> &intset);
-
-  // Writes the GraphViz representation of a non-leaf node to the out stream
-  // A question about a phone from the context window or about pdf-class
-  // is used as a label.
-  void RenderNonLeaf(int32 id, const EventKeyType &key, bool in_query);
-
-  fst::SymbolTable &phone_syms_; // phone symbols to be used as edge labels
-  std::istream &is_; // the stream from which the tree is read
-  std::ostream &out_; // the GraphViz representation is written to this stream
-  bool binary_; // is the input stream binary?
-  int32 N_, P_; // context-width and central position
-  bool use_tooltips_;  // use tooltips(useful in e.g. SVG) instead of labels
-  int32 next_id_; // the first unused GraphViz node ID
-};
-
-} // namespace kaldi
-
-#endif //  KALDI_TREE_TREE_RENDERER_H_
author	Ted Yin <ted.sybil@gmail.com>	2015-08-14 17:42:26 +0800
committer	Ted Yin <ted.sybil@gmail.com>	2015-08-14 17:42:26 +0800
commit	c3cffb58b9921d78753336421b52b9ffdaa5515c (patch)
tree	bfea20e97c200cf734021e3756d749c892e658a4 /kaldi_io/src/kaldi/tree
parent	10cce5f6a5c9e2f8e00d5a2a4d87c9cb7c26bf4c (diff)
parent	dfdd17afc2e984ec6c32ea01290f5c76309a456a (diff)