summaryrefslogtreecommitdiff
path: root/kaldi_io/src/kaldi/tree/build-tree-questions.h
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/kaldi/tree/build-tree-questions.h')
-rw-r--r--kaldi_io/src/kaldi/tree/build-tree-questions.h133
1 files changed, 133 insertions, 0 deletions
diff --git a/kaldi_io/src/kaldi/tree/build-tree-questions.h b/kaldi_io/src/kaldi/tree/build-tree-questions.h
new file mode 100644
index 0000000..a6bcfdd
--- /dev/null
+++ b/kaldi_io/src/kaldi/tree/build-tree-questions.h
@@ -0,0 +1,133 @@
+// tree/build-tree-questions.h
+
+// Copyright 2009-2011 Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_TREE_BUILD_TREE_QUESTIONS_H_
+#define KALDI_TREE_BUILD_TREE_QUESTIONS_H_
+
+#include "util/stl-utils.h"
+#include "tree/context-dep.h"
+
+namespace kaldi {
+
+
+/// \addtogroup tree_group
+/// @{
+/// Typedef for statistics to build trees: a list of (EventType, Clusterable*)
+/// pairs.
+/// NOTE(review): this typedef says nothing about who owns the Clusterable
+/// pointers -- confirm against the code that creates and frees the stats.
+typedef std::vector<std::pair<EventType, Clusterable*> > BuildTreeStatsType;
+
+/// Typedef used when we get "all keys" from a set of stats-- used in specifying
+/// which kinds of questions to ask.  It is accepted by Questions::InitRand().
+/// NOTE(review): judging by the enumerator names only, the three values
+/// presumably select between requiring every stat to have an identical key
+/// set, taking the keys common to all stats, and taking the keys seen in any
+/// stat -- confirm against the function that actually consumes this enum.
+typedef enum { kAllKeysInsistIdentical, kAllKeysIntersection, kAllKeysUnion } AllKeysType;
+
+/// @}
+
+/// \defgroup tree_group_questions Question sets for decision-tree clustering
+/// See \ref tree_internals (and specifically \ref treei_func_questions) for context.
+/// \ingroup tree_group
+/// @{
+
+/// QuestionsForKey bundles everything the tree builder needs to know about a
+/// single key (of type EventKeyType): the candidate questions themselves, plus
+/// the options controlling how a question may be refined during tree-building
+/// (i.e. made specific to the location in the tree).
+/// The Questions class aggregates these objects over a set of different keys.
+struct QuestionsForKey {
+  /// The candidate questions; each question is a list of EventValueType
+  /// values.  Check() asserts that every such list is sorted.
+  std::vector<std::vector<EventValueType> > initial_questions;
+
+  /// Refinement options.  If refine_opts.max_iter == 0, we just pick from the
+  /// initial questions.
+  RefineClustersOptions refine_opts;
+
+  /// Sets up the refinement options with num_iters iterations (5 by default)
+  /// and top-n = 2.  The top-n value is no restriction: RefineClusters is
+  /// called with 2 clusters, so it would get set to that anyway, as it is the
+  /// only possible value for 2 clusters.
+  /// No questions are added here; the user has to add them, and the object
+  /// won't work as-is until that is done.
+  QuestionsForKey(int32 num_iters = 5): refine_opts(num_iters, 2) { }
+
+  /// Asserts (via KALDI_ASSERT) that each question in initial_questions is
+  /// sorted.
+  void Check() const {
+    std::vector<std::vector<EventValueType> >::const_iterator
+        question = initial_questions.begin(),
+        questions_end = initial_questions.end();
+    for (; question != questions_end; ++question) {
+      KALDI_ASSERT(IsSorted(*question));
+    }
+  }
+
+  void Write(std::ostream &os, bool binary) const;
+  void Read(std::istream &is, bool binary);
+
+  // Copy and assignment are allowed (the compiler-generated versions are used).
+};
+
+/// This class defines, for each EventKeyType, a set of initial questions that
+/// it tries and also a number of iterations for which to refine the questions
+/// to increase likelihood.  It is perhaps a bit more than an options class, as
+/// it contains the actual questions.
+///
+/// Internally, key_idx_ maps each configured key to a position in
+/// key_options_, which holds heap-allocated QuestionsForKey objects created by
+/// SetQuestionsOf() and handed to DeletePointers() in the destructor.  Copying
+/// and assignment of Questions are disallowed.
+class Questions {  // careful, this is a class.
+ public:
+  /// Default constructor: creates an object with no questions for any key.
+  /// After using this you would have to set up the config for each key you are
+  /// going to use (see SetQuestionsOf()), or use InitRand().
+  Questions() { }
+
+  /// Passes the QuestionsForKey pointers held in key_options_ to
+  /// DeletePointers().
+  ~Questions() { kaldi::DeletePointers(&key_options_); }
+
+  /// Returns the questions configured for "key", after running Check() on
+  /// them.  Reports an error through KALDI_ERR if nothing was configured for
+  /// this key; use HasQuestionsForKey() first if that is a possibility.
+  const QuestionsForKey &GetQuestionsOf(EventKeyType key) const {
+    std::map<EventKeyType, size_t>::const_iterator iter = key_idx_.find(key);
+    if (iter == key_idx_.end()) {
+      // NOTE(review): the code below dereferences iter, so it relies on
+      // KALDI_ERR not returning control here -- confirm in the logging header.
+      KALDI_ERR << "Questions: no options for key "<< key;
+    }
+    size_t idx = iter->second;
+    KALDI_ASSERT(idx < key_options_.size());
+    key_options_[idx]->Check();
+    return *(key_options_[idx]);
+  }
+
+  /// Stores a copy of "options_of_key" as the questions for "key", replacing
+  /// any questions previously stored for that key.  The options are validated
+  /// with Check() before anything is modified.
+  void SetQuestionsOf(EventKeyType key, const QuestionsForKey &options_of_key) {
+    options_of_key.Check();
+    std::map<EventKeyType, size_t>::const_iterator iter = key_idx_.find(key);
+    if (iter == key_idx_.end()) {
+      // New key.  Append the copy first and register its index last, so that
+      // key_idx_ never refers to a slot that does not exist in key_options_
+      // (which is what would happen if the allocation threw after the index
+      // had already been recorded).
+      key_options_.push_back(new QuestionsForKey(options_of_key));
+      key_idx_[key] = key_options_.size() - 1;
+    } else {
+      size_t idx = iter->second;
+      KALDI_ASSERT(idx < key_options_.size());
+      *(key_options_[idx]) = options_of_key;
+    }
+  }
+
+  /// Writes to *keys_out the keys for which questions have been set.
+  /// keys_out must not be NULL.
+  void GetKeysWithQuestions(std::vector<EventKeyType> *keys_out) const {
+    KALDI_ASSERT(keys_out != NULL);
+    CopyMapKeysToVector(key_idx_, keys_out);
+  }
+
+  /// Returns true if questions have been set for "key".
+  bool HasQuestionsForKey(EventKeyType key) const {
+    return key_idx_.find(key) != key_idx_.end();
+  }
+
+  /// InitRand attempts to generate "reasonable" random questions.  Only
+  /// of use for debugging.  This initializer creates a config that is
+  /// ready to use.
+  /// e.g. num_iters_refine = 0 means just use stated questions (if >1, will use
+  /// different questions at each split of the tree).
+  void InitRand(const BuildTreeStatsType &stats, int32 num_quest,
+                int32 num_iters_refine, AllKeysType all_keys_type);
+
+  void Write(std::ostream &os, bool binary) const;
+  void Read(std::istream &is, bool binary);
+
+ private:
+  /// Per-key question sets, allocated with new in SetQuestionsOf().
+  std::vector<QuestionsForKey*> key_options_;
+  /// Maps each configured key to its index in key_options_.
+  std::map<EventKeyType, size_t> key_idx_;
+  KALDI_DISALLOW_COPY_AND_ASSIGN(Questions);
+};
+
+/// @}
+
+}// end namespace kaldi
+
+#endif // KALDI_TREE_BUILD_TREE_QUESTIONS_H_