69 files changed, 17535 insertions, 0 deletions
diff --git a/kaldi_io/src/kaldi/base/io-funcs-inl.h b/kaldi_io/src/kaldi/base/io-funcs-inl.h
new file mode 100644
index 0000000..e55458e
--- /dev/null
+++ b/kaldi_io/src/kaldi/base/io-funcs-inl.h
@@ -0,0 +1,219 @@
+// base/io-funcs-inl.h
+
+// Copyright 2009-2011  Microsoft Corporation;  Saarland University;
+//                      Jan Silovsky;   Yanmin Qian;  Johns Hopkins University (Author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_BASE_IO_FUNCS_INL_H_
+#define KALDI_BASE_IO_FUNCS_INL_H_ 1
+
+// Do not include this file directly.  It is included by base/io-funcs.h
+
+#include <limits>
+#include <vector>
+
+namespace kaldi {
+
+// Template that covers integers.
+template<class T>  void WriteBasicType(std::ostream &os,
+                                       bool binary, T t) {
+  // Compile time assertion that this is not called with a wrong type.
+  KALDI_ASSERT_IS_INTEGER_TYPE(T);
+  if (binary) {
+    char len_c = (std::numeric_limits<T>::is_signed ? 1 :  -1)
+        * static_cast<char>(sizeof(t));
+    os.put(len_c);
+    os.write(reinterpret_cast<const char *>(&t), sizeof(t));
+  } else {
+    if (sizeof(t) == 1)
+      os << static_cast<int16>(t) << " ";
+    else
+      os << t << " ";
+  }
+  if (os.fail()) {
+    throw std::runtime_error("Write failure in WriteBasicType.");
+  }
+}
+
+// Template that covers integers.
+template<class T> inline void ReadBasicType(std::istream &is,
+                                            bool binary, T *t) {
+  KALDI_PARANOID_ASSERT(t != NULL);
+  // Compile time assertion that this is not called with a wrong type.
+  KALDI_ASSERT_IS_INTEGER_TYPE(T);
+  if (binary) {
+    int len_c_in = is.get();
+    if (len_c_in == -1)
+      KALDI_ERR << "ReadBasicType: encountered end of stream.";
+    char len_c = static_cast<char>(len_c_in), len_c_expected
+      = (std::numeric_limits<T>::is_signed ? 1 :  -1)
+      * static_cast<char>(sizeof(*t));
+    
+    if (len_c !=  len_c_expected) {
+      KALDI_ERR << "ReadBasicType: did not get expected integer type, "
+                << static_cast<int>(len_c)
+                << " vs. " << static_cast<int>(len_c_expected)
+                << ".  You can change this code to successfully"
+                << " read it later, if needed.";
+      // insert code here to read "wrong" type.  Might have a switch statement.
+    }
+    is.read(reinterpret_cast<char *>(t), sizeof(*t));
+  } else {
+    if (sizeof(*t) == 1) {
+      int16 i;
+      is >> i;
+      *t = i;
+    } else {
+      is >> *t;
+    }
+  }
+  if (is.fail()) {
+    KALDI_ERR << "Read failure in ReadBasicType, file position is "
+              << is.tellg() << ", next char is " << is.peek();
+  }
+}
+
+
+// Writes v to os and throws std::runtime_error on failure.  Binary form: a byte
+// holding sizeof(T), an int32 count, then raw elements.  Text form: "[ e1 e2 ]\n".
+template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
+                                                 const std::vector<T> &v) {
+  // Compile time assertion that this is not called with a wrong type.
+  KALDI_ASSERT_IS_INTEGER_TYPE(T);
+  if (binary) {
+    char sz = sizeof(T);  // this is currently just a check.
+    os.write(&sz, 1);
+    int32 vecsz = static_cast<int32>(v.size());
+    KALDI_ASSERT((size_t)vecsz == v.size());  // the size must fit in an int32.
+    os.write(reinterpret_cast<const char *>(&vecsz), sizeof(vecsz));
+    if (vecsz != 0) {
+      os.write(reinterpret_cast<const char *>(&(v[0])), sizeof(T)*vecsz);
+    }
+  } else {
+    // focus here is on prettiness of text form rather than efficiency of
+    // reading-in, which is dominated by low-level ops anyway; for speed use binary.
+    os << "[ ";
+    typename std::vector<T>::const_iterator iter = v.begin(), end = v.end();
+    for (; iter != end; ++iter) {
+      if (sizeof(T) == 1)  // print one-byte types as numbers, not characters.
+        os << static_cast<int16>(*iter) << " ";
+      else
+        os << *iter << " ";
+    }
+    os << "]\n";
+  }
+  if (os.fail()) {
+    throw std::runtime_error("Write failure in WriteIntegerVector.");
+  }
+}
+
+
+template<class T> inline void ReadIntegerVector(std::istream &is,
+                                                bool binary,
+                                                std::vector<T> *v) {
+  KALDI_ASSERT_IS_INTEGER_TYPE(T);
+  KALDI_ASSERT(v != NULL);
+  if (binary) {
+    int sz = is.peek();
+    if (sz == sizeof(T)) {
+      is.get();
+    } else {  // this is currently just a check.
+      KALDI_ERR << "ReadIntegerVector: expected to see type of size "
+                << sizeof(T) << ", saw instead " << sz << ", at file position "
+                << is.tellg();
+    }
+    int32 vecsz;
+    is.read(reinterpret_cast<char *>(&vecsz), sizeof(vecsz));
+    if (is.fail() || vecsz < 0) goto bad;
+    v->resize(vecsz);
+    if (vecsz > 0) {
+      is.read(reinterpret_cast<char *>(&((*v)[0])), sizeof(T)*vecsz);
+    }
+  } else {
+    std::vector<T> tmp_v;  // use temporary so v doesn't use extra memory
+                           // due to resizing.
+    is >> std::ws;
+    if (is.peek() != static_cast<int>('[')) {
+      KALDI_ERR << "ReadIntegerVector: expected to see [, saw "
+                << is.peek() << ", at file position " << is.tellg();
+    }
+    is.get();  // consume the '['.
+    is >> std::ws;  // consume whitespace.
+    while (is.peek() != static_cast<int>(']')) {
+      if (sizeof(T) == 1) {  // read/write chars as numbers.
+        int16 next_t;
+        is >> next_t >> std::ws;
+        if (is.fail()) goto bad;
+        else
+            tmp_v.push_back((T)next_t);
+      } else {
+        T next_t;
+        is >> next_t >> std::ws;
+        if (is.fail()) goto bad;
+        else
+            tmp_v.push_back(next_t);
+      }
+    }
+    is.get();  // get the final ']'.
+    *v = tmp_v;  // could use std::swap to use less temporary memory, but this
+    // uses less permanent memory.
+  }
+  if (!is.fail()) return;
+ bad:
+  KALDI_ERR << "ReadIntegerVector: read failure at file position "
+            << is.tellg();
+}
+
+// Initialize an opened stream for writing by writing an optional binary
+// header and modifying the floating-point precision.
+inline void InitKaldiOutputStream(std::ostream &os, bool binary) {
+  // This does not throw exceptions (does not check for errors).
+  if (binary) {
+    os.put('\0');
+    os.put('B');
+  }
+  // Note, in non-binary mode we may at some point want to mess with
+  // the precision a bit.
+  // 7 is a bit more than the precision of float..
+  if (os.precision() < 7)
+    os.precision(7);
+}
+
+/// Initialize an opened stream for reading by detecting the binary header and
+/// setting the "binary" value appropriately.
+inline bool InitKaldiInputStream(std::istream &is, bool *binary) {
+  // Sets the 'binary' variable and returns true on success.
+  // Returns false (leaving *binary unset, with the '\0' already consumed) in
+  // the very unusual situation that stream starts with '\0' but not then 'B'.
+
+  if (is.peek() == '\0') {  // seems to be binary
+    is.get();
+    if (is.peek() != 'B') {
+      return false;  // malformed header is reported via the return value.
+    }
+    is.get();  // consume the 'B'.
+    *binary = true;
+    return true;
+  } else {
+    *binary = false;  // text mode: only peek() was used, nothing is consumed.
+    return true;
+  }
+}
+
+}  // end namespace kaldi.
+
+#endif  // KALDI_BASE_IO_FUNCS_INL_H_
diff --git a/kaldi_io/src/kaldi/base/io-funcs.h b/kaldi_io/src/kaldi/base/io-funcs.h
new file mode 100644
index 0000000..2bc9da8
--- /dev/null
+++ b/kaldi_io/src/kaldi/base/io-funcs.h
@@ -0,0 +1,231 @@
+// base/io-funcs.h
+
+// Copyright 2009-2011  Microsoft Corporation;  Saarland University;
+//                      Jan Silovsky;   Yanmin Qian
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_BASE_IO_FUNCS_H_
+#define KALDI_BASE_IO_FUNCS_H_
+
+// This header only contains some relatively low-level I/O functions.
+// The full Kaldi I/O declarations are in ../util/kaldi-io.h
+// and ../util/kaldi-table.h
+// They were put in util/ in order to avoid making the Matrix library
+// dependent on them.
+
+#include <cctype>
+#include <vector>
+#include <string>
+#include "base/kaldi-common.h"
+
+namespace kaldi {
+
+
+
+/*
+  This comment describes the Kaldi approach to I/O.  All objects can be written
+  and read in two modes: binary and text.  In addition we want to make the I/O
+  work if we redefine the typedef "BaseFloat" between floats and doubles.
+  We also want to have control over whitespace in text mode without affecting
+  the meaning of the file, for pretty-printing purposes.
+
+  Errors are handled by throwing an exception (std::runtime_error).
+
+  For integer and floating-point types (and boolean values):
+
+   WriteBasicType(std::ostream &, bool binary, const T&);
+   ReadBasicType(std::istream &, bool binary, T*);
+
+  and we expect these functions to be defined in such a way that they work when
+  the type T changes between float and double, so you can read float into double
+  and vice versa.  Note that for efficiency and space-saving reasons, the Vector
+  and Matrix classes do not use these functions [but they preserve the type
+  interchangeability in their own way]
+
+  For a class (or struct) C:
+  class C {
+  ..
+    Write(std::ostream &, bool binary, [possibly extra optional args for specific classes]) const;
+    Read(std::istream &, bool binary, [possibly extra optional args for specific classes]);
+  ..
+  }
+  NOTE: The only actual optional args we used are the "add" arguments in
+  Vector/Matrix classes, which specify whether we should sum the data already
+  in the class with the data being read.
+
+  For types which are typedef's involving stl classes, I/O is as follows:
+  typedef std::vector<std::pair<A, B> > MyTypedefName;
+
+  The user should define something like:
+
+   WriteMyTypedefName(std::ostream &, bool binary, const MyTypedefName &t);
+   ReadMyTypedefName(std::istream &, bool binary, MyTypedefName *t);
+
+  The user would have to write these functions.
+
+  For a type std::vector<T>:
+
+   void WriteIntegerVector(std::ostream &os, bool binary, const std::vector<T> &v);
+   void ReadIntegerVector(std::istream &is, bool binary, std::vector<T> *v);
+
+  For other types, e.g. vectors of pairs, the user should create a routine of the
+  type WriteMyTypedefName.  This is to avoid introducing confusing templated functions;
+  we could easily create templated functions to handle most of these cases but they
+  would have to share the same name.
+
+  It also often happens that the user needs to write/read special tokens as part
+  of a file.  These might be class headers, or separators/identifiers in the class.
+  We provide special functions for manipulating these.  These special tokens must
+  be nonempty and must not contain any whitespace.
+
+    void WriteToken(std::ostream &os, bool binary, const char*);
+    void WriteToken(std::ostream &os, bool binary, const std::string & token);
+    int Peek(std::istream &is, bool binary);
+    void ReadToken(std::istream &is, bool binary, std::string *str);
+    int PeekToken(std::istream &is, bool binary);
+
+
+  WriteToken writes the token and one space (whether in binary or text mode).
+
+  Peek returns the first character of the next token, by consuming whitespace
+  (in text mode) and then returning the peek() character.  It returns -1 at EOF;
+  it doesn't throw.  It's useful if a class can have various forms based on
+  typedefs and virtual classes, and wants to know which version to read.
+
+  ReadToken allows the caller to obtain the next token.  PeekToken returns the
+  first character of the next token (skipping a leading '<'; -1 at EOF) and
+  leaves the stream where it was, so a subsequent ReadToken reads the whole
+  token.  This is useful when different object types are written to the same
+  file; using PeekToken one can decide which of the objects to read.
+
+  There is currently no special functionality for writing/reading strings (where the strings
+  contain data rather than "special tokens" that are whitespace-free and nonempty).  This is
+  because Kaldi is structured in such a way that strings don't appear, except as OpenFst symbol
+  table entries (and these have their own format).
+
+
+  NOTE: you should not call ReadBasicType and WriteBasicType with types,
+  such as int and size_t, that are machine-dependent -- at least not
+  if you want your file formats to port between machines.  Use int32 and
+  int64 where necessary.  There is no way to detect this using compile-time
+  assertions because C++ only keeps track of the internal representation of
+  the type.
+*/
+
+/// \addtogroup io_funcs_basic
+/// @{
+
+
+/// WriteBasicType is the name of the write function for bool, integer types,
+/// and floating-point types. They all throw on error.
+template<class T> void WriteBasicType(std::ostream &os, bool binary, T t);
+
+/// ReadBasicType is the name of the read function for bool, integer types,
+/// and floating-point types. They all throw on error.
+template<class T> void ReadBasicType(std::istream &is, bool binary, T *t);
+
+
+// Declare specialization for bool.
+template<>
+void WriteBasicType<bool>(std::ostream &os, bool binary, bool b);
+
+template <>
+void ReadBasicType<bool>(std::istream &is, bool binary, bool *b);
+
+// Declare specializations for float and double.
+template<>
+void WriteBasicType<float>(std::ostream &os, bool binary, float f);
+
+template<>
+void WriteBasicType<double>(std::ostream &os, bool binary, double f);
+
+template<>
+void ReadBasicType<float>(std::istream &is, bool binary, float *f);
+
+template<>
+void ReadBasicType<double>(std::istream &is, bool binary, double *f);
+
+// Define ReadBasicType that accepts an "add" parameter to add to
+// the destination.  Caution: if used in Read functions, be careful
+// to initialize the parameters concerned to zero in the default
+// constructor.
+template<class T>
+inline void ReadBasicType(std::istream &is, bool binary, T *t, bool add) {
+  if (!add) {
+    ReadBasicType(is, binary, t);
+  } else {
+    T tmp = T(0);
+    ReadBasicType(is, binary, &tmp);
+    *t += tmp;
+  }
+}
+
+/// Function for writing STL vectors of integer types.
+template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
+                                                 const std::vector<T> &v);
+
+/// Function for reading STL vector of integer types.
+template<class T> inline void ReadIntegerVector(std::istream &is, bool binary,
+                                                std::vector<T> *v);
+
+/// The WriteToken functions are for writing nonempty sequences of non-space
+/// characters. They are not for general strings.
+void WriteToken(std::ostream &os, bool binary, const char *token);
+void WriteToken(std::ostream &os, bool binary, const std::string & token);
+
+/// Peek consumes whitespace (if binary == false) and then returns the peek()
+/// value of the stream.
+int Peek(std::istream &is, bool binary);
+
+/// ReadToken gets the next token and puts it in str (exception on failure).
+void ReadToken(std::istream &is, bool binary, std::string *token);
+
+/// PeekToken will return the first character of the next token, or -1 if end of
+/// file.  It's the same as Peek(), except if the first character is '<' it will
+/// skip over it and will return the next character.  It will unget the '<' so
+/// the stream is where it was before you did PeekToken().
+int PeekToken(std::istream &is, bool binary);
+
+/// ExpectToken tries to read in the given token, and throws an exception
+/// on failure.
+void ExpectToken(std::istream &is, bool binary, const char *token);
+void ExpectToken(std::istream &is, bool binary, const std::string & token);
+
+/// ExpectPretty attempts to read the text in "token", but only in non-binary
+/// mode.  Throws exception on failure.  It expects an exact match except that
+/// arbitrary whitespace matches arbitrary whitespace.
+void ExpectPretty(std::istream &is, bool binary, const char *token);
+void ExpectPretty(std::istream &is, bool binary, const std::string & token);
+
+/// @} end "addtogroup io_funcs_basic"
+
+
+/// InitKaldiOutputStream initializes an opened stream for writing by writing an
+/// optional binary header and modifying the floating-point precision; it will
+/// typically not be called by users directly.
+inline void InitKaldiOutputStream(std::ostream &os, bool binary);
+
+/// InitKaldiInputStream initializes an opened stream for reading by detecting
+/// the binary header and setting the "binary" value appropriately;
+/// It will typically not be called by users directly.
+inline bool InitKaldiInputStream(std::istream &is, bool *binary);
+
+}  // end namespace kaldi.
+
+#include "base/io-funcs-inl.h"
+
+#endif  // KALDI_BASE_IO_FUNCS_H_
diff --git a/kaldi_io/src/kaldi/base/kaldi-common.h b/kaldi_io/src/kaldi/base/kaldi-common.h
new file mode 100644
index 0000000..33f6f31
--- /dev/null
+++ b/kaldi_io/src/kaldi/base/kaldi-common.h
@@ -0,0 +1,41 @@
+// base/kaldi-common.h
+
+// Copyright 2009-2011 Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_BASE_KALDI_COMMON_H_
+#define KALDI_BASE_KALDI_COMMON_H_ 1
+
+#include <cstddef>
+#include <cstdlib>
+#include <cstring>  // C string stuff like strcpy
+#include <string>
+#include <sstream>
+#include <stdexcept>
+#include <cassert>
+#include <vector>
+#include <iostream>  
+#include <fstream>  
+
+#include "base/kaldi-utils.h"
+#include "base/kaldi-error.h"
+#include "base/kaldi-types.h"
+#include "base/io-funcs.h"
+#include "base/kaldi-math.h"
+
+#endif  // KALDI_BASE_KALDI_COMMON_H_
+
diff --git a/kaldi_io/src/kaldi/base/kaldi-error.h b/kaldi_io/src/kaldi/base/kaldi-error.h
new file mode 100644
index 0000000..8334e42
--- /dev/null
+++ b/kaldi_io/src/kaldi/base/kaldi-error.h
@@ -0,0 +1,153 @@
+// base/kaldi-error.h
+
+// Copyright 2009-2011  Microsoft Corporation;  Ondrej Glembek;  Lukas Burget;
+//                      Saarland University
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_BASE_KALDI_ERROR_H_
+#define KALDI_BASE_KALDI_ERROR_H_ 1
+
+#include <stdexcept>
+#include <string>
+#include <cstring>
+#include <sstream>
+#include <cstdio>
+
+#ifdef _MSC_VER
+#define NOEXCEPT(Predicate)
+#elif __cplusplus > 199711L || defined(__GXX_EXPERIMENTAL_CXX0X__)
+#define NOEXCEPT(Predicate) noexcept((Predicate))
+#else
+#define NOEXCEPT(Predicate)
+#endif
+
+#include "base/kaldi-types.h"
+#include "base/kaldi-utils.h"
+
+/* Important that this file does not depend on any other kaldi headers. */
+
+
+namespace kaldi {
+
+/// \addtogroup error_group
+/// @{
+
+/// This is set by util/parse-options.{h, cc} if you set --verbose = ? option
+extern int32 g_kaldi_verbose_level;
+
+/// This is set by util/parse-options.{h, cc} (from argv[0]) and used (if set)
+/// in error reporting code to display the name of the program (this is because
+/// in our scripts, we often mix together the stderr of many programs).  it is
+/// the base-name of the program (no directory), followed by ':' We don't use
+/// std::string, due to the static initialization order fiasco.
+extern const char *g_program_name;
+
+inline int32 GetVerboseLevel() { return g_kaldi_verbose_level; }
+
+/// This should be rarely used; command-line programs set the verbose level
+/// automatically from ParseOptions.
+inline void SetVerboseLevel(int32 i) { g_kaldi_verbose_level = i; }
+
+// The classes KaldiWarnMessage, KaldiLogMessage and KaldiVlogMessage are
+// invoked from the KALDI_WARN, KALDI_LOG and KALDI_VLOG macros.  They print
+// the message to stderr.  Note: we avoid using cerr, due to problems with
+// thread safety.  fprintf is guaranteed thread-safe.
+
+// class KaldiWarnMessage is invoked from the KALDI_WARN macro.
+class KaldiWarnMessage {
+ public:
+  inline std::ostream &stream() { return ss; }
+  KaldiWarnMessage(const char *func, const char *file, int32 line);
+  ~KaldiWarnMessage()  { fprintf(stderr, "%s\n", ss.str().c_str()); }
+ private:
+  std::ostringstream ss;
+};
+
+// class KaldiLogMessage is invoked from the KALDI_LOG macro.
+class KaldiLogMessage {
+ public:
+  inline std::ostream &stream() { return ss; }
+  KaldiLogMessage(const char *func, const char *file, int32 line);
+  ~KaldiLogMessage() { fprintf(stderr, "%s\n", ss.str().c_str()); }
+ private:
+  std::ostringstream ss;
+};
+
+// Class KaldiVlogMessage is invoked from the KALDI_VLOG macro.
+class KaldiVlogMessage {
+ public:
+  KaldiVlogMessage(const char *func, const char *file, int32 line,
+                   int32 verbose_level);
+  inline std::ostream &stream() { return ss; }
+  ~KaldiVlogMessage() { fprintf(stderr, "%s\n", ss.str().c_str()); }
+ private:
+  std::ostringstream ss;
+};
+
+
+// class KaldiErrorMessage is invoked from the KALDI_ERR macro.
+// The destructor throws an exception.
+class KaldiErrorMessage {
+ public:
+  KaldiErrorMessage(const char *func, const char *file, int32 line);
+  inline std::ostream &stream() { return ss; }
+  ~KaldiErrorMessage() NOEXCEPT(false);  // defined in kaldi-error.cc
+ private:
+  std::ostringstream ss;
+};
+
+
+
+#ifdef _MSC_VER
+#define __func__ __FUNCTION__
+#endif
+
+#ifndef NDEBUG
+#define KALDI_ASSERT(cond) \
+  if (!(cond)) kaldi::KaldiAssertFailure_(__func__, __FILE__, __LINE__, #cond);
+#else
+#define KALDI_ASSERT(cond)
+#endif
+// also see KALDI_COMPILE_TIME_ASSERT, defined in base/kaldi-utils.h,
+// and KALDI_ASSERT_IS_INTEGER_TYPE and KALDI_ASSERT_IS_FLOATING_TYPE,
+// also defined there.
+#ifdef KALDI_PARANOID // some more expensive asserts only checked if this defined
+#define KALDI_PARANOID_ASSERT(cond) \
+  if (!(cond)) kaldi::KaldiAssertFailure_(__func__, __FILE__, __LINE__, #cond);
+#else
+#define KALDI_PARANOID_ASSERT(cond)
+#endif
+
+#define KALDI_ERR kaldi::KaldiErrorMessage(__func__, __FILE__, __LINE__).stream() 
+#define KALDI_WARN kaldi::KaldiWarnMessage(__func__, __FILE__, __LINE__).stream() 
+#define KALDI_LOG kaldi::KaldiLogMessage(__func__, __FILE__, __LINE__).stream()
+// Logs only if v <= verbose level; "if {} else" form keeps a caller's else from binding here.
+#define KALDI_VLOG(v) if ((v) > kaldi::g_kaldi_verbose_level) {} else  \
+           kaldi::KaldiVlogMessage(__func__, __FILE__, __LINE__, (v)).stream()
+
+inline bool IsKaldiError(const std::string &str) {
+  return(!strncmp(str.c_str(), "ERROR ", 6));
+}
+
+void KaldiAssertFailure_(const char *func, const char *file,
+                         int32 line, const char *cond_str);
+
+/// @} end "addtogroup error_group"
+
+}  // namespace kaldi
+
+#endif  // KALDI_BASE_KALDI_ERROR_H_
diff --git a/kaldi_io/src/kaldi/base/kaldi-math.h b/kaldi_io/src/kaldi/base/kaldi-math.h
new file mode 100644
index 0000000..4f60d00
--- /dev/null
+++ b/kaldi_io/src/kaldi/base/kaldi-math.h
@@ -0,0 +1,346 @@
+// base/kaldi-math.h
+
+// Copyright 2009-2011  Ondrej Glembek;  Microsoft Corporation;  Yanmin Qian;
+//                      Jan Silovsky;  Saarland University
+//
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_BASE_KALDI_MATH_H_
+#define KALDI_BASE_KALDI_MATH_H_ 1
+
+#ifdef _MSC_VER
+#include <float.h>
+#endif
+
+#include <cmath>
+#include <limits>
+#include <vector>
+
+#include "base/kaldi-types.h"
+#include "base/kaldi-common.h"
+
+
+#ifndef DBL_EPSILON
+#define DBL_EPSILON 2.2204460492503131e-16
+#endif
+#ifndef FLT_EPSILON
+#define FLT_EPSILON 1.19209290e-7f
+#endif
+
+#ifndef M_PI
+#  define M_PI 3.1415926535897932384626433832795
+#endif
+
+#ifndef M_SQRT2
+#  define M_SQRT2 1.4142135623730950488016887
+#endif
+
+
+#ifndef M_2PI
+#  define M_2PI 6.283185307179586476925286766559005
+#endif
+
+#ifndef M_SQRT1_2
+# define M_SQRT1_2 0.7071067811865475244008443621048490
+#endif
+
+#ifndef M_LOG_2PI
+#define M_LOG_2PI 1.8378770664093454835606594728112
+#endif
+
+#ifndef M_LN2
+#define M_LN2 0.693147180559945309417232121458
+#endif
+
+#ifdef _MSC_VER
+#  define KALDI_ISNAN _isnan
+#  define KALDI_ISINF(x) (!_isnan(x) && _isnan(x-x))
+#  define KALDI_ISFINITE _finite
+#else
+#  define KALDI_ISNAN std::isnan
+#  define KALDI_ISINF std::isinf
+#  define KALDI_ISFINITE(x) std::isfinite(x)
+#endif
+#if !defined(KALDI_SQR)
+# define KALDI_SQR(x) ((x) * (x))
+#endif
+
+namespace kaldi {
+
+// -infinity
+const float kLogZeroFloat = -std::numeric_limits<float>::infinity();
+const double kLogZeroDouble = -std::numeric_limits<double>::infinity();
+const BaseFloat kLogZeroBaseFloat = -std::numeric_limits<BaseFloat>::infinity();
+
+// Returns a random integer between 0 and RAND_MAX, inclusive
+int Rand(struct RandomState* state=NULL);
+
+// State for thread-safe random number generator
+struct RandomState {
+  RandomState();
+  unsigned seed;
+};
+
+// Returns a random integer between min and max inclusive.
+int32 RandInt(int32 min, int32 max, struct RandomState* state=NULL);
+
+bool WithProb(BaseFloat prob, struct RandomState* state=NULL); // Returns true with probability "prob",
+// with 0 <= prob <= 1 [we check this].
+// Internally calls Rand().  This function is carefully implemented so
+// that it should work even if prob is very small.
+
+/// Returns a random number strictly between 0 and 1.
+inline float RandUniform(struct RandomState* state = NULL) {
+  return static_cast<float>((Rand(state) + 1.0) / (RAND_MAX+2.0));
+}
+
+inline float RandGauss(struct RandomState* state = NULL) {
+  return static_cast<float>(sqrtf (-2 * logf(RandUniform(state)))
+                            * cosf(2*M_PI*RandUniform(state)));
+}
+
+// Returns poisson-distributed random number.  Uses Knuth's algorithm.
+// Take care: this takes time proportional
+// to lambda.  Faster algorithms exist but are more complex.
+int32 RandPoisson(float lambda, struct RandomState* state=NULL);
+
+// Returns a pair of gaussian random numbers. Uses Box-Muller transform
+void RandGauss2(float *a, float *b, RandomState *state = NULL);
+void RandGauss2(double *a, double *b, RandomState *state = NULL);
+
+// Also see Vector<float,double>::RandCategorical().
+
+// This is a randomized pruning mechanism that preserves expectations,
+// that we typically use to prune posteriors.
+template<class Float>
+inline Float RandPrune(Float post, BaseFloat prune_thresh, struct RandomState* state=NULL) {
+  KALDI_ASSERT(prune_thresh >= 0.0);
+  if (post == 0.0 || std::abs(post) >= prune_thresh)
+    return post;
+  return (post >= 0 ? 1.0 : -1.0) *
+      (RandUniform(state) <= fabs(post)/prune_thresh ? prune_thresh : 0.0);
+}
+
+static const double kMinLogDiffDouble = std::log(DBL_EPSILON);  // negative!
+static const float kMinLogDiffFloat = std::log(FLT_EPSILON);  // negative!
+
+inline double LogAdd(double x, double y) {
+  double diff;
+  if (x < y) {
+    diff = x - y;
+    x = y;
+  } else {
+    diff = y - x;
+  }
+  // diff is negative.  x is now the larger one.
+
+  if (diff >= kMinLogDiffDouble) {
+    double res;
+#ifdef _MSC_VER
+    res = x + log(1.0 + exp(diff));
+#else
+    res = x + log1p(exp(diff));
+#endif
+    return res;
+  } else {
+    return x;  // return the larger one.
+  }
+}
+
+
+inline float LogAdd(float x, float y) {
+  float diff;
+  if (x < y) {
+    diff = x - y;
+    x = y;
+  } else {
+    diff = y - x;
+  }
+  // diff is negative.  x is now the larger one.
+
+  if (diff >= kMinLogDiffFloat) {
+    float res;
+#ifdef _MSC_VER
+    res = x + logf(1.0 + expf(diff));
+#else
+    res = x + log1pf(expf(diff));
+#endif
+    return res;
+  } else {
+    return x;  // return the larger one.
+  }
+}
+
+
+// Returns log(exp(x) - exp(y)), i.e. subtraction in the log domain.
+inline double LogSub(double x, double y) {
+  if (y >= x) {  // y == x gives log(0); y > x is reported as an error.
+    if (y == x)
+      return kLogZeroDouble;
+    else
+      KALDI_ERR << "Cannot subtract a larger from a smaller number.";
+  }
+
+  double diff = y - x;  // Will be negative.
+  double res = x + log(1.0 - exp(diff));
+
+  // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision
+  if (KALDI_ISNAN(res))
+    return kLogZeroDouble;
+  return res;
+}
+
+
+// Returns log(exp(x) - exp(y)), i.e. subtraction in the log domain (float version).
+inline float LogSub(float x, float y) {
+  if (y >= x) {  // y == x gives log(0); y > x is reported as an error.
+    if (y == x)
+      return kLogZeroFloat;  // float constant, matching the return type.
+    else
+      KALDI_ERR << "Cannot subtract a larger from a smaller number.";
+  }
+
+  float diff = y - x;  // Will be negative.
+  float res = x + logf(1.0 - expf(diff));
+
+  // res might be NAN if diff ~0.0, and 1.0-exp(diff) == 0 to machine precision
+  if (KALDI_ISNAN(res))
+    return kLogZeroFloat;
+  return res;
+}
+
+/// return abs(a - b) <= relative_tolerance * (abs(a)+abs(b)).
+static inline bool ApproxEqual(float a, float b,
+                               float relative_tolerance = 0.001) {
+  // a==b handles infinities.
+  if (a==b) return true;
+  float diff = std::abs(a-b);
+  if (diff == std::numeric_limits<float>::infinity()
+      || diff != diff) return false; // diff is +inf or nan.
+  return (diff <= relative_tolerance*(std::abs(a)+std::abs(b))); 
+}
+
+/// assert abs(a - b) <= relative_tolerance * (abs(a)+abs(b))
+static inline void AssertEqual(float a, float b,
+                               float relative_tolerance = 0.001) {
+  // a==b handles infinities.
+  KALDI_ASSERT(ApproxEqual(a, b, relative_tolerance));
+}
+
+
+// RoundUpToNearestPowerOfTwo does the obvious thing. It crashes if n <= 0.
+int32 RoundUpToNearestPowerOfTwo(int32 n);
+
+template<class I> I  Gcd(I m, I n) {
+  if (m == 0 || n == 0) {
+    if (m == 0 && n == 0) {  // gcd not defined, as all integers are divisors.
+      KALDI_ERR << "Undefined GCD since m = 0, n = 0.";
+    }
+    return (m == 0 ? (n > 0 ? n : -n) : ( m > 0 ? m : -m));
+    // return absolute value of whichever is nonzero
+  }
+  // could use compile-time assertion
+  // but involves messing with complex template stuff.
+  KALDI_ASSERT(std::numeric_limits<I>::is_integer);
+  while (1) {
+    m %= n;
+    if (m == 0) return (n > 0 ? n : -n);
+    n %= m;
+    if (n == 0) return (m > 0 ? m : -m);
+  }
+}
+
+/// Returns the least common multiple of two integers.  Will
+/// crash unless the inputs are positive.
+template<class I> I  Lcm(I m, I n) {
+  KALDI_ASSERT(m > 0 && n > 0);
+  I gcd = Gcd(m, n);
+  return gcd * (m/gcd) * (n/gcd);
+}
+
+
+template<class I> void Factorize(I m, std::vector<I> *factors) {
+  // Splits a number into its prime factors, in sorted order from
+  // least to greatest,  with duplication.  A very inefficient
+  // algorithm, which is mainly intended for use in the
+  // mixed-radix FFT computation (where we assume most factors
+  // are small).
+  KALDI_ASSERT(factors != NULL);
+  KALDI_ASSERT(m >= 1);  // Doesn't work for zero or negative numbers.
+  factors->clear();
+  I small_factors[10] = { 2, 3, 5, 7, 11, 13, 17, 19, 23, 29 };
+
+  // First try small factors.
+  for (I i = 0; i < 10; i++) {
+    if (m == 1) return;  // We're done.
+    while (m % small_factors[i] == 0) {
+      m /= small_factors[i];
+      factors->push_back(small_factors[i]);
+    }
+  }
+  // Next try all odd numbers starting from 31.
+  for (I j = 31;; j += 2) {
+    if (m == 1) return;
+    while (m % j == 0) {
+      m /= j;
+      factors->push_back(j);
+    }
+  }
+}
+
+inline double Hypot(double x, double y) {  return hypot(x, y); }
+
+inline float Hypot(float x, float y) {  return hypotf(x, y); }
+
+#if !defined(_MSC_VER) || (_MSC_VER >= 1800)
+inline double Log1p(double x) {  return log1p(x); }
+
+inline float Log1p(float x) {  return log1pf(x); }
+#else
+// Fallback log(1+x) for MSVC older than 1800; uses a Taylor series only for tiny |x|.
+inline double Log1p(double x) {
+    const double cutoff = 1.0e-08;
+    if (x > -cutoff && x < cutoff)  // only near zero, not for all negative x.
+        return x - 0.5 * x * x;  // log(1+x) = x - x^2/2 + O(x^3)
+    return log(1.0 + x);
+}
+
+// Fallback log(1+x) for MSVC older than 1800 (float); Taylor series only for tiny |x|.
+inline float Log1p(float x) {
+    const float cutoff = 1.0e-07;
+    if (x > -cutoff && x < cutoff)  // only near zero, not for all negative x.
+        return x - 0.5f * x * x;  // log(1+x) = x - x^2/2 + O(x^3)
+    return log(1.0 + x);
+}
+#endif
+
+inline double Exp(double x) { return exp(x); }
+
+#ifndef KALDI_NO_EXPF
+inline float Exp(float x) { return expf(x); }
+#else
+inline float Exp(float x) { return exp(x); }
+#endif
+
+inline double Log(double x) { return log(x); }
+
+inline float Log(float x) { return logf(x); }
+
+
+}  // namespace kaldi
+
+
+#endif  // KALDI_BASE_KALDI_MATH_H_
diff --git a/kaldi_io/src/kaldi/base/kaldi-types.h b/kaldi_io/src/kaldi/base/kaldi-types.h
new file mode 100644
index 0000000..04354b2
--- /dev/null
+++ b/kaldi_io/src/kaldi/base/kaldi-types.h
@@ -0,0 +1,64 @@
+// base/kaldi-types.h
+
+// Copyright 2009-2011  Microsoft Corporation;  Saarland University;
+//                      Jan Silovsky;  Yanmin Qian
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_BASE_KALDI_TYPES_H_
+#define KALDI_BASE_KALDI_TYPES_H_ 1
+
+namespace kaldi {
+// TYPEDEFS ..................................................................
+#if (KALDI_DOUBLEPRECISION != 0)  // an undefined macro evaluates as 0 in #if, which selects float.
+typedef double  BaseFloat;
+#else
+typedef float   BaseFloat;
+#endif
+}  // end namespace kaldi
+
+#ifdef _MSC_VER
+namespace kaldi {
+typedef unsigned __int16 uint16;
+typedef unsigned __int32 uint32;
+typedef __int16          int16;
+typedef __int32          int32;
+typedef __int64          int64;
+typedef unsigned __int64 uint64;
+typedef float          float32;
+typedef double        double64;
+}
+#include <basetsd.h>
+#define ssize_t SSIZE_T
+
+#else
+// we can do this a different way if some platform
+// we find in the future lacks stdint.h
+#include <stdint.h>
+
+namespace kaldi {
+typedef uint16_t        uint16;
+typedef uint32_t        uint32;
+typedef uint64_t        uint64;
+typedef int16_t         int16;
+typedef int32_t         int32;
+typedef int64_t         int64;
+typedef float           float32;
+typedef double         double64;
+}  // end namespace kaldi
+#endif
+
+#endif  // KALDI_BASE_KALDI_TYPES_H_
diff --git a/kaldi_io/src/kaldi/base/kaldi-utils.h b/kaldi_io/src/kaldi/base/kaldi-utils.h
new file mode 100644
index 0000000..1b2c893
--- /dev/null
+++ b/kaldi_io/src/kaldi/base/kaldi-utils.h
@@ -0,0 +1,157 @@
+// base/kaldi-utils.h
+
+// Copyright 2009-2011  Ondrej Glembek;  Microsoft Corporation;
+//                      Saarland University;  Karel Vesely;  Yanmin Qian
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_BASE_KALDI_UTILS_H_
+#define KALDI_BASE_KALDI_UTILS_H_ 1
+
+#include <limits>
+#include <string>
+
+#if defined(_MSC_VER)
+# define WIN32_LEAN_AND_MEAN
+# define NOMINMAX
+# include <windows.h>
+#endif
+
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4056 4305 4800 4267 4996 4756 4661)
+#define __restrict__
+#endif
+
+#ifdef HAVE_POSIX_MEMALIGN
+#  define KALDI_MEMALIGN(align, size, pp_orig) \
+     (!posix_memalign(pp_orig, align, size) ? *(pp_orig) : NULL)
+#  define KALDI_MEMALIGN_FREE(x) free(x)
+#elif defined(HAVE_MEMALIGN)
+  /* Some systems have memalign() but no declaration for it */
+  void * memalign(size_t align, size_t size);
+#  define KALDI_MEMALIGN(align, size, pp_orig) \
+     (*(pp_orig) = memalign(align, size))
+#  define KALDI_MEMALIGN_FREE(x) free(x)
+#elif defined(_MSC_VER)
+#  define KALDI_MEMALIGN(align, size, pp_orig) \
+  (*(pp_orig) = _aligned_malloc(size, align))
+#  define KALDI_MEMALIGN_FREE(x) _aligned_free(x)
+#else
+#error Manual memory alignment is no longer supported
+#endif
+
+#ifdef __ICC
+#pragma warning(disable: 383)  // ICPC remark we don't want.
+#pragma warning(disable: 810)  // ICPC remark we don't want.
+#pragma warning(disable: 981)  // ICPC remark we don't want.
+#pragma warning(disable: 1418)  // ICPC remark we don't want.
+#pragma warning(disable: 444)  // ICPC remark we don't want.
+#pragma warning(disable: 869)  // ICPC remark we don't want.
+#pragma warning(disable: 1287)  // ICPC remark we don't want.
+#pragma warning(disable: 279)  // ICPC remark we don't want.
+#pragma warning(disable: 981)  // ICPC remark we don't want.
+#endif
+
+
+namespace kaldi {
+
+
+// CharToString prints the character in a human-readable form, for debugging.
+std::string CharToString(const char &c);
+
+
+inline int MachineIsLittleEndian() {
+  const int probe = 1;  // its lowest-addressed byte is nonzero only on little-endian.
+  return (reinterpret_cast<const char*>(&probe)[0] != 0) ? 1 : 0;
+}
+
+// This function kaldi::Sleep() provides a portable way to sleep for a possibly fractional
+// number of seconds.  On Windows it's only accurate to microseconds.
+void Sleep(float seconds);
+
+}
+
+#define KALDI_SWAP8(a) { /* reverses the 8 bytes of a in place; a must be an lvalue (its address is taken) */ \
+  int t = ((char*)&a)[0]; ((char*)&a)[0]=((char*)&a)[7]; ((char*)&a)[7]=t;\
+      t = ((char*)&a)[1]; ((char*)&a)[1]=((char*)&a)[6]; ((char*)&a)[6]=t;\
+      t = ((char*)&a)[2]; ((char*)&a)[2]=((char*)&a)[5]; ((char*)&a)[5]=t;\
+      t = ((char*)&a)[3]; ((char*)&a)[3]=((char*)&a)[4]; ((char*)&a)[4]=t;}
+#define KALDI_SWAP4(a) { /* reverses the 4 bytes of a in place: swaps bytes 0<->3 and 1<->2 */ \
+  int t = ((char*)&a)[0]; ((char*)&a)[0]=((char*)&a)[3]; ((char*)&a)[3]=t;\
+      t = ((char*)&a)[1]; ((char*)&a)[1]=((char*)&a)[2]; ((char*)&a)[2]=t;}
+#define KALDI_SWAP2(a) { /* swaps the 2 bytes of a in place */ \
+  int t = ((char*)&a)[0]; ((char*)&a)[0]=((char*)&a)[1]; ((char*)&a)[1]=t;}
+
+
+// Makes copy constructor and operator= private.  Same as in compat.h of OpenFst
+// toolkit.  If using VS, for which this results in compilation errors, we
+// do it differently.
+
+#if defined(_MSC_VER)
+#define KALDI_DISALLOW_COPY_AND_ASSIGN(type) \
+  void operator = (const type&)
+#else
+#define KALDI_DISALLOW_COPY_AND_ASSIGN(type)    \
+  type(const type&);                  \
+  void operator = (const type&)
+#endif
+
+template<bool B> class KaldiCompileTimeAssert { };  // general case has no Check(), so ::Check() fails to compile.
+template<> class KaldiCompileTimeAssert<true> {  // only the <true> specialization provides Check().
+ public:
+  static inline void Check() { }  // intentionally empty; it only needs to exist.
+};
+
+#define KALDI_COMPILE_TIME_ASSERT(b) KaldiCompileTimeAssert<(b)>::Check()
+
+#define KALDI_ASSERT_IS_INTEGER_TYPE(I) \
+  KaldiCompileTimeAssert<std::numeric_limits<I>::is_specialized \
+                 && std::numeric_limits<I>::is_integer>::Check()
+
+#define KALDI_ASSERT_IS_FLOATING_TYPE(F) \
+  KaldiCompileTimeAssert<std::numeric_limits<F>::is_specialized \
+                && !std::numeric_limits<F>::is_integer>::Check()
+
+#ifdef _MSC_VER
+#include <stdio.h>
+#define unlink _unlink
+#else
+#include <unistd.h>
+#endif
+
+
+#ifdef _MSC_VER
+#define KALDI_STRCASECMP _stricmp
+#else
+#define KALDI_STRCASECMP strcasecmp
+#endif
+#ifdef _MSC_VER  // Base-10 string to 64-bit integer; expands to a plain expression.
+#  define KALDI_STRTOLL(cur_cstr, end_cstr) _strtoi64(cur_cstr, end_cstr, 10)
+#else
+#  define KALDI_STRTOLL(cur_cstr, end_cstr) strtoll(cur_cstr, end_cstr, 10)
+#endif
+// None of these parsing macros ends in ';', so each is usable inside an expression.
+#define KALDI_STRTOD(cur_cstr, end_cstr) strtod(cur_cstr, end_cstr)
+// String to float: the MSVC branch parses with strtod() and casts to float.
+#ifdef _MSC_VER
+#  define KALDI_STRTOF(cur_cstr, end_cstr) \
+    static_cast<float>(strtod(cur_cstr, end_cstr))
+#else
+#  define KALDI_STRTOF(cur_cstr, end_cstr) strtof(cur_cstr, end_cstr)
+#endif
+
+#endif  // KALDI_BASE_KALDI_UTILS_H_
+
diff --git a/kaldi_io/src/kaldi/base/timer.h b/kaldi_io/src/kaldi/base/timer.h
new file mode 100644
index 0000000..d93a461
--- /dev/null
+++ b/kaldi_io/src/kaldi/base/timer.h
@@ -0,0 +1,83 @@
+// base/timer.h
+
+// Copyright 2009-2011  Ondrej Glembek;  Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_BASE_TIMER_H_
+#define KALDI_BASE_TIMER_H_
+
+#include "base/kaldi-utils.h"
+// Note: Sleep(float secs) is included in base/kaldi-utils.h.
+
+
+#if defined(_MSC_VER) || defined(MINGW)
+
+namespace kaldi
+{
+
+class Timer {
+ public:
+  Timer() { Reset(); }
+  /// Stores the current performance-counter reading as the start time.
+  void Reset() { QueryPerformanceCounter(&time_start_); }
+  /// Returns seconds since the last Reset(), or 0.0 if the frequency query fails.
+  double Elapsed() {
+    LARGE_INTEGER now, ticks_per_sec;
+    QueryPerformanceCounter(&now);
+    if (!QueryPerformanceFrequency(&ticks_per_sec)) return 0.0;
+    const double start_ticks = static_cast<double>(time_start_.QuadPart);
+    const double end_ticks = static_cast<double>(now.QuadPart);
+    return (end_ticks - start_ticks) / static_cast<double>(ticks_per_sec.QuadPart);
+  }
+ private:
+  LARGE_INTEGER time_start_;
+};
+}
+
+#else
+
+# include <sys/time.h>
+# include <unistd.h>
+namespace kaldi
+{
+class Timer
+{
+ public:
+  Timer() { Reset(); }
+
+  /// Restarts timing from now.  The timezone argument of gettimeofday() is
+  /// obsolete (behaviour is unspecified when it is non-null), so pass NULL.
+  void Reset() { gettimeofday(&time_start_, NULL); }
+
+  /// Returns time in seconds since construction or the last Reset().
+  double Elapsed() const {
+    struct timeval time_end;
+    gettimeofday(&time_end, NULL);
+    // Subtract the fields separately: folding tv_usec into a ~1e9-second
+    // double before subtracting would throw away sub-microsecond precision.
+    return static_cast<double>(time_end.tv_sec - time_start_.tv_sec) +
+        static_cast<double>(time_end.tv_usec - time_start_.tv_usec) / 1.0e6;
+  }
+
+ private:
+  struct timeval time_start_;
+};
+}
+
+#endif
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/hmm/hmm-topology.h b/kaldi_io/src/kaldi/hmm/hmm-topology.h
new file mode 100644
index 0000000..53ca427
--- /dev/null
+++ b/kaldi_io/src/kaldi/hmm/hmm-topology.h
@@ -0,0 +1,172 @@
+// hmm/hmm-topology.h
+
+// Copyright 2009-2011  Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_HMM_HMM_TOPOLOGY_H_
+#define KALDI_HMM_HMM_TOPOLOGY_H_
+
+#include "base/kaldi-common.h"
+#include "tree/context-dep.h"
+#include "util/const-integer-set.h"
+
+
+namespace kaldi {
+
+
+/// \addtogroup hmm_group
+/// @{
+
+/*
+ // The following would be the text form for the "normal" HMM topology.
+ // Note that the first state is the start state, and the final state,
+ // which must have no output transitions and must be nonemitting, has
+ // an exit probability of one (no other state can have nonzero exit
+ // probability; you can treat the transition probability to the final
+ // state as an exit probability).
+ // Note also that it's valid to omit the "<PdfClass>" entry of the <State>, which
+ // will mean we won't have a pdf on that state [non-emitting state].  This is equivalent
+ // to setting the <PdfClass> to -1.  We do this normally just for the final state.
+ // The Topology object can have multiple <TopologyEntry> blocks.
+ // This is useful if there are multiple types of topology in the system.
+
+ <Topology>
+ <TopologyEntry>
+ <ForPhones> 1 2 3 4 5 6 7 8 </ForPhones>
+ <State> 0 <PdfClass> 0
+ <Transition> 0 0.5
+ <Transition> 1 0.5
+ </State>
+ <State> 1 <PdfClass> 1
+ <Transition> 1 0.5
+ <Transition> 2 0.5
+ </State>
+ <State> 2 <PdfClass> 2
+ <Transition> 2 0.5
+ <Transition> 3 0.5
+ <Final> 0.5
+ </State>
+ <State> 3
+ </State> 
+ </TopologyEntry>
+ </Topology>
+*/
+
+// kNoPdf is used where pdf_class or pdf would be used, to indicate,
+// none is there.  Mainly useful in skippable models, but also used
+// for end states.
+// A caveat with nonemitting states is that their out-transitions
+// are not trainable, due to technical issues with the way
+// we decided to accumulate the stats.  Any transitions arising from (*)
+// HMM states with "kNoPdf" as the label are second-class transitions,
+// They do not have "transition-states" or "transition-ids" associated
+// with them.  They are used to create the FST version of the
+// HMMs, where they lead to epsilon arcs.
+// (*) "arising from" is a bit of a technical term here, due to the way
+// (if reorder == true), we put the transition-id associated with the
+// outward arcs of the state, on the input transition to the state.
+
+/// A constant used in the HmmTopology class as the \ref pdf_class "pdf-class"
+/// kNoPdf, which is used when a HMM-state is nonemitting (has no associated
+/// PDF).
+
+static const int32 kNoPdf = -1;
+
+/// A class for storing topology information for phones.  See  \ref hmm for context.
+/// This object is sometimes accessed in a file by itself, but more often
+/// as a class member of the Transition class (this is for convenience to reduce
+/// the number of files programs have to access).
+
+class HmmTopology {
+ public:
+  /// A structure defined inside HmmTopology to represent a HMM state.
+  struct HmmState {
+    /// The \ref pdf_class pdf-class, typically 0, 1 or 2 (the same as the HMM-state index),
+    /// but may be different to enable us to hardwire sharing of state, and may be
+    /// equal to \ref kNoPdf == -1 in order to specify nonemitting states (unusual).
+    int32 pdf_class;
+
+    /// A list of transitions.  The first member of each pair is the index of
+    /// the next HmmState, and the second is the default transition probability
+    /// (before training).
+    std::vector<std::pair<int32, BaseFloat> > transitions;
+    /// Constructs a state with the given pdf-class and an empty transition list.
+    explicit HmmState(int32 p): pdf_class(p) { }
+    /// Two states are equal iff their pdf-class and transition lists both match.
+    bool operator == (const HmmState &other) const {
+      return (pdf_class == other.pdf_class && transitions == other.transitions);
+    }
+    /// Default state: pdf-class -1 (the value of kNoPdf) and no transitions.
+    HmmState(): pdf_class(-1) { }
+  };
+
+  /// TopologyEntry is a typedef that represents the topology of
+  /// a single (prototype) phone, as a vector of its HmmState objects.
+  typedef std::vector<HmmState> TopologyEntry;
+
+  void Read(std::istream &is, bool binary);
+  void Write(std::ostream &os, bool binary) const;
+
+  // Checks that the object is valid, and throws an exception otherwise.
+  void Check();
+
+
+  /// Returns the topology entry (i.e. vector of HmmState) for this phone;
+  /// will throw exception if phone not covered by the topology.
+  const TopologyEntry &TopologyForPhone(int32 phone) const;
+
+  /// Returns the number of \ref pdf_class "pdf-classes" for this phone;
+  /// throws exception if phone not covered by this topology.
+  int32 NumPdfClasses(int32 phone) const;
+
+  /// Returns a reference to a sorted, unique list of phones covered by
+  /// the topology (these phones will be positive integers, and usually
+  /// contiguous and starting from one but the toolkit doesn't assume
+  /// they are contiguous).
+  const std::vector<int32> &GetPhones() const { return phones_; };
+
+  /// Outputs a vector of int32, indexed by phone, that gives the
+  /// number of \ref pdf_class pdf-classes for the phones; this is
+  /// used by tree-building code such as BuildTree().
+  void GetPhoneToNumPdfClasses(std::vector<int32> *phone2num_pdf_classes) const;
+
+  HmmTopology() {}
+  /// Equality compares the phone list, the phone-to-entry index map and the entries.
+  bool operator == (const HmmTopology &other) const {
+    return phones_ == other.phones_ && phone2idx_ == other.phone2idx_
+        && entries_ == other.entries_;
+  }
+  // Allow default assignment operator and copy constructor.
+ private:
+  std::vector<int32> phones_;  // list of all phones we have topology for.  Sorted, uniq.  no epsilon (zero) phone.
+  std::vector<int32> phone2idx_;  // map from phones to indexes into the entries vector (or -1 for not present).
+  std::vector<TopologyEntry> entries_;
+};
+
+
+/// This function returns a HmmTopology object giving a normal 3-state topology,
+/// covering all phones in the list "phones".  This is mainly of use in testing
+/// code.
+HmmTopology GetDefaultTopology(const std::vector<int32> &phones);
+
+/// @} end "addtogroup hmm_group"
+
+
+} // end namespace kaldi
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/hmm/hmm-utils.h b/kaldi_io/src/kaldi/hmm/hmm-utils.h
new file mode 100644
index 0000000..240f706
--- /dev/null
+++ b/kaldi_io/src/kaldi/hmm/hmm-utils.h
@@ -0,0 +1,295 @@
+// hmm/hmm-utils.h
+
+// Copyright 2009-2011  Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_HMM_HMM_UTILS_H_
+#define KALDI_HMM_HMM_UTILS_H_
+
+#include "hmm/hmm-topology.h"
+#include "hmm/transition-model.h"
+#include "lat/kaldi-lattice.h"
+
+namespace kaldi {
+
+
+/// \defgroup hmm_group_graph Classes and functions for creating FSTs from HMMs
+/// \ingroup hmm_group
+/// @{
+
+/// Configuration class for the GetHTransducer() function; see
+/// \ref hmm_graph_config for context.
+struct HTransducerConfig {
+  /// Transition log-prob scale, see \ref hmm_scale.
+  /// Note this doesn't apply to self-loops; GetHTransducer() does
+  /// not include self-loops.
+  BaseFloat transition_scale;
+
+  /// if true, we are constructing time-reversed FST: phone-seqs in ilabel_info
+  /// are backwards, and we want to output a backwards version of the HMM
+  /// corresponding to each phone.  Defaults to false.
+  bool reverse;
+
+  /// This variable is only looked at if reverse == true.  If reverse == true
+  /// and push_weights == true, then we push the weights in the reversed FSTs we create for each
+  /// phone HMM.  This is only safe if the HMMs are probabilistic (i.e. not discriminatively trained).
+  bool push_weights;
+
+  /// delta used if we do push_weights [only relevant if reverse == true
+  /// and push_weights == true].
+  BaseFloat push_delta;
+
+  HTransducerConfig():
+      transition_scale(1.0),
+      reverse(false),
+      push_weights(true),
+      push_delta(0.001)
+  { }
+
+  // Registers the four options above (transition-scale, reverse, push-weights,
+  // push-delta) with the given options object.  An earlier note here mentioned
+  // a "sym_type" enum; no such member is declared in this struct.
+  void Register (OptionsItf *po) {
+    po->Register("transition-scale", &transition_scale,
+                 "Scale of transition probs (relative to LM)");
+    po->Register("reverse", &reverse,
+                 "Set true to build time-reversed FST.");
+    po->Register("push-weights", &push_weights,
+                 "Push weights (only applicable if reverse == true)");
+    po->Register("push-delta", &push_delta,
+                 "Delta used in pushing weights (only applicable if "
+                 "reverse && push-weights");
+  }
+};
+
+
+struct HmmCacheHash {
+  // Hash for a (central-phone, pdf-id sequence) key: prime * phone + vector hash.
+  int operator () (const std::pair<int32, std::vector<int32> >&p) const {
+    const int32 multiplier = 103049;
+    return multiplier * p.first + VectorHasher<int32>()(p.second);
+  }
+};
+
+/// HmmCacheType is a map from (central-phone, sequence of pdf-ids) to FST, used
+/// as cache in GetHmmAsFst, as an optimization.
+typedef unordered_map<std::pair<int32, std::vector<int32> >,
+                      fst::VectorFst<fst::StdArc>*,
+                      HmmCacheHash> HmmCacheType;
+
+
+/// Called by GetHTransducer() and probably will not need to be called directly;
+/// it creates the FST corresponding to the phone.  Does not include self-loops;
+/// you have to call AddSelfLoops() for that.  Result owned by caller.
+/// Returns an acceptor (i.e. ilabels, olabels identical) with transition-ids
+/// as the symbols.
+/// For documentation in context, see \ref hmm_graph_get_hmm_as_fst
+///   @param context_window  A vector representing the phonetic context; see
+///            \ref tree_window "here" for explanation.
+///   @param ctx_dep The object that contains the phonetic decision-tree
+///   @param trans_model The transition-model object, which provides
+///         the mappings to transition-ids and also the transition
+///         probabilities.
+///   @param config Configuration object, see \ref HTransducerConfig.
+///   @param cache Object used as a lookaside buffer to save computation;
+///       if it finds that the object it needs is already there, it will
+///       just return a pointer value from "cache"-- note that this means
+///       you have to be careful not to delete things twice.
+
+fst::VectorFst<fst::StdArc> *GetHmmAsFst(
+    std::vector<int32> context_window,
+    const ContextDependencyInterface &ctx_dep,
+    const TransitionModel &trans_model,
+    const HTransducerConfig &config,
+    HmmCacheType *cache = NULL);
+
+/// Included mainly as a form of documentation, not used in any other code
+/// currently.  Creates the FST with self-loops, and with fewer options.
+fst::VectorFst<fst::StdArc>*
+GetHmmAsFstSimple(std::vector<int32> context_window,
+                  const ContextDependencyInterface &ctx_dep,
+                  const TransitionModel &trans_model,
+                  BaseFloat prob_scale);
+
+
+/**
+  * Returns the H transducer; result owned by caller.
+  * See \ref hmm_graph_get_h_transducer.  The H transducer has on the
+  * input transition-ids, and also possibly some disambiguation symbols, which
+  * will be put in disambig_syms_left.  The output side contains the identifiers that
+  * are indexes into "ilabel_info" (these represent phones-in-context or
+  * disambiguation symbols).  The ilabel_info vector allows GetHTransducer to map
+  * from symbols to phones-in-context (i.e. phonetic context windows).  Any
+  * singleton symbols in the ilabel_info vector which are not phones, will be
+  * treated as disambiguation symbols.  [Not all recipes use these].  The output
+  * "disambig_syms_left" will be set to a list of the disambiguation symbols on
+  * the input of the transducer (i.e. same symbol type as whatever is on the
+  * input of the transducer).
+  */
+fst::VectorFst<fst::StdArc>*
+GetHTransducer (const std::vector<std::vector<int32> > &ilabel_info,
+                const ContextDependencyInterface &ctx_dep,
+                const TransitionModel &trans_model,
+                const HTransducerConfig &config,
+                std::vector<int32> *disambig_syms_left);
+
+/**
+  * GetIlabelMapping produces a mapping that's similar to HTK's logical-to-physical
+  * model mapping (i.e. the xwrd.clustered.mlist files).   It groups together
+  * "logical HMMs" (i.e. in our world, phonetic context windows) that share the
+  * same sequence of transition-ids.   This can be used in an
+  * optional graph-creation step that produces a remapped form of CLG that can be
+  * more productively determinized and minimized.  This is used in the command-line program
+  * make-ilabel-transducer.cc.
+  * @param ilabel_info_old [in] The original \ref tree_ilabel "ilabel_info" vector
+  * @param ctx_dep [in] The tree
+  * @param trans_model [in] The transition-model object
+  * @param old2new_map [out] The output; this vector, which is of size equal to the
+  *       number of new labels, is a mapping to the old labels such that we could
+  *       create a vector ilabel_info_new such that
+  *       ilabel_info_new[i] == ilabel_info_old[old2new_map[i]]
+  */
+void GetIlabelMapping (const std::vector<std::vector<int32> > &ilabel_info_old,
+                       const ContextDependencyInterface &ctx_dep,
+                       const TransitionModel &trans_model,
+                       std::vector<int32> *old2new_map);
+
+
+
+/**
+  * For context, see \ref hmm_graph_add_self_loops.  Expands an FST that has been
+  * built without self-loops, and adds the self-loops (it also needs to modify
+  * the probability of the non-self-loop ones, as the graph without self-loops
+  * was created in such a way that it was stochastic).  Note that the
+  * disambig_syms will be empty in some recipes (e.g.  if you already removed
+  * the disambiguation symbols).
+  * @param trans_model [in] Transition model
+  * @param disambig_syms [in] Sorted, uniq list of disambiguation symbols, required
+  *       if the graph contains disambiguation symbols but only needed for sanity checks.
+  * @param self_loop_scale [in] Transition-probability scale for self-loops; c.f.
+  *                    \ref hmm_scale
+  * @param reorder [in] If true, reorders the transitions (see \ref hmm_reorder).
+  * @param  fst [in, out] The FST to be modified.
+  */
+void AddSelfLoops(const TransitionModel &trans_model,
+                  const std::vector<int32> &disambig_syms,  // used as a check only.
+                  BaseFloat self_loop_scale,
+                  bool reorder,  // true->dan-style, false->lukas-style.
+                  fst::VectorFst<fst::StdArc> *fst);
+
+/**
+  * Adds transition-probs, with the supplied
+  * scales (see \ref hmm_scale), to the graph.
+  * Useful if you want to create a graph without transition probs, then possibly
+  * train the model (including the transition probs) but keep the graph fixed,
+  * and add back in the transition probs.  It assumes the fst has transition-ids
+  * on it.  It is not an error if the FST has no states (nothing will be done).
+  * @param trans_model [in] The transition model
+  * @param disambig_syms [in] A list of disambiguation symbols, required if the
+  *                       graph has disambiguation symbols on its input but only
+  *                       used for checks.
+  * @param transition_scale [in] A scale on transition-probabilities apart from
+  *                      those involving self-loops; see \ref hmm_scale.
+  * @param self_loop_scale [in] A scale on self-loop transition probabilities;
+  *                      see \ref hmm_scale.
+  * @param  fst [in, out] The FST to be modified.
+  */
+void AddTransitionProbs(const TransitionModel &trans_model,
+                        const std::vector<int32> &disambig_syms,
+                        BaseFloat transition_scale,
+                        BaseFloat self_loop_scale,
+                        fst::VectorFst<fst::StdArc> *fst);
+
+/**
+   This is as AddTransitionProbs() above, but operates on a Lattice, where
+   it affects the graph part of the weight (the first element
+   of the pair). */
+void AddTransitionProbs(const TransitionModel &trans_model,
+                        BaseFloat transition_scale,
+                        BaseFloat self_loop_scale,
+                        Lattice *lat);
+
+
+/// Returns a transducer from pdfs plus one (input) to  transition-ids (output).
+/// Currently of use only for testing.
+fst::VectorFst<fst::StdArc>*
+GetPdfToTransitionIdTransducer(const TransitionModel &trans_model);
+
+/// Converts all transition-ids in the FST to pdfs plus one.
+/// Placeholder: not implemented yet!
+void ConvertTransitionIdsToPdfs(const TransitionModel &trans_model,
+                                const std::vector<int32> &disambig_syms,
+                                fst::VectorFst<fst::StdArc> *fst);
+
+/// @} end "defgroup hmm_group_graph"
+
+/// \addtogroup hmm_group
+/// @{
+
+/// SplitToPhones splits up the TransitionIds in "alignment" into their
+/// individual phones (one vector per instance of a phone).  At output,
+/// the sum of the sizes of the vectors in split_alignment will be the same
+/// as the corresponding sum for "alignment".  The function returns
+/// true on success.  If the alignment appears to be incomplete, e.g.
+/// not ending at the end-state of a phone, it will still break it up into
+/// phones but it will return false.  For more serious errors it will
+/// die or throw an exception.
+/// This function works out by itself whether the graph was created
+/// with "reordering" (dan-style graph), and just does the right thing.
+
+bool SplitToPhones(const TransitionModel &trans_model,
+                   const std::vector<int32> &alignment,
+                   std::vector<std::vector<int32> > *split_alignment);
+
+/// ConvertAlignment converts an alignment that was created using one
+/// model, to another model.  They must use a compatible topology (so we
+/// know the state alignments of the new model).
+/// It returns false if it could not be split to phones (probably
+/// because the alignment was partial), but for other kinds of
+/// error that are more likely a coding error, it will throw
+/// an exception.
+bool ConvertAlignment(const TransitionModel &old_trans_model,
+                      const TransitionModel &new_trans_model,
+                      const ContextDependencyInterface &new_ctx_dep,
+                      const std::vector<int32> &old_alignment,
+                      const std::vector<int32> *phone_map,  // may be NULL
+                      std::vector<int32> *new_alignment);
+
+// ConvertPhnxToProns is only needed in bin/phones-to-prons.cc and
+// isn't closely related with HMMs, but we put it here as there isn't
+// any other obvious place for it and it needs to be tested.
+// This function takes a phone-sequence with word-start and word-end
+// markers in it, and a word-sequence, and outputs the pronunciations
+// "prons"... the format of "prons" is, each element is a vector,
+// where the first element is the word (or zero meaning no word, e.g.
+// for optional silence introduced by the lexicon), and the remaining
+// elements are the phones in the word's pronunciation.
+// It returns false if it encounters a problem of some kind, e.g.
+// if the phone-sequence doesn't seem to have the right number of
+// words in it.
+bool ConvertPhnxToProns(const std::vector<int32> &phnx,
+                        const std::vector<int32> &words,
+                        int32 word_start_sym,
+                        int32 word_end_sym,
+                        std::vector<std::vector<int32> > *prons);
+
+/// @} end "addtogroup hmm_group"
+
+} // end namespace kaldi
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/hmm/posterior.h b/kaldi_io/src/kaldi/hmm/posterior.h
new file mode 100644
index 0000000..be73be9
--- /dev/null
+++ b/kaldi_io/src/kaldi/hmm/posterior.h
@@ -0,0 +1,214 @@
+// hmm/posterior.h
+
+// Copyright 2009-2011     Microsoft Corporation
+//           2013-2014     Johns Hopkins University (author: Daniel Povey)
+//                2014     Guoguo Chen
+
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_HMM_POSTERIOR_H_
+#define KALDI_HMM_POSTERIOR_H_
+
+#include "base/kaldi-common.h"
+#include "tree/context-dep.h"
+#include "util/const-integer-set.h"
+#include "util/kaldi-table.h"
+#include "hmm/transition-model.h"
+
+
+namespace kaldi {
+
+
+/// \addtogroup posterior_group
+/// @{
+
+/// Posterior is a typedef for storing acoustic-state (actually, transition-id)
+/// posteriors over an utterance.  The "int32" is a transition-id (or a pdf-id or
+/// phone: see ConvertPosteriorToPdfs/ToPhones); the BaseFloat is a probability (typically in [0, 1]).
+typedef std::vector<std::vector<std::pair<int32, BaseFloat> > > Posterior;
+
+/// GaussPost is a typedef for storing Gaussian-level posteriors for an utterance.
+/// The "int32" is a pdf-id, and the Vector<BaseFloat> is a vector of
+/// Gaussian posteriors.
+/// WARNING: We changed "int32" from transition-id to pdf-id, and the change is
+/// applied for all programs using GaussPost. This is for efficiency purposes. We
+/// also changed the name slightly from GauPost to GaussPost to reduce the
+/// chance that the change will go unnoticed in downstream code.
+typedef std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > > GaussPost;
+
+
+// Holder class (in the Kaldi table sense) for the Posterior type, i.e.
+// std::vector<std::vector<std::pair<int32, BaseFloat> > >.
+// It owns a single Posterior, normally the transition-id posteriors
+// of one utterance.
+class PosteriorHolder {
+ public:
+  typedef Posterior T;
+
+  PosteriorHolder() { }
+
+  static bool Write(std::ostream &os, bool binary, const T &t);
+
+  // Reads an object from the stream into this holder.
+  bool Read(std::istream &is);
+
+  // Swapping with an empty local empties t_; the old contents die with it.
+  void Clear() { Posterior empty; std::swap(t_, empty); }
+
+  // The stream is always opened in binary mode when reading Kaldi objects.
+  static bool IsReadInBinary() { return true; }
+
+  const T &Value() const { return t_; }
+
+ private:
+  T t_;
+  KALDI_DISALLOW_COPY_AND_ASSIGN(PosteriorHolder);
+};
+
+
+// Holder class (in the Kaldi table sense) for the GaussPost type, i.e.
+// std::vector<std::vector<std::pair<int32, Vector<BaseFloat> > > >.
+// It owns a single GaussPost: the Gaussian-level posteriors (indexed by
+// pdf-id, see the GaussPost typedef) of one utterance.
+class GaussPostHolder {
+ public:
+  typedef GaussPost T;
+
+  GaussPostHolder() { }
+
+  static bool Write(std::ostream &os, bool binary, const T &t);
+
+  // Reads an object from the stream into this holder.
+  bool Read(std::istream &is);
+
+  // Swapping with an empty local empties t_; the old contents die with it.
+  void Clear() { GaussPost empty; std::swap(t_, empty); }
+
+  // The stream is always opened in binary mode when reading Kaldi objects.
+  static bool IsReadInBinary() { return true; }
+
+  const T &Value() const { return t_; }
+
+ private:
+  T t_;
+  KALDI_DISALLOW_COPY_AND_ASSIGN(GaussPostHolder);
+};
+
+
+// Table types for Posterior, which is a typedef:
+// vector<vector<pair<int32, BaseFloat> > >, representing posteriors
+// over (typically) transition-ids for an utterance.
+typedef TableWriter<PosteriorHolder> PosteriorWriter;
+typedef SequentialTableReader<PosteriorHolder> SequentialPosteriorReader;
+typedef RandomAccessTableReader<PosteriorHolder> RandomAccessPosteriorReader;
+
+
+// Table types for GaussPost (Gaussian-level posteriors for an utterance).
+typedef TableWriter<GaussPostHolder> GaussPostWriter;
+typedef SequentialTableReader<GaussPostHolder> SequentialGaussPostReader;
+typedef RandomAccessTableReader<GaussPostHolder> RandomAccessGaussPostReader;
+
+
+/// Scales by "scale" the BaseFloat (weight) element in the entries of "post".
+void ScalePosterior(BaseFloat scale, Posterior *post);
+
+/// Returns the total of all the weights in "post".
+BaseFloat TotalPosterior(const Posterior &post);
+
+/// Returns true if the two lists of pairs have no common .first element.
+bool PosteriorEntriesAreDisjoint(
+    const std::vector<std::pair<int32, BaseFloat> > &post_elem1,
+    const std::vector<std::pair<int32, BaseFloat> > &post_elem2);
+
+
+/// Merge two sets of posteriors, which must have the same length, into "post".
+/// If "merge" is true, it will make a common entry whenever there are duplicated
+/// entries, adding up the weights.  If "drop_frames" is true, for frames where
+/// the two sets of posteriors were originally disjoint, makes no entries for that
+/// frame (relates to frame dropping, or drop_frames, see Vesely et al, ICASSP
+/// 2013).  Returns the number of frames for which the two posteriors were
+/// disjoint (i.e. no common transition-ids or whatever index we are using).
+int32 MergePosteriors(const Posterior &post1,
+                      const Posterior &post2,
+                      bool merge,
+                      bool drop_frames,
+                      Posterior *post);
+
+/// Given a vector of log-likelihoods (typically of Gaussians in a GMM
+/// but could be of pdf-ids), a number num_gselect >= 1 and a minimum posterior
+/// 0 <= min_post < 1, it gets the posterior for each element of log_likes
+/// by applying Softmax(), then prunes the posteriors using "num_gselect" and
+/// "min_post" (keeping at least one), and outputs the result into
+/// "post_entry", sorted from greatest to least posterior.
+/// Returns the total log-likelihood (the output of calling ApplySoftMax()
+/// on a copy of log_likes).
+BaseFloat VectorToPosteriorEntry(
+    const VectorBase<BaseFloat> &log_likes,
+    int32 num_gselect,
+    BaseFloat min_post,
+    std::vector<std::pair<int32, BaseFloat> > *post_entry);
+
+/// Convert an alignment "ali" to a posterior "post" (with a scale of 1.0
+/// on each entry).
+void AlignmentToPosterior(const std::vector<int32> &ali,
+                          Posterior *post);
+
+/// Sorts posterior entries so that transition-ids with the same pdf-id are
+/// next to each other.
+void SortPosteriorByPdfs(const TransitionModel &tmodel,
+                         Posterior *post);
+
+
+/// Converts a posterior over transition-ids ("post_in") to be a posterior
+/// over pdf-ids ("post_out").
+void ConvertPosteriorToPdfs(const TransitionModel &tmodel,
+                            const Posterior &post_in,
+                            Posterior *post_out);
+
+/// Converts a posterior over transition-ids ("post_in") to be a posterior
+/// over phones ("post_out").
+void ConvertPosteriorToPhones(const TransitionModel &tmodel,
+                              const Posterior &post_in,
+                              Posterior *post_out);
+
+/// Weight any silence phones in the posterior (i.e. any phones
+/// in the set "silence_set") by scale "silence_scale".
+/// The interface was changed in Feb 2014 to do the modification
+/// "in-place" rather than having separate input and output.
+void WeightSilencePost(const TransitionModel &trans_model,
+                       const ConstIntegerSet<int32> &silence_set,
+                       BaseFloat silence_scale,
+                       Posterior *post);
+
+/// This is similar to WeightSilencePost(), except that on each frame it
+/// works out the amount by which the overall posterior would be reduced,
+/// and scales down everything on that frame by the same amount.  It
+/// has the effect that frames that are mostly silence get down-weighted.
+/// The interface was changed in Feb 2014 to do the modification
+/// "in-place" rather than having separate input and output.
+void WeightSilencePostDistributed(const TransitionModel &trans_model,
+                                  const ConstIntegerSet<int32> &silence_set,
+                                  BaseFloat silence_scale,
+                                  Posterior *post);
+
+/// @} end "addtogroup posterior_group"
+
+
+} // end namespace kaldi
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/hmm/transition-model.h b/kaldi_io/src/kaldi/hmm/transition-model.h
new file mode 100644
index 0000000..ccc4f11
--- /dev/null
+++ b/kaldi_io/src/kaldi/hmm/transition-model.h
@@ -0,0 +1,345 @@
+// hmm/transition-model.h
+
+// Copyright 2009-2012  Microsoft Corporation
+//                      Johns Hopkins University (author: Guoguo Chen)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_HMM_TRANSITION_MODEL_H_
+#define KALDI_HMM_TRANSITION_MODEL_H_
+
+#include "base/kaldi-common.h"
+#include "tree/context-dep.h"
+#include "util/const-integer-set.h"
+#include "fst/fst-decl.h" // forward declarations.
+#include "hmm/hmm-topology.h"
+#include "itf/options-itf.h"
+
+namespace kaldi {
+
+/// \addtogroup hmm_group
+/// @{
+
+// The class TransitionModel is a repository for the transition probabilities.
+// It also handles certain integer mappings.
+// The basic model is as follows.  Each phone has a HMM topology defined in
+// hmm-topology.h.  Each HMM-state of each of these phones has a number of
+// transitions (and final-probs) out of it.  Each HMM-state defined in the
+// HmmTopology class has an associated "pdf_class".  This gets replaced with
+// an actual pdf-id via the tree.  The transition model associates the
+// transition probs with the (phone, HMM-state, pdf-id).  We associate with
+// each such triple a transition-state.  Each
+// transition-state has a number of associated probabilities to estimate;
+// this depends on the number of transitions/final-probs in the topology for
+// that (phone, HMM-state).  Each probability has an associated transition-index.
+// We associate with each (transition-state, transition-index) a unique transition-id.
+// Each individual probability estimated by the transition-model is associated with a
+// transition-id.
+//
+// List of the various types of quantity referred to here and what they mean:
+//           phone:  a phone index (1, 2, 3 ...)
+//       HMM-state:  a number (0, 1, 2...) that indexes TopologyEntry (see hmm-topology.h)
+//          pdf-id:  a number output by the Compute function of ContextDependency (it
+//                   indexes pdf's).  Zero-based.
+// transition-state:  the states for which we estimate transition probabilities for transitions
+//                    out of them.  In some topologies, will map one-to-one with pdf-ids.
+//                    One-based, since it appears on FSTs.
+// transition-index:  identifier of a transition (or final-prob) in the HMM.  Indexes the
+//                    "transitions" vector in HmmTopology::HmmState.  [if it is out of range,
+//                    equal to transitions.size(), it refers to the final-prob.]
+//                    Zero-based.
+//   transition-id:   identifier of a unique parameter of the TransitionModel.
+//                    Associated with a (transition-state, transition-index) pair.
+//                    One-based, since it appears on FSTs.
+//
+// List of the possible mappings TransitionModel can do:
+//             (phone, HMM-state, pdf-id) -> transition-state
+//   (transition-state, transition-index) -> transition-id
+//  Reverse mappings:
+//                        transition-id -> transition-state
+//                        transition-id -> transition-index
+//                     transition-state -> phone
+//                     transition-state -> HMM-state
+//                     transition-state -> pdf-id
+//
+// The main things the TransitionModel object can do are:
+//    Get initialized (need ContextDependency and HmmTopology objects).
+//    Read/write.
+//    Update [given a vector of counts indexed by transition-id].
+//    Do the various integer mappings mentioned above.
+//    Get the probability (or log-probability) associated with a particular transition-id.
+
+
+// Options for TransitionModel::MleUpdate().  Formerly TransitionUpdateConfig.
+struct MleTransitionUpdateConfig {
+  MleTransitionUpdateConfig(BaseFloat floor = 0.01,
+                            BaseFloat mincount = 5.0,
+                            bool share_for_pdfs = false)
+      : floor(floor), mincount(mincount), share_for_pdfs(share_for_pdfs) { }
+
+  void Register(OptionsItf *po) {
+    po->Register("transition-floor", &floor,
+                 "Floor for transition probabilities");
+    po->Register("transition-min-count", &mincount,
+                 "Minimum count required to update transitions from a state");
+    po->Register("share-for-pdfs", &share_for_pdfs,
+                 "If true, share all transition parameters where the states "
+                 "have the same pdf.");
+  }
+  BaseFloat floor;
+  BaseFloat mincount;
+  bool share_for_pdfs;  // If true, share all transition parameters that have the same pdf.
+};
+
+// Options for TransitionModel::MapUpdate().
+struct MapTransitionUpdateConfig {
+  MapTransitionUpdateConfig(): tau(5.0), share_for_pdfs(false) { }
+  void Register(OptionsItf *po) {
+    po->Register("transition-tau", &tau, "Tau value for MAP estimation of transition "
+                 "probabilities.");
+    po->Register("share-for-pdfs", &share_for_pdfs,
+                 "If true, share all transition parameters where the states "
+                 "have the same pdf.");
+  }
+  BaseFloat tau;
+  bool share_for_pdfs;  // If true, share all transition parameters that have the same pdf.
+};
+
+class TransitionModel {
+
+ public:
+  /// Initialize the object [e.g. at the start of training].
+  /// The class keeps a copy of the HmmTopology object, but not
+  /// the ContextDependency object.
+  TransitionModel(const ContextDependency &ctx_dep,
+                  const HmmTopology &hmm_topo);
+
+  /// Constructor that takes no arguments: typically used prior to calling Read.
+  /// num_pdfs_ is zeroed so NumPdfs() never returns an indeterminate value.
+  TransitionModel(): num_pdfs_(0) { }
+
+  void Read(std::istream &is, bool binary);  // note, no symbol table: topo object always read/written w/o symbols.
+  void Write(std::ostream &os, bool binary) const;
+
+
+  /// return reference to HMM-topology object.
+  const HmmTopology &GetTopo() const { return topo_; }
+
+  /// \name Integer mapping functions
+  /// @{
+
+  int32 TripleToTransitionState(int32 phone, int32 hmm_state, int32 pdf) const;
+  int32 PairToTransitionId(int32 trans_state, int32 trans_index) const;
+  int32 TransitionIdToTransitionState(int32 trans_id) const;
+  int32 TransitionIdToTransitionIndex(int32 trans_id) const;
+  int32 TransitionStateToPhone(int32 trans_state) const;
+  int32 TransitionStateToHmmState(int32 trans_state) const;
+  int32 TransitionStateToPdf(int32 trans_state) const;
+  int32 SelfLoopOf(int32 trans_state) const;  // returns the self-loop transition-id, or zero if
+  // this state doesn't have a self-loop.
+
+  inline int32 TransitionIdToPdf(int32 trans_id) const;
+  int32 TransitionIdToPhone(int32 trans_id) const;
+  int32 TransitionIdToPdfClass(int32 trans_id) const;
+  int32 TransitionIdToHmmState(int32 trans_id) const;
+
+  /// @}
+
+  bool IsFinal(int32 trans_id) const;  // returns true if this trans_id goes to the final state
+  // (which is bound to be nonemitting).
+  bool IsSelfLoop(int32 trans_id) const;  // return true if this trans_id corresponds to a self-loop.
+
+  /// Returns the total number of transition-ids (note, these are one-based).
+  inline int32 NumTransitionIds() const { return id2state_.size()-1; }
+
+  /// Returns the number of transition-indices for a particular transition-state.
+  /// Note: "Indices" is the plural of "index".   Index is not the same as "id",
+  /// here.  A transition-index is a zero-based offset into the transitions
+  /// out of a particular transition state.
+  int32 NumTransitionIndices(int32 trans_state) const;
+
+  /// Returns the total number of transition-states (note, these are one-based).
+  int32 NumTransitionStates() const { return triples_.size(); }
+
+  // NumPdfs() actually returns the highest-numbered pdf we ever saw, plus one.
+  // In normal cases this should equal the number of pdfs in the system, but if you
+  // initialized this object with fewer than all the phones, and it happens that
+  // an unseen phone has the highest-numbered pdf, this might be different.
+  int32 NumPdfs() const { return num_pdfs_; }
+
+  // This loops over the triples and finds the highest phone index present. If
+  // the FST symbol table for the phones is created in the expected way, i.e.:
+  // starting from 1 (<eps> is 0) and numbered contiguously till the last phone,
+  // this will be the total number of phones.
+  int32 NumPhones() const;
+
+  /// Returns a sorted, unique list of phones.
+  const std::vector<int32> &GetPhones() const { return topo_.GetPhones(); }
+
+  // Transition-parameter-getting functions:
+  BaseFloat GetTransitionProb(int32 trans_id) const;
+  BaseFloat GetTransitionLogProb(int32 trans_id) const;
+
+  // The following functions are more specialized functions for getting
+  // transition probabilities, that are provided for convenience.
+
+  /// Returns the log-probability of a particular non-self-loop transition
+  /// after subtracting the probability mass of the self-loop and renormalizing;
+  /// will crash if called on a self-loop.  Specifically:
+  /// for non-self-loops it returns the log of that prob divided by (1 minus
+  /// self-loop-prob-for-that-state).
+  BaseFloat GetTransitionLogProbIgnoringSelfLoops(int32 trans_id) const;
+
+  /// Returns the log-prob of the non-self-loop probability
+  /// mass for this transition state. (you can get the self-loop prob, if a self-loop
+  /// exists, by calling GetTransitionLogProb(SelfLoopOf(trans_state))).
+  BaseFloat GetNonSelfLoopLogProb(int32 trans_state) const;
+
+  /// Does Maximum Likelihood estimation.  The stats are counts/weights, indexed
+  /// by transition-id.  This was previously called Update().
+  void MleUpdate(const Vector<double> &stats,
+                 const MleTransitionUpdateConfig &cfg,
+                 BaseFloat *objf_impr_out,
+                 BaseFloat *count_out);
+
+  /// Does Maximum A Posteriori (MAP) estimation.  The stats are counts/weights,
+  /// indexed by transition-id.
+  void MapUpdate(const Vector<double> &stats,
+                 const MapTransitionUpdateConfig &cfg,
+                 BaseFloat *objf_impr_out,
+                 BaseFloat *count_out);
+
+  /// Print will print the transition model in a human-readable way, for purposes of human
+  /// inspection.  The "occs" are optional (they are indexed by pdf-id).
+  void Print(std::ostream &os,
+             const std::vector<std::string> &phone_names,
+             const Vector<double> *occs = NULL);
+
+
+  void InitStats(Vector<double> *stats) const { stats->Resize(NumTransitionIds()+1); }
+
+  void Accumulate(BaseFloat prob, int32 trans_id, Vector<double> *stats) const {
+    KALDI_ASSERT(trans_id <= NumTransitionIds());
+    (*stats)(trans_id) += prob;
+    // This is trivial and doesn't require class members, but leaves us more open
+    // to design changes than doing it manually.
+  }
+
+  /// returns true if all the integer class members are identical (but does not
+  /// compare the transition probabilities).
+  bool Compatible(const TransitionModel &other) const;
+
+ private:
+  void MleUpdateShared(const Vector<double> &stats,
+                       const MleTransitionUpdateConfig &cfg,
+                       BaseFloat *objf_impr_out, BaseFloat *count_out);
+  void MapUpdateShared(const Vector<double> &stats,
+                       const MapTransitionUpdateConfig &cfg,
+                       BaseFloat *objf_impr_out, BaseFloat *count_out);
+  void ComputeTriples(const ContextDependency &ctx_dep);  // called from constructor.  initializes triples_.
+  void ComputeDerived();  // called from constructor and Read function: computes state2id_ and id2state_.
+  void ComputeDerivedOfProbs();  // computes quantities derived from log-probs (currently just
+  // non_self_loop_log_probs_); called whenever log-probs change.
+  void InitializeProbs();  // called from constructor.
+  void Check() const;
+
+  struct Triple {
+    int32 phone;
+    int32 hmm_state;
+    int32 pdf;
+    Triple() { }
+    Triple(int32 phone, int32 hmm_state, int32 pdf):
+        phone(phone), hmm_state(hmm_state), pdf(pdf) { }
+    bool operator < (const Triple &other) const {
+      if (phone < other.phone) return true;
+      else if (phone > other.phone) return false;
+      else if (hmm_state < other.hmm_state) return true;
+      else if (hmm_state > other.hmm_state) return false;
+      else return pdf < other.pdf;
+    }
+    bool operator == (const Triple &other) const {
+      return (phone == other.phone && hmm_state == other.hmm_state
+              && pdf == other.pdf);
+    }
+  };
+
+  HmmTopology topo_;
+
+  /// Triples indexed by transition state minus one;
+  /// the triples are in sorted order which allows us to do the reverse mapping from
+  /// triple to transition state
+  std::vector<Triple> triples_;
+
+  /// Gives the first transition_id of each transition-state; indexed by
+  /// the transition-state.  Array indexed 1..num-transition-states+1 (the last one
+  /// is needed so we can know the num-transitions of the last transition-state).
+  std::vector<int32> state2id_;
+
+  /// For each transition-id, the corresponding transition
+  /// state (indexed by transition-id).
+  std::vector<int32> id2state_;
+
+  /// For each transition-id, the corresponding log-prob.  Indexed by transition-id.
+  Vector<BaseFloat> log_probs_;
+
+  /// For each transition-state, the log of (1 - self-loop-prob).  Indexed by
+  /// transition-state.
+  Vector<BaseFloat> non_self_loop_log_probs_;
+
+  /// This is actually one plus the highest-numbered pdf we ever got back from the
+  /// tree (but the tree numbers pdfs contiguously from zero so this is the number
+  /// of pdfs).
+  int32 num_pdfs_;
+
+  // Kaldi-prefixed macro, consistent with the other classes in this code base.
+  KALDI_DISALLOW_COPY_AND_ASSIGN(TransitionModel);
+
+};
+
+inline int32 TransitionModel::TransitionIdToPdf(int32 trans_id) const {
+  // Two-step lookup (id -> transition-state -> triple); a dedicated array
+  // could replace it if this ever turns out to take a lot of time.
+  KALDI_ASSERT(static_cast<size_t>(trans_id) < id2state_.size() &&
+               "Likely graph/model mismatch (graph built from wrong model?)");
+  const int32 state = id2state_[trans_id];  // triples_ is indexed by state minus one.
+  return triples_[state - 1].pdf;
+}
+
+/// Works out which pdfs might correspond to the given phones.  Will return true
+/// if these pdfs correspond *just* to these phones, false if these pdfs are also
+/// used by other phones.
+/// @param trans_model [in] Transition-model used to work out this information
+/// @param phones [in] A sorted, unique vector that represents a set of phones
+/// @param pdfs [out] Will be set to a sorted, unique list of pdf-ids that correspond
+///                   to one of this set of phones.
+/// @return  Returns true if all of the pdfs output to "pdfs" correspond to phones from
+///          just this set (false if they may be shared with phones outside this set).
+bool GetPdfsForPhones(const TransitionModel &trans_model,
+                      const std::vector<int32> &phones,
+                      std::vector<int32> *pdfs);
+
+/// Works out which phones might correspond to the given pdfs; similar to
+/// GetPdfsForPhones() above, with "pdfs" as the input and "phones" as the output.
+bool GetPhonesForPdfs(const TransitionModel &trans_model,
+                      const std::vector<int32> &pdfs,
+                      std::vector<int32> *phones);
+/// @}
+
+
+} // end namespace kaldi
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/hmm/tree-accu.h b/kaldi_io/src/kaldi/hmm/tree-accu.h
new file mode 100644
index 0000000..d571762
--- /dev/null
+++ b/kaldi_io/src/kaldi/hmm/tree-accu.h
@@ -0,0 +1,69 @@
+// hmm/tree-accu.h
+
+// Copyright 2009-2011 Microsoft Corporation
+//                2013 Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_HMM_TREE_ACCU_H_
+#define KALDI_HMM_TREE_ACCU_H_
+
+#include <cctype>  // For isspace.
+#include <limits>
+#include "base/kaldi-common.h"
+#include "hmm/transition-model.h"
+#include "tree/clusterable-classes.h"
+#include "tree/build-tree-questions.h" // needed for this typedef:
+// typedef std::vector<std::pair<EventVector, Clusterable*> > BuildTreeStatsType;
+
+namespace kaldi {
+
+/// \ingroup tree_group_top
+/// @{
+
+
+/// Accumulates the stats needed for training context-dependency trees (in the
+/// "normal" way).  It adds to "stats" the stats obtained from this file.  Any new
+/// GaussClusterable* in "stats" is allocated with "new" (caller presumably frees).
+
+void AccumulateTreeStats(const TransitionModel &trans_model,
+                         BaseFloat var_floor,
+                         int N,  // context window size.
+                         int P,  // central position.
+                         const std::vector<int32> &ci_phones,  // sorted
+                         const std::vector<int32> &alignment,
+                         const Matrix<BaseFloat> &features,
+                         const std::vector<int32> *phone_map, // or NULL
+                         std::map<EventType, GaussClusterable*> *stats);
+
+
+
+/*** Read a mapping from one phone set to another.  The phone map file has lines
+ of the form <old-phone> <new-phone>, where both entries are integers, usually
+ nonzero (but this is not enforced).  This function will crash if the input is
+ invalid, e.g. there are multiple inconsistent entries for the same old phone.
+ The output vector "phone_map" will be indexed by old-phone and will contain
+ the corresponding new-phone, or -1 for any entry that was not defined. */
+ 
+void ReadPhoneMap(std::string phone_map_rxfilename,
+                  std::vector<int32> *phone_map);
+
+
+
+/// @}
+
+}  // end namespace kaldi.
+
+#endif
diff --git a/kaldi_io/src/kaldi/itf/clusterable-itf.h b/kaldi_io/src/kaldi/itf/clusterable-itf.h
new file mode 100644
index 0000000..7ef9ae0
--- /dev/null
+++ b/kaldi_io/src/kaldi/itf/clusterable-itf.h
@@ -0,0 +1,97 @@
+// itf/clusterable-itf.h
+
+// Copyright 2009-2011     Microsoft Corporation;  Go Vivace Inc.
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_ITF_CLUSTERABLE_ITF_H_
+#define KALDI_ITF_CLUSTERABLE_ITF_H_ 1
+
+#include <string>
+#include "base/kaldi-common.h"
+
+namespace kaldi {
+
+
+/** \addtogroup clustering_group
+ @{
+  A virtual class for clusterable objects; see \ref clustering for an
+  explanation of its function.
+*/
+
+
+
+class Clusterable {
+ public:
+  /// \name Functions that must be overridden
+  /// @{
+
+  /// Return a copy of this object.
+  virtual Clusterable *Copy() const = 0;
+  /// Return the objective function associated with the stats
+  /// [assuming ML estimation]
+  virtual BaseFloat Objf() const = 0;
+  /// Return the normalizer (typically, count) associated with the stats
+  virtual BaseFloat Normalizer() const = 0;
+  /// Set stats to empty.
+  virtual void SetZero() = 0;
+  /// Add other stats.
+  virtual void Add(const Clusterable &other) = 0;
+  /// Subtract other stats.
+  virtual void Sub(const Clusterable &other) = 0;
+  /// Scale the stats by a positive number f [optional: default raises KALDI_ERR].
+  virtual void Scale(BaseFloat f) {
+    KALDI_ERR << "This Clusterable object does not implement Scale().";
+  }
+
+  /// Return a string that describes the inherited type.
+  virtual std::string Type() const = 0;
+
+  /// Write data to stream.
+  virtual void Write(std::ostream &os, bool binary) const = 0;
+
+  /// Read data from the stream "os" (an input stream, despite the name) and
+  /// return the corresponding object (const function; it's a class member because
+  /// we need access to the vtable so generic code can read derived types).
+  virtual Clusterable* ReadNew(std::istream &os, bool binary) const = 0;
+
+  virtual ~Clusterable() {}
+
+  /// @}
+
+  /// \name Functions that have default implementations
+  /// @{
+
+  // These functions have default implementations (but may be overridden for
+  // speed). Implementations in tree/clusterable-classes.cc
+
+  /// Return the objective function of the combined object this + other.
+  virtual BaseFloat ObjfPlus(const Clusterable &other) const;
+  /// Return the objective function of the subtracted object this - other.
+  virtual BaseFloat ObjfMinus(const Clusterable &other) const;
+  /// Return the objective function decrease from merging the two
+  /// clusters, negated to be a positive number (or zero).
+  virtual BaseFloat Distance(const Clusterable &other) const;
+  /// @}
+
+};
+/// @} end of "addtogroup clustering_group"
+
+}  // end namespace kaldi
+
+#endif  // KALDI_ITF_CLUSTERABLE_ITF_H_
+
diff --git a/kaldi_io/src/kaldi/itf/context-dep-itf.h b/kaldi_io/src/kaldi/itf/context-dep-itf.h
new file mode 100644
index 0000000..6a0bd0f
--- /dev/null
+++ b/kaldi_io/src/kaldi/itf/context-dep-itf.h
@@ -0,0 +1,80 @@
+// itf/context-dep-itf.h
+
+// Copyright 2009-2011     Microsoft Corporation;  Go Vivace Inc.
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_ITF_CONTEXT_DEP_ITF_H_
+#define KALDI_ITF_CONTEXT_DEP_ITF_H_
+#include "base/kaldi-common.h"
+
+namespace kaldi {
+/// @ingroup tree_group
+/// @{
+
+/// context-dep-itf.h provides a link between
+/// the tree-building code in ../tree/, and the FST code in ../fstext/
+/// (particularly, ../fstext/context-dep.h).  It is an abstract
+/// interface that describes an object that can map from a
+/// phone-in-context to a sequence of integer leaf-ids.
+class ContextDependencyInterface {
+ public:
+  /// ContextWidth() returns the value N (e.g. 3 for triphone models) that says how many phones
+  ///   are considered for computing context.
+  virtual int ContextWidth() const = 0;
+
+  /// Central position P of the phone context, in 0-based numbering, e.g. P = 1 for typical
+  /// triphone system.  We have to see if we can do without this function.
+  virtual int CentralPosition() const = 0;
+
+  /// The "new" Compute interface.  For typical topologies,
+  /// pdf_class would be 0, 1, 2.
+  /// Returns success or failure; outputs the pdf-id.
+  ///
+  /// "Compute" is the main function of this interface.  It takes a sequence
+  /// of N phones (and it must be N phones), possibly including epsilons
+  /// (symbol id zero) but only at positions other than P [these represent
+  /// unknown phone context due to end or begin of sequence].
+  /// @param phoneseq   the sequence of N phones described above
+  /// @param pdf_class  typically 0, 1 or 2 (see above)
+  /// @param pdf_id     output location for the resulting pdf-id
+  /// @return false if expansion somehow failed; normally the calling code
+  ///   should raise an exception if this happens.
+  /// @note We can define a different interface later in order to handle
+  ///   other kinds of information-- the underlying data-structures from
+  ///   event-map.h are very flexible.
+  virtual bool Compute(const std::vector<int32> &phoneseq, int32 pdf_class,
+                       int32 *pdf_id) const = 0;
+
+
+
+  /// NumPdfs() returns the number of acoustic pdfs (they are numbered 0.. NumPdfs()-1).
+  virtual int32 NumPdfs() const = 0;
+
+  virtual ~ContextDependencyInterface() {}
+  ContextDependencyInterface() {}
+
+  /// Returns pointer to new object which is copy of current one.
+  virtual ContextDependencyInterface *Copy() const = 0;
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(ContextDependencyInterface);
+};
+/// @}
+}  // namespace Kaldi
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/itf/decodable-itf.h b/kaldi_io/src/kaldi/itf/decodable-itf.h
new file mode 100644
index 0000000..ba4d765
--- /dev/null
+++ b/kaldi_io/src/kaldi/itf/decodable-itf.h
@@ -0,0 +1,123 @@
+// itf/decodable-itf.h
+
+// Copyright 2009-2011  Microsoft Corporation;  Saarland University;
+//                      Mirko Hannemann;  Go Vivace Inc.;
+//                2013  Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_ITF_DECODABLE_ITF_H_
+#define KALDI_ITF_DECODABLE_ITF_H_ 1
+#include "base/kaldi-common.h"
+
+namespace kaldi {
+/// @ingroup Interfaces
+/// @{
+
+
+/**
+    DecodableInterface provides a link between the (acoustic-modeling and
+    feature-processing) code and the decoder.  The idea is to make this
+    interface as small as possible, and to make it as agnostic as possible about
+    the form of the acoustic model (e.g. don't assume the probabilities are a
+    function of just a vector of floats), and about the decoder (e.g. don't
+    assume it accesses frames in strict left-to-right order).  For normal
+    models, without on-line operation, the "decodable" sub-class will just be a
+    wrapper around a matrix of features and an acoustic model, and it will
+    answer the question 'what is the acoustic likelihood for this index and this
+    frame?'.
+
+    For online decoding, where the features are coming in in real time, it is
+    important to understand the IsLastFrame() and NumFramesReady() functions.
+    There are two ways these are used: the old online-decoding code, in ../online/,
+    and the new online-decoding code, in ../online2/.  In the old online-decoding
+    code, the decoder would do:
+    \code{.cc}
+    for (int frame = 0; !decodable.IsLastFrame(frame); frame++) {
+      // Process this frame
+    }
+    \endcode
+   and the call to IsLastFrame would block if the features had not arrived yet.
+   The decodable object would have to know when to terminate the decoding.  This
+   online-decoding mode is still supported, it is what happens when you call, for
+   example, LatticeFasterDecoder::Decode().
+
+   We realized that this "blocking" mode of decoding is not very convenient
+   because it forces the program to be multi-threaded and makes it complex to
+   control endpointing.  In the "new" decoding code, you don't call (for example)
+   LatticeFasterDecoder::Decode(), you call LatticeFasterDecoder::InitDecoding(),
+   and then each time you get more features, you provide them to the decodable
+   object, and you call LatticeFasterDecoder::AdvanceDecoding(), which does
+   something like this:
+   \code{.cc}
+   while (num_frames_decoded_ < decodable.NumFramesReady()) {
+     // Decode one more frame [increments num_frames_decoded_]
+   }
+   \endcode
+   So the decodable object never has IsLastFrame() called.  For decoding where
+   you are starting with a matrix of features, the NumFramesReady() function will
+   always just return the number of frames in the file, and IsLastFrame() will
+   return true for the last frame.
+
+   For truly online decoding, the "old" online decodable objects in ../online/ have a
+   "blocking" IsLastFrame() and will crash if you call NumFramesReady().
+   The "new" online decodable objects in ../online2/ return the number of frames
+   currently accessible if you call NumFramesReady().  You will likely not need
+   to call IsLastFrame(), but we implement it to only return true for the last
+   frame of the file once we've decided to terminate decoding.
+*/
+
+class DecodableInterface {
+ public:
+  /// Returns the log likelihood, which will be negated in the decoder.
+  /// The "frame" starts from zero.  You should verify that IsLastFrame(frame-1)
+  /// returns false before calling this.
+  virtual BaseFloat LogLikelihood(int32 frame, int32 index) = 0;
+
+  /// Returns true if this is the last frame.  Frames are zero-based, so the
+  /// first frame is zero.  IsLastFrame(-1) will return false, unless the file
+  /// is empty (which is a case that I'm not sure all the code will handle, so
+  /// be careful).  Caution: the behavior of this function in an online setting
+  /// is being changed somewhat.  In future it may return false in cases where
+  /// we haven't yet decided to terminate decoding, but later true if we decide
+  /// to terminate decoding.  The plan in future is to rely more on
+  /// NumFramesReady(), and in future, IsLastFrame() would always return false
+  /// in an online-decoding setting, and would only return true in a
+  /// decoding-from-matrix setting where we want to allow the last delta or LDA
+  /// features to be flushed out for compatibility with the baseline setup.
+  virtual bool IsLastFrame(int32 frame) const = 0;
+  
+  /// The call NumFramesReady() will return the number of frames currently available
+  /// for this decodable object.  This is for use in setups where you don't want the
+  /// decoder to block while waiting for input.  This is newly added as of Jan 2014,
+  /// and I hope, going forward, to rely on this mechanism more than IsLastFrame to
+  /// know when to stop decoding.
+  virtual int32 NumFramesReady() const {
+    KALDI_ERR << "NumFramesReady() not implemented for this decodable type.";
+    return -1;  // NOTE(review): presumably unreachable once KALDI_ERR fires -- confirm
+  }
+
+  /// Returns the number of states in the acoustic model
+  /// (they will be indexed one-based, i.e. from 1 to NumIndices();
+  /// this is for compatibility with OpenFst).
+  virtual int32 NumIndices() const = 0;
+  
+  virtual ~DecodableInterface() {}
+};
+/// @}
+}  // namespace Kaldi
+
+#endif  // KALDI_ITF_DECODABLE_ITF_H_
diff --git a/kaldi_io/src/kaldi/itf/online-feature-itf.h b/kaldi_io/src/kaldi/itf/online-feature-itf.h
new file mode 100644
index 0000000..dafcd8a
--- /dev/null
+++ b/kaldi_io/src/kaldi/itf/online-feature-itf.h
@@ -0,0 +1,105 @@
+// itf/online-feature-itf.h
+
+// Copyright    2013  Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_ITF_ONLINE_FEATURE_ITF_H_
+#define KALDI_ITF_ONLINE_FEATURE_ITF_H_ 1
+#include "base/kaldi-common.h"
+#include "matrix/matrix-lib.h"
+
+namespace kaldi {
+/// @ingroup Interfaces
+/// @{
+
+/**
+   OnlineFeatureInterface is an interface for online feature processing (it is
+   also usable in the offline setting, but currently we're not using it for
+   that).  This is for use in the online2/ directory, and it supersedes the
+   interface in ../online/online-feat-input.h.  We have a slightly different
+   model that puts more control in the hands of the calling thread, and won't
+   involve waiting on semaphores in the decoding thread.
+
+   This interface only specifies how the object *outputs* the features.
+   How it obtains the features, e.g. from a previous object or objects of type
+   OnlineFeatureInterface, is not specified in the interface and you will
+   likely define new constructors or methods in the derived type to do that.
+
+   You should appreciate that this interface is designed to allow random
+   access to features, as long as they are ready.  That is, the user
+   can call GetFrame for any frame less than NumFramesReady(), and when
+   implementing a child class you must not make assumptions about the
+   order in which the user makes these calls.
+*/
+   
+class OnlineFeatureInterface {
+ public:
+  virtual int32 Dim() const = 0; ///< Returns the feature dimension.
+  
+  /// Returns the total number of frames, since the start of the utterance, that
+  /// are now available.  In an online-decoding context, this will likely
+  /// increase with time as more data becomes available.
+  virtual int32 NumFramesReady() const = 0;
+
+  /// Returns true if this is the last frame.  Frame indices are zero-based, so the
+  /// first frame is zero.  IsLastFrame(-1) will return false, unless the file
+  /// is empty (which is a case that I'm not sure all the code will handle, so
+  /// be careful).  This function may return false for some frame if
+  /// we haven't yet decided to terminate decoding, but later true if we decide
+  /// to terminate decoding.  This function exists mainly to correctly handle
+  /// end effects in feature extraction, and is not a mechanism to determine how
+  /// many frames are in the decodable object (as it used to be, and for backward
+  /// compatibility, still is, in the Decodable interface).
+  virtual bool IsLastFrame(int32 frame) const = 0;
+  
+  /// Gets the feature vector for this frame and writes it to *feat.  Before
+  /// calling this for a given frame, it is assumed that you called
+  /// NumFramesReady() and it returned a number greater than "frame".  Otherwise
+  /// this call will likely crash with an assert failure.  This function is not
+  /// declared const, in case there is some kind of caching going on, but most
+  /// of the time it shouldn't modify the class.
+  virtual void GetFrame(int32 frame, VectorBase<BaseFloat> *feat) = 0;
+
+  /// Virtual destructor.  Note: constructors that take another member of
+  /// type OnlineFeatureInterface are not expected to take ownership of
+  /// that pointer; the caller needs to keep track of that manually.
+  virtual ~OnlineFeatureInterface() { }  
+};
+
+
+/// Add a virtual class for "source" features such as MFCC or PLP or pitch
+/// features.
+class OnlineBaseFeature: public OnlineFeatureInterface {
+ public:
+  /// This would be called from the application, when you get more wave data.
+  /// Note: the sampling_rate is typically only provided so the code can assert
+  /// that it matches the sampling rate expected in the options.
+  virtual void AcceptWaveform(BaseFloat sampling_rate,
+                              const VectorBase<BaseFloat> &waveform) = 0;
+
+  /// InputFinished() tells the class you won't be providing any
+  /// more waveform.  This will help flush out the last few frames
+  /// of delta or LDA features (it will typically affect the return value
+  /// of IsLastFrame()).
+  virtual void InputFinished() = 0;
+};
+
+
+/// @}
+}  // namespace Kaldi
+
+#endif  // KALDI_ITF_ONLINE_FEATURE_ITF_H_
diff --git a/kaldi_io/src/kaldi/itf/optimizable-itf.h b/kaldi_io/src/kaldi/itf/optimizable-itf.h
new file mode 100644
index 0000000..1b8f54b
--- /dev/null
+++ b/kaldi_io/src/kaldi/itf/optimizable-itf.h
@@ -0,0 +1,51 @@
+// itf/optimizable-itf.h
+
+// Copyright 2009-2011  Go Vivace Inc.;  Microsoft Corporation;  Georg Stemmer
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_ITF_OPTIMIZABLE_ITF_H_
+#define KALDI_ITF_OPTIMIZABLE_ITF_H_
+
+#include "base/kaldi-common.h"
+#include "matrix/matrix-lib.h"
+
+namespace kaldi {
+/// @ingroup Interfaces
+/// @{
+
+/// OptimizableInterface provides
+/// a virtual class for optimizable objects.
+/// E.g. a class that computes a likelihood function and
+/// its gradient using some parameter
+/// that has to be optimized on data
+/// could inherit from it.
+template<class Real>
+class OptimizableInterface {
+ public:
+  /// Computes the gradient for the parameter vector `params` and returns it
+  /// in *gradient_out.
+  virtual void ComputeGradient(const Vector<Real> &params,
+                               Vector<Real> *gradient_out) = 0;
+  /// Computes the function value for the parameter vector `params`
+  /// and returns it.
+  virtual Real ComputeValue(const Vector<Real> &params) = 0;
+
+  virtual ~OptimizableInterface() {}
+};
+/// @} end of "Interfaces"
+} // end namespace kaldi
+
+#endif
diff --git a/kaldi_io/src/kaldi/itf/options-itf.h b/kaldi_io/src/kaldi/itf/options-itf.h
new file mode 100644
index 0000000..204f46d
--- /dev/null
+++ b/kaldi_io/src/kaldi/itf/options-itf.h
@@ -0,0 +1,49 @@
+// itf/options-itf.h
+
+// Copyright 2013  Tanel Alumae, Tallinn University of Technology
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_ITF_OPTIONS_ITF_H_
+#define KALDI_ITF_OPTIONS_ITF_H_ 1
+#include "base/kaldi-common.h"
+
+namespace kaldi {
+
+class OptionsItf {
+ public:
+  /// Register(): one pure-virtual overload per type that `ptr` may point to.
+  virtual void Register(const std::string &name,
+                bool *ptr, const std::string &doc) = 0; 
+  virtual void Register(const std::string &name,
+                int32 *ptr, const std::string &doc) = 0; 
+  virtual void Register(const std::string &name,
+                uint32 *ptr, const std::string &doc) = 0; 
+  virtual void Register(const std::string &name,
+                float *ptr, const std::string &doc) = 0; 
+  virtual void Register(const std::string &name,
+                double *ptr, const std::string &doc) = 0; 
+  virtual void Register(const std::string &name,
+                std::string *ptr, const std::string &doc) = 0; 
+  // NOTE(review): presumably name/doc are the option's name and help text -- confirm.
+  virtual ~OptionsItf() {}
+};
+
+}  // namespace Kaldi
+
+#endif  // KALDI_ITF_OPTIONS_ITF_H_
+
+
diff --git a/kaldi_io/src/kaldi/matrix/cblas-wrappers.h b/kaldi_io/src/kaldi/matrix/cblas-wrappers.h
new file mode 100644
index 0000000..ebec0a3
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/cblas-wrappers.h
@@ -0,0 +1,491 @@
+// matrix/cblas-wrappers.h
+
+// Copyright 2012  Johns Hopkins University (author: Daniel Povey);
+//                 Haihua Xu; Wei Shi
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_MATRIX_CBLAS_WRAPPERS_H_
+#define KALDI_MATRIX_CBLAS_WRAPPERS_H_ 1
+
+
+#include <limits>
+#include "matrix/sp-matrix.h"
+#include "matrix/kaldi-vector.h"
+#include "matrix/kaldi-matrix.h"
+#include "matrix/matrix-functions.h"
+
+// Do not include this file directly.  It is to be included
+// by .cc files in this directory.
+
+namespace kaldi {
+
+
+inline void cblas_Xcopy(const int N, const float *X, const int incX, float *Y,
+                        const int incY) {
+  cblas_scopy(N, X, incX, Y, incY);  // float overload -> cblas_scopy
+}
+// Double-precision overload of the same name; arguments are forwarded as-is.
+inline void cblas_Xcopy(const int N, const double *X, const int incX, double *Y,
+                        const int incY) {
+  cblas_dcopy(N, X, incX, Y, incY);  // double overload -> cblas_dcopy
+}
+
+
+inline float cblas_Xasum(const int N, const float *X, const int incX) {
+  return cblas_sasum(N, X, incX);  // float overload -> cblas_sasum
+}
+// Double-precision overload of the same name; arguments are forwarded as-is.
+inline double cblas_Xasum(const int N, const double *X, const int incX) {
+  return cblas_dasum(N, X, incX);  // double overload -> cblas_dasum
+}
+
+inline void cblas_Xrot(const int N, float *X, const int incX, float *Y,
+                       const int incY, const float c, const float s) {
+  cblas_srot(N, X, incX, Y, incY, c, s);  // float overload -> cblas_srot
+}
+inline void cblas_Xrot(const int N, double *X, const int incX, double *Y,
+                       const int incY, const double c, const double s) {
+  cblas_drot(N, X, incX, Y, incY, c, s);  // double overload -> cblas_drot
+}
+inline float cblas_Xdot(const int N, const float *const X,
+                        const int incX, const float *const Y,
+                        const int incY) {
+  return cblas_sdot(N, X, incX, Y, incY);  // float overload -> cblas_sdot
+}
+inline double cblas_Xdot(const int N, const double *const X,
+                        const int incX, const double *const Y,
+                        const int incY) {
+  return cblas_ddot(N, X, incX, Y, incY);  // double overload -> cblas_ddot
+}
+inline void cblas_Xaxpy(const int N, const float alpha, const float *X,
+                        const int incX, float *Y, const int incY) {
+  cblas_saxpy(N, alpha, X, incX, Y, incY);  // float overload -> cblas_saxpy
+}
+inline void cblas_Xaxpy(const int N, const double alpha, const double *X,
+                        const int incX, double *Y, const int incY) {
+  cblas_daxpy(N, alpha, X, incX, Y, incY);  // double overload -> cblas_daxpy
+}
+inline void cblas_Xscal(const int N, const float alpha, float *data,
+                        const int inc) {
+  cblas_sscal(N, alpha, data, inc);  // float overload -> cblas_sscal
+}
+inline void cblas_Xscal(const int N, const double alpha, double *data, 
+                        const int inc) {
+  cblas_dscal(N, alpha, data, inc);  // double overload -> cblas_dscal
+}
+inline void cblas_Xspmv(const float alpha, const int num_rows, const float *Mdata,
+                        const float *v, const int v_inc,
+                        const float beta, float *y, const int y_inc) {
+  cblas_sspmv(CblasRowMajor, CblasLower, num_rows, alpha, Mdata, v, v_inc, beta, y, y_inc);
+}  // float; alpha comes first here, unlike the (dim, alpha, ...) overloads of this name
+inline void cblas_Xspmv(const double alpha, const int num_rows, const double *Mdata,
+                        const double *v, const int v_inc,
+                        const double beta, double *y, const int y_inc) {
+  cblas_dspmv(CblasRowMajor, CblasLower, num_rows, alpha, Mdata, v, v_inc, beta, y, y_inc);
+}  // double; layout and triangle are fixed to CblasRowMajor / CblasLower
+inline void cblas_Xtpmv(MatrixTransposeType trans, const float *Mdata,
+                        const int num_rows, float *y, const int y_inc) {
+  cblas_stpmv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
+              CblasNonUnit, num_rows, Mdata, y, y_inc);  // float -> cblas_stpmv
+}  // assumes MatrixTransposeType values match CBLAS_TRANSPOSE -- TODO confirm
+inline void cblas_Xtpmv(MatrixTransposeType trans, const double *Mdata,
+                        const int num_rows, double *y, const int y_inc) {
+  cblas_dtpmv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
+              CblasNonUnit, num_rows, Mdata, y, y_inc);  // double -> cblas_dtpmv
+}
+
+
+inline void cblas_Xtpsv(MatrixTransposeType trans, const float *Mdata,
+                        const int num_rows, float *y, const int y_inc) {
+  cblas_stpsv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
+              CblasNonUnit, num_rows, Mdata, y, y_inc);  // float -> cblas_stpsv
+}  // assumes MatrixTransposeType values match CBLAS_TRANSPOSE -- TODO confirm
+inline void cblas_Xtpsv(MatrixTransposeType trans, const double *Mdata,
+                        const int num_rows, double *y, const int y_inc) {
+  cblas_dtpsv(CblasRowMajor, CblasLower, static_cast<CBLAS_TRANSPOSE>(trans),
+              CblasNonUnit, num_rows, Mdata, y, y_inc);  // double -> cblas_dtpsv
+}
+
+// x = alpha * M * y + beta * x
+inline void cblas_Xspmv(MatrixIndexT dim, float alpha, const float *Mdata,
+                        const float *ydata, MatrixIndexT ystride,
+                        float beta, float *xdata, MatrixIndexT xstride) {
+  cblas_sspmv(CblasRowMajor, CblasLower, dim, alpha, Mdata,
+              ydata, ystride, beta, xdata, xstride);  // ydata is input, xdata output
+}  // float overload; here dim comes first, unlike the (alpha, num_rows, ...) overloads
+inline void cblas_Xspmv(MatrixIndexT dim, double alpha, const double *Mdata,
+                        const double *ydata, MatrixIndexT ystride,
+                        double beta, double *xdata, MatrixIndexT xstride) {
+  cblas_dspmv(CblasRowMajor, CblasLower, dim, alpha, Mdata,
+              ydata, ystride, beta, xdata, xstride);  // ydata is input, xdata output
+}  // double overload
+
+// Implements  A += alpha * (x y'  + y x'); A is symmetric matrix.
+inline void cblas_Xspr2(MatrixIndexT dim, float alpha, const float *Xdata,
+                        MatrixIndexT incX, const float *Ydata, MatrixIndexT incY,
+                          float *Adata) {
+  cblas_sspr2(CblasRowMajor, CblasLower, dim, alpha, Xdata,
+              incX, Ydata, incY, Adata);  // float overload -> cblas_sspr2
+}  // layout and triangle are fixed to CblasRowMajor / CblasLower
+inline void cblas_Xspr2(MatrixIndexT dim, double alpha, const double *Xdata,
+                        MatrixIndexT incX, const double *Ydata, MatrixIndexT incY,
+                        double *Adata) {
+  cblas_dspr2(CblasRowMajor, CblasLower, dim, alpha, Xdata,
+              incX, Ydata, incY, Adata);  // double overload -> cblas_dspr2
+}
+
+// Implements  A += alpha * (x x'); A is symmetric matrix.
+inline void cblas_Xspr(MatrixIndexT dim, float alpha, const float *Xdata,
+                       MatrixIndexT incX, float *Adata) {
+  cblas_sspr(CblasRowMajor, CblasLower, dim, alpha, Xdata, incX, Adata);
+}  // float overload -> cblas_sspr; layout/triangle fixed to row-major, lower
+inline void cblas_Xspr(MatrixIndexT dim, double alpha, const double *Xdata,
+                       MatrixIndexT incX, double *Adata) {
+  cblas_dspr(CblasRowMajor, CblasLower, dim, alpha, Xdata, incX, Adata);
+}  // double overload -> cblas_dspr
+
+// sgemv,dgemv: y = alpha M x + beta y.
+inline void cblas_Xgemv(MatrixTransposeType trans, MatrixIndexT num_rows,
+                        MatrixIndexT num_cols, float alpha, const float *Mdata,
+                        MatrixIndexT stride, const float *xdata,
+                        MatrixIndexT incX, float beta, float *ydata, MatrixIndexT incY) {
+  cblas_sgemv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
+              num_cols, alpha, Mdata, stride, xdata, incX, beta, ydata, incY);
+}  // float -> cblas_sgemv; assumes trans values match CBLAS_TRANSPOSE -- TODO confirm
+inline void cblas_Xgemv(MatrixTransposeType trans, MatrixIndexT num_rows,
+                        MatrixIndexT num_cols, double alpha, const double *Mdata,
+                        MatrixIndexT stride, const double *xdata,
+                        MatrixIndexT incX, double beta, double *ydata, MatrixIndexT incY) {
+  cblas_dgemv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
+              num_cols, alpha, Mdata, stride, xdata, incX, beta, ydata, incY);
+}  // double -> cblas_dgemv
+
+// sgbmv, dgbmv: y = alpha M x + beta * y.
+inline void cblas_Xgbmv(MatrixTransposeType trans, MatrixIndexT num_rows,
+                        MatrixIndexT num_cols, MatrixIndexT num_below,
+                        MatrixIndexT num_above, float alpha, const float *Mdata,
+                        MatrixIndexT stride, const float *xdata,
+                        MatrixIndexT incX, float beta, float *ydata, MatrixIndexT incY) {
+  cblas_sgbmv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
+              num_cols, num_below, num_above, alpha, Mdata, stride, xdata,
+              incX, beta, ydata, incY);  // float overload -> cblas_sgbmv
+}  // assumes MatrixTransposeType values match CBLAS_TRANSPOSE -- TODO confirm
+inline void cblas_Xgbmv(MatrixTransposeType trans, MatrixIndexT num_rows,
+                        MatrixIndexT num_cols, MatrixIndexT num_below,
+                        MatrixIndexT num_above, double alpha, const double *Mdata,
+                        MatrixIndexT stride, const double *xdata,
+                        MatrixIndexT incX, double beta, double *ydata, MatrixIndexT incY) {
+  cblas_dgbmv(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(trans), num_rows,
+              num_cols, num_below, num_above, alpha, Mdata, stride, xdata,
+              incX, beta, ydata, incY);  // double overload -> cblas_dgbmv
+}
+
+
+template<typename Real>
+inline void Xgemv_sparsevec(MatrixTransposeType trans, MatrixIndexT num_rows,
+                            MatrixIndexT num_cols, Real alpha, const Real *Mdata,
+                            MatrixIndexT stride, const Real *xdata,
+                            MatrixIndexT incX, Real beta, Real *ydata,
+                            MatrixIndexT incY) {
+  // Computes y = alpha * op(M) * x + beta * y one nonzero x entry at a time,
+  // so zero entries of x are skipped.  op(M) is M for kNoTrans, else M^T.
+  const bool plain = (trans == kNoTrans);
+  // Length of y, and number of x entries to visit.
+  const MatrixIndexT out_dim = plain ? num_rows : num_cols;
+  const MatrixIndexT in_dim = plain ? num_cols : num_rows;
+  // Entry k of x pairs with column k of M (kNoTrans) or row k of M (otherwise):
+  // columns begin 1 element apart and are walked with step `stride`, while
+  // rows begin `stride` elements apart and are walked with step 1.
+  const MatrixIndexT start_step = plain ? 1 : stride;
+  const MatrixIndexT elem_step = plain ? stride : 1;
+  if (beta != 1.0) cblas_Xscal(out_dim, beta, ydata, incY);
+  for (MatrixIndexT k = 0; k < in_dim; k++) {
+    const Real coeff = xdata[k * incX];
+    if (coeff == 0.0) continue;
+    // Add to ydata the k'th column (or row) of M, times alpha * coeff.
+    cblas_Xaxpy(out_dim, coeff * alpha, Mdata + (k * start_step), elem_step,
+                ydata, incY);
+  }
+}
+
+inline void cblas_Xgemm(const float alpha,
+                        MatrixTransposeType transA,
+                        const float *Adata,
+                        MatrixIndexT a_num_rows, MatrixIndexT a_num_cols, MatrixIndexT a_stride,
+                        MatrixTransposeType transB, 
+                        const float *Bdata, MatrixIndexT b_stride,
+                        const float beta,
+                        float *Mdata, 
+                        MatrixIndexT num_rows, MatrixIndexT num_cols,MatrixIndexT stride) {
+  cblas_sgemm(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(transA), 
+              static_cast<CBLAS_TRANSPOSE>(transB),
+              num_rows, num_cols, transA == kNoTrans ? a_num_cols : a_num_rows,
+              alpha, Adata, a_stride, Bdata, b_stride,
+              beta, Mdata, stride);  // float overload -> cblas_sgemm
+}  // inner dimension is taken from A: a_num_cols if transA == kNoTrans, else a_num_rows
+inline void cblas_Xgemm(const double alpha,
+                        MatrixTransposeType transA,
+                        const double *Adata,
+                        MatrixIndexT a_num_rows, MatrixIndexT a_num_cols, MatrixIndexT a_stride,
+                        MatrixTransposeType transB, 
+                        const double *Bdata, MatrixIndexT b_stride,
+                        const double beta,
+                        double *Mdata, 
+                        MatrixIndexT num_rows, MatrixIndexT num_cols,MatrixIndexT stride) {
+  cblas_dgemm(CblasRowMajor, static_cast<CBLAS_TRANSPOSE>(transA), 
+              static_cast<CBLAS_TRANSPOSE>(transB),
+              num_rows, num_cols, transA == kNoTrans ? a_num_cols : a_num_rows,
+              alpha, Adata, a_stride, Bdata, b_stride,
+              beta, Mdata, stride);  // double overload -> cblas_dgemm
+}  // inner dimension is taken from A, as in the float overload
+
+
+inline void cblas_Xsymm(const float alpha,
+                        MatrixIndexT sz,
+                        const float *Adata,MatrixIndexT a_stride,
+                        const float *Bdata,MatrixIndexT b_stride,
+                        const float beta,
+                        float *Mdata, MatrixIndexT stride) {
+  cblas_ssymm(CblasRowMajor, CblasLeft, CblasLower, sz, sz, alpha, Adata,
+              a_stride, Bdata, b_stride, beta, Mdata, stride);  // float -> cblas_ssymm
+}  // sz is passed for both dimension arguments; side/triangle fixed to Left/Lower
+inline void cblas_Xsymm(const double alpha,
+                        MatrixIndexT sz,
+                        const double *Adata,MatrixIndexT a_stride,
+                        const double *Bdata,MatrixIndexT b_stride,
+                        const double beta,
+                        double *Mdata, MatrixIndexT stride) {
+  cblas_dsymm(CblasRowMajor, CblasLeft, CblasLower, sz, sz, alpha, Adata,
+              a_stride, Bdata, b_stride, beta, Mdata, stride);  // double -> cblas_dsymm
+}
+// ger: M += alpha x y^T.
+inline void cblas_Xger(MatrixIndexT num_rows, MatrixIndexT num_cols, float alpha,
+                       const float *xdata, MatrixIndexT incX, const float *ydata,
+                       MatrixIndexT incY, float *Mdata, MatrixIndexT stride) {
+  cblas_sger(CblasRowMajor, num_rows, num_cols, alpha, xdata, incX, ydata, incY,
+             Mdata, stride);  // float overload -> cblas_sger
+}  // incX/incY are forwarded (they used to be ignored in favour of a fixed 1)
+inline void cblas_Xger(MatrixIndexT num_rows, MatrixIndexT num_cols, double alpha,
+                       const double *xdata, MatrixIndexT incX, const double *ydata,
+                       MatrixIndexT incY, double *Mdata, MatrixIndexT stride) {
+  cblas_dger(CblasRowMajor, num_rows, num_cols, alpha, xdata, incX, ydata, incY,
+             Mdata, stride);  // double overload -> cblas_dger
+}  // incX/incY are forwarded (they used to be ignored in favour of a fixed 1)
+
+// syrk: symmetric rank-k update.
+//   trans == kNoTrans :  C = alpha A A^T + beta C
+//   otherwise         :  C = alpha A^T A + beta C
+// dim_c is dim(C); other_dim_a is the "other" dimension of A, i.e.
+// num-cols(A) if kNoTrans, or num-rows(A) if kTrans.
+// Only the row-major, lower-triangular variant is needed, so both of those
+// options are hard-coded here.
+inline void cblas_Xsyrk(const MatrixTransposeType trans,
+                        const MatrixIndexT dim_c,
+                        const MatrixIndexT other_dim_a,
+                        const float alpha,
+                        const float *A, const MatrixIndexT a_stride,
+                        const float beta,
+                        float *C, const MatrixIndexT c_stride) {
+  const CBLAS_TRANSPOSE cblas_trans = static_cast<CBLAS_TRANSPOSE>(trans);
+  cblas_ssyrk(CblasRowMajor, CblasLower, cblas_trans,
+              dim_c, other_dim_a,
+              alpha, A, a_stride,
+              beta, C, c_stride);
+}
+
+// Double-precision overload of the above; forwards to cblas_dsyrk.
+inline void cblas_Xsyrk(const MatrixTransposeType trans,
+                        const MatrixIndexT dim_c,
+                        const MatrixIndexT other_dim_a,
+                        const double alpha,
+                        const double *A, const MatrixIndexT a_stride,
+                        const double beta,
+                        double *C, const MatrixIndexT c_stride) {
+  const CBLAS_TRANSPOSE cblas_trans = static_cast<CBLAS_TRANSPOSE>(trans);
+  cblas_dsyrk(CblasRowMajor, CblasLower, cblas_trans,
+              dim_c, other_dim_a,
+              alpha, A, a_stride,
+              beta, C, c_stride);
+}
+
+/// Matrix-vector multiply using a symmetric banded matrix (sbmv).  This
+/// wrapper always passes k = 0 off-diagonals and a leading dimension of 1, so
+/// A is treated as holding just a diagonal; this is used for elementwise
+/// multiplication.  The arguments that are fixed here (layout, triangle, k,
+/// lda and the unit increments of x and y) are left out of the signature.
+inline void cblas_Xsbmv1(const MatrixIndexT dim,
+                         const double *A,
+                         const double alpha,
+                         const double *x,
+                         const double beta,
+                         double *y) {
+  const int num_off_diagonals = 0;  // only the main diagonal is stored in A.
+  const int unit = 1;               // lda of A and increments of x and y.
+  cblas_dsbmv(CblasRowMajor, CblasLower, dim, num_off_diagonals,
+              alpha, A, unit,
+              x, unit,
+              beta, y, unit);
+}
+
+/// Single-precision overload of the above; forwards to cblas_ssbmv.
+inline void cblas_Xsbmv1(const MatrixIndexT dim,
+                         const float *A,
+                         const float alpha,
+                         const float *x,
+                         const float beta,
+                         float *y) {
+  const int num_off_diagonals = 0;  // only the main diagonal is stored in A.
+  const int unit = 1;               // lda of A and increments of x and y.
+  cblas_ssbmv(CblasRowMajor, CblasLower, dim, num_off_diagonals,
+              alpha, A, unit,
+              x, unit,
+              beta, y, unit);
+}
+
+
+/// Elementwise in-place product: b[i] *= a[i] for every i in [0, dim).
+/// CBLAS has no routine for this, so strictly speaking this is not a CBLAS
+/// wrapper; it could be extended in future.
+/// The main loop handles four elements per pass, computing all four products
+/// before storing any of them; a scalar loop then finishes the remainder.
+inline void mul_elements(
+    const MatrixIndexT dim,
+    const double *a,
+    double *b) {
+  MatrixIndexT pos = 0;
+  while (pos + 4 <= dim) {
+    const double prod0 = a[pos] * b[pos];
+    const double prod1 = a[pos+1] * b[pos+1];
+    const double prod2 = a[pos+2] * b[pos+2];
+    const double prod3 = a[pos+3] * b[pos+3];
+    b[pos] = prod0;
+    b[pos+1] = prod1;
+    b[pos+2] = prod2;
+    b[pos+3] = prod3;
+    pos += 4;
+  }
+  while (pos < dim) {
+    b[pos] *= a[pos];
+    ++pos;
+  }
+}
+
+/// Single-precision overload of the elementwise product b[i] *= a[i]; same
+/// four-at-a-time main loop followed by a scalar tail.
+inline void mul_elements(
+    const MatrixIndexT dim,
+    const float *a,
+    float *b) {
+  MatrixIndexT pos = 0;
+  while (pos + 4 <= dim) {
+    const float prod0 = a[pos] * b[pos];
+    const float prod1 = a[pos+1] * b[pos+1];
+    const float prod2 = a[pos+2] * b[pos+2];
+    const float prod3 = a[pos+3] * b[pos+3];
+    b[pos] = prod0;
+    b[pos+1] = prod1;
+    b[pos+2] = prod2;
+    b[pos+3] = prod3;
+    pos += 4;
+  }
+  while (pos < dim) {
+    b[pos] *= a[pos];
+    ++pos;
+  }
+}
+
+
+
+// add clapack here
+#if !defined(HAVE_ATLAS)
+// tptri (single precision): forwards to stptri_ with the option characters
+// fixed to "U" and "N"; the status code is written to *result.
+inline void clapack_Xtptri(KaldiBlasInt *num_rows, float *Mdata,
+                           KaldiBlasInt *result) {
+  char uplo[] = "U";
+  char diag[] = "N";
+  stptri_(uplo, diag, num_rows, Mdata, result);
+}
+// tptri (double precision): same as above, forwarding to dtptri_.
+inline void clapack_Xtptri(KaldiBlasInt *num_rows, double *Mdata,
+                           KaldiBlasInt *result) {
+  char uplo[] = "U";
+  char diag[] = "N";
+  dtptri_(uplo, diag, num_rows, Mdata, result);
+}
+// 
+// getrf (single precision): passes every argument straight through to
+// sgetrf_; the status code is written to *result.
+inline void clapack_Xgetrf2(KaldiBlasInt *num_rows,
+                            KaldiBlasInt *num_cols,
+                            float *Mdata,
+                            KaldiBlasInt *stride,
+                            KaldiBlasInt *pivot,
+                            KaldiBlasInt *result) {
+  sgetrf_(num_rows, num_cols,
+          Mdata, stride,
+          pivot, result);
+}
+// getrf (double precision): same as above, forwarding to dgetrf_.
+inline void clapack_Xgetrf2(KaldiBlasInt *num_rows,
+                            KaldiBlasInt *num_cols,
+                            double *Mdata,
+                            KaldiBlasInt *stride,
+                            KaldiBlasInt *pivot,
+                            KaldiBlasInt *result) {
+  dgetrf_(num_rows, num_cols,
+          Mdata, stride,
+          pivot, result);
+}
+
+// 
+// getri (single precision): passes every argument straight through to
+// sgetri_, including the caller-supplied workspace (p_work, l_work); the
+// status code is written to *result.
+inline void clapack_Xgetri2(KaldiBlasInt *num_rows,
+                            float *Mdata,
+                            KaldiBlasInt *stride,
+                            KaldiBlasInt *pivot,
+                            float *p_work,
+                            KaldiBlasInt *l_work,
+                            KaldiBlasInt *result) {
+  sgetri_(num_rows,
+          Mdata, stride,
+          pivot,
+          p_work, l_work,
+          result);
+}
+// getri (double precision): same as above, forwarding to dgetri_.
+inline void clapack_Xgetri2(KaldiBlasInt *num_rows,
+                            double *Mdata,
+                            KaldiBlasInt *stride,
+                            KaldiBlasInt *pivot,
+                            double *p_work,
+                            KaldiBlasInt *l_work,
+                            KaldiBlasInt *result) {
+  dgetri_(num_rows,
+          Mdata, stride,
+          pivot,
+          p_work, l_work,
+          result);
+}
+//
+// gesvd (single precision): passes every argument, in the order given, to
+// sgesvd_.  Note the order used here: the V-related arguments (v, Vdata,
+// vstride) come before the U-related ones, and num_cols comes before
+// num_rows.  The status code is written to *result.
+inline void clapack_Xgesvd(char *v, char *u,
+                           KaldiBlasInt *num_cols, KaldiBlasInt *num_rows,
+                           float *Mdata, KaldiBlasInt *stride,
+                           float *sv,
+                           float *Vdata, KaldiBlasInt *vstride,
+                           float *Udata, KaldiBlasInt *ustride,
+                           float *p_work, KaldiBlasInt *l_work,
+                           KaldiBlasInt *result) {
+  sgesvd_(v, u, num_cols, num_rows,
+          Mdata, stride,
+          sv,
+          Vdata, vstride,
+          Udata, ustride,
+          p_work, l_work,
+          result);
+}
+// gesvd (double precision): same as above, forwarding to dgesvd_.
+inline void clapack_Xgesvd(char *v, char *u,
+                           KaldiBlasInt *num_cols, KaldiBlasInt *num_rows,
+                           double *Mdata, KaldiBlasInt *stride,
+                           double *sv,
+                           double *Vdata, KaldiBlasInt *vstride,
+                           double *Udata, KaldiBlasInt *ustride,
+                           double *p_work, KaldiBlasInt *l_work,
+                           KaldiBlasInt *result) {
+  dgesvd_(v, u, num_cols, num_rows,
+          Mdata, stride,
+          sv,
+          Vdata, vstride,
+          Udata, ustride,
+          p_work, l_work,
+          result);
+}
+//
+// sptri (single precision): forwards to ssptri_ with the triangle option
+// fixed to "U"; the status code is written to *result.
+inline void clapack_Xsptri(KaldiBlasInt *num_rows, float *Mdata,
+                           KaldiBlasInt *ipiv, float *work,
+                           KaldiBlasInt *result) {
+  char uplo[] = "U";
+  ssptri_(uplo, num_rows, Mdata, ipiv, work, result);
+}
+// sptri (double precision): same as above, forwarding to dsptri_.
+inline void clapack_Xsptri(KaldiBlasInt *num_rows, double *Mdata,
+                           KaldiBlasInt *ipiv, double *work,
+                           KaldiBlasInt *result) {
+  char uplo[] = "U";
+  dsptri_(uplo, num_rows, Mdata, ipiv, work, result);
+}
+//
+// sptrf (single precision): forwards to ssptrf_ with the triangle option
+// fixed to "U"; the status code is written to *result.
+inline void clapack_Xsptrf(KaldiBlasInt *num_rows, float *Mdata,
+                           KaldiBlasInt *ipiv, KaldiBlasInt *result) {
+  char uplo[] = "U";
+  ssptrf_(uplo, num_rows, Mdata, ipiv, result);
+}
+// sptrf (double precision): same as above, forwarding to dsptrf_.
+inline void clapack_Xsptrf(KaldiBlasInt *num_rows, double *Mdata,
+                           KaldiBlasInt *ipiv, KaldiBlasInt *result) {
+  char uplo[] = "U";
+  dsptrf_(uplo, num_rows, Mdata, ipiv, result);
+}
+#else
+// getrf, ATLAS flavour (single precision): calls clapack_sgetrf with
+// column-major order and stores its return value in *result.
+inline void clapack_Xgetrf(MatrixIndexT num_rows, MatrixIndexT num_cols,
+                           float *Mdata, MatrixIndexT stride,
+                           int *pivot, int *result) {
+  const int status = clapack_sgetrf(CblasColMajor, num_rows, num_cols,
+                                    Mdata, stride, pivot);
+  *result = status;
+}
+
+// getrf, ATLAS flavour (double precision): same as above, calling
+// clapack_dgetrf.
+inline void clapack_Xgetrf(MatrixIndexT num_rows, MatrixIndexT num_cols,
+                           double *Mdata, MatrixIndexT stride,
+                           int *pivot, int *result) {
+  const int status = clapack_dgetrf(CblasColMajor, num_rows, num_cols,
+                                    Mdata, stride, pivot);
+  *result = status;
+}
+//
+// trtri, ATLAS flavour (single precision): calls clapack_strtri with
+// column-major order, the upper triangle and a non-unit diagonal, and returns
+// its status code.
+inline int clapack_Xtrtri(int num_rows, float *Mdata, MatrixIndexT stride) {
+  const int status = clapack_strtri(CblasColMajor, CblasUpper, CblasNonUnit,
+                                    num_rows, Mdata, stride);
+  return status;
+}
+
+// trtri, ATLAS flavour (double precision): same as above, calling
+// clapack_dtrtri.
+inline int clapack_Xtrtri(int num_rows, double *Mdata, MatrixIndexT stride) {
+  const int status = clapack_dtrtri(CblasColMajor, CblasUpper, CblasNonUnit,
+                                    num_rows, Mdata, stride);
+  return status;
+}
+//
+// getri, ATLAS flavour (single precision): calls clapack_sgetri with
+// column-major order and stores its return value in *result.
+inline void clapack_Xgetri(MatrixIndexT num_rows, float *Mdata,
+                           MatrixIndexT stride,
+                           int *pivot, int *result) {
+  const int status = clapack_sgetri(CblasColMajor, num_rows, Mdata, stride,
+                                    pivot);
+  *result = status;
+}
+// getri, ATLAS flavour (double precision): same as above, calling
+// clapack_dgetri.
+inline void clapack_Xgetri(MatrixIndexT num_rows, double *Mdata,
+                           MatrixIndexT stride,
+                           int *pivot, int *result) {
+  const int status = clapack_dgetri(CblasColMajor, num_rows, Mdata, stride,
+                                    pivot);
+  *result = status;
+}
+#endif
+
+}
+// namespace kaldi
+
+#endif
diff --git a/kaldi_io/src/kaldi/matrix/compressed-matrix.h b/kaldi_io/src/kaldi/matrix/compressed-matrix.h
new file mode 100644
index 0000000..746cab3
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/compressed-matrix.h
@@ -0,0 +1,179 @@
+// matrix/compressed-matrix.h
+
+// Copyright 2012  Johns Hopkins University (author: Daniel Povey)
+//                 Frantisek Skala, Wei Shi
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_MATRIX_COMPRESSED_MATRIX_H_
+#define KALDI_MATRIX_COMPRESSED_MATRIX_H_ 1
+
+#include "kaldi-matrix.h"
+
+namespace kaldi {
+
+/// \addtogroup matrix_group
+/// @{
+
+/// This class does lossy compression of a matrix.  It only
+/// supports copying to-from a KaldiMatrix.  For large matrices,
+/// each element is compressed into about one byte, but there
+/// is a little overhead on top of that (globally, and also per
+/// column).
+
+/// The basic idea is for each column (in the normal configuration)
+/// we work out the values at the 0th, 25th, 75th and 100th percentiles
+/// and store them as 16-bit integers; we then encode each value in
+/// the column as a single byte, in 3 separate ranges with different
+/// linear encodings (0-25th, 25-75th, 75th-100th).
+/// If the matrix has 8 rows or fewer, we simply store all values as
+/// uint16.
+
+class CompressedMatrix {
+ public:
+  /// Constructs an empty matrix (data_ is NULL).
+  CompressedMatrix(): data_(NULL) { }
+
+  /// Calls Destroy().
+  ~CompressedMatrix() { Destroy(); }
+  
+  /// Constructs from an uncompressed matrix by calling CopyFromMat(mat).
+  /// NOTE(review): this constructor is not declared explicit, so it also acts
+  /// as an implicit conversion from MatrixBase<Real>.
+  template<typename Real>
+  CompressedMatrix(const MatrixBase<Real> &mat): data_(NULL) { CopyFromMat(mat); }
+
+  /// Initializer that can be used to select part of an existing
+  /// CompressedMatrix without un-compressing and re-compressing (note: unlike
+  /// similar initializers for class Matrix, it doesn't point to the same memory
+  /// location).
+  CompressedMatrix(const CompressedMatrix &mat,
+                   const MatrixIndexT row_offset,
+                   const MatrixIndexT num_rows,
+                   const MatrixIndexT col_offset,
+                   const MatrixIndexT num_cols);
+
+  /// Returns the raw data_ pointer (NULL for an empty matrix); see the comment
+  /// on data_ for the layout.
+  void *Data() const { return this->data_; }
+
+  /// This will resize *this and copy the contents of mat to *this.
+  template<typename Real>
+  void CopyFromMat(const MatrixBase<Real> &mat);
+
+  /// Copy constructor.
+  CompressedMatrix(const CompressedMatrix &mat);
+
+  CompressedMatrix &operator = (const CompressedMatrix &mat); // assignment operator.
+
+  template<typename Real>
+  CompressedMatrix &operator = (const MatrixBase<Real> &mat); // assignment operator.
+  
+  /// Copies contents to matrix.  Note: mat must have the correct size,
+  /// CopyToMat no longer attempts to resize it.
+  template<typename Real>
+  void CopyToMat(MatrixBase<Real> *mat) const;
+
+  /// Writes the matrix to the stream os; `binary` selects the mode.
+  void Write(std::ostream &os, bool binary) const;
+  
+  /// Reads the matrix from the stream is; `binary` selects the mode.
+  void Read(std::istream &is, bool binary);
+
+  /// Returns number of rows (or zero for empty matrix).
+  /// The value is read from the GlobalHeader at the start of data_.
+  inline MatrixIndexT NumRows() const { return (data_ == NULL) ? 0 :
+      (*reinterpret_cast<GlobalHeader*>(data_)).num_rows; }
+
+  /// Returns number of columns (or zero for empty matrix).
+  /// The value is read from the GlobalHeader at the start of data_.
+  inline MatrixIndexT NumCols() const { return (data_ == NULL) ? 0 :
+      (*reinterpret_cast<GlobalHeader*>(data_)).num_cols; }
+
+  /// Copies row #row of the matrix into vector v.
+  /// Note: v must have same size as #cols.
+  template<typename Real>
+  void CopyRowToVec(MatrixIndexT row, VectorBase<Real> *v) const;
+
+  /// Copies column #col of the matrix into vector v.
+  /// Note: v must have same size as #rows.
+  template<typename Real>
+  void CopyColToVec(MatrixIndexT col, VectorBase<Real> *v) const;
+
+  /// Copies submatrix of compressed matrix into matrix dest.
+  /// Submatrix starts at row row_offset and column column_offset and its size
+  /// is defined by size of provided matrix dest
+  template<typename Real>
+  void CopyToMat(int32 row_offset,
+                 int32 column_offset,
+                 MatrixBase<Real> *dest) const;
+
+  /// Exchanges the data_ pointers of *this and *other; no data is copied.
+  void Swap(CompressedMatrix *other) { std::swap(data_, other->data_); }
+  
+  // Matrix<float> and Matrix<double> are given access to the private members.
+  friend class Matrix<float>;
+  friend class Matrix<double>;
+ private:
+
+  // allocates data using new [], ensures byte alignment
+  // sufficient for float.
+  static void *AllocateData(int32 num_bytes);
+
+  // the "format" will be 1 for the original format where each column has a
+  // PerColHeader, and 2 for the format now used for matrices with 8 or fewer
+  // rows, where everything is represented as 16-bit integers.
+  // This header sits at the very start of data_ (see NumRows()/NumCols()).
+  struct GlobalHeader {
+    int32 format;
+    float min_value;
+    float range;
+    int32 num_rows;
+    int32 num_cols;
+  };
+
+  // Size of the data described by `header`.
+  // NOTE(review): presumably a byte count covering header + payload; confirm
+  // against the definition.
+  static MatrixIndexT DataSize(const GlobalHeader &header);
+
+  // Per-column header: four percentile values of the column, each stored as
+  // a 16-bit integer.
+  struct PerColHeader {
+    uint16 percentile_0;
+    uint16 percentile_25;
+    uint16 percentile_75;
+    uint16 percentile_100;
+  };
+
+  // Compresses one column of num_rows values, read from `data` with the given
+  // stride, filling in *header and byte_data.
+  template<typename Real>
+  static void CompressColumn(const GlobalHeader &global_header,
+                             const Real *data, MatrixIndexT stride,
+                             int32 num_rows, PerColHeader *header,
+                             unsigned char *byte_data);
+  // Fills in *header for one column of num_rows values, read from `data` with
+  // the given stride.
+  template<typename Real>
+  static void ComputeColHeader(const GlobalHeader &global_header,
+                               const Real *data, MatrixIndexT stride,
+                               int32 num_rows, PerColHeader *header);
+
+  // Conversions between float and the 16-bit representation; both take the
+  // GlobalHeader (presumably for min_value/range; confirm in the definition).
+  static inline uint16 FloatToUint16(const GlobalHeader &global_header,
+                                     float value);
+
+  static inline float Uint16ToFloat(const GlobalHeader &global_header,
+                                    uint16 value);
+  // Conversions between float and the single-byte representation, given the
+  // column's four percentile values as floats.
+  static inline unsigned char FloatToChar(float p0, float p25,
+                                          float p75, float p100,
+                                          float value);
+  static inline float CharToFloat(float p0, float p25,
+                                  float p75, float p100,
+                                  unsigned char value);
+  
+  // Called by the destructor.
+  // NOTE(review): presumably frees data_ and resets it to NULL; confirm.
+  void Destroy();
+  
+  void *data_; // first GlobalHeader, then PerColHeader (repeated), then
+  // the byte data for each column (repeated).  Note: don't intersperse
+  // the byte data with the PerColHeaders, because of alignment issues.
+
+};
+
+
+/// @} end of \addtogroup matrix_group
+
+
+}  // namespace kaldi
+
+
+#endif  // KALDI_MATRIX_COMPRESSED_MATRIX_H_
diff --git a/kaldi_io/src/kaldi/matrix/jama-eig.h b/kaldi_io/src/kaldi/matrix/jama-eig.h
new file mode 100644
index 0000000..c7278bc
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/jama-eig.h
@@ -0,0 +1,924 @@
+// matrix/jama-eig.h
+
+// Copyright 2009-2011 Microsoft Corporation 
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+// This file consists of a port and modification of materials from
+//   JAMA: A Java Matrix Package
+// under the following notice: This software is a cooperative product of
+// The MathWorks and the National Institute of Standards and Technology (NIST)
+// which has been released to the public.  This notice and the original code are
+// available at http://math.nist.gov/javanumerics/jama/domain.notice
+
+
+
+#ifndef KALDI_MATRIX_JAMA_EIG_H_
+#define KALDI_MATRIX_JAMA_EIG_H_ 1
+
+#include "matrix/kaldi-matrix.h"
+
+namespace kaldi {
+
+// This class is not to be used externally.  See the Eig function in the Matrix
+// class in kaldi-matrix.h.  This is the external interface.
+
+template<typename Real> class EigenvalueDecomposition {
+  // Port of the EigenvalueDecomposition class from the JAMA library
+  // (version 1.0.2).
+ public:
+  EigenvalueDecomposition(const MatrixBase<Real> &A);
+
+  ~EigenvalueDecomposition();  // free memory.
+
+  // Copies the eigenvector matrix V (called P externally) into *V_out, which
+  // must already be n_ by n_.
+  void GetV(MatrixBase<Real> *V_out) {
+    KALDI_ASSERT(V_out->NumRows() == static_cast<MatrixIndexT>(n_)
+                 && V_out->NumCols() == static_cast<MatrixIndexT>(n_));
+    for (int row = 0; row < n_; ++row) {
+      for (int col = 0; col < n_; ++col) {
+        (*V_out)(row, col) = V(row, col);  // V(row, col) is the private accessor.
+      }
+    }
+  }
+
+  // Copies the real parts of the eigenvalues (d_) into *r_out, which must
+  // already have dimension n_.
+  void GetRealEigenvalues(VectorBase<Real> *r_out) {
+    KALDI_ASSERT(r_out->Dim() == static_cast<MatrixIndexT>(n_));
+    for (int idx = 0; idx < n_; ++idx) {
+      (*r_out)(idx) = d_[idx];
+    }
+  }
+
+  // Copies the imaginary parts of the eigenvalues (e_) into *i_out, which must
+  // already have dimension n_.
+  void GetImagEigenvalues(VectorBase<Real> *i_out) {
+    KALDI_ASSERT(i_out->Dim() == static_cast<MatrixIndexT>(n_));
+    for (int idx = 0; idx < n_; ++idx) {
+      (*i_out)(idx) = e_[idx];
+    }
+  }
+
+ private:
+  // Element accessors for the row-major n_ by n_ arrays H_ and V_.
+  inline Real &H(int r, int c) { return H_[r*n_ + c]; }
+  inline Real &V(int r, int c) { return V_[r*n_ + c]; }
+
+  // Complex division (xr + i*xi) / (yr + i*yi); the real and imaginary parts
+  // of the quotient are written to *cdivr and *cdivi.  The branch is chosen
+  // by which of |yr| and |yi| is larger.
+  inline static void cdiv(Real xr, Real xi, Real yr, Real yi,
+                          Real *cdivr, Real *cdivi) {
+    if (std::abs(yr) > std::abs(yi)) {
+      const Real ratio = yi/yr;
+      const Real denom = yr + ratio*yi;
+      *cdivr = (xr + ratio*xi)/denom;
+      *cdivi = (xi - ratio*xr)/denom;
+      return;
+    }
+    const Real ratio = yr/yi;
+    const Real denom = yi + ratio*yr;
+    *cdivr = (ratio*xr + xi)/denom;
+    *cdivi = (ratio*xi - xr)/denom;
+  }
+
+  // Symmetric Householder reduction to tridiagonal form.
+  void Tred2 ();
+
+  // Symmetric tridiagonal QL algorithm.
+  void Tql2 ();
+
+  // Nonsymmetric reduction to Hessenberg form.
+  void Orthes ();
+
+  // Nonsymmetric reduction from Hessenberg to real Schur form.
+  void Hqr2 ();
+
+  int n_;  // matrix dimension.
+
+  Real *d_;  // real parts of eigenvalues.
+  Real *e_;  // imaginary parts of eigenvalues.
+  Real *V_;  // the eigenvectors (P in our external notation)
+  Real *H_;  // the nonsymmetric Hessenberg form.
+  Real *ort_;  // working storage for nonsymmetric algorithm.
+};
+
+// Explicit instantiation definitions of the class template for Real = float
+// and Real = double.
+template class EigenvalueDecomposition<float>;  // force instantiation.
+template class EigenvalueDecomposition<double>;  // force instantiation.
+
+// Symmetric Householder reduction to tridiagonal form.
+// Works in place on V_ (through the V(r, c) accessor) and uses d_ and e_ both
+// as scratch space and as outputs; on return e_[0] is 0 and d_ holds what was
+// the last row of V just before that row was reset to (0, ..., 0, 1).
+// NOTE(review): presumably d_ / e_ end up as the diagonal / sub-diagonal of
+// the tridiagonal matrix consumed by Tql2(), as in JAMA's tred2 -- confirm.
+// NOTE(review): V(n_-1, n_-1) and e_[0] are written unconditionally, so this
+// assumes n_ >= 1 -- confirm that the constructor enforces it.
+template<typename Real> void  EigenvalueDecomposition<Real>::Tred2() {
+  //  This is derived from the Algol procedures tred2 by
+  //  Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
+  //  Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
+  //  Fortran subroutine in EISPACK.
+
+  // Start with d_ holding the last row of V.
+  for (int j = 0; j < n_; j++) {
+    d_[j] = V(n_-1, j);
+  }
+
+  // Householder reduction to tridiagonal form.
+
+  // Process rows from the bottom (i = n_-1) up to i = 1.
+  for (int i = n_-1; i > 0; i--) {
+
+    // Scale to avoid under/overflow.
+
+    Real scale = 0.0;
+    Real h = 0.0;
+    for (int k = 0; k < i; k++) {
+      scale = scale + std::abs(d_[k]);
+    }
+    if (scale == 0.0) {
+      // The first i entries of d_ are all zero: no reflection is generated for
+      // this row; just zero row/column i of V below index i and load row i-1.
+      e_[i] = d_[i-1];
+      for (int j = 0; j < i; j++) {
+        d_[j] = V(i-1, j);
+        V(i, j) = 0.0;
+        V(j, i) = 0.0;
+      }
+    } else {
+
+      // Generate Householder vector.
+
+      for (int k = 0; k < i; k++) {
+        d_[k] /= scale;
+        h += d_[k] * d_[k];
+      }
+      Real f = d_[i-1];
+      Real g = std::sqrt(h);
+      // Give g the opposite sign to f, so that f - g below does not cancel.
+      if (f > 0) {
+        g = -g;
+      }
+      e_[i] = scale * g;
+      h = h - f * g;
+      d_[i-1] = f - g;
+      for (int j = 0; j < i; j++) {
+        e_[j] = 0.0;
+      }
+
+      // Apply similarity transformation to remaining columns.
+
+      // Only entries V(k, j) with k >= j are read here (lower triangle).
+      for (int j = 0; j < i; j++) {
+        f = d_[j];
+        V(j, i) = f;  // keep the Householder vector in column i of V.
+        g =e_[j] + V(j, j) * f;
+        for (int k = j+1; k <= i-1; k++) {
+          g += V(k, j) * d_[k];
+          e_[k] += V(k, j) * f;
+        }
+        e_[j] = g;
+      }
+      f = 0.0;
+      for (int j = 0; j < i; j++) {
+        e_[j] /= h;
+        f += e_[j] * d_[j];
+      }
+      Real hh = f / (h + h);
+      for (int j = 0; j < i; j++) {
+        e_[j] -= hh * d_[j];
+      }
+      for (int j = 0; j < i; j++) {
+        f = d_[j];
+        g = e_[j];
+        for (int k = j; k <= i-1; k++) {
+          V(k, j) -= (f * e_[k] + g * d_[k]);
+        }
+        // Load row i-1 into d_ for the next pass and clear row i.
+        d_[j] = V(i-1, j);
+        V(i, j) = 0.0;
+      }
+    }
+    // h is stashed in d_[i]; the accumulation loop below reads it as d_[i+1]
+    // (it is 0 when the scale == 0 branch was taken).
+    d_[i] = h;
+  }
+
+  // Accumulate transformations.
+
+  for (int i = 0; i < n_-1; i++) {
+    // The last row of V is used as temporary storage for the diagonal of V.
+    V(n_-1, i) = V(i, i);
+    V(i, i) = 1.0;
+    Real h = d_[i+1];
+    if (h != 0.0) {
+      for (int k = 0; k <= i; k++) {
+        d_[k] = V(k, i+1) / h;
+      }
+      for (int j = 0; j <= i; j++) {
+        Real g = 0.0;
+        for (int k = 0; k <= i; k++) {
+          g += V(k, i+1) * V(k, j);
+        }
+        for (int k = 0; k <= i; k++) {
+          V(k, j) -= g * d_[k];
+        }
+      }
+    }
+    for (int k = 0; k <= i; k++) {
+      V(k, i+1) = 0.0;
+    }
+  }
+  // Move the values saved in the last row of V into d_ and make that row the
+  // last row of the identity.
+  for (int j = 0; j < n_; j++) {
+    d_[j] = V(n_-1, j);
+    V(n_-1, j) = 0.0;
+  }
+  V(n_-1, n_-1) = 1.0;
+   e_[0] = 0.0;
+}
+
+// Symmetric tridiagonal QL algorithm.
+// Reads and overwrites d_ and e_ and applies the accumulated rotations to the
+// columns of V.  On return every e_[l] has been set to 0, and d_ is sorted
+// into ascending order with the columns of V permuted to match.
+// NOTE(review): presumably expects d_, e_ and V as left by Tred2() -- confirm
+// against the caller.
+template<typename Real> void EigenvalueDecomposition<Real>::Tql2() {
+  //  This is derived from the Algol procedures tql2, by
+  //  Bowdler, Martin, Reinsch, and Wilkinson, Handbook for
+  //  Auto. Comp., Vol.ii-Linear Algebra, and the corresponding
+  //  Fortran subroutine in EISPACK.
+
+  // Shift e_ down by one index, so that e_[n_-1] becomes 0.
+  for (int i = 1; i < n_; i++) {
+     e_[i-1] = e_[i];
+  }
+   e_[n_-1] = 0.0;
+
+  Real f = 0.0;     // running total of the shifts applied so far.
+  Real tst1 = 0.0;  // running max of |d_[l]| + |e_[l]|, used in the tolerance.
+  Real eps = std::numeric_limits<Real>::epsilon();
+  for (int l = 0; l < n_; l++) {
+
+    // Find small subdiagonal element
+
+    tst1 = std::max(tst1, std::abs(d_[l]) + std::abs(e_[l]));
+    int m = l;
+    // Because e_[n_-1] == 0, this loop breaks at m == n_-1 at the latest.
+    while (m < n_) {
+      if (std::abs(e_[m]) <= eps*tst1) {
+        break;
+      }
+      m++;
+    }
+
+    // If m == l, d_[l] is an eigenvalue,
+    // otherwise, iterate.
+
+    if (m > l) {
+      int iter = 0;
+      do {
+        // NOTE: iter is incremented but never tested, so there is no cap on
+        // the number of passes through this loop.
+        iter = iter + 1;  // (Could check iteration count here.)
+
+        // Compute implicit shift
+
+        Real g = d_[l];
+        Real p = (d_[l+1] - g) / (2.0 *e_[l]);
+        Real r = Hypot(p, static_cast<Real>(1.0));  // This is a Kaldi version of hypot that works with templates.
+        if (p < 0) {
+          r = -r;
+        }
+        d_[l] =e_[l] / (p + r);
+        d_[l+1] =e_[l] * (p + r);
+        Real dl1 = d_[l+1];
+        Real h = g - d_[l];
+        // Subtract the shift h from the remaining diagonal entries.
+        for (int i = l+2; i < n_; i++) {
+          d_[i] -= h;
+        }
+        f = f + h;
+
+        // Implicit QL transformation.
+
+        p = d_[m];
+        Real c = 1.0;
+        Real c2 = c;
+        Real c3 = c;
+        Real el1 =e_[l+1];
+        Real s = 0.0;
+        Real s2 = 0.0;
+        // c/s are the current rotation's cosine/sine; c2, c3 and s2 keep the
+        // values from the previous one or two iterations of this loop.
+        for (int i = m-1; i >= l; i--) {
+          c3 = c2;
+          c2 = c;
+          s2 = s;
+          g = c *e_[i];
+          h = c * p;
+          r = Hypot(p, e_[i]);  // This is a Kaldi version of Hypot that works with templates.
+          e_[i+1] = s * r;
+          s =e_[i] / r;
+          c = p / r;
+          p = c * d_[i] - s * g;
+          d_[i+1] = h + s * (c * g + s * d_[i]);
+
+          // Accumulate transformation.
+
+          // Apply the rotation to columns i and i+1 of V.
+          for (int k = 0; k < n_; k++) {
+            h = V(k, i+1);
+            V(k, i+1) = s * V(k, i) + c * h;
+            V(k, i) = c * V(k, i) - s * h;
+          }
+        }
+        p = -s * s2 * c3 * el1 *e_[l] / dl1;
+        e_[l] = s * p;
+        d_[l] = c * p;
+
+        // Check for convergence.
+
+      } while (std::abs(e_[l]) > eps*tst1);
+    }
+    // Add the accumulated shift back to obtain the final value.
+    d_[l] = d_[l] + f;
+    e_[l] = 0.0;
+  }
+
+  // Sort eigenvalues and corresponding vectors.
+
+  // Selection sort into ascending order: find the smallest d_[j] for j >= i,
+  // then swap it into position i together with the matching columns of V.
+  for (int i = 0; i < n_-1; i++) {
+    int k = i;
+    Real p = d_[i];
+    for (int j = i+1; j < n_; j++) {
+      if (d_[j] < p) {
+        k = j;
+        p = d_[j];
+      }
+    }
+    if (k != i) {
+      d_[k] = d_[i];
+      d_[i] = p;
+      for (int j = 0; j < n_; j++) {
+        p = V(j, i);
+        V(j, i) = V(j, k);
+        V(j, k) = p;
+      }
+    }
+  }
+}
+
+// Nonsymmetric reduction to Hessenberg form.
+// Works in place on H_ (through the H(r, c) accessor), using ort_ as working
+// storage, and then overwrites V with the accumulated transformations,
+// starting from the identity.  The index range is fixed to the whole matrix
+// (low = 0, high = n_-1).
+template<typename Real>
+void EigenvalueDecomposition<Real>::Orthes() {
+
+  //  This is derived from the Algol procedures orthes and ortran,
+  //  by Martin and Wilkinson, Handbook for Auto. Comp.,
+  //  Vol.ii-Linear Algebra, and the corresponding
+  //  Fortran subroutines in EISPACK.
+
+  int low = 0;
+  int high = n_-1;
+
+  // For each column m-1, process the entries in rows m..high.
+  for (int m = low+1; m <= high-1; m++) {
+
+    // Scale column.
+
+    Real scale = 0.0;
+    for (int i = m; i <= high; i++) {
+      scale = scale + std::abs(H(i, m-1));
+    }
+    // If rows m..high of the column are all zero there is nothing to do.
+    if (scale != 0.0) {
+
+      // Compute Householder transformation.
+
+      Real h = 0.0;
+      for (int i = high; i >= m; i--) {
+        ort_[i] = H(i, m-1)/scale;
+        h += ort_[i] * ort_[i];
+      }
+      Real g = std::sqrt(h);
+      // Give g the opposite sign to ort_[m], so that ort_[m] - g below does
+      // not cancel.
+      if (ort_[m] > 0) {
+        g = -g;
+      }
+      h = h - ort_[m] * g;
+      ort_[m] = ort_[m] - g;
+
+      // Apply Householder similarity transformation
+      // H = (I - u*u'/h) * H * (I - u*u'/h)
+
+      // Left multiplication: update rows m..high of columns m..n_-1.
+      for (int j = m; j < n_; j++) {
+        Real f = 0.0;
+        for (int i = high; i >= m; i--) {
+          f += ort_[i]*H(i, j);
+        }
+        f = f/h;
+        for (int i = m; i <= high; i++) {
+          H(i, j) -= f*ort_[i];
+        }
+      }
+
+      // Right multiplication: update columns m..high of rows 0..high.
+      for (int i = 0; i <= high; i++) {
+        Real f = 0.0;
+        for (int j = high; j >= m; j--) {
+          f += ort_[j]*H(i, j);
+        }
+        f = f/h;
+        for (int j = m; j <= high; j++) {
+          H(i, j) -= f*ort_[j];
+        }
+      }
+      // Undo the scaling on ort_[m] and store the new sub-diagonal entry.
+      ort_[m] = scale*ort_[m];
+      H(m, m-1) = scale*g;
+    }
+  }
+
+  // Accumulate transformations (Algol's ortran).
+
+  // Start from V = identity.
+  for (int i = 0; i < n_; i++) {
+    for (int j = 0; j < n_; j++) {
+      V(i, j) = (i == j ? 1.0 : 0.0);
+    }
+  }
+
+  for (int m = high-1; m >= low+1; m--) {
+    if (H(m, m-1) != 0.0) {
+      // ort_[m] is still set from the reduction loop above; entries
+      // m+1..high are reloaded from column m-1 of H.
+      for (int i = m+1; i <= high; i++) {
+        ort_[i] = H(i, m-1);
+      }
+      for (int j = m; j <= high; j++) {
+        Real g = 0.0;
+        for (int i = m; i <= high; i++) {
+          g += ort_[i] * V(i, j);
+        }
+        // Double division avoids possible underflow
+        g = (g / ort_[m]) / H(m, m-1);
+        for (int i = m; i <= high; i++) {
+          V(i, j) += g * ort_[i];
+        }
+      }
+    }
+  }
+}
+
+template<typename Real> void  EigenvalueDecomposition<Real>::Hqr2() {
+  //  This is derived from the Algol procedure hqr2,
+  //  by Martin and Wilkinson, Handbook for Auto. Comp.,
+  //  Vol.ii-Linear Algebra, and the corresponding
+  //  Fortran subroutine in EISPACK.
+
+  int nn = n_;
+  int n = nn-1;
+  int low = 0;
+  int high = nn-1;
+  Real eps = std::numeric_limits<Real>::epsilon();
+  Real exshift = 0.0;
+  Real p = 0, q = 0, r = 0, s = 0, z=0, t, w, x, y;
+
+  // Store roots isolated by balanc and compute matrix norm
+
+  Real norm = 0.0;
+  for (int i = 0; i < nn; i++) {
+    if (i < low || i > high) {
+      d_[i] = H(i, i);
+      e_[i] = 0.0;
+    }
+    for (int j = std::max(i-1, 0); j < nn; j++) {
+      norm = norm + std::abs(H(i, j));
+    }
+  }
+
+  // Outer loop over eigenvalue index
+
+  int iter = 0;
+  while (n >= low) {
+
+    // Look for single small sub-diagonal element
+
+    int l = n;
+    while (l > low) {
+      s = std::abs(H(l-1, l-1)) + std::abs(H(l, l));
+      if (s == 0.0) {
+        s = norm;
+      }
+      if (std::abs(H(l, l-1)) < eps * s) {
+        break;
+      }
+      l--;
+    }
+
+    // Check for convergence
+    // One root found
+
+    if (l == n) {
+      H(n, n) = H(n, n) + exshift;
+      d_[n] = H(n, n);
+      e_[n] = 0.0;
+      n--;
+      iter = 0;
+
+      // Two roots found
+
+    } else if (l == n-1) {
+      w = H(n, n-1) * H(n-1, n);
+      p = (H(n-1, n-1) - H(n, n)) / 2.0;
+      q = p * p + w;
+      z = std::sqrt(std::abs(q));
+      H(n, n) = H(n, n) + exshift;
+      H(n-1, n-1) = H(n-1, n-1) + exshift;
+      x = H(n, n);
+
+      // Real pair
+
+      if (q >= 0) {
+        if (p >= 0) {
+          z = p + z;
+        } else {
+          z = p - z;
+        }
+        d_[n-1] = x + z;
+        d_[n] = d_[n-1];
+        if (z != 0.0) {
+          d_[n] = x - w / z;
+        }
+        e_[n-1] = 0.0;
+        e_[n] = 0.0;
+        x = H(n, n-1);
+        s = std::abs(x) + std::abs(z);
+        p = x / s;
+        q = z / s;
+        r = std::sqrt(p * p+q * q);
+        p = p / r;
+        q = q / r;
+
+        // Row modification
+
+        for (int j = n-1; j < nn; j++) {
+          z = H(n-1, j);
+          H(n-1, j) = q * z + p * H(n, j);
+          H(n, j) = q * H(n, j) - p * z;
+        }
+
+        // Column modification
+
+        for (int i = 0; i <= n; i++) {
+          z = H(i, n-1);
+          H(i, n-1) = q * z + p * H(i, n);
+          H(i, n) = q * H(i, n) - p * z;
+        }
+
+        // Accumulate transformations
+
+        for (int i = low; i <= high; i++) {
+          z = V(i, n-1);
+          V(i, n-1) = q * z + p * V(i, n);
+          V(i, n) = q * V(i, n) - p * z;
+        }
+
+        // Complex pair
+
+      } else {
+        d_[n-1] = x + p;
+        d_[n] = x + p;
+        e_[n-1] = z;
+        e_[n] = -z;
+      }
+      n = n - 2;
+      iter = 0;
+
+      // No convergence yet
+
+    } else {
+
+      // Form shift
+
+      x = H(n, n);
+      y = 0.0;
+      w = 0.0;
+      if (l < n) {
+        y = H(n-1, n-1);
+        w = H(n, n-1) * H(n-1, n);
+      }
+
+      // Wilkinson's original ad hoc shift
+
+      if (iter == 10) {
+        exshift += x;
+        for (int i = low; i <= n; i++) {
+          H(i, i) -= x;
+        }
+        s = std::abs(H(n, n-1)) + std::abs(H(n-1, n-2));
+        x = y = 0.75 * s;
+        w = -0.4375 * s * s;
+      }
+
+      // MATLAB's new ad hoc shift
+
+      if (iter == 30) {
+        s = (y - x) / 2.0;
+        s = s * s + w;
+        if (s > 0) {
+          s = std::sqrt(s);
+          if (y < x) {
+            s = -s;
+          }
+          s = x - w / ((y - x) / 2.0 + s);
+          for (int i = low; i <= n; i++) {
+            H(i, i) -= s;
+          }
+          exshift += s;
+          x = y = w = 0.964;
+        }
+      }
+
+      iter = iter + 1;   // (Could check iteration count here.)
+
+      // Look for two consecutive small sub-diagonal elements
+
+      int m = n-2;
+      while (m >= l) {
+        z = H(m, m);
+        r = x - z;
+        s = y - z;
+        p = (r * s - w) / H(m+1, m) + H(m, m+1);
+        q = H(m+1, m+1) - z - r - s;
+        r = H(m+2, m+1);
+        s = std::abs(p) + std::abs(q) + std::abs(r);
+        p = p / s;
+        q = q / s;
+        r = r / s;
+        if (m == l) {
+          break;
+        }
+        if (std::abs(H(m, m-1)) * (std::abs(q) + std::abs(r)) <
+            eps * (std::abs(p) * (std::abs(H(m-1, m-1)) + std::abs(z) +
+                                  std::abs(H(m+1, m+1))))) {
+          break;
+        }
+        m--;
+      }
+
+      for (int i = m+2; i <= n; i++) {
+        H(i, i-2) = 0.0;
+        if (i > m+2) {
+          H(i, i-3) = 0.0;
+        }
+      }
+
+      // Double QR step involving rows l:n and columns m:n
+
+      for (int k = m; k <= n-1; k++) {
+        bool notlast = (k != n-1);
+        if (k != m) {
+          p = H(k, k-1);
+          q = H(k+1, k-1);
+          r = (notlast ? H(k+2, k-1) : 0.0);
+          x = std::abs(p) + std::abs(q) + std::abs(r);
+          if (x != 0.0) {
+            p = p / x;
+            q = q / x;
+            r = r / x;
+          }
+        }
+        if (x == 0.0) {
+          break;
+        }
+        s = std::sqrt(p * p + q * q + r * r);
+        if (p < 0) {
+          s = -s;
+        }
+        if (s != 0) {
+          if (k != m) {
+            H(k, k-1) = -s * x;
+          } else if (l != m) {
+            H(k, k-1) = -H(k, k-1);
+          }
+          p = p + s;
+          x = p / s;
+          y = q / s;
+          z = r / s;
+          q = q / p;
+          r = r / p;
+
+          // Row modification
+
+          for (int j = k; j < nn; j++) {
+            p = H(k, j) + q * H(k+1, j);
+            if (notlast) {
+              p = p + r * H(k+2, j);
+              H(k+2, j) = H(k+2, j) - p * z;
+            }
+            H(k, j) = H(k, j) - p * x;
+            H(k+1, j) = H(k+1, j) - p * y;
+          }
+
+          // Column modification
+
+          for (int i = 0; i <= std::min(n, k+3); i++) {
+            p = x * H(i, k) + y * H(i, k+1);
+            if (notlast) {
+              p = p + z * H(i, k+2);
+              H(i, k+2) = H(i, k+2) - p * r;
+            }
+            H(i, k) = H(i, k) - p;
+            H(i, k+1) = H(i, k+1) - p * q;
+          }
+
+          // Accumulate transformations
+
+          for (int i = low; i <= high; i++) {
+            p = x * V(i, k) + y * V(i, k+1);
+            if (notlast) {
+              p = p + z * V(i, k+2);
+              V(i, k+2) = V(i, k+2) - p * r;
+            }
+            V(i, k) = V(i, k) - p;
+            V(i, k+1) = V(i, k+1) - p * q;
+          }
+        }  // (s != 0)
+      }  // k loop
+    }  // check convergence
+  }  // while (n >= low)
+
+  // Backsubstitute to find vectors of upper triangular form
+
+  if (norm == 0.0) {
+    return;
+  }
+
+  for (n = nn-1; n >= 0; n--) {
+    p = d_[n];
+    q = e_[n];
+
+    // Real vector
+
+    if (q == 0) {
+      int l = n;
+      H(n, n) = 1.0;
+      for (int i = n-1; i >= 0; i--) {
+        w = H(i, i) - p;
+        r = 0.0;
+        for (int j = l; j <= n; j++) {
+          r = r + H(i, j) * H(j, n);
+        }
+        if (e_[i] < 0.0) {
+          z = w;
+          s = r;
+        } else {
+          l = i;
+          if (e_[i] == 0.0) {
+            if (w != 0.0) {
+              H(i, n) = -r / w;
+            } else {
+              H(i, n) = -r / (eps * norm);
+            }
+
+            // Solve real equations
+
+          } else {
+            x = H(i, i+1);
+            y = H(i+1, i);
+            q = (d_[i] - p) * (d_[i] - p) +e_[i] *e_[i];
+            t = (x * s - z * r) / q;
+            H(i, n) = t;
+            if (std::abs(x) > std::abs(z)) {
+              H(i+1, n) = (-r - w * t) / x;
+            } else {
+              H(i+1, n) = (-s - y * t) / z;
+            }
+          }
+
+          // Overflow control
+
+          t = std::abs(H(i, n));
+          if ((eps * t) * t > 1) {
+            for (int j = i; j <= n; j++) {
+              H(j, n) = H(j, n) / t;
+            }
+          }
+        }
+      }
+
+      // Complex vector
+
+    } else if (q < 0) {
+      int l = n-1;
+
+      // Last vector component imaginary so matrix is triangular
+
+      if (std::abs(H(n, n-1)) > std::abs(H(n-1, n))) {
+        H(n-1, n-1) = q / H(n, n-1);
+        H(n-1, n) = -(H(n, n) - p) / H(n, n-1);
+      } else {
+        Real cdivr, cdivi;
+        cdiv(0.0, -H(n-1, n), H(n-1, n-1)-p, q, &cdivr, &cdivi);
+        H(n-1, n-1) = cdivr;
+        H(n-1, n) = cdivi;
+      }
+      H(n, n-1) = 0.0;
+      H(n, n) = 1.0;
+      for (int i = n-2; i >= 0; i--) {
+        Real ra, sa, vr, vi;
+        ra = 0.0;
+        sa = 0.0;
+        for (int j = l; j <= n; j++) {
+          ra = ra + H(i, j) * H(j, n-1);
+          sa = sa + H(i, j) * H(j, n);
+        }
+        w = H(i, i) - p;
+
+        if (e_[i] < 0.0) {
+          z = w;
+          r = ra;
+          s = sa;
+        } else {
+          l = i;
+          if (e_[i] == 0) {
+            Real cdivr, cdivi;
+            cdiv(-ra, -sa, w, q, &cdivr, &cdivi);
+            H(i, n-1) = cdivr;
+            H(i, n) = cdivi;
+          } else {
+            Real cdivr, cdivi;
+            // Solve complex equations
+
+            x = H(i, i+1);
+            y = H(i+1, i);
+            vr = (d_[i] - p) * (d_[i] - p) +e_[i] *e_[i] - q * q;
+            vi = (d_[i] - p) * 2.0 * q;
+            if (vr == 0.0 && vi == 0.0) {
+              vr = eps * norm * (std::abs(w) + std::abs(q) +
+                                 std::abs(x) + std::abs(y) + std::abs(z));
+            }
+            cdiv(x*r-z*ra+q*sa, x*s-z*sa-q*ra, vr, vi, &cdivr, &cdivi);
+            H(i, n-1) = cdivr;
+            H(i, n) = cdivi;
+            if (std::abs(x) > (std::abs(z) + std::abs(q))) {
+              H(i+1, n-1) = (-ra - w * H(i, n-1) + q * H(i, n)) / x;
+              H(i+1, n) = (-sa - w * H(i, n) - q * H(i, n-1)) / x;
+            } else {
+              cdiv(-r-y*H(i, n-1), -s-y*H(i, n), z, q, &cdivr, &cdivi);
+              H(i+1, n-1) = cdivr;
+              H(i+1, n) = cdivi;
+            }
+          }
+
+          // Overflow control
+
+          t = std::max(std::abs(H(i, n-1)), std::abs(H(i, n)));
+          if ((eps * t) * t > 1) {
+            for (int j = i; j <= n; j++) {
+              H(j, n-1) = H(j, n-1) / t;
+              H(j, n) = H(j, n) / t;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  // Vectors of isolated roots
+
+  for (int i = 0; i < nn; i++) {
+    if (i < low || i > high) {
+      for (int j = i; j < nn; j++) {
+        V(i, j) = H(i, j);
+      }
+    }
+  }
+
+  // Back transformation to get eigenvectors of original matrix
+
+  for (int j = nn-1; j >= low; j--) {
+    for (int i = low; i <= high; i++) {
+      z = 0.0;
+      for (int k = low; k <= std::min(j, high); k++) {
+        z = z + V(i, k) * H(k, j);
+      }
+      V(i, j) = z;
+    }
+  }
+}
+
+template<typename Real>
+EigenvalueDecomposition<Real>::EigenvalueDecomposition(const MatrixBase<Real> &A) {
+  // Only square, non-empty matrices are accepted.
+  KALDI_ASSERT(A.NumCols() == A.NumRows() && A.NumCols() >= 1);
+  n_ = A.NumRows();
+  V_ = new Real[n_*n_];
+  d_ = new Real[n_];
+  e_ = new Real[n_];
+  H_ = NULL;    // Allocated only on the nonsymmetric path below.
+  ort_ = NULL;  // Likewise.
+  const bool symmetric = A.IsSymmetric(0.0);
+  if (!symmetric) {
+    H_ = new Real[n_*n_];
+    ort_ = new Real[n_];
+    // Copy A into the work array: H(r, c) is a member-function accessor,
+    // whereas A(r, c) is an operator of the matrix A.
+    for (int r = 0; r < n_; r++) {
+      for (int c = 0; c < n_; c++) {
+        H(r, c) = A(r, c);
+      }
+    }
+    Orthes();  // Reduce to Hessenberg form.
+    Hqr2();    // Reduce Hessenberg to real Schur form.
+  } else {
+    // Copy A into V_ (V(r, c) is again a member-function accessor).
+    for (int r = 0; r < n_; r++) {
+      for (int c = 0; c < n_; c++) {
+        V(r, c) = A(r, c);
+      }
+    }
+    Tred2();  // Tridiagonalize.
+    Tql2();   // Diagonalize.
+  }
+}
+
+template<typename Real>
+EigenvalueDecomposition<Real>::~EigenvalueDecomposition() {
+  delete [] ort_;  // May be NULL (symmetric input); delete[] of NULL is a no-op.
+  delete [] H_;    // Likewise.
+  delete [] V_;
+  delete [] e_;
+  delete [] d_;
+}
+
+// see function MatrixBase<Real>::Eig in kaldi-matrix.cc
+
+
+} // namespace kaldi
+
+#endif // KALDI_MATRIX_JAMA_EIG_H_
diff --git a/kaldi_io/src/kaldi/matrix/jama-svd.h b/kaldi_io/src/kaldi/matrix/jama-svd.h
new file mode 100644
index 0000000..8304dac
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/jama-svd.h
@@ -0,0 +1,531 @@
+// matrix/jama-svd.h
+
+// Copyright 2009-2011 Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+// This file consists of a port and modification of materials from
+//   JAMA: A Java Matrix Package
+// under the following notice: This software is a cooperative product of
+// The MathWorks and the National Institute of Standards and Technology (NIST)
+// which has been released to the public.  This notice and the original code are
+// available at http://math.nist.gov/javanumerics/jama/domain.notice
+
+
+#ifndef KALDI_MATRIX_JAMA_SVD_H_
+#define KALDI_MATRIX_JAMA_SVD_H_ 1
+
+
+#include "matrix/kaldi-matrix.h"
+#include "matrix/sp-matrix.h"
+#include "matrix/cblas-wrappers.h"
+
+namespace kaldi {
+
+#if defined(HAVE_ATLAS) || defined(USE_KALDI_SVD)
+// using ATLAS as our math library, which doesn't have SVD -> need
+// to implement it.
+
+// This routine is a modified form of jama_svd.h which is part of the TNT distribution.
+// (originally comes from JAMA).
+
+/** Singular Value Decomposition.
+ * <P>
+ * For an m-by-n matrix A with m >= n, the singular value decomposition is
+ * an m-by-n orthogonal matrix U, an n-by-n diagonal matrix S, and
+ * an n-by-n orthogonal matrix V so that A = U*S*V'.
+ * <P>
+ * The singular values, sigma[k] = S(k, k), are ordered so that
+ * sigma[0] >= sigma[1] >= ... >= sigma[n-1].
+ * <P>
+ * The singular value decomposition always exists, but this routine returns
+ * false if the iteration fails to converge.  The matrix condition number and
+ * the effective numerical rank can be computed from this decomposition.
+ * <P>
+ * *this (the matrix A) is used as workspace and overwritten.  s_in must be
+ * non-NULL with dimension n; U_in (m-by-n) and V_in (n-by-n) may be NULL.
+ * <p>
+ *     (Adapted from JAMA, a Java Matrix Library, developed jointly
+ *     by the Mathworks and NIST; see  http://math.nist.gov/javanumerics/jama).
+ */
+template<typename Real>
+bool MatrixBase<Real>::JamaSvd(VectorBase<Real> *s_in,
+                               MatrixBase<Real> *U_in,
+                               MatrixBase<Real> *V_in) {  //  Destructive!
+  KALDI_ASSERT(s_in != NULL && U_in != this && V_in != this);
+  int wantu = (U_in != NULL), wantv = (V_in != NULL);
+  Matrix<Real> Utmp, Vtmp;
+  MatrixBase<Real> &U = (U_in ? *U_in : Utmp), &V = (V_in ? *V_in : Vtmp);
+  VectorBase<Real> &s = *s_in;
+
+  int m = num_rows_, n = num_cols_;
+  KALDI_ASSERT(m>=n && m != 0 && n != 0);
+  if (wantu) KALDI_ASSERT((int)U.num_rows_ == m && (int)U.num_cols_ == n);
+  if (wantv) KALDI_ASSERT((int)V.num_rows_ == n && (int)V.num_cols_ == n);
+  KALDI_ASSERT((int)s.Dim() == n);  // n<=m so n is min.
+
+  int nu = n;
+  U.SetZero();  // make sure all zero.
+  Vector<Real> e(n);
+  Vector<Real> work(m);
+  MatrixBase<Real> &A(*this);
+  Real *adata = A.Data(), *workdata = work.Data(), *edata = e.Data(),
+      *udata = U.Data(), *vdata = V.Data();
+  int astride = static_cast<int>(A.Stride()),
+      ustride = static_cast<int>(U.Stride()),
+      vstride = static_cast<int>(V.Stride());
+  int i = 0, j = 0, k = 0;
+
+  // Reduce A to bidiagonal form, storing the diagonal elements
+  // in s and the super-diagonal elements in e.
+
+  int nct = std::min(m-1, n);
+  int nrt = std::max(0, std::min(n-2, m));
+  for (k = 0; k < std::max(nct, nrt); k++) {
+    if (k < nct) {
+
+      // Compute the transformation for the k-th column and
+      // place the k-th diagonal in s(k).
+      // Compute 2-norm of k-th column without under/overflow.
+      s(k) = 0;
+      for (i = k; i < m; i++) {
+        s(k) = hypot(s(k), A(i, k));
+      }
+      if (s(k) != 0.0) {
+        if (A(k, k) < 0.0) {
+          s(k) = -s(k);
+        }
+        for (i = k; i < m; i++) {
+          A(i, k) /= s(k);
+        }
+        A(k, k) += 1.0;
+      }
+      s(k) = -s(k);
+    }
+    for (j = k+1; j < n; j++) {
+      if ((k < nct) && (s(k) != 0.0))  {
+
+        // Apply the transformation.
+
+        Real t = cblas_Xdot(m - k, adata + astride*k + k, astride,
+                            adata + astride*k + j, astride);
+        /*for (i = k; i < m; i++) {
+          t += adata[i*astride + k]*adata[i*astride + j];  //   A(i, k)*A(i, j); // 3
+          }*/
+        t = -t/A(k, k);
+        cblas_Xaxpy(m - k, t, adata + k*astride + k, astride,
+                    adata + k*astride + j, astride);
+        /*for (i = k; i < m; i++) {
+          adata[i*astride + j] += t*adata[i*astride + k];  // A(i, j) += t*A(i, k); // 5
+          }*/
+      }
+
+      // Place the k-th row of A into e for the
+      // subsequent calculation of the row transformation.
+
+      e(j) = A(k, j);
+    }
+    if (wantu & (k < nct)) {
+
+      // Place the transformation in U for subsequent back
+      // multiplication.
+
+      for (i = k; i < m; i++) {
+        U(i, k) = A(i, k);
+      }
+    }
+    if (k < nrt) {
+
+      // Compute the k-th row transformation and place the
+      // k-th super-diagonal in e(k).
+      // Compute 2-norm without under/overflow.
+      e(k) = 0;
+      for (i = k+1; i < n; i++) {
+        e(k) = hypot(e(k), e(i));
+      }
+      if (e(k) != 0.0) {
+        if (e(k+1) < 0.0) {
+          e(k) = -e(k);
+        }
+        for (i = k+1; i < n; i++) {
+          e(i) /= e(k);
+        }
+        e(k+1) += 1.0;
+      }
+      e(k) = -e(k);
+      if ((k+1 < m) & (e(k) != 0.0)) {
+
+        // Apply the transformation.
+
+        for (i = k+1; i < m; i++) {
+          work(i) = 0.0;
+        }
+        for (j = k+1; j < n; j++) {
+          for (i = k+1; i < m; i++) {
+            workdata[i] += edata[j] * adata[i*astride + j];  // work(i) += e(j)*A(i, j); // 5
+          }
+        }
+        for (j = k+1; j < n; j++) {
+          Real t(-e(j)/e(k+1));
+          cblas_Xaxpy(m - (k+1), t, workdata + (k+1), 1,
+                      adata + (k+1)*astride + j, astride);
+          /*
+          for (i = k+1; i < m; i++) {
+            adata[i*astride + j] += t*workdata[i];  // A(i, j) += t*work(i); // 5
+            }*/
+        }
+      }
+      if (wantv) {
+
+        // Place the transformation in V for subsequent
+        // back multiplication.
+
+        for (i = k+1; i < n; i++) {
+          V(i, k) = e(i);
+        }
+      }
+    }
+  }
+
+  // Set up the final bidiagonal matrix of order p.
+
+  int p = std::min(n, m+1);
+  if (nct < n) {
+    s(nct) = A(nct, nct);
+  }
+  if (m < p) {
+    s(p-1) = 0.0;
+  }
+  if (nrt+1 < p) {
+    e(nrt) = A(nrt, p-1);
+  }
+  e(p-1) = 0.0;
+
+  // If required, generate U.
+
+  if (wantu) {
+    for (j = nct; j < nu; j++) {
+      for (i = 0; i < m; i++) {
+        U(i, j) = 0.0;
+      }
+      U(j, j) = 1.0;
+    }
+    for (k = nct-1; k >= 0; k--) {
+      if (s(k) != 0.0) {
+        for (j = k+1; j < nu; j++) {
+          Real t = cblas_Xdot(m - k, udata + k*ustride + k, ustride, udata + k*ustride + j, ustride);
+          //for (i = k; i < m; i++) {
+          //  t += udata[i*ustride + k]*udata[i*ustride + j];  // t += U(i, k)*U(i, j); // 8
+          // }
+          t = -t/U(k, k);
+          cblas_Xaxpy(m - k, t, udata + ustride*k + k, ustride,
+                      udata + k*ustride + j, ustride);
+          /*for (i = k; i < m; i++) {
+            udata[i*ustride + j] += t*udata[i*ustride + k];  // U(i, j) += t*U(i, k); // 4
+            }*/
+        }
+        for (i = k; i < m; i++ ) {
+          U(i, k) = -U(i, k);
+        }
+        U(k, k) = 1.0 + U(k, k);
+        for (i = 0; i < k-1; i++) {
+          U(i, k) = 0.0;
+        }
+      } else {
+        for (i = 0; i < m; i++) {
+          U(i, k) = 0.0;
+        }
+        U(k, k) = 1.0;
+      }
+    }
+  }
+
+  // If required, generate V.
+
+  if (wantv) {
+    for (k = n-1; k >= 0; k--) {
+      if ((k < nrt) & (e(k) != 0.0)) {
+        for (j = k+1; j < nu; j++) {
+          Real t = cblas_Xdot(n - (k+1), vdata + (k+1)*vstride + k, vstride,
+                              vdata + (k+1)*vstride + j, vstride);
+          /*Real t (0.0);
+          for (i = k+1; i < n; i++) {
+            t += vdata[i*vstride + k]*vdata[i*vstride + j];  // t += V(i, k)*V(i, j); // 7
+            }*/
+          t = -t/V(k+1, k);
+          cblas_Xaxpy(n - (k+1), t, vdata + (k+1)*vstride + k, vstride,
+                      vdata + (k+1)*vstride + j, vstride);
+          /*for (i = k+1; i < n; i++) {
+            vdata[i*vstride + j] += t*vdata[i*vstride + k];  // V(i, j) += t*V(i, k); // 7
+            }*/
+        }
+      }
+      for (i = 0; i < n; i++) {
+        V(i, k) = 0.0;
+      }
+      V(k, k) = 1.0;
+    }
+  }
+
+  // Main iteration loop for the singular values.
+
+  int pp = p-1;
+  int iter = 0;
+  // note: -52.0 is from Jama code; the -23 is the extension
+  // to float, because mantissa length in (double, float)
+  // is (52, 23) bits respectively.
+  Real eps(pow(2.0, sizeof(Real) == 4 ? -23.0 : -52.0));
+  // Note: the -966 was taken from Jama code, but the -120 is a guess
+  // of how to extend this to float... the exponent in double goes
+  // from -1022 .. 1023, and in float from -126..127.  I'm not sure
+  // what the significance of 966 is, so -120 just represents a number
+  // that's a bit less negative than -126.  If we get convergence
+  // failure in float only, this may mean that we have to make the
+  // -120 value less negative.
+  Real tiny(pow(2.0, sizeof(Real) == 4 ? -120.0: -966.0 ));
+
+  while (p > 0) {
+    int k = 0;
+    int kase = 0;
+
+    if (iter == 500 || iter == 750) {
+      KALDI_WARN << "Svd taking a long time: making convergence criterion less exact.";
+      eps = pow(eps, static_cast<Real>(0.8));    // eps < 1, so this enlarges it.
+      tiny = pow(tiny, static_cast<Real>(0.8));  // Likewise for tiny.
+    }
+    if (iter > 1000) {
+      KALDI_WARN << "Svd not converging on matrix of size " << m << " by " <<n;
+      return false;
+    }
+
+    // This section of the program inspects for
+    // negligible elements in the s and e arrays.  On
+    // completion the variables kase and k are set as follows.
+
+    // kase = 1     if s(p) and e(k-1) are negligible and k < p
+    // kase = 2     if s(k) is negligible and k < p
+    // kase = 3     if e(k-1) is negligible, k < p, and
+    //              s(k), ..., s(p) are not negligible (qr step).
+    // kase = 4     if e(p-1) is negligible (convergence).
+
+    for (k = p-2; k >= -1; k--) {
+      if (k == -1) {
+        break;
+      }
+      if (std::abs(e(k)) <=
+          tiny + eps*(std::abs(s(k)) + std::abs(s(k+1)))) {
+        e(k) = 0.0;
+        break;
+      }
+    }
+    if (k == p-2) {
+      kase = 4;
+    } else {
+      int ks;
+      for (ks = p-1; ks >= k; ks--) {
+        if (ks == k) {
+          break;
+        }
+        Real t( (ks != p ? std::abs(e(ks)) : 0.) +
+                (ks != k+1 ? std::abs(e(ks-1)) : 0.));
+        if (std::abs(s(ks)) <= tiny + eps*t)  {
+          s(ks) = 0.0;
+          break;
+        }
+      }
+      if (ks == k) {
+        kase = 3;
+      } else if (ks == p-1) {
+        kase = 1;
+      } else {
+        kase = 2;
+        k = ks;
+      }
+    }
+    k++;
+
+    // Perform the task indicated by kase.
+
+    switch (kase) {
+
+      // Deflate negligible s(p).
+
+      case 1: {
+        Real f(e(p-2));
+        e(p-2) = 0.0;
+        for (j = p-2; j >= k; j--) {
+          Real t( hypot(s(j), f));
+          Real cs(s(j)/t);
+          Real sn(f/t);
+          s(j) = t;
+          if (j != k) {
+            f = -sn*e(j-1);
+            e(j-1) = cs*e(j-1);
+          }
+          if (wantv) {
+            for (i = 0; i < n; i++) {
+              t = cs*V(i, j) + sn*V(i, p-1);
+              V(i, p-1) = -sn*V(i, j) + cs*V(i, p-1);
+              V(i, j) = t;
+            }
+          }
+        }
+      }
+        break;
+
+        // Split at negligible s(k).
+
+      case 2: {
+        Real f(e(k-1));
+        e(k-1) = 0.0;
+        for (j = k; j < p; j++) {
+          Real t(hypot(s(j), f));
+          Real cs( s(j)/t);
+          Real sn(f/t);
+          s(j) = t;
+          f = -sn*e(j);
+          e(j) = cs*e(j);
+          if (wantu) {
+            for (i = 0; i < m; i++) {
+              t = cs*U(i, j) + sn*U(i, k-1);
+              U(i, k-1) = -sn*U(i, j) + cs*U(i, k-1);
+              U(i, j) = t;
+            }
+          }
+        }
+      }
+        break;
+
+        // Perform one qr step.
+
+      case 3: {
+
+        // Calculate the shift.
+
+        Real scale = std::max(std::max(std::max(std::max(
+            std::abs(s(p-1)), std::abs(s(p-2))), std::abs(e(p-2))),
+                                       std::abs(s(k))), std::abs(e(k)));
+        Real sp = s(p-1)/scale;
+        Real spm1 = s(p-2)/scale;
+        Real epm1 = e(p-2)/scale;
+        Real sk = s(k)/scale;
+        Real ek = e(k)/scale;
+        Real b = ((spm1 + sp)*(spm1 - sp) + epm1*epm1)/2.0;
+        Real c = (sp*epm1)*(sp*epm1);
+        Real shift = 0.0;
+        if ((b != 0.0) || (c != 0.0)) {
+          shift = std::sqrt(b*b + c);
+          if (b < 0.0) {
+            shift = -shift;
+          }
+          shift = c/(b + shift);
+        }
+        Real f = (sk + sp)*(sk - sp) + shift;
+        Real g = sk*ek;
+
+        // Chase zeros.
+
+        for (j = k; j < p-1; j++) {
+          Real t = hypot(f, g);
+          Real cs = f/t;
+          Real sn = g/t;
+          if (j != k) {
+            e(j-1) = t;
+          }
+          f = cs*s(j) + sn*e(j);
+          e(j) = cs*e(j) - sn*s(j);
+          g = sn*s(j+1);
+          s(j+1) = cs*s(j+1);
+          if (wantv) {
+            cblas_Xrot(n, vdata + j, vstride, vdata + j+1, vstride, cs, sn);
+            /*for (i = 0; i < n; i++) {
+              t = cs*vdata[i*vstride + j] + sn*vdata[i*vstride + j+1];  // t = cs*V(i, j) + sn*V(i, j+1);         // 13
+              vdata[i*vstride + j+1] = -sn*vdata[i*vstride + j] + cs*vdata[i*vstride + j+1];  // V(i, j+1) = -sn*V(i, j) + cs*V(i, j+1); // 5
+              vdata[i*vstride + j] = t;  // V(i, j) = t; // 4
+              }*/
+          }
+          t = hypot(f, g);
+          cs = f/t;
+          sn = g/t;
+          s(j) = t;
+          f = cs*e(j) + sn*s(j+1);
+          s(j+1) = -sn*e(j) + cs*s(j+1);
+          g = sn*e(j+1);
+          e(j+1) = cs*e(j+1);
+          if (wantu && (j < m-1)) {
+            cblas_Xrot(m, udata + j, ustride, udata + j+1, ustride, cs, sn);
+            /*for (i = 0; i < m; i++) {
+              t = cs*udata[i*ustride + j] + sn*udata[i*ustride + j+1];  // t = cs*U(i, j) + sn*U(i, j+1); // 7
+              udata[i*ustride + j+1] = -sn*udata[i*ustride + j] +cs*udata[i*ustride + j+1];  // U(i, j+1) = -sn*U(i, j) + cs*U(i, j+1); // 8
+              udata[i*ustride + j] = t;  // U(i, j) = t; // 1
+              }*/
+          }
+        }
+        e(p-2) = f;
+        iter = iter + 1;
+      }
+        break;
+
+        // Convergence.
+
+      case 4: {
+
+        // Make the singular values positive.
+
+        if (s(k) <= 0.0) {
+          s(k) = (s(k) < 0.0 ? -s(k) : 0.0);
+          if (wantv) {
+            for (i = 0; i <= pp; i++) {
+              V(i, k) = -V(i, k);
+            }
+          }
+        }
+
+        // Order the singular values.
+
+        while (k < pp) {
+          if (s(k) >= s(k+1)) {
+            break;
+          }
+          Real t = s(k);
+          s(k) = s(k+1);
+          s(k+1) = t;
+          if (wantv && (k < n-1)) {
+            for (i = 0; i < n; i++) {
+              t = V(i, k+1); V(i, k+1) = V(i, k); V(i, k) = t;
+            }
+          }
+          if (wantu && (k < m-1)) {
+            for (i = 0; i < m; i++) {
+              t = U(i, k+1); U(i, k+1) = U(i, k); U(i, k) = t;
+            }
+          }
+          k++;
+        }
+        iter = 0;
+        p--;
+      }
+        break;
+    }
+  }
+  return true;
+}
+
+#endif // defined(HAVE_ATLAS) || defined(USE_KALDI_SVD)
+
+} // namespace kaldi
+
+#endif // KALDI_MATRIX_JAMA_SVD_H_
diff --git a/kaldi_io/src/kaldi/matrix/kaldi-blas.h b/kaldi_io/src/kaldi/matrix/kaldi-blas.h
new file mode 100644
index 0000000..5d25ab8
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/kaldi-blas.h
@@ -0,0 +1,132 @@
+// matrix/kaldi-blas.h
+
+// Copyright 2009-2011  Ondrej Glembek;  Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_MATRIX_KALDI_BLAS_H_
+#define KALDI_MATRIX_KALDI_BLAS_H_
+
+// This file handles the #includes for BLAS, LAPACK and so on.
+// It manipulates the declarations into a common format that kaldi can handle.
+// However, the kaldi code will check whether HAVE_ATLAS is defined as that
+// code is called a bit differently from CLAPACK that comes from other sources.
+
+// There are four alternatives:
+//   (i) you have ATLAS, which includes the ATLAS implementation of CBLAS
+//   plus a subset of CLAPACK (but with clapack_ in the function declarations).
+//   In this case, define HAVE_ATLAS and make sure the relevant directories are
+//   in the include path.
+
+//   (ii) you have CBLAS (some implementation thereof) plus CLAPACK.
+//   In this case, define HAVE_CLAPACK.
+//   [Since CLAPACK depends on BLAS, the presence of BLAS is implicit].
+
+//  (iii) you have the MKL library, which includes CLAPACK and CBLAS.
+//   In this case, define HAVE_MKL.
+
+//   (iv) you have OpenBLAS, from which we take cblas.h and lapacke.h.
+//   In this case, define HAVE_OPENBLAS.
+
+// Note that if we are using ATLAS, no Svd implementation is supplied,
+// so we define HAVE_Svd to be zero and this directs our implementation to
+// supply its own "by hand" implementation which is based on TNT code.
+#if (defined(HAVE_CLAPACK) && (defined(HAVE_ATLAS) || defined(HAVE_MKL))) \
+    || (defined(HAVE_ATLAS) && defined(HAVE_MKL))
+#error "Do not define more than one of HAVE_CLAPACK, HAVE_ATLAS and HAVE_MKL"
+#endif
+
+#ifdef HAVE_ATLAS
+  extern "C" {
+    #include <cblas.h>
+    #include <clapack.h>
+  }
+#elif defined(HAVE_CLAPACK)
+  #ifdef __APPLE__
+    #ifndef __has_extension
+    #define __has_extension(x) 0
+    #endif
+    #define vImage_Utilities_h
+    #define vImage_CVUtilities_h
+    #include <Accelerate/Accelerate.h>
+    typedef __CLPK_integer          integer;
+    typedef __CLPK_logical          logical;
+    typedef __CLPK_real             real;
+    typedef __CLPK_doublereal       doublereal;
+    typedef __CLPK_complex          complex;
+    typedef __CLPK_doublecomplex    doublecomplex;
+    typedef __CLPK_ftnlen           ftnlen;
+  #else
+    extern "C" {
+      // May be in /usr/[local]/include if installed; else this uses the one
+      // from the tools/CLAPACK_include directory.
+      #include <cblas.h>
+      #include <f2c.h>
+      #include <clapack.h>
+
+      // get rid of macros from f2c.h -- these are dangerous.
+      #undef abs
+      #undef dabs
+      #undef min
+      #undef max
+      #undef dmin
+      #undef dmax
+      #undef bit_test
+      #undef bit_clear
+      #undef bit_set
+    }
+  #endif
+#elif defined(HAVE_MKL)
+  extern "C" {
+    #include <mkl.h>
+  }
+#elif defined(HAVE_OPENBLAS)
+  // getting cblas.h and lapacke.h from <openblas-install-dir>/.
+  // putting in "" not <> to search -I before system libraries.
+  #include "cblas.h"
+  #include "lapacke.h"
+  #undef I
+  #undef complex
+  // same #undef list as in the f2c.h case above -- these macros are dangerous.
+  #undef abs
+  #undef dabs
+  #undef min
+  #undef max
+  #undef dmin
+  #undef dmax
+  #undef bit_test
+  #undef bit_clear
+  #undef bit_set
+#else
+  #error "You need to define (using the preprocessor) either HAVE_CLAPACK or HAVE_ATLAS or HAVE_MKL or HAVE_OPENBLAS (but not more than one)"
+#endif
+
+#ifdef HAVE_OPENBLAS
+typedef int KaldiBlasInt; // try int.
+#endif
+#ifdef HAVE_CLAPACK
+typedef integer KaldiBlasInt;
+#endif
+#ifdef HAVE_MKL
+typedef MKL_INT KaldiBlasInt;
+#endif
+
+#ifdef HAVE_ATLAS
+// in this case there is no need for KaldiBlasInt-- this typedef is only needed
+// for Svd code which is not included in ATLAS (we re-implement it).
+#endif
+
+
+#endif  // KALDI_MATRIX_KALDI_BLAS_H_
diff --git a/kaldi_io/src/kaldi/matrix/kaldi-gpsr.h b/kaldi_io/src/kaldi/matrix/kaldi-gpsr.h
new file mode 100644
index 0000000..c294bdd
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/kaldi-gpsr.h
@@ -0,0 +1,166 @@
+// matrix/kaldi-gpsr.h
+
+// Copyright 2012  Arnab Ghoshal
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_MATRIX_KALDI_GPSR_H_
+#define KALDI_MATRIX_KALDI_GPSR_H_
+
+#include <string>
+#include <vector>
+
+#include "base/kaldi-common.h"
+#include "matrix/matrix-lib.h"
+#include "itf/options-itf.h"
+
+namespace kaldi {
+
+/// This is an implementation of the GPSR algorithm. See, Figueiredo, Nowak and
+/// Wright, "Gradient Projection for Sparse Reconstruction: Application to
+/// Compressed Sensing and Other Inverse Problems," IEEE Journal of Selected
+/// Topics in Signal Processing, vol. 1, no. 4, pp. 586-597, 2007.
+/// http://dx.doi.org/10.1109/JSTSP.2007.910281
+
+/// The GPSR algorithm, described in Figueiredo, et al., 2007, solves:
+/// \f[ \min_x 0.5 * ||y - Ax||_2^2 + \tau ||x||_1, \f]
+/// where \f$ x \in R^n, y \in R^k \f$, and \f$ A \in R^{k \times n} \f$.
+/// In this implementation, we solve:
+/// \f[ \min_x 0.5 * x^T H x - g^T x + \tau ||x||_1, \f]
+/// which is the more natural form in which such problems arise in our case.
+/// Here, \f$ H = A^T A \in R^{n \times n} \f$ and \f$ g = A^T y \in R^n \f$.
+
+
+/** \struct GpsrConfig
+ *  Bundle of the tunable settings consumed by the GPSR routines.
+ */
+struct GpsrConfig {
+  bool use_gpsr_bb;  ///< Use the Barzilai-Borwein gradient projection method
+
+  // --- Options shared by the basic and the Barzilai-Borwein versions ---
+  double stop_thresh;    ///< Stopping threshold
+  int32 max_iters;       ///< Maximum number of iterations
+  double gpsr_tau;       ///< Regularization scale
+  double alpha_min;      ///< Minimum step size in the feasible direction
+  double alpha_max;      ///< Maximum step size in the feasible direction
+  double max_sparsity;   ///< Maximum percentage of dimensions set to 0
+  double tau_reduction;  ///< Multiply tau by this if max_sparsity reached
+
+  // --- Options for the backtracking line search in basic GPSR ---
+  /// Step size reduction factor in backtracking line search. 0 < beta < 1
+  double gpsr_beta;
+  /// Improvement factor in backtracking line search: the new objective
+  /// function must be less than the old one by mu times the gradient in the
+  /// direction of the change in x. 0 < mu < 1
+  double gpsr_mu;
+  int32 max_iters_backtrak;  ///< Max iterations for backtracking line search
+
+  // --- Options for the debiasing stage ---
+  bool debias;  ///< Do debiasing, i.e. unconstrained optimization at the end
+  double stop_thresh_debias;  ///< Stopping threshold for debiasing stage
+  int32 max_iters_debias;  ///< Maximum number of iterations for debiasing stage
+
+  /// Default constructor: every option starts at its default value.
+  GpsrConfig()
+      : use_gpsr_bb(true),
+        stop_thresh(0.005),
+        max_iters(100),
+        gpsr_tau(10),
+        alpha_min(1.0e-10),
+        alpha_max(1.0e+20),
+        max_sparsity(0.9),
+        tau_reduction(0.8),
+        gpsr_beta(0.5),
+        gpsr_mu(0.1),
+        max_iters_backtrak(50),
+        debias(false),
+        stop_thresh_debias(0.001),
+        max_iters_debias(50) { }
+
+  /// Registers the options with \p po.
+  void Register(OptionsItf *po);
+};
+
+/// Registers every GpsrConfig option with \p po.  Each call passes the
+/// option's dashed name, a pointer to the member it sets, and a help string
+/// prefixed with "GpsrConfig: ".
+/// Note that the option is spelled "max-iters-backtrack" although the member
+/// it sets is named max_iters_backtrak.
+inline void GpsrConfig::Register(OptionsItf *po) {
+  const std::string module = "GpsrConfig: ";
+  po->Register("use-gpsr-bb", &use_gpsr_bb, module+
+               "Use the Barzilai-Borwein gradient projection method.");
+
+  // Options common to both GPSR variants.
+  po->Register("stop-thresh", &stop_thresh, module+
+               "Stopping threshold for GPSR.");
+  po->Register("max-iters", &max_iters, module+
+               "Maximum number of iterations of GPSR.");
+  po->Register("gpsr-tau", &gpsr_tau, module+
+               "Regularization scale for GPSR.");
+  po->Register("alpha-min", &alpha_min, module+
+               "Minimum step size in feasible direction.");
+  po->Register("alpha-max", &alpha_max, module+
+               "Maximum step size in feasible direction.");
+  po->Register("max-sparsity", &max_sparsity, module+
+               "Maximum percentage of dimensions set to 0.");
+  po->Register("tau-reduction", &tau_reduction, module+
+               "Multiply tau by this if maximum sparsity is reached.");
+
+  // Options for the backtracking line search.
+  po->Register("gpsr-beta", &gpsr_beta, module+
+               "Step size reduction factor in backtracking line search (0<beta<1).");
+  po->Register("gpsr-mu", &gpsr_mu, module+
+               "Improvement factor in backtracking line search (0<mu<1).");
+  po->Register("max-iters-backtrack", &max_iters_backtrak, module+
+               "Maximum number of iterations of backtracking line search.");
+
+  // Options for the debiasing step.
+  po->Register("debias", &debias, module+
+               "Do final debiasing step.");
+  po->Register("stop-thresh-debias", &stop_thresh_debias, module+
+               "Stopping threshold for debiasing step.");
+  po->Register("max-iters-debias", &max_iters_debias, module+
+               "Maximum number of iterations of debiasing.");
+}
+
+/// Minimizes, over \f$ x \f$, the L_1-regularized quadratic objective
+/// \f[ \min_x 0.5 * x^T H x - g^T x + \tau ||x||_1. \f]
+/// Comparable to SolveQuadraticProblem() in sp-matrix.h, but with an extra
+/// L_1 term.  Dispatches to GpsrBB() when opts.use_gpsr_bb is set and to
+/// GpsrBasic() otherwise, and returns whatever the chosen routine returns.
+template<typename Real>
+Real Gpsr(const GpsrConfig &opts, const SpMatrix<Real> &H,
+          const Vector<Real> &g, Vector<Real> *x,
+          const char *debug_str = "[unknown]") {
+  return opts.use_gpsr_bb ? GpsrBB(opts, H, g, x, debug_str)
+                          : GpsrBasic(opts, H, g, x, debug_str);
+}
+
+/// This is the basic GPSR algorithm, where the step size is determined by a
+/// backtracking line search. The line search is called "Armijo rule along the
+/// projection arc" in Bertsekas, Nonlinear Programming, 2nd ed. page 230.
+/// Only declared here; it takes the same arguments as Gpsr(), which calls it
+/// when opts.use_gpsr_bb is false.
+/// NOTE(review): x is presumably both the starting point and the output, and
+/// debug_str presumably a label for diagnostics -- confirm in the definition.
+template<typename Real>
+Real GpsrBasic(const GpsrConfig &opts, const SpMatrix<Real> &H,
+               const Vector<Real> &g, Vector<Real> *x,
+               const char *debug_str = "[unknown]");
+
+/// This is what the paper calls the Barzilai-Borwein variant. It is a
+/// constrained Newton's method where the Hessian is approximated by a scaled
+/// identity matrix.
+/// Only declared here; it takes the same arguments as Gpsr(), which calls it
+/// when opts.use_gpsr_bb is true.
+template<typename Real>
+Real GpsrBB(const GpsrConfig &opts, const SpMatrix<Real> &H,
+            const Vector<Real> &g, Vector<Real> *x,
+            const char *debug_str = "[unknown]");
+
+
+}  // namespace kaldi
+
+#endif  // KALDI_MATRIX_KALDI_GPSR_H_
diff --git a/kaldi_io/src/kaldi/matrix/kaldi-matrix-inl.h b/kaldi_io/src/kaldi/matrix/kaldi-matrix-inl.h
new file mode 100644
index 0000000..8bc4749
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/kaldi-matrix-inl.h
@@ -0,0 +1,62 @@
+// matrix/kaldi-matrix-inl.h
+
+// Copyright 2009-2011  Microsoft Corporation;  Haihua Xu
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_MATRIX_KALDI_MATRIX_INL_H_
+#define KALDI_MATRIX_KALDI_MATRIX_INL_H_ 1
+
+#include "matrix/kaldi-vector.h"
+
+namespace kaldi {
+
+/// Default constructor: builds an empty matrix by handing the base class a
+/// NULL data pointer and zero for each of its three remaining arguments.
+template<typename Real>
+Matrix<Real>::Matrix()
+    : MatrixBase<Real>(NULL, 0, 0, 0) {
+}
+
+
+// Declarations of the explicit specializations of the member template
+// MatrixBase<Real>::AddVecVec for the same-precision cases (float/float and
+// double/double).  They are only declared here; declaring them prevents the
+// primary template from being implicitly instantiated for these types.
+template<>
+template<>
+void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float> &ra, const VectorBase<float> &rb);
+
+template<>
+template<>
+void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double> &ra, const VectorBase<double> &rb); 
+
+/// Stream insertion for matrices: forwards to M.Write() with false as its
+/// second argument and returns the stream so that calls can be chained.
+template<typename Real>
+inline std::ostream &operator<<(std::ostream &os, const MatrixBase<Real> &M) {
+  const bool binary = false;  // presumably Write's binary flag, i.e. text mode
+  M.Write(os, binary);
+  return os;
+}
+
+/// Stream extraction into a Matrix: forwards to M.Read() with false as its
+/// second argument and returns the stream so that calls can be chained.
+template<typename Real>
+inline std::istream &operator>>(std::istream &is, Matrix<Real> &M) {
+  const bool binary = false;  // presumably Read's binary flag, i.e. text mode
+  M.Read(is, binary);
+  return is;
+}
+
+
+/// Stream extraction into a MatrixBase: forwards to M.Read() with false as
+/// its second argument and returns the stream so that calls can be chained.
+template<typename Real>
+inline std::istream &operator>>(std::istream &is, MatrixBase<Real> &M) {
+  const bool binary = false;  // presumably Read's binary flag, i.e. text mode
+  M.Read(is, binary);
+  return is;
+}
+
+}// namespace kaldi
+
+
+#endif  // KALDI_MATRIX_KALDI_MATRIX_INL_H_
diff --git a/kaldi_io/src/kaldi/matrix/kaldi-matrix.h b/kaldi_io/src/kaldi/matrix/kaldi-matrix.h
new file mode 100644
index 0000000..e6829e0
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/kaldi-matrix.h
@@ -0,0 +1,983 @@
+// matrix/kaldi-matrix.h
+
+// Copyright 2009-2011  Ondrej Glembek;  Microsoft Corporation;  Lukas Burget;
+//                      Saarland University;  Petr Schwarz;  Yanmin Qian;
+//                      Karel Vesely;  Go Vivace Inc.;  Haihua Xu
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_MATRIX_KALDI_MATRIX_H_
+#define KALDI_MATRIX_KALDI_MATRIX_H_ 1
+
+#include "matrix/matrix-common.h"
+
+namespace kaldi {
+
+/// @{ \addtogroup matrix_funcs_scalar
+
+/// We need to declare this here as it will be a friend function
+/// of MatrixBase.
+/// Returns tr(A B), or tr(A B^T); which of the two is presumably selected by
+/// \p trans (default kNoTrans) -- confirm against the definition.
+template<typename Real>
+Real TraceMatMat(const MatrixBase<Real> &A, const MatrixBase<Real> &B,
+                 MatrixTransposeType trans = kNoTrans);
+/// @}
+
+/// \addtogroup matrix_group
+/// @{
+
+/// Base class which provides matrix operations not involving resizing
+/// or allocation.   Classes Matrix and SubMatrix inherit from it and take care
+/// of allocation and resizing.
+template<typename Real>
+class MatrixBase {
+ public:
+  // so this child can access protected members of other instances.
+  friend class Matrix<Real>;
+  // friend declarations for CUDA matrices (see ../cudamatrix/)
+  friend class CuMatrixBase<Real>;
+  friend class CuMatrix<Real>;
+  friend class CuSubMatrix<Real>;
+  friend class CuPackedMatrix<Real>;
+  
+  friend class PackedMatrix<Real>;
+
+  /// Returns number of rows (or zero for empty matrix).
+  MatrixIndexT NumRows() const {
+    return num_rows_;
+  }
+
+  /// Returns number of columns (or zero for empty matrix).
+  MatrixIndexT NumCols() const {
+    return num_cols_;
+  }
+
+  /// Returns the stride, i.e. the distance in memory between the starts of
+  /// consecutive rows.  Will be >= NumCols.
+  MatrixIndexT Stride() const {
+    return stride_;
+  }
+
+  /// Returns the size in bytes of the data held by the matrix, computed as
+  /// rows * stride * sizeof(Real) in size_t arithmetic.
+  size_t  SizeInBytes() const {
+    const size_t rows = static_cast<size_t>(num_rows_);
+    const size_t row_len = static_cast<size_t>(stride_);
+    return rows * row_len * sizeof(Real);
+  }
+
+  /// Read-only access to the raw data pointer.
+  const Real* Data() const { return data_; }
+
+  /// Writable access to the raw data pointer.
+  Real* Data() {
+    return data_;
+  }
+
+  /// Returns pointer to data for one row (non-const).
+  /// Asserts that i is a valid row index; both sides are cast to the unsigned
+  /// index type so that one comparison covers both bounds.
+  inline  Real* RowData(MatrixIndexT i) {
+    KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
+                 static_cast<UnsignedMatrixIndexT>(num_rows_));
+    // Widen before multiplying (as SizeInBytes() does) so that i * stride_
+    // cannot overflow when MatrixIndexT is narrower than size_t.
+    return data_ + static_cast<size_t>(i) * static_cast<size_t>(stride_);
+  }
+
+  /// Returns pointer to data for one row (const).
+  /// Asserts that i is a valid row index; both sides are cast to the unsigned
+  /// index type so that one comparison covers both bounds.
+  inline const Real* RowData(MatrixIndexT i) const {
+    KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
+                 static_cast<UnsignedMatrixIndexT>(num_rows_));
+    // Widen before multiplying (as SizeInBytes() does) so that i * stride_
+    // cannot overflow when MatrixIndexT is narrower than size_t.
+    return data_ + static_cast<size_t>(i) * static_cast<size_t>(stride_);
+  }
+
+  /// Indexing operator, non-const: returns a reference to element (r, c)
+  /// (only checks sizes if compiled with -DKALDI_PARANOID)
+  inline Real&  operator() (MatrixIndexT r, MatrixIndexT c) {
+    KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
+                          static_cast<UnsignedMatrixIndexT>(num_rows_) &&
+                          static_cast<UnsignedMatrixIndexT>(c) <
+                          static_cast<UnsignedMatrixIndexT>(num_cols_));
+    // Widen before multiplying (as SizeInBytes() does) so that r * stride_
+    // cannot overflow when MatrixIndexT is narrower than size_t.
+    return *(data_ + static_cast<size_t>(r) * static_cast<size_t>(stride_) + c);
+  }
+  /// Named equivalent of the non-const indexing operator, provided for ease
+  /// of debugging (gdb doesn't work with parenthesis operator).
+  Real &Index (MatrixIndexT r, MatrixIndexT c) {
+    return this->operator()(r, c);
+  }
+  
+  /// Indexing operator, const: returns element (r, c) by value
+  /// (only checks sizes if compiled with -DKALDI_PARANOID)
+  inline const Real operator() (MatrixIndexT r, MatrixIndexT c) const {
+    KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
+                          static_cast<UnsignedMatrixIndexT>(num_rows_) &&
+                          static_cast<UnsignedMatrixIndexT>(c) <
+                          static_cast<UnsignedMatrixIndexT>(num_cols_));
+    // Widen before multiplying (as SizeInBytes() does) so that r * stride_
+    // cannot overflow when MatrixIndexT is narrower than size_t.
+    return *(data_ + static_cast<size_t>(r) * static_cast<size_t>(stride_) + c);
+  }
+
+  /*   Basic setting-to-special values functions. */
+
+  /// Sets matrix to zero.
+  void SetZero();
+  /// Sets all elements to a specific value.
+  void Set(Real);
+  /// Sets to zero, except ones along diagonal [for non-square matrices too]
+  void SetUnit();
+  /// Sets to random values of a normal distribution
+  void SetRandn();
+  /// Sets to numbers uniformly distributed on (0, 1)
+  void SetRandUniform();
+
+  /*  Copying functions.  These do not resize the matrix! */
+
+
+  /// Copy given matrix. (no resize is done).
+  template<typename OtherReal>
+  void CopyFromMat(const MatrixBase<OtherReal> & M,
+                   MatrixTransposeType trans = kNoTrans);
+
+  /// Copy from compressed matrix.
+  void CopyFromMat(const CompressedMatrix &M);
+  
+  /// Copy given spmatrix. (no resize is done).
+  template<typename OtherReal>
+  void CopyFromSp(const SpMatrix<OtherReal> &M);
+
+  /// Copy given tpmatrix. (no resize is done).
+  template<typename OtherReal>
+  void CopyFromTp(const TpMatrix<OtherReal> &M,
+                  MatrixTransposeType trans = kNoTrans);
+  
+  /// Copy from CUDA matrix.  Implemented in ../cudamatrix/cu-matrix.h
+  template<typename OtherReal>  
+  void CopyFromMat(const CuMatrixBase<OtherReal> &M,
+                   MatrixTransposeType trans = kNoTrans);
+
+  /// Inverse of vec() operator. Copies vector into matrix, row-by-row.
+  /// Note that rv.Dim() must either equal NumRows()*NumCols() or
+  /// NumCols()-- this has two modes of operation.
+  void CopyRowsFromVec(const VectorBase<Real> &v);
+
+  /// This version of CopyRowsFromVec is implemented in ../cudamatrix/cu-vector.cc
+  void CopyRowsFromVec(const CuVectorBase<Real> &v);
+  
+  template<typename OtherReal>
+  void CopyRowsFromVec(const VectorBase<OtherReal> &v);
+
+  /// Copies vector into matrix, column-by-column.
+  /// Note that rv.Dim() must either equal NumRows()*NumCols() or NumRows();
+  /// this has two modes of operation.
+  void CopyColsFromVec(const VectorBase<Real> &v);
+  
+  /// Copy vector into specific column of matrix.
+  void CopyColFromVec(const VectorBase<Real> &v, const MatrixIndexT col);
+  /// Copy vector into specific row of matrix.
+  void CopyRowFromVec(const VectorBase<Real> &v, const MatrixIndexT row);
+  /// Copy vector into diagonal of matrix.
+  void CopyDiagFromVec(const VectorBase<Real> &v);
+
+  /* Accessing of sub-parts of the matrix. */
+
+  /// Return specific row of matrix [const].
+  /// Asserts that i is a valid row index; the returned SubVector is built
+  /// from the row's start pointer and NumCols().
+  inline const SubVector<Real> Row(MatrixIndexT i) const {
+    KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
+                 static_cast<UnsignedMatrixIndexT>(num_rows_));
+    // Widen before multiplying (as SizeInBytes() does) so that i * stride_
+    // cannot overflow when MatrixIndexT is narrower than size_t.
+    return SubVector<Real>(
+        data_ + static_cast<size_t>(i) * static_cast<size_t>(stride_),
+        NumCols());
+  }
+
+  /// Return specific row of matrix.
+  /// Asserts that i is a valid row index; the returned SubVector is built
+  /// from the row's start pointer and NumCols().
+  inline SubVector<Real> Row(MatrixIndexT i) {
+    KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
+                 static_cast<UnsignedMatrixIndexT>(num_rows_));
+    // Widen before multiplying (as SizeInBytes() does) so that i * stride_
+    // cannot overflow when MatrixIndexT is narrower than size_t.
+    return SubVector<Real>(
+        data_ + static_cast<size_t>(i) * static_cast<size_t>(stride_),
+        NumCols());
+  }
+
+  /// Returns the sub-matrix made of num_rows rows starting at row_offset and
+  /// num_cols columns starting at col_offset, as a SubMatrix built from *this.
+  SubMatrix<Real> Range(const MatrixIndexT row_offset,
+                        const MatrixIndexT num_rows,
+                        const MatrixIndexT col_offset,
+                        const MatrixIndexT num_cols) const {
+    return SubMatrix<Real>(*this,
+                           row_offset, num_rows,
+                           col_offset, num_cols);
+  }
+  /// Returns the sub-matrix made of num_rows rows starting at row_offset,
+  /// spanning all columns.
+  SubMatrix<Real> RowRange(const MatrixIndexT row_offset,
+                           const MatrixIndexT num_rows) const {
+    return Range(row_offset, num_rows, 0, num_cols_);
+  }
+  /// Returns the sub-matrix made of num_cols columns starting at col_offset,
+  /// spanning all rows.
+  SubMatrix<Real> ColRange(const MatrixIndexT col_offset,
+                           const MatrixIndexT num_cols) const {
+    return Range(0, num_rows_, col_offset, num_cols);
+  }
+
+  /* Various special functions. */
+  /// Returns sum of all elements in matrix.
+  Real Sum() const;
+  /// Returns trace of matrix.
+  Real Trace(bool check_square = true) const;
+  // If check_square = true, will crash if matrix is not square.
+
+  /// Returns maximum element of matrix.
+  Real Max() const;
+  /// Returns minimum element of matrix.
+  Real Min() const;
+
+  /// Element by element multiplication with a given matrix.
+  void MulElements(const MatrixBase<Real> &A);
+
+  /// Divide each element by the corresponding element of a given matrix.
+  void DivElements(const MatrixBase<Real> &A);
+
+  /// Multiply each element with a scalar value.
+  void Scale(Real alpha);
+
+  /// Set, element-by-element, *this = max(*this, A)
+  void Max(const MatrixBase<Real> &A);
+
+  /// Equivalent to (*this) = (*this) * diag(scale).  Scaling
+  /// each column by a scalar taken from that dimension of the vector.
+  void MulColsVec(const VectorBase<Real> &scale);
+
+  /// Equivalent to (*this) = diag(scale) * (*this).  Scaling
+  /// each row by a scalar taken from that dimension of the vector.
+  void MulRowsVec(const VectorBase<Real> &scale);
+
+  /// Divide each row into src.NumCols() equal groups, and then scale i'th row's
+  /// j'th group of elements by src(i, j).  Requires src.NumRows() ==
+  /// this->NumRows() and this->NumCols() % src.NumCols() == 0.
+  void MulRowsGroupMat(const MatrixBase<Real> &src);
+    
+  /// Returns logdet of matrix.
+  Real LogDet(Real *det_sign = NULL) const;
+  
+  /// matrix inverse.
+  /// if inverse_needed = false, will fill matrix with garbage.
+  /// (only useful if logdet wanted).
+  void Invert(Real *log_det = NULL, Real *det_sign = NULL,
+              bool inverse_needed = true);
+  /// matrix inverse [double].
+  /// if inverse_needed = false, will fill matrix with garbage
+  /// (only useful if logdet wanted).
+  /// Does inversion in double precision even if matrix was not double.
+  void InvertDouble(Real *LogDet = NULL, Real *det_sign = NULL,
+                      bool inverse_needed = true);
+
+  /// Inverts all the elements of the matrix
+  void InvertElements();
+
+  /// Transpose the matrix.  This one is only
+  /// applicable to square matrices (the one in the
+  /// Matrix child class works also for non-square).
+  void Transpose();
+
+  /// Copies column r from column indices[r] of src.
+  /// As a special case, if indices[r] == -1, sets column r to zero.
+  /// indices.size() must equal this->NumCols(),
+  /// all elements of "indices" must be in [-1, src.NumCols()-1],
+  /// and src.NumRows() must equal this->NumRows().
+  void CopyCols(const MatrixBase<Real> &src,
+                const std::vector<MatrixIndexT> &indices);
+
+  /// Copies row r from row indices[r] of src.
+  /// As a special case, if indices[r] == -1, sets row r to zero.
+  /// indices.size() must equal this->NumRows(),
+  /// all elements of "indices" must be in [-1, src.NumRows()-1],
+  /// and src.NumCols() must equal this->NumCols().
+  void CopyRows(const MatrixBase<Real> &src,
+                const std::vector<MatrixIndexT> &indices);
+  
+  /// Applies floor to all matrix elements
+  void ApplyFloor(Real floor_val);
+
+  /// Applies ceiling to all matrix elements
+  void ApplyCeiling(Real ceiling_val);
+
+  /// Calculates log of all the matrix elements
+  void ApplyLog();
+
+  /// Exponentiate each of the elements.
+  void ApplyExp();
+
+  /// Applies power to all matrix elements
+  void ApplyPow(Real power);
+
+  /// Apply power to the absolute value of each element. 
+  /// Include the sign of the input element if include_sign == true.
+  /// If the power is negative and the input to the power is zero,
+  /// The output will be set zero.
+  void ApplyPowAbs(Real power, bool include_sign=false);
+  
+  /// Applies the Heaviside step function (x > 0 ? 1 : 0) to all matrix elements
+  /// Note: in general you can make different choices for x = 0, but for now
+  /// please leave it as it is (i.e. returning zero) because it affects the
+  /// RectifiedLinearComponent in the neural net code.
+  void ApplyHeaviside();
+  
+  /// Eigenvalue Decomposition of a square NxN matrix into the form (*this) = P D
+  /// P^{-1}.  Be careful: the relationship of D to the eigenvalues we output is
+  /// slightly complicated, due to the need for P to be real.  In the symmetric
+  /// case D is diagonal and real, but in
+  /// the non-symmetric case there may be complex-conjugate pairs of eigenvalues.
+  /// In this case, for the equation (*this) = P D P^{-1} to hold, D must actually
+  /// be block diagonal, with 2x2 blocks corresponding to any such pairs.  If a
+  /// pair is lambda +- i*mu, D will have a corresponding 2x2 block
+  /// [lambda, mu; -mu, lambda].
+  /// Note that if the input matrix (*this) is non-invertible, P may not be invertible
+  /// so in this case instead of the equation (*this) = P D P^{-1} holding, we have
+  /// instead (*this) P = P D.
+  ///
+  /// The non-member function CreateEigenvalueMatrix creates D from eigs_real and eigs_imag.
+  void Eig(MatrixBase<Real> *P,
+           VectorBase<Real> *eigs_real,
+           VectorBase<Real> *eigs_imag) const;
+
+  /// The Power method attempts to take the matrix to a power using a method that
+  /// works in general for fractional and negative powers.  The input matrix must
+  /// be invertible and have reasonable condition (or we don't guarantee the
+  /// results).  The method is based on the eigenvalue decomposition.  It will
+  /// return false and leave the matrix unchanged, if at entry the matrix had
+  /// real negative eigenvalues (or if it had zero eigenvalues and the power was
+  /// negative).
+  bool Power(Real pow);
+
+  /** Singular value decomposition
+     Major limitations:
+     For nonsquare matrices, we assume m>=n (NumRows >= NumCols), and we return
+     the "skinny" Svd, i.e. the matrix in the middle is diagonal, and the
+     one on the left is rectangular.
+
+     In Svd, *this = U*diag(S)*Vt.
+     Null pointers for U and/or Vt at input mean we do not want that output.  We
+     expect that S.Dim() == m, U is either NULL or m by n,
+     and Vt is either NULL or n by n.
+     The singular values are not sorted (use SortSvd for that).  */
+  void DestructiveSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
+                      MatrixBase<Real> *Vt);  // Destroys calling matrix.
+
+  /// Compute SVD (*this) = U diag(s) Vt.   Note that the V in the call is already
+  /// transposed; the normal formulation is U diag(s) V^T.
+  /// Null pointers for U or V mean we don't want that output (this saves
+  /// compute).  The singular values are not sorted (use SortSvd for that).
+  void Svd(VectorBase<Real> *s, MatrixBase<Real> *U,
+           MatrixBase<Real> *Vt) const;
+  /// Compute SVD but only retain the singular values: calls the three-argument
+  /// Svd() with NULL for both U and Vt.
+  void Svd(VectorBase<Real> *s) const {
+    Svd(s, NULL, NULL);
+  }
+
+
+  /// Returns the smallest singular value, obtained by running Svd() into a
+  /// temporary vector of dimension min(NumRows(), NumCols()) and taking its
+  /// minimum.
+  Real MinSingularValue() const {
+    const MatrixIndexT num_singular = std::min(NumRows(), NumCols());
+    Vector<Real> singular_values(num_singular);
+    Svd(&singular_values);
+    return singular_values.Min();
+  }
+
+  void TestUninitialized() const; // This function is designed so that if any element
+  // of the matrix is uninitialized memory, valgrind will complain.
+  
+  /// Returns condition number by computing Svd.  Works even if cols > rows.
+  /// Returns infinity if all singular values are zero.
+  Real Cond() const;
+
+  /// Returns true if matrix is Symmetric.
+  bool IsSymmetric(Real cutoff = 1.0e-05) const;  // replace magic number
+
+  /// Returns true if matrix is Diagonal.
+  bool IsDiagonal(Real cutoff = 1.0e-05) const;  // replace magic number
+
+  /// Returns true if the matrix is all zeros, except for ones on diagonal.  (it
+  /// does not have to be square).  More specifically, this function returns
+  /// false if for any i, j, (*this)(i, j) differs by more than cutoff from the
+  /// expression (i == j ? 1 : 0).
+  bool IsUnit(Real cutoff = 1.0e-05) const;     // replace magic number
+
+  /// Returns true if matrix is all zeros.
+  bool IsZero(Real cutoff = 1.0e-05) const;     // replace magic number
+
+  /// Frobenius norm, which is the sqrt of sum of square elements.  Same as Schatten 2-norm,
+  /// or just "2-norm".
+  Real FrobeniusNorm() const;
+
+  /// Returns true if ((*this)-other).FrobeniusNorm()
+  /// <= tol * (*this).FrobeniusNorm().
+  bool ApproxEqual(const MatrixBase<Real> &other, float tol = 0.01) const;
+
+  /// Tests for exact equality.  It's usually preferable to use ApproxEqual.
+  bool Equal(const MatrixBase<Real> &other) const;
+
+  /// largest absolute value.
+  Real LargestAbsElem() const;  // largest absolute value.
+
+  /// Returns log(sum(exp())) without exp overflow
+  /// If prune > 0.0, it uses a pruning beam, discarding
+  /// terms less than (max - prune).  Note: in future
+  /// we may change this so that if prune = 0.0, it takes
+  /// the max, so use -1 if you don't want to prune.
+  Real LogSumExp(Real prune = -1.0) const;
+
+  /// Apply soft-max to the collection of all elements of the
+  /// matrix and return normalizer (log sum of exponentials).
+  Real ApplySoftMax();
+  
+  /// Set each element to the sigmoid of the corresponding element of "src".
+  void Sigmoid(const MatrixBase<Real> &src);
+
+  /// Set each element to y = log(1 + exp(x))
+  void SoftHinge(const MatrixBase<Real> &src);
+  
+  /// Apply the function y(i) = (sum_{j = i*G}^{(i+1)*G-1} x_j^(power))^(1 / power).
+  /// Requires src.NumRows() == this->NumRows() and  src.NumCols() % this->NumCols() == 0.
+  void GroupPnorm(const MatrixBase<Real> &src, Real power);
+
+
+  /// Calculate derivatives for the GroupPnorm function above...
+  /// if "input" is the input to the GroupPnorm function above (i.e. the "src" variable),
+  /// and "output" is the result of the computation (i.e. the "this" of that function
+  /// call), and *this has the same dimension as "input", then it sets each element
+  /// of *this to the derivative d(output-elem)/d(input-elem) for each element of "input", where
+  /// "output-elem" is whichever element of output depends on that input element.
+  void GroupPnormDeriv(const MatrixBase<Real> &input, const MatrixBase<Real> &output,
+                       Real power);
+
+
+  /// Set each element to the tanh of the corresponding element of "src".
+  void Tanh(const MatrixBase<Real> &src);
+
+  // Function used in backpropagating derivatives of the sigmoid function:
+  // element-by-element, set *this = diff * value * (1.0 - value).
+  void DiffSigmoid(const MatrixBase<Real> &value,
+                   const MatrixBase<Real> &diff);
+
+  // Function used in backpropagating derivatives of the tanh function:
+  // element-by-element, set *this = diff * (1.0 - value^2).
+  void DiffTanh(const MatrixBase<Real> &value,
+                const MatrixBase<Real> &diff);
+  
+  /** Uses Svd to compute the eigenvalue decomposition of a symmetric positive
+   * semi-definite matrix: (*this) = rP * diag(rS) * rP^T, with rP an
+   * orthogonal matrix so rP^{-1} = rP^T.   Throws exception if input was not
+   * positive semi-definite (check_thresh controls how stringent the check is;
+   * set it to 2 to ensure it won't ever complain, but it will zero out negative
+   * dimensions in your matrix.
+  */
+  void SymPosSemiDefEig(VectorBase<Real> *s, MatrixBase<Real> *P,
+                        Real check_thresh = 0.001);
+
+  friend Real kaldi::TraceMatMat<Real>(const MatrixBase<Real> &A,
+      const MatrixBase<Real> &B, MatrixTransposeType trans);  // tr (A B)
+
+  // so it can get around const restrictions on the pointer to data_.
+  friend class SubMatrix<Real>;
+
+  /// Add a scalar to each element
+  void Add(const Real alpha);
+
+  /// Add a scalar to each diagonal element.
+  void AddToDiag(const Real alpha);
+
+  /// *this += alpha * a * b^T
+  template<typename OtherReal>
+  void AddVecVec(const Real alpha, const VectorBase<OtherReal> &a,
+                 const VectorBase<OtherReal> &b);
+
+  /// [each row of *this] += alpha * v
+  template<typename OtherReal>
+  void AddVecToRows(const Real alpha, const VectorBase<OtherReal> &v);
+  
+  /// [each col of *this] += alpha * v
+  template<typename OtherReal>
+  void AddVecToCols(const Real alpha, const VectorBase<OtherReal> &v);      
+  
+  /// *this += alpha * M [or M^T]
+  void AddMat(const Real alpha, const MatrixBase<Real> &M,
+              MatrixTransposeType transA = kNoTrans);
+
+  /// *this = beta * *this + alpha * M M^T, for symmetric matrices.  It only
+  /// updates the lower triangle of *this.  It will leave the matrix asymmetric;
+  /// if you need it symmetric as a regular matrix, do CopyLowerToUpper().
+  void SymAddMat2(const Real alpha, const MatrixBase<Real> &M,
+                  MatrixTransposeType transA, Real beta);
+
+  /// *this = beta * *this + alpha * diag(v) * M [or M^T].
+  /// The same as adding M but scaling each row M_i by v(i).
+  void AddDiagVecMat(const Real alpha, VectorBase<Real> &v,
+                     const MatrixBase<Real> &M, MatrixTransposeType transM, 
+                     Real beta = 1.0);
+ 
+  /// *this = beta * *this + alpha * M [or M^T] * diag(v) 
+  /// The same as adding M but scaling each column M_j by v(j).
+  void AddMatDiagVec(const Real alpha, 
+                     const MatrixBase<Real> &M, MatrixTransposeType transM, 
+                     VectorBase<Real> &v,
+                     Real beta = 1.0);
+
+  /// *this = beta * *this + alpha * A .* B (.* element by element multiplication)
+  void AddMatMatElements(const Real alpha,
+                         const MatrixBase<Real>& A,
+                         const MatrixBase<Real>& B,
+                         const Real beta);
+  
+  /// *this += alpha * S
+  template<typename OtherReal>
+  void AddSp(const Real alpha, const SpMatrix<OtherReal> &S);
+
+  void AddMatMat(const Real alpha,
+                 const MatrixBase<Real>& A, MatrixTransposeType transA,
+                 const MatrixBase<Real>& B, MatrixTransposeType transB,
+                 const Real beta);
+ 
+  /// *this = a * b / c (by element; when c = 0, *this = a)
+  void AddMatMatDivMat(const MatrixBase<Real>& A,
+                        const MatrixBase<Real>& B,
+                       const MatrixBase<Real>& C);
+
+  /// A version of AddMatMat specialized for when the second argument
+  /// contains a lot of zeroes.
+  void AddMatSmat(const Real alpha,
+                  const MatrixBase<Real>& A, MatrixTransposeType transA,
+                  const MatrixBase<Real>& B, MatrixTransposeType transB,
+                  const Real beta);
+
+  /// A version of AddMatMat specialized for when the first argument
+  /// contains a lot of zeroes.  
+  void AddSmatMat(const Real alpha,
+                  const MatrixBase<Real>& A, MatrixTransposeType transA,
+                  const MatrixBase<Real>& B, MatrixTransposeType transB,
+                  const Real beta);
+
+  /// this <-- beta*this + alpha*A*B*C.
+  void AddMatMatMat(const Real alpha,
+                    const MatrixBase<Real>& A, MatrixTransposeType transA,
+                    const MatrixBase<Real>& B, MatrixTransposeType transB,
+                    const MatrixBase<Real>& C, MatrixTransposeType transC,
+                    const Real beta);
+
+  /// this <-- beta*this + alpha*A*B, where A is a symmetric packed matrix.
+  // Like the routines below, this is really a stub that needs to be made
+  // more efficient: A is first expanded into a temporary full Matrix.
+  void AddSpMat(const Real alpha,
+                const SpMatrix<Real>& A,
+                const MatrixBase<Real>& B, MatrixTransposeType transB,
+                const Real beta) {
+    const Matrix<Real> full_A(A);
+    AddMatMat(alpha, full_A, kNoTrans, B, transB, beta);
+  }
+  /// this <-- beta*this + alpha*A*B, where A is a packed triangular matrix.
+  void AddTpMat(const Real alpha,
+                const TpMatrix<Real>& A, MatrixTransposeType transA,
+                const MatrixBase<Real>& B, MatrixTransposeType transB,
+                const Real beta) {
+    const Matrix<Real> full_A(A);  // untransposed copy; transA applied below
+    AddMatMat(alpha, full_A, transA, B, transB, beta);
+  }
+  /// this <-- beta*this + alpha*A*B, where B is a symmetric packed matrix.
+  void AddMatSp(const Real alpha,
+                const MatrixBase<Real>& A, MatrixTransposeType transA,
+                const SpMatrix<Real>& B,
+                const Real beta) {
+    const Matrix<Real> full_B(B);  // B is symmetric, so no transpose flag
+    AddMatMat(alpha, A, transA, full_B, kNoTrans, beta);
+  }
+  /// this <-- beta*this + alpha*A*B*C, where A and C are symmetric packed.
+  void AddSpMatSp(const Real alpha,
+                  const SpMatrix<Real> &A,
+                  const MatrixBase<Real>& B, MatrixTransposeType transB,
+                  const SpMatrix<Real>& C,
+                  const Real beta) {
+    const Matrix<Real> full_A(A), full_C(C);
+    AddMatMatMat(alpha, full_A, kNoTrans, B, transB, full_C, kNoTrans, beta);
+  }
+  /// this <-- beta*this + alpha*A*B, where B is a packed triangular matrix.
+  void AddMatTp(const Real alpha,
+                const MatrixBase<Real>& A, MatrixTransposeType transA,
+                const TpMatrix<Real>& B, MatrixTransposeType transB,
+                const Real beta) {
+    const Matrix<Real> full_B(B);  // untransposed copy; transB applied below
+    AddMatMat(alpha, A, transA, full_B, transB, beta);
+  }
+
+  /// this <-- beta*this + alpha*A*B, where A and B are packed triangular.
+  void AddTpTp(const Real alpha,
+               const TpMatrix<Real>& A, MatrixTransposeType transA,
+               const TpMatrix<Real>& B, MatrixTransposeType transB,
+               const Real beta) {
+    const Matrix<Real> full_A(A), full_B(B);
+    AddMatMat(alpha, full_A, transA, full_B, transB, beta);
+  }
+
+  /// this <-- beta*this + alpha*A*B.
+  // This one is more efficient, not like the others above.
+  void AddSpSp(const Real alpha,
+               const SpMatrix<Real>& A, const SpMatrix<Real>& B,
+               const Real beta);
+
+  /// Copy lower triangle to upper triangle (symmetrize)
+  void CopyLowerToUpper();
+
+  /// Copy upper triangle to lower triangle (symmetrize)
+  void CopyUpperToLower();
+  
+  /// This function orthogonalizes the rows of a matrix using the Gram-Schmidt
+  /// process.  It is only applicable if NumRows() <= NumCols().  It will use
+  /// random number generation to fill in rows with something nonzero, in cases
+  /// where the original matrix was of deficient row rank.
+  void OrthogonalizeRows();
+
+  /// stream read.
+  /// Use instead of stream<<*this, if you want to add to existing contents.
+  // Will throw exception on failure.
+  void Read(std::istream & in, bool binary, bool add = false);
+  /// write to stream.
+  void Write(std::ostream & out, bool binary) const;
+
+  // Below is internal methods for Svd, user does not have to know about this.
+#if !defined(HAVE_ATLAS) && !defined(USE_KALDI_SVD)
+  // protected:
+  // Should be protected but used directly in testing routine.
+  // destroys *this!
+  void LapackGesvd(VectorBase<Real> *s, MatrixBase<Real> *U,
+                     MatrixBase<Real> *Vt);
+#else
+ protected:
+  // destroys *this!
+  bool JamaSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
+               MatrixBase<Real> *V);
+
+#endif
+ protected:
+
+  /// Child-only initializer: stores the pointer and sizes it is given.
+  explicit MatrixBase(Real *data, MatrixIndexT cols, MatrixIndexT rows,
+                      MatrixIndexT stride)
+      : data_(data), num_cols_(cols), num_rows_(rows), stride_(stride)
+  { KALDI_ASSERT_IS_FLOATING_TYPE(Real); }
+
+  /// Empty initializer, callable only from child: a NULL data pointer with
+  /// zero rows, columns and stride, so no member is left indeterminate.
+  explicit MatrixBase(): data_(NULL), num_cols_(0), num_rows_(0), stride_(0) {
+    KALDI_ASSERT_IS_FLOATING_TYPE(Real);
+  }
+
+  // Protected so that a MatrixBase cannot be deleted through a base pointer.
+  ~MatrixBase() {}
+
+  /// Hands out a non-const pointer to the data even when *this is const.
+  /// SubMatrix needs this workaround: C++ offers no "public const"
+  /// inheritance, so making SubMatrix fully const-correct would mean
+  /// overriding many of the Matrix functions, which would be a lot of
+  /// work.
+  Real *Data_workaround() const {
+    return this->data_;
+  }
+
+  /// data memory area
+  Real*   data_;
+
+  /// these attributes store the real matrix size as it is stored in memory
+  /// including memalignment
+  MatrixIndexT    num_cols_;   /// < Number of columns
+  MatrixIndexT    num_rows_;   /// < Number of rows
+  /** True number of columns for the internal matrix. This number may differ
+   * from num_cols_ as memory alignment might be used. */
+  MatrixIndexT    stride_;
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(MatrixBase);
+};
+
+/// A matrix class that allocates, resizes and frees its own storage.
+template<typename Real>
+class Matrix : public MatrixBase<Real> {
+ public:
+
+  /// Empty constructor.
+  Matrix();
+
+  /// Basic constructor.  Sets to zero by default.
+  /// If resize_type == kUndefined, memory contents are undefined.
+  Matrix(const MatrixIndexT r, const MatrixIndexT c,
+         MatrixResizeType resize_type = kSetZero):
+      MatrixBase<Real>() { Resize(r, c, resize_type); }
+
+  /// Copy constructor from CUDA matrix
+  /// This is defined in ../cudamatrix/cu-matrix.h
+  template<typename OtherReal>
+  explicit Matrix(const CuMatrixBase<OtherReal> &cu,
+                  MatrixTransposeType trans = kNoTrans);
+
+  /// Swaps the contents of *this and *other.  Shallow swap.
+  void Swap(Matrix<Real> *other);
+
+  /// Defined in ../cudamatrix/cu-matrix.cc
+  void Swap(CuMatrix<Real> *mat);
+
+  /// Constructor from any MatrixBase. Can also copy with transpose.
+  /// Allocates new memory.
+  explicit Matrix(const MatrixBase<Real> & M,
+                  MatrixTransposeType trans = kNoTrans);
+
+  /// Same as above, but need to avoid default copy constructor.
+  Matrix(const Matrix<Real> & M);  //  (cannot make explicit)
+
+  /// Copy constructor: as above, but from another type.
+  template<typename OtherReal>
+  explicit Matrix(const MatrixBase<OtherReal> & M,
+                  MatrixTransposeType trans = kNoTrans);
+
+  /// Copy constructor taking SpMatrix...
+  /// It is symmetric, so no option for transpose, and NumRows == Cols
+  template<typename OtherReal>
+  explicit Matrix(const SpMatrix<OtherReal> & M) : MatrixBase<Real>() {
+    Resize(M.NumRows(), M.NumRows(), kUndefined);
+    this->CopyFromSp(M);
+  }
+
+  /// Constructor from CompressedMatrix
+  explicit Matrix(const CompressedMatrix &C);
+
+  /// Copy constructor taking TpMatrix; the result is transposed relative
+  /// to M when trans != kNoTrans.
+  template <typename OtherReal>
+  explicit Matrix(const TpMatrix<OtherReal> & M,
+                  MatrixTransposeType trans = kNoTrans) : MatrixBase<Real>() {
+    if (trans == kNoTrans) {
+      Resize(M.NumRows(), M.NumCols(), kUndefined);
+      this->CopyFromTp(M);
+    } else {
+      Resize(M.NumCols(), M.NumRows(), kUndefined);
+      this->CopyFromTp(M, kTrans);
+    }
+  }
+
+  /// read from stream.
+  // Unlike one in base, allows resizing.
+  void Read(std::istream & in, bool binary, bool add = false);
+
+  /// Remove a specified row.
+  void RemoveRow(MatrixIndexT i);
+
+  /// Transpose the matrix.  Works for non-square
+  /// matrices as well as square ones.
+  void Transpose();
+
+  /// Destructor to free matrices.
+  ~Matrix() { Destroy(); }
+
+  /// Sets matrix to a specified size (zero is OK as long as both r and c are
+  /// zero).  The value of the new data depends on resize_type:
+  ///   -if kSetZero, the new data will be zero
+  ///   -if kUndefined, the new data will be undefined
+  ///   -if kCopyData, the new data will be the same as the old data in any
+  ///      shared positions, and zero elsewhere.
+  /// This function takes time proportional to the number of data elements.
+  void Resize(const MatrixIndexT r,
+              const MatrixIndexT c,
+              MatrixResizeType resize_type = kSetZero);
+
+  /// Assignment operator that takes MatrixBase.  Resizes *this if the
+  /// dimensions differ, then copies the contents of other.
+  Matrix<Real> &operator = (const MatrixBase<Real> &other) {
+    if (this == &other) return *this;  // self-assignment: nothing to copy
+    if (MatrixBase<Real>::NumRows() != other.NumRows() ||
+        MatrixBase<Real>::NumCols() != other.NumCols())
+      Resize(other.NumRows(), other.NumCols(), kUndefined);
+    MatrixBase<Real>::CopyFromMat(other);
+    return *this;
+  }
+
+  /// Assignment operator. Needed for inclusion in std::vector.
+  Matrix<Real> &operator = (const Matrix<Real> &other) {
+    if (this == &other) return *this;  // self-assignment: nothing to copy
+    if (MatrixBase<Real>::NumRows() != other.NumRows() ||
+        MatrixBase<Real>::NumCols() != other.NumCols())
+      Resize(other.NumRows(), other.NumCols(), kUndefined);
+    MatrixBase<Real>::CopyFromMat(other);
+    return *this;
+  }
+
+ private:
+  /// Deallocates memory and sets to empty matrix (dimension 0, 0).
+  void Destroy();
+
+  /// Init assumes the current class contents are invalid (i.e. junk or have
+  /// already been freed), and it sets the matrix to newly allocated memory with
+  /// the specified number of rows and columns.  r == c == 0 is acceptable.  The data
+  /// memory contents will be undefined.
+  void Init(const MatrixIndexT r,
+            const MatrixIndexT c);
+
+};
+/// @} end "addtogroup matrix_group"
+
+/// \addtogroup matrix_funcs_io
+/// @{
+
+/// Header fields of an HTK feature file, as used by ReadHtk() / WriteHtk().
+/// [TODO: change the style of the variables to Kaldi-compliant]
+struct HtkHeader {
+  /// Number of samples.
+  int32    mNSamples;
+  /// Sample period (presumably in HTK's time units -- TODO confirm).
+  int32    mSamplePeriod;
+  /// Sample size (presumably in bytes per sample -- TODO confirm).
+  int16    mSampleSize;
+  /// Sample kind.
+  uint16   mSampleKind;
+};
+
+// Read HTK formatted features from file into matrix.
+template<typename Real>
+bool ReadHtk(std::istream &is, Matrix<Real> *M, HtkHeader *header_ptr);
+
+// Write (HTK format) features to file from matrix.
+template<typename Real>
+bool WriteHtk(std::ostream &os, const MatrixBase<Real> &M, HtkHeader htk_hdr);
+
+// Write (CMUSphinx format) features to file from matrix.
+template<typename Real>
+bool WriteSphinx(std::ostream &os, const MatrixBase<Real> &M);
+
+/// @} end of "addtogroup matrix_funcs_io"
+
+/**
+  Sub-matrix representation.
+  Can work with sub-parts of a matrix using this class.
+  Note that SubMatrix is not very const-correct-- it allows you to
+  change the contents of a const Matrix.  Be careful!
+*/
+
+template<typename Real>
+class SubMatrix : public MatrixBase<Real> {
+ public:
+  // Initialize a SubMatrix from part of a matrix; this is
+  // a bit like A(b:c, d:e) in Matlab.
+  // This initializer is against the proper semantics of "const", since
+  // SubMatrix can change its contents.  It would be hard to implement
+  // a "const-safe" version of this class.
+  SubMatrix(const MatrixBase<Real>& T,
+            const MatrixIndexT ro,  // row offset, 0 < ro < NumRows()
+            const MatrixIndexT r,   // number of rows, r > 0
+            const MatrixIndexT co,  // column offset, 0 < co < NumCols()
+            const MatrixIndexT c);   // number of columns, c > 0
+
+  // This initializer is mostly intended for use in CuMatrix and related
+  // classes.  Be careful!
+  SubMatrix(Real *data,
+            MatrixIndexT num_rows,
+            MatrixIndexT num_cols,
+            MatrixIndexT stride);
+  // Empty destructor; named without <Real>, which C++20 no longer accepts.
+  ~SubMatrix() {}
+
+  /// This type of constructor is needed for Range() to work [in Matrix base
+  /// class]. Cannot make it explicit.  Copies other's pointer and sizes.
+  SubMatrix(const SubMatrix &other):
+      MatrixBase<Real>(other.data_, other.num_cols_, other.num_rows_,
+                       other.stride_) {}
+
+ private:
+  /// Disallow assignment.
+  SubMatrix<Real> &operator = (const SubMatrix<Real> &other);
+};
+/// @} End of "addtogroup matrix_funcs_io".
+
+/// \addtogroup matrix_funcs_scalar
+/// @{
+
+// Some declarations.  These are traces of products.
+
+
+template<typename Real>
+bool ApproxEqual(const MatrixBase<Real> &A, const MatrixBase<Real> &B,
+                 Real tol = 0.01) {
+  return A.ApproxEqual(B, tol);  // thin wrapper over the member function
+}
+
+template<typename Real>
+inline void AssertEqual(const MatrixBase<Real> &A,
+                        const MatrixBase<Real> &B, float tol = 0.01) {
+  KALDI_ASSERT(A.ApproxEqual(B, tol));  // checked via the member ApproxEqual
+}
+
+/// Free-function form of A.Trace(); the result is returned as a double.
+template <typename Real> double TraceMat(const MatrixBase<Real> &A)
+{ return A.Trace(); }
+
+
+/// Returns tr(A B C)
+template <typename Real>
+Real TraceMatMatMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
+                      const MatrixBase<Real> &B, MatrixTransposeType transB,
+                      const MatrixBase<Real> &C, MatrixTransposeType transC);
+
+/// Returns tr(A B C D)
+template <typename Real>
+Real TraceMatMatMatMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
+                         const MatrixBase<Real> &B, MatrixTransposeType transB,
+                         const MatrixBase<Real> &C, MatrixTransposeType transC,
+                         const MatrixBase<Real> &D, MatrixTransposeType transD);
+
+/// @} end "addtogroup matrix_funcs_scalar"
+
+
+/// \addtogroup matrix_funcs_misc
+/// @{
+
+
+/// Function to ensure that SVD is sorted.  This function is made as generic as
+/// possible, to be applicable to other types of problems.  s->Dim() should be
+/// the same as U->NumCols(), and we sort s from greatest to least absolute
+/// value (if sort_on_absolute_value == true) or greatest to least value
+/// otherwise, moving the columns of U, if it exists, and the rows of Vt, if it
+/// exists, around in the same way.  Note: the "absolute value" part won't matter
+/// if this is an actual SVD, since singular values are non-negative.
+template<typename Real> void SortSvd(VectorBase<Real> *s, MatrixBase<Real> *U,
+                                     MatrixBase<Real>* Vt = NULL,
+                                     bool sort_on_absolute_value = true);
+
+/// Creates the eigenvalue matrix D that is part of the decomposition used Matrix::Eig.
+/// D will be block-diagonal with blocks of size 1 (for real eigenvalues) or 2x2
+/// for complex pairs.  If a complex pair is lambda +- i*mu, D will have a corresponding
+/// 2x2 block [lambda, mu; -mu, lambda].
+/// This function will throw if any complex eigenvalues are not in complex conjugate
+/// pairs (or the members of such pairs are not consecutively numbered).
+template<typename Real>
+void CreateEigenvalueMatrix(const VectorBase<Real> &real, const VectorBase<Real> &imag,
+                            MatrixBase<Real> *D);
+
+/// The following function is used in Matrix::Power, and separately tested, so we
+/// declare it here mainly for the testing code to see.  It takes a complex value to
+/// a power using a method that will work for noninteger powers (but will fail if the
+/// complex value is real and negative).
+template<typename Real>
+bool AttemptComplexPower(Real *x_re, Real *x_im, Real power);
+
+
+
+/// @} end of addtogroup matrix_funcs_misc
+
+/// \addtogroup matrix_funcs_io
+/// @{
+template<typename Real>
+std::ostream & operator << (std::ostream & Out, const MatrixBase<Real> & M);
+
+template<typename Real>
+std::istream & operator >> (std::istream & In, MatrixBase<Real> & M);
+
+// The Matrix read allows resizing, so we override the MatrixBase one.
+template<typename Real>
+std::istream & operator >> (std::istream & In, Matrix<Real> & M);
+
+
+template<typename Real>
+bool SameDim(const MatrixBase<Real> &M, const MatrixBase<Real> &N) {
+  return !(M.NumRows() != N.NumRows() || M.NumCols() != N.NumCols());
+}
+
+/// @} end of \addtogroup matrix_funcs_io
+
+
+}  // namespace kaldi
+
+
+
+// we need to include the implementation and some
+// template specializations.
+#include "matrix/kaldi-matrix-inl.h"
+
+
+#endif  // KALDI_MATRIX_KALDI_MATRIX_H_
diff --git a/kaldi_io/src/kaldi/matrix/kaldi-vector-inl.h b/kaldi_io/src/kaldi/matrix/kaldi-vector-inl.h
new file mode 100644
index 0000000..c3a4f52
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/kaldi-vector-inl.h
@@ -0,0 +1,58 @@
+// matrix/kaldi-vector-inl.h
+
+// Copyright 2009-2011   Ondrej Glembek;  Microsoft Corporation;
+//                       Haihua Xu
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+// This is an internal header file, included by other library headers.
+// You should not attempt to use it directly.
+
+#ifndef KALDI_MATRIX_KALDI_VECTOR_INL_H_
+#define KALDI_MATRIX_KALDI_VECTOR_INL_H_ 1
+
+namespace kaldi {
+
+template<typename Real>
+std::ostream &operator<<(std::ostream &os, const VectorBase<Real> &rv) {
+  rv.Write(os, /*binary=*/false);  // stream insertion never writes binary
+  return os;
+}
+
+template<typename Real>
+std::istream &operator>>(std::istream &is, VectorBase<Real> &rv) {
+  rv.Read(is, /*binary=*/false);  // stream extraction never reads binary
+  return is;
+}
+
+template<typename Real>
+std::istream &operator>>(std::istream &is, Vector<Real> &rv) {
+  rv.Read(is, /*binary=*/false);  // uses Vector's own Read, not the base's
+  return is;
+}
+
+template<>
+template<>
+void VectorBase<float>::AddVec(const float alpha, const VectorBase<float> &rv);
+
+template<>
+template<>
+void VectorBase<double>::AddVec<double>(const double alpha,
+                                        const VectorBase<double> &rv);
+
+}  // namespace kaldi
+
+#endif  // KALDI_MATRIX_KALDI_VECTOR_INL_H_
diff --git a/kaldi_io/src/kaldi/matrix/kaldi-vector.h b/kaldi_io/src/kaldi/matrix/kaldi-vector.h
new file mode 100644
index 0000000..2b3395b
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/kaldi-vector.h
@@ -0,0 +1,585 @@
+// matrix/kaldi-vector.h
+
+// Copyright 2009-2012   Ondrej Glembek;  Microsoft Corporation;  Lukas Burget;
+//                       Saarland University (Author: Arnab Ghoshal);
+//                       Ariya Rastrow;  Petr Schwarz;  Yanmin Qian;
+//                       Karel Vesely;  Go Vivace Inc.;  Arnab Ghoshal
+//                       Wei Shi;
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_MATRIX_KALDI_VECTOR_H_
+#define KALDI_MATRIX_KALDI_VECTOR_H_ 1
+
+#include "matrix/matrix-common.h"
+
+namespace kaldi {
+
+/// \addtogroup matrix_group
+/// @{
+
+///  Provides a vector abstraction class.
+///  This class provides a way to work with vectors in kaldi.
+///  It encapsulates basic operations and memory optimizations.
+template<typename Real>
+class VectorBase {
+ public:
+  /// Set vector to all zeros.
+  void SetZero();
+
+  /// Returns true if matrix is all zeros.
+  bool IsZero(Real cutoff = 1.0e-06) const;     // replace magic number
+
+  /// Set all members of a vector to a specified value.
+  void Set(Real f);
+
+  /// Set vector to random normally-distributed noise.
+  void SetRandn();
+
+  /// This function returns a random index into this vector,
+  /// chosen with probability proportional to the corresponding
+  /// element.  Requires that this->Min() >= 0 and this->Sum() > 0.
+  MatrixIndexT RandCategorical() const;
+  
+  /// Returns the  dimension of the vector.
+  inline MatrixIndexT Dim() const { return dim_; }
+
+  /// Returns the size in memory of the vector, in bytes.
+  inline MatrixIndexT SizeInBytes() const { return (dim_*sizeof(Real)); }
+
+  /// Returns a pointer to the start of the vector's data.
+  inline Real* Data() { return data_; }
+
+  /// Returns a pointer to the start of the vector's data (const).
+  inline const Real* Data() const { return data_; }
+
+  /// Indexing  operator (const).
+  inline Real operator() (MatrixIndexT i) const {
+    KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
+                 static_cast<UnsignedMatrixIndexT>(dim_));
+    return *(data_ + i);
+  }
+
+  /// Indexing operator (non-const).
+  inline Real & operator() (MatrixIndexT i) {
+    KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
+                 static_cast<UnsignedMatrixIndexT>(dim_));
+    return *(data_ + i);
+  }
+
+  /** @brief Returns a sub-vector of a vector (a range of elements).
+   *  @param o [in] Origin, 0 < o < Dim()
+   *  @param l [in] Length 0 < l < Dim()-o
+   *  @return A SubVector object that aliases the data of the Vector object.
+   *  See @c SubVector class for details   */
+  SubVector<Real> Range(const MatrixIndexT o, const MatrixIndexT l) {
+    return SubVector<Real>(*this, o, l);
+  }
+
+  /** @brief Returns a const sub-vector of a vector (a range of elements).
+   *  @param o [in] Origin, 0 < o < Dim()
+   *  @param l [in] Length 0 < l < Dim()-o
+   *  @return A SubVector object that aliases the data of the Vector object.
+   *  See @c SubVector class for details   */
+  const SubVector<Real> Range(const MatrixIndexT o,
+                              const MatrixIndexT l) const {
+    return SubVector<Real>(*this, o, l);
+  }
+
+  /// Copy data from another vector (must match own size).
+  void CopyFromVec(const VectorBase<Real> &v);
+
+  /// Copy data from a SpMatrix or TpMatrix (must match own size).
+  template<typename OtherReal>
+  void CopyFromPacked(const PackedMatrix<OtherReal> &M);
+  
+  /// Copy data from another vector of different type (double vs. float)
+  template<typename OtherReal>
+  void CopyFromVec(const VectorBase<OtherReal> &v);
+
+  /// Copy from CuVector.  This is defined in ../cudamatrix/cu-vector.h
+  template<typename OtherReal>
+  void CopyFromVec(const CuVectorBase<OtherReal> &v);
+
+  
+  /// Apply natural log to all elements.  Throw if any element of
+  /// the vector is negative (but doesn't complain about zero; the
+  /// log will be -infinity
+  void ApplyLog();
+
+  /// Apply natural log to another vector and put result in *this.
+  void ApplyLogAndCopy(const VectorBase<Real> &v);
+
+  /// Apply exponential to each value in vector.
+  void ApplyExp();
+
+  /// Take absolute value of each of the elements
+  void ApplyAbs();
+
+  /// Applies floor to all elements. Returns number of elements floored.
+  MatrixIndexT ApplyFloor(Real floor_val);
+
+  /// Applies ceiling to all elements. Returns number of elements changed.
+  MatrixIndexT ApplyCeiling(Real ceil_val);
+  
+  /// Applies floor to all elements. Returns number of elements floored.
+  MatrixIndexT ApplyFloor(const VectorBase<Real> &floor_vec);
+
+  /// Apply soft-max to vector and return normalizer (log sum of exponentials).
+  /// This is the same as: \f$ x(i) = exp(x(i)) / \sum_i exp(x(i)) \f$
+  Real ApplySoftMax();
+
+  /// Sets each element of *this to the tanh of the corresponding element of "src".
+  void Tanh(const VectorBase<Real> &src);
+
+  /// Sets each element of *this to the sigmoid function of the corresponding
+  /// element of "src".
+  void Sigmoid(const VectorBase<Real> &src);
+  
+  /// Take all  elements of vector to a power.
+  void ApplyPow(Real power);
+
+  /// Take the absolute value of all elements of a vector to a power.
+  /// Include the sign of the input element if include_sign == true.
+  /// If power is negative and the input value is zero, the output is set zero.
+  void ApplyPowAbs(Real power, bool include_sign=false);
+  
+  /// Compute the p-th norm of the vector.
+  Real Norm(Real p) const;
+  
+  /// Returns true if ((*this)-other).Norm(2.0) <= tol * (*this).Norm(2.0).
+  bool ApproxEqual(const VectorBase<Real> &other, float tol = 0.01) const;
+  
+  /// Invert all elements.
+  void InvertElements();
+
+  /// Add vector : *this = *this + alpha * rv (with casting between floats and
+  /// doubles)
+  template<typename OtherReal>
+  void AddVec(const Real alpha, const VectorBase<OtherReal> &v);
+
+  /// Add vector : *this = *this + alpha * rv^2  [element-wise squaring].
+  void AddVec2(const Real alpha, const VectorBase<Real> &v);
+
+  /// Add vector : *this = *this + alpha * rv^2  [element-wise squaring],
+  /// with casting between floats and doubles.
+  template<typename OtherReal>
+  void AddVec2(const Real alpha, const VectorBase<OtherReal> &v);
+
+  /// Add matrix times vector : this <-- beta*this + alpha*M*v.
+  /// Calls BLAS GEMV.
+  void AddMatVec(const Real alpha, const MatrixBase<Real> &M,
+                 const MatrixTransposeType trans,  const VectorBase<Real> &v,
+                 const Real beta); // **beta previously defaulted to 0.0**
+
+  /// This is as AddMatVec, except optimized for where v contains a lot
+  /// of zeros.
+  void AddMatSvec(const Real alpha, const MatrixBase<Real> &M,
+                  const MatrixTransposeType trans,  const VectorBase<Real> &v,
+                  const Real beta); // **beta previously defaulted to 0.0**
+
+  
+  /// Add symmetric positive definite matrix times vector:
+  ///  this <-- beta*this + alpha*M*v.   Calls BLAS SPMV.
+  void AddSpVec(const Real alpha, const SpMatrix<Real> &M,
+                const VectorBase<Real> &v, const Real beta);  // **beta previously defaulted to 0.0**
+
+  /// Add triangular matrix times vector: this <-- beta*this + alpha*M*v.
+  /// Works even if rv == *this.
+  void AddTpVec(const Real alpha, const TpMatrix<Real> &M,
+                const MatrixTransposeType trans, const VectorBase<Real> &v,
+                const Real beta);  // **beta previously defaulted to 0.0**
+
+  /// Set each element to y = (x == orig ? changed : x).
+  void ReplaceValue(Real orig, Real changed);
+
+  /// Multipy element-by-element by another vector.
+  void MulElements(const VectorBase<Real> &v);
+  /// Multipy element-by-element by another vector of different type.
+  template<typename OtherReal>
+  void MulElements(const VectorBase<OtherReal> &v);
+
+  /// Divide element-by-element by a vector.
+  void DivElements(const VectorBase<Real> &v);
+  /// Divide element-by-element by a vector of different type.
+  template<typename OtherReal>
+  void DivElements(const VectorBase<OtherReal> &v);
+
+  /// Add a constant to each element of a vector.
+  void Add(Real c);
+
+  /// Add element-by-element product of vectlrs:
+  //  this <-- alpha * v .* r + beta*this .
+  void AddVecVec(Real alpha, const VectorBase<Real> &v,
+                 const VectorBase<Real> &r, Real beta);
+
+  /// Add element-by-element quotient of two vectors.
+  ///  this <---- alpha*v/r + beta*this
+  void AddVecDivVec(Real alpha, const VectorBase<Real> &v,
+                    const VectorBase<Real> &r, Real beta);
+
+  /// Multiplies all elements by this constant.
+  void Scale(Real alpha);
+
+  /// Multiplies this vector by lower-triangular marix:  *this <-- *this *M
+  void MulTp(const TpMatrix<Real> &M, const MatrixTransposeType trans);
+
+  /// If trans == kNoTrans, solves M x = b, where b is the value of *this at input
+  /// and x is the value of *this at output.
+  /// If trans == kTrans, solves M' x = b.
+  /// Does not test for M being singular or near-singular, so test it before
+  /// calling this routine.
+  void Solve(const TpMatrix<Real> &M, const MatrixTransposeType trans);
+
+  /// Performs a row stack of the matrix M
+  void CopyRowsFromMat(const MatrixBase<Real> &M);
+  template<typename OtherReal>
+  void CopyRowsFromMat(const MatrixBase<OtherReal> &M);
+
+  /// The following is implemented in ../cudamatrix/cu-matrix.cc
+  void CopyRowsFromMat(const CuMatrixBase<Real> &M);
+
+  /// Performs a column stack of the matrix M
+  void CopyColsFromMat(const MatrixBase<Real> &M);
+
+  /// Extracts a row of the matrix M.  Could also do this with
+  /// this->Copy(M[row]).
+  void CopyRowFromMat(const MatrixBase<Real> &M, MatrixIndexT row);
+  /// Extracts a row of the matrix M with type conversion.
+  template<typename OtherReal>
+  void CopyRowFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT row);
+
+  /// Extracts a row of the symmetric matrix S.
+  template<typename OtherReal>
+  void CopyRowFromSp(const SpMatrix<OtherReal> &S, MatrixIndexT row);
+  
+  /// Extracts a column of the matrix M.
+  template<typename OtherReal>
+  void CopyColFromMat(const MatrixBase<OtherReal> &M , MatrixIndexT col);
+
+  /// Extracts the diagonal of the matrix M.
+  void CopyDiagFromMat(const MatrixBase<Real> &M);
+
+  /// Extracts the diagonal of a packed matrix M; works for Sp or Tp.
+  void CopyDiagFromPacked(const PackedMatrix<Real> &M);
+
+
+  /// Extracts the diagonal of a symmetric matrix.
+  inline void CopyDiagFromSp(const SpMatrix<Real> &M) { CopyDiagFromPacked(M); }
+
+  /// Extracts the diagonal of a triangular matrix.
+  inline void CopyDiagFromTp(const TpMatrix<Real> &M) { CopyDiagFromPacked(M); }
+
+  /// Returns the maximum value of any element, or -infinity for the empty vector.
+  Real Max() const;
+
+  /// Returns the maximum value of any element, and the associated index.
+  /// Error if vector is empty.
+  Real Max(MatrixIndexT *index) const;
+  
+  /// Returns the minimum value of any element, or +infinity for the empty vector.
+  Real Min() const;
+
+  /// Returns the minimum value of any element, and the associated index.
+  /// Error if vector is empty.
+  Real Min(MatrixIndexT *index) const;
+  
+  /// Returns sum of the elements
+  Real Sum() const;
+
+  /// Returns sum of the logs of the elements.  More efficient than
+  /// just taking log of each.  Will return NaN if any elements are
+  /// negative.
+  Real SumLog() const;
+
+  /// Does *this = alpha * (sum of rows of M) + beta * *this.
+  void AddRowSumMat(Real alpha, const MatrixBase<Real> &M, Real beta = 1.0);
+
+  /// Does *this = alpha * (sum of columns of M) + beta * *this.
+  void AddColSumMat(Real alpha, const MatrixBase<Real> &M, Real beta = 1.0);
+
+  /// Add the diagonal of a matrix times itself:
+  /// *this = alpha * diag(M M^T) + beta * *this (if trans == kNoTrans), or
+  /// *this = alpha * diag(M^T M) + beta * *this (if trans == kTrans).
+  void AddDiagMat2(Real alpha, const MatrixBase<Real> &M,
+                   MatrixTransposeType trans = kNoTrans, Real beta = 1.0);
+
+  /// Add the diagonal of a matrix product: *this = alpha * diag(M N) + beta * *this
+  /// when both "trans" arguments are kNoTrans (transposes apply as expected).
+  /// NOTE(review): the alpha scaling in these two docs is inferred from the signatures.
+  void AddDiagMatMat(Real alpha, const MatrixBase<Real> &M, MatrixTransposeType transM,
+                     const MatrixBase<Real> &N, MatrixTransposeType transN,
+                     Real beta = 1.0);
+
+  /// Returns log(sum_i exp(x_i)) over the elements, computed without exp
+  /// overflow.  If prune > 0.0, terms smaller than (max - prune) are ignored.
+  /// [Note: in future, prune = 0.0 may come to mean "just take the max";
+  /// for now, pass a negative value such as -1 if you don't want pruning.]
+  Real LogSumExp(Real prune = -1.0) const;
+
+  /// Reads from a C++ stream; if add == true the values read are added to
+  /// the existing contents instead of replacing them.  Throws on failure.
+  void Read(std::istream & in, bool binary, bool add = false);
+
+  /// Writes to a C++ stream, in binary format if binary == true.
+  void Write(std::ostream &Out, bool binary) const;
+
+  friend class VectorBase<double>;  // The other-precision VectorBase and the
+  friend class VectorBase<float>;   // CUDA vector classes are friends, so they
+  friend class CuVectorBase<Real>;  // can reach the protected members declared
+  friend class CuVector<Real>;      // below.
+ protected:
+  /// Destructor; does not deallocate memory, which is handled by child classes.
+  /// It is protected so that a VectorBase can only be destroyed through a
+  /// child class, never directly.
+  ~VectorBase() {}
+
+  /// Default constructor: an empty vector (NULL data pointer, dimension zero).
+  /// KALDI_ASSERT_IS_FLOATING_TYPE guards the Real template argument.
+  explicit VectorBase() : data_(NULL), dim_(0)
+  { KALDI_ASSERT_IS_FLOATING_TYPE(Real); }
+
+// Took this out since it is not currently used, and it is possible to create
+// objects where the allocated memory is not the same size as dim_ : Arnab
+//  /// Initializer from a pointer and a size; keeps the pointer internally
+//  /// (ownership or non-ownership depends on the child class).
+//  explicit VectorBase(Real* data, MatrixIndexT dim)
+//      : data_(data), dim_(dim) {}
+
+  // Arnab : made this protected since it is unsafe too.
+  /// Loads data from the raw pointer Data into the vector; sz must equal Dim().
+  void CopyFromPtr(const Real* Data, MatrixIndexT sz);
+
+  /// Pointer to the element storage (ownership depends on the child class).
+  Real* data_;
+  /// Number of elements in the vector.
+  MatrixIndexT dim_;
+  KALDI_DISALLOW_COPY_AND_ASSIGN(VectorBase);  // the base class is not copyable
+}; // class VectorBase
+
+/** @brief A class representing a vector.
+ *
+ *  Unlike VectorBase and SubVector, a Vector owns its storage: Resize()
+ *  sets it up and the destructor deallocates it.  */
+template<typename Real>
+class Vector: public VectorBase<Real> {
+ public:
+  /// Constructor that takes no arguments.  Initializes to empty.
+  Vector(): VectorBase<Real>() {}
+
+  /// Constructor with specific size.  Sets to all-zero by default;
+  /// if resize_type == kUndefined, memory contents are undefined.
+  explicit Vector(const MatrixIndexT s,
+                  MatrixResizeType resize_type = kSetZero)
+      : VectorBase<Real>() {  Resize(s, resize_type);  }
+
+  /// Copy constructor from CUDA vector
+  /// This is defined in ../cudamatrix/cu-vector.h
+  template<typename OtherReal>
+  explicit Vector(const CuVectorBase<OtherReal> &cu);
+
+  /// Copy constructor.  The need for this is controversial.
+  Vector(const Vector<Real> &v) : VectorBase<Real>()  { //  (cannot be explicit)
+    Resize(v.Dim(), kUndefined);
+    this->CopyFromVec(v);
+  }
+
+  /// Copy-constructor from base-class, needed to copy from SubVector.
+  explicit Vector(const VectorBase<Real> &v) : VectorBase<Real>() {
+    Resize(v.Dim(), kUndefined);
+    this->CopyFromVec(v);
+  }
+
+  /// Type conversion constructor (copies from a vector of the other precision).
+  template<typename OtherReal>
+  explicit Vector(const VectorBase<OtherReal> &v): VectorBase<Real>() {
+    Resize(v.Dim(), kUndefined);
+    this->CopyFromVec(v);
+  }
+
+// Took this out since it is unsafe : Arnab
+//  /// Constructor from a pointer and a size; copies the data to a location
+//  /// it owns.
+//  Vector(const Real* Data, const MatrixIndexT s): VectorBase<Real>() {
+//    Resize(s);
+  //    CopyFromPtr(Data, s);
+//  }
+
+  /// Swaps the contents of *this and *other.  Shallow swap.
+  void Swap(Vector<Real> *other);
+
+  /// Destructor.  Deallocates memory.
+  ~Vector() { Destroy(); }
+
+  /// Read function using C++ streams.  Can also add to existing contents
+  /// of the vector.
+  void Read(std::istream & in, bool binary, bool add = false);
+
+  /// Set vector to a specified size (can be zero).
+  /// The value of the new data depends on resize_type:
+  ///   -if kSetZero, the new data will be zero
+  ///   -if kUndefined, the new data will be undefined
+  ///   -if kCopyData, the new data will be the same as the old data in any
+  ///      shared positions, and zero elsewhere.
+  /// This function takes time proportional to the number of data elements.
+  void Resize(MatrixIndexT length, MatrixResizeType resize_type = kSetZero);
+
+  /// Remove one element and shifts later elements down.
+  void RemoveElement(MatrixIndexT i);
+
+  /// Assignment operator (public; lets a Vector be held e.g. in a std::vector).
+  Vector<Real> &operator = (const Vector<Real> &other) {
+    if (this == &other) return *this;  // Resize(kUndefined) may discard data
+    Resize(other.Dim(), kUndefined);
+    this->CopyFromVec(other);
+    return *this;
+  }
+
+  /// Assignment operator that takes VectorBase.
+  Vector<Real> &operator = (const VectorBase<Real> &other) {
+    if (this == &other) return *this;  // "other" may be *this seen as its base
+    Resize(other.Dim(), kUndefined);
+    this->CopyFromVec(other);
+    return *this;
+  }
+ private:
+  /// Init assumes the current contents of the class are invalid (i.e. junk or
+  /// has already been freed), and it sets the vector to newly allocated memory
+  /// with the specified dimension.  dim == 0 is acceptable.  The memory contents
+  /// pointed to by data_ will be undefined.
+  void Init(const MatrixIndexT dim);
+
+  /// Destroy function, called internally.
+  void Destroy();
+};
+
+
+/// Represents a non-allocating general vector which can be defined
+/// as a sub-vector of higher-level vector [or as the row of a matrix].
+template<typename Real>
+class SubVector : public VectorBase<Real> {
+ public:
+  /// Constructor from a Vector or SubVector: views elements
+  /// [origin, origin + length) of t.  SubVectors are not const-safe and it's
+  /// very hard to make them so; this function contains const_cast.
+  SubVector(const VectorBase<Real> &t, const MatrixIndexT origin,
+            const MatrixIndexT length) : VectorBase<Real>() {
+    // Explicit range check.  (Summing unsigned casts of origin and length, as
+    // was done before, wraps around for e.g. origin == -1, length == 1.)
+    KALDI_ASSERT(origin >= 0 && length >= 0 && origin <= t.Dim() &&
+                 length <= t.Dim() - origin);
+    VectorBase<Real>::data_ = const_cast<Real*> (t.Data()+origin);
+    VectorBase<Real>::dim_   = length;
+  }
+
+  /// This constructor initializes the vector to point at the contents
+  /// of this packed matrix (SpMatrix or TpMatrix).
+  SubVector(const PackedMatrix<Real> &M) {
+    VectorBase<Real>::data_ = const_cast<Real*> (M.Data());
+    VectorBase<Real>::dim_   = (M.NumRows()*(M.NumRows()+1))/2;
+  }
+
+  /// Copy constructor (shallow: shares the other SubVector's data pointer).
+  SubVector(const SubVector &other) : VectorBase<Real> () {
+    // this copy constructor needed for Range() to work in base class.
+    VectorBase<Real>::data_ = other.data_;
+    VectorBase<Real>::dim_ = other.dim_;
+  }
+
+  /// Constructor from a pointer to memory and a length.  Keeps a pointer
+  /// to the data but does not take ownership (will never delete).
+  SubVector(Real *data, MatrixIndexT length) : VectorBase<Real> () {
+    VectorBase<Real>::data_ = data;
+    VectorBase<Real>::dim_   = length;
+  }
+
+
+  /// Views row "row" of matrix.  Does not preserve const-ness, so be careful.
+  SubVector(const MatrixBase<Real> &matrix, MatrixIndexT row) {
+    VectorBase<Real>::data_ = const_cast<Real*>(matrix.RowData(row));
+    VectorBase<Real>::dim_   = matrix.NumCols();
+  }
+
+  ~SubVector() {}  ///< Destructor (does nothing; no pointers are owned here).
+
+ private:
+  /// Disallow assignment operator: private and deliberately left undefined
+  /// (the old empty body fell off the end of a non-void function).
+  SubVector & operator = (const SubVector &other);
+};
+
+/// @} end of "addtogroup matrix_group"
+/// \addtogroup matrix_funcs_io
+/// @{
+/// Output to a C++ stream.  Non-binary by default (use Write for
+/// binary output).
+template<typename Real>
+std::ostream & operator << (std::ostream & out, const VectorBase<Real> & v);
+
+/// Input from a C++ stream into an existing VectorBase.  Will automatically
+/// read text or binary data from the stream.
+template<typename Real>
+std::istream & operator >> (std::istream & in, VectorBase<Real> & v);
+
+/// Input from a C++ stream into a Vector.  Will automatically read text or
+/// binary data from the stream.
+template<typename Real>
+std::istream & operator >> (std::istream & in, Vector<Real> & v);
+/// @} end of \addtogroup matrix_funcs_io
+
+/// \addtogroup matrix_funcs_scalar
+/// @{
+
+/// Free-function form of VectorBase::ApproxEqual(): compares a with b using tol.
+template<typename Real>
+bool ApproxEqual(const VectorBase<Real> &a,
+                 const VectorBase<Real> &b, Real tol = 0.01) {
+  const bool within_tol = a.ApproxEqual(b, tol);
+  return within_tol;
+}
+/// Asserts (via KALDI_ASSERT) that a and b are approximately equal within tol.
+template<typename Real>
+inline void AssertEqual(const VectorBase<Real> &a, const VectorBase<Real> &b,
+                        float tol = 0.01) {
+  KALDI_ASSERT(a.ApproxEqual(b, tol));
+}
+
+/// Returns the dot product between v1 and v2.
+template<typename Real>
+Real VecVec(const VectorBase<Real> &v1, const VectorBase<Real> &v2);
+
+/// Dot product between vectors of two different element types; returns Real.
+template<typename Real, typename OtherReal>
+Real VecVec(const VectorBase<Real> &v1, const VectorBase<OtherReal> &v2);
+
+/// Returns \f$ v_1^T M v_2  \f$ .
+/// Not as efficient as it could be where v1 == v2.
+template<typename Real>
+Real VecMatVec(const VectorBase<Real> &v1, const MatrixBase<Real> &M,
+               const VectorBase<Real> &v2);
+
+/// @} End of "addtogroup matrix_funcs_scalar"
+
+
+}  // namespace kaldi
+
+// we need to include the implementation
+#include "matrix/kaldi-vector-inl.h"
+
+
+
+#endif  // KALDI_MATRIX_KALDI_VECTOR_H_
+
diff --git a/kaldi_io/src/kaldi/matrix/matrix-common.h b/kaldi_io/src/kaldi/matrix/matrix-common.h
new file mode 100644
index 0000000..d202b2e
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/matrix-common.h
@@ -0,0 +1,100 @@
+// matrix/matrix-common.h
+
+// Copyright 2009-2011  Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_MATRIX_MATRIX_COMMON_H_
+#define KALDI_MATRIX_MATRIX_COMMON_H_
+
+// This file contains some #includes, forward declarations
+// and typedefs that are needed by all the main header
+// files in this directory.
+
+#include "base/kaldi-common.h"
+#include "matrix/kaldi-blas.h"
+
+namespace kaldi {
+enum MatrixTransposeType {  // values are taken from the CBLAS constants
+  kNoTrans = CblasNoTrans,
+  kTrans   = CblasTrans
+};
+
+enum MatrixResizeType {
+  kSetZero,    // new data is set to zero
+  kUndefined,  // new data is left undefined
+  kCopyData    // old data kept in shared positions, zero elsewhere
+};
+
+enum SpCopyType {  // NOTE(review): presumably selects how a full matrix is
+  kTakeLower,      // reduced to a symmetric (Sp) one -- confirm in sp-matrix.h
+  kTakeUpper,
+  kTakeMean,
+  kTakeMeanAndCheck
+};
+
+template<typename Real> class VectorBase;  // forward declarations of the
+template<typename Real> class Vector;      // vector and matrix class templates
+template<typename Real> class SubVector;
+template<typename Real> class MatrixBase;
+template<typename Real> class SubMatrix;
+template<typename Real> class Matrix;
+template<typename Real> class SpMatrix;
+template<typename Real> class TpMatrix;
+template<typename Real> class PackedMatrix;
+
+// The Cu* classes below are not defined in this directory; they are
+// forward-declared mostly so that they can be named in friend declarations.
+template<typename Real> class CuMatrixBase;
+template<typename Real> class CuSubMatrix;
+template<typename Real> class CuMatrix;
+template<typename Real> class CuVectorBase;
+template<typename Real> class CuSubVector;
+template<typename Real> class CuVector;
+template<typename Real> class CuPackedMatrix;
+template<typename Real> class CuSpMatrix;
+template<typename Real> class CuTpMatrix;
+
+class CompressedMatrix;
+
+/// OtherReal<T>::Real names the "other" floating-point type (float <-> double).
+template<typename T> class OtherReal { };  // primary template has no Real member;
+                                           // useful in reading+writing routines.
+/// Specialization for float: OtherReal<float>::Real is double.
+template<> class OtherReal<float> {
+ public:
+  typedef double Real;
+};
+/// Specialization for double: OtherReal<double>::Real is float.
+template<> class OtherReal<double> {
+ public:
+  typedef float Real;
+};
+
+
+typedef int32 MatrixIndexT;           // index / dimension type of vectors and matrices
+typedef int32 SignedMatrixIndexT;     // explicitly signed counterpart
+typedef uint32 UnsignedMatrixIndexT;  // explicitly unsigned counterpart
+
+// If you want to use size_t for the index type, do as follows instead:
+//typedef size_t MatrixIndexT;
+//typedef ssize_t SignedMatrixIndexT;
+//typedef size_t UnsignedMatrixIndexT;
+
+}
+
+
+
+#endif  // KALDI_MATRIX_MATRIX_COMMON_H_
diff --git a/kaldi_io/src/kaldi/matrix/matrix-functions-inl.h b/kaldi_io/src/kaldi/matrix/matrix-functions-inl.h
new file mode 100644
index 0000000..9fac851
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/matrix-functions-inl.h
@@ -0,0 +1,56 @@
+// matrix/matrix-functions-inl.h
+
+// Copyright 2009-2011 Microsoft Corporation
+//
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+//
+// (*) incorporates, with permission, FFT code from his book
+// "Signal Processing with Lapped Transforms", Artech, 1992.
+
+
+
+#ifndef KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_
+#define KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_
+
+namespace kaldi {
+
+//! ComplexMul implements, inline, the in-place complex multiplication b *= a.
+template<typename Real> inline void ComplexMul(const Real &a_re, const Real &a_im,
+                                            Real *b_re, Real *b_im) {
+  const Real new_re = (*b_re * a_re) - (*b_im * a_im);
+  const Real new_im = *b_re * a_im + *b_im * a_re;
+  *b_im = new_im;  *b_re = new_re;  // stored only once both parts are known
+}
+
+//! ComplexAddProduct implements, inline, c += (a * b); c may alias a or b.
+template<typename Real> inline void ComplexAddProduct(const Real &a_re, const Real &a_im,
+                                                   const Real &b_re, const Real &b_im,
+                                                   Real *c_re, Real *c_im) {
+  const Real prod_re = b_re*a_re - b_im*a_im, prod_im = b_re*a_im + b_im*a_re;
+  *c_re += prod_re;  *c_im += prod_im;  // c is written only after both are known
+}
+
+template<typename Real> inline void ComplexImExp(Real x, Real *a_re, Real *a_im) {
+  const Real cos_x = std::cos(x), sin_x = std::sin(x);  // a <-- exp(i x)
+  *a_re = cos_x;  *a_im = sin_x;
+}
+
+
+} // end namespace kaldi
+
+
+#endif // KALDI_MATRIX_MATRIX_FUNCTIONS_INL_H_
+
diff --git a/kaldi_io/src/kaldi/matrix/matrix-functions.h b/kaldi_io/src/kaldi/matrix/matrix-functions.h
new file mode 100644
index 0000000..b70ca56
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/matrix-functions.h
@@ -0,0 +1,235 @@
+// matrix/matrix-functions.h
+
+// Copyright 2009-2011  Microsoft Corporation;  Go Vivace Inc.;  Jan Silovsky;
+//                      Yanmin Qian;   1991 Henrique (Rico) Malvar (*)
+//
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+//
+// (*) incorporates, with permission, FFT code from his book
+// "Signal Processing with Lapped Transforms", Artech, 1992.
+
+
+
+#ifndef KALDI_MATRIX_MATRIX_FUNCTIONS_H_
+#define KALDI_MATRIX_MATRIX_FUNCTIONS_H_
+
+#include "matrix/kaldi-vector.h"
+#include "matrix/kaldi-matrix.h"
+
+namespace kaldi {
+
+/// @addtogroup matrix_funcs_misc
+/// @{
+
+/** The function ComplexFft does an Fft on the vector argument v.
+   v is a vector of even dimension, interpreted for both input
+   and output as a vector of N complex numbers i.e.
+   \f[ v = ( re_0, im_0, re_1, im_1, ... )    \f]
+   The dimension of v must be a power of 2.  [NOTE(review): the "See also"
+   paragraph below implies that other lengths are accepted -- please confirm.]
+
+   If "forward == true" this routine does the Discrete Fourier Transform
+   (DFT), i.e.:
+   \f[   vout[m] \leftarrow \sum_{n = 0}^{N-1} vin[n] \exp( -2 \pi i m n / N )  \f]
+
+   If "forward == false" it does the Inverse Discrete Fourier Transform (IDFT)
+   *WITHOUT THE FACTOR 1/N*, i.e.:
+   \f[   vout[m] \leftarrow \sum_{n = 0}^{N-1} vin[n] \exp(  2 \pi i m n / N )   \f]
+   [note the sign difference on the 2 pi for the backward one.]
+
+   Note that this is the definition of the FT given in most texts, but
+   it differs from the Numerical Recipes version in which the forward
+   and backward algorithms are flipped.
+
+   Note that you would have to multiply by 1/N after the IDFT to get back
+   to where you started from.  We don't do this because in some contexts the
+   transform is made symmetric by scaling both passes (by 1/sqrt(N)) instead.
+
+   tmp_work is presumably optional scratch space (TODO: confirm in the .cc).
+   See also SplitRadixComplexFft, declared in srfft.h, which is more efficient
+   but only works if the length of the input is a power of 2.
+ */
+template<typename Real> void ComplexFft (VectorBase<Real> *v, bool forward, Vector<Real> *tmp_work = NULL);
+
+/// ComplexFt is the same as ComplexFft but it implements the Fourier
+/// transform in an inefficient way, taking its input from "in" and its output
+/// in *out.  Mainly included for testing; see ComplexFft for the data format.
+template<typename Real> void ComplexFt (const VectorBase<Real> &in,
+                                     VectorBase<Real> *out, bool forward);
+
+/// RealFft is a Fourier transform of real inputs.  Internally it uses
+/// ComplexFft.  The input dimension N must be even.  If forward == true,
+/// it transforms from a sequence of N real points to its complex Fourier
+/// transform; otherwise it goes in the reverse direction.  If you call it
+/// in the forward and then reverse direction and multiply by 1.0/N, you
+/// will get back the original data.
+/// The interpretation of the complex-FFT data is as follows: the array
+/// is a sequence of complex numbers C_n of length N/2 with (real, im) format,
+/// except that the first pair holds the two real values real0 and real_{N/2},
+/// i.e. [real0, real_{N/2}, real1, im1, real2, im2, real3, im3, ...].
+/// See also SplitRadixRealFft, declared in srfft.h, which is more efficient
+/// but only works if the length of the input is a power of 2.
+template<typename Real> void RealFft (VectorBase<Real> *v, bool forward);
+
+
+/// RealFftInefficient has the same input and output format as RealFft above,
+/// but it is an inefficient implementation included for testing purposes.
+template<typename Real> void RealFftInefficient (VectorBase<Real> *v, bool forward);
+
+/// ComputeDctMatrix computes a matrix corresponding to the DCT, such that
+/// M * v equals the DCT of vector v.  M must be square at input.
+/// This is the type = III DCT with normalization, corresponding to the
+/// following equations, where x is the signal and X is the DCT:
+/// X_0 = 1/sqrt(2*N) \sum_{n = 0}^{N-1} x_n
+/// X_k = 1/sqrt(N) \sum_{n = 0}^{N-1} x_n cos( \pi/N (n + 1/2) k )
+/// This matrix's transpose is its own inverse, so transposing this
+/// matrix will give the inverse DCT.
+/// Caution: the type III DCT is generally known as the "inverse DCT" (with the
+/// type II being the actual DCT), so this function is somewhat mis-named.  It
+/// was probably done this way for HTK compatibility.  We don't change it
+/// because it was this way from the start and changing it would affect the
+/// feature generation.
+
+template<typename Real> void ComputeDctMatrix(Matrix<Real> *M);
+
+
+/// ComplexMul implements, inline, the complex multiplication b *= a.
+template<typename Real> inline void ComplexMul(const Real &a_re, const Real &a_im,
+                                            Real *b_re, Real *b_im);
+
+/// ComplexAddProduct implements, inline, the complex operation c += (a * b).
+template<typename Real> inline void ComplexAddProduct(const Real &a_re, const Real &a_im,
+                                                   const Real &b_re, const Real &b_im,
+                                                   Real *c_re, Real *c_im);
+
+
+/// ComplexImExp implements a <-- exp(i x), inline.
+template<typename Real> inline void ComplexImExp(Real x, Real *a_re, Real *a_im);
+
+
+// This class allows you to compute the matrix exponential function
+// X = exp(M) = I + M + 1/2! M^2 + 1/3! M^3 + ...
+// This method is most accurate where the result is of the same order of
+// magnitude as the unit matrix (it will typically not work well when
+// the answer has almost-zero eigenvalues or is close to zero).
+// It also provides a function that allows you to back-propagate the
+// derivative of a scalar function through this calculation, using the
+// state saved by the most recent call to Compute().
+template<typename Real>
+class MatrixExponential {
+ public:
+  MatrixExponential() { }
+
+  void Compute(const MatrixBase<Real> &M, MatrixBase<Real> *X);  // does *X = exp(M)
+
+  // Version for symmetric matrices (it just copies to full matrix).
+  void Compute(const SpMatrix<Real> &M, SpMatrix<Real> *X);  // does *X = exp(M)
+
+  void Backprop(const MatrixBase<Real> &hX, MatrixBase<Real> *hM) const;  // Propagates
+  // the gradient of a scalar function f backwards through this operation, i.e.:
+  // if the parameter hX represents df/dX (with no transpose, so element i, j of hX
+  // is the derivative of f w.r.t. X(i, j)), it sets hM to df/dM, again with no
+  // transpose (of course, only the part thereof that comes through the effect of
+  // M on X).  This applies to the values of M and X from the most recent call
+  // to Compute().
+
+  // Version for symmetric matrices (it just copies to full matrix).
+  void Backprop(const SpMatrix<Real> &hX, SpMatrix<Real> *hM) const;
+
+ private:
+  void Clear();  // NOTE(review): presumably resets the saved state -- confirm.
+
+  static MatrixIndexT ComputeN(const MatrixBase<Real> &M);  // presumably picks N_
+
+  // This is intended for matrices P with small norms: compute B_0 = exp(P) - I.
+  // Keeps adding terms in the Taylor series till there is no further
+  // change in the result.  Stores some of the powers of P in powers_.
+  // NOTE(review): this comment used to mention a count "K_"; none is declared here.
+  void ComputeTaylor(const MatrixBase<Real> &P, MatrixBase<Real> *B0);
+
+  // Backprop through the Taylor-series computation above.
+  // note: hX is \hat{X} in the math; hM is \hat{M} in the math.
+  void BackpropTaylor(const MatrixBase<Real> &hX,
+                      MatrixBase<Real> *hM) const;
+
+  Matrix<Real> P_;  // Equals M * 2^(-N_)
+  std::vector<Matrix<Real> > B_;  // B_[0] = exp(P_) - I,
+                                 //  B_[k] = 2 B_[k-1] + B_[k-1]^2   [k > 0],
+                                 //  ( = exp(P_)^(2^k) - I, by squaring I + B_[k-1] )
+                                 // goes from 0..N_ [size N_+1].
+
+  std::vector<Matrix<Real> > powers_;  // powers (>1) of P_ stored here,
+  // up to all but the last one used in the Taylor expansion (this is the
+  // last one we need in the backprop).  The index is the power minus 2.
+
+  MatrixIndexT N_;  // Power N_ >=0 such that P_ = M * 2^(-N_),
+  // we choose it so that P_ has a sufficiently small norm
+  // that the Taylor series will converge fast.
+};
+
+
+/**
+    ComputePca does a PCA computation, using either outer products
+    or inner products, whichever is more efficient.  Let D be
+    the dimension of the data points, N be the number of data
+    points, and G be the PCA dimension we want to retain.  We assume
+    G <= N and G <= D.
+
+    @param X [in]  An N x D matrix.  Each row of X is a point x_i.
+    @param U [out] A G x D matrix.  Each row of U is a basis element u_g.
+    @param A [out] An N x G matrix, or NULL.  Each row of A is a set of coefficients
+         in the basis for a point x_i, so A(i, g) is the coefficient of u_g
+         in x_i.
+    @param print_eigs [in] If true, prints out diagnostic information about the
+         eigenvalues.
+    @param exact [in] If true, does the exact computation; if false, does
+         a much faster (but almost exact) computation based on the Lanczos
+         method.
+*/
+
+template<typename Real>
+void ComputePca(const MatrixBase<Real> &X,
+                MatrixBase<Real> *U,
+                MatrixBase<Real> *A,
+                bool print_eigs = false,
+                bool exact = true);
+
+
+
+// This function does: *plus += max(0, a b^T), *minus += max(0, -(a b^T)),
+// elementwise.  NOTE(review): a b^T is presumably scaled by alpha -- confirm.
+template<typename Real>
+void AddOuterProductPlusMinus(Real alpha,
+                              const VectorBase<Real> &a,
+                              const VectorBase<Real> &b,
+                              MatrixBase<Real> *plus,
+                              MatrixBase<Real> *minus);
+
+/// Asserts (via KALDI_ASSERT) that mat1 and mat2 have equal row and column counts.
+template<typename Real1, typename Real2>
+inline void AssertSameDim(const MatrixBase<Real1> &mat1, const MatrixBase<Real2> &mat2) {
+  KALDI_ASSERT(mat1.NumRows() == mat2.NumRows()
+               && mat1.NumCols() == mat2.NumCols());
+}
+
+/// @} end of "addtogroup matrix_funcs_misc"
+
+} // end namespace kaldi
+
+#include "matrix/matrix-functions-inl.h"
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/matrix/matrix-lib.h b/kaldi_io/src/kaldi/matrix/matrix-lib.h
new file mode 100644
index 0000000..39acec5
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/matrix-lib.h
@@ -0,0 +1,37 @@
+// matrix/matrix-lib.h
+
+// Copyright 2009-2011  Ondrej Glembek;  Microsoft Corporation;  Haihua Xu
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+// Include everything from this directory.
+// These files include other stuff that we need.
+#ifndef KALDI_MATRIX_MATRIX_LIB_H_
+#define KALDI_MATRIX_MATRIX_LIB_H_
+
+#include "matrix/cblas-wrappers.h"
+#include "base/kaldi-common.h"
+#include "matrix/kaldi-vector.h"
+#include "matrix/kaldi-matrix.h"
+#include "matrix/sp-matrix.h"
+#include "matrix/tp-matrix.h"
+#include "matrix/matrix-functions.h"
+#include "matrix/srfft.h"
+#include "matrix/compressed-matrix.h"
+#include "matrix/optimization.h"
+
+#endif
+
diff --git a/kaldi_io/src/kaldi/matrix/optimization.h b/kaldi_io/src/kaldi/matrix/optimization.h
new file mode 100644
index 0000000..66309ac
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/optimization.h
@@ -0,0 +1,248 @@
+// matrix/optimization.h
+
+// Copyright 2012  Johns Hopkins University (author: Daniel Povey)
+//
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+//
+// (*) incorporates, with permission, FFT code from his book
+// "Signal Processing with Lapped Transforms", Artech, 1992.
+
+
+
+#ifndef KALDI_MATRIX_OPTIMIZATION_H_
+#define KALDI_MATRIX_OPTIMIZATION_H_
+
+#include "matrix/kaldi-vector.h"
+#include "matrix/kaldi-matrix.h"
+
+namespace kaldi {
+
+
+/// @addtogroup matrix_optimization
+/// @{
+
+struct LinearCgdOptions {  // Configuration for LinearCgd().
+  int32 max_iters;  // Maximum number of iters (if >= 0); default -1.
+  BaseFloat max_error;  // Maximum 2-norm of the residual A x - b (convergence
+                        // test); default 0.0.
+  // Every time the residual 2-norm has decreased by a factor of
+  // recompute_residual_factor since the last time it was computed from
+  // scratch, recompute it from scratch.  This helps to keep the computed
+  // residual accurate even in the presence of roundoff.  Default 0.01.
+  BaseFloat recompute_residual_factor;
+  
+  LinearCgdOptions(): max_iters(-1),
+                      max_error(0.0),
+                      recompute_residual_factor(0.01) { }
+};
+  
+/*
+  This function uses linear conjugate gradient descent to approximately solve
+  the system A x = b.  The value of x at entry corresponds to the initial guess
+  of x.  The algorithm continues until the number of iterations equals b.Dim(),
+  or until the 2-norm of (A x - b) is <= opts.max_error, or until the number of
+  iterations equals opts.max_iters (if >= 0), whichever happens sooner.  It is
+  a requirement that A be positive definite.
+  It returns the number of iterations that were actually executed (this is
+  useful for testing purposes).
+*/
+template<typename Real>
+int32 LinearCgd(const LinearCgdOptions &opts,
+                const SpMatrix<Real> &A, const VectorBase<Real> &b,
+                VectorBase<Real> *x);
+
+
+
+
+
+
+/**
+   This is an implementation of L-BFGS.  It pushes responsibility for
+   determining when to stop, onto the user.  There is no call-back here:
+   everything is done via calls to the class itself (see the example in
+   matrix-lib-test.cc).  This does not implement constrained L-BFGS, but it will
+   handle constrained problems correctly as long as the function approaches
+   +infinity (or -infinity for maximization problems) when it gets close to the
+   bound of the constraint.  (In these types of problems, you just let the
+   function value be +infinity for minimization problems, or -infinity for
+   maximization problems, outside these bounds.)
+*/
+
+struct LbfgsOptions {
+  bool minimize; // if true, we're minimizing, else maximizing.
+  int m; // m is the number of stored vectors L-BFGS keeps (default 10).
+  float first_step_learning_rate; // The very first step of L-BFGS is
+  // like gradient descent.  If you want to configure the size of that step,
+  // you can do it using this variable (default 1.0).
+  float first_step_length; // If this variable is >0.0, it overrides
+  // first_step_learning_rate; on the first step we choose an approximate
+  // Hessian that is the multiple of the identity that would generate this
+  // step-length, or 1.0 if the gradient is zero.  Default 0.0 (no override).
+  float first_step_impr; // If this variable is >0.0, it overrides
+  // first_step_learning_rate; on the first step we choose an approximate
+  // Hessian that is the multiple of the identity that would generate this
+  // amount of objective function improvement (assuming the "real" objf
+  // was linear).  Default 0.0 (no override).
+  float c1; // A constant in Armijo rule = Wolfe condition i); default 1.0e-04.
+  float c2; // A constant in Wolfe condition ii); default 0.9.
+  float d; // An amount > 1.0 (default 2.0) that we initially multiply or
+  // divide the step length by, in the line search.
+  int max_line_search_iters; // after this many iters we restart L-BFGS (default 50).
+  int avg_step_length; // number of iters to avg step length over, in
+  // RecentStepLength() (default 4).
+  
+  LbfgsOptions (bool minimize = true):
+      minimize(minimize),
+      m(10),
+      first_step_learning_rate(1.0),
+      first_step_length(0.0),
+      first_step_impr(0.0),
+      c1(1.0e-04),
+      c2(0.9),
+      d(2.0),
+      max_line_search_iters(50),
+      avg_step_length(4) { }
+};
+
+/// L-BFGS optimizer driven by the caller: repeatedly read GetProposedValue(),
+/// evaluate the function and gradient there, and pass them to DoStep().
+template<typename Real>
+class OptimizeLbfgs {
+ public:
+  /// Initializer takes the starting value of x.
+  OptimizeLbfgs(const VectorBase<Real> &x,
+                const LbfgsOptions &opts);
+  
+  /// This returns the value of the variable x that has the best objective
+  /// function so far, and the corresponding objective function value if
+  /// requested.  This would typically be called only at the end.
+  const VectorBase<Real>& GetValue(Real *objf_value = NULL) const;
+  
+  /// This returns the value at which the function wants us
+  /// to compute the objective function and gradient.
+  const VectorBase<Real>& GetProposedValue() const { return new_x_; }
+  
+  /// Returns the average magnitude of the last n steps, where n is the
+  /// avg_step_length option (but not more than the number we have stored).
+  /// Before we have taken any steps, returns +infinity.  Note: if the most
+  /// recent step length was 0, it returns 0, regardless of the other
+  /// step lengths.  This makes it suitable as a convergence test
+  /// (else we'd generate NaN's).
+  Real RecentStepLength() const;
+  
+  /// The user calls this function to provide the class with the
+  /// function and gradient info at the point GetProposedValue().
+  /// If this point is outside the constraints you can set function_value
+  /// to {+infinity,-infinity} for {minimization,maximization} problems.
+  /// In this case the gradient, and also the second derivative (if you call
+  /// the second overloaded version of this function) will be ignored.
+  void DoStep(Real function_value,
+              const VectorBase<Real> &gradient);
+  
+  /// The user can call this version of DoStep() if it is desired to set some
+  /// kind of approximate Hessian on this iteration.  Note: it is a prerequisite
+  /// that diag_approx_2nd_deriv must be strictly positive (minimizing), or
+  /// negative (maximizing).
+  void DoStep(Real function_value,
+              const VectorBase<Real> &gradient,
+              const VectorBase<Real> &diag_approx_2nd_deriv);
+  
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(OptimizeLbfgs);
+
+  // The following variable says what stage of the computation we're at.
+  // Refer to Algorithm 7.5 (L-BFGS) of Nocedal & Wright, "Numerical
+  // Optimization", 2nd edition.
+  // kBeforeStep means we're about to do
+  // "compute p_k <-- - H_k \nabla f_k" (i.e. Algorithm 7.4).
+  // kWithinStep means we're at some point within line search; note
+  // that line search is iterative so we can stay in this state more
+  // than one time on each iteration.
+  enum ComputationState {
+    kBeforeStep,
+    kWithinStep, // This means we're within the step-size computation, and
+    // have not yet done the 1st function evaluation.
+  };
+  
+  inline MatrixIndexT Dim() { return x_.Dim(); }
+  inline MatrixIndexT M() { return opts_.m; }
+  SubVector<Real> Y(MatrixIndexT i) {
+    return SubVector<Real>(data_, (i % M()) * 2); // vector y_i
+  }
+  SubVector<Real> S(MatrixIndexT i) {
+    return SubVector<Real>(data_, (i % M()) * 2 + 1); // vector s_i
+  }
+  // The following are subroutines within DoStep():
+  bool AcceptStep(Real function_value,
+                  const VectorBase<Real> &gradient);
+  void Restart(const VectorBase<Real> &x,
+               Real function_value,
+               const VectorBase<Real> &gradient);
+  void ComputeNewDirection(Real function_value,
+                           const VectorBase<Real> &gradient);
+  void ComputeHifNeeded(const VectorBase<Real> &gradient);
+  void StepSizeIteration(Real function_value,
+                         const VectorBase<Real> &gradient);
+  void RecordStepLength(Real s);
+  
+  LbfgsOptions opts_;
+  SignedMatrixIndexT k_; // Iteration number, starts from zero.  Gets set back to zero
+  // when we restart.
+  
+  ComputationState computation_state_;
+  bool H_was_set_; // True if the user specified H_; if false,
+  // we'll use a heuristic to estimate it.
+
+
+  Vector<Real> x_; // current x.
+  Vector<Real> new_x_; // the x proposed in the line search.
+  Vector<Real> best_x_; // the x with the best objective function so far
+                        // (either the same as x_ or something in the current line search.)
+  Vector<Real> deriv_; // The most recently evaluated derivative-- at x_k.
+  Vector<Real> temp_; // NOTE(review): presumably scratch space -- confirm in the .cc
+  Real f_; // The function evaluated at x_k.
+  Real best_f_; // the best objective function so far.
+  Real d_; // a number d > 1.0, but during an iteration we may decrease this, when
+  // we switch between armijo and wolfe failures.
+
+  int num_wolfe_i_failures_; // the num times we decreased step size.
+  int num_wolfe_ii_failures_; // the num times we increased step size.
+  enum { kWolfeI, kWolfeII, kNone } last_failure_type_; // last type of step-search
+  // failure on this iter.
+  
+  Vector<Real> H_; // Current inverse-Hessian estimate.  May be computed by this class itself,
+  // or provided by user using 2nd form of DoStep().
+  Matrix<Real> data_; // dimension (m*2) x dim.  Even rows store
+  // gradients y_i, odd rows store steps s_i.
+  Vector<Real> rho_; // dimension m; rho_(m) = 1/(y_m^T s_m), Eq. 7.17.
+
+  std::vector<Real> step_lengths_; // The step sizes we took on the last
+  // (up to m) iterations; these are not stored in a rotating buffer but
+  // are shifted by one each time (this is more convenient when we
+  // restart, as we keep this info past restarting).
+  
+
+};
+  
+/// @} 
+
+
+} // end namespace kaldi
+
+
+
+#endif
+
diff --git a/kaldi_io/src/kaldi/matrix/packed-matrix.h b/kaldi_io/src/kaldi/matrix/packed-matrix.h
new file mode 100644
index 0000000..722d932
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/packed-matrix.h
@@ -0,0 +1,197 @@
+// matrix/packed-matrix.h
+
+// Copyright 2009-2013  Ondrej Glembek;  Lukas Burget;  Microsoft Corporation;
+//                      Saarland University;  Yanmin Qian;
+//                      Johns Hopkins University (Author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_MATRIX_PACKED_MATRIX_H_
+#define KALDI_MATRIX_PACKED_MATRIX_H_
+
+#include "matrix/matrix-common.h"
+#include <algorithm>
+
+namespace kaldi {
+
+/// \addtogroup matrix_funcs_io
+// we need to declare the friend << operator here
+template<typename Real>
+std::ostream & operator <<(std::ostream & out, const PackedMatrix<Real>& M);
+
+
+/// \addtogroup matrix_group
+/// @{
+
+/// @brief Packed matrix: base class for triangular and symmetric matrices.
+template<typename Real> class PackedMatrix {
+  friend class CuPackedMatrix<Real>;
+ public:
+  /// Constructs an empty matrix (zero rows, no storage).
+  PackedMatrix() : data_(NULL), num_rows_(0) {}
+
+  explicit PackedMatrix(MatrixIndexT r, MatrixResizeType resize_type = kSetZero):
+      data_(NULL), num_rows_(0) {  Resize(r, resize_type);  }
+
+  explicit PackedMatrix(const PackedMatrix<Real> &orig)
+      : data_(NULL), num_rows_(0) {
+    Resize(orig.num_rows_, kUndefined);
+    CopyFromPacked(orig);
+  }
+
+  template<typename OtherReal>
+  explicit PackedMatrix(const PackedMatrix<OtherReal> &orig)
+      : data_(NULL), num_rows_(0) {
+    Resize(orig.NumRows(), kUndefined);
+    CopyFromPacked(orig);
+  }
+
+  void SetZero();  ///< Set to zero
+  void SetUnit();  ///< Set to unit matrix.
+  void SetRandn(); ///< Set to random values of a normal distribution
+
+  Real Trace() const;
+
+  // Needed for inclusion in std::vector.  Self-assignment is a no-op: without
+  // the guard, Resize() would replace our data before we could copy from it.
+  PackedMatrix<Real> & operator =(const PackedMatrix<Real> &other) {
+    if (this == &other) return *this;
+    Resize(other.NumRows(), kUndefined);  // every element is overwritten below.
+    CopyFromPacked(other);
+    return *this;
+  }
+
+  ~PackedMatrix() {
+    Destroy();
+  }
+
+  /// Set packed matrix to a specified size (can be zero).
+  /// The value of the new data depends on resize_type:
+  ///   -if kSetZero, the new data will be zero
+  ///   -if kUndefined, the new data will be undefined
+  ///   -if kCopyData, the new data will be the same as the old data in any
+  ///      shared positions, and zero elsewhere.
+  /// This function takes time proportional to the number of data elements.
+  void Resize(MatrixIndexT nRows, MatrixResizeType resize_type = kSetZero);
+
+  void AddToDiag(const Real r); // Adds r to diagonal
+
+  void ScaleDiag(const Real alpha);  // Scales diagonal by alpha.
+
+  void SetDiag(const Real alpha);  // Sets diagonal to this value.
+
+  template<typename OtherReal>
+  void CopyFromPacked(const PackedMatrix<OtherReal> &orig);
+
+  /// CopyFromVec just interprets the vector as having the same layout
+  /// as the packed matrix.  Must have the same dimension, i.e.
+  /// orig.Dim() == (NumRows()*(NumRows()+1)) / 2;
+  template<typename OtherReal>
+  void CopyFromVec(const SubVector<OtherReal> &orig);
+
+  Real* Data() { return data_; }
+  const Real* Data() const { return data_; }
+  inline MatrixIndexT NumRows() const { return num_rows_; }
+  inline MatrixIndexT NumCols() const { return num_rows_; }
+  size_t SizeInBytes() const {
+    size_t nr = static_cast<size_t>(num_rows_);
+    return ((nr * (nr+1)) / 2) * sizeof(Real);
+  }
+
+  // This code is duplicated in child classes to avoid extra levels of calls.
+  Real operator() (MatrixIndexT r, MatrixIndexT c) const {
+    KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
+                 static_cast<UnsignedMatrixIndexT>(num_rows_) &&
+                 static_cast<UnsignedMatrixIndexT>(c) <
+                 static_cast<UnsignedMatrixIndexT>(num_rows_) && c <= r);
+    const size_t row = static_cast<size_t>(r);  // avoids MatrixIndexT overflow
+    return *(data_ + (row * (row + 1)) / 2 + c);
+  }
+
+  // This code is duplicated in child classes to avoid extra levels of calls.
+  Real &operator() (MatrixIndexT r, MatrixIndexT c) {
+    KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
+                 static_cast<UnsignedMatrixIndexT>(num_rows_) &&
+                 static_cast<UnsignedMatrixIndexT>(c) <
+                 static_cast<UnsignedMatrixIndexT>(num_rows_) && c <= r);
+    const size_t row = static_cast<size_t>(r);  // avoids MatrixIndexT overflow
+    return *(data_ + (row * (row + 1)) / 2 + c);
+  }
+
+  /// Largest stored element; the matrix must be non-empty.
+  Real Max() const {
+    KALDI_ASSERT(num_rows_ > 0);
+    return * (std::max_element(data_, data_ + SizeInBytes() / sizeof(Real)));
+  }
+
+  /// Smallest stored element; the matrix must be non-empty.
+  Real Min() const {
+    KALDI_ASSERT(num_rows_ > 0);
+    return * (std::min_element(data_, data_ + SizeInBytes() / sizeof(Real)));
+  }
+
+  void Scale(Real c);
+
+  friend std::ostream & operator << <> (std::ostream & out,
+                                     const PackedMatrix<Real> &m);
+  // Use instead of stream >> *this, if you want to add to existing contents.
+  // Will throw exception on failure.
+  void Read(std::istream &in, bool binary, bool add = false);
+
+  void Write(std::ostream &out, bool binary) const;
+
+  void Destroy();
+
+  /// Swaps the contents of *this and *other.  Shallow swap.
+  void Swap(PackedMatrix<Real> *other);
+  void Swap(Matrix<Real> *other);
+
+ protected:
+  // Will only be called from this class or derived classes.
+  void AddPacked(const Real alpha, const PackedMatrix<Real>& M);
+  Real *data_;
+  MatrixIndexT num_rows_;
+ private:
+  /// Init assumes the current contents of the class are invalid (i.e. junk or
+  /// have already been freed), and it sets the matrix to newly allocated memory
+  /// with the specified dimension.  dim == 0 is acceptable.  The memory contents
+  /// pointed to by data_ will be undefined.
+  void Init(MatrixIndexT dim);
+};
+/// @} end "addtogroup matrix_group"
+
+
+/// \addtogroup matrix_funcs_io
+/// @{
+
+template<typename Real>  // Text-mode stream output: forwards to M.Write().
+std::ostream & operator << (std::ostream & os, const PackedMatrix<Real>& M) {
+  M.Write(os, false);  // binary == false, i.e. text mode.
+  return os;
+}
+
+template<typename Real>  // Text-mode stream input: forwards to M.Read().
+std::istream & operator >> (std::istream &is, PackedMatrix<Real> &M) {
+  M.Read(is, false);  // binary == false; `add` keeps its default (false).
+  return is;
+}
+
+/// @}
+
+}  // namespace kaldi
+
+#endif
+
diff --git a/kaldi_io/src/kaldi/matrix/sp-matrix-inl.h b/kaldi_io/src/kaldi/matrix/sp-matrix-inl.h
new file mode 100644
index 0000000..1579592
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/sp-matrix-inl.h
@@ -0,0 +1,42 @@
+// matrix/sp-matrix-inl.h
+
+// Copyright 2009-2011  Ondrej Glembek;  Microsoft Corporation;  Haihua Xu
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_MATRIX_SP_MATRIX_INL_H_
+#define KALDI_MATRIX_SP_MATRIX_INL_H_
+
+#include "matrix/tp-matrix.h"
+
+namespace kaldi {
+
+// This file only declares explicit template specializations.
+// These tell the compiler that we'll implement the templated function
+// separately for the different template arguments (float, double).
+
+template<>  // double-precision specialization; only declared here.
+double SolveQuadraticProblem(const SpMatrix<double> &H, const VectorBase<double> &g,
+                             const SolverOptions &opts, VectorBase<double> *x);
+
+template<>  // single-precision specialization; only declared here.
+float SolveQuadraticProblem(const SpMatrix<float> &H, const VectorBase<float> &g,
+                            const SolverOptions &opts, VectorBase<float> *x);
+
+}  // namespace kaldi
+
+
+#endif  // KALDI_MATRIX_SP_MATRIX_INL_H_
diff --git a/kaldi_io/src/kaldi/matrix/sp-matrix.h b/kaldi_io/src/kaldi/matrix/sp-matrix.h
new file mode 100644
index 0000000..209d24a
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/sp-matrix.h
@@ -0,0 +1,524 @@
+// matrix/sp-matrix.h
+
+// Copyright 2009-2011   Ondrej Glembek;  Microsoft Corporation;  Lukas Burget;
+//                       Saarland University;  Ariya Rastrow;  Yanmin Qian;
+//                       Jan Silovsky
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_MATRIX_SP_MATRIX_H_
+#define KALDI_MATRIX_SP_MATRIX_H_
+
+#include <algorithm>
+#include <vector>
+
+#include "matrix/packed-matrix.h"
+
+namespace kaldi {
+
+
+/// \addtogroup matrix_group
+/// @{
+template<typename Real> class SpMatrix;
+
+
+/**
+ * @brief Packed symmetric matrix class
+*/
+template<typename Real>
+class SpMatrix : public PackedMatrix<Real> {
+  friend class CuSpMatrix<Real>;
+ public:
+  // so it can use our assignment operator.
+  friend class std::vector<Matrix<Real> >;
+
+  SpMatrix(): PackedMatrix<Real>() {}
+
+  /// Copy constructor from CUDA version of SpMatrix
+  /// This is defined in ../cudamatrix/cu-sp-matrix.h
+  
+  explicit SpMatrix(const CuSpMatrix<Real> &cu);
+ 
+  explicit SpMatrix(MatrixIndexT r, MatrixResizeType resize_type = kSetZero)
+      : PackedMatrix<Real>(r, resize_type) {}
+
+  SpMatrix(const SpMatrix<Real> &orig)
+      : PackedMatrix<Real>(orig) {}
+
+  template<typename OtherReal>
+  explicit SpMatrix(const SpMatrix<OtherReal> &orig)
+      : PackedMatrix<Real>(orig) {}
+
+#ifdef KALDI_PARANOID
+  explicit SpMatrix(const MatrixBase<Real> & orig,
+                    SpCopyType copy_type = kTakeMeanAndCheck)
+      : PackedMatrix<Real>(orig.NumRows(), kUndefined) {
+    CopyFromMat(orig, copy_type);
+  }
+#else
+  explicit SpMatrix(const MatrixBase<Real> & orig,
+                    SpCopyType copy_type = kTakeMean)
+      : PackedMatrix<Real>(orig.NumRows(), kUndefined) {
+    CopyFromMat(orig, copy_type);
+  }
+#endif
+
+  /// Shallow swap.
+  void Swap(SpMatrix *other);
+
+  inline void Resize(MatrixIndexT nRows, MatrixResizeType resize_type = kSetZero) {
+    PackedMatrix<Real>::Resize(nRows, resize_type);
+  }
+
+  void CopyFromSp(const SpMatrix<Real> &other) {
+    PackedMatrix<Real>::CopyFromPacked(other);
+  }
+
+  template<typename OtherReal>
+  void CopyFromSp(const SpMatrix<OtherReal> &other) {
+    PackedMatrix<Real>::CopyFromPacked(other);
+  }
+
+#ifdef KALDI_PARANOID
+  void CopyFromMat(const MatrixBase<Real> &orig,
+                   SpCopyType copy_type = kTakeMeanAndCheck);
+#else  // different default arg if non-paranoid mode.
+  void CopyFromMat(const MatrixBase<Real> &orig,
+                   SpCopyType copy_type = kTakeMean);
+#endif
+
+  inline Real operator() (MatrixIndexT r, MatrixIndexT c) const {
+    // if column is greater than row, then swap these as matrix is stored
+    // as lower-triangular (c <= r)...  only allowed for const matrix object.
+    if (static_cast<UnsignedMatrixIndexT>(c) >
+        static_cast<UnsignedMatrixIndexT>(r))
+      std::swap(c, r);
+    // c<=r now so don't have to check c.
+    KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
+                 static_cast<UnsignedMatrixIndexT>(this->num_rows_));
+    return *(this->data_ + (r*(r+1)) / 2 + c);
+    // Duplicating code from PackedMatrix.h
+  }
+
+  inline Real &operator() (MatrixIndexT r, MatrixIndexT c) {
+    if (static_cast<UnsignedMatrixIndexT>(c) >
+        static_cast<UnsignedMatrixIndexT>(r))
+      std::swap(c, r);
+    // c<=r now so don't have to check c.
+    KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
+                 static_cast<UnsignedMatrixIndexT>(this->num_rows_));
+    return *(this->data_ + (r * (r + 1)) / 2 + c);
+    // Duplicating code from PackedMatrix.h
+  }
+
+  using PackedMatrix<Real>::operator =;
+  using PackedMatrix<Real>::Scale;
+
+  /// matrix inverse.
+  /// if inverse_needed = false, will fill matrix with garbage.
+  /// (only useful if logdet wanted).
+  void Invert(Real *logdet = NULL, Real *det_sign= NULL,
+              bool inverse_needed = true);
+
+  // Below routine does inversion in double precision,
+  // even for single-precision object.
+  void InvertDouble(Real *logdet = NULL, Real *det_sign = NULL,
+                    bool inverse_needed = true);
+
+  /// Returns maximum ratio of singular values.
+  inline Real Cond() const {
+    Matrix<Real> tmp(*this);
+    return tmp.Cond();
+  }
+
+  /// Takes matrix to a fraction power via Svd.
+  /// Will throw exception if matrix is not positive semidefinite
+  /// (to within a tolerance)
+  void ApplyPow(Real exponent);
+
+  /// This is the version of SVD that we implement for symmetric positive
+  /// definite matrices.  This exists for historical reasons; right now its
+  /// internal implementation is the same as Eig().  It computes the eigenvalue
+  /// decomposition (*this) = P * diag(s) * P^T with P orthogonal.  Will throw
+  /// exception if input is not positive semidefinite to within a tolerance.
+  void SymPosSemiDefEig(VectorBase<Real> *s, MatrixBase<Real> *P,
+                        Real tolerance = 0.001) const;
+
+  /// Solves the symmetric eigenvalue problem: at end we should have (*this) = P
+  /// * diag(s) * P^T.  We solve the problem using the symmetric QR method.
+  /// P may be NULL.
+  /// Implemented in qr.cc.
+  /// If you need the eigenvalues sorted, the function SortSvd declared in
+  /// kaldi-matrix is suitable.
+  void Eig(VectorBase<Real> *s, MatrixBase<Real> *P = NULL) const;
+  
+  /// This function gives you, approximately, the largest eigenvalues of the
+  /// symmetric matrix and the corresponding eigenvectors.  (largest meaning,
+  /// further from zero).  It does this by doing a SVD within the Krylov
+  /// subspace generated by this matrix and a random vector.  This is
+  /// a form of the Lanczos method with complete reorthogonalization, followed
+  /// by SVD within a smaller dimension ("lanczos_dim").
+  ///
+  /// If *this is m by m, s should be of dimension n and P should be of
+  /// dimension m by n, with n <= m.  The *columns* of P are the approximate
+  /// eigenvectors; P * diag(s) * P^T would be a low-rank reconstruction of
+  /// *this.  The columns of P will be orthogonal, and the elements of s will be
+  /// the eigenvalues of *this projected into that subspace, but beyond that
+  /// there are no exact guarantees.  (This is because the convergence of this
+  /// method is statistical).  Note: it only makes sense to use this
+  /// method if you are in very high dimension and n is substantially smaller
+  /// than m: for example, if you want the 100 top eigenvalues of a 10k by 10k
+  /// matrix.  This function calls Rand() to initialize the lanczos
+  /// iterations and also for restarting.
+  /// If lanczos_dim is zero, it will default to the greater of:
+  /// s->Dim() + 50 or s->Dim() + s->Dim()/2, but not more than this->Dim().
+  /// If lanczos_dim == this->Dim(), you might as well just call the function
+  /// Eig() since the result will be the same, and Eig() would be faster; the
+  /// whole point of this function is to reduce the dimension of the SVD
+  /// computation.
+  void TopEigs(VectorBase<Real> *s, MatrixBase<Real> *P,
+               MatrixIndexT lanczos_dim = 0) const;
+
+
+  
+  /// Takes log of the matrix (does eigenvalue decomposition then takes
+  /// log of eigenvalues and reconstructs).  Will throw if not +ve definite.
+  void Log();
+
+
+  // Takes exponential of the matrix (equivalent to doing eigenvalue
+  // decomposition then taking exp of eigenvalues and reconstructing).
+  void Exp();
+
+  /// Returns the maximum of the absolute values of any of the
+  /// eigenvalues.
+  Real MaxAbsEig() const;
+
+  void PrintEigs(const char *name) {
+    Vector<Real> s((*this).NumRows());
+    Matrix<Real> P((*this).NumRows(), (*this).NumCols());
+    SymPosSemiDefEig(&s, &P);
+    KALDI_LOG << "PrintEigs: " << name << ": " << s;
+  }
+
+  bool IsPosDef() const;  // returns true if Cholesky succeeds.
+  void AddSp(const Real alpha, const SpMatrix<Real> &Ma) {
+    this->AddPacked(alpha, Ma);
+  }
+
+  /// Computes log determinant but only for +ve-def matrices
+  /// (it uses Cholesky).
+  /// If matrix is not +ve-def, it will throw an exception
+  /// was LogPDDeterminant()
+  Real LogPosDefDet() const;
+
+  Real LogDet(Real *det_sign = NULL) const;
+
+  /// rank-one update, this <-- this + alpha v v'
+  template<typename OtherReal>
+  void AddVec2(const Real alpha, const VectorBase<OtherReal> &v);
+
+  /// rank-two update, this <-- this + alpha (v w' + w v').
+  void AddVecVec(const Real alpha, const VectorBase<Real> &v,
+                 const VectorBase<Real> &w);
+
+  /// Does *this = beta * *this + alpha * diag(v) * S * diag(v)
+  void AddVec2Sp(const Real alpha, const VectorBase<Real> &v,
+                 const SpMatrix<Real> &S, const Real beta);
+  
+  /// diagonal update, this <-- this + diag(v)
+  template<typename OtherReal>
+  void AddDiagVec(const Real alpha, const VectorBase<OtherReal> &v);
+
+  /// rank-N update:
+  /// if (transM == kNoTrans)
+  /// (*this) = beta*(*this) + alpha * M * M^T,
+  /// or  (if transM == kTrans)
+  ///  (*this) = beta*(*this) + alpha * M^T * M
+  /// Note: beta used to default to 0.0.
+  void AddMat2(const Real alpha, const MatrixBase<Real> &M,
+               MatrixTransposeType transM, const Real beta);
+
+  /// Extension of rank-N update:
+  /// this <-- beta*this  +  alpha * M * A * M^T.
+  /// (*this) and A are allowed to be the same.
+  /// If transM == kTrans, then we do it as M^T * A * M.
+  void AddMat2Sp(const Real alpha, const MatrixBase<Real> &M,
+                 MatrixTransposeType transM, const SpMatrix<Real> &A,
+                 const Real beta = 0.0);
+
+  /// This is a version of AddMat2Sp specialized for when M is fairly sparse.
+  /// This was required for making the raw-fMLLR code efficient.
+  void AddSmat2Sp(const Real alpha, const MatrixBase<Real> &M,
+                  MatrixTransposeType transM, const SpMatrix<Real> &A,
+                  const Real beta = 0.0);
+
+  /// The following function does:
+  /// this <-- beta*this  +  alpha * T * A * T^T.
+  /// (*this) and A are allowed to be the same.
+  /// If transM == kTrans, then we do it as alpha * T^T * A * T.
+  /// Currently it just calls AddMat2Sp, but if needed we
+  /// can implement it more efficiently.
+  void AddTp2Sp(const Real alpha, const TpMatrix<Real> &T,
+                MatrixTransposeType transM, const SpMatrix<Real> &A,
+                const Real beta = 0.0);
+
+  /// The following function does:
+  /// this <-- beta*this  +  alpha * T * T^T.
+  /// (*this) and A are allowed to be the same.
+  /// If transM == kTrans, then we do it as alpha * T^T *  T
+  /// Currently it just calls AddMat2, but if needed we
+  /// can implement it more efficiently.
+  void AddTp2(const Real alpha, const TpMatrix<Real> &T,
+              MatrixTransposeType transM, const Real beta = 0.0);
+
+  /// Extension of rank-N update:
+  /// this <-- beta*this + alpha * M * diag(v) * M^T.
+  /// if transM == kTrans, then
+  /// this <-- beta*this + alpha * M^T * diag(v) * M.
+  void AddMat2Vec(const Real alpha, const MatrixBase<Real> &M,
+                  MatrixTransposeType transM, const VectorBase<Real> &v,
+                  const Real beta = 0.0);
+
+
+  ///  Floors this symmetric matrix to the matrix
+  /// alpha * Floor, where the matrix Floor is positive
+  /// definite.
+  /// It is floored in the sense that after flooring,
+  ///  x^T (*this) x  >= x^T (alpha*Floor) x.
+  /// This is accomplished using an Svd.  It will crash
+  /// if Floor is not positive definite. Returns the number of
+  /// elements that were floored.
+  int ApplyFloor(const SpMatrix<Real> &Floor, Real alpha = 1.0,
+                 bool verbose = false);
+
+  /// Floor: Given a positive semidefinite matrix, floors the eigenvalues
+  /// to the specified quantity.  A previous version of this function had
+  /// a tolerance which is now no longer needed since we have code to
+  /// do the symmetric eigenvalue decomposition and no longer use the SVD
+  /// code for that purpose.
+  int ApplyFloor(Real floor);
+  
+  bool IsDiagonal(Real cutoff = 1.0e-05) const;
+  bool IsUnit(Real cutoff = 1.0e-05) const;
+  bool IsZero(Real cutoff = 1.0e-05) const;
+  bool IsTridiagonal(Real cutoff = 1.0e-05) const;
+
+  /// sqrt of sum of square elements.
+  Real FrobeniusNorm() const;
+
+  /// Returns true if ((*this)-other).FrobeniusNorm() <=
+  ///   tol*(*this).FrobeniusNorm()
+  bool ApproxEqual(const SpMatrix<Real> &other, float tol = 0.01) const;
+
+  // LimitCond:
+  // Limits the condition of symmetric positive semidefinite matrix to
+  // a specified value
+  // by flooring all eigenvalues to a positive number which is some multiple
+  // of the largest one (or zero if there are no positive eigenvalues).
+  // Takes the condition number we are willing to accept, and floors
+  // eigenvalues to the largest eigenvalue divided by this.
+  //  Returns #eigs floored or already equal to the floor. 
+  // Throws exception if input is not positive definite.
+  // returns #floored.
+  MatrixIndexT LimitCond(Real maxCond = 1.0e+5, bool invert = false);
+
+  // As LimitCond, but computed on a double-precision copy.  Returns #floored.
+  MatrixIndexT LimitCondDouble(Real maxCond = 1.0e+5, bool invert = false) {
+    SpMatrix<double> double_copy(*this);
+    const MatrixIndexT num_floored = double_copy.LimitCond(maxCond, invert);
+    this->CopyFromSp(double_copy);
+    return num_floored;
+  }
+  Real Trace() const;
+
+  /// Tridiagonalize the matrix with an orthogonal transformation.  If
+  /// *this starts as S, produce T (and Q, if non-NULL) such that
+  /// T = Q S Q^T, i.e. S = Q^T T Q.  Caution: this is the other way
+  /// round from most authors (it's more efficient in row-major indexing).
+  void Tridiagonalize(MatrixBase<Real> *Q);
+
+  /// The symmetric QR algorithm.  This will mostly be useful in internal code.
+  /// Typically, you will call this after Tridiagonalize(), on the same object.
+  /// When called, *this (call it A at this point) must be tridiagonal; at exit,
+  /// *this will be a diagonal matrix D that is similar to A via orthogonal
+  /// transformations.  This algorithm right-multiplies Q by orthogonal
+  /// transformations.  It turns *this from a tridiagonal into a diagonal matrix
+  /// while maintaining that (Q *this Q^T) has the same value at entry and exit.
+  /// At entry Q should probably be either NULL or orthogonal, but we don't check
+  /// this.
+  void Qr(MatrixBase<Real> *Q);
+  
+ private:
+ void EigInternal(VectorBase<Real> *s, MatrixBase<Real> *P,
+                   Real tolerance, int recurse) const;
+};
+
+/// @} end of "addtogroup matrix_group"
+
+/// \addtogroup matrix_funcs_scalar
+/// @{
+
+
+/// Returns tr(A B).
+float TraceSpSp(const SpMatrix<float> &A, const SpMatrix<float> &B);
+double TraceSpSp(const SpMatrix<double> &A, const SpMatrix<double> &B);
+
+
+/// Non-member wrapper around SpMatrix::ApproxEqual:
+/// returns A.ApproxEqual(B, tol).
+template<typename Real>
+inline bool ApproxEqual(const SpMatrix<Real> &A, const SpMatrix<Real> &B,
+                        Real tol = 0.01) { return A.ApproxEqual(B, tol); }
+
+/// Asserts (via KALDI_ASSERT) that ApproxEqual(A, B, tol) holds.
+template<typename Real>
+inline void AssertEqual(const SpMatrix<Real> &A, const SpMatrix<Real> &B,
+                        Real tol = 0.01) {
+  KALDI_ASSERT(ApproxEqual(A, B, tol));
+}
+
+
+/// Returns tr(A B).
+template<typename Real, typename OtherReal>
+Real TraceSpSp(const SpMatrix<Real> &A, const SpMatrix<OtherReal> &B);
+
+
+
+// TraceSpSpLower is the same as Trace(A B) except the lower-diagonal elements
+// are counted only once not twice as they should be.  It is useful in certain
+// optimizations.
+template<typename Real>
+Real TraceSpSpLower(const SpMatrix<Real> &A, const SpMatrix<Real> &B);
+
+
+/// Returns tr(A B).
+/// No option to transpose B because would make no difference.
+template<typename Real>
+Real TraceSpMat(const SpMatrix<Real> &A, const MatrixBase<Real> &B);
+
+/// Returns tr(A B C)
+/// (A and C may be transposed as specified by transA and transC).
+template<typename Real>
+Real TraceMatSpMat(const MatrixBase<Real> &A, MatrixTransposeType transA,
+                   const SpMatrix<Real> &B, const MatrixBase<Real> &C,
+                   MatrixTransposeType transC);
+
+/// Returns tr (A B C D)
+/// (A and C may be transposed as specified by transA and transC).
+template<typename Real>
+Real TraceMatSpMatSp(const MatrixBase<Real> &A, MatrixTransposeType transA,
+                     const SpMatrix<Real> &B, const MatrixBase<Real> &C,
+                     MatrixTransposeType transC, const SpMatrix<Real> &D);
+
+/** Computes v1^T * M * v2.  Not as efficient as it could be where v1 == v2
+ * (but no suitable blas routines available).
+ */
+
+/// Returns \f$ v_1^T M v_2 \f$
+/// Not as efficient as it could be where v1 == v2.
+template<typename Real>
+Real VecSpVec(const VectorBase<Real> &v1, const SpMatrix<Real> &M,
+               const VectorBase<Real> &v2);
+
+
+/// @} \addtogroup matrix_funcs_scalar
+
+/// \addtogroup matrix_funcs_misc
+/// @{
+
+
+/// This class describes the options for maximizing various quadratic objective
+/// functions.  It's mostly as described in the SGMM paper "the subspace
+/// Gaussian mixture model -- a structured model for speech recognition", but
+/// the diagonal_precondition option is newly added, to handle problems where
+/// different dimensions have very different scaling (we recommend to use the
+/// option but it's set false for back compatibility).
+struct SolverOptions {
+  BaseFloat K;  // maximum condition number
+  BaseFloat eps;
+  std::string name;
+  bool optimize_delta;
+  bool diagonal_precondition;  // off by default, for back compatibility
+  bool print_debug_output;
+  // Both constructors set the same defaults; only "name" differs.
+  SolverOptions():
+      K(1.0e+4), eps(1.0e-40), name("[unknown]"), optimize_delta(true),
+      diagonal_precondition(false), print_debug_output(true) { }
+  explicit SolverOptions(const std::string &name):
+      K(1.0e+4), eps(1.0e-40), name(name), optimize_delta(true),
+      diagonal_precondition(false), print_debug_output(true) { }
+  void Check() const;
+};
+
+
+/// Maximizes the auxiliary function
+/// \f[    Q(x) = x.g - 0.5 x^T H x     \f]
+/// using a numerically stable method. Like a numerically stable version of
+/// \f$  x := H^{-1} g.    \f$
+/// Assumes H positive semidefinite.
+/// Returns the objective-function change.
+
+template<typename Real>
+Real SolveQuadraticProblem(const SpMatrix<Real> &H,
+                           const VectorBase<Real> &g,
+                           const SolverOptions &opts,
+                           VectorBase<Real> *x);
+                           
+
+
+/// Maximizes the auxiliary function :
+/// \f[   Q(M) = tr(M^T P Y) - 0.5 tr(P M Q M^T)        \f]
+/// Like a numerically stable version of  \f$  M := Y Q^{-1}   \f$.
+/// Assumes Q and P positive semidefinite, and matrix dimensions match
+/// enough to make expressions meaningful.
+/// This is mostly as described in the SGMM paper "the subspace Gaussian mixture
+/// model -- a structured model for speech recognition", but the
+/// diagonal_precondition option is newly added, to handle problems
+/// where different dimensions have very different scaling (we recommend to use
+/// the option but it's set false for back compatibility).
+template<typename Real>
+Real SolveQuadraticMatrixProblem(const SpMatrix<Real> &Q,
+                                 const MatrixBase<Real> &Y,
+                                 const SpMatrix<Real> &P,
+                                 const SolverOptions &opts,
+                                 MatrixBase<Real> *M);
+
+/// Maximizes the auxiliary function :
+/// \f[   Q(M) =  tr(M^T G) -0.5 tr(P_1 M Q_1 M^T) -0.5 tr(P_2 M Q_2 M^T).   \f]
+/// Encountered in matrix update with a prior. We also apply a limit on the
+/// condition but it should be less frequently necessary, and can be set larger.
+template<typename Real>
+Real SolveDoubleQuadraticMatrixProblem(const MatrixBase<Real> &G,
+                                       const SpMatrix<Real> &P1,
+                                       const SpMatrix<Real> &P2,
+                                       const SpMatrix<Real> &Q1,
+                                       const SpMatrix<Real> &Q2,
+                                       const SolverOptions &opts,
+                                       MatrixBase<Real> *M);
+
+
+/// @} End of "addtogroup matrix_funcs_misc"
+
+}  // namespace kaldi
+
+
+// Including the implementation (now actually just includes some
+// template specializations).
+#include "matrix/sp-matrix-inl.h"
+
+
+#endif  // KALDI_MATRIX_SP_MATRIX_H_
+
diff --git a/kaldi_io/src/kaldi/matrix/srfft.h b/kaldi_io/src/kaldi/matrix/srfft.h
new file mode 100644
index 0000000..c0d36af
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/srfft.h
@@ -0,0 +1,132 @@
+// matrix/srfft.h
+
+// Copyright 2009-2011  Microsoft Corporation;  Go Vivace Inc.
+//                2014  Daniel Povey
+//
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+//
+// This file includes a modified version of code originally published in Malvar,
+// H., "Signal processing with lapped transforms, " Artech House, Inc., 1992.  The
+// current copyright holder of the original code, Henrique S. Malvar, has given
+// his permission for the release of this modified version under the Apache
+// License v2.0.
+
+#ifndef KALDI_MATRIX_SRFFT_H_
+#define KALDI_MATRIX_SRFFT_H_
+
+#include "matrix/kaldi-vector.h"
+#include "matrix/kaldi-matrix.h"
+
+namespace kaldi {
+
+/// @addtogroup matrix_funcs_misc
+/// @{
+
+
+// This class is based on code by Henrique (Rico) Malvar, from his book
+// "Signal Processing with Lapped Transforms" (1992).  Copied with
+// permission, optimized by Go Vivace Inc., and converted into C++ by
+// Microsoft Corporation
+// This is a more efficient way of doing the complex FFT than ComplexFft
+// (declared in matrix-functions.h), but it only works for powers of 2.
+// Note: in multi-threaded code, you would need to have one of these objects per
+// thread, because multiple calls to Compute in parallel would not work.
+template<typename Real>
+class SplitRadixComplexFft {
+ public:
+  typedef MatrixIndexT Integer;
+
+  // N is the number of complex points (must be a power of two, or this
+  // will crash).  Note that the constructor does some work so it's best to
+  // initialize the object once and do the computation many times.
+  SplitRadixComplexFft(Integer N);
+
+  // Does the FFT computation, given pointers to the real and
+  // imaginary parts.  If "forward", do the forward FFT; else
+  // do the inverse FFT (without the 1/N factor).
+  // xr and xi are pointers to zero-based arrays of size N,
+  // containing the real and imaginary parts
+  // respectively.
+  void Compute(Real *xr, Real *xi, bool forward) const;
+
+  // This version of Compute takes a single array of size N*2, containing
+  // [ r0 im0 r1 im1 ... ]; otherwise it behaves like the version above.  It is
+  // not const, as it uses the member temp_buffer_ as temporary storage.
+  void Compute(Real *x, bool forward);
+
+
+  // This version of Compute is const; it operates on an array of size N*2
+  // containing [ r0 im0 r1 im1 ... ], but it uses the argument "temp_buffer" as
+  // temporary storage instead of a class-member variable.  It will allocate it if
+  // needed.
+  void Compute(Real *x, bool forward, std::vector<Real> *temp_buffer) const;
+
+  ~SplitRadixComplexFft();
+
+ protected:
+  // temp_buffer_ is allocated only if someone calls Compute with only one Real*
+  // argument and we need a temporary buffer while creating interleaved data.
+  std::vector<Real> temp_buffer_;
+ private:
+  void ComputeTables();
+  void ComputeRecursive(Real *xr, Real *xi, Integer logn) const;
+  void BitReversePermute(Real *x, Integer logn) const;
+
+  Integer N_;
+  Integer logn_;  // log(N)
+
+  Integer *brseed_;
+  // brseed_ is Evans' seed table (ref: D. M. W.
+  // Evans, "An improved digit-reversal permutation algorithm ...",
+  // IEEE Trans. ASSP, Aug. 1987, pp. 1120-1125).
+  Real **tab_;       // Tables of butterfly coefficients.
+
+  KALDI_DISALLOW_COPY_AND_ASSIGN(SplitRadixComplexFft);
+};
+
+/// FFT of N real points, built on a SplitRadixComplexFft of N/2 complex points.
+template<typename Real>
+class SplitRadixRealFft: private SplitRadixComplexFft<Real> {
+ public:
+  SplitRadixRealFft(MatrixIndexT N):  // will fail unless N>=4 and N is a power of 2.
+      SplitRadixComplexFft<Real> (N/2), N_(N) { }
+
+  /// If forward == true, this function transforms from a sequence of N real
+  /// points to its complex fourier transform; otherwise it goes in the reverse
+  /// direction.  If you call it in the forward and then reverse direction and
+  /// multiply by 1.0/N, you will get back the original data.
+  /// The interpretation of the complex-FFT data is as follows: the array
+  /// is a sequence of complex numbers C_n of length N/2 with (real, im) format,
+  /// i.e. [real0, real_{N/2}, real1, im1, real2, im2, real3, im3, ...].
+  void Compute(Real *x, bool forward);
+
+  /// This is as the other Compute() function, but it is a const version that
+  /// uses a user-supplied buffer.
+  void Compute(Real *x, bool forward, std::vector<Real> *temp_buffer) const;
+
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(SplitRadixRealFft);
+  int N_;
+};
+
+
+/// @} end of "addtogroup matrix_funcs_misc"
+
+} // end namespace kaldi
+
+
+#endif
+
diff --git a/kaldi_io/src/kaldi/matrix/tp-matrix.h b/kaldi_io/src/kaldi/matrix/tp-matrix.h
new file mode 100644
index 0000000..f43e86c
--- /dev/null
+++ b/kaldi_io/src/kaldi/matrix/tp-matrix.h
@@ -0,0 +1,131 @@
+// matrix/tp-matrix.h
+
+// Copyright 2009-2011  Ondrej Glembek;  Lukas Burget;  Microsoft Corporation;
+//                      Saarland University;  Yanmin Qian;   Haihua Xu
+//                2013  Johns Hopkins Universith (author: Daniel Povey)
+
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_MATRIX_TP_MATRIX_H_
+#define KALDI_MATRIX_TP_MATRIX_H_
+
+
+#include "matrix/packed-matrix.h"
+
+namespace kaldi {
+/// \addtogroup matrix_group
+/// @{
+
+template<typename Real> class TpMatrix;
+
+/// @brief Packed lower-triangular matrix class
+template<typename Real>
+class TpMatrix : public PackedMatrix<Real> {
+  friend class CuTpMatrix<float>;
+  friend class CuTpMatrix<double>;
+ public:
+  TpMatrix() : PackedMatrix<Real>() {}
+  explicit TpMatrix(MatrixIndexT r, MatrixResizeType resize_type = kSetZero)
+      : PackedMatrix<Real>(r, resize_type) {}
+  TpMatrix(const TpMatrix<Real>& orig) : PackedMatrix<Real>(orig) {}
+
+  /// Copy constructor from CUDA TpMatrix
+  /// This is defined in ../cudamatrix/cu-tp-matrix.cc
+  explicit TpMatrix(const CuTpMatrix<Real> &cu);
+
+  /// Construct from a TpMatrix of another element type (OtherReal).
+  template<typename OtherReal> explicit TpMatrix(const TpMatrix<OtherReal>& orig)
+      : PackedMatrix<Real>(orig) {}
+
+  /// Read-only access: elements above the diagonal (c > r) read as zero.
+  Real operator() (MatrixIndexT r, MatrixIndexT c) const {
+    if (static_cast<UnsignedMatrixIndexT>(c) <=
+        static_cast<UnsignedMatrixIndexT>(r)) {
+      KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
+                   static_cast<UnsignedMatrixIndexT>(this->num_rows_));
+      // c <= r, so c needs no check; same indexing as in PackedMatrix.
+      return this->data_[(r*(r+1)) / 2 + c];
+    }
+    KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(c) <
+                 static_cast<UnsignedMatrixIndexT>(this->num_rows_));
+    return 0;
+  }
+
+  /// Writable access, valid only on or below the diagonal (c <= r).
+  Real &operator() (MatrixIndexT r, MatrixIndexT c) {
+    KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(r) <
+                 static_cast<UnsignedMatrixIndexT>(this->num_rows_));
+    KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(c) <=
+                 static_cast<UnsignedMatrixIndexT>(r) &&
+                 "you cannot access the upper triangle of TpMatrix using "
+                 "a non-const matrix object.");
+    return this->data_[(r*(r+1)) / 2 + c];
+  }
+  // Note: Cholesky may throw std::runtime_error
+  void Cholesky(const SpMatrix<Real>& orig);
+
+  void Invert();
+
+  // Inverts via a double-precision copy, then copies the result back.
+  void InvertDouble() {
+    TpMatrix<double> double_copy(*this);
+    double_copy.Invert();
+    this->CopyFromTp(double_copy);
+  }
+
+  /// Shallow swap
+  void Swap(TpMatrix<Real> *other);
+
+  /// Returns the determinant of the matrix (product of diagonals)
+  Real Determinant();
+
+  /// CopyFromMat copies the lower triangle of M into *this
+  /// (or the upper triangle, if Trans == kTrans).
+  void CopyFromMat(const MatrixBase<Real> &M,
+                   MatrixTransposeType Trans = kNoTrans);
+
+  /// This is implemented in ../cudamatrix/cu-tp-matrix.cc
+  void CopyFromMat(const CuTpMatrix<Real> &other);
+
+  /// CopyFromTp copies another triangular matrix into this one.
+  void CopyFromTp(const TpMatrix<Real> &other) {
+    PackedMatrix<Real>::CopyFromPacked(other);
+  }
+
+  template<typename OtherReal> void CopyFromTp(const TpMatrix<OtherReal> &other) {
+    PackedMatrix<Real>::CopyFromPacked(other);
+  }
+
+  /// AddTp does *this += alpha * M.
+  void AddTp(const Real alpha, const TpMatrix<Real> &M) {
+    this->AddPacked(alpha, M);
+  }
+
+  using PackedMatrix<Real>::operator =;
+  using PackedMatrix<Real>::Scale;
+
+  void Resize(MatrixIndexT nRows, MatrixResizeType resize_type = kSetZero) {
+    PackedMatrix<Real>::Resize(nRows, resize_type);
+  }
+};
+
+/// @} end of "addtogroup matrix_group".
+
+}  // namespace kaldi
+
+
+#endif
+
diff --git a/kaldi_io/src/kaldi/tree/build-tree-questions.h b/kaldi_io/src/kaldi/tree/build-tree-questions.h
new file mode 100644
index 0000000..a6bcfdd
--- /dev/null
+++ b/kaldi_io/src/kaldi/tree/build-tree-questions.h
@@ -0,0 +1,133 @@
+// tree/build-tree-questions.h
+
+// Copyright 2009-2011  Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_TREE_BUILD_TREE_QUESTIONS_H_
+#define KALDI_TREE_BUILD_TREE_QUESTIONS_H_
+
+#include "util/stl-utils.h"
+#include "tree/context-dep.h"
+
+namespace kaldi {
+
+
+/// \addtogroup tree_group
+/// @{
+/// Typedef for statistics to build trees.
+typedef std::vector<std::pair<EventType, Clusterable*> > BuildTreeStatsType;
+
+/// Typedef used when we get "all keys" from a set of stats-- used in specifying
+/// which kinds of questions to ask.
+typedef enum { kAllKeysInsistIdentical, kAllKeysIntersection, kAllKeysUnion } AllKeysType;
+
+/// @}
+
+/// \defgroup tree_group_questions Question sets for decision-tree clustering
+///  See \ref tree_internals (and specifically \ref treei_func_questions) for context.
+/// \ingroup tree_group
+/// @{
+
+/// QuestionsForKey is a class used to define the questions for a key,
+/// and also options that allow us to refine the question during tree-building
+/// (i.e. make a question specific to the location in the tree).
+/// The Questions class handles aggregating these options for a set
+/// of different keys.
+struct QuestionsForKey {  // Configuration class associated with a particular key
+  // (of type EventKeyType).  It also contains the questions themselves.
+  std::vector<std::vector<EventValueType> > initial_questions;
+  // If refine_opts.max_iter == 0, we just pick from the initial questions.
+  RefineClustersOptions refine_opts;
+
+  // Sets up refinement with num_iters iterations and top-n = 2 (top-n = 2 is
+  // no restriction: RefineClusters is called with 2 clusters, so it would get
+  // set to that anyway).  The object has no questions yet, so it won't work
+  // as-is: the user has to add them.
+  QuestionsForKey(int32 num_iters = 5): refine_opts(num_iters, 2) { }
+
+  // Asserts that each of the initial questions is sorted.
+  void Check() const {
+    for (size_t i = 0; i < initial_questions.size(); i++)
+      KALDI_ASSERT(IsSorted(initial_questions[i]));
+  }
+
+  void Write(std::ostream &os, bool binary) const;
+  void Read(std::istream &is, bool binary);
+  // copy and assign allowed.
+};
+
+/// This class defines, for each EventKeyType, a set of initial questions that
+/// it tries and also a number of iterations for which to refine the questions to increase
+/// likelihood. It is perhaps a bit more than an options class, as it contains the
+/// actual questions.
+class Questions {  // careful, this is a class.
+ public:
+  /// Returns the questions for "key" (after Check()); KALDI_ERR if none are set.
+  const QuestionsForKey &GetQuestionsOf(EventKeyType key) const {
+    std::map<EventKeyType, size_t>::const_iterator iter = key_idx_.find(key);
+    if (iter == key_idx_.end()) {
+      KALDI_ERR << "Questions: no options for key "<< key;
+    }
+    size_t idx = iter->second;
+    KALDI_ASSERT(idx < key_options_.size());
+    key_options_[idx]->Check();
+    return *(key_options_[idx]);
+  }
+  /// Stores a copy of options_of_key for "key", replacing any previous entry.
+  void SetQuestionsOf(EventKeyType key, const QuestionsForKey &options_of_key) {
+    options_of_key.Check();
+    std::map<EventKeyType, size_t>::iterator iter = key_idx_.find(key);
+    if (iter == key_idx_.end()) {
+      // Append first, index second: key_idx_ then never names a missing slot.
+      key_options_.push_back(new QuestionsForKey(options_of_key));
+      key_idx_[key] = key_options_.size() - 1;
+    } else {
+      size_t idx = iter->second;
+      KALDI_ASSERT(idx < key_options_.size());
+      *(key_options_[idx]) = options_of_key;
+    }
+  }
+  void GetKeysWithQuestions(std::vector<EventKeyType> *keys_out) const {
+    KALDI_ASSERT(keys_out != NULL);
+    CopyMapKeysToVector(key_idx_, keys_out);
+  }
+  bool HasQuestionsForKey(EventKeyType key) const { return (key_idx_.count(key) != 0); }
+  ~Questions() { kaldi::DeletePointers(&key_options_); }
+
+  /// Default constructor: creates an object with no questions.  After using this you
+  /// would have to set up the config for each key you are going to use, or use InitRand().
+  Questions() { }
+
+  /// InitRand attempts to generate "reasonable" random questions.  Only of use
+  /// for debugging.  This initializer creates a config that is ready to use.
+  /// e.g. num_iters_refine = 0 means just use stated questions (if >1, will use
+  /// different questions at each split of the tree).
+  void InitRand(const BuildTreeStatsType &stats, int32 num_quest, int32 num_iters_refine, AllKeysType all_keys_type);
+
+  void Write(std::ostream &os, bool binary) const;
+  void Read(std::istream &is, bool binary);
+ private:
+  std::vector<QuestionsForKey*> key_options_;
+  std::map<EventKeyType, size_t> key_idx_;
+  KALDI_DISALLOW_COPY_AND_ASSIGN(Questions);
+};
+
+/// @}
+
+}// end namespace kaldi
+
+#endif // KALDI_TREE_BUILD_TREE_QUESTIONS_H_
diff --git a/kaldi_io/src/kaldi/tree/build-tree-utils.h b/kaldi_io/src/kaldi/tree/build-tree-utils.h
new file mode 100644
index 0000000..464fc6b
--- /dev/null
+++ b/kaldi_io/src/kaldi/tree/build-tree-utils.h
@@ -0,0 +1,324 @@
+// tree/build-tree-utils.h
+
+// Copyright 2009-2011  Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_TREE_BUILD_TREE_UTILS_H_
+#define KALDI_TREE_BUILD_TREE_UTILS_H_
+
+#include "tree/build-tree-questions.h"
+
+// build-tree-questions.h needed for this typedef:
+// typedef std::vector<std::pair<EventType, Clusterable*> > BuildTreeStatsType;
+// and for other #includes.
+
+namespace kaldi {
+
+
+///   \defgroup tree_group_lower Low-level functions for manipulating statistics and event-maps
+///    See \ref tree_internals and specifically \ref treei_func for context.
+///   \ingroup tree_group
+///
+///   @{
+
+
+
+/// This frees the Clusterable* pointers in "stats", where non-NULL, and sets them to NULL.
+/// Does not delete the pointer "stats" itself.
+void DeleteBuildTreeStats(BuildTreeStatsType *stats);
+
+/// Writes BuildTreeStats object.  This works even if pointers are NULL.
+void WriteBuildTreeStats(std::ostream &os, bool binary,
+                         const BuildTreeStatsType &stats);
+
+/// Reads BuildTreeStats object.  The "example" argument must be of the same
+/// type as the stats on disk, and is needed for access to the correct "Read"
+/// function.  It was organized this way for easier extensibility (so adding new
+/// Clusterable derived classes isn't painful)
+void ReadBuildTreeStats(std::istream &is, bool binary,
+                        const Clusterable &example, BuildTreeStatsType *stats);
+
+/// Convenience function e.g. to work out possible values of the phones from just the stats.
+/// Returns true if key was always defined inside the stats.
+/// May be used with ans == NULL to find out if key was always defined.
+bool PossibleValues(EventKeyType key, const BuildTreeStatsType &stats,
+                    std::vector<EventValueType> *ans);
+
+
+/// Splits stats according to the EventMap, indexing them at output by the
+/// leaf type.   A utility function.  NOTE-- pointers in stats_out point to
+/// the same memory location as those in stats.  No copying of Clusterable*
+/// objects happens.  Will add to stats in stats_out if non-empty at input.
+/// This function may increase the size of vector stats_out as necessary
+/// to accommodate stats, but will never decrease the size.
+void SplitStatsByMap(const BuildTreeStatsType &stats_in, const EventMap &e,
+                     std::vector<BuildTreeStatsType> *stats_out);
+
+/// SplitStatsByKey splits stats up according to the value of a particular key,
+/// which must be always defined and nonnegative.  Like MapStats.  Pointers to
+/// Clusterable* in stats_out are not newly allocated-- they are the same as the
+/// ones in stats_in.  Generally they will still be owned at stats_in (user can
+/// decide where to allocate ownership).
+void SplitStatsByKey(const BuildTreeStatsType &stats_in, EventKeyType key,
+                     std::vector<BuildTreeStatsType> *stats_out);
+
+
+/// Converts stats from a given context-window (N) and central-position (P) to a
+/// different N and P, by possibly reducing context.  This function does a job
+/// that's quite specific to the "normal" stats format we use.  See \ref
+/// tree_window for background.  This function may delete some keys and change
+/// others, depending on the N and P values.  It expects that at input, all keys
+/// will either be -1 or lie between 0 and oldN-1.  At output, keys will be
+/// either -1 or between 0 and newN-1.
+/// Returns false if we could not convert the stats (e.g. because newN is larger
+/// than oldN).
+bool ConvertStats(int32 oldN, int32 oldP, int32 newN, int32 newP,
+                  BuildTreeStatsType *stats);
+
+
+/// FilterStatsByKey filters the stats according to the value of a specified key.
+/// If include_if_present == true, it only outputs the stats whose key is in
+/// "values"; otherwise it only outputs the stats whose key is not in "values".
+/// At input, "values" must be sorted and unique, and all stats in "stats_in"
+/// must have "key" defined.  At output, pointers to Clusterable* in stats_out
+/// are not newly allocated-- they are the same as the ones in stats_in.
+void FilterStatsByKey(const BuildTreeStatsType &stats_in,
+                      EventKeyType key,
+                      std::vector<EventValueType> &values,
+                      bool include_if_present,  // true-> retain only if in "values",
+                      // false-> retain only if not in "values".
+                      BuildTreeStatsType *stats_out);
+
+
+/// Sums stats, or returns NULL if stats_in has no non-NULL stats.
+/// Stats are newly allocated, owned by caller.
+Clusterable *SumStats(const BuildTreeStatsType &stats_in);
+
+/// Sums the normalizer [typically, data-count] over the stats.
+BaseFloat SumNormalizer(const BuildTreeStatsType &stats_in);
+
+/// Sums the objective function over the stats.
+BaseFloat SumObjf(const BuildTreeStatsType &stats_in);
+
+
+/// Sum a vector of stats.  Leaves NULL as pointer if no stats available.
+/// The pointers in stats_out are owned by caller.  At output, there may be
+/// NULLs in the vector stats_out.
+void SumStatsVec(const std::vector<BuildTreeStatsType> &stats_in, std::vector<Clusterable*> *stats_out);
+
+/// Clusters the stats given the event map and returns the total objf given those clusters.
+BaseFloat ObjfGivenMap(const BuildTreeStatsType &stats_in, const EventMap &e);
+
+
+/// FindAllKeys puts in *keys the (sorted, unique) list of all key identities in the stats.
+/// If type == kAllKeysInsistIdentical, it will insist that this set of keys is the same for all the
+///   stats (else exception is thrown).
+/// if type == kAllKeysIntersection, it will return the smallest common set of keys present in
+///   the set of stats
+/// if type== kAllKeysUnion (currently probably not so useful since maps will return "undefined"
+///   if key is not present), it will return the union of all the keys present in the stats.
+void FindAllKeys(const BuildTreeStatsType &stats, AllKeysType keys_type,
+                 std::vector<EventKeyType> *keys);
+
+
+/// @}
+
+
+/**
+ \defgroup tree_group_intermediate Intermediate-level functions used in building the tree
+    These functions are used in top-level tree-building code (\ref tree_group_top); see
+     \ref tree_internals for documentation.
+ \ingroup tree_group
+ @{
+*/
+
+/// Returns a tree with just one node, whose leaf id is taken from
+/// (*num_leaves)++.  Used at the start of the tree-building process; not
+/// really used in current recipes.
+inline EventMap *TrivialTree(int32 *num_leaves) {
+  KALDI_ASSERT(*num_leaves == 0);  // in envisaged usage.
+  const int32 leaf_id = (*num_leaves)++;
+  return new ConstantEventMap(leaf_id);
+}
+
+/// DoTableSplit does a complete split on this key (e.g. might correspond to central phone
+/// (key = P-1), or HMM-state position (key == kPdfClass == -1)).  Stats used to work out possible
+/// values of the event. "num_leaves" is used to allocate new leaves.   All stats must have
+/// this key defined, or this function will crash.
+EventMap *DoTableSplit(const EventMap &orig, EventKeyType key,
+                       const BuildTreeStatsType &stats, int32 *num_leaves);
+
+
+/// DoTableSplitMultiple does a complete split on all the keys, in order from
+/// keys[0], keys[1] and so on.
+/// The stats are used to work out possible values corresponding to the key.
+/// "num_leaves" is used to allocate new leaves.   All stats must have
+/// the keys defined, or this function will crash.
+/// Returns a newly allocated event map.
+EventMap *DoTableSplitMultiple(const EventMap &orig,
+                               const std::vector<EventKeyType> &keys,
+                               const BuildTreeStatsType &stats,
+                               int32 *num_leaves);
+
+
+/// "ClusterEventMapGetMapping" clusters the leaves of the EventMap, with "thresh" a delta-likelihood
+/// threshold to control how many leaves we combine (might be the same as the delta-likelihood
+/// threshold used in splitting).
+/// The function returns the number of leaves we combined.  The same leaf-ids of the leaves being
+/// clustered will be used for the clustered leaves (but other than that there is no special rule
+/// which leaf-ids should be used at output).
+/// It outputs the mapping for leaves, in "mapping", which may be empty at the start
+/// but may also contain mappings for other parts of the tree, which must contain
+/// disjoint leaves from this part.  This is so that Cluster can
+/// be called multiple times for sub-parts of the tree (with disjoint sets of leaves),
+/// e.g. if we want to avoid sharing across phones.  Afterwards you can use Copy function
+/// of EventMap to apply the mapping, i.e. call e_in.Copy(mapping) to get the new map.
+/// Note that the application of Cluster creates gaps in the leaves.  You should then
+/// call RenumberEventMap(e_in.Copy(mapping), num_leaves).
+/// If you only want to cluster a subset of the leaves (e.g. just non-silence, or just
+/// a particular phone), do this by providing a set of "stats" that correspond to just
+/// this subset of leaves.  Leaves with no stats will not be clustered.
+/// See build-tree.cc for an example of usage.
+int ClusterEventMapGetMapping(const EventMap &e_in, const BuildTreeStatsType &stats,
+                              BaseFloat thresh, std::vector<EventMap*> *mapping);
+
+/// This is as ClusterEventMapGetMapping but a more convenient interface
+/// that exposes less of the internals.  It uses a bottom-up clustering to
+/// combine the leaves, until the log-likelihood decrease from combining two
+/// leaves exceeds the threshold.
+EventMap *ClusterEventMap(const EventMap &e_in, const BuildTreeStatsType &stats,
+                          BaseFloat thresh, int32 *num_removed);
+
+/// This is as ClusterEventMap, but first splits the stats on the keys specified
+/// in "keys" (e.g. typically keys = [ -1, P ]), and only clusters within the
+/// classes defined by that splitting.
+/// Note-- leaves will be non-consecutive at output, use RenumberEventMap.
+EventMap *ClusterEventMapRestrictedByKeys(const EventMap &e_in,
+                                          const BuildTreeStatsType &stats,
+                                          BaseFloat thresh,
+                                          const std::vector<EventKeyType> &keys,
+                                          int32 *num_removed);
+
+
+/// This version of ClusterEventMapRestricted restricts the clustering to only
+/// allow things that "e_restrict" maps to the same value to be clustered
+/// together.
+EventMap *ClusterEventMapRestrictedByMap(const EventMap &e_in,
+                                         const BuildTreeStatsType &stats,
+                                         BaseFloat thresh,
+                                         const EventMap &e_restrict,
+                                         int32 *num_removed);
+
+
+/// RenumberEventMap [intended to be used after calling ClusterEventMap] renumbers
+/// an EventMap so its leaves are consecutive.
+/// It puts the number of leaves in *num_leaves.  If later you need the mapping of
+/// the leaves, modify the function and add a new argument.
+EventMap *RenumberEventMap(const EventMap &e_in, int32 *num_leaves);
+
+/// This function remaps the event-map leaves using this mapping,
+/// indexed by the number at leaf.
+EventMap *MapEventMapLeaves(const EventMap &e_in,
+                            const std::vector<int32> &mapping);
+
+
+
+/// ShareEventMapLeaves performs a quite specific function that allows us to
+/// generate trees where, for a certain list of phones, and for all states in
+/// the phone, all the pdf's are shared.
+/// Each element of "values" contains a list of phones (may be just one phone),
+/// all states of which we want shared together.  Typically at input, "key" will
+/// equal P, the central-phone position, and "values" will contain just one
+/// list containing the silence phone.
+/// This function renumbers the event map leaves after doing the sharing, to
+/// make the event-map leaves contiguous.
+EventMap *ShareEventMapLeaves(const EventMap &e_in, EventKeyType key,
+                              std::vector<std::vector<EventValueType> > &values,
+                              int32 *num_leaves);
+
+
+
+/// Does a decision-tree split at the leaves of an EventMap.
+/// @param orig [in] The EventMap whose leaves we want to split. [may be either a trivial or a
+///           non-trivial one].
+/// @param stats [in] The statistics for splitting the tree; if you do not want a particular
+///          subset of leaves to be split, make sure the stats corresponding to those leaves
+///          are not present in "stats".
+/// @param qcfg [in] Configuration class that contains initial questions (e.g. sets of phones)
+///          for each key and says whether to refine these questions during tree building.
+/// @param thresh [in] A log-likelihood threshold (e.g. 300) that can be used to
+///           limit the number of leaves; you can use zero and set max_leaves instead.
+/// @param max_leaves [in] Will stop leaves being split after they reach this number.
+/// @param num_leaves [in,out] A pointer used to allocate leaves; always corresponds to the
+///             current number of leaves (is incremented when this is increased).
+/// @param objf_impr_out [out] If non-NULL, will be set to the objective improvement due to splitting
+///           (not normalized by the number of frames).
+/// @param smallest_split_change_out If non-NULL, will be set to the smallest objective-function
+///         improvement that we got from splitting any leaf; useful to provide a threshold
+///         for ClusterEventMap.
+/// @return The EventMap after splitting is returned; pointer is owned by caller.
+EventMap *SplitDecisionTree(const EventMap &orig,
+                            const BuildTreeStatsType &stats,
+                            Questions &qcfg,
+                            BaseFloat thresh,
+                            int32 max_leaves,  // max_leaves<=0 -> no maximum.
+                            int32 *num_leaves,
+                            BaseFloat *objf_impr_out,
+                            BaseFloat *smallest_split_change_out);
+
+/// CreateRandomQuestions will initialize a Questions randomly, in a reasonable
+/// way [for testing purposes, or when hand-designed questions are not available].
+/// e.g. num_quest = 5 might be a reasonable value if num_iters > 0, or num_quest = 20 otherwise.
+void CreateRandomQuestions(const BuildTreeStatsType &stats, int32 num_quest, Questions *cfg_out);
+
+
+/// FindBestSplitForKey is a function used in DoDecisionTreeSplit.
+/// It finds the best split for this key, given these stats.
+/// It will return 0 if the key was not always defined for the stats.
+BaseFloat FindBestSplitForKey(const BuildTreeStatsType &stats,
+                              const Questions &qcfg,
+                              EventKeyType key,
+                              std::vector<EventValueType> *yes_set);
+
+
+/// GetStubMap is used in tree-building functions to get the initial
+/// to-states map, before the decision-tree-building process.  It creates
+/// a simple map that splits on groups of phones.  For the set of phones in
+/// phone_sets[i] it creates either: if share_roots[i] == true, a single
+/// leaf node, or if share_roots[i] == false, separate root nodes for
+/// each HMM-position (it goes up to the highest position for any
+/// phone in the set, although it will warn if you share roots between
+/// phones with different numbers of states, which is a weird thing to
+/// do but should still work).  If any phone is present
+/// in "phone_sets" but "phone2num_pdf_classes" does not map it to a length,
+/// it is an error.  Note that the behaviour of the resulting map is
+/// undefined for phones not present in "phone_sets".
+/// At entry, this function should be called with (*num_leaves == 0).
+/// It will number the leaves starting from (*num_leaves).
+
+EventMap *GetStubMap(int32 P,
+                     const std::vector<std::vector<int32> > &phone_sets,
+                     const std::vector<int32> &phone2num_pdf_classes,
+                     const std::vector<bool> &share_roots,  // indexed by index into phone_sets.
+                     int32 *num_leaves);
+/// Note: GetStubMap with P = 0 can be used to get a standard monophone system.
+
+/// @}
+
+
+}// end namespace kaldi
+
+#endif
diff --git a/kaldi_io/src/kaldi/tree/build-tree.h b/kaldi_io/src/kaldi/tree/build-tree.h
new file mode 100644
index 0000000..37bb108
--- /dev/null
+++ b/kaldi_io/src/kaldi/tree/build-tree.h
@@ -0,0 +1,250 @@
+// tree/build-tree.h
+
+// Copyright 2009-2011  Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_TREE_BUILD_TREE_H_
+#define KALDI_TREE_BUILD_TREE_H_
+
+// The file build-tree.h contains outer-level routines used in tree-building
+// and related tasks, that are directly called by the command-line tools.
+
+#include "tree/build-tree-utils.h"
+#include "tree/context-dep.h"
+namespace kaldi {
+
+/// \defgroup tree_group_top Top-level tree-building functions
+/// See \ref tree_internals for context.
+/// \ingroup tree_group
+/// @{
+
+// Note, in tree_group_top we also include AccumulateTreeStats, in
+// ../hmm/tree-accu.h (it has some extra dependencies so we didn't
+// want to include it here).
+
+/**
+ *  BuildTree is the normal way to build a set of decision trees.
+ *  The sets "phone_sets" dictate how we set up the roots of the decision trees.
+ *  each set of phones phone_sets[i] has shared decision-tree roots, and if
+ *  the corresponding variable share_roots[i] is true, the root will be shared
+ *  for the different HMM-positions in the phone.  All phones in "phone_sets"
+ *  should be in the stats (use FixUnseenPhones to ensure this).
+ *  if for any i, do_split[i] is false, we will not do any tree splitting for
+ *  phones in that set.
+ * @param qopts [in] Questions options class, contains questions for each key
+ *                   (e.g. each phone position)
+ * @param phone_sets [in] Each element of phone_sets is a set of phones whose
+ *                 roots are shared together (prior to decision-tree splitting).
+ * @param phone2num_pdf_classes [in] A map from phones to the number of
+ *                 \ref pdf_class "pdf-classes"
+ *                 in the phone (this info is derived from the HmmTopology object)
+ * @param share_roots [in] A vector the same size as phone_sets; says for each
+ *                phone set whether the root should be shared among all the
+ *                pdf-classes or not.
+ * @param do_split [in] A vector the same size as phone_sets; says for each
+ *                phone set whether decision-tree splitting should be done
+ *                 (generally true for non-silence phones).
+ * @param stats [in] The statistics used in tree-building.
+ * @param thresh [in] Threshold used in decision-tree splitting (e.g. 1000),
+ *                   or you may use 0 in which case max_leaves becomes the
+ *                    constraint.
+ * @param max_leaves [in] Maximum number of leaves it will create; set this
+ *                  to a large number if you want to just specify  "thresh".
+ * @param cluster_thresh [in] Threshold for clustering leaves after decision-tree
+ *                  splitting (only within each phone-set); leaves will be combined
+ *                  if log-likelihood change is less than this.  A value about equal
+ *                  to "thresh" is suitable
+ *                  if thresh != 0; otherwise, zero will mean no clustering is done,
+ *                  or a negative value (e.g. -1) sets it to the smallest likelihood
+ *                  change seen during the splitting algorithm; this typically causes
+ *                  about a 20% reduction in the number of leaves.
+ 
+ * @param P [in] The central position of the phone context window, e.g. 1 for a
+ *                triphone system.
+ * @return  Returns a pointer to an EventMap object that is the tree.
+
+*/
+
+EventMap *BuildTree(Questions &qopts,
+                    const std::vector<std::vector<int32> > &phone_sets,
+                    const std::vector<int32> &phone2num_pdf_classes,
+                    const std::vector<bool> &share_roots,
+                    const std::vector<bool> &do_split,
+                    const BuildTreeStatsType &stats,
+                    BaseFloat thresh,
+                    int32 max_leaves,
+                    BaseFloat cluster_thresh,  // typically == thresh.  If negative, use smallest split.
+                    int32 P);
+
+
+/**
+ *
+ *  BuildTreeTwoLevel builds a two-level tree, useful for example in building tied mixture
+ *  systems with multiple codebooks.  It first builds a small tree by splitting to
+ *  "max_leaves_first".  It then splits at the leaves of "max_leaves_first" (think of this
+ *  as creating multiple little trees at the leaves of the first tree), until the total
+ *  number of leaves reaches "max_leaves_second".  It then outputs the second tree, along
+ *  with a mapping from the leaf-ids of the second tree to the leaf-ids of the first tree.
+ *  Note that the interface is similar to BuildTree, and in fact it calls BuildTree
+ *  internally.
+ *
+ *  The sets "phone_sets" dictate how we set up the roots of the decision trees.
+ *  each set of phones phone_sets[i] has shared decision-tree roots, and if
+ *  the corresponding variable share_roots[i] is true, the root will be shared
+ *  for the different HMM-positions in the phone.  All phones in "phone_sets"
+ *  should be in the stats (use FixUnseenPhones to ensure this).
+ *  if for any i, do_split[i] is false, we will not do any tree splitting for
+ *  phones in that set.
+ *
+ * @param qopts [in] Questions options class, contains questions for each key
+ *                   (e.g. each phone position)
+ * @param phone_sets [in] Each element of phone_sets is a set of phones whose
+ *                 roots are shared together (prior to decision-tree splitting).
+ * @param phone2num_pdf_classes [in] A map from phones to the number of
+ *                 \ref pdf_class "pdf-classes"
+ *                 in the phone (this info is derived from the HmmTopology object)
+ * @param share_roots [in] A vector the same size as phone_sets; says for each
+ *                phone set whether the root should be shared among all the
+ *                pdf-classes or not.
+ * @param do_split [in] A vector the same size as phone_sets; says for each
+ *                phone set whether decision-tree splitting should be done
+ *                 (generally true for non-silence phones).
+ * @param stats [in] The statistics used in tree-building.
+ * @param max_leaves_first [in] Maximum number of leaves it will create in first
+ *                  level of decision tree. 
+ * @param max_leaves_second [in] Maximum number of leaves it will create in second
+ *                  level of decision tree.  Must be > max_leaves_first.
+ * @param cluster_leaves [in] Boolean value; if true, we post-cluster the leaves produced
+ *                  in the second level of decision-tree split; if false, we don't.
+ *                  The threshold for post-clustering is the log-like change of the last
+ *                  decision-tree split; this typically causes about a 20% reduction in
+ *                  the number of leaves.
+ * @param P [in]   The central position of the phone context window, e.g. 1 for a
+ *                 triphone system.
+ * @param leaf_map [out]  Will be set to be a mapping from the leaves of the
+ *                 "big" tree to the leaves of the "little" tree, which you can
+ *                 view as cluster centers.
+ * @return  Returns a pointer to an EventMap object that is the (big) tree.
+
+*/
+
+EventMap *BuildTreeTwoLevel(Questions &qopts,
+                            const std::vector<std::vector<int32> > &phone_sets,
+                            const std::vector<int32> &phone2num_pdf_classes,
+                            const std::vector<bool> &share_roots,
+                            const std::vector<bool> &do_split,
+                            const BuildTreeStatsType &stats,
+                            int32 max_leaves_first,
+                            int32 max_leaves_second,
+                            bool cluster_leaves,
+                            int32 P,
+                            std::vector<int32> *leaf_map);
+
+
+/// GenRandStats generates random statistics of the form used by BuildTree.
+/// It tries to do so in such a way that they mimic "real" stats.  The event keys
+/// and their corresponding values are:
+/// - key == -1 == kPdfClass -> pdf-class, generally corresponds to
+///       zero-based position in HMM (0, 1, 2 .. hmm_lengths[phone]-1)
+/// - key == 0 -> phone-id of left-most context phone.
+/// - key == 1 -> phone-id of one-from-left-most context phone.
+/// - key == P -> phone-id of central phone (P is zero-based, see below).
+/// - key == N-1 -> phone-id of right-most context phone.
+/// GenRandStats is useful only for testing but it serves to document the format of
+/// stats used by BuildTree.
+/// If is_ctx_dep[phone] is set to false, GenRandStats will not define the keys for
+/// other than the P'th (central) phone.
+
+/// @param dim [in] dimension of features.
+/// @param num_stats [in] approximate number of separate phones-in-context wanted.
+/// @param N [in] context-size (typically 3)
+/// @param P [in] central-phone position in zero-based numbering (typically 1)
+/// @param phone_ids [in] integer ids of phones
+/// @param hmm_lengths [in] lengths of hmm for phone, indexed by phone.
+/// @param is_ctx_dep [in] boolean array indexed by phone, saying whether each phone
+///     is context dependent.
+/// @param ensure_all_phones_covered [in] Boolean argument: if true, GenRandStats
+///     ensures that every phone is seen at least once in the central position (P).
+/// @param stats_out [out] The statistics that this routine outputs.
+
+void GenRandStats(int32 dim, int32 num_stats, int32 N, int32 P,
+                  const std::vector<int32> &phone_ids,
+                  const std::vector<int32> &hmm_lengths,
+                  const std::vector<bool> &is_ctx_dep,
+                  bool ensure_all_phones_covered,
+                  BuildTreeStatsType *stats_out);
+
+
+/// Included here because it's used in some tree-building
+/// calling code.  Reads an OpenFst symbol table,
+/// discards the symbols and outputs the integers.
+void ReadSymbolTableAsIntegers(std::string filename,
+                               bool include_eps,
+                               std::vector<int32> *syms);
+
+
+
+/**
+ *  Outputs sets of phones that are reasonable for questions
+ *  to ask in the tree-building algorithm.  These are obtained by tree
+ *  clustering of the phones; for each node in the tree, all the leaves
+ *  accessible from that node form one of the sets of phones.
+ *    @param stats [in] The statistics as used for normal tree-building.
+ *    @param phone_sets_in [in] All the phones, pre-partitioned into sets.
+ *       The output sets will be various unions of these sets.  These sets
+ *       will normally correspond to "real phones", in cases where the phones
+ *       have stress and position markings.
+ *    @param all_pdf_classes_in [in] All the \ref pdf_class "pdf-classes"
+ *      that we consider for clustering.  In the normal case this is the singleton
+ *       set {1}, which means that we only consider the central hmm-position
+ *       of the standard 3-state HMM, for clustering purposes.
+ *    @param P [in] The central position in the phone context window; normally
+ *       1 for triphone systems.
+ *    @param questions_out [out] The questions (sets of phones) are output to here.
+ **/
+void AutomaticallyObtainQuestions(BuildTreeStatsType &stats,
+                                  const std::vector<std::vector<int32> > &phone_sets_in,
+                                  const std::vector<int32> &all_pdf_classes_in,
+                                  int32 P,
+                                  std::vector<std::vector<int32> > *questions_out);
+
+/// This function clusters the phones (or some initially specified sets of phones)
+/// into sets of phones, using a k-means algorithm.  Useful, for example, in building
+/// simple models for purposes of adaptation.
+
+void KMeansClusterPhones(BuildTreeStatsType &stats,
+                         const std::vector<std::vector<int32> > &phone_sets_in,
+                         const std::vector<int32> &all_pdf_classes_in,
+                         int32 P,
+                         int32 num_classes,
+                         std::vector<std::vector<int32> > *sets_out);
+
+/// Reads the roots file (throws on error).  Format is lines like:
+///  "shared split 1 2 3 4",
+///  "not-shared not-split 5",
+/// and so on.  The numbers are indexes of phones.
+void ReadRootsFile(std::istream &is,
+                   std::vector<std::vector<int32> > *phone_sets,
+                   std::vector<bool> *is_shared_root,
+                   std::vector<bool> *is_split_root);
+
+
+/// @}
+
+}// end namespace kaldi
+
+#endif
diff --git a/kaldi_io/src/kaldi/tree/cluster-utils.h b/kaldi_io/src/kaldi/tree/cluster-utils.h
new file mode 100644
index 0000000..55583a2
--- /dev/null
+++ b/kaldi_io/src/kaldi/tree/cluster-utils.h
@@ -0,0 +1,291 @@
+// tree/cluster-utils.h
+
+// Copyright 2012   Arnab Ghoshal
+// Copyright 2009-2011  Microsoft Corporation;  Saarland University
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_TREE_CLUSTER_UTILS_H_
+#define KALDI_TREE_CLUSTER_UTILS_H_
+
+#include <vector>
+#include "matrix/matrix-lib.h"
+#include "itf/clusterable-itf.h"
+
+namespace kaldi {
+
+/// \addtogroup clustering_group_simple
+/// @{
+
+/// Returns the total objective function after adding up all the
+/// statistics in the vector (pointers may be NULL).
+BaseFloat SumClusterableObjf(const std::vector<Clusterable*> &vec);
+
+/// Returns the total normalizer (usually count) of the cluster (pointers may be NULL).
+BaseFloat SumClusterableNormalizer(const std::vector<Clusterable*> &vec);
+
+/// Sums stats (ptrs may be NULL). Returns NULL if no non-NULL stats present.
+Clusterable *SumClusterable(const std::vector<Clusterable*> &vec);
+
+/** Fills in any (NULL) holes in "stats" vector, with empty stats, because
+ *  certain algorithms require non-NULL stats.  If "stats" nonempty, requires it
+ *  to contain at least one non-NULL pointer that we can call Copy() on.
+ */
+void EnsureClusterableVectorNotNull(std::vector<Clusterable*> *stats);
+
+
+/** Given stats and a vector "assignments" of the same size (that maps to
+ * cluster indices), sums the stats up into "clusters."  It will add to any
+ * stats already present in "clusters" (although typically "clusters" will be
+ * empty when called), and it will extend with NULL pointers for any unseen
+ * indices. Call EnsureClusterableVectorNotNull afterwards if you want to ensure
+ * all non-NULL clusters. Pointers in "clusters" are owned by caller. Pointers in
+ * "stats" do not have to be non-NULL.
+ */
+void AddToClusters(const std::vector<Clusterable*> &stats,
+                   const std::vector<int32> &assignments,
+                   std::vector<Clusterable*> *clusters);
+
+
+/// AddToClustersOptimized does the same as AddToClusters (it sums up the stats
+/// within each cluster), except it uses the sum of all the stats ("total") to
+/// optimize the computation for speed, if possible.  This will generally only be
+/// a significant speedup in the case where there are just two clusters, which
+/// can happen in algorithms that are doing binary splits; the idea is that we
+/// sum up all the stats in one cluster (the one with the fewest points in it),
+/// and then subtract from the total.
+void AddToClustersOptimized(const std::vector<Clusterable*> &stats,
+                            const std::vector<int32> &assignments,
+                            const Clusterable &total,
+                            std::vector<Clusterable*> *clusters);
+
+/// @} end "addtogroup clustering_group_simple"
+
+/// \addtogroup clustering_group_algo
+/// @{
+
+// Note, in the algorithms below, it is assumed that the input "points" (which
+// is std::vector<Clusterable*>) is all non-NULL.
+
+/** A bottom-up clustering algorithm. There are two parameters that control how
+ *  many clusters we get: "thresh", which is a threshold on the cost of merging
+ *  clusters, and "min_clust", which puts a floor on the number of clusters we want.
+ *  Set thresh = large to use the min_clust only, or min_clust to 0 to use
+ *  the thresh only.
+ *
+ *  The algorithm is:
+ *  \code
+ *      while (num-clusters > min_clust && smallest_merge_cost <= thresh)
+ *          merge the closest two clusters.
+ *  \endcode
+ *
+ *  @param points [in] Points to be clustered (may not contain NULL pointers)
+ *  @param thresh [in] Threshold on cost change from merging clusters; clusters
+ *               won't be merged if the cost is more than this
+ *  @param min_clust [in] Minimum number of clusters desired; we'll stop merging
+ *                  after reaching this number.
+ *  @param clusters_out [out] If non-NULL, will be set to a vector of size equal
+ *                 to the number of output clusters, containing the clustered
+ *                 statistics.  Must be empty when called.
+ *  @param assignments_out [out] If non-NULL, will be resized to the number of
+ *                 points, and each element is the index of the cluster that point
+ *                 was assigned to.
+ *  @return Returns the total objf change relative to all clusters being separate, which is
+ *    a negative.  Note that this is not the same as what the other clustering algorithms return.
+ */
+BaseFloat ClusterBottomUp(const std::vector<Clusterable*> &points,
+                          BaseFloat thresh,
+                          int32 min_clust,
+                          std::vector<Clusterable*> *clusters_out,
+                          std::vector<int32> *assignments_out);
+
+/** This is a bottom-up clustering where the points are pre-clustered in a set
+ *  of compartments, such that only points in the same compartment are clustered
+ *  together. The compartment and pair of points with the smallest merge cost
+ *  is selected and the points are clustered. The result stays in the same
+ *  compartment. The code does not merge compartments, and hence assumes that
+ *  the number of compartments is smaller than the 'min_clust' option.
+ *  The clusters in "clusters_out" are newly allocated and owned by the caller.
+ */
+BaseFloat ClusterBottomUpCompartmentalized(
+    const std::vector< std::vector<Clusterable*> > &points, BaseFloat thresh,
+    int32 min_clust, std::vector< std::vector<Clusterable*> > *clusters_out,
+    std::vector< std::vector<int32> > *assignments_out);
+
+
+struct RefineClustersOptions {
+  RefineClustersOptions(): num_iters(100), top_n(5) { }
+  RefineClustersOptions(int32 num_iters_in, int32 top_n_in):
+      num_iters(num_iters_in), top_n(top_n_in) { }
+  // Write and Read are provided because this object is written/read as
+  // part of the QuestionsForKeyOptions class.
+  void Write(std::ostream &os, bool binary) const;
+  void Read(std::istream &is, bool binary);
+  int32 num_iters;  // must be >= 0; zero means nothing is done.
+  int32 top_n;  // must be >= 2.
+};
+
+/** RefineClusters is mainly used internally by other clustering algorithms.
+ *
+ *  It starts with a given assignment of points to clusters and
+ *  keeps trying to improve it by moving points from cluster to cluster, up to
+ *  a maximum number of iterations.
+ *
+ *  "clusters" and "assignments" are both input and output variables, and so
+ *  both MUST be non-NULL.
+ *
+ *  "top_n" (>=2) is a pruning value: more is more exact, fewer is faster. The
+ *  algorithm initially finds the "top_n" closest clusters to any given point,
+ *  and from then on only considers moving the point to those "top_n" clusters.
+ *  Since RefineClusters is called multiple times from ClusterKMeans (for instance),
+ *  this is not really a limitation.
+ */
+BaseFloat RefineClusters(const std::vector<Clusterable*> &points,
+                         std::vector<Clusterable*> *clusters /*non-NULL*/,
+                         std::vector<int32> *assignments /*non-NULL*/,
+                         RefineClustersOptions cfg = RefineClustersOptions());
+
+struct ClusterKMeansOptions {
+  ClusterKMeansOptions():
+      refine_cfg(), num_iters(20), num_tries(2), verbose(true) { }
+  RefineClustersOptions refine_cfg;
+  int32 num_iters;
+  int32 num_tries;  // if > 1, the whole procedure is tried more than once and the best is picked.
+  bool verbose;
+};
+
+/** ClusterKMeans is a K-means-like clustering algorithm. It starts with
+ *  pseudo-random initialization of points to clusters and uses RefineClusters
+ *  to iteratively improve the cluster assignments.  It does this for
+ *  multiple iterations and picks the result with the best objective function.
+ *
+ *
+ *  ClusterKMeans implicitly uses Rand(). It will not necessarily return
+ *  the same value on different calls.  Use sRand() if you want consistent
+ *  results.
+ *  The algorithm used in ClusterKMeans is a "k-means-like" algorithm that tries
+ *  to be as efficient as possible.  Firstly, since the algorithm it uses
+ *  includes random initialization, it tries the whole thing cfg.num_tries times
+ *  and picks the one with the best objective function.  Each try, it does as
+ *  follows: it randomly initializes points to clusters, and then for
+ *  cfg.num_iters iterations it calls RefineClusters().  The options to
+ *  RefineClusters() are given by cfg.refine_cfg.  Calling RefineClusters once
+ *  will always be at least as good as doing one iteration of reassigning points to
+ *  clusters, but will generally be quite a bit better (without taking too
+ *  much extra time).
+ *
+ *  @param points [in]  points to be clustered (must be all non-NULL).
+ *  @param num_clust [in] number of clusters requested (it will always return exactly
+ *                 this many, or will fail if num_clust > points.size()).
+ *  @param clusters_out [out] may be NULL; if non-NULL, should be empty when called.
+ *          Will be set to a vector of statistics corresponding to the output clusters.
+ *  @param assignments_out [out] may be NULL; if non-NULL, will be set to a vector of
+ *             same size as "points", which says for each point which cluster
+ *              it is assigned to.
+ *  @param cfg [in] configuration class specifying options to the algorithm.
+ *  @return Returns the objective function improvement versus everything being
+ *     in the same cluster.
+ *
+ */
+BaseFloat ClusterKMeans(const std::vector<Clusterable*> &points,  // all non-NULL
+                        int32 num_clust,  // exact number of clusters
+                        std::vector<Clusterable*> *clusters_out,  // may be NULL
+                        std::vector<int32> *assignments_out,  // may be NULL
+                        ClusterKMeansOptions cfg = ClusterKMeansOptions());
+
+struct TreeClusterOptions  {  // Options for TreeCluster() and ClusterTopDown().
+  ClusterKMeansOptions kmeans_cfg;
+  int32 branch_factor;  // NOTE(review): presumably children per split -- confirm in the .cc.
+  BaseFloat thresh;  // Objf change: if >0, may be used to control number of leaves.
+  TreeClusterOptions()
+      : kmeans_cfg(), branch_factor(2), thresh(0) {
+    kmeans_cfg.verbose = false;  // overrides ClusterKMeansOptions' default of true.
+  }
+};
+
+/** TreeCluster is a top-down clustering algorithm, using a binary tree (not
+ *  necessarily balanced). Returns objf improvement versus having all points
+ *  in one cluster.  The algorithm is:
+ *     - Initialize to 1 cluster (tree with 1 node).
+ *     - Maintain, for each cluster, a "best-binary-split" (using ClusterKMeans
+ *       to do so). Always split the highest scoring cluster, until we can do no
+ *       more splits.
+ *
+ *  @param points [in] Data points to be clustered
+ *  @param max_clust  [in] Maximum number of clusters (you will get exactly this number,
+ *                if there are at least this many points, except if you set the
+ *                cfg.thresh value nonzero, in which case that threshold may limit
+ *                the number of clusters).
+ *  @param clusters_out [out] If non-NULL, will be set to a vector whose first
+ *                (*num_leaves_out) elements are the leaf clusters, and whose
+ *                subsequent elements are the nonleaf nodes in the tree, in
+ *                topological order with the root node last.  Must be empty vector
+ *                when this function is called.
+ *  @param assignments_out [out] If non-NULL, will be set to a vector the
+ *               same size as "points", where assignments_out[i] is the index of the
+ *               leaf node to which the i'th point gets clustered.
+ *  @param clust_assignments_out [out] If non-NULL, will be set to a vector the same size
+ *                as clusters_out  which says for each node (leaf or nonleaf), the
+ *                index of its parent.  For the root node (which is last),
+ *                clust_assignments_out[i] == i.  For each i, clust_assignments_out[i] >= i,
+ *                i.e. any non-root node's parent is higher numbered than itself.  If you don't need
+ *                this information, consider using instead the ClusterTopDown function.
+ *  @param num_leaves_out [out] If non-NULL, will be set to the number of leaf nodes
+ *                in the tree.
+ *  @param cfg [in] Configuration object that controls clustering behavior.  Most
+ *                 important value is "thresh", which provides an alternative mechanism
+ *                 [other than max_clust] to limit the number of leaves.
+ */
+BaseFloat TreeCluster(const std::vector<Clusterable*> &points,
+                      int32 max_clust,  // max number of leaf-level clusters.
+                      std::vector<Clusterable*> *clusters_out,  // may be NULL; else must be empty
+                      std::vector<int32> *assignments_out,  // may be NULL
+                      std::vector<int32> *clust_assignments_out,  // may be NULL
+                      int32 *num_leaves_out,  // may be NULL
+                      TreeClusterOptions cfg = TreeClusterOptions());
+
+
+/**
+ *  A clustering algorithm that internally uses TreeCluster,
+ *  but does not give you the information about the structure of the tree.
+ *  The "clusters_out" and "assignments_out" may be NULL if the outputs are not
+ *  needed.
+ *
+ *  @param points [in]  points to be clustered (must be all non-NULL).
+ *  @param max_clust [in] Maximum number of clusters (you will get exactly this number,
+ *                if there are at least this many points, except if you set the
+ *                cfg.thresh value nonzero, in which case that threshold may limit
+ *                the number of clusters).
+ *  @param clusters_out [out] may be NULL; if non-NULL, should be empty when called.
+ *           Will be set to a vector of statistics corresponding to the output clusters.
+ *  @param assignments_out [out] may be NULL; if non-NULL, will be set to a vector of
+ *           same size as "points", which says for each point which cluster
+ *            it is assigned to.
+ *  @param cfg [in] Configuration object that controls clustering behavior.  Most
+ *                important value is "thresh", which provides an alternative mechanism
+ *                [other than max_clust] to limit the number of leaves.
+*/
+BaseFloat ClusterTopDown(const std::vector<Clusterable*> &points,  // all non-NULL
+                         int32 max_clust,  // max number of clusters.
+                         std::vector<Clusterable*> *clusters_out,  // may be NULL; else should be empty
+                         std::vector<int32> *assignments_out,  // may be NULL
+                         TreeClusterOptions cfg = TreeClusterOptions());
+
+/// @} end of "addtogroup clustering_group_algo"
+
+}  // end namespace kaldi.
+
+#endif  // KALDI_TREE_CLUSTER_UTILS_H_
diff --git a/kaldi_io/src/kaldi/tree/clusterable-classes.h b/kaldi_io/src/kaldi/tree/clusterable-classes.h
new file mode 100644
index 0000000..817d0c6
--- /dev/null
+++ b/kaldi_io/src/kaldi/tree/clusterable-classes.h
@@ -0,0 +1,158 @@
+// tree/clusterable-classes.h
+
+// Copyright 2009-2011  Microsoft Corporation;  Saarland University
+//                2014  Daniel Povey
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_TREE_CLUSTERABLE_CLASSES_H_
+#define KALDI_TREE_CLUSTERABLE_CLASSES_H_ 1
+
+#include <string>
+#include "itf/clusterable-itf.h"
+#include "matrix/matrix-lib.h"
+
+namespace kaldi {
+
+// Note: see sgmm/sgmm-clusterable.h for an SGMM-based clusterable
+// class.  We didn't include it here, to avoid adding an extra
+// dependency to this directory.
+
+/// \addtogroup clustering_group
+/// @{
+
+/// ScalarClusterable clusters scalars with x^2 loss.
+class ScalarClusterable: public Clusterable {
+ public:
+  ScalarClusterable(): x_(0), x2_(0), count_(0) {}  // empty accumulator.
+  explicit ScalarClusterable(BaseFloat x): x_(x), x2_(x*x), count_(1) {}  // one point.
+  virtual std::string Type() const { return "scalar"; }
+  virtual BaseFloat Objf() const;
+  virtual void SetZero() { count_ = x_ = x2_ = 0.0; }
+  virtual void Add(const Clusterable &other_in);
+  virtual void Sub(const Clusterable &other_in);
+  virtual Clusterable* Copy() const;
+  virtual BaseFloat Normalizer() const {
+    return static_cast<BaseFloat>(count_);
+  }
+
+  // Function to write data to stream. Will organize input later [more complex]
+  virtual void Write(std::ostream &os, bool binary) const;
+  virtual Clusterable* ReadNew(std::istream &is, bool binary) const;
+
+  std::string Info();  // For debugging.
+  BaseFloat Mean() const { return (count_ != 0 ? x_/count_ : 0.0); }  // 0 if count_ is 0.
+ private:
+  BaseFloat x_;  // x_/count_ is the mean (see Mean()).
+  BaseFloat x2_;  // initialized to x*x by the single-point constructor.
+  BaseFloat count_;  // what Normalizer() returns.
+
+  void Read(std::istream &is, bool binary);
+};
+
+
+/// GaussClusterable wraps Gaussian statistics in a form accessible
+/// to generic clustering algorithms.
+class GaussClusterable: public Clusterable {
+ public:
+  GaussClusterable(): count_(0.0), var_floor_(0.0) {}  // stats_ is default-constructed.
+  GaussClusterable(int32 dim, BaseFloat var_floor):
+      count_(0.0), stats_(2, dim), var_floor_(var_floor) {}  // stats_ is 2 x dim.
+
+  GaussClusterable(const Vector<BaseFloat> &x_stats,
+                   const Vector<BaseFloat> &x2_stats,
+                   BaseFloat var_floor, BaseFloat count);
+
+  virtual std::string Type() const {  return "gauss"; }
+  void AddStats(const VectorBase<BaseFloat> &vec, BaseFloat weight = 1.0);
+  virtual BaseFloat Objf() const;
+  virtual void SetZero();
+  virtual void Add(const Clusterable &other_in);
+  virtual void Sub(const Clusterable &other_in);
+  virtual BaseFloat Normalizer() const { return count_; }  // double count_ converted to BaseFloat.
+  virtual Clusterable *Copy() const;
+  virtual void Scale(BaseFloat f);
+  virtual void Write(std::ostream &os, bool binary) const;
+  virtual Clusterable *ReadNew(std::istream &is, bool binary) const;
+  virtual ~GaussClusterable() {}
+
+  BaseFloat count() const { return count_; }  // double count_ converted to BaseFloat.
+  // The next two functions are not const-correct, because of SubVector.
+  SubVector<double> x_stats() const { return stats_.Row(0); }  // row 0: sum.
+  SubVector<double> x2_stats() const { return stats_.Row(1); }  // row 1: sum-squared.
+ private:
+  double count_;
+  Matrix<double> stats_; // two rows: sum, then sum-squared.
+  double var_floor_;  // should be common for all objects created.
+
+  void Read(std::istream &is, bool binary);
+};
+
+/// @} end of "addtogroup clustering_group"
+
+inline void GaussClusterable::SetZero() {
+  stats_.SetZero();  // clear the accumulated statistics...
+  count_ = 0.0;      // ...and the count; var_floor_ is left untouched.
+}
+
+inline GaussClusterable::GaussClusterable(const Vector<BaseFloat> &x_stats,
+                                          const Vector<BaseFloat> &x2_stats,
+                                          BaseFloat var_floor, BaseFloat count):
+    count_(count), stats_(2, x_stats.Dim()), var_floor_(var_floor) {  // width taken from x_stats.
+  stats_.Row(0).CopyFromVec(x_stats);  // row 0 holds the sum.
+  stats_.Row(1).CopyFromVec(x2_stats);  // row 1 holds the sum-squared; presumably must match x_stats.Dim().
+}
+
+
+/// VectorClusterable wraps vectors in a form accessible to generic clustering
+/// algorithms.  Each vector is associated with a weight; these could be 1.0.
+/// The objective function (to be maximized) is the negated sum of squared
+/// distances from the cluster center to each vector, times that vector's
+/// weight.
+class VectorClusterable: public Clusterable {
+ public:
+  VectorClusterable(): weight_(0.0), sumsq_(0.0) {}  // stats_ is default-constructed.
+
+  VectorClusterable(const Vector<BaseFloat> &vector,
+                    BaseFloat weight);
+
+  virtual std::string Type() const {  return "vector"; }
+  // Objf is negated weighted sum of squared distances.
+  virtual BaseFloat Objf() const;
+  virtual void SetZero() { weight_ = 0.0; sumsq_ = 0.0; stats_.Set(0.0); }  // stats_ keeps its size.
+  virtual void Add(const Clusterable &other_in);
+  virtual void Sub(const Clusterable &other_in);
+  virtual BaseFloat Normalizer() const { return weight_; }  // double weight_ converted to BaseFloat.
+  virtual Clusterable *Copy() const;
+  virtual void Scale(BaseFloat f);
+  virtual void Write(std::ostream &os, bool binary) const;
+  virtual Clusterable *ReadNew(std::istream &is, bool binary) const;
+  virtual ~VectorClusterable() {}
+
+ private:
+  double weight_;  // sum of weights of the source vectors.  Never negative.
+  Vector<double> stats_; // Equals the weighted sum of the source vectors.
+  double sumsq_;  // Equals the sum over all sources, of weight_ * vec.vec,
+                  // where vec = stats_ / weight_.  Used in computing
+                  // the objective function.
+  void Read(std::istream &is, bool binary);
+};
+
+
+
+}  // end namespace kaldi.
+
+#endif  // KALDI_TREE_CLUSTERABLE_CLASSES_H_
diff --git a/kaldi_io/src/kaldi/tree/context-dep.h b/kaldi_io/src/kaldi/tree/context-dep.h
new file mode 100644
index 0000000..307fcd4
--- /dev/null
+++ b/kaldi_io/src/kaldi/tree/context-dep.h
@@ -0,0 +1,166 @@
+// tree/context-dep.h
+
+// Copyright 2009-2011  Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_TREE_CONTEXT_DEP_H_
+#define KALDI_TREE_CONTEXT_DEP_H_
+
+#include "itf/context-dep-itf.h"
+#include "tree/event-map.h"
+#include "matrix/matrix-lib.h"
+#include "tree/cluster-utils.h"
+
+/*
+  This header provides the declarations for the class ContextDependency, which inherits
+  from the interface class "ContextDependencyInterface" in itf/context-dep-itf.h.
+  This is basically a wrapper around an EventMap.  The EventMap
+  (tree/event-map.h) declares most of the internals of the class, and the building routines are
+  in build-tree.h which uses build-tree-utils.h, which uses cluster-utils.h . */
+
+
+namespace kaldi {
+
+static const EventKeyType kPdfClass = -1;  // The "name" to which we assign the
+// pdf-class (generally corresponds to position in the HMM, zero-based);
+// must not be used for any other event.  I.e. the value corresponding to
+// this key is the pdf-class (see hmm-topology.h for explanation of what this is).
+
+
+/* ContextDependency is quite a generic decision tree.
+
+   It does not actually do very much-- all the magic is in the EventMap object.
+   All this class does is to encode the phone context as a sequence of events, and
+   pass this to the EventMap object to turn into what it will interpret as a
+   vector of pdfs.
+
+   Different versions of the ContextDependency class that are written in the future may
+   have slightly different interfaces and pass more stuff in as events, to the
+   EventMap object.
+
+   In order to separate the process of training decision trees from the process
+   of actually using them, we do not put any training code into the ContextDependency class.
+ */
+class ContextDependency: public ContextDependencyInterface {
+ public:
+  virtual int32 ContextWidth() const { return N_; }
+  virtual int32 CentralPosition() const { return P_; }
+
+
+  /// returns success or failure; outputs pdf to pdf_id
+  virtual bool Compute(const std::vector<int32> &phoneseq,
+                       int32 pdf_class, int32 *pdf_id) const;
+
+  virtual int32 NumPdfs() const {
+    // this routine could be simplified to return to_pdf_->MaxResult()+1.  we're a
+    // bit more paranoid than that.
+    if (to_pdf_ == NULL) return 0;  // no tree yet (default-constructed, Read() not called).
+    EventAnswerType max_result = to_pdf_->MaxResult();
+    if (max_result < 0) return 0;
+    return static_cast<int32>(max_result) + 1;
+  }
+  virtual ContextDependencyInterface *Copy() const {  // a NULL tree stays NULL in the copy.
+    return new ContextDependency(N_, P_, to_pdf_ != NULL ? to_pdf_->Copy() : static_cast<EventMap*>(NULL));
+  }
+
+  /// Read context-dependency object from disk; throws on error
+  void Read (std::istream &is, bool binary);
+
+  // Constructor with no arguments; will normally be called
+  // prior to Read()
+  ContextDependency(): N_(0), P_(0), to_pdf_(NULL) { }
+
+  // Constructor takes ownership of pointers.
+  ContextDependency(int32 N, int32 P,
+                    EventMap *to_pdf):
+      N_(N), P_(P), to_pdf_(to_pdf) { }
+  void Write (std::ostream &os, bool binary) const;
+
+  ~ContextDependency() { delete to_pdf_; }  // deleting a NULL pointer is a no-op.
+
+  const EventMap &ToPdfMap() const { return *to_pdf_; }  // requires a non-NULL tree.
+
+  /// GetPdfInfo returns a vector indexed by pdf-id, saying for each pdf which
+  /// pairs of (phone, pdf-class) it can correspond to.  (Usually just one).
+  /// c.f. hmm/hmm-topology.h for meaning of pdf-class.
+
+  void GetPdfInfo(const std::vector<int32> &phones,  // list of phones
+                  const std::vector<int32> &num_pdf_classes,  // indexed by phone,
+                  std::vector<std::vector<std::pair<int32, int32> > > *pdf_info)
+      const;
+
+ private:
+  int32 N_;  // context width, as returned by ContextWidth().
+  int32 P_;  // central position, as returned by CentralPosition().
+  EventMap *to_pdf_;  // owned here.
+
+  KALDI_DISALLOW_COPY_AND_ASSIGN(ContextDependency);
+};
+
+/// GenRandContextDependency is mainly of use for debugging.  Phones must be sorted and uniq
+/// on input.
+/// @param phones [in] A vector of phone id's [must be sorted and uniq].
+/// @param ensure_all_covered [in] boolean argument; if true,  GenRandContextDependency
+///        generates a context-dependency object that "works" for all phones [no gaps].
+/// @param num_pdf_classes [out] outputs a vector indexed by phone, of the number
+///          of pdf classes (e.g. states) for that phone.
+/// @return Returns a context dependency object.
+ContextDependency *GenRandContextDependency(const std::vector<int32> &phones,
+                                            bool ensure_all_covered,
+                                            std::vector<int32> *num_pdf_classes);
+
+/// GenRandContextDependencyLarge is like GenRandContextDependency but generates a larger tree
+/// with specified N and P for use in "one-time" larger-scale tests.
+ContextDependency *GenRandContextDependencyLarge(const std::vector<int32> &phones,
+                                                 int N, int P,
+                                                 bool ensure_all_covered,
+                                                 std::vector<int32> *num_pdf_classes);
+
+// MonophoneContextDependency() returns a new ContextDependency object that
+// corresponds to a monophone system.
+// The map phone2num_pdf_classes maps from the phone id to the number of
+// pdf-classes we have for that phone (e.g. 3, so the pdf-classes would be
+// 0, 1, 2).  NOTE(review): both vectors are taken by value, i.e. copied per call.
+
+ContextDependency*
+MonophoneContextDependency(const std::vector<int32> phones,
+                           const std::vector<int32> phone2num_pdf_classes);
+
+// MonophoneContextDependencyShared is as MonophoneContextDependency but lets
+// you define classes of phones which share pdfs (e.g. different stress-markers of a single
+// phone).  Each element of phone_classes is a set of phones that are in that class.
+ContextDependency*
+MonophoneContextDependencyShared(const std::vector<std::vector<int32> > phone_classes,
+                                 const std::vector<int32> phone2num_pdf_classes);
+
+
+// Important note:
+// Statistics for training decision trees will be of type:
+// std::vector<std::pair<EventType, Clusterable*> >
+// We don't make this a typedef as it doesn't add clarity.
+// they will be sorted and unique on the EventType member, which
+// itself is sorted and unique on the name (see event-map.h).
+
+// See build-tree.h for functions relating to actually building the decision trees.
+
+
+
+
+}  // namespace kaldi
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/tree/event-map.h b/kaldi_io/src/kaldi/tree/event-map.h
new file mode 100644
index 0000000..07fcc2b
--- /dev/null
+++ b/kaldi_io/src/kaldi/tree/event-map.h
@@ -0,0 +1,365 @@
+// tree/event-map.h
+
+// Copyright 2009-2011  Microsoft Corporation;  Haihua Xu
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_TREE_EVENT_MAP_H_
+#define KALDI_TREE_EVENT_MAP_H_
+
+#include <vector>
+#include <map>
+#include <algorithm>
+#include "base/kaldi-common.h"
+#include "util/stl-utils.h"
+#include "util/const-integer-set.h"
+
+namespace kaldi {
+
+/// \defgroup event_map_group Event maps
+/// \ingroup tree_group
+/// See \ref tree_internals for overview, and specifically \ref treei_event_map.
+
+
+// Note RE negative values: some of this code will not work if things of type
+// EventValueType are negative.  In particular, TableEventMap can't be used if
+// things of EventValueType are negative, and additionally TableEventMap won't
+// be efficient if things of EventValueType take on extremely large values.  The
+// EventKeyType can be negative though.
+
+/// Things of type EventKeyType can take any value.  The code does not assume they are contiguous.
+/// So values like -1, 1000000 and the like are acceptable.
+typedef int32 EventKeyType;
+
+/// Given current code, things of type EventValueType should generally be nonnegative and in a
+/// reasonably small range (e.g. not one million), as we sometimes construct vectors of the size:
+/// [largest value we saw for this key].  This deficiency may be fixed in future [would require
+/// modifying TableEventMap]
+typedef int32 EventValueType;
+
+/// As far as the event-map code itself is concerned, things of type EventAnswerType may take
+/// any value except kNoAnswer (== -1).  However, some specific uses of EventMap (e.g. in
+/// build-tree-utils.h) assume these quantities are nonnegative.
+typedef int32 EventAnswerType;
+
+typedef std::vector<std::pair<EventKeyType, EventValueType> > EventType;
+// It is required to be sorted and have unique keys-- i.e. functions assume this when called
+// with this type.
+
+inline std::pair<EventKeyType, EventValueType> MakeEventPair (EventKeyType k, EventValueType v) {
+  return std::make_pair(k, v);  // builds the (key, value) element of an EventType.
+}
+
+void WriteEventType(std::ostream &os, bool binary, const EventType &vec);
+void ReadEventType(std::istream &is, bool binary, EventType *vec);
+
+std::string EventTypeToString(const EventType &evec);  // so we can print events out in error messages.
+
+struct EventMapVectorHash {  // Hashing object for EventType.  Works for both pointers and references.
+  // Not used in event-map.{h, cc}
+  size_t operator () (const EventType &vec);
+  size_t operator () (const EventType *ptr) { return (*this)(*ptr); }  // hashes the pointee; ptr must be non-NULL.
+};
+struct EventMapVectorEqual {  // Equality object for EventType pointers-- test equality of underlying vector.
+  // Not used in event-map.{h, cc}.  Both pointers are dereferenced, so they must be non-NULL.
+  bool operator () (const EventType *p1, const EventType *p2) const { return (*p1 == *p2); }
+};
+
+
+/// A class that is capable of representing a generic mapping from
+/// EventType (which is a vector of (key, value) pairs) to
+/// EventAnswerType which is just an integer.  See \ref tree_internals
+/// for overview.
+class EventMap {
+ public:
+  static void Check(const EventType &event);  // will crash if not sorted and unique on key.
+  static bool Lookup(const EventType &event, EventKeyType key, EventValueType *ans);
+
+  // Maps events to the answer type. input must be sorted.
+  virtual bool Map(const EventType &event, EventAnswerType *ans) const = 0;
+
+  // MultiMap maps a partially specified set of events to the set of answers it might
+  // map to.  It appends these to "ans".  "ans" is
+  // **not guaranteed unique at output** if the
+  // tree contains duplicate answers at leaves -- you should sort & uniq afterwards.
+  // e.g.: SortAndUniq(ans).
+  virtual void MultiMap(const EventType &event, std::vector<EventAnswerType> *ans) const = 0;
+
+  // GetChildren() returns the EventMaps that are immediate children of this
+  // EventMap (if they exist), by putting them in *out.  Useful for
+  // determining the structure of the event map.
+  virtual void GetChildren(std::vector<EventMap*> *out) const = 0;
+
+  // This Copy() does a deep copy of the event map.
+  // If new_leaves is nonempty when it reaches a leaf with value l s.t. new_leaves[l] != NULL,
+  // it replaces it with a copy of that EventMap.  This makes it possible to extend and modify a tree.
+  // It's the way we do splits of trees, and clustering of trees.  Think about this carefully, because
+  // the EventMap structure does not support modification of an existing tree.  Do not be tempted
+  // to do this differently, because other kinds of mechanisms would get very messy and unextensible.
+  // Copy() is the only mechanism to modify a tree.  It's similar to a kind of function composition.
+  // Copy() does not take ownership of the pointers in new_leaves (it uses the Copy() function of those
+  // EventMaps).
+  virtual EventMap *Copy(const std::vector<EventMap*> &new_leaves) const = 0;
+  
+  EventMap *Copy() const { std::vector<EventMap*> new_leaves; return Copy(new_leaves); }  // plain deep copy.
+
+  // The function MapValues() is intended to be used to map phone-sets between
+  // different integer representations.  For all the keys in the set
+  // "keys_to_map", it will map the corresponding values using the map
+  // "value_map".  Note: these values are the values in the key->value pairs of
+  // the EventMap, which really correspond to phones in the usual case; they are
+  // not the "answers" of the EventMap which correspond to clustered states.  In
+  // case multiple values are mapped to the same value, it will try to deal with
+  // it gracefully where it can, but will crash if, for example, this would
+  // cause problems with the TableEventMap.  It will also crash if any values
+  // used for keys in "keys_to_map" are not mapped by "value_map".  This
+  // function is not currently used.
+  virtual EventMap *MapValues(
+      const unordered_set<EventKeyType> &keys_to_map,
+      const unordered_map<EventValueType,EventValueType> &value_map) const = 0;
+
+  // The function Prune() is like Copy(), except it removes parts of the tree
+  // that return only -1 (it will return NULL if this EventMap returns only -1).
+  // This is a mechanism to remove parts of the tree-- you would first use the
+  // Copy() function with a vector of EventMap*, and for the parts you don't
+  // want, you'd put a ConstantEventMap with -1; you'd then call
+  // Prune() on the result.  This function is not currently used.
+  virtual EventMap *Prune() const = 0;
+  
+  virtual EventAnswerType MaxResult() const {  // child classes may override this for efficiency; here is basic version.
+    // warns and returns std::numeric_limits<EventAnswerType>::min() if nothing found.
+    std::vector<EventAnswerType> tmp; EventType empty_event;
+    MultiMap(empty_event, &tmp);  // gather the answers reachable with an empty event.
+    if (tmp.empty()) {
+      KALDI_WARN << "EventMap::MaxResult(), empty result";
+      return std::numeric_limits<EventAnswerType>::min();
+    }
+    else { return * std::max_element(tmp.begin(), tmp.end()); }
+  }
+
+  /// Write to stream.
+  virtual void Write(std::ostream &os, bool binary) = 0;  // NOTE(review): not const, unlike Map() etc.
+
+  virtual ~EventMap() {}
+
+  /// a Write function that takes care of NULL pointers.
+  static void Write(std::ostream &os, bool binary, EventMap *emap);
+  /// a Read function that reads an arbitrary EventMap; also
+  /// works for NULL pointers.
+  static EventMap *Read(std::istream &is, bool binary);
+};
+
+
+class ConstantEventMap: public EventMap {
+ public:
+  virtual bool Map(const EventType &event, EventAnswerType *ans) const {
+    *ans = answer_;  // the event is ignored: this node always gives the same answer.
+    return true;
+  }
+
+  virtual void MultiMap(const EventType &,
+                        std::vector<EventAnswerType> *ans) const {
+    ans->push_back(answer_);  // append the single possible answer.
+  }
+
+  virtual void GetChildren(std::vector<EventMap*> *out) const { out->clear(); }
+
+  virtual EventMap *Copy(const std::vector<EventMap*> &new_leaves) const {
+    // Substitute new_leaves[answer_] when that slot exists and is non-NULL.
+    const bool replace = answer_ >= 0 && answer_ < (EventAnswerType)new_leaves.size()
+        && new_leaves[answer_] != NULL;
+    return replace ? new_leaves[answer_]->Copy() : new ConstantEventMap(answer_);
+  }
+
+  virtual EventMap *MapValues(
+      const unordered_set<EventKeyType> &keys_to_map,
+      const unordered_map<EventValueType,EventValueType> &value_map) const {
+    return new ConstantEventMap(answer_);  // neither argument is consulted here.
+  }
+
+  virtual EventMap *Prune() const {  // an answer of -1 prunes this node to NULL.
+    if (answer_ == -1) return NULL; else return new ConstantEventMap(answer_);
+  }
+
+  explicit ConstantEventMap(EventAnswerType answer): answer_(answer) { }
+
+  virtual void Write(std::ostream &os, bool binary);
+  static ConstantEventMap *Read(std::istream &is, bool binary);
+ private:
+  EventAnswerType answer_;
+  KALDI_DISALLOW_COPY_AND_ASSIGN(ConstantEventMap);
+};
+
+class TableEventMap: public EventMap {
+ public:
+
+  virtual bool Map(const EventType &event, EventAnswerType *ans) const {
+    *ans = -1;  // means no answer
+    EventValueType value;
+    const bool have_child = Lookup(event, key_, &value) && value >= 0
+        && value < (EventValueType)table_.size() && table_[value] != NULL;
+    if (!have_child) return false;
+    return table_[value]->Map(event, ans);
+  }
+
+  virtual void GetChildren(std::vector<EventMap*> *out) const {
+    out->clear();
+    for (std::vector<EventMap*>::const_iterator it = table_.begin(); it != table_.end(); ++it)
+      if (*it != NULL) out->push_back(*it);  // NULL slots are skipped.
+  }
+
+  virtual void MultiMap(const EventType &event, std::vector<EventAnswerType> *ans) const {
+    EventValueType value;
+    if (!Lookup(event, key_, &value)) {
+      // all answers are possible if no such key: let every child append its answers.
+      for (size_t idx = 0; idx < table_.size(); ++idx)
+        if (table_[idx] != NULL) table_[idx]->MultiMap(event, ans);
+      return;
+    }
+    if (value >= 0 && value < (EventValueType)table_.size() && table_[value] != NULL)
+      table_[value]->MultiMap(event, ans);  // otherwise: no answers.
+  }
+
+  virtual EventMap *Prune() const;
+
+  virtual EventMap *MapValues(
+      const unordered_set<EventKeyType> &keys_to_map,
+      const unordered_map<EventValueType,EventValueType> &value_map) const;
+
+  /// Takes ownership of pointers.
+  explicit TableEventMap(EventKeyType key, const std::vector<EventMap*> &table): key_(key), table_(table) {}
+  /// Takes ownership of pointers.
+  explicit TableEventMap(EventKeyType key, const std::map<EventValueType, EventMap*> &map_in);
+  /// This initializer creates a ConstantEventMap for each value in the map.
+  explicit TableEventMap(EventKeyType key, const std::map<EventValueType, EventAnswerType> &map_in);
+
+  virtual void Write(std::ostream &os, bool binary);
+  static TableEventMap *Read(std::istream &is, bool binary);
+
+  virtual EventMap *Copy(const std::vector<EventMap*> &new_leaves) const {
+    std::vector<EventMap*> copied(table_.size(), NULL);  // NULL slots stay NULL.
+    for (size_t idx = 0; idx < table_.size(); ++idx)
+      if (table_[idx]) copied[idx] = table_[idx]->Copy(new_leaves);
+    return new TableEventMap(key_, copied);  // the new map takes ownership of the copies.
+  }
+  // table_ holds pointers this object took ownership of (see constructors).
+  virtual ~TableEventMap() { DeletePointers(&table_); }
+ private:
+  EventKeyType key_;
+  std::vector<EventMap*> table_;
+  KALDI_DISALLOW_COPY_AND_ASSIGN(TableEventMap);
+};
+
+
+
+
+class SplitEventMap: public EventMap {  // A decision tree [non-leaf] node.
+ public:
+
+  virtual bool Map(const EventType &event, EventAnswerType *ans) const {
+    EventValueType value;
+    if (Lookup(event, key_, &value)) {
+      // Membership of the value in yes_set_ picks the branch to descend into.
+      if (yes_set_.count(value)) {
+        return yes_->Map(event, ans);
+      }
+      return no_->Map(event, ans);
+    }
+    return false;  // the event does not specify key_, so no single answer exists.
+  }
+
+  virtual void MultiMap(const EventType &event, std::vector<EventAnswerType> *ans) const {
+    EventValueType tmp;
+    if (Lookup(event, key_, &tmp)) {
+      if (yes_set_.count(tmp))  // same membership test as Map().
+        yes_->MultiMap(event, ans);
+      else
+        no_->MultiMap(event, ans);
+    } else {  // both yes and no contribute.
+      yes_->MultiMap(event, ans);
+      no_->MultiMap(event, ans);
+    }
+  }
+
+  virtual void GetChildren(std::vector<EventMap*> *out) const {
+    out->clear();
+    out->push_back(yes_);  // "yes" child first, then "no".
+    out->push_back(no_);
+  }
+
+  virtual EventMap *Copy(const std::vector<EventMap*> &new_leaves) const {
+    return new SplitEventMap(key_, yes_set_, yes_->Copy(new_leaves), no_->Copy(new_leaves));
+  }
+
+  virtual void Write(std::ostream &os, bool binary);
+  static SplitEventMap *Read(std::istream &is, bool binary);
+
+  virtual EventMap *Prune() const;
+  
+  virtual EventMap *MapValues(
+      const unordered_set<EventKeyType> &keys_to_map,
+      const unordered_map<EventValueType,EventValueType> &value_map) const;
+  
+  virtual ~SplitEventMap() { Destroy(); }
+
+  /// This constructor takes ownership of the "yes" and "no" arguments.
+  SplitEventMap(EventKeyType key, const std::vector<EventValueType> &yes_set,
+                EventMap *yes, EventMap *no): key_(key), yes_set_(yes_set), yes_(yes), no_(no) {
+    KALDI_PARANOID_ASSERT(IsSorted(yes_set));
+    KALDI_ASSERT(yes_ != NULL && no_ != NULL);
+  }
+
+
+ private:
+  /// This constructor used in the Copy() function.
+  SplitEventMap(EventKeyType key, const ConstIntegerSet<EventValueType> &yes_set,
+                EventMap *yes, EventMap *no): key_(key), yes_set_(yes_set), yes_(yes), no_(no) {
+    KALDI_ASSERT(yes_ != NULL && no_ != NULL);
+  }
+  void Destroy() {
+    delete yes_; delete no_;
+  }
+  EventKeyType key_;
+  //  yes_set_ was formerly a std::vector<EventValueType>.
+  ConstIntegerSet<EventValueType> yes_set_;  // more efficient Map function.
+  EventMap *yes_;  // owned here.
+  EventMap *no_;  // owned here.
+  KALDI_DISALLOW_COPY_AND_ASSIGN(SplitEventMap);  // a copy would delete yes_/no_ twice.
+};
+
+/**
+   This function gets the tree structure of the EventMap "map" in a convenient form.
+   If "map" corresponds to a tree structure (not necessarily binary) with leaves
+   uniquely numbered from 0 to num_leaves-1, then the function will return true,
+   output "num_leaves", and set "parents" to a vector of size equal to the number of
+   nodes in the tree (nonleaf and leaf), where each index corresponds to a node
+   and the leaf indices correspond to the values returned by the EventMap from
+   that leaf; for an index i, parents[i] equals the parent of that node in the tree
+   structure, where parents[i] > i, except for the last (root) node where parents[i] == i.
+   If the EventMap does not have this structure (e.g. if multiple different leaf nodes share
+   the same number), then it will return false.
+*/
+
+bool GetTreeStructure(const EventMap &map,
+                      int32 *num_leaves,
+                      std::vector<int32> *parents);
+
+
+/// @} end "addtogroup event_map_group"
+
+}
+
+#endif
diff --git a/kaldi_io/src/kaldi/tree/tree-renderer.h b/kaldi_io/src/kaldi/tree/tree-renderer.h
new file mode 100644
index 0000000..5e0b0d8
--- /dev/null
+++ b/kaldi_io/src/kaldi/tree/tree-renderer.h
@@ -0,0 +1,84 @@
+// tree/tree-renderer.h
+
+// Copyright 2012 Vassil Panayotov
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_TREE_TREE_RENDERER_H_
+#define KALDI_TREE_TREE_RENDERER_H_
+
+#include "base/kaldi-common.h"
+#include "tree/event-map.h"
+#include "util/common-utils.h"
+#include "hmm/transition-model.h"
+#include "fst/fstlib.h"
+
+namespace kaldi {
+
+// Parses a decision tree file and outputs its description in GraphViz format
+class TreeRenderer {
+ public:
+  const static int32 kEdgeWidth; // normal width of the edges and state contours
+  const static int32 kEdgeWidthQuery; // edge and state width when in query
+  const static std::string kEdgeColor; // normal color for states and edges
+  const static std::string kEdgeColorQuery; // edge and state color when in query
+  // Both streams and the symbol table are stored by reference, not copied.
+  TreeRenderer(std::istream &is, bool binary, std::ostream &os,
+               fst::SymbolTable &phone_syms, bool use_tooltips)
+      : phone_syms_(phone_syms), is_(is), out_(os), binary_(binary),
+        N_(-1), P_(-1), use_tooltips_(use_tooltips), next_id_(0) {}
+
+  // Renders the tree and if the "query" parameter is not NULL
+  // a distinctly colored trace corresponding to the event.
+  void Render(const EventType *query);
+
+ private:
+  // Looks-up the next token from the stream and invokes
+  // the appropriate render method to visualize it
+  void RenderSubTree(const EventType *query, int32 id);
+
+  // Renders a leaf node (constant event map)
+  void RenderConstant(const EventType *query, int32 id);
+
+  // Renders a split event map node and the edges to the nodes
+  // representing YES and NO sets
+  void RenderSplit(const EventType *query, int32 id);
+
+  // Renders a table event map node and the edges to its (non-null) children
+  void RenderTable(const EventType *query, int32 id);
+
+  // Makes a comma-separated string from the elements of a set of identifiers
+  // If the identifiers represent phones, their symbolic representations are used
+  std::string MakeEdgeLabel(const EventKeyType &key,
+                            const ConstIntegerSet<EventValueType> &intset);
+
+  // Writes the GraphViz representation of a non-leaf node to the out stream
+  // A question about a phone from the context window or about pdf-class
+  // is used as a label.
+  void RenderNonLeaf(int32 id, const EventKeyType &key, bool in_query);
+
+  fst::SymbolTable &phone_syms_; // phone symbols to be used as edge labels
+  std::istream &is_; // the stream from which the tree is read
+  std::ostream &out_; // the GraphViz representation is written to this stream
+  bool binary_; // is the input stream binary?
+  int32 N_, P_; // context-width and central position (both start out as -1)
+  bool use_tooltips_;  // use tooltips(useful in e.g. SVG) instead of labels
+  int32 next_id_; // the first unused GraphViz node ID
+};
+
+} // namespace kaldi
+
+#endif //  KALDI_TREE_TREE_RENDERER_H_
diff --git a/kaldi_io/src/kaldi/util/basic-filebuf.h b/kaldi_io/src/kaldi/util/basic-filebuf.h
new file mode 100644
index 0000000..cf2e079
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/basic-filebuf.h
@@ -0,0 +1,1065 @@
+///////////////////////////////////////////////////////////////////////////////
+// This is a modified version of the std::basic_filebuf from libc++
+// (http://libcxx.llvm.org/).
+// It allows one to create basic_filebuf from an existing FILE* handle or file
+// descriptor.
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source License licenses. See LICENSE.TXT for details (included at the
+// bottom).
+///////////////////////////////////////////////////////////////////////////////
+#ifndef KALDI_UTIL_BASIC_FILEBUF_H_
+#define KALDI_UTIL_BASIC_FILEBUF_H_
+
+///////////////////////////////////////////////////////////////////////////////
+#include <fstream>
+#include <cstdio>
+#include <cstring>
+
+///////////////////////////////////////////////////////////////////////////////
+namespace kaldi
+{
+
+/// std::basic_filebuf work-alike that can also adopt an existing FILE* or descriptor.
+template <typename CharT, typename Traits = std::char_traits<CharT> >
+class basic_filebuf : public std::basic_streambuf<CharT, Traits>
+{
+public:
+    typedef CharT                            char_type;
+    typedef Traits                           traits_type;
+    typedef typename traits_type::int_type   int_type;
+    typedef typename traits_type::pos_type   pos_type;
+    typedef typename traits_type::off_type   off_type;
+    typedef typename traits_type::state_type state_type;
+
+    basic_filebuf();
+    basic_filebuf(basic_filebuf&& rhs);
+    virtual ~basic_filebuf();  // frees owned buffers only; see the definition
+
+    basic_filebuf& operator=(basic_filebuf&& rhs);
+    void swap(basic_filebuf& rhs);
+
+    bool is_open() const;
+    basic_filebuf* open(const char* s, std::ios_base::openmode mode);
+    basic_filebuf* open(const std::string& s, std::ios_base::openmode mode);
+    basic_filebuf* open(int fd, std::ios_base::openmode mode);   // via fdopen()
+    basic_filebuf* open(FILE* f, std::ios_base::openmode mode);  // adopts f as is
+    basic_filebuf* close();
+
+    FILE* file() { return this->_M_file; }  // nullptr while closed
+    int fd() { return this->_M_file ? fileno(this->_M_file) : -1; }  // -1 while closed
+
+protected:
+    int_type underflow() override;
+    int_type pbackfail(int_type c = traits_type::eof()) override;
+    int_type overflow (int_type c = traits_type::eof()) override;
+    std::basic_streambuf<char_type, traits_type>* setbuf(char_type* s, std::streamsize n) override;
+    pos_type seekoff(off_type off, std::ios_base::seekdir way,
+                     std::ios_base::openmode wch = std::ios_base::in | std::ios_base::out) override;
+    pos_type seekpos(pos_type sp,
+                     std::ios_base::openmode wch = std::ios_base::in | std::ios_base::out) override;
+    int sync() override;
+    void imbue(const std::locale& loc) override;
+
+protected:
+    char*       _M_extbuf;      // byte buffer exchanged with the FILE* (fread/fwrite)
+    const char* _M_extbufnext;  // next byte of _M_extbuf not yet converted
+    const char* _M_extbufend;   // end of the valid bytes in _M_extbuf
+    char _M_extbuf_min[8];      // inline storage used when no larger buffer is set
+    size_t _M_ebs;              // size of _M_extbuf in bytes
+    char_type* _M_intbuf;       // char_type buffer used when conversion is needed
+    size_t _M_ibs;              // size of _M_intbuf in char_type units
+    FILE* _M_file;              // attached stream, nullptr while closed
+    const std::codecvt<char_type, char, state_type>* _M_cv;  // facet of the imbued locale
+    state_type _M_st;           // current conversion state
+    state_type _M_st_last;      // conversion state saved before the last read
+    std::ios_base::openmode _M_om;  // mode passed to open()
+    std::ios_base::openmode _M_cm;  // current direction: in, out, or 0
+    bool _M_owns_eb;            // _M_extbuf was allocated here (delete[] it)
+    bool _M_owns_ib;            // _M_intbuf was allocated here (delete[] it)
+    bool _M_always_noconv;      // cached _M_cv->always_noconv()
+
+    const char* _M_get_mode(std::ios_base::openmode mode);
+    bool _M_read_mode();
+    void _M_write_mode();
+};
+
+/// Builds a closed buffer: caches the locale's codecvt facet, then sizes the buffers.
+template <class CharT, class Traits>
+basic_filebuf<CharT, Traits>::basic_filebuf()
+    : _M_extbuf(nullptr), _M_extbufnext(nullptr), _M_extbufend(nullptr),
+      _M_ebs(0),
+      _M_intbuf(nullptr), _M_ibs(0),
+      _M_file(nullptr),
+      _M_cv(nullptr),
+      _M_st(), _M_st_last(),
+      _M_om(std::ios_base::openmode(0)), _M_cm(std::ios_base::openmode(0)),
+      _M_owns_eb(false), _M_owns_ib(false),
+      _M_always_noconv(false)
+{
+    typedef std::codecvt<char_type, char, state_type> facet_type;
+    const std::locale current = this->getloc();
+    // _M_cv stays null (and conversion is assumed necessary) when the locale
+    // carries no matching facet.
+    if (std::has_facet<facet_type>(current))
+    {
+        _M_cv = &std::use_facet<facet_type>(current);
+        _M_always_noconv = _M_cv->always_noconv();
+    }
+    // Nothing is owned yet, so setbuf() has nothing to free; with no caller
+    // storage supplied it allocates the buffers itself.
+    setbuf(0, 4096);
+}
+
+/// Move constructor: takes over rhs's buffers and FILE*, leaving rhs closed and bufferless.
+template <class CharT, class Traits>
+basic_filebuf<CharT, Traits>::basic_filebuf(basic_filebuf&& rhs)
+    : std::basic_streambuf<CharT, Traits>(rhs)
+{
+    if (rhs._M_extbuf == rhs._M_extbuf_min)  // rhs uses its inline array: it cannot be stolen
+    {
+        _M_extbuf = _M_extbuf_min;  // NOTE(review): the bytes in rhs._M_extbuf_min are not copied
+        _M_extbufnext = _M_extbuf + (rhs._M_extbufnext - rhs._M_extbuf);
+        _M_extbufend = _M_extbuf + (rhs._M_extbufend - rhs._M_extbuf);
+    }
+    else  // separately stored buffer: take the pointers as they are
+    {
+        _M_extbuf = rhs._M_extbuf;
+        _M_extbufnext = rhs._M_extbufnext;
+        _M_extbufend = rhs._M_extbufend;
+    }
+    _M_ebs = rhs._M_ebs;
+    _M_intbuf = rhs._M_intbuf;
+    _M_ibs = rhs._M_ibs;
+    _M_file = rhs._M_file;
+    _M_cv = rhs._M_cv;
+    _M_st = rhs._M_st;
+    _M_st_last = rhs._M_st_last;
+    _M_om = rhs._M_om;
+    _M_cm = rhs._M_cm;
+    _M_owns_eb = rhs._M_owns_eb;
+    _M_owns_ib = rhs._M_owns_ib;
+    _M_always_noconv = rhs._M_always_noconv;
+    if (rhs.pbase())  // rhs has a put area: rebuild it here at the same offsets
+    {
+        if (rhs.pbase() == rhs._M_intbuf)
+            this->setp(_M_intbuf, _M_intbuf + (rhs. epptr() - rhs.pbase()));
+        else
+            this->setp((char_type*)_M_extbuf,
+                       (char_type*)_M_extbuf + (rhs. epptr() - rhs.pbase()));
+        this->pbump(rhs. pptr() - rhs.pbase());
+    }
+    else if (rhs.eback())  // rhs has a get area: rebuild it here at the same offsets
+    {
+        if (rhs.eback() == rhs._M_intbuf)
+            this->setg(_M_intbuf, _M_intbuf + (rhs.gptr() - rhs.eback()),
+                                  _M_intbuf + (rhs.egptr() - rhs.eback()));
+        else
+            this->setg((char_type*)_M_extbuf,
+                       (char_type*)_M_extbuf + (rhs.gptr() - rhs.eback()),
+                       (char_type*)_M_extbuf + (rhs.egptr() - rhs.eback()));
+    }
+    rhs._M_extbuf = nullptr;  // from here on: strip rhs so it no longer refers to moved state
+    rhs._M_extbufnext = nullptr;
+    rhs._M_extbufend = nullptr;
+    rhs._M_ebs = 0;
+    rhs._M_intbuf = nullptr;
+    rhs._M_ibs = 0;
+    rhs._M_file = nullptr;
+    rhs._M_st = state_type();
+    rhs._M_st_last = state_type();
+    rhs._M_om = std::ios_base::openmode(0);
+    rhs._M_cm = std::ios_base::openmode(0);
+    rhs._M_owns_eb = false;  // ownership moved with the pointers copied above
+    rhs._M_owns_ib = false;
+    rhs.setg(0, 0, 0);  // rhs._M_cv and rhs._M_always_noconv are left unchanged
+    rhs.setp(0, 0);
+}
+
+/// Move assignment: closes whatever file is held here, then trades state with rhs.
+template <class CharT, class Traits>
+inline basic_filebuf<CharT, Traits>&
+basic_filebuf<CharT, Traits>::operator=(basic_filebuf&& rhs)
+{
+    // The result of close() is ignored: assignment proceeds even if it fails.
+    this->close();
+    this->swap(rhs);  // rhs receives this object's previous state
+    return *this;
+}
+
+/// Destructor: releases only the buffers this object allocated itself.
+template <class CharT, class Traits>
+basic_filebuf<CharT, Traits>::~basic_filebuf()
+{
+    // The close() call is disabled here, so destruction neither flushes nor
+    // fclose()s an attached FILE*.
+    // NOTE(review): presumably because the FILE* may belong to the caller
+    // (see the open(FILE*, ...) overload) -- confirm before restoring:
+    //     try { close(); } catch (...) { }
+
+    // delete[] on a null pointer is a no-op, so unowned buffers map to null.
+    char* const owned_ext = _M_owns_eb ? _M_extbuf : nullptr;
+    char_type* const owned_int = _M_owns_ib ? _M_intbuf : nullptr;
+    delete [] owned_ext;
+    delete [] owned_int;
+}
+
+/// Exchanges all state with rhs, re-anchoring pointers that refer to either inline buffer.
+template <class CharT, class Traits>
+void
+basic_filebuf<CharT, Traits>::swap(basic_filebuf& rhs)
+{
+    std::basic_streambuf<char_type, traits_type>::swap(rhs);  // get/put pointers and locale
+    if (_M_extbuf != _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min)
+    {
+        std::swap(_M_extbuf, rhs._M_extbuf);  // neither side uses its inline array: plain swap
+        std::swap(_M_extbufnext, rhs._M_extbufnext);
+        std::swap(_M_extbufend, rhs._M_extbufend);
+    }
+    else
+    {
+        ptrdiff_t ln = _M_extbufnext - _M_extbuf;  // offsets are recorded before any pointer moves
+        ptrdiff_t le = _M_extbufend - _M_extbuf;
+        ptrdiff_t rn = rhs._M_extbufnext - rhs._M_extbuf;
+        ptrdiff_t re = rhs._M_extbufend - rhs._M_extbuf;
+        if (_M_extbuf == _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min)
+        {
+            _M_extbuf = rhs._M_extbuf;  // an inline array cannot change owner, so the
+            rhs._M_extbuf = rhs._M_extbuf_min;  // receiving side points at its own instead
+        }
+        else if (_M_extbuf != _M_extbuf_min && rhs._M_extbuf == rhs._M_extbuf_min)
+        {
+            rhs._M_extbuf = _M_extbuf;
+            _M_extbuf = _M_extbuf_min;
+        }
+        _M_extbufnext = _M_extbuf + rn;  // each side takes over the other's offsets
+        _M_extbufend = _M_extbuf + re;
+        rhs._M_extbufnext = rhs._M_extbuf + ln;
+        rhs._M_extbufend = rhs._M_extbuf + le;
+    }
+    std::swap(_M_ebs, rhs._M_ebs);
+    std::swap(_M_intbuf, rhs._M_intbuf);
+    std::swap(_M_ibs, rhs._M_ibs);
+    std::swap(_M_file, rhs._M_file);
+    std::swap(_M_cv, rhs._M_cv);
+    std::swap(_M_st, rhs._M_st);
+    std::swap(_M_st_last, rhs._M_st_last);
+    std::swap(_M_om, rhs._M_om);
+    std::swap(_M_cm, rhs._M_cm);
+    std::swap(_M_owns_eb, rhs._M_owns_eb);
+    std::swap(_M_owns_ib, rhs._M_owns_ib);
+    std::swap(_M_always_noconv, rhs._M_always_noconv);
+    if (this->eback() == (char_type*)rhs._M_extbuf_min)  // our get area still points into rhs
+    {
+        ptrdiff_t n = this->gptr() - this->eback();
+        ptrdiff_t e = this->egptr() - this->eback();
+        this->setg((char_type*)_M_extbuf_min,
+                   (char_type*)_M_extbuf_min + n,
+                   (char_type*)_M_extbuf_min + e);
+    }
+    else if (this->pbase() == (char_type*)rhs._M_extbuf_min)  // same for our put area
+    {
+        ptrdiff_t n = this->pptr() - this->pbase();
+        ptrdiff_t e = this->epptr() - this->pbase();
+        this->setp((char_type*)_M_extbuf_min,
+                   (char_type*)_M_extbuf_min + e);
+        this->pbump(n);  // setp() put pptr at pbase; restore the write offset
+    }
+    if (rhs.eback() == (char_type*)_M_extbuf_min)  // mirror image: rhs still points into *this
+    {
+        ptrdiff_t n = rhs.gptr() - rhs.eback();
+        ptrdiff_t e = rhs.egptr() - rhs.eback();
+        rhs.setg((char_type*)rhs._M_extbuf_min,
+                 (char_type*)rhs._M_extbuf_min + n,
+                 (char_type*)rhs._M_extbuf_min + e);
+    }
+    else if (rhs.pbase() == (char_type*)_M_extbuf_min)
+    {
+        ptrdiff_t n = rhs.pptr() - rhs.pbase();
+        ptrdiff_t e = rhs.epptr() - rhs.pbase();
+        rhs.setp((char_type*)rhs._M_extbuf_min,
+                 (char_type*)rhs._M_extbuf_min + e);
+        rhs.pbump(n);
+    }
+}
+
+/// Non-member swap for basic_filebuf; simply forwards to the member function.
+template <class CharT, class Traits>
+inline void swap(basic_filebuf<CharT, Traits>& x,
+                 basic_filebuf<CharT, Traits>& y)
+{
+    // All of the work, including the inline-buffer fix-ups, lives in the member.
+    x.swap(y);
+}
+
+/// Reports whether a FILE* is currently attached to this buffer.
+template <class CharT, class Traits>
+inline bool basic_filebuf<CharT, Traits>::is_open() const
+{
+    // "Open" is defined purely by the presence of a stream handle.
+    const bool has_stream = (_M_file != nullptr);
+    return has_stream;
+}
+
+/// Maps an openmode (ate is ignored) to an fopen() mode string; nullptr if unsupported.
+template <class CharT, class Traits>
+const char* basic_filebuf<CharT, Traits>::_M_get_mode(std::ios_base::openmode mode)
+{
+    typedef std::ios_base ios;
+    switch ((mode & ~ios::ate) | 0)
+    {
+    case ios::in:
+        return "r";
+    case ios::out:
+    case ios::out | ios::trunc:
+        return "w";
+    case ios::app:
+    case ios::out | ios::app:
+        return "a";
+    case ios::in | ios::out:
+        return "r+";
+    case ios::in | ios::out | ios::trunc:
+        return "w+";
+    case ios::in | ios::app:
+    case ios::in | ios::out | ios::app:
+        return "a+";
+    case ios::in | ios::binary:
+        return "rb";
+    case ios::out | ios::binary:
+    case ios::out | ios::trunc | ios::binary:
+        return "wb";
+    case ios::app | ios::binary:
+    case ios::out | ios::app | ios::binary:
+        return "ab";
+    case ios::in | ios::out | ios::binary:
+        return "r+b";
+    case ios::in | ios::out | ios::trunc | ios::binary:
+        return "w+b";
+    case ios::in | ios::app | ios::binary:
+    case ios::in | ios::out | ios::app | ios::binary:
+        return "a+b";
+    default:
+        return nullptr;
+    }
+}
+
+/// Opens the named file; returns this on success and nullptr on any failure.
+template <class CharT, class Traits>
+basic_filebuf<CharT, Traits>*
+basic_filebuf<CharT, Traits>::open(const char* s, std::ios_base::openmode mode)
+{
+    // Refuse to re-open while a stream is still attached.
+    if (_M_file != nullptr)
+        return nullptr;
+
+    // Flag combinations without an fopen() equivalent are rejected.
+    const char* const fopen_mode = _M_get_mode(mode);
+    if (fopen_mode == nullptr)
+        return nullptr;
+
+    _M_file = fopen(s, fopen_mode);
+    if (_M_file == nullptr)
+        return nullptr;
+    _M_om = mode;
+
+    // "ate": position at end-of-file right away; a failed seek undoes the
+    // open (the recorded mode in _M_om is left as is).
+    if ((mode & std::ios_base::ate) && fseek(_M_file, 0, SEEK_END))
+    {
+        fclose(_M_file);
+        _M_file = nullptr;
+        return nullptr;
+    }
+
+    return this;
+}
+
+/// Convenience overload: opens the file named by a std::string.
+template <class CharT, class Traits>
+inline basic_filebuf<CharT, Traits>*
+basic_filebuf<CharT, Traits>::open(const std::string& s, std::ios_base::openmode mode)
+{
+    const char* const path = s.c_str();
+    return this->open(path, mode);  // the const char* overload does the work
+}
+
+/// Wraps descriptor fd via fdopen(); nullptr if already open, bad mode, or fdopen fails.
+template <class CharT, class Traits>
+basic_filebuf<CharT, Traits>*
+basic_filebuf<CharT, Traits>::open(int fd, std::ios_base::openmode mode)
+{
+    // Like open(const char*): never replace an attached stream (it would leak).
+    const char* md = (_M_file == nullptr) ? this->_M_get_mode(mode) : nullptr;
+    FILE* f = md ? fdopen(fd, md) : nullptr;
+    if (f == nullptr)
+        return nullptr;  // fdopen() failure used to be reported as success
+    _M_file = f;
+    _M_om = mode;
+    return this;
+}
+
+/// Adopts an already-open FILE*; fails (nullptr) if f is null or a stream is attached.
+template <class CharT, class Traits>
+basic_filebuf<CharT, Traits>*
+basic_filebuf<CharT, Traits>::open(FILE* f, std::ios_base::openmode mode)
+{
+    if (f == nullptr || _M_file != nullptr) return nullptr;  // nothing to adopt / would drop a stream
+    _M_file = f; _M_om = mode;
+    return this;
+}
+
+/// Flushes and closes the attached file; returns this on success, nullptr otherwise.
+template <class CharT, class Traits>
+basic_filebuf<CharT, Traits>*
+basic_filebuf<CharT, Traits>::close()
+{
+    basic_filebuf<CharT, Traits>* rt = nullptr;
+    if (_M_file)
+    {
+        rt = this;
+        std::unique_ptr<FILE, int(*)(FILE*)> h(_M_file, fclose);  // closes even if sync() throws
+        if (sync())
+            rt = nullptr;
+        if (fclose(h.release()) != 0)
+            rt = nullptr;
+        // fclose() ends the stream whether or not it succeeds, so the handle
+        // is dropped unconditionally (it used to be kept, dangling, on failure).
+        _M_file = nullptr;
+    }
+    return rt;
+}
+
+/// Refills the get area from the file when empty; returns the next char (not consumed) or eof().
+template <class CharT, class Traits>
+typename basic_filebuf<CharT, Traits>::int_type
+basic_filebuf<CharT, Traits>::underflow()
+{
+    if (_M_file == nullptr)
+        return traits_type::eof();
+    bool initial = _M_read_mode();  // true when this call switched the buffer to reading
+    char_type buf;  // one-character stand-in get area, used only when none is installed
+    if (this->gptr() == nullptr)
+        this->setg(&buf, &buf+1, &buf+1);
+    const size_t unget_sz = initial ? 0 : std::min<size_t>((this->egptr() - this->eback()) / 2, 4);
+    int_type c = traits_type::eof();
+    if (this->gptr() == this->egptr())  // get area exhausted: read more from the file
+    {
+        memmove(this->eback(), this->egptr() - unget_sz, unget_sz * sizeof(char_type));  // keep putback chars
+        if (_M_always_noconv)
+        {
+            size_t nmemb = static_cast<size_t>(this->egptr() - this->eback() - unget_sz);
+            nmemb = fread(this->eback() + unget_sz, 1, nmemb, _M_file);  // read straight into the get area
+            if (nmemb != 0)
+            {
+                this->setg(this->eback(),
+                           this->eback() + unget_sz,
+                           this->eback() + unget_sz + nmemb);
+                c = traits_type::to_int_type(*this->gptr());
+            }
+        }
+        else
+        {
+            memmove(_M_extbuf, _M_extbufnext, _M_extbufend - _M_extbufnext);  // keep unconverted bytes
+            _M_extbufnext = _M_extbuf + (_M_extbufend - _M_extbufnext);
+            _M_extbufend = _M_extbuf + (_M_extbuf == _M_extbuf_min ? sizeof(_M_extbuf_min) : _M_ebs);
+            size_t nmemb = std::min(static_cast<size_t>(_M_ibs - unget_sz),
+                                    static_cast<size_t>(_M_extbufend - _M_extbufnext));
+            std::codecvt_base::result r;
+            _M_st_last = _M_st;  // state before this read; sync() needs it to compute the rewind
+            size_t nr = fread((void*)_M_extbufnext, 1, nmemb, _M_file);
+            if (nr != 0)
+            {
+                if (!_M_cv)
+                    throw std::bad_cast();
+                _M_extbufend = _M_extbufnext + nr;
+                char_type*  inext;
+                r = _M_cv->in(_M_st, _M_extbuf, _M_extbufend, _M_extbufnext,
+                              this->eback() + unget_sz,
+                              this->eback() + _M_ibs, inext);
+                if (r == std::codecvt_base::noconv)  // no conversion done: serve the raw bytes
+                {
+                    this->setg((char_type*)_M_extbuf, (char_type*)_M_extbuf, (char_type*)_M_extbufend);
+                    c = traits_type::to_int_type(*this->gptr());
+                }
+                else if (inext != this->eback() + unget_sz)  // at least one character was produced
+                {
+                    this->setg(this->eback(), this->eback() + unget_sz, inext);
+                    c = traits_type::to_int_type(*this->gptr());
+                }
+            }
+        }
+    }
+    else
+        c = traits_type::to_int_type(*this->gptr());
+    if (this->eback() == &buf)  // never leave the get area pointing at the local
+        this->setg(0, 0, 0);
+    return c;
+}
+
+/// Backs the get pointer up one position, optionally storing c there; eof() on failure.
+template <class CharT, class Traits>
+typename basic_filebuf<CharT, Traits>::int_type
+basic_filebuf<CharT, Traits>::pbackfail(int_type c)
+{
+    const int_type failure = traits_type::eof();
+    // Needs an attached file and at least one consumed character to step over.
+    if (!_M_file || !(this->eback() < this->gptr()))
+        return failure;
+    if (traits_type::eq_int_type(c, failure))
+    {
+        this->gbump(-1);
+        return traits_type::not_eof(c);
+    }
+    // A character differing from the buffered one is stored only if opened for output.
+    const char_type ch = traits_type::to_char_type(c);
+    if (!(_M_om & std::ios_base::out) && !traits_type::eq(ch, this->gptr()[-1]))
+        return failure;
+    this->gbump(-1);
+    *this->gptr() = ch;
+    return c;
+}
+
+/// Writes the put area (plus c, unless c is eof) to the file; eof() on failure, else not_eof(c).
+template <class CharT, class Traits>
+typename basic_filebuf<CharT, Traits>::int_type
+basic_filebuf<CharT, Traits>::overflow(int_type c)
+{
+    if (_M_file == nullptr)
+        return traits_type::eof();
+    _M_write_mode();
+    char_type buf;  // one-character stand-in put area, used only when none is installed
+    char_type* pb_save = this->pbase();  // original put area, reinstated after the write
+    char_type* epb_save = this->epptr();
+    if (!traits_type::eq_int_type(c, traits_type::eof()))
+    {
+        if (this->pptr() == nullptr)
+            this->setp(&buf, &buf+1);
+        *this->pptr() = traits_type::to_char_type(c);  // append c behind the pending characters
+        this->pbump(1);
+    }
+    if (this->pptr() != this->pbase())  // something is pending
+    {
+        if (_M_always_noconv)
+        {
+            size_t nmemb = static_cast<size_t>(this->pptr() - this->pbase());
+            if (fwrite(this->pbase(), sizeof(char_type), nmemb, _M_file) != nmemb)
+                return traits_type::eof();
+        }
+        else
+        {
+            char* extbe = _M_extbuf;
+            std::codecvt_base::result r;
+            do  // convert and write in pieces until the facet stops reporting "partial"
+            {
+                if (!_M_cv)
+                    throw std::bad_cast();
+                const char_type* e;
+                r = _M_cv->out(_M_st, this->pbase(), this->pptr(), e,
+                               _M_extbuf, _M_extbuf + _M_ebs, extbe);
+                if (e == this->pbase())  // nothing was consumed: give up rather than loop forever
+                    return traits_type::eof();
+                if (r == std::codecvt_base::noconv)
+                {
+                    size_t nmemb = static_cast<size_t>(this->pptr() - this->pbase());
+                    if (fwrite(this->pbase(), 1, nmemb, _M_file) != nmemb)
+                        return traits_type::eof();
+                }
+                else if (r == std::codecvt_base::ok || r == std::codecvt_base::partial)
+                {
+                    size_t nmemb = static_cast<size_t>(extbe - _M_extbuf);
+                    if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb)
+                        return traits_type::eof();
+                    if (r == std::codecvt_base::partial)
+                    {
+                        this->setp((char_type*)e, this->pptr());  // shrink to the unconverted tail
+                        this->pbump(this->epptr() - this->pbase());
+                    }
+                }
+                else
+                    return traits_type::eof();
+            } while (r == std::codecvt_base::partial);
+        }
+        this->setp(pb_save, epb_save);  // empty put area again (also drops the &buf stand-in)
+    }
+    return traits_type::not_eof(c);
+}
+
+/// Replaces the buffers: n is the requested size, s optional caller storage; returns this.
+template <class CharT, class Traits>
+std::basic_streambuf<CharT, Traits>*
+basic_filebuf<CharT, Traits>::setbuf(char_type* s, std::streamsize n)
+{
+    this->setg(0, 0, 0);  // drop both areas; they are rebuilt on the next read or write
+    this->setp(0, 0);
+    if (_M_owns_eb)
+        delete [] _M_extbuf;
+    if (_M_owns_ib)
+        delete [] _M_intbuf;
+    _M_ebs = n;
+    if (_M_ebs > sizeof(_M_extbuf_min))
+    {
+        if (_M_always_noconv && s)  // no conversion: caller storage serves as the byte buffer
+        {
+            _M_extbuf = (char*)s;
+            _M_owns_eb = false;
+        }
+        else
+        {
+            _M_extbuf = new char[_M_ebs];
+            _M_owns_eb = true;
+        }
+    }
+    else  // request no larger than the inline array: use the inline array
+    {
+        _M_extbuf = _M_extbuf_min;
+        _M_ebs = sizeof(_M_extbuf_min);
+        _M_owns_eb = false;
+    }
+    if (!_M_always_noconv)  // conversion needs a separate char_type buffer
+    {
+        _M_ibs = std::max<std::streamsize>(n, sizeof(_M_extbuf_min));
+        if (s && _M_ibs >= sizeof(_M_extbuf_min))
+        {
+            _M_intbuf = s;
+            _M_owns_ib = false;
+        }
+        else
+        {
+            _M_intbuf = new char_type[_M_ibs];
+            _M_owns_ib = true;
+        }
+    }
+    else
+    {
+        _M_ibs = 0;
+        _M_intbuf = 0;
+        _M_owns_ib = false;
+    }
+    return this;
+}
+
+/// Relative seek: syncs, then moves the file position; pos_type(-1) on failure.
+template <class CharT, class Traits>
+typename basic_filebuf<CharT, Traits>::pos_type
+basic_filebuf<CharT, Traits>::seekoff(off_type off, std::ios_base::seekdir way,
+                                      std::ios_base::openmode)
+{
+    if (!_M_cv)
+        throw std::bad_cast();
+    int width = _M_cv->encoding();  // bytes per character when positive
+    if (_M_file == nullptr || (width <= 0 && off != 0) || sync())
+        return pos_type(off_type(-1));
+    // width > 0 || off == 0
+    int whence;
+    switch (way)
+    {
+    case std::ios_base::beg:
+        whence = SEEK_SET;
+        break;
+    case std::ios_base::cur:
+        whence = SEEK_CUR;
+        break;
+    case std::ios_base::end:
+        whence = SEEK_END;
+        break;
+    default:
+        return pos_type(off_type(-1));
+    }
+#if _WIN32
+    if (fseek(_M_file, width > 0 ? width * off : 0, whence))  // character offset scaled to bytes
+        return pos_type(off_type(-1));
+    pos_type r = ftell(_M_file);
+#else
+    if (fseeko(_M_file, width > 0 ? width * off : 0, whence))  // character offset scaled to bytes
+        return pos_type(off_type(-1));
+    pos_type r = ftello(_M_file);
+#endif
+    r.state(_M_st);  // the returned position carries the current conversion state
+    return r;
+}
+
+/// Absolute seek to sp: syncs, repositions the file, adopts sp's state; pos_type(-1) on failure.
+template <class CharT, class Traits>
+typename basic_filebuf<CharT, Traits>::pos_type
+basic_filebuf<CharT, Traits>::seekpos(pos_type sp, std::ios_base::openmode)
+{
+    if (_M_file == nullptr || sync())  // closed, or pending data could not be synced
+        return pos_type(off_type(-1));
+#if _WIN32
+    if (fseek(_M_file, sp, SEEK_SET))
+        return pos_type(off_type(-1));
+#else
+    if (fseeko(_M_file, sp, SEEK_SET))
+        return pos_type(off_type(-1));
+#endif
+    _M_st = sp.state();  // continue converting from the state stored in sp
+    return sp;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+template <class CharT, class Traits>
+int
+basic_filebuf<CharT, Traits>::sync()
+{
+    if (_M_file == nullptr)
+        return 0;
+    if (!_M_cv)
+        throw std::bad_cast();
+    if (_M_cm & std::ios_base::out)
+    {
+        if (this->pptr() != this->pbase())
+            if (overflow() == traits_type::eof())
+                return -1;
+        std::codecvt_base::result r;
+        do
+        {
+            char* extbe;
+            r = _M_cv->unshift(_M_st, _M_extbuf, _M_extbuf + _M_ebs, extbe);
+            size_t nmemb = static_cast<size_t>(extbe - _M_extbuf);
+            if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb)
+                return -1;
+        } while (r == std::codecvt_base::partial);
+        if (r == std::codecvt_base::error)
+            return -1;
+        if (fflush(_M_file))
+            return -1;
+    }
+    else if (_M_cm & std::ios_base::in)
+    {
+        off_type c;
+        state_type state = _M_st_last;
+        bool update_st = false;
+        if (_M_always_noconv)
+            c = this->egptr() - this->gptr();
+        else
+        {
+            int width = _M_cv->encoding();
+            c = _M_extbufend - _M_extbufnext;
+            if (width > 0)
+                c += width * (this->egptr() - this->gptr());
+            else
+            {
+                if (this->gptr() != this->egptr())
+                {
+                    const int off =  _M_cv->length(state, _M_extbuf,
+                                                   _M_extbufnext,
+                                                   this->gptr() - this->eback());
+                    c += _M_extbufnext - _M_extbuf - off;
+                    update_st = true;
+                }
+            }
+        }
+#if _WIN32
+        if (fseek(_M_file_, -c, SEEK_CUR))
+            return -1;
+#else
+        if (fseeko(_M_file, -c, SEEK_CUR))
+            return -1;
+#endif
+        if (update_st)
+            _M_st = state;
+        _M_extbufnext = _M_extbufend = _M_extbuf;
+        this->setg(0, 0, 0);
+        _M_cm = std::ios_base::openmode(0);
+    }
+    return 0;
+}
+
+/// Adopts loc's codecvt facet, reshuffling the buffers if the need for conversion changed.
+template <class CharT, class Traits>
+void
+basic_filebuf<CharT, Traits>::imbue(const std::locale& loc)
+{
+    sync();  // result ignored
+    _M_cv = &std::use_facet<std::codecvt<char_type, char, state_type> >(loc);
+    bool old_anc = _M_always_noconv;
+    _M_always_noconv = _M_cv->always_noconv();
+    if (old_anc != _M_always_noconv)  // buffers only need rearranging when this flag flips
+    {
+        this->setg(0, 0, 0);
+        this->setp(0, 0);
+        // invariant, char_type is char, else we couldn't get here
+        if (_M_always_noconv)  // need to dump _M_intbuf
+        {
+            if (_M_owns_eb)
+                delete [] _M_extbuf;
+            _M_owns_eb = _M_owns_ib;  // the former internal buffer becomes the byte buffer,
+            _M_ebs = _M_ibs;          // taking its size and ownership flag along
+            _M_extbuf = (char*)_M_intbuf;
+            _M_ibs = 0;
+            _M_intbuf = nullptr;
+            _M_owns_ib = false;
+        }
+        else  // need to obtain an _M_intbuf.
+        {     // If _M_extbuf is user-supplied, use it, else new _M_intbuf
+            if (!_M_owns_eb && _M_extbuf != _M_extbuf_min)
+            {
+                _M_ibs = _M_ebs;
+                _M_intbuf = (char_type*)_M_extbuf;
+                _M_owns_ib = false;
+                _M_extbuf = new char[_M_ebs];  // fresh byte buffer of the same size
+                _M_owns_eb = true;
+            }
+            else
+            {
+                _M_ibs = _M_ebs;
+                _M_intbuf = new char_type[_M_ibs];
+                _M_owns_ib = true;
+            }
+        }
+    }
+}
+
+/// Switches to read mode if not already reading; returns true when a switch happened.
+template <class CharT, class Traits>
+bool
+basic_filebuf<CharT, Traits>::_M_read_mode()
+{
+    if (_M_cm & std::ios_base::in)
+        return false;  // already reading: leave the get area untouched
+
+    this->setp(0, 0);
+    // Install an exhausted get area (gptr == egptr) spanning the buffer that
+    // holds the characters handed to the reader: the byte buffer itself when
+    // no conversion is needed, the internal buffer otherwise.
+    char_type* const first = _M_always_noconv ? (char_type*)_M_extbuf : _M_intbuf;
+    const size_t length = _M_always_noconv ? _M_ebs : _M_ibs;
+    this->setg(first, first + length, first + length);
+
+    _M_cm = std::ios_base::in;
+    return true;
+}
+
+/// Switches to write mode if not already writing, installing the put area.
+template <class CharT, class Traits>
+void
+basic_filebuf<CharT, Traits>::_M_write_mode()
+{
+    if (_M_cm & std::ios_base::out)
+        return;  // already writing: keep the current put area
+    this->setg(0, 0, 0);
+    char_type* put_begin = 0;
+    char_type* put_end = 0;
+    // With only the inline-sized byte buffer no put area is installed at all;
+    // overflow() then supplies a one-character stand-in per call.
+    if (_M_ebs > sizeof(_M_extbuf_min))
+    {
+        // The last slot stays outside the put area so overflow() can append to it.
+        put_begin = _M_always_noconv ? (char_type*)_M_extbuf : _M_intbuf;
+        put_end = put_begin + ((_M_always_noconv ? _M_ebs : _M_ibs) - 1);
+    }
+    this->setp(put_begin, put_end);
+    _M_cm = std::ios_base::out;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+}
+
+///////////////////////////////////////////////////////////////////////////////
+#endif // KALDI_UTIL_BASIC_FILEBUF_H_
+
+///////////////////////////////////////////////////////////////////////////////
+
+/*
+ * ============================================================================
+ * libc++ License
+ * ============================================================================
+ *
+ * The libc++ library is dual licensed under both the University of Illinois
+ * "BSD-Like" license and the MIT license.  As a user of this code you may
+ * choose to use it under either license.  As a contributor, you agree to allow
+ * your code to be used under both.
+ *
+ * Full text of the relevant licenses is included below.
+ *
+ * ============================================================================
+ *
+ * University of Illinois/NCSA
+ * Open Source License
+ *
+ * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included below)
+ *
+ * All rights reserved.
+ *
+ * Developed by:
+ *
+ *     LLVM Team
+ *
+ *     University of Illinois at Urbana-Champaign
+ *
+ *     http://llvm.org
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal with
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is furnished to do
+ * so, subject to the following conditions:
+ *
+ *     * Redistributions of source code must retain the above copyright notice,
+ *       this list of conditions and the following disclaimers.
+ *
+ *     * Redistributions in binary form must reproduce the above copyright notice,
+ *       this list of conditions and the following disclaimers in the
+ *       documentation and/or other materials provided with the distribution.
+ *
+ *     * Neither the names of the LLVM Team, University of Illinois at
+ *       Urbana-Champaign, nor the names of its contributors may be used to
+ *       endorse or promote products derived from this Software without specific
+ *       prior written permission.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
+ * SOFTWARE.
+ *
+ * ==============================================================================
+ *
+ * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included below)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * ==============================================================================
+ *
+ * This file is a partial list of people who have contributed to the LLVM/libc++
+ * project.  If you have contributed a patch or made some other contribution to
+ * LLVM/libc++, please submit a patch to this file to add yourself, and it will be
+ * done!
+ *
+ * The list is sorted by surname and formatted to allow easy grepping and
+ * beautification by scripts.  The fields are: name (N), email (E), web-address
+ * (W), PGP key ID and fingerprint (P), description (D), and snail-mail address
+ * (S).
+ *
+ * N: Saleem Abdulrasool
+ * E: compnerd@compnerd.org
+ * D: Minor patches and Linux fixes.
+ *
+ * N: Dimitry Andric
+ * E: dimitry@andric.com
+ * D: Visibility fixes, minor FreeBSD portability patches.
+ *
+ * N: Holger Arnold
+ * E: holgerar@gmail.com
+ * D: Minor fix.
+ *
+ * N: Ruben Van Boxem
+ * E: vanboxem dot ruben at gmail dot com
+ * D: Initial Windows patches.
+ *
+ * N: David Chisnall
+ * E: theraven at theravensnest dot org
+ * D: FreeBSD and Solaris ports, libcxxrt support, some atomics work.
+ *
+ * N: Marshall Clow
+ * E: mclow.lists@gmail.com
+ * E: marshall@idio.com
+ * D: C++14 support, patches and bug fixes.
+ *
+ * N: Bill Fisher
+ * E: william.w.fisher@gmail.com
+ * D: Regex bug fixes.
+ *
+ * N: Matthew Dempsky
+ * E: matthew@dempsky.org
+ * D: Minor patches and bug fixes.
+ *
+ * N: Google Inc.
+ * D: Copyright owner and contributor of the CityHash algorithm
+ *
+ * N: Howard Hinnant
+ * E: hhinnant@apple.com
+ * D: Architect and primary author of libc++
+ *
+ * N: Hyeon-bin Jeong
+ * E: tuhertz@gmail.com
+ * D: Minor patches and bug fixes.
+ *
+ * N: Argyrios Kyrtzidis
+ * E: kyrtzidis@apple.com
+ * D: Bug fixes.
+ *
+ * N: Bruce Mitchener, Jr.
+ * E: bruce.mitchener@gmail.com
+ * D: Emscripten-related changes.
+ *
+ * N: Michel Morin
+ * E: mimomorin@gmail.com
+ * D: Minor patches to is_convertible.
+ *
+ * N: Andrew Morrow
+ * E: andrew.c.morrow@gmail.com
+ * D: Minor patches and Linux fixes.
+ *
+ * N: Arvid Picciani
+ * E: aep at exys dot org
+ * D: Minor patches and musl port.
+ *
+ * N: Bjorn Reese
+ * E: breese@users.sourceforge.net
+ * D: Initial regex prototype
+ *
+ * N: Nico Rieck
+ * E: nico.rieck@gmail.com
+ * D: Windows fixes
+ *
+ * N: Jonathan Sauer
+ * D: Minor patches, mostly related to constexpr
+ *
+ * N: Craig Silverstein
+ * E: csilvers@google.com
+ * D: Implemented Cityhash as the string hash function on 64-bit machines
+ *
+ * N: Richard Smith
+ * D: Minor patches.
+ *
+ * N: Joerg Sonnenberger
+ * E: joerg@NetBSD.org
+ * D: NetBSD port.
+ *
+ * N: Stephan Tolksdorf
+ * E: st@quanttec.com
+ * D: Minor <atomic> fix
+ *
+ * N: Michael van der Westhuizen
+ * E: r1mikey at gmail dot com
+ *
+ * N: Klaas de Vries
+ * E: klaas at klaasgaaf dot nl
+ * D: Minor bug fix.
+ *
+ * N: Zhang Xiongpang
+ * E: zhangxiongpang@gmail.com
+ * D: Minor patches and bug fixes.
+ *
+ * N: Xing Xue
+ * E: xingxue@ca.ibm.com
+ * D: AIX port
+ *
+ * N: Zhihao Yuan
+ * E: lichray@gmail.com
+ * D: Standard compatibility fixes.
+ *
+ * N: Jeffrey Yasskin
+ * E: jyasskin@gmail.com
+ * E: jyasskin@google.com
+ * D: Linux fixes.
+ */
diff --git a/kaldi_io/src/kaldi/util/common-utils.h b/kaldi_io/src/kaldi/util/common-utils.h
new file mode 100644
index 0000000..9d39f9d
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/common-utils.h
@@ -0,0 +1,31 @@
+// util/common-utils.h
+
+// Copyright 2009-2011 Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_UTIL_COMMON_UTILS_H_
+#define KALDI_UTIL_COMMON_UTILS_H_
+
+#include "base/kaldi-common.h"
+#include "util/parse-options.h"
+#include "util/kaldi-io.h"
+#include "util/simple-io-funcs.h"
+#include "util/kaldi-holder.h"
+#include "util/kaldi-table.h"
+#include "util/table-types.h"
+#include "util/text-utils.h"
+
+#endif
diff --git a/kaldi_io/src/kaldi/util/const-integer-set-inl.h b/kaldi_io/src/kaldi/util/const-integer-set-inl.h
new file mode 100644
index 0000000..8f92ab2
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/const-integer-set-inl.h
@@ -0,0 +1,88 @@
+// util/const-integer-set-inl.h
+
+// Copyright 2009-2011     Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_UTIL_CONST_INTEGER_SET_INL_H_
+#define KALDI_UTIL_CONST_INTEGER_SET_INL_H_
+
+// Do not include this file directly.  It is included by const-integer-set.h
+
+
+namespace kaldi {
+
+template<class I>
+void ConstIntegerSet<I>::InitInternal() {
+  // Derives bounds and lookup flags from slow_set_ (expected sorted, unique).
+  KALDI_ASSERT_IS_INTEGER_TYPE(I);
+  quick_set_.clear();  // just in case we previously had data.
+  contiguous_ = false;
+  quick_ = false;
+  if (slow_set_.empty()) {
+    // lowest > highest: count() rejects every value on its range check.
+    lowest_member_ = (I) 1;
+    highest_member_ = (I) 0;
+    return;
+  }
+  lowest_member_ = slow_set_.front();
+  highest_member_ = slow_set_.back();
+  // Work in size_t: "highest + 1 - lowest" evaluated in type I overflows
+  // when the members lie near the limits of I.  range wraps to 0 only if
+  // the members span every value size_t can count, hence the != 0 test.
+  const size_t low = static_cast<size_t>(lowest_member_);
+  const size_t range = static_cast<size_t>(highest_member_) - low + 1;
+  if (range == slow_set_.size()) {
+    contiguous_ = true;  // every value in [lowest, highest] is a member.
+  } else if (range != 0 && range < slow_set_.size() * 8 * sizeof(I)) {
+    // A bitmap (assuming 1 bit per element) is more compact than the vector.
+    quick_set_.resize(range, false);
+    for (size_t i = 0; i < slow_set_.size(); i++)
+      quick_set_[static_cast<size_t>(slow_set_[i]) - low] = true;
+    quick_ = true;
+  }
+}
+
+template<class I>
+int ConstIntegerSet<I>::count(I i) const {
+  // Returns 1 if i is in the set, else 0.
+  if (i < lowest_member_ || i > highest_member_) return 0;
+  if (contiguous_) return 1;  // whole [lowest, highest] range is present.
+  if (quick_) {  // index in size_t: "i - lowest" in type I could overflow.
+    const size_t low = static_cast<size_t>(lowest_member_);
+    return quick_set_[static_cast<size_t>(i) - low] ? 1 : 0;
+  }
+  const bool found = std::binary_search(slow_set_.begin(), slow_set_.end(), i);
+  return found ? 1 : 0;
+}
+
+template<class I>
+void ConstIntegerSet<I>::Write(std::ostream &os, bool binary) const {
+  WriteIntegerVector(os, binary, slow_set_);  // only the member list is written.
+}
+
+template<class I>
+void ConstIntegerSet<I>::Read(std::istream &is, bool binary) {
+  ReadIntegerVector(is, binary, &slow_set_);
+  InitInternal();  // rebuild the bounds and lookup flags from the data read.
+}
+
+
+
+} // end namespace kaldi
+
+#endif
diff --git a/kaldi_io/src/kaldi/util/const-integer-set.h b/kaldi_io/src/kaldi/util/const-integer-set.h
new file mode 100644
index 0000000..ffdce4d
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/const-integer-set.h
@@ -0,0 +1,95 @@
+// util/const-integer-set.h
+
+// Copyright 2009-2011     Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_UTIL_CONST_INTEGER_SET_H_
+#define KALDI_UTIL_CONST_INTEGER_SET_H_
+#include <vector>
+#include <set>
+#include <algorithm>
+#include <limits>
+#include <cassert>
+#include "util/stl-utils.h"
+
+  /* ConstIntegerSet is a way to efficiently test whether something is in a
+     supplied set of integers.  It can be initialized from a vector or set, but
+     never changed after that. It either uses a sorted vector or an array of
+     bool, depending on the input.  It behaves like a const version of an STL set, with
+     only a subset of the functionality (count, begin, end, size, empty); the
+     additional functions (Init, Read, Write) are upper-case.
+
+     Note that we could get rid of the member slow_set_, but we'd have to
+     do more work to implement an iterator type.  This would save memory.
+  */
+
+namespace kaldi {
+
+template<class I> class ConstIntegerSet {
+ public:
+  // Empty set; the flags are initialized too, so copies never read garbage.
+  ConstIntegerSet(): lowest_member_(1), highest_member_(0),
+                     contiguous_(false), quick_(false) { }
+  void Init(const std::vector<I> &input) {  // input need not be sorted.
+    slow_set_ = input;
+    SortAndUniq(&slow_set_);
+    InitInternal();
+  }
+
+  void Init(const std::set<I> &input) {
+    CopySetToVector(input, &slow_set_);
+    InitInternal();
+  }
+
+  explicit ConstIntegerSet(const std::vector<I> &input): slow_set_(input) {
+    SortAndUniq(&slow_set_);
+    InitInternal();
+  }
+  explicit ConstIntegerSet(const std::set<I> &input) {
+    CopySetToVector(input, &slow_set_);
+    InitInternal();
+  }
+  explicit ConstIntegerSet(const ConstIntegerSet<I> &other): slow_set_(other.slow_set_) {
+    InitInternal();
+  }
+
+  int count(I i) const;  // returns 1 or 0.
+  typedef typename std::vector<I>::const_iterator iterator;
+  iterator begin() const { return slow_set_.begin(); }
+  iterator end() const { return slow_set_.end(); }
+  size_t size() const { return slow_set_.size(); }
+  bool empty() const { return slow_set_.empty(); }
+
+  void Write(std::ostream &os, bool binary) const;
+  void Read(std::istream &is, bool binary);
+
+ private:
+  I lowest_member_;              // smallest member (1 when the set is empty).
+  I highest_member_;             // largest member (0 when the set is empty).
+  bool contiguous_;              // true if all of [lowest, highest] is present.
+  bool quick_;                   // true if quick_set_ is in use.
+  std::vector<bool> quick_set_;  // bitmap indexed by (value - lowest_member_).
+  std::vector<I> slow_set_;      // the members, as stored by Init()/Read().
+  void InitInternal();           // derives the members above from slow_set_.
+};
+
+} // end namespace kaldi
+
+#include "const-integer-set-inl.h"
+
+#endif
diff --git a/kaldi_io/src/kaldi/util/edit-distance-inl.h b/kaldi_io/src/kaldi/util/edit-distance-inl.h
new file mode 100644
index 0000000..ebbfb71
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/edit-distance-inl.h
@@ -0,0 +1,189 @@
+// util/edit-distance-inl.h
+
+// Copyright 2009-2011  Microsoft Corporation;  Haihua Xu;  Yanmin Qian
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_UTIL_EDIT_DISTANCE_INL_H_
+#define KALDI_UTIL_EDIT_DISTANCE_INL_H_
+#include "util/stl-utils.h"
+
+
+namespace kaldi {
+
+template<class T>
+int32 LevenshteinEditDistance(const std::vector<T> &a,
+                              const std::vector<T> &b) {
+  // Returns the Levenshtein distance between a and b, i.e. the minimum
+  // number of insertions, deletions and substitutions that turns one
+  // sequence into the other.
+  // Algorithm:
+  //  write A and B for the sequences, let |A| = M and |B| = N be their
+  //  lengths, with elements a_0 ... a_{M-1} and b_0 ... b_{N-1}.
+  //  We are computing the recursion
+  //     E(m, n) = min(  E(m-1, n-1) + (1-delta(a_{m-1}, b_{n-1})),
+  //                    E(m-1, n) + 1,
+  //                    E(m, n-1) + 1 ).
+  //  where E(m, n) is defined for m = 0..M and n = 0..N and out-of-
+  //  bounds quantities are considered to be infinity (i.e. the
+  //  recursion does not visit them).
+
+  // We do this computation using two rows of size N+1 (previous and
+  // current).  The outer iterations range over m = 1..M.
+  int M = a.size(), N = b.size();
+  std::vector<int32> e(N+1);      // row E(m-1, .)
+  std::vector<int32> e_tmp(N+1);  // row E(m, .), being filled in.
+  // initialize e to E(0, n) = n.
+  for (int32 n = 0; n <= N; n++)
+    e[n] = n;
+  for (int32 m = 1; m <= M; m++) {
+    // computing E(m, .) from E(m-1, .)
+    // handle special case n = 0:
+    e_tmp[0] = e[0] + 1;
+    for (int32 n = 1; n <= N; n++) {
+      int32 term1 = e[n-1] + (a[m-1] == b[n-1] ? 0 : 1);
+      int32 term2 = e[n] + 1;
+      int32 term3 = e_tmp[n-1] + 1;
+      e_tmp[n] = std::min(term1, std::min(term2, term3));
+    }
+    e.swap(e_tmp);  // O(1) exchange; e_tmp is fully rewritten on the next pass.
+  }
+  return e.back();
+}
+// Error counts and cost of one cell of the edit-distance computation below.
+struct error_stats{
+  int32 ins_num;     // number of insertions in the alignment.
+  int32 del_num;     // number of deletions in the alignment.
+  int32 sub_num;     // number of substitutions in the alignment.
+  int32 total_cost;  // minimum total cost to the current alignment.
+};
+// Note that both hyp and ref should not contain noise word in
+// the following implementation.
+
+template<class T>
+int32 LevenshteinEditDistance(const std::vector<T> &ref,
+                              const std::vector<T> &hyp,
+                              int32 *ins, int32 *del, int32 *sub) {
+  // Returns the edit distance between ref and hyp and stores in *ins, *del
+  // and *sub the error counts along one minimum-cost alignment.
+  // e holds the row for the previous hyp position, cur_e the current one.
+  std::vector<error_stats> e(ref.size()+1);
+  std::vector<error_stats> cur_e(ref.size()+1);
+  // initialize the first hypothesis aligned to the reference at each
+  // position:[hyp_index =0][ref_index]
+  for (size_t i = 0; i < e.size(); i++) {
+    e[i].ins_num = 0;
+    e[i].sub_num = 0;
+    e[i].del_num = i;
+    e[i].total_cost = i;
+  }
+  // for other alignments
+  for (size_t hyp_index = 1; hyp_index <= hyp.size(); hyp_index++) {
+    cur_e[0] = e[0];
+    cur_e[0].ins_num++;
+    cur_e[0].total_cost++;
+    for (size_t ref_index = 1; ref_index <= ref.size(); ref_index++) {
+      const bool mismatch = (hyp[hyp_index-1] != ref[ref_index-1]);
+      int32 ins_err = e[ref_index].total_cost + 1;
+      int32 del_err = cur_e[ref_index-1].total_cost + 1;
+      int32 sub_err = e[ref_index-1].total_cost + (mismatch ? 1 : 0);
+      if (sub_err < ins_err && sub_err < del_err) {
+        cur_e[ref_index] = e[ref_index-1];
+        if (mismatch)
+          cur_e[ref_index].sub_num++;  // substitution error should be increased
+        cur_e[ref_index].total_cost = sub_err;
+      } else if (del_err < ins_err) {
+        cur_e[ref_index] = cur_e[ref_index-1];
+        cur_e[ref_index].total_cost = del_err;
+        cur_e[ref_index].del_num++;    // deletion number is increased.
+      } else {
+        cur_e[ref_index] = e[ref_index];
+        cur_e[ref_index].total_cost = ins_err;
+        cur_e[ref_index].ins_num++;    // insertion number is increased.
+      }
+    }
+    e.swap(cur_e);  // cur_e becomes the previous row without being copied.
+  }
+  const error_stats &final_stats = e.back();
+  *ins = final_stats.ins_num;
+  *del = final_stats.del_num;
+  *sub = final_stats.sub_num;
+  return final_stats.total_cost;
+}
+
+template<class T>
+int32 LevenshteinAlignment(const std::vector<T> &a,
+                           const std::vector<T> &b,
+                           T eps_symbol,
+                           std::vector<std::pair<T, T> > *output) {
+  // Aligns a (treated as the reference) with b (the hypothesis), writes the
+  // aligned symbol pairs to *output and returns the edit distance.
+  // eps_symbol marks the missing side of an insertion or deletion, so it
+  // must not occur in either input.
+  KALDI_ASSERT(output != NULL);
+  for (size_t i = 0; i < a.size(); i++) KALDI_ASSERT(a[i] != eps_symbol);
+  for (size_t i = 0; i < b.size(); i++) KALDI_ASSERT(b[i] != eps_symbol);
+  output->clear();
+
+  // Full (M+1) x (N+1) cost table; cost[r][c] is the distance between the
+  // first r symbols of a and the first c symbols of b.
+  const size_t M = a.size(), N = b.size();
+  std::vector<std::vector<int32> > cost(M+1);
+  for (size_t r = 0; r <= M; r++) cost[r].resize(N+1);
+  for (size_t c = 0; c <= N; c++) cost[0][c] = c;
+  for (size_t r = 1; r <= M; r++) {
+    cost[r][0] = cost[r-1][0] + 1;
+    for (size_t c = 1; c <= N; c++) {
+      int32 diag = cost[r-1][c-1] + (a[r-1] == b[c-1] ? 0 : 1);
+      int32 up = cost[r-1][c] + 1;    // deletion (a == ref, b == hyp).
+      int32 left = cost[r][c-1] + 1;  // insertion.
+      cost[r][c] = std::min(diag, std::min(up, left));
+    }
+  }
+
+  // Trace back from (M, N) to (0, 0); pairs come out in reverse order.
+  size_t r = M, c = N;
+  while (r != 0 || c != 0) {
+    bool step_a, step_b;  // whether this step consumes a symbol of a / b.
+    if (r == 0) {
+      step_a = false; step_b = true;
+    } else if (c == 0) {
+      step_a = true; step_b = false;
+    } else {
+      int32 diag = cost[r-1][c-1] + (a[r-1] == b[c-1] ? 0 : 1);
+      int32 up = cost[r-1][c] + 1;
+      int32 left = cost[r][c-1] + 1;
+      if (diag <= std::min(up, left)) {  // prefer match/substitution on ties.
+        step_a = true; step_b = true;
+      } else if (up <= left) {           // then deletion over insertion.
+        step_a = true; step_b = false;
+      } else {
+        step_a = false; step_b = true;
+      }
+    }
+    if (step_a) r--;
+    if (step_b) c--;
+    output->push_back(std::make_pair(step_a ? a[r] : eps_symbol,
+                                     step_b ? b[c] : eps_symbol));
+  }
+  ReverseVector(output);
+  return cost[M][N];
+}
+
+
+}  // end namespace kaldi
+
+#endif // KALDI_UTIL_EDIT_DISTANCE_INL_H_
diff --git a/kaldi_io/src/kaldi/util/edit-distance.h b/kaldi_io/src/kaldi/util/edit-distance.h
new file mode 100644
index 0000000..6000622
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/edit-distance.h
@@ -0,0 +1,63 @@
+// util/edit-distance.h
+
+// Copyright 2009-2011     Microsoft Corporation;  Haihua Xu
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_UTIL_EDIT_DISTANCE_H_
+#define KALDI_UTIL_EDIT_DISTANCE_H_
+#include <vector>
+#include <set>
+#include <algorithm>
+#include <limits>
+#include <cassert>
+#include "base/kaldi-types.h"
+
+namespace kaldi {
+
+// Compute the edit-distance between two sequences of symbols of type T.
+template<class T>
+int32 LevenshteinEditDistance(const std::vector<T> &a,
+                              const std::vector<T> &b);
+
+
+// Edit distance that also reports the error breakdown: on return *ins, *del
+// and *sub hold the insertion, deletion and substitution counts.  Note: noise
+// words must be filtered out of both sequences before calling this.
+template<class T>
+int32 LevenshteinEditDistance(const std::vector<T> &ref,
+                              const std::vector<T> &hyp,
+                              int32 *ins, int32 *del, int32 *sub);
+
+// This version of the edit-distance computation outputs the alignment
+// between the two.  This is a vector of pairs of (symbol a, symbol b).
+// The epsilon symbol (eps_symbol) must not occur in sequences a or b.
+// Where a symbol in one sequence is aligned to no symbol in the other
+// (insertion or deletion), epsilon is the corresponding member of the pair.
+// It returns the edit-distance between the two sequences.
+
+template<class T>
+int32 LevenshteinAlignment(const std::vector<T> &a,
+                           const std::vector<T> &b,
+                           T eps_symbol,
+                           std::vector<std::pair<T, T> > *output);
+
+} // end namespace kaldi
+
+#include "edit-distance-inl.h"
+
+#endif
diff --git a/kaldi_io/src/kaldi/util/hash-list-inl.h b/kaldi_io/src/kaldi/util/hash-list-inl.h
new file mode 100644
index 0000000..19c2bb6
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/hash-list-inl.h
@@ -0,0 +1,183 @@
+// util/hash-list-inl.h
+
+// Copyright 2009-2011   Microsoft Corporation
+//                2013   Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_UTIL_HASH_LIST_INL_H_
+#define KALDI_UTIL_HASH_LIST_INL_H_
+
+// Do not include this file directly.  It is included by hash-list.h
+
+
+namespace kaldi {
+
+template<class I, class T> HashList<I, T>::HashList() {
+  hash_size_ = 0;  // no hash size until SetSize() is called.
+  list_head_ = NULL;
+  freed_head_ = NULL;
+  bucket_list_tail_ = static_cast<size_t>(-1);  // invalid.
+}
+
+template<class I, class T> void HashList<I, T>::SetSize(size_t size) {
+  hash_size_ = size;
+  KALDI_ASSERT(list_head_ == NULL && bucket_list_tail_ == static_cast<size_t>(-1));  // make sure empty.
+  // The bucket array is only ever grown here, never shrunk.
+  if (buckets_.size() < size) buckets_.resize(size, HashBucket(0, NULL));
+}
+
+template<class I, class T>
+typename HashList<I, T>::Elem* HashList<I, T>::Clear() {
+  // Empties the hash (the buckets) and hands the element list over to the
+  // caller, who takes ownership of it.
+  size_t b = bucket_list_tail_;
+  while (b != static_cast<size_t>(-1)) {
+    buckets_[b].last_elem = NULL;  // this is how we indicate "empty".
+    b = buckets_[b].prev_bucket;
+  }
+  bucket_list_tail_ = static_cast<size_t>(-1);
+  Elem *old_head = list_head_;
+  list_head_ = NULL;
+  return old_head;
+}
+
+template<class I, class T>
+const typename HashList<I, T>::Elem* HashList<I, T>::GetList() const {
+  return list_head_;  // head of the element list; NULL when the list is empty.
+}
+
+template<class I, class T>
+inline void HashList<I, T>::Delete(Elem *e) {
+  e->tail = freed_head_;  // push e onto the front of the free list...
+  freed_head_ = e;        // ...so that New() can hand it out again.
+}
+
+template<class I, class T>
+inline typename HashList<I, T>::Elem* HashList<I, T>::Find(I key) {
+  // Returns the first Elem in key's bucket whose key matches, else NULL.
+  const size_t index = static_cast<size_t>(key) % hash_size_;
+  const HashBucket &bucket = buckets_[index];
+  if (bucket.last_elem == NULL) return NULL;  // empty bucket.
+  // The bucket's elements form one stretch of the list: it starts right
+  // after the previous bucket's last element (or at list_head_) and ends
+  // at this bucket's last element.
+  Elem *cur = list_head_;
+  if (bucket.prev_bucket != static_cast<size_t>(-1))
+    cur = buckets_[bucket.prev_bucket].last_elem->tail;
+  Elem *const stop = bucket.last_elem->tail;
+  while (cur != stop && !(cur->key == key)) cur = cur->tail;
+  return (cur != stop ? cur : NULL);  // NULL: not found.
+}
+
+template<class I, class T>
+inline typename HashList<I, T>::Elem* HashList<I, T>::New() {
+  // Hands out an Elem from the free list, first refilling the free list
+  // with a freshly allocated block of Elems if it has run dry.
+  if (freed_head_ == NULL) {
+    Elem *block = new Elem[allocate_block_size_];
+    for (size_t i = 0; i+1 < allocate_block_size_; i++)
+      block[i].tail = &block[i+1];
+    block[allocate_block_size_-1].tail = NULL;
+    freed_head_ = block;
+    allocated_.push_back(block);  // remembered so the destructor can free it.
+  }
+  Elem *ans = freed_head_;
+  freed_head_ = ans->tail;
+  return ans;
+}
+
+template<class I, class T>
+HashList<I, T>::~HashList() {
+  // Count the Elems that made it back to the free list *before* releasing
+  // any memory, since the free list lives inside the allocated blocks.
+  size_t num_in_list = 0;
+  for (Elem *e = freed_head_; e != NULL; e = e->tail)
+    num_in_list++;
+  const size_t num_allocated = allocated_.size() * allocate_block_size_;
+  for (size_t i = 0; i < allocated_.size(); i++)
+    delete[] allocated_[i];
+  // Any Elem missing from the free list was never passed to Delete().
+  if (num_in_list != num_allocated) {
+    KALDI_WARN << "Possible memory leak: " << num_in_list
+               << " != " << num_allocated
+               << ": you might have forgotten to call Delete on "
+               << "some Elems";
+  }
+}
+
+
+template<class I, class T>
+void HashList<I, T>::Insert(I key, T val) {
+  // Adds (key, val) without checking whether key is already present.
+  const size_t index = static_cast<size_t>(key) % hash_size_;
+  HashBucket &bucket = buckets_[index];
+  Elem *elem = New();
+  elem->key = key;
+  elem->val = val;
+  if (bucket.last_elem != NULL) {
+    // Already-occupied bucket: append after its current last element.
+    Elem *prev_last = bucket.last_elem;
+    elem->tail = prev_last->tail;
+    prev_last->tail = elem;
+    bucket.last_elem = elem;
+    return;
+  }
+  // Unoccupied bucket.  It becomes the head of the bucket list, which is
+  // the tail of the regular list (they go in opposite directions).
+  if (bucket_list_tail_ == static_cast<size_t>(-1)) {
+    // list was empty so this is the first elem.
+    KALDI_ASSERT(list_head_ == NULL);
+    list_head_ = elem;
+  } else {
+    // link in to the chain of Elems
+    buckets_[bucket_list_tail_].last_elem->tail = elem;
+  }
+  elem->tail = NULL;
+  bucket.last_elem = elem;
+  bucket.prev_bucket = bucket_list_tail_;
+  bucket_list_tail_ = index;
+}
+
+template<class I, class T>
+void HashList<I, T>::InsertMore(I key, T val) {
+  // Adds (key, val) right after an existing Elem that has the same key.
+  size_t index = (static_cast<size_t>(key) % hash_size_);
+  HashBucket &bucket = buckets_[index];
+  KALDI_ASSERT(bucket.last_elem != NULL); // we assume there is already one element
+  Elem *elem = New();
+  elem->key = key;
+  elem->val = val;
+  if (bucket.last_elem->key == key) { // standard behavior: add as last element
+    elem->tail = bucket.last_elem->tail;
+    bucket.last_elem->tail = elem;
+    bucket.last_elem = elem;
+    return;
+  }
+  Elem *e = (bucket.prev_bucket == static_cast<size_t>(-1) ?
+             list_head_ : buckets_[bucket.prev_bucket].last_elem->tail);
+  Elem *const end = bucket.last_elem->tail;  // one past this bucket; may be NULL.
+  while (e != end && e->key != key) e = e->tail;  // find place to insert.
+  KALDI_ASSERT(e != end && e->key == key);  // on a miss e == end (maybe NULL).
+  elem->tail = e->tail;
+  e->tail = elem;
+}
+
+
+} // end namespace kaldi
+
+#endif
diff --git a/kaldi_io/src/kaldi/util/hash-list.h b/kaldi_io/src/kaldi/util/hash-list.h
new file mode 100644
index 0000000..4524759
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/hash-list.h
@@ -0,0 +1,140 @@
+// util/hash-list.h
+
+// Copyright 2009-2011   Microsoft Corporation
+//                2013   Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_UTIL_HASH_LIST_H_
+#define KALDI_UTIL_HASH_LIST_H_
+#include <vector>
+#include <set>
+#include <algorithm>
+#include <limits>
+#include <cassert>
+#include "util/stl-utils.h"
+
+
+/* This header provides utilities for a structure that's used in a decoder (but
+   is quite generic in nature so we implement and test it separately).
+   Basically it's a singly-linked list, but implemented in such a way that we
+   can quickly search for elements in the list.  We give it a slightly richer
+   interface than just a hash and a list.  The idea is that we want to separate
+   the hash part and the list part: basically, in the decoder, we want to have a
+   single hash for the current frame and the next frame, because by the time we
+   need to access the hash for the next frame we no longer need the hash for the
+   previous frame.  So we have an operation that clears the hash but leaves the
+   list structure intact.  We also control memory management inside this object,
+   to avoid repeated new's/deletes.
+
+   See hash-list-test.cc for an example of how to use this object.
+*/
+
+
+namespace kaldi {
+
+template<class I, class T> class HashList {
+
+ public:
+  struct Elem {
+    I key;  // key of this element; the bucket index is derived from it
+    T val;  // value stored with the key; users may modify it via Find()
+    Elem *tail;  // next element in the list (NULL at the end of the list)
+  };
+
+  /// Constructor takes no arguments.  Call SetSize to inform it of the likely size.
+  HashList();
+
+  /// Clears the hash and gives the head of the current list to the user;
+  /// ownership is transferred to the user (the user must call Delete()
+  /// for each element in the list, whenever convenient).
+  Elem *Clear();
+
+  /// Gives the head of the current list to the user.  Ownership retained in the
+  /// class.  Caution: in December 2013 the return type was changed to const Elem*
+  /// and this function was made const.  You may need to change some types of
+  /// local Elem* variables to const if this produces compilation errors.
+  const Elem *GetList() const;
+
+  /// Think of this like delete().  It is to be called for each Elem in turn
+  /// after you "obtained ownership" by calling Clear().  This is not the opposite
+  /// of Insert, it is the opposite of New.  It's really a memory operation.
+  inline void Delete(Elem *e);
+
+  /// Users should not normally need to call this directly.  Think of it as the
+  /// opposite of Delete().
+  inline Elem *New();
+
+  /// Find tries to find this element in the current list using the hashtable.
+  /// It returns NULL if not present.  The Elem it returns is not owned by the user,
+  /// it is part of the internal list owned by this object, but the user is
+  /// free to modify the "val" element.
+  inline Elem *Find(I key);
+  
+  /// Insert inserts a new element into the hashtable/stored list.  By calling this,
+  /// the user asserts that it is not already present (e.g. Find was called and
+  /// returned NULL).  With current code, calling this if an element already exists will
+  /// result in duplicate elements in the structure, and Find() will find the
+  /// first one that was added.  [but we don't guarantee this behavior].
+  inline void Insert(I key, T val);
+
+  /// InsertMore inserts another element with the same key into the hashtable/stored
+  /// list.  By calling this, the user asserts that one element with that key is
+  /// already present.  It is inserted such that all elements with the same key
+  /// follow each other.  Find() will return the first of the elements with that key.
+  inline void InsertMore(I key, T val);
+
+  /// SetSize tells the object how many hash buckets to allocate (should typically be
+  /// at least twice the number of objects we expect to go in the structure, for fastest
+  /// performance).  It must be called while the hash is empty (e.g. after Clear() or
+  /// after initializing the object, but before adding anything to the hash).
+  void SetSize(size_t sz);
+
+  /// Returns current number of hash buckets.
+  inline size_t Size() { return hash_size_; }
+
+  ~HashList();
+ private:
+
+  struct HashBucket {
+    size_t prev_bucket;  // index of the bucket that was the tail of the bucket list when this one
+    // became occupied (-1 if there was none).  The bucket list runs opposite to the Elem list.
+    Elem *last_elem;  // pointer to last element in this bucket (NULL if empty)
+    inline HashBucket(size_t i, Elem *e): prev_bucket(i), last_elem(e) {}
+  };
+
+  Elem *list_head_;  // head of currently stored list.
+  size_t bucket_list_tail_;  // tail of list of active hash buckets (-1 if no bucket is active).
+
+  size_t hash_size_;  // number of hash buckets.
+
+  std::vector<HashBucket> buckets_;  // the hash buckets, indexed by key % hash_size_.
+
+  Elem *freed_head_;  // head of list of currently freed elements. [ready for allocation]
+
+  std::vector<Elem*> allocated_;  // list of allocated blocks.
+
+  static const size_t allocate_block_size_ = 1024;  // Number of Elements to allocate in one block.  Must be
+  // largish so storing allocated_ doesn't become a problem.
+};
+
+
+} // end namespace kaldi
+
+#include "hash-list-inl.h"
+
+#endif
diff --git a/kaldi_io/src/kaldi/util/kaldi-holder-inl.h b/kaldi_io/src/kaldi/util/kaldi-holder-inl.h
new file mode 100644
index 0000000..6a66e61
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/kaldi-holder-inl.h
@@ -0,0 +1,800 @@
+// util/kaldi-holder-inl.h
+
+// Copyright 2009-2011     Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_UTIL_KALDI_HOLDER_INL_H_
+#define KALDI_UTIL_KALDI_HOLDER_INL_H_
+
+#include <algorithm>
+#include "util/kaldi-io.h"
+#include "util/text-utils.h"
+#include "matrix/kaldi-matrix.h"
+
+namespace kaldi {
+
+/// \addtogroup holders
+/// @{
+
+
+// KaldiObjectHolder is valid only for Kaldi objects with
+// copy constructors, default constructors, and "normal"
+// Kaldi Write and Read functions.  E.g. it works for
+// Matrix and Vector.
+template<class KaldiType> class KaldiObjectHolder {
+ public:
+  typedef KaldiType T;
+
+  KaldiObjectHolder(): t_(NULL) { }
+
+  // Writes t to os using the object's own Write function, preceded by the
+  // binary header when binary is true.  Returns os.good(); returns false if
+  // a std::exception was caught while writing.
+  static bool Write(std::ostream &os, bool binary, const T &t) {
+    InitKaldiOutputStream(os, binary);  // Puts binary header if binary mode.
+    try {
+      t.Write(os, binary);
+      return os.good();
+    } catch (const std::exception &e) {
+      KALDI_WARN << "Exception caught writing Table object: " << e.what();
+      if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
+      return false;  // Write failure.
+    }
+  }
+
+  // Releases the held object, if any.
+  void Clear() {
+    delete t_;  // deleting a NULL pointer is a no-op.
+    t_ = NULL;
+  }
+
+  // Reads into the holder.  Returns true on success.  On any failure the
+  // holder is left empty, so a later Value() call reports the misuse instead
+  // of returning a stale or default-constructed object.
+  bool Read(std::istream &is) {
+    // Don't want any existing state to complicate the read function: drop the
+    // old object.  The pointer is reset before anything below can throw, so
+    // the destructor can never delete the old object a second time.
+    Clear();
+    bool is_binary;
+    if (!InitKaldiInputStream(is, &is_binary)) {
+      KALDI_WARN << "Reading Table object, failed reading binary header\n";
+      return false;
+    }
+    t_ = new T;  // get a new object to read into.
+    try {
+      t_->Read(is, is_binary);
+      return true;
+    } catch (const std::exception &e) {
+      KALDI_WARN << "Exception caught reading Table object ";
+      if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
+      Clear();
+      return false;
+    }
+  }
+
+  // Kaldi objects always have the stream open in binary mode for
+  // reading.
+  static bool IsReadInBinary() { return true; }
+
+  // Returns the object most recently read.  Calling this while the holder is
+  // empty is a code error.
+  const T &Value() const {
+    // code error if !t_.
+    if (!t_) KALDI_ERR << "KaldiObjectHolder::Value() called wrongly.";
+    return *t_;
+  }
+
+  ~KaldiObjectHolder() { delete t_; }
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(KaldiObjectHolder);
+  T *t_;  // owned; NULL while the holder is empty.
+};
+
+
+// BasicHolder is valid for float, double, bool, and integer
+// types.  There will be a compile time error otherwise, because
+// we make sure that the {Write, Read}BasicType functions do not
+// get instantiated for other types.
+
+template<class BasicType> class BasicHolder {
+ public:
+  typedef BasicType T;
+
+  // The held value starts out as -1 converted to T.
+  BasicHolder(): t_(static_cast<T>(-1)) { }
+
+  // Writes t to os (followed by a newline in text mode).  Returns os.good(),
+  // or false if a std::exception was caught while writing.
+  static bool Write(std::ostream &os, bool binary, const T &t) {
+    InitKaldiOutputStream(os, binary);  // Emits the binary header in binary mode.
+    bool ok = false;
+    try {
+      WriteBasicType(os, binary, t);
+      // One value per line keeps the text format readable and easy to
+      // manipulate.
+      if (!binary) os << '\n';
+      ok = os.good();
+    } catch (const std::exception &e) {
+      KALDI_WARN << "Exception caught writing Table object: " << e.what();
+      if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
+    }
+    return ok;
+  }
+
+  // Nothing to release: the value is stored inline.
+  void Clear() { }
+
+  // Reads one value into the holder; returns true on success.  In text mode
+  // the value must be on the current line and be followed by a newline,
+  // which is consumed.
+  bool Read(std::istream &is) {
+    bool is_binary;
+    if (!InitKaldiInputStream(is, &is_binary)) {
+      KALDI_WARN << "Reading Table object [integer type], failed reading binary header\n";
+      return false;
+    }
+    try {
+      if (is_binary) {
+        ReadBasicType(is, is_binary, &t_);
+        return true;
+      }
+      // The checks around the read below only exist to catch errors; the
+      // class would work without them.
+      SkipInlineSpace(is);
+      if (is.peek() == '\n') {
+        // An empty line before the token is a more-likely-than-average kind
+        // of error, and ReadBasicType would silently eat it up.
+        KALDI_WARN << "Found newline but expected basic type.";
+        return false;
+      }
+
+      ReadBasicType(is, is_binary, &t_);
+
+      SkipInlineSpace(is);
+      if (is.peek() != '\n') {
+        KALDI_WARN << "BasicHolder::Read, expected newline, got "
+                   << CharToString(is.peek()) << ", position " << is.tellg();
+        return false;
+      }
+      is.get();  // Consume the newline.
+      return true;
+    } catch (const std::exception &e) {
+      KALDI_WARN << "Exception caught reading Table object";
+      if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
+      return false;
+    }
+  }
+
+  // Objects read/written with the Kaldi I/O functions always have the stream
+  // open in binary mode for reading.
+  static bool IsReadInBinary() { return true; }
+
+  // Returns the value most recently read (or the initial value).
+  const T &Value() const {
+    return t_;
+  }
+
+  ~BasicHolder() { }
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(BasicHolder);
+
+  // Consumes whitespace characters from is, stopping in front of the first
+  // newline or non-space character.
+  static void SkipInlineSpace(std::istream &is) {
+    for (int ch = is.peek();
+         isspace(ch) && ch != static_cast<int>('\n');
+         ch = is.peek())
+      is.get();
+  }
+
+  T t_;
+};
+
+
+/// A Holder for a vector of basic types, e.g.
+/// std::vector<int32>, std::vector<float>, and so on.
+/// Note: a basic type is defined as a type for which ReadBasicType
+/// and WriteBasicType are implemented, i.e. integer and floating
+/// types, and bool.
+template<class BasicType> class BasicVectorHolder {
+ public:
+  typedef std::vector<BasicType> T;
+
+  BasicVectorHolder() { }
+
+  // Writes t to os.  Binary mode writes the element count as an int32
+  // followed by the elements; text mode writes the elements followed by a
+  // newline, e.g. "1 2 3\n".  Returns os.good(), or false if a std::exception
+  // was caught while writing.
+  static bool Write(std::ostream &os, bool binary, const T &t) {
+    InitKaldiOutputStream(os, binary);  // Emits the binary header in binary mode.
+    try {
+      if (binary) {
+        // The size is only needed in binary mode.  It is written as int32
+        // because that has a fixed size regardless of compilation; a vector
+        // too large for that cannot be handled by this routine.  Change to
+        // int64 (here and in Read) if this becomes a problem.
+        KALDI_ASSERT(static_cast<size_t>(static_cast<int32>(t.size())) == t.size());
+        WriteBasicType(os, binary, static_cast<int32>(t.size()));
+      }
+      for (typename T::const_iterator it = t.begin(), last = t.end();
+           it != last; ++it)
+        WriteBasicType(os, binary, *it);
+      // The trailing newline makes the text format more readable and easier
+      // to manipulate.
+      if (!binary) os << '\n';
+      return os.good();
+    } catch (const std::exception &e) {
+      KALDI_WARN << "Exception caught writing Table object (BasicVector). ";
+      if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
+      return false;  // Write failure.
+    }
+  }
+
+  void Clear() { t_.clear(); }
+
+  // Reads a vector into the holder, replacing the previous contents.
+  // Returns true on success.
+  bool Read(std::istream &is) {
+    t_.clear();
+    bool is_binary;
+    if (!InitKaldiInputStream(is, &is_binary)) {
+      KALDI_WARN << "Reading Table object [integer type], failed reading binary header\n";
+      return false;
+    }
+    if (is_binary) {
+      size_t filepos = is.tellg();  // only used in the warning below.
+      try {
+        int32 size;
+        ReadBasicType(is, true, &size);
+        t_.resize(size);
+        for (size_t i = 0; i < t_.size(); i++)
+          ReadBasicType(is, true, &t_[i]);
+        return true;
+      } catch (...) {
+        KALDI_WARN << "BasicVectorHolder::Read, read error or unexpected data at archive entry beginning at file position " << filepos;
+        return false;
+      }
+    }
+    // Text mode: the entry is terminated by a newline, so take one whole
+    // line (the '\n', if present, is discarded) and parse it on its own.
+    std::string line;
+    std::getline(is, line);
+    if (is.fail()) {
+      KALDI_WARN << "BasicVectorHolder::Read, error reading line " << (is.eof() ? "[eof]" : "");
+      return false;  // probably eof.  fail in any case.
+    }
+    std::istringstream line_is(line);
+    try {
+      // Skip whitespace before each value; stop once the line is used up.
+      for (line_is >> std::ws; !line_is.eof(); line_is >> std::ws) {
+        BasicType value;
+        ReadBasicType(line_is, false, &value);
+        t_.push_back(value);
+      }
+      return true;
+    } catch (const std::exception &e) {
+      KALDI_WARN << "BasicVectorHolder::Read, could not interpret line: " << line;
+      if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
+      return false;
+    }
+  }
+
+  // Objects read/written with the Kaldi I/O functions always have the stream
+  // open in binary mode for reading.
+  static bool IsReadInBinary() { return true; }
+
+  // Returns the vector most recently read.
+  const T &Value() const {  return t_; }
+
+  ~BasicVectorHolder() { }
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(BasicVectorHolder);
+  T t_;
+};
+
+
+/// BasicVectorVectorHolder is a Holder for a vector of vector of
+/// a basic type, e.g. std::vector<std::vector<int32> >.
+/// Note: a basic type is defined as a type for which ReadBasicType
+/// and WriteBasicType are implemented, i.e. integer and floating
+/// types, and bool.
+template<class BasicType> class BasicVectorVectorHolder {
+ public:
+  typedef std::vector<std::vector<BasicType> > T;  // the type held and written by this holder.
+
+  BasicVectorVectorHolder() { }
+
+  static bool Write(std::ostream &os, bool binary, const T &t) {  // returns os.good(), or false if a std::exception was caught.
+    InitKaldiOutputStream(os, binary);  // Puts binary header if binary mode.
+    try {
+      if (binary) {  // need to write the size, in binary mode.
+        KALDI_ASSERT(static_cast<size_t>(static_cast<int32>(t.size())) == t.size());
+        // The assert above fails if the size of t does not fit in an int32.
+        // We use int32 because it's fixed size regardless of compilation;
+        // change to int64 (here and in the Read function) if this becomes a problem.
+        WriteBasicType(os, binary, static_cast<int32>(t.size()));
+        for (typename std::vector<std::vector<BasicType> >::const_iterator iter = t.begin();
+            iter != t.end(); ++iter) {
+          KALDI_ASSERT(static_cast<size_t>(static_cast<int32>(iter->size())) == iter->size());
+          WriteBasicType(os, binary, static_cast<int32>(iter->size()));  // each inner vector is preceded by its own size.
+          for (typename std::vector<BasicType>::const_iterator iter2=iter->begin();
+              iter2 != iter->end(); ++iter2) {
+            WriteBasicType(os, binary, *iter2);
+          }
+        }
+      } else {  // text mode...
+        // In text mode, we write out something like (for integers):
+        // "1 2 3 ; 4 5 ; 6 ; ; 7 8 9 ;\n"
+        // where the semicolon is a terminator, not a separator
+        // (a separator would cause ambiguity between an
+        // empty list, and a list containing a single empty list).
+        for (typename std::vector<std::vector<BasicType> >::const_iterator iter = t.begin();
+            iter != t.end();
+             ++iter) {
+          for (typename std::vector<BasicType>::const_iterator iter2=iter->begin();
+               iter2 != iter->end(); ++iter2)
+            WriteBasicType(os, binary, *iter2);
+          os << "; ";  // terminates this inner vector (also written for an empty one).
+        }
+        os << '\n';  // the newline ends the whole entry.
+      }
+      return os.good();
+    } catch (const std::exception &e) {
+      KALDI_WARN << "Exception caught writing Table object. ";
+      if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
+      return false;  // Write failure.
+    }
+  }
+
+  void Clear() { t_.clear(); }
+
+  // Reads into the holder, replacing the previous contents; returns true on success.
+  bool Read(std::istream &is) {
+    t_.clear();
+    bool is_binary;
+    if (!InitKaldiInputStream(is, &is_binary)) {
+      KALDI_WARN << "Failed reading binary header\n";
+      return false;
+    }
+    if (!is_binary) {
+      // In text mode, we terminate with newline.
+      try {  // catching errors from ReadBasicType..
+        std::vector<BasicType> v;  // temporary vector
+        while (1) {
+          int i = is.peek();  // look at the next character without consuming it.
+          if (i == -1) {  // the input ended before the terminating newline.
+            KALDI_WARN << "Unexpected EOF";
+            return false;
+          } else if (static_cast<char>(i) == '\n') {  // a newline ends the entry.
+            if (!v.empty()) {  // values were read after the last ';'.
+              KALDI_WARN << "No semicolon before newline (wrong format)";
+              return false;
+            } else { is.get(); return true; }  // consume the newline; the entry is complete.
+          } else if (std::isspace(i)) {  // skip any other whitespace.
+            is.get();
+          } else if (static_cast<char>(i) == ';') {  // ';' terminates the current inner vector.
+            t_.push_back(v);
+            v.clear();
+            is.get();
+          } else {  // some object we want to read...
+            BasicType b;
+            ReadBasicType(is, false, &b);  // throws on error.
+            v.push_back(b);
+          }
+        }
+      } catch(std::exception &e) {
+        KALDI_WARN << "BasicVectorVectorHolder::Read, read error";
+        if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
+        return false;
+      }
+    } else {  // binary mode.
+      size_t filepos = is.tellg();  // only used in the warning below.
+      try {
+        int32 size;  // number of inner vectors.
+        ReadBasicType(is, true, &size);
+        t_.resize(size);
+        for (typename std::vector<std::vector<BasicType> >::iterator iter = t_.begin();
+            iter != t_.end();
+            ++iter) {
+          int32 size2;  // number of elements in this inner vector.
+          ReadBasicType(is, true, &size2);
+          iter->resize(size2);
+          for (typename std::vector<BasicType>::iterator iter2 = iter->begin();
+              iter2 != iter->end();
+              ++iter2)
+            ReadBasicType(is, true, &(*iter2));
+        }
+        return true;
+      } catch (...) {
+        KALDI_WARN << "Read error or unexpected data at archive entry beginning at file position " << filepos;
+        return false;
+      }
+    }
+  }
+
+  // Objects read/written with the Kaldi I/O functions always have the stream
+  // open in binary mode for reading.
+  static bool IsReadInBinary() { return true; }
+
+  const T &Value() const {  return t_; }  // the data most recently read.
+
+  ~BasicVectorVectorHolder() { }
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(BasicVectorVectorHolder);
+  T t_;
+};
+
+
+/// BasicPairVectorHolder is a Holder for a vector of pairs of
+/// a basic type, e.g. std::vector<std::pair<int32, int32> >.
+/// Note: a basic type is defined as a type for which ReadBasicType
+/// and WriteBasicType are implemented, i.e. integer and floating
+/// types, and bool.
+template<class BasicType> class BasicPairVectorHolder {
+ public:
+  typedef std::vector<std::pair<BasicType, BasicType> > T;
+
+  BasicPairVectorHolder() { }
+
+  // Writes t to os.  Binary mode writes the number of pairs as an int32
+  // followed by the two members of each pair; text mode writes the pairs on
+  // one line, separated by semicolons.  Returns os.good(), or false if a
+  // std::exception was caught while writing.
+  static bool Write(std::ostream &os, bool binary, const T &t) {
+    InitKaldiOutputStream(os, binary);  // Puts binary header if binary mode.
+    try {
+      if (binary) {  // need to write the size, in binary mode.
+        KALDI_ASSERT(static_cast<size_t>(static_cast<int32>(t.size())) == t.size());
+        // Or this Write routine cannot handle such a large vector.
+        // use int32 because it's fixed size regardless of compilation.
+        // change to int64 (plus in Read function) if this becomes a problem.
+        WriteBasicType(os, binary, static_cast<int32>(t.size()));
+        for (typename T::const_iterator iter = t.begin();
+            iter != t.end(); ++iter) {
+          WriteBasicType(os, binary, iter->first);
+          WriteBasicType(os, binary, iter->second);
+        }
+      } else {  // text mode...
+        // In text mode, we write out something like (for integers):
+        // "1 2 ; 4 5 ; 6 7 ; 8 9 \n"
+        // where the semicolon is a separator, not a terminator.
+        for (typename T::const_iterator iter = t.begin();
+             iter != t.end();) {
+          WriteBasicType(os, binary, iter->first);
+          WriteBasicType(os, binary, iter->second);
+          ++iter;
+          if (iter != t.end())
+            os << "; ";
+        }
+        os << '\n';
+      }
+      return os.good();
+    } catch (const std::exception &e) {
+      KALDI_WARN << "Exception caught writing Table object. ";
+      if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
+      return false;  // Write failure.
+    }
+  }
+
+  void Clear() { t_.clear(); }
+
+  // Reads into the holder, replacing the previous contents.  Returns true
+  // on success.
+  bool Read(std::istream &is) {
+    t_.clear();
+    bool is_binary;
+    if (!InitKaldiInputStream(is, &is_binary)) {
+      KALDI_WARN << "Reading Table object [integer type], failed reading binary header\n";
+      return false;
+    }
+    if (!is_binary) {
+      // In text mode, we terminate with newline.
+      try {  // catching errors from ReadBasicType..
+        std::vector<BasicType> v;  // members read since the last semicolon.
+        while (1) {
+          int i = is.peek();
+          if (i == -1) {
+            KALDI_WARN << "Unexpected EOF";
+            return false;
+          } else if (static_cast<char>(i) == '\n') {
+            if (t_.empty() && v.empty()) {
+              // An empty line is an empty vector.
+              is.get();
+              return true;
+            } else if (v.size() == 2) {
+              // The final pair has no semicolon after it.
+              t_.push_back(std::make_pair(v[0], v[1]));
+              is.get();
+              return true;
+            } else {
+              KALDI_WARN << "Unexpected newline, reading vector<pair<?> >; got "
+                         << v.size() << " elements, expected 2.";
+              return false;
+            }
+          } else if (std::isspace(i)) {
+            is.get();
+          } else if (static_cast<char>(i) == ';') {
+            if (v.size() != 2) {
+              KALDI_WARN << "Wrong input format, reading vector<pair<?> >; got "
+                         << v.size() << " elements, expected 2.";
+              return false;
+            }
+            t_.push_back(std::make_pair(v[0], v[1]));
+            v.clear();
+            is.get();
+          } else {  // some object we want to read...
+            BasicType b;
+            ReadBasicType(is, false, &b);  // throws on error.
+            v.push_back(b);
+          }
+        }
+      } catch (const std::exception &e) {
+        KALDI_WARN << "BasicPairVectorHolder::Read, read error";
+        if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
+        return false;
+      }
+    } else {  // binary mode.
+      size_t filepos = is.tellg();  // only used in the warning below.
+      try {
+        int32 size;
+        ReadBasicType(is, true, &size);
+        t_.resize(size);
+        for (typename T::iterator iter = t_.begin();
+            iter != t_.end();
+            ++iter) {
+          ReadBasicType(is, true, &(iter->first));
+          ReadBasicType(is, true, &(iter->second));
+        }
+        return true;
+      } catch (...) {
+        // Name this class in the message so the log points at the right
+        // holder.
+        KALDI_WARN << "BasicPairVectorHolder::Read, read error or unexpected data at archive entry beginning at file position " << filepos;
+        return false;
+      }
+    }
+  }
+
+  // Objects read/written with the Kaldi I/O functions always have the stream
+  // open in binary mode for reading.
+  static bool IsReadInBinary() { return true; }
+
+  // Returns the vector of pairs most recently read.
+  const T &Value() const {  return t_; }
+
+  ~BasicPairVectorHolder() { }
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(BasicPairVectorHolder);
+  T t_;
+};
+
+
+
+
+// We define a Token as a nonempty, printable, whitespace-free std::string.
+// The binary and text formats here are the same (newline-terminated)
+// and as such we don't bother with the binary-mode headers.
+class TokenHolder {
+ public:
+  typedef std::string T;
+
+  TokenHolder() {}
+
+  // Writes the token followed by a newline; the binary flag is ignored.
+  // Returns os.good().
+  static bool Write(std::ostream &os, bool, const T &t) {  // ignore binary-mode.
+    KALDI_ASSERT(IsToken(t));
+    os << t << '\n';
+    return os.good();
+  }
+
+  void Clear() { t_.clear(); }
+
+  // Reads one token into the holder and consumes the newline that must
+  // follow it (other whitespace in between is skipped).  Returns false,
+  // after a warning where applicable, if no token could be read or anything
+  // other than whitespace sits between the token and the newline; this is
+  // the same warn-and-return-false convention the other holders in this
+  // file use.
+  bool Read(std::istream &is) {
+    is >> t_;
+    if (is.fail()) return false;
+    // Keep the result of peek() in an int: narrowing it to char first would
+    // hand isspace() a negative value for bytes >= 0x80 where char is
+    // signed, which is undefined behavior.
+    int c;
+    while (isspace(c = is.peek()) && c != '\n') is.get();
+    if (is.peek() != '\n') {
+      KALDI_WARN << "TokenHolder::Read, expected newline, got char " << CharToString(is.peek())
+                 << ", at stream pos " << is.tellg();
+      return false;
+    }
+    is.get();  // get '\n'
+    return true;
+  }
+
+
+  // Since this is fundamentally a text format, read in text mode (would work
+  // fine either way, but doing it this way will exercise more of the code).
+  static bool IsReadInBinary() { return false; }
+
+  // Returns the token most recently read.
+  const T &Value() const { return t_; }
+
+  ~TokenHolder() { }
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(TokenHolder);
+  T t_;
+};
+
+// A Token is a nonempty, whitespace-free std::string.
+// Class TokenVectorHolder is a Holder class for vectors of these.
+class TokenVectorHolder {
+ public:
+  typedef std::vector<std::string> T;
+
+  TokenVectorHolder() { }
+
+  // Writes the tokens on one line, each followed by a space, then a
+  // newline; the binary flag is ignored.  Returns os.good().
+  static bool Write(std::ostream &os, bool, const T &t) {  // ignore binary-mode.
+    for (std::vector<std::string>::const_iterator iter = t.begin();
+        iter != t.end();
+        ++iter) {
+      KALDI_ASSERT(IsToken(*iter));  // make sure it's whitespace-free, printable and nonempty.
+      os << *iter << ' ';
+    }
+    os << '\n';
+    return os.good();
+  }
+
+  void Clear() { t_.clear(); }
+
+
+  // Reads one line into the holder and splits it on whitespace, replacing
+  // the previous contents.  Returns false if no line could be read.
+  bool Read(std::istream &is) {
+    t_.clear();
+
+    // there is no binary/non-binary mode.
+
+    std::string line;
+    getline(is, line);  // this will discard the \n, if present.
+    if (is.fail()) {
+      // Name this class in the message so the log points at the right
+      // holder.
+      KALDI_WARN << "TokenVectorHolder::Read, error reading line " << (is.eof() ? "[eof]" : "");
+      return false;  // probably eof.  fail in any case.
+    }
+    const char *white_chars = " \t\n\r\f\v";
+    SplitStringToVector(line, white_chars, true, &t_);  // true== omit empty strings e.g.
+    // between spaces.
+    return true;
+  }
+
+  // Read in text format since it's basically a text-mode thing.. doesn't really matter,
+  // it would work either way since we ignore the extra '\r'.
+  static bool IsReadInBinary() { return false; }
+
+  // Returns the tokens most recently read.
+  const T &Value() const { return t_; }
+
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(TokenVectorHolder);
+  T t_;
+};
+
+
+class HtkMatrixHolder {
+ public:
+  typedef std::pair<Matrix<BaseFloat>, HtkHeader> T;
+
+  HtkMatrixHolder() {}
+
+  // Writes the matrix t.first with the header t.second via WriteHtk and
+  // returns its result, warning on failure.  Text mode is not supported and
+  // is reported through KALDI_ERR.
+  static bool Write(std::ostream &os, bool binary, const T &t) {
+    if (!binary)
+      KALDI_ERR << "Non-binary HTK-format write not supported.";
+    if (WriteHtk(os, t.first, t.second))
+      return true;
+    KALDI_WARN << "Error detected writing HTK-format matrix.";
+    return false;
+  }
+
+  // Empties the matrix; the header is left as it is.
+  void Clear() { t_.first.Resize(0, 0); }
+
+  // Reads matrix and header into the holder via ReadHtk; warns and returns
+  // false on failure.
+  bool Read(std::istream &is) {
+    if (ReadHtk(is, &t_.first, &t_.second))
+      return true;
+    KALDI_WARN << "Error detected reading HTK-format matrix.";
+    return false;
+  }
+
+  // HTK-format matrices only read in binary.
+  static bool IsReadInBinary() { return true; }
+
+  // Returns the (matrix, header) pair most recently read.
+  const T &Value() const { return t_; }
+
+
+  // No destructor.
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(HtkMatrixHolder);
+  T t_;
+};
+
+// SphinxMatrixHolder can be used to read and write feature files in
+// CMU Sphinx format. 13-dimensional big-endian features are assumed.
+// The ultimate reference is SphinxBase's source code (for example see
+// feat_s2mfc_read() in src/libsphinxbase/feat/feat.c).
+// We can't fully automate the detection of machine/feature file endianness
+// mismatch here, because for this Sphinx relies on comparing the feature
+// file's size with the number recorded in its header. We are working with
+// streams, however (what happens if this is a Kaldi archive?). This should
+// be no problem, because the usage help of Sphinx's "wave2feat", for example,
+// says that Sphinx features are always big endian.
+// Note: the kFeatDim defaults to 13, see forward declaration in kaldi-holder.h
+template<int kFeatDim> class SphinxMatrixHolder {
+ public:
+  typedef Matrix<BaseFloat> T;
+
+  SphinxMatrixHolder() {}
+
+  void Clear() { feats_.Resize(0, 0); }
+
+  // Writes Sphinx-format features: an int32 header holding the total number
+  // of values, followed by the rows of m as 4-byte floats.  Header and
+  // values are byte-swapped on little-endian machines.  Text mode is not
+  // supported (warns and returns false).  Returns os.good(), so a failed
+  // stream is reported to the caller.
+  static bool Write(std::ostream &os, bool binary, const T &m) {
+    if (!binary) {
+      KALDI_WARN << "SphinxMatrixHolder can't write Sphinx features in text ";
+      return false;
+    }
+
+    int32 size = m.NumRows() * m.NumCols();
+    if (MachineIsLittleEndian())
+      KALDI_SWAP4(size);
+    os.write(reinterpret_cast<const char*>(&size), sizeof(size));  // write the header
+
+    // One row buffer reused for every row; a std::vector instead of a
+    // variable-length array, which is not standard C++.
+    std::vector<float32> tmp(m.NumCols());
+    for (MatrixIndexT i = 0; i < m.NumRows(); i++) {
+      for (MatrixIndexT j = 0; j < m.NumCols(); j++) {
+        tmp[j] = static_cast<float32>(m(i, j));
+        if (MachineIsLittleEndian())
+          KALDI_SWAP4(tmp[j]);
+      }
+      if (!tmp.empty())
+        os.write(reinterpret_cast<const char*>(&tmp[0]),
+                 tmp.size() * sizeof(float32));
+    }
+
+    return os.good();
+  }
+
+  // Reads the features into a Kaldi Matrix with kFeatDim columns.  Returns
+  // false (after a warning) if the header or a row cannot be read, or if the
+  // value count in the header is negative or not a multiple of kFeatDim.
+  bool Read(std::istream &is) {
+    int32 nmfcc = 0;
+
+    is.read(reinterpret_cast<char*>(&nmfcc), sizeof(nmfcc));
+    if (!is.good()) {
+      // Without this check a failed read would leave the count meaningless.
+      KALDI_WARN << "Unexpected error/EOF while reading Sphinx feature header ";
+      return false;
+    }
+    if (MachineIsLittleEndian())
+      KALDI_SWAP4(nmfcc);
+    KALDI_VLOG(2) << "#feats: " << nmfcc;
+    if (nmfcc < 0 || (nmfcc % kFeatDim) != 0) {
+      KALDI_WARN << "Sphinx feature count is inconsistent with vector length ";
+      return false;
+    }
+    int32 nfvec = nmfcc / kFeatDim;
+
+    feats_.Resize(nfvec, kFeatDim);
+    for (MatrixIndexT i = 0; i < feats_.NumRows(); i++) {
+      if (sizeof(BaseFloat) == sizeof(float32)) {
+        // Same width as the file format: read straight into the matrix row.
+        is.read(reinterpret_cast<char*>(feats_.RowData(i)),
+                kFeatDim * sizeof(float32));
+        if (!is.good()) {
+          KALDI_WARN << "Unexpected error/EOF while reading Sphinx features ";
+          return false;
+        }
+        if (MachineIsLittleEndian()) {
+          for (MatrixIndexT j=0; j < kFeatDim; j++)
+            KALDI_SWAP4(feats_(i, j));
+        }
+      } else { // KALDI_DOUBLEPRECISION=1
+        // Different width: read into a float32 buffer and convert.
+        float32 tmp[kFeatDim];
+        is.read(reinterpret_cast<char*>(tmp), sizeof(tmp));
+        if (!is.good()) {
+          KALDI_WARN << "Unexpected error/EOF while reading Sphinx features ";
+          return false;
+        }
+        for (MatrixIndexT j=0; j < kFeatDim; j++) {
+          if (MachineIsLittleEndian())
+            KALDI_SWAP4(tmp[j]);
+          feats_(i, j) = static_cast<BaseFloat>(tmp[j]);
+        }
+      }
+    }
+
+    return true;
+  }
+
+  // Only read in binary
+  static bool IsReadInBinary() { return true; }
+
+  // Returns the feature matrix most recently read.
+  const T &Value() const { return feats_; }
+
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(SphinxMatrixHolder);
+  T feats_;
+};
+
+
+/// @} end "addtogroup holders"
+
+} // end namespace kaldi
+
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/util/kaldi-holder.h b/kaldi_io/src/kaldi/util/kaldi-holder.h
new file mode 100644
index 0000000..95f1183
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/kaldi-holder.h
@@ -0,0 +1,207 @@
+// util/kaldi-holder.h
+
+// Copyright 2009-2011     Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_UTIL_KALDI_HOLDER_H_
+#define KALDI_UTIL_KALDI_HOLDER_H_
+
+#include <algorithm>
+#include "util/kaldi-io.h"
+#include "util/text-utils.h"
+#include "matrix/kaldi-vector.h"
+
+namespace kaldi {
+
+
+// The Table class uses a Holder class to wrap objects, and make them behave
+// in a "normalized" way w.r.t. reading and writing, so the Table class can
+// be template-ized without too much trouble. Look below this
+// comment (search for GenericHolder) to see what it looks like.
+//
+//  Requirements of the holder class:
+//
+// They can only contain objects that can be read/written without external
+// information; other objects cannot be stored in this type of archive.
+//
+// In terms of what functions it should have, see GenericHolder below.
+// It is just for documentation.
+//
+// (1) Requirements of the Read and Write functions
+//
+// The Read and Write functions should have the property that in a longer
+// file, if the Read function is started from where the Write function started
+// writing, it should go to where the Write function stopped writing, in either
+// text or binary mode (but it's OK if it doesn't eat up trailing space).
+//
+//     [Desirable property: when writing in text mode the output should contain
+//      exactly one newline, at the end of the output; this makes it easier to manipulate]
+//
+//     [Desirable property for classes: the output should just be a binary-mode
+//      header (if in binary mode and it's a Kaldi object, or no header
+//      otherwise), and then the output of Object.Write().  This means that when
+//      written to individual files with the scp: type of wspecifier, we can read
+//      the individual files in the "normal" Kaldi way by reading the binary
+//      header and then the object.]
+//
+//
+// The Write function takes a 'binary' argument.  In general, each object will
+// have two formats: text and binary.  However, it's permitted to throw() if
+// asked to read in the text format if there is none.  The file will be open, if
+// the file system has binary/text modes, in the corresponding mode.  However,
+// the object should have a file-mode in which it can read either text or binary
+// output.  It announces this via the static IsReadInBinary() function.  This
+// will generally be the binary mode and it means that where necessary, in text
+// formats, we must ignore \r characters.
+//
+// Memory requirements: if it allocates memory, the destructor should
+// free that memory.  Copying and assignment of Holder objects may be
+// disallowed as the Table code never does this.
+
+
+/// GenericHolder serves to document the requirements of the Holder interface;
+/// it's not intended to be used.
+template<class SomeType> class GenericHolder {
+ public:
+  typedef SomeType T;  // Table code refers to the held type as Holder::T.
+
+  /// Must have a constructor that takes no arguments.
+  GenericHolder() { }
+
+  /// Write writes this object of type T.  Possibly also writes a binary-mode
+  /// header so that the Read function knows which mode to read in (since the
+  /// Read function does not get this information).  It's a static member so we
+  /// can write those not inside this class (can use this function with Value()
+  /// to write from this class).  The Write method may throw if it cannot write
+  /// the object in the given (binary/non-binary) mode.  The holder object can
+  /// assume the stream has been opened in the given mode (where relevant).  The
+  /// object can write the data how it likes.
+  static bool Write(std::ostream &os, bool binary, const T &t);
+  
+  /// Reads into the holder.  Must work out from the stream (which will be opened
+  /// on Windows in binary mode if the IsReadInBinary() function of this class
+  /// returns true, and text mode otherwise) whether the actual data is binary or
+  /// not (usually via reading the Kaldi binary-mode header).  We put the
+  /// responsibility for reading the Kaldi binary-mode header in the Read
+  /// function (rather than making the binary mode an argument to this function),
+  /// so that for non-Kaldi binary files we don't have to write the header, which
+  /// would prevent the file being read by non-Kaldi programs (e.g. if we write
+  /// to individual files using an scp).
+  ///
+  /// Read must deallocate any existing data we have here, if applicable (must
+  /// not assume the object was newly constructed).
+  ///
+  /// Returns true on success.
+  bool Read(std::istream &is);
+
+  /// IsReadInBinary() will return true if the object wants the file to be
+  /// opened in binary for reading (if the file system has binary/text modes),
+  /// and false otherwise.  Static function.  Kaldi objects always return true
+  /// as they always read in binary mode.  Note that we must be able to read, in
+  /// this mode, objects written in both text and binary mode by Write (which
+  /// may mean ignoring "\r" characters).  I doubt we will ever want this
+  /// function to return false.
+  static bool IsReadInBinary() { return true; }
+
+  /// Returns the value of the object held here.  Will only
+  /// ever be called if Read() has been previously called and it returned
+  /// true (so OK to throw exception if no object was read).
+  const T &Value() const { return t_; } // if t_ were a T*, would return *t_;
+
+  /// The Clear() function doesn't have to do anything.  Its purpose is to
+  /// allow the object to free resources if they're no longer needed.
+  void Clear() { }
+
+  /// If the object held pointers, the destructor would free them.
+  ~GenericHolder() { }
+
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(GenericHolder);
+  T t_;  // t_ may alternatively be of type T*.
+};
+
+
+// See kaldi-holder-inl.h for examples of some actual Holder
+// classes and templates.
+
+
+// The following two typedefs should probably be in their own file, but they're
+// here until there are enough of them to warrant their own header.
+
+
+/// \addtogroup holders
+/// @{
+
+/// KaldiObjectHolder works for Kaldi objects that have the "standard" Read and Write
+/// functions, and a copy constructor.
+template<class KaldiType> class KaldiObjectHolder;
+
+/// BasicHolder is valid for float, double, bool, and integer
+/// types.  There will be a compile time error otherwise, because
+/// we make sure that the {Write, Read}BasicType functions do not
+/// get instantiated for other types.
+template<class BasicType> class BasicHolder;
+
+
+// A Holder for a vector of basic types, e.g.
+// std::vector<int32>, std::vector<float>, and so on.
+// Note: a basic type is defined as a type for which ReadBasicType
+// and WriteBasicType are implemented, i.e. integer and floating
+// types, and bool.
+template<class BasicType> class BasicVectorHolder;
+
+
+// A holder for vectors of vectors of basic types, e.g.
+// std::vector<std::vector<int32> >, and so on.
+// Note: a basic type is defined as a type for which ReadBasicType
+// and WriteBasicType are implemented, i.e. integer and floating
+// types, and bool.
+template<class BasicType> class BasicVectorVectorHolder;
+
+// A holder for vectors of pairs of basic types, e.g.
+// std::vector<std::pair<int32, int32> >, and so on.
+// Note: a basic type is defined as a type for which ReadBasicType
+// and WriteBasicType are implemented, i.e. integer and floating
+// types, and bool.  Text format is (e.g. for integers),
+// "1 12 ; 43 61 ; 17 8 \n"
+template<class BasicType> class BasicPairVectorHolder;
+
+/// We define a Token (not a typedef, just a word) as a nonempty, printable,
+/// whitespace-free std::string.  The binary and text formats here are the same
+/// (newline-terminated) and as such we don't bother with the binary-mode headers.
+class TokenHolder;
+
+/// Class TokenVectorHolder is a Holder class for vectors of Tokens (T == std::string).
+class TokenVectorHolder;
+
+/// A class for reading/writing HTK-format matrices.
+/// T == std::pair<Matrix<BaseFloat>, HtkHeader>
+class HtkMatrixHolder;
+
+/// A class for reading/writing Sphinx format matrices.
+template<int kFeatDim=13> class SphinxMatrixHolder;
+
+
+/// @} end "addtogroup holders"
+
+
+} // end namespace kaldi
+
+#include "kaldi-holder-inl.h"
+
+#endif
diff --git a/kaldi_io/src/kaldi/util/kaldi-io-inl.h b/kaldi_io/src/kaldi/util/kaldi-io-inl.h
new file mode 100644
index 0000000..7df7505
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/kaldi-io-inl.h
@@ -0,0 +1,45 @@
+// util/kaldi-io-inl.h
+
+// Copyright 2009-2011 Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_UTIL_KALDI_IO_INL_H_
+#define KALDI_UTIL_KALDI_IO_INL_H_
+
+
+namespace kaldi {
+
+bool Input::Open(const std::string &rxfilename, bool *binary) {
+  return this->OpenInternal(rxfilename, /* file_binary = */ true, binary);
+}
+
+bool Input::OpenTextMode(const std::string &rxfilename) {
+  return this->OpenInternal(rxfilename, /* file_binary = */ false, NULL);
+}
+
+bool Input::IsOpen() {
+  return !(impl_ == NULL);  // open exactly when an impl object is held
+}
+
+bool Output::IsOpen() {
+  return !(impl_ == NULL);  // open exactly when an impl object is held
+}
+
+
+}  // end namespace kaldi.
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/util/kaldi-io.h b/kaldi_io/src/kaldi/util/kaldi-io.h
new file mode 100644
index 0000000..f2c7563
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/kaldi-io.h
@@ -0,0 +1,264 @@
+// util/kaldi-io.h
+
+// Copyright 2009-2011  Microsoft Corporation;  Jan Silovsky
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_UTIL_KALDI_IO_H_
+#define KALDI_UTIL_KALDI_IO_H_
+
+#include <cctype>  // For isspace.
+#include <limits>
+#include <string>
+#include "base/kaldi-common.h"
+#ifdef _MSC_VER
+# include <fcntl.h>
+# include <io.h>
+#endif
+
+
+
+namespace kaldi {
+
+class OutputImplBase;  // Forward decl; defined in a .cc file
+class InputImplBase;  // Forward decl; defined in a .cc file
+
+/// \addtogroup io_group
+/// @{
+
+// The Output and Input classes handle stream-opening for "extended" filenames
+// that include actual files, standard-input/standard-output, pipes, and
+// offsets into actual files.  They also handle reading and writing the
+// binary-mode headers for Kaldi files, where applicable.  The classes have
+// versions of the Open routines that throw and do not throw, depending whether
+// the calling code wants to catch the errors or not; there are also versions
+// that write (or do not write) the Kaldi binary-mode header that says if it's
+// binary mode.  Generally files that contain Kaldi objects will have the header
+// on, so we know upon reading them whether they have the header.  So you would
+// use the OpenWithHeader routines for these (or the constructor); but other
+// types of objects (e.g. FSTs) would have files without a header so you would
+// use OpenNoHeader.
+
+// We now document the types of extended filenames that we use.
+//
+// A "wxfilename"  is an extended filename for writing.  It can take three forms:
+// (1) Filename: e.g.    "/some/filename", "./a/b/c", "c:\Users\dpovey\My Documents\\boo"
+//          (whatever the actual file-system interprets)
+// (2) Standard output:  "" or "-"
+// (3) A pipe: e.g.  "| gzip -c > /tmp/abc.gz"
+//
+//
+// A "rxfilename" is an extended filename for reading.  It can take four forms:
+// (1) An actual filename, whatever the file-system can read, e.g. "/my/file".
+// (2) Standard input: "" or "-"
+// (3) A pipe: e.g. "gunzip -c /tmp/abc.gz |"
+// (4) An offset into a file, e.g.: "/mnt/blah/data/1.ark:24871"
+//   [these are created by the Table and TableWriter classes; I may also write
+//    a program that creates them for arbitrary files]
+//
+
+
+// Typical usage:
+// ...
+// bool binary;
+// MyObject.Write(Output(some_filename, binary).Stream(), binary);
+//
+// ... more extensive example:
+// {
+//    Output ko(some_filename, binary);
+//    MyObject1.Write(ko.Stream(), binary);
+//    MyObject2.Write(ko.Stream(), binary);
+// }
+
+
+
+enum OutputType {
+  kNoOutput,        // invalid wxfilename
+  kFileOutput,      // normal filename
+  kStandardOutput,  // "" or "-"
+  kPipeOutput       // a pipe (command) to write to
+};
+
+/// ClassifyWxfilename interprets filenames as follows:
+///  - kNoOutput: invalid filenames (leading or trailing space, things that look
+///     like wspecifiers and rspecifiers or like pipes to read from with trailing |).
+///  - kFileOutput: Normal filenames
+///  - kStandardOutput: The empty string or "-", interpreted as standard output
+///  - kPipeOutput: pipes, e.g. "| gzip -c > some_file.gz"
+OutputType ClassifyWxfilename(const std::string &wxfilename);
+
+enum InputType {
+  kNoInput,          // invalid rxfilename
+  kFileInput,        // normal filename
+  kStandardInput,    // "" or "-"
+  kOffsetFileInput,  // offset into a file, e.g. /some/filename:12970
+  kPipeInput         // a pipe (command) to read from
+};
+
+/// ClassifyRxfilename interprets filenames for reading as follows:
+///  - kNoInput: invalid filenames (leading or trailing space, things that
+///       look like wspecifiers and rspecifiers or pipes to write to
+///       with leading |).
+///  - kFileInput: normal filenames
+///  - kStandardInput: the empty string or "-"
+///  - kPipeInput: e.g. "gunzip -c blah.gz |"
+///  - kOffsetFileInput: offsets into files, e.g.  /some/filename:12970
+InputType ClassifyRxfilename(const std::string &rxfilename);
+
+
+class Output {
+ public:
+  // The normal constructor, provided for convenience.
+  // Equivalent to calling with default constructor then Open()
+  // with these arguments.
+  Output(const std::string &filename, bool binary, bool write_header = true);
+
+  Output(): impl_(NULL) {}
+
+  /// This opens the stream, with the given mode (binary or text).  It returns
+  /// true on success and false on failure.  However, it will throw if something
+  /// was already open and could not be closed (to avoid this, call Close()
+  /// first).  If write_header == true and binary == true, it writes the Kaldi
+  /// binary-mode header ('\0' then 'B').  You may call Open even if it is
+  /// already open; it will close the existing stream and reopen (however if
+  /// closing the old stream failed it will throw).
+  bool Open(const std::string &wxfilename, bool binary, bool write_header);
+
+  inline bool IsOpen();  // return true if we have an open stream.  Does not imply
+  // stream is good for writing.
+
+  std::ostream &Stream();  // will throw if not open; else returns stream.
+
+  // Close closes the stream. Calling Close is never necessary unless you
+  // want to avoid exceptions being thrown.  There are times when calling
+  // Close will hurt efficiency (basically, when using offsets into files,
+  // and using the same Input object),
+  // but most of the time the user won't be doing this directly, it will
+  // be done in kaldi-table.{h, cc}, so you don't have to worry about it.
+  bool Close();
+
+  // This will throw if stream could not be closed (to check error status,
+  // call Close()).
+  ~Output();
+
+ private:
+  OutputImplBase *impl_;  // non-NULL if open.
+  std::string filename_;
+  KALDI_DISALLOW_COPY_AND_ASSIGN(Output);
+};
+
+
+// bool binary_in;
+// Input ki(some_filename, &binary_in);
+// MyObject.Read(ki.Stream(), binary_in);
+//
+// ... more extensive example:
+//
+// {
+//    bool binary_in;
+//    Input ki(some_filename, &binary_in);
+//    MyObject1.Read(ki.Stream(), binary_in);
+//    MyObject2.Read(ki.Stream(), binary_in);
+// }
+// Note that to catch errors you need to use try.. catch.
+// Input communicates errors by throwing exceptions.
+
+
+// Input interprets four kinds of filenames:
+//  (1) Normal filenames
+//  (2) The empty string or "-", interpreted as standard input
+//  (3) Pipes, e.g. "gunzip -c some_file.gz |"
+//  (4) Offsets into [real] files, e.g. "/my/filename:12049"
+// The last one has no correspondence in Output.
+
+
+class Input {
+ public:
+  /// The normal constructor.  Opens the stream in binary mode.
+  /// Equivalent to calling the default constructor followed by Open(); then, if
+  /// contents_binary != NULL, it calls ReadHeader(), putting the output in
+  /// *contents_binary; it throws on error.
+  Input(const std::string &rxfilename, bool *contents_binary = NULL);
+
+  Input(): impl_(NULL) {}
+
+  // Open opens the stream for reading (the mode, where relevant, is binary; use
+  // OpenTextMode for text-mode, we made this a separate function rather than a
+  // boolean argument, to avoid confusion with Kaldi's text/binary distinction,
+  // since reading in the file system's text mode is unusual.)  If
+  // contents_binary != NULL, it reads the binary-mode header and puts it in
+  // *contents_binary.  Returns true on success.  If it returns false it will
+  // not be open.  You may call Open even if it is already open; it will close
+  // the existing stream and reopen (however if closing the old stream failed it
+  // will throw).
+  inline bool Open(const std::string &rxfilename, bool *contents_binary = NULL);
+
+  // As Open but (if the file system has text/binary modes) opens in text mode;
+  // you shouldn't ever have to use this as in Kaldi we read even text files in
+  // binary mode (and ignore the \r).
+  inline bool OpenTextMode(const std::string &rxfilename);
+
+  // Return true if currently open for reading and Stream() will
+  // succeed.  Does not guarantee that the stream is good.
+  inline bool IsOpen();
+
+  // It is never necessary or helpful to call Close, except if
+  // you are concerned about too many filehandles being open.
+  // Close does not throw.
+  void Close();
+
+  // Returns the underlying stream. Throws if !IsOpen()
+  std::istream &Stream();
+
+  // Destructor does not throw: input streams may legitimately fail so we
+  // don't worry about the status when we close them.
+  ~Input();
+ private:
+  bool OpenInternal(const std::string &rxfilename, bool file_binary, bool *contents_binary);
+  InputImplBase *impl_;
+  KALDI_DISALLOW_COPY_AND_ASSIGN(Input);
+};
+
+// Reads *c from 'filename'; the Input constructor throws on error.
+template <class C> inline void ReadKaldiObject(const std::string &filename, C *c) {
+  bool contents_binary;  // filled in by Input from the binary-mode header
+  Input input(filename, &contents_binary);
+  c->Read(input.Stream(), contents_binary);
+}
+
+// Writes c to 'filename'; Output adds the binary-mode header when binary.
+template <class C>
+inline void WriteKaldiObject(const C &c, const std::string &filename, bool binary) {
+  Output output(filename, binary);
+  c.Write(output.Stream(), binary);
+}
+
+/// PrintableRxfilename turns the rxfilename into a more human-readable
+/// form for error reporting, i.e. it does quoting and escaping and
+/// replaces "" or "-" with "standard input".
+std::string PrintableRxfilename(std::string rxfilename);
+
+/// PrintableWxfilename turns the filename into a more human-readable
+/// form for error reporting, i.e. it does quoting and escaping and
+/// replaces "" or "-" with "standard output".
+std::string PrintableWxfilename(std::string wxfilename);
+
+/// @}
+
+}  // end namespace kaldi.
+
+#include "kaldi-io-inl.h"
+
+#endif
diff --git a/kaldi_io/src/kaldi/util/kaldi-pipebuf.h b/kaldi_io/src/kaldi/util/kaldi-pipebuf.h
new file mode 100644
index 0000000..43e5a2e
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/kaldi-pipebuf.h
@@ -0,0 +1,90 @@
+// util/kaldi-pipebuf.h
+
+// Copyright 2009-2011  Ondrej Glembek
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+/** @file kaldi-pipebuf.h
+ *  This is a Kaldi C++ Library header.
+ */
+
+#ifndef KALDI_UTIL_KALDI_PIPEBUF_H_
+#define KALDI_UTIL_KALDI_PIPEBUF_H_
+
+#if defined(_LIBCPP_VERSION)  // libc++
+#include "basic-filebuf.h"
+#else
+#include <fstream>
+#endif
+
+namespace kaldi
+{
+// This class provides a way to initialize a filebuf with a FILE* pointer
+// directly; it will not close the file pointer when it is deleted.
+// The C++ standard does not allow implementations of C++ to provide
+// this constructor within basic_filebuf, which makes it hard to deal
+// with pipes using completely native C++.  This is a workaround
+
+#ifdef _MSC_VER
+#elif defined(_LIBCPP_VERSION)  // libc++
+// libc++ variant: attaches to an already-open FILE* through the
+// open(FILE*, mode) call of the basic_filebuf from "basic-filebuf.h".
+template<class CharType, class Traits = std::char_traits<CharType> >
+class basic_pipebuf : public basic_filebuf<CharType, Traits> {
+ public:
+  typedef basic_pipebuf<CharType, Traits>   ThisType;
+  // On failure this only issues a KALDI_WARN; the buffer is left not open.
+  basic_pipebuf(FILE *fptr, std::ios_base::openmode mode)
+      : basic_filebuf<CharType, Traits>() {
+    this->open(fptr, mode);
+    const bool opened = this->is_open();
+    if (!opened) {
+      // Probably indicates a code error, if the fptr was good.
+      KALDI_WARN << "Error initializing pipebuf";
+    }
+  }
+};  // class basic_pipebuf
+#else
+template<class CharType, class Traits = std::char_traits<CharType> >
+class basic_pipebuf : public std::basic_filebuf<CharType, Traits>  // non-MSVC, non-libc++
+{
+ public:
+  typedef basic_pipebuf<CharType, Traits>   ThisType;
+
+ public:
+  basic_pipebuf(FILE *fptr, std::ios_base::openmode mode)
+      : std::basic_filebuf<CharType, Traits>() {
+    this->_M_file.sys_open(fptr, mode);  // NOTE(review): _M_* look like libstdc++ internals -- confirm
+    if (!this->is_open()) {
+      KALDI_WARN << "Error initializing pipebuf";  // probably indicates
+      // code error, if the fptr was good.
+      return;  // leave the remaining buffer state untouched.
+    }
+    this->_M_mode = mode;
+    this->_M_buf_size = BUFSIZ;
+    this->_M_allocate_internal_buffer();
+    this->_M_reading = false;  // neither reading nor writing yet.
+    this->_M_writing = false;
+    this->_M_set_buffer(-1);
+  }
+};  // class basic_pipebuf
+#endif // _MSC_VER
+
+};  // namespace kaldi
+
+#endif // KALDI_UTIL_KALDI_PIPEBUF_H_
+
diff --git a/kaldi_io/src/kaldi/util/kaldi-table-inl.h b/kaldi_io/src/kaldi/util/kaldi-table-inl.h
new file mode 100644
index 0000000..6b73c88
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/kaldi-table-inl.h
@@ -0,0 +1,2246 @@
+// util/kaldi-table-inl.h
+
+// Copyright 2009-2011    Microsoft Corporation
+//                2013    Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_UTIL_KALDI_TABLE_INL_H_
+#define KALDI_UTIL_KALDI_TABLE_INL_H_
+
+#include <algorithm>
+#include "util/kaldi-io.h"
+#include "util/text-utils.h"
+#include "util/stl-utils.h" // for StringHasher.
+
+
+namespace kaldi {
+
+/// \addtogroup table_impl_types
+/// @{
+
+template<class Holder> class SequentialTableReaderImplBase {
+ public:
+  typedef typename Holder::T T;
+  // NOTE(review): named rxfilename, but the script impl parses it as an rspecifier.
+  virtual bool Open(const std::string &rxfilename) = 0;
+  virtual bool Done() const = 0;
+  virtual bool IsOpen() const = 0;
+  virtual std::string Key() = 0;
+  virtual const T &Value() = 0;
+  virtual void FreeCurrent() = 0;  // lets the current object be released early.
+  virtual void Next() = 0;
+  virtual bool Close() = 0;  // the script impl returns false to report an error.
+  SequentialTableReaderImplBase() { }
+  virtual ~SequentialTableReaderImplBase() { }
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(SequentialTableReaderImplBase);  
+};
+
+
+// This is the implementation for SequentialTableReader
+// when it's actually a script file.
+template<class Holder>  class SequentialTableReaderScriptImpl:
+      public SequentialTableReaderImplBase<Holder> {
+ public:
+  typedef typename Holder::T T;
+
+  SequentialTableReaderScriptImpl(): state_(kUninitialized) { }
+
+  virtual bool Open(const std::string &rspecifier) {
+    if (state_ != kUninitialized)
+      if (! Close()) // call Close() yourself to suppress this exception.
+        KALDI_ERR << "TableReader::Open, error closing previous input: "
+                  << "rspecifier was " << rspecifier_;
+    bool binary;  // filled in by script_input_.Open() below.
+    rspecifier_ = rspecifier;
+    RspecifierType rs = ClassifyRspecifier(rspecifier, &script_rxfilename_,
+                                           &opts_);
+    KALDI_ASSERT(rs == kScriptRspecifier);  // this class only reads script files.
+    if (!script_input_.Open(script_rxfilename_, &binary)) {  // Failure on Open
+      KALDI_WARN << "Failed to open script file "
+                 << PrintableRxfilename(script_rxfilename_);
+      state_ = kUninitialized;
+      return false;
+    } else {  // Open succeeded.
+      if (binary) {  // script file should not be binary file..
+        state_ = kError;  // bad script file; the destructor raises KALDI_ERR in this state.
+        script_input_.Close();
+        return false;
+      } else {
+        state_ = kFileStart;  // internal state, only held until Next() returns.
+        Next();  // reads the first scp line (and, if permissive, loads it).
+        if (state_ == kError) {
+          script_input_.Close();
+          return false;
+        }
+        if (opts_.permissive) {  // Next() will have preloaded.
+          KALDI_ASSERT(state_ == kLoadSucceeded || state_ == kEof);
+        } else {
+          KALDI_ASSERT(state_ == kHaveScpLine || state_ == kEof);
+        }
+        return true;  // Success.
+      }
+    }
+  }
+
+  virtual bool IsOpen() const {
+    if (state_ == kUninitialized) return false;
+    if (state_ == kEof || state_ == kError || state_ == kHaveScpLine ||
+        state_ == kLoadSucceeded || state_ == kLoadFailed)
+      return true;
+    // Anything else (kFileStart) is not a valid state for a user call.
+    KALDI_ERR << "IsOpen() called on invalid object.";
+    return false;
+  }
+
+  virtual bool Done() const {
+    // While an scp entry is current we are not done, whether or not its
+    // object has been loaded: LoadCurrent() must be callable after Next()
+    // without changing the Done() status [only Next() should change it].
+    if (state_ == kHaveScpLine || state_ == kLoadSucceeded ||
+        state_ == kLoadFailed)
+      return false;
+    // Error condition, like Eof, counts as Done(); the destructor or
+    // Close() will inform the user of the error.
+    if (state_ == kEof || state_ == kError) return true;
+    KALDI_ERR << "Done() called on TableReader object at the wrong time.";
+    return false;
+  }
+
+  virtual std::string Key() {
+    // Valid to call whenever Done() returns false, i.e. while an scp entry
+    // is current (whether or not its object has been loaded).
+    const bool have_entry = (state_ == kHaveScpLine ||
+                             state_ == kLoadSucceeded ||
+                             state_ == kLoadFailed);
+    if (!have_entry)  // coding error.
+      KALDI_ERR << "Key() called on TableReader object at the wrong time.";
+    return key_;
+  }
+  const T &Value() {
+    // Returns the current object, loading it first if that was not done yet.
+    const StateType entry_state = state_;
+    // Lazy load: takes state_ to kLoadSucceeded or kLoadFailed.
+    if (entry_state == kHaveScpLine) LoadCurrent();
+    if (state_ != kLoadSucceeded && state_ != kLoadFailed) {
+      // This would be a coding error.
+      KALDI_ERR << "TableReader: Value() called at the wrong time.";
+    } else if (state_ == kLoadFailed && entry_state == kHaveScpLine) {
+      // The load attempted just above failed, e.g. a file listed in the scp
+      // file does not exist, a read failed, or a command failed.
+      KALDI_ERR << "TableReader: failed to load object from "
+                << PrintableRxfilename(data_rxfilename_)
+                << " (to suppress this error, add the permissive "
+                << "(p, ) option to the rspecifier.";
+    } else if (state_ == kLoadFailed) {
+      // Already kLoadFailed on entry, which only could have happened if
+      // the user called FreeCurrent().
+      KALDI_ERR << "TableReader: you called Value() after FreeCurrent().";
+    }
+    return holder_.Value();
+  }
+  void FreeCurrent() {
+    if (state_ != kLoadSucceeded) {
+      KALDI_WARN << "TableReader: FreeCurrent called at the wrong time.";
+      return;
+    }
+    holder_.Clear();  // release the object held for the current key.
+    state_ = kLoadFailed;
+  }
+  void Next() {
+    // Moves to the next scp entry.  In permissive mode, entries whose object
+    // cannot be loaded are skipped.
+    for (;;) {
+      NextScpLine();
+      if (Done()) return;
+      // Non-permissive: we go to the next key regardless; Value() will crash
+      // if we can't read the scp line.
+      if (!opts_.permissive) return;
+      // Permissive mode means, when reading scp files, we treat keys whose
+      // scp entry cannot be read as nonexistent.  This means trying to read.
+      if (LoadCurrent()) return;  // Success.
+      // else try the next scp line.
+    }
+  }
+
+  virtual bool Close() {
+    // Returns true unless the reader was in the error state (and even then,
+    // in permissive mode, the error is only warned about).  Both Input
+    // objects are closed first if they are open.
+    if (script_input_.IsOpen())
+      script_input_.Close();
+    if (data_input_.IsOpen())
+      data_input_.Close();
+    if (state_ == kLoadSucceeded)
+      holder_.Clear();
+    // IsOpen() is consulted only after the cleanup above.
+    if (!this->IsOpen())
+      KALDI_ERR << "Close() called on input that was not open.";
+    const bool had_error = (state_ == kError);
+    state_ = kUninitialized;
+    if (!had_error) return true;
+    if (!opts_.permissive)
+      return false;  // User will do something with the error status.
+    KALDI_WARN << "Close() called on scp file with read error, ignoring the "
+        "error because permissive mode specified.";
+    return true;
+  }
+
+  virtual ~SequentialTableReaderScriptImpl() {
+    if (state_ == kError)
+      KALDI_ERR << "TableReader: reading script file failed: from scp "
+                << PrintableRxfilename(script_rxfilename_);
+    // Call Close() first (and check its status) to avoid this exception.
+    // NOTE(review): a throw from a noexcept destructor terminates -- confirm.
+    if (state_ == kLoadSucceeded)
+      holder_.Clear();
+  }
+ private:  
+  bool LoadCurrent() {
+    // Tries to open the rxfilename from the current scp line and read one
+    // object from it into holder_.  Leaves state_ at kLoadSucceeded (returns
+    // true) or kLoadFailed (returns false).
+    if (state_ != kHaveScpLine)
+      KALDI_ERR << "TableReader: LoadCurrent() called at the wrong time.";
+    // note, NULL means Open() doesn't read the binary-mode header
+    const bool opened = Holder::IsReadInBinary() ?
+        data_input_.Open(data_rxfilename_, NULL) :
+        data_input_.OpenTextMode(data_rxfilename_);
+    if (!opened) {
+      // May want to make this warning a VLOG at some point
+      KALDI_WARN << "TableReader: failed to open file "
+                 << PrintableRxfilename(data_rxfilename_);
+      state_ = kLoadFailed;
+      return false;
+    }
+    if (!holder_.Read(data_input_.Stream())) {  // holder_ will not contain data.
+      KALDI_WARN << "TableReader: failed to load object from "
+                 << PrintableRxfilename(data_rxfilename_);
+      state_ = kLoadFailed;
+      return false;
+    }
+    state_ = kLoadSucceeded;
+    return true;
+  }
+
+  // Reads the next line in the script file.
+  void NextScpLine() {
+    // Release the previously loaded object, if there is one; the other
+    // states from which Next() may be called need no cleanup.
+    switch (state_) {
+      case kLoadSucceeded: holder_.Clear(); break;
+      case kHaveScpLine: case kLoadFailed: case kFileStart: break;
+      default:
+        // No other states are valid to call Next() from.
+        KALDI_ERR << "Reading script file: Next called wrongly.";
+    }
+    std::string scp_line;
+    if (!getline(script_input_.Stream(), scp_line)) {
+      // Nothing more in the scp file.  Might as well close the input
+      // streams as we don't need them any more.
+      state_ = kEof;
+      script_input_.Close();
+      if (data_input_.IsOpen())
+        data_input_.Close();
+      return;
+    }
+    SplitStringOnFirstSpace(scp_line, &key_, &data_rxfilename_);
+    // A valid line has both a key and an rxfilename; otherwise we can't
+    // make sense of this scp file and will now die.
+    if (key_.empty() || data_rxfilename_.empty())
+      state_ = kError;
+    else
+      state_ = kHaveScpLine;
+  }
+
+
+  Input script_input_;  // Input object for the .scp file; closed on eof.
+  Input data_input_;   // Input object for the entries (rxfilenames) listed in
+  // the script file; opened by LoadCurrent().
+  Holder holder_;  // Holds the object read by LoadCurrent().
+  bool binary_;  // Binary-mode archive.  NOTE(review): unused in the methods visible here.
+  std::string key_;  // Key parsed from the current scp line.
+  std::string rspecifier_;
+  std::string script_rxfilename_;  // of the script file.
+  RspecifierOptions opts_;  // options (e.g. opts_.permissive).
+  std::string data_rxfilename_;  // of the file we're reading (from the scp line).
+  enum StateType {
+    //       [The state of the reading process]               [does holder_ [is script_input_
+    //                                                         have object]   open]
+    kUninitialized,  // Uninitialized or closed.                    no         no
+    kEof,     // We did Next() and found eof in script file.       no         no
+    kError,   // Invalid scp line (missing key or rxfilename).     no         yes
+    kHaveScpLine,  // Just called Open() or Next() and have a       no         yes
+    // line of the script file but no data.
+    kLoadSucceeded,  // Called LoadCurrent() and it succeeded.     yes         yes
+    kLoadFailed,  // Called LoadCurrent() and it failed,           no         yes
+    // or the user called FreeCurrent().  Note: if the user
+    // finds the reader in this state outside permissive mode,
+    // it means the user called FreeCurrent().
+    kFileStart,        // [state we only use internally]           no         yes
+  } state_;
+ private:
+};
+
+
+// This is the implementation for SequentialTableReader
+// when it's an archive.  Note that the archive format is:
+// key1 [space] object1 key2 [space]
+// object2 ... eof.
+// "object1" is the output of the Holder::Write function and will
+// typically contain a binary header (in binary mode) and then
+// the output of object.Write(os, binary).
+// The archive itself does not care whether it is in binary
+// or text mode, for reading purposes.
+
+template<class Holder>  class SequentialTableReaderArchiveImpl:
+      public SequentialTableReaderImplBase<Holder> {
+ public:
+  typedef typename Holder::T T;
+
+  SequentialTableReaderArchiveImpl(): state_(kUninitialized) { }
+
+  // Opens the archive named by rspecifier and reads the first (key, object)
+  // pair.  Returns false (after a warning) if the stream cannot be opened or
+  // the first read fails; an empty archive is not an error.
+  virtual bool Open(const std::string &rspecifier) {
+    if (state_ != kUninitialized) {
+      if (! Close()) {  // call Close() yourself to suppress this exception.
+        if (opts_.permissive)
+          KALDI_WARN << "TableReader::Open, error closing previous input "
+              "(only warning, since permissive mode).";
+        else
+          KALDI_ERR << "TableReader::Open, error closing previous input.";
+      }
+    }
+    rspecifier_ = rspecifier;
+    RspecifierType rs = ClassifyRspecifier(rspecifier,
+                                           &archive_rxfilename_,
+                                           &opts_);
+    KALDI_ASSERT(rs == kArchiveRspecifier);
+    bool ans;
+    // NULL means don't expect binary-mode header
+    if (Holder::IsReadInBinary())
+      ans = input_.Open(archive_rxfilename_, NULL);
+    else
+      ans = input_.OpenTextMode(archive_rxfilename_);
+    if (!ans) {  // could not open the archive stream.
+      KALDI_WARN << "TableReader: failed to open stream "
+                 << PrintableRxfilename(archive_rxfilename_);
+      state_ = kUninitialized;  // Failure on Open
+      return false;  // User should print the error message.
+    }
+    state_ = kFileStart;
+    Next();  // Reads the first key and object, or detects eof.
+    if (state_ == kError) {
+      KALDI_WARN << "Error beginning to read archive file (wrong filename?): "
+                 << PrintableRxfilename(archive_rxfilename_);
+      input_.Close();
+      state_ = kUninitialized;
+      return false;
+    }
+    KALDI_ASSERT(state_ == kHaveObject || state_ == kEof);
+    return true;
+  }
+
+  // Frees the current object (if held) and reads the next key and object
+  // from the archive, leaving state_ as kHaveObject, kEof or kError.
+  virtual void Next() {
+    switch (state_) {
+      case kHaveObject:
+        holder_.Clear(); break;
+      case kFileStart: case kFreedObject:
+        break;
+      default:
+        KALDI_ERR << "TableReader: Next() called wrongly.";
+    }
+    std::istream &is = input_.Stream();
+    is.clear();  // Clear any fail bits that may have been set... just in case
+    // this happened in the Read function.
+    is >> key_;  // This eats up any leading whitespace and gets the string.
+    if (is.eof()) {
+      state_ = kEof;
+      return;
+    }
+    if (is.fail()) {  // This shouldn't really happen, barring file-system errors.
+      KALDI_WARN << "Error reading archive "
+                 << PrintableRxfilename(archive_rxfilename_);
+      state_ = kError;
+      return;
+    }
+    int c;
+    if ((c = is.peek()) != ' ' && c != '\t' && c != '\n') {  // We expect a space ' ' after the key.
+      // We also allow tab [which is consumed] and newline [which is not], just
+      // so we can read archives generated by scripts that may not be fully
+      // aware of how this format works.
+      KALDI_WARN << "Invalid archive file format: expected space after key "
+                 << key_ << ", got character "
+                 << CharToString(static_cast<char>(is.peek())) << ", reading "
+                 << PrintableRxfilename(archive_rxfilename_);
+      state_ = kError;
+      return;
+    }
+    if (c != '\n') is.get();  // Consume the space or tab.
+    if (holder_.Read(is)) {
+      state_ = kHaveObject;
+      return;
+    } else {
+      KALDI_WARN << "Object read failed, reading archive "
+                 << PrintableRxfilename(archive_rxfilename_);
+      state_ = kError;
+      return;
+    }
+  }
+
+  virtual bool IsOpen() const {  // True in every state except kUninitialized.
+    switch (state_) {
+      case kEof: case kError: case kHaveObject: case kFreedObject: return true;
+      case kUninitialized: return false;
+      default: KALDI_ERR << "IsOpen() called on invalid object.";  // kFileStart is not valid
+        // state for user to call something on.
+        return false;
+    }
+  }
+
+  virtual bool Done() const {  // True once there is nothing more to read.
+    switch (state_) {
+      case kHaveObject:
+        return false;
+      case kEof: case kError:
+        return true;  // Error-state counts as Done(), but destructor
+        // will fail (unless you check the status with Close()).
+      default:
+        KALDI_ERR << "Done() called on TableReader object at the wrong time.";
+        return false;
+    }
+  }
+
+  virtual std::string Key() {
+    // Valid to call this whenever Done() returns false
+    switch (state_) {
+      case kHaveObject: break;  // only valid case.
+      default:
+        // coding error.
+        KALDI_ERR << "Key() called on TableReader object at the wrong time.";
+    }
+    return key_;
+  }
+  const T &Value() {  // Returns the object held for the current key.
+    switch (state_) {
+      case kHaveObject:
+        break;  // only valid case.
+      default:
+        // coding error.
+        KALDI_ERR << "Value() called on TableReader object at the wrong time.";
+    }
+    return holder_.Value();
+  }
+  virtual void FreeCurrent() {  // Releases the current object; Next() stays valid.
+    if (state_ == kHaveObject) {
+      holder_.Clear();
+      state_ = kFreedObject;
+    } else
+      KALDI_WARN << "TableReader: FreeCurrent called at the wrong time.";
+  }
+
+  // Closes the input and resets to kUninitialized.  Returns false only if an
+  // error had been detected and permissive mode is off.
+  virtual bool Close() {
+    if (! this->IsOpen())
+      KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly.";
+    if (input_.IsOpen())
+      input_.Close();
+    if (state_ == kHaveObject)
+      holder_.Clear();
+    bool ans;
+    if (opts_.permissive) {
+      ans = true;  // always return success.
+      if (state_ == kError)
+        KALDI_WARN << "Error detected closing TableReader for archive "
+                   << PrintableRxfilename(archive_rxfilename_) << " but ignoring "
+                   << "it as permissive mode specified.";
+    } else
+      ans = (state_ != kError);  // If error state, user should detect it.
+    state_ = kUninitialized;
+    return ans;
+  }
+
+  virtual ~SequentialTableReaderArchiveImpl() {
+    if (state_ == kError) {
+      if (opts_.permissive)
+        KALDI_WARN << "Error detected closing TableReader for archive "
+                   << PrintableRxfilename(archive_rxfilename_) << " but ignoring "
+                   << "it as permissive mode specified.";
+      else
+        KALDI_ERR << "TableReader: error detected closing archive "
+                  << PrintableRxfilename(archive_rxfilename_);
+    }
+    // If you don't want this exception to be thrown you can
+    // call Close() and check the status.
+    if (state_ == kHaveObject)
+      holder_.Clear();
+  }
+ private:
+  Input input_;  // Input object for the archive
+  Holder holder_;     // Holds the object.
+  std::string key_;  // Key most recently read by Next().
+  std::string rspecifier_;
+  std::string archive_rxfilename_;
+  RspecifierOptions opts_;
+  enum {  //  [The state of the reading process]               [does holder_ [is input_
+    //                                                         have object]   open]
+    kUninitialized,  // Uninitialized or closed.                    no         no
+    kFileStart,      // [state we use internally: just opened.]    no         yes
+    kEof,     // We did Next() and found eof in archive            no         yes [1]
+    kError,   // Some other error                                  no         yes [1]
+    kHaveObject,  // We read the key and the object after it.       yes        yes
+    kFreedObject,  // The user called FreeCurrent().                no         yes
+  } state_;  // [1] Next() does not close input_ on eof/error; Close() does.
+};
+
+
+template<class Holder>
+SequentialTableReader<Holder>::SequentialTableReader(const std::string &rspecifier): impl_(NULL) {
+  if (!rspecifier.empty() && !Open(rspecifier))  // "" means: stay unopened.
+    KALDI_ERR << "Error constructing TableReader: rspecifier is " << rspecifier;
+}
+
+template<class Holder>
+bool SequentialTableReader<Holder>::Open(const std::string &rspecifier) {
+  // A previously open table has to close cleanly before we reopen.
+  if (IsOpen() && !Close())
+    KALDI_ERR << "Could not close previously open object.";
+  // now impl_ will be NULL, until a new implementation is created below.
+
+  // Pick the implementation that matches the kind of rspecifier.
+  switch (ClassifyRspecifier(rspecifier, NULL, NULL)) {
+    case kArchiveRspecifier:
+      impl_ = new SequentialTableReaderArchiveImpl<Holder>();
+      break;
+    case kScriptRspecifier:
+      impl_ = new SequentialTableReaderScriptImpl<Holder>();
+      break;
+    case kNoRspecifier: default:
+      KALDI_WARN << "Invalid rspecifier " << rspecifier;
+      return false;
+  }
+  if (impl_->Open(rspecifier)) return true;
+  // The sub-object will have printed warnings; discard it so that IsOpen()
+  // reports false.
+  delete impl_;
+  impl_ = NULL;
+  return false;
+}
+
+template<class Holder>
+bool SequentialTableReader<Holder>::Close() {
+  CheckImpl();
+  const bool closed_ok = impl_->Close();
+  delete impl_;  // A closed impl_ is never kept: IsOpen() tests impl_ != NULL.
+  impl_ = NULL;
+  return closed_ok;
+}
+
+
+template<class Holder>
+bool SequentialTableReader<Holder>::IsOpen() const {
+  return (impl_ != NULL);  // Open() and Close() delete impl_ (and set it to
+  // NULL) whenever the underlying object is not open, so the IsOpen()
+  // functions of the Impl objects are not needed here.
+}
+
+template<class Holder>
+std::string SequentialTableReader<Holder>::Key() {
+  CheckImpl();
+  return impl_->Key();  // The implementation may throw if called at the wrong
+  // time, e.g. at eof (when there is no current key).
+}
+
+
+template<class Holder>
+void SequentialTableReader<Holder>::FreeCurrent() {
+  CheckImpl();  // NOTE(review): presumably rejects a NULL impl_ -- confirm.
+  impl_->FreeCurrent();  // Lets the implementation release the current object.
+}
+
+
+template<class Holder>
+const typename SequentialTableReader<Holder>::T &
+SequentialTableReader<Holder>::Value() {
+  CheckImpl();
+  return impl_->Value();  // May throw if the implementation has no object to return (original note: "if LoadCurrent() returned false you are safe").
+}
+
+
+template<class Holder>
+void SequentialTableReader<Holder>::Next() {
+  CheckImpl();  // NOTE(review): presumably rejects a NULL impl_ -- confirm.
+  impl_->Next();  // Advances the implementation to the next entry.
+}
+
+template<class Holder>
+bool SequentialTableReader<Holder>::Done() {
+  CheckImpl();  // NOTE(review): presumably rejects a NULL impl_ -- confirm.
+  return impl_->Done();  // True at eof; the archive implementation also returns true on error.
+}
+
+
+template<class Holder>
+SequentialTableReader<Holder>::~SequentialTableReader() {
+  // Deleting a NULL pointer is a no-op, so no explicit test is needed.
+  delete impl_;  // Destructor of impl_ may throw.
+}
+
+
+
+template<class Holder> class TableWriterImplBase {
+ public:
+  typedef typename Holder::T T;
+
+  TableWriterImplBase() { }
+
+  // May throw on write error if Close was not called.
+  virtual ~TableWriterImplBase() { }
+
+  virtual bool Open(const std::string &wspecifier) = 0;
+
+  virtual bool IsOpen() const = 0;
+
+  // Returns true on success and false on failure, although some errors
+  // may only be detected once Close() is called.  Throws (via KALDI_ERR)
+  // if called wrongly.  Throwing on every error would have been possible,
+  // since that is what TableWriter does; the bool return dates from when
+  // TableWriter::Write itself returned an exit status.
+  virtual bool Write(const std::string &key, const T &value) = 0;
+
+  // Flushes any archive.  No error status is returned; any errors will
+  // be reported on the next Write or Close.
+  virtual void Flush() = 0;
+
+  virtual bool Close() = 0;
+
+ private:
+  KALDI_DISALLOW_COPY_AND_ASSIGN(TableWriterImplBase);
+};
+
+
+// The implementation of TableWriter we use when writing directly
+// to an archive with no associated scp.
+template<class Holder>
+class TableWriterArchiveImpl: public TableWriterImplBase<Holder> {
+ public:
+  typedef typename Holder::T T;
+
+  virtual bool Open(const std::string &wspecifier) {  // Opens the archive for writing.
+    switch (state_) {
+      case kUninitialized:
+        break;
+      case kWriteError:
+        KALDI_ERR << "TableWriter: opening stream, already open with write error.";
+      case kOpen: default:
+        if (!Close())  // throw because this error may not have been previously
+          // detected by the user.
+          KALDI_ERR << "TableWriter: opening stream, error closing previously open stream.";
+    }
+    wspecifier_ = wspecifier;
+    WspecifierType ws = ClassifyWspecifier(wspecifier,
+                                           &archive_wxfilename_,
+                                           NULL,
+                                           &opts_);
+    KALDI_ASSERT(ws == kArchiveWspecifier);  // or wrongly called.
+
+    if (output_.Open(archive_wxfilename_, opts_.binary, false)) {  // false means no binary header.
+      state_ = kOpen;
+      return true;
+    } else {
+      // stream will not be open.  User will report this error
+      // (we return bool), so don't bother printing anything.
+      state_ = kUninitialized;
+      return false;
+    }
+  }
+
+  virtual bool IsOpen() const {  // True in states kOpen and kWriteError.
+    switch (state_) {
+      case kUninitialized: return false;
+      case kOpen: case kWriteError: return true;
+      default: KALDI_ERR << "IsOpen() called on TableWriter in invalid state.";
+    }
+    return false;
+  }
+
+  // Writes "key", a space and then the object to the archive.  Returns true
+  // on success, false on failure, but some errors may not be detected till
+  // we call Close().
+  virtual bool Write(const std::string &key, const T &value) {
+    switch (state_) {
+      case kOpen: break;
+      case kWriteError:
+        // user should have known from the last
+        // call to Write that there was a problem.
+        KALDI_WARN << "TableWriter: attempting to write to invalid stream.";
+        return false;
+      case kUninitialized: default:
+        KALDI_ERR << "TableWriter: Write called on invalid stream";
+
+    }
+    // state_ is kOpen here: the kWriteError case returned above.
+    if (!IsToken(key)) // e.g. empty string or has spaces...
+      KALDI_ERR << "TableWriter: using invalid key " << key;
+    output_.Stream() << key << ' ';
+    if (!Holder::Write(output_.Stream(), opts_.binary, value)) {
+      KALDI_WARN << "TableWriter: write failure to "
+                 << PrintableWxfilename(archive_wxfilename_);
+      state_ = kWriteError;
+      return false;
+    }
+    if (state_ == kWriteError) return false;  // Even if this Write seems to have
+    // succeeded, we fail because a previous Write failed and the archive may be
+    // corrupted and unreadable.  NOTE(review): looks unreachable given the switch above.
+
+    if (opts_.flush)
+      Flush();
+    return true;
+  }
+
+  // Flush will flush any archive; it does not return error status,
+  //  any errors will be reported on the next Write or Close.
+  virtual void Flush() {
+    switch (state_) {
+      case kWriteError: case kOpen:
+        output_.Stream().flush();  // Don't check error status.
+        return;
+      default:
+        KALDI_WARN << "TableWriter: Flush called on not-open writer.";
+    }
+  }
+
+  virtual bool Close() {  // Returns false on close failure or after a write error.
+    if (!this->IsOpen() || !output_.IsOpen())
+      KALDI_ERR << "TableWriter: Close called on a stream that was not open." << this->IsOpen() << ", " << output_.IsOpen();
+    bool close_success = output_.Close();
+    if (!close_success) {
+      KALDI_WARN << "TableWriter: error closing stream: wspecifier is "
+                 << wspecifier_;
+      state_ = kUninitialized;
+      return false;
+    }
+    if (state_ == kWriteError) {
+      KALDI_WARN << "TableWriter: closing writer in error state: wspecifier is "
+                 << wspecifier_;
+      state_ = kUninitialized;
+      return false;
+    }
+    state_ = kUninitialized;
+    return true;
+  }
+
+  TableWriterArchiveImpl(): state_(kUninitialized) {}
+
+  // May throw on write error if Close was not called.
+  virtual ~TableWriterArchiveImpl() {
+    if (!IsOpen()) return;
+    else if (!Close())
+      KALDI_ERR << "At TableWriter destructor: Write failed or stream close "
+                << "failed: wspecifier is "<<  wspecifier_;
+  }
+
+ private:
+  Output output_;  // Stream for the archive being written.
+  WspecifierOptions opts_;  // Options parsed from the wspecifier (binary, flush).
+  std::string wspecifier_;  // Kept for use in diagnostics.
+  std::string archive_wxfilename_;  // Filled in by ClassifyWspecifier() in Open().
+  enum {               // is stream open?
+    kUninitialized,    // no
+    kOpen,             // yes
+    kWriteError,       // yes
+  } state_;
+};
+
+
+
+
+// The implementation of TableWriter we use when writing to
+// individual files (more generally, wxfilenames) specified
+// in an scp file that we read.
+
+// Note: the code for this class is similar to RandomAccessTableReaderScriptImpl;
+// try to keep them in sync.
+
+template<class Holder>
+class TableWriterScriptImpl: public TableWriterImplBase<Holder> {
+ public:
+  typedef typename Holder::T T;
+
+  TableWriterScriptImpl(): last_found_(0), state_(kUninitialized) {}
+
+  virtual bool Open(const std::string &wspecifier) {  // Reads and sorts the scp file.
+    switch (state_) {
+      case kReadScript:
+        KALDI_ERR << " Opening already open TableWriter: call Close first.";
+      case kUninitialized: case kNotReadScript:
+        break;
+    }
+    wspecifier_ = wspecifier;
+    WspecifierType ws = ClassifyWspecifier(wspecifier,
+                                           NULL,
+                                           &script_rxfilename_,
+                                           &opts_);
+    KALDI_ASSERT(ws == kScriptWspecifier);  // or wrongly called.
+    script_.clear();  // A failed earlier Open() (state kNotReadScript) may have left entries.
+
+    if (! ReadScriptFile(script_rxfilename_,
+                         true,  // print any warnings
+                         &script_)) {  // error reading script file or invalid format
+      state_ = kNotReadScript;
+      return false;  // no need to print further warnings.  user gets the error.
+    }
+    std::sort(script_.begin(), script_.end());
+    for (size_t i = 0; i+1 < script_.size(); i++) {
+      if (script_[i].first.compare(script_[i+1].first) >= 0) {
+        // script[i] not < script[i+1] in lexical order...
+        KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_)
+                   << " contains duplicate key " << script_[i].first;
+        state_ = kNotReadScript;
+        return false;
+      }
+    }
+    state_ = kReadScript;
+    return true;
+  }
+
+  virtual bool IsOpen() const {  return (state_ == kReadScript);  }
+
+  virtual bool Close() {  // Forgets the script; there is no stream to close.
+    if (!IsOpen())
+      KALDI_ERR << "Close() called on TableWriter that was not open.";
+    state_ = kUninitialized;
+    last_found_ = 0;
+    script_.clear();
+    return true;
+  }
+
+  // Writes the object to the wxfilename that the script file gives for "key".
+  // Returns true on success, false on failure.
+  virtual bool Write(const std::string &key, const T &value) {
+    if (!IsOpen())
+      KALDI_ERR << "TableWriter: Write called on invalid stream";
+
+    if (!IsToken(key)) // e.g. empty string or has spaces...
+      KALDI_ERR << "TableWriter: using invalid key " << key;
+
+    std::string wxfilename;
+    if (!LookupFilename(key, &wxfilename)) {
+      if (opts_.permissive) {
+        return true; // In permissive mode, it's as if we're writing to /dev/null
+                     // for missing keys.
+      } else {
+        KALDI_WARN << "TableWriter: script file "
+                   << PrintableRxfilename(script_rxfilename_)
+                   << " has no entry for key "<<key;
+        return false;
+      }
+    }
+    Output output;
+    if (!output.Open(wxfilename, opts_.binary, false)) {
+      // Open in the text/binary mode (on Windows) given by opts_.binary
+      // (obtained from wspecifier), but do not put the binary-mode header (it
+      // will be written, if needed, by the Holder::Write function.)
+      KALDI_WARN << "TableWriter: failed to open stream: "
+                 << PrintableWxfilename(wxfilename);
+      return false;
+    }
+    if (!Holder::Write(output.Stream(), opts_.binary, value)
+        || !output.Close()) {
+      KALDI_WARN << "TableWriter: failed to write data to "
+                 << PrintableWxfilename(wxfilename);
+      return false;
+    }
+    return true;
+  }
+
+  // Flush does nothing in this implementation, there is nothing to flush.
+  virtual void Flush() { }
+
+
+  virtual ~TableWriterScriptImpl() {
+    // Nothing to do in destructor.
+  }
+
+ private:
+  // Note: this function is almost the same as in RandomAccessTableReaderScriptImpl.
+  bool LookupFilename(const std::string &key, std::string *wxfilename) {
+    // First, an optimization: if we're going consecutively, this will
+    // make the lookup very fast.
+    last_found_++;
+    if (last_found_ < script_.size() && script_[last_found_].first == key) {
+      *wxfilename = script_[last_found_].second;
+      return true;
+    }
+    std::pair<std::string, std::string> pr(key, "");  // Important that ""
+    // compares less than or equal to any string, so lower_bound points to the
+    // element that has the same key.
+    typedef typename std::vector<std::pair<std::string, std::string> >::const_iterator
+        IterType;
+    IterType iter = std::lower_bound(script_.begin(), script_.end(), pr);
+    if (iter != script_.end() && iter->first == key) {
+      last_found_ = iter - script_.begin();
+      *wxfilename = iter->second;
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+
+  WspecifierOptions opts_;
+  std::string wspecifier_;
+  std::string script_rxfilename_;
+
+  // the script_ variable contains pairs of (key, filename), sorted using
+  // std::sort.  This can be used with binary_search to look up filenames for
+  // writing.  If this becomes inefficient we can use std::unordered_map (but I
+  // suspect this wouldn't be significantly faster & would use more memory).
+  // If memory becomes a problem here, the user should probably be passing
+  // only the relevant part of the scp file rather than expecting us to get too
+  // clever in the code.
+  std::vector<std::pair<std::string, std::string> > script_;
+  size_t last_found_;  // This is for an optimization used in LookupFilename.
+
+  enum {
+    kUninitialized,
+    kReadScript,
+    kNotReadScript,  // read of script failed.
+  } state_;
+};
+
+
+// The implementation of TableWriter we use when writing directly
+// to an archive plus an associated scp.
+template<class Holder>
+class TableWriterBothImpl: public TableWriterImplBase<Holder> {
+ public:
+  typedef typename Holder::T T;
+
+  virtual bool Open(const std::string &wspecifier) {  // Opens archive and script outputs.
+    switch (state_) {
+      case kUninitialized:
+        break;
+      case kWriteError:
+        KALDI_ERR << "TableWriter: opening stream, already open with write error.";
+      case kOpen: default:
+        if (!Close())  // throw because this error may not have been previously detected by user.
+          KALDI_ERR << "TableWriter: opening stream, error closing previously open stream.";
+    }
+    wspecifier_ = wspecifier;
+    WspecifierType ws = ClassifyWspecifier(wspecifier,
+                                           &archive_wxfilename_,
+                                           &script_wxfilename_,
+                                           &opts_);
+    KALDI_ASSERT(ws == kBothWspecifier);  // or wrongly called.
+    if (ClassifyWxfilename(archive_wxfilename_) != kFileOutput)
+      KALDI_WARN << "When writing to both archive and script, the script file "
+          "will generally not be interpreted correctly unless the archive is "
+          "an actual file: wspecifier = " << wspecifier;
+
+    if (!archive_output_.Open(archive_wxfilename_, opts_.binary, false)) {  // false means no binary header.
+      state_ = kUninitialized;
+      return false;
+    }
+    if (!script_output_.Open(script_wxfilename_, false, false)) {  // first false means text mode:
+      // script files always text-mode.   second false means don't write header (doesn't matter
+      // for text mode).
+      archive_output_.Close();  // Don't care about status: error anyway.
+      state_ = kUninitialized;
+      return false;
+    }
+    state_ = kOpen;
+    return true;
+  }
+
+  virtual bool IsOpen() const {  // True in states kOpen and kWriteError.
+    switch (state_) {
+      case kUninitialized: return false;
+      case kOpen: case kWriteError: return true;
+      default: KALDI_ERR << "IsOpen() called on TableWriter in invalid state.";
+    }
+    return false;
+  }
+
+  void MakeFilename(typename std::ostream::pos_type streampos, std::string *output) const {
+    std::ostringstream ss;
+    ss << ':' << streampos;
+    KALDI_ASSERT(ss.str() != ":-1");
+    *output = archive_wxfilename_ + ss.str();
+
+    // e.g. /some/file:12302.
+    // Note that we warned if archive_wxfilename_ is not an actual filename;
+    // the philosophy is we give the user rope and if they want to hang
+    // themselves, with it, fine.
+  }
+
+  // Writes the object to the archive and a "key archive:offset" line to the
+  // script file.  Returns true on success, false on failure, but
+  // some errors may not be detected till we call Close().
+  virtual bool Write(const std::string &key, const T &value) {
+    switch (state_) {
+      case kOpen: break;
+      case kWriteError:
+        // user should have known from the last
+        // call to Write that there was a problem.  Warn about it.
+        KALDI_WARN << "TableWriter: writing to non-open TableWriter object.";
+        return false;
+      case kUninitialized: default:
+        KALDI_ERR << "TableWriter: Write called on invalid stream";
+    }
+    // state_ is kOpen here: the kWriteError case returned above.
+    if (!IsToken(key)) // e.g. empty string or has spaces...
+      KALDI_ERR << "TableWriter: using invalid key " << key;
+    std::ostream &archive_os = archive_output_.Stream();
+    archive_os << key << ' ';
+    typename std::ostream::pos_type archive_os_pos = archive_os.tellp();
+    // position at start of Write() to archive.  We will record this in the script file.
+    std::string offset_rxfilename;  // rxfilename with offset into the archive,
+    // e.g. some_archive_name.ark:431541423
+    MakeFilename(archive_os_pos, &offset_rxfilename);
+
+    // Write to the script file first.
+    // The idea is that we want to get all the information possible into the
+    // script file, to make it easier to unwind errors later.
+    std::ostream &script_os = script_output_.Stream();
+    script_output_.Stream() << key << ' ' << offset_rxfilename << '\n';
+
+    if (!Holder::Write(archive_output_.Stream(), opts_.binary, value)) {
+      KALDI_WARN << "TableWriter: write failure to "
+                 << PrintableWxfilename(archive_wxfilename_);
+      state_ = kWriteError;
+      return false;
+    }
+
+    if (script_os.fail()) {
+      KALDI_WARN << "TableWriter: write failure to script file detected: "
+                 << PrintableWxfilename(script_wxfilename_);
+      state_ = kWriteError;
+      return false;
+    }
+
+    if (archive_os.fail()) {
+      KALDI_WARN << "TableWriter: write failure to archive file detected: "
+                 << PrintableWxfilename(archive_wxfilename_);
+      state_ = kWriteError;
+      return false;
+    }
+
+    if (state_ == kWriteError) return false;  // Even if this Write seems to have
+    // succeeded, we fail because a previous Write failed and the archive may be
+    // corrupted and unreadable.
+
+    if (opts_.flush)
+      Flush();
+    return true;
+  }
+
+  // Flush will flush any archive; it does not return error status,
+  //  any errors will be reported on the next Write or Close.
+  virtual void Flush() {
+    switch (state_) {
+      case kWriteError: case kOpen:
+        archive_output_.Stream().flush();  // Don't check error status.
+        script_output_.Stream().flush();  // Don't check error status.
+        return;
+      default:
+        KALDI_WARN << "TableWriter: Flush called on not-open writer.";
+    }
+  }
+
+  virtual bool Close() {  // False if either close failed or a write error occurred.
+    if (!this->IsOpen())
+      KALDI_ERR << "TableWriter: Close called on a stream that was not open.";
+    bool close_success = true;
+    if (archive_output_.IsOpen())
+      if (!archive_output_.Close()) close_success = false;
+    if (script_output_.IsOpen())
+      if (!script_output_.Close()) close_success = false;
+    bool ans = close_success && (state_ != kWriteError);
+    state_ = kUninitialized;
+    return ans;
+  }
+
+  TableWriterBothImpl(): state_(kUninitialized) {}
+
+  // May throw on write error if Close() was not called.
+  // User can get the error status by calling Close().
+  virtual ~TableWriterBothImpl() {
+    if (!IsOpen()) return;
+    else if (!Close())
+      KALDI_ERR << "At TableWriter destructor: Write failed or stream close failed: "
+                << wspecifier_;
+  }
+
+ private:
+  Output archive_output_;  // Stream for the archive.
+  Output script_output_;  // Stream for the script file (always text mode).
+  WspecifierOptions opts_;
+  std::string archive_wxfilename_;
+  std::string script_wxfilename_;
+  std::string wspecifier_;  // Kept for use in diagnostics.
+  enum {               // is stream open?
+    kUninitialized,    // no
+    kOpen,             // yes
+    kWriteError,       // yes
+  } state_;
+};
+
+
+// Constructs a TableWriter.  An empty wspecifier leaves the writer closed;
+// any other value is opened right away, and failure to open is raised
+// through KALDI_ERR.
+template<class Holder>
+TableWriter<Holder>::TableWriter(const std::string &wspecifier): impl_(NULL) {
+  if (wspecifier.empty()) return;  // nothing to open yet.
+  if (!Open(wspecifier)) {
+    KALDI_ERR << "TableWriter: failed to write to "
+              << wspecifier;
+  }
+}
+
+// The writer counts as open exactly when it holds an implementation object;
+// Close() deletes impl_ and resets it to NULL.
+template<class Holder>
+bool TableWriter<Holder>::IsOpen() const {
+  return (impl_ != NULL);
+}
+
+
+// Opens the writer on the given wspecifier, first closing any writer that is
+// already open.  Returns false (after a warning) when the wspecifier is
+// invalid or when the chosen implementation fails to open; in both cases the
+// writer is left not open.
+template<class Holder>
+bool TableWriter<Holder>::Open(const std::string &wspecifier) {
+  // Call Close() yourself beforehand to suppress this exception.
+  if (IsOpen() && !Close()) {
+    KALDI_ERR << "TableWriter::Open, failed to close previously open writer.";
+  }
+  KALDI_ASSERT(impl_ == NULL);
+
+  // Choose the implementation that matches the kind of wspecifier.
+  const WspecifierType kind = ClassifyWspecifier(wspecifier, NULL, NULL, NULL);
+  if (kind == kBothWspecifier) {
+    impl_ = new TableWriterBothImpl<Holder>();
+  } else if (kind == kArchiveWspecifier) {
+    impl_ = new TableWriterArchiveImpl<Holder>();
+  } else if (kind == kScriptWspecifier) {
+    impl_ = new TableWriterScriptImpl<Holder>();
+  } else {  // kNoWspecifier, or anything else we do not recognize.
+    KALDI_WARN << "ClassifyWspecifier: invalid wspecifier " << wspecifier;
+    return false;
+  }
+
+  if (!impl_->Open(wspecifier)) {
+    // The implementation class will have printed a more specific warning.
+    delete impl_;
+    impl_ = NULL;
+    return false;
+  }
+  return true;
+}
+
+// Writes the (key, value) pair through the implementation object.  A failed
+// write is escalated to KALDI_ERR; a more specific warning will already have
+// been printed inside the implementation's Write function.
+template<class Holder>
+void TableWriter<Holder>::Write(const std::string &key,
+                                const T &value) const {
+  CheckImpl();
+  const bool write_ok = impl_->Write(key, value);
+  if (!write_ok) {
+    KALDI_ERR << "Error in TableWriter::Write";
+  }
+}
+
+// Forwards the flush request to the implementation object, after CheckImpl().
+template<class Holder>
+void TableWriter<Holder>::Flush() {
+  CheckImpl();
+  impl_->Flush();
+}
+
+// Closes the implementation object, destroys it, and returns the status that
+// its Close() reported.
+template<class Holder>
+bool TableWriter<Holder>::Close() {
+  CheckImpl();
+  const bool closed_ok = impl_->Close();
+  // We never keep a non-open impl_ object around: IsOpen() is defined as
+  // (impl_ != NULL), so the object has to go away here.
+  delete impl_;
+  impl_ = NULL;
+  return closed_ok;
+}
+
+// Closes the writer if the user left it open; a failure while closing is
+// raised through KALDI_ERR.
+template<class Holder>
+TableWriter<Holder>::~TableWriter() {
+  if (!IsOpen()) return;  // already closed, or never opened.
+  if (!Close()) {
+    KALDI_ERR << "Error closing TableWriter [in destructor].";
+  }
+}
+
+
+// Types of RandomAccessTableReader:
+// In principle, we would like to have four types of RandomAccessTableReader:
+//  the 4 combinations  [scp, archive], [seekable, not-seekable],
+// where if something is seekable we only store a file offset.  However,
+// it seems sufficient for now to only implement two of these, in both
+// cases assuming it's not seekable so we never store file offsets and always
+// store either the scp line or the data in the archive.  The reasons are:
+// (1)
+// For scp files, storing the actual entry is not that much more expensive
+// than storing the file offsets (since the entries are just filenames), and
+// avoids a lot of fseek operations that might be expensive.
+// (2)
+// For archive files, there is no real reason, if you have the archive file
+// on disk somewhere, why you wouldn't access it via its associated scp.
+// [i.e. write it as ark, scp].  The main reason to read archives directly
+// is if they are part of a pipe, and in this case it's not seekable, so
+// we implement only this case.
+//
+// Note that we will rarely in practice have to keep in memory everything in
+// the archive, as long as things are only read once from the archive (the
+// "o, " or "once" option) and as long as we keep our keys in sorted order; to take
+// advantage of this we need the "s, " (sorted) option, so we would read archives
+// as e.g. "s, o, ark:-" (this is the rspecifier we would use if it was the
+// standard input and these conditions held).
+
+// Abstract interface shared by the RandomAccessTableReader implementation
+// classes below (script-based and archive-based).  All operations are pure
+// virtual; the derived classes supply the behavior.
+template<class Holder> class RandomAccessTableReaderImplBase {
+ public:
+  // The type of object that the Holder gives access to.
+  typedef typename Holder::T T;
+
+  // Opens the reader on the given rspecifier; returns true on success.
+  virtual bool Open(const std::string &rspecifier) = 0;
+
+  // Returns true if an object for this key can be provided.
+  virtual bool HasKey(const std::string &key) = 0;
+
+  // Returns the object stored under this key.  The implementations in this
+  // file raise KALDI_ERR when the key cannot be provided.
+  virtual const T &Value(const std::string &key) = 0;
+
+  // Closes the reader; returns false if an error state was detected.
+  virtual bool Close() = 0;
+
+  // Virtual so that implementations can be destroyed through a base pointer.
+  virtual ~RandomAccessTableReaderImplBase() {}
+};
+
+
+// Implementation of RandomAccessTableReader for a script file; for simplicity we
+// just read it in all in one go, as it's unlikely someone would generate this
+// from a pipe.  In principle we could read it on-demand as for the archives, but
+// this would probably be overkill.
+
+// Note: the code for this class is similar to TableWriterScriptImpl:
+// try to keep them in sync.
+template<class Holder>
+class RandomAccessTableReaderScriptImpl:
+      public RandomAccessTableReaderImplBase<Holder> {
+
+ public:
+  typedef typename Holder::T T;
+
+  // Starts out not open (kUninitialized), with the lookup hint at position 0.
+  RandomAccessTableReaderScriptImpl(): last_found_(0), state_(kUninitialized) {}
+
+  // Opens the reader on a script-file rspecifier: reads the whole script into
+  // script_, sorts it (unless the user asserted via the sorted option that it
+  // is already sorted), and checks that the keys are strictly increasing.
+  //
+  // Returns true on success, leaving the state as kNotHaveObject.  Returns
+  // false, leaving the state as kNotReadScript, if the script file could not
+  // be read or if it contains duplicate or (with the sorted option) unsorted
+  // keys; a warning is printed in those cases.  On failure script_ is left
+  // empty, so that a later Open() on this object passes the emptiness
+  // assertion below.  Calling Open() on an already-open reader is an error
+  // (KALDI_ERR).
+  virtual bool Open(const std::string &rspecifier) {
+    switch (state_) {
+      case kNotHaveObject: case kHaveObject: case kGaveObject:
+        KALDI_ERR << " Opening already open RandomAccessTableReader: call Close first.";
+      case kUninitialized: case kNotReadScript:
+        break;
+    }
+    rspecifier_ = rspecifier;
+    RspecifierType rs = ClassifyRspecifier(rspecifier,
+                                           &script_rxfilename_,
+                                           &opts_);
+    KALDI_ASSERT(rs == kScriptRspecifier);  // or wrongly called.
+    KALDI_ASSERT(script_.empty());  // no way it could be nonempty at this point.
+
+    if (! ReadScriptFile(script_rxfilename_,
+                        true,  // print any warnings
+                        &script_)) {  // error reading script file or invalid format
+      script_.clear();  // discard anything that was read before the failure.
+      state_ = kNotReadScript;
+      return false;  // no need to print further warnings.  user gets the error.
+    }
+
+    // If opts_.sorted, the user has asserted that the keys are already sorted.
+    // Although we could easily sort them, we want to let the user know of this
+    // mistake.  This same mistake could have serious effects if used with an
+    // archive rather than a script.
+    if (!opts_.sorted)
+      std::sort(script_.begin(), script_.end());
+    for (size_t i = 0; i+1 < script_.size(); i++) {
+      if (script_[i].first.compare(script_[i+1].first) >= 0) {
+        // script[i] not < script[i+1] in lexical order...
+        bool same = (script_[i].first == script_[i+1].first);
+        KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_)
+                   << (same ? " contains duplicate key: " :
+                       " is not sorted (remove s, option or add ns, option): key is ")
+                   << script_[i].first;
+        // Only clear after the warning above has used script_[i].  Without
+        // this, script_ would still hold the rejected entries and the next
+        // Open() would hit KALDI_ASSERT(script_.empty()).
+        script_.clear();
+        state_ = kNotReadScript;
+        return false;
+      }
+    }
+    state_ = kNotHaveObject;
+    return true;
+  }
+
+  // The reader is open in exactly the three states in which the script has
+  // been read successfully.
+  virtual bool IsOpen() const {
+    switch (state_) {
+      case kNotHaveObject: case kHaveObject: case kGaveObject:
+        return true;
+      default:
+        return false;
+    }
+  }
+
+  // Releases the held object and the loaded script and returns the reader to
+  // the kUninitialized state.  Always returns true: any errors of a "global"
+  // nature would already have been detected in Open() (with archives it is
+  // different).  Calling this on a reader that is not open is an error.
+  virtual bool Close() {
+    if (!IsOpen()) {
+      KALDI_ERR << "Close() called on RandomAccessTableReader that was not open.";
+    }
+    holder_.Clear();
+    script_.clear();
+    current_key_.clear();
+    last_found_ = 0;
+    state_ = kUninitialized;
+    return true;
+  }
+
+  // Returns true if the key can be provided.  In permissive mode we have to
+  // check that the scp entry can actually be read before asserting that the
+  // key is there, so the object is pre-loaded in that case.
+  virtual bool HasKey(const std::string &key) {
+    return HasKeyInternal(key, opts_.permissive);
+  }
+
+
+  // Returns the object stored under `key`, loading it through
+  // HasKeyInternal() unless it is already the object held in holder_.
+  // Raises KALDI_ERR if the reader is not open, if the object cannot be
+  // obtained, or if the once (o) option is in force and the same key is
+  // requested a second time.
+  virtual const T&  Value(const std::string &key) {
+    if (!IsOpen()) {
+      KALDI_ERR << "Value() called on non-open object.";
+    }
+
+    const bool holder_has_key =
+        (state_ == kHaveObject || state_ == kGaveObject) && key == current_key_;
+    if (!holder_has_key) {
+      // Not already stored, so look the key up with preloading enabled.
+      if (!HasKeyInternal(key, true)) {
+        KALDI_ERR << "Could not get item for key " << key
+                  << ", rspecifier is " << rspecifier_ << "[to ignore this, "
+                  << "add the p, (permissive) option to the rspecifier.";
+      }
+      KALDI_ASSERT(state_ == kHaveObject && key == current_key_);
+    }
+
+    if (state_ == kHaveObject) {
+      // First time this object is handed out.
+      state_ = kGaveObject;
+      if (opts_.once) MakeTombstone(key);  // make sure that future lookups fail.
+    } else if (opts_.once) {
+      // state_ == kGaveObject: the object was already handed out once.
+      KALDI_ERR << "Value called twice for the same key and ,o (once) option "
+                << "is used: rspecifier is " << rspecifier_;
+    }
+    return holder_.Value();
+  }
+
+  // Releases the held object, if there is one.
+  virtual ~RandomAccessTableReaderScriptImpl() {
+    const bool holds_object = (state_ == kHaveObject || state_ == kGaveObject);
+    if (holds_object) holder_.Clear();
+  }
+
+ private:
+  // Shared worker behind HasKey() and Value().
+  //
+  // With preload == false it only reports whether the key is present in the
+  // scp.  With preload == true (used by Value(), and by HasKey() when the ,p
+  // (permissive) option was given) it additionally opens the rxfilename from
+  // the scp entry and reads the object into holder_, returning true only if
+  // that succeeds.  If the key is the one currently held in holder_ it
+  // returns true immediately.
+  virtual bool HasKeyInternal(const std::string &key, bool preload) {
+    switch (state_) {
+      case kUninitialized: case kNotReadScript:
+        KALDI_ERR << "HasKey called on RandomAccessTableReader object that is not open.";
+      case kHaveObject: case kGaveObject:
+        if (key == current_key_)
+          return true;
+        break;
+      default: break;
+    }
+    KALDI_ASSERT(IsToken(key));
+
+    size_t key_pos = 0;  // set to zero to suppress warning
+    if (!LookupKey(key, &key_pos))
+      return false;  // the key is not in the scp at all.
+
+    const std::string &rxfilename = script_[key_pos].second;
+
+    // A check regarding the "once" option: an empty rxfilename is a
+    // "tombstone", i.e. the user is asking about an already-read key.
+    if (opts_.once && rxfilename == "") {
+      KALDI_ERR << "HasKey called on key whose value was already read, and "
+          " you specified the \"once\" option (o, ): try removing o, or adding no, :"
+          " rspecifier is " << rspecifier_;
+    }
+
+    if (!preload)
+      return true;  // we have the key, and that is all that was asked.
+
+    // Preload requested: the object has to be loaded before we say yes.
+    if (!input_.Open(rxfilename)) {
+      KALDI_WARN << "Error opening stream "
+                 << PrintableRxfilename(rxfilename);
+      return false;
+    }
+
+    // Make sure the holder is empty before reading into it.
+    if (state_ == kHaveObject || state_ == kGaveObject)
+      holder_.Clear();
+
+    if (!holder_.Read(input_.Stream())) {
+      KALDI_WARN << "Error reading object from "
+          "stream " << PrintableRxfilename(rxfilename);
+      state_ = kNotHaveObject;
+      return false;
+    }
+    state_ = kHaveObject;
+    current_key_ = key;
+    return true;
+  }
+  // Marks the scp entry for `key` as already read by blanking its filename
+  // (a "tombstone"); HasKeyInternal() checks for this when the once option
+  // is used.  The key must be present in script_.
+  void MakeTombstone(const std::string &key) {
+    size_t pos;
+    if (LookupKey(key, &pos)) {
+      script_[pos].second = "";
+    } else {
+      KALDI_ERR << "RandomAccessTableReader object in inconsistent state.";
+    }
+  }
+  // Finds `key` in the sorted script_ array.  On success stores its index in
+  // *script_offset, remembers it in last_found_ and returns true; otherwise
+  // returns false and leaves *script_offset untouched.
+  bool LookupKey(const std::string &key, size_t *script_offset) {
+    // Fast path for consecutive access.  HasKey and Value() may both look up
+    // the same key, so we try the remembered position first and then the one
+    // right after it.  Note that last_found_ is advanced before the second
+    // attempt whether or not that attempt succeeds.
+    if (last_found_ < script_.size() && script_[last_found_].first == key) {
+      *script_offset = last_found_;
+      return true;
+    }
+    ++last_found_;
+    if (last_found_ < script_.size() && script_[last_found_].first == key) {
+      *script_offset = last_found_;
+      return true;
+    }
+
+    // Slow path: binary search.  It is important that "" compares less than
+    // or equal to any string, so that lower_bound lands on the element that
+    // has the same key.
+    typedef std::vector<std::pair<std::string, std::string> > ScriptType;
+    const std::pair<std::string, std::string> probe(key, "");
+    ScriptType::const_iterator pos =
+        std::lower_bound(script_.begin(), script_.end(), probe);
+    if (pos == script_.end() || pos->first != key)
+      return false;
+    last_found_ = *script_offset = pos - script_.begin();
+    return true;
+  }
+
+
+  Input input_;  // Use the same input_ object for reading each file, in case
+  // the scp specifies offsets in an archive (so we can keep the same file open).
+  RspecifierOptions opts_;  // Options parsed from the rspecifier in Open().
+  std::string rspecifier_;  // rspecifier used to open it; used in debug messages
+  std::string script_rxfilename_;  // filename of script.
+
+  std::string current_key_;  // Key of object in holder_
+  Holder holder_;  // Holds the most recently loaded object, if any.
+
+  // the script_ variable contains pairs of (key, filename), sorted using
+  // std::sort.  LookupKey() searches it with std::lower_bound to find the
+  // filename for a key.  If this becomes inefficient we can use
+  // std::unordered_map (but I suspect this wouldn't be significantly faster &
+  // would use more memory).
+  // If memory becomes a problem here, the user should probably be passing
+  // only the relevant part of the scp file rather than expecting us to get too
+  // clever in the code.
+  // With the once option, an entry whose filename has been set to "" is a
+  // "tombstone" for a key that was already read.
+  std::vector<std::pair<std::string, std::string> > script_;
+  size_t last_found_;  // This is for an optimization used in LookupKey.
+
+  enum {  //           [Do we have          [Does holder_
+    //                script_ set up?]      contain object?]
+    kUninitialized,  //     no                     no
+    kNotReadScript,  //     no                     no
+    kNotHaveObject,  //     yes                    no
+    kHaveObject,   //     yes                    yes
+    kGaveObject,   //     yes                    yes
+    // [kGaveObject is as kHaveObject but we note that the
+    //  user has already read it; this is for checking that
+    // if "once" is specified, the user actually only reads
+    // it once.]
+  } state_;
+
+};
+
+
+
+
+// This is the base-class (with some implemented functions) for the
+// implementations of RandomAccessTableReader when it's an archive.  This
+// base-class handles opening the files, storing the state of the reading
+// process, and loading objects.  This is the only case in which we have
+// an intermediate class in the hierarchy between the virtual ImplBase
+// class and the actual Impl classes.
+// The child classes vary in the assumptions regarding sorting, etc.
+
+template<class Holder>  class RandomAccessTableReaderArchiveImplBase:
+      public RandomAccessTableReaderImplBase<Holder> {
+ public:
+  typedef typename Holder::T T;
+
+  // Starts out closed (kUninitialized) and holding no object.
+  RandomAccessTableReaderArchiveImplBase(): holder_(NULL), state_(kUninitialized) { }
+
+  // Opens the archive named by the rspecifier, closing any previously open
+  // input first.  Returns true and moves to kNoObject on success; on failure
+  // prints a warning, stays in kUninitialized and returns false.
+  virtual bool Open(const std::string &rspecifier) {
+    // Call Close() yourself beforehand to suppress this exception.
+    if (state_ != kUninitialized && !this->Close()) {
+      KALDI_ERR << "TableReader::Open, error closing previous input.";
+    }
+    rspecifier_ = rspecifier;
+    RspecifierType rs = ClassifyRspecifier(rspecifier, &archive_rxfilename_,
+                                           &opts_);
+    KALDI_ASSERT(rs == kArchiveRspecifier);
+
+    // In the binary case, NULL means don't expect a binary-mode header.
+    const bool opened = Holder::IsReadInBinary()
+        ? input_.Open(archive_rxfilename_, NULL)
+        : input_.OpenTextMode(archive_rxfilename_);
+    if (!opened) {
+      KALDI_WARN << "TableReader: failed to open stream "
+                 << PrintableRxfilename(archive_rxfilename_);
+      state_ = kUninitialized;  // Failure on Open
+      return false;  // User should print the error message.
+    }
+    state_ = kNoObject;
+    return true;
+  }
+
+  // Reads the next (key, object) pair from the archive.  The state must be
+  // kNoObject on entry.  On success the state becomes kHaveObject, with the
+  // key in cur_key_ and the object in a newly allocated holder_.  Otherwise
+  // the state becomes kEof (nothing more to read) or kError (bad format or a
+  // failed read), and holder_ is left NULL.
+  void ReadNextObject() {
+    if (state_ != kNoObject) {
+      // Code error somewhere in this class or a child class.
+      KALDI_ERR << "TableReader: ReadNextObject() called from wrong state.";
+    }
+    std::istream &stream = input_.Stream();
+    // Clear any fail bits that may have been set... just in case this
+    // happened in the Read function.
+    stream.clear();
+    // This eats up any leading whitespace and gets the key string.
+    stream >> cur_key_;
+    if (stream.eof()) {
+      state_ = kEof;
+      return;
+    }
+    if (stream.fail()) {  // Shouldn't really happen, barring file-system errors.
+      KALDI_WARN << "Error reading archive: rspecifier is " << rspecifier_;
+      state_ = kError;
+      return;
+    }
+
+    // We expect a space ' ' after the key.  We also allow tab, just so we can
+    // read archives generated by scripts that may not be fully aware of how
+    // this format works; a newline is accepted too but is not consumed.
+    const int next_char = stream.peek();
+    const bool is_separator =
+        (next_char == ' ' || next_char == '\t' || next_char == '\n');
+    if (!is_separator) {
+      KALDI_WARN << "Invalid archive file format: expected space after key " <<cur_key_
+                 <<", got character "
+                 << CharToString(static_cast<char>(stream.peek())) << ", reading archive "
+                 << PrintableRxfilename(archive_rxfilename_);
+      state_ = kError;
+      return;
+    }
+    if (next_char != '\n') stream.get();  // Consume the space or tab.
+
+    holder_ = new Holder;
+    if (!holder_->Read(stream)) {
+      KALDI_WARN << "Object read failed, reading archive "
+                 << PrintableRxfilename(archive_rxfilename_);
+      state_ = kError;
+      delete holder_;
+      holder_ = NULL;
+      return;
+    }
+    state_ = kHaveObject;
+  }
+
+  // Every state other than kUninitialized counts as open.  A state_ value
+  // outside the enum is an error (KALDI_ERR).
+  virtual bool IsOpen() const {
+    if (state_ == kUninitialized) return false;
+    if (state_ == kNoObject || state_ == kHaveObject ||
+        state_ == kEof || state_ == kError) {
+      return true;
+    }
+    KALDI_ERR << "IsOpen() called on invalid object.";
+    return false;
+  }
+
+  // Called by the child-class virtual Close() functions; does the shared
+  // parts of the cleanup: closes the input, frees any held object and resets
+  // the state to kUninitialized.  Returns false only if the reader was in
+  // the kError state and permissive mode was not requested.
+  bool CloseInternal() {
+    if (!this->IsOpen()) {
+      KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly.";
+    }
+    if (input_.IsOpen())
+      input_.Close();
+
+    if (state_ == kHaveObject) {
+      KALDI_ASSERT(holder_ != NULL);
+      delete holder_;
+      holder_ = NULL;
+    } else {
+      KALDI_ASSERT(holder_ == NULL);
+    }
+
+    const bool was_in_error = (state_ == kError);
+    state_ = kUninitialized;
+    if (!was_in_error) return true;
+    if (opts_.permissive) {
+      KALDI_WARN << "Error state detected closing reader.  "
+                 << "Ignoring it because you specified permissive mode.";
+      return true;
+    }
+    return false;
+  }
+
+  // Does no cleanup itself; it only asserts that the reader has already been
+  // closed and holds no object by the time it is destroyed.
+  ~RandomAccessTableReaderArchiveImplBase() {
+    // The child class has the responsibility to call CloseInternal().
+    KALDI_ASSERT(state_ == kUninitialized && holder_ == NULL);
+  }
+ private:
+  Input input_;       // Input object for the archive
+ protected:
+  // The variables below are accessed by child classes.
+
+  std::string cur_key_;   // current key (if state == kHaveObject).
+  Holder *holder_;     // Holds the object we just read (if state == kHaveObject)
+                       // Allocated in ReadNextObject(); NULL in all other states.
+
+  std::string rspecifier_;          // rspecifier passed to Open(); used in messages.
+  std::string archive_rxfilename_;  // Archive filename extracted from the rspecifier.
+  RspecifierOptions opts_;          // Options extracted from the rspecifier.
+
+  enum {  //  [The state of the reading process]               [does holder_ [is input_
+    //                                                         have object]   open]
+    kUninitialized,  // Uninitialized or closed                     no         no
+    kNoObject,      // Do not have object in holder_               no         yes
+    kHaveObject,    // Have object in holder_                      yes        yes
+    kEof,           // End of file                                 no         yes
+    kError,         // Some kind of error-state in the reading.    no         yes
+  } state_;
+
+};
+
+
+// RandomAccessTableReaderDSortedArchiveImpl (DSorted for "doubly sorted") is the
+// implementation for random-access reading of archives when both the archive,
+// and the calling code, are in sorted order (i.e. we ask for the keys in sorted
+// order).  This is when the s and cs options are both given.  It only ever has
+// to keep one object in memory.  It inherits from
+// RandomAccessTableReaderArchiveImplBase which implements the common parts of
+// RandomAccessTableReader that are used when it's an archive we're reading from.
+
+template<class Holder>  class RandomAccessTableReaderDSortedArchiveImpl:
+      public RandomAccessTableReaderArchiveImplBase<Holder> {
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kEof;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kError;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::state_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::opts_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::holder_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject;
+ public:
+  typedef typename Holder::T T;
+
+  // Nothing to set up here; the base-class constructor initializes the state.
+  RandomAccessTableReaderDSortedArchiveImpl() { }
+
+  // Closes the reader via the generic base-class cleanup.  Returns what
+  // CloseInternal() returns (false if an unignored error state was detected).
+  virtual bool Close() {
+    // Forget the calling-order history.  The base-class Open() allows this
+    // object to be opened again after a Close(); without this reset, the
+    // "cs" (called-sorted) check in FindKeyInternal() would compare the
+    // first key requested from the new archive against the last key
+    // requested from the old one and could fail spuriously.
+    last_requested_key_.clear();
+    return this->CloseInternal();
+  }
+
+  // Returns true if the key is found, reading ahead in the archive as far as
+  // necessary.  Keys must be requested in sorted order (see FindKeyInternal).
+  virtual bool HasKey(const std::string &key) {
+    return FindKeyInternal(key);
+  }
+  // Returns the object for `key`, reading ahead in the archive if needed.
+  // Raises KALDI_ERR if the key cannot be found.
+  virtual const T & Value(const std::string &key) {
+    if (!FindKeyInternal(key)) {
+      KALDI_ERR << "Value() called but no such key " << key
+                << " in archive " << PrintableRxfilename(archive_rxfilename_);
+      return *(const T*)NULL;  // keep compiler happy.
+    }
+    KALDI_ASSERT(this->state_ == kHaveObject && key == this->cur_key_
+                 && holder_ != NULL);
+    return this->holder_->Value();
+  }
+
+  // Closes the reader if the user did not.  If that close reports failure we
+  // are in some kind of error state that the user did not find out about by
+  // calling Close(), so it is raised here (a more specific warning will
+  // already have been printed).
+  virtual ~RandomAccessTableReaderDSortedArchiveImpl() {
+    if (this->IsOpen() && !Close()) {
+      KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is "
+                << rspecifier_;
+    }
+  }
+ private:
+  // FindKeyInternal tries to find the key by calling "ReadNextObject()" as
+  // many times as necessary till we get to it.  It is called from both
+  // HasKey() and Value().  Returns true if the current object (cur_key_,
+  // holder_) is the one for `key`; returns false if the key is not in the
+  // archive, or on end of file or read error.  Raises KALDI_ERR if the
+  // caller's keys or the archive's keys are not in sorted order, or if the
+  // reader is not open.
+  bool FindKeyInternal(const std::string &key) {
+    // First check that the user is calling us right: the requested keys
+    // should come in sorted order.  If not, error.
+    if (!last_requested_key_.empty() &&
+        key.compare(last_requested_key_) < 0) {  // key < last_requested_key_
+      KALDI_ERR << "You provided the \"cs\" option "
+                << "but are not calling with keys in sorted order: "
+                << key << " < " << last_requested_key_ << ": rspecifier is "
+                << rspecifier_;
+    }
+    // last_requested_key_ is just for debugging of order of calling.
+    last_requested_key_ = key;
+
+    // This can only happen once, the first time someone calls HasKey() or
+    // Value().  We don't do it in the initializer to stop the program
+    // hanging too soon, if reading from a pipe.
+    if (state_ == kNoObject)
+      ReadNextObject();
+
+    if (state_ == kEof || state_ == kError) return false;
+
+    if (state_ == kUninitialized)
+      KALDI_ERR << "Trying to access a RandomAccessTableReader object that is not open.";
+
+    // Used to check that the archive we're reading is in sorted order.
+    std::string previous_key;
+    for (;;) {
+      KALDI_ASSERT(state_ == kHaveObject);
+      const int order = key.compare(cur_key_);
+      if (order == 0)
+        return true;  // key == cur_key_: we got it.
+      if (order < 0) {
+        // key < cur_key_, so we already read past the place where we want to
+        // be.  Due to the sorting, this means it just isn't in the archive.
+        return false;
+      }
+      // key > cur_key_: we need to read further ahead.  Drop the current
+      // object and set the state to kNoObject before reading the next one.
+      previous_key = cur_key_;
+      KALDI_ASSERT(holder_ != NULL);
+      delete holder_;
+      holder_ = NULL;
+      state_ = kNoObject;
+      ReadNextObject();
+      if (state_ != kHaveObject)
+        return false;  // eof or read error.
+      if (cur_key_.compare(previous_key) <= 0) {
+        KALDI_ERR << "You provided the \"s\" option "
+                  << " (sorted order), but keys are out of order or duplicated: "
+                  << previous_key << " is followed by " << cur_key_
+                  << ": rspecifier is " << rspecifier_;
+      }
+    }
+  }
+
+  /// Last string provided to HasKey() or Value(); empty until the first call.
+  /// Used only to check that callers request keys in sorted order.
+  std::string last_requested_key_;
+
+
+};
+
+// RandomAccessTableReaderSortedArchiveImpl is for random-access reading of
+// archives when the user specified the sorted (s) option but not the
+// called-sorted (cs) options.
+template<class Holder>  class RandomAccessTableReaderSortedArchiveImpl:
+      public RandomAccessTableReaderArchiveImplBase<Holder> {
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kEof;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kError;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::state_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::opts_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::holder_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject;
+
+ public:
+  typedef typename Holder::T T;
+
+  // Both indexes start at static_cast<size_t>(-1), which is used throughout
+  // this class to mean "none".
+  RandomAccessTableReaderSortedArchiveImpl():
+      last_found_index_(static_cast<size_t>(-1)),
+      pending_delete_(static_cast<size_t>(-1)) { }
+
+  // Frees every object that is still cached, resets the lookup bookkeeping,
+  // and then performs the shared base-class cleanup, whose status is
+  // returned.
+  virtual bool Close() {
+    typedef typename std::vector<std::pair<std::string, Holder*> >::iterator
+        PairIter;
+    for (PairIter it = seen_pairs_.begin(); it != seen_pairs_.end(); ++it)
+      delete it->second;  // may be NULL; deleting a null pointer is a no-op.
+    seen_pairs_.clear();
+
+    const size_t none = static_cast<size_t>(-1);
+    last_found_index_ = none;
+    pending_delete_ = none;
+
+    return this->CloseInternal();
+  }
+  // Returns true if the key is in the archive, reading ahead as needed.
+  // With the once (o) option it is an error to ask about a key whose value
+  // has already been handed out and freed.
+  virtual bool HasKey(const std::string &key) {
+    HandlePendingDelete();
+    size_t index;
+    if (!FindKeyInternal(key, &index))
+      return false;
+    // Just a check RE the once option.  Testing opts_.once first is for
+    // efficiency, since a NULL holder can only happen in that case.
+    if (opts_.once && seen_pairs_[index].second == NULL) {
+      KALDI_ERR << "Error: HasKey called after Value() already called for "
+                << " that key, and once (o) option specified: rspecifier is "
+                << rspecifier_;
+    }
+    return true;
+  }
+  // Returns the object for `key`, reading ahead in the archive as needed.
+  // Raises KALDI_ERR if the key is not in the archive, or if its object has
+  // already been freed because of the once (o) option.  With the once
+  // option the returned object is freed at the start of the next HasKey()
+  // or Value() call.
+  virtual const T & Value(const std::string &key) {
+    HandlePendingDelete();
+    size_t index;
+    if (!FindKeyInternal(key, &index)) {
+      KALDI_ERR << "Value() called but no such key " << key
+                << " in archive " << PrintableRxfilename(archive_rxfilename_);
+      return *(const T*)NULL;  // keep compiler happy.
+    }
+    Holder *found = seen_pairs_[index].second;
+    if (found == NULL) {  // can happen if opts_.once is set.
+      KALDI_ERR << "Error: Value() called more than once for key "
+                << key << " and once (o) option specified: rspecifier is "
+                << rspecifier_;
+    }
+    if (opts_.once)
+      pending_delete_ = index;  // mark this index to be deleted on next call.
+    return found->Value();
+  }
+  // Closes the reader if the user did not.  If that close reports failure we
+  // are in some kind of error state that the user did not find out about by
+  // calling Close(), so it is raised here (a more specific warning will
+  // already have been printed).
+  virtual ~RandomAccessTableReaderSortedArchiveImpl() {
+    if (this->IsOpen() && !Close()) {
+      KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is "
+                << rspecifier_;
+    }
+  }
+ private:
+  // Frees the object that Value() marked for deletion, if there is one,
+  // leaving a NULL holder behind for that key.
+  void HandlePendingDelete() {
+    const size_t none = static_cast<size_t>(-1);
+    if (pending_delete_ == none) return;  // nothing is pending.
+    KALDI_ASSERT(pending_delete_ < seen_pairs_.size());
+    Holder *&doomed = seen_pairs_[pending_delete_].second;
+    KALDI_ASSERT(doomed != NULL);
+    delete doomed;
+    doomed = NULL;
+    pending_delete_ = none;
+  }
+
+  // FindKeyInternal tries to find the key in the array "seen_pairs_".
+  // If it is not already there, it reads ahead as far as necessary
+  // to determine whether we have the key or not.  On success it returns
+  // true and puts the index into the array seen_pairs_, into "index";
+  // on failure it returns false.
+  // It will leave the state as either kNoObject, kEof or kError.
+  // FindKeyInternal does not do any checking about whether you are asking
+  // about a key that has been already given (with the "once" option).
+  // That is the user's responsibility.
+  // It raises KALDI_ERR if the reader is not open, or if the keys read from
+  // the archive turn out not to be in strictly increasing order.
+
+  bool FindKeyInternal(const std::string &key, size_t *index) {
+    // First, an optimization in case the previous call was for the
+    // same key, and we found it.
+    if (last_found_index_ < seen_pairs_.size()
+       && seen_pairs_[last_found_index_].first == key) {
+      *index = last_found_index_;
+      return true;
+    }
+
+    if (state_ == kUninitialized)
+      KALDI_ERR << "Trying to access a RandomAccessTableReader object that is not open.";
+    
+    // Step one is to see whether we have to read ahead for the object..
+    // Note, the possible states right now are kNoObject, kEof or kError.
+    // We are never in the state kHaveObject except just after calling
+    // ReadNextObject().
+    bool looped = false;
+    while (state_ == kNoObject &&
+          (seen_pairs_.empty() || key.compare(seen_pairs_.back().first) > 0)) {
+      looped = true;
+      // Read this as:
+      //  while ( the stream is potentially good for reading &&
+      //        ([got no keys] || key > most_recent_key) ) { ...
+      //     Try to read a new object.
+      // Note that the keys in seen_pairs_ are ordered from least to greatest.
+      ReadNextObject();
+      if (state_ == kHaveObject) {  // Successfully read object.
+        if (!seen_pairs_.empty() && // This is just a check.
+           cur_key_.compare(seen_pairs_.back().first) <= 0) {
+          // read the expression above as: !( cur_key_ > previous_key).
+          // it means we are not in sorted order [the user specified that we
+          // are, or we would not be using this implementation].
+          KALDI_ERR << "You provided the sorted (s) option but keys in archive "
+                    << PrintableRxfilename(archive_rxfilename_) << " are not "
+                    << "in sorted order: " << seen_pairs_.back().first
+                    << " is followed by " << cur_key_;
+        }
+        KALDI_ASSERT(holder_ != NULL);
+        // Ownership of the holder moves into seen_pairs_; the base-class
+        // pointer and state are reset so that the next read can proceed.
+        seen_pairs_.push_back(std::make_pair(cur_key_, holder_));
+        holder_ = NULL;
+        state_ = kNoObject;
+      }
+      // Otherwise the state is now kEof or kError, which ends the loop.
+    }
+    if (looped) {  // We only need to check the last element of the seen_pairs_ array,
+      // since we would not have read more after getting "key".
+      if (!seen_pairs_.empty() && seen_pairs_.back().first == key) {
+        last_found_index_ = *index = seen_pairs_.size() - 1;
+        return true;
+      } else return false;
+    }
+    // Now we have do an actual binary search in the seen_pairs_ array.
+    // The probe pair carries a NULL holder; PairCompare only looks at keys.
+    std::pair<std::string, Holder*> pr(key, static_cast<Holder*>(NULL));
+    typename std::vector<std::pair<std::string, Holder*> >::iterator
+        iter = std::lower_bound(seen_pairs_.begin(), seen_pairs_.end(),
+                                pr, PairCompare());
+    if (iter != seen_pairs_.end() &&
+       key == iter->first) {
+      last_found_index_ = *index = (iter - seen_pairs_.begin());
+      return true;
+    } else return false;
+  }
+
+  // These are the pairs of (key, object) we have read.  We keep all the keys we
+  // have read but the actual objects (if they are stored with pointers inside
+  // the Holder object) may be deallocated if once == true, and the Holder
+  // pointer set to NULL.
+  std::vector<std::pair<std::string, Holder*> > seen_pairs_;
+  size_t last_found_index_;  // An optimization s.t. if FindKeyInternal called twice with
+  // same key (as it often will), it doesn't have to do the key search twice.
+  size_t pending_delete_;  // If opts_.once == true, this is the index of
+  // element of seen_pairs_ that is pending deletion.
+  struct PairCompare {
+    // Strict "less than" ordering on (key, Holder*) pairs that looks at the
+    // key only.  Const-qualified so that const functors can be invoked too.
+    inline bool operator() (const std::pair<std::string, Holder*> &pr1,
+                            const std::pair<std::string, Holder*> &pr2) const {
+      return  (pr1.first.compare(pr2.first) < 0);
+    }
+  };
+};
+
+
+
+// RandomAccessTableReaderUnsortedArchiveImpl is for random-access reading of
+// archives when the user does not specify the sorted (s) option (in this case
+// the called-sorted, or "cs" option, is ignored).  This is the least efficient
+// of the random access archive readers, in general, but it can be as efficient
+// as the others, in speed, memory and latency, if the "once" option is specified
+// and it happens that the keys of the archive are the same as the keys the code
+// is called with (to HasKey() and Value()), and in the same order.  However, if
+// you ask it for a key that's not present it will have to read the archive till
+// the end and store it all in memory.
+
+template<class Holder>  class RandomAccessTableReaderUnsortedArchiveImpl:
+      public RandomAccessTableReaderArchiveImplBase<Holder> {
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kEof;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::kError;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::state_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::opts_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::holder_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_;
+  using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject;
+
+  typedef typename Holder::T T;
+
+ public:
+  // Starts out with an empty map and no deletion pending.
+  RandomAccessTableReaderUnsortedArchiveImpl()
+      : to_delete_iter_(map_.end()), to_delete_iter_valid_(false) {
+    // Keep the hash table sparse (max load factor 0.5; the default seems
+    // to be 1) so that it stays quite efficient.
+    map_.max_load_factor(0.5);
+  }
+
+  // Deletes every Holder still stored in the map, resets the bookkeeping
+  // for the "once" option, and returns the status of CloseInternal().
+  virtual bool Close() {
+    typename MapType::iterator it = map_.begin();
+    while (it != map_.end()) {
+      delete it->second;  // deleting a NULL pointer is a harmless no-op.
+      ++it;
+    }
+    map_.clear();
+    first_deleted_string_ = "";
+    to_delete_iter_valid_ = false;
+    return this->CloseInternal();
+  }
+
+  // Returns true if the archive contains "key", reading ahead as needed.
+  virtual bool HasKey(const std::string &key) {
+    HandlePendingDelete();
+    const bool present = FindKeyInternal(key, NULL);
+    return present;
+  }
+
+  // Returns the object stored under "key"; raises an error (KALDI_ERR) if
+  // the archive has no such key.
+  virtual const T & Value(const std::string &key) {
+    HandlePendingDelete();
+    const T *value = NULL;  // only set by FindKeyInternal() on success.
+    if (!FindKeyInternal(key, &value))
+      KALDI_ERR << "Value() called but no such key " << key
+                << " in archive " << PrintableRxfilename(archive_rxfilename_);
+    return *value;
+  }
+
+  // Closes the archive if it is still open; a failure to close is an error.
+  virtual ~RandomAccessTableReaderUnsortedArchiveImpl() {
+    // If Close() fails here we were in some kind of error state that the
+    // user did not find out about by calling Close(); a more specific
+    // warning will already have been printed.
+    if (this->IsOpen() && !Close())
+      KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is "
+                << rspecifier_;
+  }
+
+ private:
+  // Performs the deletion (if any) that the previous Value() call scheduled
+  // under the "once" option, recording the first key ever deleted this way.
+  void HandlePendingDelete() {
+    if (!to_delete_iter_valid_)
+      return;
+    to_delete_iter_valid_ = false;
+    delete to_delete_iter_->second;  // Delete Holder object.
+    if (first_deleted_string_.empty())
+      first_deleted_string_ = to_delete_iter_->first;
+    map_.erase(to_delete_iter_);  // remove the entry from the map.
+  }
+
+  // FindKeyInternal tries to find the key in the map "map_".  If it is not
+  // already there, it reads ahead either until it finds the key, or until
+  // end of file (or an error).  With value_ptr == NULL it assumes it was
+  // called from HasKey() and only reports whether the key exists.  With
+  // value_ptr != NULL it assumes it was called from Value(): on success it
+  // stores the address of the value in *value_ptr and, if opts_.once is
+  // set, marks that map element for deletion on the next call.
+  bool FindKeyInternal(const std::string &key, const T **value_ptr = NULL) {
+    typename MapType::iterator pos = map_.find(key);
+    bool found = (pos != map_.end());
+
+    // Not stored yet: keep reading objects into the map until the wanted
+    // key shows up or the stream cannot deliver any more.
+    while (!found && state_ == kNoObject) {
+      ReadNextObject();
+      if (state_ != kHaveObject)
+        continue;  // nothing new; the loop condition decides what is next.
+      state_ = kNoObject;  // ownership of holder_ is about to move to map_.
+      const std::pair<typename MapType::iterator, bool> inserted =
+          map_.insert(typename MapType::value_type(cur_key_, holder_));
+      if (!inserted.second) {
+        // An element with this key already exists, so the map was left
+        // untouched and we still own the new holder: free it and bail out.
+        delete holder_;
+        holder_ = NULL;
+        KALDI_ERR << "Error in RandomAccessTableReader: duplicate key "
+                  << cur_key_ << " in archive " << archive_rxfilename_;
+      }
+      holder_ = NULL;  // ownership transferred to map_.
+      if (cur_key_ == key) {  // the one we wanted..
+        pos = inserted.first;
+        found = true;
+      }
+    }
+
+    if (!found) {
+      // We read the entire archive (or got to an error state) and did not
+      // find the key.  Inexact spot-check for misuse of the "once" option.
+      if (opts_.once && key == first_deleted_string_) {
+        KALDI_ERR << "You specified the once (o) option but "
+                  << "you are calling using key " << key
+                  << " more than once: rspecifier is " << rspecifier_;
+      }
+      return false;
+    }
+
+    if (value_ptr != NULL) {  // called from Value()
+      *value_ptr = &(pos->second->Value());
+      if (opts_.once) {  // value won't be needed again: mark for deletion.
+        to_delete_iter_ = pos;
+        KALDI_ASSERT(!to_delete_iter_valid_);
+        to_delete_iter_valid_ = true;
+      }
+    }
+    return true;
+  }
+
+  typedef unordered_map<std::string, Holder*, StringHasher>  MapType;
+  MapType map_;  // key -> Holder; the Holder objects are owned by the map.
+
+  typename MapType::iterator to_delete_iter_;  // element awaiting deletion;
+  bool to_delete_iter_valid_;                  // meaningful only if this is set.
+
+  std::string first_deleted_string_;  // keep the first string we deleted
+  // from map_ (if opts_.once == true).  It's for an inexact spot-check that the
+  // "once" option isn't being used incorrectly.
+
+};
+
+
+
+
+
+template<class Holder>
+RandomAccessTableReader<Holder>::RandomAccessTableReader(
+    const std::string &rspecifier): impl_(NULL) {
+  if (!rspecifier.empty() && !Open(rspecifier))  // empty => remain closed.
+    KALDI_ERR << "Error opening RandomAccessTableReader object "
+        " (rspecifier is: " << rspecifier << ")";
+}
+
+template<class Holder>
+bool RandomAccessTableReader<Holder>::Open(const std::string &rspecifier) {
+  if (IsOpen())
+    KALDI_ERR << "Already open.";
+  // Work out what kind of table the rspecifier names and which modifiers
+  // (sorted, called-sorted, ...) it carries; the filename is not needed here.
+  RspecifierOptions opts;
+  const RspecifierType kind = ClassifyRspecifier(rspecifier, NULL, &opts);
+  if (kind == kScriptRspecifier) {
+    impl_ = new RandomAccessTableReaderScriptImpl<Holder>();
+  } else if (kind == kArchiveRspecifier) {
+    // Choose the archive reader matching the ordering guarantees given.
+    if (!opts.sorted)
+      impl_ = new RandomAccessTableReaderUnsortedArchiveImpl<Holder>();
+    else if (opts.called_sorted)  // "doubly" sorted case.
+      impl_ = new RandomAccessTableReaderDSortedArchiveImpl<Holder>();
+    else
+      impl_ = new RandomAccessTableReaderSortedArchiveImpl<Holder>();
+  } else {  // kNoRspecifier, or any unexpected value.
+    KALDI_WARN << "Invalid rspecifier: "
+               << rspecifier;
+    return false;
+  }
+  // Let the chosen implementation open the table; discard it on failure.
+  if (!impl_->Open(rspecifier)) {
+    // Warning will already have been printed.
+    delete impl_;
+    impl_ = NULL;
+    return false;
+  }
+  return true;
+}
+
+template<class Holder>
+bool RandomAccessTableReader<Holder>::HasKey(const std::string &key) {
+  CheckImpl();  // dies unless the reader is open.
+  const bool key_is_token = IsToken(key);
+  if (!key_is_token) KALDI_ERR << "Invalid key \"" << key << '"';
+  return impl_->HasKey(key);  // delegate to the implementation.
+}
+
+
+template<class Holder> const typename RandomAccessTableReader<Holder>::T &
+RandomAccessTableReader<Holder>::Value(const std::string &key) {
+  CheckImpl();  // raises an error if no table is open.
+  const T &held = impl_->Value(key);
+  return held;  // note: the key is not validated here, unlike in HasKey().
+}
+
+template<class Holder>
+bool RandomAccessTableReader<Holder>::Close() {
+  CheckImpl();  // closing a reader that is not open is an error.
+  const bool closed_ok = impl_->Close();
+  delete impl_;  // discarded whether or not its Close() succeeded.
+  impl_ = NULL;
+  return closed_ok;
+}
+
+template<class Holder>
+RandomAccessTableReader<Holder>::~RandomAccessTableReader() {
+  if (!IsOpen() || Close()) return;  // already closed, or closed cleanly now.
+  KALDI_ERR << "failure detected in destructor.";  // call Close() to avoid.
+}
+
+template<class Holder>
+void SequentialTableReader<Holder>::CheckImpl() const {
+  // Nothing to do when an implementation is attached; otherwise fail loudly.
+  if (impl_ != NULL) return;
+  KALDI_ERR << "Trying to use empty SequentialTableReader (perhaps you "
+            << "passed the empty string as an argument to a program?)";
+}
+
+template<class Holder>
+void RandomAccessTableReader<Holder>::CheckImpl() const {
+  // Nothing to do when an implementation is attached; otherwise fail loudly.
+  if (impl_ != NULL) return;
+  KALDI_ERR << "Trying to use empty RandomAccessTableReader (perhaps you "
+            << "passed the empty string as an argument to a program?)";
+}
+
+template<class Holder>
+void TableWriter<Holder>::CheckImpl() const {
+  // Nothing to do when an implementation is attached; otherwise fail loudly.
+  if (impl_ != NULL) return;
+  KALDI_ERR << "Trying to use empty TableWriter (perhaps you "
+            << "passed the empty string as an argument to a program?)";
+}
+
+template<class Holder>
+RandomAccessTableReaderMapped<Holder>::RandomAccessTableReaderMapped(
+    const std::string &table_rxfilename,
+    const std::string &utt2spk_rxfilename)
+    : reader_(table_rxfilename),  // stays closed if the name is empty.
+      token_reader_(table_rxfilename.empty() ? "" : utt2spk_rxfilename),
+      utt2spk_rxfilename_(utt2spk_rxfilename) { }  // kept for diagnostics.
+
+template<class Holder>
+bool RandomAccessTableReaderMapped<Holder>::Open(
+    const std::string &table_rxfilename,
+    const std::string &utt2spk_rxfilename) {
+  // Re-opening is allowed: drop whatever was open before.
+  if (reader_.IsOpen()) reader_.Close();
+  if (token_reader_.IsOpen()) token_reader_.Close();
+  KALDI_ASSERT(!table_rxfilename.empty());
+  // Keep the map name current so that HasKey()/Value() diagnostics name the
+  // file actually being read rather than the one given at construction.
+  utt2spk_rxfilename_ = utt2spk_rxfilename;
+  if (!reader_.Open(table_rxfilename)) return false;  // warning likely printed.
+  if (utt2spk_rxfilename.empty()) return true;  // no key mapping requested.
+  if (token_reader_.Open(utt2spk_rxfilename)) return true;
+  reader_.Close();  // don't leave the table open without its map.
+  return false;
+}
+
+
+template<class Holder>
+bool RandomAccessTableReaderMapped<Holder>::HasKey(const std::string &utt) {
+  // IsOpen() is deliberately not checked here: reader_ reports a closed
+  // table itself, with a more informative message than we could produce.
+  if (!token_reader_.IsOpen())
+    return reader_.HasKey(utt);  // no map in use: look the key up directly.
+
+  // A map is in use, so "utt" must first be translated (e.g. utt -> spk);
+  // a key missing from the map itself is treated as a fatal error.
+  if (!token_reader_.HasKey(utt))
+    KALDI_ERR << "Attempting to read key " << utt << ", which is not present "
+              << "in utt2spk map or similar map being read from "
+              << PrintableRxfilename(utt2spk_rxfilename_);
+  const std::string &mapped_key = token_reader_.Value(utt);
+  return reader_.HasKey(mapped_key);
+}
+
+template<class Holder>
+const typename Holder::T& RandomAccessTableReaderMapped<Holder>::Value(
+    const std::string &utt) {
+  if (!token_reader_.IsOpen())
+    return reader_.Value(utt);  // no map in use: fetch by the key as given.
+
+  // Translate the key through the map first; absence from the map is fatal.
+  if (!token_reader_.HasKey(utt))
+    KALDI_ERR << "Attempting to read key " << utt << ", which is not present "
+              << "in utt2spk map or similar map being read from "
+              << PrintableRxfilename(utt2spk_rxfilename_);
+  const std::string &mapped_key = token_reader_.Value(utt);
+  return reader_.Value(mapped_key);
+}
+
+
+
+/// @}
+
+} // end namespace kaldi
+
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/util/kaldi-table.h b/kaldi_io/src/kaldi/util/kaldi-table.h
new file mode 100644
index 0000000..6f6cb98
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/kaldi-table.h
@@ -0,0 +1,459 @@
+// util/kaldi-table.h
+
+// Copyright 2009-2011    Microsoft Corporation
+//                2013    Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_UTIL_KALDI_TABLE_H_
+#define KALDI_UTIL_KALDI_TABLE_H_
+
+#include <string>
+#include <vector>
+#include <utility>
+
+#include "base/kaldi-common.h"
+#include "util/kaldi-holder.h"
+
+namespace kaldi {
+
+// Forward declarations
+template<class Holder> class RandomAccessTableReaderImplBase;
+template<class Holder>  class SequentialTableReaderImplBase;
+template<class Holder>  class TableWriterImplBase;
+
+/// \addtogroup table_group
+/// @{
+
+// This header defines the Table classes (RandomAccessTableReader,
+// SequentialTableReader and TableWriter) and explains what the Holder classes,
+// which the Table class requires as a template argument, are like.  It also
+// explains the "rspecifier" and "wspecifier" concepts (these are strings that
+// explain how to read/write objects via archives or scp files).  A table is
+// conceptually a collection of objects of a particular type T indexed by keys
+// of type std::string (these Keys additionally have an order within each table).
+// The Table classes are templated on a type (call it Holder) such that Holder::T
+// is a typedef equal to T.
+
+// see kaldi-holder.h for detail on the Holder classes.
+
+typedef std::vector<std::string> KeyList;
+
+// Documentation for "wspecifier"
+// "wspecifier" describes how we write a set of objects indexed by keys.
+// The basic, unadorned wspecifiers are as follows:
+//
+//  ark:wxfilename
+//  scp:rxfilename
+//  ark,scp:filename,wxfilename
+//  ark,scp:filename,wxfilename
+//
+//
+//  We also allow the following modifiers:
+//  t means text mode.
+//  b means binary mode.
+//  f means flush the stream after writing each entry.
+//   (nf means don't flush; note that WspecifierOptions defaults to not flushing).
+//  p means permissive mode, when writing to an "scp" file only: will ignore
+//     missing scp entries, i.e. won't write anything for those files but will
+//     return success status.
+//
+//  So the following are valid wspecifiers:
+//  ark,b,f:foo
+//  "ark,b,b:| gzip -c > foo"
+//  "ark,scp,t,nf:foo.ark,|gzip -c > foo.scp.gz"
+//  ark,b:-
+//
+//  The meanings of rxfilename and wxfilename are as described in
+//  kaldi-stream.h (they are filenames but include pipes, stdin/stdout
+//  and so on); filename is a regular filename.
+//
+
+//  The ark:wxfilename type of wspecifier instructs the class to
+//  write directly to an archive.  For small objects (e.g. lists of ints),
+//  the text archive format will generally be human readable with one line
+//  per entry in the archive.
+//
+//  The type "scp:xfilename" refers to an scp file which should
+//  already exist on disk, and tells us where to write the data for
+//  each key (usually an actual file); each line of the scp file
+//  would be:
+//   key xfilename
+//
+//  The type ark,scp:filename,wxfilename means
+//  we write both an archive and an scp file that specifies offsets into the
+//  archive, with lines like:
+//    key filename:12407
+//  where the number is the byte offset into the file.
+//  In this case we restrict the archive-filename to be an actual filename,
+//  as we can't see a situation where an extended filename would make sense
+//  for this (we can't fseek() in pipes).
+
+enum WspecifierType  {  // Return type of ClassifyWspecifier().
+  kNoWspecifier,  // presumably: not a valid wspecifier (TODO confirm).
+  kArchiveWspecifier,  // presumably the "ark:wxfilename" form.
+  kScriptWspecifier,  // presumably the "scp:rxfilename" form.
+  kBothWspecifier  // presumably the "ark,scp:filename,wxfilename" form.
+};
+
+struct WspecifierOptions {
+  bool binary;  // defaults to true; cf. the "b"/"t" modifiers described above.
+  bool flush;  // defaults to false; cf. the "f"/"nf" modifiers described above.
+  bool permissive; // will ignore absent scp entries.
+  WspecifierOptions(): binary(true), flush(false), permissive(false) { }
+};
+
+// ClassifyWspecifier returns the type of the wspecifier string,
+// and (if pointers are non-NULL) outputs the extra information
+// about the options, and the script and archive
+// filenames.
+WspecifierType ClassifyWspecifier(const std::string &wspecifier,
+                                  std::string *archive_wxfilename,
+                                  std::string *script_wxfilename,
+                                  WspecifierOptions *opts);
+
+// ReadScriptFile reads an .scp file in its entirety, and appends it
+// (in order as it was in the scp file) to *script_out, which contains
+// pairs of (key, xfilename).  The .scp
+// file format is: on each line, key xfilename
+// where xfilename means rxfilename or wxfilename, and may contain internal spaces
+// (we trim away any leading or trailing space).  The key is space-free.
+// ReadScriptFile returns true if the format was valid (empty files
+// are valid).
+// If 'print_warnings', it will print out warning messages that explain what kind
+// of error there was.
+bool ReadScriptFile(const std::string &rxfilename,
+                    bool print_warnings,
+                    std::vector<std::pair<std::string, std::string> > *script_out);
+
+// This version of ReadScriptFile works from an istream.
+bool ReadScriptFile(std::istream &is,
+                    bool print_warnings,
+                    std::vector<std::pair<std::string, std::string> > *script_out);
+
+// Writes, for each entry in script, the first element, then ' ', then the second
+// element then '\n'.  Checks that the keys (first elements of pairs) are valid
+// tokens (nonempty, no whitespace), and the values (second elements of pairs)
+// are newline-free and contain no leading or trailing space.  Returns true on
+// success.
+bool WriteScriptFile(const std::string &wxfilename,
+                     const std::vector<std::pair<std::string, std::string> > &script);
+
+// This version writes to an ostream.
+bool WriteScriptFile(std::ostream &os,
+                     const std::vector<std::pair<std::string, std::string> > &script);
+
+// Documentation for "rspecifier"
+// "rspecifier" describes how we read a set of objects indexed by keys.
+// The possibilities are:
+//
+// ark:rxfilename
+// scp:rxfilename
+//
+// We also allow various modifiers:
+//   o   means the program will only ask for each key once, which enables
+//       the reader to discard already-asked-for values.
+//   s   means the keys are sorted on input (means we don't have to read till
+//       eof if someone asked for a key that wasn't there).
+//   cs  means that it is called in sorted order (we are generally asserting this
+//       based on knowledge of how the program works).
+//   p   means "permissive", and causes it to skip over keys whose corresponding
+//       scp-file entries cannot be read. [and to ignore errors in archives and
+//       script files, and just consider the "good" entries].
+//       We allow the negation of the options above, as in no, ns, np,
+//       but these aren't currently very useful (just equivalent to omitting the
+//       corresponding option).
+//      [any of the above options can be prefixed by n to negate them, e.g. no, ns,
+//       ncs, np; but these aren't currently useful as you could just omit the option].
+//
+//   b   is ignored [for scripting convenience]
+//   t   is ignored [for scripting convenience]
+//
+//
+//  So for instance the following would be a valid rspecifier:
+//
+//   "o, s, p, ark:gunzip -c foo.gz|"
+
+struct  RspecifierOptions {
+  // These options only make a difference for the RandomAccessTableReader class.
+  bool once;   // we assert that the program will only ask for each key once.
+  bool sorted;  // we assert that the keys are sorted.
+  bool called_sorted;  // we assert that the HasKey() and Value() functions will
+  // also be called in sorted order.  [this implies "once" but not vice versa].
+  bool permissive;  // If "permissive", when reading from scp files it treats
+  // scp files that can't be read as if the corresponding key were not there.
+  // For archive files it will suppress errors getting thrown if the archive
+  // is corrupted and can't be read to the end.
+
+  // All options default to false.
+  RspecifierOptions(): once(false), sorted(false),
+                       called_sorted(false), permissive(false) { }
+};
+
+enum RspecifierType  {  // Return type of ClassifyRspecifier().
+  kNoRspecifier,  // RandomAccessTableReader::Open() treats this as invalid.
+  kArchiveRspecifier,  // Open() picks one of the archive reader classes.
+  kScriptRspecifier  // Open() picks the script (scp) reader class.
+};
+
+RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename,
+                                  RspecifierOptions *opts);
+
+// Class Table<Holder> is useful when you want the entire set of
+// objects in memory.  NOT IMPLEMENTED YET.
+// It is the least scalable way of accessing data in Tables.
+// The *TableReader and TableWriter classes are more scalable.
+
+
+/// Allows random access to a collection
+/// of objects in an archive or script file; see \ref io_sec_tables.
+template<class Holder>
+class RandomAccessTableReader {
+ public:
+  typedef typename Holder::T T;
+
+  RandomAccessTableReader(): impl_(NULL) { }  // starts out not open.
+
+  // This constructor is equivalent to the default constructor followed by
+  // Open(), but throws on error; an empty rspecifier leaves it unopened.
+  RandomAccessTableReader(const std::string &rspecifier);
+
+  // Opens the table; returns false on failure.  Must not already be open.
+  bool Open(const std::string &rspecifier);
+
+  // Returns true if table is open.
+  bool IsOpen() const { return (impl_ != NULL); }
+
+  // Close() will close the table [throws if it was not open],
+  // and returns true on success (false if we were reading an
+  // archive and we discovered an error in the archive).
+  bool Close();
+
+  // Says if it has this key; throws if the table is not open or if the key
+  // is not a valid token.  If you are using the "permissive" (p) read
+  // option, it will return false for keys whose corresponding entry
+  // in the scp file cannot be read.
+  bool HasKey(const std::string &key);
+
+  // Value() may throw if you are reading an scp file, you
+  // do not have the "permissive" (p) option, and an entry
+  // in the scp file cannot be read.  Typically you won't
+  // want to catch this error.
+  const T &Value(const std::string &key);
+
+  // Throws if the table is still open and closing it reports a failure.
+  ~RandomAccessTableReader();
+
+  // Allow copy-constructor only for non-opened readers (needed for inclusion in
+  // stl vector)
+  RandomAccessTableReader(const RandomAccessTableReader<Holder> &other):
+      impl_(NULL) { KALDI_ASSERT(other.impl_ == NULL); }
+ private:
+  // Disallow assignment.
+  RandomAccessTableReader &operator=(const RandomAccessTableReader<Holder>&);
+  void CheckImpl() const; // Checks that impl_ is non-NULL; prints an error
+                          // message and dies (with KALDI_ERR) if NULL.
+  RandomAccessTableReaderImplBase<Holder> *impl_;
+};
+
+
+
+/// A templated class for reading objects sequentially from an archive or script
+/// file; see \ref io_sec_tables.
+template<class Holder>
+class SequentialTableReader {
+ public:
+  typedef typename Holder::T T;
+
+  SequentialTableReader(): impl_(NULL) { }
+
+  // This constructor is equivalent to the default constructor followed by
+  // Open(), but throws on error.
+  SequentialTableReader(const std::string &rspecifier);
+
+  // Opens the table.  Returns exit status; but does throw if previously
+  // open stream was in error state.  Call Close to stop this [anyway,
+  // calling Open more than once is not recommended.]
+  bool Open(const std::string &rspecifier);
+
+  // Returns true if we're done.  It will also return true if there's some kind
+  // of error and we can't read any more; in this case, you can detect the
+  // error by calling Close and checking the return status; otherwise
+  // the destructor will throw.
+  inline bool Done();
+
+  // Only valid to call Key() if Done() returned false.
+  inline std::string Key();
+
+  // FreeCurrent() is provided as an optimization to save memory, for large
+  // objects.  It instructs the class to deallocate the current value. The
+  // reference Value() will be invalidated by this.
+
+  void FreeCurrent();
+
+  // Return reference to the current value.
+  // The reference is valid till next call to this object.
+  // It will throw if you are reading an scp file, did not
+  // specify the "permissive" (p) option and the file cannot
+  // be read.  [The permissive option makes it behave as if that
+  // key does not even exist, if the corresponding file cannot be
+  // read.]  You probably wouldn't want to catch this exception;
+  // the user can just specify the p option in the rspecifier.
+  const T &Value();
+
+  // Next goes to the next key.  It will not throw; any error will
+  // result in Done() returning true, and then the destructor will
+  // throw unless you call Close().
+  void Next();
+
+  // Returns true if table is open for reading (does not imply
+  // stream is in good state).
+  bool IsOpen() const;
+
+  // Close() will return false (failure) if Done() became true
+  // because of an error condition rather than because we are
+  // really done [e.g. because of an error or early termination
+  // in the archive].
+  // If there is an error and you don't call Close(), the destructor
+  // will fail (i.e. throw; see the comment on the destructor below).
+  // Call Close() explicitly if you want to handle such errors yourself.
+  bool Close();
+
+  // The destructor may throw.  This is the desired behaviour, as it's the way we
+  // signal the error to the user (to detect it, call Close()).  The issue is that
+  // otherwise the user has no way to tell whether Done() returned true because
+  // we reached the end of the archive or script, or because there was an error
+  // that prevented further reading.
+  ~SequentialTableReader();
+
+  // Allow copy-constructor only for non-opened readers (needed for inclusion in
+  // stl vector)
+  SequentialTableReader(const SequentialTableReader<Holder> &other):
+      impl_(NULL) { KALDI_ASSERT(other.impl_ == NULL); }
+ private:
+  // Disallow assignment.
+  SequentialTableReader &operator = (const SequentialTableReader<Holder>&);
+  void CheckImpl() const; // Checks that impl_ is non-NULL; prints an error
+                          // message and dies (with KALDI_ERR) if NULL.
+  SequentialTableReaderImplBase<Holder> *impl_;
+};
+
+
+/// A templated class for writing objects to an
+/// archive or script file; see \ref io_sec_tables.
+template<class Holder>
+class TableWriter {
+ public:
+  typedef typename Holder::T T;
+
+  TableWriter(): impl_(NULL) { }
+
+  // This constructor is equivalent to the default constructor
+  // followed by Open(), but throws on error.  See docs for
+  // wspecifier above.
+  TableWriter(const std::string &wspecifier);
+
+  // Opens the table.  See docs for wspecifier above.
+  // If it returns true, it is open.
+  bool Open(const std::string &wspecifier);
+
+  // Returns true if open for writing.
+  bool IsOpen() const;
+
+  // Write the object.  Throws  std::runtime_error on error (via the
+  // KALDI_ERR macro)
+  inline void Write(const std::string &key, const T &value) const;
+
+  // Flush will flush any archive; it does not return error status
+  // or throw, any errors will be reported on the next Write or Close.
+  // Useful if we may be writing to a command in a pipe and want
+  // to ensure good CPU utilization.
+  void Flush();
+
+  // Close() is not necessary to call, as the destructor
+  // closes it; it's mainly useful if you want to handle
+  // error states because the destructor will throw on
+  // error if you do not call Close().
+  bool Close();
+
+  // Closes the table if needed; throws on error (see Close() above).
+  ~TableWriter();
+
+  // Allow copy-constructor only for non-opened writers (needed for inclusion in
+  // stl vector)
+  TableWriter(const TableWriter &other): impl_(NULL) {
+    KALDI_ASSERT(other.impl_ == NULL);
+  }
+ private:
+  TableWriter &operator = (const TableWriter&); // Disallow assignment.
+  void CheckImpl() const; // Checks that impl_ is non-NULL; prints an error
+                          // message and dies (with KALDI_ERR) if NULL.
+  TableWriterImplBase<Holder> *impl_;
+};
+
+
+/// This class is for when you are reading something in random access, but
+/// it may actually be stored per-speaker (or something similar) but the 
+/// keys you're using are per utterance.  So you also provide an "rxfilename"
+/// for a file containing lines like
+/// utt1 spk1
+/// utt2 spk1
+/// utt3 spk1
+/// and so on.  Note: this is optional; if it is an empty string, we just won't
+/// do the mapping.  Also, "table_rxfilename" may be the empty string (as for
+/// a regular table), in which case the table just won't be opened.
+/// We provide only the most frequently used of the functions of RandomAccessTableReader.
+
+template<class Holder>
+class RandomAccessTableReaderMapped {
+ public:
+  typedef typename Holder::T T;
+  /// Note: "utt2spk_rxfilename" will in the normal case be an rxfilename
+  /// for an utterance to speaker map, but this code is general; it accepts
+  /// a generic map.
+  RandomAccessTableReaderMapped(const std::string &table_rxfilename,
+                                const std::string &utt2spk_rxfilename);
+
+  RandomAccessTableReaderMapped() {};
+
+  /// Note: when calling Open, utt2spk_rxfilename may be empty.
+  bool Open(const std::string &table_rxfilename,
+            const std::string &utt2spk_rxfilename);
+
+  bool HasKey(const std::string &key);  // key is mapped first if a map is open.
+  const T &Value(const std::string &key);  // likewise maps the key if needed.
+  inline bool IsOpen() const { return reader_.IsOpen(); }
+  inline bool Close() { return reader_.Close(); }
+  // NOTE(review): IsOpen() and Close() look only at the table reader; the
+  // map reader (token_reader_) is not closed by Close() here.
+
+  // The default copy-constructor will do what we want: it will crash
+  // for already-opened readers, by calling the member-variable copy-constructors.
+ private:
+  // Disallow assignment.
+  RandomAccessTableReaderMapped &operator=(const RandomAccessTableReaderMapped<Holder>&);
+  RandomAccessTableReader<Holder> reader_;
+  RandomAccessTableReader<TokenHolder> token_reader_;
+  std::string utt2spk_rxfilename_; // Used only in diagnostic messages.
+};
+
+
+/// @} end "addtogroup table_group"
+} // end namespace kaldi
+
+#include "kaldi-table-inl.h"
+
+#endif  // KALDI_UTIL_KALDI_TABLE_H_
diff --git a/kaldi_io/src/kaldi/util/parse-options.h b/kaldi_io/src/kaldi/util/parse-options.h
new file mode 100644
index 0000000..f563b54
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/parse-options.h
@@ -0,0 +1,264 @@
+// util/parse-options.h
+
+// Copyright 2009-2011  Karel Vesely;  Microsoft Corporation;
+//                      Saarland University (Author: Arnab Ghoshal);
+// Copyright 2012-2013  Frantisek Skala;  Arnab Ghoshal
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_UTIL_PARSE_OPTIONS_H_
+#define KALDI_UTIL_PARSE_OPTIONS_H_
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "base/kaldi-common.h"
+#include "itf/options-itf.h"
+
+namespace kaldi {
+
+/// The class ParseOptions is for parsing command-line options; see
+/// \ref parse_options for more documentation.
+class ParseOptions : public OptionsItf {
+ public:
+  explicit ParseOptions(const char *usage) :
+    print_args_(true), help_(false), usage_(usage), argc_(0), argv_(NULL),
+    prefix_(""), other_parser_(NULL) {
+#ifndef _MSC_VER  // This is just a convenient place to set the stderr to line
+    setlinebuf(stderr);  // buffering mode, since it's called at program start.
+#endif  // This helps ensure different programs' output is not mixed up.
+    RegisterStandard("config", &config_, "Configuration file to read (this "
+                     "option may be repeated)");
+    RegisterStandard("print-args", &print_args_,
+                     "Print the command line arguments (to stderr)");
+    RegisterStandard("help", &help_, "Print out usage message");
+    RegisterStandard("verbose", &g_kaldi_verbose_level,
+                     "Verbose level (higher->more logging)");
+  }
+
+  /**
+    This is a constructor for the special case where some options are
+    registered with a prefix to avoid conflicts.  The object thus created will
+    only be used temporarily to register an options class with the original
+    options parser (which is passed as the *other pointer) using the given
+    prefix.  It should not be used for any other purpose, and the prefix must
+    not be the empty string.  It seems to be the least bad way of implementing
+    options with prefixes at this point.
+    Example of usage is:
+     ParseOptions po(usage);  // original ParseOptions object
+     ParseOptions po_mfcc("mfcc", &po); // object with prefix.
+     MfccOptions mfcc_opts;
+     mfcc_opts.Register(&po_mfcc);
+    The options will now get registered as, e.g., --mfcc.frame-shift=10.0
+    instead of just --frame-shift=10.0
+   */
+  ParseOptions(const std::string &prefix, OptionsItf *other);
+
+  ~ParseOptions() {}
+
+  // Methods from the interface
+  void Register(const std::string &name,
+                bool *ptr, const std::string &doc); 
+  void Register(const std::string &name,
+                int32 *ptr, const std::string &doc); 
+  void Register(const std::string &name,
+                uint32 *ptr, const std::string &doc); 
+  void Register(const std::string &name,
+                float *ptr, const std::string &doc); 
+  void Register(const std::string &name,
+                double *ptr, const std::string &doc); 
+  void Register(const std::string &name,
+                std::string *ptr, const std::string &doc);
+
+  /// If called after registering an option and before calling
+  /// Read(), disables that option from being used.  Will crash
+  /// at runtime if that option had not been registered.
+  void DisableOption(const std::string &name);
+
+  /// This one is used for registering standard parameters of all the programs
+  template<typename T>
+  void RegisterStandard(const std::string &name,
+                        T *ptr, const std::string &doc);
+
+  /**
+    Parses the command line options and fills the ParseOptions-registered
+    variables. This must be called after all the variables were registered!!!
+   
+    Initially the variables have implicit values,
+    then the config file values are set-up,
+    finally the command line values given.
+    Returns the first position in argv that was not used.
+    [typically not useful: use NumArgs() and GetArg(). ]
+   */
+  int Read(int argc, const char *const *argv);
+
+  /// Prints the usage documentation [provided in the constructor].
+  void PrintUsage(bool print_command_line = false);
+  /// Prints the actual configuration of all the registered variables
+  void PrintConfig(std::ostream &os);
+
+  /// Reads the options values from a config file.  Must be called after
+  /// registering all options.  This is usually used internally after the
+  /// standard --config option is used, but it may also be called from a
+  /// program.
+  void ReadConfigFile(const std::string &filename);
+
+  /// Number of positional parameters (c.f. argc-1).
+  int NumArgs() const;
+
+  /// Returns one of the positional parameters; 1-based indexing for argc/argv
+  /// compatibility. Will crash if param is not >=1 and <=NumArgs().
+  std::string GetArg(int param) const;
+
+  std::string GetOptArg(int param) const {
+    return (param <= NumArgs() ? GetArg(param) : "");
+  }
+
+  /// The following function will return a possibly quoted and escaped
+  /// version of "str", according to the current shell.  Currently
+  /// this is just hardwired to bash.  It's useful for debug output.
+  static std::string Escape(const std::string &str);
+
+ private:
+  /// Template to register various variable types,
+  /// used for program-specific parameters
+  template<typename T>
+  void RegisterTmpl(const std::string &name, T *ptr, const std::string &doc);
+
+  // Following functions do just the datatype-specific part of the job
+  /// Register boolean variable
+  void RegisterSpecific(const std::string &name, const std::string &idx,
+                        bool *b, const std::string &doc, bool is_standard);
+  /// Register int32 variable
+  void RegisterSpecific(const std::string &name, const std::string &idx,
+                        int32 *i, const std::string &doc, bool is_standard);
+  /// Register unsigned int32 variable
+  void RegisterSpecific(const std::string &name, const std::string &idx,
+                        uint32 *u,
+                        const std::string &doc, bool is_standard);
+  /// Register float variable
+  void RegisterSpecific(const std::string &name, const std::string &idx,
+                        float *f, const std::string &doc, bool is_standard);
+  /// Register double variable [useful as we change BaseFloat type].
+  void RegisterSpecific(const std::string &name, const std::string &idx,
+                        double *f, const std::string &doc, bool is_standard);
+  /// Register string variable
+  void RegisterSpecific(const std::string &name, const std::string &idx,
+                        std::string *s, const std::string &doc,
+                        bool is_standard);
+
+  /// Does the actual job for both kinds of parameters
+  /// Does the common part of the job for all datatypes,
+  /// then calls RegisterSpecific
+  template<typename T>
+  void RegisterCommon(const std::string &name,
+                      T *ptr, const std::string &doc, bool is_standard);
+
+  /// SplitLongArg parses an argument of the form --a=b, --a=, or --a,
+  /// and sets "has_equal_sign" to true if an equals-sign was parsed.
+  /// This is needed in order to correctly allow --x for a boolean option
+  /// x, and --y= for a string option y, and to disallow --x= and --y.
+  void SplitLongArg(std::string in, std::string *key, std::string *value,
+                    bool *has_equal_sign);
+  
+  void NormalizeArgName(std::string *str);
+
+  /// Set option with name "key" to "value"; will crash if can't do it.
+  /// "has_equal_sign" is used to allow --x for a boolean option x,
+  /// and --y=, for a string option y.
+  bool SetOption(const std::string &key, const std::string &value,
+                 bool has_equal_sign);
+
+  bool ToBool(std::string str);
+  int32 ToInt(std::string str);
+  uint32 ToUInt(std::string str);
+  float ToFloat(std::string str);
+  double ToDouble(std::string str);
+
+  // maps for option variables
+  std::map<std::string, bool*> bool_map_;
+  std::map<std::string, int32*> int_map_;
+  std::map<std::string, uint32*> uint_map_;
+  std::map<std::string, float*> float_map_;
+  std::map<std::string, double*> double_map_;
+  std::map<std::string, std::string*> string_map_;
+
+  /**
+     Structure for options' documentation
+   */
+  struct DocInfo {
+    DocInfo() {}
+    DocInfo(const std::string &name, const std::string &usemsg)
+      : name_(name), use_msg_(usemsg), is_standard_(false) {}
+    DocInfo(const std::string &name, const std::string &usemsg,
+            bool is_standard)
+      : name_(name), use_msg_(usemsg),  is_standard_(is_standard) {}
+
+    std::string name_;
+    std::string use_msg_;
+    bool is_standard_;
+  };
+  typedef std::map<std::string, DocInfo> DocMapType;
+  DocMapType doc_map_;  ///< map for the documentation
+
+  bool print_args_;     ///< variable for the implicit --print-args parameter
+  bool help_;           ///< variable for the implicit --help parameter
+  std::string config_;  ///< variable for the implicit --config parameter
+  std::vector<std::string> positional_args_;
+  const char *usage_;
+  int argc_;
+  const char *const *argv_;
+
+  /// These members are not normally used. They are only used when the object
+  /// is constructed with a prefix
+  std::string prefix_;
+  OptionsItf *other_parser_;
+};
+
+/// This template is provided for convenience in reading config classes from
+/// files; this is not the standard way to read configuration options, but may
+/// occasionally be needed.  This function assumes the config has a function
+/// "void Register(OptionsItf *po)" which it can call to register the
+/// ParseOptions object.
+template<class C> void ReadConfigFromFile(const std::string config_filename,
+                                          C *c) {
+  // ParseOptions stores only the usage pointer, so the string it points into
+  // must stay alive for as long as "po" does.
+  const std::string usage = "Parsing config from '" + config_filename + "'";
+  ParseOptions po(usage.c_str());
+  c->Register(&po);
+  po.ReadConfigFile(config_filename);
+}
+
+/// This variant of the template ReadConfigFromFile is for if you need to read
+/// two config classes from the same file.
+template<class C1, class C2> void ReadConfigsFromFile(const std::string config_filename,
+                                                      C1 *c1, C2 *c2) {
+  // ParseOptions stores only the usage pointer, so the string it points into
+  // must stay alive for as long as "po" does.
+  const std::string usage = "Parsing config from '" + config_filename + "'";
+  ParseOptions po(usage.c_str());
+  c1->Register(&po);
+  c2->Register(&po);
+  po.ReadConfigFile(config_filename);
+}
+
+
+
+}  // namespace kaldi
+
+#endif  // KALDI_UTIL_PARSE_OPTIONS_H_
diff --git a/kaldi_io/src/kaldi/util/simple-io-funcs.h b/kaldi_io/src/kaldi/util/simple-io-funcs.h
new file mode 100644
index 0000000..56573e4
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/simple-io-funcs.h
@@ -0,0 +1,56 @@
+// util/simple-io-funcs.h
+
+// Copyright 2009-2011  Microsoft Corporation;  Jan Silovsky
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+#ifndef KALDI_UTIL_SIMPLE_IO_FUNCS_H_
+#define KALDI_UTIL_SIMPLE_IO_FUNCS_H_
+
+#include "kaldi-io.h"
+
+// This header contains some utilities for reading some common, simple text formats:
+// integers in files, one per line, and integers in files, possibly multiple per line.
+// these are not really fully native Kaldi formats; they are mostly for small files that
+// might be generated by scripts, and can be read all at one time.
+// for longer files of this type, we would probably use the Table code.
+
+namespace kaldi {
+
+/// WriteIntegerVectorSimple attempts to write this list of integers, one per
+/// line, to the given file, in text format.
+/// Returns true if succeeded.
+bool WriteIntegerVectorSimple(std::string wxfilename, const std::vector<int32> &v);
+
+/// ReadIntegerVectorSimple attempts to read this list of integers, one per
+/// line, from the given file, in text format.
+/// Returns true if succeeded.
+bool ReadIntegerVectorSimple(std::string rxfilename, std::vector<int32> *v);
+
+// This is a file format like:
+// 1 2
+// 3
+//
+// 4 5 6
+// etc.
+bool WriteIntegerVectorVectorSimple(std::string wxfilename, const std::vector<std::vector<int32> > &v);
+
+bool ReadIntegerVectorVectorSimple(std::string rxfilename, std::vector<std::vector<int32> > *v);
+
+
+}  // end namespace kaldi.
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/util/simple-options.h b/kaldi_io/src/kaldi/util/simple-options.h
new file mode 100644
index 0000000..58816af
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/simple-options.h
@@ -0,0 +1,112 @@
+// util/simple-options.h
+
+// Copyright 2013  Tanel Alumae, Tallinn University of Technology
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_UTIL_SIMPLE_OPTIONS_H_
+#define KALDI_UTIL_SIMPLE_OPTIONS_H_
+
+#include <map>
+#include <string>
+#include <vector>
+
+#include "base/kaldi-common.h"
+#include "itf/options-itf.h"
+
+namespace kaldi {
+
+
+/// The class SimpleOptions is an implementation of OptionsItf that allows
+/// setting and getting option values programmatically, i.e., via getter
+/// and setter methods. It doesn't provide any command line parsing functionality.
+/// The class ParseOptions should be used for command-line options.
+class SimpleOptions : public OptionsItf {
+ public:
+  SimpleOptions() {
+  }
+
+  virtual ~SimpleOptions() {
+  }
+
+  // Methods from the interface
+  void Register(const std::string &name, bool *ptr, const std::string &doc);
+  void Register(const std::string &name, int32 *ptr, const std::string &doc);
+  void Register(const std::string &name, uint32 *ptr, const std::string &doc);
+  void Register(const std::string &name, float *ptr, const std::string &doc);
+  void Register(const std::string &name, double *ptr, const std::string &doc);
+  void Register(const std::string &name, std::string *ptr,
+                const std::string &doc);
+
+  // set option with the specified key, return true if successful
+  bool SetOption(const std::string &key, const bool &value);
+  bool SetOption(const std::string &key, const int32 &value);
+  bool SetOption(const std::string &key, const uint32 &value);
+  bool SetOption(const std::string &key, const float &value);
+  bool SetOption(const std::string &key, const double &value);
+  bool SetOption(const std::string &key, const std::string &value);
+  bool SetOption(const std::string &key, const char* value);
+
+  // get option with the specified key and put to 'value',
+  // return true if successful
+  bool GetOption(const std::string &key, bool *value);
+  bool GetOption(const std::string &key, int32 *value);
+  bool GetOption(const std::string &key, uint32 *value);
+  bool GetOption(const std::string &key, float *value);
+  bool GetOption(const std::string &key, double *value);
+  bool GetOption(const std::string &key, std::string *value);
+
+  enum OptionType {
+    kBool,
+    kInt32,
+    kUint32,
+    kFloat,
+    kDouble,
+    kString
+  };
+
+  struct OptionInfo {
+    OptionInfo(const std::string &doc, OptionType type) :
+      doc(doc), type(type) {
+    }
+    std::string doc;
+    OptionType type;
+  };
+
+  std::vector<std::pair<std::string, OptionInfo> > GetOptionInfoList();
+
+  /*
+   * Puts the type of the option with name 'key' in the argument 'type'.
+   * Return true if such option is found, false otherwise.
+   */
+  bool GetOptionType(const std::string &key, OptionType *type);
+
+ private:
+
+  std::vector<std::pair<std::string, OptionInfo> > option_info_list_;
+
+  // maps for option variables
+  std::map<std::string, bool*> bool_map_;
+  std::map<std::string, int32*> int_map_;
+  std::map<std::string, uint32*> uint_map_;
+  std::map<std::string, float*> float_map_;
+  std::map<std::string, double*> double_map_;
+  std::map<std::string, std::string*> string_map_;
+};
+
+}  // namespace kaldi
+
+#endif  // KALDI_UTIL_SIMPLE_OPTIONS_H_
diff --git a/kaldi_io/src/kaldi/util/stl-utils.h b/kaldi_io/src/kaldi/util/stl-utils.h
new file mode 100644
index 0000000..12526ff
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/stl-utils.h
@@ -0,0 +1,327 @@
+// util/stl-utils.h
+
+// Copyright 2009-2011  Microsoft Corporation;  Saarland University
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_UTIL_STL_UTILS_H_
+#define KALDI_UTIL_STL_UTILS_H_
+
+#include <algorithm>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+#include "base/kaldi-common.h"
+
+#ifdef _MSC_VER
+#include <unordered_map>
+#include <unordered_set>
+using std::unordered_map;
+using std::unordered_set;
+#elif __cplusplus > 199711L || defined(__GXX_EXPERIMENTAL_CXX0X__)
+#include <unordered_map>
+#include <unordered_set>
+using std::unordered_map;
+using std::unordered_set;
+#else
+#include <tr1/unordered_map>
+#include <tr1/unordered_set>
+using std::tr1::unordered_map;
+using std::tr1::unordered_set;
+#endif
+
+
+namespace kaldi {
+
+/// Sorts and uniq's (removes duplicates) from a vector.
+template<typename T>
+inline void SortAndUniq(std::vector<T> *vec) {
+  std::sort(vec->begin(), vec->end());
+  vec->erase(std::unique(vec->begin(), vec->end()), vec->end());
+}
+
+
+/// Returns true if the vector is sorted.
+template<typename T>
+inline bool IsSorted(const std::vector<T> &vec) {
+  typename std::vector<T>::const_iterator iter = vec.begin(), end = vec.end();
+  if (iter == end) return true;
+  while (1) {
+    typename std::vector<T>::const_iterator next_iter = iter;
+    ++next_iter;
+    if (next_iter == end) return true;  // end of loop and nothing out of order
+    if (*next_iter < *iter) return false;
+    iter = next_iter;
+  }
+}
+
+
+/// Returns true if the vector is sorted and contains each element
+/// only once.
+template<typename T>
+inline bool IsSortedAndUniq(const std::vector<T> &vec) {
+  typename std::vector<T>::const_iterator iter = vec.begin(), end = vec.end();
+  if (iter == end) return true;
+  while (1) {
+    typename std::vector<T>::const_iterator next_iter = iter;
+    ++next_iter;
+    if (next_iter == end) return true;  // end of loop and nothing out of order
+    if (*next_iter <= *iter) return false;
+    iter = next_iter;
+  }
+}
+
+
+/// Removes duplicate elements from a sorted vector.
+template<typename T>
+inline void Uniq(std::vector<T> *vec) {  // must be already sorted.
+  KALDI_ASSERT(vec != NULL);  // check the pointer before dereferencing it.
+  KALDI_PARANOID_ASSERT(IsSorted(*vec));
+  vec->erase(std::unique(vec->begin(), vec->end()), vec->end());
+}
+
+/// Copies the elements of a set to a vector.
+template<class T>
+void CopySetToVector(const std::set<T> &s, std::vector<T> *v) {
+  // adds members of s to v, in sorted order from lowest to highest
+  // (because the set was in sorted order).
+  KALDI_ASSERT(v != NULL);
+  v->resize(s.size());
+  typename std::set<T>::const_iterator siter = s.begin(), send = s.end();
+  typename std::vector<T>::iterator viter = v->begin();
+  for (; siter != send; ++siter, ++viter) {
+    *viter = *siter;
+  }
+}
+
+template<class T>
+void CopySetToVector(const unordered_set<T> &s, std::vector<T> *v) {
+  // copies members of s to v, in the iteration order of the unordered_set
+  // (which is unspecified; unlike the std::set version, v is NOT sorted).
+  KALDI_ASSERT(v != NULL);
+  v->resize(s.size());
+  typename unordered_set<T>::const_iterator siter = s.begin(), send = s.end();
+  typename std::vector<T>::iterator viter = v->begin();
+  for (; siter != send; ++siter, ++viter) {
+    *viter = *siter;
+  }
+}
+
+
+/// Copies the (key, value) pairs in a map to a vector of pairs.
+template<class A, class B>
+void CopyMapToVector(const std::map<A, B> &m,
+                     std::vector<std::pair<A, B> > *v) {
+  KALDI_ASSERT(v != NULL);
+  v->resize(m.size());
+  typename std::map<A, B>::const_iterator miter = m.begin(), mend = m.end();
+  typename std::vector<std::pair<A, B> >::iterator viter = v->begin();
+  for (; miter != mend; ++miter, ++viter) {
+    *viter = std::make_pair(miter->first, miter->second);
+    // do it like this because of const casting.
+  }
+}
+
+/// Copies the keys in a map to a vector.
+template<class A, class B>
+void CopyMapKeysToVector(const std::map<A, B> &m, std::vector<A> *v) {
+  KALDI_ASSERT(v != NULL);
+  v->resize(m.size());
+  typename std::map<A, B>::const_iterator miter = m.begin(), mend = m.end();
+  typename std::vector<A>::iterator viter = v->begin();
+  for (; miter != mend; ++miter, ++viter) {
+    *viter = miter->first;
+  }
+}
+
+/// Copies the values in a map to a vector.
+template<class A, class B>
+void CopyMapValuesToVector(const std::map<A, B> &m, std::vector<B> *v) {
+  KALDI_ASSERT(v != NULL);
+  v->resize(m.size());
+  typename std::map<A, B>::const_iterator miter = m.begin(), mend = m.end();
+  typename std::vector<B>::iterator viter = v->begin();
+  for (; miter != mend; ++miter, ++viter) {
+    *viter = miter->second;
+  }
+}
+
+/// Copies the keys in a map to a set.
+template<class A, class B>
+void CopyMapKeysToSet(const std::map<A, B> &m, std::set<A> *s) {
+  KALDI_ASSERT(s != NULL);
+  s->clear();
+  typename std::map<A, B>::const_iterator miter = m.begin(), mend = m.end();
+  for (; miter != mend; ++miter) {
+    s->insert(s->end(), miter->first);
+  }
+}
+
+/// Copies the values in a map to a set.
+template<class A, class B>
+void CopyMapValuesToSet(const std::map<A, B> &m, std::set<B> *s) {
+  KALDI_ASSERT(s != NULL);
+  s->clear();
+  typename std::map<A, B>::const_iterator miter = m.begin(), mend = m.end();
+  for (; miter != mend; ++miter)
+    s->insert(s->end(), miter->second);
+}
+
+
+/// Copies the contents of a vector to a set.
+template<class A>
+void CopyVectorToSet(const std::vector<A> &v, std::set<A> *s) {
+  KALDI_ASSERT(s != NULL);
+  s->clear();
+  typename std::vector<A>::const_iterator iter = v.begin(), end = v.end();
+  for (; iter != end; ++iter)
+    s->insert(s->end(), *iter);
+  // s->end() is a hint in case v was sorted.  will work regardless.
+}
+
+/// Deletes any non-NULL pointers in the vector v, and sets
+/// the corresponding entries of v to NULL
+template<class A>
+void DeletePointers(std::vector<A*> *v) {
+  KALDI_ASSERT(v != NULL);
+  typename std::vector<A*>::iterator iter = v->begin(), end = v->end();
+  for (; iter != end; ++iter) {
+    if (*iter != NULL) {
+      delete *iter;
+      *iter = NULL;  // set to NULL for extra safety.
+    }
+  }
+}
+
+/// Returns true if the vector of pointers contains NULL pointers.
+template<class A>
+bool ContainsNullPointers(const std::vector<A*> &v) {
+  typename std::vector<A*>::const_iterator iter = v.begin(), end = v.end();
+  for (; iter != end; ++iter)
+    if (*iter == static_cast<A*> (NULL)) return true;
+  return false;
+}
+
+/// Copies the contents of a vector of one type to a vector
+/// of another type.
+template<typename A, typename B>
+void CopyVectorToVector(const std::vector<A> &vec_in, std::vector<B> *vec_out) {
+  KALDI_ASSERT(vec_out != NULL);
+  vec_out->resize(vec_in.size());
+  for (size_t i = 0; i < vec_in.size(); i++)
+    (*vec_out)[i] = static_cast<B> (vec_in[i]);
+}
+
+/// A hashing function-object for vectors.
+template<typename Int>
+struct VectorHasher {  // hashing function for vector<Int>.
+  size_t operator()(const std::vector<Int> &x) const {
+    size_t ans = 0;
+    typename std::vector<Int>::const_iterator iter = x.begin(), end = x.end();
+    for (; iter != end; ++iter) {
+      ans *= kPrime;
+      ans += *iter;
+    }
+    return ans;
+  }
+  VectorHasher() {  // Check we're instantiated with an integer type.
+    KALDI_ASSERT_IS_INTEGER_TYPE(Int);
+  }
+ private:
+  static const int kPrime = 7853;
+};
+
+/// A hashing function-object for pairs of ints.
+template<typename Int>
+struct PairHasher {  // hashing function for pair<Int, Int>
+  size_t operator()(const std::pair<Int,Int> &x) const {
+    // Hash in size_t: unsigned wraps, signed Int overflow would be undefined.
+    const size_t second = static_cast<size_t>(x.second);
+    return static_cast<size_t>(x.first) + second * kPrime;
+  }
+  PairHasher() { KALDI_ASSERT_IS_INTEGER_TYPE(Int); }  // integer types only.
+ private:
+  static const int kPrime = 7853;
+};
+
+
+/// A hashing function object for strings.
+struct StringHasher {  // hashing function for std::string
+  size_t operator()(const std::string &str) const {
+    size_t ans = 0, len = str.length();
+    const char *c = str.c_str(), *end = c + len;
+    for (; c != end; c++) {
+      ans *= kPrime;
+      ans += *c;
+    }
+    return ans;
+  }
+ private:
+  static const int kPrime = 7853;
+};
+
+/// Reverses the contents of a vector.
+template<typename T>
+inline void ReverseVector(std::vector<T> *vec) {
+  KALDI_ASSERT(vec != NULL);
+  size_t sz = vec->size();
+  for (size_t i = 0; i < sz/2; i++)
+    std::swap( (*vec)[i], (*vec)[sz-1-i]);
+}
+
+
+/// Comparator object for pairs that compares only their first members.
+template<class A, class B>
+struct CompareFirstMemberOfPair {
+  inline bool operator() (const std::pair<A, B> &p1,
+                          const std::pair<A, B> &p2) const {
+    return p1.first < p2.first;
+  }
+};
+
+/// For a vector of pair<I, F> where I is an integer and F a floating-point or
+/// integer type, this function sorts a vector of type vector<pair<I, F> > on
+/// the I value and then merges elements with equal I values, summing these over
+/// the F component and then removing any F component with zero value.  This
+/// is for where the vector of pairs represents a map from the integer to float
+/// component, with an "adding" type of semantics for combining the elements.
+template<typename I, typename F>
+inline void MergePairVectorSumming(std::vector<std::pair<I, F> > *vec) {
+  KALDI_ASSERT_IS_INTEGER_TYPE(I);
+  CompareFirstMemberOfPair<I, F> c;
+  std::sort(vec->begin(), vec->end(), c); // sort on 1st element. 
+  typename std::vector<std::pair<I, F> >::iterator out = vec->begin(),
+      in = vec->begin(), end = vec->end();
+  while (in < end) {
+    // We reach this point only at the first element of
+    // each stretch of identical .first elements.
+    *out = *in;
+    ++in;
+    while (in < end && in->first == out->first) {
+      out->second += in->second; // this is the merge operation.
+      ++in;
+    }
+    if (out->second != static_cast<F>(0)) // Don't keep zero elements.
+      out++;
+  }
+  vec->erase(out, end);
+}
+
+}  // namespace kaldi
+
+#endif  // KALDI_UTIL_STL_UTILS_H_
+
diff --git a/kaldi_io/src/kaldi/util/table-types.h b/kaldi_io/src/kaldi/util/table-types.h
new file mode 100644
index 0000000..313d1aa
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/table-types.h
@@ -0,0 +1,137 @@
+// util/table-types.h
+
+// Copyright 2009-2011     Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+
+#ifndef KALDI_UTIL_TABLE_TYPES_H_
+#define KALDI_UTIL_TABLE_TYPES_H_
+#include "base/kaldi-common.h"
+#include "util/kaldi-table.h"
+#include "util/kaldi-holder.h"
+#include "matrix/matrix-lib.h"
+
+namespace kaldi {
+
+// This header defines typedefs that are specific instantiations of
+// the Table types.
+
+/// \addtogroup table_types
+/// @{
+// Writer / reader typedefs for Matrix<BaseFloat> and Matrix<double>.
+typedef TableWriter<KaldiObjectHolder<Matrix<BaseFloat> > >  BaseFloatMatrixWriter;
+typedef SequentialTableReader<KaldiObjectHolder<Matrix<BaseFloat> > >  SequentialBaseFloatMatrixReader;
+typedef RandomAccessTableReader<KaldiObjectHolder<Matrix<BaseFloat> > >  RandomAccessBaseFloatMatrixReader;
+typedef RandomAccessTableReaderMapped<KaldiObjectHolder<Matrix<BaseFloat> > >  RandomAccessBaseFloatMatrixReaderMapped;
+
+typedef TableWriter<KaldiObjectHolder<Matrix<double> > >  DoubleMatrixWriter;
+typedef SequentialTableReader<KaldiObjectHolder<Matrix<double> > >  SequentialDoubleMatrixReader;
+typedef RandomAccessTableReader<KaldiObjectHolder<Matrix<double> > >  RandomAccessDoubleMatrixReader;
+typedef RandomAccessTableReaderMapped<KaldiObjectHolder<Matrix<double> > >  RandomAccessDoubleMatrixReaderMapped;
+// CompressedMatrix: only a writer typedef is defined in this header.
+typedef TableWriter<KaldiObjectHolder<CompressedMatrix> >  CompressedMatrixWriter;
+// Writer / reader typedefs for Vector<BaseFloat> and Vector<double>.
+typedef TableWriter<KaldiObjectHolder<Vector<BaseFloat> > >  BaseFloatVectorWriter;
+typedef SequentialTableReader<KaldiObjectHolder<Vector<BaseFloat> > >  SequentialBaseFloatVectorReader;
+typedef RandomAccessTableReader<KaldiObjectHolder<Vector<BaseFloat> > >  RandomAccessBaseFloatVectorReader;
+typedef RandomAccessTableReaderMapped<KaldiObjectHolder<Vector<BaseFloat> > >  RandomAccessBaseFloatVectorReaderMapped;
+// Note: Vector<double> has no ...ReaderMapped typedef, unlike the types above.
+typedef TableWriter<KaldiObjectHolder<Vector<double> > >  DoubleVectorWriter;
+typedef SequentialTableReader<KaldiObjectHolder<Vector<double> > >  SequentialDoubleVectorReader;
+typedef RandomAccessTableReader<KaldiObjectHolder<Vector<double> > >  RandomAccessDoubleVectorReader;
+// Writer / reader typedefs for CuMatrix and CuVector (BaseFloat and double).
+typedef TableWriter<KaldiObjectHolder<CuMatrix<BaseFloat> > >  BaseFloatCuMatrixWriter;
+typedef SequentialTableReader<KaldiObjectHolder<CuMatrix<BaseFloat> > >  SequentialBaseFloatCuMatrixReader;
+typedef RandomAccessTableReader<KaldiObjectHolder<CuMatrix<BaseFloat> > >  RandomAccessBaseFloatCuMatrixReader;
+typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuMatrix<BaseFloat> > >  RandomAccessBaseFloatCuMatrixReaderMapped;
+
+typedef TableWriter<KaldiObjectHolder<CuMatrix<double> > >  DoubleCuMatrixWriter;
+typedef SequentialTableReader<KaldiObjectHolder<CuMatrix<double> > >  SequentialDoubleCuMatrixReader;
+typedef RandomAccessTableReader<KaldiObjectHolder<CuMatrix<double> > >  RandomAccessDoubleCuMatrixReader;
+typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuMatrix<double> > >  RandomAccessDoubleCuMatrixReaderMapped;
+
+typedef TableWriter<KaldiObjectHolder<CuVector<BaseFloat> > >  BaseFloatCuVectorWriter;
+typedef SequentialTableReader<KaldiObjectHolder<CuVector<BaseFloat> > >  SequentialBaseFloatCuVectorReader;
+typedef RandomAccessTableReader<KaldiObjectHolder<CuVector<BaseFloat> > >  RandomAccessBaseFloatCuVectorReader;
+typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuVector<BaseFloat> > >  RandomAccessBaseFloatCuVectorReaderMapped;
+// Note: CuVector<double> has no ...ReaderMapped typedef.
+typedef TableWriter<KaldiObjectHolder<CuVector<double> > >  DoubleCuVectorWriter;
+typedef SequentialTableReader<KaldiObjectHolder<CuVector<double> > >  SequentialDoubleCuVectorReader;
+typedef RandomAccessTableReader<KaldiObjectHolder<CuVector<double> > >  RandomAccessDoubleCuVectorReader;
+// Typedefs built on BasicHolder, BasicVectorHolder, BasicVectorVectorHolder
+// and BasicPairVectorHolder, instantiated for int32, BaseFloat, double, bool.
+typedef TableWriter<BasicHolder<int32> >  Int32Writer;
+typedef SequentialTableReader<BasicHolder<int32> >  SequentialInt32Reader;
+typedef RandomAccessTableReader<BasicHolder<int32> >  RandomAccessInt32Reader;
+
+typedef TableWriter<BasicVectorHolder<int32> >  Int32VectorWriter;
+typedef SequentialTableReader<BasicVectorHolder<int32> >  SequentialInt32VectorReader;
+typedef RandomAccessTableReader<BasicVectorHolder<int32> >  RandomAccessInt32VectorReader;
+
+typedef TableWriter<BasicVectorVectorHolder<int32> >  Int32VectorVectorWriter;
+typedef SequentialTableReader<BasicVectorVectorHolder<int32> >  SequentialInt32VectorVectorReader;
+typedef RandomAccessTableReader<BasicVectorVectorHolder<int32> >  RandomAccessInt32VectorVectorReader;
+
+typedef TableWriter<BasicPairVectorHolder<int32> >  Int32PairVectorWriter;
+typedef SequentialTableReader<BasicPairVectorHolder<int32> >  SequentialInt32PairVectorReader;
+typedef RandomAccessTableReader<BasicPairVectorHolder<int32> >  RandomAccessInt32PairVectorReader;
+
+typedef TableWriter<BasicPairVectorHolder<BaseFloat> >  BaseFloatPairVectorWriter;
+typedef SequentialTableReader<BasicPairVectorHolder<BaseFloat> >  SequentialBaseFloatPairVectorReader;
+typedef RandomAccessTableReader<BasicPairVectorHolder<BaseFloat> >  RandomAccessBaseFloatPairVectorReader;
+// BaseFloat is the only basic type here with a ...ReaderMapped typedef.
+typedef TableWriter<BasicHolder<BaseFloat> >  BaseFloatWriter;
+typedef SequentialTableReader<BasicHolder<BaseFloat> >  SequentialBaseFloatReader;
+typedef RandomAccessTableReader<BasicHolder<BaseFloat> >  RandomAccessBaseFloatReader;
+typedef RandomAccessTableReaderMapped<BasicHolder<BaseFloat> >  RandomAccessBaseFloatReaderMapped;
+
+typedef TableWriter<BasicHolder<double> >  DoubleWriter;
+typedef SequentialTableReader<BasicHolder<double> >  SequentialDoubleReader;
+typedef RandomAccessTableReader<BasicHolder<double> >  RandomAccessDoubleReader;
+
+typedef TableWriter<BasicHolder<bool> >  BoolWriter;
+typedef SequentialTableReader<BasicHolder<bool> >  SequentialBoolReader;
+typedef RandomAccessTableReader<BasicHolder<bool> >  RandomAccessBoolReader;
+
+
+
+/// TokenWriter is a writer specialized for std::string where the strings
+/// are nonempty and whitespace-free.   T == std::string
+typedef TableWriter<TokenHolder> TokenWriter;
+typedef SequentialTableReader<TokenHolder> SequentialTokenReader;
+typedef RandomAccessTableReader<TokenHolder> RandomAccessTokenReader;
+
+
+/// TokenVectorWriter is a writer specialized for sequences of
+/// std::string where the strings are nonempty and whitespace-free.
+/// T == std::vector<std::string>
+typedef TableWriter<TokenVectorHolder> TokenVectorWriter;
+// The reader typedefs below use the same holder type (TokenVectorHolder).
+typedef SequentialTableReader<TokenVectorHolder> SequentialTokenVectorReader;
+typedef RandomAccessTableReader<TokenVectorHolder> RandomAccessTokenVectorReader;
+
+
+/// @}
+
+// Note: for FST reader/writer, see ../fstext/fstext-utils.h
+// [not done yet].
+
+} // end namespace kaldi
+
+
+
+#endif
diff --git a/kaldi_io/src/kaldi/util/text-utils.h b/kaldi_io/src/kaldi/util/text-utils.h
new file mode 100644
index 0000000..1d85c47
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/text-utils.h
@@ -0,0 +1,169 @@
+// util/text-utils.h
+
+// Copyright 2009-2011  Saarland University;  Microsoft Corporation
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//  http://www.apache.org/licenses/LICENSE-2.0
+//
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_UTIL_TEXT_UTILS_H_
+#define KALDI_UTIL_TEXT_UTILS_H_
+
+#include <algorithm>
+#include <map>
+#include <set>
+#include <string>
+#include <vector>
+#include <errno.h>
+
+#include "base/kaldi-common.h"
+
+namespace kaldi {
+
+/// Split a string using any of the single character delimiters.
+/// If omit_empty_strings == true, the output will contain any
+/// nonempty strings after splitting on any of the
+/// characters in the delimiter.  If omit_empty_strings == false,
+/// the output will contain n+1 strings if there are n characters
+/// in the set "delim" within the input string.  In this case
+/// the empty string is split to a single empty string.
+void SplitStringToVector(const std::string &full, const char *delim,
+                         bool omit_empty_strings,
+                         std::vector<std::string> *out);
+
+/// Joins the elements of a vector of strings into a single string using
+/// "delim" as the delimiter. If omit_empty_strings == true, any empty strings
+/// in the vector are skipped. A vector of empty strings results in an empty
+/// string on the output.
+void JoinVectorToString(const std::vector<std::string> &vec_in,
+                        const char *delim, bool omit_empty_strings,
+                        std::string *str_out);
+
+
+/// Split a string (e.g. 1:2:3) into a vector of integers.
+/// The delimiting char may be any character in "delim".
+/// returns true on success, false on failure (a field that is not an
+/// integer, or a value that does not fit in type I); on failure *out is
+/// left empty.  If omit_empty_strings == true, 1::2:3: will become
+/// { 1, 2, 3 }.  Otherwise it would be rejected.
+/// Regardless of the value of omit_empty_strings,
+/// the empty string is successfully parsed as an empty
+/// vector of integers
+template<class I>
+bool SplitStringToIntegers(const std::string &full,
+                           const char *delim,
+                           bool omit_empty_strings,  // typically false [but
+                                                     // should probably be true
+                                                     // if "delim" is spaces].
+                           std::vector<I> *out) {
+  KALDI_ASSERT(out != NULL);
+  KALDI_ASSERT_IS_INTEGER_TYPE(I);
+  if (*(full.c_str()) == '\0') {
+    out->clear();
+    return true;
+  }
+  std::vector<std::string> split;
+  SplitStringToVector(full, delim, omit_empty_strings, &split);
+  out->resize(split.size());
+  for (size_t i = 0; i < split.size(); i++) {
+    const char *this_str = split[i].c_str();
+    char *end = NULL;
+    errno = 0;  // so that out-of-range input (ERANGE) can be detected below.
+    const long long int j = KALDI_STRTOLL(this_str, &end);
+    const I jI = static_cast<I>(j);
+    // Reject: no digits / trailing junk / strtoll overflow / value not
+    // representable in I (including a negative value for an unsigned I,
+    // which the round-trip cast alone misses when I is 64 bits wide).
+    if (end == this_str || *end != '\0' || errno != 0 ||
+        static_cast<long long int>(jI) != j ||
+        (j < 0 && !std::numeric_limits<I>::is_signed)) {
+      out->clear();
+      return false;
+    }
+    (*out)[i] = jI;
+  }
+  return true;
+}
+
+// This is defined for F = float and double.
+template<class F>
+bool SplitStringToFloats(const std::string &full,
+                         const char *delim,
+                         bool omit_empty_strings, // typically false
+                         std::vector<F> *out);
+
+
+/// Converts a string into an integer via strtoll and returns false if there was
+/// any kind of problem (i.e. the string was not an integer or contained extra
+/// non-whitespace junk, or the integer was too large to fit into the type it is
+/// being converted into).  Only sets *out if everything was OK and it returns
+/// true.
+template<class Int>
+bool ConvertStringToInteger(const std::string &str,
+                            Int *out) {
+  KALDI_ASSERT_IS_INTEGER_TYPE(Int);
+  const char *this_str = str.c_str();
+  char *end = NULL;
+  errno = 0;
+  long long int i = KALDI_STRTOLL(this_str, &end);
+  // Cast to unsigned char: isspace() on a negative char value is undefined.
+  while (end != this_str && isspace(static_cast<unsigned char>(*end))) end++;
+  if (end == this_str || *end != '\0' || errno != 0)
+    return false;
+  Int iInt = static_cast<Int>(i);
+  if (static_cast<long long int>(iInt) != i ||
+      (i < 0 && !std::numeric_limits<Int>::is_signed))
+    return false;
+  *out = iInt;
+  return true;
+}
+
+
+/// ConvertStringToReal converts a string into either float or double via strtod,
+/// and returns false if there was any kind of problem (i.e. the string was not a
+/// floating point number or contained extra non-whitespace junk).
+/// Be careful- this function will successfully read inf's or nan's.
+bool ConvertStringToReal(const std::string &str,
+                         double *out);
+bool ConvertStringToReal(const std::string &str,
+                         float *out);
+
+
+/// Removes the beginning and trailing whitespaces from a string
+void Trim(std::string *str);
+
+
+/// Removes leading and trailing white space from the string, then splits on the
+/// first section of whitespace found (if present), putting the part before the
+/// whitespace in "first" and the rest in "rest".  If there is no such space,
+/// everything that remains after removing leading and trailing whitespace goes
+/// in "first".
+void SplitStringOnFirstSpace(const std::string &line,
+                             std::string *first,
+                             std::string *rest);
+
+
+/// Returns true if "token" is nonempty, and all characters are
+/// printable and whitespace-free.
+bool IsToken(const std::string &token);
+
+
+/// Returns true if "line" is free of \n characters and unprintable
+/// characters, and does not contain leading or trailing whitespace.
+bool IsLine(const std::string &line);
+
+
+}  // namespace kaldi
+
+#endif  // KALDI_UTIL_TEXT_UTILS_H_
diff --git a/kaldi_io/src/kaldi/util/timer.h b/kaldi_io/src/kaldi/util/timer.h
new file mode 100644
index 0000000..e3ee8d5
--- /dev/null
+++ b/kaldi_io/src/kaldi/util/timer.h
@@ -0,0 +1,27 @@
+// util/timer.h
+
+// Copyright 2014  Johns Hopkins University (author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+//  http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+// We are temporarily leaving this file to forward #includes to
+// base/timer.h.  Its use is deprecated; you should directly
+// #include base/timer.h
+#ifndef KALDI_UTIL_TIMER_H_
+#define KALDI_UTIL_TIMER_H_
+#pragma message warning: please do not include util/timer.h, include base/timer.h (it has been moved)
+#include "base/timer.h"
+#endif