summaryrefslogtreecommitdiff
path: root/kaldi_io/src/kaldi
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/kaldi')
-rw-r--r--kaldi_io/src/kaldi/base/io-funcs-inl.h219
-rw-r--r--kaldi_io/src/kaldi/base/io-funcs.h231
-rw-r--r--kaldi_io/src/kaldi/base/kaldi-common.h41
-rw-r--r--kaldi_io/src/kaldi/base/kaldi-error.h153
-rw-r--r--kaldi_io/src/kaldi/base/kaldi-math.h346
-rw-r--r--kaldi_io/src/kaldi/base/kaldi-types.h64
-rw-r--r--kaldi_io/src/kaldi/base/kaldi-utils.h157
-rw-r--r--kaldi_io/src/kaldi/base/timer.h83
-rw-r--r--kaldi_io/src/kaldi/hmm/hmm-topology.h172
-rw-r--r--kaldi_io/src/kaldi/hmm/hmm-utils.h295
-rw-r--r--kaldi_io/src/kaldi/hmm/posterior.h214
-rw-r--r--kaldi_io/src/kaldi/hmm/transition-model.h345
-rw-r--r--kaldi_io/src/kaldi/hmm/tree-accu.h69
-rw-r--r--kaldi_io/src/kaldi/itf/clusterable-itf.h97
-rw-r--r--kaldi_io/src/kaldi/itf/context-dep-itf.h80
-rw-r--r--kaldi_io/src/kaldi/itf/decodable-itf.h123
-rw-r--r--kaldi_io/src/kaldi/itf/online-feature-itf.h105
-rw-r--r--kaldi_io/src/kaldi/itf/optimizable-itf.h51
-rw-r--r--kaldi_io/src/kaldi/itf/options-itf.h49
-rw-r--r--kaldi_io/src/kaldi/matrix/cblas-wrappers.h491
-rw-r--r--kaldi_io/src/kaldi/matrix/compressed-matrix.h179
-rw-r--r--kaldi_io/src/kaldi/matrix/jama-eig.h924
-rw-r--r--kaldi_io/src/kaldi/matrix/jama-svd.h531
-rw-r--r--kaldi_io/src/kaldi/matrix/kaldi-blas.h132
-rw-r--r--kaldi_io/src/kaldi/matrix/kaldi-gpsr.h166
-rw-r--r--kaldi_io/src/kaldi/matrix/kaldi-matrix-inl.h62
-rw-r--r--kaldi_io/src/kaldi/matrix/kaldi-matrix.h983
-rw-r--r--kaldi_io/src/kaldi/matrix/kaldi-vector-inl.h58
-rw-r--r--kaldi_io/src/kaldi/matrix/kaldi-vector.h585
-rw-r--r--kaldi_io/src/kaldi/matrix/matrix-common.h100
-rw-r--r--kaldi_io/src/kaldi/matrix/matrix-functions-inl.h56
-rw-r--r--kaldi_io/src/kaldi/matrix/matrix-functions.h235
-rw-r--r--kaldi_io/src/kaldi/matrix/matrix-lib.h37
-rw-r--r--kaldi_io/src/kaldi/matrix/optimization.h248
-rw-r--r--kaldi_io/src/kaldi/matrix/packed-matrix.h197
-rw-r--r--kaldi_io/src/kaldi/matrix/sp-matrix-inl.h42
-rw-r--r--kaldi_io/src/kaldi/matrix/sp-matrix.h524
-rw-r--r--kaldi_io/src/kaldi/matrix/srfft.h132
-rw-r--r--kaldi_io/src/kaldi/matrix/tp-matrix.h131
-rw-r--r--kaldi_io/src/kaldi/tree/build-tree-questions.h133
-rw-r--r--kaldi_io/src/kaldi/tree/build-tree-utils.h324
-rw-r--r--kaldi_io/src/kaldi/tree/build-tree.h250
-rw-r--r--kaldi_io/src/kaldi/tree/cluster-utils.h291
-rw-r--r--kaldi_io/src/kaldi/tree/clusterable-classes.h158
-rw-r--r--kaldi_io/src/kaldi/tree/context-dep.h166
-rw-r--r--kaldi_io/src/kaldi/tree/event-map.h365
-rw-r--r--kaldi_io/src/kaldi/tree/tree-renderer.h84
-rw-r--r--kaldi_io/src/kaldi/util/basic-filebuf.h1065
-rw-r--r--kaldi_io/src/kaldi/util/common-utils.h31
-rw-r--r--kaldi_io/src/kaldi/util/const-integer-set-inl.h88
-rw-r--r--kaldi_io/src/kaldi/util/const-integer-set.h95
-rw-r--r--kaldi_io/src/kaldi/util/edit-distance-inl.h189
-rw-r--r--kaldi_io/src/kaldi/util/edit-distance.h63
-rw-r--r--kaldi_io/src/kaldi/util/hash-list-inl.h183
-rw-r--r--kaldi_io/src/kaldi/util/hash-list.h140
-rw-r--r--kaldi_io/src/kaldi/util/kaldi-holder-inl.h800
-rw-r--r--kaldi_io/src/kaldi/util/kaldi-holder.h207
-rw-r--r--kaldi_io/src/kaldi/util/kaldi-io-inl.h45
-rw-r--r--kaldi_io/src/kaldi/util/kaldi-io.h264
-rw-r--r--kaldi_io/src/kaldi/util/kaldi-pipebuf.h90
-rw-r--r--kaldi_io/src/kaldi/util/kaldi-table-inl.h2246
-rw-r--r--kaldi_io/src/kaldi/util/kaldi-table.h459
-rw-r--r--kaldi_io/src/kaldi/util/parse-options.h264
-rw-r--r--kaldi_io/src/kaldi/util/simple-io-funcs.h56
-rw-r--r--kaldi_io/src/kaldi/util/simple-options.h112
-rw-r--r--kaldi_io/src/kaldi/util/stl-utils.h327
-rw-r--r--kaldi_io/src/kaldi/util/table-types.h137
-rw-r--r--kaldi_io/src/kaldi/util/text-utils.h169
-rw-r--r--kaldi_io/src/kaldi/util/timer.h27
69 files changed, 17535 insertions, 0 deletions
diff --git a/kaldi_io/src/kaldi/base/io-funcs-inl.h b/kaldi_io/src/kaldi/base/io-funcs-inl.h
new file mode 100644
index 0000000..e55458e
--- /dev/null
+++ b/kaldi_io/src/kaldi/base/io-funcs-inl.h
@@ -0,0 +1,219 @@
+// base/io-funcs-inl.h
+
+// Copyright 2009-2011 Microsoft Corporation; Saarland University;
+// Jan Silovsky; Yanmin Qian; Johns Hopkins University (Author: Daniel Povey)
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_BASE_IO_FUNCS_INL_H_
+#define KALDI_BASE_IO_FUNCS_INL_H_ 1
+
+// Do not include this file directly. It is included by base/io-funcs.h
+
+#include <limits>
+#include <vector>
+
+namespace kaldi {
+
+// Template that covers integers; throws std::runtime_error on write failure.
+template<class T> void WriteBasicType(std::ostream &os,
+ bool binary, T t) {
+ // Compile time assertion that this is not called with a wrong type.
+ KALDI_ASSERT_IS_INTEGER_TYPE(T);
+ if (binary) {
+ char len_c = (std::numeric_limits<T>::is_signed ? 1 : -1)
+ * static_cast<char>(sizeof(t)); // tag: +size if signed, -size if not
+ os.put(len_c);
+ os.write(reinterpret_cast<const char *>(&t), sizeof(t)); // raw bytes of t
+ } else {
+ if (sizeof(t) == 1) // one-byte types are written as numbers
+ os << static_cast<int16>(t) << " ";
+ else
+ os << t << " ";
+ }
+ if (os.fail()) {
+ throw std::runtime_error("Write failure in WriteBasicType.");
+ }
+}
+
+// Template that covers integers; errors are reported through KALDI_ERR.
+template<class T> inline void ReadBasicType(std::istream &is,
+ bool binary, T *t) {
+ KALDI_PARANOID_ASSERT(t != NULL);
+ // Compile time assertion that this is not called with a wrong type.
+ KALDI_ASSERT_IS_INTEGER_TYPE(T);
+ if (binary) {
+ int len_c_in = is.get(); // tag byte preceding the value
+ if (len_c_in == -1) // -1 is taken to mean end of stream here
+ KALDI_ERR << "ReadBasicType: encountered end of stream.";
+ char len_c = static_cast<char>(len_c_in), len_c_expected
+ = (std::numeric_limits<T>::is_signed ? 1 : -1)
+ * static_cast<char>(sizeof(*t)); // +size if signed, -size if not
+
+ if (len_c != len_c_expected) {
+ KALDI_ERR << "ReadBasicType: did not get expected integer type, "
+ << static_cast<int>(len_c)
+ << " vs. " << static_cast<int>(len_c_expected)
+ << ". You can change this code to successfully"
+ << " read it later, if needed.";
+ // insert code here to read "wrong" type. Might have a switch statement.
+ }
+ is.read(reinterpret_cast<char *>(t), sizeof(*t)); // raw bytes into *t
+ } else {
+ if (sizeof(*t) == 1) { // one-byte types are read as numbers
+ int16 i;
+ is >> i;
+ *t = i;
+ } else {
+ is >> *t;
+ }
+ }
+ if (is.fail()) {
+ KALDI_ERR << "Read failure in ReadBasicType, file position is "
+ << is.tellg() << ", next char is " << is.peek();
+ }
+}
+
+// Writes an integer vector; throws std::runtime_error if the stream fails.
+template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
+ const std::vector<T> &v) {
+ // Compile time assertion that this is not called with a wrong type.
+ KALDI_ASSERT_IS_INTEGER_TYPE(T);
+ if (binary) {
+ char sz = sizeof(T); // this is currently just a check.
+ os.write(&sz, 1);
+ int32 vecsz = static_cast<int32>(v.size());
+ KALDI_ASSERT((size_t)vecsz == v.size()); // the size must fit in an int32
+ os.write(reinterpret_cast<const char *>(&vecsz), sizeof(vecsz));
+ if (vecsz != 0) { // &(v[0]) is only taken for a non-empty vector
+ os.write(reinterpret_cast<const char *>(&(v[0])), sizeof(T)*vecsz);
+ }
+ } else {
+ // focus here is on prettiness of text form rather than
+ // efficiency of reading-in.
+ // reading-in is dominated by low-level operations anyway:
+ // for efficiency use binary.
+ os << "[ ";
+ typename std::vector<T>::const_iterator iter = v.begin(), end = v.end();
+ for (; iter != end; ++iter) {
+ if (sizeof(T) == 1) // one-byte types are written as numbers
+ os << static_cast<int16>(*iter) << " ";
+ else
+ os << *iter << " ";
+ }
+ os << "]\n";
+ }
+ if (os.fail()) {
+ throw std::runtime_error("Write failure in WriteIntegerVector.");
+ }
+}
+
+// Reads an integer vector into *v; errors are reported through KALDI_ERR.
+template<class T> inline void ReadIntegerVector(std::istream &is,
+ bool binary,
+ std::vector<T> *v) {
+ KALDI_ASSERT_IS_INTEGER_TYPE(T);
+ KALDI_ASSERT(v != NULL);
+ if (binary) {
+ int sz = is.peek(); // first byte is expected to equal sizeof(T)
+ if (sz == sizeof(T)) {
+ is.get();
+ } else { // this is currently just a check.
+ KALDI_ERR << "ReadIntegerVector: expected to see type of size "
+ << sizeof(T) << ", saw instead " << sz << ", at file position "
+ << is.tellg();
+ }
+ int32 vecsz; // number of elements that follow
+ is.read(reinterpret_cast<char *>(&vecsz), sizeof(vecsz));
+ if (is.fail() || vecsz < 0) goto bad; // negative count: read failure
+ v->resize(vecsz);
+ if (vecsz > 0) {
+ is.read(reinterpret_cast<char *>(&((*v)[0])), sizeof(T)*vecsz);
+ }
+ } else {
+ std::vector<T> tmp_v; // use temporary so v doesn't use extra memory
+ // due to resizing.
+ is >> std::ws;
+ if (is.peek() != static_cast<int>('[')) {
+ KALDI_ERR << "ReadIntegerVector: expected to see [, saw "
+ << is.peek() << ", at file position " << is.tellg();
+ }
+ is.get(); // consume the '['.
+ is >> std::ws; // consume whitespace.
+ while (is.peek() != static_cast<int>(']')) {
+ if (sizeof(T) == 1) { // read/write chars as numbers.
+ int16 next_t;
+ is >> next_t >> std::ws;
+ if (is.fail()) goto bad;
+ else
+ tmp_v.push_back((T)next_t);
+ } else {
+ T next_t;
+ is >> next_t >> std::ws;
+ if (is.fail()) goto bad;
+ else
+ tmp_v.push_back(next_t);
+ }
+ }
+ is.get(); // get the final ']'.
+ *v = tmp_v; // could use std::swap to use less temporary memory, but this
+ // uses less permanent memory.
+ }
+ if (!is.fail()) return;
+ bad: // reached by goto on a parse failure, or by a failed stream above
+ KALDI_ERR << "ReadIntegerVector: read failure at file position "
+ << is.tellg();
+}
+
+// Initialize an opened stream for writing: in binary mode write the two-byte
+// header '\0' 'B'; in either mode raise the stream precision to at least 7.
+inline void InitKaldiOutputStream(std::ostream &os, bool binary) {
+ // This does not throw exceptions (does not check for errors).
+ if (binary) {
+ os.put('\0'); // binary header, byte 1 of 2
+ os.put('B'); // binary header, byte 2 of 2
+ }
+ // Note, in non-binary mode we may at some point want to mess with
+ // the precision a bit.
+ // 7 is a bit more than the precision of float.
+ if (os.precision() < 7)
+ os.precision(7);
+}
+
+/// Initialize an opened stream for reading by detecting the binary header and
+/// setting the "binary" value appropriately.
+inline bool InitKaldiInputStream(std::istream &is, bool *binary) {
+ // Sets *binary and returns true on success.
+ // Returns false (without setting *binary) in the very unusual situation
+ // that the stream starts with '\0' but is not then followed by 'B'.
+
+ if (is.peek() == '\0') { // seems to be binary
+ is.get();
+ if (is.peek() != 'B') {
+ return false; // note: the leading '\0' has already been consumed
+ }
+ is.get();
+ *binary = true;
+ return true;
+ } else {
+ *binary = false; // nothing is consumed from the stream in this case
+ return true;
+ }
+}
+
+} // end namespace kaldi.
+
+#endif // KALDI_BASE_IO_FUNCS_INL_H_
diff --git a/kaldi_io/src/kaldi/base/io-funcs.h b/kaldi_io/src/kaldi/base/io-funcs.h
new file mode 100644
index 0000000..2bc9da8
--- /dev/null
+++ b/kaldi_io/src/kaldi/base/io-funcs.h
@@ -0,0 +1,231 @@
+// base/io-funcs.h
+
+// Copyright 2009-2011 Microsoft Corporation; Saarland University;
+// Jan Silovsky; Yanmin Qian
+
+// See ../../COPYING for clarification regarding multiple authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+
+// http://www.apache.org/licenses/LICENSE-2.0
+
+// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
+// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
+// MERCHANTABLITY OR NON-INFRINGEMENT.
+// See the Apache 2 License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef KALDI_BASE_IO_FUNCS_H_
+#define KALDI_BASE_IO_FUNCS_H_
+
+// This header only contains some relatively low-level I/O functions.
+// The full Kaldi I/O declarations are in ../util/kaldi-io.h
+// and ../util/kaldi-table.h
+// They were put in util/ in order to avoid making the Matrix library
+// dependent on them.
+
+#include <cctype>
+#include <vector>
+#include <string>
+#include "base/kaldi-common.h"
+
+namespace kaldi {
+
+
+
+/*
+ This comment describes the Kaldi approach to I/O. All objects can be written
+ and read in two modes: binary and text. In addition we want to make the I/O
+ work if we redefine the typedef "BaseFloat" between floats and doubles.
+ We also want to have control over whitespace in text mode without affecting
+ the meaning of the file, for pretty-printing purposes.
+
+ Errors are handled by throwing an exception (std::runtime_error).
+
+ For integer and floating-point types (and boolean values):
+
+ WriteBasicType(std::ostream &, bool binary, T);
+ ReadBasicType(std::istream &, bool binary, T*);
+
+ and we expect these functions to be defined in such a way that they work when
+ the type T changes between float and double, so you can read float into double
+ and vice versa. Note that for efficiency and space-saving reasons, the Vector
+ and Matrix classes do not use these functions [but they preserve the type
+ interchangeability in their own way].
+
+ For a class (or struct) C:
+ class C {
+ ..
+ Write(std::ostream &, bool binary, [possibly extra optional args for specific classes]) const;
+ Read(std::istream &, bool binary, [possibly extra optional args for specific classes]);
+ ..
+ }
+ NOTE: The only actual optional args we used are the "add" arguments in
+ Vector/Matrix classes, which specify whether we should sum the data already
+ in the class with the data being read.
+
+ For types which are typedef's involving stl classes, I/O is as follows:
+ typedef std::vector<std::pair<A, B> > MyTypedefName;
+
+ The user should define something like:
+
+ WriteMyTypedefName(std::ostream &, bool binary, const MyTypedefName &t);
+ ReadMyTypedefName(std::istream &, bool binary, MyTypedefName *t);
+
+ The user would have to write these functions.
+
+ For a type std::vector<T>:
+
+ void WriteIntegerVector(std::ostream &os, bool binary, const std::vector<T> &v);
+ void ReadIntegerVector(std::istream &is, bool binary, std::vector<T> *v);
+
+ For other types, e.g. vectors of pairs, the user should create a routine of the
+ type WriteMyTypedefName. This is to avoid introducing confusing templated functions;
+ we could easily create templated functions to handle most of these cases but they
+ would have to share the same name.
+
+ It also often happens that the user needs to write/read special tokens as part
+ of a file. These might be class headers, or separators/identifiers in the class.
+ We provide special functions for manipulating these. These special tokens must
+ be nonempty and must not contain any whitespace.
+
+ void WriteToken(std::ostream &os, bool binary, const char*);
+ void WriteToken(std::ostream &os, bool binary, const std::string & token);
+ int Peek(std::istream &is, bool binary);
+ void ReadToken(std::istream &is, bool binary, std::string *str);
+ void PeekToken(std::istream &is, bool binary, std::string *str);
+
+
+ WriteToken writes the token and one space (whether in binary or text mode).
+
+ Peek returns the first character of the next token, by consuming whitespace
+ (in text mode) and then returning the peek() character. It returns -1 at EOF;
+ it doesn't throw. It's useful if a class can have various forms based on
+ typedefs and virtual classes, and wants to know which version to read.
+
+ ReadToken allows the caller to obtain the next token. PeekToken works just
+ like ReadToken, but seeks back to the beginning of the token. A subsequent
+ call to ReadToken will read the same token again. This is useful when
+ different object types are written to the same file; using PeekToken one can
+ decide which of the objects to read.
+
+ There is currently no special functionality for writing/reading strings (where the strings
+ contain data rather than "special tokens" that are whitespace-free and nonempty). This is
+ because Kaldi is structured in such a way that strings don't appear, except as OpenFst symbol
+ table entries (and these have their own format).
+
+
+ NOTE: you should not call ReadBasicType and WriteBasicType with types,
+ such as int and size_t, whose size is machine-dependent -- at least not
+ if you want your file formats to port between machines. Use int32 and
+ int64 where necessary. There is no way to detect this using compile-time
+ assertions because C++ only keeps track of the internal representation of
+ the type.
+*/
+
+/// \addtogroup io_funcs_basic
+/// @{
+
+
+/// WriteBasicType is the name of the write function for bool, integer types,
+/// and floating-point types. They all throw on error.
+template<class T> void WriteBasicType(std::ostream &os, bool binary, T t);
+
+/// ReadBasicType is the name of the read function for bool, integer types,
+/// and floating-point types. They all throw on error.
+template<class T> void ReadBasicType(std::istream &is, bool binary, T *t);
+
+
+// Declare specialization for bool.
+template<>
+void WriteBasicType<bool>(std::ostream &os, bool binary, bool b);
+
+template <>
+void ReadBasicType<bool>(std::istream &is, bool binary, bool *b);
+
+// Declare specializations for float and double.
+template<>
+void WriteBasicType<float>(std::ostream &os, bool binary, float f);
+
+template<>
+void WriteBasicType<double>(std::ostream &os, bool binary, double f);
+
+template<>
+void ReadBasicType<float>(std::istream &is, bool binary, float *f);
+
+template<>
+void ReadBasicType<double>(std::istream &is, bool binary, double *f);
+
+// Overload of ReadBasicType with an "add" flag: if add is true, the value
+// read is added to *t instead of replacing it. Caution: if used in Read
+// functions, be careful to initialize the parameters concerned to zero in
+// the default constructor, since *t is read before being updated.
+template<class T>
+inline void ReadBasicType(std::istream &is, bool binary, T *t, bool add) {
+ if (!add) {
+ ReadBasicType(is, binary, t); // plain read: overwrite *t
+ } else {
+ T tmp = T(0);
+ ReadBasicType(is, binary, &tmp);
+ *t += tmp; // accumulate into the existing value
+ }
+}
+
+/// Function for writing STL vectors of integer types.
+template<class T> inline void WriteIntegerVector(std::ostream &os, bool binary,
+ const std::vector<T> &v);
+
+/// Function for reading STL vector of integer types.
+template<class T> inline void ReadIntegerVector(std::istream &is, bool binary,
+ std::vector<T> *v);
+
+/// The WriteToken functions are for writing nonempty sequences of non-space
+/// characters. They are not for general strings.
+void WriteToken(std::ostream &os, bool binary, const char *token);
+void WriteToken(std::ostream &os, bool binary, const std::string & token);
+
+/// Peek consumes whitespace (if binary == false) and then returns the peek()
+/// value of the stream.
+int Peek(std::istream &is, bool binary);
+
+/// ReadToken gets the next token and puts it in str (exception on failure).
+void ReadToken(std::istream &is, bool binar