diff options
author | Determinant <ted.sybil@gmail.com> | 2015-05-29 23:06:58 +0800 |
---|---|---|
committer | Determinant <ted.sybil@gmail.com> | 2015-05-29 23:06:58 +0800 |
commit | 74b9f7cb88cd21cfac3c2e50c8efb802485df0c5 (patch) | |
tree | bd6e583088a086144acc2d8af3eaca59691194ff /tnet_io/KaldiLib |
init
Diffstat (limited to 'tnet_io/KaldiLib')
31 files changed, 11145 insertions, 0 deletions
//***************************************************************************
//***************************************************************************
/**
 * @brief Decodes one HTK-style token in place
 *
 * Leading white space is skipped. The token may be enclosed in single or
 * double quotes; an unquoted token ends at the first white-space character.
 * A backslash makes the next character literal, except that a backslash
 * followed by an octal digit must be followed by exactly three octal digits,
 * which are replaced by the character with that code (values above \377 are
 * truncated by the conversion to char).
 *
 * @param str  NUL-terminated buffer; overwritten with the decoded token
 * @return  0 on success,
 *         -1 on a malformed escape (trailing backslash or incomplete octal),
 *         -2 when an opening quote is never closed (buffer is then left
 *            without a new terminator)
 */
int getHTKstr(char *str)
{
  char  quote = '\0';   // active quote character, '\0' while unquoted
  char *src   = str;    // read cursor
  char *dst   = str;    // write cursor, never ahead of src

  // <cctype> classification functions are only defined for values
  // representable as unsigned char (or EOF), hence the casts below.
  while (std::isspace(static_cast<unsigned char>(*src))) ++src;

  if (*src == '\'' || *src == '"') {
    quote = *src;
    ++src;
  }

  for (; *src; ++src) {
    // the matching closing quote ends the token
    if (quote != '\0' && *src == quote) {
      quote = '\0';
      break;
    }

    // outside quotes the first white-space character ends the token
    if (quote == '\0' && std::isspace(static_cast<unsigned char>(*src))) {
      break;
    }

    if (*src == '\\') {
      ++src;
      if (*src == '\0') {
        return -1;                       // backslash at end of input
      }

      if (*src >= '0' && *src <= '7') {
        // short-circuit order guarantees we never read past the terminator
        if (src[1] < '0' || src[1] > '7' ||
            src[2] < '0' || src[2] > '7') {
          return -1;                     // fewer than three octal digits
        }
        *dst++ = (char)((src[0] - '0') * 64 +
                        (src[1] - '0') * 8 +
                        (src[2] - '0'));
        src += 2;                        // loop increment skips the third digit
        continue;
      }
      // any other escaped character is copied verbatim below
    }

    *dst++ = *src;
  }

  if (quote) {
    return -2;
  }

  *dst = '\0';

  return 0;
}
//***************************************************************************
//***************************************************************************
/**
 * @brief Returns true if architecture is big endian
 *
 * Stores the integer 1 and inspects the byte at the lowest address: it is 1
 * when the least significant byte comes first and 0 otherwise.
 */
bool
IsBigEndian()
{
  const int  probe      = 1;
  const char first_byte = *reinterpret_cast<const char *>(&probe);

  return first_byte == 0;
}
base_name + 1 : inFileName; + + if (out_ext) bname_end = strrchr(base_name, '.'); + if (!bname_end) bname_end = base_name + strlen(base_name); + + + if ((chrptr = strstr(inFileName, "/./")) != NULL) + { + // what is in path after /./ serve as base name + base_name = chrptr + 3; + } + /* else if (*inFileName != '/') + { + // if inFileName isn't absolut path, don't forget directory structure + base_name = inFileName; + }*/ + + *pOutFileName = '\0'; + if (out_dir) + { + if (*out_dir) + { + strcat(pOutFileName, out_dir); + strcat(pOutFileName, "/"); + } + strncat(pOutFileName, base_name, bname_end-base_name); + } + else + { + strncat(pOutFileName, inFileName, bname_end-inFileName); + } + + if (out_ext && *out_ext) + { + strcat(pOutFileName, "."); + strcat(pOutFileName, out_ext); + } + } + + + //**************************************************************************** + //**************************************************************************** + bool + CloseEnough(const float f1, const float f2, const float nRounds) + { + bool ret_val = (_ABS((f1 - f2) / (f2 == 0.0f ? 1.0f : f2)) + < (nRounds * FLT_EPSILON)); + + return ret_val; + } + + + //**************************************************************************** + //**************************************************************************** + bool + CloseEnough(const double f1, const double f2, const double nRounds) + { + bool ret_val = (_ABS((f1 - f2) / (f2 == 0.0 ? 
//****************************************************************************
//****************************************************************************
/**
 * @brief Builds a filter command line by substituting a file name
 *
 * Every occurrence of the marker character (the first character of
 * @p pFilter) in @p command is replaced by @p filename.
 *
 * @param command   command template
 * @param filename  text inserted for each marker
 * @param pFilter   string whose first character is the marker
 * @return newly malloc()-ed, NUL-terminated string; the caller must free() it
 * @throws std::runtime_error when the result buffer cannot be allocated
 */
char*
ExpandHtkFilterCmd(const char *command, const char *filename, const char* pFilter)
{
  const char   marker = *pFilter;
  const size_t fnlen  = strlen(filename);
  const size_t cmdlen = strlen(command);

  // Count markers over the whole command, first character included and
  // terminator excluded, so the buffer size below is exact.
  size_t nmarkers = 0;
  for (const char *p = command; *p; ++p) {
    if (*p == marker) ++nmarkers;
  }

  char *out = (char*) malloc(cmdlen - nmarkers + nmarkers * fnlen + 1);
  if (out == NULL) {
    throw std::runtime_error("Insufficient memory");
  }

  char *outend = out;
  for (const char *p = command; *p; ++p) {
    if (*p == marker) {
      memcpy(outend, filename, fnlen);
      outend += fnlen;
    } else {
      *outend++ = *p;
    }
  }
  *outend = '\0';
  return out;
}
/* Alignment of critical dynamic data structure
 *
 * Not all platforms support memalign so we provide a stk_memalign wrapper
 *   void *stk_memalign( size_t align, size_t size, void **pp_orig )
 * It evaluates to the aligned address, or NULL when the allocation fails.
 * *pp_orig is the pointer that has to be freed afterwards.
 *
 * align must be a power of two. In the manual fallback the block is
 * over-allocated by align - 1 bytes and the returned address is the first
 * multiple of align inside it.
 */
#ifdef HAVE_POSIX_MEMALIGN
#  define stk_memalign(align,size,pp_orig) \
     ( !posix_memalign( pp_orig, align, size ) ? *(pp_orig) : NULL )
#  ifdef STK_MEMALIGN_MANUAL
#    undef STK_MEMALIGN_MANUAL
#  endif
#elif defined(HAVE_MEMALIGN)
   /* Some systems have memalign() but no declaration for it */
   //void * memalign( size_t align, size_t size );
#  define stk_memalign(align,size,pp_orig) \
     ( *(pp_orig) = memalign( align, size ) )
#  ifdef STK_MEMALIGN_MANUAL
#    undef STK_MEMALIGN_MANUAL
#  endif
#else /* We don't have any choice but to align manually */
   /* Round up to the requested alignment (not a fixed 16 bytes) so the
    * result matches the size + align - 1 bytes that were allocated. */
#  define stk_memalign(align,size,pp_orig) \
     (( *(pp_orig) = malloc( (size) + (align) - 1 )) ? \
      (void *)( (((size_t)*(pp_orig)) + ((size_t)(align) - 1)) \
                & ~((size_t)(align) - 1) ) : NULL )
#  define STK_MEMALIGN_MANUAL
#endif
/**
 * @brief Reads the next white-space delimited token and checks it
 *
 * @param i_stream  stream to read the token from
 * @param kwd       keyword the token must be equal to
 * @throws std::runtime_error with the text "<kwd> expected" when the token
 *         read differs from @p kwd (including when nothing could be read)
 */
inline void
ExpectKeyword(std::istream &i_stream, const char *kwd)
{
  const std::string expected(kwd);

  std::string found;
  i_stream >> found;

  if (found == expected) {
    return;
  }

  throw std::runtime_error(expected + " expected");
}
return rOs; + } + + friend std::istream & operator >>(std::istream &rIs, const MatrixVectorIostreamControl modifier) + { + if(modifier.mValueToBeSet) { + rIs.iword(MATRIX_IOS_FORMAT_IWORD) |= modifier.mBitsToBeSet; + } else { + rIs.iword(MATRIX_IOS_FORMAT_IWORD) &= ~modifier.mBitsToBeSet; + } + return rIs; + } + }; + + + + +} // namespace TNet + +#ifdef __ICC +#pragma warning (disable: 383) // ICPC remark we don't want. +#pragma warning (disable: 810) // ICPC remark we don't want. +#pragma warning (disable: 981) // ICPC remark we don't want. +#pragma warning (disable: 1418) // ICPC remark we don't want. +#pragma warning (disable: 444) // ICPC remark we don't want. +#pragma warning (disable: 869) // ICPC remark we don't want. +#pragma warning (disable: 1287) // ICPC remark we don't want. +#pragma warning (disable: 279) // ICPC remark we don't want. +#pragma warning (disable: 981) // ICPC remark we don't want. +#endif + +//#ifdef CYGWIN +#if 1 +#undef assert +#ifndef NDEBUG +#define assert(e) ((e) ? (void)0 : assertf(__FILE__, __LINE__, #e)) +#else +#define assert(e) ((void)0) +#endif +void assertf(const char *c, int i, const char *msg); // Just make it possible to break into assert on gdb-- has some kind of bug on cygwin. +#else +#include <cassert> +#endif + +#define assert_throw(e) ((e) ? (void)0 : assertf_throw(__FILE__, __LINE__, #e)) +void assertf_throw(const char *c, int i, const char *msg); + +#define DAN_STYLE_IO + +#endif // ifndef TNet_Common_h + diff --git a/tnet_io/KaldiLib/Error.h b/tnet_io/KaldiLib/Error.h new file mode 100644 index 0000000..2228dde --- /dev/null +++ b/tnet_io/KaldiLib/Error.h @@ -0,0 +1,172 @@ +// +// C++ Interface: %{MODULE} +// +// Description: +// +// +// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR} +// +// Copyright: See COPYING file that comes with this distribution +// +// + +/** @file Error.h + * This header defines several types and functions relating to the + * handling of exceptions in STK. 
/** MyException
 *  Custom exception class. what() returns the message passed to the
 *  constructor followed by a symbolic stack trace captured when the object
 *  was constructed.
 */
class MyException
  : public std::runtime_error
{
  public:
    // Not declared throw(): building the message allocates and may throw.
    explicit MyException(const std::string& what_arg);
    virtual ~MyException() throw();

    const char* what() const throw()
    { return mWhat.c_str(); }

  private:
    std::string mWhat;   ///< message plus stack trace returned by what()
};

/**
 * MyException:: implementation
 */
inline
MyException::
MyException(const std::string& what_arg)
  : std::runtime_error(what_arg)
{
  mWhat = what_arg;
  mWhat += "\nTHE STACKTRACE INSIDE MyException OBJECT IS:\n";

  enum { kMaxFrames = 10 };
  void *frames[kMaxFrames];

  const int nframes = backtrace(frames, kMaxFrames);
  char    **symbols = backtrace_symbols(frames, nframes);

  if (symbols == NULL) {
    // backtrace_symbols() reports failure with NULL; keep the plain message
    mWhat += "(backtrace unavailable)\n";
    return;
  }

  try {
    // 0th entry is the MyException ctor itself, so start from 1
    for (int i = 1; i < nframes; ++i) {
      mWhat += symbols[i];
      mWhat += "\n";
    }
  } catch (...) {
    free(symbols);   // do not leak the symbol table if appending fails
    throw;
  }

  free(symbols);
}


inline
MyException::
~MyException() throw()
{ }
char *file, int line, const std::string &msg) + { + std::stringstream ss; + ss << "ERROR (" << func << ':' << file << ':' << line << ") " << msg; + throw MyException(ss.str()); + } + + /** + * @brief Throw a formatted error + */ + inline void _PError_(const char *func, const char *file, int line, const char *fmt, ...) { + va_list ap; + char msg[256]; + va_start(ap, fmt); + vsnprintf(msg, sizeof msg, fmt, ap); + va_end(ap); + _Error_(func, file, line, msg); + } + + /** + * @brief Warning handling function + */ + inline void + _Warning_(const char *func, const char *file, int line, const std::string &msg) + { + std::cout << "WARNING (" << func << ':' << file << ':' << line << ") " << msg << std::endl; + } + + inline void + _TraceLog_(const char *func, const char *file, int line, const std::string &msg) + { + std::cout << "INFO (" << func << ':' << file << ':' << line << ") " << msg << std::endl; + std::cout.flush(); + } + + /** + * New kaldi error handling: + * + * class KaldiErrorMessage is invoked from the KALDI_ERROR macro. + * The destructor throws an exception. 
+ */ + class KaldiErrorMessage { + public: + KaldiErrorMessage(const char *func, const char *file, int line) { + this->stream() << "ERROR (" + << func << "():" + << file << ':' << line << ") "; + } + inline std::ostream &stream() { return ss; } + ~KaldiErrorMessage() { throw MyException(ss.str()); } + private: + std::ostringstream ss; + }; + #define KALDI_ERR TNet::KaldiErrorMessage(__func__, __FILE__, __LINE__).stream() + + + +} // namespace TNet + +//#define TNET_Error_h +#endif diff --git a/tnet_io/KaldiLib/Features.cc b/tnet_io/KaldiLib/Features.cc new file mode 100644 index 0000000..64b63e8 --- /dev/null +++ b/tnet_io/KaldiLib/Features.cc @@ -0,0 +1,1798 @@ + +//enable feature repository profiling +#define PROFILING 1 + +#include <sstream> +#include <map> +#include <list> +#include <cstdio> + +#include "Features.h" +#include "Tokenizer.h" +#include "StkMatch.h" +#include "Types.h" + + + +namespace TNet +{ + const char + FeatureRepository:: + mpParmKindNames[13][16] = + { + {"WAVEFORM"}, + {"LPC"}, + {"LPREFC"}, + {"LPCEPSTRA"}, + {"LPDELCEP"}, + {"IREFC"}, + {"MFCC"}, + {"FBANK"}, + {"MELSPEC"}, + {"USER"}, + {"DISCRETE"}, + {"PLP"}, + {"ANON"} + }; + + //*************************************************************************** + //*************************************************************************** + + FileListElem:: + FileListElem(const std::string & rFileName) + { + std::string::size_type pos; + + mLogical = rFileName; + mWeight = 1.0; + + // some slash-backslash replacement hack + for (size_t i = 0; i < mLogical.size(); i++) { + if (mLogical[i] == '\\') { + mLogical[i] = '/'; + } + } + + // read sentence weight definition if any ( physical_file.fea[s,e]{weight} ) + if ((pos = mLogical.find('{')) != std::string::npos) + { + std::string tmp_weight(mLogical.begin() + pos + 1, mLogical.end()); + std::stringstream tmp_ss(tmp_weight); + + tmp_ss >> mWeight; + mLogical.erase(pos); + } + + // look for "=" symbol and if found, split it + if ((pos = 
mLogical.find('=')) != std::string::npos) + { + // copy all from mLogical[pos+1] till the end to mPhysical + mPhysical.assign(mLogical.begin() + pos + 1, mLogical.end()); + // erase all from pos + 1 till the end from mLogical + mLogical.erase(pos); + // trim the leading and trailing spaces + Trim(mPhysical); + Trim(mLogical); + } + else + { + // trim the leading and trailing spaces + Trim(mLogical); + + mPhysical = mLogical; + } + } + + + //########################################################################### + //########################################################################### + // FeatureRepository section + //########################################################################### + //########################################################################### + + //*************************************************************************** + //*************************************************************************** + void + FeatureRepository:: + ReadCepsNormFile( + const char * pFileName, + char ** pLastFileName, + BaseFloat ** vec_buff, + int sampleKind, + CNFileType type, + int coefs) + { + FILE* fp; + int i; + char s1[64]; + char s2[64]; + const char* typeStr = (type == CNF_Mean ? "MEAN" : + type == CNF_Variance ? "VARIANCE" : "VARSCALE"); + + const char* typeStr2 = (type == CNF_Mean ? "CMN" : + type == CNF_Variance ? 
"CVN" : "VarScale"); + + if (*pLastFileName != NULL && !strcmp(*pLastFileName, pFileName)) { + return; + } + free(*pLastFileName); + *pLastFileName=strdup(pFileName); + *vec_buff = (BaseFloat*) realloc(*vec_buff, coefs * sizeof(BaseFloat)); + + if (*pLastFileName == NULL || *vec_buff== NULL) + throw std::runtime_error("Insufficient memory"); + + if ((fp = fopen(pFileName, "r")) == NULL) { + throw std::runtime_error(std::string("Cannot open ") + typeStr2 + + " pFileName: '" + pFileName + "'"); + } + + if ((type != CNF_VarScale + && (fscanf(fp, " <%64[^>]> <%64[^>]>", s1, s2) != 2 + || strcmp(StrToUpper(s1), "CEPSNORM") + || ReadParmKind(s2, false) != sampleKind)) + || fscanf(fp, " <%64[^>]> %d", s1, &i) != 2 + || strcmp(StrToUpper(s1), typeStr) + || i != coefs) + { + ParmKind2Str(sampleKind, s2); + + //std::cout << "[[[TADY!!!!]]]" << pFileName << "\n" << std::flush; + + throw std::runtime_error(std::string("") + + (type == CNF_VarScale ? "" : "<CEPSNORM> <") + + (type == CNF_VarScale ? "" : s2) + + (type == CNF_VarScale ? "" : ">") + + " <" + typeStr + " ... expected in " + typeStr2 + + " file " + pFileName); + } + + for (i = 0; i < coefs; i++) { + if (fscanf(fp, " "FLOAT_FMT, *vec_buff+i) != 1) { + if (fscanf(fp, "%64s", s2) == 1) { + throw std::runtime_error(std::string("Decimal number expected but '") + + s2 + "' found in " + typeStr2 + " file " + pFileName); + } + else if (feof(fp)) { + throw std::runtime_error(std::string("Unexpected end of ") |