diff options
author | Ted Yin <[email protected]> | 2015-08-14 17:42:26 +0800 |
---|---|---|
committer | Ted Yin <[email protected]> | 2015-08-14 17:42:26 +0800 |
commit | c3cffb58b9921d78753336421b52b9ffdaa5515c (patch) | |
tree | bfea20e97c200cf734021e3756d749c892e658a4 /kaldi_io/src/kaldi/util | |
parent | 10cce5f6a5c9e2f8e00d5a2a4d87c9cb7c26bf4c (diff) | |
parent | dfdd17afc2e984ec6c32ea01290f5c76309a456a (diff) |
Merge pull request #2 from yimmon/master
remove needless files
Diffstat (limited to 'kaldi_io/src/kaldi/util')
22 files changed, 0 insertions, 7057 deletions
diff --git a/kaldi_io/src/kaldi/util/basic-filebuf.h b/kaldi_io/src/kaldi/util/basic-filebuf.h deleted file mode 100644 index cf2e079..0000000 --- a/kaldi_io/src/kaldi/util/basic-filebuf.h +++ /dev/null @@ -1,1065 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// This is a modified version of the std::basic_filebuf from libc++ -// (http://libcxx.llvm.org/). -// It allows one to create basic_filebuf from an existing FILE* handle or file -// descriptor. -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source License licenses. See LICENSE.TXT for details (included at the -// bottom). -/////////////////////////////////////////////////////////////////////////////// -#ifndef KALDI_UTIL_BASIC_FILEBUF_H_ -#define KALDI_UTIL_BASIC_FILEBUF_H_ - -/////////////////////////////////////////////////////////////////////////////// -#include <fstream> -#include <cstdio> -#include <cstring> - -/////////////////////////////////////////////////////////////////////////////// -namespace kaldi -{ - -/////////////////////////////////////////////////////////////////////////////// -template <typename CharT, typename Traits = std::char_traits<CharT> > -class basic_filebuf : public std::basic_streambuf<CharT, Traits> -{ -public: - typedef CharT char_type; - typedef Traits traits_type; - typedef typename traits_type::int_type int_type; - typedef typename traits_type::pos_type pos_type; - typedef typename traits_type::off_type off_type; - typedef typename traits_type::state_type state_type; - - basic_filebuf(); - basic_filebuf(basic_filebuf&& rhs); - virtual ~basic_filebuf(); - - basic_filebuf& operator=(basic_filebuf&& rhs); - void swap(basic_filebuf& rhs); - - bool is_open() const; - basic_filebuf* open(const char* s, std::ios_base::openmode mode); - basic_filebuf* open(const std::string& s, std::ios_base::openmode mode); - basic_filebuf* open(int fd, std::ios_base::openmode mode); - basic_filebuf* open(FILE* f, std::ios_base::openmode mode); - basic_filebuf* close(); - - FILE* file() { return this->_M_file; } - int fd() { return fileno(this->_M_file); } - -protected: - int_type underflow() override; - int_type pbackfail(int_type c = traits_type::eof()) override; - int_type overflow (int_type c = traits_type::eof()) override; - std::basic_streambuf<char_type, traits_type>* setbuf(char_type* s, std::streamsize n) override; - pos_type seekoff(off_type off, std::ios_base::seekdir way, - std::ios_base::openmode wch = std::ios_base::in | std::ios_base::out) override; - pos_type seekpos(pos_type sp, - std::ios_base::openmode wch = std::ios_base::in | std::ios_base::out) override; - int sync() override; - void imbue(const std::locale& loc) override; - -protected: - char* _M_extbuf; - const char* _M_extbufnext; - const char* _M_extbufend; - char _M_extbuf_min[8]; - size_t _M_ebs; - char_type* _M_intbuf; - size_t _M_ibs; - FILE* _M_file; - const std::codecvt<char_type, char, state_type>* _M_cv; - state_type _M_st; - state_type _M_st_last; - std::ios_base::openmode _M_om; - std::ios_base::openmode _M_cm; - bool _M_owns_eb; - bool _M_owns_ib; - bool _M_always_noconv; - - const char* _M_get_mode(std::ios_base::openmode mode); - bool _M_read_mode(); - void _M_write_mode(); -}; - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -basic_filebuf<CharT, Traits>::basic_filebuf() - : _M_extbuf(nullptr), - _M_extbufnext(nullptr), - _M_extbufend(nullptr), - _M_ebs(0), - _M_intbuf(nullptr), - _M_ibs(0), - _M_file(nullptr), - _M_cv(nullptr), - _M_st(), - _M_st_last(), - _M_om(std::ios_base::openmode(0)), - _M_cm(std::ios_base::openmode(0)), - _M_owns_eb(false), - _M_owns_ib(false), - _M_always_noconv(false) -{ - if (std::has_facet<std::codecvt<char_type, char, state_type> >(this->getloc())) - { - _M_cv = &std::use_facet<std::codecvt<char_type, char, state_type> >(this->getloc()); - _M_always_noconv = _M_cv->always_noconv(); - } - setbuf(0, 4096); -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -basic_filebuf<CharT, Traits>::basic_filebuf(basic_filebuf&& rhs) - : std::basic_streambuf<CharT, Traits>(rhs) -{ - if (rhs._M_extbuf == rhs._M_extbuf_min) - { - _M_extbuf = _M_extbuf_min; - _M_extbufnext = _M_extbuf + (rhs._M_extbufnext - rhs._M_extbuf); - _M_extbufend = _M_extbuf + (rhs._M_extbufend - rhs._M_extbuf); - } - else - { - _M_extbuf = rhs._M_extbuf; - _M_extbufnext = rhs._M_extbufnext; - _M_extbufend = rhs._M_extbufend; - } - _M_ebs = rhs._M_ebs; - _M_intbuf = rhs._M_intbuf; - _M_ibs = rhs._M_ibs; - _M_file = rhs._M_file; - _M_cv = rhs._M_cv; - _M_st = rhs._M_st; - _M_st_last = rhs._M_st_last; - _M_om = rhs._M_om; - _M_cm = rhs._M_cm; - _M_owns_eb = rhs._M_owns_eb; - _M_owns_ib = rhs._M_owns_ib; - _M_always_noconv = rhs._M_always_noconv; - if (rhs.pbase()) - { - if (rhs.pbase() == rhs._M_intbuf) - this->setp(_M_intbuf, _M_intbuf + (rhs. epptr() - rhs.pbase())); - else - this->setp((char_type*)_M_extbuf, - (char_type*)_M_extbuf + (rhs. epptr() - rhs.pbase())); - this->pbump(rhs. pptr() - rhs.pbase()); - } - else if (rhs.eback()) - { - if (rhs.eback() == rhs._M_intbuf) - this->setg(_M_intbuf, _M_intbuf + (rhs.gptr() - rhs.eback()), - _M_intbuf + (rhs.egptr() - rhs.eback())); - else - this->setg((char_type*)_M_extbuf, - (char_type*)_M_extbuf + (rhs.gptr() - rhs.eback()), - (char_type*)_M_extbuf + (rhs.egptr() - rhs.eback())); - } - rhs._M_extbuf = nullptr; - rhs._M_extbufnext = nullptr; - rhs._M_extbufend = nullptr; - rhs._M_ebs = 0; - rhs._M_intbuf = nullptr; - rhs._M_ibs = 0; - rhs._M_file = nullptr; - rhs._M_st = state_type(); - rhs._M_st_last = state_type(); - rhs._M_om = std::ios_base::openmode(0); - rhs._M_cm = std::ios_base::openmode(0); - rhs._M_owns_eb = false; - rhs._M_owns_ib = false; - rhs.setg(0, 0, 0); - rhs.setp(0, 0); -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -inline -basic_filebuf<CharT, Traits>& -basic_filebuf<CharT, Traits>::operator=(basic_filebuf&& rhs) -{ - close(); - swap(rhs); - return *this; -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -basic_filebuf<CharT, Traits>::~basic_filebuf() -{ - // try - // { - // close(); - // } - // catch (...) - // { - // } - if (_M_owns_eb) - delete [] _M_extbuf; - if (_M_owns_ib) - delete [] _M_intbuf; -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -void -basic_filebuf<CharT, Traits>::swap(basic_filebuf& rhs) -{ - std::basic_streambuf<char_type, traits_type>::swap(rhs); - if (_M_extbuf != _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) - { - std::swap(_M_extbuf, rhs._M_extbuf); - std::swap(_M_extbufnext, rhs._M_extbufnext); - std::swap(_M_extbufend, rhs._M_extbufend); - } - else - { - ptrdiff_t ln = _M_extbufnext - _M_extbuf; - ptrdiff_t le = _M_extbufend - _M_extbuf; - ptrdiff_t rn = rhs._M_extbufnext - rhs._M_extbuf; - ptrdiff_t re = rhs._M_extbufend - rhs._M_extbuf; - if (_M_extbuf == _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min) - { - _M_extbuf = rhs._M_extbuf; - rhs._M_extbuf = rhs._M_extbuf_min; - } - else if (_M_extbuf != _M_extbuf_min && rhs._M_extbuf == rhs._M_extbuf_min) - { - rhs._M_extbuf = _M_extbuf; - _M_extbuf = _M_extbuf_min; - } - _M_extbufnext = _M_extbuf + rn; - _M_extbufend = _M_extbuf + re; - rhs._M_extbufnext = rhs._M_extbuf + ln; - rhs._M_extbufend = rhs._M_extbuf + le; - } - std::swap(_M_ebs, rhs._M_ebs); - std::swap(_M_intbuf, rhs._M_intbuf); - std::swap(_M_ibs, rhs._M_ibs); - std::swap(_M_file, rhs._M_file); - std::swap(_M_cv, rhs._M_cv); - std::swap(_M_st, rhs._M_st); - std::swap(_M_st_last, rhs._M_st_last); - std::swap(_M_om, rhs._M_om); - std::swap(_M_cm, rhs._M_cm); - std::swap(_M_owns_eb, rhs._M_owns_eb); - std::swap(_M_owns_ib, rhs._M_owns_ib); - std::swap(_M_always_noconv, rhs._M_always_noconv); - if (this->eback() == (char_type*)rhs._M_extbuf_min) - { - ptrdiff_t n = this->gptr() - this->eback(); - ptrdiff_t e = this->egptr() - this->eback(); - this->setg((char_type*)_M_extbuf_min, - (char_type*)_M_extbuf_min + n, - (char_type*)_M_extbuf_min + e); - } - else if (this->pbase() == (char_type*)rhs._M_extbuf_min) - { - ptrdiff_t n = this->pptr() - this->pbase(); - ptrdiff_t e = this->epptr() - this->pbase(); - this->setp((char_type*)_M_extbuf_min, - (char_type*)_M_extbuf_min + e); - this->pbump(n); - } - if (rhs.eback() == (char_type*)_M_extbuf_min) - { - ptrdiff_t n = rhs.gptr() - rhs.eback(); - ptrdiff_t e = rhs.egptr() - rhs.eback(); - rhs.setg((char_type*)rhs._M_extbuf_min, - (char_type*)rhs._M_extbuf_min + n, - (char_type*)rhs._M_extbuf_min + e); - } - else if (rhs.pbase() == (char_type*)_M_extbuf_min) - { - ptrdiff_t n = rhs.pptr() - rhs.pbase(); - ptrdiff_t e = rhs.epptr() - rhs.pbase(); - rhs.setp((char_type*)rhs._M_extbuf_min, - (char_type*)rhs._M_extbuf_min + e); - rhs.pbump(n); - } -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -inline -void -swap(basic_filebuf<CharT, Traits>& x, basic_filebuf<CharT, Traits>& y) -{ - x.swap(y); -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -inline -bool -basic_filebuf<CharT, Traits>::is_open() const -{ - return _M_file != nullptr; -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -const char* basic_filebuf<CharT, Traits>::_M_get_mode(std::ios_base::openmode mode) -{ - switch ((mode & ~std::ios_base::ate) | 0) - { - case std::ios_base::out: - case std::ios_base::out | std::ios_base::trunc: - return "w"; - case std::ios_base::out | std::ios_base::app: - case std::ios_base::app: - return "a"; - break; - case std::ios_base::in: - return "r"; - case std::ios_base::in | std::ios_base::out: - return "r+"; - case std::ios_base::in | std::ios_base::out | std::ios_base::trunc: - return "w+"; - case std::ios_base::in | std::ios_base::out | std::ios_base::app: - case std::ios_base::in | std::ios_base::app: - return "a+"; - case std::ios_base::out | std::ios_base::binary: - case std::ios_base::out | std::ios_base::trunc | std::ios_base::binary: - return "wb"; - case std::ios_base::out | std::ios_base::app | std::ios_base::binary: - case std::ios_base::app | std::ios_base::binary: - return "ab"; - case std::ios_base::in | std::ios_base::binary: - return "rb"; - case std::ios_base::in | std::ios_base::out | std::ios_base::binary: - return "r+b"; - case std::ios_base::in | std::ios_base::out | std::ios_base::trunc | std::ios_base::binary: - return "w+b"; - case std::ios_base::in | std::ios_base::out | std::ios_base::app | std::ios_base::binary: - case std::ios_base::in | std::ios_base::app | std::ios_base::binary: - return "a+b"; - default: - return nullptr; - } -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -basic_filebuf<CharT, Traits>* -basic_filebuf<CharT, Traits>::open(const char* s, std::ios_base::openmode mode) -{ - basic_filebuf<CharT, Traits>* rt = nullptr; - if (_M_file == nullptr) - { - const char* md= _M_get_mode(mode); - if (md) - { - _M_file = fopen(s, md); - if (_M_file) - { - rt = this; - _M_om = mode; - if (mode & std::ios_base::ate) - { - if (fseek(_M_file, 0, SEEK_END)) - { - fclose(_M_file); - _M_file = nullptr; - rt = nullptr; - } - } - } - } - } - return rt; -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -inline -basic_filebuf<CharT, Traits>* -basic_filebuf<CharT, Traits>::open(const std::string& s, std::ios_base::openmode mode) -{ - return open(s.c_str(), mode); -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -basic_filebuf<CharT, Traits>* -basic_filebuf<CharT, Traits>::open(int fd, std::ios_base::openmode mode) -{ - const char* md= this->_M_get_mode(mode); - if (md) - { - this->_M_file= fdopen(fd, md); - this->_M_om = mode; - return this; - } - else return nullptr; -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -basic_filebuf<CharT, Traits>* -basic_filebuf<CharT, Traits>::open(FILE* f, std::ios_base::openmode mode) -{ - this->_M_file = f; - this->_M_om = mode; - return this; -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -basic_filebuf<CharT, Traits>* -basic_filebuf<CharT, Traits>::close() -{ - basic_filebuf<CharT, Traits>* rt = nullptr; - if (_M_file) - { - rt = this; - std::unique_ptr<FILE, int(*)(FILE*)> h(_M_file, fclose); - if (sync()) - rt = nullptr; - if (fclose(h.release()) == 0) - _M_file = nullptr; - else - rt = nullptr; - } - return rt; -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -typename basic_filebuf<CharT, Traits>::int_type -basic_filebuf<CharT, Traits>::underflow() -{ - if (_M_file == nullptr) - return traits_type::eof(); - bool initial = _M_read_mode(); - char_type buf; - if (this->gptr() == nullptr) - this->setg(&buf, &buf+1, &buf+1); - const size_t unget_sz = initial ? 0 : std::min<size_t>((this->egptr() - this->eback()) / 2, 4); - int_type c = traits_type::eof(); - if (this->gptr() == this->egptr()) - { - memmove(this->eback(), this->egptr() - unget_sz, unget_sz * sizeof(char_type)); - if (_M_always_noconv) - { - size_t nmemb = static_cast<size_t>(this->egptr() - this->eback() - unget_sz); - nmemb = fread(this->eback() + unget_sz, 1, nmemb, _M_file); - if (nmemb != 0) - { - this->setg(this->eback(), - this->eback() + unget_sz, - this->eback() + unget_sz + nmemb); - c = traits_type::to_int_type(*this->gptr()); - } - } - else - { - memmove(_M_extbuf, _M_extbufnext, _M_extbufend - _M_extbufnext); - _M_extbufnext = _M_extbuf + (_M_extbufend - _M_extbufnext); - _M_extbufend = _M_extbuf + (_M_extbuf == _M_extbuf_min ? sizeof(_M_extbuf_min) : _M_ebs); - size_t nmemb = std::min(static_cast<size_t>(_M_ibs - unget_sz), - static_cast<size_t>(_M_extbufend - _M_extbufnext)); - std::codecvt_base::result r; - _M_st_last = _M_st; - size_t nr = fread((void*)_M_extbufnext, 1, nmemb, _M_file); - if (nr != 0) - { - if (!_M_cv) - throw std::bad_cast(); - _M_extbufend = _M_extbufnext + nr; - char_type* inext; - r = _M_cv->in(_M_st, _M_extbuf, _M_extbufend, _M_extbufnext, - this->eback() + unget_sz, - this->eback() + _M_ibs, inext); - if (r == std::codecvt_base::noconv) - { - this->setg((char_type*)_M_extbuf, (char_type*)_M_extbuf, (char_type*)_M_extbufend); - c = traits_type::to_int_type(*this->gptr()); - } - else if (inext != this->eback() + unget_sz) - { - this->setg(this->eback(), this->eback() + unget_sz, inext); - c = traits_type::to_int_type(*this->gptr()); - } - } - } - } - else - c = traits_type::to_int_type(*this->gptr()); - if (this->eback() == &buf) - this->setg(0, 0, 0); - return c; -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -typename basic_filebuf<CharT, Traits>::int_type -basic_filebuf<CharT, Traits>::pbackfail(int_type c) -{ - if (_M_file && this->eback() < this->gptr()) - { - if (traits_type::eq_int_type(c, traits_type::eof())) - { - this->gbump(-1); - return traits_type::not_eof(c); - } - if ((_M_om & std::ios_base::out) || - traits_type::eq(traits_type::to_char_type(c), this->gptr()[-1])) - { - this->gbump(-1); - *this->gptr() = traits_type::to_char_type(c); - return c; - } - } - return traits_type::eof(); -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -typename basic_filebuf<CharT, Traits>::int_type -basic_filebuf<CharT, Traits>::overflow(int_type c) -{ - if (_M_file == nullptr) - return traits_type::eof(); - _M_write_mode(); - char_type buf; - char_type* pb_save = this->pbase(); - char_type* epb_save = this->epptr(); - if (!traits_type::eq_int_type(c, traits_type::eof())) - { - if (this->pptr() == nullptr) - this->setp(&buf, &buf+1); - *this->pptr() = traits_type::to_char_type(c); - this->pbump(1); - } - if (this->pptr() != this->pbase()) - { - if (_M_always_noconv) - { - size_t nmemb = static_cast<size_t>(this->pptr() - this->pbase()); - if (fwrite(this->pbase(), sizeof(char_type), nmemb, _M_file) != nmemb) - return traits_type::eof(); - } - else - { - char* extbe = _M_extbuf; - std::codecvt_base::result r; - do - { - if (!_M_cv) - throw std::bad_cast(); - const char_type* e; - r = _M_cv->out(_M_st, this->pbase(), this->pptr(), e, - _M_extbuf, _M_extbuf + _M_ebs, extbe); - if (e == this->pbase()) - return traits_type::eof(); - if (r == std::codecvt_base::noconv) - { - size_t nmemb = static_cast<size_t>(this->pptr() - this->pbase()); - if (fwrite(this->pbase(), 1, nmemb, _M_file) != nmemb) - return traits_type::eof(); - } - else if (r == std::codecvt_base::ok || r == std::codecvt_base::partial) - { - size_t nmemb = static_cast<size_t>(extbe - _M_extbuf); - if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb) - return traits_type::eof(); - if (r == std::codecvt_base::partial) - { - this->setp((char_type*)e, this->pptr()); - this->pbump(this->epptr() - this->pbase()); - } - } - else - return traits_type::eof(); - } while (r == std::codecvt_base::partial); - } - this->setp(pb_save, epb_save); - } - return traits_type::not_eof(c); -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -std::basic_streambuf<CharT, Traits>* -basic_filebuf<CharT, Traits>::setbuf(char_type* s, std::streamsize n) -{ - this->setg(0, 0, 0); - this->setp(0, 0); - if (_M_owns_eb) - delete [] _M_extbuf; - if (_M_owns_ib) - delete [] _M_intbuf; - _M_ebs = n; - if (_M_ebs > sizeof(_M_extbuf_min)) - { - if (_M_always_noconv && s) - { - _M_extbuf = (char*)s; - _M_owns_eb = false; - } - else - { - _M_extbuf = new char[_M_ebs]; - _M_owns_eb = true; - } - } - else - { - _M_extbuf = _M_extbuf_min; - _M_ebs = sizeof(_M_extbuf_min); - _M_owns_eb = false; - } - if (!_M_always_noconv) - { - _M_ibs = std::max<std::streamsize>(n, sizeof(_M_extbuf_min)); - if (s && _M_ibs >= sizeof(_M_extbuf_min)) - { - _M_intbuf = s; - _M_owns_ib = false; - } - else - { - _M_intbuf = new char_type[_M_ibs]; - _M_owns_ib = true; - } - } - else - { - _M_ibs = 0; - _M_intbuf = 0; - _M_owns_ib = false; - } - return this; -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -typename basic_filebuf<CharT, Traits>::pos_type -basic_filebuf<CharT, Traits>::seekoff(off_type off, std::ios_base::seekdir way, - std::ios_base::openmode) -{ - if (!_M_cv) - throw std::bad_cast(); - int width = _M_cv->encoding(); - if (_M_file == nullptr || (width <= 0 && off != 0) || sync()) - return pos_type(off_type(-1)); - // width > 0 || off == 0 - int whence; - switch (way) - { - case std::ios_base::beg: - whence = SEEK_SET; - break; - case std::ios_base::cur: - whence = SEEK_CUR; - break; - case std::ios_base::end: - whence = SEEK_END; - break; - default: - return pos_type(off_type(-1)); - } -#if _WIN32 - if (fseek(_M_file, width > 0 ? width * off : 0, whence)) - return pos_type(off_type(-1)); - pos_type r = ftell(_M_file); -#else - if (fseeko(_M_file, width > 0 ? width * off : 0, whence)) - return pos_type(off_type(-1)); - pos_type r = ftello(_M_file); -#endif - r.state(_M_st); - return r; -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -typename basic_filebuf<CharT, Traits>::pos_type -basic_filebuf<CharT, Traits>::seekpos(pos_type sp, std::ios_base::openmode) -{ - if (_M_file == nullptr || sync()) - return pos_type(off_type(-1)); -#if _WIN32 - if (fseek(_M_file, sp, SEEK_SET)) - return pos_type(off_type(-1)); -#else - if (fseeko(_M_file, sp, SEEK_SET)) - return pos_type(off_type(-1)); -#endif - _M_st = sp.state(); - return sp; -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -int -basic_filebuf<CharT, Traits>::sync() -{ - if (_M_file == nullptr) - return 0; - if (!_M_cv) - throw std::bad_cast(); - if (_M_cm & std::ios_base::out) - { - if (this->pptr() != this->pbase()) - if (overflow() == traits_type::eof()) - return -1; - std::codecvt_base::result r; - do - { - char* extbe; - r = _M_cv->unshift(_M_st, _M_extbuf, _M_extbuf + _M_ebs, extbe); - size_t nmemb = static_cast<size_t>(extbe - _M_extbuf); - if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb) - return -1; - } while (r == std::codecvt_base::partial); - if (r == std::codecvt_base::error) - return -1; - if (fflush(_M_file)) - return -1; - } - else if (_M_cm & std::ios_base::in) - { - off_type c; - state_type state = _M_st_last; - bool update_st = false; - if (_M_always_noconv) - c = this->egptr() - this->gptr(); - else - { - int width = _M_cv->encoding(); - c = _M_extbufend - _M_extbufnext; - if (width > 0) - c += width * (this->egptr() - this->gptr()); - else - { - if (this->gptr() != this->egptr()) - { - const int off = _M_cv->length(state, _M_extbuf, - _M_extbufnext, - this->gptr() - this->eback()); - c += _M_extbufnext - _M_extbuf - off; - update_st = true; - } - } - } -#if _WIN32 - if (fseek(_M_file_, -c, SEEK_CUR)) - return -1; -#else - if (fseeko(_M_file, -c, SEEK_CUR)) - return -1; -#endif - if (update_st) - _M_st = state; - _M_extbufnext = _M_extbufend = _M_extbuf; - this->setg(0, 0, 0); - _M_cm = std::ios_base::openmode(0); - } - return 0; -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -void -basic_filebuf<CharT, Traits>::imbue(const std::locale& loc) -{ - sync(); - _M_cv = &std::use_facet<std::codecvt<char_type, char, state_type> >(loc); - bool old_anc = _M_always_noconv; - _M_always_noconv = _M_cv->always_noconv(); - if (old_anc != _M_always_noconv) - { - this->setg(0, 0, 0); - this->setp(0, 0); - // invariant, char_type is char, else we couldn't get here - if (_M_always_noconv) // need to dump _M_intbuf - { - if (_M_owns_eb) - delete [] _M_extbuf; - _M_owns_eb = _M_owns_ib; - _M_ebs = _M_ibs; - _M_extbuf = (char*)_M_intbuf; - _M_ibs = 0; - _M_intbuf = nullptr; - _M_owns_ib = false; - } - else // need to obtain an _M_intbuf. - { // If _M_extbuf is user-supplied, use it, else new _M_intbuf - if (!_M_owns_eb && _M_extbuf != _M_extbuf_min) - { - _M_ibs = _M_ebs; - _M_intbuf = (char_type*)_M_extbuf; - _M_owns_ib = false; - _M_extbuf = new char[_M_ebs]; - _M_owns_eb = true; - } - else - { - _M_ibs = _M_ebs; - _M_intbuf = new char_type[_M_ibs]; - _M_owns_ib = true; - } - } - } -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -bool -basic_filebuf<CharT, Traits>::_M_read_mode() -{ - if (!(_M_cm & std::ios_base::in)) - { - this->setp(0, 0); - if (_M_always_noconv) - this->setg((char_type*)_M_extbuf, - (char_type*)_M_extbuf + _M_ebs, - (char_type*)_M_extbuf + _M_ebs); - else - this->setg(_M_intbuf, _M_intbuf + _M_ibs, _M_intbuf + _M_ibs); - _M_cm = std::ios_base::in; - return true; - } - return false; -} - -/////////////////////////////////////////////////////////////////////////////// -template <class CharT, class Traits> -void -basic_filebuf<CharT, Traits>::_M_write_mode() -{ - if (!(_M_cm & std::ios_base::out)) - { - this->setg(0, 0, 0); - if (_M_ebs > sizeof(_M_extbuf_min)) - { - if (_M_always_noconv) - this->setp((char_type*)_M_extbuf, - (char_type*)_M_extbuf + (_M_ebs - 1)); - else - this->setp(_M_intbuf, _M_intbuf + (_M_ibs - 1)); - } - else - this->setp(0, 0); - _M_cm = std::ios_base::out; - } -} - -/////////////////////////////////////////////////////////////////////////////// -} - -/////////////////////////////////////////////////////////////////////////////// -#endif // KALDI_UTIL_BASIC_FILEBUF_H_ - -/////////////////////////////////////////////////////////////////////////////// - -/* - * ============================================================================ - * libc++ License - * ============================================================================ - * - * The libc++ library is dual licensed under both the University of Illinois - * "BSD-Like" license and the MIT license. As a user of this code you may - * choose to use it under either license. As a contributor, you agree to allow - * your code to be used under both. - * - * Full text of the relevant licenses is included below. - * - * ============================================================================ - * - * University of Illinois/NCSA - * Open Source License - * - * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included below) - * - * All rights reserved. - * - * Developed by: - * - * LLVM Team - * - * University of Illinois at Urbana-Champaign - * - * http://llvm.org - * - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal with - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished to do - * so, subject to the following conditions: - * - * * Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimers. - * - * * Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimers in the - * documentation and/or other materials provided with the distribution. - * - * * Neither the names of the LLVM Team, University of Illinois at - * Urbana-Champaign, nor the names of its contributors may be used to - * endorse or promote products derived from this Software without specific - * prior written permission. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS - * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE - * SOFTWARE. - * - * ============================================================================== - * - * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included below) - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. - * - * ============================================================================== - * - * This file is a partial list of people who have contributed to the LLVM/libc++ - * project. If you have contributed a patch or made some other contribution to - * LLVM/libc++, please submit a patch to this file to add yourself, and it will be - * done! - * - * The list is sorted by surname and formatted to allow easy grepping and - * beautification by scripts. The fields are: name (N), email (E), web-address - * (W), PGP key ID and fingerprint (P), description (D), and snail-mail address - * (S). - * - * N: Saleem Abdulrasool - * E: [email protected] - * D: Minor patches and Linux fixes. - * - * N: Dimitry Andric - * E: [email protected] - * D: Visibility fixes, minor FreeBSD portability patches. - * - * N: Holger Arnold - * E: [email protected] - * D: Minor fix. - * - * N: Ruben Van Boxem - * E: vanboxem dot ruben at gmail dot com - * D: Initial Windows patches. - * - * N: David Chisnall - * E: theraven at theravensnest dot org - * D: FreeBSD and Solaris ports, libcxxrt support, some atomics work. - * - * N: Marshall Clow - * E: [email protected] - * E: [email protected] - * D: C++14 support, patches and bug fixes. - * - * N: Bill Fisher - * E: [email protected] - * D: Regex bug fixes. - * - * N: Matthew Dempsky - * E: [email protected] - * D: Minor patches and bug fixes. - * - * N: Google Inc. - * D: Copyright owner and contributor of the CityHash algorithm - * - * N: Howard Hinnant - * E: [email protected] - * D: Architect and primary author of libc++ - * - * N: Hyeon-bin Jeong - * E: [email protected] - * D: Minor patches and bug fixes. - * - * N: Argyrios Kyrtzidis - * E: [email protected] - * D: Bug fixes. - * - * N: Bruce Mitchener, Jr. - * E: [email protected] - * D: Emscripten-related changes. - * - * N: Michel Morin - * E: [email protected] - * D: Minor patches to is_convertible. - * - * N: Andrew Morrow - * E: [email protected] - * D: Minor patches and Linux fixes. - * - * N: Arvid Picciani - * E: aep at exys dot org - * D: Minor patches and musl port. - * - * N: Bjorn Reese - * E: [email protected] - * D: Initial regex prototype - * - * N: Nico Rieck - * E: [email protected] - * D: Windows fixes - * - * N: Jonathan Sauer - * D: Minor patches, mostly related to constexpr - * - * N: Craig Silverstein - * E: [email protected] - * D: Implemented Cityhash as the string hash function on 64-bit machines - * - * N: Richard Smith - * D: Minor patches. - * - * N: Joerg Sonnenberger - * E: [email protected] - * D: NetBSD port. - * - * N: Stephan Tolksdorf - * E: [email protected] - * D: Minor <atomic> fix - * - * N: Michael van der Westhuizen - * E: r1mikey at gmail dot com - * - * N: Klaas de Vries - * E: klaas at klaasgaaf dot nl - * D: Minor bug fix. - * - * N: Zhang Xiongpang - * E: [email protected] - * D: Minor patches and bug fixes. - * - * N: Xing Xue - * E: [email protected] - * D: AIX port - * - * N: Zhihao Yuan - * E: [email protected] - * D: Standard compatibility fixes. - * - * N: Jeffrey Yasskin - * E: [email protected] - * E: [email protected] - * D: Linux fixes. - */ diff --git a/kaldi_io/src/kaldi/util/common-utils.h b/kaldi_io/src/kaldi/util/common-utils.h deleted file mode 100644 index 9d39f9d..0000000 --- a/kaldi_io/src/kaldi/util/common-utils.h +++ /dev/null @@ -1,31 +0,0 @@ -// util/common-utils.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#ifndef KALDI_UTIL_COMMON_UTILS_H_ -#define KALDI_UTIL_COMMON_UTILS_H_ - -#include "base/kaldi-common.h" -#include "util/parse-options.h" -#include "util/kaldi-io.h" -#include "util/simple-io-funcs.h" -#include "util/kaldi-holder.h" -#include "util/kaldi-table.h" -#include "util/table-types.h" -#include "util/text-utils.h" - -#endif diff --git a/kaldi_io/src/kaldi/util/const-integer-set-inl.h b/kaldi_io/src/kaldi/util/const-integer-set-inl.h deleted file mode 100644 index 8f92ab2..0000000 --- a/kaldi_io/src/kaldi/util/const-integer-set-inl.h +++ /dev/null @@ -1,88 +0,0 @@ -// util/const-integer-set-inl.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#ifndef KALDI_UTIL_CONST_INTEGER_SET_INL_H_ -#define KALDI_UTIL_CONST_INTEGER_SET_INL_H_ - -// Do not include this file directly. It is included by const-integer-set.h - - -namespace kaldi { - -template<class I> -void ConstIntegerSet<I>::InitInternal() { - KALDI_ASSERT_IS_INTEGER_TYPE(I); - quick_set_.clear(); // just in case we previously had data. - if (slow_set_.size() == 0) { - lowest_member_=(I) 1; - highest_member_=(I) 0; - contiguous_ = false; - quick_ = false; - } else { - lowest_member_ = slow_set_.front(); - highest_member_ = slow_set_.back(); - size_t range = highest_member_ + 1 - lowest_member_; - if (range == slow_set_.size()) { - contiguous_ = true; - quick_=false; - } else { - contiguous_ = false; - if (range < slow_set_.size() * 8 * sizeof(I)) { // If it would be more compact to store as bool - // (assuming 1 bit per element)... - quick_set_.resize(range, false); - for (size_t i = 0;i < slow_set_.size();i++) - quick_set_[slow_set_[i] - lowest_member_] = true; - quick_ = true; - } else { - quick_ = false; - } - } - } -} - -template<class I> -int ConstIntegerSet<I>::count(I i) const { - if (i < lowest_member_ || i > highest_member_) return 0; - else { - if (contiguous_) return true; - if (quick_) return (quick_set_[i-lowest_member_] ? 1 : 0); - else { - bool ans = std::binary_search(slow_set_.begin(), slow_set_.end(), i); - return (ans ? 1 : 0); - } - } -} - -template<class I> -void ConstIntegerSet<I>::Write(std::ostream &os, bool binary) const { - WriteIntegerVector(os, binary, slow_set_); -} - -template<class I> -void ConstIntegerSet<I>::Read(std::istream &is, bool binary) { - ReadIntegerVector(is, binary, &slow_set_); - InitInternal(); -} - - - -} // end namespace kaldi - -#endif diff --git a/kaldi_io/src/kaldi/util/const-integer-set.h b/kaldi_io/src/kaldi/util/const-integer-set.h deleted file mode 100644 index ffdce4d..0000000 --- a/kaldi_io/src/kaldi/util/const-integer-set.h +++ /dev/null @@ -1,95 +0,0 @@ -// util/const-integer-set.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#ifndef KALDI_UTIL_CONST_INTEGER_SET_H_ -#define KALDI_UTIL_CONST_INTEGER_SET_H_ -#include <vector> -#include <set> -#include <algorithm> -#include <limits> -#include <cassert> -#include "util/stl-utils.h" - - /* ConstIntegerSet is a way to efficiently test whether something is in a - supplied set of integers. It can be initialized from a vector or set, but - never changed after that. It either uses a sorted vector or an array of - bool, depending on the input. It behaves like a const version of an STL set, with - only a subset of the functionality, except all the member functions are - upper-case. - - Note that we could get rid of the member slow_set_, but we'd have to - do more work to implement an iterator type. This would save memory. - */ - -namespace kaldi { - -template<class I> class ConstIntegerSet { - public: - ConstIntegerSet(): lowest_member_(1), highest_member_(0) { } - - void Init(const std::vector<I> &input) { - slow_set_ = input; - SortAndUniq(&slow_set_); - InitInternal(); - } - - void Init(const std::set<I> &input) { - CopySetToVector(input, &slow_set_); - InitInternal(); - } - - explicit ConstIntegerSet(const std::vector<I> &input): slow_set_(input) { - SortAndUniq(&slow_set_); - InitInternal(); - } - explicit ConstIntegerSet(const std::set<I> &input) { - CopySetToVector(input, &slow_set_); - InitInternal(); - } - explicit ConstIntegerSet(const ConstIntegerSet<I> &other): slow_set_(other.slow_set_) { - InitInternal(); - } - - int count(I i) const; // returns 1 or 0. - - typedef typename std::vector<I>::const_iterator iterator; - iterator begin() const { return slow_set_.begin(); } - iterator end() const { return slow_set_.end(); } - size_t size() const { return slow_set_.size(); } - bool empty() const { return slow_set_.empty(); } - - void Write(std::ostream &os, bool binary) const; - void Read(std::istream &is, bool binary); - - private: - I lowest_member_; - I highest_member_; - bool contiguous_; - bool quick_; - std::vector<bool> quick_set_; - std::vector<I> slow_set_; - void InitInternal(); -}; - -} // end namespace kaldi - -#include "const-integer-set-inl.h" - -#endif diff --git a/kaldi_io/src/kaldi/util/edit-distance-inl.h b/kaldi_io/src/kaldi/util/edit-distance-inl.h deleted file mode 100644 index ebbfb71..0000000 --- a/kaldi_io/src/kaldi/util/edit-distance-inl.h +++ /dev/null @@ -1,189 +0,0 @@ -// util/edit-distance-inl.h - -// Copyright 2009-2011 Microsoft Corporation; Haihua Xu; Yanmin Qian - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_EDIT_DISTANCE_INL_H_ -#define KALDI_UTIL_EDIT_DISTANCE_INL_H_ -#include "util/stl-utils.h" - - -namespace kaldi { - -template<class T> -int32 LevenshteinEditDistance(const std::vector<T> &a, - const std::vector<T> &b) { - // Algorithm: - // write A and B for the sequences, with elements a_0 .. - // let |A| = M and |B| = N be the lengths, and have - // elements a_0 ... a_{M-1} and b_0 ... b_{N-1}. - // We are computing the recursion - // E(m, n) = min( E(m-1, n-1) + (1-delta(a_{m-1}, b_{n-1})), - // E(m-1, n), - // E(m, n-1) ). - // where E(m, n) is defined for m = 0..M and n = 0..N and out-of- - // bounds quantities are considered to be infinity (i.e. the - // recursion does not visit them). - - // We do this computation using a vector e of size N+1. - // The outer iterations range over m = 0..M. - - int M = a.size(), N = b.size(); - std::vector<int32> e(N+1); - std::vector<int32> e_tmp(N+1); - // initialize e. - for (size_t i = 0; i < e.size(); i++) - e[i] = i; - for (int32 m = 1; m <= M; m++) { - // computing E(m, .) from E(m-1, .) - // handle special case n = 0: - e_tmp[0] = e[0] + 1; - - for (int32 n = 1; n <= N; n++) { - int32 term1 = e[n-1] + (a[m-1] == b[n-1] ? 0 : 1); - int32 term2 = e[n] + 1; - int32 term3 = e_tmp[n-1] + 1; - e_tmp[n] = std::min(term1, std::min(term2, term3)); - } - e = e_tmp; - } - return e.back(); -} -// -struct error_stats{ - int32 ins_num; - int32 del_num; - int32 sub_num; - int32 total_cost; // minimum total cost to the current alignment. -}; -// Note that both hyp and ref should not contain noise word in -// the following implementation. - -template<class T> -int32 LevenshteinEditDistance(const std::vector<T> &ref, - const std::vector<T> &hyp, - int32 *ins, int32 *del, int32 *sub) { - // temp sequence to remember error type and stats. - std::vector<error_stats> e(ref.size()+1); - std::vector<error_stats> cur_e(ref.size()+1); - // initialize the first hypothesis aligned to the reference at each - // position:[hyp_index =0][ref_index] - for (size_t i =0; i < e.size(); i ++) { - e[i].ins_num = 0; - e[i].sub_num = 0; - e[i].del_num = i; - e[i].total_cost = i; - } - - // for other alignments - for (size_t hyp_index = 1; hyp_index <= hyp.size(); hyp_index ++) { - cur_e[0] = e[0]; - cur_e[0].ins_num ++; - cur_e[0].total_cost ++; - for (size_t ref_index = 1; ref_index <= ref.size(); ref_index ++) { - - int32 ins_err = e[ref_index].total_cost + 1; - int32 del_err = cur_e[ref_index-1].total_cost + 1; - int32 sub_err = e[ref_index-1].total_cost; - if (hyp[hyp_index-1] != ref[ref_index-1]) - sub_err ++; - - if (sub_err < ins_err && sub_err < del_err) { - cur_e[ref_index] =e[ref_index-1]; - if (hyp[hyp_index-1] != ref[ref_index-1]) - cur_e[ref_index].sub_num ++; // substitution error should be increased - cur_e[ref_index].total_cost = sub_err; - }else if (del_err < ins_err ) { - cur_e[ref_index] = cur_e[ref_index-1]; - cur_e[ref_index].total_cost = del_err; - cur_e[ref_index].del_num ++; // deletion number is increased. - }else{ - cur_e[ref_index] = e[ref_index]; - cur_e[ref_index].total_cost = ins_err; - cur_e[ref_index].ins_num ++; // insertion number is increased. - } - } - e = cur_e; // alternate for the next recursion. - } - size_t ref_index = e.size()-1; - *ins = e[ref_index].ins_num, *del = e[ref_index].del_num, *sub = e[ref_index].sub_num; - return e[ref_index].total_cost; -} - -template<class T> -int32 LevenshteinAlignment(const std::vector<T> &a, - const std::vector<T> &b, - T eps_symbol, - std::vector<std::pair<T, T> > *output) { - // Check inputs: - { - KALDI_ASSERT(output != NULL); - for (size_t i = 0; i < a.size(); i++) KALDI_ASSERT(a[i] != eps_symbol); - for (size_t i = 0; i < b.size(); i++) KALDI_ASSERT(b[i] != eps_symbol); - } - output->clear(); - // This is very memory-inefficiently implemented using a vector of vectors. - size_t M = a.size(), N = b.size(); - size_t m, n; - std::vector<std::vector<int32> > e(M+1); - for (m = 0; m <=M; m++) e[m].resize(N+1); - for (n = 0; n <= N; n++) - e[0][n] = n; - for (m = 1; m <= M; m++) { - e[m][0] = e[m-1][0] + 1; - for (n = 1; n <= N; n++) { - int32 sub_or_ok = e[m-1][n-1] + (a[m-1] == b[n-1] ? 0 : 1); - int32 del = e[m-1][n] + 1; // assumes a == ref, b == hyp. - int32 ins = e[m][n-1] + 1; - e[m][n] = std::min(sub_or_ok, std::min(del, ins)); - } - } - // get time-reversed output first: trace back. - m = M; n = N; - while (m != 0 || n != 0) { - size_t last_m, last_n; - if (m == 0) { last_m = m; last_n = n-1; } - else if (n == 0) { last_m = m-1; last_n = n; } - else { - int32 sub_or_ok = e[m-1][n-1] + (a[m-1] == b[n-1] ? 0 : 1); - int32 del = e[m-1][n] + 1; // assumes a == ref, b == hyp. - int32 ins = e[m][n-1] + 1; - if (sub_or_ok <= std::min(del, ins)) { // choose sub_or_ok if all else equal. - last_m = m-1; last_n = n-1; - } else { - if (del <= ins) { // choose del over ins if equal. - last_m = m-1; last_n = n; - } else { - last_m = m; last_n = n-1; - } - } - } - T a_sym, b_sym; - a_sym = (last_m == m ? eps_symbol : a[last_m]); - b_sym = (last_n == n ? eps_symbol : b[last_n]); - output->push_back(std::make_pair(a_sym, b_sym)); - m = last_m; - n = last_n; - } - ReverseVector(output); - return e[M][N]; -} - - -} // end namespace kaldi - -#endif // KALDI_UTIL_EDIT_DISTANCE_INL_H_ diff --git a/kaldi_io/src/kaldi/util/edit-distance.h b/kaldi_io/src/kaldi/util/edit-distance.h deleted file mode 100644 index 6000622..0000000 --- a/kaldi_io/src/kaldi/util/edit-distance.h +++ /dev/null @@ -1,63 +0,0 @@ -// util/edit-distance.h - -// Copyright 2009-2011 Microsoft Corporation; Haihua Xu - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#ifndef KALDI_UTIL_EDIT_DISTANCE_H_ -#define KALDI_UTIL_EDIT_DISTANCE_H_ -#include <vector> -#include <set> -#include <algorithm> -#include <limits> -#include <cassert> -#include "base/kaldi-types.h" - -namespace kaldi { - -// Compute the edit-distance between two strings. -template<class T> -int32 LevenshteinEditDistance(const std::vector<T> &a, - const std::vector<T> &b); - - -// edit distance calculation with conventional method. -// note: noise word must be filtered out from the hypothesis and reference sequence -// before the following procedure conducted. -template<class T> -int32 LevenshteinEditDistance(const std::vector<T> &ref, - const std::vector<T> &hyp, - int32 *ins, int32 *del, int32 *sub); - -// This version of the edit-distance computation outputs the alignment -// between the two. This is a vector of pairs of (symbol a, symbol b). -// The epsilon symbol (eps_symbol) must not occur in sequences a or b. -// Where one aligned to no symbol in the other (insertion or deletion), -// epsilon will be the corresponding member of the pair. -// It returns the edit-distance between the two strings. - -template<class T> -int32 LevenshteinAlignment(const std::vector<T> &a, - const std::vector<T> &b, - T eps_symbol, - std::vector<std::pair<T, T> > *output); - -} // end namespace kaldi - -#include "edit-distance-inl.h" - -#endif diff --git a/kaldi_io/src/kaldi/util/hash-list-inl.h b/kaldi_io/src/kaldi/util/hash-list-inl.h deleted file mode 100644 index 19c2bb6..0000000 --- a/kaldi_io/src/kaldi/util/hash-list-inl.h +++ /dev/null @@ -1,183 +0,0 @@ -// util/hash-list-inl.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#ifndef KALDI_UTIL_HASH_LIST_INL_H_ -#define KALDI_UTIL_HASH_LIST_INL_H_ - -// Do not include this file directly. It is included by fast-hash.h - - -namespace kaldi { - -template<class I, class T> HashList<I, T>::HashList() { - list_head_ = NULL; - bucket_list_tail_ = static_cast<size_t>(-1); // invalid. - hash_size_ = 0; - freed_head_ = NULL; -} - -template<class I, class T> void HashList<I, T>::SetSize(size_t size) { - hash_size_ = size; - KALDI_ASSERT(list_head_ == NULL && bucket_list_tail_ == static_cast<size_t>(-1)); // make sure empty. - if (size > buckets_.size()) - buckets_.resize(size, HashBucket(0, NULL)); -} - -template<class I, class T> -typename HashList<I, T>::Elem* HashList<I, T>::Clear() { - // Clears the hashtable and gives ownership of the currently contained list to the - // user. - for (size_t cur_bucket = bucket_list_tail_; - cur_bucket != static_cast<size_t>(-1); - cur_bucket = buckets_[cur_bucket].prev_bucket) { - buckets_[cur_bucket].last_elem = NULL; // this is how we indicate "empty". - } - bucket_list_tail_ = static_cast<size_t>(-1); - Elem *ans = list_head_; - list_head_ = NULL; - return ans; -} - -template<class I, class T> -const typename HashList<I, T>::Elem* HashList<I, T>::GetList() const { - return list_head_; -} - -template<class I, class T> -inline void HashList<I, T>::Delete(Elem *e) { - e->tail = freed_head_; - freed_head_ = e; -} - -template<class I, class T> -inline typename HashList<I, T>::Elem* HashList<I, T>::Find(I key) { - size_t index = (static_cast<size_t>(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - if (bucket.last_elem == NULL) { - return NULL; // empty bucket. - } else { - Elem *head = (bucket.prev_bucket == static_cast<size_t>(-1) ? - list_head_ : - buckets_[bucket.prev_bucket].last_elem->tail), - *tail = bucket.last_elem->tail; - for (Elem *e = head; e != tail; e = e->tail) - if (e->key == key) return e; - return NULL; // Not found. - } -} - -template<class I, class T> -inline typename HashList<I, T>::Elem* HashList<I, T>::New() { - if (freed_head_) { - Elem *ans = freed_head_; - freed_head_ = freed_head_->tail; - return ans; - } else { - Elem *tmp = new Elem[allocate_block_size_]; - for (size_t i = 0; i+1 < allocate_block_size_; i++) - tmp[i].tail = tmp+i+1; - tmp[allocate_block_size_-1].tail = NULL; - freed_head_ = tmp; - allocated_.push_back(tmp); - return this->New(); - } -} - -template<class I, class T> -HashList<I, T>::~HashList() { - // First test whether we had any memory leak within the - // HashList, i.e. things for which the user did not call Delete(). - size_t num_in_list = 0, num_allocated = 0; - for (Elem *e = freed_head_; e != NULL; e = e->tail) - num_in_list++; - for (size_t i = 0; i < allocated_.size(); i++) { - num_allocated += allocate_block_size_; - delete[] allocated_[i]; - } - if (num_in_list != num_allocated) { - KALDI_WARN << "Possible memory leak: " << num_in_list - << " != " << num_allocated - << ": you might have forgotten to call Delete on " - << "some Elems"; - } -} - - -template<class I, class T> -void HashList<I, T>::Insert(I key, T val) { - size_t index = (static_cast<size_t>(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - Elem *elem = New(); - elem->key = key; - elem->val = val; - - if (bucket.last_elem == NULL) { // Unoccupied bucket. Insert at - // head of bucket list (which is tail of regular list, they go in - // opposite directions). - if (bucket_list_tail_ == static_cast<size_t>(-1)) { - // list was empty so this is the first elem. - KALDI_ASSERT(list_head_ == NULL); - list_head_ = elem; - } else { - // link in to the chain of Elems - buckets_[bucket_list_tail_].last_elem->tail = elem; - } - elem->tail = NULL; - bucket.last_elem = elem; - bucket.prev_bucket = bucket_list_tail_; - bucket_list_tail_ = index; - } else { - // Already-occupied bucket. Insert at tail of list of elements within - // the bucket. - elem->tail = bucket.last_elem->tail; - bucket.last_elem->tail = elem; - bucket.last_elem = elem; - } -} - -template<class I, class T> -void HashList<I, T>::InsertMore(I key, T val) { - size_t index = (static_cast<size_t>(key) % hash_size_); - HashBucket &bucket = buckets_[index]; - Elem *elem = New(); - elem->key = key; - elem->val = val; - - KALDI_ASSERT(bucket.last_elem != NULL); // we assume there is already one element - if (bucket.last_elem->key == key) { // standard behavior: add as last element - elem->tail = bucket.last_elem->tail; - bucket.last_elem->tail = elem; - bucket.last_elem = elem; - return; - } - Elem *e = (bucket.prev_bucket == static_cast<size_t>(-1) ? - list_head_ : buckets_[bucket.prev_bucket].last_elem->tail); - // find place to insert in linked list - while (e != bucket.last_elem->tail && e->key != key) e = e->tail; - KALDI_ASSERT(e->key == key); // not found? - should not happen - elem->tail = e->tail; - e->tail = elem; -} - - -} // end namespace kaldi - -#endif diff --git a/kaldi_io/src/kaldi/util/hash-list.h b/kaldi_io/src/kaldi/util/hash-list.h deleted file mode 100644 index 4524759..0000000 --- a/kaldi_io/src/kaldi/util/hash-list.h +++ /dev/null @@ -1,140 +0,0 @@ -// util/hash-list.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#ifndef KALDI_UTIL_HASH_LIST_H_ -#define KALDI_UTIL_HASH_LIST_H_ -#include <vector> -#include <set> -#include <algorithm> -#include <limits> -#include <cassert> -#include "util/stl-utils.h" - - -/* This header provides utilities for a structure that's used in a decoder (but - is quite generic in nature so we implement and test it separately). - Basically it's a singly-linked list, but implemented in such a way that we - can quickly search for elements in the list. We give it a slightly richer - interface than just a hash and a list. The idea is that we want to separate - the hash part and the list part: basically, in the decoder, we want to have a - single hash for the current frame and the next frame, because by the time we - need to access the hash for the next frame we no longer need the hash for the - previous frame. So we have an operation that clears the hash but leaves the - list structure intact. We also control memory management inside this object, - to avoid repeated new's/deletes. - - See hash-list-test.cc for an example of how to use this object. -*/ - - -namespace kaldi { - -template<class I, class T> class HashList { - - public: - struct Elem { - I key; - T val; - Elem *tail; - }; - - /// Constructor takes no arguments. Call SetSize to inform it of the likely size. - HashList(); - - /// Clears the hash and gives the head of the current list to the user; - /// ownership is transferred to the user (the user must call Delete() - /// for each element in the list, at his/her leisure). - Elem *Clear(); - - /// Gives the head of the current list to the user. Ownership retained in the - /// class. Caution: in December 2013 the return type was changed to const Elem* - /// and this function was made const. You may need to change some types of - /// local Elem* variables to const if this produces compilation errors. - const Elem *GetList() const; - - /// Think of this like delete(). It is to be called for each Elem in turn - /// after you "obtained ownership" by doing Clear(). This is not the opposite of - /// Insert, it is the opposite of New. It's really a memory operation. - inline void Delete(Elem *e); - - /// This should probably not be needed to be called directly by the user. Think of it as opposite - /// to Delete(); - inline Elem *New(); - - /// Find tries to find this element in the current list using the hashtable. - /// It returns NULL if not present. The Elem it returns is not owned by the user, - /// it is part of the internal list owned by this object, but the user is - /// free to modify the "val" element. - inline Elem *Find(I key); - - /// Insert inserts a new element into the hashtable/stored list. By calling this, - /// the user asserts that it is not already present (e.g. Find was called and - /// returned NULL). With current code, calling this if an element already exists will - /// result in duplicate elements in the structure, and Find() will find the - /// first one that was added. [but we don't guarantee this behavior]. - inline void Insert(I key, T val); - - /// Insert inserts another element with same key into the hashtable/stored list. - /// By calling this, the user asserts that one element with that key is already present. - /// We insert it that way, that all elements with the same key follow each other. - /// Find() will return the first one of the elements with the same key. - inline void InsertMore(I key, T val); - - /// SetSize tells the object how many hash buckets to allocate (should typically be - /// at least twice the number of objects we expect to go in the structure, for fastest - /// performance). It must be called while the hash is empty (e.g. after Clear() or - /// after initializing the object, but before adding anything to the hash. - void SetSize(size_t sz); - - /// Returns current number of hash buckets. - inline size_t Size() { return hash_size_; } - - ~HashList(); - private: - - struct HashBucket { - size_t prev_bucket; // index to next bucket (-1 if list tail). Note: list of buckets - // goes in opposite direction to list of Elems. - Elem *last_elem; // pointer to last element in this bucket (NULL if empty) - inline HashBucket(size_t i, Elem *e): prev_bucket(i), last_elem(e) {} - }; - - Elem *list_head_; // head of currently stored list. - size_t bucket_list_tail_; // tail of list of active hash buckets. - - size_t hash_size_; // number of hash buckets. - - std::vector<HashBucket> buckets_; - - Elem *freed_head_; // head of list of currently freed elements. [ready for allocation] - - std::vector<Elem*> allocated_; // list of allocated blocks. - - static const size_t allocate_block_size_ = 1024; // Number of Elements to allocate in one block. Must be - // largish so storing allocated_ doesn't become a problem. -}; - - -} // end namespace kaldi - -#include "hash-list-inl.h" - -#endif diff --git a/kaldi_io/src/kaldi/util/kaldi-holder-inl.h b/kaldi_io/src/kaldi/util/kaldi-holder-inl.h deleted file mode 100644 index 6a66e61..0000000 --- a/kaldi_io/src/kaldi/util/kaldi-holder-inl.h +++ /dev/null @@ -1,800 +0,0 @@ -// util/kaldi-holder-inl.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#ifndef KALDI_UTIL_KALDI_HOLDER_INL_H_ -#define KALDI_UTIL_KALDI_HOLDER_INL_H_ - -#include <algorithm> -#include "util/kaldi-io.h" -#include "util/text-utils.h" -#include "matrix/kaldi-matrix.h" - -namespace kaldi { - -/// \addtogroup holders -/// @{ - - -// KaldiObjectHolder is valid only for Kaldi objects with -// copy constructors, default constructors, and "normal" -// Kaldi Write and Read functions. E.g. it works for -// Matrix and Vector. -template<class KaldiType> class KaldiObjectHolder { - public: - typedef KaldiType T; - - KaldiObjectHolder(): t_(NULL) { } - - static bool Write(std::ostream &os, bool binary, const T &t) { - InitKaldiOutputStream(os, binary); // Puts binary header if binary mode. - try { - t.Write(os, binary); - return os.good(); - } catch (const std::exception &e) { - KALDI_WARN << "Exception caught writing Table object: " << e.what(); - if (!IsKaldiError(e.what())) { std::cerr << e.what(); } - return false; // Write failure. - } - } - - void Clear() { - if (t_) { - delete t_; - t_ = NULL; - } - } - - // Reads into the holder. - bool Read(std::istream &is) { - if (t_) delete t_; - t_ = new T; - // Don't want any existing state to complicate the read functioN: get new object. - bool is_binary; - if (!InitKaldiInputStream(is, &is_binary)) { - KALDI_WARN << "Reading Table object, failed reading binary header\n"; - return false; - } - try { - t_->Read(is, is_binary); - return true; - } catch (std::exception &e) { - KALDI_WARN << "Exception caught reading Table object "; - if (!IsKaldiError(e.what())) { std::cerr << e.what(); } - delete t_; - t_ = NULL; - return false; - } - } - - // Kaldi objects always have the stream open in binary mode for - // reading. - static bool IsReadInBinary() { return true; } - - const T &Value() const { - // code error if !t_. - if (!t_) KALDI_ERR << "KaldiObjectHolder::Value() called wrongly."; - return *t_; - } - - ~KaldiObjectHolder() { if (t_) delete t_; } - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(KaldiObjectHolder); - T *t_; -}; - - -// BasicHolder is valid for float, double, bool, and integer -// types. There will be a compile time error otherwise, because -// we make sure that the {Write, Read}BasicType functions do not -// get instantiated for other types. - -template<class BasicType> class BasicHolder { - public: - typedef BasicType T; - - BasicHolder(): t_(static_cast<T>(-1)) { } - - static bool Write(std::ostream &os, bool binary, const T &t) { - InitKaldiOutputStream(os, binary); // Puts binary header if binary mode. - try { - WriteBasicType(os, binary, t); - if (!binary) os << '\n'; // Makes output format more readable and - // easier to manipulate. - return os.good(); - } catch (const std::exception &e) { - KALDI_WARN << "Exception caught writing Table object: " << e.what(); - if (!IsKaldiError(e.what())) { std::cerr << e.what(); } - return false; // Write failure. - } - } - - void Clear() { } - - // Reads into the holder. - bool Read(std::istream &is) { - bool is_binary; - if (!InitKaldiInputStream(is, &is_binary)) { - KALDI_WARN << "Reading Table object [integer type], failed reading binary header\n"; - return false; - } - try { - int c; - if (!is_binary) { // This is to catch errors, the class would work without it.. - // Eat up any whitespace and make sure it's not newline. - while (isspace((c = is.peek())) && c != static_cast<int>('\n')) is.get(); - if (is.peek() == '\n') { - KALDI_WARN << "Found newline but expected basic type."; - return false; // This is just to catch a more- - // likely-than average type of error (empty line before the token), since - // ReadBasicType will eat it up. - } - } - - ReadBasicType(is, is_binary, &t_); - - if (!is_binary) { // This is to catch errors, the class would work without it.. - // make sure there is a newline. - while (isspace((c = is.peek())) && c != static_cast<int>('\n')) is.get(); - if (is.peek() != '\n') { - KALDI_WARN << "BasicHolder::Read, expected newline, got " - << CharToString(is.peek()) << ", position " << is.tellg(); - return false; - } - is.get(); // Consume the newline. - } - return true; - } catch (std::exception &e) { - KALDI_WARN << "Exception caught reading Table object"; - if (!IsKaldiError(e.what())) { std::cerr << e.what(); } - return false; - } - } - - // Objects read/written with the Kaldi I/O functions always have the stream - // open in binary mode for reading. - static bool IsReadInBinary() { return true; } - - const T &Value() const { - return t_; - } - - ~BasicHolder() { } - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(BasicHolder); - - T t_; -}; - - -/// A Holder for a vector of basic types, e.g. -/// std::vector<int32>, std::vector<float>, and so on. -/// Note: a basic type is defined as a type for which ReadBasicType -/// and WriteBasicType are implemented, i.e. integer and floating -/// types, and bool. -template<class BasicType> class BasicVectorHolder { - public: - typedef std::vector<BasicType> T; - - BasicVectorHolder() { } - - static bool Write(std::ostream &os, bool binary, const T &t) { - InitKaldiOutputStream(os, binary); // Puts binary header if binary mode. - try { - if (binary) { // need to write the size, in binary mode. - KALDI_ASSERT(static_cast<size_t>(static_cast<int32>(t.size())) == t.size()); - // Or this Write routine cannot handle such a large vector. - // use int32 because it's fixed size regardless of compilation. - // change to int64 (plus in Read function) if this becomes a problem. - WriteBasicType(os, binary, static_cast<int32>(t.size())); - for (typename std::vector<BasicType>::const_iterator iter = t.begin(); - iter != t.end(); ++iter) - WriteBasicType(os, binary, *iter); - - } else { - for (typename std::vector<BasicType>::const_iterator iter = t.begin(); - iter != t.end(); ++iter) - WriteBasicType(os, binary, *iter); - os << '\n'; // Makes output format more readable and - // easier to manipulate. In text mode, this function writes something like - // "1 2 3\n". - } - return os.good(); - } catch (const std::exception &e) { - KALDI_WARN << "Exception caught writing Table object (BasicVector). "; - if (!IsKaldiError(e.what())) { std::cerr << e.what(); } - return false; // Write failure. - } - } - - void Clear() { t_.clear(); } - - // Reads into the holder. - bool Read(std::istream &is) { - t_.clear(); - bool is_binary; - if (!InitKaldiInputStream(is, &is_binary)) { - KALDI_WARN << "Reading Table object [integer type], failed reading binary header\n"; - return false; - } - if (!is_binary) { - // In text mode, we terminate with newline. - std::string line; - getline(is, line); // this will discard the \n, if present. - if (is.fail()) { - KALDI_WARN << "BasicVectorHolder::Read, error reading line " << (is.eof() ? "[eof]" : ""); - return false; // probably eof. fail in any case. - } - std::istringstream line_is(line); - try { - while (1) { - line_is >> std::ws; // eat up whitespace. - if (line_is.eof()) break; - BasicType bt; - ReadBasicType(line_is, false, &bt); - t_.push_back(bt); - } - return true; - } catch(std::exception &e) { - KALDI_WARN << "BasicVectorHolder::Read, could not interpret line: " << line; - if (!IsKaldiError(e.what())) { std::cerr << e.what(); } - return false; - } - } else { // binary mode. - size_t filepos = is.tellg(); - try { - int32 size; - ReadBasicType(is, true, &size); - t_.resize(size); - for (typename std::vector<BasicType>::iterator iter = t_.begin(); - iter != t_.end(); - ++iter) { - ReadBasicType(is, true, &(*iter)); - } - return true; - } catch (...) { - KALDI_WARN << "BasicVectorHolder::Read, read error or unexpected data at archive entry beginning at file position " << filepos; - return false; - } - } - } - - // Objects read/written with the Kaldi I/O functions always have the stream - // open in binary mode for reading. - static bool IsReadInBinary() { return true; } - - const T &Value() const { return t_; } - - ~BasicVectorHolder() { } - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(BasicVectorHolder); - T t_; -}; - - -/// BasicVectorVectorHolder is a Holder for a vector of vector of -/// a basic type, e.g. std::vector<std::vector<int32> >. -/// Note: a basic type is defined as a type for which ReadBasicType -/// and WriteBasicType are implemented, i.e. integer and floating -/// types, and bool. -template<class BasicType> class BasicVectorVectorHolder { - public: - typedef std::vector<std::vector<BasicType> > T; - - BasicVectorVectorHolder() { } - - static bool Write(std::ostream &os, bool binary, const T &t) { - InitKaldiOutputStream(os, binary); // Puts binary header if binary mode. - try { - if (binary) { // need to write the size, in binary mode. - KALDI_ASSERT(static_cast<size_t>(static_cast<int32>(t.size())) == t.size()); - // Or this Write routine cannot handle such a large vector. - // use int32 because it's fixed size regardless of compilation. - // change to int64 (plus in Read function) if this becomes a problem. - WriteBasicType(os, binary, static_cast<int32>(t.size())); - for (typename std::vector<std::vector<BasicType> >::const_iterator iter = t.begin(); - iter != t.end(); ++iter) { - KALDI_ASSERT(static_cast<size_t>(static_cast<int32>(iter->size())) == iter->size()); - WriteBasicType(os, binary, static_cast<int32>(iter->size())); - for (typename std::vector<BasicType>::const_iterator iter2=iter->begin(); - iter2 != iter->end(); ++iter2) { - WriteBasicType(os, binary, *iter2); - } - } - } else { // text mode... - // In text mode, we write out something like (for integers): - // "1 2 3 ; 4 5 ; 6 ; ; 7 8 9 ;\n" - // where the semicolon is a terminator, not a separator - // (a separator would cause ambiguity between an - // empty list, and a list containing a single empty list). - for (typename std::vector<std::vector<BasicType> >::const_iterator iter = t.begin(); - iter != t.end(); - ++iter) { - for (typename std::vector<BasicType>::const_iterator iter2=iter->begin(); - iter2 != iter->end(); ++iter2) - WriteBasicType(os, binary, *iter2); - os << "; "; - } - os << '\n'; - } - return os.good(); - } catch (const std::exception &e) { - KALDI_WARN << "Exception caught writing Table object. "; - if (!IsKaldiError(e.what())) { std::cerr << e.what(); } - return false; // Write failure. - } - } - - void Clear() { t_.clear(); } - - // Reads into the holder. - bool Read(std::istream &is) { - t_.clear(); - bool is_binary; - if (!InitKaldiInputStream(is, &is_binary)) { - KALDI_WARN << "Failed reading binary header\n"; - return false; - } - if (!is_binary) { - // In text mode, we terminate with newline. - try { // catching errors from ReadBasicType.. - std::vector<BasicType> v; // temporary vector - while (1) { - int i = is.peek(); - if (i == -1) { - KALDI_WARN << "Unexpected EOF"; - return false; - } else if (static_cast<char>(i) == '\n') { - if (!v.empty()) { - KALDI_WARN << "No semicolon before newline (wrong format)"; - return false; - } else { is.get(); return true; } - } else if (std::isspace(i)) { - is.get(); - } else if (static_cast<char>(i) == ';') { - t_.push_back(v); - v.clear(); - is.get(); - } else { // some object we want to read... - BasicType b; - ReadBasicType(is, false, &b); // throws on error. - v.push_back(b); - } - } - } catch(std::exception &e) { - KALDI_WARN << "BasicVectorVectorHolder::Read, read error"; - if (!IsKaldiError(e.what())) { std::cerr << e.what(); } - return false; - } - } else { // binary mode. - size_t filepos = is.tellg(); - try { - int32 size; - ReadBasicType(is, true, &size); - t_.resize(size); - for (typename std::vector<std::vector<BasicType> >::iterator iter = t_.begin(); - iter != t_.end(); - ++iter) { - int32 size2; - ReadBasicType(is, true, &size2); - iter->resize(size2); - for (typename std::vector<BasicType>::iterator iter2 = iter->begin(); - iter2 != iter->end(); - ++iter2) - ReadBasicType(is, true, &(*iter2)); - } - return true; - } catch (...) { - KALDI_WARN << "Read error or unexpected data at archive entry beginning at file position " << filepos; - return false; - } - } - } - - // Objects read/written with the Kaldi I/O functions always have the stream - // open in binary mode for reading. - static bool IsReadInBinary() { return true; } - - const T &Value() const { return t_; } - - ~BasicVectorVectorHolder() { } - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(BasicVectorVectorHolder); - T t_; -}; - - -/// BasicPairVectorHolder is a Holder for a vector of pairs of -/// a basic type, e.g. std::vector<std::pair<int32> >. -/// Note: a basic type is defined as a type for which ReadBasicType -/// and WriteBasicType are implemented, i.e. integer and floating -/// types, and bool. -template<class BasicType> class BasicPairVectorHolder { - public: - typedef std::vector<std::pair<BasicType, BasicType> > T; - - BasicPairVectorHolder() { } - - static bool Write(std::ostream &os, bool binary, const T &t) { - InitKaldiOutputStream(os, binary); // Puts binary header if binary mode. - try { - if (binary) { // need to write the size, in binary mode. - KALDI_ASSERT(static_cast<size_t>(static_cast<int32>(t.size())) == t.size()); - // Or this Write routine cannot handle such a large vector. - // use int32 because it's fixed size regardless of compilation. - // change to int64 (plus in Read function) if this becomes a problem. - WriteBasicType(os, binary, static_cast<int32>(t.size())); - for (typename T::const_iterator iter = t.begin(); - iter != t.end(); ++iter) { - WriteBasicType(os, binary, iter->first); - WriteBasicType(os, binary, iter->second); - } - } else { // text mode... - // In text mode, we write out something like (for integers): - // "1 2 ; 4 5 ; 6 7 ; 8 9 \n" - // where the semicolon is a separator, not a terminator. - for (typename T::const_iterator iter = t.begin(); - iter != t.end();) { - WriteBasicType(os, binary, iter->first); - WriteBasicType(os, binary, iter->second); - ++iter; - if (iter != t.end()) - os << "; "; - } - os << '\n'; - } - return os.good(); - } catch (const std::exception &e) { - KALDI_WARN << "Exception caught writing Table object. "; - if (!IsKaldiError(e.what())) { std::cerr << e.what(); } - return false; // Write failure. - } - } - - void Clear() { t_.clear(); } - - // Reads into the holder. - bool Read(std::istream &is) { - t_.clear(); - bool is_binary; - if (!InitKaldiInputStream(is, &is_binary)) { - KALDI_WARN << "Reading Table object [integer type], failed reading binary header\n"; - return false; - } - if (!is_binary) { - // In text mode, we terminate with newline. - try { // catching errors from ReadBasicType.. - std::vector<BasicType> v; // temporary vector - while (1) { - int i = is.peek(); - if (i == -1) { - KALDI_WARN << "Unexpected EOF"; - return false; - } else if (static_cast<char>(i) == '\n') { - if (t_.empty() && v.empty()) { - is.get(); - return true; - } else if (v.size() == 2) { - t_.push_back(std::make_pair(v[0], v[1])); - is.get(); - return true; - } else { - KALDI_WARN << "Unexpected newline, reading vector<pair<?> >; got " - << v.size() << " elements, expected 2."; - return false; - } - } else if (std::isspace(i)) { - is.get(); - } else if (static_cast<char>(i) == ';') { - if (v.size() != 2) { - KALDI_WARN << "Wrong input format, reading vector<pair<?> >; got " - << v.size() << " elements, expected 2."; - return false; - } - t_.push_back(std::make_pair(v[0], v[1])); - v.clear(); - is.get(); - } else { // some object we want to read... - BasicType b; - ReadBasicType(is, false, &b); // throws on error. - v.push_back(b); - } - } - } catch(std::exception &e) { - KALDI_WARN << "BasicPairVectorHolder::Read, read error"; - if (!IsKaldiError(e.what())) { std::cerr << e.what(); } - return false; - } - } else { // binary mode. - size_t filepos = is.tellg(); - try { - int32 size; - ReadBasicType(is, true, &size); - t_.resize(size); - for (typename T::iterator iter = t_.begin(); - iter != t_.end(); - ++iter) { - ReadBasicType(is, true, &(iter->first)); - ReadBasicType(is, true, &(iter->second)); - } - return true; - } catch (...) { - KALDI_WARN << "BasicVectorHolder::Read, read error or unexpected data at archive entry beginning at file position " << filepos; - return false; - } - } - } - - // Objects read/written with the Kaldi I/O functions always have the stream - // open in binary mode for reading. - static bool IsReadInBinary() { return true; } - - const T &Value() const { return t_; } - - ~BasicPairVectorHolder() { } - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(BasicPairVectorHolder); - T t_; -}; - - - - -// We define a Token as a nonempty, printable, whitespace-free std::string. -// The binary and text formats here are the same (newline-terminated) -// and as such we don't bother with the binary-mode headers. -class TokenHolder { - public: - typedef std::string T; - - TokenHolder() {} - - static bool Write(std::ostream &os, bool, const T &t) { // ignore binary-mode. - KALDI_ASSERT(IsToken(t)); - os << t << '\n'; - return os.good(); - } - - void Clear() { t_.clear(); } - - // Reads into the holder. - bool Read(std::istream &is) { - is >> t_; - if (is.fail()) return false; - char c; - while (isspace(c = is.peek()) && c!= '\n') is.get(); - if (is.peek() != '\n') { - KALDI_ERR << "TokenHolder::Read, expected newline, got char " << CharToString(is.peek()) - << ", at stream pos " << is.tellg(); - return false; - } - is.get(); // get '\n' - return true; - } - - - // Since this is fundamentally a text format, read in text mode (would work - // fine either way, but doing it this way will exercise more of the code). - static bool IsReadInBinary() { return false; } - - const T &Value() const { return t_; } - - ~TokenHolder() { } - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(TokenHolder); - T t_; -}; - -// A Token is a nonempty, whitespace-free std::string. -// Class TokenVectorHolder is a Holder class for vectors of these. -class TokenVectorHolder { - public: - typedef std::vector<std::string> T; - - TokenVectorHolder() { } - - static bool Write(std::ostream &os, bool, const T &t) { // ignore binary-mode. - for (std::vector<std::string>::const_iterator iter = t.begin(); - iter != t.end(); - ++iter) { - KALDI_ASSERT(IsToken(*iter)); // make sure it's whitespace-free, printable and nonempty. - os << *iter << ' '; - } - os << '\n'; - return os.good(); - } - - void Clear() { t_.clear(); } - - - // Reads into the holder. - bool Read(std::istream &is) { - t_.clear(); - - // there is no binary/non-binary mode. - - std::string line; - getline(is, line); // this will discard the \n, if present. - if (is.fail()) { - KALDI_WARN << "BasicVectorHolder::Read, error reading line " << (is.eof() ? "[eof]" : ""); - return false; // probably eof. fail in any case. - } - const char *white_chars = " \t\n\r\f\v"; - SplitStringToVector(line, white_chars, true, &t_); // true== omit empty strings e.g. - // between spaces. - return true; - } - - // Read in text format since it's basically a text-mode thing.. doesn't really matter, - // it would work either way since we ignore the extra '\r'. - static bool IsReadInBinary() { return false; } - - const T &Value() const { return t_; } - - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(TokenVectorHolder); - T t_; -}; - - -class HtkMatrixHolder { - public: - typedef std::pair<Matrix<BaseFloat>, HtkHeader> T; - - HtkMatrixHolder() {} - - static bool Write(std::ostream &os, bool binary, const T &t) { - if (!binary) - KALDI_ERR << "Non-binary HTK-format write not supported."; - bool ans = WriteHtk(os, t.first, t.second); - if (!ans) - KALDI_WARN << "Error detected writing HTK-format matrix."; - return ans; - } - - void Clear() { t_.first.Resize(0, 0); } - - // Reads into the holder. - bool Read(std::istream &is) { - bool ans = ReadHtk(is, &t_.first, &t_.second); - if (!ans) { - KALDI_WARN << "Error detected reading HTK-format matrix."; - return false; - } - return ans; - } - - // HTK-format matrices only read in binary. - static bool IsReadInBinary() { return true; } - - const T &Value() const { return t_; } - - - // No destructor. - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(HtkMatrixHolder); - T t_; -}; - -// SphinxMatrixHolder can be used to read and write feature files in -// CMU Sphinx format. 13-dimensional big-endian features are assumed. -// The ultimate reference is SphinxBase's source code (for example see -// feat_s2mfc_read() in src/libsphinxbase/feat/feat.c). -// We can't fully automate the detection of machine/feature file endianess -// mismatch here, because for this Sphinx relies on comparing the feature -// file's size with the number recorded in its header. We are working with -// streams, however(what happens if this is a Kaldi archive?). This should -// be no problem, because the usage help of Sphinx' "wave2feat" for example -// says that Sphinx features are always big endian. -// Note: the kFeatDim defaults to 13, see forward declaration in kaldi-holder.h -template<int kFeatDim> class SphinxMatrixHolder { - public: - typedef Matrix<BaseFloat> T; - - SphinxMatrixHolder() {} - - void Clear() { feats_.Resize(0, 0); } - - // Writes Sphinx-format features - static bool Write(std::ostream &os, bool binary, const T &m) { - if (!binary) { - KALDI_WARN << "SphinxMatrixHolder can't write Sphinx features in text "; - return false; - } - - int32 size = m.NumRows() * m.NumCols(); - if (MachineIsLittleEndian()) - KALDI_SWAP4(size); - os.write((char*) &size, sizeof(size)); // write the header - - for (MatrixIndexT i = 0; i < m.NumRows(); i++) { - float32 tmp[m.NumCols()]; - for (MatrixIndexT j = 0; j < m.NumCols(); j++) { - tmp[j] = static_cast<float32>(m(i, j)); - if (MachineIsLittleEndian()) - KALDI_SWAP4(tmp[j]); - } - os.write((char*) tmp, sizeof(tmp)); - } - - return true; - } - - // Reads the features into a Kaldi Matrix - bool Read(std::istream &is) { - int32 nmfcc; - - is.read((char*) &nmfcc, sizeof(nmfcc)); - if (MachineIsLittleEndian()) - KALDI_SWAP4(nmfcc); - KALDI_VLOG(2) << "#feats: " << nmfcc; - int32 nfvec = nmfcc / kFeatDim; - if ((nmfcc % kFeatDim) != 0) { - KALDI_WARN << "Sphinx feature count is inconsistent with vector length "; - return false; - } - - feats_.Resize(nfvec, kFeatDim); - for (MatrixIndexT i = 0; i < feats_.NumRows(); i++) { - if (sizeof(BaseFloat) == sizeof(float32)) { - is.read((char*) feats_.RowData(i), kFeatDim * sizeof(float32)); - if (!is.good()) { - KALDI_WARN << "Unexpected error/EOF while reading Sphinx features "; - return false; - } - if (MachineIsLittleEndian()) { - for (MatrixIndexT j=0; j < kFeatDim; j++) - KALDI_SWAP4(feats_(i, j)); - } - } else { // KALDI_DOUBLEPRECISION=1 - float32 tmp[kFeatDim]; - is.read((char*) tmp, sizeof(tmp)); - if (!is.good()) { - KALDI_WARN << "Unexpected error/EOF while reading Sphinx features "; - return false; - } - for (MatrixIndexT j=0; j < kFeatDim; j++) { - if (MachineIsLittleEndian()) - KALDI_SWAP4(tmp[j]); - feats_(i, j) = static_cast<BaseFloat>(tmp[j]); - } - } - } - - return true; - } - - // Only read in binary - static bool IsReadInBinary() { return true; } - - const T &Value() const { return feats_; } - - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(SphinxMatrixHolder); - T feats_; -}; - - -/// @} end "addtogroup holders" - -} // end namespace kaldi - - - -#endif diff --git a/kaldi_io/src/kaldi/util/kaldi-holder.h b/kaldi_io/src/kaldi/util/kaldi-holder.h deleted file mode 100644 index 95f1183..0000000 --- a/kaldi_io/src/kaldi/util/kaldi-holder.h +++ /dev/null @@ -1,207 +0,0 @@ -// util/kaldi-holder.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#ifndef KALDI_UTIL_KALDI_HOLDER_H_ -#define KALDI_UTIL_KALDI_HOLDER_H_ - -#include <algorithm> -#include "util/kaldi-io.h" -#include "util/text-utils.h" -#include "matrix/kaldi-vector.h" - -namespace kaldi { - - -// The Table class uses a Holder class to wrap objects, and make them behave -// in a "normalized" way w.r.t. reading and writing, so the Table class can -// be template-ized without too much trouble. Look below this -// comment (search for GenericHolder) to see what it looks like. -// -// Requirements of the holder class: -// -// They can only contain objects that can be read/written without external -// information; other objects cannot be stored in this type of archive. -// -// In terms of what functions it should have, see GenericHolder below. -// It is just for documentation. -// -// (1) Requirements of the Read and Write functions -// -// The Read and Write functions should have the property that in a longer -// file, if the Read function is started from where the Write function started -// writing, it should go to where the Write function stopped writing, in either -// text or binary mode (but it's OK if it doesn't eat up trailing space). -// -// [Desirable property: when writing in text mode the output should contain -// exactly one newline, at the end of the output; this makes it easier to manipulate] -// -// [Desirable property for classes: the output should just be a binary-mode -// header (if in binary mode and it's a Kaldi object, or no header -// othewise), and then the output of Object.Write(). This means that when -// written to individual files with the scp: type of wspecifier, we can read -// the individual files in the "normal" Kaldi way by reading the binary -// header and then the object.] -// -// -// The Write function takes a 'binary' argument. In general, each object will -// have two formats: text and binary. However, it's permitted to throw() if -// asked to read in the text format if there is none. The file will be open, if -// the file system has binary/text modes, in the corresponding mode. However, -// the object should have a file-mode in which it can read either text or binary -// output. It announces this via the static IsReadInBinary() function. This -// will generally be the binary mode and it means that where necessary, in text -// formats, we must ignore \r characters. -// -// Memory requirements: if it allocates memory, the destructor should -// free that memory. Copying and assignment of Holder objects may be -// disallowed as the Table code never does this. - - -/// GenericHolder serves to document the requirements of the Holder interface; -/// it's not intended to be used. -template<class SomeType> class GenericHolder { - public: - typedef SomeType T; - - /// Must have a constructor that takes no arguments. - GenericHolder() { } - - /// Write writes this object of type T. Possibly also writes a binary-mode - /// header so that the Read function knows which mode to read in (since the - /// Read function does not get this information). It's a static member so we - /// can write those not inside this class (can use this function with Value() - /// to write from this class). The Write method may throw if it cannot write - /// the object in the given (binary/non-binary) mode. The holder object can - /// assume the stream has been opened in the given mode (where relevant). The - /// object can write the data how it likes. - static bool Write(std::ostream &os, bool binary, const T &t); - - /// Reads into the holder. Must work out from the stream (which will be opened - /// on Windows in binary mode if the IsReadInBinary() function of this class - /// returns true, and text mode otherwise) whether the actual data is binary or - /// not (usually via reading the Kaldi binary-mode header). We put the - /// responsibility for reading the Kaldi binary-mode header in the Read - /// function (rather than making the binary mode an argument to this function), - /// so that for non-Kaldi binary files we don't have to write the header, which - /// would prevent the file being read by non-Kaldi programs (e.g. if we write - /// to individual files using an scp). - /// - /// Read must deallocate any existing data we have here, if applicable (must - /// not assume the object was newly constructed). - /// - /// Returns true on success. - bool Read(std::istream &is); - - /// IsReadInBinary() will return true if the object wants the file to be - /// opened in binary for reading (if the file system has binary/text modes), - /// and false otherwise. Static function. Kaldi objects always return true - /// as they always read in binary mode. Note that we must be able to read, in - /// this mode, objects written in both text and binary mode by Write (which - /// may mean ignoring "\r" characters). I doubt we will ever want this - /// function to return false. - static bool IsReadInBinary() { return true; } - - /// Returns the value of the object held here. Will only - /// ever be called if Read() has been previously called and it returned - /// true (so OK to throw exception if no object was read). - const T &Value() const { return t_; } // if t is a pointer, would return *t_; - - /// The Clear() function doesn't have to do anything. Its purpose is to - /// allow the object to free resources if they're no longer needed. - void Clear() { } - - /// If the object held pointers, the destructor would free them. - ~GenericHolder() { } - - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(GenericHolder); - T t_; // t_ may alternatively be of type T*. -}; - - -// See kaldi-holder-inl.h for examples of some actual Holder -// classes and templates. - - -// The following two typedefs should probably be in their own file, but they're -// here until there are enough of them to warrant their own header. - - -/// \addtogroup holders -/// @{ - -/// KaldiObjectHolder works for Kaldi objects that have the "standard" Read and Write -/// functions, and a copy constructor. -template<class KaldiType> class KaldiObjectHolder; - -/// BasicHolder is valid for float, double, bool, and integer -/// types. There will be a compile time error otherwise, because -/// we make sure that the {Write, Read}BasicType functions do not -/// get instantiated for other types. -template<class BasicType> class BasicHolder; - - -// A Holder for a vector of basic types, e.g. -// std::vector<int32>, std::vector<float>, and so on. -// Note: a basic type is defined as a type for which ReadBasicType -// and WriteBasicType are implemented, i.e. integer and floating -// types, and bool. -template<class BasicType> class BasicVectorHolder; - - -// A holder for vectors of vectors of basic types, e.g. -// std::vector<std::vector<int32> >, and so on. -// Note: a basic type is defined as a type for which ReadBasicType -// and WriteBasicType are implemented, i.e. integer and floating -// types, and bool. -template<class BasicType> class BasicVectorVectorHolder; - -// A holder for vectors of pairsof basic types, e.g. -// std::vector<std::vector<int32> >, and so on. -// Note: a basic type is defined as a type for which ReadBasicType -// and WriteBasicType are implemented, i.e. integer and floating -// types, and bool. Text format is (e.g. for integers), -// "1 12 ; 43 61 ; 17 8 \n" -template<class BasicType> class BasicPairVectorHolder; - -/// We define a Token (not a typedef, just a word) as a nonempty, printable, -/// whitespace-free std::string. The binary and text formats here are the same -/// (newline-terminated) and as such we don't bother with the binary-mode headers. -class TokenHolder; - -/// Class TokenVectorHolder is a Holder class for vectors of Tokens (T == std::string). -class TokenVectorHolder; - -/// A class for reading/writing HTK-format matrices. -/// T == std::pair<Matrix<BaseFloat>, HtkHeader> -class HtkMatrixHolder; - -/// A class for reading/writing Sphinx format matrices. -template<int kFeatDim=13> class SphinxMatrixHolder; - - -/// @} end "addtogroup holders" - - -} // end namespace kaldi - -#include "kaldi-holder-inl.h" - -#endif diff --git a/kaldi_io/src/kaldi/util/kaldi-io-inl.h b/kaldi_io/src/kaldi/util/kaldi-io-inl.h deleted file mode 100644 index 7df7505..0000000 --- a/kaldi_io/src/kaldi/util/kaldi-io-inl.h +++ /dev/null @@ -1,45 +0,0 @@ -// util/kaldi-io-inl.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#ifndef KALDI_UTIL_KALDI_IO_INL_H_ -#define KALDI_UTIL_KALDI_IO_INL_H_ - - -namespace kaldi { - -bool Input::Open(const std::string &rxfilename, bool *binary) { - return OpenInternal(rxfilename, true, binary); -} - -bool Input::OpenTextMode(const std::string &rxfilename) { - return OpenInternal(rxfilename, false, NULL); -} - -bool Input::IsOpen() { - return impl_ != NULL; -} - -bool Output::IsOpen() { - return impl_ != NULL; -} - - -} // end namespace kaldi. - - -#endif diff --git a/kaldi_io/src/kaldi/util/kaldi-io.h b/kaldi_io/src/kaldi/util/kaldi-io.h deleted file mode 100644 index f2c7563..0000000 --- a/kaldi_io/src/kaldi/util/kaldi-io.h +++ /dev/null @@ -1,264 +0,0 @@ -// util/kaldi-io.h - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#ifndef KALDI_UTIL_KALDI_IO_H_ -#define KALDI_UTIL_KALDI_IO_H_ - -#include <cctype> // For isspace. -#include <limits> -#include <string> -#include "base/kaldi-common.h" -#ifdef _MSC_VER -# include <fcntl.h> -# include <io.h> -#endif - - - -namespace kaldi { - -class OutputImplBase; // Forward decl; defined in a .cc file -class InputImplBase; // Forward decl; defined in a .cc file - -/// \addtogroup io_group -/// @{ - -// The Output and Input classes handle stream-opening for "extended" filenames -// that include actual files, standard-input/standard-output, pipes, and -// offsets into actual files. They also handle reading and writing the -// binary-mode headers for Kaldi files, where applicable. The classes have -// versions of the Open routines that throw and do not throw, depending whether -// the calling code wants to catch the errors or not; there are also versions -// that write (or do not write) the Kaldi binary-mode header that says if it's -// binary mode. Generally files that contain Kaldi objects will have the header -// on, so we know upon reading them whether they have the header. So you would -// use the OpenWithHeader routines for these (or the constructor); but other -// types of objects (e.g. FSTs) would have files without a header so you would -// use OpenNoHeader. - -// We now document the types of extended filenames that we use. -// -// A "wxfilename" is an extended filename for writing. It can take three forms: -// (1) Filename: e.g. "/some/filename", "./a/b/c", "c:\Users\dpovey\My Documents\\boo" -// (whatever the actual file-system interprets) -// (2) Standard output: "" or "-" -// (3) A pipe: e.g. "gunzip -c /tmp/abc.gz |" -// -// -// A "rxfilename" is an extended filename for reading. It can take four forms: -// (1) An actual filename, whatever the file-system can read, e.g. "/my/file". -// (2) Standard input: "" or "-" -// (3) A pipe: e.g. "| gzip -c > /tmp/abc.gz" -// (4) An offset into a file, e.g.: "/mnt/blah/data/1.ark:24871" -// [these are created by the Table and TableWriter classes; I may also write -// a program that creates them for arbitrary files] -// - - -// Typical usage: -// ... -// bool binary; -// MyObject.Write(Output(some_filename, binary).Stream(), binary); -// -// ... more extensive example: -// { -// Output ko(some_filename, binary); -// MyObject1.Write(ko.Stream(), binary); -// MyObject2.Write(ko.Stream(), binary); -// } - - - -enum OutputType { - kNoOutput, - kFileOutput, - kStandardOutput, - kPipeOutput -}; - -/// ClassifyWxfilename interprets filenames as follows: -/// - kNoOutput: invalid filenames (leading or trailing space, things that look -/// like wspecifiers and rspecifiers or like pipes to read from with leading |. -/// - kFileOutput: Normal filenames -/// - kStandardOutput: The empty string or "-", interpreted as standard output -/// - kPipeOutput: pipes, e.g. "gunzip -c some_file.gz |" -OutputType ClassifyWxfilename(const std::string &wxfilename); - -enum InputType { - kNoInput, - kFileInput, - kStandardInput, - kOffsetFileInput, - kPipeInput -}; - -/// ClassifyRxfilenames interprets filenames for reading as follows: -/// - kNoInput: invalid filenames (leading or trailing space, things that -/// look like wspecifiers and rspecifiers or pipes to write to -/// with trailing |. -/// - kFileInput: normal filenames -/// - kStandardInput: the empty string or "-" -/// - kPipeInput: e.g. "| gzip -c > blah.gz" -/// - kOffsetFileInput: offsets into files, e.g. /some/filename:12970 -InputType ClassifyRxfilename(const std::string &rxfilename); - - -class Output { - public: - // The normal constructor, provided for convenience. - // Equivalent to calling with default constructor then Open() - // with these arguments. - Output(const std::string &filename, bool binary, bool write_header = true); - - Output(): impl_(NULL) {}; - - /// This opens the stream, with the given mode (binary or text). It returns - /// true on success and false on failure. However, it will throw if something - /// was already open and could not be closed (to avoid this, call Close() - /// first. if write_header == true and binary == true, it writes the Kaldi - /// binary-mode header ('\0' then 'B'). You may call Open even if it is - /// already open; it will close the existing stream and reopen (however if - /// closing the old stream failed it will throw). - bool Open(const std::string &wxfilename, bool binary, bool write_header); - - inline bool IsOpen(); // return true if we have an open stream. Does not imply - // stream is good for writing. - - std::ostream &Stream(); // will throw if not open; else returns stream. - - // Close closes the stream. Calling Close is never necessary unless you - // want to avoid exceptions being thrown. There are times when calling - // Close will hurt efficiency (basically, when using offsets into files, - // and using the same Input object), - // but most of the time the user won't be doing this directly, it will - // be done in kaldi-table.{h, cc}, so you don't have to worry about it. - bool Close(); - - // This will throw if stream could not be closed (to check error status, - // call Close()). - ~Output(); - - private: - OutputImplBase *impl_; // non-NULL if open. - std::string filename_; - KALDI_DISALLOW_COPY_AND_ASSIGN(Output); -}; - - -// bool binary_in; -// Input ki(some_filename, &binary_in); -// MyObject.Read(ki, binary_in); -// -// ... more extensive example: -// -// { -// bool binary_in; -// Input ki(some_filename, &binary_in); -// MyObject1.Read(ki.Stream(), &binary_in); -// MyObject2.Write(ki.Stream(), &binary_in); -// } -// Note that to catch errors you need to use try.. catch. -// Input communicates errors by throwing exceptions. - - -// Input interprets four kinds of filenames: -// (1) Normal filenames -// (2) The empty string or "-", interpreted as standard output -// (3) Pipes, e.g. "| gzip -c > some_file.gz" -// (4) Offsets into [real] files, e.g. "/my/filename:12049" -// The last one has no correspondence in Output. - - -class Input { - public: - /// The normal constructor. Opens the stream in binary mode. - /// Equivalent to calling the default constructor followed by Open(); then, if - /// binary != NULL, it calls ReadHeader(), putting the output in "binary"; it - /// throws on error. - Input(const std::string &rxfilename, bool *contents_binary = NULL); - - Input(): impl_(NULL) {} - - // Open opens the stream for reading (the mode, where relevant, is binary; use - // OpenTextMode for text-mode, we made this a separate function rather than a - // boolean argument, to avoid confusion with Kaldi's text/binary distinction, - // since reading in the file system's text mode is unusual.) If - // contents_binary != NULL, it reads the binary-mode header and puts it in the - // "binary" variable. Returns true on success. If it returns false it will - // not be open. You may call Open even if it is already open; it will close - // the existing stream and reopen (however if closing the old stream failed it - // will throw). - inline bool Open(const std::string &rxfilename, bool *contents_binary = NULL); - - // As Open but (if the file system has text/binary modes) opens in text mode; - // you shouldn't ever have to use this as in Kaldi we read even text files in - // binary mode (and ignore the \r). - inline bool OpenTextMode(const std::string &rxfilename); - - // Return true if currently open for reading and Stream() will - // succeed. Does not guarantee that the stream is good. - inline bool IsOpen(); - - // It is never necessary or helpful to call Close, except if - // you are concerned about to many filehandles being open. - // Close does not throw. - void Close(); - - // Returns the underlying stream. Throws if !IsOpen() - std::istream &Stream(); - - // Destructor does not throw: input streams may legitimately fail so we - // don't worry about the status when we close them. - ~Input(); - private: - bool OpenInternal(const std::string &rxfilename, bool file_binary, bool *contents_binary); - InputImplBase *impl_; - KALDI_DISALLOW_COPY_AND_ASSIGN(Input); -}; - -template <class C> inline void ReadKaldiObject(const std::string &filename, - C *c) { - bool binary_in; - Input ki(filename, &binary_in); - c->Read(ki.Stream(), binary_in); -} - -template <class C> inline void WriteKaldiObject(const C &c, - const std::string &filename, - bool binary) { - Output ko(filename, binary); - c.Write(ko.Stream(), binary); -} - -/// PrintableRxfilename turns the rxfilename into a more human-readable -/// form for error reporting, i.e. it does quoting and escaping and -/// replaces "" or "-" with "standard input". -std::string PrintableRxfilename(std::string rxfilename); - -/// PrintableWxfilename turns the filename into a more human-readable -/// form for error reporting, i.e. it does quoting and escaping and -/// replaces "" or "-" with "standard output". -std::string PrintableWxfilename(std::string wxfilename); - -/// @} - -} // end namespace kaldi. - -#include "kaldi-io-inl.h" - -#endif diff --git a/kaldi_io/src/kaldi/util/kaldi-pipebuf.h b/kaldi_io/src/kaldi/util/kaldi-pipebuf.h deleted file mode 100644 index 43e5a2e..0000000 --- a/kaldi_io/src/kaldi/util/kaldi-pipebuf.h +++ /dev/null @@ -1,90 +0,0 @@ -// util/kaldi-pipebuf.h - -// Copyright 2009-2011 Ondrej Glembek - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -/** @file kaldi-pipebuf.h - * This is an Kaldi C++ Library header. - */ - -#ifndef KALDI_UTIL_KALDI_PIPEBUF_H_ -#define KALDI_UTIL_KALDI_PIPEBUF_H_ - -#if defined(_LIBCPP_VERSION) // libc++ -#include "basic-filebuf.h" -#else -#include <fstream> -#endif - -namespace kaldi -{ -// This class provides a way to initialize a filebuf with a FILE* pointer -// directly; it will not close the file pointer when it is deleted. -// The C++ standard does not allow implementations of C++ to provide -// this constructor within basic_filebuf, which makes it hard to deal -// with pipes using completely native C++. This is a workaround - -#ifdef _MSC_VER -#elif defined(_LIBCPP_VERSION) // libc++ -template<class CharType, class Traits = std::char_traits<CharType> > -class basic_pipebuf : public basic_filebuf<CharType, Traits> -{ - public: - typedef basic_pipebuf<CharType, Traits> ThisType; - - public: - basic_pipebuf(FILE *fptr, std::ios_base::openmode mode) - : basic_filebuf<CharType, Traits>() { - this->open(fptr, mode); - if (!this->is_open()) { - KALDI_WARN << "Error initializing pipebuf"; // probably indicates - // code error, if the fptr was good. - return; - } - } -}; // class basic_pipebuf -#else -template<class CharType, class Traits = std::char_traits<CharType> > -class basic_pipebuf : public std::basic_filebuf<CharType, Traits> -{ - public: - typedef basic_pipebuf<CharType, Traits> ThisType; - - public: - basic_pipebuf(FILE *fptr, std::ios_base::openmode mode) - : std::basic_filebuf<CharType, Traits>() { - this->_M_file.sys_open(fptr, mode); - if (!this->is_open()) { - KALDI_WARN << "Error initializing pipebuf"; // probably indicates - // code error, if the fptr was good. - return; - } - this->_M_mode = mode; - this->_M_buf_size = BUFSIZ; - this->_M_allocate_internal_buffer(); - this->_M_reading = false; - this->_M_writing = false; - this->_M_set_buffer(-1); - } -}; // class basic_pipebuf -#endif // _MSC_VER - -}; // namespace kaldi - -#endif // KALDI_UTIL_KALDI_PIPEBUF_H_ - diff --git a/kaldi_io/src/kaldi/util/kaldi-table-inl.h b/kaldi_io/src/kaldi/util/kaldi-table-inl.h deleted file mode 100644 index 6b73c88..0000000 --- a/kaldi_io/src/kaldi/util/kaldi-table-inl.h +++ /dev/null @@ -1,2246 +0,0 @@ -// util/kaldi-table-inl.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#ifndef KALDI_UTIL_KALDI_TABLE_INL_H_ -#define KALDI_UTIL_KALDI_TABLE_INL_H_ - -#include <algorithm> -#include "util/kaldi-io.h" -#include "util/text-utils.h" -#include "util/stl-utils.h" // for StringHasher. - - -namespace kaldi { - -/// \addtogroup table_impl_types -/// @{ - -template<class Holder> class SequentialTableReaderImplBase { - public: - typedef typename Holder::T T; - // note that Open takes rxfilename not rspecifier. - virtual bool Open(const std::string &rxfilename) = 0; - virtual bool Done() const = 0; - virtual bool IsOpen() const = 0; - virtual std::string Key() = 0; - virtual const T &Value() = 0; - virtual void FreeCurrent() = 0; - virtual void Next() = 0; - virtual bool Close() = 0; - SequentialTableReaderImplBase() { } - virtual ~SequentialTableReaderImplBase() { } - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(SequentialTableReaderImplBase); -}; - - -// This is the implementation for SequentialTableReader -// when it's actually a script file. -template<class Holder> class SequentialTableReaderScriptImpl: - public SequentialTableReaderImplBase<Holder> { - public: - typedef typename Holder::T T; - - SequentialTableReaderScriptImpl(): state_(kUninitialized) { } - - virtual bool Open(const std::string &rspecifier) { - if (state_ != kUninitialized) - if (! Close()) // call Close() yourself to suppress this exception. - KALDI_ERR << "TableReader::Open, error closing previous input: " - << "rspecifier was " << rspecifier_; - bool binary; - rspecifier_ = rspecifier; - RspecifierType rs = ClassifyRspecifier(rspecifier, &script_rxfilename_, - &opts_); - KALDI_ASSERT(rs == kScriptRspecifier); - if (!script_input_.Open(script_rxfilename_, &binary)) { // Failure on Open - KALDI_WARN << "Failed to open script file " - << PrintableRxfilename(script_rxfilename_); - state_ = kUninitialized; - return false; - } else { // Open succeeded. - if (binary) { // script file should not be binary file.. - state_ = kError; // bad script file. - script_input_.Close(); - return false; - } else { - state_ = kFileStart; - Next(); - if (state_ == kError) { - script_input_.Close(); - return false; - } - if (opts_.permissive) { // Next() will have preloaded. - KALDI_ASSERT(state_ == kLoadSucceeded || state_ == kEof); - } else { - KALDI_ASSERT(state_ == kHaveScpLine || state_ == kEof); - } - return true; // Success. - } - } - } - - virtual bool IsOpen() const { - switch (state_) { - case kEof: case kError: case kHaveScpLine: case kLoadSucceeded: case kLoadFailed: return true; - case kUninitialized: return false; - default: KALDI_ERR << "IsOpen() called on invalid object."; // kFileStart is not valid - // state for user to call something on. - return false; - } - } - - virtual bool Done() const { - switch (state_) { - case kHaveScpLine: return false; - case kLoadSucceeded: case kLoadFailed: return false; - // These cases are because we want LoadCurrent() - // to be callable after Next() and to not change the Done() status [only Next() should change - // the Done() status]. - case kEof: case kError: return true; // Error condition, like Eof, counts as Done(); the destructor - // or Close() will inform the user of the error. - default: KALDI_ERR << "Done() called on TableReader object at the wrong time."; - return false; - } - } - - virtual std::string Key() { - // Valid to call this whenever Done() returns false. - switch (state_) { - case kHaveScpLine: case kLoadSucceeded: case kLoadFailed: break; - default: - // coding error. - KALDI_ERR << "Key() called on TableReader object at the wrong time."; - } - return key_; - } - const T &Value() { - StateType orig_state = state_; - if (state_ == kHaveScpLine) LoadCurrent(); // Takes - // state_ to kLoadSucceeded or kLoadFailed. - if (state_ == kLoadFailed) { // this can happen due to - // a file listed in an scp file not existing, or - // read failure, failure of a command, etc. - if (orig_state == kHaveScpLine) - KALDI_ERR << "TableReader: failed to load object from " - << PrintableRxfilename(data_rxfilename_) - << " (to suppress this error, add the permissive " - << "(p, ) option to the rspecifier."; - - else // orig_state_ was kLoadFailed, which only could have happened - // if the user called FreeCurrent(). - KALDI_ERR << "TableReader: you called Value() after FreeCurrent()."; - } else if (state_ != kLoadSucceeded) { - // This would be a coding error. - KALDI_ERR << "TableReader: Value() called at the wrong time."; - } - return holder_.Value(); - } - void FreeCurrent() { - if (state_ == kLoadSucceeded) { - holder_.Clear(); - state_ = kLoadFailed; - } else { - KALDI_WARN << "TableReader: FreeCurrent called at the wrong time."; - } - } - void Next() { - while (1) { - NextScpLine(); - if (Done()) return; - if (opts_.permissive) { - // Permissive mode means, when reading scp files, we treat keys whose scp entry - // cannot be read as nonexistent. This means trying to read. - if (LoadCurrent()) return; // Success. - // else try the next scp line. - } else { - return; // We go the next key; Value() will crash if we can't - // read the scp line. - } - } - } - - virtual bool Close() { - // Close() will succeed if the stream was not in an error - // state. To clean up, it also closes the Input objects if - // they're open. - if (script_input_.IsOpen()) - script_input_.Close(); - if (data_input_.IsOpen()) - data_input_.Close(); - if (state_ == kLoadSucceeded) - holder_.Clear(); - if (!this->IsOpen()) - KALDI_ERR << "Close() called on input that was not open."; - StateType old_state = state_; - state_ = kUninitialized; - if (old_state == kError) { - if (opts_.permissive) { - KALDI_WARN << "Close() called on scp file with read error, ignoring the " - "error because permissive mode specified."; - return true; - } else return false; // User will do something with the error status. - } else return true; - } - - virtual ~SequentialTableReaderScriptImpl() { - if (state_ == kError) - KALDI_ERR << "TableReader: reading script file failed: from scp " - << PrintableRxfilename(script_rxfilename_); - // If you don't want this exception to be thrown you can - // call Close() and check the status. - if (state_ == kLoadSucceeded) - holder_.Clear(); - } - private: - bool LoadCurrent() { - // Attempts to load object whose rxfilename is on the current scp line. - if (state_ != kHaveScpLine) - KALDI_ERR << "TableReader: LoadCurrent() called at the wrong time."; - bool ans; - // note, NULL means it doesn't read the binary-mode header - if (Holder::IsReadInBinary()) ans = data_input_.Open(data_rxfilename_, NULL); - else ans = data_input_.OpenTextMode(data_rxfilename_); - if (!ans) { - // May want to make this warning a VLOG at some point - KALDI_WARN << "TableReader: failed to open file " - << PrintableRxfilename(data_rxfilename_); - state_ = kLoadFailed; - return false; - } else { - if (holder_.Read(data_input_.Stream())) { - state_ = kLoadSucceeded; - return true; - } else { // holder_ will not contain data. - KALDI_WARN << "TableReader: failed to load object from " - << PrintableRxfilename(data_rxfilename_); - state_ = kLoadFailed; - return false; - } - } - } - - // Reads the next line in the script file. - void NextScpLine() { - switch (state_) { - case kLoadSucceeded: holder_.Clear(); break; - case kHaveScpLine: case kLoadFailed: case kFileStart: break; - default: - // No other states are valid to call Next() from. - KALDI_ERR << "Reading script file: Next called wrongly."; - } - std::string line; - if (getline(script_input_.Stream(), line)) { - SplitStringOnFirstSpace(line, &key_, &data_rxfilename_); - if (!key_.empty() && !data_rxfilename_.empty()) { - // Got a valid line. - state_ = kHaveScpLine; - } else { - // Got an invalid line. - state_ = kError; // we can't make sense of this - // scp file and will now die. - } - } else { - state_ = kEof; // nothing more in the scp file. - // Might as well close the input streams as don't need them. - script_input_.Close(); - if (data_input_.IsOpen()) - data_input_.Close(); - } - } - - - Input script_input_; // Input object for the .scp file - Input data_input_; // Input object for the entries in - // the script file. - Holder holder_; // Holds the object. - bool binary_; // Binary-mode archive. - std::string key_; - std::string rspecifier_; - std::string script_rxfilename_; // of the script file. - RspecifierOptions opts_; // options. - std::string data_rxfilename_; // of the file we're reading. - enum StateType { - // [The state of the reading process] [does holder_ [is script_inp_ - // have object] open] - kUninitialized, // Uninitialized or closed. no no - kEof, // We did Next() and found eof in script file. no no - kError, // Some other error no yes - kHaveScpLine, // Just called Open() or Next() and have a no yes - // line of the script file but no data. - kLoadSucceeded, // Called LoadCurrent() and it succeeded. yes yes - kLoadFailed, // Called LoadCurrent() and it failed, no yes - // or the user called FreeCurrent().. note, - // if when called by user we are in this state, - // it means the user called FreeCurrent(). - kFileStart, // [state we only use internally] no yes - } state_; - private: -}; - - -// This is the implementation for SequentialTableReader -// when it's an archive. Note that the archive format is: -// key1 [space] object1 key2 [space] -// object2 ... eof. -// "object1" is the output of the Holder::Write function and will -// typically contain a binary header (in binary mode) and then -// the output of object.Write(os, binary). -// The archive itself does not care whether it is in binary -// or text mode, for reading purposes. - -template<class Holder> class SequentialTableReaderArchiveImpl: - public SequentialTableReaderImplBase<Holder> { - public: - typedef typename Holder::T T; - - SequentialTableReaderArchiveImpl(): state_(kUninitialized) { } - - virtual bool Open(const std::string &rspecifier) { - if (state_ != kUninitialized) { - if (! Close()) { // call Close() yourself to suppress this exception. - if (opts_.permissive) - KALDI_WARN << "TableReader::Open, error closing previous input " - "(only warning, since permissive mode)."; - else - KALDI_ERR << "TableReader::Open, error closing previous input."; - } - } - rspecifier_ = rspecifier; - RspecifierType rs = ClassifyRspecifier(rspecifier, - &archive_rxfilename_, - &opts_); - KALDI_ASSERT(rs == kArchiveRspecifier); - - bool ans; - // NULL means don't expect binary-mode header - if (Holder::IsReadInBinary()) - ans = input_.Open(archive_rxfilename_, NULL); - else - ans = input_.OpenTextMode(archive_rxfilename_); - if (!ans) { // header. - KALDI_WARN << "TableReader: failed to open stream " - << PrintableRxfilename(archive_rxfilename_); - state_ = kUninitialized; // Failure on Open - return false; // User should print the error message. - } - state_ = kFileStart; - Next(); - if (state_ == kError) { - KALDI_WARN << "Error beginning to read archive file (wrong filename?): " - << PrintableRxfilename(archive_rxfilename_); - input_.Close(); - state_ = kUninitialized; - return false; - } - KALDI_ASSERT(state_ == kHaveObject || state_ == kEof); - return true; - } - - virtual void Next() { - switch (state_) { - case kHaveObject: - holder_.Clear(); break; - case kFileStart: case kFreedObject: - break; - default: - KALDI_ERR << "TableReader: Next() called wrongly."; - } - std::istream &is = input_.Stream(); - is.clear(); // Clear any fail bits that may have been set... just in case - // this happened in the Read function. - is >> key_; // This eats up any leading whitespace and gets the string. - if (is.eof()) { - state_ = kEof; - return; - } - if (is.fail()) { // This shouldn't really happen, barring file-system errors. - KALDI_WARN << "Error reading archive " - << PrintableRxfilename(archive_rxfilename_); - state_ = kError; - return; - } - int c; - if ((c = is.peek()) != ' ' && c != '\t' && c != '\n') { // We expect a space ' ' after the key. - // We also allow tab [which is consumed] and newline [which is not], just - // so we can read archives generated by scripts that may not be fully - // aware of how this format works. - KALDI_WARN << "Invalid archive file format: expected space after key " - << key_ << ", got character " - << CharToString(static_cast<char>(is.peek())) << ", reading " - << PrintableRxfilename(archive_rxfilename_); - state_ = kError; - return; - } - if (c != '\n') is.get(); // Consume the space or tab. - if (holder_.Read(is)) { - state_ = kHaveObject; - return; - } else { - KALDI_WARN << "Object read failed, reading archive " - << PrintableRxfilename(archive_rxfilename_); - state_ = kError; - return; - } - } - - virtual bool IsOpen() const { - switch (state_) { - case kEof: case kError: case kHaveObject: case kFreedObject: return true; - case kUninitialized: return false; - default: KALDI_ERR << "IsOpen() called on invalid object."; // kFileStart is not valid - // state for user to call something on. - return false; - } - } - - virtual bool Done() const { - switch (state_) { - case kHaveObject: - return false; - case kEof: case kError: - return true; // Error-state counts as Done(), but destructor - // will fail (unless you check the status with Close()). - default: - KALDI_ERR << "Done() called on TableReader object at the wrong time."; - return false; - } - } - - virtual std::string Key() { - // Valid to call this whenever Done() returns false - switch (state_) { - case kHaveObject: break; // only valid case. - default: - // coding error. - KALDI_ERR << "Key() called on TableReader object at the wrong time."; - } - return key_; - } - const T &Value() { - switch (state_) { - case kHaveObject: - break; // only valid case. - default: - // coding error. - KALDI_ERR << "Value() called on TableReader object at the wrong time."; - } - return holder_.Value(); - } - virtual void FreeCurrent() { - if (state_ == kHaveObject) { - holder_.Clear(); - state_ = kFreedObject; - } else - KALDI_WARN << "TableReader: FreeCurernt called at the wrong time."; - } - - virtual bool Close() { - if (! this->IsOpen()) - KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly."; - if (input_.IsOpen()) - input_.Close(); - if (state_ == kHaveObject) - holder_.Clear(); - bool ans; - if (opts_.permissive) { - ans = true; // always return success. - if (state_ == kError) - KALDI_WARN << "Error detected closing TableReader for archive " - << PrintableRxfilename(archive_rxfilename_) << " but ignoring " - << "it as permissive mode specified."; - } else - ans = (state_ != kError); // If error state, user should detect it. - state_ = kUninitialized; - return ans; - } - - virtual ~SequentialTableReaderArchiveImpl() { - if (state_ == kError) { - if (opts_.permissive) - KALDI_WARN << "Error detected closing TableReader for archive " - << PrintableRxfilename(archive_rxfilename_) << " but ignoring " - << "it as permissive mode specified."; - else - KALDI_ERR << "TableReader: error detected closing archive " - << PrintableRxfilename(archive_rxfilename_); - } - // If you don't want this exception to be thrown you can - // call Close() and check the status. - if (state_ == kHaveObject) - holder_.Clear(); - } - private: - Input input_; // Input object for the archive - Holder holder_; // Holds the object. - std::string key_; - std::string rspecifier_; - std::string archive_rxfilename_; - RspecifierOptions opts_; - enum { // [The state of the reading process] [does holder_ [is input_ - // have object] open] - kUninitialized, // Uninitialized or closed. no no - kFileStart, // [state we use internally: just opened.] no yes - kEof, // We did Next() and found eof in archive no no - kError, // Some other error no no - kHaveObject, // We read the key and the object after it. yes yes - kFreedObject, // The user called FreeCurrent(). no yes - } state_; -}; - - -template<class Holder> -SequentialTableReader<Holder>::SequentialTableReader(const std::string &rspecifier): impl_(NULL) { - if (rspecifier != "" && !Open(rspecifier)) - KALDI_ERR << "Error constructing TableReader: rspecifier is " << rspecifier; -} - -template<class Holder> -bool SequentialTableReader<Holder>::Open(const std::string &rspecifier) { - if (IsOpen()) - if (!Close()) - KALDI_ERR << "Could not close previously open object."; - // now impl_ will be NULL. - - RspecifierType wt = ClassifyRspecifier(rspecifier, NULL, NULL); - switch (wt) { - case kArchiveRspecifier: - impl_ = new SequentialTableReaderArchiveImpl<Holder>(); - break; - case kScriptRspecifier: - impl_ = new SequentialTableReaderScriptImpl<Holder>(); - break; - case kNoRspecifier: default: - KALDI_WARN << "Invalid rspecifier " << rspecifier; - return false; - } - if (!impl_->Open(rspecifier)) { - delete impl_; - impl_ = NULL; - return false; // sub-object will have printed warnings. - } - else return true; -} - -template<class Holder> -bool SequentialTableReader<Holder>::Close() { - CheckImpl(); - bool ans = impl_->Close(); - delete impl_; // We don't keep around empty impl_ objects. - impl_ = NULL; - return ans; -} - - -template<class Holder> -bool SequentialTableReader<Holder>::IsOpen() const { - return (impl_ != NULL); // Because we delete the object whenever - // that object is not open. Thus, the IsOpen functions of the - // Impl objects are not really needed. -} - -template<class Holder> -std::string SequentialTableReader<Holder>::Key() { - CheckImpl(); - return impl_->Key(); // this call may throw if called wrongly in other ways, - // e.g. eof. -} - - -template<class Holder> -void SequentialTableReader<Holder>::FreeCurrent() { - CheckImpl(); - impl_->FreeCurrent(); -} - - -template<class Holder> -const typename SequentialTableReader<Holder>::T & -SequentialTableReader<Holder>::Value() { - CheckImpl(); - return impl_->Value(); // This may throw (if LoadCurrent() returned false you are safe.). -} - - -template<class Holder> -void SequentialTableReader<Holder>::Next() { - CheckImpl(); - impl_->Next(); -} - -template<class Holder> -bool SequentialTableReader<Holder>::Done() { - CheckImpl(); - return impl_->Done(); -} - - -template<class Holder> -SequentialTableReader<Holder>::~SequentialTableReader() { - if (impl_) delete impl_; - // Destructor of impl_ may throw. -} - - - -template<class Holder> class TableWriterImplBase { - public: - typedef typename Holder::T T; - - virtual bool Open(const std::string &wspecifier) = 0; - - // Write returns true on success, false on failure, but - // some errors may not be detected until we call Close(). - // It throws (via KALDI_ERR) if called wrongly. We could - // have just thrown on all errors, since this is what - // TableWriter does; it was designed this way because originally - // TableWriter::Write returned an exit status. - virtual bool Write(const std::string &key, const T &value) = 0; - - // Flush will flush any archive; it does not return error status, - // any errors will be reported on the next Write or Close. - virtual void Flush() = 0; - - virtual bool Close() = 0; - - virtual bool IsOpen() const = 0; - - // May throw on write error if Close was not called. - virtual ~TableWriterImplBase() { } - - TableWriterImplBase() { } - private: - KALDI_DISALLOW_COPY_AND_ASSIGN(TableWriterImplBase); -}; - - -// The implementation of TableWriter we use when writing directly -// to an archive with no associated scp. -template<class Holder> -class TableWriterArchiveImpl: public TableWriterImplBase<Holder> { - public: - typedef typename Holder::T T; - - virtual bool Open(const std::string &wspecifier) { - switch (state_) { - case kUninitialized: - break; - case kWriteError: - KALDI_ERR << "TableWriter: opening stream, already open with write error."; - case kOpen: default: - if (!Close()) // throw because this error may not have been previously - // detected by the user. - KALDI_ERR << "TableWriter: opening stream, error closing previously open stream."; - } - wspecifier_ = wspecifier; - WspecifierType ws = ClassifyWspecifier(wspecifier, - &archive_wxfilename_, - NULL, - &opts_); - KALDI_ASSERT(ws == kArchiveWspecifier); // or wrongly called. - - if (output_.Open(archive_wxfilename_, opts_.binary, false)) { // false means no binary header. - state_ = kOpen; - return true; - } else { - // stream will not be open. User will report this error - // (we return bool), so don't bother printing anything. - state_ = kUninitialized; - return false; - } - } - - virtual bool IsOpen() const { - switch (state_) { - case kUninitialized: return false; - case kOpen: case kWriteError: return true; - default: KALDI_ERR << "IsOpen() called on TableWriter in invalid state."; - } - return false; - } - - // Write returns true on success, false on failure, but - // some errors may not be detected till we call Close(). - virtual bool Write(const std::string &key, const T &value) { - switch (state_) { - case kOpen: break; - case kWriteError: - // user should have known from the last - // call to Write that there was a problem. - KALDI_WARN << "TableWriter: attempting to write to invalid stream."; - return false; - case kUninitialized: default: - KALDI_ERR << "TableWriter: Write called on invalid stream"; - - } - // state is now kOpen or kWriteError. - if (!IsToken(key)) // e.g. empty string or has spaces... - KALDI_ERR << "TableWriter: using invalid key " << key; - output_.Stream() << key << ' '; - if (!Holder::Write(output_.Stream(), opts_.binary, value)) { - KALDI_WARN << "TableWriter: write failure to " - << PrintableWxfilename(archive_wxfilename_); - state_ = kWriteError; - return false; - } - if (state_ == kWriteError) return false; // Even if this Write seems to have - // succeeded, we fail because a previous Write failed and the archive may be - // corrupted and unreadable. - - if (opts_.flush) - Flush(); - return true; - } - - // Flush will flush any archive; it does not return error status, - // any errors will be reported on the next Write or Close. - virtual void Flush() { - switch (state_) { - case kWriteError: case kOpen: - output_.Stream().flush(); // Don't check error status. - return; - default: - KALDI_WARN << "TableWriter: Flush called on not-open writer."; - } - } - - virtual bool Close() { - if (!this->IsOpen() || !output_.IsOpen()) - KALDI_ERR << "TableWriter: Close called on a stream that was not open." << this->IsOpen() << ", " << output_.IsOpen(); - bool close_success = output_.Close(); - if (!close_success) { - KALDI_WARN << "TableWriter: error closing stream: wspecifier is " - << wspecifier_; - state_ = kUninitialized; - return false; - } - if (state_ == kWriteError) { - KALDI_WARN << "TableWriter: closing writer in error state: wspecifier is " - << wspecifier_; - state_ = kUninitialized; - return false; - } - state_ = kUninitialized; - return true; - } - - TableWriterArchiveImpl(): state_(kUninitialized) {} - - // May throw on write error if Close was not called. - virtual ~TableWriterArchiveImpl() { - if (!IsOpen()) return; - else if (!Close()) - KALDI_ERR << "At TableWriter destructor: Write failed or stream close " - << "failed: wspecifier is "<< wspecifier_; - } - - private: - Output output_; - WspecifierOptions opts_; - std::string wspecifier_; - std::string archive_wxfilename_; - enum { // is stream open? - kUninitialized, // no - kOpen, // yes - kWriteError, // yes - } state_; -}; - - - - -// The implementation of TableWriter we use when writing to -// individual files (more generally, wxfilenames) specified -// in an scp file that we read. - -// Note: the code for this class is similar to RandomAccessTableReaderScriptImpl; -// try to keep them in sync. - -template<class Holder> -class TableWriterScriptImpl: public TableWriterImplBase<Holder> { - public: - typedef typename Holder::T T; - - TableWriterScriptImpl(): last_found_(0), state_(kUninitialized) {} - - virtual bool Open(const std::string &wspecifier) { - switch (state_) { - case kReadScript: - KALDI_ERR << " Opening already open TableWriter: call Close first."; - case kUninitialized: case kNotReadScript: - break; - } - wspecifier_ = wspecifier; - WspecifierType ws = ClassifyWspecifier(wspecifier, - NULL, - &script_rxfilename_, - &opts_); - KALDI_ASSERT(ws == kScriptWspecifier); // or wrongly called. - KALDI_ASSERT(script_.empty()); // no way it could be nonempty at this point. - - if (! ReadScriptFile(script_rxfilename_, - true, // print any warnings - &script_)) { // error reading script file or invalid format - state_ = kNotReadScript; - return false; // no need to print further warnings. user gets the error. - } - std::sort(script_.begin(), script_.end()); - for (size_t i = 0; i+1 < script_.size(); i++) { - if (script_[i].first.compare(script_[i+1].first) >= 0) { - // script[i] not < script[i+1] in lexical order... - KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_) - << " contains duplicate key " << script_[i].first; - state_ = kNotReadScript; - return false; - } - } - state_ = kReadScript; - return true; - } - - virtual bool IsOpen() const { return (state_ == kReadScript); } - - virtual bool Close() { - if (!IsOpen()) - KALDI_ERR << "Close() called on TableWriter that was not open."; - state_ = kUninitialized; - last_found_ = 0; - script_.clear(); - return true; - } - - // Write returns true on success, false on failure, but - // some errors may not be detected till we call Close(). - virtual bool Write(const std::string &key, const T &value) { - if (!IsOpen()) - KALDI_ERR << "TableWriter: Write called on invalid stream"; - - if (!IsToken(key)) // e.g. empty string or has spaces... - KALDI_ERR << "TableWriter: using invalid key " << key; - - std::string wxfilename; - if (!LookupFilename(key, &wxfilename)) { - if (opts_.permissive) { - return true; // In permissive mode, it's as if we're writing to /dev/null - // for missing keys. - } else { - KALDI_WARN << "TableWriter: script file " - << PrintableRxfilename(script_rxfilename_) - << " has no entry for key "<<key; - return false; - } - } - Output output; - if (!output.Open(wxfilename, opts_.binary, false)) { - // Open in the text/binary mode (on Windows) given by member var. "binary" - // (obtained from wspecifier), but do not put the binary-mode header (it - // will be written, if needed, by the Holder::Write function.) - KALDI_WARN << "TableWriter: failed to open stream: " - << PrintableWxfilename(wxfilename); - return false; - } - if (!Holder::Write(output.Stream(), opts_.binary, value) - || !output.Close()) { - KALDI_WARN << "TableWriter: failed to write data to " - << PrintableWxfilename(wxfilename); - return false; - } - return true; - } - - // Flush does nothing in this implementation, there is nothing to flush. - virtual void Flush() { } - - - virtual ~TableWriterScriptImpl() { - // Nothing to do in destructor. - } - - private: - // Note: this function is almost the same as in RandomAccessTableReaderScriptImpl. - bool LookupFilename(const std::string &key, std::string *wxfilename) { - // First, an optimization: if we're going consecutively, this will - // make the lookup very fast. - last_found_++; - if (last_found_ < script_.size() && script_[last_found_].first == key) { - *wxfilename = script_[last_found_].second; - return true; - } - std::pair<std::string, std::string> pr(key, ""); // Important that "" - // compares less than or equal to any string, so lower_bound points to the - // element that has the same key. - typedef typename std::vector<std::pair<std::string, std::string> >::const_iterator - IterType; - IterType iter = std::lower_bound(script_.begin(), script_.end(), pr); - if (iter != script_.end() && iter->first == key) { - last_found_ = iter - script_.begin(); - *wxfilename = iter->second; - return true; - } else { - return false; - } - } - - - WspecifierOptions opts_; - std::string wspecifier_; - std::string script_rxfilename_; - - // the script_ variable contains pairs of (key, filename), sorted using - // std::sort. This can be used with binary_search to look up filenames for - // writing. If this becomes inefficient we can use std::unordered_map (but I - // suspect this wouldn't be significantly faster & would use more memory). - // If memory becomes a problem here, the user should probably be passing - // only the relevant part of the scp file rather than expecting us to get too - // clever in the code. - std::vector<std::pair<std::string, std::string> > script_; - size_t last_found_; // This is for an optimization used in LookupFilename. - - enum { - kUninitialized, - kReadScript, - kNotReadScript, // read of script failed. - } state_; -}; - - -// The implementation of TableWriter we use when writing directly -// to an archive plus an associated scp. -template<class Holder> -class TableWriterBothImpl: public TableWriterImplBase<Holder> { - public: - typedef typename Holder::T T; - - virtual bool Open(const std::string &wspecifier) { - switch (state_) { - case kUninitialized: - break; - case kWriteError: - KALDI_ERR << "TableWriter: opening stream, already open with write error."; - case kOpen: default: - if (!Close()) // throw because this error may not have been previously detected by user. - KALDI_ERR << "TableWriter: opening stream, error closing previously open stream."; - } - wspecifier_ = wspecifier; - WspecifierType ws = ClassifyWspecifier(wspecifier, - &archive_wxfilename_, - &script_wxfilename_, - &opts_); - KALDI_ASSERT(ws == kBothWspecifier); // or wrongly called. - if (ClassifyWxfilename(archive_wxfilename_) != kFileOutput) - KALDI_WARN << "When writing to both archive and script, the script file " - "will generally not be interpreted correctly unless the archive is " - "an actual file: wspecifier = " << wspecifier; - - if (!archive_output_.Open(archive_wxfilename_, opts_.binary, false)) { // false means no binary header. - state_ = kUninitialized; - return false; - } - if (!script_output_.Open(script_wxfilename_, false, false)) { // first false means text mode: - // script files always text-mode. second false means don't write header (doesn't matter - // for text mode). - archive_output_.Close(); // Don't care about status: error anyway. - state_ = kUninitialized; - return false; - } - state_ = kOpen; - return true; - } - - virtual bool IsOpen() const { - switch (state_) { - case kUninitialized: return false; - case kOpen: case kWriteError: return true; - default: KALDI_ERR << "IsOpen() called on TableWriter in invalid state."; - } - return false; - } - - void MakeFilename(typename std::ostream::pos_type streampos, std::string *output) const { - std::ostringstream ss; - ss << ':' << streampos; - KALDI_ASSERT(ss.str() != ":-1"); - *output = archive_wxfilename_ + ss.str(); - - // e.g. /some/file:12302. - // Note that we warned if archive_wxfilename_ is not an actual filename; - // the philosophy is we give the user rope and if they want to hang - // themselves, with it, fine. - } - - // Write returns true on success, false on failure, but - // some errors may not be detected till we call Close(). - virtual bool Write(const std::string &key, const T &value) { - switch (state_) { - case kOpen: break; - case kWriteError: - // user should have known from the last - // call to Write that there was a problem. Warn about it. - KALDI_WARN << "TableWriter: writing to non-open TableWriter object."; - return false; - case kUninitialized: default: - KALDI_ERR << "TableWriter: Write called on invalid stream"; - } - // state is now kOpen or kWriteError. - if (!IsToken(key)) // e.g. empty string or has spaces... - KALDI_ERR << "TableWriter: using invalid key " << key; - std::ostream &archive_os = archive_output_.Stream(); - archive_os << key << ' '; - typename std::ostream::pos_type archive_os_pos = archive_os.tellp(); - // position at start of Write() to archive. We will record this in the script file. - std::string offset_rxfilename; // rxfilename with offset into the archive, - // e.g. some_archive_name.ark:431541423 - MakeFilename(archive_os_pos, &offset_rxfilename); - - // Write to the script file first. - // The idea is that we want to get all the information possible into the - // script file, to make it easier to unwind errors later. - std::ostream &script_os = script_output_.Stream(); - script_output_.Stream() << key << ' ' << offset_rxfilename << '\n'; - - if (!Holder::Write(archive_output_.Stream(), opts_.binary, value)) { - KALDI_WARN << "TableWriter: write failure to" - << PrintableWxfilename(archive_wxfilename_); - state_ = kWriteError; - return false; - } - - if (script_os.fail()) { - KALDI_WARN << "TableWriter: write failure to script file detected: " - << PrintableWxfilename(script_wxfilename_); - state_ = kWriteError; - return false; - } - - if (archive_os.fail()) { - KALDI_WARN << "TableWriter: write failure to archive file detected: " - << PrintableWxfilename(archive_wxfilename_); - state_ = kWriteError; - return false; - } - - if (state_ == kWriteError) return false; // Even if this Write seems to have - // succeeded, we fail because a previous Write failed and the archive may be - // corrupted and unreadable. - - if (opts_.flush) - Flush(); - return true; - } - - // Flush will flush any archive; it does not return error status, - // any errors will be reported on the next Write or Close. - virtual void Flush() { - switch (state_) { - case kWriteError: case kOpen: - archive_output_.Stream().flush(); // Don't check error status. - script_output_.Stream().flush(); // Don't check error status. - return; - default: - KALDI_WARN << "TableWriter: Flush called on not-open writer."; - } - } - - virtual bool Close() { - if (!this->IsOpen()) - KALDI_ERR << "TableWriter: Close called on a stream that was not open."; - bool close_success = true; - if (archive_output_.IsOpen()) - if (!archive_output_.Close()) close_success = false; - if (script_output_.IsOpen()) - if (!script_output_.Close()) close_success = false; - bool ans = close_success && (state_ != kWriteError); - state_ = kUninitialized; - return ans; - } - - TableWriterBothImpl(): state_(kUninitialized) {} - - // May throw on write error if Close() was not called. - // User can get the error status by calling Close(). - virtual ~TableWriterBothImpl() { - if (!IsOpen()) return; - else if (!Close()) - KALDI_ERR << "At TableWriter destructor: Write failed or stream close failed: " - << wspecifier_; - } - - private: - Output archive_output_; - Output script_output_; - WspecifierOptions opts_; - std::string archive_wxfilename_; - std::string script_wxfilename_; - std::string wspecifier_; - enum { // is stream open? - kUninitialized, // no - kOpen, // yes - kWriteError, // yes - } state_; -}; - - -template<class Holder> -TableWriter<Holder>::TableWriter(const std::string &wspecifier): impl_(NULL) { - if (wspecifier != "" && !Open(wspecifier)) { - KALDI_ERR << "TableWriter: failed to write to " - << wspecifier; - } -} - -template<class Holder> -bool TableWriter<Holder>::IsOpen() const { - return (impl_ != NULL); -} - - -template<class Holder> -bool TableWriter<Holder>::Open(const std::string &wspecifier) { - - if (IsOpen()) { - if (!Close()) // call Close() yourself to suppress this exception. - KALDI_ERR << "TableWriter::Open, failed to close previously open writer."; - } - KALDI_ASSERT(impl_ == NULL); - WspecifierType wtype = ClassifyWspecifier(wspecifier, NULL, NULL, NULL); - switch (wtype) { - case kBothWspecifier: - impl_ = new TableWriterBothImpl<Holder>(); - break; - case kArchiveWspecifier: - impl_ = new TableWriterArchiveImpl<Holder>(); - break; - case kScriptWspecifier: - impl_ = new TableWriterScriptImpl<Holder>(); - break; - case kNoWspecifier: default: - KALDI_WARN << "ClassifyWspecifier: invalid wspecifier " << wspecifier; - return false; - } - if (impl_->Open(wspecifier)) return true; - else { // The class will have printed a more specific warning. - delete impl_; - impl_ = NULL; - return false; - } -} - -template<class Holder> -void TableWriter<Holder>::Write(const std::string &key, - const T &value) const { - CheckImpl(); - if (!impl_->Write(key, value)) - KALDI_ERR << "Error in TableWriter::Write"; - // More specific warning will have - // been printed in the Write function. -} - -template<class Holder> -void TableWriter<Holder>::Flush() { - CheckImpl(); - impl_->Flush(); -} - -template<class Holder> -bool TableWriter<Holder>::Close() { - CheckImpl(); - bool ans = impl_->Close(); - delete impl_; // We don't keep around non-open impl_ objects [c.f. definition of IsOpen()] - impl_ = NULL; - return ans; -} - -template<class Holder> -TableWriter<Holder>::~TableWriter() { - if (IsOpen() && !Close()) { - KALDI_ERR << "Error closing TableWriter [in destructor]."; - } -} - - -// Types of RandomAccessTableReader: -// In principle, we would like to have four types of RandomAccessTableReader: -// the 4 combinations [scp, archive], [seekable, not-seekable], -// where if something is seekable we only store a file offset. However, -// it seems sufficient for now to only implement two of these, in both -// cases assuming it's not seekable so we never store file offsets and always -// store either the scp line or the data in the archive. The reasons are: -// (1) -// For scp files, storing the actual entry is not that much more expensive -// than storing the file offsets (since the entries are just filenames), and -// avoids a lot of fseek operations that might be expensive. -// (2) -// For archive files, there is no real reason, if you have the archive file -// on disk somewhere, why you wouldn't access it via its associated scp. -// [i.e. write it as ark, scp]. The main reason to read archives directly -// is if they are part of a pipe, and in this case it's not seekable, so -// we implement only this case. -// -// Note that we will rarely in practice have to keep in memory everything in -// the archive, as long as things are only read once from the archive (the -// "o, " or "once" option) and as long as we keep our keys in sorted order; to take -// advantage of this we need the "s, " (sorted) option, so we would read archives -// as e.g. "s, o, ark:-" (this is the rspecifier we would use if it was the -// standard input and these conditions held). - -template<class Holder> class RandomAccessTableReaderImplBase { - public: - typedef typename Holder::T T; - - virtual bool Open(const std::string &rspecifier) = 0; - - virtual bool HasKey(const std::string &key) = 0; - - virtual const T &Value(const std::string &key) = 0; - - virtual bool Close() = 0; - - virtual ~RandomAccessTableReaderImplBase() {} -}; - - -// Implementation of RandomAccessTableReader for a script file; for simplicity we -// just read it in all in one go, as it's unlikely someone would generate this -// from a pipe. In principle we could read it on-demand as for the archives, but -// this would probably be overkill. - -// Note: the code for this this class is similar to TableWriterScriptImpl: -// try to keep them in sync. -template<class Holder> -class RandomAccessTableReaderScriptImpl: - public RandomAccessTableReaderImplBase<Holder> { - - public: - typedef typename Holder::T T; - - RandomAccessTableReaderScriptImpl(): last_found_(0), state_(kUninitialized) {} - - virtual bool Open(const std::string &rspecifier) { - switch (state_) { - case kNotHaveObject: case kHaveObject: case kGaveObject: - KALDI_ERR << " Opening already open RandomAccessTableReader: call Close first."; - case kUninitialized: case kNotReadScript: - break; - } - rspecifier_ = rspecifier; - RspecifierType rs = ClassifyRspecifier(rspecifier, - &script_rxfilename_, - &opts_); - KALDI_ASSERT(rs == kScriptRspecifier); // or wrongly called. - KALDI_ASSERT(script_.empty()); // no way it could be nonempty at this point. - - if (! ReadScriptFile(script_rxfilename_, - true, // print any warnings - &script_)) { // error reading script file or invalid format - state_ = kNotReadScript; - return false; // no need to print further warnings. user gets the error. - } - - rspecifier_ = rspecifier; - // If opts_.sorted, the user has asserted that the keys are already sorted. - // Although we could easily sort them, we want to let the user know of this - // mistake. This same mistake could have serious effects if used with an - // archive rather than a script. - if (!opts_.sorted) - std::sort(script_.begin(), script_.end()); - for (size_t i = 0; i+1 < script_.size(); i++) { - if (script_[i].first.compare(script_[i+1].first) >= 0) { - // script[i] not < script[i+1] in lexical order... - bool same = (script_[i].first == script_[i+1].first); - KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_) - << (same ? " contains duplicate key: " : - " is not sorted (remove s, option or add ns, option): key is ") - << script_[i].first; - state_ = kNotReadScript; - return false; - } - } - state_ = kNotHaveObject; - return true; - } - - virtual bool IsOpen() const { - return (state_ == kNotHaveObject || state_ == kHaveObject || - state_ == kGaveObject); - } - - virtual bool Close() { - if (!IsOpen()) - KALDI_ERR << "Close() called on RandomAccessTableReader that was not open."; - holder_.Clear(); - state_ = kUninitialized; - last_found_ = 0; - script_.clear(); - current_key_ = ""; - // This one cannot fail because any errors of a "global" - // nature would have been detected when we did Open(). - // With archives it's different. - return true; - } - - virtual bool HasKey(const std::string &key) { - bool preload = opts_.permissive; - // In permissive mode, we have to check that we can read - // the scp entry before we assert that the key is there. - return HasKeyInternal(key, preload); - } - - - // Write returns true on success, false on failure, but - // some errors may not be detected till we call Close(). - virtual const T& Value(const std::string &key) { - - if (!IsOpen()) - KALDI_ERR << "Value() called on non-open object."; - - if (!((state_ == kHaveObject || state_ == kGaveObject) - && key == current_key_)) { // Not already stored... - bool has_key = HasKeyInternal(key, true); // preload. - if (!has_key) - KALDI_ERR << "Could not get item for key " << key - << ", rspecifier is " << rspecifier_ << "[to ignore this, " - << "add the p, (permissive) option to the rspecifier."; - KALDI_ASSERT(state_ == kHaveObject && key == current_key_); - } - - if (state_ == kHaveObject) { - state_ = kGaveObject; - if (opts_.once) MakeTombstone(key); // make sure that future lookups fail. - return holder_.Value(); - } else { // state_ == kGaveObject - if (opts_.once) - KALDI_ERR << "Value called twice for the same key and ,o (once) option " - << "is used: rspecifier is " << rspecifier_; - return holder_.Value(); - } - } - - virtual ~RandomAccessTableReaderScriptImpl() { - if (state_ == kHaveObject || state_ == kGaveObject) - holder_.Clear(); - } - - private: - // HasKeyInternal when called with preload == false just tells us whether the - // key is in the scp. With preload == true, which happens when the ,p - // (permissive) option is given in the rspecifier, it will also check that we - // can preload the object from disk (loading from the rxfilename in the scp), - // and only return true if we can. This function is called both from HasKey - // and from Value(). - virtual bool HasKeyInternal(const std::string &key, bool preload) { - switch (state_) { - case kUninitialized: case kNotReadScript: - KALDI_ERR << "HasKey called on RandomAccessTableReader object that is not open."; - case kHaveObject: case kGaveObject: - if (key == current_key_) - return true; - break; - default: break; - } - KALDI_ASSERT(IsToken(key)); - size_t key_pos = 0; // set to zero to suppress warning - bool ans = LookupKey(key, &key_pos); - if (!ans) return false; - else { - // First do a check regarding the "once" option. - if (opts_.once && script_[key_pos].second == "") { // A "tombstone"; user is asking about - // already-read key. - KALDI_ERR << "HasKey called on key whose value was already read, and " - " you specified the \"once\" option (o, ): try removing o, or adding no, :" - " rspecifier is " << rspecifier_; - } - if (!preload) - return true; // we have the key. - else { // preload specified, so we have to pre-load the object before returning true. - if (!input_.Open(script_[key_pos].second)) { - KALDI_WARN << "Error opening stream " - << PrintableRxfilename(script_[key_pos].second); - return false; - } else { - // Make sure holder empty. - if (state_ == kHaveObject || state_ == kGaveObject) - holder_.Clear(); - if (holder_.Read(input_.Stream())) { - state_ = kHaveObject; - current_key_ = key; - return true; - } else { - KALDI_WARN << "Error reading object from " - "stream " << PrintableRxfilename(script_[key_pos].second); - state_ = kNotHaveObject; - return false; - } - } - } - } - } - void MakeTombstone(const std::string &key) { - size_t offset; - if (!LookupKey(key, &offset)) - KALDI_ERR << "RandomAccessTableReader object in inconsistent state."; - else - script_[offset].second = ""; - } - bool LookupKey(const std::string &key, size_t *script_offset) { - // First, an optimization: if we're going consecutively, this will - // make the lookup very fast. Since we may call HasKey and then - // Value(), which both may look up the key, we test if either the - // current or next position are correct. - if (last_found_ < script_.size() && script_[last_found_].first == key) { - *script_offset = last_found_; - return true; - } - last_found_++; - if (last_found_ < script_.size() && script_[last_found_].first == key) { - *script_offset = last_found_; - return true; - } - std::pair<std::string, std::string> pr(key, ""); // Important that "" - // compares less than or equal to any string, so lower_bound points to the - // element that has the same key. - typedef typename std::vector<std::pair<std::string, std::string> >::const_iterator - IterType; - IterType iter = std::lower_bound(script_.begin(), script_.end(), pr); - if (iter != script_.end() && iter->first == key) { - last_found_ = *script_offset = iter - script_.begin(); - return true; - } else { - return false; - } - } - - - Input input_; // Use the same input_ object for reading each file, in case - // the scp specifies offsets in an archive (so we can keep the same file open). - RspecifierOptions opts_; - std::string rspecifier_; // rspecifier used to open it; used in debug messages - std::string script_rxfilename_; // filename of script. - - std::string current_key_; // Key of object in holder_ - Holder holder_; - - // the script_ variable contains pairs of (key, filename), sorted using - // std::sort. This can be used with binary_search to look up filenames for - // writing. If this becomes inefficient we can use std::unordered_map (but I - // suspect this wouldn't be significantly faster & would use more memory). - // If memory becomes a problem here, the user should probably be passing - // only the relevant part of the scp file rather than expecting us to get too - // clever in the code. - std::vector<std::pair<std::string, std::string> > script_; - size_t last_found_; // This is for an optimization used in FindFilename. - - enum { // [Do we have [Does holder_ - // script_ set up?] contain object?] - kUninitialized, // no no - kNotReadScript, // no no - kNotHaveObject, // yes no - kHaveObject, // yes yes - kGaveObject, // yes yes - // [kGaveObject is as kHaveObject but we note that the - // user has already read it; this is for checking that - // if "once" is specified, the user actually only reads - // it once. - } state_; - -}; - - - - -// This is the base-class (with some implemented functions) for the -// implementations of RandomAccessTableReader when it's an archive. This -// base-class handles opening the files, storing the state of the reading -// process, and loading objects. This is the only case in which we have -// an intermediate class in the hierarchy between the virtual ImplBase -// class and the actual Impl classes. -// The child classes vary in the assumptions regarding sorting, etc. - -template<class Holder> class RandomAccessTableReaderArchiveImplBase: - public RandomAccessTableReaderImplBase<Holder> { - public: - typedef typename Holder::T T; - - RandomAccessTableReaderArchiveImplBase(): holder_(NULL), state_(kUninitialized) { } - - virtual bool Open(const std::string &rspecifier) { - if (state_ != kUninitialized) { - if (! this->Close()) // call Close() yourself to suppress this exception. - KALDI_ERR << "TableReader::Open, error closing previous input."; - } - rspecifier_ = rspecifier; - RspecifierType rs = ClassifyRspecifier(rspecifier, &archive_rxfilename_, - &opts_); - KALDI_ASSERT(rs == kArchiveRspecifier); - - // NULL means don't expect binary-mode header - bool ans; - if (Holder::IsReadInBinary()) - ans = input_.Open(archive_rxfilename_, NULL); - else - ans = input_.OpenTextMode(archive_rxfilename_); - if (!ans) { // header. - KALDI_WARN << "TableReader: failed to open stream " - << PrintableRxfilename(archive_rxfilename_); - state_ = kUninitialized; // Failure on Open - return false; // User should print the error message. - } else { - state_ = kNoObject; - } - return true; - } - - // ReadNextObject() requires that the state be kNoObject, - // and it will try read the next object. If it succeeds, - // it sets the state to kHaveObject, and - // cur_key_ and holder_ have the key and value. If it fails, - // it sets the state to kError or kEof. - void ReadNextObject() { - if (state_ != kNoObject) - KALDI_ERR << "TableReader: ReadNextObject() called from wrong state."; // Code error - // somewhere in this class or a child class. - std::istream &is = input_.Stream(); - is.clear(); // Clear any fail bits that may have been set... just in case - // this happened in the Read function. - is >> cur_key_; // This eats up any leading whitespace and gets the string. - if (is.eof()) { - state_ = kEof; - return; - } - if (is.fail()) { // This shouldn't really happen, barring file-system errors. - KALDI_WARN << "Error reading archive: rspecifier is " << rspecifier_; - state_ = kError; - return; - } - int c; - if ((c = is.peek()) != ' ' && c != '\t' && c != '\n') { // We expect a space ' ' after the key. - // We also allow tab, just so we can read archives generated by scripts that may - // not be fully aware of how this format works. - KALDI_WARN << "Invalid archive file format: expected space after key " <<cur_key_ - <<", got character " - << CharToString(static_cast<char>(is.peek())) << ", reading archive " - << PrintableRxfilename(archive_rxfilename_); - state_ = kError; - return; - } - if (c != '\n') is.get(); // Consume the space or tab. - holder_ = new Holder; - if (holder_->Read(is)) { - state_ = kHaveObject; - return; - } else { - KALDI_WARN << "Object read failed, reading archive " - << PrintableRxfilename(archive_rxfilename_); - state_ = kError; - delete holder_; - holder_ = NULL; - return; - } - } - - virtual bool IsOpen() const { - switch (state_) { - case kEof: case kError: case kHaveObject: case kNoObject: return true; - case kUninitialized: return false; - default: KALDI_ERR << "IsOpen() called on invalid object."; - return false; - } - } - - // Called by the child-class virutal Close() functions; does the - // shared parts of the cleanup. - bool CloseInternal() { - if (! this->IsOpen()) - KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly."; - if (input_.IsOpen()) - input_.Close(); - if (state_ == kHaveObject) { - KALDI_ASSERT(holder_ != NULL); - delete holder_; - holder_ = NULL; - } else KALDI_ASSERT(holder_ == NULL); - bool ans = (state_ != kError); - state_ = kUninitialized; - if (!ans && opts_.permissive) { - KALDI_WARN << "Error state detected closing reader. " - << "Ignoring it because you specified permissive mode."; - return true; - } - return ans; - } - - ~RandomAccessTableReaderArchiveImplBase() { - // The child class has the responsibility to call CloseInternal(). - KALDI_ASSERT(state_ == kUninitialized && holder_ == NULL); - } - private: - Input input_; // Input object for the archive - protected: - // The variables below are accessed by child classes. - - std::string cur_key_; // current key (if state == kHaveObject). - Holder *holder_; // Holds the object we just read (if state == kHaveObject) - - std::string rspecifier_; - std::string archive_rxfilename_; - RspecifierOptions opts_; - - enum { // [The state of the reading process] [does holder_ [is input_ - // have object] open] - kUninitialized, // Uninitialized or closed no no - kNoObject, // Do not have object in holder_ no yes - kHaveObject, // Have object in holder_ yes yes - kEof, // End of file no yes - kError, // Some kind of error-state in the reading. no yes - } state_; - -}; - - -// RandomAccessTableReaderDSortedArchiveImpl (DSorted for "doubly sorted") is the -// implementation for random-access reading of archives when both the archive, -// and the calling code, are in sorted order (i.e. we ask for the keys in sorted -// order). This is when the s and cs options are both given. It only ever has -// to keep one object in memory. It inherits from -// RandomAccessTableReaderArchiveImplBase which implements the common parts of -// RandomAccessTableReader that are used when it's an archive we're reading from. - -template<class Holder> class RandomAccessTableReaderDSortedArchiveImpl: - public RandomAccessTableReaderArchiveImplBase<Holder> { - using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized; - using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject; - using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject; - using RandomAccessTableReaderArchiveImplBase<Holder>::kEof; - using RandomAccessTableReaderArchiveImplBase<Holder>::kError; - using RandomAccessTableReaderArchiveImplBase<Holder>::state_; - using RandomAccessTableReaderArchiveImplBase<Holder>::opts_; - using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_; - using RandomAccessTableReaderArchiveImplBase<Holder>::holder_; - using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_; - using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_; - using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject; - public: - typedef typename Holder::T T; - - RandomAccessTableReaderDSortedArchiveImpl() { } - - virtual bool Close() { - // We don't have anything additional to clean up, so just - // call generic base-class one. - return this->CloseInternal(); - } - - virtual bool HasKey(const std::string &key) { - return FindKeyInternal(key); - } - virtual const T & Value(const std::string &key) { - if (FindKeyInternal(key)) { - KALDI_ASSERT(this->state_ == kHaveObject && key == this->cur_key_ - && holder_ != NULL); - return this->holder_->Value(); - } else { - KALDI_ERR << "Value() called but no such key " << key - << " in archive " << PrintableRxfilename(archive_rxfilename_); - return *(const T*)NULL; // keep compiler happy. - } - } - - virtual ~RandomAccessTableReaderDSortedArchiveImpl() { - if (this->IsOpen()) - if (!Close()) // more specific warning will already have been printed. - // we are in some kind of error state & user did not find out by - // calling Close(). - KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is " - << rspecifier_; - } - private: - // FindKeyInternal tries to find the key by calling "ReadNextObject()" - // as many times as necessary till we get to it. It is called from - // both FindKey and Value(). - bool FindKeyInternal(const std::string &key) { - // First check that the user is calling us right: should be - // in sorted order. If not, error. - if (!last_requested_key_.empty()) { - if (key.compare(last_requested_key_) < 0) { // key < last_requested_key_ - KALDI_ERR << "You provided the \"cs\" option " - << "but are not calling with keys in sorted order: " - << key << " < " << last_requested_key_ << ": rspecifier is " - << rspecifier_; - } - } - // last_requested_key_ is just for debugging of order of calling. - last_requested_key_ = key; - - if (state_ == kNoObject) - ReadNextObject(); // This can only happen - // once, the first time someone calls HasKey() or Value(). We don't - // do it in the initializer to stop the program hanging too soon, - // if reading from a pipe. - - if (state_ == kEof || state_ == kError) return false; - - if (state_ == kUninitialized) - KALDI_ERR << "Trying to access a RandomAccessTableReader object that is not open."; - - std::string last_key_; // To check that - // the archive we're reading is in sorted order. - while (1) { - KALDI_ASSERT(state_ == kHaveObject); - int compare = key.compare(cur_key_); - if (compare == 0) { // key == key_ - return true; // we got it.. - } else if (compare < 0) { // key < cur_key_, so we already read past the - // place where we want to be. This implies that we will never find it - // [due to the sorting etc., this means it just isn't in the archive]. - return false; - } else { // compare > 0, key > cur_key_. We need to read further ahead. - last_key_ = cur_key_; - // read next object.. we have to set state to kNoObject first. - KALDI_ASSERT(holder_ != NULL); - delete holder_; - holder_ = NULL; - state_ = kNoObject; - ReadNextObject(); - if (state_ != kHaveObject) - return false; // eof or read error. - if (cur_key_.compare(last_key_) <= 0) { - KALDI_ERR << "You provided the \"s\" option " - << " (sorted order), but keys are out of order or duplicated: " - << last_key_ << " is followed by " << cur_key_ - << ": rspecifier is " << rspecifier_; - } - } - } - } - - /// Last string provided to HasKey() or Value(); - std::string last_requested_key_; - - -}; - -// RandomAccessTableReaderSortedArchiveImpl is for random-access reading of -// archives when the user specified the sorted (s) option but not the -// called-sorted (cs) options. -template<class Holder> class RandomAccessTableReaderSortedArchiveImpl: - public RandomAccessTableReaderArchiveImplBase<Holder> { - using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized; - using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject; - using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject; - using RandomAccessTableReaderArchiveImplBase<Holder>::kEof; - using RandomAccessTableReaderArchiveImplBase<Holder>::kError; - using RandomAccessTableReaderArchiveImplBase<Holder>::state_; - using RandomAccessTableReaderArchiveImplBase<Holder>::opts_; - using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_; - using RandomAccessTableReaderArchiveImplBase<Holder>::holder_; - using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_; - using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_; - using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject; - - public: - typedef typename Holder::T T; - - RandomAccessTableReaderSortedArchiveImpl(): - last_found_index_(static_cast<size_t>(-1)), - pending_delete_(static_cast<size_t>(-1)) { } - - virtual bool Close() { - for (size_t i = 0; i < seen_pairs_.size(); i++) - if (seen_pairs_[i].second) - delete seen_pairs_[i].second; - seen_pairs_.clear(); - - pending_delete_ = static_cast<size_t>(-1); - last_found_index_ = static_cast<size_t>(-1); - - return this->CloseInternal(); - } - virtual bool HasKey(const std::string &key) { - HandlePendingDelete(); - size_t index; - bool ans = FindKeyInternal(key, &index); - if (ans && opts_.once && seen_pairs_[index].second == NULL) { - // Just do a check RE the once option. "&&opts_.once" is for - // efficiency since this can only happen in that case. - KALDI_ERR << "Error: HasKey called after Value() already called for " - << " that key, and once (o) option specified: rspecifier is " - << rspecifier_; - } - return ans; - } - virtual const T & Value(const std::string &key) { - HandlePendingDelete(); - size_t index; - if (FindKeyInternal(key, &index)) { - if (seen_pairs_[index].second == NULL) { // can happen if opts.once_ - KALDI_ERR << "Error: Value() called more than once for key " - << key << " and once (o) option specified: rspecifier is " - << rspecifier_; - } - if (opts_.once) - pending_delete_ = index; // mark this index to be deleted on next call. - return seen_pairs_[index].second->Value(); - } else { - KALDI_ERR << "Value() called but no such key " << key - << " in archive " << PrintableRxfilename(archive_rxfilename_); - return *(const T*)NULL; // keep compiler happy. - } - } - virtual ~RandomAccessTableReaderSortedArchiveImpl() { - if (this->IsOpen()) - if (!Close()) // more specific warning will already have been printed. - // we are in some kind of error state & user did not find out by - // calling Close(). - KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is " - << rspecifier_; - } - private: - void HandlePendingDelete() { - const size_t npos = static_cast<size_t>(-1); - if (pending_delete_ != npos) { - KALDI_ASSERT(pending_delete_ < seen_pairs_.size()); - KALDI_ASSERT(seen_pairs_[pending_delete_].second != NULL); - delete seen_pairs_[pending_delete_].second; - seen_pairs_[pending_delete_].second = NULL; - pending_delete_ = npos; - } - } - - // FindKeyInternal tries to find the key in the array "seen_pairs_". - // If it is not already there, it reads ahead as far as necessary - // to determine whether we have the key or not. On success it returns - // true and puts the index into the array seen_pairs_, into "index"; - // on failure it returns false. - // It will leave the state as either kNoObject, kEof or kError. - // FindKeyInternal does not do any checking about whether you are asking - // about a key that has been already given (with the "once" option). - // That is the user's responsibility. - - bool FindKeyInternal(const std::string &key, size_t *index) { - // First, an optimization in case the previous call was for the - // same key, and we found it. - if (last_found_index_ < seen_pairs_.size() - && seen_pairs_[last_found_index_].first == key) { - *index = last_found_index_; - return true; - } - - if (state_ == kUninitialized) - KALDI_ERR << "Trying to access a RandomAccessTableReader object that is not open."; - - // Step one is to see whether we have to read ahead for the object.. - // Note, the possible states right now are kNoObject, kEof or kError. - // We are never in the state kHaveObject except just after calling - // ReadNextObject(). - bool looped = false; - while (state_ == kNoObject && - (seen_pairs_.empty() || key.compare(seen_pairs_.back().first) > 0)) { - looped = true; - // Read this as: - // while ( the stream is potentially good for reading && - // ([got no keys] || key > most_recent_key) ) { ... - // Try to read a new object. - // Note that the keys in seen_pairs_ are ordered from least to greatest. - ReadNextObject(); - if (state_ == kHaveObject) { // Successfully read object. - if (!seen_pairs_.empty() && // This is just a check. - cur_key_.compare(seen_pairs_.back().first) <= 0) { - // read the expression above as: !( cur_key_ > previous_key). - // it means we are not in sorted order [the user specified that we - // are, or we would not be using this implementation]. - KALDI_ERR << "You provided the sorted (s) option but keys in archive " - << PrintableRxfilename(archive_rxfilename_) << " are not " - << "in sorted order: " << seen_pairs_.back().first - << " is followed by " << cur_key_; - } - KALDI_ASSERT(holder_ != NULL); - seen_pairs_.push_back(std::make_pair(cur_key_, holder_)); - holder_ = NULL; - state_ = kNoObject; - } - } - if (looped) { // We only need to check the last element of the seen_pairs_ array, - // since we would not have read more after getting "key". - if (!seen_pairs_.empty() && seen_pairs_.back().first == key) { - last_found_index_ = *index = seen_pairs_.size() - 1; - return true; - } else return false; - } - // Now we have do an actual binary search in the seen_pairs_ array. - std::pair<std::string, Holder*> pr(key, static_cast<Holder*>(NULL)); - typename std::vector<std::pair<std::string, Holder*> >::iterator - iter = std::lower_bound(seen_pairs_.begin(), seen_pairs_.end(), - pr, PairCompare()); - if (iter != seen_pairs_.end() && - key == iter->first) { - last_found_index_ = *index = (iter - seen_pairs_.begin()); - return true; - } else return false; - } - - // These are the pairs of (key, object) we have read. We keep all the keys we - // have read but the actual objects (if they are stored with pointers inside - // the Holder object) may be deallocated if once == true, and the Holder - // pointer set to NULL. - std::vector<std::pair<std::string, Holder*> > seen_pairs_; - size_t last_found_index_; // An optimization s.t. if FindKeyInternal called twice with - // same key (as it often will), it doesn't have to do the key search twice. - size_t pending_delete_; // If opts_.once == true, this is the index of - // element of seen_pairs_ that is pending deletion. - struct PairCompare { - // PairCompare is the Less-than operator for the pairs of(key, Holder). - // compares the keys. - inline bool operator() (const std::pair<std::string, Holder*> &pr1, - const std::pair<std::string, Holder*> &pr2) { - return (pr1.first.compare(pr2.first) < 0); - } - }; -}; - - - -// RandomAccessTableReaderUnsortedArchiveImpl is for random-access reading of -// archives when the user does not specify the sorted (s) option (in this case -// the called-sorted, or "cs" option, is ignored). This is the least efficient -// of the random access archive readers, in general, but it can be as efficient -// as the others, in speed, memory and latency, if the "once" option is specified -// and it happens that the keys of the archive are the same as the keys the code -// is called with (to HasKey() and Value()), and in the same order. However, if -// you ask it for a key that's not present it will have to read the archive till -// the end and store it all in memory. - -template<class Holder> class RandomAccessTableReaderUnsortedArchiveImpl: - public RandomAccessTableReaderArchiveImplBase<Holder> { - using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized; - using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject; - using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject; - using RandomAccessTableReaderArchiveImplBase<Holder>::kEof; - using RandomAccessTableReaderArchiveImplBase<Holder>::kError; - using RandomAccessTableReaderArchiveImplBase<Holder>::state_; - using RandomAccessTableReaderArchiveImplBase<Holder>::opts_; - using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_; - using RandomAccessTableReaderArchiveImplBase<Holder>::holder_; - using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_; - using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_; - using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject; - - typedef typename Holder::T T; - - public: - RandomAccessTableReaderUnsortedArchiveImpl(): to_delete_iter_(map_.end()), - to_delete_iter_valid_(false) - { - map_.max_load_factor(0.5); // make it quite empty -> quite efficient. - // default seems to be 1. - } - - virtual bool Close() { - for (typename MapType::iterator iter = map_.begin(); - iter != map_.end(); - ++iter) { - if (iter->second) - delete iter->second; - } - map_.clear(); - first_deleted_string_ = ""; - to_delete_iter_valid_ = false; - return this->CloseInternal(); - } - - virtual bool HasKey(const std::string &key) { - HandlePendingDelete(); - return FindKeyInternal(key, NULL); - } - virtual const T & Value(const std::string &key) { - HandlePendingDelete(); - const T *ans_ptr = NULL; - if (FindKeyInternal(key, &ans_ptr)) - return *ans_ptr; - else - KALDI_ERR << "Value() called but no such key " << key - << " in archive " << PrintableRxfilename(archive_rxfilename_); - return *(const T*)NULL; // keep compiler happy. - } - virtual ~RandomAccessTableReaderUnsortedArchiveImpl() { - if (this->IsOpen()) - if (!Close()) // more specific warning will already have been printed. - // we are in some kind of error state & user did not find out by - // calling Close(). - KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is " - << rspecifier_; - } - private: - void HandlePendingDelete() { - if (to_delete_iter_valid_) { - to_delete_iter_valid_ = false; - delete to_delete_iter_->second; // Delete Holder object. - if (first_deleted_string_.length() == 0) - first_deleted_string_ = to_delete_iter_->first; - map_.erase(to_delete_iter_); // delete that element. - } - } - - // FindKeyInternal tries to find the key in the map "map_" - // If it is not already there, it reads ahead either until it finds the - // key, or until end of file. If called with value_ptr == NULL, - // it assumes it's called from HasKey() and just returns true or false - // and doesn't otherwise have side effects. If called with value_ptr != - // NULL, it assumes it's called from Value(). Thus, it will crash - // if it cannot find the key. If it can find it it puts its address in - // *value_ptr, and if opts_once == true it will mark that element of the - // map to be deleted. - - bool FindKeyInternal(const std::string &key, const T **value_ptr = NULL) { - typename MapType::iterator iter = map_.find(key); - if (iter != map_.end()) { // Found in the map... - if (value_ptr == NULL) { // called from HasKey - return true; // this is all we have to do. - } else { - *value_ptr = &(iter->second->Value()); - if (opts_.once) { // value won't be needed again, so mark - // for deletion. - to_delete_iter_ = iter; // pending delete. - KALDI_ASSERT(!to_delete_iter_valid_); - to_delete_iter_valid_ = true; - } - return true; - } - } - while (state_ == kNoObject) { - ReadNextObject(); - if (state_ == kHaveObject) { // Successfully read object. - state_ = kNoObject; // we are about to transfer ownership - // of the object in holder_ to map_. - // Insert it into map_. - std::pair<typename MapType::iterator, bool> pr = - map_.insert(typename MapType::value_type(cur_key_, holder_)); - - if (!pr.second) { // Was not inserted-- previous element w/ same key - delete holder_; // map was not changed, no ownership transferred. - holder_ = NULL; - KALDI_ERR << "Error in RandomAccessTableReader: duplicate key " - << cur_key_ << " in archive " << archive_rxfilename_; - } - holder_ = NULL; // ownership transferred to map_. - if (cur_key_ == key) { // the one we wanted.. - if (value_ptr == NULL) { // called from HasKey - return true; - } else { // called from Value() - *value_ptr = &(pr.first->second->Value()); // this gives us the - // Value() from the Holder in the map. - if (opts_.once) { // mark for deletion, as won't be needed again. - to_delete_iter_ = pr.first; - KALDI_ASSERT(!to_delete_iter_valid_); - to_delete_iter_valid_ = true; - } - return true; - } - } - } - } - if (opts_.once && key == first_deleted_string_) { - KALDI_ERR << "You specified the once (o) option but " - << "you are calling using key " << key - << " more than once: rspecifier is " << rspecifier_; - } - return false; // We read the entire archive (or got to error state) and didn't - // find it. - } - - typedef unordered_map<std::string, Holder*, StringHasher> MapType; - MapType map_; - - typename MapType::iterator to_delete_iter_; - bool to_delete_iter_valid_; - - std::string first_deleted_string_; // keep the first string we deleted - // from map_ (if opts_.once == true). It's for an inexact spot-check that the - // "once" option isn't being used incorrectly. - -}; - - - - - -template<class Holder> -RandomAccessTableReader<Holder>::RandomAccessTableReader(const std::string &rspecifier): - impl_(NULL) { - if (rspecifier != "" && !Open(rspecifier)) - KALDI_ERR << "Error opening RandomAccessTableReader object " - " (rspecifier is: " << rspecifier << ")"; -} - -template<class Holder> -bool RandomAccessTableReader<Holder>::Open(const std::string &rspecifier) { - if (IsOpen()) - KALDI_ERR << "Already open."; - RspecifierOptions opts; - RspecifierType rs = ClassifyRspecifier(rspecifier, NULL, &opts); - switch (rs) { - case kScriptRspecifier: - impl_ = new RandomAccessTableReaderScriptImpl<Holder>(); - break; - case kArchiveRspecifier: - if (opts.sorted) { - if (opts.called_sorted) // "doubly" sorted case. - impl_ = new RandomAccessTableReaderDSortedArchiveImpl<Holder>(); - else - impl_ = new RandomAccessTableReaderSortedArchiveImpl<Holder>(); - } else impl_ = new RandomAccessTableReaderUnsortedArchiveImpl<Holder>(); - break; - case kNoRspecifier: default: - KALDI_WARN << "Invalid rspecifier: " - << rspecifier; - return false; - } - if (impl_->Open(rspecifier)) - return true; - else { - // Warning will already have been printed. - delete impl_; - impl_ = NULL; - return false; - } -} - -template<class Holder> -bool RandomAccessTableReader<Holder>::HasKey(const std::string &key) { - CheckImpl(); - if (!IsToken(key)) - KALDI_ERR << "Invalid key \"" << key << '"'; - return impl_->HasKey(key); -} - - -template<class Holder> -const typename RandomAccessTableReader<Holder>::T& -RandomAccessTableReader<Holder>::Value(const std::string &key) { - CheckImpl(); - return impl_->Value(key); -} - -template<class Holder> -bool RandomAccessTableReader<Holder>::Close() { - CheckImpl(); - bool ans =impl_->Close(); - delete impl_; - impl_ = NULL; - return ans; -} - -template<class Holder> -RandomAccessTableReader<Holder>::~RandomAccessTableReader() { - if (IsOpen() && !Close()) // call Close() yourself to stop this being thrown. - KALDI_ERR << "failure detected in destructor."; -} - -template<class Holder> -void SequentialTableReader<Holder>::CheckImpl() const { - if (!impl_) { - KALDI_ERR << "Trying to use empty SequentialTableReader (perhaps you " - << "passed the empty string as an argument to a program?)"; - } -} - -template<class Holder> -void RandomAccessTableReader<Holder>::CheckImpl() const { - if (!impl_) { - KALDI_ERR << "Trying to use empty RandomAccessTableReader (perhaps you " - << "passed the empty string as an argument to a program?)"; - } -} - -template<class Holder> -void TableWriter<Holder>::CheckImpl() const { - if (!impl_) { - KALDI_ERR << "Trying to use empty TableWriter (perhaps you " - << "passed the empty string as an argument to a program?)"; - } -} - -template<class Holder> -RandomAccessTableReaderMapped<Holder>::RandomAccessTableReaderMapped( - const std::string &table_rxfilename, - const std::string &utt2spk_rxfilename): - reader_(table_rxfilename), token_reader_(table_rxfilename.empty() ? "" : - utt2spk_rxfilename), - utt2spk_rxfilename_(utt2spk_rxfilename) { } - -template<class Holder> -bool RandomAccessTableReaderMapped<Holder>::Open( - const std::string &table_rxfilename, - const std::string &utt2spk_rxfilename) { - if (reader_.IsOpen()) reader_.Close(); - if (token_reader_.IsOpen()) token_reader_.Close(); - KALDI_ASSERT(!table_rxfilename.empty()); - if (!reader_.Open(table_rxfilename)) return false; // will have printed - // warning internally, probably. - if (!utt2spk_rxfilename.empty()) { - if (!token_reader_.Open(utt2spk_rxfilename)) { - reader_.Close(); - return false; - } - } - return true; -} - - -template<class Holder> -bool RandomAccessTableReaderMapped<Holder>::HasKey(const std::string &utt) { - // We don't check IsOpen, we let the call go through to the member variable - // (reader_), which will crash with a more informative error message than - // we can give here, as we don't any longer know the rxfilename. - if (token_reader_.IsOpen()) { // We need to map the key from utt to spk. - if (!token_reader_.HasKey(utt)) - KALDI_ERR << "Attempting to read key " << utt << ", which is not present " - << "in utt2spk map or similar map being read from " - << PrintableRxfilename(utt2spk_rxfilename_); - const std::string &spk = token_reader_.Value(utt); - return reader_.HasKey(spk); - } else { - return reader_.HasKey(utt); - } -} - -template<class Holder> -const typename Holder::T& RandomAccessTableReaderMapped<Holder>::Value( - const std::string &utt) { - if (token_reader_.IsOpen()) { // We need to map the key from utt to spk. - if (!token_reader_.HasKey(utt)) - KALDI_ERR << "Attempting to read key " << utt << ", which is not present " - << "in utt2spk map or similar map being read from " - << PrintableRxfilename(utt2spk_rxfilename_); - const std::string &spk = token_reader_.Value(utt); - return reader_.Value(spk); - } else { - return reader_.Value(utt); - } -} - - - -/// @} - -} // end namespace kaldi - - - -#endif diff --git a/kaldi_io/src/kaldi/util/kaldi-table.h b/kaldi_io/src/kaldi/util/kaldi-table.h deleted file mode 100644 index 6f6cb98..0000000 --- a/kaldi_io/src/kaldi/util/kaldi-table.h +++ /dev/null @@ -1,459 +0,0 @@ -// util/kaldi-table.h - -// Copyright 2009-2011 Microsoft Corporation -// 2013 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_KALDI_TABLE_H_ -#define KALDI_UTIL_KALDI_TABLE_H_ - -#include <string> -#include <vector> -#include <utility> - -#include "base/kaldi-common.h" -#include "util/kaldi-holder.h" - -namespace kaldi { - -// Forward declarations -template<class Holder> class RandomAccessTableReaderImplBase; -template<class Holder> class SequentialTableReaderImplBase; -template<class Holder> class TableWriterImplBase; - -/// \addtogroup table_group -/// @{ - -// This header defines the Table classes (RandomAccessTableReader, -// SequentialTableReader and TableWriter) and explains what the Holder classes, -// which the Table class requires as a template argument, are like. It also -// explains the "rspecifier" and "wspecifier" concepts (these are strings that -// explain how to read/write objects via archives or scp files. A table is -// conceptually a collection of objects of a particular type T indexed by keys -// of type std::string (these Keys additionally have an order within each table). -// The Table classes are templated on a type (call it Holder) such that Holder::T -// is a typedef equal to T. - -// see kaldi-holder.h for detail on the Holder classes. - -typedef std::vector<std::string> KeyList; - -// Documentation for "wspecifier" -// "wspecifier" describes how we write a set of objects indexed by keys. -// The basic, unadorned wspecifiers are as follows: -// -// ark:wxfilename -// scp:rxfilename -// ark,scp:filename,wxfilename -// ark,scp:filename,wxfilename -// -// -// We also allow the following modifiers: -// t means text mode. -// b means binary mode. -// f means flush the stream after writing each entry. -// (nf means don't flush, and isn't very useful as the default is to flush). -// p means permissive mode, when writing to an "scp" file only: will ignore -// missing scp entries, i.e. won't write anything for those files but will -// return success status). -// -// So the following are valid wspecifiers: -// ark,b,f:foo -// "ark,b,b:| gzip -c > foo" -// "ark,scp,t,nf:foo.ark,|gzip -c > foo.scp.gz" -// ark,b:- -// -// The meanings of rxfilename and wxfilename are as described in -// kaldi-stream.h (they are filenames but include pipes, stdin/stdout -// and so on; filename is a regular filename. -// - -// The ark:wxfilename type of wspecifier instructs the class to -// write directly to an archive. For small objects (e.g. lists of ints), -// the text archive format will generally be human readable with one line -// per entry in the archive. -// -// The type "scp:xfilename" refers to an scp file which should -// already exist on disk, and tells us where to write the data for -// each key (usually an actual file); each line of the scp file -// would be: -// key xfilename -// -// The type ark,scp:filename,wxfilename means -// we write both an archive and an scp file that specifies offsets into the -// archive, with lines like: -// key filename:12407 -// where the number is the byte offset into the file. -// In this case we restrict the archive-filename to be an actual filename, -// as we can't see a situtation where an extended filename would make sense -// for this (we can't fseek() in pipes). - -enum WspecifierType { - kNoWspecifier, - kArchiveWspecifier, - kScriptWspecifier, - kBothWspecifier -}; - -struct WspecifierOptions { - bool binary; - bool flush; - bool permissive; // will ignore absent scp entries. - WspecifierOptions(): binary(true), flush(false), permissive(false) { } -}; - -// ClassifyWspecifier returns the type of the wspecifier string, -// and (if pointers are non-NULL) outputs the extra information -// about the options, and the script and archive -// filenames. -WspecifierType ClassifyWspecifier(const std::string &wspecifier, - std::string *archive_wxfilename, - std::string *script_wxfilename, - WspecifierOptions *opts); - -// ReadScriptFile reads an .scp file in its entirety, and appends it -// (in order as it was in the scp file) in script_out_, which contains -// pairs of (key, xfilename). The .scp -// file format is: on each line, key xfilename -// where xfilename means rxfilename or wxfilename, and may contain internal spaces -// (we trim away any leading or trailing space). The key is space-free. -// ReadScriptFile returns true if the format was valid (empty files -// are valid). -// If 'print_warnings', it will print out warning messages that explain what kind -// of error there was. -bool ReadScriptFile(const std::string &rxfilename, - bool print_warnings, - std::vector<std::pair<std::string, std::string> > *script_out); - -// This version of ReadScriptFile works from an istream. -bool ReadScriptFile(std::istream &is, - bool print_warnings, - std::vector<std::pair<std::string, std::string> > *script_out); - -// Writes, for each entry in script, the first element, then ' ', then the second -// element then '\n'. Checks that the keys (first elements of pairs) are valid -// tokens (nonempty, no whitespace), and the values (second elements of pairs) -// are newline-free and contain no leading or trailing space. Returns true on -// success. -bool WriteScriptFile(const std::string &wxfilename, - const std::vector<std::pair<std::string, std::string> > &script); - -// This version writes to an ostream. -bool WriteScriptFile(std::ostream &os, - const std::vector<std::pair<std::string, std::string> > &script); - -// Documentation for "rspecifier" -// "rspecifier" describes how we read a set of objects indexed by keys. -// The possibilities are: -// -// ark:rxfilename -// scp:rxfilename -// -// We also allow various modifiers: -// o means the program will only ask for each key once, which enables -// the reader to discard already-asked-for values. -// s means the keys are sorted on input (means we don't have to read till -// eof if someone asked for a key that wasn't there). -// cs means that it is called in sorted order (we are generally asserting this -// based on knowledge of how the program works). -// p means "permissive", and causes it to skip over keys whose corresponding -// scp-file entries cannot be read. [and to ignore errors in archives and -// script files, and just consider the "good" entries]. -// We allow the negation of the options above, as in no, ns, np, -// but these aren't currently very useful (just equivalent to omitting the -// corresponding option). -// [any of the above options can be prefixed by n to negate them, e.g. no, ns, -// ncs, np; but these aren't currently useful as you could just omit the option]. -// -// b is ignored [for scripting convenience] -// t is ignored [for scripting convenience] -// -// -// So for instance the following would be a valid rspecifier: -// -// "o, s, p, ark:gunzip -c foo.gz|" - -struct RspecifierOptions { - // These options only make a difference for the RandomAccessTableReader class. - bool once; // we assert that the program will only ask for each key once. - bool sorted; // we assert that the keys are sorted. - bool called_sorted; // we assert that the (HasKey(), Value() functions will - // also be called in sorted order. [this implies "once" but not vice versa]. - bool permissive; // If "permissive", when reading from scp files it treats - // scp files that can't be read as if the corresponding key were not there. - // For archive files it will suppress errors getting thrown if the archive - - // is corrupted and can't be read to the end. - - RspecifierOptions(): once(false), sorted(false), - called_sorted(false), permissive(false) { } -}; - -enum RspecifierType { - kNoRspecifier, - kArchiveRspecifier, - kScriptRspecifier -}; - -RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename, - RspecifierOptions *opts); - -// Class Table<Holder> is useful when you want the entire set of -// objects in memory. NOT IMPLEMENTED YET. -// It is the least scalable way of accessing data in Tables. -// The *TableReader and TableWriter classes are more scalable. - - -/// Allows random access to a collection -/// of objects in an archive or script file; see \ref io_sec_tables. -template<class Holder> -class RandomAccessTableReader { - public: - typedef typename Holder::T T; - - RandomAccessTableReader(): impl_(NULL) { } - - // This constructor equivalent to default constructor + "open", but - // throws on error. - RandomAccessTableReader(const std::string &rspecifier); - - // Opens the table. - bool Open(const std::string &rspecifier); - - // Returns true if table is open. - bool IsOpen() const { return (impl_ != NULL); } - - // Close() will close the table [throws if it was not open], - // and returns true on success (false if we were reading an - // archive and we discovered an error in the archive). - bool Close(); - - // Says if it has this key. - // If you are using the "permissive" (p) read option, - // it will return false for keys whose corresponding entry - // in the scp file cannot be read. - - bool HasKey(const std::string &key); - - // Value() may throw if you are reading an scp file, you - // do not have the "permissive" (p) option, and an entry - // in the scp file cannot be read. Typically you won't - // want to catch this error. - const T &Value(const std::string &key); - - ~RandomAccessTableReader(); - - // Allow copy-constructor only for non-opened readers (needed for inclusion in - // stl vector) - RandomAccessTableReader(const RandomAccessTableReader<Holder> &other): - impl_(NULL) { KALDI_ASSERT(other.impl_ == NULL); } - private: - // Disallow assignment. - RandomAccessTableReader &operator=(const RandomAccessTableReader<Holder>&); - void CheckImpl() const; // Checks that impl_ is non-NULL; prints an error - // message and dies (with KALDI_ERR) if NULL. - RandomAccessTableReaderImplBase<Holder> *impl_; -}; - - - -/// A templated class for reading objects sequentially from an archive or script -/// file; see \ref io_sec_tables. -template<class Holder> -class SequentialTableReader { - public: - typedef typename Holder::T T; - - SequentialTableReader(): impl_(NULL) { } - - // This constructor equivalent to default constructor + "open", but - // throws on error. - SequentialTableReader(const std::string &rspecifier); - - // Opens the table. Returns exit status; but does throw if previously - // open stream was in error state. Call Close to stop this [anyway, - // calling Open more than once is not recommended.] - bool Open(const std::string &rspecifier); - - // Returns true if we're done. It will also return true if there's some kind - // of error and we can't read any more; in this case, you can detect the - // error by calling Close and checking the return status; otherwise - // the destructor will throw. - inline bool Done(); - - // Only valid to call Key() if Done() returned false. - inline std::string Key(); - - // FreeCurrent() is provided as an optimization to save memory, for large - // objects. It instructs the class to deallocate the current value. The - // reference Value() will/ be invalidated by this. - - void FreeCurrent(); - - // Return reference to the current value. - // The reference is valid till next call to this object. - // If will throw if you are reading an scp file, did not - // specify the "permissive" (p) option and the file cannot - // be read. [The permissive option makes it behave as if that - // key does not even exist, if the corresponding file cannot be - // read.] You probably wouldn't want to catch this exception; - // the user can just specify the p option in the rspecifier. - const T &Value(); - - // Next goes to the next key. It will not throw; any error will - // result in Done() returning true, and then the destructor will - // throw unless you call Close(). - void Next(); - - // Returns true if table is open for reading (does not imply - // stream is in good state). - bool IsOpen() const; - - // Close() will return false (failure) if Done() became true - // because of an error/ condition rather than because we are - // really done [e.g. because of an error or early termination - // in the archive]. - // If there is an error and you don't call Close(), the destructor - // will fail. - // Close() - bool Close(); - - // The destructor may throw. This is the desired behaviour, as it's the way we - // signal the error to the user (to detect it, call Close(). The issue is that - // otherwise the user has no way to tell whether Done() returned true because - // we reached the end of the archive or script, or because there was an error - // that prevented further reading. - ~SequentialTableReader(); - - // Allow copy-constructor only for non-opened readers (needed for inclusion in - // stl vector) - SequentialTableReader(const SequentialTableReader<Holder> &other): - impl_(NULL) { KALDI_ASSERT(other.impl_ == NULL); } - private: - // Disallow assignment. - SequentialTableReader &operator = (const SequentialTableReader<Holder>&); - void CheckImpl() const; // Checks that impl_ is non-NULL; prints an error - // message and dies (with KALDI_ERR) if NULL. - SequentialTableReaderImplBase<Holder> *impl_; -}; - - -/// A templated class for writing objects to an -/// archive or script file; see \ref io_sec_tables. -template<class Holder> -class TableWriter { - public: - typedef typename Holder::T T; - - TableWriter(): impl_(NULL) { } - - // This constructor equivalent to default constructor - // + "open", but throws on error. See docs for - // wspecifier above. - TableWriter(const std::string &wspecifier); - - // Opens the table. See docs for wspecifier above. - // If it returns true, it is open. - bool Open(const std::string &wspecifier); - - // Returns true if open for writing. - bool IsOpen() const; - - // Write the object. Throws std::runtime_error on error (via the - // KALDI_ERR macro) - inline void Write(const std::string &key, const T &value) const; - - - // Flush will flush any archive; it does not return error status - // or throw, any errors will be reported on the next Write or Close. - // Useful if we may be writing to a command in a pipe and want - // to ensure good CPU utilization. - void Flush(); - - // Close() is not necessary to call, as the destructor - // closes it; it's mainly useful if you want to handle - // error states because the destructor will throw on - // error if you do not call Close(). - bool Close(); - - ~TableWriter(); - - // Allow copy-constructor only for non-opened writers (needed for inclusion in - // stl vector) - TableWriter(const TableWriter &other): impl_(NULL) { - KALDI_ASSERT(other.impl_ == NULL); - } - private: - TableWriter &operator = (const TableWriter&); // Disallow assignment. - void CheckImpl() const; // Checks that impl_ is non-NULL; prints an error - // message and dies (with KALDI_ERR) if NULL. - TableWriterImplBase<Holder> *impl_; -}; - - -/// This class is for when you are reading something in random access, but -/// it may actually be stored per-speaker (or something similar) but the -/// keys you're using are per utterance. So you also provide an "rxfilename" -/// for a file containing lines like -/// utt1 spk1 -/// utt2 spk1 -/// utt3 spk1 -/// and so on. Note: this is optional; if it is an empty string, we just won't -/// do the mapping. Also, "table_rxfilename" may be the empty string (as for -/// a regular table), in which case the table just won't be opened. -/// We provide only the most frequently used of the functions of RandomAccessTableReader. - -template<class Holder> -class RandomAccessTableReaderMapped { - public: - typedef typename Holder::T T; - /// Note: "utt2spk_rxfilename" will in the normal case be an rxfilename - /// for an utterance to speaker map, but this code is general; it accepts - /// a generic map. - RandomAccessTableReaderMapped(const std::string &table_rxfilename, - const std::string &utt2spk_rxfilename); - - RandomAccessTableReaderMapped() {}; - - /// Note: when calling Open, utt2spk_rxfilename may be empty. - bool Open(const std::string &table_rxfilename, - const std::string &utt2spk_rxfilename); - - bool HasKey(const std::string &key); - const T &Value(const std::string &key); - inline bool IsOpen() const { return reader_.IsOpen(); } - inline bool Close() { return reader_.Close(); } - - - - // The default copy-constructor will do what we want: it will crash - // for already-opened readers, by calling the member-variable copy-constructors. - private: - // Disallow assignment. - RandomAccessTableReaderMapped &operator=(const RandomAccessTableReaderMapped<Holder>&); - RandomAccessTableReader<Holder> reader_; - RandomAccessTableReader<TokenHolder> token_reader_; - std::string utt2spk_rxfilename_; // Used only in diagnostic messages. -}; - - -/// @} end "addtogroup table_group" -} // end namespace kaldi - -#include "kaldi-table-inl.h" - -#endif // KALDI_UTIL_KALDI_TABLE_H_ diff --git a/kaldi_io/src/kaldi/util/parse-options.h b/kaldi_io/src/kaldi/util/parse-options.h deleted file mode 100644 index f563b54..0000000 --- a/kaldi_io/src/kaldi/util/parse-options.h +++ /dev/null @@ -1,264 +0,0 @@ -// util/parse-options.h - -// Copyright 2009-2011 Karel Vesely; Microsoft Corporation; -// Saarland University (Author: Arnab Ghoshal); -// Copyright 2012-2013 Frantisek Skala; Arnab Ghoshal - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_PARSE_OPTIONS_H_ -#define KALDI_UTIL_PARSE_OPTIONS_H_ - -#include <map> -#include <string> -#include <vector> - -#include "base/kaldi-common.h" -#include "itf/options-itf.h" - -namespace kaldi { - -/// The class ParseOptions is for parsing command-line options; see -/// \ref parse_options for more documentation. -class ParseOptions : public OptionsItf { - public: - explicit ParseOptions(const char *usage) : - print_args_(true), help_(false), usage_(usage), argc_(0), argv_(NULL), - prefix_(""), other_parser_(NULL) { -#ifndef _MSC_VER // This is just a convenient place to set the stderr to line - setlinebuf(stderr); // buffering mode, since it's called at program start. -#endif // This helps ensure different programs' output is not mixed up. - RegisterStandard("config", &config_, "Configuration file to read (this " - "option may be repeated)"); - RegisterStandard("print-args", &print_args_, - "Print the command line arguments (to stderr)"); - RegisterStandard("help", &help_, "Print out usage message"); - RegisterStandard("verbose", &g_kaldi_verbose_level, - "Verbose level (higher->more logging)"); - } - - /** - This is a constructor for the special case where some options are - registered with a prefix to avoid conflicts. The object thus created will - only be used temporarily to register an options class with the original - options parser (which is passed as the *other pointer) using the given - prefix. It should not be used for any other purpose, and the prefix must - not be the empty string. It seems to be the least bad way of implementing - options with prefixes at this point. - Example of usage is: - ParseOptions po; // original ParseOptions object - ParseOptions po_mfcc("mfcc", &po); // object with prefix. - MfccOptions mfcc_opts; - mfcc_opts.Register(&po_mfcc); - The options will now get registered as, e.g., --mfcc.frame-shift=10.0 - instead of just --frame-shift=10.0 - */ - ParseOptions(const std::string &prefix, OptionsItf *other); - - ~ParseOptions() {} - - // Methods from the interface - void Register(const std::string &name, - bool *ptr, const std::string &doc); - void Register(const std::string &name, - int32 *ptr, const std::string &doc); - void Register(const std::string &name, - uint32 *ptr, const std::string &doc); - void Register(const std::string &name, - float *ptr, const std::string &doc); - void Register(const std::string &name, - double *ptr, const std::string &doc); - void Register(const std::string &name, - std::string *ptr, const std::string &doc); - - /// If called after registering an option and before calling - /// Read(), disables that option from being used. Will crash - /// at runtime if that option had not been registered. - void DisableOption(const std::string &name); - - /// This one is used for registering standard parameters of all the programs - template<typename T> - void RegisterStandard(const std::string &name, - T *ptr, const std::string &doc); - - /** - Parses the command line options and fills the ParseOptions-registered - variables. This must be called after all the variables were registered!!! - - Initially the variables have implicit values, - then the config file values are set-up, - finally the command line vaues given. - Returns the first position in argv that was not used. - [typically not useful: use NumParams() and GetParam(). ] - */ - int Read(int argc, const char *const *argv); - - /// Prints the usage documentation [provided in the constructor]. - void PrintUsage(bool print_command_line = false); - /// Prints the actual configuration of all the registered variables - void PrintConfig(std::ostream &os); - - /// Reads the options values from a config file. Must be called after - /// registering all options. This is usually used internally after the - /// standard --config option is used, but it may also be called from a - /// program. - void ReadConfigFile(const std::string &filename); - - /// Number of positional parameters (c.f. argc-1). - int NumArgs() const; - - /// Returns one of the positional parameters; 1-based indexing for argc/argv - /// compatibility. Will crash if param is not >=1 and <=NumArgs(). - std::string GetArg(int param) const; - - std::string GetOptArg(int param) const { - return (param <= NumArgs() ? GetArg(param) : ""); - } - - /// The following function will return a possibly quoted and escaped - /// version of "str", according to the current shell. Currently - /// this is just hardwired to bash. It's useful for debug output. - static std::string Escape(const std::string &str); - - private: - /// Template to register various variable types, - /// used for program-specific parameters - template<typename T> - void RegisterTmpl(const std::string &name, T *ptr, const std::string &doc); - - // Following functions do just the datatype-specific part of the job - /// Register boolean variable - void RegisterSpecific(const std::string &name, const std::string &idx, - bool *b, const std::string &doc, bool is_standard); - /// Register int32 variable - void RegisterSpecific(const std::string &name, const std::string &idx, - int32 *i, const std::string &doc, bool is_standard); - /// Register unsinged int32 variable - void RegisterSpecific(const std::string &name, const std::string &idx, - uint32 *u, - const std::string &doc, bool is_standard); - /// Register float variable - void RegisterSpecific(const std::string &name, const std::string &idx, - float *f, const std::string &doc, bool is_standard); - /// Register double variable [useful as we change BaseFloat type]. - void RegisterSpecific(const std::string &name, const std::string &idx, - double *f, const std::string &doc, bool is_standard); - /// Register string variable - void RegisterSpecific(const std::string &name, const std::string &idx, - std::string *s, const std::string &doc, - bool is_standard); - - /// Does the actual job for both kinds of parameters - /// Does the common part of the job for all datatypes, - /// then calls RegisterSpecific - template<typename T> - void RegisterCommon(const std::string &name, - T *ptr, const std::string &doc, bool is_standard); - - /// SplitLongArg parses an argument of the form --a=b, --a=, or --a, - /// and sets "has_equal_sign" to true if an equals-sign was parsed.. - /// this is needed in order to correctly allow --x for a boolean option - /// x, and --y= for a string option y, and to disallow --x= and --y. - void SplitLongArg(std::string in, std::string *key, std::string *value, - bool *has_equal_sign); - - void NormalizeArgName(std::string *str); - - /// Set option with name "key" to "value"; will crash if can't do it. - /// "has_equal_sign" is used to allow --x for a boolean option x, - /// and --y=, for a string option y. - bool SetOption(const std::string &key, const std::string &value, - bool has_equal_sign); - - bool ToBool(std::string str); - int32 ToInt(std::string str); - uint32 ToUInt(std::string str); - float ToFloat(std::string str); - double ToDouble(std::string str); - - // maps for option variables - std::map<std::string, bool*> bool_map_; - std::map<std::string, int32*> int_map_; - std::map<std::string, uint32*> uint_map_; - std::map<std::string, float*> float_map_; - std::map<std::string, double*> double_map_; - std::map<std::string, std::string*> string_map_; - - /** - Structure for options' documentation - */ - struct DocInfo { - DocInfo() {} - DocInfo(const std::string &name, const std::string &usemsg) - : name_(name), use_msg_(usemsg), is_standard_(false) {} - DocInfo(const std::string &name, const std::string &usemsg, - bool is_standard) - : name_(name), use_msg_(usemsg), is_standard_(is_standard) {} - - std::string name_; - std::string use_msg_; - bool is_standard_; - }; - typedef std::map<std::string, DocInfo> DocMapType; - DocMapType doc_map_; ///< map for the documentation - - bool print_args_; ///< variable for the implicit --print-args parameter - bool help_; ///< variable for the implicit --help parameter - std::string config_; ///< variable for the implicit --config parameter - std::vector<std::string> positional_args_; - const char *usage_; - int argc_; - const char *const *argv_; - - /// These members are not normally used. They are only used when the object - /// is constructed with a prefix - std::string prefix_; - OptionsItf *other_parser_; -}; - -/// This template is provided for convenience in reading config classes from -/// files; this is not the standard way to read configuration options, but may -/// occasionally be needed. This function assumes the config has a function -/// "void Register(OptionsItf *po)" which it can call to register the -/// ParseOptions object. -template<class C> void ReadConfigFromFile(const std::string config_filename, - C *c) { - std::ostringstream usage_str; - usage_str << "Parsing config from " - << "from '" << config_filename << "'"; - ParseOptions po(usage_str.str().c_str()); - c->Register(&po); - po.ReadConfigFile(config_filename); -} - -/// This variant of the template ReadConfigFromFile is for if you need to read -/// two config classes from the same file. -template<class C1, class C2> void ReadConfigsFromFile(const std::string config_filename, - C1 *c1, C2 *c2) { - std::ostringstream usage_str; - usage_str << "Parsing config from " - << "from '" << config_filename << "'"; - ParseOptions po(usage_str.str().c_str()); - c1->Register(&po); - c2->Register(&po); - po.ReadConfigFile(config_filename); -} - - - -} // namespace kaldi - -#endif // KALDI_UTIL_PARSE_OPTIONS_H_ diff --git a/kaldi_io/src/kaldi/util/simple-io-funcs.h b/kaldi_io/src/kaldi/util/simple-io-funcs.h deleted file mode 100644 index 56573e4..0000000 --- a/kaldi_io/src/kaldi/util/simple-io-funcs.h +++ /dev/null @@ -1,56 +0,0 @@ -// util/simple-io-funcs.h - -// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. -#ifndef KALDI_UTIL_SIMPLE_IO_FUNCS_H_ -#define KALDI_UTIL_SIMPLE_IO_FUNCS_H_ - -#include "kaldi-io.h" - -// This header contains some utilities for reading some common, simple text formats: -// integers in files, one per line, and integers in files, possibly multiple per line. -// these are not really fully native Kaldi formats; they are mostly for small files that -// might be generated by scripts, and can be read all at one time. -// for longer files of this type, we would probably use the Table code. - -namespace kaldi { - -/// WriteToList attempts to write this list of integers, one per line, -/// to the given file, in text format. -/// returns true if succeeded. -bool WriteIntegerVectorSimple(std::string wxfilename, const std::vector<int32> &v); - -/// ReadFromList attempts to read this list of integers, one per line, -/// from the given file, in text format. -/// returns true if succeeded. -bool ReadIntegerVectorSimple(std::string rxfilename, std::vector<int32> *v); - -// This is a file format like: -// 1 2 -// 3 -// -// 4 5 6 -// etc. -bool WriteIntegerVectorVectorSimple(std::string wxfilename, const std::vector<std::vector<int32> > &v); - -bool ReadIntegerVectorVectorSimple(std::string rxfilename, std::vector<std::vector<int32> > *v); - - -} // end namespace kaldi. - - -#endif diff --git a/kaldi_io/src/kaldi/util/simple-options.h b/kaldi_io/src/kaldi/util/simple-options.h deleted file mode 100644 index 58816af..0000000 --- a/kaldi_io/src/kaldi/util/simple-options.h +++ /dev/null @@ -1,112 +0,0 @@ -// util/simple-options.hh - -// Copyright 2013 Tanel Alumae, Tallinn University of Technology - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_SIMPLE_OPTIONS_H_ -#define KALDI_UTIL_SIMPLE_OPTIONS_H_ - -#include <map> -#include <string> -#include <vector> - -#include "base/kaldi-common.h" -#include "itf/options-itf.h" - -namespace kaldi { - - -/// The class SimpleOptions is an implementation of OptionsItf that allows -/// setting and getting option values programmatically, i.e., via getter -/// and setter methods. It doesn't provide any command line parsing functionality. -/// The class ParseOptions should be used for command-line options. -class SimpleOptions : public OptionsItf { - public: - SimpleOptions() { - } - - virtual ~SimpleOptions() { - } - - // Methods from the interface - void Register(const std::string &name, bool *ptr, const std::string &doc); - void Register(const std::string &name, int32 *ptr, const std::string &doc); - void Register(const std::string &name, uint32 *ptr, const std::string &doc); - void Register(const std::string &name, float *ptr, const std::string &doc); - void Register(const std::string &name, double *ptr, const std::string &doc); - void Register(const std::string &name, std::string *ptr, - const std::string &doc); - - // set option with the specified key, return true if successful - bool SetOption(const std::string &key, const bool &value); - bool SetOption(const std::string &key, const int32 &value); - bool SetOption(const std::string &key, const uint32 &value); - bool SetOption(const std::string &key, const float &value); - bool SetOption(const std::string &key, const double &value); - bool SetOption(const std::string &key, const std::string &value); - bool SetOption(const std::string &key, const char* value); - - // get option with the specified key and put to 'value', - // return true if successful - bool GetOption(const std::string &key, bool *value); - bool GetOption(const std::string &key, int32 *value); - bool GetOption(const std::string &key, uint32 *value); - bool GetOption(const std::string &key, float *value); - bool GetOption(const std::string &key, double *value); - bool GetOption(const std::string &key, std::string *value); - - enum OptionType { - kBool, - kInt32, - kUint32, - kFloat, - kDouble, - kString - }; - - struct OptionInfo { - OptionInfo(const std::string &doc, OptionType type) : - doc(doc), type(type) { - } - std::string doc; - OptionType type; - }; - - std::vector<std::pair<std::string, OptionInfo> > GetOptionInfoList(); - - /* - * Puts the type of the option with name 'key' in the argument 'type'. - * Return true if such option is found, false otherwise. - */ - bool GetOptionType(const std::string &key, OptionType *type); - - private: - - std::vector<std::pair<std::string, OptionInfo> > option_info_list_; - - // maps for option variables - std::map<std::string, bool*> bool_map_; - std::map<std::string, int32*> int_map_; - std::map<std::string, uint32*> uint_map_; - std::map<std::string, float*> float_map_; - std::map<std::string, double*> double_map_; - std::map<std::string, std::string*> string_map_; -}; - -} // namespace kaldi - -#endif // KALDI_UTIL_SIMPLE_OPTIONS_H_ diff --git a/kaldi_io/src/kaldi/util/stl-utils.h b/kaldi_io/src/kaldi/util/stl-utils.h deleted file mode 100644 index 12526ff..0000000 --- a/kaldi_io/src/kaldi/util/stl-utils.h +++ /dev/null @@ -1,327 +0,0 @@ -// util/stl-utils.h - -// Copyright 2009-2011 Microsoft Corporation; Saarland University - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_STL_UTILS_H_ -#define KALDI_UTIL_STL_UTILS_H_ - -#include <algorithm> -#include <map> -#include <set> -#include <string> -#include <vector> -#include "base/kaldi-common.h" - -#ifdef _MSC_VER -#include <unordered_map> -#include <unordered_set> -using std::unordered_map; -using std::unordered_set; -#elif __cplusplus > 199711L || defined(__GXX_EXPERIMENTAL_CXX0X__) -#include <unordered_map> -#include <unordered_set> -using std::unordered_map; -using std::unordered_set; -#else -#include <tr1/unordered_map> -#include <tr1/unordered_set> -using std::tr1::unordered_map; -using std::tr1::unordered_set; -#endif - - -namespace kaldi { - -/// Sorts and uniq's (removes duplicates) from a vector. -template<typename T> -inline void SortAndUniq(std::vector<T> *vec) { - std::sort(vec->begin(), vec->end()); - vec->erase(std::unique(vec->begin(), vec->end()), vec->end()); -} - - -/// Returns true if the vector is sorted. -template<typename T> -inline bool IsSorted(const std::vector<T> &vec) { - typename std::vector<T>::const_iterator iter = vec.begin(), end = vec.end(); - if (iter == end) return true; - while (1) { - typename std::vector<T>::const_iterator next_iter = iter; - ++next_iter; - if (next_iter == end) return true; // end of loop and nothing out of order - if (*next_iter < *iter) return false; - iter = next_iter; - } -} - - -/// Returns true if the vector is sorted and contains each element -/// only once. -template<typename T> -inline bool IsSortedAndUniq(const std::vector<T> &vec) { - typename std::vector<T>::const_iterator iter = vec.begin(), end = vec.end(); - if (iter == end) return true; - while (1) { - typename std::vector<T>::const_iterator next_iter = iter; - ++next_iter; - if (next_iter == end) return true; // end of loop and nothing out of order - if (*next_iter <= *iter) return false; - iter = next_iter; - } -} - - -/// Removes duplicate elements from a sorted list. -template<typename T> -inline void Uniq(std::vector<T> *vec) { // must be already sorted. - KALDI_PARANOID_ASSERT(IsSorted(*vec)); - KALDI_ASSERT(vec); - vec->erase(std::unique(vec->begin(), vec->end()), vec->end()); -} - -/// Copies the elements of a set to a vector. -template<class T> -void CopySetToVector(const std::set<T> &s, std::vector<T> *v) { - // adds members of s to v, in sorted order from lowest to highest - // (because the set was in sorted order). - KALDI_ASSERT(v != NULL); - v->resize(s.size()); - typename std::set<T>::const_iterator siter = s.begin(), send = s.end(); - typename std::vector<T>::iterator viter = v->begin(); - for (; siter != send; ++siter, ++viter) { - *viter = *siter; - } -} - -template<class T> -void CopySetToVector(const unordered_set<T> &s, std::vector<T> *v) { - // adds members of s to v, in sorted order from lowest to highest - // (because the set was in sorted order). - KALDI_ASSERT(v != NULL); - v->resize(s.size()); - typename unordered_set<T>::const_iterator siter = s.begin(), send = s.end(); - typename std::vector<T>::iterator viter = v->begin(); - for (; siter != send; ++siter, ++viter) { - *viter = *siter; - } -} - - -/// Copies the (key, value) pairs in a map to a vector of pairs. -template<class A, class B> -void CopyMapToVector(const std::map<A, B> &m, - std::vector<std::pair<A, B> > *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map<A, B>::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector<std::pair<A, B> >::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = std::make_pair(miter->first, miter->second); - // do it like this because of const casting. - } -} - -/// Copies the keys in a map to a vector. -template<class A, class B> -void CopyMapKeysToVector(const std::map<A, B> &m, std::vector<A> *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map<A, B>::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector<A>::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = miter->first; - } -} - -/// Copies the values in a map to a vector. -template<class A, class B> -void CopyMapValuesToVector(const std::map<A, B> &m, std::vector<B> *v) { - KALDI_ASSERT(v != NULL); - v->resize(m.size()); - typename std::map<A, B>::const_iterator miter = m.begin(), mend = m.end(); - typename std::vector<B>::iterator viter = v->begin(); - for (; miter != mend; ++miter, ++viter) { - *viter = miter->second; - } -} - -/// Copies the keys in a map to a set. -template<class A, class B> -void CopyMapKeysToSet(const std::map<A, B> &m, std::set<A> *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::map<A, B>::const_iterator miter = m.begin(), mend = m.end(); - for (; miter != mend; ++miter) { - s->insert(s->end(), miter->first); - } -} - -/// Copies the values in a map to a set. -template<class A, class B> -void CopyMapValuesToSet(const std::map<A, B> &m, std::set<B> *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::map<A, B>::const_iterator miter = m.begin(), mend = m.end(); - for (; miter != mend; ++miter) - s->insert(s->end(), miter->second); -} - - -/// Copies the contents of a vector to a set. -template<class A> -void CopyVectorToSet(const std::vector<A> &v, std::set<A> *s) { - KALDI_ASSERT(s != NULL); - s->clear(); - typename std::vector<A>::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) - s->insert(s->end(), *iter); - // s->end() is a hint in case v was sorted. will work regardless. -} - -/// Deletes any non-NULL pointers in the vector v, and sets -/// the corresponding entries of v to NULL -template<class A> -void DeletePointers(std::vector<A*> *v) { - KALDI_ASSERT(v != NULL); - typename std::vector<A*>::iterator iter = v->begin(), end = v->end(); - for (; iter != end; ++iter) { - if (*iter != NULL) { - delete *iter; - *iter = NULL; // set to NULL for extra safety. - } - } -} - -/// Returns true if the vector of pointers contains NULL pointers. -template<class A> -bool ContainsNullPointers(const std::vector<A*> &v) { - typename std::vector<A*>::const_iterator iter = v.begin(), end = v.end(); - for (; iter != end; ++iter) - if (*iter == static_cast<A*> (NULL)) return true; - return false; -} - -/// Copies the contents a vector of one type to a vector -/// of another type. -template<typename A, typename B> -void CopyVectorToVector(const std::vector<A> &vec_in, std::vector<B> *vec_out) { - KALDI_ASSERT(vec_out != NULL); - vec_out->resize(vec_in.size()); - for (size_t i = 0; i < vec_in.size(); i++) - (*vec_out)[i] = static_cast<B> (vec_in[i]); -} - -/// A hashing function-object for vectors. -template<typename Int> -struct VectorHasher { // hashing function for vector<Int>. - size_t operator()(const std::vector<Int> &x) const { - size_t ans = 0; - typename std::vector<Int>::const_iterator iter = x.begin(), end = x.end(); - for (; iter != end; ++iter) { - ans *= kPrime; - ans += *iter; - } - return ans; - } - VectorHasher() { // Check we're instantiated with an integer type. - KALDI_ASSERT_IS_INTEGER_TYPE(Int); - } - private: - static const int kPrime = 7853; -}; - -/// A hashing function-object for pairs of ints -template<typename Int> -struct PairHasher { // hashing function for pair<int> - size_t operator()(const std::pair<Int,Int> &x) const { - return x.first + x.second * kPrime; - } - PairHasher() { // Check we're instantiated with an integer type. - KALDI_ASSERT_IS_INTEGER_TYPE(Int); - } - private: - static const int kPrime = 7853; -}; - - -/// A hashing function object for strings. -struct StringHasher { // hashing function for std::string - size_t operator()(const std::string &str) const { - size_t ans = 0, len = str.length(); - const char *c = str.c_str(), *end = c + len; - for (; c != end; c++) { - ans *= kPrime; - ans += *c; - } - return ans; - } - private: - static const int kPrime = 7853; -}; - -/// Reverses the contents of a vector. -template<typename T> -inline void ReverseVector(std::vector<T> *vec) { - KALDI_ASSERT(vec != NULL); - size_t sz = vec->size(); - for (size_t i = 0; i < sz/2; i++) - std::swap( (*vec)[i], (*vec)[sz-1-i]); -} - - -/// Comparator object for pairs that compares only the first pair. -template<class A, class B> -struct CompareFirstMemberOfPair { - inline bool operator() (const std::pair<A, B> &p1, - const std::pair<A, B> &p2) { - return p1.first < p2.first; - } -}; - -/// For a vector of pair<I, F> where I is an integer and F a floating-point or -/// integer type, this function sorts a vector of type vector<pair<I, F> > on -/// the I value and then merges elements with equal I values, summing these over -/// the F component and then removing any F component with zero value. This -/// is for where the vector of pairs represents a map from the integer to float -/// component, with an "adding" type of semantics for combining the elements. -template<typename I, typename F> -inline void MergePairVectorSumming(std::vector<std::pair<I, F> > *vec) { - KALDI_ASSERT_IS_INTEGER_TYPE(I); - CompareFirstMemberOfPair<I, F> c; - std::sort(vec->begin(), vec->end(), c); // sort on 1st element. - typename std::vector<std::pair<I, F> >::iterator out = vec->begin(), - in = vec->begin(), end = vec->end(); - while (in < end) { - // We reach this point only at the first element of - // each stretch of identical .first elements. - *out = *in; - ++in; - while (in < end && in->first == out->first) { - out->second += in->second; // this is the merge operation. - ++in; - } - if (out->second != static_cast<F>(0)) // Don't keep zero elements. - out++; - } - vec->erase(out, end); -} - -} // namespace kaldi - -#endif // KALDI_UTIL_STL_UTILS_H_ - diff --git a/kaldi_io/src/kaldi/util/table-types.h b/kaldi_io/src/kaldi/util/table-types.h deleted file mode 100644 index 313d1aa..0000000 --- a/kaldi_io/src/kaldi/util/table-types.h +++ /dev/null @@ -1,137 +0,0 @@ -// util/table-types.h - -// Copyright 2009-2011 Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - - -#ifndef KALDI_UTIL_TABLE_TYPES_H_ -#define KALDI_UTIL_TABLE_TYPES_H_ -#include "base/kaldi-common.h" -#include "util/kaldi-table.h" -#include "util/kaldi-holder.h" -#include "matrix/matrix-lib.h" - -namespace kaldi { - -// This header defines typedefs that are specific instantiations of -// the Table types. - -/// \addtogroup table_types -/// @{ - -typedef TableWriter<KaldiObjectHolder<Matrix<BaseFloat> > > BaseFloatMatrixWriter; -typedef SequentialTableReader<KaldiObjectHolder<Matrix<BaseFloat> > > SequentialBaseFloatMatrixReader; -typedef RandomAccessTableReader<KaldiObjectHolder<Matrix<BaseFloat> > > RandomAccessBaseFloatMatrixReader; -typedef RandomAccessTableReaderMapped<KaldiObjectHolder<Matrix<BaseFloat> > > RandomAccessBaseFloatMatrixReaderMapped; - -typedef TableWriter<KaldiObjectHolder<Matrix<double> > > DoubleMatrixWriter; -typedef SequentialTableReader<KaldiObjectHolder<Matrix<double> > > SequentialDoubleMatrixReader; -typedef RandomAccessTableReader<KaldiObjectHolder<Matrix<double> > > RandomAccessDoubleMatrixReader; -typedef RandomAccessTableReaderMapped<KaldiObjectHolder<Matrix<double> > > RandomAccessDoubleMatrixReaderMapped; - -typedef TableWriter<KaldiObjectHolder<CompressedMatrix> > CompressedMatrixWriter; - -typedef TableWriter<KaldiObjectHolder<Vector<BaseFloat> > > BaseFloatVectorWriter; -typedef SequentialTableReader<KaldiObjectHolder<Vector<BaseFloat> > > SequentialBaseFloatVectorReader; -typedef RandomAccessTableReader<KaldiObjectHolder<Vector<BaseFloat> > > RandomAccessBaseFloatVectorReader; -typedef RandomAccessTableReaderMapped<KaldiObjectHolder<Vector<BaseFloat> > > RandomAccessBaseFloatVectorReaderMapped; - -typedef TableWriter<KaldiObjectHolder<Vector<double> > > DoubleVectorWriter; -typedef SequentialTableReader<KaldiObjectHolder<Vector<double> > > SequentialDoubleVectorReader; -typedef RandomAccessTableReader<KaldiObjectHolder<Vector<double> > > RandomAccessDoubleVectorReader; - -typedef TableWriter<KaldiObjectHolder<CuMatrix<BaseFloat> > > BaseFloatCuMatrixWriter; -typedef SequentialTableReader<KaldiObjectHolder<CuMatrix<BaseFloat> > > SequentialBaseFloatCuMatrixReader; -typedef RandomAccessTableReader<KaldiObjectHolder<CuMatrix<BaseFloat> > > RandomAccessBaseFloatCuMatrixReader; -typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuMatrix<BaseFloat> > > RandomAccessBaseFloatCuMatrixReaderMapped; - -typedef TableWriter<KaldiObjectHolder<CuMatrix<double> > > DoubleCuMatrixWriter; -typedef SequentialTableReader<KaldiObjectHolder<CuMatrix<double> > > SequentialDoubleCuMatrixReader; -typedef RandomAccessTableReader<KaldiObjectHolder<CuMatrix<double> > > RandomAccessDoubleCuMatrixReader; -typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuMatrix<double> > > RandomAccessDoubleCuMatrixReaderMapped; - -typedef TableWriter<KaldiObjectHolder<CuVector<BaseFloat> > > BaseFloatCuVectorWriter; -typedef SequentialTableReader<KaldiObjectHolder<CuVector<BaseFloat> > > SequentialBaseFloatCuVectorReader; -typedef RandomAccessTableReader<KaldiObjectHolder<CuVector<BaseFloat> > > RandomAccessBaseFloatCuVectorReader; -typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuVector<BaseFloat> > > RandomAccessBaseFloatCuVectorReaderMapped; - -typedef TableWriter<KaldiObjectHolder<CuVector<double> > > DoubleCuVectorWriter; -typedef SequentialTableReader<KaldiObjectHolder<CuVector<double> > > SequentialDoubleCuVectorReader; -typedef RandomAccessTableReader<KaldiObjectHolder<CuVector<double> > > RandomAccessDoubleCuVectorReader; - - -typedef TableWriter<BasicHolder<int32> > Int32Writer; -typedef SequentialTableReader<BasicHolder<int32> > SequentialInt32Reader; -typedef RandomAccessTableReader<BasicHolder<int32> > RandomAccessInt32Reader; - -typedef TableWriter<BasicVectorHolder<int32> > Int32VectorWriter; -typedef SequentialTableReader<BasicVectorHolder<int32> > SequentialInt32VectorReader; -typedef RandomAccessTableReader<BasicVectorHolder<int32> > RandomAccessInt32VectorReader; - -typedef TableWriter<BasicVectorVectorHolder<int32> > Int32VectorVectorWriter; -typedef SequentialTableReader<BasicVectorVectorHolder<int32> > SequentialInt32VectorVectorReader; -typedef RandomAccessTableReader<BasicVectorVectorHolder<int32> > RandomAccessInt32VectorVectorReader; - -typedef TableWriter<BasicPairVectorHolder<int32> > Int32PairVectorWriter; -typedef SequentialTableReader<BasicPairVectorHolder<int32> > SequentialInt32PairVectorReader; -typedef RandomAccessTableReader<BasicPairVectorHolder<int32> > RandomAccessInt32PairVectorReader; - -typedef TableWriter<BasicPairVectorHolder<BaseFloat> > BaseFloatPairVectorWriter; -typedef SequentialTableReader<BasicPairVectorHolder<BaseFloat> > SequentialBaseFloatPairVectorReader; -typedef RandomAccessTableReader<BasicPairVectorHolder<BaseFloat> > RandomAccessBaseFloatPairVectorReader; - -typedef TableWriter<BasicHolder<BaseFloat> > BaseFloatWriter; -typedef SequentialTableReader<BasicHolder<BaseFloat> > SequentialBaseFloatReader; -typedef RandomAccessTableReader<BasicHolder<BaseFloat> > RandomAccessBaseFloatReader; -typedef RandomAccessTableReaderMapped<BasicHolder<BaseFloat> > RandomAccessBaseFloatReaderMapped; - -typedef TableWriter<BasicHolder<double> > DoubleWriter; -typedef SequentialTableReader<BasicHolder<double> > SequentialDoubleReader; -typedef RandomAccessTableReader<BasicHolder<double> > RandomAccessDoubleReader; - -typedef TableWriter<BasicHolder<bool> > BoolWriter; -typedef SequentialTableReader<BasicHolder<bool> > SequentialBoolReader; -typedef RandomAccessTableReader<BasicHolder<bool> > RandomAccessBoolReader; - - - -/// TokenWriter is a writer specialized for std::string where the strings -/// are nonempty and whitespace-free. T == std::string -typedef TableWriter<TokenHolder> TokenWriter; -typedef SequentialTableReader<TokenHolder> SequentialTokenReader; -typedef RandomAccessTableReader<TokenHolder> RandomAccessTokenReader; - - -/// TokenVectorWriter is a writer specialized for sequences of -/// std::string where the strings are nonempty and whitespace-free. -/// T == std::vector<std::string> -typedef TableWriter<TokenVectorHolder> TokenVectorWriter; -// Ditto for SequentialTokenVectorReader. -typedef SequentialTableReader<TokenVectorHolder> SequentialTokenVectorReader; -typedef RandomAccessTableReader<TokenVectorHolder> RandomAccessTokenVectorReader; - - -/// @} - -// Note: for FST reader/writer, see ../fstext/fstext-utils.h -// [not done yet]. - -} // end namespace kaldi - - - -#endif diff --git a/kaldi_io/src/kaldi/util/text-utils.h b/kaldi_io/src/kaldi/util/text-utils.h deleted file mode 100644 index 1d85c47..0000000 --- a/kaldi_io/src/kaldi/util/text-utils.h +++ /dev/null @@ -1,169 +0,0 @@ -// util/text-utils.h - -// Copyright 2009-2011 Saarland University; Microsoft Corporation - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -#ifndef KALDI_UTIL_TEXT_UTILS_H_ -#define KALDI_UTIL_TEXT_UTILS_H_ - -#include <algorithm> -#include <map> -#include <set> -#include <string> -#include <vector> -#include <errno.h> - -#include "base/kaldi-common.h" - -namespace kaldi { - -/// Split a string using any of the single character delimiters. -/// If omit_empty_strings == true, the output will contain any -/// nonempty strings after splitting on any of the -/// characters in the delimiter. If omit_empty_strings == false, -/// the output will contain n+1 strings if there are n characters -/// in the set "delim" within the input string. In this case -/// the empty string is split to a single empty string. -void SplitStringToVector(const std::string &full, const char *delim, - bool omit_empty_strings, - std::vector<std::string> *out); - -/// Joins the elements of a vector of strings into a single string using -/// "delim" as the delimiter. If omit_empty_strings == true, any empty strings -/// in the vector are skipped. A vector of empty strings results in an empty -/// string on the output. -void JoinVectorToString(const std::vector<std::string> &vec_in, - const char *delim, bool omit_empty_strings, - std::string *str_out); - - -/// Split a string (e.g. 1:2:3) into a vector of integers. -/// The delimiting char may be any character in "delim". -/// returns true on success, false on failure. -/// If omit_empty_strings == true, 1::2:3: will become -/// { 1, 2, 3 }. Otherwise it would be rejected. -/// Regardless of the value of omit_empty_strings, -/// the empty string is successfully parsed as an empty -/// vector of integers -template<class I> -bool SplitStringToIntegers(const std::string &full, - const char *delim, - bool omit_empty_strings, // typically false [but - // should probably be true - // if "delim" is spaces]. - std::vector<I> *out) { - KALDI_ASSERT(out != NULL); - KALDI_ASSERT_IS_INTEGER_TYPE(I); - if ( *(full.c_str()) == '\0') { - out->clear(); - return true; - } - std::vector<std::string> split; - SplitStringToVector(full, delim, omit_empty_strings, &split); - out->resize(split.size()); - for (size_t i = 0; i < split.size(); i++) { - const char *this_str = split[i].c_str(); - char *end = NULL; - long long int j = 0; - j = KALDI_STRTOLL(this_str, &end); - if (end == this_str || *end != '\0') { - out->clear(); - return false; - } else { - I jI = static_cast<I>(j); - if (static_cast<long long int>(jI) != j) { - // output type cannot fit this integer. - out->clear(); - return false; - } - (*out)[i] = jI; - } - } - return true; -} - -// This is defined for F = float and double. -template<class F> -bool SplitStringToFloats(const std::string &full, - const char *delim, - bool omit_empty_strings, // typically false - std::vector<F> *out); - - -/// Converts a string into an integer via strtoll and returns false if there was -/// any kind of problem (i.e. the string was not an integer or contained extra -/// non-whitespace junk, or the integer was too large to fit into the type it is -/// being converted into). Only sets *out if everything was OK and it returns -/// true. -template<class Int> -bool ConvertStringToInteger(const std::string &str, - Int *out) { - KALDI_ASSERT_IS_INTEGER_TYPE(Int); - const char *this_str = str.c_str(); - char *end = NULL; - errno = 0; - long long int i = KALDI_STRTOLL(this_str, &end); - if (end != this_str) - while (isspace(*end)) end++; - if (end == this_str || *end != '\0' || errno != 0) - return false; - Int iInt = static_cast<Int>(i); - if (static_cast<long long int>(iInt) != i || (i<0 && !std::numeric_limits<Int>::is_signed)) { - return false; - } - *out = iInt; - return true; -} - - -/// ConvertStringToReal converts a string into either float or double via strtod, -/// and returns false if there was any kind of problem (i.e. the string was not a -/// floating point number or contained extra non-whitespace junk. -/// Be careful- this function will successfully read inf's or nan's. -bool ConvertStringToReal(const std::string &str, - double *out); -bool ConvertStringToReal(const std::string &str, - float *out); - - -/// Removes the beginning and trailing whitespaces from a string -void Trim(std::string *str); - - -/// Removes leading and trailing white space from the string, then splits on the -/// first section of whitespace found (if present), putting the part before the -/// whitespace in "first" and the rest in "rest". If there is no such space, -/// everything that remains after removing leading and trailing whitespace goes -/// in "first". -void SplitStringOnFirstSpace(const std::string &line, - std::string *first, - std::string *rest); - - -/// Returns true if "token" is nonempty, and all characters are -/// printable and whitespace-free. -bool IsToken(const std::string &token); - - -/// Returns true if "line" is free of \n characters and unprintable -/// characters, and does not contain leading or trailing whitespace. -bool IsLine(const std::string &line); - - -} // namespace kaldi - -#endif // KALDI_UTIL_TEXT_UTILS_H_ diff --git a/kaldi_io/src/kaldi/util/timer.h b/kaldi_io/src/kaldi/util/timer.h deleted file mode 100644 index e3ee8d5..0000000 --- a/kaldi_io/src/kaldi/util/timer.h +++ /dev/null @@ -1,27 +0,0 @@ -// util/timer.h - -// Copyright 2014 Johns Hopkins University (author: Daniel Povey) - -// See ../../COPYING for clarification regarding multiple authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at - -// http://www.apache.org/licenses/LICENSE-2.0 - -// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -// MERCHANTABLITY OR NON-INFRINGEMENT. -// See the Apache 2 License for the specific language governing permissions and -// limitations under the License. - -// We are temporarily leaving this file to forward #includes to -// base-timer.h. Its use is deprecated; you should directrly -// #include base/timer.h -#ifndef KALDI_UTIL_TIMER_H_ -#define KALDI_UTIL_TIMER_H_ -#pragma message warning: please do not include util/timer.h, include base/timer.h (it has been moved) -#include "base/timer.h" -#endif |