summaryrefslogtreecommitdiff
path: root/kaldi_io/src/kaldi/util
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/src/kaldi/util')
-rw-r--r--kaldi_io/src/kaldi/util/basic-filebuf.h1065
-rw-r--r--kaldi_io/src/kaldi/util/common-utils.h31
-rw-r--r--kaldi_io/src/kaldi/util/const-integer-set-inl.h88
-rw-r--r--kaldi_io/src/kaldi/util/const-integer-set.h95
-rw-r--r--kaldi_io/src/kaldi/util/edit-distance-inl.h189
-rw-r--r--kaldi_io/src/kaldi/util/edit-distance.h63
-rw-r--r--kaldi_io/src/kaldi/util/hash-list-inl.h183
-rw-r--r--kaldi_io/src/kaldi/util/hash-list.h140
-rw-r--r--kaldi_io/src/kaldi/util/kaldi-holder-inl.h800
-rw-r--r--kaldi_io/src/kaldi/util/kaldi-holder.h207
-rw-r--r--kaldi_io/src/kaldi/util/kaldi-io-inl.h45
-rw-r--r--kaldi_io/src/kaldi/util/kaldi-io.h264
-rw-r--r--kaldi_io/src/kaldi/util/kaldi-pipebuf.h90
-rw-r--r--kaldi_io/src/kaldi/util/kaldi-table-inl.h2246
-rw-r--r--kaldi_io/src/kaldi/util/kaldi-table.h459
-rw-r--r--kaldi_io/src/kaldi/util/parse-options.h264
-rw-r--r--kaldi_io/src/kaldi/util/simple-io-funcs.h56
-rw-r--r--kaldi_io/src/kaldi/util/simple-options.h112
-rw-r--r--kaldi_io/src/kaldi/util/stl-utils.h327
-rw-r--r--kaldi_io/src/kaldi/util/table-types.h137
-rw-r--r--kaldi_io/src/kaldi/util/text-utils.h169
-rw-r--r--kaldi_io/src/kaldi/util/timer.h27
22 files changed, 0 insertions, 7057 deletions
diff --git a/kaldi_io/src/kaldi/util/basic-filebuf.h b/kaldi_io/src/kaldi/util/basic-filebuf.h
deleted file mode 100644
index cf2e079..0000000
--- a/kaldi_io/src/kaldi/util/basic-filebuf.h
+++ /dev/null
@@ -1,1065 +0,0 @@
-///////////////////////////////////////////////////////////////////////////////
-// This is a modified version of the std::basic_filebuf from libc++
-// (http://libcxx.llvm.org/).
-// It allows one to create basic_filebuf from an existing FILE* handle or file
-// descriptor.
-//
-// This file is dual licensed under the MIT and the University of Illinois Open
-// Source License licenses. See LICENSE.TXT for details (included at the
-// bottom).
-///////////////////////////////////////////////////////////////////////////////
-#ifndef KALDI_UTIL_BASIC_FILEBUF_H_
-#define KALDI_UTIL_BASIC_FILEBUF_H_
-
-///////////////////////////////////////////////////////////////////////////////
-#include <fstream>
-#include <cstdio>
-#include <cstring>
-
-///////////////////////////////////////////////////////////////////////////////
-namespace kaldi
-{
-
-///////////////////////////////////////////////////////////////////////////////
-template <typename CharT, typename Traits = std::char_traits<CharT> >
-class basic_filebuf : public std::basic_streambuf<CharT, Traits>
-{
-public:
- typedef CharT char_type;
- typedef Traits traits_type;
- typedef typename traits_type::int_type int_type;
- typedef typename traits_type::pos_type pos_type;
- typedef typename traits_type::off_type off_type;
- typedef typename traits_type::state_type state_type;
-
- basic_filebuf();
- basic_filebuf(basic_filebuf&& rhs);
- virtual ~basic_filebuf();
-
- basic_filebuf& operator=(basic_filebuf&& rhs);
- void swap(basic_filebuf& rhs);
-
- bool is_open() const;
- basic_filebuf* open(const char* s, std::ios_base::openmode mode);
- basic_filebuf* open(const std::string& s, std::ios_base::openmode mode);
- basic_filebuf* open(int fd, std::ios_base::openmode mode);
- basic_filebuf* open(FILE* f, std::ios_base::openmode mode);
- basic_filebuf* close();
-
- FILE* file() { return this->_M_file; }
- int fd() { return fileno(this->_M_file); }
-
-protected:
- int_type underflow() override;
- int_type pbackfail(int_type c = traits_type::eof()) override;
- int_type overflow (int_type c = traits_type::eof()) override;
- std::basic_streambuf<char_type, traits_type>* setbuf(char_type* s, std::streamsize n) override;
- pos_type seekoff(off_type off, std::ios_base::seekdir way,
- std::ios_base::openmode wch = std::ios_base::in | std::ios_base::out) override;
- pos_type seekpos(pos_type sp,
- std::ios_base::openmode wch = std::ios_base::in | std::ios_base::out) override;
- int sync() override;
- void imbue(const std::locale& loc) override;
-
-protected:
- char* _M_extbuf;
- const char* _M_extbufnext;
- const char* _M_extbufend;
- char _M_extbuf_min[8];
- size_t _M_ebs;
- char_type* _M_intbuf;
- size_t _M_ibs;
- FILE* _M_file;
- const std::codecvt<char_type, char, state_type>* _M_cv;
- state_type _M_st;
- state_type _M_st_last;
- std::ios_base::openmode _M_om;
- std::ios_base::openmode _M_cm;
- bool _M_owns_eb;
- bool _M_owns_ib;
- bool _M_always_noconv;
-
- const char* _M_get_mode(std::ios_base::openmode mode);
- bool _M_read_mode();
- void _M_write_mode();
-};
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-basic_filebuf<CharT, Traits>::basic_filebuf()
- : _M_extbuf(nullptr),
- _M_extbufnext(nullptr),
- _M_extbufend(nullptr),
- _M_ebs(0),
- _M_intbuf(nullptr),
- _M_ibs(0),
- _M_file(nullptr),
- _M_cv(nullptr),
- _M_st(),
- _M_st_last(),
- _M_om(std::ios_base::openmode(0)),
- _M_cm(std::ios_base::openmode(0)),
- _M_owns_eb(false),
- _M_owns_ib(false),
- _M_always_noconv(false)
-{
- if (std::has_facet<std::codecvt<char_type, char, state_type> >(this->getloc()))
- {
- _M_cv = &std::use_facet<std::codecvt<char_type, char, state_type> >(this->getloc());
- _M_always_noconv = _M_cv->always_noconv();
- }
- setbuf(0, 4096);
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-basic_filebuf<CharT, Traits>::basic_filebuf(basic_filebuf&& rhs)
- : std::basic_streambuf<CharT, Traits>(rhs)
-{
- if (rhs._M_extbuf == rhs._M_extbuf_min)
- {
- _M_extbuf = _M_extbuf_min;
- _M_extbufnext = _M_extbuf + (rhs._M_extbufnext - rhs._M_extbuf);
- _M_extbufend = _M_extbuf + (rhs._M_extbufend - rhs._M_extbuf);
- }
- else
- {
- _M_extbuf = rhs._M_extbuf;
- _M_extbufnext = rhs._M_extbufnext;
- _M_extbufend = rhs._M_extbufend;
- }
- _M_ebs = rhs._M_ebs;
- _M_intbuf = rhs._M_intbuf;
- _M_ibs = rhs._M_ibs;
- _M_file = rhs._M_file;
- _M_cv = rhs._M_cv;
- _M_st = rhs._M_st;
- _M_st_last = rhs._M_st_last;
- _M_om = rhs._M_om;
- _M_cm = rhs._M_cm;
- _M_owns_eb = rhs._M_owns_eb;
- _M_owns_ib = rhs._M_owns_ib;
- _M_always_noconv = rhs._M_always_noconv;
- if (rhs.pbase())
- {
- if (rhs.pbase() == rhs._M_intbuf)
- this->setp(_M_intbuf, _M_intbuf + (rhs. epptr() - rhs.pbase()));
- else
- this->setp((char_type*)_M_extbuf,
- (char_type*)_M_extbuf + (rhs. epptr() - rhs.pbase()));
- this->pbump(rhs. pptr() - rhs.pbase());
- }
- else if (rhs.eback())
- {
- if (rhs.eback() == rhs._M_intbuf)
- this->setg(_M_intbuf, _M_intbuf + (rhs.gptr() - rhs.eback()),
- _M_intbuf + (rhs.egptr() - rhs.eback()));
- else
- this->setg((char_type*)_M_extbuf,
- (char_type*)_M_extbuf + (rhs.gptr() - rhs.eback()),
- (char_type*)_M_extbuf + (rhs.egptr() - rhs.eback()));
- }
- rhs._M_extbuf = nullptr;
- rhs._M_extbufnext = nullptr;
- rhs._M_extbufend = nullptr;
- rhs._M_ebs = 0;
- rhs._M_intbuf = nullptr;
- rhs._M_ibs = 0;
- rhs._M_file = nullptr;
- rhs._M_st = state_type();
- rhs._M_st_last = state_type();
- rhs._M_om = std::ios_base::openmode(0);
- rhs._M_cm = std::ios_base::openmode(0);
- rhs._M_owns_eb = false;
- rhs._M_owns_ib = false;
- rhs.setg(0, 0, 0);
- rhs.setp(0, 0);
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-inline
-basic_filebuf<CharT, Traits>&
-basic_filebuf<CharT, Traits>::operator=(basic_filebuf&& rhs)
-{
- close();
- swap(rhs);
- return *this;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-basic_filebuf<CharT, Traits>::~basic_filebuf()
-{
- // try
- // {
- // close();
- // }
- // catch (...)
- // {
- // }
- if (_M_owns_eb)
- delete [] _M_extbuf;
- if (_M_owns_ib)
- delete [] _M_intbuf;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-void
-basic_filebuf<CharT, Traits>::swap(basic_filebuf& rhs)
-{
- std::basic_streambuf<char_type, traits_type>::swap(rhs);
- if (_M_extbuf != _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min)
- {
- std::swap(_M_extbuf, rhs._M_extbuf);
- std::swap(_M_extbufnext, rhs._M_extbufnext);
- std::swap(_M_extbufend, rhs._M_extbufend);
- }
- else
- {
- ptrdiff_t ln = _M_extbufnext - _M_extbuf;
- ptrdiff_t le = _M_extbufend - _M_extbuf;
- ptrdiff_t rn = rhs._M_extbufnext - rhs._M_extbuf;
- ptrdiff_t re = rhs._M_extbufend - rhs._M_extbuf;
- if (_M_extbuf == _M_extbuf_min && rhs._M_extbuf != rhs._M_extbuf_min)
- {
- _M_extbuf = rhs._M_extbuf;
- rhs._M_extbuf = rhs._M_extbuf_min;
- }
- else if (_M_extbuf != _M_extbuf_min && rhs._M_extbuf == rhs._M_extbuf_min)
- {
- rhs._M_extbuf = _M_extbuf;
- _M_extbuf = _M_extbuf_min;
- }
- _M_extbufnext = _M_extbuf + rn;
- _M_extbufend = _M_extbuf + re;
- rhs._M_extbufnext = rhs._M_extbuf + ln;
- rhs._M_extbufend = rhs._M_extbuf + le;
- }
- std::swap(_M_ebs, rhs._M_ebs);
- std::swap(_M_intbuf, rhs._M_intbuf);
- std::swap(_M_ibs, rhs._M_ibs);
- std::swap(_M_file, rhs._M_file);
- std::swap(_M_cv, rhs._M_cv);
- std::swap(_M_st, rhs._M_st);
- std::swap(_M_st_last, rhs._M_st_last);
- std::swap(_M_om, rhs._M_om);
- std::swap(_M_cm, rhs._M_cm);
- std::swap(_M_owns_eb, rhs._M_owns_eb);
- std::swap(_M_owns_ib, rhs._M_owns_ib);
- std::swap(_M_always_noconv, rhs._M_always_noconv);
- if (this->eback() == (char_type*)rhs._M_extbuf_min)
- {
- ptrdiff_t n = this->gptr() - this->eback();
- ptrdiff_t e = this->egptr() - this->eback();
- this->setg((char_type*)_M_extbuf_min,
- (char_type*)_M_extbuf_min + n,
- (char_type*)_M_extbuf_min + e);
- }
- else if (this->pbase() == (char_type*)rhs._M_extbuf_min)
- {
- ptrdiff_t n = this->pptr() - this->pbase();
- ptrdiff_t e = this->epptr() - this->pbase();
- this->setp((char_type*)_M_extbuf_min,
- (char_type*)_M_extbuf_min + e);
- this->pbump(n);
- }
- if (rhs.eback() == (char_type*)_M_extbuf_min)
- {
- ptrdiff_t n = rhs.gptr() - rhs.eback();
- ptrdiff_t e = rhs.egptr() - rhs.eback();
- rhs.setg((char_type*)rhs._M_extbuf_min,
- (char_type*)rhs._M_extbuf_min + n,
- (char_type*)rhs._M_extbuf_min + e);
- }
- else if (rhs.pbase() == (char_type*)_M_extbuf_min)
- {
- ptrdiff_t n = rhs.pptr() - rhs.pbase();
- ptrdiff_t e = rhs.epptr() - rhs.pbase();
- rhs.setp((char_type*)rhs._M_extbuf_min,
- (char_type*)rhs._M_extbuf_min + e);
- rhs.pbump(n);
- }
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-inline
-void
-swap(basic_filebuf<CharT, Traits>& x, basic_filebuf<CharT, Traits>& y)
-{
- x.swap(y);
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-inline
-bool
-basic_filebuf<CharT, Traits>::is_open() const
-{
- return _M_file != nullptr;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-const char* basic_filebuf<CharT, Traits>::_M_get_mode(std::ios_base::openmode mode)
-{
- switch ((mode & ~std::ios_base::ate) | 0)
- {
- case std::ios_base::out:
- case std::ios_base::out | std::ios_base::trunc:
- return "w";
- case std::ios_base::out | std::ios_base::app:
- case std::ios_base::app:
- return "a";
- break;
- case std::ios_base::in:
- return "r";
- case std::ios_base::in | std::ios_base::out:
- return "r+";
- case std::ios_base::in | std::ios_base::out | std::ios_base::trunc:
- return "w+";
- case std::ios_base::in | std::ios_base::out | std::ios_base::app:
- case std::ios_base::in | std::ios_base::app:
- return "a+";
- case std::ios_base::out | std::ios_base::binary:
- case std::ios_base::out | std::ios_base::trunc | std::ios_base::binary:
- return "wb";
- case std::ios_base::out | std::ios_base::app | std::ios_base::binary:
- case std::ios_base::app | std::ios_base::binary:
- return "ab";
- case std::ios_base::in | std::ios_base::binary:
- return "rb";
- case std::ios_base::in | std::ios_base::out | std::ios_base::binary:
- return "r+b";
- case std::ios_base::in | std::ios_base::out | std::ios_base::trunc | std::ios_base::binary:
- return "w+b";
- case std::ios_base::in | std::ios_base::out | std::ios_base::app | std::ios_base::binary:
- case std::ios_base::in | std::ios_base::app | std::ios_base::binary:
- return "a+b";
- default:
- return nullptr;
- }
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-basic_filebuf<CharT, Traits>*
-basic_filebuf<CharT, Traits>::open(const char* s, std::ios_base::openmode mode)
-{
- basic_filebuf<CharT, Traits>* rt = nullptr;
- if (_M_file == nullptr)
- {
- const char* md= _M_get_mode(mode);
- if (md)
- {
- _M_file = fopen(s, md);
- if (_M_file)
- {
- rt = this;
- _M_om = mode;
- if (mode & std::ios_base::ate)
- {
- if (fseek(_M_file, 0, SEEK_END))
- {
- fclose(_M_file);
- _M_file = nullptr;
- rt = nullptr;
- }
- }
- }
- }
- }
- return rt;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-inline
-basic_filebuf<CharT, Traits>*
-basic_filebuf<CharT, Traits>::open(const std::string& s, std::ios_base::openmode mode)
-{
- return open(s.c_str(), mode);
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-basic_filebuf<CharT, Traits>*
-basic_filebuf<CharT, Traits>::open(int fd, std::ios_base::openmode mode)
-{
- const char* md= this->_M_get_mode(mode);
- if (md)
- {
- this->_M_file= fdopen(fd, md);
- this->_M_om = mode;
- return this;
- }
- else return nullptr;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-basic_filebuf<CharT, Traits>*
-basic_filebuf<CharT, Traits>::open(FILE* f, std::ios_base::openmode mode)
-{
- this->_M_file = f;
- this->_M_om = mode;
- return this;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-basic_filebuf<CharT, Traits>*
-basic_filebuf<CharT, Traits>::close()
-{
- basic_filebuf<CharT, Traits>* rt = nullptr;
- if (_M_file)
- {
- rt = this;
- std::unique_ptr<FILE, int(*)(FILE*)> h(_M_file, fclose);
- if (sync())
- rt = nullptr;
- if (fclose(h.release()) == 0)
- _M_file = nullptr;
- else
- rt = nullptr;
- }
- return rt;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-typename basic_filebuf<CharT, Traits>::int_type
-basic_filebuf<CharT, Traits>::underflow()
-{
- if (_M_file == nullptr)
- return traits_type::eof();
- bool initial = _M_read_mode();
- char_type buf;
- if (this->gptr() == nullptr)
- this->setg(&buf, &buf+1, &buf+1);
- const size_t unget_sz = initial ? 0 : std::min<size_t>((this->egptr() - this->eback()) / 2, 4);
- int_type c = traits_type::eof();
- if (this->gptr() == this->egptr())
- {
- memmove(this->eback(), this->egptr() - unget_sz, unget_sz * sizeof(char_type));
- if (_M_always_noconv)
- {
- size_t nmemb = static_cast<size_t>(this->egptr() - this->eback() - unget_sz);
- nmemb = fread(this->eback() + unget_sz, 1, nmemb, _M_file);
- if (nmemb != 0)
- {
- this->setg(this->eback(),
- this->eback() + unget_sz,
- this->eback() + unget_sz + nmemb);
- c = traits_type::to_int_type(*this->gptr());
- }
- }
- else
- {
- memmove(_M_extbuf, _M_extbufnext, _M_extbufend - _M_extbufnext);
- _M_extbufnext = _M_extbuf + (_M_extbufend - _M_extbufnext);
- _M_extbufend = _M_extbuf + (_M_extbuf == _M_extbuf_min ? sizeof(_M_extbuf_min) : _M_ebs);
- size_t nmemb = std::min(static_cast<size_t>(_M_ibs - unget_sz),
- static_cast<size_t>(_M_extbufend - _M_extbufnext));
- std::codecvt_base::result r;
- _M_st_last = _M_st;
- size_t nr = fread((void*)_M_extbufnext, 1, nmemb, _M_file);
- if (nr != 0)
- {
- if (!_M_cv)
- throw std::bad_cast();
- _M_extbufend = _M_extbufnext + nr;
- char_type* inext;
- r = _M_cv->in(_M_st, _M_extbuf, _M_extbufend, _M_extbufnext,
- this->eback() + unget_sz,
- this->eback() + _M_ibs, inext);
- if (r == std::codecvt_base::noconv)
- {
- this->setg((char_type*)_M_extbuf, (char_type*)_M_extbuf, (char_type*)_M_extbufend);
- c = traits_type::to_int_type(*this->gptr());
- }
- else if (inext != this->eback() + unget_sz)
- {
- this->setg(this->eback(), this->eback() + unget_sz, inext);
- c = traits_type::to_int_type(*this->gptr());
- }
- }
- }
- }
- else
- c = traits_type::to_int_type(*this->gptr());
- if (this->eback() == &buf)
- this->setg(0, 0, 0);
- return c;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-typename basic_filebuf<CharT, Traits>::int_type
-basic_filebuf<CharT, Traits>::pbackfail(int_type c)
-{
- if (_M_file && this->eback() < this->gptr())
- {
- if (traits_type::eq_int_type(c, traits_type::eof()))
- {
- this->gbump(-1);
- return traits_type::not_eof(c);
- }
- if ((_M_om & std::ios_base::out) ||
- traits_type::eq(traits_type::to_char_type(c), this->gptr()[-1]))
- {
- this->gbump(-1);
- *this->gptr() = traits_type::to_char_type(c);
- return c;
- }
- }
- return traits_type::eof();
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-typename basic_filebuf<CharT, Traits>::int_type
-basic_filebuf<CharT, Traits>::overflow(int_type c)
-{
- if (_M_file == nullptr)
- return traits_type::eof();
- _M_write_mode();
- char_type buf;
- char_type* pb_save = this->pbase();
- char_type* epb_save = this->epptr();
- if (!traits_type::eq_int_type(c, traits_type::eof()))
- {
- if (this->pptr() == nullptr)
- this->setp(&buf, &buf+1);
- *this->pptr() = traits_type::to_char_type(c);
- this->pbump(1);
- }
- if (this->pptr() != this->pbase())
- {
- if (_M_always_noconv)
- {
- size_t nmemb = static_cast<size_t>(this->pptr() - this->pbase());
- if (fwrite(this->pbase(), sizeof(char_type), nmemb, _M_file) != nmemb)
- return traits_type::eof();
- }
- else
- {
- char* extbe = _M_extbuf;
- std::codecvt_base::result r;
- do
- {
- if (!_M_cv)
- throw std::bad_cast();
- const char_type* e;
- r = _M_cv->out(_M_st, this->pbase(), this->pptr(), e,
- _M_extbuf, _M_extbuf + _M_ebs, extbe);
- if (e == this->pbase())
- return traits_type::eof();
- if (r == std::codecvt_base::noconv)
- {
- size_t nmemb = static_cast<size_t>(this->pptr() - this->pbase());
- if (fwrite(this->pbase(), 1, nmemb, _M_file) != nmemb)
- return traits_type::eof();
- }
- else if (r == std::codecvt_base::ok || r == std::codecvt_base::partial)
- {
- size_t nmemb = static_cast<size_t>(extbe - _M_extbuf);
- if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb)
- return traits_type::eof();
- if (r == std::codecvt_base::partial)
- {
- this->setp((char_type*)e, this->pptr());
- this->pbump(this->epptr() - this->pbase());
- }
- }
- else
- return traits_type::eof();
- } while (r == std::codecvt_base::partial);
- }
- this->setp(pb_save, epb_save);
- }
- return traits_type::not_eof(c);
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-std::basic_streambuf<CharT, Traits>*
-basic_filebuf<CharT, Traits>::setbuf(char_type* s, std::streamsize n)
-{
- this->setg(0, 0, 0);
- this->setp(0, 0);
- if (_M_owns_eb)
- delete [] _M_extbuf;
- if (_M_owns_ib)
- delete [] _M_intbuf;
- _M_ebs = n;
- if (_M_ebs > sizeof(_M_extbuf_min))
- {
- if (_M_always_noconv && s)
- {
- _M_extbuf = (char*)s;
- _M_owns_eb = false;
- }
- else
- {
- _M_extbuf = new char[_M_ebs];
- _M_owns_eb = true;
- }
- }
- else
- {
- _M_extbuf = _M_extbuf_min;
- _M_ebs = sizeof(_M_extbuf_min);
- _M_owns_eb = false;
- }
- if (!_M_always_noconv)
- {
- _M_ibs = std::max<std::streamsize>(n, sizeof(_M_extbuf_min));
- if (s && _M_ibs >= sizeof(_M_extbuf_min))
- {
- _M_intbuf = s;
- _M_owns_ib = false;
- }
- else
- {
- _M_intbuf = new char_type[_M_ibs];
- _M_owns_ib = true;
- }
- }
- else
- {
- _M_ibs = 0;
- _M_intbuf = 0;
- _M_owns_ib = false;
- }
- return this;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-typename basic_filebuf<CharT, Traits>::pos_type
-basic_filebuf<CharT, Traits>::seekoff(off_type off, std::ios_base::seekdir way,
- std::ios_base::openmode)
-{
- if (!_M_cv)
- throw std::bad_cast();
- int width = _M_cv->encoding();
- if (_M_file == nullptr || (width <= 0 && off != 0) || sync())
- return pos_type(off_type(-1));
- // width > 0 || off == 0
- int whence;
- switch (way)
- {
- case std::ios_base::beg:
- whence = SEEK_SET;
- break;
- case std::ios_base::cur:
- whence = SEEK_CUR;
- break;
- case std::ios_base::end:
- whence = SEEK_END;
- break;
- default:
- return pos_type(off_type(-1));
- }
-#if _WIN32
- if (fseek(_M_file, width > 0 ? width * off : 0, whence))
- return pos_type(off_type(-1));
- pos_type r = ftell(_M_file);
-#else
- if (fseeko(_M_file, width > 0 ? width * off : 0, whence))
- return pos_type(off_type(-1));
- pos_type r = ftello(_M_file);
-#endif
- r.state(_M_st);
- return r;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-typename basic_filebuf<CharT, Traits>::pos_type
-basic_filebuf<CharT, Traits>::seekpos(pos_type sp, std::ios_base::openmode)
-{
- if (_M_file == nullptr || sync())
- return pos_type(off_type(-1));
-#if _WIN32
- if (fseek(_M_file, sp, SEEK_SET))
- return pos_type(off_type(-1));
-#else
- if (fseeko(_M_file, sp, SEEK_SET))
- return pos_type(off_type(-1));
-#endif
- _M_st = sp.state();
- return sp;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-int
-basic_filebuf<CharT, Traits>::sync()
-{
- if (_M_file == nullptr)
- return 0;
- if (!_M_cv)
- throw std::bad_cast();
- if (_M_cm & std::ios_base::out)
- {
- if (this->pptr() != this->pbase())
- if (overflow() == traits_type::eof())
- return -1;
- std::codecvt_base::result r;
- do
- {
- char* extbe;
- r = _M_cv->unshift(_M_st, _M_extbuf, _M_extbuf + _M_ebs, extbe);
- size_t nmemb = static_cast<size_t>(extbe - _M_extbuf);
- if (fwrite(_M_extbuf, 1, nmemb, _M_file) != nmemb)
- return -1;
- } while (r == std::codecvt_base::partial);
- if (r == std::codecvt_base::error)
- return -1;
- if (fflush(_M_file))
- return -1;
- }
- else if (_M_cm & std::ios_base::in)
- {
- off_type c;
- state_type state = _M_st_last;
- bool update_st = false;
- if (_M_always_noconv)
- c = this->egptr() - this->gptr();
- else
- {
- int width = _M_cv->encoding();
- c = _M_extbufend - _M_extbufnext;
- if (width > 0)
- c += width * (this->egptr() - this->gptr());
- else
- {
- if (this->gptr() != this->egptr())
- {
- const int off = _M_cv->length(state, _M_extbuf,
- _M_extbufnext,
- this->gptr() - this->eback());
- c += _M_extbufnext - _M_extbuf - off;
- update_st = true;
- }
- }
- }
-#if _WIN32
- if (fseek(_M_file_, -c, SEEK_CUR))
- return -1;
-#else
- if (fseeko(_M_file, -c, SEEK_CUR))
- return -1;
-#endif
- if (update_st)
- _M_st = state;
- _M_extbufnext = _M_extbufend = _M_extbuf;
- this->setg(0, 0, 0);
- _M_cm = std::ios_base::openmode(0);
- }
- return 0;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-void
-basic_filebuf<CharT, Traits>::imbue(const std::locale& loc)
-{
- sync();
- _M_cv = &std::use_facet<std::codecvt<char_type, char, state_type> >(loc);
- bool old_anc = _M_always_noconv;
- _M_always_noconv = _M_cv->always_noconv();
- if (old_anc != _M_always_noconv)
- {
- this->setg(0, 0, 0);
- this->setp(0, 0);
- // invariant, char_type is char, else we couldn't get here
- if (_M_always_noconv) // need to dump _M_intbuf
- {
- if (_M_owns_eb)
- delete [] _M_extbuf;
- _M_owns_eb = _M_owns_ib;
- _M_ebs = _M_ibs;
- _M_extbuf = (char*)_M_intbuf;
- _M_ibs = 0;
- _M_intbuf = nullptr;
- _M_owns_ib = false;
- }
- else // need to obtain an _M_intbuf.
- { // If _M_extbuf is user-supplied, use it, else new _M_intbuf
- if (!_M_owns_eb && _M_extbuf != _M_extbuf_min)
- {
- _M_ibs = _M_ebs;
- _M_intbuf = (char_type*)_M_extbuf;
- _M_owns_ib = false;
- _M_extbuf = new char[_M_ebs];
- _M_owns_eb = true;
- }
- else
- {
- _M_ibs = _M_ebs;
- _M_intbuf = new char_type[_M_ibs];
- _M_owns_ib = true;
- }
- }
- }
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-bool
-basic_filebuf<CharT, Traits>::_M_read_mode()
-{
- if (!(_M_cm & std::ios_base::in))
- {
- this->setp(0, 0);
- if (_M_always_noconv)
- this->setg((char_type*)_M_extbuf,
- (char_type*)_M_extbuf + _M_ebs,
- (char_type*)_M_extbuf + _M_ebs);
- else
- this->setg(_M_intbuf, _M_intbuf + _M_ibs, _M_intbuf + _M_ibs);
- _M_cm = std::ios_base::in;
- return true;
- }
- return false;
-}
-
-///////////////////////////////////////////////////////////////////////////////
-template <class CharT, class Traits>
-void
-basic_filebuf<CharT, Traits>::_M_write_mode()
-{
- if (!(_M_cm & std::ios_base::out))
- {
- this->setg(0, 0, 0);
- if (_M_ebs > sizeof(_M_extbuf_min))
- {
- if (_M_always_noconv)
- this->setp((char_type*)_M_extbuf,
- (char_type*)_M_extbuf + (_M_ebs - 1));
- else
- this->setp(_M_intbuf, _M_intbuf + (_M_ibs - 1));
- }
- else
- this->setp(0, 0);
- _M_cm = std::ios_base::out;
- }
-}
-
-///////////////////////////////////////////////////////////////////////////////
-}
-
-///////////////////////////////////////////////////////////////////////////////
-#endif // KALDI_UTIL_BASIC_FILEBUF_H_
-
-///////////////////////////////////////////////////////////////////////////////
-
-/*
- * ============================================================================
- * libc++ License
- * ============================================================================
- *
- * The libc++ library is dual licensed under both the University of Illinois
- * "BSD-Like" license and the MIT license. As a user of this code you may
- * choose to use it under either license. As a contributor, you agree to allow
- * your code to be used under both.
- *
- * Full text of the relevant licenses is included below.
- *
- * ============================================================================
- *
- * University of Illinois/NCSA
- * Open Source License
- *
- * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included below)
- *
- * All rights reserved.
- *
- * Developed by:
- *
- * LLVM Team
- *
- * University of Illinois at Urbana-Champaign
- *
- * http://llvm.org
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy of
- * this software and associated documentation files (the "Software"), to deal with
- * the Software without restriction, including without limitation the rights to
- * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
- * of the Software, and to permit persons to whom the Software is furnished to do
- * so, subject to the following conditions:
- *
- * * Redistributions of source code must retain the above copyright notice,
- * this list of conditions and the following disclaimers.
- *
- * * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimers in the
- * documentation and/or other materials provided with the distribution.
- *
- * * Neither the names of the LLVM Team, University of Illinois at
- * Urbana-Champaign, nor the names of its contributors may be used to
- * endorse or promote products derived from this Software without specific
- * prior written permission.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
- * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
- * SOFTWARE.
- *
- * ==============================================================================
- *
- * Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT (included below)
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- *
- * ==============================================================================
- *
- * This file is a partial list of people who have contributed to the LLVM/libc++
- * project. If you have contributed a patch or made some other contribution to
- * LLVM/libc++, please submit a patch to this file to add yourself, and it will be
- * done!
- *
- * The list is sorted by surname and formatted to allow easy grepping and
- * beautification by scripts. The fields are: name (N), email (E), web-address
- * (W), PGP key ID and fingerprint (P), description (D), and snail-mail address
- * (S).
- *
- * N: Saleem Abdulrasool
- * D: Minor patches and Linux fixes.
- *
- * N: Dimitry Andric
- * D: Visibility fixes, minor FreeBSD portability patches.
- *
- * N: Holger Arnold
- * D: Minor fix.
- *
- * N: Ruben Van Boxem
- * E: vanboxem dot ruben at gmail dot com
- * D: Initial Windows patches.
- *
- * N: David Chisnall
- * E: theraven at theravensnest dot org
- * D: FreeBSD and Solaris ports, libcxxrt support, some atomics work.
- *
- * N: Marshall Clow
- * D: C++14 support, patches and bug fixes.
- *
- * N: Bill Fisher
- * D: Regex bug fixes.
- *
- * N: Matthew Dempsky
- * D: Minor patches and bug fixes.
- *
- * N: Google Inc.
- * D: Copyright owner and contributor of the CityHash algorithm
- *
- * N: Howard Hinnant
- * D: Architect and primary author of libc++
- *
- * N: Hyeon-bin Jeong
- * D: Minor patches and bug fixes.
- *
- * N: Argyrios Kyrtzidis
- * D: Bug fixes.
- *
- * N: Bruce Mitchener, Jr.
- * D: Emscripten-related changes.
- *
- * N: Michel Morin
- * D: Minor patches to is_convertible.
- *
- * N: Andrew Morrow
- * D: Minor patches and Linux fixes.
- *
- * N: Arvid Picciani
- * E: aep at exys dot org
- * D: Minor patches and musl port.
- *
- * N: Bjorn Reese
- * D: Initial regex prototype
- *
- * N: Nico Rieck
- * D: Windows fixes
- *
- * N: Jonathan Sauer
- * D: Minor patches, mostly related to constexpr
- *
- * N: Craig Silverstein
- * D: Implemented Cityhash as the string hash function on 64-bit machines
- *
- * N: Richard Smith
- * D: Minor patches.
- *
- * N: Joerg Sonnenberger
- * D: NetBSD port.
- *
- * N: Stephan Tolksdorf
- * D: Minor <atomic> fix
- *
- * N: Michael van der Westhuizen
- * E: r1mikey at gmail dot com
- *
- * N: Klaas de Vries
- * E: klaas at klaasgaaf dot nl
- * D: Minor bug fix.
- *
- * N: Zhang Xiongpang
- * D: Minor patches and bug fixes.
- *
- * N: Xing Xue
- * D: AIX port
- *
- * N: Zhihao Yuan
- * D: Standard compatibility fixes.
- *
- * N: Jeffrey Yasskin
- * D: Linux fixes.
- */
diff --git a/kaldi_io/src/kaldi/util/common-utils.h b/kaldi_io/src/kaldi/util/common-utils.h
deleted file mode 100644
index 9d39f9d..0000000
--- a/kaldi_io/src/kaldi/util/common-utils.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// util/common-utils.h
-
-// Copyright 2009-2011 Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-
-// http://www.apache.org/licenses/LICENSE-2.0
-
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-#ifndef KALDI_UTIL_COMMON_UTILS_H_
-#define KALDI_UTIL_COMMON_UTILS_H_
-
-#include "base/kaldi-common.h"
-#include "util/parse-options.h"
-#include "util/kaldi-io.h"
-#include "util/simple-io-funcs.h"
-#include "util/kaldi-holder.h"
-#include "util/kaldi-table.h"
-#include "util/table-types.h"
-#include "util/text-utils.h"
-
-#endif
diff --git a/kaldi_io/src/kaldi/util/const-integer-set-inl.h b/kaldi_io/src/kaldi/util/const-integer-set-inl.h
deleted file mode 100644
index 8f92ab2..0000000
--- a/kaldi_io/src/kaldi/util/const-integer-set-inl.h
+++ /dev/null
@@ -1,88 +0,0 @@
-// util/const-integer-set-inl.h
-
-// Copyright 2009-2011 Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef KALDI_UTIL_CONST_INTEGER_SET_INL_H_
-#define KALDI_UTIL_CONST_INTEGER_SET_INL_H_
-
-// Do not include this file directly. It is included by const-integer-set.h
-
-
-namespace kaldi {
-
-template<class I>
-void ConstIntegerSet<I>::InitInternal() {
- KALDI_ASSERT_IS_INTEGER_TYPE(I);
- quick_set_.clear(); // just in case we previously had data.
- if (slow_set_.size() == 0) {
- lowest_member_=(I) 1;
- highest_member_=(I) 0;
- contiguous_ = false;
- quick_ = false;
- } else {
- lowest_member_ = slow_set_.front();
- highest_member_ = slow_set_.back();
- size_t range = highest_member_ + 1 - lowest_member_;
- if (range == slow_set_.size()) {
- contiguous_ = true;
- quick_=false;
- } else {
- contiguous_ = false;
- if (range < slow_set_.size() * 8 * sizeof(I)) { // If it would be more compact to store as bool
- // (assuming 1 bit per element)...
- quick_set_.resize(range, false);
- for (size_t i = 0;i < slow_set_.size();i++)
- quick_set_[slow_set_[i] - lowest_member_] = true;
- quick_ = true;
- } else {
- quick_ = false;
- }
- }
- }
-}
-
-template<class I>
-int ConstIntegerSet<I>::count(I i) const {
- if (i < lowest_member_ || i > highest_member_) return 0;
- else {
- if (contiguous_) return true;
- if (quick_) return (quick_set_[i-lowest_member_] ? 1 : 0);
- else {
- bool ans = std::binary_search(slow_set_.begin(), slow_set_.end(), i);
- return (ans ? 1 : 0);
- }
- }
-}
-
-template<class I>
-void ConstIntegerSet<I>::Write(std::ostream &os, bool binary) const {
- WriteIntegerVector(os, binary, slow_set_);
-}
-
-template<class I>
-void ConstIntegerSet<I>::Read(std::istream &is, bool binary) {
- ReadIntegerVector(is, binary, &slow_set_);
- InitInternal();
-}
-
-
-
-} // end namespace kaldi
-
-#endif
diff --git a/kaldi_io/src/kaldi/util/const-integer-set.h b/kaldi_io/src/kaldi/util/const-integer-set.h
deleted file mode 100644
index ffdce4d..0000000
--- a/kaldi_io/src/kaldi/util/const-integer-set.h
+++ /dev/null
@@ -1,95 +0,0 @@
-// util/const-integer-set.h
-
-// Copyright 2009-2011 Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef KALDI_UTIL_CONST_INTEGER_SET_H_
-#define KALDI_UTIL_CONST_INTEGER_SET_H_
-#include <vector>
-#include <set>
-#include <algorithm>
-#include <limits>
-#include <cassert>
-#include "util/stl-utils.h"
-
- /* ConstIntegerSet is a way to efficiently test whether something is in a
- supplied set of integers. It can be initialized from a vector or set, but
- never changed after that. It either uses a sorted vector or an array of
- bool, depending on the input. It behaves like a const version of an STL set, with
- only a subset of the functionality, except all the member functions are
- upper-case.
-
- Note that we could get rid of the member slow_set_, but we'd have to
- do more work to implement an iterator type. This would save memory.
- */
-
-namespace kaldi {
-
-template<class I> class ConstIntegerSet {
- public:
- ConstIntegerSet(): lowest_member_(1), highest_member_(0) { }
-
- void Init(const std::vector<I> &input) {
- slow_set_ = input;
- SortAndUniq(&slow_set_);
- InitInternal();
- }
-
- void Init(const std::set<I> &input) {
- CopySetToVector(input, &slow_set_);
- InitInternal();
- }
-
- explicit ConstIntegerSet(const std::vector<I> &input): slow_set_(input) {
- SortAndUniq(&slow_set_);
- InitInternal();
- }
- explicit ConstIntegerSet(const std::set<I> &input) {
- CopySetToVector(input, &slow_set_);
- InitInternal();
- }
- explicit ConstIntegerSet(const ConstIntegerSet<I> &other): slow_set_(other.slow_set_) {
- InitInternal();
- }
-
- int count(I i) const; // returns 1 or 0.
-
- typedef typename std::vector<I>::const_iterator iterator;
- iterator begin() const { return slow_set_.begin(); }
- iterator end() const { return slow_set_.end(); }
- size_t size() const { return slow_set_.size(); }
- bool empty() const { return slow_set_.empty(); }
-
- void Write(std::ostream &os, bool binary) const;
- void Read(std::istream &is, bool binary);
-
- private:
- I lowest_member_;
- I highest_member_;
- bool contiguous_;
- bool quick_;
- std::vector<bool> quick_set_;
- std::vector<I> slow_set_;
- void InitInternal();
-};
-
-} // end namespace kaldi
-
-#include "const-integer-set-inl.h"
-
-#endif
diff --git a/kaldi_io/src/kaldi/util/edit-distance-inl.h b/kaldi_io/src/kaldi/util/edit-distance-inl.h
deleted file mode 100644
index ebbfb71..0000000
--- a/kaldi_io/src/kaldi/util/edit-distance-inl.h
+++ /dev/null
@@ -1,189 +0,0 @@
-// util/edit-distance-inl.h
-
-// Copyright 2009-2011 Microsoft Corporation; Haihua Xu; Yanmin Qian
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-
-// http://www.apache.org/licenses/LICENSE-2.0
-
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_UTIL_EDIT_DISTANCE_INL_H_
-#define KALDI_UTIL_EDIT_DISTANCE_INL_H_
-#include "util/stl-utils.h"
-
-
-namespace kaldi {
-
-template<class T>
-int32 LevenshteinEditDistance(const std::vector<T> &a,
- const std::vector<T> &b) {
- // Algorithm:
- // write A and B for the sequences, with elements a_0 ..
- // let |A| = M and |B| = N be the lengths, and have
- // elements a_0 ... a_{M-1} and b_0 ... b_{N-1}.
- // We are computing the recursion
- // E(m, n) = min( E(m-1, n-1) + (1-delta(a_{m-1}, b_{n-1})),
- // E(m-1, n),
- // E(m, n-1) ).
- // where E(m, n) is defined for m = 0..M and n = 0..N and out-of-
- // bounds quantities are considered to be infinity (i.e. the
- // recursion does not visit them).
-
- // We do this computation using a vector e of size N+1.
- // The outer iterations range over m = 0..M.
-
- int M = a.size(), N = b.size();
- std::vector<int32> e(N+1);
- std::vector<int32> e_tmp(N+1);
- // initialize e.
- for (size_t i = 0; i < e.size(); i++)
- e[i] = i;
- for (int32 m = 1; m <= M; m++) {
- // computing E(m, .) from E(m-1, .)
- // handle special case n = 0:
- e_tmp[0] = e[0] + 1;
-
- for (int32 n = 1; n <= N; n++) {
- int32 term1 = e[n-1] + (a[m-1] == b[n-1] ? 0 : 1);
- int32 term2 = e[n] + 1;
- int32 term3 = e_tmp[n-1] + 1;
- e_tmp[n] = std::min(term1, std::min(term2, term3));
- }
- e = e_tmp;
- }
- return e.back();
-}
-//
-struct error_stats{
- int32 ins_num;
- int32 del_num;
- int32 sub_num;
- int32 total_cost; // minimum total cost to the current alignment.
-};
-// Note that both hyp and ref should not contain noise word in
-// the following implementation.
-
-template<class T>
-int32 LevenshteinEditDistance(const std::vector<T> &ref,
- const std::vector<T> &hyp,
- int32 *ins, int32 *del, int32 *sub) {
- // temp sequence to remember error type and stats.
- std::vector<error_stats> e(ref.size()+1);
- std::vector<error_stats> cur_e(ref.size()+1);
- // initialize the first hypothesis aligned to the reference at each
- // position:[hyp_index =0][ref_index]
- for (size_t i =0; i < e.size(); i ++) {
- e[i].ins_num = 0;
- e[i].sub_num = 0;
- e[i].del_num = i;
- e[i].total_cost = i;
- }
-
- // for other alignments
- for (size_t hyp_index = 1; hyp_index <= hyp.size(); hyp_index ++) {
- cur_e[0] = e[0];
- cur_e[0].ins_num ++;
- cur_e[0].total_cost ++;
- for (size_t ref_index = 1; ref_index <= ref.size(); ref_index ++) {
-
- int32 ins_err = e[ref_index].total_cost + 1;
- int32 del_err = cur_e[ref_index-1].total_cost + 1;
- int32 sub_err = e[ref_index-1].total_cost;
- if (hyp[hyp_index-1] != ref[ref_index-1])
- sub_err ++;
-
- if (sub_err < ins_err && sub_err < del_err) {
- cur_e[ref_index] =e[ref_index-1];
- if (hyp[hyp_index-1] != ref[ref_index-1])
- cur_e[ref_index].sub_num ++; // substitution error should be increased
- cur_e[ref_index].total_cost = sub_err;
- }else if (del_err < ins_err ) {
- cur_e[ref_index] = cur_e[ref_index-1];
- cur_e[ref_index].total_cost = del_err;
- cur_e[ref_index].del_num ++; // deletion number is increased.
- }else{
- cur_e[ref_index] = e[ref_index];
- cur_e[ref_index].total_cost = ins_err;
- cur_e[ref_index].ins_num ++; // insertion number is increased.
- }
- }
- e = cur_e; // alternate for the next recursion.
- }
- size_t ref_index = e.size()-1;
- *ins = e[ref_index].ins_num, *del = e[ref_index].del_num, *sub = e[ref_index].sub_num;
- return e[ref_index].total_cost;
-}
-
-template<class T>
-int32 LevenshteinAlignment(const std::vector<T> &a,
- const std::vector<T> &b,
- T eps_symbol,
- std::vector<std::pair<T, T> > *output) {
- // Check inputs:
- {
- KALDI_ASSERT(output != NULL);
- for (size_t i = 0; i < a.size(); i++) KALDI_ASSERT(a[i] != eps_symbol);
- for (size_t i = 0; i < b.size(); i++) KALDI_ASSERT(b[i] != eps_symbol);
- }
- output->clear();
- // This is very memory-inefficiently implemented using a vector of vectors.
- size_t M = a.size(), N = b.size();
- size_t m, n;
- std::vector<std::vector<int32> > e(M+1);
- for (m = 0; m <=M; m++) e[m].resize(N+1);
- for (n = 0; n <= N; n++)
- e[0][n] = n;
- for (m = 1; m <= M; m++) {
- e[m][0] = e[m-1][0] + 1;
- for (n = 1; n <= N; n++) {
- int32 sub_or_ok = e[m-1][n-1] + (a[m-1] == b[n-1] ? 0 : 1);
- int32 del = e[m-1][n] + 1; // assumes a == ref, b == hyp.
- int32 ins = e[m][n-1] + 1;
- e[m][n] = std::min(sub_or_ok, std::min(del, ins));
- }
- }
- // get time-reversed output first: trace back.
- m = M; n = N;
- while (m != 0 || n != 0) {
- size_t last_m, last_n;
- if (m == 0) { last_m = m; last_n = n-1; }
- else if (n == 0) { last_m = m-1; last_n = n; }
- else {
- int32 sub_or_ok = e[m-1][n-1] + (a[m-1] == b[n-1] ? 0 : 1);
- int32 del = e[m-1][n] + 1; // assumes a == ref, b == hyp.
- int32 ins = e[m][n-1] + 1;
- if (sub_or_ok <= std::min(del, ins)) { // choose sub_or_ok if all else equal.
- last_m = m-1; last_n = n-1;
- } else {
- if (del <= ins) { // choose del over ins if equal.
- last_m = m-1; last_n = n;
- } else {
- last_m = m; last_n = n-1;
- }
- }
- }
- T a_sym, b_sym;
- a_sym = (last_m == m ? eps_symbol : a[last_m]);
- b_sym = (last_n == n ? eps_symbol : b[last_n]);
- output->push_back(std::make_pair(a_sym, b_sym));
- m = last_m;
- n = last_n;
- }
- ReverseVector(output);
- return e[M][N];
-}
-
-
-} // end namespace kaldi
-
-#endif // KALDI_UTIL_EDIT_DISTANCE_INL_H_
diff --git a/kaldi_io/src/kaldi/util/edit-distance.h b/kaldi_io/src/kaldi/util/edit-distance.h
deleted file mode 100644
index 6000622..0000000
--- a/kaldi_io/src/kaldi/util/edit-distance.h
+++ /dev/null
@@ -1,63 +0,0 @@
-// util/edit-distance.h
-
-// Copyright 2009-2011 Microsoft Corporation; Haihua Xu
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef KALDI_UTIL_EDIT_DISTANCE_H_
-#define KALDI_UTIL_EDIT_DISTANCE_H_
-#include <vector>
-#include <set>
-#include <algorithm>
-#include <limits>
-#include <cassert>
-#include "base/kaldi-types.h"
-
-namespace kaldi {
-
-// Compute the edit-distance between two strings.
-template<class T>
-int32 LevenshteinEditDistance(const std::vector<T> &a,
- const std::vector<T> &b);
-
-
-// edit distance calculation with conventional method.
-// note: noise word must be filtered out from the hypothesis and reference sequence
-// before the following procedure conducted.
-template<class T>
-int32 LevenshteinEditDistance(const std::vector<T> &ref,
- const std::vector<T> &hyp,
- int32 *ins, int32 *del, int32 *sub);
-
-// This version of the edit-distance computation outputs the alignment
-// between the two. This is a vector of pairs of (symbol a, symbol b).
-// The epsilon symbol (eps_symbol) must not occur in sequences a or b.
-// Where one aligned to no symbol in the other (insertion or deletion),
-// epsilon will be the corresponding member of the pair.
-// It returns the edit-distance between the two strings.
-
-template<class T>
-int32 LevenshteinAlignment(const std::vector<T> &a,
- const std::vector<T> &b,
- T eps_symbol,
- std::vector<std::pair<T, T> > *output);
-
-} // end namespace kaldi
-
-#include "edit-distance-inl.h"
-
-#endif
diff --git a/kaldi_io/src/kaldi/util/hash-list-inl.h b/kaldi_io/src/kaldi/util/hash-list-inl.h
deleted file mode 100644
index 19c2bb6..0000000
--- a/kaldi_io/src/kaldi/util/hash-list-inl.h
+++ /dev/null
@@ -1,183 +0,0 @@
-// util/hash-list-inl.h
-
-// Copyright 2009-2011 Microsoft Corporation
-// 2013 Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef KALDI_UTIL_HASH_LIST_INL_H_
-#define KALDI_UTIL_HASH_LIST_INL_H_
-
-// Do not include this file directly. It is included by fast-hash.h
-
-
-namespace kaldi {
-
-template<class I, class T> HashList<I, T>::HashList() {
- list_head_ = NULL;
- bucket_list_tail_ = static_cast<size_t>(-1); // invalid.
- hash_size_ = 0;
- freed_head_ = NULL;
-}
-
-template<class I, class T> void HashList<I, T>::SetSize(size_t size) {
- hash_size_ = size;
- KALDI_ASSERT(list_head_ == NULL && bucket_list_tail_ == static_cast<size_t>(-1)); // make sure empty.
- if (size > buckets_.size())
- buckets_.resize(size, HashBucket(0, NULL));
-}
-
-template<class I, class T>
-typename HashList<I, T>::Elem* HashList<I, T>::Clear() {
- // Clears the hashtable and gives ownership of the currently contained list to the
- // user.
- for (size_t cur_bucket = bucket_list_tail_;
- cur_bucket != static_cast<size_t>(-1);
- cur_bucket = buckets_[cur_bucket].prev_bucket) {
- buckets_[cur_bucket].last_elem = NULL; // this is how we indicate "empty".
- }
- bucket_list_tail_ = static_cast<size_t>(-1);
- Elem *ans = list_head_;
- list_head_ = NULL;
- return ans;
-}
-
-template<class I, class T>
-const typename HashList<I, T>::Elem* HashList<I, T>::GetList() const {
- return list_head_;
-}
-
-template<class I, class T>
-inline void HashList<I, T>::Delete(Elem *e) {
- e->tail = freed_head_;
- freed_head_ = e;
-}
-
-template<class I, class T>
-inline typename HashList<I, T>::Elem* HashList<I, T>::Find(I key) {
- size_t index = (static_cast<size_t>(key) % hash_size_);
- HashBucket &bucket = buckets_[index];
- if (bucket.last_elem == NULL) {
- return NULL; // empty bucket.
- } else {
- Elem *head = (bucket.prev_bucket == static_cast<size_t>(-1) ?
- list_head_ :
- buckets_[bucket.prev_bucket].last_elem->tail),
- *tail = bucket.last_elem->tail;
- for (Elem *e = head; e != tail; e = e->tail)
- if (e->key == key) return e;
- return NULL; // Not found.
- }
-}
-
-template<class I, class T>
-inline typename HashList<I, T>::Elem* HashList<I, T>::New() {
- if (freed_head_) {
- Elem *ans = freed_head_;
- freed_head_ = freed_head_->tail;
- return ans;
- } else {
- Elem *tmp = new Elem[allocate_block_size_];
- for (size_t i = 0; i+1 < allocate_block_size_; i++)
- tmp[i].tail = tmp+i+1;
- tmp[allocate_block_size_-1].tail = NULL;
- freed_head_ = tmp;
- allocated_.push_back(tmp);
- return this->New();
- }
-}
-
-template<class I, class T>
-HashList<I, T>::~HashList() {
- // First test whether we had any memory leak within the
- // HashList, i.e. things for which the user did not call Delete().
- size_t num_in_list = 0, num_allocated = 0;
- for (Elem *e = freed_head_; e != NULL; e = e->tail)
- num_in_list++;
- for (size_t i = 0; i < allocated_.size(); i++) {
- num_allocated += allocate_block_size_;
- delete[] allocated_[i];
- }
- if (num_in_list != num_allocated) {
- KALDI_WARN << "Possible memory leak: " << num_in_list
- << " != " << num_allocated
- << ": you might have forgotten to call Delete on "
- << "some Elems";
- }
-}
-
-
-template<class I, class T>
-void HashList<I, T>::Insert(I key, T val) {
- size_t index = (static_cast<size_t>(key) % hash_size_);
- HashBucket &bucket = buckets_[index];
- Elem *elem = New();
- elem->key = key;
- elem->val = val;
-
- if (bucket.last_elem == NULL) { // Unoccupied bucket. Insert at
- // head of bucket list (which is tail of regular list, they go in
- // opposite directions).
- if (bucket_list_tail_ == static_cast<size_t>(-1)) {
- // list was empty so this is the first elem.
- KALDI_ASSERT(list_head_ == NULL);
- list_head_ = elem;
- } else {
- // link in to the chain of Elems
- buckets_[bucket_list_tail_].last_elem->tail = elem;
- }
- elem->tail = NULL;
- bucket.last_elem = elem;
- bucket.prev_bucket = bucket_list_tail_;
- bucket_list_tail_ = index;
- } else {
- // Already-occupied bucket. Insert at tail of list of elements within
- // the bucket.
- elem->tail = bucket.last_elem->tail;
- bucket.last_elem->tail = elem;
- bucket.last_elem = elem;
- }
-}
-
-template<class I, class T>
-void HashList<I, T>::InsertMore(I key, T val) {
- size_t index = (static_cast<size_t>(key) % hash_size_);
- HashBucket &bucket = buckets_[index];
- Elem *elem = New();
- elem->key = key;
- elem->val = val;
-
- KALDI_ASSERT(bucket.last_elem != NULL); // we assume there is already one element
- if (bucket.last_elem->key == key) { // standard behavior: add as last element
- elem->tail = bucket.last_elem->tail;
- bucket.last_elem->tail = elem;
- bucket.last_elem = elem;
- return;
- }
- Elem *e = (bucket.prev_bucket == static_cast<size_t>(-1) ?
- list_head_ : buckets_[bucket.prev_bucket].last_elem->tail);
- // find place to insert in linked list
- while (e != bucket.last_elem->tail && e->key != key) e = e->tail;
- KALDI_ASSERT(e->key == key); // not found? - should not happen
- elem->tail = e->tail;
- e->tail = elem;
-}
-
-
-} // end namespace kaldi
-
-#endif
diff --git a/kaldi_io/src/kaldi/util/hash-list.h b/kaldi_io/src/kaldi/util/hash-list.h
deleted file mode 100644
index 4524759..0000000
--- a/kaldi_io/src/kaldi/util/hash-list.h
+++ /dev/null
@@ -1,140 +0,0 @@
-// util/hash-list.h
-
-// Copyright 2009-2011 Microsoft Corporation
-// 2013 Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef KALDI_UTIL_HASH_LIST_H_
-#define KALDI_UTIL_HASH_LIST_H_
-#include <vector>
-#include <set>
-#include <algorithm>
-#include <limits>
-#include <cassert>
-#include "util/stl-utils.h"
-
-
-/* This header provides utilities for a structure that's used in a decoder (but
- is quite generic in nature so we implement and test it separately).
- Basically it's a singly-linked list, but implemented in such a way that we
- can quickly search for elements in the list. We give it a slightly richer
- interface than just a hash and a list. The idea is that we want to separate
- the hash part and the list part: basically, in the decoder, we want to have a
- single hash for the current frame and the next frame, because by the time we
- need to access the hash for the next frame we no longer need the hash for the
- previous frame. So we have an operation that clears the hash but leaves the
- list structure intact. We also control memory management inside this object,
- to avoid repeated new's/deletes.
-
- See hash-list-test.cc for an example of how to use this object.
-*/
-
-
-namespace kaldi {
-
-template<class I, class T> class HashList {
-
- public:
- struct Elem {
- I key;
- T val;
- Elem *tail;
- };
-
- /// Constructor takes no arguments. Call SetSize to inform it of the likely size.
- HashList();
-
- /// Clears the hash and gives the head of the current list to the user;
- /// ownership is transferred to the user (the user must call Delete()
- /// for each element in the list, at his/her leisure).
- Elem *Clear();
-
- /// Gives the head of the current list to the user. Ownership retained in the
- /// class. Caution: in December 2013 the return type was changed to const Elem*
- /// and this function was made const. You may need to change some types of
- /// local Elem* variables to const if this produces compilation errors.
- const Elem *GetList() const;
-
- /// Think of this like delete(). It is to be called for each Elem in turn
- /// after you "obtained ownership" by doing Clear(). This is not the opposite of
- /// Insert, it is the opposite of New. It's really a memory operation.
- inline void Delete(Elem *e);
-
- /// This should probably not be needed to be called directly by the user. Think of it as opposite
- /// to Delete();
- inline Elem *New();
-
- /// Find tries to find this element in the current list using the hashtable.
- /// It returns NULL if not present. The Elem it returns is not owned by the user,
- /// it is part of the internal list owned by this object, but the user is
- /// free to modify the "val" element.
- inline Elem *Find(I key);
-
- /// Insert inserts a new element into the hashtable/stored list. By calling this,
- /// the user asserts that it is not already present (e.g. Find was called and
- /// returned NULL). With current code, calling this if an element already exists will
- /// result in duplicate elements in the structure, and Find() will find the
- /// first one that was added. [but we don't guarantee this behavior].
- inline void Insert(I key, T val);
-
- /// Insert inserts another element with same key into the hashtable/stored list.
- /// By calling this, the user asserts that one element with that key is already present.
- /// We insert it that way, that all elements with the same key follow each other.
- /// Find() will return the first one of the elements with the same key.
- inline void InsertMore(I key, T val);
-
- /// SetSize tells the object how many hash buckets to allocate (should typically be
- /// at least twice the number of objects we expect to go in the structure, for fastest
- /// performance). It must be called while the hash is empty (e.g. after Clear() or
- /// after initializing the object, but before adding anything to the hash.
- void SetSize(size_t sz);
-
- /// Returns current number of hash buckets.
- inline size_t Size() { return hash_size_; }
-
- ~HashList();
- private:
-
- struct HashBucket {
- size_t prev_bucket; // index to next bucket (-1 if list tail). Note: list of buckets
- // goes in opposite direction to list of Elems.
- Elem *last_elem; // pointer to last element in this bucket (NULL if empty)
- inline HashBucket(size_t i, Elem *e): prev_bucket(i), last_elem(e) {}
- };
-
- Elem *list_head_; // head of currently stored list.
- size_t bucket_list_tail_; // tail of list of active hash buckets.
-
- size_t hash_size_; // number of hash buckets.
-
- std::vector<HashBucket> buckets_;
-
- Elem *freed_head_; // head of list of currently freed elements. [ready for allocation]
-
- std::vector<Elem*> allocated_; // list of allocated blocks.
-
- static const size_t allocate_block_size_ = 1024; // Number of Elements to allocate in one block. Must be
- // largish so storing allocated_ doesn't become a problem.
-};
-
-
-} // end namespace kaldi
-
-#include "hash-list-inl.h"
-
-#endif
diff --git a/kaldi_io/src/kaldi/util/kaldi-holder-inl.h b/kaldi_io/src/kaldi/util/kaldi-holder-inl.h
deleted file mode 100644
index 6a66e61..0000000
--- a/kaldi_io/src/kaldi/util/kaldi-holder-inl.h
+++ /dev/null
@@ -1,800 +0,0 @@
-// util/kaldi-holder-inl.h
-
-// Copyright 2009-2011 Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef KALDI_UTIL_KALDI_HOLDER_INL_H_
-#define KALDI_UTIL_KALDI_HOLDER_INL_H_
-
-#include <algorithm>
-#include <vector>
-#include "util/kaldi-io.h"
-#include "util/text-utils.h"
-#include "matrix/kaldi-matrix.h"
-
-namespace kaldi {
-
-/// \addtogroup holders
-/// @{
-
-
-// KaldiObjectHolder is a Holder for Kaldi objects that have a default
-// constructor and the "normal" Kaldi member functions Write(os, binary) and
-// Read(is, binary).  E.g. it works for Matrix and Vector.
-// The held object is heap-allocated and owned by the holder.
-template<class KaldiType> class KaldiObjectHolder {
- public:
-  typedef KaldiType T;
-
-  KaldiObjectHolder(): t_(NULL) { }
-
-  // Writes the binary-mode header (in binary mode) followed by t.Write().
-  // Returns false, after logging a warning, if t.Write() threw; otherwise
-  // returns the state of the stream.
-  static bool Write(std::ostream &os, bool binary, const T &t) {
-    InitKaldiOutputStream(os, binary);  // Puts binary header if binary mode.
-    try {
-      t.Write(os, binary);
-      return os.good();
-    } catch (const std::exception &e) {
-      KALDI_WARN << "Exception caught writing Table object: " << e.what();
-      if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
-      return false;  // Write failure.
-    }
-  }
-
-  // Frees the held object, if any.
-  void Clear() {
-    delete t_;  // deleting a null pointer is a no-op.
-    t_ = NULL;
-  }
-
-  // Reads into the holder.  Any previously held object is discarded first so
-  // that no existing state can complicate the read.  On every failure path
-  // the holder is left empty, so a later Value() cannot hand out a
-  // half-read or never-read object.
-  bool Read(std::istream &is) {
-    Clear();
-    bool is_binary;
-    if (!InitKaldiInputStream(is, &is_binary)) {
-      KALDI_WARN << "Reading Table object, failed reading binary header\n";
-      return false;
-    }
-    t_ = new T;
-    try {
-      t_->Read(is, is_binary);
-      return true;
-    } catch (const std::exception &e) {
-      KALDI_WARN << "Exception caught reading Table object: " << e.what();
-      if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
-      Clear();
-      return false;
-    }
-  }
-
-  // Kaldi objects always have the stream open in binary mode for
-  // reading.
-  static bool IsReadInBinary() { return true; }
-
-  // Returns the held object.  Calling this while the holder is empty is a
-  // code error and is reported through KALDI_ERR.
-  const T &Value() const {
-    if (!t_) KALDI_ERR << "KaldiObjectHolder::Value() called wrongly.";
-    return *t_;
-  }
-
-  ~KaldiObjectHolder() { delete t_; }
- private:
-  KALDI_DISALLOW_COPY_AND_ASSIGN(KaldiObjectHolder);
-  T *t_;  // owned; NULL when nothing is held.
-};
-
-
-// BasicHolder holds a single value of a basic type: float, double, bool or
-// an integer type.  Other types fail at compile time, because the
-// {Write, Read}BasicType functions are not instantiated for them.
-
-template<class BasicType> class BasicHolder {
- public:
-  typedef BasicType T;
-
-  BasicHolder(): t_(static_cast<T>(-1)) { }
-
-  // Writes the (optional) binary header and the value; in text mode the
-  // value is followed by a newline so the output is easy to read and to
-  // manipulate.
-  static bool Write(std::ostream &os, bool binary, const T &t) {
-    InitKaldiOutputStream(os, binary);  // Puts binary header if binary mode.
-    try {
-      WriteBasicType(os, binary, t);
-      if (!binary) {
-        os << '\n';
-      }
-      return os.good();
-    } catch (const std::exception &e) {
-      KALDI_WARN << "Exception caught writing Table object: " << e.what();
-      if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
-      return false;  // Write failure.
-    }
-  }
-
-  // Nothing to release.
-  void Clear() { }
-
-  // Reads into the holder.  In text mode the value must be alone on its
-  // line; the extra checks only exist to give better diagnostics.
-  bool Read(std::istream &is) {
-    bool binary_in;
-    if (!InitKaldiInputStream(is, &binary_in)) {
-      KALDI_WARN << "Reading Table object [integer type], failed reading binary header\n";
-      return false;
-    }
-    try {
-      if (!binary_in) {
-        // An empty line before the token is a more-likely-than-average
-        // error, and ReadBasicType would silently eat it up.
-        SkipBlanksOnLine(is);
-        if (is.peek() == '\n') {
-          KALDI_WARN << "Found newline but expected basic type.";
-          return false;
-        }
-      }
-
-      ReadBasicType(is, binary_in, &t_);
-      if (binary_in) return true;
-
-      // Text mode: make sure there is a newline after the value.
-      SkipBlanksOnLine(is);
-      if (is.peek() != '\n') {
-        KALDI_WARN << "BasicHolder::Read, expected newline, got "
-                   << CharToString(is.peek()) << ", position " << is.tellg();
-        return false;
-      }
-      is.get();  // Consume the newline.
-      return true;
-    } catch (const std::exception &e) {
-      KALDI_WARN << "Exception caught reading Table object";
-      if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
-      return false;
-    }
-  }
-
-  // Objects read/written with the Kaldi I/O functions always have the stream
-  // open in binary mode for reading.
-  static bool IsReadInBinary() { return true; }
-
-  const T &Value() const { return t_; }
-
-  ~BasicHolder() { }
- private:
-  KALDI_DISALLOW_COPY_AND_ASSIGN(BasicHolder);
-
-  // Consumes whitespace from the stream, stopping in front of a newline or
-  // the first non-space character.
-  static void SkipBlanksOnLine(std::istream &is) {
-    for (int ch = is.peek();
-         isspace(ch) && ch != static_cast<int>('\n');
-         ch = is.peek())
-      is.get();
-  }
-
-  T t_;
-};
-
-
-/// Holder for a vector of a basic type, e.g. std::vector<int32> or
-/// std::vector<float>.  "Basic type" means a type for which ReadBasicType
-/// and WriteBasicType are implemented, i.e. integer and floating
-/// types, and bool.
-template<class BasicType> class BasicVectorHolder {
- public:
-  typedef std::vector<BasicType> T;
-
-  BasicVectorHolder() { }
-
-  /// Binary format: int32 element count followed by the elements.
-  /// Text format: the elements followed by a newline, e.g. "1 2 3\n".
-  static bool Write(std::ostream &os, bool binary, const T &t) {
-    InitKaldiOutputStream(os, binary);  // Puts binary header if binary mode.
-    try {
-      if (binary) {
-        // The size is stored as int32 because that is fixed-size regardless
-        // of compilation; a vector too large for that cannot be written.
-        // Change to int64 (here and in Read) if this becomes a problem.
-        const int32 num_elems = static_cast<int32>(t.size());
-        KALDI_ASSERT(static_cast<size_t>(num_elems) == t.size());
-        WriteBasicType(os, binary, num_elems);
-      }
-      for (size_t i = 0; i < t.size(); ++i)
-        WriteBasicType(os, binary, t[i]);
-      if (!binary)
-        os << '\n';  // Makes the text format readable and easy to manipulate.
-      return os.good();
-    } catch (const std::exception &e) {
-      KALDI_WARN << "Exception caught writing Table object (BasicVector). ";
-      if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
-      return false;  // Write failure.
-    }
-  }
-
-  void Clear() { t_.clear(); }
-
-  /// Reads into the holder, replacing whatever was held before.
-  bool Read(std::istream &is) {
-    t_.clear();
-    bool is_binary;
-    if (!InitKaldiInputStream(is, &is_binary)) {
-      KALDI_WARN << "Reading Table object [integer type], failed reading binary header\n";
-      return false;
-    }
-
-    if (is_binary) {
-      size_t filepos = is.tellg();  // only used in the diagnostic below.
-      try {
-        int32 size;
-        ReadBasicType(is, true, &size);
-        t_.resize(size);
-        for (size_t i = 0; i < t_.size(); ++i)
-          ReadBasicType(is, true, &t_[i]);
-        return true;
-      } catch (...) {
-        KALDI_WARN << "BasicVectorHolder::Read, read error or unexpected data at archive entry beginning at file position " << filepos;
-        return false;
-      }
-    }
-
-    // Text mode: one entry per line, so fetch the line and parse it alone.
-    std::string line;
-    getline(is, line);  // this will discard the \n, if present.
-    if (is.fail()) {
-      KALDI_WARN << "BasicVectorHolder::Read, error reading line " << (is.eof() ? "[eof]" : "");
-      return false;  // probably eof.  fail in any case.
-    }
-    std::istringstream line_is(line);
-    try {
-      // Skip whitespace before each element; stop once the line is used up.
-      for (line_is >> std::ws; !line_is.eof(); line_is >> std::ws) {
-        BasicType bt;
-        ReadBasicType(line_is, false, &bt);
-        t_.push_back(bt);
-      }
-      return true;
-    } catch (std::exception &e) {
-      KALDI_WARN << "BasicVectorHolder::Read, could not interpret line: " << line;
-      if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
-      return false;
-    }
-  }
-
-  // Objects read/written with the Kaldi I/O functions always have the stream
-  // open in binary mode for reading.
-  static bool IsReadInBinary() { return true; }
-
-  const T &Value() const { return t_; }
-
-  ~BasicVectorHolder() { }
- private:
-  KALDI_DISALLOW_COPY_AND_ASSIGN(BasicVectorHolder);
-  T t_;
-};
-
-
-/// BasicVectorVectorHolder is a Holder for a vector of vector of
-/// a basic type, e.g. std::vector<std::vector<int32> >.
-/// Note: a basic type is defined as a type for which ReadBasicType
-/// and WriteBasicType are implemented, i.e. integer and floating
-/// types, and bool.
-template<class BasicType> class BasicVectorVectorHolder {
- public:
- typedef std::vector<std::vector<BasicType> > T;
-
- BasicVectorVectorHolder() { }
-
- // Writes t to os.  Binary format: int32 count of inner vectors, then for
- // each inner vector its int32 size followed by its elements.  Text format:
- // see the comment in the text-mode branch.  Returns false, after logging a
- // warning, if a std::exception was thrown; otherwise returns os.good().
- static bool Write(std::ostream &os, bool binary, const T &t) {
- InitKaldiOutputStream(os, binary); // Puts binary header if binary mode.
- try {
- if (binary) { // need to write the size, in binary mode.
- KALDI_ASSERT(static_cast<size_t>(static_cast<int32>(t.size())) == t.size());
- // Or this Write routine cannot handle such a large vector.
- // use int32 because it's fixed size regardless of compilation.
- // change to int64 (plus in Read function) if this becomes a problem.
- WriteBasicType(os, binary, static_cast<int32>(t.size()));
- for (typename std::vector<std::vector<BasicType> >::const_iterator iter = t.begin();
- iter != t.end(); ++iter) {
- // Same int32 range check, this time for the inner vector.
- KALDI_ASSERT(static_cast<size_t>(static_cast<int32>(iter->size())) == iter->size());
- WriteBasicType(os, binary, static_cast<int32>(iter->size()));
- for (typename std::vector<BasicType>::const_iterator iter2=iter->begin();
- iter2 != iter->end(); ++iter2) {
- WriteBasicType(os, binary, *iter2);
- }
- }
- } else { // text mode...
- // In text mode, we write out something like (for integers):
- // "1 2 3 ; 4 5 ; 6 ; ; 7 8 9 ;\n"
- // where the semicolon is a terminator, not a separator
- // (a separator would cause ambiguity between an
- // empty list, and a list containing a single empty list).
- for (typename std::vector<std::vector<BasicType> >::const_iterator iter = t.begin();
- iter != t.end();
- ++iter) {
- for (typename std::vector<BasicType>::const_iterator iter2=iter->begin();
- iter2 != iter->end(); ++iter2)
- WriteBasicType(os, binary, *iter2);
- os << "; ";
- }
- os << '\n';
- }
- return os.good();
- } catch (const std::exception &e) {
- KALDI_WARN << "Exception caught writing Table object. ";
- if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
- return false; // Write failure.
- }
- }
-
- // Discards the held data.
- void Clear() { t_.clear(); }
-
- // Reads into the holder, replacing whatever was held before.
- // Returns true on success and false (after a warning) on any failure.
- bool Read(std::istream &is) {
- t_.clear();
- bool is_binary;
- if (!InitKaldiInputStream(is, &is_binary)) {
- KALDI_WARN << "Failed reading binary header\n";
- return false;
- }
- if (!is_binary) {
- // In text mode, we terminate with newline.
- try { // catching errors from ReadBasicType..
- std::vector<BasicType> v; // temporary vector
- // Character-driven loop: peek at the next character and decide whether
- // it ends the entry, ends an inner vector, is padding, or starts a value.
- while (1) {
- int i = is.peek();
- if (i == -1) {
- KALDI_WARN << "Unexpected EOF";
- return false;
- } else if (static_cast<char>(i) == '\n') {
- // End of entry.  Elements still pending in v mean that the last inner
- // vector was never closed by its ';' terminator.
- if (!v.empty()) {
- KALDI_WARN << "No semicolon before newline (wrong format)";
- return false;
- } else { is.get(); return true; }
- } else if (std::isspace(i)) {
- is.get();
- } else if (static_cast<char>(i) == ';') {
- // Terminator: commit the (possibly empty) inner vector collected so far.
- t_.push_back(v);
- v.clear();
- is.get();
- } else { // some object we want to read...
- BasicType b;
- ReadBasicType(is, false, &b); // throws on error.
- v.push_back(b);
- }
- }
- } catch(std::exception &e) {
- KALDI_WARN << "BasicVectorVectorHolder::Read, read error";
- if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
- return false;
- }
- } else { // binary mode.
- // Remember where the entry starts; only used in the warning below.
- size_t filepos = is.tellg();
- try {
- int32 size;
- ReadBasicType(is, true, &size);
- t_.resize(size);
- for (typename std::vector<std::vector<BasicType> >::iterator iter = t_.begin();
- iter != t_.end();
- ++iter) {
- int32 size2;
- ReadBasicType(is, true, &size2);
- iter->resize(size2);
- for (typename std::vector<BasicType>::iterator iter2 = iter->begin();
- iter2 != iter->end();
- ++iter2)
- ReadBasicType(is, true, &(*iter2));
- }
- return true;
- } catch (...) {
- KALDI_WARN << "Read error or unexpected data at archive entry beginning at file position " << filepos;
- return false;
- }
- }
- }
-
- // Objects read/written with the Kaldi I/O functions always have the stream
- // open in binary mode for reading.
- static bool IsReadInBinary() { return true; }
-
- // Returns the data most recently read into the holder.
- const T &Value() const { return t_; }
-
- ~BasicVectorVectorHolder() { }
- private:
- KALDI_DISALLOW_COPY_AND_ASSIGN(BasicVectorVectorHolder);
- T t_;
-};
-
-
-/// BasicPairVectorHolder is a Holder for a vector of pairs of
-/// a basic type, e.g. std::vector<std::pair<int32, int32> >.
-/// Note: a basic type is defined as a type for which ReadBasicType
-/// and WriteBasicType are implemented, i.e. integer and floating
-/// types, and bool.
-template<class BasicType> class BasicPairVectorHolder {
- public:
-  typedef std::vector<std::pair<BasicType, BasicType> > T;
-
-  BasicPairVectorHolder() { }
-
-  /// Binary format: int32 pair count, then first/second of every pair.
-  /// Text format: see the comment in the text-mode branch.
-  /// Returns false, after logging a warning, if a std::exception was thrown;
-  /// otherwise returns os.good().
-  static bool Write(std::ostream &os, bool binary, const T &t) {
-    InitKaldiOutputStream(os, binary);  // Puts binary header if binary mode.
-    try {
-      if (binary) {  // need to write the size, in binary mode.
-        KALDI_ASSERT(static_cast<size_t>(static_cast<int32>(t.size())) == t.size());
-        // Or this Write routine cannot handle such a large vector.
-        // use int32 because it's fixed size regardless of compilation.
-        // change to int64 (plus in Read function) if this becomes a problem.
-        WriteBasicType(os, binary, static_cast<int32>(t.size()));
-        for (typename T::const_iterator iter = t.begin();
-             iter != t.end(); ++iter) {
-          WriteBasicType(os, binary, iter->first);
-          WriteBasicType(os, binary, iter->second);
-        }
-      } else {  // text mode...
-        // In text mode, we write out something like (for integers):
-        // "1 2 ; 4 5 ; 6 7 ; 8 9 \n"
-        // where the semicolon is a separator, not a terminator.
-        for (typename T::const_iterator iter = t.begin();
-             iter != t.end();) {
-          WriteBasicType(os, binary, iter->first);
-          WriteBasicType(os, binary, iter->second);
-          ++iter;
-          if (iter != t.end())
-            os << "; ";
-        }
-        os << '\n';
-      }
-      return os.good();
-    } catch (const std::exception &e) {
-      KALDI_WARN << "Exception caught writing Table object. ";
-      if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
-      return false;  // Write failure.
-    }
-  }
-
-  void Clear() { t_.clear(); }
-
-  /// Reads into the holder, replacing whatever was held before.
-  /// Returns true on success and false (after a warning) on any failure.
-  bool Read(std::istream &is) {
-    t_.clear();
-    bool is_binary;
-    if (!InitKaldiInputStream(is, &is_binary)) {
-      KALDI_WARN << "Reading Table object [integer type], failed reading binary header\n";
-      return false;
-    }
-    if (!is_binary) {
-      // In text mode, we terminate with newline.
-      try {  // catching errors from ReadBasicType..
-        std::vector<BasicType> v;  // elements of the pair being collected.
-        while (1) {
-          int i = is.peek();
-          if (i == -1) {
-            KALDI_WARN << "Unexpected EOF";
-            return false;
-          } else if (static_cast<char>(i) == '\n') {
-            if (t_.empty() && v.empty()) {
-              // An empty line is an empty vector of pairs.
-              is.get();
-              return true;
-            } else if (v.size() == 2) {
-              // The last pair has no ';' after it (separator, not terminator).
-              t_.push_back(std::make_pair(v[0], v[1]));
-              is.get();
-              return true;
-            } else {
-              KALDI_WARN << "Unexpected newline, reading vector<pair<?> >; got "
-                         << v.size() << " elements, expected 2.";
-              return false;
-            }
-          } else if (std::isspace(i)) {
-            is.get();
-          } else if (static_cast<char>(i) == ';') {
-            if (v.size() != 2) {
-              KALDI_WARN << "Wrong input format, reading vector<pair<?> >; got "
-                         << v.size() << " elements, expected 2.";
-              return false;
-            }
-            t_.push_back(std::make_pair(v[0], v[1]));
-            v.clear();
-            is.get();
-          } else {  // some object we want to read...
-            BasicType b;
-            ReadBasicType(is, false, &b);  // throws on error.
-            v.push_back(b);
-          }
-        }
-      } catch (const std::exception &e) {
-        KALDI_WARN << "BasicPairVectorHolder::Read, read error";
-        if (!IsKaldiError(e.what())) { std::cerr << e.what(); }
-        return false;
-      }
-    } else {  // binary mode.
-      // Remember where the entry starts; only used in the warning below.
-      size_t filepos = is.tellg();
-      try {
-        int32 size;
-        ReadBasicType(is, true, &size);
-        t_.resize(size);
-        for (typename T::iterator iter = t_.begin();
-             iter != t_.end();
-             ++iter) {
-          ReadBasicType(is, true, &(iter->first));
-          ReadBasicType(is, true, &(iter->second));
-        }
-        return true;
-      } catch (...) {
-        KALDI_WARN << "BasicPairVectorHolder::Read, read error or unexpected data at archive entry beginning at file position " << filepos;
-        return false;
-      }
-    }
-  }
-
-  // Objects read/written with the Kaldi I/O functions always have the stream
-  // open in binary mode for reading.
-  static bool IsReadInBinary() { return true; }
-
-  const T &Value() const { return t_; }
-
-  ~BasicPairVectorHolder() { }
- private:
-  KALDI_DISALLOW_COPY_AND_ASSIGN(BasicPairVectorHolder);
-  T t_;
-};
-
-
-
-
-// We define a Token as a nonempty, printable, whitespace-free std::string.
-// The binary and text formats here are the same (newline-terminated)
-// and as such we don't bother with the binary-mode headers.
-class TokenHolder {
- public:
-  typedef std::string T;
-
-  TokenHolder() {}
-
-  // Writes the token followed by a newline.  The binary flag is ignored.
-  static bool Write(std::ostream &os, bool, const T &t) {  // ignore binary-mode.
-    KALDI_ASSERT(IsToken(t));
-    os << t << '\n';
-    return os.good();
-  }
-
-  void Clear() { t_.clear(); }
-
-  // Reads one token and the newline that must follow it (blanks in between
-  // are skipped).  Returns false if no token could be extracted or if
-  // something other than a newline follows it.
-  bool Read(std::istream &is) {
-    is >> t_;
-    if (is.fail()) return false;
-    // peek() returns an int (a character value or EOF).  Keep it as an int:
-    // narrowing to char first can hand isspace() a negative value, which is
-    // undefined behaviour.
-    int c;
-    while (isspace(c = is.peek()) && c != '\n') is.get();
-    if (is.peek() != '\n') {
-      // Report through the return value, like the other holders do, so the
-      // caller decides how to handle a malformed entry.
-      KALDI_WARN << "TokenHolder::Read, expected newline, got char " << CharToString(is.peek())
-                 << ", at stream pos " << is.tellg();
-      return false;
-    }
-    is.get();  // get '\n'
-    return true;
-  }
-
-
-  // Since this is fundamentally a text format, read in text mode (would work
-  // fine either way, but doing it this way will exercise more of the code).
-  static bool IsReadInBinary() { return false; }
-
-  const T &Value() const { return t_; }
-
-  ~TokenHolder() { }
- private:
-  KALDI_DISALLOW_COPY_AND_ASSIGN(TokenHolder);
-  T t_;
-};
-
-// A Token is a nonempty, whitespace-free std::string.
-// Class TokenVectorHolder is a Holder class for vectors of these.
-class TokenVectorHolder {
- public:
-  typedef std::vector<std::string> T;
-
-  TokenVectorHolder() { }
-
-  // Writes every token followed by a space, then a newline.  The binary
-  // flag is ignored.
-  static bool Write(std::ostream &os, bool, const T &t) {  // ignore binary-mode.
-    for (std::vector<std::string>::const_iterator iter = t.begin();
-         iter != t.end();
-         ++iter) {
-      KALDI_ASSERT(IsToken(*iter));  // make sure it's whitespace-free, printable and nonempty.
-      os << *iter << ' ';
-    }
-    os << '\n';
-    return os.good();
-  }
-
-  void Clear() { t_.clear(); }
-
-
-  // Reads one line and splits it on whitespace into tokens, replacing
-  // whatever was held before.  Returns false if no line could be read.
-  bool Read(std::istream &is) {
-    t_.clear();
-
-    // there is no binary/non-binary mode.
-
-    std::string line;
-    getline(is, line);  // this will discard the \n, if present.
-    if (is.fail()) {
-      KALDI_WARN << "TokenVectorHolder::Read, error reading line " << (is.eof() ? "[eof]" : "");
-      return false;  // probably eof.  fail in any case.
-    }
-    const char *white_chars = " \t\n\r\f\v";
-    SplitStringToVector(line, white_chars, true, &t_);  // true== omit empty strings e.g.
-    // between spaces.
-    return true;
-  }
-
-  // Read in text format since it's basically a text-mode thing.. doesn't really matter,
-  // it would work either way since we ignore the extra '\r'.
-  static bool IsReadInBinary() { return false; }
-
-  const T &Value() const { return t_; }
-
- private:
-  KALDI_DISALLOW_COPY_AND_ASSIGN(TokenVectorHolder);
-  T t_;
-};
-
-
-// Holder for an HTK-format feature matrix together with its HtkHeader.
-class HtkMatrixHolder {
- public:
-  typedef std::pair<Matrix<BaseFloat>, HtkHeader> T;
-
-  HtkMatrixHolder() {}
-
-  // Writes the matrix (t.first) with header (t.second) via WriteHtk.
-  // Only binary mode is supported; a text-mode request is reported through
-  // KALDI_ERR.
-  static bool Write(std::ostream &os, bool binary, const T &t) {
-    if (!binary)
-      KALDI_ERR << "Non-binary HTK-format write not supported.";
-    if (WriteHtk(os, t.first, t.second))
-      return true;
-    KALDI_WARN << "Error detected writing HTK-format matrix.";
-    return false;
-  }
-
-  // Releases the matrix storage; the header is left as it is.
-  void Clear() { t_.first.Resize(0, 0); }
-
-  // Reads into the holder via ReadHtk.
-  bool Read(std::istream &is) {
-    if (!ReadHtk(is, &t_.first, &t_.second)) {
-      KALDI_WARN << "Error detected reading HTK-format matrix.";
-      return false;
-    }
-    return true;
-  }
-
-  // HTK-format matrices only read in binary.
-  static bool IsReadInBinary() { return true; }
-
-  const T &Value() const { return t_; }
-
-
-  // No destructor.
- private:
-  KALDI_DISALLOW_COPY_AND_ASSIGN(HtkMatrixHolder);
-  T t_;
-};
-
-// SphinxMatrixHolder can be used to read and write feature files in
-// CMU Sphinx format. 13-dimensional big-endian features are assumed.
-// The ultimate reference is SphinxBase's source code (for example see
-// feat_s2mfc_read() in src/libsphinxbase/feat/feat.c).
-// We can't fully automate the detection of machine/feature file endianness
-// mismatch here, because for this Sphinx relies on comparing the feature
-// file's size with the number recorded in its header. We are working with
-// streams, however (what happens if this is a Kaldi archive?). This should
-// be no problem, because the usage help of Sphinx' "wave2feat" for example
-// says that Sphinx features are always big endian.
-// Note: the kFeatDim defaults to 13, see forward declaration in kaldi-holder.h
-template<int kFeatDim> class SphinxMatrixHolder {
- public:
-  typedef Matrix<BaseFloat> T;
-
-  SphinxMatrixHolder() {}
-
-  void Clear() { feats_.Resize(0, 0); }
-
-  // Writes Sphinx-format features: a 4-byte count of floats followed by the
-  // values row by row as 32-bit floats, byte-swapped on little-endian
-  // machines.  Text mode is not supported.  Returns false if text mode was
-  // requested or if the stream is not good after writing.
-  static bool Write(std::ostream &os, bool binary, const T &m) {
-    if (!binary) {
-      KALDI_WARN << "SphinxMatrixHolder can't write Sphinx features in text ";
-      return false;
-    }
-
-    int32 size = m.NumRows() * m.NumCols();
-    if (MachineIsLittleEndian())
-      KALDI_SWAP4(size);
-    os.write(reinterpret_cast<const char*>(&size), sizeof(size));  // write the header
-
-    // One row buffer, reused for every row.  (A std::vector is used because
-    // an array whose size is only known at run time is not standard C++.)
-    std::vector<float32> tmp(m.NumCols());
-    for (MatrixIndexT i = 0; i < m.NumRows(); i++) {
-      for (MatrixIndexT j = 0; j < m.NumCols(); j++) {
-        tmp[j] = static_cast<float32>(m(i, j));
-        if (MachineIsLittleEndian())
-          KALDI_SWAP4(tmp[j]);
-      }
-      if (!tmp.empty())
-        os.write(reinterpret_cast<const char*>(&tmp[0]),
-                 tmp.size() * sizeof(float32));
-    }
-
-    return os.good();
-  }
-
-  // Reads the features into a Kaldi Matrix with kFeatDim columns.
-  // Returns false (after a warning) if the header cannot be read, if the
-  // float count is negative or not a multiple of kFeatDim, or if the data
-  // ends early.
-  bool Read(std::istream &is) {
-    int32 nmfcc;
-
-    is.read(reinterpret_cast<char*>(&nmfcc), sizeof(nmfcc));
-    if (!is.good()) {
-      // Without this check nmfcc would be used uninitialized.
-      KALDI_WARN << "Unexpected error/EOF while reading Sphinx feature header ";
-      return false;
-    }
-    if (MachineIsLittleEndian())
-      KALDI_SWAP4(nmfcc);
-    KALDI_VLOG(2) << "#feats: " << nmfcc;
-    if (nmfcc < 0 || (nmfcc % kFeatDim) != 0) {
-      KALDI_WARN << "Sphinx feature count is inconsistent with vector length ";
-      return false;
-    }
-    int32 nfvec = nmfcc / kFeatDim;
-
-    feats_.Resize(nfvec, kFeatDim);
-    for (MatrixIndexT i = 0; i < feats_.NumRows(); i++) {
-      if (sizeof(BaseFloat) == sizeof(float32)) {
-        // Same width: read straight into the matrix row, then fix byte order.
-        is.read(reinterpret_cast<char*>(feats_.RowData(i)),
-                kFeatDim * sizeof(float32));
-        if (!is.good()) {
-          KALDI_WARN << "Unexpected error/EOF while reading Sphinx features ";
-          return false;
-        }
-        if (MachineIsLittleEndian()) {
-          for (MatrixIndexT j = 0; j < kFeatDim; j++)
-            KALDI_SWAP4(feats_(i, j));
-        }
-      } else {  // KALDI_DOUBLEPRECISION=1
-        // Different width: go through a float32 row buffer and convert.
-        float32 tmp[kFeatDim];
-        is.read(reinterpret_cast<char*>(tmp), sizeof(tmp));
-        if (!is.good()) {
-          KALDI_WARN << "Unexpected error/EOF while reading Sphinx features ";
-          return false;
-        }
-        for (MatrixIndexT j = 0; j < kFeatDim; j++) {
-          if (MachineIsLittleEndian())
-            KALDI_SWAP4(tmp[j]);
-          feats_(i, j) = static_cast<BaseFloat>(tmp[j]);
-        }
-      }
-    }
-
-    return true;
-  }
-
-  // Only read in binary
-  static bool IsReadInBinary() { return true; }
-
-  const T &Value() const { return feats_; }
-
- private:
-  KALDI_DISALLOW_COPY_AND_ASSIGN(SphinxMatrixHolder);
-  T feats_;
-};
-
-
-/// @} end "addtogroup holders"
-
-} // end namespace kaldi
-
-
-
-#endif
diff --git a/kaldi_io/src/kaldi/util/kaldi-holder.h b/kaldi_io/src/kaldi/util/kaldi-holder.h
deleted file mode 100644
index 95f1183..0000000
--- a/kaldi_io/src/kaldi/util/kaldi-holder.h
+++ /dev/null
@@ -1,207 +0,0 @@
-// util/kaldi-holder.h
-
-// Copyright 2009-2011 Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef KALDI_UTIL_KALDI_HOLDER_H_
-#define KALDI_UTIL_KALDI_HOLDER_H_
-
-#include <algorithm>
-#include "util/kaldi-io.h"
-#include "util/text-utils.h"
-#include "matrix/kaldi-vector.h"
-
-namespace kaldi {
-
-
-// The Table class uses a Holder class to wrap objects, and make them behave
-// in a "normalized" way w.r.t. reading and writing, so the Table class can
-// be template-ized without too much trouble. Look below this
-// comment (search for GenericHolder) to see what it looks like.
-//
-// Requirements of the holder class:
-//
-// They can only contain objects that can be read/written without external
-// information; other objects cannot be stored in this type of archive.
-//
-// In terms of what functions it should have, see GenericHolder below.
-// It is just for documentation.
-//
-// (1) Requirements of the Read and Write functions
-//
-// The Read and Write functions should have the property that in a longer
-// file, if the Read function is started from where the Write function started
-// writing, it should go to where the Write function stopped writing, in either
-// text or binary mode (but it's OK if it doesn't eat up trailing space).
-//
-// [Desirable property: when writing in text mode the output should contain
-// exactly one newline, at the end of the output; this makes it easier to manipulate]
-//
-// [Desirable property for classes: the output should just be a binary-mode
-// header (if in binary mode and it's a Kaldi object, or no header
-// otherwise), and then the output of Object.Write(). This means that when
-// written to individual files with the scp: type of wspecifier, we can read
-// the individual files in the "normal" Kaldi way by reading the binary
-// header and then the object.]
-//
-//
-// The Write function takes a 'binary' argument. In general, each object will
-// have two formats: text and binary. However, it's permitted to throw() if
-// asked to read in the text format if there is none. The file will be open, if
-// the file system has binary/text modes, in the corresponding mode. However,
-// the object should have a file-mode in which it can read either text or binary
-// output. It announces this via the static IsReadInBinary() function. This
-// will generally be the binary mode and it means that where necessary, in text
-// formats, we must ignore \r characters.
-//
-// Memory requirements: if it allocates memory, the destructor should
-// free that memory. Copying and assignment of Holder objects may be
-// disallowed as the Table code never does this.
-
-
-/// GenericHolder exists purely as documentation of the Holder interface;
-/// nothing is expected to instantiate it.
-template<class SomeType> class GenericHolder {
- public:
-  typedef SomeType T;
-
-  /// A Holder must be default-constructible.
-  GenericHolder() { }
-
-  /// A destructor would free anything the holder owns through pointers.
-  ~GenericHolder() { }
-
-  /// Writes an object of type T.  It may first write a binary-mode header,
-  /// because Read() is not told which mode was used and has to find out from
-  /// the stream.  The function is static so that objects living outside any
-  /// holder can be written (combine it with Value() to write the held one).
-  /// It may throw if the object cannot be written in the requested
-  /// (binary/non-binary) mode.  The stream can be assumed to have been
-  /// opened in the requested mode (where relevant); apart from that the
-  /// holder is free to lay the data out however it likes.
-  static bool Write(std::ostream &os, bool binary, const T &t);
-
-  /// Reads into the holder and returns true on success.
-  ///
-  /// The stream is opened (on Windows) in binary mode if IsReadInBinary()
-  /// returns true and in text mode otherwise.  Whether the data itself is
-  /// binary or text has to be worked out from the stream, usually by reading
-  /// the Kaldi binary-mode header.  That job belongs to Read(), instead of
-  /// being passed in as an argument, so that non-Kaldi binary files need no
-  /// Kaldi header, which would prevent non-Kaldi programs from reading them
-  /// (e.g. when writing to individual files using an scp).
-  ///
-  /// Read must deallocate any data already held, if applicable; it must not
-  /// assume that the object was newly constructed.
-  bool Read(std::istream &is);
-
-  /// Static function.  Returns true if the file should be opened in binary
-  /// mode for reading (on file systems that distinguish binary and text
-  /// modes), and false otherwise.  Kaldi objects always return true, since
-  /// they always read in binary mode.  Whatever mode is chosen, Read() must
-  /// cope with objects that Write() produced in text as well as binary mode
-  /// (which may mean ignoring "\r" characters).  I doubt we will ever want
-  /// this function to return false.
-  static bool IsReadInBinary() { return true; }
-
-  /// Gives the holder a chance to free resources that are no longer needed;
-  /// it does not have to do anything.
-  void Clear() { }
-
-  /// Returns the held object.  It will only ever be called after Read() has
-  /// been called and returned true, so it is OK to throw an exception if no
-  /// object was read.
-  const T &Value() const { return t_; }  // if t_ is a pointer, would return *t_;
-
- private:
-  KALDI_DISALLOW_COPY_AND_ASSIGN(GenericHolder);
-  T t_;  // t_ may alternatively be of type T*.
-};
-
-
-// See kaldi-holder-inl.h for examples of some actual Holder
-// classes and templates.
-
-
-// The following two typedefs should probably be in their own file, but they're
-// here until there are enough of them to warrant their own header.
-
-
-/// \addtogroup holders
-/// @{
-
-/// KaldiObjectHolder works for Kaldi objects that have the "standard" Read and Write
-/// functions, and a copy constructor.
-template<class KaldiType> class KaldiObjectHolder;
-
-/// BasicHolder is valid for float, double, bool, and integer
-/// types. There will be a compile time error otherwise, because
-/// we make sure that the {Write, Read}BasicType functions do not
-/// get instantiated for other types.
-template<class BasicType> class BasicHolder;
-
-
-// A Holder for a vector of basic types, e.g.
-// std::vector<int32>, std::vector<float>, and so on.
-// Note: a basic type is defined as a type for which ReadBasicType
-// and WriteBasicType are implemented, i.e. integer and floating
-// types, and bool.
-template<class BasicType> class BasicVectorHolder;
-
-
-// A holder for vectors of vectors of basic types, e.g.
-// std::vector<std::vector<int32> >, and so on.
-// Note: a basic type is defined as a type for which ReadBasicType
-// and WriteBasicType are implemented, i.e. integer and floating
-// types, and bool.
-template<class BasicType> class BasicVectorVectorHolder;
-
-// A holder for vectors of pairs of basic types, e.g.
-// std::vector<std::pair<int32, int32> >, and so on.
-// Note: a basic type is defined as a type for which ReadBasicType
-// and WriteBasicType are implemented, i.e. integer and floating
-// types, and bool. Text format is (e.g. for integers),
-// "1 12 ; 43 61 ; 17 8 \n"
-template<class BasicType> class BasicPairVectorHolder;
-
-/// We define a Token (not a typedef, just a word) as a nonempty, printable,
-/// whitespace-free std::string. The binary and text formats here are the same
-/// (newline-terminated) and as such we don't bother with the binary-mode headers.
-class TokenHolder;
-
-/// Class TokenVectorHolder is a Holder class for vectors of Tokens (T == std::string).
-class TokenVectorHolder;
-
-/// A class for reading/writing HTK-format matrices.
-/// T == std::pair<Matrix<BaseFloat>, HtkHeader>
-class HtkMatrixHolder;
-
-/// A class for reading/writing Sphinx format matrices.
-template<int kFeatDim=13> class SphinxMatrixHolder;
-
-
-/// @} end "addtogroup holders"
-
-
-} // end namespace kaldi
-
-#include "kaldi-holder-inl.h"
-
-#endif
diff --git a/kaldi_io/src/kaldi/util/kaldi-io-inl.h b/kaldi_io/src/kaldi/util/kaldi-io-inl.h
deleted file mode 100644
index 7df7505..0000000
--- a/kaldi_io/src/kaldi/util/kaldi-io-inl.h
+++ /dev/null
@@ -1,45 +0,0 @@
-// util/kaldi-io-inl.h
-
-// Copyright 2009-2011 Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-
-// http://www.apache.org/licenses/LICENSE-2.0
-
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-#ifndef KALDI_UTIL_KALDI_IO_INL_H_
-#define KALDI_UTIL_KALDI_IO_INL_H_
-
-
-namespace kaldi {
-
-bool Input::Open(const std::string &rxfilename, bool *binary) {
- return OpenInternal(rxfilename, true, binary);
-}
-
-bool Input::OpenTextMode(const std::string &rxfilename) {
- return OpenInternal(rxfilename, false, NULL);
-}
-
-bool Input::IsOpen() {
- return impl_ != NULL;
-}
-
-bool Output::IsOpen() {
- return impl_ != NULL;
-}
-
-
-} // end namespace kaldi.
-
-
-#endif
diff --git a/kaldi_io/src/kaldi/util/kaldi-io.h b/kaldi_io/src/kaldi/util/kaldi-io.h
deleted file mode 100644
index f2c7563..0000000
--- a/kaldi_io/src/kaldi/util/kaldi-io.h
+++ /dev/null
@@ -1,264 +0,0 @@
-// util/kaldi-io.h
-
-// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-
-// http://www.apache.org/licenses/LICENSE-2.0
-
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-#ifndef KALDI_UTIL_KALDI_IO_H_
-#define KALDI_UTIL_KALDI_IO_H_
-
-#include <cctype> // For isspace.
-#include <limits>
-#include <string>
-#include "base/kaldi-common.h"
-#ifdef _MSC_VER
-# include <fcntl.h>
-# include <io.h>
-#endif
-
-
-
-namespace kaldi {
-
-class OutputImplBase; // Forward decl; defined in a .cc file
-class InputImplBase; // Forward decl; defined in a .cc file
-
-/// \addtogroup io_group
-/// @{
-
-// The Output and Input classes handle stream-opening for "extended" filenames
-// that include actual files, standard-input/standard-output, pipes, and
-// offsets into actual files. They also handle reading and writing the
-// binary-mode headers for Kaldi files, where applicable. The classes have
-// versions of the Open routines that throw and do not throw, depending whether
-// the calling code wants to catch the errors or not; there are also versions
-// that write (or do not write) the Kaldi binary-mode header that says if it's
-// binary mode. Generally files that contain Kaldi objects will have the header
-// on, so we know upon reading them whether they have the header. So you would
-// use the OpenWithHeader routines for these (or the constructor); but other
-// types of objects (e.g. FSTs) would have files without a header so you would
-// use OpenNoHeader.
-
-// We now document the types of extended filenames that we use.
-//
-// A "wxfilename" is an extended filename for writing. It can take three forms:
-// (1) Filename: e.g. "/some/filename", "./a/b/c", "c:\Users\dpovey\My Documents\\boo"
-// (whatever the actual file-system interprets)
-// (2) Standard output: "" or "-"
-// (3) A pipe to write to: e.g. "| gzip -c > /tmp/abc.gz"
-//
-//
-// A "rxfilename" is an extended filename for reading. It can take four forms:
-// (1) An actual filename, whatever the file-system can read, e.g. "/my/file".
-// (2) Standard input: "" or "-"
-// (3) A pipe to read from: e.g. "gunzip -c /tmp/abc.gz |"
-// (4) An offset into a file, e.g.: "/mnt/blah/data/1.ark:24871"
-// [these are created by the Table and TableWriter classes; I may also write
-// a program that creates them for arbitrary files]
-//
-
-
-// Typical usage:
-// ...
-// bool binary;
-// MyObject.Write(Output(some_filename, binary).Stream(), binary);
-//
-// ... more extensive example:
-// {
-// Output ko(some_filename, binary);
-// MyObject1.Write(ko.Stream(), binary);
-// MyObject2.Write(ko.Stream(), binary);
-// }
-
-
-
-enum OutputType {
- kNoOutput,
- kFileOutput,
- kStandardOutput,
- kPipeOutput
-};
-
-/// ClassifyWxfilename interprets filenames as follows:
-/// - kNoOutput: invalid filenames (leading or trailing space, things that look
-/// like wspecifiers and rspecifiers, or like pipes to read from with trailing |).
-/// - kFileOutput: Normal filenames
-/// - kStandardOutput: The empty string or "-", interpreted as standard output
-/// - kPipeOutput: pipes to write to, e.g. "| gzip -c > some_file.gz"
-OutputType ClassifyWxfilename(const std::string &wxfilename);
-
-enum InputType {
- kNoInput,
- kFileInput,
- kStandardInput,
- kOffsetFileInput,
- kPipeInput
-};
-
-/// ClassifyRxfilename interprets filenames for reading as follows:
-/// - kNoInput: invalid filenames (leading or trailing space, things that
-/// look like wspecifiers and rspecifiers, or pipes to write to
-/// with leading |).
-/// - kFileInput: normal filenames
-/// - kStandardInput: the empty string or "-"
-/// - kPipeInput: pipes to read from, e.g. "gunzip -c blah.gz |"
-/// - kOffsetFileInput: offsets into files, e.g. /some/filename:12970
-InputType ClassifyRxfilename(const std::string &rxfilename);
-
-
-class Output {
- public:
- // The normal constructor, provided for convenience.
- // Equivalent to calling with default constructor then Open()
- // with these arguments.
- Output(const std::string &filename, bool binary, bool write_header = true);
-
- Output(): impl_(NULL) {};
-
- /// This opens the stream, with the given mode (binary or text). It returns
- /// true on success and false on failure. However, it will throw if something
- /// was already open and could not be closed (to avoid this, call Close()
- /// first). If write_header == true and binary == true, it writes the Kaldi
- /// binary-mode header ('\0' then 'B'). You may call Open even if it is
- /// already open; it will close the existing stream and reopen (however if
- /// closing the old stream failed it will throw).
- bool Open(const std::string &wxfilename, bool binary, bool write_header);
-
- inline bool IsOpen(); // return true if we have an open stream. Does not imply
- // stream is good for writing.
-
- std::ostream &Stream(); // will throw if not open; else returns stream.
-
- // Close closes the stream. Calling Close is never necessary unless you
- // want to avoid exceptions being thrown. There are times when calling
- // Close will hurt efficiency (basically, when using offsets into files,
- // and using the same Input object),
- // but most of the time the user won't be doing this directly, it will
- // be done in kaldi-table.{h, cc}, so you don't have to worry about it.
- bool Close();
-
- // This will throw if stream could not be closed (to check error status,
- // call Close()).
- ~Output();
-
- private:
- OutputImplBase *impl_; // non-NULL if open.
- std::string filename_;
- KALDI_DISALLOW_COPY_AND_ASSIGN(Output);
-};
-
-
-// bool binary_in;
-// Input ki(some_filename, &binary_in);
-// MyObject.Read(ki.Stream(), binary_in);
-//
-// ... more extensive example:
-//
-// {
-// bool binary_in;
-// Input ki(some_filename, &binary_in);
-// MyObject1.Read(ki.Stream(), binary_in);
-// MyObject2.Read(ki.Stream(), binary_in);
-// }
-// Note that to catch errors you need to use try.. catch.
-// Input communicates errors by throwing exceptions.
-
-
-// Input interprets four kinds of filenames:
-// (1) Normal filenames
-// (2) The empty string or "-", interpreted as standard input
-// (3) Pipes to read from, e.g. "gunzip -c some_file.gz |"
-// (4) Offsets into [real] files, e.g. "/my/filename:12049"
-// The last one has no correspondence in Output.
-
-
-class Input {
- public:
- /// The normal constructor. Opens the stream in binary mode.
- /// Equivalent to calling the default constructor followed by Open(); then, if
- /// contents_binary != NULL, it reads the binary-mode header and stores the
- /// result in *contents_binary; it throws on error.
- Input(const std::string &rxfilename, bool *contents_binary = NULL);
-
- Input(): impl_(NULL) {}
-
- // Open opens the stream for reading (the mode, where relevant, is binary; use
- // OpenTextMode for text-mode, we made this a separate function rather than a
- // boolean argument, to avoid confusion with Kaldi's text/binary distinction,
- // since reading in the file system's text mode is unusual.) If
- // contents_binary != NULL, it reads the binary-mode header and stores the
- // result in *contents_binary. Returns true on success. If it returns false it will
- // not be open. You may call Open even if it is already open; it will close
- // the existing stream and reopen (however if closing the old stream failed it
- // will throw).
- inline bool Open(const std::string &rxfilename, bool *contents_binary = NULL);
-
- // As Open but (if the file system has text/binary modes) opens in text mode;
- // you shouldn't ever have to use this as in Kaldi we read even text files in
- // binary mode (and ignore the \r).
- inline bool OpenTextMode(const std::string &rxfilename);
-
- // Return true if currently open for reading and Stream() will
- // succeed. Does not guarantee that the stream is good.
- inline bool IsOpen();
-
- // It is never necessary or helpful to call Close, except if
- // you are concerned about too many file handles being open.
- // Close does not throw.
- void Close();
-
- // Returns the underlying stream. Throws if !IsOpen()
- std::istream &Stream();
-
- // Destructor does not throw: input streams may legitimately fail so we
- // don't worry about the status when we close them.
- ~Input();
- private:
- bool OpenInternal(const std::string &rxfilename, bool file_binary, bool *contents_binary);
- InputImplBase *impl_;
- KALDI_DISALLOW_COPY_AND_ASSIGN(Input);
-};
-
-template <class C> inline void ReadKaldiObject(const std::string &filename,
- C *c) {
- bool binary_in;
- Input ki(filename, &binary_in);
- c->Read(ki.Stream(), binary_in);
-}
-
-template <class C> inline void WriteKaldiObject(const C &c,
- const std::string &filename,
- bool binary) {
- Output ko(filename, binary);
- c.Write(ko.Stream(), binary);
-}
-
-/// PrintableRxfilename turns the rxfilename into a more human-readable
-/// form for error reporting, i.e. it does quoting and escaping and
-/// replaces "" or "-" with "standard input".
-std::string PrintableRxfilename(std::string rxfilename);
-
-/// PrintableWxfilename turns the filename into a more human-readable
-/// form for error reporting, i.e. it does quoting and escaping and
-/// replaces "" or "-" with "standard output".
-std::string PrintableWxfilename(std::string wxfilename);
-
-/// @}
-
-} // end namespace kaldi.
-
-#include "kaldi-io-inl.h"
-
-#endif
diff --git a/kaldi_io/src/kaldi/util/kaldi-pipebuf.h b/kaldi_io/src/kaldi/util/kaldi-pipebuf.h
deleted file mode 100644
index 43e5a2e..0000000
--- a/kaldi_io/src/kaldi/util/kaldi-pipebuf.h
+++ /dev/null
@@ -1,90 +0,0 @@
-// util/kaldi-pipebuf.h
-
-// Copyright 2009-2011 Ondrej Glembek
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-
-// http://www.apache.org/licenses/LICENSE-2.0
-
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-/** @file kaldi-pipebuf.h
- * This is a Kaldi C++ Library header.
- */
-
-#ifndef KALDI_UTIL_KALDI_PIPEBUF_H_
-#define KALDI_UTIL_KALDI_PIPEBUF_H_
-
-#if defined(_LIBCPP_VERSION) // libc++
-#include "basic-filebuf.h"
-#else
-#include <fstream>
-#endif
-
-namespace kaldi
-{
-// This class provides a way to initialize a filebuf with a FILE* pointer
-// directly; it will not close the file pointer when it is deleted.
-// The C++ standard does not allow implementations of C++ to provide
-// this constructor within basic_filebuf, which makes it hard to deal
-// with pipes using completely native C++. This is a workaround
-
-#ifdef _MSC_VER
-#elif defined(_LIBCPP_VERSION) // libc++
-template<class CharType, class Traits = std::char_traits<CharType> >
-class basic_pipebuf : public basic_filebuf<CharType, Traits>
-{
- public:
- typedef basic_pipebuf<CharType, Traits> ThisType;
-
- public:
- basic_pipebuf(FILE *fptr, std::ios_base::openmode mode)
- : basic_filebuf<CharType, Traits>() {
- this->open(fptr, mode);
- if (!this->is_open()) {
- KALDI_WARN << "Error initializing pipebuf"; // probably indicates
- // code error, if the fptr was good.
- return;
- }
- }
-}; // class basic_pipebuf
-#else
-template<class CharType, class Traits = std::char_traits<CharType> >
-class basic_pipebuf : public std::basic_filebuf<CharType, Traits>
-{
- public:
- typedef basic_pipebuf<CharType, Traits> ThisType;
-
- public:
- basic_pipebuf(FILE *fptr, std::ios_base::openmode mode)
- : std::basic_filebuf<CharType, Traits>() {
- this->_M_file.sys_open(fptr, mode);
- if (!this->is_open()) {
- KALDI_WARN << "Error initializing pipebuf"; // probably indicates
- // code error, if the fptr was good.
- return;
- }
- this->_M_mode = mode;
- this->_M_buf_size = BUFSIZ;
- this->_M_allocate_internal_buffer();
- this->_M_reading = false;
- this->_M_writing = false;
- this->_M_set_buffer(-1);
- }
-}; // class basic_pipebuf
-#endif // _MSC_VER
-
-}; // namespace kaldi
-
-#endif // KALDI_UTIL_KALDI_PIPEBUF_H_
-
diff --git a/kaldi_io/src/kaldi/util/kaldi-table-inl.h b/kaldi_io/src/kaldi/util/kaldi-table-inl.h
deleted file mode 100644
index 6b73c88..0000000
--- a/kaldi_io/src/kaldi/util/kaldi-table-inl.h
+++ /dev/null
@@ -1,2246 +0,0 @@
-// util/kaldi-table-inl.h
-
-// Copyright 2009-2011 Microsoft Corporation
-// 2013 Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef KALDI_UTIL_KALDI_TABLE_INL_H_
-#define KALDI_UTIL_KALDI_TABLE_INL_H_
-
-#include <algorithm>
-#include "util/kaldi-io.h"
-#include "util/text-utils.h"
-#include "util/stl-utils.h" // for StringHasher.
-
-
-namespace kaldi {
-
-/// \addtogroup table_impl_types
-/// @{
-
-template<class Holder> class SequentialTableReaderImplBase {
- public:
- typedef typename Holder::T T;
- // Note: despite the parameter name, the implementations below are passed the
- // full rspecifier and parse it themselves with ClassifyRspecifier().
- virtual bool Open(const std::string &rxfilename) = 0;
- virtual bool Done() const = 0;
- virtual bool IsOpen() const = 0;
- virtual std::string Key() = 0;
- virtual const T &Value() = 0;
- virtual void FreeCurrent() = 0;
- virtual void Next() = 0;
- virtual bool Close() = 0;
- SequentialTableReaderImplBase() { }
- virtual ~SequentialTableReaderImplBase() { }
- private:
- KALDI_DISALLOW_COPY_AND_ASSIGN(SequentialTableReaderImplBase);
-};
-
-
-// This is the implementation for SequentialTableReader
-// when it's actually a script file.
-template<class Holder> class SequentialTableReaderScriptImpl:
- public SequentialTableReaderImplBase<Holder> {
- public:
- typedef typename Holder::T T;
-
- SequentialTableReaderScriptImpl(): state_(kUninitialized) { }
-
- virtual bool Open(const std::string &rspecifier) {
- if (state_ != kUninitialized)
- if (! Close()) // call Close() yourself to suppress this exception.
- KALDI_ERR << "TableReader::Open, error closing previous input: "
- << "rspecifier was " << rspecifier_;
- bool binary;
- rspecifier_ = rspecifier;
- RspecifierType rs = ClassifyRspecifier(rspecifier, &script_rxfilename_,
- &opts_);
- KALDI_ASSERT(rs == kScriptRspecifier);
- if (!script_input_.Open(script_rxfilename_, &binary)) { // Failure on Open
- KALDI_WARN << "Failed to open script file "
- << PrintableRxfilename(script_rxfilename_);
- state_ = kUninitialized;
- return false;
- } else { // Open succeeded.
- if (binary) { // script file should not be binary file..
- state_ = kError; // bad script file.
- script_input_.Close();
- return false;
- } else {
- state_ = kFileStart;
- Next();
- if (state_ == kError) {
- script_input_.Close();
- return false;
- }
- if (opts_.permissive) { // Next() will have preloaded.
- KALDI_ASSERT(state_ == kLoadSucceeded || state_ == kEof);
- } else {
- KALDI_ASSERT(state_ == kHaveScpLine || state_ == kEof);
- }
- return true; // Success.
- }
- }
- }
-
- virtual bool IsOpen() const {
- switch (state_) {
- case kEof: case kError: case kHaveScpLine: case kLoadSucceeded: case kLoadFailed: return true;
- case kUninitialized: return false;
- default: KALDI_ERR << "IsOpen() called on invalid object."; // kFileStart is not valid
- // state for user to call something on.
- return false;
- }
- }
-
- virtual bool Done() const {
- switch (state_) {
- case kHaveScpLine: return false;
- case kLoadSucceeded: case kLoadFailed: return false;
- // These cases are because we want LoadCurrent()
- // to be callable after Next() and to not change the Done() status [only Next() should change
- // the Done() status].
- case kEof: case kError: return true; // Error condition, like Eof, counts as Done(); the destructor
- // or Close() will inform the user of the error.
- default: KALDI_ERR << "Done() called on TableReader object at the wrong time.";
- return false;
- }
- }
-
- virtual std::string Key() {
- // Valid to call this whenever Done() returns false.
- switch (state_) {
- case kHaveScpLine: case kLoadSucceeded: case kLoadFailed: break;
- default:
- // coding error.
- KALDI_ERR << "Key() called on TableReader object at the wrong time.";
- }
- return key_;
- }
- const T &Value() {
- StateType orig_state = state_;
- if (state_ == kHaveScpLine) LoadCurrent(); // Takes
- // state_ to kLoadSucceeded or kLoadFailed.
- if (state_ == kLoadFailed) { // this can happen due to
- // a file listed in an scp file not existing, or
- // read failure, failure of a command, etc.
- if (orig_state == kHaveScpLine)
- KALDI_ERR << "TableReader: failed to load object from "
- << PrintableRxfilename(data_rxfilename_)
- << " (to suppress this error, add the permissive "
- << "(p, ) option to the rspecifier.";
-
- else // orig_state_ was kLoadFailed, which only could have happened
- // if the user called FreeCurrent().
- KALDI_ERR << "TableReader: you called Value() after FreeCurrent().";
- } else if (state_ != kLoadSucceeded) {
- // This would be a coding error.
- KALDI_ERR << "TableReader: Value() called at the wrong time.";
- }
- return holder_.Value();
- }
- void FreeCurrent() {
- if (state_ == kLoadSucceeded) {
- holder_.Clear();
- state_ = kLoadFailed;
- } else {
- KALDI_WARN << "TableReader: FreeCurrent called at the wrong time.";
- }
- }
- void Next() {
- while (1) {
- NextScpLine();
- if (Done()) return;
- if (opts_.permissive) {
- // Permissive mode means, when reading scp files, we treat keys whose scp entry
- // cannot be read as nonexistent. This means trying to read.
- if (LoadCurrent()) return; // Success.
- // else try the next scp line.
- } else {
- return; // We got the next key; Value() will crash if we can't
- // read the scp line.
- }
- }
- }
-
- virtual bool Close() {
- // Close() will succeed if the stream was not in an error
- // state. To clean up, it also closes the Input objects if
- // they're open.
- if (script_input_.IsOpen())
- script_input_.Close();
- if (data_input_.IsOpen())
- data_input_.Close();
- if (state_ == kLoadSucceeded)
- holder_.Clear();
- if (!this->IsOpen())
- KALDI_ERR << "Close() called on input that was not open.";
- StateType old_state = state_;
- state_ = kUninitialized;
- if (old_state == kError) {
- if (opts_.permissive) {
- KALDI_WARN << "Close() called on scp file with read error, ignoring the "
- "error because permissive mode specified.";
- return true;
- } else return false; // User will do something with the error status.
- } else return true;
- }
-
- virtual ~SequentialTableReaderScriptImpl() {
- if (state_ == kError)
- KALDI_ERR << "TableReader: reading script file failed: from scp "
- << PrintableRxfilename(script_rxfilename_);
- // If you don't want this exception to be thrown you can
- // call Close() and check the status.
- if (state_ == kLoadSucceeded)
- holder_.Clear();
- }
- private:
- bool LoadCurrent() {
- // Attempts to load object whose rxfilename is on the current scp line.
- if (state_ != kHaveScpLine)
- KALDI_ERR << "TableReader: LoadCurrent() called at the wrong time.";
- bool ans;
- // note, NULL means it doesn't read the binary-mode header
- if (Holder::IsReadInBinary()) ans = data_input_.Open(data_rxfilename_, NULL);
- else ans = data_input_.OpenTextMode(data_rxfilename_);
- if (!ans) {
- // May want to make this warning a VLOG at some point
- KALDI_WARN << "TableReader: failed to open file "
- << PrintableRxfilename(data_rxfilename_);
- state_ = kLoadFailed;
- return false;
- } else {
- if (holder_.Read(data_input_.Stream())) {
- state_ = kLoadSucceeded;
- return true;
- } else { // holder_ will not contain data.
- KALDI_WARN << "TableReader: failed to load object from "
- << PrintableRxfilename(data_rxfilename_);
- state_ = kLoadFailed;
- return false;
- }
- }
- }
-
- // Reads the next line in the script file.
- void NextScpLine() {
- switch (state_) {
- case kLoadSucceeded: holder_.Clear(); break;
- case kHaveScpLine: case kLoadFailed: case kFileStart: break;
- default:
- // No other states are valid to call Next() from.
- KALDI_ERR << "Reading script file: Next called wrongly.";
- }
- std::string line;
- if (getline(script_input_.Stream(), line)) {
- SplitStringOnFirstSpace(line, &key_, &data_rxfilename_);
- if (!key_.empty() && !data_rxfilename_.empty()) {
- // Got a valid line.
- state_ = kHaveScpLine;
- } else {
- // Got an invalid line.
- state_ = kError; // we can't make sense of this
- // scp file and will now die.
- }
- } else {
- state_ = kEof; // nothing more in the scp file.
- // Might as well close the input streams as don't need them.
- script_input_.Close();
- if (data_input_.IsOpen())
- data_input_.Close();
- }
- }
-
-
- Input script_input_; // Input object for the .scp file
- Input data_input_; // Input object for the entries in
- // the script file.
- Holder holder_; // Holds the object.
- bool binary_; // Binary-mode archive.
- std::string key_;
- std::string rspecifier_;
- std::string script_rxfilename_; // of the script file.
- RspecifierOptions opts_; // options.
- std::string data_rxfilename_; // of the file we're reading.
- enum StateType {
- // [The state of the reading process] [does holder_ [is script_inp_
- // have object] open]
- kUninitialized, // Uninitialized or closed. no no
- kEof, // We did Next() and found eof in script file. no no
- kError, // Some other error no yes
- kHaveScpLine, // Just called Open() or Next() and have a no yes
- // line of the script file but no data.
- kLoadSucceeded, // Called LoadCurrent() and it succeeded. yes yes
- kLoadFailed, // Called LoadCurrent() and it failed, no yes
- // or the user called FreeCurrent().. note,
- // if when called by user we are in this state,
- // it means the user called FreeCurrent().
- kFileStart, // [state we only use internally] no yes
- } state_;
- private:
-};
-
-
-// This is the implementation for SequentialTableReader
-// when it's an archive. Note that the archive format is:
-// key1 [space] object1 key2 [space]
-// object2 ... eof.
-// "object1" is the output of the Holder::Write function and will
-// typically contain a binary header (in binary mode) and then
-// the output of object.Write(os, binary).
-// The archive itself does not care whether it is in binary
-// or text mode, for reading purposes.
-
-template<class Holder> class SequentialTableReaderArchiveImpl:
- public SequentialTableReaderImplBase<Holder> {
- public:
- typedef typename Holder::T T;
-
- SequentialTableReaderArchiveImpl(): state_(kUninitialized) { }
-
- virtual bool Open(const std::string &rspecifier) {
- if (state_ != kUninitialized) {
- if (! Close()) { // call Close() yourself to suppress this exception.
- if (opts_.permissive)
- KALDI_WARN << "TableReader::Open, error closing previous input "
- "(only warning, since permissive mode).";
- else
- KALDI_ERR << "TableReader::Open, error closing previous input.";
- }
- }
- rspecifier_ = rspecifier;
- RspecifierType rs = ClassifyRspecifier(rspecifier,
- &archive_rxfilename_,
- &opts_);
- KALDI_ASSERT(rs == kArchiveRspecifier);
-
- bool ans;
- // NULL means don't expect binary-mode header
- if (Holder::IsReadInBinary())
- ans = input_.Open(archive_rxfilename_, NULL);
- else
- ans = input_.OpenTextMode(archive_rxfilename_);
- if (!ans) { // header.
- KALDI_WARN << "TableReader: failed to open stream "
- << PrintableRxfilename(archive_rxfilename_);
- state_ = kUninitialized; // Failure on Open
- return false; // User should print the error message.
- }
- state_ = kFileStart;
- Next();
- if (state_ == kError) {
- KALDI_WARN << "Error beginning to read archive file (wrong filename?): "
- << PrintableRxfilename(archive_rxfilename_);
- input_.Close();
- state_ = kUninitialized;
- return false;
- }
- KALDI_ASSERT(state_ == kHaveObject || state_ == kEof);
- return true;
- }
-
- virtual void Next() {
- switch (state_) {
- case kHaveObject:
- holder_.Clear(); break;
- case kFileStart: case kFreedObject:
- break;
- default:
- KALDI_ERR << "TableReader: Next() called wrongly.";
- }
- std::istream &is = input_.Stream();
- is.clear(); // Clear any fail bits that may have been set... just in case
- // this happened in the Read function.
- is >> key_; // This eats up any leading whitespace and gets the string.
- if (is.eof()) {
- state_ = kEof;
- return;
- }
- if (is.fail()) { // This shouldn't really happen, barring file-system errors.
- KALDI_WARN << "Error reading archive "
- << PrintableRxfilename(archive_rxfilename_);
- state_ = kError;
- return;
- }
- int c;
- if ((c = is.peek()) != ' ' && c != '\t' && c != '\n') { // We expect a space ' ' after the key.
- // We also allow tab [which is consumed] and newline [which is not], just
- // so we can read archives generated by scripts that may not be fully
- // aware of how this format works.
- KALDI_WARN << "Invalid archive file format: expected space after key "
- << key_ << ", got character "
- << CharToString(static_cast<char>(is.peek())) << ", reading "
- << PrintableRxfilename(archive_rxfilename_);
- state_ = kError;
- return;
- }
- if (c != '\n') is.get(); // Consume the space or tab.
- if (holder_.Read(is)) {
- state_ = kHaveObject;
- return;
- } else {
- KALDI_WARN << "Object read failed, reading archive "
- << PrintableRxfilename(archive_rxfilename_);
- state_ = kError;
- return;
- }
- }
-
- virtual bool IsOpen() const {
- switch (state_) {
- case kEof: case kError: case kHaveObject: case kFreedObject: return true;
- case kUninitialized: return false;
- default: KALDI_ERR << "IsOpen() called on invalid object."; // kFileStart is not valid
- // state for user to call something on.
- return false;
- }
- }
-
- virtual bool Done() const {
- switch (state_) {
- case kHaveObject:
- return false;
- case kEof: case kError:
- return true; // Error-state counts as Done(), but destructor
- // will fail (unless you check the status with Close()).
- default:
- KALDI_ERR << "Done() called on TableReader object at the wrong time.";
- return false;
- }
- }
-
- virtual std::string Key() {
- // Valid to call this whenever Done() returns false
- switch (state_) {
- case kHaveObject: break; // only valid case.
- default:
- // coding error.
- KALDI_ERR << "Key() called on TableReader object at the wrong time.";
- }
- return key_;
- }
- const T &Value() {
- switch (state_) {
- case kHaveObject:
- break; // only valid case.
- default:
- // coding error.
- KALDI_ERR << "Value() called on TableReader object at the wrong time.";
- }
- return holder_.Value();
- }
- virtual void FreeCurrent() {
- if (state_ == kHaveObject) {
- holder_.Clear();
- state_ = kFreedObject;
- } else
- KALDI_WARN << "TableReader: FreeCurernt called at the wrong time.";
- }
-
- virtual bool Close() {
- if (! this->IsOpen())
- KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly.";
- if (input_.IsOpen())
- input_.Close();
- if (state_ == kHaveObject)
- holder_.Clear();
- bool ans;
- if (opts_.permissive) {
- ans = true; // always return success.
- if (state_ == kError)
- KALDI_WARN << "Error detected closing TableReader for archive "
- << PrintableRxfilename(archive_rxfilename_) << " but ignoring "
- << "it as permissive mode specified.";
- } else
- ans = (state_ != kError); // If error state, user should detect it.
- state_ = kUninitialized;
- return ans;
- }
-
- virtual ~SequentialTableReaderArchiveImpl() {
- if (state_ == kError) {
- if (opts_.permissive)
- KALDI_WARN << "Error detected closing TableReader for archive "
- << PrintableRxfilename(archive_rxfilename_) << " but ignoring "
- << "it as permissive mode specified.";
- else
- KALDI_ERR << "TableReader: error detected closing archive "
- << PrintableRxfilename(archive_rxfilename_);
- }
- // If you don't want this exception to be thrown you can
- // call Close() and check the status.
- if (state_ == kHaveObject)
- holder_.Clear();
- }
- private:
- Input input_; // Input object for the archive
- Holder holder_; // Holds the object.
- std::string key_;
- std::string rspecifier_;
- std::string archive_rxfilename_;
- RspecifierOptions opts_;
- enum { // [The state of the reading process] [does holder_ [is input_
- // have object] open]
- kUninitialized, // Uninitialized or closed. no no
- kFileStart, // [state we use internally: just opened.] no yes
- kEof, // We did Next() and found eof in archive no no
- kError, // Some other error no no
- kHaveObject, // We read the key and the object after it. yes yes
- kFreedObject, // The user called FreeCurrent(). no yes
- } state_;
-};
-
-
-template<class Holder>
-SequentialTableReader<Holder>::SequentialTableReader(const std::string &rspecifier): impl_(NULL) {
- if (rspecifier != "" && !Open(rspecifier))
- KALDI_ERR << "Error constructing TableReader: rspecifier is " << rspecifier;
-}
-
-// Opens the table named by rspecifier, first closing any table that is
-// already open.  Returns false (after a warning) if the rspecifier is invalid
-// or if the implementation object fails to open it.
-template<class Holder>
-bool SequentialTableReader<Holder>::Open(const std::string &rspecifier) {
-  if (IsOpen() && !Close()) {
-    KALDI_ERR << "Could not close previously open object.";
-  }
-  // impl_ is NULL from here on: either nothing was open, or Close() reset it.
-
-  const RspecifierType rs_type = ClassifyRspecifier(rspecifier, NULL, NULL);
-  if (rs_type == kArchiveRspecifier) {
-    impl_ = new SequentialTableReaderArchiveImpl<Holder>();
-  } else if (rs_type == kScriptRspecifier) {
-    impl_ = new SequentialTableReaderScriptImpl<Holder>();
-  } else {  // kNoRspecifier, or any unexpected value.
-    KALDI_WARN << "Invalid rspecifier " << rspecifier;
-    return false;
-  }
-
-  if (impl_->Open(rspecifier)) return true;
-
-  // The sub-object will have printed warnings; just discard it.
-  delete impl_;
-  impl_ = NULL;
-  return false;
-}
-
-// Closes the implementation object and discards it.  Returns whatever status
-// the implementation's Close() reported.
-template<class Holder>
-bool SequentialTableReader<Holder>::Close() {
-  CheckImpl();
-  const bool closed_ok = impl_->Close();
-  // IsOpen() is defined as (impl_ != NULL), so never keep a closed impl_.
-  delete impl_;
-  impl_ = NULL;
-  return closed_ok;
-}
-
-
-// Returns true if a table is currently open.
-template<class Holder>
-bool SequentialTableReader<Holder>::IsOpen() const {
-  // impl_ is deleted whenever the table is not open, so a non-NULL pointer is
-  // sufficient; the IsOpen functions of the Impl objects are not really
-  // needed.
-  const bool have_impl = (impl_ != NULL);
-  return have_impl;
-}
-
-// Returns the key reported by the implementation object.
-template<class Holder>
-std::string SequentialTableReader<Holder>::Key() {
-  CheckImpl();
-  // The call below may throw if called wrongly in other ways, e.g. at eof.
-  std::string current_key = impl_->Key();
-  return current_key;
-}
-
-
-// Forwards to the implementation object's FreeCurrent().
-template<class Holder>
-void SequentialTableReader<Holder>::FreeCurrent() {
-  CheckImpl();  // Run the impl_ check before dereferencing it.
-  impl_->FreeCurrent();
-}
-
-
-// Returns a reference to the value held by the implementation object.
-template<class Holder>
-const typename SequentialTableReader<Holder>::T &
-SequentialTableReader<Holder>::Value() {
-  CheckImpl();
-  // This may throw (if LoadCurrent() returned false you are safe.).
-  const T &current_value = impl_->Value();
-  return current_value;
-}
-
-
-// Forwards to the implementation object's Next().
-template<class Holder>
-void SequentialTableReader<Holder>::Next() {
-  CheckImpl();  // Run the impl_ check before dereferencing it.
-  impl_->Next();
-}
-
-// Returns the implementation object's Done() status.
-template<class Holder>
-bool SequentialTableReader<Holder>::Done() {
-  CheckImpl();
-  const bool finished = impl_->Done();
-  return finished;
-}
-
-
-// Releases the implementation object, if there is one.
-template<class Holder>
-SequentialTableReader<Holder>::~SequentialTableReader() {
-  // Deleting a NULL pointer is a no-op, so no explicit check is required.
-  // Note: the destructor of impl_ may throw.
-  delete impl_;
-}
-
-
-
-// Abstract interface implemented by the concrete TableWriter*Impl classes.
-template<class Holder> class TableWriterImplBase {
- public:
-  typedef typename Holder::T T;
-
-  TableWriterImplBase() { }
-
-  // Opens the writer on the given wspecifier; returns true on success.
-  virtual bool Open(const std::string &wspecifier) = 0;
-
-  // Writes one (key, value) pair.  Returns true on success and false on
-  // failure, although some errors may only be detected by Close().  It
-  // throws (via KALDI_ERR) if called wrongly.  Throwing on every error would
-  // also have been possible, since that is what TableWriter does; the bool
-  // return is kept because TableWriter::Write originally returned an exit
-  // status.
-  virtual bool Write(const std::string &key, const T &value) = 0;
-
-  // Flushes any archive.  No error status is returned; errors are reported
-  // by the next Write or Close.
-  virtual void Flush() = 0;
-
-  // Closes the writer; returns true on success.
-  virtual bool Close() = 0;
-
-  virtual bool IsOpen() const = 0;
-
-  // May throw on write error if Close was not called.
-  virtual ~TableWriterImplBase() { }
-
- private:
-  KALDI_DISALLOW_COPY_AND_ASSIGN(TableWriterImplBase);
-};
-
-
-// The implementation of TableWriter we use when writing directly
-// to an archive with no associated scp.
-template<class Holder>
-class TableWriterArchiveImpl: public TableWriterImplBase<Holder> {
- public:
-  typedef typename Holder::T T;
-
-  // Opens the archive named in wspecifier.  A writer that is already open is
-  // closed first; it is an error (KALDI_ERR) if that writer is in the
-  // write-error state or if closing it fails.  Returns false, without
-  // printing anything, if the output stream cannot be opened.
-  virtual bool Open(const std::string &wspecifier) {
-    switch (state_) {
-      case kUninitialized:
-        break;
-      case kWriteError:
-        KALDI_ERR << "TableWriter: opening stream, already open with write error.";
-      case kOpen: default:
-        if (!Close())  // throw because this error may not have been previously
-                       // detected by the user.
-          KALDI_ERR << "TableWriter: opening stream, error closing previously open stream.";
-    }
-    wspecifier_ = wspecifier;
-    WspecifierType ws = ClassifyWspecifier(wspecifier,
-                                           &archive_wxfilename_,
-                                           NULL,
-                                           &opts_);
-    KALDI_ASSERT(ws == kArchiveWspecifier);  // or wrongly called.
-
-    if (output_.Open(archive_wxfilename_, opts_.binary, false)) {  // false means no binary header.
-      state_ = kOpen;
-      return true;
-    } else {
-      // stream will not be open.  User will report this error
-      // (we return bool), so don't bother printing anything.
-      state_ = kUninitialized;
-      return false;
-    }
-  }
-
-  // Returns true in the kOpen and kWriteError states, false in
-  // kUninitialized.
-  virtual bool IsOpen() const {
-    switch (state_) {
-      case kUninitialized: return false;
-      case kOpen: case kWriteError: return true;
-      default: KALDI_ERR << "IsOpen() called on TableWriter in invalid state.";
-    }
-    return false;
-  }
-
-  // Write returns true on success, false on failure, but
-  // some errors may not be detected till we call Close().
-  // Once a write has failed, every later call warns and returns false,
-  // because the archive may be corrupted and unreadable.
-  virtual bool Write(const std::string &key, const T &value) {
-    switch (state_) {
-      case kOpen: break;
-      case kWriteError:
-        // user should have known from the last
-        // call to Write that there was a problem.
-        KALDI_WARN << "TableWriter: attempting to write to invalid stream.";
-        return false;
-      case kUninitialized: default:
-        KALDI_ERR << "TableWriter: Write called on invalid stream";
-    }
-    // state_ is kOpen here: the kWriteError case returned above and the
-    // remaining cases raised an error.
-    if (!IsToken(key))  // e.g. empty string or has spaces...
-      KALDI_ERR << "TableWriter: using invalid key " << key;
-    output_.Stream() << key << ' ';
-    if (!Holder::Write(output_.Stream(), opts_.binary, value)) {
-      KALDI_WARN << "TableWriter: write failure to "
-                 << PrintableWxfilename(archive_wxfilename_);
-      state_ = kWriteError;
-      return false;
-    }
-
-    if (opts_.flush)
-      Flush();
-    return true;
-  }
-
-  // Flush will flush any archive; it does not return error status,
-  // any errors will be reported on the next Write or Close.
-  virtual void Flush() {
-    switch (state_) {
-      case kWriteError: case kOpen:
-        output_.Stream().flush();  // Don't check error status.
-        return;
-      default:
-        KALDI_WARN << "TableWriter: Flush called on not-open writer.";
-    }
-  }
-
-  // Closes the output stream.  Returns false (after a warning) if closing the
-  // stream fails or if an earlier Write() failed; in every case the writer
-  // ends up in the kUninitialized state.  Calling this on a writer that is
-  // not open is an error.
-  virtual bool Close() {
-    if (!this->IsOpen() || !output_.IsOpen())
-      KALDI_ERR << "TableWriter: Close called on a stream that was not open: "
-                << "IsOpen() = " << this->IsOpen()
-                << ", output_.IsOpen() = " << output_.IsOpen();
-    bool close_success = output_.Close();
-    if (!close_success) {
-      KALDI_WARN << "TableWriter: error closing stream: wspecifier is "
-                 << wspecifier_;
-      state_ = kUninitialized;
-      return false;
-    }
-    if (state_ == kWriteError) {
-      KALDI_WARN << "TableWriter: closing writer in error state: wspecifier is "
-                 << wspecifier_;
-      state_ = kUninitialized;
-      return false;
-    }
-    state_ = kUninitialized;
-    return true;
-  }
-
-  TableWriterArchiveImpl(): state_(kUninitialized) {}
-
-  // May throw on write error if Close was not called.
-  virtual ~TableWriterArchiveImpl() {
-    if (!IsOpen()) return;
-    else if (!Close())
-      KALDI_ERR << "At TableWriter destructor: Write failed or stream close "
-                << "failed: wspecifier is "<< wspecifier_;
-  }
-
- private:
-  Output output_;
-  WspecifierOptions opts_;
-  std::string wspecifier_;
-  std::string archive_wxfilename_;
-  enum {  // is stream open?
-    kUninitialized,  // no
-    kOpen,  // yes
-    kWriteError,  // yes
-  } state_;
-};
-
-
-
-
-// The implementation of TableWriter we use when writing to
-// individual files (more generally, wxfilenames) specified
-// in an scp file that we read.
-
-// Note: the code for this class is similar to RandomAccessTableReaderScriptImpl;
-// try to keep them in sync.
-
-template<class Holder>
-class TableWriterScriptImpl: public TableWriterImplBase<Holder> {
- public:
-  typedef typename Holder::T T;
-
-  TableWriterScriptImpl(): last_found_(0), state_(kUninitialized) {}
-
-  // Reads the script file named in wspecifier, sorts its entries by key and
-  // rejects duplicate keys.  Returns false if the script cannot be read or
-  // contains a duplicate key; in that case script_ is emptied again so that
-  // a later Open() on this object does not trip the script_.empty()
-  // assertion below.  Opening an already-open writer is an error.
-  virtual bool Open(const std::string &wspecifier) {
-    switch (state_) {
-      case kReadScript:
-        KALDI_ERR << " Opening already open TableWriter: call Close first.";
-      case kUninitialized: case kNotReadScript:
-        break;
-    }
-    wspecifier_ = wspecifier;
-    WspecifierType ws = ClassifyWspecifier(wspecifier,
-                                           NULL,
-                                           &script_rxfilename_,
-                                           &opts_);
-    KALDI_ASSERT(ws == kScriptWspecifier);  // or wrongly called.
-    KALDI_ASSERT(script_.empty());  // no way it could be nonempty at this point.
-
-    if (! ReadScriptFile(script_rxfilename_,
-                         true,  // print any warnings
-                         &script_)) {  // error reading script file or invalid format
-      script_.clear();  // discard anything that was read before the failure.
-      state_ = kNotReadScript;
-      return false;  // no need to print further warnings.  user gets the error.
-    }
-    std::sort(script_.begin(), script_.end());
-    for (size_t i = 0; i+1 < script_.size(); i++) {
-      if (script_[i].first.compare(script_[i+1].first) >= 0) {
-        // script[i] not < script[i+1] in lexical order...
-        KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_)
-                   << " contains duplicate key " << script_[i].first;
-        script_.clear();  // keep the object re-openable after this failure.
-        state_ = kNotReadScript;
-        return false;
-      }
-    }
-    state_ = kReadScript;
-    return true;
-  }
-
-  virtual bool IsOpen() const { return (state_ == kReadScript); }
-
-  // Forgets the script and returns to the kUninitialized state.  Always
-  // returns true; calling it on a writer that is not open is an error.
-  virtual bool Close() {
-    if (!IsOpen())
-      KALDI_ERR << "Close() called on TableWriter that was not open.";
-    state_ = kUninitialized;
-    last_found_ = 0;
-    script_.clear();
-    return true;
-  }
-
-  // Write returns true on success, false on failure, but
-  // some errors may not be detected till we call Close().
-  // The value is written to the wxfilename that the script lists for `key`;
-  // the output is opened and closed within this call.
-  virtual bool Write(const std::string &key, const T &value) {
-    if (!IsOpen())
-      KALDI_ERR << "TableWriter: Write called on invalid stream";
-
-    if (!IsToken(key))  // e.g. empty string or has spaces...
-      KALDI_ERR << "TableWriter: using invalid key " << key;
-
-    std::string wxfilename;
-    if (!LookupFilename(key, &wxfilename)) {
-      if (opts_.permissive) {
-        return true;  // In permissive mode, it's as if we're writing to /dev/null
-                      // for missing keys.
-      } else {
-        KALDI_WARN << "TableWriter: script file "
-                   << PrintableRxfilename(script_rxfilename_)
-                   << " has no entry for key "<<key;
-        return false;
-      }
-    }
-    Output output;
-    if (!output.Open(wxfilename, opts_.binary, false)) {
-      // Open in the text/binary mode (on Windows) given by member var. "binary"
-      // (obtained from wspecifier), but do not put the binary-mode header (it
-      // will be written, if needed, by the Holder::Write function.)
-      KALDI_WARN << "TableWriter: failed to open stream: "
-                 << PrintableWxfilename(wxfilename);
-      return false;
-    }
-    if (!Holder::Write(output.Stream(), opts_.binary, value)
-        || !output.Close()) {
-      KALDI_WARN << "TableWriter: failed to write data to "
-                 << PrintableWxfilename(wxfilename);
-      return false;
-    }
-    return true;
-  }
-
-  // Flush does nothing in this implementation, there is nothing to flush.
-  virtual void Flush() { }
-
-
-  virtual ~TableWriterScriptImpl() {
-    // Nothing to do in destructor.
-  }
-
- private:
-  // Looks up `key` in the sorted script_ vector and, if present, copies the
-  // associated filename to *wxfilename and returns true.
-  // Note: this function is almost the same as in RandomAccessTableReaderScriptImpl.
-  bool LookupFilename(const std::string &key, std::string *wxfilename) {
-    // First, an optimization: if we're going consecutively, this will
-    // make the lookup very fast.
-    last_found_++;
-    if (last_found_ < script_.size() && script_[last_found_].first == key) {
-      *wxfilename = script_[last_found_].second;
-      return true;
-    }
-    std::pair<std::string, std::string> pr(key, "");  // Important that ""
-    // compares less than or equal to any string, so lower_bound points to the
-    // element that has the same key.
-    typedef typename std::vector<std::pair<std::string, std::string> >::const_iterator
-        IterType;
-    IterType iter = std::lower_bound(script_.begin(), script_.end(), pr);
-    if (iter != script_.end() && iter->first == key) {
-      last_found_ = iter - script_.begin();
-      *wxfilename = iter->second;
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-
-  WspecifierOptions opts_;
-  std::string wspecifier_;
-  std::string script_rxfilename_;
-
-  // the script_ variable contains pairs of (key, filename), sorted using
-  // std::sort.  This can be used with binary_search to look up filenames for
-  // writing.  If this becomes inefficient we can use std::unordered_map (but I
-  // suspect this wouldn't be significantly faster & would use more memory).
-  // If memory becomes a problem here, the user should probably be passing
-  // only the relevant part of the scp file rather than expecting us to get too
-  // clever in the code.
-  std::vector<std::pair<std::string, std::string> > script_;
-  size_t last_found_;  // This is for an optimization used in LookupFilename.
-
-  enum {
-    kUninitialized,
-    kReadScript,
-    kNotReadScript,  // read of script failed.
-  } state_;
-};
-
-
-// The implementation of TableWriter we use when writing directly
-// to an archive plus an associated scp.
-template<class Holder>
-class TableWriterBothImpl: public TableWriterImplBase<Holder> {
- public:
-  typedef typename Holder::T T;
-
-  // Opens both the archive and the script output named in wspecifier.  A
-  // writer that is already open is closed first; it is an error (KALDI_ERR)
-  // if that writer is in the write-error state or if closing it fails.
-  // Returns false if either output cannot be opened, leaving the writer in
-  // the kUninitialized state.
-  virtual bool Open(const std::string &wspecifier) {
-    switch (state_) {
-      case kUninitialized:
-        break;
-      case kWriteError:
-        KALDI_ERR << "TableWriter: opening stream, already open with write error.";
-      case kOpen: default:
-        if (!Close())  // throw because this error may not have been previously detected by user.
-          KALDI_ERR << "TableWriter: opening stream, error closing previously open stream.";
-    }
-    wspecifier_ = wspecifier;
-    WspecifierType ws = ClassifyWspecifier(wspecifier,
-                                           &archive_wxfilename_,
-                                           &script_wxfilename_,
-                                           &opts_);
-    KALDI_ASSERT(ws == kBothWspecifier);  // or wrongly called.
-    if (ClassifyWxfilename(archive_wxfilename_) != kFileOutput)
-      KALDI_WARN << "When writing to both archive and script, the script file "
-          "will generally not be interpreted correctly unless the archive is "
-          "an actual file: wspecifier = " << wspecifier;
-
-    if (!archive_output_.Open(archive_wxfilename_, opts_.binary, false)) {  // false means no binary header.
-      state_ = kUninitialized;
-      return false;
-    }
-    if (!script_output_.Open(script_wxfilename_, false, false)) {  // first false means text mode:
-      // script files always text-mode.  second false means don't write header (doesn't matter
-      // for text mode).
-      archive_output_.Close();  // Don't care about status: error anyway.
-      state_ = kUninitialized;
-      return false;
-    }
-    state_ = kOpen;
-    return true;
-  }
-
-  // Returns true in the kOpen and kWriteError states, false in
-  // kUninitialized.
-  virtual bool IsOpen() const {
-    switch (state_) {
-      case kUninitialized: return false;
-      case kOpen: case kWriteError: return true;
-      default: KALDI_ERR << "IsOpen() called on TableWriter in invalid state.";
-    }
-    return false;
-  }
-
-  // Sets *output to the archive wxfilename followed by ':' and the stream
-  // position, e.g. /some/file:12302.  Asserts that the position does not
-  // print as -1.
-  void MakeFilename(typename std::ostream::pos_type streampos, std::string *output) const {
-    std::ostringstream ss;
-    ss << ':' << streampos;
-    KALDI_ASSERT(ss.str() != ":-1");
-    *output = archive_wxfilename_ + ss.str();
-
-    // e.g. /some/file:12302.
-    // Note that we warned if archive_wxfilename_ is not an actual filename;
-    // the philosophy is we give the user rope and if they want to hang
-    // themselves, with it, fine.
-  }
-
-  // Write returns true on success, false on failure, but
-  // some errors may not be detected till we call Close().
-  // Once a write has failed, every later call warns and returns false,
-  // because the archive may be corrupted and unreadable.
-  virtual bool Write(const std::string &key, const T &value) {
-    switch (state_) {
-      case kOpen: break;
-      case kWriteError:
-        // user should have known from the last
-        // call to Write that there was a problem.  Warn about it.
-        KALDI_WARN << "TableWriter: writing to non-open TableWriter object.";
-        return false;
-      case kUninitialized: default:
-        KALDI_ERR << "TableWriter: Write called on invalid stream";
-    }
-    // state_ is kOpen here: the kWriteError case returned above and the
-    // remaining cases raised an error.
-    if (!IsToken(key))  // e.g. empty string or has spaces...
-      KALDI_ERR << "TableWriter: using invalid key " << key;
-    std::ostream &archive_os = archive_output_.Stream();
-    archive_os << key << ' ';
-    typename std::ostream::pos_type archive_os_pos = archive_os.tellp();
-    // position at start of Write() to archive.  We will record this in the script file.
-    std::string offset_rxfilename;  // rxfilename with offset into the archive,
-    // e.g. some_archive_name.ark:431541423
-    MakeFilename(archive_os_pos, &offset_rxfilename);
-
-    // Write to the script file first.
-    // The idea is that we want to get all the information possible into the
-    // script file, to make it easier to unwind errors later.
-    std::ostream &script_os = script_output_.Stream();
-    script_os << key << ' ' << offset_rxfilename << '\n';
-
-    if (!Holder::Write(archive_output_.Stream(), opts_.binary, value)) {
-      KALDI_WARN << "TableWriter: write failure to "
-                 << PrintableWxfilename(archive_wxfilename_);
-      state_ = kWriteError;
-      return false;
-    }
-
-    if (script_os.fail()) {
-      KALDI_WARN << "TableWriter: write failure to script file detected: "
-                 << PrintableWxfilename(script_wxfilename_);
-      state_ = kWriteError;
-      return false;
-    }
-
-    if (archive_os.fail()) {
-      KALDI_WARN << "TableWriter: write failure to archive file detected: "
-                 << PrintableWxfilename(archive_wxfilename_);
-      state_ = kWriteError;
-      return false;
-    }
-
-    if (opts_.flush)
-      Flush();
-    return true;
-  }
-
-  // Flush will flush any archive; it does not return error status,
-  // any errors will be reported on the next Write or Close.
-  virtual void Flush() {
-    switch (state_) {
-      case kWriteError: case kOpen:
-        archive_output_.Stream().flush();  // Don't check error status.
-        script_output_.Stream().flush();  // Don't check error status.
-        return;
-      default:
-        KALDI_WARN << "TableWriter: Flush called on not-open writer.";
-    }
-  }
-
-  // Closes whichever of the two outputs are open.  Returns true only if both
-  // closes succeeded and no earlier Write() failed; the writer always ends
-  // up in the kUninitialized state.  Calling this on a writer that is not
-  // open is an error.
-  virtual bool Close() {
-    if (!this->IsOpen())
-      KALDI_ERR << "TableWriter: Close called on a stream that was not open.";
-    bool close_success = true;
-    if (archive_output_.IsOpen())
-      if (!archive_output_.Close()) close_success = false;
-    if (script_output_.IsOpen())
-      if (!script_output_.Close()) close_success = false;
-    bool ans = close_success && (state_ != kWriteError);
-    state_ = kUninitialized;
-    return ans;
-  }
-
-  TableWriterBothImpl(): state_(kUninitialized) {}
-
-  // May throw on write error if Close() was not called.
-  // User can get the error status by calling Close().
-  virtual ~TableWriterBothImpl() {
-    if (!IsOpen()) return;
-    else if (!Close())
-      KALDI_ERR << "At TableWriter destructor: Write failed or stream close failed: "
-                << wspecifier_;
-  }
-
- private:
-  Output archive_output_;
-  Output script_output_;
-  WspecifierOptions opts_;
-  std::string archive_wxfilename_;
-  std::string script_wxfilename_;
-  std::string wspecifier_;
-  enum {  // is stream open?
-    kUninitialized,  // no
-    kOpen,  // yes
-    kWriteError,  // yes
-  } state_;
-};
-
-
-// Constructs a writer.  A non-empty wspecifier is opened immediately, and a
-// failure to open it is reported through KALDI_ERR.
-template<class Holder>
-TableWriter<Holder>::TableWriter(const std::string &wspecifier): impl_(NULL) {
-  if (wspecifier.empty()) return;  // Nothing to open yet.
-  if (!Open(wspecifier))
-    KALDI_ERR << "TableWriter: failed to write to "
-              << wspecifier;
-}
-
-// Returns true if an implementation object currently exists.
-template<class Holder>
-bool TableWriter<Holder>::IsOpen() const {
-  const bool have_impl = (impl_ != NULL);
-  return have_impl;
-}
-
-
-// Opens the writer on wspecifier, first closing any writer that is already
-// open.  Returns false (after a warning) if the wspecifier is invalid or if
-// the implementation object fails to open it.
-template<class Holder>
-bool TableWriter<Holder>::Open(const std::string &wspecifier) {
-  // Call Close() yourself beforehand to suppress this exception.
-  if (IsOpen() && !Close()) {
-    KALDI_ERR << "TableWriter::Open, failed to close previously open writer.";
-  }
-  KALDI_ASSERT(impl_ == NULL);
-
-  const WspecifierType ws_type =
-      ClassifyWspecifier(wspecifier, NULL, NULL, NULL);
-  if (ws_type == kBothWspecifier) {
-    impl_ = new TableWriterBothImpl<Holder>();
-  } else if (ws_type == kArchiveWspecifier) {
-    impl_ = new TableWriterArchiveImpl<Holder>();
-  } else if (ws_type == kScriptWspecifier) {
-    impl_ = new TableWriterScriptImpl<Holder>();
-  } else {  // kNoWspecifier, or any unexpected value.
-    KALDI_WARN << "ClassifyWspecifier: invalid wspecifier " << wspecifier;
-    return false;
-  }
-
-  if (!impl_->Open(wspecifier)) {
-    // The class will have printed a more specific warning.
-    delete impl_;
-    impl_ = NULL;
-    return false;
-  }
-  return true;
-}
-
-// Writes one (key, value) pair through the implementation object and raises
-// an error if the implementation reports failure.
-template<class Holder>
-void TableWriter<Holder>::Write(const std::string &key,
-                                const T &value) const {
-  CheckImpl();
-  const bool write_ok = impl_->Write(key, value);
-  if (!write_ok) {
-    // More specific warning will have been printed in the Write function.
-    KALDI_ERR << "Error in TableWriter::Write";
-  }
-}
-
-// Forwards to the implementation object's Flush().
-template<class Holder>
-void TableWriter<Holder>::Flush() {
-  CheckImpl();  // Run the impl_ check before dereferencing it.
-  impl_->Flush();
-}
-
-// Closes the implementation object and discards it.  Returns whatever status
-// the implementation's Close() reported.
-template<class Holder>
-bool TableWriter<Holder>::Close() {
-  CheckImpl();
-  const bool closed_ok = impl_->Close();
-  // We don't keep around non-open impl_ objects [c.f. definition of IsOpen()].
-  delete impl_;
-  impl_ = NULL;
-  return closed_ok;
-}
-
-// Closes the writer if it is still open; a failed close is reported through
-// KALDI_ERR.
-template<class Holder>
-TableWriter<Holder>::~TableWriter() {
-  if (!IsOpen()) return;
-  if (!Close()) {
-    KALDI_ERR << "Error closing TableWriter [in destructor].";
-  }
-}
-
-
-// Types of RandomAccessTableReader:
-// In principle, we would like to have four types of RandomAccessTableReader:
-// the 4 combinations [scp, archive], [seekable, not-seekable],
-// where if something is seekable we only store a file offset. However,
-// it seems sufficient for now to only implement two of these, in both
-// cases assuming it's not seekable so we never store file offsets and always
-// store either the scp line or the data in the archive. The reasons are:
-// (1)
-// For scp files, storing the actual entry is not that much more expensive
-// than storing the file offsets (since the entries are just filenames), and
-// avoids a lot of fseek operations that might be expensive.
-// (2)
-// For archive files, there is no real reason, if you have the archive file
-// on disk somewhere, why you wouldn't access it via its associated scp.
-// [i.e. write it as ark, scp]. The main reason to read archives directly
-// is if they are part of a pipe, and in this case it's not seekable, so
-// we implement only this case.
-//
-// Note that we will rarely in practice have to keep in memory everything in
-// the archive, as long as things are only read once from the archive (the
-// "o, " or "once" option) and as long as we keep our keys in sorted order; to take
-// advantage of this we need the "s, " (sorted) option, so we would read archives
-// as e.g. "s, o, ark:-" (this is the rspecifier we would use if it was the
-// standard input and these conditions held).
-
-// Abstract interface implemented by the RandomAccessTableReader*Impl classes.
-template<class Holder> class RandomAccessTableReaderImplBase {
- public:
-  typedef typename Holder::T T;
-
-  // Opens the reader on the given rspecifier.
-  virtual bool Open(const std::string &rspecifier) = 0;
-
-  // Queries whether the table has the given key.
-  virtual bool HasKey(const std::string &key) = 0;
-
-  // Returns a reference to the value stored for the given key.
-  virtual const T &Value(const std::string &key) = 0;
-
-  // Closes the reader.
-  virtual bool Close() = 0;
-
-  virtual ~RandomAccessTableReaderImplBase() { }
-};
-
-
-// Implementation of RandomAccessTableReader for a script file; for simplicity we
-// just read it in all in one go, as it's unlikely someone would generate this
-// from a pipe.  In principle we could read it on-demand as for the archives, but
-// this would probably be overkill.
-
-// Note: the code for this class is similar to TableWriterScriptImpl:
-// try to keep them in sync.
-template<class Holder>
-class RandomAccessTableReaderScriptImpl:
-    public RandomAccessTableReaderImplBase<Holder> {
-
- public:
-  typedef typename Holder::T T;
-
-  RandomAccessTableReaderScriptImpl(): last_found_(0), state_(kUninitialized) {}
-
-  // Reads the script file named in rspecifier and checks that its keys are
-  // strictly increasing (after sorting them here, unless the sorted option
-  // was given).  Returns false if the script cannot be read, contains a
-  // duplicate key, or is unsorted although declared sorted; in that case
-  // script_ is emptied again so that a later Open() on this object does not
-  // trip the script_.empty() assertion below.  Opening an already-open
-  // reader is an error.
-  virtual bool Open(const std::string &rspecifier) {
-    switch (state_) {
-      case kNotHaveObject: case kHaveObject: case kGaveObject:
-        KALDI_ERR << " Opening already open RandomAccessTableReader: call Close first.";
-      case kUninitialized: case kNotReadScript:
-        break;
-    }
-    rspecifier_ = rspecifier;
-    RspecifierType rs = ClassifyRspecifier(rspecifier,
-                                           &script_rxfilename_,
-                                           &opts_);
-    KALDI_ASSERT(rs == kScriptRspecifier);  // or wrongly called.
-    KALDI_ASSERT(script_.empty());  // no way it could be nonempty at this point.
-
-    if (! ReadScriptFile(script_rxfilename_,
-                         true,  // print any warnings
-                         &script_)) {  // error reading script file or invalid format
-      script_.clear();  // discard anything that was read before the failure.
-      state_ = kNotReadScript;
-      return false;  // no need to print further warnings.  user gets the error.
-    }
-
-    // If opts_.sorted, the user has asserted that the keys are already sorted.
-    // Although we could easily sort them, we want to let the user know of this
-    // mistake.  This same mistake could have serious effects if used with an
-    // archive rather than a script.
-    if (!opts_.sorted)
-      std::sort(script_.begin(), script_.end());
-    for (size_t i = 0; i+1 < script_.size(); i++) {
-      if (script_[i].first.compare(script_[i+1].first) >= 0) {
-        // script[i] not < script[i+1] in lexical order...
-        bool same = (script_[i].first == script_[i+1].first);
-        KALDI_WARN << "Script file " << PrintableRxfilename(script_rxfilename_)
-                   << (same ? " contains duplicate key: " :
-                       " is not sorted (remove s, option or add ns, option): key is ")
-                   << script_[i].first;
-        script_.clear();  // keep the object re-openable after this failure.
-        state_ = kNotReadScript;
-        return false;
-      }
-    }
-    state_ = kNotHaveObject;
-    return true;
-  }
-
-  // True once the script has been read successfully and until Close().
-  virtual bool IsOpen() const {
-    return (state_ == kNotHaveObject || state_ == kHaveObject ||
-            state_ == kGaveObject);
-  }
-
-  // Clears the holder and the script and returns to the kUninitialized
-  // state.  Always returns true; calling it on a reader that is not open is
-  // an error.
-  virtual bool Close() {
-    if (!IsOpen())
-      KALDI_ERR << "Close() called on RandomAccessTableReader that was not open.";
-    holder_.Clear();
-    state_ = kUninitialized;
-    last_found_ = 0;
-    script_.clear();
-    current_key_ = "";
-    // This one cannot fail because any errors of a "global"
-    // nature would have been detected when we did Open().
-    // With archives it's different.
-    return true;
-  }
-
-  // Returns true if the script has an entry for `key`.
-  virtual bool HasKey(const std::string &key) {
-    bool preload = opts_.permissive;
-    // In permissive mode, we have to check that we can read
-    // the scp entry before we assert that the key is there.
-    return HasKeyInternal(key, preload);
-  }
-
-
-  // Returns the object stored for `key`, loading it first unless it is
-  // already the object held in holder_.  Raises an error (KALDI_ERR) if the
-  // reader is not open, if the object cannot be obtained, or if the once
-  // option is set and the same key is requested a second time.
-  virtual const T& Value(const std::string &key) {
-
-    if (!IsOpen())
-      KALDI_ERR << "Value() called on non-open object.";
-
-    if (!((state_ == kHaveObject || state_ == kGaveObject)
-          && key == current_key_)) {  // Not already stored...
-      bool has_key = HasKeyInternal(key, true);  // preload.
-      if (!has_key)
-        KALDI_ERR << "Could not get item for key " << key
-                  << ", rspecifier is " << rspecifier_ << "[to ignore this, "
-                  << "add the p, (permissive) option to the rspecifier.";
-      KALDI_ASSERT(state_ == kHaveObject && key == current_key_);
-    }
-
-    if (state_ == kHaveObject) {
-      state_ = kGaveObject;
-      if (opts_.once) MakeTombstone(key);  // make sure that future lookups fail.
-      return holder_.Value();
-    } else {  // state_ == kGaveObject
-      if (opts_.once)
-        KALDI_ERR << "Value called twice for the same key and ,o (once) option "
-                  << "is used: rspecifier is " << rspecifier_;
-      return holder_.Value();
-    }
-  }
-
-  virtual ~RandomAccessTableReaderScriptImpl() {
-    if (state_ == kHaveObject || state_ == kGaveObject)
-      holder_.Clear();
-  }
-
- private:
-  // HasKeyInternal when called with preload == false just tells us whether the
-  // key is in the scp.  With preload == true, which happens when the ,p
-  // (permissive) option is given in the rspecifier, it will also check that we
-  // can preload the object from disk (loading from the rxfilename in the scp),
-  // and only return true if we can.  This function is called both from HasKey
-  // and from Value().
-  virtual bool HasKeyInternal(const std::string &key, bool preload) {
-    switch (state_) {
-      case kUninitialized: case kNotReadScript:
-        KALDI_ERR << "HasKey called on RandomAccessTableReader object that is not open.";
-      case kHaveObject: case kGaveObject:
-        if (key == current_key_)
-          return true;
-        break;
-      default: break;
-    }
-    KALDI_ASSERT(IsToken(key));
-    size_t key_pos = 0;  // set to zero to suppress warning
-    bool ans = LookupKey(key, &key_pos);
-    if (!ans) return false;
-    else {
-      // First do a check regarding the "once" option.
-      if (opts_.once && script_[key_pos].second == "") {  // A "tombstone"; user is asking about
-        // already-read key.
-        KALDI_ERR << "HasKey called on key whose value was already read, and "
-            " you specified the \"once\" option (o, ): try removing o, or adding no, :"
-            " rspecifier is " << rspecifier_;
-      }
-      if (!preload)
-        return true;  // we have the key.
-      else {  // preload specified, so we have to pre-load the object before returning true.
-        if (!input_.Open(script_[key_pos].second)) {
-          KALDI_WARN << "Error opening stream "
-                     << PrintableRxfilename(script_[key_pos].second);
-          return false;
-        } else {
-          // Make sure holder empty.
-          if (state_ == kHaveObject || state_ == kGaveObject)
-            holder_.Clear();
-          if (holder_.Read(input_.Stream())) {
-            state_ = kHaveObject;
-            current_key_ = key;
-            return true;
-          } else {
-            KALDI_WARN << "Error reading object from "
-                "stream " << PrintableRxfilename(script_[key_pos].second);
-            state_ = kNotHaveObject;
-            return false;
-          }
-        }
-      }
-    }
-  }
-
-  // Marks the script entry for `key` as already read by emptying its
-  // filename; HasKeyInternal treats an empty filename as a "tombstone" when
-  // the once option is set.
-  void MakeTombstone(const std::string &key) {
-    size_t offset;
-    if (!LookupKey(key, &offset))
-      KALDI_ERR << "RandomAccessTableReader object in inconsistent state.";
-    else
-      script_[offset].second = "";
-  }
-
-  // Finds `key` in the sorted script_ vector.  On success stores its index
-  // in *script_offset and returns true; otherwise returns false.
-  bool LookupKey(const std::string &key, size_t *script_offset) {
-    // First, an optimization: if we're going consecutively, this will
-    // make the lookup very fast.  Since we may call HasKey and then
-    // Value(), which both may look up the key, we test if either the
-    // current or next position are correct.
-    if (last_found_ < script_.size() && script_[last_found_].first == key) {
-      *script_offset = last_found_;
-      return true;
-    }
-    last_found_++;
-    if (last_found_ < script_.size() && script_[last_found_].first == key) {
-      *script_offset = last_found_;
-      return true;
-    }
-    std::pair<std::string, std::string> pr(key, "");  // Important that ""
-    // compares less than or equal to any string, so lower_bound points to the
-    // element that has the same key.
-    typedef typename std::vector<std::pair<std::string, std::string> >::const_iterator
-        IterType;
-    IterType iter = std::lower_bound(script_.begin(), script_.end(), pr);
-    if (iter != script_.end() && iter->first == key) {
-      last_found_ = *script_offset = iter - script_.begin();
-      return true;
-    } else {
-      return false;
-    }
-  }
-
-
-  Input input_;  // Use the same input_ object for reading each file, in case
-  // the scp specifies offsets in an archive (so we can keep the same file open).
-  RspecifierOptions opts_;
-  std::string rspecifier_;  // rspecifier used to open it; used in debug messages
-  std::string script_rxfilename_;  // filename of script.
-
-  std::string current_key_;  // Key of object in holder_
-  Holder holder_;
-
-  // the script_ variable contains pairs of (key, filename), sorted using
-  // std::sort.  This can be used with binary_search to look up filenames for
-  // reading.  If this becomes inefficient we can use std::unordered_map (but I
-  // suspect this wouldn't be significantly faster & would use more memory).
-  // If memory becomes a problem here, the user should probably be passing
-  // only the relevant part of the scp file rather than expecting us to get too
-  // clever in the code.
-  std::vector<std::pair<std::string, std::string> > script_;
-  size_t last_found_;  // This is for an optimization used in LookupKey.
-
-  enum {  //                [Do we have          [Does holder_
-          //                 script_ set up?]     contain object?]
-    kUninitialized,  //      no                   no
-    kNotReadScript,  //      no                   no
-    kNotHaveObject,  //      yes                  no
-    kHaveObject,  //         yes                  yes
-    kGaveObject,  //         yes                  yes
-    // [kGaveObject is as kHaveObject but we note that the
-    // user has already read it; this is for checking that
-    // if "once" is specified, the user actually only reads
-    // it once.
-  } state_;
-
-};
-
-
-
-
-// This is the base-class (with some implemented functions) for the
-// implementations of RandomAccessTableReader when it's an archive. This
-// base-class handles opening the files, storing the state of the reading
-// process, and loading objects. This is the only case in which we have
-// an intermediate class in the hierarchy between the virtual ImplBase
-// class and the actual Impl classes.
-// The child classes vary in the assumptions regarding sorting, etc.
-
-template<class Holder> class RandomAccessTableReaderArchiveImplBase:
- public RandomAccessTableReaderImplBase<Holder> {
- public:
- typedef typename Holder::T T;
-
- RandomAccessTableReaderArchiveImplBase(): holder_(NULL), state_(kUninitialized) { }
-
- virtual bool Open(const std::string &rspecifier) {
- if (state_ != kUninitialized) {
- if (! this->Close()) // call Close() yourself to suppress this exception.
- KALDI_ERR << "TableReader::Open, error closing previous input.";
- }
- rspecifier_ = rspecifier;
- RspecifierType rs = ClassifyRspecifier(rspecifier, &archive_rxfilename_,
- &opts_);
- KALDI_ASSERT(rs == kArchiveRspecifier);
-
- // NULL means don't expect binary-mode header
- bool ans;
- if (Holder::IsReadInBinary())
- ans = input_.Open(archive_rxfilename_, NULL);
- else
- ans = input_.OpenTextMode(archive_rxfilename_);
- if (!ans) { // header.
- KALDI_WARN << "TableReader: failed to open stream "
- << PrintableRxfilename(archive_rxfilename_);
- state_ = kUninitialized; // Failure on Open
- return false; // User should print the error message.
- } else {
- state_ = kNoObject;
- }
- return true;
- }
-
- // ReadNextObject() requires that the state be kNoObject,
- // and it will try read the next object. If it succeeds,
- // it sets the state to kHaveObject, and
- // cur_key_ and holder_ have the key and value. If it fails,
- // it sets the state to kError or kEof.
- void ReadNextObject() {
- if (state_ != kNoObject)
- KALDI_ERR << "TableReader: ReadNextObject() called from wrong state."; // Code error
- // somewhere in this class or a child class.
- std::istream &is = input_.Stream();
- is.clear(); // Clear any fail bits that may have been set... just in case
- // this happened in the Read function.
- is >> cur_key_; // This eats up any leading whitespace and gets the string.
- if (is.eof()) {
- state_ = kEof;
- return;
- }
- if (is.fail()) { // This shouldn't really happen, barring file-system errors.
- KALDI_WARN << "Error reading archive: rspecifier is " << rspecifier_;
- state_ = kError;
- return;
- }
- int c;
- if ((c = is.peek()) != ' ' && c != '\t' && c != '\n') { // We expect a space ' ' after the key.
- // We also allow tab, just so we can read archives generated by scripts that may
- // not be fully aware of how this format works.
- KALDI_WARN << "Invalid archive file format: expected space after key " <<cur_key_
- <<", got character "
- << CharToString(static_cast<char>(is.peek())) << ", reading archive "
- << PrintableRxfilename(archive_rxfilename_);
- state_ = kError;
- return;
- }
- if (c != '\n') is.get(); // Consume the space or tab.
- holder_ = new Holder;
- if (holder_->Read(is)) {
- state_ = kHaveObject;
- return;
- } else {
- KALDI_WARN << "Object read failed, reading archive "
- << PrintableRxfilename(archive_rxfilename_);
- state_ = kError;
- delete holder_;
- holder_ = NULL;
- return;
- }
- }
-
- virtual bool IsOpen() const {
- switch (state_) {
- case kEof: case kError: case kHaveObject: case kNoObject: return true;
- case kUninitialized: return false;
- default: KALDI_ERR << "IsOpen() called on invalid object.";
- return false;
- }
- }
-
- // Called by the child-class virutal Close() functions; does the
- // shared parts of the cleanup.
- bool CloseInternal() {
- if (! this->IsOpen())
- KALDI_ERR << "Close() called on TableReader twice or otherwise wrongly.";
- if (input_.IsOpen())
- input_.Close();
- if (state_ == kHaveObject) {
- KALDI_ASSERT(holder_ != NULL);
- delete holder_;
- holder_ = NULL;
- } else KALDI_ASSERT(holder_ == NULL);
- bool ans = (state_ != kError);
- state_ = kUninitialized;
- if (!ans && opts_.permissive) {
- KALDI_WARN << "Error state detected closing reader. "
- << "Ignoring it because you specified permissive mode.";
- return true;
- }
- return ans;
- }
-
- ~RandomAccessTableReaderArchiveImplBase() {
- // The child class has the responsibility to call CloseInternal().
- KALDI_ASSERT(state_ == kUninitialized && holder_ == NULL);
- }
- private:
- Input input_; // Input object for the archive
- protected:
- // The variables below are accessed by child classes.
-
- std::string cur_key_; // current key (if state == kHaveObject).
- Holder *holder_; // Holds the object we just read (if state == kHaveObject)
-
- std::string rspecifier_;
- std::string archive_rxfilename_;
- RspecifierOptions opts_;
-
- enum { // [The state of the reading process] [does holder_ [is input_
- // have object] open]
- kUninitialized, // Uninitialized or closed no no
- kNoObject, // Do not have object in holder_ no yes
- kHaveObject, // Have object in holder_ yes yes
- kEof, // End of file no yes
- kError, // Some kind of error-state in the reading. no yes
- } state_;
-
-};
-
-
-// RandomAccessTableReaderDSortedArchiveImpl (DSorted for "doubly sorted") is the
-// implementation for random-access reading of archives when both the archive,
-// and the calling code, are in sorted order (i.e. we ask for the keys in sorted
-// order). This is when the s and cs options are both given. It only ever has
-// to keep one object in memory. It inherits from
-// RandomAccessTableReaderArchiveImplBase which implements the common parts of
-// RandomAccessTableReader that are used when it's an archive we're reading from.
-
-template<class Holder> class RandomAccessTableReaderDSortedArchiveImpl:
- public RandomAccessTableReaderArchiveImplBase<Holder> {
- using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized;
- using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject;
- using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject;
- using RandomAccessTableReaderArchiveImplBase<Holder>::kEof;
- using RandomAccessTableReaderArchiveImplBase<Holder>::kError;
- using RandomAccessTableReaderArchiveImplBase<Holder>::state_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::opts_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::holder_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject;
- public:
- typedef typename Holder::T T;
-
- RandomAccessTableReaderDSortedArchiveImpl() { }
-
- virtual bool Close() {
- // We don't have anything additional to clean up, so just
- // call generic base-class one.
- return this->CloseInternal();
- }
-
- virtual bool HasKey(const std::string &key) {
- return FindKeyInternal(key);
- }
- virtual const T & Value(const std::string &key) {
- if (FindKeyInternal(key)) {
- KALDI_ASSERT(this->state_ == kHaveObject && key == this->cur_key_
- && holder_ != NULL);
- return this->holder_->Value();
- } else {
- KALDI_ERR << "Value() called but no such key " << key
- << " in archive " << PrintableRxfilename(archive_rxfilename_);
- return *(const T*)NULL; // keep compiler happy.
- }
- }
-
- virtual ~RandomAccessTableReaderDSortedArchiveImpl() {
- if (this->IsOpen())
- if (!Close()) // more specific warning will already have been printed.
- // we are in some kind of error state & user did not find out by
- // calling Close().
- KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is "
- << rspecifier_;
- }
- private:
- // FindKeyInternal tries to find the key by calling "ReadNextObject()"
- // as many times as necessary till we get to it. It is called from
- // both FindKey and Value().
- bool FindKeyInternal(const std::string &key) {
- // First check that the user is calling us right: should be
- // in sorted order. If not, error.
- if (!last_requested_key_.empty()) {
- if (key.compare(last_requested_key_) < 0) { // key < last_requested_key_
- KALDI_ERR << "You provided the \"cs\" option "
- << "but are not calling with keys in sorted order: "
- << key << " < " << last_requested_key_ << ": rspecifier is "
- << rspecifier_;
- }
- }
- // last_requested_key_ is just for debugging of order of calling.
- last_requested_key_ = key;
-
- if (state_ == kNoObject)
- ReadNextObject(); // This can only happen
- // once, the first time someone calls HasKey() or Value(). We don't
- // do it in the initializer to stop the program hanging too soon,
- // if reading from a pipe.
-
- if (state_ == kEof || state_ == kError) return false;
-
- if (state_ == kUninitialized)
- KALDI_ERR << "Trying to access a RandomAccessTableReader object that is not open.";
-
- std::string last_key_; // To check that
- // the archive we're reading is in sorted order.
- while (1) {
- KALDI_ASSERT(state_ == kHaveObject);
- int compare = key.compare(cur_key_);
- if (compare == 0) { // key == key_
- return true; // we got it..
- } else if (compare < 0) { // key < cur_key_, so we already read past the
- // place where we want to be. This implies that we will never find it
- // [due to the sorting etc., this means it just isn't in the archive].
- return false;
- } else { // compare > 0, key > cur_key_. We need to read further ahead.
- last_key_ = cur_key_;
- // read next object.. we have to set state to kNoObject first.
- KALDI_ASSERT(holder_ != NULL);
- delete holder_;
- holder_ = NULL;
- state_ = kNoObject;
- ReadNextObject();
- if (state_ != kHaveObject)
- return false; // eof or read error.
- if (cur_key_.compare(last_key_) <= 0) {
- KALDI_ERR << "You provided the \"s\" option "
- << " (sorted order), but keys are out of order or duplicated: "
- << last_key_ << " is followed by " << cur_key_
- << ": rspecifier is " << rspecifier_;
- }
- }
- }
- }
-
- /// Last string provided to HasKey() or Value();
- std::string last_requested_key_;
-
-
-};
-
-// RandomAccessTableReaderSortedArchiveImpl is for random-access reading of
-// archives when the user specified the sorted (s) option but not the
-// called-sorted (cs) options.
-template<class Holder> class RandomAccessTableReaderSortedArchiveImpl:
- public RandomAccessTableReaderArchiveImplBase<Holder> {
- using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized;
- using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject;
- using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject;
- using RandomAccessTableReaderArchiveImplBase<Holder>::kEof;
- using RandomAccessTableReaderArchiveImplBase<Holder>::kError;
- using RandomAccessTableReaderArchiveImplBase<Holder>::state_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::opts_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::holder_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject;
-
- public:
- typedef typename Holder::T T;
-
- RandomAccessTableReaderSortedArchiveImpl():
- last_found_index_(static_cast<size_t>(-1)),
- pending_delete_(static_cast<size_t>(-1)) { }
-
- virtual bool Close() {
- for (size_t i = 0; i < seen_pairs_.size(); i++)
- if (seen_pairs_[i].second)
- delete seen_pairs_[i].second;
- seen_pairs_.clear();
-
- pending_delete_ = static_cast<size_t>(-1);
- last_found_index_ = static_cast<size_t>(-1);
-
- return this->CloseInternal();
- }
- virtual bool HasKey(const std::string &key) {
- HandlePendingDelete();
- size_t index;
- bool ans = FindKeyInternal(key, &index);
- if (ans && opts_.once && seen_pairs_[index].second == NULL) {
- // Just do a check RE the once option. "&&opts_.once" is for
- // efficiency since this can only happen in that case.
- KALDI_ERR << "Error: HasKey called after Value() already called for "
- << " that key, and once (o) option specified: rspecifier is "
- << rspecifier_;
- }
- return ans;
- }
- virtual const T & Value(const std::string &key) {
- HandlePendingDelete();
- size_t index;
- if (FindKeyInternal(key, &index)) {
- if (seen_pairs_[index].second == NULL) { // can happen if opts.once_
- KALDI_ERR << "Error: Value() called more than once for key "
- << key << " and once (o) option specified: rspecifier is "
- << rspecifier_;
- }
- if (opts_.once)
- pending_delete_ = index; // mark this index to be deleted on next call.
- return seen_pairs_[index].second->Value();
- } else {
- KALDI_ERR << "Value() called but no such key " << key
- << " in archive " << PrintableRxfilename(archive_rxfilename_);
- return *(const T*)NULL; // keep compiler happy.
- }
- }
- virtual ~RandomAccessTableReaderSortedArchiveImpl() {
- if (this->IsOpen())
- if (!Close()) // more specific warning will already have been printed.
- // we are in some kind of error state & user did not find out by
- // calling Close().
- KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is "
- << rspecifier_;
- }
- private:
- void HandlePendingDelete() {
- const size_t npos = static_cast<size_t>(-1);
- if (pending_delete_ != npos) {
- KALDI_ASSERT(pending_delete_ < seen_pairs_.size());
- KALDI_ASSERT(seen_pairs_[pending_delete_].second != NULL);
- delete seen_pairs_[pending_delete_].second;
- seen_pairs_[pending_delete_].second = NULL;
- pending_delete_ = npos;
- }
- }
-
- // FindKeyInternal tries to find the key in the array "seen_pairs_".
- // If it is not already there, it reads ahead as far as necessary
- // to determine whether we have the key or not. On success it returns
- // true and puts the index into the array seen_pairs_, into "index";
- // on failure it returns false.
- // It will leave the state as either kNoObject, kEof or kError.
- // FindKeyInternal does not do any checking about whether you are asking
- // about a key that has been already given (with the "once" option).
- // That is the user's responsibility.
-
- bool FindKeyInternal(const std::string &key, size_t *index) {
- // First, an optimization in case the previous call was for the
- // same key, and we found it.
- if (last_found_index_ < seen_pairs_.size()
- && seen_pairs_[last_found_index_].first == key) {
- *index = last_found_index_;
- return true;
- }
-
- if (state_ == kUninitialized)
- KALDI_ERR << "Trying to access a RandomAccessTableReader object that is not open.";
-
- // Step one is to see whether we have to read ahead for the object..
- // Note, the possible states right now are kNoObject, kEof or kError.
- // We are never in the state kHaveObject except just after calling
- // ReadNextObject().
- bool looped = false;
- while (state_ == kNoObject &&
- (seen_pairs_.empty() || key.compare(seen_pairs_.back().first) > 0)) {
- looped = true;
- // Read this as:
- // while ( the stream is potentially good for reading &&
- // ([got no keys] || key > most_recent_key) ) { ...
- // Try to read a new object.
- // Note that the keys in seen_pairs_ are ordered from least to greatest.
- ReadNextObject();
- if (state_ == kHaveObject) { // Successfully read object.
- if (!seen_pairs_.empty() && // This is just a check.
- cur_key_.compare(seen_pairs_.back().first) <= 0) {
- // read the expression above as: !( cur_key_ > previous_key).
- // it means we are not in sorted order [the user specified that we
- // are, or we would not be using this implementation].
- KALDI_ERR << "You provided the sorted (s) option but keys in archive "
- << PrintableRxfilename(archive_rxfilename_) << " are not "
- << "in sorted order: " << seen_pairs_.back().first
- << " is followed by " << cur_key_;
- }
- KALDI_ASSERT(holder_ != NULL);
- seen_pairs_.push_back(std::make_pair(cur_key_, holder_));
- holder_ = NULL;
- state_ = kNoObject;
- }
- }
- if (looped) { // We only need to check the last element of the seen_pairs_ array,
- // since we would not have read more after getting "key".
- if (!seen_pairs_.empty() && seen_pairs_.back().first == key) {
- last_found_index_ = *index = seen_pairs_.size() - 1;
- return true;
- } else return false;
- }
- // Now we have do an actual binary search in the seen_pairs_ array.
- std::pair<std::string, Holder*> pr(key, static_cast<Holder*>(NULL));
- typename std::vector<std::pair<std::string, Holder*> >::iterator
- iter = std::lower_bound(seen_pairs_.begin(), seen_pairs_.end(),
- pr, PairCompare());
- if (iter != seen_pairs_.end() &&
- key == iter->first) {
- last_found_index_ = *index = (iter - seen_pairs_.begin());
- return true;
- } else return false;
- }
-
- // These are the pairs of (key, object) we have read. We keep all the keys we
- // have read but the actual objects (if they are stored with pointers inside
- // the Holder object) may be deallocated if once == true, and the Holder
- // pointer set to NULL.
- std::vector<std::pair<std::string, Holder*> > seen_pairs_;
- size_t last_found_index_; // An optimization s.t. if FindKeyInternal called twice with
- // same key (as it often will), it doesn't have to do the key search twice.
- size_t pending_delete_; // If opts_.once == true, this is the index of
- // element of seen_pairs_ that is pending deletion.
- struct PairCompare {
- // PairCompare is the Less-than operator for the pairs of(key, Holder).
- // compares the keys.
- inline bool operator() (const std::pair<std::string, Holder*> &pr1,
- const std::pair<std::string, Holder*> &pr2) {
- return (pr1.first.compare(pr2.first) < 0);
- }
- };
-};
-
-
-
-// RandomAccessTableReaderUnsortedArchiveImpl is for random-access reading of
-// archives when the user does not specify the sorted (s) option (in this case
-// the called-sorted, or "cs" option, is ignored). This is the least efficient
-// of the random access archive readers, in general, but it can be as efficient
-// as the others, in speed, memory and latency, if the "once" option is specified
-// and it happens that the keys of the archive are the same as the keys the code
-// is called with (to HasKey() and Value()), and in the same order. However, if
-// you ask it for a key that's not present it will have to read the archive till
-// the end and store it all in memory.
-
-template<class Holder> class RandomAccessTableReaderUnsortedArchiveImpl:
- public RandomAccessTableReaderArchiveImplBase<Holder> {
- using RandomAccessTableReaderArchiveImplBase<Holder>::kUninitialized;
- using RandomAccessTableReaderArchiveImplBase<Holder>::kHaveObject;
- using RandomAccessTableReaderArchiveImplBase<Holder>::kNoObject;
- using RandomAccessTableReaderArchiveImplBase<Holder>::kEof;
- using RandomAccessTableReaderArchiveImplBase<Holder>::kError;
- using RandomAccessTableReaderArchiveImplBase<Holder>::state_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::opts_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::cur_key_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::holder_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::rspecifier_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::archive_rxfilename_;
- using RandomAccessTableReaderArchiveImplBase<Holder>::ReadNextObject;
-
- typedef typename Holder::T T;
-
- public:
- RandomAccessTableReaderUnsortedArchiveImpl(): to_delete_iter_(map_.end()),
- to_delete_iter_valid_(false)
- {
- map_.max_load_factor(0.5); // make it quite empty -> quite efficient.
- // default seems to be 1.
- }
-
- virtual bool Close() {
- for (typename MapType::iterator iter = map_.begin();
- iter != map_.end();
- ++iter) {
- if (iter->second)
- delete iter->second;
- }
- map_.clear();
- first_deleted_string_ = "";
- to_delete_iter_valid_ = false;
- return this->CloseInternal();
- }
-
- virtual bool HasKey(const std::string &key) {
- HandlePendingDelete();
- return FindKeyInternal(key, NULL);
- }
- virtual const T & Value(const std::string &key) {
- HandlePendingDelete();
- const T *ans_ptr = NULL;
- if (FindKeyInternal(key, &ans_ptr))
- return *ans_ptr;
- else
- KALDI_ERR << "Value() called but no such key " << key
- << " in archive " << PrintableRxfilename(archive_rxfilename_);
- return *(const T*)NULL; // keep compiler happy.
- }
- virtual ~RandomAccessTableReaderUnsortedArchiveImpl() {
- if (this->IsOpen())
- if (!Close()) // more specific warning will already have been printed.
- // we are in some kind of error state & user did not find out by
- // calling Close().
- KALDI_ERR << "Error closing RandomAccessTableReader: rspecifier is "
- << rspecifier_;
- }
- private:
- void HandlePendingDelete() {
- if (to_delete_iter_valid_) {
- to_delete_iter_valid_ = false;
- delete to_delete_iter_->second; // Delete Holder object.
- if (first_deleted_string_.length() == 0)
- first_deleted_string_ = to_delete_iter_->first;
- map_.erase(to_delete_iter_); // delete that element.
- }
- }
-
- // FindKeyInternal tries to find the key in the map "map_"
- // If it is not already there, it reads ahead either until it finds the
- // key, or until end of file. If called with value_ptr == NULL,
- // it assumes it's called from HasKey() and just returns true or false
- // and doesn't otherwise have side effects. If called with value_ptr !=
- // NULL, it assumes it's called from Value(). Thus, it will crash
- // if it cannot find the key. If it can find it it puts its address in
- // *value_ptr, and if opts_once == true it will mark that element of the
- // map to be deleted.
-
- bool FindKeyInternal(const std::string &key, const T **value_ptr = NULL) {
- typename MapType::iterator iter = map_.find(key);
- if (iter != map_.end()) { // Found in the map...
- if (value_ptr == NULL) { // called from HasKey
- return true; // this is all we have to do.
- } else {
- *value_ptr = &(iter->second->Value());
- if (opts_.once) { // value won't be needed again, so mark
- // for deletion.
- to_delete_iter_ = iter; // pending delete.
- KALDI_ASSERT(!to_delete_iter_valid_);
- to_delete_iter_valid_ = true;
- }
- return true;
- }
- }
- while (state_ == kNoObject) {
- ReadNextObject();
- if (state_ == kHaveObject) { // Successfully read object.
- state_ = kNoObject; // we are about to transfer ownership
- // of the object in holder_ to map_.
- // Insert it into map_.
- std::pair<typename MapType::iterator, bool> pr =
- map_.insert(typename MapType::value_type(cur_key_, holder_));
-
- if (!pr.second) { // Was not inserted-- previous element w/ same key
- delete holder_; // map was not changed, no ownership transferred.
- holder_ = NULL;
- KALDI_ERR << "Error in RandomAccessTableReader: duplicate key "
- << cur_key_ << " in archive " << archive_rxfilename_;
- }
- holder_ = NULL; // ownership transferred to map_.
- if (cur_key_ == key) { // the one we wanted..
- if (value_ptr == NULL) { // called from HasKey
- return true;
- } else { // called from Value()
- *value_ptr = &(pr.first->second->Value()); // this gives us the
- // Value() from the Holder in the map.
- if (opts_.once) { // mark for deletion, as won't be needed again.
- to_delete_iter_ = pr.first;
- KALDI_ASSERT(!to_delete_iter_valid_);
- to_delete_iter_valid_ = true;
- }
- return true;
- }
- }
- }
- }
- if (opts_.once && key == first_deleted_string_) {
- KALDI_ERR << "You specified the once (o) option but "
- << "you are calling using key " << key
- << " more than once: rspecifier is " << rspecifier_;
- }
- return false; // We read the entire archive (or got to error state) and didn't
- // find it.
- }
-
- typedef unordered_map<std::string, Holder*, StringHasher> MapType;
- MapType map_;
-
- typename MapType::iterator to_delete_iter_;
- bool to_delete_iter_valid_;
-
- std::string first_deleted_string_; // keep the first string we deleted
- // from map_ (if opts_.once == true). It's for an inexact spot-check that the
- // "once" option isn't being used incorrectly.
-
-};
-
-
-
-
-
-template<class Holder>
-RandomAccessTableReader<Holder>::RandomAccessTableReader(const std::string &rspecifier):
- impl_(NULL) {
- if (rspecifier != "" && !Open(rspecifier))
- KALDI_ERR << "Error opening RandomAccessTableReader object "
- " (rspecifier is: " << rspecifier << ")";
-}
-
-template<class Holder>
-bool RandomAccessTableReader<Holder>::Open(const std::string &rspecifier) {
- if (IsOpen())
- KALDI_ERR << "Already open.";
- RspecifierOptions opts;
- RspecifierType rs = ClassifyRspecifier(rspecifier, NULL, &opts);
- switch (rs) {
- case kScriptRspecifier:
- impl_ = new RandomAccessTableReaderScriptImpl<Holder>();
- break;
- case kArchiveRspecifier:
- if (opts.sorted) {
- if (opts.called_sorted) // "doubly" sorted case.
- impl_ = new RandomAccessTableReaderDSortedArchiveImpl<Holder>();
- else
- impl_ = new RandomAccessTableReaderSortedArchiveImpl<Holder>();
- } else impl_ = new RandomAccessTableReaderUnsortedArchiveImpl<Holder>();
- break;
- case kNoRspecifier: default:
- KALDI_WARN << "Invalid rspecifier: "
- << rspecifier;
- return false;
- }
- if (impl_->Open(rspecifier))
- return true;
- else {
- // Warning will already have been printed.
- delete impl_;
- impl_ = NULL;
- return false;
- }
-}
-
-template<class Holder>
-bool RandomAccessTableReader<Holder>::HasKey(const std::string &key) {
- CheckImpl();
- if (!IsToken(key))
- KALDI_ERR << "Invalid key \"" << key << '"';
- return impl_->HasKey(key);
-}
-
-
-template<class Holder>
-const typename RandomAccessTableReader<Holder>::T&
-RandomAccessTableReader<Holder>::Value(const std::string &key) {
- CheckImpl();
- return impl_->Value(key);
-}
-
-template<class Holder>
-bool RandomAccessTableReader<Holder>::Close() {
- CheckImpl();
- bool ans =impl_->Close();
- delete impl_;
- impl_ = NULL;
- return ans;
-}
-
-template<class Holder>
-RandomAccessTableReader<Holder>::~RandomAccessTableReader() {
- if (IsOpen() && !Close()) // call Close() yourself to stop this being thrown.
- KALDI_ERR << "failure detected in destructor.";
-}
-
-template<class Holder>
-void SequentialTableReader<Holder>::CheckImpl() const {
- if (!impl_) {
- KALDI_ERR << "Trying to use empty SequentialTableReader (perhaps you "
- << "passed the empty string as an argument to a program?)";
- }
-}
-
-template<class Holder>
-void RandomAccessTableReader<Holder>::CheckImpl() const {
- if (!impl_) {
- KALDI_ERR << "Trying to use empty RandomAccessTableReader (perhaps you "
- << "passed the empty string as an argument to a program?)";
- }
-}
-
-template<class Holder>
-void TableWriter<Holder>::CheckImpl() const {
- if (!impl_) {
- KALDI_ERR << "Trying to use empty TableWriter (perhaps you "
- << "passed the empty string as an argument to a program?)";
- }
-}
-
-template<class Holder>
-RandomAccessTableReaderMapped<Holder>::RandomAccessTableReaderMapped(
- const std::string &table_rxfilename,
- const std::string &utt2spk_rxfilename):
- reader_(table_rxfilename), token_reader_(table_rxfilename.empty() ? "" :
- utt2spk_rxfilename),
- utt2spk_rxfilename_(utt2spk_rxfilename) { }
-
-template<class Holder>
-bool RandomAccessTableReaderMapped<Holder>::Open(
- const std::string &table_rxfilename,
- const std::string &utt2spk_rxfilename) {
- if (reader_.IsOpen()) reader_.Close();
- if (token_reader_.IsOpen()) token_reader_.Close();
- KALDI_ASSERT(!table_rxfilename.empty());
- if (!reader_.Open(table_rxfilename)) return false; // will have printed
- // warning internally, probably.
- if (!utt2spk_rxfilename.empty()) {
- if (!token_reader_.Open(utt2spk_rxfilename)) {
- reader_.Close();
- return false;
- }
- }
- return true;
-}
-
-
-template<class Holder>
-bool RandomAccessTableReaderMapped<Holder>::HasKey(const std::string &utt) {
- // We don't check IsOpen, we let the call go through to the member variable
- // (reader_), which will crash with a more informative error message than
- // we can give here, as we don't any longer know the rxfilename.
- if (token_reader_.IsOpen()) { // We need to map the key from utt to spk.
- if (!token_reader_.HasKey(utt))
- KALDI_ERR << "Attempting to read key " << utt << ", which is not present "
- << "in utt2spk map or similar map being read from "
- << PrintableRxfilename(utt2spk_rxfilename_);
- const std::string &spk = token_reader_.Value(utt);
- return reader_.HasKey(spk);
- } else {
- return reader_.HasKey(utt);
- }
-}
-
-template<class Holder>
-const typename Holder::T& RandomAccessTableReaderMapped<Holder>::Value(
- const std::string &utt) {
- if (token_reader_.IsOpen()) { // We need to map the key from utt to spk.
- if (!token_reader_.HasKey(utt))
- KALDI_ERR << "Attempting to read key " << utt << ", which is not present "
- << "in utt2spk map or similar map being read from "
- << PrintableRxfilename(utt2spk_rxfilename_);
- const std::string &spk = token_reader_.Value(utt);
- return reader_.Value(spk);
- } else {
- return reader_.Value(utt);
- }
-}
-
-
-
-/// @}
-
-} // end namespace kaldi
-
-
-
-#endif
diff --git a/kaldi_io/src/kaldi/util/kaldi-table.h b/kaldi_io/src/kaldi/util/kaldi-table.h
deleted file mode 100644
index 6f6cb98..0000000
--- a/kaldi_io/src/kaldi/util/kaldi-table.h
+++ /dev/null
@@ -1,459 +0,0 @@
-// util/kaldi-table.h
-
-// Copyright 2009-2011 Microsoft Corporation
-// 2013 Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_UTIL_KALDI_TABLE_H_
-#define KALDI_UTIL_KALDI_TABLE_H_
-
-#include <string>
-#include <vector>
-#include <utility>
-
-#include "base/kaldi-common.h"
-#include "util/kaldi-holder.h"
-
-namespace kaldi {
-
-// Forward declarations
-template<class Holder> class RandomAccessTableReaderImplBase;
-template<class Holder> class SequentialTableReaderImplBase;
-template<class Holder> class TableWriterImplBase;
-
-/// \addtogroup table_group
-/// @{
-
-// This header defines the Table classes (RandomAccessTableReader,
-// SequentialTableReader and TableWriter) and explains what the Holder classes,
-// which the Table class requires as a template argument, are like. It also
-// explains the "rspecifier" and "wspecifier" concepts (these are strings that
-// explain how to read/write objects via archives or scp files). A table is
-// conceptually a collection of objects of a particular type T indexed by keys
-// of type std::string (these Keys additionally have an order within each table).
-// The Table classes are templated on a type (call it Holder) such that Holder::T
-// is a typedef equal to T.
-
-// see kaldi-holder.h for detail on the Holder classes.
-
-typedef std::vector<std::string> KeyList;
-
-// Documentation for "wspecifier"
-// "wspecifier" describes how we write a set of objects indexed by keys.
-// The basic, unadorned wspecifiers are as follows:
-//
-// ark:wxfilename
-// scp:rxfilename
-// ark,scp:filename,wxfilename
-// ark,scp:filename,wxfilename
-//
-//
-// We also allow the following modifiers:
-// t means text mode.
-// b means binary mode.
-// f means flush the stream after writing each entry.
-// (nf means don't flush; WspecifierOptions defaults to not flushing).
-// p means permissive mode, when writing to an "scp" file only: will ignore
-// missing scp entries, i.e. won't write anything for those files but will
-// return success status.
-//
-// So the following are valid wspecifiers:
-// ark,b,f:foo
-// "ark,b,b:| gzip -c > foo"
-// "ark,scp,t,nf:foo.ark,|gzip -c > foo.scp.gz"
-// ark,b:-
-//
-// The meanings of rxfilename and wxfilename are as described in
-// kaldi-stream.h (they are filenames but include pipes, stdin/stdout
-// and so on; filename is a regular filename).
-//
-
-// The ark:wxfilename type of wspecifier instructs the class to
-// write directly to an archive. For small objects (e.g. lists of ints),
-// the text archive format will generally be human readable with one line
-// per entry in the archive.
-//
-// The type "scp:xfilename" refers to an scp file which should
-// already exist on disk, and tells us where to write the data for
-// each key (usually an actual file); each line of the scp file
-// would be:
-// key xfilename
-//
-// The type ark,scp:filename,wxfilename means
-// we write both an archive and an scp file that specifies offsets into the
-// archive, with lines like:
-// key filename:12407
-// where the number is the byte offset into the file.
-// In this case we restrict the archive-filename to be an actual filename,
-// as we can't see a situation where an extended filename would make sense
-// for this (we can't fseek() in pipes).
-
-enum WspecifierType {
- kNoWspecifier,
- kArchiveWspecifier,
- kScriptWspecifier,
- kBothWspecifier
-};
-
-struct WspecifierOptions {
- bool binary;
- bool flush;
- bool permissive; // will ignore absent scp entries.
- WspecifierOptions(): binary(true), flush(false), permissive(false) { }
-};
-
-// ClassifyWspecifier returns the type of the wspecifier string,
-// and (if pointers are non-NULL) outputs the extra information
-// about the options, and the script and archive
-// filenames.
-WspecifierType ClassifyWspecifier(const std::string &wspecifier,
- std::string *archive_wxfilename,
- std::string *script_wxfilename,
- WspecifierOptions *opts);
-
-// ReadScriptFile reads an .scp file in its entirety, and appends it
-// (in the order it had in the scp file) to script_out, which contains
-// pairs of (key, xfilename). The .scp
-// file format is: on each line, key xfilename
-// where xfilename means rxfilename or wxfilename, and may contain internal spaces
-// (we trim away any leading or trailing space). The key is space-free.
-// ReadScriptFile returns true if the format was valid (empty files
-// are valid).
-// If 'print_warnings', it will print out warning messages that explain what kind
-// of error there was.
-bool ReadScriptFile(const std::string &rxfilename,
- bool print_warnings,
- std::vector<std::pair<std::string, std::string> > *script_out);
-
-// This version of ReadScriptFile works from an istream.
-bool ReadScriptFile(std::istream &is,
- bool print_warnings,
- std::vector<std::pair<std::string, std::string> > *script_out);
-
-// Writes, for each entry in script, the first element, then ' ', then the second
-// element then '\n'. Checks that the keys (first elements of pairs) are valid
-// tokens (nonempty, no whitespace), and the values (second elements of pairs)
-// are newline-free and contain no leading or trailing space. Returns true on
-// success.
-bool WriteScriptFile(const std::string &wxfilename,
- const std::vector<std::pair<std::string, std::string> > &script);
-
-// This version writes to an ostream.
-bool WriteScriptFile(std::ostream &os,
- const std::vector<std::pair<std::string, std::string> > &script);
-
-// Documentation for "rspecifier"
-// "rspecifier" describes how we read a set of objects indexed by keys.
-// The possibilities are:
-//
-// ark:rxfilename
-// scp:rxfilename
-//
-// We also allow various modifiers:
-// o means the program will only ask for each key once, which enables
-// the reader to discard already-asked-for values.
-// s means the keys are sorted on input (means we don't have to read till
-// eof if someone asked for a key that wasn't there).
-// cs means that it is called in sorted order (we are generally asserting this
-// based on knowledge of how the program works).
-// p means "permissive", and causes it to skip over keys whose corresponding
-// scp-file entries cannot be read. [and to ignore errors in archives and
-// script files, and just consider the "good" entries].
-// We allow the negation of the options above, as in no, ns, np,
-// but these aren't currently very useful (just equivalent to omitting the
-// corresponding option).
-// [any of the above options can be prefixed by n to negate them, e.g. no, ns,
-// ncs, np; but these aren't currently useful as you could just omit the option].
-//
-// b is ignored [for scripting convenience]
-// t is ignored [for scripting convenience]
-//
-//
-// So for instance the following would be a valid rspecifier:
-//
-// "o, s, p, ark:gunzip -c foo.gz|"
-
-struct RspecifierOptions {
- // These options only make a difference for the RandomAccessTableReader class.
- bool once; // we assert that the program will only ask for each key once.
- bool sorted; // we assert that the keys are sorted.
- bool called_sorted; // we assert that the HasKey(), Value() functions will
- // also be called in sorted order. [this implies "once" but not vice versa].
- bool permissive; // If "permissive", when reading from scp files it treats
- // scp files that can't be read as if the corresponding key were not there.
- // For archive files it will suppress errors getting thrown if the archive
-
- // is corrupted and can't be read to the end.
-
- RspecifierOptions(): once(false), sorted(false),
- called_sorted(false), permissive(false) { }
-};
-
-enum RspecifierType {
- kNoRspecifier,
- kArchiveRspecifier,
- kScriptRspecifier
-};
-
-RspecifierType ClassifyRspecifier(const std::string &rspecifier, std::string *rxfilename,
- RspecifierOptions *opts);
-
-// Class Table<Holder> is useful when you want the entire set of
-// objects in memory. NOT IMPLEMENTED YET.
-// It is the least scalable way of accessing data in Tables.
-// The *TableReader and TableWriter classes are more scalable.
-
-
-/// Allows random access to a collection
-/// of objects in an archive or script file; see \ref io_sec_tables.
-template<class Holder>
-class RandomAccessTableReader {
- public:
- typedef typename Holder::T T;
-
- RandomAccessTableReader(): impl_(NULL) { }
-
- // This constructor is equivalent to default constructor + "open", but
- // throws on error.
- RandomAccessTableReader(const std::string &rspecifier);
-
- // Opens the table.
- bool Open(const std::string &rspecifier);
-
- // Returns true if table is open.
- bool IsOpen() const { return (impl_ != NULL); }
-
- // Close() will close the table [throws if it was not open],
- // and returns true on success (false if we were reading an
- // archive and we discovered an error in the archive).
- bool Close();
-
- // Says if it has this key.
- // If you are using the "permissive" (p) read option,
- // it will return false for keys whose corresponding entry
- // in the scp file cannot be read.
-
- bool HasKey(const std::string &key);
-
- // Value() may throw if you are reading an scp file, you
- // do not have the "permissive" (p) option, and an entry
- // in the scp file cannot be read. Typically you won't
- // want to catch this error.
- const T &Value(const std::string &key);
-
- ~RandomAccessTableReader();
-
- // Allow copy-constructor only for non-opened readers (needed for inclusion in
- // stl vector)
- RandomAccessTableReader(const RandomAccessTableReader<Holder> &other):
- impl_(NULL) { KALDI_ASSERT(other.impl_ == NULL); }
- private:
- // Disallow assignment.
- RandomAccessTableReader &operator=(const RandomAccessTableReader<Holder>&);
- void CheckImpl() const; // Checks that impl_ is non-NULL; prints an error
- // message and dies (with KALDI_ERR) if NULL.
- RandomAccessTableReaderImplBase<Holder> *impl_;
-};
-
-
-
-/// A templated class for reading objects sequentially from an archive or script
-/// file; see \ref io_sec_tables.
-template<class Holder>
-class SequentialTableReader {
- public:
- typedef typename Holder::T T;
-
- SequentialTableReader(): impl_(NULL) { }
-
- // This constructor is equivalent to default constructor + "open", but
- // throws on error.
- SequentialTableReader(const std::string &rspecifier);
-
- // Opens the table. Returns exit status; but does throw if previously
- // open stream was in error state. Call Close to stop this [anyway,
- // calling Open more than once is not recommended.]
- bool Open(const std::string &rspecifier);
-
- // Returns true if we're done. It will also return true if there's some kind
- // of error and we can't read any more; in this case, you can detect the
- // error by calling Close and checking the return status; otherwise
- // the destructor will throw.
- inline bool Done();
-
- // Only valid to call Key() if Done() returned false.
- inline std::string Key();
-
- // FreeCurrent() is provided as an optimization to save memory, for large
- // objects. It instructs the class to deallocate the current value. The
- // reference returned by Value() will be invalidated by this.
-
- void FreeCurrent();
-
- // Return reference to the current value.
- // The reference is valid till next call to this object.
- // It will throw if you are reading an scp file, did not
- // specify the "permissive" (p) option and the file cannot
- // be read. [The permissive option makes it behave as if that
- // key does not even exist, if the corresponding file cannot be
- // read.] You probably wouldn't want to catch this exception;
- // the user can just specify the p option in the rspecifier.
- const T &Value();
-
- // Next goes to the next key. It will not throw; any error will
- // result in Done() returning true, and then the destructor will
- // throw unless you call Close().
- void Next();
-
- // Returns true if table is open for reading (does not imply
- // stream is in good state).
- bool IsOpen() const;
-
- // Close() will return false (failure) if Done() became true
- // because of an error condition rather than because we are
- // really done [e.g. because of an error or early termination
- // in the archive].
- // If there is an error and you don't call Close(), the destructor
- // will fail.
- // Close()
- bool Close();
-
- // The destructor may throw. This is the desired behaviour, as it's the way we
- // signal the error to the user (to detect it, call Close()). The issue is that
- // otherwise the user has no way to tell whether Done() returned true because
- // we reached the end of the archive or script, or because there was an error
- // that prevented further reading.
- ~SequentialTableReader();
-
- // Allow copy-constructor only for non-opened readers (needed for inclusion in
- // stl vector)
- SequentialTableReader(const SequentialTableReader<Holder> &other):
- impl_(NULL) { KALDI_ASSERT(other.impl_ == NULL); }
- private:
- // Disallow assignment.
- SequentialTableReader &operator = (const SequentialTableReader<Holder>&);
- void CheckImpl() const; // Checks that impl_ is non-NULL; prints an error
- // message and dies (with KALDI_ERR) if NULL.
- SequentialTableReaderImplBase<Holder> *impl_;
-};
-
-
-/// A templated class for writing objects to an
-/// archive or script file; see \ref io_sec_tables.
-template<class Holder>
-class TableWriter {
- public:
- typedef typename Holder::T T;
-
- TableWriter(): impl_(NULL) { }
-
- // This constructor is equivalent to default constructor
- // + "open", but throws on error. See docs for
- // wspecifier above.
- TableWriter(const std::string &wspecifier);
-
- // Opens the table. See docs for wspecifier above.
- // If it returns true, it is open.
- bool Open(const std::string &wspecifier);
-
- // Returns true if open for writing.
- bool IsOpen() const;
-
- // Write the object. Throws std::runtime_error on error (via the
- // KALDI_ERR macro)
- inline void Write(const std::string &key, const T &value) const;
-
-
- // Flush will flush any archive; it does not return error status
- // or throw, any errors will be reported on the next Write or Close.
- // Useful if we may be writing to a command in a pipe and want
- // to ensure good CPU utilization.
- void Flush();
-
- // Close() is not necessary to call, as the destructor
- // closes it; it's mainly useful if you want to handle
- // error states because the destructor will throw on
- // error if you do not call Close().
- bool Close();
-
- ~TableWriter();
-
- // Allow copy-constructor only for non-opened writers (needed for inclusion in
- // stl vector)
- TableWriter(const TableWriter &other): impl_(NULL) {
- KALDI_ASSERT(other.impl_ == NULL);
- }
- private:
- TableWriter &operator = (const TableWriter&); // Disallow assignment.
- void CheckImpl() const; // Checks that impl_ is non-NULL; prints an error
- // message and dies (with KALDI_ERR) if NULL.
- TableWriterImplBase<Holder> *impl_;
-};
-
-
-/// This class is for when you are reading something in random access, but
-/// it may actually be stored per-speaker (or something similar) but the
-/// keys you're using are per utterance. So you also provide an "rxfilename"
-/// for a file containing lines like
-/// utt1 spk1
-/// utt2 spk1
-/// utt3 spk1
-/// and so on. Note: this is optional; if it is an empty string, we just won't
-/// do the mapping. Also, "table_rxfilename" may be the empty string (as for
-/// a regular table), in which case the table just won't be opened.
-/// We provide only the most frequently used of the functions of RandomAccessTableReader.
-
-template<class Holder>
-class RandomAccessTableReaderMapped {
- public:
- typedef typename Holder::T T;
- /// Note: "utt2spk_rxfilename" will in the normal case be an rxfilename
- /// for an utterance to speaker map, but this code is general; it accepts
- /// a generic map.
- RandomAccessTableReaderMapped(const std::string &table_rxfilename,
- const std::string &utt2spk_rxfilename);
-
- RandomAccessTableReaderMapped() {};
-
- /// Note: when calling Open, utt2spk_rxfilename may be empty.
- bool Open(const std::string &table_rxfilename,
- const std::string &utt2spk_rxfilename);
-
- bool HasKey(const std::string &key);
- const T &Value(const std::string &key);
- inline bool IsOpen() const { return reader_.IsOpen(); }
- inline bool Close() { return reader_.Close(); }
-
-
-
- // The default copy-constructor will do what we want: it will crash
- // for already-opened readers, by calling the member-variable copy-constructors.
- private:
- // Disallow assignment.
- RandomAccessTableReaderMapped &operator=(const RandomAccessTableReaderMapped<Holder>&);
- RandomAccessTableReader<Holder> reader_;
- RandomAccessTableReader<TokenHolder> token_reader_;
- std::string utt2spk_rxfilename_; // Used only in diagnostic messages.
-};
-
-
-/// @} end "addtogroup table_group"
-} // end namespace kaldi
-
-#include "kaldi-table-inl.h"
-
-#endif // KALDI_UTIL_KALDI_TABLE_H_
diff --git a/kaldi_io/src/kaldi/util/parse-options.h b/kaldi_io/src/kaldi/util/parse-options.h
deleted file mode 100644
index f563b54..0000000
--- a/kaldi_io/src/kaldi/util/parse-options.h
+++ /dev/null
@@ -1,264 +0,0 @@
-// util/parse-options.h
-
-// Copyright 2009-2011 Karel Vesely; Microsoft Corporation;
-// Saarland University (Author: Arnab Ghoshal);
-// Copyright 2012-2013 Frantisek Skala; Arnab Ghoshal
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_UTIL_PARSE_OPTIONS_H_
-#define KALDI_UTIL_PARSE_OPTIONS_H_
-
-#include <map>
-#include <string>
-#include <vector>
-
-#include "base/kaldi-common.h"
-#include "itf/options-itf.h"
-
-namespace kaldi {
-
-/// The class ParseOptions is for parsing command-line options; see
-/// \ref parse_options for more documentation.
-class ParseOptions : public OptionsItf {
- public:
- explicit ParseOptions(const char *usage) :
- print_args_(true), help_(false), usage_(usage), argc_(0), argv_(NULL),
- prefix_(""), other_parser_(NULL) {
-#ifndef _MSC_VER // This is just a convenient place to set the stderr to line
- setlinebuf(stderr); // buffering mode, since it's called at program start.
-#endif // This helps ensure different programs' output is not mixed up.
- RegisterStandard("config", &config_, "Configuration file to read (this "
- "option may be repeated)");
- RegisterStandard("print-args", &print_args_,
- "Print the command line arguments (to stderr)");
- RegisterStandard("help", &help_, "Print out usage message");
- RegisterStandard("verbose", &g_kaldi_verbose_level,
- "Verbose level (higher->more logging)");
- }
-
- /**
- This is a constructor for the special case where some options are
- registered with a prefix to avoid conflicts. The object thus created will
- only be used temporarily to register an options class with the original
- options parser (which is passed as the *other pointer) using the given
- prefix. It should not be used for any other purpose, and the prefix must
- not be the empty string. It seems to be the least bad way of implementing
- options with prefixes at this point.
- Example of usage is:
- ParseOptions po; // original ParseOptions object
- ParseOptions po_mfcc("mfcc", &po); // object with prefix.
- MfccOptions mfcc_opts;
- mfcc_opts.Register(&po_mfcc);
- The options will now get registered as, e.g., --mfcc.frame-shift=10.0
- instead of just --frame-shift=10.0
- */
- ParseOptions(const std::string &prefix, OptionsItf *other);
-
- ~ParseOptions() {}
-
- // Methods from the interface
- void Register(const std::string &name,
- bool *ptr, const std::string &doc);
- void Register(const std::string &name,
- int32 *ptr, const std::string &doc);
- void Register(const std::string &name,
- uint32 *ptr, const std::string &doc);
- void Register(const std::string &name,
- float *ptr, const std::string &doc);
- void Register(const std::string &name,
- double *ptr, const std::string &doc);
- void Register(const std::string &name,
- std::string *ptr, const std::string &doc);
-
- /// If called after registering an option and before calling
- /// Read(), disables that option from being used. Will crash
- /// at runtime if that option had not been registered.
- void DisableOption(const std::string &name);
-
- /// This one is used for registering standard parameters of all the programs
- template<typename T>
- void RegisterStandard(const std::string &name,
- T *ptr, const std::string &doc);
-
- /**
- Parses the command line options and fills the ParseOptions-registered
- variables. This must be called after all the variables were registered!!!
-
- Initially the variables have implicit values,
- then the config file values are set-up,
- finally the command line values given.
- Returns the first position in argv that was not used.
- [typically not useful: use NumArgs() and GetArg(). ]
- */
- int Read(int argc, const char *const *argv);
-
- /// Prints the usage documentation [provided in the constructor].
- void PrintUsage(bool print_command_line = false);
- /// Prints the actual configuration of all the registered variables
- void PrintConfig(std::ostream &os);
-
- /// Reads the options values from a config file. Must be called after
- /// registering all options. This is usually used internally after the
- /// standard --config option is used, but it may also be called from a
- /// program.
- void ReadConfigFile(const std::string &filename);
-
- /// Number of positional parameters (c.f. argc-1).
- int NumArgs() const;
-
- /// Returns one of the positional parameters; 1-based indexing for argc/argv
- /// compatibility. Will crash if param is not >=1 and <=NumArgs().
- std::string GetArg(int param) const;
-
- std::string GetOptArg(int param) const {
- return (param <= NumArgs() ? GetArg(param) : "");
- }
-
- /// The following function will return a possibly quoted and escaped
- /// version of "str", according to the current shell. Currently
- /// this is just hardwired to bash. It's useful for debug output.
- static std::string Escape(const std::string &str);
-
- private:
- /// Template to register various variable types,
- /// used for program-specific parameters
- template<typename T>
- void RegisterTmpl(const std::string &name, T *ptr, const std::string &doc);
-
- // Following functions do just the datatype-specific part of the job
- /// Register boolean variable
- void RegisterSpecific(const std::string &name, const std::string &idx,
- bool *b, const std::string &doc, bool is_standard);
- /// Register int32 variable
- void RegisterSpecific(const std::string &name, const std::string &idx,
- int32 *i, const std::string &doc, bool is_standard);
- /// Register unsigned int32 variable
- void RegisterSpecific(const std::string &name, const std::string &idx,
- uint32 *u,
- const std::string &doc, bool is_standard);
- /// Register float variable
- void RegisterSpecific(const std::string &name, const std::string &idx,
- float *f, const std::string &doc, bool is_standard);
- /// Register double variable [useful as we change BaseFloat type].
- void RegisterSpecific(const std::string &name, const std::string &idx,
- double *f, const std::string &doc, bool is_standard);
- /// Register string variable
- void RegisterSpecific(const std::string &name, const std::string &idx,
- std::string *s, const std::string &doc,
- bool is_standard);
-
- /// Does the actual job for both kinds of parameters
- /// Does the common part of the job for all datatypes,
- /// then calls RegisterSpecific
- template<typename T>
- void RegisterCommon(const std::string &name,
- T *ptr, const std::string &doc, bool is_standard);
-
- /// SplitLongArg parses an argument of the form --a=b, --a=, or --a,
- /// and sets "has_equal_sign" to true if an equals-sign was parsed;
- /// this is needed in order to correctly allow --x for a boolean option
- /// x, and --y= for a string option y, and to disallow --x= and --y.
- void SplitLongArg(std::string in, std::string *key, std::string *value,
- bool *has_equal_sign);
-
- void NormalizeArgName(std::string *str);
-
- /// Set option with name "key" to "value"; will crash if can't do it.
- /// "has_equal_sign" is used to allow --x for a boolean option x,
- /// and --y=, for a string option y.
- bool SetOption(const std::string &key, const std::string &value,
- bool has_equal_sign);
-
- bool ToBool(std::string str);
- int32 ToInt(std::string str);
- uint32 ToUInt(std::string str);
- float ToFloat(std::string str);
- double ToDouble(std::string str);
-
- // maps for option variables
- std::map<std::string, bool*> bool_map_;
- std::map<std::string, int32*> int_map_;
- std::map<std::string, uint32*> uint_map_;
- std::map<std::string, float*> float_map_;
- std::map<std::string, double*> double_map_;
- std::map<std::string, std::string*> string_map_;
-
- /**
- Structure for options' documentation
- */
- struct DocInfo {
- DocInfo() {}
- DocInfo(const std::string &name, const std::string &usemsg)
- : name_(name), use_msg_(usemsg), is_standard_(false) {}
- DocInfo(const std::string &name, const std::string &usemsg,
- bool is_standard)
- : name_(name), use_msg_(usemsg), is_standard_(is_standard) {}
-
- std::string name_;
- std::string use_msg_;
- bool is_standard_;
- };
- typedef std::map<std::string, DocInfo> DocMapType;
- DocMapType doc_map_; ///< map for the documentation
-
- bool print_args_; ///< variable for the implicit --print-args parameter
- bool help_; ///< variable for the implicit --help parameter
- std::string config_; ///< variable for the implicit --config parameter
- std::vector<std::string> positional_args_;
- const char *usage_;
- int argc_;
- const char *const *argv_;
-
- /// These members are not normally used. They are only used when the object
- /// is constructed with a prefix
- std::string prefix_;
- OptionsItf *other_parser_;
-};
-
-/// This template is provided for convenience in reading config classes from
-/// files; this is not the standard way to read configuration options, but may
-/// occasionally be needed. This function assumes the config has a function
-/// "void Register(OptionsItf *po)" which it can call to register the
-/// ParseOptions object.
-template<class C> void ReadConfigFromFile(const std::string config_filename,
- C *c) {
- std::ostringstream usage_str;
- usage_str << "Parsing config from "
- << "from '" << config_filename << "'";
- ParseOptions po(usage_str.str().c_str());
- c->Register(&po);
- po.ReadConfigFile(config_filename);
-}
-
-/// This variant of the template ReadConfigFromFile is for if you need to read
-/// two config classes from the same file.
-template<class C1, class C2> void ReadConfigsFromFile(const std::string config_filename,
- C1 *c1, C2 *c2) {
- std::ostringstream usage_str;
- usage_str << "Parsing config from "
- << "from '" << config_filename << "'";
- ParseOptions po(usage_str.str().c_str());
- c1->Register(&po);
- c2->Register(&po);
- po.ReadConfigFile(config_filename);
-}
-
-
-
-} // namespace kaldi
-
-#endif // KALDI_UTIL_PARSE_OPTIONS_H_
diff --git a/kaldi_io/src/kaldi/util/simple-io-funcs.h b/kaldi_io/src/kaldi/util/simple-io-funcs.h
deleted file mode 100644
index 56573e4..0000000
--- a/kaldi_io/src/kaldi/util/simple-io-funcs.h
+++ /dev/null
@@ -1,56 +0,0 @@
-// util/simple-io-funcs.h
-
-// Copyright 2009-2011 Microsoft Corporation; Jan Silovsky
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-
-// http://www.apache.org/licenses/LICENSE-2.0
-
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-#ifndef KALDI_UTIL_SIMPLE_IO_FUNCS_H_
-#define KALDI_UTIL_SIMPLE_IO_FUNCS_H_
-
-#include "kaldi-io.h"
-
-// This header contains some utilities for reading some common, simple text formats:
-// integers in files, one per line, and integers in files, possibly multiple per line.
-// these are not really fully native Kaldi formats; they are mostly for small files that
-// might be generated by scripts, and can be read all at one time.
-// for longer files of this type, we would probably use the Table code.
-
-namespace kaldi {
-
-/// WriteIntegerVectorSimple attempts to write this list of integers, one per line,
-/// to the given file, in text format.
-/// returns true if succeeded.
-bool WriteIntegerVectorSimple(std::string wxfilename, const std::vector<int32> &v);
-
-/// ReadIntegerVectorSimple attempts to read this list of integers, one per line,
-/// from the given file, in text format.
-/// returns true if succeeded.
-bool ReadIntegerVectorSimple(std::string rxfilename, std::vector<int32> *v);
-
-// This is a file format like:
-// 1 2
-// 3
-//
-// 4 5 6
-// etc.
-bool WriteIntegerVectorVectorSimple(std::string wxfilename, const std::vector<std::vector<int32> > &v);
-
-bool ReadIntegerVectorVectorSimple(std::string rxfilename, std::vector<std::vector<int32> > *v);
-
-
-} // end namespace kaldi.
-
-
-#endif
diff --git a/kaldi_io/src/kaldi/util/simple-options.h b/kaldi_io/src/kaldi/util/simple-options.h
deleted file mode 100644
index 58816af..0000000
--- a/kaldi_io/src/kaldi/util/simple-options.h
+++ /dev/null
@@ -1,112 +0,0 @@
-// util/simple-options.h
-
-// Copyright 2013 Tanel Alumae, Tallinn University of Technology
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_UTIL_SIMPLE_OPTIONS_H_
-#define KALDI_UTIL_SIMPLE_OPTIONS_H_
-
-#include <map>
-#include <string>
-#include <vector>
-
-#include "base/kaldi-common.h"
-#include "itf/options-itf.h"
-
-namespace kaldi {
-
-
-/// The class SimpleOptions is an implementation of OptionsItf that allows
-/// setting and getting option values programmatically, i.e., via getter
-/// and setter methods. It doesn't provide any command line parsing functionality.
-/// The class ParseOptions should be used for command-line options.
-class SimpleOptions : public OptionsItf {
- public:
- SimpleOptions() {
- }
-
- virtual ~SimpleOptions() {
- }
-
- // Methods from the interface
- void Register(const std::string &name, bool *ptr, const std::string &doc);
- void Register(const std::string &name, int32 *ptr, const std::string &doc);
- void Register(const std::string &name, uint32 *ptr, const std::string &doc);
- void Register(const std::string &name, float *ptr, const std::string &doc);
- void Register(const std::string &name, double *ptr, const std::string &doc);
- void Register(const std::string &name, std::string *ptr,
- const std::string &doc);
-
- // set option with the specified key, return true if successful
- bool SetOption(const std::string &key, const bool &value);
- bool SetOption(const std::string &key, const int32 &value);
- bool SetOption(const std::string &key, const uint32 &value);
- bool SetOption(const std::string &key, const float &value);
- bool SetOption(const std::string &key, const double &value);
- bool SetOption(const std::string &key, const std::string &value);
- bool SetOption(const std::string &key, const char* value);
-
- // get option with the specified key and put to 'value',
- // return true if successful
- bool GetOption(const std::string &key, bool *value);
- bool GetOption(const std::string &key, int32 *value);
- bool GetOption(const std::string &key, uint32 *value);
- bool GetOption(const std::string &key, float *value);
- bool GetOption(const std::string &key, double *value);
- bool GetOption(const std::string &key, std::string *value);
-
- enum OptionType {
- kBool,
- kInt32,
- kUint32,
- kFloat,
- kDouble,
- kString
- };
-
- struct OptionInfo {
- OptionInfo(const std::string &doc, OptionType type) :
- doc(doc), type(type) {
- }
- std::string doc;
- OptionType type;
- };
-
- std::vector<std::pair<std::string, OptionInfo> > GetOptionInfoList();
-
- /*
- * Puts the type of the option with name 'key' in the argument 'type'.
- * Return true if such option is found, false otherwise.
- */
- bool GetOptionType(const std::string &key, OptionType *type);
-
- private:
-
- std::vector<std::pair<std::string, OptionInfo> > option_info_list_;
-
- // maps for option variables
- std::map<std::string, bool*> bool_map_;
- std::map<std::string, int32*> int_map_;
- std::map<std::string, uint32*> uint_map_;
- std::map<std::string, float*> float_map_;
- std::map<std::string, double*> double_map_;
- std::map<std::string, std::string*> string_map_;
-};
-
-} // namespace kaldi
-
-#endif // KALDI_UTIL_SIMPLE_OPTIONS_H_
diff --git a/kaldi_io/src/kaldi/util/stl-utils.h b/kaldi_io/src/kaldi/util/stl-utils.h
deleted file mode 100644
index 12526ff..0000000
--- a/kaldi_io/src/kaldi/util/stl-utils.h
+++ /dev/null
@@ -1,327 +0,0 @@
-// util/stl-utils.h
-
-// Copyright 2009-2011 Microsoft Corporation; Saarland University
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_UTIL_STL_UTILS_H_
-#define KALDI_UTIL_STL_UTILS_H_
-
-#include <algorithm>
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-#include "base/kaldi-common.h"
-
-#ifdef _MSC_VER
-#include <unordered_map>
-#include <unordered_set>
-using std::unordered_map;
-using std::unordered_set;
-#elif __cplusplus > 199711L || defined(__GXX_EXPERIMENTAL_CXX0X__)
-#include <unordered_map>
-#include <unordered_set>
-using std::unordered_map;
-using std::unordered_set;
-#else
-#include <tr1/unordered_map>
-#include <tr1/unordered_set>
-using std::tr1::unordered_map;
-using std::tr1::unordered_set;
-#endif
-
-
-namespace kaldi {
-
-/// Sorts a vector and uniq's it (removes duplicate elements).
-template<typename T>
-inline void SortAndUniq(std::vector<T> *vec) {
- std::sort(vec->begin(), vec->end());
- vec->erase(std::unique(vec->begin(), vec->end()), vec->end());
-}
-
-
-/// Returns true if the vector is sorted.
-template<typename T>
-inline bool IsSorted(const std::vector<T> &vec) {
- typename std::vector<T>::const_iterator iter = vec.begin(), end = vec.end();
- if (iter == end) return true;
- while (1) {
- typename std::vector<T>::const_iterator next_iter = iter;
- ++next_iter;
- if (next_iter == end) return true; // end of loop and nothing out of order
- if (*next_iter < *iter) return false;
- iter = next_iter;
- }
-}
-
-
-/// Returns true if the vector is sorted and contains each element
-/// only once.
-template<typename T>
-inline bool IsSortedAndUniq(const std::vector<T> &vec) {
- typename std::vector<T>::const_iterator iter = vec.begin(), end = vec.end();
- if (iter == end) return true;
- while (1) {
- typename std::vector<T>::const_iterator next_iter = iter;
- ++next_iter;
- if (next_iter == end) return true; // end of loop and nothing out of order
- if (*next_iter <= *iter) return false;
- iter = next_iter;
- }
-}
-
-
-/// Removes duplicate elements from a sorted vector.
-template<typename T>
-inline void Uniq(std::vector<T> *vec) { // must be already sorted.
- KALDI_PARANOID_ASSERT(IsSorted(*vec));
- KALDI_ASSERT(vec);
- vec->erase(std::unique(vec->begin(), vec->end()), vec->end());
-}
-
-/// Copies the elements of a set to a vector.
-template<class T>
-void CopySetToVector(const std::set<T> &s, std::vector<T> *v) {
- // overwrites v with the members of s, in sorted order from lowest to highest
- // (because the set was in sorted order).
- KALDI_ASSERT(v != NULL);
- v->resize(s.size());
- typename std::set<T>::const_iterator siter = s.begin(), send = s.end();
- typename std::vector<T>::iterator viter = v->begin();
- for (; siter != send; ++siter, ++viter) {
- *viter = *siter;
- }
-}
-
-template<class T>
-void CopySetToVector(const unordered_set<T> &s, std::vector<T> *v) {
- // overwrites v with the members of s, in the iteration order of s
- // (which is unspecified, since an unordered_set is not sorted).
- KALDI_ASSERT(v != NULL);
- v->resize(s.size());
- typename unordered_set<T>::const_iterator siter = s.begin(), send = s.end();
- typename std::vector<T>::iterator viter = v->begin();
- for (; siter != send; ++siter, ++viter) {
- *viter = *siter;
- }
-}
-
-
-/// Copies the (key, value) pairs in a map to a vector of pairs.
-template<class A, class B>
-void CopyMapToVector(const std::map<A, B> &m,
- std::vector<std::pair<A, B> > *v) {
- KALDI_ASSERT(v != NULL);
- v->resize(m.size());
- typename std::map<A, B>::const_iterator miter = m.begin(), mend = m.end();
- typename std::vector<std::pair<A, B> >::iterator viter = v->begin();
- for (; miter != mend; ++miter, ++viter) {
- *viter = std::make_pair(miter->first, miter->second);
- // do it like this because of const casting.
- }
-}
-
-/// Copies the keys in a map to a vector.
-template<class A, class B>
-void CopyMapKeysToVector(const std::map<A, B> &m, std::vector<A> *v) {
- KALDI_ASSERT(v != NULL);
- v->resize(m.size());
- typename std::map<A, B>::const_iterator miter = m.begin(), mend = m.end();
- typename std::vector<A>::iterator viter = v->begin();
- for (; miter != mend; ++miter, ++viter) {
- *viter = miter->first;
- }
-}
-
-/// Copies the values in a map to a vector.
-template<class A, class B>
-void CopyMapValuesToVector(const std::map<A, B> &m, std::vector<B> *v) {
- KALDI_ASSERT(v != NULL);
- v->resize(m.size());
- typename std::map<A, B>::const_iterator miter = m.begin(), mend = m.end();
- typename std::vector<B>::iterator viter = v->begin();
- for (; miter != mend; ++miter, ++viter) {
- *viter = miter->second;
- }
-}
-
-/// Copies the keys in a map to a set.
-template<class A, class B>
-void CopyMapKeysToSet(const std::map<A, B> &m, std::set<A> *s) {
- KALDI_ASSERT(s != NULL);
- s->clear();
- typename std::map<A, B>::const_iterator miter = m.begin(), mend = m.end();
- for (; miter != mend; ++miter) {
- s->insert(s->end(), miter->first);
- }
-}
-
-/// Copies the values in a map to a set.
-template<class A, class B>
-void CopyMapValuesToSet(const std::map<A, B> &m, std::set<B> *s) {
- KALDI_ASSERT(s != NULL);
- s->clear();
- typename std::map<A, B>::const_iterator miter = m.begin(), mend = m.end();
- for (; miter != mend; ++miter)
- s->insert(s->end(), miter->second);
-}
-
-
-/// Copies the contents of a vector to a set.
-template<class A>
-void CopyVectorToSet(const std::vector<A> &v, std::set<A> *s) {
- KALDI_ASSERT(s != NULL);
- s->clear();
- typename std::vector<A>::const_iterator iter = v.begin(), end = v.end();
- for (; iter != end; ++iter)
- s->insert(s->end(), *iter);
- // s->end() is an insertion hint that helps if v was sorted; it works regardless.
-}
-
-/// Deletes any non-NULL pointers in the vector v, and sets
-/// the corresponding entries of v to NULL
-template<class A>
-void DeletePointers(std::vector<A*> *v) {
- KALDI_ASSERT(v != NULL);
- typename std::vector<A*>::iterator iter = v->begin(), end = v->end();
- for (; iter != end; ++iter) {
- if (*iter != NULL) {
- delete *iter;
- *iter = NULL; // set to NULL for extra safety.
- }
- }
-}
-
-/// Returns true if the vector of pointers contains NULL pointers.
-template<class A>
-bool ContainsNullPointers(const std::vector<A*> &v) {
- typename std::vector<A*>::const_iterator iter = v.begin(), end = v.end();
- for (; iter != end; ++iter)
- if (*iter == static_cast<A*> (NULL)) return true;
- return false;
-}
-
-/// Copies the contents of a vector of one type to a vector
-/// of another type.
-template<typename A, typename B>
-void CopyVectorToVector(const std::vector<A> &vec_in, std::vector<B> *vec_out) {
- KALDI_ASSERT(vec_out != NULL);
- vec_out->resize(vec_in.size());
- for (size_t i = 0; i < vec_in.size(); i++)
- (*vec_out)[i] = static_cast<B> (vec_in[i]);
-}
-
-/// A hashing function-object for vectors.
-template<typename Int>
-struct VectorHasher { // hashing function for vector<Int>.
- size_t operator()(const std::vector<Int> &x) const {
- size_t ans = 0;
- typename std::vector<Int>::const_iterator iter = x.begin(), end = x.end();
- for (; iter != end; ++iter) {
- ans *= kPrime;
- ans += *iter;
- }
- return ans;
- }
- VectorHasher() { // Check we're instantiated with an integer type.
- KALDI_ASSERT_IS_INTEGER_TYPE(Int);
- }
- private:
- static const int kPrime = 7853;
-};
-
-/// A hashing function-object for pairs of ints
-template<typename Int>
-struct PairHasher { // hashing function for pair<int>
- size_t operator()(const std::pair<Int,Int> &x) const {
- return x.first + x.second * kPrime;
- }
- PairHasher() { // Check we're instantiated with an integer type.
- KALDI_ASSERT_IS_INTEGER_TYPE(Int);
- }
- private:
- static const int kPrime = 7853;
-};
-
-
-/// A hashing function object for strings.
-struct StringHasher { // hashing function for std::string
- size_t operator()(const std::string &str) const {
- size_t ans = 0, len = str.length();
- const char *c = str.c_str(), *end = c + len;
- for (; c != end; c++) {
- ans *= kPrime;
- ans += *c;
- }
- return ans;
- }
- private:
- static const int kPrime = 7853;
-};
-
-/// Reverses the contents of a vector.
-template<typename T>
-inline void ReverseVector(std::vector<T> *vec) {
- KALDI_ASSERT(vec != NULL);
- size_t sz = vec->size();
- for (size_t i = 0; i < sz/2; i++)
- std::swap( (*vec)[i], (*vec)[sz-1-i]);
-}
-
-
-/// Comparator object for pairs that compares only the first member of each pair.
-template<class A, class B>
-struct CompareFirstMemberOfPair {
- inline bool operator() (const std::pair<A, B> &p1,
- const std::pair<A, B> &p2) {
- return p1.first < p2.first;
- }
-};
-
-/// For a vector of pair<I, F> where I is an integer and F a floating-point or
-/// integer type, this function sorts a vector of type vector<pair<I, F> > on
-/// the I value and then merges elements with equal I values, summing these over
-/// the F component and then removing any F component with zero value. This
-/// is for where the vector of pairs represents a map from the integer to float
-/// component, with an "adding" type of semantics for combining the elements.
-template<typename I, typename F>
-inline void MergePairVectorSumming(std::vector<std::pair<I, F> > *vec) {
- KALDI_ASSERT_IS_INTEGER_TYPE(I);
- CompareFirstMemberOfPair<I, F> c;
- std::sort(vec->begin(), vec->end(), c); // sort on 1st element.
- typename std::vector<std::pair<I, F> >::iterator out = vec->begin(),
- in = vec->begin(), end = vec->end();
- while (in < end) {
- // We reach this point only at the first element of
- // each stretch of identical .first elements.
- *out = *in;
- ++in;
- while (in < end && in->first == out->first) {
- out->second += in->second; // this is the merge operation.
- ++in;
- }
- if (out->second != static_cast<F>(0)) // Don't keep zero elements.
- out++;
- }
- vec->erase(out, end);
-}
-
-} // namespace kaldi
-
-#endif // KALDI_UTIL_STL_UTILS_H_
-
diff --git a/kaldi_io/src/kaldi/util/table-types.h b/kaldi_io/src/kaldi/util/table-types.h
deleted file mode 100644
index 313d1aa..0000000
--- a/kaldi_io/src/kaldi/util/table-types.h
+++ /dev/null
@@ -1,137 +0,0 @@
-// util/table-types.h
-
-// Copyright 2009-2011 Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-
-#ifndef KALDI_UTIL_TABLE_TYPES_H_
-#define KALDI_UTIL_TABLE_TYPES_H_
-#include "base/kaldi-common.h"
-#include "util/kaldi-table.h"
-#include "util/kaldi-holder.h"
-#include "matrix/matrix-lib.h"
-
-namespace kaldi {
-
-// This header defines typedefs that are specific instantiations of
-// the Table types.
-
-/// \addtogroup table_types
-/// @{
-
-typedef TableWriter<KaldiObjectHolder<Matrix<BaseFloat> > > BaseFloatMatrixWriter;
-typedef SequentialTableReader<KaldiObjectHolder<Matrix<BaseFloat> > > SequentialBaseFloatMatrixReader;
-typedef RandomAccessTableReader<KaldiObjectHolder<Matrix<BaseFloat> > > RandomAccessBaseFloatMatrixReader;
-typedef RandomAccessTableReaderMapped<KaldiObjectHolder<Matrix<BaseFloat> > > RandomAccessBaseFloatMatrixReaderMapped;
-
-typedef TableWriter<KaldiObjectHolder<Matrix<double> > > DoubleMatrixWriter;
-typedef SequentialTableReader<KaldiObjectHolder<Matrix<double> > > SequentialDoubleMatrixReader;
-typedef RandomAccessTableReader<KaldiObjectHolder<Matrix<double> > > RandomAccessDoubleMatrixReader;
-typedef RandomAccessTableReaderMapped<KaldiObjectHolder<Matrix<double> > > RandomAccessDoubleMatrixReaderMapped;
-
-typedef TableWriter<KaldiObjectHolder<CompressedMatrix> > CompressedMatrixWriter;
-
-typedef TableWriter<KaldiObjectHolder<Vector<BaseFloat> > > BaseFloatVectorWriter;
-typedef SequentialTableReader<KaldiObjectHolder<Vector<BaseFloat> > > SequentialBaseFloatVectorReader;
-typedef RandomAccessTableReader<KaldiObjectHolder<Vector<BaseFloat> > > RandomAccessBaseFloatVectorReader;
-typedef RandomAccessTableReaderMapped<KaldiObjectHolder<Vector<BaseFloat> > > RandomAccessBaseFloatVectorReaderMapped;
-
-typedef TableWriter<KaldiObjectHolder<Vector<double> > > DoubleVectorWriter;
-typedef SequentialTableReader<KaldiObjectHolder<Vector<double> > > SequentialDoubleVectorReader;
-typedef RandomAccessTableReader<KaldiObjectHolder<Vector<double> > > RandomAccessDoubleVectorReader;
-
-typedef TableWriter<KaldiObjectHolder<CuMatrix<BaseFloat> > > BaseFloatCuMatrixWriter;
-typedef SequentialTableReader<KaldiObjectHolder<CuMatrix<BaseFloat> > > SequentialBaseFloatCuMatrixReader;
-typedef RandomAccessTableReader<KaldiObjectHolder<CuMatrix<BaseFloat> > > RandomAccessBaseFloatCuMatrixReader;
-typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuMatrix<BaseFloat> > > RandomAccessBaseFloatCuMatrixReaderMapped;
-
-typedef TableWriter<KaldiObjectHolder<CuMatrix<double> > > DoubleCuMatrixWriter;
-typedef SequentialTableReader<KaldiObjectHolder<CuMatrix<double> > > SequentialDoubleCuMatrixReader;
-typedef RandomAccessTableReader<KaldiObjectHolder<CuMatrix<double> > > RandomAccessDoubleCuMatrixReader;
-typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuMatrix<double> > > RandomAccessDoubleCuMatrixReaderMapped;
-
-typedef TableWriter<KaldiObjectHolder<CuVector<BaseFloat> > > BaseFloatCuVectorWriter;
-typedef SequentialTableReader<KaldiObjectHolder<CuVector<BaseFloat> > > SequentialBaseFloatCuVectorReader;
-typedef RandomAccessTableReader<KaldiObjectHolder<CuVector<BaseFloat> > > RandomAccessBaseFloatCuVectorReader;
-typedef RandomAccessTableReaderMapped<KaldiObjectHolder<CuVector<BaseFloat> > > RandomAccessBaseFloatCuVectorReaderMapped;
-
-typedef TableWriter<KaldiObjectHolder<CuVector<double> > > DoubleCuVectorWriter;
-typedef SequentialTableReader<KaldiObjectHolder<CuVector<double> > > SequentialDoubleCuVectorReader;
-typedef RandomAccessTableReader<KaldiObjectHolder<CuVector<double> > > RandomAccessDoubleCuVectorReader;
-
-
-typedef TableWriter<BasicHolder<int32> > Int32Writer;
-typedef SequentialTableReader<BasicHolder<int32> > SequentialInt32Reader;
-typedef RandomAccessTableReader<BasicHolder<int32> > RandomAccessInt32Reader;
-
-typedef TableWriter<BasicVectorHolder<int32> > Int32VectorWriter;
-typedef SequentialTableReader<BasicVectorHolder<int32> > SequentialInt32VectorReader;
-typedef RandomAccessTableReader<BasicVectorHolder<int32> > RandomAccessInt32VectorReader;
-
-typedef TableWriter<BasicVectorVectorHolder<int32> > Int32VectorVectorWriter;
-typedef SequentialTableReader<BasicVectorVectorHolder<int32> > SequentialInt32VectorVectorReader;
-typedef RandomAccessTableReader<BasicVectorVectorHolder<int32> > RandomAccessInt32VectorVectorReader;
-
-typedef TableWriter<BasicPairVectorHolder<int32> > Int32PairVectorWriter;
-typedef SequentialTableReader<BasicPairVectorHolder<int32> > SequentialInt32PairVectorReader;
-typedef RandomAccessTableReader<BasicPairVectorHolder<int32> > RandomAccessInt32PairVectorReader;
-
-typedef TableWriter<BasicPairVectorHolder<BaseFloat> > BaseFloatPairVectorWriter;
-typedef SequentialTableReader<BasicPairVectorHolder<BaseFloat> > SequentialBaseFloatPairVectorReader;
-typedef RandomAccessTableReader<BasicPairVectorHolder<BaseFloat> > RandomAccessBaseFloatPairVectorReader;
-
-typedef TableWriter<BasicHolder<BaseFloat> > BaseFloatWriter;
-typedef SequentialTableReader<BasicHolder<BaseFloat> > SequentialBaseFloatReader;
-typedef RandomAccessTableReader<BasicHolder<BaseFloat> > RandomAccessBaseFloatReader;
-typedef RandomAccessTableReaderMapped<BasicHolder<BaseFloat> > RandomAccessBaseFloatReaderMapped;
-
-typedef TableWriter<BasicHolder<double> > DoubleWriter;
-typedef SequentialTableReader<BasicHolder<double> > SequentialDoubleReader;
-typedef RandomAccessTableReader<BasicHolder<double> > RandomAccessDoubleReader;
-
-typedef TableWriter<BasicHolder<bool> > BoolWriter;
-typedef SequentialTableReader<BasicHolder<bool> > SequentialBoolReader;
-typedef RandomAccessTableReader<BasicHolder<bool> > RandomAccessBoolReader;
-
-
-
-/// TokenWriter is a writer specialized for std::string where the strings
-/// are nonempty and whitespace-free. T == std::string
-typedef TableWriter<TokenHolder> TokenWriter;
-typedef SequentialTableReader<TokenHolder> SequentialTokenReader;
-typedef RandomAccessTableReader<TokenHolder> RandomAccessTokenReader;
-
-
-/// TokenVectorWriter is a writer specialized for sequences of
-/// std::string where the strings are nonempty and whitespace-free.
-/// T == std::vector<std::string>
-typedef TableWriter<TokenVectorHolder> TokenVectorWriter;
-// Ditto for SequentialTokenVectorReader.
-typedef SequentialTableReader<TokenVectorHolder> SequentialTokenVectorReader;
-typedef RandomAccessTableReader<TokenVectorHolder> RandomAccessTokenVectorReader;
-
-
-/// @}
-
-// Note: for FST reader/writer, see ../fstext/fstext-utils.h
-// [not done yet].
-
-} // end namespace kaldi
-
-
-
-#endif
diff --git a/kaldi_io/src/kaldi/util/text-utils.h b/kaldi_io/src/kaldi/util/text-utils.h
deleted file mode 100644
index 1d85c47..0000000
--- a/kaldi_io/src/kaldi/util/text-utils.h
+++ /dev/null
@@ -1,169 +0,0 @@
-// util/text-utils.h
-
-// Copyright 2009-2011 Saarland University; Microsoft Corporation
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-#ifndef KALDI_UTIL_TEXT_UTILS_H_
-#define KALDI_UTIL_TEXT_UTILS_H_
-
-#include <algorithm>
-#include <map>
-#include <set>
-#include <string>
-#include <vector>
-#include <errno.h>
-
-#include "base/kaldi-common.h"
-
-namespace kaldi {
-
-/// Split a string using any of the single character delimiters.
-/// If omit_empty_strings == true, the output will contain any
-/// nonempty strings after splitting on any of the
-/// characters in the delimiter. If omit_empty_strings == false,
-/// the output will contain n+1 strings if there are n characters
-/// in the set "delim" within the input string. In this case
-/// the empty string is split to a single empty string.
-void SplitStringToVector(const std::string &full, const char *delim,
- bool omit_empty_strings,
- std::vector<std::string> *out);
-
-/// Joins the elements of a vector of strings into a single string using
-/// "delim" as the delimiter. If omit_empty_strings == true, any empty strings
-/// in the vector are skipped. A vector of empty strings results in an empty
-/// string on the output.
-void JoinVectorToString(const std::vector<std::string> &vec_in,
- const char *delim, bool omit_empty_strings,
- std::string *str_out);
-
-
-/// Split a string (e.g. 1:2:3) into a vector of integers.
-/// The delimiting char may be any character in "delim".
-/// returns true on success, false on failure.
-/// If omit_empty_strings == true, 1::2:3: will become
-/// { 1, 2, 3 }. Otherwise it would be rejected.
-/// Regardless of the value of omit_empty_strings,
-/// the empty string is successfully parsed as an empty
-/// vector of integers
-template<class I>
-bool SplitStringToIntegers(const std::string &full,
- const char *delim,
- bool omit_empty_strings, // typically false [but
- // should probably be true
- // if "delim" is spaces].
- std::vector<I> *out) {
- KALDI_ASSERT(out != NULL);
- KALDI_ASSERT_IS_INTEGER_TYPE(I);
- if ( *(full.c_str()) == '\0') {
- out->clear();
- return true;
- }
- std::vector<std::string> split;
- SplitStringToVector(full, delim, omit_empty_strings, &split);
- out->resize(split.size());
- for (size_t i = 0; i < split.size(); i++) {
- const char *this_str = split[i].c_str();
- char *end = NULL;
- long long int j = 0;
- j = KALDI_STRTOLL(this_str, &end);
- if (end == this_str || *end != '\0') {
- out->clear();
- return false;
- } else {
- I jI = static_cast<I>(j);
- if (static_cast<long long int>(jI) != j) {
- // output type cannot fit this integer.
- out->clear();
- return false;
- }
- (*out)[i] = jI;
- }
- }
- return true;
-}
-
-// This is defined for F = float and double.
-template<class F>
-bool SplitStringToFloats(const std::string &full,
- const char *delim,
- bool omit_empty_strings, // typically false
- std::vector<F> *out);
-
-
-/// Converts a string into an integer via strtoll and returns false if there was
-/// any kind of problem (i.e. the string was not an integer or contained extra
-/// non-whitespace junk, or the integer was too large to fit into the type it is
-/// being converted into). Only sets *out if everything was OK and it returns
-/// true.
-template<class Int>
-bool ConvertStringToInteger(const std::string &str,
- Int *out) {
- KALDI_ASSERT_IS_INTEGER_TYPE(Int);
- const char *this_str = str.c_str();
- char *end = NULL;
- errno = 0;
- long long int i = KALDI_STRTOLL(this_str, &end);
- if (end != this_str)
- while (isspace(*end)) end++;
- if (end == this_str || *end != '\0' || errno != 0)
- return false;
- Int iInt = static_cast<Int>(i);
- if (static_cast<long long int>(iInt) != i || (i<0 && !std::numeric_limits<Int>::is_signed)) {
- return false;
- }
- *out = iInt;
- return true;
-}
-
-
-/// ConvertStringToReal converts a string into either float or double via strtod,
-/// and returns false if there was any kind of problem (i.e. the string was not a
-/// floating point number or contained extra non-whitespace junk).
-/// Be careful: this function will successfully read inf's or nan's.
-bool ConvertStringToReal(const std::string &str,
- double *out);
-bool ConvertStringToReal(const std::string &str,
- float *out);
-
-
-/// Removes leading and trailing whitespace from a string.
-void Trim(std::string *str);
-
-
-/// Removes leading and trailing white space from the string, then splits on the
-/// first section of whitespace found (if present), putting the part before the
-/// whitespace in "first" and the rest in "rest". If there is no such space,
-/// everything that remains after removing leading and trailing whitespace goes
-/// in "first".
-void SplitStringOnFirstSpace(const std::string &line,
- std::string *first,
- std::string *rest);
-
-
-/// Returns true if "token" is nonempty, and all characters are
-/// printable and whitespace-free.
-bool IsToken(const std::string &token);
-
-
-/// Returns true if "line" is free of \n characters and unprintable
-/// characters, and does not contain leading or trailing whitespace.
-bool IsLine(const std::string &line);
-
-
-} // namespace kaldi
-
-#endif // KALDI_UTIL_TEXT_UTILS_H_
diff --git a/kaldi_io/src/kaldi/util/timer.h b/kaldi_io/src/kaldi/util/timer.h
deleted file mode 100644
index e3ee8d5..0000000
--- a/kaldi_io/src/kaldi/util/timer.h
+++ /dev/null
@@ -1,27 +0,0 @@
-// util/timer.h
-
-// Copyright 2014 Johns Hopkins University (author: Daniel Povey)
-
-// See ../../COPYING for clarification regarding multiple authors
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-
-// http://www.apache.org/licenses/LICENSE-2.0
-
-// THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED
-// WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE,
-// MERCHANTABLITY OR NON-INFRINGEMENT.
-// See the Apache 2 License for the specific language governing permissions and
-// limitations under the License.
-
-// We are temporarily leaving this file to forward #includes to
-// base/timer.h.  Its use is deprecated; you should directly
-// #include base/timer.h
-#ifndef KALDI_UTIL_TIMER_H_
-#define KALDI_UTIL_TIMER_H_
-#pragma message warning: please do not include util/timer.h, include base/timer.h (it has been moved)
-#include "base/timer.h"
-#endif