diff options
Diffstat (limited to 'htk_io/src/KaldiLib/MlfStream.tcc')
-rw-r--r-- | htk_io/src/KaldiLib/MlfStream.tcc | 517 |
1 files changed, 517 insertions, 0 deletions
diff --git a/htk_io/src/KaldiLib/MlfStream.tcc b/htk_io/src/KaldiLib/MlfStream.tcc new file mode 100644 index 0000000..8978545 --- /dev/null +++ b/htk_io/src/KaldiLib/MlfStream.tcc @@ -0,0 +1,517 @@ +#ifndef STK_MlfStream_tcc +#define STK_MlfStream_tcc + +#include <algorithm> + +#include "Common.h" +#include "StkMatch.h" + +namespace TNet +{ + //**************************************************************************** + //**************************************************************************** + template< + typename _CharT, + typename _Traits, + typename _CharTA, + typename ByteT, + typename ByteAT + > + BasicOMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>:: + BasicOMlfStreamBuf(OStreamReference rOStream, size_t bufferSize) + : mIsOpen(false), mOStream(rOStream) + { } + + + //**************************************************************************** + //**************************************************************************** + template< + typename _CharT, + typename _Traits, + typename _CharTA, + typename ByteT, + typename ByteAT + > + BasicOMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>:: + ~BasicOMlfStreamBuf() + { + mOStream.flush(); + } + + + //**************************************************************************** + //**************************************************************************** + template< + typename _CharT, + typename _Traits, + typename _CharTA, + typename ByteT, + typename ByteAT + > + int + BasicOMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>:: + sync() + { + mOStream.flush(); + return 0; + } + + + //**************************************************************************** + //**************************************************************************** + template< + typename _CharT, + typename _Traits, + typename _CharTA, + typename ByteT, + typename ByteAT + > + typename _Traits::int_type + BasicOMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>:: + overflow(typename _Traits::int_type c) + { + // we don't use buffer here... + if (mIsOpen) { + if (_Traits::eof() == c) { + return _Traits::not_eof(c); + } + // only pass the character to the stream + mOStream.rdbuf()->sputc(c); + + // remember last char (in case we want to close) + mLastChar = c; + + return c; + } + else { + return _Traits::eof(); + } + } + + + //**************************************************************************** + //**************************************************************************** + template< + typename _CharT, + typename _Traits, + typename _CharTA, + typename ByteT, + typename ByteAT + > + void + BasicOMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>:: + Close() + { + // if last character was not EOL, we need to insert it + if (mLastChar != '\n') { + mOStream.put('\n'); + } + mOStream << ".\n"; + + // flush the stream and declare the stream closed + mOStream.flush(); + mIsOpen = false; + } + + + //**************************************************************************** + //**************************************************************************** + template< + typename _CharT, + typename _Traits, + typename _CharTA, + typename ByteT, + typename ByteAT + > + BasicOMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT> * + BasicOMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>:: + Open(const std::string& rFileName) + { + // retreive position + std::streampos pos = mOStream.tellp(); + + // write the initial "filename" in parantheses + mOStream << '"' << rFileName << '"' << std::endl; + mLastChar = '\n'; + + // return NULL if we canot open + if (!mOStream.good()) { + return NULL; + } + + // if ok, store the name position + if (-1 != pos) { + pos = mOStream.tellp(); + mLabels.Insert(rFileName, pos); + } + + // set open flag and return this + mIsOpen = true; + return this; + } + + + //**************************************************************************** + //**************************************************************************** + // BasicIMlfStreamBuf section + // + //**************************************************************************** + //**************************************************************************** + + //**************************************************************************** + //**************************************************************************** + template< + typename _CharT, + typename _Traits, + typename _CharTA, + typename ByteT, + typename ByteAT + > + BasicIMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>:: + BasicIMlfStreamBuf(IStreamReference rIStream, size_t bufferSize) + : mIsOpen(false), mIsHashed(false), mIsEof(true), mState(IN_HEADER_STATE), + mIStream(rIStream), mLineBuffer() + { + // we reserve some place for the buffer... + mLineBuffer.reserve(bufferSize); + + //StreamBufType::setg(mpBuffer, mpBuffer + bufferSize, mpBuffer + bufferSize); + StreamBufType::setg(&(mLineBuffer.front()), &(mLineBuffer.back()), &(mLineBuffer.back())); + } + + //**************************************************************************** + //**************************************************************************** + template< + typename _CharT, + typename _Traits, + typename _CharTA, + typename ByteT, + typename ByteAT + > + BasicIMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>:: + ~BasicIMlfStreamBuf() + { + } + + //**************************************************************************** + //**************************************************************************** + template< + typename _CharT, + typename _Traits, + typename _CharTA, + typename ByteT, + typename ByteAT + > + void + BasicIMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>:: + Index() + { + // retreive position + std::streampos orig_pos = mIStream.tellg(); + int orig_state = mState; + + // for streams like stdin, pos will by definition be -1, so we can only + // rely on sequential access and cannot hash it. + if (-1 != orig_pos) { + std::string aux_name; + // we will constantly jump to next definition. the function automatically + // hashes the stream if possible + while (JumpToNextDefinition(aux_name)) + { } + + // move to the original position + mIStream.clear(); + mIStream.seekg(orig_pos); + mState = orig_state; + + // set as hashed + mIsHashed=true; + } + } + + + //**************************************************************************** + //**************************************************************************** + template< + typename _CharT, + typename _Traits, + typename _CharTA, + typename ByteT, + typename ByteAT + > + bool + BasicIMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>:: + JumpToNextDefinition(std::string& rName) + { + if (!mIStream.good()) { + return false; + } + + // if we can, we will try to index the label + std::streampos pos = mIStream.tellg(); + + // we might be at a definition already, so first move one line further + FillLineBuffer(); + + // read lines till we get to definition again + while (mIStream.good() && mState != IN_TITLE_STATE) { + FillLineBuffer(); + } + + // decide what happened + if (IN_TITLE_STATE == mState) { + // if we can, we will try to index the label + pos = mIStream.tellg(); + + if (pos != static_cast<const std::streampos>(-1)) { + // if (pos !=std::string::npos) { // This line does not work under MinGW + std::string line_buffer(mLineBuffer.begin(), mLineBuffer.end()); + TNet::ParseHTKString(line_buffer, rName); + mLabels.Insert(rName, pos); + } + + return true; + } + else { + // we have been hashing all the way through so we know that if this is + // is the EOF, we are done hashing this stream + if (pos != static_cast<const std::streampos>(-1)) { + mIsHashed = true; + } + + // we are not in body state, so we just return false + return false; + } + } + + + //**************************************************************************** + //**************************************************************************** + template< + typename _CharT, + typename _Traits, + typename _CharTA, + typename ByteT, + typename ByteAT + > + BasicIMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>* + BasicIMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>:: + Close() + { + if (!mIsOpen) { + mIsEof = true; + return NULL; + } + else { + // if we try to close while in the body, we need to reach the end + if (mState == IN_BODY_STATE) { + while (mState == IN_BODY_STATE) { + FillLineBuffer(); + } + } + + // disable buffer mechanism + StreamBufType::setg(&(mLineBuffer.front()), &(mLineBuffer.front()), + &(mLineBuffer.front())); + + mIsEof = true; + mIsOpen = false; + + return this; + } + } + + + //**************************************************************************** + //**************************************************************************** + template< + typename _CharT, + typename _Traits, + typename _CharTA, + typename ByteT, + typename ByteAT + > + BasicIMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>* + BasicIMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>:: + Open(const std::string& rFileName) + { + BasicIMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>* ret_val = NULL; + + // this behavior is compatible with ifstream + if (mIsOpen) { + Close(); + return NULL; + } + + // retreive position + std::streampos pos = mIStream.tellg(); + LabelRecord label_record; + + // for streams like stdin, pos will by definition be -1, so we can only + // rely on sequential access. At this place, we decide what to do + if ((-1 != pos) && (mLabels.Find(rFileName, label_record))) { + mIStream.seekg(label_record.mStreamPos); + mState = IN_TITLE_STATE; + + // we don't want the other stream to be bad, so we transfer the + // flagbits to this stream + if (!mIStream.good()) { + mIStream.clear(); + mIsOpen = false; + ret_val = NULL; + } + else { + mIsOpen = true; + mIsEof = false; + ret_val = this; + } + } + + // we don't have sequential stream and we didn't find the label, but + // we are hashed, so we can be sure, that we failed + else if ((-1 != pos) && mIsHashed) { + mIsOpen = false; + ret_val = NULL; + } + + // we either have sequential stream or didn't find anything, but we can + // still try to sequentially go and look for it + else { + bool found = false; + std::string aux_name; + std::string aux_name2; + + while ((!found) && JumpToNextDefinition(aux_name)) { + if (TNet::ProcessMask(rFileName, aux_name, aux_name2)) { + mIsOpen = true; + mIsEof = false; + found = true; + ret_val = this; + } + } + + if (!found) { + mIsOpen = false; + ret_val = NULL; + } + } + + return ret_val; + } + + + //**************************************************************************** + //**************************************************************************** + template< + typename _CharT, + typename _Traits, + typename _CharTA, + typename ByteT, + typename ByteAT + > + typename _Traits::int_type + BasicIMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>:: + underflow() + { + // we don't do anything if EOF + if (mIsEof) { + StreamBufType::setg(&(mLineBuffer.front()), &(mLineBuffer.front()), + &(mLineBuffer.front())); + return _Traits::eof(); + } + + // read from buffer if we can + if (StreamBufType::gptr() && (StreamBufType::gptr() < StreamBufType::egptr())) { + return _Traits::not_eof(*StreamBufType::gptr()); + } + + // might happen that stream is in !good state + if (!mIStream.good()) { + mIsEof = true; + StreamBufType::setg(&(mLineBuffer.front()), &(mLineBuffer.front()), + &(mLineBuffer.front())); + return _Traits::eof(); + } + + // fill the line buffer and update my state + FillLineBuffer(); + + // if the whole line is just period or it's eof, declare EOF + if (mState == OUT_OF_BODY_STATE) { + mIsEof = true; + StreamBufType::setg(&(mLineBuffer.front()), &(mLineBuffer.front()), + &(mLineBuffer.front())); + return _Traits::eof(); + } + + // restore the buffer mechanism + StreamBufType::setg(&(mLineBuffer.front()), &(mLineBuffer.front()), + &(mLineBuffer.back()) + 1); + + return *StreamBufType::gptr(); + } + + + //**************************************************************************** + //**************************************************************************** + template< + typename _CharT, + typename _Traits, + typename _CharTA, + typename ByteT, + typename ByteAT + > + void + BasicIMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT>:: + FillLineBuffer() + { + // reset line buffer + size_t capacity = mLineBuffer.capacity(); + mLineBuffer.clear(); + mLineBuffer.reserve(capacity); + + // read one line into buffer + int c; + while ((c = mIStream.get()) != '\n' && c != _Traits::eof()) { + mLineBuffer.push_back(char(c)); + } + + // we want to be able to pass last eol symbol + if (c == '\n') { + mLineBuffer.push_back(char(c)); + } + + // we will decide where we are + switch (mState) { + case IN_HEADER_STATE: + + case OUT_OF_BODY_STATE: + if (mLineBuffer[0] != '#') { + mState = IN_TITLE_STATE; + } + break; + + case IN_TITLE_STATE: + if (mLineBuffer[0] == '.' && (mLineBuffer.back() == '\n' || mIStream.eof())) { + mState = OUT_OF_BODY_STATE; + } + else { + mState = IN_BODY_STATE; + } + break; + + case IN_BODY_STATE: + // period or EOF will end the file + if (mLineBuffer[0] == '.' && (mLineBuffer.back() == '\n' || mIStream.eof())) { + mState = OUT_OF_BODY_STATE; + } + if (mLineBuffer.size() == 0) { + mState = OUT_OF_BODY_STATE; + } + break; + } + } +} // namespace TNet + + +#endif // STK_MlfStream_tcc |