diff options
Diffstat (limited to 'htk_io/src/KaldiLib/MlfStream.h')
-rw-r--r-- | htk_io/src/KaldiLib/MlfStream.h | 639 |
1 files changed, 639 insertions, 0 deletions
diff --git a/htk_io/src/KaldiLib/MlfStream.h b/htk_io/src/KaldiLib/MlfStream.h new file mode 100644 index 0000000..d643f5c --- /dev/null +++ b/htk_io/src/KaldiLib/MlfStream.h @@ -0,0 +1,639 @@ +/** @file MlfStream.h + * This is an TNet C++ Library header. + * + * The naming convention in this file coppies the std::* naming as well as STK + */ + + +#ifndef STK_MlfStream_h +#define STK_MlfStream_h + +#include <iostream> +#include <vector> +#include <map> +#include <list> +#include <set> + + +namespace TNet +{ + class LabelRecord; + class LabelContainer; + + + /// this container stores the lables in linear order as they came + /// i.e. they cannot be hashed + typedef std::list< std::pair<std::string,LabelRecord> *> LabelListType; + + /// type of the container used to store the labels + typedef std::map<std::string, LabelRecord> LabelHashType; + + + + /** + * @brief Describes type of MLF definition + * + * See HTK book for MLF structure. Terms used in TNet are + * compatible with those in HTK book. + */ + enum MlfDefType + { + MLF_DEF_UNKNOWN = 0, ///< unknown definition + MLF_DEF_IMMEDIATE_TRANSCRIPTION, ///< immediate transcription + MLF_DEF_SUB_DIR_DEF ///< subdirectory definition + }; + + + + /** ************************************************************************** + * @brief Holds association between label and stream + */ + class LabelRecord + { + + public: + LabelRecord() : miLabelListLimit(NULL) + { } + + ~LabelRecord() + { } + + /// definition type + MlfDefType mDefType; + + /// position of the label in the stream + std::streampos mStreamPos; + + /** + * @brief points to the current end of the LabelList + * + * The reason for storing this value is to know when we inserted + * a label into the hash. It is possible, that the hash label came + * after list label, in which case the list label is prefered + */ + LabelListType::iterator miLabelListLimit; + + }; + + + + + /** + * @brief Provides an interface to label hierarchy and searching + * + * This class stores label files in a map structure. When a wildcard + * convence is used, the class stores the labels in separate maps according + * to level of wildcard abstraction. By level we mean the directory structure + * depth. + */ + class LabelContainer + { + public: + /// The constructor + LabelContainer() : mUseHashedSearch(true) {} + + /// The destructor + ~LabelContainer(); + + /** + * @brief Inserts new label to the hash structure + */ + void + Insert( + const std::string & rLabel, + std::streampos Pos); + + + /** + * @brief Looks for a record in the hash + */ + bool + FindInHash( + const std::string& rLabel, + LabelRecord& rLS); + + /** + * @brief Looks for a record in the list + * @param rLabel Label to look for + * @param rLS Structure to fill with found data + * @param limitSearch If true @p rLS's @c mLabelListLimit gives the limiting position in the list + */ + bool + FindInList( + const std::string& rLabel, + LabelRecord& rLS, + bool limitSearch = false); + + /** + * @brief Looks for a record + */ + bool + Find( + const std::string & rLabel, + LabelRecord & rLS); + + /** + * @brief Returns the matched pattern + */ + const std::string & + MatchedPattern() const + { + return mMatchedPattern; + } + + /** + * @brief Returns the matched pattern mask (%%%) + */ + const std::string & + MatchedPatternMask() const + { + return mMatchedPatternMask; + } + + /** + * @brief Writes contents to stream (text) + * @param rOStream stream to write to + */ + void + Write(std::ostream& rOStream); + + private: + /// type used for directory depth notation + typedef size_t DepthType; + + + /// this set stores depths of * labels observed at insertion + std::set<DepthType> mDepths; + + /// stores the labels + LabelHashType mLabelMap; + LabelListType mLabelList; + + /// true if labels are to be sought by hashing function (fast) or by + /// sequential search (slow) + bool mUseHashedSearch; + + /// if Find matches the label, this var stores the pattern that matched the + /// query + std::string mMatchedPattern; + + /// if Find matches the label, this var stores the the masked characters. + /// The mask is given by '%' symbols + std::string mMatchedPatternMask; + + /** + * @brief Returns the directory depth of path + */ + size_t + DirDepth(const std::string & path); + + + }; + + + /** + * @brief MLF output buffer definition + */ + template< + typename _CharT, + typename _Traits = std::char_traits<_CharT>, + typename _CharTA = std::allocator<_CharT>, + typename ByteT = char, + typename ByteAT = std::allocator<ByteT> + > + class BasicOMlfStreamBuf + : public std::basic_streambuf<_CharT, _Traits> + { + public: + // necessary typedefs .................................................... + typedef BasicOMlfStreamBuf<_CharT,_Traits,_CharTA,ByteT,ByteAT> + this_type; + typedef std::basic_ostream<_CharT, _Traits>& + OStreamReference; + typedef std::basic_streambuf<_CharT, _Traits> + StreamBufType; + typedef _CharTA char_allocator_type; + typedef _CharT char_type; + typedef typename _Traits::int_type int_type; + typedef typename _Traits::pos_type pos_type; + typedef ByteT byte_type; + typedef ByteAT byte_allocator_type; + typedef byte_type* byte_buffer_type; + typedef std::vector<byte_type, byte_allocator_type > byte_vector_type; + typedef std::vector<char_type, char_allocator_type > char_vector_type; + + + BasicOMlfStreamBuf(OStreamReference rOStream, size_t bufferSize); + + ~BasicOMlfStreamBuf(); + + // virtual functions inherited from basic_streambuf....................... + int + sync(); + + /** + * @brief Write character in the case of overflow + * @param c Character to be written. + * @return A value different than EOF (or traits::eof() for other traits) + * signals success. If the function fails, either EOF + * (or traits::eof() for other traits) is returned or an + * exception is thrown. + */ + int_type + overflow(int_type c = _Traits::eof()); + + + // MLF specific functions ................................................ + /** + * @brief Creates a new MLF block + * @param rFileName filename to be opened + */ + this_type* + Open(const std::string& rFileName); + + /** + * @brief Closes MLF block + */ + void + Close(); + + /** + * @brief Returns true if the MLF is now in open state + */ + bool + IsOpen() const + { return mIsOpen; } + + LabelContainer& + rLabels() + { return mLabels; } + + private: + bool mIsOpen; + char_type mLastChar; + OStreamReference mOStream; + LabelContainer mLabels; + }; // class BasicOMlfStreamBuf + + + + /** + * @brief MLF input buffer definition + */ + template< + typename _CharT, + typename _Traits = std::char_traits<_CharT>, + typename _CharTA = std::allocator<_CharT>, + typename ByteT = char, + typename ByteAT = std::allocator<ByteT> + > + class BasicIMlfStreamBuf + : public std::basic_streambuf<_CharT, _Traits> + { + private: + // internal automaton states + static const int IN_HEADER_STATE = 0; + static const int OUT_OF_BODY_STATE = 1; + static const int IN_TITLE_STATE = 2; + static const int IN_BODY_STATE = 3; + + + public: // necessary typedefs .............................................. + typedef BasicIMlfStreamBuf<_CharT,_Traits,_CharTA,ByteT,ByteAT> + this_type; + typedef std::basic_istream<_CharT, _Traits>& IStreamReference; + typedef std::basic_streambuf<_CharT, _Traits> + StreamBufType; + typedef _CharTA char_allocator_type; + typedef _CharT char_type; + typedef typename _Traits::int_type int_type; + typedef typename _Traits::pos_type pos_type; + typedef ByteT byte_type; + typedef ByteAT byte_allocator_type; + typedef byte_type* byte_buffer_type; + typedef std::vector<byte_type, byte_allocator_type > byte_vector_type; + typedef std::vector<char_type, char_allocator_type > char_vector_type; + + + public: + // constructors and destructors .......................................... + BasicIMlfStreamBuf(IStreamReference rIStream, size_t bufferSize = 1024); + + ~BasicIMlfStreamBuf(); + + // virtual functions inherited from basic_streambuf....................... + /** + * @brief Get character in the case of underflow + * + * @return The new character available at the get pointer position, if + * any. Otherwise, traits::eof() is returned. + */ + int_type + underflow(); + + + // MLF specific functions ................................................ + /** + * @brief Creates a new MLF block + * @param rFileName filename to be opened + */ + this_type* + Open(const std::string& rFileName); + + /** + * @brief Closes MLF block + */ + this_type* + Close(); + + /** + * @brief Returns true if the MLF is now in open state + */ + bool + IsOpen() const + { return mIsOpen; } + + /** + * @brief Parses the stream (if possible) and stores positions to the + * label titles + */ + void + Index(); + + bool + IsHashed() const + { return mIsHashed; } + + /** + * @brief Jumps to next label definition + * @param rName std::string to be filled with the label name + * @return true on success + * + * The procedure automatically tries to hash the labels. + */ + bool + JumpToNextDefinition(std::string& rName); + + /** + * @brief Returns reference to the base stream + * @return reference to the stream + * + */ + IStreamReference + GetBaseStream() + { + return mIStream; + } + + private: // auxillary functions ............................................ + /** + * @brief Fills the line buffer with next line and updates the internal + * state of the finite automaton + */ + void + FillLineBuffer(); + + + private: // atributes ...................................................... + // some flags + bool mIsOpen; + bool mIsHashed; + bool mIsEof; + + /// internal state of the finite automaton + int mState; + + IStreamReference mIStream; + LabelContainer mLabels; + + std::vector<char_type> mLineBuffer; + }; // class BasicIMlfStreamBuf + + + + + /** + * @brief Base class with type-independent members for the Mlf Output + * Stram class + * + * This is a derivative of the basic_ios class. We derive it as we need + * to override some member functions + */ + template< + typename Elem, + typename Tr = std::char_traits<Elem>, + typename ElemA = std::allocator<Elem>, + typename ByteT = char, + typename ByteAT = std::allocator<ByteT> + > + class BasicOMlfStreamBase + : virtual public std::basic_ios<Elem,Tr> + { + public: + typedef std::basic_ostream<Elem, Tr>& OStreamReference; + typedef BasicOMlfStreamBuf < + Elem,Tr,ElemA,ByteT,ByteAT> OMlfStreamBufType; + + /** + * @brief constructor + * + * @param rOStream user defined output stream + */ + BasicOMlfStreamBase(OStreamReference rOStream, + size_t bufferSize) + : mBuf(rOStream, bufferSize) + { this->init(&mBuf); }; + + /** + * @brief Returns a pointer to the buffer object for this stream + */ + OMlfStreamBufType* + rdbuf() + { return &mBuf; }; + + private: + OMlfStreamBufType mBuf; + }; + + + template< + typename Elem, + typename Tr = std::char_traits<Elem>, + typename ElemA = std::allocator<Elem>, + typename ByteT = char, + typename ByteAT = std::allocator<ByteT> + > + class BasicIMlfStreamBase + : virtual public std::basic_ios<Elem,Tr> + { + public: + typedef std::basic_istream<Elem, Tr>& IStreamReference; + typedef BasicIMlfStreamBuf < + Elem,Tr,ElemA,ByteT,ByteAT> IMlfStreamBufType; + + BasicIMlfStreamBase( IStreamReference rIStream, + size_t bufferSize) + : mBuf(rIStream, bufferSize) + { this->init(&mBuf ); }; + + IMlfStreamBufType* + rdbuf() + { return &mBuf; }; + + IStreamReference + GetBaseStream() + { return mBuf.GetBaseStream(); } + + private: + IMlfStreamBufType mBuf; + }; + + + template< + typename Elem, + typename Tr = std::char_traits<Elem>, + typename ElemA = std::allocator<Elem>, + typename ByteT = char, + typename ByteAT = std::allocator<ByteT> + > + class BasicOMlfStream + : public BasicOMlfStreamBase<Elem,Tr,ElemA,ByteT,ByteAT>, + public std::basic_ostream<Elem,Tr> + { + public: + typedef BasicOMlfStreamBase< Elem,Tr,ElemA,ByteT,ByteAT> + BasicOMlfStreamBaseType; + typedef std::basic_ostream<Elem,Tr> OStreamType; + typedef OStreamType& OStreamReference; + + BasicOMlfStream(OStreamReference rOStream, size_t bufferSize = 32) + : BasicOMlfStreamBaseType(rOStream, bufferSize), + OStreamType(BasicOMlfStreamBaseType::rdbuf()) + { } + + /** + * @brief Destructor closes the stream + */ + ~BasicOMlfStream() + { } + + + /** + * @brief Creates a new MLF block + * @param rFileName filename to be opened + */ + void + Open(const std::string& rFileName) + { BasicOMlfStreamBaseType::rdbuf()->Open(rFileName); } + + /** + * @brief Closes MLF block + */ + void + Close() + { BasicOMlfStreamBaseType::rdbuf()->Close(); } + + /** + * @brief Returns true if the MLF is now in open state + */ + bool + IsOpen() const + { return BasicOMlfStreamBaseType::rdbuf()->IsOpen(); } + + /** + * @brief Accessor to the label container + * @return Reference to the label container + */ + LabelContainer& + rLabels() + { return BasicOMlfStreamBaseType::rdbuf()->rLabels(); } + }; + + + + template< + typename Elem, + typename Tr = std::char_traits<Elem>, + typename ElemA = std::allocator<Elem>, + typename ByteT = char, + typename ByteAT = std::allocator<ByteT> + > + class BasicIMlfStream + : public BasicIMlfStreamBase<Elem,Tr,ElemA,ByteT,ByteAT>, + public std::basic_istream<Elem,Tr> + { + public: + typedef BasicIMlfStreamBase <Elem,Tr,ElemA,ByteT,ByteAT> + BasicIMlfStreamBaseType; + typedef std::basic_istream<Elem,Tr> IStreamType; + typedef IStreamType& IStreamReference; + typedef unsigned char byte_type; + + BasicIMlfStream(IStreamReference rIStream, size_t bufferSize = 32) + : BasicIMlfStreamBaseType(rIStream, bufferSize), + IStreamType(BasicIMlfStreamBaseType::rdbuf()) + {}; + + + /** + * @brief Creates a new MLF block + * @param rFileName filename to be opened + */ + void + Open(const std::string& rFileName) + { + std::basic_streambuf<Elem, Tr>* p_buf; + + p_buf = BasicIMlfStreamBaseType::rdbuf()->Open(rFileName); + + if (NULL == p_buf) { + IStreamType::clear(IStreamType::rdstate() | std::ios::failbit); + } + else { + IStreamType::clear(); + } + } + + /** + * @brief Closes MLF block. + * In fact, nothing is done + */ + void + Close() + { + if (NULL == BasicIMlfStreamBaseType::rdbuf()->Close()) { + IStreamType::clear(IStreamType::rdstate() | std::ios::failbit); + } + } + + void + Index() + { BasicIMlfStreamBaseType::rdbuf()->Index(); } + + bool + IsHashed() const + { return BasicIMlfStreamBaseType::rdbuf()->IsHashed(); } + + }; + + + + // MAIN TYPEDEFS.............................................................. + typedef BasicOMlfStream<char> OMlfStream; + typedef BasicOMlfStream<wchar_t> WOMlfStream; + typedef BasicIMlfStream<char> IMlfStream; + typedef BasicIMlfStream<wchar_t> WIMlfStream; + + +#ifdef PATH_MAX + const size_t MAX_LABEL_DEPTH = PATH_MAX; +#else + const size_t MAX_LABEL_DEPTH = 1024; +#endif + + +} // namespace TNet + +#include "MlfStream.tcc" + +#endif |