/** @file MlfStream.h * This is an TNet C++ Library header. * * The naming convention in this file coppies the std::* naming as well as STK */ #ifndef STK_MlfStream_h #define STK_MlfStream_h #include #include #include #include #include namespace TNet { class LabelRecord; class LabelContainer; /// this container stores the lables in linear order as they came /// i.e. they cannot be hashed typedef std::list< std::pair *> LabelListType; /// type of the container used to store the labels typedef std::map LabelHashType; /** * @brief Describes type of MLF definition * * See HTK book for MLF structure. Terms used in TNet are * compatible with those in HTK book. */ enum MlfDefType { MLF_DEF_UNKNOWN = 0, ///< unknown definition MLF_DEF_IMMEDIATE_TRANSCRIPTION, ///< immediate transcription MLF_DEF_SUB_DIR_DEF ///< subdirectory definition }; /** ************************************************************************** * @brief Holds association between label and stream */ class LabelRecord { public: LabelRecord() : miLabelListLimit(NULL) { } ~LabelRecord() { } /// definition type MlfDefType mDefType; /// position of the label in the stream std::streampos mStreamPos; /** * @brief points to the current end of the LabelList * * The reason for storing this value is to know when we inserted * a label into the hash. It is possible, that the hash label came * after list label, in which case the list label is prefered */ LabelListType::iterator miLabelListLimit; }; /** * @brief Provides an interface to label hierarchy and searching * * This class stores label files in a map structure. When a wildcard * convence is used, the class stores the labels in separate maps according * to level of wildcard abstraction. By level we mean the directory structure * depth. */ class LabelContainer { public: /// The constructor LabelContainer() : mUseHashedSearch(true) {} /// The destructor ~LabelContainer(); /** * @brief Inserts new label to the hash structure */ void Insert( const std::string & rLabel, std::streampos Pos); /** * @brief Looks for a record in the hash */ bool FindInHash( const std::string& rLabel, LabelRecord& rLS); /** * @brief Looks for a record in the list * @param rLabel Label to look for * @param rLS Structure to fill with found data * @param limitSearch If true @p rLS's @c mLabelListLimit gives the limiting position in the list */ bool FindInList( const std::string& rLabel, LabelRecord& rLS, bool limitSearch = false); /** * @brief Looks for a record */ bool Find( const std::string & rLabel, LabelRecord & rLS); /** * @brief Returns the matched pattern */ const std::string & MatchedPattern() const { return mMatchedPattern; } /** * @brief Returns the matched pattern mask (%%%) */ const std::string & MatchedPatternMask() const { return mMatchedPatternMask; } /** * @brief Writes contents to stream (text) * @param rOStream stream to write to */ void Write(std::ostream& rOStream); private: /// type used for directory depth notation typedef size_t DepthType; /// this set stores depths of * labels observed at insertion std::set mDepths; /// stores the labels LabelHashType mLabelMap; LabelListType mLabelList; /// true if labels are to be sought by hashing function (fast) or by /// sequential search (slow) bool mUseHashedSearch; /// if Find matches the label, this var stores the pattern that matched the /// query std::string mMatchedPattern; /// if Find matches the label, this var stores the the masked characters. /// The mask is given by '%' symbols std::string mMatchedPatternMask; /** * @brief Returns the directory depth of path */ size_t DirDepth(const std::string & path); }; /** * @brief MLF output buffer definition */ template< typename _CharT, typename _Traits = std::char_traits<_CharT>, typename _CharTA = std::allocator<_CharT>, typename ByteT = char, typename ByteAT = std::allocator > class BasicOMlfStreamBuf : public std::basic_streambuf<_CharT, _Traits> { public: // necessary typedefs .................................................... typedef BasicOMlfStreamBuf<_CharT,_Traits,_CharTA,ByteT,ByteAT> this_type; typedef std::basic_ostream<_CharT, _Traits>& OStreamReference; typedef std::basic_streambuf<_CharT, _Traits> StreamBufType; typedef _CharTA char_allocator_type; typedef _CharT char_type; typedef typename _Traits::int_type int_type; typedef typename _Traits::pos_type pos_type; typedef ByteT byte_type; typedef ByteAT byte_allocator_type; typedef byte_type* byte_buffer_type; typedef std::vector byte_vector_type; typedef std::vector char_vector_type; BasicOMlfStreamBuf(OStreamReference rOStream, size_t bufferSize); ~BasicOMlfStreamBuf(); // virtual functions inherited from basic_streambuf....................... int sync(); /** * @brief Write character in the case of overflow * @param c Character to be written. * @return A value different than EOF (or traits::eof() for other traits) * signals success. If the function fails, either EOF * (or traits::eof() for other traits) is returned or an * exception is thrown. */ int_type overflow(int_type c = _Traits::eof()); // MLF specific functions ................................................ /** * @brief Creates a new MLF block * @param rFileName filename to be opened */ this_type* Open(const std::string& rFileName); /** * @brief Closes MLF block */ void Close(); /** * @brief Returns true if the MLF is now in open state */ bool IsOpen() const { return mIsOpen; } LabelContainer& rLabels() { return mLabels; } private: bool mIsOpen; char_type mLastChar; OStreamReference mOStream; LabelContainer mLabels; }; // class BasicOMlfStreamBuf /** * @brief MLF input buffer definition */ template< typename _CharT, typename _Traits = std::char_traits<_CharT>, typename _CharTA = std::allocator<_CharT>, typename ByteT = char, typename ByteAT = std::allocator > class BasicIMlfStreamBuf : public std::basic_streambuf<_CharT, _Traits> { private: // internal automaton states static const int IN_HEADER_STATE = 0; static const int OUT_OF_BODY_STATE = 1; static const int IN_TITLE_STATE = 2; static const int IN_BODY_STATE = 3; public: // necessary typedefs .............................................. typedef BasicIMlfStreamBuf<_CharT,_Traits,_CharTA,ByteT,ByteAT> this_type; typedef std::basic_istream<_CharT, _Traits>& IStreamReference; typedef std::basic_streambuf<_CharT, _Traits> StreamBufType; typedef _CharTA char_allocator_type; typedef _CharT char_type; typedef typename _Traits::int_type int_type; typedef typename _Traits::pos_type pos_type; typedef ByteT byte_type; typedef ByteAT byte_allocator_type; typedef byte_type* byte_buffer_type; typedef std::vector byte_vector_type; typedef std::vector char_vector_type; public: // constructors and destructors .......................................... BasicIMlfStreamBuf(IStreamReference rIStream, size_t bufferSize = 1024); ~BasicIMlfStreamBuf(); // virtual functions inherited from basic_streambuf....................... /** * @brief Get character in the case of underflow * * @return The new character available at the get pointer position, if * any. Otherwise, traits::eof() is returned. */ int_type underflow(); // MLF specific functions ................................................ /** * @brief Creates a new MLF block * @param rFileName filename to be opened */ this_type* Open(const std::string& rFileName); /** * @brief Closes MLF block */ this_type* Close(); /** * @brief Returns true if the MLF is now in open state */ bool IsOpen() const { return mIsOpen; } /** * @brief Parses the stream (if possible) and stores positions to the * label titles */ void Index(); bool IsHashed() const { return mIsHashed; } /** * @brief Jumps to next label definition * @param rName std::string to be filled with the label name * @return true on success * * The procedure automatically tries to hash the labels. */ bool JumpToNextDefinition(std::string& rName); /** * @brief Returns reference to the base stream * @return reference to the stream * */ IStreamReference GetBaseStream() { return mIStream; } private: // auxillary functions ............................................ /** * @brief Fills the line buffer with next line and updates the internal * state of the finite automaton */ void FillLineBuffer(); private: // atributes ...................................................... // some flags bool mIsOpen; bool mIsHashed; bool mIsEof; /// internal state of the finite automaton int mState; IStreamReference mIStream; LabelContainer mLabels; std::vector mLineBuffer; }; // class BasicIMlfStreamBuf /** * @brief Base class with type-independent members for the Mlf Output * Stram class * * This is a derivative of the basic_ios class. We derive it as we need * to override some member functions */ template< typename Elem, typename Tr = std::char_traits, typename ElemA = std::allocator, typename ByteT = char, typename ByteAT = std::allocator > class BasicOMlfStreamBase : virtual public std::basic_ios { public: typedef std::basic_ostream& OStreamReference; typedef BasicOMlfStreamBuf < Elem,Tr,ElemA,ByteT,ByteAT> OMlfStreamBufType; /** * @brief constructor * * @param rOStream user defined output stream */ BasicOMlfStreamBase(OStreamReference rOStream, size_t bufferSize) : mBuf(rOStream, bufferSize) { this->init(&mBuf); }; /** * @brief Returns a pointer to the buffer object for this stream */ OMlfStreamBufType* rdbuf() { return &mBuf; }; private: OMlfStreamBufType mBuf; }; template< typename Elem, typename Tr = std::char_traits, typename ElemA = std::allocator, typename ByteT = char, typename ByteAT = std::allocator > class BasicIMlfStreamBase : virtual public std::basic_ios { public: typedef std::basic_istream& IStreamReference; typedef BasicIMlfStreamBuf < Elem,Tr,ElemA,ByteT,ByteAT> IMlfStreamBufType; BasicIMlfStreamBase( IStreamReference rIStream, size_t bufferSize) : mBuf(rIStream, bufferSize) { this->init(&mBuf ); }; IMlfStreamBufType* rdbuf() { return &mBuf; }; IStreamReference GetBaseStream() { return mBuf.GetBaseStream(); } private: IMlfStreamBufType mBuf; }; template< typename Elem, typename Tr = std::char_traits, typename ElemA = std::allocator, typename ByteT = char, typename ByteAT = std::allocator > class BasicOMlfStream : public BasicOMlfStreamBase, public std::basic_ostream { public: typedef BasicOMlfStreamBase< Elem,Tr,ElemA,ByteT,ByteAT> BasicOMlfStreamBaseType; typedef std::basic_ostream OStreamType; typedef OStreamType& OStreamReference; BasicOMlfStream(OStreamReference rOStream, size_t bufferSize = 32) : BasicOMlfStreamBaseType(rOStream, bufferSize), OStreamType(BasicOMlfStreamBaseType::rdbuf()) { } /** * @brief Destructor closes the stream */ ~BasicOMlfStream() { } /** * @brief Creates a new MLF block * @param rFileName filename to be opened */ void Open(const std::string& rFileName) { BasicOMlfStreamBaseType::rdbuf()->Open(rFileName); } /** * @brief Closes MLF block */ void Close() { BasicOMlfStreamBaseType::rdbuf()->Close(); } /** * @brief Returns true if the MLF is now in open state */ bool IsOpen() const { return BasicOMlfStreamBaseType::rdbuf()->IsOpen(); } /** * @brief Accessor to the label container * @return Reference to the label container */ LabelContainer& rLabels() { return BasicOMlfStreamBaseType::rdbuf()->rLabels(); } }; template< typename Elem, typename Tr = std::char_traits, typename ElemA = std::allocator, typename ByteT = char, typename ByteAT = std::allocator > class BasicIMlfStream : public BasicIMlfStreamBase, public std::basic_istream { public: typedef BasicIMlfStreamBase BasicIMlfStreamBaseType; typedef std::basic_istream IStreamType; typedef IStreamType& IStreamReference; typedef unsigned char byte_type; BasicIMlfStream(IStreamReference rIStream, size_t bufferSize = 32) : BasicIMlfStreamBaseType(rIStream, bufferSize), IStreamType(BasicIMlfStreamBaseType::rdbuf()) {}; /** * @brief Creates a new MLF block * @param rFileName filename to be opened */ void Open(const std::string& rFileName) { std::basic_streambuf* p_buf; p_buf = BasicIMlfStreamBaseType::rdbuf()->Open(rFileName); if (NULL == p_buf) { IStreamType::clear(IStreamType::rdstate() | std::ios::failbit); } else { IStreamType::clear(); } } /** * @brief Closes MLF block. * In fact, nothing is done */ void Close() { if (NULL == BasicIMlfStreamBaseType::rdbuf()->Close()) { IStreamType::clear(IStreamType::rdstate() | std::ios::failbit); } } void Index() { BasicIMlfStreamBaseType::rdbuf()->Index(); } bool IsHashed() const { return BasicIMlfStreamBaseType::rdbuf()->IsHashed(); } }; // MAIN TYPEDEFS.............................................................. typedef BasicOMlfStream OMlfStream; typedef BasicOMlfStream WOMlfStream; typedef BasicIMlfStream IMlfStream; typedef BasicIMlfStream WIMlfStream; #ifdef PATH_MAX const size_t MAX_LABEL_DEPTH = PATH_MAX; #else const size_t MAX_LABEL_DEPTH = 1024; #endif } // namespace TNet #include "MlfStream.tcc" #endif