diff options
Diffstat (limited to 'tnet_io/KaldiLib/MlfStream.h')
-rw-r--r-- | tnet_io/KaldiLib/MlfStream.h | 639 |
1 files changed, 0 insertions, 639 deletions
diff --git a/tnet_io/KaldiLib/MlfStream.h b/tnet_io/KaldiLib/MlfStream.h deleted file mode 100644 index d643f5c..0000000 --- a/tnet_io/KaldiLib/MlfStream.h +++ /dev/null @@ -1,639 +0,0 @@ -/** @file MlfStream.h - * This is an TNet C++ Library header. - * - * The naming convention in this file coppies the std::* naming as well as STK - */ - - -#ifndef STK_MlfStream_h -#define STK_MlfStream_h - -#include <iostream> -#include <vector> -#include <map> -#include <list> -#include <set> - - -namespace TNet -{ - class LabelRecord; - class LabelContainer; - - - /// this container stores the lables in linear order as they came - /// i.e. they cannot be hashed - typedef std::list< std::pair<std::string,LabelRecord> *> LabelListType; - - /// type of the container used to store the labels - typedef std::map<std::string, LabelRecord> LabelHashType; - - - - /** - * @brief Describes type of MLF definition - * - * See HTK book for MLF structure. Terms used in TNet are - * compatible with those in HTK book. - */ - enum MlfDefType - { - MLF_DEF_UNKNOWN = 0, ///< unknown definition - MLF_DEF_IMMEDIATE_TRANSCRIPTION, ///< immediate transcription - MLF_DEF_SUB_DIR_DEF ///< subdirectory definition - }; - - - - /** ************************************************************************** - * @brief Holds association between label and stream - */ - class LabelRecord - { - - public: - LabelRecord() : miLabelListLimit(NULL) - { } - - ~LabelRecord() - { } - - /// definition type - MlfDefType mDefType; - - /// position of the label in the stream - std::streampos mStreamPos; - - /** - * @brief points to the current end of the LabelList - * - * The reason for storing this value is to know when we inserted - * a label into the hash. It is possible, that the hash label came - * after list label, in which case the list label is prefered - */ - LabelListType::iterator miLabelListLimit; - - }; - - - - - /** - * @brief Provides an interface to label hierarchy and searching - * - * This class stores label files in a map structure. When a wildcard - * convence is used, the class stores the labels in separate maps according - * to level of wildcard abstraction. By level we mean the directory structure - * depth. - */ - class LabelContainer - { - public: - /// The constructor - LabelContainer() : mUseHashedSearch(true) {} - - /// The destructor - ~LabelContainer(); - - /** - * @brief Inserts new label to the hash structure - */ - void - Insert( - const std::string & rLabel, - std::streampos Pos); - - - /** - * @brief Looks for a record in the hash - */ - bool - FindInHash( - const std::string& rLabel, - LabelRecord& rLS); - - /** - * @brief Looks for a record in the list - * @param rLabel Label to look for - * @param rLS Structure to fill with found data - * @param limitSearch If true @p rLS's @c mLabelListLimit gives the limiting position in the list - */ - bool - FindInList( - const std::string& rLabel, - LabelRecord& rLS, - bool limitSearch = false); - - /** - * @brief Looks for a record - */ - bool - Find( - const std::string & rLabel, - LabelRecord & rLS); - - /** - * @brief Returns the matched pattern - */ - const std::string & - MatchedPattern() const - { - return mMatchedPattern; - } - - /** - * @brief Returns the matched pattern mask (%%%) - */ - const std::string & - MatchedPatternMask() const - { - return mMatchedPatternMask; - } - - /** - * @brief Writes contents to stream (text) - * @param rOStream stream to write to - */ - void - Write(std::ostream& rOStream); - - private: - /// type used for directory depth notation - typedef size_t DepthType; - - - /// this set stores depths of * labels observed at insertion - std::set<DepthType> mDepths; - - /// stores the labels - LabelHashType mLabelMap; - LabelListType mLabelList; - - /// true if labels are to be sought by hashing function (fast) or by - /// sequential search (slow) - bool mUseHashedSearch; - - /// if Find matches the label, this var stores the pattern that matched the - /// query - std::string mMatchedPattern; - - /// if Find matches the label, this var stores the the masked characters. - /// The mask is given by '%' symbols - std::string mMatchedPatternMask; - - /** - * @brief Returns the directory depth of path - */ - size_t - DirDepth(const std::string & path); - - - }; - - - /** - * @brief MLF output buffer definition - */ - template< - typename _CharT, - typename _Traits = std::char_traits<_CharT>, - typename _CharTA = std::allocator<_CharT>, - typename ByteT = char, - typename ByteAT = std::allocator<ByteT> - > - class BasicOMlfStreamBuf - : public std::basic_streambuf<_CharT, _Traits> - { - public: - // necessary typedefs .................................................... - typedef BasicOMlfStreamBuf<_CharT,_Traits,_CharTA,ByteT,ByteAT> - this_type; - typedef std::basic_ostream<_CharT, _Traits>& - OStreamReference; - typedef std::basic_streambuf<_CharT, _Traits> - StreamBufType; - typedef _CharTA char_allocator_type; - typedef _CharT char_type; - typedef typename _Traits::int_type int_type; - typedef typename _Traits::pos_type pos_type; - typedef ByteT byte_type; - typedef ByteAT byte_allocator_type; - typedef byte_type* byte_buffer_type; - typedef std::vector<byte_type, byte_allocator_type > byte_vector_type; - typedef std::vector<char_type, char_allocator_type > char_vector_type; - - - BasicOMlfStreamBuf(OStreamReference rOStream, size_t bufferSize); - - ~BasicOMlfStreamBuf(); - - // virtual functions inherited from basic_streambuf....................... - int - sync(); - - /** - * @brief Write character in the case of overflow - * @param c Character to be written. - * @return A value different than EOF (or traits::eof() for other traits) - * signals success. If the function fails, either EOF - * (or traits::eof() for other traits) is returned or an - * exception is thrown. - */ - int_type - overflow(int_type c = _Traits::eof()); - - - // MLF specific functions ................................................ - /** - * @brief Creates a new MLF block - * @param rFileName filename to be opened - */ - this_type* - Open(const std::string& rFileName); - - /** - * @brief Closes MLF block - */ - void - Close(); - - /** - * @brief Returns true if the MLF is now in open state - */ - bool - IsOpen() const - { return mIsOpen; } - - LabelContainer& - rLabels() - { return mLabels; } - - private: - bool mIsOpen; - char_type mLastChar; - OStreamReference mOStream; - LabelContainer mLabels; - }; // class BasicOMlfStreamBuf - - - - /** - * @brief MLF input buffer definition - */ - template< - typename _CharT, - typename _Traits = std::char_traits<_CharT>, - typename _CharTA = std::allocator<_CharT>, - typename ByteT = char, - typename ByteAT = std::allocator<ByteT> - > - class BasicIMlfStreamBuf - : public std::basic_streambuf<_CharT, _Traits> - { - private: - // internal automaton states - static const int IN_HEADER_STATE = 0; - static const int OUT_OF_BODY_STATE = 1; - static const int IN_TITLE_STATE = 2; - static const int IN_BODY_STATE = 3; - - - public: // necessary typedefs .............................................. - typedef BasicIMlfStreamBuf<_CharT,_Traits,_CharTA,ByteT,ByteAT> - this_type; - typedef std::basic_istream<_CharT, _Traits>& IStreamReference; - typedef std::basic_streambuf<_CharT, _Traits> - StreamBufType; - typedef _CharTA char_allocator_type; - typedef _CharT char_type; - typedef typename _Traits::int_type int_type; - typedef typename _Traits::pos_type pos_type; - typedef ByteT byte_type; - typedef ByteAT byte_allocator_type; - typedef byte_type* byte_buffer_type; - typedef std::vector<byte_type, byte_allocator_type > byte_vector_type; - typedef std::vector<char_type, char_allocator_type > char_vector_type; - - - public: - // constructors and destructors .......................................... - BasicIMlfStreamBuf(IStreamReference rIStream, size_t bufferSize = 1024); - - ~BasicIMlfStreamBuf(); - - // virtual functions inherited from basic_streambuf....................... - /** - * @brief Get character in the case of underflow - * - * @return The new character available at the get pointer position, if - * any. Otherwise, traits::eof() is returned. - */ - int_type - underflow(); - - - // MLF specific functions ................................................ - /** - * @brief Creates a new MLF block - * @param rFileName filename to be opened - */ - this_type* - Open(const std::string& rFileName); - - /** - * @brief Closes MLF block - */ - this_type* - Close(); - - /** - * @brief Returns true if the MLF is now in open state - */ - bool - IsOpen() const - { return mIsOpen; } - - /** - * @brief Parses the stream (if possible) and stores positions to the - * label titles - */ - void - Index(); - - bool - IsHashed() const - { return mIsHashed; } - - /** - * @brief Jumps to next label definition - * @param rName std::string to be filled with the label name - * @return true on success - * - * The procedure automatically tries to hash the labels. - */ - bool - JumpToNextDefinition(std::string& rName); - - /** - * @brief Returns reference to the base stream - * @return reference to the stream - * - */ - IStreamReference - GetBaseStream() - { - return mIStream; - } - - private: // auxillary functions ............................................ - /** - * @brief Fills the line buffer with next line and updates the internal - * state of the finite automaton - */ - void - FillLineBuffer(); - - - private: // atributes ...................................................... - // some flags - bool mIsOpen; - bool mIsHashed; - bool mIsEof; - - /// internal state of the finite automaton - int mState; - - IStreamReference mIStream; - LabelContainer mLabels; - - std::vector<char_type> mLineBuffer; - }; // class BasicIMlfStreamBuf - - - - - /** - * @brief Base class with type-independent members for the Mlf Output - * Stram class - * - * This is a derivative of the basic_ios class. We derive it as we need - * to override some member functions - */ - template< - typename Elem, - typename Tr = std::char_traits<Elem>, - typename ElemA = std::allocator<Elem>, - typename ByteT = char, - typename ByteAT = std::allocator<ByteT> - > - class BasicOMlfStreamBase - : virtual public std::basic_ios<Elem,Tr> - { - public: - typedef std::basic_ostream<Elem, Tr>& OStreamReference; - typedef BasicOMlfStreamBuf < - Elem,Tr,ElemA,ByteT,ByteAT> OMlfStreamBufType; - - /** - * @brief constructor - * - * @param rOStream user defined output stream - */ - BasicOMlfStreamBase(OStreamReference rOStream, - size_t bufferSize) - : mBuf(rOStream, bufferSize) - { this->init(&mBuf); }; - - /** - * @brief Returns a pointer to the buffer object for this stream - */ - OMlfStreamBufType* - rdbuf() - { return &mBuf; }; - - private: - OMlfStreamBufType mBuf; - }; - - - template< - typename Elem, - typename Tr = std::char_traits<Elem>, - typename ElemA = std::allocator<Elem>, - typename ByteT = char, - typename ByteAT = std::allocator<ByteT> - > - class BasicIMlfStreamBase - : virtual public std::basic_ios<Elem,Tr> - { - public: - typedef std::basic_istream<Elem, Tr>& IStreamReference; - typedef BasicIMlfStreamBuf < - Elem,Tr,ElemA,ByteT,ByteAT> IMlfStreamBufType; - - BasicIMlfStreamBase( IStreamReference rIStream, - size_t bufferSize) - : mBuf(rIStream, bufferSize) - { this->init(&mBuf ); }; - - IMlfStreamBufType* - rdbuf() - { return &mBuf; }; - - IStreamReference - GetBaseStream() - { return mBuf.GetBaseStream(); } - - private: - IMlfStreamBufType mBuf; - }; - - - template< - typename Elem, - typename Tr = std::char_traits<Elem>, - typename ElemA = std::allocator<Elem>, - typename ByteT = char, - typename ByteAT = std::allocator<ByteT> - > - class BasicOMlfStream - : public BasicOMlfStreamBase<Elem,Tr,ElemA,ByteT,ByteAT>, - public std::basic_ostream<Elem,Tr> - { - public: - typedef BasicOMlfStreamBase< Elem,Tr,ElemA,ByteT,ByteAT> - BasicOMlfStreamBaseType; - typedef std::basic_ostream<Elem,Tr> OStreamType; - typedef OStreamType& OStreamReference; - - BasicOMlfStream(OStreamReference rOStream, size_t bufferSize = 32) - : BasicOMlfStreamBaseType(rOStream, bufferSize), - OStreamType(BasicOMlfStreamBaseType::rdbuf()) - { } - - /** - * @brief Destructor closes the stream - */ - ~BasicOMlfStream() - { } - - - /** - * @brief Creates a new MLF block - * @param rFileName filename to be opened - */ - void - Open(const std::string& rFileName) - { BasicOMlfStreamBaseType::rdbuf()->Open(rFileName); } - - /** - * @brief Closes MLF block - */ - void - Close() - { BasicOMlfStreamBaseType::rdbuf()->Close(); } - - /** - * @brief Returns true if the MLF is now in open state - */ - bool - IsOpen() const - { return BasicOMlfStreamBaseType::rdbuf()->IsOpen(); } - - /** - * @brief Accessor to the label container - * @return Reference to the label container - */ - LabelContainer& - rLabels() - { return BasicOMlfStreamBaseType::rdbuf()->rLabels(); } - }; - - - - template< - typename Elem, - typename Tr = std::char_traits<Elem>, - typename ElemA = std::allocator<Elem>, - typename ByteT = char, - typename ByteAT = std::allocator<ByteT> - > - class BasicIMlfStream - : public BasicIMlfStreamBase<Elem,Tr,ElemA,ByteT,ByteAT>, - public std::basic_istream<Elem,Tr> - { - public: - typedef BasicIMlfStreamBase <Elem,Tr,ElemA,ByteT,ByteAT> - BasicIMlfStreamBaseType; - typedef std::basic_istream<Elem,Tr> IStreamType; - typedef IStreamType& IStreamReference; - typedef unsigned char byte_type; - - BasicIMlfStream(IStreamReference rIStream, size_t bufferSize = 32) - : BasicIMlfStreamBaseType(rIStream, bufferSize), - IStreamType(BasicIMlfStreamBaseType::rdbuf()) - {}; - - - /** - * @brief Creates a new MLF block - * @param rFileName filename to be opened - */ - void - Open(const std::string& rFileName) - { - std::basic_streambuf<Elem, Tr>* p_buf; - - p_buf = BasicIMlfStreamBaseType::rdbuf()->Open(rFileName); - - if (NULL == p_buf) { - IStreamType::clear(IStreamType::rdstate() | std::ios::failbit); - } - else { - IStreamType::clear(); - } - } - - /** - * @brief Closes MLF block. - * In fact, nothing is done - */ - void - Close() - { - if (NULL == BasicIMlfStreamBaseType::rdbuf()->Close()) { - IStreamType::clear(IStreamType::rdstate() | std::ios::failbit); - } - } - - void - Index() - { BasicIMlfStreamBaseType::rdbuf()->Index(); } - - bool - IsHashed() const - { return BasicIMlfStreamBaseType::rdbuf()->IsHashed(); } - - }; - - - - // MAIN TYPEDEFS.............................................................. - typedef BasicOMlfStream<char> OMlfStream; - typedef BasicOMlfStream<wchar_t> WOMlfStream; - typedef BasicIMlfStream<char> IMlfStream; - typedef BasicIMlfStream<wchar_t> WIMlfStream; - - -#ifdef PATH_MAX - const size_t MAX_LABEL_DEPTH = PATH_MAX; -#else - const size_t MAX_LABEL_DEPTH = 1024; -#endif - - -} // namespace TNet - -#include "MlfStream.tcc" - -#endif |