summaryrefslogtreecommitdiff
path: root/htk_io/src/KaldiLib/MlfStream.h
diff options
context:
space:
mode:
authorDeterminant <[email protected]>2015-06-25 12:56:45 +0800
committerDeterminant <[email protected]>2015-06-25 12:56:45 +0800
commita74183ddb4ab8383bfe214b3745eb8a0a99ee47a (patch)
treed5e69cf8c4c2db2e3a4722778352fc3c95953bb2 /htk_io/src/KaldiLib/MlfStream.h
parentb6301089cde20f4c825c7f5deaf179082aad63da (diff)
let HTK I/O implementation be a single package
Diffstat (limited to 'htk_io/src/KaldiLib/MlfStream.h')
-rw-r--r--htk_io/src/KaldiLib/MlfStream.h639
1 files changed, 639 insertions, 0 deletions
diff --git a/htk_io/src/KaldiLib/MlfStream.h b/htk_io/src/KaldiLib/MlfStream.h
new file mode 100644
index 0000000..d643f5c
--- /dev/null
+++ b/htk_io/src/KaldiLib/MlfStream.h
@@ -0,0 +1,639 @@
+/** @file MlfStream.h
+ * This is a TNet C++ Library header.
+ *
+ * The naming convention in this file copies the std::* naming as well as STK
+ */
+
+
+#ifndef STK_MlfStream_h
+#define STK_MlfStream_h
+
+#include <iostream>
+#include <vector>
+#include <map>
+#include <list>
+#include <set>
+
+
+namespace TNet
+{
+ class LabelRecord;
+ class LabelContainer;
+
+
+ /// this container stores the labels (as pointers to label/record pairs) in
+ /// linear order as they came, i.e. they cannot be hashed
+ typedef std::list< std::pair<std::string,LabelRecord> *> LabelListType;
+
+ /// type of the container used to store the labels (label -> record map)
+ typedef std::map<std::string, LabelRecord> LabelHashType;
+
+
+
+ /**
+ * @brief Kind of definition an MLF entry carries
+ *
+ * The terminology follows the MLF description in the HTK book, so the
+ * terms used in TNet are compatible with the ones used there.
+ */
+ enum MlfDefType
+ {
+ MLF_DEF_UNKNOWN = 0, ///< definition kind is not known
+ MLF_DEF_IMMEDIATE_TRANSCRIPTION = 1, ///< immediate transcription
+ MLF_DEF_SUB_DIR_DEF = 2 ///< subdirectory definition
+ };
+
+
+
+ /** **************************************************************************
+ * @brief Holds association between label and stream
+ */
+ class LabelRecord
+ {
+ public:
+ /// Value-initializes every member: unknown definition type, default stream
+ /// position and a default-constructed (not yet assigned) list iterator
+ LabelRecord() : mDefType(MLF_DEF_UNKNOWN), mStreamPos(), miLabelListLimit()
+ { }
+
+ ~LabelRecord()
+ { }
+
+ /// definition type
+ MlfDefType mDefType;
+
+ /// position of the label in the stream
+ std::streampos mStreamPos;
+
+ /**
+ * @brief points to the current end of the LabelList
+ *
+ * The reason for storing this value is to know when we inserted
+ * a label into the hash. It is possible that the hash label came
+ * after the list label, in which case the list label is preferred
+ */
+ LabelListType::iterator miLabelListLimit;
+ };
+
+
+
+
+ /**
+ * @brief Provides an interface to label hierarchy and searching
+ *
+ * This class stores label files in a map structure. When a wildcard
+ * convention is used, the class stores the labels in separate maps according
+ * to level of wildcard abstraction. By level we mean the directory structure
+ * depth.
+ */
+ class LabelContainer
+ {
+ public:
+ /// The constructor; hashed search is switched on by default
+ LabelContainer() : mUseHashedSearch(true) {}
+
+ /// The destructor
+ ~LabelContainer();
+
+ /**
+ * @brief Inserts new label to the hash structure
+ */
+ void
+ Insert(
+ const std::string & rLabel,
+ std::streampos Pos);
+
+
+ /**
+ * @brief Looks for a record in the hash
+ */
+ bool
+ FindInHash(
+ const std::string& rLabel,
+ LabelRecord& rLS);
+
+ /**
+ * @brief Looks for a record in the list
+ * @param rLabel Label to look for
+ * @param rLS Structure to fill with found data
+ * @param limitSearch If true @p rLS's @c miLabelListLimit gives the limiting position in the list
+ */
+ bool
+ FindInList(
+ const std::string& rLabel,
+ LabelRecord& rLS,
+ bool limitSearch = false);
+
+ /**
+ * @brief Looks for a record
+ */
+ bool
+ Find(
+ const std::string & rLabel,
+ LabelRecord & rLS);
+
+ /**
+ * @brief Returns the matched pattern
+ */
+ const std::string &
+ MatchedPattern() const
+ {
+ return mMatchedPattern;
+ }
+
+ /**
+ * @brief Returns the matched pattern mask (%%%)
+ */
+ const std::string &
+ MatchedPatternMask() const
+ {
+ return mMatchedPatternMask;
+ }
+
+ /**
+ * @brief Writes contents to stream (text)
+ * @param rOStream stream to write to
+ */
+ void
+ Write(std::ostream& rOStream);
+
+ private:
+ /// type used for directory depth notation
+ typedef size_t DepthType;
+
+
+ /// this set stores depths of * labels observed at insertion
+ std::set<DepthType> mDepths;
+
+ /// stores the labels: the map of LabelHashType and the list of LabelListType
+ LabelHashType mLabelMap;
+ LabelListType mLabelList;
+
+ /// true if labels are to be sought by hashing function (fast), false if
+ /// they are to be sought by sequential search (slow)
+ bool mUseHashedSearch;
+
+ /// if Find matches the label, this var stores the pattern that matched the
+ /// query
+ std::string mMatchedPattern;
+
+ /// if Find matches the label, this var stores the masked characters.
+ /// The mask is given by '%' symbols
+ std::string mMatchedPatternMask;
+
+ /**
+ * @brief Returns the directory depth of path
+ */
+ size_t
+ DirDepth(const std::string & path);
+
+
+ };
+
+
+ /**
+ * @brief MLF output buffer definition
+ */
+ template<
+ typename _CharT,
+ typename _Traits = std::char_traits<_CharT>,
+ typename _CharTA = std::allocator<_CharT>,
+ typename ByteT = char,
+ typename ByteAT = std::allocator<ByteT>
+ >
+ class BasicOMlfStreamBuf
+ : public std::basic_streambuf<_CharT, _Traits>
+ {
+ public:
+ // necessary typedefs ....................................................
+ typedef BasicOMlfStreamBuf<_CharT,_Traits,_CharTA,ByteT,ByteAT>
+ this_type;
+ typedef std::basic_ostream<_CharT, _Traits>&
+ OStreamReference;
+ typedef std::basic_streambuf<_CharT, _Traits>
+ StreamBufType;
+ typedef _CharTA char_allocator_type;
+ typedef _CharT char_type;
+ typedef typename _Traits::int_type int_type;
+ typedef typename _Traits::pos_type pos_type;
+ typedef ByteT byte_type;
+ typedef ByteAT byte_allocator_type;
+ typedef byte_type* byte_buffer_type;
+ typedef std::vector<byte_type, byte_allocator_type > byte_vector_type;
+ typedef std::vector<char_type, char_allocator_type > char_vector_type;
+
+ /// Constructs the buffer for @p rOStream; @p bufferSize is presumably the
+ /// size of the internal buffer -- TODO confirm in MlfStream.tcc
+ BasicOMlfStreamBuf(OStreamReference rOStream, size_t bufferSize);
+
+ ~BasicOMlfStreamBuf();
+
+ // virtual functions inherited from basic_streambuf.......................
+ int
+ sync();
+
+ /**
+ * @brief Write character in the case of overflow
+ * @param c Character to be written.
+ * @return A value different than EOF (or traits::eof() for other traits)
+ * signals success. If the function fails, either EOF
+ * (or traits::eof() for other traits) is returned or an
+ * exception is thrown.
+ */
+ int_type
+ overflow(int_type c = _Traits::eof());
+
+
+ // MLF specific functions ................................................
+ /**
+ * @brief Creates a new MLF block
+ * @param rFileName filename to be opened
+ */
+ this_type*
+ Open(const std::string& rFileName);
+
+ /**
+ * @brief Closes MLF block
+ */
+ void
+ Close();
+
+ /**
+ * @brief Returns true if the MLF is now in open state
+ */
+ bool
+ IsOpen() const
+ { return mIsOpen; }
+ /// Accessor to the label container held by this buffer
+ LabelContainer&
+ rLabels()
+ { return mLabels; }
+
+ private:
+ bool mIsOpen; ///< open-state flag reported by IsOpen()
+ char_type mLastChar;
+ OStreamReference mOStream; ///< reference to an output stream (not a copy)
+ LabelContainer mLabels; ///< label container exposed through rLabels()
+ }; // class BasicOMlfStreamBuf
+
+
+
+ /**
+ * @brief MLF input buffer definition
+ */
+ template<
+ typename _CharT,
+ typename _Traits = std::char_traits<_CharT>,
+ typename _CharTA = std::allocator<_CharT>,
+ typename ByteT = char,
+ typename ByteAT = std::allocator<ByteT>
+ >
+ class BasicIMlfStreamBuf
+ : public std::basic_streambuf<_CharT, _Traits>
+ {
+ private:
+ // internal automaton states (values stored in mState)
+ static const int IN_HEADER_STATE = 0;
+ static const int OUT_OF_BODY_STATE = 1;
+ static const int IN_TITLE_STATE = 2;
+ static const int IN_BODY_STATE = 3;
+
+
+ public: // necessary typedefs ..............................................
+ typedef BasicIMlfStreamBuf<_CharT,_Traits,_CharTA,ByteT,ByteAT>
+ this_type;
+ typedef std::basic_istream<_CharT, _Traits>& IStreamReference;
+ typedef std::basic_streambuf<_CharT, _Traits>
+ StreamBufType;
+ typedef _CharTA char_allocator_type;
+ typedef _CharT char_type;
+ typedef typename _Traits::int_type int_type;
+ typedef typename _Traits::pos_type pos_type;
+ typedef ByteT byte_type;
+ typedef ByteAT byte_allocator_type;
+ typedef byte_type* byte_buffer_type;
+ typedef std::vector<byte_type, byte_allocator_type > byte_vector_type;
+ typedef std::vector<char_type, char_allocator_type > char_vector_type;
+
+
+ public:
+ // constructors and destructors ..........................................
+ BasicIMlfStreamBuf(IStreamReference rIStream, size_t bufferSize = 1024);
+
+ ~BasicIMlfStreamBuf();
+
+ // virtual functions inherited from basic_streambuf.......................
+ /**
+ * @brief Get character in the case of underflow
+ *
+ * @return The new character available at the get pointer position, if
+ * any. Otherwise, traits::eof() is returned.
+ */
+ int_type
+ underflow();
+
+
+ // MLF specific functions ................................................
+ /**
+ * @brief Creates a new MLF block
+ * @param rFileName filename to be opened
+ */
+ this_type*
+ Open(const std::string& rFileName);
+
+ /**
+ * @brief Closes MLF block
+ */
+ this_type*
+ Close();
+
+ /**
+ * @brief Returns true if the MLF is now in open state
+ */
+ bool
+ IsOpen() const
+ { return mIsOpen; }
+
+ /**
+ * @brief Parses the stream (if possible) and stores positions to the
+ * label titles
+ */
+ void
+ Index();
+ /// Returns the value of the mIsHashed flag
+ bool
+ IsHashed() const
+ { return mIsHashed; }
+
+ /**
+ * @brief Jumps to next label definition
+ * @param rName std::string to be filled with the label name
+ * @return true on success
+ *
+ * The procedure automatically tries to hash the labels.
+ */
+ bool
+ JumpToNextDefinition(std::string& rName);
+
+ /**
+ * @brief Returns reference to the base stream
+ * @return reference to the stream
+ *
+ */
+ IStreamReference
+ GetBaseStream()
+ {
+ return mIStream;
+ }
+
+ private: // auxiliary functions ............................................
+ /**
+ * @brief Fills the line buffer with next line and updates the internal
+ * state of the finite automaton
+ */
+ void
+ FillLineBuffer();
+
+
+ private: // attributes .....................................................
+ // some flags
+ bool mIsOpen; ///< reported by IsOpen()
+ bool mIsHashed; ///< reported by IsHashed()
+ bool mIsEof;
+
+ /// internal state of the finite automaton
+ int mState;
+
+ IStreamReference mIStream; ///< base stream returned by GetBaseStream()
+ LabelContainer mLabels;
+
+ std::vector<char_type> mLineBuffer;
+ }; // class BasicIMlfStreamBuf
+
+
+
+
+ /**
+ * @brief Base class with type-independent members for the Mlf Output
+ * Stream class
+ *
+ * This is a derivative of the basic_ios class. We derive it as we need
+ * to override some member functions
+ */
+ template<
+ typename Elem,
+ typename Tr = std::char_traits<Elem>,
+ typename ElemA = std::allocator<Elem>,
+ typename ByteT = char,
+ typename ByteAT = std::allocator<ByteT>
+ >
+ class BasicOMlfStreamBase
+ : virtual public std::basic_ios<Elem,Tr>
+ {
+ public:
+ typedef std::basic_ostream<Elem, Tr>& OStreamReference;
+ typedef BasicOMlfStreamBuf <
+ Elem,Tr,ElemA,ByteT,ByteAT> OMlfStreamBufType;
+
+ /**
+ * @brief constructor; builds the buffer and attaches it to this stream
+ * @param rOStream user defined output stream
+ * @param bufferSize size passed on to the stream buffer
+ */
+ BasicOMlfStreamBase(OStreamReference rOStream,
+ size_t bufferSize)
+ : mBuf(rOStream, bufferSize)
+ { this->init(&mBuf); }
+
+ /// Returns a pointer to the buffer object for this stream
+ OMlfStreamBufType*
+ rdbuf() { return &mBuf; }
+ /// Const overload; lets const members of derived streams reach the buffer
+ const OMlfStreamBufType*
+ rdbuf() const { return &mBuf; }
+
+ private:
+ OMlfStreamBufType mBuf;
+ };
+
+ /// Base class with type-independent members for the Mlf Input Stream class
+ template<
+ typename Elem,
+ typename Tr = std::char_traits<Elem>,
+ typename ElemA = std::allocator<Elem>,
+ typename ByteT = char,
+ typename ByteAT = std::allocator<ByteT>
+ >
+ class BasicIMlfStreamBase
+ : virtual public std::basic_ios<Elem,Tr>
+ {
+ public:
+ typedef std::basic_istream<Elem, Tr>& IStreamReference;
+ typedef BasicIMlfStreamBuf<Elem,Tr,ElemA,ByteT,ByteAT> IMlfStreamBufType;
+
+ /// Builds the buffer over @p rIStream and attaches it to this stream
+ BasicIMlfStreamBase(IStreamReference rIStream, size_t bufferSize)
+ : mBuf(rIStream, bufferSize)
+ { this->init(&mBuf); }
+
+ /// Returns a pointer to the buffer object for this stream
+ IMlfStreamBufType*
+ rdbuf() { return &mBuf; }
+ /// Const overload; lets const members of derived streams reach the buffer
+ const IMlfStreamBufType*
+ rdbuf() const { return &mBuf; }
+ /// Returns reference to the base stream, as reported by the buffer
+ IStreamReference
+ GetBaseStream() { return mBuf.GetBaseStream(); }
+
+ private:
+ IMlfStreamBufType mBuf;
+ };
+
+
+ /// Output stream that writes through a BasicOMlfStreamBuf; the MLF specific
+ /// members below simply forward to that buffer
+ template<
+ typename Elem,
+ typename Tr = std::char_traits<Elem>,
+ typename ElemA = std::allocator<Elem>,
+ typename ByteT = char,
+ typename ByteAT = std::allocator<ByteT>
+ >
+ class BasicOMlfStream
+ : public BasicOMlfStreamBase<Elem,Tr,ElemA,ByteT,ByteAT>,
+ public std::basic_ostream<Elem,Tr>
+ {
+ public:
+ typedef BasicOMlfStreamBase<Elem,Tr,ElemA,ByteT,ByteAT> BasicOMlfStreamBaseType;
+ typedef std::basic_ostream<Elem,Tr> OStreamType;
+ typedef OStreamType& OStreamReference;
+
+ /// Wraps the given output stream
+ /// @param rOStream stream handed over to the MLF buffer
+ /// @param bufferSize buffer size handed over to the MLF buffer
+ BasicOMlfStream(OStreamReference rOStream, size_t bufferSize = 32)
+ : BasicOMlfStreamBaseType(rOStream, bufferSize),
+ OStreamType(BasicOMlfStreamBaseType::rdbuf())
+ { }
+
+ /// Destructor; the body is empty, nothing is closed explicitly here
+ ~BasicOMlfStream() { }
+
+ /**
+ * @brief Creates a new MLF block by forwarding to the buffer's Open()
+ * @param rFileName filename to be opened
+ *
+ * NOTE(review): the pointer returned by the buffer is dropped here, whereas
+ * BasicIMlfStream::Open() uses it to set failbit -- confirm this is intended.
+ */
+ void Open(const std::string& rFileName)
+ {
+ BasicOMlfStreamBaseType::rdbuf()->Open(rFileName);
+ }
+
+ /// Closes the MLF block by forwarding to the buffer's Close()
+ void Close()
+ {
+ BasicOMlfStreamBaseType::rdbuf()->Close();
+ }
+
+ /// Returns true if the MLF is now in open state, as reported by the buffer
+ bool IsOpen() const
+ {
+ return BasicOMlfStreamBaseType::rdbuf()->IsOpen();
+ }
+
+ /// Accessor to the label container; returns the buffer's container
+ LabelContainer& rLabels()
+ {
+ return BasicOMlfStreamBaseType::rdbuf()->rLabels();
+ }
+ };
+
+
+
+ /// Input stream that reads through a BasicIMlfStreamBuf; the MLF specific
+ /// members below forward to that buffer
+ template<
+ typename Elem,
+ typename Tr = std::char_traits<Elem>,
+ typename ElemA = std::allocator<Elem>,
+ typename ByteT = char,
+ typename ByteAT = std::allocator<ByteT>
+ >
+ class BasicIMlfStream
+ : public BasicIMlfStreamBase<Elem,Tr,ElemA,ByteT,ByteAT>,
+ public std::basic_istream<Elem,Tr>
+ {
+ public:
+ typedef BasicIMlfStreamBase<Elem,Tr,ElemA,ByteT,ByteAT> BasicIMlfStreamBaseType;
+ typedef std::basic_istream<Elem,Tr> IStreamType;
+ typedef IStreamType& IStreamReference;
+ typedef unsigned char byte_type;
+
+ /// Wraps the given input stream
+ /// @param rIStream stream handed over to the MLF buffer
+ /// @param bufferSize buffer size handed over to the MLF buffer
+ BasicIMlfStream(IStreamReference rIStream, size_t bufferSize = 32)
+ : BasicIMlfStreamBaseType(rIStream, bufferSize),
+ IStreamType(BasicIMlfStreamBaseType::rdbuf())
+ { }
+
+ /**
+ * @brief Creates a new MLF block by forwarding to the buffer's Open()
+ * @param rFileName filename to be opened
+ *
+ * When the buffer's Open() returns NULL, failbit is added to the stream
+ * state; otherwise the stream state is cleared.
+ */
+ void Open(const std::string& rFileName)
+ {
+ if (NULL != BasicIMlfStreamBaseType::rdbuf()->Open(rFileName)) {
+ IStreamType::clear();
+ return;
+ }
+ IStreamType::setstate(std::ios::failbit);
+ }
+
+ /**
+ * @brief Closes MLF block by forwarding to the buffer's Close()
+ *
+ * failbit is added to the stream state when the buffer's Close() returns NULL.
+ */
+ void Close()
+ {
+ const bool closed = (NULL != BasicIMlfStreamBaseType::rdbuf()->Close());
+ if (!closed) {
+ IStreamType::setstate(std::ios::failbit);
+ }
+ }
+
+ /// Forwards to the buffer's Index()
+ void Index()
+ { BasicIMlfStreamBaseType::rdbuf()->Index(); }
+
+ /// Forwards to the buffer's IsHashed()
+ bool IsHashed() const
+ { return BasicIMlfStreamBaseType::rdbuf()->IsHashed(); }
+ };
+
+
+
+ // MAIN TYPEDEFS (char and wchar_t instantiations of the MLF streams).........
+ typedef BasicOMlfStream<char> OMlfStream;
+ typedef BasicOMlfStream<wchar_t> WOMlfStream;
+ typedef BasicIMlfStream<char> IMlfStream;
+ typedef BasicIMlfStream<wchar_t> WIMlfStream;
+
+ /// Label depth limit: PATH_MAX when that macro is defined here, else 1024
+#ifdef PATH_MAX
+ const size_t MAX_LABEL_DEPTH = PATH_MAX;
+#else
+ const size_t MAX_LABEL_DEPTH = 1024;
+#endif
+
+
+} // namespace TNet
+
+#include "MlfStream.tcc"
+
+#endif