/** @file MlfStream.h
* This is a TNet C++ Library header.
*
* The naming convention in this file copies the std::* naming as well as that of STK.
*/
#ifndef STK_MlfStream_h
#define STK_MlfStream_h
#include <cstddef>
#include <iostream>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
namespace TNet
{
class LabelRecord;
class LabelContainer;

/// Container used to store the labels: a std::map keyed by the label string
/// with the LabelRecord as the mapped value.
typedef std::map<std::string, LabelRecord> LabelHashType;

/// Container that keeps the labels in linear order as they came, i.e. the
/// ones that cannot be hashed. Elements are pointers to (label, record)
/// pairs.
typedef std::list< std::pair<std::string, LabelRecord>* > LabelListType;
/**
 *  @brief Kind of an MLF definition
 *
 *  The MLF structure is described in the HTK book; the terms used in TNet
 *  are compatible with the ones used there.
 */
enum MlfDefType
{
  MLF_DEF_UNKNOWN                 = 0,  ///< definition kind is not known
  MLF_DEF_IMMEDIATE_TRANSCRIPTION = 1,  ///< immediate transcription
  MLF_DEF_SUB_DIR_DEF             = 2   ///< subdirectory definition
};
/** **************************************************************************
* @brief Holds association between label and stream
*/
class LabelRecord
{
public:
LabelRecord() : miLabelListLimit(NULL)
{ }
~LabelRecord()
{ }
/// definition type
MlfDefType mDefType;
/// position of the label in the stream
std::streampos mStreamPos;
/**
* @brief points to the current end of the LabelList
*
* The reason for storing this value is to know when we inserted
* a label into the hash. It is possible, that the hash label came
* after list label, in which case the list label is prefered
*/
LabelListType::iterator miLabelListLimit;
};
/**
 *  @brief Provides an interface to label hierarchy and searching
 *
 *  Labels are kept in a map (mLabelMap) and in a list (mLabelList). When a
 *  wildcard convention is used, the labels are organised according to the
 *  level of wildcard abstraction, where "level" means the directory
 *  structure depth; the depths observed at insertion are kept in mDepths.
 *
 *  NOTE(review): mLabelList holds raw pointers and the class declares a
 *  destructor but no copy constructor/assignment operator -- confirm
 *  whether copying a container is safe before doing so.
 */
class LabelContainer
{
public:
  /// Creates an empty container; hashed search is enabled by default
  LabelContainer() : mUseHashedSearch(true) {}

  /// The destructor (defined outside of this header)
  ~LabelContainer();

  /**
   *  @brief Inserts new label to the hash structure
   *  @param rLabel label to be inserted
   *  @param Pos    stream position associated with the label
   */
  void Insert(const std::string& rLabel, std::streampos Pos);

  /**
   *  @brief Looks for a record in the hash
   *  @param rLabel label to look for
   *  @param rLS    structure to fill with found data
   *  @return presumably true when a record was found (the definition is not
   *          part of this header)
   */
  bool FindInHash(const std::string& rLabel, LabelRecord& rLS);

  /**
   *  @brief Looks for a record in the list
   *  @param rLabel       label to look for
   *  @param rLS          structure to fill with found data
   *  @param limitSearch  if true, @p rLS's @c miLabelListLimit gives the
   *                      limiting position in the list
   *  @return presumably true when a record was found (the definition is not
   *          part of this header)
   */
  bool FindInList(const std::string& rLabel,
                  LabelRecord&       rLS,
                  bool               limitSearch = false);

  /**
   *  @brief Looks for a record
   *  @param rLabel label to look for
   *  @param rLS    structure to fill with found data
   *  @return presumably true when a record was found (the definition is not
   *          part of this header)
   */
  bool Find(const std::string& rLabel, LabelRecord& rLS);

  /**
   *  @brief Returns the matched pattern
   *  @return reference to the stored mMatchedPattern string
   */
  const std::string& MatchedPattern() const
  { return mMatchedPattern; }

  /**
   *  @brief Returns the matched pattern mask (%%%)
   *  @return reference to the stored mMatchedPatternMask string
   */
  const std::string& MatchedPatternMask() const
  { return mMatchedPatternMask; }

  /**
   *  @brief Writes contents to stream (text)
   *  @param rOStream stream to write to
   */
  void Write(std::ostream& rOStream);

private:
  /// type used for directory depth notation
  typedef size_t DepthType;

  /**
   *  @brief Returns the directory depth of path
   *  @param path path whose depth is requested
   */
  size_t DirDepth(const std::string& path);

  /// depths of the '*' labels observed at insertion
  std::set<DepthType>   mDepths;

  /// the stored labels (map keyed by label string)
  LabelHashType         mLabelMap;

  /// the stored labels kept in a list
  LabelListType         mLabelList;

  /// true if labels are to be sought by hashing function (fast), false if
  /// by sequential search (slow)
  bool                  mUseHashedSearch;

  /// when Find matches the label, holds the pattern that matched the query
  std::string           mMatchedPattern;

  /// when Find matches the label, holds the masked characters; the mask is
  /// given by '%' symbols
  std::string           mMatchedPatternMask;
};
/**
 *  @brief MLF output buffer definition
 *
 *  A std::basic_streambuf specialization that keeps a reference to an
 *  output stream together with a LabelContainer. Member functions that are
 *  only declared here are defined outside of this class body.
 */
template<
  typename _CharT,
  typename _Traits = std::char_traits<_CharT>,
  typename _CharTA = std::allocator<_CharT>,
  typename ByteT   = char,
  typename ByteAT  = std::allocator<ByteT>
>
class BasicOMlfStreamBuf
  : public std::basic_streambuf<_CharT, _Traits>
{
public:
  // type definitions ........................................................
  typedef BasicOMlfStreamBuf<_CharT,_Traits,_CharTA,ByteT,ByteAT> this_type;
  typedef std::basic_ostream<_CharT, _Traits>&    OStreamReference;
  typedef std::basic_streambuf<_CharT, _Traits>   StreamBufType;
  typedef _CharTA                                 char_allocator_type;
  typedef _CharT                                  char_type;
  typedef typename _Traits::int_type              int_type;
  typedef typename _Traits::pos_type              pos_type;
  typedef ByteT                                   byte_type;
  typedef ByteAT                                  byte_allocator_type;
  typedef byte_type*                              byte_buffer_type;
  typedef std::vector<byte_type, byte_allocator_type > byte_vector_type;
  typedef std::vector<char_type, char_allocator_type > char_vector_type;

  // construction and destruction ............................................
  /**
   *  @param rOStream   output stream reference (presumably the stream the
   *                    MLF data is written to -- see the definition)
   *  @param bufferSize buffer size; its exact use is determined by the
   *                    definition, which is not part of this class body
   */
  BasicOMlfStreamBuf(OStreamReference rOStream, size_t bufferSize);

  ~BasicOMlfStreamBuf();

  // virtual functions inherited from basic_streambuf ........................
  /// Synchronization hook of std::basic_streambuf
  int sync();

  /**
   *  @brief Write character in the case of overflow
   *  @param c Character to be written.
   *  @return A value different than EOF (or traits::eof() for other traits)
   *          signals success. If the function fails, either EOF
   *          (or traits::eof() for other traits) is returned or an
   *          exception is thrown.
   */
  int_type overflow(int_type c = _Traits::eof());

  // MLF specific functions ..................................................
  /**
   *  @brief Creates a new MLF block
   *  @param rFileName filename to be opened
   *  @return pointer to a buffer object (presumably @c this on success and
   *          NULL on failure -- confirm against the definition)
   */
  this_type* Open(const std::string& rFileName);

  /**
   *  @brief Closes MLF block
   */
  void Close();

  /**
   *  @brief Returns true if the MLF is now in open state
   */
  bool IsOpen() const
  { return mIsOpen; }

  /**
   *  @brief Accessor to the label container owned by this buffer
   */
  LabelContainer& rLabels()
  { return mLabels; }

private:
  bool              mIsOpen;     ///< open-state flag reported by IsOpen()
  char_type         mLastChar;   ///< presumably the last character written
  OStreamReference  mOStream;    ///< referenced output stream
  LabelContainer    mLabels;     ///< labels exposed through rLabels()
}; // class BasicOMlfStreamBuf
/**
 *  @brief MLF input buffer definition
 *
 *  A std::basic_streambuf specialization that keeps a reference to an input
 *  stream, a LabelContainer, a line buffer and the state of a small finite
 *  automaton. Member functions that are only declared here are defined
 *  outside of this class body.
 */
template<
  typename _CharT,
  typename _Traits = std::char_traits<_CharT>,
  typename _CharTA = std::allocator<_CharT>,
  typename ByteT   = char,
  typename ByteAT  = std::allocator<ByteT>
>
class BasicIMlfStreamBuf
  : public std::basic_streambuf<_CharT, _Traits>
{
private:
  // states of the internal finite automaton
  static const int IN_HEADER_STATE    = 0;
  static const int OUT_OF_BODY_STATE  = 1;
  static const int IN_TITLE_STATE     = 2;
  static const int IN_BODY_STATE      = 3;

public:
  // type definitions ........................................................
  typedef BasicIMlfStreamBuf<_CharT,_Traits,_CharTA,ByteT,ByteAT> this_type;
  typedef std::basic_istream<_CharT, _Traits>&    IStreamReference;
  typedef std::basic_streambuf<_CharT, _Traits>   StreamBufType;
  typedef _CharTA                                 char_allocator_type;
  typedef _CharT                                  char_type;
  typedef typename _Traits::int_type              int_type;
  typedef typename _Traits::pos_type              pos_type;
  typedef ByteT                                   byte_type;
  typedef ByteAT                                  byte_allocator_type;
  typedef byte_type*                              byte_buffer_type;
  typedef std::vector<byte_type, byte_allocator_type > byte_vector_type;
  typedef std::vector<char_type, char_allocator_type > char_vector_type;

public:
  // construction and destruction ............................................
  /**
   *  @param rIStream   input stream reference (returned later by
   *                    GetBaseStream() if it is what the definition stores
   *                    in mIStream -- confirm)
   *  @param bufferSize buffer size, 1024 by default; its exact use is
   *                    determined by the definition, not visible here
   */
  BasicIMlfStreamBuf(IStreamReference rIStream, size_t bufferSize = 1024);

  ~BasicIMlfStreamBuf();

  // virtual functions inherited from basic_streambuf ........................
  /**
   *  @brief Get character in the case of underflow
   *
   *  @return The new character available at the get pointer position, if
   *          any. Otherwise, traits::eof() is returned.
   */
  int_type underflow();

  // MLF specific functions ..................................................
  /**
   *  @brief Creates a new MLF block
   *  @param rFileName filename to be opened
   *  @return pointer to a buffer object; BasicIMlfStream::Open treats NULL
   *          as failure
   */
  this_type* Open(const std::string& rFileName);

  /**
   *  @brief Closes MLF block
   *  @return pointer to a buffer object; BasicIMlfStream::Close treats NULL
   *          as failure
   */
  this_type* Close();

  /**
   *  @brief Returns true if the MLF is now in open state
   */
  bool IsOpen() const
  { return mIsOpen; }

  /**
   *  @brief Parses the stream (if possible) and stores positions to the
   *         label titles
   */
  void Index();

  /**
   *  @brief Returns the value of the mIsHashed flag
   */
  bool IsHashed() const
  { return mIsHashed; }

  /**
   *  @brief Jumps to next label definition
   *  @param rName std::string to be filled with the label name
   *  @return true on success
   *
   *  The procedure automatically tries to hash the labels.
   */
  bool JumpToNextDefinition(std::string& rName);

  /**
   *  @brief Returns reference to the base stream
   *  @return reference to the stream
   */
  IStreamReference GetBaseStream()
  { return mIStream; }

private:
  // auxiliary functions .....................................................
  /**
   *  @brief Fills the line buffer with next line and updates the internal
   *         state of the finite automaton
   */
  void FillLineBuffer();

private:
  // attributes ..............................................................
  // some flags
  bool                    mIsOpen;     ///< reported by IsOpen()
  bool                    mIsHashed;   ///< reported by IsHashed()
  bool                    mIsEof;      ///< presumably set at end of input

  /// internal state of the finite automaton (one of the *_STATE constants)
  int                     mState;

  IStreamReference        mIStream;    ///< stream returned by GetBaseStream()
  LabelContainer          mLabels;     ///< label storage
  std::vector<char_type>  mLineBuffer; ///< line buffer, see FillLineBuffer()
}; // class BasicIMlfStreamBuf
/**
 *  @brief Base class with type-independent members for the MLF output
 *         stream class
 *
 *  This is a derivative of the basic_ios class. It owns the MLF stream
 *  buffer and registers it with basic_ios through init(). It also provides
 *  rdbuf() accessors that return the concrete MLF buffer type; these hide
 *  the basic_ios::rdbuf() overloads.
 */
template<
  typename Elem,
  typename Tr     = std::char_traits<Elem>,
  typename ElemA  = std::allocator<Elem>,
  typename ByteT  = char,
  typename ByteAT = std::allocator<ByteT>
>
class BasicOMlfStreamBase
  : virtual public std::basic_ios<Elem,Tr>
{
public:
  typedef std::basic_ostream<Elem, Tr>&                       OStreamReference;
  typedef BasicOMlfStreamBuf<Elem,Tr,ElemA,ByteT,ByteAT>      OMlfStreamBufType;

  /**
   *  @brief constructor
   *
   *  @param rOStream   user defined output stream
   *  @param bufferSize buffer size forwarded to the stream buffer
   */
  BasicOMlfStreamBase(OStreamReference rOStream,
                      size_t           bufferSize)
    : mBuf(rOStream, bufferSize)
  { this->init(&mBuf); }

  /**
   *  @brief Returns a pointer to the buffer object for this stream
   */
  OMlfStreamBufType*
  rdbuf()
  { return &mBuf; }

  /**
   *  @brief Returns a pointer to the buffer object of a const stream
   *
   *  The non-const rdbuf() above hides basic_ios::rdbuf() const, so without
   *  this overload no rdbuf() is callable on a const stream. Const members
   *  of derived classes that query the buffer (such as an IsOpen() const
   *  forwarding to the buffer) would then fail to compile when instantiated.
   */
  const OMlfStreamBufType*
  rdbuf() const
  { return &mBuf; }

private:
  OMlfStreamBufType mBuf;   ///< the MLF stream buffer owned by this base
};
/**
 *  @brief Base class with type-independent members for the MLF input
 *         stream class
 *
 *  This is a derivative of the basic_ios class. It owns the MLF stream
 *  buffer and registers it with basic_ios through init(). It also provides
 *  rdbuf() accessors that return the concrete MLF buffer type; these hide
 *  the basic_ios::rdbuf() overloads.
 */
template<
  typename Elem,
  typename Tr     = std::char_traits<Elem>,
  typename ElemA  = std::allocator<Elem>,
  typename ByteT  = char,
  typename ByteAT = std::allocator<ByteT>
>
class BasicIMlfStreamBase
  : virtual public std::basic_ios<Elem,Tr>
{
public:
  typedef std::basic_istream<Elem, Tr>&                       IStreamReference;
  typedef BasicIMlfStreamBuf<Elem,Tr,ElemA,ByteT,ByteAT>      IMlfStreamBufType;

  /**
   *  @brief constructor
   *
   *  @param rIStream   user defined input stream
   *  @param bufferSize buffer size forwarded to the stream buffer
   */
  BasicIMlfStreamBase(IStreamReference rIStream,
                      size_t           bufferSize)
    : mBuf(rIStream, bufferSize)
  { this->init(&mBuf); }

  /**
   *  @brief Returns a pointer to the buffer object for this stream
   */
  IMlfStreamBufType*
  rdbuf()
  { return &mBuf; }

  /**
   *  @brief Returns a pointer to the buffer object of a const stream
   *
   *  The non-const rdbuf() above hides basic_ios::rdbuf() const, so without
   *  this overload no rdbuf() is callable on a const stream. Const members
   *  of derived classes that query the buffer (such as an IsHashed() const
   *  forwarding to the buffer) would then fail to compile when instantiated.
   */
  const IMlfStreamBufType*
  rdbuf() const
  { return &mBuf; }

  /**
   *  @brief Returns reference to the base stream held by the buffer
   */
  IStreamReference
  GetBaseStream()
  { return mBuf.GetBaseStream(); }

private:
  IMlfStreamBufType mBuf;   ///< the MLF stream buffer owned by this base
};
/**
 *  @brief MLF output stream
 *
 *  Combines BasicOMlfStreamBase (which owns the MLF stream buffer) with
 *  std::basic_ostream operating on that buffer, and forwards the MLF
 *  specific operations to the buffer.
 */
template<
  typename Elem,
  typename Tr     = std::char_traits<Elem>,
  typename ElemA  = std::allocator<Elem>,
  typename ByteT  = char,
  typename ByteAT = std::allocator<ByteT>
>
class BasicOMlfStream
  : public BasicOMlfStreamBase<Elem,Tr,ElemA,ByteT,ByteAT>,
    public std::basic_ostream<Elem,Tr>
{
public:
  typedef BasicOMlfStreamBase<Elem,Tr,ElemA,ByteT,ByteAT>
                                        BasicOMlfStreamBaseType;
  typedef std::basic_ostream<Elem,Tr>   OStreamType;
  typedef OStreamType&                  OStreamReference;

  /**
   *  @brief constructor
   *
   *  @param rOStream   user defined output stream
   *  @param bufferSize buffer size forwarded to the stream buffer
   *                    (32 by default)
   */
  BasicOMlfStream(OStreamReference rOStream, size_t bufferSize = 32)
    : BasicOMlfStreamBaseType(rOStream, bufferSize),
      OStreamType(BasicOMlfStreamBaseType::rdbuf())
  { }

  /**
   *  @brief Destructor
   *
   *  The body is empty: no Close() is issued from here, so an open MLF
   *  block is only closed by an explicit call to Close() (or by whatever
   *  the stream buffer's own destructor does).
   */
  ~BasicOMlfStream()
  { }

  /**
   *  @brief Creates a new MLF block
   *  @param rFileName filename to be opened
   *
   *  A null pointer returned by the buffer's Open() is treated as failure
   *  and reported by setting failbit on this stream, the same way
   *  BasicIMlfStream::Open reports it. The state is left untouched on
   *  success so that previously recorded errors are not masked.
   */
  void
  Open(const std::string& rFileName)
  {
    if (!BasicOMlfStreamBaseType::rdbuf()->Open(rFileName)) {
      OStreamType::clear(OStreamType::rdstate() | std::ios::failbit);
    }
  }

  /**
   *  @brief Closes MLF block
   */
  void
  Close()
  { BasicOMlfStreamBaseType::rdbuf()->Close(); }

  /**
   *  @brief Returns true if the MLF is now in open state
   */
  bool
  IsOpen() const
  { return BasicOMlfStreamBaseType::rdbuf()->IsOpen(); }

  /**
   *  @brief Accessor to the label container
   *  @return Reference to the label container
   */
  LabelContainer&
  rLabels()
  { return BasicOMlfStreamBaseType::rdbuf()->rLabels(); }
};
/**
 *  @brief MLF input stream
 *
 *  Combines BasicIMlfStreamBase (which owns the MLF stream buffer) with
 *  std::basic_istream operating on that buffer, and forwards the MLF
 *  specific operations to the buffer.
 */
template<
  typename Elem,
  typename Tr     = std::char_traits<Elem>,
  typename ElemA  = std::allocator<Elem>,
  typename ByteT  = char,
  typename ByteAT = std::allocator<ByteT>
>
class BasicIMlfStream
  : public BasicIMlfStreamBase<Elem,Tr,ElemA,ByteT,ByteAT>,
    public std::basic_istream<Elem,Tr>
{
public:
  typedef BasicIMlfStreamBase<Elem,Tr,ElemA,ByteT,ByteAT>
                                        BasicIMlfStreamBaseType;
  typedef std::basic_istream<Elem,Tr>   IStreamType;
  typedef IStreamType&                  IStreamReference;
  typedef unsigned char                 byte_type;

  /**
   *  @brief constructor
   *
   *  @param rIStream   user defined input stream
   *  @param bufferSize buffer size forwarded to the stream buffer
   *                    (32 by default)
   */
  BasicIMlfStream(IStreamReference rIStream, size_t bufferSize = 32)
    : BasicIMlfStreamBaseType(rIStream, bufferSize),
      IStreamType(BasicIMlfStreamBaseType::rdbuf())
  { }

  /**
   *  @brief Creates a new MLF block
   *  @param rFileName filename to be opened
   *
   *  When the buffer's Open() yields a non-null pointer the stream state is
   *  reset to good; otherwise failbit is added to the current state.
   */
  void
  Open(const std::string& rFileName)
  {
    std::basic_streambuf<Elem, Tr>* const p_opened =
      BasicIMlfStreamBaseType::rdbuf()->Open(rFileName);

    if (p_opened) {
      IStreamType::clear();
    }
    else {
      IStreamType::setstate(std::ios::failbit);
    }
  }

  /**
   *  @brief Closes MLF block
   *
   *  Forwards to the buffer's Close(); a null result adds failbit to the
   *  stream state.
   */
  void
  Close()
  {
    if (!BasicIMlfStreamBaseType::rdbuf()->Close()) {
      IStreamType::setstate(std::ios::failbit);
    }
  }

  /**
   *  @brief Forwards to the buffer's Index()
   */
  void
  Index()
  { BasicIMlfStreamBaseType::rdbuf()->Index(); }

  /**
   *  @brief Forwards to the buffer's IsHashed()
   */
  bool
  IsHashed() const
  { return BasicIMlfStreamBaseType::rdbuf()->IsHashed(); }
};
// MAIN TYPEDEFS..............................................................
// streams of narrow characters (char)
typedef BasicIMlfStream<char>     IMlfStream;
typedef BasicOMlfStream<char>     OMlfStream;
// streams of wide characters (wchar_t)
typedef BasicIMlfStream<wchar_t>  WIMlfStream;
typedef BasicOMlfStream<wchar_t>  WOMlfStream;
// MAX_LABEL_DEPTH equals PATH_MAX when that macro is already defined at this
// point of the translation unit and falls back to 1024 otherwise.
// NOTE(review): the value therefore depends on what was included before
// this header -- confirm that all translation units agree.
#ifndef PATH_MAX
const size_t MAX_LABEL_DEPTH = 1024;
#else
const size_t MAX_LABEL_DEPTH = PATH_MAX;
#endif
} // namespace TNet
#include "MlfStream.tcc"
#endif