/** @file MlfStream.h
* This is a TNet C++ Library header.
*
* The naming convention in this file copies the std::* naming as well as STK
*/
#ifndef STK_MlfStream_h
#define STK_MlfStream_h
#include <iostream>
#include <vector>
#include <map>
#include <list>
#include <set>
namespace TNet
{
// Forward declarations; both classes are defined further down in this header.
class LabelContainer;
class LabelRecord;

/// Associative container used to store the labels, keyed by the label string.
/// Note: despite the "Hash" in its name this is an ordered std::map.
typedef std::map<std::string, LabelRecord> LabelHashType;

/// Sequential container that keeps the labels in linear order as they came,
/// i.e. the labels that cannot be hashed. Elements are raw pointers to
/// (label, record) pairs.
typedef std::list<std::pair<std::string, LabelRecord>*> LabelListType;
/**
 * @brief Kind of an MLF definition
 *
 * The MLF structure is described in the HTK book; the terminology used in
 * TNet follows the one used there.
 */
enum MlfDefType
{
  MLF_DEF_UNKNOWN                 = 0,  ///< definition type is not known
  MLF_DEF_IMMEDIATE_TRANSCRIPTION = 1,  ///< immediate transcription
  MLF_DEF_SUB_DIR_DEF             = 2   ///< subdirectory definition
};
/** **************************************************************************
* @brief Holds association between label and stream
*/
class LabelRecord
{
public:
LabelRecord() : miLabelListLimit(NULL)
{ }
~LabelRecord()
{ }
/// definition type
MlfDefType mDefType;
/// position of the label in the stream
std::streampos mStreamPos;
/**
* @brief points to the current end of the LabelList
*
* The reason for storing this value is to know when we inserted
* a label into the hash. It is possible, that the hash label came
* after list label, in which case the list label is prefered
*/
LabelListType::iterator miLabelListLimit;
};
/**
 * @brief Provides an interface to label hierarchy and searching
 *
 * Label files are kept in a map structure. When a wildcard convention is
 * used, the labels are stored in separate maps according to the level of
 * wildcard abstraction, where "level" means the depth in the directory
 * structure.
 */
class LabelContainer
{
public:
  /// Constructs the container with hashed search switched on.
  LabelContainer()
  : mUseHashedSearch(true)
  { }

  /// Destructor (defined out of line).
  ~LabelContainer();

  /**
   * @brief Inserts a new label into the hash structure
   * @param rLabel label to insert
   * @param Pos    stream position associated with the label
   */
  void Insert(const std::string& rLabel, std::streampos Pos);

  /**
   * @brief Looks for a record in the hash
   * @param rLabel label to look for
   * @param rLS    record passed by reference (presumably filled on success)
   */
  bool FindInHash(const std::string& rLabel, LabelRecord& rLS);

  /**
   * @brief Looks for a record in the list
   * @param rLabel      label to look for
   * @param rLS         structure to fill with found data
   * @param limitSearch if true, @p rLS's @c miLabelListLimit gives the
   *                    limiting position in the list
   */
  bool FindInList(const std::string& rLabel,
                  LabelRecord&       rLS,
                  bool               limitSearch = false);

  /**
   * @brief Looks for a record
   * @param rLabel label to look for
   * @param rLS    record passed by reference (presumably filled on success)
   */
  bool Find(const std::string& rLabel, LabelRecord& rLS);

  /// @brief Returns the matched pattern
  const std::string& MatchedPattern() const { return mMatchedPattern; }

  /// @brief Returns the matched pattern mask (%%%)
  const std::string& MatchedPatternMask() const { return mMatchedPatternMask; }

  /**
   * @brief Writes contents to stream (text)
   * @param rOStream stream to write to
   */
  void Write(std::ostream& rOStream);

private:
  /// integral type used to express directory depth
  typedef size_t DepthType;

  /// depths of the '*' labels observed at insertion
  std::set<DepthType> mDepths;

  /// label storage: the map and the linear list
  LabelHashType mLabelMap;
  LabelListType mLabelList;

  /// selects how labels are sought: hashing function (fast) when true,
  /// sequential search (slow) otherwise
  bool mUseHashedSearch;

  /// pattern that matched the query, stored when Find matches the label
  std::string mMatchedPattern;

  /// the masked characters, stored when Find matches the label; the mask
  /// is given by '%' symbols
  std::string mMatchedPatternMask;

  /// @brief Returns the directory depth of @p path
  size_t DirDepth(const std::string& path);
};
/**
 * @brief MLF output buffer definition
 *
 * A stream buffer derived from std::basic_streambuf that is constructed
 * over an output stream and carries its own LabelContainer.
 */
template<
  typename _CharT,
  typename _Traits = std::char_traits<_CharT>,
  typename _CharTA = std::allocator<_CharT>,
  typename ByteT   = char,
  typename ByteAT  = std::allocator<ByteT>
>
class BasicOMlfStreamBuf : public std::basic_streambuf<_CharT, _Traits>
{
public:
  // type aliases ............................................................
  typedef BasicOMlfStreamBuf<_CharT, _Traits, _CharTA, ByteT, ByteAT> this_type;
  typedef std::basic_ostream<_CharT, _Traits>&          OStreamReference;
  typedef std::basic_streambuf<_CharT, _Traits>         StreamBufType;
  typedef _CharTA                                       char_allocator_type;
  typedef _CharT                                        char_type;
  typedef typename _Traits::int_type                    int_type;
  typedef typename _Traits::pos_type                    pos_type;
  typedef ByteT                                         byte_type;
  typedef ByteAT                                        byte_allocator_type;
  typedef byte_type*                                    byte_buffer_type;
  typedef std::vector<byte_type, byte_allocator_type >  byte_vector_type;
  typedef std::vector<char_type, char_allocator_type >  char_vector_type;

  // construction / destruction ..............................................
  /**
   * @param rOStream   output stream this buffer is constructed over
   * @param bufferSize buffer size (units not visible here — presumably
   *                   characters; confirm against the implementation)
   */
  BasicOMlfStreamBuf(OStreamReference rOStream, size_t bufferSize);

  ~BasicOMlfStreamBuf();

  // overrides of std::basic_streambuf virtuals ..............................
  /// Overrides basic_streambuf::sync().
  virtual int sync();

  /**
   * @brief Write character in the case of overflow
   * @param c Character to be written.
   * @return A value different than EOF (or traits::eof() for other traits)
   *         signals success. If the function fails, either EOF
   *         (or traits::eof() for other traits) is returned or an
   *         exception is thrown.
   */
  virtual int_type overflow(int_type c = _Traits::eof());

  // MLF specific interface ..................................................
  /**
   * @brief Creates a new MLF block
   * @param rFileName filename to be opened
   * @return pointer to a buffer object (NOTE(review): presumably @c this on
   *         success — confirm against the implementation)
   */
  this_type* Open(const std::string& rFileName);

  /// @brief Closes MLF block
  void Close();

  /// @brief Returns true if the MLF is now in open state
  bool IsOpen() const { return mIsOpen; }

  /// Gives mutable access to the label container held by this buffer.
  LabelContainer& rLabels() { return mLabels; }

private:
  bool              mIsOpen;    ///< value reported by IsOpen()
  char_type         mLastChar;  ///< NOTE(review): presumably the last character written — confirm
  OStreamReference  mOStream;   ///< reference to an output stream (presumably the constructor argument)
  LabelContainer    mLabels;    ///< label container exposed through rLabels()
}; // class BasicOMlfStreamBuf
/**
* @brief MLF input buffer definition
*/
template<
typename _CharT,
typename _Traits = std::char_traits<_CharT>,
typename _CharTA = std::allocator<_CharT>,
typename ByteT = char,
typename ByteAT = std::allocator<ByteT>
>
class BasicIMlfStreamBuf
: public std::basic_streambuf<_CharT, _Traits>
{
private:
// internal automaton states
static const int IN_HEADER_STATE = 0;
static const int OUT_OF_BODY_STATE = 1;
static const int IN_TITLE_STATE = 2;
static const int IN_BODY_STATE = 3;
public: // necessary typedefs ..............................................
typedef BasicIMlfStreamBuf<_CharT,_Traits,_CharTA,ByteT,ByteAT>
this_type;
typedef std::basic_istream<_CharT, _Traits>& IStreamReference;
typedef std::basic_streambuf<_CharT, _Traits>
StreamBufType;
typedef _CharTA char_allocator_type;
typedef _CharT char_type;
typedef typename _Traits::int_type int_type;
typedef typename _Traits::pos_type pos_type;
typedef ByteT byte_type;
typedef ByteAT byte_allocator_type;
typedef byte_type* byte_buffer_type;
typedef std::vector<byte_type, byte_allocator_type > byte_vector_type;
typedef std::vector<char_type, char_allocator_type > char_vector_type;
public:
// constructors and destructors ..........................................
BasicIMlfStreamBuf(IStreamReference rIStream,