#include "Labels.h"
#include "Timer.h"
#include "Error.h"
#include <cstdio>
#include <sstream>
namespace TNet {
////////////////////////////////////////////////////////////////////////
// Class LabelRepository::
/// Convenience entry point: forwards all arguments unchanged to InitMap().
/// Unlike InitExt(), this function does not assign mlf_fmt.
void LabelRepository::Init(const char* pLabelMlfFile,
                           const char* pOutputLabelMapFile,
                           const char* pLabelDir,
                           const char* pLabelExt)
{
  InitMap(pLabelMlfFile, pOutputLabelMapFile, pLabelDir, pLabelExt);
}
void
LabelRepository::
InitExt(const char* pLabelMlfFile, const char* fmt, const char *arg, const char* pLabelDir, const char* pLabelExt) {
if (strcmp(fmt, "map") == 0)
{
InitMap(pLabelMlfFile, arg, pLabelDir, pLabelExt);
mlf_fmt = MAP;
}
else if (strcmp(fmt, "raw") == 0)
{
InitRaw(pLabelMlfFile, arg, pLabelDir, pLabelExt);
mlf_fmt = RAW;
}
}
void
LabelRepository::
InitMap(const char* pLabelMlfFile, const char* pOutputLabelMapFile, const char* pLabelDir, const char* pLabelExt)
{
assert(NULL != pLabelMlfFile);
assert(NULL != pOutputLabelMapFile);
// initialize the label streams
delete mpLabelStream; //if NULL, does nothing
delete _mpLabelStream;
_mpLabelStream = new std::ifstream(pLabelMlfFile);
mpLabelStream = new IMlfStream(*_mpLabelStream);
// Label stream is initialized, just test it
if(!mpLabelStream->good())
Error(std::string("Cannot open Label MLF file: ")+pLabelMlfFile);
// Index the labels (good for randomized file lists)
Timer tim; tim.Start();
mpLabelStream->Index();
tim.End(); mIndexTime += tim.Val();
// Read the state-label to state-id map
ReadOutputLabelMap(pOutputLabelMapFile);
// Store the label dir/ext
mpLabelDir = pLabelDir;
mpLabelExt = pLabelExt;
}
/// Opens and indexes the label MLF file for raw (numeric vector) labels.
///
/// @param pLabelMlfFile  label MLF file; asserted to be non-NULL
/// @param arg            text holding the raw label dimension; it must parse
///                       as an integer greater than zero
/// @param pLabelDir      stored in mpLabelDir (pointer is kept, not copied)
/// @param pLabelExt      stored in mpLabelExt (pointer is kept, not copied)
///
/// A NULL, unparsable, zero or negative dimension is reported with PError().
/// The dimension is parsed as a signed value on purpose: extracting "-3"
/// directly into an unsigned type can yield a huge wrapped-around number
/// instead of a failure, which the `<= 0` test could never catch.
/// No output label map is read in this mode.
void LabelRepository::InitRaw(const char* pLabelMlfFile, const char *arg, const char* pLabelDir, const char* pLabelExt)
{
  assert(NULL != pLabelMlfFile);

  // Parse the dimension; guard against NULL before building a stream from it.
  long long parsed_dim = 0;
  bool dim_ok = false;
  if (NULL != arg) {
    std::istringstream iss(arg);
    iss >> parsed_dim;
    dim_ok = !iss.fail() && parsed_dim > 0;
  }
  if (!dim_ok)
    PError("[lab] malformed dimension specification");
  // Fall back to 0 when parsing failed so no garbage/negative value is stored.
  const size_t dim = dim_ok ? static_cast<size_t>(parsed_dim) : 0;
  raw_dim = dim;

  // initialize the label streams
  delete mpLabelStream; //if NULL, does nothing
  delete _mpLabelStream;
  _mpLabelStream = new std::ifstream(pLabelMlfFile);
  mpLabelStream = new IMlfStream(*_mpLabelStream);

  // Label stream is initialized, just test it
  if(!mpLabelStream->good())
    Error(std::string("Cannot open Label MLF file: ")+pLabelMlfFile);

  // Index the labels (good for randomized file lists)
  Timer tim; tim.Start();
  mpLabelStream->Index();
  tim.End(); mIndexTime += tim.Val();

  // Store the label dir/ext
  mpLabelDir = pLabelDir;
  mpLabelExt = pLabelExt;
}
/// Builds the one-hot target ("desired") matrix for one utterance.
///
/// The label record is located by building a file name from pFeatureLogical,
/// mpLabelDir and mpLabelExt and opening it in the indexed MLF stream. Every
/// line that is neither empty nor starting with '#' is parsed as
///   <begin> <end> <state_tag> [<vad_state>]
/// where the fourth column is read only when has_vad is true.
///
/// @param rDesired         output; re-initialized to nFrames rows and
///                         mLabelMap.size() (+2 when has_vad) zeroed columns
/// @param nFrames          number of rows to produce; must be >= 1
/// @param sourceRate       divisor converting begin/end to frame indices
///                         (rounded to nearest); must be non-zero
/// @param pFeatureLogical  logical feature name used to build the label name
/// @param has_vad          when true, two extra columns are appended and the
///                         column mLabelMap.size() + (vad_state != 0) is set
///
/// Frames in [begin, end) get a 1.0 in the column of the state id. Frames
/// beyond the matrix are not written, only counted; more than 10 of them
/// produces a Warning(). Assigning a frame twice, an unknown state tag, a
/// parse failure or a row with an unexpected sum is reported as an error.
/// Elapsed time is added to mGenDesiredMatrixTime.
void LabelRepository::GenDesiredMatrix(BfMatrix& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical, bool has_vad)
{
  //timer
  Timer tim; tim.Start();

  //Get the MLF stream reference...
  IMlfStream& mLabelStream = *mpLabelStream;
  //Build the file name of the label
  MakeHtkFileName(mpLabelFile, pFeatureLogical, mpLabelDir, mpLabelExt);

  //Find block in MLF file
  mLabelStream.Open(mpLabelFile);
  if(!mLabelStream.good()) {
    Error(std::string("Cannot open label MLF record: ") + mpLabelFile);
  }

  //validate the arguments
  if(nFrames < 1) {
    KALDI_ERR << "Number of frames:" << nFrames << " is lower than 1!!!\n"
              << pFeatureLogical;
  }
  //begin/end are divided by sourceRate below; zero would divide by zero
  if(sourceRate == 0) {
    Error(std::string("Source rate must be non-zero, file: ") + mpLabelFile);
  }

  //resize the matrix
  const int label_map_size = mLabelMap.size();
  rDesired.Init(nFrames, label_map_size + (has_vad ? 2 : 0), true); //true: Zero()
  const unsigned long long num_rows = rDesired.Rows();

  //aux variables
  std::string line, state;
  unsigned long long beg, end;
  size_t state_index;
  size_t trunc_frames = 0;
  int vad_state = 0;

  //parse the label file; the loop ends on EOF as well as on any stream
  //failure, so a broken stream cannot cause an endless loop
  while(std::getline(mLabelStream, line)) {
    if(line == "") continue; //skip newlines/comments from MLF
    if(line[0] == '#') continue;

    std::istringstream& iss = mGenDesiredMatrixStream;
    iss.clear();
    iss.str(line);

    //parse the line
    //begin
    iss >> std::ws >> beg;
    if(iss.fail()) {
      KALDI_ERR << "Cannot parse column 1 (begin)\n"
                << "line: " << line << "\n"
                << "file: " << mpLabelFile << "\n";
    }
    //end
    iss >> std::ws >> end;
    if(iss.fail()) {
      KALDI_ERR << "Cannot parse column 2 (end)\n"
                << "line: " << line << "\n"
                << "file: " << mpLabelFile << "\n";
    }
    //state tag
    iss >> std::ws >> state;
    if(iss.fail()) {
      KALDI_ERR << "Cannot parse column 3 (state_tag)\n"
                << "line: " << line << "\n"
                << "file: " << mpLabelFile << "\n";
    }
    if (has_vad) /* an additional column for vad */
    {
      iss >> std::ws >> vad_state;
      if(iss.fail()) {
        KALDI_ERR << "Cannot parse column 4 (vad_state)\n"
                  << "line: " << line << "\n"
                  << "file: " << mpLabelFile << "\n";
      }
    }

    //divide beg/end by sourceRate, rounding to the nearest frame index
    beg = (beg+sourceRate/2)/sourceRate;
    end = (end+sourceRate/2)/sourceRate;

    //find the state id
    TagToIdMap::iterator it = mLabelMap.find(state);
    if(mLabelMap.end() == it) {
      Error(std::string("Unknown state tag: '") + state + "' file:'" + mpLabelFile);
    }
    state_index = it->second;

    //don't write after matrix... (possible longer transcript than feature
    //file): count the frames that fall past the last row in one step
    //instead of iterating over each of them
    if(end > num_rows) {
      const unsigned long long first_cut = (beg > num_rows) ? beg : num_rows;
      if(end > first_cut) trunc_frames += static_cast<size_t>(end - first_cut);
      end = num_rows;
    }

    // Fill the desired matrix
    for(unsigned long long frame=beg; frame<end; frame++) {
      const size_t row = static_cast<size_t>(frame);

      //check the frame is still empty:
      if(0.0 != rDesired[row].Sum()) {
        //ERROR!!!
        //find out what was previously filled: the last column holding the
        //row maximum, mapped back to its state tag if there is one
        BaseFloat max = rDesired[row].Max();
        int idx = -1;
        for(int i=0; i<(int)rDesired[row].Dim(); i++) {
          if(rDesired[row][i] == max) idx = i;
        }
        std::string state_prev = "error";
        for(TagToIdMap::iterator prev_it=mLabelMap.begin(); prev_it!=mLabelMap.end(); ++prev_it) {
          if((int)prev_it->second == idx) { state_prev = prev_it->first; break; }
        }
        //print the error message
        std::ostringstream os;
        os << "Frame already assigned to other state, "
           << " file: " << mpLabelFile
           << " frame: " << frame
           << " nframes: " << nFrames
           << " sum: " << rDesired[row].Sum()
           << " previously assigned to: " << state_prev << "(" << idx << ")"
           << " now should be assigned to: " << state << "(" << state_index << ")"
           << "\n";
        Error(os.str());
      }

      //fill the row
      rDesired[row][state_index] = 1.0f;
      if (has_vad)
        rDesired[row][label_map_size + (vad_state != 0 ? 1 : 0)] = 1.0f;
    }
  }
  mLabelStream.Close();

  //check the desired matrix: every row carries exactly one state entry,
  //plus exactly one VAD entry when has_vad is set
  const float expected_row_sum = has_vad ? 2.0f : 1.0f;
  for(size_t i=0; i<rDesired.Rows(); ++i) {
    const float desired_row_sum = rDesired[i].Sum();
    if(desired_row_sum != expected_row_sum) {
      std::ostringstream os;
      os << "Desired vector sum isn't " << (has_vad ? "2.0" : "1.0") << ", "
         << " file: " << mpLabelFile
         << " row: " << i
         << " nframes: " << nFrames
         << " content: " << rDesired[i]
         << " sum: " << desired_row_sum << "\n";
      Error(os.str());
    }
  }

  //warning when truncating many frames
  if(trunc_frames > 10) {
    std::ostringstream os;
    os << "Truncated frames: " << trunc_frames
       << " Check sourcerate in features and validity of labels\n";
    Warning(os.str());
  }

  //timer
  tim.End(); mGenDesiredMatrixTime += tim.Val();
}
void
LabelRepository::
ReadOutputLabelMap(const char* file)
{
assert(mLabelMap.size() == 0);
int i = 0;
std::string state_tag;
std::ifstream in(file);
if(!in.good())
Error(std::string("Cannot open OutputLabelMapFile: ")+file);
in >> std::ws;
while(!in.eof()) {
in >> state_tag;
in >> std::ws;
assert(mLabelMap.find(state_tag) == mLabelMap.end());
mLabelMap[state_tag] = i++;
}
in.close();
assert(mLabelMap.size() > 0);
}
/// Dispatches to the generator that matches the current label format:
/// MAP -> GenDesiredMatrixExtMap(), RAW -> GenDesiredMatrixExtRaw().
/// Any other value of mlf_fmt trips assert(0). All arguments are forwarded
/// unchanged.
void LabelRepository::GenDesiredMatrixExt(std::vector<BfMatrix>& rDesired,
                                          size_t nFrames,
                                          size_t sourceRate,
                                          const char* pFeatureLogical)
{
  if (MAP == mlf_fmt) {
    GenDesiredMatrixExtMap(rDesired, nFrames, sourceRate, pFeatureLogical);
  } else if (RAW == mlf_fmt) {
    GenDesiredMatrixExtRaw(rDesired, nFrames, sourceRate, pFeatureLogical);
  } else {
    // format was never selected (or holds an unexpected value)
    assert(0);
  }
}
void
LabelRepository::
GenDesiredMatrixExtMap(std::vector<BfMatrix>& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical)
{
//timer
Timer tim; tim.Start();
//Get the MLF stream reference...
IMlfStream& mLabelStream = *mpLabelStream;
//Build the file name of the label
MakeHtkFileName(mpLabelFile, pFeatureLogical, mpLabelDir, mpLabelExt);
//Find block in MLF file
mLabelStream.Open(mpLabelFile);
if(!mLabelStream.good()) {
Error(std::string("Cannot open label MLF record: ") + mpLabelFile);
}
//resize the matrix
if(nFrames < 1) {
KALDI_ERR << "Number of frames:" << nFrames << " is lower than 1!!!\n"
<< pFeatureLogical;
}
size_t prev = rDesired.size();
rDesired.resize(prev + 1, BfMatrix()); /* state + vad */
int label_map_size = mLabelMap.size();
rDesired[prev].Init(nFrames, 1, true); //true: Zero()
//aux variables
std::string line, state;
unsigned long long beg, end;
size_t state_index;
size_t trunc_frames = 0;
TagToIdMap::iterator it;
//parse the label file
while(!mLabelStream.eof()) {
std::getline(mLabelStream, line);
if(line == "") continue; //skip newlines/comments from MLF
if(line[0] == '#') continue;
std::istringstream& iss = mGenDesiredMatrixStream;
iss.clear();
iss.str(line);
//parse the line
//begin
iss >> std::ws >> beg;
if(iss.fail()) {
KALDI_ERR << "Cannot parse column 1 (begin)\n"
<< "line: " << line << "\n"
<< "file: " << mpLabelFile << "\n";
}
//end
iss >> std::ws >> end;
if(iss.fail()) {
KALDI_ERR << "Cannot parse column 2 (end)\n"
<< "line: " << line << "\n"
<< "file: " << mpLabelFile << "\n";
}
//state tag
iss >> std::ws >> state;
if(iss.fail()) {
KALDI_ERR << "Cannot parse column 3 (state_tag)\n"
<< "line: " << line << "\n"
<< "file: " << mpLabelFile << "\n";
}
//fprintf(stderr, "Parsed: %lld %lld %s\n", beg, end, state.c_str());
//divide beg/end by sourceRate and round up to get interval of frames
beg = (beg+sourceRate/2)/sourceRate;
end = (end+sourceRate/2)/sourceRate;
//beg = (int)round(beg / (double)sourceRate);
//end = (int)round(end / (double)sourceRate);
//find the state id
it = mLabelMap.find(state);
if(mLabelMap.end() == it) {
Error(std::string("Unknown state tag: '") + state + "' file:'" + mpLabelFile);
}
state_index = it->second;
// Fill the desired matrix
for(unsigned long long frame=beg; frame<end; frame++) {
//don't write after matrix... (possible longer transcript than feature file)
if(frame >= (int)rDesired[prev].Rows()) { trunc_frames++; continue; }
//check the next frame is empty:
if(0.0 != rDesired[prev][frame][0]) {
//ERROR!!!
//find out what was previously filled!!!
/*
BaseFloat max = rDesired[prev][frame].Max();
int idx = -1;
for(int i=0; i<(int)rDesired[prev][frame].Dim(); i++) {
if(rDesired[prev][frame][i] == max) idx = i;
}
*/
BaseFloat max = rDesired[prev][frame][0];
int idx = round(max);
for(it=mLabelMap.begin(); it!=mLabelMap.end(); ++it) {
if((int)it->second == idx) break;
}
std::string state_prev = "error";
if(it != mLabelMap.end()) {
state_prev = it->first;
}
//print the error message
std::ostringstream os;
os << "Frame already assigned to other state, "
<< " file: " << mpLabelFile
<< " frame: " << frame
<< " nframes: " << nFrames
<< " sum: " << max
<< " previously assigned to: " << state_prev << "(" << idx << ")"
<< " now should be assigned to: " << state << "(" << state_index << ")"
<< "\n";
Error(os.str());
}
//fill the row
//rDesired[prev][(size_t)frame][state_index] = 1.0f;
rDesired[prev][(size_t)frame][0] = state_index;
}
}
mLabelStream.Close();
/*
//check the desired matrix (rows sum up to 1.0)
for(size_t i=0; i<rDesired[prev].Rows(); ++i) {
float desired_row_sum = rDesired[prev][i].Sum();
if(!desired_row_sum == 1.0) {
std::ostringstream os;
os << "Desired vector sum isn't 1.0, "
<< " file: " << mpLabelFile
<< " row: " << i
<< " nframes: " << nFrames
<< " content: " << rDesired[prev][i]
<< " sum: " << desired_row_sum << "\n";
Error(os.str());
}
}
*/
//warning when truncating many frames
if(trunc_frames > 10) {
std::ostringstream os;
os << "Truncated frames: " << trunc_frames
<< " Check sourcerate in features and validity of labels\n";
Warning(os.str());
}
//timer
tim.End(); mGenDesiredMatrixTime += tim.Val();
}
void
LabelRepository::
GenDesiredMatrixExtRaw(std::vector<BfMatrix>& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical)
{
//timer
Timer tim; tim.Start();
//Get the MLF stream reference...
IMlfStream& mLabelStream = *mpLabelStream;
//Build the file name of the label
MakeHtkFileName(mpLabelFile, pFeatureLogical, mpLabelDir, mpLabelExt);
//Find block in MLF file
mLabelStream.Open(mpLabelFile);
if(!mLabelStream.good()) {
Error(std::string("Cannot open label MLF record: ") + mpLabelFile);
}
//resize the matrix
if(nFrames < 1) {
KALDI_ERR << "Number of frames:" << nFrames << " is lower than 1!!!\n"
<< pFeatureLogical;
}
size_t prev = rDesired.size();
rDesired.resize(prev + 1, BfMatrix()); /* state + vad */
rDesired[prev].Init(nFrames, raw_dim, true); //true: Zero()
//aux variables
std::string line, state;
unsigned long long beg, end;
size_t trunc_frames = 0;
Vector<BaseFloat> raw;
raw.Init(raw_dim);
//parse the label file
while(!mLabelStream.eof()) {
std::getline(mLabelStream, line);
if(line == "") continue; //skip newlines/comments from MLF
if(line[0] == '#') continue;
std::istringstream& iss = mGenDesiredMatrixStream;
iss.clear();
iss.str(line);
//parse the line
//begin
iss >> std::ws >> beg;
if(iss.fail()) {
KALDI_ERR << "Cannot parse column 1 (begin)\n"
<< "line: " << line << "\n"
<< "file: " << mpLabelFile << "\n";
}
//end
iss >> std::ws >> end;
if(iss.fail()) {
KALDI_ERR << "Cannot parse column 2 (end)\n"
<< "line: " << line << "\n"
<< "file: " << mpLabelFile << "\n";
}
for (size_t i = 0; i < raw_dim; i++)
{
if (iss.eof())
PError("[label] insufficient columns for the label: %s", mpLabelFile);
iss >> raw[i];
if (iss.fail())
PError("[label] cannot parse raw value for the label: %s", mpLabelFile);
}
/*
for (size_t i = 0; i < raw_dim; i++)
fprintf(stderr, "%.3f", raw[i]);
fprintf(stderr, "\n");
*/
//divide beg/end by sourceRate and round up to get interval of frames
beg = (beg+sourceRate/2)/sourceRate;
if (end == (unsigned long long)-1)
end = rDesired[prev].Rows();
else
end = (end+sourceRate/2)/sourceRate;
//printf("end:%lld\n", end);
//beg = (int)round(beg / (double)sourceRate);
//end = (int)round(end / (double)sourceRate);
// Fill the desired matrix
for(unsigned long long frame=beg; frame<end; frame++) {
//don't write after matrix... (possible longer transcript than feature file)
if(frame >= (int)rDesired[prev].Rows()) { trunc_frames++; continue; }
//check the next frame is empty:
if(0.0 != rDesired[prev][frame][0]) {
//ERROR!!!
//find out what was previously filled!!!
/*
BaseFloat max = rDesired[prev][frame].Max();
int idx = -1;
for(int i=0; i<(int)rDesired[prev][frame].Dim(); i++) {
if(rDesired[prev][frame][i] == max) idx = i;
}
*/
BaseFloat max = rDesired[prev][frame][0];
//print the error message
std::ostringstream os;
os << "Frame already assigned to other state, "
<< " file: " << mpLabelFile
<< " frame: " << frame
<< " nframes: " << nFrames
<< " sum: " << max
<< "\n";
Error(os.str());
}
//fill the row
//rDesired[prev][(size_t)frame][state_index] = 1.0f;
rDesired[prev][(size_t)frame].Copy(raw);
}
}
mLabelStream.Close();
/*
//check the desired matrix (rows sum up to 1.0)
for(size_t i=0; i<rDesired[prev].Rows(); ++i) {
float desired_row_sum = rDesired[prev][i].Sum();
if(!desired_row_sum == 1.0) {
std::ostringstream os;
os << "Desired vector sum isn't 1.0, "
<< " file: " << mpLabelFile
<< " row: " << i
<< " nframes: " << nFrames
<< " content: " << rDesired[prev][i]
<< " sum: " << desired_row_sum << "\n";
Error(os.str());
}
}
*/
//warning when truncating many frames
if(trunc_frames > 10) {
std::ostringstream os;
os << "Truncated frames: " << trunc_frames
<< " Check sourcerate in features and validity of labels\n";
Warning(os.str());
}
//timer
tim.End(); mGenDesiredMatrixTime += tim.Val();
}
}//namespace