summary refs log tree commit diff
path: root/tnet_io/KaldiLib/Labels.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tnet_io/KaldiLib/Labels.cc')
-rw-r--r-- tnet_io/KaldiLib/Labels.cc 612
1 files changed, 0 insertions, 612 deletions
diff --git a/tnet_io/KaldiLib/Labels.cc b/tnet_io/KaldiLib/Labels.cc
deleted file mode 100644
index 8b04cde..0000000
--- a/tnet_io/KaldiLib/Labels.cc
+++ /dev/null
@@ -1,612 +0,0 @@
-#include "Labels.h"
-#include "Timer.h"
-#include "Error.h"
-#include <cstdio>
-#include <sstream>
-
-
-namespace TNet {
-
-
- ////////////////////////////////////////////////////////////////////////
- // Class LabelRepository::
- /**
-  * Initialize the repository from a "map"-format label MLF.
-  *
-  * Forwards all arguments to InitMap() and records MAP as the active MLF
-  * format, the same way InitExt() does for fmt == "map". Without recording
-  * the format, a repository that was previously set up through InitExt()
-  * with fmt == "raw" would keep dispatching GenDesiredMatrixExt() to the raw
-  * reader even though it now holds map-format labels.
-  *
-  * @param pLabelMlfFile        path of the label MLF file
-  * @param pOutputLabelMapFile  path of the file listing the state tags
-  * @param pLabelDir            label directory used when building record names
-  * @param pLabelExt            label extension used when building record names
-  */
- void
- LabelRepository::
- Init(const char* pLabelMlfFile, const char* pOutputLabelMapFile, const char* pLabelDir, const char* pLabelExt) {
-   InitMap(pLabelMlfFile, pOutputLabelMapFile, pLabelDir, pLabelExt);
-   mlf_fmt = MAP;
- }
- /**
-  * Initialize the repository, selecting the label format by name.
-  *
-  * @param pLabelMlfFile  path of the label MLF file
-  * @param fmt            "map" (labels are state tags resolved through a label
-  *                       map) or "raw" (labels are fixed-size numeric vectors)
-  * @param arg            format-specific argument: the output label map file
-  *                       for "map", the vector dimension as text for "raw"
-  * @param pLabelDir      label directory used when building record names
-  * @param pLabelExt      label extension used when building record names
-  *
-  * Any other format name is reported through Error() instead of being
-  * silently ignored, which used to leave the streams and mlf_fmt untouched.
-  */
- void
- LabelRepository::
- InitExt(const char* pLabelMlfFile, const char* fmt, const char *arg, const char* pLabelDir, const char* pLabelExt) {
-   assert(NULL != fmt);
-
-   if (strcmp(fmt, "map") == 0)
-   {
-     InitMap(pLabelMlfFile, arg, pLabelDir, pLabelExt);
-     mlf_fmt = MAP;
-   }
-   else if (strcmp(fmt, "raw") == 0)
-   {
-     InitRaw(pLabelMlfFile, arg, pLabelDir, pLabelExt);
-     mlf_fmt = RAW;
-   }
-   else
-   {
-     Error(std::string("Unknown label format: '") + fmt + "' (expected 'map' or 'raw')");
-   }
- }
-
- /**
-  * Open and index a label MLF whose labels are state tags, then load the
-  * tag -> id map.
-  *
-  * Any previously held label streams are released first. The time spent
-  * indexing the MLF is accumulated into mIndexTime. The label dir/ext
-  * pointers are stored as given (not copied here).
-  *
-  * @param pLabelMlfFile        path of the label MLF file (must not be NULL)
-  * @param pOutputLabelMapFile  path of the state tag list (must not be NULL)
-  * @param pLabelDir            label directory, stored in mpLabelDir
-  * @param pLabelExt            label extension, stored in mpLabelExt
-  */
- void
- LabelRepository::
- InitMap(const char* pLabelMlfFile, const char* pOutputLabelMapFile, const char* pLabelDir, const char* pLabelExt)
- {
-   assert(pLabelMlfFile != NULL);
-   assert(pOutputLabelMapFile != NULL);
-
-   // Drop the old streams (deleting NULL is a no-op), wrapper first, then
-   // the file stream it was reading from.
-   delete mpLabelStream;
-   delete _mpLabelStream;
-
-   // Re-create the file stream and the MLF view on top of it.
-   _mpLabelStream = new std::ifstream(pLabelMlfFile);
-   mpLabelStream = new IMlfStream(*_mpLabelStream);
-
-   const bool streamUsable = mpLabelStream->good();
-   if (!streamUsable) {
-     Error(std::string("Cannot open Label MLF file: ")+pLabelMlfFile);
-   }
-
-   // Build the record index (needed for randomized file lists) and account
-   // for the time it took.
-   Timer indexTimer;
-   indexTimer.Start();
-   mpLabelStream->Index();
-   indexTimer.End();
-   mIndexTime += indexTimer.Val();
-
-   // Load the state-label to state-id map.
-   ReadOutputLabelMap(pOutputLabelMapFile);
-
-   // Remember where label records live.
-   mpLabelDir = pLabelDir;
-   mpLabelExt = pLabelExt;
- }
-
- /**
-  * Open and index a label MLF whose labels are raw numeric vectors.
-  *
-  * Any previously held label streams are released first. The time spent
-  * indexing the MLF is accumulated into mIndexTime. No label map is read in
-  * this mode.
-  *
-  * @param pLabelMlfFile  path of the label MLF file (must not be NULL)
-  * @param arg            dimension of the raw label vectors, as decimal text
-  *                       (must not be NULL, must parse to a value > 0)
-  * @param pLabelDir      label directory, stored in mpLabelDir
-  * @param pLabelExt      label extension, stored in mpLabelExt
-  */
- void
- LabelRepository::
- InitRaw(const char* pLabelMlfFile, const char *arg, const char* pLabelDir, const char* pLabelExt)
- {
-   assert(NULL != pLabelMlfFile);
-   assert(NULL != arg);
-
-   // Parse into a signed type: extraction into an unsigned type accepts a
-   // leading '-' and wraps around, so "-1" would otherwise pass the check
-   // below as a huge dimension.
-   std::istringstream iss(arg);
-   long long dim = 0;
-   iss >> dim;
-   if (iss.fail() || dim <= 0)
-     PError("[lab] malformed dimension specification");
-   raw_dim = static_cast<size_t>(dim);
-
-   // initialize the label streams
-   delete mpLabelStream; //if NULL, does nothing
-   delete _mpLabelStream;
-   _mpLabelStream = new std::ifstream(pLabelMlfFile);
-   mpLabelStream = new IMlfStream(*_mpLabelStream);
-
-   // Label stream is initialized, just test it
-   if(!mpLabelStream->good())
-     Error(std::string("Cannot open Label MLF file: ")+pLabelMlfFile);
-
-   // Index the labels (good for randomized file lists)
-   Timer tim; tim.Start();
-   mpLabelStream->Index();
-   tim.End(); mIndexTime += tim.Val();
-
-   // Store the label dir/ext
-   mpLabelDir = pLabelDir;
-   mpLabelExt = pLabelExt;
- }
-
-
- /**
-  * Fill rDesired with the per-frame one-hot targets of one utterance.
-  *
-  * The label record is looked up in the indexed MLF under the name built from
-  * pFeatureLogical, mpLabelDir and mpLabelExt. Every non-empty line that does
-  * not start with '#' is parsed as
-  *     <begin> <end> <state_tag> [<vad_state>]
-  * begin/end are divided by sourceRate (rounded to nearest) to obtain the
-  * half-open frame interval [begin, end). Each frame in it receives a 1.0 in
-  * the column of the state tag and, when has_vad is set, a second 1.0 in one
-  * of two extra columns appended after the state columns (chosen by
-  * vad_state != 0).
-  *
-  * Frames past the end of the matrix are skipped and counted; a Warning is
-  * issued when more than 10 were skipped. Parse failures, unknown tags,
-  * frames labelled twice and rows that do not end up fully labelled are
-  * reported through KALDI_ERR / Error(). The elapsed time is added to
-  * mGenDesiredMatrixTime.
-  *
-  * @param rDesired         output matrix, re-initialized to
-  *                         nFrames x (label map size [+ 2 with VAD]), zeroed
-  * @param nFrames          number of feature frames (must be >= 1)
-  * @param sourceRate       divisor converting label times to frame indices
-  *                         (must be >= 1)
-  * @param pFeatureLogical  logical feature file name used to build the
-  *                         label record name
-  * @param has_vad          whether each label line carries a 4th VAD column
-  */
- void
- LabelRepository::
- GenDesiredMatrix(BfMatrix& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical, bool has_vad)
- {
-   //timer
-   Timer tim; tim.Start();
-
-   //Get the MLF stream reference...
-   IMlfStream& mLabelStream = *mpLabelStream;
-   //Build the file name of the label
-   MakeHtkFileName(mpLabelFile, pFeatureLogical, mpLabelDir, mpLabelExt);
-
-   //Find block in MLF file
-   mLabelStream.Open(mpLabelFile);
-   if(!mLabelStream.good()) {
-     Error(std::string("Cannot open label MLF record: ") + mpLabelFile);
-   }
-
-   //resize the matrix
-   if(nFrames < 1) {
-     KALDI_ERR << "Number of frames:" << nFrames << " is lower than 1!!!\n"
-               << pFeatureLogical;
-   }
-   //sourceRate is used as a divisor below
-   if(sourceRate < 1) {
-     KALDI_ERR << "Source rate:" << sourceRate << " is lower than 1!!!\n"
-               << pFeatureLogical;
-   }
-   int label_map_size = mLabelMap.size();
-   rDesired.Init(nFrames, label_map_size + (has_vad ? 2 : 0), true); //true: Zero()
-   const unsigned long long n_rows = rDesired.Rows();
-
-   //aux variables
-   std::string line, state;
-   unsigned long long beg, end;
-   size_t state_index;
-   size_t trunc_frames = 0;
-   TagToIdMap::iterator it;
-   int vad_state = 0;
-
-   //parse the label record; the loop is driven by the read itself so that a
-   //stream that fails without reaching EOF cannot spin forever
-   while(std::getline(mLabelStream, line)) {
-     if(line == "") continue; //skip newlines/comments from MLF
-     if(line[0] == '#') continue;
-
-     std::istringstream& iss = mGenDesiredMatrixStream;
-     iss.clear();
-     iss.str(line);
-
-     //parse the line
-     //begin
-     iss >> std::ws >> beg;
-     if(iss.fail()) {
-       KALDI_ERR << "Cannot parse column 1 (begin)\n"
-                 << "line: " << line << "\n"
-                 << "file: " << mpLabelFile << "\n";
-     }
-     //end
-     iss >> std::ws >> end;
-     if(iss.fail()) {
-       KALDI_ERR << "Cannot parse column 2 (end)\n"
-                 << "line: " << line << "\n"
-                 << "file: " << mpLabelFile << "\n";
-     }
-     //state tag
-     iss >> std::ws >> state;
-     if(iss.fail()) {
-       KALDI_ERR << "Cannot parse column 3 (state_tag)\n"
-                 << "line: " << line << "\n"
-                 << "file: " << mpLabelFile << "\n";
-     }
-
-     if (has_vad) /* an additional column for vad */
-     {
-       iss >> std::ws >> vad_state;
-       if(iss.fail()) {
-         KALDI_ERR << "Cannot parse column 4 (vad_state)\n"
-                   << "line: " << line << "\n"
-                   << "file: " << mpLabelFile << "\n";
-       }
-     }
-
-     //divide beg/end by sourceRate, rounding to the nearest frame
-     beg = (beg+sourceRate/2)/sourceRate;
-     end = (end+sourceRate/2)/sourceRate;
-
-     //find the state id
-     it = mLabelMap.find(state);
-     if(mLabelMap.end() == it) {
-       Error(std::string("Unknown state tag: '") + state + "' file:'" + mpLabelFile + "'");
-     }
-     state_index = it->second;
-
-     // Fill the desired matrix
-     for(unsigned long long frame=beg; frame<end; frame++) {
-       //don't write after matrix... (possible longer transcript than feature file)
-       if(frame >= n_rows) { trunc_frames++; continue; }
-       const size_t row = static_cast<size_t>(frame);
-
-       //check the frame is still empty:
-       if(0.0 != rDesired[row].Sum()) {
-         //find out what was previously filled (last column holding the max)
-         BaseFloat max = rDesired[row].Max();
-         int idx = -1;
-         for(int i=0; i<(int)rDesired[row].Dim(); i++) {
-           if(rDesired[row][i] == max) idx = i;
-         }
-         for(it=mLabelMap.begin(); it!=mLabelMap.end(); ++it) {
-           if((int)it->second == idx) break;
-         }
-         std::string state_prev = "error";
-         if(it != mLabelMap.end()) {
-           state_prev = it->first;
-         }
-         //print the error message
-         std::ostringstream os;
-         os << "Frame already assigned to other state, "
-            << " file: " << mpLabelFile
-            << " frame: " << frame
-            << " nframes: " << nFrames
-            << " sum: " << rDesired[row].Sum()
-            << " previously assigned to: " << state_prev << "(" << idx << ")"
-            << " now should be assigned to: " << state << "(" << state_index << ")"
-            << "\n";
-         Error(os.str());
-       }
-
-       //fill the row
-       rDesired[row][state_index] = 1.0f;
-       if (has_vad)
-         rDesired[row][label_map_size + !!vad_state] = 1.0f;
-     }
-   }
-
-   mLabelStream.Close();
-
-   //check the desired matrix: every row must hold exactly one 1.0 for the
-   //state and, with VAD, one more for the VAD flag. The cells are only ever
-   //0 or 1.0f, so the sum is exact and can be compared with !=.
-   const float expected_row_sum = has_vad ? 2.0f : 1.0f;
-   for(size_t i=0; i<rDesired.Rows(); ++i) {
-     float desired_row_sum = rDesired[i].Sum();
-     if(desired_row_sum != expected_row_sum) {
-       std::ostringstream os;
-       os << "Desired vector sum isn't " << expected_row_sum << ", "
-          << " file: " << mpLabelFile
-          << " row: " << i
-          << " nframes: " << nFrames
-          << " content: " << rDesired[i]
-          << " sum: " << desired_row_sum << "\n";
-       Error(os.str());
-     }
-   }
-
-   //warning when truncating many frames
-   if(trunc_frames > 10) {
-     std::ostringstream os;
-     os << "Truncated frames: " << trunc_frames
-        << " Check sourcerate in features and validity of labels\n";
-     Warning(os.str());
-   }
-
-   //timer
-   tim.End(); mGenDesiredMatrixTime += tim.Val();
- }
-
-
-
- /**
-  * Load the state tag -> state id map from a whitespace-separated tag list.
-  *
-  * Tags are numbered 0, 1, 2, ... in the order they appear in the file.
-  * The map must be empty on entry. An unreadable file, a repeated tag or a
-  * file without any tag is reported through Error(); these are input
-  * problems, so they are checked in all builds rather than by assert().
-  *
-  * @param file  path of the tag list (must not be NULL)
-  */
- void
- LabelRepository::
- ReadOutputLabelMap(const char* file)
- {
-   assert(NULL != file);
-   assert(mLabelMap.size() == 0);
-
-   std::ifstream in(file);
-   if(!in.good())
-     Error(std::string("Cannot open OutputLabelMapFile: ")+file);
-
-   int next_id = 0;
-   std::string state_tag;
-   // operator>> skips leading whitespace and fails at end of input, so the
-   // loop stops exactly after the last tag.
-   while(in >> state_tag) {
-     if(mLabelMap.find(state_tag) != mLabelMap.end())
-       Error(std::string("Duplicate state tag '") + state_tag + "' in OutputLabelMapFile: " + file);
-     mLabelMap[state_tag] = next_id++;
-   }
-
-   in.close();
-
-   if(mLabelMap.size() == 0)
-     Error(std::string("No state tags found in OutputLabelMapFile: ")+file);
- }
-
-
- /**
-  * Append the target matrix of one utterance to rDesired, using the reader
-  * that matches the MLF format recorded in mlf_fmt (MAP or RAW).
-  *
-  * All arguments are passed through unchanged to the selected reader. Any
-  * other value of mlf_fmt trips an assertion.
-  */
- void
- LabelRepository::
- GenDesiredMatrixExt(std::vector<BfMatrix>& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical) {
-   if (mlf_fmt == MAP) {
-     GenDesiredMatrixExtMap(rDesired, nFrames, sourceRate, pFeatureLogical);
-   } else if (mlf_fmt == RAW) {
-     GenDesiredMatrixExtRaw(rDesired, nFrames, sourceRate, pFeatureLogical);
-   } else {
-     assert(0);
-   }
- }
-
- /**
-  * Append a single-column matrix of per-frame state ids for one utterance.
-  *
-  * A new nFrames x 1 zeroed matrix is pushed onto rDesired. The label record
-  * is looked up in the indexed MLF under the name built from
-  * pFeatureLogical, mpLabelDir and mpLabelExt. Every non-empty line that does
-  * not start with '#' is parsed as
-  *     <begin> <end> <state_tag>
-  * begin/end are divided by sourceRate (rounded to nearest) to obtain the
-  * half-open frame interval [begin, end); an end value equal to
-  * (unsigned long long)-1 stands for "up to the last frame". Each frame in
-  * the interval stores the numeric id of the state tag.
-  *
-  * Frames past the end of the matrix are skipped and counted; a Warning is
-  * issued when more than 10 were skipped. Parse failures, unknown tags and
-  * frames labelled twice are reported through KALDI_ERR / Error(). Unlike
-  * GenDesiredMatrix(), frames left without any label are not reported. The
-  * elapsed time is added to mGenDesiredMatrixTime.
-  *
-  * @param rDesired         vector receiving the new matrix at its end
-  * @param nFrames          number of feature frames (must be >= 1)
-  * @param sourceRate       divisor converting label times to frame indices
-  *                         (must be >= 1)
-  * @param pFeatureLogical  logical feature file name used to build the
-  *                         label record name
-  */
- void
- LabelRepository::
- GenDesiredMatrixExtMap(std::vector<BfMatrix>& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical)
- {
-   //timer
-   Timer tim; tim.Start();
-
-   //Get the MLF stream reference...
-   IMlfStream& mLabelStream = *mpLabelStream;
-   //Build the file name of the label
-   MakeHtkFileName(mpLabelFile, pFeatureLogical, mpLabelDir, mpLabelExt);
-
-   //Find block in MLF file
-   mLabelStream.Open(mpLabelFile);
-   if(!mLabelStream.good()) {
-     Error(std::string("Cannot open label MLF record: ") + mpLabelFile);
-   }
-
-   //resize the matrix
-   if(nFrames < 1) {
-     KALDI_ERR << "Number of frames:" << nFrames << " is lower than 1!!!\n"
-               << pFeatureLogical;
-   }
-   //sourceRate is used as a divisor below
-   if(sourceRate < 1) {
-     KALDI_ERR << "Source rate:" << sourceRate << " is lower than 1!!!\n"
-               << pFeatureLogical;
-   }
-
-   size_t prev = rDesired.size();
-   rDesired.resize(prev + 1, BfMatrix());
-   rDesired[prev].Init(nFrames, 1, true); //true: Zero()
-   const unsigned long long n_rows = rDesired[prev].Rows();
-
-   //A cell value of 0 cannot mean "not assigned yet" because 0 is itself a
-   //valid state id, so assignment is tracked separately per frame.
-   std::vector<char> assigned(static_cast<size_t>(n_rows), 0);
-
-   //aux variables
-   std::string line, state;
-   unsigned long long beg, end;
-   size_t state_index;
-   size_t trunc_frames = 0;
-   TagToIdMap::iterator it;
-
-   //parse the label record; the loop is driven by the read itself so that a
-   //stream that fails without reaching EOF cannot spin forever
-   while(std::getline(mLabelStream, line)) {
-     if(line == "") continue; //skip newlines/comments from MLF
-     if(line[0] == '#') continue;
-
-     std::istringstream& iss = mGenDesiredMatrixStream;
-     iss.clear();
-     iss.str(line);
-
-     //parse the line
-     //begin
-     iss >> std::ws >> beg;
-     if(iss.fail()) {
-       KALDI_ERR << "Cannot parse column 1 (begin)\n"
-                 << "line: " << line << "\n"
-                 << "file: " << mpLabelFile << "\n";
-     }
-     //end
-     iss >> std::ws >> end;
-     if(iss.fail()) {
-       KALDI_ERR << "Cannot parse column 2 (end)\n"
-                 << "line: " << line << "\n"
-                 << "file: " << mpLabelFile << "\n";
-     }
-     //state tag
-     iss >> std::ws >> state;
-     if(iss.fail()) {
-       KALDI_ERR << "Cannot parse column 3 (state_tag)\n"
-                 << "line: " << line << "\n"
-                 << "file: " << mpLabelFile << "\n";
-     }
-
-     //divide beg/end by sourceRate, rounding to the nearest frame;
-     //an all-ones end value means "until the last frame"
-     beg = (beg+sourceRate/2)/sourceRate;
-     if (end == (unsigned long long)-1)
-       end = n_rows;
-     else
-       end = (end+sourceRate/2)/sourceRate;
-
-     //find the state id
-     it = mLabelMap.find(state);
-     if(mLabelMap.end() == it) {
-       Error(std::string("Unknown state tag: '") + state + "' file:'" + mpLabelFile + "'");
-     }
-     state_index = it->second;
-
-     // Fill the desired matrix
-     for(unsigned long long frame=beg; frame<end; frame++) {
-       //don't write after matrix... (possible longer transcript than feature file)
-       if(frame >= n_rows) { trunc_frames++; continue; }
-       const size_t row = static_cast<size_t>(frame);
-
-       //check the frame has not been labelled yet:
-       if(assigned[row]) {
-         //recover the tag of the id stored earlier
-         BaseFloat max = rDesired[prev][row][0];
-         int idx = round(max);
-         for(it=mLabelMap.begin(); it!=mLabelMap.end(); ++it) {
-           if((int)it->second == idx) break;
-         }
-         std::string state_prev = "error";
-         if(it != mLabelMap.end()) {
-           state_prev = it->first;
-         }
-         //print the error message
-         std::ostringstream os;
-         os << "Frame already assigned to other state, "
-            << " file: " << mpLabelFile
-            << " frame: " << frame
-            << " nframes: " << nFrames
-            << " sum: " << max
-            << " previously assigned to: " << state_prev << "(" << idx << ")"
-            << " now should be assigned to: " << state << "(" << state_index << ")"
-            << "\n";
-         Error(os.str());
-       }
-
-       //store the state id for this frame
-       rDesired[prev][row][0] = state_index;
-       assigned[row] = 1;
-     }
-   }
-
-   mLabelStream.Close();
-
-   //warning when truncating many frames
-   if(trunc_frames > 10) {
-     std::ostringstream os;
-     os << "Truncated frames: " << trunc_frames
-        << " Check sourcerate in features and validity of labels\n";
-     Warning(os.str());
-   }
-
-   //timer
-   tim.End(); mGenDesiredMatrixTime += tim.Val();
- }
-
- /**
-  * Append a matrix of per-frame raw label vectors for one utterance.
-  *
-  * A new nFrames x raw_dim zeroed matrix is pushed onto rDesired. The label
-  * record is looked up in the indexed MLF under the name built from
-  * pFeatureLogical, mpLabelDir and mpLabelExt. Every non-empty line that does
-  * not start with '#' is parsed as
-  *     <begin> <end> <v_0> ... <v_{raw_dim-1}>
-  * begin/end are divided by sourceRate (rounded to nearest) to obtain the
-  * half-open frame interval [begin, end); an end value equal to
-  * (unsigned long long)-1 stands for "up to the last frame". Each frame in
-  * the interval receives a copy of the parsed vector.
-  *
-  * Frames past the end of the matrix are skipped and counted; a Warning is
-  * issued when more than 10 were skipped. Parse failures and frames labelled
-  * twice are reported through KALDI_ERR / PError() / Error(). Frames left
-  * without any label are not reported. The elapsed time is added to
-  * mGenDesiredMatrixTime.
-  *
-  * @param rDesired         vector receiving the new matrix at its end
-  * @param nFrames          number of feature frames (must be >= 1)
-  * @param sourceRate       divisor converting label times to frame indices
-  *                         (must be >= 1)
-  * @param pFeatureLogical  logical feature file name used to build the
-  *                         label record name
-  */
- void
- LabelRepository::
- GenDesiredMatrixExtRaw(std::vector<BfMatrix>& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical)
- {
-   //timer
-   Timer tim; tim.Start();
-
-   //Get the MLF stream reference...
-   IMlfStream& mLabelStream = *mpLabelStream;
-   //Build the file name of the label
-   MakeHtkFileName(mpLabelFile, pFeatureLogical, mpLabelDir, mpLabelExt);
-
-   //Find block in MLF file
-   mLabelStream.Open(mpLabelFile);
-   if(!mLabelStream.good()) {
-     Error(std::string("Cannot open label MLF record: ") + mpLabelFile);
-   }
-
-   //resize the matrix
-   if(nFrames < 1) {
-     KALDI_ERR << "Number of frames:" << nFrames << " is lower than 1!!!\n"
-               << pFeatureLogical;
-   }
-   //sourceRate is used as a divisor below
-   if(sourceRate < 1) {
-     KALDI_ERR << "Source rate:" << sourceRate << " is lower than 1!!!\n"
-               << pFeatureLogical;
-   }
-
-   size_t prev = rDesired.size();
-   rDesired.resize(prev + 1, BfMatrix());
-   rDesired[prev].Init(nFrames, raw_dim, true); //true: Zero()
-   const unsigned long long n_rows = rDesired[prev].Rows();
-
-   //A zero in column 0 cannot mean "not assigned yet" because a raw vector
-   //may legitimately start with 0, so assignment is tracked per frame.
-   std::vector<char> assigned(static_cast<size_t>(n_rows), 0);
-
-   //aux variables
-   std::string line;
-   unsigned long long beg, end;
-   size_t trunc_frames = 0;
-   Vector<BaseFloat> raw;
-   raw.Init(raw_dim);
-
-   //parse the label record; the loop is driven by the read itself so that a
-   //stream that fails without reaching EOF cannot spin forever
-   while(std::getline(mLabelStream, line)) {
-     if(line == "") continue; //skip newlines/comments from MLF
-     if(line[0] == '#') continue;
-
-     std::istringstream& iss = mGenDesiredMatrixStream;
-     iss.clear();
-     iss.str(line);
-
-     //parse the line
-     //begin
-     iss >> std::ws >> beg;
-     if(iss.fail()) {
-       KALDI_ERR << "Cannot parse column 1 (begin)\n"
-                 << "line: " << line << "\n"
-                 << "file: " << mpLabelFile << "\n";
-     }
-     //end
-     iss >> std::ws >> end;
-     if(iss.fail()) {
-       KALDI_ERR << "Cannot parse column 2 (end)\n"
-                 << "line: " << line << "\n"
-                 << "file: " << mpLabelFile << "\n";
-     }
-
-     //the remaining raw_dim columns form the label vector
-     for (size_t i = 0; i < raw_dim; i++)
-     {
-       if (iss.eof())
-         PError("[label] insufficient columns for the label: %s", mpLabelFile);
-       iss >> raw[i];
-       if (iss.fail())
-         PError("[label] cannot parse raw value for the label: %s", mpLabelFile);
-     }
-
-     //divide beg/end by sourceRate, rounding to the nearest frame;
-     //an all-ones end value means "until the last frame"
-     beg = (beg+sourceRate/2)/sourceRate;
-     if (end == (unsigned long long)-1)
-       end = n_rows;
-     else
-       end = (end+sourceRate/2)/sourceRate;
-
-     // Fill the desired matrix
-     for(unsigned long long frame=beg; frame<end; frame++) {
-       //don't write after matrix... (possible longer transcript than feature file)
-       if(frame >= n_rows) { trunc_frames++; continue; }
-       const size_t row = static_cast<size_t>(frame);
-
-       //check the frame has not been labelled yet:
-       if(assigned[row]) {
-         BaseFloat max = rDesired[prev][row][0];
-         //print the error message
-         std::ostringstream os;
-         os << "Frame already assigned to other state, "
-            << " file: " << mpLabelFile
-            << " frame: " << frame
-            << " nframes: " << nFrames
-            << " sum: " << max
-            << "\n";
-         Error(os.str());
-       }
-
-       //fill the row
-       rDesired[prev][row].Copy(raw);
-       assigned[row] = 1;
-     }
-   }
-
-   mLabelStream.Close();
-
-   //warning when truncating many frames
-   if(trunc_frames > 10) {
-     std::ostringstream os;
-     os << "Truncated frames: " << trunc_frames
-        << " Check sourcerate in features and validity of labels\n";
-     Warning(os.str());
-   }
-
-   //timer
-   tim.End(); mGenDesiredMatrixTime += tim.Val();
- }
-
-}//namespace