summaryrefslogtreecommitdiff
path: root/htk_io/src/KaldiLib/Labels.cc
diff options
context:
space:
mode:
Diffstat (limited to 'htk_io/src/KaldiLib/Labels.cc')
-rw-r--r--htk_io/src/KaldiLib/Labels.cc612
1 files changed, 612 insertions, 0 deletions
diff --git a/htk_io/src/KaldiLib/Labels.cc b/htk_io/src/KaldiLib/Labels.cc
new file mode 100644
index 0000000..8b04cde
--- /dev/null
+++ b/htk_io/src/KaldiLib/Labels.cc
@@ -0,0 +1,612 @@
+#include "Labels.h"
+#include "Timer.h"
+#include "Error.h"
+#include <cassert>
+#include <cmath>
+#include <cstdio>
+#include <cstring>
+#include <sstream>
+#include <string>
+#include <vector>
+
+
+namespace TNet {
+
+
+ ////////////////////////////////////////////////////////////////////////
+ // Class LabelRepository::
+ /// Initializes the repository from a label MLF and an output label map.
+ ///
+ /// Forwards all arguments to InitMap() and then records MAP as the label
+ /// format, exactly as InitExt() does for fmt == "map". Without this,
+ /// GenDesiredMatrixExt() would switch on whatever value mlf_fmt held before
+ /// the call.
+ void
+ LabelRepository::
+ Init(const char* pLabelMlfFile, const char* pOutputLabelMapFile, const char* pLabelDir, const char* pLabelExt) {
+   InitMap(pLabelMlfFile, pOutputLabelMapFile, pLabelDir, pLabelExt);
+   mlf_fmt = MAP;
+ }
+ /// Initializes the repository for one of the supported label formats.
+ ///
+ /// @param pLabelMlfFile  label MLF file, forwarded to InitMap()/InitRaw()
+ /// @param fmt            "map" or "raw"; anything else is reported via Error()
+ /// @param arg            format argument: the output label map file for
+ ///                       "map", the dimension specification for "raw"
+ /// @param pLabelDir      forwarded to InitMap()/InitRaw()
+ /// @param pLabelExt      forwarded to InitMap()/InitRaw()
+ void
+ LabelRepository::
+ InitExt(const char* pLabelMlfFile, const char* fmt, const char *arg, const char* pLabelDir, const char* pLabelExt) {
+   assert(NULL != fmt);
+   if (strcmp(fmt, "map") == 0)
+   {
+     InitMap(pLabelMlfFile, arg, pLabelDir, pLabelExt);
+     mlf_fmt = MAP;
+   }
+   else if (strcmp(fmt, "raw") == 0)
+   {
+     InitRaw(pLabelMlfFile, arg, pLabelDir, pLabelExt);
+     mlf_fmt = RAW;
+   }
+   else
+   {
+     // An unrecognised format used to fall through silently and left the
+     // repository without streams and without a valid mlf_fmt.
+     Error(std::string("Unknown label format: '") + fmt + "' (expected 'map' or 'raw')");
+   }
+ }
+
+ /// Opens and indexes the label MLF, then loads the output label map.
+ ///
+ /// Any previously created label streams are released first. The time spent
+ /// in IMlfStream::Index() is accumulated into mIndexTime. The directory and
+ /// extension pointers are stored as given (not copied).
+ void
+ LabelRepository::
+ InitMap(const char* pLabelMlfFile, const char* pOutputLabelMapFile, const char* pLabelDir, const char* pLabelExt)
+ {
+   assert(pLabelMlfFile != NULL);
+   assert(pOutputLabelMapFile != NULL);
+
+   // drop the old streams (deleting NULL is a no-op) and create fresh ones
+   delete mpLabelStream;
+   delete _mpLabelStream;
+   _mpLabelStream = new std::ifstream(pLabelMlfFile);
+   mpLabelStream = new IMlfStream(*_mpLabelStream);
+
+   // the stream must be usable before indexing
+   const bool streamOk = mpLabelStream->good();
+   if(!streamOk) {
+     Error(std::string("Cannot open Label MLF file: ")+pLabelMlfFile);
+   }
+
+   // build the record index (helps with randomized file lists) and time it
+   Timer indexTimer;
+   indexTimer.Start();
+   mpLabelStream->Index();
+   indexTimer.End();
+   mIndexTime += indexTimer.Val();
+
+   // load the state-label -> state-id mapping
+   ReadOutputLabelMap(pOutputLabelMapFile);
+
+   // remember where label files live
+   mpLabelDir = pLabelDir;
+   mpLabelExt = pLabelExt;
+ }
+
+ /// Opens and indexes the label MLF for "raw" labels.
+ ///
+ /// @param pLabelMlfFile  label MLF file to open
+ /// @param arg            textual dimension specification; must parse as a
+ ///                       positive integer and becomes raw_dim
+ /// @param pLabelDir      stored as given (not copied)
+ /// @param pLabelExt      stored as given (not copied)
+ ///
+ /// No output label map is read in this mode.
+ void
+ LabelRepository::
+ InitRaw(const char* pLabelMlfFile, const char *arg, const char* pLabelDir, const char* pLabelExt)
+ {
+   assert(NULL != pLabelMlfFile);
+   assert(NULL != arg);
+
+   // Parse into a signed type: extracting "-1" into an unsigned integer
+   // succeeds with a wrapped-around value, so the "<= 0" test could never
+   // reject a negative dimension.
+   std::istringstream iss(arg);
+   long long dim = 0;
+   iss >> dim;
+   if (iss.fail() || dim <= 0)
+     PError("[lab] malformed dimension specification");
+   raw_dim = (size_t)dim;
+
+   // initialize the label streams
+   delete mpLabelStream; //if NULL, does nothing
+   delete _mpLabelStream;
+   _mpLabelStream = new std::ifstream(pLabelMlfFile);
+   mpLabelStream = new IMlfStream(*_mpLabelStream);
+
+   // Label stream is initialized, just test it
+   if(!mpLabelStream->good())
+     Error(std::string("Cannot open Label MLF file: ")+pLabelMlfFile);
+
+   // Index the labels (good for randomized file lists)
+   Timer tim; tim.Start();
+   mpLabelStream->Index();
+   tim.End(); mIndexTime += tim.Val();
+
+   // Store the label dir/ext
+   mpLabelDir = pLabelDir;
+   mpLabelExt = pLabelExt;
+ }
+
+
+ /// Fills rDesired with one-hot training targets for a single utterance.
+ ///
+ /// The label record for pFeatureLogical is opened in the MLF stream and read
+ /// line by line as "begin end state_tag [vad_state]". begin/end are divided
+ /// by sourceRate (rounded to nearest) to obtain the frame interval
+ /// [beg, end). Every frame in it gets a 1 in the column given by the id that
+ /// mLabelMap assigns to state_tag. With has_vad two extra columns are
+ /// appended and column (map size + (vad_state != 0)) is set as well.
+ ///
+ /// @param rDesired         output; re-initialized to nFrames zeroed rows
+ /// @param nFrames          number of rows to generate, must be >= 1
+ /// @param sourceRate       divisor converting label times to frame indices,
+ ///                         must be >= 1
+ /// @param pFeatureLogical  logical feature name used to build the label name
+ /// @param has_vad          true when label lines carry a 4th (VAD) column
+ ///
+ /// Problems are reported through Error()/KALDI_ERR: missing record,
+ /// unparsable line, unknown state tag, a frame labelled twice, or a row whose
+ /// sum differs from the expected one. Frames past the end of the matrix are
+ /// only counted; a warning is issued when more than 10 were dropped.
+ void
+ LabelRepository::
+ GenDesiredMatrix(BfMatrix& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical, bool has_vad)
+ {
+   //timer
+   Timer tim; tim.Start();
+
+   //Get the MLF stream reference...
+   IMlfStream& mLabelStream = *mpLabelStream;
+   //Build the file name of the label
+   MakeHtkFileName(mpLabelFile, pFeatureLogical, mpLabelDir, mpLabelExt);
+
+   //Find block in MLF file
+   mLabelStream.Open(mpLabelFile);
+   if(!mLabelStream.good()) {
+     Error(std::string("Cannot open label MLF record: ") + mpLabelFile);
+   }
+
+   //validate the arguments before sizing the matrix
+   if(nFrames < 1) {
+     KALDI_ERR << "Number of frames:" << nFrames << " is lower than 1!!!\n"
+               << pFeatureLogical;
+   }
+   if(sourceRate < 1) {
+     //sourceRate is used as a divisor below
+     KALDI_ERR << "Source rate:" << sourceRate << " is lower than 1!!!\n"
+               << pFeatureLogical;
+   }
+   const int label_map_size = mLabelMap.size();
+   rDesired.Init(nFrames, label_map_size + (has_vad ? 2 : 0), true); //true: Zero()
+
+   //aux variables
+   std::string line, state;
+   unsigned long long beg, end;
+   size_t trunc_frames = 0;
+   int vad_state = 0;
+
+   //parse the label file
+   while(!mLabelStream.eof()) {
+     std::getline(mLabelStream, line);
+     if(line == "") continue; //skip newlines/comments from MLF
+     if(line[0] == '#') continue;
+
+     std::istringstream& iss = mGenDesiredMatrixStream;
+     iss.clear();
+     iss.str(line);
+
+     //parse the line
+     //begin
+     iss >> std::ws >> beg;
+     if(iss.fail()) {
+       KALDI_ERR << "Cannot parse column 1 (begin)\n"
+                 << "line: " << line << "\n"
+                 << "file: " << mpLabelFile << "\n";
+     }
+     //end
+     iss >> std::ws >> end;
+     if(iss.fail()) {
+       KALDI_ERR << "Cannot parse column 2 (end)\n"
+                 << "line: " << line << "\n"
+                 << "file: " << mpLabelFile << "\n";
+     }
+     //state tag
+     iss >> std::ws >> state;
+     if(iss.fail()) {
+       KALDI_ERR << "Cannot parse column 3 (state_tag)\n"
+                 << "line: " << line << "\n"
+                 << "file: " << mpLabelFile << "\n";
+     }
+
+     if (has_vad) /* an additional column for vad */
+     {
+       iss >> std::ws >> vad_state;
+       if(iss.fail()) {
+         KALDI_ERR << "Cannot parse column 4 (vad_state)\n"
+                   << "line: " << line << "\n"
+                   << "file: " << mpLabelFile << "\n";
+       }
+     }
+
+     //divide beg/end by sourceRate, rounding to nearest, to get the frame interval
+     beg = (beg+sourceRate/2)/sourceRate;
+     end = (end+sourceRate/2)/sourceRate;
+
+     //find the state id
+     TagToIdMap::iterator it = mLabelMap.find(state);
+     if(mLabelMap.end() == it) {
+       Error(std::string("Unknown state tag: '") + state + "' file:'" + mpLabelFile + "'");
+     }
+     const size_t state_index = it->second;
+
+     // Fill the desired matrix
+     for(unsigned long long frame=beg; frame<end; frame++) {
+       //don't write after matrix... (possible longer transcript than feature file)
+       //all remaining frames of this segment are out of range too, so count
+       //them in one step instead of iterating over each of them
+       if(frame >= rDesired.Rows()) {
+         trunc_frames += (size_t)(end - frame);
+         break;
+       }
+       const size_t row = (size_t)frame;
+
+       //check the frame is still empty:
+       if(0.0 != rDesired[row].Sum()) {
+         //find out which state was filled earlier; only the first
+         //label_map_size columns hold states, the rest are VAD flags
+         int idx = -1;
+         for(int i=0; i<label_map_size; i++) {
+           if(0.0 != rDesired[row][i]) idx = i;
+         }
+         std::string state_prev = "error";
+         for(TagToIdMap::iterator prev_it=mLabelMap.begin(); prev_it!=mLabelMap.end(); ++prev_it) {
+           if((int)prev_it->second == idx) { state_prev = prev_it->first; break; }
+         }
+         //print the error message
+         std::ostringstream os;
+         os << "Frame already assigned to other state, "
+            << " file: " << mpLabelFile
+            << " frame: " << frame
+            << " nframes: " << nFrames
+            << " sum: " << rDesired[row].Sum()
+            << " previously assigned to: " << state_prev << "(" << idx << ")"
+            << " now should be assigned to: " << state << "(" << state_index << ")"
+            << "\n";
+         Error(os.str());
+       }
+
+       //fill the row
+       rDesired[row][state_index] = 1.0f;
+       if (has_vad)
+         rDesired[row][label_map_size + (vad_state != 0 ? 1 : 0)] = 1.0f;
+     }
+   }
+
+   mLabelStream.Close();
+
+   //check the desired matrix: every row carries exactly one state (sum 1.0),
+   //plus exactly one VAD flag when has_vad (sum 2.0). The former test
+   //"!sum == 1.0" evaluated as "(sum == 0) == 1" and only caught empty rows.
+   const float expected_row_sum = has_vad ? 2.0f : 1.0f;
+   for(size_t i=0; i<rDesired.Rows(); ++i) {
+     float desired_row_sum = rDesired[i].Sum();
+     if(desired_row_sum != expected_row_sum) {
+       std::ostringstream os;
+       os << "Desired vector sum isn't " << (has_vad ? "2.0" : "1.0") << ", "
+          << " file: " << mpLabelFile
+          << " row: " << i
+          << " nframes: " << nFrames
+          << " content: " << rDesired[i]
+          << " sum: " << desired_row_sum << "\n";
+       Error(os.str());
+     }
+   }
+
+   //warning when truncating many frames
+   if(trunc_frames > 10) {
+     std::ostringstream os;
+     os << "Truncated frames: " << trunc_frames
+        << " Check sourcerate in features and validity of labels\n";
+     Warning(os.str());
+   }
+
+   //timer
+   tim.End(); mGenDesiredMatrixTime += tim.Val();
+ }
+
+
+
+ /// Reads the whitespace-separated list of state tags from `file` and assigns
+ /// them consecutive ids (0, 1, 2, ...) in mLabelMap, in file order.
+ ///
+ /// mLabelMap must be empty on entry. An unreadable file, a tag that occurs
+ /// twice, or a file without any tag is reported through Error(). These are
+ /// properties of the input file, so they are checked at run time rather than
+ /// with assert(), which is compiled out of NDEBUG builds.
+ void
+ LabelRepository::
+ ReadOutputLabelMap(const char* file)
+ {
+   assert(NULL != file);
+   assert(mLabelMap.size() == 0);
+
+   std::ifstream in(file);
+   if(!in.good())
+     Error(std::string("Cannot open OutputLabelMapFile: ")+file);
+
+   int next_id = 0;
+   std::string state_tag;
+   // operator>> skips leading whitespace and fails only once no tag is left
+   while(in >> state_tag) {
+     if(mLabelMap.find(state_tag) != mLabelMap.end()) {
+       Error(std::string("Duplicate state tag: '") + state_tag
+             + "' in OutputLabelMapFile: " + file);
+     }
+     mLabelMap[state_tag] = next_id++;
+   }
+
+   in.close();
+   if(mLabelMap.size() == 0)
+     Error(std::string("No state tags found in OutputLabelMapFile: ")+file);
+ }
+
+
+ /// Appends the target matrix for pFeatureLogical to rDesired, using the
+ /// generator that matches the configured label format (mlf_fmt).
+ /// Any format other than MAP or RAW trips an assertion.
+ void
+ LabelRepository::
+ GenDesiredMatrixExt(std::vector<BfMatrix>& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical) {
+   if (mlf_fmt == MAP)
+   {
+     GenDesiredMatrixExtMap(rDesired, nFrames, sourceRate, pFeatureLogical);
+   }
+   else if (mlf_fmt == RAW)
+   {
+     GenDesiredMatrixExtRaw(rDesired, nFrames, sourceRate, pFeatureLogical);
+   }
+   else
+   {
+     assert(0);
+   }
+ }
+
+ /// Appends to rDesired a (nFrames x 1) matrix whose single column holds, for
+ /// every frame, the state id of the label covering that frame.
+ ///
+ /// The label record for pFeatureLogical is opened in the MLF stream and read
+ /// line by line as "begin end state_tag". begin/end are divided by
+ /// sourceRate (rounded to nearest) to obtain the frame interval [beg, end).
+ /// An end value equal to (unsigned long long)-1 means "up to the last frame".
+ ///
+ /// @param rDesired         output; one matrix is appended at the back
+ /// @param nFrames          number of rows to generate, must be >= 1
+ /// @param sourceRate       divisor converting label times to frame indices,
+ ///                         must be >= 1
+ /// @param pFeatureLogical  logical feature name used to build the label name
+ ///
+ /// Problems are reported through Error()/KALDI_ERR: missing record,
+ /// unparsable line, unknown state tag, or a frame labelled twice. Frames past
+ /// the end of the matrix are only counted; a warning is issued when more than
+ /// 10 were dropped.
+ void
+ LabelRepository::
+ GenDesiredMatrixExtMap(std::vector<BfMatrix>& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical)
+ {
+   //timer
+   Timer tim; tim.Start();
+
+   //Get the MLF stream reference...
+   IMlfStream& mLabelStream = *mpLabelStream;
+   //Build the file name of the label
+   MakeHtkFileName(mpLabelFile, pFeatureLogical, mpLabelDir, mpLabelExt);
+
+   //Find block in MLF file
+   mLabelStream.Open(mpLabelFile);
+   if(!mLabelStream.good()) {
+     Error(std::string("Cannot open label MLF record: ") + mpLabelFile);
+   }
+
+   //validate the arguments before sizing the matrix
+   if(nFrames < 1) {
+     KALDI_ERR << "Number of frames:" << nFrames << " is lower than 1!!!\n"
+               << pFeatureLogical;
+   }
+   if(sourceRate < 1) {
+     //sourceRate is used as a divisor below
+     KALDI_ERR << "Source rate:" << sourceRate << " is lower than 1!!!\n"
+               << pFeatureLogical;
+   }
+
+   //append the new matrix; rDesired is not resized again below, so the
+   //reference stays valid for the rest of the function
+   const size_t prev = rDesired.size();
+   rDesired.resize(prev + 1, BfMatrix());
+   BfMatrix& target = rDesired[prev];
+   target.Init(nFrames, 1, true); //true: Zero()
+
+   //State id already written to each row, -1 while the row is unassigned.
+   //The matrix value itself cannot serve as the "assigned" marker because
+   //state id 0 is indistinguishable from the zero-initialized cell.
+   std::vector<int> assigned_state(target.Rows(), -1);
+
+   //aux variables
+   std::string line, state;
+   unsigned long long beg, end;
+   size_t trunc_frames = 0;
+
+   //parse the label file
+   while(!mLabelStream.eof()) {
+     std::getline(mLabelStream, line);
+     if(line == "") continue; //skip newlines/comments from MLF
+     if(line[0] == '#') continue;
+
+     std::istringstream& iss = mGenDesiredMatrixStream;
+     iss.clear();
+     iss.str(line);
+
+     //parse the line
+     //begin
+     iss >> std::ws >> beg;
+     if(iss.fail()) {
+       KALDI_ERR << "Cannot parse column 1 (begin)\n"
+                 << "line: " << line << "\n"
+                 << "file: " << mpLabelFile << "\n";
+     }
+     //end
+     iss >> std::ws >> end;
+     if(iss.fail()) {
+       KALDI_ERR << "Cannot parse column 2 (end)\n"
+                 << "line: " << line << "\n"
+                 << "file: " << mpLabelFile << "\n";
+     }
+     //state tag
+     iss >> std::ws >> state;
+     if(iss.fail()) {
+       KALDI_ERR << "Cannot parse column 3 (state_tag)\n"
+                 << "line: " << line << "\n"
+                 << "file: " << mpLabelFile << "\n";
+     }
+
+     //divide beg/end by sourceRate, rounding to nearest, to get the frame interval
+     beg = (beg+sourceRate/2)/sourceRate;
+     if (end == (unsigned long long)-1)
+       end = target.Rows(); //open-ended segment: label up to the last frame
+     else
+       end = (end+sourceRate/2)/sourceRate;
+
+     //find the state id
+     TagToIdMap::iterator it = mLabelMap.find(state);
+     if(mLabelMap.end() == it) {
+       Error(std::string("Unknown state tag: '") + state + "' file:'" + mpLabelFile + "'");
+     }
+     const size_t state_index = it->second;
+
+     // Fill the desired matrix
+     for(unsigned long long frame=beg; frame<end; frame++) {
+       //don't write after matrix... (possible longer transcript than feature file)
+       //all remaining frames of this segment are out of range too, so count
+       //them in one step instead of iterating over each of them
+       if(frame >= target.Rows()) {
+         trunc_frames += (size_t)(end - frame);
+         break;
+       }
+       const size_t row = (size_t)frame;
+
+       //check the frame is still empty:
+       if(assigned_state[row] >= 0) {
+         //look up the tag of the state written earlier
+         const int idx = assigned_state[row];
+         std::string state_prev = "error";
+         for(TagToIdMap::iterator prev_it=mLabelMap.begin(); prev_it!=mLabelMap.end(); ++prev_it) {
+           if((int)prev_it->second == idx) { state_prev = prev_it->first; break; }
+         }
+         //print the error message
+         std::ostringstream os;
+         os << "Frame already assigned to other state, "
+            << " file: " << mpLabelFile
+            << " frame: " << frame
+            << " nframes: " << nFrames
+            << " sum: " << target[row][0]
+            << " previously assigned to: " << state_prev << "(" << idx << ")"
+            << " now should be assigned to: " << state << "(" << state_index << ")"
+            << "\n";
+         Error(os.str());
+       }
+
+       //fill the row with the state id
+       target[row][0] = state_index;
+       assigned_state[row] = (int)state_index;
+     }
+   }
+
+   mLabelStream.Close();
+
+   //warning when truncating many frames
+   if(trunc_frames > 10) {
+     std::ostringstream os;
+     os << "Truncated frames: " << trunc_frames
+        << " Check sourcerate in features and validity of labels\n";
+     Warning(os.str());
+   }
+
+   //timer
+   tim.End(); mGenDesiredMatrixTime += tim.Val();
+ }
+
+ /// Appends to rDesired a (nFrames x raw_dim) matrix whose rows are the raw
+ /// target vectors given in the label record.
+ ///
+ /// The label record for pFeatureLogical is opened in the MLF stream and read
+ /// line by line as "begin end v[0] ... v[raw_dim-1]". begin/end are divided
+ /// by sourceRate (rounded to nearest) to obtain the frame interval
+ /// [beg, end); the vector is copied into every row of that interval. An end
+ /// value equal to (unsigned long long)-1 means "up to the last frame".
+ ///
+ /// @param rDesired         output; one matrix is appended at the back
+ /// @param nFrames          number of rows to generate, must be >= 1
+ /// @param sourceRate       divisor converting label times to frame indices,
+ ///                         must be >= 1
+ /// @param pFeatureLogical  logical feature name used to build the label name
+ ///
+ /// Problems are reported through Error()/PError()/KALDI_ERR: missing record,
+ /// unparsable line, too few values, or a frame labelled twice. Frames past
+ /// the end of the matrix are only counted; a warning is issued when more than
+ /// 10 were dropped.
+ void
+ LabelRepository::
+ GenDesiredMatrixExtRaw(std::vector<BfMatrix>& rDesired, size_t nFrames, size_t sourceRate, const char* pFeatureLogical)
+ {
+   //timer
+   Timer tim; tim.Start();
+
+   //Get the MLF stream reference...
+   IMlfStream& mLabelStream = *mpLabelStream;
+   //Build the file name of the label
+   MakeHtkFileName(mpLabelFile, pFeatureLogical, mpLabelDir, mpLabelExt);
+
+   //Find block in MLF file
+   mLabelStream.Open(mpLabelFile);
+   if(!mLabelStream.good()) {
+     Error(std::string("Cannot open label MLF record: ") + mpLabelFile);
+   }
+
+   //validate the arguments before sizing the matrix
+   if(nFrames < 1) {
+     KALDI_ERR << "Number of frames:" << nFrames << " is lower than 1!!!\n"
+               << pFeatureLogical;
+   }
+   if(sourceRate < 1) {
+     //sourceRate is used as a divisor below
+     KALDI_ERR << "Source rate:" << sourceRate << " is lower than 1!!!\n"
+               << pFeatureLogical;
+   }
+
+   //append the new matrix; rDesired is not resized again below, so the
+   //reference stays valid for the rest of the function
+   const size_t prev = rDesired.size();
+   rDesired.resize(prev + 1, BfMatrix());
+   BfMatrix& target = rDesired[prev];
+   target.Init(nFrames, raw_dim, true); //true: Zero()
+
+   //One flag per row, set once the row has been written. The matrix content
+   //cannot serve as the marker: a raw vector may legitimately start with 0.
+   std::vector<char> row_assigned(target.Rows(), 0);
+
+   //aux variables
+   std::string line;
+   unsigned long long beg, end;
+   size_t trunc_frames = 0;
+   Vector<BaseFloat> raw;
+   raw.Init(raw_dim);
+
+   //parse the label file
+   while(!mLabelStream.eof()) {
+     std::getline(mLabelStream, line);
+     if(line == "") continue; //skip newlines/comments from MLF
+     if(line[0] == '#') continue;
+
+     std::istringstream& iss = mGenDesiredMatrixStream;
+     iss.clear();
+     iss.str(line);
+
+     //parse the line
+     //begin
+     iss >> std::ws >> beg;
+     if(iss.fail()) {
+       KALDI_ERR << "Cannot parse column 1 (begin)\n"
+                 << "line: " << line << "\n"
+                 << "file: " << mpLabelFile << "\n";
+     }
+     //end
+     iss >> std::ws >> end;
+     if(iss.fail()) {
+       KALDI_ERR << "Cannot parse column 2 (end)\n"
+                 << "line: " << line << "\n"
+                 << "file: " << mpLabelFile << "\n";
+     }
+
+     //the raw target vector
+     for (size_t i = 0; i < raw_dim; i++)
+     {
+       if (iss.eof())
+         PError("[label] insufficient columns for the label: %s", mpLabelFile);
+       iss >> raw[i];
+       if (iss.fail())
+         PError("[label] cannot parse raw value for the label: %s", mpLabelFile);
+     }
+
+     //divide beg/end by sourceRate, rounding to nearest, to get the frame interval
+     beg = (beg+sourceRate/2)/sourceRate;
+     if (end == (unsigned long long)-1)
+       end = target.Rows(); //open-ended segment: label up to the last frame
+     else
+       end = (end+sourceRate/2)/sourceRate;
+
+     // Fill the desired matrix
+     for(unsigned long long frame=beg; frame<end; frame++) {
+       //don't write after matrix... (possible longer transcript than feature file)
+       //all remaining frames of this segment are out of range too, so count
+       //them in one step instead of iterating over each of them
+       if(frame >= target.Rows()) {
+         trunc_frames += (size_t)(end - frame);
+         break;
+       }
+       const size_t row = (size_t)frame;
+
+       //check the frame is still empty:
+       if(row_assigned[row]) {
+         //print the error message
+         std::ostringstream os;
+         os << "Frame already assigned to other state, "
+            << " file: " << mpLabelFile
+            << " frame: " << frame
+            << " nframes: " << nFrames
+            << " sum: " << target[row][0]
+            << "\n";
+         Error(os.str());
+       }
+
+       //fill the row
+       target[row].Copy(raw);
+       row_assigned[row] = 1;
+     }
+   }
+
+   mLabelStream.Close();
+
+   //warning when truncating many frames
+   if(trunc_frames > 10) {
+     std::ostringstream os;
+     os << "Truncated frames: " << trunc_frames
+        << " Check sourcerate in features and validity of labels\n";
+     Warning(os.str());
+   }
+
+   //timer
+   tim.End(); mGenDesiredMatrixTime += tim.Val();
+ }
+
+}//namespace