From 97425eb035b7e52918946f3799047fe48120d132 Mon Sep 17 00:00:00 2001
From: Determinant
Date: Mon, 24 Aug 2015 15:49:02 +0800
Subject: add lookup feature IO

---
 kaldi_io/src/cwrapper_kaldi.cpp | 75 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 72 insertions(+), 3 deletions(-)

(limited to 'kaldi_io/src/cwrapper_kaldi.cpp')

diff --git a/kaldi_io/src/cwrapper_kaldi.cpp b/kaldi_io/src/cwrapper_kaldi.cpp
index f48d343..c19908b 100644
--- a/kaldi_io/src/cwrapper_kaldi.cpp
+++ b/kaldi_io/src/cwrapper_kaldi.cpp
@@ -1,9 +1,11 @@
 #include <string>
+#include <map>
 #include "base/kaldi-common.h"
 #include "hmm/posterior.h"
 #include "util/table-types.h"
 typedef kaldi::BaseFloat BaseFloat;
-
+typedef std::map<std::string, kaldi::Matrix<BaseFloat> > StringToMatrix_t;
+typedef std::map<std::string, std::string> StringToString_t;
 extern "C" {
 #include "cwrapper_kaldi.h"
 #include "string.h"
@@ -61,12 +63,80 @@ extern "C" {
         return repo->feature_reader->Done();
     }
 
+    const char *kaldi_feature_repo_key(KaldiFeatureRepo *repo) {
+        return repo->feature_reader->Key().c_str();
+    }
+
     void kaldi_feature_repo_destroy(KaldiFeatureRepo *repo) {
         if (repo->feature_reader)
             delete repo->feature_reader;
         delete repo;
     }
 
+    struct KaldiLookupFeatureRepo {
+        StringToMatrix_t key2mat;
+        StringToString_t map;
+    };
+
+    KaldiLookupFeatureRepo *kaldi_lookup_feature_repo_new(const char *feature_rspecifier, const char *map_rspecifier) {
+        KaldiLookupFeatureRepo *repo = new KaldiLookupFeatureRepo();
+        kaldi::SequentialBaseFloatMatrixReader feature_reader = kaldi::SequentialBaseFloatMatrixReader(string(feature_rspecifier));
+        for (; !feature_reader.Done(); feature_reader.Next())
+        {
+            const std::string &key = feature_reader.Key();
+            const kaldi::Matrix<BaseFloat> &feat = feature_reader.Value();
+            if (repo->key2mat.find(key) != repo->key2mat.end())
+                fprintf(stderr, "[kaldi] warning: lookup feature for key %s already exists", key.c_str());
+            repo->key2mat[key] = feat;
+        }
+        kaldi::SequentialTokenVectorReader map_reader = kaldi::SequentialTokenVectorReader(string(map_rspecifier));
+        for (; !map_reader.Done(); map_reader.Next())
+        {
+            const std::vector<std::string> target = map_reader.Value();
+            assert(target.size() >= 1);
+            repo->map[map_reader.Key()] = *target.begin();
+        }
+        return repo;
+    }
+
+    Matrix *kaldi_lookup_feature_repo_read_utterance(KaldiLookupFeatureRepo *repo, KaldiFeatureRepo *frepo, int nframes, lua_State *L, int debug) {
+        Matrix *mat; /* nerv implementation */
+        StringToString_t::iterator mit = repo->map.find(frepo->utt);
+        if (mit == repo->map.end())
+            nerv_error(L, "[kaldi] mapped key for key %s not found", frepo->utt.c_str());
+        const std::string &key = mit->second;
+        StringToMatrix_t::iterator it = repo->key2mat.find(key);
+        if (it == repo->key2mat.end())
+            nerv_error(L, "[kaldi] lookup feature for key %s not found", key.c_str());
+        const kaldi::Matrix<BaseFloat> &kmat = it->second;
+
+        int n = kmat.NumRows() < nframes ? kmat.NumRows() : nframes;
+        int m = kmat.NumCols();
+        Status status;
+        assert(sizeof(BaseFloat) == sizeof(float));
+        if(sizeof(BaseFloat) == sizeof(float))
+            mat = nerv_matrix_host_float_create(n, m, &status);
+        else if(sizeof(BaseFloat) == sizeof(double))
+            mat = nerv_matrix_host_double_create(n, m, &status);
+        NERV_LUA_CHECK_STATUS(L, status);
+        size_t stride = mat->stride;
+        if (debug)
+            fprintf(stderr, "[kaldi] lookup feature: %s %d %d\n", frepo->utt.c_str(), n, m);
+
+        for (int i = 0; i < n; i++)
+        {
+            const BaseFloat *row = kmat.RowData(i);
+            BaseFloat *nerv_row = (BaseFloat *)((char *)mat->data.f + i * stride);
+            /* use memmove to copy the row, since KaldiLib uses compact storage */
+            memmove(nerv_row, row, sizeof(BaseFloat) * m);
+        }
+        return mat;
+    }
+
+    void kaldi_lookup_feature_repo_destroy(KaldiLookupFeatureRepo *repo) {
+        delete repo;
+    }
+
     struct KaldiLabelRepo {
         kaldi::RandomAccessPosteriorReader *targets_reader;
     };
@@ -77,7 +147,7 @@ extern "C" {
         return repo;
     }
 
-    Matrix *kaldi_label_repo_read_utterance(KaldiLabelRepo *repo, KaldiFeatureRepo *frepo, int frm_ext, int nframes,
+    Matrix *kaldi_label_repo_read_utterance(KaldiLabelRepo *repo, KaldiFeatureRepo *frepo, int nframes,
                                                     lua_State *L,
                                                     int debug) {
         Matrix *mat;
@@ -94,7 +164,6 @@ extern "C" {
             mat = nerv_matrix_host_double_create(n, m, &status);
         NERV_LUA_CHECK_STATUS(L, status);
         size_t stride = mat->stride;
-
         if (debug)
             fprintf(stderr, "[kaldi] label: %s %d %d\n", frepo->utt.c_str(), n, m);
         for (int i = 0; i < n; i++)
-- 
cgit v1.2.3