diff options
-rw-r--r-- | htk_io/init.lua | 7 | ||||
-rw-r--r-- | kaldi_io/Makefile | 4 | ||||
-rw-r--r-- | kaldi_io/init.lua | 37 | ||||
-rw-r--r-- | kaldi_io/src/cwrapper_kaldi.cpp | 75 | ||||
-rw-r--r-- | kaldi_io/src/cwrapper_kaldi.h | 10 | ||||
-rw-r--r-- | kaldi_io/src/init.c | 63 | ||||
-rw-r--r-- | speech_utils/init.lua | 8 |
7 files changed, 173 insertions, 31 deletions
diff --git a/htk_io/init.lua b/htk_io/init.lua index c4dfff9..b836e15 100644 --- a/htk_io/init.lua +++ b/htk_io/init.lua @@ -6,7 +6,6 @@ function TNetReader:__init(global_conf, reader_conf) self.feat_id = reader_conf.id self.frm_ext = reader_conf.frm_ext self.gconf = global_conf - self.global_transf = reader_conf.global_transf self.debug = global_conf.debug if self.debug == nil then self.debug = false @@ -31,12 +30,6 @@ function TNetReader:get_data() local res = {} -- read HTK feature local feat_utter = self.feat_repo:cur_utter(self.debug) - -- global transf - if self.global_transf ~= nil then - feat_utter = nerv.speech_utils.global_transf(feat_utter, - self.global_transf, self.frm_ext, - self.frm_ext, self.gconf) - end res[self.feat_id] = feat_utter -- add corresponding labels for id, repo in pairs(self.lab_repo) do diff --git a/kaldi_io/Makefile b/kaldi_io/Makefile index 75ad48e..1066fc5 100644 --- a/kaldi_io/Makefile +++ b/kaldi_io/Makefile @@ -1,5 +1,5 @@ # Change KDIR to `kaldi-trunk' path (Kaldi must be compiled with --share) -KDIR := /slfs6/users/ymz09/kaldi/ +KDIR := /home/stuymf/kaldi-trunk/ SHELL := /bin/bash BUILD_DIR := $(CURDIR)/build @@ -25,7 +25,7 @@ build: $(OBJ_DIR) $(OBJ_SUBDIR) $(OBJS) $(OBJ_DIR)/src/test install: $(LUA_DIR) $(LUA_SUBDIR) $(LUA_LIBS) $(LIBS) include $(KDIR)/src/kaldi.mk -KL := $(KDIR)/src/feat/kaldi-feat.a $(KDIR)/src/matrix/kaldi-matrix.a $(KDIR)/src/base/kaldi-base.a $(KDIR)/src/util/kaldi-util.a $(KDIR)/src/hmm/kaldi-hmm.a $(KDIR)/src/tree/kaldi-tree.a -lcblas -llapack +KL := $(KDIR)/src/feat/kaldi-feat.a $(KDIR)/src/matrix/kaldi-matrix.a $(KDIR)/src/base/kaldi-base.a $(KDIR)/src/util/kaldi-util.a $(KDIR)/src/hmm/kaldi-hmm.a $(KDIR)/src/tree/kaldi-tree.a -lcblas -llapack_atlas $(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR): -mkdir -p $@ diff --git a/kaldi_io/init.lua b/kaldi_io/init.lua index 9fdb080..930705e 100644 --- a/kaldi_io/init.lua +++ b/kaldi_io/init.lua @@ -6,7 +6,6 @@ function KaldiReader:__init(global_conf, reader_conf) self.feat_id = reader_conf.id self.frm_ext = reader_conf.frm_ext self.gconf = global_conf - self.global_transf = reader_conf.global_transf self.debug = global_conf.debug if self.debug == nil then self.debug = false @@ -15,9 +14,20 @@ function KaldiReader:__init(global_conf, reader_conf) self.lab_repo = {} for id, mlf_spec in pairs(reader_conf.mlfs) do + if mlf_spec.format == nil then + nerv.error("format spec is expected for label %s", id) + end self.lab_repo[id] = nerv.KaldiLabelRepo(mlf_spec.targets_rspecifier, mlf_spec.format) end + self.lookup_repo = {} + for id, lookup_spec in pairs(reader_conf.lookup) do + if lookup_spec.map_rspecifier == nil then + nerv.error("map spec is expected for lookup %s", id) + end + self.lookup_repo[id] = nerv.KaldiLookupFeatureRepo(lookup_spec.targets_rspecifier, + lookup_spec.map_rspecifier) + end end function KaldiReader:get_data() @@ -28,17 +38,32 @@ function KaldiReader:get_data() -- read Kaldi feature local feat_utter = self.feat_repo:cur_utter(self.debug) -- global transf - local transformed = nerv.speech_utils.global_transf(feat_utter, - self.global_transf, self.frm_ext, 0, self.gconf) - res[self.feat_id] = transformed + res[self.feat_id] = feat_utter -- add corresponding labels for id, repo in pairs(self.lab_repo) do local lab_utter = repo:get_utter(self.feat_repo, - self.frm_ext, - transformed:nrow(), + feat_utter:nrow(), self.debug) res[id] = lab_utter end + -- add corresponding lookup features + for id, repo in pairs(self.lookup_repo) do + local lookup_utter = repo:get_utter(self.feat_repo, + feat_utter:nrow(), + self.debug) + local nrow = lookup_utter:nrow() + if nrow < feat_utter:nrow() then + -- repeat the last frame + local nlu = lookup_utter:create(feat_utter:nrow()) + nlu:copy_from(lookup_utter, 0, nrow) + row = nlu[nrow - 1] + for i = 0, feat_utter:nrow() - nrow - 1 do + nlu:copy_from(nlu, nrow - 1, nrow, nrow + i) + end + lookup_utter = nlu + end + res[id] = lookup_utter + end -- move the pointer to next self.feat_repo:next() collectgarbage("collect") diff --git a/kaldi_io/src/cwrapper_kaldi.cpp b/kaldi_io/src/cwrapper_kaldi.cpp index f48d343..c19908b 100644 --- a/kaldi_io/src/cwrapper_kaldi.cpp +++ b/kaldi_io/src/cwrapper_kaldi.cpp @@ -1,9 +1,11 @@ #include <string> +#include <map> #include "base/kaldi-common.h" #include "hmm/posterior.h" #include "util/table-types.h" typedef kaldi::BaseFloat BaseFloat; - +typedef std::map<std::string, kaldi::Matrix<BaseFloat> > StringToMatrix_t; +typedef std::map<std::string, std::string > StringToString_t; extern "C" { #include "cwrapper_kaldi.h" #include "string.h" @@ -61,12 +63,80 @@ extern "C" { return repo->feature_reader->Done(); } + const char *kaldi_feature_repo_key(KaldiFeatureRepo *repo) { + return repo->feature_reader->Key().c_str(); + } + void kaldi_feature_repo_destroy(KaldiFeatureRepo *repo) { if (repo->feature_reader) delete repo->feature_reader; delete repo; } + struct KaldiLookupFeatureRepo { + StringToMatrix_t key2mat; + StringToString_t map; + }; + + KaldiLookupFeatureRepo *kaldi_lookup_feature_repo_new(const char *feature_rspecifier, const char *map_rspecifier) { + KaldiLookupFeatureRepo *repo = new KaldiLookupFeatureRepo(); + kaldi::SequentialBaseFloatMatrixReader feature_reader = kaldi::SequentialBaseFloatMatrixReader(string(feature_rspecifier)); + for (; !feature_reader.Done(); feature_reader.Next()) + { + const std::string &key = feature_reader.Key(); + const kaldi::Matrix<BaseFloat> &feat = feature_reader.Value(); + if (repo->key2mat.find(key) != repo->key2mat.end()) + fprintf(stderr, "[kaldi] warning: lookup feature for key %s already exists", key.c_str()); + repo->key2mat[key] = feat; + } + kaldi::SequentialTokenVectorReader map_reader = kaldi::SequentialTokenVectorReader(string(map_rspecifier)); + for (; !map_reader.Done(); map_reader.Next()) + { + const std::vector<std::string> target = map_reader.Value(); + assert(target.size() >= 1); + repo->map[map_reader.Key()] = *target.begin(); + } + return repo; + } + + Matrix *kaldi_lookup_feature_repo_read_utterance(KaldiLookupFeatureRepo *repo, KaldiFeatureRepo *frepo, int nframes, lua_State *L, int debug) { + Matrix *mat; /* nerv implementation */ + StringToString_t::iterator mit = repo->map.find(frepo->utt); + if (mit == repo->map.end()) + nerv_error(L, "[kaldi] mapped key for key %s not found", frepo->utt.c_str()); + const std::string &key = mit->second; + StringToMatrix_t::iterator it = repo->key2mat.find(key); + if (it == repo->key2mat.end()) + nerv_error(L, "[kaldi] lookup feature for key %s not found", key.c_str()); + const kaldi::Matrix<BaseFloat> &kmat = it->second; + + int n = kmat.NumRows() < nframes ? kmat.NumRows() : nframes; + int m = kmat.NumCols(); + Status status; + assert(sizeof(BaseFloat) == sizeof(float)); + if(sizeof(BaseFloat) == sizeof(float)) + mat = nerv_matrix_host_float_create(n, m, &status); + else if(sizeof(BaseFloat) == sizeof(double)) + mat = nerv_matrix_host_double_create(n, m, &status); + NERV_LUA_CHECK_STATUS(L, status); + size_t stride = mat->stride; + if (debug) + fprintf(stderr, "[kaldi] lookup feature: %s %d %d\n", frepo->utt.c_str(), n, m); + + for (int i = 0; i < n; i++) + { + const BaseFloat *row = kmat.RowData(i); + BaseFloat *nerv_row = (BaseFloat *)((char *)mat->data.f + i * stride); + /* use memmove to copy the row, since KaldiLib uses compact storage */ + memmove(nerv_row, row, sizeof(BaseFloat) * m); + } + return mat; + } + + void kaldi_lookup_feature_repo_destroy(KaldiLookupFeatureRepo *repo) { + delete repo; + } + struct KaldiLabelRepo { kaldi::RandomAccessPosteriorReader *targets_reader; }; @@ -77,7 +147,7 @@ extern "C" { return repo; } - Matrix *kaldi_label_repo_read_utterance(KaldiLabelRepo *repo, KaldiFeatureRepo *frepo, int frm_ext, int nframes, + Matrix *kaldi_label_repo_read_utterance(KaldiLabelRepo *repo, KaldiFeatureRepo *frepo, int nframes, lua_State *L, int debug) { Matrix *mat; @@ -94,7 +164,6 @@ extern "C" { mat = nerv_matrix_host_double_create(n, m, &status); NERV_LUA_CHECK_STATUS(L, status); size_t stride = mat->stride; - if (debug) fprintf(stderr, "[kaldi] label: %s %d %d\n", frepo->utt.c_str(), n, m); for (int i = 0; i < n; i++) diff --git a/kaldi_io/src/cwrapper_kaldi.h b/kaldi_io/src/cwrapper_kaldi.h index e34cb5a..c8a7a25 100644 --- a/kaldi_io/src/cwrapper_kaldi.h +++ b/kaldi_io/src/cwrapper_kaldi.h @@ -12,17 +12,25 @@ extern "C" { Matrix *kaldi_feature_repo_read_utterance(KaldiFeatureRepo *repo, lua_State *L, int debug); void kaldi_feature_repo_next(KaldiFeatureRepo *repo); int kaldi_feature_repo_is_end(KaldiFeatureRepo *repo); + const char *kaldi_feature_repo_key(KaldiFeatureRepo *repo); void kaldi_feature_repo_destroy(KaldiFeatureRepo *repo); typedef struct KaldiLabelRepo KaldiLabelRepo; KaldiLabelRepo *kaldi_label_repo_new(const char *, const char *fmt); - Matrix *kaldi_label_repo_read_utterance(KaldiLabelRepo *repo, KaldiFeatureRepo *, int, int, + Matrix *kaldi_label_repo_read_utterance(KaldiLabelRepo *repo, KaldiFeatureRepo *, int, lua_State *L, int debug); void kaldi_label_repo_destroy(KaldiLabelRepo *repo); + + typedef struct KaldiLookupFeatureRepo KaldiLookupFeatureRepo; + + KaldiLookupFeatureRepo *kaldi_lookup_feature_repo_new(const char *, const char *); + Matrix *kaldi_lookup_feature_repo_read_utterance(KaldiLookupFeatureRepo *repo, KaldiFeatureRepo *frepo, int nframes, lua_State *L, int debug); + void kaldi_lookup_feature_repo_destroy(KaldiLookupFeatureRepo *repo); + #ifdef __cplusplus } #endif diff --git a/kaldi_io/src/init.c b/kaldi_io/src/init.c index 413452c..529895b 100644 --- a/kaldi_io/src/init.c +++ b/kaldi_io/src/init.c @@ -3,6 +3,7 @@ #include <stdio.h> const char *nerv_kaldi_feat_repo_tname = "nerv.KaldiFeatureRepo"; +const char *nerv_kaldi_lookup_feat_repo_tname = "nerv.KaldiLookupFeatureRepo"; const char *nerv_kaldi_label_repo_tname = "nerv.KaldiLabelRepo"; const char *nerv_matrix_host_float_tname = "nerv.MMatrixFloat"; @@ -42,10 +43,51 @@ static int feat_repo_is_end(lua_State *L) { return 1; } +static int feat_repo_key(lua_State *L) { + KaldiFeatureRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_feat_repo_tname); + lua_pushstring(L, kaldi_feature_repo_key(repo)); + return 1; +} + static const luaL_Reg feat_repo_methods[] = { {"cur_utter", feat_repo_current_utterance}, {"next", feat_repo_next}, {"is_end", feat_repo_is_end}, + {"key", feat_repo_key}, + {NULL, NULL} +}; + +static int lookup_feat_repo_new(lua_State *L) { + const char *feature_rsepcifier = luaL_checkstring(L, 1); + const char *map_rspecifier = luaL_checkstring(L, 2); + KaldiLookupFeatureRepo *repo = kaldi_lookup_feature_repo_new(feature_rsepcifier, map_rspecifier); + luaT_pushudata(L, repo, nerv_kaldi_lookup_feat_repo_tname); + return 1; +} + +static int lookup_feat_repo_destroy(lua_State *L) { + KaldiLookupFeatureRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_lookup_feat_repo_tname); + kaldi_lookup_feature_repo_destroy(repo); + return 0; +} + +static int lookup_feat_repo_read_utterance(lua_State *L) { + KaldiLookupFeatureRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_lookup_feat_repo_tname); + KaldiFeatureRepo *feat_repo = luaT_checkudata(L, 2, nerv_kaldi_feat_repo_tname); + int nframes, debug; + if (!lua_isnumber(L, 3)) + nerv_error(L, "nframes should be a number"); + nframes = lua_tonumber(L, 3); + if (!lua_isboolean(L, 4)) + nerv_error(L, "debug flag should be a boolean"); + debug = lua_toboolean(L, 4); + Matrix *utter = kaldi_lookup_feature_repo_read_utterance(repo, feat_repo, nframes, L, debug); + luaT_pushudata(L, utter, nerv_matrix_host_float_tname); + return 1; +} + +static const luaL_Reg lookup_feat_repo_methods[] = { + {"get_utter", lookup_feat_repo_read_utterance}, {NULL, NULL} }; @@ -60,17 +102,14 @@ static int label_repo_new(lua_State *L) { static int label_repo_read_utterance(lua_State *L) { KaldiLabelRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_label_repo_tname); KaldiFeatureRepo *feat_repo = luaT_checkudata(L, 2, nerv_kaldi_feat_repo_tname); - int frm_ext, nframes, debug; + int nframes, debug; if (!lua_isnumber(L, 3)) - nerv_error(L, "frm_ext should be a number"); - frm_ext = lua_tonumber(L, 3); - if (!lua_isnumber(L, 4)) nerv_error(L, "nframes should be a number"); - nframes = lua_tonumber(L, 4); - if (!lua_isboolean(L, 5)) + nframes = lua_tonumber(L, 3); + if (!lua_isboolean(L, 4)) nerv_error(L, "debug flag should be a boolean"); - debug = lua_toboolean(L, 5); - Matrix *utter = kaldi_label_repo_read_utterance(repo, feat_repo, frm_ext, nframes, L, debug); + debug = lua_toboolean(L, 4); + Matrix *utter = kaldi_label_repo_read_utterance(repo, feat_repo, nframes, L, debug); luaT_pushudata(L, utter, nerv_matrix_host_float_tname); return 1; } @@ -93,6 +132,13 @@ static void feat_repo_init(lua_State *L) { lua_pop(L, 1); } +static void lookup_feat_repo_init(lua_State *L) { + luaT_newmetatable(L, nerv_kaldi_lookup_feat_repo_tname, NULL, + lookup_feat_repo_new, lookup_feat_repo_destroy, NULL); + luaL_register(L, NULL, lookup_feat_repo_methods); + lua_pop(L, 1); +} + static void label_repo_init(lua_State *L) { luaT_newmetatable(L, nerv_kaldi_label_repo_tname, NULL, label_repo_new, label_repo_destroy, NULL); @@ -102,5 +148,6 @@ static void label_repo_init(lua_State *L) { void kaldi_io_init(lua_State *L) { feat_repo_init(L); + lookup_feat_repo_init(L); label_repo_init(L); } diff --git a/speech_utils/init.lua b/speech_utils/init.lua index 3500eb4..7d5651f 100644 --- a/speech_utils/init.lua +++ b/speech_utils/init.lua @@ -3,8 +3,8 @@ nerv.speech_utils = {} function nerv.speech_utils.global_transf(feat_utter, global_transf, frm_ext, frm_trim, gconf) local rearranged - feat_utter = gconf.cumat_type.new_from_host(feat_utter) - if frm_ext > 0 then +-- feat_utter = gconf.cumat_type.new_from_host(feat_utter) + if frm_ext ~= nil and frm_ext > 0 then local step = frm_ext * 2 + 1 -- expand the feature local expanded = gconf.cumat_type(feat_utter:nrow(), feat_utter:ncol() * step) @@ -26,8 +26,8 @@ function nerv.speech_utils.global_transf(feat_utter, global_transf, global_transf:init(input[1]:nrow()) global_transf:propagate(input, output) -- trim frames - expanded = gconf.mmat_type(output[1]:nrow() - frm_trim * 2, output[1]:ncol()) - output[1]:copy_toh(expanded, frm_trim, feat_utter:nrow() - frm_trim) + expanded = gconf.cumat_type(output[1]:nrow() - frm_trim * 2, output[1]:ncol()) + expanded:copy_fromd(output[1], frm_trim, feat_utter:nrow() - frm_trim) collectgarbage("collect") return expanded end |