summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- htk_io/init.lua 7
-rw-r--r-- kaldi_io/Makefile 4
-rw-r--r-- kaldi_io/init.lua 37
-rw-r--r-- kaldi_io/src/cwrapper_kaldi.cpp 75
-rw-r--r-- kaldi_io/src/cwrapper_kaldi.h 10
-rw-r--r-- kaldi_io/src/init.c 63
-rw-r--r-- speech_utils/init.lua 8
7 files changed, 173 insertions, 31 deletions
diff --git a/htk_io/init.lua b/htk_io/init.lua
index c4dfff9..b836e15 100644
--- a/htk_io/init.lua
+++ b/htk_io/init.lua
@@ -6,7 +6,6 @@ function TNetReader:__init(global_conf, reader_conf)
self.feat_id = reader_conf.id
self.frm_ext = reader_conf.frm_ext
self.gconf = global_conf
- self.global_transf = reader_conf.global_transf
self.debug = global_conf.debug
if self.debug == nil then
self.debug = false
@@ -31,12 +30,6 @@ function TNetReader:get_data()
local res = {}
-- read HTK feature
local feat_utter = self.feat_repo:cur_utter(self.debug)
- -- global transf
- if self.global_transf ~= nil then
- feat_utter = nerv.speech_utils.global_transf(feat_utter,
- self.global_transf, self.frm_ext,
- self.frm_ext, self.gconf)
- end
res[self.feat_id] = feat_utter
-- add corresponding labels
for id, repo in pairs(self.lab_repo) do
diff --git a/kaldi_io/Makefile b/kaldi_io/Makefile
index 75ad48e..1066fc5 100644
--- a/kaldi_io/Makefile
+++ b/kaldi_io/Makefile
@@ -1,5 +1,5 @@
# Change KDIR to `kaldi-trunk' path (Kaldi must be compiled with --share)
-KDIR := /slfs6/users/ymz09/kaldi/
+KDIR := /home/stuymf/kaldi-trunk/
SHELL := /bin/bash
BUILD_DIR := $(CURDIR)/build
@@ -25,7 +25,7 @@ build: $(OBJ_DIR) $(OBJ_SUBDIR) $(OBJS) $(OBJ_DIR)/src/test
install: $(LUA_DIR) $(LUA_SUBDIR) $(LUA_LIBS) $(LIBS)
include $(KDIR)/src/kaldi.mk
-KL := $(KDIR)/src/feat/kaldi-feat.a $(KDIR)/src/matrix/kaldi-matrix.a $(KDIR)/src/base/kaldi-base.a $(KDIR)/src/util/kaldi-util.a $(KDIR)/src/hmm/kaldi-hmm.a $(KDIR)/src/tree/kaldi-tree.a -lcblas -llapack
+KL := $(KDIR)/src/feat/kaldi-feat.a $(KDIR)/src/matrix/kaldi-matrix.a $(KDIR)/src/base/kaldi-base.a $(KDIR)/src/util/kaldi-util.a $(KDIR)/src/hmm/kaldi-hmm.a $(KDIR)/src/tree/kaldi-tree.a -lcblas -llapack_atlas
$(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR):
-mkdir -p $@
diff --git a/kaldi_io/init.lua b/kaldi_io/init.lua
index 9fdb080..930705e 100644
--- a/kaldi_io/init.lua
+++ b/kaldi_io/init.lua
@@ -6,7 +6,6 @@ function KaldiReader:__init(global_conf, reader_conf)
self.feat_id = reader_conf.id
self.frm_ext = reader_conf.frm_ext
self.gconf = global_conf
- self.global_transf = reader_conf.global_transf
self.debug = global_conf.debug
if self.debug == nil then
self.debug = false
@@ -15,9 +14,20 @@ function KaldiReader:__init(global_conf, reader_conf)
self.lab_repo = {}
for id, mlf_spec in pairs(reader_conf.mlfs) do
+ if mlf_spec.format == nil then
+ nerv.error("format spec is expected for label %s", id)
+ end
self.lab_repo[id] = nerv.KaldiLabelRepo(mlf_spec.targets_rspecifier,
mlf_spec.format)
end
+ -- Build one lookup-feature repository per entry of `reader_conf.lookup`.
+ -- The `lookup` table is optional: a reader configured without it (e.g. a
+ -- configuration written before lookup features existed) gets an empty
+ -- repository list instead of a "table expected, got nil" error from pairs().
+ self.lookup_repo = {}
+ for id, lookup_spec in pairs(reader_conf.lookup or {}) do
+     -- both rspecifiers are handed to the C constructor, so check both here
+     -- to report a readable error that names the offending lookup id
+     if lookup_spec.targets_rspecifier == nil then
+         nerv.error("feature spec is expected for lookup %s", id)
+     end
+     if lookup_spec.map_rspecifier == nil then
+         nerv.error("map spec is expected for lookup %s", id)
+     end
+     self.lookup_repo[id] = nerv.KaldiLookupFeatureRepo(lookup_spec.targets_rspecifier,
+                                                       lookup_spec.map_rspecifier)
+ end
end
function KaldiReader:get_data()
@@ -28,17 +38,32 @@ function KaldiReader:get_data()
-- read Kaldi feature
local feat_utter = self.feat_repo:cur_utter(self.debug)
-- global transf
- local transformed = nerv.speech_utils.global_transf(feat_utter,
- self.global_transf, self.frm_ext, 0, self.gconf)
- res[self.feat_id] = transformed
+ res[self.feat_id] = feat_utter
-- add corresponding labels
for id, repo in pairs(self.lab_repo) do
local lab_utter = repo:get_utter(self.feat_repo,
- self.frm_ext,
- transformed:nrow(),
+ feat_utter:nrow(),
self.debug)
res[id] = lab_utter
end
+ -- add corresponding lookup features
+ -- Each lookup repository returns at most `nframes` rows for the current
+ -- utterance; when it returns fewer, the result is padded up to `nframes`
+ -- by repeating its last row so it lines up with the main feature matrix.
+ for id, repo in pairs(self.lookup_repo) do
+     local nframes = feat_utter:nrow()
+     local lookup_utter = repo:get_utter(self.feat_repo, nframes, self.debug)
+     local nrow = lookup_utter:nrow()
+     if nrow < nframes then
+         if nrow == 0 then
+             -- there is no last frame to repeat; fail loudly instead of
+             -- passing a negative row index to copy_from
+             nerv.error("lookup feature %s is empty, no frame to repeat", id)
+         end
+         -- repeat the last frame
+         local padded = lookup_utter:create(nframes)
+         padded:copy_from(lookup_utter, 0, nrow)
+         for dst = nrow, nframes - 1 do
+             -- copy row (nrow - 1) of `padded` onto row `dst`
+             padded:copy_from(padded, nrow - 1, nrow, dst)
+         end
+         lookup_utter = padded
+     end
+     res[id] = lookup_utter
+ end
-- move the pointer to next
self.feat_repo:next()
collectgarbage("collect")
diff --git a/kaldi_io/src/cwrapper_kaldi.cpp b/kaldi_io/src/cwrapper_kaldi.cpp
index f48d343..c19908b 100644
--- a/kaldi_io/src/cwrapper_kaldi.cpp
+++ b/kaldi_io/src/cwrapper_kaldi.cpp
@@ -1,9 +1,11 @@
#include <string>
+#include <map>
#include "base/kaldi-common.h"
#include "hmm/posterior.h"
#include "util/table-types.h"
typedef kaldi::BaseFloat BaseFloat;
-
+typedef std::map<std::string, kaldi::Matrix<BaseFloat> > StringToMatrix_t;
+typedef std::map<std::string, std::string > StringToString_t;
extern "C" {
#include "cwrapper_kaldi.h"
#include "string.h"
@@ -61,12 +63,80 @@ extern "C" {
return repo->feature_reader->Done();
}
+ /*
+  * Return the key of the entry the feature reader currently points at, as a
+  * NUL-terminated C string.
+  *
+  * The key is first copied into a function-local static buffer: returning
+  * c_str() straight from the result of Key() would hand out a pointer into a
+  * temporary std::string that is destroyed at the end of the statement
+  * (NOTE(review): Kaldi's SequentialTableReader::Key() returns by value as
+  * far as I can tell -- confirm against the Kaldi version in use; the copy is
+  * harmless either way).
+  *
+  * The returned pointer stays valid only until the next call of this
+  * function, so callers must copy it if they need to keep it.  Not
+  * thread-safe because of the shared buffer.
+  */
+ const char *kaldi_feature_repo_key(KaldiFeatureRepo *repo) {
+     static std::string key_buf;
+     key_buf = repo->feature_reader->Key();
+     return key_buf.c_str();
+ }
+
void kaldi_feature_repo_destroy(KaldiFeatureRepo *repo) {
    /* Release the reader first, then the repo that holds it.  No null guard
     * is needed: applying delete to a null pointer does nothing. */
    delete repo->feature_reader;
    delete repo;
}
+ struct KaldiLookupFeatureRepo { /* in-memory tables filled once by kaldi_lookup_feature_repo_new */
+ StringToMatrix_t key2mat; /* lookup key -> feature matrix read from the feature rspecifier */
+ StringToString_t map; /* key from the map rspecifier -> lookup key (first token of that entry); queried with the current utterance id */
+ };
+
+ /*
+  * Build a lookup-feature repository by loading two Kaldi tables into memory.
+  *
+  *   feature_rspecifier  table of BaseFloat matrices; each entry is stored in
+  *                       repo->key2mat under its key (a later duplicate
+  *                       overwrites the earlier one, with a warning).
+  *   map_rspecifier      table of token vectors; for each entry the FIRST
+  *                       token is stored in repo->map under the entry's key.
+  *
+  * Map entries with no tokens are skipped with a warning rather than being
+  * dereferenced; a later lookup of such a key is then reported as
+  * "mapped key ... not found" by the read function.
+  *
+  * Returns a heap-allocated repo; release it with
+  * kaldi_lookup_feature_repo_destroy().
+  */
+ KaldiLookupFeatureRepo *kaldi_lookup_feature_repo_new(const char *feature_rspecifier, const char *map_rspecifier) {
+     KaldiLookupFeatureRepo *repo = new KaldiLookupFeatureRepo();
+
+     /* Construct the readers in place from the C strings instead of
+      * copy-initialising them from a temporary reader object. */
+     kaldi::SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);
+     for (; !feature_reader.Done(); feature_reader.Next())
+     {
+         const std::string &key = feature_reader.Key();
+         if (repo->key2mat.find(key) != repo->key2mat.end())
+             fprintf(stderr, "[kaldi] warning: lookup feature for key %s already exists\n", key.c_str());
+         repo->key2mat[key] = feature_reader.Value();
+     }
+
+     kaldi::SequentialTokenVectorReader map_reader(map_rspecifier);
+     for (; !map_reader.Done(); map_reader.Next())
+     {
+         const std::string &key = map_reader.Key();
+         /* bind by reference: no need to copy the whole token vector */
+         const std::vector<std::string> &target = map_reader.Value();
+         if (target.empty())
+         {
+             /* an assert alone would vanish under NDEBUG and leave an
+              * out-of-range dereference behind */
+             fprintf(stderr, "[kaldi] warning: map entry for key %s has no target, skipped\n", key.c_str());
+             continue;
+         }
+         repo->map[key] = target.front();
+     }
+     return repo;
+ }
+
+ /*
+  * Fetch the lookup feature that belongs to the utterance currently selected
+  * in `frepo` and return it as a freshly created nerv host matrix.
+  *
+  * frepo->utt is translated through repo->map to a lookup key, which in turn
+  * selects a matrix in repo->key2mat.  At most `nframes` rows are copied; a
+  * shorter stored matrix is returned with its own row count.  Either lookup
+  * failing is reported through nerv_error().  With `debug` set, the utterance
+  * id and the resulting dimensions are written to stderr.
+  */
+ Matrix *kaldi_lookup_feature_repo_read_utterance(KaldiLookupFeatureRepo *repo, KaldiFeatureRepo *frepo, int nframes, lua_State *L, int debug) {
+     /* utterance id -> lookup key */
+     StringToString_t::iterator map_pos = repo->map.find(frepo->utt);
+     if (map_pos == repo->map.end())
+         nerv_error(L, "[kaldi] mapped key for key %s not found", frepo->utt.c_str());
+     const std::string &lookup_key = map_pos->second;
+
+     /* lookup key -> stored Kaldi matrix */
+     StringToMatrix_t::iterator feat_pos = repo->key2mat.find(lookup_key);
+     if (feat_pos == repo->key2mat.end())
+         nerv_error(L, "[kaldi] lookup feature for key %s not found", lookup_key.c_str());
+     const kaldi::Matrix<BaseFloat> &src = feat_pos->second;
+
+     /* clamp the row count to what the caller asked for */
+     int nrow = src.NumRows();
+     if (nframes < nrow)
+         nrow = nframes;
+     int ncol = src.NumCols();
+
+     Matrix *mat; /* nerv implementation */
+     Status status;
+     assert(sizeof(BaseFloat) == sizeof(float));
+     if (sizeof(BaseFloat) == sizeof(float))
+         mat = nerv_matrix_host_float_create(nrow, ncol, &status);
+     else if (sizeof(BaseFloat) == sizeof(double))
+         mat = nerv_matrix_host_double_create(nrow, ncol, &status);
+     NERV_LUA_CHECK_STATUS(L, status);
+     const size_t stride = mat->stride;
+     if (debug)
+         fprintf(stderr, "[kaldi] lookup feature: %s %d %d\n", frepo->utt.c_str(), nrow, ncol);
+
+     /* Copy row by row: destination rows are `stride` bytes apart, while
+      * only ncol elements per row carry data. */
+     const size_t row_bytes = sizeof(BaseFloat) * ncol;
+     char *dst = (char *)mat->data.f;
+     for (int r = 0; r < nrow; r++, dst += stride)
+         memmove(dst, src.RowData(r), row_bytes);
+     return mat;
+ }
+
+ void kaldi_lookup_feature_repo_destroy(KaldiLookupFeatureRepo *repo) { /* free a repo created by kaldi_lookup_feature_repo_new */
+ delete repo; /* both maps are plain members, so their destructors release the stored matrices and strings */
+ }
+
struct KaldiLabelRepo { /* state behind the label repository handle */
kaldi::RandomAccessPosteriorReader *targets_reader; /* random-access reader over posterior targets; NOTE(review): allocation and release happen outside this view -- confirm ownership */
};
@@ -77,7 +147,7 @@ extern "C" {
return repo;
}
- Matrix *kaldi_label_repo_read_utterance(KaldiLabelRepo *repo, KaldiFeatureRepo *frepo, int frm_ext, int nframes,
+ Matrix *kaldi_label_repo_read_utterance(KaldiLabelRepo *repo, KaldiFeatureRepo *frepo, int nframes,
lua_State *L,
int debug) {
Matrix *mat;
@@ -94,7 +164,6 @@ extern "C" {
mat = nerv_matrix_host_double_create(n, m, &status);
NERV_LUA_CHECK_STATUS(L, status);
size_t stride = mat->stride;
-
if (debug)
fprintf(stderr, "[kaldi] label: %s %d %d\n", frepo->utt.c_str(), n, m);
for (int i = 0; i < n; i++)
diff --git a/kaldi_io/src/cwrapper_kaldi.h b/kaldi_io/src/cwrapper_kaldi.h
index e34cb5a..c8a7a25 100644
--- a/kaldi_io/src/cwrapper_kaldi.h
+++ b/kaldi_io/src/cwrapper_kaldi.h
@@ -12,17 +12,25 @@ extern "C" {
Matrix *kaldi_feature_repo_read_utterance(KaldiFeatureRepo *repo, lua_State *L, int debug);
void kaldi_feature_repo_next(KaldiFeatureRepo *repo);
int kaldi_feature_repo_is_end(KaldiFeatureRepo *repo);
+ const char *kaldi_feature_repo_key(KaldiFeatureRepo *repo);
void kaldi_feature_repo_destroy(KaldiFeatureRepo *repo);
typedef struct KaldiLabelRepo KaldiLabelRepo;
KaldiLabelRepo *kaldi_label_repo_new(const char *, const char *fmt);
- Matrix *kaldi_label_repo_read_utterance(KaldiLabelRepo *repo, KaldiFeatureRepo *, int, int,
+ Matrix *kaldi_label_repo_read_utterance(KaldiLabelRepo *repo, KaldiFeatureRepo *, int,
lua_State *L,
int debug);
void kaldi_label_repo_destroy(KaldiLabelRepo *repo);
+
+ typedef struct KaldiLookupFeatureRepo KaldiLookupFeatureRepo;
+
+ KaldiLookupFeatureRepo *kaldi_lookup_feature_repo_new(const char *, const char *);
+ Matrix *kaldi_lookup_feature_repo_read_utterance(KaldiLookupFeatureRepo *repo, KaldiFeatureRepo *frepo, int nframes, lua_State *L, int debug);
+ void kaldi_lookup_feature_repo_destroy(KaldiLookupFeatureRepo *repo);
+
#ifdef __cplusplus
}
#endif
diff --git a/kaldi_io/src/init.c b/kaldi_io/src/init.c
index 413452c..529895b 100644
--- a/kaldi_io/src/init.c
+++ b/kaldi_io/src/init.c
@@ -3,6 +3,7 @@
#include <stdio.h>
const char *nerv_kaldi_feat_repo_tname = "nerv.KaldiFeatureRepo";
+const char *nerv_kaldi_lookup_feat_repo_tname = "nerv.KaldiLookupFeatureRepo";
const char *nerv_kaldi_label_repo_tname = "nerv.KaldiLabelRepo";
const char *nerv_matrix_host_float_tname = "nerv.MMatrixFloat";
@@ -42,10 +43,51 @@ static int feat_repo_is_end(lua_State *L) {
return 1;
}
+/* Lua method `key`: push the key reported by kaldi_feature_repo_key() for the
+ * feature repository at stack index 1 and return it as the single result. */
+static int feat_repo_key(lua_State *L) {
+    KaldiFeatureRepo *feat_repo = luaT_checkudata(L, 1, nerv_kaldi_feat_repo_tname);
+    const char *utt_key = kaldi_feature_repo_key(feat_repo);
+    lua_pushstring(L, utt_key);
+    return 1;
+}
+
static const luaL_Reg feat_repo_methods[] = { /* Lua method name -> C handler for the feature repository */
{"cur_utter", feat_repo_current_utterance},
{"next", feat_repo_next},
{"is_end", feat_repo_is_end},
+ {"key", feat_repo_key}, /* added by this patch: exposes the current key to Lua */
+ {NULL, NULL} /* sentinel terminating the luaL_Reg list */
+};
+
+/* Lua constructor for nerv.KaldiLookupFeatureRepo.
+ * Stack: 1 = feature rspecifier (string), 2 = map rspecifier (string).
+ * Pushes the new repository as userdata and returns it. */
+static int lookup_feat_repo_new(lua_State *L) {
+    const char *feature_rspecifier = luaL_checkstring(L, 1);
+    const char *map_rspecifier = luaL_checkstring(L, 2);
+    luaT_pushudata(L,
+                   kaldi_lookup_feature_repo_new(feature_rspecifier, map_rspecifier),
+                   nerv_kaldi_lookup_feat_repo_tname);
+    return 1;
+}
+
+static int lookup_feat_repo_destroy(lua_State *L) { /* destructor callback passed to luaT_newmetatable in lookup_feat_repo_init */
+ KaldiLookupFeatureRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_lookup_feat_repo_tname); /* userdata at stack index 1 */
+ kaldi_lookup_feature_repo_destroy(repo); /* frees the C++ side of the repository */
+ return 0; /* no Lua return values */
+}
+
+/* Lua method `get_utter` of nerv.KaldiLookupFeatureRepo.
+ * Stack: 1 = lookup repository, 2 = feature repository,
+ *        3 = nframes (number), 4 = debug (boolean).
+ * Pushes the matrix returned by the C++ reader, tagged with the host float
+ * matrix type name, and returns it. */
+static int lookup_feat_repo_read_utterance(lua_State *L) {
+    KaldiLookupFeatureRepo *lookup = luaT_checkudata(L, 1, nerv_kaldi_lookup_feat_repo_tname);
+    KaldiFeatureRepo *feats = luaT_checkudata(L, 2, nerv_kaldi_feat_repo_tname);
+
+    /* validate argument types before converting them */
+    if (!lua_isnumber(L, 3))
+        nerv_error(L, "nframes should be a number");
+    if (!lua_isboolean(L, 4))
+        nerv_error(L, "debug flag should be a boolean");
+
+    int nframes = lua_tonumber(L, 3);
+    int debug = lua_toboolean(L, 4);
+
+    luaT_pushudata(L,
+                   kaldi_lookup_feature_repo_read_utterance(lookup, feats, nframes, L, debug),
+                   nerv_matrix_host_float_tname);
+    return 1;
+}
+
+static const luaL_Reg lookup_feat_repo_methods[] = { /* registered on the lookup repo metatable by lookup_feat_repo_init */
+ {"get_utter", lookup_feat_repo_read_utterance}, /* repo:get_utter(feat_repo, nframes, debug) */
{NULL, NULL} /* sentinel terminating the luaL_Reg list */
};
@@ -60,17 +102,14 @@ static int label_repo_new(lua_State *L) {
static int label_repo_read_utterance(lua_State *L) { /* Lua binding: reads the labels for the current utterance */
KaldiLabelRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_label_repo_tname); /* stack 1: label repository */
KaldiFeatureRepo *feat_repo = luaT_checkudata(L, 2, nerv_kaldi_feat_repo_tname); /* stack 2: feature repository */
- int frm_ext, nframes, debug;
+ int nframes, debug;
if (!lua_isnumber(L, 3))
- nerv_error(L, "frm_ext should be a number");
- frm_ext = lua_tonumber(L, 3);
- if (!lua_isnumber(L, 4))
nerv_error(L, "nframes should be a number");
- nframes = lua_tonumber(L, 4);
- if (!lua_isboolean(L, 5))
+ nframes = lua_tonumber(L, 3); /* after this patch nframes is argument 3; frm_ext is no longer passed */
+ if (!lua_isboolean(L, 4))
nerv_error(L, "debug flag should be a boolean");
- debug = lua_toboolean(L, 5);
- Matrix *utter = kaldi_label_repo_read_utterance(repo, feat_repo, frm_ext, nframes, L, debug);
+ debug = lua_toboolean(L, 4); /* debug flag moved from argument 5 to argument 4 */
+ Matrix *utter = kaldi_label_repo_read_utterance(repo, feat_repo, nframes, L, debug);
luaT_pushudata(L, utter, nerv_matrix_host_float_tname); /* result is always tagged with the host float matrix type name */
return 1;
}
@@ -93,6 +132,13 @@ static void feat_repo_init(lua_State *L) {
lua_pop(L, 1);
}
+static void lookup_feat_repo_init(lua_State *L) { /* sets up the nerv.KaldiLookupFeatureRepo type; called from kaldi_io_init */
+ luaT_newmetatable(L, nerv_kaldi_lookup_feat_repo_tname, NULL,
+ lookup_feat_repo_new, lookup_feat_repo_destroy, NULL); /* presumably (parent, constructor, destructor, factory) -- confirm against luaT */
+ luaL_register(L, NULL, lookup_feat_repo_methods); /* NULL libname: add the methods to the table on top of the stack */
+ lua_pop(L, 1); /* drop that table again */
+}
+
static void label_repo_init(lua_State *L) {
luaT_newmetatable(L, nerv_kaldi_label_repo_tname, NULL,
label_repo_new, label_repo_destroy, NULL);
@@ -102,5 +148,6 @@ static void label_repo_init(lua_State *L) {
void kaldi_io_init(lua_State *L) { /* registers the Kaldi I/O types with the given Lua state */
feat_repo_init(L);
+ lookup_feat_repo_init(L); /* added by this patch: lookup feature repository type */
label_repo_init(L);
}
diff --git a/speech_utils/init.lua b/speech_utils/init.lua
index 3500eb4..7d5651f 100644
--- a/speech_utils/init.lua
+++ b/speech_utils/init.lua
@@ -3,8 +3,8 @@ nerv.speech_utils = {}
function nerv.speech_utils.global_transf(feat_utter, global_transf,
frm_ext, frm_trim, gconf)
local rearranged
- feat_utter = gconf.cumat_type.new_from_host(feat_utter)
- if frm_ext > 0 then
+-- feat_utter = gconf.cumat_type.new_from_host(feat_utter)
+ if frm_ext ~= nil and frm_ext > 0 then
local step = frm_ext * 2 + 1
-- expand the feature
local expanded = gconf.cumat_type(feat_utter:nrow(), feat_utter:ncol() * step)
@@ -26,8 +26,8 @@ function nerv.speech_utils.global_transf(feat_utter, global_transf,
global_transf:init(input[1]:nrow())
global_transf:propagate(input, output)
-- trim frames
- expanded = gconf.mmat_type(output[1]:nrow() - frm_trim * 2, output[1]:ncol())
- output[1]:copy_toh(expanded, frm_trim, feat_utter:nrow() - frm_trim)
+ expanded = gconf.cumat_type(output[1]:nrow() - frm_trim * 2, output[1]:ncol())
+ expanded:copy_fromd(output[1], frm_trim, feat_utter:nrow() - frm_trim)
collectgarbage("collect")
return expanded
end