diff options
-rw-r--r-- | kaldi_io/.valgrind | 0 | ||||
-rw-r--r-- | kaldi_io/Makefile | 44 | ||||
-rw-r--r-- | kaldi_io/example/kaldi_io_example.lua | 8 | ||||
-rw-r--r-- | kaldi_io/example/swb_baseline.lua | 193 | ||||
-rw-r--r-- | kaldi_io/init.c | 8 | ||||
-rw-r--r-- | kaldi_io/init.lua | 46 | ||||
-rw-r--r-- | kaldi_io/kaldi_io-scm-1.rockspec | 36 | ||||
-rw-r--r-- | kaldi_io/src/cwrapper_kaldi.cpp | 111 | ||||
-rw-r--r-- | kaldi_io/src/cwrapper_kaldi.h | 29 | ||||
-rw-r--r-- | kaldi_io/src/init.c | 106 | ||||
-rw-r--r-- | kaldi_io/src/test.c | 48 | ||||
-rw-r--r-- | kaldi_io/tools/kaldi_to_nerv.cpp | 109 | ||||
-rw-r--r-- | kaldi_io/tools/nerv_to_kaldi.lua | 66 |
13 files changed, 804 insertions, 0 deletions
diff --git a/kaldi_io/.valgrind b/kaldi_io/.valgrind new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/kaldi_io/.valgrind diff --git a/kaldi_io/Makefile b/kaldi_io/Makefile new file mode 100644 index 0000000..75ad48e --- /dev/null +++ b/kaldi_io/Makefile @@ -0,0 +1,44 @@ +# Change KDIR to `kaldi-trunk' path (Kaldi must be compiled with --shared) +KDIR := /slfs6/users/ymz09/kaldi/ + +SHELL := /bin/bash +BUILD_DIR := $(CURDIR)/build +INC_PATH := $(LUA_BINDIR)/../include/ +OBJS := init.o src/cwrapper_kaldi.o src/init.o +LIBS := libkaldiio.so +LUA_LIBS := init.lua +INCLUDE := -I $(LUA_INCDIR) -I $(INC_PATH) -DLUA_USE_APICHECK + +SUBDIR := src +OBJ_DIR := $(BUILD_DIR)/objs +LUA_DIR := $(INST_LUADIR)/kaldi_io +KALDIINCLUDE := -I $(KDIR)/tools/ATLAS/include/ -I $(KDIR)/tools/openfst/include/ -I $(KDIR)/src/ + +# Prefix the relative names above with their build / install directories +OBJS := $(addprefix $(OBJ_DIR)/,$(OBJS)) +LIBS := $(addprefix $(INST_LIBDIR)/,$(LIBS)) +OBJ_SUBDIR := $(addprefix $(OBJ_DIR)/,$(SUBDIR)) +LUA_SUBDIR := $(addprefix $(LUA_DIR)/,$(SUBDIR)) +LUA_LIBS := $(addprefix $(LUA_DIR)/,$(LUA_LIBS)) +LIB_PATH := $(LUA_BINDIR)/../lib + +# build: create the object directories, compile all objects and link the stand-alone test binary +# install: create the Lua install directories, copy init.lua and link libkaldiio.so +build: $(OBJ_DIR) $(OBJ_SUBDIR) $(OBJS) $(OBJ_DIR)/src/test +install: $(LUA_DIR) $(LUA_SUBDIR) $(LUA_LIBS) $(LIBS) + +# KL: static Kaldi archives plus BLAS/LAPACK appended to both link commands +include $(KDIR)/src/kaldi.mk +KL := $(KDIR)/src/feat/kaldi-feat.a $(KDIR)/src/matrix/kaldi-matrix.a $(KDIR)/src/base/kaldi-base.a $(KDIR)/src/util/kaldi-util.a $(KDIR)/src/hmm/kaldi-hmm.a $(KDIR)/src/tree/kaldi-tree.a -lcblas -llapack + +# Directory targets; the leading '-' lets make carry on if mkdir fails +$(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR): + -mkdir -p $@ +$(LUA_DIR)/%.lua: %.lua + cp $< $@ +$(LIBS): $(OBJ_DIR)/src/cwrapper_kaldi.o $(OBJ_DIR)/init.o $(OBJ_DIR)/src/init.o + gcc -shared -fPIC -o $@ $(OBJ_DIR)/src/cwrapper_kaldi.o $(OBJ_DIR)/init.o $(OBJ_DIR)/src/init.o -lstdc++ -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -lluaT $(KL) +# The C++ wrapper is the only object compiled with g++ and the Kaldi include paths +$(OBJ_DIR)/src/cwrapper_kaldi.o: src/cwrapper_kaldi.cpp + g++ -o $@ -c $< -DHAVE_ATLAS $(KALDIINCLUDE) -g -fPIC $(INCLUDE) -DKALDI_DOUBLEPRECISION=0 -msse2 -DHAVE_POSIX_MEMALIGN 
+$(OBJ_DIR)/src/test: $(OBJ_DIR)/src/cwrapper_kaldi.o $(OBJ_DIR)/src/test.o + gcc -o $@ $^ -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) $(INCLUDE) $(KALDIINCLUDE) -lnervcore -Wl,-rpath=$(LUA_LIBDIR) -L$(LUA_LIBDIR) -lluajit-5.1 -lstdc++ -lm $(KL) +$(OBJ_DIR)/%.o: %.c + gcc -o $@ -c $< -g $(INCLUDE) -fPIC +# clean: remove every object built under $(OBJ_DIR) (top level and src/) plus the linked test binary; +# -f keeps rm silent when nothing has been built yet +clean: + -rm -f $(OBJ_DIR)/*.o $(OBJ_DIR)/src/*.o $(OBJ_DIR)/src/test + diff --git a/kaldi_io/example/kaldi_io_example.lua b/kaldi_io/example/kaldi_io_example.lua new file mode 100644 index 0000000..8fd068a --- /dev/null +++ b/kaldi_io/example/kaldi_io_example.lua @@ -0,0 +1,8 @@ +-- Minimal example: open a Kaldi feature pipe and print its first utterance (debug output on) +require 'kaldi_io' + +frm_ext = 5 +feat_repo = nerv.KaldiFeatureRepo("ark:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs6/users/ymz09/swb_ivec/train_bp.scp ark:- |") + +feat_utter = feat_repo:cur_utter(true) +print(feat_utter) + diff --git a/kaldi_io/example/swb_baseline.lua b/kaldi_io/example/swb_baseline.lua new file mode 100644 index 0000000..8b1e122 --- /dev/null +++ b/kaldi_io/example/swb_baseline.lua @@ -0,0 +1,193 @@ +require 'kaldi_io' +-- Global configuration table handed to the layer repo, the Kaldi reader and the SGD buffer below +gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + frm_ext = 5, + tr_scp = "ark:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs6/users/ymz09/swb_ivec/train_bp.scp ark:- |", + cv_scp = "ark:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs6/users/ymz09/swb_ivec/train_cv.scp ark:- |", + initialized_param = {"/slfs6/users/ymz09/swb_ivec/swb_init.nerv", + "/slfs6/users/ymz09/swb_ivec/swb_global_transf.nerv"}, + debug = false} + +-- Builds the layer repository from param_repo: primitive layers first, then the DAG layers +-- that wire them together; returns the repository +function make_layer_repo(param_repo) + local layer_repo = nerv.LayerRepo( + { + -- global transf + ["nerv.BiasLayer"] = + { + blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, + blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} + }, + ["nerv.WindowLayer"] = + { + wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, + wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} + }, + -- biased linearity + 
["nerv.AffineLayer"] = + { + affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, + {dim_in = {429}, dim_out = {2048}}}, + affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, + {dim_in = {2048}, dim_out = {3001}}} + }, + ["nerv.SigmoidLayer"] = + { + sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} + }, + ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output + { + ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}} + }, + ["nerv.SoftmaxLayer"] = -- softmax for decode output + { + softmax = {{}, {dim_in = {3001}, dim_out = {3001}}} + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + global_transf = {{}, { + dim_in = {429}, dim_out = {429}, + sub_layers = layer_repo, + connections = { + ["<input>[1]"] = "blayer1[1]", + ["blayer1[1]"] = "wlayer1[1]", + ["wlayer1[1]"] = "blayer2[1]", + ["blayer2[1]"] = "wlayer2[1]", + ["wlayer2[1]"] = "<output>[1]" + } + }}, + main = {{}, { + dim_in = {429}, dim_out = {3001}, + sub_layers = layer_repo, + connections = { + ["<input>[1]"] = "affine0[1]", + 
["affine0[1]"] = "sigmoid0[1]", + ["sigmoid0[1]"] = "affine1[1]", + ["affine1[1]"] = "sigmoid1[1]", + ["sigmoid1[1]"] = "affine2[1]", + ["affine2[1]"] = "sigmoid2[1]", + ["sigmoid2[1]"] = "affine3[1]", + ["affine3[1]"] = "sigmoid3[1]", + ["sigmoid3[1]"] = "affine4[1]", + ["affine4[1]"] = "sigmoid4[1]", + ["sigmoid4[1]"] = "affine5[1]", + ["affine5[1]"] = "sigmoid5[1]", + ["sigmoid5[1]"] = "affine6[1]", + ["affine6[1]"] = "sigmoid6[1]", + ["sigmoid6[1]"] = "affine7[1]", + ["affine7[1]"] = "<output>[1]" + } + }} + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + ce_output = {{}, { + dim_in = {429, 1}, dim_out = {1}, + sub_layers = layer_repo, + connections = { + ["<input>[1]"] = "main[1]", + ["main[1]"] = "ce_crit[1]", + ["<input>[2]"] = "ce_crit[2]", + ["ce_crit[1]"] = "<output>[1]" + } + }}, + softmax_output = {{}, { + dim_in = {429}, dim_out = {3001}, + sub_layers = layer_repo, + connections = { + ["<input>[1]"] = "main[1]", + ["main[1]"] = "softmax[1]", + ["softmax[1]"] = "<output>[1]" + } + }} + } + }, param_repo, gconf) + + return layer_repo +end + +function get_network(layer_repo) + return layer_repo:get_layer("ce_output") +end + +function get_decode_network(layer_repo) + return layer_repo:get_layer("softmax_output") +end + +function get_global_transf(layer_repo) + return layer_repo:get_layer("global_transf") +end + +function make_readers(feature_rspecifier, layer_repo) + return { + {reader = nerv.KaldiReader(gconf, + { + id = "main_scp", + feature_rspecifier = feature_rspecifier, + frm_ext = gconf.frm_ext, + mlfs = { + phone_state = { + targets_rspecifier = "ark:/slfs6/users/ymz09/kaldi/src/bin/ali-to-pdf /slfs6/users/ymz09/swb_ivec/final.mdl \"ark:gunzip -c /slfs6/users/ymz09/swb_ivec/ali.*.gz |\" ark:- | /slfs6/users/ymz09/kaldi/src/bin/ali-to-post ark:- ark:- |", + format = "map" + } + }, + global_transf = layer_repo:get_layer("global_transf") + }), + data = {main_scp = 429, phone_state = 1}} + } +end + +function 
make_buffer(readers) + -- Wraps the readers in an SGD buffer. + -- NOTE(review): gconf.buffer_size and gconf.randomize are not set in the gconf table of this + -- file; presumably the training driver fills them in before calling this -- confirm. + return nerv.SGDBuffer(gconf, + { + buffer_size = gconf.buffer_size, + randomize = gconf.randomize, + readers = readers + }) +end + +-- Order in which the reader streams are fed to the network inputs +function get_input_order() + return {"main_scp", "phone_state"} +end + +-- Frame accuracy in percent, taken from the counters kept by the ce_crit layer +function get_accuracy(layer_repo) + local ce_crit = layer_repo:get_layer("ce_crit") + return ce_crit.total_correct / ce_crit.total_frames * 100 +end + +-- Prints the ce_crit counters (cross entropy, correct frames, total frames, per-frame error, accuracy) +function print_stat(layer_repo) + local ce_crit = layer_repo:get_layer("ce_crit") + nerv.info("*** training stat begin ***") + nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce) + nerv.printf("correct:\t\t%d\n", ce_crit.total_correct) + nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames) + nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames) + nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo)) + nerv.info("*** training stat end ***") +end diff --git a/kaldi_io/init.c b/kaldi_io/init.c new file mode 100644 index 0000000..fe2f967 --- /dev/null +++ b/kaldi_io/init.c @@ -0,0 +1,8 @@ +#include "../nerv/common.h" +#include <stdio.h> + +extern void kaldi_io_init(lua_State *L); +/* Library entry point for `require 'libkaldiio'`: registers the Kaldi repo classes. + * NOTE(review): returns 1 although nothing is pushed here -- confirm this is intended. */ +int luaopen_libkaldiio(lua_State *L) { + kaldi_io_init(L); + return 1; +} diff --git a/kaldi_io/init.lua b/kaldi_io/init.lua new file mode 100644 index 0000000..9fdb080 --- /dev/null +++ b/kaldi_io/init.lua @@ -0,0 +1,46 @@ +require 'libkaldiio' +require 'speech_utils' +local KaldiReader = nerv.class("nerv.KaldiReader", "nerv.DataReader") + +-- Constructor: stores the reader settings, opens the feature repo and one label repo +-- per entry of reader_conf.mlfs (keyed by the entry's id) +function KaldiReader:__init(global_conf, reader_conf) + self.feat_id = reader_conf.id + self.frm_ext = reader_conf.frm_ext + self.gconf = global_conf + self.global_transf = reader_conf.global_transf + self.debug = global_conf.debug + -- the C binding insists on a real boolean, so map a missing flag to false + if self.debug == nil then + self.debug = false + end + self.feat_repo = nerv.KaldiFeatureRepo(reader_conf.feature_rspecifier) + + self.lab_repo = {} + for id, mlf_spec in pairs(reader_conf.mlfs) do + self.lab_repo[id] = nerv.KaldiLabelRepo(mlf_spec.targets_rspecifier, + mlf_spec.format) + end +end + +function 
KaldiReader:get_data() + -- Returns a table mapping the feature id and every label id to a matrix for the + -- current utterance, then advances the feature repo; returns nil once the repo is exhausted + if self.feat_repo:is_end() then + return nil + end + local res = {} + -- read Kaldi feature + local feat_utter = self.feat_repo:cur_utter(self.debug) + -- global transf + local transformed = nerv.speech_utils.global_transf(feat_utter, + self.global_transf, self.frm_ext, 0, self.gconf) + res[self.feat_id] = transformed + -- add corresponding labels + for id, repo in pairs(self.lab_repo) do + -- the label repo looks the utterance up through the feature repo and is limited + -- to the number of rows of the transformed features + local lab_utter = repo:get_utter(self.feat_repo, + self.frm_ext, + transformed:nrow(), + self.debug) + res[id] = lab_utter + end + -- move the pointer to next + self.feat_repo:next() + collectgarbage("collect") + return res +end diff --git a/kaldi_io/kaldi_io-scm-1.rockspec b/kaldi_io/kaldi_io-scm-1.rockspec new file mode 100644 index 0000000..7c9f8d8 --- /dev/null +++ b/kaldi_io/kaldi_io-scm-1.rockspec @@ -0,0 +1,36 @@ +package = "kaldi_io" +version = "scm-1" +source = { + url = "https://github.com/Nerv-SJTU/nerv-speech.git" +} +description = { + summary = "Kaldi I/O support (Kaldi I/O wrapper) for Nerv", + detailed = [[ + ]], + homepage = "https://github.com/Nerv-SJTU/nerv-speech", + license = "BSD" +} +dependencies = { + "nerv >= scm-1", + "lua >= 5.1" +} +-- The rock is built by the Makefile; the tables below forward LuaRocks paths to it +build = { + type = "make", + build_variables = { + CFLAGS="$(CFLAGS)", + LIBFLAG="$(LIBFLAG)", + LUA_LIBDIR="$(LUA_LIBDIR)", + LUA_BINDIR="$(LUA_BINDIR)", + LUA_INCDIR="$(LUA_INCDIR)", + INST_PREFIX="$(PREFIX)", + LUA="$(LUA)", + }, + install_variables = { + LUA_BINDIR="$(LUA_BINDIR)", + INST_PREFIX="$(PREFIX)", + INST_BINDIR="$(BINDIR)", + INST_LIBDIR="$(LIBDIR)", + INST_LUADIR="$(LUADIR)", + INST_CONFDIR="$(CONFDIR)", + }, +} diff --git a/kaldi_io/src/cwrapper_kaldi.cpp b/kaldi_io/src/cwrapper_kaldi.cpp new file mode 100644 index 0000000..f48d343 --- /dev/null +++ b/kaldi_io/src/cwrapper_kaldi.cpp @@ -0,0 +1,111 @@ +#include <string> +#include "base/kaldi-common.h" +#include "hmm/posterior.h" +#include "util/table-types.h" +typedef kaldi::BaseFloat BaseFloat; + +extern "C" { +#include 
"cwrapper_kaldi.h" +#include "string.h" +#include "assert.h" +#include "nerv/common.h" + + extern Matrix *nerv_matrix_host_float_create(long nrow, long ncol, Status *status); + extern Matrix *nerv_matrix_host_double_create(long nrow, long ncol, Status *status); + + struct KaldiFeatureRepo { + kaldi::SequentialBaseFloatMatrixReader* feature_reader; + string utt; + }; + + KaldiFeatureRepo *kaldi_feature_repo_new(const char *feature_rspecifier) { + KaldiFeatureRepo *repo = new KaldiFeatureRepo(); + repo->feature_reader = new kaldi::SequentialBaseFloatMatrixReader(string(feature_rspecifier)); + return repo; + } + + Matrix *kaldi_feature_repo_read_utterance(KaldiFeatureRepo *repo, lua_State *L, int debug) { + Matrix *mat; /* nerv implementation */ + + repo->utt = repo->feature_reader->Key(); + kaldi::Matrix<BaseFloat> kmat = repo->feature_reader->Value(); + + int n = kmat.NumRows(); + int m = kmat.NumCols(); + Status status; + assert(sizeof(BaseFloat) == sizeof(float)); + if(sizeof(BaseFloat) == sizeof(float)) + mat = nerv_matrix_host_float_create(n, m, &status); + else if(sizeof(BaseFloat) == sizeof(double)) + mat = nerv_matrix_host_double_create(n, m, &status); + NERV_LUA_CHECK_STATUS(L, status); + size_t stride = mat->stride; + if (debug) + fprintf(stderr, "[kaldi] feature: %s %d %d\n", repo->utt.c_str(), n, m); + + for (int i = 0; i < n; i++) + { + const BaseFloat *row = kmat.RowData(i); + BaseFloat *nerv_row = (BaseFloat *)((char *)mat->data.f + i * stride); + /* use memmove to copy the row, since KaldiLib uses compact storage */ + memmove(nerv_row, row, sizeof(BaseFloat) * m); + } + return mat; + } + + void kaldi_feature_repo_next(KaldiFeatureRepo *repo) { + repo->feature_reader->Next(); + } + + int kaldi_feature_repo_is_end(KaldiFeatureRepo *repo) { + return repo->feature_reader->Done(); + } + + void kaldi_feature_repo_destroy(KaldiFeatureRepo *repo) { + if (repo->feature_reader) + delete repo->feature_reader; + delete repo; + } + + struct KaldiLabelRepo { + 
kaldi::RandomAccessPosteriorReader *targets_reader; + }; + + /* Open a random-access posterior reader on targets_rspecifier; fmt is accepted but not used here */ + KaldiLabelRepo *kaldi_label_repo_new(const char *targets_rspecifier, const char *fmt) { + KaldiLabelRepo *repo = new KaldiLabelRepo(); + repo->targets_reader = new kaldi::RandomAccessPosteriorReader(string(targets_rspecifier)); + return repo; + } + + /* Build the label matrix for the utterance last read from frepo (looked up by frepo->utt). + * The result has min(#posterior frames, nframes) rows and as many columns as the first + * frame has posterior entries; each cell holds the .first member of the entry. + * frm_ext is accepted but not used here. */ + Matrix *kaldi_label_repo_read_utterance(KaldiLabelRepo *repo, KaldiFeatureRepo *frepo, int frm_ext, int nframes, + lua_State *L, + int debug) { + Matrix *mat; + kaldi::Posterior targets = repo->targets_reader->Value(frepo->utt); + + int n = targets.size() < nframes ? targets.size() : nframes; + /* an empty posterior has no first frame to take the width from */ + int m = n > 0 ? (int)targets[0].size() : 0; + + Status status; + assert(sizeof(BaseFloat) == sizeof(float)); + if(sizeof(BaseFloat) == sizeof(float)) + mat = nerv_matrix_host_float_create(n, m, &status); + else if(sizeof(BaseFloat) == sizeof(double)) + mat = nerv_matrix_host_double_create(n, m, &status); + NERV_LUA_CHECK_STATUS(L, status); + size_t stride = mat->stride; + + if (debug) + fprintf(stderr, "[kaldi] label: %s %d %d\n", frepo->utt.c_str(), n, m); + for (int i = 0; i < n; i++) + { + /* stride is a row pitch in bytes, so locate the row through a char pointer and + * index columns as elements (the column used to be added as a byte offset) */ + BaseFloat *nerv_row = (BaseFloat *)((char *)mat->data.f + i * stride); + for(int j = 0; j < m; j++) + nerv_row[j] = (BaseFloat)targets[i][j].first; + } + return mat; + } + + /* Delete the reader (if any) and the repo itself */ + void kaldi_label_repo_destroy(KaldiLabelRepo *repo) { + if(repo->targets_reader) + delete repo->targets_reader; + delete repo; + } +} diff --git a/kaldi_io/src/cwrapper_kaldi.h b/kaldi_io/src/cwrapper_kaldi.h new file mode 100644 index 0000000..e34cb5a --- /dev/null +++ b/kaldi_io/src/cwrapper_kaldi.h @@ -0,0 +1,29 @@ +#ifndef NERV_kaldi_KALDI_IO_CWRAPPER +#define NERV_kaldi_KALDI_IO_CWRAPPER +#include "nerv/matrix/matrix.h" +#include "nerv/common.h" +#ifdef __cplusplus +extern "C" { +#endif + + typedef struct KaldiFeatureRepo KaldiFeatureRepo; + + KaldiFeatureRepo *kaldi_feature_repo_new(const char *); + Matrix *kaldi_feature_repo_read_utterance(KaldiFeatureRepo *repo, lua_State *L, int debug); + void kaldi_feature_repo_next(KaldiFeatureRepo *repo); + int 
kaldi_feature_repo_is_end(KaldiFeatureRepo *repo); + void kaldi_feature_repo_destroy(KaldiFeatureRepo *repo); + + typedef struct KaldiLabelRepo KaldiLabelRepo; + + KaldiLabelRepo *kaldi_label_repo_new(const char *, const char *fmt); + + /* the two unnamed ints are frm_ext and nframes, in that order (see cwrapper_kaldi.cpp) */ + Matrix *kaldi_label_repo_read_utterance(KaldiLabelRepo *repo, KaldiFeatureRepo *, int, int, + lua_State *L, + int debug); + + void kaldi_label_repo_destroy(KaldiLabelRepo *repo); +#ifdef __cplusplus +} +#endif +#endif diff --git a/kaldi_io/src/init.c b/kaldi_io/src/init.c new file mode 100644 index 0000000..413452c --- /dev/null +++ b/kaldi_io/src/init.c @@ -0,0 +1,106 @@ +#include "nerv/common.h" +#include "cwrapper_kaldi.h" +#include <stdio.h> + +/* luaT type names used to tag the userdata pushed by the bindings below */ +const char *nerv_kaldi_feat_repo_tname = "nerv.KaldiFeatureRepo"; +const char *nerv_kaldi_label_repo_tname = "nerv.KaldiLabelRepo"; +const char *nerv_matrix_host_float_tname = "nerv.MMatrixFloat"; + +/* nerv.KaldiFeatureRepo(rspecifier): constructor */ +static int feat_repo_new(lua_State *L) { + const char *feature_rsepcifier = luaL_checkstring(L, 1); + KaldiFeatureRepo *repo = kaldi_feature_repo_new(feature_rsepcifier); + luaT_pushudata(L, repo, nerv_kaldi_feat_repo_tname); + return 1; +} + +/* destructor registered with the metatable */ +static int feat_repo_destroy(lua_State *L) { + KaldiFeatureRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_feat_repo_tname); + kaldi_feature_repo_destroy(repo); + return 0; +} + +/* repo:cur_utter(debug): returns the current utterance as a nerv.MMatrixFloat; + * debug must be a boolean */ +static int feat_repo_current_utterance(lua_State *L) { + KaldiFeatureRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_feat_repo_tname); + int debug; + if (!lua_isboolean(L, 2)) + nerv_error(L, "debug flag should be a boolean"); + debug = lua_toboolean(L, 2); + Matrix *utter = kaldi_feature_repo_read_utterance(repo, L, debug); + luaT_pushudata(L, utter, nerv_matrix_host_float_tname); + return 1; +} + +/* repo:next(): advance to the next utterance */ +static int feat_repo_next(lua_State *L) { + KaldiFeatureRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_feat_repo_tname); + kaldi_feature_repo_next(repo); + return 0; +} + +/* repo:is_end(): true once the underlying reader is exhausted */ +static int feat_repo_is_end(lua_State *L) { + KaldiFeatureRepo *repo = luaT_checkudata(L, 1, 
nerv_kaldi_feat_repo_tname); + lua_pushboolean(L, kaldi_feature_repo_is_end(repo)); + return 1; +} + +static const luaL_Reg feat_repo_methods[] = { + {"cur_utter", feat_repo_current_utterance}, + {"next", feat_repo_next}, + {"is_end", feat_repo_is_end}, + {NULL, NULL} +}; + +/* nerv.KaldiLabelRepo(targets_rspecifier, fmt): constructor */ +static int label_repo_new(lua_State *L) { + const char *targets_rspecifier = luaL_checkstring(L, 1); + const char *fmt = luaL_checkstring(L, 2); + KaldiLabelRepo *repo = kaldi_label_repo_new(targets_rspecifier, fmt); + luaT_pushudata(L, repo, nerv_kaldi_label_repo_tname); + return 1; +} + +/* repo:get_utter(feat_repo, frm_ext, nframes, debug): returns the labels of the + * utterance last read from feat_repo as a nerv.MMatrixFloat */ +static int label_repo_read_utterance(lua_State *L) { + KaldiLabelRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_label_repo_tname); + KaldiFeatureRepo *feat_repo = luaT_checkudata(L, 2, nerv_kaldi_feat_repo_tname); + int frm_ext, nframes, debug; + if (!lua_isnumber(L, 3)) + nerv_error(L, "frm_ext should be a number"); + frm_ext = lua_tonumber(L, 3); + if (!lua_isnumber(L, 4)) + nerv_error(L, "nframes should be a number"); + nframes = lua_tonumber(L, 4); + if (!lua_isboolean(L, 5)) + nerv_error(L, "debug flag should be a boolean"); + debug = lua_toboolean(L, 5); + Matrix *utter = kaldi_label_repo_read_utterance(repo, feat_repo, frm_ext, nframes, L, debug); + luaT_pushudata(L, utter, nerv_matrix_host_float_tname); + return 1; +} + +/* destructor registered with the metatable */ +static int label_repo_destroy(lua_State *L) { + KaldiLabelRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_label_repo_tname); + kaldi_label_repo_destroy(repo); + return 0; +} + +static const luaL_Reg label_repo_methods[] = { + {"get_utter", label_repo_read_utterance}, + {NULL, NULL} +}; + +/* Register the nerv.KaldiFeatureRepo metatable with its constructor, destructor and methods */ +static void feat_repo_init(lua_State *L) { + luaT_newmetatable(L, nerv_kaldi_feat_repo_tname, NULL, + feat_repo_new, feat_repo_destroy, NULL); + luaL_register(L, NULL, feat_repo_methods); + lua_pop(L, 1); +} + +/* Register the nerv.KaldiLabelRepo metatable with its constructor, destructor and methods */ +static void label_repo_init(lua_State *L) { + luaT_newmetatable(L, nerv_kaldi_label_repo_tname, NULL, + label_repo_new, label_repo_destroy, NULL); + luaL_register(L, NULL, label_repo_methods); + 
lua_pop(L, 1); +} + +/* Registers both repo classes; called from luaopen_libkaldiio */ +void kaldi_io_init(lua_State *L) { + feat_repo_init(L); + label_repo_init(L); +} diff --git a/kaldi_io/src/test.c b/kaldi_io/src/test.c new file mode 100644 index 0000000..e92b4c9 --- /dev/null +++ b/kaldi_io/src/test.c @@ -0,0 +1,48 @@ +/********************************************************************************* +* File Name : test.c +* Created By : YIMMON, [email protected] +* Creation Date : [2015-08-05 17:39] +* Last Modified : [2015-08-06 14:28] +* Description : +**********************************************************************************/ + +#include "cwrapper_kaldi.h" +#include <stdio.h> + +char feature_rspecifier[] = {"ark:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs6/users/ymz09/swb_ivec/train_bp.scp ark:- |"}; + +/* Print a float matrix row by row; rows are located through the byte stride */ +void print_nerv_matrix(Matrix *mat) { + int n = mat->nrow; + int m = mat->ncol; + int i, j; + size_t stride = mat->stride; + for (i = 0; i < n; i++) + { + float *nerv_row = (float *)((char *)mat->data.f + i * stride); + for (j = 0; j < m; j++) + printf("%.8f ", nerv_row[j]); + puts(""); + } +} + +/* Smoke test: dump the first two utterances of the hard-coded feature pipe. + * NOTE(review): the second read is not guarded by kaldi_feature_repo_is_end, and a NULL + * lua_State is passed to the reader -- confirm both are acceptable for this test. */ +int main(int argc, char *argv[]) +{ + Matrix *mat; + KaldiFeatureRepo *repo = kaldi_feature_repo_new(feature_rspecifier); + + mat = kaldi_feature_repo_read_utterance(repo, NULL, 1); + printf("1st uttrance\n"); + print_nerv_matrix(mat); + + kaldi_feature_repo_next(repo); + + mat = kaldi_feature_repo_read_utterance(repo, NULL, 1); + printf("2nd uttrance\n"); + print_nerv_matrix(mat); + + printf("is end: %d\n", kaldi_feature_repo_is_end(repo)); + + kaldi_feature_repo_destroy(repo); + + return 0; +} diff --git a/kaldi_io/tools/kaldi_to_nerv.cpp b/kaldi_io/tools/kaldi_to_nerv.cpp new file mode 100644 index 0000000..1edb0f2 --- /dev/null +++ b/kaldi_io/tools/kaldi_to_nerv.cpp @@ -0,0 +1,109 @@ +#include <cstdio> +#include <fstream> +#include <string> +#include <cstring> +#include <cassert> + +/* scratch buffers: current input token, formatted output chunk, and the matrix being converted */ +char token[1024]; +char output[1024]; +double mat[4096][4096]; +/* Reads a text network from stdin and writes nerv parameter chunks to the file named by argv[1] */ +int main(int argc, char **argv) { + std::ofstream fout; + 
fout.open(argv[1]); + int cnt = 0; + bool shift; + while (scanf("%s", token) != EOF) + { + int nrow, ncol; + int i, j; + if (strcmp(token, "<AffineTransform>") == 0) + { + double lrate, blrate, mnorm; + scanf("%d %d", &ncol, &nrow); + scanf("%s %lf %s %lf %s %lf", + token, &lrate, token, &blrate, token, &mnorm); + scanf("%s", token); + assert(*token == '['); + printf("%d %d\n", nrow, ncol); + for (j = 0; j < ncol; j++) + for (i = 0; i < nrow; i++) + scanf("%lf", mat[i] + j); + long base = fout.tellp(); + sprintf(output, "%16d", 0); + fout << output; + sprintf(output, "{type=\"nerv.LinearTransParam\",id=\"affine%d_ltp\"}\n", + cnt); + fout << output; + sprintf(output, "%d %d\n", nrow, ncol); + fout << output; + for (i = 0; i < nrow; i++) + { + for (j = 0; j < ncol; j++) + fout << mat[i][j] << " "; + fout << std::endl; + } + long length = fout.tellp() - base; + fout.seekp(base); + sprintf(output, "[%13lu]\n", length); + fout << output; + fout.seekp(0, std::ios_base::end); + scanf("%s", token); + assert(*token == ']'); + if (scanf("%s", token) == 1 && *token == '[') + { + base = fout.tellp(); + for (j = 0; j < ncol; j++) + scanf("%lf", mat[0] + j); + sprintf(output, "%16d", 0); + fout << output; + sprintf(output, "{type=\"nerv.BiasParam\",id=\"affine%d_bp\"}\n", + cnt); + fout << output; + sprintf(output, "1 %d\n", ncol); + fout << output; + for (j = 0; j < ncol; j++) + fout << mat[0][j] << " "; + fout << std::endl; + length = fout.tellp() - base; + fout.seekp(base); + sprintf(output, "[%13lu]\n", length); + fout << output; + fout.seekp(0, std::ios_base::end); + cnt++; + } + } + else if ((shift = (strcmp(token, "<AddShift>") == 0)) || + strcmp(token, "<Rescale>") == 0) + { + double lrate, blrate, mnorm; + scanf("%d %d", &ncol, &ncol); + scanf("%s %lf", + token, &lrate); + scanf("%s", token); + assert(*token == '['); + printf("%d\n", ncol); + for (j = 0; j < ncol; j++) + scanf("%lf", mat[0] + j); + long base = fout.tellp(); + sprintf(output, "%16d", 0); + fout << 
output; + /* NOTE(review): cnt only advances after an affine bias, so consecutive <AddShift>/<Rescale> blocks get the same id -- confirm this is intended */ + sprintf(output, "{type=\"nerv.BiasParam\",id=\"%s%d\"}\n", + shift ? "bias" : "window", + cnt); + fout << output; + sprintf(output, "%d %d\n", 1, ncol); + fout << output; + for (j = 0; j < ncol; j++) + fout << mat[0][j] << " "; + fout << std::endl; + long length = fout.tellp() - base; + fout.seekp(base); + /* length is a long, so print it with %ld; the header stays 16 characters wide */ + sprintf(output, "[%13ld]\n", length); + fout << output; + fout.seekp(0, std::ios_base::end); + /* cap the token at 1023 characters so it cannot overrun token[1024] */ + scanf("%1023s", token); + assert(*token == ']'); + } + } + return 0; +} diff --git a/kaldi_io/tools/nerv_to_kaldi.lua b/kaldi_io/tools/nerv_to_kaldi.lua new file mode 100644 index 0000000..804f09b --- /dev/null +++ b/kaldi_io/tools/nerv_to_kaldi.lua @@ -0,0 +1,66 @@ +-- usage: nerv config_file nerv_param_input tnet_output + +-- the config file is expected to define gconf and make_layer_repo, both used below +dofile(arg[1]) +param_repo = nerv.ParamRepo() +param_repo:import({arg[2], gconf.initialized_param[2]}, nil, gconf) +layer_repo = make_layer_repo(param_repo) +f = assert(io.open(arg[3], "w")) + +-- Writes cumat to the output file f in bracketed text form. +-- A one-row matrix is written on a single line as "[ v v ... ]"; any other matrix is +-- transposed first and written one row per line between " [" and " ]". +function print_tnet_matrix(cumat) + local strs = {} + collectgarbage() + if cumat:nrow() == 1 then + local mat = nerv.MMatrixFloat(1, cumat:ncol()) + cumat:copy_toh(mat) + table.insert(strs, "[ ") + for j = 0, mat:ncol() - 1 do + table.insert(strs, string.format("%.8f ", mat[0][j])) + end + table.insert(strs, " ]\n") + f:write(table.concat(strs)) + else + cumat = cumat:trans() + local mat = nerv.MMatrixFloat(cumat:nrow(), cumat:ncol()) + cumat:copy_toh(mat) + -- plain literal: the old string.format call had no conversions for its arguments + table.insert(strs, " [\n") + for i = 0, mat:nrow() - 1 do + local row = mat[i] + for j = 0, mat:ncol() - 1 do + table.insert(strs, string.format("%.8f ", row[j])) + end + if i == mat:nrow() - 1 then + table.insert(strs, " ]\n") + else + table.insert(strs, "\n") + end + -- flush row by row so the string buffer never holds the whole matrix + f:write(table.concat(strs)) + strs = {} + end + end +end +-- Layers to export, in order. +-- NOTE(review): the list stops at affine6 and goes straight to ce_crit; a config whose network +-- also has sigmoid6/affine7 (as example/swb_baseline.lua does) would not be exported in full -- confirm. +local lnames = {"affine0", "sigmoid0", + "affine1", "sigmoid1", + "affine2", "sigmoid2", + "affine3", "sigmoid3", + "affine4", "sigmoid4", + "affine5", "sigmoid5", + "affine6", "ce_crit"} +f:write("<Nnet>\n") +for i, name in 
ipairs(lnames) do + -- emit one component per layer, chosen by the layer's nerv type name + local layer = layer_repo:get_layer(name) + local layer_type = layer.__typename + if layer_type == "nerv.AffineLayer" then + -- header carries output dim then input dim, followed by the weight matrix and the bias + f:write(string.format("<AffineTransform> %d %d\n<LearnRateCoef> 1 <BiasLearnRateCoef> 1 <MaxNorm> 0", + layer.dim_out[1], layer.dim_in[1])) + print_tnet_matrix(layer.ltp.trans) + print_tnet_matrix(layer.bp.trans) + elseif layer_type == "nerv.SigmoidLayer" then + f:write(string.format("<Sigmoid> %d %d\n", layer.dim_out[1], layer.dim_in[1])) + elseif layer_type == "nerv.SoftmaxCELayer" then + -- both dimensions come from dim_in[1] + f:write(string.format("<Softmax> %d %d\n", layer.dim_in[1], layer.dim_in[1])) + else + nerv.error("unknown layer type %s", layer_type) + end +end +f:write("</Nnet>\n") +f:close() |