-rw-r--r--  kaldi_io/.valgrind                        0
-rw-r--r--  kaldi_io/Makefile                        44
-rw-r--r--  kaldi_io/example/kaldi_io_example.lua     8
-rw-r--r--  kaldi_io/example/swb_baseline.lua       193
-rw-r--r--  kaldi_io/init.c                           8
-rw-r--r--  kaldi_io/init.lua                        46
-rw-r--r--  kaldi_io/kaldi_io-scm-1.rockspec         36
-rw-r--r--  kaldi_io/src/cwrapper_kaldi.cpp         111
-rw-r--r--  kaldi_io/src/cwrapper_kaldi.h            29
-rw-r--r--  kaldi_io/src/init.c                     106
-rw-r--r--  kaldi_io/src/test.c                      48
-rw-r--r--  kaldi_io/tools/kaldi_to_nerv.cpp        109
-rw-r--r--  kaldi_io/tools/nerv_to_kaldi.lua         66
13 files changed, 804 insertions(+), 0 deletions(-)
diff --git a/kaldi_io/.valgrind b/kaldi_io/.valgrind
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/kaldi_io/.valgrind
diff --git a/kaldi_io/Makefile b/kaldi_io/Makefile
new file mode 100644
index 0000000..75ad48e
--- /dev/null
+++ b/kaldi_io/Makefile
@@ -0,0 +1,44 @@
+# Change KDIR to your `kaldi-trunk' path (Kaldi must be compiled with --shared)
+KDIR := /slfs6/users/ymz09/kaldi/
+
+SHELL := /bin/bash
+BUILD_DIR := $(CURDIR)/build
+INC_PATH := $(LUA_BINDIR)/../include/
+OBJS := init.o src/cwrapper_kaldi.o src/init.o
+LIBS := libkaldiio.so
+LUA_LIBS := init.lua
+INCLUDE := -I $(LUA_INCDIR) -I $(INC_PATH) -DLUA_USE_APICHECK
+
+SUBDIR := src
+OBJ_DIR := $(BUILD_DIR)/objs
+LUA_DIR := $(INST_LUADIR)/kaldi_io
+KALDIINCLUDE := -I $(KDIR)/tools/ATLAS/include/ -I $(KDIR)/tools/openfst/include/ -I $(KDIR)/src/
+
+OBJS := $(addprefix $(OBJ_DIR)/,$(OBJS))
+LIBS := $(addprefix $(INST_LIBDIR)/,$(LIBS))
+OBJ_SUBDIR := $(addprefix $(OBJ_DIR)/,$(SUBDIR))
+LUA_SUBDIR := $(addprefix $(LUA_DIR)/,$(SUBDIR))
+LUA_LIBS := $(addprefix $(LUA_DIR)/,$(LUA_LIBS))
+LIB_PATH := $(LUA_BINDIR)/../lib
+
+build: $(OBJ_DIR) $(OBJ_SUBDIR) $(OBJS) $(OBJ_DIR)/src/test
+install: $(LUA_DIR) $(LUA_SUBDIR) $(LUA_LIBS) $(LIBS)
+
+include $(KDIR)/src/kaldi.mk
+KL := $(KDIR)/src/feat/kaldi-feat.a $(KDIR)/src/matrix/kaldi-matrix.a $(KDIR)/src/base/kaldi-base.a $(KDIR)/src/util/kaldi-util.a $(KDIR)/src/hmm/kaldi-hmm.a $(KDIR)/src/tree/kaldi-tree.a -lcblas -llapack
+
+$(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR):
+ -mkdir -p $@
+$(LUA_DIR)/%.lua: %.lua
+ cp $< $@
+$(LIBS): $(OBJ_DIR)/src/cwrapper_kaldi.o $(OBJ_DIR)/init.o $(OBJ_DIR)/src/init.o
+ gcc -shared -fPIC -o $@ $(OBJ_DIR)/src/cwrapper_kaldi.o $(OBJ_DIR)/init.o $(OBJ_DIR)/src/init.o -lstdc++ -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -lluaT $(KL)
+$(OBJ_DIR)/src/cwrapper_kaldi.o: src/cwrapper_kaldi.cpp
+ g++ -o $@ -c $< -DHAVE_ATLAS $(KALDIINCLUDE) -g -fPIC $(INCLUDE) -DKALDI_DOUBLEPRECISION=0 -msse2 -DHAVE_POSIX_MEMALIGN
+$(OBJ_DIR)/src/test: $(OBJ_DIR)/src/cwrapper_kaldi.o $(OBJ_DIR)/src/test.o
+ gcc -o $@ $^ -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) $(INCLUDE) $(KALDIINCLUDE) -lnervcore -Wl,-rpath=$(LUA_LIBDIR) -L$(LUA_LIBDIR) -lluajit-5.1 -lstdc++ -lm $(KL)
+$(OBJ_DIR)/%.o: %.c
+ gcc -o $@ -c $< -g $(INCLUDE) -fPIC
+clean:
+	-rm -f $(OBJ_DIR)/*.o $(OBJ_DIR)/src/*.o $(OBJ_DIR)/src/test
+
diff --git a/kaldi_io/example/kaldi_io_example.lua b/kaldi_io/example/kaldi_io_example.lua
new file mode 100644
index 0000000..8fd068a
--- /dev/null
+++ b/kaldi_io/example/kaldi_io_example.lua
@@ -0,0 +1,8 @@
+require 'kaldi_io'
+
+frm_ext = 5
+feat_repo = nerv.KaldiFeatureRepo("ark:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs6/users/ymz09/swb_ivec/train_bp.scp ark:- |")
+
+feat_utter = feat_repo:cur_utter(true)
+print(feat_utter)
+
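
The example reads only the first utterance. A minimal sketch of walking a whole archive with the cur_utter/next/is_end methods registered in src/init.c below (the rspecifier here is a placeholder):

    require 'kaldi_io'
    -- placeholder pipeline; any Kaldi feature rspecifier works
    local repo = nerv.KaldiFeatureRepo("ark:copy-feats scp:train.scp ark:- |")
    while not repo:is_end() do
        local utter = repo:cur_utter(false) -- false: no debug output
        print(utter:nrow(), utter:ncol())   -- frames x feature dim
        repo:next()
    end
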
diff --git a/kaldi_io/example/swb_baseline.lua b/kaldi_io/example/swb_baseline.lua
new file mode 100644
index 0000000..8b1e122
--- /dev/null
+++ b/kaldi_io/example/swb_baseline.lua
@@ -0,0 +1,193 @@
+require 'kaldi_io'
+gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
+ cumat_type = nerv.CuMatrixFloat,
+ mmat_type = nerv.MMatrixFloat,
+ frm_ext = 5,
+ tr_scp = "ark:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs6/users/ymz09/swb_ivec/train_bp.scp ark:- |",
+ cv_scp = "ark:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs6/users/ymz09/swb_ivec/train_cv.scp ark:- |",
+ initialized_param = {"/slfs6/users/ymz09/swb_ivec/swb_init.nerv",
+ "/slfs6/users/ymz09/swb_ivec/swb_global_transf.nerv"},
+ debug = false}
+
+function make_layer_repo(param_repo)
+ local layer_repo = nerv.LayerRepo(
+ {
+ -- global transf
+ ["nerv.BiasLayer"] =
+ {
+ blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}},
+ blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}}
+ },
+ ["nerv.WindowLayer"] =
+ {
+ wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}},
+ wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}}
+ },
+ -- biased linearity
+ ["nerv.AffineLayer"] =
+ {
+ affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"},
+ {dim_in = {429}, dim_out = {2048}}},
+ affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"},
+ {dim_in = {2048}, dim_out = {3001}}}
+ },
+ ["nerv.SigmoidLayer"] =
+ {
+ sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}}
+ },
+ ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output
+ {
+ ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}}
+ },
+ ["nerv.SoftmaxLayer"] = -- softmax for decode output
+ {
+ softmax = {{}, {dim_in = {3001}, dim_out = {3001}}}
+ }
+ }, param_repo, gconf)
+
+ layer_repo:add_layers(
+ {
+ ["nerv.DAGLayer"] =
+ {
+ global_transf = {{}, {
+ dim_in = {429}, dim_out = {429},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "blayer1[1]",
+ ["blayer1[1]"] = "wlayer1[1]",
+ ["wlayer1[1]"] = "blayer2[1]",
+ ["blayer2[1]"] = "wlayer2[1]",
+ ["wlayer2[1]"] = "<output>[1]"
+ }
+ }},
+ main = {{}, {
+ dim_in = {429}, dim_out = {3001},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "affine0[1]",
+ ["affine0[1]"] = "sigmoid0[1]",
+ ["sigmoid0[1]"] = "affine1[1]",
+ ["affine1[1]"] = "sigmoid1[1]",
+ ["sigmoid1[1]"] = "affine2[1]",
+ ["affine2[1]"] = "sigmoid2[1]",
+ ["sigmoid2[1]"] = "affine3[1]",
+ ["affine3[1]"] = "sigmoid3[1]",
+ ["sigmoid3[1]"] = "affine4[1]",
+ ["affine4[1]"] = "sigmoid4[1]",
+ ["sigmoid4[1]"] = "affine5[1]",
+ ["affine5[1]"] = "sigmoid5[1]",
+ ["sigmoid5[1]"] = "affine6[1]",
+ ["affine6[1]"] = "sigmoid6[1]",
+ ["sigmoid6[1]"] = "affine7[1]",
+ ["affine7[1]"] = "<output>[1]"
+ }
+ }}
+ }
+ }, param_repo, gconf)
+
+ layer_repo:add_layers(
+ {
+ ["nerv.DAGLayer"] =
+ {
+ ce_output = {{}, {
+ dim_in = {429, 1}, dim_out = {1},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "main[1]",
+ ["main[1]"] = "ce_crit[1]",
+ ["<input>[2]"] = "ce_crit[2]",
+ ["ce_crit[1]"] = "<output>[1]"
+ }
+ }},
+ softmax_output = {{}, {
+ dim_in = {429}, dim_out = {3001},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "main[1]",
+ ["main[1]"] = "softmax[1]",
+ ["softmax[1]"] = "<output>[1]"
+ }
+ }}
+ }
+ }, param_repo, gconf)
+
+ return layer_repo
+end
+
+function get_network(layer_repo)
+ return layer_repo:get_layer("ce_output")
+end
+
+function get_decode_network(layer_repo)
+ return layer_repo:get_layer("softmax_output")
+end
+
+function get_global_transf(layer_repo)
+ return layer_repo:get_layer("global_transf")
+end
+
+function make_readers(feature_rspecifier, layer_repo)
+ return {
+ {reader = nerv.KaldiReader(gconf,
+ {
+ id = "main_scp",
+ feature_rspecifier = feature_rspecifier,
+ frm_ext = gconf.frm_ext,
+ mlfs = {
+ phone_state = {
+ targets_rspecifier = "ark:/slfs6/users/ymz09/kaldi/src/bin/ali-to-pdf /slfs6/users/ymz09/swb_ivec/final.mdl \"ark:gunzip -c /slfs6/users/ymz09/swb_ivec/ali.*.gz |\" ark:- | /slfs6/users/ymz09/kaldi/src/bin/ali-to-post ark:- ark:- |",
+ format = "map"
+ }
+ },
+ global_transf = layer_repo:get_layer("global_transf")
+ }),
+ data = {main_scp = 429, phone_state = 1}}
+ }
+end
+
+function make_buffer(readers)
+ return nerv.SGDBuffer(gconf,
+ {
+ buffer_size = gconf.buffer_size,
+ randomize = gconf.randomize,
+ readers = readers
+ })
+end
+
+function get_input_order()
+ return {"main_scp", "phone_state"}
+end
+
+function get_accuracy(layer_repo)
+ local ce_crit = layer_repo:get_layer("ce_crit")
+ return ce_crit.total_correct / ce_crit.total_frames * 100
+end
+
+function print_stat(layer_repo)
+ local ce_crit = layer_repo:get_layer("ce_crit")
+ nerv.info("*** training stat begin ***")
+ nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce)
+ nerv.printf("correct:\t\t%d\n", ce_crit.total_correct)
+ nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames)
+ nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames)
+ nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo))
+ nerv.info("*** training stat end ***")
+end
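
swb_baseline.lua only defines hooks; a rough sketch of how a training script might drive them (buffer_size and randomize are assumed values here, since gconf above does not set them, and the real NERV loop also handles epochs and cross-validation):

    dofile("swb_baseline.lua")
    gconf.buffer_size = 81920 -- assumption: not set in gconf above
    gconf.randomize = true    -- assumption: not set in gconf above
    local param_repo = nerv.ParamRepo()
    param_repo:import(gconf.initialized_param, nil, gconf)
    local layer_repo = make_layer_repo(param_repo)
    local network = get_network(layer_repo)
    local buffer = make_buffer(make_readers(gconf.tr_scp, layer_repo))
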
diff --git a/kaldi_io/init.c b/kaldi_io/init.c
new file mode 100644
index 0000000..fe2f967
--- /dev/null
+++ b/kaldi_io/init.c
@@ -0,0 +1,8 @@
+#include "../nerv/common.h"
+#include <stdio.h>
+
+extern void kaldi_io_init(lua_State *L);
+int luaopen_libkaldiio(lua_State *L) {
+ kaldi_io_init(L);
+ return 1;
+}
diff --git a/kaldi_io/init.lua b/kaldi_io/init.lua
new file mode 100644
index 0000000..9fdb080
--- /dev/null
+++ b/kaldi_io/init.lua
@@ -0,0 +1,46 @@
+require 'libkaldiio'
+require 'speech_utils'
+local KaldiReader = nerv.class("nerv.KaldiReader", "nerv.DataReader")
+
+function KaldiReader:__init(global_conf, reader_conf)
+ self.feat_id = reader_conf.id
+ self.frm_ext = reader_conf.frm_ext
+ self.gconf = global_conf
+ self.global_transf = reader_conf.global_transf
+ self.debug = global_conf.debug
+ if self.debug == nil then
+ self.debug = false
+ end
+ self.feat_repo = nerv.KaldiFeatureRepo(reader_conf.feature_rspecifier)
+
+ self.lab_repo = {}
+ for id, mlf_spec in pairs(reader_conf.mlfs) do
+ self.lab_repo[id] = nerv.KaldiLabelRepo(mlf_spec.targets_rspecifier,
+ mlf_spec.format)
+ end
+end
+
+function KaldiReader:get_data()
+ if self.feat_repo:is_end() then
+ return nil
+ end
+ local res = {}
+ -- read Kaldi feature
+ local feat_utter = self.feat_repo:cur_utter(self.debug)
+ -- global transf
+ local transformed = nerv.speech_utils.global_transf(feat_utter,
+ self.global_transf, self.frm_ext, 0, self.gconf)
+ res[self.feat_id] = transformed
+ -- add corresponding labels
+ for id, repo in pairs(self.lab_repo) do
+ local lab_utter = repo:get_utter(self.feat_repo,
+ self.frm_ext,
+ transformed:nrow(),
+ self.debug)
+ res[id] = lab_utter
+ end
+ -- move the pointer to next
+ self.feat_repo:next()
+ collectgarbage("collect")
+ return res
+end
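
A minimal sketch of constructing a KaldiReader directly, assuming a gconf and layer_repo like those in example/swb_baseline.lua (rspecifiers are placeholders):

    local reader = nerv.KaldiReader(gconf, {
        id = "main_scp",
        feature_rspecifier = "ark:copy-feats scp:train.scp ark:- |",
        frm_ext = gconf.frm_ext,
        mlfs = {
            phone_state = {
                targets_rspecifier = "ark:...", -- placeholder alignment pipe
                format = "map"
            }
        },
        global_transf = layer_repo:get_layer("global_transf")
    })
    local data = reader:get_data() -- {main_scp = features, phone_state = labels}
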
diff --git a/kaldi_io/kaldi_io-scm-1.rockspec b/kaldi_io/kaldi_io-scm-1.rockspec
new file mode 100644
index 0000000..7c9f8d8
--- /dev/null
+++ b/kaldi_io/kaldi_io-scm-1.rockspec
@@ -0,0 +1,36 @@
+package = "kaldi_io"
+version = "scm-1"
+source = {
+ url = "https://github.com/Nerv-SJTU/nerv-speech.git"
+}
+description = {
+ summary = "Kaldi I/O support (Kaldi I/O wrapper) for Nerv",
+ detailed = [[
+ ]],
+ homepage = "https://github.com/Nerv-SJTU/nerv-speech",
+ license = "BSD"
+}
+dependencies = {
+ "nerv >= scm-1",
+ "lua >= 5.1"
+}
+build = {
+ type = "make",
+ build_variables = {
+ CFLAGS="$(CFLAGS)",
+ LIBFLAG="$(LIBFLAG)",
+ LUA_LIBDIR="$(LUA_LIBDIR)",
+ LUA_BINDIR="$(LUA_BINDIR)",
+ LUA_INCDIR="$(LUA_INCDIR)",
+ INST_PREFIX="$(PREFIX)",
+ LUA="$(LUA)",
+ },
+ install_variables = {
+ LUA_BINDIR="$(LUA_BINDIR)",
+ INST_PREFIX="$(PREFIX)",
+ INST_BINDIR="$(BINDIR)",
+ INST_LIBDIR="$(LIBDIR)",
+ INST_LUADIR="$(LUADIR)",
+ INST_CONFDIR="$(CONFDIR)",
+ },
+}
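
luarocks' make backend hands the variables above to the Makefile. After installation, a quick smoke test from NERV's Lua environment (assuming a clean install):

    require 'kaldi_io'
    assert(nerv.KaldiFeatureRepo and nerv.KaldiLabelRepo and nerv.KaldiReader)
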
diff --git a/kaldi_io/src/cwrapper_kaldi.cpp b/kaldi_io/src/cwrapper_kaldi.cpp
new file mode 100644
index 0000000..f48d343
--- /dev/null
+++ b/kaldi_io/src/cwrapper_kaldi.cpp
@@ -0,0 +1,111 @@
+#include <string>
+#include "base/kaldi-common.h"
+#include "hmm/posterior.h"
+#include "util/table-types.h"
+typedef kaldi::BaseFloat BaseFloat;
+
+extern "C" {
+#include "cwrapper_kaldi.h"
+#include "string.h"
+#include "assert.h"
+#include "nerv/common.h"
+
+ extern Matrix *nerv_matrix_host_float_create(long nrow, long ncol, Status *status);
+ extern Matrix *nerv_matrix_host_double_create(long nrow, long ncol, Status *status);
+
+ struct KaldiFeatureRepo {
+ kaldi::SequentialBaseFloatMatrixReader* feature_reader;
+        std::string utt;
+ };
+
+ KaldiFeatureRepo *kaldi_feature_repo_new(const char *feature_rspecifier) {
+ KaldiFeatureRepo *repo = new KaldiFeatureRepo();
+        repo->feature_reader = new kaldi::SequentialBaseFloatMatrixReader(std::string(feature_rspecifier));
+ return repo;
+ }
+
+ Matrix *kaldi_feature_repo_read_utterance(KaldiFeatureRepo *repo, lua_State *L, int debug) {
+ Matrix *mat; /* nerv implementation */
+
+ repo->utt = repo->feature_reader->Key();
+ kaldi::Matrix<BaseFloat> kmat = repo->feature_reader->Value();
+
+ int n = kmat.NumRows();
+ int m = kmat.NumCols();
+ Status status;
+ assert(sizeof(BaseFloat) == sizeof(float));
+ if(sizeof(BaseFloat) == sizeof(float))
+ mat = nerv_matrix_host_float_create(n, m, &status);
+ else if(sizeof(BaseFloat) == sizeof(double))
+ mat = nerv_matrix_host_double_create(n, m, &status);
+ NERV_LUA_CHECK_STATUS(L, status);
+ size_t stride = mat->stride;
+ if (debug)
+ fprintf(stderr, "[kaldi] feature: %s %d %d\n", repo->utt.c_str(), n, m);
+
+ for (int i = 0; i < n; i++)
+ {
+ const BaseFloat *row = kmat.RowData(i);
+ BaseFloat *nerv_row = (BaseFloat *)((char *)mat->data.f + i * stride);
+ /* use memmove to copy the row, since KaldiLib uses compact storage */
+ memmove(nerv_row, row, sizeof(BaseFloat) * m);
+ }
+ return mat;
+ }
+
+ void kaldi_feature_repo_next(KaldiFeatureRepo *repo) {
+ repo->feature_reader->Next();
+ }
+
+ int kaldi_feature_repo_is_end(KaldiFeatureRepo *repo) {
+ return repo->feature_reader->Done();
+ }
+
+ void kaldi_feature_repo_destroy(KaldiFeatureRepo *repo) {
+ if (repo->feature_reader)
+ delete repo->feature_reader;
+ delete repo;
+ }
+
+ struct KaldiLabelRepo {
+ kaldi::RandomAccessPosteriorReader *targets_reader;
+ };
+
+ KaldiLabelRepo *kaldi_label_repo_new(const char *targets_rspecifier, const char *fmt) {
+ KaldiLabelRepo *repo = new KaldiLabelRepo();
+        repo->targets_reader = new kaldi::RandomAccessPosteriorReader(std::string(targets_rspecifier));
+ return repo;
+ }
+
+ Matrix *kaldi_label_repo_read_utterance(KaldiLabelRepo *repo, KaldiFeatureRepo *frepo, int frm_ext, int nframes,
+ lua_State *L,
+ int debug) {
+ Matrix *mat;
+ kaldi::Posterior targets = repo->targets_reader->Value(frepo->utt);
+
+ int n = targets.size() < nframes ? targets.size() : nframes;
+ int m = (int)targets[0].size();
+
+ Status status;
+ assert(sizeof(BaseFloat) == sizeof(float));
+ if(sizeof(BaseFloat) == sizeof(float))
+ mat = nerv_matrix_host_float_create(n, m, &status);
+ else if(sizeof(BaseFloat) == sizeof(double))
+ mat = nerv_matrix_host_double_create(n, m, &status);
+ NERV_LUA_CHECK_STATUS(L, status);
+ size_t stride = mat->stride;
+
+ if (debug)
+ fprintf(stderr, "[kaldi] label: %s %d %d\n", frepo->utt.c_str(), n, m);
+ for (int i = 0; i < n; i++)
+ for(int j = 0; j < m; j++)
+                /* stride is in bytes, so index columns after the row cast */
+                ((BaseFloat *)((char *)mat->data.f + i * stride))[j] = (BaseFloat)targets[i][j].first;
+ return mat;
+ }
+
+ void kaldi_label_repo_destroy(KaldiLabelRepo *repo) {
+ if(repo->targets_reader)
+ delete repo->targets_reader;
+ delete repo;
+ }
+}
diff --git a/kaldi_io/src/cwrapper_kaldi.h b/kaldi_io/src/cwrapper_kaldi.h
new file mode 100644
index 0000000..e34cb5a
--- /dev/null
+++ b/kaldi_io/src/cwrapper_kaldi.h
@@ -0,0 +1,29 @@
+#ifndef NERV_kaldi_KALDI_IO_CWRAPPER
+#define NERV_kaldi_KALDI_IO_CWRAPPER
+#include "nerv/matrix/matrix.h"
+#include "nerv/common.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ typedef struct KaldiFeatureRepo KaldiFeatureRepo;
+
+ KaldiFeatureRepo *kaldi_feature_repo_new(const char *);
+ Matrix *kaldi_feature_repo_read_utterance(KaldiFeatureRepo *repo, lua_State *L, int debug);
+ void kaldi_feature_repo_next(KaldiFeatureRepo *repo);
+ int kaldi_feature_repo_is_end(KaldiFeatureRepo *repo);
+ void kaldi_feature_repo_destroy(KaldiFeatureRepo *repo);
+
+ typedef struct KaldiLabelRepo KaldiLabelRepo;
+
+ KaldiLabelRepo *kaldi_label_repo_new(const char *, const char *fmt);
+
+ Matrix *kaldi_label_repo_read_utterance(KaldiLabelRepo *repo, KaldiFeatureRepo *, int, int,
+ lua_State *L,
+ int debug);
+
+ void kaldi_label_repo_destroy(KaldiLabelRepo *repo);
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/kaldi_io/src/init.c b/kaldi_io/src/init.c
new file mode 100644
index 0000000..413452c
--- /dev/null
+++ b/kaldi_io/src/init.c
@@ -0,0 +1,106 @@
+#include "nerv/common.h"
+#include "cwrapper_kaldi.h"
+#include <stdio.h>
+
+const char *nerv_kaldi_feat_repo_tname = "nerv.KaldiFeatureRepo";
+const char *nerv_kaldi_label_repo_tname = "nerv.KaldiLabelRepo";
+const char *nerv_matrix_host_float_tname = "nerv.MMatrixFloat";
+
+static int feat_repo_new(lua_State *L) {
+    const char *feature_rspecifier = luaL_checkstring(L, 1);
+    KaldiFeatureRepo *repo = kaldi_feature_repo_new(feature_rspecifier);
+ luaT_pushudata(L, repo, nerv_kaldi_feat_repo_tname);
+ return 1;
+}
+
+static int feat_repo_destroy(lua_State *L) {
+ KaldiFeatureRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_feat_repo_tname);
+ kaldi_feature_repo_destroy(repo);
+ return 0;
+}
+
+static int feat_repo_current_utterance(lua_State *L) {
+ KaldiFeatureRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_feat_repo_tname);
+ int debug;
+ if (!lua_isboolean(L, 2))
+ nerv_error(L, "debug flag should be a boolean");
+ debug = lua_toboolean(L, 2);
+ Matrix *utter = kaldi_feature_repo_read_utterance(repo, L, debug);
+ luaT_pushudata(L, utter, nerv_matrix_host_float_tname);
+ return 1;
+}
+
+static int feat_repo_next(lua_State *L) {
+ KaldiFeatureRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_feat_repo_tname);
+ kaldi_feature_repo_next(repo);
+ return 0;
+}
+
+static int feat_repo_is_end(lua_State *L) {
+ KaldiFeatureRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_feat_repo_tname);
+ lua_pushboolean(L, kaldi_feature_repo_is_end(repo));
+ return 1;
+}
+
+static const luaL_Reg feat_repo_methods[] = {
+ {"cur_utter", feat_repo_current_utterance},
+ {"next", feat_repo_next},
+ {"is_end", feat_repo_is_end},
+ {NULL, NULL}
+};
+
+static int label_repo_new(lua_State *L) {
+ const char *targets_rspecifier = luaL_checkstring(L, 1);
+ const char *fmt = luaL_checkstring(L, 2);
+ KaldiLabelRepo *repo = kaldi_label_repo_new(targets_rspecifier, fmt);
+ luaT_pushudata(L, repo, nerv_kaldi_label_repo_tname);
+ return 1;
+}
+
+static int label_repo_read_utterance(lua_State *L) {
+ KaldiLabelRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_label_repo_tname);
+ KaldiFeatureRepo *feat_repo = luaT_checkudata(L, 2, nerv_kaldi_feat_repo_tname);
+ int frm_ext, nframes, debug;
+ if (!lua_isnumber(L, 3))
+ nerv_error(L, "frm_ext should be a number");
+ frm_ext = lua_tonumber(L, 3);
+ if (!lua_isnumber(L, 4))
+ nerv_error(L, "nframes should be a number");
+ nframes = lua_tonumber(L, 4);
+ if (!lua_isboolean(L, 5))
+ nerv_error(L, "debug flag should be a boolean");
+ debug = lua_toboolean(L, 5);
+ Matrix *utter = kaldi_label_repo_read_utterance(repo, feat_repo, frm_ext, nframes, L, debug);
+ luaT_pushudata(L, utter, nerv_matrix_host_float_tname);
+ return 1;
+}
+
+static int label_repo_destroy(lua_State *L) {
+ KaldiLabelRepo *repo = luaT_checkudata(L, 1, nerv_kaldi_label_repo_tname);
+ kaldi_label_repo_destroy(repo);
+ return 0;
+}
+
+static const luaL_Reg label_repo_methods[] = {
+ {"get_utter", label_repo_read_utterance},
+ {NULL, NULL}
+};
+
+static void feat_repo_init(lua_State *L) {
+ luaT_newmetatable(L, nerv_kaldi_feat_repo_tname, NULL,
+ feat_repo_new, feat_repo_destroy, NULL);
+ luaL_register(L, NULL, feat_repo_methods);
+ lua_pop(L, 1);
+}
+
+static void label_repo_init(lua_State *L) {
+ luaT_newmetatable(L, nerv_kaldi_label_repo_tname, NULL,
+ label_repo_new, label_repo_destroy, NULL);
+ luaL_register(L, NULL, label_repo_methods);
+ lua_pop(L, 1);
+}
+
+void kaldi_io_init(lua_State *L) {
+ feat_repo_init(L);
+ label_repo_init(L);
+}
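
These metatables back the Lua-side wrapper in init.lua. A sketch of pairing the two repos by hand (the rspecifiers and gconf are assumed to exist):

    local feat_repo = nerv.KaldiFeatureRepo(feature_rspecifier)
    local lab_repo = nerv.KaldiLabelRepo(targets_rspecifier, "map")
    local feat_utter = feat_repo:cur_utter(false)
    -- labels are looked up by the key of the utterance feat_repo currently holds
    local labels = lab_repo:get_utter(feat_repo, gconf.frm_ext,
                                      feat_utter:nrow(), false)
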
diff --git a/kaldi_io/src/test.c b/kaldi_io/src/test.c
new file mode 100644
index 0000000..e92b4c9
--- /dev/null
+++ b/kaldi_io/src/test.c
@@ -0,0 +1,48 @@
+/*********************************************************************************
+* File Name : test.c
+* Created By : YIMMON, [email protected]
+* Creation Date : [2015-08-05 17:39]
+* Last Modified : [2015-08-06 14:28]
+* Description :
+**********************************************************************************/
+
+#include "cwrapper_kaldi.h"
+#include <stdio.h>
+
+char feature_rspecifier[] = {"ark:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs6/users/ymz09/swb_ivec/train_bp.scp ark:- |"};
+
+void print_nerv_matrix(Matrix *mat) {
+ int n = mat->nrow;
+ int m = mat->ncol;
+ int i, j;
+ size_t stride = mat->stride;
+ for (i = 0; i < n; i++)
+ {
+ float *nerv_row = (float *)((char *)mat->data.f + i * stride);
+ for (j = 0; j < m; j++)
+ printf("%.8f ", nerv_row[j]);
+ puts("");
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ Matrix *mat;
+ KaldiFeatureRepo *repo = kaldi_feature_repo_new(feature_rspecifier);
+
+ mat = kaldi_feature_repo_read_utterance(repo, NULL, 1);
+ printf("1st uttrance\n");
+ print_nerv_matrix(mat);
+
+ kaldi_feature_repo_next(repo);
+
+ mat = kaldi_feature_repo_read_utterance(repo, NULL, 1);
+ printf("2nd uttrance\n");
+ print_nerv_matrix(mat);
+
+ printf("is end: %d\n", kaldi_feature_repo_is_end(repo));
+
+ kaldi_feature_repo_destroy(repo);
+
+ return 0;
+}
diff --git a/kaldi_io/tools/kaldi_to_nerv.cpp b/kaldi_io/tools/kaldi_to_nerv.cpp
new file mode 100644
index 0000000..1edb0f2
--- /dev/null
+++ b/kaldi_io/tools/kaldi_to_nerv.cpp
@@ -0,0 +1,109 @@
+#include <cstdio>
+#include <fstream>
+#include <string>
+#include <cstring>
+#include <cassert>
+
+char token[1024];
+char output[1024];
+double mat[4096][4096];
+int main(int argc, char **argv) {
+ std::ofstream fout;
+ fout.open(argv[1]);
+ int cnt = 0;
+ bool shift;
+ while (scanf("%s", token) != EOF)
+ {
+ int nrow, ncol;
+ int i, j;
+ if (strcmp(token, "<AffineTransform>") == 0)
+ {
+ double lrate, blrate, mnorm;
+ scanf("%d %d", &ncol, &nrow);
+ scanf("%s %lf %s %lf %s %lf",
+ token, &lrate, token, &blrate, token, &mnorm);
+ scanf("%s", token);
+ assert(*token == '[');
+ printf("%d %d\n", nrow, ncol);
+ for (j = 0; j < ncol; j++)
+ for (i = 0; i < nrow; i++)
+ scanf("%lf", mat[i] + j);
+ long base = fout.tellp();
+ sprintf(output, "%16d", 0);
+ fout << output;
+ sprintf(output, "{type=\"nerv.LinearTransParam\",id=\"affine%d_ltp\"}\n",
+ cnt);
+ fout << output;
+ sprintf(output, "%d %d\n", nrow, ncol);
+ fout << output;
+ for (i = 0; i < nrow; i++)
+ {
+ for (j = 0; j < ncol; j++)
+ fout << mat[i][j] << " ";
+ fout << std::endl;
+ }
+ long length = fout.tellp() - base;
+ fout.seekp(base);
+ sprintf(output, "[%13lu]\n", length);
+ fout << output;
+ fout.seekp(0, std::ios_base::end);
+ scanf("%s", token);
+ assert(*token == ']');
+ if (scanf("%s", token) == 1 && *token == '[')
+ {
+ base = fout.tellp();
+ for (j = 0; j < ncol; j++)
+ scanf("%lf", mat[0] + j);
+ sprintf(output, "%16d", 0);
+ fout << output;
+ sprintf(output, "{type=\"nerv.BiasParam\",id=\"affine%d_bp\"}\n",
+ cnt);
+ fout << output;
+ sprintf(output, "1 %d\n", ncol);
+ fout << output;
+ for (j = 0; j < ncol; j++)
+ fout << mat[0][j] << " ";
+ fout << std::endl;
+ length = fout.tellp() - base;
+ fout.seekp(base);
+ sprintf(output, "[%13lu]\n", length);
+ fout << output;
+ fout.seekp(0, std::ios_base::end);
+ cnt++;
+ }
+ }
+ else if ((shift = (strcmp(token, "<AddShift>") == 0)) ||
+ strcmp(token, "<Rescale>") == 0)
+ {
+ double lrate, blrate, mnorm;
+ scanf("%d %d", &ncol, &ncol);
+ scanf("%s %lf",
+ token, &lrate);
+ scanf("%s", token);
+ assert(*token == '[');
+ printf("%d\n", ncol);
+ for (j = 0; j < ncol; j++)
+ scanf("%lf", mat[0] + j);
+ long base = fout.tellp();
+ sprintf(output, "%16d", 0);
+ fout << output;
+ sprintf(output, "{type=\"nerv.BiasParam\",id=\"%s%d\"}\n",
+ shift ? "bias" : "window",
+ cnt);
+ fout << output;
+ sprintf(output, "%d %d\n", 1, ncol);
+ fout << output;
+ for (j = 0; j < ncol; j++)
+ fout << mat[0][j] << " ";
+ fout << std::endl;
+ long length = fout.tellp() - base;
+ fout.seekp(base);
+ sprintf(output, "[%13lu]\n", length);
+ fout << output;
+ fout.seekp(0, std::ios_base::end);
+ scanf("%s", token);
+ assert(*token == ']');
+ }
+ }
+ return 0;
+}
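
For reference, the converter reads Kaldi nnet1 text of roughly this shape on stdin (dimensions and values are illustrative; the weight matrix is transposed on its way into the NERV chunk):

    <AffineTransform> 2048 429
    <LearnRateCoef> 1 <BiasLearnRateCoef> 1 <MaxNorm> 0
     [
       0.1 0.2 ... (2048 rows of 429 weights each)
     ]
     [ 0.0 0.0 ... (2048 bias values) ]
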
diff --git a/kaldi_io/tools/nerv_to_kaldi.lua b/kaldi_io/tools/nerv_to_kaldi.lua
new file mode 100644
index 0000000..804f09b
--- /dev/null
+++ b/kaldi_io/tools/nerv_to_kaldi.lua
@@ -0,0 +1,66 @@
+-- usage: nerv nerv_to_kaldi.lua config_file nerv_param_input kaldi_output
+
+dofile(arg[1])
+param_repo = nerv.ParamRepo()
+param_repo:import({arg[2], gconf.initialized_param[2]}, nil, gconf)
+layer_repo = make_layer_repo(param_repo)
+f = assert(io.open(arg[3], "w"))
+
+function print_tnet_matrix(cumat)
+ local strs = {}
+ collectgarbage()
+ if cumat:nrow() == 1 then
+ local mat = nerv.MMatrixFloat(1, cumat:ncol())
+ cumat:copy_toh(mat)
+ table.insert(strs, "[ ")
+ for j = 0, mat:ncol() - 1 do
+ table.insert(strs, string.format("%.8f ", mat[0][j]))
+ end
+ table.insert(strs, " ]\n")
+ f:write(table.concat(strs))
+ else
+ cumat = cumat:trans()
+ local mat = nerv.MMatrixFloat(cumat:nrow(), cumat:ncol())
+ cumat:copy_toh(mat)
+        table.insert(strs, " [\n")
+ for i = 0, mat:nrow() - 1 do
+ local row = mat[i]
+ for j = 0, mat:ncol() - 1 do
+ table.insert(strs, string.format("%.8f ", row[j]))
+ end
+ if i == mat:nrow() - 1 then
+ table.insert(strs, " ]\n")
+ else
+ table.insert(strs, "\n")
+ end
+ f:write(table.concat(strs))
+ strs = {}
+ end
+ end
+end
+local lnames = {"affine0", "sigmoid0",
+ "affine1", "sigmoid1",
+ "affine2", "sigmoid2",
+ "affine3", "sigmoid3",
+ "affine4", "sigmoid4",
+ "affine5", "sigmoid5",
+ "affine6", "ce_crit"}
+f:write("<Nnet>\n")
+for i, name in ipairs(lnames) do
+ local layer = layer_repo:get_layer(name)
+ local layer_type = layer.__typename
+ if layer_type == "nerv.AffineLayer" then
+ f:write(string.format("<AffineTransform> %d %d\n<LearnRateCoef> 1 <BiasLearnRateCoef> 1 <MaxNorm> 0",
+ layer.dim_out[1], layer.dim_in[1]))
+ print_tnet_matrix(layer.ltp.trans)
+ print_tnet_matrix(layer.bp.trans)
+ elseif layer_type == "nerv.SigmoidLayer" then
+ f:write(string.format("<Sigmoid> %d %d\n", layer.dim_out[1], layer.dim_in[1]))
+ elseif layer_type == "nerv.SoftmaxCELayer" then
+ f:write(string.format("<Softmax> %d %d\n", layer.dim_in[1], layer.dim_in[1]))
+ else
+ nerv.error("unknown layer type %s", layer_type)
+ end
+end
+f:write("</Nnet>\n")
+f:close()
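
A plausible invocation, assuming NERV's launcher is on PATH (paths are illustrative):

    nerv nerv_to_kaldi.lua swb_baseline.lua trained_params.nerv final.nnet.txt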