summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTed Yin <[email protected]>2015-10-12 09:26:53 +0800
committerTed Yin <[email protected]>2015-10-12 09:26:53 +0800
commit0dba4c998fcccb4bae29582b7d8be94de476dd0b (patch)
treeb8529d4f0c2ea0a91ee4b7a4b21a14c0616fc081
parent7acd14eca701deaffb2d16262528da37ee23263a (diff)
parente39fb231f64ddc8b79a6eb5434f529aadb3165fe (diff)
Merge pull request #6 from yimmon/master
add kaldi_seq
-rw-r--r--kaldi_io/Makefile2
-rw-r--r--kaldi_io/example/swb_baseline.lua3
-rw-r--r--kaldi_io/example/swb_baseline_basic.lua157
-rw-r--r--kaldi_io/init.lua1
-rw-r--r--kaldi_io/kaldi.mk70
-rw-r--r--kaldi_seq/.valgrind0
-rw-r--r--kaldi_seq/Makefile47
-rw-r--r--kaldi_seq/init.c8
-rw-r--r--kaldi_seq/init.lua2
-rw-r--r--kaldi_seq/kaldi_seq-scm-1.rockspec36
-rw-r--r--kaldi_seq/layer/mmi.lua50
-rw-r--r--kaldi_seq/layer/mpe.lua52
-rw-r--r--kaldi_seq/src/init.c131
-rw-r--r--kaldi_seq/src/kaldi_mmi.cpp427
-rw-r--r--kaldi_seq/src/kaldi_mmi.h20
-rw-r--r--kaldi_seq/src/kaldi_mpe.cpp411
-rw-r--r--kaldi_seq/src/kaldi_mpe.h21
-rw-r--r--kaldi_seq/tools/net_kaldi2nerv.cpp85
-rw-r--r--kaldi_seq/tools/transf_kaldi2nerv.cpp106
19 files changed, 1400 insertions, 229 deletions
diff --git a/kaldi_io/Makefile b/kaldi_io/Makefile
index 1066fc5..7b0c0bd 100644
--- a/kaldi_io/Makefile
+++ b/kaldi_io/Makefile
@@ -1,5 +1,5 @@
# Change KDIR to `kaldi-trunk' path (Kaldi must be compiled with --share)
-KDIR := /home/stuymf/kaldi-trunk/
+KDIR := /slfs6/users/ymz09/kaldi/
SHELL := /bin/bash
BUILD_DIR := $(CURDIR)/build
diff --git a/kaldi_io/example/swb_baseline.lua b/kaldi_io/example/swb_baseline.lua
index 8b1e122..3ef6c65 100644
--- a/kaldi_io/example/swb_baseline.lua
+++ b/kaldi_io/example/swb_baseline.lua
@@ -173,7 +173,8 @@ function make_buffer(readers)
end
function get_input_order()
- return {"main_scp", "phone_state"}
+ return {{id = "main_scp", global_transf = true},
+ {id = "phone_state"}}
end
function get_accuracy(layer_repo)
diff --git a/kaldi_io/example/swb_baseline_basic.lua b/kaldi_io/example/swb_baseline_basic.lua
deleted file mode 100644
index e6c8145..0000000
--- a/kaldi_io/example/swb_baseline_basic.lua
+++ /dev/null
@@ -1,157 +0,0 @@
-require 'kaldi_io'
-gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
- cumat_type = nerv.CuMatrixFloat,
- mmat_type = nerv.MMatrixFloat,
- frm_ext = 5,
- tr_rspecifier = "ark:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs6/users/ymz09/swb_ivec/train_bp.scp ark:- |",
- cv_rspecifier = "ark:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs6/users/ymz09/swb_ivec/train_cv.scp ark:- |",
- initialized_param = {"/slfs6/users/ymz09/swb_ivec/swb_init.nerv",
- "/slfs6/users/ymz09/swb_ivec/swb_global_transf.nerv"},
- debug = false}
-
-function make_sublayer_repo(param_repo)
- return nerv.LayerRepo(
- {
- -- global transf
- ["nerv.BiasLayer"] =
- {
- blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}},
- blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}}
- },
- ["nerv.WindowLayer"] =
- {
- wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}},
- wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}}
- },
- -- biased linearity
- ["nerv.AffineLayer"] =
- {
- affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"},
- {dim_in = {429}, dim_out = {2048}}},
- affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"},
- {dim_in = {2048}, dim_out = {3001}}}
- },
- ["nerv.SigmoidLayer"] =
- {
- sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}}
- },
- ["nerv.SoftmaxCELayer"] =
- {
- ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}}
- }
- }, param_repo, gconf)
-end
-
-function make_layer_repo(sublayer_repo, param_repo)
- return nerv.LayerRepo(
- {
- ["nerv.DAGLayer"] =
- {
- global_transf = {{}, {
- dim_in = {429}, dim_out = {429},
- sub_layers = sublayer_repo,
- connections = {
- ["<input>[1]"] = "blayer1[1]",
- ["blayer1[1]"] = "wlayer1[1]",
- ["wlayer1[1]"] = "blayer2[1]",
- ["blayer2[1]"] = "wlayer2[1]",
- ["wlayer2[1]"] = "<output>[1]"
- }
- }},
- main = {{}, {
- dim_in = {429, 1}, dim_out = {1},
- sub_layers = sublayer_repo,
- connections = {
- ["<input>[1]"] = "affine0[1]",
- ["affine0[1]"] = "sigmoid0[1]",
- ["sigmoid0[1]"] = "affine1[1]",
- ["affine1[1]"] = "sigmoid1[1]",
- ["sigmoid1[1]"] = "affine2[1]",
- ["affine2[1]"] = "sigmoid2[1]",
- ["sigmoid2[1]"] = "affine3[1]",
- ["affine3[1]"] = "sigmoid3[1]",
- ["sigmoid3[1]"] = "affine4[1]",
- ["affine4[1]"] = "sigmoid4[1]",
- ["sigmoid4[1]"] = "affine5[1]",
- ["affine5[1]"] = "sigmoid5[1]",
- ["sigmoid5[1]"] = "affine6[1]",
- ["affine6[1]"] = "sigmoid6[1]",
- ["sigmoid6[1]"] = "affine7[1]",
- ["affine7[1]"] = "ce_crit[1]",
- ["<input>[2]"] = "ce_crit[2]",
- ["ce_crit[1]"] = "<output>[1]"
- }
- }}
- }
- }, param_repo, gconf)
-end
-
-function get_network(layer_repo)
- return layer_repo:get_layer("main")
-end
-
-function make_readers(feature_rspecifier, layer_repo)
- return {
- {reader = nerv.KaldiReader(gconf,
- {
- id = "main_scp",
- feature_rspecifier = feature_rspecifier,
- frm_ext = gconf.frm_ext,
- mlfs = {
- phone_state = {
- targets_rspecifier = "ark:/slfs6/users/ymz09/kaldi/src/bin/ali-to-pdf /slfs6/users/ymz09/swb_ivec/final.mdl \"ark:gunzip -c /slfs6/users/ymz09/swb_ivec/ali.*.gz |\" ark:- | /slfs6/users/ymz09/kaldi/src/bin/ali-to-post ark:- ark:- |",
- format = "map"
- }
- },
- global_transf = layer_repo:get_layer("global_transf")
- }),
- data = {main_scp = 429, phone_state = 1}}
- }
-end
-
-function make_buffer(readers)
- return nerv.SGDBuffer(gconf,
- {
- buffer_size = gconf.buffer_size,
- randomize = gconf.randomize,
- readers = readers
- })
-end
-
-function get_input_order()
- return {"main_scp", "phone_state"}
-end
-
-function get_accuracy(sublayer_repo)
- local ce_crit = sublayer_repo:get_layer("ce_crit")
- return ce_crit.total_correct / ce_crit.total_frames * 100
-end
-
-function print_stat(sublayer_repo)
- local ce_crit = sublayer_repo:get_layer("ce_crit")
- nerv.info("*** training stat begin ***")
- nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce)
- nerv.printf("correct:\t\t%d\n", ce_crit.total_correct)
- nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames)
- nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames)
- nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(sublayer_repo))
- nerv.info("*** training stat end ***")
-end
diff --git a/kaldi_io/init.lua b/kaldi_io/init.lua
index 3fc5b10..b7e6da8 100644
--- a/kaldi_io/init.lua
+++ b/kaldi_io/init.lua
@@ -66,6 +66,7 @@ function KaldiReader:get_data()
rearranged:copy_toh(feat_utter)
end
res[self.feat_id] = feat_utter
+ res["key"] = self.feat_repo:key()
-- add corresponding labels
for id, repo in pairs(self.lab_repo) do
local lab_utter = repo:get_utter(self.feat_repo,
diff --git a/kaldi_io/kaldi.mk b/kaldi_io/kaldi.mk
deleted file mode 100644
index 4a397f0..0000000
--- a/kaldi_io/kaldi.mk
+++ /dev/null
@@ -1,70 +0,0 @@
-# This file was generated using the following command:
-# ./configure
-
-# Rules that enable valgrind debugging ("make valgrind")
-
-valgrind: .valgrind
-
-.valgrind:
- echo -n > valgrind.out
- for x in $(TESTFILES); do echo $$x>>valgrind.out; valgrind ./$$x >/dev/null 2>> valgrind.out; done
- ! ( grep 'ERROR SUMMARY' valgrind.out | grep -v '0 errors' )
- ! ( grep 'definitely lost' valgrind.out | grep -v -w 0 )
- rm valgrind.out
- touch .valgrind
-
-
-CONFIGURE_VERSION := 2
-OPENFSTLIBS = -L/slwork/users/wd007/src/kaldi/tools/openfst/lib -lfst
-OPENFSTLDFLAGS = -Wl,-rpath=/slwork/users/wd007/src/kaldi/tools/openfst/lib
-FSTROOT = /slwork/users/wd007/src/kaldi/tools/openfst
-ATLASINC = /slwork/users/wd007/src/kaldi/tools/ATLAS/include
-ATLASLIBS = -L/usr/lib -llapack -lcblas -latlas -lf77blas
-# You have to make sure ATLASLIBS is set...
-
-ifndef FSTROOT
-$(error FSTROOT not defined.)
-endif
-
-ifndef ATLASINC
-$(error ATLASINC not defined.)
-endif
-
-ifndef ATLASLIBS
-$(error ATLASLIBS not defined.)
-endif
-
-
-CXXFLAGS = -msse -msse2 -Wall -I.. \
- -fPIC \
- -DKALDI_DOUBLEPRECISION=0 -DHAVE_POSIX_MEMALIGN \
- -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \
- -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \
- -DHAVE_ATLAS -I$(ATLASINC) \
- -I$(FSTROOT)/include \
- $(EXTRA_CXXFLAGS) \
- -g # -O0 -DKALDI_PARANOID
-
-ifeq ($(KALDI_FLAVOR), dynamic)
-CXXFLAGS += -fPIC
-endif
-
-LDFLAGS = -rdynamic $(OPENFSTLDFLAGS)
-LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl
-CC = g++
-CXX = g++
-AR = ar
-AS = as
-RANLIB = ranlib
-
-#Next section enables CUDA for compilation
-CUDA = true
-CUDATKDIR = /usr/local/cuda
-
-CUDA_INCLUDE= -I$(CUDATKDIR)/include
-CUDA_FLAGS = -g -Xcompiler -fPIC --verbose --machine 64 -DHAVE_CUDA
-
-CXXFLAGS += -DHAVE_CUDA -I$(CUDATKDIR)/include
-CUDA_LDFLAGS += -L$(CUDATKDIR)/lib64 -Wl,-rpath,$(CUDATKDIR)/lib64
-CUDA_LDLIBS += -lcublas -lcudart #LDLIBS : The libs are loaded later than static libs in implicit rule
-
diff --git a/kaldi_seq/.valgrind b/kaldi_seq/.valgrind
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/kaldi_seq/.valgrind
diff --git a/kaldi_seq/Makefile b/kaldi_seq/Makefile
new file mode 100644
index 0000000..e76eea8
--- /dev/null
+++ b/kaldi_seq/Makefile
@@ -0,0 +1,47 @@
+# Change KDIR to `kaldi-trunk' path (Kaldi must be compiled with --share)
+KDIR := /slfs6/users/ymz09/kaldi/
+
+SHELL := /bin/bash
+BUILD_DIR := $(CURDIR)/build
+INC_PATH := $(LUA_BINDIR)/../include/
+OBJS := init.o src/kaldi_mpe.o src/kaldi_mmi.o src/init.o
+LIBS := libkaldiseq.so
+LUA_LIBS := init.lua layer/mpe.lua layer/mmi.lua
+INCLUDE := -I $(LUA_INCDIR) -I $(INC_PATH) -DLUA_USE_APICHECK
+
+SUBDIR := src layer
+OBJ_DIR := $(BUILD_DIR)/objs
+LUA_DIR := $(INST_LUADIR)/kaldi_seq
+KALDIINCLUDE := -I $(KDIR)/tools/ATLAS/include/ -I $(KDIR)/tools/openfst/include/ -I $(KDIR)/src/
+
+OBJS := $(addprefix $(OBJ_DIR)/,$(OBJS))
+LIBS := $(addprefix $(INST_LIBDIR)/,$(LIBS))
+OBJ_SUBDIR := $(addprefix $(OBJ_DIR)/,$(SUBDIR))
+LUA_SUBDIR := $(addprefix $(LUA_DIR)/,$(SUBDIR))
+LUA_LIBS := $(addprefix $(LUA_DIR)/,$(LUA_LIBS))
+LIB_PATH := $(LUA_BINDIR)/../lib
+
+build: $(OBJ_DIR) $(OBJ_SUBDIR) $(OBJS)
+install: $(LUA_DIR) $(LUA_SUBDIR) $(LUA_LIBS) $(LIBS)
+
+include $(KDIR)/src/kaldi.mk
+
+KL1 := -rdynamic -Wl,-rpath=$(KDIR)/tools/openfst/lib -L/usr/local/cuda/lib64 -Wl,-rpath,/usr/local/cuda/lib64 -Wl,-rpath=$(KDIR)/src/lib -L. -L$(KDIR)/src/nnet/ -L$(KDIR)/src/cudamatrix/ -L$(KDIR)/src/lat/ -L$(KDIR)/src/hmm/ -L$(KDIR)/src/tree/ -L$(KDIR)/src/matrix/ -L$(KDIR)/src/util/ -L$(KDIR)/src/base/ $(KDIR)/src/nnet//libkaldi-nnet.so $(KDIR)/src/cudamatrix//libkaldi-cudamatrix.so $(KDIR)/src/lat//libkaldi-lat.so $(KDIR)/src/hmm//libkaldi-hmm.so $(KDIR)/src/tree//libkaldi-tree.so $(KDIR)/src/matrix//libkaldi-matrix.so $(KDIR)/src/util//libkaldi-util.so $(KDIR)/src/base//libkaldi-base.so -L$(KDIR)/tools/openfst/lib -lfst /usr/lib/liblapack.so /usr/lib/libcblas.so /usr/lib/libatlas.so /usr/lib/libf77blas.so -lm -lpthread -ldl -lcublas -lcudart -lkaldi-nnet -lkaldi-cudamatrix -lkaldi-lat -lkaldi-hmm -lkaldi-tree -lkaldi-matrix -lkaldi-util -lkaldi-base
+
+KL2 := -msse -msse2 -Wall -pthread -DKALDI_DOUBLEPRECISION=0 -DHAVE_POSIX_MEMALIGN -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H -DHAVE_ATLAS -I$(KDIR)/tools/ATLAS/include -I$(KDIR)/tools/openfst/include -Wno-sign-compare -g -fPIC -I/usr/local/cuda/include -L/usr/local/cuda/lib64 -DKALDI_NO_EXPF
+
+$(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR):
+ -mkdir -p $@
+$(LUA_DIR)/%.lua: %.lua
+ cp $< $@
+$(LIBS): $(OBJ_DIR)/src/kaldi_mpe.o $(OBJ_DIR)/src/kaldi_mmi.o $(OBJ_DIR)/init.o $(OBJ_DIR)/src/init.o
+ gcc -shared -fPIC -o $@ $(OBJ_DIR)/src/kaldi_mpe.o $(OBJ_DIR)/src/kaldi_mmi.o $(OBJ_DIR)/init.o $(OBJ_DIR)/src/init.o -lstdc++ -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -lluaT $(KL1)
+$(OBJ_DIR)/src/kaldi_mpe.o: src/kaldi_mpe.cpp
+ g++ -o $@ -c $< $(KALDIINCLUDE) -g -fPIC $(INCLUDE) $(KL2)
+$(OBJ_DIR)/src/kaldi_mmi.o: src/kaldi_mmi.cpp
+ g++ -o $@ -c $< $(KALDIINCLUDE) -g -fPIC $(INCLUDE) $(KL2)
+$(OBJ_DIR)/%.o: %.c
+ gcc -o $@ -c $< -g $(INCLUDE) -fPIC
+clean:
+ -rm $(OBJ_DIR)/src/*.o
+
diff --git a/kaldi_seq/init.c b/kaldi_seq/init.c
new file mode 100644
index 0000000..ed89473
--- /dev/null
+++ b/kaldi_seq/init.c
@@ -0,0 +1,8 @@
+#include "../nerv/common.h"
+#include <stdio.h>
+
+extern void kaldi_seq_init(lua_State *L);
+int luaopen_libkaldiseq(lua_State *L) {
+ kaldi_seq_init(L);
+ return 1;
+}
diff --git a/kaldi_seq/init.lua b/kaldi_seq/init.lua
new file mode 100644
index 0000000..39f4cb3
--- /dev/null
+++ b/kaldi_seq/init.lua
@@ -0,0 +1,2 @@
+nerv.include('layer/mpe.lua')
+nerv.include('layer/mmi.lua')
diff --git a/kaldi_seq/kaldi_seq-scm-1.rockspec b/kaldi_seq/kaldi_seq-scm-1.rockspec
new file mode 100644
index 0000000..41e34f0
--- /dev/null
+++ b/kaldi_seq/kaldi_seq-scm-1.rockspec
@@ -0,0 +1,36 @@
+package = "kaldi_seq"
+version = "scm-1"
+source = {
+ url = "https://github.com/Nerv-SJTU/nerv-speech.git"
+}
+description = {
+ summary = "Kaldi sequence training support for Nerv",
+ detailed = [[
+ ]],
+ homepage = "https://github.com/Nerv-SJTU/nerv-speech",
+ license = "BSD"
+}
+dependencies = {
+ "nerv >= scm-1",
+ "lua >= 5.1"
+}
+build = {
+ type = "make",
+ build_variables = {
+ CFLAGS="$(CFLAGS)",
+ LIBFLAG="$(LIBFLAG)",
+ LUA_LIBDIR="$(LUA_LIBDIR)",
+ LUA_BINDIR="$(LUA_BINDIR)",
+ LUA_INCDIR="$(LUA_INCDIR)",
+ INST_PREFIX="$(PREFIX)",
+ LUA="$(LUA)",
+ },
+ install_variables = {
+ LUA_BINDIR="$(LUA_BINDIR)",
+ INST_PREFIX="$(PREFIX)",
+ INST_BINDIR="$(BINDIR)",
+ INST_LIBDIR="$(LIBDIR)",
+ INST_LUADIR="$(LUADIR)",
+ INST_CONFDIR="$(CONFDIR)",
+ },
+}
diff --git a/kaldi_seq/layer/mmi.lua b/kaldi_seq/layer/mmi.lua
new file mode 100644
index 0000000..ecc7f48
--- /dev/null
+++ b/kaldi_seq/layer/mmi.lua
@@ -0,0 +1,50 @@
+require 'libkaldiseq'
+local MMILayer = nerv.class("nerv.MMILayer", "nerv.Layer")
+
+function MMILayer:__init(id, global_conf, layer_conf)
+ self.id = id
+ self.gconf = global_conf
+ self.dim_in = layer_conf.dim_in
+ self.dim_out = layer_conf.dim_out
+ self.arg = layer_conf.cmd.arg
+ self.mdl = layer_conf.cmd.mdl
+ self.lat = layer_conf.cmd.lat
+ self.ali = layer_conf.cmd.ali
+ self:check_dim_len(2, -1) -- two inputs: nn output and utt key
+end
+
+function MMILayer:init(batch_size)
+ self.total_frames = 0
+ self.kaldi_mmi = nerv.KaldiMMI(self.arg, self.mdl, self.lat, self.ali)
+ if self.kaldi_mmi == nil then
+ nerv.error("kaldi arguments is expected: %s %s %s %s", self.arg,
+ self.mdl, self.lat, self.ali)
+ end
+end
+
+function MMILayer:batch_resize(batch_size)
+ -- do nothing
+end
+
+function MMILayer:update(bp_err, input, output)
+ -- no params, therefore do nothing
+end
+
+function MMILayer:propagate(input, output)
+ self.valid = false
+ self.valid = self.kaldi_mmi:check(input[1], input[2])
+ return self.valid
+end
+
+function MMILayer:back_propagate(bp_err, next_bp_err, input, output)
+ if self.valid ~= true then
+ nerv.error("kaldi sequence training back_propagate fail")
+ end
+ local mmat = input[1]:new_to_host()
+ next_bp_err[1]:copy_fromh(self.kaldi_mmi:calc_diff(mmat, input[2]))
+ self.total_frames = self.total_frames + self.kaldi_mmi:get_num_frames()
+end
+
+function MMILayer:get_params()
+ return nerv.ParamRepo({})
+end
diff --git a/kaldi_seq/layer/mpe.lua b/kaldi_seq/layer/mpe.lua
new file mode 100644
index 0000000..ec8a8f3
--- /dev/null
+++ b/kaldi_seq/layer/mpe.lua
@@ -0,0 +1,52 @@
+require 'libkaldiseq'
+local MPELayer = nerv.class("nerv.MPELayer", "nerv.Layer")
+
+function MPELayer:__init(id, global_conf, layer_conf)
+ self.id = id
+ self.gconf = global_conf
+ self.dim_in = layer_conf.dim_in
+ self.dim_out = layer_conf.dim_out
+ self.arg = layer_conf.cmd.arg
+ self.mdl = layer_conf.cmd.mdl
+ self.lat = layer_conf.cmd.lat
+ self.ali = layer_conf.cmd.ali
+ self:check_dim_len(2, -1) -- two inputs: nn output and utt key
+end
+
+function MPELayer:init(batch_size)
+ self.total_correct = 0
+ self.total_frames = 0
+ self.kaldi_mpe = nerv.KaldiMPE(self.arg, self.mdl, self.lat, self.ali)
+ if self.kaldi_mpe == nil then
+ nerv.error("kaldi arguments is expected: %s %s %s %s", self.arg,
+ self.mdl, self.lat, self.ali)
+ end
+end
+
+function MPELayer:batch_resize(batch_size)
+ -- do nothing
+end
+
+function MPELayer:update(bp_err, input, output)
+ -- no params, therefore do nothing
+end
+
+function MPELayer:propagate(input, output)
+ self.valid = false
+ self.valid = self.kaldi_mpe:check(input[1], input[2])
+ return self.valid
+end
+
+function MPELayer:back_propagate(bp_err, next_bp_err, input, output)
+ if self.valid ~= true then
+ nerv.error("kaldi sequence training back_propagate fail")
+ end
+ local mmat = input[1]:new_to_host()
+ next_bp_err[1]:copy_fromh(self.kaldi_mpe:calc_diff(mmat, input[2]))
+ self.total_frames = self.total_frames + self.kaldi_mpe:get_num_frames()
+ self.total_correct = self.total_correct + self.kaldi_mpe:get_utt_frame_acc()
+end
+
+function MPELayer:get_params()
+ return nerv.ParamRepo({})
+end
diff --git a/kaldi_seq/src/init.c b/kaldi_seq/src/init.c
new file mode 100644
index 0000000..9b38056
--- /dev/null
+++ b/kaldi_seq/src/init.c
@@ -0,0 +1,131 @@
+#include "nerv/common.h"
+#include "kaldi_mpe.h"
+#include "kaldi_mmi.h"
+#include <stdio.h>
+
+const char *nerv_kaldi_mpe_tname = "nerv.KaldiMPE";
+const char *nerv_kaldi_mmi_tname = "nerv.KaldiMMI";
+const char *nerv_matrix_cuda_float_tname = "nerv.CuMatrixFloat";
+const char *nerv_matrix_host_float_tname = "nerv.MMatrixFloat";
+
+static int mpe_new(lua_State *L) {
+ const char *arg = luaL_checkstring(L, 1);
+ const char *mdl = luaL_checkstring(L, 2);
+ const char *lat = luaL_checkstring(L, 3);
+ const char *ali = luaL_checkstring(L, 4);
+ KaldiMPE *mpe = new_KaldiMPE(arg, mdl, lat, ali);
+ luaT_pushudata(L, mpe, nerv_kaldi_mpe_tname);
+ return 1;
+}
+
+static int mpe_destroy(lua_State *L) {
+ KaldiMPE *mpe = luaT_checkudata(L, 1, nerv_kaldi_mpe_tname);
+ destroy_KaldiMPE(mpe);
+ return 0;
+}
+
+static int mpe_check(lua_State *L) {
+ KaldiMPE *mpe = luaT_checkudata(L, 1, nerv_kaldi_mpe_tname);
+ const Matrix *cumat = luaT_checkudata(L, 2, nerv_matrix_cuda_float_tname);
+ const char *utt = luaL_checkstring(L, 3);
+
+ lua_pushboolean(L, check_mpe(mpe, cumat, utt));
+ return 1;
+}
+
+static int mpe_calc_diff(lua_State *L) {
+ KaldiMPE *mpe = luaT_checkudata(L, 1, nerv_kaldi_mpe_tname);
+ Matrix *mat = luaT_checkudata(L, 2, nerv_matrix_host_float_tname);
+ const char *utt = luaL_checkstring(L, 3);
+
+ Matrix *diff = calc_diff_mpe(mpe, mat, utt);
+ luaT_pushudata(L, diff, nerv_matrix_host_float_tname);
+ return 1;
+}
+
+static int mpe_get_num_frames(lua_State *L) {
+ KaldiMPE *mpe = luaT_checkudata(L, 1, nerv_kaldi_mpe_tname);
+ lua_pushnumber(L, get_num_frames_mpe(mpe));
+ return 1;
+}
+
+static int mpe_get_utt_frame_acc(lua_State *L) {
+ KaldiMPE *mpe = luaT_checkudata(L, 1, nerv_kaldi_mpe_tname);
+ lua_pushnumber(L, get_utt_frame_acc_mpe(mpe));
+ return 1;
+}
+
+static const luaL_Reg mpe_methods[] = {
+ {"check", mpe_check},
+ {"calc_diff", mpe_calc_diff},
+ {"get_num_frames", mpe_get_num_frames},
+ {"get_utt_frame_acc", mpe_get_utt_frame_acc},
+ {NULL, NULL}
+};
+
+static void mpe_init(lua_State *L) {
+ luaT_newmetatable(L, nerv_kaldi_mpe_tname, NULL,
+ mpe_new, mpe_destroy, NULL);
+ luaL_register(L, NULL, mpe_methods);
+ lua_pop(L, 1);
+}
+
+static int mmi_new(lua_State *L) {
+ const char *arg = luaL_checkstring(L, 1);
+ const char *mdl = luaL_checkstring(L, 2);
+ const char *lat = luaL_checkstring(L, 3);
+ const char *ali = luaL_checkstring(L, 4);
+ KaldiMMI *mmi = new_KaldiMMI(arg, mdl, lat, ali);
+ luaT_pushudata(L, mmi, nerv_kaldi_mmi_tname);
+ return 1;
+}
+
+static int mmi_destroy(lua_State *L) {
+ KaldiMMI *mmi = luaT_checkudata(L, 1, nerv_kaldi_mmi_tname);
+ destroy_KaldiMMI(mmi);
+ return 0;
+}
+
+static int mmi_check(lua_State *L) {
+ KaldiMMI *mmi = luaT_checkudata(L, 1, nerv_kaldi_mmi_tname);
+ const Matrix *cumat = luaT_checkudata(L, 2, nerv_matrix_cuda_float_tname);
+ const char *utt = luaL_checkstring(L, 3);
+
+ lua_pushboolean(L, check_mmi(mmi, cumat, utt));
+ return 1;
+}
+
+static int mmi_calc_diff(lua_State *L) {
+ KaldiMMI *mmi = luaT_checkudata(L, 1, nerv_kaldi_mmi_tname);
+ Matrix *mat = luaT_checkudata(L, 2, nerv_matrix_host_float_tname);
+ const char *utt = luaL_checkstring(L, 3);
+
+ Matrix *diff = calc_diff_mmi(mmi, mat, utt);
+ luaT_pushudata(L, diff, nerv_matrix_host_float_tname);
+ return 1;
+}
+
+static int mmi_get_num_frames(lua_State *L) {
+ KaldiMMI *mmi = luaT_checkudata(L, 1, nerv_kaldi_mmi_tname);
+ lua_pushnumber(L, get_num_frames_mmi(mmi));
+ return 1;
+}
+
+static const luaL_Reg mmi_methods[] = {
+ {"check", mmi_check},
+ {"calc_diff", mmi_calc_diff},
+ {"get_num_frames", mmi_get_num_frames},
+ {NULL, NULL}
+};
+
+static void mmi_init(lua_State *L) {
+ luaT_newmetatable(L, nerv_kaldi_mmi_tname, NULL,
+ mmi_new, mmi_destroy, NULL);
+ luaL_register(L, NULL, mmi_methods);
+ lua_pop(L, 1);
+}
+
+void kaldi_seq_init(lua_State *L) {
+ mpe_init(L);
+ mmi_init(L);
+}
diff --git a/kaldi_seq/src/kaldi_mmi.cpp b/kaldi_seq/src/kaldi_mmi.cpp
new file mode 100644
index 0000000..ea9b4f1
--- /dev/null
+++ b/kaldi_seq/src/kaldi_mmi.cpp
@@ -0,0 +1,427 @@
+#include <string>
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "tree/context-dep.h"
+#include "hmm/transition-model.h"
+#include "fstext/fstext-lib.h"
+#include "decoder/faster-decoder.h"
+#include "decoder/decodable-matrix.h"
+#include "lat/kaldi-lattice.h"
+#include "lat/lattice-functions.h"
+
+#include "nnet/nnet-trnopts.h"
+#include "nnet/nnet-component.h"
+#include "nnet/nnet-activation.h"
+#include "nnet/nnet-nnet.h"
+#include "nnet/nnet-pdf-prior.h"
+#include "nnet/nnet-utils.h"
+#include "base/timer.h"
+#include "cudamatrix/cu-device.h"
+
+#include <iomanip>
+
+typedef kaldi::BaseFloat BaseFloat;
+typedef struct Matrix NervMatrix;
+
+namespace kaldi{
+ namespace nnet1{
+ void LatticeAcousticRescore(const kaldi::Matrix<BaseFloat> &log_like,
+ const TransitionModel &trans_model,
+ const std::vector<int32> &state_times,
+ Lattice *lat);
+ }
+}
+
+extern "C" {
+#include "kaldi_mmi.h"
+#include "string.h"
+#include "assert.h"
+#include "nerv/common.h"
+
+ extern NervMatrix *nerv_matrix_host_float_create(long nrow, long ncol, Status *status);
+ extern void nerv_matrix_host_float_copy_fromd(NervMatrix *mat, const NervMatrix *cumat, int, int, int, Status *);
+ using namespace kaldi;
+ using namespace kaldi::nnet1;
+ typedef kaldi::int32 int32;
+
+ struct KaldiMMI {
+ TransitionModel *trans_model;
+ RandomAccessLatticeReader *den_lat_reader;
+ RandomAccessInt32VectorReader *ref_ali_reader;
+
+ Lattice den_lat;
+ vector<int32> state_times;
+
+ PdfPriorOptions *prior_opts;
+ PdfPrior *log_prior;
+
+ std::vector<int32> ref_ali;
+
+ Timer *time;
+ double time_now;
+
+ int32 num_done, num_no_ref_ali, num_no_den_lat, num_other_error;
+ int32 num_frm_drop;
+
+ kaldi::int64 total_frames;
+ double lat_like; // total likelihood of the lattice
+ double lat_ac_like; // acoustic likelihood weighted by posterior.
+ double total_mmi_obj, mmi_obj;
+ double total_post_on_ali, post_on_ali;
+
+ int32 num_frames;
+
+ bool binary;
+ BaseFloat acoustic_scale, lm_scale, old_acoustic_scale;
+ kaldi::int32 max_frames;
+ bool drop_frames;
+ std::string use_gpu;
+ };
+
+ KaldiMMI * new_KaldiMMI(const char* arg, const char* mdl, const char* lat, const char* ali)
+ {
+ KaldiMMI * mmi = new KaldiMMI;
+
+ const char *usage =
+ "Perform one iteration of DNN-MMI training by stochastic "
+ "gradient descent.\n"
+ "The network weights are updated on each utterance.\n"
+ "Usage: nnet-train-mmi-sequential [options] <model-in> <transition-model-in> "
+ "<feature-rspecifier> <den-lat-rspecifier> <ali-rspecifier> [<model-out>]\n"
+ "e.g.: \n"
+ " nnet-train-mmi-sequential nnet.init trans.mdl scp:train.scp scp:denlats.scp ark:train.ali "
+ "nnet.iter1\n";
+
+ ParseOptions po(usage);
+
+ NnetTrainOptions trn_opts; trn_opts.learn_rate=0.00001;
+ trn_opts.Register(&po);
+
+ mmi->binary = true;
+ po.Register("binary", &(mmi->binary), "Write output in binary mode");
+
+ std::string feature_transform;
+ po.Register("feature-transform", &feature_transform,
+ "Feature transform in Nnet format");
+
+ mmi->prior_opts = new PdfPriorOptions;
+ PdfPriorOptions &prior_opts = *(mmi->prior_opts);
+ prior_opts.Register(&po);
+
+ mmi->acoustic_scale = 1.0,
+ mmi->lm_scale = 1.0,
+ mmi->old_acoustic_scale = 0.0;
+ po.Register("acoustic-scale", &(mmi->acoustic_scale),
+ "Scaling factor for acoustic likelihoods");
+ po.Register("lm-scale", &(mmi->lm_scale),
+ "Scaling factor for \"graph costs\" (including LM costs)");
+ po.Register("old-acoustic-scale", &(mmi->old_acoustic_scale),
+ "Add in the scores in the input lattices with this scale, rather "
+ "than discarding them.");
+ mmi->max_frames = 6000; // Allow segments maximum of one minute by default
+ po.Register("max-frames",&(mmi->max_frames), "Maximum number of frames a segment can have to be processed");
+
+ mmi->drop_frames = true;
+ po.Register("drop-frames", &(mmi->drop_frames),
+ "Drop frames, where is zero den-posterior under numerator path "
+ "(ie. path not in lattice)");
+
+ mmi->use_gpu=std::string("yes");
+ po.Register("use-gpu", &(mmi->use_gpu), "yes|no|optional, only has effect if compiled with CUDA");
+
+ int narg = 0;
+ char args[64][1024];
+ char *token;
+ char *saveptr = NULL;
+ char tmpstr[1024];
+
+ strcpy(tmpstr, arg);
+ strcpy(args[0], "nnet-train-mmi-sequential");
+ for(narg = 1, token = strtok_r(tmpstr, " ", &saveptr); token; token = strtok_r(NULL, " ", &saveptr))
+ strcpy(args[narg++], token);
+ strcpy(args[narg++], "0.nnet");
+ strcpy(args[narg++], mdl);
+ strcpy(args[narg++], "feat");
+ strcpy(args[narg++], lat);
+ strcpy(args[narg++], ali);
+ strcpy(args[narg++], "1.nnet");
+
+ char **argsv = new char*[narg];
+ for(int _i = 0; _i < narg; _i++)
+ argsv[_i] = args[_i];
+
+ po.Read(narg, argsv);
+ delete [] argsv;
+
+ if (po.NumArgs() != 6) {
+ po.PrintUsage();
+ exit(1);
+ }
+
+ std::string transition_model_filename = po.GetArg(2),
+ den_lat_rspecifier = po.GetArg(4),
+ ref_ali_rspecifier = po.GetArg(5);
+
+ // Select the GPU
+#if HAVE_CUDA == 1
+ CuDevice::Instantiate().SelectGpuId(mmi->use_gpu);
+#endif
+
+ // Read the class-frame-counts, compute priors
+ mmi->log_prior = new PdfPrior(prior_opts);
+
+ // Read transition model
+ mmi->trans_model = new TransitionModel;
+ ReadKaldiObject(transition_model_filename, mmi->trans_model);
+
+ mmi->den_lat_reader = new RandomAccessLatticeReader(den_lat_rspecifier);
+ mmi->ref_ali_reader = new RandomAccessInt32VectorReader(ref_ali_rspecifier);
+
+ if (mmi->drop_frames) {
+ KALDI_LOG << "--drop-frames=true :"
+ " we will zero gradient for frames with total den/num mismatch."
+ " The mismatch is likely to be caused by missing correct path "
+ " from den-lattice due wrong annotation or search error."
+ " Leaving such frames out stabilizes the training.";
+ }
+
+ mmi->time = new Timer;
+ mmi->time_now = 0;
+ mmi->num_done =0;
+ mmi->num_no_ref_ali = 0;
+ mmi->num_no_den_lat = 0;
+ mmi->num_other_error = 0;
+ mmi->total_frames = 0;
+ mmi->num_frm_drop = 0;
+
+ mmi->total_mmi_obj = 0.0, mmi->mmi_obj = 0.0;
+ mmi->total_post_on_ali = 0.0, mmi->post_on_ali = 0.0;
+ return mmi;
+ }
+
+ void destroy_KaldiMMI(KaldiMMI *mmi)
+ {
+ delete mmi->trans_model;
+ delete mmi->den_lat_reader;
+ delete mmi->ref_ali_reader;
+ delete mmi->time;
+ delete mmi->prior_opts;
+ delete mmi->log_prior;
+ }
+
+ int check_mmi(KaldiMMI *mmi, const NervMatrix* mat, const char *key)
+ {
+ std::string utt(key);
+ if (!mmi->den_lat_reader->HasKey(utt)) {
+ KALDI_WARN << "Utterance " << utt << ": found no lattice.";
+ mmi->num_no_den_lat++;
+ return 0;
+ }
+ if (!mmi->ref_ali_reader->HasKey(utt)) {
+ KALDI_WARN << "Utterance " << utt << ": found no reference alignment.";
+ mmi->num_no_ref_ali++;
+ return 0;
+ }
+
+ assert(sizeof(BaseFloat) == sizeof(float));
+ // 1) get the features, numerator alignment
+ mmi->ref_ali = mmi->ref_ali_reader->Value(utt);
+ long mat_nrow = mat->nrow, mat_ncol = mat->ncol;
+ // check for temporal length of numerator alignments
+ if (static_cast<MatrixIndexT>(mmi->ref_ali.size()) != mat_nrow) {
+ KALDI_WARN << "Numerator alignment has wrong length "
+ << mmi->ref_ali.size() << " vs. "<< mat_nrow;
+ mmi->num_other_error++;
+ return 0;
+ }
+ if (mat_nrow > mmi->max_frames) {
+ KALDI_WARN << "Utterance " << utt << ": Skipped because it has " << mat_nrow <<
+ " frames, which is more than " << mmi->max_frames << ".";
+ mmi->num_other_error++;
+ return 0;
+ }
+ // 2) get the denominator lattice, preprocess
+ mmi->den_lat = mmi->den_lat_reader->Value(utt);
+ Lattice &den_lat = mmi->den_lat;
+ if (den_lat.Start() == -1) {
+ KALDI_WARN << "Empty lattice for utt " << utt;
+ mmi->num_other_error++;
+ return 0;
+ }
+ if (mmi->old_acoustic_scale != 1.0) {
+ fst::ScaleLattice(fst::AcousticLatticeScale(mmi->old_acoustic_scale),
+ &den_lat);
+ }
+ // optional sort it topologically
+ kaldi::uint64 props = den_lat.Properties(fst::kFstProperties, false);
+ if (!(props & fst::kTopSorted)) {
+ if (fst::TopSort(&den_lat) == false)
+ KALDI_ERR << "Cycles detected in lattice.";
+ }
+ // get the lattice length and times of states
+ mmi->state_times.clear();
+ vector<int32> &state_times = mmi->state_times;
+ int32 max_time = kaldi::LatticeStateTimes(den_lat, &state_times);
+ // check for temporal length of denominator lattices
+ if (max_time != mat_nrow) {
+ KALDI_WARN << "Denominator lattice has wrong length "
+ << max_time << " vs. " << mat_nrow;
+ mmi->num_other_error++;
+ return 0;
+ }
+
+ return 1;
+ }
+
+ NervMatrix * calc_diff_mmi(KaldiMMI * mmi, NervMatrix * mat, const char * key)
+ {
+ std::string utt(key);
+ assert(sizeof(BaseFloat) == sizeof(float));
+
+ kaldi::Matrix<BaseFloat> nnet_out_h, nnet_diff_h;
+ nnet_out_h.Resize(mat->nrow, mat->ncol, kUndefined);
+
+ size_t stride = mat->stride;
+ for (int i = 0; i < mat->nrow; i++)
+ {
+ const BaseFloat *nerv_row = (BaseFloat *)((char *)mat->data.f + i * stride);
+ BaseFloat *row = nnet_out_h.RowData(i);
+ memmove(row, nerv_row, sizeof(BaseFloat) * mat->ncol);
+ }
+
+ mmi->num_frames = nnet_out_h.NumRows();
+
+ PdfPriorOptions &prior_opts = *(mmi->prior_opts);
+ if (prior_opts.class_frame_counts != "") {
+ CuMatrix<BaseFloat> nnet_out;
+ nnet_out.Resize(mat->nrow, mat->ncol, kUndefined);
+ nnet_out.CopyFromMat(nnet_out_h);
+ mmi->log_prior->SubtractOnLogpost(&nnet_out);
+ nnet_out.CopyToMat(&nnet_out_h);
+ nnet_out.Resize(0,0);
+ }
+
+ // 4) rescore the latice
+ LatticeAcousticRescore(nnet_out_h, *(mmi->trans_model), mmi->state_times, &(mmi->den_lat));
+ if (mmi->acoustic_scale != 1.0 || mmi->lm_scale != 1.0)
+ fst::ScaleLattice(fst::LatticeScale(mmi->lm_scale, mmi->acoustic_scale), &(mmi->den_lat));
+
+ kaldi::Posterior post;
+ mmi->lat_like = kaldi::LatticeForwardBackward(mmi->den_lat, &post, &(mmi->lat_ac_like));
+
+ nnet_diff_h.Resize(mat->nrow, mat->ncol, kSetZero);
+ for (int32 t = 0; t < post.size(); t++) {
+ for (int32 arc = 0; arc < post[t].size(); arc++) {
+ int32 pdf = mmi->trans_model->TransitionIdToPdf(post[t][arc].first);
+ nnet_diff_h(t, pdf) += post[t][arc].second;
+ }
+ }
+
+ double path_ac_like = 0.0;
+ for(int32 t=0; t<mmi->num_frames; t++) {
+ int32 pdf = mmi->trans_model->TransitionIdToPdf(mmi->ref_ali[t]);
+ path_ac_like += nnet_out_h(t,pdf);
+ }
+ path_ac_like *= mmi->acoustic_scale;
+ mmi->mmi_obj = path_ac_like - mmi->lat_like;
+
+ mmi->post_on_ali = 0.0;
+ for(int32 t=0; t<mmi->num_frames; t++) {
+ int32 pdf = mmi->trans_model->TransitionIdToPdf(mmi->ref_ali[t]);
+ double posterior = nnet_diff_h(t, pdf);
+ mmi->post_on_ali += posterior;
+ }
+
+ KALDI_VLOG(1) << "Lattice #" << mmi->num_done + 1 << " processed"
+ << " (" << utt << "): found " << mmi->den_lat.NumStates()
+ << " states and " << fst::NumArcs(mmi->den_lat) << " arcs.";
+
+ KALDI_VLOG(1) << "Utterance " << utt << ": Average MMI obj. value = "
+ << (mmi->mmi_obj/mmi->num_frames) << " over " << mmi->num_frames
+ << " frames,"
+ << " (Avg. den-posterior on ali " << mmi->post_on_ali/mmi->num_frames << ")";
+
+ // 7a) Search for the frames with num/den mismatch
+ int32 frm_drop = 0;
+ std::vector<int32> frm_drop_vec;
+ for(int32 t=0; t<mmi->num_frames; t++) {
+ int32 pdf = mmi->trans_model->TransitionIdToPdf(mmi->ref_ali[t]);
+ double posterior = nnet_diff_h(t, pdf);
+ if(posterior < 1e-20) {
+ frm_drop++;
+ frm_drop_vec.push_back(t);
+ }
+ }
+
+ // 8) subtract the pdf-Viterbi-path
+ for(int32 t=0; t<nnet_diff_h.NumRows(); t++) {
+ int32 pdf = mmi->trans_model->TransitionIdToPdf(mmi->ref_ali[t]);
+ nnet_diff_h(t, pdf) -= 1.0;
+ }
+
+ // 9) Drop mismatched frames from the training by zeroing the derivative
+ if(mmi->drop_frames) {
+ for(int32 i=0; i<frm_drop_vec.size(); i++) {
+ nnet_diff_h.Row(frm_drop_vec[i]).Set(0.0);
+ }
+ mmi->num_frm_drop += frm_drop;
+ }
+
+ // Report the frame dropping
+ if (frm_drop > 0) {
+ std::stringstream ss;
+ ss << (mmi->drop_frames?"Dropped":"[dropping disabled] Would drop")
+ << " frames in " << utt << " " << frm_drop << "/" << mmi->num_frames << ",";
+ //get frame intervals from vec frm_drop_vec
+ ss << " intervals :";
+ //search for streaks of consecutive numbers:
+ int32 beg_streak=frm_drop_vec[0];
+ int32 len_streak=0;
+ int32 i;
+ for(i=0; i<frm_drop_vec.size(); i++,len_streak++) {
+ if(beg_streak + len_streak != frm_drop_vec[i]) {
+ ss << " " << beg_streak << ".." << frm_drop_vec[i-1] << "frm";
+ beg_streak = frm_drop_vec[i];
+ len_streak = 0;
+ }
+ }
+ ss << " " << beg_streak << ".." << frm_drop_vec[i-1] << "frm";
+ //print
+ KALDI_WARN << ss.str();
+ }
+
+ assert(mat->nrow == nnet_diff_h.NumRows() && mat->ncol == nnet_diff_h.NumCols());
+ stride = mat->stride;
+ for (int i = 0; i < mat->nrow; i++)
+ {
+ const BaseFloat *row = nnet_diff_h.RowData(i);
+ BaseFloat *nerv_row = (BaseFloat *)((char *)mat->data.f + i * stride);
+ memmove(nerv_row, row, sizeof(BaseFloat) * mat->ncol);
+ }
+ nnet_diff_h.Resize(0,0);
+
+ // increase time counter
+ mmi->total_mmi_obj += mmi->mmi_obj;
+ mmi->total_post_on_ali += mmi->post_on_ali;
+ mmi->total_frames += mmi->num_frames;
+ mmi->num_done++;
+
+ if (mmi->num_done % 100 == 0) {
+ mmi->time_now = mmi->time->Elapsed();
+ KALDI_VLOG(1) << "After " << mmi->num_done << " utterances: time elapsed = "
+ << mmi->time_now/60 << " min; processed " << mmi->total_frames/mmi->time_now
+ << " frames per second.";
+#if HAVE_CUDA==1
+ // check the GPU is not overheated
+ CuDevice::Instantiate().CheckGpuHealth();
+#endif
+ }
+ return mat;
+ }
+
+ double get_num_frames_mmi(const KaldiMMI *mmi)
+ {
+ return (double)mmi->num_frames;
+ }
+
+}
diff --git a/kaldi_seq/src/kaldi_mmi.h b/kaldi_seq/src/kaldi_mmi.h
new file mode 100644
index 0000000..ce6787c
--- /dev/null
+++ b/kaldi_seq/src/kaldi_mmi.h
@@ -0,0 +1,20 @@
+#ifndef NERV_kaldi_KALDI_MMI
+#define NERV_kaldi_KALDI_MMI
+#include "nerv/matrix/matrix.h"
+#include "nerv/common.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ typedef struct KaldiMMI KaldiMMI;
+
+ KaldiMMI * new_KaldiMMI(const char*, const char*, const char*, const char*);
+ void destroy_KaldiMMI(KaldiMMI *);
+ int check_mmi(KaldiMMI *, const Matrix*, const char *);
+ Matrix * calc_diff_mmi(KaldiMMI *, Matrix *, const char *);
+ double get_num_frames_mmi(const KaldiMMI *);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/kaldi_seq/src/kaldi_mpe.cpp b/kaldi_seq/src/kaldi_mpe.cpp
new file mode 100644
index 0000000..60384e2
--- /dev/null
+++ b/kaldi_seq/src/kaldi_mpe.cpp
@@ -0,0 +1,411 @@
+#include <string>
+#include "base/kaldi-common.h"
+#include "util/common-utils.h"
+#include "tree/context-dep.h"
+#include "hmm/transition-model.h"
+#include "fstext/fstext-lib.h"
+#include "decoder/faster-decoder.h"
+#include "decoder/decodable-matrix.h"
+#include "lat/kaldi-lattice.h"
+#include "lat/lattice-functions.h"
+
+#include "nnet/nnet-trnopts.h"
+#include "nnet/nnet-component.h"
+#include "nnet/nnet-activation.h"
+#include "nnet/nnet-nnet.h"
+#include "nnet/nnet-pdf-prior.h"
+#include "nnet/nnet-utils.h"
+#include "base/timer.h"
+#include "cudamatrix/cu-device.h"
+
+typedef kaldi::BaseFloat BaseFloat;
+typedef struct Matrix NervMatrix;
+
+namespace kaldi {
+ namespace nnet1 {
+
+ void LatticeAcousticRescore(const Matrix<BaseFloat> &log_like,
+ const TransitionModel &trans_model,
+ const std::vector<int32> &state_times,
+ Lattice *lat) {
+ kaldi::uint64 props = lat->Properties(fst::kFstProperties, false);
+ if (!(props & fst::kTopSorted))
+ KALDI_ERR << "Input lattice must be topologically sorted.";
+
+ KALDI_ASSERT(!state_times.empty());
+ std::vector<std::vector<int32> > time_to_state(log_like.NumRows());
+ for (size_t i = 0; i < state_times.size(); i++) {
+ KALDI_ASSERT(state_times[i] >= 0);
+ if (state_times[i] < log_like.NumRows()) // end state may be past this..
+ time_to_state[state_times[i]].push_back(i);
+ else
+ KALDI_ASSERT(state_times[i] == log_like.NumRows()
+ && "There appears to be lattice/feature mismatch.");
+ }
+
+ for (int32 t = 0; t < log_like.NumRows(); t++) {
+ for (size_t i = 0; i < time_to_state[t].size(); i++) {
+ int32 state = time_to_state[t][i];
+ for (fst::MutableArcIterator<Lattice> aiter(lat, state); !aiter.Done();
+ aiter.Next()) {
+ LatticeArc arc = aiter.Value();
+ int32 trans_id = arc.ilabel;
+ if (trans_id != 0) { // Non-epsilon input label on arc
+ int32 pdf_id = trans_model.TransitionIdToPdf(trans_id);
+ arc.weight.SetValue2(-log_like(t, pdf_id) + arc.weight.Value2());
+ aiter.SetValue(arc);
+ }
+ }
+ }
+ }
+ }
+
+ } // namespace nnet1
+} // namespace kaldi
+
+
+extern "C" {
+#include "kaldi_mpe.h"
+#include "string.h"
+#include "assert.h"
+#include "nerv/common.h"
+
+ extern NervMatrix *nerv_matrix_host_float_create(long nrow, long ncol, Status *status);
+ extern void nerv_matrix_host_float_copy_fromd(NervMatrix *mat, const NervMatrix *cumat, int, int, int, Status *);
+ using namespace kaldi;
+ using namespace kaldi::nnet1;
+ typedef kaldi::int32 int32;
+
+ struct KaldiMPE {
+ TransitionModel *trans_model;
+ RandomAccessLatticeReader *den_lat_reader;
+ RandomAccessInt32VectorReader *ref_ali_reader;
+
+ Lattice den_lat;
+ vector<int32> state_times;
+
+ PdfPriorOptions *prior_opts;
+ PdfPrior *log_prior;
+
+ std::vector<int32> silence_phones;
+ std::vector<int32> ref_ali;
+
+ Timer *time;
+ double time_now;
+
+ int32 num_done, num_no_ref_ali, num_no_den_lat, num_other_error;
+
+ kaldi::int64 total_frames;
+ int32 num_frames;
+ double total_frame_acc, utt_frame_acc;
+
+ bool binary;
+ bool one_silence_class;
+ BaseFloat acoustic_scale, lm_scale, old_acoustic_scale;
+ kaldi::int32 max_frames;
+ bool do_smbr;
+ std::string use_gpu;
+ };
+
+ KaldiMPE * new_KaldiMPE(const char* arg, const char* mdl, const char* lat, const char* ali)
+ {
+ KaldiMPE * mpe = new KaldiMPE;
+
+ const char *usage =
+ "Perform iteration of Neural Network MPE/sMBR training by stochastic "
+ "gradient descent.\n"
+ "The network weights are updated on each utterance.\n"
+ "Usage: nnet-train-mpe-sequential [options] <model-in> <transition-model-in> "
+ "<feature-rspecifier> <den-lat-rspecifier> <ali-rspecifier> [<model-out>]\n"
+ "e.g.: \n"
+ " nnet-train-mpe-sequential nnet.init trans.mdl scp:train.scp scp:denlats.scp ark:train.ali "
+ "nnet.iter1\n";
+
+ ParseOptions po(usage);
+
+ NnetTrainOptions trn_opts; trn_opts.learn_rate=0.00001;
+ trn_opts.Register(&po);
+
+ mpe->binary = true;
+ po.Register("binary", &(mpe->binary), "Write output in binary mode");
+
+ std::string feature_transform;
+ po.Register("feature-transform", &feature_transform,
+ "Feature transform in Nnet format");
+ std::string silence_phones_str;
+ po.Register("silence-phones", &silence_phones_str, "Colon-separated list "
+ "of integer id's of silence phones, e.g. 46:47");
+
+ mpe->prior_opts = new PdfPriorOptions;
+ PdfPriorOptions &prior_opts = *(mpe->prior_opts);
+ prior_opts.Register(&po);
+
+ mpe->one_silence_class = false;
+ mpe->acoustic_scale = 1.0,
+ mpe->lm_scale = 1.0,
+ mpe->old_acoustic_scale = 0.0;
+ po.Register("acoustic-scale", &(mpe->acoustic_scale),
+ "Scaling factor for acoustic likelihoods");
+ po.Register("lm-scale", &(mpe->lm_scale),
+ "Scaling factor for \"graph costs\" (including LM costs)");
+ po.Register("old-acoustic-scale", &(mpe->old_acoustic_scale),
+ "Add in the scores in the input lattices with this scale, rather "
+ "than discarding them.");
+ po.Register("one-silence-class", &(mpe->one_silence_class), "If true, newer "
+ "behavior which will tend to reduce insertions.");
+ mpe->max_frames = 6000; // Allow segments maximum of one minute by default
+ po.Register("max-frames",&(mpe->max_frames), "Maximum number of frames a segment can have to be processed");
+ mpe->do_smbr = false;
+ po.Register("do-smbr", &(mpe->do_smbr), "Use state-level accuracies instead of "
+ "phone accuracies.");
+
+ mpe->use_gpu=std::string("yes");
+ po.Register("use-gpu", &(mpe->use_gpu), "yes|no|optional, only has effect if compiled with CUDA");
+
+ int narg = 0;
+ char args[64][1024];
+ char *token;
+ char *saveptr = NULL;
+ char tmpstr[1024];
+
+ strcpy(tmpstr, arg);
+ strcpy(args[0], "nnet-train-mpe-sequential");
+ for(narg = 1, token = strtok_r(tmpstr, " ", &saveptr); token; token = strtok_r(NULL, " ", &saveptr))
+ strcpy(args[narg++], token);
+ strcpy(args[narg++], "0.nnet");
+ strcpy(args[narg++], mdl);
+ strcpy(args[narg++], "feat");
+ strcpy(args[narg++], lat);
+ strcpy(args[narg++], ali);
+ strcpy(args[narg++], "1.nnet");
+
+ char **argsv = new char*[narg];
+ for(int _i = 0; _i < narg; _i++)
+ argsv[_i] = args[_i];
+
+ po.Read(narg, argsv);
+ delete [] argsv;
+
+ if (po.NumArgs() != 6) {
+ po.PrintUsage();
+ exit(1);
+ }
+
+ std::string transition_model_filename = po.GetArg(2),
+ den_lat_rspecifier = po.GetArg(4),
+ ref_ali_rspecifier = po.GetArg(5);
+
+ std::vector<int32> &silence_phones = mpe->silence_phones;
+ if (!kaldi::SplitStringToIntegers(silence_phones_str, ":", false,
+ &silence_phones))
+ KALDI_ERR << "Invalid silence-phones string " << silence_phones_str;
+ kaldi::SortAndUniq(&silence_phones);
+ if (silence_phones.empty())
+ KALDI_LOG << "No silence phones specified.";
+
+ // Select the GPU
+#if HAVE_CUDA == 1
+ CuDevice::Instantiate().SelectGpuId(mpe->use_gpu);
+#endif
+
+ // Read the class-frame-counts, compute priors
+ mpe->log_prior = new PdfPrior(prior_opts);
+
+ // Read transition model
+ mpe->trans_model = new TransitionModel;
+ ReadKaldiObject(transition_model_filename, mpe->trans_model);
+
+ mpe->den_lat_reader = new RandomAccessLatticeReader(den_lat_rspecifier);
+ mpe->ref_ali_reader = new RandomAccessInt32VectorReader(ref_ali_rspecifier);
+
+ mpe->time = new Timer;
+ mpe->time_now = 0;
+ mpe->num_done =0;
+ mpe->num_no_ref_ali = 0;
+ mpe->num_no_den_lat = 0;
+ mpe->num_other_error = 0;
+ mpe->total_frames = 0;
+ mpe->total_frame_acc = 0.0;
+ mpe->utt_frame_acc = 0.0;
+
+ return mpe;
+ }
+
+ void destroy_KaldiMPE(KaldiMPE *mpe)
+ {
+ delete mpe->trans_model;
+ delete mpe->den_lat_reader;
+ delete mpe->ref_ali_reader;
+ delete mpe->time;
+ delete mpe->prior_opts;
+ delete mpe->log_prior;
+ }
+
+ int check_mpe(KaldiMPE *mpe, const NervMatrix* mat, const char *key)
+ {
+ std::string utt(key);
+ if (!mpe->den_lat_reader->HasKey(utt)) {
+ KALDI_WARN << "Utterance " << utt << ": found no lattice.";
+ mpe->num_no_den_lat++;
+ return 0;
+ }
+ if (!mpe->ref_ali_reader->HasKey(utt)) {
+ KALDI_WARN << "Utterance " << utt << ": found no reference alignment.";
+ mpe->num_no_ref_ali++;
+ return 0;
+ }
+
+ //assert(sizeof(BaseFloat) == sizeof(float));
+ // 1) get the features, numerator alignment
+ mpe->ref_ali = mpe->ref_ali_reader->Value(utt);
+ long mat_nrow = mat->nrow, mat_ncol = mat->ncol;
+ // check for temporal length of numerator alignments
+ if (static_cast<MatrixIndexT>(mpe->ref_ali.size()) != mat_nrow) {
+ KALDI_WARN << "Numerator alignment has wrong length "
+ << mpe->ref_ali.size() << " vs. "<< mat_nrow;
+ mpe->num_other_error++;
+ return 0;
+ }
+ if (mat_nrow > mpe->max_frames) {
+ KALDI_WARN << "Utterance " << utt << ": Skipped because it has " << mat_nrow <<
+ " frames, which is more than " << mpe->max_frames << ".";
+ mpe->num_other_error++;
+ return 0;
+ }
+ // 2) get the denominator lattice, preprocess
+ mpe->den_lat = mpe->den_lat_reader->Value(utt);
+ Lattice &den_lat = mpe->den_lat;
+ if (den_lat.Start() == -1) {
+ KALDI_WARN << "Empty lattice for utt " << utt;
+ mpe->num_other_error++;
+ return 0;
+ }
+ if (mpe->old_acoustic_scale != 1.0) {
+ fst::ScaleLattice(fst::AcousticLatticeScale(mpe->old_acoustic_scale),
+ &den_lat);
+ }
+    // optionally sort it topologically
+ kaldi::uint64 props = den_lat.Properties(fst::kFstProperties, false);
+ if (!(props & fst::kTopSorted)) {
+ if (fst::TopSort(&den_lat) == false)
+ KALDI_ERR << "Cycles detected in lattice.";
+ }
+ // get the lattice length and times of states
+ mpe->state_times.clear();
+ vector<int32> &state_times = mpe->state_times;
+ int32 max_time = kaldi::LatticeStateTimes(den_lat, &state_times);
+ // check for temporal length of denominator lattices
+ if (max_time != mat_nrow) {
+ KALDI_WARN << "Denominator lattice has wrong length "
+ << max_time << " vs. " << mat_nrow;
+ mpe->num_other_error++;
+ return 0;
+ }
+
+ return 1;
+ }
+
+ NervMatrix * calc_diff_mpe(KaldiMPE * mpe, NervMatrix * mat, const char * key)
+ {
+ std::string utt(key);
+ //assert(sizeof(BaseFloat) == sizeof(float));
+
+ CuMatrix<BaseFloat> nnet_diff;
+ kaldi::Matrix<BaseFloat> nnet_out_h;
+ nnet_out_h.Resize(mat->nrow, mat->ncol, kUndefined);
+
+ size_t stride = mat->stride;
+ for (int i = 0; i < mat->nrow; i++)
+ {
+ const BaseFloat *nerv_row = (BaseFloat *)((char *)mat->data.f + i * stride);
+ BaseFloat *row = nnet_out_h.RowData(i);
+ memmove(row, nerv_row, sizeof(BaseFloat) * mat->ncol);
+ }
+
+ mpe->num_frames = nnet_out_h.NumRows();
+
+ PdfPriorOptions &prior_opts = *(mpe->prior_opts);
+ if (prior_opts.class_frame_counts != "") {
+ CuMatrix<BaseFloat> nnet_out;
+ nnet_out.Resize(nnet_out_h.NumRows(), nnet_out_h.NumCols(), kUndefined);
+ nnet_out.CopyFromMat(nnet_out_h);
+ mpe->log_prior->SubtractOnLogpost(&nnet_out);
+ nnet_out_h.Resize(nnet_out.NumRows(), nnet_out.NumCols(), kUndefined);
+ nnet_out.CopyToMat(&nnet_out_h);
+ nnet_out.Resize(0,0);
+ }
+
+    // 4) rescore the lattice
+ LatticeAcousticRescore(nnet_out_h, *(mpe->trans_model), mpe->state_times, &(mpe->den_lat));
+ if (mpe->acoustic_scale != 1.0 || mpe->lm_scale != 1.0)
+ fst::ScaleLattice(fst::LatticeScale(mpe->lm_scale, mpe->acoustic_scale), &(mpe->den_lat));
+
+ kaldi::Posterior post;
+ std::vector<int32> &silence_phones = mpe->silence_phones;
+
+ if (mpe->do_smbr) { // use state-level accuracies, i.e. sMBR estimation
+ mpe->utt_frame_acc = LatticeForwardBackwardMpeVariants(
+ *(mpe->trans_model), silence_phones, mpe->den_lat, mpe->ref_ali, "smbr",
+ mpe->one_silence_class, &post);
+ } else { // use phone-level accuracies, i.e. MPFE (minimum phone frame error)
+ mpe->utt_frame_acc = LatticeForwardBackwardMpeVariants(
+ *(mpe->trans_model), silence_phones, mpe->den_lat, mpe->ref_ali, "mpfe",
+ mpe->one_silence_class, &post);
+ }
+
+ // 6) convert the Posterior to a matrix,
+ PosteriorToMatrixMapped(post, *(mpe->trans_model), &nnet_diff);
+ nnet_diff.Scale(-1.0); // need to flip the sign of derivative,
+
+ KALDI_VLOG(1) << "Lattice #" << mpe->num_done + 1 << " processed"
+ << " (" << utt << "): found " << mpe->den_lat.NumStates()
+ << " states and " << fst::NumArcs(mpe->den_lat) << " arcs.";
+
+ KALDI_VLOG(1) << "Utterance " << utt << ": Average frame accuracy = "
+ << (mpe->utt_frame_acc/mpe->num_frames) << " over " << mpe->num_frames
+ << " frames,"
+ << " diff-range(" << nnet_diff.Min() << "," << nnet_diff.Max() << ")";
+
+ nnet_out_h.Resize(nnet_diff.NumRows(), nnet_diff.NumCols(), kUndefined);
+ nnet_diff.CopyToMat(&nnet_out_h);
+ nnet_diff.Resize(0,0); // release GPU memory,
+
+ assert(mat->nrow == nnet_out_h.NumRows() && mat->ncol == nnet_out_h.NumCols());
+ stride = mat->stride;
+ for (int i = 0; i < mat->nrow; i++)
+ {
+ const BaseFloat *row = nnet_out_h.RowData(i);
+ BaseFloat *nerv_row = (BaseFloat *)((char *)mat->data.f + i * stride);
+ memmove(nerv_row, row, sizeof(BaseFloat) * mat->ncol);
+ }
+ nnet_out_h.Resize(0,0);
+
+ // increase time counter
+ mpe->total_frame_acc += mpe->utt_frame_acc;
+ mpe->total_frames += mpe->num_frames;
+ mpe->num_done++;
+
+ if (mpe->num_done % 100 == 0) {
+ mpe->time_now = mpe->time->Elapsed();
+ KALDI_VLOG(1) << "After " << mpe->num_done << " utterances: time elapsed = "
+ << mpe->time_now/60 << " min; processed " << mpe->total_frames/mpe->time_now
+ << " frames per second.";
+#if HAVE_CUDA==1
+ // check the GPU is not overheated
+ CuDevice::Instantiate().CheckGpuHealth();
+#endif
+ }
+ return mat;
+ }
+
+ double get_num_frames_mpe(const KaldiMPE *mpe)
+ {
+ return (double)mpe->num_frames;
+ }
+
+ double get_utt_frame_acc_mpe(const KaldiMPE *mpe)
+ {
+ return (double)mpe->utt_frame_acc;
+ }
+
+}
diff --git a/kaldi_seq/src/kaldi_mpe.h b/kaldi_seq/src/kaldi_mpe.h
new file mode 100644
index 0000000..fd09574
--- /dev/null
+++ b/kaldi_seq/src/kaldi_mpe.h
@@ -0,0 +1,21 @@
+#ifndef NERV_kaldi_KALDI_MPE
+#define NERV_kaldi_KALDI_MPE
+#include "nerv/matrix/matrix.h"
+#include "nerv/common.h"
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ typedef struct KaldiMPE KaldiMPE;
+
+ KaldiMPE * new_KaldiMPE(const char*, const char*, const char*, const char*);
+ void destroy_KaldiMPE(KaldiMPE *);
+ int check_mpe(KaldiMPE *, const Matrix*, const char *);
+ Matrix * calc_diff_mpe(KaldiMPE *, Matrix *, const char *);
+ double get_num_frames_mpe(const KaldiMPE *);
+ double get_utt_frame_acc_mpe(const KaldiMPE *);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/kaldi_seq/tools/net_kaldi2nerv.cpp b/kaldi_seq/tools/net_kaldi2nerv.cpp
new file mode 100644
index 0000000..bbac3db
--- /dev/null
+++ b/kaldi_seq/tools/net_kaldi2nerv.cpp
@@ -0,0 +1,85 @@
+#include <iostream>
+#include <cstdio>
+#include <cstring>
+#include <cstdlib>
+#include <cassert>
+using namespace std;
+
+const char fmt[] = "[%013d]\n";
+
+int main(int argc, char *argv[])
+{
+ if(argc < 3){
+ printf("Usage: %s kaldi_nnet nerv_output\n", argv[0]);
+ exit(0);
+ }
+
+ FILE *fin = fopen(argv[1], "r");
+ FILE *fout = fopen(argv[2], "w");
+
+ if(!fin || !fout){
+ printf("fopen error\n");
+ exit(1);
+ }
+
+ char buf[1024], tag[64];
+ int a, b;
+ char ***arr;
+ long start, size;
+ int affine_ltp = 0, affine_bp = 0;
+
+ while(fgets(buf, 1024, fin)){
+ if(sscanf(buf, "%s%d%d", tag, &b, &a) == 3 && strcmp(tag, "<AffineTransform>") == 0){
+ fgets(buf, 1024, fin);
+ arr = new char**[a];
+ for(int i = 0; i < a; i++)
+ arr[i] = new char*[b];
+ for(int j = 0; j < b; j++)
+ for(int i = 0; i < a; i++){
+ arr[i][j] = new char[16];
+ fscanf(fin, "%s", arr[i][j]);
+ }
+
+ start = ftell(fout);
+ fprintf(fout, fmt, 0);
+ fprintf(fout, "{type=\"nerv.LinearTransParam\",id=\"affine%d_ltp\"}\n", affine_ltp++);
+ fprintf(fout, "%d %d\n", a, b);
+ for(int i = 0; i < a; i++){
+ for(int j = 0; j < b; j++){
+ fprintf(fout, "%s ", arr[i][j]);
+ delete [] arr[i][j];
+ }
+ fprintf(fout, "\n");
+ delete [] arr[i];
+ }
+ delete [] arr;
+
+ size = ftell(fout) - start;
+ fseek(fout, start, SEEK_SET);
+ fprintf(fout, fmt, (int)size);
+ fseek(fout, 0, SEEK_END);
+
+ fgets(buf, 1024, fin);
+ fscanf(fin, "%*s");
+
+ start = ftell(fout);
+ fprintf(fout, fmt, 0);
+ fprintf(fout, "{type=\"nerv.BiasParam\",id=\"affine%d_bp\"}\n", affine_bp++);
+ fprintf(fout, "%d %d\n", 1, b);
+ for(int i = 0; i < b; i++){
+ fscanf(fin, "%s", buf);
+ fprintf(fout, "%s ", buf);
+ }
+ fputs("\n", fout);
+ size = ftell(fout) - start;
+ fseek(fout, start, SEEK_SET);
+ fprintf(fout, fmt, (int)size);
+ fseek(fout, 0, SEEK_END);
+ }
+ }
+
+ fclose(fin);
+ fclose(fout);
+
+ return 0;
+}
diff --git a/kaldi_seq/tools/transf_kaldi2nerv.cpp b/kaldi_seq/tools/transf_kaldi2nerv.cpp
new file mode 100644
index 0000000..525bcda
--- /dev/null
+++ b/kaldi_seq/tools/transf_kaldi2nerv.cpp
@@ -0,0 +1,106 @@
+#include <iostream>
+#include <cstdio>
+#include <cstring>
+#include <cstdlib>
+#include <cassert>
+using namespace std;
+
+const char fmt[] = "[%013d]\n";
+
+int main(int argc, char *argv[])
+{
+ if(argc < 3){
+ printf("Usage: %s kaldi_transf nerv_output\n", argv[0]);
+ exit(1);
+ }
+
+ FILE *fin = fopen(argv[1], "r");
+ FILE *fout = fopen(argv[2], "w");
+ if(!fin || !fout){
+ puts("fopen error");
+ exit(1);
+ }
+
+ char buf[1024], tag[64];
+ int a, b;
+ int size_window, size_bias;
+ char **window, **bias;
+
+ while(fgets(buf, sizeof(buf), fin))
+ {
+ if(sscanf(buf, "%s%d%d", tag, &a, &b) == 3){
+ if(strcmp(tag, "<AddShift>") == 0){
+ assert(a == b);
+ size_bias = a;
+ fscanf(fin, "%*s%*s%*s");
+ bias = new char*[size_bias];
+ for(int i = 0; i < size_bias; i++){
+ bias[i] = new char[16];
+ fscanf(fin, "%s", bias[i]);
+ }
+ } else if(strcmp(tag, "<Rescale>") == 0){
+ assert(a == b);
+ size_window = a;
+ fscanf(fin, "%*s%*s%*s");
+ window = new char*[size_window];
+ for(int i = 0; i < size_window; i++){
+ window[i] = new char[16];
+ fscanf(fin, "%s", window[i]);
+ }
+ }
+ }
+ }
+
+ long start = ftell(fout), size;
+ fprintf(fout, fmt, 0);
+ fprintf(fout, "{id = \"bias1\", type = \"nerv.MatrixParam\"}\n");
+ fprintf(fout, "1 %d\n", size_bias);
+ for(int i = 0; i<size_bias; i++)
+ fprintf(fout, "0 ");
+ fputs("\n", fout);
+ size = ftell(fout) - start;
+ fseek(fout, start, SEEK_SET);
+ fprintf(fout, fmt, (int)size);
+ fseek(fout, 0, SEEK_END);
+
+ start = ftell(fout);
+ fprintf(fout, fmt, 0);
+ fprintf(fout, "{id = \"window1\", type = \"nerv.MatrixParam\"}\n");
+ fprintf(fout, "1 %d\n", size_window);
+ for(int i = 0; i<size_window; i++)
+ fprintf(fout, "1 ");
+ fputs("\n", fout);
+ size = ftell(fout) - start;
+ fseek(fout, start, SEEK_SET);
+ fprintf(fout, fmt, (int)size);
+ fseek(fout, 0, SEEK_END);
+
+ start = ftell(fout);
+ fprintf(fout, fmt, 0);
+ fprintf(fout, "{id = \"bias2\", type = \"nerv.MatrixParam\"}\n");
+ fprintf(fout, "1 %d\n", size_bias);
+ for(int i = 0; i<size_bias; i++)
+ fprintf(fout, "%s ", bias[i]);
+ fputs("\n", fout);
+ size = ftell(fout) - start;
+ fseek(fout, start, SEEK_SET);
+ fprintf(fout, fmt, (int)size);
+ fseek(fout, 0, SEEK_END);
+
+ start = ftell(fout);
+ fprintf(fout, fmt, 0);
+ fprintf(fout, "{id = \"window2\", type = \"nerv.MatrixParam\"}\n");
+ fprintf(fout, "1 %d\n", size_window);
+ for(int i = 0; i<size_window; i++)
+ fprintf(fout, "%s ", window[i]);
+ fputs("\n", fout);
+ size = ftell(fout) - start;
+ fseek(fout, start, SEEK_SET);
+ fprintf(fout, fmt, (int)size);
+ fseek(fout, 0, SEEK_END);
+
+ fclose(fin);
+ fclose(fout);
+
+ return 0;
+}