-rw-r--r--  htk_io/init.lua                            3
-rw-r--r--  htk_io/tools/nerv_to_tnet.lua             60
-rw-r--r--  htk_io/tools/tnet_to_nerv.c                2
-rw-r--r--  htk_io/tools/tnet_to_nerv.cpp              8
-rw-r--r--  kaldi_io/example/swb_baseline_basic.lua  157
-rw-r--r--  kaldi_io/kaldi.mk                         70
6 files changed, 297 insertions(+), 3 deletions(-)
diff --git a/htk_io/init.lua b/htk_io/init.lua
index af92140..c4dfff9 100644
--- a/htk_io/init.lua
+++ b/htk_io/init.lua
@@ -34,7 +34,8 @@ function TNetReader:get_data()
-- global transf
if self.global_transf ~= nil then
feat_utter = nerv.speech_utils.global_transf(feat_utter,
- self.global_transf, self.frm_ext, self.gconf)
+ self.global_transf, self.frm_ext,
+ self.frm_ext, self.gconf)
end
res[self.feat_id] = feat_utter
-- add corresponding labels
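The hunk above changes the call from four arguments to five, passing self.frm_ext twice; that is consistent with a global_transf helper that takes separate frame-extension and frame-trimming counts and here uses the same value for both. A minimal sketch of the assumed signature (the frm_trim name is hypothetical; only the arity is confirmed by the call site above):

    -- assumed signature after this change; frm_trim is a hypothetical name
    function nerv.speech_utils.global_transf(feat_utter, global_transf,
                                             frm_ext, frm_trim, gconf)
        -- extend the utterance by frm_ext frames at each end, apply the
        -- global_transf network, then trim frm_trim frames back off
    end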
diff --git a/htk_io/tools/nerv_to_tnet.lua b/htk_io/tools/nerv_to_tnet.lua
new file mode 100644
index 0000000..c0ac76b
--- /dev/null
+++ b/htk_io/tools/nerv_to_tnet.lua
@@ -0,0 +1,60 @@
+-- usage: nerv config_file nerv_param_input tnet_output
+
+dofile(arg[1])
+param_repo = nerv.ParamRepo()
+param_repo:import({arg[2], gconf.initialized_param[2]}, nil, gconf)
+layer_repo = make_layer_repo(param_repo)
+f = assert(io.open(arg[3], "w"))
+
+function print_tnet_matrix(cumat)
+ local strs = {}
+ collectgarbage()
+ if cumat:nrow() == 1 then
+ local mat = nerv.MMatrixFloat(1, cumat:ncol())
+ cumat:copy_toh(mat)
+ table.insert(strs, string.format("v %d\n", mat:ncol()))
+ for j = 0, mat:ncol() - 1 do
+ table.insert(strs, string.format("%.8f ", mat[0][j]))
+ end
+ table.insert(strs, "\n")
+ f:write(table.concat(strs))
+ else
+ cumat = cumat:trans()
+ local mat = nerv.MMatrixFloat(cumat:nrow(), cumat:ncol())
+ cumat:copy_toh(mat)
+ table.insert(strs, string.format("m %d %d\n", mat:nrow(), mat:ncol()))
+ for i = 0, mat:nrow() - 1 do
+ local row = mat[i]
+ for j = 0, mat:ncol() - 1 do
+ table.insert(strs, string.format("%.8f ", row[j]))
+ end
+ table.insert(strs, "\n")
+ f:write(table.concat(strs))
+ strs = {}
+ end
+ end
+end
+local lnames = {"affine0", "sigmoid0",
+ "affine1", "sigmoid1",
+ "affine2", "sigmoid2",
+ "affine3", "sigmoid3",
+ "affine4", "sigmoid4",
+ "affine5", "sigmoid5",
+ "affine6", "sigmoid6",
+ "affine7", "ce_crit"}
+for i, name in ipairs(lnames) do
+ local layer = layer_repo:get_layer(name)
+ local layer_type = layer.__typename
+ if layer_type == "nerv.AffineLayer" then
+ f:write(string.format("<biasedlinearity> %d %d\n", layer.dim_out[1], layer.dim_in[1]))
+ print_tnet_matrix(layer.ltp.trans)
+ print_tnet_matrix(layer.bp.trans)
+ elseif layer_type == "nerv.SigmoidLayer" then
+ f:write(string.format("<sigmoid> %d %d\n", layer.dim_out[1], layer.dim_in[1]))
+ elseif layer_type == "nerv.SoftmaxCELayer" then
+ f:write(string.format("<softmax> %d %d\n", layer.dim_in[1], layer.dim_in[1]))
+ else
+ nerv.error("unknown layer type %s", layer_type)
+ end
+end
+f:close()
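nerv_to_tnet.lua emits the TNet text format directly: a <biasedlinearity>, <sigmoid>, or <softmax> tag line carrying the output and input dimensions, followed by "m rows cols" for matrices and "v n" for vectors. A quick way to sanity-check a converted file is to re-read just those shape lines; a sketch under the format above, not part of this commit:

    -- usage: nerv check_tnet.lua nnet.tnet  (script and file names are examples)
    local f = assert(io.open(arg[1], "r"))
    for line in f:lines() do
        local tag, dout, din = line:match("^<(%a+)> (%d+) (%d+)$")
        if tag then
            print(string.format("%-16s %s -> %s", tag, din, dout))
        end
        local r, c = line:match("^m (%d+) (%d+)$")
        if r then print(string.format("  matrix %sx%s", r, c)) end
        local n = line:match("^v (%d+)$")
        if n then print(string.format("  vector %s", n)) end
    end
    f:close()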
diff --git a/htk_io/tools/tnet_to_nerv.c b/htk_io/tools/tnet_to_nerv.c
index f781236..5774819 100644
--- a/htk_io/tools/tnet_to_nerv.c
+++ b/htk_io/tools/tnet_to_nerv.c
@@ -41,7 +41,7 @@ int main() {
fprintf(fout, "%16d", 0);
fprintf(fout, "{type=\"nerv.BiasParam\",id=\"affine%d_bp\"}\n",
cnt);
- fprintf(fout, "1 %d\n", nrow, ncol);
+ fprintf(fout, "1 %d\n", ncol);
for (j = 0; j < ncol; j++)
fprintf(fout, "%.8f ", mat[0][j]);
fprintf(fout, "\n");
diff --git a/htk_io/tools/tnet_to_nerv.cpp b/htk_io/tools/tnet_to_nerv.cpp
index bbfddcf..a779a25 100644
--- a/htk_io/tools/tnet_to_nerv.cpp
+++ b/htk_io/tools/tnet_to_nerv.cpp
@@ -2,9 +2,10 @@
#include <fstream>
#include <string>
#include <cstring>
+#include <cstdlib>
char token[1024];
char output[1024];
-double mat[4096][4096];
+double **mat;
int main(int argc, char **argv) {
std::ofstream fout;
fout.open(argv[1]);
@@ -18,6 +19,9 @@ int main(int argc, char **argv) {
scanf("%d %d", &ncol, &nrow);
scanf("%s %d %d", token, &ncol, &nrow);
printf("%d %d\n", nrow, ncol);
+ mat = (double **)malloc(nrow * sizeof(double *));
+ for (i = 0; i < nrow; i++)
+ mat[i] = (double *)malloc(ncol * sizeof(double));
for (j = 0; j < ncol; j++)
for (i = 0; i < nrow; i++)
scanf("%lf", mat[i] + j);
@@ -34,7 +38,9 @@ int main(int argc, char **argv) {
for (j = 0; j < ncol; j++)
fout << mat[i][j] << " ";
fout << std::endl;
+ free(mat[i]);
}
+ free(mat);
long length = fout.tellp() - base;
fout.seekp(base);
sprintf(output, "[%13lu]\n", length);
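The converter writes each parameter chunk with a 16-byte placeholder ("%16d" above), then seeks back and records the chunk length as "[%13lu]\n", which is itself exactly 16 bytes. Assuming the recorded length covers the header, as the tellp/seekp arithmetic suggests, the output can be walked chunk by chunk; a sketch, not part of this commit:

    -- walk the [length] chunk headers of a converted .nerv file
    local f = assert(io.open(arg[1], "rb"))
    while true do
        local head = f:read(16)              -- "[%13lu]\n" is 16 bytes
        if head == nil then break end
        local len = tonumber(head:match("^%[%s*(%d+)%]"))
        assert(len and len >= 16, "bad chunk header: " .. tostring(head))
        print("chunk of " .. len .. " bytes")
        f:seek("cur", len - 16)              -- skip the chunk body
    end
    f:close()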
diff --git a/kaldi_io/example/swb_baseline_basic.lua b/kaldi_io/example/swb_baseline_basic.lua
new file mode 100644
index 0000000..e6c8145
--- /dev/null
+++ b/kaldi_io/example/swb_baseline_basic.lua
@@ -0,0 +1,157 @@
+require 'kaldi_io'
+gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
+ cumat_type = nerv.CuMatrixFloat,
+ mmat_type = nerv.MMatrixFloat,
+ frm_ext = 5,
+ tr_rspecifier = "ark:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs6/users/ymz09/swb_ivec/train_bp.scp ark:- |",
+ cv_rspecifier = "ark:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs6/users/ymz09/swb_ivec/train_cv.scp ark:- |",
+ initialized_param = {"/slfs6/users/ymz09/swb_ivec/swb_init.nerv",
+ "/slfs6/users/ymz09/swb_ivec/swb_global_transf.nerv"},
+ debug = false}
+
+function make_sublayer_repo(param_repo)
+ return nerv.LayerRepo(
+ {
+ -- global transf
+ ["nerv.BiasLayer"] =
+ {
+ blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}},
+ blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}}
+ },
+ ["nerv.WindowLayer"] =
+ {
+ wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}},
+ wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}}
+ },
+ -- biased linearity
+ ["nerv.AffineLayer"] =
+ {
+ affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"},
+ {dim_in = {429}, dim_out = {2048}}},
+ affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"},
+ {dim_in = {2048}, dim_out = {3001}}}
+ },
+ ["nerv.SigmoidLayer"] =
+ {
+ sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}}
+ },
+ ["nerv.SoftmaxCELayer"] =
+ {
+ ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}}
+ }
+ }, param_repo, gconf)
+end
+
+function make_layer_repo(sublayer_repo, param_repo)
+ return nerv.LayerRepo(
+ {
+ ["nerv.DAGLayer"] =
+ {
+ global_transf = {{}, {
+ dim_in = {429}, dim_out = {429},
+ sub_layers = sublayer_repo,
+ connections = {
+ ["<input>[1]"] = "blayer1[1]",
+ ["blayer1[1]"] = "wlayer1[1]",
+ ["wlayer1[1]"] = "blayer2[1]",
+ ["blayer2[1]"] = "wlayer2[1]",
+ ["wlayer2[1]"] = "<output>[1]"
+ }
+ }},
+ main = {{}, {
+ dim_in = {429, 1}, dim_out = {1},
+ sub_layers = sublayer_repo,
+ connections = {
+ ["<input>[1]"] = "affine0[1]",
+ ["affine0[1]"] = "sigmoid0[1]",
+ ["sigmoid0[1]"] = "affine1[1]",
+ ["affine1[1]"] = "sigmoid1[1]",
+ ["sigmoid1[1]"] = "affine2[1]",
+ ["affine2[1]"] = "sigmoid2[1]",
+ ["sigmoid2[1]"] = "affine3[1]",
+ ["affine3[1]"] = "sigmoid3[1]",
+ ["sigmoid3[1]"] = "affine4[1]",
+ ["affine4[1]"] = "sigmoid4[1]",
+ ["sigmoid4[1]"] = "affine5[1]",
+ ["affine5[1]"] = "sigmoid5[1]",
+ ["sigmoid5[1]"] = "affine6[1]",
+ ["affine6[1]"] = "sigmoid6[1]",
+ ["sigmoid6[1]"] = "affine7[1]",
+ ["affine7[1]"] = "ce_crit[1]",
+ ["<input>[2]"] = "ce_crit[2]",
+ ["ce_crit[1]"] = "<output>[1]"
+ }
+ }}
+ }
+ }, param_repo, gconf)
+end
+
+function get_network(layer_repo)
+ return layer_repo:get_layer("main")
+end
+
+function make_readers(feature_rspecifier, layer_repo)
+ return {
+ {reader = nerv.KaldiReader(gconf,
+ {
+ id = "main_scp",
+ feature_rspecifier = feature_rspecifier,
+ frm_ext = gconf.frm_ext,
+ mlfs = {
+ phone_state = {
+ targets_rspecifier = "ark:/slfs6/users/ymz09/kaldi/src/bin/ali-to-pdf /slfs6/users/ymz09/swb_ivec/final.mdl \"ark:gunzip -c /slfs6/users/ymz09/swb_ivec/ali.*.gz |\" ark:- | /slfs6/users/ymz09/kaldi/src/bin/ali-to-post ark:- ark:- |",
+ format = "map"
+ }
+ },
+ global_transf = layer_repo:get_layer("global_transf")
+ }),
+ data = {main_scp = 429, phone_state = 1}}
+ }
+end
+
+function make_buffer(readers)
+ return nerv.SGDBuffer(gconf,
+ {
+ buffer_size = gconf.buffer_size,
+ randomize = gconf.randomize,
+ readers = readers
+ })
+end
+
+function get_input_order()
+ return {"main_scp", "phone_state"}
+end
+
+function get_accuracy(sublayer_repo)
+ local ce_crit = sublayer_repo:get_layer("ce_crit")
+ return ce_crit.total_correct / ce_crit.total_frames * 100
+end
+
+function print_stat(sublayer_repo)
+ local ce_crit = sublayer_repo:get_layer("ce_crit")
+ nerv.info("*** training stat begin ***")
+ nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce)
+ nerv.printf("correct:\t\t%d\n", ce_crit.total_correct)
+ nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames)
+ nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames)
+ nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(sublayer_repo))
+ nerv.info("*** training stat end ***")
+end
diff --git a/kaldi_io/kaldi.mk b/kaldi_io/kaldi.mk
new file mode 100644
index 0000000..4a397f0
--- /dev/null
+++ b/kaldi_io/kaldi.mk
@@ -0,0 +1,70 @@
+# This file was generated using the following command:
+# ./configure
+
+# Rules that enable valgrind debugging ("make valgrind")
+
+valgrind: .valgrind
+
+.valgrind:
+ echo -n > valgrind.out
+ for x in $(TESTFILES); do echo $$x>>valgrind.out; valgrind ./$$x >/dev/null 2>> valgrind.out; done
+ ! ( grep 'ERROR SUMMARY' valgrind.out | grep -v '0 errors' )
+ ! ( grep 'definitely lost' valgrind.out | grep -v -w 0 )
+ rm valgrind.out
+ touch .valgrind
+
+
+CONFIGURE_VERSION := 2
+OPENFSTLIBS = -L/slwork/users/wd007/src/kaldi/tools/openfst/lib -lfst
+OPENFSTLDFLAGS = -Wl,-rpath=/slwork/users/wd007/src/kaldi/tools/openfst/lib
+FSTROOT = /slwork/users/wd007/src/kaldi/tools/openfst
+ATLASINC = /slwork/users/wd007/src/kaldi/tools/ATLAS/include
+ATLASLIBS = -L/usr/lib -llapack -lcblas -latlas -lf77blas
+# You have to make sure ATLASLIBS is set...
+
+ifndef FSTROOT
+$(error FSTROOT not defined.)
+endif
+
+ifndef ATLASINC
+$(error ATLASINC not defined.)
+endif
+
+ifndef ATLASLIBS
+$(error ATLASLIBS not defined.)
+endif
+
+
+CXXFLAGS = -msse -msse2 -Wall -I.. \
+ -fPIC \
+ -DKALDI_DOUBLEPRECISION=0 -DHAVE_POSIX_MEMALIGN \
+ -Wno-sign-compare -Wno-unused-local-typedefs -Winit-self \
+ -DHAVE_EXECINFO_H=1 -rdynamic -DHAVE_CXXABI_H \
+ -DHAVE_ATLAS -I$(ATLASINC) \
+ -I$(FSTROOT)/include \
+ $(EXTRA_CXXFLAGS) \
+ -g # -O0 -DKALDI_PARANOID
+
+ifeq ($(KALDI_FLAVOR), dynamic)
+CXXFLAGS += -fPIC
+endif
+
+LDFLAGS = -rdynamic $(OPENFSTLDFLAGS)
+LDLIBS = $(EXTRA_LDLIBS) $(OPENFSTLIBS) $(ATLASLIBS) -lm -lpthread -ldl
+CC = g++
+CXX = g++
+AR = ar
+AS = as
+RANLIB = ranlib
+
+#Next section enables CUDA for compilation
+CUDA = true
+CUDATKDIR = /usr/local/cuda
+
+CUDA_INCLUDE= -I$(CUDATKDIR)/include
+CUDA_FLAGS = -g -Xcompiler -fPIC --verbose --machine 64 -DHAVE_CUDA
+
+CXXFLAGS += -DHAVE_CUDA -I$(CUDATKDIR)/include
+CUDA_LDFLAGS += -L$(CUDATKDIR)/lib64 -Wl,-rpath,$(CUDATKDIR)/lib64
+CUDA_LDLIBS += -lcublas -lcudart #LDLIBS : The libs are loaded later than static libs in implicit rule
+