summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- nerv/examples/asr_trainer.lua 7
-rw-r--r-- nerv/examples/gen_global_transf.lua 62
-rw-r--r-- nerv/examples/swb_baseline.lua 204
3 files changed, 66 insertions, 207 deletions
diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua
index 52cb754..aa1019d 100644
--- a/nerv/examples/asr_trainer.lua
+++ b/nerv/examples/asr_trainer.lua
@@ -248,7 +248,7 @@ end
dir.copyfile(arg[1], working_dir)
-- set logfile path
nerv.set_logfile(path.join(working_dir, logfile_name))
-path.chdir(working_dir)
+--path.chdir(working_dir)
-- start the training
local trainer = build_trainer(pf0)
@@ -258,7 +258,7 @@ nerv.info("initial cross validation: %.3f", gconf.accu_best)
for i = gconf.cur_iter, gconf.max_iter do
local stop = false
gconf.cur_iter = i
- dump_gconf(string.format("iter_%d.meta", i))
+ dump_gconf(path.join(working_dir, string.format("iter_%d.meta", i)))
repeat -- trick to implement `continue` statement
nerv.info("[NN] begin iteration %d with lrate = %.6f", i, gconf.lrate)
local accu_tr = trainer(nil, gconf.tr_scp, true, rebind_param_repo)
@@ -270,7 +270,8 @@ for i = gconf.cur_iter, gconf.max_iter do
os.date(date_pattern),
i, gconf.lrate,
accu_tr)
- local accu_new, pr_new, param_fname = trainer(param_prefix, gconf.cv_scp, false)
+ local accu_new, pr_new, param_fname =
+ trainer(path.join(working_dir, param_prefix), gconf.cv_scp, false)
nerv.info("[CV] cross validation %d: %.3f", i, accu_new)
local accu_prev = gconf.accu_best
if accu_new < gconf.accu_best then
diff --git a/nerv/examples/gen_global_transf.lua b/nerv/examples/gen_global_transf.lua
new file mode 100644
index 0000000..c4a3b42
--- /dev/null
+++ b/nerv/examples/gen_global_transf.lua
@@ -0,0 +1,62 @@
+-- Compute global mean/variance normalization parameters from gconf.tr_scp.
+-- usage: <this script> <config.lua> [output_file]; the config is expected to
+-- define `gconf` and `make_readers`. Exports "bias0" (-mean), "window0" (~1/std).
+if #arg < 1 then
+    io.stderr:write("usage: gen_global_transf.lua <config.lua> [output_file]\n")
+    return
+end
+dofile(arg[1])
+
+gconf.mmat_type = nerv.MMatrixFloat
+gconf.cumat_type = nerv.CuMatrixFloat
+local out_file = arg[2] or "global_transf.nerv"
+local loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
+local reader_spec = make_readers(gconf.tr_scp)[1]
+local reader = reader_spec.reader
+local width = reader_spec.data['main_scp']
+local mean = gconf.mmat_type(1, width) -- column sums of x, later -E(X)
+local std = gconf.mmat_type(1, width) -- column sums of x^2, later ~1 / sigma
+local total = 0 -- number of rows accumulated so far
+local EPS = 1e-7
+
+mean:fill(0)
+std:fill(0)
+
+local cnt = 0
+while true do
+    local ret = reader:get_data() -- local: do not leak a global
+    if ret == nil then
+        break
+    end
+    local utt = ret['main_scp']
+    mean:add(mean, utt:colsum(), 1, 1)
+    utt:mul_elem(utt, utt) -- square in place; the raw values were summed above
+    std:add(std, utt:colsum(), 1, 1)
+    total = total + utt:nrow()
+    cnt = cnt + 1
+    if cnt % 1000 == 0 then -- cnt stays cumulative, it is never reset
+        nerv.info("accumulated %d utterances", cnt)
+    end
+end
+if total == 0 then -- avoid dividing by zero and exporting NaN/inf
+    error("no frames read from " .. tostring(gconf.tr_scp))
+end
+
+local bparam = nerv.BiasParam("bias0", gconf)
+bparam.trans = gconf.mmat_type(1, width)
+mean:add(mean, mean, -1.0 / total, 0) -- -E(X)
+bparam.trans:copy_fromh(mean)
+
+mean:mul_elem(mean, mean) -- E^2(X)
+std:add(std, mean, 1 / total, -1) -- sigma ^ 2 = E(X^2) - E^2(X)
+
+for i = 0, width - 1 do
+    -- rounding can push the variance slightly below 0; clamp so sqrt stays real
+    local sigma = math.sqrt(math.max(std[0][i], 0) + EPS)
+    std[0][i] = 1 / (sigma + EPS)
+end
+
+local wparam = nerv.BiasParam("window0", gconf)
+wparam.trans = std
+local pr = nerv.ParamRepo({bparam, wparam}, loc_type)
+pr:export(out_file, nil)
diff --git a/nerv/examples/swb_baseline.lua b/nerv/examples/swb_baseline.lua
deleted file mode 100644
index ece4d44..0000000
--- a/nerv/examples/swb_baseline.lua
+++ /dev/null
@@ -1,204 +0,0 @@
-require 'htk_io'
-gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
- rearrange = true, -- just to make the context order consistent with old results, deprecated
- frm_ext = 5,
- frm_trim = 5, -- trim the first and last 5 frames, TNet just does this, deprecated
- tr_scp = "/slfs1/users/mfy43/swb_ivec/train_bp.scp",
- cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp",
- htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf",
- initialized_param = {"/slfs1/users/mfy43/swb_init.nerv",
- "/slfs1/users/mfy43/swb_global_transf.nerv"}}
-
-function make_layer_repo(param_repo)
- local layer_repo = nerv.LayerRepo(
- {
- -- global transf
- ["nerv.BiasLayer"] =
- {
- blayer1 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias1"}},
- blayer2 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias2"}}
- },
- ["nerv.WindowLayer"] =
- {
- wlayer1 = {dim_in = {429}, dim_out = {429}, params = {window = "window1"}},
- wlayer2 = {dim_in = {429}, dim_out = {429}, params = {window = "window2"}}
- },
- -- biased linearity
- ["nerv.AffineLayer"] =
- {
- affine0 = {dim_in = {429}, dim_out = {2048},
- params = {ltp = "affine0_ltp", bp = "affine0_bp"}},
- affine1 = {dim_in = {2048}, dim_out = {2048},
- params = {ltp = "affine1_ltp", bp = "affine1_bp"}},
- affine2 = {dim_in = {2048}, dim_out = {2048},
- params = {ltp = "affine2_ltp", bp = "affine2_bp"}},
- affine3 = {dim_in = {2048}, dim_out = {2048},
- params = {ltp = "affine3_ltp", bp = "affine3_bp"}},
- affine4 = {dim_in = {2048}, dim_out = {2048},
- params = {ltp = "affine4_ltp", bp = "affine4_bp"}},
- affine5 = {dim_in = {2048}, dim_out = {2048},
- params = {ltp = "affine5_ltp", bp = "affine5_bp"}},
- affine6 = {dim_in = {2048}, dim_out = {2048},
- params = {ltp = "affine6_ltp", bp = "affine6_bp"}},
- affine7 = {dim_in = {2048}, dim_out = {3001},
- params = {ltp = "affine7_ltp", bp = "affine7_bp"}}
- },
- ["nerv.SigmoidLayer"] =
- {
- sigmoid0 = {dim_in = {2048}, dim_out = {2048}},
- sigmoid1 = {dim_in = {2048}, dim_out = {2048}},
- sigmoid2 = {dim_in = {2048}, dim_out = {2048}},
- sigmoid3 = {dim_in = {2048}, dim_out = {2048}},
- sigmoid4 = {dim_in = {2048}, dim_out = {2048}},
- sigmoid5 = {dim_in = {2048}, dim_out = {2048}},
- sigmoid6 = {dim_in = {2048}, dim_out = {2048}}
- },
- ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output
- {
- ce_crit = {dim_in = {3001, 1}, dim_out = {1}, compressed = true}
- },
- ["nerv.SoftmaxLayer"] = -- softmax for decode output
- {
- softmax = {dim_in = {3001}, dim_out = {3001}}
- }
- }, param_repo, gconf)
-
- layer_repo:add_layers(
- {
- ["nerv.DAGLayer"] =
- {
- global_transf = {
- dim_in = {429}, dim_out = {429},
- sub_layers = layer_repo,
- connections = {
- ["<input>[1]"] = "blayer1[1]",
- ["blayer1[1]"] = "wlayer1[1]",
- ["wlayer1[1]"] = "blayer2[1]",
- ["blayer2[1]"] = "wlayer2[1]",
- ["wlayer2[1]"] = "<output>[1]"
- }
- },
- main = {
- dim_in = {429}, dim_out = {3001},
- sub_layers = layer_repo,
- connections = {
- ["<input>[1]"] = "affine0[1]",
- ["affine0[1]"] = "sigmoid0[1]",
- ["sigmoid0[1]"] = "affine1[1]",
- ["affine1[1]"] = "sigmoid1[1]",
- ["sigmoid1[1]"] = "affine2[1]",
- ["affine2[1]"] = "sigmoid2[1]",
- ["sigmoid2[1]"] = "affine3[1]",
- ["affine3[1]"] = "sigmoid3[1]",
- ["sigmoid3[1]"] = "affine4[1]",
- ["affine4[1]"] = "sigmoid4[1]",
- ["sigmoid4[1]"] = "affine5[1]",
- ["affine5[1]"] = "sigmoid5[1]",
- ["sigmoid5[1]"] = "affine6[1]",
- ["affine6[1]"] = "sigmoid6[1]",
- ["sigmoid6[1]"] = "affine7[1]",
- ["affine7[1]"] = "<output>[1]"
- }
- }
- }
- }, param_repo, gconf)
-
- layer_repo:add_layers(
- {
- ["nerv.DAGLayer"] =
- {
- ce_output = {
- dim_in = {429, 1}, dim_out = {1},
- sub_layers = layer_repo,
- connections = {
- ["<input>[1]"] = "main[1]",
- ["main[1]"] = "ce_crit[1]",
- ["<input>[2]"] = "ce_crit[2]",
- ["ce_crit[1]"] = "<output>[1]"
- }
- },
- softmax_output = {
- dim_in = {429}, dim_out = {3001},
- sub_layers = layer_repo,
- connections = {
- ["<input>[1]"] = "main[1]",
- ["main[1]"] = "softmax[1]",
- ["softmax[1]"] = "<output>[1]"
- }
- }
- }
- }, param_repo, gconf)
-
- return layer_repo
-end
-
-function get_network(layer_repo)
- return layer_repo:get_layer("ce_output")
-end
-
-function get_decode_network(layer_repo)
- return layer_repo:get_layer("softmax_output")
-end
-
-function get_global_transf(layer_repo)
- return layer_repo:get_layer("global_transf")
-end
-
-function make_readers(scp_file, layer_repo)
- return {
- {reader = nerv.TNetReader(gconf,
- {
- id = "main_scp",
- scp_file = scp_file,
- conf_file = gconf.htk_conf,
- frm_ext = gconf.frm_ext,
- mlfs = {
- phone_state = {
- file = "/slfs1/users/mfy43/swb_ivec/ref.mlf",
- format = "map",
- format_arg = "/slfs1/users/mfy43/swb_ivec/dict",
- dir = "*/",
- ext = "lab"
- }
- }
- }),
- data = {main_scp = 429, phone_state = 1}}
- }
-end
-
-function make_buffer(readers)
- return nerv.SGDBuffer(gconf,
- {
- buffer_size = gconf.buffer_size,
- batch_size = gconf.batch_size,
- chunk_size = gconf.chunk_size,
- randomize = gconf.randomize,
- readers = readers,
- use_gpu = true
- })
-end
-
-function get_input_order()
- return {{id = "main_scp", global_transf = true},
- {id = "phone_state"}}
-end
-
-function get_decode_input_order()
- return {{id = "main_scp", global_transf = true}}
-end
-
-function get_accuracy(layer_repo)
- local ce_crit = layer_repo:get_layer("ce_crit")
- return ce_crit.total_correct / ce_crit.total_frames * 100
-end
-
-function print_stat(layer_repo)
- local ce_crit = layer_repo:get_layer("ce_crit")
- nerv.info("*** training stat begin ***")
- nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce)
- nerv.printf("correct:\t\t%d\n", ce_crit.total_correct)
- nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames)
- nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames)
- nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo))
- nerv.info("*** training stat end ***")
-end