diff options
Diffstat (limited to 'fastnn/example/fastnn_baseline.lua')
-rw-r--r-- | fastnn/example/fastnn_baseline.lua | 258 |
1 files changed, 258 insertions, 0 deletions
-- fastnn/example/fastnn_baseline.lua
--
-- Network/data configuration script: defines the global `gconf` table and a
-- set of global factory/reporting functions (layer repository, readers,
-- buffer, statistics printers).
-- NOTE(review): the functions are intentionally left global; presumably the
-- trainer script that loads this file looks them up by name -- confirm before
-- converting anything to `local`.

require 'htk_io'

-- Global training configuration.
-- `initialized_param` lists the parameter files to load: in this file,
-- entry 1 is the global transform and entry 2 is the network initialization.
gconf = {lrate = 0.2, wcost = 1e-6, momentum = 0.9,
        cumat_type = nerv.CuMatrixFloat,
        mmat_type = nerv.MMatrixFloat,
        frm_ext = 5,
        frm_trim = 5,
        batch_size = 256,
        buffer_size = 81920,
        rearrange = true,
        tr_scp = "/sgfs/users/wd007/asr/baseline_chn_50h/finetune/finetune_baseline/train.scp",
        cv_scp = "/sgfs/users/wd007/asr/baseline_chn_50h/finetune/finetune_baseline/train_cv.scp",
        htk_conf = "/sgfs/users/wd007/asr/baseline_chn_50h/finetune/finetune_baseline/fbank_d_a_z.conf",
        initialized_param = {"/sgfs/users/wd007/src/nerv/tools/nerv.global.transf",
                             "/sgfs/users/wd007/src/nerv/tools/nerv.svd0.55_3000h_iter1.init"},
        debug = false}

--- Build the layer repository for this network.
-- Registers the primitive layers first, then the DAG layers `global_transf`
-- and `main` that are wired from them, and finally the two top-level DAG
-- layers `ce_output` and `softmax_output` that wrap `main`.
-- @param param_repo parameter repository passed through to nerv.LayerRepo
-- @return the populated layer repository
function make_layer_repo(param_repo)
    local layer_repo = nerv.LayerRepo(
    {
        -- global transf
        ["nerv.BiasLayer"] =
        {
            blayer1 = {{bias = "bias1"}, {dim_in = {1320}, dim_out = {1320}}},
        },
        ["nerv.WindowLayer"] =
        {
            wlayer1 = {{window = "window1"}, {dim_in = {1320}, dim_out = {1320}}},
        },
        -- biased linearity
        ["nerv.AffineLayer"] =
        {
            affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"},
                       {dim_in = {1320}, dim_out = {2048}}},
            affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"},
                       {dim_in = {2048}, dim_out = {367}}},
            affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"},
                       {dim_in = {367}, dim_out = {2048}}},
            affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"},
                       {dim_in = {2048}, dim_out = {408}}},
            affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"},
                       {dim_in = {408}, dim_out = {2048}}},
            affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"},
                       {dim_in = {2048}, dim_out = {368}}},
            affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"},
                       {dim_in = {368}, dim_out = {2048}}},
            affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"},
                       {dim_in = {2048}, dim_out = {303}}},
            affine8 = {{ltp = "affine8_ltp", bp = "affine8_bp"},
                       {dim_in = {303}, dim_out = {2048}}},
            affine9 = {{ltp = "affine9_ltp", bp = "affine9_bp"},
                       {dim_in = {2048}, dim_out = {277}}},
            affine10 = {{ltp = "affine10_ltp", bp = "affine10_bp"},
                        {dim_in = {277}, dim_out = {2048}}},
            affine11 = {{ltp = "affine11_ltp", bp = "affine11_bp"},
                        {dim_in = {2048}, dim_out = {361}}},
            affine12 = {{ltp = "affine12_ltp", bp = "affine12_bp"},
                        {dim_in = {361}, dim_out = {2048}}},
            affine13 = {{ltp = "affine13_ltp", bp = "affine13_bp"},
                        {dim_in = {2048}, dim_out = {441}}},
            affine14 = {{ltp = "affine14_ltp", bp = "affine14_bp"},
                        {dim_in = {441}, dim_out = {10092}}},
        },
        ["nerv.SigmoidLayer"] =
        {
            sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}},
            sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}},
            sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}},
            sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}},
            sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}},
            sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}},
            sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}},
        },
        ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output
        {
            ce_crit = {{}, {dim_in = {10092, 1}, dim_out = {1}, compressed = true}}
        },
        ["nerv.SoftmaxLayer"] = -- softmax for decode output
        {
            softmax = {{}, {dim_in = {10092}, dim_out = {10092}}}
        }
    }, param_repo, gconf)

    -- Second pass: DAG layers built from the primitive layers above. Each
    -- 2048-wide sigmoid is followed by two affine layers in a row (a narrow
    -- one, then one back to 2048 or to the 10092-wide output).
    layer_repo:add_layers(
    {
        ["nerv.DAGLayer"] =
        {
            global_transf = {{}, {
                dim_in = {1320}, dim_out = {1320},
                sub_layers = layer_repo,
                connections =
                {
                    ["<input>[1]"] = "blayer1[1]",
                    ["blayer1[1]"] = "wlayer1[1]",
                    ["wlayer1[1]"] = "<output>[1]"
                }
            }},
            main = {{}, {
                dim_in = {1320}, dim_out = {10092},
                sub_layers = layer_repo,
                connections = {
                    ["<input>[1]"] = "affine0[1]",
                    ["affine0[1]"] = "sigmoid0[1]",
                    ["sigmoid0[1]"] = "affine1[1]",
                    ["affine1[1]"] = "affine2[1]",
                    ["affine2[1]"] = "sigmoid1[1]",
                    ["sigmoid1[1]"] = "affine3[1]",
                    ["affine3[1]"] = "affine4[1]",
                    ["affine4[1]"] = "sigmoid2[1]",
                    ["sigmoid2[1]"] = "affine5[1]",
                    ["affine5[1]"] = "affine6[1]",
                    ["affine6[1]"] = "sigmoid3[1]",
                    ["sigmoid3[1]"] = "affine7[1]",
                    ["affine7[1]"] = "affine8[1]",
                    ["affine8[1]"] = "sigmoid4[1]",
                    ["sigmoid4[1]"] = "affine9[1]",
                    ["affine9[1]"] = "affine10[1]",
                    ["affine10[1]"] = "sigmoid5[1]",
                    ["sigmoid5[1]"] = "affine11[1]",
                    ["affine11[1]"] = "affine12[1]",
                    ["affine12[1]"] = "sigmoid6[1]",
                    ["sigmoid6[1]"] = "affine13[1]",
                    ["affine13[1]"] = "affine14[1]",
                    ["affine14[1]"] = "<output>[1]",
                }
            }}
        }
    }, param_repo, gconf)

    -- Third pass: top-level graphs that reference `main`, which therefore
    -- has to be registered before this call.
    layer_repo:add_layers(
    {
        ["nerv.DAGLayer"] =
        {
            ce_output = {{}, {
                dim_in = {1320, 1}, dim_out = {1},
                sub_layers = layer_repo,
                connections = {
                    ["<input>[1]"] = "main[1]",
                    ["main[1]"] = "ce_crit[1]",
                    ["<input>[2]"] = "ce_crit[2]",
                    ["ce_crit[1]"] = "<output>[1]"
                }
            }},
            softmax_output = {{}, {
                dim_in = {1320}, dim_out = {10092},
                sub_layers = layer_repo,
                connections = {
                    ["<input>[1]"] = "main[1]",
                    ["main[1]"] = "softmax[1]",
                    ["softmax[1]"] = "<output>[1]"
                }
            }}
        }
    }, param_repo, gconf)

    return layer_repo
end

--- Return the training graph (`ce_output`: main network + softmax/CE criterion).
function get_network(layer_repo)
    return layer_repo:get_layer("ce_output")
end

--- Return the decoding graph (`softmax_output`: main network + softmax).
function get_decode_network(layer_repo)
    return layer_repo:get_layer("softmax_output")
end

--- Return the `global_transf` graph (bias layer followed by window layer).
function get_global_transf(layer_repo)
    return layer_repo:get_layer("global_transf")
end

--- Build the list of data readers for one scp file.
-- @param scp_file path of the feature scp list to read
-- @param layer_repo layer repository; its `global_transf` layer is handed to
--        the reader
-- @param feat_repo_shareid passed through unchanged to nerv.TNetReader
-- @param data_mutex_shareid passed through unchanged to nerv.TNetReader
-- @return a list with a single entry `{reader = ..., data = {...}}`
function make_readers(scp_file, layer_repo, feat_repo_shareid, data_mutex_shareid)
    return {
        {reader = nerv.TNetReader(gconf,
            {
                id = "main_scp",
                scp_file = scp_file,
                conf_file = gconf.htk_conf,
                frm_ext = gconf.frm_ext,
                mlfs = {
                    phone_state = {
                        file = "/sgfs/users/wd007/asr/baseline_chn_50h/finetune/finetune_baseline/ref.mlf",
                        format = "map",
                        format_arg = "/sgfs/users/wd007/asr/baseline_chn_50h/finetune/finetune_baseline/dict",
                        dir = "*/",
                        ext = "lab"
                    }
                },
                global_transf = layer_repo:get_layer("global_transf")
            }, feat_repo_shareid, data_mutex_shareid),
         data = {main_scp = 1320, phone_state = 1}}
    }
end

--- Return the set of data ids that are features: `{main_scp = true}`.
function get_feat_id()
    return {main_scp = true}
end

--- Wrap the given readers in an SGD buffer.
-- NOTE(review): `gconf.randomize` is not set anywhere in this file, so it is
-- nil here unless something else assigns it before this function is called --
-- presumably the trainer does; confirm.
-- @param readers reader list as returned by make_readers
function make_buffer(readers)
    return nerv.SGDBuffer(gconf,
        {
            buffer_size = gconf.buffer_size,
            randomize = gconf.randomize,
            readers = readers,
            use_gpu = true
        })
end

--- Return the order of the network inputs: features first (flagged with
-- `global_transf = true`), then the `phone_state` labels.
function get_input_order()
    return {{id = "main_scp", global_transf = true},
            {id = "phone_state"}}
end

--- Frame accuracy in percent, computed from the counters of the `ce_crit` layer.
function get_accuracy(layer_repo)
    local ce_crit = layer_repo:get_layer("ce_crit")
    return ce_crit.total_correct / ce_crit.total_frames * 100
end

--- Print the counters accumulated by the `ce_crit` layer.
function print_stat(layer_repo)
    local ce_crit = layer_repo:get_layer("ce_crit")
    nerv.info("*** training stat begin ***")
    nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce)
    nerv.printf("correct:\t\t%d\n", ce_crit.total_correct)
    nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames)
    nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames)
    nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo))
    nerv.info("*** training stat end ***")
end

--- Report the statistics of an `xent` object through nerv.info_stderr.
-- @param xent object providing `totalframes()`, `loss()` and `correct()`
function print_xent(xent)
    local totalframes = xent:totalframes()
    local loss = xent:loss()
    local correct = xent:correct()
    nerv.info_stderr("*** training statistics info begin ***")
    nerv.info_stderr("total frames:\t\t%d", totalframes)
    nerv.info_stderr("cross entropy:\t%.8f", loss/totalframes)
    nerv.info_stderr("frame accuracy:\t%.3f%%", 100*correct/totalframes)
    nerv.info_stderr("*** training statistics info end ***")
end

--- Frame accuracy of an `xent` object, in percent, formatted with three decimals.
-- @return string such as "87.500"
function frame_acc(xent)
    local correct = xent:correct()
    local totalframes = xent:totalframes()
    return string.format("%.3f", 100*correct/totalframes)
end

--- Log the main settings of `gconf`.
function print_gconf()
    -- gconf.initialized_param holds the global transform first and the
    -- network initialization second, so the "network" label reads entry 2
    -- and the "transf" label reads entry 1 (they were swapped before).
    nerv.info_stderr("%s \t:= %s", "network", gconf.initialized_param[2])
    nerv.info_stderr("%s \t:= %s", "transf", gconf.initialized_param[1])
    nerv.info_stderr("%s \t:= %s", "batch_size", gconf.batch_size)
    nerv.info_stderr("%s \t:= %s", "buffer_size", gconf.buffer_size)
    nerv.info_stderr("%s \t:= %s", "lrate", gconf.lrate)
    nerv.info_stderr("%s \t:= %s", "tr_scp", gconf.tr_scp)
    nerv.info_stderr("%s \t:= %s", "cv_scp", gconf.cv_scp)
end