author    | Ted Yin <[email protected]> | 2015-08-31 12:03:15 +0800
committer | Ted Yin <[email protected]> | 2015-08-31 12:03:15 +0800
commit    | 447bd1ec6b7be07f22653874fc9db84c9b6a9f9a (patch)
tree      | 0268d85a8f75783daaa6b182bee1338dcd504f48 /fastnn/example
parent    | cad144243b898a7bed91c18572bf42944e9db3b3 (diff)
parent    | 3463789202b7ededf5074b199d5122ca85d328ea (diff)
Merge pull request #4 from uphantom/master
fastnn first version, includes the following submodules
Diffstat (limited to 'fastnn/example')
-rw-r--r-- | fastnn/example/asgd_data_trainer.lua | 405
-rw-r--r-- | fastnn/example/asgd_sds_trainer.lua  | 343
-rw-r--r-- | fastnn/example/fastnn_baseline.lua   | 258
3 files changed, 1006 insertions, 0 deletions
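Both trainer scripts added below spawn their workers the same way: the worker body is kept as a Lua chunk string (`data_thread_code` / `train_thread_code`), specialized with `string.format`, and executed with `threads.Thread`, while shared objects (mutexes, example repos, the GPU device, the model-sync master) cross the thread boundary as integer share-ids. A minimal sketch of that pattern, using only the torch `threads` primitives visible in the diff; the names `worker_code` and `run_worker` are illustrative and not part of the committed code:

```lua
-- Sketch of the thread-spawning pattern used by asgd_data_trainer.lua and
-- asgd_sds_trainer.lua: format a chunk string, run it with threads.Thread,
-- and re-attach shared state inside the worker via integer share-ids.
local threads = require 'threads'

local worker_code = [[
local threads = require 'threads'
local thread_idx    = %d
local mutex_shareid = %d
local mutex = threads.Mutex(mutex_shareid)   -- re-attach to the shared mutex by id
mutex:lock()
print(string.format("worker %%d running", thread_idx))
mutex:unlock()
]]

local function run_worker(idx)
    local mutex = threads.Mutex()
    local code  = string.format(worker_code, idx, mutex:id())
    local t     = threads.Thread(code)       -- executes the chunk in a new OS thread
    t:free()                                  -- joins the worker thread
    mutex:free()
end

run_worker(1)
```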
diff --git a/fastnn/example/asgd_data_trainer.lua b/fastnn/example/asgd_data_trainer.lua new file mode 100644 index 0000000..33d579a --- /dev/null +++ b/fastnn/example/asgd_data_trainer.lua @@ -0,0 +1,405 @@ +require 'fastnn' +require 'libhtkio' +require 'threads' + +dofile("fastnn/fastnn_baseline.lua") + +env = string.format([[ +package.path="/home/slhome/wd007/.luarocks/share/lua/5.1/?.lua;/home/slhome/wd007/.luarocks/share/lua/5.1/?/init.lua;/sgfs/users/wd007/src/nerv/install/share/lua/5.1/?.lua;/sgfs/users/wd007/src/nerv/install/share/lua/5.1/?/init.lua;"..package.path; +package.cpath="/home/slhome/wd007/.luarocks/lib/lua/5.1/?.so;/sgfs/users/wd007/src/nerv/install/lib/lua/5.1/?.so;"..package.cpath +local k,l,_=pcall(require,"luarocks.loader") _=k and l.add_context("nerv","scm-1") +]]) + + + +local data_thread_code = [[ +%s + +require 'nerv' +require 'fastnn' +dofile("fastnn/fastnn_baseline.lua") +os.execute("export MALLOC_CHECK_=0") + +local thread_idx = %d +local example_repo_shareid = %d +local data_mutex_shareid = %d +local feat_repo_shareid = %d +local gpu_shareid = %d +local batch_size = %d +local bp = %d +local scp_file = '%s' + +local share_mutex = threads.Mutex(data_mutex_shareid) +local share_example_repo = fastnn.CExamplesRepo(example_repo_shareid, true) +local share_gpu = fastnn.CDevice(gpu_shareid) + +--print(thread_idx) +--print(share_mutex) +--print(share_gpu) +--print(share_example_repo) + +if bp == 0 then + bp = false +else + bp = true +end +gconf.randomize = bp +--print(gconf.randomize) + +share_mutex:lock() +local gpuid = share_example_repo:get_gpuid() +if gpuid < 0 then + gpuid = share_gpu:select_gpu() + share_example_repo:set_gpuid(gpuid) +else + share_gpu:select_gpu(gpuid) +end + +nerv.info_stderr("thread %%d loading transf ...", thread_idx) +local param_transf_repo = nerv.ParamRepo() +param_transf_repo:import(gconf.transf, nil, gconf) +local transf_node_repo = make_transf_node_repo(param_transf_repo) +local transf_layer_repo = make_transf_link_repo(transf_node_repo, param_transf_repo) +local transf = transf_layer_repo:get_layer("global_transf") +share_mutex:unlock() + +local feat_id = get_feat_id() + +local buffer = make_buffer(make_readers(scp_file, transf_layer_repo, feat_repo_shareid, data_mutex_shareid)) + + local t = 1; + for data in buffer.get_data, buffer do + local example = fastnn.Example:PrepareData(data, nil, feat_id) + --print(string.format("Accept NO.%%d %%s", t, example)); t = t+1; + share_example_repo:accept(example) + --print("share_example_repo:accept") + + -- collect garbage in-time to save GPU memory + collectgarbage("collect") + end + share_example_repo:done() +-- print("share_example_repo:done") + +]] + + +train_thread_code = [[ +%s + +require 'nerv' +require 'fastnn' +dofile("fastnn/fastnn_baseline.lua") +os.execute("export MALLOC_CHECK_=0") + +local thread_idx = %d +local example_repo_shareid = %d +local data_mutex_shareid = %d +local master_shareid = %d +local gpu_shareid = %d +local xent_shareid = %d +local batch_size = %d +local lrate = %f +local bp = %d +local nnet_in = '%s' +local nnet_out = '%s' + +local share_example_repo = fastnn.CExamplesRepo(example_repo_shareid, true) +local share_mutex = threads.Mutex(data_mutex_shareid) +local share_master = fastnn.ModelSync(master_shareid) +local share_gpu = fastnn.CDevice(gpu_shareid) +local share_xent = fastnn.CXent(xent_shareid) + +if bp == 0 then + bp = false +else + bp = true +end + +gconf.randomize = bp +gconf.lrate = lrate +gconf.batch_size = batch_size +gconf.network[1] = nnet_in 
+nerv.info_stderr("input network: %%s", gconf.network[1]) +nerv.info_stderr(gconf.randomize) +nerv.info_stderr("input batch_size: %%d", gconf.batch_size) +nerv.info_stderr("input lrate: %%f", gconf.lrate) + +share_mutex:lock() +local gpuid = share_example_repo:get_gpuid() +if gpuid < 0 then + gpuid = share_gpu:select_gpu() + share_example_repo:set_gpuid(gpuid) +else + share_gpu:select_gpu(gpuid) +end + +nerv.context = nerv.CCuContext() +--print(nerv.context) + + +nerv.info_stderr("thread %%d loading network ...", thread_idx) +local param_network_repo = nerv.ParamRepo() +param_network_repo:import(gconf.network, nil, gconf) +local network_node_repo = make_network_node_repo(param_network_repo) +local network_layer_repo = make_network_link_repo(network_node_repo, param_network_repo) +local network = get_network(network_layer_repo) +share_mutex:unlock() + + + local input_order = get_input_order() + + -- initialize the network + network:init(gconf.batch_size) + gconf.cnt = 0 + err_input = {nerv.CuMatrixFloat(gconf.batch_size, 1)} + err_input[1]:fill(1) + + share_master:Initialize(network) + share_master:SyncInc() + + for example in share_example_repo.provide, share_example_repo do + + gconf.cnt = gconf.cnt + 1 + if gconf.cnt == 2000 then + print_stat(network_node_repo) + gconf.cnt = 0 + end + + local input = {} + local n = example:size() + for i = 0, n-1 do + table.insert(input, example:at(i)) + end + + local output = {nerv.CuMatrixFloat(gconf.batch_size, 1)} + err_output = {input[1]:create()} + network:propagate(input, output) + + if bp then + network:back_propagate(err_input, err_output, input, output) + network:gradient(err_input, input, output) + + share_master:LockModel() + share_master:WeightToD(network) + network:update_gradient() + share_master:WeightFromD(network) + share_master:UnLockModel() + end + + -- collect garbage in-time to save GPU memory + collectgarbage("collect") + end + + --print_stat(network_node_repo) + local ce_crit = network_node_repo:get_layer("ce_crit") + local xent = fastnn.CXent(ce_crit.total_frames, ce_crit.total_correct, ce_crit.total_ce, ce_crit.total_ce) + + share_master:LockModel() + share_xent:add(xent) + share_master:SyncDec() + --print(string.format("ThreadCount: %%d", share_master:ThreadCount())) + if share_master:ThreadCount() == 0 and bp then + share_master:WeightToD(network) + local fname = string.format("%%s_tr%%.3f", + nnet_out, frame_acc(share_xent)) + nerv.info_stderr("writing back %%s ...", fname) + network:get_params():export(fname, nil) + end + share_master:UnLockModel() +]] + +function get_data_thread(data_thread_code, env, thread_idx, example_repo_shareid, + data_mutex_shareid, feat_repo_shareid, gpu_shareid, + batch_size, bp, scp_file) + return string.format(data_thread_code, env, thread_idx, example_repo_shareid, + data_mutex_shareid, feat_repo_shareid, gpu_shareid, + batch_size, bp, scp_file) +end + +function get_train_thread(train_thread_code, env, thread_idx, example_repo_shareid, + data_mutex_shareid, master_shareid, gpu_shareid, xent_shareid, + batch_size, lrate, bp, nnet_in, nnet_out) + return string.format(train_thread_code, env, thread_idx, example_repo_shareid, + data_mutex_shareid, master_shareid, gpu_shareid, xent_shareid, + batch_size, lrate, bp, nnet_in, nnet_out) +end + +function trainer(batch_size, lrate, bp, scp_file, nnet_in, nnet_out, num_threads) + local train_threads={} + local trainer = {} + local data_threads = {} + local data = {} + local num_threads=num_threads + + local data_mutex = threads.Mutex() + local 
data_mutex_shareid = data_mutex:id() + + local master = fastnn.CModelSync() + local master_shareid = master:id() + --print(master) + + local xent = fastnn.CXent() + local xent_shareid = xent:id() + --print(xent) + + local gpu = fastnn.CDevice() + local gpu_shareid = gpu:id() + --print(gpu_shareid) + gpu:init() + + local example_repo = {} + local example_repo_shareid = {} + + local feat_repo = nerv.TNetFeatureRepo(scp_file, gconf.htk_conf, gconf.frm_ext) + local feat_repo_shareid = feat_repo:id() + + for i=1,num_threads,1 do + example_repo[i] = fastnn.CExamplesRepo(128, false) + example_repo_shareid[i] = example_repo[i]:id() + + data_threads[i] = get_data_thread(data_thread_code, env, i, example_repo_shareid[i], + data_mutex_shareid, feat_repo_shareid, gpu_shareid, + batch_size, bp, scp_file) + + train_threads[i] = get_train_thread(train_thread_code, env, i, example_repo_shareid[i], + data_mutex_shareid, master_shareid, gpu_shareid, xent_shareid, + batch_size, lrate, bp, nnet_in, nnet_out) + --print(train_threads[i]) + data[i] = threads.Thread(data_threads[i]) + trainer[i] = threads.Thread(train_threads[i]) + end + + nerv.info_stderr('| waiting for thread...') + + for i=1,num_threads,1 do + data[i]:free() + trainer[i]:free() + end + + print_xent(xent) + + nerv.info_stderr('| all thread finished!') + + return frame_acc(xent) +end + +function get_filename(fname) + return string.gsub((string.gsub(fname, "(.*/)(.*)", "%2")),"(.*)%..*", "%1") +end + +function do_sds(tr_scp, sds_scp, sds_rate) + math.randomseed(os.time()) + local scp_file = io.open(tr_scp, "r") + local sds_file = io.open(sds_scp, "w") + for line in scp_file:lines() do + rate = math.random() + if (rate < sds_rate) then + sds_file:write(line.."\n") + end + end + scp_file:close() + sds_file:close() +end + +function print_tag(iter) + io.stderr:write(string.format("########################################################\n")) + io.stderr:write(string.format("# NN TRAINING ITERATION:%d, %s\n", iter, os.date())) + io.stderr:write(string.format("########################################################\n")) +end + + +start_halving_inc = 0.5 +halving_factor = 0.8 +end_halving_inc = 0.1 +min_iter = 1 +max_iter = 20 +min_halving = 0 +gconf.batch_size = 256 +pf0 = get_filename(gconf.network[1]) +nnet_in = gconf.network[1] +nnet_out = "" +sds_scp = "tr_sds_"..string.format("%.4d", math.random()*10000)..".scp" --"tr_sds.scp" +sds_factor = 0.4 +num_threads = 1 +global_option = nil + +print_gconf() +os.execute("export MALLOC_CHECK_=0") + +-- training begin +nerv.info_stderr("begin initial cross validation") +local accu_best = trainer(gconf.batch_size, gconf.lrate, 0, + gconf.cv_scp, nnet_in, nil, num_threads) +local do_halving = false +local accu_new = accu_best + +nerv.info_stderr("initial cross validation: %.3f\n", accu_best) + +for i = 1, max_iter do + + if accu_new >= accu_best then + local sds_rate = math.cos((i-1)*11.0/180*math.pi) + if (sds_rate <= sds_factor) then + sds_rate = sds_factor + end + nerv.info_stderr("iteration %d sds_rate: %.6f", i, sds_rate) + do_sds(gconf.tr_scp, sds_scp, sds_rate) + end + + nnet_out=pf0.."_iter"..i + --print(nnet_out) + print_tag(i) + nerv.info_stderr("[NN] begin iteration %d learning_rate: %.3f batch_size: %d.", i, gconf.lrate, gconf.batch_size) + local accu_tr = trainer(gconf.batch_size, gconf.lrate, 1, + sds_scp, nnet_in, nnet_out, num_threads) + nerv.info_stderr("[TR] end iteration %d frame_accuracy: %.3f.\n", i, accu_tr) + os.execute("sleep " .. 
3) + + nnet_out = nnet_out.."_tr"..accu_tr + accu_new = trainer(gconf.batch_size, gconf.lrate, 0, + gconf.cv_scp, nnet_out, nil, num_threads) + nerv.info_stderr("[CV] end iteration %d frame_accuracy: %.3f.\n\n", i, accu_new) + os.execute("sleep " .. 3) + + local nnet_tmp = string.format("%s_%s_iter_%d_lr%f_tr%.3f_cv%.3f", + pf0, + os.date("%Y%m%d%H%M%S"), + i, gconf.lrate, accu_tr, accu_new) + + -- TODO: revert the weights + local accu_diff = accu_new - accu_best + local cmd + if accu_new > accu_best then + accu_best = accu_new + nnet_in = nnet_tmp + gconf.batch_size = gconf.batch_size + 128 + if gconf.batch_size > 1024 then + gconf.batch_size = 1024 + end + else + -- reject + nnet_tmp = nnet_tmp.."_rejected" + do_halving = true + end + cmd = "mv "..nnet_out.." "..nnet_tmp + os.execute(cmd) + + if do_halving and accu_diff < end_halving_inc and i > min_iter then + break; + end + + if accu_diff < start_halving_inc and i >= min_halving then + do_halving = true + end + + if do_halving then + gconf.lrate = gconf.lrate * halving_factor + halving_factor = halving_factor - 0.025 + if halving_factor < 0.6 then + halving_factor = 0.6 + end + end + nerv.info_stderr("iteration %d done!", i) +end + + diff --git a/fastnn/example/asgd_sds_trainer.lua b/fastnn/example/asgd_sds_trainer.lua new file mode 100644 index 0000000..cf1c7a6 --- /dev/null +++ b/fastnn/example/asgd_sds_trainer.lua @@ -0,0 +1,343 @@ + +NERV_ROOT = "/sgfs/users/wd007/src/nerv-2" + +env = string.format([[ +package.path="/home/slhome/wd007/.luarocks/share/lua/5.1/?.lua;/home/slhome/wd007/.luarocks/share/lua/5.1/?/init.lua;%s/install/share/lua/5.1/?.lua;%s/install/share/lua/5.1/?/init.lua;"..package.path; +package.cpath="/home/slhome/wd007/.luarocks/lib/lua/5.1/?.so;%s/install/lib/lua/5.1/?.so;"..package.cpath +local k,l,_=pcall(require,"luarocks.loader") _=k and l.add_context("nerv","scm-1") +]], NERV_ROOT, NERV_ROOT, NERV_ROOT) + +loadstring(env)() + +require 'nerv' + +require 'fastnn' +require 'libhtkio' +require 'threads' + +dofile("fastnn/example/fastnn_baseline.lua") + + + +train_thread_code = [[ +%s + +require 'nerv' +require 'fastnn' +require 'libhtkio' + +dofile("fastnn/example/fastnn_baseline.lua") +os.execute("export MALLOC_CHECK_=0") + +local thread_idx = %d +local feat_repo_shareid = %d +local data_mutex_shareid = %d +local master_shareid = %d +local gpu_shareid = %d +local xent_shareid = %d +local batch_size = %d +local lrate = %f +local bp = %d +local scp_file = '%s' +local nnet_in = '%s' +local nnet_out = '%s' + +local share_mutex = threads.Mutex(data_mutex_shareid) +local share_master = fastnn.ModelSync(master_shareid) +local share_gpu = fastnn.CDevice(gpu_shareid) +local share_xent = fastnn.CXent(xent_shareid) + +if bp == 0 then + bp = false +else + bp = true + gconf.tr_scp = scp_file +end + +share_mutex:lock() + +gconf.randomize = bp +gconf.lrate = lrate +gconf.batch_size = batch_size +gconf.initialized_param[2] = nnet_in +nerv.info_stderr("input network: %%s", gconf.initialized_param[2]) +--nerv.info_stderr(gconf.randomize) +nerv.info_stderr("input batch_size: %%d", gconf.batch_size) +nerv.info_stderr("input scp_file: %%s", scp_file) +nerv.info_stderr("input lrate: %%f", gconf.lrate) + + + +share_gpu:select_gpu() + +nerv.context = nerv.CCuContext() +--print(nerv.context) + +nerv.info_stderr("thread %%d loading parameters ...", thread_idx) +local param_repo = nerv.ParamRepo() +param_repo:import(gconf.initialized_param, nil, gconf) +local layer_repo = make_layer_repo(param_repo) +local network = get_network(layer_repo) 
+local global_transf = get_global_transf(layer_repo) + +share_mutex:unlock() + +local buffer = make_buffer(make_readers(nil, layer_repo, feat_repo_shareid, data_mutex_shareid)) + +local input_order = get_input_order() + + -- initialize the network + network:init(gconf.batch_size) + gconf.cnt = 0 + err_input = {nerv.CuMatrixFloat(gconf.batch_size, 1)} + err_input[1]:fill(1) + + share_master:Initialize(network) + share_master:SyncInc() + + for data in buffer.get_data, buffer do + + gconf.cnt = gconf.cnt + 1 + if gconf.cnt == 2000 then + print_stat(layer_repo) + gconf.cnt = 0 + end + + local input = {} + + for i, e in ipairs(input_order) do + local id = e.id + if data[id] == nil then + nerv.error("input data %%s not found", id) + end + local transformed + if e.global_transf then + transformed = nerv.speech_utils.global_transf(data[id], + global_transf, + gconf.frm_ext or 0, 0, + gconf) + else + transformed = data[id] + end + table.insert(input, transformed) + end + + local output = {nerv.CuMatrixFloat(gconf.batch_size, 1)} + err_output = {} + for i = 1, #input do + table.insert(err_output, input[i]:create()) + end + + network:propagate(input, output) + + if bp then + network:back_propagate(err_input, err_output, input, output) + network:gradient(err_input, input, output) + + share_master:LockModel() + share_master:WeightToD(network) + network:update_gradient() + -- network:update(err_input, input, output) + share_master:WeightFromD(network) + share_master:UnLockModel() + end + + -- collect garbage in-time to save GPU memory + collectgarbage("collect") + end + + --print_stat(network_node_repo) + local ce_crit = layer_repo:get_layer("ce_crit") + local xent = fastnn.CXent(ce_crit.total_frames, ce_crit.total_correct, ce_crit.total_ce, ce_crit.total_ce) + + share_master:LockModel() + share_xent:add(xent) + share_master:SyncDec() + --print(string.format("ThreadCount: %%d", share_master:ThreadCount())) + if share_master:ThreadCount() == 0 and bp then + share_master:WeightToD(network) + local fname = string.format("%%s_tr%%.3f", + nnet_out, frame_acc(share_xent)) + nerv.info_stderr("writing back %%s ...", fname) + network:get_params():export(fname, nil) + end + share_master:UnLockModel() +]] + + +function get_train_thread(train_thread_code, env, thread_idx, feat_repo_shareid, + data_mutex_shareid, master_shareid, gpu_shareid, xent_shareid, + batch_size, lrate, bp, scp_file, nnet_in, nnet_out) + return string.format(train_thread_code, env, thread_idx, feat_repo_shareid, + data_mutex_shareid, master_shareid, gpu_shareid, xent_shareid, + batch_size, lrate, bp, scp_file, nnet_in, nnet_out) +end + +function trainer(batch_size, lrate, bp, scp_file, nnet_in, nnet_out, num_threads) + local train_threads={} + local trainer = {} + local num_threads=num_threads + + local data_mutex = threads.Mutex() + local data_mutex_shareid = data_mutex:id() + + local master = fastnn.CModelSync() + local master_shareid = master:id() + --print(master) + + local xent = fastnn.CXent() + local xent_shareid = xent:id() + --print(xent) + + local gpu = fastnn.CDevice() + local gpu_shareid = gpu:id() + --print(gpu_shareid) + gpu:init() + + local feat_repo = nerv.TNetFeatureRepo(scp_file, gconf.htk_conf, gconf.frm_ext) + local feat_repo_shareid = feat_repo:id() + + for i=1,num_threads,1 do + + train_threads[i] = get_train_thread(train_thread_code, env, i, feat_repo_shareid, + data_mutex_shareid, master_shareid, gpu_shareid, xent_shareid, + batch_size, lrate, bp, scp_file, nnet_in, nnet_out) + --print(train_threads[i]) + trainer[i] = 
threads.Thread(train_threads[i]) + end + + nerv.info_stderr('| waiting for thread...') + + for i=1,num_threads,1 do + trainer[i]:free() + end + + print_xent(xent) + + nerv.info_stderr('| all thread finished!') + + return frame_acc(xent) +end + +function get_filename(fname) + return string.gsub((string.gsub(fname, "(.*/)(.*)", "%2")),"(.*)%..*", "%1") +end + +function do_sds(tr_scp, sds_scp, sds_rate) + math.randomseed(os.time()) + local scp_file = io.open(tr_scp, "r") + local sds_file = io.open(sds_scp, "w") + for line in scp_file:lines() do + rate = math.random() + if (rate < sds_rate) then + sds_file:write(line.."\n") + end + end + scp_file:close() + sds_file:close() +end + +function print_tag(iter) + io.stderr:write(string.format("########################################################\n")) + io.stderr:write(string.format("# NN TRAINING ITERATION:%d, %s\n", iter, os.date())) + io.stderr:write(string.format("########################################################\n")) +end + + +start_halving_inc = 0.5 +halving_factor = 0.8 +end_halving_inc = 0.1 +min_iter = 1 +max_iter = 20 +min_halving = 0 +gconf.batch_size = 256 +pf0 = get_filename(gconf.initialized_param[2]) +nnet_in = gconf.initialized_param[2] +nnet_out = "" +sds_scp = "tr_sds_"..string.format("%.4d", math.random()*10000)..".scp" --"tr_sds.scp" +sds_factor = 0.4 +num_threads = 2 +global_option = nil + +os.execute("export MALLOC_CHECK_=0") +print_gconf() + +-- training begin +nerv.info_stderr("begin initial cross validation") +accu_best = trainer(gconf.batch_size, gconf.lrate, 0, + gconf.cv_scp, nnet_in, "", num_threads) +local do_halving = false +local accu_new = accu_best + +nerv.info_stderr("initial cross validation: %.3f\n", accu_best) + +for i = 1, max_iter do + + if accu_new >= accu_best then + local sds_rate = math.cos((i-1)*11.0/180*math.pi) + if (sds_rate <= sds_factor) then + sds_rate = sds_factor + end + nerv.info_stderr("iteration %d sds_rate: %.6f", i, sds_rate) + do_sds(gconf.tr_scp, sds_scp, sds_rate) + end + + nnet_out=pf0.."_iter"..i + --print(nnet_out) + print_tag(i) + nerv.info_stderr("[NN] begin iteration %d learning_rate: %.3f batch_size: %d.", i, gconf.lrate, gconf.batch_size) + accu_tr = trainer(gconf.batch_size, gconf.lrate, 1, + sds_scp, nnet_in, nnet_out, num_threads) + collectgarbage("collect") + nerv.info_stderr("[TR] end iteration %d frame_accuracy: %.3f.\n", i, accu_tr) + os.execute("sleep " .. 3) + + nnet_out = nnet_out.."_tr"..accu_tr + accu_new = trainer(gconf.batch_size, gconf.lrate, 0, + gconf.cv_scp, nnet_out, "", num_threads) + collectgarbage("collect") + nerv.info_stderr("[CV] end iteration %d frame_accuracy: %.3f.\n\n", i, accu_new) + os.execute("sleep " .. 3) + + local nnet_tmp = string.format("%s_%s_iter_%d_lr%f_tr%.3f_cv%.3f", + pf0, + os.date("%Y%m%d%H%M%S"), + i, gconf.lrate, accu_tr, accu_new) + + -- TODO: revert the weights + local accu_diff = accu_new - accu_best + local cmd + if accu_new > accu_best then + accu_best = accu_new + nnet_in = nnet_tmp + gconf.batch_size = gconf.batch_size + 128 + if gconf.batch_size > 1024 then + gconf.batch_size = 1024 + end + else + -- reject + nnet_tmp = nnet_tmp.."_rejected" + do_halving = true + end + cmd = "mv "..nnet_out.." 
"..nnet_tmp + os.execute(cmd) + + if do_halving and accu_diff < end_halving_inc and i > min_iter then + break; + end + + if accu_diff < start_halving_inc and i >= min_halving then + do_halving = true + end + + if do_halving then + gconf.lrate = gconf.lrate * halving_factor + halving_factor = halving_factor - 0.025 + if halving_factor < 0.6 then + halving_factor = 0.6 + end + end + nerv.info_stderr("iteration %d done!", i) +end + + diff --git a/fastnn/example/fastnn_baseline.lua b/fastnn/example/fastnn_baseline.lua new file mode 100644 index 0000000..6e774de --- /dev/null +++ b/fastnn/example/fastnn_baseline.lua @@ -0,0 +1,258 @@ +require 'htk_io' + +gconf = {lrate = 0.2, wcost = 1e-6, momentum = 0.9, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + frm_ext = 5, + frm_trim = 5, + batch_size = 256, + buffer_size = 81920, + rearrange = true, + tr_scp = "/sgfs/users/wd007/asr/baseline_chn_50h/finetune/finetune_baseline/train.scp", + cv_scp = "/sgfs/users/wd007/asr/baseline_chn_50h/finetune/finetune_baseline/train_cv.scp", + htk_conf = "/sgfs/users/wd007/asr/baseline_chn_50h/finetune/finetune_baseline/fbank_d_a_z.conf", + initialized_param = {"/sgfs/users/wd007/src/nerv/tools/nerv.global.transf", + "/sgfs/users/wd007/src/nerv/tools/nerv.svd0.55_3000h_iter1.init"}, + debug = false} + +function make_layer_repo(param_repo) + local layer_repo = nerv.LayerRepo( + { + -- global transf + ["nerv.BiasLayer"] = + { + blayer1 = {{bias = "bias1"}, {dim_in = {1320}, dim_out = {1320}}}, + }, + ["nerv.WindowLayer"] = + { + wlayer1 = {{window = "window1"}, {dim_in = {1320}, dim_out = {1320}}}, + }, + -- biased linearity + ["nerv.AffineLayer"] = + { + affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, + {dim_in = {1320}, dim_out = {2048}}}, + affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, + {dim_in = {2048}, dim_out = {367}}}, + affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, + {dim_in = {367}, dim_out = {2048}}}, + affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, + {dim_in = {2048}, dim_out = {408}}}, + affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, + {dim_in = {408}, dim_out = {2048}}}, + affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, + {dim_in = {2048}, dim_out = {368}}}, + affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, + {dim_in = {368}, dim_out = {2048}}}, + affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, + {dim_in = {2048}, dim_out = {303}}}, + affine8 = {{ltp = "affine8_ltp", bp = "affine8_bp"}, + {dim_in = {303}, dim_out = {2048}}}, + affine9 = {{ltp = "affine9_ltp", bp = "affine9_bp"}, + {dim_in = {2048}, dim_out = {277}}}, + affine10 = {{ltp = "affine10_ltp", bp = "affine10_bp"}, + {dim_in = {277}, dim_out = {2048}}}, + affine11 = {{ltp = "affine11_ltp", bp = "affine11_bp"}, + {dim_in = {2048}, dim_out = {361}}}, + affine12 = {{ltp = "affine12_ltp", bp = "affine12_bp"}, + {dim_in = {361}, dim_out = {2048}}}, + affine13 = {{ltp = "affine13_ltp", bp = "affine13_bp"}, + {dim_in = {2048}, dim_out = {441}}}, + affine14 = {{ltp = "affine14_ltp", bp = "affine14_bp"}, + {dim_in = {441}, dim_out = {10092}}}, + }, + ["nerv.SigmoidLayer"] = + { + sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + }, + 
["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output + { + ce_crit = {{}, {dim_in = {10092, 1}, dim_out = {1}, compressed = true}} + }, + ["nerv.SoftmaxLayer"] = -- softmax for decode output + { + softmax = {{}, {dim_in = {10092}, dim_out = {10092}}} + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + global_transf = {{}, { + dim_in = {1320}, dim_out = {1320}, + sub_layers = layer_repo, + connections = + { + ["<input>[1]"] = "blayer1[1]", + ["blayer1[1]"] = "wlayer1[1]", + ["wlayer1[1]"] = "<output>[1]" + } + }}, + main = {{}, { + dim_in = {1320}, dim_out = {10092}, + sub_layers = layer_repo, + connections = { + ["<input>[1]"] = "affine0[1]", + ["affine0[1]"] = "sigmoid0[1]", + ["sigmoid0[1]"] = "affine1[1]", + ["affine1[1]"] = "affine2[1]", + ["affine2[1]"] = "sigmoid1[1]", + ["sigmoid1[1]"] = "affine3[1]", + ["affine3[1]"] = "affine4[1]", + ["affine4[1]"] = "sigmoid2[1]", + ["sigmoid2[1]"] = "affine5[1]", + ["affine5[1]"] = "affine6[1]", + ["affine6[1]"] = "sigmoid3[1]", + ["sigmoid3[1]"] = "affine7[1]", + ["affine7[1]"] = "affine8[1]", + ["affine8[1]"] = "sigmoid4[1]", + ["sigmoid4[1]"] = "affine9[1]", + ["affine9[1]"] = "affine10[1]", + ["affine10[1]"] = "sigmoid5[1]", + ["sigmoid5[1]"] = "affine11[1]", + ["affine11[1]"] = "affine12[1]", + ["affine12[1]"] = "sigmoid6[1]", + ["sigmoid6[1]"] = "affine13[1]", + ["affine13[1]"] = "affine14[1]", + ["affine14[1]"] = "<output>[1]", + } + }} + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + ce_output = {{}, { + dim_in = {1320, 1}, dim_out = {1}, + sub_layers = layer_repo, + connections = { + ["<input>[1]"] = "main[1]", + ["main[1]"] = "ce_crit[1]", + ["<input>[2]"] = "ce_crit[2]", + ["ce_crit[1]"] = "<output>[1]" + } + }}, + softmax_output = {{}, { + dim_in = {1320}, dim_out = {10092}, + sub_layers = layer_repo, + connections = { + ["<input>[1]"] = "main[1]", + ["main[1]"] = "softmax[1]", + ["softmax[1]"] = "<output>[1]" + } + }} + } + }, param_repo, gconf) + + return layer_repo +end + + +function get_network(layer_repo) + return layer_repo:get_layer("ce_output") +end + +function get_decode_network(layer_repo) + return layer_repo:get_layer("softmax_output") +end + +function get_global_transf(layer_repo) + return layer_repo:get_layer("global_transf") +end + + + +function make_readers(scp_file, layer_repo, feat_repo_shareid, data_mutex_shareid) + return { + {reader = nerv.TNetReader(gconf, + { + id = "main_scp", + scp_file = scp_file, + conf_file = gconf.htk_conf, + frm_ext = gconf.frm_ext, + mlfs = { + phone_state = { + file = "/sgfs/users/wd007/asr/baseline_chn_50h/finetune/finetune_baseline/ref.mlf", + format = "map", + format_arg = "/sgfs/users/wd007/asr/baseline_chn_50h/finetune/finetune_baseline/dict", + dir = "*/", + ext = "lab" + } + }, + global_transf = layer_repo:get_layer("global_transf") + }, feat_repo_shareid, data_mutex_shareid), + data = {main_scp = 1320, phone_state = 1}} + } +end + +function get_feat_id() + return {main_scp = true} +end + + +function make_buffer(readers) + return nerv.SGDBuffer(gconf, + { + buffer_size = gconf.buffer_size, + randomize = gconf.randomize, + readers = readers, + use_gpu = true + }) +end + +function get_input_order() + return {{id = "main_scp", global_transf = true}, + {id = "phone_state"}} +end + +function get_accuracy(layer_repo) + local ce_crit = layer_repo:get_layer("ce_crit") + return ce_crit.total_correct / ce_crit.total_frames * 100 +end + +function print_stat(layer_repo) + local ce_crit = 
layer_repo:get_layer("ce_crit") + nerv.info("*** training stat begin ***") + nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce) + nerv.printf("correct:\t\t%d\n", ce_crit.total_correct) + nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames) + nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames) + nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo)) + nerv.info("*** training stat end ***") +end + +function print_xent(xent) + local totalframes = xent:totalframes() + local loss = xent:loss() + local correct = xent:correct() + nerv.info_stderr("*** training statistics info begin ***") + nerv.info_stderr("total frames:\t\t%d", totalframes) + nerv.info_stderr("cross entropy:\t%.8f", loss/totalframes) + nerv.info_stderr("frame accuracy:\t%.3f%%", 100*correct/totalframes) + nerv.info_stderr("*** training statistics info end ***") +end + +function frame_acc(xent) + local correct = xent:correct() + local totalframes = xent:totalframes() + return string.format("%.3f", 100*correct/totalframes) +end + +function print_gconf() + nerv.info_stderr("%s \t:= %s", "network", gconf.initialized_param[1]) + nerv.info_stderr("%s \t:= %s", "transf", gconf.initialized_param[2]) + nerv.info_stderr("%s \t:= %s", "batch_size", gconf.batch_size) + nerv.info_stderr("%s \t:= %s", "buffer_size", gconf.buffer_size) + nerv.info_stderr("%s \t:= %s", "lrate", gconf.lrate) + nerv.info_stderr("%s \t:= %s", "tr_scp", gconf.tr_scp) + nerv.info_stderr("%s \t:= %s", "cv_scp", gconf.cv_scp) +end |
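Both trainer scripts end with the same iteration control: the new network is kept only when cross-validation frame accuracy improves, and once the per-iteration gain drops below `start_halving_inc` the learning rate is multiplied by a decaying `halving_factor` until the gain falls below `end_halving_inc`. A condensed, hypothetical restatement of that schedule (the function `next_schedule` and its return values are illustrative; the `min_iter`/`min_halving` iteration guards and the batch-size growth on accept are omitted):

```lua
-- Condensed restatement of the accept/reject and learning-rate schedule shared
-- by asgd_data_trainer.lua and asgd_sds_trainer.lua above.
local start_halving_inc = 0.5
local end_halving_inc   = 0.1
local halving_factor    = 0.8

-- Given the best and newest CV frame accuracies, decide whether to keep the new
-- weights, whether halving is active, the next learning rate, and whether to stop.
local function next_schedule(accu_best, accu_new, lrate, do_halving)
    local accu_diff = accu_new - accu_best
    local accept    = accu_new > accu_best
    if not accept then do_halving = true end                     -- reject: start halving
    local stop = do_halving and accu_diff < end_halving_inc      -- stop once gains are tiny
    if accu_diff < start_halving_inc then do_halving = true end  -- small gain: halve next time
    if do_halving then
        lrate = lrate * halving_factor
        halving_factor = math.max(halving_factor - 0.025, 0.6)   -- decaying factor, floored at 0.6
    end
    return accept, do_halving, lrate, stop
end
```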