From 619cd9f2d27fa06dd6de774c8970c2eaa2f889c6 Mon Sep 17 00:00:00 2001 From: Yimmon Zhuang Date: Wed, 14 Oct 2015 15:33:48 +0800 Subject: support kaldi decoder --- nerv/examples/mpe_chime3.lua | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/nerv/examples/mpe_chime3.lua b/nerv/examples/mpe_chime3.lua index ec095b0..0615ddc 100644 --- a/nerv/examples/mpe_chime3.lua +++ b/nerv/examples/mpe_chime3.lua @@ -7,6 +7,8 @@ gconf = {lrate = 0.00001, wcost = 0, momentum = 0.0, tr_scp = "ark,s,cs:/slfs6/users/ymz09/kaldi/src/featbin/copy-feats scp:/slfs5/users/ymz09/chime/baseline/ASR/exp/tri4a_dnn_tr05_multi_enhanced_smbr/train.scp ark:- |", initialized_param = {"/slfs6/users/ymz09/nerv-project/nerv/nerv-speech/kaldi_seq/test/chime3_init.nerv", "/slfs6/users/ymz09/nerv-project/nerv/nerv-speech/kaldi_seq/test/chime3_global_transf.nerv"}, + decode_param = {"/slfs6/users/ymz09/nerv-project/test_mpe/1.nerv", + "/slfs6/users/ymz09/nerv-project/nerv/nerv-speech/kaldi_seq/test/chime3_global_transf.nerv"}, debug = false} function make_layer_repo(param_repo) @@ -125,13 +127,12 @@ function make_layer_repo(param_repo) ["mpe_crit[1]"] = "[1]" } }}, - softmax_output = {{}, { + decode_output = {{}, { dim_in = {440}, dim_out = {2011}, sub_layers = layer_repo, connections = { ["[1]"] = "main[1]", - ["main[1]"] = "softmax[1]", - ["softmax[1]"] = "[1]" + ["main[1]"] = "[1]" } }} } @@ -145,7 +146,7 @@ function get_network(layer_repo) end function get_decode_network(layer_repo) - return layer_repo:get_layer("softmax_output") + return layer_repo:get_layer("decode_output") end function get_global_transf(layer_repo) -- cgit v1.2.3 From 21a5f8e7e0c92fb8ba249a1784ac315bd3178855 Mon Sep 17 00:00:00 2001 From: Yimmon Zhuang Date: Mon, 23 Nov 2015 16:15:37 +0800 Subject: add need_key for sequence training --- nerv/examples/mmi_chime3.lua | 1 + nerv/examples/mpe_chime3.lua | 1 + 2 files changed, 2 insertions(+) diff --git a/nerv/examples/mmi_chime3.lua 
b/nerv/examples/mmi_chime3.lua index 6ac7f28..3daaafa 100644 --- a/nerv/examples/mmi_chime3.lua +++ b/nerv/examples/mmi_chime3.lua @@ -160,6 +160,7 @@ function make_readers(feature_rspecifier, layer_repo) feature_rspecifier = feature_rspecifier, frm_ext = gconf.frm_ext, global_transf = layer_repo:get_layer("global_transf"), + need_key = true, mlfs = {} }) } diff --git a/nerv/examples/mpe_chime3.lua b/nerv/examples/mpe_chime3.lua index 0615ddc..f9a2855 100644 --- a/nerv/examples/mpe_chime3.lua +++ b/nerv/examples/mpe_chime3.lua @@ -161,6 +161,7 @@ function make_readers(feature_rspecifier, layer_repo) feature_rspecifier = feature_rspecifier, frm_ext = gconf.frm_ext, global_transf = layer_repo:get_layer("global_transf"), + need_key = true, mlfs = {} }) } -- cgit v1.2.3 From bcead9a4063190d5573604e7ca1981368162d565 Mon Sep 17 00:00:00 2001 From: Determinant Date: Mon, 23 Nov 2015 17:01:00 +0800 Subject: use rearrange = true and frm_trim = 5 in settings and reproduce the results again --- nerv/examples/swb_baseline.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nerv/examples/swb_baseline.lua b/nerv/examples/swb_baseline.lua index 8f72200..51052ba 100644 --- a/nerv/examples/swb_baseline.lua +++ b/nerv/examples/swb_baseline.lua @@ -2,9 +2,9 @@ require 'htk_io' gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, cumat_type = nerv.CuMatrixFloat, mmat_type = nerv.MMatrixFloat, - direct_update = true, + rearrange = true, -- just to make the context order consistent with old results, deprecated frm_ext = 5, - frm_trim = 5, + frm_trim = 5, -- trim the first and last 5 frames, TNet just does this, deprecated tr_scp = "/slfs1/users/mfy43/swb_ivec/train_bp.scp", cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp", htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", -- cgit v1.2.3 From 00c3f11361967a0f78fd770d20a2af3e9e7c1f50 Mon Sep 17 00:00:00 2001 From: txh18 Date: Thu, 10 Dec 2015 00:15:38 +0800 Subject: bilstm_v2 did not run well --- 
nerv/examples/lmptb/lm_trainer.lua | 2 +- nerv/tnn/tnn.lua | 13 ++++++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/nerv/examples/lmptb/lm_trainer.lua b/nerv/examples/lmptb/lm_trainer.lua index 3b8b5c3..ecedc9f 100644 --- a/nerv/examples/lmptb/lm_trainer.lua +++ b/nerv/examples/lmptb/lm_trainer.lua @@ -196,7 +196,6 @@ function LMTrainer.lm_process_file_birnn(global_conf, fn, tnn, do_train, p_conf) if r == false then break end - for t = 1, chunk_size do tnn.err_inputs_m[t][1]:fill(1) for i = 1, batch_size do @@ -269,6 +268,7 @@ function LMTrainer.lm_process_file_birnn(global_conf, fn, tnn, do_train, p_conf) collectgarbage("collect") + tnn:flush_all() --break --debug end diff --git a/nerv/tnn/tnn.lua b/nerv/tnn/tnn.lua index cf02123..bcfeb40 100644 --- a/nerv/tnn/tnn.lua +++ b/nerv/tnn/tnn.lua @@ -64,7 +64,7 @@ function TNN.make_initial_store(st, p, dim, batch_size, chunk_size, extend_t, gl if (type(st) ~= "table") then nerv.error("st should be a table") end - for i = 1 - extend_t - 1, chunk_size + extend_t + 1 do --intentionally allocated more time + for i = 1 - extend_t - 2, chunk_size + extend_t + 2 do --intentionally allocated more time if (st[i] == nil) then st[i] = {} end @@ -339,6 +339,11 @@ function TNN:net_propagate() --propagate according to feeds_now end local feeds_now = self.feeds_now + for t = 1, self.chunk_size do --some layer maybe do not have inputs from time 1..chunk_size + for id, ref in pairs(self.layers) do + self:propagate_dfs(ref, t) + end + end for t = 1, self.chunk_size do if (bit.band(feeds_now.flagsPack_now[t], nerv.TNN.FC.HAS_INPUT) > 0) then for i = 1, #self.dim_in do @@ -362,6 +367,7 @@ function TNN:net_propagate() --propagate according to feeds_now end end end + if (flag_out == false) then nerv.error("some thing wrong, some labeled output is not propagated") end @@ -458,6 +464,11 @@ function TNN:net_backpropagate(do_update) --propagate according to feeds_now end local feeds_now = self.feeds_now + for t = 1, 
self.chunk_size do --some layer maybe do not have outputs from time 1..chunk_size + for id, ref in pairs(self.layers) do + self:backpropagate_dfs(ref, t) + end + end for t = 1, self.chunk_size do if bit.band(feeds_now.flagsPack_now[t], nerv.TNN.FC.HAS_LABEL) > 0 then for i = 1, #self.dim_out do -- cgit v1.2.3 From 62169f73b935dd6df8fe0c5628beed58820d186e Mon Sep 17 00:00:00 2001 From: txh18 Date: Thu, 10 Dec 2015 00:16:26 +0800 Subject: ... --- nerv/examples/lmptb/bilstmlm_v2_ptb_main.lua | 522 +++++++++++++++++++++++++++ 1 file changed, 522 insertions(+) create mode 100644 nerv/examples/lmptb/bilstmlm_v2_ptb_main.lua diff --git a/nerv/examples/lmptb/bilstmlm_v2_ptb_main.lua b/nerv/examples/lmptb/bilstmlm_v2_ptb_main.lua new file mode 100644 index 0000000..4f52f29 --- /dev/null +++ b/nerv/examples/lmptb/bilstmlm_v2_ptb_main.lua @@ -0,0 +1,522 @@ +--[[ +The bilstm_v2 slightly changed the structure of tnn so that the current prediction won't have info about the very word to predict, so we should not get an amazingly low PPL +]]-- +require 'lmptb.lmvocab' +require 'lmptb.lmfeeder' +require 'lmptb.lmutil' +require 'lmptb.layer.init' +--require 'tnn.init' +require 'lmptb.lmseqreader' +require 'lm_trainer' + +--[[global function rename]]-- +--local printf = nerv.printf +local LMTrainer = nerv.LMTrainer +--[[global function rename ends]]-- + +--global_conf: table +--first_time: bool +--Returns: a ParamRepo +function prepare_parameters(global_conf, iter) + nerv.printf("%s preparing parameters...\n", global_conf.sche_log_pre) + + global_conf.paramRepo = nerv.ParamRepo() + local paramRepo = global_conf.paramRepo + + if iter == -1 then --first time + nerv.printf("%s first time, prepare some pre-set parameters, and leaving other parameters to auto-generation...\n", global_conf.sche_log_pre) + local f = nerv.ChunkFile(global_conf.param_fn .. 
'.0', 'w') + f:close() + --[[ + ltp_ih = nerv.LinearTransParam("ltp_ih", global_conf) + ltp_ih.trans = global_conf.cumat_type(global_conf.vocab:size(), global_conf.hidden_size) --index 0 is for zero, others correspond to vocab index(starting from 1) + ltp_ih.trans:generate(global_conf.param_random) + + ltp_hh = nerv.LinearTransParam("ltp_hh", global_conf) + ltp_hh.trans = global_conf.cumat_type(global_conf.hidden_size, global_conf.hidden_size) + ltp_hh.trans:generate(global_conf.param_random) + + --ltp_ho = nerv.LinearTransParam("ltp_ho", global_conf) + --ltp_ho.trans = global_conf.cumat_type(global_conf.hidden_size, global_conf.vocab:size()) + --ltp_ho.trans:generate(global_conf.param_random) + + bp_h = nerv.BiasParam("bp_h", global_conf) + bp_h.trans = global_conf.cumat_type(1, global_conf.hidden_size) + bp_h.trans:generate(global_conf.param_random) + + --bp_o = nerv.BiasParam("bp_o", global_conf) + --bp_o.trans = global_conf.cumat_type(1, global_conf.vocab:size()) + --bp_o.trans:generate(global_conf.param_random) + + local f = nerv.ChunkFile(global_conf.param_fn .. '.0', 'w') + f:write_chunk(ltp_ih) + f:write_chunk(ltp_hh) + --f:write_chunk(ltp_ho) + f:write_chunk(bp_h) + --f:write_chunk(bp_o) + f:close() + ]]-- + return nil + end + + nerv.printf("%s loading parameter from file %s...\n", global_conf.sche_log_pre, global_conf.param_fn .. '.' .. tostring(iter)) + paramRepo:import({global_conf.param_fn .. '.' .. 
tostring(iter)}, nil, global_conf) + + nerv.printf("%s preparing parameters end.\n", global_conf.sche_log_pre) + + return nil +end + +--global_conf: table +--Returns: nerv.LayerRepo +function prepare_layers(global_conf) + nerv.printf("%s preparing layers...\n", global_conf.sche_log_pre) + + local pr = global_conf.paramRepo + + local du = false + + local layers = { + ["nerv.LSTMLayerT"] = { + ["lstmFL1"] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["pr"] = pr}}, + ["lstmRL1"] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["pr"] = pr}}, + }, + + ["nerv.DropoutLayerT"] = { + ["dropoutL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}, + }, + + ["nerv.SelectLinearLayer"] = { + ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab, ["pr"] = pr}}, + }, + + ["nerv.CombinerLayer"] = { + ["combinerXL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}}, + ["combinerHFL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}}, + ["combinerHRL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}}, + }, + + ["nerv.AffineLayer"] = { + ["biAffineL1"] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["pr"] = pr, ["lambda"] = {1, 1}}}, + ["outputL"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.vocab:size()}, ["direct_update"] = du, ["pr"] = pr}}, + }, + + ["nerv.TanhLayer"] = { + ["biTanhL1"] = {{}, {["dim_in"] = 
{global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}, + }, + + ["nerv.SoftmaxCELayerT"] = { + ["softmaxL"] = {{}, {["dim_in"] = {global_conf.vocab:size(), global_conf.vocab:size()}, ["dim_out"] = {1}}}, + }, + } + + if global_conf.layer_num > 1 then + nerv.error("this script currently do not support more than one layer") + end + --[[ + for l = 2, global_conf.layer_num do + layers["nerv.DropoutLayerT"]["dropoutL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}} + layers["nerv.LSTMLayerT"]["lstmL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["pr"] = pr}} + layers["nerv.CombinerLayer"]["combinerL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}} + end + ]]-- + + local layerRepo = nerv.LayerRepo(layers, pr, global_conf) + nerv.printf("%s preparing layers end.\n", global_conf.sche_log_pre) + return layerRepo +end + +--global_conf: table +--layerRepo: nerv.LayerRepo +--Returns: a nerv.TNN +function prepare_tnn(global_conf, layerRepo) + nerv.printf("%s Generate and initing TNN ...\n", global_conf.sche_log_pre) + + --input: input_w, input_w, ... 
input_w_now, last_activation + local connections_t = { + {"[1]", "selectL1[1]", 0}, + + --{"selectL1[1]", "recurrentL1[1]", 0}, + --{"recurrentL1[1]", "sigmoidL1[1]", 0}, + --{"sigmoidL1[1]", "combinerL1[1]", 0}, + --{"combinerL1[1]", "recurrentL1[2]", 1}, + + {"selectL1[1]", "combinerXL1[1]", 0}, + {"combinerXL1[1]", "lstmFL1[1]", 0}, + {"lstmFL1[1]", "combinerHFL1[1]", 0}, + {"combinerHFL1[1]", "lstmFL1[2]", 1}, + {"lstmFL1[2]", "lstmFL1[3]", 1}, + {"combinerXL1[2]", "lstmRL1[1]", 0}, + {"lstmRL1[1]", "combinerHRL1[1]", 0}, + {"combinerHRL1[1]", "lstmRL1[2]", -1}, + {"lstmRL1[2]", "lstmRL1[3]", -1}, + {"combinerHFL1[2]", "biAffineL1[1]", 0}, + {"combinerHRL1[2]", "biAffineL1[2]", -2}, + {"biAffineL1[1]", "biTanhL1[1]", 0}, + {"biTanhL1[1]", "dropoutL1[1]", 0}, + + {"dropoutL"..global_conf.layer_num.."[1]", "outputL[1]", 0}, + {"outputL[1]", "softmaxL[1]", 0}, + {"[2]", "softmaxL[2]", 0}, + {"softmaxL[1]", "[1]", 0} + } + + --[[ + for l = 2, global_conf.layer_num do + table.insert(connections_t, {"dropoutL"..(l-1).."[1]", "lstmL"..l.."[1]", 0}) + table.insert(connections_t, {"lstmL"..l.."[2]", "lstmL"..l.."[3]", 1}) + table.insert(connections_t, {"lstmL"..l.."[1]", "combinerL"..l.."[1]", 0}) + table.insert(connections_t, {"combinerL"..l.."[1]", "lstmL"..l.."[2]", 1}) + table.insert(connections_t, {"combinerL"..l.."[2]", "dropoutL"..l.."[1]", 0}) + end + ]]-- + + --[[ + printf("%s printing DAG connections:\n", global_conf.sche_log_pre) + for key, value in pairs(connections_t) do + printf("\t%s->%s\n", key, value) + end + ]]-- + + local tnn = nerv.TNN("TNN", global_conf, {["dim_in"] = {1, global_conf.vocab:size()}, + ["dim_out"] = {1}, ["sub_layers"] = layerRepo, + ["connections"] = connections_t, ["clip_t"] = global_conf.clip_t, + }) + + tnn:init(global_conf.batch_size, global_conf.chunk_size) + + nerv.printf("%s Initing TNN end.\n", global_conf.sche_log_pre) + return tnn +end + +function load_net(global_conf, next_iter) + prepare_parameters(global_conf, next_iter) 
+ local layerRepo = prepare_layers(global_conf) + local tnn = prepare_tnn(global_conf, layerRepo) + return tnn +end + +local train_fn, valid_fn, test_fn +global_conf = {} +local set = arg[1] --"test" + +if (set == "ptb") then + +root_dir = '/home/slhome/txh18/workspace' +data_dir = root_dir .. '/ptb/DATA' +train_fn = data_dir .. '/ptb.train.txt.adds' +valid_fn = data_dir .. '/ptb.valid.txt.adds' +test_fn = data_dir .. '/ptb.test.txt.adds' +vocab_fn = data_dir .. '/vocab' + +qdata_dir = root_dir .. '/ptb/questionGen/gen' + +global_conf = { + lrate = 0.015, wcost = 1e-5, momentum = 0, clip_t = 5, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + nn_act_default = 0, + + hidden_size = 300, + layer_num = 1, + chunk_size = 90, + batch_size = 20, + max_iter = 35, + lr_decay = 1.003, + decay_iter = 10, + param_random = function() return (math.random() / 5 - 0.1) end, + dropout_str = "0", + + train_fn = train_fn, + valid_fn = valid_fn, + test_fn = test_fn, + vocab_fn = vocab_fn, + max_sen_len = 90, + sche_log_pre = "[SCHEDULER]:", + log_w_num = 40000, --give a message when log_w_num words have been processed + timer = nerv.Timer(), + work_dir_base = '/home/slhome/txh18/workspace/ptb/EXP-nerv/bilstmlm_v2.0' +} + +elseif (set == "msr_sc") then + +data_dir = '/home/slhome/txh18/workspace/sentenceCompletion/DATA_PV2' +train_fn = data_dir .. '/normed_all.sf.len60.adds.train' +valid_fn = data_dir .. '/normed_all.sf.len60.adds.dev' +test_fn = data_dir .. '/answer_normed.adds' +vocab_fn = data_dir .. 
'/normed_all.choose.vocab30000.addqvocab' + +global_conf = { + lrate = 1, wcost = 1e-6, momentum = 0, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + nn_act_default = 0, + + hidden_size = 300, + layer_num = 1, + chunk_size = 15, + batch_size = 10, + max_iter = 30, + decay_iter = 10, + lr_decay = 1.003, + param_random = function() return (math.random() / 5 - 0.1) end, + dropout_str = "0", + + train_fn = train_fn, + valid_fn = valid_fn, + test_fn = test_fn, + vocab_fn = vocab_fn, + sche_log_pre = "[SCHEDULER]:", + log_w_num = 400000, --give a message when log_w_num words have been processed + timer = nerv.Timer(), + work_dir_base = '/home/slhome/txh18/workspace/sentenceCompletion/EXP-Nerv/rnnlm_test' +} + +elseif (set == "twitter") then + +root_dir = '/home/slhome/txh18/workspace' +data_dir = root_dir .. '/twitter_new/DATA' +train_fn = data_dir .. '/twitter.choose.adds' +valid_fn = data_dir .. '/twitter.valid.adds' +test_fn = data_dir .. '/comm.test.choose-ppl.adds' +vocab_fn = data_dir .. '/twitter.choose.train.vocab' + +--qdata_dir = root_dir .. '/ptb/questionGen/gen' + +global_conf = { + lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + nn_act_default = 0, + + hidden_size = 300, + layer_num = 1, + chunk_size = 15, + batch_size = 20, + max_iter = 35, + lr_decay = 1.003, + decay_iter = 10, + param_random = function() return (math.random() / 5 - 0.1) end, + dropout_str = "0", + + train_fn = train_fn, + valid_fn = valid_fn, + test_fn = test_fn, + vocab_fn = vocab_fn, + max_sen_len = 90, + sche_log_pre = "[SCHEDULER]:", + log_w_num = 40000, --give a message when log_w_num words have been processed + timer = nerv.Timer(), + work_dir_base = root_dir .. 
'/twitter_new/EXP-nerv/bilstmlm_v1.0' +} + +else + +valid_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text' +train_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text' +test_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text' +vocab_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text' + +global_conf = { + lrate = 0.01, wcost = 1e-5, momentum = 0, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + nn_act_default = 0, + + hidden_size = 20, + layer_num = 1, + chunk_size = 20, + batch_size = 10, + max_iter = 2, + param_random = function() return (math.random() / 5 - 0.1) end, + dropout_str = "0", + + train_fn = train_fn, + valid_fn = valid_fn, + test_fn = test_fn, + max_sen_len = 80, + lr_decay = 1.003, + decay_iter = 10, + vocab_fn = vocab_fn, + sche_log_pre = "[SCHEDULER]:", + log_w_num = 10, --give a message when log_w_num words have been processed + timer = nerv.Timer(), + work_dir_base = '/home/slhome/txh18/workspace/nerv/play/testEXP/tnn_bilstmlm_test' +} + +end + +lr_half = false --can not be local, to be set by loadstring +start_iter = -1 +ppl_last = 100000 +commands_str = "train:test" +commands = {} +test_iter = -1 +start_lr = nil + +--for testout(question) +q_file = "/home/slhome/txh18/workspace/ptb/questionGen/gen/ptb.test.txt.q10rs1_Msss.adds" + +if arg[2] ~= nil then + nerv.printf("%s applying arg[2](%s)...\n", global_conf.sche_log_pre, arg[2]) + loadstring(arg[2])() + nerv.LMUtil.wait(0.5) +else + nerv.printf("%s no user setting, all default...\n", global_conf.sche_log_pre) +end + + +global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'l' .. global_conf.layer_num --.. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost .. 'dr' .. global_conf.dropout_str +global_conf.train_fn_shuf = global_conf.work_dir .. 
'/train_fn_shuf' +global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak' +global_conf.param_fn = global_conf.work_dir .. "/params" +global_conf.dropout_list = nerv.SUtil.parse_schedule(global_conf.dropout_str) +global_conf.log_fn = global_conf.work_dir .. '/log_lstm_tnn_' .. commands_str ..os.date("_TT%m_%d_%X",os.time()) +global_conf.log_fn, _ = string.gsub(global_conf.log_fn, ':', '-') +commands = nerv.SUtil.parse_commands_set(commands_str) +if start_lr ~= nil then + global_conf.lrate = start_lr --starting lr can be set by user(arg[2]) +end + +nerv.printf("%s creating work_dir(%s)...\n", global_conf.sche_log_pre, global_conf.work_dir) +nerv.LMUtil.wait(2) +os.execute("mkdir -p "..global_conf.work_dir) +os.execute("cp " .. global_conf.train_fn .. " " .. global_conf.train_fn_shuf) + +--redirecting log outputs! +nerv.SUtil.log_redirect(global_conf.log_fn) +nerv.LMUtil.wait(2) + +----------------printing options--------------------------------- +nerv.printf("%s printing global_conf...\n", global_conf.sche_log_pre) +for id, value in pairs(global_conf) do + nerv.printf("%s:\t%s\n", id, tostring(value)) +end +nerv.LMUtil.wait(2) + +nerv.printf("%s printing training scheduling options...\n", global_conf.sche_log_pre) +nerv.printf("lr_half:\t%s\n", tostring(lr_half)) +nerv.printf("start_iter:\t%s\n", tostring(start_iter)) +nerv.printf("ppl_last:\t%s\n", tostring(ppl_last)) +nerv.printf("commands_str:\t%s\n", commands_str) +nerv.printf("test_iter:\t%s\n", tostring(test_iter)) +nerv.printf("%s printing training scheduling end.\n", global_conf.sche_log_pre) +nerv.LMUtil.wait(2) +------------------printing options end------------------------------ + +math.randomseed(1) + +local vocab = nerv.LMVocab() +global_conf["vocab"] = vocab +nerv.printf("%s building vocab...\n", global_conf.sche_log_pre) +global_conf.vocab:build_file(global_conf.vocab_fn, false) +ppl_rec = {} + +local final_iter = -1 +if commands["train"] == 1 then + if start_iter == -1 then + 
prepare_parameters(global_conf, -1) --write pre_generated params to param.0 file + end + + if start_iter == -1 or start_iter == 0 then + nerv.printf("===INITIAL VALIDATION===\n") + local tnn = load_net(global_conf, 0) + global_conf.paramRepo = tnn:get_params() --get auto-generted params + global_conf.paramRepo:export(global_conf.param_fn .. '.0', nil) --some parameters are auto-generated, saved again to param.0 file + global_conf.dropout_rate = 0 + local result = LMTrainer.lm_process_file_birnn(global_conf, global_conf.valid_fn, tnn, false) --false update! + nerv.LMUtil.wait(1) + ppl_rec[0] = {} + ppl_rec[0].valid = result:ppl_all("birnn") + ppl_last = ppl_rec[0].valid + ppl_rec[0].train = 0 + ppl_rec[0].test = 0 + ppl_rec[0].lr = 0 + + start_iter = 1 + + nerv.printf("\n") + end + + for iter = start_iter, global_conf.max_iter, 1 do + final_iter = iter --for final testing + global_conf.sche_log_pre = "[SCHEDULER ITER"..iter.." LR"..global_conf.lrate.."]:" + tnn = load_net(global_conf, iter - 1) + nerv.printf("===ITERATION %d LR %f===\n", iter, global_conf.lrate) + global_conf.dropout_rate = nerv.SUtil.sche_get(global_conf.dropout_list, iter) + result = LMTrainer.lm_process_file_birnn(global_conf, global_conf.train_fn_shuf, tnn, true) --true update! + global_conf.dropout_rate = 0 + ppl_rec[iter] = {} + ppl_rec[iter].train = result:ppl_all("birnn") + --shuffling training file + nerv.printf("%s shuffling training file\n", global_conf.sche_log_pre) + os.execute('cp ' .. global_conf.train_fn_shuf .. ' ' .. global_conf.train_fn_shuf_bak) + os.execute('cat ' .. global_conf.train_fn_shuf_bak .. ' | sort -R --random-source=/dev/zero > ' .. global_conf.train_fn_shuf) + nerv.printf("===PEEK ON TEST %d===\n", iter) + result = LMTrainer.lm_process_file_birnn(global_conf, global_conf.test_fn, tnn, false) --false update! 
+ ppl_rec[iter].test = result:ppl_all("birnn") + nerv.printf("===VALIDATION %d===\n", iter) + result = LMTrainer.lm_process_file_birnn(global_conf, global_conf.valid_fn, tnn, false) --false update! + ppl_rec[iter].valid = result:ppl_all("birnn") + ppl_rec[iter].lr = global_conf.lrate + if ((ppl_last / ppl_rec[iter].valid < global_conf.lr_decay or lr_half == true) and iter > global_conf.decay_iter) then + global_conf.lrate = (global_conf.lrate * 0.6) + end + if ppl_rec[iter].valid < ppl_last then + nerv.printf("%s PPL improves, saving net to file %s.%d...\n", global_conf.sche_log_pre, global_conf.param_fn, iter) + global_conf.paramRepo:export(global_conf.param_fn .. '.' .. tostring(iter), nil) + else + nerv.printf("%s PPL did not improve, rejected, copying param file of last iter...\n", global_conf.sche_log_pre) + os.execute('cp ' .. global_conf.param_fn..'.'..tostring(iter - 1) .. ' ' .. global_conf.param_fn..'.'..tostring(iter)) + end + if ppl_last / ppl_rec[iter].valid < global_conf.lr_decay or lr_half == true then + lr_half = true + end + if ppl_rec[iter].valid < ppl_last then + ppl_last = ppl_rec[iter].valid + end + nerv.printf("\n") + nerv.LMUtil.wait(2) + end + nerv.info("saving final nn to param.final") + os.execute('cp ' .. global_conf.param_fn .. '.' .. tostring(final_iter) .. ' ' .. global_conf.param_fn .. '.final') + + nerv.printf("===VALIDATION PPL record===\n") + for i, _ in pairs(ppl_rec) do + nerv.printf("<ITER%d LR%.5f train:%.3f valid:%.3f test:%.3f> \n", i, ppl_rec[i].lr, ppl_rec[i].train, ppl_rec[i].valid, ppl_rec[i].test) + end + nerv.printf("\n") +end --if commands["train"] + +if commands["test"] == 1 then + nerv.printf("===FINAL TEST===\n") + global_conf.sche_log_pre = "[SCHEDULER FINAL_TEST]:" + if final_iter ~= -1 and test_iter == -1 then + test_iter = final_iter + end + if test_iter == -1 then + test_iter = "final" + end + tnn = load_net(global_conf, test_iter) + global_conf.dropout_rate = 0 + LMTrainer.lm_process_file_birnn(global_conf, global_conf.test_fn, tnn, false) --false update! 
+end --if commands["test"] + +if commands["testout"] == 1 then + nerv.printf("===TEST OUT===\n") + nerv.printf("q_file:\t%s\n", q_file) + local q_fn = q_file --qdata_dir .. '/' .. q_file + global_conf.sche_log_pre = "[SCHEDULER FINAL_TEST]:" + if final_iter ~= -1 and test_iter == -1 then + test_iter = final_iter + end + if test_iter == -1 then + test_iter = "final" + end + tnn = load_net(global_conf, test_iter) + global_conf.dropout_rate = 0 + LMTrainer.lm_process_file_birnn(global_conf, q_fn, tnn, false, + {["one_sen_report"] = true}) --false update! +end --if commands["testout"] + + -- cgit v1.2.3 From 91075c34160fa24e484148b26c1178e05c2212a4 Mon Sep 17 00:00:00 2001 From: txh18 Date: Thu, 10 Dec 2015 13:28:13 +0800 Subject: bug fix for recent changes in tnn --- nerv/examples/lmptb/lmptb/layer/select_linear.lua | 2 +- nerv/tnn/tnn.lua | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/nerv/examples/lmptb/lmptb/layer/select_linear.lua b/nerv/examples/lmptb/lmptb/layer/select_linear.lua index 580b9c5..431ef3a 100644 --- a/nerv/examples/lmptb/lmptb/layer/select_linear.lua +++ b/nerv/examples/lmptb/lmptb/layer/select_linear.lua @@ -30,7 +30,7 @@ function SL:init(batch_size) end function SL:update(bp_err, input, output) - --use this to produce reproducable result + --use this to produce reproducable result, don't forget to set the dropout to zero! 
--for i = 1, input[1]:nrow(), 1 do -- local word_vec = self.ltp.trans[input[1][i - 1][0]] -- word_vec:add(word_vec, bp_err[1][i - 1], 1, - self.gconf.lrate / self.gconf.batch_size) diff --git a/nerv/tnn/tnn.lua b/nerv/tnn/tnn.lua index bcfeb40..7ae3172 100644 --- a/nerv/tnn/tnn.lua +++ b/nerv/tnn/tnn.lua @@ -466,7 +466,7 @@ function TNN:net_backpropagate(do_update) --propagate according to feeds_now local feeds_now = self.feeds_now for t = 1, self.chunk_size do --some layer maybe do not have outputs from time 1..chunk_size for id, ref in pairs(self.layers) do - self:backpropagate_dfs(ref, t) + self:backpropagate_dfs(ref, t, do_update) end end for t = 1, self.chunk_size do @@ -500,6 +500,9 @@ end --ref: the TNN_ref of a layer --t: the current time to propagate function TNN:backpropagate_dfs(ref, t, do_update) + if do_update == nil then + nerv.error("got a nil do_update") + end if self:out_of_feedrange(t) then return end -- cgit v1.2.3 From 5cf7e88df3aa4cf60819e955f0f537d2cfeccaac Mon Sep 17 00:00:00 2001 From: txh18 Date: Thu, 10 Dec 2015 17:20:31 +0800 Subject: removed flush_all for every mb in process_birnn --- nerv/examples/lmptb/lm_trainer.lua | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nerv/examples/lmptb/lm_trainer.lua b/nerv/examples/lmptb/lm_trainer.lua index ecedc9f..eab6e2d 100644 --- a/nerv/examples/lmptb/lm_trainer.lua +++ b/nerv/examples/lmptb/lm_trainer.lua @@ -246,6 +246,8 @@ function LMTrainer.lm_process_file_birnn(global_conf, fn, tnn, do_train, p_conf) --tnn:move_right_to_nextmb({0}) --do not need history for bi directional model global_conf.timer:toc('tnn_afterprocess') + --tnn:flush_all() --you need this for bilstmlm_ptb_v2, because it has connection across 2 time steps + global_conf.timer:toc('most_out_loop_lmprocessfile') --print log @@ -268,7 +270,6 @@ function LMTrainer.lm_process_file_birnn(global_conf, fn, tnn, do_train, p_conf) collectgarbage("collect") - tnn:flush_all() --break --debug end -- cgit v1.2.3 From 
1b0f2ab768e34f126ce946e1689cd47c86f41645 Mon Sep 17 00:00:00 2001 From: Determinant Date: Thu, 10 Dec 2015 21:46:11 +0800 Subject: fix the bug in freeing submatrix; add `offset` to Matrix structure --- nerv/lib/matrix/cukernel.cu | 1 + nerv/lib/matrix/cumatrix.c | 1 + nerv/lib/matrix/generic/cumatrix.c | 2 +- nerv/lib/matrix/generic/elem_type.h | 13 ++++++++++--- nerv/lib/matrix/generic/matrix.c | 9 +++++---- nerv/lib/matrix/matrix.h | 1 + nerv/lib/matrix/mmatrix.c | 5 ++++- nerv/matrix/cumatrix.c | 1 + nerv/matrix/generic/cumatrix.c | 2 +- nerv/matrix/generic/elem_type.h | 22 ---------------------- nerv/matrix/generic/mmatrix.c | 2 +- nerv/matrix/mmatrix.c | 2 ++ 12 files changed, 28 insertions(+), 33 deletions(-) delete mode 100644 nerv/matrix/generic/elem_type.h diff --git a/nerv/lib/matrix/cukernel.cu b/nerv/lib/matrix/cukernel.cu index 210e6bf..c20e538 100644 --- a/nerv/lib/matrix/cukernel.cu +++ b/nerv/lib/matrix/cukernel.cu @@ -44,6 +44,7 @@ __device__ float atomicAdd_nvidia(float* address, float val) { #undef MATRIX_USE_FLOAT #undef MATRIX_ELEM #undef MATRIX_ELEM_PTR +#undef MATRIX_ELEM_PTR_BASE #undef MATRIX_ELEM_FMT #undef MATRIX_ELEM_WRITE_FMT diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c index ff1168d..a8ed075 100644 --- a/nerv/lib/matrix/cumatrix.c +++ b/nerv/lib/matrix/cumatrix.c @@ -57,6 +57,7 @@ void nerv_cumatrix_init() { #undef MATRIX_USE_FLOAT #undef MATRIX_ELEM #undef MATRIX_ELEM_PTR +#undef MATRIX_ELEM_PTR_BASE #undef MATRIX_ELEM_FMT #undef MATRIX_ELEM_WRITE_FMT #undef MATRIX_CUMATRIX_HOST_TNAME diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c index 00af895..7643c01 100644 --- a/nerv/lib/matrix/generic/cumatrix.c +++ b/nerv/lib/matrix/generic/cumatrix.c @@ -315,7 +315,7 @@ void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b, long nrow = a->nrow; if (!(0 <= b_begin && b_begin + nrow <= idx->ncol)) NERV_EXIT_STATUS(status, MAT_INVALID_COPY_INTERVAL, 0); - float *idx_ptr = 
idx->data.f; + float *idx_ptr = MATRIX_ELEM_PTR_F(idx); int i; if (idx->nrow != 1) NERV_EXIT_STATUS(status, MAT_IDX_VECTOR_EXP, 0); diff --git a/nerv/lib/matrix/generic/elem_type.h b/nerv/lib/matrix/generic/elem_type.h index bffe940..07f6355 100644 --- a/nerv/lib/matrix/generic/elem_type.h +++ b/nerv/lib/matrix/generic/elem_type.h @@ -1,22 +1,29 @@ +#define MATRIX_ELEM_PTR_F(self) ((float *)((char *)((self)->data.f) + (self)->offset)) +#define MATRIX_ELEM_PTR_D(self) ((double *)((char *)((self)->data.d) + (self)->offset)) +#define MATRIX_ELEM_PTR_I(self) ((long *)((char *)((self)->data.i) + (self)->offset)) + #ifdef MATRIX_USE_FLOAT #define MATRIX_ELEM float #define MATRIX_ELEM_FMT "%f" #define MATRIX_ELEM_WRITE_FMT "%.8f" -#define MATRIX_ELEM_PTR(self) ((self)->data.f) +#define MATRIX_ELEM_PTR(self) MATRIX_ELEM_PTR_F(self) +#define MATRIX_ELEM_PTR_BASE(self) ((self)->data.f) #elif defined(MATRIX_USE_DOUBLE) #define MATRIX_ELEM double #define MATRIX_ELEM_FMT "%lf" #define MATRIX_ELEM_WRITE_FMT "%.8lf" -#define MATRIX_ELEM_PTR(self) ((self)->data.d) +#define MATRIX_ELEM_PTR(self) MATRIX_ELEM_PTR_D(self) +#define MATRIX_ELEM_PTR_BASE(self) ((self)->data.d) #elif defined(MATRIX_USE_INT) #define MATRIX_ELEM long #define MATRIX_ELEM_FMT "%ld" #define MATRIX_ELEM_WRITE_FMT "%ld" -#define MATRIX_ELEM_PTR(self) ((self)->data.i) +#define MATRIX_ELEM_PTR(self) MATRIX_ELEM_PTR_I(self) +#define MATRIX_ELEM_PTR_BASE(self) ((self)->data.i) #endif diff --git a/nerv/lib/matrix/generic/matrix.c b/nerv/lib/matrix/generic/matrix.c index 4246751..998d107 100644 --- a/nerv/lib/matrix/generic/matrix.c +++ b/nerv/lib/matrix/generic/matrix.c @@ -4,12 +4,11 @@ /* FIXME: malloc failure detection */ void nerv_matrix_(data_free)(Matrix *self, Status *status) { - if(*self->data_ref == 0) return; /* FIXME: repeat free memory */ assert(*self->data_ref > 0); if (--(*self->data_ref) == 0) { /* free matrix data */ - MATRIX_DATA_FREE(MATRIX_ELEM_PTR(self), status); + 
MATRIX_DATA_FREE(MATRIX_ELEM_PTR_BASE(self), status); free(self->data_ref); free(self); } @@ -29,7 +28,7 @@ Matrix *nerv_matrix_(create)(long nrow, long ncol, Status *status) { self->ncol = ncol; self->nmax = self->nrow * self->ncol; self->dim = 2; - MATRIX_DATA_ALLOC(&MATRIX_ELEM_PTR(self), &self->stride, + MATRIX_DATA_ALLOC(&MATRIX_ELEM_PTR_BASE(self), &self->stride, sizeof(MATRIX_ELEM) * self->ncol, self->nrow, status); if (status->err_code != NERV_NORMAL) @@ -39,6 +38,7 @@ Matrix *nerv_matrix_(create)(long nrow, long ncol, Status *status) { } self->data_ref = (long *)malloc(sizeof(long)); *self->data_ref = 0; + self->offset = 0; nerv_matrix_(data_retain)(self); NERV_SET_STATUS(status, NERV_NORMAL, 0); return self; @@ -55,8 +55,9 @@ Matrix *nerv_matrix_(getrow)(Matrix *self, int row) { prow->dim = 1; prow->stride = self->stride; prow->nmax = prow->ncol; - MATRIX_ELEM_PTR(prow) = MATRIX_ROW_PTR(self, row); + prow->data = self->data; prow->data_ref = self->data_ref; + prow->offset = row * self->stride; nerv_matrix_(data_retain)(prow); return prow; } diff --git a/nerv/lib/matrix/matrix.h b/nerv/lib/matrix/matrix.h index 67a6e30..073bd13 100644 --- a/nerv/lib/matrix/matrix.h +++ b/nerv/lib/matrix/matrix.h @@ -12,6 +12,7 @@ typedef struct Matrix { double *d; long *i; } data; /* pointer to actual storage */ + unsigned long offset; /* the actual beginning of the matrix */ long *data_ref; } Matrix; diff --git a/nerv/lib/matrix/mmatrix.c b/nerv/lib/matrix/mmatrix.c index b8157eb..b5670f2 100644 --- a/nerv/lib/matrix/mmatrix.c +++ b/nerv/lib/matrix/mmatrix.c @@ -6,6 +6,7 @@ #define host_matrix_(NAME) host_matrix_float_##NAME #define nerv_matrix_(NAME) nerv_matrix_host_float_##NAME #include "generic/matrix.h" +#include "generic/elem_type.h" #include "generic/mmatrix.c" Matrix *nerv_matrix_(perm_gen)(int ncol, Status *status) { @@ -13,7 +14,7 @@ Matrix *nerv_matrix_(perm_gen)(int ncol, Status *status) { Matrix *self = nerv_matrix_(create)(1, ncol, status); if 
(status->err_code != NERV_NORMAL) return NULL; - float *prow = self->data.f; + float *prow = MATRIX_ELEM_PTR_F(self); for (i = 0; i < ncol; i++) prow[i] = i; for (i = ncol - 1; i >= 0; i--) @@ -31,6 +32,7 @@ Matrix *nerv_matrix_(perm_gen)(int ncol, Status *status) { #undef MATRIX_USE_FLOAT #undef MATRIX_ELEM #undef MATRIX_ELEM_PTR +#undef MATRIX_ELEM_PTR_BASE #undef MATRIX_ELEM_FMT #undef MATRIX_ELEM_WRITE_FMT @@ -44,6 +46,7 @@ Matrix *nerv_matrix_(perm_gen)(int ncol, Status *status) { #undef MATRIX_USE_DOUBLE #undef MATRIX_ELEM #undef MATRIX_ELEM_PTR +#undef MATRIX_ELEM_PTR_BASE #undef MATRIX_ELEM_FMT #undef MATRIX_ELEM_WRITE_FMT diff --git a/nerv/matrix/cumatrix.c b/nerv/matrix/cumatrix.c index fef03fc..bf92f92 100644 --- a/nerv/matrix/cumatrix.c +++ b/nerv/matrix/cumatrix.c @@ -49,6 +49,7 @@ const char *nerv_matrix_(tname) = "nerv.CuMatrixFloat"; #undef MATRIX_USE_FLOAT #undef MATRIX_ELEM #undef MATRIX_ELEM_PTR +#undef MATRIX_ELEM_PTR_BASE #undef MATRIX_ELEM_FMT #undef MATRIX_ELEM_WRITE_FMT #undef MATRIX_CUMATRIX_HOST_TNAME diff --git a/nerv/matrix/generic/cumatrix.c b/nerv/matrix/generic/cumatrix.c index e1519b0..be3d627 100644 --- a/nerv/matrix/generic/cumatrix.c +++ b/nerv/matrix/generic/cumatrix.c @@ -1,5 +1,5 @@ #ifdef NERV_GENERIC_CUMATRIX -#include "elem_type.h" +#include "../../lib/matrix/generic/elem_type.h" #define MATRIX_DATA_WRITE(L, data, idx, val) cuda_matrix_(write)(L, data, idx, val) #define MATRIX_DATA_READ(L, data, idx) cuda_matrix_(read)(L, data, idx) #define MATRIX_INIT(L) cuda_matrix_(init)(L) diff --git a/nerv/matrix/generic/elem_type.h b/nerv/matrix/generic/elem_type.h deleted file mode 100644 index bffe940..0000000 --- a/nerv/matrix/generic/elem_type.h +++ /dev/null @@ -1,22 +0,0 @@ -#ifdef MATRIX_USE_FLOAT - -#define MATRIX_ELEM float -#define MATRIX_ELEM_FMT "%f" -#define MATRIX_ELEM_WRITE_FMT "%.8f" -#define MATRIX_ELEM_PTR(self) ((self)->data.f) - -#elif defined(MATRIX_USE_DOUBLE) - -#define MATRIX_ELEM double -#define MATRIX_ELEM_FMT 
"%lf" -#define MATRIX_ELEM_WRITE_FMT "%.8lf" -#define MATRIX_ELEM_PTR(self) ((self)->data.d) - -#elif defined(MATRIX_USE_INT) - -#define MATRIX_ELEM long -#define MATRIX_ELEM_FMT "%ld" -#define MATRIX_ELEM_WRITE_FMT "%ld" -#define MATRIX_ELEM_PTR(self) ((self)->data.i) - -#endif diff --git a/nerv/matrix/generic/mmatrix.c b/nerv/matrix/generic/mmatrix.c index 01dd9e5..a4e8489 100644 --- a/nerv/matrix/generic/mmatrix.c +++ b/nerv/matrix/generic/mmatrix.c @@ -1,6 +1,6 @@ #ifdef NERV_GENERIC_MMATRIX #include "../../lib/matrix/generic/matrix.h" -#include "elem_type.h" +#include "../../lib/matrix/generic/elem_type.h" #define MATRIX_DATA_WRITE(L, data, idx, val) (data[idx] = val) #define MATRIX_DATA_READ(L, data, idx) (data[idx]) #define MATRIX_INIT(L) host_matrix_(init)(L) diff --git a/nerv/matrix/mmatrix.c b/nerv/matrix/mmatrix.c index 961059c..20c31d6 100644 --- a/nerv/matrix/mmatrix.c +++ b/nerv/matrix/mmatrix.c @@ -45,6 +45,7 @@ static const luaL_Reg nerv_matrix_(extra_methods_int)[] = { #undef MATRIX_USE_FLOAT #undef MATRIX_ELEM #undef MATRIX_ELEM_PTR +#undef MATRIX_ELEM_PTR_BASE #undef MATRIX_ELEM_FMT #undef MATRIX_ELEM_WRITE_FMT #undef MMATRIX_INIT @@ -60,6 +61,7 @@ const char *nerv_matrix_(tname) = "nerv.MMatrixDouble"; #undef MATRIX_USE_DOUBLE #undef MATRIX_ELEM #undef MATRIX_ELEM_PTR +#undef MATRIX_ELEM_PTR_BASE #undef MATRIX_ELEM_FMT #undef MATRIX_ELEM_WRITE_FMT -- cgit v1.2.3 From 7fd5c2c8672c8ac75348e2d51f56a72b5fd21b7b Mon Sep 17 00:00:00 2001 From: txh18 Date: Thu, 10 Dec 2015 23:13:29 +0800 Subject: ... 
--- nerv/examples/lmptb/bilstmlm_ptb_main.lua | 6 ++++-- nerv/examples/lmptb/lstmlm_ptb_main.lua | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/nerv/examples/lmptb/bilstmlm_ptb_main.lua b/nerv/examples/lmptb/bilstmlm_ptb_main.lua index 0472588..e88eea2 100644 --- a/nerv/examples/lmptb/bilstmlm_ptb_main.lua +++ b/nerv/examples/lmptb/bilstmlm_ptb_main.lua @@ -345,7 +345,7 @@ end lr_half = false --can not be local, to be set by loadstring start_iter = -1 -start_lr = global_conf.lrate +start_lr = nil ppl_last = 100000 commands_str = "train:test" commands = {} @@ -371,7 +371,9 @@ global_conf.log_fn = global_conf.work_dir .. '/log_lstm_tnn_' .. commands_str .. global_conf.log_fn, _ = string.gsub(global_conf.log_fn, ':', '-') commands = nerv.SUtil.parse_commands_set(commands_str) -global_conf.lrate = start_lr --starting lr can be set by user(arg[2]) +if start_lr ~= nil then + global_conf.lrate = start_lr --starting lr can be set by user(arg[2]) +end nerv.printf("%s creating work_dir(%s)...\n", global_conf.sche_log_pre, global_conf.work_dir) nerv.LMUtil.wait(2) diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua index 6e3fab9..9bdd5ff 100644 --- a/nerv/examples/lmptb/lstmlm_ptb_main.lua +++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua @@ -333,7 +333,7 @@ end lr_half = false --can not be local, to be set by loadstring start_iter = -1 -start_lr = global_conf.lrate +start_lr = nil ppl_last = 100000 commands_str = "train:test" commands = {} @@ -358,7 +358,9 @@ global_conf.log_fn = global_conf.work_dir .. '/log_lstm_tnn_' .. commands_str .. 
global_conf.log_fn, _ = string.gsub(global_conf.log_fn, ':', '-') commands = nerv.SUtil.parse_commands_set(commands_str) -global_conf.lrate = start_lr +if start_lr ~= nil then + global_conf.lrate = start_lr +end nerv.printf("%s creating work_dir(%s)...\n", global_conf.sche_log_pre, global_conf.work_dir) nerv.LMUtil.wait(2) -- cgit v1.2.3 From 28bb2edd5ee81688f245cc89f872150db1e01e44 Mon Sep 17 00:00:00 2001 From: txh18 Date: Fri, 11 Dec 2015 21:43:55 +0800 Subject: bug fix: added check in the select_linear kernel --- nerv/lib/matrix/generic/cukernel.cu | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/nerv/lib/matrix/generic/cukernel.cu b/nerv/lib/matrix/generic/cukernel.cu index aa830b5..552f7a4 100644 --- a/nerv/lib/matrix/generic/cukernel.cu +++ b/nerv/lib/matrix/generic/cukernel.cu @@ -263,11 +263,14 @@ __global__ void cudak_(clip)(MATRIX_ELEM *a, #ifdef __NERV_FUTURE_CUDA_7 __global__ void cudak_(update_select_rows)(MATRIX_ELEM *c, const MATRIX_ELEM *a, const MATRIX_ELEM *idx, - int nrow_a, int ncol_a, int stride_c, int stride_a, double alpha, double beta) { + int nrow_a, int ncol_a, int nrow_c, int stride_c, int stride_a, double alpha, double beta) { int j = blockIdx.x * blockDim.x + threadIdx.x; int i = blockIdx.y * blockDim.y + threadIdx.y; if (i >= nrow_a || j >= ncol_a) return; int i_c = lrintf(idx[i]); + if (i_c < 0 || i_c >= nrow_c) { + printf("ERROR inside kernel update_select_rows, i_c(%d) out of range!", i_c); + } //critical: i_c could conflict among threads(same index in the idx array), so atomicAdd is used //c[j + i_c * stride_c] = c[j + i_c * stride_c] * (1 - beta * alpha) + a[j + i * stride_a] * alpha; atomicAdd_nvidia(c + j + i_c * stride_c, c[j + i_c * stride_c] * (- beta * alpha) + a[j + i * stride_a] * alpha); @@ -335,11 +338,15 @@ __global__ void cudak_(gen_col_idx)(MATRIX_ELEM *b, __global__ void cudak_(copy_rows_by_idx)(const MATRIX_ELEM *a, MATRIX_ELEM *b, const MATRIX_ELEM *idx, - int nrow, int ncol, 
int stride) { + int nrow, int ncol, int a_nrow, int stride) { int j = blockIdx.x * blockDim.x + threadIdx.x; int i = blockIdx.y * blockDim.y + threadIdx.y; if (i >= nrow || j >= ncol) return; - b[j + i * stride] = a[j + lrintf(idx[i]) * stride]; + int k = lrintf(idx[i]); + if (k < 0 || k >= a_nrow) { + printf("error in kernel copy_rows_by_idx k(%d) out of range\n", k); + } + b[j + i * stride] = a[j + k * stride]; } extern "C" { @@ -639,7 +646,7 @@ extern "C" { CEIL_DIV(a->nrow, threadsPerBlock.y)); cudak_(update_select_rows)<<>> \ (MATRIX_ELEM_PTR(c), MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(idx), - a->nrow, a->ncol, c->stride / sizeof(MATRIX_ELEM), + a->nrow, a->ncol, c->nrow, c->stride / sizeof(MATRIX_ELEM), a->stride / sizeof(MATRIX_ELEM), alpha, beta); cudaStreamSynchronize(0); } @@ -710,7 +717,7 @@ extern "C" { cudak_(copy_rows_by_idx)<<>> \ (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), MATRIX_ELEM_PTR(idx) + idx_begin, - b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM)); + b->nrow, b->ncol, a->nrow, b->stride / sizeof(MATRIX_ELEM)); cudaStreamSynchronize(0); } } -- cgit v1.2.3 From 32c39bcaf72a7dc08968909d8b6a8b108ac923b4 Mon Sep 17 00:00:00 2001 From: txh18 Date: Sun, 13 Dec 2015 20:58:02 +0800 Subject: move lmvocab:read_line to lmutil --- nerv/examples/lmptb/lmptb/lmfeeder.lua | 3 ++- nerv/examples/lmptb/lmptb/lmseqreader.lua | 3 ++- nerv/examples/lmptb/lmptb/lmutil.lua | 27 +++++++++++++++++++++++++++ nerv/examples/lmptb/lmptb/lmvocab.lua | 12 ------------ 4 files changed, 31 insertions(+), 14 deletions(-) diff --git a/nerv/examples/lmptb/lmptb/lmfeeder.lua b/nerv/examples/lmptb/lmptb/lmfeeder.lua index 34631bf..e140f38 100644 --- a/nerv/examples/lmptb/lmptb/lmfeeder.lua +++ b/nerv/examples/lmptb/lmptb/lmfeeder.lua @@ -1,4 +1,5 @@ require 'lmptb.lmvocab' +require 'lmptb.lmutil' local Feeder = nerv.class("nerv.LMFeeder") @@ -39,7 +40,7 @@ function Feeder:refresh_stream(id) local st = self.streams[id] if (st.store[st.head] ~= nil) then return end if (self.fh == nil) 
then return end - local list = self.vocab:read_line(self.fh) + local list = nerv.LMUtil.read_line(self.fh) if (list == nil) then --file has end printf("%s file expires, closing.\n", self.log_pre) self.fh:close() diff --git a/nerv/examples/lmptb/lmptb/lmseqreader.lua b/nerv/examples/lmptb/lmptb/lmseqreader.lua index ed791d2..b603911 100644 --- a/nerv/examples/lmptb/lmptb/lmseqreader.lua +++ b/nerv/examples/lmptb/lmptb/lmseqreader.lua @@ -1,4 +1,5 @@ require 'lmptb.lmvocab' +require 'lmptb.lmutil' --require 'tnn.init' local LMReader = nerv.class("nerv.LMSeqReader") @@ -58,7 +59,7 @@ function LMReader:refresh_stream(id) local st = self.streams[id] if (st.store[st.head] ~= nil) then return end if (self.fh == nil) then return end - local list = self.vocab:read_line(self.fh) + local list = nerv.LMUtil.read_line(self.fh) if (list == nil) then --file has end printf("%s file expires, closing.\n", self.log_pre) self.fh:close() diff --git a/nerv/examples/lmptb/lmptb/lmutil.lua b/nerv/examples/lmptb/lmptb/lmutil.lua index 71e8e17..27b4b10 100644 --- a/nerv/examples/lmptb/lmptb/lmutil.lua +++ b/nerv/examples/lmptb/lmptb/lmutil.lua @@ -1,11 +1,38 @@ local Util = nerv.class("nerv.LMUtil") +local mysplit = function(inputstr, sep) + if sep == nil then + sep = "%s" + end + local t={} ; i=1 + for str in string.gmatch(inputstr, "([^"..sep.."]+)") do + t[i] = str + i = i + 1 + end + return t +end + --function rounds a number to the given number of decimal places. 
function Util.round(num, idp) local mult = 10^(idp or 0) return math.floor(num * mult + 0.5) / mult end +--fh: file_handle +--Returns: a list of tokens(string) in the line, if there is no "" at the end, the function will at it, if nothing to read, returns nil +function Util.read_line(fh) + local l_str, list + + repeat + l_str = fh:read("*line") + if (l_str == nil) then return nil end + list = mysplit(l_str) + until #list >= 1 + + return list +end + + --list: table, list of string(word) --vocab: nerv.LMVocab --ty: nerv.CuMatrix diff --git a/nerv/examples/lmptb/lmptb/lmvocab.lua b/nerv/examples/lmptb/lmptb/lmvocab.lua index 3d256c0..2ad0e7e 100644 --- a/nerv/examples/lmptb/lmptb/lmvocab.lua +++ b/nerv/examples/lmptb/lmptb/lmvocab.lua @@ -101,18 +101,6 @@ function Vocab:get_word_id(key) return self.map_id(key) end ---fh: file_handle ---Returns: a list of tokens(string) in the line, if there is no "" at the end, the function will at it, if nothing to read, returns nil -function Vocab:read_line(fh) - local l_str = fh:read("*line") - if (l_str == nil) then return nil end - local list = mysplit(l_str) - if (list[(#list)] ~= self.sen_end_token) then - list[#list + 1] = self.sen_end_token - end - return list -end - --fn: string --Add all words in fn to the vocab function Vocab:build_file(fn) -- cgit v1.2.3 From cbcdec35ae17511d7ff022f290e97c518d7a1f1b Mon Sep 17 00:00:00 2001 From: txh18 Date: Sun, 13 Dec 2015 21:03:26 +0800 Subject: ... 
--- nerv/examples/lmptb/lmptb/lmvocab.lua | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nerv/examples/lmptb/lmptb/lmvocab.lua b/nerv/examples/lmptb/lmptb/lmvocab.lua index 2ad0e7e..7f512fd 100644 --- a/nerv/examples/lmptb/lmptb/lmvocab.lua +++ b/nerv/examples/lmptb/lmptb/lmvocab.lua @@ -1,3 +1,5 @@ +require 'lmptb.lmutil' + local Vocab = nerv.class("nerv.LMVocab") local printf = nerv.printf @@ -107,7 +109,7 @@ function Vocab:build_file(fn) printf("%s Vocab building on file %s...\n", self.log_pre, fn) local file = io.open(fn, "r") while (true) do - local list = self:read_line(file) + local list = nerv.LMUtil.read_line(file) if (list == nil) then break else -- cgit v1.2.3 From 60ddaa4807978af9277edd0ad6758e1006d25223 Mon Sep 17 00:00:00 2001 From: txh18 Date: Sun, 13 Dec 2015 21:22:29 +0800 Subject: bug fix in lmptb.vocab --- nerv/examples/lmptb/lmptb/lmvocab.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerv/examples/lmptb/lmptb/lmvocab.lua b/nerv/examples/lmptb/lmptb/lmvocab.lua index 7f512fd..0e7ef3e 100644 --- a/nerv/examples/lmptb/lmptb/lmvocab.lua +++ b/nerv/examples/lmptb/lmptb/lmvocab.lua @@ -100,7 +100,7 @@ function Vocab:get_word_id(key) if (self.map_id[key] == nil) then nerv.error("id key %d does not exist.", key) end - return self.map_id(key) + return self.map_id[key] end --fn: string -- cgit v1.2.3 From 2be64c382aa8d2fedd6aaf69dff212e7afef22b5 Mon Sep 17 00:00:00 2001 From: txh18 Date: Sun, 20 Dec 2015 22:07:02 +0800 Subject: added find_param method for layersT --- nerv/tnn/init.lua | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nerv/tnn/init.lua b/nerv/tnn/init.lua index 979f5d8..4bbff12 100644 --- a/nerv/tnn/init.lua +++ b/nerv/tnn/init.lua @@ -33,6 +33,8 @@ function LayerT:check_dim_len(len_in, len_out) end end +layerT.find_param = nerv.layer.find_param + function LayerT:get_params() nerv.error_method_not_implemented() end -- cgit v1.2.3 From 996472e76c31ba560622841b4b31318244317c84 Mon Sep 17 
00:00:00 2001 From: txh18 Date: Sun, 20 Dec 2015 22:08:54 +0800 Subject: small mistake in layersT --- nerv/tnn/init.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nerv/tnn/init.lua b/nerv/tnn/init.lua index 4bbff12..b375fa8 100644 --- a/nerv/tnn/init.lua +++ b/nerv/tnn/init.lua @@ -33,7 +33,7 @@ function LayerT:check_dim_len(len_in, len_out) end end -layerT.find_param = nerv.layer.find_param +LayerT.find_param = nerv.Layer.find_param function LayerT:get_params() nerv.error_method_not_implemented() -- cgit v1.2.3 From 7f03ce8da24870f2757473385a75ed990b36d817 Mon Sep 17 00:00:00 2001 From: txh18 Date: Mon, 21 Dec 2015 13:36:54 +0800 Subject: added compressed_label support in the reader --- nerv/examples/lmptb/lm_trainer.lua | 12 ++++++++++-- nerv/examples/lmptb/lmptb/lmseqreader.lua | 19 ++++++++++++++++++- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/nerv/examples/lmptb/lm_trainer.lua b/nerv/examples/lmptb/lm_trainer.lua index eab6e2d..06c1a4c 100644 --- a/nerv/examples/lmptb/lm_trainer.lua +++ b/nerv/examples/lmptb/lm_trainer.lua @@ -23,6 +23,9 @@ function LMTrainer.lm_process_file_rnn(global_conf, fn, tnn, do_train, p_conf) end local reader local r_conf = {} + if p_conf.compressed_label ~= nil then + r_conf.compressed_label = p_conf.compressed_label + end local chunk_size, batch_size if p_conf.one_sen_report == true then --report log prob one by one sentence if do_train == true then @@ -156,13 +159,16 @@ function LMTrainer.lm_process_file_birnn(global_conf, fn, tnn, do_train, p_conf) local reader local chunk_size, batch_size local r_conf = {["se_mode"] = true} + if p_conf.compressed_label ~= nil then + r_conf.compressed_label = p_conf.compressed_label + end if p_conf.one_sen_report == true then --report log prob one by one sentence if do_train == true then nerv.warning("LMTrainer.lm_process_file_birnn: warning, one_sen_report is true while do_train is also true, strange") end nerv.printf("lm_process_file_birnn: one_sen 
report mode, set batch_size to 1 and chunk_size to max_sen_len(%d)\n", global_conf.max_sen_len) - batch_size = 1 + batch_size = global_conf.batch_size chunk_size = global_conf.max_sen_len else batch_size = global_conf.batch_size @@ -239,7 +245,9 @@ function LMTrainer.lm_process_file_birnn(global_conf, fn, tnn, do_train, p_conf) end if p_conf.one_sen_report == true then for i = 1, batch_size do - nerv.printf("LMTrainer.lm_process_file_birnn: one_sen_report_output, %f\n", sen_logp[i]) + if sen_logp[i] ~= nil then + nerv.printf("LMTrainer.lm_process_file_birnn: one_sen_report_output, %f\n", sen_logp[i]) + end end end diff --git a/nerv/examples/lmptb/lmptb/lmseqreader.lua b/nerv/examples/lmptb/lmptb/lmseqreader.lua index b603911..0f29f8b 100644 --- a/nerv/examples/lmptb/lmptb/lmseqreader.lua +++ b/nerv/examples/lmptb/lmptb/lmseqreader.lua @@ -24,6 +24,10 @@ function LMReader:__init(global_conf, batch_size, chunk_size, vocab, r_conf) if r_conf.se_mode == true then self.se_mode = true end + self.compressed_label = false + if r_conf.compressed_label == true then + self.compressed_label = true + end end --fn: string @@ -46,6 +50,9 @@ function LMReader:open_file(fn) for j = 1, self.chunk_size, 1 do self.bak_inputs_m[j] = {} self.bak_inputs_m[j][1] = self.gconf.mmat_type(self.batch_size, 1) + if self.compressed_label == true then + self.bak_inputs_m[j][2] = self.gconf.mmat_type(self.batch_size, 1) + end --self.bak_inputs_m[j][2] = self.gconf.mmat_type(self.batch_size, self.vocab:size()) --since MMatrix does not yet have fill, this m[j][2] is not used end end @@ -118,6 +125,9 @@ function LMReader:get_batch(feeds) end inputs_s[j][i] = self.vocab.null_token self.bak_inputs_m[j][1][i - 1][0] = 0 + if self.compressed_label == true then + self.bak_inputs_m[j][2][i - 1][0] = 0 + end labels_s[j][i] = self.vocab.null_token else self:refresh_stream(i) @@ -132,7 +142,11 @@ function LMReader:get_batch(feeds) end if st.store[st.head + 1] ~= nil then labels_s[j][i] = st.store[st.head + 1] 
- inputs_m[j][2][i - 1][self.vocab:get_word_str(st.store[st.head + 1]).id - 1] = 1 + if self.compressed_label == true then + self.bak_inputs_m[j][2][i - 1][0] = self.vocab:get_word_str(st.store[st.head + 1]).id - 1 + else + inputs_m[j][2][i - 1][self.vocab:get_word_str(st.store[st.head + 1]).id - 1] = 1 + end else if (inputs_s[j][i] ~= self.vocab.null_token) then nerv.error("reader error : input not null but label is null_token") @@ -169,6 +183,9 @@ function LMReader:get_batch(feeds) flagsPack[j] = bit.bor(flagsPack[j], flags[j][i]) end inputs_m[j][1]:copy_fromh(self.bak_inputs_m[j][1]) + if self.compressed_label == true then + inputs_m[j][2]:copy_fromh(self.bak_inputs_m[j][2]) + end end --check for self.al_sen_start -- cgit v1.2.3 From 95ef51432218683ec90fdbaa2e92007f4fbd4610 Mon Sep 17 00:00:00 2001 From: txh18 Date: Mon, 21 Dec 2015 16:58:35 +0800 Subject: added a garbace collect in tnn --- nerv/tnn/tnn.lua | 1 + 1 file changed, 1 insertion(+) diff --git a/nerv/tnn/tnn.lua b/nerv/tnn/tnn.lua index 7ae3172..5351053 100644 --- a/nerv/tnn/tnn.lua +++ b/nerv/tnn/tnn.lua @@ -77,6 +77,7 @@ function TNN.make_initial_store(st, p, dim, batch_size, chunk_size, extend_t, gl st_c[i + t_c][p_c] = st[i][p] end end + collectgarbage("collect") --free the old one to save memory end function TNN:out_of_feedrange(t) --out of chunk, or no input, for the current feed -- cgit v1.2.3 From 9a172678f824351840283363161e8b38d2d5cfb2 Mon Sep 17 00:00:00 2001 From: txh18 Date: Tue, 22 Dec 2015 13:34:10 +0800 Subject: give a update_select_rows a more proper name --- nerv/lib/matrix/generic/cukernel.cu | 30 +++++++++++++++++++++++++++--- nerv/lib/matrix/generic/cumatrix.c | 18 +++++++++++++++--- nerv/lib/matrix/generic/cumatrix.h | 6 +++++- nerv/matrix/generic/cumatrix.c | 21 ++++++++++++++++++--- 4 files changed, 65 insertions(+), 10 deletions(-) diff --git a/nerv/lib/matrix/generic/cukernel.cu b/nerv/lib/matrix/generic/cukernel.cu index 552f7a4..9244783 100644 --- 
a/nerv/lib/matrix/generic/cukernel.cu +++ b/nerv/lib/matrix/generic/cukernel.cu @@ -262,7 +262,7 @@ __global__ void cudak_(clip)(MATRIX_ELEM *a, } #ifdef __NERV_FUTURE_CUDA_7 -__global__ void cudak_(update_select_rows)(MATRIX_ELEM *c, const MATRIX_ELEM *a, const MATRIX_ELEM *idx, +__global__ void cudak_(update_select_rows_by_rowidx)(MATRIX_ELEM *c, const MATRIX_ELEM *a, const MATRIX_ELEM *idx, int nrow_a, int ncol_a, int nrow_c, int stride_c, int stride_a, double alpha, double beta) { int j = blockIdx.x * blockDim.x + threadIdx.x; int i = blockIdx.y * blockDim.y + threadIdx.y; @@ -275,6 +275,20 @@ __global__ void cudak_(update_select_rows)(MATRIX_ELEM *c, const MATRIX_ELEM *a, //c[j + i_c * stride_c] = c[j + i_c * stride_c] * (1 - beta * alpha) + a[j + i * stride_a] * alpha; atomicAdd_nvidia(c + j + i_c * stride_c, c[j + i_c * stride_c] * (- beta * alpha) + a[j + i * stride_a] * alpha); } + +__global__ void cudak_(update_select_rows_by_colidx)(MATRIX_ELEM *c, const MATRIX_ELEM *a, const MATRIX_ELEM *idx, + int nrow_a, int ncol_a, int nrow_c, int stride_c, int stride_a, int stride_idx, double alpha, double beta) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + if (i >= nrow_a || j >= ncol_a) return; + int i_c = lrintf(idx[stride_idx * i]); + if (i_c < 0 || i_c >= nrow_c) { + printf("ERROR inside kernel update_select_rows, i_c(%d) out of range!", i_c); + } + //critical: i_c could conflict among threads(same index in the idx array), so atomicAdd is used + //c[j + i_c * stride_c] = c[j + i_c * stride_c] * (1 - beta * alpha) + a[j + i * stride_a] * alpha; + atomicAdd_nvidia(c + j + i_c * stride_c, c[j + i_c * stride_c] * (- beta * alpha) + a[j + i * stride_a] * alpha); +} #endif __global__ void cudak_(expand_frm)(const MATRIX_ELEM *a, MATRIX_ELEM *b, @@ -640,16 +654,26 @@ extern "C" { } #ifdef __NERV_FUTURE_CUDA_7 - void cudak_(cuda_update_select_rows)(Matrix *c, const Matrix *a, const Matrix *idx, double alpha, 
double beta) { + void cudak_(cuda_update_select_rows_by_rowidx)(Matrix *c, const Matrix *a, const Matrix *idx, double alpha, double beta) { dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); dim3 numBlocks(CEIL_DIV(a->ncol, threadsPerBlock.x), CEIL_DIV(a->nrow, threadsPerBlock.y)); - cudak_(update_select_rows)<<>> \ + cudak_(update_select_rows_by_rowidx)<<>> \ (MATRIX_ELEM_PTR(c), MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(idx), a->nrow, a->ncol, c->nrow, c->stride / sizeof(MATRIX_ELEM), a->stride / sizeof(MATRIX_ELEM), alpha, beta); cudaStreamSynchronize(0); } + void cudak_(cuda_update_select_rows_by_colidx)(Matrix *c, const Matrix *a, const Matrix *idx, double alpha, double beta) { + dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(a->ncol, threadsPerBlock.x), + CEIL_DIV(a->nrow, threadsPerBlock.y)); + cudak_(update_select_rows_by_colidx)<<>> \ + (MATRIX_ELEM_PTR(c), MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(idx), + a->nrow, a->ncol, c->nrow, c->stride / sizeof(MATRIX_ELEM), + a->stride / sizeof(MATRIX_ELEM), idx->stride / sizeof(MATRIX_ELEM), alpha, beta); + cudaStreamSynchronize(0); + } #endif void cudak_(cuda_expand_frm)(const Matrix *a, Matrix *b, int context) { diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c index 68889ad..31d6b06 100644 --- a/nerv/lib/matrix/generic/cumatrix.c +++ b/nerv/lib/matrix/generic/cumatrix.c @@ -394,14 +394,26 @@ void nerv_matrix_(copy_rows_fromd_by_idx)(Matrix *a, const Matrix *b, }