author    Determinant <[email protected]>  2016-02-17 20:14:06 +0800
committer Determinant <[email protected]>  2016-02-17 20:14:06 +0800
commit    0ee43c21af4fcd3aed070b1f5ad1eb9feb2ad159 (patch)
tree      ceb1d38328767fb657bc0d37ec6e513b08a86277
parent    490a10c2130773bd022f05513fa2905b6a6c6e91 (diff)
try to merge manually
20 files changed, 321 insertions, 119 deletions
diff --git a/nerv/Makefile b/nerv/Makefile
index a29309a..a472cfc 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -36,7 +36,7 @@
 LUA_LIBS := matrix/init.lua io/init.lua init.lua \
 	nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \
 	io/sgd_buffer.lua \
 	tnn/init.lua tnn/layer_dag_t.lua tnn/sutil.lua tnn/tnn.lua \
-	tnn/layersT/dropout_t.lua tnn/layersT/lstm_t.lua tnn/layersT/softmax_ce_t.lua
+	tnn/layersT/dropout_t.lua tnn/layersT/lstm_t.lua tnn/layersT/gru_t.lua tnn/layersT/softmax_ce_t.lua
 INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK
 #CUDA_BASE := /usr/local/cuda-7.0
diff --git a/nerv/examples/lmptb/grulm_ptb_main.lua b/nerv/examples/lmptb/grulm_ptb_main.lua
index ef5d7f9..4a3f39f 100644
--- a/nerv/examples/lmptb/grulm_ptb_main.lua
+++ b/nerv/examples/lmptb/grulm_ptb_main.lua
@@ -198,6 +198,7 @@ qdata_dir = root_dir .. '/ptb/questionGen/gen'
 global_conf = {
     lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5,
     cumat_type = nerv.CuMatrixFloat,
+    select_gpu = 0,
     mmat_type = nerv.MMatrixFloat,
     nn_act_default = 0,
@@ -259,7 +260,7 @@ global_conf = {
 elseif (set == "twitter") then
 
 data_dir = root_dir .. '/twitter_new/DATA'
-train_fn = data_dir .. '/twitter.choose2.adds'
+train_fn = data_dir .. '/twitter.choose.adds'
 valid_fn = data_dir .. '/twitter.valid.adds'
 test_fn = data_dir .. '/comm.test.choose-ppl.adds'
 vocab_fn = data_dir .. '/twitter.choose.train.vocab'
@@ -359,7 +360,14 @@ commands = nerv.SUtil.parse_commands_set(commands_str)
 if start_lr ~= nil then
     global_conf.lrate = start_lr
 end
-
+
+nerv.printf("detecting gconf.select_gpu...\n")
+if global_conf.select_gpu then
+    nerv.printf("select gpu to %d\n", global_conf.select_gpu)
+    global_conf.cumat_type.select_gpu(global_conf.select_gpu)
+    nerv.LMUtil.wait(1)
+end
+
 nerv.printf("%s creating work_dir(%s)...\n", global_conf.sche_log_pre, global_conf.work_dir)
 nerv.LMUtil.wait(2)
 os.execute("mkdir -p "..global_conf.work_dir)
@@ -388,10 +396,10 @@ nerv.LMUtil.wait(2)
 
 math.randomseed(1)
 
-local vocab = nerv.LMVocab()
+local vocab = nerv.LMVocab(global_conf)
 global_conf["vocab"] = vocab
 nerv.printf("%s building vocab...\n", global_conf.sche_log_pre)
-global_conf.vocab:build_file(global_conf.vocab_fn, false)
+global_conf.vocab:build_file(global_conf.vocab_fn)
 
 ppl_rec = {}
 local final_iter = -1
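The new select_gpu entry above is consumed later in the same script: when global_conf.select_gpu is set, the scheduler calls cumat_type.select_gpu before any work starts, using the Lua binding added in nerv/matrix/cumatrix.c further down. A minimal Lua sketch of the intended call order (illustrative values; assumes a NERV build that already contains this commit):

-- Minimal sketch, not part of the commit: pick the GPU before the first
-- CuMatrix allocation, since select_gpu also recreates the cuBLAS handle.
local gconf = {
    cumat_type = nerv.CuMatrixFloat,
    select_gpu = 0,                      -- device index; nil keeps the default
}
if gconf.select_gpu then
    gconf.cumat_type.select_gpu(gconf.select_gpu)
end
local m = gconf.cumat_type(4, 8)         -- allocated on the selected device
m:fill(0)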
diff --git a/nerv/examples/lmptb/lm_sampler.lua b/nerv/examples/lmptb/lm_sampler.lua
index c25a75c..c9adf85 100644
--- a/nerv/examples/lmptb/lm_sampler.lua
+++ b/nerv/examples/lmptb/lm_sampler.lua
@@ -3,31 +3,34 @@ local LMSampler = nerv.class('nerv.LMSampler')
 function LMSampler:__init(global_conf)
     self.log_pre = "LMSampler"
     self.gconf = global_conf
+    self.batch_size = self.gconf.batch_size
+    self.chunk_size = self.gconf.chunk_size --largest sample sentence length
     self.vocab = self.gconf.vocab
     self.sen_end_token = self.vocab.sen_end_token
     self.sen_end_id = self.vocab:get_word_str(self.sen_end_token).id
+
+    self.loaded = false
 end
 
-function LMSampler:load_dagL(dagL)
-    self.batch_size = self.gconf.batch_size
-    self.chunk_size = self.gconf.chunk_size
-
+function LMSampler:load_dagL(dagL)
     nerv.printf("%s loading dagL\n", self.log_pre)
     self.dagL = dagL
+    self.dagL:init(self.batch_size)
 
     self.dagL_inputs = {}
-    self.dagL_inputs[1] = global_conf.cumat_type(global_conf.batch_size, 1)
+    self.dagL_inputs[1] = self.gconf.cumat_type(self.gconf.batch_size, 1)
     self.dagL_inputs[1]:fill(self.sen_end_id - 1)
-    self.dagL_inputs[2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
+    self.dagL_inputs[2] = self.gconf.cumat_type(self.gconf.batch_size, self.gconf.hidden_size)
     self.dagL_inputs[2]:fill(0)
 
     self.dagL_outputs = {}
-    self.dagL_outputs[1] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size())
-    self.dagL_outputs[2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
+    self.dagL_outputs[1] = self.gconf.cumat_type(self.gconf.batch_size, self.gconf.vocab:size())
+    self.dagL_outputs[2] = self.gconf.cumat_type(self.gconf.batch_size, self.gconf.hidden_size)
 
-    self.smout_d = global_conf.cumat_type(self.batch_size, self.vocab:size())
-    self.smout_h = global_conf.mmat_type(self.batch_size, self.vocab:size())
+    self.smout_d = self.gconf.cumat_type(self.batch_size, self.vocab:size())
+    self.ssout_d = self.gconf.cumat_type(self.batch_size, self.vocab:size())
+    self.ssout_h = self.gconf.mmat_type(self.batch_size, self.vocab:size())
 
     self.store = {}
     for i = 1, self.batch_size do
@@ -38,11 +41,31 @@ function LMSampler:load_dagL(dagL)
         self.store[i][1].p = 0
     end
     self.repo = {}
+
+    self.loaded = true
 end
 
-function LMSampler:sample_to_store(smout)
+function LMSampler:sample_to_store(ssout) --private
     for i = 1, self.batch_size do
         local ran = math.random()
+        local id = 1
+        local low = 0
+        local high = ssout:ncol() - 1
+        if ssout[i - 1][high] < 0.9999 or ssout[i - 1][high] > 1.0001 then
+            nerv.error("%s ERROR, softmax output summation(%f) seems to have some problem", self.log_pre, ssout[i - 1][high])
+        end
+        if ssout[i - 1][low] < ran then
+            while low + 1 < high do
+                local mid = math.floor((low + high) / 2)
+                if ssout[i - 1][mid] < ran then
+                    low = mid
+                else
+                    high = mid
+                end
+            end
+            id = high + 1
+        end
+        --[[
         local s = 0
         local id = self.vocab:size()
         for j = 0, self.vocab:size() - 1 do
@@ -52,19 +75,25 @@ function LMSampler:sample_to_store(smout)
                 break
             end
         end
+        ]]--
         if #self.store[i] >= self.chunk_size - 2 then
            id = self.sen_end_id
        end
        local tmp = {}
        tmp.w = self.vocab:get_word_id(id).str
        tmp.id = id
-        tmp.p = smout[i - 1][id - 1]
+        if id == 1 then
+            tmp.p = ssout[i - 1][id - 1]
+        else
+            tmp.p = ssout[i - 1][id - 1] - ssout[i - 1][id - 2]
+        end
         table.insert(self.store[i], tmp)
     end
 end
 
 ---Returns: LMResult
 function LMSampler:lm_sample_rnn_dagL(sample_num, p_conf)
+    assert(self.loaded == true)
+
     local dagL = self.dagL
     local inputs = self.dagL_inputs
     local outputs = self.dagL_outputs
@@ -74,9 +103,10 @@ function LMSampler:lm_sample_rnn_dagL(sample_num, p_conf)
         inputs[2]:copy_fromd(outputs[2]) --copy hidden activation
 
         self.smout_d:softmax(outputs[1])
-        self.smout_d:copy_toh(self.smout_h)
+        self.ssout_d:prefixsum_row(self.smout_d)
+        self.ssout_d:copy_toh(self.ssout_h)
 
-        self:sample_to_store(self.smout_h)
+        self:sample_to_store(self.ssout_h)
         for i = 1, self.batch_size do
             inputs[1][i - 1][0] = self.store[i][#self.store[i]].id - 1
             if self.store[i][#self.store[i]].id == self.sen_end_id then --meet a sentence end
diff --git a/nerv/examples/lmptb/lmptb/layer/init.lua b/nerv/examples/lmptb/lmptb/layer/init.lua
index b345244..ceae009 100644
--- a/nerv/examples/lmptb/lmptb/layer/init.lua
+++ b/nerv/examples/lmptb/lmptb/layer/init.lua
@@ -1,6 +1,6 @@
 require 'lmptb.layer.select_linear'
 require 'lmptb.layer.affine_recurrent_plusvec'
-require 'lmptb.layer.gru_t'
+--require 'lmptb.layer.gru_t'
 require 'lmptb.layer.lm_affine_recurrent'
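LMSampler:sample_to_store now receives the cumulative (prefix-summed) softmax row and draws a word id with a binary search instead of the old linear scan over the vocabulary, so one draw costs O(log V) per stream. A standalone sketch of the same inverse-CDF lookup on a plain 1-based Lua array (illustration only, not the class method):

-- Given cum[k] = P(id <= k), with cum[#cum] close to 1.0, return a sampled index.
local function sample_from_cumulative(cum)
    local ran = math.random()
    if cum[1] >= ran then
        return 1
    end
    local low, high = 1, #cum            -- invariant: cum[low] < ran <= cum[high]
    while low + 1 < high do
        local mid = math.floor((low + high) / 2)
        if cum[mid] < ran then
            low = mid
        else
            high = mid
        end
    end
    return high
end

-- probabilities {0.1, 0.2, 0.3, 0.4} -> cumulative {0.1, 0.3, 0.6, 1.0}
print(sample_from_cumulative({0.1, 0.3, 0.6, 1.0}))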
diff --git a/nerv/examples/lmptb/lmptb/lmseqreader.lua b/nerv/examples/lmptb/lmptb/lmseqreader.lua
index 0f29f8b..1272929 100644
--- a/nerv/examples/lmptb/lmptb/lmseqreader.lua
+++ b/nerv/examples/lmptb/lmptb/lmseqreader.lua
@@ -28,6 +28,10 @@ function LMReader:__init(global_conf, batch_size, chunk_size, vocab, r_conf)
     if r_conf.compressed_label == true then
         self.compressed_label = true
     end
+    self.same_io = false
+    if r_conf.same_io == true then --can be used to train P(wi|w1..(i-1),(i+1)..n)
+        self.same_io = true
+    end
 end
 
 --fn: string
@@ -36,9 +40,9 @@ function LMReader:open_file(fn)
     if (self.fh ~= nil) then
         nerv.error("%s error: in open_file(fn is %s), file handle not nil.", self.log_pre, fn)
     end
-    printf("%s opening file %s...\n", self.log_pre, fn)
-    print(self.log_pre, "batch_size:", self.batch_size, "chunk_size", self.chunk_size)
-    print(self.log_pre, "se_mode:", self.se_mode)
+    nerv.printf("%s opening file %s...\n", self.log_pre, fn)
+    nerv.printf("%s batch_size:%d chunk_size:%d\n", self.log_pre, self.batch_size, self.chunk_size)
+    nerv.printf("%s se_mode:%s same_io:%s\n", self.log_pre, tostring(self.se_mode), tostring(self.same_io))
     self.fh = io.open(fn, "r")
     self.streams = {}
     for i = 1, self.batch_size, 1 do
@@ -132,12 +136,15 @@ function LMReader:get_batch(feeds)
             else
                 self:refresh_stream(i)
                 if st.store[st.head] ~= nil then
-                    inputs_s[j][i] = st.store[st.head]
-                    --inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head]).id - 1
-                    self.bak_inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head]).id - 1
+                    if self.same_io == false then
+                        inputs_s[j][i] = st.store[st.head]
+                        self.bak_inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head]).id - 1
+                    else
+                        inputs_s[j][i] = st.store[st.head + 1]
+                        self.bak_inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head + 1]).id - 1
+                    end
                 else
                     inputs_s[j][i] = self.vocab.null_token
-                    --inputs_m[j][1][i - 1][0] = 0
                     self.bak_inputs_m[j][1][i - 1][0] = 0
                 end
                 if st.store[st.head + 1] ~= nil then
@@ -148,7 +155,7 @@ function LMReader:get_batch(feeds)
                     inputs_m[j][2][i - 1][self.vocab:get_word_str(st.store[st.head + 1]).id - 1] = 1
                     end
                 else
-                    if (inputs_s[j][i] ~= self.vocab.null_token) then
+                    if inputs_s[j][i] ~= self.vocab.null_token then
                         nerv.error("reader error : input not null but label is null_token")
                     end
                     labels_s[j][i] = self.vocab.null_token
@@ -159,6 +166,9 @@ function LMReader:get_batch(feeds)
                 end
                 flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_NORM) --has both input and label
                 got_new = true
+                if st.store[st.head] == self.vocab.sen_end_token then
+                    flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_START)
+                end
                 st.store[st.head] = nil
                 st.head = st.head + 1
                 if labels_s[j][i] == self.vocab.sen_end_token then
@@ -169,10 +179,7 @@ function LMReader:get_batch(feeds)
                         end_stream = true --meet sentence end, this stream ends now
                     end
                 end
-                if inputs_s[j][i] == self.vocab.sen_end_token then
-                    flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_START)
-                end
-            end
+            end
         end
     end
 end
@@ -190,7 +197,7 @@ function LMReader:get_batch(feeds)
 
     --check for self.al_sen_start
     for i = 1, self.batch_size do
-        if inputs_s[1][i] ~= self.vocab.sen_end_token and inputs_s[1][i] ~= self.vocab.null_token then
+        if bit.band(flags[1][i], nerv.TNN.FC.SEQ_START) == 0 and flags[1][i] > 0 then
             self.stat.al_sen_start = false
         end
     end
@@ -198,7 +205,6 @@ function LMReader:get_batch(feeds)
     if got_new == false then
         nerv.info("lmseqreader file ends, printing stats...")
         nerv.printf("al_sen_start:%s\n", tostring(self.stat.al_sen_start))
-
         return false
     else
         return true
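The new same_io flag shifts the input stream so that the word fed at position t is the label itself rather than the previous word; per the comment above this is meant for models of the form P(wi|w1..(i-1),(i+1)..n). A plain-Lua illustration of the difference (no NERV required):

-- Illustration only: how one sentence is presented with and without same_io.
local sen = {"</s>", "aa", "bb", "cc", "</s>"}
for t = 1, #sen - 1 do
    local input_default = sen[t]        -- same_io == false: the previous word
    local input_same_io = sen[t + 1]    -- same_io == true: the labelled word itself
    local label = sen[t + 1]
    print(t, input_default, input_same_io, label)
end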
diff --git a/nerv/examples/lmptb/lmptb/lmutil.lua b/nerv/examples/lmptb/lmptb/lmutil.lua
index 6d66d6e..13a5c45 100644
--- a/nerv/examples/lmptb/lmptb/lmutil.lua
+++ b/nerv/examples/lmptb/lmptb/lmutil.lua
@@ -112,10 +112,17 @@ end
 --cla:string
 --w:string
 --prob:float, the probability
-function Result:add(cla, w, prob)
-    self[cla].logp_all = self[cla].logp_all + math.log10(prob)
+function Result:add(cla, w, prob, log10ed)
+    local lp
+    if log10ed == true then
+        lp = prob
+    else
+        lp = math.log10(prob)
+    end
+
+    self[cla].logp_all = self[cla].logp_all + lp
     if (self.vocab:is_unk_str(w)) then
-        self[cla].logp_unk = self[cla].logp_unk + math.log10(prob)
+        self[cla].logp_unk = self[cla].logp_unk + lp
         self[cla].cn_unk = self[cla].cn_unk + 1
     end
     if (w == self.vocab.sen_end_token) then
diff --git a/nerv/examples/lmptb/lmptb/lmvocab.lua b/nerv/examples/lmptb/lmptb/lmvocab.lua
index 0e7ef3e..38bb18e 100644
--- a/nerv/examples/lmptb/lmptb/lmvocab.lua
+++ b/nerv/examples/lmptb/lmptb/lmvocab.lua
@@ -2,8 +2,6 @@ require 'lmptb.lmutil'
 
 local Vocab = nerv.class("nerv.LMVocab")
 
-local printf = nerv.printf
-
 local mysplit = function(inputstr, sep)
     if sep == nil then
         sep = "%s"
     end
@@ -106,7 +104,7 @@ end
 --fn: string
 --Add all words in fn to the vocab
 function Vocab:build_file(fn)
-    printf("%s Vocab building on file %s...\n", self.log_pre, fn)
+    nerv.printf("%s Vocab building on file %s...\n", self.log_pre, fn)
     local file = io.open(fn, "r")
     while (true) do
         local list = nerv.LMUtil.read_line(file)
@@ -119,7 +117,7 @@ function Vocab:build_file(fn)
         end
     end
     file:close()
-    printf("%s Building finished, vocab size now is %d.\n", self.log_pre, self:size())
+    nerv.printf("%s Building finished, vocab size now is %d.\n", self.log_pre, self:size())
 end
 
 --[[test
diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua
index 9bdd5ff..b576834 100644
--- a/nerv/examples/lmptb/lstmlm_ptb_main.lua
+++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua
@@ -277,7 +277,7 @@ global_conf = {
     hidden_size = 300,
     layer_num = 1,
     chunk_size = 15,
-    batch_size = 20,
+    batch_size = 32,
     max_iter = 35,
     lr_decay = 1.003,
     decay_iter = 10,
@@ -390,10 +390,10 @@ nerv.LMUtil.wait(2)
 
 math.randomseed(1)
 
-local vocab = nerv.LMVocab()
+local vocab = nerv.LMVocab(global_conf)
 global_conf["vocab"] = vocab
 nerv.printf("%s building vocab...\n", global_conf.sche_log_pre)
-global_conf.vocab:build_file(global_conf.vocab_fn, false)
+global_conf.vocab:build_file(global_conf.vocab_fn)
 
 ppl_rec = {}
 local final_iter = -1
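Result:add in lmutil.lua (above) now takes an optional log10ed flag so callers can pass values that are already in the log10 domain. A tiny standalone mirror of the switch (illustration only):

-- Mirrors the new branch in Result:add: convert only when the caller
-- passed a raw probability.
local function to_log10(prob, log10ed)
    if log10ed == true then
        return prob                     -- already log10(prob)
    end
    return math.log10(prob)
end

assert(to_log10(0.01) == math.log10(0.01))
assert(to_log10(-2.0, true) == -2.0)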
diff --git a/nerv/examples/lmptb/sample_grulm_ptb_main.lua b/nerv/examples/lmptb/m-tests/lm_sampler_test.lua
index 9a13d36..effb2ad 100644
--- a/nerv/examples/lmptb/sample_grulm_ptb_main.lua
+++ b/nerv/examples/lmptb/m-tests/lm_sampler_test.lua
@@ -134,10 +134,39 @@ function prepare_tnn(global_conf, layerRepo)
     return tnn
 end
 
-function prepare_dagL(global_conf, layerRepo)
-    nerv.printf("%s Generate and initing dagL ...\n", global_conf.sche_log_pre)
+function load_net_tnn(global_conf, fn)
+    prepare_parameters(global_conf, fn)
+    local layerRepo = prepare_layers(global_conf)
+    local tnn = prepare_tnn(global_conf, layerRepo)
+    return tnn
+end
+
+function prepare_sampler(sm_conf)
+    sm_conf.pr = nerv.ParamRepo()
+    sm_conf.pr:import({sm_conf.fn_to_sample}, nil, sm_conf)
+
+    local layers = {
+        ["nerv.GRULayerT"] = {
+            ["gruL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}, ["pr"] = sm_conf.pr}},
+        },
+        ["nerv.DropoutLayerT"] = {
+            ["dropoutL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}}},
+        },
+        ["nerv.SelectLinearLayer"] = {
+            ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {sm_conf.hidden_size}, ["vocab"] = sm_conf.vocab, ["pr"] = sm_conf.pr}},
+        },
+        ["nerv.CombinerLayer"] = {
+            ["combinerL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["lambda"] = {1}}},
+        },
+        ["nerv.AffineLayer"] = {
+            ["outputL"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.vocab:size()}, ["pr"] = sm_conf.pr}},
+        },
+        ["nerv.SoftmaxCELayerT"] = {
+            ["softmaxL"] = {{}, {["dim_in"] = {sm_conf.vocab:size(), sm_conf.vocab:size()}, ["dim_out"] = {1}}},
+        },
+    }
+    local layerRepo = nerv.LayerRepo(layers, sm_conf.pr, sm_conf)
 
-    --input: input_w, input_w, ... input_w_now, last_activation
     local connections_t = {
         ["<input>[1]"] = "selectL1[1]",
@@ -151,48 +180,19 @@ function prepare_dagL(global_conf, layerRepo)
         ["combinerL1[2]"] = "<output>[2]",
     }
 
-    if global_conf.layer_num > 1 then
+    if sm_conf.layer_num > 1 then
         nerv.error("multiple layer is currently not supported(not hard to implement though)")
     end
 
-    --[[
-    for l = 2, global_conf.layer_num do
-        table.insert(connections_t, {"dropoutL"..(l-1).."[1]", "gruL"..l.."[1]", 0})
-        table.insert(connections_t, {"gruL"..l.."[1]", "combinerL"..l.."[1]", 0})
-        table.insert(connections_t, {"combinerL"..l.."[1]", "gruL"..l.."[2]", 1})
-        table.insert(connections_t, {"combinerL"..l.."[2]", "dropoutL"..l.."[1]", 0})
-    end
-    ]]--
-
-    --[[
-    printf("%s printing DAG connections:\n", global_conf.sche_log_pre)
-    for key, value in pairs(connections_t) do
-        printf("\t%s->%s\n", key, value)
-    end
-    ]]--
-
-    local dagL = nerv.DAGLayerT("dagL", global_conf, {["dim_in"] = {1, global_conf.hidden_size},
-            ["dim_out"] = {global_conf.vocab:size(), global_conf.hidden_size}, ["sub_layers"] = layerRepo,
+    local dagL = nerv.DAGLayerT("dagL", sm_conf, {["dim_in"] = {1, sm_conf.hidden_size},
+            ["dim_out"] = {sm_conf.vocab:size(), sm_conf.hidden_size}, ["sub_layers"] = layerRepo,
             ["connections"] = connections_t
         })
+
+    local sampler = nerv.LMSampler(sm_conf)
+    sampler:load_dagL(dagL)
 
-    dagL:init(global_conf.batch_size)
-
-    nerv.printf("%s Initing DAGL end.\n", global_conf.sche_log_pre)
-    return dagL
-end
-
-function load_net_tnn(global_conf, fn)
-    prepare_parameters(global_conf, fn)
-    local layerRepo = prepare_layers(global_conf)
-    local tnn = prepare_tnn(global_conf, layerRepo)
-    return tnn
-end
-
-function load_net_dagL(global_conf, fn)
-    prepare_parameters(global_conf, fn)
-    local layerRepo = prepare_layers(global_conf)
-    local dagL = prepare_dagL(global_conf, layerRepo)
-    return dagL
+    return sampler
 end
 
 local train_fn, valid_fn, test_fn
@@ -240,6 +240,23 @@ global_conf = {
     fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final',
 }
 
+sm_conf = {
+    cumat_type = nerv.CuMatrixFloat,
+    mmat_type = nerv.MMatrixFloat,
+    nn_act_default = 0,
+
+    hidden_size = 300,
+    layer_num = 1,
+    batch_size = 32,
+    chunk_size = 85, --largest sample sentence length
+    max_iter = 35,
+    max_sen_len = 90,
+    sche_log_pre = "[SAMPLER_S]:",
+
+    timer = global_conf.timer,
+    fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final',
+}
+
 elseif (set == "msr_sc") then
 
 data_dir = '/home/slhome/txh18/workspace/sentenceCompletion/DATA_PV2'
@@ -276,15 +293,13 @@ global_conf = {
 
 elseif (set == "twitter") then
 
-data_dir = root_dir .. '/twitter_new/DATA'
-train_fn = data_dir .. '/twitter.choose2.adds'
-valid_fn = data_dir .. '/twitter.valid.adds'
-test_fn = data_dir .. '/comm.test.choose-ppl.adds'
-vocab_fn = data_dir .. '/twitter.choose.train.vocab'
-
---qdata_dir = root_dir .. '/ptb/questionGen/gen'
-
-global_conf = {
+    data_dir = root_dir .. '/twitter_new/DATA'
+    train_fn = data_dir .. '/twitter.choose2.adds'
+    valid_fn = data_dir .. '/twitter.valid.adds'
+    test_fn = data_dir .. '/comm.test.choose-ppl.adds'
+    vocab_fn = data_dir .. '/twitter.choose.train.vocab'
+    --qdata_dir = root_dir .. '/ptb/questionGen/gen'
+    global_conf = {
     lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5,
     cumat_type = nerv.CuMatrixFloat,
     mmat_type = nerv.MMatrixFloat,
@@ -309,7 +324,7 @@ global_conf = {
     log_w_num = 40000, --give a message when log_w_num words have been processed
     timer = nerv.Timer(),
     work_dir_base = root_dir .. '/twitter_new/EXP-nerv/grulm_v1.0'
-}
+    }
 
 else
@@ -347,15 +362,12 @@ global_conf = {
 
 end
 
-lr_half = false --can not be local, to be set by loadstring
-start_iter = -1
-start_lr = nil
-ppl_last = 100000
 commands_str = "sampling" --"train:test"
 commands = {}
-test_iter = -1
---for testout(question)
-q_file = "/home/slhome/txh18/workspace/ptb/questionGen/gen/ptb.test.txt.q10rs1_Msss.adds"
+test_iter = -1 --obselete
+random_seed = 1
+sample_num = 10
+out_fn = nil
 
 if arg[2] ~= nil then
     nerv.printf("%s applying arg[2](%s)...\n", global_conf.sche_log_pre, arg[2])
@@ -385,26 +397,27 @@ nerv.LMUtil.wait(2)
 ]]--
 
 ----------------printing options---------------------------------
-nerv.printf("%s printing global_conf...\n", global_conf.sche_log_pre)
-for id, value in pairs(global_conf) do
+nerv.printf("%s printing sm_conf...\n", sm_conf.sche_log_pre)
+for id, value in pairs(sm_conf) do
     nerv.printf("%s:\t%s\n", id, tostring(value))
 end
 nerv.LMUtil.wait(2)
 
 nerv.printf("%s printing training scheduling options...\n", global_conf.sche_log_pre)
-nerv.printf("lr_half:\t%s\n", tostring(lr_half))
-nerv.printf("start_iter:\t%s\n", tostring(start_iter))
-nerv.printf("ppl_last:\t%s\n", tostring(ppl_last))
 nerv.printf("commands_str:\t%s\n", commands_str)
 nerv.printf("test_iter:\t%s\n", tostring(test_iter))
+nerv.printf("random_seed:\t%s\n", tostring(random_seed))
+nerv.printf("sample_num:\t%s\n", tostring(sample_num))
+nerv.printf("out_fn:\t%s\n", tostring(out_fn))
 nerv.printf("%s printing training scheduling end.\n", global_conf.sche_log_pre)
 nerv.LMUtil.wait(2)
 ------------------printing options end------------------------------
 
-math.randomseed(1)
+math.randomseed(random_seed)
 
 local vocab = nerv.LMVocab()
 global_conf["vocab"] = vocab
+sm_conf["vocab"] = global_conf.vocab
 nerv.printf("%s building vocab...\n", global_conf.sche_log_pre)
 global_conf.vocab:build_file(global_conf.vocab_fn, false)
 ppl_rec = {}
@@ -421,18 +434,34 @@ end --if commands["test"]
 if commands["sampling"] == 1 then
     nerv.printf("===SAMPLE===\n")
     global_conf.sche_log_pre = "[SCHEDULER SAMPLING]:"
-    local dagL = load_net_dagL(global_conf, global_conf.fn_to_sample)
-    local sampler = nerv.LMSampler(global_conf)
-    sampler:load_dagL(dagL)
-    for k = 1, 5 do
-        local res = sampler:lm_sample_rnn_dagL(10, {})
+    local sampler = prepare_sampler(sm_conf)
+    local out_fh = nil
+    if out_fn ~= nil then
+        out_fh = assert(io.open(out_fn, "w"))
+        nerv.printf("%s outputing samples to file \"%s\"...\n", global_conf.sche_log_pre, out_fn)
+    end
+    for k = 1, sample_num do
+        local res = sampler:lm_sample_rnn_dagL(1, {})
         for i = 1, #res do
+            if out_fh == nil then nerv.printf("lm_sampler_output_sample: ") end
             for j = 1, #res[i] do
-                nerv.printf("%s ", res[i][j].w)
+                if out_fh == nil then
+                    nerv.printf("%s %f ", res[i][j].w, res[i][j].p)
+                else
+                    out_fh:write(nerv.sprintf("%s %f ", res[i][j].w, res[i][j].p))
+                end
+            end
+            if out_fh == nil then
+                nerv.printf("\n")
+            else
+                out_fh:write(nerv.sprintf("\n"))
             end
-            nerv.printf("\n")
         end
+        if k % 10000 == 0 and out_fh ~= nil then nerv.printf("%s %d sample done\n", global_conf.sche_log_pre, k) end
     end
+
+    if out_fh ~= nil then out_fh:close() end
+    nerv.printf("%s complete,bye\n", global_conf.sche_log_pre)
     --global_conf.dropout_rate = 0
     --LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
 end --if commands["sampling"]
diff --git a/nerv/examples/lmptb/m-tests/lmseqreader_test.lua b/nerv/examples/lmptb/m-tests/lmseqreader_test.lua
index 9127559..3f99741 100644
--- a/nerv/examples/lmptb/m-tests/lmseqreader_test.lua
+++ b/nerv/examples/lmptb/m-tests/lmseqreader_test.lua
@@ -7,7 +7,7 @@ local test_fn = "/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-te
 --local test_fn = "/home/slhome/txh18/workspace/nerv-project/nerv/examples/lmptb/PTBdata/ptb.train.txt"
 local vocab = nerv.LMVocab()
 vocab:build_file(test_fn)
-local chunk_size = 20
+local chunk_size = 15
 local batch_size = 3
 local global_conf = {
     lrate = 1, wcost = 1e-6, momentum = 0,
@@ -30,7 +30,8 @@ local global_conf = {
     vocab = vocab
 }
 
-local reader = nerv.LMSeqReader(global_conf, batch_size, chunk_size, vocab, {["se_mode"] = true})
+local reader = nerv.LMSeqReader(global_conf, batch_size, chunk_size, vocab,
+        {["se_mode"] = true, ["same_io"] = true})
 reader:open_file(test_fn)
 local feeds = {}
 feeds.flags_now = {}
@@ -40,14 +41,15 @@ for j = 1, chunk_size do
     feeds.inputs_m[j] = {global_conf.cumat_type(batch_size, 1), global_conf.cumat_type(batch_size, global_conf.vocab:size())}
     feeds.flags_now[j] = {}
 end
-while (1) do
+for k = 1, 5 do
     local r = reader:get_batch(feeds)
     if (r == false) then break end
     for j = 1, chunk_size, 1 do
         for i = 1, batch_size, 1 do
-            printf("%s[L(%s)] ", feeds.inputs_s[j][i], feeds.labels_s[j][i]) --vocab:get_word_str(input[i][j]).id
+            printf("%s[L(%s)]F%d ", feeds.inputs_s[j][i], feeds.labels_s[j][i], feeds.flags_now[j][i]) --vocab:get_word_str(input[i][j]).id
         end
         printf("\n")
     end
     printf("\n")
 end
+printf("reader.sen_start %s\n", tostring(reader.stat.al_sen_start))
diff --git a/nerv/examples/lmptb/m-tests/some-text b/nerv/examples/lmptb/m-tests/some-text
index da4bea9..6756fa0 100644
--- a/nerv/examples/lmptb/m-tests/some-text
+++ b/nerv/examples/lmptb/m-tests/some-text
@@ -1,4 +1,4 @@
-</s> aa bb cc aa bb cc aa bb cc aa bb cc aa bb cc aa </s>
+</s> aa bb cc aa bb cc aa bb cc aa bb cc aa </s>
 </s> aa bb cc aa bb cc aa bb cc aa </s>
 </s> bb cc aa bb cc aa bb cc aa </s>
 </s> aa bb cc aa </s>
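The sampling command in the renamed lm_sampler_test.lua now builds everything through prepare_sampler(sm_conf) and optionally streams results to out_fn. Reduced to its core calls, the driver loop looks roughly like this (sketch only; assumes the sm_conf, sample_num and prepare_sampler defined above and an existing trained parameter file):

-- Sketch of the sampling driver, with the out_fn file handling stripped out.
local sampler = prepare_sampler(sm_conf)
for k = 1, sample_num do
    local res = sampler:lm_sample_rnn_dagL(1, {})    -- one batch of sampled sentences
    for i = 1, #res do
        for j = 1, #res[i] do
            -- res[i][j].w is the sampled word, res[i][j].p its probability
            io.write(res[i][j].w, " ", res[i][j].p, " ")
        end
        io.write("\n")
    end
end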
diff --git a/nerv/examples/lmptb/rnnlm_ptb_main.lua b/nerv/examples/lmptb/rnnlm_ptb_main.lua
index dc011fb..a1d9471 100644
--- a/nerv/examples/lmptb/rnnlm_ptb_main.lua
+++ b/nerv/examples/lmptb/rnnlm_ptb_main.lua
@@ -197,6 +197,43 @@ global_conf = {
     work_dir_base = root_dir .. '/ptb/EXP-nerv/rnnlm_tnn'
 }
 
+elseif (set == "twitter") then
+
+data_dir = root_dir .. '/twitter_new/DATA'
+train_fn = data_dir .. '/twitter.choose.adds'
+valid_fn = data_dir .. '/twitter.valid.adds'
+test_fn = data_dir .. '/comm.test.choose-ppl.adds'
+vocab_fn = data_dir .. '/twitter.choose.train.vocab'
+
+--qdata_dir = root_dir .. '/ptb/questionGen/gen'
+
+global_conf = {
+    lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5,
+    cumat_type = nerv.CuMatrixFloat,
+    mmat_type = nerv.MMatrixFloat,
+    nn_act_default = 0,
+
+    hidden_size = 300,
+    layer_num = 1,
+    chunk_size = 15,
+    batch_size = 32,
+    max_iter = 30,
+    lr_decay = 1.003,
+    decay_iter = 10,
+    param_random = function() return (math.random() / 5 - 0.1) end,
+    dropout_str = "0.5",
+
+    train_fn = train_fn,
+    valid_fn = valid_fn,
+    test_fn = test_fn,
+    vocab_fn = vocab_fn,
+    max_sen_len = 32,
+    sche_log_pre = "[SCHEDULER]:",
+    log_w_num = 40000, --give a message when log_w_num words have been processed
+    timer = nerv.Timer(),
+    work_dir_base = root_dir .. '/twitter_new/EXP-nerv/rnnlm_v1.0'
+}
+
 elseif (set == "msr_sc") then
 
 data_dir = '/home/slhome/txh18/workspace/sentenceCompletion/DATA_PV2'
diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c
index 04205e4..58bdfe7 100644
--- a/nerv/lib/matrix/cumatrix.c
+++ b/nerv/lib/matrix/cumatrix.c
@@ -9,6 +9,14 @@ static cudaEvent_t profile_start, profile_stop;
 curandGenerator_t curand_gen;
 static HashMap *profile;
 
+void nerv_cumatrix_select_gpu(int dev, Status *status) {
+    fprintf(stderr, "** selecting GPU %d\n", dev);
+    NERV_SET_STATUS(status, NERV_NORMAL, 0);
+    CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status);
+    CUDA_SAFE_SYNC_CALL(cublasDestroy(cublas_handle), status);
+    CUDA_SAFE_SYNC_CALL(cublasCreate(&cublas_handle), status);
+}
+
 void nerv_cumatrix_print_profile() {
     size_t i;
     fprintf(stderr, "*** [nerv cumatrix profile] **\n");
diff --git a/nerv/lib/matrix/generic/cukernel.cu b/nerv/lib/matrix/generic/cukernel.cu
index 8fbe05d..51e3b6a 100644
--- a/nerv/lib/matrix/generic/cukernel.cu
+++ b/nerv/lib/matrix/generic/cukernel.cu
@@ -383,6 +383,20 @@ __global__ void cudak_(copy_rows_by_colidx)(const MATRIX_ELEM *a, MATRIX_ELEM *b
         b[j + i * stride] = a[j + k * stride];
 }
 
+__global__ void cudak_(prefixsum_row_reduce)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
+                                             int nrow, int ncol, int stride_a, int stride_b, int offset) {
+    int j = blockIdx.x * blockDim.x + threadIdx.x;
+    int i = blockIdx.y * blockDim.y + threadIdx.y;
+    long idx_a, idx_b;
+    if (i >= nrow || j >= ncol) return;
+    idx_b = j + i * stride_b;
+    idx_a = j + i * stride_a;
+    //b[idx] = 1.0 / (1.0 + exp(-a[idx]));
+    if (j >= offset)
+        b[idx_b] = a[idx_a] + a[idx_a - offset];
+    else
+        b[idx_b] = a[idx_a];
+}
 
 extern "C" {
 #include "../cukernel.h"
@@ -745,6 +759,40 @@ extern "C" {
         cudaStreamSynchronize(0);
     }
 
+    void cudak_(cuda_prefixsum_row)(const Matrix *a, Matrix *b) {
+        dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N);
+        dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x),
+                       CEIL_DIV(b->nrow, threadsPerBlock.y));
+
+        MATRIX_ELEM *tmp[2];
+        size_t tmp_stride[2];
+        cudaMallocPitch(tmp, tmp_stride + 0, a->ncol * sizeof(MATRIX_ELEM), a->nrow);
+        cudaMallocPitch(tmp + 1, tmp_stride + 1, a->ncol * sizeof(MATRIX_ELEM), a->nrow);
+
+        int offset = 1;
+        cudak_(prefixsum_row_reduce)<<<numBlocks, threadsPerBlock>>> \
+            (MATRIX_ELEM_PTR(a), tmp[0], b->nrow, b->ncol,
+             a->stride / sizeof(MATRIX_ELEM), tmp_stride[0] / sizeof(MATRIX_ELEM), offset);
+        int pin = 0, pout = 1;
+
+        for (offset = 2;offset <= a->ncol / 2;offset *= 2) {
+            cudak_(prefixsum_row_reduce)<<<numBlocks, threadsPerBlock>>> \
+                (tmp[pin], tmp[pout], b->nrow, b->ncol,
+                 tmp_stride[pin] / sizeof(MATRIX_ELEM), tmp_stride[pout] / sizeof(MATRIX_ELEM), offset);
+            pin = 1 - pin;
+            pout = 1 - pout;
+        }
+
+        cudak_(prefixsum_row_reduce)<<<numBlocks, threadsPerBlock>>> \
+            (tmp[pin], MATRIX_ELEM_PTR(b), b->nrow, b->ncol,
+             tmp_stride[pin] / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), offset);
+
+        cudaFree(tmp[0]);
+        cudaFree(tmp[1]);
+
+        cudaStreamSynchronize(0);
+    }
+
     void cudak_(cuda_decompress)(const Matrix *a, Matrix *b) {
         dim3 threadsPerBlock(1, CUDA_THREADS_NN);
         dim3 numBlocks(1, CEIL_DIV(a->nrow, threadsPerBlock.y));
diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c
index bf93b77..7b70607 100644
--- a/nerv/lib/matrix/generic/cumatrix.c
+++ b/nerv/lib/matrix/generic/cumatrix.c
@@ -486,6 +486,14 @@ void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b,
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }
 
+void nerv_matrix_(prefixsum_row)(Matrix *a, const Matrix *b, Status *status) {
+    CHECK_SAME_DIMENSION(a, b, status);
+    PROFILE_START
+    cudak_(cuda_prefixsum_row)(b, a);
+    PROFILE_STOP
+    NERV_SET_STATUS(status, NERV_NORMAL, 0);
+}
+
 static void cuda_matrix_(free)(MATRIX_ELEM *ptr, Status *status) {
     CUDA_SAFE_SYNC_CALL(cudaFree(ptr), status);
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
diff --git a/nerv/lib/matrix/generic/cumatrix.h b/nerv/lib/matrix/generic/cumatrix.h
index 4f66a2c..5b8076f 100644
--- a/nerv/lib/matrix/generic/cumatrix.h
+++ b/nerv/lib/matrix/generic/cumatrix.h
@@ -61,6 +61,7 @@ void nerv_matrix_(scale_rows_by_col)(Matrix *a, const Matrix *b,
                                      Status *status);
 void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b,
                                      Status *status);
+void nerv_matrix_(prefixsum_row)(Matrix *a, const Matrix *b, Status *status);
 void nerv_matrix_(thres_mask)(Matrix *a, Matrix *b,
                               double thres, double low, double high,
                               Status *status);
diff --git a/nerv/matrix/cumatrix.c b/nerv/matrix/cumatrix.c
index bf92f92..7f22d68 100644
--- a/nerv/matrix/cumatrix.c
+++ b/nerv/matrix/cumatrix.c
@@ -8,6 +8,14 @@ static cublasHandle_t cublas_handle;
 static cudaEvent_t profile_start, profile_stop;
 static HashMap *profile;
 
+static int select_gpu(lua_State *L) {
+    Status status;
+    int dev = luaL_checkinteger(L, 1);
+    nerv_cumatrix_select_gpu(dev, &status);
+    NERV_LUA_CHECK_STATUS(L, status);
+    return 0;
+}
+
 static int print_profile(lua_State *L) {
     nerv_cumatrix_print_profile();
     return 0;
 }
@@ -21,6 +29,7 @@ static int clear_profile(lua_State *L) {
 static const luaL_Reg cumatrix_methods[] = {
     {"print_profile", print_profile},
     {"clear_profile", clear_profile},
+    {"select_gpu", select_gpu},
     {NULL, NULL}
 };
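The prefixsum_row_reduce kernel above computes an inclusive row-wise prefix sum with doubling offsets (a Hillis-Steele style scan over two ping-pong buffers), and the new prefixsum_row entries expose it to Lua as dst:prefixsum_row(src). A CPU-side Lua reference of what one row ends up holding (illustration only):

-- Reference semantics for one row: out[j] = row[1] + ... + row[j].
local function prefixsum(row)
    local out, acc = {}, 0
    for j = 1, #row do
        acc = acc + row[j]
        out[j] = acc
    end
    return out
end

local cum = prefixsum({0.1, 0.2, 0.3, 0.4})   -- {0.1, 0.3, 0.6, 1.0}
-- In the sampler this is what turns a softmax row into a CDF:
--   self.ssout_d:prefixsum_row(self.smout_d)
print(table.concat(cum, " "))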
diff --git a/nerv/matrix/generic/cumatrix.c b/nerv/matrix/generic/cumatrix.c
index cb55901..b706c21 100644
--- a/nerv/matrix/generic/cumatrix.c
+++ b/nerv/matrix/generic/cumatrix.c
@@ -15,6 +15,15 @@ static int nerv_matrix_(lua_get_blas_op)(char ch) {
     return (ch == 'T' || ch == 't') ? CUBLAS_OP_T : CUBLAS_OP_N;
 }
 
+static int nerv_matrix_(lua_prefixsum_row)(lua_State *L) {
+    Status status;
+    Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
+    Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname));
+    nerv_matrix_(prefixsum_row)(a, b, &status);
+    NERV_LUA_CHECK_STATUS(L, status);
+    return 0;
+}
+
 static int nerv_matrix_(lua_thres_mask)(lua_State *L) {
     Status status;
     Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
@@ -230,6 +239,7 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = {
     {"rearrange_frm", nerv_matrix_(lua_rearrange_frm)},
     {"scale_rows_by_row", nerv_matrix_(lua_scale_rows_by_row)},
     {"scale_rows_by_col", nerv_matrix_(lua_scale_rows_by_col)},
+    {"prefixsum_row", nerv_matrix_(lua_prefixsum_row)},
 #ifdef __NERV_FUTURE_CUDA_7
     {"update_select_rows_by_rowidx", nerv_matrix_(lua_update_select_rows_by_rowidx)},
     {"update_select_rows_by_colidx", nerv_matrix_(lua_update_select_rows_by_colidx)},
diff --git a/nerv/tnn/init.lua b/nerv/tnn/init.lua
index b375fa8..7faca31 100644
--- a/nerv/tnn/init.lua
+++ b/nerv/tnn/init.lua
@@ -47,5 +47,6 @@ nerv.include('sutil.lua')
 nerv.include('tnn.lua')
 nerv.include('layersT/softmax_ce_t.lua')
 nerv.include('layersT/lstm_t.lua')
+nerv.include('layersT/gru_t.lua')
 nerv.include('layersT/dropout_t.lua')
 nerv.include('layer_dag_t.lua')
diff --git a/nerv/examples/lmptb/lmptb/layer/gru_t.lua b/nerv/tnn/layersT/gru_t.lua
index 8f15cc8..8f15cc8 100644
--- a/nerv/examples/lmptb/lmptb/layer/gru_t.lua
+++ b/nerv/tnn/layersT/gru_t.lua
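With gru_t.lua moved into nerv/tnn/layersT and included from tnn/init.lua, nerv.GRULayerT becomes available to any NERV script, not just the lmptb example (whose local require is now commented out). A rough sketch of requesting it through a LayerRepo, mirroring the specification used in prepare_sampler above; the parameter file name and sizes are illustrative, and gconf is assumed to look like the configs in this commit:

-- Rough sketch only: build a single GRULayerT outside the lmptb example.
local gconf = {
    cumat_type = nerv.CuMatrixFloat,
    mmat_type = nerv.MMatrixFloat,
    nn_act_default = 0,
}
local pr = nerv.ParamRepo()
pr:import({"params.final"}, nil, gconf)        -- hypothetical trained parameter file
local repo = nerv.LayerRepo({
    ["nerv.GRULayerT"] = {
        ["gruL1"] = {{}, {["dim_in"] = {300, 300}, ["dim_out"] = {300}, ["pr"] = pr}},
    },
}, pr, gconf)
local gru = repo:get_layer("gruL1")            -- assumes LayerRepo:get_layer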