From 0ee43c21af4fcd3aed070b1f5ad1eb9feb2ad159 Mon Sep 17 00:00:00 2001 From: Determinant Date: Wed, 17 Feb 2016 20:14:06 +0800 Subject: try to merge manually --- nerv/Makefile | 2 +- nerv/examples/lmptb/grulm_ptb_main.lua | 16 +- nerv/examples/lmptb/lm_sampler.lua | 60 ++- nerv/examples/lmptb/lmptb/layer/gru_t.lua | 114 ------ nerv/examples/lmptb/lmptb/layer/init.lua | 2 +- nerv/examples/lmptb/lmptb/lmseqreader.lua | 34 +- nerv/examples/lmptb/lmptb/lmutil.lua | 13 +- nerv/examples/lmptb/lmptb/lmvocab.lua | 6 +- nerv/examples/lmptb/lstmlm_ptb_main.lua | 6 +- nerv/examples/lmptb/m-tests/lm_sampler_test.lua | 469 +++++++++++++++++++++++ nerv/examples/lmptb/m-tests/lmseqreader_test.lua | 10 +- nerv/examples/lmptb/m-tests/some-text | 2 +- nerv/examples/lmptb/rnnlm_ptb_main.lua | 37 ++ nerv/examples/lmptb/sample_grulm_ptb_main.lua | 440 --------------------- nerv/lib/matrix/cumatrix.c | 8 + nerv/lib/matrix/generic/cukernel.cu | 48 +++ nerv/lib/matrix/generic/cumatrix.c | 8 + nerv/lib/matrix/generic/cumatrix.h | 1 + nerv/matrix/cumatrix.c | 9 + nerv/matrix/generic/cumatrix.c | 10 + nerv/tnn/init.lua | 1 + nerv/tnn/layersT/gru_t.lua | 114 ++++++ 22 files changed, 806 insertions(+), 604 deletions(-) delete mode 100644 nerv/examples/lmptb/lmptb/layer/gru_t.lua create mode 100644 nerv/examples/lmptb/m-tests/lm_sampler_test.lua delete mode 100644 nerv/examples/lmptb/sample_grulm_ptb_main.lua create mode 100644 nerv/tnn/layersT/gru_t.lua diff --git a/nerv/Makefile b/nerv/Makefile index a29309a..a472cfc 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -36,7 +36,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \ io/sgd_buffer.lua \ tnn/init.lua tnn/layer_dag_t.lua tnn/sutil.lua tnn/tnn.lua \ - tnn/layersT/dropout_t.lua tnn/layersT/lstm_t.lua tnn/layersT/softmax_ce_t.lua + tnn/layersT/dropout_t.lua tnn/layersT/lstm_t.lua tnn/layersT/gru_t.lua tnn/layersT/softmax_ce_t.lua INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK #CUDA_BASE := /usr/local/cuda-7.0 diff --git a/nerv/examples/lmptb/grulm_ptb_main.lua b/nerv/examples/lmptb/grulm_ptb_main.lua index ef5d7f9..4a3f39f 100644 --- a/nerv/examples/lmptb/grulm_ptb_main.lua +++ b/nerv/examples/lmptb/grulm_ptb_main.lua @@ -198,6 +198,7 @@ qdata_dir = root_dir .. '/ptb/questionGen/gen' global_conf = { lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5, cumat_type = nerv.CuMatrixFloat, + select_gpu = 0, mmat_type = nerv.MMatrixFloat, nn_act_default = 0, @@ -259,7 +260,7 @@ global_conf = { elseif (set == "twitter") then data_dir = root_dir .. '/twitter_new/DATA' -train_fn = data_dir .. '/twitter.choose2.adds' +train_fn = data_dir .. '/twitter.choose.adds' valid_fn = data_dir .. '/twitter.valid.adds' test_fn = data_dir .. '/comm.test.choose-ppl.adds' vocab_fn = data_dir .. 
'/twitter.choose.train.vocab' @@ -359,7 +360,14 @@ commands = nerv.SUtil.parse_commands_set(commands_str) if start_lr ~= nil then global_conf.lrate = start_lr end - + +nerv.printf("detecting gconf.select_gpu...\n") +if global_conf.select_gpu then + nerv.printf("select gpu to %d\n", global_conf.select_gpu) + global_conf.cumat_type.select_gpu(global_conf.select_gpu) + nerv.LMUtil.wait(1) +end + nerv.printf("%s creating work_dir(%s)...\n", global_conf.sche_log_pre, global_conf.work_dir) nerv.LMUtil.wait(2) os.execute("mkdir -p "..global_conf.work_dir) @@ -388,10 +396,10 @@ nerv.LMUtil.wait(2) math.randomseed(1) -local vocab = nerv.LMVocab() +local vocab = nerv.LMVocab(global_conf) global_conf["vocab"] = vocab nerv.printf("%s building vocab...\n", global_conf.sche_log_pre) -global_conf.vocab:build_file(global_conf.vocab_fn, false) +global_conf.vocab:build_file(global_conf.vocab_fn) ppl_rec = {} local final_iter = -1 diff --git a/nerv/examples/lmptb/lm_sampler.lua b/nerv/examples/lmptb/lm_sampler.lua index c25a75c..c9adf85 100644 --- a/nerv/examples/lmptb/lm_sampler.lua +++ b/nerv/examples/lmptb/lm_sampler.lua @@ -3,31 +3,34 @@ local LMSampler = nerv.class('nerv.LMSampler') function LMSampler:__init(global_conf) self.log_pre = "LMSampler" self.gconf = global_conf + self.batch_size = self.gconf.batch_size + self.chunk_size = self.gconf.chunk_size --largest sample sentence length self.vocab = self.gconf.vocab self.sen_end_token = self.vocab.sen_end_token self.sen_end_id = self.vocab:get_word_str(self.sen_end_token).id + + self.loaded = false end -function LMSampler:load_dagL(dagL) - self.batch_size = self.gconf.batch_size - self.chunk_size = self.gconf.chunk_size - +function LMSampler:load_dagL(dagL) nerv.printf("%s loading dagL\n", self.log_pre) self.dagL = dagL + self.dagL:init(self.batch_size) self.dagL_inputs = {} - self.dagL_inputs[1] = global_conf.cumat_type(global_conf.batch_size, 1) + self.dagL_inputs[1] = self.gconf.cumat_type(self.gconf.batch_size, 1) self.dagL_inputs[1]:fill(self.sen_end_id - 1) - self.dagL_inputs[2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size) + self.dagL_inputs[2] = self.gconf.cumat_type(self.gconf.batch_size, self.gconf.hidden_size) self.dagL_inputs[2]:fill(0) self.dagL_outputs = {} - self.dagL_outputs[1] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size()) - self.dagL_outputs[2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size) + self.dagL_outputs[1] = self.gconf.cumat_type(self.gconf.batch_size, self.gconf.vocab:size()) + self.dagL_outputs[2] = self.gconf.cumat_type(self.gconf.batch_size, self.gconf.hidden_size) - self.smout_d = global_conf.cumat_type(self.batch_size, self.vocab:size()) - self.smout_h = global_conf.mmat_type(self.batch_size, self.vocab:size()) + self.smout_d = self.gconf.cumat_type(self.batch_size, self.vocab:size()) + self.ssout_d = self.gconf.cumat_type(self.batch_size, self.vocab:size()) + self.ssout_h = self.gconf.mmat_type(self.batch_size, self.vocab:size()) self.store = {} for i = 1, self.batch_size do @@ -38,11 +41,31 @@ function LMSampler:load_dagL(dagL) self.store[i][1].p = 0 end self.repo = {} + + self.loaded = true end -function LMSampler:sample_to_store(smout) +function LMSampler:sample_to_store(ssout) --private for i = 1, self.batch_size do local ran = math.random() + local id = 1 + local low = 0 + local high = ssout:ncol() - 1 + if ssout[i - 1][high] < 0.9999 or ssout[i - 1][high] > 1.0001 then + nerv.error("%s ERROR, softmax output summation(%f) seems to have 
some problem", self.log_pre, ssout[i - 1][high]) + end + if ssout[i - 1][low] < ran then + while low + 1 < high do + local mid = math.floor((low + high) / 2) + if ssout[i - 1][mid] < ran then + low = mid + else + high = mid + end + end + id = high + 1 + end + --[[ local s = 0 local id = self.vocab:size() for j = 0, self.vocab:size() - 1 do @@ -52,19 +75,25 @@ function LMSampler:sample_to_store(smout) break end end + ]]-- if #self.store[i] >= self.chunk_size - 2 then id = self.sen_end_id end local tmp = {} tmp.w = self.vocab:get_word_id(id).str tmp.id = id - tmp.p = smout[i - 1][id - 1] + if id == 1 then + tmp.p = ssout[i - 1][id - 1] + else + tmp.p = ssout[i - 1][id - 1] - ssout[i - 1][id - 2] + end table.insert(self.store[i], tmp) end end ---Returns: LMResult function LMSampler:lm_sample_rnn_dagL(sample_num, p_conf) + assert(self.loaded == true) + local dagL = self.dagL local inputs = self.dagL_inputs local outputs = self.dagL_outputs @@ -74,9 +103,10 @@ function LMSampler:lm_sample_rnn_dagL(sample_num, p_conf) inputs[2]:copy_fromd(outputs[2]) --copy hidden activation self.smout_d:softmax(outputs[1]) - self.smout_d:copy_toh(self.smout_h) + self.ssout_d:prefixsum_row(self.smout_d) + self.ssout_d:copy_toh(self.ssout_h) - self:sample_to_store(self.smout_h) + self:sample_to_store(self.ssout_h) for i = 1, self.batch_size do inputs[1][i - 1][0] = self.store[i][#self.store[i]].id - 1 if self.store[i][#self.store[i]].id == self.sen_end_id then --meet a sentence end diff --git a/nerv/examples/lmptb/lmptb/layer/gru_t.lua b/nerv/examples/lmptb/lmptb/layer/gru_t.lua deleted file mode 100644 index 8f15cc8..0000000 --- a/nerv/examples/lmptb/lmptb/layer/gru_t.lua +++ /dev/null @@ -1,114 +0,0 @@ -local GRULayerT = nerv.class('nerv.GRULayerT', 'nerv.LayerT') - -function GRULayerT:__init(id, global_conf, layer_conf) - --input1:x input2:h input3:c(h^~) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf - - if self.dim_in[2] ~= self.dim_out[1] then - nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)", self.dim_in[2], self.dim_out[1]) - end - - --prepare a DAGLayerT to hold the lstm structure - local pr = layer_conf.pr - if pr == nil then - pr = nerv.ParamRepo() - end - - local function ap(str) - return self.id .. '.' .. 
str - end - - local layers = { - ["nerv.CombinerLayer"] = { - [ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]}, - ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}}, - [ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]}, - ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}}, - [ap("updateGDup")] = {{}, {["dim_in"] = {self.dim_in[2]}, - ["dim_out"] = {self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}}, - [ap("updateMergeL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]}, - ["lambda"] = {1, -1, 1}}}, - }, - ["nerv.AffineLayer"] = { - [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]}, ["pr"] = pr}}, - }, - ["nerv.TanhLayer"] = { - [ap("mainTanhL")] = {{}, {["dim_in"] = {self.dim_out[1]}, ["dim_out"] = {self.dim_out[1]}}}, - }, - ["nerv.GateFLayer"] = { - [ap("resetGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, - ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}}, - [ap("updateGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, - ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}}, - }, - ["nerv.ElemMulLayer"] = { - [ap("resetGMulL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}}, - [ap("updateGMulCL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}}, - [ap("updateGMulHL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}}, - }, - } - - local layerRepo = nerv.LayerRepo(layers, pr, global_conf) - - local connections_t = { - ["[1]"] = ap("inputXDup[1]"), - ["[2]"] = ap("inputHDup[1]"), - - [ap("inputXDup[1]")] = ap("resetGateL[1]"), - [ap("inputHDup[1]")] = ap("resetGateL[2]"), - [ap("inputXDup[2]")] = ap("updateGateL[1]"), - [ap("inputHDup[2]")] = ap("updateGateL[2]"), - [ap("updateGateL[1]")] = ap("updateGDup[1]"), - - [ap("resetGateL[1]")] = ap("resetGMulL[1]"), - [ap("inputHDup[3]")] = ap("resetGMulL[2]"), - - [ap("inputXDup[3]")] = ap("mainAffineL[1]"), - [ap("resetGMulL[1]")] = ap("mainAffineL[2]"), - [ap("mainAffineL[1]")] = ap("mainTanhL[1]"), - - [ap("updateGDup[1]")] = ap("updateGMulHL[1]"), - [ap("inputHDup[4]")] = ap("updateGMulHL[2]"), - [ap("updateGDup[2]")] = ap("updateGMulCL[1]"), - [ap("mainTanhL[1]")] = ap("updateGMulCL[2]"), - - [ap("inputHDup[5]")] = ap("updateMergeL[1]"), - [ap("updateGMulHL[1]")] = ap("updateMergeL[2]"), - [ap("updateGMulCL[1]")] = ap("updateMergeL[3]"), - - [ap("updateMergeL[1]")] = "[1]", - } - - self.dagL = nerv.DAGLayerT(self.id, global_conf, - {["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out, ["sub_layers"] = layerRepo, - ["connections"] = connections_t}) - - self:check_dim_len(2, 1) -- x, h and h -end - -function GRULayerT:init(batch_size, chunk_size) - self.dagL:init(batch_size, chunk_size) -end - -function GRULayerT:batch_resize(batch_size, chunk_size) - self.dagL:batch_resize(batch_size, chunk_size) -end - -function GRULayerT:update(bp_err, input, output, t) - self.dagL:update(bp_err, input, output, t) -end - -function GRULayerT:propagate(input, output, t) - self.dagL:propagate(input, output, t) -end - -function GRULayerT:back_propagate(bp_err, next_bp_err, input, output, t) - self.dagL:back_propagate(bp_err, next_bp_err, input, output, t) -end - -function GRULayerT:get_params() - return self.dagL:get_params() -end diff --git a/nerv/examples/lmptb/lmptb/layer/init.lua 
b/nerv/examples/lmptb/lmptb/layer/init.lua index b345244..ceae009 100644 --- a/nerv/examples/lmptb/lmptb/layer/init.lua +++ b/nerv/examples/lmptb/lmptb/layer/init.lua @@ -1,6 +1,6 @@ require 'lmptb.layer.select_linear' require 'lmptb.layer.affine_recurrent_plusvec' -require 'lmptb.layer.gru_t' +--require 'lmptb.layer.gru_t' require 'lmptb.layer.lm_affine_recurrent' diff --git a/nerv/examples/lmptb/lmptb/lmseqreader.lua b/nerv/examples/lmptb/lmptb/lmseqreader.lua index 0f29f8b..1272929 100644 --- a/nerv/examples/lmptb/lmptb/lmseqreader.lua +++ b/nerv/examples/lmptb/lmptb/lmseqreader.lua @@ -28,6 +28,10 @@ function LMReader:__init(global_conf, batch_size, chunk_size, vocab, r_conf) if r_conf.compressed_label == true then self.compressed_label = true end + self.same_io = false + if r_conf.same_io == true then --can be used to train P(wi|w1..(i-1),(i+1)..n) + self.same_io = true + end end --fn: string @@ -36,9 +40,9 @@ function LMReader:open_file(fn) if (self.fh ~= nil) then nerv.error("%s error: in open_file(fn is %s), file handle not nil.", self.log_pre, fn) end - printf("%s opening file %s...\n", self.log_pre, fn) - print(self.log_pre, "batch_size:", self.batch_size, "chunk_size", self.chunk_size) - print(self.log_pre, "se_mode:", self.se_mode) + nerv.printf("%s opening file %s...\n", self.log_pre, fn) + nerv.printf("%s batch_size:%d chunk_size:%d\n", self.log_pre, self.batch_size, self.chunk_size) + nerv.printf("%s se_mode:%s same_io:%s\n", self.log_pre, tostring(self.se_mode), tostring(self.same_io)) self.fh = io.open(fn, "r") self.streams = {} for i = 1, self.batch_size, 1 do @@ -132,12 +136,15 @@ function LMReader:get_batch(feeds) else self:refresh_stream(i) if st.store[st.head] ~= nil then - inputs_s[j][i] = st.store[st.head] - --inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head]).id - 1 - self.bak_inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head]).id - 1 + if self.same_io == false then + inputs_s[j][i] = st.store[st.head] + self.bak_inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head]).id - 1 + else + inputs_s[j][i] = st.store[st.head + 1] + self.bak_inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head + 1]).id - 1 + end else inputs_s[j][i] = self.vocab.null_token - --inputs_m[j][1][i - 1][0] = 0 self.bak_inputs_m[j][1][i - 1][0] = 0 end if st.store[st.head + 1] ~= nil then @@ -148,7 +155,7 @@ function LMReader:get_batch(feeds) inputs_m[j][2][i - 1][self.vocab:get_word_str(st.store[st.head + 1]).id - 1] = 1 end else - if (inputs_s[j][i] ~= self.vocab.null_token) then + if inputs_s[j][i] ~= self.vocab.null_token then nerv.error("reader error : input not null but label is null_token") end labels_s[j][i] = self.vocab.null_token @@ -159,6 +166,9 @@ function LMReader:get_batch(feeds) end flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_NORM) --has both input and label got_new = true + if st.store[st.head] == self.vocab.sen_end_token then + flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_START) + end st.store[st.head] = nil st.head = st.head + 1 if labels_s[j][i] == self.vocab.sen_end_token then @@ -169,10 +179,7 @@ function LMReader:get_batch(feeds) end_stream = true --meet sentence end, this stream ends now end end - if inputs_s[j][i] == self.vocab.sen_end_token then - flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_START) - end - end + end end end end @@ -190,7 +197,7 @@ function LMReader:get_batch(feeds) --check for self.al_sen_start for i = 1, self.batch_size do - if inputs_s[1][i] ~= self.vocab.sen_end_token 
and inputs_s[1][i] ~= self.vocab.null_token then + if bit.band(flags[1][i], nerv.TNN.FC.SEQ_START) == 0 and flags[1][i] > 0 then self.stat.al_sen_start = false end end @@ -198,7 +205,6 @@ function LMReader:get_batch(feeds) if got_new == false then nerv.info("lmseqreader file ends, printing stats...") nerv.printf("al_sen_start:%s\n", tostring(self.stat.al_sen_start)) - return false else return true diff --git a/nerv/examples/lmptb/lmptb/lmutil.lua b/nerv/examples/lmptb/lmptb/lmutil.lua index 6d66d6e..13a5c45 100644 --- a/nerv/examples/lmptb/lmptb/lmutil.lua +++ b/nerv/examples/lmptb/lmptb/lmutil.lua @@ -112,10 +112,17 @@ end --cla:string --w:string --prob:float, the probability -function Result:add(cla, w, prob) - self[cla].logp_all = self[cla].logp_all + math.log10(prob) +function Result:add(cla, w, prob, log10ed) + local lp + if log10ed == true then + lp = prob + else + lp = math.log10(prob) + end + + self[cla].logp_all = self[cla].logp_all + lp if (self.vocab:is_unk_str(w)) then - self[cla].logp_unk = self[cla].logp_unk + math.log10(prob) + self[cla].logp_unk = self[cla].logp_unk + lp self[cla].cn_unk = self[cla].cn_unk + 1 end if (w == self.vocab.sen_end_token) then diff --git a/nerv/examples/lmptb/lmptb/lmvocab.lua b/nerv/examples/lmptb/lmptb/lmvocab.lua index 0e7ef3e..38bb18e 100644 --- a/nerv/examples/lmptb/lmptb/lmvocab.lua +++ b/nerv/examples/lmptb/lmptb/lmvocab.lua @@ -2,8 +2,6 @@ require 'lmptb.lmutil' local Vocab = nerv.class("nerv.LMVocab") -local printf = nerv.printf - local mysplit = function(inputstr, sep) if sep == nil then sep = "%s" @@ -106,7 +104,7 @@ end --fn: string --Add all words in fn to the vocab function Vocab:build_file(fn) - printf("%s Vocab building on file %s...\n", self.log_pre, fn) + nerv.printf("%s Vocab building on file %s...\n", self.log_pre, fn) local file = io.open(fn, "r") while (true) do local list = nerv.LMUtil.read_line(file) @@ -119,7 +117,7 @@ function Vocab:build_file(fn) end end file:close() - printf("%s Building finished, vocab size now is %d.\n", self.log_pre, self:size()) + nerv.printf("%s Building finished, vocab size now is %d.\n", self.log_pre, self:size()) end --[[test diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua index 9bdd5ff..b576834 100644 --- a/nerv/examples/lmptb/lstmlm_ptb_main.lua +++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua @@ -277,7 +277,7 @@ global_conf = { hidden_size = 300, layer_num = 1, chunk_size = 15, - batch_size = 20, + batch_size = 32, max_iter = 35, lr_decay = 1.003, decay_iter = 10, @@ -390,10 +390,10 @@ nerv.LMUtil.wait(2) math.randomseed(1) -local vocab = nerv.LMVocab() +local vocab = nerv.LMVocab(global_conf) global_conf["vocab"] = vocab nerv.printf("%s building vocab...\n", global_conf.sche_log_pre) -global_conf.vocab:build_file(global_conf.vocab_fn, false) +global_conf.vocab:build_file(global_conf.vocab_fn) ppl_rec = {} local final_iter = -1 diff --git a/nerv/examples/lmptb/m-tests/lm_sampler_test.lua b/nerv/examples/lmptb/m-tests/lm_sampler_test.lua new file mode 100644 index 0000000..effb2ad --- /dev/null +++ b/nerv/examples/lmptb/m-tests/lm_sampler_test.lua @@ -0,0 +1,469 @@ +require 'lmptb.lmvocab' +require 'lmptb.lmfeeder' +require 'lmptb.lmutil' +require 'lmptb.layer.init' +--require 'tnn.init' +require 'lmptb.lmseqreader' +require 'lm_trainer' +require 'lm_sampler' + +--[[global function rename]]-- +--local printf = nerv.printf +local LMTrainer = nerv.LMTrainer +--[[global function rename ends]]-- + +function prepare_parameters(global_conf, fn) + 
nerv.printf("%s preparing parameters...\n", global_conf.sche_log_pre) + + global_conf.paramRepo = nerv.ParamRepo() + local paramRepo = global_conf.paramRepo + + nerv.printf("%s loading parameter from file %s...\n", global_conf.sche_log_pre, fn) + paramRepo:import({fn}, nil, global_conf) + + nerv.printf("%s preparing parameters end.\n", global_conf.sche_log_pre) + + return nil +end + +--global_conf: table +--Returns: nerv.LayerRepo +function prepare_layers(global_conf) + nerv.printf("%s preparing layers...\n", global_conf.sche_log_pre) + + local pr = global_conf.paramRepo + + local du = false + + --local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent, ["clip"] = 10}} + --local recurrentLconfig = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["clip"] = 10, ["direct_update"] = du, ["pr"] = pr}} + + local layers = { + ["nerv.GRULayerT"] = { + ["gruL1"] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["pr"] = pr}}, + }, + + ["nerv.DropoutLayerT"] = { + ["dropoutL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}, + }, + + ["nerv.SelectLinearLayer"] = { + ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab, ["pr"] = pr}}, + }, + + ["nerv.CombinerLayer"] = { + ["combinerL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}}, + }, + + ["nerv.AffineLayer"] = { + ["outputL"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.vocab:size()}, ["direct_update"] = du, ["pr"] = pr}}, + }, + + ["nerv.SoftmaxCELayerT"] = { + ["softmaxL"] = {{}, {["dim_in"] = {global_conf.vocab:size(), global_conf.vocab:size()}, ["dim_out"] = {1}}}, + }, + } + + for l = 2, global_conf.layer_num do + layers["nerv.DropoutLayerT"]["dropoutL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}} + layers["nerv.GRULayerT"]["gruL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["pr"] = pr}} + layers["nerv.CombinerLayer"]["combinerL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}} + end + --[[ --we do not need those in the new tnn framework + printf("%s adding %d bptt layers...\n", global_conf.sche_log_pre, global_conf.bptt) + for i = 1, global_conf.bptt do + layers["nerv.IndRecurrentLayer"]["recurrentL" .. (i + 1)] = recurrentLconfig + layers["nerv.SigmoidLayer"]["sigmoidL" .. (i + 1)] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}} + layers["nerv.SelectLinearLayer"]["selectL" .. 
(i + 1)] = {{["ltp"] = "ltp_ih"}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}}} + end + --]] + + local layerRepo = nerv.LayerRepo(layers, pr, global_conf) + nerv.printf("%s preparing layers end.\n", global_conf.sche_log_pre) + return layerRepo +end + +--global_conf: table +--layerRepo: nerv.LayerRepo +--Returns: a nerv.TNN +function prepare_tnn(global_conf, layerRepo) + nerv.printf("%s Generate and initing TNN ...\n", global_conf.sche_log_pre) + + --input: input_w, input_w, ... input_w_now, last_activation + local connections_t = { + {"[1]", "selectL1[1]", 0}, + + --{"selectL1[1]", "recurrentL1[1]", 0}, + --{"recurrentL1[1]", "sigmoidL1[1]", 0}, + --{"sigmoidL1[1]", "combinerL1[1]", 0}, + --{"combinerL1[1]", "recurrentL1[2]", 1}, + + {"selectL1[1]", "gruL1[1]", 0}, + {"gruL1[1]", "combinerL1[1]", 0}, + {"combinerL1[1]", "gruL1[2]", 1}, + {"combinerL1[2]", "dropoutL1[1]", 0}, + + {"dropoutL"..global_conf.layer_num.."[1]", "outputL[1]", 0}, + {"outputL[1]", "softmaxL[1]", 0}, + {"[2]", "softmaxL[2]", 0}, + {"softmaxL[1]", "[1]", 0} + } + + for l = 2, global_conf.layer_num do + table.insert(connections_t, {"dropoutL"..(l-1).."[1]", "gruL"..l.."[1]", 0}) + table.insert(connections_t, {"gruL"..l.."[1]", "combinerL"..l.."[1]", 0}) + table.insert(connections_t, {"combinerL"..l.."[1]", "gruL"..l.."[2]", 1}) + table.insert(connections_t, {"combinerL"..l.."[2]", "dropoutL"..l.."[1]", 0}) + end + + --[[ + printf("%s printing DAG connections:\n", global_conf.sche_log_pre) + for key, value in pairs(connections_t) do + printf("\t%s->%s\n", key, value) + end + ]]-- + + local tnn = nerv.TNN("TNN", global_conf, {["dim_in"] = {1, global_conf.vocab:size()}, + ["dim_out"] = {1}, ["sub_layers"] = layerRepo, + ["connections"] = connections_t, ["clip_t"] = global_conf.clip_t, + }) + + tnn:init(global_conf.batch_size, global_conf.chunk_size) + + nerv.printf("%s Initing TNN end.\n", global_conf.sche_log_pre) + return tnn +end + +function load_net_tnn(global_conf, fn) + prepare_parameters(global_conf, fn) + local layerRepo = prepare_layers(global_conf) + local tnn = prepare_tnn(global_conf, layerRepo) + return tnn +end + +function prepare_sampler(sm_conf) + sm_conf.pr = nerv.ParamRepo() + sm_conf.pr:import({sm_conf.fn_to_sample}, nil, sm_conf) + + local layers = { + ["nerv.GRULayerT"] = { + ["gruL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}, ["pr"] = sm_conf.pr}}, + }, + ["nerv.DropoutLayerT"] = { + ["dropoutL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}}}, + }, + ["nerv.SelectLinearLayer"] = { + ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {sm_conf.hidden_size}, ["vocab"] = sm_conf.vocab, ["pr"] = sm_conf.pr}}, + }, + ["nerv.CombinerLayer"] = { + ["combinerL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["lambda"] = {1}}}, + }, + ["nerv.AffineLayer"] = { + ["outputL"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.vocab:size()}, ["pr"] = sm_conf.pr}}, + }, + ["nerv.SoftmaxCELayerT"] = { + ["softmaxL"] = {{}, {["dim_in"] = {sm_conf.vocab:size(), sm_conf.vocab:size()}, ["dim_out"] = {1}}}, + }, + } + local layerRepo = nerv.LayerRepo(layers, sm_conf.pr, sm_conf) + + local connections_t = { + ["[1]"] = "selectL1[1]", + + ["selectL1[1]"] = "gruL1[1]", + ["gruL1[1]"] = "combinerL1[1]", + ["[2]"] = "gruL1[2]", + --{"combinerL1[2]", "dropoutL1[1]", 0}, + + ["combinerL" .. global_conf.layer_num .. 
"[1]"] = "outputL[1]", + ["outputL[1]"] = "[1]", + ["combinerL1[2]"] = "[2]", + } + + if sm_conf.layer_num > 1 then + nerv.error("multiple layer is currently not supported(not hard to implement though)") + end + + local dagL = nerv.DAGLayerT("dagL", sm_conf, {["dim_in"] = {1, sm_conf.hidden_size}, + ["dim_out"] = {sm_conf.vocab:size(), sm_conf.hidden_size}, ["sub_layers"] = layerRepo, + ["connections"] = connections_t + }) + + local sampler = nerv.LMSampler(sm_conf) + sampler:load_dagL(dagL) + + return sampler +end + +local train_fn, valid_fn, test_fn +global_conf = {} +local set = arg[1] --"test" + +root_dir = '/home/slhome/txh18/workspace' + +if (set == "ptb") then + +data_dir = root_dir .. '/ptb/DATA' +train_fn = data_dir .. '/ptb.train.txt.adds' +valid_fn = data_dir .. '/ptb.valid.txt.adds' +test_fn = data_dir .. '/ptb.test.txt.adds' +vocab_fn = data_dir .. '/vocab' + +qdata_dir = root_dir .. '/ptb/questionGen/gen' + +global_conf = { + lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + nn_act_default = 0, + + hidden_size = 300, + layer_num = 1, + chunk_size = 15, + batch_size = 32, + max_iter = 35, + lr_decay = 1.003, + decay_iter = 10, + param_random = function() return (math.random() / 5 - 0.1) end, + dropout_str = "0.5", + + train_fn = train_fn, + valid_fn = valid_fn, + test_fn = test_fn, + vocab_fn = vocab_fn, + max_sen_len = 90, + sche_log_pre = "[SCHEDULER]:", + log_w_num = 40000, --give a message when log_w_num words have been processed + timer = nerv.Timer(), + work_dir_base = root_dir .. '/ptb/EXP-nerv/grulm_v1.0', + + fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final', +} + +sm_conf = { + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + nn_act_default = 0, + + hidden_size = 300, + layer_num = 1, + batch_size = 32, + chunk_size = 85, --largest sample sentence length + max_iter = 35, + max_sen_len = 90, + sche_log_pre = "[SAMPLER_S]:", + + timer = global_conf.timer, + fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final', +} + +elseif (set == "msr_sc") then + +data_dir = '/home/slhome/txh18/workspace/sentenceCompletion/DATA_PV2' +train_fn = data_dir .. '/normed_all.sf.len60.adds.train' +valid_fn = data_dir .. '/normed_all.sf.len60.adds.dev' +test_fn = data_dir .. '/answer_normed.adds' +vocab_fn = data_dir .. '/normed_all.choose.vocab30000.addqvocab' + +global_conf = { + lrate = 1, wcost = 1e-6, momentum = 0, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + nn_act_default = 0, + + hidden_size = 300, + layer_num = 1, + chunk_size = 15, + batch_size = 10, + max_iter = 30, + decay_iter = 10, + lr_decay = 1.003, + param_random = function() return (math.random() / 5 - 0.1) end, + dropout_str = "0", + + train_fn = train_fn, + valid_fn = valid_fn, + test_fn = test_fn, + vocab_fn = vocab_fn, + sche_log_pre = "[SCHEDULER]:", + log_w_num = 400000, --give a message when log_w_num words have been processed + timer = nerv.Timer(), + work_dir_base = '/home/slhome/txh18/workspace/sentenceCompletion/EXP-Nerv/rnnlm_test' +} + +elseif (set == "twitter") then + + data_dir = root_dir .. '/twitter_new/DATA' + train_fn = data_dir .. '/twitter.choose2.adds' + valid_fn = data_dir .. '/twitter.valid.adds' + test_fn = data_dir .. '/comm.test.choose-ppl.adds' + vocab_fn = data_dir .. '/twitter.choose.train.vocab' + --qdata_dir = root_dir .. 
'/ptb/questionGen/gen' + global_conf = { + lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + nn_act_default = 0, + + hidden_size = 300, + layer_num = 1, + chunk_size = 15, + batch_size = 32, + max_iter = 30, + lr_decay = 1.003, + decay_iter = 10, + param_random = function() return (math.random() / 5 - 0.1) end, + dropout_str = "0.5", + + train_fn = train_fn, + valid_fn = valid_fn, + test_fn = test_fn, + vocab_fn = vocab_fn, + max_sen_len = 32, + sche_log_pre = "[SCHEDULER]:", + log_w_num = 40000, --give a message when log_w_num words have been processed + timer = nerv.Timer(), + work_dir_base = root_dir .. '/twitter_new/EXP-nerv/grulm_v1.0' + } + +else + +valid_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn' +train_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn' +test_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn' +vocab_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn' + +global_conf = { + lrate = 0.01, wcost = 1e-5, momentum = 0, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + nn_act_default = 0, + + hidden_size = 20, + layer_num = 1, + chunk_size = 2, + batch_size = 10, + max_iter = 3, + param_random = function() return (math.random() / 5 - 0.1) end, + dropout_str = "0", + + train_fn = train_fn, + valid_fn = valid_fn, + test_fn = test_fn, + max_sen_len = 80, + lr_decay = 1.003, + decay_iter = 10, + vocab_fn = vocab_fn, + sche_log_pre = "[SCHEDULER]:", + log_w_num = 10, --give a message when log_w_num words have been processed + timer = nerv.Timer(), + work_dir_base = '/home/slhome/txh18/workspace/nerv/play/testEXP/tnn_lstmlm_test' +} + +end + +commands_str = "sampling" --"train:test" +commands = {} +test_iter = -1 --obselete +random_seed = 1 +sample_num = 10 +out_fn = nil + +if arg[2] ~= nil then + nerv.printf("%s applying arg[2](%s)...\n", global_conf.sche_log_pre, arg[2]) + loadstring(arg[2])() + nerv.LMUtil.wait(0.5) +else + nerv.printf("%s no user setting, all default...\n", global_conf.sche_log_pre) +end + +global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'l' .. global_conf.layer_num .. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost .. 'dr' .. global_conf.dropout_str +global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf' +global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak' +global_conf.param_fn = global_conf.work_dir .. "/params" +global_conf.dropout_list = nerv.SUtil.parse_schedule(global_conf.dropout_str) +global_conf.log_fn = global_conf.work_dir .. '/log_lstm_tnn_' .. commands_str ..os.date("_TT%m_%d_%X",os.time()) +global_conf.log_fn, _ = string.gsub(global_conf.log_fn, ':', '-') +commands = nerv.SUtil.parse_commands_set(commands_str) + +if start_lr ~= nil then + global_conf.lrate = start_lr +end + +--[[ +--redirecting log outputs! 
+nerv.SUtil.log_redirect(global_conf.log_fn) +nerv.LMUtil.wait(2) +]]-- + +----------------printing options--------------------------------- +nerv.printf("%s printing sm_conf...\n", sm_conf.sche_log_pre) +for id, value in pairs(sm_conf) do + nerv.printf("%s:\t%s\n", id, tostring(value)) +end +nerv.LMUtil.wait(2) + +nerv.printf("%s printing training scheduling options...\n", global_conf.sche_log_pre) +nerv.printf("commands_str:\t%s\n", commands_str) +nerv.printf("test_iter:\t%s\n", tostring(test_iter)) +nerv.printf("random_seed:\t%s\n", tostring(random_seed)) +nerv.printf("sample_num:\t%s\n", tostring(sample_num)) +nerv.printf("out_fn:\t%s\n", tostring(out_fn)) +nerv.printf("%s printing training scheduling end.\n", global_conf.sche_log_pre) +nerv.LMUtil.wait(2) +------------------printing options end------------------------------ + +math.randomseed(random_seed) + +local vocab = nerv.LMVocab() +global_conf["vocab"] = vocab +sm_conf["vocab"] = global_conf.vocab +nerv.printf("%s building vocab...\n", global_conf.sche_log_pre) +global_conf.vocab:build_file(global_conf.vocab_fn, false) +ppl_rec = {} + +local final_iter = -1 +if commands["test"] == 1 then + nerv.printf("===FINAL TEST===\n") + global_conf.sche_log_pre = "[SCHEDULER FINAL_TEST]:" + local tnn = load_net_tnn(global_conf, global_conf.fn_to_sample) + global_conf.dropout_rate = 0 + LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update! +end --if commands["test"] + +if commands["sampling"] == 1 then + nerv.printf("===SAMPLE===\n") + global_conf.sche_log_pre = "[SCHEDULER SAMPLING]:" + local sampler = prepare_sampler(sm_conf) + local out_fh = nil + if out_fn ~= nil then + out_fh = assert(io.open(out_fn, "w")) + nerv.printf("%s outputing samples to file \"%s\"...\n", global_conf.sche_log_pre, out_fn) + end + for k = 1, sample_num do + local res = sampler:lm_sample_rnn_dagL(1, {}) + for i = 1, #res do + if out_fh == nil then nerv.printf("lm_sampler_output_sample: ") end + for j = 1, #res[i] do + if out_fh == nil then + nerv.printf("%s %f ", res[i][j].w, res[i][j].p) + else + out_fh:write(nerv.sprintf("%s %f ", res[i][j].w, res[i][j].p)) + end + end + if out_fh == nil then + nerv.printf("\n") + else + out_fh:write(nerv.sprintf("\n")) + end + end + if k % 10000 == 0 and out_fh ~= nil then nerv.printf("%s %d sample done\n", global_conf.sche_log_pre, k) end + end + + if out_fh ~= nil then out_fh:close() end + nerv.printf("%s complete,bye\n", global_conf.sche_log_pre) + --global_conf.dropout_rate = 0 + --LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update! 
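+    --note: each output line above is one sampled sentence, written as "word prob" pairs; prob is the probability the model assigned to that word at its position, computed in lm_sampler.lua from adjacent entries of the prefix-summed softmax output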
+end --if commands["sampling"] + + diff --git a/nerv/examples/lmptb/m-tests/lmseqreader_test.lua b/nerv/examples/lmptb/m-tests/lmseqreader_test.lua index 9127559..3f99741 100644 --- a/nerv/examples/lmptb/m-tests/lmseqreader_test.lua +++ b/nerv/examples/lmptb/m-tests/lmseqreader_test.lua @@ -7,7 +7,7 @@ local test_fn = "/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-te --local test_fn = "/home/slhome/txh18/workspace/nerv-project/nerv/examples/lmptb/PTBdata/ptb.train.txt" local vocab = nerv.LMVocab() vocab:build_file(test_fn) -local chunk_size = 20 +local chunk_size = 15 local batch_size = 3 local global_conf = { lrate = 1, wcost = 1e-6, momentum = 0, @@ -30,7 +30,8 @@ local global_conf = { vocab = vocab } -local reader = nerv.LMSeqReader(global_conf, batch_size, chunk_size, vocab, {["se_mode"] = true}) +local reader = nerv.LMSeqReader(global_conf, batch_size, chunk_size, vocab, + {["se_mode"] = true, ["same_io"] = true}) reader:open_file(test_fn) local feeds = {} feeds.flags_now = {} @@ -40,14 +41,15 @@ for j = 1, chunk_size do feeds.inputs_m[j] = {global_conf.cumat_type(batch_size, 1), global_conf.cumat_type(batch_size, global_conf.vocab:size())} feeds.flags_now[j] = {} end -while (1) do +for k = 1, 5 do local r = reader:get_batch(feeds) if (r == false) then break end for j = 1, chunk_size, 1 do for i = 1, batch_size, 1 do - printf("%s[L(%s)] ", feeds.inputs_s[j][i], feeds.labels_s[j][i]) --vocab:get_word_str(input[i][j]).id + printf("%s[L(%s)]F%d ", feeds.inputs_s[j][i], feeds.labels_s[j][i], feeds.flags_now[j][i]) --vocab:get_word_str(input[i][j]).id end printf("\n") end printf("\n") end +printf("reader.sen_start %s\n", tostring(reader.stat.al_sen_start)) diff --git a/nerv/examples/lmptb/m-tests/some-text b/nerv/examples/lmptb/m-tests/some-text index da4bea9..6756fa0 100644 --- a/nerv/examples/lmptb/m-tests/some-text +++ b/nerv/examples/lmptb/m-tests/some-text @@ -1,4 +1,4 @@ - aa bb cc aa bb cc aa bb cc aa bb cc aa bb cc aa + aa bb cc aa bb cc aa bb cc aa bb cc aa aa bb cc aa bb cc aa bb cc aa bb cc aa bb cc aa bb cc aa aa bb cc aa diff --git a/nerv/examples/lmptb/rnnlm_ptb_main.lua b/nerv/examples/lmptb/rnnlm_ptb_main.lua index dc011fb..a1d9471 100644 --- a/nerv/examples/lmptb/rnnlm_ptb_main.lua +++ b/nerv/examples/lmptb/rnnlm_ptb_main.lua @@ -197,6 +197,43 @@ global_conf = { work_dir_base = root_dir .. '/ptb/EXP-nerv/rnnlm_tnn' } +elseif (set == "twitter") then + +data_dir = root_dir .. '/twitter_new/DATA' +train_fn = data_dir .. '/twitter.choose.adds' +valid_fn = data_dir .. '/twitter.valid.adds' +test_fn = data_dir .. '/comm.test.choose-ppl.adds' +vocab_fn = data_dir .. '/twitter.choose.train.vocab' + +--qdata_dir = root_dir .. '/ptb/questionGen/gen' + +global_conf = { + lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + nn_act_default = 0, + + hidden_size = 300, + layer_num = 1, + chunk_size = 15, + batch_size = 32, + max_iter = 30, + lr_decay = 1.003, + decay_iter = 10, + param_random = function() return (math.random() / 5 - 0.1) end, + dropout_str = "0.5", + + train_fn = train_fn, + valid_fn = valid_fn, + test_fn = test_fn, + vocab_fn = vocab_fn, + max_sen_len = 32, + sche_log_pre = "[SCHEDULER]:", + log_w_num = 40000, --give a message when log_w_num words have been processed + timer = nerv.Timer(), + work_dir_base = root_dir .. 
'/twitter_new/EXP-nerv/rnnlm_v1.0' +} + elseif (set == "msr_sc") then data_dir = '/home/slhome/txh18/workspace/sentenceCompletion/DATA_PV2' diff --git a/nerv/examples/lmptb/sample_grulm_ptb_main.lua b/nerv/examples/lmptb/sample_grulm_ptb_main.lua deleted file mode 100644 index 9a13d36..0000000 --- a/nerv/examples/lmptb/sample_grulm_ptb_main.lua +++ /dev/null @@ -1,440 +0,0 @@ -require 'lmptb.lmvocab' -require 'lmptb.lmfeeder' -require 'lmptb.lmutil' -require 'lmptb.layer.init' ---require 'tnn.init' -require 'lmptb.lmseqreader' -require 'lm_trainer' -require 'lm_sampler' - ---[[global function rename]]-- ---local printf = nerv.printf -local LMTrainer = nerv.LMTrainer ---[[global function rename ends]]-- - -function prepare_parameters(global_conf, fn) - nerv.printf("%s preparing parameters...\n", global_conf.sche_log_pre) - - global_conf.paramRepo = nerv.ParamRepo() - local paramRepo = global_conf.paramRepo - - nerv.printf("%s loading parameter from file %s...\n", global_conf.sche_log_pre, fn) - paramRepo:import({fn}, nil, global_conf) - - nerv.printf("%s preparing parameters end.\n", global_conf.sche_log_pre) - - return nil -end - ---global_conf: table ---Returns: nerv.LayerRepo -function prepare_layers(global_conf) - nerv.printf("%s preparing layers...\n", global_conf.sche_log_pre) - - local pr = global_conf.paramRepo - - local du = false - - --local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent, ["clip"] = 10}} - --local recurrentLconfig = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["clip"] = 10, ["direct_update"] = du, ["pr"] = pr}} - - local layers = { - ["nerv.GRULayerT"] = { - ["gruL1"] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["pr"] = pr}}, - }, - - ["nerv.DropoutLayerT"] = { - ["dropoutL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}, - }, - - ["nerv.SelectLinearLayer"] = { - ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab, ["pr"] = pr}}, - }, - - ["nerv.CombinerLayer"] = { - ["combinerL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}}, - }, - - ["nerv.AffineLayer"] = { - ["outputL"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.vocab:size()}, ["direct_update"] = du, ["pr"] = pr}}, - }, - - ["nerv.SoftmaxCELayerT"] = { - ["softmaxL"] = {{}, {["dim_in"] = {global_conf.vocab:size(), global_conf.vocab:size()}, ["dim_out"] = {1}}}, - }, - } - - for l = 2, global_conf.layer_num do - layers["nerv.DropoutLayerT"]["dropoutL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}} - layers["nerv.GRULayerT"]["gruL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["pr"] = pr}} - layers["nerv.CombinerLayer"]["combinerL" .. 
l] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}} - end - --[[ --we do not need those in the new tnn framework - printf("%s adding %d bptt layers...\n", global_conf.sche_log_pre, global_conf.bptt) - for i = 1, global_conf.bptt do - layers["nerv.IndRecurrentLayer"]["recurrentL" .. (i + 1)] = recurrentLconfig - layers["nerv.SigmoidLayer"]["sigmoidL" .. (i + 1)] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}} - layers["nerv.SelectLinearLayer"]["selectL" .. (i + 1)] = {{["ltp"] = "ltp_ih"}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}}} - end - --]] - - local layerRepo = nerv.LayerRepo(layers, pr, global_conf) - nerv.printf("%s preparing layers end.\n", global_conf.sche_log_pre) - return layerRepo -end - ---global_conf: table ---layerRepo: nerv.LayerRepo ---Returns: a nerv.TNN -function prepare_tnn(global_conf, layerRepo) - nerv.printf("%s Generate and initing TNN ...\n", global_conf.sche_log_pre) - - --input: input_w, input_w, ... input_w_now, last_activation - local connections_t = { - {"[1]", "selectL1[1]", 0}, - - --{"selectL1[1]", "recurrentL1[1]", 0}, - --{"recurrentL1[1]", "sigmoidL1[1]", 0}, - --{"sigmoidL1[1]", "combinerL1[1]", 0}, - --{"combinerL1[1]", "recurrentL1[2]", 1}, - - {"selectL1[1]", "gruL1[1]", 0}, - {"gruL1[1]", "combinerL1[1]", 0}, - {"combinerL1[1]", "gruL1[2]", 1}, - {"combinerL1[2]", "dropoutL1[1]", 0}, - - {"dropoutL"..global_conf.layer_num.."[1]", "outputL[1]", 0}, - {"outputL[1]", "softmaxL[1]", 0}, - {"[2]", "softmaxL[2]", 0}, - {"softmaxL[1]", "[1]", 0} - } - - for l = 2, global_conf.layer_num do - table.insert(connections_t, {"dropoutL"..(l-1).."[1]", "gruL"..l.."[1]", 0}) - table.insert(connections_t, {"gruL"..l.."[1]", "combinerL"..l.."[1]", 0}) - table.insert(connections_t, {"combinerL"..l.."[1]", "gruL"..l.."[2]", 1}) - table.insert(connections_t, {"combinerL"..l.."[2]", "dropoutL"..l.."[1]", 0}) - end - - --[[ - printf("%s printing DAG connections:\n", global_conf.sche_log_pre) - for key, value in pairs(connections_t) do - printf("\t%s->%s\n", key, value) - end - ]]-- - - local tnn = nerv.TNN("TNN", global_conf, {["dim_in"] = {1, global_conf.vocab:size()}, - ["dim_out"] = {1}, ["sub_layers"] = layerRepo, - ["connections"] = connections_t, ["clip_t"] = global_conf.clip_t, - }) - - tnn:init(global_conf.batch_size, global_conf.chunk_size) - - nerv.printf("%s Initing TNN end.\n", global_conf.sche_log_pre) - return tnn -end - -function prepare_dagL(global_conf, layerRepo) - nerv.printf("%s Generate and initing dagL ...\n", global_conf.sche_log_pre) - - --input: input_w, input_w, ... input_w_now, last_activation - local connections_t = { - ["[1]"] = "selectL1[1]", - - ["selectL1[1]"] = "gruL1[1]", - ["gruL1[1]"] = "combinerL1[1]", - ["[2]"] = "gruL1[2]", - --{"combinerL1[2]", "dropoutL1[1]", 0}, - - ["combinerL" .. global_conf.layer_num .. 
"[1]"] = "outputL[1]", - ["outputL[1]"] = "[1]", - ["combinerL1[2]"] = "[2]", - } - - if global_conf.layer_num > 1 then - nerv.error("multiple layer is currently not supported(not hard to implement though)") - end - --[[ - for l = 2, global_conf.layer_num do - table.insert(connections_t, {"dropoutL"..(l-1).."[1]", "gruL"..l.."[1]", 0}) - table.insert(connections_t, {"gruL"..l.."[1]", "combinerL"..l.."[1]", 0}) - table.insert(connections_t, {"combinerL"..l.."[1]", "gruL"..l.."[2]", 1}) - table.insert(connections_t, {"combinerL"..l.."[2]", "dropoutL"..l.."[1]", 0}) - end - ]]-- - - --[[ - printf("%s printing DAG connections:\n", global_conf.sche_log_pre) - for key, value in pairs(connections_t) do - printf("\t%s->%s\n", key, value) - end - ]]-- - - local dagL = nerv.DAGLayerT("dagL", global_conf, {["dim_in"] = {1, global_conf.hidden_size}, - ["dim_out"] = {global_conf.vocab:size(), global_conf.hidden_size}, ["sub_layers"] = layerRepo, - ["connections"] = connections_t - }) - - dagL:init(global_conf.batch_size) - - nerv.printf("%s Initing DAGL end.\n", global_conf.sche_log_pre) - return dagL -end - -function load_net_tnn(global_conf, fn) - prepare_parameters(global_conf, fn) - local layerRepo = prepare_layers(global_conf) - local tnn = prepare_tnn(global_conf, layerRepo) - return tnn -end - -function load_net_dagL(global_conf, fn) - prepare_parameters(global_conf, fn) - local layerRepo = prepare_layers(global_conf) - local dagL = prepare_dagL(global_conf, layerRepo) - return dagL -end - -local train_fn, valid_fn, test_fn -global_conf = {} -local set = arg[1] --"test" - -root_dir = '/home/slhome/txh18/workspace' - -if (set == "ptb") then - -data_dir = root_dir .. '/ptb/DATA' -train_fn = data_dir .. '/ptb.train.txt.adds' -valid_fn = data_dir .. '/ptb.valid.txt.adds' -test_fn = data_dir .. '/ptb.test.txt.adds' -vocab_fn = data_dir .. '/vocab' - -qdata_dir = root_dir .. '/ptb/questionGen/gen' - -global_conf = { - lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5, - cumat_type = nerv.CuMatrixFloat, - mmat_type = nerv.MMatrixFloat, - nn_act_default = 0, - - hidden_size = 300, - layer_num = 1, - chunk_size = 15, - batch_size = 32, - max_iter = 35, - lr_decay = 1.003, - decay_iter = 10, - param_random = function() return (math.random() / 5 - 0.1) end, - dropout_str = "0.5", - - train_fn = train_fn, - valid_fn = valid_fn, - test_fn = test_fn, - vocab_fn = vocab_fn, - max_sen_len = 90, - sche_log_pre = "[SCHEDULER]:", - log_w_num = 40000, --give a message when log_w_num words have been processed - timer = nerv.Timer(), - work_dir_base = root_dir .. '/ptb/EXP-nerv/grulm_v1.0', - - fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final', -} - -elseif (set == "msr_sc") then - -data_dir = '/home/slhome/txh18/workspace/sentenceCompletion/DATA_PV2' -train_fn = data_dir .. '/normed_all.sf.len60.adds.train' -valid_fn = data_dir .. '/normed_all.sf.len60.adds.dev' -test_fn = data_dir .. '/answer_normed.adds' -vocab_fn = data_dir .. 
'/normed_all.choose.vocab30000.addqvocab' - -global_conf = { - lrate = 1, wcost = 1e-6, momentum = 0, - cumat_type = nerv.CuMatrixFloat, - mmat_type = nerv.MMatrixFloat, - nn_act_default = 0, - - hidden_size = 300, - layer_num = 1, - chunk_size = 15, - batch_size = 10, - max_iter = 30, - decay_iter = 10, - lr_decay = 1.003, - param_random = function() return (math.random() / 5 - 0.1) end, - dropout_str = "0", - - train_fn = train_fn, - valid_fn = valid_fn, - test_fn = test_fn, - vocab_fn = vocab_fn, - sche_log_pre = "[SCHEDULER]:", - log_w_num = 400000, --give a message when log_w_num words have been processed - timer = nerv.Timer(), - work_dir_base = '/home/slhome/txh18/workspace/sentenceCompletion/EXP-Nerv/rnnlm_test' -} - -elseif (set == "twitter") then - -data_dir = root_dir .. '/twitter_new/DATA' -train_fn = data_dir .. '/twitter.choose2.adds' -valid_fn = data_dir .. '/twitter.valid.adds' -test_fn = data_dir .. '/comm.test.choose-ppl.adds' -vocab_fn = data_dir .. '/twitter.choose.train.vocab' - ---qdata_dir = root_dir .. '/ptb/questionGen/gen' - -global_conf = { - lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5, - cumat_type = nerv.CuMatrixFloat, - mmat_type = nerv.MMatrixFloat, - nn_act_default = 0, - - hidden_size = 300, - layer_num = 1, - chunk_size = 15, - batch_size = 32, - max_iter = 30, - lr_decay = 1.003, - decay_iter = 10, - param_random = function() return (math.random() / 5 - 0.1) end, - dropout_str = "0.5", - - train_fn = train_fn, - valid_fn = valid_fn, - test_fn = test_fn, - vocab_fn = vocab_fn, - max_sen_len = 32, - sche_log_pre = "[SCHEDULER]:", - log_w_num = 40000, --give a message when log_w_num words have been processed - timer = nerv.Timer(), - work_dir_base = root_dir .. '/twitter_new/EXP-nerv/grulm_v1.0' -} - -else - -valid_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn' -train_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn' -test_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn' -vocab_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn' - -global_conf = { - lrate = 0.01, wcost = 1e-5, momentum = 0, - cumat_type = nerv.CuMatrixFloat, - mmat_type = nerv.MMatrixFloat, - nn_act_default = 0, - - hidden_size = 20, - layer_num = 1, - chunk_size = 2, - batch_size = 10, - max_iter = 3, - param_random = function() return (math.random() / 5 - 0.1) end, - dropout_str = "0", - - train_fn = train_fn, - valid_fn = valid_fn, - test_fn = test_fn, - max_sen_len = 80, - lr_decay = 1.003, - decay_iter = 10, - vocab_fn = vocab_fn, - sche_log_pre = "[SCHEDULER]:", - log_w_num = 10, --give a message when log_w_num words have been processed - timer = nerv.Timer(), - work_dir_base = '/home/slhome/txh18/workspace/nerv/play/testEXP/tnn_lstmlm_test' -} - -end - -lr_half = false --can not be local, to be set by loadstring -start_iter = -1 -start_lr = nil -ppl_last = 100000 -commands_str = "sampling" --"train:test" -commands = {} -test_iter = -1 ---for testout(question) -q_file = "/home/slhome/txh18/workspace/ptb/questionGen/gen/ptb.test.txt.q10rs1_Msss.adds" - -if arg[2] ~= nil then - nerv.printf("%s applying arg[2](%s)...\n", global_conf.sche_log_pre, arg[2]) - loadstring(arg[2])() - nerv.LMUtil.wait(0.5) -else - nerv.printf("%s no user setting, all default...\n", global_conf.sche_log_pre) -end - -global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'l' .. global_conf.layer_num .. 'ch' .. 
global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost .. 'dr' .. global_conf.dropout_str -global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf' -global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak' -global_conf.param_fn = global_conf.work_dir .. "/params" -global_conf.dropout_list = nerv.SUtil.parse_schedule(global_conf.dropout_str) -global_conf.log_fn = global_conf.work_dir .. '/log_lstm_tnn_' .. commands_str ..os.date("_TT%m_%d_%X",os.time()) -global_conf.log_fn, _ = string.gsub(global_conf.log_fn, ':', '-') -commands = nerv.SUtil.parse_commands_set(commands_str) - -if start_lr ~= nil then - global_conf.lrate = start_lr -end - ---[[ ---redirecting log outputs! -nerv.SUtil.log_redirect(global_conf.log_fn) -nerv.LMUtil.wait(2) -]]-- - -----------------printing options--------------------------------- -nerv.printf("%s printing global_conf...\n", global_conf.sche_log_pre) -for id, value in pairs(global_conf) do - nerv.printf("%s:\t%s\n", id, tostring(value)) -end -nerv.LMUtil.wait(2) - -nerv.printf("%s printing training scheduling options...\n", global_conf.sche_log_pre) -nerv.printf("lr_half:\t%s\n", tostring(lr_half)) -nerv.printf("start_iter:\t%s\n", tostring(start_iter)) -nerv.printf("ppl_last:\t%s\n", tostring(ppl_last)) -nerv.printf("commands_str:\t%s\n", commands_str) -nerv.printf("test_iter:\t%s\n", tostring(test_iter)) -nerv.printf("%s printing training scheduling end.\n", global_conf.sche_log_pre) -nerv.LMUtil.wait(2) -------------------printing options end------------------------------ - -math.randomseed(1) - -local vocab = nerv.LMVocab() -global_conf["vocab"] = vocab -nerv.printf("%s building vocab...\n", global_conf.sche_log_pre) -global_conf.vocab:build_file(global_conf.vocab_fn, false) -ppl_rec = {} - -local final_iter = -1 -if commands["test"] == 1 then - nerv.printf("===FINAL TEST===\n") - global_conf.sche_log_pre = "[SCHEDULER FINAL_TEST]:" - local tnn = load_net_tnn(global_conf, global_conf.fn_to_sample) - global_conf.dropout_rate = 0 - LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update! -end --if commands["test"] - -if commands["sampling"] == 1 then - nerv.printf("===SAMPLE===\n") - global_conf.sche_log_pre = "[SCHEDULER SAMPLING]:" - local dagL = load_net_dagL(global_conf, global_conf.fn_to_sample) - local sampler = nerv.LMSampler(global_conf) - sampler:load_dagL(dagL) - for k = 1, 5 do - local res = sampler:lm_sample_rnn_dagL(10, {}) - for i = 1, #res do - for j = 1, #res[i] do - nerv.printf("%s ", res[i][j].w) - end - nerv.printf("\n") - end - end - --global_conf.dropout_rate = 0 - --LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update! 
-end --if commands["sampling"] - - diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c index 04205e4..58bdfe7 100644 --- a/nerv/lib/matrix/cumatrix.c +++ b/nerv/lib/matrix/cumatrix.c @@ -9,6 +9,14 @@ static cudaEvent_t profile_start, profile_stop; curandGenerator_t curand_gen; static HashMap *profile; +void nerv_cumatrix_select_gpu(int dev, Status *status) { + fprintf(stderr, "** selecting GPU %d\n", dev); + NERV_SET_STATUS(status, NERV_NORMAL, 0); + CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status); + CUDA_SAFE_SYNC_CALL(cublasDestroy(cublas_handle), status); + CUDA_SAFE_SYNC_CALL(cublasCreate(&cublas_handle), status); +} + void nerv_cumatrix_print_profile() { size_t i; fprintf(stderr, "*** [nerv cumatrix profile] **\n"); diff --git a/nerv/lib/matrix/generic/cukernel.cu b/nerv/lib/matrix/generic/cukernel.cu index 8fbe05d..51e3b6a 100644 --- a/nerv/lib/matrix/generic/cukernel.cu +++ b/nerv/lib/matrix/generic/cukernel.cu @@ -383,6 +383,20 @@ __global__ void cudak_(copy_rows_by_colidx)(const MATRIX_ELEM *a, MATRIX_ELEM *b b[j + i * stride] = a[j + k * stride]; } +__global__ void cudak_(prefixsum_row_reduce)(const MATRIX_ELEM *a, MATRIX_ELEM *b, + int nrow, int ncol, int stride_a, int stride_b, int offset) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + long idx_a, idx_b; + if (i >= nrow || j >= ncol) return; + idx_b = j + i * stride_b; + idx_a = j + i * stride_a; + //b[idx] = 1.0 / (1.0 + exp(-a[idx])); + if (j >= offset) + b[idx_b] = a[idx_a] + a[idx_a - offset]; + else + b[idx_b] = a[idx_a]; +} extern "C" { #include "../cukernel.h" @@ -745,6 +759,40 @@ extern "C" { cudaStreamSynchronize(0); } + void cudak_(cuda_prefixsum_row)(const Matrix *a, Matrix *b) { + dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), + CEIL_DIV(b->nrow, threadsPerBlock.y)); + + MATRIX_ELEM *tmp[2]; + size_t tmp_stride[2]; + cudaMallocPitch(tmp, tmp_stride + 0, a->ncol * sizeof(MATRIX_ELEM), a->nrow); + cudaMallocPitch(tmp + 1, tmp_stride + 1, a->ncol * sizeof(MATRIX_ELEM), a->nrow); + + int offset = 1; + cudak_(prefixsum_row_reduce)<<>> \ + (MATRIX_ELEM_PTR(a), tmp[0], b->nrow, b->ncol, + a->stride / sizeof(MATRIX_ELEM), tmp_stride[0] / sizeof(MATRIX_ELEM), offset); + int pin = 0, pout = 1; + + for (offset = 2;offset <= a->ncol / 2;offset *= 2) { + cudak_(prefixsum_row_reduce)<<>> \ + (tmp[pin], tmp[pout], b->nrow, b->ncol, + tmp_stride[pin] / sizeof(MATRIX_ELEM), tmp_stride[pout] / sizeof(MATRIX_ELEM), offset); + pin = 1 - pin; + pout = 1 - pout; + } + + cudak_(prefixsum_row_reduce)<<>> \ + (tmp[pin], MATRIX_ELEM_PTR(b), b->nrow, b->ncol, + tmp_stride[pin] / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), offset); + + cudaFree(tmp[0]); + cudaFree(tmp[1]); + + cudaStreamSynchronize(0); + } + void cudak_(cuda_decompress)(const Matrix *a, Matrix *b) { dim3 threadsPerBlock(1, CUDA_THREADS_NN); dim3 numBlocks(1, CEIL_DIV(a->nrow, threadsPerBlock.y)); diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c index bf93b77..7b70607 100644 --- a/nerv/lib/matrix/generic/cumatrix.c +++ b/nerv/lib/matrix/generic/cumatrix.c @@ -486,6 +486,14 @@ void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b, NERV_SET_STATUS(status, NERV_NORMAL, 0); } +void nerv_matrix_(prefixsum_row)(Matrix *a, const Matrix *b, Status *status) { + CHECK_SAME_DIMENSION(a, b, status); + PROFILE_START + cudak_(cuda_prefixsum_row)(b, a); + PROFILE_STOP + NERV_SET_STATUS(status, NERV_NORMAL, 
+}
+
 static void cuda_matrix_(free)(MATRIX_ELEM *ptr, Status *status) {
     CUDA_SAFE_SYNC_CALL(cudaFree(ptr), status);
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
diff --git a/nerv/lib/matrix/generic/cumatrix.h b/nerv/lib/matrix/generic/cumatrix.h
index 4f66a2c..5b8076f 100644
--- a/nerv/lib/matrix/generic/cumatrix.h
+++ b/nerv/lib/matrix/generic/cumatrix.h
@@ -61,6 +61,7 @@ void nerv_matrix_(scale_rows_by_col)(Matrix *a, const Matrix *b,
                                      Status *status);
 void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b,
                                      Status *status);
+void nerv_matrix_(prefixsum_row)(Matrix *a, const Matrix *b, Status *status);
 void nerv_matrix_(thres_mask)(Matrix *a, Matrix *b,
                               double thres, double low, double high,
                               Status *status);
diff --git a/nerv/matrix/cumatrix.c b/nerv/matrix/cumatrix.c
index bf92f92..7f22d68 100644
--- a/nerv/matrix/cumatrix.c
+++ b/nerv/matrix/cumatrix.c
@@ -8,6 +8,14 @@ static cublasHandle_t cublas_handle;
 static cudaEvent_t profile_start, profile_stop;
 static HashMap *profile;
 
+static int select_gpu(lua_State *L) {
+    Status status;
+    int dev = luaL_checkinteger(L, 1);
+    nerv_cumatrix_select_gpu(dev, &status);
+    NERV_LUA_CHECK_STATUS(L, status);
+    return 0;
+}
+
 static int print_profile(lua_State *L) {
     nerv_cumatrix_print_profile();
     return 0;
@@ -21,6 +29,7 @@ static int clear_profile(lua_State *L) {
 static const luaL_Reg cumatrix_methods[] = {
     {"print_profile", print_profile},
     {"clear_profile", clear_profile},
+    {"select_gpu", select_gpu},
     {NULL, NULL}
 };
 
diff --git a/nerv/matrix/generic/cumatrix.c b/nerv/matrix/generic/cumatrix.c
index cb55901..b706c21 100644
--- a/nerv/matrix/generic/cumatrix.c
+++ b/nerv/matrix/generic/cumatrix.c
@@ -15,6 +15,15 @@ static int nerv_matrix_(lua_get_blas_op)(char ch) {
     return (ch == 'T' || ch == 't') ? CUBLAS_OP_T : CUBLAS_OP_N;
 }
 
+static int nerv_matrix_(lua_prefixsum_row)(lua_State *L) {
+    Status status;
+    Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
+    Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname));
+    nerv_matrix_(prefixsum_row)(a, b, &status);
+    NERV_LUA_CHECK_STATUS(L, status);
+    return 0;
+}
+
 static int nerv_matrix_(lua_thres_mask)(lua_State *L) {
     Status status;
     Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
@@ -230,6 +239,7 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = {
     {"rearrange_frm", nerv_matrix_(lua_rearrange_frm)},
     {"scale_rows_by_row", nerv_matrix_(lua_scale_rows_by_row)},
     {"scale_rows_by_col", nerv_matrix_(lua_scale_rows_by_col)},
+    {"prefixsum_row", nerv_matrix_(lua_prefixsum_row)},
 #ifdef __NERV_FUTURE_CUDA_7
     {"update_select_rows_by_rowidx", nerv_matrix_(lua_update_select_rows_by_rowidx)},
     {"update_select_rows_by_colidx", nerv_matrix_(lua_update_select_rows_by_colidx)},
diff --git a/nerv/tnn/init.lua b/nerv/tnn/init.lua
index b375fa8..7faca31 100644
--- a/nerv/tnn/init.lua
+++ b/nerv/tnn/init.lua
@@ -47,5 +47,6 @@ nerv.include('sutil.lua')
 nerv.include('tnn.lua')
 nerv.include('layersT/softmax_ce_t.lua')
 nerv.include('layersT/lstm_t.lua')
+nerv.include('layersT/gru_t.lua')
 nerv.include('layersT/dropout_t.lua')
 nerv.include('layer_dag_t.lua')
diff --git a/nerv/tnn/layersT/gru_t.lua b/nerv/tnn/layersT/gru_t.lua
new file mode 100644
index 0000000..8f15cc8
--- /dev/null
+++ b/nerv/tnn/layersT/gru_t.lua
@@ -0,0 +1,114 @@
+local GRULayerT = nerv.class('nerv.GRULayerT', 'nerv.LayerT')
+
+function GRULayerT:__init(id, global_conf, layer_conf)
+    --input1:x input2:h (the previous hidden state)
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+
+    if self.dim_in[2] ~= self.dim_out[1] then
+        nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)", self.dim_in[2], self.dim_out[1])
+    end
+
+    --prepare a DAGLayerT to hold the GRU structure
+    local pr = layer_conf.pr
+    if pr == nil then
+        pr = nerv.ParamRepo()
+    end
+
+    local function ap(str)
+        return self.id .. '.' .. str
+    end
+
+    local layers = {
+        ["nerv.CombinerLayer"] = {
+            [ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]},
+                ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}},
+            [ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
+                ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
+            [ap("updateGDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
+                ["dim_out"] = {self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
+            [ap("updateMergeL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]},
+                ["lambda"] = {1, -1, 1}}},
+        },
+        ["nerv.AffineLayer"] = {
+            [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]}, ["pr"] = pr}},
+        },
+        ["nerv.TanhLayer"] = {
+            [ap("mainTanhL")] = {{}, {["dim_in"] = {self.dim_out[1]}, ["dim_out"] = {self.dim_out[1]}}},
+        },
+        ["nerv.GateFLayer"] = {
+            [ap("resetGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]},
+                ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}},
+            [ap("updateGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]},
+                ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}},
+        },
+        ["nerv.ElemMulLayer"] = {
+            [ap("resetGMulL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
+            [ap("updateGMulCL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
+            [ap("updateGMulHL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
+        },
+    }
+
+    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+
+    local connections_t = {
+        ["<input>[1]"] = ap("inputXDup[1]"),
+        ["<input>[2]"] = ap("inputHDup[1]"),
+
+        [ap("inputXDup[1]")] = ap("resetGateL[1]"),
+        [ap("inputHDup[1]")] = ap("resetGateL[2]"),
+        [ap("inputXDup[2]")] = ap("updateGateL[1]"),
+        [ap("inputHDup[2]")] = ap("updateGateL[2]"),
+        [ap("updateGateL[1]")] = ap("updateGDup[1]"),
+
+        [ap("resetGateL[1]")] = ap("resetGMulL[1]"),
+        [ap("inputHDup[3]")] = ap("resetGMulL[2]"),
+
+        [ap("inputXDup[3]")] = ap("mainAffineL[1]"),
+        [ap("resetGMulL[1]")] = ap("mainAffineL[2]"),
+        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
+
+        [ap("updateGDup[1]")] = ap("updateGMulHL[1]"),
+        [ap("inputHDup[4]")] = ap("updateGMulHL[2]"),
+        [ap("updateGDup[2]")] = ap("updateGMulCL[1]"),
+        [ap("mainTanhL[1]")] = ap("updateGMulCL[2]"),
+
+        [ap("inputHDup[5]")] = ap("updateMergeL[1]"),
+        [ap("updateGMulHL[1]")] = ap("updateMergeL[2]"),
+        [ap("updateGMulCL[1]")] = ap("updateMergeL[3]"),
+
+        [ap("updateMergeL[1]")] = "<output>[1]",
+    }
+
+    self.dagL = nerv.DAGLayerT(self.id, global_conf,
+            {["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out, ["sub_layers"] = layerRepo,
+             ["connections"] = connections_t})
+
+    self:check_dim_len(2, 1) -- takes x and the previous h, produces the new h
+end
+
+function GRULayerT:init(batch_size, chunk_size)
+    self.dagL:init(batch_size, chunk_size)
+end
+
+function GRULayerT:batch_resize(batch_size, chunk_size)
+    self.dagL:batch_resize(batch_size, chunk_size)
+end
+
+function GRULayerT:update(bp_err, input, output, t)
+    self.dagL:update(bp_err, input, output, t)
+end
+
+function GRULayerT:propagate(input, output, t)
+    self.dagL:propagate(input, output, t)
+end
+
+function GRULayerT:back_propagate(bp_err, next_bp_err, input, output, t)
+    self.dagL:back_propagate(bp_err, next_bp_err, input, output, t)
+end
+
+function GRULayerT:get_params()
+    return self.dagL:get_params()
+end
--
cgit v1.2.3-70-g09d2
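
A minimal Lua usage sketch for the bindings added above (not part of the patch itself; the matrix sizes and variable names are illustrative, and it assumes a NERV build with this commit applied, run through the usual nerv launcher so that nerv.CuMatrixFloat is available):

    local cumat = nerv.CuMatrixFloat
    cumat.select_gpu(0)          -- module-level method registered in nerv/matrix/cumatrix.c;
                                 -- the example scripts call it once at startup

    -- dst:prefixsum_row(src) writes the row-wise inclusive prefix sum of src into dst;
    -- both matrices must have identical shape (CHECK_SAME_DIMENSION)
    local src = cumat(4, 8)
    local dst = cumat(4, 8)
    src:fill(1)                  -- every entry is 1
    dst:prefixsum_row(src)       -- each row of dst becomes 1, 2, 3, ..., 8

This prefix sum is what the LM sampler appears to rely on: each softmax row becomes a cumulative distribution that a single uniform draw can binary-search. The new nerv.GRULayerT takes two inputs (x and the previous h), produces the new h, and wraps its gate, affine, and element-wise sub-layers in a DAGLayerT.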