author     Determinant <[email protected]>  2016-02-17 20:14:06 +0800
committer  Determinant <[email protected]>  2016-02-17 20:14:06 +0800
commit     0ee43c21af4fcd3aed070b1f5ad1eb9feb2ad159 (patch)
tree       ceb1d38328767fb657bc0d37ec6e513b08a86277 /nerv
parent     490a10c2130773bd022f05513fa2905b6a6c6e91 (diff)
try to merge manually
Diffstat (limited to 'nerv')
-rw-r--r--  nerv/Makefile  2
-rw-r--r--  nerv/examples/lmptb/grulm_ptb_main.lua  16
-rw-r--r--  nerv/examples/lmptb/lm_sampler.lua  60
-rw-r--r--  nerv/examples/lmptb/lmptb/layer/init.lua  2
-rw-r--r--  nerv/examples/lmptb/lmptb/lmseqreader.lua  34
-rw-r--r--  nerv/examples/lmptb/lmptb/lmutil.lua  13
-rw-r--r--  nerv/examples/lmptb/lmptb/lmvocab.lua  6
-rw-r--r--  nerv/examples/lmptb/lstmlm_ptb_main.lua  6
-rw-r--r--  nerv/examples/lmptb/m-tests/lm_sampler_test.lua (renamed from nerv/examples/lmptb/sample_grulm_ptb_main.lua)  167
-rw-r--r--  nerv/examples/lmptb/m-tests/lmseqreader_test.lua  10
-rw-r--r--  nerv/examples/lmptb/m-tests/some-text  2
-rw-r--r--  nerv/examples/lmptb/rnnlm_ptb_main.lua  37
-rw-r--r--  nerv/lib/matrix/cumatrix.c  8
-rw-r--r--  nerv/lib/matrix/generic/cukernel.cu  48
-rw-r--r--  nerv/lib/matrix/generic/cumatrix.c  8
-rw-r--r--  nerv/lib/matrix/generic/cumatrix.h  1
-rw-r--r--  nerv/matrix/cumatrix.c  9
-rw-r--r--  nerv/matrix/generic/cumatrix.c  10
-rw-r--r--  nerv/tnn/init.lua  1
-rw-r--r--  nerv/tnn/layersT/gru_t.lua (renamed from nerv/examples/lmptb/lmptb/layer/gru_t.lua)  0
20 files changed, 321 insertions, 119 deletions
diff --git a/nerv/Makefile b/nerv/Makefile
index a29309a..a472cfc 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -36,7 +36,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \
nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \
io/sgd_buffer.lua \
tnn/init.lua tnn/layer_dag_t.lua tnn/sutil.lua tnn/tnn.lua \
- tnn/layersT/dropout_t.lua tnn/layersT/lstm_t.lua tnn/layersT/softmax_ce_t.lua
+ tnn/layersT/dropout_t.lua tnn/layersT/lstm_t.lua tnn/layersT/gru_t.lua tnn/layersT/softmax_ce_t.lua
INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK
#CUDA_BASE := /usr/local/cuda-7.0
diff --git a/nerv/examples/lmptb/grulm_ptb_main.lua b/nerv/examples/lmptb/grulm_ptb_main.lua
index ef5d7f9..4a3f39f 100644
--- a/nerv/examples/lmptb/grulm_ptb_main.lua
+++ b/nerv/examples/lmptb/grulm_ptb_main.lua
@@ -198,6 +198,7 @@ qdata_dir = root_dir .. '/ptb/questionGen/gen'
global_conf = {
lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5,
cumat_type = nerv.CuMatrixFloat,
+ select_gpu = 0,
mmat_type = nerv.MMatrixFloat,
nn_act_default = 0,
@@ -259,7 +260,7 @@ global_conf = {
elseif (set == "twitter") then
data_dir = root_dir .. '/twitter_new/DATA'
-train_fn = data_dir .. '/twitter.choose2.adds'
+train_fn = data_dir .. '/twitter.choose.adds'
valid_fn = data_dir .. '/twitter.valid.adds'
test_fn = data_dir .. '/comm.test.choose-ppl.adds'
vocab_fn = data_dir .. '/twitter.choose.train.vocab'
@@ -359,7 +360,14 @@ commands = nerv.SUtil.parse_commands_set(commands_str)
if start_lr ~= nil then
global_conf.lrate = start_lr
end
-
+
+nerv.printf("detecting gconf.select_gpu...\n")
+if global_conf.select_gpu then
+ nerv.printf("select gpu to %d\n", global_conf.select_gpu)
+ global_conf.cumat_type.select_gpu(global_conf.select_gpu)
+ nerv.LMUtil.wait(1)
+end
+
nerv.printf("%s creating work_dir(%s)...\n", global_conf.sche_log_pre, global_conf.work_dir)
nerv.LMUtil.wait(2)
os.execute("mkdir -p "..global_conf.work_dir)
@@ -388,10 +396,10 @@ nerv.LMUtil.wait(2)
math.randomseed(1)
-local vocab = nerv.LMVocab()
+local vocab = nerv.LMVocab(global_conf)
global_conf["vocab"] = vocab
nerv.printf("%s building vocab...\n", global_conf.sche_log_pre)
-global_conf.vocab:build_file(global_conf.vocab_fn, false)
+global_conf.vocab:build_file(global_conf.vocab_fn)
ppl_rec = {}
local final_iter = -1
diff --git a/nerv/examples/lmptb/lm_sampler.lua b/nerv/examples/lmptb/lm_sampler.lua
index c25a75c..c9adf85 100644
--- a/nerv/examples/lmptb/lm_sampler.lua
+++ b/nerv/examples/lmptb/lm_sampler.lua
@@ -3,31 +3,34 @@ local LMSampler = nerv.class('nerv.LMSampler')
function LMSampler:__init(global_conf)
self.log_pre = "LMSampler"
self.gconf = global_conf
+ self.batch_size = self.gconf.batch_size
+ self.chunk_size = self.gconf.chunk_size --largest sample sentence length
self.vocab = self.gconf.vocab
self.sen_end_token = self.vocab.sen_end_token
self.sen_end_id = self.vocab:get_word_str(self.sen_end_token).id
+
+ self.loaded = false
end
-function LMSampler:load_dagL(dagL)
- self.batch_size = self.gconf.batch_size
- self.chunk_size = self.gconf.chunk_size
-
+function LMSampler:load_dagL(dagL)
nerv.printf("%s loading dagL\n", self.log_pre)
self.dagL = dagL
+ self.dagL:init(self.batch_size)
self.dagL_inputs = {}
- self.dagL_inputs[1] = global_conf.cumat_type(global_conf.batch_size, 1)
+ self.dagL_inputs[1] = self.gconf.cumat_type(self.gconf.batch_size, 1)
self.dagL_inputs[1]:fill(self.sen_end_id - 1)
- self.dagL_inputs[2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
+ self.dagL_inputs[2] = self.gconf.cumat_type(self.gconf.batch_size, self.gconf.hidden_size)
self.dagL_inputs[2]:fill(0)
self.dagL_outputs = {}
- self.dagL_outputs[1] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size())
- self.dagL_outputs[2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
+ self.dagL_outputs[1] = self.gconf.cumat_type(self.gconf.batch_size, self.gconf.vocab:size())
+ self.dagL_outputs[2] = self.gconf.cumat_type(self.gconf.batch_size, self.gconf.hidden_size)
- self.smout_d = global_conf.cumat_type(self.batch_size, self.vocab:size())
- self.smout_h = global_conf.mmat_type(self.batch_size, self.vocab:size())
+ self.smout_d = self.gconf.cumat_type(self.batch_size, self.vocab:size())
+ self.ssout_d = self.gconf.cumat_type(self.batch_size, self.vocab:size())
+ self.ssout_h = self.gconf.mmat_type(self.batch_size, self.vocab:size())
self.store = {}
for i = 1, self.batch_size do
@@ -38,11 +41,31 @@ function LMSampler:load_dagL(dagL)
self.store[i][1].p = 0
end
self.repo = {}
+
+ self.loaded = true
end
-function LMSampler:sample_to_store(smout)
+function LMSampler:sample_to_store(ssout) --private
for i = 1, self.batch_size do
local ran = math.random()
+ local id = 1
+ local low = 0
+ local high = ssout:ncol() - 1
+ if ssout[i - 1][high] < 0.9999 or ssout[i - 1][high] > 1.0001 then
+ nerv.error("%s ERROR, softmax output summation(%f) seems to have some problem", self.log_pre, ssout[i - 1][high])
+ end
+ if ssout[i - 1][low] < ran then
+ while low + 1 < high do
+ local mid = math.floor((low + high) / 2)
+ if ssout[i - 1][mid] < ran then
+ low = mid
+ else
+ high = mid
+ end
+ end
+ id = high + 1
+ end
+ --[[
local s = 0
local id = self.vocab:size()
for j = 0, self.vocab:size() - 1 do
@@ -52,19 +75,25 @@ function LMSampler:sample_to_store(smout)
break
end
end
+ ]]--
if #self.store[i] >= self.chunk_size - 2 then
id = self.sen_end_id
end
local tmp = {}
tmp.w = self.vocab:get_word_id(id).str
tmp.id = id
- tmp.p = smout[i - 1][id - 1]
+ if id == 1 then
+ tmp.p = ssout[i - 1][id - 1]
+ else
+ tmp.p = ssout[i - 1][id - 1] - ssout[i - 1][id - 2]
+ end
table.insert(self.store[i], tmp)
end
end
---Returns: LMResult
function LMSampler:lm_sample_rnn_dagL(sample_num, p_conf)
+ assert(self.loaded == true)
+
local dagL = self.dagL
local inputs = self.dagL_inputs
local outputs = self.dagL_outputs
@@ -74,9 +103,10 @@ function LMSampler:lm_sample_rnn_dagL(sample_num, p_conf)
inputs[2]:copy_fromd(outputs[2]) --copy hidden activation
self.smout_d:softmax(outputs[1])
- self.smout_d:copy_toh(self.smout_h)
+ self.ssout_d:prefixsum_row(self.smout_d)
+ self.ssout_d:copy_toh(self.ssout_h)
- self:sample_to_store(self.smout_h)
+ self:sample_to_store(self.ssout_h)
for i = 1, self.batch_size do
inputs[1][i - 1][0] = self.store[i][#self.store[i]].id - 1
if self.store[i][#self.store[i]].id == self.sen_end_id then --meet a sentence end
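
The sampling change above replaces the linear scan of the softmax output with a binary search over its row-wise prefix sum, i.e. an inverse-CDF draw. A minimal standalone Lua sketch of the same idea, using a plain table in place of a NERV matrix (the function name and example row are illustrative, not part of this commit):

    -- cdf is an inclusive prefix sum of one softmax row, e.g. {0.2, 0.5, 1.0}
    -- returns a 1-based word id drawn according to the increments of cdf
    -- (cdf[id] - cdf[id - 1], with cdf[0] taken as 0)
    local function sample_from_cdf(cdf)
        local ran = math.random()
        if cdf[1] >= ran then return 1 end
        local low, high = 1, #cdf
        while low + 1 < high do
            local mid = math.floor((low + high) / 2)
            if cdf[mid] < ran then low = mid else high = mid end
        end
        return high
    end

    print(sample_from_cdf({0.2, 0.5, 1.0}))  -- prints 1, 2 or 3
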
diff --git a/nerv/examples/lmptb/lmptb/layer/init.lua b/nerv/examples/lmptb/lmptb/layer/init.lua
index b345244..ceae009 100644
--- a/nerv/examples/lmptb/lmptb/layer/init.lua
+++ b/nerv/examples/lmptb/lmptb/layer/init.lua
@@ -1,6 +1,6 @@
require 'lmptb.layer.select_linear'
require 'lmptb.layer.affine_recurrent_plusvec'
-require 'lmptb.layer.gru_t'
+--require 'lmptb.layer.gru_t'
require 'lmptb.layer.lm_affine_recurrent'
diff --git a/nerv/examples/lmptb/lmptb/lmseqreader.lua b/nerv/examples/lmptb/lmptb/lmseqreader.lua
index 0f29f8b..1272929 100644
--- a/nerv/examples/lmptb/lmptb/lmseqreader.lua
+++ b/nerv/examples/lmptb/lmptb/lmseqreader.lua
@@ -28,6 +28,10 @@ function LMReader:__init(global_conf, batch_size, chunk_size, vocab, r_conf)
if r_conf.compressed_label == true then
self.compressed_label = true
end
+ self.same_io = false
+ if r_conf.same_io == true then --can be used to train P(wi|w1..(i-1),(i+1)..n)
+ self.same_io = true
+ end
end
--fn: string
@@ -36,9 +40,9 @@ function LMReader:open_file(fn)
if (self.fh ~= nil) then
nerv.error("%s error: in open_file(fn is %s), file handle not nil.", self.log_pre, fn)
end
- printf("%s opening file %s...\n", self.log_pre, fn)
- print(self.log_pre, "batch_size:", self.batch_size, "chunk_size", self.chunk_size)
- print(self.log_pre, "se_mode:", self.se_mode)
+ nerv.printf("%s opening file %s...\n", self.log_pre, fn)
+ nerv.printf("%s batch_size:%d chunk_size:%d\n", self.log_pre, self.batch_size, self.chunk_size)
+ nerv.printf("%s se_mode:%s same_io:%s\n", self.log_pre, tostring(self.se_mode), tostring(self.same_io))
self.fh = io.open(fn, "r")
self.streams = {}
for i = 1, self.batch_size, 1 do
@@ -132,12 +136,15 @@ function LMReader:get_batch(feeds)
else
self:refresh_stream(i)
if st.store[st.head] ~= nil then
- inputs_s[j][i] = st.store[st.head]
- --inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head]).id - 1
- self.bak_inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head]).id - 1
+ if self.same_io == false then
+ inputs_s[j][i] = st.store[st.head]
+ self.bak_inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head]).id - 1
+ else
+ inputs_s[j][i] = st.store[st.head + 1]
+ self.bak_inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head + 1]).id - 1
+ end
else
inputs_s[j][i] = self.vocab.null_token
- --inputs_m[j][1][i - 1][0] = 0
self.bak_inputs_m[j][1][i - 1][0] = 0
end
if st.store[st.head + 1] ~= nil then
@@ -148,7 +155,7 @@ function LMReader:get_batch(feeds)
inputs_m[j][2][i - 1][self.vocab:get_word_str(st.store[st.head + 1]).id - 1] = 1
end
else
- if (inputs_s[j][i] ~= self.vocab.null_token) then
+ if inputs_s[j][i] ~= self.vocab.null_token then
nerv.error("reader error : input not null but label is null_token")
end
labels_s[j][i] = self.vocab.null_token
@@ -159,6 +166,9 @@ function LMReader:get_batch(feeds)
end
flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_NORM) --has both input and label
got_new = true
+ if st.store[st.head] == self.vocab.sen_end_token then
+ flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_START)
+ end
st.store[st.head] = nil
st.head = st.head + 1
if labels_s[j][i] == self.vocab.sen_end_token then
@@ -169,10 +179,7 @@ function LMReader:get_batch(feeds)
end_stream = true --meet sentence end, this stream ends now
end
end
- if inputs_s[j][i] == self.vocab.sen_end_token then
- flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_START)
- end
- end
+ end
end
end
end
@@ -190,7 +197,7 @@ function LMReader:get_batch(feeds)
--check for self.al_sen_start
for i = 1, self.batch_size do
- if inputs_s[1][i] ~= self.vocab.sen_end_token and inputs_s[1][i] ~= self.vocab.null_token then
+ if bit.band(flags[1][i], nerv.TNN.FC.SEQ_START) == 0 and flags[1][i] > 0 then
self.stat.al_sen_start = false
end
end
@@ -198,7 +205,6 @@ function LMReader:get_batch(feeds)
if got_new == false then
nerv.info("lmseqreader file ends, printing stats...")
nerv.printf("al_sen_start:%s\n", tostring(self.stat.al_sen_start))
-
return false
else
return true
diff --git a/nerv/examples/lmptb/lmptb/lmutil.lua b/nerv/examples/lmptb/lmptb/lmutil.lua
index 6d66d6e..13a5c45 100644
--- a/nerv/examples/lmptb/lmptb/lmutil.lua
+++ b/nerv/examples/lmptb/lmptb/lmutil.lua
@@ -112,10 +112,17 @@ end
--cla:string
--w:string
--prob:float, the probability
-function Result:add(cla, w, prob)
- self[cla].logp_all = self[cla].logp_all + math.log10(prob)
+function Result:add(cla, w, prob, log10ed)
+ local lp
+ if log10ed == true then
+ lp = prob
+ else
+ lp = math.log10(prob)
+ end
+
+ self[cla].logp_all = self[cla].logp_all + lp
if (self.vocab:is_unk_str(w)) then
- self[cla].logp_unk = self[cla].logp_unk + math.log10(prob)
+ self[cla].logp_unk = self[cla].logp_unk + lp
self[cla].cn_unk = self[cla].cn_unk + 1
end
if (w == self.vocab.sen_end_token) then
diff --git a/nerv/examples/lmptb/lmptb/lmvocab.lua b/nerv/examples/lmptb/lmptb/lmvocab.lua
index 0e7ef3e..38bb18e 100644
--- a/nerv/examples/lmptb/lmptb/lmvocab.lua
+++ b/nerv/examples/lmptb/lmptb/lmvocab.lua
@@ -2,8 +2,6 @@ require 'lmptb.lmutil'
local Vocab = nerv.class("nerv.LMVocab")
-local printf = nerv.printf
-
local mysplit = function(inputstr, sep)
if sep == nil then
sep = "%s"
@@ -106,7 +104,7 @@ end
--fn: string
--Add all words in fn to the vocab
function Vocab:build_file(fn)
- printf("%s Vocab building on file %s...\n", self.log_pre, fn)
+ nerv.printf("%s Vocab building on file %s...\n", self.log_pre, fn)
local file = io.open(fn, "r")
while (true) do
local list = nerv.LMUtil.read_line(file)
@@ -119,7 +117,7 @@ function Vocab:build_file(fn)
end
end
file:close()
- printf("%s Building finished, vocab size now is %d.\n", self.log_pre, self:size())
+ nerv.printf("%s Building finished, vocab size now is %d.\n", self.log_pre, self:size())
end
--[[test
diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua
index 9bdd5ff..b576834 100644
--- a/nerv/examples/lmptb/lstmlm_ptb_main.lua
+++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua
@@ -277,7 +277,7 @@ global_conf = {
hidden_size = 300,
layer_num = 1,
chunk_size = 15,
- batch_size = 20,
+ batch_size = 32,
max_iter = 35,
lr_decay = 1.003,
decay_iter = 10,
@@ -390,10 +390,10 @@ nerv.LMUtil.wait(2)
math.randomseed(1)
-local vocab = nerv.LMVocab()
+local vocab = nerv.LMVocab(global_conf)
global_conf["vocab"] = vocab
nerv.printf("%s building vocab...\n", global_conf.sche_log_pre)
-global_conf.vocab:build_file(global_conf.vocab_fn, false)
+global_conf.vocab:build_file(global_conf.vocab_fn)
ppl_rec = {}
local final_iter = -1
diff --git a/nerv/examples/lmptb/sample_grulm_ptb_main.lua b/nerv/examples/lmptb/m-tests/lm_sampler_test.lua
index 9a13d36..effb2ad 100644
--- a/nerv/examples/lmptb/sample_grulm_ptb_main.lua
+++ b/nerv/examples/lmptb/m-tests/lm_sampler_test.lua
@@ -134,10 +134,39 @@ function prepare_tnn(global_conf, layerRepo)
return tnn
end
-function prepare_dagL(global_conf, layerRepo)
- nerv.printf("%s Generate and initing dagL ...\n", global_conf.sche_log_pre)
+function load_net_tnn(global_conf, fn)
+ prepare_parameters(global_conf, fn)
+ local layerRepo = prepare_layers(global_conf)
+ local tnn = prepare_tnn(global_conf, layerRepo)
+ return tnn
+end
+
+function prepare_sampler(sm_conf)
+ sm_conf.pr = nerv.ParamRepo()
+ sm_conf.pr:import({sm_conf.fn_to_sample}, nil, sm_conf)
+
+ local layers = {
+ ["nerv.GRULayerT"] = {
+ ["gruL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}, ["pr"] = sm_conf.pr}},
+ },
+ ["nerv.DropoutLayerT"] = {
+ ["dropoutL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}}},
+ },
+ ["nerv.SelectLinearLayer"] = {
+ ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {sm_conf.hidden_size}, ["vocab"] = sm_conf.vocab, ["pr"] = sm_conf.pr}},
+ },
+ ["nerv.CombinerLayer"] = {
+ ["combinerL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["lambda"] = {1}}},
+ },
+ ["nerv.AffineLayer"] = {
+ ["outputL"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.vocab:size()}, ["pr"] = sm_conf.pr}},
+ },
+ ["nerv.SoftmaxCELayerT"] = {
+ ["softmaxL"] = {{}, {["dim_in"] = {sm_conf.vocab:size(), sm_conf.vocab:size()}, ["dim_out"] = {1}}},
+ },
+ }
+ local layerRepo = nerv.LayerRepo(layers, sm_conf.pr, sm_conf)
- --input: input_w, input_w, ... input_w_now, last_activation
local connections_t = {
["<input>[1]"] = "selectL1[1]",
@@ -151,48 +180,19 @@ function prepare_dagL(global_conf, layerRepo)
["combinerL1[2]"] = "<output>[2]",
}
- if global_conf.layer_num > 1 then
+ if sm_conf.layer_num > 1 then
nerv.error("multiple layer is currently not supported(not hard to implement though)")
end
- --[[
- for l = 2, global_conf.layer_num do
- table.insert(connections_t, {"dropoutL"..(l-1).."[1]", "gruL"..l.."[1]", 0})
- table.insert(connections_t, {"gruL"..l.."[1]", "combinerL"..l.."[1]", 0})
- table.insert(connections_t, {"combinerL"..l.."[1]", "gruL"..l.."[2]", 1})
- table.insert(connections_t, {"combinerL"..l.."[2]", "dropoutL"..l.."[1]", 0})
- end
- ]]--
-
- --[[
- printf("%s printing DAG connections:\n", global_conf.sche_log_pre)
- for key, value in pairs(connections_t) do
- printf("\t%s->%s\n", key, value)
- end
- ]]--
- local dagL = nerv.DAGLayerT("dagL", global_conf, {["dim_in"] = {1, global_conf.hidden_size},
- ["dim_out"] = {global_conf.vocab:size(), global_conf.hidden_size}, ["sub_layers"] = layerRepo,
+ local dagL = nerv.DAGLayerT("dagL", sm_conf, {["dim_in"] = {1, sm_conf.hidden_size},
+ ["dim_out"] = {sm_conf.vocab:size(), sm_conf.hidden_size}, ["sub_layers"] = layerRepo,
["connections"] = connections_t
})
+
+ local sampler = nerv.LMSampler(sm_conf)
+ sampler:load_dagL(dagL)
- dagL:init(global_conf.batch_size)
-
- nerv.printf("%s Initing DAGL end.\n", global_conf.sche_log_pre)
- return dagL
-end
-
-function load_net_tnn(global_conf, fn)
- prepare_parameters(global_conf, fn)
- local layerRepo = prepare_layers(global_conf)
- local tnn = prepare_tnn(global_conf, layerRepo)
- return tnn
-end
-
-function load_net_dagL(global_conf, fn)
- prepare_parameters(global_conf, fn)
- local layerRepo = prepare_layers(global_conf)
- local dagL = prepare_dagL(global_conf, layerRepo)
- return dagL
+ return sampler
end
local train_fn, valid_fn, test_fn
@@ -240,6 +240,23 @@ global_conf = {
fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final',
}
+sm_conf = {
+ cumat_type = nerv.CuMatrixFloat,
+ mmat_type = nerv.MMatrixFloat,
+ nn_act_default = 0,
+
+ hidden_size = 300,
+ layer_num = 1,
+ batch_size = 32,
+ chunk_size = 85, --largest sample sentence length
+ max_iter = 35,
+ max_sen_len = 90,
+ sche_log_pre = "[SAMPLER_S]:",
+
+ timer = global_conf.timer,
+ fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final',
+}
+
elseif (set == "msr_sc") then
data_dir = '/home/slhome/txh18/workspace/sentenceCompletion/DATA_PV2'
@@ -276,15 +293,13 @@ global_conf = {
elseif (set == "twitter") then
-data_dir = root_dir .. '/twitter_new/DATA'
-train_fn = data_dir .. '/twitter.choose2.adds'
-valid_fn = data_dir .. '/twitter.valid.adds'
-test_fn = data_dir .. '/comm.test.choose-ppl.adds'
-vocab_fn = data_dir .. '/twitter.choose.train.vocab'
-
---qdata_dir = root_dir .. '/ptb/questionGen/gen'
-
-global_conf = {
+ data_dir = root_dir .. '/twitter_new/DATA'
+ train_fn = data_dir .. '/twitter.choose2.adds'
+ valid_fn = data_dir .. '/twitter.valid.adds'
+ test_fn = data_dir .. '/comm.test.choose-ppl.adds'
+ vocab_fn = data_dir .. '/twitter.choose.train.vocab'
+ --qdata_dir = root_dir .. '/ptb/questionGen/gen'
+ global_conf = {
lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5,
cumat_type = nerv.CuMatrixFloat,
mmat_type = nerv.MMatrixFloat,
@@ -309,7 +324,7 @@ global_conf = {
log_w_num = 40000, --give a message when log_w_num words have been processed
timer = nerv.Timer(),
work_dir_base = root_dir .. '/twitter_new/EXP-nerv/grulm_v1.0'
-}
+ }
else
@@ -347,15 +362,12 @@ global_conf = {
end
-lr_half = false --can not be local, to be set by loadstring
-start_iter = -1
-start_lr = nil
-ppl_last = 100000
commands_str = "sampling" --"train:test"
commands = {}
-test_iter = -1
---for testout(question)
-q_file = "/home/slhome/txh18/workspace/ptb/questionGen/gen/ptb.test.txt.q10rs1_Msss.adds"
+test_iter = -1 --obsolete
+random_seed = 1
+sample_num = 10
+out_fn = nil
if arg[2] ~= nil then
nerv.printf("%s applying arg[2](%s)...\n", global_conf.sche_log_pre, arg[2])
@@ -385,26 +397,27 @@ nerv.LMUtil.wait(2)
]]--
----------------printing options---------------------------------
-nerv.printf("%s printing global_conf...\n", global_conf.sche_log_pre)
-for id, value in pairs(global_conf) do
+nerv.printf("%s printing sm_conf...\n", sm_conf.sche_log_pre)
+for id, value in pairs(sm_conf) do
nerv.printf("%s:\t%s\n", id, tostring(value))
end
nerv.LMUtil.wait(2)
nerv.printf("%s printing training scheduling options...\n", global_conf.sche_log_pre)
-nerv.printf("lr_half:\t%s\n", tostring(lr_half))
-nerv.printf("start_iter:\t%s\n", tostring(start_iter))
-nerv.printf("ppl_last:\t%s\n", tostring(ppl_last))
nerv.printf("commands_str:\t%s\n", commands_str)
nerv.printf("test_iter:\t%s\n", tostring(test_iter))
+nerv.printf("random_seed:\t%s\n", tostring(random_seed))
+nerv.printf("sample_num:\t%s\n", tostring(sample_num))
+nerv.printf("out_fn:\t%s\n", tostring(out_fn))
nerv.printf("%s printing training scheduling end.\n", global_conf.sche_log_pre)
nerv.LMUtil.wait(2)
------------------printing options end------------------------------
-math.randomseed(1)
+math.randomseed(random_seed)
local vocab = nerv.LMVocab()
global_conf["vocab"] = vocab
+sm_conf["vocab"] = global_conf.vocab
nerv.printf("%s building vocab...\n", global_conf.sche_log_pre)
global_conf.vocab:build_file(global_conf.vocab_fn, false)
ppl_rec = {}
@@ -421,18 +434,34 @@ end --if commands["test"]
if commands["sampling"] == 1 then
nerv.printf("===SAMPLE===\n")
global_conf.sche_log_pre = "[SCHEDULER SAMPLING]:"
- local dagL = load_net_dagL(global_conf, global_conf.fn_to_sample)
- local sampler = nerv.LMSampler(global_conf)
- sampler:load_dagL(dagL)
- for k = 1, 5 do
- local res = sampler:lm_sample_rnn_dagL(10, {})
+ local sampler = prepare_sampler(sm_conf)
+ local out_fh = nil
+ if out_fn ~= nil then
+ out_fh = assert(io.open(out_fn, "w"))
+ nerv.printf("%s outputing samples to file \"%s\"...\n", global_conf.sche_log_pre, out_fn)
+ end
+ for k = 1, sample_num do
+ local res = sampler:lm_sample_rnn_dagL(1, {})
for i = 1, #res do
+ if out_fh == nil then nerv.printf("lm_sampler_output_sample: ") end
for j = 1, #res[i] do
- nerv.printf("%s ", res[i][j].w)
+ if out_fh == nil then
+ nerv.printf("%s %f ", res[i][j].w, res[i][j].p)
+ else
+ out_fh:write(nerv.sprintf("%s %f ", res[i][j].w, res[i][j].p))
+ end
+ end
+ if out_fh == nil then
+ nerv.printf("\n")
+ else
+ out_fh:write(nerv.sprintf("\n"))
end
- nerv.printf("\n")
end
+ if k % 10000 == 0 and out_fh ~= nil then nerv.printf("%s %d sample done\n", global_conf.sche_log_pre, k) end
end
+
+ if out_fh ~= nil then out_fh:close() end
+ nerv.printf("%s complete,bye\n", global_conf.sche_log_pre)
--global_conf.dropout_rate = 0
--LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
end --if commands["sampling"]
diff --git a/nerv/examples/lmptb/m-tests/lmseqreader_test.lua b/nerv/examples/lmptb/m-tests/lmseqreader_test.lua
index 9127559..3f99741 100644
--- a/nerv/examples/lmptb/m-tests/lmseqreader_test.lua
+++ b/nerv/examples/lmptb/m-tests/lmseqreader_test.lua
@@ -7,7 +7,7 @@ local test_fn = "/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-te
--local test_fn = "/home/slhome/txh18/workspace/nerv-project/nerv/examples/lmptb/PTBdata/ptb.train.txt"
local vocab = nerv.LMVocab()
vocab:build_file(test_fn)
-local chunk_size = 20
+local chunk_size = 15
local batch_size = 3
local global_conf = {
lrate = 1, wcost = 1e-6, momentum = 0,
@@ -30,7 +30,8 @@ local global_conf = {
vocab = vocab
}
-local reader = nerv.LMSeqReader(global_conf, batch_size, chunk_size, vocab, {["se_mode"] = true})
+local reader = nerv.LMSeqReader(global_conf, batch_size, chunk_size, vocab,
+ {["se_mode"] = true, ["same_io"] = true})
reader:open_file(test_fn)
local feeds = {}
feeds.flags_now = {}
@@ -40,14 +41,15 @@ for j = 1, chunk_size do
feeds.inputs_m[j] = {global_conf.cumat_type(batch_size, 1), global_conf.cumat_type(batch_size, global_conf.vocab:size())}
feeds.flags_now[j] = {}
end
-while (1) do
+for k = 1, 5 do
local r = reader:get_batch(feeds)
if (r == false) then break end
for j = 1, chunk_size, 1 do
for i = 1, batch_size, 1 do
- printf("%s[L(%s)] ", feeds.inputs_s[j][i], feeds.labels_s[j][i]) --vocab:get_word_str(input[i][j]).id
+ printf("%s[L(%s)]F%d ", feeds.inputs_s[j][i], feeds.labels_s[j][i], feeds.flags_now[j][i]) --vocab:get_word_str(input[i][j]).id
end
printf("\n")
end
printf("\n")
end
+printf("reader.sen_start %s\n", tostring(reader.stat.al_sen_start))
diff --git a/nerv/examples/lmptb/m-tests/some-text b/nerv/examples/lmptb/m-tests/some-text
index da4bea9..6756fa0 100644
--- a/nerv/examples/lmptb/m-tests/some-text
+++ b/nerv/examples/lmptb/m-tests/some-text
@@ -1,4 +1,4 @@
-</s> aa bb cc aa bb cc aa bb cc aa bb cc aa bb cc aa </s>
+</s> aa bb cc aa bb cc aa bb cc aa bb cc aa </s>
</s> aa bb cc aa bb cc aa bb cc aa </s>
</s> bb cc aa bb cc aa bb cc aa </s>
</s> aa bb cc aa </s>
diff --git a/nerv/examples/lmptb/rnnlm_ptb_main.lua b/nerv/examples/lmptb/rnnlm_ptb_main.lua
index dc011fb..a1d9471 100644
--- a/nerv/examples/lmptb/rnnlm_ptb_main.lua
+++ b/nerv/examples/lmptb/rnnlm_ptb_main.lua
@@ -197,6 +197,43 @@ global_conf = {
work_dir_base = root_dir .. '/ptb/EXP-nerv/rnnlm_tnn'
}
+elseif (set == "twitter") then
+
+data_dir = root_dir .. '/twitter_new/DATA'
+train_fn = data_dir .. '/twitter.choose.adds'
+valid_fn = data_dir .. '/twitter.valid.adds'
+test_fn = data_dir .. '/comm.test.choose-ppl.adds'
+vocab_fn = data_dir .. '/twitter.choose.train.vocab'
+
+--qdata_dir = root_dir .. '/ptb/questionGen/gen'
+
+global_conf = {
+ lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5,
+ cumat_type = nerv.CuMatrixFloat,
+ mmat_type = nerv.MMatrixFloat,
+ nn_act_default = 0,
+
+ hidden_size = 300,
+ layer_num = 1,
+ chunk_size = 15,
+ batch_size = 32,
+ max_iter = 30,
+ lr_decay = 1.003,
+ decay_iter = 10,
+ param_random = function() return (math.random() / 5 - 0.1) end,
+ dropout_str = "0.5",
+
+ train_fn = train_fn,
+ valid_fn = valid_fn,
+ test_fn = test_fn,
+ vocab_fn = vocab_fn,
+ max_sen_len = 32,
+ sche_log_pre = "[SCHEDULER]:",
+ log_w_num = 40000, --give a message when log_w_num words have been processed
+ timer = nerv.Timer(),
+ work_dir_base = root_dir .. '/twitter_new/EXP-nerv/rnnlm_v1.0'
+}
+
elseif (set == "msr_sc") then
data_dir = '/home/slhome/txh18/workspace/sentenceCompletion/DATA_PV2'
diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c
index 04205e4..58bdfe7 100644
--- a/nerv/lib/matrix/cumatrix.c
+++ b/nerv/lib/matrix/cumatrix.c
@@ -9,6 +9,14 @@ static cudaEvent_t profile_start, profile_stop;
curandGenerator_t curand_gen;
static HashMap *profile;
+void nerv_cumatrix_select_gpu(int dev, Status *status) {
+ fprintf(stderr, "** selecting GPU %d\n", dev);
+ NERV_SET_STATUS(status, NERV_NORMAL, 0);
+ CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status);
+ CUDA_SAFE_SYNC_CALL(cublasDestroy(cublas_handle), status);
+ CUDA_SAFE_SYNC_CALL(cublasCreate(&cublas_handle), status);
+}
+
void nerv_cumatrix_print_profile() {
size_t i;
fprintf(stderr, "*** [nerv cumatrix profile] **\n");
diff --git a/nerv/lib/matrix/generic/cukernel.cu b/nerv/lib/matrix/generic/cukernel.cu
index 8fbe05d..51e3b6a 100644
--- a/nerv/lib/matrix/generic/cukernel.cu
+++ b/nerv/lib/matrix/generic/cukernel.cu
@@ -383,6 +383,20 @@ __global__ void cudak_(copy_rows_by_colidx)(const MATRIX_ELEM *a, MATRIX_ELEM *b
b[j + i * stride] = a[j + k * stride];
}
+__global__ void cudak_(prefixsum_row_reduce)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
+ int nrow, int ncol, int stride_a, int stride_b, int offset) {
+ int j = blockIdx.x * blockDim.x + threadIdx.x;
+ int i = blockIdx.y * blockDim.y + threadIdx.y;
+ long idx_a, idx_b;
+ if (i >= nrow || j >= ncol) return;
+ idx_b = j + i * stride_b;
+ idx_a = j + i * stride_a;
+ //b[idx] = 1.0 / (1.0 + exp(-a[idx]));
+ if (j >= offset)
+ b[idx_b] = a[idx_a] + a[idx_a - offset];
+ else
+ b[idx_b] = a[idx_a];
+}
extern "C" {
#include "../cukernel.h"
@@ -745,6 +759,40 @@ extern "C" {
cudaStreamSynchronize(0);
}
+ void cudak_(cuda_prefixsum_row)(const Matrix *a, Matrix *b) {
+ dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N);
+ dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x),
+ CEIL_DIV(b->nrow, threadsPerBlock.y));
+
+ MATRIX_ELEM *tmp[2];
+ size_t tmp_stride[2];
+ cudaMallocPitch(tmp, tmp_stride + 0, a->ncol * sizeof(MATRIX_ELEM), a->nrow);
+ cudaMallocPitch(tmp + 1, tmp_stride + 1, a->ncol * sizeof(MATRIX_ELEM), a->nrow);
+
+ int offset = 1;
+ cudak_(prefixsum_row_reduce)<<<numBlocks, threadsPerBlock>>> \
+ (MATRIX_ELEM_PTR(a), tmp[0], b->nrow, b->ncol,
+ a->stride / sizeof(MATRIX_ELEM), tmp_stride[0] / sizeof(MATRIX_ELEM), offset);
+ int pin = 0, pout = 1;
+
+ for (offset = 2;offset <= a->ncol / 2;offset *= 2) {
+ cudak_(prefixsum_row_reduce)<<<numBlocks, threadsPerBlock>>> \
+ (tmp[pin], tmp[pout], b->nrow, b->ncol,
+ tmp_stride[pin] / sizeof(MATRIX_ELEM), tmp_stride[pout] / sizeof(MATRIX_ELEM), offset);
+ pin = 1 - pin;
+ pout = 1 - pout;
+ }
+
+ cudak_(prefixsum_row_reduce)<<<numBlocks, threadsPerBlock>>> \
+ (tmp[pin], MATRIX_ELEM_PTR(b), b->nrow, b->ncol,
+ tmp_stride[pin] / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), offset);
+
+ cudaFree(tmp[0]);
+ cudaFree(tmp[1]);
+
+ cudaStreamSynchronize(0);
+ }
+
void cudak_(cuda_decompress)(const Matrix *a, Matrix *b) {
dim3 threadsPerBlock(1, CUDA_THREADS_NN);
dim3 numBlocks(1, CEIL_DIV(a->nrow, threadsPerBlock.y));
diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c
index bf93b77..7b70607 100644
--- a/nerv/lib/matrix/generic/cumatrix.c
+++ b/nerv/lib/matrix/generic/cumatrix.c
@@ -486,6 +486,14 @@ void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b,
NERV_SET_STATUS(status, NERV_NORMAL, 0);
}
+void nerv_matrix_(prefixsum_row)(Matrix *a, const Matrix *b, Status *status) {
+ CHECK_SAME_DIMENSION(a, b, status);
+ PROFILE_START
+ cudak_(cuda_prefixsum_row)(b, a);
+ PROFILE_STOP
+ NERV_SET_STATUS(status, NERV_NORMAL, 0);
+}
+
static void cuda_matrix_(free)(MATRIX_ELEM *ptr, Status *status) {
CUDA_SAFE_SYNC_CALL(cudaFree(ptr), status);
NERV_SET_STATUS(status, NERV_NORMAL, 0);
diff --git a/nerv/lib/matrix/generic/cumatrix.h b/nerv/lib/matrix/generic/cumatrix.h
index 4f66a2c..5b8076f 100644
--- a/nerv/lib/matrix/generic/cumatrix.h
+++ b/nerv/lib/matrix/generic/cumatrix.h
@@ -61,6 +61,7 @@ void nerv_matrix_(scale_rows_by_col)(Matrix *a, const Matrix *b,
Status *status);
void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b,
Status *status);
+void nerv_matrix_(prefixsum_row)(Matrix *a, const Matrix *b, Status *status);
void nerv_matrix_(thres_mask)(Matrix *a, Matrix *b,
double thres, double low, double high,
Status *status);
diff --git a/nerv/matrix/cumatrix.c b/nerv/matrix/cumatrix.c
index bf92f92..7f22d68 100644
--- a/nerv/matrix/cumatrix.c
+++ b/nerv/matrix/cumatrix.c
@@ -8,6 +8,14 @@ static cublasHandle_t cublas_handle;
static cudaEvent_t profile_start, profile_stop;
static HashMap *profile;
+static int select_gpu(lua_State *L) {
+ Status status;
+ int dev = luaL_checkinteger(L, 1);
+ nerv_cumatrix_select_gpu(dev, &status);
+ NERV_LUA_CHECK_STATUS(L, status);
+ return 0;
+}
+
static int print_profile(lua_State *L) {
nerv_cumatrix_print_profile();
return 0;
@@ -21,6 +29,7 @@ static int clear_profile(lua_State *L) {
static const luaL_Reg cumatrix_methods[] = {
{"print_profile", print_profile},
{"clear_profile", clear_profile},
+ {"select_gpu", select_gpu},
{NULL, NULL}
};
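
The select_gpu binding registered above is what grulm_ptb_main.lua reaches through the matrix class table. A minimal usage sketch (the device index 0 is only an example):

    -- pick the CUDA device before building the network, as grulm_ptb_main.lua does
    local cumat_type = nerv.CuMatrixFloat
    cumat_type.select_gpu(0)
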
diff --git a/nerv/matrix/generic/cumatrix.c b/nerv/matrix/generic/cumatrix.c
index cb55901..b706c21 100644
--- a/nerv/matrix/generic/cumatrix.c
+++ b/nerv/matrix/generic/cumatrix.c
@@ -15,6 +15,15 @@ static int nerv_matrix_(lua_get_blas_op)(char ch) {
return (ch == 'T' || ch == 't') ? CUBLAS_OP_T : CUBLAS_OP_N;
}
+static int nerv_matrix_(lua_prefixsum_row)(lua_State *L) {
+ Status status;
+ Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
+ Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname));
+ nerv_matrix_(prefixsum_row)(a, b, &status);
+ NERV_LUA_CHECK_STATUS(L, status);
+ return 0;
+}
+
static int nerv_matrix_(lua_thres_mask)(lua_State *L) {
Status status;
Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
@@ -230,6 +239,7 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = {
{"rearrange_frm", nerv_matrix_(lua_rearrange_frm)},
{"scale_rows_by_row", nerv_matrix_(lua_scale_rows_by_row)},
{"scale_rows_by_col", nerv_matrix_(lua_scale_rows_by_col)},
+ {"prefixsum_row", nerv_matrix_(lua_prefixsum_row)},
#ifdef __NERV_FUTURE_CUDA_7
{"update_select_rows_by_rowidx", nerv_matrix_(lua_update_select_rows_by_rowidx)},
{"update_select_rows_by_colidx", nerv_matrix_(lua_update_select_rows_by_colidx)},
diff --git a/nerv/tnn/init.lua b/nerv/tnn/init.lua
index b375fa8..7faca31 100644
--- a/nerv/tnn/init.lua
+++ b/nerv/tnn/init.lua
@@ -47,5 +47,6 @@ nerv.include('sutil.lua')
nerv.include('tnn.lua')
nerv.include('layersT/softmax_ce_t.lua')
nerv.include('layersT/lstm_t.lua')
+nerv.include('layersT/gru_t.lua')
nerv.include('layersT/dropout_t.lua')
nerv.include('layer_dag_t.lua')
diff --git a/nerv/examples/lmptb/lmptb/layer/gru_t.lua b/nerv/tnn/layersT/gru_t.lua
index 8f15cc8..8f15cc8 100644
--- a/nerv/examples/lmptb/lmptb/layer/gru_t.lua
+++ b/nerv/tnn/layersT/gru_t.lua