author    TianxingHe <htx_2006@hotmail.com>  2015-12-04 15:19:44 +0800
committer TianxingHe <htx_2006@hotmail.com>  2015-12-04 15:19:44 +0800
commit    af684cb95478fc38cc3d9f284b6b518a431c87e2 (patch)
tree      faab52eb3f6507331703b656c62a9e2ebf3b3f92
parent    cbcce5ecc2864872b411eebbd307fa0f9a7e9dc0 (diff)
parent    618450eb71817ded45c422f35d8fede2d52a66b2 (diff)
Merge pull request #15 from cloudygoose/txh18/rnnlm
LSTM&TNN
-rw-r--r--  Makefile | 2
-rw-r--r--  README.md | 1
-rw-r--r--  nerv/Makefile | 13
-rw-r--r--  nerv/examples/lmptb/lm_trainer.lua | 56
-rw-r--r--  nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua | 2
-rw-r--r--  nerv/examples/lmptb/lmptb/layer/select_linear.lua | 15
-rw-r--r--  nerv/examples/lmptb/lmptb/lmseqreader.lua | 113
-rw-r--r--  nerv/examples/lmptb/lmptb/lmutil.lua | 6
-rw-r--r--  nerv/examples/lmptb/logs/LOG-tnn-h300 | 9372
-rw-r--r--  nerv/examples/lmptb/logs/LOG-tnn-h400 | 9372
-rw-r--r--  nerv/examples/lmptb/lstmlm_ptb_main.lua | 443
-rw-r--r--  nerv/examples/lmptb/m-tests/LOG-tnn-h300 | 3824
-rw-r--r--  nerv/examples/lmptb/m-tests/lmseqreader_test.lua | 7
-rw-r--r--  nerv/examples/lmptb/m-tests/sutil_test.lua | 15
-rw-r--r--  nerv/examples/lmptb/rnnlm_ptb_main.lua (renamed from nerv/examples/lmptb/tnn_ptb_main.lua) | 115
-rw-r--r--  nerv/examples/lmptb/unfold_ptb_main.lua | 19
-rw-r--r--  nerv/init.lua | 1
-rw-r--r--  nerv/layer/affine.lua | 110
-rw-r--r--  nerv/layer/affine_recurrent.lua | 4
-rw-r--r--  nerv/layer/elem_mul.lua | 38
-rw-r--r--  nerv/layer/gate_fff.lua | 71
-rw-r--r--  nerv/layer/init.lua | 23
-rw-r--r--  nerv/layer/tanh.lua | 35
-rw-r--r--  nerv/lib/matrix/cukernel.cu | 36
-rw-r--r--  nerv/lib/matrix/cukernel.h | 4
-rw-r--r--  nerv/lib/matrix/generic/cukernel.cu | 86
-rw-r--r--  nerv/lib/matrix/generic/cumatrix.c | 36
-rw-r--r--  nerv/lib/matrix/generic/cumatrix.h | 3
-rw-r--r--  nerv/lib/matrix/generic/matrix.c | 8
-rw-r--r--  nerv/lib/matrix/matrix.h | 2
-rw-r--r--  nerv/matrix/generic/cumatrix.c | 52
-rw-r--r--  nerv/matrix/init.lua | 12
-rw-r--r--  nerv/nn/param_repo.lua | 8
-rw-r--r--  nerv/tnn/init.lua (renamed from nerv/examples/lmptb/rnn/init.lua) | 24
-rw-r--r--  nerv/tnn/layer_dag_t.lua | 386
-rw-r--r--  nerv/tnn/layersT/dropout_t.lua | 71
-rw-r--r--  nerv/tnn/layersT/lstm_t.lua | 125
-rw-r--r--  nerv/tnn/layersT/softmax_ce_t.lua (renamed from nerv/examples/lmptb/rnn/softmax_ce_t.lua) | 16
-rw-r--r--  nerv/tnn/sutil.lua | 79
-rw-r--r--  nerv/tnn/tnn.lua (renamed from nerv/examples/lmptb/rnn/tnn.lua) | 82
40 files changed, 20627 insertions, 4060 deletions
diff --git a/Makefile b/Makefile
index 664a83b..72a5915 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ luajit:
luarocks:
PREFIX=$(PREFIX) ./tools/build_luarocks.sh
install:
- cd nerv; $(PREFIX)/bin/luarocks make
+ cd nerv; $(PREFIX)/bin/luarocks make CFLAGS=$(CFLAGS)
speech:
cd speech/speech_utils; $(PREFIX)/bin/luarocks make
cd speech/htk_io; $(PREFIX)/bin/luarocks make
diff --git a/README.md b/README.md
index c198cc5..fe9dfc1 100644
--- a/README.md
+++ b/README.md
@@ -12,6 +12,7 @@ git clone https://github.com/Nerv-SJTU/nerv.git
cd nerv
git submodule init && git submodule update
make
+#To enable some newer CUDA features (e.g. atomicCAS), use "make CFLAGS=-D__NERV_FUTURE_CUDA_7"
#further, if you want the speech modules
git clone https://github.com/Nerv-SJTU/nerv-speech.git speech
diff --git a/nerv/Makefile b/nerv/Makefile
index b449f82..5c329f9 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -7,7 +7,7 @@ INC_PATH := $(LUA_BINDIR)/../include/nerv
LUA_DIR = $(INST_LUADIR)/nerv
OBJ_DIR := $(BUILD_DIR)/objs
ISUBDIR := io matrix luaT
-SUBDIR := matrix io layer examples nn lib/io lib/luaT lib/matrix
+SUBDIR := matrix io layer examples nn lib/io lib/luaT lib/matrix tnn/layersT
INC_SUBDIR := $(addprefix $(INC_PATH)/,$(ISUBDIR))
OBJ_SUBDIR := $(addprefix $(OBJ_DIR)/,$(SUBDIR))
@@ -30,10 +30,13 @@ LUAT_OBJS := $(addprefix $(OBJ_DIR)/,$(LUAT_OBJS))
OBJS := $(CORE_OBJS) $(NERV_OBJS) $(LUAT_OBJS)
LIBS := $(INST_LIBDIR)/libnerv.so $(LIB_PATH)/libnervcore.so $(LIB_PATH)/libluaT.so
LUA_LIBS := matrix/init.lua io/init.lua init.lua \
- layer/init.lua layer/affine.lua layer/sigmoid.lua layer/softmax_ce.lua layer/softmax.lua \
+ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \
layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua layer/affine_recurrent.lua \
+ layer/elem_mul.lua layer/gate_fff.lua \
nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \
- io/sgd_buffer.lua
+ io/sgd_buffer.lua \
+ tnn/init.lua tnn/layer_dag_t.lua tnn/sutil.lua tnn/tnn.lua \
+ tnn/layersT/dropout_t.lua tnn/layersT/lstm_t.lua tnn/layersT/softmax_ce_t.lua
INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK
#CUDA_BASE := /usr/local/cuda-7.0
@@ -41,7 +44,7 @@ CUDA_BASE := /usr/local/cuda
CUDA_INCLUDE := -I $(CUDA_BASE)/include/
INCLUDE += $(CUDA_INCLUDE)
-LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcublas
+LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcublas -lcurand
CFLAGS := -Wall -Wextra -O2
NVCC := $(CUDA_BASE)/bin/nvcc
NVCC_FLAGS := -Xcompiler -fPIC,-Wall,-Wextra
@@ -55,7 +58,7 @@ $(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR) $(INC_SUBDIR):
$(OBJ_DIR)/%.o: %.c $(patsubst /%.o,/%.c,$@)
gcc -c -o $@ $< $(INCLUDE) -fPIC $(CFLAGS)
$(OBJ_DIR)/lib/matrix/cukernel.o: lib/matrix/cukernel.cu
- $(NVCC) -c -o $@ $< $(INCLUDE) $(NVCC_FLAGS)
+ $(NVCC) -c -o $@ $< $(INCLUDE) $(NVCC_FLAGS) $(CFLAGS)
$(LUA_DIR)/%.lua: %.lua
cp $< $@
diff --git a/nerv/examples/lmptb/lm_trainer.lua b/nerv/examples/lmptb/lm_trainer.lua
index 44862dc..9ef4794 100644
--- a/nerv/examples/lmptb/lm_trainer.lua
+++ b/nerv/examples/lmptb/lm_trainer.lua
@@ -2,41 +2,55 @@ require 'lmptb.lmvocab'
require 'lmptb.lmfeeder'
require 'lmptb.lmutil'
require 'lmptb.layer.init'
-require 'rnn.init'
+--require 'tnn.init'
require 'lmptb.lmseqreader'
local LMTrainer = nerv.class('nerv.LMTrainer')
-local printf = nerv.printf
+--local printf = nerv.printf
+
+--The stock bias param update in nerv does not include the wcost (L2 weight decay) term, so override it here
+function nerv.BiasParam:update_by_gradient(gradient)
+ local gconf = self.gconf
+ local l2 = 1 - gconf.lrate * gconf.wcost
+ self:_update_by_gradient(gradient, l2, l2)
+end
--Returns: LMResult
-function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train)
+function LMTrainer.lm_process_file_rnn(global_conf, fn, tnn, do_train)
local reader = nerv.LMSeqReader(global_conf, global_conf.batch_size, global_conf.chunk_size, global_conf.vocab)
reader:open_file(fn)
local result = nerv.LMResult(global_conf, global_conf.vocab)
result:init("rnn")
-
+ if global_conf.dropout_rate ~= nil then
+ nerv.info("LMTrainer.lm_process_file_rnn: dropout_rate is %f", global_conf.dropout_rate)
+ end
+
global_conf.timer:flush()
tnn:flush_all() --caution: will also flush the inputs from the reader!
local next_log_wcn = global_conf.log_w_num
+ local neto_bakm = global_conf.mmat_type(global_conf.batch_size, 1) --host-side backup matrix for the network output
while (1) do
global_conf.timer:tic('most_out_loop_lmprocessfile')
local r, feeds
-
- r, feeds = tnn:getFeedFromReader(reader)
- if (r == false) then break end
+ global_conf.timer:tic('tnn_beforeprocess')
+ r, feeds = tnn:getfeed_from_reader(reader)
+ if r == false then
+ break
+ end
for t = 1, global_conf.chunk_size do
tnn.err_inputs_m[t][1]:fill(1)
for i = 1, global_conf.batch_size do
- if (bit.band(feeds.flags_now[t][i], nerv.TNN.FC.HAS_LABEL) == 0) then
+ if bit.band(feeds.flags_now[t][i], nerv.TNN.FC.HAS_LABEL) == 0 then
tnn.err_inputs_m[t][1][i - 1][0] = 0
end
end
end
+ global_conf.timer:toc('tnn_beforeprocess')
--[[
for j = 1, global_conf.chunk_size, 1 do
@@ -50,29 +64,33 @@ function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train)
tnn:net_propagate()
- if (do_train == true) then
+ if do_train == true then
tnn:net_backpropagate(false)
tnn:net_backpropagate(true)
end
-
+
+ global_conf.timer:tic('tnn_afterprocess')
for t = 1, global_conf.chunk_size, 1 do
+ tnn.outputs_m[t][1]:copy_toh(neto_bakm)
for i = 1, global_conf.batch_size, 1 do
if (feeds.labels_s[t][i] ~= global_conf.vocab.null_token) then
- result:add("rnn", feeds.labels_s[t][i], math.exp(tnn.outputs_m[t][1][i - 1][0]))
+ --result:add("rnn", feeds.labels_s[t][i], math.exp(tnn.outputs_m[t][1][i - 1][0]))
+ result:add("rnn", feeds.labels_s[t][i], math.exp(neto_bakm[i - 1][0]))
end
end
end
+ tnn:move_right_to_nextmb({0}) --only copy for time 0
+ global_conf.timer:toc('tnn_afterprocess')
- tnn:moveRightToNextMB()
global_conf.timer:toc('most_out_loop_lmprocessfile')
--print log
- if (result["rnn"].cn_w > next_log_wcn) then
+ if result["rnn"].cn_w > next_log_wcn then
next_log_wcn = next_log_wcn + global_conf.log_w_num
- printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date())
- printf("\t%s log prob per sample :%f.\n", global_conf.sche_log_pre, result:logp_sample("rnn"))
+ nerv.printf("%s %d words processed %s.\n", global_conf.sche_log_pre, result["rnn"].cn_w, os.date())
+ nerv.printf("\t%s log prob per sample :%f.\n", global_conf.sche_log_pre, result:logp_sample("rnn"))
for key, value in pairs(global_conf.timer.rec) do
- printf("\t [global_conf.timer]: time spent on %s:%.5f clock time\n", key, value)
+ nerv.printf("\t [global_conf.timer]: time spent on %s:%.5f clock time\n", key, value)
end
global_conf.timer:flush()
nerv.LMUtil.wait(0.1)
@@ -90,9 +108,9 @@ function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train)
--break --debug
end
- printf("%s Displaying result:\n", global_conf.sche_log_pre)
- printf("%s %s\n", global_conf.sche_log_pre, result:status("rnn"))
- printf("%s Doing on %s end.\n", global_conf.sche_log_pre, fn)
+ nerv.printf("%s Displaying result:\n", global_conf.sche_log_pre)
+ nerv.printf("%s %s\n", global_conf.sche_log_pre, result:status("rnn"))
+ nerv.printf("%s Doing on %s end.\n", global_conf.sche_log_pre, fn)
return result
end
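
For reference, a minimal usage sketch of the renamed entry point, assuming a global_conf and tnn already built the way the ptb_main scripts do (that setup is elided here):

    --train one pass over the training set, then evaluate on the validation set
    local result = nerv.LMTrainer.lm_process_file_rnn(global_conf, global_conf.train_fn, tnn, true)
    nerv.printf("train: %s\n", result:status("rnn"))
    result = nerv.LMTrainer.lm_process_file_rnn(global_conf, global_conf.valid_fn, tnn, false)
    nerv.printf("valid: %s\n", result:status("rnn"))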
diff --git a/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua b/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua
index a5ecce1..c43e567 100644
--- a/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua
+++ b/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua
@@ -14,7 +14,7 @@ function LMRecurrent:propagate(input, output)
output[1]:copy_fromd(input[1])
if (self.independent == true) then
for i = 1, input[1]:nrow() do
- if (self.gconf.input_word_id[self.id][0][i - 1] == self.break_id) then --here is sentence break
+ if (self.gconf.input_word_id[self.id][i - 1][0] == self.break_id) then --this is a sentence break
input[2][i - 1]:fill(0)
end
end
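
The index swap above ([0][i - 1] becomes [i - 1][0]) follows the word-id matrix's new layout: one row per stream and a single column, matching the lmutil.lua set_id change later in this patch. A minimal orientation sketch (hypothetical batch of 10 streams):

    --word ids are now stored as a batch_size x 1 column vector
    local m = global_conf.mmat_type(10, 1) --one row per stream
    local id_of_stream_3 = m[2][0]         --0-based row, column 0 (was m[0][2])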
diff --git a/nerv/examples/lmptb/lmptb/layer/select_linear.lua b/nerv/examples/lmptb/lmptb/layer/select_linear.lua
index 672b7e2..580b9c5 100644
--- a/nerv/examples/lmptb/lmptb/layer/select_linear.lua
+++ b/nerv/examples/lmptb/lmptb/layer/select_linear.lua
@@ -10,9 +10,9 @@ function SL:__init(id, global_conf, layer_conf)
self.dim_out = layer_conf.dim_out
self.gconf = global_conf
- self.ltp = layer_conf.ltp
self.vocab = layer_conf.vocab
-
+ self.ltp = self:find_param("ltp", layer_conf, global_conf, nerv.LinearTransParam, {self.vocab:size(), self.dim_out[1]}) --layer_conf.ltp
+
self:check_dim_len(1, 1)
end
@@ -30,12 +30,13 @@ function SL:init(batch_size)
end
function SL:update(bp_err, input, output)
- --for i = 1, input[1]:ncol(), 1 do
- -- if (input[1][0][i - 1] ~= 0) then
- -- local word_vec = self.ltp.trans[input[1][0][i - 1]]
- --word_vec:add(word_vec, bp_err[1][i - 1], 1, - self.gconf.lrate / self.gconf.batch_size)
- -- end
+ --use this to produce reproducible results
+ --for i = 1, input[1]:nrow(), 1 do
+ -- local word_vec = self.ltp.trans[input[1][i - 1][0]]
+ -- word_vec:add(word_vec, bp_err[1][i - 1], 1, - self.gconf.lrate / self.gconf.batch_size)
--end
+
+ --I tried the update_select_rows kernel which uses atomicAdd, but it generates unreproducible results
self.ltp.trans:update_select_rows(bp_err[1], input[1]:trans(), - self.gconf.lrate / self.gconf.batch_size, 0)
self.ltp.trans:add(self.ltp.trans, self.ltp.trans, 1.0, - self.gconf.lrate * self.gconf.wcost / self.gconf.batch_size)
end
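
The commented-out loop kept above is the serial, reproducible equivalent of the update_select_rows call; a sketch of the per-row arithmetic, assuming input[1] carries one 0-based word id per row:

    --for each row r: trans[id[r]] := trans[id[r]] + alpha * err[r],
    --with alpha = -lrate / batch_size
    for i = 1, input[1]:nrow(), 1 do
        local word_vec = self.ltp.trans[input[1][i - 1][0]]
        word_vec:add(word_vec, bp_err[1][i - 1], 1, - self.gconf.lrate / self.gconf.batch_size)
    end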
diff --git a/nerv/examples/lmptb/lmptb/lmseqreader.lua b/nerv/examples/lmptb/lmptb/lmseqreader.lua
index e0dcd95..40471d5 100644
--- a/nerv/examples/lmptb/lmptb/lmseqreader.lua
+++ b/nerv/examples/lmptb/lmptb/lmseqreader.lua
@@ -1,4 +1,5 @@
require 'lmptb.lmvocab'
+--require 'tnn.init'
local LMReader = nerv.class("nerv.LMSeqReader")
@@ -7,7 +8,7 @@ local printf = nerv.printf
--global_conf: table
--batch_size: int
--vocab: nerv.LMVocab
-function LMReader:__init(global_conf, batch_size, chunk_size, vocab)
+function LMReader:__init(global_conf, batch_size, chunk_size, vocab, r_conf)
self.gconf = global_conf
self.fh = nil --file handle to read, nil means currently no file
self.batch_size = batch_size
@@ -15,6 +16,13 @@ function LMReader:__init(global_conf, batch_size, chunk_size, vocab)
self.log_pre = "[LOG]LMSeqReader:"
self.vocab = vocab
self.streams = nil
+ if r_conf == nil then
+ r_conf = {}
+ end
+ self.se_mode = false --sentence-end mode: once a sentence end is met, the rest of the stream will be null
+ if r_conf.se_mode == true then
+ self.se_mode = true
+ end
end
--fn: string
@@ -24,12 +32,21 @@ function LMReader:open_file(fn)
nerv.error("%s error: in open_file(fn is %s), file handle not nil.", self.log_pre, fn)
end
printf("%s opening file %s...\n", self.log_pre, fn)
- print("batch_size:", self.batch_size, "chunk_size", self.chunk_size)
+ print(self.log_pre, "batch_size:", self.batch_size, "chunk_size", self.chunk_size)
+ print(self.log_pre, "se_mode:", self.se_mode)
self.fh = io.open(fn, "r")
self.streams = {}
for i = 1, self.batch_size, 1 do
self.streams[i] = {["store"] = {}, ["head"] = 1, ["tail"] = 0}
end
+ self.stat = {} --stat collected during file reading
+ self.stat.al_sen_start = true --check whether it's always sentence_start at the beginning of a minibatch
+ self.bak_inputs_m = {} --backup MMatrix for temporary storage (then copied to the TNN CuMatrix)
+ for j = 1, self.chunk_size, 1 do
+ self.bak_inputs_m[j] = {}
+ self.bak_inputs_m[j][1] = self.gconf.mmat_type(self.batch_size, 1)
+ --self.bak_inputs_m[j][2] = self.gconf.mmat_type(self.batch_size, self.vocab:size()) --since MMatrix does not yet have fill, this m[j][2] is not used
+ end
end
--id: int
@@ -78,7 +95,7 @@ function LMReader:get_batch(feeds)
local labels_s = feeds.labels_s
for i = 1, self.chunk_size, 1 do
inputs_s[i] = {}
- labels_s[i] = {}
+ labels_s[i] = {}
end
local inputs_m = feeds.inputs_m --port 1 : word_id, port 2 : label
@@ -86,45 +103,62 @@ function LMReader:get_batch(feeds)
local flagsPack = feeds.flagsPack_now
local got_new = false
+ for j = 1, self.chunk_size, 1 do
+ inputs_m[j][2]:fill(0)
+ end
for i = 1, self.batch_size, 1 do
local st = self.streams[i]
+ local end_stream = false --used in se_mode, indicating that this stream has ended
for j = 1, self.chunk_size, 1 do
flags[j][i] = 0
- self:refresh_stream(i)
- if (st.store[st.head] ~= nil) then
- inputs_s[j][i] = st.store[st.head]
- inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head]).id - 1
- else
+ if end_stream == true then
+ if self.se_mode == false then
+ nerv.error("lmseqreader:getbatch: error, end_stream is true while se_mode is false")
+ end
inputs_s[j][i] = self.vocab.null_token
- inputs_m[j][1][i - 1][0] = 0
- end
- inputs_m[j][2][i - 1]:fill(0)
- if (st.store[st.head + 1] ~= nil) then
- labels_s[j][i] = st.store[st.head + 1]
- inputs_m[j][2][i - 1][self.vocab:get_word_str(st.store[st.head + 1]).id - 1] = 1
+ self.bak_inputs_m[j][1][i - 1][0] = 0
+ labels_s[j][i] = self.vocab.null_token
else
- if (inputs_s[j][i] ~= self.vocab.null_token) then
- nerv.error("reader error : input not null but label is null_token")
+ self:refresh_stream(i)
+ if st.store[st.head] ~= nil then
+ inputs_s[j][i] = st.store[st.head]
+ --inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head]).id - 1
+ self.bak_inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head]).id - 1
+ else
+ inputs_s[j][i] = self.vocab.null_token
+ --inputs_m[j][1][i - 1][0] = 0
+ self.bak_inputs_m[j][1][i - 1][0] = 0
end
- labels_s[j][i] = self.vocab.null_token
- end
- if (inputs_s[j][i] ~= self.vocab.null_token) then
- if (labels_s[j][i] == self.vocab.null_token) then
- nerv.error("reader error : label is null while input is not null")
+ if st.store[st.head + 1] ~= nil then
+ labels_s[j][i] = st.store[st.head + 1]
+ inputs_m[j][2][i - 1][self.vocab:get_word_str(st.store[st.head + 1]).id - 1] = 1
+ else
+ if (inputs_s[j][i] ~= self.vocab.null_token) then
+ nerv.error("reader error : input not null but label is null_token")
+ end
+ labels_s[j][i] = self.vocab.null_token
end
- flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_NORM)
- got_new = true
- st.store[st.head] = nil
- st.head = st.head + 1
- if (labels_s[j][i] == self.vocab.sen_end_token) then
- flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_END)
- st.store[st.head] = nil --sentence end is passed
+ if inputs_s[j][i] ~= self.vocab.null_token then
+ if labels_s[j][i] == self.vocab.null_token then
+ nerv.error("reader error : label is null while input is not null")
+ end
+ flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_NORM) --has both input and label
+ got_new = true
+ st.store[st.head] = nil
st.head = st.head + 1
- end
- if (inputs_s[j][i] == self.vocab.sen_end_token) then
- flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_START)
- end
- end
+ if labels_s[j][i] == self.vocab.sen_end_token then
+ flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_END)
+ st.store[st.head] = nil --sentence end is passed
+ st.head = st.head + 1
+ if self.se_mode == true then
+ end_stream = true --meet sentence end, this stream ends now
+ end
+ end
+ if inputs_s[j][i] == self.vocab.sen_end_token then
+ flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_START)
+ end
+ end
+ end
end
end
@@ -133,9 +167,20 @@ function LMReader:get_batch(feeds)
for i = 1, self.batch_size, 1 do
flagsPack[j] = bit.bor(flagsPack[j], flags[j][i])
end
+ inputs_m[j][1]:copy_fromh(self.bak_inputs_m[j][1])
end
- if (got_new == false) then
+ --check for self.stat.al_sen_start
+ for i = 1, self.batch_size do
+ if inputs_s[1][i] ~= self.vocab.sen_end_token and inputs_s[1][i] ~= self.vocab.null_token then
+ self.stat.al_sen_start = false
+ end
+ end
+
+ if got_new == false then
+ nerv.info("lmseqreader file ends, printing stats...")
+ print("al_sen_start:", self.stat.al_sen_start)
+
return false
else
return true
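
A minimal usage sketch of the new se_mode option, assuming a prepared global_conf and vocab (r_conf is the new optional fifth constructor argument):

    --in sentence-end mode a stream goes null after its first sentence end
    local reader = nerv.LMSeqReader(global_conf, global_conf.batch_size,
                                    global_conf.chunk_size, global_conf.vocab,
                                    {se_mode = true})
    reader:open_file(global_conf.train_fn)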
diff --git a/nerv/examples/lmptb/lmptb/lmutil.lua b/nerv/examples/lmptb/lmptb/lmutil.lua
index 821aa94..71e8e17 100644
--- a/nerv/examples/lmptb/lmptb/lmutil.lua
+++ b/nerv/examples/lmptb/lmptb/lmutil.lua
@@ -48,15 +48,15 @@ end
--Returns: nerv.MMatrixInt
--Set the matrix to be ids of the words, id starting at 1, not 0
function Util.set_id(m, list, vocab)
- if (m:ncol() ~= #list or m:nrow() ~= 1) then
+ if (m:nrow() ~= #list or m:ncol() ~= 1) then
nerv.error("nrow of matrix mismatch with list or its col not one")
end
for i = 1, #list, 1 do
--index in matrix starts at 0
if (list[i] ~= vocab.null_token) then
- m[0][i - 1] = vocab:get_word_str(list[i]).id
+ m[i - 1][0] = vocab:get_word_str(list[i]).id
else
- m[0][i - 1] = 0
+ m[i - 1][0] = 0
end
end
return m
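
With this change set_id expects a column vector rather than a row vector; a minimal sketch of the new calling convention (hypothetical word list, and assuming Util here is the nerv.LMUtil class used elsewhere in the trainer):

    --m must now be #list rows by 1 column; ids are written to m[i - 1][0]
    local list = {"hello", "world", vocab.null_token}
    local m = global_conf.mmat_type(#list, 1)
    nerv.LMUtil.set_id(m, list, vocab)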
diff --git a/nerv/examples/lmptb/logs/LOG-tnn-h300 b/nerv/examples/lmptb/logs/LOG-tnn-h300
new file mode 100644
index 0000000..77fd612
--- /dev/null
+++ b/nerv/examples/lmptb/logs/LOG-tnn-h300
@@ -0,0 +1,9372 @@
+Greetings
+[SCHEDULER]: applying arg[2](global_conf.hidden_size=300)...
+[SCHEDULER]: printing global_conf...
+test_fn /home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/PTBdata/ptb.test.txt.adds
+train_fn /home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/PTBdata/ptb.train.txt.adds
+param_random function: 0x40237598
+train_fn_shuf_bak /home/slhome/txh18/workspace/nerv/play/dagL_test/train_fn_shuf_bak
+decay_iter 16
+mmat_type table: 0x409280f8
+vocab_fn /home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/PTBdata/vocab
+train_fn_shuf /home/slhome/txh18/workspace/nerv/play/dagL_test/train_fn_shuf
+param_fn /home/slhome/txh18/workspace/nerv/play/dagL_test/params
+log_w_num 40000
+work_dir /home/slhome/txh18/workspace/nerv/play/dagL_test
+batch_size 10
+hidden_size 300
+timer nerv.Timer
+sche_log_pre [SCHEDULER]:
+nn_act_default 0
+max_iter 35
+valid_fn /home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/PTBdata/ptb.valid.txt.adds
+lrate 1
+momentum 0
+wcost 1e-05
+chunk_size 15
+cumat_type table: 0x40935010
+[SCHEDULER]: printing training scheduling options...
+lr_half false
+start_iter -1
+ppl_last 100000
+[SCHEDULER]: printing training scheduling end.
+[SCHEDULER]: creating work_dir...
+[SCHEDULER]: building vocab...
+[LOG]LMVocab: Vocab building on file /home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/PTBdata/vocab...
+[LOG]LMVocab: Building finished, vocab size now is 10000.
+[SCHEDULER]: preparing parameters...
+[SCHEDULER]: first time, generating parameters...
+===INITIAL VALIDATION===
+[SCHEDULER]: preparing parameters...
+[SCHEDULER]: loading parameter from file /home/slhome/txh18/workspace/nerv/play/dagL_test/params.0...
+reading chunk 0 from 0
+metadata: return {type="nerv.LinearTransParam",id="ltp_ih"}
+
+reading chunk 1 from 34510155
+metadata: return {type="nerv.LinearTransParam",id="ltp_hh"}
+
+reading chunk 2 from 35545278
+metadata: return {type="nerv.LinearTransParam",id="ltp_ho"}
+
+reading chunk 3 from 70045626
+metadata: return {type="nerv.BiasParam",id="bp_h"}
+
+reading chunk 4 from 70049129
+metadata: return {type="nerv.BiasParam",id="bp_o"}
+
+reading chunk 5 from 70164107
+[SCHEDULER]: preparing parameters end.
+[SCHEDULER]: preparing layers...
+(10:08:46 2015-11-18)[nerv] info: create layer: recurrentL1
+(10:08:46 2015-11-18)[nerv] info: create layer: sigmoidL1
+(10:08:46 2015-11-18)[nerv] info: create layer: combinerL1
+(10:08:46 2015-11-18)[nerv] info: create layer: outputL
+(10:08:46 2015-11-18)[nerv] info: create layer: softmaxL
+(10:08:46 2015-11-18)[nerv] info: create layer: selectL1
+[SCHEDULER]: preparing layers end.
+[SCHEDULER]: Generate and initing TNN ...
+<input> selectL1 0
+selectL1 recurrentL1 0
+recurrentL1 sigmoidL1 0
+sigmoidL1 combinerL1 0
+combinerL1 recurrentL1 1
+combinerL1 outputL 0
+outputL softmaxL 0
+<input> softmaxL 0
+softmaxL <output> 0
+recurrentL1 #dim_in: 2 #dim_out: 1 #i_conns_p: 2 #o_conns_p 1
+outputL #dim_in: 1 #dim_out: 1 #i_conns_p: 1 #o_conns_p 1
+combinerL1 #dim_in: 1 #dim_out: 2 #i_conns_p: 1 #o_conns_p 2
+sigmoidL1 #dim_in: 1 #dim_out: 1 #i_conns_p: 1 #o_conns_p 1
+selectL1 #dim_in: 1 #dim_out: 1 #i_conns_p: 0 #o_conns_p 1
+softmaxL #dim_in: 2 #dim_out: 1 #i_conns_p: 1 #o_conns_p 0
+TNN initing storage selectL1 -> recurrentL1
+TNN initing storage recurrentL1 -> sigmoidL1
+TNN initing storage sigmoidL1 -> combinerL1
+TNN initing storage combinerL1 -> recurrentL1
+TNN initing storage combinerL1 -> outputL
+TNN initing storage outputL -> softmaxL
+[SCHEDULER]: Initing TNN end.
+[LOG]LMSeqReader: opening file /home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/PTBdata/ptb.valid.txt.adds...
+batch_size: 10 chunk_size 15
+[SCHEDULER]: 40095 words processed Wed Nov 18 10:08:51 2015.
+ [SCHEDULER]: log prob per sample :-4.003230.
+ [global_conf.timer]: time spent on tnn_beforeprocess:0.47666 clock time
+ [global_conf.timer]: time spent on most_out_loop_lmprocessfile:4.10684 clock time
+ [global_conf.timer]: time spent on tnn_actual_layer_propagate:3.41463 clock time
+ [global_conf.timer]: time spent on tnn_afterprocess:0.12208 clock time
+[LOG]LMSeqReader: file expires, closing.
+[SCHEDULER]: Displaying result:
+[SCHEDULER]: LMResult status of rnn: <SEN_CN 3370> <W_CN 70390> <PPL_NET 10545.667125173> <PPL_OOV 10092.059293004> <LOGP -295333.54956309>
+[SCHEDULER]: Doing on /home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/PTBdata/ptb.valid.txt.adds end.
+
+[SCHEDULER ITER1 LR1]: preparing parameters...
+[SCHEDULER ITER1 LR1]: loading parameter from file /home/slhome/txh18/workspace/nerv/play/dagL_test/params.0...
+reading chunk 0 from 0
+metadata: return {type="nerv.LinearTransParam",id="ltp_ih"}
+
+reading chunk 1 from 34510155
+metadata: return {type="nerv.LinearTransParam",id="ltp_hh"}
+
+reading chunk 2 from 35545278
+metadata: return {type="nerv.LinearTransParam",id="ltp_ho"}
+
+reading chunk 3 from 70045626
+metadata: return {type="nerv.BiasParam",id="bp_h"}
+
+reading chunk 4 from 70049129
+metadata: return {type="nerv.BiasParam",id="bp_o"}
+
+reading chunk 5 from 70164107
+[SCHEDULER ITER1 LR1]: preparing parameters end.
+[SCHEDULER ITER1 LR1]: preparing layers...
+(10:08:58 2015-11-18)[nerv] info: create layer: recurrentL1
+(10:08:58 2015-11-18)[nerv] info: create layer: sigmoidL1
+(10:08:58 2015-11-18)[nerv] info: create layer: combinerL1
+(10:08:58 2015-11-18)[nerv] info: create layer: outputL
+(10:08:58 2015-11-18)[nerv] info: create layer: softmaxL
+(10:08:58 2015-11-18)[nerv] info: create layer: selectL1
+[SCHEDULER ITER1 LR1]: preparing layers end.
+[SCHEDULER ITER1 LR1]: Generate and initing TNN ...
+<input> selectL1 0
+selectL1 recurrentL1 0
+recurrentL1 sigmoidL1 0
+sigmoidL1 combinerL1 0
+combinerL1 recurrentL1 1
+combinerL1 outputL 0
+outputL softmaxL 0
+<input> softmaxL 0
+softmaxL <output> 0
+recurrentL1 #dim_in: 2 #dim_out: 1 #i_conns_p: 2 #o_conns_p 1
+outputL #dim_in: 1 #dim_out: 1 #i_conns_p: 1 #o_conns_p 1
+combinerL1 #dim_in: 1