aboutsummaryrefslogtreecommitdiff
path: root/nerv
diff options
context:
space:
mode:
author: txh18 <cloudygooseg@gmail.com> 2015-11-24 15:37:02 +0800
committer: txh18 <cloudygooseg@gmail.com> 2015-11-24 15:37:02 +0800
commit: 914a026734db6608e04987e9fcec9c82612e8673 (patch)
tree: 89e4db2987b6c757667aef96cc41974fda302927 /nerv
parent: f829b2b49d1db7fb6a49109722b9c7a41ae9324a (diff)
added wcost for biasparam in lm_trainer
Diffstat (limited to 'nerv')
-rw-r--r-- nerv/examples/lmptb/lm_trainer.lua 9
-rw-r--r-- nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua 2
-rw-r--r-- nerv/examples/lmptb/lmptb/lmutil.lua 6
-rw-r--r-- nerv/examples/lmptb/tnn/layer_dag_t.lua 13
-rw-r--r-- nerv/examples/lmptb/tnn_ptb_main.lua 6
-rw-r--r-- nerv/examples/lmptb/unfold_ptb_main.lua 10
6 files changed, 32 insertions, 14 deletions
diff --git a/nerv/examples/lmptb/lm_trainer.lua b/nerv/examples/lmptb/lm_trainer.lua
index 62d8b50..185bc6d 100644
--- a/nerv/examples/lmptb/lm_trainer.lua
+++ b/nerv/examples/lmptb/lm_trainer.lua
@@ -2,13 +2,20 @@ require 'lmptb.lmvocab'
require 'lmptb.lmfeeder'
require 'lmptb.lmutil'
require 'lmptb.layer.init'
-require 'rnn.init'
+require 'tnn.init'
require 'lmptb.lmseqreader'
local LMTrainer = nerv.class('nerv.LMTrainer')
local printf = nerv.printf
+--The stock bias param update in nerv does not have wcost (weight cost) applied; this override of nerv.BiasParam:update_by_gradient adds it
+function nerv.BiasParam:update_by_gradient(gradient)
+ local gconf = self.gconf --lrate and wcost are read from the param's global config
+ local l2 = 1 - gconf.lrate * gconf.wcost --scaling factor combining learning rate and weight cost
+ self:_update_by_gradient(gradient, l2, l2) --same factor is passed for both scaling arguments; presumably alpha/beta of the base update, TODO confirm
+end
+
--Returns: LMResult
function LMTrainer.lm_process_file(global_conf, fn, tnn, do_train)
local reader = nerv.LMSeqReader(global_conf, global_conf.batch_size, global_conf.chunk_size, global_conf.vocab)
diff --git a/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua b/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua
index a5ecce1..c43e567 100644
--- a/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua
+++ b/nerv/examples/lmptb/lmptb/layer/lm_affine_recurrent.lua
@@ -14,7 +14,7 @@ function LMRecurrent:propagate(input, output)
output[1]:copy_fromd(input[1])
if (self.independent == true) then
for i = 1, input[1]:nrow() do
- if (self.gconf.input_word_id[self.id][0][i - 1] == self.break_id) then --here is sentence break
+ if (self.gconf.input_word_id[self.id][i - 1][0] == self.break_id) then --here is sentence break
input[2][i - 1]:fill(0)
end
end
diff --git a/nerv/examples/lmptb/lmptb/lmutil.lua b/nerv/examples/lmptb/lmptb/lmutil.lua
index 821aa94..71e8e17 100644
--- a/nerv/examples/lmptb/lmptb/lmutil.lua
+++ b/nerv/examples/lmptb/lmptb/lmutil.lua
@@ -48,15 +48,15 @@ end
--Returns: nerv.MMatrixInt
--Set the matrix to be ids of the words, id starting at 1, not 0
function Util.set_id(m, list, vocab)
- if (m:ncol() ~= #list or m:nrow() ~= 1) then
+ if (m:nrow() ~= #list or m:ncol() ~= 1) then
nerv.error("nrow of matrix mismatch with list or its col not one")
end
for i = 1, #list, 1 do
--index in matrix starts at 0
if (list[i] ~= vocab.null_token) then
- m[0][i - 1] = vocab:get_word_str(list[i]).id
+ m[i - 1][0] = vocab:get_word_str(list[i]).id
else
- m[0][i - 1] = 0
+ m[i - 1][0] = 0
end
end
return m
diff --git a/nerv/examples/lmptb/tnn/layer_dag_t.lua b/nerv/examples/lmptb/tnn/layer_dag_t.lua
index cd5fba7..ade65cc 100644
--- a/nerv/examples/lmptb/tnn/layer_dag_t.lua
+++ b/nerv/examples/lmptb/tnn/layer_dag_t.lua
@@ -266,7 +266,9 @@ function DAGLayerT:set_inputs(input, t)
end
local layer = self.inputs[i][1]
local port = self.inputs[i][2]
-
+ if layer.inputs[t] == nil then
+ layer.inputs[t] = {}
+ end
layer.inputs[t][port] = input[i]
end
end
@@ -278,6 +280,9 @@ function DAGLayerT:set_outputs(output, t)
end
local layer = self.outputs[i][1]
local port = self.outputs[i][2]
+ if layer.outputs[t] == nil then
+ layer.outputs[t] = {}
+ end
layer.outputs[t][port] = output[i]
end
end
@@ -286,6 +291,9 @@ function DAGLayerT:set_err_inputs(bp_err, t)
for i = 1, #self.dim_out do
local layer = self.outputs[i][1]
local port = self.outputs[i][2]
+ if layer.err_inputs[t] == nil then
+ layer.err_inputs[t] = {}
+ end
layer.err_inputs[t][port] = bp_err[i]
end
end
@@ -294,6 +302,9 @@ function DAGLayerT:set_err_outputs(next_bp_err, t)
for i = 1, #self.dim_in do
local layer = self.inputs[i][1]
local port = self.inputs[i][2]
+ if layer.err_outputs[t] == nil then
+ layer.err_outputs[t] = {}
+ end
layer.err_outputs[t][port] = next_bp_err[i]
end
end
diff --git a/nerv/examples/lmptb/tnn_ptb_main.lua b/nerv/examples/lmptb/tnn_ptb_main.lua
index 66c7317..9156b61 100644
--- a/nerv/examples/lmptb/tnn_ptb_main.lua
+++ b/nerv/examples/lmptb/tnn_ptb_main.lua
@@ -172,7 +172,7 @@ test_fn = data_dir .. '/ptb.test.txt.adds'
vocab_fn = data_dir .. '/vocab'
global_conf = {
- lrate = 1, wcost = 1e-5, momentum = 0,
+ lrate = 1, wcost = 1e-6, momentum = 0,
cumat_type = nerv.CuMatrixFloat,
mmat_type = nerv.MMatrixFloat,
nn_act_default = 0,
@@ -181,7 +181,7 @@ global_conf = {
chunk_size = 15,
batch_size = 10,
max_iter = 35,
- decay_iter = 16,
+ decay_iter = 15,
param_random = function() return (math.random() / 5 - 0.1) end,
train_fn = train_fn,
@@ -267,7 +267,7 @@ else
printf("%s not user setting, all default...\n", global_conf.sche_log_pre)
end
-global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate
+global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' .. global_conf.lrate .. 'wc' .. global_conf.wcost
global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
global_conf.param_fn = global_conf.work_dir .. "/params"
diff --git a/nerv/examples/lmptb/unfold_ptb_main.lua b/nerv/examples/lmptb/unfold_ptb_main.lua
index 5affadf..eebab2b 100644
--- a/nerv/examples/lmptb/unfold_ptb_main.lua
+++ b/nerv/examples/lmptb/unfold_ptb_main.lua
@@ -174,7 +174,7 @@ function propagateFile(global_conf, dagL, fn, config)
local dagL_input = {}
for i = 1, global_conf.bptt + 1 do
- dagL_input[i] = global_conf.cumat_type(1, global_conf.batch_size) --changed to row vector, debughtx
+ dagL_input[i] = global_conf.cumat_type(global_conf.batch_size, 1) --changed to row vector, debughtx
end
dagL_input[global_conf.bptt + 2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
dagL_input[global_conf.bptt + 3] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size())
@@ -210,7 +210,7 @@ function propagateFile(global_conf, dagL, fn, config)
global_conf.timer:toc("dagL-propagate")
hidden_store[tnow] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
- hidden_store[tnow]:copy_fromd(sigmoidL_ref.outputs[1])
+ hidden_store[tnow]:copy_fromd(sigmoidL_ref.outputs[1][1])
if (config.do_train == true) then
global_conf.timer:tic("dagL-back_propagate")
@@ -280,7 +280,7 @@ if (set == "ptb") then
cumat_type = nerv.CuMatrixFloat,
mmat_type = nerv.MMatrixFloat,
- hidden_size = 200,
+ hidden_size = 50,
batch_size = 10,
bptt = 6, --train bptt_block's words. could be set to zero
max_iter = 18,
@@ -291,7 +291,7 @@ if (set == "ptb") then
valid_fn = valid_fn,
test_fn = test_fn,
sche_log_pre = "[SCHEDULER]:",
- log_w_num = 100000, --give a message when log_w_num words have been processed
+ log_w_num = 1000, --give a message when log_w_num words have been processed
timer = nerv.Timer()
}
global_conf.work_dir = work_dir_base.."/h"..global_conf.hidden_size.."bp"..global_conf.bptt.."slr"..global_conf.lrate --..os.date("_%bD%dH%H") --comment this for testing
@@ -304,7 +304,7 @@ elseif (set == "test") then
global_conf = {
lrate = 0.1, wcost = 1e-6, momentum = 0,
cumat_type = nerv.CuMatrixFloat,
- mmat_type = nerv.CuMatrixFloat,
+ mmat_type = nerv.MMatrixFloat,
hidden_size = 5,
batch_size = 1,