author     txh18 <cloudygooseg@gmail.com>  2016-02-05 16:49:24 +0800
committer  txh18 <cloudygooseg@gmail.com>  2016-02-05 16:49:24 +0800
commit     8a6385261a71b2432cd20347286a6eb0166e32b6 (patch)
tree       05be1310d5de56156b9168b193a8fc06f94eb13a
parent     2fc05a9b3bb28ea8cae66c82b891028cccc40e53 (diff)
made lm sampling code cleaner
-rw-r--r--  nerv/examples/lmptb/lm_sampler.lua             |   7
-rw-r--r--  nerv/examples/lmptb/sample_grulm_ptb_main.lua  | 122
2 files changed, 72 insertions, 57 deletions
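
This commit consolidates the sampling setup: batch_size and chunk_size move from LMSampler:load_dagL into LMSampler:__init, the DAG is initialized inside load_dagL, and a new prepare_sampler(sm_conf) in sample_grulm_ptb_main.lua replaces the old load_net_dagL/prepare_dagL pair. A minimal sketch of the resulting call flow, assuming only the names visible in the diffs below (the per-word layout of the returned sentences is defined in lm_sampler.lua and is not shown here):

    -- Sketch only, not a verbatim excerpt of the repo.
    local sampler = prepare_sampler(sm_conf)         -- builds layers, DAGLayerT and LMSampler
    local res = sampler:lm_sample_rnn_dagL(10, {})   -- draw 10 sampled sentences
    for i = 1, #res do
        -- res[i] is one sampled sentence; see lm_sampler.lua for its word-level structure
    end
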
diff --git a/nerv/examples/lmptb/lm_sampler.lua b/nerv/examples/lmptb/lm_sampler.lua
index 2a4f1c3..d194af9 100644
--- a/nerv/examples/lmptb/lm_sampler.lua
+++ b/nerv/examples/lmptb/lm_sampler.lua
@@ -3,18 +3,19 @@ local LMSampler = nerv.class('nerv.LMSampler')
function LMSampler:__init(global_conf)
self.log_pre = "LMSampler"
self.gconf = global_conf
+ self.batch_size = self.gconf.batch_size
+ self.chunk_size = self.gconf.chunk_size --largest sample sentence length
self.vocab = self.gconf.vocab
self.sen_end_token = self.vocab.sen_end_token
self.sen_end_id = self.vocab:get_word_str(self.sen_end_token).id
end
function LMSampler:load_dagL(dagL)
- self.batch_size = self.gconf.batch_size
- self.chunk_size = self.gconf.chunk_size
-
+
nerv.printf("%s loading dagL\n", self.log_pre)
self.dagL = dagL
+ self.dagL:init(self.batch_size)
self.dagL_inputs = {}
self.dagL_inputs[1] = global_conf.cumat_type(global_conf.batch_size, 1)
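
With this hunk the sampler owns its batch geometry and initializes the DAG itself, so a caller only constructs the DAGLayerT and hands it over. A minimal sketch, assuming the constructor and load_dagL signatures shown above:

    -- sm_conf must carry batch_size, chunk_size and vocab, since __init now reads them.
    local sampler = nerv.LMSampler(sm_conf)
    sampler:load_dagL(dagL)   -- load_dagL calls dagL:init(self.batch_size) internally
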
diff --git a/nerv/examples/lmptb/sample_grulm_ptb_main.lua b/nerv/examples/lmptb/sample_grulm_ptb_main.lua
index 30dfe26..42a5787 100644
--- a/nerv/examples/lmptb/sample_grulm_ptb_main.lua
+++ b/nerv/examples/lmptb/sample_grulm_ptb_main.lua
@@ -134,10 +134,39 @@ function prepare_tnn(global_conf, layerRepo)
return tnn
end
-function prepare_dagL(global_conf, layerRepo)
- nerv.printf("%s Generate and initing dagL ...\n", global_conf.sche_log_pre)
+function load_net_tnn(global_conf, fn)
+ prepare_parameters(global_conf, fn)
+ local layerRepo = prepare_layers(global_conf)
+ local tnn = prepare_tnn(global_conf, layerRepo)
+ return tnn
+end
+
+function prepare_sampler(sm_conf)
+ sm_conf.pr = nerv.ParamRepo()
+ sm_conf.pr:import({sm_conf.fn_to_sample}, nil, sm_conf)
+
+ local layers = {
+ ["nerv.GRULayerT"] = {
+ ["gruL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}, ["pr"] = sm_conf.pr}},
+ },
+ ["nerv.DropoutLayerT"] = {
+ ["dropoutL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}}},
+ },
+ ["nerv.SelectLinearLayer"] = {
+ ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {sm_conf.hidden_size}, ["vocab"] = sm_conf.vocab, ["pr"] = sm_conf.pr}},
+ },
+ ["nerv.CombinerLayer"] = {
+ ["combinerL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["lambda"] = {1}}},
+ },
+ ["nerv.AffineLayer"] = {
+ ["outputL"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.vocab:size()}, ["pr"] = sm_conf.pr}},
+ },
+ ["nerv.SoftmaxCELayerT"] = {
+ ["softmaxL"] = {{}, {["dim_in"] = {sm_conf.vocab:size(), sm_conf.vocab:size()}, ["dim_out"] = {1}}},
+ },
+ }
+ local layerRepo = nerv.LayerRepo(layers, sm_conf.pr, sm_conf)
- --input: input_w, input_w, ... input_w_now, last_activation
local connections_t = {
["<input>[1]"] = "selectL1[1]",
@@ -151,48 +180,19 @@ function prepare_dagL(global_conf, layerRepo)
["combinerL1[2]"] = "<output>[2]",
}
- if global_conf.layer_num > 1 then
+ if sm_conf.layer_num > 1 then
nerv.error("multiple layer is currently not supported(not hard to implement though)")
end
- --[[
- for l = 2, global_conf.layer_num do
- table.insert(connections_t, {"dropoutL"..(l-1).."[1]", "gruL"..l.."[1]", 0})
- table.insert(connections_t, {"gruL"..l.."[1]", "combinerL"..l.."[1]", 0})
- table.insert(connections_t, {"combinerL"..l.."[1]", "gruL"..l.."[2]", 1})
- table.insert(connections_t, {"combinerL"..l.."[2]", "dropoutL"..l.."[1]", 0})
- end
- ]]--
- --[[
- printf("%s printing DAG connections:\n", global_conf.sche_log_pre)
- for key, value in pairs(connections_t) do
- printf("\t%s->%s\n", key, value)
- end
- ]]--
-
- local dagL = nerv.DAGLayerT("dagL", global_conf, {["dim_in"] = {1, global_conf.hidden_size},
- ["dim_out"] = {global_conf.vocab:size(), global_conf.hidden_size}, ["sub_layers"] = layerRepo,
+ local dagL = nerv.DAGLayerT("dagL", sm_conf, {["dim_in"] = {1, sm_conf.hidden_size},
+ ["dim_out"] = {sm_conf.vocab:size(), sm_conf.hidden_size}, ["sub_layers"] = layerRepo,
["connections"] = connections_t
})
+
+ local sampler = nerv.LMSampler(sm_conf)
+ sampler:load_dagL(dagL)
- dagL:init(global_conf.batch_size)
-
- nerv.printf("%s Initing DAGL end.\n", global_conf.sche_log_pre)
- return dagL
-end
-
-function load_net_tnn(global_conf, fn)
- prepare_parameters(global_conf, fn)
- local layerRepo = prepare_layers(global_conf)
- local tnn = prepare_tnn(global_conf, layerRepo)
- return tnn
-end
-
-function load_net_dagL(global_conf, fn)
- prepare_parameters(global_conf, fn)
- local layerRepo = prepare_layers(global_conf)
- local dagL = prepare_dagL(global_conf, layerRepo)
- return dagL
+ return sampler
end
local train_fn, valid_fn, test_fn
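
For reference, the DAGLayerT built by prepare_sampler keeps the same per-step interface as the old prepare_dagL; the sketch below only restates the dimensions from the hunk above (that output [1] carries the per-word scores produced by outputL is an assumption, since the connection feeding it is not shown in this hunk):

    -- <input>[1]  : current word id                      (1 column)
    -- <input>[2]  : previous hidden activation           (hidden_size columns)
    -- <output>[1] : per-word scores over the vocabulary  (vocab:size() columns)
    -- <output>[2] : new hidden activation                (hidden_size columns)
    local dagL = nerv.DAGLayerT("dagL", sm_conf, {
        ["dim_in"]  = {1, sm_conf.hidden_size},
        ["dim_out"] = {sm_conf.vocab:size(), sm_conf.hidden_size},
        ["sub_layers"] = layerRepo,
        ["connections"] = connections_t,
    })
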
@@ -240,6 +240,23 @@ global_conf = {
fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final',
}
+sm_conf = {
+ cumat_type = nerv.CuMatrixFloat,
+ mmat_type = nerv.MMatrixFloat,
+ nn_act_default = 0,
+
+ hidden_size = 300,
+ layer_num = 1,
+ batch_size = 32,
+ chunk_size = 85, --largest sample sentence length
+ max_iter = 35,
+ max_sen_len = 90,
+ sche_log_pre = "[SAMPLER_S]:",
+
+ timer = global_conf.timer,
+ fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final',
+}
+
elseif (set == "msr_sc") then
data_dir = '/home/slhome/txh18/workspace/sentenceCompletion/DATA_PV2'
@@ -276,15 +293,13 @@ global_conf = {
elseif (set == "twitter") then
-data_dir = root_dir .. '/twitter_new/DATA'
-train_fn = data_dir .. '/twitter.choose2.adds'
-valid_fn = data_dir .. '/twitter.valid.adds'
-test_fn = data_dir .. '/comm.test.choose-ppl.adds'
-vocab_fn = data_dir .. '/twitter.choose.train.vocab'
-
---qdata_dir = root_dir .. '/ptb/questionGen/gen'
-
-global_conf = {
+ data_dir = root_dir .. '/twitter_new/DATA'
+ train_fn = data_dir .. '/twitter.choose2.adds'
+ valid_fn = data_dir .. '/twitter.valid.adds'
+ test_fn = data_dir .. '/comm.test.choose-ppl.adds'
+ vocab_fn = data_dir .. '/twitter.choose.train.vocab'
+ --qdata_dir = root_dir .. '/ptb/questionGen/gen'
+ global_conf = {
lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5,
cumat_type = nerv.CuMatrixFloat,
mmat_type = nerv.MMatrixFloat,
@@ -309,7 +324,7 @@ global_conf = {
log_w_num = 40000, --give a message when log_w_num words have been processed
timer = nerv.Timer(),
work_dir_base = root_dir .. '/twitter_new/EXP-nerv/grulm_v1.0'
-}
+ }
else
@@ -385,8 +400,8 @@ nerv.LMUtil.wait(2)
]]--
----------------printing options---------------------------------
-nerv.printf("%s printing global_conf...\n", global_conf.sche_log_pre)
-for id, value in pairs(global_conf) do
+nerv.printf("%s printing sm_conf...\n", sm_conf.sche_log_pre)
+for id, value in pairs(sm_conf) do
nerv.printf("%s:\t%s\n", id, tostring(value))
end
nerv.LMUtil.wait(2)
@@ -405,6 +420,7 @@ math.randomseed(1)
local vocab = nerv.LMVocab()
global_conf["vocab"] = vocab
+sm_conf["vocab"] = global_conf.vocab
nerv.printf("%s building vocab...\n", global_conf.sche_log_pre)
global_conf.vocab:build_file(global_conf.vocab_fn, false)
ppl_rec = {}
@@ -421,9 +437,7 @@ end --if commands["test"]
if commands["sampling"] == 1 then
nerv.printf("===SAMPLE===\n")
global_conf.sche_log_pre = "[SCHEDULER SAMPLING]:"
- local dagL = load_net_dagL(global_conf, global_conf.fn_to_sample)
- local sampler = nerv.LMSampler(global_conf)
- sampler:load_dagL(dagL)
+ local sampler = prepare_sampler(sm_conf)
for k = 1, 1 do
local res = sampler:lm_sample_rnn_dagL(10, {})
for i = 1, #res do