diff options
-rw-r--r-- | nerv/examples/lmptb/lm_sampler.lua | 7 | ||||
-rw-r--r-- | nerv/examples/lmptb/sample_grulm_ptb_main.lua | 122 |
2 files changed, 72 insertions, 57 deletions
diff --git a/nerv/examples/lmptb/lm_sampler.lua b/nerv/examples/lmptb/lm_sampler.lua index 2a4f1c3..d194af9 100644 --- a/nerv/examples/lmptb/lm_sampler.lua +++ b/nerv/examples/lmptb/lm_sampler.lua @@ -3,18 +3,19 @@ local LMSampler = nerv.class('nerv.LMSampler') function LMSampler:__init(global_conf) self.log_pre = "LMSampler" self.gconf = global_conf + self.batch_size = self.gconf.batch_size + self.chunk_size = self.gconf.chunk_size --largest sample sentence length self.vocab = self.gconf.vocab self.sen_end_token = self.vocab.sen_end_token self.sen_end_id = self.vocab:get_word_str(self.sen_end_token).id end function LMSampler:load_dagL(dagL) - self.batch_size = self.gconf.batch_size - self.chunk_size = self.gconf.chunk_size - + nerv.printf("%s loading dagL\n", self.log_pre) self.dagL = dagL + self.dagL:init(self.batch_size) self.dagL_inputs = {} self.dagL_inputs[1] = global_conf.cumat_type(global_conf.batch_size, 1) diff --git a/nerv/examples/lmptb/sample_grulm_ptb_main.lua b/nerv/examples/lmptb/sample_grulm_ptb_main.lua index 30dfe26..42a5787 100644 --- a/nerv/examples/lmptb/sample_grulm_ptb_main.lua +++ b/nerv/examples/lmptb/sample_grulm_ptb_main.lua @@ -134,10 +134,39 @@ function prepare_tnn(global_conf, layerRepo) return tnn end -function prepare_dagL(global_conf, layerRepo) - nerv.printf("%s Generate and initing dagL ...\n", global_conf.sche_log_pre) +function load_net_tnn(global_conf, fn) + prepare_parameters(global_conf, fn) + local layerRepo = prepare_layers(global_conf) + local tnn = prepare_tnn(global_conf, layerRepo) + return tnn +end + +function prepare_sampler(sm_conf) + sm_conf.pr = nerv.ParamRepo() + sm_conf.pr:import({sm_conf.fn_to_sample}, nil, sm_conf) + + local layers = { + ["nerv.GRULayerT"] = { + ["gruL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}, ["pr"] = sm_conf.pr}}, + }, + ["nerv.DropoutLayerT"] = { + ["dropoutL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}}}, + }, + ["nerv.SelectLinearLayer"] = { + ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {sm_conf.hidden_size}, ["vocab"] = sm_conf.vocab, ["pr"] = sm_conf.pr}}, + }, + ["nerv.CombinerLayer"] = { + ["combinerL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["lambda"] = {1}}}, + }, + ["nerv.AffineLayer"] = { + ["outputL"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.vocab:size()}, ["pr"] = sm_conf.pr}}, + }, + ["nerv.SoftmaxCELayerT"] = { + ["softmaxL"] = {{}, {["dim_in"] = {sm_conf.vocab:size(), sm_conf.vocab:size()}, ["dim_out"] = {1}}}, + }, + } + local layerRepo = nerv.LayerRepo(layers, sm_conf.pr, sm_conf) - --input: input_w, input_w, ... input_w_now, last_activation local connections_t = { ["<input>[1]"] = "selectL1[1]", @@ -151,48 +180,19 @@ function prepare_dagL(global_conf, layerRepo) ["combinerL1[2]"] = "<output>[2]", } - if global_conf.layer_num > 1 then + if sm_conf.layer_num > 1 then nerv.error("multiple layer is currently not supported(not hard to implement though)") end - --[[ - for l = 2, global_conf.layer_num do - table.insert(connections_t, {"dropoutL"..(l-1).."[1]", "gruL"..l.."[1]", 0}) - table.insert(connections_t, {"gruL"..l.."[1]", "combinerL"..l.."[1]", 0}) - table.insert(connections_t, {"combinerL"..l.."[1]", "gruL"..l.."[2]", 1}) - table.insert(connections_t, {"combinerL"..l.."[2]", "dropoutL"..l.."[1]", 0}) - end - ]]-- - --[[ - printf("%s printing DAG connections:\n", global_conf.sche_log_pre) - for key, value in pairs(connections_t) do - printf("\t%s->%s\n", key, value) - end - ]]-- - - local dagL = nerv.DAGLayerT("dagL", global_conf, {["dim_in"] = {1, global_conf.hidden_size}, - ["dim_out"] = {global_conf.vocab:size(), global_conf.hidden_size}, ["sub_layers"] = layerRepo, + local dagL = nerv.DAGLayerT("dagL", sm_conf, {["dim_in"] = {1, sm_conf.hidden_size}, + ["dim_out"] = {sm_conf.vocab:size(), sm_conf.hidden_size}, ["sub_layers"] = layerRepo, ["connections"] = connections_t }) + + local sampler = nerv.LMSampler(sm_conf) + sampler:load_dagL(dagL) - dagL:init(global_conf.batch_size) - - nerv.printf("%s Initing DAGL end.\n", global_conf.sche_log_pre) - return dagL -end - -function load_net_tnn(global_conf, fn) - prepare_parameters(global_conf, fn) - local layerRepo = prepare_layers(global_conf) - local tnn = prepare_tnn(global_conf, layerRepo) - return tnn -end - -function load_net_dagL(global_conf, fn) - prepare_parameters(global_conf, fn) - local layerRepo = prepare_layers(global_conf) - local dagL = prepare_dagL(global_conf, layerRepo) - return dagL + return sampler end local train_fn, valid_fn, test_fn @@ -240,6 +240,23 @@ global_conf = { fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final', } +sm_conf = { + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + nn_act_default = 0, + + hidden_size = 300, + layer_num = 1, + batch_size = 32, + chunk_size = 85, --largest sample sentence length + max_iter = 35, + max_sen_len = 90, + sche_log_pre = "[SAMPLER_S]:", + + timer = global_conf.timer, + fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final', +} + elseif (set == "msr_sc") then data_dir = '/home/slhome/txh18/workspace/sentenceCompletion/DATA_PV2' @@ -276,15 +293,13 @@ global_conf = { elseif (set == "twitter") then -data_dir = root_dir .. '/twitter_new/DATA' -train_fn = data_dir .. '/twitter.choose2.adds' -valid_fn = data_dir .. '/twitter.valid.adds' -test_fn = data_dir .. '/comm.test.choose-ppl.adds' -vocab_fn = data_dir .. '/twitter.choose.train.vocab' - ---qdata_dir = root_dir .. '/ptb/questionGen/gen' - -global_conf = { + data_dir = root_dir .. '/twitter_new/DATA' + train_fn = data_dir .. '/twitter.choose2.adds' + valid_fn = data_dir .. '/twitter.valid.adds' + test_fn = data_dir .. '/comm.test.choose-ppl.adds' + vocab_fn = data_dir .. '/twitter.choose.train.vocab' + --qdata_dir = root_dir .. '/ptb/questionGen/gen' + global_conf = { lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5, cumat_type = nerv.CuMatrixFloat, mmat_type = nerv.MMatrixFloat, @@ -309,7 +324,7 @@ global_conf = { log_w_num = 40000, --give a message when log_w_num words have been processed timer = nerv.Timer(), work_dir_base = root_dir .. '/twitter_new/EXP-nerv/grulm_v1.0' -} + } else @@ -385,8 +400,8 @@ nerv.LMUtil.wait(2) ]]-- ----------------printing options--------------------------------- -nerv.printf("%s printing global_conf...\n", global_conf.sche_log_pre) -for id, value in pairs(global_conf) do +nerv.printf("%s printing sm_conf...\n", sm_conf.sche_log_pre) +for id, value in pairs(sm_conf) do nerv.printf("%s:\t%s\n", id, tostring(value)) end nerv.LMUtil.wait(2) @@ -405,6 +420,7 @@ math.randomseed(1) local vocab = nerv.LMVocab() global_conf["vocab"] = vocab +sm_conf["vocab"] = global_conf.vocab nerv.printf("%s building vocab...\n", global_conf.sche_log_pre) global_conf.vocab:build_file(global_conf.vocab_fn, false) ppl_rec = {} @@ -421,9 +437,7 @@ end --if commands["test"] if commands["sampling"] == 1 then nerv.printf("===SAMPLE===\n") global_conf.sche_log_pre = "[SCHEDULER SAMPLING]:" - local dagL = load_net_dagL(global_conf, global_conf.fn_to_sample) - local sampler = nerv.LMSampler(global_conf) - sampler:load_dagL(dagL) + local sampler = prepare_sampler(sm_conf) for k = 1, 1 do local res = sampler:lm_sample_rnn_dagL(10, {}) for i = 1, #res do |