require 'lmptb.lmvocab'
require 'lmptb.lmfeeder'
require 'lmptb.lmutil'
nerv.include('lmptb/layer/init.lua')
--[[global function rename]]--
printf = nerv.printf
--[[global function rename ends]]--
--global_conf: table
--first_time: bool
--Returns: a ParamRepo
function prepare_parameters(global_conf, first_time)
printf("%s preparing parameters...\n", global_conf.sche_log_pre)
if (first_time) then
ltp_ih = nerv.LinearTransParam("ltp_ih", global_conf)
ltp_ih.trans = global_conf.cumat_type(global_conf.vocab:size(), global_conf.hidden_size)
ltp_ih.trans:generate(global_conf.param_random)
ltp_hh = nerv.LinearTransParam("ltp_hh", global_conf)
ltp_hh.trans = global_conf.cumat_type(global_conf.hidden_size, global_conf.hidden_size)
ltp_hh.trans:generate(global_conf.param_random)
ltp_ho = nerv.LinearTransParam("ltp_ho", global_conf)
ltp_ho.trans = global_conf.cumat_type(global_conf.hidden_size, global_conf.vocab:size())
ltp_ho.trans:generate(global_conf.param_random)
bp_h = nerv.BiasParam("bp_h", global_conf)
bp_h.trans = global_conf.cumat_type(1, global_conf.hidden_size)
bp_h.trans:generate(global_conf.param_random)
bp_o = nerv.BiasParam("bp_o", global_conf)
bp_o.trans = global_conf.cumat_type(1, global_conf.vocab:size())
bp_o.trans:generate(global_conf.param_random)
local f = nerv.ChunkFile(global_conf.param_fn, 'w')
f:write_chunk(ltp_ih)
f:write_chunk(ltp_hh)
f:write_chunk(ltp_ho)
f:write_chunk(bp_h)
f:write_chunk(bp_o)
f:close()
end
local paramRepo = nerv.ParamRepo()
paramRepo:import({global_conf.param_fn}, nil, global_conf)
printf("%s preparing parameters end.\n", global_conf.sche_log_pre)
return paramRepo
end
--global_conf: table
--Returns: nerv.LayerRepo
function prepare_layers(global_conf, paramRepo)
printf("%s preparing layers...\n", global_conf.sche_log_pre)
local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_ih"] = "ltp_ih", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.vocab:size(), global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent}}
local layers = {
["nerv.LMAffineRecurrentLayer"] = {
["recurrentL1"] = recurrentLconfig,
},
["nerv.SigmoidLayer"] = {
["sigmoidL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
},
["nerv.AffineLayer"] = {
["outputL"] = {{["ltp"] = "ltp_ho", ["bp"] = "bp_o"}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.vocab:size()}}},
},
["nerv.SoftmaxCELayer"] = {
["softmaxL"] = {{}, {["dim_in"] = {global_conf.vocab:size(), global_conf.vocab:size()}, ["dim_out"] = {1}}},
},
}
printf("%s adding %d bptt layers...\n", global_conf.sche_log_pre, global_conf.bptt)
for i = 1, global_conf.bptt do
layers["nerv.LMAffineRecurrentLayer"]["recurrentL" .. (i + 1)] = recurrentLconfig
layers["nerv.SigmoidLayer"]["sigmoidL" .. (i + 1)] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
end
local layerRepo = nerv.LayerRepo(layers, paramRepo, global_conf)
printf("%s preparing layers end.\n", global_conf.sche_log_pre)
return layerRepo
end
--global_conf: table
--layerRepo: nerv.LayerRepo
--Returns: a nerv.DAGLayer
function prepare_dagLayer(global_conf, layerRepo)
printf("%s Initing daglayer ...\n", global_conf.sche_log_pre)
--input: input_w, input_w, ... input_w_now, last_activation
local dim_in_t = {}
for i = 1, global_conf.bptt + 1 do dim_in_t[i] = global_conf.vocab:size() end
dim_in_t[global_conf.bptt + 2] = global_conf.hidden_size
dim_in_t[global_conf.bptt + 3] = global_conf.vocab:size()
--[[ softmax
| \
ouptut i(bptt+3)
|
recurrentL(bptt+1)... recurrentL2-recurrentL1
/ | | |
i(bptt+2) i(bptt+1) i2 i1
]]--
local connections_t = {
["recurrentL1[1]"] = "sigmoidL1[1]",
["sigmoidL1[1]"] = "outputL[1]",
["outputL[1]"] = "softmaxL[1]",
["softmaxL[1]"] = "<output>[1]"
}
for i = 1, global_conf.bptt, 1 do
connections_t["<input>["..i.."]"] = "recurrentL"..i.."[1]"
connections_t["recurrentL"..(i+1).."[1]"] = "sigmoidL"..(i+1).."[1]"
connections_t["sigmoidL"..(i+1).."[1]"] = "recurrentL"..i.."[2]"
end
connections_t["<input>["..(global_conf.bptt+1).."]"] = "recurrentL"..(global_conf.bptt+1).."[1]"
connections_t["<input>["..(global_conf.bptt+2).."]"] = "recurrentL"..(global_conf.bptt+1).."[2]"
connections_t["<input>["..(global_conf.bptt+3).."]"] = "softmaxL[2]"
printf("%s printing DAG connections:\n", global_conf.sche_log_pre)
for key, value in pairs(connections_t) do
printf("\t%s->%s\n", key, value)
end
local dagL = nerv.DAGLayer("dagL", global_conf, {["dim_in"] = dim_in_t, ["dim_out"] = {1}, ["sub_layers"] = layerRepo,
["connections"] = connections_t,
})
dagL:init(global_conf.batch_size)
printf("%s Initing DAGLayer end.\n", global_conf.sche_log_pre)
return dagL
end
--Returns: table
function create_dag_input(global_conf, token_store, hidden_store, tnow)
local dagL_input = {}
for i = 1, global_conf.bptt + 1 do
dagL_input[i] = nerv.LMUtil.create_onehot(token_store[tnow - i + 1], global_conf.vocab, global_conf.cumat_type)
end
dagL_input[global_conf.bptt + 2] = hidden_store[tnow - global_conf.bptt - 1]
dagL_input[global_conf.bptt + 3] = nerv.LMUtil.create_onehot(token_store[tnow + 1], global_conf.vocab, global_conf.cumat_type)
return dagL_input
end
--global_conf: table
--dagL: nerv.DAGLayer
--fn: string
--config: table
--Returns: table, result
function propagateFile(global_conf, dagL, fn, config)
printf("%s Begining doing on %s...\n", global_conf.sche_log_pre, fn)
if (config.do_train == true) then printf("%s do_train in config is true.\n", global_conf.sche_log_pre) end
local feeder = nerv.LMFeeder(global_conf, global_conf.batch_size, global_conf.vocab)
feeder:open_file(fn)
local tnow = 1
local token_store = {}
local hidden_store = {}
local sigmoidL_ref = dagL.layers["sigmoidL1"]
token_store[tnow] = feeder:get_batch()
for i = 1, global_conf.bptt + 1 do
hidden_store[tnow - i] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
hidden_store[tnow - i]:fill(0)
token_store[tnow - i] = {}
for j = 1, global_conf.batch_size do token_store[tnow - i][j] = global_conf.vocab.null_token end
end
local dagL_output = {global_conf.cumat_type(global_conf.batch_size, 1)}
local dagL_err = {nil} --{global_conf.cumat_type(global_conf.batch_size, 1)}
local dagL_input_err = {}
for i = 1, global_conf.bptt + 1 do
dagL_input_err[i] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size())
end
dagL_input_err[global_conf.bptt + 2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
dagL_input_err[global_conf.bptt + 3] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size())
local result = nerv.LMResult(global_conf, global_conf.vocab)
result:init("rnn")
while (1) do
token_store[tnow + 1] = feeder:get_batch() --The next word(to predict)
if (token_store[tnow + 1] == nil) then break end
local dagL_input = create_dag_input(global_conf, token_store, hidden_store, tnow)
--dagL:propagate(dagL_input, dagL_output)
hidden_store[tnow] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
hidden_store[tnow]:copy_fromd(sigmoidL_ref.outputs[1])
if (config.do_train == true) then
--dagL:back_propagate(dagL_err, dagL_input_err, dagL_input, dagL_output)
--dagL:update(dagL_err, dagL_input, dagL_output)
end
for i = 1, global_conf.batch_size, 1 do
if (token_store[tnow + 1][i] ~= global_conf.vocab.null_token) then
result:add("rnn", token_store[tnow + 1][i], math.exp(dagL_output[1][i - 1][0]))
if (config.report_word == true) then
printf("%s %s: <stream %d> <prob %f>\n", global_conf.sche_log_pre, token_store[tnow + 1][i], i, math.exp(dagL_output[1][i - 1][0]))
end
end
if (result["rnn"].cn_w % global_conf.log_w_num == 0) then
printf("%s %d words processed.\n", global_conf.sche_log_pre, result["rnn"].cn_w)
end
end
token_store[tnow - 2 - global_conf.bptt] = nil
hidden_store[tnow - 2 - global_conf.bptt] =