-rw-r--r-- | nerv/Makefile            |   2
-rw-r--r-- | nerv/layer/dropout.lua   |  77
-rw-r--r-- | nerv/layer/elem_mul.lua  |  14
-rw-r--r-- | nerv/layer/gru.lua       | 128
-rw-r--r-- | nerv/layer/init.lua      |   6
-rw-r--r-- | nerv/layer/lstm.lua      | 140
-rw-r--r-- | nerv/layer/lstm_gate.lua |  77
-rw-r--r-- | nerv/nn/layer_dag.lua    | 146
8 files changed, 526 insertions(+), 64 deletions(-)
diff --git a/nerv/Makefile b/nerv/Makefile
index a472cfc..ee4b9c0 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -32,7 +32,7 @@ LIBS := $(INST_LIBDIR)/libnerv.so $(LIB_PATH)/libnervcore.so $(LIB_PATH)/libluaT
 LUA_LIBS := matrix/init.lua io/init.lua init.lua \
         layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \
         layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua layer/affine_recurrent.lua \
-        layer/elem_mul.lua layer/gate_fff.lua \
+        layer/elem_mul.lua layer/gate_fff.lua layer/lstm.lua layer/lstm_gate.lua layer/dropout.lua layer/gru.lua \
         nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \
         io/sgd_buffer.lua \
         tnn/init.lua tnn/layer_dag_t.lua tnn/sutil.lua tnn/tnn.lua \
diff --git a/nerv/layer/dropout.lua b/nerv/layer/dropout.lua
new file mode 100644
index 0000000..42660cc
--- /dev/null
+++ b/nerv/layer/dropout.lua
@@ -0,0 +1,77 @@
+local DropoutLayer = nerv.class("nerv.DropoutLayer", "nerv.Layer")
+
+function DropoutLayer:__init(id, global_conf, layer_conf)
+    self.id = id
+    self.gconf = global_conf
+    if self.gconf.use_cpu then
+        self.mat_type = self.gconf.mmat_type
+    else
+        self.mat_type = self.gconf.cumat_type
+    end
+    self.rate = layer_conf.dropout_rate or global_conf.dropout_rate
+    if self.rate == nil then
+        nerv.warning("[DropoutLayer:__init] dropout rate is not set")
+    end
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self:check_dim_len(1, 1) -- a single input and a single output of the same dimension
+end
+
+function DropoutLayer:init(batch_size, chunk_size)
+    if self.dim_in[1] ~= self.dim_out[1] then
+        nerv.error("mismatching dimensions of input and output")
+    end
+    if chunk_size == nil then
+        chunk_size = 1
+    end
+    self.mask = {}
+    for t = 1, chunk_size do
+        self.mask[t] = self.mat_type(batch_size, self.dim_in[1])
+    end
+end
+
+function DropoutLayer:batch_resize(batch_size, chunk_size)
+    if chunk_size == nil then
+        chunk_size = 1
+    end
+    for t = 1, chunk_size do
+        if self.mask[t] == nil or self.mask[t]:nrow() ~= batch_size then
+            self.mask[t] = self.mat_type(batch_size, self.dim_in[1])
+        end
+    end
+end
+
+function DropoutLayer:propagate(input, output, t)
+    if t == nil then
+        t = 1
+    end
+    if self.rate then
+        self.mask[t]:rand_uniform()
+        -- since we will lose a portion of the activations, we multiply the
+        -- activations by 1 / (1 - rate) to compensate
+        self.mask[t]:thres_mask(self.mask[t], self.rate,
+                                0, 1 / (1.0 - self.rate))
+        output[1]:mul_elem(input[1], self.mask[t])
+    else
+        output[1]:copy_fromd(input[1])
+    end
+end
+
+function DropoutLayer:update(bp_err, input, output, t)
+    -- no params, therefore do nothing
+end
+
+function DropoutLayer:back_propagate(bp_err, next_bp_err, input, output, t)
+    if t == nil then
+        t = 1
+    end
+    if self.rate then
+        next_bp_err[1]:mul_elem(bp_err[1], self.mask[t])
+    else
+        next_bp_err[1]:copy_fromd(bp_err[1])
+    end
+end
+
+function DropoutLayer:get_params()
+    return nerv.ParamRepo({})
+end
diff --git a/nerv/layer/elem_mul.lua b/nerv/layer/elem_mul.lua
index c809d3e..fe80a3f 100644
--- a/nerv/layer/elem_mul.lua
+++ b/nerv/layer/elem_mul.lua
@@ -5,19 +5,19 @@ function ElemMulLayer:__init(id, global_conf, layer_conf)
     self.dim_in = layer_conf.dim_in
     self.dim_out = layer_conf.dim_out
     self.gconf = global_conf
-
-    self:check_dim_len(2, 1) -- Element-multiply input[1] and input[2]
+    -- element-wise multiplication of input[1] and input[2]
+    self:check_dim_len(2, 1)
 end
 
 function ElemMulLayer:init(batch_size)
-    if self.dim_in[1] ~= self.dim_in[2] or
+    if self.dim_in[1] ~= self.dim_in[2] or
         self.dim_in[1] ~= self.dim_out[1] then
-        nerv.error("dim_in and dim_out mismatch for ElemMulLayer")
+        nerv.error("mismatching dimensions of input and output")
     end
 end
 
 function ElemMulLayer:batch_resize(batch_size)
-    --do nothing
+    -- do nothing
 end
 
 function ElemMulLayer:propagate(input, output)
@@ -25,12 +25,12 @@ function ElemMulLayer:propagate(input, output)
 end
 
 function ElemMulLayer:back_propagate(bp_err, next_bp_err, input, output)
-    next_bp_err[1]:mul_elem(bp_err[1], input[2])
+    next_bp_err[1]:mul_elem(bp_err[1], input[2])
     next_bp_err[2]:mul_elem(bp_err[1], input[1])
 end
 
 function ElemMulLayer:update(bp_err, input, output)
-    --do nothing
+    -- do nothing
 end
 
 function ElemMulLayer:get_params()
diff --git a/nerv/layer/gru.lua b/nerv/layer/gru.lua
new file mode 100644
index 0000000..2162e28
--- /dev/null
+++ b/nerv/layer/gru.lua
@@ -0,0 +1,128 @@
+local GRULayer = nerv.class('nerv.GRULayer', 'nerv.Layer')
+
+function GRULayer:__init(id, global_conf, layer_conf)
+    -- input1: x
+    -- input2: h
+    -- (the candidate state h~ is computed inside the layer)
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+
+    if self.dim_in[2] ~= self.dim_out[1] then
+        nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)",
+                    self.dim_in[2], self.dim_out[1])
+    end
+
+    -- prepare a DAGLayer to hold the GRU structure
+    local pr = layer_conf.pr
+    if pr == nil then
+        pr = nerv.ParamRepo()
+    end
+
+    local function ap(str)
+        return self.id .. '.' .. str
+    end
+    local din1, din2 = self.dim_in[1], self.dim_in[2]
+    local dout1 = self.dim_out[1]
+    local layers = {
+        ["nerv.CombinerLayer"] = {
+            [ap("inputXDup")] = {{}, {dim_in = {din1},
+                                      dim_out = {din1, din1, din1},
+                                      lambda = {1}}},
+            [ap("inputHDup")] = {{}, {dim_in = {din2},
+                                      dim_out = {din2, din2, din2, din2, din2},
+                                      lambda = {1}}},
+            [ap("updateGDup")] = {{}, {dim_in = {din2},
+                                       dim_out = {din2, din2},
+                                       lambda = {1}}},
+            [ap("updateMergeL")] = {{}, {dim_in = {din2, din2, din2},
+                                         dim_out = {dout1},
+                                         lambda = {1, -1, 1}}},
+        },
+        ["nerv.AffineLayer"] = {
+            [ap("mainAffineL")] = {{}, {dim_in = {din1, din2},
+                                        dim_out = {dout1},
+                                        pr = pr}},
+        },
+        ["nerv.TanhLayer"] = {
+            [ap("mainTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}},
+        },
+        ["nerv.GateFLayer"] = {
+            [ap("resetGateL")] = {{}, {dim_in = {din1, din2},
+                                       dim_out = {din2},
+                                       pr = pr}},
+            [ap("updateGateL")] = {{}, {dim_in = {din1, din2},
+                                        dim_out = {din2},
+                                        pr = pr}},
+        },
+        ["nerv.ElemMulLayer"] = {
+            [ap("resetGMulL")] = {{}, {dim_in = {din2, din2}, dim_out = {din2}}},
+            [ap("updateGMulCL")] = {{}, {dim_in = {din2, din2}, dim_out = {din2}}},
+            [ap("updateGMulHL")] = {{}, {dim_in = {din2, din2}, dim_out = {din2}}},
+        },
+    }
+
+    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+
+    local connections = {
+        ["<input>[1]"] = ap("inputXDup[1]"),
+        ["<input>[2]"] = ap("inputHDup[1]"),
+
+        [ap("inputXDup[1]")] = ap("resetGateL[1]"),
+        [ap("inputHDup[1]")] = ap("resetGateL[2]"),
+        [ap("inputXDup[2]")] = ap("updateGateL[1]"),
+        [ap("inputHDup[2]")] = ap("updateGateL[2]"),
+        [ap("updateGateL[1]")] = ap("updateGDup[1]"),
+
+        [ap("resetGateL[1]")] = ap("resetGMulL[1]"),
+        [ap("inputHDup[3]")] = ap("resetGMulL[2]"),
+
+        [ap("inputXDup[3]")] = ap("mainAffineL[1]"),
+        [ap("resetGMulL[1]")] = ap("mainAffineL[2]"),
+        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
+
+        [ap("updateGDup[1]")] = ap("updateGMulHL[1]"),
+        [ap("inputHDup[4]")] = ap("updateGMulHL[2]"),
+        [ap("updateGDup[2]")] = ap("updateGMulCL[1]"),
+        [ap("mainTanhL[1]")] = ap("updateGMulCL[2]"),
+
+        [ap("inputHDup[5]")] = ap("updateMergeL[1]"),
+        [ap("updateGMulHL[1]")] = ap("updateMergeL[2]"),
+        [ap("updateGMulCL[1]")] = ap("updateMergeL[3]"),
+
+        [ap("updateMergeL[1]")] = "<output>[1]",
+    }
+
+    self.dag = nerv.DAGLayer(self.id, global_conf,
+                             {dim_in = self.dim_in,
+                              dim_out = self.dim_out,
+                              sub_layers = layerRepo,
+                              connections = connections})
+
+    self:check_dim_len(2, 1) -- inputs: x, h; output: h
+end
+
+function GRULayer:init(batch_size, chunk_size)
+    self.dag:init(batch_size, chunk_size)
+end
+
+function GRULayer:batch_resize(batch_size, chunk_size)
+    self.dag:batch_resize(batch_size, chunk_size)
+end
+
+function GRULayer:update(bp_err, input, output, t)
+    self.dag:update(bp_err, input, output, t)
+end
+
+function GRULayer:propagate(input, output, t)
+    self.dag:propagate(input, output, t)
+end
+
+function GRULayer:back_propagate(bp_err, next_bp_err, input, output, t)
+    self.dag:back_propagate(bp_err, next_bp_err, input, output, t)
+end
+
+function GRULayer:get_params()
+    return self.dag:get_params()
+end
diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua
index 43c2250..6b7a1d7 100644
--- a/nerv/layer/init.lua
+++ b/nerv/layer/init.lua
@@ -90,7 +90,7 @@ function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim)
         end
     end
     nerv.info("param [%s] of layer [%s] is not found in `layer_conf` or `layer_conf.pr`, " ..
-                "switch to auto-generate.", pid_list_str, self.id)
+                "switch to auto-generate", pid_list_str, self.id)
     local pid_g = self.id .. '_' .. pid_list[1]
     p = p_type(pid_g, gconf)
     p.trans = gconf.cumat_type(unpack(p_dim))
@@ -113,3 +113,7 @@ nerv.include('affine_recurrent.lua')
 nerv.include('softmax.lua')
 nerv.include('elem_mul.lua')
 nerv.include('gate_fff.lua')
+nerv.include('lstm.lua')
+nerv.include('lstm_gate.lua')
+nerv.include('dropout.lua')
+nerv.include('gru.lua')
diff --git a/nerv/layer/lstm.lua b/nerv/layer/lstm.lua
new file mode 100644
index 0000000..500bd87
--- /dev/null
+++ b/nerv/layer/lstm.lua
@@ -0,0 +1,140 @@
+local LSTMLayer = nerv.class('nerv.LSTMLayer', 'nerv.Layer')
+
+function LSTMLayer:__init(id, global_conf, layer_conf)
+    -- input1: x
+    -- input2: h
+    -- input3: c
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+
+    -- prepare a DAGLayer to hold the LSTM structure
+    local pr = layer_conf.pr
+    if pr == nil then
+        pr = nerv.ParamRepo()
+    end
+
+    local function ap(str)
+        return self.id .. '.' .. str
+    end
+    local din1, din2, din3 = self.dim_in[1], self.dim_in[2], self.dim_in[3]
+    local dout1, dout2, dout3 = self.dim_out[1], self.dim_out[2], self.dim_out[3]
+    local layers = {
+        ["nerv.CombinerLayer"] = {
+            [ap("inputXDup")] = {{}, {dim_in = {din1},
+                                      dim_out = {din1, din1, din1, din1},
+                                      lambda = {1}}},
+
+            [ap("inputHDup")] = {{}, {dim_in = {din2},
+                                      dim_out = {din2, din2, din2, din2},
+                                      lambda = {1}}},
+
+            [ap("inputCDup")] = {{}, {dim_in = {din3},
+                                      dim_out = {din3, din3, din3},
+                                      lambda = {1}}},
+
+            [ap("mainCDup")] = {{}, {dim_in = {din3, din3},
+                                     dim_out = {din3, din3, din3},
+                                     lambda = {1, 1}}},
+        },
+        ["nerv.AffineLayer"] = {
+            [ap("mainAffineL")] = {{}, {dim_in = {din1, din2},
+                                        dim_out = {dout1},
+                                        pr = pr}},
+        },
+        ["nerv.TanhLayer"] = {
+            [ap("mainTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}},
+            [ap("outputTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}},
+        },
+        ["nerv.LSTMGateLayer"] = {
+            [ap("forgetGateL")] = {{}, {dim_in = {din1, din2, din3},
+                                        dim_out = {din3}, pr = pr}},
+            [ap("inputGateL")] = {{}, {dim_in = {din1, din2, din3},
+                                       dim_out = {din3}, pr = pr}},
+            [ap("outputGateL")] = {{}, {dim_in = {din1, din2, din3},
+                                        dim_out = {din3}, pr = pr}},
+
+        },
+        ["nerv.ElemMulLayer"] = {
+            [ap("inputGMulL")] = {{}, {dim_in = {din3, din3},
+                                       dim_out = {din3}}},
+            [ap("forgetGMulL")] = {{}, {dim_in = {din3, din3},
+                                        dim_out = {din3}}},
+            [ap("outputGMulL")] = {{}, {dim_in = {din3, din3},
+                                        dim_out = {din3}}},
+        },
+    }
+
+    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+
+    local connections = {
+        ["<input>[1]"] = ap("inputXDup[1]"),
+        ["<input>[2]"] = ap("inputHDup[1]"),
+        ["<input>[3]"] = ap("inputCDup[1]"),
+
+        [ap("inputXDup[1]")] = ap("mainAffineL[1]"),
+        [ap("inputHDup[1]")] = ap("mainAffineL[2]"),
+        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
+
+        [ap("inputXDup[2]")] = ap("inputGateL[1]"),
+        [ap("inputHDup[2]")] = ap("inputGateL[2]"),
+        [ap("inputCDup[1]")] = ap("inputGateL[3]"),
+
+        [ap("inputXDup[3]")] = ap("forgetGateL[1]"),
+        [ap("inputHDup[3]")] = ap("forgetGateL[2]"),
+        [ap("inputCDup[2]")] = ap("forgetGateL[3]"),
+
+        [ap("mainTanhL[1]")] = ap("inputGMulL[1]"),
+        [ap("inputGateL[1]")] = ap("inputGMulL[2]"),
+
+        [ap("inputCDup[3]")] = ap("forgetGMulL[1]"),
+        [ap("forgetGateL[1]")] = ap("forgetGMulL[2]"),
+
+        [ap("inputGMulL[1]")] = ap("mainCDup[1]"),
+        [ap("forgetGMulL[1]")] = ap("mainCDup[2]"),
+
+        [ap("inputXDup[4]")] = ap("outputGateL[1]"),
+        [ap("inputHDup[4]")] = ap("outputGateL[2]"),
+        [ap("mainCDup[3]")] = ap("outputGateL[3]"),
+
+        [ap("mainCDup[2]")] = "<output>[2]",
+        [ap("mainCDup[1]")] = ap("outputTanhL[1]"),
+
+        [ap("outputTanhL[1]")] = ap("outputGMulL[1]"),
+        [ap("outputGateL[1]")] = ap("outputGMulL[2]"),
+
+        [ap("outputGMulL[1]")] = "<output>[1]",
+    }
+    self.dag = nerv.DAGLayer(self.id, global_conf,
+                             {dim_in = self.dim_in,
+                              dim_out = self.dim_out,
+                              sub_layers = layerRepo,
+                              connections = connections})
+
+    self:check_dim_len(3, 2) -- inputs: x, h, c; outputs: h, c
+end
+
+function LSTMLayer:init(batch_size, chunk_size)
+    self.dag:init(batch_size, chunk_size)
+end
+
+function LSTMLayer:batch_resize(batch_size, chunk_size)
+    self.dag:batch_resize(batch_size, chunk_size)
+end
+
+function LSTMLayer:update(bp_err, input, output, t)
+    self.dag:update(bp_err, input, output, t)
+end
+
+function LSTMLayer:propagate(input, output, t)
+    self.dag:propagate(input, output, t)
+end
+
+function LSTMLayer:back_propagate(bp_err, next_bp_err, input, output, t)
+    self.dag:back_propagate(bp_err, next_bp_err, input, output, t)
+end
+
+function LSTMLayer:get_params()
+    return self.dag:get_params()
+end
diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua
new file mode 100644
index 0000000..1963eba
--- /dev/null
+++ b/nerv/layer/lstm_gate.lua
@@ -0,0 +1,77 @@
+local LSTMGateLayer = nerv.class('nerv.LSTMGateLayer', 'nerv.Layer')
+-- NOTE: this is a full matrix gate
+
+function LSTMGateLayer:__init(id, global_conf, layer_conf)
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+
+    for i = 1, #self.dim_in do
+        self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf,
+                                           nerv.LinearTransParam,
+                                           {self.dim_in[i], self.dim_out[1]})
+    end
+    self.bp = self:find_param("bp", layer_conf, global_conf,
+                              nerv.BiasParam, {1, self.dim_out[1]})
+
+    self:check_dim_len(-1, 1) -- accept any number of inputs
+end
+
+function LSTMGateLayer:init(batch_size)
+    for i = 1, #self.dim_in do
+        if self["ltp" .. i].trans:ncol() ~= self.bp.trans:ncol() then
+            nerv.error("mismatching dimensions of linear transform and bias parameter")
+        end
+        if self.dim_in[i] ~= self["ltp" .. i].trans:nrow() then
+            nerv.error("mismatching dimensions of linear transform parameter and input")
+        end
+        self["ltp" .. i]:train_init()
+    end
+
+    if self.dim_out[1] ~= self.ltp1.trans:ncol() then
+        nerv.error("mismatching dimensions of linear transform parameter and output")
+    end
+    self.bp:train_init()
+    self.err_bakm = self.gconf.cumat_type(batch_size, self.dim_out[1])
+end
+
+function LSTMGateLayer:batch_resize(batch_size)
+    if self.err_bakm:nrow() ~= batch_size then
+        self.err_bakm = self.gconf.cumat_type(batch_size, self.dim_out[1])
+    end
+end
+
+function LSTMGateLayer:propagate(input, output)
+    -- apply linear transform
+    output[1]:mul(input[1], self.ltp1.trans, 1.0, 0.0, 'N', 'N')
+    for i = 2, #self.dim_in do
+        output[1]:mul(input[i], self["ltp" .. i].trans, 1.0, 1.0, 'N', 'N')
+    end
+    -- add bias
+    output[1]:add_row(self.bp.trans, 1.0)
+    output[1]:sigmoid(output[1])
+end
+
+function LSTMGateLayer:back_propagate(bp_err, next_bp_err, input, output)
+    self.err_bakm:sigmoid_grad(bp_err[1], output[1])
+    for i = 1, #self.dim_in do
+        next_bp_err[i]:mul(self.err_bakm, self["ltp" .. i].trans, 1.0, 0.0, 'N', 'T')
+    end
+end
+
+function LSTMGateLayer:update(bp_err, input, output)
+    self.err_bakm:sigmoid_grad(bp_err[1], output[1])
+    for i = 1, #self.dim_in do
+        self["ltp" .. i]:update_by_err_input(self.err_bakm, input[i])
+    end
+    self.bp:update_by_gradient(self.err_bakm:colsum())
+end
+
+function LSTMGateLayer:get_params()
+    local pr = nerv.ParamRepo({self.bp})
+    for i = 1, #self.dim_in do
+        pr:add(self["ltp" .. i].id, self["ltp" .. i])
+    end
+    return pr
+end
diff --git a/nerv/nn/layer_dag.lua b/nerv/nn/layer_dag.lua
index 6ad7ae9..6896878 100644
--- a/nerv/nn/layer_dag.lua
+++ b/nerv/nn/layer_dag.lua
@@ -2,7 +2,7 @@ local DAGLayer = nerv.class("nerv.DAGLayer", "nerv.Layer")
 
 local function parse_id(str)
     local id, port, _
-    _, _, id, port = string.find(str, "([a-zA-Z0-9_]+)%[([0-9]+)%]")
+    _, _, id, port = string.find(str, "([a-zA-Z0-9_.]+)%[([0-9]+)%]")
     if id == nil or port == nil then
         _, _, id, port = string.find(str, "(.+)%[([0-9]+)%]")
         if not (id == "<input>" or id == "<output>") then
@@ -38,6 +38,12 @@ local function discover(id, layers, layer_repo)
     return ref
 end
 
+local function touch_list_by_idx(list, idx)
+    if list[idx] == nil then
+        list[idx] = {}
+    end
+end
+
 function DAGLayer:__init(id, global_conf, layer_conf)
     local layers = {}
     local inputs = {}
@@ -51,11 +57,17 @@ function DAGLayer:__init(id, global_conf, layer_conf)
         local ref_from = discover(id_from, layers, layer_conf.sub_layers)
         local ref_to = discover(id_to, layers, layer_conf.sub_layers)
         local input_dim, output_dim, _
-        if ref_from and ref_from.outputs[port_from] ~= nil then
-            nerv.error("%s has already been attached", from)
+        if ref_from then
+            touch_list_by_idx(ref_from.outputs, 1)
+            if ref_from.outputs[1][port_from] ~= nil then
+                nerv.error("%s has already been attached", from)
+            end
         end
-        if ref_to and ref_to.inputs[port_to] ~= nil then
-            nerv.error("%s has already been attached", to)
+        if ref_to then
+            touch_list_by_idx(ref_to.inputs, 1)
+            if ref_to.inputs[1][port_to] ~= nil then
+                nerv.error("%s has already been attached", to)
+            end
         end
         if id_from == "<input>" then
             input_dim, _ = ref_to.layer:get_dim()
@@ -63,14 +75,14 @@ function DAGLayer:__init(id, global_conf, layer_conf)
                 nerv.error("mismatching data dimension between %s and %s", from, to)
             end
             inputs[port_from] = {ref_to, port_to}
-            ref_to.inputs[port_to] = inputs -- just a place holder
+            ref_to.inputs[1][port_to] = inputs -- just a place holder
         elseif id_to == "<output>" then
             _, output_dim = ref_from.layer:get_dim()
             if output_dim[port_from] ~= dim_out[port_to] then
                 nerv.error("mismatching data dimension between %s and %s", from, to)
             end
             outputs[port_to] = {ref_from, port_from}
-            ref_from.outputs[port_from] = outputs -- just a place holder
+            ref_from.outputs[1][port_from] = outputs -- just a place holder
         else
             _, output_dim = ref_from.layer:get_dim()
             input_dim, _ = ref_to.layer:get_dim()
@@ -104,7 +116,7 @@ function DAGLayer:__init(id, global_conf, layer_conf)
         cur.visited = true
         l = l + 1
         for _, nl in pairs(cur.next_layers) do
-            nl.in_deg = nl.in_deg - 1
+            nl.in_deg = nl.in_deg - 1
             if nl.in_deg == 0 then
                 table.insert(queue, nl)
                 r = r + 1
@@ -138,7 +150,10 @@ function DAGLayer:__init(id, global_conf, layer_conf)
     end
 end
 
-function DAGLayer:init(batch_size)
+function DAGLayer:init(batch_size, chunk_size)
+    if chunk_size == nil then
+        chunk_size = 1
+    end
     for i, conn in ipairs(self.parsed_conn) do
         local _, output_dim
         local ref_from, port_from, ref_to, port_to
@@ -149,28 +164,35 @@ function DAGLayer:init(batch_size)
         if output_dim[port_from] > 0 then
             dim = output_dim[port_from]
         end
-        local mid = self.mat_type(batch_size, dim)
-        local err_mid = mid:create()
 
-        ref_from.outputs[port_from] = mid
-        ref_to.inputs[port_to] = mid
+        for t = 1, chunk_size do
+            local mid = self.mat_type(batch_size, dim)
+            local err_mid = mid:create()
+            touch_list_by_idx(ref_to.inputs, t)
+            touch_list_by_idx(ref_from.outputs, t)
+            touch_list_by_idx(ref_from.err_inputs, t)
+            touch_list_by_idx(ref_to.err_outputs, t)
+
+            ref_from.outputs[t][port_from] = mid
+            ref_to.inputs[t][port_to] = mid
 
-        ref_from.err_inputs[port_from] = err_mid
-        ref_to.err_outputs[port_to] = err_mid
+            ref_from.err_inputs[t][port_from] = err_mid
+            ref_to.err_outputs[t][port_to] = err_mid
+        end
     end
     for id, ref in pairs(self.layers) do
        for i = 1, ref.input_len do
-            if ref.inputs[i] == nil then
+            if ref.inputs[1][i] == nil then
                 nerv.error("dangling input port %d of layer %s", i, id)
             end
         end
         for i = 1, ref.output_len do
-            if ref.outputs[i] == nil then
+            if ref.outputs[1][i] == nil then
                 nerv.error("dangling output port %d of layer %s", i, id)
             end
         end
         -- initialize sub layers
-        ref.layer:init(batch_size)
+        ref.layer:init(batch_size, chunk_size)
     end
     for i = 1, #self.dim_in do
         if self.inputs[i] == nil then
@@ -184,8 +206,10 @@
     end
 end
 
-function DAGLayer:batch_resize(batch_size)
-    self.gconf.batch_size = batch_size
+function DAGLayer:batch_resize(batch_size, chunk_size)
+    if chunk_size == nil then
+        chunk_size = 1
+    end
 
     for i, conn in ipairs(self.parsed_conn) do
         local _, output_dim
@@ -194,93 +218,105 @@ function DAGLayer:batch_resize(batch_size)
         ref_to, port_to = unpack(conn[2])
         _, output_dim = ref_from.layer:get_dim()
 
-        if ref_from.outputs[port_from]:nrow() ~= batch_size and output_dim[port_from] > 0 then
-            local mid = self.mat_type(batch_size, output_dim[port_from])
-            local err_mid = mid:create()
+        if ref_from.outputs[1][port_from]:nrow() ~= batch_size
+                and output_dim[port_from] > 0 then
+            for t = 1, chunk_size do
+                local mid = self.mat_type(batch_size, output_dim[port_from])
+                local err_mid = mid:create()
 
-            ref_from.outputs[port_from] = mid
-            ref_to.inputs[port_to] = mid
+                ref_from.outputs[t][port_from] = mid
+                ref_to.inputs[t][port_to] = mid
 
-            ref_from.err_inputs[port_from] = err_mid
-            ref_to.err_outputs[port_to] = err_mid
+                ref_from.err_inputs[t][port_from] = err_mid
+                ref_to.err_outputs[t][port_to] = err_mid
+            end
         end
     end
     for id, ref in pairs(self.layers) do
-        ref.layer:batch_resize(batch_size)
+        ref.layer:batch_resize(batch_size, chunk_size)
    end
    collectgarbage("collect")
 end
 
-function DAGLayer:set_inputs(input)
+function DAGLayer:set_inputs(input, t)
     for i = 1, #self.dim_in do
         if input[i] == nil then
             nerv.error("some input is not provided");
         end
         local layer = self.inputs[i][1]
         local port = self.inputs[i][2]
-        layer.inputs[port] = input[i]
+        touch_list_by_idx(layer.inputs, t)
+        layer.inputs[t][port] = input[i]
     end
 end
 
-function DAGLayer:set_outputs(output)
+function DAGLayer:set_outputs(output, t)
     for i = 1, #self.dim_out do
         if output[i] == nil then
             nerv.error("some output is not provided");
         end
         local layer = self.outputs[i][1]
         local port = self.outputs[i][2]
-        layer.outputs[port] = output[i]
+        touch_list_by_idx(layer.outputs, t)
+        layer.outputs[t][port] = output[i]
     end
 end
 
-function DAGLayer:set_err_inputs(bp_err)
+function DAGLayer:set_err_inputs(bp_err, t)
     for i = 1, #self.dim_out do
         local layer = self.outputs[i][1]
         local port = self.outputs[i][2]
-        layer.err_inputs[port] = bp_err[i]
+        touch_list_by_idx(layer.err_inputs, t)
+        layer.err_inputs[t][port] = bp_err[i]
     end
 end
 
-function DAGLayer:set_err_outputs(next_bp_err)
+function DAGLayer:set_err_outputs(next_bp_err, t)
     for i = 1, #self.dim_in do
         local layer = self.inputs[i][1]
         local port = self.inputs[i][2]
-        layer.err_outputs[port] = next_bp_err[i]
+        touch_list_by_idx(layer.err_outputs, t)
+        layer.err_outputs[t][port] = next_bp_err[i]
     end
 end
 
-function DAGLayer:update(bp_err, input, output)
-    self:set_err_inputs(bp_err)
-    self:set_inputs(input)
-    self:set_outputs(output)
-    -- print("update")
+function DAGLayer:update(bp_err, input, output, t)
+    if t == nil then
+        t = 1
+    end
+    self:set_err_inputs(bp_err, t)
+    self:set_inputs(input, t)
+    self:set_outputs(output, t)
     for id, ref in pairs(self.queue) do
-        -- print(ref.layer.id)
-        ref.layer:update(ref.err_inputs, ref.inputs, ref.outputs)
+        ref.layer:update(ref.err_inputs[t], ref.inputs[t], ref.outputs[t], t)
     end
 end
 
-function DAGLayer:propagate(input, output)
-    self:set_inputs(input)
-    self:set_outputs(output)
+function DAGLayer:propagate(input, output, t)
+    if t == nil then
+        t = 1
+    end
+    self:set_inputs(input, t)
+    self:set_outputs(output, t)
     local ret = false
     for i = 1, #self.queue do
         local ref = self.queue[i]
-        -- print(ref.layer.id)
-        ret = ref.layer:propagate(ref.inputs, ref.outputs)
+        ret = ref.layer:propagate(ref.inputs[t], ref.outputs[t], t)
    end
    return ret
 end
 
-function DAGLayer:back_propagate(bp_err, next_bp_err, input, output)
-    self:set_err_outputs(next_bp_err)
-    self:set_err_inputs(bp_err)
-    self:set_inputs(input)
-    self:set_outputs(output)
+function DAGLayer:back_propagate(bp_err, next_bp_err, input, output, t)
+    if t == nil then
+        t = 1
+    end
+    self:set_err_outputs(next_bp_err, t)
+    self:set_err_inputs(bp_err, t)
+    self:set_inputs(input, t)
+    self:set_outputs(output, t)
     for i = #self.queue, 1, -1 do
         local ref = self.queue[i]
-        -- print(ref.layer.id)
-        ref.layer:back_propagate(ref.err_inputs, ref.err_outputs, ref.inputs, ref.outputs)
+        ref.layer:back_propagate(ref.err_inputs[t], ref.err_outputs[t], ref.inputs[t], ref.outputs[t], t)
     end
 end
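
Editor's note: the new layers follow the same declaration pattern that gru.lua and lstm.lua use internally for their sub-layers. The sketch below shows how they might be declared in a LayerRepo; the ids, the hidden size, and the surrounding `pr` (ParamRepo) and `gconf` objects are illustrative assumptions, not part of this patch.

-- minimal configuration sketch, assuming `pr` and `gconf` already exist
local hidden = 512

local layers = {
    ["nerv.LSTMLayer"] = {
        -- inputs: x, h, c; outputs: h, c (see lstm.lua above)
        ["lstm1"] = {{}, {dim_in = {hidden, hidden, hidden},
                          dim_out = {hidden, hidden}, pr = pr}},
    },
    ["nerv.GRULayer"] = {
        -- inputs: x, h; output: h (dim_in[2] must equal dim_out[1])
        ["gru1"] = {{}, {dim_in = {hidden, hidden},
                         dim_out = {hidden}, pr = pr}},
    },
    ["nerv.DropoutLayer"] = {
        -- dropout_rate may instead come from gconf.dropout_rate
        ["drop1"] = {{}, {dim_in = {hidden}, dim_out = {hidden},
                          dropout_rate = 0.5}},
    },
}
local layer_repo = nerv.LayerRepo(layers, pr, gconf)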
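
Editor's note: the DAGLayer changes above add an optional `chunk_size` to `init`/`batch_resize` and an optional timestep index `t` (defaulting to 1) to `propagate`, `back_propagate` and `update`, with per-timestep buffers indexed as `outputs[t][port]`. A hedged sketch of the resulting calling convention follows; the `dag`, `batch_size`, `chunk_size` and the per-timestep input/output/error tables are assumed to be prepared elsewhere.

-- per-timestep driver loop (illustrative)
dag:init(batch_size, chunk_size)   -- chunk_size defaults to 1 when omitted

for t = 1, chunk_size do
    dag:propagate(inputs[t], outputs[t], t)
end
for t = chunk_size, 1, -1 do
    dag:back_propagate(bp_errs[t], next_bp_errs[t], inputs[t], outputs[t], t)
    dag:update(bp_errs[t], inputs[t], outputs[t], t)
end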
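
Editor's note: DropoutLayer uses the inverted-dropout convention: `thres_mask` zeroes a mask entry with probability `rate` and sets it to 1 / (1 - rate) otherwise, so the expected value of a masked activation equals the original activation and no rescaling is needed at test time. A small numeric check (standalone Lua, not part of the patch):

-- expected mask value = (1 - rate) * 1/(1 - rate) + rate * 0 = 1
local rate = 0.3
local keep_scale = 1 / (1 - rate)
print((1 - rate) * keep_scale + rate * 0)   -- prints 1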