Diffstat (limited to 'nerv/layer')
-rw-r--r--   nerv/layer/dropout.lua   |  77
-rw-r--r--   nerv/layer/elem_mul.lua  |  14
-rw-r--r--   nerv/layer/gru.lua       | 128
-rw-r--r--   nerv/layer/init.lua      |   6
-rw-r--r--   nerv/layer/lstm.lua      | 140
-rw-r--r--   nerv/layer/lstm_gate.lua |  77
6 files changed, 434 insertions, 8 deletions
diff --git a/nerv/layer/dropout.lua b/nerv/layer/dropout.lua
new file mode 100644
index 0000000..42660cc
--- /dev/null
+++ b/nerv/layer/dropout.lua
@@ -0,0 +1,77 @@
+local DropoutLayer = nerv.class("nerv.DropoutLayer", "nerv.Layer")
+
+function DropoutLayer:__init(id, global_conf, layer_conf)
+    self.id = id
+    self.gconf = global_conf
+    if self.gconf.use_cpu then
+        self.mat_type = self.gconf.mmat_type
+    else
+        self.mat_type = self.gconf.cumat_type
+    end
+    self.rate = layer_conf.dropout_rate or global_conf.dropout_rate
+    if self.rate == nil then
+        nerv.warning("[DropoutLayer:__init] dropout rate is not set")
+    end
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self:check_dim_len(1, 1) -- a single input and a single output
+end
+
+function DropoutLayer:init(batch_size, chunk_size)
+    if self.dim_in[1] ~= self.dim_out[1] then
+        nerv.error("mismatching dimensions of input and output")
+    end
+    if chunk_size == nil then
+        chunk_size = 1
+    end
+    self.mask = {}
+    for t = 1, chunk_size do
+        self.mask[t] = self.mat_type(batch_size, self.dim_in[1])
+    end
+end
+
+function DropoutLayer:batch_resize(batch_size, chunk_size)
+    if chunk_size == nil then
+        chunk_size = 1
+    end
+    for t = 1, chunk_size do
+        if self.mask[t] == nil or self.mask[t]:nrow() ~= batch_size then
+            self.mask[t] = self.mat_type(batch_size, self.dim_in[1])
+        end
+    end
+end
+
+function DropoutLayer:propagate(input, output, t)
+    if t == nil then
+        t = 1
+    end
+    if self.rate then
+        self.mask[t]:rand_uniform()
+        -- since we will lose a portion of the activations, we multiply the
+        -- activations by 1 / (1 - rate) to compensate
+        self.mask[t]:thres_mask(self.mask[t], self.rate,
+                                0, 1 / (1.0 - self.rate))
+        output[1]:mul_elem(input[1], self.mask[t])
+    else
+        output[1]:copy_fromd(input[1])
+    end
+end
+
+function DropoutLayer:update(bp_err, input, output, t)
+    -- no params, therefore do nothing
+end
+
+function DropoutLayer:back_propagate(bp_err, next_bp_err, input, output, t)
+    if t == nil then
+        t = 1
+    end
+    if self.rate then
+        next_bp_err[1]:mul_elem(bp_err[1], self.mask[t])
+    else
+        next_bp_err[1]:copy_fromd(bp_err[1])
+    end
+end
+
+function DropoutLayer:get_params()
+    return nerv.ParamRepo({})
+end
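The mask logic above is the usual "inverted dropout" scheme: kept units are scaled by 1 / (1 - rate) at training time so that the expected activation is unchanged and no rescaling is needed at test time. A minimal pure-Lua sketch (scalar stand-ins for the matrix ops, hypothetical rate and activation values, no nerv dependency) showing why the scaling factor compensates for the dropped units:

    -- Illustration of the inverted-dropout scaling used by DropoutLayer.
    -- Entries below `rate` are zeroed, the rest are set to 1 / (1 - rate),
    -- mirroring thres_mask(mask, rate, 0, 1 / (1.0 - rate)) followed by mul_elem.
    local rate = 0.5                     -- hypothetical dropout rate
    local scale = 1 / (1 - rate)
    math.randomseed(42)

    local n, sum = 100000, 0
    local x = 0.8                        -- a single activation value
    for i = 1, n do
        local mask = (math.random() < rate) and 0 or scale
        sum = sum + x * mask
    end
    print(("original: %.3f, masked mean: %.3f"):format(x, sum / n))
    -- the two numbers agree up to sampling noise

The same mask is reused in back_propagate, so the gradient of a dropped unit is zero and the gradient of a kept unit is scaled by the same factor as its activation.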
diff --git a/nerv/layer/elem_mul.lua b/nerv/layer/elem_mul.lua
index c809d3e..fe80a3f 100644
--- a/nerv/layer/elem_mul.lua
+++ b/nerv/layer/elem_mul.lua
@@ -5,19 +5,19 @@ function ElemMulLayer:__init(id, global_conf, layer_conf)
     self.dim_in = layer_conf.dim_in
     self.dim_out = layer_conf.dim_out
     self.gconf = global_conf
-
-    self:check_dim_len(2, 1) -- Element-multiply input[1] and input[2]
+    -- element-wise multiplication of input[1] and input[2]
+    self:check_dim_len(2, 1)
 end
 
 function ElemMulLayer:init(batch_size)
-    if self.dim_in[1] ~= self.dim_in[2] or
+    if self.dim_in[1] ~= self.dim_in[2] or
        self.dim_in[1] ~= self.dim_out[1] then
-        nerv.error("dim_in and dim_out mismatch for ElemMulLayer")
+        nerv.error("mismatching dimensions of input and output")
     end
 end
 
 function ElemMulLayer:batch_resize(batch_size)
-    --do nothing
+    -- do nothing
 end
 
 function ElemMulLayer:propagate(input, output)
@@ -25,12 +25,12 @@ function ElemMulLayer:propagate(input, output)
 end
 
 function ElemMulLayer:back_propagate(bp_err, next_bp_err, input, output)
-    next_bp_err[1]:mul_elem(bp_err[1], input[2])
+    next_bp_err[1]:mul_elem(bp_err[1], input[2])
     next_bp_err[2]:mul_elem(bp_err[1], input[1])
 end
 
 function ElemMulLayer:update(bp_err, input, output)
-    --do nothing
+    -- do nothing
 end
 
 function ElemMulLayer:get_params()
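ElemMulLayer:back_propagate follows the product rule: for z = x * y (element-wise), the error sent to each input is the incoming error multiplied by the other input. A scalar sketch (plain Lua, arbitrary values) checking this against a finite difference:

    -- Product-rule gradients, as computed by ElemMulLayer:back_propagate.
    local x, y, eps = 1.7, -0.3, 1e-6
    local function f(a, b) return a * b end
    local dx_numeric = (f(x + eps, y) - f(x, y)) / eps
    local dy_numeric = (f(x, y + eps) - f(x, y)) / eps
    print(dx_numeric, y)   -- ~ -0.3, i.e. input[2]
    print(dy_numeric, x)   -- ~  1.7, i.e. input[1]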
diff --git a/nerv/layer/gru.lua b/nerv/layer/gru.lua
new file mode 100644
index 0000000..2162e28
--- /dev/null
+++ b/nerv/layer/gru.lua
@@ -0,0 +1,128 @@
+local GRULayer = nerv.class('nerv.GRULayer', 'nerv.Layer')
+
+function GRULayer:__init(id, global_conf, layer_conf)
+    -- input1: x
+    -- input2: h
+    -- (the candidate activation h^~ is computed inside the DAG)
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+
+    if self.dim_in[2] ~= self.dim_out[1] then
+        nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)",
+                    self.dim_in[2], self.dim_out[1])
+    end
+
+    -- prepare a DAGLayer to hold the GRU structure
+    local pr = layer_conf.pr
+    if pr == nil then
+        pr = nerv.ParamRepo()
+    end
+
+    local function ap(str)
+        return self.id .. '.' .. str
+    end
+    local din1, din2 = self.dim_in[1], self.dim_in[2]
+    local dout1 = self.dim_out[1]
+    local layers = {
+        ["nerv.CombinerLayer"] = {
+            [ap("inputXDup")] = {{}, {dim_in = {din1},
+                                      dim_out = {din1, din1, din1},
+                                      lambda = {1}}},
+            [ap("inputHDup")] = {{}, {dim_in = {din2},
+                                      dim_out = {din2, din2, din2, din2, din2},
+                                      lambda = {1}}},
+            [ap("updateGDup")] = {{}, {dim_in = {din2},
+                                       dim_out = {din2, din2},
+                                       lambda = {1}}},
+            [ap("updateMergeL")] = {{}, {dim_in = {din2, din2, din2},
+                                         dim_out = {dout1},
+                                         lambda = {1, -1, 1}}},
+        },
+        ["nerv.AffineLayer"] = {
+            [ap("mainAffineL")] = {{}, {dim_in = {din1, din2},
+                                        dim_out = {dout1},
+                                        pr = pr}},
+        },
+        ["nerv.TanhLayer"] = {
+            [ap("mainTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}},
+        },
+        ["nerv.GateFLayer"] = {
+            [ap("resetGateL")] = {{}, {dim_in = {din1, din2},
+                                       dim_out = {din2},
+                                       pr = pr}},
+            [ap("updateGateL")] = {{}, {dim_in = {din1, din2},
+                                        dim_out = {din2},
+                                        pr = pr}},
+        },
+        ["nerv.ElemMulLayer"] = {
+            [ap("resetGMulL")] = {{}, {dim_in = {din2, din2}, dim_out = {din2}}},
+            [ap("updateGMulCL")] = {{}, {dim_in = {din2, din2}, dim_out = {din2}}},
+            [ap("updateGMulHL")] = {{}, {dim_in = {din2, din2}, dim_out = {din2}}},
+        },
+    }
+
+    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+
+    local connections = {
+        ["<input>[1]"] = ap("inputXDup[1]"),
+        ["<input>[2]"] = ap("inputHDup[1]"),
+
+        [ap("inputXDup[1]")] = ap("resetGateL[1]"),
+        [ap("inputHDup[1]")] = ap("resetGateL[2]"),
+        [ap("inputXDup[2]")] = ap("updateGateL[1]"),
+        [ap("inputHDup[2]")] = ap("updateGateL[2]"),
+        [ap("updateGateL[1]")] = ap("updateGDup[1]"),
+
+        [ap("resetGateL[1]")] = ap("resetGMulL[1]"),
+        [ap("inputHDup[3]")] = ap("resetGMulL[2]"),
+
+        [ap("inputXDup[3]")] = ap("mainAffineL[1]"),
+        [ap("resetGMulL[1]")] = ap("mainAffineL[2]"),
+        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
+
+        [ap("updateGDup[1]")] = ap("updateGMulHL[1]"),
+        [ap("inputHDup[4]")] = ap("updateGMulHL[2]"),
+        [ap("updateGDup[2]")] = ap("updateGMulCL[1]"),
+        [ap("mainTanhL[1]")] = ap("updateGMulCL[2]"),
+
+        [ap("inputHDup[5]")] = ap("updateMergeL[1]"),
+        [ap("updateGMulHL[1]")] = ap("updateMergeL[2]"),
+        [ap("updateGMulCL[1]")] = ap("updateMergeL[3]"),
+
+        [ap("updateMergeL[1]")] = "<output>[1]",
+    }
+
+    self.dag = nerv.DAGLayer(self.id, global_conf,
+                             {dim_in = self.dim_in,
+                              dim_out = self.dim_out,
+                              sub_layers = layerRepo,
+                              connections = connections})
+
+    self:check_dim_len(2, 1) -- inputs: x, h; output: new h
+end
+
+function GRULayer:init(batch_size, chunk_size)
+    self.dag:init(batch_size, chunk_size)
+end
+
+function GRULayer:batch_resize(batch_size, chunk_size)
+    self.dag:batch_resize(batch_size, chunk_size)
+end
+
+function GRULayer:update(bp_err, input, output, t)
+    self.dag:update(bp_err, input, output, t)
+end
+
+function GRULayer:propagate(input, output, t)
+    self.dag:propagate(input, output, t)
+end
+
+function GRULayer:back_propagate(bp_err, next_bp_err, input, output, t)
+    self.dag:back_propagate(bp_err, next_bp_err, input, output, t)
+end
+
+function GRULayer:get_params()
+    return self.dag:get_params()
+end
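Read alongside the connection table, the DAG wires up the standard GRU update: the reset gate masks the previous hidden state before the candidate affine transform, and updateMergeL combines h, z*h and z*h^~ with lambda {1, -1, 1}, which is (1 - z)*h + z*h^~. A scalar walk-through of one step (plain Lua with hypothetical weights, not the nerv API) of what the graph computes:

    -- r = sigmoid(Wr_x*x + Wr_h*h)          -- resetGateL
    -- z = sigmoid(Wz_x*x + Wz_h*h)          -- updateGateL
    -- c = tanh(W_x*x + W_h*(r*h))           -- resetGMulL -> mainAffineL -> mainTanhL
    -- h_new = h - z*h + z*c                 -- updateMergeL, lambda {1, -1, 1}
    local function sigmoid(v) return 1 / (1 + math.exp(-v)) end
    local x, h = 0.5, -0.2
    local Wr_x, Wr_h = 0.8, 0.1            -- hypothetical parameters
    local Wz_x, Wz_h = -0.3, 0.7
    local W_x, W_h = 1.2, -0.5
    local r = sigmoid(Wr_x * x + Wr_h * h)
    local z = sigmoid(Wz_x * x + Wz_h * h)
    local c = math.tanh(W_x * x + W_h * (r * h))
    local h_new = h - z * h + z * c        -- equivalent to (1 - z)*h + z*c
    print(r, z, c, h_new)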
ap("inputGateL[2]"), + [ap("inputCDup[1]")] = ap("inputGateL[3]"), + + [ap("inputXDup[3]")] = ap("forgetGateL[1]"), + [ap("inputHDup[3]")] = ap("forgetGateL[2]"), + [ap("inputCDup[2]")] = ap("forgetGateL[3]"), + + [ap("mainTanhL[1]")] = ap("inputGMulL[1]"), + [ap("inputGateL[1]")] = ap("inputGMulL[2]"), + + [ap("inputCDup[3]")] = ap("forgetGMulL[1]"), + [ap("forgetGateL[1]")] = ap("forgetGMulL[2]"), + + [ap("inputGMulL[1]")] = ap("mainCDup[1]"), + [ap("forgetGMulL[1]")] = ap("mainCDup[2]"), + + [ap("inputXDup[4]")] = ap("outputGateL[1]"), + [ap("inputHDup[4]")] = ap("outputGateL[2]"), + [ap("mainCDup[3]")] = ap("outputGateL[3]"), + + [ap("mainCDup[2]")] = "<output>[2]", + [ap("mainCDup[1]")] = ap("outputTanhL[1]"), + + [ap("outputTanhL[1]")] = ap("outputGMulL[1]"), + [ap("outputGateL[1]")] = ap("outputGMulL[2]"), + + [ap("outputGMulL[1]")] = "<output>[1]", + } + self.dag = nerv.DAGLayer(self.id, global_conf, + {dim_in = self.dim_in, + dim_out = self.dim_out, + sub_layers = layerRepo, + connections = connections}) + + self:check_dim_len(3, 2) -- x, h, c and h, c +end + +function LSTMLayer:init(batch_size, chunk_size) + self.dag:init(batch_size, chunk_size) +end + +function LSTMLayer:batch_resize(batch_size, chunk_size) + self.dag:batch_resize(batch_size, chunk_size) +end + +function LSTMLayer:update(bp_err, input, output, t) + self.dag:update(bp_err, input, output, t) +end + +function LSTMLayer:propagate(input, output, t) + self.dag:propagate(input, output, t) +end + +function LSTMLayer:back_propagate(bp_err, next_bp_err, input, output, t) + self.dag:back_propagate(bp_err, next_bp_err, input, output, t) +end + +function LSTMLayer:get_params() + return self.dag:get_params() +end diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua new file mode 100644 index 0000000..1963eba --- /dev/null +++ b/nerv/layer/lstm_gate.lua @@ -0,0 +1,77 @@ +local LSTMGateLayer = nerv.class('nerv.LSTMGateLayer', 'nerv.Layer') +-- NOTE: this is a full matrix gate + +function LSTMGateLayer:__init(id, global_conf, layer_conf) + self.id = id + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.gconf = global_conf + + for i = 1, #self.dim_in do + self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf, + nerv.LinearTransParam, + {self.dim_in[i], self.dim_out[1]}) + end + self.bp = self:find_param("bp", layer_conf, global_conf, + nerv.BiasParam, {1, self.dim_out[1]}) + + self:check_dim_len(-1, 1) --accept multiple inputs +end + +function LSTMGateLayer:init(batch_size) + for i = 1, #self.dim_in do + if self["ltp" .. i].trans:ncol() ~= self.bp.trans:ncol() then + nerv.error("mismatching dimensions of linear transform and bias paramter") + end + if self.dim_in[i] ~= self["ltp" .. i].trans:nrow() then + nerv.error("mismatching dimensions of linear transform parameter and input") + end + self["ltp"..i]:train_init() + end + + if self.dim_out[1] ~= self.ltp1.trans:ncol() then + nerv.error("mismatching dimensions of linear transform parameter and output") + end + self.bp:train_init() + self.err_bakm = self.gconf.cumat_type(batch_size, self.dim_out[1]) +end + +function LSTMGateLayer:batch_resize(batch_size) + if self.err_m:nrow() ~= batch_size then + self.err_bakm = self.gconf.cumat_type(batch_size, self.dim_out[1]) + end +end + +function LSTMGateLayer:propagate(input, output) + -- apply linear transform + output[1]:mul(input[1], self.ltp1.trans, 1.0, 0.0, 'N', 'N') + for i = 2, #self.dim_in do + output[1]:mul(input[i], self["ltp" .. 
diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua
new file mode 100644
index 0000000..1963eba
--- /dev/null
+++ b/nerv/layer/lstm_gate.lua
@@ -0,0 +1,77 @@
+local LSTMGateLayer = nerv.class('nerv.LSTMGateLayer', 'nerv.Layer')
+-- NOTE: this is a full matrix gate
+
+function LSTMGateLayer:__init(id, global_conf, layer_conf)
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+
+    for i = 1, #self.dim_in do
+        self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf,
+                                           nerv.LinearTransParam,
+                                           {self.dim_in[i], self.dim_out[1]})
+    end
+    self.bp = self:find_param("bp", layer_conf, global_conf,
+                              nerv.BiasParam, {1, self.dim_out[1]})
+
+    self:check_dim_len(-1, 1) -- accept multiple inputs
+end
+
+function LSTMGateLayer:init(batch_size)
+    for i = 1, #self.dim_in do
+        if self["ltp" .. i].trans:ncol() ~= self.bp.trans:ncol() then
+            nerv.error("mismatching dimensions of linear transform and bias parameter")
+        end
+        if self.dim_in[i] ~= self["ltp" .. i].trans:nrow() then
+            nerv.error("mismatching dimensions of linear transform parameter and input")
+        end
+        self["ltp" .. i]:train_init()
+    end
+
+    if self.dim_out[1] ~= self.ltp1.trans:ncol() then
+        nerv.error("mismatching dimensions of linear transform parameter and output")
+    end
+    self.bp:train_init()
+    self.err_bakm = self.gconf.cumat_type(batch_size, self.dim_out[1])
+end
+
+function LSTMGateLayer:batch_resize(batch_size)
+    if self.err_bakm:nrow() ~= batch_size then
+        self.err_bakm = self.gconf.cumat_type(batch_size, self.dim_out[1])
+    end
+end
+
+function LSTMGateLayer:propagate(input, output)
+    -- apply linear transform
+    output[1]:mul(input[1], self.ltp1.trans, 1.0, 0.0, 'N', 'N')
+    for i = 2, #self.dim_in do
+        output[1]:mul(input[i], self["ltp" .. i].trans, 1.0, 1.0, 'N', 'N')
+    end
+    -- add bias
+    output[1]:add_row(self.bp.trans, 1.0)
+    output[1]:sigmoid(output[1])
+end
+
+function LSTMGateLayer:back_propagate(bp_err, next_bp_err, input, output)
+    self.err_bakm:sigmoid_grad(bp_err[1], output[1])
+    for i = 1, #self.dim_in do
+        next_bp_err[i]:mul(self.err_bakm, self["ltp" .. i].trans, 1.0, 0.0, 'N', 'T')
+    end
+end
+
+function LSTMGateLayer:update(bp_err, input, output)
+    self.err_bakm:sigmoid_grad(bp_err[1], output[1])
+    for i = 1, #self.dim_in do
+        self["ltp" .. i]:update_by_err_input(self.err_bakm, input[i])
+    end
+    self.bp:update_by_gradient(self.err_bakm:colsum())
+end
+
+function LSTMGateLayer:get_params()
+    local pr = nerv.ParamRepo({self.bp})
+    for i = 1, #self.dim_in do
+        pr:add(self["ltp" .. i].id, self["ltp" .. i])
+    end
+    return pr
+end
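LSTMGateLayer computes y = sigmoid(sum_i x_i * W_i + b) over an arbitrary number of inputs, and in back_propagate applies the sigmoid derivative (sigmoid_grad) before sending the error through each transposed W_i. A scalar sketch of both directions (plain Lua, hypothetical parameters, one unit per input):

    local function sigmoid(v) return 1 / (1 + math.exp(-v)) end
    local x = {0.5, -0.2, 0.3}            -- stand-ins for x, h, c
    local W = {0.8, 0.4, -0.1}            -- hypothetical ltp1..ltp3
    local b = 0.05
    local a = b
    for i = 1, #x do a = a + x[i] * W[i] end
    local y = sigmoid(a)                  -- forward pass, as in propagate
    local bp_err = 0.7                    -- error arriving from above
    local dsig = bp_err * y * (1 - y)     -- what sigmoid_grad computes
    for i = 1, #x do
        print(("next_bp_err[%d] = %.4f"):format(i, dsig * W[i]))
    end

Keeping one LinearTransParam per input (rather than one concatenated matrix) is what lets the same layer serve two-input GRU-style gates and three-input peephole LSTM gates without reshaping the inputs.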