summary | refs | log | tree | commit | diff
path: root/nerv/layer
diff options
context:
space:
mode:
Diffstat (limited to 'nerv/layer')
-rw-r--r-- nerv/layer/dropout.lua 77
-rw-r--r-- nerv/layer/elem_mul.lua 14
-rw-r--r-- nerv/layer/gru.lua 128
-rw-r--r-- nerv/layer/init.lua 6
-rw-r--r-- nerv/layer/lstm.lua 140
-rw-r--r-- nerv/layer/lstm_gate.lua 77
6 files changed, 434 insertions, 8 deletions
diff --git a/nerv/layer/dropout.lua b/nerv/layer/dropout.lua
new file mode 100644
index 0000000..42660cc
--- /dev/null
+++ b/nerv/layer/dropout.lua
@@ -0,0 +1,77 @@
+local DropoutLayer = nerv.class("nerv.DropoutLayer", "nerv.Layer")
+
+-- Construct a dropout layer.
+--   id          identifier of this layer
+--   global_conf global configuration; `use_cpu`, `mmat_type`, `cumat_type`
+--               and (as a fallback) `dropout_rate` are read from it
+--   layer_conf  layer configuration; `dim_in`, `dim_out` and (optionally)
+--               `dropout_rate` are read from it
+function DropoutLayer:__init(id, global_conf, layer_conf)
+    self.id = id
+    self.gconf = global_conf
+    -- the masks are allocated with the host or the device matrix type,
+    -- depending on the global `use_cpu` switch
+    if self.gconf.use_cpu then
+        self.mat_type = self.gconf.mmat_type
+    else
+        self.mat_type = self.gconf.cumat_type
+    end
+    -- a layer-specific rate takes precedence over the global one
+    self.rate = layer_conf.dropout_rate or global_conf.dropout_rate
+    if self.rate == nil then
+        -- without a rate the layer just copies its input (see propagate)
+        nerv.warning("[DropoutLayer:__init] dropout rate is not set")
+    elseif type(self.rate) == "number" and
+            (self.rate < 0 or self.rate >= 1) then
+        -- propagate scales the kept activations by 1 / (1 - rate), so a
+        -- rate of 1 divides by zero and anything outside [0, 1) is not a
+        -- meaningful drop probability
+        nerv.error("invalid dropout rate %s, expected a value in [0, 1)",
+                   tostring(self.rate))
+    end
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self:check_dim_len(1, 1) -- exactly one input and one output
+end
+
+-- Check that input and output have the same width and allocate one mask
+-- matrix per time step.
+--   batch_size  number of rows of every mask
+--   chunk_size  number of masks to allocate; treated as 1 when nil
+function DropoutLayer:init(batch_size, chunk_size)
+    local width = self.dim_in[1]
+    if width ~= self.dim_out[1] then
+        nerv.error("mismatching dimensions of input and output")
+    end
+    local steps = chunk_size
+    if steps == nil then
+        steps = 1
+    end
+    -- masks are indexed by time step: self.mask[t]
+    local masks = {}
+    self.mask = masks
+    for t = 1, steps do
+        masks[t] = self.mat_type(batch_size, width)
+    end
+end
+
+-- Make sure every mask of the first `chunk_size` time steps exists and has
+-- `batch_size` rows; masks that already fit are kept as they are.
+--   batch_size  required number of rows
+--   chunk_size  number of time steps to check; treated as 1 when nil
+function DropoutLayer:batch_resize(batch_size, chunk_size)
+    local steps = chunk_size
+    if steps == nil then
+        steps = 1
+    end
+    local masks = self.mask
+    for t = 1, steps do
+        local current = masks[t]
+        local fits = current ~= nil and current:nrow() == batch_size
+        if not fits then
+            masks[t] = self.mat_type(batch_size, self.dim_in[1])
+        end
+    end
+end
+
+-- Forward pass: draw a fresh mask for time step `t` and multiply the input
+-- by it element-wise; without a configured rate the input is copied as is.
+--   input, output  tables whose first entry is the matrix to read / write
+--   t              time step selecting the mask; treated as 1 when nil
+function DropoutLayer:propagate(input, output, t)
+    local step = t
+    if step == nil then
+        step = 1
+    end
+    local rate = self.rate
+    if not rate then
+        -- no dropout configured: pass the input through unchanged
+        -- NOTE(review): `copy_fromd` is also used when `use_cpu` selects
+        -- the host matrix type -- confirm that type provides it
+        output[1]:copy_fromd(input[1])
+        return
+    end
+    local mask = self.mask[step]
+    mask:rand_uniform()
+    -- part of the activations is dropped, so the surviving ones are scaled
+    -- by 1 / (1 - rate) to compensate (presumably thres_mask writes 0 below
+    -- the threshold `rate` and the scale factor elsewhere -- TODO confirm)
+    mask:thres_mask(mask, rate, 0, 1 / (1.0 - rate))
+    output[1]:mul_elem(input[1], mask)
+end
+
+function DropoutLayer:update(bp_err, input, output, t)
+ -- this layer has no trainable parameters (get_params returns an empty repo), so there is nothing to update
+end
+
+-- Backward pass: multiply the incoming error element-wise by the mask
+-- stored for time step `t`; without a configured rate the error is copied.
+--   bp_err       table whose first entry is the error arriving at the output
+--   next_bp_err  table whose first entry receives the error for the input
+--   t            time step selecting the mask; treated as 1 when nil
+function DropoutLayer:back_propagate(bp_err, next_bp_err, input, output, t)
+    local step = t
+    if step == nil then
+        step = 1
+    end
+    if not self.rate then
+        next_bp_err[1]:copy_fromd(bp_err[1])
+        return
+    end
+    -- reuse the mask kept in self.mask for this step (propagate fills it)
+    next_bp_err[1]:mul_elem(bp_err[1], self.mask[step])
+end
+
+-- Return the parameters of this layer: an empty parameter repository.
+function DropoutLayer:get_params()
+    local no_params = nerv.ParamRepo({})
+    return no_params
+end
diff --git a/nerv/layer/elem_mul.lua b/nerv/layer/elem_mul.lua
index c809d3e..fe80a3f 100644
--- a/nerv/layer/elem_mul.lua
+++ b/nerv/layer/elem_mul.lua
@@ -5,19 +5,19 @@ function ElemMulLayer:__init(id, global_conf, layer_conf)
self.dim_in = layer_conf.dim_in
self.dim_out = layer_conf.dim_out
self.gconf = global_conf
-
- self:check_dim_len(2, 1) -- Element-multiply input[1] and input[2]
+ -- element-wise multiplication of input[1] and input[2]
+ self:check_dim_len(2, 1)
end
function ElemMulLayer:init(batch_size)
- if self.dim_in[1] ~= self.dim_in[2] or
+ if self.dim_in[1] ~= self.dim_in[2] or
self.dim_in[1] ~= self.dim_out[1] then
- nerv.error("dim_in and dim_out mismatch for ElemMulLayer")
+ nerv.error("mismatching dimensions of input and output")
end
end
function ElemMulLayer:batch_resize(batch_size)
- --do nothing
+ -- do nothing
end
function ElemMulLayer:propagate(input, output)
@@ -25,12 +25,12 @@ function ElemMulLayer:propagate(input, output)
end
function ElemMulLayer:back_propagate(bp_err, next_bp_err, input, output)
- next_bp_err[1]:mul_elem(bp_err[1], input[2])
+ next_bp_err[1]:mul_elem(bp_err[1], input[2])
next_bp_err[2]:mul_elem(bp_err[1], input[1])
end
function ElemMulLayer:update(bp_err, input, output)
- --do nothing
+ -- do nothing
end
function ElemMulLayer:get_params()
diff --git a/nerv/layer/gru.lua b/nerv/layer/gru.lua
new file mode 100644
index 0000000..2162e28
--- /dev/null
+++ b/nerv/layer/gru.lua
@@ -0,0 +1,128 @@
+local GRULayer = nerv.class('nerv.GRULayer', 'nerv.Layer')
+
+-- Construct a GRU layer as a DAGLayer of primitive sub-layers.
+--   id          identifier of this layer; also the prefix of all sub-layers
+--   global_conf global configuration, handed on to the sub-layers
+--   layer_conf  layer configuration; `dim_in` (two entries), `dim_out`
+--               (one entry) and the optional parameter repo `pr` are read
+-- Wiring, as given by the connection table below:
+--   reset  = resetGateL(x, h)
+--   update = updateGateL(x, h)
+--   cand   = mainTanhL(mainAffineL(x, reset * h))
+--   output = updateMergeL(h, update * h, update * cand), lambda {1, -1, 1}
+--            (presumably the lambda-weighted sum h - update * h +
+--            update * cand -- confirm against CombinerLayer)
+function GRULayer:__init(id, global_conf, layer_conf)
+    -- input1:x
+    -- input2:h
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+
+    -- validate the number of ports before any dimension is indexed, so a
+    -- malformed configuration is reported here instead of failing later
+    -- with a nil dimension
+    self:check_dim_len(2, 1) -- inputs x and h, one output
+
+    if self.dim_in[2] ~= self.dim_out[1] then
+        nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)",
+                   self.dim_in[2], self.dim_out[1])
+    end
+
+    -- prepare a DAGLayer to hold the GRU structure
+    local pr = layer_conf.pr
+    if pr == nil then
+        pr = nerv.ParamRepo()
+    end
+
+    -- prefix a sub-layer name (or port spec) with the id of this layer
+    local function ap(str)
+        return self.id .. '.' .. str
+    end
+    local din1, din2 = self.dim_in[1], self.dim_in[2]
+    local dout1 = self.dim_out[1]
+    local layers = {
+        ["nerv.CombinerLayer"] = {
+            [ap("inputXDup")] = {{}, {dim_in = {din1},
+                                      dim_out = {din1, din1, din1},
+                                      lambda = {1}}},
+            [ap("inputHDup")] = {{}, {dim_in = {din2},
+                                      dim_out = {din2, din2, din2, din2, din2},
+                                      lambda = {1}}},
+            [ap("updateGDup")] = {{}, {dim_in = {din2},
+                                       dim_out = {din2, din2},
+                                       lambda = {1}}},
+            [ap("updateMergeL")] = {{}, {dim_in = {din2, din2, din2},
+                                         dim_out = {dout1},
+                                         lambda = {1, -1, 1}}},
+        },
+        ["nerv.AffineLayer"] = {
+            [ap("mainAffineL")] = {{}, {dim_in = {din1, din2},
+                                        dim_out = {dout1},
+                                        pr = pr}},
+        },
+        ["nerv.TanhLayer"] = {
+            [ap("mainTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}},
+        },
+        ["nerv.GateFLayer"] = {
+            [ap("resetGateL")] = {{}, {dim_in = {din1, din2},
+                                       dim_out = {din2},
+                                       pr = pr}},
+            [ap("updateGateL")] = {{}, {dim_in = {din1, din2},
+                                        dim_out = {din2},
+                                        pr = pr}},
+        },
+        ["nerv.ElemMulLayer"] = {
+            [ap("resetGMulL")] = {{}, {dim_in = {din2, din2}, dim_out = {din2}}},
+            [ap("updateGMulCL")] = {{}, {dim_in = {din2, din2}, dim_out = {din2}}},
+            [ap("updateGMulHL")] = {{}, {dim_in = {din2, din2}, dim_out = {din2}}},
+        },
+    }
+
+    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+
+    local connections = {
+        ["<input>[1]"] = ap("inputXDup[1]"),
+        ["<input>[2]"] = ap("inputHDup[1]"),
+
+        [ap("inputXDup[1]")] = ap("resetGateL[1]"),
+        [ap("inputHDup[1]")] = ap("resetGateL[2]"),
+        [ap("inputXDup[2]")] = ap("updateGateL[1]"),
+        [ap("inputHDup[2]")] = ap("updateGateL[2]"),
+        [ap("updateGateL[1]")] = ap("updateGDup[1]"),
+
+        [ap("resetGateL[1]")] = ap("resetGMulL[1]"),
+        [ap("inputHDup[3]")] = ap("resetGMulL[2]"),
+
+        [ap("inputXDup[3]")] = ap("mainAffineL[1]"),
+        [ap("resetGMulL[1]")] = ap("mainAffineL[2]"),
+        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
+
+        [ap("updateGDup[1]")] = ap("updateGMulHL[1]"),
+        [ap("inputHDup[4]")] = ap("updateGMulHL[2]"),
+        [ap("updateGDup[2]")] = ap("updateGMulCL[1]"),
+        [ap("mainTanhL[1]")] = ap("updateGMulCL[2]"),
+
+        [ap("inputHDup[5]")] = ap("updateMergeL[1]"),
+        [ap("updateGMulHL[1]")] = ap("updateMergeL[2]"),
+        [ap("updateGMulCL[1]")] = ap("updateMergeL[3]"),
+
+        [ap("updateMergeL[1]")] = "<output>[1]",
+    }
+
+    self.dag = nerv.DAGLayer(self.id, global_conf,
+                             {dim_in = self.dim_in,
+                              dim_out = self.dim_out,
+                              sub_layers = layerRepo,
+                              connections = connections})
+end
+
+-- Initialise the layer by delegating to the internal DAGLayer.
+function GRULayer:init(batch_size, chunk_size)
+    local dag = self.dag
+    dag:init(batch_size, chunk_size)
+end
+
+-- Forward a batch-size change to the internal DAGLayer.
+function GRULayer:batch_resize(batch_size, chunk_size)
+    local dag = self.dag
+    dag:batch_resize(batch_size, chunk_size)
+end
+
+-- Parameter update for time step `t`, delegated to the internal DAGLayer.
+function GRULayer:update(bp_err, input, output, t)
+    local dag = self.dag
+    dag:update(bp_err, input, output, t)
+end
+
+-- Forward pass for time step `t`, delegated to the internal DAGLayer.
+function GRULayer:propagate(input, output, t)
+    local dag = self.dag
+    dag:propagate(input, output, t)
+end
+
+-- Backward pass for time step `t`, delegated to the internal DAGLayer.
+function GRULayer:back_propagate(bp_err, next_bp_err, input, output, t)
+    local dag = self.dag
+    dag:back_propagate(bp_err, next_bp_err, input, output, t)
+end
+
+-- Return whatever the internal DAGLayer reports as its parameters.
+function GRULayer:get_params()
+    local dag = self.dag
+    return dag:get_params()
+end
diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua
index 43c2250..6b7a1d7 100644
--- a/nerv/layer/init.lua
+++ b/nerv/layer/init.lua
@@ -90,7 +90,7 @@ function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim)
end
end
nerv.info("param [%s] of layer [%s] is not found in `layer_conf` or `layer_conf.pr`, " ..
- "switch to auto-generate.", pid_list_str, self.id)
+ "switch to auto-generate", pid_list_str, self.id)
local pid_g = self.id .. '_' .. pid_list[1]
p = p_type(pid_g, gconf)
p.trans = gconf.cumat_type(unpack(p_dim))
@@ -113,3 +113,7 @@ nerv.include('affine_recurrent.lua')
nerv.include('softmax.lua')
nerv.include('elem_mul.lua')
nerv.include('gate_fff.lua')
+nerv.include('lstm.lua')
+nerv.include('lstm_gate.lua')
+nerv.include('dropout.lua')
+nerv.include('gru.lua')
diff --git a/nerv/layer/lstm.lua b/nerv/layer/lstm.lua
new file mode 100644
index 0000000..500bd87
--- /dev/null
+++ b/nerv/layer/lstm.lua
@@ -0,0 +1,140 @@
+local LSTMLayer = nerv.class('nerv.LSTMLayer', 'nerv.Layer')
+
+-- Construct an LSTM layer as a DAGLayer of primitive sub-layers.
+--   id          identifier of this layer; also the prefix of all sub-layers
+--   global_conf global configuration, handed on to the sub-layers
+--   layer_conf  layer configuration; `dim_in` (three entries), `dim_out`
+--               (two entries) and the optional parameter repo `pr` are read
+-- Wiring, as given by the connection table below:
+--   cand      = mainTanhL(mainAffineL(x, h))
+--   in_gate   = inputGateL(x, h, c)
+--   forget    = forgetGateL(x, h, c)
+--   new_c     = mainCDup(cand * in_gate, c * forget), lambda {1, 1}
+--               (presumably the sum of both terms -- confirm against
+--               CombinerLayer)
+--   out_gate  = outputGateL(x, h, new_c)
+--   output[1] = outputTanhL(new_c) * out_gate
+--   output[2] = new_c
+function LSTMLayer:__init(id, global_conf, layer_conf)
+    -- input1:x
+    -- input2:h
+    -- input3:c
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+
+    -- validate the number of ports before any dimension is indexed, so a
+    -- malformed configuration is reported here instead of building the
+    -- sub-layers from nil dimensions
+    self:check_dim_len(3, 2) -- x, h, c and h, c
+
+    -- prepare a DAGLayer to hold the lstm structure
+    local pr = layer_conf.pr
+    if pr == nil then
+        pr = nerv.ParamRepo()
+    end
+
+    -- prefix a sub-layer name (or port spec) with the id of this layer
+    local function ap(str)
+        return self.id .. '.' .. str
+    end
+    local din1, din2, din3 = self.dim_in[1], self.dim_in[2], self.dim_in[3]
+    local dout1 = self.dim_out[1]
+    local layers = {
+        ["nerv.CombinerLayer"] = {
+            [ap("inputXDup")] = {{}, {dim_in = {din1},
+                                      dim_out = {din1, din1, din1, din1},
+                                      lambda = {1}}},
+
+            [ap("inputHDup")] = {{}, {dim_in = {din2},
+                                      dim_out = {din2, din2, din2, din2},
+                                      lambda = {1}}},
+
+            [ap("inputCDup")] = {{}, {dim_in = {din3},
+                                      dim_out = {din3, din3, din3},
+                                      lambda = {1}}},
+
+            [ap("mainCDup")] = {{}, {dim_in = {din3, din3},
+                                     dim_out = {din3, din3, din3},
+                                     lambda = {1, 1}}},
+        },
+        ["nerv.AffineLayer"] = {
+            [ap("mainAffineL")] = {{}, {dim_in = {din1, din2},
+                                        dim_out = {dout1},
+                                        pr = pr}},
+        },
+        ["nerv.TanhLayer"] = {
+            [ap("mainTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}},
+            [ap("outputTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}},
+        },
+        ["nerv.LSTMGateLayer"] = {
+            [ap("forgetGateL")] = {{}, {dim_in = {din1, din2, din3},
+                                        dim_out = {din3}, pr = pr}},
+            [ap("inputGateL")] = {{}, {dim_in = {din1, din2, din3},
+                                       dim_out = {din3}, pr = pr}},
+            [ap("outputGateL")] = {{}, {dim_in = {din1, din2, din3},
+                                        dim_out = {din3}, pr = pr}},
+
+        },
+        ["nerv.ElemMulLayer"] = {
+            [ap("inputGMulL")] = {{}, {dim_in = {din3, din3},
+                                       dim_out = {din3}}},
+            [ap("forgetGMulL")] = {{}, {dim_in = {din3, din3},
+                                        dim_out = {din3}}},
+            [ap("outputGMulL")] = {{}, {dim_in = {din3, din3},
+                                        dim_out = {din3}}},
+        },
+    }
+
+    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+
+    local connections = {
+        ["<input>[1]"] = ap("inputXDup[1]"),
+        ["<input>[2]"] = ap("inputHDup[1]"),
+        ["<input>[3]"] = ap("inputCDup[1]"),
+
+        [ap("inputXDup[1]")] = ap("mainAffineL[1]"),
+        [ap("inputHDup[1]")] = ap("mainAffineL[2]"),
+        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
+
+        [ap("inputXDup[2]")] = ap("inputGateL[1]"),
+        [ap("inputHDup[2]")] = ap("inputGateL[2]"),
+        [ap("inputCDup[1]")] = ap("inputGateL[3]"),
+
+        [ap("inputXDup[3]")] = ap("forgetGateL[1]"),
+        [ap("inputHDup[3]")] = ap("forgetGateL[2]"),
+        [ap("inputCDup[2]")] = ap("forgetGateL[3]"),
+
+        [ap("mainTanhL[1]")] = ap("inputGMulL[1]"),
+        [ap("inputGateL[1]")] = ap("inputGMulL[2]"),
+
+        [ap("inputCDup[3]")] = ap("forgetGMulL[1]"),
+        [ap("forgetGateL[1]")] = ap("forgetGMulL[2]"),
+
+        [ap("inputGMulL[1]")] = ap("mainCDup[1]"),
+        [ap("forgetGMulL[1]")] = ap("mainCDup[2]"),
+
+        [ap("inputXDup[4]")] = ap("outputGateL[1]"),
+        [ap("inputHDup[4]")] = ap("outputGateL[2]"),
+        [ap("mainCDup[3]")] = ap("outputGateL[3]"),
+
+        [ap("mainCDup[2]")] = "<output>[2]",
+        [ap("mainCDup[1]")] = ap("outputTanhL[1]"),
+
+        [ap("outputTanhL[1]")] = ap("outputGMulL[1]"),
+        [ap("outputGateL[1]")] = ap("outputGMulL[2]"),
+
+        [ap("outputGMulL[1]")] = "<output>[1]",
+    }
+    self.dag = nerv.DAGLayer(self.id, global_conf,
+                             {dim_in = self.dim_in,
+                              dim_out = self.dim_out,
+                              sub_layers = layerRepo,
+                              connections = connections})
+end
+
+-- Initialise the layer by delegating to the internal DAGLayer.
+function LSTMLayer:init(batch_size, chunk_size)
+    local dag = self.dag
+    dag:init(batch_size, chunk_size)
+end
+
+-- Forward a batch-size change to the internal DAGLayer.
+function LSTMLayer:batch_resize(batch_size, chunk_size)
+    local dag = self.dag
+    dag:batch_resize(batch_size, chunk_size)
+end
+
+-- Parameter update for time step `t`, delegated to the internal DAGLayer.
+function LSTMLayer:update(bp_err, input, output, t)
+    local dag = self.dag
+    dag:update(bp_err, input, output, t)
+end
+
+-- Forward pass for time step `t`, delegated to the internal DAGLayer.
+function LSTMLayer:propagate(input, output, t)
+    local dag = self.dag
+    dag:propagate(input, output, t)
+end
+
+-- Backward pass for time step `t`, delegated to the internal DAGLayer.
+function LSTMLayer:back_propagate(bp_err, next_bp_err, input, output, t)
+    local dag = self.dag
+    dag:back_propagate(bp_err, next_bp_err, input, output, t)
+end
+
+-- Return whatever the internal DAGLayer reports as its parameters.
+function LSTMLayer:get_params()
+    local dag = self.dag
+    return dag:get_params()
+end
diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua
new file mode 100644
index 0000000..1963eba
--- /dev/null
+++ b/nerv/layer/lstm_gate.lua
@@ -0,0 +1,77 @@
+local LSTMGateLayer = nerv.class('nerv.LSTMGateLayer', 'nerv.Layer')
+-- NOTE: this is a full matrix gate
+
+-- Construct a gate with one linear transform per input and a shared bias.
+--   id          identifier of this layer
+--   global_conf global configuration
+--   layer_conf  layer configuration; `dim_in` and `dim_out` are read here,
+--               and the table is handed to find_param for parameter lookup
+function LSTMGateLayer:__init(id, global_conf, layer_conf)
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+
+    -- input i gets its own transform, stored as self.ltp<i>, of shape
+    -- dim_in[i] x dim_out[1]
+    for i = 1, #self.dim_in do
+        local pname = "ltp" .. i
+        local shape = {self.dim_in[i], self.dim_out[1]}
+        self[pname] = self:find_param(pname, layer_conf, global_conf,
+                                      nerv.LinearTransParam, shape)
+    end
+    -- a single bias of shape 1 x dim_out[1]
+    local bias_shape = {1, self.dim_out[1]}
+    self.bp = self:find_param("bp", layer_conf, global_conf,
+                              nerv.BiasParam, bias_shape)
+
+    -- -1: accept multiple inputs; exactly one output
+    self:check_dim_len(-1, 1)
+end
+
+-- Check that every transform agrees with the bias, its input and the
+-- output, run train_init on all parameters, and allocate the scratch
+-- matrix `err_bakm` (batch_size x dim_out[1]).
+--   batch_size  number of rows of the scratch matrix
+function LSTMGateLayer:init(batch_size)
+    for i = 1, #self.dim_in do
+        local ltp = self["ltp" .. i]
+        -- columns of the transform must match the columns of the bias
+        if ltp.trans:ncol() ~= self.bp.trans:ncol() then
+            nerv.error("mismatching dimensions of linear transform and bias paramter")
+        end
+        -- rows of the transform must match the width of input i
+        if self.dim_in[i] ~= ltp.trans:nrow() then
+            nerv.error("mismatching dimensions of linear transform parameter and input")
+        end
+        ltp:train_init()
+    end
+
+    local out_dim = self.dim_out[1]
+    if out_dim ~= self.ltp1.trans:ncol() then
+        nerv.error("mismatching dimensions of linear transform parameter and output")
+    end
+    self.bp:train_init()
+    self.err_bakm = self.gconf.cumat_type(batch_size, out_dim)
+end
+
+-- Re-allocate the scratch matrix `err_bakm` when the batch size changes.
+--   batch_size  required number of rows
+function LSTMGateLayer:batch_resize(batch_size)
+    -- `err_bakm` is the matrix allocated in init and used by
+    -- back_propagate / update; this layer never assigns an `err_m` field,
+    -- so checking that name would index nil
+    if self.err_bakm:nrow() ~= batch_size then
+        self.err_bakm = self.gconf.cumat_type(batch_size, self.dim_out[1])
+    end
+end
+
+-- Forward pass: multiply every input by its transform into output[1],
+-- add the bias row and apply the sigmoid in place.
+--   input   table of input matrices, one per entry of dim_in
+--   output  table whose first entry receives the gate activation
+function LSTMGateLayer:propagate(input, output)
+    local out = output[1]
+    -- the first product is issued with 0.0 as second scalar and the others
+    -- with 1.0 (presumably overwrite vs. accumulate -- confirm against the
+    -- matrix `mul` API)
+    out:mul(input[1], self.ltp1.trans, 1.0, 0.0, 'N', 'N')
+    local n_in = #self.dim_in
+    for k = 2, n_in do
+        local weight = self["ltp" .. k].trans
+        out:mul(input[k], weight, 1.0, 1.0, 'N', 'N')
+    end
+    -- add bias
+    out:add_row(self.bp.trans, 1.0)
+    out:sigmoid(out)
+end
+
+-- Backward pass: run sigmoid_grad on the incoming error into `err_bakm`,
+-- then multiply it by each transform (with the 'T' flag) into next_bp_err.
+--   bp_err       table whose first entry is the error arriving at the output
+--   next_bp_err  table receiving one error matrix per input
+--   output       table whose first entry is the activation from propagate
+function LSTMGateLayer:back_propagate(bp_err, next_bp_err, input, output)
+    local grad = self.err_bakm
+    grad:sigmoid_grad(bp_err[1], output[1])
+    local n_in = #self.dim_in
+    for k = 1, n_in do
+        next_bp_err[k]:mul(grad, self["ltp" .. k].trans, 1.0, 0.0, 'N', 'T')
+    end
+end
+
+-- Parameter update: recompute the sigmoid_grad result into `err_bakm`,
+-- update every transform from it and the matching input, and update the
+-- bias from its column sum.
+--   bp_err  table whose first entry is the error arriving at the output
+--   input   table of input matrices, one per entry of dim_in
+--   output  table whose first entry is the activation from propagate
+function LSTMGateLayer:update(bp_err, input, output)
+    local grad = self.err_bakm
+    grad:sigmoid_grad(bp_err[1], output[1])
+    local n_in = #self.dim_in
+    for k = 1, n_in do
+        local ltp = self["ltp" .. k]
+        ltp:update_by_err_input(grad, input[k])
+    end
+    self.bp:update_by_gradient(grad:colsum())
+end
+
+-- Collect the bias and every per-input transform into one ParamRepo.
+function LSTMGateLayer:get_params()
+    local repo = nerv.ParamRepo({self.bp})
+    local n_in = #self.dim_in
+    for k = 1, n_in do
+        local ltp = self["ltp" .. k]
+        -- transforms are registered under their own id
+        repo:add(ltp.id, ltp)
+    end
+    return repo
+end