Diffstat (limited to 'nerv/layer')
-rw-r--r--  nerv/layer/affine.lua       43
-rw-r--r--  nerv/layer/bias.lua         15
-rw-r--r--  nerv/layer/combiner.lua     16
-rw-r--r--  nerv/layer/dropout.lua      27
-rw-r--r--  nerv/layer/duplicate.lua    41
-rw-r--r--  nerv/layer/elem_mul.lua     11
-rw-r--r--  nerv/layer/graph.lua       156
-rw-r--r--  nerv/layer/gru.lua          20
-rw-r--r--  nerv/layer/identity.lua     30
-rw-r--r--  nerv/layer/init.lua         72
-rw-r--r--  nerv/layer/lstm.lua        192
-rw-r--r--  nerv/layer/lstm_gate.lua    17
-rw-r--r--  nerv/layer/mse.lua          16
-rw-r--r--  nerv/layer/rnn.lua          42
-rw-r--r--  nerv/layer/sigmoid.lua      17
-rw-r--r--  nerv/layer/softmax.lua      11
-rw-r--r--  nerv/layer/softmax_ce.lua   16
-rw-r--r--  nerv/layer/tanh.lua         11
-rw-r--r--  nerv/layer/window.lua       15
19 files changed, 504 insertions(+), 264 deletions(-)
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 4156dde..38743aa 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -8,21 +8,19 @@ local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer')
--- A parameter that consists of a single matrix
-- @type nerv.MatrixParam
+function MatrixParam:check(checker)
+ -- check trans matrix type
+ checker(self.trans)
+end
+
--- Read from a file handle.
-- @param handle the file handle
function MatrixParam:read(handle)
self.trans = self.gconf.mmat_type.load(handle)
- if not self.gconf.use_cpu then
- self.trans = self.gconf.cumat_type.new_from_host(self.trans)
- end
end
function MatrixParam:write(handle)
- local trans = self.trans
- if not self.gconf.use_cpu then
- trans = self.trans:new_to_host()
- end
- trans:save(handle)
+ self.trans:save(handle)
end
function MatrixParam:train_init()
@@ -30,6 +28,12 @@ function MatrixParam:train_init()
self.correction:fill(0)
end
+function MatrixParam:copy(copier)
+ local target = nerv.MatrixParam(self.id, self.gconf)
+ target.trans = copier(self.trans)
+ return target
+end
+
function MatrixParam:_update_by_gradient(gradient, alpha, beta)
local gconf = self.gconf
-- momentum gain
@@ -77,25 +81,24 @@ end
--- The constructor.
function AffineLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- if layer_conf.ltp ~= nil and layer_conf.ltp1 == nil then
- layer_conf.ltp1 = layer_conf.ltp
- end
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
+ self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
+ self:bind_params()
+end
+
+function AffineLayer:bind_params()
for i = 1, #self.dim_in do
local pid = "ltp" .. i
local pid_list = i == 1 and {pid, "ltp"} or pid
- self["ltp" .. i] = self:find_param(pid_list, layer_conf, global_conf,
+ self["ltp" .. i] = self:find_param(pid_list, self.lconf, self.gconf,
nerv.LinearTransParam,
- {self.dim_in[i], self.dim_out[1]})
+ {self.dim_in[i], self.dim_out[1]})
end
self.ltp = self.ltp1 -- alias of ltp1
- self.bp = self:find_param("bp", layer_conf, global_conf,
+ self.bp = self:find_param("bp", self.lconf, self.gconf,
nerv.BiasParam,
{1, self.dim_out[1]})
- self.gconf = global_conf
- self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
+
end
function AffineLayer:init(batch_size)
@@ -142,7 +145,7 @@ function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
end
function AffineLayer:get_params()
- local pr = nerv.ParamRepo({self.ltp1, self.bp})
+ local pr = nerv.ParamRepo({self.ltp1, self.bp}, self.loc_type)
for i = 2, #self.dim_in do
pr:add(self["ltp" .. i].id, self["ltp" .. i])
end
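
For orientation, a minimal sketch of constructing the reworked AffineLayer; the gconf fields mirror what nerv.Layer.__init and find_param read, while the matrix type, initializer, ids and dims here are assumptions, not part of this commit:

    -- hedged sketch; nerv.MMatrixFloat and the ids/dims are assumptions
    local gconf = {
        use_cpu = true,
        mmat_type = nerv.MMatrixFloat,            -- host matrix type
        param_random = function() return 0.1 end, -- used when a param is auto-generated
    }
    local layer_conf = {
        dim_in = {429, 100},   -- one ltp per input: ltp1, ltp2
        dim_out = {2048},
        pr = nerv.ParamRepo({}, nerv.ParamRepo.LOC_TYPES.ON_HOST),
        params = {ltp1 = "shared_ltp"}, -- optional explicit param-id binding
    }
    local affine = nerv.AffineLayer("affine0", gconf, layer_conf)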
diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua
index 924c3da..191be78 100644
--- a/nerv/layer/bias.lua
+++ b/nerv/layer/bias.lua
@@ -1,12 +1,15 @@
local BiasLayer = nerv.class("nerv.BiasLayer", "nerv.Layer")
function BiasLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.gconf = global_conf
- self.bias = layer_conf.bias
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:check_dim_len(1, 1)
+ self:bind_params()
+end
+
+function BiasLayer:bind_params()
+ self.bias = self:find_param("bias", self.lconf, self.gconf,
+ nerv.BiasParam,
+ {1, self.dim_out[1]})
end
function BiasLayer:init()
@@ -28,5 +31,5 @@ function BiasLayer:propagate(input, output)
end
function BiasLayer:get_params()
- return nerv.ParamRepo({self.bias})
+ return nerv.ParamRepo({self.bias}, self.loc_type)
end
diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua
index 22e89a9..028c970 100644
--- a/nerv/layer/combiner.lua
+++ b/nerv/layer/combiner.lua
@@ -1,16 +1,8 @@
local CombinerLayer = nerv.class('nerv.CombinerLayer', 'nerv.Layer')
function CombinerLayer:__init(id, global_conf, layer_conf)
- self.id = id
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self.lambda = layer_conf.lambda
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = global_conf
- if self.gconf.use_cpu then
- self.mat_type = self.gconf.mmat_type
- else
- self.mat_type = self.gconf.cumat_type
- end
self:check_dim_len(#self.lambda, -1)
if #self.dim_in < 1 then
nerv.error("no input specified")
@@ -20,6 +12,10 @@ function CombinerLayer:__init(id, global_conf, layer_conf)
end
end
+function CombinerLayer:bind_params()
+ -- do nothing
+end
+
function CombinerLayer:init(batch_size)
local dim = self.dim_in[1]
for i = 2, #self.dim_in do
@@ -66,5 +62,5 @@ function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output)
end
function CombinerLayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/dropout.lua b/nerv/layer/dropout.lua
index 42660cc..39a8963 100644
--- a/nerv/layer/dropout.lua
+++ b/nerv/layer/dropout.lua
@@ -1,22 +1,17 @@
local DropoutLayer = nerv.class("nerv.DropoutLayer", "nerv.Layer")
function DropoutLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.gconf = global_conf
- if self.gconf.use_cpu then
- self.mat_type = self.gconf.mmat_type
- else
- self.mat_type = self.gconf.cumat_type
- end
- self.rate = layer_conf.dropout_rate or global_conf.dropout_rate
- if self.rate == nil then
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
+ if self.gconf.dropout_rate == nil then
nerv.warning("[DropoutLayer:propagate] dropout rate is not set")
end
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
self:check_dim_len(1, 1) -- one input and one output
end
+function DropoutLayer:bind_params()
+ -- do nothing
+end
+
function DropoutLayer:init(batch_size, chunk_size)
if self.dim_in[1] ~= self.dim_out[1] then
nerv.error("mismatching dimensions of input and output")
@@ -45,12 +40,12 @@ function DropoutLayer:propagate(input, output, t)
if t == nil then
t = 1
end
- if self.rate then
+ if self.gconf.dropout_rate and self.gconf.dropout_rate ~= 0 then
self.mask[t]:rand_uniform()
-- since we will lose a portion of the actvations, we multiply the
-- activations by 1 / (1 - rate) to compensate
- self.mask[t]:thres_mask(self.mask[t], self.rate,
- 0, 1 / (1.0 - self.rate))
+ self.mask[t]:thres_mask(self.mask[t], self.gconf.dropout_rate,
+ 0, 1 / (1.0 - self.gconf.dropout_rate))
output[1]:mul_elem(input[1], self.mask[t])
else
output[1]:copy_fromd(input[1])
@@ -65,7 +60,7 @@ function DropoutLayer:back_propagate(bp_err, next_bp_err, input, output, t)
if t == nil then
t = 1
end
- if self.rate then
+ if self.gconf.dropout_rate and self.gconf.dropout_rate ~= 0 then
next_bp_err[1]:mul_elem(bp_err[1], self.mask[t])
else
next_bp_err[1]:copy_fromd(bp_err[1])
@@ -73,5 +68,5 @@ function DropoutLayer:back_propagate(bp_err, next_bp_err, input, output, t)
end
function DropoutLayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
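
Since the rate is now read from gconf at call time rather than cached per layer, it can be changed between epochs without touching the layers; a sketch of that use (the scheduling itself is an assumption, not part of this commit):

    -- hedged sketch: adjust dropout globally between epochs
    gconf.dropout_rate = 0.5  -- every DropoutLayer applies this at propagate time
    -- ... train one epoch ...
    gconf.dropout_rate = 0    -- masking is skipped and input is copied through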
diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua
new file mode 100644
index 0000000..137472b
--- /dev/null
+++ b/nerv/layer/duplicate.lua
@@ -0,0 +1,41 @@
+local DuplicateLayer = nerv.class('nerv.DuplicateLayer', 'nerv.Layer')
+
+function DuplicateLayer:__init(id, global_conf, layer_conf)
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
+ self:check_dim_len(1, -1)
+ if #self.dim_out < 1 then
+ nerv.error('no output specified')
+ end
+ for i = 1, #self.dim_out do
+ if self.dim_out[i] ~= self.dim_in[1] then
+ nerv.error('mismatching dimensions of outputs')
+ end
+ end
+end
+
+function DuplicateLayer:init()
+end
+
+function DuplicateLayer:batch_resize()
+end
+
+function DuplicateLayer:propagate(input, output)
+ for i = 1, #self.dim_out do
+ output[i]:copy_from(input[1])
+ -- FIXME: use reference copy to speed up
+ end
+end
+
+function DuplicateLayer:back_propagate(bp_err, next_bp_err)
+ next_bp_err[1]:copy_from(bp_err[1])
+ for i = 2, #self.dim_out do
+ next_bp_err[1]:add(next_bp_err[1], bp_err[i], 1.0, 1.0)
+ end
+end
+
+function DuplicateLayer:update()
+end
+
+function DuplicateLayer:get_params()
+ return nerv.ParamRepo({}, self.loc_type)
+end
diff --git a/nerv/layer/elem_mul.lua b/nerv/layer/elem_mul.lua
index fe80a3f..f03649b 100644
--- a/nerv/layer/elem_mul.lua
+++ b/nerv/layer/elem_mul.lua
@@ -1,14 +1,15 @@
local ElemMulLayer = nerv.class('nerv.ElemMulLayer', 'nerv.Layer')
function ElemMulLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = global_conf
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
-- element-wise multiplication of input[1] and input[2]
self:check_dim_len(2, 1)
end
+function ElemMulLayer:bind_params()
+ -- do nothing
+end
+
function ElemMulLayer:init(batch_size)
if self.dim_in[1] ~= self.dim_in[2] or
self.dim_in[1] ~= self.dim_out[1] then
@@ -34,5 +35,5 @@ function ElemMulLayer:update(bp_err, input, output)
end
function ElemMulLayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua
new file mode 100644
index 0000000..68d5f51
--- /dev/null
+++ b/nerv/layer/graph.lua
@@ -0,0 +1,156 @@
+local GraphLayer = nerv.class('nerv.GraphLayer', 'nerv.Layer')
+
+function GraphLayer:__init(id, global_conf, layer_conf)
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
+ self:graph_init(layer_conf.layer_repo, layer_conf.connections)
+end
+
+local function parse_id(str)
+ local id, port, _
+ _, _, id, port = string.find(str, "([a-zA-Z0-9_.]+)%[([0-9]+)%]")
+ if id == nil or port == nil then
+ _, _, id, port = string.find(str, "(.+)%[([0-9]+)%]")
+ if not (id == "<input>" or id == "<output>") then
+ nerv.error("wrong format of connection id")
+ end
+ end
+ port = tonumber(port)
+ return id, port
+end
+
+function GraphLayer:add_prefix(layers, connections)
+ local function ap(name)
+ return self.id .. '.' .. name
+ end
+
+ for layer_type, sublayers in pairs(layers) do
+ local tmp = {}
+ for name, layer_config in pairs(sublayers) do
+ tmp[ap(name)] = layer_config
+ end
+ layers[layer_type] = tmp
+ end
+
+ for i = 1, #connections do
+ local from, to = connections[i][1], connections[i][2]
+ if parse_id(from) ~= '<input>' then
+ connections[i][1] = ap(from)
+ end
+ if parse_id(to) ~= '<output>' then
+ connections[i][2] = ap(to)
+ end
+ end
+end
+
+function GraphLayer:discover(id, layer_repo)
+ if id == '<output>' then
+ id = '<input>'
+ end
+ local layers = self.layers
+ local ref = layers[id]
+ if ref == nil then
+ local layer = layer_repo:get_layer(id)
+ local dim_in, dim_out = layer:get_dim()
+ self.layer_num = self.layer_num + 1
+ ref = {
+ layer = layer,
+ inputs = {},
+ outputs = {},
+ dim_in = dim_in,
+ dim_out = dim_out,
+ id = self.layer_num,
+ }
+ layers[id] = ref
+ end
+ return ref
+end
+
+function GraphLayer:graph_init(layer_repo, connections)
+ local layers = {}
+ layers['<input>'] = {
+ inputs = {},
+ outputs = {},
+ dim_in = self.dim_out,
+ dim_out = self.dim_in,
+ id = 0,
+ }
+ self.layers = layers
+ self.layer_num = 0
+ self.connections = {}
+
+ -- check data dimension between connected ports
+ for _, edge in pairs(connections) do
+ local from, to, time = edge[1], edge[2], edge[3]
+ local id_from, port_from = parse_id(from)
+ local id_to, port_to = parse_id(to)
+ local ref_from = self:discover(id_from, layer_repo)
+ local ref_to = self:discover(id_to, layer_repo)
+ if ref_from.outputs[port_from] ~= nil then
+ nerv.error('%s has already been attached', from)
+ end
+ if ref_to.inputs[port_to] ~= nil then
+ nerv.error('%s has already been attached', to)
+ end
+ if ref_from.dim_out[port_from] ~= ref_to.dim_in[port_to] then
+ nerv.error('mismatching data dimension between %s and %s', from, to)
+ end
+ if ref_from.id == 0 and ref_to.id == 0 then
+ nerv.error('short-circuit connection between <input> and <output>')
+ end
+ ref_from.outputs[port_from] = true
+ ref_to.inputs[port_to] = true
+ table.insert(self.connections, {ref_from.id, port_from, ref_to.id, port_to, time})
+ end
+
+ -- check dangling ports
+ for id, ref in pairs(layers) do
+ if id ~= '<input>' then
+ for i = 1, #ref.dim_in do
+ if ref.inputs[i] == nil then
+ nerv.error('dangling input port %d of layer %s', i, id)
+ end
+ end
+ for i = 1, #ref.dim_out do
+ if ref.outputs[i] == nil then
+ nerv.error('dangling output port %d of layer %s', i, id)
+ end
+ end
+ end
+ end
+ for i = 1, #self.dim_in do
+ if layers['<input>'].outputs[i] == nil then
+ nerv.error('dangling port %d of layer <input>', i)
+ end
+ end
+ for i = 1, #self.dim_out do
+ if layers['<input>'].inputs[i] == nil then
+ nerv.error('dangling port %d of layer <output>', i)
+ end
+ end
+end
+
+function GraphLayer:set_attr(name, value)
+ self[name] = value
+ for id, ref in pairs(self.layers) do
+ if id ~= '<input>' then
+ ref.layer:set_attr(name, value)
+ end
+ end
+end
+
+function GraphLayer:get_sublayer(id)
+ if self.layers[id] == nil or id == '<input>' then
+ nerv.error('layer with id %s not found', id)
+ end
+ return self.layers[id].layer
+end
+
+function GraphLayer:get_params()
+ local param_repos = {}
+ for id, ref in pairs(self.layers) do
+ if id ~= '<input>' then
+ table.insert(param_repos, ref.layer:get_params())
+ end
+ end
+ return nerv.ParamRepo.merge(param_repos, self.loc_type)
+end
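
A sketch of what graph_init consumes: connections are {from, to, time} triples with ports written id[port], and <input>/<output> are pseudo layers resolved by parse_id. The sublayer ids and dims below are hypothetical, and layer_repo stands for a nerv.LayerRepo built beforehand:

    -- hedged sketch of GraphLayer wiring; 'affine'/'sigmoid' are hypothetical ids
    local connections = {
        {'<input>[1]', 'affine[1]', 0},
        {'affine[1]', 'sigmoid[1]', 0},
        {'sigmoid[1]', '<output>[1]', 0},
        -- a nonzero third field marks a recurrent (time-delayed) edge,
        -- as used by the LSTM/RNN rewrites below
    }
    local g = nerv.GraphLayer('g0', gconf,
                              {dim_in = {429}, dim_out = {2048},
                               layer_repo = layer_repo,
                               connections = connections})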
diff --git a/nerv/layer/gru.lua b/nerv/layer/gru.lua
index e81d21a..71718d7 100644
--- a/nerv/layer/gru.lua
+++ b/nerv/layer/gru.lua
@@ -4,11 +4,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
-- input1:x
-- input2:h
-- input3:c (h^~)
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = global_conf
-
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
if self.dim_in[2] ~= self.dim_out[1] then
nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)",
self.dim_in[2], self.dim_out[1])
@@ -17,7 +13,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
-- prepare a DAGLayer to hold the lstm structure
local pr = layer_conf.pr
if pr == nil then
- pr = nerv.ParamRepo()
+ pr = nerv.ParamRepo({}, self.loc_type)
end
local function ap(str)
@@ -63,7 +59,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
},
}
- local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+ self.lrepo = nerv.LayerRepo(layers, pr, global_conf)
local connections = {
["<input>[1]"] = ap("inputXDup[1]"),
@@ -97,12 +93,20 @@ function GRULayer:__init(id, global_conf, layer_conf)
self.dag = nerv.DAGLayer(self.id, global_conf,
{dim_in = self.dim_in,
dim_out = self.dim_out,
- sub_layers = layerRepo,
+ sub_layers = self.lrepo,
connections = connections})
self:check_dim_len(2, 1) -- inputs: x, h; output: h
end
+function GRULayer:bind_params()
+ local pr = self.lconf.pr -- layer_conf is not in scope here; use the stored copy
+ if pr == nil then
+ pr = nerv.ParamRepo({}, self.loc_type)
+ end
+ self.lrepo:rebind(pr)
+end
+
function GRULayer:init(batch_size, chunk_size)
self.dag:init(batch_size, chunk_size)
end
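
bind_params lets a parameter repo be swapped in after construction; a sketch of rebinding a GRU (the repo and the gru variable are hypothetical):

    -- hedged sketch: rebind the GRU's internal LayerRepo to a new ParamRepo
    local pr = nerv.ParamRepo({}, gru.loc_type)
    gru.lconf.pr = pr
    gru:bind_params()  -- internally calls self.lrepo:rebind(pr)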
diff --git a/nerv/layer/identity.lua b/nerv/layer/identity.lua
new file mode 100644
index 0000000..d56337d
--- /dev/null
+++ b/nerv/layer/identity.lua
@@ -0,0 +1,30 @@
+local IdentityLayer = nerv.class('nerv.IdentityLayer', 'nerv.Layer')
+
+function IdentityLayer:__init(id, global_conf, layer_conf)
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
+ self:check_dim_len(1, 1)
+ if self.dim_in[1] ~= self.dim_out[1] then
+ nerv.error('mismatching dimensions of input and output')
+ end
+end
+
+function IdentityLayer:init()
+end
+
+function IdentityLayer:batch_resize()
+end
+
+function IdentityLayer:propagate(input, output)
+ output[1]:copy_from(input[1])
+end
+
+function IdentityLayer:back_propagate(bp_err, next_bp_err)
+ next_bp_err[1]:copy_from(bp_err[1])
+end
+
+function IdentityLayer:update()
+end
+
+function IdentityLayer:get_params()
+ return nerv.ParamRepo({}, self.loc_type)
+end
diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua
index 54f33ae..475ef62 100644
--- a/nerv/layer/init.lua
+++ b/nerv/layer/init.lua
@@ -30,7 +30,18 @@ end
local Layer = nerv.class('nerv.Layer')
function Layer:__init(id, global_conf, layer_conf)
- nerv.error_method_not_implemented()
+ self.id = id
+ self.gconf = global_conf
+ self.lconf = layer_conf
+ if self.gconf.use_cpu then
+ self.mat_type = self.gconf.mmat_type
+ self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
+ else
+ self.mat_type = self.gconf.cumat_type
+ self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE
+ end
+ self.dim_in = layer_conf.dim_in
+ self.dim_out = layer_conf.dim_out
end
function Layer:init(batch_size)
@@ -66,34 +77,49 @@ function Layer:get_params()
nerv.error_method_not_implemented()
end
+function Layer:bind_params()
+ nerv.error_method_not_implemented()
+end
+
function Layer:get_dim()
return self.dim_in, self.dim_out
end
-function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim)
- if type(pid_list) == "string" then
- pid_list = {pid_list}
+function Layer:set_attr(name, value)
+ self[name] = value
+end
+
+function Layer:get_sublayer(id)
+ nerv.error('primitive layer does not have sublayers')
+end
+
+function Layer:find_param(plist, lconf, gconf, p_type, p_dim)
+ if type(plist) == "string" then
+ plist = {plist}
+ end
+ if lconf.params == nil then
+ lconf.params = {}
end
- pid_list_str = table.tostring(pid_list)
- for i, pid in ipairs(pid_list) do
- if lconf[pid] ~= nil then
- nerv.info("param [%s] of layer [%s] found in `layer_conf`.", pid, self.id)
- return lconf[pid]
+ local plist_str = table.tostring(plist)
+ local pid
+ for i, pname in ipairs(plist) do
+ if lconf.params[pname] ~= nil then
+ nerv.info("param id for [%s] of layer [%s] specified in `layer_conf.params`.", pname, self.id)
+ pid = lconf.params[pname]
end
- local pid_g = self.id .. '_' .. pid --global identifier
- local pr = lconf.pr
- local p
- if pr ~= nil and pr:has_param(pid_g) == true then
- nerv.info("param [%s] of layer [%s] found in `layer_conf.pr`.", pid_list_str, self.id)
- p = pr:get_param(pid_g)
- return p
+ if pid ~= nil and lconf.pr ~= nil and lconf.pr:has_param(pid) then
+ return lconf.pr:get_param(pid)
end
end
- nerv.info("param [%s] of layer [%s] is not found in `layer_conf` or `layer_conf.pr`, " ..
- "switch to auto-generate", pid_list_str, self.id)
- local pid_g = self.id .. '_' .. pid_list[1]
- p = p_type(pid_g, gconf)
- p.trans = gconf.cumat_type(unpack(p_dim))
+ pid = self.id .. '_' .. plist[1]
+ if lconf.pr ~= nil and lconf.pr:has_param(pid) then
+ nerv.info("param id for [%s] of layer [%s] is generated automatically.", pname, self.id)
+ return lconf.pr:get_param(pid)
+ end
+ nerv.info("param id for [%s] of layer [%s] is not found in the specified param repo, " ..
+ "switch to auto-generate", plist_str, self.id)
+ local p = p_type(pid, gconf)
+ p.trans = self.mat_type(unpack(p_dim))
if type(gconf.param_random) ~= "function" then
nerv.error("a param generate function is needed")
end
@@ -101,6 +127,7 @@ function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim)
return p
end
+nerv.include('graph.lua')
nerv.include('affine.lua')
nerv.include('sigmoid.lua')
nerv.include('tanh.lua')
@@ -115,6 +142,9 @@ nerv.include('lstm.lua')
nerv.include('lstm_gate.lua')
nerv.include('dropout.lua')
nerv.include('gru.lua')
+nerv.include('rnn.lua')
+nerv.include('duplicate.lua')
+nerv.include('identity.lua')
-- The following lines are for backward compatibility, and will be removed in
-- the future. The use of these names is deprecated.
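
In the reworked find_param the lookup order is: an explicit id under layer_conf.params, then the auto-generated id <layer_id>_<name> in layer_conf.pr, then on-the-fly generation initialized through gconf.param_random. A minimal sketch of a call from inside a bind_params, with hypothetical ids and dimensions:

    -- hedged sketch; "affine0", "shared_ltp" and the dims are made up
    -- 1. layer_conf.params = {ltp1 = "shared_ltp"} -> fetch "shared_ltp" from layer_conf.pr
    -- 2. otherwise, if layer_conf.pr holds "affine0_ltp1" -> reuse that param
    -- 3. otherwise -> generate a fresh "affine0_ltp1" via gconf.param_random
    local ltp = self:find_param({"ltp1", "ltp"}, self.lconf, self.gconf,
                                nerv.LinearTransParam,
                                {self.dim_in[1], self.dim_out[1]})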
diff --git a/nerv/layer/lstm.lua b/nerv/layer/lstm.lua
index d8eee71..56f674a 100644
--- a/nerv/layer/lstm.lua
+++ b/nerv/layer/lstm.lua
@@ -1,143 +1,85 @@
-local LSTMLayer = nerv.class('nerv.LSTMLayer', 'nerv.Layer')
+local LSTMLayer = nerv.class('nerv.LSTMLayer', 'nerv.GraphLayer')
function LSTMLayer:__init(id, global_conf, layer_conf)
- -- input1:x
- -- input2:h
- -- input3:c
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = global_conf
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
+ self:check_dim_len(1, 1)
+
+ local din = layer_conf.dim_in[1]
+ local dout = layer_conf.dim_out[1]
- -- prepare a DAGLayer to hold the lstm structure
local pr = layer_conf.pr
if pr == nil then
- pr = nerv.ParamRepo()
- end
-
- local function ap(str)
- return self.id .. '.' .. str
+ pr = nerv.ParamRepo({}, self.loc_type)
end
- local din1, din2, din3 = self.dim_in[1], self.dim_in[2], self.dim_in[3]
- local dout1, dout2 = self.dim_out[1], self.dim_out[2]
- local layers = {
- ["nerv.CombinerLayer"] = {
- [ap("inputXDup")] = {{}, {dim_in = {din1},
- dim_out = {din1, din1, din1, din1},
- lambda = {1}}},
- [ap("inputHDup")] = {{}, {dim_in = {din2},
- dim_out = {din2, din2, din2, din2},
- lambda = {1}}},
-
- [ap("inputCDup")] = {{}, {dim_in = {din3},
- dim_out = {din3, din3, din3},
- lambda = {1}}},
-
- [ap("mainCDup")] = {{}, {dim_in = {din3, din3},
- dim_out = {din3, din3, din3},
- lambda = {1, 1}}},
+ local layers = {
+ ['nerv.CombinerLayer'] = {
+ mainCombine = {dim_in = {dout, dout}, dim_out = {dout}, lambda = {1, 1}},
},
- ["nerv.AffineLayer"] = {
- [ap("mainAffineL")] = {{}, {dim_in = {din1, din2},
- dim_out = {dout1},
- pr = pr}},
+ ['nerv.DuplicateLayer'] = {
+ inputDup = {dim_in = {din}, dim_out = {din, din, din, din}},
+ outputDup = {dim_in = {dout}, dim_out = {dout, dout, dout, dout, dout}},
+ cellDup = {dim_in = {dout}, dim_out = {dout, dout, dout, dout, dout}},
},
- ["nerv.TanhLayer"] = {
- [ap("mainTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}},
- [ap("outputTanhL")] = {{}, {dim_in = {dout1}, dim_out = {dout1}}},
+ ['nerv.AffineLayer'] = {
+ mainAffine = {dim_in = {din, dout}, dim_out = {dout}, pr = pr},
},
- ["nerv.LSTMGateLayer"] = {
- [ap("forgetGateL")] = {{}, {dim_in = {din1, din2, din3},
- dim_out = {din3}, pr = pr,
- param_type = {'N', 'N', 'D'}}},
- [ap("inputGateL")] = {{}, {dim_in = {din1, din2, din3},
- dim_out = {din3}, pr = pr,
- param_type = {'N', 'N', 'D'}}},
- [ap("outputGateL")] = {{}, {dim_in = {din1, din2, din3},
- dim_out = {din3}, pr = pr,
- param_type = {'N', 'N', 'D'}}},
-
+ ['nerv.TanhLayer'] = {
+ mainTanh = {dim_in = {dout}, dim_out = {dout}},
+ outputTanh = {dim_in = {dout}, dim_out = {dout}},
},
- ["nerv.ElemMulLayer"] = {
- [ap("inputGMulL")] = {{}, {dim_in = {din3, din3},
- dim_out = {din3}}},
- [ap("forgetGMulL")] = {{}, {dim_in = {din3, din3},
- dim_out = {din3}}},
- [ap("outputGMulL")] = {{}, {dim_in = {din3, din3},
- dim_out = {din3}}},
+ ['nerv.LSTMGateLayer'] = {
+ forgetGate = {dim_in = {din, dout, dout}, dim_out = {dout}, param_type = {'N', 'N', 'D'}, pr = pr},
+ inputGate = {dim_in = {din, dout, dout}, dim_out = {dout}, param_type = {'N', 'N', 'D'}, pr = pr},
+ outputGate = {dim_in = {din, dout, dout}, dim_out = {dout}, param_type = {'N', 'N', 'D'}, pr = pr},
+ },
+ ['nerv.ElemMulLayer'] = {
+ inputGateMul = {dim_in = {dout, dout}, dim_out = {dout}},
+ forgetGateMul = {dim_in = {dout, dout}, dim_out = {dout}},
+ outputGateMul = {dim_in = {dout, dout}, dim_out = {dout}},
},
}
- local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
-
local connections = {
- ["<input>[1]"] = ap("inputXDup[1]"),
- ["<input>[2]"] = ap("inputHDup[1]"),
- ["<input>[3]"] = ap("inputCDup[1]"),
-
- [ap("inputXDup[1]")] = ap("mainAffineL[1]"),
- [ap("inputHDup[1]")] = ap("mainAffineL[2]"),
- [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
-
- [ap("inputXDup[2]")] = ap("inputGateL[1]"),
- [ap("inputHDup[2]")] = ap("inputGateL[2]"),
- [ap("inputCDup[1]")] = ap("inputGateL[3]"),
-
- [ap("inputXDup[3]")] = ap("forgetGateL[1]"),
- [ap("inputHDup[3]")] = ap("forgetGateL[2]"),
- [ap("inputCDup[2]")] = ap("forgetGateL[3]"),
-
- [ap("mainTanhL[1]")] = ap("inputGMulL[1]"),
- [ap("inputGateL[1]")] = ap("inputGMulL[2]"),
-
- [ap("inputCDup[3]")] = ap("forgetGMulL[1]"),
- [ap("forgetGateL[1]")] = ap("forgetGMulL[2]"),
-
- [ap("inputGMulL[1]")] = ap("mainCDup[1]"),
- [ap("forgetGMulL[1]")] = ap("mainCDup[2]"),
-
- [ap("inputXDup[4]")] = ap("outputGateL[1]"),
- [ap("inputHDup[4]")] = ap("outputGateL[2]"),
- [ap("mainCDup[3]")] = ap("outputGateL[3]"),
-
- [ap("mainCDup[2]")] = "<output>[2]",
- [ap("mainCDup[1]")] = ap("outputTanhL[1]"),
-
- [ap("outputTanhL[1]")] = ap("outputGMulL[1]"),
- [ap("outputGateL[1]")] = ap("outputGMulL[2]"),
-
- [ap("outputGMulL[1]")] = "<output>[1]",
+ -- lstm input
+ {'<input>[1]', 'inputDup[1]', 0},
+
+ -- input gate
+ {'inputDup[1]', 'inputGate[1]', 0},
+ {'outputDup[1]', 'inputGate[2]', 1},
+ {'cellDup[1]', 'inputGate[3]', 1},
+
+ -- forget gate
+ {'inputDup[2]', 'forgetGate[1]', 0},
+ {'outputDup[2]', 'forgetGate[2]', 1},
+ {'cellDup[2]', 'forgetGate[3]', 1},
+
+ -- lstm cell
+ {'forgetGate[1]', 'forgetGateMul[1]', 0},
+ {'cellDup[3]', 'forgetGateMul[2]', 1},
+ {'inputDup[3]', 'mainAffine[1]', 0},
+ {'outputDup[3]', 'mainAffine[2]', 1},
+ {'mainAffine[1]', 'mainTanh[1]', 0},
+ {'inputGate[1]', 'inputGateMul[1]', 0},
+ {'mainTanh[1]', 'inputGateMul[2]', 0},
+ {'inputGateMul[1]', 'mainCombine[1]', 0},
+ {'forgetGateMul[1]', 'mainCombine[2]', 0},
+ {'mainCombine[1]', 'cellDup[1]', 0},
+
+ -- output gate
+ {'inputDup[4]', 'outputGate[1]', 0},
+ {'outputDup[4]', 'outputGate[2]', 1},
+ {'cellDup[4]', 'outputGate[3]', 0},
+
+ -- lstm output
+ {'cellDup[5]', 'outputTanh[1]', 0},
+ {'outputGate[1]', 'outputGateMul[1]', 0},
+ {'outputTanh[1]', 'outputGateMul[2]', 0},
+ {'outputGateMul[1]', 'outputDup[1]', 0},
+ {'outputDup[5]', '<output>[1]', 0},
}
- self.dag = nerv.DAGLayer(self.id, global_conf,
- {dim_in = self.dim_in,
- dim_out = self.dim_out,
- sub_layers = layerRepo,
- connections = connections})
-
- self:check_dim_len(3, 2) -- x, h, c and h, c
-end
-
-function LSTMLayer:init(batch_size, chunk_size)
- self.dag:init(batch_size, chunk_size)
-end
-
-function LSTMLayer:batch_resize(batch_size, chunk_size)
- self.dag:batch_resize(batch_size, chunk_size)
-end
-
-function LSTMLayer:update(bp_err, input, output, t)
- self.dag:update(bp_err, input, output, t)
-end
-
-function LSTMLayer:propagate(input, output, t)
- self.dag:propagate(input, output, t)
-end
-
-function LSTMLayer:back_propagate(bp_err, next_bp_err, input, output, t)
- self.dag:back_propagate(bp_err, next_bp_err, input, output, t)
-end
-function LSTMLayer:get_params()
- return self.dag:get_params()
+ self:add_prefix(layers, connections)
+ local layer_repo = nerv.LayerRepo(layers, pr, global_conf)
+ self:graph_init(layer_repo, connections)
end
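
With this rewrite the LSTM is a GraphLayer with one input and one output; the hidden and cell states travel along the time-1 edges above instead of occupying extra ports as in the old 3-in/2-out DAGLayer version. A hypothetical instantiation:

    -- hedged sketch; the id and dims are made up
    local lstm = nerv.LSTMLayer('lstm0', gconf,
                                {dim_in = {429}, dim_out = {2048}})
    -- pass layer_conf.pr to share the gate/affine parameters; when omitted an
    -- empty ParamRepo with the layer's loc_type is created internally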
diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua
index 8785b4f..e690721 100644
--- a/nerv/layer/lstm_gate.lua
+++ b/nerv/layer/lstm_gate.lua
@@ -2,24 +2,23 @@ local LSTMGateLayer = nerv.class('nerv.LSTMGateLayer', 'nerv.Layer')
-- NOTE: this is a full matrix gate
function LSTMGateLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self.param_type = layer_conf.param_type
- self.gconf = global_conf
+ self:check_dim_len(-1, 1) -- accept multiple inputs
+ self:bind_params()
+end
+function LSTMGateLayer:bind_params()
for i = 1, #self.dim_in do
- self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf,
+ self["ltp" .. i] = self:find_param("ltp" .. i, self.lconf, self.gconf,
nerv.LinearTransParam,
{self.dim_in[i], self.dim_out[1]})
if self.param_type[i] == 'D' then
self["ltp" .. i].trans:diagonalize()
end
end
- self.bp = self:find_param("bp", layer_conf, global_conf,
+ self.bp = self:find_param("bp", self.lconf, self.gconf,
nerv.BiasParam, {1, self.dim_out[1]})
-
- self:check_dim_len(-1, 1) --accept multiple inputs
end
function LSTMGateLayer:init(batch_size)
@@ -76,7 +75,7 @@ function LSTMGateLayer:update(bp_err, input, output)
end
function LSTMGateLayer:get_params()
- local pr = nerv.ParamRepo({self.bp})
+ local pr = nerv.ParamRepo({self.bp}, self.loc_type)
for i = 1, #self.dim_in do
pr:add(self["ltp" .. i].id, self["ltp" .. i])
end
diff --git a/nerv/layer/mse.lua b/nerv/layer/mse.lua
index 1c218d0..458d086 100644
--- a/nerv/layer/mse.lua
+++ b/nerv/layer/mse.lua
@@ -1,18 +1,14 @@
local MSELayer = nerv.class("nerv.MSELayer", "nerv.Layer")
function MSELayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = global_conf
- if self.gconf.use_cpu then
- self.mat_type = self.gconf.mmat_type
- else
- self.mat_type = self.gconf.cumat_type
- end
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:check_dim_len(2, -1)
end
+function MSELayer:bind_params()
+ -- do nothing
+end
+
function MSELayer:init(batch_size)
if self.dim_in[1] ~= self.dim_in[2] then
nerv.error("mismatching dimensions of previous network output and labels")
@@ -61,5 +57,5 @@ function MSELayer:back_propagate(bp_err, next_bp_err, input, output)
end
function MSELayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua
new file mode 100644
index 0000000..0b5ccaa
--- /dev/null
+++ b/nerv/layer/rnn.lua
@@ -0,0 +1,42 @@
+local RNNLayer = nerv.class('nerv.RNNLayer', 'nerv.GraphLayer')
+
+function RNNLayer:__init(id, global_conf, layer_conf)
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
+ self:check_dim_len(1, 1)
+
+ if layer_conf.activation == nil then
+ layer_conf.activation = 'nerv.SigmoidLayer'
+ end
+
+ local din = layer_conf.dim_in[1]
+ local dout = layer_conf.dim_out[1]
+
+ local pr = layer_conf.pr
+ if pr == nil then
+ pr = nerv.ParamRepo({}, self.loc_type)
+ end
+
+ local layers = {
+ ['nerv.AffineLayer'] = {
+ main = {dim_in = {din, dout}, dim_out = {dout}, pr = pr},
+ },
+ [layer_conf.activation] = { -- indexing the table under construction would fail
+ activation = {dim_in = {dout}, dim_out = {dout}},
+ },
+ ['nerv.DuplicateLayer'] = {
+ duplicate = {dim_in = {dout}, dim_out = {dout, dout}},
+ },
+ }
+
+ local connections = {
+ {'<input>[1]', 'main[1]', 0},
+ {'main[1]', 'activation[1]', 0},
+ {'activation[1]', 'duplicate[1]', 0},
+ {'duplicate[1]', 'main[2]', 1},
+ {'duplicate[2]', '<output>[1]', 0},
+ }
+
+ self:add_prefix(layers, connections)
+ local layer_repo = nerv.LayerRepo(layers, pr, global_conf)
+ self:graph_init(layer_repo, connections)
+end
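
The new RNNLayer follows the same pattern; activation names a layer class (defaulting to nerv.SigmoidLayer) and the recurrence is the time-1 edge from duplicate[1] back to main[2]. A hypothetical instantiation:

    -- hedged sketch; the id and dims are made up
    local rnn = nerv.RNNLayer('rnn0', gconf,
                              {dim_in = {429}, dim_out = {600},
                               activation = 'nerv.TanhLayer'})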
diff --git a/nerv/layer/sigmoid.lua b/nerv/layer/sigmoid.lua
index 0a8bcdc..5974ffc 100644
--- a/nerv/layer/sigmoid.lua
+++ b/nerv/layer/sigmoid.lua
@@ -1,19 +1,20 @@
local SigmoidLayer = nerv.class("nerv.SigmoidLayer", "nerv.Layer")
function SigmoidLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.gconf = global_conf
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:check_dim_len(1, 1)
-end
-
-function SigmoidLayer:init()
if self.dim_in[1] ~= self.dim_out[1] then
nerv.error("mismatching dimensions of input and output")
end
end
+function SigmoidLayer:bind_params()
+ -- do nothing
+end
+
+function SigmoidLayer:init()
+end
+
function SigmoidLayer:batch_resize(batch_size)
-- do nothing
end
@@ -31,5 +32,5 @@ function SigmoidLayer:back_propagate(bp_err, next_bp_err, input, output)
end
function SigmoidLayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/softmax.lua b/nerv/layer/softmax.lua
index 4205b66..f7a5163 100644
--- a/nerv/layer/softmax.lua
+++ b/nerv/layer/softmax.lua
@@ -1,13 +1,14 @@
local SoftmaxLayer = nerv.class("nerv.SoftmaxLayer", "nerv.Layer")
function SoftmaxLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.gconf = global_conf
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:check_dim_len(1, 1) -- one input and one output
end
+function SoftmaxLayer:bind_params()
+ -- do nothing
+end
+
function SoftmaxLayer:init(batch_size)
if self.dim_in[1] ~= self.dim_out[1] then
nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function SoftmaxLayer:back_propagate(bp_err, next_bp_err, input, output)
end
function SoftmaxLayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua
index d7d650e..7b4a80c 100644
--- a/nerv/layer/softmax_ce.lua
+++ b/nerv/layer/softmax_ce.lua
@@ -1,15 +1,7 @@
local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayer", "nerv.Layer")
function SoftmaxCELayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.gconf = global_conf
- if self.gconf.use_cpu then
- self.mat_type = self.gconf.mmat_type
- else
- self.mat_type = self.gconf.cumat_type
- end
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self.compressed = layer_conf.compressed
if self.compressed == nil then
self.compressed = false
@@ -17,6 +9,10 @@ function SoftmaxCELayer:__init(id, global_conf, layer_conf)
self:check_dim_len(2, -1) -- two inputs: nn output and label
end
+function SoftmaxCELayer:bind_params()
+ -- do nothing
+end
+
function SoftmaxCELayer:init(batch_size, chunk_size)
if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then
nerv.error("mismatching dimensions of previous network output and labels")
@@ -94,5 +90,5 @@ function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output, t)
end
function SoftmaxCELayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/tanh.lua b/nerv/layer/tanh.lua
index e1c32f2..7a19fc8 100644
--- a/nerv/layer/tanh.lua
+++ b/nerv/layer/tanh.lua
@@ -1,13 +1,14 @@
local TanhLayer = nerv.class("nerv.TanhLayer", "nerv.Layer")
function TanhLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.gconf = global_conf
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:check_dim_len(1, 1)
end
+function TanhLayer:bind_params()
+ -- do nothing
+end
+
function TanhLayer:init()
if self.dim_in[1] ~= self.dim_out[1] then
nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function TanhLayer:back_propagate(bp_err, next_bp_err, input, output)
end
function TanhLayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/window.lua b/nerv/layer/window.lua
index 4933de0..364929f 100644
--- a/nerv/layer/window.lua
+++ b/nerv/layer/window.lua
@@ -1,12 +1,15 @@
local WindowLayer = nerv.class("nerv.WindowLayer", "nerv.Layer")
function WindowLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.gconf = global_conf
- self.window = layer_conf.window
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:check_dim_len(1, 1)
+ self:bind_params()
+end
+
+function WindowLayer:bind_params()
+ self.window = self:find_param("window", self.lconf, self.gconf,
+ nerv.BiasParam,
+ {1, self.dim_out[1]})
end
function WindowLayer:init()
@@ -28,5 +31,5 @@ function WindowLayer:propagate(input, output)
end
function WindowLayer:get_params()
- return nerv.ParamRepo({self.window})
+ return nerv.ParamRepo({self.window}, self.loc_type)
end