--- Implements the concept of groups of parameters (`nerv.Param`) and
-- computation nodes (`nerv.Layer`).
--- The class describing a group of parameters (an internal state) that can be
-- bound to layers. This class also implements the *chunk* interface (see
-- `nerv.ChunkFile`), which means instances of `nerv.Param` can be exported to
-- chunk files as chunks.
-- @type nerv.Param
local Param = nerv.class('nerv.Param')
--- The constructor.
-- @param id the identifier for the group of parameters
-- @param global_conf a table describing the computation state and providing
-- some global settings
function Param:__init(id, global_conf)
self.id = id
self.gconf = global_conf
end
--- Retrieve the metadata of the parameter group. This function implements the
-- *chunk* interface.
-- @return a table containing all metadata
function Param:get_info()
return self.info
end
--- Set the metadata of the parameter group. This function implements the
-- *chunk* interface.
-- @param info a table containing all metadata
function Param:set_info(info)
self.info = info
end
--- Read from the given file handle. This function implements the
-- *chunk* interface.
-- @param handle the file handle
function Param:read(handle)
nerv.error_method_not_implemented()
end
--- Write to the given file handle. This function implements the
-- *chunk* interface.
-- @param handle the file handle
function Param:write(handle)
nerv.error_method_not_implemented()
end
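-- How a concrete subclass fills in `read`/`write` depends on its storage; the
-- following is a minimal sketch for a matrix-backed parameter, modeled on what
-- matrix params in `affine.lua` do (`MyParam` is a made-up name):
--
--     local MyParam = nerv.class('nerv.MyParam', 'nerv.Param')
--     function MyParam:read(handle)
--         -- load into host memory first, then move to the device if needed
--         self.trans = self.gconf.mmat_type.load(handle)
--         if not self.gconf.use_cpu then
--             self.trans = self.gconf.cumat_type.new_from_host(self.trans)
--         end
--     end
--     function MyParam:write(handle)
--         local trans = self.trans
--         if not self.gconf.use_cpu then
--             trans = self.trans:new_to_host()
--         end
--         trans:save(handle)
--     end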
--- A value generator that always yields zero. It is suitable as the `pgen`
-- argument of `nerv.Layer.find_param` (e.g. for zero-initialized biases).
-- @return the constant `0`
function Param.gen_zero()
return 0
end
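-- For example, `gen_zero` can be passed as the `pgen` argument of
-- `nerv.Layer.find_param` to zero-initialize a bias (sketch following
-- `nerv.AffineLayer.bind_params`):
--
--     self.bp = self:find_param("bp", self.lconf, self.gconf, nerv.BiasParam,
--                               {1, self.dim_out[1]}, nerv.Param.gen_zero)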
--- The class describing a single computation node which computes the values
-- on its output ports from those on its input ports; the outputs may in turn
-- feed the input ports of other layers.
-- @type nerv.Layer
local Layer = nerv.class('nerv.Layer')
--- The constructor. All inheriting classes should call this base constructor to
-- initialize some predefined fields (of `self`):
--
-- * `id`: the identifier of the layer
-- * `gconf`: a table describing the computation state and providing some
--   global settings
-- * `lconf`: a table providing settings dedicated to the layer. Some fields
--   are considered "standard" and shared by all layers:
--     * `dim_in`: an array of the input port dimensions (widths), in order
--     * `dim_out`: an array of the output port dimensions (widths), in order
--     * `params`: optional, a table mapping each manually bound parameter
--       name used by the layer to the parameter id used to find the
--       parameter in the parameter repo
--     * `pr`: optional, the parameter repo (see `nerv.ParamRepo`) in which
--       parameters are looked up while binding, used by `nerv.Layer.find_param`
-- * `mat_type`: the type of matrix that should be used when storing
--   intermediate results
-- * `loc_type`: a value from `nerv.ParamRepo.LOC_TYPES` indicating whether
--   `nerv.Param` instances are stored in host or device RAM
-- * `dim_in`: an array of the input port dimensions (widths), in order
-- * `dim_out`: an array of the output port dimensions (widths), in order
--
-- @param id the identifier
-- @param global_conf see `self.gconf`
-- @param layer_conf see `self.lconf`
function Layer:__init(id, global_conf, layer_conf)
self.id = id
self.gconf = global_conf
self.lconf = layer_conf
if self.gconf.use_cpu then
self.mat_type = self.gconf.mmat_type
self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
else
self.mat_type = self.gconf.cumat_type
self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE
end
self.dim_in = layer_conf.dim_in
self.dim_out = layer_conf.dim_out
end
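-- A sketch of a typical subclass constructor (`MyLayer` is a made-up name),
-- chaining to this base constructor, validating the ports and binding
-- parameters, as e.g. `nerv.AffineLayer` does:
--
--     local MyLayer = nerv.class('nerv.MyLayer', 'nerv.Layer')
--     function MyLayer:__init(id, global_conf, layer_conf)
--         nerv.Layer.__init(self, id, global_conf, layer_conf)
--         self:check_dim_len(1, 1) -- exactly one input and one output port
--         self:bind_params()
--     end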
--- Initialize the layer; called once for each epoch.
-- @param batch_size the number of samples (rows) in a mini-batch
function Layer:init(batch_size)
nerv.error_method_not_implemented()
end
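-- Implementations typically allocate per-epoch state here; a sketch assuming
-- a hypothetical intermediate buffer:
--
--     function MyLayer:init(batch_size)
--         -- allocate a working buffer sized to the mini-batch
--         self.buf = self.mat_type(batch_size, self.dim_out[1])
--     end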
--- Update (change the state of) the parameters bound (tied) to the layer
-- according to the computed gradients.
-- @param bp_err an array of row-major matrices storing the error
-- back-propagated from the output ports
-- @param input an array of row-major matrices storing the input before the
-- forward propagation
-- @param output an array of row-major matrices storing the output after the
-- forward propagation
-- @param t BPTT time `t`
function Layer:update(bp_err, input, output, t)
nerv.error_method_not_implemented()
end
--- Calculate the values in output ports according to the input.
-- @param input an array of row-major matrices storing the input before the
-- forward propagation
-- @param output an array of row-major matrices storing the output after the
-- forward propagation
-- @param t BPTT time `t`
function Layer:propagate(input, output, t)
nerv.error_method_not_implemented()
end
--- Calculate the next error value (`next_bp_err`) by back-propagation.
-- @param bp_err an array of row-major matrices storing the error
-- back-propagated from the output ports
-- @param next_bp_err an array of row-major matrices storing the next error
-- back-propagated to the input ports
-- @param input an array of row-major matrices storing the input before the
-- forward propagation
-- @param output an array of row-major matrices storing the output after the
-- forward propagation
-- @param t BPTT time `t`
function Layer:back_propagate(bp_err, next_bp_err, input, output, t)
nerv.error_method_not_implemented()
end
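-- For a concrete picture of the three passes, a parameter-free element-wise
-- layer could implement them as follows (sketch modeled on
-- `nerv.SigmoidLayer`):
--
--     function MyLayer:propagate(input, output, t)
--         output[1]:sigmoid(input[1])
--     end
--     function MyLayer:back_propagate(bp_err, next_bp_err, input, output, t)
--         next_bp_err[1]:sigmoid_grad(bp_err[1], output[1])
--     end
--     function MyLayer:update(bp_err, input, output, t)
--         -- no bound parameters, nothing to update
--     end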
--- Check the lengths of the dimension arrays. It is recommended to call this
-- function in your layer implementation to validate the ports.
-- @param len_in the expected number of input ports (-1 if variable)
-- @param len_out the expected number of output ports (-1 if variable)
function Layer:check_dim_len(len_in, len_out)
    local actual_in = #self.dim_in
    local actual_out = #self.dim_out
    if len_in > 0 and actual_in ~= len_in then
        nerv.error("layer %s expects %d inputs, %d given",
                    self.id, len_in, actual_in)
    end
    if len_out > 0 and actual_out ~= len_out then
        nerv.error("layer %s expects %d outputs, %d given",
                    self.id, len_out, actual_out)
end
end
--- Get all the parameters used by (bound to) the layer.
-- @return a `nerv.ParamRepo` instance storing all the involved parameters
function Layer:get_params()
nerv.error_method_not_implemented()
end
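-- Typically this wraps every bound parameter in a fresh repo, e.g. (sketch;
-- `self.ltp` and `self.bp` are assumed to have been set by `bind_params`):
--
--     function MyLayer:get_params()
--         return nerv.ParamRepo({self.ltp, self.bp}, self.loc_type)
--     end
--
-- A parameter-free layer simply returns `nerv.ParamRepo({}, self.loc_type)`.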
--- Look up and bind the parameters in the repo specified in the layer
-- settings. A properly implemented layer will invoke `find_param` from
-- within this function.
function Layer:bind_params()
nerv.error_method_not_implemented()
end
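-- A sketch of a typical implementation, delegating the lookup to
-- `find_param` (cf. `nerv.AffineLayer.bind_params`):
--
--     function MyLayer:bind_params()
--         self.ltp = self:find_param("ltp", self.lconf, self.gconf,
--                                    nerv.LinearTransParam,
--                                    {self.dim_in[1], self.dim_out[1]})
--     end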
--- Get two arrays describing the dimensions of the input and output ports.
-- @return two arrays: `<dim_in>, <dim_out>`
function Layer:get_dim()
return self.dim_in, self.dim_out
end
--- Set an attribute of the layer.
-- @param name the name of the attribute
-- @param value the value of the attribute
function Layer:set_attr(name, value)
self[name] = value
end
--- Get the contained (nested) layer inside the layer (useful for complex
-- layers like `nerv.GraphLayer`).
-- @param id the identifier of the nested layer to be found
function Layer:get_sublayer(id)
nerv.error('primitive layer does not have sublayers')
end
--- Find the parameter according to the layer settings. This function should be
-- used as the only way to locate a parameter in the implementation of
-- `bind_params` (see `nerv.AffineLayer.bind_params`).
-- @param plist the potential names for the parameter in the order of lookup priority
-- @param lconf a table providing settings dedicated to the layer
-- @param gconf a table describing the computation state and providing some
-- global settings
-- @param ptype the type for constructing the parameter instance when
-- auto-generation is triggered
-- @param pdim an array specifying each dimension of the parameter when
-- auto-generation is triggered
-- @param pgen optional, the function used to generate values when
-- auto-generation is triggered; `gconf.param_gen` will be used if `nil`
-- @return the located (or newly generated) `nerv.Param` instance
function Layer:find_param(plist, lconf, gconf, ptype, pdim, pgen)
if type(plist) == "string" then
plist = {plist}
end
if lconf.params == nil then
lconf.params = {}
end
    local plist_str = table.tostring(plist)
local pid
    for i, pname in ipairs(plist) do
        -- an id manually bound in `layer_conf.params` takes precedence
        if lconf.params[pname] ~= nil then
            nerv.info("param id for [%s] of layer [%s] specified in `layer_conf.params`.", pname, self.id)
            pid = lconf.params[pname]
            if lconf.pr:has_param(pid) then
                return lconf.pr:get_param(pid)
            end
        end
        -- otherwise fall back to the conventional id `<layer_id>_<pname>`
        pid = self.id .. '_' .. pname
        if lconf.pr:has_param(pid) then
            nerv.info("param id for [%s] of layer [%s] is generated automatically.", pname, self.id)
            return lconf.pr:get_param(pid)
        end
    end
pid = self.id .. '_' .. plist[1]
    nerv.info("param id for [%s] of layer [%s] is not found in the specified param repo, " ..
              "switching to auto-generation", plist_str, self.id)
local p = ptype(pid, gconf)
p.trans = self.mat_type(unpack(pdim))
pgen = pgen or gconf.param_gen
or gconf.param_random -- obsolete name
if type(pgen) ~= "function" then
nerv.error("a param generate function is needed")
end
p.trans:generate(pgen)
return p
end
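-- To illustrate the lookup order (the ids and dimensions below are made up):
-- for `self:find_param({"ltp", "w"}, lconf, gconf, nerv.LinearTransParam,
-- {429, 2048})` on a layer with id "affine0", the function tries, in order:
--
--   1. the id manually bound to "ltp" (then "w") in `lconf.params`, if any;
--   2. the conventional id "affine0_ltp" (then "affine0_w") in `lconf.pr`;
--   3. failing both, it generates a fresh 429x2048 `nerv.LinearTransParam`
--      with id "affine0_ltp", filled by `pgen` (or `gconf.param_gen`).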
nerv.include('graph.lua')
nerv.include('affine.lua')
nerv.include('sigmoid.lua')
nerv.include('tanh.lua')
nerv.include('softmax_ce.lua')
nerv.include('bias.lua')
nerv.include('window.lua')
nerv.include('mse.lua')
nerv.include('combiner.lua')
nerv.include('softmax.lua')
nerv.include('elem_mul.lua')
nerv.include('lstm.lua')
nerv.include('lstm_gate.lua')
nerv.include('dropout.lua')
nerv.include('gru.lua')
nerv.include('rnn.lua')
nerv.include('duplicate.lua')
nerv.include('identity.lua')
nerv.include('projection.lua')
nerv.include('lstmp.lua')
nerv.include('relu.lua')
-- The following lines are for backward compatibility and will be removed in
-- the future. The use of these names is deprecated.
nerv.DropoutLayerT = nerv.DropoutLayer
nerv.GRULayerT = nerv.GRULayer
nerv.LSTMLayerT = nerv.LSTMLayer
nerv.SoftmaxCELayerT = nerv.SoftmaxCELayer