--- Implements the concept of groups of parameters (`nerv.Param`) and
-- computation nodes (`nerv.Layer`).
--- The class describing a group of parameters (an internal state) that can be
-- bound to layers. This class also implements the *chunk* interface (see
-- `nerv.ChunkFile`), which means instances of `nerv.Param` can be exported to
-- chunk files as chunks.
-- @type nerv.Param
local Param = nerv.class('nerv.Param')
--- The constructor.
-- @param id the identifier for the group of parameters
-- @param global_conf a table describing the computation state and providing
-- some global settings
function Param:__init(id, global_conf)
self.id = id
self.gconf = global_conf
end
--- Retrieve the metadata of the parameter group. This function implements the
-- *chunk* interface.
-- @return a table containing all metadata
function Param:get_info()
return self.info
end
--- Set the metadata of the parameter group. This function implements the
-- *chunk* interface.
-- @param info a table containing all metadata
function Param:set_info(info)
self.info = info
end
--- Read from the given file handle. This function implements the
-- *chunk* interface.
-- @param handle the file handle
function Param:read(handle)
nerv.error_method_not_implemented()
end
--- Write to the given file handle. This function implements the
-- *chunk* interface.
-- @param handle the file handle
function Param:write(handle)
nerv.error_method_not_implemented()
end
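-- How a concrete subclass fills in `read`/`write` depends on its storage; the
-- following is a minimal sketch for a matrix-backed parameter, modeled on what
-- matrix params in `affine.lua` do (`MyParam` is a made-up name):
--
--     local MyParam = nerv.class('nerv.MyParam', 'nerv.Param')
--     function MyParam:read(handle)
--         -- load into host memory first, then move to the device if needed
--         self.trans = self.gconf.mmat_type.load(handle)
--         if not self.gconf.use_cpu then
--             self.trans = self.gconf.cumat_type.new_from_host(self.trans)
--         end
--     end
--     function MyParam:write(handle)
--         local trans = self.trans
--         if not self.gconf.use_cpu then
--             trans = self.trans:new_to_host()
--         end
--         trans:save(handle)
--     end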
--- A value generator that always yields zero. It is suitable as the `pgen`
-- argument of `nerv.Layer.find_param` (e.g. for zero-initialized biases).
-- @return the constant `0`
function Param.gen_zero()
return 0
end
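-- For example, `gen_zero` can be passed as the `pgen` argument of
-- `nerv.Layer.find_param` to zero-initialize a bias (sketch following
-- `nerv.AffineLayer.bind_params`):
--
--     self.bp = self:find_param("bp", self.lconf, self.gconf, nerv.BiasParam,
--                               {1, self.dim_out[1]}, nerv.Param.gen_zero)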
--- The class describing a single computation node which computes the values
-- on its output ports from those on its input ports; the outputs may in turn
-- feed the input ports of other layers.
-- @type nerv.Layer
local Layer = nerv.class('nerv.Layer')
--- The constructor. All inheriting classes should call this base constructor to
-- initialize some predefined fields (of `self`):
--
-- * `id`: the identifier of the layer
-- * `gconf`: a table describing the computation state and providing some
--   global settings
-- * `lconf`: a table providing settings dedicated to the layer. Some fields
--   are considered "standard" and shared by all layers:
--     * `dim_in`: an array of the input port dimensions (widths), in order
--     * `dim_out`: an array of the output port dimensions (widths), in order
--     * `params`: optional, a table mapping each manually bound parameter
--       name used by the layer to the parameter id used to find the
--       parameter in the parameter repo
--     * `pr`: optional, the parameter repo (see `nerv.ParamRepo`) in which
--       parameters are looked up while binding, used by `nerv.Layer.find_param`
-- * `mat_type`: the type of matrix that should be used when storing
--   intermediate results
-- * `loc_type`: a value from `nerv.ParamRepo.LOC_TYPES` indicating whether
--   `nerv.Param` instances are stored in host or device RAM
-- * `dim_in`: an array of the input port dimensions (widths), in order
-- * `dim_out`: an array of the output port dimensions (widths), in order
--
-- @param id the identifier
-- @param global_conf see `self.gconf`
-- @param layer_conf see `self.lconf`
function Layer:__init(id, global_conf, layer_conf)
self.id = id
self.gconf = global_conf
self.lconf = layer_conf
if self.gconf.use_cpu then
self.mat_type = self.gconf.mmat_type
self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
else
self.mat_type = self.gconf.cumat_type
self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE
end
self.dim_in = layer_conf.dim_in
self.dim_out = layer_conf.dim_out
end
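-- A sketch of a typical subclass constructor (`MyLayer` is a made-up name),
-- chaining to this base constructor, validating the ports and binding
-- parameters, as e.g. `nerv.AffineLayer` does:
--
--     local MyLayer = nerv.class('nerv.MyLayer', 'nerv.Layer')
--     function MyLayer:__init(id, global_conf, layer_conf)
--         nerv.Layer.__init(self, id, global_conf, layer_conf)
--         self:check_dim_len(1, 1) -- exactly one input and one output port
--         self:bind_params()
--     end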
--- Initialize the layer; called once for each epoch.
-- @param batch_size the number of samples (rows) in a mini-batch
function Layer:init(batch_size)
nerv.error_method_not_implemented()
end
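-- Implementations typically allocate per-epoch state here; a sketch assuming
-- a hypothetical intermediate buffer:
--
--     function MyLayer:init(batch_size)
--         -- allocate a working buffer sized to the mini-batch
--         self.buf = self.mat_type(batch_size, self.dim_out[1])
--     end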
--- Update (change the state of) the parameters bound (tied) to the layer
-- according to the computed gradients.
-- @param bp_err an array of row-major matrices storing the error
-- back-propagated from the output ports
-- @param input an array of row-major matrices storing the input before the
-- forward propagation
-- @param output an array of row-major matrices storing the output after the
-- forward propagation
-- @param t BPTT time `t`
function Layer:update(bp_err, input, output, t)
nerv.error_method_not_implemented()
end
--- Calculate the values in output ports according to the input.
-- @param input an array of row-major matrices storing the input before the
-- forward propagation
-- @param output an array of row-major matrices storing the output after the
-- forward propagation
-- @param t BPTT time `t`
function Layer:propagate(input, output, t)
nerv.error_method_not_implemented()
end
--- Calculate the next error value (`next_bp_err`) by back-propagation.
-- @param bp_err an array of row-major matrices storing the error
-- back-propagated from the output ports
-- @param next_bp_err an array of row-major matrices storing the next error
-- back-propagated to the input ports
-- @param input an array of row-major matrices storing the input before the
-- forward propagation
-- @param output an array of row-major matrices storing the output after the
-- forward propagation
-- @param t BPTT time `t`
function Layer:back_propagate(bp_err, next_bp_err, input, output, t)
nerv.error_method_not_implemented()
end
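-- For a concrete picture of the three passes, a parameter-free element-wise
-- layer could implement them as follows (sketch modeled on
-- `nerv.SigmoidLayer`):
--
--     function MyLayer:propagate(input, output, t)
--         output[1]:sigmoid(input[1])
--     end
--     function MyLayer:back_propagate(bp_err, next_bp_err, input, output, t)
--         next_bp_err[1]:sigmoid_grad(bp_err[1], output[1])
--     end
--     function MyLayer:update(bp_err, input, output, t)
--         -- no bound parameters, nothing to update
--     end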
--- Check the lengths of the dimension arrays. It is recommended to call this
-- function in your layer implementation to validate the ports.
-- @param len_in the expected number of input ports (-1 if variable)
-- @param len_out the expected number of output ports (-1 if variable)
function Layer:check_dim_len(len_in, len_out)
    local actual_in = #self.dim_in
    local actual_out = #self.dim_out
    if len_in > 0 and actual_in ~= len_in then
        nerv.error("layer %s expects %d inputs, %d given",
                    self.id, len_in, actual_in)
    end
    if len_out > 0 and actual_out ~= len_out then
        nerv.error("layer %s expects %d outputs, %d given",
                    self.id, len_out, actual_out)
end
end
--- Get all the parameters used by (bound to) the layer.
-- @return a `nerv.ParamRepo` instance storing all the involved parameters
function Layer:get_params()
nerv.error_method_not_implemented()
end
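-- Typically this wraps every bound parameter in a fresh repo, e.g. (sketch;
-- `self.ltp` and `self.bp` are assumed to have been set by `bind_params`):
--
--     function MyLayer:get_params()
--         return nerv.ParamRepo({self.ltp, self.bp}, self.loc_type)
--     end
--
-- A parameter-free layer simply returns `nerv.ParamRepo({}, self.loc_type)`.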
--- Look up and bind the parameters in the repo specified in the layer
-- settings. A properly implemented layer will invoke `find_param` from
-- within this function.
function Layer:bind_params()
nerv.error_method_not_implemented()
end
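-- A sketch of a typical implementation, delegating the lookup to
-- `find_param` (cf. `nerv.AffineLayer.bind_params`):
--
--     function MyLayer:bind_params()
--         self.ltp = self:find_param("ltp", self.lconf, self.gconf,
--                                    nerv.LinearTransParam,
--                                    {self.dim_in[1], self.dim_out[1]})
--     end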
--- Get two arrays describing the dimensions of the input and output ports.
-- @return two arrays: `<dim_in>, <dim_out>`
function Layer:get_dim()
return self.dim_in, self.dim_out
end
--- Set an attribute of the layer.
-- @param name the name of the attribute
-- @param value the value of the attribute
function Layer:set_attr(name, value)
self[name] = value
end
--- Get the contained (nested) layer inside the layer (useful for complex
-- layers like `nerv.GraphLayer`).
-- @param id the identifier of the nested layer to be found
function Layer:get_sublayer(id)
nerv.error('primitive layer does not have sublayers')
end
--- Find the parameter according to the layer settings. This function should be
-- used as the only way to locate a parameter in the implementation of
-- `bind_params` (see `nerv.AffineLayer.bind_params`).
-- @param plist the potential names for the parameter in the order of lookup priority
-- @param lconf a table providing settings dedicated to the layer
-- @param gconf a table describing the computation state and providing some
-- global settings
-- @param ptype the type for constructing the parameter instance when
-- auto-generation is triggered
-- @param pdim an array specifying each dimension of the parameter when
-- auto-generation is triggered
-- @param pgen optional, the function used to generate values when
-- auto-generation is triggered; `gconf.param_gen` will be used if `nil`
-- @return the located (or newly generated) `nerv.Param` instance
function Layer:find_param(plist, lconf, gconf, ptype, pdim, pgen)
if type(plist) == "string" then
plist = {plist}
end
if lconf.params == nil then
lconf.params = {}
end
    local plist_str = table.tostring(plist)
local pid
    for i, pname in ipairs(plist) do
        -- an id manually bound in `layer_conf.params` takes precedence
        if lconf.params[pname] ~= nil then
            nerv.info("param id for [%s] of layer [%s] specified in `layer_conf.params`.", pname, self.id)
            pid = lconf.params[pname]
            if lconf.pr:has_param(pid) then
                return lconf.pr:get_param(pid)
            end
        end
        -- otherwise fall back to the conventional id `<layer_id>_<pname>`
        pid = self.id .. '_' .. pname
        if lconf.pr:has_param(pid) then
            nerv.info("param id for [%s] of layer [%s] is generated automatically.", pname, self.id)
            return lconf.pr:get_param(pid)
        end
    end
pid = self.id .. '_' .. plist[1]
    nerv.info("param id for [%s] of layer [%s] is not found in the specified param repo, " ..
              "switching to auto-generation", plist_str, self.id)
local p = ptype(pid, gconf)
p.trans = self.mat_type(unpack(pdim))
pgen = pgen or gconf.param_gen
or gconf.param_random -- obsolete name
if type(pgen) ~= "function" then
nerv.error("a param generate function is needed")
end
p.trans:generate(pgen)
return p
end
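-- To illustrate the lookup order (the ids and dimensions below are made up):
-- for `self:find_param({"ltp", "w"}, lconf, gconf, nerv.LinearTransParam,
-- {429, 2048})` on a layer with id "affine0", the function tries, in order:
--
--   1. the id manually bound to "ltp" (then "w") in `lconf.params`, if any;
--   2. the conventional id "affine0_ltp" (then "affine0_w") in `lconf.pr`;
--   3. failing both, it generates a fresh 429x2048 `nerv.LinearTransParam`
--      with id "affine0_ltp", filled by `pgen` (or `gconf.param_gen`).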
nerv.include('graph.lua')
nerv.include('affine.lua')
nerv.include('sigmoid.lua')
nerv.include('tanh.lua')
nerv.include('softmax_ce.lua')
nerv.include('bias.lua')
nerv.include('window.lua')
nerv.include('mse.lua')
nerv.include('combiner.lua')
nerv.include('softmax.lua')
nerv.include('elem_mul.lua')
nerv.include('lstm.lua')
nerv.include('lstm_gate.lua')
nerv.include('dropout.lua')
nerv.include('gru.lua')
nerv.include('rnn.lua')
nerv.include('duplicate.lua')
nerv.include('identity.lua')
nerv.include('projection.lua')
nerv.include('lstmp.lua')
nerv.include('relu.lua')
-- The following lines are for backward compatibility and will be removed in
-- the future. The use of these names is deprecated.
nerv.DropoutLayerT = nerv.DropoutLayer
nerv.GRULayerT = nerv.GRULayer
nerv.LSTMLayerT = nerv.LSTMLayer
nerv.SoftmaxCELayerT = nerv.SoftmaxCELayer