diff options
-rw-r--r-- | TODO.rst | 1 | ||||
-rw-r--r-- | nerv/config.ld | 1 | ||||
-rw-r--r-- | nerv/init.lua | 19 | ||||
-rw-r--r-- | nerv/io/frm_buffer.lua | 4 | ||||
-rw-r--r-- | nerv/io/init.lua | 4 | ||||
-rw-r--r-- | nerv/io/seq_buffer.lua | 2 | ||||
-rw-r--r-- | nerv/layer/affine.lua | 39 | ||||
-rw-r--r-- | nerv/layer/graph.lua | 63 | ||||
-rw-r--r-- | nerv/layer/init.lua | 156 | ||||
-rw-r--r-- | nerv/matrix/init.lua | 26 | ||||
-rw-r--r-- | nerv/nn/network.lua | 13 |
11 files changed, 275 insertions, 53 deletions
@@ -4,3 +4,4 @@ TODO List - NERV user manual - NERV overview and introduction - C header file dependency detection in Makefiles +- remove layer ``batch_resize`` API? diff --git a/nerv/config.ld b/nerv/config.ld index 4ed5d6a..c84e429 100644 --- a/nerv/config.ld +++ b/nerv/config.ld @@ -8,3 +8,4 @@ style = '!pale' format = 'markdown' kind_names = {topic = 'Manual', script = 'Programs'} backtick_references = false +sort_modules = true diff --git a/nerv/init.lua b/nerv/init.lua index ba6a08d..86494e3 100644 --- a/nerv/init.lua +++ b/nerv/init.lua @@ -1,13 +1,11 @@ ---- NERV: a Lua-based toolkit for high-performance deep learning. --- This file contains misc utility functions of NERV and finally initializes --- NERV by including `init.lua` of other basic modules. +--- Contains misc utility functions of NERV and finally initializes NERV by +-- including `init.lua` of other basic modules. -- @author Ted Yin <[email protected]> -- @module nerv require 'libnerv' ---- Dummy function. --- Display a friendly error message when user attempts to invoke a +--- Display a friendly error message when user attempts to invoke a -- non-implemented function. function nerv.error_method_not_implemented() nerv.error("method not implemented"); @@ -185,7 +183,7 @@ function nerv.include(filename) return dofile(nerv.dirname(caller) .. filename) end ---- Parse the command-line options and arguments +--- Parse the command-line options and arguments. -- @param argv the argrument list to parsed -- @param options The specification of options, should be a list of tables, -- each one for exactly one available option, say `v`, with `v[1]`, `v[2]`, @@ -195,9 +193,10 @@ end -- value and description of the option. 
-- -- An example of specification: --- ```{{"aaa", "a", "boolean", default = false, desc = "an option called aaa"}, --- {"bbb", "b", "boolean", default = true, desc = "bbb is set to be true if --bbb=no does not present"}, --- {"ccc", nil, "int", default = 0, desc = "ccc expects an integeral value"}}``` +-- +-- {{"aaa", "a", "boolean", default = false, desc = "an option called aaa"}, +-- {"bbb", "b", "boolean", default = true, desc = "bbb is set to be true if --bbb=no does not present"}, +-- {"ccc", nil, "int", default = 0, desc = "ccc expects an integeral value"}} -- -- @return args, opts The non-option arguments and parsed options. `opts` is -- again a list of tables, each of which corresponds to one table in parameter @@ -311,7 +310,7 @@ function nerv.parse_args(argv, options, unordered) return args, opts end ---- Print usage information of the command-line options +--- Print usage information of the command-line options. -- @param options the list of options used in `parse_args` function nerv.print_usage(options) local full_maxlen = 0 diff --git a/nerv/io/frm_buffer.lua b/nerv/io/frm_buffer.lua index 45f73a0..06bea0b 100644 --- a/nerv/io/frm_buffer.lua +++ b/nerv/io/frm_buffer.lua @@ -3,7 +3,7 @@ -- @author Ted Yin <[email protected]> --- The class for a frame-level chopped and shuffled buffer --- which shall be used for acyclic feed forward NNs +-- which shall be used for acyclic feed forward NNs. -- @type nerv.FrmBuffer local FrmBuffer = nerv.class("nerv.FrmBuffer", "nerv.DataBuffer") @@ -150,7 +150,7 @@ function FrmBuffer:saturate() end --- Get a batch group from the buffer. --- See `nerv.DataBuffer` for reference +-- See `nerv.DataBuffer` for reference. 
function FrmBuffer:get_data() local batch_size = self.batch_size diff --git a/nerv/io/init.lua b/nerv/io/init.lua index 4ebbabf..f0a3d52 100644 --- a/nerv/io/init.lua +++ b/nerv/io/init.lua @@ -11,7 +11,7 @@ -- * `write(handle)`: define how to write to a file handle -- * `get_info()`: return a table of chunk metadata -- * `set_info(info)`: set the metadata of the chunk --- * for more information, please refer to `nerv.MatrixParam` as an example +-- * for more information, please refer to `nerv.MatrixParam` as an example. -- @type nerv.ChunkFile function nerv.ChunkFile:write_chunkdata(metadata, writer) @@ -80,7 +80,7 @@ function DataReader:__init(global_conf, reader_conf) nerv.error_method_not_implemented() end ---- Get a data block from the reader +--- Get a data block from the reader. -- @return a table which maps data slot identifiers to data matrices. A data -- slot identifier is a unique string naming one slot of data. Each identifier -- maps to a matrix containing the data. (`{<slot_id> = <data matrix>, ...}`) It diff --git a/nerv/io/seq_buffer.lua b/nerv/io/seq_buffer.lua index 65df617..8cde1b3 100644 --- a/nerv/io/seq_buffer.lua +++ b/nerv/io/seq_buffer.lua @@ -116,7 +116,7 @@ function SeqBuffer:saturate(batch) end --- Get a batch group from the buffer. --- See `nerv.DataBuffer` for reference +-- See `nerv.DataBuffer` for reference. function SeqBuffer:get_data() local has_data = false diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua index a1c92b1..2dd2dc0 100644 --- a/nerv/layer/affine.lua +++ b/nerv/layer/affine.lua @@ -1,24 +1,38 @@ ---- Parameter and layer classes related to linear transform. +--- Contains parameter and layer classes related to linear (or affine) +-- transform. + +--- The class for linear transform parameter. 
+-- @type nerv.LinearTransParam -local MatrixParam = nerv.class('nerv.MatrixParam', 'nerv.Param') local LinearTransParam = nerv.class('nerv.LinearTransParam', 'nerv.MatrixParam') + +--- The class for bias parameter (currently implemented as a one-row matrix). +-- @type nerv.BiasParam + local BiasParam = nerv.class('nerv.BiasParam', 'nerv.MatrixParam') -local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer') ---- A parameter that consists of a single matrix +--- The class for all matrix-based parameters. The class has a single matrix +-- which can be accessed by `self.trans`. -- @type nerv.MatrixParam +local MatrixParam = nerv.class('nerv.MatrixParam', 'nerv.Param') + +--- Check the storage location of the contained matrix. This function is +-- required by `nerv.ParamRepo`. +-- @param checker the callback function for checking function MatrixParam:check(checker) -- check trans matrix type checker(self.trans) end ---- Read from a file handle. +--- Read from a file handle. See `nerv.Param.read`. -- @param handle the file handle function MatrixParam:read(handle) self.trans = self.gconf.mmat_type.load(handle) end +--- Write to a file handle. See `nerv.Param.write`. +-- @param handle the file handle function MatrixParam:write(handle) self.trans:save(handle) end @@ -69,10 +83,23 @@ function MatrixParam:update_by_err_input() self:_update(l2, l2) end ---- A fully-connected linear transform layer. +--- The affine layer that does the calculation Wx + b, also known as fully +-- connected linear transform layer. -- @type nerv.AffineLayer +local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer') + --- The constructor. +-- @param id the identifier +-- @param global_conf see `self.gconf` of `nerv.Layer.__init` +-- @param layer_conf a table providing with settings dedicated for the layer, +-- for `layer_conf` fields that are shared by all layers, see +-- `nerv.Layer.__init`. 
The affine layer requires parameters to be bound, the +-- following parameter names will be looked up while binding: +-- +-- * `ltp`: the linear transformation parameter, also known as the weight matrix, W in Wx + b +-- * `bp`: the bias parameter, also known as the bias matrix, b in Wx + b + function AffineLayer:__init(id, global_conf, layer_conf) nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua index 5790f95..5b5d4c7 100644 --- a/nerv/layer/graph.lua +++ b/nerv/layer/graph.lua @@ -1,5 +1,68 @@ +--- Implements a special kind of layers having an internal structure, a +-- directed graph of connected sub-level layers. + +--- The class describing the concept of a graph layer having an internal +-- structure, a directed graph of connected sub-level layers. Some of these +-- sub-level layers can again be graph layers, thus, it enables nested and +-- recursive layer declaration. The graph layer can be regarded as a container of +-- its sub-level layers. A layer other than a graph layer is also referred to as +-- "*primitive layer*". +-- @type nerv.GraphLayer + local GraphLayer = nerv.class('nerv.GraphLayer', 'nerv.Layer') +--- The constructor. +-- @param id the identifier +-- @param global_conf see `self.gconf` of `nerv.Layer.__init` +-- @param layer_conf a table providing settings dedicated to the layer, +-- the following fields should be specified: +-- +-- * `layer_repo`: the layer repo that should be used to find the sub-level layers +-- * `connections`: an array of 3-tuples describing the connections of +-- sub-level layers, the structure is as follows: +-- +-- { +-- {<from_port1>, <to_port1>, <time_shift1>}, -- tuple 1 +-- {<from_port2>, <to_port2>, <time_shift2>}, -- tuple 2 +-- {<from_port3>, <to_port3>, <time_shift3>}, -- tuple 3 +-- ... +-- } +-- Each tuple stands for a directed edge between two ports.
The first two +-- elements in the tuple are called *port specifications*, each of which is a string +-- with the following format: +-- +-- <layer_id>[<port_idx>] +-- where the `<layer_id>` is a string that identifies the layer in +-- `layer_conf.layer_repo`, and `<port_idx>` is the output or input port index when used +-- in the first or second port specification respectively. +-- +-- The third element in the tuple is an integer specifying the time delay of +-- this connection. In most cases, it will be simply zero. But for a +-- recurrent network, a positive value `i` means the output from `<from_port>` +-- will be used as the input to `<to_port>` in the `i`-th computation in the future. +-- Negative values are also allowed to propagate the output to the past. +-- +-- Note that there are two possible strings of `<layer_id>` that have special +-- meanings: the string `"<input>"` and `"<output>"` are placeholders of +-- the input and output ports of the outer graph layer. The input for the graph +-- layer as a whole can be used by establishing connections from +-- `"<input>[i]"`, and vice versa for the output. +-- +-- As an example, tuples: +-- +-- { +-- {"<input>[1]", "affine0[1]", 0}, +-- {"affine0[1]", "sigmoid0[1]", 0}, +-- {"sigmoid0[1]", "affine1[1]", 0}, +-- {"affine1[1]", "<output>[1]", 0} +-- } +-- Specify a graph layer that contains two stacked and fully connected linear +-- transformation sub-level layers. +-- +-- * `reversed`: optional, reverse the time shifting of all connections if true +-- +-- For other `layer_conf` fields that are shared by all layers, see `nerv.Layer.__init`.
+ function GraphLayer:__init(id, global_conf, layer_conf) nerv.Layer.__init(self, id, global_conf, layer_conf) self.lrepo = layer_conf.layer_repo diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index c893df3..7521b7a 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -1,38 +1,96 @@ --- The following methods must be implemented to let a layer work properly +--- Implements the concept of groups of parameters (`nerv.Param`) and +-- computation nodes (`nerv.Layer`). + +--- The class describing a group of parameters (an internal state) that can be +-- bound to layers. This class also implements the *chunk* interface (see +-- `nerv.ChunkFile`) , which means instances of `nerv.Param` can be exported to +-- chunk files as chunks. +-- @type nerv.Param local Param = nerv.class('nerv.Param') +--- The constructor. +-- @param id the identifier for the group of parameters +-- @param global_conf a table describing the computation state and providing +-- with some global settings + function Param:__init(id, global_conf) self.id = id self.gconf = global_conf end +--- Retrieve the metadata of the parameter group. This function implements the +-- *chunk* interface. +-- @return a table containing all metadata + function Param:get_info() return self.info end +--- Set the metadata of the parameter group. This function implements the +-- *chunk* interface. +-- @param info a table containing all metadata + function Param:set_info(info) self.info = info end +--- Read from the given file handle. This function implements the +-- *chunk* interface. +-- @param handle the file handle + function Param:read(handle) nerv.error_method_not_implemented() end +--- Write to the given file handle. This function implements the +-- *chunk* interface. +-- @param handle the file handle + function Param:write(handle) nerv.error_method_not_implemented() end -function Param:update(gradient) - nerv.error_method_not_implemented() -end +--- Generate zero. 
+-- @return zero -function Param:gen_zero() +function Param.gen_zero() return 0 end +--- The class describing a single computation node which calculates from the +-- input ports to the output ports which could be the input of others. +-- @type nerv.Layer + local Layer = nerv.class('nerv.Layer') +--- The constructor. All inheriting classes should call this base constructor to +-- initialize some predefined fields (of `self`): +-- +-- * `id`: the identifier of the layer +-- * `gconf`: a table describing the computation state and providing +-- with some global settings +-- * `lconf`: a table providing with settings dedicated for the layer. There +-- are some fields considered to be "standard" and shared by all +-- layers: +-- * `dim_in`: an array of each input port dimension (width) with order +-- * `dim_out`: an array of each output port dimension (width) with order +-- * `params`: optional, a table containing pairs of the manually bound +-- parameter name used by the layer and parameter id used to find the +-- parameter in the parameter repo +-- * `pr`: optional, the parameter repo (see `nerv.ParamRepo`) to find +-- parameters while binding, used by `nerv.Layer.find_param` +-- * `mat_type`: the type of matrix should be used when storing intermediate +-- results +-- * `loc_type`: a value from `nerv.ParamRepo.LOC_TYPES` indicating whether the +-- storage of `nerv.Param` instances is on host or device RAM +-- * `dim_in`: an array of each input port dimension (width) with order +-- * `dim_out`: an array of each output port dimension (width) with order +-- +-- @param id the identifier +-- @param global_conf see `self.gconf` +-- @param layer_conf see `self.lconf` + function Layer:__init(id, global_conf, layer_conf) self.id = id self.gconf = global_conf @@ -48,22 +106,57 @@ function Layer:__init(id, global_conf, layer_conf) self.dim_out = layer_conf.dim_out end +--- Initialize the layer, called for each epoch. 
+ function Layer:init(batch_size) nerv.error_method_not_implemented() end -function Layer:update(bp_err, input, output) +--- Update (change the state of) the bound (tied) parameter according to the +-- calculation. +-- @param bp_err an array of row-major matrices storing the error +-- back-propagated from the output ports +-- @param input an array of row-major matrices storing the input before the +-- forward propagation +-- @param output an array of row-major matrices storing the output after the +-- forward propagation +-- @param t BPTT time `t` + +function Layer:update(bp_err, input, output, t) nerv.error_method_not_implemented() end -function Layer:propagate(input, output) +--- Calculate the values in output ports according to the input. +-- @param input an array of row-major matrices storing the input before the +-- forward propagation +-- @param output an array of row-major matrices storing the output after the +-- forward propagation +-- @param t BPTT time `t` + +function Layer:propagate(input, output, t) nerv.error_method_not_implemented() end -function Layer:back_propagate(bp_err, next_bp_err, input, output) +--- Calculate the next error value (`next_bp_err`) by back-propagation. +-- @param bp_err an array of row-major matrices storing the error +-- back-propagated from the output ports +-- @param next_bp_err an array of row-major matrices storing the next error +-- back-propagated to the input ports +-- @param input an array of row-major matrices storing the input before the +-- forward propagation +-- @param output an array of row-major matrices storing the output after the +-- forward propagation +-- @param t BPTT time `t` + +function Layer:back_propagate(bp_err, next_bp_err, input, output, t) nerv.error_method_not_implemented() end +--- Check the length of the dimension array. This function is recommended for +-- invocation when checking the ports in your layer implementation.
+-- @param len_in the expected number of input ports (-1 if variable) +-- @param len_out the expected number of output ports (-1 if variable) + function Layer:check_dim_len(len_in, len_out) local expected_in = #self.dim_in local expected_out = #self.dim_out @@ -77,27 +170,60 @@ function Layer:check_dim_len(len_in, len_out) end end +--- Get all the parameters used by (bound to) the layer. +-- @return a `nerv.ParamRepo` instance storing all the involved parameters + function Layer:get_params() nerv.error_method_not_implemented() end +--- Look up and bind the parameters in the repo specified in layer settings. +-- This function will lead to the invocation of `find_param` if the layer is +-- implemented appropriately. + function Layer:bind_params() nerv.error_method_not_implemented() end +--- Get two arrays describing the dimension of input and output. +-- @return two arrays: `<dim_in>, <dim_out>` + function Layer:get_dim() return self.dim_in, self.dim_out end +--- Set an attribute to the layer. +-- @param name the name of the attribute +-- @param value the value of the attribute + function Layer:set_attr(name, value) self[name] = value end +--- Get the contained (nested) layer inside the layer (useful for complex +-- layers like `nerv.GraphLayer`). +-- @param id the identifier of the nested layer to be found + function Layer:get_sublayer(id) nerv.error('primitive layer does not have sublayers') end -function Layer:find_param(plist, lconf, gconf, p_type, p_dim, p_gen) +--- Find the parameter according to the layer settings. This function should be +-- used as the only way to locate a parameter in the implementation of +-- `bind_params` (see `nerv.AffineLayer.bind_params`).
+-- @param plist the potential names for the parameter in the order of lookup priority +-- @param lconf a table providing with settings dedicated for the layer +-- @param gconf a table describing the computation state and providing with +-- some global settings +-- @param ptype the type for constructing the parameter instance when +-- auto-generation is triggered +-- @param pdim an array specifying each dimension of the parameter when +-- auto-generation is triggered +-- @param pgen the function used to generate values when auto-generation is +-- triggered, `gconf.param_gen` will be used if `nil` + +function Layer:find_param(plist, lconf, gconf, ptype, pdim, pgen) + if type(plist) == "string" then plist = {plist} end @@ -123,14 +249,14 @@ function Layer:find_param(plist, lconf, gconf, p_type, p_dim, p_gen) pid = self.id .. '_' .. plist[1] nerv.info("param id for [%s] of layer [%s] is not found in the specified param repo, " .. "switch to auto-generate", plist_str, self.id) - local p = p_type(pid, gconf) - p.trans = self.mat_type(unpack(p_dim)) - p_gen = p_gen or gconf.param_gen - or gconf.param_random -- obsolete name - if type(p_gen) ~= "function" then + local p = ptype(pid, gconf) + p.trans = self.mat_type(unpack(pdim)) + pgen = pgen or gconf.param_gen + or gconf.param_random -- obsolete name + if type(pgen) ~= "function" then nerv.error("a param generate function is needed") end - p.trans:generate(p_gen) + p.trans:generate(pgen) return p end diff --git a/nerv/matrix/init.lua b/nerv/matrix/init.lua index 5d0544a..b84d735 100644 --- a/nerv/matrix/init.lua +++ b/nerv/matrix/init.lua @@ -102,21 +102,21 @@ function nerv.Matrix:__mul__(b) return c end ---- A wrapper function for `copy_from` +--- A wrapper function for `copy_from`. function nerv.Matrix:copy_to(b, ...) b:copy_from(self, ...) end ---- The base class for all device (in-GPU) matrices +--- The base class for all device (in-GPU) matrices. 
-- @type nerv.CuMatrix ---- A wrapper function for `copy_fromd` +--- A wrapper function for `copy_fromd`. nerv.CuMatrix.copy_tod = nerv.Matrix.copy_to ---- CUDA float matrices +--- CUDA float matrices. -- @type nerv.CuMatrixFloat ---- Create a CUDA matrix copy of the host matrix (in memory) +--- Create a CUDA matrix copy of the host matrix (in memory). -- @param mat the host matrix function nerv.CuMatrixFloat.new_from_host(mat) local res = nerv.CuMatrixFloat(mat:nrow(), mat:ncol()) @@ -124,17 +124,17 @@ function nerv.CuMatrixFloat.new_from_host(mat) return res end ---- Create a host matrix copy of the CUDA matrix +--- Create a host matrix copy of the CUDA matrix. function nerv.CuMatrixFloat:new_to_host() local res = nerv.MMatrixFloat(self:nrow(), self:ncol()) self:copy_toh(res) return res end ---- CUDA double matrices +--- CUDA double matrices. -- @type nerv.CuMatrixDouble ---- Create a CUDA matrix copy of the host matrix (in memory) +--- Create a CUDA matrix copy of the host matrix (in memory). -- @param mat the host matrix function nerv.CuMatrixDouble.new_from_host(mat) local res = nerv.CuMatrixDouble(mat:nrow(), mat:ncol()) @@ -142,25 +142,25 @@ function nerv.CuMatrixDouble.new_from_host(mat) return res end ---- Create a host matrix copy of the CUDA matrix +--- Create a host matrix copy of the CUDA matrix. function nerv.CuMatrixDouble:new_to_host() local res = nerv.MMatrixDouble(self:nrow(), self:ncol()) self:copy_toh(res) return res end ---- The base class for all host (in-memory) matrices +--- The base class for all host (in-memory) matrices. -- @type nerv.MMatrix ---- A wrapper function for `copy_fromh` +--- A wrapper function for `copy_fromh`. nerv.MMatrix.copy_toh = nerv.Matrix.copy_to ---- A wrapper function for `nerv.CuMatrix` copy +--- A wrapper function for `nerv.CuMatrix` copy. function nerv.MMatrix:copy_fromd(b, ...) b:copy_toh(self, ...) end ---- A wrapper function for `nerv.CuMatrix` copy +--- A wrapper function for `nerv.CuMatrix` copy. 
function nerv.MMatrix:copy_tod(b, ...) b:copy_fromh(self, ...) end diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua index 5a6abb6..bf69ccc 100644 --- a/nerv/nn/network.lua +++ b/nerv/nn/network.lua @@ -1,5 +1,5 @@ --- Implements the concept of computable but opaque networks built ("compiled") --- from nested layers +-- from nested layers. -- @author Qi Liu <[email protected]> -- @author Ted Yin <[email protected]> @@ -190,7 +190,7 @@ end --- Initialize the network for training. -- To be called before all the epochs, will resolve the structure of the --- network and allocate the memory for storing temporary values +-- network and allocate the memory for storing temporary values. -- @param batch_size The size of a batch matrix -- @param chunk_size The size of a BPTT chunk function network:init(batch_size, chunk_size) @@ -211,7 +211,8 @@ function network:init(batch_size, chunk_size) end --- Initialize the internal state of the network for the new epoch. --- To be called before each new epoch +-- To be called before each new epoch. + function network:epoch_init() self.timestamp = 0 for i = 1, #self.layers do @@ -503,7 +504,7 @@ function network:set_err_output(err_output) end --- Initialize the internal state of the network for the new mini-batch (a BPTT chunk). --- To be called before each propagation/back-propagation +-- To be called before each propagation/back-propagation. -- @param info a table containing information needed for the current mini-batch computation. The following fields must be supplied: -- -- * `input`: an array containing `chunk_size` number of row-major batch @@ -526,6 +527,7 @@ end -- * `err_output`: an array with the same structure as `input`. 
Although we -- are mostly not interested in its values, just allocate this to unify -- the computation and ease the implementation + function network:mini_batch_init(info) self.info = info self:set_input(self.info.input) @@ -634,6 +636,7 @@ function network:mini_batch_init(info) end --- Perform a propagation. + function network:propagate() for i = 1, #self.queue do local t, id = self.queue[i].chunk, self.queue[i].id @@ -652,6 +655,7 @@ function network:propagate() end --- Perform a backward propagation to calculate gradients used for update. + function network:back_propagate() for i = #self.queue, 1, -1 do local t, id = self.queue[i].chunk, self.queue[i].id @@ -677,6 +681,7 @@ function network:back_propagate() end --- Update the parameters bound to each layer. + function network:update() for i = 1, #self.layers do self.layers[i]:update() |