11 files changed, 275 insertions, 53 deletions
diff --git a/TODO.rst b/TODO.rst
index baeb7ba..7ce606d 100644
--- a/TODO.rst
+++ b/TODO.rst
@@ -4,3 +4,4 @@ TODO List
 - NERV user manual
 - NERV overview and introduction
 - C header file dependency detection in Makefiles
+- remove layer ``batch_resize`` API?
diff --git a/nerv/config.ld b/nerv/config.ld
index 4ed5d6a..c84e429 100644
--- a/nerv/config.ld
+++ b/nerv/config.ld
@@ -8,3 +8,4 @@ style = '!pale'
 format = 'markdown'
 kind_names = {topic = 'Manual', script = 'Programs'}
 backtick_references = false
+sort_modules = true
diff --git a/nerv/init.lua b/nerv/init.lua
index ba6a08d..86494e3 100644
--- a/nerv/init.lua
+++ b/nerv/init.lua
@@ -1,13 +1,11 @@
---- NERV: a Lua-based toolkit for high-performance deep learning.
--- This file contains misc utility functions of NERV and finally initializes
--- NERV by including `init.lua` of other basic modules.
+--- Contains misc utility functions of NERV and finally initializes NERV by
+-- including `init.lua` of other basic modules.
 -- @author Ted Yin <ted.sybil@gmail.com>
 -- @module nerv
 
 require 'libnerv'
 
---- Dummy function.
--- Display a friendly error message when user attempts to invoke a
+--- Display a friendly error message when user attempts to invoke a
 -- non-implemented function.
 function nerv.error_method_not_implemented()
     nerv.error("method not implemented");
@@ -185,7 +183,7 @@ function nerv.include(filename)
     return dofile(nerv.dirname(caller) .. filename)
 end
 
---- Parse the command-line options and arguments
+--- Parse the command-line options and arguments.
 -- @param argv the argrument list to parsed
 -- @param options The specification of options, should be a list of tables,
 -- each one for exactly one available option, say `v`, with `v[1]`, `v[2]`,
@@ -195,9 +193,10 @@ end
 -- value and description of the option.
 --
 -- An example of specification:
--- ```{{"aaa", "a", "boolean", default = false, desc = "an option called aaa"},
--- {"bbb", "b", "boolean", default = true, desc = "bbb is set to be true if --bbb=no does not present"},
--- {"ccc", nil, "int", default = 0, desc = "ccc expects an integeral value"}}```
+--
+--    {{"aaa", "a", "boolean", default = false, desc = "an option called aaa"},
+--     {"bbb", "b", "boolean", default = true, desc = "bbb is set to be true if --bbb=no is not present"},
+--     {"ccc", nil, "int", default = 0, desc = "ccc expects an integral value"}}
 --
 -- @return args, opts The non-option arguments and parsed options. `opts` is
 -- again a list of tables, each of which corresponds to one table in parameter
@@ -311,7 +310,7 @@ function nerv.parse_args(argv, options, unordered)
     return args, opts
 end
 
---- Print usage information of the command-line options
+--- Print usage information of the command-line options.
 -- @param options the list of options used in `parse_args`
 function nerv.print_usage(options)
     local full_maxlen = 0
diff --git a/nerv/io/frm_buffer.lua b/nerv/io/frm_buffer.lua
index 45f73a0..06bea0b 100644
--- a/nerv/io/frm_buffer.lua
+++ b/nerv/io/frm_buffer.lua
@@ -3,7 +3,7 @@
 -- @author Ted Yin <ted.sybil@gmail.com>
 
 --- The class for a frame-level chopped and shuffled buffer
--- which shall be used for acyclic feed forward NNs 
+-- which shall be used for acyclic feed forward NNs.
 -- @type nerv.FrmBuffer
 
 local FrmBuffer = nerv.class("nerv.FrmBuffer", "nerv.DataBuffer")
@@ -150,7 +150,7 @@ function FrmBuffer:saturate()
 end
 
 --- Get a batch group from the buffer.
--- See `nerv.DataBuffer` for reference
+-- See `nerv.DataBuffer` for reference.
 
 function FrmBuffer:get_data()
     local batch_size = self.batch_size
diff --git a/nerv/io/init.lua b/nerv/io/init.lua
index 4ebbabf..f0a3d52 100644
--- a/nerv/io/init.lua
+++ b/nerv/io/init.lua
@@ -11,7 +11,7 @@
 -- * `write(handle)`: define how to write to a file handle
 -- * `get_info()`: return a table of chunk metadata
 -- * `set_info(info)`: set the metadata of the chunk
--- * for more information, please refer to `nerv.MatrixParam` as an example
+-- * for more information, please refer to `nerv.MatrixParam` as an example.
 -- @type nerv.ChunkFile
 
 function nerv.ChunkFile:write_chunkdata(metadata, writer)
@@ -80,7 +80,7 @@ function DataReader:__init(global_conf, reader_conf)
     nerv.error_method_not_implemented()
 end
 
---- Get a data block from the reader
+--- Get a data block from the reader.
 -- @return a table which maps data slot identifiers to data matrices. A data
 -- slot identifier is a unique string naming one slot of data. Each identifier
 -- maps to a matrix containing the data. (`{<slot_id> = <data matrix>, ...}`) It
diff --git a/nerv/io/seq_buffer.lua b/nerv/io/seq_buffer.lua
index 65df617..8cde1b3 100644
--- a/nerv/io/seq_buffer.lua
+++ b/nerv/io/seq_buffer.lua
@@ -116,7 +116,7 @@ function SeqBuffer:saturate(batch)
 end
 
 --- Get a batch group from the buffer.
--- See `nerv.DataBuffer` for reference
+-- See `nerv.DataBuffer` for reference.
 
 function SeqBuffer:get_data()
     local has_data = false
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index a1c92b1..2dd2dc0 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -1,24 +1,38 @@
---- Parameter and layer classes related to linear transform.
+--- Contains parameter and layer classes related to linear (or affine)
+-- transform.
+
+--- The class for linear transform parameter.
+-- @type nerv.LinearTransParam
 
-local MatrixParam = nerv.class('nerv.MatrixParam', 'nerv.Param')
 local LinearTransParam = nerv.class('nerv.LinearTransParam', 'nerv.MatrixParam')
+
+--- The class for bias parameter (currently implemented as a one-row matrix).
+-- @type nerv.BiasParam
+
 local BiasParam = nerv.class('nerv.BiasParam', 'nerv.MatrixParam')
-local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer')
 
---- A parameter that consists of a single matrix
+--- The class for all matrix-based parameters. The class has a single matrix
+-- which can be accessed by `self.trans`.
 -- @type nerv.MatrixParam
 
+local MatrixParam = nerv.class('nerv.MatrixParam', 'nerv.Param')
+
+--- Check the storage location of the contained matrix. This function is
+-- required by `nerv.ParamRepo`.
+-- @param checker the callback function for checking
 function MatrixParam:check(checker)
     -- check trans matrix type
     checker(self.trans)
 end
 
---- Read from a file handle.
+--- Read from a file handle. See `nerv.Param.read`.
 -- @param handle the file handle
 function MatrixParam:read(handle)
     self.trans = self.gconf.mmat_type.load(handle)
 end
 
+--- Write to a file handle. See `nerv.Param.write`.
+-- @param handle the file handle
 function MatrixParam:write(handle)
     self.trans:save(handle)
 end
@@ -69,10 +83,23 @@ function MatrixParam:update_by_err_input()
     self:_update(l2, l2)
 end
 
---- A fully-connected linear transform layer.
+--- The affine layer that does the calculation Wx + b, also known as a fully
+-- connected linear transform layer.
 -- @type nerv.AffineLayer
 
+local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer')
+
 --- The constructor.
+-- @param id the identifier
+-- @param global_conf see `self.gconf` of `nerv.Layer.__init`
+-- @param layer_conf a table providing settings dedicated to the layer;
+-- for `layer_conf` fields that are shared by all layers, see
+-- `nerv.Layer.__init`. The affine layer requires parameters to be bound; the
+-- following parameter names will be looked up while binding:
+--
+-- * `ltp`: the linear transformation parameter, also known as the weight matrix, W in Wx + b
+-- * `bp`: the bias parameter, also known as the bias matrix, b in Wx + b
+
 function AffineLayer:__init(id, global_conf, layer_conf)
     nerv.Layer.__init(self, id, global_conf, layer_conf)
     self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua
index 5790f95..5b5d4c7 100644
--- a/nerv/layer/graph.lua
+++ b/nerv/layer/graph.lua
@@ -1,5 +1,68 @@
+--- Implements a special kind of layers having an internal structure, a
+-- directed graph of connected sub-level layers.
+
+--- The class describing the concept of a graph layer having an internal
+-- structure, a directed graph of connected sub-level layers. Some of these
+-- sub-level layers can again be graph layers, thus, it enables nested and
+-- recursive layer declaration. The graph layer can be regarded as a container of
+-- its sub-level layers. A layer other than a graph layer is also referred to
+-- as a "*primitive layer*".
+-- @type nerv.GraphLayer
+
 local GraphLayer = nerv.class('nerv.GraphLayer', 'nerv.Layer')
 
+--- The constructor.
+-- @param id the identifier
+-- @param global_conf see `self.gconf` of `nerv.Layer.__init`
+-- @param layer_conf a table providing settings dedicated to the layer;
+-- the following fields should be specified:
+--
+-- * `layer_repo`: the layer repo used to find the sub-level layers
+-- * `connections`: an array of 3-tuples describing the connections of
+--   sub-level layers, the structure is as follows:
+--
+--        {
+--            {<from_port1>, <to_port1>, <time_shift1>}, -- tuple 1
+--            {<from_port2>, <to_port2>, <time_shift2>}, -- tuple 2
+--            {<from_port3>, <to_port3>, <time_shift3>}, -- tuple 3
+--            ...
+--        }
+--   Each tuple stands for a directed edge between two ports. The first two
+--   elements in the tuple are called *port specification* which is a string
+--   with the following format:
+--
+--        <layer_id>[<port_idx>]
+--   where the `<layer_id>` is a string that identifies the layer in
+--   `layer_conf.layer_repo`, and `<port_idx>` is the output or input port index
+--   when used in the first or second port specification respectively.
+--
+--   The third element in the tuple is an integer specifying the time delay of
+--   this connection. In most cases, it will be simply zero. But for a
+--   recurrent network, a positive value `i` means the output from `<from_port>`
+--   will be used as the input to `<to_port>` in the `i`-th future computation.
+--   Negative values are also allowed to propagate the output to the past.
+--
+--   Note that there are two possible strings of `<layer_id>` that have special
+--   meanings: the strings `"<input>"` and `"<output>"` are placeholders for
+--   the input and output ports of the outer graph layer. The input for the graph
+--   layer as a whole can be used by establishing connections from
+--   `"<input>[i]"`, and vice versa for the output.
+--
+--   As an example, tuples:
+--
+--        {
+--            {"<input>[1]", "affine0[1]", 0},
+--            {"affine0[1]", "sigmoid0[1]", 0},
+--            {"sigmoid0[1]", "affine1[1]", 0},
+--            {"affine1[1]", "<output>[1]", 0}
+--        }
+--   Specify a graph layer that contains two stacked and fully connected linear
+--   transformation sub-level layers.
+--
+-- * `reversed`: optional, reverse the time shifting of all connections if true
+--
+-- For other `layer_conf` fields that are shared by all layers, see `nerv.Layer.__init`.
+
 function GraphLayer:__init(id, global_conf, layer_conf)
     nerv.Layer.__init(self, id, global_conf, layer_conf)
     self.lrepo = layer_conf.layer_repo
diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua
index c893df3..7521b7a 100644
--- a/nerv/layer/init.lua
+++ b/nerv/layer/init.lua
@@ -1,38 +1,96 @@
--- The following methods must be implemented to let a layer work properly
+--- Implements the concept of groups of parameters (`nerv.Param`) and
+-- computation nodes (`nerv.Layer`).
+
+--- The class describing a group of parameters (an internal state) that can be
+-- bound to layers. This class also implements the *chunk* interface (see
+-- `nerv.ChunkFile`), which means instances of `nerv.Param` can be exported to
+-- chunk files as chunks.
+-- @type nerv.Param
 
 local Param = nerv.class('nerv.Param')
 
+--- The constructor.
+-- @param id the identifier for the group of parameters
+-- @param global_conf a table describing the computation state and providing
+-- some global settings
+
 function Param:__init(id, global_conf)
     self.id = id
     self.gconf = global_conf
 end
 
+--- Retrieve the metadata of the parameter group. This function implements the
+-- *chunk* interface.
+-- @return a table containing all metadata
+
 function Param:get_info()
     return self.info
 end
 
+--- Set the metadata of the parameter group. This function implements the
+-- *chunk* interface.
+-- @param info a table containing all metadata
+
 function Param:set_info(info)
     self.info = info
 end
 
+--- Read from the given file handle. This function implements the
+-- *chunk* interface.
+-- @param handle the file handle
+
 function Param:read(handle)
     nerv.error_method_not_implemented()
 end
 
+--- Write to the given file handle. This function implements the
+-- *chunk* interface.
+-- @param handle the file handle
+
 function Param:write(handle)
     nerv.error_method_not_implemented()
 end
 
-function Param:update(gradient)
-    nerv.error_method_not_implemented()
-end
+--- Generate zero.
+-- @return zero
 
-function Param:gen_zero()
+function Param.gen_zero()
     return 0
 end
 
+--- The class describing a single computation node which calculates from the
+-- input ports to the output ports which could be the input of others.
+-- @type nerv.Layer
+
 local Layer = nerv.class('nerv.Layer')
 
+--- The constructor. All inheriting classes should call this base constructor to
+-- initialize some predefined fields (of `self`):
+--
+-- * `id`: the identifier of the layer
+-- * `gconf`: a table describing the computation state and providing
+--   some global settings
+-- * `lconf`: a table providing settings dedicated to the layer. There
+--   are some fields considered to be "standard" and shared by all
+--   layers:
+--      * `dim_in`: an array of each input port dimension (width) with order
+--      * `dim_out`: an array of each output port dimension (width) with order
+--      * `params`: optional, a table containing pairs of the manually bound
+--        parameter name used by the layer and parameter id used to find the
+--        parameter in the parameter repo
+--      * `pr`: optional, the parameter repo (see `nerv.ParamRepo`) to find
+--        parameters while binding, used by `nerv.Layer.find_param`
+-- * `mat_type`: the type of matrix that should be used when storing
+--   intermediate results
+-- * `loc_type`: a value from `nerv.ParamRepo.LOC_TYPES` indicating whether the
+--    storage of `nerv.Param` instances is on host or device RAM
+-- * `dim_in`: an array of each input port dimension (width) with order
+-- * `dim_out`: an array of each output port dimension (width) with order
+--
+-- @param id the identifier
+-- @param global_conf see `self.gconf`
+-- @param layer_conf see `self.lconf`
+
 function Layer:__init(id, global_conf, layer_conf)
     self.id = id
     self.gconf = global_conf
@@ -48,22 +106,57 @@ function Layer:__init(id, global_conf, layer_conf)
     self.dim_out = layer_conf.dim_out
 end
 
+--- Initialize the layer, called for each epoch.
+
 function Layer:init(batch_size)
     nerv.error_method_not_implemented()
 end
 
-function Layer:update(bp_err, input, output)
+--- Update (change the state of) the bound (tied) parameter according to the
+-- calculation.
+-- @param bp_err an array of row-major matrices storing the error
+-- back-propagated from the output ports
+-- @param input an array of row-major matrices storing the input before the
+-- forward propagation
+-- @param output an array of row-major matrices storing the output after the
+-- forward propagation
+-- @param t BPTT time `t`
+
+function Layer:update(bp_err, input, output, t)
     nerv.error_method_not_implemented()
 end
 
-function Layer:propagate(input, output)
+--- Calculate the values in output ports according to the input.
+-- @param input an array of row-major matrices storing the input before the
+-- forward propagation
+-- @param output an array of row-major matrices storing the output after the
+-- forward propagation
+-- @param t BPTT time `t`
+
+function Layer:propagate(input, output, t)
     nerv.error_method_not_implemented()
 end
 
-function Layer:back_propagate(bp_err, next_bp_err, input, output)
+--- Calculate the next error value (`next_bp_err`) by back-propagation.
+-- @param bp_err an array of row-major matrices storing the error
+-- back-propagated from the output ports
+-- @param next_bp_err an array of row-major matrices storing the next error
+-- back-propagated to the input ports
+-- @param input an array of row-major matrices storing the input before the
+-- forward propagation
+-- @param output an array of row-major matrices storing the output after the
+-- forward propagation
+-- @param t BPTT time `t`
+
+function Layer:back_propagate(bp_err, next_bp_err, input, output, t)
     nerv.error_method_not_implemented()
 end
 
+--- Check the length of the dimension array. This function is recommended for
+-- invocation when checking the ports in your layer implementation.
+-- @param len_in the expected number of input ports (-1 if variable)
+-- @param len_out the expected number of output ports (-1 if variable)
+
 function Layer:check_dim_len(len_in, len_out)
     local expected_in = #self.dim_in
     local expected_out = #self.dim_out
@@ -77,27 +170,60 @@ function Layer:check_dim_len(len_in, len_out)
     end
 end
 
+--- Get all the parameters used by (bound to) the layer.
+-- @return a `nerv.ParamRepo` instance storing all the involved parameters
+
 function Layer:get_params()
     nerv.error_method_not_implemented()
 end
 
+--- Lookup and bind the parameters in the repo specified in layer settings.
+-- This function will lead to the invocation of `find_param` if the layer is
+-- implemented appropriately.
+
 function Layer:bind_params()
     nerv.error_method_not_implemented()
 end
 
+--- Get two arrays describing the dimension of input and output.
+-- @return two arrays: `<dim_in>, <dim_out>`
+
 function Layer:get_dim()
     return self.dim_in, self.dim_out
 end
 
+--- Set an attribute to the layer.
+-- @param name the name of the attribute
+-- @param value the value of the attribute
+
 function Layer:set_attr(name, value)
     self[name] = value
 end
 
+--- Get the contained (nested) layer inside the layer (useful for complex
+-- layers like `nerv.GraphLayer`).
+-- @param id the identifier of the nested layer to be found
+
 function Layer:get_sublayer(id)
     nerv.error('primitive layer does not have sublayers')
 end
 
-function Layer:find_param(plist, lconf, gconf, p_type, p_dim, p_gen)
+--- Find the parameter according to the layer settings. This function should be
+-- used as the only way to locate a parameter in the implementation of
+-- `bind_params` (see `nerv.AffineLayer.bind_params`).
+-- @param plist the potential names for the parameter in the order of lookup priority
+-- @param lconf a table providing settings dedicated to the layer
+-- @param gconf a table describing the computation state and providing
+-- some global settings
+-- @param ptype the type for constructing the parameter instance when
+-- auto-generation is triggered
+-- @param pdim an array specifying each dimension of the parameter when
+-- auto-generation is triggered
+-- @param pgen the function used to generate values when auto-generation is
+-- triggered, `gconf.param_gen` will be used if `nil`
+
+function Layer:find_param(plist, lconf, gconf, ptype, pdim, pgen)
+
     if type(plist) == "string" then
         plist = {plist}
     end
@@ -123,14 +249,14 @@ function Layer:find_param(plist, lconf, gconf, p_type, p_dim, p_gen)
     pid = self.id .. '_' .. plist[1]
     nerv.info("param id for [%s] of layer [%s] is not found in the specified param repo, " ..
                 "switch to auto-generate", plist_str, self.id)
-    local p = p_type(pid, gconf)
-    p.trans = self.mat_type(unpack(p_dim))
-    p_gen = p_gen or gconf.param_gen
-                    or gconf.param_random -- obsolete name
-    if type(p_gen) ~= "function" then
+    local p = ptype(pid, gconf)
+    p.trans = self.mat_type(unpack(pdim))
+    pgen = pgen or gconf.param_gen
+                or gconf.param_random -- obsolete name
+    if type(pgen) ~= "function" then
         nerv.error("a param generate function is needed")
     end
-    p.trans:generate(p_gen)
+    p.trans:generate(pgen)
     return p
 end
 
diff --git a/nerv/matrix/init.lua b/nerv/matrix/init.lua
index 5d0544a..b84d735 100644
--- a/nerv/matrix/init.lua
+++ b/nerv/matrix/init.lua
@@ -102,21 +102,21 @@ function nerv.Matrix:__mul__(b)
     return c
 end
 
---- A wrapper function for `copy_from`
+--- A wrapper function for `copy_from`.
 function nerv.Matrix:copy_to(b, ...)
     b:copy_from(self, ...)
 end
 
---- The base class for all device (in-GPU) matrices
+--- The base class for all device (in-GPU) matrices.
 -- @type nerv.CuMatrix
 
---- A wrapper function for `copy_fromd`
+--- A wrapper function for `copy_fromd`.
 nerv.CuMatrix.copy_tod = nerv.Matrix.copy_to
 
---- CUDA float matrices
+--- CUDA float matrices.
 -- @type nerv.CuMatrixFloat
 
---- Create a CUDA matrix copy of the host matrix (in memory)
+--- Create a CUDA matrix copy of the host matrix (in memory).
 -- @param mat the host matrix
 function nerv.CuMatrixFloat.new_from_host(mat)
     local res = nerv.CuMatrixFloat(mat:nrow(), mat:ncol())
@@ -124,17 +124,17 @@ function nerv.CuMatrixFloat.new_from_host(mat)
     return res
 end
 
---- Create a host matrix copy of the CUDA matrix
+--- Create a host matrix copy of the CUDA matrix.
 function nerv.CuMatrixFloat:new_to_host()
     local res = nerv.MMatrixFloat(self:nrow(), self:ncol())
     self:copy_toh(res)
     return res
 end
 
---- CUDA double matrices
+--- CUDA double matrices.
 -- @type nerv.CuMatrixDouble
 
---- Create a CUDA matrix copy of the host matrix (in memory)
+--- Create a CUDA matrix copy of the host matrix (in memory).
 -- @param mat the host matrix
 function nerv.CuMatrixDouble.new_from_host(mat)
     local res = nerv.CuMatrixDouble(mat:nrow(), mat:ncol())
@@ -142,25 +142,25 @@ function nerv.CuMatrixDouble.new_from_host(mat)
     return res
 end
 
---- Create a host matrix copy of the CUDA matrix
+--- Create a host matrix copy of the CUDA matrix.
 function nerv.CuMatrixDouble:new_to_host()
     local res = nerv.MMatrixDouble(self:nrow(), self:ncol())
     self:copy_toh(res)
     return res
 end
 
---- The base class for all host (in-memory) matrices
+--- The base class for all host (in-memory) matrices.
 -- @type nerv.MMatrix
 
---- A wrapper function for `copy_fromh`
+--- A wrapper function for `copy_fromh`.
 nerv.MMatrix.copy_toh = nerv.Matrix.copy_to
 
---- A wrapper function for `nerv.CuMatrix` copy
+--- A wrapper function for `nerv.CuMatrix` copy.
 function nerv.MMatrix:copy_fromd(b, ...)
     b:copy_toh(self, ...)
 end
 
---- A wrapper function for `nerv.CuMatrix` copy
+--- A wrapper function for `nerv.CuMatrix` copy.
 function nerv.MMatrix:copy_tod(b, ...)
     b:copy_fromh(self, ...)
 end
diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua
index 5a6abb6..bf69ccc 100644
--- a/nerv/nn/network.lua
+++ b/nerv/nn/network.lua
@@ -1,5 +1,5 @@
 --- Implements the concept of computable but opaque networks built ("compiled")
--- from nested layers
+-- from nested layers.
 -- @author Qi Liu <liuq901@163.com>
 -- @author Ted Yin <ted.sybil@gmail.com>
 
@@ -190,7 +190,7 @@ end
 
 --- Initialize the network for training.
 -- To be called before all the epochs, will resolve the structure of the
--- network and allocate the memory for storing temporary values
+-- network and allocate the memory for storing temporary values.
 -- @param batch_size The size of a batch matrix
 -- @param chunk_size The size of a BPTT chunk
 function network:init(batch_size, chunk_size)
@@ -211,7 +211,8 @@ function network:init(batch_size, chunk_size)
 end
 
 --- Initialize the internal state of the network for the new epoch.
--- To be called before each new epoch
+-- To be called before each new epoch.
+
 function network:epoch_init()
     self.timestamp = 0
     for i = 1, #self.layers do
@@ -503,7 +504,7 @@ function network:set_err_output(err_output)
 end
 
 --- Initialize the internal state of the network for the new mini-batch (a BPTT chunk).
--- To be called before each propagation/back-propagation
+-- To be called before each propagation/back-propagation.
 -- @param info a table containing information needed for the current mini-batch computation. The following fields must be supplied:
 --
 -- * `input`: an array containing `chunk_size` number of row-major batch
@@ -526,6 +527,7 @@ end
 --  * `err_output`: an array with the same structure as `input`. Although we
 --    are mostly not interested in its values, just allocate this to unify
 --    the computation and ease the implementation
+
 function network:mini_batch_init(info)
     self.info = info
     self:set_input(self.info.input)
@@ -634,6 +636,7 @@ function network:mini_batch_init(info)
 end
 
 --- Perform a propagation.
+
 function network:propagate()
     for i = 1, #self.queue do
         local t, id = self.queue[i].chunk, self.queue[i].id
@@ -652,6 +655,7 @@ function network:propagate()
 end
 
 --- Perform a backward propagation to calculate gradients used for update.
+
 function network:back_propagate()
     for i = #self.queue, 1, -1 do
         local t, id = self.queue[i].chunk, self.queue[i].id
@@ -677,6 +681,7 @@ function network:back_propagate()
 end
 
 --- Update the parameters bound to each layer.
+
 function network:update()
     for i = 1, #self.layers do
         self.layers[i]:update()