add doc for io

author: Determinant <ted.sybil@gmail.com> 2016-04-20 10:35:11 +0800
committer: Determinant <ted.sybil@gmail.com> 2016-04-20 10:35:11 +0800
commit: 8fecbb8e488569cd8e2f930075120e5f1b1b54fb (patch)
tree: f0e31068b4557eee4396d75c7e089a1f15bb837e
parent: e3d67040f8591a77341a2c12e55461522abf3756 (diff)
4 files changed, 139 insertions, 1 deletions
diff --git a/nerv/io/frm_buffer.lua b/nerv/io/frm_buffer.lua
index 9761f16..45f73a0 100644
--- a/nerv/io/frm_buffer.lua
+++ b/nerv/io/frm_buffer.lua
@@ -1,5 +1,38 @@
+--- Implements a frame-level chopped and shuffled buffer which shall be used
+-- for acyclic feed forward NNs (`chunk_size = 1`).
+-- @author Ted Yin <ted.sybil@gmail.com>
+
+--- The class for a frame-level chopped and shuffled buffer
+-- which shall be used for acyclic feed forward NNs.
+-- @type nerv.FrmBuffer
+
 local FrmBuffer = nerv.class("nerv.FrmBuffer", "nerv.DataBuffer")
 
+--- The constructor.
+-- @param global_conf a table describing the computation state and providing
+-- some global settings
+--
+-- The following fields in `global_conf` will be used:
+--
+-- * `use_cpu`: whether to provide the chunks/"mini-batches" stored in
+--   main memory on invocation of `get_data()`
+-- * `mmat_type`: the class used for creating matrices in CPU computation
+-- * `cumat_type` (if `use_cpu = false`): the class used for creating matrices
+-- in GPU computation
+--
+-- @param buffer_conf a table providing settings dedicated to the buffer.
+-- Available fields include:
+--
+-- * `readers`: an array of `nerv.DataReader` instances specifying the
+--   readers used to read data
+-- * `batch_size`: the number of rows for each batch matrix
+-- * `buffer_size`: the number of frames to be buffered and shuffled at once
+-- * `randomize`: shuffle the buffer after it is filled if true
+-- * `consume`: drop the last frames which cannot make up a full `batch_size`
+--   matrix if false
+-- * `use_gpu`: the buffer space will be allocated on the device (graphics
+--   card) if true
+
 function FrmBuffer:__init(global_conf, buffer_conf)
     self.gconf = global_conf
     self.batch_size = buffer_conf.batch_size
@@ -116,6 +149,9 @@ function FrmBuffer:saturate()
     return self.tail >= self.batch_size
 end
 
+--- Get a batch group from the buffer.
+-- See `nerv.DataBuffer` for reference
+
 function FrmBuffer:get_data()
     local batch_size = self.batch_size
     if self.head >= self.tail then -- buffer is empty
diff --git a/nerv/io/init.lua b/nerv/io/init.lua
index d3ba27c..4ebbabf 100644
--- a/nerv/io/init.lua
+++ b/nerv/io/init.lua
@@ -1,3 +1,19 @@
+--- Implements parts of the ChunkFile operations (methods) in Lua and defines
+-- the interfaces of DataReader and DataBuffer.
+-- @author Ted Yin <ted.sybil@gmail.com>
+
+--- The class for on-disk chunk storage.
+-- A *chunk* can be a group of parameters such as an instance of
+-- `nerv.Param` (or `nerv.MatrixParam`, etc.), but can also be something else as long as it implements the following interface:
+--
+-- * `.id` field: the unique identifier of the chunk
+-- * `read(handle)`: define how to read from a file handle
+-- * `write(handle)`: define how to write to a file handle
+-- * `get_info()`: return a table of chunk metadata
+-- * `set_info(info)`: set the metadata of the chunk
+-- * for more information, please refer to `nerv.MatrixParam` as an example
+-- @type nerv.ChunkFile
+
 function nerv.ChunkFile:write_chunkdata(metadata, writer)
     if type(metadata) ~= "table" then
         nerv.error("metadata should be a Lua table")
@@ -6,6 +22,9 @@ function nerv.ChunkFile:write_chunkdata(metadata, writer)
     return self._write_chunkdata(self.handle, table.tostring(metadata), writer)
 end
 
+--- Write a chunk to the file.
+-- @param chunk the chunk to be serialized. A *chunk* can be any Lua object
+-- which implements the required interface.
 function nerv.ChunkFile:write_chunk(chunk)
     local id = chunk.id
     local type = chunk.__typename
@@ -17,6 +36,11 @@ function nerv.ChunkFile:write_chunk(chunk)
                             info = chunk:get_info()}, chunk)
 end
 
+--- Read a chunk from the file.
+-- @param id the identifier of the chunk to be read from a chunk file, which may store multiple chunks
+-- @param global_conf a table describing the computation state and providing
+-- some global settings, which will be passed to the constructor of the chunk.
+
 function nerv.ChunkFile:read_chunk(id, global_conf)
     if self.metadata == nil then
         nerv.error("wrong file opening mode")
@@ -32,26 +56,69 @@ function nerv.ChunkFile:read_chunk(id, global_conf)
     return chunk
 end
 
+--- Close a chunk file gracefully.
 function nerv.ChunkFile:close()
     self._close(self.handle)
 end
 
+--- The abstract class which defines the interface of data readers.
+-- Data readers, as the name suggests, are responsible for handling
+-- task-specific low-level I/O, reading from on-disk data files and producing a
+-- formalized data block once invoked by the data buffer (see
+-- `nerv.DataBuffer`). The main interface is defined by the `get_data` method.
+-- @type nerv.DataReader
+
 local DataReader = nerv.class("nerv.DataReader")
 
+--- The constructor.
+-- @param global_conf a table describing the computation state and providing
+-- some global settings
+-- @param reader_conf a table providing settings dedicated to the
+-- reader
+
 function DataReader:__init(global_conf, reader_conf)
     nerv.error_method_not_implemented()
 end
 
+--- Get a data block from the reader
+-- @return a table which maps data slot identifiers to data matrices. A data
+-- slot identifier is a unique string naming one slot of data. Each identifier
+-- maps to a matrix containing the data. (`{<slot_id> = <data matrix>, ...}`) It
+-- is a requirement that the number of rows in all the matrices in the returned
+-- table stays the same.
 function DataReader:get_data()
     nerv.error_method_not_implemented()
 end
 
+--- The abstract class which defines the interface of data buffers.
+-- Data buffers can be regarded as data reorganizers which accept variable
+-- length data blocks and chop the stacked blocks into equi-length batch
+-- groups used by network computation. The main interface is defined by
+-- the `get_data` method.
+-- @type nerv.DataBuffer
 local DataBuffer = nerv.class("nerv.DataBuffer")
 
+--- The constructor.
+-- @param global_conf a table describing the computation
+-- state and providing some global settings
+-- @param buffer_conf a table providing settings
+-- dedicated to the buffer
 function DataBuffer:__init(global_conf, buffer_conf)
     nerv.error_method_not_implemented()
 end
 
+--- Get a batch group from the buffer.
+-- @return a table containing the following fields:
+--
+-- * `data`: a table which maps slot identifiers to chunks/"mini-batches". Each
+--   chunk is an array of batch matrices (`{<slot_id> = {<batch_1>, <batch_2>, ..., <batch_chunk_size>}, ...}`, see `nerv.Network`).
+-- * `seq_length` : a table containing the length (number of frames) of each
+--   sequence (utterance). (`{<fnum_1>, <fnum_2>, ..., <fnum_batch_size>}`)
+-- * `new_seq`: a table containing the indices of batch matrix rows that are the
+--    first frames of a sequence. (`{}` when there is no newly appearing
+--    sequence/utterance, at most `batch_size` elements when all the
+--    sequences/utterances are on their first frames)
+
 function DataBuffer:get_data()
     nerv.error_method_not_implemented()
 end
diff --git a/nerv/io/seq_buffer.lua b/nerv/io/seq_buffer.lua
index 029e7b8..65df617 100644
--- a/nerv/io/seq_buffer.lua
+++ b/nerv/io/seq_buffer.lua
@@ -1,5 +1,36 @@
+--- Implements a sequence-level chopped and shuffled buffer which
+-- shall be used for cyclic NNs.
+-- @author Qi Liu <liuq901@163.com>
+
+--- The class for a sequence-level chopped and shuffled buffer which
+-- shall be used for cyclic NNs.
+-- @type nerv.SeqBuffer
+
 local SeqBuffer = nerv.class('nerv.SeqBuffer', 'nerv.DataBuffer')
 
+--- The constructor.
+-- @param global_conf a table describing the computation state and providing
+-- some global settings
+--
+-- The following fields in `global_conf` will be used:
+--
+-- * `use_cpu`: whether to provide the chunks/"mini-batches" stored in
+--   main memory on invocation of `get_data()`
+-- * `mmat_type`: the class used for creating matrices in CPU computation
+-- * `cumat_type` (if `use_cpu = false`): the class used for creating matrices
+-- in GPU computation
+--
+-- @param buffer_conf a table providing settings dedicated to the buffer.
+-- Available fields include:
+--
+-- * `readers`: an array of `nerv.DataReader` instances specifying the
+--   readers used to read data
+-- * `batch_size`: the number of rows for each batch matrix
+-- * `chunk_size`: the length of the BPTT context (number of batch
+--   matrices to provide upon each invocation of `get_data()`)
+-- * `nn_act_default`: the default value to fill into the "holes" (non-data
+--   frames)
+
 function SeqBuffer:__init(global_conf, buffer_conf)
     self.gconf = global_conf
 
@@ -84,6 +115,9 @@ function SeqBuffer:saturate(batch)
     return true
 end
 
+--- Get a batch group from the buffer.
+-- See `nerv.DataBuffer` for reference
+
 function SeqBuffer:get_data()
     local has_data = false
     for i = 1, self.batch_size do
diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua
index 19fa9d3..5a6abb6 100644
--- a/nerv/nn/network.lua
+++ b/nerv/nn/network.lua
@@ -511,7 +511,8 @@ end
 -- * `output`: similar to `input`, but the matrices have different number of
 --    columns (depending on the width of the output, which is typically 1 for
 --    criteria, i.e. single column indicating the error), used to hold the output of the network
--- * `seq_length` : a table containing the length (number of frames) of each sequence (utterance)
+-- * `seq_length` : a table containing the length (number of frames) of each
+--   sequence (utterance)
 -- * `new_seq`: a table containing the indices of batch matrix rows that are the
 --    first frames of a sequence
 -- * `do_train`: a bool value indicating whether to update the network
author	Determinant <ted.sybil@gmail.com>	2016-04-20 10:35:11 +0800
committer	Determinant <ted.sybil@gmail.com>	2016-04-20 10:35:11 +0800
commit	8fecbb8e488569cd8e2f930075120e5f1b1b54fb (patch)
tree	f0e31068b4557eee4396d75c7e089a1f15bb837e
parent	e3d67040f8591a77341a2c12e55461522abf3756 (diff)