aboutsummaryrefslogtreecommitdiff
path: root/nerv/io/init.lua
blob: 4ebbabf50339a5e242240a45712af3d7f801aebc (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
--- Implements parts of the ChunkFile operations (methods) in Lua and defines
-- the interfaces of DataReader and DataBuffer.
-- @author Ted Yin <ted.sybil@gmail.com>

--- The class for on-disk chunk storage.
-- A *chunk* can be a group of parameters such as an instance of
-- `nerv.Param` (or `nerv.MatrixParam`, etc.), but can also be something else as long as it implements the following interface:
--
-- * `.id` field: the unique identifier of the chunk
-- * `read(handle)`: define how to read from a file handle
-- * `write(handle)`: define how to write to a file handle
-- * `get_info()`: return a table of chunk metadata
-- * `set_info(info)`: set the metadata of the chunk
-- * for more information, please refer to `nerv.MatrixParam` as an example
-- @type nerv.ChunkFile

--- Write a metadata table plus chunk data through the low-level writer.
-- The metadata is serialized with `table.tostring` and handed, together with
-- the file handle and `writer`, to `self._write_chunkdata`.
-- @param metadata a Lua table describing the chunk; any other type is
-- reported through `nerv.error`
-- @param writer passed through unchanged to `_write_chunkdata` (presumably an
-- object implementing `write(handle)` -- TODO confirm against the C side)
-- @return whatever `_write_chunkdata` returns
function nerv.ChunkFile:write_chunkdata(metadata, writer)
    if type(metadata) == "table" then
        local serialized = table.tostring(metadata)
        return self._write_chunkdata(self.handle, serialized, writer)
    end
    nerv.error("metadata should be a Lua table")
end

--- Write a chunk to the file.
-- Gathers the chunk's `id`, `__typename` and `get_info()` result into a
-- metadata table and passes it, with the chunk itself as the writer, to
-- `write_chunkdata`.
-- @param chunk the chunk to be serialized. A *chunk* can be any Lua object
-- which implements the required interface; a nil `id` is reported through
-- `nerv.error`.
function nerv.ChunkFile:write_chunk(chunk)
    local chunk_id, chunk_type = chunk.id, chunk.__typename
    if chunk_id == nil then
        nerv.error("id of chunk %s must be specified", chunk_type)
    end
    local metadata = {
        id = chunk_id,
        type = chunk_type,
        info = chunk:get_info(),
    }
    self:write_chunkdata(metadata, chunk)
end

--- Read a chunk from the file.
-- Looks the chunk up in `self.metadata`, constructs an object of the recorded
-- type, restores its info table and lets the object read its own data.
-- @param id the identifier of the chunk to be read from a chunk file, which
-- may store multiple chunks
-- @param global_conf a table describing the computation state and providing
-- some global settings, which will be passed to the constructor of the chunk
-- @return the newly constructed chunk
function nerv.ChunkFile:read_chunk(id, global_conf)
    if self.metadata == nil then
        -- NOTE(review): presumably metadata is only populated when the file
        -- was opened for reading -- confirm against the C implementation
        nerv.error("wrong file opening mode")
    end
    local entry = self.metadata[id]
    if entry == nil then
        nerv.error("chunk with id %s does not exist", id)
    end
    -- Resolve the stored type name to its constructor and instantiate it.
    local new_chunk = nerv.get_type(entry.type)(id, global_conf)
    new_chunk:set_info(entry.info)
    -- Keep the call inline so every value returned by _get_chunkdata is
    -- forwarded to read().
    new_chunk:read(self._get_chunkdata(self.handle, entry._chunk_info))
    return new_chunk
end

--- Close a chunk file gracefully.
-- Hands the underlying file handle to the low-level `_close` routine.
function nerv.ChunkFile:close()
    local handle = self.handle
    self._close(handle)
end

--- The abstract class which defines the interface of data readers.
-- Data readers, as the name suggests, are responsible for the task-specific
-- low-level I/O: they read from on-disk data files and produce a formalized
-- data block each time they are invoked by the data buffer (see
-- `nerv.DataBuffer`). The main interface is defined by the `get_data` method.
-- @type nerv.DataReader

local DataReader = nerv.class("nerv.DataReader")

--- The constructor.
-- This base implementation only calls `nerv.error_method_not_implemented`;
-- as the class is abstract, concrete readers are expected to override it.
-- @param global_conf a table describing the computation state and providing
-- some global settings
-- @param reader_conf a table providing settings dedicated to the reader

function DataReader:__init(global_conf, reader_conf)
    nerv.error_method_not_implemented()
end

--- Get a data block from the reader.
-- This base implementation only calls `nerv.error_method_not_implemented`;
-- the contract below is what overriding readers are expected to fulfil.
-- @return a table which maps data slot identifiers to data matrices. A data
-- slot identifier is a unique string naming one slot of data. Each identifier
-- maps to a matrix containing the data (`{<slot_id> = <data matrix>, ...}`).
-- All the matrices in the returned table are required to have the same
-- number of rows.
function DataReader:get_data()
    nerv.error_method_not_implemented()
end

--- The abstract class which defines the interface of data buffers.
-- Data buffers can be regarded as data reorganizers which accept
-- variable-length data blocks and chop the stacked blocks into equal-length
-- batch groups used by network computation. The main interface is defined by
-- the `get_data` method.
-- @type nerv.DataBuffer
local DataBuffer = nerv.class("nerv.DataBuffer")

--- The constructor.
-- This base implementation only calls `nerv.error_method_not_implemented`;
-- as the class is abstract, concrete buffers are expected to override it.
-- @param global_conf a table describing the computation
-- state and providing some global settings
-- @param buffer_conf a table providing settings
-- dedicated to the buffer
function DataBuffer:__init(global_conf, buffer_conf)
    nerv.error_method_not_implemented()
end

--- Get a batch group from the buffer.
-- This base implementation only calls `nerv.error_method_not_implemented`;
-- the contract below is what overriding buffers are expected to fulfil.
-- @return a table containing the following fields:
--
-- * `data`: a table which maps slot identifiers to chunks/"mini-batches". Each
--   chunk is an array of batch matrices (`{<slot_id> = {<batch_1>, <batch_2>, ..., <batch_chunk_size>}, ...}`, see `nerv.Network`).
-- * `seq_length`: a table containing the length (number of frames) of each
--   sequence (utterance). (`{<fnum_1>, <fnum_2>, ..., <fnum_batch_size>}`)
-- * `new_seq`: a table containing the indices of the batch matrix rows that
--    are the first frames of a sequence. (`{}` when no new
--    sequence/utterance appears, at most `batch_size` elements when all the
--    sequences/utterances are on their first frames)

function DataBuffer:get_data()
    nerv.error_method_not_implemented()
end

nerv.include('frm_buffer.lua')
nerv.include('seq_buffer.lua')