--- Implements groups of parameters (`nerv.Param`) and computation nodes
-- (`nerv.Layer`).

--- The class describing a group of parameters (an internal state) that can be
-- bound to layers. This class also implements the *chunk* interface (see
-- `nerv.ChunkFile`), which means instances of `nerv.Param` can be exported to
-- chunk files as chunks.
-- @type nerv.Param

local Param = nerv.class('nerv.Param')

--- The constructor.
-- @param id the identifier for the group of parameters
-- @param global_conf a table describing the computation state and providing
-- some global settings

function Param:__init(id, global_conf)
    self.id = id
    self.gconf = global_conf
end

--- Retrieve the metadata of the parameter group. This function implements the
-- *chunk* interface.
-- @return a table containing all metadata

function Param:get_info()
    return self.info
end

--- Set the metadata of the parameter group. This function implements the
-- *chunk* interface.
-- @param info a table containing all metadata

function Param:set_info(info)
    self.info = info
end

--- Read from the given file handle. This function implements the
-- *chunk* interface.
-- @param handle the file handle

function Param:read(handle)
    nerv.error_method_not_implemented()
end

--- Write to the given file handle. This function implements the
-- *chunk* interface.
-- @param handle the file handle

function Param:write(handle)
    nerv.error_method_not_implemented()
end
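
-- A skeletal sketch of a concrete parameter type implementing the chunk
-- interface. The class name `nerv.ToyParam` is hypothetical, and the
-- host-matrix `load`/`save` calls are assumptions about the matrix I/O API;
-- the point is only which methods a `nerv.Param` subclass must override:
--
--     local ToyParam = nerv.class('nerv.ToyParam', 'nerv.Param')
--
--     function ToyParam:read(handle)
--         -- assumed host-matrix loader reading the chunk body
--         self.trans = self.gconf.mmat_type.load(handle)
--     end
--
--     function ToyParam:write(handle)
--         -- assumed host-matrix writer producing the chunk body
--         self.trans:save(handle)
--     end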

--- A parameter value generator that always returns zero. It can be passed as
-- the `pgen` argument of `nerv.Layer.find_param` to generate all-zero
-- parameters (see the usage sketch below).
-- @return the constant zero

function Param.gen_zero()
    return 0
end
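
-- A usage sketch: pass `gen_zero` as the `pgen` argument of
-- `nerv.Layer.find_param` (defined below) so that an auto-generated parameter
-- starts out all-zero. The surrounding `bind_params` body and the choice of
-- `nerv.BiasParam` (from `bias.lua`) are assumed for illustration:
--
--     self.bp = self:find_param("bp", self.lconf, self.gconf,
--                               nerv.BiasParam, {1, self.dim_out[1]},
--                               nerv.Param.gen_zero)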

--- The class describing a single computation node that calculates values at
-- its output ports from its input ports; the outputs may in turn feed the
-- input ports of other layers.
-- @type nerv.Layer

local Layer = nerv.class('nerv.Layer')

--- The constructor. All inheriting classes should call this base constructor to
-- initialize some predefined fields (of `self`):
--
-- * `id`: the identifier of the layer
-- * `gconf`: a table describing the computation state and providing some
--   global settings
-- * `lconf`: a table providing settings dedicated to the layer. Some fields
--   are considered "standard" and shared by all layers:
--      * `dim_in`: an ordered array of the input port dimensions (widths)
--      * `dim_out`: an ordered array of the output port dimensions (widths)
--      * `params`: optional, a table mapping each manually bound parameter
--        name used by the layer to the parameter id used to find the
--        parameter in the parameter repo
--      * `pr`: optional, the parameter repo (see `nerv.ParamRepo`) searched
--        for parameters while binding, used by `nerv.Layer.find_param`
-- * `mat_type`: the matrix type that should be used for storing intermediate
--   results
-- * `loc_type`: a value from `nerv.ParamRepo.LOC_TYPES` indicating whether
--   `nerv.Param` instances are stored in host or device RAM
-- * `dim_in`: an ordered array of the input port dimensions (widths)
-- * `dim_out`: an ordered array of the output port dimensions (widths)
--
-- @param id the identifier
-- @param global_conf see `self.gconf`
-- @param layer_conf see `self.lconf`

function Layer:__init(id, global_conf, layer_conf)
    self.id = id
    self.gconf = global_conf
    self.lconf = layer_conf
    if self.gconf.use_cpu then
        self.mat_type = self.gconf.mmat_type
        self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
    else
        self.mat_type = self.gconf.cumat_type
        self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE
    end
    self.dim_in = layer_conf.dim_in
    self.dim_out = layer_conf.dim_out
end
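
-- A minimal sketch of a subclass constructor. The layer `nerv.ScaleLayer` and
-- its `scale` setting are hypothetical, but the base-constructor call and the
-- port check follow the conventions of this file:
--
--     local ScaleLayer = nerv.class('nerv.ScaleLayer', 'nerv.Layer')
--
--     function ScaleLayer:__init(id, global_conf, layer_conf)
--         nerv.Layer.__init(self, id, global_conf, layer_conf)
--         self.scale = layer_conf.scale or 1.0
--         self:check_dim_len(1, 1) -- exactly one input and one output port
--     end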

--- Initialize the layer; called once for each epoch.
-- @param batch_size the batch size to be used in the coming epoch

function Layer:init(batch_size)
    nerv.error_method_not_implemented()
end

--- Update (change the state of) the bound (tied) parameter according to the
-- calculation.
-- @param bp_err an array of row-major matrices storing the error
-- back-propagated from the output ports
-- @param input an array of row-major matrices storing the input before the
-- forward propagation
-- @param output an array of row-major matrices storing the output after the
-- forward propagation
-- @param t BPTT time `t`

function Layer:update(bp_err, input, output, t)
    nerv.error_method_not_implemented()
end

--- Calculate the values in output ports according to the input.
-- @param input an array of row-major matrices storing the input before the
-- forward propagation
-- @param output an array of row-major matrices storing the output after the
-- forward propagation
-- @param t BPTT time `t`

function Layer:propagate(input, output, t)
    nerv.error_method_not_implemented()
end
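
-- Continuing the hypothetical `nerv.ScaleLayer`: a propagate sketch that
-- writes a scaled copy of the single input port to the single output port.
-- The matrix method `add(ma, mb, alpha, beta)` (computing
-- `alpha * ma + beta * mb`) is assumed to be available on `mat_type`
-- instances:
--
--     function ScaleLayer:propagate(input, output, t)
--         -- output[1] = self.scale * input[1]
--         output[1]:add(input[1], input[1], self.scale, 0.0)
--     end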

--- Calculate the next error value (`next_bp_err`) by back-propagation.
-- @param bp_err an array of row-major matrices storing the error
-- back-propagated from the output ports
-- @param next_bp_err an array of row-major matrices storing the next error
-- back-propagated to the input ports
-- @param input an array of row-major matrices storing the input before the
-- forward propagation
-- @param output an array of row-major matrices storing the output after the
-- forward propagation
-- @param t BPTT time `t`

function Layer:back_propagate(bp_err, next_bp_err, input, output, t)
    nerv.error_method_not_implemented()
end
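
-- For the hypothetical `nerv.ScaleLayer`, back-propagation scales the error
-- in the same way (same assumed `add` signature as in the propagate sketch):
--
--     function ScaleLayer:back_propagate(bp_err, next_bp_err, input, output, t)
--         -- next_bp_err[1] = self.scale * bp_err[1]
--         next_bp_err[1]:add(bp_err[1], bp_err[1], self.scale, 0.0)
--     end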

--- Check the lengths of the dimension arrays. Invoking this function to
-- validate the ports in your layer implementation is recommended.
-- @param len_in the expected number of input ports (-1 if variable)
-- @param len_out the expected number of output ports (-1 if variable)

function Layer:check_dim_len(len_in, len_out)
    local actual_in = #self.dim_in
    local actual_out = #self.dim_out
    if len_in > 0 and actual_in ~= len_in then
        nerv.error("layer %s expects %d inputs, %d given",
                    self.id, len_in, actual_in)
    end
    if len_out > 0 and actual_out ~= len_out then
        nerv.error("layer %s expects %d outputs, %d given",
                    self.id, len_out, actual_out)
    end
end
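
-- For example, a layer that accepts a variable number of inputs but produces
-- exactly one output would call:
--
--     self:check_dim_len(-1, 1)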

--- Get all the parameters used by (bound to) the layer.
-- @return a `nerv.ParamRepo` instance storing all the involved parameters

function Layer:get_params()
    nerv.error_method_not_implemented()
end

--- Look up and bind the parameters in the repo specified in the layer
-- settings. A proper layer implementation performs the lookup through
-- invocations of `find_param`.

function Layer:bind_params()
    nerv.error_method_not_implemented()
end

--- Get two arrays describing the dimensions of input and output.
-- @return two arrays: `<dim_in>, <dim_out>`

function Layer:get_dim()
    return self.dim_in, self.dim_out
end

--- Set an attribute on the layer.
-- @param name the name of the attribute
-- @param value the value of the attribute

function Layer:set_attr(name, value)
    self[name] = value
end

--- Get the contained (nested) layer inside the layer (useful for complex
-- layers like `nerv.GraphLayer`).
-- @param id the identifier of the nested layer to be found

function Layer:get_sublayer(id)
    nerv.error('primitive layer does not have sublayers')
end

--- Find the parameter according to the layer settings. This function should be
-- used as the only way to locate a parameter in the implementation of
-- `bind_params` (see `nerv.AffineLayer.bind_params`).
-- @param plist the potential names for the parameter, in order of lookup
-- priority
-- @param lconf a table providing settings dedicated to the layer
-- @param gconf a table describing the computation state and providing some
-- global settings
-- @param ptype the type used to construct the parameter instance when
-- auto-generation is triggered
-- @param pdim an array specifying each dimension of the parameter when
-- auto-generation is triggered
-- @param pgen the function used to generate values when auto-generation is
-- triggered; `gconf.param_gen` is used if `nil`

function Layer:find_param(plist, lconf, gconf, ptype, pdim, pgen)
    if type(plist) == "string" then
        plist = {plist}
    end
    if lconf.params == nil then
        lconf.params = {}
    end
    local plist_str = table.tostring(plist)
    local pid
    for _, pname in ipairs(plist) do
        -- a parameter id manually specified in `layer_conf.params` takes
        -- precedence; the repo itself (`lconf.pr`) is optional
        if lconf.params[pname] ~= nil then
            nerv.info("param id for [%s] of layer [%s] specified in `layer_conf.params`.", pname, self.id)
            pid = lconf.params[pname]
            if lconf.pr ~= nil and lconf.pr:has_param(pid) then
                return lconf.pr:get_param(pid)
            end
        end
        -- otherwise, fall back to the conventional id `<layer_id>_<name>`
        pid = self.id .. '_' .. pname
        if lconf.pr ~= nil and lconf.pr:has_param(pid) then
            nerv.info("param id for [%s] of layer [%s] is generated automatically.", pname, self.id)
            return lconf.pr:get_param(pid)
        end
    end
    -- the parameter cannot be found in the repo: auto-generate it
    pid = self.id .. '_' .. plist[1]
    nerv.info("param id for [%s] of layer [%s] is not found in the specified param repo, " ..
                "switch to auto-generation", plist_str, self.id)
    local p = ptype(pid, gconf)
    p.trans = self.mat_type(unpack(pdim))
    pgen = pgen or gconf.param_gen
                or gconf.param_random -- obsolete name
    if type(pgen) ~= "function" then
        nerv.error("a param generating function is needed")
    end
    p.trans:generate(pgen)
    return p
end
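
-- A hedged sketch of how `find_param` is typically used from `bind_params`,
-- again for the hypothetical `nerv.ScaleLayer`; the parameter type
-- `nerv.BiasParam` (from `bias.lua`) and the `nerv.ParamRepo` constructor
-- usage follow the pattern of `affine.lua`:
--
--     function ScaleLayer:bind_params()
--         -- looks up "<id>_gain" (or the id bound in `layer_conf.params`),
--         -- auto-generating a 1 x dim_out[1] parameter when absent
--         self.gain = self:find_param("gain", self.lconf, self.gconf,
--                                     nerv.BiasParam, {1, self.dim_out[1]})
--     end
--
--     function ScaleLayer:get_params()
--         return nerv.ParamRepo({self.gain}, self.loc_type)
--     end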

nerv.include('graph.lua')
nerv.include('affine.lua')
nerv.include('sigmoid.lua')
nerv.include('tanh.lua')
nerv.include('softmax_ce.lua')
nerv.include('bias.lua')
nerv.include('window.lua')
nerv.include('mse.lua')
nerv.include('combiner.lua')
nerv.include('softmax.lua')
nerv.include('elem_mul.lua')
nerv.include('lstm.lua')
nerv.include('lstm_gate.lua')
nerv.include('dropout.lua')
nerv.include('gru.lua')
nerv.include('rnn.lua')
nerv.include('duplicate.lua')
nerv.include('identity.lua')
nerv.include('projection.lua')
nerv.include('lstmp.lua')
nerv.include('relu.lua')

-- The following lines are for backward compatibility and will be removed in
-- the future. The use of these names is deprecated.
nerv.DropoutLayerT = nerv.DropoutLayer
nerv.GRULayerT = nerv.GRULayer
nerv.LSTMLayerT = nerv.LSTMLayer
nerv.SoftmaxCELayerT = nerv.SoftmaxCELayer