--- Implements the concept of groups of parameters (`nerv.Param`) and
-- computation nodes (`nerv.Layer`).
--- The class describing a group of parameters (an internal state) that can be
-- bound to layers. This class also implements the *chunk* interface (see
-- `nerv.ChunkFile`), which means instances of `nerv.Param` can be exported to
-- chunk files as chunks.
-- @type nerv.Param
local Param = nerv.class('nerv.Param')
--- The constructor.
-- @param id the identifier for the group of parameters
-- @param global_conf a table describing the computation state and providing
-- some global settings
function Param:__init(id, global_conf)
self.id = id
self.gconf = global_conf
end
--- Retrieve the metadata of the parameter group. This function implements the
-- *chunk* interface.
-- @return a table containing all metadata
function Param:get_info()
return self.info
end
--- Set the metadata of the parameter group. This function implements the
-- *chunk* interface.
-- @param info a table containing all metadata
function Param:set_info(info)
self.info = info
end
--- Read from the given file handle. This function implements the
-- *chunk* interface.
-- @param handle the file handle
function Param:read(handle)
nerv.error_method_not_implemented()
end
--- Write to the given file handle. This function implements the
-- *chunk* interface.
-- @param handle the file handle
function Param:write(handle)
nerv.error_method_not_implemented()
end
--- A trivial value generator that always returns zero; can be used as the
-- `pgen` argument of `nerv.Layer.find_param` (e.g. to zero-initialize biases).
-- @return the number zero
function Param.gen_zero()
return 0
end
--- The class describing a single computation node, which calculates the
-- values at its output ports from its input ports; the outputs may in turn
-- feed the input ports of other layers.
-- @type nerv.Layer
local Layer = nerv.class('nerv.Layer')
--- The constructor. All inheriting classes should call this base constructor to
-- initialize some predefined fields (of `self`):
--
-- * `id`: the identifier of the layer
-- * `gconf`: a table describing the computation state and providing some
--   global settings
-- * `lconf`: a table of settings dedicated to the layer. Some fields are
--   considered "standard" and shared by all layers:
--     * `dim_in`: an ordered array of the input port dimensions (widths)
--     * `dim_out`: an ordered array of the output port dimensions (widths)
--     * `params`: optional, a table mapping each manually bound parameter
--       name used by the layer to the parameter id used to find the
--       parameter in the parameter repo
--     * `pr`: optional, the parameter repo (see `nerv.ParamRepo`) in which
--       parameters are found while binding, used by `nerv.Layer.find_param`
-- * `mat_type`: the type of matrix to be used when storing intermediate
--   results
-- * `loc_type`: a value from `nerv.ParamRepo.LOC_TYPES` indicating whether
--   `nerv.Param` instances are stored in host or device RAM
-- * `dim_in`: an ordered array of the input port dimensions (widths)
-- * `dim_out`: an ordered array of the output port dimensions (widths)
--
-- @param id the identifier
-- @param global_conf see `self.gconf`
-- @param layer_conf see `self.lconf`
function Layer:__init(id, global_conf, layer_conf)
self.id = id
self.gconf = global_conf
self.lconf = layer_conf
if self.gconf.use_cpu then
self.mat_type = self.gconf.mmat_type
self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
else
self.mat_type = self.gconf.cumat_type
self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE
end
self.dim_in = layer_conf.dim_in
self.dim_out = layer_conf.dim_out
end
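-- A minimal sketch of a derived layer constructor (`nerv.MyLayer` is a
-- hypothetical name): call the base constructor first, then validate the
-- ports:
--
--     local MyLayer = nerv.class('nerv.MyLayer', 'nerv.Layer')
--
--     function MyLayer:__init(id, global_conf, layer_conf)
--         nerv.Layer.__init(self, id, global_conf, layer_conf)
--         self:check_dim_len(1, 1) -- exactly one input and one output port
--     end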
--- Initialize the layer, called once for each epoch.
-- @param batch_size the number of rows in a batch matrix (the batch size)
function Layer:init(batch_size)
nerv.error_method_not_implemented()
end
--- Update (change the state of) the parameters bound (tied) to the layer
-- according to the calculation.
-- @param bp_err an array of row-major matrices storing the error
-- back-propagated from the output ports
-- @param input an array of row-major matrices storing the input before the
-- forward propagation
-- @param output an array of row-major matrices storing the output after the
-- forward propagation
-- @param t BPTT time `t`
function Layer:update(bp_err, input, output, t)
nerv.error_method_not_implemented()
end
--- Calculate the values in output ports according to the input.
-- @param input an array of row-major matrices storing the input before the
-- forward propagation
-- @param output an array of row-major matrices storing the output after the
-- forward propagation
-- @param t BPTT time `t`
function Layer:propagate(input, output, t)
nerv.error_method_not_implemented()
end
--- Calculate the next error value (`next_bp_err`) by back-propagation.
-- @param bp_err an array of row-major matrices storing the error
-- back-propagated from the output ports
-- @param next_bp_err an array of row-major matrices storing the next error
-- back-propagated to the input ports
-- @param input an array of row-major matrices storing the input before the
-- forward propagation
-- @param output an array of row-major matrices storing the output after the
-- forward propagation
-- @param t BPTT time `t`
function Layer:back_propagate(bp_err, next_bp_err, input, output, t)
nerv.error_method_not_implemented()
end
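-- A condensed sketch of the three computation hooks for a parameter-free,
-- element-wise layer (modeled after `nerv.SigmoidLayer`; the matrix methods
-- `sigmoid`/`sigmoid_grad` are assumed to follow NERV's matrix API):
--
--     function MyLayer:propagate(input, output)
--         output[1]:sigmoid(input[1])
--     end
--
--     function MyLayer:back_propagate(bp_err, next_bp_err, input, output)
--         next_bp_err[1]:sigmoid_grad(bp_err[1], output[1])
--     end
--
--     function MyLayer:update(bp_err, input, output)
--         -- nothing to do: the layer has no bound parameters
--     end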
--- Check the lengths of the dimension arrays. Invoking this function to
-- check the ports is recommended in your layer implementation.
-- @param len_in the expected number of input ports (non-positive, e.g. -1,
-- if variable)
-- @param len_out the expected number of output ports (non-positive, e.g. -1,
-- if variable)
function Layer:check_dim_len(len_in, len_out)
    local actual_in = #self.dim_in
    local actual_out = #self.dim_out
    if len_in > 0 and actual_in ~= len_in then
        nerv.error("layer %s expects %d inputs, %d given",
                   self.id, len_in, actual_in)
    end
    if len_out > 0 and actual_out ~= len_out then
        nerv.error("layer %s expects %d outputs, %d given",
                   self.id, len_out, actual_out)
    end
end
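-- For a layer accepting a variable number of input ports (e.g. a
-- combiner-like layer), pass a non-positive value to skip that check:
--
--     self:check_dim_len(-1, 1) -- any number of inputs, exactly one output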
--- Get all the parameters used by (bound to) the layer.
-- @return a `nerv.ParamRepo` instance storing all the involved parameters
function Layer:get_params()
nerv.error_method_not_implemented()
end
--- Look up and bind the parameters in the repo specified by the layer
-- settings. A properly implemented layer invokes `find_param` from within
-- this function.
function Layer:bind_params()
nerv.error_method_not_implemented()
end
--- Get two arrays describing the dimensions of the input and output ports.
-- @return two arrays: `<dim_in>, <dim_out>`
function Layer:get_dim()
return self.dim_in, self.dim_out
end
--- Set an attribute of the layer.
-- @param name the name of the attribute
-- @param value the value of the attribute
function Layer:set_attr(name, value)
self[name] = value
end
--- Get a layer nested inside this layer (useful for compound layers like
-- `nerv.GraphLayer`).
-- @param id the identifier of the nested layer to be found
function Layer:get_sublayer(id)
nerv.error('primitive layer does not have sublayers')
end
--- Find the parameter according to the layer settings. This function should be
-- used as the only way to locate a parameter in the implementation of
-- `bind_params` (see `nerv.AffineLayer.bind_params`).
-- @param plist the potential names for the parameter in the order of lookup priority
-- @param lconf a table of settings dedicated to the layer
-- @param gconf a table describing the computation state and providing some
-- global settings
-- @param ptype the type for constructing the parameter instance when
-- auto-generation is triggered
-- @param pdim an array specifying each dimension of the parameter when
-- auto-generation is triggered
-- @param pgen the function used to generate values when auto-generation is
-- triggered, `gconf.param_gen` will be used if `nil`
function Layer:find_param(plist, lconf, gconf, ptype, pdim, pgen)
if type(plist) == "string" then
plist = {plist}
end
if lconf.params == nil then
lconf.params = {}
end
    local plist_str = table.tostring(plist)
local pid
for i, pname in ipairs(plist) do
if lconf.params[pname] ~= nil then
nerv.info("param id for [%s] of layer [%s] specified in `layer_conf.params`.", pname, self.id)
pid = lconf.params[pname]
end
        if pid ~= nil and lconf.pr:has_param(pid) then
return lconf.pr:get_param(pid)
end
pid = self.id .. '_' .. pname
if lconf.pr:has_param(pid) then
nerv.info("param id for [%s] of layer [%s] is generated automatically.", plist[1], self.id)
return lconf.pr:get_param(pid)
end
end
pid = self.id .. '_' .. plist[1]
nerv.info("param id for [%s] of layer [%s] is not found in the specified param repo, " ..
"switch to auto-generate", plist_str, self.id)
local p = ptype(pid, gconf)
p.trans = self.mat_type(unpack(pdim))
pgen = pgen or gconf.param_gen
or gconf.param_random -- obsolete name
if type(pgen) ~= "function" then
nerv.error("a param generate function is needed")
end
p.trans:generate(pgen)
return p
end
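-- A condensed sketch of a `bind_params` implementation that uses `find_param`
-- as the only lookup path (loosely modeled on `nerv.AffineLayer.bind_params`;
-- the real implementation also handles multiple input ports and update flags):
--
--     function MyAffineLayer:bind_params()
--         self.ltp = self:find_param('ltp', self.lconf, self.gconf,
--                                    nerv.LinearTransParam,
--                                    {self.dim_in[1], self.dim_out[1]})
--         self.bp = self:find_param('bp', self.lconf, self.gconf,
--                                   nerv.BiasParam,
--                                   {1, self.dim_out[1]},
--                                   nerv.Param.gen_zero)
--     end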
nerv.include('graph.lua')
nerv.include('affine.lua')
nerv.include('sigmoid.lua')
nerv.include('tanh.lua')
nerv.include('softmax_ce.lua')
nerv.include('bias.lua')
nerv.include('window.lua')
nerv.include('mse.lua')
nerv.include('combiner.lua')
nerv.include('softmax.lua')
nerv.include('elem_mul.lua')
nerv.include('lstm.lua')
nerv.include('dropout.lua')
nerv.include('gru.lua')
nerv.include('rnn.lua')
nerv.include('duplicate.lua')
nerv.include('identity.lua')
nerv.include('lstmp.lua')
nerv.include('relu.lua')
-- The following lines are for backward compatibility and will be removed in
-- the future. The use of these names is deprecated.
nerv.DropoutLayerT = nerv.DropoutLayer
nerv.GRULayerT = nerv.GRULayer
nerv.LSTMLayerT = nerv.LSTMLayer
nerv.SoftmaxCELayerT = nerv.SoftmaxCELayer