1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
|
--- Contains parameter and layer classes related to linear (or affine)
-- transform.

--- The base class for all matrix-based parameters. The class holds a single
-- matrix which can be accessed by `self.trans`. Subclassed below by
-- `nerv.LinearTransParam` (weights) and `nerv.BiasParam` (bias row).
-- @type nerv.MatrixParam
local MatrixParam = nerv.class('nerv.MatrixParam', 'nerv.Param')
--- Check the storage location of the contained matrix. This function is
-- required by `nerv.ParamRepo`.
-- @param checker the callback function for checking; it is invoked with the
-- parameter's underlying matrix (`self.trans`)
function MatrixParam:check(checker)
    -- check trans matrix type
    checker(self.trans)
end
--- Read from a file handle. See `nerv.Param.read`.
-- The matrix is loaded using the matrix type configured in
-- `self.gconf.mmat_type`.
-- @param handle the file handle
function MatrixParam:read(handle)
    self.trans = self.gconf.mmat_type.load(handle)
end
--- Write to a file handle. See `nerv.Param.write`.
-- Serializes the contained matrix via its `save` method.
-- @param handle the file handle
function MatrixParam:write(handle)
    self.trans:save(handle)
end
--- Allocate and zero the gradient buffers used during training.
-- `correction` holds the momentum-smoothed update while `correction_acc`
-- accumulates raw gradients between two parameter updates.
function MatrixParam:train_init()
    local corr = self.trans:create()
    corr:fill(0)
    self.correction = corr
    local acc = corr:create()
    acc:fill(0)
    self.correction_acc = acc
end
--- Make a copy of this parameter.
-- @param copier a function mapping the source matrix to its copy
-- @return a new `nerv.MatrixParam` with the copied matrix
-- NOTE(review): the target is always constructed as a plain
-- `nerv.MatrixParam`, so copying a subclass instance (e.g.
-- `nerv.LinearTransParam`) loses the subtype -- confirm this is intended.
function MatrixParam:copy(copier)
    local target = nerv.MatrixParam(self.id, self.gconf)
    target.trans = copier(self.trans)
    return target
end
--- Apply the accumulated correction to `self.trans` (internal helper).
-- Does nothing when `self.no_update` is set.
-- @param alpha scaling applied to the current weights (callers pass an L2
-- decay factor or 1.0)
-- @param beta extra scaling applied to the learning-rate term
function MatrixParam:_update(alpha, beta)
    if self.no_update then
        return
    end
    local gconf = self.gconf
    -- momentum gain: 1 / (1 - momentum) compensates for the geometric
    -- amplification of the momentum-smoothed gradient, keeping the
    -- effective step size comparable across momentum settings
    local mmt_gain = 1.0 / (1.0 - gconf.momentum)
    local n = gconf.batch_size * mmt_gain
    -- perform update
    if gconf.momentum > 0 then
        -- blend the freshly accumulated gradient into the momentum buffer,
        -- then step the weights by -lrate / n (scaled by alpha/beta)
        self.correction:add(self.correction, self.correction_acc, gconf.momentum, 1.0)
        self.trans:add(self.trans, self.correction, alpha, -gconf.lrate / n * beta)
    else
        -- no momentum: step directly from the accumulated gradient
        self.trans:add(self.trans, self.correction_acc, alpha, -gconf.lrate / n * beta)
    end
    -- start accumulating afresh for the next update
    self.correction_acc:fill(0)
end
--- Accumulate an explicit gradient into the correction buffer.
-- @param gradient a matrix shaped like `self.trans`, added element-wise
-- into `self.correction_acc`
function MatrixParam:back_propagate_by_gradient(gradient)
    self.correction_acc:add(self.correction_acc, gradient, 1.0, 1.0)
end
--- Accumulate the product input^T * err (the weight gradient for a linear
-- transform) into the correction buffer.
-- @param err the back-propagated error matrix
-- @param input the corresponding forward input matrix (used transposed)
function MatrixParam:back_propagate_by_err_input(err, input)
    self.correction_acc:mul(input, err, 1.0, 1.0, 'T', 'N')
end
--- Apply the accumulated gradient without weight decay
-- (alpha = beta = 1.0).
function MatrixParam:update_by_gradient()
    self:_update(1.0, 1.0)
end
--- Apply the accumulated error-input products as an update, scaling both
-- the weights and the step by the L2 weight-decay factor
-- (1 - lrate * wcost).
function MatrixParam:update_by_err_input()
    local decay = 1 - self.gconf.lrate * self.gconf.wcost
    self:_update(decay, decay)
end
--- The affine layer that does the calculation Wx + b, also known as fully
-- connected linear transform layer. When bound with several input ports it
-- computes W1 x1 + W2 x2 + ... + b (see `propagate`).
-- @type nerv.AffineLayer
local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer')

--- The constructor.
-- @param id the identifier
-- @param global_conf see `self.gconf` of `nerv.Layer.__init`
-- @param layer_conf a table providing with settings dedicated for the layer,
-- for `layer_conf` fields that are shared by all layers, see
-- `nerv.Layer.__init`. The affine layer requires parameters to be bound, the
-- following parameter names will be looked up while binding:
--
-- * `ltp`: the linear transformation parameter, also known as the weight matrix, W in Wx + b
-- * `bp`: the bias parameter, also known as the bias matrix, b in Wx + b
function AffineLayer:__init(id, global_conf, layer_conf)
    nerv.Layer.__init(self, id, global_conf, layer_conf)
    self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
    self:bind_params()
end
--- Bind the layer's parameters from the layer configuration.
-- Looks up `ltp1`..`ltpN` (with `ltp` accepted as an alias for `ltp1`) and
-- `bp` via `find_param`. Per-parameter `no_update_*` flags take precedence
-- over the blanket `no_update_all`: an explicit `false` keeps that parameter
-- trainable even when `no_update_all` is set.
function AffineLayer:bind_params()
    local lconf = self.lconf
    -- `no_update_ltp` is a legacy alias for `no_update_ltp1`
    lconf.no_update_ltp1 = lconf.no_update_ltp1 or lconf.no_update_ltp
    for i = 1, #self.dim_in do
        local pid = "ltp" .. i
        -- the first weight matrix may also be stored under the name "ltp"
        local pid_list = i == 1 and {pid, "ltp"} or pid
        self["ltp" .. i] = self:find_param(pid_list, lconf, self.gconf,
                                           nerv.LinearTransParam,
                                           {self.dim_in[i], self.dim_out[1]})
        -- fix: the old `(x ~= nil) and x or no_update_all` expression
        -- collapsed an explicit `false` into the `no_update_all` default
        -- (the and/or ternary pitfall), making the nil-check dead code
        local no_update = lconf["no_update_ltp" .. i]
        if no_update == nil then
            no_update = lconf.no_update_all
        end
        if no_update then
            self["ltp" .. i].no_update = true
        end
    end
    self.ltp = self.ltp1 -- alias of ltp1
    self.bp = self:find_param("bp", lconf, self.gconf,
                              nerv.BiasParam,
                              {1, self.dim_out[1]},
                              nerv.Param.gen_zero)
    local no_update = lconf["no_update_bp"]
    if no_update == nil then
        no_update = lconf.no_update_all
    end
    if no_update then
        self.bp.no_update = true
    end
end
--- Validate parameter dimensions against the layer's ports and allocate
-- training buffers for every bound parameter.
-- Raises a nerv error on any dimension mismatch.
-- @param batch_size unused here; part of the generic layer interface
function AffineLayer:init(batch_size)
    if self.dim_out[1] ~= self.bp.trans:ncol() then
        -- fix: typo "paramter" in the error message
        nerv.error("mismatching dimensions of linear transform and bias parameter")
    end
    for i = 1, #self.dim_in do
        -- each weight matrix must be dim_in[i] x dim_out[1]
        if self.dim_in[i] ~= self["ltp" .. i].trans:nrow() then
            nerv.error("mismatching dimensions of linear transform parameter and input")
        end
        if self.dim_out[1] ~= self["ltp" .. i].trans:ncol() then
            nerv.error("mismatching dimensions of linear transform parameter and output")
        end
        -- allocate gradient/momentum buffers for this weight matrix
        self["ltp" .. i]:train_init()
    end
    self.bp:train_init()
end
--- Adjust internal buffers for a new batch size. The affine layer keeps no
-- batch-sized buffers of its own, so this is intentionally a no-op.
-- @param batch_size the new batch size (unused)
function AffineLayer:batch_resize(batch_size)
    -- do nothing
end
--- Apply the accumulated corrections to every bound parameter: each weight
-- matrix through its error-input rule (which includes L2 decay), the bias
-- through the plain gradient rule.
function AffineLayer:update()
    local n_inputs = #self.dim_in
    for idx = 1, n_inputs do
        local weight = self["ltp" .. idx]
        weight:update_by_err_input()
    end
    self.bp:update_by_gradient()
end
--- Forward computation: output = sum_i (input_i * W_i) + b.
-- @param input array of input matrices, one per input port
-- @param output array holding the single output matrix
function AffineLayer:propagate(input, output)
    -- apply linear transform; beta = 0.0 overwrites any stale output
    output[1]:mul(input[1], self.ltp1.trans, 1.0, 0.0, 'N', 'N')
    for i = 2, #self.dim_in do
        -- beta = 1.0 accumulates each additional input port's contribution
        output[1]:mul(input[i], self["ltp" .. i].trans, 1.0, 1.0, 'N', 'N')
    end
    -- add bias
    output[1]:add_row(self.bp.trans, 1.0)
end
--- Backward computation: propagate the error to every input port and
-- accumulate the parameter gradients.
-- @param bp_err array holding the single output-side error matrix
-- @param next_bp_err array of matrices receiving the per-input errors
-- @param input array of forward inputs (needed for the weight gradients)
-- @param output array of forward outputs (unused)
function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
    for i = 1, #self.dim_in do
        -- error for input i is bp_err * W_i^T
        next_bp_err[i]:mul(bp_err[1], self["ltp" .. i].trans, 1.0, 0.0, 'N', 'T')
        self["ltp" .. i]:back_propagate_by_err_input(bp_err[1], input[i])
    end
    -- bias gradient: column-wise sum of the error over the batch
    self.bp:back_propagate_by_gradient(bp_err[1]:colsum())
end
--- Collect every parameter bound to this layer.
-- @return a fresh `nerv.ParamRepo` holding `ltp1`..`ltpN` and the bias
function AffineLayer:get_params()
    local repo = nerv.ParamRepo({self.ltp1, self.bp}, self.loc_type)
    for idx = 2, #self.dim_in do
        local weight = self["ltp" .. idx]
        repo:add(weight.id, weight)
    end
    return repo
end
--- The class for linear transform parameter. Inherits all behavior
-- (read/write/update) from `nerv.MatrixParam`.
-- @type nerv.LinearTransParam
local LinearTransParam = nerv.class('nerv.LinearTransParam', 'nerv.MatrixParam')

--- The class for bias parameter (currently implemented as a one-row matrix).
-- Inherits all behavior from `nerv.MatrixParam`.
-- @type nerv.BiasParam
local BiasParam = nerv.class('nerv.BiasParam', 'nerv.MatrixParam')
|