--- Parameter and layer classes related to linear transform.
local MatrixParam = nerv.class('nerv.MatrixParam', 'nerv.Param')
local LinearTransParam = nerv.class('nerv.LinearTransParam', 'nerv.MatrixParam')
local BiasParam = nerv.class('nerv.BiasParam', 'nerv.MatrixParam')
local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer')

--- A parameter that consists of a single matrix.
-- @type nerv.MatrixParam

--- Read the matrix from a file handle.
-- @param handle the file handle
function MatrixParam:read(handle)
    self.trans = self.gconf.mmat_type.load(handle)
    if not self.gconf.use_cpu then
        -- copy the host matrix onto the CUDA device
        self.trans = self.gconf.cumat_type.new_from_host(self.trans)
    end
end

--- Write the matrix to a file handle.
-- @param handle the file handle
function MatrixParam:write(handle)
    local trans = self.trans
    if not self.gconf.use_cpu then
        -- copy back to host memory before saving
        trans = self.trans:new_to_host()
    end
    trans:save(handle)
end

--- Allocate and zero the momentum (correction) buffer before training.
function MatrixParam:train_init()
    self.correction = self.trans:create()
    self.correction:fill(0)
end

function MatrixParam:_update_by_gradient(gradient, alpha, beta)
    local gconf = self.gconf
    -- momentum gain
    local mmt_gain = 1.0 / (1.0 - gconf.momentum)
    local n = gconf.batch_size * mmt_gain
    -- perform update
    if gconf.momentum > 0 then
        self.correction:add(self.correction, gradient, gconf.momentum, 1.0)
        self.trans:add(self.trans, self.correction, alpha, -gconf.lrate / n * beta)
    else
        self.trans:add(self.trans, gradient, alpha, -gconf.lrate / n * beta)
    end
end
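
-- A worked sketch of one _update_by_gradient step, assuming illustrative
-- values gconf.momentum = 0.9, gconf.lrate = 0.1, gconf.batch_size = 256
-- (none of these are fixed by this file) and alpha = beta = 1.0:
--
--   mmt_gain   = 1.0 / (1.0 - 0.9)   --> 10
--   n          = 256 * 10            --> 2560
--   correction <- 0.9 * correction + gradient
--   trans      <- trans - (0.1 / 2560) * correction
--
-- The mmt_gain factor shrinks the effective learning rate so that the
-- momentum-accumulated step keeps roughly the magnitude of a plain step.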

function MatrixParam:_update_by_err_input(err, input, alpha, beta)
    local gconf = self.gconf
    -- momentum gain
    local mmt_gain = 1.0 / (1.0 - gconf.momentum)
    local n = gconf.batch_size * mmt_gain
    -- perform update
    if gconf.momentum > 0 then
        self.correction:mul(input, err, 1.0, gconf.momentum, 'T', 'N')
        self.trans:add(self.trans, self.correction, alpha, -gconf.lrate / n * beta)
    else
        self.trans:mul(input, err, -gconf.lrate / n * beta, alpha, 'T', 'N')
    end
end
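
-- Note on the err/input form above: mul(input, err, ..., 'T', 'N')
-- computes input^T * err, the (dim_in x dim_out) gradient summed over the
-- mini-batch, directly into the correction (or weight) matrix, so the
-- gradient never has to be materialized separately. With momentum the
-- step is the same as in _update_by_gradient:
--
--   correction <- input^T * err + momentum * correction
--   trans      <- alpha * trans - (lrate / n) * beta * correction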

--- Update the parameter by a gradient matrix.
function MatrixParam:update_by_gradient(gradient)
    self:_update_by_gradient(gradient, 1.0, 1.0)
end

--- Update the parameter by the back-propagated error and the layer input.
function MatrixParam:update_by_err_input(err, input)
    self:_update_by_err_input(err, input, 1.0, 1.0)
end

--- Update with L2 regularization (weight decay) folded into the step.
function LinearTransParam:update_by_err_input(err, input)
    local gconf = self.gconf
    local l2 = 1 - gconf.lrate * gconf.wcost
    self:_update_by_err_input(err, input, l2, l2)
end
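
-- Passing l2 as both alpha and beta scales the old weights and the
-- gradient step by (1 - lrate * wcost): scaling the weights is standard
-- L2 weight decay, and the extra factor on the step is negligibly close
-- to one. An illustrative example (values assumed, not fixed here):
--
--   lrate = 0.1, wcost = 1e-3  ==>  l2 = 1 - 0.1 * 1e-3 = 0.9999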

--- A fully-connected linear transform layer.
-- @type nerv.AffineLayer

--- The constructor.
function AffineLayer:__init(id, global_conf, layer_conf)
    self.id = id
    self.dim_in = layer_conf.dim_in
    self.dim_out = layer_conf.dim_out
    -- backward compatibility: accept `ltp` as an alias of `ltp1`
    if layer_conf.ltp ~= nil and layer_conf.ltp1 == nil then
        layer_conf.ltp1 = layer_conf.ltp
    end
    -- one linear transform parameter per input port
    for i = 1, #self.dim_in do
        local pid = "ltp" .. i
        local pid_list = i == 1 and {"ltp", pid} or pid
        self["ltp" .. i] = self:find_param(pid_list, layer_conf, global_conf,
                                           nerv.LinearTransParam,
                                           {self.dim_in[i], self.dim_out[1]}, pid)
    end
    self.ltp = self.ltp1 -- alias of ltp1
    self.bp = self:find_param("bp", layer_conf, global_conf,
                              nerv.BiasParam,
                              {1, self.dim_out[1]}, "bp")
    self.gconf = global_conf
    self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
end
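
-- A hypothetical construction sketch (the dimensions and the gconf table
-- are illustrative assumptions, not values required by this file). With
-- two input ports the layer looks up parameters ltp1, ltp2 and bp:
--
--   local layer = nerv.AffineLayer('affine0', gconf,
--                                  {dim_in = {429, 128}, dim_out = {2048}})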

--- Initialize for training: validate dimensions and allocate buffers.
function AffineLayer:init(batch_size)
    if self.ltp.trans:ncol() ~= self.bp.trans:ncol() then
        nerv.error("mismatching dimensions of linear transform and bias parameter")
    end
    for i = 1, #self.dim_in do
        if self.dim_in[i] ~= self["ltp" .. i].trans:nrow() then
            nerv.error("mismatching dimensions of linear transform parameter and input")
        end
        if self.dim_out[1] ~= self["ltp" .. i].trans:ncol() then
            nerv.error("mismatching dimensions of linear transform parameter and output")
        end
        self["ltp" .. i]:train_init()
    end
    self.bp:train_init()
end

function AffineLayer:batch_resize(batch_size)
    -- do nothing
end

function AffineLayer:update(bp_err, input, output)
    for i = 1, #self.dim_in do
        self["ltp" .. i]:update_by_err_input(bp_err[1], input[i])
    end
    self.bp:update_by_gradient(bp_err[1]:colsum())
end
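
-- The bias has no incoming weight matrix, so its gradient is simply the
-- back-propagated error summed over the rows (frames) of the mini-batch;
-- colsum() yields exactly that 1 x dim_out row vector.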

function AffineLayer:propagate(input, output)
    -- apply linear transform
    output[1]:mul(input[1], self.ltp1.trans, 1.0, 0.0, 'N', 'N')
    for i = 2, #self.dim_in do
        output[1]:mul(input[i], self["ltp" .. i].trans, 1.0, 1.0, 'N', 'N')
    end
    -- add bias
    output[1]:add_row(self.bp.trans, 1.0)
end
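
-- In matrix notation, with X_i the batch matrix on input port i, the
-- forward pass above computes:
--
--   output = X_1 * ltp1 + X_2 * ltp2 + ... + bp   (bp broadcast per row)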

function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
    for i = 1, #self.dim_in do
        next_bp_err[i]:mul(bp_err[1], self["ltp" .. i].trans, 1.0, 0.0, 'N', 'T')
    end
end
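
-- The backward pass is the transpose counterpart of the forward pass:
--
--   next_bp_err_i = bp_err * ltp_i^T
--
-- The error w.r.t. the parameters is not computed here; it is recomputed
-- from (bp_err, input) in update() above.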

function AffineLayer:get_params()
    local pr = nerv.ParamRepo({self.ltp, self.bp})
    for i = 2, #self.dim_in do
        pr:add(self["ltp" .. i].id, self["ltp" .. i])
    end
    return pr
end
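
-- A minimal end-to-end usage sketch (the call sequence follows this file;
-- constructing the actual input/output matrices depends on the
-- surrounding setup and is omitted here):
--
--   layer:init(batch_size)
--   layer:propagate(input, output)
--   layer:back_propagate(bp_err, next_bp_err, input, output)
--   layer:update(bp_err, input, output)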