local GRULayerT = nerv.class('nerv.GRULayerT', 'nerv.LayerT')

function GRULayerT:__init(id, global_conf, layer_conf)
    -- input[1]: x, input[2]: h (the previous hidden state);
    -- the candidate activation h~ is computed internally
    self.id = id
    self.dim_in = layer_conf.dim_in
    self.dim_out = layer_conf.dim_out
    self.gconf = global_conf
    if self.dim_in[2] ~= self.dim_out[1] then
        nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)",
                   self.dim_in[2], self.dim_out[1])
    end
    -- prepare a DAGLayerT to hold the GRU structure
    local pr = layer_conf.pr
    if pr == nil then
        pr = nerv.ParamRepo()
    end
    -- prefix a sub-layer name with this layer's id
    local function ap(str)
        return self.id .. '.' .. str
    end
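    -- e.g. with id = "gru1", ap("resetGateL") yields "gru1.resetGateL",
    -- keeping sub-layer and parameter names unique per GRU instance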
    local layers = {
        ["nerv.CombinerLayer"] = {
            [ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]},
                ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1]},
                ["lambda"] = {1}}},
            [ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
                ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2],
                    self.dim_in[2], self.dim_in[2]},
                ["lambda"] = {1}}},
            [ap("updateGDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
                ["dim_out"] = {self.dim_in[2], self.dim_in[2]},
                ["lambda"] = {1}}},
            [ap("updateMergeL")] = {{}, {["dim_in"] = {self.dim_in[2],
                    self.dim_in[2], self.dim_in[2]},
                ["dim_out"] = {self.dim_out[1]},
                ["lambda"] = {1, -1, 1}}},
        },
        ["nerv.AffineLayer"] = {
            [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]},
                ["dim_out"] = {self.dim_out[1]}, ["pr"] = pr}},
        },
        ["nerv.TanhLayer"] = {
            [ap("mainTanhL")] = {{}, {["dim_in"] = {self.dim_out[1]},
                ["dim_out"] = {self.dim_out[1]}}},
        },
        ["nerv.GateFLayer"] = {
            [ap("resetGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]},
                ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}},
            [ap("updateGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]},
                ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}},
        },
        ["nerv.ElemMulLayer"] = {
            [ap("resetGMulL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]},
                ["dim_out"] = {self.dim_in[2]}}},
            [ap("updateGMulCL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]},
                ["dim_out"] = {self.dim_in[2]}}},
            [ap("updateGMulHL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]},
                ["dim_out"] = {self.dim_in[2]}}},
        },
    }
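    -- Sub-layer roles: the *Dup combiners fan one signal out to several
    -- consumers, the two GateFLayers compute the reset and update gates,
    -- mainAffineL + mainTanhL produce the candidate activation h~, the
    -- ElemMul layers apply the gates elementwise, and updateMergeL combines
    -- the results into the new hidden state.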
    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
    local connections_t = {
        -- fan the two inputs out
        ["<input>[1]"] = ap("inputXDup[1]"),
        ["<input>[2]"] = ap("inputHDup[1]"),

        -- reset and update gates
        [ap("inputXDup[1]")] = ap("resetGateL[1]"),
        [ap("inputHDup[1]")] = ap("resetGateL[2]"),
        [ap("inputXDup[2]")] = ap("updateGateL[1]"),
        [ap("inputHDup[2]")] = ap("updateGateL[2]"),
        [ap("updateGateL[1]")] = ap("updateGDup[1]"),

        -- candidate activation h~ from x and the reset-gated h
        [ap("resetGateL[1]")] = ap("resetGMulL[1]"),
        [ap("inputHDup[3]")] = ap("resetGMulL[2]"),
        [ap("inputXDup[3]")] = ap("mainAffineL[1]"),
        [ap("resetGMulL[1]")] = ap("mainAffineL[2]"),
        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),

        -- apply the update gate to h and to h~, then merge
        [ap("updateGDup[1]")] = ap("updateGMulHL[1]"),
        [ap("inputHDup[4]")] = ap("updateGMulHL[2]"),
        [ap("updateGDup[2]")] = ap("updateGMulCL[1]"),
        [ap("mainTanhL[1]")] = ap("updateGMulCL[2]"),
        [ap("inputHDup[5]")] = ap("updateMergeL[1]"),
        [ap("updateGMulHL[1]")] = ap("updateMergeL[2]"),
        [ap("updateGMulCL[1]")] = ap("updateMergeL[3]"),

        [ap("updateMergeL[1]")] = "<output>[1]",
    }
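    -- Together these connections realize the standard GRU recurrence:
    --     r  = sigmoid(gate(x, h))             (resetGateL)
    --     z  = sigmoid(gate(x, h))             (updateGateL)
    --     h~ = tanh(affine(x, r .* h))         (mainAffineL + mainTanhL)
    --     h' = h - z .* h + z .* h~            (updateMergeL, lambda = {1, -1, 1})
    -- i.e. h' = (1 - z) .* h + z .* h~.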
    self.dagL = nerv.DAGLayerT(self.id, global_conf,
        {["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out,
         ["sub_layers"] = layerRepo, ["connections"] = connections_t})
    self:check_dim_len(2, 1) -- two inputs (x, h), one output (the new h)
end

-- The methods below simply delegate to the internal DAGLayerT
function GRULayerT:init(batch_size, chunk_size)
    self.dagL:init(batch_size, chunk_size)
end

function GRULayerT:batch_resize(batch_size, chunk_size)
    self.dagL:batch_resize(batch_size, chunk_size)
end

function GRULayerT:update(bp_err, input, output, t)
    self.dagL:update(bp_err, input, output, t)
end

function GRULayerT:propagate(input, output, t)
    self.dagL:propagate(input, output, t)
end

function GRULayerT:back_propagate(bp_err, next_bp_err, input, output, t)
    self.dagL:back_propagate(bp_err, next_bp_err, input, output, t)
end

function GRULayerT:get_params()
    return self.dagL:get_params()
end
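
-- A minimal usage sketch, kept as a comment so this file stays side-effect
-- free. The id and dimensions are made-up examples; what global_conf must
-- contain depends on the surrounding NERV setup:
--
--     local gru = nerv.GRULayerT("gruL1", gconf,
--             {["dim_in"] = {620, 300}, ["dim_out"] = {300}})
--     gru:init(batch_size, chunk_size)   -- allocate internal DAG buffers
--     gru:propagate(input, output, t)    -- input: {x_t, h_(t-1)}, output: {h_t}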