aboutsummaryrefslogtreecommitdiff
path: root/nerv/examples/lmptb/lmptb/lstm_t_v2.lua
blob: e7bf3a76fc475a8ab4aa7b81f799e99436170b24 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
-- Class table for the layer registered under the name 'nerv.LSTMLayerV2T'.
-- The second argument to nerv.class is 'nerv.LayerT' (presumably the parent
-- class -- confirm against nerv.class). All methods below delegate to an
-- internal nerv.DAGLayerT that is assembled in __init.
local LSTMLayerT = nerv.class('nerv.LSTMLayerV2T', 'nerv.LayerT')

--- Store the layer configuration and assemble the internal DAGLayerT.
--
-- Ports: three inputs (1: x, 2: h, 3: c) and two outputs (1: h, 2: c),
-- enforced by the check_dim_len(3, 2) call at the end.
--
-- Wiring of the sub-layers (all ids are prefixed with "<self.id>."):
--   * inputXDup / inputHDup (CombinerLayer) fan x and h out to three ports each.
--   * mainAffineL (AffineLayer) takes x and h; its output feeds mainTanhL.
--   * inputGateL and forgetGateL (GateFLayer) each take x and h.
--   * inputGMulL (ElemMulLayer) takes mainTanhL and inputGateL.
--   * forgetGMulL (ElemMulLayer) takes <input>[3] (c) and forgetGateL.
--   * mainCDup (CombinerLayer, lambda = {1, 1}) takes inputGMulL and
--     forgetGMulL; port 1 goes through outputTanhL to <output>[1], port 2
--     goes directly to <output>[2].
-- The output gate of the original layout is intentionally left commented out.
--
-- @param id          string identifier of this layer; also used as the prefix
--                    of every sub-layer id.
-- @param global_conf global configuration, stored as self.gconf and handed to
--                    the LayerRepo and the DAGLayerT.
-- @param layer_conf  table with fields:
--                      dim_in  - list of 3 input dimensions (x, h, c)
--                      dim_out - list of 2 output dimensions
--                      pr      - optional parameter repo; a fresh
--                                nerv.ParamRepo() is created when it is nil.
function LSTMLayerT:__init(id, global_conf, layer_conf)
    --input1:x input2:h input3:c
    self.id = id
    self.dim_in = layer_conf.dim_in
    self.dim_out = layer_conf.dim_out
    self.gconf = global_conf

    --prepare a DAGLayerT to hold the lstm structure
    local pr = layer_conf.pr
    if pr == nil then
        pr = nerv.ParamRepo()
    end

    -- Qualify a sub-layer id (or "id[port]" reference) with this layer's id.
    -- Must NOT be applied to the "<input>[i]" / "<output>[i]" pseudo-ports.
    local function ap(str)
        return self.id .. '.' .. str
    end

    local x_dim, h_dim, c_dim = self.dim_in[1], self.dim_in[2], self.dim_in[3]
    local out_dim = self.dim_out[1]

    local layers = {
        ["nerv.CombinerLayer"] = {
            [ap("inputXDup")] = {{}, {["dim_in"] = {x_dim},
                ["dim_out"] = {x_dim, x_dim, x_dim}, ["lambda"] = {1}}},
            [ap("inputHDup")] = {{}, {["dim_in"] = {h_dim},
                ["dim_out"] = {h_dim, h_dim, h_dim}, ["lambda"] = {1}}},
            --[ap("inputCDup")] = {{}, {["dim_in"] = {self.dim_in[3]},
                --["dim_out"] = {self.dim_in[3], self.dim_in[3], self.dim_in[3]}, ["lambda"] = {1}}},
            [ap("mainCDup")] = {{}, {["dim_in"] = {c_dim, c_dim}, ["dim_out"] = {c_dim, c_dim},
                ["lambda"] = {1, 1}}},
        },
        ["nerv.AffineLayer"] = {
            [ap("mainAffineL")] = {{}, {["dim_in"] = {x_dim, h_dim},
                ["dim_out"] = {out_dim}, ["pr"] = pr}},
        },
        ["nerv.TanhLayer"] = {
            [ap("mainTanhL")] = {{}, {["dim_in"] = {out_dim}, ["dim_out"] = {out_dim}}},
            [ap("outputTanhL")] = {{}, {["dim_in"] = {out_dim}, ["dim_out"] = {out_dim}}},
        },
        ["nerv.GateFLayer"] = {
            [ap("forgetGateL")] = {{}, {["dim_in"] = {x_dim, h_dim},
                ["dim_out"] = {c_dim}, ["pr"] = pr}},
            [ap("inputGateL")] = {{}, {["dim_in"] = {x_dim, h_dim},
                ["dim_out"] = {c_dim}, ["pr"] = pr}},
            --[ap("outputGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2], self.dim_in[3]},
            --    ["dim_out"] = {self.dim_in[3]}, ["pr"] = pr}},
        },
        ["nerv.ElemMulLayer"] = {
            [ap("inputGMulL")] = {{}, {["dim_in"] = {c_dim, c_dim}, ["dim_out"] = {c_dim}}},
            [ap("forgetGMulL")] = {{}, {["dim_in"] = {c_dim, c_dim}, ["dim_out"] = {c_dim}}},
            --[ap("outputGMulL")] = {{}, {["dim_in"] = {self.dim_in[3], self.dim_in[3]}, ["dim_out"] = {self.dim_in[3]}}},
        },
    }

    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)

    local connections_t = {
        ["<input>[1]"] = ap("inputXDup[1]"),
        ["<input>[2]"] = ap("inputHDup[1]"),

        [ap("inputXDup[1]")] = ap("mainAffineL[1]"),
        [ap("inputHDup[1]")] = ap("mainAffineL[2]"),
        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),

        [ap("inputXDup[2]")] = ap("inputGateL[1]"),
        [ap("inputHDup[2]")] = ap("inputGateL[2]"),
        --[ap("inputCDup[1]")] = ap("inputGateL[3]"),

        [ap("inputXDup[3]")] = ap("forgetGateL[1]"),
        [ap("inputHDup[3]")] = ap("forgetGateL[2]"),
        --[ap("inputCDup[2]")] = ap("forgetGateL[3]"),

        [ap("mainTanhL[1]")] = ap("inputGMulL[1]"),
        [ap("inputGateL[1]")] = ap("inputGMulL[2]"),

        -- The DAG's own input port must stay unprefixed, like <input>[1]
        -- and <input>[2] above; wrapping it in ap() would yield
        -- "<id>.<input>[3]", which names no port.
        ["<input>[3]"] = ap("forgetGMulL[1]"),
        [ap("forgetGateL[1]")] = ap("forgetGMulL[2]"),

        [ap("inputGMulL[1]")] = ap("mainCDup[1]"),
        [ap("forgetGMulL[1]")] = ap("mainCDup[2]"),

        --[ap("inputXDup[4]")] = ap("outputGateL[1]"),
        --[ap("inputHDup[4]")] = ap("outputGateL[2]"),
        --[ap("mainCDup[3]")] = ap("outputGateL[3]"),

        [ap("mainCDup[2]")] = "<output>[2]",
        [ap("mainCDup[1]")] = ap("outputTanhL[1]"),

        [ap("outputTanhL[1]")] = "<output>[1]",
    }
    self.dagL = nerv.DAGLayerT(self.id, global_conf,
            {["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out, ["sub_layers"] = layerRepo,
            ["connections"] = connections_t})

    self:check_dim_len(3, 2) -- x, h, c and h, c
end

--- Initialise the layer by initialising the wrapped DAG layer.
-- @param batch_size forwarded unchanged to self.dagL:init
-- @param chunk_size forwarded unchanged to self.dagL:init
function LSTMLayerT:init(batch_size, chunk_size)
    local dag = self.dagL
    dag:init(batch_size, chunk_size)
end

--- Resize the layer by delegating to the wrapped DAG layer.
-- @param batch_size forwarded unchanged to self.dagL:batch_resize
-- @param chunk_size forwarded unchanged to self.dagL:batch_resize
function LSTMLayerT:batch_resize(batch_size, chunk_size)
    local dag = self.dagL
    dag:batch_resize(batch_size, chunk_size)
end

--- Run the update step of the wrapped DAG layer.
-- All four arguments are passed through untouched, in the same order.
-- @param bp_err forwarded to self.dagL:update
-- @param input  forwarded to self.dagL:update
-- @param output forwarded to self.dagL:update
-- @param t      forwarded to self.dagL:update
function LSTMLayerT:update(bp_err, input, output, t)
    local dag = self.dagL
    dag:update(bp_err, input, output, t)
end

--- Forward pass: delegate to the wrapped DAG layer.
-- @param input  forwarded to self.dagL:propagate
-- @param output forwarded to self.dagL:propagate
-- @param t      forwarded to self.dagL:propagate
function LSTMLayerT:propagate(input, output, t)
    local dag = self.dagL
    dag:propagate(input, output, t)
end

--- Backward pass: delegate to the wrapped DAG layer.
-- All five arguments are passed through untouched, in the same order.
-- @param bp_err      forwarded to self.dagL:back_propagate
-- @param next_bp_err forwarded to self.dagL:back_propagate
-- @param input       forwarded to self.dagL:back_propagate
-- @param output      forwarded to self.dagL:back_propagate
-- @param t           forwarded to self.dagL:back_propagate
function LSTMLayerT:back_propagate(bp_err, next_bp_err, input, output, t)
    local dag = self.dagL
    dag:back_propagate(bp_err, next_bp_err, input, output, t)
end

--- Return whatever the wrapped DAG layer's get_params returns.
-- @return the result(s) of self.dagL:get_params(), passed through unchanged
function LSTMLayerT:get_params()
    local dag = self.dagL
    return dag:get_params()
end