From 74d9e9e7371c80394698fb9805cbf0cbde67a8f3 Mon Sep 17 00:00:00 2001 From: Determinant Date: Tue, 2 Jun 2015 20:28:16 +0800 Subject: add ParamRepo, LayerRepo, DAGLayer --- nn/init.lua | 3 + nn/layer_dag.lua | 224 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ nn/layer_repo.lua | 34 +++++++++ nn/param_repo.lua | 26 +++++++ 4 files changed, 287 insertions(+) create mode 100644 nn/init.lua create mode 100644 nn/layer_dag.lua create mode 100644 nn/layer_repo.lua create mode 100644 nn/param_repo.lua (limited to 'nn') diff --git a/nn/init.lua b/nn/init.lua new file mode 100644 index 0000000..1bafa77 --- /dev/null +++ b/nn/init.lua @@ -0,0 +1,3 @@ +require 'nn.layer_repo' +require 'nn.param_repo' +require 'nn.layer_dag' diff --git a/nn/layer_dag.lua b/nn/layer_dag.lua new file mode 100644 index 0000000..8ea28a0 --- /dev/null +++ b/nn/layer_dag.lua @@ -0,0 +1,224 @@ +local DAGLayer = nerv.class("nerv.DAGLayer", "nerv.Layer") + +local function parse_id(str) + local id, port, _ + _, _, id, port = string.find(str, "([a-zA-Z0-9_]+)%[([0-9]+)%]") + if id == nil or port == nil then + _, _, id, port = string.find(str, "(.+)%[([0-9]+)%]") + if not (id == "" or id == "") then + nerv.error("wrong format of connection id") + end + end + port = tonumber(port) + return id, port +end + +local function discover(id, layers, layer_repo) + local ref = layers[id] + if id == "" or id == "" then + return nil + end + if ref == nil then + local layer = layer_repo:get_layer(id) + local dim_in, dim_out = layer:get_dim() + ref = { + layer = layer, + inputs = {}, + outputs = {}, + err_inputs = {}, + err_outputs = {}, + next_layers = {}, + input_len = #dim_in, + output_len = #dim_out, + in_deg = 0, + visited = false + } + layers[id] = ref + end + return ref +end + +function nerv.DAGLayer:__init(id, global_conf, layer_conf) + local layers = {} + local inputs = {} + local outputs = {} + local dim_in = layer_conf.dim_in + local dim_out = layer_conf.dim_out + for from, to in pairs(layer_conf.connections) do + local id_from, port_from = parse_id(from) + local id_to, port_to = parse_id(to) + local ref_from = discover(id_from, layers, layer_conf.sub_layers) + local ref_to = discover(id_to, layers, layer_conf.sub_layers) + local input_dim, output_dim, _ + if ref_from and ref_from.outputs[port_from] ~= nil then + nerv.error("%s has already been attached", from) + end + if ref_to and ref_to.inputs[port_to] ~= nil then + nerv.error("%s has already been attached", to) + end + if id_from == "" then + input_dim, _ = ref_to.layer:get_dim() + if dim_in[port_from] ~= input_dim[port_to] then + nerv.error("mismatching data dimension between %s and %s", from, to) + end + inputs[port_from] = {ref_to, port_to} + ref_to.inputs[port_to] = inputs -- just a place holder + elseif id_to == "" then + _, output_dim = ref_from.layer:get_dim() + if output_dim[port_from] ~= dim_out[port_to] then + nerv.error("mismatching data dimension between %s and %s", from, to) + end + outputs[port_to] = {ref_from, port_from} + ref_from.outputs[port_from] = outputs -- just a place holder + else + _, output_dim = ref_from.layer:get_dim() + input_dim, _ = ref_to.layer:get_dim() + if output_dim[port_from] ~= input_dim[port_to] then + nerv.error("mismatching data dimension between %s and %s", from, to) + end + local mid = global_conf.mat_type(global_conf.batch_size, + output_dim[port_from]) + local err_mid = mid:create() + + ref_from.outputs[port_from] = mid + ref_to.inputs[port_to] = mid + + ref_from.err_inputs[port_from] = err_mid + ref_to.err_outputs[port_to] = err_mid + + table.insert(ref_from.next_layers, ref_to) -- add edge + ref_to.in_deg = ref_to.in_deg + 1 -- increase the in-degree of the target layer + end + end + self.layers = layers + self.inputs = inputs + self.outputs = outputs + self.dim_in = dim_in + self.dim_out = dim_out +end + +function nerv.DAGLayer:init(id) -- topology sort + local queue = {} + local l = 1 + local r = 1 + for id, ref in pairs(self.layers) do + if ref.in_deg == 0 then + table.insert(queue, ref) + nerv.utils.printf("adding source layer: %s\n", id) + r = r + 1 + end + end + if l == r then + nerv.error("loop detected") + end + while l < r do + local cur = queue[l] + cur.visited = true + l = l + 1 + for _, nl in pairs(cur.next_layers) do + nl.in_deg = nl.in_deg - 1 + if nl.in_deg == 0 then + table.insert(queue, nl) + r = r + 1 + end + end + end + for i = 1, #queue do + nerv.utils.printf("queued layer: %s\n", queue[i].layer.id) + end + self.queue = queue + for id, ref in pairs(self.layers) do + -- check wether the graph is connected + if ref.visited == false then + nerv.utils.printf("warning: layer %s is ignored\n", id) + end + for i = 1, ref.input_len do + if ref.inputs[i] == nil then + nerv.error("dangling port %d of layer %s", i, id) + end + end + for i = 1, ref.output_len do + if ref.outputs[i] == nil then + nerv.error("dangling port %d of layer %s", i, id) + end + end + -- initialize sub layers + ref.layer:init() + end + for i = 1, #self.dim_in do + if self.inputs[i] == nil then + nerv.error("dangling port %d of layer ", i) + end + end + for i = 1, #self.dim_out do + if self.outputs[i] == nil then + nerv.error("dangling port %d of layer ", i) + end + end +end + +function nerv.DAGLayer:set_inputs(input) + for i = 1, #self.dim_in do + local layer = self.inputs[i][1] + local port = self.inputs[i][2] + layer.inputs[port] = input[i] + end +end + +function nerv.DAGLayer:set_outputs(output) + for i = 1, #self.dim_out do + local layer = self.outputs[i][1] + local port = self.outputs[i][2] + layer.outputs[port] = output[i] + end +end + +function nerv.DAGLayer:set_err_inputs(bp_err) + for i = 1, #self.dim_out do + local layer = self.outputs[i][1] + local port = self.outputs[i][2] + layer.err_inputs[port] = bp_err[i] + end +end + +function nerv.DAGLayer:set_err_outputs(next_bp_err) + for i = 1, #self.dim_in do + local layer = self.inputs[i][1] + local port = self.inputs[i][2] + layer.err_outputs[port] = next_bp_err[i] + end +end + +function nerv.DAGLayer:update(bp_err, input, output) + self:set_err_inputs(bp_err) + self:set_inputs(input) + self:set_outputs(output) + for id, ref in pairs(self.queue) do + ref.layer:update(ref.err_inputs, ref.inputs, ref.outputs) + end +end + +function nerv.DAGLayer:propagate(input, output) + self:set_inputs(input) + self:set_outputs(output) + for i = 1, #self.queue do + local ref = self.queue[i] + --[[ + print(ref.inputs[1]) + print(ref.outputs[1]) + print(#ref.inputs, #ref.outputs) + --]] + ref.layer:propagate(ref.inputs, ref.outputs) + end +end + +function nerv.DAGLayer:back_propagate(next_bp_err, bp_err, input, output) + self:set_err_outputs(next_bp_err) + self:set_err_inputs(bp_err) + self:set_inputs(input) + self:set_outputs(output) + for i = #self.queue, 1, -1 do + local ref = self.queue[i] + ref.layer:back_propagate(ref.err_outputs, ref.err_inputs, ref.inputs, ref.outputs) + end +end diff --git a/nn/layer_repo.lua b/nn/layer_repo.lua new file mode 100644 index 0000000..b1d2248 --- /dev/null +++ b/nn/layer_repo.lua @@ -0,0 +1,34 @@ +local LayerRepo = nerv.class("nerv.LayerRepo") + +function LayerRepo:__init(layer_spec, param_repo, global_conf) + local layers = {} + for ltype, llist in pairs(layer_spec) do + local layer_type = nerv.get_type(ltype) + for id, spec in pairs(llist) do + if layers[id] ~= nil then + nerv.error("a layer with id %s already exists", id) + end + nerv.utils.printf("id: %s\n", id) + if type(spec[2]) ~= "table" then + nerv.error("layer config table is need") + end + layer_config = spec[2] + if type(spec[1]) ~= "table" then + nerv.error("parameter description table is needed") + end + for pname, pid in pairs(spec[1]) do + layer_config[pname] = param_repo:get_param(pid, global_conf) + end + layers[id] = layer_type(id, global_conf, layer_config) + end + end + self.layers = layers +end + +function LayerRepo:get_layer(lid) + local layer = self.layers[lid] + if layer == nil then + nerv.error("layer with id %s not found", lid) + end + return layer +end diff --git a/nn/param_repo.lua b/nn/param_repo.lua new file mode 100644 index 0000000..3e37c31 --- /dev/null +++ b/nn/param_repo.lua @@ -0,0 +1,26 @@ +local ParamRepo = nerv.class("nerv.ParamRepo") + +function ParamRepo:__init(param_files) + local param_table = {} + if type(param_files) ~= "table" then + nerv.error("param file table is need") + end + for i = 1, #param_files do + local pf = nerv.ChunkFile(param_files[i], "r") + for cid, cspec in pairs(pf.metadata) do + if param_table[cid] ~= nil then + nerv.error("conflicting chunk id in param files") + end + param_table[cid] = pf + end + end + self.param_table = param_table +end + +function ParamRepo:get_param(pid, global_conf) + local pf = self.param_table[pid] + if pf == nil then + nerv.error("param with id %s not found", pid) + end + return pf:read_chunk(pid, global_conf) +end -- cgit v1.2.3-70-g09d2 From 08a52c03a77ce13ae4f6a4deb06ab0ae274d399a Mon Sep 17 00:00:00 2001 From: Determinant Date: Tue, 2 Jun 2015 23:07:15 +0800 Subject: fix a bug: input[1] should be input[2] (since Lua arrays are 1-based) --- examples/test_nn_lib.lua | 60 +++++++++++++++++++++++++++++++++++++----------- layer/softmax_ce.lua | 4 +++- nn/layer_dag.lua | 4 ++++ 3 files changed, 54 insertions(+), 14 deletions(-) (limited to 'nn') diff --git a/examples/test_nn_lib.lua b/examples/test_nn_lib.lua index fd7167a..ec338fe 100644 --- a/examples/test_nn_lib.lua +++ b/examples/test_nn_lib.lua @@ -1,25 +1,46 @@ -require 'layer.affine' -require 'layer.sigmoid' -require 'layer.softmax_ce' +-- require 'layer.affine' +-- require 'layer.sigmoid' +-- require 'layer.softmax_ce' gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, mat_type = nerv.CuMatrixFloat, batch_size = 10} -param_repo = nerv.ParamRepo({"affine.param"}) +param_repo = nerv.ParamRepo({"converted.nerv"}) sublayer_repo = nerv.LayerRepo( { ["nerv.AffineLayer"] = { - affine1 = {{ltp = "a", bp = "b"}, {dim_in = {429}, dim_out = {2048}}} + affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, + {dim_in = {429}, dim_out = {2048}}}, + affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, + {dim_in = {2048}, dim_out = {3001}}} }, ["nerv.SigmoidLayer"] = { - sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}} + sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} }, ["nerv.SoftmaxCELayer"] = { - softmax_ce1 = {{}, {dim_in = {2048, 2048}, dim_out = {}}} + softmax_ce0 = {{}, {dim_in = {3001, 3001}, dim_out = {}}} } }, param_repo, gconf) @@ -28,20 +49,33 @@ layer_repo = nerv.LayerRepo( ["nerv.DAGLayer"] = { main = {{}, { - dim_in = {429, 2048}, dim_out = {}, + dim_in = {429, 3001}, dim_out = {}, sub_layers = sublayer_repo, connections = { - ["[1]"] = "affine1[1]", + ["[1]"] = "affine0[1]", + ["affine0[1]"] = "sigmoid0[1]", + ["sigmoid0[1]"] = "affine1[1]", ["affine1[1]"] = "sigmoid1[1]", - ["sigmoid1[1]"] = "softmax_ce1[1]", - ["[2]"] = "softmax_ce1[2]" + ["sigmoid1[1]"] = "affine2[1]", + ["affine2[1]"] = "sigmoid2[1]", + ["sigmoid2[1]"] = "affine3[1]", + ["affine3[1]"] = "sigmoid3[1]", + ["sigmoid3[1]"] = "affine4[1]", + ["affine4[1]"] = "sigmoid4[1]", + ["sigmoid4[1]"] = "affine5[1]", + ["affine5[1]"] = "sigmoid5[1]", + ["sigmoid5[1]"] = "affine6[1]", + ["affine6[1]"] = "sigmoid6[1]", + ["sigmoid6[1]"] = "affine7[1]", + ["affine7[1]"] = "softmax_ce0[1]", + ["[2]"] = "softmax_ce0[2]" } }} } }, param_repo, gconf) df = nerv.ChunkFile("input.param", "r") -label = nerv.CuMatrixFloat(10, 2048) +label = nerv.CuMatrixFloat(10, 3001) label:fill(0) for i = 0, 9 do label[i][i] = 1.0 @@ -51,7 +85,7 @@ input = {df:read_chunk("input", gconf).trans, label} output = {} err_input = {} err_output = {input[1]:create()} -sm = sublayer_repo:get_layer("softmax_ce1") +sm = sublayer_repo:get_layer("softmax_ce0") main = layer_repo:get_layer("main") main:init() for i = 0, 3 do diff --git a/layer/softmax_ce.lua b/layer/softmax_ce.lua index 3dfebc5..09eb3a9 100644 --- a/layer/softmax_ce.lua +++ b/layer/softmax_ce.lua @@ -27,6 +27,8 @@ function SoftmaxCELayer:propagate(input, output) local ce = soutput:create() ce:log_elem(soutput) ce:mul_elem(ce, input[2]) +-- print(input[1][0]) +-- print(soutput[1][0]) -- add total ce self.total_ce = self.total_ce - ce:rowsum():colsum()[0] self.total_frames = self.total_frames + soutput:nrow() @@ -34,5 +36,5 @@ end function SoftmaxCELayer:back_propagate(next_bp_err, bp_err, input, output) -- softmax output - label - next_bp_err[1]:add(self.soutput, input[1], 1.0, -1.0) + next_bp_err[1]:add(self.soutput, input[2], 1.0, -1.0) end diff --git a/nn/layer_dag.lua b/nn/layer_dag.lua index 8ea28a0..1ab18fa 100644 --- a/nn/layer_dag.lua +++ b/nn/layer_dag.lua @@ -219,6 +219,10 @@ function nerv.DAGLayer:back_propagate(next_bp_err, bp_err, input, output) self:set_outputs(output) for i = #self.queue, 1, -1 do local ref = self.queue[i] + -- print(ref.layer.id) ref.layer:back_propagate(ref.err_outputs, ref.err_inputs, ref.inputs, ref.outputs) + -- if #ref.err_outputs > 0 then + -- print(ref.err_outputs[1]) + -- end end end -- cgit v1.2.3-70-g09d2