From 0d3d8f4afdc38726b8ed933dbfcb85e759145c43 Mon Sep 17 00:00:00 2001 From: Determinant Date: Tue, 2 Jun 2015 12:51:18 +0800 Subject: add preprocessing layers and change layer constructor interface --- Makefile | 3 ++- examples/test_dnn_layers.lua | 34 +++++++++++++++++++--------------- layer/affine.lua | 31 ++++++++++++++++++++++--------- layer/bias.lua | 24 ++++++++++++++++++++++++ layer/init.lua | 13 +++++++++++++ layer/sigmoid.lua | 12 +++++++++--- layer/softmax_ce.lua | 16 +++++++++++----- layer/window.lua | 24 ++++++++++++++++++++++++ speech | 2 +- 9 files changed, 125 insertions(+), 34 deletions(-) create mode 100644 layer/bias.lua create mode 100644 layer/window.lua diff --git a/Makefile b/Makefile index 69fb739..3325b4d 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,8 @@ OBJS := nerv.o luaT.o common.o \ LIBS := libnerv.so LUA_LIBS := matrix/init.lua io/init.lua nerv.lua \ pl/utils.lua pl/compat.lua \ - layer/init.lua layer/affine.lua layer/sigmoid.lua layer/softmax_ce.lua + layer/init.lua layer/affine.lua layer/sigmoid.lua layer/softmax_ce.lua \ + layer/window.lua layer/bias.lua INCLUDE := -I build/luajit-2.0/include/luajit-2.0/ -DLUA_USE_APICHECK CUDA_BASE := /usr/local/cuda-6.5 CUDA_INCLUDE := -I $(CUDA_BASE)/include/ diff --git a/examples/test_dnn_layers.lua b/examples/test_dnn_layers.lua index 866e685..9be9d71 100644 --- a/examples/test_dnn_layers.lua +++ b/examples/test_dnn_layers.lua @@ -11,10 +11,14 @@ bp = pf:read_chunk("b", global_conf) -- print(bp.trans) -af = nerv.AffineLayer("test", global_conf, ltp, bp) -sg = nerv.SigmoidLayer("test2", global_conf) -sm = nerv.SoftmaxCELayer("test3", global_conf) - +af = nerv.AffineLayer("test", global_conf, {["ltp"] = ltp, + ["bp"] = bp, + dim_in = {429}, + dim_out = {2048}}) +sg = nerv.SigmoidLayer("test2", global_conf, {dim_in = {2048}, + dim_out = {2048}}) +sm = nerv.SoftmaxCELayer("test3", global_conf, {dim_in = {2048, 2048}, + dim_out = {}}) af:init() sg:init() sm:init() @@ -27,18 +31,18 @@ for i = 0, 9 
do label[i][i] = 1.0 end -input1 = {[0] = df:read_chunk("input", global_conf).trans} -output1 = {[0] = nerv.CuMatrixFloat(10, 2048)} +input1 = {df:read_chunk("input", global_conf).trans} +output1 = {nerv.CuMatrixFloat(10, 2048)} input2 = output1 -output2 = {[0] = nerv.CuMatrixFloat(10, 2048)} -input3 = {[0] = output2[0], [1] = label} +output2 = {nerv.CuMatrixFloat(10, 2048)} +input3 = {output2[1], label} output3 = nil err_input1 = nil -err_output1 = {[0] = nerv.CuMatrixFloat(10, 2048)} +err_output1 = {nerv.CuMatrixFloat(10, 2048)} err_input2 = err_output1 -err_output2 = {[0] = nerv.CuMatrixFloat(10, 2048)} +err_output2 = {nerv.CuMatrixFloat(10, 2048)} err_input3 = err_output2 -err_output3 = {[0] = input1[0]:create()} +err_output3 = {input1[1]:create()} for i = 0, 3 do -- propagate @@ -59,13 +63,13 @@ for i = 0, 3 do print("output1") - print(output1[0]) + print(output1[1]) print("output2") - print(output2[0]) + print(output2[1]) print("err_output1") - print(err_output1[0]) + print(err_output1[1]) print("err_output2") - print(err_output2[0]) + print(err_output2[1]) nerv.utils.printf("cross entropy: %.8f\n", sm.total_ce) nerv.utils.printf("frames: %.8f\n", sm.total_frames) end diff --git a/layer/affine.lua b/layer/affine.lua index 573b98d..90a1d16 100644 --- a/layer/affine.lua +++ b/layer/affine.lua @@ -12,14 +12,27 @@ function MatrixParam:write(pfhandle) self.trans:new_to_host():save(pfhandle) end -function AffineLayer:__init(id, global_conf, ltp, bp) +function AffineLayer:__init(id, global_conf, layer_conf) self.id = id - self.ltp = ltp - self.bp = bp + self.ltp = layer_conf.ltp + self.bp = layer_conf.bp + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out self.gconf = global_conf + self:check_dim_len(1, 1) -- exactly one input and one output end function AffineLayer:init() + if self.ltp.trans:ncol() ~= self.bp.trans:ncol() then + nerv.error("mismatching dimensions of linear transform and bias paramter") + end + if self.dim_in[1] ~= 
self.ltp.trans:nrow() then + nerv.error("mismatching dimensions of linear transform parameter and input") + end + if self.dim_out[1] ~= self.ltp.trans:ncol() then + nerv.error("mismatching dimensions of linear transform parameter and output") + end + -- linear transform correction self.ltc = self.ltp.trans:create() self.ltc:fill(0) @@ -36,10 +49,10 @@ function nerv.AffineLayer:update(bp_err, input, output) local gconf = self.gconf -- momentum gain local mmt_gain = 1.0 / (1.0 - gconf.momentum); - local n = input[0]:nrow() * mmt_gain + local n = input[1]:nrow() * mmt_gain -- update corrections (accumulated errors) - ltc:mul(input[0], bp_err[0], 1.0, gconf.momentum, 'T', 'N') - bc:add(bc, bp_err[0]:colsum(), gconf.momentum, 1.0) + ltc:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N') + bc:add(bc, bp_err[1]:colsum(), gconf.momentum, 1.0) -- perform update ltp:add(ltp, ltc, 1.0, -gconf.lrate / n) bp:add(bp, bc, 1.0, -gconf.lrate / n) @@ -49,11 +62,11 @@ end function nerv.AffineLayer:propagate(input, output) -- apply linear transform - output[0]:mul(input[0], self.ltp.trans, 1.0, 0.0, 'N', 'N') + output[1]:mul(input[1], self.ltp.trans, 1.0, 0.0, 'N', 'N') -- add bias - output[0]:add_row(self.bp.trans, 1.0) + output[1]:add_row(self.bp.trans, 1.0) end function nerv.AffineLayer:back_propagate(next_bp_err, bp_err, input, output) - next_bp_err[0]:mul(bp_err[0], self.ltp.trans, 1.0, 0.0, 'N', 'T') + next_bp_err[1]:mul(bp_err[1], self.ltp.trans, 1.0, 0.0, 'N', 'T') end diff --git a/layer/bias.lua b/layer/bias.lua new file mode 100644 index 0000000..6ddfe11 --- /dev/null +++ b/layer/bias.lua @@ -0,0 +1,24 @@ +local BiasLayer = nerv.class("nerv.BiasLayer", "nerv.Layer") + +function BiasLayer:__init(id, global_conf, layer_conf) + self.id = id + self.gconf = global_conf + self.bias = layer_conf.bias + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self:check_dim_len(1, 1) +end + +function BiasLayer:init() + if self.dim_in[1] ~= self.bias.trans:ncol() 
then + nerv.error("mismatching dimensions of input and bias parameter") + end + if self.dim_out[1] ~= self.bias.trans:ncol() then + nerv.error("mismatching dimensions of output and bias parameter") + end +end + +function BiasLayer:propagate(input, output) + output[1]:copy_fromd(input[1]) + output[1]:add_row(self.bias.trans, 1.0) +end diff --git a/layer/init.lua b/layer/init.lua index a98621d..4881cb7 100644 --- a/layer/init.lua +++ b/layer/init.lua @@ -44,3 +44,16 @@ end function nerv.Layer:back_propagate(next_bp_err, bp_err, input, output) nerv.error_method_not_implemented() end + +function nerv.Layer:check_dim_len(len_in, len_out) + local expected_in = table.getn(self.dim_in) + local expected_out = table.getn(self.dim_out) + if len_in > 0 and expected_in ~= len_in then + nerv.error("layer %s expects %d inputs, %d given", + self.id, len_in, expected_in) + end + if len_out > 0 and expected_out ~= len_out then + nerv.error("layer %s expects %d outputs, %d given", + self.id, len_out, expected_out) + end +end diff --git a/layer/sigmoid.lua b/layer/sigmoid.lua index ca34419..220b7af 100644 --- a/layer/sigmoid.lua +++ b/layer/sigmoid.lua @@ -1,11 +1,17 @@ local SigmoidLayer = nerv.class("nerv.SigmoidLayer", "nerv.Layer") -function SigmoidLayer:__init(id, global_conf) +function SigmoidLayer:__init(id, global_conf, layer_conf) self.id = id self.gconf = global_conf + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self:check_dim_len(1, 1) end function SigmoidLayer:init() + if self.dim_in[1] ~= self.dim_out[1] then + nerv.error("mismatching dimensions of input and output") + end end function SigmoidLayer:update(bp_err, input, output) @@ -13,9 +19,9 @@ function SigmoidLayer:update(bp_err, input, output) end function SigmoidLayer:propagate(input, output) - output[0]:sigmoid(input[0]) + output[1]:sigmoid(input[1]) end function SigmoidLayer:back_propagate(next_bp_err, bp_err, input, output) - next_bp_err[0]:sigmoid_grad(bp_err[0], output[0]) + 
next_bp_err[1]:sigmoid_grad(bp_err[1], output[1]) end diff --git a/layer/softmax_ce.lua b/layer/softmax_ce.lua index 37d2864..3dfebc5 100644 --- a/layer/softmax_ce.lua +++ b/layer/softmax_ce.lua @@ -1,11 +1,17 @@ local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayer", "nerv.Layer") -function SoftmaxCELayer:__init(id, global_conf) +function SoftmaxCELayer:__init(id, global_conf, layer_conf) self.id = id self.gconf = global_conf + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self:check_dim_len(2, -1) -- two inputs: nn output and label end function SoftmaxCELayer:init() + if self.dim_in[1] ~= self.dim_in[1] then + nerv.error("mismatching dimensions of previous network output and labels") + end self.total_ce = 0.0 self.total_frames = 0 end @@ -15,12 +21,12 @@ function SoftmaxCELayer:update(bp_err, input, output) end function SoftmaxCELayer:propagate(input, output) - local soutput = input[0]:create() -- temporary value for calc softmax + local soutput = input[1]:create() -- temporary value for calc softmax self.soutput = soutput - soutput:softmax(input[0]) + soutput:softmax(input[1]) local ce = soutput:create() ce:log_elem(soutput) - ce:mul_elem(ce, input[1]) + ce:mul_elem(ce, input[2]) -- add total ce self.total_ce = self.total_ce - ce:rowsum():colsum()[0] self.total_frames = self.total_frames + soutput:nrow() @@ -28,5 +34,5 @@ end function SoftmaxCELayer:back_propagate(next_bp_err, bp_err, input, output) -- softmax output - label - next_bp_err[0]:add(self.soutput, input[1], 1.0, -1.0) + next_bp_err[1]:add(self.soutput, input[1], 1.0, -1.0) end diff --git a/layer/window.lua b/layer/window.lua new file mode 100644 index 0000000..8e9e761 --- /dev/null +++ b/layer/window.lua @@ -0,0 +1,24 @@ +local WindowLayer = nerv.class("nerv.WindowLayer", "nerv.Layer") + +function WindowLayer:__init(id, global_conf, layer_conf) + self.id = id + self.gconf = global_conf + self.window = layer_conf.window + self.dim_in = layer_conf.dim_in + self.dim_out = 
layer_conf.dim_out + self:check_dim_len(1, 1) +end + +function WindowLayer:init() + if self.dim_in[1] ~= self.window.trans:ncol() then + nerv.error("mismatching dimensions of input and window parameter") + end + if self.dim_out[1] ~= self.window.trans:ncol() then + nerv.error("mismatching dimensions of output and window parameter") + end +end + +function WindowLayer:propagate(input, output) + output[1]:copy_fromd(input[1]) + output[1]:scale_row(self.window.trans) +end diff --git a/speech b/speech index d8ea67e..821aec3 160000 --- a/speech +++ b/speech @@ -1 +1 @@ -Subproject commit d8ea67ee420c2fc73085da04de86df023acd98d7 +Subproject commit 821aec314824b89e9fe9c3ee467793a05ed89ee5 -- cgit v1.2.3-70-g09d2 From 74d9e9e7371c80394698fb9805cbf0cbde67a8f3 Mon Sep 17 00:00:00 2001 From: Determinant Date: Tue, 2 Jun 2015 20:28:16 +0800 Subject: add ParamRepo, LayerRepo, DAGLayer --- Makefile | 5 +- examples/test_dnn_layers.lua | 9 +- examples/test_nn_lib.lua | 63 ++++++++++++ io/init.lua | 2 +- layer/init.lua | 14 ++- nerv.lua | 5 + nn/init.lua | 3 + nn/layer_dag.lua | 224 +++++++++++++++++++++++++++++++++++++++++++ nn/layer_repo.lua | 34 +++++++ nn/param_repo.lua | 26 +++++ speech | 2 +- 11 files changed, 376 insertions(+), 11 deletions(-) create mode 100644 examples/test_nn_lib.lua create mode 100644 nn/init.lua create mode 100644 nn/layer_dag.lua create mode 100644 nn/layer_repo.lua create mode 100644 nn/param_repo.lua diff --git a/Makefile b/Makefile index 3325b4d..934235f 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,8 @@ LIBS := libnerv.so LUA_LIBS := matrix/init.lua io/init.lua nerv.lua \ pl/utils.lua pl/compat.lua \ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/softmax_ce.lua \ - layer/window.lua layer/bias.lua + layer/window.lua layer/bias.lua \ + nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua INCLUDE := -I build/luajit-2.0/include/luajit-2.0/ -DLUA_USE_APICHECK CUDA_BASE := /usr/local/cuda-6.5 CUDA_INCLUDE := -I $(CUDA_BASE)/include/ 
@@ -18,7 +19,7 @@ CFLAGS := -Wall -Wextra OBJ_DIR := $(BUILD_DIR)/objs LUA_DIR := $(BUILD_DIR)/lua LIB_DIR := $(BUILD_DIR)/lib -SUBDIR := matrix io layer examples pl +SUBDIR := matrix io layer examples pl nn NVCC := $(CUDA_BASE)/bin/nvcc NVCC_FLAGS := -Xcompiler -fPIC,-Wall,-Wextra diff --git a/examples/test_dnn_layers.lua b/examples/test_dnn_layers.lua index 9be9d71..6e4d98d 100644 --- a/examples/test_dnn_layers.lua +++ b/examples/test_dnn_layers.lua @@ -50,15 +50,14 @@ for i = 0, 3 do sg:propagate(input2, output2) sm:propagate(input3, output3) - -- back_propagate sm:back_propagate(err_output1, err_input1, input3, output3) - sm:update(err_input1, input3, output3) - sg:back_propagate(err_output2, err_input2, input2, output2) - sg:update(err_input2, input2, output2) - af:back_propagate(err_output3, err_input3, input1, output1) + + -- update + sm:update(err_input1, input3, output3) + sg:update(err_input2, input2, output2) af:update(err_input3, input1, output1) diff --git a/examples/test_nn_lib.lua b/examples/test_nn_lib.lua new file mode 100644 index 0000000..fd7167a --- /dev/null +++ b/examples/test_nn_lib.lua @@ -0,0 +1,63 @@ +require 'layer.affine' +require 'layer.sigmoid' +require 'layer.softmax_ce' + +gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, + mat_type = nerv.CuMatrixFloat, + batch_size = 10} + +param_repo = nerv.ParamRepo({"affine.param"}) +sublayer_repo = nerv.LayerRepo( + { + ["nerv.AffineLayer"] = + { + affine1 = {{ltp = "a", bp = "b"}, {dim_in = {429}, dim_out = {2048}}} + }, + ["nerv.SigmoidLayer"] = + { + sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}} + }, + ["nerv.SoftmaxCELayer"] = + { + softmax_ce1 = {{}, {dim_in = {2048, 2048}, dim_out = {}}} + } + }, param_repo, gconf) + +layer_repo = nerv.LayerRepo( + { + ["nerv.DAGLayer"] = + { + main = {{}, { + dim_in = {429, 2048}, dim_out = {}, + sub_layers = sublayer_repo, + connections = { + ["<input>[1]"] = "affine1[1]", + ["affine1[1]"] = "sigmoid1[1]", + ["sigmoid1[1]"] = "softmax_ce1[1]", + 
["<input>[2]"] = "softmax_ce1[2]" + } + }} + } + }, param_repo, gconf) + +df = nerv.ChunkFile("input.param", "r") +label = nerv.CuMatrixFloat(10, 2048) +label:fill(0) +for i = 0, 9 do + label[i][i] = 1.0 +end + +input = {df:read_chunk("input", gconf).trans, label} +output = {} +err_input = {} +err_output = {input[1]:create()} +sm = sublayer_repo:get_layer("softmax_ce1") +main = layer_repo:get_layer("main") +main:init() +for i = 0, 3 do + main:propagate(input, output) + main:back_propagate(err_output, err_input, input, output) + main:update(err_input, input, output) + nerv.utils.printf("cross entropy: %.8f\n", sm.total_ce) + nerv.utils.printf("frames: %.8f\n", sm.total_frames) +end diff --git a/io/init.lua b/io/init.lua index 7c312f4..4a663a7 100644 --- a/io/init.lua +++ b/io/init.lua @@ -22,7 +22,7 @@ function nerv.ChunkFile:read_chunk(id, global_conf) if metadata == nil then nerv.error("chunk with id %s does not exist", id) end - local chunk_type = assert(loadstring("return " .. metadata.type))() + local chunk_type = nerv.get_type(metadata.type) local chunk = chunk_type(id, global_conf) chunk:set_info(metadata.info) chunk:read(self:get_chunkdata(id)) diff --git a/layer/init.lua b/layer/init.lua index 4881cb7..c8c691b 100644 --- a/layer/init.lua +++ b/layer/init.lua @@ -46,8 +46,8 @@ function nerv.Layer:back_propagate(next_bp_err, bp_err, input, output) end function nerv.Layer:check_dim_len(len_in, len_out) - local expected_in = table.getn(self.dim_in) - local expected_out = table.getn(self.dim_out) + local expected_in = #self.dim_in + local expected_out = #self.dim_out if len_in > 0 and expected_in ~= len_in then nerv.error("layer %s expects %d inputs, %d given", self.id, len_in, expected_in) @@ -57,3 +57,13 @@ function nerv.Layer:check_dim_len(len_in, len_out) self.id, len_out, expected_out) end end + +function nerv.Layer:get_dim() + return self.dim_in, self.dim_out +end + +require 'layer.affine' +require 'layer.sigmoid' +require 'layer.softmax_ce' +require 'layer.bias' 
+require 'layer.window' diff --git a/nerv.lua b/nerv.lua index 00042a7..cb53f29 100644 --- a/nerv.lua +++ b/nerv.lua @@ -71,6 +71,11 @@ function table.tostring(tbl) return "{" .. table.concat(result, ",") .. "}" end +function nerv.get_type(typename) + return assert(loadstring("return " .. typename))() +end + require 'matrix.init' require 'io.init' require 'layer.init' +require 'nn.init' diff --git a/nn/init.lua b/nn/init.lua new file mode 100644 index 0000000..1bafa77 --- /dev/null +++ b/nn/init.lua @@ -0,0 +1,3 @@ +require 'nn.layer_repo' +require 'nn.param_repo' +require 'nn.layer_dag' diff --git a/nn/layer_dag.lua b/nn/layer_dag.lua new file mode 100644 index 0000000..8ea28a0 --- /dev/null +++ b/nn/layer_dag.lua @@ -0,0 +1,224 @@ +local DAGLayer = nerv.class("nerv.DAGLayer", "nerv.Layer") + +local function parse_id(str) + local id, port, _ + _, _, id, port = string.find(str, "([a-zA-Z0-9_]+)%[([0-9]+)%]") + if id == nil or port == nil then + _, _, id, port = string.find(str, "(.+)%[([0-9]+)%]") + if not (id == "<input>" or id == "<output>") then + nerv.error("wrong format of connection id") + end + end + port = tonumber(port) + return id, port +end + +local function discover(id, layers, layer_repo) + local ref = layers[id] + if id == "<input>" or id == "<output>" then + return nil + end + if ref == nil then + local layer = layer_repo:get_layer(id) + local dim_in, dim_out = layer:get_dim() + ref = { + layer = layer, + inputs = {}, + outputs = {}, + err_inputs = {}, + err_outputs = {}, + next_layers = {}, + input_len = #dim_in, + output_len = #dim_out, + in_deg = 0, + visited = false + } + layers[id] = ref + end + return ref +end + +function nerv.DAGLayer:__init(id, global_conf, layer_conf) + local layers = {} + local inputs = {} + local outputs = {} + local dim_in = layer_conf.dim_in + local dim_out = layer_conf.dim_out + for from, to in pairs(layer_conf.connections) do + local id_from, port_from = parse_id(from) + local id_to, port_to = parse_id(to) + local ref_from = discover(id_from, 
layers, layer_conf.sub_layers) + local ref_to = discover(id_to, layers, layer_conf.sub_layers) + local input_dim, output_dim, _ + if ref_from and ref_from.outputs[port_from] ~= nil then + nerv.error("%s has already been attached", from) + end + if ref_to and ref_to.inputs[port_to] ~= nil then + nerv.error("%s has already been attached", to) + end + if id_from == "<input>" then + input_dim, _ = ref_to.layer:get_dim() + if dim_in[port_from] ~= input_dim[port_to] then + nerv.error("mismatching data dimension between %s and %s", from, to) + end + inputs[port_from] = {ref_to, port_to} + ref_to.inputs[port_to] = inputs -- just a place holder + elseif id_to == "<output>" then + _, output_dim = ref_from.layer:get_dim() + if output_dim[port_from] ~= dim_out[port_to] then + nerv.error("mismatching data dimension between %s and %s", from, to) + end + outputs[port_to] = {ref_from, port_from} + ref_from.outputs[port_from] = outputs -- just a place holder + else + _, output_dim = ref_from.layer:get_dim() + input_dim, _ = ref_to.layer:get_dim() + if output_dim[port_from] ~= input_dim[port_to] then + nerv.error("mismatching data dimension between %s and %s", from, to) + end + local mid = global_conf.mat_type(global_conf.batch_size, + output_dim[port_from]) + local err_mid = mid:create() + + ref_from.outputs[port_from] = mid + ref_to.inputs[port_to] = mid + + ref_from.err_inputs[port_from] = err_mid + ref_to.err_outputs[port_to] = err_mid + + table.insert(ref_from.next_layers, ref_to) -- add edge + ref_to.in_deg = ref_to.in_deg + 1 -- increase the in-degree of the target layer + end + end + self.layers = layers + self.inputs = inputs + self.outputs = outputs + self.dim_in = dim_in + self.dim_out = dim_out +end + +function nerv.DAGLayer:init(id) -- topology sort + local queue = {} + local l = 1 + local r = 1 + for id, ref in pairs(self.layers) do + if ref.in_deg == 0 then + table.insert(queue, ref) + nerv.utils.printf("adding source layer: %s\n", id) + r = r + 1 + end + end + if l == r then + 
nerv.error("loop detected") + end + while l < r do + local cur = queue[l] + cur.visited = true + l = l + 1 + for _, nl in pairs(cur.next_layers) do + nl.in_deg = nl.in_deg - 1 + if nl.in_deg == 0 then + table.insert(queue, nl) + r = r + 1 + end + end + end + for i = 1, #queue do + nerv.utils.printf("queued layer: %s\n", queue[i].layer.id) + end + self.queue = queue + for id, ref in pairs(self.layers) do + -- check wether the graph is connected + if ref.visited == false then + nerv.utils.printf("warning: layer %s is ignored\n", id) + end + for i = 1, ref.input_len do + if ref.inputs[i] == nil then + nerv.error("dangling port %d of layer %s", i, id) + end + end + for i = 1, ref.output_len do + if ref.outputs[i] == nil then + nerv.error("dangling port %d of layer %s", i, id) + end + end + -- initialize sub layers + ref.layer:init() + end + for i = 1, #self.dim_in do + if self.inputs[i] == nil then + nerv.error("dangling port %d of layer <input>", i) + end + end + for i = 1, #self.dim_out do + if self.outputs[i] == nil then + nerv.error("dangling port %d of layer <output>", i) + end + end +end + +function nerv.DAGLayer:set_inputs(input) + for i = 1, #self.dim_in do + local layer = self.inputs[i][1] + local port = self.inputs[i][2] + layer.inputs[port] = input[i] + end +end + +function nerv.DAGLayer:set_outputs(output) + for i = 1, #self.dim_out do + local layer = self.outputs[i][1] + local port = self.outputs[i][2] + layer.outputs[port] = output[i] + end +end + +function nerv.DAGLayer:set_err_inputs(bp_err) + for i = 1, #self.dim_out do + local layer = self.outputs[i][1] + local port = self.outputs[i][2] + layer.err_inputs[port] = bp_err[i] + end +end + +function nerv.DAGLayer:set_err_outputs(next_bp_err) + for i = 1, #self.dim_in do + local layer = self.inputs[i][1] + local port = self.inputs[i][2] + layer.err_outputs[port] = next_bp_err[i] + end +end + +function nerv.DAGLayer:update(bp_err, input, output) + self:set_err_inputs(bp_err) + self:set_inputs(input) + 
self:set_outputs(output) + for id, ref in pairs(self.queue) do + ref.layer:update(ref.err_inputs, ref.inputs, ref.outputs) + end +end + +function nerv.DAGLayer:propagate(input, output) + self:set_inputs(input) + self:set_outputs(output) + for i = 1, #self.queue do + local ref = self.queue[i] + --[[ + print(ref.inputs[1]) + print(ref.outputs[1]) + print(#ref.inputs, #ref.outputs) + --]] + ref.layer:propagate(ref.inputs, ref.outputs) + end +end + +function nerv.DAGLayer:back_propagate(next_bp_err, bp_err, input, output) + self:set_err_outputs(next_bp_err) + self:set_err_inputs(bp_err) + self:set_inputs(input) + self:set_outputs(output) + for i = #self.queue, 1, -1 do + local ref = self.queue[i] + ref.layer:back_propagate(ref.err_outputs, ref.err_inputs, ref.inputs, ref.outputs) + end +end diff --git a/nn/layer_repo.lua b/nn/layer_repo.lua new file mode 100644 index 0000000..b1d2248 --- /dev/null +++ b/nn/layer_repo.lua @@ -0,0 +1,34 @@ +local LayerRepo = nerv.class("nerv.LayerRepo") + +function LayerRepo:__init(layer_spec, param_repo, global_conf) + local layers = {} + for ltype, llist in pairs(layer_spec) do + local layer_type = nerv.get_type(ltype) + for id, spec in pairs(llist) do + if layers[id] ~= nil then + nerv.error("a layer with id %s already exists", id) + end + nerv.utils.printf("id: %s\n", id) + if type(spec[2]) ~= "table" then + nerv.error("layer config table is need") + end + layer_config = spec[2] + if type(spec[1]) ~= "table" then + nerv.error("parameter description table is needed") + end + for pname, pid in pairs(spec[1]) do + layer_config[pname] = param_repo:get_param(pid, global_conf) + end + layers[id] = layer_type(id, global_conf, layer_config) + end + end + self.layers = layers +end + +function LayerRepo:get_layer(lid) + local layer = self.layers[lid] + if layer == nil then + nerv.error("layer with id %s not found", lid) + end + return layer +end diff --git a/nn/param_repo.lua b/nn/param_repo.lua new file mode 100644 index 0000000..3e37c31 --- 
/dev/null +++ b/nn/param_repo.lua @@ -0,0 +1,26 @@ +local ParamRepo = nerv.class("nerv.ParamRepo") + +function ParamRepo:__init(param_files) + local param_table = {} + if type(param_files) ~= "table" then + nerv.error("param file table is need") + end + for i = 1, #param_files do + local pf = nerv.ChunkFile(param_files[i], "r") + for cid, cspec in pairs(pf.metadata) do + if param_table[cid] ~= nil then + nerv.error("conflicting chunk id in param files") + end + param_table[cid] = pf + end + end + self.param_table = param_table +end + +function ParamRepo:get_param(pid, global_conf) + local pf = self.param_table[pid] + if pf == nil then + nerv.error("param with id %s not found", pid) + end + return pf:read_chunk(pid, global_conf) +end diff --git a/speech b/speech index 821aec3..0c6ca6a 160000 --- a/speech +++ b/speech @@ -1 +1 @@ -Subproject commit 821aec314824b89e9fe9c3ee467793a05ed89ee5 +Subproject commit 0c6ca6a17f06821cd5d612f489ca6cb68c2c4d5b -- cgit v1.2.3-70-g09d2 From 08a52c03a77ce13ae4f6a4deb06ab0ae274d399a Mon Sep 17 00:00:00 2001 From: Determinant Date: Tue, 2 Jun 2015 23:07:15 +0800 Subject: fix a bug: input[1] should be input[2] (since Lua arrays are 1-based) --- examples/test_nn_lib.lua | 60 +++++++++++++++++++++++++++++++++++++----------- layer/softmax_ce.lua | 4 +++- nn/layer_dag.lua | 4 ++++ 3 files changed, 54 insertions(+), 14 deletions(-) diff --git a/examples/test_nn_lib.lua b/examples/test_nn_lib.lua index fd7167a..ec338fe 100644 --- a/examples/test_nn_lib.lua +++ b/examples/test_nn_lib.lua @@ -1,25 +1,46 @@ -require 'layer.affine' -require 'layer.sigmoid' -require 'layer.softmax_ce' +-- require 'layer.affine' +-- require 'layer.sigmoid' +-- require 'layer.softmax_ce' gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, mat_type = nerv.CuMatrixFloat, batch_size = 10} -param_repo = nerv.ParamRepo({"affine.param"}) +param_repo = nerv.ParamRepo({"converted.nerv"}) sublayer_repo = nerv.LayerRepo( { ["nerv.AffineLayer"] = { - affine1 = {{ltp = "a", bp 
= "b"}, {dim_in = {429}, dim_out = {2048}}} + affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, + {dim_in = {429}, dim_out = {2048}}}, + affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, + {dim_in = {2048}, dim_out = {3001}}} }, ["nerv.SigmoidLayer"] = { - sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}} + sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} }, ["nerv.SoftmaxCELayer"] = { - softmax_ce1 = {{}, {dim_in = {2048, 2048}, dim_out = {}}} + softmax_ce0 = {{}, {dim_in = {3001, 3001}, dim_out = {}}} } }, param_repo, gconf) @@ -28,20 +49,33 @@ layer_repo = nerv.LayerRepo( ["nerv.DAGLayer"] = { main = {{}, { - dim_in = {429, 2048}, dim_out = {}, + dim_in = {429, 3001}, dim_out = {}, sub_layers = sublayer_repo, connections = { - ["<input>[1]"] = "affine1[1]", + ["<input>[1]"] = "affine0[1]", + ["affine0[1]"] = "sigmoid0[1]", + ["sigmoid0[1]"] = "affine1[1]", ["affine1[1]"] = "sigmoid1[1]", - ["sigmoid1[1]"] = "softmax_ce1[1]", - ["<input>[2]"] = "softmax_ce1[2]" + ["sigmoid1[1]"] = "affine2[1]", + ["affine2[1]"] = "sigmoid2[1]", + ["sigmoid2[1]"] = "affine3[1]", + ["affine3[1]"] = 
"sigmoid3[1]", + ["sigmoid3[1]"] = "affine4[1]", + ["affine4[1]"] = "sigmoid4[1]", + ["sigmoid4[1]"] = "affine5[1]", + ["affine5[1]"] = "sigmoid5[1]", + ["sigmoid5[1]"] = "affine6[1]", + ["affine6[1]"] = "sigmoid6[1]", + ["sigmoid6[1]"] = "affine7[1]", + ["affine7[1]"] = "softmax_ce0[1]", + ["<input>[2]"] = "softmax_ce0[2]" } }} } }, param_repo, gconf) df = nerv.ChunkFile("input.param", "r") -label = nerv.CuMatrixFloat(10, 2048) +label = nerv.CuMatrixFloat(10, 3001) label:fill(0) for i = 0, 9 do label[i][i] = 1.0 @@ -51,7 +85,7 @@ input = {df:read_chunk("input", gconf).trans, label} output = {} err_input = {} err_output = {input[1]:create()} -sm = sublayer_repo:get_layer("softmax_ce1") +sm = sublayer_repo:get_layer("softmax_ce0") main = layer_repo:get_layer("main") main:init() for i = 0, 3 do diff --git a/layer/softmax_ce.lua b/layer/softmax_ce.lua index 3dfebc5..09eb3a9 100644 --- a/layer/softmax_ce.lua +++ b/layer/softmax_ce.lua @@ -27,6 +27,8 @@ function SoftmaxCELayer:propagate(input, output) local ce = soutput:create() ce:log_elem(soutput) ce:mul_elem(ce, input[2]) +-- print(input[1][0]) +-- print(soutput[1][0]) -- add total ce self.total_ce = self.total_ce - ce:rowsum():colsum()[0] self.total_frames = self.total_frames + soutput:nrow() @@ -34,5 +36,5 @@ end function SoftmaxCELayer:back_propagate(next_bp_err, bp_err, input, output) -- softmax output - label - next_bp_err[1]:add(self.soutput, input[1], 1.0, -1.0) + next_bp_err[1]:add(self.soutput, input[2], 1.0, -1.0) end diff --git a/nn/layer_dag.lua b/nn/layer_dag.lua index 8ea28a0..1ab18fa 100644 --- a/nn/layer_dag.lua +++ b/nn/layer_dag.lua @@ -219,6 +219,10 @@ function nerv.DAGLayer:back_propagate(next_bp_err, bp_err, input, output) self:set_outputs(output) for i = #self.queue, 1, -1 do local ref = self.queue[i] + -- print(ref.layer.id) ref.layer:back_propagate(ref.err_outputs, ref.err_inputs, ref.inputs, ref.outputs) + -- if #ref.err_outputs > 0 then + -- print(ref.err_outputs[1]) + -- end end end -- cgit 
v1.2.3-70-g09d2