author    | Determinant <[email protected]> | 2015-06-10 20:42:10 +0800
committer | Determinant <[email protected]> | 2015-06-10 20:42:10 +0800
commit    | b818c2562d07a69083377cbc34f2add108e9fa66
tree      | a595ce4f269035951715334d2942d91d42ae236e
parent    | c20af45d0756d5d3004105da10e51d42a382ad66
add CombinerLayer to support branches in NN; add MSELayer
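
A minimal usage sketch for the two new layers, assuming the usual nerv.class construction pattern and the constructor/init() signatures from layer/combiner.lua and layer/mse.lua below; the layer ids, dimensions and the empty gconf table are made-up placeholders.

```lua
-- Illustrative only: ids, dimensions and gconf are hypothetical.
local gconf = {}  -- placeholder global configuration

-- CombinerLayer: weighted sum of equally-sized inputs (here out = 1.0*in1 - 1.0*in2),
-- optionally fanned out to several outputs of the same dimension.
local combiner = nerv.CombinerLayer("combiner0", gconf,
                                    {lambda = {1.0, -1.0},
                                     dim_in = {429, 429},
                                     dim_out = {429}})
combiner:init()

-- MSELayer: input[1] is the network output, input[2] the target of the same
-- dimension; output[1] (if connected) receives the per-frame squared error.
local mse = nerv.MSELayer("mse0", gconf,
                          {dim_in = {429, 429}, dim_out = {1}})
mse:init()
```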
-rw-r--r-- | Makefile                   |  6
-rw-r--r-- | common.h                   |  1
-rw-r--r-- | layer/combiner.lua         | 55
-rw-r--r-- | layer/init.lua             |  2
-rw-r--r-- | layer/mse.lua              | 52
-rw-r--r-- | layer/softmax_ce.lua       | 12
-rw-r--r-- | layer/window.lua           |  2
-rw-r--r-- | matrix/cukernel.h          |  3
-rw-r--r-- | matrix/generic/cukernel.cu | 31
-rw-r--r-- | matrix/generic/cumatrix.c  | 24
-rw-r--r-- | matrix/mmatrix.c           |  2
11 files changed, 173 insertions, 17 deletions
diff --git a/Makefile b/Makefile
--- a/Makefile
+++ b/Makefile
@@ -8,12 +8,12 @@ LIBS := libnerv.so
 LUA_LIBS := matrix/init.lua io/init.lua nerv.lua \
         pl/utils.lua pl/compat.lua \
         layer/init.lua layer/affine.lua layer/sigmoid.lua layer/softmax_ce.lua \
-        layer/window.lua layer/bias.lua \
+        layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \
         nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \
         io/sgd_buffer.lua
 INCLUDE := -I build/luajit-2.0/include/luajit-2.0/ -DLUA_USE_APICHECK
-CUDA_BASE := /usr/local/cuda-6.5
-#CUDA_BASE := /usr/local/cuda-5.0
+#CUDA_BASE := /usr/local/cuda-6.5
+CUDA_BASE := /usr/local/cuda-5.0
 CUDA_INCLUDE := -I $(CUDA_BASE)/include/
 INCLUDE += $(CUDA_INCLUDE)
 LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcublas
diff --git a/common.h b/common.h
--- a/common.h
+++ b/common.h
@@ -26,6 +26,7 @@ typedef struct HashMap {
 HashMap *hashmap_create(size_t size, HashKey_t hfunc, HashMapCmp_t cmp);
 void *hashmap_getval(HashMap *h, const char *key);
 void hashmap_setval(HashMap *h, const char *key, void *val);
+void hashmap_clear(HashMap *h);

 size_t bkdr_hash(const char *key);

diff --git a/layer/combiner.lua b/layer/combiner.lua
new file mode 100644
index 0000000..2eac83c
--- /dev/null
+++ b/layer/combiner.lua
@@ -0,0 +1,55 @@
+local CombinerLayer = nerv.class('nerv.CombinerLayer', 'nerv.Layer')
+
+function CombinerLayer:__init(id, global_conf, layer_conf)
+    self.id = id
+    self.lambda = layer_conf.lambda
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+    self:check_dim_len(#self.lambda, -1)
+end
+
+function CombinerLayer:init()
+    local dim = self.dim_in[1]
+    for i = 2, #self.dim_in do
+        if self.dim_in[i] ~= dim then
+            nerv.error("mismatching dimensions of inputs")
+        end
+    end
+    for i = 1, #self.dim_out do
+        if self.dim_out[i] ~= dim then
+            nerv.error("mismatching dimensions of inputs/outputs")
+        end
+    end
+end
+
+function CombinerLayer:update(bp_err, input, output)
+end
+
+function CombinerLayer:propagate(input, output)
+    output[1]:fill(0)
+    for i = 1, #self.dim_in do
+        output[1]:add(output[1], input[i], 1.0, self.lambda[i])
+    end
+    for i = 2, #self.dim_out do
+        output[i]:copy_fromd(output[1])
+    end
+end
+
+function CombinerLayer:back_propagate(next_bp_err, bp_err, input, output)
+    local sum = bp_err[1]:create()
+    sum:fill(0)
+    for i = 1, #self.dim_out do
+        sum:add(sum, bp_err[i], 1.0, 1.0)
+    end
+    for i = 1, #self.dim_in do
+        local scale = nerv.CuMatrixFloat(sum:nrow(), 1)
+        scale:fill(self.lambda[i])
+        next_bp_err[i]:copy_fromd(sum)
+        next_bp_err[i]:scale_rows_by_col(scale)
+    end
+end
+
+function CombinerLayer:get_params()
+    return {self.lambda}
+end
diff --git a/layer/init.lua b/layer/init.lua
index 844f46b..169427d 100644
--- a/layer/init.lua
+++ b/layer/init.lua
@@ -71,3 +71,5 @@ require 'layer.sigmoid'
 require 'layer.softmax_ce'
 require 'layer.bias'
 require 'layer.window'
+require 'layer.mse'
+require 'layer.combiner'
diff --git a/layer/mse.lua b/layer/mse.lua
new file mode 100644
index 0000000..da5b24d
--- /dev/null
+++ b/layer/mse.lua
@@ -0,0 +1,52 @@
+local MSELayer = nerv.class("nerv.MSELayer", "nerv.Layer")
+
+function MSELayer:__init(id, global_conf, layer_conf)
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+    self:check_dim_len(2, -1)
+end
+
+function MSELayer:init()
+    if self.dim_in[1] ~= self.dim_in[2] then
+        nerv.error("mismatching dimensions of previous network output and labels")
+    end
+    self.total_mse = 0.0
+    self.total_frames = 0
+end
+
+function MSELayer:update(bp_err, input, output)
+    -- no params, therefore do nothing
+end
+
+function MSELayer:propagate(input, output)
+    local mse = input[1]:create()
+    mse:add(input[1], input[2], 1.0, -1.0)
+    self.diff = mse:create()
+    self.diff:copy_fromd(mse)
+    mse:mul_elem(mse, mse)
+    mse = mse:rowsum(mse)
+    local scale = nerv.CuMatrixFloat(mse:nrow(), 1)
+    scale:fill(1 / input[1]:ncol())
+    mse:scale_rows_by_col(scale)
+    if output[1] ~= nil then
+        output[1]:copy_fromd(mse)
+    end
+    self.total_mse = self.total_mse + mse:colsum()[0]
+    self.total_frames = self.total_frames + mse:nrow()
+end
+
+-- NOTE: must call propagate before back_propagate
+function MSELayer:back_propagate(next_bp_err, bp_err, input, output)
+    local nbe = next_bp_err[1]
+    nbe:copy_fromd(self.diff)
+    self.diff = nil
+    if bp_err[1] ~= nil then
+        nbe:scale_rows_by_col(bp_err[1])
+    end
+end
+
+function MSELayer:get_params()
+    return {}
+end
diff --git a/layer/softmax_ce.lua b/layer/softmax_ce.lua
index 2e1f5fb..7888540 100644
--- a/layer/softmax_ce.lua
+++ b/layer/softmax_ce.lua
@@ -36,8 +36,12 @@ function SoftmaxCELayer:propagate(input, output)
         label = label:decompress(input[1]:ncol())
     end
     ce:mul_elem(ce, label)
+    ce = ce:rowsum()
+    if output[1] ~= nil then
+        output[1]:copy_fromd(ce)
+    end
     -- add total ce
-    self.total_ce = self.total_ce - ce:rowsum():colsum()[0]
+    self.total_ce = self.total_ce - ce:colsum()[0]
     self.total_frames = self.total_frames + soutput:nrow()
     -- TODO: add colsame for uncompressed label
     if self.compressed then
@@ -51,7 +55,11 @@ function SoftmaxCELayer:back_propagate(next_bp_err, bp_err, input, output)
     if self.compressed then
         label = label:decompress(input[1]:ncol())
     end
-    next_bp_err[1]:add(self.soutput, label, 1.0, -1.0)
+    local nbe = next_bp_err[1]
+    nbe:add(self.soutput, label, 1.0, -1.0)
+    if bp_err[1] ~= nil then
+        nbe:scale_rows_by_col(bp_err[1])
+    end
 end

 function SoftmaxCELayer:get_params()
diff --git a/layer/window.lua b/layer/window.lua
index b381c9b..3a093f4 100644
--- a/layer/window.lua
+++ b/layer/window.lua
@@ -20,7 +20,7 @@ end

 function WindowLayer:propagate(input, output)
     output[1]:copy_fromd(input[1])
-    output[1]:scale_row(self.window.trans)
+    output[1]:scale_rows_by_row(self.window.trans)
 end

 function WindowLayer:get_params()
diff --git a/matrix/cukernel.h b/matrix/cukernel.h
index 23398c8..8a1494f 100644
--- a/matrix/cukernel.h
+++ b/matrix/cukernel.h
@@ -14,6 +14,7 @@ void cudak_(cuda_add_row)(const Matrix *a, Matrix *b, double beta);
 void cudak_(cuda_fill)(Matrix *a, double val);
 void cudak_(cuda_expand_frm)(const Matrix *a, Matrix *b, int context);
 void cudak_(cuda_rearrange_frm)(const Matrix *a, Matrix *b, int step);
-void cudak_(cuda_scale_row)(const Matrix *a, Matrix *b);
+void cudak_(cuda_scale_rows_by_row)(const Matrix *a, Matrix *b);
+void cudak_(cuda_scale_rows_by_col)(const Matrix *a, Matrix *b);
 void cudak_(cuda_decompress)(const Matrix *a, Matrix *b);
 #endif
diff --git a/matrix/generic/cukernel.cu b/matrix/generic/cukernel.cu
index ffae5ed..d6c8adc 100644
--- a/matrix/generic/cukernel.cu
+++ b/matrix/generic/cukernel.cu
@@ -237,9 +237,18 @@ __global__ void cudak_(rearrange_frm)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
     b[j + i * stride] = a[j / step + (j % step) * orig_dim + i * stride];
 }

-__global__ void cudak_(scale_row)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
-                                  int nrow, int ncol,
-                                  int stride) {
+__global__ void cudak_(scale_rows_by_col)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
+                                          int nrow, int ncol,
+                                          int astride, int bstride) {
+    int j = blockIdx.x * blockDim.x + threadIdx.x;
+    int i = blockIdx.y * blockDim.y + threadIdx.y;
+    if (i >= nrow || j >= ncol) return;
+    b[j + i * bstride] *= a[i * astride];
+}
+
+__global__ void cudak_(scale_rows_by_row)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
+                                          int nrow, int ncol,
+                                          int stride) {
     int j = blockIdx.x * blockDim.x + threadIdx.x;
     int i = blockIdx.y * blockDim.y + threadIdx.y;
     if (i >= nrow || j >= ncol) return;
@@ -526,11 +535,23 @@ extern "C" {
         cudaStreamSynchronize(0);
     }

-    void cudak_(cuda_scale_row)(const Matrix *a, Matrix *b) {
+    void cudak_(cuda_scale_rows_by_col)(const Matrix *a, Matrix *b) {
+        dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N);
+        dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x),
+                       CEIL_DIV(b->nrow, threadsPerBlock.y));
+        cudak_(scale_rows_by_col)<<<numBlocks, threadsPerBlock>>> \
+            (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b),
+             b->nrow, b->ncol,
+             a->stride / sizeof(MATRIX_ELEM),
+             b->stride / sizeof(MATRIX_ELEM));
+        cudaStreamSynchronize(0);
+    }
+
+    void cudak_(cuda_scale_rows_by_row)(const Matrix *a, Matrix *b) {
         dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N);
         dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x),
                        CEIL_DIV(b->nrow, threadsPerBlock.y));
-        cudak_(scale_row)<<<numBlocks, threadsPerBlock>>> \
+        cudak_(scale_rows_by_row)<<<numBlocks, threadsPerBlock>>> \
            (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b),
             b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM));
        cudaStreamSynchronize(0);
diff --git a/matrix/generic/cumatrix.c b/matrix/generic/cumatrix.c
index a8e18e0..b5d1a35 100644
--- a/matrix/generic/cumatrix.c
+++ b/matrix/generic/cumatrix.c
@@ -37,8 +37,8 @@ static int nerv_matrix_(add)(lua_State *L) {
     Matrix *c = luaT_checkudata(L, 1, nerv_matrix_(tname));
     Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname));
     Matrix *b = luaT_checkudata(L, 3, nerv_matrix_(tname));
-    MATRIX_ELEM alpha = luaL_checknumber(L, 4); /* alpha */
-    MATRIX_ELEM beta = luaL_checknumber(L, 5); /* alpha */
+    MATRIX_ELEM alpha = luaL_checknumber(L, 4);
+    MATRIX_ELEM beta = luaL_checknumber(L, 5);
     CHECK_SAME_DIMENSION(a, b);
     CHECK_SAME_DIMENSION(a, c);
     nerv_matrix_(add_)(L, a, b, c, alpha, beta);
@@ -396,7 +396,20 @@ static int nerv_matrix_(rearrange_frm)(lua_State *L) {
     return 0;
 }

-static int nerv_matrix_(scale_row)(lua_State *L) {
+static int nerv_matrix_(scale_rows_by_col)(lua_State *L) {
+    Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
+    Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname));
+    if (a->nrow != b->nrow)
+        nerv_error(L, "the number of rows is not the same");
+    if (b->ncol != 1)
+        nerv_error(L, "a column vector is expected");
+    PROFILE_START
+    cudak_(cuda_scale_rows_by_col)(b, a);
+    PROFILE_STOP
+    return 0;
+}
+
+static int nerv_matrix_(scale_rows_by_row)(lua_State *L) {
     Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
     Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname));
     if (a->ncol != b->ncol)
@@ -404,7 +417,7 @@ static int nerv_matrix_(scale_row)(lua_State *L) {
     if (b->nrow != 1)
         nerv_error(L, "a row vector is expected");
     PROFILE_START
-    cudak_(cuda_scale_row)(b, a);
+    cudak_(cuda_scale_rows_by_row)(b, a);
     PROFILE_STOP
     return 0;
 }
@@ -434,7 +447,8 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = {
     {"copy_rows_fromh_by_idx", nerv_matrix_(copy_rows_fromh_by_idx)},
     {"expand_frm", nerv_matrix_(expand_frm)},
     {"rearrange_frm", nerv_matrix_(rearrange_frm)},
-    {"scale_row", nerv_matrix_(scale_row)},
+    {"scale_rows_by_row", nerv_matrix_(scale_rows_by_row)},
+    {"scale_rows_by_col", nerv_matrix_(scale_rows_by_col)},
     {NULL, NULL}
 };
diff --git a/matrix/mmatrix.c b/matrix/mmatrix.c
index ffc058d..d1d68b9 100644
--- a/matrix/mmatrix.c
+++ b/matrix/mmatrix.c
@@ -1,10 +1,12 @@
 #define NERV_GENERIC_MMATRIX
+#include <stdlib.h>
 #include "../common.h"

 void nerv_matrix_host_float_init(lua_State *L);
 void nerv_matrix_host_double_init(lua_State *L);
 void nerv_matrix_host_int_init(lua_State *L);
 void nerv_mmatrix_init(lua_State *L) {
+    srand(1);
     nerv_matrix_host_float_init(L);
     nerv_matrix_host_double_init(L);
     nerv_matrix_host_int_init(L);
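
The renamed and newly added CuMatrix methods can be exercised as in the sketch below; the shapes are hypothetical, and only the bindings registered in matrix/generic/cumatrix.c above are assumed.

```lua
-- Assumes a CUDA device and the nerv matrix bindings are available.
local m = nerv.CuMatrixFloat(4, 3)
m:fill(1.0)

-- scale_rows_by_col takes a column vector with one entry per row of m;
-- row i of m is multiplied in place by the i-th entry (this is how
-- CombinerLayer and MSELayer apply a per-frame scaling factor).
local col = nerv.CuMatrixFloat(4, 1)
col:fill(0.5)
m:scale_rows_by_col(col)

-- scale_rows_by_row (the renamed scale_row) takes a row vector with one
-- entry per column; every row of m is scaled element-wise by it, as in
-- WindowLayer:propagate.
local row = nerv.CuMatrixFloat(1, 3)
row:fill(2.0)
m:scale_rows_by_row(row)
```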
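
For reference, a plain-Lua restatement of the per-frame quantity MSELayer accumulates; frame_mse is a hypothetical helper operating on ordinary Lua arrays rather than CuMatrix objects.

```lua
-- What MSELayer:propagate adds to total_mse for one frame (row) of dimension n.
local function frame_mse(x, y)
    local n, sum = #x, 0.0
    for j = 1, n do
        local d = x[j] - y[j]
        sum = sum + d * d
    end
    -- back_propagate() sends back the raw difference x - y (optionally
    -- row-scaled by bp_err[1]), i.e. the gradient of this value up to a
    -- constant factor 2 / n.
    return sum / n
end

print(frame_mse({1.0, 2.0}, {0.0, 2.0}))  -- prints 0.5
```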