summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 6
-rw-r--r-- common.h 1
-rw-r--r-- layer/combiner.lua 55
-rw-r--r-- layer/init.lua 2
-rw-r--r-- layer/mse.lua 52
-rw-r--r-- layer/softmax_ce.lua 12
-rw-r--r-- layer/window.lua 2
-rw-r--r-- matrix/cukernel.h 3
-rw-r--r-- matrix/generic/cukernel.cu 31
-rw-r--r-- matrix/generic/cumatrix.c 24
-rw-r--r-- matrix/mmatrix.c 2
11 files changed, 173 insertions, 17 deletions
diff --git a/Makefile b/Makefile
index cb694a2..448e003 100644
--- a/Makefile
+++ b/Makefile
@@ -8,12 +8,12 @@ LIBS := libnerv.so
LUA_LIBS := matrix/init.lua io/init.lua nerv.lua \
pl/utils.lua pl/compat.lua \
layer/init.lua layer/affine.lua layer/sigmoid.lua layer/softmax_ce.lua \
- layer/window.lua layer/bias.lua \
+ layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \
nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \
io/sgd_buffer.lua
INCLUDE := -I build/luajit-2.0/include/luajit-2.0/ -DLUA_USE_APICHECK
-CUDA_BASE := /usr/local/cuda-6.5
-#CUDA_BASE := /usr/local/cuda-5.0
+#CUDA_BASE := /usr/local/cuda-6.5
+CUDA_BASE := /usr/local/cuda-5.0
CUDA_INCLUDE := -I $(CUDA_BASE)/include/
INCLUDE += $(CUDA_INCLUDE)
LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcublas
diff --git a/common.h b/common.h
index 8be19b0..e21c7a5 100644
--- a/common.h
+++ b/common.h
@@ -26,6 +26,7 @@ typedef struct HashMap {
HashMap *hashmap_create(size_t size, HashKey_t hfunc, HashMapCmp_t cmp);
void *hashmap_getval(HashMap *h, const char *key);
void hashmap_setval(HashMap *h, const char *key, void *val);
+void hashmap_clear(HashMap *h);
size_t bkdr_hash(const char *key);
diff --git a/layer/combiner.lua b/layer/combiner.lua
new file mode 100644
index 0000000..2eac83c
--- /dev/null
+++ b/layer/combiner.lua
@@ -0,0 +1,55 @@
+-- Layer class registered under the name 'nerv.CombinerLayer'; 'nerv.Layer'
+-- is passed as the second argument to nerv.class (presumably the parent
+-- class -- confirm against nerv.class).
+local CombinerLayer = nerv.class('nerv.CombinerLayer', 'nerv.Layer')
+
+-- Constructor: stores the layer id, the global configuration and the
+-- per-layer settings (lambda, dim_in, dim_out) on the instance.
+-- layer_conf.lambda is used as a sequence with one weight per input.
+function CombinerLayer:__init(id, global_conf, layer_conf)
+    local weights = layer_conf.lambda
+    self.gconf = global_conf
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.lambda = weights
+    -- the number of weights is passed as the expected input count; the -1
+    -- for outputs presumably means "any number" (confirm in check_dim_len)
+    self:check_dim_len(#weights, -1)
+end
+
+-- Validates the configured dimensions: every input and every output must
+-- have the same width as the first input. Mismatches are reported through
+-- nerv.error.
+function CombinerLayer:init()
+    local dims_in, dims_out = self.dim_in, self.dim_out
+    local expected = dims_in[1]
+    -- inputs 2..n are compared against the first one
+    for k = 2, #dims_in do
+        if dims_in[k] ~= expected then
+            nerv.error("mismatching dimensions of inputs")
+        end
+    end
+    -- every output must match the common input width as well
+    for k = 1, #dims_out do
+        if dims_out[k] ~= expected then
+            nerv.error("mismatching dimensions of inputs/outputs")
+        end
+    end
+end
+
+-- Parameter update hook. The body is intentionally empty: this method
+-- performs no work for the combiner layer.
+function CombinerLayer:update(bp_err, input, output)
+end
+
+-- Forward pass: accumulates the lambda-weighted inputs into output[1], then
+-- copies that result into every remaining output.
+--   input:  one matrix per entry of dim_in
+--   output: one matrix per entry of dim_out (all receive the same values)
+function CombinerLayer:propagate(input, output)
+    local acc = output[1]
+    local weights = self.lambda
+    acc:fill(0)
+    -- add(acc, input[k], 1.0, w): presumably acc = 1.0 * acc + w * input[k]
+    for k = 1, #self.dim_in do
+        acc:add(acc, input[k], 1.0, weights[k])
+    end
+    -- duplicate the combined result into outputs 2..n
+    for k = 2, #self.dim_out do
+        output[k]:copy_fromd(acc)
+    end
+end
+
+-- Backward pass: sums the errors arriving from all outputs, then hands each
+-- input that sum with every row scaled by the input's lambda weight.
+--   next_bp_err: per-input matrices that receive the result
+--   bp_err:      per-output error matrices
+--   input, output: not used by this method
+function CombinerLayer:back_propagate(next_bp_err, bp_err, input, output)
+    local sum = bp_err[1]:create()
+    sum:fill(0)
+    for i = 1, #self.dim_out do
+        sum:add(sum, bp_err[i], 1.0, 1.0)
+    end
+    -- The scale column has the same shape (nrow x 1) for every input, so it
+    -- is allocated once here and only refilled inside the loop, instead of
+    -- constructing a new nerv.CuMatrixFloat on each iteration.
+    local scale = nerv.CuMatrixFloat(sum:nrow(), 1)
+    for i = 1, #self.dim_in do
+        scale:fill(self.lambda[i])
+        next_bp_err[i]:copy_fromd(sum)
+        next_bp_err[i]:scale_rows_by_col(scale)
+    end
+end
+
+-- Returns a one-element list holding the lambda table stored by __init.
+-- NOTE(review): lambda is taken straight from layer_conf; whether callers of
+-- get_params expect a parameter object here instead should be confirmed.
+function CombinerLayer:get_params()
+ return {self.lambda}
+end
diff --git a/layer/init.lua b/layer/init.lua
index 844f46b..169427d 100644
--- a/layer/init.lua
+++ b/layer/init.lua
@@ -71,3 +71,5 @@ require 'layer.sigmoid'
require 'layer.softmax_ce'
require 'layer.bias'
require 'layer.window'
+require 'layer.mse'
+require 'layer.combiner'
diff --git a/layer/mse.lua b/layer/mse.lua
new file mode 100644
index 0000000..da5b24d
--- /dev/null
+++ b/layer/mse.lua
@@ -0,0 +1,52 @@
+-- Layer class registered under the name "nerv.MSELayer"; "nerv.Layer" is
+-- passed as the second argument to nerv.class (presumably the parent class
+-- -- confirm against nerv.class).
+local MSELayer = nerv.class("nerv.MSELayer", "nerv.Layer")
+
+-- Constructor: records the id, the global configuration and the
+-- dim_in/dim_out lists from layer_conf, then checks the list lengths.
+function MSELayer:__init(id, global_conf, layer_conf)
+    self.gconf = global_conf
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    -- 2 is passed for the inputs; the -1 for outputs presumably means
+    -- "any number" (confirm in check_dim_len)
+    self:check_dim_len(2, -1)
+end
+
+-- Checks that both inputs have the same width, then resets the running
+-- statistics (total_mse and total_frames) to zero.
+function MSELayer:init()
+    local dims = self.dim_in
+    if not (dims[1] == dims[2]) then
+        nerv.error("mismatching dimensions of previous network output and labels")
+    end
+    self.total_frames = 0
+    self.total_mse = 0.0
+end
+
+-- Parameter update hook; a no-op for this layer.
+function MSELayer:update(bp_err, input, output)
+ -- no params, therefore do nothing
+end
+
+-- Forward pass: computes a per-row mean squared error between input[1] and
+-- input[2] and accumulates running totals.
+--   input[1], input[2]: matrices of the same dimensions (init's error
+--                       message calls them the previous network output and
+--                       the labels)
+--   output[1]: optional; when not nil it receives the per-row MSE column
+-- Side effects: stores the element-wise difference in self.diff for
+-- back_propagate and updates self.total_mse / self.total_frames.
+function MSELayer:propagate(input, output)
+    -- add(a, b, 1.0, -1.0): presumably a - b. The result is written straight
+    -- into the matrix kept for back_propagate, so no extra copy is needed.
+    local diff = input[1]:create()
+    diff:add(input[1], input[2], 1.0, -1.0)
+    self.diff = diff
+    -- squared differences go into a separate matrix so self.diff stays intact
+    local mse = diff:create()
+    mse:mul_elem(diff, diff)
+    -- rowsum is called without arguments, matching its use in softmax_ce.lua
+    mse = mse:rowsum()
+    -- divide each row sum by the number of columns to obtain the mean
+    local scale = nerv.CuMatrixFloat(mse:nrow(), 1)
+    scale:fill(1 / input[1]:ncol())
+    mse:scale_rows_by_col(scale)
+    if output[1] ~= nil then
+        output[1]:copy_fromd(mse)
+    end
+    self.total_mse = self.total_mse + mse:colsum()[0]
+    self.total_frames = self.total_frames + mse:nrow()
+end
+
+-- Backward pass: writes the difference stored by propagate into
+-- next_bp_err[1] and, when an incoming error bp_err[1] is supplied, scales
+-- each row of the result by the matching entry of that column.
+-- NOTE: must call propagate before back_propagate; the stored difference is
+-- cleared by every call, so one propagate serves exactly one back_propagate.
+function MSELayer:back_propagate(next_bp_err, bp_err, input, output)
+    local diff = self.diff
+    if diff == nil then
+        -- report the misuse explicitly instead of letting copy_fromd fail on
+        -- a nil argument
+        nerv.error("MSELayer:back_propagate called before propagate")
+    end
+    local nbe = next_bp_err[1]
+    nbe:copy_fromd(diff)
+    self.diff = nil
+    if bp_err[1] ~= nil then
+        nbe:scale_rows_by_col(bp_err[1])
+    end
+end
+
+-- Returns an empty table: this layer exposes no parameters.
+function MSELayer:get_params()
+ return {}
+end
diff --git a/layer/softmax_ce.lua b/layer/softmax_ce.lua
index 2e1f5fb..7888540 100644
--- a/layer/softmax_ce.lua
+++ b/layer/softmax_ce.lua
@@ -36,8 +36,12 @@ function SoftmaxCELayer:propagate(input, output)
label = label:decompress(input[1]:ncol())
end
ce:mul_elem(ce, label)
+ ce = ce:rowsum()
+ if output[1] ~= nil then
+ output[1]:copy_fromd(ce)
+ end
-- add total ce
- self.total_ce = self.total_ce - ce:rowsum():colsum()[0]
+ self.total_ce = self.total_ce - ce:colsum()[0]
self.total_frames = self.total_frames + soutput:nrow()
-- TODO: add colsame for uncompressed label
if self.compressed then
@@ -51,7 +55,11 @@ function SoftmaxCELayer:back_propagate(next_bp_err, bp_err, input, output)
if self.compressed then
label = label:decompress(input[1]:ncol())
end
- next_bp_err[1]:add(self.soutput, label, 1.0, -1.0)
+ local nbe = next_bp_err[1]
+ nbe:add(self.soutput, label, 1.0, -1.0)
+ if bp_err[1] ~= nil then
+ nbe:scale_rows_by_col(bp_err[1])
+ end
end
function SoftmaxCELayer:get_params()
diff --git a/layer/window.lua b/layer/window.lua
index b381c9b..3a093f4 100644
--- a/layer/window.lua
+++ b/layer/window.lua
@@ -20,7 +20,7 @@ end
function WindowLayer:propagate(input, output)
output[1]:copy_fromd(input[1])
- output[1]:scale_row(self.window.trans)
+ output[1]:scale_rows_by_row(self.window.trans)
end
function WindowLayer:get_params()
diff --git a/matrix/cukernel.h b/matrix/cukernel.h
index 23398c8..8a1494f 100644
--- a/matrix/cukernel.h
+++ b/matrix/cukernel.h
@@ -14,6 +14,7 @@ void cudak_(cuda_add_row)(const Matrix *a, Matrix *b, double beta);
void cudak_(cuda_fill)(Matrix *a, double val);
void cudak_(cuda_expand_frm)(const Matrix *a, Matrix *b, int context);
void cudak_(cuda_rearrange_frm)(const Matrix *a, Matrix *b, int step);
-void cudak_(cuda_scale_row)(const Matrix *a, Matrix *b);
+void cudak_(cuda_scale_rows_by_row)(const Matrix *a, Matrix *b);
+void cudak_(cuda_scale_rows_by_col)(const Matrix *a, Matrix *b);
void cudak_(cuda_decompress)(const Matrix *a, Matrix *b);
#endif
diff --git a/matrix/generic/cukernel.cu b/matrix/generic/cukernel.cu
index ffae5ed..d6c8adc 100644
--- a/matrix/generic/cukernel.cu
+++ b/matrix/generic/cukernel.cu
@@ -237,9 +237,18 @@ __global__ void cudak_(rearrange_frm)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
b[j + i * stride] = a[j / step + (j % step) * orig_dim + i * stride];
}
-__global__ void cudak_(scale_row)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
- int nrow, int ncol,
- int stride) {
+/* Multiplies every element of row i of b by a[i * astride], i.e. the first
+ * element of row i of a. Each thread handles one (row, col) element of b;
+ * threads that fall outside nrow x ncol do nothing. Both strides are used
+ * as element counts in the index arithmetic. */
+__global__ void cudak_(scale_rows_by_col)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
+                                          int nrow, int ncol,
+                                          int astride, int bstride) {
+    const int col = blockIdx.x * blockDim.x + threadIdx.x;
+    const int row = blockIdx.y * blockDim.y + threadIdx.y;
+    if (row < nrow && col < ncol) {
+        const MATRIX_ELEM factor = a[row * astride];
+        b[col + row * bstride] *= factor;
+    }
+}
+
+__global__ void cudak_(scale_rows_by_row)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
+ int nrow, int ncol,
+ int stride) {
int j = blockIdx.x * blockDim.x + threadIdx.x;
int i = blockIdx.y * blockDim.y + threadIdx.y;
if (i >= nrow || j >= ncol) return;
@@ -526,11 +535,23 @@ extern "C" {
cudaStreamSynchronize(0);
}
- void cudak_(cuda_scale_row)(const Matrix *a, Matrix *b) {
+ /* Host-side launcher for the scale_rows_by_col kernel: a supplies the
+  * per-row factors, b is scaled in place. The grid is sized from b so that
+  * x covers its columns and y its rows; byte strides are converted to
+  * element counts. Calls cudaStreamSynchronize(0) before returning. */
+ void cudak_(cuda_scale_rows_by_col)(const Matrix *a, Matrix *b) {
+     dim3 block(CUDA_THREADS_N, CUDA_THREADS_N);
+     dim3 grid(CEIL_DIV(b->ncol, block.x),
+               CEIL_DIV(b->nrow, block.y));
+     cudak_(scale_rows_by_col)<<<grid, block>>>(
+         MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b),
+         b->nrow, b->ncol,
+         a->stride / sizeof(MATRIX_ELEM),
+         b->stride / sizeof(MATRIX_ELEM));
+     cudaStreamSynchronize(0);
+ }
+
+ void cudak_(cuda_scale_rows_by_row)(const Matrix *a, Matrix *b) {
dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N);
dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x),
CEIL_DIV(b->nrow, threadsPerBlock.y));
- cudak_(scale_row)<<<numBlocks, threadsPerBlock>>> \
+ cudak_(scale_rows_by_row)<<<numBlocks, threadsPerBlock>>> \
(MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b),
b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM));
cudaStreamSynchronize(0);
diff --git a/matrix/generic/cumatrix.c b/matrix/generic/cumatrix.c
index a8e18e0..b5d1a35 100644
--- a/matrix/generic/cumatrix.c
+++ b/matrix/generic/cumatrix.c
@@ -37,8 +37,8 @@ static int nerv_matrix_(add)(lua_State *L) {
Matrix *c = luaT_checkudata(L, 1, nerv_matrix_(tname));
Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname));
Matrix *b = luaT_checkudata(L, 3, nerv_matrix_(tname));
- MATRIX_ELEM alpha = luaL_checknumber(L, 4); /* alpha */
- MATRIX_ELEM beta = luaL_checknumber(L, 5); /* alpha */
+ MATRIX_ELEM alpha = luaL_checknumber(L, 4);
+ MATRIX_ELEM beta = luaL_checknumber(L, 5);
CHECK_SAME_DIMENSION(a, b);
CHECK_SAME_DIMENSION(a, c);
nerv_matrix_(add_)(L, a, b, c, alpha, beta);
@@ -396,7 +396,20 @@ static int nerv_matrix_(rearrange_frm)(lua_State *L) {
return 0;
}
-static int nerv_matrix_(scale_row)(lua_State *L) {
+/* Lua binding for scale_rows_by_col. Stack argument 1 (a) is the matrix
+ * that gets scaled in place; argument 2 (b) must be a column vector
+ * (ncol == 1) with the same number of rows as a. Pushes no results. */
+static int nerv_matrix_(scale_rows_by_col)(lua_State *L) {
+    Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
+    Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname));
+    if (a->nrow != b->nrow) {
+        nerv_error(L, "the number of rows is not the same");
+    }
+    if (b->ncol != 1) {
+        nerv_error(L, "a column vector is expected");
+    }
+    PROFILE_START
+    /* the launcher takes (factors, matrix-to-scale), hence the swapped order */
+    cudak_(cuda_scale_rows_by_col)(b, a);
+    PROFILE_STOP
+    return 0;
+}
+
+static int nerv_matrix_(scale_rows_by_row)(lua_State *L) {
Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname));
if (a->ncol != b->ncol)
@@ -404,7 +417,7 @@ static int nerv_matrix_(scale_row)(lua_State *L) {
if (b->nrow != 1)
nerv_error(L, "a row vector is expected");
PROFILE_START
- cudak_(cuda_scale_row)(b, a);
+ cudak_(cuda_scale_rows_by_row)(b, a);
PROFILE_STOP
return 0;
}
@@ -434,7 +447,8 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = {
{"copy_rows_fromh_by_idx", nerv_matrix_(copy_rows_fromh_by_idx)},
{"expand_frm", nerv_matrix_(expand_frm)},
{"rearrange_frm", nerv_matrix_(rearrange_frm)},
- {"scale_row", nerv_matrix_(scale_row)},
+ {"scale_rows_by_row", nerv_matrix_(scale_rows_by_row)},
+ {"scale_rows_by_col", nerv_matrix_(scale_rows_by_col)},
{NULL, NULL}
};
diff --git a/matrix/mmatrix.c b/matrix/mmatrix.c
index ffc058d..d1d68b9 100644
--- a/matrix/mmatrix.c
+++ b/matrix/mmatrix.c
@@ -1,10 +1,12 @@
#define NERV_GENERIC_MMATRIX
+#include <stdlib.h>
#include "../common.h"
void nerv_matrix_host_float_init(lua_State *L);
void nerv_matrix_host_double_init(lua_State *L);
void nerv_matrix_host_int_init(lua_State *L);
void nerv_mmatrix_init(lua_State *L) {
+ srand(1);
nerv_matrix_host_float_init(L);
nerv_matrix_host_double_init(L);
nerv_matrix_host_int_init(L);