-rw-r--r--   layer/affine.lua             20
-rw-r--r--   layer/init.lua                2
-rw-r--r--   matrix/cukernel.h             1
-rw-r--r--   matrix/generic/cukernel.cu   18
-rw-r--r--   matrix/generic/cumatrix.c    25
-rw-r--r--   matrix/init.lua               2
6 files changed, 59 insertions, 9 deletions
diff --git a/layer/affine.lua b/layer/affine.lua
index 5f1b4ce..67c5854 100644
--- a/layer/affine.lua
+++ b/layer/affine.lua
@@ -10,16 +10,32 @@ function LinearTransParam:write(pfhandle)
self.trans:new_to_host():save(pfhandle)
end
-function AffineLayer:__init(id, ltp, bp)
+function AffineLayer:__init(id, global_conf, ltp, bp)
self.ltp = ltp
self.bp = bp
+ self.gconf = global_conf
+ -- linear transform correction
+ self.ltc = ltp:create()
+ self.ltc:fill(0)
+ -- bias correction
+ self.bc = bp:create()
+ self.bc:fill(0)
end
function nerv.AffineLayer:update(input, output)
+    -- momentum gain --
+    local mmt_gain = 1.0 / (1.0 - self.gconf.momentum)
+    local n = input:nrow() * mmt_gain
+-- ltc =
end
function nerv.AffineLayer:propagate(input, output)
+ -- apply linear transform
+    output:mul(input, self.ltp, 1.0, 0.0, 'N', 'N')
+ -- add bias
+ output:add_row(self.bp, 1.0)
end
-function nerv.AffineLayer:back_propagate(input, output)
+function nerv.AffineLayer:back_propagate(next_bp_err, bp_err, input, output)
+    next_bp_err:mul(bp_err, self.ltp, 1.0, 0.0, 'N', 'T')
end
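A hypothetical end-to-end use of the revised constructor and propagate(); the parameter objects are treated as plain CuMatrixFloat matrices here (matching how propagate() uses them above), and the nerv.CuMatrixFloat(nrow, ncol) constructor, the names and the sizes are all illustrative assumptions rather than part of this commit:

    local gconf = {momentum = 0.9}                    -- only field the update() stub reads so far
    local ltp = nerv.CuMatrixFloat(429, 2048)         -- input_dim x output_dim
    local bp  = nerv.CuMatrixFloat(1, 2048)           -- bias kept as a single row
    ltp:fill(0.01)
    bp:fill(0)
    local layer  = nerv.AffineLayer("affine0", gconf, ltp, bp)
    local input  = nerv.CuMatrixFloat(256, 429)
    local output = nerv.CuMatrixFloat(256, 2048)
    input:fill(1.0)
    layer:propagate(input, output)                    -- output = input * ltp + bp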
diff --git a/layer/init.lua b/layer/init.lua
index 62a2924..6923dbd 100644
--- a/layer/init.lua
+++ b/layer/init.lua
@@ -24,7 +24,7 @@ end
local Layer = nerv.class('nerv.Layer')
-function nerv.Layer:__init(id, ...)
+function nerv.Layer:__init(id, global_conf, ...)
nerv.error_method_not_implemented()
end
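Every concrete layer is now expected to take the global configuration as the second constructor argument; a minimal, hypothetical subclass sketch, assuming the two-argument form of nerv.class for inheritance:

    local MyLayer = nerv.class("nerv.MyLayer", "nerv.Layer")

    function MyLayer:__init(id, global_conf, some_param)
        self.id = id
        self.gconf = global_conf
        self.param = some_param
    end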
diff --git a/matrix/cukernel.h b/matrix/cukernel.h
index dc4ac5a..3cad489 100644
--- a/matrix/cukernel.h
+++ b/matrix/cukernel.h
@@ -6,4 +6,5 @@ void cudak_(cuda_colsum)(const Matrix *a, Matrix *b);
void cudak_(cuda_softmax_denominator)(const Matrix *a, const Matrix *max, Matrix *b);
void cudak_(cuda_softmax_final)(const Matrix *a, const Matrix *max, const Matrix *deno, Matrix *b);
void cudak_(cuda_add_row)(const Matrix *a, Matrix *b, double beta);
+void cudak_(cuda_fill)(Matrix *a, double val);
#endif
diff --git a/matrix/generic/cukernel.cu b/matrix/generic/cukernel.cu
index 2e794b7..8b929e4 100644
--- a/matrix/generic/cukernel.cu
+++ b/matrix/generic/cukernel.cu
@@ -113,6 +113,14 @@ __global__ void cudak_(add_row)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
b[j + i * stride] += beta * a[j];
}
+__global__ void cudak_(fill)(MATRIX_ELEM *a,
+ int nrow, int ncol, int stride, double val) {
+ int j = blockIdx.x * blockDim.x + threadIdx.x;
+ int i = blockIdx.y * blockDim.y + threadIdx.y;
+ if (i >= nrow || j >= ncol) return;
+ a[j + i * stride] = val;
+}
+
extern "C" {
#include "../cukernel.h"
@@ -242,5 +250,15 @@ extern "C" {
(MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), b->nrow, b->ncol,
b->stride / sizeof(MATRIX_ELEM), beta);
}
+
+ void cudak_(cuda_fill)(Matrix *a, double val) {
+ dim3 threadsPerBlock(CUDA_THREADS_N,
+ CUDA_THREADS_N);
+ dim3 numBlocks(CEIL_DIV(a->ncol, threadsPerBlock.x),
+ CEIL_DIV(a->nrow, threadsPerBlock.y));
+ cudak_(fill)<<<numBlocks, threadsPerBlock>>> \
+ (MATRIX_ELEM_PTR(a), a->nrow, a->ncol,
+ a->stride / sizeof(MATRIX_ELEM), val);
+ }
}
#endif
diff --git a/matrix/generic/cumatrix.c b/matrix/generic/cumatrix.c
index ae57b21..aa303d4 100644
--- a/matrix/generic/cumatrix.c
+++ b/matrix/generic/cumatrix.c
@@ -55,15 +55,17 @@ static int nerv_matrix_(mul)(lua_State *L) {
Matrix *c = luaT_checkudata(L, 1, nerv_matrix_(tname));
Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname));
Matrix *b = luaT_checkudata(L, 3, nerv_matrix_(tname));
+ MATRIX_ELEM alpha = luaL_checknumber(L, 4);
+ MATRIX_ELEM beta = luaL_checknumber(L, 5);
int nargs = lua_gettop(L);
- int ta = nargs > 3 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 4)) \
+ int ta = nargs > 5 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 6)) \
: CUBLAS_OP_N;
- int tb = nargs > 4 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 5)) \
+ int tb = nargs > 6 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 7)) \
: CUBLAS_OP_N;
printf("%d %d\n", ta, tb);
if (a->ncol != b->nrow)
nerv_error(L, "Wrong dimension of multipliers");
- MATRIX_ELEM alpha = 1.0f, beta = 0.0f;
+/* MATRIX_ELEM alpha = 1.0f, beta = 0.0f; */
NERV_CUBLAS_(gemm)(cublas_handle, tb, ta,
b->ncol, a->nrow, b->nrow,
&alpha,
@@ -131,10 +133,22 @@ static int nerv_matrix_(add_row)(lua_State *L) {
Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname));
Matrix *b = luaT_checkudata(L, 1, nerv_matrix_(tname));
double beta = luaL_checknumber(L, 3);
+ if (a->ncol != b->ncol)
+ nerv_error(L, "the number of columns is not the same");
+ if (a->nrow != 1)
+ nerv_error(L, "a row vector is expected");
cudak_(cuda_add_row)(a, b, beta);
return 0;
}
+static int nerv_matrix_(fill)(lua_State *L) {
+ Matrix *self = luaT_checkudata(L, 1, nerv_matrix_(tname));
+ double val = luaL_checknumber(L, 2);
+ cudak_(cuda_fill)(self, val);
+ return 0;
+}
+
+
extern const char *MATRIX_CUMATRIX_HOST_TNAME;
static int nerv_matrix_(copy_from)(lua_State *L) {
Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
@@ -162,8 +176,6 @@ static int nerv_matrix_(copy_to)(lua_State *L) {
static const luaL_Reg nerv_matrix_(extra_methods)[] = {
- {"add", nerv_matrix_(add)},
- {"mul", nerv_matrix_(mul)},
{"create", nerv_matrix_(create)},
{"sigmoid", nerv_matrix_(sigmoid)},
{"softmax", nerv_matrix_(softmax)},
@@ -173,7 +185,10 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = {
{"copy_from", nerv_matrix_(copy_from)},
{"copy_to", nerv_matrix_(copy_to)},
/* in-place calc */
+ {"add", nerv_matrix_(add)},
+ {"mul", nerv_matrix_(mul)},
{"add_row", nerv_matrix_(add_row)},
+ {"fill", nerv_matrix_(fill)},
{NULL, NULL}
};
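The reshuffled method table can be exercised from Lua roughly as below; a minimal sketch, again assuming the nerv.CuMatrixFloat(nrow, ncol) constructor, with illustrative sizes:

    local a = nerv.CuMatrixFloat(4, 8)
    local b = nerv.CuMatrixFloat(8, 3)
    local c = nerv.CuMatrixFloat(4, 3)
    a:fill(1.0)                        -- new in-place fill: every element becomes 1.0
    b:fill(0.5)
    c:mul(a, b, 1.0, 0.0, 'N', 'N')    -- c = 1.0 * a * b + 0.0 * c; every entry is 4.0
    local bias = nerv.CuMatrixFloat(1, 3)
    bias:fill(0.1)
    c:add_row(bias, 1.0)               -- passes the new checks: one row, matching column count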
diff --git a/matrix/init.lua b/matrix/init.lua
index a3d778e..09c9c64 100644
--- a/matrix/init.lua
+++ b/matrix/init.lua
@@ -35,7 +35,7 @@ end
function nerv.CuMatrix:__mul__(b)
c = self:create()
- c:mul(self, b, 'N', 'N')
+    c:mul(self, b, 1.0, 0.0, 'N', 'N')
return c
end