diff options
-rw-r--r-- | layer/affine.lua | 14 | ||||
-rw-r--r-- | layer/init.lua | 2 | ||||
-rw-r--r-- | matrix/cukernel.h | 1 | ||||
-rw-r--r-- | matrix/generic/cukernel.cu | 20 | ||||
-rw-r--r-- | matrix/generic/cumatrix.c | 11 |
5 files changed, 47 insertions, 1 deletions
diff --git a/layer/affine.lua b/layer/affine.lua index d5c50fc..5f1b4ce 100644 --- a/layer/affine.lua +++ b/layer/affine.lua @@ -9,3 +9,17 @@ end function LinearTransParam:write(pfhandle) self.trans:new_to_host():save(pfhandle) end + +function AffineLayer:__init(id, ltp, bp) + self.ltp = ltp + self.bp = bp +end + +function nerv.AffineLayer:update(input, output) +end + +function nerv.AffineLayer:propagate(input, output) +end + +function nerv.AffineLayer:back_propagate(input, output) +end diff --git a/layer/init.lua b/layer/init.lua index c57a405..62a2924 100644 --- a/layer/init.lua +++ b/layer/init.lua @@ -24,7 +24,7 @@ end local Layer = nerv.class('nerv.Layer') -function nerv.Layer:_init(param) +function nerv.Layer:_init(id, ...) nerv.error_method_not_implemented() end diff --git a/matrix/cukernel.h b/matrix/cukernel.h index 67a255e..dc4ac5a 100644 --- a/matrix/cukernel.h +++ b/matrix/cukernel.h @@ -5,4 +5,5 @@ void cudak_(cuda_rowmax)(const Matrix *a, Matrix *b); void cudak_(cuda_colsum)(const Matrix *a, Matrix *b); void cudak_(cuda_softmax_denominator)(const Matrix *a, const Matrix *max, Matrix *b); void cudak_(cuda_softmax_final)(const Matrix *a, const Matrix *max, const Matrix *deno, Matrix *b); +void cudak_(cuda_add_row)(const Matrix *a, Matrix *b, double beta); #endif diff --git a/matrix/generic/cukernel.cu b/matrix/generic/cukernel.cu index 4b6af61..2e794b7 100644 --- a/matrix/generic/cukernel.cu +++ b/matrix/generic/cukernel.cu @@ -105,6 +105,15 @@ __global__ void cudak_(block_reduce_rowmax)(const MATRIX_ELEM *input, output[blockIdx.x + ostride * blockIdx.y] = cudak_(arr)[0]; } +__global__ void cudak_(add_row)(const MATRIX_ELEM *a, MATRIX_ELEM *b, + int nrow, int ncol, int stride, double beta) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + if (i >= nrow || j >= ncol) return; + b[j + i * stride] += beta * a[j]; +} + + extern "C" { #include "../cukernel.h" void cudak_(cuda_sigmoid)(const Matrix *a, Matrix *b) { @@ -222,5 +231,16 @@ extern "C" { ncol); cudaFree(res); } + + /* in-place calc */ + void cudak_(cuda_add_row)(const Matrix *a, Matrix *b, double beta) { + dim3 threadsPerBlock(CUDA_THREADS_N, + CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), + CEIL_DIV(b->nrow, threadsPerBlock.y)); + cudak_(add_row)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), b->nrow, b->ncol, + b->stride / sizeof(MATRIX_ELEM), beta); + } } #endif diff --git a/matrix/generic/cumatrix.c b/matrix/generic/cumatrix.c index 557e4c1..ae57b21 100644 --- a/matrix/generic/cumatrix.c +++ b/matrix/generic/cumatrix.c @@ -126,6 +126,15 @@ static int nerv_matrix_(rowmax)(lua_State *L) { return 1; } + +static int nerv_matrix_(add_row)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 1, nerv_matrix_(tname)); + double beta = luaL_checknumber(L, 3); + cudak_(cuda_add_row)(a, b, beta); + return 0; +} + extern const char *MATRIX_CUMATRIX_HOST_TNAME; static int nerv_matrix_(copy_from)(lua_State *L) { Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); @@ -163,6 +172,8 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = { {"rowmax", nerv_matrix_(rowmax)}, {"copy_from", nerv_matrix_(copy_from)}, {"copy_to", nerv_matrix_(copy_to)}, + /* in-place calc */ + {"add_row", nerv_matrix_(add_row)}, {NULL, NULL} }; |