From c7e170d4f3cc9f73380380a96318cbea437d48ba Mon Sep 17 00:00:00 2001 From: Determinant Date: Tue, 26 May 2015 18:56:29 +0800 Subject: add fill for cumatrix --- matrix/generic/cukernel.cu | 18 ++++++++++++++++++ matrix/generic/cumatrix.c | 25 ++++++++++++++++++++----- 2 files changed, 38 insertions(+), 5 deletions(-) (limited to 'matrix/generic') diff --git a/matrix/generic/cukernel.cu b/matrix/generic/cukernel.cu index 2e794b7..8b929e4 100644 --- a/matrix/generic/cukernel.cu +++ b/matrix/generic/cukernel.cu @@ -113,6 +113,14 @@ __global__ void cudak_(add_row)(const MATRIX_ELEM *a, MATRIX_ELEM *b, b[j + i * stride] += beta * a[j]; } +__global__ void cudak_(fill)(MATRIX_ELEM *a, + int nrow, int ncol, int stride, double val) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + if (i >= nrow || j >= ncol) return; + a[j + i * stride] = val; +} + extern "C" { #include "../cukernel.h" @@ -242,5 +250,15 @@ extern "C" { (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM), beta); } + + void cudak_(cuda_fill)(Matrix *a, double val) { + dim3 threadsPerBlock(CUDA_THREADS_N, + CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(a->ncol, threadsPerBlock.x), + CEIL_DIV(a->nrow, threadsPerBlock.y)); + cudak_(fill)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(a), a->nrow, a->ncol, + a->stride / sizeof(MATRIX_ELEM), val); + } } #endif diff --git a/matrix/generic/cumatrix.c b/matrix/generic/cumatrix.c index ae57b21..aa303d4 100644 --- a/matrix/generic/cumatrix.c +++ b/matrix/generic/cumatrix.c @@ -55,15 +55,17 @@ static int nerv_matrix_(mul)(lua_State *L) { Matrix *c = luaT_checkudata(L, 1, nerv_matrix_(tname)); Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname)); Matrix *b = luaT_checkudata(L, 3, nerv_matrix_(tname)); + MATRIX_ELEM alpha = luaL_checknumber(L, 4); + MATRIX_ELEM beta = luaL_checknumber(L, 5); int nargs = lua_gettop(L); - int ta = nargs > 3 ? 
nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 4)) \ + int ta = nargs > 5 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 6)) \ : CUBLAS_OP_N; - int tb = nargs > 4 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 5)) \ + int tb = nargs > 6 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 7)) \ : CUBLAS_OP_N; printf("%d %d\n", ta, tb); if (a->ncol != b->nrow) nerv_error(L, "Wrong dimension of multipliers"); - MATRIX_ELEM alpha = 1.0f, beta = 0.0f; +/* MATRIX_ELEM alpha = 1.0f, beta = 0.0f; */ NERV_CUBLAS_(gemm)(cublas_handle, tb, ta, b->ncol, a->nrow, b->nrow, &alpha, @@ -131,10 +133,22 @@ static int nerv_matrix_(add_row)(lua_State *L) { Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname)); Matrix *b = luaT_checkudata(L, 1, nerv_matrix_(tname)); double beta = luaL_checknumber(L, 3); + if (a->ncol != b->ncol) + nerv_error(L, "the number of columns is not the same"); + if (a->nrow != 1) + nerv_error(L, "a row vector is expected"); cudak_(cuda_add_row)(a, b, beta); return 0; } +static int nerv_matrix_(fill)(lua_State *L) { + Matrix *self = luaT_checkudata(L, 1, nerv_matrix_(tname)); + double val = luaL_checknumber(L, 2); + cudak_(cuda_fill)(self, val); + return 0; +} + + extern const char *MATRIX_CUMATRIX_HOST_TNAME; static int nerv_matrix_(copy_from)(lua_State *L) { Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); @@ -162,8 +176,6 @@ static int nerv_matrix_(copy_to)(lua_State *L) { static const luaL_Reg nerv_matrix_(extra_methods)[] = { - {"add", nerv_matrix_(add)}, - {"mul", nerv_matrix_(mul)}, {"create", nerv_matrix_(create)}, {"sigmoid", nerv_matrix_(sigmoid)}, {"softmax", nerv_matrix_(softmax)}, @@ -173,7 +185,10 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = { {"copy_from", nerv_matrix_(copy_from)}, {"copy_to", nerv_matrix_(copy_to)}, /* in-place calc */ + {"add", nerv_matrix_(add)}, + {"mul", nerv_matrix_(mul)}, {"add_row", nerv_matrix_(add_row)}, + {"fill", nerv_matrix_(fill)}, {NULL, NULL} }; -- cgit v1.2.3