diff options
Diffstat (limited to 'matrix')
-rw-r--r-- | matrix/cukernel.h | 1 | ||||
-rw-r--r-- | matrix/generic/cukernel.cu | 18 | ||||
-rw-r--r-- | matrix/generic/cumatrix.c | 25 | ||||
-rw-r--r-- | matrix/init.lua | 2 |
4 files changed, 40 insertions, 6 deletions
diff --git a/matrix/cukernel.h b/matrix/cukernel.h index dc4ac5a..3cad489 100644 --- a/matrix/cukernel.h +++ b/matrix/cukernel.h @@ -6,4 +6,5 @@ void cudak_(cuda_colsum)(const Matrix *a, Matrix *b); void cudak_(cuda_softmax_denominator)(const Matrix *a, const Matrix *max, Matrix *b); void cudak_(cuda_softmax_final)(const Matrix *a, const Matrix *max, const Matrix *deno, Matrix *b); void cudak_(cuda_add_row)(const Matrix *a, Matrix *b, double beta); +void cudak_(cuda_fill)(Matrix *a, double val); #endif diff --git a/matrix/generic/cukernel.cu b/matrix/generic/cukernel.cu index 2e794b7..8b929e4 100644 --- a/matrix/generic/cukernel.cu +++ b/matrix/generic/cukernel.cu @@ -113,6 +113,14 @@ __global__ void cudak_(add_row)(const MATRIX_ELEM *a, MATRIX_ELEM *b, b[j + i * stride] += beta * a[j]; } +__global__ void cudak_(fill)(MATRIX_ELEM *a, + int nrow, int ncol, int stride, double val) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + if (i >= nrow || j >= ncol) return; + a[j + i * stride] = val; +} + extern "C" { #include "../cukernel.h" @@ -242,5 +250,15 @@ extern "C" { (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM), beta); } + + void cudak_(cuda_fill)(Matrix *a, double val) { + dim3 threadsPerBlock(CUDA_THREADS_N, + CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(a->ncol, threadsPerBlock.x), + CEIL_DIV(a->nrow, threadsPerBlock.y)); + cudak_(fill)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(a), a->nrow, a->ncol, + a->stride / sizeof(MATRIX_ELEM), val); + } } #endif diff --git a/matrix/generic/cumatrix.c b/matrix/generic/cumatrix.c index ae57b21..aa303d4 100644 --- a/matrix/generic/cumatrix.c +++ b/matrix/generic/cumatrix.c @@ -55,15 +55,17 @@ static int nerv_matrix_(mul)(lua_State *L) { Matrix *c = luaT_checkudata(L, 1, nerv_matrix_(tname)); Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname)); Matrix *b = luaT_checkudata(L, 3, nerv_matrix_(tname)); + MATRIX_ELEM alpha = luaL_checknumber(L, 4); + MATRIX_ELEM beta = luaL_checknumber(L, 5); int nargs = lua_gettop(L); - int ta = nargs > 3 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 4)) \ + int ta = nargs > 5 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 6)) \ : CUBLAS_OP_N; - int tb = nargs > 4 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 5)) \ + int tb = nargs > 6 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 7)) \ : CUBLAS_OP_N; printf("%d %d\n", ta, tb); if (a->ncol != b->nrow) nerv_error(L, "Wrong dimension of multipliers"); - MATRIX_ELEM alpha = 1.0f, beta = 0.0f; +/* MATRIX_ELEM alpha = 1.0f, beta = 0.0f; */ NERV_CUBLAS_(gemm)(cublas_handle, tb, ta, b->ncol, a->nrow, b->nrow, &alpha, @@ -131,10 +133,22 @@ static int nerv_matrix_(add_row)(lua_State *L) { Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname)); Matrix *b = luaT_checkudata(L, 1, nerv_matrix_(tname)); double beta = luaL_checknumber(L, 3); + if (a->ncol != b->ncol) + nerv_error(L, "the number of columns is not the same"); + if (a->nrow != 1) + nerv_error(L, "a row vector is expected"); cudak_(cuda_add_row)(a, b, beta); return 0; } +static int nerv_matrix_(fill)(lua_State *L) { + Matrix *self = luaT_checkudata(L, 1, nerv_matrix_(tname)); + double val = luaL_checknumber(L, 2); + cudak_(cuda_fill)(self, val); + return 0; +} + + extern const char *MATRIX_CUMATRIX_HOST_TNAME; static int nerv_matrix_(copy_from)(lua_State *L) { Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); @@ -162,8 +176,6 @@ static int nerv_matrix_(copy_to)(lua_State *L) { static const luaL_Reg nerv_matrix_(extra_methods)[] = { - {"add", nerv_matrix_(add)}, - {"mul", nerv_matrix_(mul)}, {"create", nerv_matrix_(create)}, {"sigmoid", nerv_matrix_(sigmoid)}, {"softmax", nerv_matrix_(softmax)}, @@ -173,7 +185,10 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = { {"copy_from", nerv_matrix_(copy_from)}, {"copy_to", nerv_matrix_(copy_to)}, /* in-place calc */ + {"add", nerv_matrix_(add)}, + {"mul", nerv_matrix_(mul)}, {"add_row", nerv_matrix_(add_row)}, + {"fill", nerv_matrix_(fill)}, {NULL, NULL} }; diff --git a/matrix/init.lua b/matrix/init.lua index a3d778e..09c9c64 100644 --- a/matrix/init.lua +++ b/matrix/init.lua @@ -35,7 +35,7 @@ end function nerv.CuMatrix:__mul__(b) c = self:create() - c:mul(self, b, 'N', 'N') + c:mul(self, b, 0.5, 0.0, 'N', 'N') return c end |