aboutsummaryrefslogtreecommitdiff
path: root/matrix
diff options
context:
space:
mode:
Diffstat (limited to 'matrix')
-rw-r--r--matrix/cukernel.h1
-rw-r--r--matrix/generic/cukernel.cu18
-rw-r--r--matrix/generic/cumatrix.c25
-rw-r--r--matrix/init.lua2
4 files changed, 40 insertions, 6 deletions
diff --git a/matrix/cukernel.h b/matrix/cukernel.h
index dc4ac5a..3cad489 100644
--- a/matrix/cukernel.h
+++ b/matrix/cukernel.h
@@ -6,4 +6,5 @@ void cudak_(cuda_colsum)(const Matrix *a, Matrix *b);
void cudak_(cuda_softmax_denominator)(const Matrix *a, const Matrix *max, Matrix *b);
void cudak_(cuda_softmax_final)(const Matrix *a, const Matrix *max, const Matrix *deno, Matrix *b);
void cudak_(cuda_add_row)(const Matrix *a, Matrix *b, double beta);
+void cudak_(cuda_fill)(Matrix *a, double val);
#endif
diff --git a/matrix/generic/cukernel.cu b/matrix/generic/cukernel.cu
index 2e794b7..8b929e4 100644
--- a/matrix/generic/cukernel.cu
+++ b/matrix/generic/cukernel.cu
@@ -113,6 +113,14 @@ __global__ void cudak_(add_row)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
b[j + i * stride] += beta * a[j];
}
+__global__ void cudak_(fill)(MATRIX_ELEM *a,
+ int nrow, int ncol, int stride, double val) {
+ int j = blockIdx.x * blockDim.x + threadIdx.x;
+ int i = blockIdx.y * blockDim.y + threadIdx.y;
+ if (i >= nrow || j >= ncol) return;
+ a[j + i * stride] = val;
+}
+
extern "C" {
#include "../cukernel.h"
@@ -242,5 +250,15 @@ extern "C" {
(MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), b->nrow, b->ncol,
b->stride / sizeof(MATRIX_ELEM), beta);
}
+
+ void cudak_(cuda_fill)(Matrix *a, double val) {
+ dim3 threadsPerBlock(CUDA_THREADS_N,
+ CUDA_THREADS_N);
+ dim3 numBlocks(CEIL_DIV(a->ncol, threadsPerBlock.x),
+ CEIL_DIV(a->nrow, threadsPerBlock.y));
+ cudak_(fill)<<<numBlocks, threadsPerBlock>>> \
+ (MATRIX_ELEM_PTR(a), a->nrow, a->ncol,
+ a->stride / sizeof(MATRIX_ELEM), val);
+ }
}
#endif
diff --git a/matrix/generic/cumatrix.c b/matrix/generic/cumatrix.c
index ae57b21..aa303d4 100644
--- a/matrix/generic/cumatrix.c
+++ b/matrix/generic/cumatrix.c
@@ -55,15 +55,17 @@ static int nerv_matrix_(mul)(lua_State *L) {
Matrix *c = luaT_checkudata(L, 1, nerv_matrix_(tname));
Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname));
Matrix *b = luaT_checkudata(L, 3, nerv_matrix_(tname));
+ MATRIX_ELEM alpha = luaL_checknumber(L, 4);
+ MATRIX_ELEM beta = luaL_checknumber(L, 5);
int nargs = lua_gettop(L);
- int ta = nargs > 3 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 4)) \
+ int ta = nargs > 5 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 6)) \
: CUBLAS_OP_N;
- int tb = nargs > 4 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 5)) \
+ int tb = nargs > 6 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 7)) \
: CUBLAS_OP_N;
printf("%d %d\n", ta, tb);
if (a->ncol != b->nrow)
nerv_error(L, "Wrong dimension of multipliers");
- MATRIX_ELEM alpha = 1.0f, beta = 0.0f;
+/* MATRIX_ELEM alpha = 1.0f, beta = 0.0f; */
NERV_CUBLAS_(gemm)(cublas_handle, tb, ta,
b->ncol, a->nrow, b->nrow,
&alpha,
@@ -131,10 +133,22 @@ static int nerv_matrix_(add_row)(lua_State *L) {
Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname));
Matrix *b = luaT_checkudata(L, 1, nerv_matrix_(tname));
double beta = luaL_checknumber(L, 3);
+ if (a->ncol != b->ncol)
+ nerv_error(L, "the number of columns is not the same");
+ if (a->nrow != 1)
+ nerv_error(L, "a row vector is expected");
cudak_(cuda_add_row)(a, b, beta);
return 0;
}
+static int nerv_matrix_(fill)(lua_State *L) {
+ Matrix *self = luaT_checkudata(L, 1, nerv_matrix_(tname));
+ double val = luaL_checknumber(L, 2);
+ cudak_(cuda_fill)(self, val);
+ return 0;
+}
+
+
extern const char *MATRIX_CUMATRIX_HOST_TNAME;
static int nerv_matrix_(copy_from)(lua_State *L) {
Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
@@ -162,8 +176,6 @@ static int nerv_matrix_(copy_to)(lua_State *L) {
static const luaL_Reg nerv_matrix_(extra_methods)[] = {
- {"add", nerv_matrix_(add)},
- {"mul", nerv_matrix_(mul)},
{"create", nerv_matrix_(create)},
{"sigmoid", nerv_matrix_(sigmoid)},
{"softmax", nerv_matrix_(softmax)},
@@ -173,7 +185,10 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = {
{"copy_from", nerv_matrix_(copy_from)},
{"copy_to", nerv_matrix_(copy_to)},
/* in-place calc */
+ {"add", nerv_matrix_(add)},
+ {"mul", nerv_matrix_(mul)},
{"add_row", nerv_matrix_(add_row)},
+ {"fill", nerv_matrix_(fill)},
{NULL, NULL}
};
diff --git a/matrix/init.lua b/matrix/init.lua
index a3d778e..09c9c64 100644
--- a/matrix/init.lua
+++ b/matrix/init.lua
@@ -35,7 +35,7 @@ end
function nerv.CuMatrix:__mul__(b)
c = self:create()
- c:mul(self, b, 'N', 'N')
+ c:mul(self, b, 0.5, 0.0, 'N', 'N')
return c
end