diff options
-rw-r--r-- | examples/cumatrix_from_mmatrix.lua | 23 | ||||
-rw-r--r-- | matrix/cumatrix.c | 3 | ||||
-rw-r--r-- | matrix/generic/cumatrix.c | 49 |
3 files changed, 75 insertions, 0 deletions
diff --git a/examples/cumatrix_from_mmatrix.lua b/examples/cumatrix_from_mmatrix.lua
new file mode 100644
index 0000000..fba8a90
--- /dev/null
+++ b/examples/cumatrix_from_mmatrix.lua
@@ -0,0 +1,23 @@
+m = 10
+n = 10
+fm = nerv.MMatrixFloat(m, n)
+dm = nerv.MMatrixDouble(m, n)
+for i = 0, m - 1 do
+    for j = 0, n - 1 do
+        -- local t = math.random(10)
+        t = i / (j + 1)
+        fm[i][j] = t
+        dm[i][j] = t
+    end
+end
+print(fm)
+print(dm)
+
+fc = nerv.CuMatrixFloat(m, n)
+dc = nerv.CuMatrixDouble(m, n)
+fc:copy_from(fm)
+dc:copy_from(dm)
+print(fc)
+print(dc)
+print(fc:softmax())
+print(dc:softmax())
diff --git a/matrix/cumatrix.c b/matrix/cumatrix.c
index db4c784..51a3681 100644
--- a/matrix/cumatrix.c
+++ b/matrix/cumatrix.c
@@ -5,6 +5,7 @@
 #define nerv_matrix_(NAME) nerv_matrix_cuda_float_##NAME
 #define cudak_(NAME) cudak_float_ ## NAME
 #define NERV_CUBLAS_(NAME) cublasS##NAME
+#define MATRIX_CUMATRIX_HOST_TNAME nerv_matrix_host_float_tname
 const char *nerv_matrix_(tname) = "nerv.CuMatrixFloat";
 #include "generic/cumatrix.c"
 #undef NERV_CUBLAS_
@@ -15,11 +16,13 @@ const char *nerv_matrix_(tname) = "nerv.CuMatrixFloat";
 #undef MATRIX_ELEM
 #undef MATRIX_ELEM_PTR
 #undef MATRIX_ELEM_FMT
+#undef MATRIX_CUMATRIX_HOST_TNAME
 
 #define MATRIX_USE_DOUBLE
 #define cuda_matrix_(NAME) cuda_matrix_double_##NAME
 #define nerv_matrix_(NAME) nerv_matrix_cuda_double_##NAME
 #define cudak_(NAME) cudak_double_ ## NAME
 #define NERV_CUBLAS_(NAME) cublasD##NAME
+#define MATRIX_CUMATRIX_HOST_TNAME nerv_matrix_host_double_tname
 const char *nerv_matrix_(tname) = "nerv.CuMatrixDouble";
 #include "generic/cumatrix.c"
diff --git a/matrix/generic/cumatrix.c b/matrix/generic/cumatrix.c
index f846a73..557e4c1 100644
--- a/matrix/generic/cumatrix.c
+++ b/matrix/generic/cumatrix.c
@@ -126,6 +126,53 @@ static int nerv_matrix_(rowmax)(lua_State *L) {
     return 1;
 }
 
+extern const char *MATRIX_CUMATRIX_HOST_TNAME;
+
+/* Lua method a:copy_from(b): copy the host matrix b into the device
+ * matrix a. Raises a Lua error when the dimensions differ or when the
+ * CUDA copy fails.
+ * NOTE(review): stride is passed as the cudaMemcpy2D pitch, so it is
+ * assumed to be the row size in bytes -- confirm in the Matrix struct. */
+static int nerv_matrix_(copy_from)(lua_State *L) {
+    Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
+    Matrix *b = luaT_checkudata(L, 2, MATRIX_CUMATRIX_HOST_TNAME);
+    cudaError_t status;
+    if (!(a->nrow == b->nrow && a->ncol == b->ncol))
+        nerv_error(L, "Matrices should be of the same dimension");
+    status = cudaMemcpy2D(MATRIX_ELEM_PTR(a), a->stride,
+                          MATRIX_ELEM_PTR(b), b->stride,
+                          sizeof(MATRIX_ELEM) * b->ncol, b->nrow,
+                          cudaMemcpyHostToDevice);
+    /* report a failed transfer instead of silently leaving stale data */
+    if (status != cudaSuccess)
+        return luaL_error(L, "cudaMemcpy2D (host to device) failed: %s",
+                          cudaGetErrorString(status));
+    return 0;
+}
+
+/* Lua method a:copy_to(b): copy the device matrix a into the host
+ * matrix b. Raises a Lua error when the dimensions differ or when the
+ * CUDA copy fails.
+ * NOTE(review): stride is passed as the cudaMemcpy2D pitch, so it is
+ * assumed to be the row size in bytes -- confirm in the Matrix struct. */
+static int nerv_matrix_(copy_to)(lua_State *L) {
+    Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
+    Matrix *b = luaT_checkudata(L, 2, MATRIX_CUMATRIX_HOST_TNAME);
+    cudaError_t status;
+    if (!(a->nrow == b->nrow && a->ncol == b->ncol))
+        nerv_error(L, "Matrices should be of the same dimension");
+    status = cudaMemcpy2D(MATRIX_ELEM_PTR(b), b->stride,
+                          MATRIX_ELEM_PTR(a), a->stride,
+                          sizeof(MATRIX_ELEM) * a->ncol, a->nrow,
+                          cudaMemcpyDeviceToHost);
+    /* report a failed transfer instead of silently leaving stale data */
+    if (status != cudaSuccess)
+        return luaL_error(L, "cudaMemcpy2D (device to host) failed: %s",
+                          cudaGetErrorString(status));
+    return 0;
+}
+
+
 static const luaL_Reg nerv_matrix_(extra_methods)[] = {
     {"add", nerv_matrix_(add)},
     {"mul", nerv_matrix_(mul)},
@@ -135,6 +182,8 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = {
     {"colsum", nerv_matrix_(colsum)},
     {"rowsum", nerv_matrix_(rowsum)},
     {"rowmax", nerv_matrix_(rowmax)},
+    {"copy_from", nerv_matrix_(copy_from)},
+    {"copy_to", nerv_matrix_(copy_to)},
     {NULL, NULL}
 };
 