From ab12a9583bdd39884fde9bc2444e6fd1bc5f518e Mon Sep 17 00:00:00 2001 From: Determinant Date: Sun, 31 May 2015 11:18:16 +0800 Subject: add async copy by index; add MMatrixInt --- matrix/cuda_helper.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++ matrix/generic/cumatrix.c | 32 +++++++++++++++++++++++++++ matrix/generic/elem_type.h | 6 +++++ matrix/generic/matrix.c | 3 --- matrix/generic/matrix.h | 4 ++++ matrix/init.c | 1 + matrix/init.lua | 13 ++++++++--- matrix/mmatrix.c | 14 ++++++++++++ 8 files changed, 122 insertions(+), 6 deletions(-) create mode 100644 matrix/cuda_helper.h diff --git a/matrix/cuda_helper.h b/matrix/cuda_helper.h new file mode 100644 index 0000000..c0fa618 --- /dev/null +++ b/matrix/cuda_helper.h @@ -0,0 +1,55 @@ +#ifndef NERV_CUDA_HELPER_H +#define NERV_CUDA_HELPER_H +#define CUBLAS_SAFE_CALL(call) \ + do { \ + cublasStatus_t err = (call); \ + if (err != CUBLAS_STATUS_SUCCESS) \ + nerv_error(L, "cumatrix cublas error: %s", cublasGetErrorString(err)); \ + } while (0) + +#define CUDA_SAFE_CALL(call) \ + do { \ + cudaError_t err = (call); \ + if (err != cudaSuccess) \ + nerv_error(L, "cumatrix CUDA error: %s", cudaGetErrorString(err)); \ + } while (0) + +#define CUDA_SAFE_SYNC_CALL(call) \ + do { \ + CUDA_SAFE_CALL(call); \ + cudaDeviceSynchronize(); \ + } while (0) + +#define CHECK_SAME_DIMENSION(a, b) \ + do { \ + if (!(a->nrow == b->nrow && a->ncol == b->ncol)) \ + nerv_error(L, "Matrices should be of the same dimension"); \ + } while (0) + +static const char *cublasGetErrorString(cublasStatus_t err) { + switch (err) + { + case CUBLAS_STATUS_SUCCESS: + return "CUBLAS_STATUS_SUCCESS"; + case CUBLAS_STATUS_NOT_INITIALIZED: + return "CUBLAS_STATUS_NOT_INITIALIZED"; + case CUBLAS_STATUS_ALLOC_FAILED: + return "CUBLAS_STATUS_ALLOC_FAILED"; + case CUBLAS_STATUS_INVALID_VALUE: + return "CUBLAS_STATUS_INVALID_VALUE"; + case CUBLAS_STATUS_ARCH_MISMATCH: + return "CUBLAS_STATUS_ARCH_MISMATCH"; + case CUBLAS_STATUS_MAPPING_ERROR: + return "CUBLAS_STATUS_MAPPING_ERROR"; + case CUBLAS_STATUS_EXECUTION_FAILED: + return "CUBLAS_STATUS_EXECUTION_FAILED"; + case CUBLAS_STATUS_INTERNAL_ERROR: + return "CUBLAS_STATUS_INTERNAL_ERROR"; + case CUBLAS_STATUS_NOT_SUPPORTED: + return "CUBLAS_STATUS_NOT_SUPPORTED"; + case CUBLAS_STATUS_LICENSE_ERROR: + return "CUBLAS_STATUS_LICENSE_ERROR"; + } + return ""; +} +#endif diff --git a/matrix/generic/cumatrix.c b/matrix/generic/cumatrix.c index 7b0aa2a..3bc58d7 100644 --- a/matrix/generic/cumatrix.c +++ b/matrix/generic/cumatrix.c @@ -251,6 +251,37 @@ static int nerv_matrix_(log_elem)(lua_State *L) { return 0; } +extern const char *nerv_matrix_host_int_tname; +static int nerv_matrix_(copy_rows_fromh_by_idx)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 2, MATRIX_CUMATRIX_HOST_TNAME); + Matrix *idx = luaT_checkudata(L, 3, nerv_matrix_host_int_tname); + long *idx_ptr = idx->data.i; + int i; + long nrow = a->nrow; + if (idx->nrow != 1) + nerv_error(L, "index should be a vector"); + if (idx->ncol != nrow) + nerv_error(L, "index dimension mismatch"); + if (a->ncol != b->ncol) + nerv_error(L, "source/destination dimension mismatch"); + cudaStream_t *streams = (cudaStream_t*)malloc(sizeof(cudaStream_t) * nrow); + for (i = 0; i < nrow; i++) + { + CUDA_SAFE_CALL(cudaStreamCreate(streams + i)); + CUDA_SAFE_CALL(cudaMemcpyAsync(MATRIX_ROW_PTR(a, i), + MATRIX_ROW_PTR(b, idx_ptr[i]), + b->stride, + cudaMemcpyHostToDevice, streams[i])); + } + for (i = 0; i < nrow; i++) + { + CUDA_SAFE_CALL(cudaStreamSynchronize(streams[i])); + CUDA_SAFE_CALL(cudaStreamDestroy(streams[i])); + } + return 0; +} + static const luaL_Reg nerv_matrix_(extra_methods)[] = { {"create", nerv_matrix_(create)}, {"colsum", nerv_matrix_(colsum)}, @@ -271,6 +302,7 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = { {"softmax", nerv_matrix_(softmax)}, {"mul_elem", nerv_matrix_(mul_elem)}, {"log_elem", nerv_matrix_(log_elem)}, + {"copy_rows_fromh_by_idx", nerv_matrix_(copy_rows_fromh_by_idx)}, {NULL, NULL} }; diff --git a/matrix/generic/elem_type.h b/matrix/generic/elem_type.h index 78233a3..2a6ffa8 100644 --- a/matrix/generic/elem_type.h +++ b/matrix/generic/elem_type.h @@ -10,4 +10,10 @@ #define MATRIX_ELEM_FMT "%lf" #define MATRIX_ELEM_PTR(self) ((self)->data.d) +#elif defined(MATRIX_USE_INT) + +#define MATRIX_ELEM long +#define MATRIX_ELEM_FMT "%ld" +#define MATRIX_ELEM_PTR(self) ((self)->data.i) + #endif diff --git a/matrix/generic/matrix.c b/matrix/generic/matrix.c index e0098de..a0f9ecf 100644 --- a/matrix/generic/matrix.c +++ b/matrix/generic/matrix.c @@ -2,9 +2,6 @@ #include "../../common.h" #include "matrix.h" -#define MATRIX_ROW_PTR(self, row) \ - (MATRIX_ELEM *)((char *)MATRIX_ELEM_PTR(self) + (row) * (self)->stride) - extern const char *nerv_matrix_(tname); extern const char *MATRIX_BASE_TNAME; diff --git a/matrix/generic/matrix.h b/matrix/generic/matrix.h index 276ca5c..833724b 100644 --- a/matrix/generic/matrix.h +++ b/matrix/generic/matrix.h @@ -8,8 +8,12 @@ typedef struct Matrix { union { float *f; double *d; + long *i; } data; /* pointer to actual storage */ long *data_ref; } Matrix; +#define MATRIX_ROW_PTR(self, row) \ + (MATRIX_ELEM *)((char *)MATRIX_ELEM_PTR(self) + (row) * (self)->stride) + #endif diff --git a/matrix/init.c b/matrix/init.c index e55558a..b54cd12 100644 --- a/matrix/init.c +++ b/matrix/init.c @@ -35,4 +35,5 @@ void nerv_matrix_init(lua_State *L) { NULL, NULL, NULL); nerv_matrix_host_float_init(L); nerv_matrix_host_double_init(L); + nerv_matrix_host_int_init(L); } diff --git a/matrix/init.lua b/matrix/init.lua index 0075668..057b085 100644 --- a/matrix/init.lua +++ b/matrix/init.lua @@ -2,17 +2,22 @@ function nerv.Matrix:__tostring__() local ncol = self:ncol() local nrow = self:nrow() local strt = {} - + local fmt + if self.fmt then + fmt = self.fmt + else + fmt = "%.10f " + end if nrow == 1 then for col = 0, ncol - 1 do - table.insert(strt, string.format("%f ", self[col])) + table.insert(strt, string.format(fmt, self[col])) end table.insert(strt, "\n") else for row = 0, nrow - 1 do local rp = self[row] for col = 0, ncol - 1 do - table.insert(strt, string.format("%.10f ", rp[col])) + table.insert(strt, string.format(fmt, rp[col])) end table.insert(strt, "\n") end @@ -21,6 +26,8 @@ function nerv.Matrix:__tostring__() return table.concat(strt) end +nerv.MMatrixInt.fmt = "%d " + function nerv.CuMatrix:__add__(b) c = self:create() c:add(self, b, 1.0, 1.0) diff --git a/matrix/mmatrix.c b/matrix/mmatrix.c index b7d7dae..ab15197 100644 --- a/matrix/mmatrix.c +++ b/matrix/mmatrix.c @@ -17,3 +17,17 @@ const char *nerv_matrix_(tname) = "nerv.MMatrixFloat"; #define nerv_matrix_(NAME) nerv_matrix_host_double_##NAME const char *nerv_matrix_(tname) = "nerv.MMatrixDouble"; #include "generic/mmatrix.c" +#undef nerv_matrix_ +#undef host_matrix_ +#undef MATRIX_USE_DOUBLE +#undef MATRIX_ELEM +#undef MATRIX_ELEM_PTR +#undef MATRIX_ELEM_FMT + +#define NERV_GENERIC_MMATRIX +#define MATRIX_USE_INT +#define host_matrix_(NAME) host_matrix_int_##NAME +#define nerv_matrix_(NAME) nerv_matrix_host_int_##NAME +const char *nerv_matrix_(tname) = "nerv.MMatrixInt"; +#include "generic/mmatrix.c" + -- cgit v1.2.3