From ab12a9583bdd39884fde9bc2444e6fd1bc5f518e Mon Sep 17 00:00:00 2001
From: Determinant
Date: Sun, 31 May 2015 11:18:16 +0800
Subject: add async copy by index; add MMatrixInt

---
 matrix/generic/cumatrix.c  | 52 ++++++++++++++++++++++++++++++++++++++++++++
 matrix/generic/elem_type.h |  6 ++++++
 matrix/generic/matrix.c    |  3 ---
 matrix/generic/matrix.h    |  5 +++++
 4 files changed, 63 insertions(+), 3 deletions(-)

(limited to 'matrix/generic')

diff --git a/matrix/generic/cumatrix.c b/matrix/generic/cumatrix.c
index 7b0aa2a..3bc58d7 100644
--- a/matrix/generic/cumatrix.c
+++ b/matrix/generic/cumatrix.c
@@ -251,6 +251,57 @@ static int nerv_matrix_(log_elem)(lua_State *L) {
     return 0;
 }
 
+extern const char *nerv_matrix_host_int_tname;
+/* Registered as "copy_rows_fromh_by_idx"; Lua arguments are (a, b, idx).
+ * Copies row idx[i] of the host matrix b into row i of the device matrix a
+ * for every row of a. idx must be a 1 x a->nrow host integer matrix, a and
+ * b must have the same number of columns, and every index must address an
+ * existing row of b. Each row is copied on its own CUDA stream; all streams
+ * are synchronized and destroyed before returning. Returns 0 Lua values. */
+static int nerv_matrix_(copy_rows_fromh_by_idx)(lua_State *L) {
+    Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
+    Matrix *b = luaT_checkudata(L, 2, MATRIX_CUMATRIX_HOST_TNAME);
+    Matrix *idx = luaT_checkudata(L, 3, nerv_matrix_host_int_tname);
+    long *idx_ptr = idx->data.i;
+    long i;
+    long nrow = a->nrow;
+    /* Bytes of payload in one row. stride is the byte distance between
+     * rows (see MATRIX_ROW_PTR) and need not be equal for a and b. */
+    size_t row_bytes = sizeof(MATRIX_ELEM) * a->ncol;
+    cudaStream_t *streams;
+    if (idx->nrow != 1)
+        nerv_error(L, "index should be a vector");
+    if (idx->ncol != nrow)
+        nerv_error(L, "index dimension mismatch");
+    if (a->ncol != b->ncol)
+        nerv_error(L, "source/destination dimension mismatch");
+    /* Validate every index before anything is allocated; nerv_error is
+     * assumed not to return, as the checks above already rely on. */
+    for (i = 0; i < nrow; i++)
+        if (idx_ptr[i] < 0 || idx_ptr[i] >= b->nrow)
+            nerv_error(L, "index out of range");
+    if (nrow <= 0)
+        return 0;
+    streams = (cudaStream_t *)malloc(sizeof(cudaStream_t) * nrow);
+    if (streams == NULL)
+        nerv_error(L, "failed to allocate stream handles");
+    for (i = 0; i < nrow; i++)
+    {
+        CUDA_SAFE_CALL(cudaStreamCreate(streams + i));
+        CUDA_SAFE_CALL(cudaMemcpyAsync(MATRIX_ROW_PTR(a, i),
+                    MATRIX_ROW_PTR(b, idx_ptr[i]),
+                    row_bytes,
+                    cudaMemcpyHostToDevice, streams[i]));
+    }
+    for (i = 0; i < nrow; i++)
+    {
+        CUDA_SAFE_CALL(cudaStreamSynchronize(streams[i]));
+        CUDA_SAFE_CALL(cudaStreamDestroy(streams[i]));
+    }
+    free(streams);
+    return 0;
+}
+
 static const luaL_Reg nerv_matrix_(extra_methods)[] = {
     {"create", nerv_matrix_(create)},
     {"colsum", nerv_matrix_(colsum)},
@@ -271,6 +322,7 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = {
     {"softmax", nerv_matrix_(softmax)},
     {"mul_elem", nerv_matrix_(mul_elem)},
     {"log_elem", nerv_matrix_(log_elem)},
+    {"copy_rows_fromh_by_idx", nerv_matrix_(copy_rows_fromh_by_idx)},
     {NULL, NULL}
 };
 
diff --git a/matrix/generic/elem_type.h b/matrix/generic/elem_type.h
index 78233a3..2a6ffa8 100644
--- a/matrix/generic/elem_type.h
+++ b/matrix/generic/elem_type.h
@@ -10,4 +10,10 @@
 #define MATRIX_ELEM_FMT "%lf"
 #define MATRIX_ELEM_PTR(self) ((self)->data.d)
 
+#elif defined(MATRIX_USE_INT)
+
+#define MATRIX_ELEM long
+#define MATRIX_ELEM_FMT "%ld"
+#define MATRIX_ELEM_PTR(self) ((self)->data.i)
+
 #endif
diff --git a/matrix/generic/matrix.c b/matrix/generic/matrix.c
index e0098de..a0f9ecf 100644
--- a/matrix/generic/matrix.c
+++ b/matrix/generic/matrix.c
@@ -2,9 +2,6 @@
 #include "../../common.h"
 #include "matrix.h"
 
-#define MATRIX_ROW_PTR(self, row) \
-    (MATRIX_ELEM *)((char *)MATRIX_ELEM_PTR(self) + (row) * (self)->stride)
-
 extern const char *nerv_matrix_(tname);
 extern const char *MATRIX_BASE_TNAME;
 
diff --git a/matrix/generic/matrix.h b/matrix/generic/matrix.h
index 276ca5c..833724b 100644
--- a/matrix/generic/matrix.h
+++ b/matrix/generic/matrix.h
@@ -8,8 +8,13 @@ typedef struct Matrix {
     union {
         float *f;
         double *d;
+        long *i;
     } data; /* pointer to actual storage */
     long *data_ref;
 } Matrix;
 
+/* Pointer to the first element of row `row`; stride is a byte offset. */
+#define MATRIX_ROW_PTR(self, row) \
+    ((MATRIX_ELEM *)((char *)MATRIX_ELEM_PTR(self) + (row) * (self)->stride))
+
 #endif
-- 
cgit v1.2.3