aboutsummaryrefslogtreecommitdiff
path: root/matrix/generic
diff options
context:
space:
mode:
Diffstat (limited to 'matrix/generic')
-rw-r--r-- matrix/generic/cumatrix.c 32
-rw-r--r-- matrix/generic/elem_type.h 6
-rw-r--r-- matrix/generic/matrix.c 3
-rw-r--r-- matrix/generic/matrix.h 4
4 files changed, 42 insertions, 3 deletions
diff --git a/matrix/generic/cumatrix.c b/matrix/generic/cumatrix.c
index 7b0aa2a..3bc58d7 100644
--- a/matrix/generic/cumatrix.c
+++ b/matrix/generic/cumatrix.c
@@ -251,6 +251,37 @@ static int nerv_matrix_(log_elem)(lua_State *L) {
return 0;
}
+extern const char *nerv_matrix_host_int_tname;
+/*
+ * Lua method "copy_rows_fromh_by_idx": a:copy_rows_fromh_by_idx(b, idx)
+ *
+ * For every row i of the device matrix `a`, copies row idx[i] of the host
+ * matrix `b` into it (host-to-device).
+ *
+ * Arguments (Lua stack):
+ *   1: a   - destination matrix of this CUDA matrix type
+ *   2: b   - source matrix of the matching host matrix type
+ *   3: idx - host integer matrix with one row and a->nrow columns; each entry
+ *            selects a row of `b` and must lie in [0, b->nrow)
+ *
+ * Pushes nothing onto the Lua stack (returns 0). Shape mismatches and
+ * out-of-range indices are reported through nerv_error().
+ */
+static int nerv_matrix_(copy_rows_fromh_by_idx)(lua_State *L) {
+    Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
+    Matrix *b = luaT_checkudata(L, 2, MATRIX_CUMATRIX_HOST_TNAME);
+    Matrix *idx = luaT_checkudata(L, 3, nerv_matrix_host_int_tname);
+    long *idx_ptr = idx->data.i;
+    long i; /* same width as nrow so the counter cannot wrap before reaching it */
+    long nrow = a->nrow;
+    size_t row_bytes;
+    cudaStream_t *streams;
+    if (idx->nrow != 1)
+        nerv_error(L, "index should be a vector");
+    if (idx->ncol != nrow)
+        nerv_error(L, "index dimension mismatch");
+    if (a->ncol != b->ncol)
+        nerv_error(L, "source/destination dimension mismatch");
+    /* Validate every index before acquiring any resource: nerv_error()
+     * presumably does not return (NOTE(review): confirm), so raising it after
+     * the allocation below would leak. */
+    for (i = 0; i < nrow; i++)
+    {
+        if (idx_ptr[i] < 0 || idx_ptr[i] >= b->nrow)
+            nerv_error(L, "index out of range");
+    }
+    /* Copy only the element payload of a row. The host and device strides may
+     * differ, and copying a full host stride could run past a device row. */
+    row_bytes = (size_t)a->ncol * sizeof(MATRIX_ELEM);
+    streams = (cudaStream_t *)malloc(sizeof(cudaStream_t) * (size_t)nrow);
+    if (nrow > 0 && streams == NULL)
+        nerv_error(L, "cannot allocate stream array");
+    /* One stream per row, so the row copies are issued independently.
+     * NOTE(review): if CUDA_SAFE_CALL aborts this function on failure, the
+     * streams created so far and the array itself are not released. */
+    for (i = 0; i < nrow; i++)
+    {
+        CUDA_SAFE_CALL(cudaStreamCreate(streams + i));
+        CUDA_SAFE_CALL(cudaMemcpyAsync(MATRIX_ROW_PTR(a, i),
+                                       MATRIX_ROW_PTR(b, idx_ptr[i]),
+                                       row_bytes,
+                                       cudaMemcpyHostToDevice, streams[i]));
+    }
+    /* Wait for every copy to finish before returning to Lua, then release
+     * each stream. */
+    for (i = 0; i < nrow; i++)
+    {
+        CUDA_SAFE_CALL(cudaStreamSynchronize(streams[i]));
+        CUDA_SAFE_CALL(cudaStreamDestroy(streams[i]));
+    }
+    free(streams); /* the original never released this array */
+    return 0;
+}
+
static const luaL_Reg nerv_matrix_(extra_methods)[] = {
{"create", nerv_matrix_(create)},
{"colsum", nerv_matrix_(colsum)},
@@ -271,6 +302,7 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = {
{"softmax", nerv_matrix_(softmax)},
{"mul_elem", nerv_matrix_(mul_elem)},
{"log_elem", nerv_matrix_(log_elem)},
+ {"copy_rows_fromh_by_idx", nerv_matrix_(copy_rows_fromh_by_idx)},
{NULL, NULL}
};
diff --git a/matrix/generic/elem_type.h b/matrix/generic/elem_type.h
index 78233a3..2a6ffa8 100644
--- a/matrix/generic/elem_type.h
+++ b/matrix/generic/elem_type.h
@@ -10,4 +10,10 @@
#define MATRIX_ELEM_FMT "%lf"
#define MATRIX_ELEM_PTR(self) ((self)->data.d)
+#elif defined(MATRIX_USE_INT)
+
+/* Integer element configuration, selected when MATRIX_USE_INT is defined. */
+/* Element type stored in the matrix. */
+#define MATRIX_ELEM long
+/* printf/scanf-style conversion matching MATRIX_ELEM (long). */
+#define MATRIX_ELEM_FMT "%ld"
+/* Selects the `i` member of the matrix's `data` field as the element pointer. */
+#define MATRIX_ELEM_PTR(self) ((self)->data.i)
+
#endif
diff --git a/matrix/generic/matrix.c b/matrix/generic/matrix.c
index e0098de..a0f9ecf 100644
--- a/matrix/generic/matrix.c
+++ b/matrix/generic/matrix.c
@@ -2,9 +2,6 @@
#include "../../common.h"
#include "matrix.h"
-#define MATRIX_ROW_PTR(self, row) \
- (MATRIX_ELEM *)((char *)MATRIX_ELEM_PTR(self) + (row) * (self)->stride)
-
extern const char *nerv_matrix_(tname);
extern const char *MATRIX_BASE_TNAME;
diff --git a/matrix/generic/matrix.h b/matrix/generic/matrix.h
index 276ca5c..833724b 100644
--- a/matrix/generic/matrix.h
+++ b/matrix/generic/matrix.h
@@ -8,8 +8,12 @@ typedef struct Matrix {
union {
float *f;
double *d;
+ long *i;
} data; /* pointer to actual storage */
long *data_ref;
} Matrix;
+/*
+ * Yields a MATRIX_ELEM pointer to the start of row `row` of matrix `self`.
+ * The row offset is applied to a char pointer, so `stride` is used as a byte
+ * distance between consecutive rows.
+ *
+ * The whole expansion is parenthesized: a cast binds less tightly than postfix
+ * operators, so without the outer parentheses MATRIX_ROW_PTR(m, r)[c] would
+ * index the intermediate char pointer and then cast that single byte to a
+ * pointer.
+ */
+#define MATRIX_ROW_PTR(self, row) \
+    ((MATRIX_ELEM *)((char *)MATRIX_ELEM_PTR(self) + (row) * (self)->stride))
+
#endif