author     Determinant <ted.sybil@gmail.com>  2015-05-31 11:18:16 +0800
committer  Determinant <ted.sybil@gmail.com>  2015-05-31 11:18:16 +0800
commit     ab12a9583bdd39884fde9bc2444e6fd1bc5f518e (patch)
tree       11d6bf3b4aafd4a07cf78fe8dc921fa5280d8a3e
parent     c6f6ac13a1cf00e440e998422f89b42c69b073a6 (diff)
add async copy by index; add MMatrixInt
-rw-r--r--  matrix/cuda_helper.h         55
-rw-r--r--  matrix/generic/cumatrix.c    41
-rw-r--r--  matrix/generic/elem_type.h    6
-rw-r--r--  matrix/generic/matrix.c       3
-rw-r--r--  matrix/generic/matrix.h       4
-rw-r--r--  matrix/init.c                 1
-rw-r--r--  matrix/init.lua              13
-rw-r--r--  matrix/mmatrix.c             14
8 files changed, 131 insertions(+), 6 deletions(-)
diff --git a/matrix/cuda_helper.h b/matrix/cuda_helper.h
new file mode 100644
index 0000000..c0fa618
--- /dev/null
+++ b/matrix/cuda_helper.h
@@ -0,0 +1,55 @@
+#ifndef NERV_CUDA_HELPER_H
+#define NERV_CUDA_HELPER_H
+#define CUBLAS_SAFE_CALL(call) \
+ do { \
+ cublasStatus_t err = (call); \
+ if (err != CUBLAS_STATUS_SUCCESS) \
+ nerv_error(L, "cumatrix cublas error: %s", cublasGetErrorString(err)); \
+ } while (0)
+
+#define CUDA_SAFE_CALL(call) \
+ do { \
+ cudaError_t err = (call); \
+ if (err != cudaSuccess) \
+ nerv_error(L, "cumatrix CUDA error: %s", cudaGetErrorString(err)); \
+ } while (0)
+
+#define CUDA_SAFE_SYNC_CALL(call) \
+    do { \
+        CUDA_SAFE_CALL(call); \
+        CUDA_SAFE_CALL(cudaDeviceSynchronize()); \
+    } while (0)
+
+#define CHECK_SAME_DIMENSION(a, b) \
+ do { \
+ if (!(a->nrow == b->nrow && a->ncol == b->ncol)) \
+ nerv_error(L, "Matrices should be of the same dimension"); \
+ } while (0)
+
+static const char *cublasGetErrorString(cublasStatus_t err) {
+ switch (err)
+ {
+ case CUBLAS_STATUS_SUCCESS:
+ return "CUBLAS_STATUS_SUCCESS";
+ case CUBLAS_STATUS_NOT_INITIALIZED:
+ return "CUBLAS_STATUS_NOT_INITIALIZED";
+ case CUBLAS_STATUS_ALLOC_FAILED:
+ return "CUBLAS_STATUS_ALLOC_FAILED";
+ case CUBLAS_STATUS_INVALID_VALUE:
+ return "CUBLAS_STATUS_INVALID_VALUE";
+ case CUBLAS_STATUS_ARCH_MISMATCH:
+ return "CUBLAS_STATUS_ARCH_MISMATCH";
+ case CUBLAS_STATUS_MAPPING_ERROR:
+ return "CUBLAS_STATUS_MAPPING_ERROR";
+ case CUBLAS_STATUS_EXECUTION_FAILED:
+ return "CUBLAS_STATUS_EXECUTION_FAILED";
+ case CUBLAS_STATUS_INTERNAL_ERROR:
+ return "CUBLAS_STATUS_INTERNAL_ERROR";
+ case CUBLAS_STATUS_NOT_SUPPORTED:
+ return "CUBLAS_STATUS_NOT_SUPPORTED";
+ case CUBLAS_STATUS_LICENSE_ERROR:
+ return "CUBLAS_STATUS_LICENSE_ERROR";
+ }
+ return "<unknown>";
+}
+#endif
diff --git a/matrix/generic/cumatrix.c b/matrix/generic/cumatrix.c
index 7b0aa2a..3bc58d7 100644
--- a/matrix/generic/cumatrix.c
+++ b/matrix/generic/cumatrix.c
@@ -251,6 +251,46 @@ static int nerv_matrix_(log_elem)(lua_State *L) {
return 0;
}
+extern const char *nerv_matrix_host_int_tname;
+static int nerv_matrix_(copy_rows_fromh_by_idx)(lua_State *L) {
+    Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname));
+    Matrix *b = luaT_checkudata(L, 2, MATRIX_CUMATRIX_HOST_TNAME);
+    Matrix *idx = luaT_checkudata(L, 3, nerv_matrix_host_int_tname);
+    long *idx_ptr = idx->data.i;
+    int i;
+    long nrow = a->nrow;
+    if (idx->nrow != 1)
+        nerv_error(L, "index should be a vector");
+    if (idx->ncol != nrow)
+        nerv_error(L, "index dimension mismatch");
+    if (a->ncol != b->ncol)
+        nerv_error(L, "source/destination dimension mismatch");
+    /* validate all indices up front: nerv_error does not return, so
+     * failing inside the loops below would leak the streams */
+    for (i = 0; i < nrow; i++)
+        if (idx_ptr[i] < 0 || idx_ptr[i] >= b->nrow)
+            nerv_error(L, "invalid row index");
+    cudaStream_t *streams = (cudaStream_t *)malloc(sizeof(cudaStream_t) * nrow);
+    /* issue one asynchronous host-to-device copy per destination row,
+     * each on its own stream so the transfers may overlap */
+    for (i = 0; i < nrow; i++)
+    {
+        CUDA_SAFE_CALL(cudaStreamCreate(streams + i));
+        CUDA_SAFE_CALL(cudaMemcpyAsync(MATRIX_ROW_PTR(a, i),
+                    MATRIX_ROW_PTR(b, idx_ptr[i]),
+                    b->stride,
+                    cudaMemcpyHostToDevice, streams[i]));
+    }
+    /* wait for every copy to finish, then release the streams */
+    for (i = 0; i < nrow; i++)
+    {
+        CUDA_SAFE_CALL(cudaStreamSynchronize(streams[i]));
+        CUDA_SAFE_CALL(cudaStreamDestroy(streams[i]));
+    }
+    free(streams);
+    return 0;
+}
+
static const luaL_Reg nerv_matrix_(extra_methods)[] = {
{"create", nerv_matrix_(create)},
{"colsum", nerv_matrix_(colsum)},
@@ -271,6 +311,7 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = {
{"softmax", nerv_matrix_(softmax)},
{"mul_elem", nerv_matrix_(mul_elem)},
{"log_elem", nerv_matrix_(log_elem)},
+ {"copy_rows_fromh_by_idx", nerv_matrix_(copy_rows_fromh_by_idx)},
{NULL, NULL}
};
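
For orientation, a minimal Lua-level sketch of how the new method is called. The nerv.CuMatrixFloat constructor name is an assumption (it does not appear in this diff); nerv.MMatrixFloat and nerv.MMatrixInt are instantiated in mmatrix.c below.

    -- sketch only: constructor names are assumed, not confirmed by this diff
    local a   = nerv.CuMatrixFloat(2, 3)  -- destination rows on the device
    local b   = nerv.MMatrixFloat(5, 3)   -- host source; ncol must match a
    local idx = nerv.MMatrixInt(1, 2)     -- 1 x a:nrow() vector of row indices
    -- ... fill idx with row numbers of b ...
    a:copy_rows_fromh_by_idx(b, idx)      -- row i of a <- row idx[i] of b

Each destination row gets its own CUDA stream, so the per-row transfers can overlap instead of serializing on the default stream.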
diff --git a/matrix/generic/elem_type.h b/matrix/generic/elem_type.h
index 78233a3..2a6ffa8 100644
--- a/matrix/generic/elem_type.h
+++ b/matrix/generic/elem_type.h
@@ -10,4 +10,10 @@
#define MATRIX_ELEM_FMT "%lf"
#define MATRIX_ELEM_PTR(self) ((self)->data.d)
+#elif defined(MATRIX_USE_INT)
+
+#define MATRIX_ELEM long
+#define MATRIX_ELEM_FMT "%ld"
+#define MATRIX_ELEM_PTR(self) ((self)->data.i)
+
#endif
diff --git a/matrix/generic/matrix.c b/matrix/generic/matrix.c
index e0098de..a0f9ecf 100644
--- a/matrix/generic/matrix.c
+++ b/matrix/generic/matrix.c
@@ -2,9 +2,6 @@
#include "../../common.h"
#include "matrix.h"
-#define MATRIX_ROW_PTR(self, row) \
- (MATRIX_ELEM *)((char *)MATRIX_ELEM_PTR(self) + (row) * (self)->stride)
-
extern const char *nerv_matrix_(tname);
extern const char *MATRIX_BASE_TNAME;
diff --git a/matrix/generic/matrix.h b/matrix/generic/matrix.h
index 276ca5c..833724b 100644
--- a/matrix/generic/matrix.h
+++ b/matrix/generic/matrix.h
@@ -8,8 +8,12 @@ typedef struct Matrix {
union {
float *f;
double *d;
+ long *i;
} data; /* pointer to actual storage */
long *data_ref;
} Matrix;
+#define MATRIX_ROW_PTR(self, row) \
+ (MATRIX_ELEM *)((char *)MATRIX_ELEM_PTR(self) + (row) * (self)->stride)
+
#endif
diff --git a/matrix/init.c b/matrix/init.c
index e55558a..b54cd12 100644
--- a/matrix/init.c
+++ b/matrix/init.c
@@ -35,4 +35,5 @@ void nerv_matrix_init(lua_State *L) {
NULL, NULL, NULL);
nerv_matrix_host_float_init(L);
nerv_matrix_host_double_init(L);
+ nerv_matrix_host_int_init(L);
}
diff --git a/matrix/init.lua b/matrix/init.lua
index 0075668..057b085 100644
--- a/matrix/init.lua
+++ b/matrix/init.lua
@@ -2,17 +2,22 @@ function nerv.Matrix:__tostring__()
local ncol = self:ncol()
local nrow = self:nrow()
local strt = {}
-
+ local fmt
+ if self.fmt then
+ fmt = self.fmt
+ else
+ fmt = "%.10f "
+ end
if nrow == 1 then
for col = 0, ncol - 1 do
- table.insert(strt, string.format("%f ", self[col]))
+ table.insert(strt, string.format(fmt, self[col]))
end
table.insert(strt, "\n")
else
for row = 0, nrow - 1 do
local rp = self[row]
for col = 0, ncol - 1 do
- table.insert(strt, string.format("%.10f ", rp[col]))
+ table.insert(strt, string.format(fmt, rp[col]))
end
table.insert(strt, "\n")
end
@@ -21,6 +26,8 @@ function nerv.Matrix:__tostring__()
return table.concat(strt)
end
+nerv.MMatrixInt.fmt = "%d "
+
function nerv.CuMatrix:__add__(b)
c = self:create()
c:add(self, b, 1.0, 1.0)
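
The effect of the new fmt hook, sketched under the assumption that luaT routes __tostring__ through Lua's tostring:

    -- sketch: a class-level fmt field overrides the "%.10f " default
    local m = nerv.MMatrixInt(1, 3)
    print(m)   -- elements rendered with "%d "
    local f = nerv.MMatrixFloat(1, 3)
    print(f)   -- no fmt on this class, so "%.10f " is used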
diff --git a/matrix/mmatrix.c b/matrix/mmatrix.c
index b7d7dae..ab15197 100644
--- a/matrix/mmatrix.c
+++ b/matrix/mmatrix.c
@@ -17,3 +17,17 @@ const char *nerv_matrix_(tname) = "nerv.MMatrixFloat";
#define nerv_matrix_(NAME) nerv_matrix_host_double_##NAME
const char *nerv_matrix_(tname) = "nerv.MMatrixDouble";
#include "generic/mmatrix.c"
+#undef nerv_matrix_
+#undef host_matrix_
+#undef MATRIX_USE_DOUBLE
+#undef MATRIX_ELEM
+#undef MATRIX_ELEM_PTR
+#undef MATRIX_ELEM_FMT
+
+#define NERV_GENERIC_MMATRIX
+#define MATRIX_USE_INT
+#define host_matrix_(NAME) host_matrix_int_##NAME
+#define nerv_matrix_(NAME) nerv_matrix_host_int_##NAME
+const char *nerv_matrix_(tname) = "nerv.MMatrixInt";
+#include "generic/mmatrix.c"
+
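
With this third expansion of generic/mmatrix.c, the integer variant picks up the same generic constructor and accessors as the float and double ones; a sketch of what should now be available from Lua (nrow/ncol are taken from their use in __tostring__ above):

    -- sketch: nerv.MMatrixInt behaves like the other host matrix classes
    local idx = nerv.MMatrixInt(1, 4)
    print(idx:nrow(), idx:ncol())  -- prints: 1    4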