summaryrefslogtreecommitdiff
path: root/matrix
diff options
context:
space:
mode:
Diffstat (limited to 'matrix')
-rw-r--r--matrix/cumatrix.c92
-rw-r--r--matrix/generic/matrix.c26
-rw-r--r--matrix/generic/matrix.h5
-rw-r--r--matrix/matrix.c18
4 files changed, 124 insertions, 17 deletions
diff --git a/matrix/cumatrix.c b/matrix/cumatrix.c
index 87ad57b..9c2878a 100644
--- a/matrix/cumatrix.c
+++ b/matrix/cumatrix.c
@@ -1,11 +1,91 @@
-#define MATRIX_DATA_FREE(ptr) free(ptr)
-#define MATRIX_DATA_ALLOC(size) malloc(size)
-#define MATRIX_DATA_STRIDE(ncol) (sizeof(float) * (ncol))
-#define MATRIX_GENERIC
+#define MATRIX_DATA_FREE(ptr) cuda_float_array_free(ptr)
+#define MATRIX_DATA_ALLOC(dptr, stride, width, height) cuda_float_array_alloc(dptr, stride, width, height)
+#define MATRIX_DATA_WRITE(data, idx, val) cuda_float_array_write(data, idx, val)
+#define MATRIX_DATA_READ(data, idx) cuda_float_array_read(data, idx)
+#define MATRIX_INIT(L) cuda_float_init(L)
+#define NERV_GENERIC_MATRIX
#define nerv_float_matrix_(NAME) nerv_float_matrix_cuda_ ## NAME
-#include "generic/matrix.c"
+#include "../common.h"
+#include "generic/matrix.h"
+#include "cuda.h"
+#include "driver_types.h"
+#include "cublas_v2.h"
const char *nerv_float_matrix_(tname) = "nerv.FloatCuMatrix";
+static cublasHandle_t cublas_handle;
+
+Matrix *nerv_float_matrix_(new_)(long nrow, long ncol);
+static int nerv_float_matrix_(add)(lua_State *L) {
+ Matrix *a = luaT_checkudata(L, 1, nerv_float_matrix_(tname));
+ Matrix *b = luaT_checkudata(L, 2, nerv_float_matrix_(tname));
+ Matrix *c;
+ long nrow, ncol;
+ if (!(a->nrow == b->nrow && a->ncol == b->ncol))
+ nerv_error(L, "Matrices should be of the same dimension");
+ nrow = a->nrow;
+ ncol = a->ncol;
+ c = nerv_float_matrix_(new_)(nrow, ncol);
+ float alpha = 1.0f, beta = 1.0f;
+ cublasSgeam(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N,
+ ncol, nrow,
+ &alpha,
+ a->data.f, a->stride / sizeof(float),
+ &beta,
+ b->data.f, b->stride / sizeof(float),
+ c->data.f, c->stride / sizeof(float));
+ luaT_pushudata(L, c, nerv_float_matrix_(tname));
+ return 1;
+}
+
+static int nerv_float_matrix_(mul)(lua_State *L) {
+ Matrix *a = luaT_checkudata(L, 1, nerv_float_matrix_(tname));
+ Matrix *b = luaT_checkudata(L, 2, nerv_float_matrix_(tname));
+ Matrix *c;
+ if (a->ncol != b->nrow)
+ nerv_error(L, "Wrong dimension of multipliers");
+ c = nerv_float_matrix_(new_)(a->nrow, b->ncol);
+ float alpha = 1.0f, beta = 0.0f;
+ cublasSgemm(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N,
+ b->ncol, a->nrow, b->nrow,
+ &alpha,
+ b->data.f, b->stride / sizeof(float),
+ a->data.f, a->stride / sizeof(float),
+ &beta,
+ c->data.f, c->stride / sizeof(float));
+ luaT_pushudata(L, c, nerv_float_matrix_(tname));
+ return 1;
+}
+
+static const luaL_Reg nerv_float_matrix_(extra_methods)[] = {
+ {"__add__", nerv_float_matrix_(add)},
+ {"__mul__", nerv_float_matrix_(mul)},
+ {NULL, NULL}
+};
+
+static void cuda_float_init(lua_State *L) {
+ luaN_append_methods(L, nerv_float_matrix_(extra_methods));
+ cublasCreate(&cublas_handle);
+}
+
+static cuda_float_array_free(float *ptr) {
+ cudaFree(ptr);
+}
+
+static cuda_float_array_alloc(float **dptr, long *stride,
+ long width, long height) {
+ cudaMallocPitch(dptr, stride, width, height);
+}
+
+static float cuda_float_array_read(float *data, int idx) {
+ float res;
+ cudaMemcpy(&res, data + idx, sizeof(float), cudaMemcpyDeviceToHost);
+ return res;
+}
+
+static void cuda_float_array_write(float *data, int idx, float val) {
+ cudaMemcpy(data + idx, &val, sizeof(float), cudaMemcpyHostToDevice);
+}
+
int nerv_float_matrix_(get_elem)(lua_State *L) {
return nerv_error_method_not_implemented(L);
}
@@ -13,3 +93,5 @@ int nerv_float_matrix_(get_elem)(lua_State *L) {
int nerv_float_matrix_(set_elem)(lua_State *L) {
return nerv_error_method_not_implemented(L);
}
+
+#include "generic/matrix.c"
diff --git a/matrix/generic/matrix.c b/matrix/generic/matrix.c
index 29919d8..9ced397 100644
--- a/matrix/generic/matrix.c
+++ b/matrix/generic/matrix.c
@@ -1,4 +1,4 @@
-#ifdef MATRIX_GENERIC
+#ifdef NERV_GENERIC_MATRIX
#include "../../common.h"
#include "matrix.h"
@@ -14,17 +14,22 @@ void nerv_float_matrix_(data_retain)(Matrix *self) {
(*self->data_ref)++;
}
-int nerv_float_matrix_(new)(lua_State *L) {
+Matrix *nerv_float_matrix_(new_)(long nrow, long ncol) {
Matrix *self = (Matrix *)malloc(sizeof(Matrix));
- self->nrow = luaL_checkinteger(L, 1);
- self->ncol = luaL_checkinteger(L, 2);
+ self->nrow = nrow;
+ self->ncol = ncol;
self->nmax = self->nrow * self->ncol;
- self->stride = MATRIX_DATA_STRIDE(self->ncol);
- self->data.f = MATRIX_DATA_ALLOC(self->stride * self->nrow);
+ MATRIX_DATA_ALLOC(&self->data.f, &self->stride, sizeof(float) * self->ncol, self->nrow);
self->data_ref = (long *)malloc(sizeof(long));
*self->data_ref = 0;
nerv_float_matrix_(data_retain)(self);
- luaT_pushudata(L, self, nerv_float_matrix_(tname));
+ return self;
+}
+
+int nerv_float_matrix_(new)(lua_State *L) {
+ luaT_pushudata(L, nerv_float_matrix_(new_)(luaL_checkinteger(L, 1),
+ luaL_checkinteger(L, 2)),
+ nerv_float_matrix_(tname));
return 1;
}
@@ -58,7 +63,7 @@ static int nerv_float_matrix_(newindex)(lua_State *L) {
{
if (idx < 0 || idx >= self->ncol)
nerv_error(L, "index must be within range [0, %d)", self->ncol);
- self->data.f[idx] = luaL_checknumber(L, 3);
+ MATRIX_DATA_WRITE(self->data.f, idx, luaL_checknumber(L, 3));
}
else
nerv_error(L, "cannot assign a scalar to row vector");
@@ -82,7 +87,7 @@ static int nerv_float_matrix_(index)(lua_State *L) {
{
if (idx < 0 || idx >= self->ncol)
nerv_error(L, "index must be within range [0, %d)", self->ncol);
- lua_pushnumber(L, self->data.f[idx]);
+ lua_pushnumber(L, MATRIX_DATA_READ(self->data.f, idx));
}
else
{
@@ -127,6 +132,9 @@ void nerv_float_matrix_(init)(lua_State *L) {
luaT_newmetatable(L, nerv_float_matrix_(tname), nerv_matrix_tname,
nerv_float_matrix_(new), nerv_float_matrix_(destroy), NULL);
luaL_register(L, NULL, nerv_float_matrix_(methods));
+#ifdef MATRIX_INIT
+ MATRIX_INIT(L);
+#endif
lua_pop(L, 1);
}
#endif
diff --git a/matrix/generic/matrix.h b/matrix/generic/matrix.h
index d02b56e..655ff3d 100644
--- a/matrix/generic/matrix.h
+++ b/matrix/generic/matrix.h
@@ -1,3 +1,6 @@
+#ifndef NERV_GENERIC_MATRIX_H
+#define NERV_GENERIC_MATRIX_H
+
typedef struct Matrix {
long stride; /* size of a row */
long ncol, nrow, nmax; /* dimension of the matrix */
@@ -7,3 +10,5 @@ typedef struct Matrix {
} data; /* pointer to actual storage */
long *data_ref;
} Matrix;
+
+#endif
diff --git a/matrix/matrix.c b/matrix/matrix.c
index 0e5f75f..ef311d6 100644
--- a/matrix/matrix.c
+++ b/matrix/matrix.c
@@ -1,11 +1,21 @@
#define MATRIX_DATA_FREE(ptr) free(ptr)
-#define MATRIX_DATA_ALLOC(size) malloc(size)
+#define MATRIX_DATA_ALLOC(dptr, stride, width, height) host_float_array_alloc(dptr, stride, width, height)
#define MATRIX_DATA_STRIDE(ncol) (sizeof(float) * (ncol))
-#define MATRIX_GENERIC
+#define MATRIX_DATA_WRITE(data, idx, val) (data[idx] = val)
+#define MATRIX_DATA_READ(data, idx) (data[idx])
+#define NERV_GENERIC_MATRIX
#define nerv_float_matrix_(NAME) nerv_float_matrix_host_ ## NAME
-#include "generic/matrix.c"
+#include "../common.h"
+#include "generic/matrix.h"
const char *nerv_float_matrix_(tname) = "nerv.FloatMatrix";
+
+static void host_float_array_alloc(float **dptr, long *stride,
+ long width, long height) {
+ *dptr = (float *)malloc(width * height);
+ *stride = width;
+}
+
int nerv_float_matrix_(get_elem)(lua_State *L) {
Matrix *self = luaT_checkudata(L, 1, nerv_float_matrix_(tname));
int idx = luaL_checkinteger(L, 2);
@@ -25,3 +35,5 @@ int nerv_float_matrix_(set_elem)(lua_State *L) {
self->data.f[idx] = v;
return 0;
}
+
+#include "generic/matrix.c"