summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- nerv/layer/affine.lua | 3
-rw-r--r-- nerv/lib/matrix/cuda_helper.h | 1
-rw-r--r-- nerv/lib/matrix/cukernel.h | 2
-rw-r--r-- nerv/lib/matrix/cumatrix.c | 4
-rw-r--r-- nerv/lib/matrix/generic/cukernel.cu | 6
-rw-r--r-- nerv/lib/matrix/generic/cumatrix.h | 4
-rw-r--r-- nerv/lib/matrix/generic/matrix.c | 8
-rw-r--r-- nerv/lib/matrix/matrix.h | 2
8 files changed, 18 insertions, 12 deletions
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 0d4f7dd..ec13519 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -80,6 +80,9 @@ function AffineLayer:__init(id, global_conf, layer_conf)
self.id = id
self.dim_in = layer_conf.dim_in
self.dim_out = layer_conf.dim_out
+ if layer_conf.ltp ~= nil and layer_conf.ltp1 == nil then
+ layer_conf.ltp1 = layer_conf.ltp
+ end
for i = 1, #self.dim_in do
self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf,
nerv.LinearTransParam,
diff --git a/nerv/lib/matrix/cuda_helper.h b/nerv/lib/matrix/cuda_helper.h
index 8041efb..13d5728 100644
--- a/nerv/lib/matrix/cuda_helper.h
+++ b/nerv/lib/matrix/cuda_helper.h
@@ -4,6 +4,7 @@
#include "cuda_runtime.h"
#include "driver_types.h"
#include "cublas_v2.h"
+#include "curand.h"
#define CUBLAS_SAFE_SYNC_CALL_RET(call, status) \
do { \
diff --git a/nerv/lib/matrix/cukernel.h b/nerv/lib/matrix/cukernel.h
index 39d42eb..04d0e28 100644
--- a/nerv/lib/matrix/cukernel.h
+++ b/nerv/lib/matrix/cukernel.h
@@ -25,4 +25,6 @@ void cudak_(cuda_scale_rows_by_col)(const Matrix *a, Matrix *b);
void cudak_(cuda_decompress)(const Matrix *a, Matrix *b);
void cudak_(cuda_copy_rows_by_idx)(const Matrix *a, Matrix *b,
const Matrix *idx, int b_begin);
+void cudak_(cuda_copy_rows_by_colidx)(const Matrix *a, Matrix *b,
+ const Matrix *idx, int b_begin);
#endif
diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c
index a8ed075..04205e4 100644
--- a/nerv/lib/matrix/cumatrix.c
+++ b/nerv/lib/matrix/cumatrix.c
@@ -2,9 +2,11 @@
#include "../common.h"
#include "cuda_helper.h"
#include <string.h>
+#include <time.h>
#define PROFILE_HASHMAP_SIZE 123457
static cublasHandle_t cublas_handle;
static cudaEvent_t profile_start, profile_stop;
+curandGenerator_t curand_gen;
static HashMap *profile;
void nerv_cumatrix_print_profile() {
@@ -37,6 +39,8 @@ void accu_profile(const char *name, float delta) {
void nerv_cumatrix_init() {
cublasCreate(&cublas_handle);
+ curandCreateGenerator(&curand_gen, CURAND_RNG_PSEUDO_DEFAULT);
+ curandSetPseudoRandomGeneratorSeed(curand_gen, time(NULL));
cudaEventCreate(&profile_start);
cudaEventCreate(&profile_stop);
profile = hashmap_create(PROFILE_HASHMAP_SIZE, bkdr_hash, strcmp);
diff --git a/nerv/lib/matrix/generic/cukernel.cu b/nerv/lib/matrix/generic/cukernel.cu
index 7f780a8..8fbe05d 100644
--- a/nerv/lib/matrix/generic/cukernel.cu
+++ b/nerv/lib/matrix/generic/cukernel.cu
@@ -4,6 +4,7 @@
#include "../matrix.h"
#include "cuda.h"
#include "float.h"
+#include "curand.h"
#define CUDA_THREADS_N 16
#define CUDA_THREADS_NN ((CUDA_THREADS_N) * (CUDA_THREADS_N))
#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b))
@@ -430,12 +431,13 @@ extern "C" {
cudaStreamSynchronize(0);
}
+ extern curandGenerator_t curand_gen;
void cudak_(cuda_rand_uniform)(const Matrix *a) {
#ifdef MATRIX_USE_FLOAT
- curandGenerateUniform(*(a->curand_gen), MATRIX_ELEM_PTR(a), a->nrow * a->stride / sizeof(MATRIX_ELEM));
+ curandGenerateUniform(curand_gen, MATRIX_ELEM_PTR(a), a->nrow * a->stride / sizeof(MATRIX_ELEM));
#endif
#ifdef MATRIX_USE_DOUBLE
- curandGenerateUniformDouble(*(a->curand_gen), MATRIX_ELEM_PTR(a), a->nrow * a->stride / sizeof(MATRIX_ELEM));
+ curandGenerateUniformDouble(curand_gen, MATRIX_ELEM_PTR(a), a->nrow * a->stride / sizeof(MATRIX_ELEM));
#endif
}
diff --git a/nerv/lib/matrix/generic/cumatrix.h b/nerv/lib/matrix/generic/cumatrix.h
index e82dccd..9304060 100644
--- a/nerv/lib/matrix/generic/cumatrix.h
+++ b/nerv/lib/matrix/generic/cumatrix.h
@@ -61,3 +61,7 @@ void nerv_matrix_(scale_rows_by_col)(Matrix *a, const Matrix *b,
Status *status);
void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b,
Status *status);
+void nerv_matrix_(thres_mask)(Matrix *a, Matrix *b,
+ double thres, double low, double high,
+ Status *status);
+void nerv_matrix_(rand_uniform)(Matrix *a, Status *status);
diff --git a/nerv/lib/matrix/generic/matrix.c b/nerv/lib/matrix/generic/matrix.c
index 004d9aa..998d107 100644
--- a/nerv/lib/matrix/generic/matrix.c
+++ b/nerv/lib/matrix/generic/matrix.c
@@ -9,8 +9,6 @@ void nerv_matrix_(data_free)(Matrix *self, Status *status) {
{
/* free matrix data */
MATRIX_DATA_FREE(MATRIX_ELEM_PTR_BASE(self), status);
- curandDestroyGenerator(*(self->curand_gen));
- free(self->curand_gen);
free(self->data_ref);
free(self);
}
@@ -40,11 +38,6 @@ Matrix *nerv_matrix_(create)(long nrow, long ncol, Status *status) {
}
self->data_ref = (long *)malloc(sizeof(long));
*self->data_ref = 0;
-
- self->curand_gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t));
- curandCreateGenerator(self->curand_gen, CURAND_RNG_PSEUDO_DEFAULT);
- curandSetPseudoRandomGeneratorSeed(*(self->curand_gen), time(NULL));
-
self->offset = 0;
nerv_matrix_(data_retain)(self);
NERV_SET_STATUS(status, NERV_NORMAL, 0);
@@ -64,7 +57,6 @@ Matrix *nerv_matrix_(getrow)(Matrix *self, int row) {
prow->nmax = prow->ncol;
prow->data = self->data;
prow->data_ref = self->data_ref;
- prow->curand_gen = self->curand_gen;
prow->offset = row * self->stride;
nerv_matrix_(data_retain)(prow);
return prow;
diff --git a/nerv/lib/matrix/matrix.h b/nerv/lib/matrix/matrix.h
index 2562772..35b698f 100644
--- a/nerv/lib/matrix/matrix.h
+++ b/nerv/lib/matrix/matrix.h
@@ -2,7 +2,6 @@
#define NERV_GENERIC_MATRIX_H
#include <stddef.h>
-#include <curand.h>
typedef struct Matrix {
size_t stride; /* size of a row */
@@ -15,7 +14,6 @@ typedef struct Matrix {
} data; /* pointer to actual storage */
unsigned long offset; /* the actual beginning of the matrix */
long *data_ref;
- curandGenerator_t *curand_gen;
} Matrix;
#define MATRIX_ROW_PTR(self, row) \