From a7608a67f16f745309409f6a004354121e0b3ba6 Mon Sep 17 00:00:00 2001 From: Determinant Date: Mon, 15 Feb 2016 16:18:13 +0800 Subject: move curand_gen out of the matrix layout --- nerv/layer/affine.lua | 3 +++ nerv/lib/matrix/cuda_helper.h | 1 + nerv/lib/matrix/cukernel.h | 2 ++ nerv/lib/matrix/cumatrix.c | 4 ++++ nerv/lib/matrix/generic/cukernel.cu | 6 ++++-- nerv/lib/matrix/generic/cumatrix.h | 4 ++++ nerv/lib/matrix/generic/matrix.c | 8 -------- nerv/lib/matrix/matrix.h | 2 -- 8 files changed, 18 insertions(+), 12 deletions(-) diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua index 0d4f7dd..ec13519 100644 --- a/nerv/layer/affine.lua +++ b/nerv/layer/affine.lua @@ -80,6 +80,9 @@ function AffineLayer:__init(id, global_conf, layer_conf) self.id = id self.dim_in = layer_conf.dim_in self.dim_out = layer_conf.dim_out + if layer_conf.ltp ~= nil and layer_conf.ltp1 == nil then + layer_conf.ltp1 = layer_conf.ltp + end for i = 1, #self.dim_in do self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf, nerv.LinearTransParam, diff --git a/nerv/lib/matrix/cuda_helper.h b/nerv/lib/matrix/cuda_helper.h index 8041efb..13d5728 100644 --- a/nerv/lib/matrix/cuda_helper.h +++ b/nerv/lib/matrix/cuda_helper.h @@ -4,6 +4,7 @@ #include "cuda_runtime.h" #include "driver_types.h" #include "cublas_v2.h" +#include "curand.h" #define CUBLAS_SAFE_SYNC_CALL_RET(call, status) \ do { \ diff --git a/nerv/lib/matrix/cukernel.h b/nerv/lib/matrix/cukernel.h index 39d42eb..04d0e28 100644 --- a/nerv/lib/matrix/cukernel.h +++ b/nerv/lib/matrix/cukernel.h @@ -25,4 +25,6 @@ void cudak_(cuda_scale_rows_by_col)(const Matrix *a, Matrix *b); void cudak_(cuda_decompress)(const Matrix *a, Matrix *b); void cudak_(cuda_copy_rows_by_idx)(const Matrix *a, Matrix *b, const Matrix *idx, int b_begin); +void cudak_(cuda_copy_rows_by_colidx)(const Matrix *a, Matrix *b, + const Matrix *idx, int b_begin); #endif diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c index a8ed075..04205e4 100644 --- a/nerv/lib/matrix/cumatrix.c +++ b/nerv/lib/matrix/cumatrix.c @@ -2,9 +2,11 @@ #include "../common.h" #include "cuda_helper.h" #include +#include #define PROFILE_HASHMAP_SIZE 123457 static cublasHandle_t cublas_handle; static cudaEvent_t profile_start, profile_stop; +curandGenerator_t curand_gen; static HashMap *profile; void nerv_cumatrix_print_profile() { @@ -37,6 +39,8 @@ void accu_profile(const char *name, float delta) { void nerv_cumatrix_init() { cublasCreate(&cublas_handle); + curandCreateGenerator(&curand_gen, CURAND_RNG_PSEUDO_DEFAULT); + curandSetPseudoRandomGeneratorSeed(curand_gen, time(NULL)); cudaEventCreate(&profile_start); cudaEventCreate(&profile_stop); profile = hashmap_create(PROFILE_HASHMAP_SIZE, bkdr_hash, strcmp); diff --git a/nerv/lib/matrix/generic/cukernel.cu b/nerv/lib/matrix/generic/cukernel.cu index 7f780a8..8fbe05d 100644 --- a/nerv/lib/matrix/generic/cukernel.cu +++ b/nerv/lib/matrix/generic/cukernel.cu @@ -4,6 +4,7 @@ #include "../matrix.h" #include "cuda.h" #include "float.h" +#include "curand.h" #define CUDA_THREADS_N 16 #define CUDA_THREADS_NN ((CUDA_THREADS_N) * (CUDA_THREADS_N)) #define CEIL_DIV(a, b) (((a) + (b) - 1) / (b)) @@ -430,12 +431,13 @@ extern "C" { cudaStreamSynchronize(0); } + extern curandGenerator_t curand_gen; void cudak_(cuda_rand_uniform)(const Matrix *a) { #ifdef MATRIX_USE_FLOAT - curandGenerateUniform(*(a->curand_gen), MATRIX_ELEM_PTR(a), a->nrow * a->stride / sizeof(MATRIX_ELEM)); + curandGenerateUniform(curand_gen, MATRIX_ELEM_PTR(a), a->nrow * a->stride / sizeof(MATRIX_ELEM)); #endif #ifdef MATRIX_USE_DOUBLE - curandGenerateUniformDouble(*(a->curand_gen), MATRIX_ELEM_PTR(a), a->nrow * a->stride / sizeof(MATRIX_ELEM)); + curandGenerateUniformDouble(curand_gen, MATRIX_ELEM_PTR(a), a->nrow * a->stride / sizeof(MATRIX_ELEM)); #endif } diff --git a/nerv/lib/matrix/generic/cumatrix.h b/nerv/lib/matrix/generic/cumatrix.h index e82dccd..9304060 100644 --- a/nerv/lib/matrix/generic/cumatrix.h +++ b/nerv/lib/matrix/generic/cumatrix.h @@ -61,3 +61,7 @@ void nerv_matrix_(scale_rows_by_col)(Matrix *a, const Matrix *b, Status *status); void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b, Status *status); +void nerv_matrix_(thres_mask)(Matrix *a, Matrix *b, + double thres, double low, double high, + Status *status); +void nerv_matrix_(rand_uniform)(Matrix *a, Status *status); diff --git a/nerv/lib/matrix/generic/matrix.c b/nerv/lib/matrix/generic/matrix.c index 004d9aa..998d107 100644 --- a/nerv/lib/matrix/generic/matrix.c +++ b/nerv/lib/matrix/generic/matrix.c @@ -9,8 +9,6 @@ void nerv_matrix_(data_free)(Matrix *self, Status *status) { { /* free matrix data */ MATRIX_DATA_FREE(MATRIX_ELEM_PTR_BASE(self), status); - curandDestroyGenerator(*(self->curand_gen)); - free(self->curand_gen); free(self->data_ref); free(self); } @@ -40,11 +38,6 @@ Matrix *nerv_matrix_(create)(long nrow, long ncol, Status *status) { } self->data_ref = (long *)malloc(sizeof(long)); *self->data_ref = 0; - - self->curand_gen = (curandGenerator_t*)malloc(sizeof(curandGenerator_t)); - curandCreateGenerator(self->curand_gen, CURAND_RNG_PSEUDO_DEFAULT); - curandSetPseudoRandomGeneratorSeed(*(self->curand_gen), time(NULL)); - self->offset = 0; nerv_matrix_(data_retain)(self); NERV_SET_STATUS(status, NERV_NORMAL, 0); @@ -64,7 +57,6 @@ Matrix *nerv_matrix_(getrow)(Matrix *self, int row) { prow->nmax = prow->ncol; prow->data = self->data; prow->data_ref = self->data_ref; - prow->curand_gen = self->curand_gen; prow->offset = row * self->stride; nerv_matrix_(data_retain)(prow); return prow; diff --git a/nerv/lib/matrix/matrix.h b/nerv/lib/matrix/matrix.h index 2562772..35b698f 100644 --- a/nerv/lib/matrix/matrix.h +++ b/nerv/lib/matrix/matrix.h @@ -2,7 +2,6 @@ #define NERV_GENERIC_MATRIX_H #include -#include typedef struct Matrix { size_t stride; /* size of a row */ @@ -15,7 +14,6 @@ typedef struct Matrix { } data; /* pointer to actual storage */ unsigned long offset; /* the actual beginning of the matrix */ long *data_ref; - curandGenerator_t *curand_gen; } Matrix; #define MATRIX_ROW_PTR(self, row) \ -- cgit v1.2.3-70-g09d2