#define NERV_GENERIC_CUMATRIX #define MATRIX_CONTEXT CuContext #include "cumatrix.h" #include "cuda_helper.h" #include #include void nerv_cuda_context_print_profile(CuContext *context) { HashMap *profile = context->profile; size_t i; fprintf(stderr, "*** [nerv cumatrix profile] **\n"); for (i = 0; i < profile->size; i++) { HashNode *ptr; for (ptr = profile->bucket[i]; ptr; ptr = ptr->next) { fprintf(stderr, "%s:\t%.6f\n", ptr->key, *(float *)ptr->val); } } } void nerv_cuda_context_clear_profile(CuContext *context) { nerv_hashmap_clear(context->profile); } void nerv_cuda_context_accu_profile(CuContext *context, const char *name, float delta) { HashMap *profile = context->profile; float *val = nerv_hashmap_getval(profile, name); if (!val) { val = malloc(sizeof(float)); *val = 0; nerv_hashmap_setval(profile, name, val); } *val += delta; } static void new_cuda_handles(CuContext *context, Status *status) { CUBLAS_SAFE_SYNC_CALL(cublasCreate(&(context->cublas_handle)), status); CURAND_SAFE_SYNC_CALL(curandCreateGenerator(&(context->curand_gen), CURAND_RNG_PSEUDO_DEFAULT), status); CURAND_SAFE_SYNC_CALL( curandSetPseudoRandomGeneratorSeed(context->curand_gen, time(NULL)), status); CUDA_SAFE_SYNC_CALL(cudaEventCreate(&(context->profile_start)), status); CUDA_SAFE_SYNC_CALL(cudaEventCreate(&(context->profile_stop)), status); NERV_SET_STATUS(status, NERV_NORMAL, 0); } static void free_cuda_handles(CuContext *context, Status *status) { CUBLAS_SAFE_SYNC_CALL(cublasDestroy(context->cublas_handle), status); CURAND_SAFE_SYNC_CALL(curandDestroyGenerator(context->curand_gen), status); CUDA_SAFE_SYNC_CALL(cudaEventDestroy(context->profile_start), status); CUDA_SAFE_SYNC_CALL(cudaEventDestroy(context->profile_stop), status); NERV_SET_STATUS(status, NERV_NORMAL, 0); } CuContext *nerv_cuda_context_create(Status *status) { CuContext *context = (CuContext *)malloc(sizeof(CuContext)); new_cuda_handles(context, status); if (status->err_code != NERV_NORMAL) return NULL; context->profile = nerv_hashmap_create(PROFILE_HASHMAP_SIZE, bkdr_hash, strcmp); NERV_SET_STATUS(status, NERV_NORMAL, 0); return context; } void nerv_cuda_context_destroy(CuContext *context, Status *status) { free_cuda_handles(context, status); if (status->err_code != NERV_NORMAL) return; nerv_hashmap_destroy(context->profile); free(context); NERV_SET_STATUS(status, NERV_NORMAL, 0); } void nerv_cuda_context_select_gpu(CuContext *context, int dev, Status *status) { free_cuda_handles(context, status); if (status->err_code != NERV_NORMAL) return; CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status); new_cuda_handles(context, status); if (status->err_code != NERV_NORMAL) return; NERV_SET_STATUS(status, NERV_NORMAL, 0); } #define MATRIX_USE_FLOAT #define cuda_matrix_(NAME) cuda_matrix_float_##NAME #define nerv_matrix_(NAME) nerv_matrix_cuda_float_##NAME #define cudak_(NAME) cudak_float_ ## NAME #define NERV_CUBLAS_(NAME) cublasS##NAME #include "generic/cumatrix.c" #undef NERV_CUBLAS_ #undef cudak_ #undef nerv_matrix_ #undef cuda_matrix_ #undef MATRIX_USE_FLOAT #undef MATRIX_ELEM #undef MATRIX_ELEM_PTR #undef MATRIX_ELEM_PTR_BASE #undef MATRIX_ELEM_FMT #undef MATRIX_ELEM_WRITE_FMT #define MATRIX_USE_DOUBLE #define cuda_matrix_(NAME) cuda_matrix_double_##NAME #define nerv_matrix_(NAME) nerv_matrix_cuda_double_##NAME #define cudak_(NAME) cudak_double_ ## NAME #define NERV_CUBLAS_(NAME) cublasD##NAME #include "generic/cumatrix.c"