#define NERV_GENERIC_CUMATRIX
#define MATRIX_CONTEXT CuContext
#include <string.h>
#include <time.h>
#include "../common.h"
#include "cumatrix.h"
#include "cuda_helper.h"
/*
 * Dump the profiling table of `context` to stderr.
 *
 * Every entry stored in context->profile is printed as "key:<TAB>value",
 * where the stored value is read as a float, and the sum of all values is
 * printed last as "Total time".
 */
void nerv_cuda_context_print_profile(CuContext *context) {
    HashMap *table = context->profile;
    float total = 0;
    size_t slot = 0;

    fprintf(stderr, "*** [nerv cumatrix profile] **\n");
    /* visit each bucket, then each node chained off that bucket */
    while (slot < table->size)
    {
        HashNode *node = table->bucket[slot];
        while (node)
        {
            float elapsed = *(float *)node->val;
            fprintf(stderr, "%s:\t%.6f\n", node->key, elapsed);
            total += elapsed;
            node = node->next;
        }
        slot++;
    }
    fprintf(stderr, "Total time:\t%.6f\n", total);
}
/* Reset the profiling table of `context` by clearing its hash map. */
void nerv_cuda_context_clear_profile(CuContext *context) {
    HashMap *table = context->profile;
    nerv_hashmap_clear(table);
}
/*
 * Add `delta` to the profiling counter stored under `name`.
 *
 * context: owner of the profile hash map.
 * name:    key of the counter; a new counter starting at 0 is inserted
 *          the first time a key is seen.
 * delta:   amount to accumulate.
 *
 * Profiling is best-effort: if the counter cannot be allocated the sample
 * is dropped instead of dereferencing a NULL pointer.
 */
void nerv_cuda_context_accu_profile(CuContext *context,
                                    const char *name, float delta) {
    HashMap *profile = context->profile;
    float *val = (float *)nerv_hashmap_getval(profile, name);
    if (!val)
    {
        val = (float *)malloc(sizeof(float));
        if (!val)
            return; /* out of memory: skip this sample */
        *val = 0;
        nerv_hashmap_setval(profile, name, val);
    }
    *val += delta;
}
/*
 * Create the per-context CUDA resources on device `dev`: a cuBLAS handle,
 * a cuRAND pseudo-random generator seeded from the wall clock, and the two
 * events used for profiling.
 *
 * Does nothing when the context already owns handles. On success `status`
 * is set to NERV_NORMAL and context->has_handle becomes 1.
 *
 * NOTE(review): the *_SAFE_SYNC_CALL macros are defined elsewhere; they
 * presumably record the failure in `status` and return early -- confirm.
 */
static void new_cuda_handles(CuContext *context, int dev, Status *status) {
    if (context->has_handle)
    {
        /* callers inspect status->err_code right after this call, so the
         * no-op path must report success explicitly */
        NERV_SET_STATUS(status, NERV_NORMAL, 0);
        return;
    }
    CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status);
    CUBLAS_SAFE_SYNC_CALL(cublasCreate(&(context->cublas_handle)), status);
    CURAND_SAFE_SYNC_CALL(curandCreateGenerator(&(context->curand_gen),
                                                CURAND_RNG_PSEUDO_DEFAULT), status);
    CURAND_SAFE_SYNC_CALL(
            curandSetPseudoRandomGeneratorSeed(context->curand_gen, time(NULL)),
            status);
    CUDA_SAFE_SYNC_CALL(cudaEventCreate(&(context->profile_start)), status);
    CUDA_SAFE_SYNC_CALL(cudaEventCreate(&(context->profile_stop)), status);
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
    context->has_handle = 1;
}
/*
 * Release the cuBLAS handle, the cuRAND generator and the two profiling
 * events owned by `context`.
 *
 * Does nothing when the context owns no handles. context->has_handle is
 * cleared before the destroy calls are issued. On success `status` is set
 * to NERV_NORMAL.
 *
 * NOTE(review): the *_SAFE_SYNC_CALL macros are defined elsewhere; they
 * presumably record the failure in `status` and return early -- confirm.
 */
static void free_cuda_handles(CuContext *context, Status *status) {
    if (!context->has_handle)
    {
        /* callers inspect status->err_code right after this call, so the
         * no-op path must report success explicitly */
        NERV_SET_STATUS(status, NERV_NORMAL, 0);
        return;
    }
    context->has_handle = 0;
    CUBLAS_SAFE_SYNC_CALL(cublasDestroy(context->cublas_handle), status);
    CURAND_SAFE_SYNC_CALL(curandDestroyGenerator(context->curand_gen), status);
    CUDA_SAFE_SYNC_CALL(cudaEventDestroy(context->profile_start), status);
    CUDA_SAFE_SYNC_CALL(cudaEventDestroy(context->profile_stop), status);
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}
/*
 * Pick the GPU with the largest fraction of free memory.
 *
 * Each device is selected in turn, its free/total memory is queried, the
 * percentage is logged to stderr, and the device is reset again before
 * moving on. Ties keep the lowest-numbered device; device 0 is the
 * fallback when no device reports a ratio above zero.
 *
 * Returns the chosen device index and sets `status` to NERV_NORMAL.
 *
 * NOTE(review): the CUDA_SAFE_*_RET macros are defined elsewhere; they
 * presumably record the failure in `status` and return early -- confirm.
 */
static int choose_best_gpu(Status *status) {
    int i, n, dev = 0;
    float best_ratio = 0;
    fprintf(stderr, "*** select a GPU based on available space\n");
    CUDA_SAFE_CALL_RET(cudaGetDeviceCount(&n), status);
    for (i = 0; i < n; i++)
    {
        size_t avail, total;
        float ratio;
        CUDA_SAFE_SYNC_CALL_RET(cudaSetDevice(i), status);
        /* use the runtime-API query so the result is a runtime error code,
         * like every other call wrapped by this macro (the driver-API
         * cuMemGetInfo returns a CUresult instead) */
        CUDA_SAFE_SYNC_CALL_RET(cudaMemGetInfo(&avail, &total), status);
        ratio = (float)avail/total * 100;
        fprintf(stderr, "* card %d: %.2f%%\n", i, ratio);
        if (ratio > best_ratio)
        {
            best_ratio = ratio;
            dev = i;
        }
        /* drop whatever state probing created on this device */
        CUDA_SAFE_SYNC_CALL_RET(cudaDeviceReset(), status);
    }
    fprintf(stderr, "*** final decision: GPU %d\n", dev);
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
    return dev;
}
/*
 * Allocate a CuContext and set up its CUDA handles and profiling table.
 *
 * dev:    device index to use, or -1 to let choose_best_gpu() decide.
 * status: receives NERV_NORMAL on success, otherwise the error reported
 *         by the failing step.
 *
 * Returns the new context, or NULL on failure. The context structure is
 * released on every failure path so it is not leaked.
 *
 * NOTE(review): the malloc() result is still not checked because no
 * out-of-memory error code is visible in this file -- confirm which code
 * should be reported and add the check.
 */
CuContext *nerv_cuda_context_create(int dev, Status *status) {
    CuContext *context = (CuContext *)malloc(sizeof(CuContext));
    context->has_handle = 0; /* this line must come first */
    if (dev == -1)
    {
        dev = choose_best_gpu(status);
        if (status->err_code != NERV_NORMAL)
        {
            free(context);
            return NULL;
        }
    }
    new_cuda_handles(context, dev, status);
    if (status->err_code != NERV_NORMAL)
    {
        /* nothing else references the context yet, so it is safe to free */
        free(context);
        return NULL;
    }
    context->profile = nerv_hashmap_create(PROFILE_HASHMAP_SIZE, bkdr_hash, strcmp);
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
    return context;
}
/*
 * Tear down a context created by nerv_cuda_context_create(): release its
 * CUDA handles, destroy the profiling table and free the structure.
 *
 * If releasing the handles fails, `status` carries the error and the
 * context is left allocated; otherwise `status` is NERV_NORMAL.
 */
void nerv_cuda_context_destroy(CuContext *context, Status *status) {
    /* free_cuda_handles() may return without touching `status` when the
     * context owns no handles; start from a known-good value so the check
     * below never reads a stale or uninitialized code */
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
    free_cuda_handles(context, status);
    if (status->err_code != NERV_NORMAL)
        return;
    nerv_hashmap_destroy(context->profile);
    free(context);
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}
/*
 * Re-target `context` at device `dev`.
 *
 * The existing handles are not released one by one: the context is simply
 * marked as owning none, the current device is reset with
 * cudaDeviceReset(), and a fresh set of handles is created on `dev`.
 * `status` is NERV_NORMAL on success, otherwise it holds the error of the
 * step that failed.
 */
void nerv_cuda_context_select_gpu(CuContext *context,
                                    int dev, Status *status) {
    /* forget the old handles; the device reset below is relied upon
     * instead of free_cuda_handles() */
    context->has_handle = 0;
    CUDA_SAFE_SYNC_CALL(cudaDeviceReset(), status);
    new_cuda_handles(context, dev, status);
    if (status->err_code == NERV_NORMAL)
    {
        NERV_SET_STATUS(status, NERV_NORMAL, 0);
    }
}
/*
 * Single-precision instantiation: select the float element type, map the
 * generic name macros onto float-specific identifiers (cuBLAS calls get
 * the "S" prefix), then pull in the generic implementation.
 */
#define MATRIX_USE_FLOAT
#define cuda_matrix_(NAME) cuda_matrix_float_##NAME
#define nerv_matrix_(NAME) nerv_matrix_cuda_float_##NAME
#define cudak_(NAME) cudak_float_ ## NAME
#define NERV_CUBLAS_(NAME) cublasS##NAME
#include "generic/cumatrix.c"
/*
 * Drop the float-specific macros so they can be redefined below.
 * NOTE(review): the MATRIX_ELEM* macros are not defined in this file;
 * presumably they come from the generic include chain -- confirm.
 */
#undef NERV_CUBLAS_
#undef cudak_
#undef nerv_matrix_
#undef cuda_matrix_
#undef MATRIX_USE_FLOAT
#undef MATRIX_ELEM
#undef MATRIX_ELEM_PTR
#undef MATRIX_ELEM_PTR_BASE
#undef MATRIX_ELEM_FMT
#undef MATRIX_ELEM_WRITE_FMT
/*
 * Double-precision instantiation: same generic source, this time with
 * double-specific identifiers (cuBLAS calls get the "D" prefix).
 */
#define MATRIX_USE_DOUBLE
#define cuda_matrix_(NAME) cuda_matrix_double_##NAME
#define nerv_matrix_(NAME) nerv_matrix_cuda_double_##NAME
#define cudak_(NAME) cudak_double_ ## NAME
#define NERV_CUBLAS_(NAME) cublasD##NAME
#include "generic/cumatrix.c"