aboutsummaryrefslogtreecommitdiff
path: root/nerv/lib/matrix/cumatrix.c
blob: d998871074bbdc663f69802d3119f502d7a85f92 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#define NERV_GENERIC_CUMATRIX
#include "cumatrix.h"
#include "cuda_helper.h"
#include <string.h>
#include <time.h>
/* First argument handed to hashmap_create() when the profile table is built in
 * nerv_cumatrix_init() (presumably the bucket count -- confirm against the
 * HashMap implementation). */
#define PROFILE_HASHMAP_SIZE 123457
/* cuBLAS handle for this translation unit: created in nerv_cumatrix_init() and
 * destroyed/re-created in nerv_cumatrix_select_gpu(). */
static cublasHandle_t cublas_handle;
/* CUDA events created in nerv_cumatrix_init(). The functions visible in this
 * file do not touch them again (presumably they are used for timing by the
 * generic code included further down -- confirm). */
static cudaEvent_t profile_start, profile_stop;
/* cuRAND generator created and seeded in nerv_cumatrix_init(). Unlike the
 * other globals here it is not `static`, so it has external linkage. */
curandGenerator_t curand_gen;
/* Profile table: maps a name string to a heap-allocated float accumulator
 * (see accu_profile()). Allocated in nerv_cumatrix_init(). */
static HashMap *profile;

/*
 * Select GPU `dev` and rebuild the file-level cuBLAS handle.
 *
 * dev:    device index passed to cudaSetDevice().
 * status: out-parameter; set to NERV_NORMAL up front and then passed to every
 *         CUDA_SAFE_SYNC_CALL (presumably overwritten by the macro when a call
 *         fails -- confirm in cuda_helper.h).
 *
 * A notice naming the chosen device is written to stderr before anything else.
 */
void nerv_cumatrix_select_gpu(int dev, Status *status) {
    fprintf(stderr, "** selecting GPU %d\n", dev);
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
    CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status);
    /* The current handle is destroyed and a fresh one is created after the
     * device switch.
     * NOTE(review): cublasDestroy()/cublasCreate() return cublasStatus_t, not
     * cudaError_t. Confirm that CUDA_SAFE_SYNC_CALL interprets cuBLAS status
     * codes correctly; a cuBLAS-specific checking macro may be the better fit. */
    CUDA_SAFE_SYNC_CALL(cublasDestroy(cublas_handle), status);
    CUDA_SAFE_SYNC_CALL(cublasCreate(&cublas_handle), status);
}

/*
 * Dump the profile table to stderr.
 *
 * A banner line is printed first, then one "<key>:\t<value>" line for every
 * node found while walking each bucket chain in index order. Each stored
 * value is read through a float pointer.
 */
void nerv_cumatrix_print_profile() {
    size_t bucket_idx = 0;
    fprintf(stderr, "*** [nerv cumatrix profile] **\n");
    while (bucket_idx < profile->size)
    {
        /* follow the chain hanging off this bucket until it runs out */
        HashNode *node = profile->bucket[bucket_idx];
        while (node)
        {
            const float *total = (float *)node->val;
            fprintf(stderr, "%s:\t%.6f\n", node->key, *total);
            node = node->next;
        }
        bucket_idx++;
    }
}

/*
 * Empty the profile table by delegating to hashmap_clear().
 *
 * NOTE(review): the values stored in the table are floats malloc'd by
 * accu_profile(); confirm that hashmap_clear() releases them, otherwise each
 * clear leaks one allocation per recorded name.
 */
void nerv_cumatrix_clear_profile() {
    hashmap_clear(profile);
}

/*
 * Add `delta` to the accumulator stored under `name` in the profile table.
 *
 * name:  key to look up; on first use a new zero-initialised float is
 *        allocated and registered under this key. The pointer itself is
 *        handed to hashmap_setval() (whether the table copies the string is
 *        not visible here -- callers should presumably pass storage that
 *        outlives the table entry).
 * delta: amount to add to the running total.
 *
 * If the accumulator cannot be allocated, the sample is dropped and a warning
 * is written to stderr instead of dereferencing a NULL pointer.
 */
void accu_profile(const char *name, float delta) {
    float *val = hashmap_getval(profile, name);
    if (!val)
    {
        val = malloc(sizeof(float));
        if (!val)
        {
            /* profiling is best-effort: losing one sample beats crashing */
            fprintf(stderr,
                    "** accu_profile: out of memory, dropping sample for %s\n",
                    name);
            return;
        }
        *val = 0;
        hashmap_setval(profile, name, val);
    }
    *val += delta;
}

/*
 * Set up the file-level state used by the CUDA matrix code:
 *   - the cuBLAS handle,
 *   - the cuRAND generator, seeded from the current wall-clock time,
 *   - the two profiling events,
 *   - the profile hash table (string keys compared with strcmp).
 *
 * NOTE(review): none of the library return codes is inspected here, so a
 * failed initialisation goes unnoticed until a later call uses the handle.
 */
void nerv_cumatrix_init() {
    time_t now;

    /* linear-algebra handle */
    cublasCreate(&cublas_handle);

    /* random number generator, seeded with the current time */
    curandCreateGenerator(&curand_gen, CURAND_RNG_PSEUDO_DEFAULT);
    now = time(NULL);
    curandSetPseudoRandomGeneratorSeed(curand_gen, now);

    /* events backing the profiling facility */
    cudaEventCreate(&profile_start);
    cudaEventCreate(&profile_stop);

    /* name -> accumulated value table */
    profile = hashmap_create(PROFILE_HASHMAP_SIZE, bkdr_hash, strcmp);
}

/*
 * Single-precision instantiation.
 * The macros below are set up before the generic implementation is textually
 * included: names built with cuda_matrix_(), nerv_matrix_() and cudak_() get
 * a float-specific prefix, and NERV_CUBLAS_(X) expands to cublasS<X>.
 */
#define MATRIX_USE_FLOAT
#define cuda_matrix_(NAME) cuda_matrix_float_##NAME
#define nerv_matrix_(NAME) nerv_matrix_cuda_float_##NAME
#define cudak_(NAME) cudak_float_ ## NAME
#define NERV_CUBLAS_(NAME) cublasS##NAME
#define MATRIX_CUMATRIX_HOST_TNAME nerv_matrix_host_float_tname
#include "generic/cumatrix.c"

/*
 * Remove the previous type configuration so the generic source can be
 * included again with different settings.
 * The MATRIX_ELEM* macros are not defined in this file; presumably they are
 * set by the generic headers according to MATRIX_USE_FLOAT -- confirm.
 */
#undef NERV_CUBLAS_
#undef cudak_
#undef nerv_matrix_
#undef cuda_matrix_
#undef MATRIX_USE_FLOAT
#undef MATRIX_ELEM
#undef MATRIX_ELEM_PTR
#undef MATRIX_ELEM_PTR_BASE
#undef MATRIX_ELEM_FMT
#undef MATRIX_ELEM_WRITE_FMT
#undef MATRIX_CUMATRIX_HOST_TNAME

/*
 * Double-precision instantiation.
 * The macros below are set up before the generic implementation is textually
 * included: names built with cuda_matrix_(), nerv_matrix_() and cudak_() get
 * a double-specific prefix, and NERV_CUBLAS_(X) expands to cublasD<X>.
 */
#define MATRIX_USE_DOUBLE
#define cuda_matrix_(NAME) cuda_matrix_double_##NAME
#define nerv_matrix_(NAME) nerv_matrix_cuda_double_##NAME
#define cudak_(NAME) cudak_double_ ## NAME
#define NERV_CUBLAS_(NAME) cublasD##NAME
#define MATRIX_CUMATRIX_HOST_TNAME nerv_matrix_host_double_tname
#include "generic/cumatrix.c"