From e91fc2ddaa74dd2c46ce93c9e92020d66c037c8e Mon Sep 17 00:00:00 2001
From: Determinant
Date: Wed, 24 Feb 2016 16:58:32 +0800
Subject: add CuContext/MContext

---
 nerv/lib/matrix/generic/cumatrix.c | 149 +++++++++++++++++++++----------------
 1 file changed, 85 insertions(+), 64 deletions(-)

(limited to 'nerv/lib/matrix/generic/cumatrix.c')

diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c
index 7b70607..6342d90 100644
--- a/nerv/lib/matrix/generic/cumatrix.c
+++ b/nerv/lib/matrix/generic/cumatrix.c
@@ -1,10 +1,11 @@
 #ifdef NERV_GENERIC_CUMATRIX
 #include "matrix.h"
 #include "elem_type.h"
-#define MATRIX_DATA_FREE(ptr, status) cuda_matrix_(free)(ptr, status)
-#define MATRIX_DATA_ALLOC(dptr, stride, width, height, status) \
-    cuda_matrix_(alloc)(dptr, stride, width, height, status)
-
+#define MATRIX_DATA_FREE(ptr, context, status) \
+    cuda_matrix_(free)(ptr, context, status)
+#define MATRIX_DATA_ALLOC(dptr, stride, width, height, context, status) \
+    cuda_matrix_(alloc)(dptr, stride, width, height, \
+                        context, status)
 #define NERV_GENERIC_MATRIX
 #define NERV_GENERIC_CUKERNEL
 #include "../../common.h"
@@ -14,12 +15,13 @@

 void nerv_matrix_(add)(Matrix *c, const Matrix *a, const Matrix *b,
                        MATRIX_ELEM alpha, MATRIX_ELEM beta,
+                       CuContext *context,
                        Status *status) {
     CHECK_SAME_DIMENSION(a, b, status);
     CHECK_SAME_DIMENSION(a, c, status);
     PROFILE_START
     CUBLAS_SAFE_SYNC_CALL(
-            NERV_CUBLAS_(geam)(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N,
+            NERV_CUBLAS_(geam)(context->cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N,
                 a->ncol, a->nrow,
                 &alpha,
                 MATRIX_ELEM_PTR(a), a->stride / sizeof(MATRIX_ELEM),
@@ -33,7 +35,8 @@ void nerv_matrix_(add)(Matrix *c, const Matrix *a, const Matrix *b,

 void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b,
                        MATRIX_ELEM alpha, MATRIX_ELEM beta,
-                       int ta, int tb, Status *status) {
+                       int ta, int tb,
+                       CuContext *context, Status *status) {
 #define SWAP(a, b) \
     do { int t = (a); (a) = (b); (b) = t; } while (0)

@@ -46,7 +49,7 @@ void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b,
     /* Because matrix in Nerv is row-major, here b comes first */
     PROFILE_START
     CUBLAS_SAFE_SYNC_CALL(
-            NERV_CUBLAS_(gemm)(cublas_handle, tb, ta,
+            NERV_CUBLAS_(gemm)(context->cublas_handle, tb, ta,
                 bn, am, bm,
                 &alpha,
                 MATRIX_ELEM_PTR(b), b->stride / sizeof(MATRIX_ELEM),
@@ -58,7 +61,8 @@ void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b,
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

-void nerv_matrix_(sigmoid)(Matrix *a, const Matrix *b, Status *status) {
+void nerv_matrix_(sigmoid)(Matrix *a, const Matrix *b,
+                           CuContext *context, Status *status) {
     CHECK_SAME_DIMENSION(a, b, status);
     PROFILE_START
     cudak_(cuda_sigmoid)(b, a);
@@ -67,7 +71,8 @@ void nerv_matrix_(sigmoid)(Matrix *a, const Matrix *b, Status *status) {
 }

 void nerv_matrix_(sigmoid_grad)(Matrix *nerr, const Matrix *err,
-                                const Matrix *output, Status *status) {
+                                const Matrix *output,
+                                CuContext *context, Status *status) {
     CHECK_SAME_DIMENSION(nerr, err, status);
     CHECK_SAME_DIMENSION(nerr, output, status);
     PROFILE_START
@@ -76,14 +81,16 @@ void nerv_matrix_(sigmoid_grad)(Matrix *nerr, const Matrix *err,
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

-void nerv_matrix_(rand_uniform)(Matrix *a, Status *status) {
+void nerv_matrix_(rand_uniform)(Matrix *a, CuContext *context, Status *status) {
     PROFILE_START
-    cudak_(cuda_rand_uniform)(a);
+    cudak_(cuda_rand_uniform)(a, context);
     PROFILE_STOP
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

-void nerv_matrix_(thres_mask)(Matrix *a, Matrix *b, double thres, double low, double high, Status *status) {
+void nerv_matrix_(thres_mask)(Matrix *a, Matrix *b, double thres,
+                              double low, double high,
+                              CuContext *context, Status *status) {
     CHECK_SAME_DIMENSION(a, b, status);
     PROFILE_START
     cudak_(cuda_thres_mask)(a, b, thres, low, high);
@@ -91,7 +98,8 @@ void nerv_matrix_(thres_mask)(Matrix *a, Matrix *b, double thres, double low, do
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

-void nerv_matrix_(tanh)(Matrix *a, const Matrix *b, Status *status) {
+void nerv_matrix_(tanh)(Matrix *a, const Matrix *b,
+                        CuContext *context, Status *status) {
     CHECK_SAME_DIMENSION(a, b, status);
     PROFILE_START
     cudak_(cuda_tanh)(b, a);
@@ -99,8 +107,8 @@ void nerv_matrix_(tanh)(Matrix *a, const Matrix *b, Status *status) {
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

-void nerv_matrix_(tanh_grad)(Matrix *nerr, const Matrix *err,
-                             const Matrix *output, Status *status) {
+void nerv_matrix_(tanh_grad)(Matrix *nerr, const Matrix *err, const Matrix *output,
+                             CuContext *context, Status *status) {
     CHECK_SAME_DIMENSION(nerr, err, status);
     CHECK_SAME_DIMENSION(nerr, output, status);
     PROFILE_START
@@ -109,24 +117,25 @@ void nerv_matrix_(tanh_grad)(Matrix *nerr, const Matrix *err,
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

-Matrix *nerv_matrix_(softmax)(Matrix *b, const Matrix *a, Status *status) {
+Matrix *nerv_matrix_(softmax)(Matrix *b, const Matrix *a,
+                              CuContext *context, Status *status) {
     Matrix *max, *max_idx;
     Matrix *dno;
     CHECK_SAME_DIMENSION_RET(a, b, status);
-    max = nerv_matrix_(create)(a->nrow, 1, status);
+    max = nerv_matrix_(create)(a->nrow, 1, context, status);
     if (status->err_code != NERV_NORMAL)
         return NULL;
-    max_idx = nerv_matrix_(create)(a->nrow, 1, status);
+    max_idx = nerv_matrix_(create)(a->nrow, 1, context, status);
     if (status->err_code != NERV_NORMAL)
     {
-        nerv_matrix_(destroy)(max, status);
+        nerv_matrix_(destroy)(max, context, status);
         return NULL;
     }
-    dno = nerv_matrix_(create)(a->nrow, 1, status);
+    dno = nerv_matrix_(create)(a->nrow, 1, context, status);
     if (status->err_code != NERV_NORMAL)
     {
         /* FIXME: destroy may also fail? */
-        nerv_matrix_(destroy)(max, status);
-        nerv_matrix_(destroy)(max_idx, status);
+        nerv_matrix_(destroy)(max, context, status);
+        nerv_matrix_(destroy)(max_idx, context, status);
         return NULL;
     }
     PROFILE_START
@@ -134,14 +143,14 @@ Matrix *nerv_matrix_(softmax)(Matrix *b, const Matrix *a, Status *status) {
     cudak_(cuda_softmax_denominator)(a, max, dno);
     cudak_(cuda_softmax_final)(a, max, dno, b);
     PROFILE_STOP
-    nerv_matrix_(destroy)(max, status);
-    nerv_matrix_(destroy)(dno, status);
+    nerv_matrix_(destroy)(max, context, status);
+    nerv_matrix_(destroy)(dno, context, status);
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
     return max_idx;
 }

-Matrix *nerv_matrix_(rowsum)(Matrix *a, Status *status) {
-    Matrix *b = nerv_matrix_(create)(a->nrow, 1, status);
+Matrix *nerv_matrix_(rowsum)(Matrix *a, CuContext *context, Status *status) {
+    Matrix *b = nerv_matrix_(create)(a->nrow, 1, context, status);
     if (status->err_code != NERV_NORMAL)
         return NULL;
     PROFILE_START
@@ -151,8 +160,8 @@ Matrix *nerv_matrix_(rowsum)(Matrix *a, Status *status) {
     return b;
 }

-Matrix *nerv_matrix_(colsum)(Matrix *a, Status *status) {
-    Matrix *b = nerv_matrix_(create)(1, a->ncol, status);
+Matrix *nerv_matrix_(colsum)(Matrix *a, CuContext *context, Status *status) {
+    Matrix *b = nerv_matrix_(create)(1, a->ncol, context, status);
     if (status->err_code != NERV_NORMAL)
         return NULL;
     PROFILE_START
@@ -163,8 +172,8 @@ Matrix *nerv_matrix_(colsum)(Matrix *a, Status *status) {
 }

 Matrix *nerv_matrix_(colsame)(Matrix *a, const Matrix *ref,
-                              Status *status) {
-    Matrix *b = nerv_matrix_(create)(1, a->ncol, status);
+                              CuContext *context, Status *status) {
+    Matrix *b = nerv_matrix_(create)(1, a->ncol, context, status);
     if (status->err_code != NERV_NORMAL)
         return NULL;
     CHECK_SAME_DIMENSION_RET(a, ref, status);
@@ -175,8 +184,8 @@ Matrix *nerv_matrix_(colsame)(Matrix *a, const Matrix *ref,
     return b;
 }

-Matrix *nerv_matrix_(rowmax)(Matrix *a, Status *status) {
-    Matrix *b = nerv_matrix_(create)(a->nrow, 1, status);
+Matrix *nerv_matrix_(rowmax)(Matrix *a, CuContext *context, Status *status) {
+    Matrix *b = nerv_matrix_(create)(a->nrow, 1, context, status);
     if (status->err_code != NERV_NORMAL)
         return NULL;
     PROFILE_START
@@ -187,15 +196,15 @@ Matrix *nerv_matrix_(rowmax)(Matrix *a, Status *status) {
 }

 void nerv_matrix_(rowmax_idx)(Matrix *a, Matrix **b, Matrix **idx,
-                              Status *status) {
-    *b = nerv_matrix_(create)(a->nrow, 1, status);
+                              CuContext *context, Status *status) {
+    *b = nerv_matrix_(create)(a->nrow, 1, context, status);
     if (status->err_code != NERV_NORMAL)
         return;
-    *idx = nerv_matrix_(create)(a->nrow, 1, status);
+    *idx = nerv_matrix_(create)(a->nrow, 1, context, status);
     if (status->err_code != NERV_NORMAL)
     {
         /* FIXME: destroy may also fail? */
-        nerv_matrix_(destroy)(*b, status);
+        nerv_matrix_(destroy)(*b, context, status);
         return;
     }
     PROFILE_START
@@ -205,7 +214,7 @@ void nerv_matrix_(rowmax_idx)(Matrix *a, Matrix **b, Matrix **idx,
 }

 void nerv_matrix_(add_row)(Matrix *b, const Matrix *a, double beta,
-                           Status *status) {
+                           CuContext *context, Status *status) {
     if (a->ncol != b->ncol)
         NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
     if (a->nrow != 1)
@@ -216,23 +225,25 @@ void nerv_matrix_(add_row)(Matrix *b, const Matrix *a, double beta,
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

-void nerv_matrix_(fill)(Matrix *self, double val, Status *status) {
+void nerv_matrix_(fill)(Matrix *self, double val,
+                        CuContext *context, Status *status) {
     PROFILE_START
     cudak_(cuda_fill)(self, val);
     PROFILE_STOP
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

-void nerv_matrix_(clip)(Matrix *self, double val_1, double val_2, Status *status) {
+void nerv_matrix_(clip)(Matrix *self, double val1, double val2,
+                        CuContext *context, Status *status) {
     PROFILE_START
-    cudak_(cuda_clip)(self, val_1, val_2);
+    cudak_(cuda_clip)(self, val1, val2);
     PROFILE_STOP
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

 void nerv_matrix_(copy_fromd)(Matrix *a, const Matrix *b,
                               int a_begin, int b_begin, int b_end,
-                              Status *status) {
+                              CuContext *context, Status *status) {
     if (!(0 <= b_begin && b_begin < b_end && b_end <= b->nrow &&
             a_begin + b_end - b_begin <= a->nrow))
         NERV_EXIT_STATUS(status, MAT_INVALID_COPY_INTERVAL, 0);
@@ -251,7 +262,7 @@ void nerv_matrix_(copy_fromd)(Matrix *a, const Matrix *b,

 void nerv_matrix_(copy_fromh)(Matrix *a, const Matrix *b,
                               int a_begin, int b_begin, int b_end,
-                              Status *status) {
+                              CuContext *context, Status *status) {
     if (!(0 <= b_begin && b_begin < b_end && b_end <= b->nrow &&
             a_begin + b_end - b_begin <= a->nrow))
         NERV_EXIT_STATUS(status, MAT_INVALID_COPY_INTERVAL, 0);
@@ -270,7 +281,7 @@ void nerv_matrix_(copy_fromh)(Matrix *a, const Matrix *b,

 void nerv_matrix_(copy_toh)(Matrix *a, const Matrix *b,
                             int a_begin, int a_end, int b_begin,
-                            Status *status) {
+                            CuContext *context, Status *status) {
     if (!(0 <= a_begin && a_begin < a_end && a_end <= a->nrow &&
             b_begin + a_end - a_begin <= b->nrow))
         NERV_EXIT_STATUS(status, MAT_INVALID_COPY_INTERVAL, 0);
@@ -287,15 +298,15 @@ void nerv_matrix_(copy_toh)(Matrix *a, const Matrix *b,
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

-Matrix *nerv_matrix_(trans)(Matrix *a, Status *status) {
+Matrix *nerv_matrix_(trans)(Matrix *a, CuContext *context, Status *status) {
     MATRIX_ELEM alpha = 1, beta = 0;
-    Matrix *b = nerv_matrix_(create)(a->ncol, a->nrow, status);
+    Matrix *b = nerv_matrix_(create)(a->ncol, a->nrow, context, status);
     if (status->err_code != NERV_NORMAL)
         return NULL;
     /* FIXME: possible memory leak when lua error is raised */
     PROFILE_START
     CUBLAS_SAFE_SYNC_CALL_RET(
-            NERV_CUBLAS_(geam)(cublas_handle, CUBLAS_OP_T, CUBLAS_OP_T,
+            NERV_CUBLAS_(geam)(context->cublas_handle, CUBLAS_OP_T, CUBLAS_OP_T,
                 a->nrow, a->ncol,
                 &alpha,
                 MATRIX_ELEM_PTR(a), a->stride / sizeof(MATRIX_ELEM),
@@ -309,7 +320,7 @@ Matrix *nerv_matrix_(trans)(Matrix *a, Status *status) {
 }

 void nerv_matrix_(mul_elem)(Matrix *c, const Matrix *a, const Matrix *b,
-                            Status *status) {
+                            CuContext *context, Status *status) {
     CHECK_SAME_DIMENSION(a, b, status);
     CHECK_SAME_DIMENSION(a, c, status);
     PROFILE_START
@@ -318,7 +329,8 @@ void nerv_matrix_(mul_elem)(Matrix *c, const Matrix *a, const Matrix *b,
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

-void nerv_matrix_(log_elem)(Matrix *b, const Matrix *a, Status *status) {
+void nerv_matrix_(log_elem)(Matrix *b, const Matrix *a,
+                            CuContext *context, Status *status) {
     CHECK_SAME_DIMENSION(a, b, status);
     PROFILE_START
     cudak_(cuda_log_elem)(a, b);
@@ -326,14 +338,15 @@ void nerv_matrix_(log_elem)(Matrix *b, const Matrix *a, Status *status) {
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

-Matrix *nerv_matrix_(decompress)(const Matrix *a, int orig_col, Status *status) {
+Matrix *nerv_matrix_(decompress)(const Matrix *a, int orig_col,
+                                 CuContext *context, Status *status) {
     Matrix *b;
     if (a->ncol != 1)
     {
         NERV_SET_STATUS(status, MAT_COL_VECTOR_EXP, 0);
         return NULL;
     }
-    b = nerv_matrix_(create)(a->nrow, orig_col, status);
+    b = nerv_matrix_(create)(a->nrow, orig_col, context, status);
     if (status->err_code != NERV_NORMAL)
         return NULL;
     PROFILE_START
@@ -345,7 +358,8 @@ Matrix *nerv_matrix_(decompress)(const Matrix *a, int orig_col, Status *status)
 }

 void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b,
-                                          const Matrix *idx, int b_begin, Status *status) {
+                                          const Matrix *idx, int b_begin,
+                                          CuContext *context, Status *status) {
     long nrow = a->nrow;
     if (!(0 <= b_begin && b_begin + nrow <= idx->ncol))
         NERV_EXIT_STATUS(status, MAT_INVALID_COPY_INTERVAL, 0);
@@ -379,7 +393,8 @@ void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b,
 }

 void nerv_matrix_(copy_rows_fromd_by_idx)(Matrix *a, const Matrix *b,
-                                          const Matrix *idx, int b_begin, Status *status) {
+                                          const Matrix *idx, int b_begin,
+                                          CuContext *context, Status *status) {
     long nrow = a->nrow;
     if (!(0 <= b_begin && b_begin + nrow <= idx->ncol))
         NERV_EXIT_STATUS(status, MAT_INVALID_COPY_INTERVAL, 0);
@@ -394,7 +409,8 @@ void nerv_matrix_(copy_rows_fromd_by_idx)(Matrix *a, const Matrix *b,
 }

 void nerv_matrix_(copy_rows_fromd_by_colidx)(Matrix *a, const Matrix *b,
-                                             const Matrix *idx, int b_begin, Status *status) {
+                                             const Matrix *idx, int b_begin,
+                                             CuContext *context, Status *status) {
     long nrow = a->nrow;
     if (!(0 <= b_begin && b_begin + nrow <= idx->nrow))
         NERV_EXIT_STATUS(status, MAT_INVALID_COPY_INTERVAL, 0);
@@ -412,7 +428,9 @@ void nerv_matrix_(copy_rows_fromd_by_colidx)(Matrix *a, const Matrix *b,

 #ifdef __NERV_FUTURE_CUDA_7

-void nerv_matrix_(update_select_rows_by_rowidx)(Matrix *c, const Matrix *a, const Matrix *idx, double alpha, double beta, Status *status) {
+void nerv_matrix_(update_select_rows_by_rowidx)(Matrix *c, const Matrix *a,
+                                                const Matrix *idx, double alpha, double beta,
+                                                CuContext *context, Status *status) {
     long nrow = a->nrow;
     if (idx->nrow != 1 || idx->ncol != a->nrow)
         NERV_EXIT_STATUS(status, MAT_IDX_VECTOR_EXP, 0);
@@ -424,7 +442,9 @@ void nerv_matrix_(update_select_rows_by_rowidx)(Matrix *c, const Matrix *a, cons
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

-void nerv_matrix_(update_select_rows_by_colidx)(Matrix *c, const Matrix *a, const Matrix *idx, double alpha, double beta, Status *status) {
+void nerv_matrix_(update_select_rows_by_colidx)(Matrix *c, const Matrix *a,
+                                                const Matrix *idx, double alpha, double beta,
+                                                CuContext *context, Status *status) {
     long nrow = a->nrow;
     if (idx->ncol != 1 || idx->nrow != a->nrow)
         NERV_EXIT_STATUS(status, MAT_IDX_VECTOR_EXP, 0);
@@ -438,20 +458,20 @@ void nerv_matrix_(update_select_rows_by_colidx)(Matrix *c, const Matrix *a, cons
 #endif

 void nerv_matrix_(expand_frm)(Matrix *a, const Matrix *b,
-                              int context, Status *status) {
+                              int cont, CuContext *context, Status *status) {
     if (a->nrow != b->nrow)
         NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
-    if (a->ncol != b->ncol * (context * 2 + 1))
+    if (a->ncol != b->ncol * (cont * 2 + 1))
         NERV_EXIT_STATUS(status, MAT_GENERAL_ERR,
                          "the width should be 2 * context + 1");
     PROFILE_START
-    cudak_(cuda_expand_frm)(b, a, context);
+    cudak_(cuda_expand_frm)(b, a, cont);
     PROFILE_STOP
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

 void nerv_matrix_(rearrange_frm)(Matrix *a, const Matrix *b,
-                                 int step, Status *status) {
+                                 int step, CuContext *context, Status *status) {
     CHECK_SAME_DIMENSION(a, b, status);
     if (b->ncol % step)
         NERV_EXIT_STATUS(status, MAT_GENERAL_ERR,
@@ -463,7 +483,7 @@ void nerv_matrix_(rearrange_frm)(Matrix *a, const Matrix *b,
 }

 void nerv_matrix_(scale_rows_by_col)(Matrix *a, const Matrix *b,
-                                     Status *status) {
+                                     CuContext *context, Status *status) {
     if (a->nrow != b->nrow)
         NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
     if (b->ncol != 1)
@@ -475,7 +495,7 @@ void nerv_matrix_(scale_rows_by_col)(Matrix *a, const Matrix *b,
 }

 void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b,
-                                     Status *status) {
+                                     CuContext *context, Status *status) {
     if (a->ncol != b->ncol)
         NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
     if (b->nrow != 1)
@@ -486,7 +506,8 @@ void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b,
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

-void nerv_matrix_(prefixsum_row)(Matrix *a, const Matrix *b, Status *status) {
+void nerv_matrix_(prefixsum_row)(Matrix *a, const Matrix *b,
+                                 CuContext *context, Status *status) {
     CHECK_SAME_DIMENSION(a, b, status);
     PROFILE_START
     cudak_(cuda_prefixsum_row)(b, a);
@@ -494,14 +515,14 @@ void nerv_matrix_(prefixsum_row)(Matrix *a, const Matrix *b, Status *status) {
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

-static void cuda_matrix_(free)(MATRIX_ELEM *ptr, Status *status) {
+static void cuda_matrix_(free)(MATRIX_ELEM *ptr, CuContext *context, Status *status) {
     CUDA_SAFE_SYNC_CALL(cudaFree(ptr), status);
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }

 static void cuda_matrix_(alloc)(MATRIX_ELEM **dptr, size_t *stride,
                                 long width, long height,
-                                Status *status) {
+                                CuContext *context, Status *status) {
     PROFILE_START
     CUDA_SAFE_SYNC_CALL(cudaMallocPitch((void **)dptr, stride, width, height),
                         status);
--
cgit v1.2.3
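
Illustration (not part of the patch): the practical effect of this change is that every cumatrix entry point now takes an explicit CuContext — cuBLAS calls read context->cublas_handle instead of a global handle — so callers must thread a context alongside the usual Status. A minimal sketch of the resulting call pattern follows, written against the generic nerv_matrix_(...) macro names used in this file. The matrix calls mirror the signatures introduced above; the context constructor/destructor names (nerv_cuda_context_create / nerv_cuda_context_destroy) and the surrounding instantiation environment are assumptions for illustration only.

/* Sketch only: assumes it is compiled in the same generic-macro environment
 * as cumatrix.c (MATRIX_ELEM, nerv_matrix_(...) name mangling, Status,
 * NERV_NORMAL all in scope).  nerv_cuda_context_create/destroy are assumed
 * names for context management defined outside this file. */
static void cumatrix_context_demo(Status *status) {
    CuContext *context = nerv_cuda_context_create(status);   /* assumption */
    if (status->err_code != NERV_NORMAL)
        return;

    /* allocation now goes through MATRIX_DATA_ALLOC, which also receives the context */
    Matrix *in  = nerv_matrix_(create)(128, 512, context, status);
    Matrix *out = nerv_matrix_(create)(128, 512, context, status);

    if (in && out && status->err_code == NERV_NORMAL) {
        nerv_matrix_(rand_uniform)(in, context, status);       /* kernel call receives the context */
        nerv_matrix_(sigmoid)(out, in, context, status);
        nerv_matrix_(add)(out, out, in, 1.0, 1.0,
                          context, status);                    /* geam via context->cublas_handle */
    }

    if (in)
        nerv_matrix_(destroy)(in, context, status);
    if (out)
        nerv_matrix_(destroy)(out, context, status);
    nerv_cuda_context_destroy(context, status);               /* assumption */
}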