diff options
-rw-r--r-- | nerv/lib/matrix/cukernel.h | 2 | ||||
-rw-r--r-- | nerv/lib/matrix/generic/cukernel.cu | 45 | ||||
-rw-r--r-- | nerv/lib/matrix/generic/cumatrix.c | 18 | ||||
-rw-r--r-- | nerv/lib/matrix/generic/cumatrix.h | 3 | ||||
-rw-r--r-- | nerv/matrix/generic/cumatrix.c | 21 |
5 files changed, 89 insertions, 0 deletions
diff --git a/nerv/lib/matrix/cukernel.h b/nerv/lib/matrix/cukernel.h index fffe0bc..40f8e9f 100644 --- a/nerv/lib/matrix/cukernel.h +++ b/nerv/lib/matrix/cukernel.h @@ -3,6 +3,8 @@ void cudak_(cuda_mul_elem)(const Matrix *a, const Matrix *b, Matrix *c); void cudak_(cuda_log_elem)(const Matrix *a, Matrix *b); void cudak_(cuda_sigmoid)(const Matrix *a, Matrix *b); void cudak_(cuda_sigmoid_grad)(const Matrix *output, const Matrix *err, Matrix *nerr); +void cudak_(cuda_tanh)(const Matrix *a, Matrix *b); +void cudak_(cuda_tanh_grad)(const Matrix *output, const Matrix *err, Matrix *nerr); void cudak_(cuda_rowsum)(const Matrix *a, Matrix *b); void cudak_(cuda_rowmax)(const Matrix *a, Matrix *b); void cudak_(cuda_rowmax_idx)(const Matrix *a, Matrix *b, Matrix *idx); diff --git a/nerv/lib/matrix/generic/cukernel.cu b/nerv/lib/matrix/generic/cukernel.cu index e58c488..c82041f 100644 --- a/nerv/lib/matrix/generic/cukernel.cu +++ b/nerv/lib/matrix/generic/cukernel.cu @@ -53,6 +53,28 @@ __global__ void cudak_(sigmoid_grad)(const MATRIX_ELEM *output, nerr[idx] = output[idx] * (1.0 - output[idx]) * err[idx]; } +__global__ void cudak_(tanh)(const MATRIX_ELEM *a, MATRIX_ELEM *b, + int nrow, int ncol, int stride) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + long idx; + if (i >= nrow || j >= ncol) return; + idx = j + i * stride; + b[idx] = (exp(a[idx]) - exp(-a[idx])) / (exp(a[idx]) + exp(-a[idx])); +} + +__global__ void cudak_(tanh_grad)(const MATRIX_ELEM *output, + const MATRIX_ELEM *err, + MATRIX_ELEM *nerr, + int nrow, int ncol, int stride) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + long idx; + if (i >= nrow || j >= ncol) return; + idx = j + i * stride; + nerr[idx] = (1.0 - output[idx] * output[idx]) * err[idx]; +} + __global__ void cudak_(softmax_final)(const MATRIX_ELEM *a, MATRIX_ELEM *b, const MATRIX_ELEM *max, const MATRIX_ELEM *deno, int nrow, int ncol, int stride, int mstride) { @@ -353,6 +375,29 @@ extern "C" { cudaStreamSynchronize(0); } + void cudak_(cuda_tanh)(const Matrix *a, Matrix *b) { + dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), + CEIL_DIV(b->nrow, threadsPerBlock.y)); + cudak_(tanh)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), b->nrow, b->ncol, + b->stride / sizeof(MATRIX_ELEM)); + cudaStreamSynchronize(0); + } + + void cudak_(cuda_tanh_grad)(const Matrix *output, + const Matrix *err, Matrix *nerr) { + dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(nerr->ncol, threadsPerBlock.x), + CEIL_DIV(nerr->nrow, threadsPerBlock.y)); + cudak_(tanh_grad)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(output), MATRIX_ELEM_PTR(err), + MATRIX_ELEM_PTR(nerr), + nerr->nrow, nerr->ncol, + nerr->stride / sizeof(MATRIX_ELEM)); + cudaStreamSynchronize(0); + } + void cudak_(cuda_rowsum)(const Matrix *a, Matrix *b) { dim3 block(CUDA_THREADS_NN, 1); int ncol = a->ncol; diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c index 00af895..77cb304 100644 --- a/nerv/lib/matrix/generic/cumatrix.c +++ b/nerv/lib/matrix/generic/cumatrix.c @@ -75,6 +75,24 @@ void nerv_matrix_(sigmoid_grad)(Matrix *nerr, const Matrix *err, NERV_SET_STATUS(status, NERV_NORMAL, 0); } +void nerv_matrix_(tanh)(Matrix *a, const Matrix *b, Status *status) { + CHECK_SAME_DIMENSION(a, b, status); + PROFILE_START + cudak_(cuda_tanh)(b, a); + PROFILE_STOP + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(tanh_grad)(Matrix *nerr, const Matrix *err, + const Matrix *output, Status *status) { + CHECK_SAME_DIMENSION(nerr, err, status); + CHECK_SAME_DIMENSION(nerr, output, status); + PROFILE_START + cudak_(cuda_tanh_grad)(output, err, nerr); + PROFILE_STOP + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + Matrix *nerv_matrix_(softmax)(Matrix *b, const Matrix *a, Status *status) { Matrix *max, *max_idx; Matrix *dno; diff --git a/nerv/lib/matrix/generic/cumatrix.h b/nerv/lib/matrix/generic/cumatrix.h index 21c29b7..aa8805a 100644 --- a/nerv/lib/matrix/generic/cumatrix.h +++ b/nerv/lib/matrix/generic/cumatrix.h @@ -9,6 +9,9 @@ void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b, void nerv_matrix_(sigmoid)(Matrix *a, const Matrix *b, Status *status); void nerv_matrix_(sigmoid_grad)(Matrix *nerr, const Matrix *err, const Matrix *output, Status *status); +void nerv_matrix_(tanh)(Matrix *a, const Matrix *b, Status *status); +void nerv_matrix_(tanh_grad)(Matrix *nerr, const Matrix *err, + const Matrix *output, Status *status); Matrix *nerv_matrix_(softmax)(Matrix *b, const Matrix *a, Status *status); Matrix *nerv_matrix_(rowsum)(Matrix *a, Status *status); diff --git a/nerv/matrix/generic/cumatrix.c b/nerv/matrix/generic/cumatrix.c index e1519b0..3d9e694 100644 --- a/nerv/matrix/generic/cumatrix.c +++ b/nerv/matrix/generic/cumatrix.c @@ -62,6 +62,25 @@ static int nerv_matrix_(lua_sigmoid_grad)(lua_State *L) { return 0; } +static int nerv_matrix_(lua_tanh)(lua_State *L) { + Status status; + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname)); + nerv_matrix_(tanh)(a, b, &status); + NERV_LUA_CHECK_STATUS(L, status); + return 0; +} + +static int nerv_matrix_(lua_tanh_grad)(lua_State *L) { + Status status; + Matrix *nerr = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *err = luaT_checkudata(L, 2, nerv_matrix_(tname)); + Matrix *output = luaT_checkudata(L, 3, nerv_matrix_(tname)); + nerv_matrix_(tanh_grad)(nerr, err, output, &status); + NERV_LUA_CHECK_STATUS(L, status); + return 0; +} + static int nerv_matrix_(lua_softmax)(lua_State *L) { Status status; Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname)); @@ -328,6 +347,8 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = { {"fill", nerv_matrix_(lua_fill)}, {"sigmoid", nerv_matrix_(lua_sigmoid)}, {"sigmoid_grad", nerv_matrix_(lua_sigmoid_grad)}, + {"tanh", nerv_matrix_(lua_tanh)}, + {"tanh_grad", nerv_matrix_(lua_tanh_grad)}, {"softmax", nerv_matrix_(lua_softmax)}, {"mul_elem", nerv_matrix_(lua_mul_elem)}, {"log_elem", nerv_matrix_(lua_log_elem)}, |