Diffstat (limited to 'nerv')
28 files changed, 303 insertions, 58 deletions
diff --git a/nerv/init.lua b/nerv/init.lua
index 9c1a5c8..406aea6 100644
--- a/nerv/init.lua
+++ b/nerv/init.lua
@@ -22,6 +22,12 @@ function nerv.mesg_with_timestamp(fmt, ...)
             os.date("%H:%M:%S %F"), fmt), ...)
 end
 
+function nerv.info_stderr(fmt, ...)
+    io.stderr:write(string.format(
+        string.format("(%s)[nerv] info: %s\n", os.date("%H:%M:%S %F"), fmt),
+        ...))
+end
+
 function nerv.info(fmt, ...)
     nerv.printf(
         string.format("(%s)[nerv] info: %s\n",
diff --git a/nerv/io/sgd_buffer.lua b/nerv/io/sgd_buffer.lua
index 74c4934..dd5d285 100644
--- a/nerv/io/sgd_buffer.lua
+++ b/nerv/io/sgd_buffer.lua
@@ -55,7 +55,7 @@ function SGDBuffer:saturate()
                 buff.data:copy_from(buff.leftover, 0, lrow)
                 buff.leftover = nil
             end
-            nerv.info("buffer leftover: %d\n", lrow)
+            nerv.info("buffer leftover: %d", lrow)
             reader.tail = lrow
             reader.has_leftover = false
         end
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 00cbcfb..56a32f9 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -19,19 +19,19 @@ end
 
 function MatrixParam:update(gradient)
     local gconf = self.gconf
-    self.correction:add(self.correction, gradient, gconf.momentum, 1.0)
+    self.correction:add(self.correction, gradient, gconf.momentum, 1.0, nerv.context)
     -- momentum gain
     local mmt_gain = 1.0 / (1.0 - gconf.momentum);
     local n = self.gconf.batch_size * mmt_gain
     -- perform update
-    self.trans:add(self.trans, self.correction, 1.0, -gconf.lrate / n)
+    self.trans:add(self.trans, self.correction, 1.0, -gconf.lrate / n, nerv.context)
 end
 
 function LinearTransParam:update(gradient)
     MatrixParam.update(self, gradient)
     local gconf = self.gconf
     -- weight decay
-    self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost)
+    self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost, nerv.context)
 end
 
 function AffineLayer:__init(id, global_conf, layer_conf)
@@ -61,29 +61,50 @@ function AffineLayer:init(batch_size)
 end
 
 function AffineLayer:update(bp_err, input, output)
+    --print(nerv.context)
     if self.direct_update then
-        self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N')
+        self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N', nerv.context)
         -- momentum gain
         local mmt_gain = 1.0 / (1.0 - gconf.momentum);
         local n = self.gconf.batch_size * mmt_gain
         -- perform update
-        self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0, -gconf.lrate / n)
+        self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0, -gconf.lrate / n, nerv.context)
     else
-        self.ltp_grad:mul(input[1], bp_err[1], 1.0, 0.0, 'T', 'N')
+        self.ltp_grad:mul(input[1], bp_err[1], 1.0, 0.0, 'T', 'N', nerv.context)
         self.ltp:update(self.ltp_grad)
     end
     self.bp:update(bp_err[1]:colsum())
 end
 
+function AffineLayer:gradient(bp_err, input, output)
+
+    self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N', nerv.context)
+    self.bp_grad = bp_err[1]:colsum()
+    self.bp.correction:add(self.bp.correction, self.bp_grad, gconf.momentum, 1.0, nerv.context)
+end
+
+function AffineLayer:update_gradient()
+    -- momentum gain
+    local mmt_gain = 1.0 / (1.0 - gconf.momentum);
+    local n = self.gconf.batch_size * mmt_gain
+    -- perform update
+    self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0, -gconf.lrate / n, nerv.context)
+    self.bp.trans:add(self.bp.trans, self.bp.correction, 1.0, -gconf.lrate / n, nerv.context)
+
+    self.ltp.trans:add(self.ltp.trans, self.ltp.trans, 1.0, -gconf.lrate * gconf.wcost, nerv.context)
+    self.bp.trans:add(self.bp.trans, self.bp.trans, 1.0, -gconf.lrate * gconf.wcost, nerv.context)
+end
+
 function AffineLayer:propagate(input, output)
     -- apply linear transform
-    output[1]:mul(input[1], self.ltp.trans, 1.0, 0.0, 'N', 'N')
+    --print(nerv.context)
+    output[1]:mul(input[1], self.ltp.trans, 1.0, 0.0, 'N', 'N', nerv.context)
     -- add bias
     output[1]:add_row(self.bp.trans, 1.0)
 end
 
 function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
-    next_bp_err[1]:mul(bp_err[1], self.ltp.trans, 1.0, 0.0, 'N', 'T')
+    next_bp_err[1]:mul(bp_err[1], self.ltp.trans, 1.0, 0.0, 'N', 'T', nerv.context)
 end
 
 function AffineLayer:get_params()
diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua
index 7bd7617..23cf1db 100644
--- a/nerv/layer/combiner.lua
+++ b/nerv/layer/combiner.lua
@@ -36,7 +36,7 @@ end
 function CombinerLayer:propagate(input, output)
     output[1]:fill(0)
     for i = 1, #self.dim_in do
-        output[1]:add(output[1], input[i], 1.0, self.lambda[i])
+        output[1]:add(output[1], input[i], 1.0, self.lambda[i], nerv.context)
     end
     for i = 2, #self.dim_out do
         output[i]:copy_fromd(output[1])
@@ -47,10 +47,10 @@ function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output)
     local sum = self.sum
     sum:copy_fromd(bp_err[1])
     for i = 2, #self.dim_out do
-        sum:add(sum, bp_err[i], 1.0, 1.0)
+        sum:add(sum, bp_err[i], 1.0, 1.0, nerv.context)
     end
     for i = 1, #self.dim_in do
-        next_bp_err[i]:add(next_bp_err[i], sum, 0.0, self.lambda[i])
+        next_bp_err[i]:add(next_bp_err[i], sum, 0.0, self.lambda[i], nerv.context)
     end
 end
diff --git a/nerv/layer/sigmoid.lua b/nerv/layer/sigmoid.lua
index dfd09eb..f6f1417 100644
--- a/nerv/layer/sigmoid.lua
+++ b/nerv/layer/sigmoid.lua
@@ -18,6 +18,14 @@ function SigmoidLayer:update(bp_err, input, output)
     -- no params, therefore do nothing
 end
 
+function SigmoidLayer:gradient(bp_err, input, output)
+    -- no params, therefore do nothing
+end
+
+function SigmoidLayer:update_gradient()
+    -- no params, therefore do nothing
+end
+
 function SigmoidLayer:propagate(input, output)
     output[1]:sigmoid(input[1])
 end
diff --git a/nerv/layer/softmax.lua b/nerv/layer/softmax.lua
index e979ebf..7e9c6f0 100644
--- a/nerv/layer/softmax.lua
+++ b/nerv/layer/softmax.lua
@@ -18,6 +18,15 @@ function SoftmaxLayer:update(bp_err, input, output)
     -- no params, therefore do nothing
 end
 
+function SoftmaxLayer:gradient(bp_err, input, output)
+    -- no params, therefore do nothing
+end
+
+function SoftmaxLayer:update_gradient()
+    -- no params, therefore do nothing
+end
+
+
 function SoftmaxLayer:propagate(input, output)
     output[1]:softmax(input[1])
 end
diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua
index f878a2f..42adbc6 100644
--- a/nerv/layer/softmax_ce.lua
+++ b/nerv/layer/softmax_ce.lua
@@ -27,6 +27,14 @@ function SoftmaxCELayer:update(bp_err, input, output)
     -- no params, therefore do nothing
 end
 
+function SoftmaxCELayer:gradient(bp_err, input, output)
+    -- no params, therefore do nothing
+end
+
+function SoftmaxCELayer:update_gradient(bp_err, input, output)
+    -- no params, therefore do nothing
+end
+
 function SoftmaxCELayer:propagate(input, output)
     local softmax = self.softmax
     local ce = self.ce
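Taken together, the gradient()/update_gradient() pair added to the layer classes above splits the old update() into two phases: gradient() only accumulates into the correction buffers, while update_gradient() applies the accumulated correction (with momentum gain and weight decay) to the parameters once. A minimal plain-Lua sketch of the intended calling sequence; the trainer loop and the layers/bp_errs/inputs/outputs names are hypothetical, only the two methods come from this patch:

    -- assumed: layers is an array of nerv layer objects; bp_errs, inputs and
    -- outputs hold the per-layer tensors of one forward/backward pass
    for i = 1, #layers do
        layers[i]:gradient(bp_errs[i], inputs[i], outputs[i])  -- accumulate only
    end
    -- ... possibly more accumulation passes, e.g. from other worker threads ...
    for i = 1, #layers do
        layers[i]:update_gradient()                            -- apply once
    end

Keeping accumulation side-effect-free on the parameters is what makes the per-thread nerv.context plumbing below useful.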
diff --git a/nerv/lib/common.c b/nerv/lib/common.c
index db667b2..1fa1d9f 100644
--- a/nerv/lib/common.c
+++ b/nerv/lib/common.c
@@ -1,4 +1,5 @@
 #include "common.h"
+#include "matrix/cuda_helper.h"
 #include <stdarg.h>
 
 int nerv_error(lua_State *L, const char *err_mesg_fmt, ...)
 {
     va_list ap;
diff --git a/nerv/lib/common.h b/nerv/lib/common.h
index 1c588d1..a4e3582 100644
--- a/nerv/lib/common.h
+++ b/nerv/lib/common.h
@@ -59,6 +59,8 @@ typedef struct Status {
         nerv_error_status(L, &status); \
     } while (0)
 
+#define PROFILE_HASHMAP_SIZE 123457
+
 typedef struct HashNode {
     const char *key;
     void *val;
@@ -82,6 +84,8 @@ void hashmap_clear(HashMap *h);
 
 size_t bkdr_hash(const char *key);
 
+extern const char *nerv_context_tname;
+
 int nerv_error(lua_State *L, const char *err_mesg_fmt, ...);
 int nerv_error_status(lua_State *L, Status *status);
 int nerv_error_method_not_implemented(lua_State *L);
diff --git a/nerv/lib/io/chunk_file.c b/nerv/lib/io/chunk_file.c
index 4e00b0b..79dbee3 100644
--- a/nerv/lib/io/chunk_file.c
+++ b/nerv/lib/io/chunk_file.c
@@ -61,7 +61,7 @@ static const char *read_chunk_metadata(FILE *fp, const char *fn,
     NERV_SET_STATUS(status,
             (fgets(buff + LUA_RETURN_LEN,
                    LINEBUFF_SIZE, fp) == (buff + LUA_RETURN_LEN) ? \
              NERV_NORMAL : CF_INVALID_FORMAT), 0);
-    fprintf(stderr, "metadata: %s\n", buff);
+    //fprintf(stderr, "metadata: %s\n", buff);
     return buff;
 }
 
@@ -112,7 +112,7 @@ static ChunkFile *open_read(const char *fn, Status *status) {
     for (i = 0;; offset += chunk_len, i++)
     {
         ChunkInfo *cip;
-        fprintf(stderr, "reading chunk %d from %d\n", i, (int)offset);
+        //fprintf(stderr, "reading chunk %d from %d\n", i, (int)offset);
         /* skip to the begining of chunk i */
         if (fseeko(fp, offset, SEEK_SET) != 0)
         {
diff --git a/nerv/lib/matrix/cukernel.h b/nerv/lib/matrix/cukernel.h
index 2126c6f..31e199b 100644
--- a/nerv/lib/matrix/cukernel.h
+++ b/nerv/lib/matrix/cukernel.h
@@ -13,7 +13,7 @@ void cudak_(cuda_softmax_final)(const Matrix *a, const Matrix *max, const Matrix
 void cudak_(cuda_add_row)(const Matrix *a, Matrix *b, double beta);
 void cudak_(cuda_fill)(Matrix *a, double val);
 void cudak_(cuda_clip)(Matrix *a, double val_1, double val_2);
-void cudak_(cuda_expand_frm)(const Matrix *a, Matrix *b, int context);
+void cudak_(cuda_expand_frm)(const Matrix *a, Matrix *b, int context, int a_begin, int a_end);
 void cudak_(cuda_rearrange_frm)(const Matrix *a, Matrix *b, int step);
 void cudak_(cuda_scale_rows_by_row)(const Matrix *a, Matrix *b);
 void cudak_(cuda_scale_rows_by_col)(const Matrix *a, Matrix *b);
diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c
index ff1168d..c913db2 100644
--- a/nerv/lib/matrix/cumatrix.c
+++ b/nerv/lib/matrix/cumatrix.c
@@ -2,11 +2,13 @@
 #include "../common.h"
 #include "cuda_helper.h"
 #include <string.h>
-#define PROFILE_HASHMAP_SIZE 123457
+
 static cublasHandle_t cublas_handle;
 static cudaEvent_t profile_start, profile_stop;
 static HashMap *profile;
 
+const char *nerv_context_tname = "nerv.CCuContext";
+
 void nerv_cumatrix_print_profile() {
     size_t i;
     fprintf(stderr, "*** [nerv cumatrix profile] **\n");
@@ -35,6 +37,12 @@ void accu_profile(const char *name, float delta) {
     *val += delta;
 }
 
+cublasHandle_t* nerv_get_cublas_handle()
+{
+    return &cublas_handle;
+}
+
+
 void nerv_cumatrix_init() {
     cublasCreate(&cublas_handle);
     cudaEventCreate(&profile_start);
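nerv_get_cublas_handle() exposes the formerly file-static handle, and nerv.CCuContext names the Lua userdata type for a per-thread context. The convention used throughout the patch is an optional trailing context argument that falls back to the shared global when absent, mirroring the handle == NULL ? &cublas_handle : handle test in the C code further down. An illustrative plain-Lua model of that convention (all names here are invented for the sketch):

    local global_ctx = { tag = "shared" }      -- stands in for the static cublas_handle
    local function add(c, a, b, alpha, beta, ctx)
        ctx = ctx or global_ctx                -- nil falls back to the shared handle
        for i = 1, #a do c[i] = alpha * a[i] + beta * b[i] end
        return ctx.tag
    end
    local a, b, c = {1, 2}, {3, 4}, {}
    print(add(c, a, b, 1.0, 0.5))                      -- "shared": single-threaded path
    print(add(c, a, b, 1.0, 0.5, { tag = "worker" }))  -- "worker": per-thread context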
diff --git a/nerv/lib/matrix/cumatrix.h b/nerv/lib/matrix/cumatrix.h
index e6def66..e53c702 100644
--- a/nerv/lib/matrix/cumatrix.h
+++ b/nerv/lib/matrix/cumatrix.h
@@ -4,4 +4,18 @@
 void nerv_cumatrix_print_profile();
 void nerv_cumatrix_clear_profile();
 void nerv_cumatrix_init();
+
+void nerv_set_cublas_handle();
+
+typedef struct CuContext
+{
+    cublasHandle_t cublas_handle;
+    cudaEvent_t profile_start, profile_stop;
+    HashMap *profile;
+    pthread_t pid;
+    int refcount;
+}CuContext;
+
+extern const char *nerv_context_tname;
+
 #endif
diff --git a/nerv/lib/matrix/generic/cukernel.cu b/nerv/lib/matrix/generic/cukernel.cu
index 08feb59..707f8fd 100644
--- a/nerv/lib/matrix/generic/cukernel.cu
+++ b/nerv/lib/matrix/generic/cukernel.cu
@@ -229,14 +229,15 @@ __global__ void cudak_(expand_frm)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
                                    int nrow, int ncol,
                                    int enrow, int encol,
                                    int stride, int estride,
-                                   int context) {
+                                   int context,
+                                   int a_begin, int a_end) {
     int j = blockIdx.x * blockDim.x + threadIdx.x;
     int i = blockIdx.y * blockDim.y + threadIdx.y;
     int ridx;
     if (i >= enrow || j >= encol) return;
     ridx = i + j / ncol - context;
-    if (ridx < 0) ridx = 0;
-    else if (ridx >= nrow) ridx = nrow - 1;
+    if (ridx < a_begin) ridx = a_begin;
+    else if (ridx >= a_end) ridx = a_end - 1;
     b[j + i * estride] = a[j % ncol + ridx * stride];
 }
 
@@ -541,7 +542,7 @@ extern "C" {
         cudaStreamSynchronize(0);
     }
 
-    void cudak_(cuda_expand_frm)(const Matrix *a, Matrix *b, int context) {
+    void cudak_(cuda_expand_frm)(const Matrix *a, Matrix *b, int context, int a_begin, int a_end) {
         dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N);
         dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x),
                        CEIL_DIV(b->nrow, threadsPerBlock.y));
@@ -551,7 +552,8 @@ extern "C" {
                 b->nrow, b->ncol,
                 a->stride / sizeof(MATRIX_ELEM),
                 b->stride / sizeof(MATRIX_ELEM),
-                context);
+                context,
+                a_begin, a_end);
         cudaStreamSynchronize(0);
     }
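The kernel change above replaces the fixed clamp to [0, nrow) with a clamp to the caller-supplied window [a_begin, a_end), so context frames that spill past the edge of a buffer slice now replicate that slice's own boundary rows rather than the first and last rows of the whole matrix. A runnable plain-Lua model of just the clamp:

    -- model of the new boundary clamp in cudak_(expand_frm)
    local function clamp_row(ridx, a_begin, a_end)
        if ridx < a_begin then return a_begin end
        if ridx >= a_end then return a_end - 1 end
        return ridx
    end
    print(clamp_row(8, 10, 20))   -- 10: below the window, clamped up
    print(clamp_row(15, 10, 20))  -- 15: inside, unchanged
    print(clamp_row(23, 10, 20))  -- 19: past the window, clamped down

Note that the host-side implementation added in generic/mmatrix.c further down computes r_off = b_begin + i + j - context before applying the same clamp.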
diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c
index 770e503..5b11496 100644
--- a/nerv/lib/matrix/generic/cumatrix.c
+++ b/nerv/lib/matrix/generic/cumatrix.c
@@ -13,12 +13,13 @@
 void nerv_matrix_(add)(Matrix *c, const Matrix *a, const Matrix *b,
                        MATRIX_ELEM alpha, MATRIX_ELEM beta,
-                       Status *status) {
+                       cublasHandle_t *handle, Status *status) {
     CHECK_SAME_DIMENSION(a, b, status);
     CHECK_SAME_DIMENSION(a, c, status);
-    PROFILE_START
+    cublasHandle_t *cuhandle = (handle == NULL ? &cublas_handle : handle);
+    PROFILE_START //cublas_handle
     CUBLAS_SAFE_SYNC_CALL(
-        NERV_CUBLAS_(geam)(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N,
+        NERV_CUBLAS_(geam)(*cuhandle, CUBLAS_OP_N, CUBLAS_OP_N,
                            a->ncol, a->nrow,
                            &alpha,
                            MATRIX_ELEM_PTR(a), a->stride / sizeof(MATRIX_ELEM),
@@ -32,7 +33,7 @@ void nerv_matrix_(add)(Matrix *c, const Matrix *a, const Matrix *b,
 
 void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b,
                        MATRIX_ELEM alpha, MATRIX_ELEM beta,
-                       int ta, int tb, Status *status) {
+                       int ta, int tb, cublasHandle_t *handle, Status *status) {
 #define SWAP(a, b) \
     do { int t = (a); (a) = (b); (b) = t; } while (0)
 
@@ -42,10 +43,11 @@ void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b,
     if (tb == CUBLAS_OP_T) SWAP(bm, bn);
     if (an != bm)
         NERV_EXIT_STATUS(status, MAT_WRONG_MULT_DIM, 0);
+    cublasHandle_t *cuhandle = (handle == NULL ? &cublas_handle : handle);
     /* Because matrix in Nerv is row-major, here b comes first */
-    PROFILE_START
+    PROFILE_START //cublas_handle
     CUBLAS_SAFE_SYNC_CALL(
-        NERV_CUBLAS_(gemm)(cublas_handle, tb, ta,
+        NERV_CUBLAS_(gemm)(*cuhandle, tb, ta,
                            bn, am, bm,
                            &alpha,
                            MATRIX_ELEM_PTR(b), b->stride / sizeof(MATRIX_ELEM),
@@ -253,15 +255,16 @@ void nerv_matrix_(copy_toh)(Matrix *a, const Matrix *b,
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }
 
-Matrix *nerv_matrix_(trans)(Matrix *a, Status *status) {
+Matrix *nerv_matrix_(trans)(Matrix *a, cublasHandle_t *handle, Status *status) {
     MATRIX_ELEM alpha = 1, beta = 0;
     Matrix *b = nerv_matrix_(create)(a->ncol, a->nrow, status);
     if (status->err_code != NERV_NORMAL)
         return NULL;
+    cublasHandle_t *cuhandle = (handle == NULL ? &cublas_handle : handle);
     /* FIXME: possible memory leak when lua error is raised */
-    PROFILE_START
+    PROFILE_START //cublas_handle
     CUBLAS_SAFE_SYNC_CALL_RET(
-        NERV_CUBLAS_(geam)(cublas_handle, CUBLAS_OP_T, CUBLAS_OP_T,
+        NERV_CUBLAS_(geam)(*cuhandle, CUBLAS_OP_T, CUBLAS_OP_T,
                            a->nrow, a->ncol,
                            &alpha,
                            MATRIX_ELEM_PTR(a), a->stride / sizeof(MATRIX_ELEM),
@@ -360,14 +363,16 @@ void nerv_matrix_(copy_rows_fromd_by_idx)(Matrix *a, const Matrix *b,
 }
 
 void nerv_matrix_(expand_frm)(Matrix *a, const Matrix *b,
-                              int context, Status *status) {
-    if (a->nrow != b->nrow)
-        NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
+                              int context, int b_begin, int b_end,
+                              Status *status) {
+    if (!(0 <= b_begin && b_begin < b_end && b_end <= b->nrow &&
+          b_end - b_begin == a->nrow))
+        NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
     if (a->ncol != b->ncol * (context * 2 + 1))
         NERV_EXIT_STATUS(status, MAT_GENERAL_ERR,
                          "the width should be 2 * context + 1");
     PROFILE_START
-    cudak_(cuda_expand_frm)(b, a, context);
+    cudak_(cuda_expand_frm)(b, a, context, b_begin, b_end);
     PROFILE_STOP
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }
diff --git a/nerv/lib/matrix/generic/cumatrix.h b/nerv/lib/matrix/generic/cumatrix.h
index 04e8c5a..f476414 100644
--- a/nerv/lib/matrix/generic/cumatrix.h
+++ b/nerv/lib/matrix/generic/cumatrix.h
@@ -1,11 +1,11 @@
 #include "../../common.h"
 
 void nerv_matrix_(add)(Matrix *c, const Matrix *a, const Matrix *b,
-                       MATRIX_ELEM alpha, MATRIX_ELEM beta,
+                       MATRIX_ELEM alpha, MATRIX_ELEM beta, cublasHandle_t *handle,
                        Status *status);
 void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b,
                        MATRIX_ELEM alpha, MATRIX_ELEM beta,
-                       int ta, int tb, Status *status);
+                       int ta, int tb, cublasHandle_t *handle, Status *status);
 void nerv_matrix_(sigmoid)(Matrix *a, const Matrix *b, Status *status);
 void nerv_matrix_(sigmoid_grad)(Matrix *nerr, const Matrix *err,
                                 const Matrix *output, Status *status);
@@ -31,7 +31,7 @@ void nerv_matrix_(copy_fromh)(Matrix *a, const Matrix *b,
 void nerv_matrix_(copy_toh)(Matrix *a, const Matrix *b,
                             int a_begin, int a_end, int b_begin,
                             Status *status);
-Matrix *nerv_matrix_(trans)(Matrix *a, Status *status);
+Matrix *nerv_matrix_(trans)(Matrix *a, cublasHandle_t *handle, Status *status);
 void nerv_matrix_(mul_elem)(Matrix *c, const Matrix *a, const Matrix *b,
                             Status *status);
@@ -44,7 +44,7 @@ void nerv_matrix_(copy_rows_fromd_by_idx)(Matrix *a, const Matrix *b,
                                           const Matrix *idx, int b_begin,
                                           Status *status);
 void nerv_matrix_(expand_frm)(Matrix *a, const Matrix *b,
-                              int context, Status *status);
+                              int context, int b_begin, int b_end, Status *status);
 void nerv_matrix_(rearrange_frm)(Matrix *a, const Matrix *b, int step,
                                  Status *status);
 void nerv_matrix_(scale_rows_by_col)(Matrix *a, const Matrix *b,
diff --git a/nerv/lib/matrix/generic/matrix.c b/nerv/lib/matrix/generic/matrix.c
index 6cb3dc0..cac1ee7 100644
--- a/nerv/lib/matrix/generic/matrix.c
+++ b/nerv/lib/matrix/generic/matrix.c
@@ -5,21 +5,29 @@
 void nerv_matrix_(data_free)(Matrix *self, Status *status) {
     assert(*self->data_ref > 0);
-    if (--(*self->data_ref) == 0)
+    //if (--(*self->data_ref) == 0)
+    if (NULL != self && __sync_fetch_and_add(&self->refcount, -1) == 1)
     {
-        /* free matrix data */
-        MATRIX_DATA_FREE(MATRIX_ELEM_PTR(self), status);
-        free(self->data_ref);
-        free(self);
-    }
-    else {
-        free(self);
-        NERV_SET_STATUS(status, NERV_NORMAL, 0);
+        if(__sync_fetch_and_add(self->data_ref, -1) == 1)
+        {
+            /* free matrix data */
+            MATRIX_DATA_FREE(MATRIX_ELEM_PTR(self), status);
+            free(self->data_ref);
+            free(self);
+            self = NULL;
+        }
+        else
+        {
+            free(self);
+            self = NULL;
+            NERV_SET_STATUS(status, NERV_NORMAL, 0);
+        }
     }
 }
 
 void nerv_matrix_(data_retain)(Matrix *self) {
-    (*self->data_ref)++;
+    __sync_fetch_and_add(self->data_ref, 1);
+    //(*self->data_ref)++;
 }
 
 Matrix *nerv_matrix_(create)(long nrow, long ncol, Status *status) {
@@ -36,6 +44,7 @@ Matrix *nerv_matrix_(create)(long nrow, long ncol, Status *status) {
         free(self);
         return NULL;
     }
+    self->refcount = 1;
     self->data_ref = (long *)malloc(sizeof(long));
     *self->data_ref = 0;
     nerv_matrix_(data_retain)(self);
@@ -56,6 +65,7 @@ Matrix *nerv_matrix_(getrow)(Matrix *self, int row) {
     prow->nmax = prow->ncol;
     MATRIX_ELEM_PTR(prow) = MATRIX_ROW_PTR(self, row);
     prow->data_ref = self->data_ref;
+    prow->refcount = 1;
     nerv_matrix_(data_retain)(prow);
     return prow;
 }
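data_free() now consults two counters: the new per-struct refcount guards the Matrix struct itself against a double free, while the shared data_ref still counts how many views (e.g. getrow() results) reference the underlying storage; both are manipulated with GCC's __sync_fetch_and_add so concurrent threads can release views safely. A single-threaded plain-Lua model of the two-level release (the atomicity is exactly what this model cannot show; all names are illustrative):

    local function release(m)
        m.refcount = m.refcount - 1            -- guards the struct itself
        if m.refcount > 0 then return end      -- another holder remains
        m.data.ref = m.data.ref - 1            -- shared count on the storage
        if m.data.ref == 0 then
            m.data.buf = nil                   -- last view: drop the storage too
        end
    end
    local storage = { ref = 2, buf = "payload" }
    local full = { refcount = 1, data = storage }
    local row  = { refcount = 1, data = storage }  -- a getrow()-style view
    release(row)   -- storage survives; full still references it
    release(full)  -- storage goes away with the last view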
diff --git a/nerv/lib/matrix/generic/mmatrix.c b/nerv/lib/matrix/generic/mmatrix.c
index 225079e..0850c6e 100644
--- a/nerv/lib/matrix/generic/mmatrix.c
+++ b/nerv/lib/matrix/generic/mmatrix.c
@@ -7,6 +7,7 @@
 #define NERV_GENERIC_MATRIX
 #include "../../common.h"
 #include "../../io/chunk_file.h"
+#include "../cuda_helper.h"
 #include "string.h"
 
 static void host_matrix_(free)(MATRIX_ELEM *ptr, Status *status) {
@@ -79,4 +80,47 @@ void nerv_matrix_(copy_from)(Matrix *a, const Matrix *b,
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }
 
+void nerv_matrix_(expand_frm)(Matrix *a, const Matrix *b,
+                              int context, int b_begin, int b_end, Status *status) {
+    if (!(0 <= b_begin && b_begin < b_end && b_end <= b->nrow &&
+          b_end - b_begin == a->nrow))
+        NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
+    if (a->ncol != b->ncol * (context * 2 + 1))
+        NERV_EXIT_STATUS(status, MAT_GENERAL_ERR,
+                         "the width should be 2 * context + 1");
+    int i, j, r_off;
+    for (i = 0; i < a->nrow; i++)
+    {
+        for (j = 0; j < context * 2 + 1; j++)
+        {
+            r_off = b_begin + i + j - context;
+            if (r_off < b_begin) r_off = b_begin;
+            if (r_off >= b_end) r_off = b_end - 1;
+            memcpy(MATRIX_ROW_PTR(a, i) + j * b->ncol, MATRIX_ROW_PTR(b, r_off),
+                   sizeof(MATRIX_ELEM) * b->ncol);
+        }
+    }
+    NERV_SET_STATUS(status, NERV_NORMAL, 0);
+}
+
+void nerv_matrix_(rearrange_frm)(Matrix *a, const Matrix *b,
+                                 int step, int b_begin, int b_end, Status *status) {
+    //CHECK_SAME_DIMENSION(a, b, status);
+    if (!(0 <= b_begin && b_begin < b_end && b_end <= b->nrow &&
+          b_end - b_begin == a->nrow))
+        NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
+    if (b->ncol % step)
+        NERV_EXIT_STATUS(status, MAT_GENERAL_ERR,
+                         "the dimension of columns is not divisible by step");
+    int i, j;
+    int stride = a->stride / sizeof(MATRIX_ELEM);
+    for (i = 0; i < a->nrow; i++)
+    {
+        for (j = 0; j < a->ncol; j++)
+            MATRIX_ELEM_PTR(a)[j + i * stride] =
+                MATRIX_ELEM_PTR(b)[j / step + (j % step) * (b->ncol / step) + (i + b_begin) * stride];
+    }
+    NERV_SET_STATUS(status, NERV_NORMAL, 0);
+}
+
 #endif
diff --git a/nerv/lib/matrix/generic/mmatrix.h b/nerv/lib/matrix/generic/mmatrix.h
index f00a04d..eb6c4c7 100644
--- a/nerv/lib/matrix/generic/mmatrix.h
+++ b/nerv/lib/matrix/generic/mmatrix.h
@@ -6,3 +6,9 @@ void nerv_matrix_(save)(Matrix *self, ChunkFile *cfp, Status *status);
 void nerv_matrix_(copy_from)(Matrix *a, const Matrix *b,
                              int a_begin, int b_begin, int b_end,
                              Status *status);
+
+void nerv_matrix_(expand_frm)(Matrix *a, const Matrix *b,
+                              int context, int b_begin, int b_end, Status *status);
+
+void nerv_matrix_(rearrange_frm)(Matrix *a, const Matrix *b,
+                                 int step, int b_begin, int b_end, Status *status);
diff --git a/nerv/lib/matrix/matrix.h b/nerv/lib/matrix/matrix.h
index 67a6e30..51ca736 100644
--- a/nerv/lib/matrix/matrix.h
+++ b/nerv/lib/matrix/matrix.h
@@ -13,6 +13,7 @@ typedef struct Matrix {
         long *i;
     } data;     /* pointer to actual storage */
     long *data_ref;
+    int refcount;   /* prevent matrix struct double free */
 } Matrix;
 
 #define MATRIX_ROW_PTR(self, row) \
diff --git a/nerv/lib/matrix/mmatrix.c b/nerv/lib/matrix/mmatrix.c
index b8157eb..e54b336 100644
--- a/nerv/lib/matrix/mmatrix.c
+++ b/nerv/lib/matrix/mmatrix.c
@@ -52,3 +52,21 @@ Matrix *nerv_matrix_(perm_gen)(int ncol, Status *status) {
 #define host_matrix_(NAME) host_matrix_int_##NAME
 #define nerv_matrix_(NAME) nerv_matrix_host_int_##NAME
 #include "generic/mmatrix.c"
+
+Matrix *nerv_matrix_(perm_gen)(int ncol, Status *status) {
+    int i;
+    Matrix *self = nerv_matrix_(create)(1, ncol, status);
+    if (status->err_code != NERV_NORMAL)
+        return NULL;
+    long *prow = self->data.i;
+    for (i = 0; i < ncol; i++)
+        prow[i] = i;
+    for (i = ncol - 1; i >= 0; i--)
+    {
+        size_t j = rand() % (i + 1);
+        long tmp = prow[i];
+        prow[i] = prow[j];
+        prow[j] = tmp;
+    }
+    return self;
+}
diff --git a/nerv/matrix/cumatrix.c b/nerv/matrix/cumatrix.c
index fef03fc..08b7efa 100644
--- a/nerv/matrix/cumatrix.c
+++ b/nerv/matrix/cumatrix.c
@@ -1,4 +1,5 @@
 #define NERV_GENERIC_CUMATRIX
+#include "../lib/matrix/cuda_helper.h"
 #include "../lib/common.h"
 #include "../lib/matrix/cumatrix.h"
 #include "../lib/matrix/cuda_helper.h"
diff --git a/nerv/matrix/generic/cumatrix.c b/nerv/matrix/generic/cumatrix.c
index 08cb4c2..ce7e68a 100644
--- a/nerv/matrix/generic/cumatrix.c
+++ b/nerv/matrix/generic/cumatrix.c
@@ -6,6 +6,7 @@
 #define MATRIX_BASE_TNAME nerv_matrix_cuda_tname
 #define NERV_GENERIC_MATRIX
 #define NERV_GENERIC_CUKERNEL
+#include "../../lib/matrix/cuda_helper.h"
 #include "../../lib/common.h"
 #include "../../lib/matrix/generic/matrix.h"
 #include "../../lib/matrix/generic/cumatrix.h"
@@ -17,7 +18,9 @@
 static int nerv_matrix_(lua_add)(lua_State *L) {
     const Matrix *b = lu
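The int-matrix perm_gen added above initializes an identity permutation 0 .. ncol-1 and then applies a Fisher-Yates shuffle driven by rand(). The same algorithm in standalone Lua, with math.random standing in for rand():

    local function perm_gen(ncol)
        local p = {}
        for i = 1, ncol do p[i] = i - 1 end   -- 0-based values, as in the C version
        for i = ncol, 2, -1 do
            local j = math.random(i)          -- uniform in [1, i]
            p[i], p[j] = p[j], p[i]
        end
        return p
    end
    print(table.concat(perm_gen(8), " "))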