Diffstat (limited to 'nerv')
-rw-r--r--  nerv/init.lua                        |  6
-rw-r--r--  nerv/io/sgd_buffer.lua               |  2
-rw-r--r--  nerv/layer/affine.lua                | 37
-rw-r--r--  nerv/layer/combiner.lua              |  6
-rw-r--r--  nerv/layer/sigmoid.lua               |  8
-rw-r--r--  nerv/layer/softmax.lua               |  9
-rw-r--r--  nerv/layer/softmax_ce.lua            |  8
-rw-r--r--  nerv/lib/common.c                    |  1
-rw-r--r--  nerv/lib/common.h                    |  4
-rw-r--r--  nerv/lib/io/chunk_file.c             |  4
-rw-r--r--  nerv/lib/matrix/cukernel.h           |  2
-rw-r--r--  nerv/lib/matrix/cumatrix.c           | 10
-rw-r--r--  nerv/lib/matrix/cumatrix.h           | 14
-rw-r--r--  nerv/lib/matrix/generic/cukernel.cu  | 12
-rw-r--r--  nerv/lib/matrix/generic/cumatrix.c   | 31
-rw-r--r--  nerv/lib/matrix/generic/cumatrix.h   |  8
-rw-r--r--  nerv/lib/matrix/generic/matrix.c     | 30
-rw-r--r--  nerv/lib/matrix/generic/mmatrix.c    | 44
-rw-r--r--  nerv/lib/matrix/generic/mmatrix.h    |  6
-rw-r--r--  nerv/lib/matrix/matrix.h             |  1
-rw-r--r--  nerv/lib/matrix/mmatrix.c            | 18
-rw-r--r--  nerv/matrix/cumatrix.c               |  1
-rw-r--r--  nerv/matrix/generic/cumatrix.c       | 27
-rw-r--r--  nerv/matrix/generic/matrix.c         |  7
-rw-r--r--  nerv/matrix/generic/mmatrix.c        | 34
-rw-r--r--  nerv/matrix/init.lua                 |  6
-rw-r--r--  nerv/nn/layer_dag.lua                | 23
-rw-r--r--  nerv/nn/layer_repo.lua               |  2
28 files changed, 303 insertions, 58 deletions
diff --git a/nerv/init.lua b/nerv/init.lua
index 9c1a5c8..406aea6 100644
--- a/nerv/init.lua
+++ b/nerv/init.lua
@@ -22,6 +22,12 @@ function nerv.mesg_with_timestamp(fmt, ...)
os.date("%H:%M:%S %F"), fmt), ...)
end
+function nerv.info_stderr(fmt, ...)
+ io.stderr:write(string.format(
+ string.format("(%s)[nerv] info: %s\n",os.date("%H:%M:%S %F"), fmt),
+ ...))
+end
+
function nerv.info(fmt, ...)
nerv.printf(
string.format("(%s)[nerv] info: %s\n",
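
[editor's note] nerv.info_stderr formats in two stages: string.format first splices the timestamp prefix into the caller's format string, then applies the caller's arguments to the result. The same two-stage pattern in C, as a minimal illustrative analogue (info_stderr here is not a nerv function):

    #include <stdarg.h>
    #include <stdio.h>
    #include <time.h>

    /* Build the prefixed format string first, then let vfprintf
     * apply the caller's arguments -- mirrors the nested
     * string.format calls in nerv.info_stderr above. */
    static void info_stderr(const char *fmt, ...)
    {
        char stamped[1024];
        char tbuf[64];
        time_t now = time(NULL);
        strftime(tbuf, sizeof(tbuf), "%H:%M:%S %F", localtime(&now));
        snprintf(stamped, sizeof(stamped), "(%s)[nerv] info: %s\n", tbuf, fmt);
        va_list ap;
        va_start(ap, fmt);
        vfprintf(stderr, stamped, ap);
        va_end(ap);
    }

    int main(void)
    {
        info_stderr("buffer leftover: %d", 42);
        return 0;
    }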
diff --git a/nerv/io/sgd_buffer.lua b/nerv/io/sgd_buffer.lua
index 74c4934..dd5d285 100644
--- a/nerv/io/sgd_buffer.lua
+++ b/nerv/io/sgd_buffer.lua
@@ -55,7 +55,7 @@ function SGDBuffer:saturate()
buff.data:copy_from(buff.leftover, 0, lrow)
buff.leftover = nil
end
- nerv.info("buffer leftover: %d\n", lrow)
+ nerv.info("buffer leftover: %d", lrow)
reader.tail = lrow
reader.has_leftover = false
end
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 00cbcfb..56a32f9 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -19,19 +19,19 @@ end
function MatrixParam:update(gradient)
local gconf = self.gconf
- self.correction:add(self.correction, gradient, gconf.momentum, 1.0)
+ self.correction:add(self.correction, gradient, gconf.momentum, 1.0, nerv.context)
-- momentum gain
local mmt_gain = 1.0 / (1.0 - gconf.momentum);
local n = self.gconf.batch_size * mmt_gain
-- perform update
- self.trans:add(self.trans, self.correction, 1.0, -gconf.lrate / n)
+ self.trans:add(self.trans, self.correction, 1.0, -gconf.lrate / n, nerv.context)
end
function LinearTransParam:update(gradient)
MatrixParam.update(self, gradient)
local gconf = self.gconf
-- weight decay
- self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost)
+ self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost, nerv.context)
end
function AffineLayer:__init(id, global_conf, layer_conf)
@@ -61,29 +61,50 @@ function AffineLayer:init(batch_size)
end
function AffineLayer:update(bp_err, input, output)
+ --print(nerv.context)
if self.direct_update then
- self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N')
+ self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N', nerv.context)
-- momentum gain
local mmt_gain = 1.0 / (1.0 - gconf.momentum);
local n = self.gconf.batch_size * mmt_gain
-- perform update
- self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0, -gconf.lrate / n)
+ self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0, -gconf.lrate / n, nerv.context)
else
- self.ltp_grad:mul(input[1], bp_err[1], 1.0, 0.0, 'T', 'N')
+ self.ltp_grad:mul(input[1], bp_err[1], 1.0, 0.0, 'T', 'N', nerv.context)
self.ltp:update(self.ltp_grad)
end
self.bp:update(bp_err[1]:colsum())
end
+function AffineLayer:gradient(bp_err, input, output)
+
+ self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N', nerv.context)
+ self.bp_grad = bp_err[1]:colsum()
+ self.bp.correction:add(self.bp.correction, self.bp_grad, gconf.momentum, 1.0, nerv.context)
+end
+
+function AffineLayer:update_gradient()
+ -- momentum gain
+ local mmt_gain = 1.0 / (1.0 - gconf.momentum);
+ local n = self.gconf.batch_size * mmt_gain
+ -- perform update
+ self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0, -gconf.lrate / n, nerv.context)
+ self.bp.trans:add(self.bp.trans, self.bp.correction, 1.0, -gconf.lrate / n, nerv.context)
+
+ self.ltp.trans:add(self.ltp.trans, self.ltp.trans, 1.0, -gconf.lrate * gconf.wcost, nerv.context)
+ self.bp.trans:add(self.bp.trans, self.bp.trans, 1.0, -gconf.lrate * gconf.wcost, nerv.context)
+end
+
function AffineLayer:propagate(input, output)
-- apply linear transform
- output[1]:mul(input[1], self.ltp.trans, 1.0, 0.0, 'N', 'N')
+ --print(nerv.context)
+ output[1]:mul(input[1], self.ltp.trans, 1.0, 0.0, 'N', 'N', nerv.context)
-- add bias
output[1]:add_row(self.bp.trans, 1.0)
end
function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
- next_bp_err[1]:mul(bp_err[1], self.ltp.trans, 1.0, 0.0, 'N', 'T')
+ next_bp_err[1]:mul(bp_err[1], self.ltp.trans, 1.0, 0.0, 'N', 'T', nerv.context)
end
function AffineLayer:get_params()
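
[editor's note] The new gradient/update_gradient pair splits accumulation from application: gradient folds the minibatch gradient into the momentum-smoothed correction, and update_gradient later applies the learning-rate and weight-decay steps in one pass. The arithmetic is plain momentum SGD, restated here on flat C arrays (an illustrative sketch of the formulas in the diff, not nerv's actual kernel):

    #include <stddef.h>

    /* correction <- momentum * correction + grad
     * trans      <- trans - (lrate / n) * correction,
     *               with n = batch_size / (1 - momentum)  (momentum gain)
     * trans      <- trans - lrate * wcost * trans         (weight decay) */
    static void momentum_sgd_step(float *trans, float *correction,
                                  const float *grad, size_t len,
                                  float momentum, float lrate,
                                  float batch_size, float wcost)
    {
        float n = batch_size / (1.0f - momentum);
        for (size_t i = 0; i < len; i++) {
            correction[i] = momentum * correction[i] + grad[i];
            trans[i] -= (lrate / n) * correction[i];
            trans[i] -= lrate * wcost * trans[i];
        }
    }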
diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua
index 7bd7617..23cf1db 100644
--- a/nerv/layer/combiner.lua
+++ b/nerv/layer/combiner.lua
@@ -36,7 +36,7 @@ end
function CombinerLayer:propagate(input, output)
output[1]:fill(0)
for i = 1, #self.dim_in do
- output[1]:add(output[1], input[i], 1.0, self.lambda[i])
+ output[1]:add(output[1], input[i], 1.0, self.lambda[i], nerv.context)
end
for i = 2, #self.dim_out do
output[i]:copy_fromd(output[1])
@@ -47,10 +47,10 @@ function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output)
local sum = self.sum
sum:copy_fromd(bp_err[1])
for i = 2, #self.dim_out do
- sum:add(sum, bp_err[i], 1.0, 1.0)
+ sum:add(sum, bp_err[i], 1.0, 1.0, nerv.context)
end
for i = 1, #self.dim_in do
- next_bp_err[i]:add(next_bp_err[i], sum, 0.0, self.lambda[i])
+ next_bp_err[i]:add(next_bp_err[i], sum, 0.0, self.lambda[i], nerv.context)
end
end
diff --git a/nerv/layer/sigmoid.lua b/nerv/layer/sigmoid.lua
index dfd09eb..f6f1417 100644
--- a/nerv/layer/sigmoid.lua
+++ b/nerv/layer/sigmoid.lua
@@ -18,6 +18,14 @@ function SigmoidLayer:update(bp_err, input, output)
-- no params, therefore do nothing
end
+function SigmoidLayer:gradient(bp_err, input, output)
+ -- no params, therefore do nothing
+end
+
+function SigmoidLayer:update_gradient()
+ -- no params, therefore do nothing
+end
+
function SigmoidLayer:propagate(input, output)
output[1]:sigmoid(input[1])
end
diff --git a/nerv/layer/softmax.lua b/nerv/layer/softmax.lua
index e979ebf..7e9c6f0 100644
--- a/nerv/layer/softmax.lua
+++ b/nerv/layer/softmax.lua
@@ -18,6 +18,15 @@ function SoftmaxLayer:update(bp_err, input, output)
-- no params, therefore do nothing
end
+function SoftmaxLayer:gradient(bp_err, input, output)
+ -- no params, therefore do nothing
+end
+
+function SoftmaxLayer:update_gradient()
+ -- no params, therefore do nothing
+end
+
+
function SoftmaxLayer:propagate(input, output)
output[1]:softmax(input[1])
end
diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua
index f878a2f..42adbc6 100644
--- a/nerv/layer/softmax_ce.lua
+++ b/nerv/layer/softmax_ce.lua
@@ -27,6 +27,14 @@ function SoftmaxCELayer:update(bp_err, input, output)
-- no params, therefore do nothing
end
+function SoftmaxCELayer:gradient(bp_err, input, output)
+ -- no params, therefore do nothing
+end
+
+function SoftmaxCELayer:update_gradient(bp_err, input, output)
+ -- no params, therefore do nothing
+end
+
function SoftmaxCELayer:propagate(input, output)
local softmax = self.softmax
local ce = self.ce
diff --git a/nerv/lib/common.c b/nerv/lib/common.c
index db667b2..1fa1d9f 100644
--- a/nerv/lib/common.c
+++ b/nerv/lib/common.c
@@ -1,4 +1,5 @@
#include "common.h"
+#include "matrix/cuda_helper.h"
#include <stdarg.h>
int nerv_error(lua_State *L, const char *err_mesg_fmt, ...) {
va_list ap;
diff --git a/nerv/lib/common.h b/nerv/lib/common.h
index 1c588d1..a4e3582 100644
--- a/nerv/lib/common.h
+++ b/nerv/lib/common.h
@@ -59,6 +59,8 @@ typedef struct Status {
nerv_error_status(L, &status); \
} while (0)
+#define PROFILE_HASHMAP_SIZE 123457
+
typedef struct HashNode {
const char *key;
void *val;
@@ -82,6 +84,8 @@ void hashmap_clear(HashMap *h);
size_t bkdr_hash(const char *key);
+extern const char *nerv_context_tname;
+
int nerv_error(lua_State *L, const char *err_mesg_fmt, ...);
int nerv_error_status(lua_State *L, Status *status);
int nerv_error_method_not_implemented(lua_State *L);
diff --git a/nerv/lib/io/chunk_file.c b/nerv/lib/io/chunk_file.c
index 4e00b0b..79dbee3 100644
--- a/nerv/lib/io/chunk_file.c
+++ b/nerv/lib/io/chunk_file.c
@@ -61,7 +61,7 @@ static const char *read_chunk_metadata(FILE *fp, const char *fn,
NERV_SET_STATUS(status, (fgets(buff + LUA_RETURN_LEN,
LINEBUFF_SIZE, fp) == (buff + LUA_RETURN_LEN) ? \
NERV_NORMAL : CF_INVALID_FORMAT), 0);
- fprintf(stderr, "metadata: %s\n", buff);
+ //fprintf(stderr, "metadata: %s\n", buff);
return buff;
}
@@ -112,7 +112,7 @@ static ChunkFile *open_read(const char *fn, Status *status) {
for (i = 0;; offset += chunk_len, i++)
{
ChunkInfo *cip;
- fprintf(stderr, "reading chunk %d from %d\n", i, (int)offset);
+ //fprintf(stderr, "reading chunk %d from %d\n", i, (int)offset);
/* skip to the begining of chunk i */
if (fseeko(fp, offset, SEEK_SET) != 0)
{
diff --git a/nerv/lib/matrix/cukernel.h b/nerv/lib/matrix/cukernel.h
index 2126c6f..31e199b 100644
--- a/nerv/lib/matrix/cukernel.h
+++ b/nerv/lib/matrix/cukernel.h
@@ -13,7 +13,7 @@ void cudak_(cuda_softmax_final)(const Matrix *a, const Matrix *max, const Matrix
void cudak_(cuda_add_row)(const Matrix *a, Matrix *b, double beta);
void cudak_(cuda_fill)(Matrix *a, double val);
void cudak_(cuda_clip)(Matrix *a, double val_1, double val_2);
-void cudak_(cuda_expand_frm)(const Matrix *a, Matrix *b, int context);
+void cudak_(cuda_expand_frm)(const Matrix *a, Matrix *b, int context, int a_begin, int a_end);
void cudak_(cuda_rearrange_frm)(const Matrix *a, Matrix *b, int step);
void cudak_(cuda_scale_rows_by_row)(const Matrix *a, Matrix *b);
void cudak_(cuda_scale_rows_by_col)(const Matrix *a, Matrix *b);
diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c
index ff1168d..c913db2 100644
--- a/nerv/lib/matrix/cumatrix.c
+++ b/nerv/lib/matrix/cumatrix.c
@@ -2,11 +2,13 @@
#include "../common.h"
#include "cuda_helper.h"
#include <string.h>
-#define PROFILE_HASHMAP_SIZE 123457
+
static cublasHandle_t cublas_handle;
static cudaEvent_t profile_start, profile_stop;
static HashMap *profile;
+const char *nerv_context_tname = "nerv.CCuContext";
+
void nerv_cumatrix_print_profile() {
size_t i;
fprintf(stderr, "*** [nerv cumatrix profile] **\n");
@@ -35,6 +37,12 @@ void accu_profile(const char *name, float delta) {
*val += delta;
}
+cublasHandle_t* nerv_get_cublas_handle()
+{
+ return &cublas_handle;
+}
+
+
void nerv_cumatrix_init() {
cublasCreate(&cublas_handle);
cudaEventCreate(&profile_start);
diff --git a/nerv/lib/matrix/cumatrix.h b/nerv/lib/matrix/cumatrix.h
index e6def66..e53c702 100644
--- a/nerv/lib/matrix/cumatrix.h
+++ b/nerv/lib/matrix/cumatrix.h
@@ -4,4 +4,18 @@
void nerv_cumatrix_print_profile();
void nerv_cumatrix_clear_profile();
void nerv_cumatrix_init();
+
+void nerv_set_cublas_handle();
+
+typedef struct CuContext
+{
+ cublasHandle_t cublas_handle;
+ cudaEvent_t profile_start, profile_stop;
+ HashMap *profile;
+ pthread_t pid;
+ int refcount;
+}CuContext;
+
+extern const char *nerv_context_tname;
+
#endif
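
[editor's note] CuContext bundles a cuBLAS handle, profiling events, and a profile map with the owning pthread, so each worker thread can drive CUDA through its own handle instead of the shared global one. This diff only declares the struct; a hypothetical constructor might look like the following (cucontext_create is not part of nerv, and the profile-map setup is omitted since common.h's HashMap constructor is not shown here):

    #include <stdlib.h>
    #include <pthread.h>
    #include <cuda_runtime.h>
    #include <cublas_v2.h>

    static CuContext *cucontext_create(void)
    {
        CuContext *ctx = (CuContext *)malloc(sizeof(CuContext));
        if (ctx == NULL) return NULL;
        cublasCreate(&ctx->cublas_handle);   /* thread-private handle */
        cudaEventCreate(&ctx->profile_start);
        cudaEventCreate(&ctx->profile_stop);
        ctx->profile = NULL;                 /* see common.h's HashMap API */
        ctx->pid = pthread_self();
        ctx->refcount = 1;
        return ctx;
    }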
diff --git a/nerv/lib/matrix/generic/cukernel.cu b/nerv/lib/matrix/generic/cukernel.cu
index 08feb59..707f8fd 100644
--- a/nerv/lib/matrix/generic/cukernel.cu
+++ b/nerv/lib/matrix/generic/cukernel.cu
@@ -229,14 +229,15 @@ __global__ void cudak_(expand_frm)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
int nrow, int ncol,
int enrow, int encol,
int stride, int estride,
- int context) {
+ int context,
+ int a_begin, int a_end) {
int j = blockIdx.x * blockDim.x + threadIdx.x;
int i = blockIdx.y * blockDim.y + threadIdx.y;
int ridx;
if (i >= enrow || j >= encol) return;
ridx = i + j / ncol - context;
- if (ridx < 0) ridx = 0;
- else if (ridx >= nrow) ridx = nrow - 1;
+ if (ridx < a_begin) ridx = a_begin;
+ else if (ridx >= a_end) ridx = a_end - 1;
b[j + i * estride] = a[j % ncol + ridx * stride];
}
@@ -541,7 +542,7 @@ extern "C" {
cudaStreamSynchronize(0);
}
- void cudak_(cuda_expand_frm)(const Matrix *a, Matrix *b, int context) {
+ void cudak_(cuda_expand_frm)(const Matrix *a, Matrix *b, int context, int a_begin, int a_end) {
dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N);
dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x),
CEIL_DIV(b->nrow, threadsPerBlock.y));
@@ -551,7 +552,8 @@ extern "C" {
b->nrow, b->ncol,
a->stride / sizeof(MATRIX_ELEM),
b->stride / sizeof(MATRIX_ELEM),
- context);
+ context,
+ a_begin, a_end);
cudaStreamSynchronize(0);
}
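
[editor's note] expand_frm concatenates the 2*context+1 frames around each row; the new a_begin/a_end parameters let the kernel expand a slice of the source while clamping out-of-range neighbors at the slice boundaries rather than at row 0 and nrow-1. The clamping rule, extracted into a standalone helper following the CPU implementation added in mmatrix.c below (illustrative only):

    /* Source row for output row i and window offset j in [0, 2*context]:
     * neighbors outside [a_begin, a_end) are clamped to the nearest
     * valid row, i.e. the first/last frame of the slice is repeated. */
    static int expand_src_row(int i, int j, int context,
                              int a_begin, int a_end)
    {
        int ridx = a_begin + i + j - context;
        if (ridx < a_begin) ridx = a_begin;
        else if (ridx >= a_end) ridx = a_end - 1;
        return ridx;
    }

For context = 2, output row i is the concatenation of source rows i-2 .. i+2 (clamped), giving the width check a->ncol == b->ncol * (2*context + 1).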
diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c
index 770e503..5b11496 100644
--- a/nerv/lib/matrix/generic/cumatrix.c
+++ b/nerv/lib/matrix/generic/cumatrix.c
@@ -13,12 +13,13 @@
void nerv_matrix_(add)(Matrix *c, const Matrix *a, const Matrix *b,
MATRIX_ELEM alpha, MATRIX_ELEM beta,
- Status *status) {
+ cublasHandle_t *handle, Status *status) {
CHECK_SAME_DIMENSION(a, b, status);
CHECK_SAME_DIMENSION(a, c, status);
- PROFILE_START
+ cublasHandle_t *cuhandle = (handle == NULL ? &cublas_handle : handle);
+ PROFILE_START //cublas_handle
CUBLAS_SAFE_SYNC_CALL(
- NERV_CUBLAS_(geam)(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N,
+ NERV_CUBLAS_(geam)(*cuhandle, CUBLAS_OP_N, CUBLAS_OP_N,
a->ncol, a->nrow,
&alpha,
MATRIX_ELEM_PTR(a), a->stride / sizeof(MATRIX_ELEM),
@@ -32,7 +33,7 @@ void nerv_matrix_(add)(Matrix *c, const Matrix *a, const Matrix *b,
void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b,
MATRIX_ELEM alpha, MATRIX_ELEM beta,
- int ta, int tb, Status *status) {
+ int ta, int tb, cublasHandle_t *handle, Status *status) {
#define SWAP(a, b) \
do { int t = (a); (a) = (b); (b) = t; } while (0)
@@ -42,10 +43,11 @@ void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b,
if (tb == CUBLAS_OP_T) SWAP(bm, bn);
if (an != bm)
NERV_EXIT_STATUS(status, MAT_WRONG_MULT_DIM, 0);
+ cublasHandle_t *cuhandle = (handle == NULL ? &cublas_handle : handle);
/* Because matrix in Nerv is row-major, here b comes first */
- PROFILE_START
+ PROFILE_START //cublas_handle
CUBLAS_SAFE_SYNC_CALL(
- NERV_CUBLAS_(gemm)(cublas_handle, tb, ta,
+ NERV_CUBLAS_(gemm)(*cuhandle, tb, ta,
bn, am, bm,
&alpha,
MATRIX_ELEM_PTR(b), b->stride / sizeof(MATRIX_ELEM),
@@ -253,15 +255,16 @@ void nerv_matrix_(copy_toh)(Matrix *a, const Matrix *b,
NERV_SET_STATUS(status, NERV_NORMAL, 0);
}
-Matrix *nerv_matrix_(trans)(Matrix *a, Status *status) {
+Matrix *nerv_matrix_(trans)(Matrix *a, cublasHandle_t *handle, Status *status) {
MATRIX_ELEM alpha = 1, beta = 0;
Matrix *b = nerv_matrix_(create)(a->ncol, a->nrow, status);
if (status->err_code != NERV_NORMAL)
return NULL;
+ cublasHandle_t *cuhandle = (handle == NULL ? &cublas_handle : handle);
/* FIXME: possible memory leak when lua error is raised */
- PROFILE_START
+ PROFILE_START //cublas_handle
CUBLAS_SAFE_SYNC_CALL_RET(
- NERV_CUBLAS_(geam)(cublas_handle, CUBLAS_OP_T, CUBLAS_OP_T,
+ NERV_CUBLAS_(geam)(*cuhandle, CUBLAS_OP_T, CUBLAS_OP_T,
a->nrow, a->ncol,
&alpha,
MATRIX_ELEM_PTR(a), a->stride / sizeof(MATRIX_ELEM),
@@ -360,14 +363,16 @@ void nerv_matrix_(copy_rows_fromd_by_idx)(Matrix *a, const Matrix *b,
}
void nerv_matrix_(expand_frm)(Matrix *a, const Matrix *b,
- int context, Status *status) {
- if (a->nrow != b->nrow)
- NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
+ int context, int b_begin, int b_end,
+ Status *status) {
+ if (!(0 <= b_begin && b_begin < b_end && b_end <= b->nrow &&
+ b_end - b_begin == a->nrow))
+ NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
if (a->ncol != b->ncol * (context * 2 + 1))
NERV_EXIT_STATUS(status, MAT_GENERAL_ERR,
"the width should be 2 * context + 1");
PROFILE_START
- cudak_(cuda_expand_frm)(b, a, context);
+ cudak_(cuda_expand_frm)(b, a, context, b_begin, b_end);
PROFILE_STOP
NERV_SET_STATUS(status, NERV_NORMAL, 0);
}
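
[editor's note] Each BLAS entry point now takes an optional cublasHandle_t*: passing NULL preserves the old single-threaded behavior by falling back to the file-scope handle, while threaded callers can pass the handle from their own CuContext. The selection idiom in isolation (a sketch assuming a file-scope global like the cublas_handle in cumatrix.c):

    #include <cublas_v2.h>

    static cublasHandle_t cublas_handle;  /* shared default */

    /* Use the caller's handle when given, else the global default. */
    static cublasHandle_t *select_handle(cublasHandle_t *handle)
    {
        return handle == NULL ? &cublas_handle : handle;
    }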
diff --git a/nerv/lib/matrix/generic/cumatrix.h b/nerv/lib/matrix/generic/cumatrix.h
index 04e8c5a..f476414 100644
--- a/nerv/lib/matrix/generic/cumatrix.h
+++ b/nerv/lib/matrix/generic/cumatrix.h
@@ -1,11 +1,11 @@
#include "../../common.h"
void nerv_matrix_(add)(Matrix *c, const Matrix *a, const Matrix *b,
- MATRIX_ELEM alpha, MATRIX_ELEM beta,
+ MATRIX_ELEM alpha, MATRIX_ELEM beta, cublasHandle_t *handle,
Status *status);
void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b,
MATRIX_ELEM alpha, MATRIX_ELEM beta,
- int ta, int tb, Status *status);
+ int ta, int tb, cublasHandle_t *handle, Status *status);
void nerv_matrix_(sigmoid)(Matrix *a, const Matrix *b, Status *status);
void nerv_matrix_(sigmoid_grad)(Matrix *nerr, const Matrix *err,
const Matrix *output, Status *status);
@@ -31,7 +31,7 @@ void nerv_matrix_(copy_fromh)(Matrix *a, const Matrix *b,
void nerv_matrix_(copy_toh)(Matrix *a, const Matrix *b,
int a_begin, int a_end, int b_begin,
Status *status);
-Matrix *nerv_matrix_(trans)(Matrix *a, Status *status);
+Matrix *nerv_matrix_(trans)(Matrix *a, cublasHandle_t *handle, Status *status);
void nerv_matrix_(mul_elem)(Matrix *c, const Matrix *a, const Matrix *b,
Status *status);
@@ -44,7 +44,7 @@ void nerv_matrix_(copy_rows_fromd_by_idx)(Matrix *a, const Matrix *b,
const Matrix *idx, int b_begin, Status *status);
void nerv_matrix_(expand_frm)(Matrix *a, const Matrix *b,
- int context, Status *status);
+ int context, int b_begin, int b_end, Status *status);
void nerv_matrix_(rearrange_frm)(Matrix *a, const Matrix *b,
int step, Status *status);
void nerv_matrix_(scale_rows_by_col)(Matrix *a, const Matrix *b,
diff --git a/nerv/lib/matrix/generic/matrix.c b/nerv/lib/matrix/generic/matrix.c
index 6cb3dc0..cac1ee7 100644
--- a/nerv/lib/matrix/generic/matrix.c
+++ b/nerv/lib/matrix/generic/matrix.c
@@ -5,21 +5,29 @@
void nerv_matrix_(data_free)(Matrix *self, Status *status) {
assert(*self->data_ref > 0);
- if (--(*self->data_ref) == 0)
+ //if (--(*self->data_ref) == 0)
+ if (NULL != self && __sync_fetch_and_add(&self->refcount, -1) == 1)
{
- /* free matrix data */
- MATRIX_DATA_FREE(MATRIX_ELEM_PTR(self), status);
- free(self->data_ref);
- free(self);
- }
- else {
- free(self);
- NERV_SET_STATUS(status, NERV_NORMAL, 0);
+ if(__sync_fetch_and_add(self->data_ref, -1) == 1)
+ {
+ /* free matrix data */
+ MATRIX_DATA_FREE(MATRIX_ELEM_PTR(self), status);
+ free(self->data_ref);
+ free(self);
+ self = NULL;
+ }
+ else
+ {
+ free(self);
+ self = NULL;
+ NERV_SET_STATUS(status, NERV_NORMAL, 0);
+ }
}
}
void nerv_matrix_(data_retain)(Matrix *self) {
- (*self->data_ref)++;
+ __sync_fetch_and_add(self->data_ref, 1);
+ //(*self->data_ref)++;
}
Matrix *nerv_matrix_(create)(long nrow, long ncol, Status *status) {
@@ -36,6 +44,7 @@ Matrix *nerv_matrix_(create)(long nrow, long ncol, Status *status) {
free(self);
return NULL;
}
+ self->refcount = 1;
self->data_ref = (long *)malloc(sizeof(long));
*self->data_ref = 0;
nerv_matrix_(data_retain)(self);
@@ -56,6 +65,7 @@ Matrix *nerv_matrix_(getrow)(Matrix *self, int row) {
prow->nmax = prow->ncol;
MATRIX_ELEM_PTR(prow) = MATRIX_ROW_PTR(self, row);
prow->data_ref = self->data_ref;
+ prow->refcount = 1;
nerv_matrix_(data_retain)(prow);
return prow;
}
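
[editor's note] data_free now decrements both the per-struct refcount and the shared data_ref with __sync_fetch_and_add, GCC's atomic fetch-and-add builtin; the thread that observes the old value 1 is the unique last owner and performs the free. A minimal self-contained sketch of that release pattern (illustrative types, not nerv's Matrix):

    #include <stdlib.h>

    typedef struct {
        int refcount;    /* guards this struct (e.g. a row view) */
        long *data_ref;  /* shared by all views of the same storage */
        void *data;
    } Ref;

    static void ref_retain_data(Ref *r)
    {
        __sync_fetch_and_add(r->data_ref, 1);
    }

    static void ref_release(Ref *r)
    {
        /* old value 1 => we were the last holder of this struct */
        if (__sync_fetch_and_add(&r->refcount, -1) == 1) {
            /* old value 1 => no other view shares the storage */
            if (__sync_fetch_and_add(r->data_ref, -1) == 1) {
                free(r->data);
                free(r->data_ref);
            }
            free(r);
        }
    }

Note that the `self = NULL;` assignments in the diff only clear the local parameter copy; the caller's pointer is unaffected.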
diff --git a/nerv/lib/matrix/generic/mmatrix.c b/nerv/lib/matrix/generic/mmatrix.c
index 225079e..0850c6e 100644
--- a/nerv/lib/matrix/generic/mmatrix.c
+++ b/nerv/lib/matrix/generic/mmatrix.c
@@ -7,6 +7,7 @@
#define NERV_GENERIC_MATRIX
#include "../../common.h"
#include "../../io/chunk_file.h"
+#include "../cuda_helper.h"
#include "string.h"
static void host_matrix_(free)(MATRIX_ELEM *ptr, Status *status) {
@@ -79,4 +80,47 @@ void nerv_matrix_(copy_from)(Matrix *a, const Matrix *b,
NERV_SET_STATUS(status, NERV_NORMAL, 0);
}
+void nerv_matrix_(expand_frm)(Matrix *a, const Matrix *b,
+ int context, int b_begin, int b_end, Status *status) {
+ if (!(0 <= b_begin && b_begin < b_end && b_end <= b->nrow &&
+ b_end - b_begin == a->nrow))
+ NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
+ if (a->ncol != b->ncol * (context * 2 + 1))
+ NERV_EXIT_STATUS(status, MAT_GENERAL_ERR,
+ "the width should be 2 * context + 1");
+ int i, j, r_off;
+ for (i = 0; i < a->nrow; i++)
+ {
+ for (j = 0; j < context * 2 + 1; j++)
+ {
+ r_off = b_begin + i + j - context;
+ if (r_off < b_begin) r_off = b_begin;
+ if (r_off >= b_end) r_off = b_end - 1;
+ memcpy(MATRIX_ROW_PTR(a, i) + j*b->ncol, MATRIX_ROW_PTR(b, r_off), sizeof(MATRIX_ELEM) * b->ncol);
+ }
+ }
+ NERV_SET_STATUS(status, NERV_NORMAL, 0);
+}
+
+void nerv_matrix_(rearrange_frm)(Matrix *a, const Matrix *b,
+ int step, int b_begin, int b_end, Status *status) {
+ //CHECK_SAME_DIMENSION(a, b, status);
+ if (!(0 <= b_begin && b_begin < b_end && b_end <= b->nrow &&
+ b_end - b_begin == a->nrow))
+ NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
+ if (b->ncol % step)
+ NERV_EXIT_STATUS(status, MAT_GENERAL_ERR,
+ "the dimension of columns is not divisible by step");
+
+ int i, j;
+ int stride = a->stride / sizeof(MATRIX_ELEM);
+ for (i = 0; i < a->nrow; i++)
+ {
+ for (j = 0; j < a->ncol; j++)
+ MATRIX_ELEM_PTR(a)[j + i * stride] = MATRIX_ELEM_PTR(b)[j / step + (j % step) * (b->ncol/step) + (i+b_begin) * stride];
+ }
+ NERV_SET_STATUS(status, NERV_NORMAL, 0);
+}
+
+
#endif
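
[editor's note] rearrange_frm de-interleaves a row: the source stores `step` sub-vectors contiguously, and the destination interleaves them so each group of `step` consecutive elements takes one element from each sub-vector, per a[j] = b[j/step + (j % step) * (ncol/step)]. A tiny standalone demo of the index permutation (illustrative):

    #include <stdio.h>

    int main(void)
    {
        /* ncol = 6, step = 2: b holds two contiguous sub-vectors
         * {x0,x1,x2 | y0,y1,y2}; the rearranged row interleaves
         * them as {x0,y0,x1,y1,x2,y2}. */
        const char *b[6] = { "x0", "x1", "x2", "y0", "y1", "y2" };
        int ncol = 6, step = 2, j;
        for (j = 0; j < ncol; j++)
            printf("%s ", b[j / step + (j % step) * (ncol / step)]);
        printf("\n");  /* prints: x0 y0 x1 y1 x2 y2 */
        return 0;
    }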
diff --git a/nerv/lib/matrix/generic/mmatrix.h b/nerv/lib/matrix/generic/mmatrix.h
index f00a04d..eb6c4c7 100644
--- a/nerv/lib/matrix/generic/mmatrix.h
+++ b/nerv/lib/matrix/generic/mmatrix.h
@@ -6,3 +6,9 @@ void nerv_matrix_(save)(Matrix *self, ChunkFile *cfp, Status *status);
void nerv_matrix_(copy_from)(Matrix *a, const Matrix *b,
int a_begin, int b_begin, int b_end,
Status *status);
+
+void nerv_matrix_(expand_frm)(Matrix *a, const Matrix *b,
+ int context, int b_begin, int b_end, Status *status);
+
+void nerv_matrix_(rearrange_frm)(Matrix *a, const Matrix *b,
+ int step, int b_begin, int b_end, Status *status);
diff --git a/nerv/lib/matrix/matrix.h b/nerv/lib/matrix/matrix.h
index 67a6e30..51ca736 100644
--- a/nerv/lib/matrix/matrix.h
+++ b/nerv/lib/matrix/matrix.h
@@ -13,6 +13,7 @@ typedef struct Matrix {
long *i;
} data; /* pointer to actual storage */
long *data_ref;
+ int refcount; /* prevent matrix struct double free */
} Matrix;
#define MATRIX_ROW_PTR(self, row) \
diff --git a/nerv/lib/matrix/mmatrix.c b/nerv/lib/matrix/mmatrix.c
index b8157eb..e54b336 100644
--- a/nerv/lib/matrix/mmatrix.c
+++ b/nerv/lib/matrix/mmatrix.c
@@ -52,3 +52,21 @@ Matrix *nerv_matrix_(perm_gen)(int ncol, Status *status) {
#define host_matrix_(NAME) host_matrix_int_##NAME
#define nerv_matrix_(NAME) nerv_matrix_host_int_##NAME
#include "generic/mmatrix.c"
+
+Matrix *nerv_matrix_(perm_gen)(int ncol, Status *status) {
+ int i;
+ Matrix *self = nerv_matrix_(create)(1, ncol, status);
+ if (status->err_code != NERV_NORMAL)
+ return NULL;
+ long *prow = self->data.i;
+ for (i = 0; i < ncol; i++)
+ prow[i] = i;
+ for (i = ncol - 1; i >= 0; i--)
+ {
+ size_t j = rand() % (i + 1);
+ long tmp = prow[i];
+ prow[i] = prow[j];
+ prow[j] = tmp;
+ }
+ return self;
+}
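
[editor's note] The new int-matrix perm_gen fills row 0 with 0 .. ncol-1 and shuffles it with a Fisher-Yates pass; rand() % (i + 1) picks the swap index (uniform up to the usual small modulo bias of rand). The same shuffle on a plain array, for reference:

    #include <stdio.h>
    #include <stdlib.h>
    #include <time.h>

    int main(void)
    {
        long perm[8];
        int n = 8, i;
        srand((unsigned)time(NULL));
        for (i = 0; i < n; i++)
            perm[i] = i;
        /* Fisher-Yates: swap element i with a random j in [0, i] */
        for (i = n - 1; i >= 0; i--) {
            int j = rand() % (i + 1);
            long tmp = perm[i];
            perm[i] = perm[j];
            perm[j] = tmp;
        }
        for (i = 0; i < n; i++)
            printf("%ld ", perm[i]);
        printf("\n");
        return 0;
    }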
diff --git a/nerv/matrix/cumatrix.c b/nerv/matrix/cumatrix.c
index fef03fc..08b7efa 100644
--- a/nerv/matrix/cumatrix.c
+++ b/nerv/matrix/cumatrix.c
@@ -1,4 +1,5 @@
#define NERV_GENERIC_CUMATRIX
+#include "../lib/matrix/cuda_helper.h"
#include "../lib/common.h"
#include "../lib/matrix/cumatrix.h"
#include "../lib/matrix/cuda_helper.h"
diff --git a/nerv/matrix/generic/cumatrix.c b/nerv/matrix/generic/cumatrix.c
index 08cb4c2..ce7e68a 100644
--- a/nerv/matrix/generic/cumatrix.c
+++ b/nerv/matrix/generic/cumatrix.c
@@ -6,6 +6,7 @@
#define MATRIX_BASE_TNAME nerv_matrix_cuda_tname
#define NERV_GENERIC_MATRIX
#define NERV_GENERIC_CUKERNEL
+#include "../../lib/matrix/cuda_helper.h"
#include "../../lib/common.h"
#include "../../lib/matrix/generic/matrix.h"
#include "../../lib/matrix/generic/cumatrix.h"
@@ -17,7 +18,9 @@ static int nerv_matrix_(lua_add)(lua_State *L) {
const Matrix *b = lu