diff options
author | Determinant <[email protected]> | 2016-02-15 15:04:13 +0800 |
---|---|---|
committer | Determinant <[email protected]> | 2016-02-15 15:04:13 +0800 |
commit | 3362020a6bc43766a92882abe6d127c8bb98a628 (patch) | |
tree | cad93eb88c2813694c0ae4ca4ecb9873a719ad85 /nerv/lib | |
parent | dcad8a3f80fc55ca93984d981f9b829d2e4ea728 (diff) |
try a basic merge
Diffstat (limited to 'nerv/lib')
-rw-r--r-- | nerv/lib/common.c | 1 | ||||
-rw-r--r-- | nerv/lib/io/chunk_file.c | 3 | ||||
-rw-r--r-- | nerv/lib/matrix/cukernel.h | 2 | ||||
-rw-r--r-- | nerv/lib/matrix/generic/cukernel.cu | 6 | ||||
-rw-r--r-- | nerv/lib/matrix/generic/cumatrix.c | 2 | ||||
-rw-r--r-- | nerv/lib/matrix/generic/mmatrix.c | 505 | ||||
-rw-r--r-- | nerv/lib/matrix/generic/mmatrix.h | 45 | ||||
-rw-r--r-- | nerv/lib/matrix/matrix.h | 2 | ||||
-rw-r--r-- | nerv/lib/matrix/mmatrix.c | 5 |
9 files changed, 560 insertions, 11 deletions
diff --git a/nerv/lib/common.c b/nerv/lib/common.c index db667b2..d977f8d 100644 --- a/nerv/lib/common.c +++ b/nerv/lib/common.c @@ -41,6 +41,7 @@ int nerv_error_status(lua_State *L, Status *status) { status->file, status->lineno); else nerv_error(L, "%s @%s:%d", mmesg, status->file, status->lineno); + return 0; } int nerv_error_method_not_implemented(lua_State *L) { diff --git a/nerv/lib/io/chunk_file.c b/nerv/lib/io/chunk_file.c index 71db820..5c50ccd 100644 --- a/nerv/lib/io/chunk_file.c +++ b/nerv/lib/io/chunk_file.c @@ -2,6 +2,7 @@ #include "chunk_file.h" #include <stdlib.h> #include <string.h> +#include <ctype.h> #define PARAM_HEADER_SIZE 16 static size_t read_chunk_header_plain(FILE *fp, Status *status) { @@ -112,7 +113,7 @@ static ChunkFile *open_read(const char *fn, Status *status) { for (i = 0;; offset += chunk_len, i++) { ChunkInfo *cip; - fprintf(stdout, "reading chunk %d from %d\n", i, (int)offset); + fprintf(stderr, "reading chunk %d from %d\n", i, (int)offset); /* skip to the begining of chunk i */ if (fseeko(fp, offset, SEEK_SET) != 0) { diff --git a/nerv/lib/matrix/cukernel.h b/nerv/lib/matrix/cukernel.h index fe682d3..39d42eb 100644 --- a/nerv/lib/matrix/cukernel.h +++ b/nerv/lib/matrix/cukernel.h @@ -3,7 +3,7 @@ void cudak_(cuda_mul_elem)(const Matrix *a, const Matrix *b, Matrix *c); void cudak_(cuda_log_elem)(const Matrix *a, Matrix *b); void cudak_(cuda_sigmoid)(const Matrix *a, Matrix *b); void cudak_(cuda_sigmoid_grad)(const Matrix *output, const Matrix *err, Matrix *nerr); -void cudak_(cuda_rand_uniform)(const Matrix *a); //a's curand_gen may be modified +void cudak_(cuda_rand_uniform)(const Matrix *a); /* a's curand_gen may be modified */ void cudak_(cuda_thres_mask)(const Matrix *a, const Matrix *b, double thres, double low, double high); void cudak_(cuda_tanh)(const Matrix *a, Matrix *b); void cudak_(cuda_tanh_grad)(const Matrix *output, const Matrix *err, Matrix *nerr); diff --git a/nerv/lib/matrix/generic/cukernel.cu b/nerv/lib/matrix/generic/cukernel.cu index 2b696d5..7f780a8 100644 --- a/nerv/lib/matrix/generic/cukernel.cu +++ b/nerv/lib/matrix/generic/cukernel.cu @@ -356,11 +356,17 @@ __global__ void cudak_(copy_rows_by_idx)(const MATRIX_ELEM *a, MATRIX_ELEM *b, int j = blockIdx.x * blockDim.x + threadIdx.x; int i = blockIdx.y * blockDim.y + threadIdx.y; if (i >= nrow || j >= ncol) return; + /* int k = lrintf(idx[i]); if (k < 0 || k >= a_nrow) { printf("error in kernel copy_rows_by_idx k(%d) out of range\n", k); } b[j + i * stride] = a[j + k * stride]; + */ + /* NOTE: in most cases it is guaranteed + * the idx is within the range, checking + * would bring some overhead. */ + b[j + i * stride] = a[j + lrintf(idx[i]) * stride]; } __global__ void cudak_(copy_rows_by_colidx)(const MATRIX_ELEM *a, MATRIX_ELEM *b, diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c index 7582725..bf93b77 100644 --- a/nerv/lib/matrix/generic/cumatrix.c +++ b/nerv/lib/matrix/generic/cumatrix.c @@ -41,7 +41,7 @@ void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b, int bm = b->nrow, bn = b->ncol; if (ta == CUBLAS_OP_T) SWAP(am, an); if (tb == CUBLAS_OP_T) SWAP(bm, bn); - if (an != bm) + if (an != bm || (am != c->nrow && bn != c->ncol)) NERV_EXIT_STATUS(status, MAT_WRONG_MULT_DIM, 0); /* Because matrix in Nerv is row-major, here b comes first */ PROFILE_START diff --git a/nerv/lib/matrix/generic/mmatrix.c b/nerv/lib/matrix/generic/mmatrix.c index 225079e..fa1dc5f 100644 --- a/nerv/lib/matrix/generic/mmatrix.c +++ b/nerv/lib/matrix/generic/mmatrix.c @@ -5,9 +5,477 @@ #define MATRIX_DATA_ALLOC(dptr, stride, width, height, status) \ host_matrix_(alloc)(dptr, stride, width, height, status) #define NERV_GENERIC_MATRIX +#include "../cuda_helper.h" #include "../../common.h" #include "../../io/chunk_file.h" -#include "string.h" +#include <string.h> +#include <cblas.h> +#include <float.h> + +Matrix *nerv_matrix_(colsum)(Matrix *a, Status *status) { + Matrix *b = nerv_matrix_(create)(1, a->ncol, status); + if (status->err_code != NERV_NORMAL) + return NULL; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + int i, j; + size_t astride = a->stride; + memset(brow, 0, sizeof(MATRIX_ELEM) * b->ncol); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + brow[j] += arow[j]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +Matrix *nerv_matrix_(colsame)(Matrix *a, const Matrix *ref, Status *status) { + Matrix *b = nerv_matrix_(create)(1, a->ncol, status); + if (status->err_code != NERV_NORMAL) + return NULL; + CHECK_SAME_DIMENSION_RET(a, ref, status); + int i, j; + size_t astride = a->stride, cstride = ref->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + const MATRIX_ELEM *crow = MATRIX_ELEM_PTR(ref); + memset(brow, 0, sizeof(MATRIX_ELEM) * b->ncol); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + { + brow[j] += (int)(arow[j] == crow[j]); + } + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + crow = MATRIX_NEXT_ROW_PTR(crow, cstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +Matrix *nerv_matrix_(rowsum)(Matrix *a, Status *status) { + Matrix *b = nerv_matrix_(create)(a->nrow, 1, status); + if (status->err_code != NERV_NORMAL) + return NULL; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + int i, j; + size_t astride = a->stride, bstride = b->stride; + memset(brow, 0, b->stride * b->nrow); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + brow[0] += arow[j]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +Matrix *nerv_matrix_(rowmax)(Matrix *a, Status *status) { + Matrix *b = nerv_matrix_(create)(a->nrow, 1, status); + if (status->err_code != NERV_NORMAL) + return NULL; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + int i, j; + size_t astride = a->stride, bstride = b->stride; + for (i = 0; i < a->nrow; i++) + { + brow[0] = arow[0]; + for (j = 1; j < a->ncol; j++) + if (arow[j] > brow[0]) + brow[0] = arow[j]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +void nerv_matrix_(rowmax_idx)(Matrix *a, Matrix **b, Matrix **idx, Status *status) { + *b = nerv_matrix_(create)(a->nrow, 1, status); + if (status->err_code != NERV_NORMAL) + return; + *idx = nerv_matrix_(create)(a->nrow, 1, status); + if (status->err_code != NERV_NORMAL) + { + /* FIXME: destroy may also fail! */ + nerv_matrix_(destroy)(*b, status); + return; + } + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(*b), + *crow = MATRIX_ELEM_PTR(*idx); + int i, j; + size_t astride = a->stride, + bstride = (*b)->stride, + cstride = (*idx)->stride; + for (i = 0; i < a->nrow; i++) + { + brow[0] = arow[(int)(crow[0] = 0)]; + for (j = 1; j < a->ncol; j++) + if (arow[j] > brow[0]) + brow[0] = arow[(int)(crow[0] = j)]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + crow = MATRIX_NEXT_ROW_PTR(crow, cstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +Matrix *nerv_matrix_(trans)(Matrix *a, Status *status) { + Matrix *b = nerv_matrix_(create)(a->ncol, a->nrow, status); + if (status->err_code != NERV_NORMAL) + return NULL; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + int i, j; + size_t astride = a->stride, bstride = b->stride; + for (i = 0; i < a->nrow; i++) + { + MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b) + i; + for (j = 0; j < a->ncol; j++) + { + brow[0] = arow[j]; + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +Matrix *nerv_matrix_(decompress)(const Matrix *a, int orig_col, Status *status) { + Matrix *b; + if (a->ncol != 1) + { + NERV_SET_STATUS(status, MAT_COL_VECTOR_EXP, 0); + return NULL; + } + b = nerv_matrix_(create)(a->nrow, orig_col, status); + if (status->err_code != NERV_NORMAL) + return NULL; + int i; + size_t astride = a->stride, bstride = b->stride; + const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b); + memset(brow, 0, b->stride * b->nrow); + for (i = 0; i < a->nrow; i++) + { + brow[lrintf(arow[0])] = 1; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +void nerv_matrix_(add)(Matrix *c, const Matrix *a, const Matrix *b, MATRIX_ELEM alpha, MATRIX_ELEM beta, Status *status) { + CHECK_SAME_DIMENSION(a, b, status); + CHECK_SAME_DIMENSION(a, c, status); + int i, j; + size_t astride = a->stride, + bstride = b->stride, + cstride = c->stride; + const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + MATRIX_ELEM *crow = MATRIX_ELEM_PTR(c); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + crow[j] = arow[j] * alpha + brow[j] * beta; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + crow = MATRIX_NEXT_ROW_PTR(crow, cstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + + +void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b, + MATRIX_ELEM alpha, MATRIX_ELEM beta, + int ta, int tb, Status *status) { +#define SWAP(a, b) \ + do { int t = (a); (a) = (b); (b) = t; } while (0) + + int am = a->nrow, an = a->ncol; + int bm = b->nrow, bn = b->ncol; + if (ta == CblasTrans) SWAP(am, an); + if (tb == CblasTrans) SWAP(bm, bn); + if (an != bm || (am != c->nrow && bn != c->ncol)) + NERV_EXIT_STATUS(status, MAT_WRONG_MULT_DIM, 0); + /* Because matrix in Nerv is row-major, here b comes first */ + NERV_CBLAS_(gemm)(CblasRowMajor, ta, tb, am, bn, an, + alpha, + MATRIX_ELEM_PTR(a), a->stride / sizeof(MATRIX_ELEM), + MATRIX_ELEM_PTR(b), b->stride / sizeof(MATRIX_ELEM), + beta, + MATRIX_ELEM_PTR(c), c->stride / sizeof(MATRIX_ELEM)); + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(add_row)(Matrix *b, const Matrix *a, double beta, + Status *status) { + if (a->ncol != b->ncol) + NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); + if (a->nrow != 1) + NERV_EXIT_STATUS(status, MAT_ROW_VECTOR_EXP, 0); + const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b); + int i, j; + size_t bstride = b->stride; + for (i = 0; i < b->nrow; i++) + { + for (j = 0; j < b->ncol; j++) + brow[j] += arow[j] * beta; + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(clip)(Matrix *self, double val_1, double val_2, Status *status) { + int i, j; + size_t astride = self->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(self); + for (i = 0; i < self->nrow; i++) + { + for (j = 0; j < self->ncol; j++) + if (arow[j] > val_2) + arow[j] = val_2; + else if (arow[j] < val_1) + arow[j] = val_1; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(fill)(Matrix *self, double val, Status *status) { + int i, j; + size_t astride = self->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(self); + for (i = 0; i < self->nrow; i++) + { + for (j = 0; j < self->ncol; j++) + arow[j] = val; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(sigmoid)(Matrix *a, const Matrix *b, Status *status) { + CHECK_SAME_DIMENSION(a, b, status); + int i, j; + size_t astride = a->stride, bstride = b->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + const MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + arow[j] = 1.0 / (1.0 + exp(-brow[j])); + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(sigmoid_grad)(Matrix *nerr, const Matrix *err, + const Matrix *output, Status *status) { + CHECK_SAME_DIMENSION(nerr, err, status); + CHECK_SAME_DIMENSION(nerr, output, status); + int i, j; + size_t nerr_stride = nerr->stride, + err_stride = err->stride, + out_stride = output->stride; + MATRIX_ELEM *nerr_row = MATRIX_ELEM_PTR(nerr); + const MATRIX_ELEM *err_row = MATRIX_ELEM_PTR(err), + *out_row = MATRIX_ELEM_PTR(output); + for (i = 0; i < nerr->nrow; i++) + { + for (j = 0; j < nerr->ncol; j++) + nerr_row[j] = out_row[j] * (1.0 - out_row[j]) * err_row[j]; + nerr_row = MATRIX_NEXT_ROW_PTR(nerr_row, nerr_stride); + err_row = MATRIX_NEXT_ROW_PTR(err_row, err_stride); + out_row = MATRIX_NEXT_ROW_PTR(out_row, out_stride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +Matrix *nerv_matrix_(softmax)(Matrix *b, const Matrix *a, Status *status) { + Matrix *max_idx; + CHECK_SAME_DIMENSION_RET(a, b, status); + max_idx = nerv_matrix_(create)(a->nrow, 1, status); + if (status->err_code != NERV_NORMAL) + return NULL; + int i, j; + size_t astride = a->stride, + bstride = b->stride, + cstride = max_idx->stride; + const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b), + *crow = MATRIX_ELEM_PTR(max_idx); + for (i = 0; i < b->nrow; i++) + { + MATRIX_ELEM max = arow[(int)(crow[0] = 0)]; + for (j = 1; j < b->ncol; j++) + if (arow[j] > max) + max = arow[(int)(crow[0] = j)]; + + MATRIX_ELEM sum = 0; + for (j = 0; j < b->ncol; j++) + sum += exp(arow[j] - max); + for (j = 0; j < b->ncol; j++) + brow[j] = exp(arow[j] - max) / sum; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + crow = MATRIX_NEXT_ROW_PTR(crow, cstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return max_idx; +} + +void nerv_matrix_(mul_elem)(Matrix *c, const Matrix *a, const Matrix *b, + Status *status) { + CHECK_SAME_DIMENSION(a, b, status); + CHECK_SAME_DIMENSION(a, c, status); + int i, j; + size_t astride = a->stride, + bstride = b->stride, + cstride = c->stride; + const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + MATRIX_ELEM *crow = MATRIX_ELEM_PTR(c); + for (i = 0; i < c->nrow; i++) + { + for (j = 0; j < c->ncol; j++) + crow[j] = arow[j] * brow[j]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + crow = MATRIX_NEXT_ROW_PTR(crow, cstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(log_elem)(Matrix *b, const Matrix *a, Status *status) { + CHECK_SAME_DIMENSION(a, b, status); + int i, j; + size_t astride = a->stride, bstride = b->stride; + const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b); + MATRIX_ELEM limit = +#ifdef MATRIX_USE_FLOAT + FLT_MIN; +#elif defined (MATRIX_USE_DOUBLE) + DBL_MIN; +#elif defined (MATRIX_USE_INT) + 1; +#endif + for (i = 0; i < b->nrow; i++) + { + for (j = 0; j < b->ncol; j++) + brow[j] = log(arow[j] < limit ? limit : arow[j]); + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(expand_frm)(Matrix *a, const Matrix *b, + int context, Status *status) { + if (a->nrow != b->nrow) + NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); + if (a->ncol != b->ncol * (context * 2 + 1)) + NERV_EXIT_STATUS(status, MAT_GENERAL_ERR, + "the width should be 2 * context + 1"); + int i, j, k; + size_t astride = a->stride, bstride = b->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + for (i = 0; i < a->nrow; i++) + { + MATRIX_ELEM *a_subrow = arow; + int start = i - context; + if (start < 0) start = 0; + const MATRIX_ELEM *brow = MATRIX_ROW_PTR(b, start); + for (j = i - context; j <= i + context; j++) + { + for (k = 0; k < b->ncol; k++) + a_subrow[k] = brow[k]; + if (0 <= j && j < b->nrow - 1) + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + a_subrow += b->ncol; + } + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(rearrange_frm)(Matrix *a, const Matrix *b, + int step, Status *status) { + CHECK_SAME_DIMENSION(a, b, status); + if (b->ncol % step) + NERV_EXIT_STATUS(status, MAT_GENERAL_ERR, + "the dimension of columns is not divisible by step"); + int i, j, k; + size_t astride = a->stride, bstride = b->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + const MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b); + int gsize = b->ncol / step; /* group size of each step */ + for (i = 0; i < a->nrow; i++) + { + MATRIX_ELEM *a_subrow = arow; + for (k = 0; k < gsize; k++) + { + const MATRIX_ELEM *b_subrow = brow + k; + for (j = 0; j < step; + j++, a_subrow++, b_subrow += gsize) + *a_subrow = *b_subrow; + } + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b, + Status *status) { + if (a->ncol != b->ncol) + NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); + if (b->nrow != 1) + NERV_EXIT_STATUS(status, MAT_ROW_VECTOR_EXP, 0); + int i, j; + size_t astride = a->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + const MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + arow[j] *= brow[j]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(scale_rows_by_col)(Matrix *a, const Matrix *b, + Status *status) { + if (a->nrow != b->nrow) + NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); + if (b->ncol != 1) + NERV_EXIT_STATUS(status, MAT_COL_VECTOR_EXP, 0); + int i, j; + size_t astride = a->stride, bstride = b->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + arow[j] *= brow[0]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} static void host_matrix_(free)(MATRIX_ELEM *ptr, Status *status) { free(ptr); @@ -29,7 +497,10 @@ Matrix *nerv_matrix_(load)(ChunkData *cdp, Status *status) { FILE *fp = cdp->fp; Matrix *self; if (fscanf(fp, "%ld %ld", &nrow, &ncol) != 2) - NERV_EXIT_STATUS(status, MAT_INVALID_FORMAT, 0); + { + NERV_SET_STATUS(status, MAT_INVALID_FORMAT, 0); + return 0; + } self = nerv_matrix_(create)(nrow, ncol, status); if (status->err_code != NERV_NORMAL) return NULL; @@ -40,7 +511,8 @@ Matrix *nerv_matrix_(load)(ChunkData *cdp, Status *status) { if (fscanf(fp, MATRIX_ELEM_FMT, row + j) != 1) { free(self); - NERV_EXIT_STATUS(status, MAT_INVALID_FORMAT, 0); + NERV_SET_STATUS(status, MAT_INVALID_FORMAT, 0); + return 0; } } NERV_SET_STATUS(status, NERV_NORMAL, 0); @@ -65,7 +537,8 @@ void nerv_matrix_(save)(Matrix *self, ChunkFile *cfp, Status *status) { NERV_SET_STATUS(status, NERV_NORMAL, 0); } -void nerv_matrix_(copy_from)(Matrix *a, const Matrix *b, + +void nerv_matrix_(copy_fromh)(Matrix *a, const Matrix *b, int a_begin, int b_begin, int b_end, Status *status) { if (!(0 <= b_begin && b_begin < b_end && b_end <= b->nrow && @@ -75,7 +548,29 @@ void nerv_matrix_(copy_from)(Matrix *a, const Matrix *b, NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); memmove(MATRIX_ROW_PTR(a, a_begin), MATRIX_ROW_PTR(b, b_begin), - sizeof(MATRIX_ELEM) * b->ncol * (b_end - b_begin)); + b->stride * (b_end - b_begin)); + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b, + const Matrix *idx, int b_begin, Status *status) { + if (!(0 <= b_begin && b_begin + a->nrow <= idx->ncol)) + NERV_EXIT_STATUS(status, MAT_INVALID_COPY_INTERVAL, 0); + if (idx->nrow != 1) + NERV_EXIT_STATUS(status, MAT_IDX_VECTOR_EXP, 0); + if (a->ncol != b->ncol) + NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); + int i; + MATRIX_ELEM *idx_row = MATRIX_ELEM_PTR(idx) + b_begin; + for (i = 0; i < a->nrow; i++) + { + int src_idx = lrintf(idx_row[i]); + /* assume the index is valid to improve performance + if (!(0 <= src_idx && src_idx < b->nrow)) + NERV_EXIT_STATUS(status, MAT_INVALID_IDX, 0); + */ + memmove(MATRIX_ROW_PTR(a, i), MATRIX_ROW_PTR(b, src_idx), b->stride); + } NERV_SET_STATUS(status, NERV_NORMAL, 0); } diff --git a/nerv/lib/matrix/generic/mmatrix.h b/nerv/lib/matrix/generic/mmatrix.h index f00a04d..c54c4e5 100644 --- a/nerv/lib/matrix/generic/mmatrix.h +++ b/nerv/lib/matrix/generic/mmatrix.h @@ -1,8 +1,47 @@ #include "../../common.h" #include "../../io/chunk_file.h" -Matrix *nerv_matrix_(load)(ChunkData *cdp, Status *status); -void nerv_matrix_(save)(Matrix *self, ChunkFile *cfp, Status *status); -void nerv_matrix_(copy_from)(Matrix *a, const Matrix *b, +void nerv_matrix_(add)(Matrix *c, const Matrix *a, const Matrix *b, + MATRIX_ELEM alpha, MATRIX_ELEM beta, + Status *status); +void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b, + MATRIX_ELEM alpha, MATRIX_ELEM beta, + int ta, int tb, Status *status); +void nerv_matrix_(sigmoid)(Matrix *a, const Matrix *b, Status *status); +void nerv_matrix_(sigmoid_grad)(Matrix *nerr, const Matrix *err, + const Matrix *output, Status *status); + +Matrix *nerv_matrix_(softmax)(Matrix *b, const Matrix *a, Status *status); +Matrix *nerv_matrix_(rowsum)(Matrix *a, Status *status); +Matrix *nerv_matrix_(colsum)(Matrix *a, Status *status); +Matrix *nerv_matrix_(colsame)(Matrix *a, const Matrix *ref, + Status *status); +Matrix *nerv_matrix_(rowmax)(Matrix *a, Status *status); +void nerv_matrix_(rowmax_idx)(Matrix *a, Matrix **b, Matrix **idx, + Status *status); +void nerv_matrix_(add_row)(Matrix *b, const Matrix *a, double beta, + Status *status); +void nerv_matrix_(clip)(Matrix *self, double val_1, double val_2, Status *status); +void nerv_matrix_(fill)(Matrix *self, double val, Status *status); +void nerv_matrix_(copy_fromh)(Matrix *a, const Matrix *b, int a_begin, int b_begin, int b_end, Status *status); +Matrix *nerv_matrix_(trans)(Matrix *a, Status *status); +void nerv_matrix_(mul_elem)(Matrix *c, const Matrix *a, const Matrix *b, + Status *status); + +void nerv_matrix_(log_elem)(Matrix *b, const Matrix *a, Status *status); + +Matrix *nerv_matrix_(decompress)(const Matrix *a, int orig_col, Status *status); +void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b, + const Matrix *idx, int b_begin, Status *status); +void nerv_matrix_(expand_frm)(Matrix *a, const Matrix *b, + int context, Status *status); +void nerv_matrix_(rearrange_frm)(Matrix *a, const Matrix *b, + int step, Status *status); +void nerv_matrix_(scale_rows_by_col)(Matrix *a, const Matrix *b, + Status *status); +void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b, + Status *status); +Matrix *nerv_matrix_(load)(ChunkData *cdp, Status *status); +void nerv_matrix_(save)(Matrix *self, ChunkFile *cfp, Status *status); diff --git a/nerv/lib/matrix/matrix.h b/nerv/lib/matrix/matrix.h index a28fd97..2562772 100644 --- a/nerv/lib/matrix/matrix.h +++ b/nerv/lib/matrix/matrix.h @@ -20,4 +20,6 @@ typedef struct Matrix { #define MATRIX_ROW_PTR(self, row) \ (MATRIX_ELEM *)((char *)MATRIX_ELEM_PTR(self) + (row) * (self)->stride) +#define MATRIX_NEXT_ROW_PTR(self, stride) \ + (MATRIX_ELEM *)((char *)self + stride) #endif diff --git a/nerv/lib/matrix/mmatrix.c b/nerv/lib/matrix/mmatrix.c index b5670f2..3125ab6 100644 --- a/nerv/lib/matrix/mmatrix.c +++ b/nerv/lib/matrix/mmatrix.c @@ -5,6 +5,7 @@ #define MATRIX_USE_FLOAT #define host_matrix_(NAME) host_matrix_float_##NAME #define nerv_matrix_(NAME) nerv_matrix_host_float_##NAME +#define NERV_CBLAS_(NAME) cblas_s##NAME #include "generic/matrix.h" #include "generic/elem_type.h" #include "generic/mmatrix.c" @@ -29,6 +30,7 @@ Matrix *nerv_matrix_(perm_gen)(int ncol, Status *status) { #undef nerv_matrix_ #undef host_matrix_ +#undef NERV_CBLAS_ #undef MATRIX_USE_FLOAT #undef MATRIX_ELEM #undef MATRIX_ELEM_PTR @@ -40,9 +42,11 @@ Matrix *nerv_matrix_(perm_gen)(int ncol, Status *status) { #define MATRIX_USE_DOUBLE #define host_matrix_(NAME) host_matrix_double_##NAME #define nerv_matrix_(NAME) nerv_matrix_host_double_##NAME +#define NERV_CBLAS_(NAME) cblas_d##NAME #include "generic/mmatrix.c" #undef nerv_matrix_ #undef host_matrix_ +#undef NERV_CBLAS_ #undef MATRIX_USE_DOUBLE #undef MATRIX_ELEM #undef MATRIX_ELEM_PTR @@ -54,4 +58,5 @@ Matrix *nerv_matrix_(perm_gen)(int ncol, Status *status) { #define MATRIX_USE_INT #define host_matrix_(NAME) host_matrix_int_##NAME #define nerv_matrix_(NAME) nerv_matrix_host_int_##NAME +#define NERV_CBLAS_(NAME) (void) #include "generic/mmatrix.c" |