diff options
Diffstat (limited to 'nerv/lib/matrix/generic/mmatrix.c')
-rw-r--r-- | nerv/lib/matrix/generic/mmatrix.c | 505 |
1 files changed, 500 insertions, 5 deletions
diff --git a/nerv/lib/matrix/generic/mmatrix.c b/nerv/lib/matrix/generic/mmatrix.c index 225079e..fa1dc5f 100644 --- a/nerv/lib/matrix/generic/mmatrix.c +++ b/nerv/lib/matrix/generic/mmatrix.c @@ -5,9 +5,477 @@ #define MATRIX_DATA_ALLOC(dptr, stride, width, height, status) \ host_matrix_(alloc)(dptr, stride, width, height, status) #define NERV_GENERIC_MATRIX +#include "../cuda_helper.h" #include "../../common.h" #include "../../io/chunk_file.h" -#include "string.h" +#include <string.h> +#include <cblas.h> +#include <float.h> + +Matrix *nerv_matrix_(colsum)(Matrix *a, Status *status) { + Matrix *b = nerv_matrix_(create)(1, a->ncol, status); + if (status->err_code != NERV_NORMAL) + return NULL; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + int i, j; + size_t astride = a->stride; + memset(brow, 0, sizeof(MATRIX_ELEM) * b->ncol); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + brow[j] += arow[j]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +Matrix *nerv_matrix_(colsame)(Matrix *a, const Matrix *ref, Status *status) { + Matrix *b = nerv_matrix_(create)(1, a->ncol, status); + if (status->err_code != NERV_NORMAL) + return NULL; + CHECK_SAME_DIMENSION_RET(a, ref, status); + int i, j; + size_t astride = a->stride, cstride = ref->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + const MATRIX_ELEM *crow = MATRIX_ELEM_PTR(ref); + memset(brow, 0, sizeof(MATRIX_ELEM) * b->ncol); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + { + brow[j] += (int)(arow[j] == crow[j]); + } + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + crow = MATRIX_NEXT_ROW_PTR(crow, cstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +Matrix *nerv_matrix_(rowsum)(Matrix *a, Status *status) { + Matrix *b = nerv_matrix_(create)(a->nrow, 1, status); + if (status->err_code != NERV_NORMAL) + return NULL; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + int i, j; + size_t astride = a->stride, bstride = b->stride; + memset(brow, 0, b->stride * b->nrow); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + brow[0] += arow[j]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +Matrix *nerv_matrix_(rowmax)(Matrix *a, Status *status) { + Matrix *b = nerv_matrix_(create)(a->nrow, 1, status); + if (status->err_code != NERV_NORMAL) + return NULL; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + int i, j; + size_t astride = a->stride, bstride = b->stride; + for (i = 0; i < a->nrow; i++) + { + brow[0] = arow[0]; + for (j = 1; j < a->ncol; j++) + if (arow[j] > brow[0]) + brow[0] = arow[j]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +void nerv_matrix_(rowmax_idx)(Matrix *a, Matrix **b, Matrix **idx, Status *status) { + *b = nerv_matrix_(create)(a->nrow, 1, status); + if (status->err_code != NERV_NORMAL) + return; + *idx = nerv_matrix_(create)(a->nrow, 1, status); + if (status->err_code != NERV_NORMAL) + { + /* FIXME: destroy may also fail! */ + nerv_matrix_(destroy)(*b, status); + return; + } + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(*b), + *crow = MATRIX_ELEM_PTR(*idx); + int i, j; + size_t astride = a->stride, + bstride = (*b)->stride, + cstride = (*idx)->stride; + for (i = 0; i < a->nrow; i++) + { + brow[0] = arow[(int)(crow[0] = 0)]; + for (j = 1; j < a->ncol; j++) + if (arow[j] > brow[0]) + brow[0] = arow[(int)(crow[0] = j)]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + crow = MATRIX_NEXT_ROW_PTR(crow, cstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +Matrix *nerv_matrix_(trans)(Matrix *a, Status *status) { + Matrix *b = nerv_matrix_(create)(a->ncol, a->nrow, status); + if (status->err_code != NERV_NORMAL) + return NULL; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + int i, j; + size_t astride = a->stride, bstride = b->stride; + for (i = 0; i < a->nrow; i++) + { + MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b) + i; + for (j = 0; j < a->ncol; j++) + { + brow[0] = arow[j]; + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +Matrix *nerv_matrix_(decompress)(const Matrix *a, int orig_col, Status *status) { + Matrix *b; + if (a->ncol != 1) + { + NERV_SET_STATUS(status, MAT_COL_VECTOR_EXP, 0); + return NULL; + } + b = nerv_matrix_(create)(a->nrow, orig_col, status); + if (status->err_code != NERV_NORMAL) + return NULL; + int i; + size_t astride = a->stride, bstride = b->stride; + const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b); + memset(brow, 0, b->stride * b->nrow); + for (i = 0; i < a->nrow; i++) + { + brow[lrintf(arow[0])] = 1; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +void nerv_matrix_(add)(Matrix *c, const Matrix *a, const Matrix *b, MATRIX_ELEM alpha, MATRIX_ELEM beta, Status *status) { + CHECK_SAME_DIMENSION(a, b, status); + CHECK_SAME_DIMENSION(a, c, status); + int i, j; + size_t astride = a->stride, + bstride = b->stride, + cstride = c->stride; + const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + MATRIX_ELEM *crow = MATRIX_ELEM_PTR(c); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + crow[j] = arow[j] * alpha + brow[j] * beta; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + crow = MATRIX_NEXT_ROW_PTR(crow, cstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + + +void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b, + MATRIX_ELEM alpha, MATRIX_ELEM beta, + int ta, int tb, Status *status) { +#define SWAP(a, b) \ + do { int t = (a); (a) = (b); (b) = t; } while (0) + + int am = a->nrow, an = a->ncol; + int bm = b->nrow, bn = b->ncol; + if (ta == CblasTrans) SWAP(am, an); + if (tb == CblasTrans) SWAP(bm, bn); + if (an != bm || (am != c->nrow && bn != c->ncol)) + NERV_EXIT_STATUS(status, MAT_WRONG_MULT_DIM, 0); + /* Because matrix in Nerv is row-major, here b comes first */ + NERV_CBLAS_(gemm)(CblasRowMajor, ta, tb, am, bn, an, + alpha, + MATRIX_ELEM_PTR(a), a->stride / sizeof(MATRIX_ELEM), + MATRIX_ELEM_PTR(b), b->stride / sizeof(MATRIX_ELEM), + beta, + MATRIX_ELEM_PTR(c), c->stride / sizeof(MATRIX_ELEM)); + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(add_row)(Matrix *b, const Matrix *a, double beta, + Status *status) { + if (a->ncol != b->ncol) + NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); + if (a->nrow != 1) + NERV_EXIT_STATUS(status, MAT_ROW_VECTOR_EXP, 0); + const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b); + int i, j; + size_t bstride = b->stride; + for (i = 0; i < b->nrow; i++) + { + for (j = 0; j < b->ncol; j++) + brow[j] += arow[j] * beta; + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(clip)(Matrix *self, double val_1, double val_2, Status *status) { + int i, j; + size_t astride = self->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(self); + for (i = 0; i < self->nrow; i++) + { + for (j = 0; j < self->ncol; j++) + if (arow[j] > val_2) + arow[j] = val_2; + else if (arow[j] < val_1) + arow[j] = val_1; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(fill)(Matrix *self, double val, Status *status) { + int i, j; + size_t astride = self->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(self); + for (i = 0; i < self->nrow; i++) + { + for (j = 0; j < self->ncol; j++) + arow[j] = val; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(sigmoid)(Matrix *a, const Matrix *b, Status *status) { + CHECK_SAME_DIMENSION(a, b, status); + int i, j; + size_t astride = a->stride, bstride = b->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + const MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + arow[j] = 1.0 / (1.0 + exp(-brow[j])); + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(sigmoid_grad)(Matrix *nerr, const Matrix *err, + const Matrix *output, Status *status) { + CHECK_SAME_DIMENSION(nerr, err, status); + CHECK_SAME_DIMENSION(nerr, output, status); + int i, j; + size_t nerr_stride = nerr->stride, + err_stride = err->stride, + out_stride = output->stride; + MATRIX_ELEM *nerr_row = MATRIX_ELEM_PTR(nerr); + const MATRIX_ELEM *err_row = MATRIX_ELEM_PTR(err), + *out_row = MATRIX_ELEM_PTR(output); + for (i = 0; i < nerr->nrow; i++) + { + for (j = 0; j < nerr->ncol; j++) + nerr_row[j] = out_row[j] * (1.0 - out_row[j]) * err_row[j]; + nerr_row = MATRIX_NEXT_ROW_PTR(nerr_row, nerr_stride); + err_row = MATRIX_NEXT_ROW_PTR(err_row, err_stride); + out_row = MATRIX_NEXT_ROW_PTR(out_row, out_stride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +Matrix *nerv_matrix_(softmax)(Matrix *b, const Matrix *a, Status *status) { + Matrix *max_idx; + CHECK_SAME_DIMENSION_RET(a, b, status); + max_idx = nerv_matrix_(create)(a->nrow, 1, status); + if (status->err_code != NERV_NORMAL) + return NULL; + int i, j; + size_t astride = a->stride, + bstride = b->stride, + cstride = max_idx->stride; + const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b), + *crow = MATRIX_ELEM_PTR(max_idx); + for (i = 0; i < b->nrow; i++) + { + MATRIX_ELEM max = arow[(int)(crow[0] = 0)]; + for (j = 1; j < b->ncol; j++) + if (arow[j] > max) + max = arow[(int)(crow[0] = j)]; + + MATRIX_ELEM sum = 0; + for (j = 0; j < b->ncol; j++) + sum += exp(arow[j] - max); + for (j = 0; j < b->ncol; j++) + brow[j] = exp(arow[j] - max) / sum; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + crow = MATRIX_NEXT_ROW_PTR(crow, cstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return max_idx; +} + +void nerv_matrix_(mul_elem)(Matrix *c, const Matrix *a, const Matrix *b, + Status *status) { + CHECK_SAME_DIMENSION(a, b, status); + CHECK_SAME_DIMENSION(a, c, status); + int i, j; + size_t astride = a->stride, + bstride = b->stride, + cstride = c->stride; + const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + MATRIX_ELEM *crow = MATRIX_ELEM_PTR(c); + for (i = 0; i < c->nrow; i++) + { + for (j = 0; j < c->ncol; j++) + crow[j] = arow[j] * brow[j]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + crow = MATRIX_NEXT_ROW_PTR(crow, cstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(log_elem)(Matrix *b, const Matrix *a, Status *status) { + CHECK_SAME_DIMENSION(a, b, status); + int i, j; + size_t astride = a->stride, bstride = b->stride; + const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b); + MATRIX_ELEM limit = +#ifdef MATRIX_USE_FLOAT + FLT_MIN; +#elif defined (MATRIX_USE_DOUBLE) + DBL_MIN; +#elif defined (MATRIX_USE_INT) + 1; +#endif + for (i = 0; i < b->nrow; i++) + { + for (j = 0; j < b->ncol; j++) + brow[j] = log(arow[j] < limit ? limit : arow[j]); + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(expand_frm)(Matrix *a, const Matrix *b, + int context, Status *status) { + if (a->nrow != b->nrow) + NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); + if (a->ncol != b->ncol * (context * 2 + 1)) + NERV_EXIT_STATUS(status, MAT_GENERAL_ERR, + "the width should be 2 * context + 1"); + int i, j, k; + size_t astride = a->stride, bstride = b->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + for (i = 0; i < a->nrow; i++) + { + MATRIX_ELEM *a_subrow = arow; + int start = i - context; + if (start < 0) start = 0; + const MATRIX_ELEM *brow = MATRIX_ROW_PTR(b, start); + for (j = i - context; j <= i + context; j++) + { + for (k = 0; k < b->ncol; k++) + a_subrow[k] = brow[k]; + if (0 <= j && j < b->nrow - 1) + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + a_subrow += b->ncol; + } + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(rearrange_frm)(Matrix *a, const Matrix *b, + int step, Status *status) { + CHECK_SAME_DIMENSION(a, b, status); + if (b->ncol % step) + NERV_EXIT_STATUS(status, MAT_GENERAL_ERR, + "the dimension of columns is not divisible by step"); + int i, j, k; + size_t astride = a->stride, bstride = b->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + const MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b); + int gsize = b->ncol / step; /* group size of each step */ + for (i = 0; i < a->nrow; i++) + { + MATRIX_ELEM *a_subrow = arow; + for (k = 0; k < gsize; k++) + { + const MATRIX_ELEM *b_subrow = brow + k; + for (j = 0; j < step; + j++, a_subrow++, b_subrow += gsize) + *a_subrow = *b_subrow; + } + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b, + Status *status) { + if (a->ncol != b->ncol) + NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); + if (b->nrow != 1) + NERV_EXIT_STATUS(status, MAT_ROW_VECTOR_EXP, 0); + int i, j; + size_t astride = a->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a); + const MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + arow[j] *= brow[j]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(scale_rows_by_col)(Matrix *a, const Matrix *b, + Status *status) { + if (a->nrow != b->nrow) + NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); + if (b->ncol != 1) + NERV_EXIT_STATUS(status, MAT_COL_VECTOR_EXP, 0); + int i, j; + size_t astride = a->stride, bstride = b->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + arow[j] *= brow[0]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} static void host_matrix_(free)(MATRIX_ELEM *ptr, Status *status) { free(ptr); @@ -29,7 +497,10 @@ Matrix *nerv_matrix_(load)(ChunkData *cdp, Status *status) { FILE *fp = cdp->fp; Matrix *self; if (fscanf(fp, "%ld %ld", &nrow, &ncol) != 2) - NERV_EXIT_STATUS(status, MAT_INVALID_FORMAT, 0); + { + NERV_SET_STATUS(status, MAT_INVALID_FORMAT, 0); + return 0; + } self = nerv_matrix_(create)(nrow, ncol, status); if (status->err_code != NERV_NORMAL) return NULL; @@ -40,7 +511,8 @@ Matrix *nerv_matrix_(load)(ChunkData *cdp, Status *status) { if (fscanf(fp, MATRIX_ELEM_FMT, row + j) != 1) { free(self); - NERV_EXIT_STATUS(status, MAT_INVALID_FORMAT, 0); + NERV_SET_STATUS(status, MAT_INVALID_FORMAT, 0); + return 0; } } NERV_SET_STATUS(status, NERV_NORMAL, 0); @@ -65,7 +537,8 @@ void nerv_matrix_(save)(Matrix *self, ChunkFile *cfp, Status *status) { NERV_SET_STATUS(status, NERV_NORMAL, 0); } -void nerv_matrix_(copy_from)(Matrix *a, const Matrix *b, + +void nerv_matrix_(copy_fromh)(Matrix *a, const Matrix *b, int a_begin, int b_begin, int b_end, Status *status) { if (!(0 <= b_begin && b_begin < b_end && b_end <= b->nrow && @@ -75,7 +548,29 @@ void nerv_matrix_(copy_from)(Matrix *a, const Matrix *b, NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); memmove(MATRIX_ROW_PTR(a, a_begin), MATRIX_ROW_PTR(b, b_begin), - sizeof(MATRIX_ELEM) * b->ncol * (b_end - b_begin)); + b->stride * (b_end - b_begin)); + NERV_SET_STATUS(status, NERV_NORMAL, 0); +} + +void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b, + const Matrix *idx, int b_begin, Status *status) { + if (!(0 <= b_begin && b_begin + a->nrow <= idx->ncol)) + NERV_EXIT_STATUS(status, MAT_INVALID_COPY_INTERVAL, 0); + if (idx->nrow != 1) + NERV_EXIT_STATUS(status, MAT_IDX_VECTOR_EXP, 0); + if (a->ncol != b->ncol) + NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0); + int i; + MATRIX_ELEM *idx_row = MATRIX_ELEM_PTR(idx) + b_begin; + for (i = 0; i < a->nrow; i++) + { + int src_idx = lrintf(idx_row[i]); + /* assume the index is valid to improve performance + if (!(0 <= src_idx && src_idx < b->nrow)) + NERV_EXIT_STATUS(status, MAT_INVALID_IDX, 0); + */ + memmove(MATRIX_ROW_PTR(a, i), MATRIX_ROW_PTR(b, src_idx), b->stride); + } NERV_SET_STATUS(status, NERV_NORMAL, 0); } |