aboutsummaryrefslogblamecommitdiff
path: root/nerv/lib/matrix/generic/mmatrix.c
blob: 3dabe0ee86a42f77682fc2414c363c44782768f4 (plain) (tree)
1
2
3
4
5
6
7
8
9






                                                                                    
                           
                         
                                
































































































































































































































































                                                                                                                          















                                                              


















































































































































































































                                                                              


                                                                  
                                            






                                                                          
                                            








                                                            



                                                       
                                                    
                                        







                                                          

                                                               

             
                                            

















                                                                       
                                            

 

                                                         








                                                                






















                                                                             
                                            


      
#ifdef NERV_GENERIC_MMATRIX
#include "matrix.h"
#include "elem_type.h"
#define MATRIX_DATA_FREE(ptr, status) host_matrix_(free)(ptr, status)
#define MATRIX_DATA_ALLOC(dptr, stride, width, height, status) \
                            host_matrix_(alloc)(dptr, stride, width, height, status)
#define NERV_GENERIC_MATRIX
#include "../cuda_helper.h"
#include "../../common.h"
#include "../../io/chunk_file.h"
#include <string.h>
#include <cblas.h>
#include <float.h>

Matrix *nerv_matrix_(colsum)(Matrix *a, Status *status) {
    Matrix *b = nerv_matrix_(create)(1, a->ncol, status);
    if (status->err_code != NERV_NORMAL)
        return NULL;
    MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a),
                *brow = MATRIX_ELEM_PTR(b);
    int i, j;
    size_t astride = a->stride;
    memset(brow, 0, sizeof(MATRIX_ELEM) * b->ncol);
    for (i = 0; i < a->nrow; i++)
    {
        for (j = 0; j < a->ncol; j++)
            brow[j] += arow[j];
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
    return b;
}

Matrix *nerv_matrix_(colsame)(Matrix *a, const Matrix *ref, Status *status) {
    Matrix *b = nerv_matrix_(create)(1, a->ncol, status);
    if (status->err_code != NERV_NORMAL)
        return NULL;
    CHECK_SAME_DIMENSION_RET(a, ref, status);
    int i, j;
    size_t astride = a->stride, cstride = ref->stride;
    MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a),
                *brow = MATRIX_ELEM_PTR(b);
    const MATRIX_ELEM *crow = MATRIX_ELEM_PTR(ref);
    memset(brow, 0, sizeof(MATRIX_ELEM) * b->ncol);
    for (i = 0; i < a->nrow; i++)
    {
        for (j = 0; j < a->ncol; j++)
        {
            brow[j] += (int)(arow[j] == crow[j]);
        }
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
        crow = MATRIX_NEXT_ROW_PTR(crow, cstride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
    return b;
}

Matrix *nerv_matrix_(rowsum)(Matrix *a, Status *status) {
    Matrix *b = nerv_matrix_(create)(a->nrow, 1, status);
    if (status->err_code != NERV_NORMAL)
        return NULL;
    MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a),
                *brow = MATRIX_ELEM_PTR(b);
    int i, j;
    size_t astride = a->stride, bstride = b->stride;
    memset(brow, 0, b->stride * b->nrow);
    for (i = 0; i < a->nrow; i++)
    {
        for (j = 0; j < a->ncol; j++)
            brow[0] += arow[j];
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
        brow = MATRIX_NEXT_ROW_PTR(brow, bstride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
    return b;
}

Matrix *nerv_matrix_(rowmax)(Matrix *a, Status *status) {
    Matrix *b = nerv_matrix_(create)(a->nrow, 1, status);
    if (status->err_code != NERV_NORMAL)
        return NULL;
    MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a),
                *brow = MATRIX_ELEM_PTR(b);
    int i, j;
    size_t astride = a->stride, bstride = b->stride;
    for (i = 0; i < a->nrow; i++)
    {
        brow[0] = arow[0];
        for (j = 1; j < a->ncol; j++)
            if (arow[j] > brow[0])
                brow[0] = arow[j];
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
        brow = MATRIX_NEXT_ROW_PTR(brow, bstride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
    return b;
}

void nerv_matrix_(rowmax_idx)(Matrix *a, Matrix **b, Matrix **idx, Status *status) {
    *b = nerv_matrix_(create)(a->nrow, 1, status);
    if (status->err_code != NERV_NORMAL)
        return;
    *idx = nerv_matrix_(create)(a->nrow, 1, status);
    if (status->err_code != NERV_NORMAL)
    {
        /* FIXME: destroy may also fail! */
        nerv_matrix_(destroy)(*b, status);
        return;
    }
    MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a),
                *brow = MATRIX_ELEM_PTR(*b),
                *crow = MATRIX_ELEM_PTR(*idx);
    int i, j;
    size_t astride = a->stride,
            bstride = (*b)->stride,
            cstride = (*idx)->stride;
    for (i = 0; i < a->nrow; i++)
    {
        brow[0] = arow[(int)(crow[0] = 0)];
        for (j = 1; j < a->ncol; j++)
            if (arow[j] > brow[0])
                brow[0] = arow[(int)(crow[0] = j)];
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
        brow = MATRIX_NEXT_ROW_PTR(brow, bstride);
        crow = MATRIX_NEXT_ROW_PTR(crow, cstride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

Matrix *nerv_matrix_(trans)(Matrix *a, Status *status) {
    Matrix *b = nerv_matrix_(create)(a->ncol, a->nrow, status);
    if (status->err_code != NERV_NORMAL)
        return NULL;
    MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a);
    int i, j;
    size_t astride = a->stride, bstride = b->stride;
    for (i = 0; i < a->nrow; i++)
    {
        MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b) + i;
        for (j = 0; j < a->ncol; j++)
        {
            brow[0] = arow[j];
            brow = MATRIX_NEXT_ROW_PTR(brow, bstride);
        }
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
    return b;
}

Matrix *nerv_matrix_(decompress)(const Matrix *a, int orig_col, Status *status) {
    Matrix *b;
    if (a->ncol != 1)
    {
        NERV_SET_STATUS(status, MAT_COL_VECTOR_EXP, 0);
        return NULL;
    }
    b = nerv_matrix_(create)(a->nrow, orig_col, status);
    if (status->err_code != NERV_NORMAL)
        return NULL;
    int i;
    size_t astride = a->stride, bstride = b->stride;
    const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a);
    MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b);
    memset(brow, 0, b->stride * b->nrow);
    for (i = 0; i < a->nrow; i++)
    {
        brow[lrintf(arow[0])] = 1;
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
        brow = MATRIX_NEXT_ROW_PTR(brow, bstride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
    return b;
}

void nerv_matrix_(add)(Matrix *c, const Matrix *a, const Matrix *b, MATRIX_ELEM alpha, MATRIX_ELEM beta, Status *status) {
    CHECK_SAME_DIMENSION(a, b, status);
    CHECK_SAME_DIMENSION(a, c, status);
    int i, j;
    size_t astride = a->stride,
            bstride = b->stride,
            cstride = c->stride;
    const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a),
                        *brow = MATRIX_ELEM_PTR(b);
    MATRIX_ELEM *crow = MATRIX_ELEM_PTR(c);
    for (i = 0; i < a->nrow; i++)
    {
        for (j = 0; j < a->ncol; j++)
            crow[j] = arow[j] * alpha + brow[j] * beta;
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
        brow = MATRIX_NEXT_ROW_PTR(brow, bstride);
        crow = MATRIX_NEXT_ROW_PTR(crow, cstride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}


void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b,
                        MATRIX_ELEM alpha, MATRIX_ELEM beta,
                        int ta, int tb, Status *status) {
#define SWAP(a, b) \
    do { int t = (a); (a) = (b); (b) = t; } while (0)

    int am = a->nrow, an = a->ncol;
    int bm = b->nrow, bn = b->ncol;
    if (ta == CblasTrans) SWAP(am, an);
    if (tb == CblasTrans) SWAP(bm, bn);
    if (an != bm || (am != c->nrow && bn != c->ncol))
        NERV_EXIT_STATUS(status, MAT_WRONG_MULT_DIM, 0);
    /* Because matrix in Nerv is row-major, here b comes first */
    NERV_CBLAS_(gemm)(CblasRowMajor, ta, tb, am, bn, an,
         alpha,
         MATRIX_ELEM_PTR(a), a->stride / sizeof(MATRIX_ELEM),
         MATRIX_ELEM_PTR(b), b->stride / sizeof(MATRIX_ELEM),
         beta,
         MATRIX_ELEM_PTR(c), c->stride / sizeof(MATRIX_ELEM));
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

void nerv_matrix_(add_row)(Matrix *b, const Matrix *a, double beta,
                            Status *status) {
    if (a->ncol != b->ncol)
        NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
    if (a->nrow != 1)
        NERV_EXIT_STATUS(status, MAT_ROW_VECTOR_EXP, 0);
    const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a);
    MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b);
    int i, j;
    size_t bstride = b->stride;
    for (i = 0; i < b->nrow; i++)
    {
        for (j = 0; j < b->ncol; j++)
            brow[j] += arow[j] * beta;
        brow = MATRIX_NEXT_ROW_PTR(brow, bstride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

void nerv_matrix_(clip)(Matrix *self, double val_1, double val_2, Status *status) {
    int i, j;
    size_t astride = self->stride;
    MATRIX_ELEM *arow = MATRIX_ELEM_PTR(self);
    for (i = 0; i < self->nrow; i++)
    {
        for (j = 0; j < self->ncol; j++)
            if (arow[j] > val_2)
                arow[j] = val_2;
            else if (arow[j] < val_1)
                arow[j] = val_1;
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

void nerv_matrix_(fill)(Matrix *self, double val, Status *status) {
    int i, j;
    size_t astride = self->stride;
    MATRIX_ELEM *arow = MATRIX_ELEM_PTR(self);
    for (i = 0; i < self->nrow; i++)
    {
        for (j = 0; j < self->ncol; j++)
            arow[j] = val;
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

void nerv_matrix_(diagonalize)(Matrix *self, Status *status) {
    if (self->nrow != self->ncol)
        NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
    int i, j;
    size_t astride = self->stride;
    MATRIX_ELEM *arow = MATRIX_ELEM_PTR(self);
    for (i = 0; i < self->nrow; i++)
    {
        for (j = 0; j < self->ncol; j++)
            if (i != j)
                arow[j] = 0;
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

void nerv_matrix_(sigmoid)(Matrix *a, const Matrix *b, Status *status) {
    CHECK_SAME_DIMENSION(a, b, status);
    int i, j;
    size_t astride = a->stride, bstride = b->stride;
    MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a);
    const MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b);
    for (i = 0; i < a->nrow; i++)
    {
        for (j = 0; j < a->ncol; j++)
            arow[j] = 1.0 / (1.0 + exp(-brow[j]));
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
        brow = MATRIX_NEXT_ROW_PTR(brow, bstride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

void nerv_matrix_(sigmoid_grad)(Matrix *nerr, const Matrix *err,
                                const Matrix *output, Status *status) {
    CHECK_SAME_DIMENSION(nerr, err, status);
    CHECK_SAME_DIMENSION(nerr, output, status);
    int i, j;
    size_t nerr_stride = nerr->stride,
            err_stride = err->stride,
            out_stride = output->stride;
    MATRIX_ELEM *nerr_row = MATRIX_ELEM_PTR(nerr);
    const MATRIX_ELEM *err_row = MATRIX_ELEM_PTR(err),
                        *out_row = MATRIX_ELEM_PTR(output);
    for (i = 0; i < nerr->nrow; i++)
    {
        for (j = 0; j < nerr->ncol; j++)
            nerr_row[j] = out_row[j] * (1.0 - out_row[j]) * err_row[j];
        nerr_row = MATRIX_NEXT_ROW_PTR(nerr_row, nerr_stride);
        err_row = MATRIX_NEXT_ROW_PTR(err_row, err_stride);
        out_row = MATRIX_NEXT_ROW_PTR(out_row, out_stride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

Matrix *nerv_matrix_(softmax)(Matrix *b, const Matrix *a, Status *status) {
    Matrix *max_idx;
    CHECK_SAME_DIMENSION_RET(a, b, status);
    max_idx = nerv_matrix_(create)(a->nrow, 1, status);
    if (status->err_code != NERV_NORMAL)
        return NULL;
    int i, j;
    size_t astride = a->stride,
            bstride = b->stride,
            cstride = max_idx->stride;
    const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a);
    MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b),
                        *crow = MATRIX_ELEM_PTR(max_idx);
    for (i = 0; i < b->nrow; i++)
    {
        MATRIX_ELEM max = arow[(int)(crow[0] = 0)];
        for (j = 1; j < b->ncol; j++)
            if (arow[j] > max)
                max = arow[(int)(crow[0] = j)];

        MATRIX_ELEM sum = 0;
        for (j = 0; j < b->ncol; j++)
            sum += exp(arow[j] - max);
        for (j = 0; j < b->ncol; j++)
            brow[j] = exp(arow[j] - max) / sum;
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
        brow = MATRIX_NEXT_ROW_PTR(brow, bstride);
        crow = MATRIX_NEXT_ROW_PTR(crow, cstride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
    return max_idx;
}

void nerv_matrix_(mul_elem)(Matrix *c, const Matrix *a, const Matrix *b,
                            Status *status) {
    CHECK_SAME_DIMENSION(a, b, status);
    CHECK_SAME_DIMENSION(a, c, status);
    int i, j;
    size_t astride = a->stride,
            bstride = b->stride,
            cstride = c->stride;
    const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a),
                        *brow = MATRIX_ELEM_PTR(b);
    MATRIX_ELEM *crow = MATRIX_ELEM_PTR(c);
    for (i = 0; i < c->nrow; i++)
    {
        for (j = 0; j < c->ncol; j++)
            crow[j] = arow[j] * brow[j];
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
        brow = MATRIX_NEXT_ROW_PTR(brow, bstride);
        crow = MATRIX_NEXT_ROW_PTR(crow, cstride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

void nerv_matrix_(log_elem)(Matrix *b, const Matrix *a, Status *status) {
    CHECK_SAME_DIMENSION(a, b, status);
    int i, j;
    size_t astride = a->stride, bstride = b->stride;
    const MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a);
    MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b);
    MATRIX_ELEM limit =
#ifdef MATRIX_USE_FLOAT
    FLT_MIN;
#elif defined (MATRIX_USE_DOUBLE)
    DBL_MIN;
#elif defined (MATRIX_USE_INT)
    1;
#endif
    for (i = 0; i < b->nrow; i++)
    {
        for (j = 0; j < b->ncol; j++)
            brow[j] = log(arow[j] < limit ? limit : arow[j]);
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
        brow = MATRIX_NEXT_ROW_PTR(brow, bstride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

void nerv_matrix_(expand_frm)(Matrix *a, const Matrix *b,
                            int context, Status *status) {
    if (a->nrow != b->nrow)
        NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
    if (a->ncol != b->ncol * (context * 2 + 1))
        NERV_EXIT_STATUS(status, MAT_GENERAL_ERR,
                        "the width should be 2 * context + 1");
    int i, j, k;
    size_t astride = a->stride, bstride = b->stride;
    MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a);
    for (i = 0; i < a->nrow; i++)
    {
        MATRIX_ELEM *a_subrow = arow;
        int start = i - context;
        if (start < 0) start = 0;
        const MATRIX_ELEM *brow = MATRIX_ROW_PTR(b, start);
        for (j = i - context; j <= i + context; j++)
        {
            for (k = 0; k < b->ncol; k++)
                a_subrow[k] = brow[k];
            if (0 <= j && j < b->nrow - 1)
                brow = MATRIX_NEXT_ROW_PTR(brow, bstride);
            a_subrow += b->ncol;
        }
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

void nerv_matrix_(rearrange_frm)(Matrix *a, const Matrix *b,
                                int step, Status *status) {
    CHECK_SAME_DIMENSION(a, b, status);
    if (b->ncol % step)
        NERV_EXIT_STATUS(status, MAT_GENERAL_ERR,
                         "the dimension of columns is not divisible by step");
    int i, j, k;
    size_t astride = a->stride, bstride = b->stride;
    MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a);
    const MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b);
    int gsize = b->ncol / step; /* group size of each step */
    for (i = 0; i < a->nrow; i++)
    {
        MATRIX_ELEM *a_subrow = arow;
        for (k = 0; k < gsize; k++)
        {
            const MATRIX_ELEM *b_subrow = brow + k;
            for (j = 0; j < step;
                 j++, a_subrow++, b_subrow += gsize)
                *a_subrow = *b_subrow;
        }
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
        brow = MATRIX_NEXT_ROW_PTR(brow, bstride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b,
                                    Status *status) {
    if (a->ncol != b->ncol)
        NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
    if (b->nrow != 1)
        NERV_EXIT_STATUS(status, MAT_ROW_VECTOR_EXP, 0);
    int i, j;
    size_t astride = a->stride;
    MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a);
    const MATRIX_ELEM *brow = MATRIX_ELEM_PTR(b);
    for (i = 0; i < a->nrow; i++)
    {
        for (j = 0; j < a->ncol; j++)
            arow[j] *= brow[j];
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

void nerv_matrix_(scale_rows_by_col)(Matrix *a, const Matrix *b,
                                    Status *status) {
    if (a->nrow != b->nrow)
        NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
    if (b->ncol != 1)
        NERV_EXIT_STATUS(status, MAT_COL_VECTOR_EXP, 0);
    int i, j;
    size_t astride = a->stride, bstride = b->stride;
    MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a),
                *brow = MATRIX_ELEM_PTR(b);
    for (i = 0; i < a->nrow; i++)
    {
        for (j = 0; j < a->ncol; j++)
            arow[j] *= brow[0];
        arow = MATRIX_NEXT_ROW_PTR(arow, astride);
        brow = MATRIX_NEXT_ROW_PTR(brow, bstride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

static void host_matrix_(free)(MATRIX_ELEM *ptr, Status *status) {
    free(ptr);
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

static void host_matrix_(alloc)(MATRIX_ELEM **dptr, size_t *stride,
                                long width, long height, Status *status) {
    if ((*dptr = (MATRIX_ELEM *)malloc(width * height)) == NULL)
        NERV_EXIT_STATUS(status, MAT_INSUF_MEM, 0);
    *stride = width;
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

#include "matrix.c"
Matrix *nerv_matrix_(load)(ChunkData *cdp, Status *status) {
    int i, j;
    long nrow, ncol;
    FILE *fp = cdp->fp;
    Matrix *self;
    if (fscanf(fp, "%ld %ld", &nrow, &ncol) != 2)
    {
        NERV_SET_STATUS(status, MAT_INVALID_FORMAT, 0);
        return 0;
    }
    self = nerv_matrix_(create)(nrow, ncol, status);
    if (status->err_code != NERV_NORMAL)
        return NULL;
    for (i = 0; i < nrow; i++)
    {
        MATRIX_ELEM *row = MATRIX_ROW_PTR(self, i);
        for (j = 0; j < ncol; j++)
            if (fscanf(fp, MATRIX_ELEM_FMT, row + j) != 1)
            {
                free(self);
                NERV_SET_STATUS(status, MAT_INVALID_FORMAT, 0);
                return 0;
            }
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
    return self;
}

void nerv_matrix_(save)(Matrix *self, ChunkFile *cfp, Status *status) {
    int i, j;
    long nrow = self->nrow, ncol = self->ncol;
    FILE *fp = cfp->fp;
    if (fprintf(fp, "%ld %ld\n", nrow, ncol) < 0)
        NERV_EXIT_STATUS(status, MAT_WRITE_ERROR, 0);
    for (i = 0; i < nrow; i++)
    {
        MATRIX_ELEM *row = MATRIX_ROW_PTR(self, i);
        for (j = 0; j < ncol; j++)
            if (fprintf(fp, MATRIX_ELEM_WRITE_FMT " ", row[j]) < 0)
                NERV_EXIT_STATUS(status, MAT_WRITE_ERROR, 0);
        if (fprintf(fp, "\n") < 0)
            NERV_EXIT_STATUS(status, MAT_WRITE_ERROR, 0);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}


void nerv_matrix_(copy_fromh)(Matrix *a, const Matrix *b,
                            int a_begin, int b_begin, int b_end,
                            Status *status) {
    if (!(0 <= b_begin && b_begin < b_end && b_end <= b->nrow &&
            a_begin + b_end - b_begin <= a->nrow))
        NERV_EXIT_STATUS(status, MAT_INVALID_COPY_INTERVAL, 0);
    if (a->ncol != b->ncol)
        NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
    memmove(MATRIX_ROW_PTR(a, a_begin),
            MATRIX_ROW_PTR(b, b_begin),
            b->stride * (b_end - b_begin));
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

void nerv_matrix_(copy_rows_fromh_by_idx)(Matrix *a, const Matrix *b,
                        const Matrix *idx, int b_begin, Status *status) {
    if (!(0 <= b_begin && b_begin + a->nrow <= idx->ncol))
        NERV_EXIT_STATUS(status, MAT_INVALID_COPY_INTERVAL, 0);
    if (idx->nrow != 1)
        NERV_EXIT_STATUS(status, MAT_IDX_VECTOR_EXP, 0);
    if (a->ncol != b->ncol)
        NERV_EXIT_STATUS(status, MAT_MISMATCH_DIM, 0);
    int i;
    MATRIX_ELEM *idx_row = MATRIX_ELEM_PTR(idx) + b_begin;
    for (i = 0; i < a->nrow; i++)
    {
        int src_idx = lrintf(idx_row[i]);
        /* assume the index is valid to improve performance
        if (!(0 <= src_idx && src_idx < b->nrow))
            NERV_EXIT_STATUS(status, MAT_INVALID_IDX, 0);
        */
        memmove(MATRIX_ROW_PTR(a, i), MATRIX_ROW_PTR(b, src_idx), b->stride);
    }
    NERV_SET_STATUS(status, NERV_NORMAL, 0);
}

#endif