From ea6f2990f99dd9ded6a0e74d75a3ec84900a2518 Mon Sep 17 00:00:00 2001 From: Determinant Date: Wed, 3 Jun 2015 23:00:45 +0800 Subject: demo now works (without random shuffle) --- matrix/generic/cukernel.cu | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'matrix/generic/cukernel.cu') diff --git a/matrix/generic/cukernel.cu b/matrix/generic/cukernel.cu index 1d8b983..05a1e78 100644 --- a/matrix/generic/cukernel.cu +++ b/matrix/generic/cukernel.cu @@ -187,6 +187,15 @@ __global__ void cudak_(scale_row)(const MATRIX_ELEM *a, MATRIX_ELEM *b, b[j + i * stride] *= a[j]; } +__global__ void cudak_(decompress)(const MATRIX_ELEM *a, MATRIX_ELEM *b, + int nrow, int ncol, + int stride_a, int stride_b) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + if (i >= nrow || j >= ncol) return; + b[lrintf(a[j + i * stride_a]) + i * stride_b] = 1.0; +} + extern "C" { #include "../cukernel.h" void cudak_(cuda_log_elem)(const Matrix *a, Matrix *b) { @@ -385,5 +394,15 @@ extern "C" { (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM)); } + + void cudak_(cuda_decompress)(const Matrix *a, Matrix *b) { + dim3 threadsPerBlock(1, CUDA_THREADS_NN); + dim3 numBlocks(1, CEIL_DIV(a->nrow, threadsPerBlock.y)); + cudak_(decompress)<<>> \ + (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), + a->nrow, a->ncol, + a->stride / sizeof(MATRIX_ELEM), + b->stride / sizeof(MATRIX_ELEM)); + } } #endif -- cgit v1.2.3