diff options
author | cloudygoose <[email protected]> | 2015-06-04 12:47:18 +0800 |
---|---|---|
committer | cloudygoose <[email protected]> | 2015-06-04 12:47:18 +0800 |
commit | 2301cba19914f35a8c34c3d27d98deb43ddaaf1d (patch) | |
tree | a2a58889ad90c684a00512037f4f3d3d566c3f60 /matrix/generic/cukernel.cu | |
parent | 88a2c29f347df2ef75b9891235bc176676e5dafd (diff) | |
parent | ea6f2990f99dd9ded6a0e74d75a3ec84900a2518 (diff) |
...
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'matrix/generic/cukernel.cu')
-rw-r--r-- | matrix/generic/cukernel.cu | 19 |
1 files changed, 19 insertions, 0 deletions
```diff
diff --git a/matrix/generic/cukernel.cu b/matrix/generic/cukernel.cu
index 1d8b983..05a1e78 100644
--- a/matrix/generic/cukernel.cu
+++ b/matrix/generic/cukernel.cu
@@ -187,6 +187,15 @@ __global__ void cudak_(scale_row)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
     b[j + i * stride] *= a[j];
 }
 
+__global__ void cudak_(decompress)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
+                                   int nrow, int ncol,
+                                   int stride_a, int stride_b) {
+    int j = blockIdx.x * blockDim.x + threadIdx.x;
+    int i = blockIdx.y * blockDim.y + threadIdx.y;
+    if (i >= nrow || j >= ncol) return;
+    b[lrintf(a[j + i * stride_a]) + i * stride_b] = 1.0;
+}
+
 extern "C" {
 #include "../cukernel.h"
     void cudak_(cuda_log_elem)(const Matrix *a, Matrix *b) {
@@ -385,5 +394,15 @@ extern "C" {
             (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b),
              b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM));
     }
+
+    void cudak_(cuda_decompress)(const Matrix *a, Matrix *b) {
+        dim3 threadsPerBlock(1, CUDA_THREADS_NN);
+        dim3 numBlocks(1, CEIL_DIV(a->nrow, threadsPerBlock.y));
+        cudak_(decompress)<<<numBlocks, threadsPerBlock>>> \
+            (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b),
+             a->nrow, a->ncol,
+             a->stride / sizeof(MATRIX_ELEM),
+             b->stride / sizeof(MATRIX_ELEM));
+    }
 }
 #endif
```