summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 3
-rw-r--r-- matrix/cukernel.cu 7
2 files changed, 6 insertions, 4 deletions
diff --git a/Makefile b/Makefile
index ef2adcc..9f6413e 100644
--- a/Makefile
+++ b/Makefile
@@ -11,6 +11,7 @@ CFLAGS := -Wall -Wextra
OBJ_DIR := build/objs
LUA_DIR := build/lua
NVCC := $(CUDA_BASE)/bin/nvcc
+NVCC_FLAGS := -Xcompiler -fPIC,-Wall,-Wextra
OBJS := $(addprefix $(OBJ_DIR)/,$(OBJS))
LIBS := $(addprefix $(OBJ_DIR)/,$(LIBS))
@@ -30,7 +31,7 @@ $(OBJ_DIR)/%.o: %.c
$(OBJ_DIR)/matrix/%.o: matrix/%.c
gcc -c -o $@ $< $(INCLUDE) -fPIC $(CFLAGS)
$(OBJ_DIR)/matrix/cukernel.o: matrix/cukernel.cu
- $(NVCC) -c -o $@ $< -Xcompiler -fPIC $(INCLUDE)
+ $(NVCC) -c -o $@ $< $(INCLUDE) $(NVCC_FLAGS)
$(LUA_DIR)/%.lua: %.lua
cp $< $@
$(OBJ_DIR)/luaT.o:
diff --git a/matrix/cukernel.cu b/matrix/cukernel.cu
index dd1ebfc..ee6d871 100644
--- a/matrix/cukernel.cu
+++ b/matrix/cukernel.cu
@@ -84,6 +84,7 @@ __global__ void block_reduce_max(const float *input, float *output,
}
extern "C" {
+#include "cukernel.h"
void cuda_sigmoid(const Matrix *a, Matrix *b) {
dim3 threadsPerBlock(CUDA_THREADS_N,
CUDA_THREADS_N);
@@ -106,7 +107,7 @@ extern "C" {
a->stride / sizeof(float), stride / sizeof(float),
ncol);
ncol = blocks_per_row;
- assert(ncol <= block.x);
+ assert((unsigned long)ncol <= block.x);
grid.x = 1;
block_reduce_sum<<<grid, block, block.x * sizeof(float)>>> \
(res, b->data.f,
@@ -143,7 +144,7 @@ extern "C" {
max->stride / sizeof(float),
ncol);
ncol = blocks_per_row;
- assert(ncol <= block.x);
+ assert((unsigned long)ncol <= block.x);
grid.x = 1;
block_reduce_sum<<<grid, block, block.x * sizeof(float)>>> \
(res, b->data.f,
@@ -165,7 +166,7 @@ extern "C" {
a->stride / sizeof(float), stride / sizeof(float),
ncol);
ncol = blocks_per_row;
- assert(ncol <= block.x);
+ assert((unsigned long)ncol <= block.x);
grid.x = 1;
block_reduce_max<<<grid, block, block.x * sizeof(float)>>> \
(res, b->data.f,