diff options
-rw-r--r-- | Makefile | 33 | ||||
-rw-r--r-- | examples/cumatrix_example.lua | 6 | ||||
-rw-r--r-- | examples/cumatrix_from_mmatrix.lua | 8 | ||||
-rw-r--r-- | matrix/generic/cumatrix.c | 15 |
4 files changed, 32 insertions, 30 deletions
@@ -4,7 +4,9 @@ OBJS := nerv.o luaT.o common.o \ io/init.o io/param.o \ examples/oop_example.o LIBS := libnerv.so -LUA_LIBS := matrix/init.lua io/init.lua nerv.lua pl/utils.lua pl/compat.lua layer/init.lua layer/affine.lua +LUA_LIBS := matrix/init.lua io/init.lua nerv.lua \ + pl/utils.lua pl/compat.lua \ + layer/init.lua layer/affine.lua layer/sigmoid.lua INCLUDE := -I build/luajit-2.0/include/luajit-2.0/ -DLUA_USE_APICHECK CUDA_BASE := /usr/local/cuda-6.5 CUDA_INCLUDE := -I $(CUDA_BASE)/include/ @@ -13,30 +15,22 @@ LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcubl CFLAGS := -Wall -Wextra OBJ_DIR := build/objs LUA_DIR := build/lua +SUBDIR := matrix io layer examples pl NVCC := $(CUDA_BASE)/bin/nvcc NVCC_FLAGS := -Xcompiler -fPIC,-Wall,-Wextra OBJS := $(addprefix $(OBJ_DIR)/,$(OBJS)) +OBJ_SUBDIR := $(addprefix $(OBJ_DIR)/,$(SUBDIR)) +LUA_SUBDIR := $(addprefix $(LUA_DIR)/,$(SUBDIR)) LIBS := $(addprefix $(OBJ_DIR)/,$(LIBS)) LUA_LIBS := $(addprefix $(LUA_DIR)/,$(LUA_LIBS)) -all: luajit $(OBJ_DIR) $(LIBS) $(LUA_DIR) $(LUA_LIBS) +all: luajit $(OBJ_DIR) $(OBJ_SUBDIR) $(LIBS) $(LUA_DIR) $(LUA_SUBDIR) $(LUA_LIBS) luajit: ./build_luajit.sh -$(OBJ_DIR): - -mkdir -p $(OBJ_DIR) - -mkdir -p $(OBJ_DIR)/matrix - -mkdir -p $(LUA_DIR)/matrix - -mkdir -p $(OBJ_DIR)/io - -mkdir -p $(LUA_DIR)/io - -mkdir -p $(LUA_DIR)/pl - -mkdir -p $(LUA_DIR)/layer - -mkdir -p $(OBJ_DIR)/examples -$(LUA_DIR): - -mkdir -p $(LUA_DIR) -$(OBJ_DIR)/%.o: %.c - gcc -c -o $@ $< $(INCLUDE) -fPIC $(CFLAGS) -$(OBJ_DIR)/matrix/%.o: matrix/%.c +$(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR): + -mkdir -p $@ +$(OBJ_DIR)/%.o: %.c $(patsubst /%.o,/%.c,$@) gcc -c -o $@ $< $(INCLUDE) -fPIC $(CFLAGS) $(OBJ_DIR)/matrix/cukernel.o: matrix/cukernel.cu $(NVCC) -c -o $@ $< $(INCLUDE) $(NVCC_FLAGS) @@ -46,9 +40,10 @@ $(OBJ_DIR)/luaT.o: gcc -c -o $@ luaT/luaT.c $(INCLUDE) -fPIC $(LIBS): $(OBJS) gcc -shared -o $@ $(OBJS) $(LDFLAGS) -matrix/cumatrix.c: matrix/generic/cumatrix.c -matrix/mmatrix.c: matrix/generic/mmatrix.c -matrix/generic/mmatrix.c matrix/generic/cumatrix.c: matrix/generic/matrix.c + +$(OBJ_DIR)/matrix/cumatrix.o: matrix/generic/cumatrix.c matrix/generic/matrix.c +$(OBJ_DIR)/matrix/mmatrix.o: matrix/generic/mmatrix.c matrix/generic/matrix.c + clean: -rm -rf $(OBJ_DIR) -rm -rf $(LUA_DIR) diff --git a/examples/cumatrix_example.lua b/examples/cumatrix_example.lua index f8235eb..084dcca 100644 --- a/examples/cumatrix_example.lua +++ b/examples/cumatrix_example.lua @@ -11,10 +11,12 @@ for i = 0, m - 1 do end end print(fm) -fs = fm:softmax() +fs = fm:create() +fs:softmax(fm) -- print(fs) print(dm) -ds = dm:softmax() +ds = dm:create() +ds:softmax(dm) -- print(ds) print(fs) print(fs + fs) diff --git a/examples/cumatrix_from_mmatrix.lua b/examples/cumatrix_from_mmatrix.lua index 1aac1c4..2309e14 100644 --- a/examples/cumatrix_from_mmatrix.lua +++ b/examples/cumatrix_from_mmatrix.lua @@ -24,5 +24,9 @@ dc[1]:copy_tod(dc[0]) print("dc[1] copied to dc[0]") print(dc) print("softmax of fc and dc") -print(fc:softmax()) -print(dc:softmax()) +sfc = fc:create() +sdc = dc:create() +sfc:softmax(fc) +print(sfc) +sdc:softmax(dc) +print(sdc) diff --git a/matrix/generic/cumatrix.c b/matrix/generic/cumatrix.c index 49e2620..2deb7a3 100644 --- a/matrix/generic/cumatrix.c +++ b/matrix/generic/cumatrix.c @@ -27,6 +27,7 @@ static cublasHandle_t cublas_handle; Matrix *nerv_matrix_(new_)(long nrow, long ncol); +void nerv_matrix_(data_free)(Matrix *self); static void nerv_matrix_(add_)(const Matrix *a, const Matrix *b, const Matrix *c, @@ -98,9 +99,8 @@ static int nerv_matrix_(sigmoid)(lua_State *L) { Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname)); CHECK_SAME_DIMENSION(a, b); - cudak_(cuda_sigmoid)(a, b); - luaT_pushudata(L, b, nerv_matrix_(tname)); - return 1; + cudak_(cuda_sigmoid)(b, a); + return 0; } static int nerv_matrix_(sigmoid_grad)(lua_State *L) { @@ -114,15 +114,16 @@ static int nerv_matrix_(sigmoid_grad)(lua_State *L) { } static int nerv_matrix_(softmax)(lua_State *L) { - Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 1, nerv_matrix_(tname)); Matrix *max = nerv_matrix_(new_)(a->nrow, 1); Matrix *dno = nerv_matrix_(new_)(a->nrow, 1); - Matrix *b = nerv_matrix_(new_)(a->nrow, a->ncol); cudak_(cuda_rowmax)(a, max); cudak_(cuda_softmax_denominator)(a, max, dno); cudak_(cuda_softmax_final)(a, max, dno, b); - luaT_pushudata(L, b, nerv_matrix_(tname)); - return 1; + nerv_matrix_(data_free)(max); + nerv_matrix_(data_free)(dno); + return 0; } static int nerv_matrix_(rowsum)(lua_State *L) { |