author      Determinant <[email protected]>    2015-06-22 19:01:29 +0800
committer   Determinant <[email protected]>    2015-06-22 19:01:29 +0800
commit      2497fd9e7a0fae5ee4887890d7a312e0e08a93b8 (patch)
tree        382f97575bd2df9ee6abb1662b11b279fc22d72b /nerv
parent      196e9b48a3541caccdffc5743001cced70667091 (diff)
major change: use luarocks to manage project
Diffstat (limited to 'nerv')
59 files changed, 6468 insertions, 2 deletions
@@ -1,2 +0,0 @@ -#!/bin/bash -exec 'build/luajit-2.0/bin/luajit' -e "package.cpath=\"${PWD}/build/lib/?.so\"" -e "package.path=\"${PWD}/build/lua/?/init.lua;${PWD}/build/lua/?.lua;${PWD}/?.lua\"" -e "require 'nerv'" "$@" diff --git a/nerv/.gitignore b/nerv/.gitignore new file mode 100644 index 0000000..567609b --- /dev/null +++ b/nerv/.gitignore @@ -0,0 +1 @@ +build/ diff --git a/nerv/Makefile b/nerv/Makefile new file mode 100644 index 0000000..b69a63e --- /dev/null +++ b/nerv/Makefile @@ -0,0 +1,60 @@ +.PHONY: build install clean +SHELL := /bin/bash +BUILD_DIR := $(CURDIR)/build +OBJS := nerv.o luaT.o common.o \ + matrix/mmatrix.o matrix/cumatrix.o matrix/init.o matrix/cukernel.o \ + io/init.o io/chunk_file.o \ + examples/oop_example.o +LIBS := libnerv.so +LUA_LIBS := matrix/init.lua io/init.lua init.lua \ + layer/init.lua layer/affine.lua layer/sigmoid.lua layer/softmax_ce.lua \ + layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \ + nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \ + io/sgd_buffer.lua +INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK +CUDA_BASE := /usr/local/cuda-6.5 +#CUDA_BASE := /usr/local/cuda-5.0 +CUDA_INCLUDE := -I $(CUDA_BASE)/include/ +INCLUDE += $(CUDA_INCLUDE) +LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcublas +CFLAGS := -Wall -Wextra -O2 +OBJ_DIR := $(BUILD_DIR)/objs +SUBDIR := matrix io layer examples nn +NVCC := $(CUDA_BASE)/bin/nvcc +NVCC_FLAGS := -Xcompiler -fPIC,-Wall,-Wextra + +LUA_DIR = $(INST_LUADIR)/nerv +OBJS := $(addprefix $(OBJ_DIR)/,$(OBJS)) +OBJ_SUBDIR := $(addprefix $(OBJ_DIR)/,$(SUBDIR)) +LUA_SUBDIR := $(addprefix $(LUA_DIR)/,$(SUBDIR)) +LIBS := $(addprefix $(INST_LIBDIR)/,$(LIBS)) +LUA_LIBS := $(addprefix $(LUA_DIR)/,$(LUA_LIBS)) + +build: $(OBJ_DIR) $(OBJ_SUBDIR) $(OBJS) +$(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR): + -mkdir -p $@ +$(OBJ_DIR)/%.o: %.c $(patsubst /%.o,/%.c,$@) + gcc -c -o $@ $< $(INCLUDE) -fPIC $(CFLAGS) +$(OBJ_DIR)/matrix/cukernel.o: matrix/cukernel.cu + $(NVCC) -c -o $@ $< $(INCLUDE) $(NVCC_FLAGS) +$(LUA_DIR)/%.lua: %.lua + cp $< $@ +$(OBJ_DIR)/luaT.o: + gcc -c -o $@ luaT/luaT.c $(INCLUDE) -fPIC +$(LIBS): $(OBJS) + gcc -shared -o $@ $(OBJS) $(LDFLAGS) + +$(OBJ_DIR)/matrix/cumatrix.o: matrix/generic/cumatrix.c matrix/generic/matrix.c matrix/generic/cukernel.cu +$(OBJ_DIR)/matrix/mmatrix.o: matrix/generic/mmatrix.c matrix/generic/matrix.c +$(OBJ_DIR)/matrix/cukernel.o: matrix/generic/cukernel.cu + +.PHONY: speech + +speech: + -mkdir -p build/objs/speech/tnet_io + $(MAKE) -C speech/ BUILD_DIR=$(BUILD_DIR) LIB_DIR=$(LIB_DIR) OBJ_DIR=$(CURDIR)/build/objs/speech/ LUA_DIR=$(LUA_DIR) + +clean: + -rm -rf $(OBJ_DIR) + +install: $(LIBS) $(LUA_DIR) $(LUA_SUBDIR) $(LUA_LIBS) diff --git a/nerv/common.c b/nerv/common.c new file mode 100644 index 0000000..b4e39e6 --- /dev/null +++ b/nerv/common.c @@ -0,0 +1,76 @@ +#include "common.h" +#include <stdarg.h> +int nerv_error(lua_State *L, const char *err_mesg_fmt, ...) 
{ + va_list ap; + va_start(ap, err_mesg_fmt); + lua_pushstring(L, "[nerv] internal error: "); + lua_pushvfstring(L, err_mesg_fmt, ap); + lua_concat(L, 2); + lua_error(L); + va_end(ap); + return 0; +} + +int nerv_error_method_not_implemented(lua_State *L) { + return nerv_error(L, "method not implemented"); +} + +void luaN_append_methods(lua_State *L, const luaL_Reg *mlist) { + for (; mlist->func; mlist++) + { + lua_pushcfunction(L, mlist->func); + lua_setfield(L, -2, mlist->name); + } +} + +HashMap *hashmap_create(size_t size, HashKey_t hfunc, HashMapCmp_t cmp) { + HashMap *res = (HashMap *)malloc(sizeof(HashMap)); + res->bucket = calloc(size, sizeof(HashNode)); + res->cmp = cmp; + res->hfunc = hfunc; + res->size = size; + return res; +} + +void *hashmap_getval(HashMap *h, const char *key) { + size_t idx = h->hfunc(key) % h->size; + HashNode *ptr; + for (ptr = h->bucket[idx]; ptr; ptr = ptr->next) + { + if (!h->cmp(ptr->key, key)) + return ptr->val; + } + return NULL; +} + +void hashmap_setval(HashMap *h, const char *key, void *val) { + size_t idx = h->hfunc(key) % h->size; + HashNode *ptr = malloc(sizeof(HashNode)); + ptr->next = h->bucket[idx]; + h->bucket[idx] = ptr; + ptr->key = key; + ptr->val = val; +} + +void hashmap_clear(HashMap *h) { + size_t i; + for (i = 0; i < h->size; i++) + { + HashNode *ptr, *nptr; + for (ptr = h->bucket[i]; ptr; ptr = nptr) + { + nptr = ptr->next; + free(ptr->val); + free(ptr); + } + h->bucket[i] = NULL; + } +} + +size_t bkdr_hash(const char *key) { + unsigned int seed = 131; + unsigned int res = 0; + while (*key) + res = res * seed + *key++; + return res; +} diff --git a/nerv/common.h b/nerv/common.h new file mode 100644 index 0000000..e21c7a5 --- /dev/null +++ b/nerv/common.h @@ -0,0 +1,36 @@ +#ifndef NERV_COMMON_H +#define NERV_COMMON_H +#include "lua.h" +#include "lauxlib.h" +#include "lualib.h" +#include "luaT/luaT.h" +#include <stdio.h> +#include <stdlib.h> + +typedef struct HashNode { + const char *key; + void *val; + struct HashNode *next; +} HashNode; + +typedef int (*HashMapCmp_t)(const char *a, const char *b); +typedef size_t (*HashKey_t)(const char *key); + +typedef struct HashMap { + HashNode **bucket; + HashMapCmp_t cmp; + HashKey_t hfunc; + size_t size; +} HashMap; + +HashMap *hashmap_create(size_t size, HashKey_t hfunc, HashMapCmp_t cmp); +void *hashmap_getval(HashMap *h, const char *key); +void hashmap_setval(HashMap *h, const char *key, void *val); +void hashmap_clear(HashMap *h); + +size_t bkdr_hash(const char *key); + +int nerv_error(lua_State *L, const char *err_mesg_fmt, ...); +int nerv_error_method_not_implemented(lua_State *L); +void luaN_append_methods(lua_State *L, const luaL_Reg *mlist); +#endif diff --git a/nerv/doc/nerv.md b/nerv/doc/nerv.md new file mode 100644 index 0000000..28411f5 --- /dev/null +++ b/nerv/doc/nerv.md @@ -0,0 +1,17 @@ +#The Nerv utility functions# +Part of the [Nerv](../README.md) toolkit. +##Methods## +* __string = nerv.typename(obj a)__ +A registered function, the original function is `luaT_lua_typename`. In some cases if you call `type(a)` for object of some class in __Nerv__(like __Nerv.CuMatrix__) it will only return "userdata"(because it is created in C), in this case you can use this method to get its type. + +--- + +* __metatable = nerv.getmetatable(string tname)__ +A registered function, the original function is `luaT_lua_getmetatable`. `tname` should be a class name that has been registered in __luaT__. 
+ +* __metatable = nerv.newmetatable(string tname, string parenttname, function constructor, function destructor, function factory)__ +A registered function, the original function is `luaT_newmetatable`, it returns the metatable of the created class by the name `tname`. +* __string = nerv.setmetatable(table self, string tname)__ +A registered function, the original function is `luaT_lua_setmetatable`. It assigns the metatable registered in __luaT__ by the name *tname* to the table *self*. And return *tname* to user. +* __table = nerv.get_type(string typename)__ +Returns the type(`loadstring("return " .. typename)`).
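A minimal usage sketch of the utility functions above (not part of the original commit), assuming `libnerv` has been loaded and a luaT-registered class such as __Nerv.CuMatrixFloat__ is available:
```
m = nerv.CuMatrixFloat(2, 3)
print(type(m))          -- prints "userdata", because the object is created in C
print(nerv.typename(m)) -- prints "nerv.CuMatrixFloat", the name registered in luaT
-- look up the metatable and the class table by the registered name
mt = nerv.getmetatable("nerv.CuMatrixFloat")
t = nerv.get_type("nerv.CuMatrixFloat")
print(mt, t)
```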
\ No newline at end of file diff --git a/nerv/doc/nerv_class.md b/nerv/doc/nerv_class.md new file mode 100644 index 0000000..99f63e7 --- /dev/null +++ b/nerv/doc/nerv_class.md @@ -0,0 +1,36 @@ +#The Nerv OOP# +Part of the [Nerv](../README.md) toolkit. +##Methods## +* __metatable mt, metatable mpt = nerv.class(string tname, string parenttname)__ +This method is used to create a class by the name `tname`, which inherits `parenttname` in __Nerv__, then you create a new instance of this class by calling `obj=tname(...)`. The `tname.__init(...)` method(if defined) will be called in the constructing. The metatable of the class and its parent class will be returned. + +##Examples## +* This example implements a simple `nerv.Counter` class which is inherited by `nerv.BetterCounter`. + +``` +do + nerv.class("nerv.Counter") + function nerv.Counter:__init(c) + if (c) then + self.c = c + else + self.c = 0 + end + end +end +do + local mt, mpt = nerv.class("nerv.BetterCounter", "nerv.Counter") + function nerv.BetterCounter:__init(c, bc) + mpt.__init(self, c) + if (bc) then + self.bc = bc + else + self.bc = 0 + end + end +end +c1 = nerv.Counter(1) +print(c1.c) +bc1 = nerv.BetterCounter(1, 1) +print(bc1.c, bc1.bc) +```
\ No newline at end of file diff --git a/nerv/doc/nerv_io.md b/nerv/doc/nerv_io.md new file mode 100644 index 0000000..07589df --- /dev/null +++ b/nerv/doc/nerv_io.md @@ -0,0 +1,113 @@ +#The Nerv IO Package# +Part of the [Nerv](../README.md) toolkit. + +##Description## +The main class that the user uses to store and read parameter object to and from files is __nerv.ChunkFile__. +In the file, a parameter object will be saved using a standard format. First is the length(in byte) of this object, then a table which includes some meta information of the object, and a data area. Below is an example text file. +``` +[0000000000202] +{type="nerv.ExampleP",info={message="just-a-try"},id="exampleP1"} +3 3 +5.000000 5.000000 5.000000 +5.000000 5.000000 5.000000 +5.000000 5.000000 5.000000 +1 3 +4.000000 4.000000 4.000000 +[0000000000202] +{type="nerv.ExampleP",info={message="just-a-try"},id="exampleP2"} +3 3 +4.000000 4.000000 4.000000 +4.000000 4.000000 4.000000 +4.000000 4.000000 4.000000 +1 3 +3.000000 3.000000 3.000000 +``` + +##Methods## +* __ChunkFile ChunkFile(string fn, string mode)__ +`mode` can be `r` or `w`, for reading or writing a file. The returned __ChunkFile__ will be ready to write or read objects which follows the __nerv.Param__ interface(using `write_chunk` and `read_chunk`). +* __void ChunkFile.write_chunk(ChunkFile self, Param p)__ +Write `p` into the file. `p:write` will be called. +* __Param ChunkFile.read_chunk(ChunkFile self, string id, table global_conf)__ +Read the __Param__ object by id `id` from the file `self`. It will be constructed using `__init(id, global_conf)`. `p:read` will be called. +* __void ChunkFile.close(ChunkFile self)__ +Close the opened file. + +##Examples## +* An example showing how to use __ChunkFile__ to store and read parameter objects. +``` +require 'io' +do + local mt, mpt = nerv.class('nerv.ExampleP', 'nerv.Param') + function nerv.ExampleP:__init(id, global_conf) + self.id = id + self.global_conf = global_conf + self.matrix = nerv.MMatrixFloat(3, 3) + for i = 0, 2, 1 do + for j = 0, 2, 1 do + self.matrix[i][j] = 3 + end + end + self.bias = nerv.MMatrixFloat(1, 3) + for i = 0, 2, 1 do + self.bias[i] = 2; + end + self:set_info({message = 'just-a-try'}) + end + function nerv.ExampleP:addOne() + for i = 0, 2, 1 do + for j = 0, 2, 1 do + self.matrix[i][j] = self.matrix[i][j] + 1 + end + end + for i = 0, 2, 1 do + self.bias[i] = self.bias[i] + 1 + end + end + function nerv.ExampleP:read(pcdata) + self.matrix = nerv.MMatrixFloat.load(pcdata) + self.bias = nerv.MMatrixFloat.load(pcdata) + end + function nerv.ExampleP:write(pfhandle) + self.matrix:save(pfhandle) + self.bias:save(pfhandle) + end +end +global_conf = {} +do + local f = nerv.ChunkFile('../tmp', 'w') + local exampleP1 = nerv.ExampleP('exampleP1', global_conf) + local exampleP2 = nerv.ExampleP('exampleP2', global_conf) + exampleP1:addOne() + exampleP1:addOne() + exampleP2:addOne() + + f:write_chunk(exampleP1) + f:write_chunk(exampleP2) + f:close() +end +do + local f = nerv.ChunkFile('../tmp', 'r') + local exampleP1 = f:read_chunk('exampleP1', global_conf) + local exampleP2 = f:read_chunk('exampleP2', global_conf) + f:close() + print(exampleP1.matrix) + print(exampleP2.matrix) +end +``` + +##Developer Notes## +* There are four classes in to deal with chunk data, which are __nerv.ChunkFile__, __nerv.ChunkFileHandle__, __nerv.ChunkInfo__, __nerv.ChunkData__. Below is the underlying C structs. 
+``` +typedef struct ChunkFileHandle { + FILE *fp; +} ChunkFileHandle; +typedef struct ChunkInfo { + off_t offset, length; +} ChunkInfo; +typedef struct ChunkData { + FILE *fp; + char *data; +} ChunkData; +``` +* In __Nerv.io__, a returned(by `ChunkFile.__init`) __nerv.ChunkFile__ will have a member `handle`, which is a __nerv.ChunkFileHandle__.
\ No newline at end of file diff --git a/nerv/doc/nerv_layer.md b/nerv/doc/nerv_layer.md new file mode 100644 index 0000000..de2fb12 --- /dev/null +++ b/nerv/doc/nerv_layer.md @@ -0,0 +1,180 @@ +#The Nerv Layer Package# +Part of the [Nerv](../README.md) toolkit. + +##Description## +__nerv.Layer__ is the base class and most of its methods are abstract. +###Class hierarchy and their members### +* __nerv.Layer__. + * `table dim_in` It specifies the dimensions of the inputs. + * `table dim_out` It specifies the dimensions of the outputs. + * `string id` ID of this layer. + * `table gconf` Stores the `global_conf`. +* __nerv.AffineLayer__ inherits __nerv.Layer__, both `#dim_in` and `#dim_out` are 1. + * `MatrixParam ltp` The liner transform parameter. + * `BiasParam bp` The bias parameter. +* __nerv.BiasLayer__ inherits __nerv.Layer__, both `#dim_in` nad `#dim_out` are 1. + * `BiasParam bias` The bias parameter. +* __nerv.SigmoidLayer__ inherits __nerv.Layer__, both `#dim_in` and `#dim_out` are 1. +* __nerv.SoftmaxCELayer__ inherits __nerv.Layer__, `#dim_in` is 2 and `#dim_out` is -1(optional). `input[1]` is the input to the softmax layer, `input[2]` is the reference distribution. In its `propagate(input, output)` method, if `output[1] ~= nil`, cross\_entropy value will outputed. + * `float total_ce` Records the accumlated cross entropy value. + * `int total_frams` Records how many frames have passed. + * `bool compressed` The reference distribution can be a one-hot format. This feature is enabled by `layer_conf.compressed`. + +##Methods## +* __void Layer.\_\_init(Layer self, string id, table global_conf, table layer_conf)__ +Abstract method. +The constructing method should assign `id` to `self.id` and `global_conf` to `self.gconf`, `layer_conf.dim_in` to `self.dim_in`, `layer_conf.dim_out` to `self.dim_out`. `dim_in` and `dim_out` are a list specifies the dimensions of the inputs and outputs. Also, `layer_conf` will include the parameters, which should also be properly saved. +* __void Layer.init(Layer self)__ +Abstract method. +Initialization method, in this method the layer should do some self-checking and allocate space for intermediate results. +* __void Layer.update(Layer self, table bp_err, table input, table output)__ +Abstract method. +`bp_err[i]` should be the error on `output[i]`. In this method the parameters of `self` is updated. +* __void Layer.propagate(Layer self, table input, table output)__ +Abstract method. +Given `input` and the current parameters, propagate and store the result in `output`. +* __void Layer.back_propagate(Layer self, Matrix next_bp_err, Matrix bp_err, Matrix input, Matrix output)__ +Abstract method. +Calculate the error on the inputs and store them in `next_bp_err`. + +* __void Layer.check_dim_len(int len_in, int len_out)__ +Check whether `#self.dim_in == len_in` and `#self.dim_out == len_out`, if violated, an error will be posted. +* __void Layer.get_params(Layer self)__ +Abstract method. +The layer should return a list containing its parameters. + +####nerv.Layer.get\_dim(self)#### +* Returns: + `dim_in`: __table__. + `dim_out`: __table__. +* Parameters: + `self`: __nerv.Layer__. +* Description: + Returns `self.dim_in, self.dim_out`. + +##Examples## +* a basic example using __Nerv__ layers to a linear classification. 
+ +``` +require 'math' + +require 'layer.affine' +require 'layer.softmax_ce' + +--[[Example using layers, a simple two-classification problem]]-- + +function calculate_accurate(networkO, labelM) + sum = 0 + for i = 0, networkO:nrow() - 1, 1 do + if (labelM[i][0] == 1 and networkO[i][0] >= 0.5) then + sum = sum + 1 + end + if (labelM[i][1] == 1 and networkO[i][1] >= 0.5) then + sum = sum + 1 + end + end + return sum +end + +--[[begin global setting and data generation]]-- +global_conf = {lrate = 10, + wcost = 1e-6, + momentum = 0.9, + cumat_type = nerv.CuMatrixFloat} + +input_dim = 5 +data_num = 100 +ansV = nerv.CuMatrixFloat(input_dim, 1) +for i = 0, input_dim - 1, 1 do + ansV[i][0] = math.random() - 0.5 +end +ansB = math.random() - 0.5 +print('displaying ansV') +print(ansV) +print('displaying ansB(bias)') +print(ansB) + +dataM = nerv.CuMatrixFloat(data_num, input_dim) +for i = 0, data_num - 1, 1 do + for j = 0, input_dim - 1, 1 do + dataM[i][j] = math.random() * 2 - 1 + end +end +refM = nerv.CuMatrixFloat(data_num, 1) +refM:fill(ansB) +refM:mul(dataM, ansV, 1, 1) --refM = dataM * ansV + ansB + +labelM = nerv.CuMatrixFloat(data_num, 2) +for i = 0, data_num - 1, 1 do + if (refM[i][0] > 0) then + labelM[i][0] = 1 + labelM[i][1] = 0 + else + labelM[i][0] = 0 + labelM[i][1] = 1 + end +end +--[[global setting and data generation end]]-- + + +--[[begin network building]]-- +--parameters +affineL_ltp = nerv.LinearTransParam('AffineL_ltp', global_conf) +affineL_ltp.trans = nerv.CuMatrixFloat(input_dim, 2) +for i = 0, input_dim - 1, 1 do + for j = 0, 1, 1 do + affineL_ltp.trans[i][j] = math.random() - 0.5 + end +end +affineL_bp = nerv.BiasParam('AffineL_bp', global_conf) +affineL_bp.trans = nerv.CuMatrixFloat(1, 2) +for j = 0, 1, 1 do + affineL_bp.trans[j] = math.random() - 0.5 +end + +--layers +affineL = nerv.AffineLayer('AffineL', global_conf, {['ltp'] = affineL_ltp, + ['bp'] = affineL_bp, + dim_in = {input_dim}, + dim_out = {2}}) +softmaxL = nerv.SoftmaxCELayer('softmaxL', global_conf, {dim_in = {2, 2}, + dim_out = {}}) +print('layers initializing...') +affineL:init() +softmaxL:init() +--[[network building end]]-- + + +--[[begin space allocation]]-- +print('network input&output&error space allocation...') +affineI = {dataM} --input to the network is data +affineO = {nerv.CuMatrixFloat(data_num, 2)} +softmaxI = {affineO[1], labelM} +softmaxO = {} +output = nerv.CuMatrixFloat(data_num, 2) + +affineE = {nerv.CuMatrixFloat(data_num, 2)} +--[[space allocation end]]-- + + +--[[begin training]]-- +ce_last = 0 +for l = 0, 10, 1 do + affineL:propagate(affineI, affineO) + softmaxL:propagate(softmaxI, softmaxO) + output:softmax(softmaxI[1]) + + softmaxL:back_propagate(affineE, {}, softmaxI, softmaxO) + + affineL:update(affineE, affineI, affineO) + + if (l % 5 == 0) then + nerv.utils.printf("training iteration %d finished\n", l) + nerv.utils.printf("cross entropy: %.8f\n", softmaxL.total_ce - ce_last) + ce_last = softmaxL.total_ce + nerv.utils.printf("accurate labels: %d\n", calculate_accurate(output, labelM)) + nerv.utils.printf("total frames processed: %.8f\n", softmaxL.total_frames) + end +end +--[[end training]]-- +``` diff --git a/nerv/doc/nerv_matrix.md b/nerv/doc/nerv_matrix.md new file mode 100644 index 0000000..22971d2 --- /dev/null +++ b/nerv/doc/nerv_matrix.md @@ -0,0 +1,165 @@ +#The Nerv Matrix Package# +Part of the [Nerv](../README.md) toolkit. 
+ +##Description## +###Underlying structure### +In the begining is could be useful to know something about the underlying structure of a __Nerv__ matrix. Please keep in mind that matrice in __Nerv__ is row-major. +Every matrix object is a encapsulation of a C struct that describes the attributes of this matrix. +``` +typedef struct Matrix { + size_t stride; /* size of a row */ + long ncol, nrow, nmax; /* dimension of the matrix, nmax is simply nrow * ncol */ + union { + float *f; + double *d; + long *i; + } data; /* pointer to actual storage */ + long *data_ref; +} Matrix; +``` +It is worth mentioning that that `data_ref` is a counter which counts the number of references to its memory space, mind that it will also be increased when a row of the matrix is referenced(`col = m[2]`). A __Nerv__ matrix will deallocate its space when this counter is decreased to zero. +Also note that all assigning operation in __Nerv__ is reference copy, you can use `copy_tod` or `copy_toh` method to copy value. Also, row assigning operations like `m1[2]=m2[3]` is forbidden in __Nerv__. + +###Class hierarchy### +The class hierarchy of the matrix classes can be clearly observed in `matrix/init.c`. +First there is a abstract base class __Nerv.Matrix__, which is inherited by __Nerv.CuMatrix__ and __Nerv.MMatrix__(also abstract). +Finally, there is __Nerv.CuMatrixFloat__, __Nerv.CuMatrixDouble__, inheriting __Nerv.CuMatrix__, and __Nerv.MMatrixFloat__, __Nerv.MMatrixDouble__, __Nerv.MMatrixInt__ , inheriting __Nerv.MMatrix__. + +##Methods## +Mind that usually a matrix object can only do calculation with matrix of its own type(a __Nerv.CuMatrixFloat__ matrix can only do add operation with a __Nerv.CuMatrixFloat__). +In the methods description below, __Matrix__ could be __Nerv.CuMatrixFloat__, __Nerv.CuMatrixDouble__, __Nerv.MMatrixFloat__ or __Nerv.MMatrixDouble__. __Element_type__ could be `float` or `double`, respectively. +* __Matrix = Matrix(int nrow, int ncol)__ +Returns a __Matrix__ object of `nrow` rows and `ncol` columns. +* __Element_type = Matrix.get_elem(Matrix self, int index)__ +Returns the element value at the specific index(treating the matrix as a vector). The index should be less than `nmax` of the matrix. +* __void Matrix.set_elem(Matrix self, int index, Element_type value)__ +Set the value at `index` to be `value`. +* __int Matrix.ncol(Matrix self)__ +Get `ncol`, the number of columns. +* __int Matrix.nrow(Matrix self)__ +Get `nrow`, the number of rows. +* __int Matrix.get_dataref_value(Matrix self)__ +Returns the value(not a pointer) of space the `data_ref` pointer pointed to. This function is mainly for debugging. +* __Matrix/Element\_type, boolean Matrix.\_\_index\_\_(Matrix self, int index)__ +If the matrix has more than one row, will return the row at `index` as a __Matrix__ . Otherwise it will return the value at `index`. +* __void Matrix.\_\_newindex\_\_(Matrix self, int index, Element_type value)__ +Set the element at `index` to be `value`. +--- +* __Matrix Matrix.create(Matrix a)__ +Return a new __Matrix__ of `a`'s size(of the same number of rows and columns). +* __Matrix Matrix.colsum(Matrix self)__ +Return a new __Matrix__ of size (1,`self.ncol`), which stores the sum of all columns of __Matrix__ `self`. +* __Matrix Matrix.rowsum(Matrix self)__ +Return a new __Matrix__ of size (`self.nrow`,1), which stores the sum of all rows of __Matrix__ `self`. 
+* __Matrix Matrix.rowmax(Matrix self)__ +Return a new __Matrix__ of size (`self.nrow`,1), which stores the max value of all rows of __Matrix__ `self`. +* __Matrix Matrix.trans(Matrix self)__ +Return a new __Matrix__ of size (`self.ncol`,`self.nrow`), which stores the transpose of __Matrix__ `self`. +* __void Matrix.copy_fromh(Matrix self, MMatrix a)__ +Copy the content of a __MMatrix__ `a` to __Matrix__ `self`, they should be of the same size. +* __void Matrix.copy_fromd(Matrix self, CuMatrix a)__ +Copy the content of a __CuMatrix__ `a` to __Matrix__ `self`, they should be of the same size. +* __void Matrix.copy_toh(Matrix self, MMatrix a)__ +Copy the content of the __Matrix__ `self` to a __MMatrix__ `a`. +* __void Matrix.copy_tod(Matrix self, CuMatrix a)__ +Copy the content of the __Matrix__ `self` to a __CuMatrix__ `a`. +* __void Matrix.add(Matrix self, Matrix ma, Matrix mb, Element_type alpha, Element_type beta)__ +It sets the content of __Matrix__ `self` to be `alpha * ma + beta * mb`.__Matrix__ `ma,mb,self` should be of the same size. +* __void Matrix.mul(Matrix self, Matrix ma, Matrix mb, Element_type alpha, Element_type beta, [string ta, string tb])__ +It sets the content of __Matrix__ `self` to be `beta * self + alpha * ma * mb`. `ta` and `tb` is optional, if `ta` is 'T', then `ma` will be transposed, also if `tb` is 'T', `mb` will be transposed. +* __void Matrix.add_row(Matrix self, Matrix va, Element_type beta)__ +Add `beta * va` to every row of __Matrix__ `self`. +* __void Matrix.fill(Matrix self, Element_type value)__ +Fill the content of __Matrix__ `self` to be `value`. +* __void Matrix.sigmoid(Matrix self, Matrix ma)__ +Set the element of __Matrix__ `self` to be elementwise-sigmoid of `ma`. +* __void Matrix.sigmoid_grad(Matrix self, Matrix err, Matrix output)__ +Set the element of __Matrix__ `self`, to be `self[i][j]=err[i][j]*output[i][j]*(1-output[i][j])`. This function is used to propagate sigmoid layer error. +* __void Matrix.softmax(Matrix self, Matrix a)__ +Calculate a row-by-row softmax of __Matrix__ `a` and save the result in `self`. +* __void Matrix.mul_elem(Matrix self, Matrix ma, Matrix mb)__ +Calculate element-wise multiplication of __Matrix__ `ma` and `mb`, store the result in `self`. +* __void Matrix.log_elem(Matrix self, Matrix ma)__ +Calculate element-wise log of __Matrix__ `ma`, store the result in `self`. +* __void Matrix.copy_rows_fromh_by_idx(Matrix self, MMatrix ma, MMatrixInt idx)__ +`idx` should be a row vector. This function copy the rows of `ma` to `self` according to `idx`, in other words, it assigns `ma[idx[i]]` to `self[i]`. +* __void Matrix.expand_frm(Matrix self, Matrix a, int context)__ +Treating each row of `a` as speech feature, and do a feature expansion. The `self` should of size `(a.nrow, a.ncol * (context * 2 + 1))`. `self[i]` will be `(a[i-context] a[i-context+1] ... a[i] a[i+1] a[i+context])`. `a[0]` and `a[nrow]` will be copied to extend the index range. +* __void Matrix.rearrange_frm(Matrix self, Matrix a, int step)__ +Rearrange `a` according to its feature dimension. The `step` is the length of context. So, `self[i][j]` will be assigned `a[i][j / step + (j % step) * (a.ncol / step)]`. `a` and `self` should be of the same size and `step` should be divisible by `a.ncol`. +* __void Matrix.scale_row(Matrix self, Matrix scale)__ +Scale each column of `self` according to a vector `scale`. `scale` should be of size `1 * self.ncol`. 
+* __Matrix Matrix.\_\_add\_\_(Matrix ma, Matrix mb)__ +Returns a new __Matrix__ which stores the result of `ma+mb`. +* __Matrix Matrix.\_\_sub\_\_(Matrix ma, Matrix mb)__ +Returns a new __Matrix__ which stores the result of `ma-mb`. +* __Matrix Matrix.\_\_mul\_\_(Matrix ma, Matrix mb)__ +Returns a new __Matrix__ which stores the result of `ma*mb`. +* __CuMatrix CuMatrix.new_from_host(MMatrix m)__ +Return a new __CuMatrix__ which is a copy of `m`. +* __MMatrix CuMatrix.new_to_host(CuMatrix self)__ +Return a new __MMatrix__ which is a copy of `self`. +* __string Matrix.\_\_tostring\_\_(Matrix self)__ +Returns a string containing values of __Matrix__ `self`. +--- +* __MMatrix MMatrix.load(ChunkData chunk)__ +Return a new __MMatrix__ loaded from the file position in `chunk`. +* __void MMatrix.save(MMatrix self, ChunkFileHandle chunk)__ +Write `self` to the file position in `chunk`. +* __void MMatrix.copy_from(MMatrix ma, MMatrix mb,[int b_bgein, int b_end, int a_begin])__ +Copy a part of `mb`(rows of index `[b_begin..b_end)`) to `ma` beginning at row index `a_begin`. If not specified, `b_begin` will be `0`, `b_end` will be `b.nrow`, `a_begin` will be `0`. + +##Examples## +* Use `get_dataref_value` to test __Nerv__'s matrix space allocation. +``` +m = 10 +n = 10 +fm = nerv.MMatrixFloat(m, n) +dm = nerv.MMatrixDouble(m, n) +for i = 0, m - 1 do + for j = 0, n - 1 do + t = i / (j + 1) + fm[i][j] = t + dm[i][j] = t + end +end +print("test fm:get_dataref_value:", fm:get_dataref_value()) +print("forced a garbade collect") +collectgarbage("collect") +print("test fm:get_dataref_value:", fm:get_dataref_value()) +print(fm) +print(dm) +``` +* Test some __Matrix__ calculations. +``` +m = 4 +n = 4 +fm = nerv.CuMatrixFloat(m, n) +dm = nerv.CuMatrixDouble(m, n) +for i = 0, m - 1 do + for j = 0, n - 1 do + -- local t = math.random(10) + t = i / (j + 1) + fm[i][j] = t + dm[i][j] = t + end +end +print(fm) +fs = fm:create() +fs:softmax(fm) +-- print(fs) +print(dm) +ds = dm:create() +ds:softmax(dm) +-- print(ds) +print(fs) +print(fs + fs) +print(ds + ds) +print(fs - fs) +print(ds - ds) +a = fs:create() +a:mul_elem(fs, fs) +print(a) +a:log_elem(fs) +print(a) +```
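* A further sketch (an editorial illustration, not from the original document) of `Matrix.mul` using the optional transposition flags described above; it assumes `'N'` stands for "no transposition" of the corresponding operand.
```
m = 2
n = 3
a = nerv.CuMatrixFloat(m, n)
b = nerv.CuMatrixFloat(m, n)
c = nerv.CuMatrixFloat(n, n)
a:fill(1)
b:fill(2)
c:fill(0)
-- c = 0.0 * c + 1.0 * a^T * b; 'T' transposes the first operand a (2x3 -> 3x2)
c:mul(a, b, 1.0, 0.0, 'T', 'N')
print(c) -- every element should be 4
```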
\ No newline at end of file diff --git a/nerv/doc/nerv_nn.md b/nerv/doc/nerv_nn.md new file mode 100644 index 0000000..c57447d --- /dev/null +++ b/nerv/doc/nerv_nn.md @@ -0,0 +1,256 @@ +#The Nerv NN Package# +Part of the [Nerv](../README.md) toolkit. + +##Description## +###Class hierarchy### +it contains __nerv.LayerRepo__, __nerv.ParamRepo__, and __nerv.DAGLayer__(inherits __nerv.Layer__). + +###Class hierarchy and their members### +####nerv.ParamRepo#### +Get parameter object by ID. +* `table param_table` Contains the mapping of parameter ID to parameter file(__nerv.ChunkFile__) +* __nerv.LayerRepo__ Get layer object by ID. +* `table layers` Contains the mapping of layer ID to layer object. +objects. + +####__nerv.DAGLayer__#### +Inherits __nerv.Layer__. +* `layers`: __table__, a mapping from a layer ID to its "ref". A ref is a structure that contains reference to space allocations and other info of the layer. +* `inputs`: __table__, a mapping from the inputs ports of the DAG layer to the input ports of the sublayer, the key is the port number, the value is `{ref, port}`. +* `outputs`:__table__, the counterpart of `inputs`. +* `parsed_conn`: __table__, a list of parsed connections, each entry is of format `{{ref_from, port_from}, {ref_to, port_to}}`. +* `queue`: __table__, a list of "ref"s, the propagation of the DAGLayer will follow this order, and back-propagation will follow a reverse order. + +##Methods## + +###__nerv.ParamRepo__### + +####nerv.ParamRepo:\_\_init(param\_files)#### +* Parameters: + `param_files`: __table__ +* Description: + `param_files` is a list of file names that stores parameters, the newed __ParamRepo__ will read them from file and store the mapping for future fetching. + +####nerv.Param ParamRepo.get_param(ParamRepo self, string pid, table global_conf)#### +* Returns: + __nerv.Layer__ +* Parameters: + `self`: __nerv.ParamRepo__. + `pid`: __string__. + `global_conf`: __table__. +* Description: + __ParamRepo__ will find the __nerv.ChunkFile__ `pf` that contains parameter of ID `pid` and return `pf:read_chunk(pid, global_conf)`. + +###__nerv.LayerRepo__### +####nerv.LayerRepo:\_\_init(layer\_spec, param\_repo, global\_conf)#### +* Returns: + __nerv.LayerRepo__. +* Parameters: + `self`: __nerv.ParamRepo__. + `layer_spec`: __table__. + `param_repo`: __nerv.ParamRepo__. + `global_conf`: __table__. +* Description: + __LayerRepo__ will construct the layers specified in `layer_spec`. Every entry in the `layer_spec` table should follow the format below: + + > layer_spec : {[layer_type1] = llist1, [layer_type2] = llist2, ...} + > llist : {layer1, layer2, ...} + > layer : layerid = {param_config, layer_config} + > param_config : {param1 = paramID1, param2 = paramID2} + + __LayerRepo__ will merge `param_config` into `layer_config` and construct a layer by calling `layer_type(layerid, global_conf, layer_config)`. + +####nerv.LayerRepo.get\_layer(self, lid)#### +* Returns: + __nerv.LayerRepo__, the layer with ID `lid`. +* Parameters: + `self`:__nerv.LayerRepo__. + `lid`:__string__. +* Description: + Returns the layer with ID `lid`. + +###nerv.DAGLayer### +####nerv.DAGLayer:\_\_init(id, global\_conf, layer\_conf)#### +* Returns: + __nerv.DAGLayer__ +* Parameters: + `id`: __string__ + `global_conf`: __table__ + `layer_conf`: __table__ +* Description: + The `layer_conf` should contain `layer_conf.sub_layers` which is a __nerv.LayerRepo__ storing the sub layers of the DAGLayer. 
It should also contain `layer_conf.connections`, which is a string-to-string mapping table describing the DAG connections. See an example below: + + ``` + dagL = nerv.DAGLayer("DAGL", global_conf, {["dim_in"] = {input_dim, 2}, ["dim_out"] = {}, ["sub_layers"] = layerRepo, + ["connections"] = { + ["<input>[1]"] = "AffineL[1]", + ["AffineL[1]"] = "SoftmaxL[1]", + ["<input>[2]"] = "SoftmaxL[2]", + }}) + ``` + +####nerv.DAGLayer.init(self, batch\_size)#### +* Parameters: + `self`: __nerv.DAGLayer__ + `batch_size`: __int__ +* Description: + This initialization method will allocate space for output and input matrice, and will call `init()` for each of its sub layers. + + +####nerv.DAGLayer.propagate(self, input, output)#### +* Parameters: + `self`: __nerv.DAGLayer__ + `input`: __table__ + `output`: __table__ +* Description: + The same function as __nerv.Layer.propagate__, do propagation for each layer in the order of `self.queue`. + +####nerv.DAGLayer.back\_propagate(self, next\_bp\_err, bp\_err, input, output)#### +* Parameters: + `self`: __nerv.DAGLayer__ + `next_bp_err`: __table__ + `bp_err`: __table__ + `input`: __table__ + `output`: __table__ +* Description: + The same function as __nerv.Layer.back_propagate__, do back-propagation for each layer in the reverse order of `self.queue`. + +####nerv.DAGLayer.update(self, bp\_err, input, output)#### +* Parameters: + `self`: __nerv.DAGLayer__ + `bp_err`: __table__ + `input`: __table__ + `output`: __table__ +* Description: + The same function as __nerv.Layer.update__, do update for each layer in the order of `self.queue`. + +##Examples## +* aaa + +``` +require 'math' + +require 'layer.affine' +require 'layer.softmax_ce' + +--[[Example using DAGLayer, a simple two-classification problem]]-- + +--[[begin global setting and data generation]]-- +global_conf = {lrate = 10, + wcost = 1e-6, + momentum = 0.9, + cumat_type = nerv.CuMatrixFloat, + } + +input_dim = 5 +data_num = 100 +param_fn = "../tmp" +ansV = nerv.CuMatrixFloat(input_dim, 1) +for i = 0, input_dim - 1, 1 do + ansV[i][0] = math.random() - 0.5 +end +ansB = math.random() - 0.5 +print('displaying ansV') +print(ansV) +print('displaying ansB(bias)') +print(ansB) + +dataM = nerv.CuMatrixFloat(data_num, input_dim) +for i = 0, data_num - 1, 1 do + for j = 0, input_dim - 1, 1 do + dataM[i][j] = math.random() * 2 - 1 + end +end +refM = nerv.CuMatrixFloat(data_num, 1) +refM:fill(ansB) +refM:mul(dataM, ansV, 1, 1) --refM = dataM * ansV + ansB + +labelM = nerv.CuMatrixFloat(data_num, 2) +for i = 0, data_num - 1, 1 do + if (refM[i][0] > 0) then + labelM[i][0] = 1 + labelM[i][1] = 0 + else + labelM[i][0] = 0 + labelM[i][1] = 1 + end +end +--[[global setting and data generation end]]-- + + +--[[begin network building]]-- +--parameters +do + local affineL_ltp = nerv.LinearTransParam('AffineL_ltp', global_conf) + affineL_ltp.trans = nerv.CuMatrixFloat(input_dim, 2) + for i = 0, input_dim - 1, 1 do + for j = 0, 1, 1 do + affineL_ltp.trans[i][j] = math.random() - 0.5 + end + end + local affineL_bp = nerv.BiasParam('AffineL_bp', global_conf) + affineL_bp.trans = nerv.CuMatrixFloat(1, 2) + for j = 0, 1, 1 do + affineL_bp.trans[j] = math.random() - 0.5 + end + + local chunk = nerv.ChunkFile(param_fn, 'w') + chunk:write_chunk(affineL_ltp) + chunk:write_chunk(affineL_bp) + chunk:close() + + paramRepo = nerv.ParamRepo({param_fn}) +end + +--layers +layerRepo = nerv.LayerRepo({ + ["nerv.AffineLayer"] = + { + ["AffineL"] = {{["ltp"] = "AffineL_ltp", ["bp"] = "AffineL_bp"}, {["dim_in"] = {input_dim}, ["dim_out"] = {2}}}, 
+ }, + ["nerv.SoftmaxCELayer"] = + { + ["SoftmaxL"] = {{}, {["dim_in"] = {2, 2}, ["dim_out"] = {}}} + }, + }, paramRepo, global_conf) +affineL = layerRepo:get_layer("AffineL") +softmaxL = layerRepo:get_layer("SoftmaxL") +print('layers initializing...') +dagL = nerv.DAGLayer("DAGL", global_conf, {["dim_in"] = {input_dim, 2}, ["dim_out"] = {}, ["sub_layers"] = layerRepo, + ["connections"] = { + ["<input>[1]"] = "AffineL[1]", + ["AffineL[1]"] = "SoftmaxL[1]", + ["<input>[2]"] = "SoftmaxL[2]", + }}) +dagL:init(data_num) +--affineL:init() +--softmaxL:init() +--[[network building end]]-- + + +--[[begin space allocation]]-- +print('network input&output&error space allocation...') +dagL_input = {dataM, labelM} +dagL_output = {} +dagL_err = {} +dagL_ierr = {nerv.CuMatrixFloat(data_num, input_dim), nerv.CuMatrixFloat(data_num, 2)} +--[[space allocation end]]-- + + +--[[begin training]]-- +ce_last = 0 +for l = 0, 10, 1 do + dagL:propagate(dagL_input, dagL_output) + dagL:back_propagate(dagL_ierr, dagL_err, dagL_input, dagL_output) + dagL:update(dagL_err, dagL_input, dagL_output) + + if (l % 2 == 0) then + nerv.utils.printf("training iteration %d finished\n", l) + nerv.utils.printf("cross entropy: %.8f\n", softmaxL.total_ce - ce_last) + --nerv.utils.printf("accurate labels: %d\n", calculate_accurate(output, labelM)) + nerv.utils.printf("total frames processed: %.8f\n", softmaxL.total_frames) + end + ce_last = softmaxL.total_ce +end +--[[end training]]-- +```
\ No newline at end of file diff --git a/nerv/doc/nerv_param.md b/nerv/doc/nerv_param.md new file mode 100644 index 0000000..167cb11 --- /dev/null +++ b/nerv/doc/nerv_param.md @@ -0,0 +1,27 @@ +#The Nerv Parameter Package# +Part of the [Nerv](../README.md) toolkit. + +##Description## +###Class hierarchy### +There is a base class __Nerv.Param__ defined in `layer/init.lua`. + +###Class hierarchy and their members### +* __nerv.MatrixParam__ inherits __nerv.Param__ + * `Matrix trans` stores the parameter matrix. +* __nerv.LinearTransParam__ inherits __Nerv.MatrixParam__. +* __Nerv.BiasParam__ inherits __Nerv.MatrixParam__. + +##Methods## +* __void Param.\_\_init(Param self, string id, table global_conf)__ +Constructor of a __Param__, it will set `self.id` to be `id` and `self.gconf` to be `global_conf`. +* __void Param.set_info(Param self, table info)__ +Set `self.info` to be `info`. +* __table Param.get_info(Param self)__ +Returns `self.info`. +* __void Param.read(Param self, ChunkData pcdata)__ +Abstract method. +In this method, `self` should in turn calls its members to load from `pcdata`. +* __void Param.write(Param self, ChunkFileHandle pfhandle)__ +Abstract method. +Save parameters to file. In this method, `self` should in turn calls its members to save to `pfhandle`. + diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua new file mode 100644 index 0000000..a5727be --- /dev/null +++ b/nerv/examples/asr_trainer.lua @@ -0,0 +1,106 @@ +function build_trainer(ifname) + local param_repo = nerv.ParamRepo() + param_repo:import(ifname, nil, gconf) + local sublayer_repo = make_sublayer_repo(param_repo) + local layer_repo = make_layer_repo(sublayer_repo, param_repo) + local crit = get_criterion_layer(sublayer_repo) + local network = get_network(layer_repo) + local input_order = get_input_order() + local iterative_trainer = function (prefix, scp_file, bp) + gconf.randomize = bp + -- build buffer + local buffer = make_buffer(make_readers(scp_file, layer_repo)) + -- initialize the network + network:init(gconf.batch_size) + gconf.cnt = 0 + err_input = {nerv.CuMatrixFloat(256, 1)} + err_input[1]:fill(1) + for data in buffer.get_data, buffer do + -- prine stat periodically + gconf.cnt = gconf.cnt + 1 + if gconf.cnt == 1000 then + print_stat(sublayer_repo) + nerv.CuMatrix.print_profile() + nerv.CuMatrix.clear_profile() + gconf.cnt = 0 + -- break + end + local input = {} +-- if gconf.cnt == 100 then break end + for i, id in ipairs(input_order) do + if data[id] == nil then + nerv.error("input data %s not found", id) + end + table.insert(input, data[id]) + end + local output = {nerv.CuMatrixFloat(256, 1)} + err_output = {input[1]:create()} + network:propagate(input, output) + if bp then + network:back_propagate(err_input, err_output, input, output) + network:update(err_input, input, output) + end + -- collect garbage in-time to save GPU memory + collectgarbage("collect") + end + print_stat(sublayer_repo) + nerv.CuMatrix.print_profile() + nerv.CuMatrix.clear_profile() + if (not bp) and prefix ~= nil then + nerv.info("writing back...") + local fname = string.format("%s_cv%.3f.nerv", + prefix, get_accuracy(sublayer_repo)) + network:get_params():export(fname, nil) + end + return get_accuracy(sublayer_repo) + end + return iterative_trainer +end + +dofile(arg[1]) +start_halving_inc = 0.5 +halving_factor = 0.6 +end_halving_inc = 0.1 +min_iter = 1 +max_iter = 20 +min_halving = 5 +gconf.batch_size = 256 +gconf.buffer_size = 81920 + +local pf0 = gconf.initialized_param +local trainer = 
build_trainer(pf0) +--local trainer = build_trainer("c3.nerv") +local accu_best = trainer(nil, gconf.cv_scp, false) +local do_halving = false + +nerv.info("initial cross validation: %.3f", accu_best) +for i = 1, max_iter do + nerv.info("[NN] begin iteration %d with lrate = %.6f", i, gconf.lrate) + local accu_tr = trainer(nil, gconf.tr_scp, true) + nerv.info("[TR] training set %d: %.3f", i, accu_tr) + local accu_new = trainer( + string.format("%s_%s_iter_%d_lr%f_tr%.3f", + string.gsub( + (string.gsub(pf0[1], "(.*/)(.*)", "%2")), + "(.*)%..*", "%1"), + os.date("%Y%m%d%H%M%S"), + i, gconf.lrate, + accu_tr), + gconf.cv_scp, false) + nerv.info("[CV] cross validation %d: %.3f", i, accu_new) + -- TODO: revert the weights + local accu_diff = accu_new - accu_best + if do_halving and accu_diff < end_halving_inc and i > min_iter then + break + end + if accu_diff < start_halving_inc and i >= min_halving then + do_halving = true + end + if do_halving then + gconf.lrate = gconf.lrate * halving_factor + end + if accu_new > accu_best then + accu_best = accu_new + end +-- nerv.Matrix.print_profile() +end diff --git a/nerv/examples/chunk_file_example.lua b/nerv/examples/chunk_file_example.lua new file mode 100644 index 0000000..5961c98 --- /dev/null +++ b/nerv/examples/chunk_file_example.lua @@ -0,0 +1,53 @@ +-- To define a readable and writable chunk, one must define a class with the +-- following methods: __init(id, global_conf), read(handle), write(handle), +-- get_info(), set_info(info) and an id attribute. This file demonstrates a +-- basic chunk implementation which manages the I/O of a matrix + +local MatrixChunk = nerv.class("nerv.MatrixChunk") + +function MatrixChunk:__init(id, global_conf) + self.id = id + self.info = {} + self.gconf = global_conf +end + +function MatrixChunk:read(handle) + -- pass the read handle to the matrix method + self.data = nerv.MMatrixFloat.load(handle) +end + +function MatrixChunk:write(handle) + -- pass the write handle to the matrix method + self.data:save(handle) +end + +function MatrixChunk:get_info() + return self.info +end + +function MatrixChunk:set_info(info) + self.info = info +end + +function MatrixChunk.create_from_matrix(id, mat) + local ins = nerv.MatrixChunk(id) + ins.data = mat + return ins +end + +mat = nerv.MMatrixFloat(3, 4) +for i = 0, 2 do + for j = 0, 3 do + mat[i][j] = i + j + end +end + +cd = nerv.MatrixChunk.create_from_matrix("matrix1", mat) + +cf = nerv.ChunkFile("test.nerv", "w") +cf:write_chunk(cd) +cf:close() + +cf2 = nerv.ChunkFile("test.nerv", "r") +cd2 = cf2:read_chunk("matrix1") +print(cd2.data) diff --git a/nerv/examples/cumatrix_example.lua b/nerv/examples/cumatrix_example.lua new file mode 100644 index 0000000..544fc7f --- /dev/null +++ b/nerv/examples/cumatrix_example.lua @@ -0,0 +1,31 @@ +m = 4 +n = 4 +fm = nerv.CuMatrixFloat(m, n) +dm = nerv.CuMatrixDouble(m, n) +for i = 0, m - 1 do + for j = 0, n - 1 do + -- local t = math.random(10) + t = i / (j + 1) + fm[i][j] = t + dm[i][j] = t + end +end +print(fm) +fs = fm:create() +fs:softmax(fm) +-- print(fs) +print(dm) +ds = dm:create() +ds:softmax(dm) +-- print(ds) +print(fs) +print(fs + fs) +print(ds + ds) +print(fs - fs) +print(ds - ds) + +a = fs:create() +a:mul_elem(fs, fs) +print(a) +a:log_elem(fs) +print(a) diff --git a/nerv/examples/cumatrix_from_mmatrix.lua b/nerv/examples/cumatrix_from_mmatrix.lua new file mode 100644 index 0000000..2309e14 --- /dev/null +++ b/nerv/examples/cumatrix_from_mmatrix.lua @@ -0,0 +1,32 @@ +m = 3 +n = 4 +fm = nerv.MMatrixFloat(m, n) +dm = 
nerv.MMatrixDouble(m, n) +for i = 0, m - 1 do + for j = 0, n - 1 do + -- local t = math.random(10) + t = i / (j + 1) + fm[i][j] = t + dm[i][j] = t + end +end +print(fm) +print(dm) + +fc = nerv.CuMatrixFloat(m, n) +dc = nerv.CuMatrixDouble(m, n) +fc:copy_fromh(fm) +dc:copy_fromh(dm) +print("fc and dc") +print(fc) +print(dc) +dc[1]:copy_tod(dc[0]) +print("dc[1] copied to dc[0]") +print(dc) +print("softmax of fc and dc") +sfc = fc:create() +sdc = dc:create() +sfc:softmax(fc) +print(sfc) +sdc:softmax(dc) +print(sdc) diff --git a/nerv/examples/mmatrix_example.lua b/nerv/examples/mmatrix_example.lua new file mode 100644 index 0000000..8ddfe84 --- /dev/null +++ b/nerv/examples/mmatrix_example.lua @@ -0,0 +1,20 @@ +m = 10 +n = 10 +fm = nerv.MMatrixFloat(m, n) +dm = nerv.MMatrixDouble(m, n) +for i = 0, m - 1 do + for j = 0, n - 1 do + -- local t = math.random(10) + t = i / (j + 1) + fm[i][j] = t + dm[i][j] = t + end +end +print("test fm:get_dataref_value:", fm:get_dataref_value()) +print("forced a garbade collect") +collectgarbage("collect") +print("test fm:get_dataref_value:", fm:get_dataref_value()) +print(fm) +-- print(fm:softmax()) +print(dm) +-- print(dm:softmax()) diff --git a/nerv/examples/oop_example.c b/nerv/examples/oop_example.c new file mode 100644 index 0000000..59dfc5a --- /dev/null +++ b/nerv/examples/oop_example.c @@ -0,0 +1,101 @@ +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include "../common.h" + +#define SQR(x) ((x) * (x)) + +const char *point_tname = "nerv.Point"; +const char *better_point_tname = "nerv.BetterPoint"; + +typedef struct { + double x, y; +} Point; + +static int point_norm (lua_State *L) { + Point *p = luaT_checkudata(L, 1, point_tname); + lua_pushnumber(L, sqrt(SQR(p->x) + SQR(p->y))); + return 1; +} + +static int point_set_x (lua_State *L) { + Point *p = luaT_checkudata(L, 1, point_tname); + p->x = luaL_checknumber(L, 2); + return 0; +} + +static int point_set_y (lua_State *L) { + Point *p = luaT_checkudata(L, 1, point_tname); + p->y = luaL_checknumber(L, 2); + return 0; +} + +/* generic constructor */ +void point_new_(Point *self, double x, double y) { + self->x = x; + self->y = y; +} + +int point_new(lua_State *L) { + /* `_new` function should create the object itself */ + Point *self = (Point *)malloc(sizeof(Point)); + point_new_(self, luaL_checknumber(L, 1), luaL_checknumber(L, 2)); + luaT_pushudata(L, self, point_tname); + fprintf(stderr, "[example] %s constructor is invoked\n", + point_tname); + return 1; +} + +static const luaL_Reg point_methods[] = { + {"set_x", point_set_x}, + {"set_y", point_set_y}, + {"norm", point_norm}, + {NULL, NULL} +}; + + +/* the subclass method overrides the one from baseclass */ +static int better_point_norm (lua_State *L) { + Point *p = luaT_checkudata(L, 1, point_tname); + lua_pushnumber(L, fabs(p->x) + fabs(p->y)); + return 1; +} + +int better_point_new(lua_State *L) { + /* `_new` function should create the object itself */ + Point *self = (Point *)malloc(sizeof(Point)); + point_new_(self, luaL_checknumber(L, 1), luaL_checknumber(L, 2)); + luaT_pushudata(L, self, better_point_tname); + fprintf(stderr, "[example] %s constructor is invoked\n", + better_point_tname); + return 1; +} + +static const luaL_Reg better_point_methods[] = { + {"norm", better_point_norm}, + {NULL, NULL} +}; + +void nerv_point_init(lua_State *L) { + /* create a class and let luaT know */ + luaT_newmetatable(L, point_tname, NULL, point_new, NULL, NULL); + /* register member functions */ + luaL_register(L, NULL, point_methods); + /* keep 
the stack balanced, see `nerv.c` */ + lua_pop(L, 1); +} + +void nerv_better_point_init(lua_State *L) { + /* create a class and let luaT know */ + luaT_newmetatable(L, better_point_tname, point_tname, + better_point_new, NULL, NULL); + /* register member functions */ + luaL_register(L, NULL, better_point_methods); + /* keep the stack balanced, see `nerv.c` */ + lua_pop(L, 1); +} + +void nerv_example_init(lua_State *L) { + nerv_point_init(L); + nerv_better_point_init(L); +} diff --git a/nerv/examples/oop_example.lua b/nerv/examples/oop_example.lua new file mode 100644 index 0000000..b753288 --- /dev/null +++ b/nerv/examples/oop_example.lua @@ -0,0 +1,16 @@ +p = nerv.Point(0, 0) -- create a Point instance +print(p) +print(p:norm()) -- get 2-norm of the Point +p:set_x(1.0) +p:set_y(2.0) +print(p:norm()) -- get 2-norm of the Point + +bp = nerv.BetterPoint(1, 2) +-- use methods from base class +bp:set_x(1.0) +bp:set_y(2.0) +print(bp) +print(bp:norm()) --get 1-norm of the Point + +print(p.__typename) +print(bp.__typename) diff --git a/nerv/examples/swb_baseline.lua b/nerv/examples/swb_baseline.lua new file mode 100644 index 0000000..8b7e01a --- /dev/null +++ b/nerv/examples/swb_baseline.lua @@ -0,0 +1,166 @@ +require 'speech.init' +gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + frm_ext = 5, + tr_scp = "/slfs1/users/mfy43/swb_ivec/train_bp.scp", + cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp", + htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", + initialized_param = {"/slfs1/users/mfy43/swb_init.nerv", + "/slfs1/users/mfy43/swb_global_transf.nerv"}, + debug = false} + +function make_sublayer_repo(param_repo) + return nerv.LayerRepo( + { + -- global transf + ["nerv.BiasLayer"] = + { + blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, + blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} + }, + ["nerv.WindowLayer"] = + { + wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, + wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} + }, + -- biased linearity + ["nerv.AffineLayer"] = + { + affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, + {dim_in = {429}, dim_out = {2048}}}, + affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, + {dim_in = {2048}, dim_out = {3001}}} + }, + ["nerv.SigmoidLayer"] = + { + sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} + }, + ["nerv.SoftmaxCELayer"] = + { + ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}} + } + }, param_repo, gconf) +end + +function make_layer_repo(sublayer_repo, param_repo) + return 
nerv.LayerRepo( + { + ["nerv.DAGLayer"] = + { + global_transf = {{}, { + dim_in = {429}, dim_out = {429}, + sub_layers = sublayer_repo, + connections = { + ["<input>[1]"] = "blayer1[1]", + ["blayer1[1]"] = "wlayer1[1]", + ["wlayer1[1]"] = "blayer2[1]", + ["blayer2[1]"] = "wlayer2[1]", + ["wlayer2[1]"] = "<output>[1]" + } + }}, + main = {{}, { + dim_in = {429, 1}, dim_out = {1}, + sub_layers = sublayer_repo, + connections = { + ["<input>[1]"] = "affine0[1]", + ["affine0[1]"] = "sigmoid0[1]", + ["sigmoid0[1]"] = "affine1[1]", + ["affine1[1]"] = "sigmoid1[1]", + ["sigmoid1[1]"] = "affine2[1]", + ["affine2[1]"] = "sigmoid2[1]", + ["sigmoid2[1]"] = "affine3[1]", + ["affine3[1]"] = "sigmoid3[1]", + ["sigmoid3[1]"] = "affine4[1]", + ["affine4[1]"] = "sigmoid4[1]", + ["sigmoid4[1]"] = "affine5[1]", + ["affine5[1]"] = "sigmoid5[1]", + ["sigmoid5[1]"] = "affine6[1]", + ["affine6[1]"] = "sigmoid6[1]", + ["sigmoid6[1]"] = "affine7[1]", + ["affine7[1]"] = "ce_crit[1]", + ["<input>[2]"] = "ce_crit[2]", + ["ce_crit[1]"] = "<output>[1]" + } + }} + } + }, param_repo, gconf) +end + +function get_criterion_layer(sublayer_repo) + return sublayer_repo:get_layer("ce_crit") +end + +function get_network(layer_repo) + return layer_repo:get_layer("main") +end + +function make_readers(scp_file, layer_repo) + return { + {reader = nerv.TNetReader(gconf, + { + id = "main_scp", + scp_file = scp_file, + conf_file = gconf.htk_conf, + frm_ext = gconf.frm_ext, + mlfs = { + phone_state = { + file = "/slfs1/users/mfy43/swb_ivec/ref.mlf", + format = "map", + format_arg = "/slfs1/users/mfy43/swb_ivec/dict", + dir = "*/", + ext = "lab" + } + }, + global_transf = layer_repo:get_layer("global_transf") + }), + data = {main_scp = 429, phone_state = 1}} + } +end + +function make_buffer(readers) + return nerv.SGDBuffer(gconf, + { + buffer_size = gconf.buffer_size, + randomize = gconf.randomize, + readers = readers + }) +end + +function get_input_order() + return {"main_scp", "phone_state"} +end + +function get_accuracy(sublayer_repo) + local ce_crit = sublayer_repo:get_layer("ce_crit") + return ce_crit.total_correct / ce_crit.total_frames * 100 +end + +function print_stat(sublayer_repo) + local ce_crit = sublayer_repo:get_layer("ce_crit") + nerv.info("*** training stat begin ***") + nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce) + nerv.printf("correct:\t\t%d\n", ce_crit.total_correct) + nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames) + nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames) + nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(sublayer_repo)) + nerv.info("*** training stat end ***") +end diff --git a/nerv/examples/test_dnn_layers.lua b/nerv/examples/test_dnn_layers.lua new file mode 100644 index 0000000..64c0dec --- /dev/null +++ b/nerv/examples/test_dnn_layers.lua @@ -0,0 +1,78 @@ +require 'layer.affine' +require 'layer.sigmoid' +require 'layer.softmax_ce' + +global_conf = {lrate = 0.8, wcost = 1e-6, + momentum = 0.9, cumat_type = nerv.CuMatrixFloat} + +pf = nerv.ChunkFile("affine.param", "r") +ltp = pf:read_chunk("a", global_conf) +bp = pf:read_chunk("b", global_conf) + +-- print(bp.trans) + +af = nerv.AffineLayer("test", global_conf, {["ltp"] = ltp, + ["bp"] = bp, + dim_in = {429}, + dim_out = {2048}}) +sg = nerv.SigmoidLayer("test2", global_conf, {dim_in = {2048}, + dim_out = {2048}}) +sm = nerv.SoftmaxCELayer("test3", global_conf, {dim_in = {2048, 2048}, + dim_out = {}}) +af:init() +sg:init() +sm:init() + +df = nerv.ChunkFile("input.param", "r") + +label = nerv.CuMatrixFloat(10, 
2048) +label:fill(0) +for i = 0, 9 do + label[i][i] = 1.0 +end + +input1 = {df:read_chunk("input", global_conf).trans} +output1 = {nerv.CuMatrixFloat(10, 2048)} +input2 = output1 +output2 = {nerv.CuMatrixFloat(10, 2048)} +input3 = {output2[1], label} +output3 = {} +err_input1 = {} +err_output1 = {nerv.CuMatrixFloat(10, 2048)} +err_input2 = err_output1 +err_output2 = {nerv.CuMatrixFloat(10, 2048)} +err_input3 = err_output2 +err_output3 = {input1[1]:create()} + +for i = 0, 3 do + -- propagate + af:propagate(input1, output1) + sg:propagate(input2, output2) + sm:propagate(input3, output3) + + -- back_propagate + sm:back_propagate(err_output1, err_input1, input3, output3) + sg:back_propagate(err_output2, err_input2, input2, output2) + af:back_propagate(err_output3, err_input3, input1, output1) + + -- update + sm:update(err_input1, input3, output3) + sg:update(err_input2, input2, output2) + af:update(err_input3, input1, output1) + + + print("output1") + print(output1[1]) + print("output2") + print(output2[1]) + print("err_output1") + print(err_output1[1]) + print("err_output2") + print(err_output2[1]) + nerv.printf("cross entropy: %.8f\n", sm.total_ce) + nerv.printf("frames: %.8f\n", sm.total_frames) +end +print("linear") +print(af.ltp.trans) +print("linear2") +print(af.bp.trans) diff --git a/nerv/examples/test_nn_lib.lua b/nerv/examples/test_nn_lib.lua new file mode 100644 index 0000000..5444810 --- /dev/null +++ b/nerv/examples/test_nn_lib.lua @@ -0,0 +1,164 @@ +require 'speech.init' +gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, + cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + batch_size = 256} + +param_repo = nerv.ParamRepo({"converted.nerv", "global_transf.nerv"}) +sublayer_repo = nerv.LayerRepo( + { + -- global transf + ["nerv.BiasLayer"] = + { + blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, + blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} + }, + ["nerv.WindowLayer"] = + { + wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, + wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} + }, + -- biased linearity + ["nerv.AffineLayer"] = + { + affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, + {dim_in = {429}, dim_out = {2048}}}, + affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, + {dim_in = {2048}, dim_out = {2048}}}, + affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, + {dim_in = {2048}, dim_out = {3001}}} + }, + ["nerv.SigmoidLayer"] = + { + sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, + sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} + }, + ["nerv.SoftmaxCELayer"] = + { + softmax_ce0 = {{}, {dim_in = {3001, 1}, dim_out = {}, compressed = true}} + } + }, param_repo, gconf) + +layer_repo = nerv.LayerRepo( + { + ["nerv.DAGLayer"] = + { + 
global_transf = {{}, { + dim_in = {429}, dim_out = {429}, + sub_layers = sublayer_repo, + connections = { + ["<input>[1]"] = "blayer1[1]", + ["blayer1[1]"] = "wlayer1[1]", + ["wlayer1[1]"] = "blayer2[1]", + ["blayer2[1]"] = "wlayer2[1]", + ["wlayer2[1]"] = "<output>[1]" + } + }}, + main = {{}, { + dim_in = {429, 1}, dim_out = {}, + sub_layers = sublayer_repo, + connections = { + ["<input>[1]"] = "affine0[1]", + ["affine0[1]"] = "sigmoid0[1]", + ["sigmoid0[1]"] = "affine1[1]", + ["affine1[1]"] = "sigmoid1[1]", + ["sigmoid1[1]"] = "affine2[1]", + ["affine2[1]"] = "sigmoid2[1]", + ["sigmoid2[1]"] = "affine3[1]", + ["affine3[1]"] = "sigmoid3[1]", + ["sigmoid3[1]"] = "affine4[1]", + ["affine4[1]"] = "sigmoid4[1]", + ["sigmoid4[1]"] = "affine5[1]", + ["affine5[1]"] = "sigmoid5[1]", + ["sigmoid5[1]"] = "affine6[1]", + ["affine6[1]"] = "sigmoid6[1]", + ["sigmoid6[1]"] = "affine7[1]", + ["affine7[1]"] = "softmax_ce0[1]", + ["<input>[2]"] = "softmax_ce0[2]" + } + }} + } + }, param_repo, gconf) + +tnet_reader = nerv.TNetReader(gconf, + { + id = "main_scp", + scp_file = "/slfs1/users/mfy43/swb_ivec/train_bp.scp", +-- scp_file = "t.scp", + conf_file = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", + frm_ext = 5, + mlfs = { + ref = { + file = "/slfs1/users/mfy43/swb_ivec/ref.mlf", + format = "map", + format_arg = "/slfs1/users/mfy43/swb_ivec/dict", + dir = "*/", + ext = "lab" + } + }, + global_transf = layer_repo:get_layer("global_transf") + }) + +buffer = nerv.SGDBuffer(gconf, + { + buffer_size = 81920, + randomize = true, + readers = { + { reader = tnet_reader, + data = {main_scp = 429, ref = 1}} + } + }) + +sm = sublayer_repo:get_layer("softmax_ce0") +main = layer_repo:get_layer("main") +main:init(gconf.batch_size) +gconf.cnt = 0 +-- data = buffer:get_data() +-- input = {data.main_scp, data.ref} +-- while true do +for data in buffer.get_data, buffer do +-- if gconf.cnt == 100 then break end +-- gconf.cnt = gconf.cnt + 1 + + input = {data.main_scp, data.ref} + output = {} + err_input = {} + err_output = {input[1]:create()} + + main:propagate(input, output) + main:back_propagate(err_output, err_input, input, output) + main:update(err_input, input, output) + +-- nerv.printf("cross entropy: %.8f\n", sm.total_ce) +-- nerv.printf("correct: %d\n", sm.total_correct) +-- nerv.printf("frames: %d\n", sm.total_frames) +-- nerv.printf("err/frm: %.8f\n", sm.total_ce / sm.total_frames) +-- nerv.printf("accuracy: %.8f\n", sm.total_correct / sm.total_frames) + collectgarbage("collect") +end +nerv.printf("cross entropy: %.8f\n", sm.total_ce) +nerv.printf("correct: %d\n", sm.total_correct) +nerv.printf("accuracy: %.3f%%\n", sm.total_correct / sm.total_frames * 100) +nerv.printf("writing back...\n") +cf = nerv.ChunkFile("output.nerv", "w") +for i, p in ipairs(main:get_params()) do + print(p) + cf:write_chunk(p) +end +cf:close() +nerv.Matrix.print_profile() diff --git a/nerv/init.lua b/nerv/init.lua new file mode 100644 index 0000000..89010a7 --- /dev/null +++ b/nerv/init.lua @@ -0,0 +1,128 @@ +require 'libnerv' + +function nerv.error(fmt, ...) + error(nerv.printf("[nerv] internal error: " .. fmt .. "\n", ...)) +end + +function nerv.error_method_not_implemented() + nerv.error("method not implemented"); +end + +function nerv.printf(fmt, ...) + io.write(string.format(fmt, ...)) +end + +function nerv.mesg_with_timestamp(fmt, ...) + nerv.printf( + string.format("(%s)[nerv] info: %s\n", + os.date("%H:%M:%S %F"), fmt), ...) +end + +function nerv.info(fmt, ...) 
+ nerv.printf( + string.format("(%s)[nerv] info: %s\n", + os.date("%H:%M:%S %F"), fmt), ...) +end + +function nerv.warning(fmt, ...) + nerv.printf( + string.format("(%s)[nerv] warning: %s\n", + os.date("%H:%M:%S %F"), fmt), ...) +end + +-- Torch C API wrapper +function nerv.class(tname, parenttname) + + local function constructor(...) + local self = {} + nerv.setmetatable(self, tname) + if self.__init then + self:__init(...) + end + return self + end + + local function factory() + local self = {} + nerv.setmetatable(self, tname) + return self + end + + local mt = nerv.newmetatable(tname, parenttname, constructor, nil, factory) + local mpt + if parenttname then + mpt = nerv.getmetatable(parenttname) + end + return mt, mpt +end + +function table.val_to_str(v) + if "string" == type(v) then + v = string.gsub(v, "\n", "\\n") + if string.match(string.gsub(v,"[^'\"]",""), '^"+$') then + return "'" .. v .. "'" + end + return '"' .. string.gsub(v,'"', '\\"') .. '"' + else + return "table" == type(v) and table.tostring(v) or + tostring(v) + end +end + +function table.key_to_str (k) + if "string" == type(k) and string.match(k, "^[_%a][_%a%d]*$") then + return k + else + return "[" .. table.val_to_str(k) .. "]" + end +end + +function table.tostring(tbl) + local result, done = {}, {} + for k, v in ipairs(tbl) do + table.insert(result, table.val_to_str(v)) + done[k] = true + end + for k, v in pairs(tbl) do + if not done[k] then + table.insert(result, + table.key_to_str(k) .. "=" .. table.val_to_str(v)) + end + end + return "{" .. table.concat(result, ",") .. "}" +end + +function nerv.get_type(tname) + return assert(loadstring("return " .. tname))() +end + +function nerv.is_type(obj, tname) + local mt0 = nerv.getmetatable(tname) + local mt = getmetatable(obj) + while mt do + if mt == mt0 then + return true + end + mt = getmetatable(mt) + end + return false +end + +function nerv.dirname(filename) + if filename:match(".-/.-") then + local name = string.gsub(filename, "(.*/)(.*)", "%1") + return name + else + return '' + end +end + +function nerv.include(filename) + local caller = debug.getinfo(2, "S").source:sub(2) + dofile(nerv.dirname(caller) .. 
filename) +end + +nerv.include('matrix/init.lua') +nerv.include('io/init.lua') +nerv.include('layer/init.lua') +nerv.include('nn/init.lua') diff --git a/nerv/io/chunk_file.c b/nerv/io/chunk_file.c new file mode 100644 index 0000000..c0b6b9f --- /dev/null +++ b/nerv/io/chunk_file.c @@ -0,0 +1,325 @@ +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include "../common.h" +#include "chunk_file.h" + +#define INVALID_FORMAT_ERROR(fn) \ + nerv_error(L, "Invalid chunk file: %s", fn) +#define CHECK_FORMAT(exp, ret, fname) \ + do { \ + if ((exp) != (ret)) INVALID_FORMAT_ERROR(fn); \ + } while (0) +#define CHECK_FILE_OPEN(pfh) \ + do { \ + if ((pfh)->closed) \ + nerv_error(L, "operations on a closed file"); \ + } while (0) + +const char *nerv_chunk_file_tname = "nerv.ChunkFile"; +const char *nerv_chunk_file_handle_tname = "nerv.ChunkFileHandle"; +const char *nerv_chunk_info_tname = "nerv.ChunkInfo"; +const char *nerv_chunk_data_tname = "nerv.ChunkData"; + +#define PARAM_HEADER_SIZE 16 + +enum { + NORMAL, + INVALID_FORMAT, + END_OF_FILE, + SECTION_OVERFLOW, + WRITE_ERROR +}; + +size_t read_chunk_header_plain(FILE *fp, int *status) { + static char buff[PARAM_HEADER_SIZE]; + int i; + size_t size = 0; + *status = NORMAL; + if (fread(buff, 1, PARAM_HEADER_SIZE, fp) != PARAM_HEADER_SIZE) + { + if (feof(fp)) *status = END_OF_FILE; + else *status = INVALID_FORMAT; + } + for (i = 0; i < PARAM_HEADER_SIZE; i++) + if (isdigit(buff[i])) + size = size * 10 + buff[i] - '0'; +/* fprintf(stderr, "header: %lu\n", size); */ + return size; +} + +#define CHECK_WRITE(status) \ + do { \ + if (status == SECTION_OVERFLOW) \ + nerv_error(L, "section overflowed"); \ + else if (status == WRITE_ERROR) \ + nerv_error(L, "error while writing"); \ + } while (0) + +void write_chunk_header_plain(FILE *fp, size_t size, int *status) { + static char buff[PARAM_HEADER_SIZE]; + int i; + *status = NORMAL; + for (i = PARAM_HEADER_SIZE - 3; i > 0; i--, size /= 10) + buff[i] = size % 10 + '0'; + if (size) + { + *status = SECTION_OVERFLOW; + return; + } + buff[0] = '['; + buff[PARAM_HEADER_SIZE - 2] = ']'; + buff[PARAM_HEADER_SIZE - 1] = '\n'; + if (fwrite(buff, 1, PARAM_HEADER_SIZE, fp) != PARAM_HEADER_SIZE) + { + *status = WRITE_ERROR; + return; + } +} + +ChunkData *get_chunk_data(FILE *fp, ChunkInfo *info) { + ChunkData *pcd = (ChunkData *)malloc(sizeof(ChunkData)); + pcd->data = (char *)malloc(info->length); + pcd->fp = fmemopen(pcd->data, info->length, "r"); + assert(fseeko(fp, info->offset, SEEK_SET) == 0); + if (fread(pcd->data, 1, info->length, fp) != (size_t)info->length) + return NULL; + return pcd; +} + +const char *read_chunk_metadata(lua_State *L, FILE *fp, const char *fn) { +#define LINEBUFF_SIZE 1024 + static char buff[7 + LINEBUFF_SIZE] = "return "; + CHECK_FORMAT(fgets(buff + 7, LINEBUFF_SIZE, fp), buff + 7, fn); + /* fprintf(stderr, "metadata: %s\n", buff); */ + return buff; +} + +void write_chunk_metadata(FILE *fp, const char *metadata_str, int *status) { + size_t size = strlen(metadata_str); + *status = NORMAL; + if (fwrite(metadata_str, 1, size, fp) != size || + fprintf(fp, "\n") < 0) + { + *status = WRITE_ERROR; + return; + } + /* fprintf(stderr, "metadata: %s\n", metadata_str); */ +} + + +int nerv_chunk_file_open_write(lua_State *L, const char *fn) { + FILE *fp = fopen(fn, "w"); + ChunkFileHandle *lfp; + if (!fp) nerv_error(L, "Error while opening chunk file: %s", fn); + lfp = (ChunkFileHandle *)malloc(sizeof(ChunkFileHandle)); + lfp->fp = fp; + lfp->closed = 0; + luaT_pushudata(L, lfp, 
nerv_chunk_file_handle_tname); + lua_setfield(L, -2, "handle"); + luaT_pushmetatable(L, nerv_chunk_file_tname); + lua_setmetatable(L, -2); + return 1; +} + +int nerv_chunk_file_open_read(lua_State *L, const char *fn) { + FILE *fp = fopen(fn, "r"); + int i, status; + size_t chunk_len; + off_t offset; + ChunkFileHandle *lfp; + + if (!fp) nerv_error(L, "Error while opening chunk file: %s", fn); + offset = ftello(fp); + lua_newtable(L); + /* fprintf(stderr, "%d\n", (int)offset); */ + for (i = 0;; offset += chunk_len, i++) + { + ChunkInfo *pci; + /* fprintf(stderr, "reading chunk %d from %d\n", i, (int)offset); */ + /* skip to the begining of chunk i */ + CHECK_FORMAT(fseeko(fp, offset, SEEK_SET), 0, fn); + /* read header */ + chunk_len = read_chunk_header_plain(fp, &status); + if (status == END_OF_FILE) break; + else if (status == INVALID_FORMAT) + INVALID_FORMAT_ERROR(fn); + /* read metadata */ + luaL_loadstring(L, read_chunk_metadata(L, fp, fn)); + CHECK_FORMAT(lua_pcall(L, 0, 1, 0), 0, fn); + CHECK_FORMAT(lua_istable(L, -1), 1, fn); + /* stack: obj_table, metadata */ + /* chunk info */ + pci = (ChunkInfo *)malloc(sizeof(ChunkInfo)); + pci->offset = ftello(fp); + pci->length = chunk_len - (pci->offset - offset); + /* fprintf(stderr, "%d + %d (skip %lu)\n", (int)pci->offset, + (int)pci->length, chunk_len); */ + luaT_pushudata(L, pci, nerv_chunk_info_tname); + lua_setfield(L, -2, "chunk"); + /* stack: obj_table, metadata */ + /* get id */ + lua_getfield(L, -1, "id"); + /* stack: obj_table, metadata, id */ + if (!lua_isstring(L, -1)) + nerv_error(L, "id field in metadata must be a string"); + lua_pushvalue(L, -1); + /* stack: obj_table, metadata, id, id */ + lua_gettable(L, -4); + /* stack: obj_table, metadata, id, obj[id] */ + if (!lua_isnil(L, -1)) + nerv_error(L, "conflicting id"); + lua_pop(L, 1); + /* stack: obj_table, metadata, id */ + lua_pushvalue(L, -2); + /* stack: obj_table, metadata, id, metadata */ + lua_settable(L, -4); + /* stack: obj_table, metadata */ + lua_pop(L, 1); + } + lua_setfield(L, -2, "metadata"); + lfp = (ChunkFileHandle *)malloc(sizeof(ChunkFileHandle)); + lfp->fp = fp; + lfp->closed = 0; + luaT_pushudata(L, lfp, nerv_chunk_file_handle_tname); + lua_setfield(L, -2, "handle"); + luaT_pushmetatable(L, nerv_chunk_file_tname); + lua_setmetatable(L, -2); + return 1; +} + +int nerv_chunk_file_new_(lua_State *L, const char *fn, const char *mode) { + int rd = 1, bin = 0; + size_t i, len = strlen(mode); + for (i = 0; i < len; i++) + switch (mode[i]) + { + case 'r': rd = 1; break; + case 'w': rd = 0; break; + case 'b': bin = 1; break; + } + return rd ? 
nerv_chunk_file_open_read(L, fn) : \ + nerv_chunk_file_open_write(L, fn); +} + +int nerv_chunk_file___init(lua_State *L) { + lua_pushvalue(L, 1); + return nerv_chunk_file_new_(L, luaL_checkstring(L, 2), + luaL_checkstring(L, 3)); +} + +int nerv_chunk_file_new(lua_State *L) { + lua_newtable(L); + return nerv_chunk_file_new_(L, luaL_checkstring(L, 1), + luaL_checkstring(L, 2)); +} + +int nerv_chunk_file_write_chunkdata(lua_State *L) { + ChunkFileHandle *pfh; + int status; + off_t start; + size_t size; + const char *metadata_str = lua_tolstring(L, 2, NULL); + lua_getfield(L, 1, "handle"); + pfh = luaT_checkudata(L, -1, nerv_chunk_file_handle_tname); + CHECK_FILE_OPEN(pfh); + start = ftello(pfh->fp); + write_chunk_header_plain(pfh->fp, 0, &status); /* fill zeros */ + CHECK_WRITE(status); + write_chunk_metadata(pfh->fp, metadata_str, &status); + CHECK_WRITE(status); + lua_pushvalue(L, 3); + lua_getfield(L, -1, "write"); + if (!lua_isfunction(L, -1)) + nerv_error(L, "\"write\" method must be implemented"); + lua_pushvalue(L, -2); + lua_pushvalue(L, 4); /* pass handle as parameter to write() */ + lua_call(L, 2, 0); /* let the write() to write */ + lua_pop(L, 1); + size = ftello(pfh->fp) - start; + fseeko(pfh->fp, start, SEEK_SET); + /* write the calced size */ + write_chunk_header_plain(pfh->fp, size, &status); + CHECK_WRITE(status); + fseeko(pfh->fp, 0, SEEK_END); + return 0; +} + +int nerv_chunk_file_get_chunkdata(lua_State *L) { + ChunkFileHandle *pfh; + ChunkInfo *pci; + ChunkData *pcd; + const char *id = luaL_checkstring(L, 2); + + lua_getfield(L, 1, "handle"); + pfh = luaT_checkudata(L, -1, nerv_chunk_file_handle_tname); + CHECK_FILE_OPEN(pfh); + lua_pop(L, 1); /* pop handle */ + lua_getfield(L, 1, "metadata"); + /* now stack: self, k, metadata */ + lua_getfield(L, -1, id); + /* now stack: self, k, metadata, kth{} */ + if (lua_isnil(L, -1)) /* no chunck with the id */ + return 0; + lua_getfield(L, -1, "chunk"); + pci = luaT_checkudata(L, -1, nerv_chunk_info_tname); + if (!(pcd = get_chunk_data(pfh->fp, pci))) + nerv_error(L, "unexpected end of file"); + luaT_pushudata(L, pcd, nerv_chunk_data_tname); + return 1; +} + +int nerv_chunk_file_close(lua_State *L) { + ChunkFileHandle *pfh; + lua_getfield(L, 1, "handle"); + pfh = luaT_checkudata(L, -1, nerv_chunk_file_handle_tname); + CHECK_FILE_OPEN(pfh); + fclose(pfh->fp); + pfh->closed = 1; + return 0; +} + +int nerv_chunk_file_handle_destroy(lua_State *L) { + ChunkFileHandle *pfh = luaT_checkudata(L, 1, + nerv_chunk_file_handle_tname); + if (!pfh->closed) fclose(pfh->fp); + free(pfh); + return 0; +} + +static int nerv_chunk_info_destroy(lua_State *L) { + ChunkInfo *pci = luaT_checkudata(L, 1, nerv_chunk_info_tname); + free(pci); + return 0; +} + +static int nerv_chunk_data_destroy(lua_State *L) { + ChunkData *pcd = luaT_checkudata(L, 1, nerv_chunk_data_tname); + fclose(pcd->fp); + free(pcd->data); + free(pcd); + return 0; +} + +static const luaL_Reg nerv_chunk_file_methods[] = { + {"get_chunkdata", nerv_chunk_file_get_chunkdata}, + {"_write_chunkdata", nerv_chunk_file_write_chunkdata}, + {"close", nerv_chunk_file_close}, + {"__init", nerv_chunk_file___init}, + {NULL, NULL} +}; + +void nerv_chunk_file_init(lua_State *L) { + luaT_newmetatable(L, nerv_chunk_file_tname, NULL, + nerv_chunk_file_new, + NULL, NULL); + luaL_register(L, NULL, nerv_chunk_file_methods); + lua_pop(L, 1); + luaT_newmetatable(L, nerv_chunk_file_handle_tname, NULL, + NULL, nerv_chunk_file_handle_destroy, NULL); + luaT_newmetatable(L, nerv_chunk_info_tname, NULL, + NULL, 
nerv_chunk_info_destroy, NULL); + luaT_newmetatable(L, nerv_chunk_data_tname, NULL, + NULL, nerv_chunk_data_destroy, NULL); +} + diff --git a/nerv/io/chunk_file.h b/nerv/io/chunk_file.h new file mode 100644 index 0000000..9bae59d --- /dev/null +++ b/nerv/io/chunk_file.h @@ -0,0 +1,23 @@ +#ifndef NERV_LAYER_FILE_H +#define NERV_LAYER_FILE_H + +extern const char *nerv_chunk_file_tname; +extern const char *nerv_chunk_file_handle_tname; +extern const char *nerv_chunk_info_tname; +extern const char *nerv_chunk_data_tname; + +typedef struct ChunkFileHandle { + FILE *fp; + int closed; +} ChunkFileHandle; + +typedef struct ChunkInfo { + off_t offset, length; +} ChunkInfo; + +typedef struct ChunkData { + FILE *fp; + char *data; +} ChunkData; + +#endif diff --git a/nerv/io/init.c b/nerv/io/init.c new file mode 100644 index 0000000..70585f7 --- /dev/null +++ b/nerv/io/init.c @@ -0,0 +1,6 @@ +#include "../common.h" + +extern void nerv_chunk_file_init(lua_State *L); +void nerv_io_init(lua_State *L) { + nerv_chunk_file_init(L); +} diff --git a/nerv/io/init.lua b/nerv/io/init.lua new file mode 100644 index 0000000..647ff93 --- /dev/null +++ b/nerv/io/init.lua @@ -0,0 +1,55 @@ +function nerv.ChunkFile:write_chunkdata(metadata, writer) + if type(metadata) ~= "table" then + nerv.error("metadata should be a Lua table") + return + end + return self:_write_chunkdata(table.tostring(metadata), writer) +end + +function nerv.ChunkFile:write_chunk(chunk) + local id = chunk.id + local type = chunk.__typename + if id == nil then + nerv.error("id of chunk %s must be specified", type) + end + self:write_chunkdata({id = id, + type = type, + info = chunk:get_info()}, chunk) +end + +function nerv.ChunkFile:read_chunk(id, global_conf) + if self.metadata == nil then + nerv.error("wrong file opening mode") + end + local metadata = self.metadata[id] + if metadata == nil then + nerv.error("chunk with id %s does not exist", id) + end + local chunk_type = nerv.get_type(metadata.type) + local chunk = chunk_type(id, global_conf) + chunk:set_info(metadata.info) + chunk:read(self:get_chunkdata(id)) + return chunk +end + +local DataReader = nerv.class("nerv.DataReader") + +function DataReader:__init(global_conf, reader_conf) + nerv.error_method_not_implemented() +end + +function DataReader:get_data() + nerv.error_method_not_implemented() +end + +local DataBuffer = nerv.class("nerv.DataBuffer") + +function DataBuffer:__init(global_conf, buffer_conf) + nerv.error_method_not_implemented() +end + +function DataBuffer:get_batch() + nerv.error_method_not_implemented() +end + +nerv.include('sgd_buffer.lua') diff --git a/nerv/io/sgd_buffer.lua b/nerv/io/sgd_buffer.lua new file mode 100644 index 0000000..f4f7dfe --- /dev/null +++ b/nerv/io/sgd_buffer.lua @@ -0,0 +1,111 @@ +local SGDBuffer = nerv.class("nerv.SGDBuffer", "nerv.DataBuffer") + +function SGDBuffer:__init(global_conf, buffer_conf) + self.gconf = global_conf + self.buffer_size = math.floor(buffer_conf.buffer_size / + global_conf.batch_size) * global_conf.batch_size + self.randomize = buffer_conf.randomize + if self.randomize == nil then + self.randomize = false + end + self.head = 0 + self.tail = 0 + self.readers = {} + for i, reader_spec in ipairs(buffer_conf.readers) do + local buffs = {} + for id, width in pairs(reader_spec.data) do + buffs[id] = {data = global_conf.mmat_type(self.buffer_size, width), + leftover = nil, + width = width} + end + table.insert(self.readers, {buffs = buffs, + reader = reader_spec.reader, + tail = 0, + has_leftover = false}) + end +end + +function 
SGDBuffer:saturate() + local buffer_size = self.buffer_size + self.head = 0 + self.tail = buffer_size + for i, reader in ipairs(self.readers) do + reader.tail = 0 + if reader.has_leftover then + local lrow + for id, buff in pairs(reader.buffs) do + lrow = buff.leftover:nrow() + if lrow > buffer_size then + nerv.error("buffer size is too small to contain leftovers") + end + buff.data:copy_from(buff.leftover, 0, lrow) + buff.leftover = nil + end + nerv.printf("leftover: %d\n", lrow) + reader.tail = lrow + reader.has_leftover = false + end + while reader.tail < buffer_size do + local data = reader.reader:get_data() + if data == nil then + break + end + local drow = nil + for id, d in pairs(data) do + if drow == nil then + drow = d:nrow() + elseif d:nrow() ~= drow then + nerv.error("reader provides with inconsistent rows of data") + end + end + local remain = buffer_size - reader.tail + if drow > remain then + for id, buff in pairs(reader.buffs) do + local d = data[id] + if d == nil then + nerv.error("reader does not provide data for %s", id) + end + buff.leftover = self.gconf.mmat_type(drow - remain, + buff.width) + buff.leftover:copy_from(d, remain, drow) + end + drow = remain + reader.has_leftover = true + end + for id, buff in pairs(reader.buffs) do + buff.data:copy_from(data[id], 0, drow, reader.tail) + end + reader.tail = reader.tail + drow + end + self.tail = math.min(self.tail, reader.tail) + end + self.rand_map = nerv.MMatrixInt.perm_gen(self.tail) -- generate shuffled index + collectgarbage("collect") + return self.tail >= self.gconf.batch_size +end + +function SGDBuffer:get_data() + local batch_size = self.gconf.batch_size + if self.head >= self.tail then -- buffer is empty + if not self:saturate() then + return nil -- the remaining data cannot build a batch + end + end + if self.head + batch_size > self.tail then + return nil -- the remaining data cannot build a batch + end + local res = {} + for i, reader in ipairs(self.readers) do + for id, buff in pairs(reader.buffs) do + local batch = self.gconf.cumat_type(batch_size, buff.width) + if self.randomize then + batch:copy_rows_fromh_by_idx(buff.data, self.rand_map, self.head) + else + batch:copy_fromh(buff.data, self.head, self.head + batch_size) + end + res[id] = batch + end + end + self.head = self.head + batch_size + return res +end diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua new file mode 100644 index 0000000..00cbcfb --- /dev/null +++ b/nerv/layer/affine.lua @@ -0,0 +1,91 @@ +local MatrixParam = nerv.class('nerv.MatrixParam', 'nerv.Param') +local LinearTransParam = nerv.class('nerv.LinearTransParam', 'nerv.MatrixParam') +local BiasParam = nerv.class('nerv.BiasParam', 'nerv.MatrixParam') +local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer') + +function MatrixParam:read(handle) + self.trans = self.gconf.cumat_type.new_from_host( + nerv.MMatrixFloat.load(handle)) +end + +function MatrixParam:write(handle) + self.trans:new_to_host():save(handle) +end + +function MatrixParam:train_init() + self.correction = self.trans:create() + self.correction:fill(0) +end + +function MatrixParam:update(gradient) + local gconf = self.gconf + self.correction:add(self.correction, gradient, gconf.momentum, 1.0) + -- momentum gain + local mmt_gain = 1.0 / (1.0 - gconf.momentum); + local n = self.gconf.batch_size * mmt_gain + -- perform update + self.trans:add(self.trans, self.correction, 1.0, -gconf.lrate / n) +end + +function LinearTransParam:update(gradient) + MatrixParam.update(self, gradient) + local gconf = self.gconf + 
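-- note: the add() below applies L2 weight decay, scaling the weights by (1 - lrate * wcost) each update +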
-- weight decay + self.trans:add(self.trans, self.trans, 1.0, -gconf.lrate * gconf.wcost) +end + +function AffineLayer:__init(id, global_conf, layer_conf) + self.id = id + self.ltp = layer_conf.ltp + self.bp = layer_conf.bp + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.gconf = global_conf + self:check_dim_len(1, 1) -- exactly one input and one output + self.direct_update = layer_conf.direct_update +end + +function AffineLayer:init(batch_size) + if self.ltp.trans:ncol() ~= self.bp.trans:ncol() then + nerv.error("mismatching dimensions of linear transform and bias paramter") + end + if self.dim_in[1] ~= self.ltp.trans:nrow() then + nerv.error("mismatching dimensions of linear transform parameter and input") + end + if self.dim_out[1] ~= self.ltp.trans:ncol() then + nerv.error("mismatching dimensions of linear transform parameter and output") + end + self.ltp_grad = self.ltp.trans:create() + self.ltp:train_init() + self.bp:train_init() +end + +function AffineLayer:update(bp_err, input, output) + if self.direct_update then + self.ltp.correction:mul(input[1], bp_err[1], 1.0, gconf.momentum, 'T', 'N') + -- momentum gain + local mmt_gain = 1.0 / (1.0 - gconf.momentum); + local n = self.gconf.batch_size * mmt_gain + -- perform update + self.ltp.trans:add(self.ltp.trans, self.ltp.correction, 1.0, -gconf.lrate / n) + else + self.ltp_grad:mul(input[1], bp_err[1], 1.0, 0.0, 'T', 'N') + self.ltp:update(self.ltp_grad) + end + self.bp:update(bp_err[1]:colsum()) +end + +function AffineLayer:propagate(input, output) + -- apply linear transform + output[1]:mul(input[1], self.ltp.trans, 1.0, 0.0, 'N', 'N') + -- add bias + output[1]:add_row(self.bp.trans, 1.0) +end + +function AffineLayer:back_propagate(bp_err, next_bp_err, input, output) + next_bp_err[1]:mul(bp_err[1], self.ltp.trans, 1.0, 0.0, 'N', 'T') +end + +function AffineLayer:get_params() + return nerv.ParamRepo({self.ltp, self.bp}) +end diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua new file mode 100644 index 0000000..c99274d --- /dev/null +++ b/nerv/layer/bias.lua @@ -0,0 +1,28 @@ +local BiasLayer = nerv.class("nerv.BiasLayer", "nerv.Layer") + +function BiasLayer:__init(id, global_conf, layer_conf) + self.id = id + self.gconf = global_conf + self.bias = layer_conf.bias + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self:check_dim_len(1, 1) +end + +function BiasLayer:init() + if self.dim_in[1] ~= self.bias.trans:ncol() then + nerv.error("mismatching dimensions of input and bias parameter") + end + if self.dim_out[1] ~= self.bias.trans:ncol() then + nerv.error("mismatching dimensions of output and bias parameter") + end +end + +function BiasLayer:propagate(input, output) + output[1]:copy_fromd(input[1]) + output[1]:add_row(self.bias.trans, 1.0) +end + +function BiasLayer:get_params() + return nerv.ParamRepo({self.bias}) +end diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua new file mode 100644 index 0000000..7bd7617 --- /dev/null +++ b/nerv/layer/combiner.lua @@ -0,0 +1,59 @@ +local CombinerLayer = nerv.class('nerv.CombinerLayer', 'nerv.Layer') + +function CombinerLayer:__init(id, global_conf, layer_conf) + self.id = id + self.lambda = layer_conf.lambda + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.gconf = global_conf + self:check_dim_len(#self.lambda, -1) + if #self.dim_in < 1 then + nerv.error("no input specified") + end + if #self.dim_out < 1 then + nerv.error("no output specified") + end +end + +function CombinerLayer:init(batch_size) + 
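-- every input and output of this layer must share one dimension; propagate() writes the lambda-weighted sum of the inputs to output[1] and copies it to any further outputs +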
local dim = self.dim_in[1] + for i = 2, #self.dim_in do + if self.dim_in[i] ~= dim then + nerv.error("mismatching dimensions of inputs") + end + end + for i = 1, #self.dim_out do + if self.dim_out[i] ~= dim then + nerv.error("mismatching dimensions of inputs/outputs") + end + end + self.sum = self.gconf.cumat_type(batch_size, dim) +end + +function CombinerLayer:update(bp_err, input, output) +end + +function CombinerLayer:propagate(input, output) + output[1]:fill(0) + for i = 1, #self.dim_in do + output[1]:add(output[1], input[i], 1.0, self.lambda[i]) + end + for i = 2, #self.dim_out do + output[i]:copy_fromd(output[1]) + end +end + +function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output) + local sum = self.sum + sum:copy_fromd(bp_err[1]) + for i = 2, #self.dim_out do + sum:add(sum, bp_err[i], 1.0, 1.0) + end + for i = 1, #self.dim_in do + next_bp_err[i]:add(next_bp_err[i], sum, 0.0, self.lambda[i]) + end +end + +function CombinerLayer:get_params() + return nerv.ParamRepo({}) +end diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua new file mode 100644 index 0000000..3c55a94 --- /dev/null +++ b/nerv/layer/init.lua @@ -0,0 +1,79 @@ +-- The following methods must be implemented to let a layer work properly + +local Param = nerv.class('nerv.Param') + +function Param:__init(id, global_conf) + self.id = id + self.gconf = global_conf +end + +function Param:get_info() + return self.info +end + +function Param:set_info(info) + self.info = info +end + +function Param:read(handle) + nerv.error_method_not_implemented() +end + +function Param:write(handle) + nerv.error_method_not_implemented() +end + +function Param:update(gradient) + nerv.error_method_not_implemented() +end + +local Layer = nerv.class('nerv.Layer') + +function Layer:__init(id, global_conf, layer_conf) + nerv.error_method_not_implemented() +end + +function Layer:init(batch_size) + nerv.error_method_not_implemented() +end + +function Layer:update(bp_err, input, output) + nerv.error_method_not_implemented() +end + +function Layer:propagate(input, output) + nerv.error_method_not_implemented() +end + +function Layer:back_propagate(bp_err, next_bp_err, input, output) + nerv.error_method_not_implemented() +end + +function Layer:check_dim_len(len_in, len_out) + local expected_in = #self.dim_in + local expected_out = #self.dim_out + if len_in > 0 and expected_in ~= len_in then + nerv.error("layer %s expects %d inputs, %d given", + self.id, len_in, expected_in) + end + if len_out > 0 and expected_out ~= len_out then + nerv.error("layer %s expects %d outputs, %d given", + self.id, len_out, expected_out) + end +end + +function Layer:get_params() + nerv.error_method_not_implemented() +end + +function Layer:get_dim() + return self.dim_in, self.dim_out +end + +nerv.include('affine.lua') +nerv.include('sigmoid.lua') +nerv.include('softmax_ce.lua') +nerv.include('bias.lua') +nerv.include('window.lua') +nerv.include('mse.lua') +nerv.include('combiner.lua') diff --git a/nerv/layer/mse.lua b/nerv/layer/mse.lua new file mode 100644 index 0000000..9a97add --- /dev/null +++ b/nerv/layer/mse.lua @@ -0,0 +1,52 @@ +local MSELayer = nerv.class("nerv.MSELayer", "nerv.Layer") + +function MSELayer:__init(id, global_conf, layer_conf) + self.id = id + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.gconf = global_conf + self:check_dim_len(2, -1) +end + +function MSELayer:init(batch_size) + if self.dim_in[1] ~= self.dim_in[2] then + nerv.error("mismatching dimensions of previous network output and labels") + end + 
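-- scale turns the per-frame row sum of squared differences into a mean over the feature dimension +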
self.scale = 1 / self.dim_in[1] + self.total_mse = 0.0 + self.total_frames = 0 + self.mse = self.gconf.cumat_type(batch_size, self.dim_in[1]) + self.mse_sum = self.gconf.cumat_type(batch_size, 1) + self.diff = self.mse:create() +end + +function MSELayer:update(bp_err, input, output) + -- no params, therefore do nothing +end + +function MSELayer:propagate(input, output) + local mse = self.mse + local mse_sum = self.mse_sum + mse:add(input[1], input[2], 1.0, -1.0) + self.diff:copy_fromd(mse) + mse:mul_elem(mse, mse) + mse_sum:add(mse_sum, mse:rowsum(mse), 0.0, self.scale) + if output[1] ~= nil then + output[1]:copy_fromd(mse_sum) + end + self.total_mse = self.total_mse + mse_sum:colsum()[0] + self.total_frames = self.total_frames + mse_sum:nrow() +end + +-- NOTE: must call propagate before back_propagate +function MSELayer:back_propagate(bp_err, next_bp_err, input, output) + local nbe = next_bp_err[1] + nbe:add(nbe, self.diff, 0.0, 2 * self.scale) + if bp_err[1] ~= nil then + nbe:scale_rows_by_col(bp_err[1]) + end +end + +function MSELayer:get_params() + return nerv.ParamRepo({}) +end diff --git a/nerv/layer/sigmoid.lua b/nerv/layer/sigmoid.lua new file mode 100644 index 0000000..dfd09eb --- /dev/null +++ b/nerv/layer/sigmoid.lua @@ -0,0 +1,31 @@ +local SigmoidLayer = nerv.class("nerv.SigmoidLayer", "nerv.Layer") + +function SigmoidLayer:__init(id, global_conf, layer_conf) + self.id = id + self.gconf = global_conf + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self:check_dim_len(1, 1) +end + +function SigmoidLayer:init() + if self.dim_in[1] ~= self.dim_out[1] then + nerv.error("mismatching dimensions of input and output") + end +end + +function SigmoidLayer:update(bp_err, input, output) + -- no params, therefore do nothing +end + +function SigmoidLayer:propagate(input, output) + output[1]:sigmoid(input[1]) +end + +function SigmoidLayer:back_propagate(bp_err, next_bp_err, input, output) + next_bp_err[1]:sigmoid_grad(bp_err[1], output[1]) +end + +function SigmoidLayer:get_params() + return nerv.ParamRepo({}) +end diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua new file mode 100644 index 0000000..daf891e --- /dev/null +++ b/nerv/layer/softmax_ce.lua @@ -0,0 +1,68 @@ +local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayer", "nerv.Layer") + +function SoftmaxCELayer:__init(id, global_conf, layer_conf) + self.id = id + self.gconf = global_conf + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self.compressed = layer_conf.compressed + if self.compressed == nil then + self.compressed = false + end + self:check_dim_len(2, -1) -- two inputs: nn output and label +end + +function SoftmaxCELayer:init(batch_size) + if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then + nerv.error("mismatching dimensions of previous network output and labels") + end + self.total_ce = 0.0 + self.total_correct = 0 + self.total_frames = 0 + self.softmax = self.gconf.cumat_type(batch_size, self.dim_in[1]) + self.ce = self.softmax:create() +end + +function SoftmaxCELayer:update(bp_err, input, output) + -- no params, therefore do nothing +end + +function SoftmaxCELayer:propagate(input, output) + local softmax = self.softmax + local ce = self.ce + local classified = softmax:softmax(input[1]) + local label = input[2] + ce:log_elem(softmax) + if self.compressed then + label = label:decompress(input[1]:ncol()) + end + ce:mul_elem(ce, label) + ce = ce:rowsum() + if output[1] ~= nil then + output[1]:copy_fromd(ce) + end + -- add total ce + self.total_ce = 
self.total_ce - ce:colsum()[0] + self.total_frames = self.total_frames + softmax:nrow() + -- TODO: add colsame for uncompressed label + if self.compressed then + self.total_correct = self.total_correct + classified:colsame(input[2])[0] + end +end + +function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output) + -- softmax output - label + local label = input[2] + if self.compressed then + label = label:decompress(input[1]:ncol()) + end + local nbe = next_bp_err[1] + nbe:add(self.softmax, label, 1.0, -1.0) + if bp_err[1] ~= nil then + nbe:scale_rows_by_col(bp_err[1]) + end +end + +function SoftmaxCELayer:get_params() + return nerv.ParamRepo({}) +end diff --git a/nerv/layer/window.lua b/nerv/layer/window.lua new file mode 100644 index 0000000..4e9a3b1 --- /dev/null +++ b/nerv/layer/window.lua @@ -0,0 +1,28 @@ +local WindowLayer = nerv.class("nerv.WindowLayer", "nerv.Layer") + +function WindowLayer:__init(id, global_conf, layer_conf) + self.id = id + self.gconf = global_conf + self.window = layer_conf.window + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out + self:check_dim_len(1, 1) +end + +function WindowLayer:init() + if self.dim_in[1] ~= self.window.trans:ncol() then + nerv.error("mismatching dimensions of input and window parameter") + end + if self.dim_out[1] ~= self.window.trans:ncol() then + nerv.error("mismatching dimensions of output and window parameter") + end +end + +function WindowLayer:propagate(input, output) + output[1]:copy_fromd(input[1]) + output[1]:scale_rows_by_row(self.window.trans) +end + +function WindowLayer:get_params() + return nerv.ParamRepo({self.window}) +end diff --git a/nerv/luaT/README.md b/nerv/luaT/README.md new file mode 100644 index 0000000..6e9cf0d --- /dev/null +++ b/nerv/luaT/README.md @@ -0,0 +1,239 @@ +<a name="luat.dok"/> +# Lua Torch C API # + +luaT provides an API to interface Lua and C in Torch packages. It defines a +concept of _classes_ to Lua for Torch, and provides a mechanism to easily +handle these Lua classes from C. + +It additionally provides few functions that `luaL` should have defined, and +defines several functions similar to `luaL` ones for better type error printing when using +`luaT` classes. + +<a name="luat.memory.dok"/> +## Memory functions ## + +Classical memory allocation functions which generate a Lua error in case of +problem. + +<a name="luaT_alloc"/> +### void* luaT_alloc(lua_State *L, long size) ### + +Allocates `size` bytes, and return a pointer on the allocated +memory. A Lua error will be generated if running out of memory. + +<a name="luaT_realloc"/> +### void* luaT_realloc(lua_State *L, void *ptr, long size) ### + +Realloc `ptr` to `size` bytes. `ptr` must have been previously +allocated with [luaT_alloc](#luaT_alloc) or +[luaT_realloc](#luaT_realloc), or the C `malloc` or `realloc` +functions. A Lua error will be generated if running out of memory. + +<a name="luaT_free"/> +### void luaT_free(lua_State *L, void *ptr) ### + +Free memory allocated at address `ptr`. The memory must have been +previously allocated with [luaT_alloc](#luaT_alloc) or +[luaT_realloc](#luaT_realloc), or the C `malloc` or `realloc` +functions. + +<a name="luat.classcreate"/> +## Class creation and basic handling ## + +A `luaT` class is basically either a Lua _table_ or _userdata_ with +an appropriate _metatable_. This appropriate metatable is created with +[luaT_newmetatable](#luaT_newmetatable). Contrary to luaL userdata +functions, luaT mechanism handles inheritance. 
If the class inherits from +another class, then the metatable will itself have a metatable +corresponding to the _parent metatable_: the metatables are cascaded +according to the class inheritance. Multiple inheritance is not supported. + +<a name="luat.operatoroverloading"/> +### Operator overloading ### + +The metatable of a `luaT` object contains `Lua` operators like +`__index`, `__newindex`, `__tostring`, `__add` +(etc.). These operators will respectively look for `__index__`, +`__newindex__`, `__tostring__`, `__add__` (etc.) in the +metatable. If found, the corresponding function or value will be returned, +else a Lua error will be raised. + +If one wants to provide `__index__` or `__newindex__` in the +metaclass, these operators must follow a particular scheme: + + * `__index__` must either return a value _and_ `true`, or return `false` only. In the first case, it means `__index__` was able to handle the given argument (e.g., the type was correct). The second case means it was not able to do anything, so `__index` in the root metatable can then try to see if the metaclass contains the required value. + + * `__newindex__` must either return `true` or `false`. As for `__index__`, `true` means it could handle the argument and `false` means it could not. If not, the root metatable `__newindex` will then raise an error if the object was a userdata, or apply a rawset if the object was a Lua table. + +Other metaclass operators like `__tostring__`, `__add__`, etc. do not have any particular constraint. + +<a name="luat_newmetatable"/> +### const char* luaT_newmetatable(lua_State *L, const char *tname, const char *parenttname, lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory) ### + +This function creates a new metatable, which is the Lua way to define a new +object class. As for `luaL_newmetatable`, the metatable is registered in +the Lua registry table, with the key `tname`. In addition, `tname` is +also registered in the Lua registry, with the metatable as key (the +typename of a given object can thus be easily retrieved). + +The class name `tname` must be of the form `modulename.classname`; the module name must refer to an already existing Lua table (the constructor is stored into it as `modulename.classname`). +If not NULL, `parenttname` must be a valid typename corresponding to the +parent class of the new class. + +If `constructor` is not NULL, a function `new` will be added to the metatable, pointing to this given function. The constructor can also +be called through `modulename.classname()`, an alias set up by `luaT_newmetatable`. + +If not NULL, `destructor` will be called when garbage collecting the object. + +If not NULL, `factory` must be a Lua C function creating an empty object +instance of the class. Such factory functions are used in Torch for serialization. + +Note that classes can be partly defined in C and partly defined in Lua: +once the metatable is created in C, it can be filled up with additional +methods in Lua. + +The return value is the value returned by [luaT_typenameid](#luat_typenameid). + +<a name="luat_pushmetatable"/> +### int luaT_pushmetatable(lua_State *L, const char *tname) ### + +Pushes the metatable with type name `tname` on the stack, if `tname` is a +valid Torch class name (previously registered with luaT_newmetatable). + +On success, returns 1. If `tname` is invalid, nothing is pushed and it +returns 0.
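The calls documented in this file compose in a fairly regular way; the sketch below (not part of this patch) shows how a small C structure could be registered as a Torch class and exposed to Lua. The `nerv.Foo` class, the `foo_*` names and the include lines are invented for the example, and it assumes the `nerv` module table already exists when `foo_init` runs.

```c
#include "lua.h"
#include "lauxlib.h"
#include "luaT.h"
#include <stdlib.h>

/* the C payload that will be wrapped as a userdata */
typedef struct Foo { int x; } Foo;

/* "modulename.classname"; the `nerv` module table must already exist */
static const char *foo_tname = "nerv.Foo";

static int foo_new(lua_State *L) {
    Foo *f = (Foo *)malloc(sizeof(Foo));
    f->x = (int)luaL_optinteger(L, 1, 0);
    luaT_pushudata(L, f, foo_tname);  /* wrap the pointer; metatable is looked up by tname */
    return 1;
}

static int foo_destroy(lua_State *L) {
    free(luaT_checkudata(L, 1, foo_tname));  /* invoked through __gc */
    return 0;
}

void foo_init(lua_State *L) {
    /* no parent class, no factory; nerv.Foo(...) and nerv.Foo.new(...) call foo_new */
    luaT_newmetatable(L, foo_tname, NULL, foo_new, foo_destroy, NULL);
    lua_pop(L, 1);  /* luaT_newmetatable leaves the new metatable on the stack */
}
```

From Lua, `nerv.Foo()` (or `nerv.Foo.new()`) would then construct such an object, and `luaT_typename` would report it as `"nerv.Foo"`.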
+ +<a name="luat_typenameid"/> +### const char* luaT_typenameid(lua_State *L, const char *tname) ### + +If `tname` is a valid Torch class name, then returns a unique string (the +contents will be the same as `tname`) pointing to the string registered +in the Lua registry. This string is thus valid as long as Lua is +running. The returned string shall not be freed. + +If `tname` is an invalid class name, returns NULL. + +<a name="luat_typename"/> +### const char* luaT_typename(lua_State *L, int ud) ### + +Returns the typename of the object at index `ud` on the stack. If it is +not a valid Torch object, returns NULL. + +<a name="luat_pushudata"/> +### void luaT_pushudata(lua_State *L, void *udata, const char *tname) ### + +Given a C structure `udata`, pushes a userdata object on the stack with +the metatable corresponding to `tname`. Obviously, `tname` must be a valid +Torch name registered with [luaT_newmetatable](#luat_newmetatable). + +<a name="luat_toudata"/> +### void *luaT_toudata(lua_State *L, int ud, const char *tname) ### + +Returns a pointer to the original C structure previously pushed on the +stack with [luaT_pushudata](#luat_pushudata), if the object at index +`ud` is an instance of the Torch class `tname`. Returns NULL otherwise. + +<a name="luat_isudata"/> +### int luaT_isudata(lua_State *L, int ud, const char *tname) ### + +Returns 1 if the object at index `ud` on the stack is an instance of the Torch class `tname`. +Returns 0 otherwise. + +<a name="luat_getfield"/> +### Checking fields of a table ### + +These functions check that the table at the given index `ud` on the Lua +stack has a field named `field`, and that it is of the specified type. +They raise a Lua error on failure. + +<a name="luat_getfieldcheckudata"/> +### void *luaT_getfieldcheckudata(lua_State *L, int ud, const char *field, const char *tname) ### + +Checks that the field named `field` of the table at index `ud` is a +Torch object of class `tname`. Returns the pointer to the C structure +previously pushed on the stack with [luaT_pushudata](#luat_pushudata) on +success. The function raises a Lua error on failure. + +<a name="luat_getfieldchecklightudata"/> +### void *luaT_getfieldchecklightudata(lua_State *L, int ud, const char *field) ### + +Checks that the field named `field` of the table at index `ud` is a +lightuserdata. Returns the lightuserdata pointer on success. The function +raises a Lua error on failure. + +<a name="luat_getfieldcheckint"/> +### int luaT_getfieldcheckint(lua_State *L, int ud, const char *field) ### + +Checks that the field named `field` of the table at index `ud` is an +int. Returns the int value on success. The function raises a Lua +error on failure. + +<a name="luat_getfieldcheckstring"/> +### const char* luaT_getfieldcheckstring(lua_State *L, int ud, const char *field) ### + +Checks that the field named `field` of the table at index `ud` is a +string. Returns a pointer to the string on success. The function raises a +Lua error on failure. + +<a name="luat_getfieldcheckboolean"/> +### int luaT_getfieldcheckboolean(lua_State *L, int ud, const char *field) ### + +Checks that the field named `field` of the table at index `ud` is a +boolean. On success, returns 1 if the boolean is `true`, 0 if it is +`false`. The function raises a Lua error on failure. + +<a name="luat_getfieldchecktable"/> +### void luaT_getfieldchecktable(lua_State *L, int ud, const char *field) ### + +Checks that the field named `field` of the table at index `ud` is a +table. On success, pushes the table on the stack.
The function raises a Lua +error on failure. + +<a name="luat_typerror"/> +### int luaT_typerror(lua_State *L, int ud, const char *tname) ### + +Raises a `luaL_argerror` (and returns its value), claiming that the +object at index `ud` on the stack is not of type `tname`. Note that +this function does not check the type, it only raises an error. + +<a name="luat_checkboolean"/> +### int luaT_checkboolean(lua_State *L, int ud) ### + +Checks that the value at index `ud` is a boolean. On success, returns 1 +if the boolean is `true`, 0 if it is `false`. The function raises a Lua +error on failure. + +<a name="luat_optboolean"/> +### int luaT_optboolean(lua_State *L, int ud, int def) ### + +Checks that the value at index `ud` is a boolean. On success, returns 1 +if the boolean is `true`, 0 if it is `false`. If there is no value at +index `ud`, returns `def`. In any other cases, raises an error. + +<a name="luat_registeratname"/> +### void luaT_registeratname(lua_State *L, const struct luaL_Reg *methods, const char *name) ### + +This function assume a table is on the stack. It creates a table field +`name` in the table (if this field does not exist yet), and fill up +`methods` in this table field. + +<a name="luat_classrootname"/> +### const char *luaT_classrootname(const char *tname) ### + +Assuming `tname` is of the form `modulename.classname`, returns +`classname`. The returned value shall not be freed. It is a pointer +inside `tname` string. + +<a name="luat_classmodulename"/> +### const char *luaT_classmodulename(const char *tname) ### + +Assuming `tname` is of the form `modulename.classname`, returns +`modulename`. The returned value shall not be freed. It is valid until the +next call to `luaT_classrootname`. + +<a name="luat_stackdump"/> +### void luaT_stackdump(lua_State *L) ### + +This function print outs the state of the Lua stack. It is useful for debug +purposes. + diff --git a/nerv/luaT/luaT.c b/nerv/luaT/luaT.c new file mode 100644 index 0000000..7b85ce3 --- /dev/null +++ b/nerv/luaT/luaT.c @@ -0,0 +1,1079 @@ +#include <stdlib.h> +#include <string.h> + +#include "luaT.h" + +void* luaT_alloc(lua_State *L, long size) +{ + void *ptr; + + if(size == 0) + return NULL; + + if(size < 0) + luaL_error(L, "$ Torch: invalid memory size -- maybe an overflow?"); + + ptr = malloc(size); + if(!ptr) + luaL_error(L, "$ Torch: not enough memory: you tried to allocate %dGB. Buy new RAM!", size/1073741824); + + return ptr; +} + +void* luaT_realloc(lua_State *L, void *ptr, long size) +{ + if(!ptr) + return(luaT_alloc(L, size)); + + if(size == 0) + { + luaT_free(L, ptr); + return NULL; + } + + if(size < 0) + luaL_error(L, "$ Torch: invalid memory size -- maybe an overflow?"); + + ptr = realloc(ptr, size); + if(!ptr) + luaL_error(L, "$ Torch: not enough memory: you tried to reallocate %dGB. Buy new RAM!", size/1073741824); + return ptr; +} + +void luaT_free(lua_State *L, void *ptr) +{ + free(ptr); +} + +void luaT_stackdump(lua_State *L) +{ + int i; + const char *tname = NULL; + int top = lua_gettop(L); + for(i = 1; i <= top; i++) + { + int t = lua_type(L, i); + printf("%3d. ", i); + switch(t) + { + case LUA_TSTRING: + printf("'%s'", lua_tostring(L,i)); + break; + case LUA_TBOOLEAN: + printf(lua_toboolean(L, i) ? "true" : "false"); + break; + case LUA_TNUMBER: + printf("%g", lua_tonumber(L,i)); + break; + case LUA_TUSERDATA: + tname = luaT_typename(L, i); + printf("userdata %lx [%s]", (long)lua_topointer(L, i), (tname ? 
tname : "not a Torch object")); + break; + case 10: + tname = luaT_typename(L, i); + printf("cdata %lx [%s]", (long)lua_topointer(L, i), (tname ? tname : "not a Torch object")); + break; + case LUA_TTABLE: + lua_pushvalue(L, i); + lua_rawget(L, LUA_REGISTRYINDEX); + if(lua_isstring(L, -1)) + tname = lua_tostring(L, -1); /*luaT_typenameid(L, lua_tostring(L, -1)); */ + else + tname = NULL; + lua_pop(L, 1); + if(tname) + printf("metatable [%s]", tname); + else + { + tname = luaT_typename(L, i); + printf("table %lx [%s]", (long)lua_topointer(L, i), (tname ? tname : "not a Torch object")); + } + break; + default: + printf("Lua object type: %s", lua_typename(L,t)); + break; + } + printf("\n"); + } + printf("---------------------------------------------\n"); +} + +/* metatable operator methods */ +static int luaT_mt__index(lua_State *L); +static int luaT_mt__newindex(lua_State *L); +static int luaT_mt__tostring(lua_State *L); +static int luaT_mt__add(lua_State *L); +static int luaT_mt__sub(lua_State *L); +static int luaT_mt__mul(lua_State *L); +static int luaT_mt__div(lua_State *L); +static int luaT_mt__mod(lua_State *L); +static int luaT_mt__pow(lua_State *L); +static int luaT_mt__unm(lua_State *L); +static int luaT_mt__concat(lua_State *L); +static int luaT_mt__len(lua_State *L); +static int luaT_mt__eq(lua_State *L); +static int luaT_mt__lt(lua_State *L); +static int luaT_mt__le(lua_State *L); +static int luaT_mt__call(lua_State *L); + +/* Constructor-metatable methods */ +static int luaT_cmt__call(lua_State *L); +static int luaT_cmt__newindex(lua_State *L); + +const char* luaT_newmetatable(lua_State *L, const char *tname, const char *parenttname, + lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory) +{ + lua_pushcfunction(L, luaT_lua_newmetatable); + lua_pushstring(L, tname); + (parenttname ? lua_pushstring(L, parenttname) : lua_pushnil(L)); + (constructor ? lua_pushcfunction(L, constructor) : lua_pushnil(L)); + (destructor ? lua_pushcfunction(L, destructor) : lua_pushnil(L)); + (factory ? lua_pushcfunction(L, factory) : lua_pushnil(L)); + lua_call(L, 5, 1); + return luaT_typenameid(L, tname); +} + +int luaT_pushmetatable(lua_State *L, const char *tname) +{ + lua_getfield(L, LUA_REGISTRYINDEX, tname); + if(lua_isnil(L, -1)) + { + lua_pop(L, 1); + return 0; + } + return 1; +} + +const char *luaT_typenameid(lua_State *L, const char *tname) +{ + if(luaT_pushmetatable(L, tname)) + { + const char *tnameid = NULL; + lua_rawget(L, LUA_REGISTRYINDEX); + if(lua_isstring(L, -1)) + tnameid = lua_tostring(L, -1); + lua_pop(L, 1); /* the string/nil */ + return tnameid; + } + return NULL; +} + +static const char cdataname[] = "" + "local _, ffi = pcall(require, 'ffi')\n" + "if ffi then\n" + " local id2name = {}\n" + " return function(cdata, name)\n" + " local id = tonumber(ffi.typeof(cdata))\n" + " if id then\n" + " if name then\n" + " id2name[id] = name\n" + " return name\n" + " else\n" + " return rawget(id2name, id)\n" + " end\n" + " end\n" + " return nil\n" + " end\n" + "else\n" + " return function() end\n" + "end\n"; + +static const char* luaT_cdataname(lua_State *L, int ud, const char *tname) +{ + lua_pushstring(L, "__cdataname"); + lua_rawget(L, LUA_REGISTRYINDEX); + if(lua_isnil(L,-1)) + { + lua_pop(L, 1); + + if(luaL_dostring(L, cdataname)) /* did something go wrong? 
*/ + luaL_error(L, "internal error (could not load cdataname): %s", lua_tostring(L, -1)); + + lua_pushstring(L, "__cdataname"); + lua_pushvalue(L, -2); + lua_rawset(L, LUA_REGISTRYINDEX); + } + if(!lua_isfunction(L, -1)) /* should not happen */ + luaL_error(L, "internal error (cdataname is not a function)"); + + lua_pushvalue(L, ud); + if(tname) + lua_pushstring(L, tname); + if(lua_pcall(L, (tname ? 2 : 1), 1, 0)) + luaL_error(L, "internal error (cdataname): %s", lua_tostring(L, -1)); + + tname = lua_tostring(L, -1); + lua_pop(L, 1); + + return tname; +} + +const char* luaT_typename(lua_State *L, int ud) +{ + if(lua_type(L, ud) == 10) + return luaT_cdataname(L, ud, NULL); + else if(lua_getmetatable(L, ud)) + { + const char *tname = NULL; + lua_rawget(L, LUA_REGISTRYINDEX); + if(lua_isstring(L, -1)) + tname = lua_tostring(L, -1); + lua_pop(L, 1); /* the string/nil */ + return tname; + } + return NULL; +} + +void luaT_pushudata(lua_State *L, void *udata, const char *tname) +{ + if(udata) + { + void **udata_p = lua_newuserdata(L, sizeof(void*)); + *udata_p = udata; + if(!luaT_pushmetatable(L, tname)) + luaL_error(L, "Torch internal problem: cannot find metatable for type <%s>", tname); + lua_setmetatable(L, -2); + } + else + lua_pushnil(L); +} + +void *luaT_toudata(lua_State *L, int ud, const char *tname) +{ + void **p = lua_touserdata(L, ud); + if(p != NULL) /* value is a userdata? */ + { + if(!luaT_pushmetatable(L, tname)) + luaL_error(L, "Torch internal problem: cannot find metatable for type <%s>", tname); + + /* initialize the table we want to get the metatable on */ + /* note that we have to be careful with indices, as we just inserted stuff */ + lua_pushvalue(L, (ud < 0 ? ud - 1 : ud)); + while(lua_getmetatable(L, -1)) /* get the next metatable */ + { + lua_remove(L, -2); /* remove the previous metatable [or object, if first time] */ + if(lua_rawequal(L, -1, -2)) + { + lua_pop(L, 2); /* remove the two metatables */ + return *p; + } + } + lua_pop(L, 2); /* remove the two metatables */ + } + return NULL; +} + +int luaT_isudata(lua_State *L, int ud, const char *tname) +{ + if(luaT_toudata(L, ud, tname)) + return 1; + else + return 0; +} + +void *luaT_checkudata(lua_State *L, int ud, const char *tname) +{ + void *p = luaT_toudata(L, ud, tname); + if(!p) + luaT_typerror(L, ud, tname); + return p; +} + +void *luaT_getfieldcheckudata(lua_State *L, int ud, const char *field, const char *tname) +{ + void *p; + lua_getfield(L, ud, field); + if(lua_isnil(L, -1)) + luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); + p = luaT_toudata(L, -1, tname); + if(!p) + luaL_error(L, "bad argument #%d (field %s is not a %s)", ud, field, tname); + return p; +} + +void *luaT_getfieldchecklightudata(lua_State *L, int ud, const char *field) +{ + void *p; + lua_getfield(L, ud, field); + if(lua_isnil(L, -1)) + luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); + + if(!lua_islightuserdata(L, -1)) + luaL_error(L, "bad argument #%d (field %s is not a light userdata)", ud, field); + + p = lua_touserdata(L, -1); + + return p; +} + +double luaT_getfieldchecknumber(lua_State *L, int ud, const char *field) +{ + lua_getfield(L, ud, field); + if(lua_isnil(L, -1)) + luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); + if(!lua_isnumber(L, -1)) + luaL_error(L, "bad argument #%d (field %s is not a number)", ud, field); + return lua_tonumber(L, -1); +} + +int luaT_getfieldcheckint(lua_State *L, int ud, const char *field) +{ + lua_getfield(L, ud, field); + 
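/* as with the other luaT_getfieldcheck* helpers, the fetched field is left on the stack for the caller */ +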
if(lua_isnil(L, -1)) + luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); + if(!lua_isnumber(L, -1)) + luaL_error(L, "bad argument #%d (field %s is not a number)", ud, field); + return (int)lua_tonumber(L, -1); +} + +const char* luaT_getfieldcheckstring(lua_State *L, int ud, const char *field) +{ + lua_getfield(L, ud, field); + if(lua_isnil(L, -1)) + luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); + if(!lua_isstring(L, -1)) + luaL_error(L, "bad argument #%d (field %s is not a string)", ud, field); + return lua_tostring(L, -1); +} + +int luaT_getfieldcheckboolean(lua_State *L, int ud, const char *field) +{ + lua_getfield(L, ud, field); + if(lua_isnil(L, -1)) + luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); + if(!lua_isboolean(L, -1)) + luaL_error(L, "bad argument #%d (field %s is not a boolean)", ud, field); + return lua_toboolean(L, -1); +} + +void luaT_getfieldchecktable(lua_State *L, int ud, const char *field) +{ + lua_getfield(L, ud, field); + if(lua_isnil(L, -1)) + luaL_error(L, "bad argument #%d (field %s does not exist)", ud, field); + if(!lua_istable(L, -1)) + luaL_error(L, "bad argument #%d (field %s is not a table)", ud, field); +} + +/**** type checks as in luaL ****/ +int luaT_typerror(lua_State *L, int ud, const char *tname) +{ + const char *msg; + const char *tnameud = luaT_typename(L, ud); + + if(!tnameud) + tnameud = lua_typename(L, ud); + + msg = lua_pushfstring(L, "%s expected, got %s", + tname, + (tnameud ? tnameud : "unknown object")); + + return luaL_argerror(L, ud, msg); +} + +int luaT_checkboolean(lua_State *L, int ud) +{ + if(!lua_isboolean(L, ud)) + luaT_typerror(L, ud, lua_typename(L, LUA_TBOOLEAN)); + return lua_toboolean(L, ud); +} + +int luaT_optboolean(lua_State *L, int ud, int def) +{ + if(lua_isnoneornil(L,ud)) + return def; + + return luaT_checkboolean(L, ud); +} + +void luaT_registeratname(lua_State *L, const struct luaL_Reg *methods, const char *name) +{ + int idx = lua_gettop(L); + + luaL_checktype(L, idx, LUA_TTABLE); + lua_pushstring(L, name); + lua_rawget(L, idx); + + if(lua_isnil(L, -1)) + { + lua_pop(L, 1); + lua_pushstring(L, name); + lua_newtable(L); + lua_rawset(L, idx); + + lua_pushstring(L, name); + lua_rawget(L, idx); + } + + luaL_register(L, NULL, methods); + lua_pop(L, 1); +} + + +/* utility functions */ +const char *luaT_classrootname(const char *tname) +{ + int i; + int sz = strlen(tname); + + for(i = 0; i < sz; i++) + { + if(tname[i] == '.') + return tname+i+1; + } + return tname; +} + +/* module_name must be a buffer at least as big as tname + * return true if the class is part of a module */ +int luaT_classmodulename(const char *tname, char *module_name) +{ + char chars[] = {'.', '\0'}; + size_t n; + n = strcspn(tname, chars); + strncpy(module_name, tname, n); + module_name[n] = '\0'; + return tname[n] == '.'; +} + +/* Lua only functions */ +int luaT_lua_newmetatable(lua_State *L) +{ + const char* tname = luaL_checkstring(L, 1); + char module_name[256]; + int is_in_module = 0; + is_in_module = luaT_classmodulename(tname, module_name); + + lua_settop(L, 5); + luaL_argcheck(L, lua_isnoneornil(L, 2) || lua_isstring(L, 2), 2, "parent class name or nil expected"); + luaL_argcheck(L, lua_isnoneornil(L, 3) || lua_isfunction(L, 3), 3, "constructor function or nil expected"); + luaL_argcheck(L, lua_isnoneornil(L, 4) || lua_isfunction(L, 4), 4, "destructor function or nil expected"); + luaL_argcheck(L, lua_isnoneornil(L, 5) || lua_isfunction(L, 5), 5, "factory function or nil 
expected"); + + if(is_in_module) + lua_getfield(L, LUA_GLOBALSINDEX, module_name); + else + lua_pushvalue(L, LUA_GLOBALSINDEX); + if(!lua_istable(L, 6)) + luaL_error(L, "while creating metatable %s: bad argument #1 (%s is an invalid module name)", tname, module_name); + + /* we first create the new metaclass if we have to */ + if(!luaT_pushmetatable(L, tname)) + { + /* create the metatable */ + lua_newtable(L); + + /* registry[name] = metatable */ + lua_pushvalue(L, -1); + lua_setfield(L, LUA_REGISTRYINDEX, tname); + + /* registry[metatable] = tname */ + lua_pushvalue(L, -1); + lua_pushstring(L, tname); + lua_rawset(L, LUA_REGISTRYINDEX); + + /* __index handling */ + lua_pushcfunction(L, luaT_mt__index); + lua_setfield(L, -2, "__index"); + + /* __newindex handling */ + lua_pushcfunction(L, luaT_mt__newindex); + lua_setfield(L, -2, "__newindex"); + + /* __typename contains the typename */ + lua_pushstring(L, tname); + lua_setfield(L, -2, "__typename"); + + /* __metatable is self */ + lua_pushvalue(L, -1); + lua_setfield(L, -2, "__metatable"); + + /* by default, __version equals 1 */ + lua_pushnumber(L, 1); + lua_setfield(L, -2, "__version"); + + /* assign default operator functions */ + lua_pushcfunction(L, luaT_mt__tostring); + lua_setfield(L, -2, "__tostring"); + + lua_pushcfunction(L, luaT_mt__add); + lua_setfield(L, -2, "__add"); + + lua_pushcfunction(L, luaT_mt__sub); + lua_setfield(L, -2, "__sub"); + + lua_pushcfunction(L, luaT_mt__mul); + lua_setfield(L, -2, "__mul"); + + lua_pushcfunction(L, luaT_mt__div); + lua_setfield(L, -2, "__div"); + + lua_pushcfunction(L, luaT_mt__mod); + lua_setfield(L, -2, "__mod"); + + lua_pushcfunction(L, luaT_mt__pow); + lua_setfield(L, -2, "__pow"); + + lua_pushcfunction(L, luaT_mt__unm); + lua_setfield(L, -2, "__unm"); + + lua_pushcfunction(L, luaT_mt__concat); + lua_setfield(L, -2, "__concat"); + + lua_pushcfunction(L, luaT_mt__len); + lua_setfield(L, -2, "__len"); + + lua_pushcfunction(L, luaT_mt__eq); + lua_setfield(L, -2, "__eq"); + + lua_pushcfunction(L, luaT_mt__lt); + lua_setfield(L, -2, "__lt"); + + lua_pushcfunction(L, luaT_mt__le); + lua_setfield(L, -2, "__le"); + + lua_pushcfunction(L, luaT_mt__call); + lua_setfield(L, -2, "__call"); + } + + /* we assign the parent class if necessary */ + if(!lua_isnoneornil(L, 2)) + { + if(lua_getmetatable(L, -1)) + luaL_error(L, "class %s has been already assigned a parent class\n", tname); + else + { + const char* parenttname = luaL_checkstring(L, 2); + if(!luaT_pushmetatable(L, parenttname)) + luaL_error(L, "bad argument #2 (invalid parent class name %s)", parenttname); + lua_setmetatable(L, -2); + } + } + + /* register the destructor function */ + if(!lua_isnoneornil(L, 4)) + { + /* does it exists already? */ + lua_pushstring(L, "__gc"); + lua_rawget(L, -2); + + if(lua_isnil(L, -1)) + { + lua_pop(L, 1); /* pop nil */ + lua_pushstring(L, "__gc"); + lua_pushvalue(L, 4); + lua_rawset(L, -3); + } + else + luaL_error(L, "%s has been already assigned a destructor", tname); + } + + /* register the factory function */ + if(!lua_isnoneornil(L, 5)) + { + /* does it exists already? 
*/ + lua_pushstring(L, "__factory"); + lua_rawget(L, -2); + + if(lua_isnil(L, -1)) + { + lua_pop(L, 1); /* pop nil */ + lua_pushstring(L, "__factory"); + lua_pushvalue(L, 5); + lua_rawset(L, -3); + } + else + luaL_error(L, "%s has been already assigned a factory", tname); + } + + /******** Constructor table and metatable ********/ + lua_pushstring(L, "__constructor"); + lua_rawget(L, -2); + if(lua_isnil(L, -1)) + { + lua_pop(L, 1); /* pop nil */ + lua_newtable(L); /* fancy table */ + lua_newtable(L); /* fancy metatable */ + + lua_pushvalue(L, -3); /* metatable */ + lua_setfield(L, -2, "__index"); /* so we can get the methods */ + + lua_pushcfunction(L, luaT_cmt__newindex); + lua_setfield(L, -2, "__newindex"); /* so we add new methods */ + + lua_pushcfunction(L, luaT_cmt__call); + lua_setfield(L, -2, "__call"); /* so we can create, we are here for only that */ + + lua_pushvalue(L, -3); + lua_setfield(L, -2, "__metatable"); /* redirect to metatable with methods */ + + lua_setmetatable(L, -2); /* constructor metatable is ... this fancy metatable */ + + /* set metatable[__constructor] = constructor-metatable */ + lua_pushstring(L, "__constructor"); + lua_pushvalue(L, -2); + lua_rawset(L, -4); + } + + /* register the constructor function */ + if(!lua_isnoneornil(L, 3)) + { + /* get constructor metatable */ + lua_getmetatable(L, -1); + + /* does it exists already? */ + lua_pushstring(L, "__new"); + lua_rawget(L, -2); + + if(lua_isnil(L, -1)) + { + lua_pop(L, 1); /* pop nil */ + lua_pushstring(L, "__new"); + lua_pushvalue(L, 3); + lua_rawset(L, -3); + + /* set "new" in the metatable too */ + lua_pushstring(L, "new"); + lua_pushvalue(L, 3); + lua_rawset(L, -5); + } + else + luaL_error(L, "%s has been already assigned a constructor", tname); + + /* pop constructor metatable */ + lua_pop(L, 1); + } + + /* module.name = constructor metatable */ + lua_setfield(L, 6, luaT_classrootname(tname)); + + return 1; /* returns the metatable */ +} + +/* Lua only utility functions */ + +/* add any custom type, provided the object has a metatable */ +int luaT_lua_metatype(lua_State *L) +{ + if( (lua_gettop(L) != 2) && (lua_gettop(L) != 3) ) + luaL_error(L, "expecting: string table [ctype]"); + + luaL_checkstring(L, 1); + luaL_checktype(L, 2, LUA_TTABLE); + + if(lua_gettop(L) == 3) + { + if(!luaT_cdataname(L, 3, lua_tostring(L, 1))) + luaL_error(L, "could not register cdata type -- missing ffi library?"); + } + + /* registry[name] = metatable */ + lua_pushvalue(L, 1); + lua_pushvalue(L, 2); + lua_rawset(L, LUA_REGISTRYINDEX); + + /* registry[metatable] = tname */ + lua_pushvalue(L, 2); + lua_pushvalue(L, 1); + lua_rawset(L, LUA_REGISTRYINDEX); + + return 0; +} + +/* return a userdata from a C pointer */ +/* you are better to know what you are doing */ +int luaT_lua_pushudata(lua_State *L) +{ + void *udata = NULL; + const char *tname = luaL_checkstring(L, 2); + + if(lua_type(L, 1) == 10) + udata = *((void**)lua_topointer(L, 1)); + else if(lua_isnumber(L, 1)) + udata = (void*)(long)lua_tonumber(L, 1); + else + luaL_argerror(L, 1, "expecting number or cdata"); + + luaT_pushudata(L, udata, tname); + + return 1; +} + +int luaT_lua_factory(lua_State *L) +{ + const char* tname = luaL_checkstring(L, 1); + if(luaT_pushmetatable(L, tname) && !lua_isnil(L, -1)) + { + lua_pushstring(L, "__factory"); + lua_rawget(L, -2); + } + else + { + lua_pushnil(L); + } + return 1; +} + +int luaT_lua_getconstructortable(lua_State *L) +{ + const char* tname = luaL_checkstring(L, 1); + if(luaT_pushmetatable(L, tname)) + { + lua_pushstring(L, 
"__constructor"); + lua_rawget(L, -2); + return 1; + } + return 0; +} + + +int luaT_lua_typename(lua_State *L) +{ + const char* tname = NULL; + luaL_checkany(L, 1); + if((tname = luaT_typename(L, 1))) + { + lua_pushstring(L, tname); + return 1; + } + return 0; +} + +int luaT_lua_isequal(lua_State *L) +{ + if(lua_isuserdata(L, 1) && lua_isuserdata(L, 2)) + { + void **u1, **u2; + luaL_argcheck(L, luaT_typename(L, 1), 1, "Torch object expected"); + luaL_argcheck(L, luaT_typename(L, 2), 2, "Torch object expected"); + + u1 = lua_touserdata(L, 1); + u2 = lua_touserdata(L, 2); + if(*u1 == *u2) + lua_pushboolean(L, 1); + else + lua_pushboolean(L, 0); + } + else if(lua_istable(L, 1) && lua_istable(L, 2)) + lua_pushboolean(L, lua_rawequal(L, 1, 2)); + else + lua_pushboolean(L, 0); + return 1; +} + +int luaT_lua_pointer(lua_State *L) +{ + if(lua_isuserdata(L, 1)) + { + void **ptr; + luaL_argcheck(L, luaT_typename(L, 1), 1, "Torch object expected"); + ptr = lua_touserdata(L, 1); + lua_pushnumber(L, (long)(*ptr)); + return 1; + } + else if(lua_istable(L, 1) || lua_isthread(L, 1) || lua_isfunction(L, 1)) + { + const void* ptr = lua_topointer(L, 1); + lua_pushnumber(L, (long)(ptr)); + return 1; + } + else if(lua_type(L, 1) == 10) /* cdata */ + { + /* we want the pointer holded by cdata */ + /* not the pointer on the cdata object */ + const void* ptr = *((void**)lua_topointer(L, 1)); + lua_pushnumber(L, (long)(ptr)); + return 1; + } + else if(lua_isstring(L, 1)) + { + const char* ptr = lua_tostring(L, 1); + lua_pushnumber(L, (long)(ptr)); + return 1; + } + else + luaL_error(L, "Torch object, table, thread, cdata or function expected"); + + return 0; +} + +int luaT_lua_setenv(lua_State *L) +{ + if(!lua_isfunction(L, 1) && !lua_isuserdata(L, 1)) + luaL_typerror(L, 1, "function or userdata"); + luaL_checktype(L, 2, LUA_TTABLE); + lua_setfenv(L, 1); + return 0; +} + +int luaT_lua_getenv(lua_State *L) +{ + if(!lua_isfunction(L, 1) && !lua_isuserdata(L, 1)) + luaL_typerror(L, 1, "function or userdata"); + lua_getfenv(L, 1); + return 1; +} + +int luaT_lua_getmetatable(lua_State *L) +{ + const char *tname = luaL_checkstring(L, 1); + if(luaT_pushmetatable(L, tname)) + return 1; + return 0; +} + +int luaT_lua_version(lua_State *L) +{ + luaL_checkany(L, 1); + + if(lua_type(L, 1) == 10) + { + const char *tname = luaT_cdataname(L, 1, NULL); + if(tname) + { + luaT_pushmetatable(L, tname); + lua_pushstring(L, "__version"); + lua_rawget(L, -2); + return 1; + } + return 0; + } + else if(lua_getmetatable(L, 1)) + { + lua_pushstring(L, "__version"); + lua_rawget(L, -2); + return 1; + } + return 0; +} + +int luaT_lua_setmetatable(lua_State *L) +{ + const char *tname = luaL_checkstring(L, 2); + luaL_checktype(L, 1, LUA_TTABLE); + + if(!luaT_pushmetatable(L, tname)) + luaL_error(L, "unknown typename %s\n", tname); + lua_setmetatable(L, 1); + + return 1; +} + +/* metatable operator methods */ +static int luaT_mt__index(lua_State *L) +{ + if(!lua_getmetatable(L, 1)) + luaL_error(L, "critical internal indexing error: no metatable found"); + + if(!lua_istable(L, -1)) + luaL_error(L, "critical internal indexing error: not a metatable"); + + /* test for __index__ method first */ + lua_getfield(L, -1, "__index__"); + if(!lua_isnil(L, -1)) + { + int result; + + if(!lua_isfunction(L, -1)) + luaL_error(L, "critical internal indexing error: __index__ is not a function"); + + lua_pushvalue(L, 1); + lua_pushvalue(L, 2); + + lua_call(L, 2, LUA_MULTRET); /* DEBUG: risque: faut vraiment retourner 1 ou 2 valeurs... 
*/ + + result = lua_toboolean(L, -1); + lua_pop(L, 1); + + if(result) + return 1; + + /* on the stack: 1. the object 2. the value 3. the metatable */ + /* apparently, __index wants only one element returned */ + /* return lua_gettop(L)-3; */ + + } + else + lua_pop(L, 1); /* remove nil __index__ on the stack */ + + lua_pushvalue(L, 2); + lua_gettable(L, -2); + + return 1; +} + +static int luaT_mt__newindex(lua_State *L) +{ + if(!lua_getmetatable(L, 1)) + luaL_error(L, "critical internal indexing error: no metatable found"); + + if(!lua_istable(L, -1)) + luaL_error(L, "critical internal indexing error: not a metatable"); + + /* test for __newindex__ method first */ + lua_getfield(L, -1, "__newindex__"); + if(!lua_isnil(L, -1)) + { + int result; + + if(!lua_isfunction(L, -1)) + luaL_error(L, "critical internal indexing error: __newindex__ is not a function"); + + lua_pushvalue(L, 1); + lua_pushvalue(L, 2); + lua_pushvalue(L, 3); + + lua_call(L, 3, 1); /* DEBUG: risque: faut vraiment retourner qqch */ + + result = lua_toboolean(L, -1); + lua_pop(L, 1); + + if(result) + return 0; + } + else + lua_pop(L, 1); /* remove nil __newindex__ on the stack */ + + lua_pop(L, 1); /* pop the metatable */ + if(lua_istable(L, 1)) + lua_rawset(L, 1); + else + luaL_error(L, "the class %s cannot be indexed", luaT_typename(L, 1)); + + return 0; +} + +/* note: check dans metatable pour ca, donc necessaire */ +#define MT_DECLARE_OPERATOR(NAME, NIL_BEHAVIOR) \ + int luaT_mt__##NAME(lua_State *L) \ + { \ + if(!lua_getmetatable(L, 1)) \ + luaL_error(L, "internal error in __" #NAME ": no metatable"); \ + \ + lua_getfield(L, -1, "__" #NAME "__"); \ + if(lua_isnil(L, -1)) \ + { \ + NIL_BEHAVIOR; \ + } \ + else \ + { \ + if(lua_isfunction(L, -1)) \ + { \ + lua_insert(L, 1); /* insert function */ \ + lua_pop(L, 1); /* remove metatable */ \ + lua_call(L, lua_gettop(L)-1, LUA_MULTRET); /* we return the result of the call */ \ + return lua_gettop(L); \ + } \ + /* we return the thing the user left in __tostring__ */ \ + } \ + return 0; \ + } + +MT_DECLARE_OPERATOR(tostring, + lua_pushstring(L, luaT_typename(L, 1)); + return 1;) +MT_DECLARE_OPERATOR(add, luaL_error(L, "%s has no addition operator", luaT_typename(L, 1))) +MT_DECLARE_OPERATOR(sub, luaL_error(L, "%s has no substraction operator", luaT_typename(L, 1))) +MT_DECLARE_OPERATOR(mul, luaL_error(L, "%s has no multiplication operator", luaT_typename(L, 1))) +MT_DECLARE_OPERATOR(div, luaL_error(L, "%s has no division operator", luaT_typename(L, 1))) +MT_DECLARE_OPERATOR(mod, luaL_error(L, "%s has no modulo operator", luaT_typename(L, 1))) +MT_DECLARE_OPERATOR(pow, luaL_error(L, "%s has no power operator", luaT_typename(L, 1))) +MT_DECLARE_OPERATOR(unm, luaL_error(L, "%s has no negation operator", luaT_typename(L, 1))) +MT_DECLARE_OPERATOR(concat, luaL_error(L, "%s has no concat operator", luaT_typename(L, 1))) +MT_DECLARE_OPERATOR(len, luaL_error(L, "%s has no length operator", luaT_typename(L, 1))) +MT_DECLARE_OPERATOR(eq, + lua_settop(L, 2); + lua_pushcfunction(L, luaT_lua_isequal); + lua_insert(L, 1); + lua_call(L, 2, 1); + return 1;) +MT_DECLARE_OPERATOR(lt, luaL_error(L, "%s has no lower than operator", luaT_typename(L, 1))) +MT_DECLARE_OPERATOR(le, luaL_error(L, "%s has no lower or equal than operator", luaT_typename(L, 1))) +MT_DECLARE_OPERATOR(call, luaL_error(L, "%s has no call operator", luaT_typename(L, 1))) + + +/* constructor metatable methods */ +int luaT_cmt__call(lua_State *L) +{ + if(!lua_istable(L, 1)) + luaL_error(L, "internal error in __call: not a 
constructor table"); + + if(!lua_getmetatable(L, 1)) + luaL_error(L, "internal error in __call: no metatable available"); + + lua_pushstring(L, "__new"); + lua_rawget(L, -2); + + if(lua_isnil(L, -1)) + luaL_error(L, "no constructor available"); + + lua_remove(L, 1); /* remove constructor atable */ + lua_insert(L, 1); /* insert constructor */ + lua_pop(L, 1); /* remove fancy metatable */ + + lua_call(L, lua_gettop(L)-1, LUA_MULTRET); + return lua_gettop(L); +} + +int luaT_cmt__newindex(lua_State *L) +{ + if(!lua_istable(L, 1)) + luaL_error(L, "internal error in __newindex: not a constructor table"); + + if(!lua_getmetatable(L, 1)) + luaL_error(L, "internal error in __newindex: no metatable available"); + + lua_pushstring(L, "__metatable"); + lua_rawget(L, -2); + + if(!lua_istable(L, -1)) + luaL_error(L, "internal error in __newindex: no metaclass available"); + + lua_insert(L, 2); + lua_pop(L, 1); /* remove the metatable over the constructor table */ + + lua_rawset(L, -3); + + return 0; +} + +/******************** deprecated functions ********************/ +int luaT_pushmetaclass(lua_State *L, const char *tname) +{ + return luaT_pushmetatable(L, tname); +} + +const char* luaT_id(lua_State *L, int ud) +{ + return luaT_typename(L, ud); +} + +const char* luaT_id2typename(lua_State *L, const char *id) +{ + return id; +} + +const char* luaT_typename2id(lua_State *L, const char *tname) +{ + return luaT_typenameid(L, tname); +} + +int luaT_getmetaclass(lua_State *L, int index) +{ + return lua_getmetatable(L, index); +} + +const char* luaT_checktypename2id(lua_State *L, const char *tname) +{ + const char* id = luaT_typenameid(L, tname); + if(!id) + luaL_error(L, "unknown class <%s>", tname); + return id; +} + +void luaT_registeratid(lua_State *L, const struct luaL_Reg *methods, const char *id) +{ + luaT_registeratname(L, methods, id); +} + +/**************************************************************/ diff --git a/nerv/luaT/luaT.h b/nerv/luaT/luaT.h new file mode 100644 index 0000000..5e8dd2f --- /dev/null +++ b/nerv/luaT/luaT.h @@ -0,0 +1,111 @@ +#ifndef LUAT_UTILS_INC +#define LUAT_UTILS_INC + +#ifdef __cplusplus +extern "C" { +#endif +#include <lua.h> +#include <lauxlib.h> +#ifdef __cplusplus +} +#endif + +#ifndef LUA_EXTERNC +# ifdef __cplusplus +# define LUA_EXTERNC extern "C" +# else +# define LUA_EXTERNC extern +# endif +#endif + +#ifdef _MSC_VER +# define DLL_EXPORT __declspec(dllexport) +# define DLL_IMPORT __declspec(dllimport) +# ifdef luaT_EXPORTS +# define LUAT_API LUA_EXTERNC DLL_EXPORT +# else +# define LUAT_API LUA_EXTERNC DLL_IMPORT +# endif +#else +# define DLL_EXPORT +# define DLL_IMPORT +# define LUAT_API LUA_EXTERNC +#endif + + +/* C functions */ + +LUAT_API void* luaT_alloc(lua_State *L, long size); +LUAT_API void* luaT_realloc(lua_State *L, void *ptr, long size); +LUAT_API void luaT_free(lua_State *L, void *ptr); + +LUAT_API const char* luaT_newmetatable(lua_State *L, const char *tname, const char *parenttname, + lua_CFunction constructor, lua_CFunction destructor, lua_CFunction factory); + +LUAT_API int luaT_pushmetatable(lua_State *L, const char *tname); + +LUAT_API const char* luaT_typenameid(lua_State *L, const char *tname); +LUAT_API const char* luaT_typename(lua_State *L, int ud); + +LUAT_API void luaT_pushudata(lua_State *L, void *udata, const char *tname); +LUAT_API void *luaT_toudata(lua_State *L, int ud, const char *tname); +LUAT_API int luaT_isudata(lua_State *L, int ud, const char *tname); +LUAT_API void *luaT_checkudata(lua_State *L, int ud, const char 
*tname); + +LUAT_API void *luaT_getfieldcheckudata(lua_State *L, int ud, const char *field, const char *tname); +LUAT_API void *luaT_getfieldchecklightudata(lua_State *L, int ud, const char *field); +LUAT_API double luaT_getfieldchecknumber(lua_State *L, int ud, const char *field); +LUAT_API int luaT_getfieldcheckint(lua_State *L, int ud, const char *field); +LUAT_API const char* luaT_getfieldcheckstring(lua_State *L, int ud, const char *field); +LUAT_API int luaT_getfieldcheckboolean(lua_State *L, int ud, const char *field); +LUAT_API void luaT_getfieldchecktable(lua_State *L, int ud, const char *field); + +LUAT_API int luaT_typerror(lua_State *L, int ud, const char *tname); + +LUAT_API int luaT_checkboolean(lua_State *L, int ud); +LUAT_API int luaT_optboolean(lua_State *L, int ud, int def); + +LUAT_API void luaT_registeratname(lua_State *L, const struct luaL_Reg *methods, const char *name); + +/* utility functions */ +LUAT_API const char *luaT_classrootname(const char *tname); +LUAT_API int luaT_classmodulename(const char *tname, char *module_name); + +/* debug */ +LUAT_API void luaT_stackdump(lua_State *L); + +/* Lua functions */ +LUAT_API int luaT_lua_newmetatable(lua_State *L); +LUAT_API int luaT_lua_factory(lua_State *L); +LUAT_API int luaT_lua_getconstructortable(lua_State *L); +LUAT_API int luaT_lua_typename(lua_State *L); +LUAT_API int luaT_lua_isequal(lua_State *L); +LUAT_API int luaT_lua_pointer(lua_State *L); +LUAT_API int luaT_lua_setenv(lua_State *L); +LUAT_API int luaT_lua_getenv(lua_State *L); +LUAT_API int luaT_lua_getmetatable(lua_State *L); +LUAT_API int luaT_lua_version(lua_State *L); +LUAT_API int luaT_lua_setmetatable(lua_State *L); +LUAT_API int luaT_lua_metatype(lua_State *L); +LUAT_API int luaT_lua_pushudata(lua_State *L); + +/* deprecated functions */ +/* ids have been replaced by string names to identify classes */ +/* comments show what function (that you should use) they call now */ +#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) +#define LUAT_DEPRECATED __attribute__((__deprecated__)) +#elif defined(_MSC_VER) +#define LUAT_DEPRECATED __declspec(deprecated) +#else +#define LUAT_DEPRECATED +#endif + +LUAT_API LUAT_DEPRECATED int luaT_pushmetaclass(lua_State *L, const char *tname); /* same as luaT_pushmetatable */ +LUAT_API LUAT_DEPRECATED const char* luaT_id(lua_State *L, int ud); /* same as luaT_typename */ +LUAT_API LUAT_DEPRECATED const char* luaT_id2typename(lua_State *L, const char *id); /* same as luaT_typenameid */ +LUAT_API LUAT_DEPRECATED const char* luaT_typename2id(lua_State *L, const char*); /* same as luaT_typenameid */ +LUAT_API LUAT_DEPRECATED int luaT_getmetaclass(lua_State *L, int index); /* same as luaT_getmetatable */ +LUAT_API LUAT_DEPRECATED const char* luaT_checktypename2id(lua_State *L, const char *tname); /* same as luaT_typenameid */ +LUAT_API LUAT_DEPRECATED void luaT_registeratid(lua_State *L, const struct luaL_Reg *methods, const char *id); /* same as luaT_registeratname */ + +#endif diff --git a/nerv/matrix/cuda_helper.h b/nerv/matrix/cuda_helper.h new file mode 100644 index 0000000..fde6f18 --- /dev/null +++ b/nerv/matrix/cuda_helper.h @@ -0,0 +1,75 @@ +#ifndef NERV_CUDA_HELPER_H +#define NERV_CUDA_HELPER_H +#include "cuda.h" +#include "cuda_runtime.h" +#include "driver_types.h" +#include "cublas_v2.h" +#define CUBLAS_SAFE_SYNC_CALL(call) \ + do { \ + cublasStatus_t err = (call); \ + if (err != CUBLAS_STATUS_SUCCESS) \ + nerv_error(L, "cumatrix cublas error: %s at %s:%d", \ + cublasGetErrorString(err), __FILE__, 
__LINE__); \ + cudaDeviceSynchronize(); \ + } while (0) + +#define CUDA_SAFE_CALL(call) \ + do { \ + cudaError_t err = (call); \ + if (err != cudaSuccess) \ + nerv_error(L, "cumatrix CUDA error: %s at %s:%d", \ + cudaGetErrorString(err), __FILE__, __LINE__); \ + } while (0) + +#define CUDA_SAFE_SYNC_CALL(call) \ + do { \ + CUDA_SAFE_CALL(call); \ + cudaDeviceSynchronize(); \ + } while (0) + +#define CHECK_SAME_DIMENSION(a, b) \ + do { \ + if (!(a->nrow == b->nrow && a->ncol == b->ncol)) \ + nerv_error(L, "matrices should be of the same dimension"); \ + } while (0) + +static const char *cublasGetErrorString(cublasStatus_t err) { + switch (err) + { + case CUBLAS_STATUS_SUCCESS: + return "CUBLAS_STATUS_SUCCESS"; + case CUBLAS_STATUS_NOT_INITIALIZED: + return "CUBLAS_STATUS_NOT_INITIALIZED"; + case CUBLAS_STATUS_ALLOC_FAILED: + return "CUBLAS_STATUS_ALLOC_FAILED"; + case CUBLAS_STATUS_INVALID_VALUE: + return "CUBLAS_STATUS_INVALID_VALUE"; + case CUBLAS_STATUS_ARCH_MISMATCH: + return "CUBLAS_STATUS_ARCH_MISMATCH"; + case CUBLAS_STATUS_MAPPING_ERROR: + return "CUBLAS_STATUS_MAPPING_ERROR"; + case CUBLAS_STATUS_EXECUTION_FAILED: + return "CUBLAS_STATUS_EXECUTION_FAILED"; + case CUBLAS_STATUS_INTERNAL_ERROR: + return "CUBLAS_STATUS_INTERNAL_ERROR"; +/* case CUBLAS_STATUS_NOT_SUPPORTED: + return "CUBLAS_STATUS_NOT_SUPPORTED"; + case CUBLAS_STATUS_LICENSE_ERROR: + return "CUBLAS_STATUS_LICENSE_ERROR"; */ + } + return "<unknown>"; +} + +#define PROFILE_START \ + do { \ + cudaEventRecord(profile_start, 0); +#define PROFILE_STOP \ + cudaEventRecord(profile_stop, 0); \ + cudaEventSynchronize(profile_stop); \ + float milliseconds = 0; \ + cudaEventElapsedTime(&milliseconds, profile_start, profile_stop); \ + accu_profile(__func__, milliseconds / 1000); \ + } while (0); + +#define PROFILE_END +#endif diff --git a/nerv/matrix/cukernel.cu b/nerv/matrix/cukernel.cu new file mode 100644 index 0000000..a19030a --- /dev/null +++ b/nerv/matrix/cukernel.cu @@ -0,0 +1,17 @@ +#define NERV_GENERIC_CUKERNEL + +#define cudak_(NAME) cudak_float_ ## NAME +#define MATRIX_USE_FLOAT +#include "generic/elem_type.h" +#include "generic/cukernel.cu" +#undef cudak_ +#undef MATRIX_USE_FLOAT +#undef MATRIX_ELEM +#undef MATRIX_ELEM_PTR +#undef MATRIX_ELEM_FMT +#undef MATRIX_ELEM_WRITE_FMT + +#define cudak_(NAME) cudak_double_ ## NAME +#define MATRIX_USE_DOUBLE +#include "generic/elem_type.h" +#include "generic/cukernel.cu" diff --git a/nerv/matrix/cukernel.h b/nerv/matrix/cukernel.h new file mode 100644 index 0000000..8a1494f --- /dev/null +++ b/nerv/matrix/cukernel.h @@ -0,0 +1,20 @@ +#ifdef NERV_GENERIC_CUKERNEL +void cudak_(cuda_mul_elem)(const Matrix *a, const Matrix *b, Matrix *c); +void cudak_(cuda_log_elem)(const Matrix *a, Matrix *b); +void cudak_(cuda_sigmoid)(const Matrix *a, Matrix *b); +void cudak_(cuda_sigmoid_grad)(const Matrix *output, const Matrix *err, Matrix *nerr); +void cudak_(cuda_rowsum)(const Matrix *a, Matrix *b); +void cudak_(cuda_rowmax)(const Matrix *a, Matrix *b); +void cudak_(cuda_rowmax_idx)(const Matrix *a, Matrix *b, Matrix *idx); +void cudak_(cuda_colsum)(const Matrix *a, Matrix *b); +void cudak_(cuda_colsame)(const Matrix *a, const Matrix *ref, Matrix *b); +void cudak_(cuda_softmax_denominator)(const Matrix *a, const Matrix *max, Matrix *b); +void cudak_(cuda_softmax_final)(const Matrix *a, const Matrix *max, const Matrix *deno, Matrix *b); +void cudak_(cuda_add_row)(const Matrix *a, Matrix *b, double beta); +void cudak_(cuda_fill)(Matrix *a, double val); +void cudak_(cuda_expand_frm)(const Matrix 
*a, Matrix *b, int context); +void cudak_(cuda_rearrange_frm)(const Matrix *a, Matrix *b, int step); +void cudak_(cuda_scale_rows_by_row)(const Matrix *a, Matrix *b); +void cudak_(cuda_scale_rows_by_col)(const Matrix *a, Matrix *b); +void cudak_(cuda_decompress)(const Matrix *a, Matrix *b); +#endif diff --git a/nerv/matrix/cumatrix.c b/nerv/matrix/cumatrix.c new file mode 100644 index 0000000..af34fb4 --- /dev/null +++ b/nerv/matrix/cumatrix.c @@ -0,0 +1,87 @@ +#define NERV_GENERIC_CUMATRIX +#include "../common.h" +#include "cuda_helper.h" +#include <string.h> +#define PROFILE_HASHMAP_SIZE 123457 +static cublasHandle_t cublas_handle; +static cudaEvent_t profile_start, profile_stop; +static HashMap *profile; + +static int print_profile(lua_State *L) { + (void)L; + size_t i; + fprintf(stderr, "*** [nerv cumatrix profile] **\n"); + for (i = 0; i < profile->size; i++) + { + HashNode *ptr; + for (ptr = profile->bucket[i]; ptr; ptr = ptr->next) + { + fprintf(stderr, "%s:\t%.6f\n", ptr->key, *(float *)ptr->val); + } + } + return 0; +} + +static int clear_profile(lua_State *L) { + (void)L; + hashmap_clear(profile); + return 0; +} + +void accu_profile(const char *name, float delta) { + float *val = hashmap_getval(profile, name); + if (!val) + { + val = malloc(sizeof(float)); + *val = 0; + hashmap_setval(profile, name, val); + } + *val += delta; +} + +static const luaL_Reg cumatrix_methods[] = { + {"print_profile", print_profile}, + {"clear_profile", clear_profile}, + {NULL, NULL} +}; + +extern void nerv_matrix_cuda_float_init(lua_State *L); +extern void nerv_matrix_cuda_double_init(lua_State *L); + +void nerv_cumatrix_init(lua_State *L) { + luaL_register(L, NULL, cumatrix_methods); + cublasCreate(&cublas_handle); + cudaEventCreate(&profile_start); + cudaEventCreate(&profile_stop); + profile = hashmap_create(PROFILE_HASHMAP_SIZE, bkdr_hash, strcmp); + nerv_matrix_cuda_float_init(L); + nerv_matrix_cuda_double_init(L); +} + +#define MATRIX_USE_FLOAT +#define cuda_matrix_(NAME) cuda_matrix_float_##NAME +#define nerv_matrix_(NAME) nerv_matrix_cuda_float_##NAME +#define cudak_(NAME) cudak_float_ ## NAME +#define NERV_CUBLAS_(NAME) cublasS##NAME +#define MATRIX_CUMATRIX_HOST_TNAME nerv_matrix_host_float_tname +const char *nerv_matrix_(tname) = "nerv.CuMatrixFloat"; +#include "generic/cumatrix.c" +#undef NERV_CUBLAS_ +#undef cudak_ +#undef nerv_matrix_ +#undef cuda_matrix_ +#undef MATRIX_USE_FLOAT +#undef MATRIX_ELEM +#undef MATRIX_ELEM_PTR +#undef MATRIX_ELEM_FMT +#undef MATRIX_ELEM_WRITE_FMT +#undef MATRIX_CUMATRIX_HOST_TNAME + +#define MATRIX_USE_DOUBLE +#define cuda_matrix_(NAME) cuda_matrix_double_##NAME +#define nerv_matrix_(NAME) nerv_matrix_cuda_double_##NAME +#define cudak_(NAME) cudak_double_ ## NAME +#define NERV_CUBLAS_(NAME) cublasD##NAME +#define MATRIX_CUMATRIX_HOST_TNAME nerv_matrix_host_double_tname +const char *nerv_matrix_(tname) = "nerv.CuMatrixDouble"; +#include "generic/cumatrix.c" diff --git a/nerv/matrix/generic/cukernel.cu b/nerv/matrix/generic/cukernel.cu new file mode 100644 index 0000000..d6c8adc --- /dev/null +++ b/nerv/matrix/generic/cukernel.cu @@ -0,0 +1,571 @@ +#ifdef NERV_GENERIC_CUKERNEL +#include <assert.h> +#include <stdio.h> +#include "matrix.h" +#include "cuda.h" +#include "float.h" +#define CUDA_THREADS_N 16 +#define CUDA_THREADS_NN ((CUDA_THREADS_N) * (CUDA_THREADS_N)) +#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b)) +__global__ void cudak_(log_elem)(const MATRIX_ELEM *a, MATRIX_ELEM *b, + int nrow, int ncol, int stride) { + int j = blockIdx.x * blockDim.x + 
threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + long idx; + MATRIX_ELEM tmp; + if (i >= nrow || j >= ncol) return; + idx = j + i * stride; + tmp = a[idx]; + if(tmp < FLT_MIN) tmp = FLT_MIN; + b[idx] = log(tmp); +} + +__global__ void cudak_(mul_elem)(const MATRIX_ELEM *a, const MATRIX_ELEM *b, + MATRIX_ELEM *c, + int nrow, int ncol, int stride) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + long idx; + if (i >= nrow || j >= ncol) return; + idx = j + i * stride; + c[idx] = a[idx] * b[idx]; +} + +__global__ void cudak_(sigmoid)(const MATRIX_ELEM *a, MATRIX_ELEM *b, + int nrow, int ncol, int stride) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + long idx; + if (i >= nrow || j >= ncol) return; + idx = j + i * stride; + b[idx] = 1.0 / (1.0 + exp(-a[idx])); +} + +__global__ void cudak_(sigmoid_grad)(const MATRIX_ELEM *output, + const MATRIX_ELEM *err, + MATRIX_ELEM *nerr, + int nrow, int ncol, int stride) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + long idx; + if (i >= nrow || j >= ncol) return; + idx = j + i * stride; + nerr[idx] = output[idx] * (1.0 - output[idx]) * err[idx]; +} + +__global__ void cudak_(softmax_final)(const MATRIX_ELEM *a, MATRIX_ELEM *b, + const MATRIX_ELEM *max, const MATRIX_ELEM *deno, + int nrow, int ncol, int stride, int mstride) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + long idx; + if (i >= nrow || j >= ncol) return; + idx = j + i * stride; + b[idx] = exp(a[idx] - max[0 + i * mstride]) / deno[0 + i * mstride]; +} + +__global__ void cudak_(block_reduce_rowsum)(const MATRIX_ELEM *input, + MATRIX_ELEM *output, + const int istride, const int ostride, + const int n) { + extern __shared__ MATRIX_ELEM cudak_(arr)[]; + int j = blockIdx.x * blockDim.x + threadIdx.x; + cudak_(arr)[threadIdx.x] = j < n ? input[j + istride * blockIdx.y] : 0; + __syncthreads(); + for (int offset = blockDim.x >> 1; offset; offset >>= 1) + { + if (threadIdx.x < offset) + cudak_(arr)[threadIdx.x] += cudak_(arr)[threadIdx.x + offset]; + __syncthreads(); + } + if (threadIdx.x == 0) + output[blockIdx.x + ostride * blockIdx.y] = cudak_(arr)[0]; +} + +__global__ void cudak_(block_reduce_colsum)(const MATRIX_ELEM *input, + MATRIX_ELEM *output, + const int istride, const int ostride, + const int n) { + extern __shared__ MATRIX_ELEM cudak_(arr)[]; + int i = blockIdx.y * blockDim.y + threadIdx.y; + cudak_(arr)[threadIdx.y] = i < n ? input[blockIdx.x + istride * i] : 0; + __syncthreads(); + for (int offset = blockDim.y >> 1; offset; offset >>= 1) + { + if (threadIdx.y < offset) + cudak_(arr)[threadIdx.y] += cudak_(arr)[threadIdx.y + offset]; + __syncthreads(); + } + if (threadIdx.y == 0) + output[blockIdx.x + ostride * blockIdx.y] = cudak_(arr)[0]; +} + +__global__ void cudak_(block_reduce_colsame)(const MATRIX_ELEM *input, + const MATRIX_ELEM *ref_input, + MATRIX_ELEM *output, + const int istride, const int ostride, + const int n) { + extern __shared__ MATRIX_ELEM cudak_(arr)[]; + int i = blockIdx.y * blockDim.y + threadIdx.y; + cudak_(arr)[threadIdx.y] = (i < n && input[blockIdx.x + istride * i] == \ + ref_input[blockIdx.x + istride * i]) ? 
1.0 : 0; + __syncthreads(); + for (int offset = blockDim.y >> 1; offset; offset >>= 1) + { + if (threadIdx.y < offset) + cudak_(arr)[threadIdx.y] += cudak_(arr)[threadIdx.y + offset]; + __syncthreads(); + } + if (threadIdx.y == 0) + output[blockIdx.x + ostride * blockIdx.y] = cudak_(arr)[0]; +} + +__global__ void cudak_(block_reduce_softmax_rowsum)(const MATRIX_ELEM *input, + MATRIX_ELEM *output, + const MATRIX_ELEM *max, + const int istride, const int ostride, + const int mstride, const int n) { + extern __shared__ MATRIX_ELEM cudak_(arr)[]; + int j = blockIdx.x * blockDim.x + threadIdx.x; + cudak_(arr)[threadIdx.x] = j < n ? exp(input[j + istride * blockIdx.y] - \ + max[0 + mstride * blockIdx.y]) : 0; + __syncthreads(); + for (int offset = blockDim.x >> 1; offset; offset >>= 1) + { + if (threadIdx.x < offset) + cudak_(arr)[threadIdx.x] += cudak_(arr)[threadIdx.x + offset]; + __syncthreads(); + } + if (threadIdx.x == 0) + output[blockIdx.x + ostride * blockIdx.y] = cudak_(arr)[0]; +} + +__global__ void cudak_(block_reduce_rowmax)(const MATRIX_ELEM *input, + MATRIX_ELEM *output, + const int istride, const int ostride, + const int n) { + extern __shared__ MATRIX_ELEM cudak_(arr)[]; + int j = blockIdx.x * blockDim.x + threadIdx.x; + cudak_(arr)[threadIdx.x] = j < n ? input[j + istride * blockIdx.y] : -FLT_MAX; + __syncthreads(); + for (int offset = blockDim.x >> 1; offset; offset >>= 1) + { + if (threadIdx.x < offset) + { + MATRIX_ELEM l = cudak_(arr)[threadIdx.x], + r = cudak_(arr)[threadIdx.x + offset]; + if (r > l) + cudak_(arr)[threadIdx.x] = r; + } + __syncthreads(); + } + if (threadIdx.x == 0) + output[blockIdx.x + ostride * blockIdx.y] = cudak_(arr)[0]; +} + +__global__ void cudak_(block_reduce_rowmax_idx)(const MATRIX_ELEM *input, + const MATRIX_ELEM *idx_input, + MATRIX_ELEM *output, + MATRIX_ELEM *idx_output, + const int istride, const int ostride, + const int n) { + extern __shared__ MATRIX_ELEM cudak_(arr)[]; + MATRIX_ELEM *arr_val = cudak_(arr); + MATRIX_ELEM *arr_idx = arr_val + blockDim.x; + int j = blockIdx.x * blockDim.x + threadIdx.x; + arr_val[threadIdx.x] = j < n ? input[j + istride * blockIdx.y] : -FLT_MAX; + arr_idx[threadIdx.x] = j < n ? 
idx_input[j + istride * blockIdx.y] : 0; + __syncthreads(); + for (int offset = blockDim.x >> 1; offset; offset >>= 1) + { + if (threadIdx.x < offset) + { + MATRIX_ELEM l = arr_val[threadIdx.x], + r = arr_val[threadIdx.x + offset]; + if (r > l) + { + arr_val[threadIdx.x] = r; + arr_idx[threadIdx.x] = arr_idx[threadIdx.x + offset]; + } + } + __syncthreads(); + } + if (threadIdx.x == 0) + { + output[blockIdx.x + ostride * blockIdx.y] = arr_val[0]; + idx_output[blockIdx.x + ostride * blockIdx.y] = arr_idx[0]; + } +} + +__global__ void cudak_(add_row)(const MATRIX_ELEM *a, MATRIX_ELEM *b, + int nrow, int ncol, int stride, double beta) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + if (i >= nrow || j >= ncol) return; + b[j + i * stride] += beta * a[j]; +} + +__global__ void cudak_(fill)(MATRIX_ELEM *a, + int nrow, int ncol, int stride, double val) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + if (i >= nrow || j >= ncol) return; + a[j + i * stride] = val; +} + +__global__ void cudak_(expand_frm)(const MATRIX_ELEM *a, MATRIX_ELEM *b, + int nrow, int ncol, + int enrow, int encol, + int stride, int estride, + int context) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + int ridx; + if (i >= enrow || j >= encol) return; + ridx = i + j / ncol - context; + if (ridx < 0) ridx = 0; + else if (ridx >= nrow) ridx = nrow - 1; + b[j + i * estride] = a[j % ncol + ridx * stride]; +} + +__global__ void cudak_(rearrange_frm)(const MATRIX_ELEM *a, MATRIX_ELEM *b, + int nrow, int ncol, + int stride, int step, int orig_dim) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + if (i >= nrow || j >= ncol) return; + b[j + i * stride] = a[j / step + (j % step) * orig_dim + i * stride]; +} + +__global__ void cudak_(scale_rows_by_col)(const MATRIX_ELEM *a, MATRIX_ELEM *b, + int nrow, int ncol, + int astride, int bstride) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + if (i >= nrow || j >= ncol) return; + b[j + i * bstride] *= a[i * astride]; +} + +__global__ void cudak_(scale_rows_by_row)(const MATRIX_ELEM *a, MATRIX_ELEM *b, + int nrow, int ncol, + int stride) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + if (i >= nrow || j >= ncol) return; + b[j + i * stride] *= a[j]; +} + +__global__ void cudak_(decompress)(const MATRIX_ELEM *a, MATRIX_ELEM *b, + int nrow, int ncol, + int stride_a, int stride_b) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + if (i >= nrow || j >= ncol) return; + b[lrintf(a[j + i * stride_a]) + i * stride_b] = 1.0; +} + +__global__ void cudak_(gen_col_idx)(MATRIX_ELEM *b, + int nrow, int ncol, int stride) { + int j = blockIdx.x * blockDim.x + threadIdx.x; + int i = blockIdx.y * blockDim.y + threadIdx.y; + if (i >= nrow || j >= ncol) return; + b[j + i * stride] = j; +} + +extern "C" { +#include "../cukernel.h" + void cudak_(cuda_log_elem)(const Matrix *a, Matrix *b) { + dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), + CEIL_DIV(b->nrow, threadsPerBlock.y)); + cudak_(log_elem)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), + b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM)); + cudaStreamSynchronize(0); + } + + void 
cudak_(cuda_mul_elem)(const Matrix *a, const Matrix *b, + Matrix *c) { + dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), + CEIL_DIV(b->nrow, threadsPerBlock.y)); + cudak_(mul_elem)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), + MATRIX_ELEM_PTR(c), + b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM)); + cudaStreamSynchronize(0); + } + + void cudak_(cuda_sigmoid)(const Matrix *a, Matrix *b) { + dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), + CEIL_DIV(b->nrow, threadsPerBlock.y)); + cudak_(sigmoid)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), b->nrow, b->ncol, + b->stride / sizeof(MATRIX_ELEM)); + cudaStreamSynchronize(0); + } + + void cudak_(cuda_sigmoid_grad)(const Matrix *output, + const Matrix *err, Matrix *nerr) { + dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(nerr->ncol, threadsPerBlock.x), + CEIL_DIV(nerr->nrow, threadsPerBlock.y)); + cudak_(sigmoid_grad)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(output), MATRIX_ELEM_PTR(err), + MATRIX_ELEM_PTR(nerr), + nerr->nrow, nerr->ncol, + nerr->stride / sizeof(MATRIX_ELEM)); + cudaStreamSynchronize(0); + } + + void cudak_(cuda_rowsum)(const Matrix *a, Matrix *b) { + dim3 block(CUDA_THREADS_NN, 1); + int ncol = a->ncol; + int blocks_per_row = CEIL_DIV(ncol, block.x); + dim3 grid(blocks_per_row, a->nrow); + MATRIX_ELEM *res; + size_t stride; + cudaMallocPitch(&res, &stride, blocks_per_row * sizeof(MATRIX_ELEM), a->nrow); + cudak_(block_reduce_rowsum)<<<grid, block, block.x * sizeof(MATRIX_ELEM)>>> \ + (MATRIX_ELEM_PTR(a), res, + a->stride / sizeof(MATRIX_ELEM), stride / sizeof(MATRIX_ELEM), + ncol); + ncol = blocks_per_row; + assert((unsigned long)ncol <= block.x); + grid.x = 1; + cudaStreamSynchronize(0); + cudak_(block_reduce_rowsum)<<<grid, block, block.x * sizeof(MATRIX_ELEM)>>> \ + (res, MATRIX_ELEM_PTR(b), + stride / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), + ncol); + cudaStreamSynchronize(0); + cudaFree(res); + } + + void cudak_(cuda_colsame)(const Matrix *a, const Matrix *ref, Matrix *b) { + dim3 block(1, CUDA_THREADS_NN); + int nrow = a->nrow; + int blocks_per_col = CEIL_DIV(nrow, block.y); + dim3 grid(a->ncol, blocks_per_col); + MATRIX_ELEM *res; + size_t stride; + cudaMallocPitch(&res, &stride, a->ncol * sizeof(MATRIX_ELEM), blocks_per_col); + cudak_(block_reduce_colsame)<<<grid, block, block.y * sizeof(MATRIX_ELEM)>>> \ + (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(ref), res, + a->stride / sizeof(MATRIX_ELEM), stride / sizeof(MATRIX_ELEM), + nrow); + nrow = blocks_per_col; + assert((unsigned long)nrow <= block.y); + grid.y = 1; + cudaStreamSynchronize(0); + cudak_(block_reduce_colsum)<<<grid, block, block.y * sizeof(MATRIX_ELEM)>>> \ + (res, MATRIX_ELEM_PTR(b), + stride / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), + nrow); + cudaStreamSynchronize(0); + cudaFree(res); + } + + void cudak_(cuda_colsum)(const Matrix *a, Matrix *b) { + dim3 block(1, CUDA_THREADS_NN); + int nrow = a->nrow; + int blocks_per_col = CEIL_DIV(nrow, block.y); + dim3 grid(a->ncol, blocks_per_col); + MATRIX_ELEM *res; + size_t stride; + cudaMallocPitch(&res, &stride, a->ncol * sizeof(MATRIX_ELEM), blocks_per_col); + cudak_(block_reduce_colsum)<<<grid, block, block.y * sizeof(MATRIX_ELEM)>>> \ + (MATRIX_ELEM_PTR(a), res, + a->stride / sizeof(MATRIX_ELEM), stride / sizeof(MATRIX_ELEM), + nrow); + nrow = blocks_per_col; + 
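+        /* the first block_reduce_colsum pass left one partial sum per block in res
+           (blocks_per_col rows); the second launch below, with grid.y = 1, folds those
+           partials into the final per-column sums, so blocks_per_col must fit in a
+           single block (checked by the assert) */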
assert((unsigned long)nrow <= block.y); + grid.y = 1; + cudaStreamSynchronize(0); + cudak_(block_reduce_colsum)<<<grid, block, block.y * sizeof(MATRIX_ELEM)>>> \ + (res, MATRIX_ELEM_PTR(b), + stride / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), + nrow); + cudaStreamSynchronize(0); + cudaFree(res); + } + + void cudak_(cuda_softmax_final)(const Matrix *a, const Matrix *max, + const Matrix *deno, Matrix *b) { + dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), + CEIL_DIV(b->nrow, threadsPerBlock.y)); + cudak_(softmax_final)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), + MATRIX_ELEM_PTR(max), MATRIX_ELEM_PTR(deno), + b->nrow, b->ncol, + b->stride / sizeof(MATRIX_ELEM), + max->stride / sizeof(MATRIX_ELEM)); + cudaStreamSynchronize(0); + } + + void cudak_(cuda_softmax_denominator)(const Matrix *a, const Matrix *max, Matrix *b) { + dim3 block(CUDA_THREADS_NN, 1); + int ncol = a->ncol; + int blocks_per_row = CEIL_DIV(ncol, block.x); + dim3 grid(blocks_per_row, a->nrow); + MATRIX_ELEM *res; + size_t stride; + assert(max->ncol == 1); + cudaMallocPitch(&res, &stride, blocks_per_row * sizeof(MATRIX_ELEM), a->nrow); + cudak_(block_reduce_softmax_rowsum) \ + <<<grid, block, block.x * sizeof(MATRIX_ELEM)>>> \ + (MATRIX_ELEM_PTR(a), res, MATRIX_ELEM_PTR(max), + a->stride / sizeof(MATRIX_ELEM), stride / sizeof(MATRIX_ELEM), + max->stride / sizeof(MATRIX_ELEM), + ncol); + ncol = blocks_per_row; + assert((unsigned long)ncol <= block.x); + grid.x = 1; + cudaStreamSynchronize(0); + cudak_(block_reduce_rowsum) \ + <<<grid, block, block.x * sizeof(MATRIX_ELEM)>>> \ + (res, MATRIX_ELEM_PTR(b), + stride / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), + ncol); + cudaStreamSynchronize(0); + cudaFree(res); + } + + void cudak_(cuda_rowmax)(const Matrix *a, Matrix *b) { + dim3 block(CUDA_THREADS_NN, 1); + int ncol = a->ncol; + int blocks_per_row = CEIL_DIV(ncol, block.x); + dim3 grid(blocks_per_row, a->nrow); + MATRIX_ELEM *res; + size_t stride; + cudaMallocPitch(&res, &stride, blocks_per_row * sizeof(MATRIX_ELEM), a->nrow); + cudak_(block_reduce_rowmax)<<<grid, block, block.x * sizeof(MATRIX_ELEM)>>> \ + (MATRIX_ELEM_PTR(a), res, + a->stride / sizeof(MATRIX_ELEM), stride / sizeof(MATRIX_ELEM), + ncol); + ncol = blocks_per_row; + assert((unsigned long)ncol <= block.x); + grid.x = 1; + cudaStreamSynchronize(0); + cudak_(block_reduce_rowmax)<<<grid, block, block.x * sizeof(MATRIX_ELEM)>>> \ + (res, MATRIX_ELEM_PTR(b), + stride / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), + ncol); + cudaStreamSynchronize(0); + cudaFree(res); + } + + void cudak_(cuda_rowmax_idx)(const Matrix *a, Matrix *b, Matrix *b_idx) { + dim3 block(CUDA_THREADS_NN, 1); + int ncol = a->ncol; + int blocks_per_row = CEIL_DIV(ncol, block.x); + dim3 grid(blocks_per_row, a->nrow); + MATRIX_ELEM *a_idx, *res, *res_idx; + size_t stride; + cudaMallocPitch(&a_idx, &stride, a->stride, a->nrow); + cudak_(gen_col_idx)<<<grid, block>>>(a_idx, a->nrow, ncol, stride / sizeof(MATRIX_ELEM)); + cudaMallocPitch(&res, &stride, blocks_per_row * sizeof(MATRIX_ELEM), a->nrow); + cudaMallocPitch(&res_idx, &stride, blocks_per_row * sizeof(MATRIX_ELEM), a->nrow); + cudaStreamSynchronize(0); + cudak_(block_reduce_rowmax_idx)<<<grid, block, + 2 * block.x * sizeof(MATRIX_ELEM)>>> \ + (MATRIX_ELEM_PTR(a), a_idx, res, res_idx, + a->stride / sizeof(MATRIX_ELEM), stride / sizeof(MATRIX_ELEM), + ncol); + ncol = blocks_per_row; + assert((unsigned long)ncol <= block.x); 
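+        /* block_reduce_rowmax_idx keeps two shared arrays side by side (block.x values
+           followed by block.x indices), hence the 2 * block.x * sizeof(MATRIX_ELEM) of
+           shared memory; the second launch below collapses the per-block maxima in
+           res/res_idx into one max value and column index per row */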
+ grid.x = 1; + cudaStreamSynchronize(0); + cudak_(block_reduce_rowmax_idx)<<<grid, block, + 2 * block.x * sizeof(MATRIX_ELEM)>>> \ + (res, res_idx, MATRIX_ELEM_PTR(b), MATRIX_ELEM_PTR(b_idx), + stride / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), + ncol); + cudaStreamSynchronize(0); + cudaFree(a_idx); + cudaFree(res); + cudaFree(res_idx); + } + + /* in-place calc */ + void cudak_(cuda_add_row)(const Matrix *a, Matrix *b, double beta) { + dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), + CEIL_DIV(b->nrow, threadsPerBlock.y)); + cudak_(add_row)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), b->nrow, b->ncol, + b->stride / sizeof(MATRIX_ELEM), beta); + cudaStreamSynchronize(0); + } + + void cudak_(cuda_fill)(Matrix *a, double val) { + dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(a->ncol, threadsPerBlock.x), + CEIL_DIV(a->nrow, threadsPerBlock.y)); + cudak_(fill)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(a), a->nrow, a->ncol, + a->stride / sizeof(MATRIX_ELEM), val); + cudaStreamSynchronize(0); + } + + void cudak_(cuda_expand_frm)(const Matrix *a, Matrix *b, int context) { + dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), + CEIL_DIV(b->nrow, threadsPerBlock.y)); + cudak_(expand_frm)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), + a->nrow, a->ncol, + b->nrow, b->ncol, + a->stride / sizeof(MATRIX_ELEM), + b->stride / sizeof(MATRIX_ELEM), + context); + cudaStreamSynchronize(0); + } + + void cudak_(cuda_rearrange_frm)(const Matrix *a, Matrix *b, int step) { + dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), + CEIL_DIV(b->nrow, threadsPerBlock.y)); + cudak_(rearrange_frm)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), + b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM), + step, b->ncol / step); + cudaStreamSynchronize(0); + } + + void cudak_(cuda_scale_rows_by_col)(const Matrix *a, Matrix *b) { + dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), + CEIL_DIV(b->nrow, threadsPerBlock.y)); + cudak_(scale_rows_by_col)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), + b->nrow, b->ncol, + a->stride / sizeof(MATRIX_ELEM), + b->stride / sizeof(MATRIX_ELEM)); + cudaStreamSynchronize(0); + } + + void cudak_(cuda_scale_rows_by_row)(const Matrix *a, Matrix *b) { + dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N); + dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x), + CEIL_DIV(b->nrow, threadsPerBlock.y)); + cudak_(scale_rows_by_row)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), + b->nrow, b->ncol, b->stride / sizeof(MATRIX_ELEM)); + cudaStreamSynchronize(0); + } + + void cudak_(cuda_decompress)(const Matrix *a, Matrix *b) { + dim3 threadsPerBlock(1, CUDA_THREADS_NN); + dim3 numBlocks(1, CEIL_DIV(a->nrow, threadsPerBlock.y)); + cudak_(decompress)<<<numBlocks, threadsPerBlock>>> \ + (MATRIX_ELEM_PTR(a), MATRIX_ELEM_PTR(b), + a->nrow, a->ncol, + a->stride / sizeof(MATRIX_ELEM), + b->stride / sizeof(MATRIX_ELEM)); + cudaStreamSynchronize(0); + } +} +#endif diff --git a/nerv/matrix/generic/cumatrix.c b/nerv/matrix/generic/cumatrix.c new file mode 100644 index 0000000..b5d1a35 --- /dev/null +++ b/nerv/matrix/generic/cumatrix.c @@ -0,0 +1,493 @@ +#ifdef 
NERV_GENERIC_CUMATRIX +#include "matrix.h" +#include "elem_type.h" + +#define MATRIX_DATA_FREE(L, ptr) cuda_matrix_(free)(L, ptr) +#define MATRIX_DATA_ALLOC(L, dptr, stride, width, height) \ + cuda_matrix_(alloc)(L, dptr, stride, width, height) +#define MATRIX_DATA_WRITE(L, data, idx, val) cuda_matrix_(write)(L, data, idx, val) +#define MATRIX_DATA_READ(L, data, idx) cuda_matrix_(read)(L, data, idx) +#define MATRIX_INIT(L) cuda_matrix_(init)(L) +#define MATRIX_BASE_TNAME nerv_matrix_cuda_tname +#define NERV_GENERIC_MATRIX +#define NERV_GENERIC_CUKERNEL +#include "../../common.h" +#include "../cukernel.h" +#include "../cuda_helper.h" + +Matrix *nerv_matrix_(new_)(lua_State *L, long nrow, long ncol); +void nerv_matrix_(data_free)(lua_State *L, Matrix *self); + +static void nerv_matrix_(add_)(lua_State *L, const Matrix *a, const Matrix *b, + const Matrix *c, + MATRIX_ELEM alpha, MATRIX_ELEM beta) { + PROFILE_START + CUBLAS_SAFE_SYNC_CALL( + NERV_CUBLAS_(geam)(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N, + a->ncol, a->nrow, + &alpha, + MATRIX_ELEM_PTR(a), a->stride / sizeof(MATRIX_ELEM), + &beta, + MATRIX_ELEM_PTR(b), b->stride / sizeof(MATRIX_ELEM), + MATRIX_ELEM_PTR(c), c->stride / sizeof(MATRIX_ELEM))); + PROFILE_STOP +} + +static int nerv_matrix_(add)(lua_State *L) { + Matrix *c = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 3, nerv_matrix_(tname)); + MATRIX_ELEM alpha = luaL_checknumber(L, 4); + MATRIX_ELEM beta = luaL_checknumber(L, 5); + CHECK_SAME_DIMENSION(a, b); + CHECK_SAME_DIMENSION(a, c); + nerv_matrix_(add_)(L, a, b, c, alpha, beta); + return 0; +} + +static int nerv_matrix_(get_cublas_op)(char ch) { + return (ch == 'T' || ch == 't') ? CUBLAS_OP_T : CUBLAS_OP_N; +} + +static int nerv_matrix_(mul)(lua_State *L) { +#define SWAP(a, b) \ + do { int t = (a); (a) = (b); (b) = t; } while (0) + + Matrix *c = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 3, nerv_matrix_(tname)); + MATRIX_ELEM alpha = luaL_checknumber(L, 4); + MATRIX_ELEM beta = luaL_checknumber(L, 5); + int nargs = lua_gettop(L); + int ta = nargs > 5 ? nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 6)) \ + : CUBLAS_OP_N; + int tb = nargs > 6 ? 
nerv_matrix_(get_cublas_op)(*luaL_checkstring(L, 7)) \ + : CUBLAS_OP_N; + int am = a->nrow, an = a->ncol; + int bm = b->nrow, bn = b->ncol; + if (ta == CUBLAS_OP_T) SWAP(am, an); + if (tb == CUBLAS_OP_T) SWAP(bm, bn); + if (an != bm) + nerv_error(L, "Wrong dimension of multipliers"); +/* MATRIX_ELEM alpha = 1.0f, beta = 0.0f; */ + /* Because matrix in Nerv is row-major, here b comes first */ + PROFILE_START + CUBLAS_SAFE_SYNC_CALL( + NERV_CUBLAS_(gemm)(cublas_handle, tb, ta, + bn, am, bm, + &alpha, + MATRIX_ELEM_PTR(b), b->stride / sizeof(MATRIX_ELEM), + MATRIX_ELEM_PTR(a), a->stride / sizeof(MATRIX_ELEM), + &beta, + MATRIX_ELEM_PTR(c), c->stride / sizeof(MATRIX_ELEM))); + PROFILE_STOP + return 0; +} + +static int nerv_matrix_(create)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = nerv_matrix_(new_)(L, a->nrow, a->ncol); + luaT_pushudata(L, b, nerv_matrix_(tname)); + return 1; +} + +static int nerv_matrix_(sigmoid)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname)); + CHECK_SAME_DIMENSION(a, b); + PROFILE_START + cudak_(cuda_sigmoid)(b, a); + PROFILE_STOP + return 0; +} + +static int nerv_matrix_(sigmoid_grad)(lua_State *L) { + Matrix *nerr = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *err = luaT_checkudata(L, 2, nerv_matrix_(tname)); + Matrix *output = luaT_checkudata(L, 3, nerv_matrix_(tname)); + CHECK_SAME_DIMENSION(nerr, err); + CHECK_SAME_DIMENSION(nerr, output); + PROFILE_START + cudak_(cuda_sigmoid_grad)(output, err, nerr); + PROFILE_STOP + return 0; +} + +static int nerv_matrix_(softmax)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *max, *max_idx; + Matrix *dno; + CHECK_SAME_DIMENSION(a, b); + max = nerv_matrix_(new_)(L, a->nrow, 1); + max_idx = nerv_matrix_(new_)(L, a->nrow, 1); + dno = nerv_matrix_(new_)(L, a->nrow, 1); + PROFILE_START + cudak_(cuda_rowmax_idx)(a, max, max_idx); + cudak_(cuda_softmax_denominator)(a, max, dno); + cudak_(cuda_softmax_final)(a, max, dno, b); + PROFILE_STOP + nerv_matrix_(data_free)(L, max); + nerv_matrix_(data_free)(L, dno); + luaT_pushudata(L, max_idx, nerv_matrix_(tname)); + return 1; +} + +static int nerv_matrix_(rowsum)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = nerv_matrix_(new_)(L, a->nrow, 1); + PROFILE_START + cudak_(cuda_rowsum)(a, b); + PROFILE_STOP + luaT_pushudata(L, b, nerv_matrix_(tname)); + return 1; +} + +static int nerv_matrix_(colsum)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = nerv_matrix_(new_)(L, 1, a->ncol); + PROFILE_START + cudak_(cuda_colsum)(a, b); + PROFILE_STOP + luaT_pushudata(L, b, nerv_matrix_(tname)); + return 1; +} + +static int nerv_matrix_(colsame)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *ref = luaT_checkudata(L, 2, nerv_matrix_(tname)); + Matrix *b = nerv_matrix_(new_)(L, 1, a->ncol); + CHECK_SAME_DIMENSION(a, ref); + PROFILE_START + cudak_(cuda_colsame)(a, ref, b); + PROFILE_STOP + luaT_pushudata(L, b, nerv_matrix_(tname)); + return 1; +} + +static int nerv_matrix_(rowmax)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = nerv_matrix_(new_)(L, a->nrow, 1); + PROFILE_START + cudak_(cuda_rowmax)(a, b); + PROFILE_STOP + luaT_pushudata(L, b, nerv_matrix_(tname)); + return 1; +} + +static int nerv_matrix_(rowmax_idx)(lua_State 
*L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = nerv_matrix_(new_)(L, a->nrow, 1); + Matrix *idx = nerv_matrix_(new_)(L, a->nrow, 1); + PROFILE_START + cudak_(cuda_rowmax_idx)(a, b, idx); + PROFILE_STOP + luaT_pushudata(L, b, nerv_matrix_(tname)); + luaT_pushudata(L, idx, nerv_matrix_(tname)); + return 2; +} + +static int nerv_matrix_(add_row)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 1, nerv_matrix_(tname)); + double beta = luaL_checknumber(L, 3); + if (a->ncol != b->ncol) + nerv_error(L, "the number of columns is not the same"); + if (a->nrow != 1) + nerv_error(L, "a row vector is expected"); + PROFILE_START + cudak_(cuda_add_row)(a, b, beta); + PROFILE_STOP + return 0; +} + +static int nerv_matrix_(fill)(lua_State *L) { + Matrix *self = luaT_checkudata(L, 1, nerv_matrix_(tname)); + double val = luaL_checknumber(L, 2); + PROFILE_START + cudak_(cuda_fill)(self, val); + PROFILE_STOP + return 0; +} + +static int nerv_matrix_(copy_fromd)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname)); + int nargs = lua_gettop(L); + int b_begin = nargs > 2 ? luaL_checkinteger(L, 3) : 0; + int b_end = nargs > 3 ? luaL_checkinteger(L, 4) : b->nrow; + int a_begin = nargs > 4 ? luaL_checkinteger(L, 5) : 0; + if (!(0 <= b_begin && b_begin < b_end && b_end <= b->nrow && + a_begin + b_end - b_begin <= a->nrow)) + nerv_error(L, "invalid copy interval"); + if (a->ncol != b->ncol) + nerv_error(L, "matrices should be of the same dimension"); + PROFILE_START + CUDA_SAFE_SYNC_CALL( + cudaMemcpy2D(MATRIX_ROW_PTR(a, a_begin), a->stride, + MATRIX_ROW_PTR(b, b_begin), b->stride, + sizeof(MATRIX_ELEM) * b->ncol, b_end - b_begin, + cudaMemcpyDeviceToDevice)); + PROFILE_STOP + return 0; +} + +extern const char *MATRIX_CUMATRIX_HOST_TNAME; +static int nerv_matrix_(copy_fromh)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 2, MATRIX_CUMATRIX_HOST_TNAME); + int nargs = lua_gettop(L); + int b_begin = nargs > 2 ? luaL_checkinteger(L, 3) : 0; + int b_end = nargs > 3 ? luaL_checkinteger(L, 4) : b->nrow; + int a_begin = nargs > 4 ? luaL_checkinteger(L, 5) : 0; + if (!(0 <= b_begin && b_begin < b_end && b_end <= b->nrow && + a_begin + b_end - b_begin <= a->nrow)) + nerv_error(L, "invalid copy interval"); + if (a->ncol != b->ncol) + nerv_error(L, "matrices should be of the same dimension"); + PROFILE_START + CUDA_SAFE_SYNC_CALL( + cudaMemcpy2D(MATRIX_ROW_PTR(a, a_begin), a->stride, + MATRIX_ROW_PTR(b, b_begin), b->stride, + sizeof(MATRIX_ELEM) * b->ncol, b_end - b_begin, + cudaMemcpyHostToDevice)); + PROFILE_STOP + return 0; +} + +static int nerv_matrix_(copy_toh)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 2, MATRIX_CUMATRIX_HOST_TNAME); + int nargs = lua_gettop(L); + int a_begin = nargs > 2 ? luaL_checkinteger(L, 3) : 0; + int a_end = nargs > 3 ? luaL_checkinteger(L, 4) : a->nrow; + int b_begin = nargs > 4 ? 
luaL_checkinteger(L, 5) : 0; + if (!(0 <= a_begin && a_begin < a_end && a_end <= a->nrow && + b_begin + a_end - a_begin <= b->nrow)) + nerv_error(L, "invalid copy interval"); + if (b->ncol != a->ncol) + nerv_error(L, "matrices should be of the same dimension"); + PROFILE_START + CUDA_SAFE_SYNC_CALL( + cudaMemcpy2D(MATRIX_ROW_PTR(b, b_begin), b->stride, + MATRIX_ROW_PTR(a, a_begin), a->stride, + sizeof(MATRIX_ELEM) * a->ncol, a_end - a_begin, + cudaMemcpyDeviceToHost)); + PROFILE_STOP + return 0; +} + +static int nerv_matrix_(trans)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = nerv_matrix_(new_)(L, a->ncol, a->nrow); + MATRIX_ELEM alpha = 1, beta = 0; + /* FIXME: possible memory leak when lua error is raised */ + PROFILE_START + CUBLAS_SAFE_SYNC_CALL( + NERV_CUBLAS_(geam)(cublas_handle, CUBLAS_OP_T, CUBLAS_OP_T, + a->nrow, a->ncol, + &alpha, + MATRIX_ELEM_PTR(a), a->stride / sizeof(MATRIX_ELEM), + &beta, + MATRIX_ELEM_PTR(a), a->stride / sizeof(MATRIX_ELEM), + MATRIX_ELEM_PTR(b), b->stride / sizeof(MATRIX_ELEM))); + PROFILE_STOP + luaT_pushudata(L, b, nerv_matrix_(tname)); + return 1; +} + +static int nerv_matrix_(mul_elem)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 3, nerv_matrix_(tname)); + Matrix *c = luaT_checkudata(L, 1, nerv_matrix_(tname)); + CHECK_SAME_DIMENSION(a, b); + CHECK_SAME_DIMENSION(a, c); + PROFILE_START + cudak_(cuda_mul_elem)(a, b, c); + PROFILE_STOP + return 0; +} + +static int nerv_matrix_(log_elem)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 2, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 1, nerv_matrix_(tname)); + CHECK_SAME_DIMENSION(a, b); + PROFILE_START + cudak_(cuda_log_elem)(a, b); + PROFILE_STOP + return 0; +} + +static int nerv_matrix_(decompress)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b; + int orig_col = luaL_checkinteger(L, 2); + if (a->ncol != 1) + nerv_error(L, "the compressed matrix must be a column vector"); + b = nerv_matrix_(new_)(L, a->nrow, orig_col); + PROFILE_START + cudak_(cuda_fill)(b, 0.0); + cudak_(cuda_decompress)(a, b); + PROFILE_STOP + luaT_pushudata(L, b, nerv_matrix_(tname)); + return 1; +} + +extern const char *nerv_matrix_host_int_tname; +static int nerv_matrix_(copy_rows_fromh_by_idx)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 2, MATRIX_CUMATRIX_HOST_TNAME); + Matrix *idx = luaT_checkudata(L, 3, nerv_matrix_host_int_tname); + long nrow = a->nrow; + int b_begin = lua_gettop(L) > 3 ? 
luaL_checkinteger(L, 4) : 0; + if (!(0 <= b_begin && b_begin + nrow <= idx->ncol)) + nerv_error(L, "invalid copy interval"); + long *idx_ptr = idx->data.i; + int i; + if (idx->nrow != 1) + nerv_error(L, "index should be a vector"); + if (a->ncol != b->ncol) + nerv_error(L, "source/destination dimension mismatch"); + cudaStream_t *streams = (cudaStream_t*)malloc(sizeof(cudaStream_t) * nrow); + for (i = 0; i < nrow; i++) + { + int src_row = idx_ptr[b_begin + i]; + if (!(0 <= src_row && src_row < b->nrow)) + nerv_error(L, "invalid index"); + CUDA_SAFE_CALL(cudaStreamCreate(streams + i)); + CUDA_SAFE_CALL(cudaMemcpyAsync(MATRIX_ROW_PTR(a, i), + MATRIX_ROW_PTR(b, src_row), + b->stride, + cudaMemcpyHostToDevice, streams[i])); + } + for (i = 0; i < nrow; i++) + { + CUDA_SAFE_CALL(cudaStreamSynchronize(streams[i])); + CUDA_SAFE_CALL(cudaStreamDestroy(streams[i])); + } + free(streams); + return 0; +} + +static int nerv_matrix_(expand_frm)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname)); + int context = luaL_checkinteger(L, 3); + if (a->nrow != b->nrow) + nerv_error(L, "mismatching number of frames"); + if (a->ncol != b->ncol * (context * 2 + 1)) + nerv_error(L, "the width should be 2 * context + 1"); + PROFILE_START + cudak_(cuda_expand_frm)(b, a, context); + PROFILE_STOP + return 0; +} + +static int nerv_matrix_(rearrange_frm)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname)); + int step = luaL_checkinteger(L, 3); + CHECK_SAME_DIMENSION(a, b); + if (b->ncol % step) + nerv_error(L, "the dimension of columns is not divisible by step"); + PROFILE_START + cudak_(cuda_rearrange_frm)(b, a, step); + PROFILE_STOP + return 0; +} + +static int nerv_matrix_(scale_rows_by_col)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname)); + if (a->nrow != b->nrow) + nerv_error(L, "the number of rows is not the same"); + if (b->ncol != 1) + nerv_error(L, "a column vector is expected"); + PROFILE_START + cudak_(cuda_scale_rows_by_col)(b, a); + PROFILE_STOP + return 0; +} + +static int nerv_matrix_(scale_rows_by_row)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname)); + if (a->ncol != b->ncol) + nerv_error(L, "the number of columns is not the same"); + if (b->nrow != 1) + nerv_error(L, "a row vector is expected"); + PROFILE_START + cudak_(cuda_scale_rows_by_row)(b, a); + PROFILE_STOP + return 0; +} + +static const luaL_Reg nerv_matrix_(extra_methods)[] = { + {"create", nerv_matrix_(create)}, + {"colsum", nerv_matrix_(colsum)}, + {"colsame", nerv_matrix_(colsame)}, + {"rowsum", nerv_matrix_(rowsum)}, + {"rowmax", nerv_matrix_(rowmax)}, + {"rowmax_idx", nerv_matrix_(rowmax_idx)}, + {"trans", nerv_matrix_(trans)}, + {"decompress", nerv_matrix_(decompress)}, + /* in-place calc */ + {"copy_fromh", nerv_matrix_(copy_fromh)}, + {"copy_fromd", nerv_matrix_(copy_fromd)}, + {"copy_toh", nerv_matrix_(copy_toh)}, + {"add", nerv_matrix_(add)}, + {"mul", nerv_matrix_(mul)}, + {"add_row", nerv_matrix_(add_row)}, + {"fill", nerv_matrix_(fill)}, + {"sigmoid", nerv_matrix_(sigmoid)}, + {"sigmoid_grad", nerv_matrix_(sigmoid_grad)}, + {"softmax", nerv_matrix_(softmax)}, + {"mul_elem", nerv_matrix_(mul_elem)}, + {"log_elem", nerv_matrix_(log_elem)}, + {"copy_rows_fromh_by_idx", nerv_matrix_(copy_rows_fromh_by_idx)}, + 
{"expand_frm", nerv_matrix_(expand_frm)}, + {"rearrange_frm", nerv_matrix_(rearrange_frm)}, + {"scale_rows_by_row", nerv_matrix_(scale_rows_by_row)}, + {"scale_rows_by_col", nerv_matrix_(scale_rows_by_col)}, + {NULL, NULL} +}; + +static void cuda_matrix_(init)(lua_State *L) { + luaN_append_methods(L, nerv_matrix_(extra_methods)); +} + +static void cuda_matrix_(free)(lua_State *L, MATRIX_ELEM *ptr) { + CUDA_SAFE_SYNC_CALL(cudaFree(ptr)); +} + +static void cuda_matrix_(alloc)(lua_State *L, MATRIX_ELEM **dptr, + size_t *stride, long width, long height) { + PROFILE_START + CUDA_SAFE_SYNC_CALL(cudaMallocPitch((void **)dptr, stride, width, height)); + PROFILE_STOP +} + +static MATRIX_ELEM cuda_matrix_(read)(lua_State *L, MATRIX_ELEM *data, + int idx) { + MATRIX_ELEM res; + CUDA_SAFE_SYNC_CALL(cudaMemcpy(&res, data + idx, + sizeof(MATRIX_ELEM), cudaMemcpyDeviceToHost)); + return res; +} + +static void cuda_matrix_(write)(lua_State *L, MATRIX_ELEM *data, + int idx, MATRIX_ELEM val) { + CUDA_SAFE_SYNC_CALL(cudaMemcpy(data + idx, &val, + sizeof(MATRIX_ELEM), cudaMemcpyHostToDevice)); +} + +int nerv_matrix_(get_elem)(lua_State *L) { + return nerv_error_method_not_implemented(L); +} + +int nerv_matrix_(set_elem)(lua_State *L) { + return nerv_error_method_not_implemented(L); +} + +#include "matrix.c" +#endif diff --git a/nerv/matrix/generic/elem_type.h b/nerv/matrix/generic/elem_type.h new file mode 100644 index 0000000..bffe940 --- /dev/null +++ b/nerv/matrix/generic/elem_type.h @@ -0,0 +1,22 @@ +#ifdef MATRIX_USE_FLOAT + +#define MATRIX_ELEM float +#define MATRIX_ELEM_FMT "%f" +#define MATRIX_ELEM_WRITE_FMT "%.8f" +#define MATRIX_ELEM_PTR(self) ((self)->data.f) + +#elif defined(MATRIX_USE_DOUBLE) + +#define MATRIX_ELEM double +#define MATRIX_ELEM_FMT "%lf" +#define MATRIX_ELEM_WRITE_FMT "%.8lf" +#define MATRIX_ELEM_PTR(self) ((self)->data.d) + +#elif defined(MATRIX_USE_INT) + +#define MATRIX_ELEM long +#define MATRIX_ELEM_FMT "%ld" +#define MATRIX_ELEM_WRITE_FMT "%ld" +#define MATRIX_ELEM_PTR(self) ((self)->data.i) + +#endif diff --git a/nerv/matrix/generic/matrix.c b/nerv/matrix/generic/matrix.c new file mode 100644 index 0000000..e17fb42 --- /dev/null +++ b/nerv/matrix/generic/matrix.c @@ -0,0 +1,155 @@ +#ifdef NERV_GENERIC_MATRIX +#include "../../common.h" +#include "matrix.h" + +extern const char *nerv_matrix_(tname); +extern const char *MATRIX_BASE_TNAME; + +void nerv_matrix_(data_free)(lua_State *L, Matrix *self) { + (void)L; + assert(*self->data_ref > 0); + if (--(*self->data_ref) == 0) + { + /* free matrix data */ + MATRIX_DATA_FREE(L, MATRIX_ELEM_PTR(self)); + free(self->data_ref); + free(self); + } +} + +void nerv_matrix_(data_retain)(Matrix *self) { + (*self->data_ref)++; +} + +Matrix *nerv_matrix_(new_)(lua_State *L, long nrow, long ncol) { + Matrix *self = (Matrix *)malloc(sizeof(Matrix)); + self->nrow = nrow; + self->ncol = ncol; + self->nmax = self->nrow * self->ncol; + MATRIX_DATA_ALLOC(L, &MATRIX_ELEM_PTR(self), &self->stride, + sizeof(MATRIX_ELEM) * self->ncol, self->nrow); + self->data_ref = (long *)malloc(sizeof(long)); + *self->data_ref = 0; + nerv_matrix_(data_retain)(self); + return self; +} + +int nerv_matrix_(new)(lua_State *L) { + luaT_pushudata(L, nerv_matrix_(new_)(L, luaL_checkinteger(L, 1), + luaL_checkinteger(L, 2)), + nerv_matrix_(tname)); + return 1; +} + +int nerv_matrix_(destroy)(lua_State *L) { + Matrix *self = luaT_checkudata(L, 1, nerv_matrix_(tname)); + nerv_matrix_(data_free)(L, self); + return 1; +} + +int nerv_matrix_(get_elem)(lua_State *L); +int 
nerv_matrix_(set_elem)(lua_State *L); + +static Matrix *nerv_matrix_(getrow)(Matrix *self, int row) { + Matrix *prow = (Matrix *)malloc(sizeof(Matrix)); + prow->ncol = self->ncol; + prow->nrow = 1; + prow->stride = self->stride; + prow->nmax = prow->ncol; + MATRIX_ELEM_PTR(prow) = MATRIX_ROW_PTR(self, row); + prow->data_ref = self->data_ref; + nerv_matrix_(data_retain)(prow); + return prow; +} + +static int nerv_matrix_(newindex)(lua_State *L) { + Matrix *self = luaT_checkudata(L, 1, nerv_matrix_(tname)); + if (lua_isnumber(L, 2)) + { + int idx = luaL_checkinteger(L, 2); + if (self->nrow == 1) + { + if (idx < 0 || idx >= self->ncol) + nerv_error(L, "index must be within range [0, %d)", self->ncol); + MATRIX_DATA_WRITE(L, MATRIX_ELEM_PTR(self), idx, + luaL_checknumber(L, 3)); + } + else + nerv_error(L, "cannot assign to row vector"); + lua_pushboolean(L, 1); + return 1; + } + else + { + lua_pushboolean(L, 0); + return 1; + } +} + + +static int nerv_matrix_(index)(lua_State *L) { + Matrix *self = luaT_checkudata(L, 1, nerv_matrix_(tname)); + if (lua_isnumber(L, 2)) + { + int idx = luaL_checkinteger(L, 2); + if (self->nrow == 1) + { + if (idx < 0 || idx >= self->ncol) + nerv_error(L, "index must be within range [0, %d)", self->ncol); + lua_pushnumber(L, MATRIX_DATA_READ(L, MATRIX_ELEM_PTR(self), idx)); + } + else + { + if (idx < 0 || idx >= self->nrow) + nerv_error(L, "index must be within range [0, %d)", self->nrow); + luaT_pushudata(L, nerv_matrix_(getrow)(self, idx), nerv_matrix_(tname)); + } + lua_pushboolean(L, 1); + return 2; + } + else + { + lua_pushboolean(L, 0); + return 1; + } +} + +static int nerv_matrix_(ncol)(lua_State *L) { + Matrix *self = luaT_checkudata(L, 1, nerv_matrix_(tname)); + lua_pushinteger(L, self->ncol); + return 1; +} + +static int nerv_matrix_(nrow)(lua_State *L) { + Matrix *self = luaT_checkudata(L, 1, nerv_matrix_(tname)); + lua_pushinteger(L, self->nrow); + return 1; +} + +static int nerv_matrix_(get_dataref_value)(lua_State *L) { + Matrix *self = luaT_checkudata(L, 1, nerv_matrix_(tname)); + lua_pushinteger(L, *(self->data_ref)); + return 1; +} + +static const luaL_Reg nerv_matrix_(methods)[] = { + {"get_elem", nerv_matrix_(get_elem)}, + {"set_elem", nerv_matrix_(set_elem)}, + {"ncol", nerv_matrix_(ncol)}, + {"nrow", nerv_matrix_(nrow)}, + {"get_dataref_value", nerv_matrix_(get_dataref_value)}, + {"__index__", nerv_matrix_(index)}, + {"__newindex__", nerv_matrix_(newindex)}, + {NULL, NULL} +}; + +void nerv_matrix_(init)(lua_State *L) { + luaT_newmetatable(L, nerv_matrix_(tname), MATRIX_BASE_TNAME, + nerv_matrix_(new), nerv_matrix_(destroy), NULL); + luaL_register(L, NULL, nerv_matrix_(methods)); +#ifdef MATRIX_INIT + MATRIX_INIT(L); +#endif + lua_pop(L, 1); +} +#endif diff --git a/nerv/matrix/generic/matrix.h b/nerv/matrix/generic/matrix.h new file mode 100644 index 0000000..833724b --- /dev/null +++ b/nerv/matrix/generic/matrix.h @@ -0,0 +1,19 @@ +#ifndef NERV_GENERIC_MATRIX_H +#define NERV_GENERIC_MATRIX_H + +#include <stddef.h> +typedef struct Matrix { + size_t stride; /* size of a row */ + long ncol, nrow, nmax; /* dimension of the matrix */ + union { + float *f; + double *d; + long *i; + } data; /* pointer to actual storage */ + long *data_ref; +} Matrix; + +#define MATRIX_ROW_PTR(self, row) \ + (MATRIX_ELEM *)((char *)MATRIX_ELEM_PTR(self) + (row) * (self)->stride) + +#endif diff --git a/nerv/matrix/generic/mmatrix.c b/nerv/matrix/generic/mmatrix.c new file mode 100644 index 0000000..b0f0791 --- /dev/null +++ b/nerv/matrix/generic/mmatrix.c @@ -0,0 
+1,122 @@ +#ifdef NERV_GENERIC_MMATRIX +#include "matrix.h" +#include "elem_type.h" +#define MATRIX_DATA_FREE(L, ptr) free(ptr) +#define MATRIX_DATA_ALLOC(L, dptr, stride, width, height) \ + host_matrix_(alloc)(L, dptr, stride, width, height) +#define MATRIX_DATA_WRITE(L, data, idx, val) (data[idx] = val) +#define MATRIX_DATA_READ(L, data, idx) (data[idx]) +#define MATRIX_INIT(L) host_matrix_(init)(L) +#define MATRIX_BASE_TNAME nerv_matrix_host_tname +#define NERV_GENERIC_MATRIX +#include "../../common.h" +#include "../../io/chunk_file.h" +#include "string.h" + +static void host_matrix_(alloc)(lua_State *L, + MATRIX_ELEM **dptr, size_t *stride, + long width, long height) { + if ((*dptr = (MATRIX_ELEM *)malloc(width * height)) == NULL) + nerv_error(L, "mmatrix insufficient memory"); + *stride = width; +} + +int nerv_matrix_(get_elem)(lua_State *L) { + Matrix *self = luaT_checkudata(L, 1, nerv_matrix_(tname)); + int idx = luaL_checkinteger(L, 2); + if (idx < 0 || idx >= self->nmax) + nerv_error(L, "index must be within range [0, %d)", self->nmax); + lua_pushnumber(L, MATRIX_ELEM_PTR(self)[idx]); + return 1; +} + +int nerv_matrix_(set_elem)(lua_State *L) { + Matrix *self = luaT_checkudata(L, 1, nerv_matrix_(tname)); + int idx = luaL_checkinteger(L, 2); + MATRIX_ELEM v = luaL_checknumber(L, 3); + if (idx < 0 || idx >= self->nmax) + nerv_error(L, "index must be within range [0, %d)", self->nmax); + MATRIX_ELEM_PTR(self)[idx] = v; + return 0; +} + +static const luaL_Reg nerv_matrix_(extra_methods)[]; +static void host_matrix_(init)(lua_State *L) { + luaN_append_methods(L, nerv_matrix_(extra_methods)); +#ifdef MMATRIX_INIT + MMATRIX_INIT(L); +#endif +} + +#include "matrix.c" + +int nerv_matrix_(load)(lua_State *L) { + ChunkData *chunk = luaT_checkudata(L, 1, nerv_chunk_data_tname); + Matrix *self; + int i, j; + long nrow, ncol; + FILE *fp = chunk->fp; + if (fscanf(fp, "%ld %ld", &nrow, &ncol) != 2) + return 0; + self = nerv_matrix_(new_)(L, nrow, ncol); + for (i = 0; i < nrow; i++) + { + MATRIX_ELEM *row = MATRIX_ROW_PTR(self, i); + for (j = 0; j < ncol; j++) + if (fscanf(fp, MATRIX_ELEM_FMT, row + j) != 1) + { + free(self); + return 0; + } + } + luaT_pushudata(L, self, nerv_matrix_(tname)); + return 1; +} + +int nerv_matrix_(save)(lua_State *L) { + ChunkFileHandle *chunk = luaT_checkudata(L, 2, + nerv_chunk_file_handle_tname); + Matrix *self = luaT_checkudata(L, 1, nerv_matrix_(tname)); + int i, j; + long nrow = self->nrow, ncol = self->ncol; + FILE *fp = chunk->fp; + if (fprintf(fp, "%ld %ld\n", nrow, ncol) < 0) + return 0; + for (i = 0; i < nrow; i++) + { + MATRIX_ELEM *row = MATRIX_ROW_PTR(self, i); + for (j = 0; j < ncol; j++) + if (fprintf(fp, MATRIX_ELEM_WRITE_FMT " ", row[j]) < 0) + return 0; + if (fprintf(fp, "\n") < 0) + return 0; + } + return 0; +} + +static int nerv_matrix_(copy_from)(lua_State *L) { + Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); + Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname)); + int nargs = lua_gettop(L); + int b_begin = nargs > 2 ? luaL_checkinteger(L, 3) : 0; + int b_end = nargs > 3 ? luaL_checkinteger(L, 4) : b->nrow; + int a_begin = nargs > 4 ? 
luaL_checkinteger(L, 5) : 0; + if (!(0 <= b_begin && b_begin < b_end && b_end <= b->nrow && + a_begin + b_end - b_begin <= a->nrow)) + nerv_error(L, "invalid copy interval"); + if (a->ncol != b->ncol) + nerv_error(L, "matrices should be of the same dimension"); + memmove(MATRIX_ROW_PTR(a, a_begin), + MATRIX_ROW_PTR(b, b_begin), + sizeof(MATRIX_ELEM) * b->ncol * (b_end - b_begin)); + return 0; +} + +static const luaL_Reg nerv_matrix_(extra_methods)[] = { + {"load", nerv_matrix_(load)}, + {"save", nerv_matrix_(save)}, + {"copy_from", nerv_matrix_(copy_from)}, + {NULL, NULL} +}; + +#endif diff --git a/nerv/matrix/init.c b/nerv/matrix/init.c new file mode 100644 index 0000000..c29d7e9 --- /dev/null +++ b/nerv/matrix/init.c @@ -0,0 +1,35 @@ +#include "../common.h" +#include "generic/matrix.h" + +const char *nerv_matrix_tname = "nerv.Matrix"; +const char *nerv_matrix_cuda_tname = "nerv.CuMatrix"; +const char *nerv_matrix_host_tname = "nerv.MMatrix"; + +void nerv_cumatrix_init(lua_State *L); +void nerv_mmatrix_init(lua_State *L); + +static const luaL_Reg matrix_methods[] = { + {"__tostring__", nerv_error_method_not_implemented }, + {"__add__", nerv_error_method_not_implemented }, + {"__sub__", nerv_error_method_not_implemented }, + {"__mul__", nerv_error_method_not_implemented }, + {NULL, NULL} +}; + +void nerv_matrix_init(lua_State *L) { + /* abstract base class: Matrix */ + luaT_newmetatable(L, nerv_matrix_tname, NULL, NULL, NULL, NULL); + luaL_register(L, NULL, matrix_methods); + lua_pop(L, 1); + + /* CuMatrix inherits from Matrix */ + luaT_newmetatable(L, nerv_matrix_cuda_tname, nerv_matrix_tname, + NULL, NULL, NULL); + nerv_cumatrix_init(L); + lua_pop(L, 1); + /* MMatrix inherits from Matrix */ + luaT_newmetatable(L, nerv_matrix_host_tname, nerv_matrix_tname, + NULL, NULL, NULL); + nerv_mmatrix_init(L); + lua_pop(L, 1); +} diff --git a/nerv/matrix/init.lua b/nerv/matrix/init.lua new file mode 100644 index 0000000..1a8925f --- /dev/null +++ b/nerv/matrix/init.lua @@ -0,0 +1,77 @@ +function nerv.Matrix:__tostring__() + local ncol = self:ncol() + local nrow = self:nrow() + local strt = {} + local fmt + if self.fmt then + fmt = self.fmt + else + fmt = "%.8f " + end + if nrow == 1 then + for col = 0, ncol - 1 do + table.insert(strt, string.format(fmt, self[col])) + end + table.insert(strt, "\n") + else + for row = 0, nrow - 1 do + local rp = self[row] + for col = 0, ncol - 1 do + table.insert(strt, string.format(fmt, rp[col])) + end + table.insert(strt, "\n") + end + end + table.insert(strt, string.format( + "[%s %d x %d]", self.__typename, nrow, ncol)) + return table.concat(strt) +end + +-- gen: a function takes take indices of the matrix and return the generated +-- all entrys in the matrix will be assigned by calling gen(i, j) +function nerv.Matrix:generate(gen) + if (self:nrow() == 1) then + for j = 0, self:ncol() - 1 do + self[j] = gen(j) + end + else + for i = 0, self:nrow() - 1 do + local row = self[i] + for j = 0, self:ncol() - 1 do + row[j] = gen(i, j) + end + end + end +end + +nerv.MMatrixInt.fmt = "%d " + +function nerv.CuMatrix:__add__(b) + c = self:create() + c:add(self, b, 1.0, 1.0) + return c +end + +function nerv.CuMatrix:__sub__(b) + c = self:create() + c:add(self, b, 1.0, -1.0) + return c +end + +function nerv.CuMatrix:__mul__(b) + c = nerv.get_type(self.__typename)(self:nrow(), b:ncol()) + c:mul(self, b, 1.0, 0.0, 'N', 'N') + return c +end + +function nerv.CuMatrixFloat.new_from_host(mat) + local res = nerv.CuMatrixFloat(mat:nrow(), mat:ncol()) + res:copy_fromh(mat) + return 
res +end + +function nerv.CuMatrixFloat:new_to_host() + local res = nerv.MMatrixFloat(self:nrow(), self:ncol()) + self:copy_toh(res) + return res +end diff --git a/nerv/matrix/mmatrix.c b/nerv/matrix/mmatrix.c new file mode 100644 index 0000000..d1d68b9 --- /dev/null +++ b/nerv/matrix/mmatrix.c @@ -0,0 +1,77 @@ +#define NERV_GENERIC_MMATRIX +#include <stdlib.h> +#include "../common.h" +void nerv_matrix_host_float_init(lua_State *L); +void nerv_matrix_host_double_init(lua_State *L); +void nerv_matrix_host_int_init(lua_State *L); + +void nerv_mmatrix_init(lua_State *L) { + srand(1); + nerv_matrix_host_float_init(L); + nerv_matrix_host_double_init(L); + nerv_matrix_host_int_init(L); +} + +#define MATRIX_USE_FLOAT +#define host_matrix_(NAME) host_matrix_float_##NAME +#define nerv_matrix_(NAME) nerv_matrix_host_float_##NAME +const char *nerv_matrix_(tname) = "nerv.MMatrixFloat"; +#include "generic/mmatrix.c" +#undef nerv_matrix_ +#undef host_matrix_ +#undef MATRIX_USE_FLOAT +#undef MATRIX_ELEM +#undef MATRIX_ELEM_PTR +#undef MATRIX_ELEM_FMT +#undef MATRIX_ELEM_WRITE_FMT + +#define NERV_GENERIC_MMATRIX +#define MATRIX_USE_DOUBLE +#define host_matrix_(NAME) host_matrix_double_##NAME +#define nerv_matrix_(NAME) nerv_matrix_host_double_##NAME +const char *nerv_matrix_(tname) = "nerv.MMatrixDouble"; +#include "generic/mmatrix.c" +#undef nerv_matrix_ +#undef host_matrix_ +#undef MATRIX_USE_DOUBLE +#undef MATRIX_ELEM +#undef MATRIX_ELEM_PTR +#undef MATRIX_ELEM_FMT +#undef MATRIX_ELEM_WRITE_FMT + +#define NERV_GENERIC_MMATRIX +#define MATRIX_USE_INT +#define host_matrix_(NAME) host_matrix_int_##NAME +#define nerv_matrix_(NAME) nerv_matrix_host_int_##NAME +const char *nerv_matrix_(tname) = "nerv.MMatrixInt"; +#define MMATRIX_INIT(L) host_matrix_(init_extra)(L) + +static const luaL_Reg nerv_matrix_(extra_methods_int)[]; +static void host_matrix_(init_extra)(lua_State *L) { + luaN_append_methods(L, nerv_matrix_(extra_methods_int)); +} + +#include "generic/mmatrix.c" + +static int nerv_matrix_(perm_gen)(lua_State *L) { + int i, ncol = luaL_checkinteger(L, 1); + Matrix *self = nerv_matrix_(new_)(L, 1, ncol); + long *prow = self->data.i; + for (i = 0; i < ncol; i++) + prow[i] = i; + for (i = ncol - 1; i >= 0; i--) + { + size_t j = rand() % (i + 1); + long tmp = prow[i]; + prow[i] = prow[j]; + prow[j] = tmp; + } + luaT_pushudata(L, self, nerv_matrix_(tname)); + return 1; +} + +static const luaL_Reg nerv_matrix_(extra_methods_int)[] = { + {"perm_gen", nerv_matrix_(perm_gen)}, + {NULL, NULL} +}; + diff --git a/nerv/nerv b/nerv/nerv new file mode 100644 index 0000000..7571659 --- /dev/null +++ b/nerv/nerv @@ -0,0 +1,13 @@ +#! /usr/bin/env luajit +require 'nerv' +print("Greetings") +if #arg < 1 then + return +end +local script = arg[1] +local script_arg = {} +for i = 2, #arg do + table.insert(script_arg, arg[i]) +end +arg = script_arg +dofile(script) diff --git a/nerv/nerv-scm-1.rockspec b/nerv/nerv-scm-1.rockspec new file mode 100644 index 0000000..d14140a --- /dev/null +++ b/nerv/nerv-scm-1.rockspec @@ -0,0 +1,38 @@ +package = "nerv" +version = "scm-1" +source = { + url = "..." -- We don't have one yet +} +description = { + summary = "An example for the LuaRocks tutorial.", + detailed = [[ + ]], + homepage = "https://github.com/Determinant/nerv", -- We don't have one yet + license = "BSD" -- or whatever you like +} +dependencies = { + "lua >= 5.1" + -- If you depend on other rocks, add them here +} +build = { + -- We'll start here. 
+ type = "make", + build_variables = { + CFLAGS="$(CFLAGS)", + LIBFLAG="$(LIBFLAG)", + LUA_LIBDIR="$(LUA_LIBDIR)", + LUA_BINDIR="$(LUA_BINDIR)", + LUA_INCDIR="$(LUA_INCDIR)", + LUA="$(LUA)", + }, + install_variables = { + INST_PREFIX="$(PREFIX)", + INST_BINDIR="$(BINDIR)", + INST_LIBDIR="$(LIBDIR)", + INST_LUADIR="$(LUADIR)", + INST_CONFDIR="$(CONFDIR)", + }, + install = { + bin = {"nerv"} + } +} diff --git a/nerv/nerv.c b/nerv/nerv.c new file mode 100644 index 0000000..a59eadc --- /dev/null +++ b/nerv/nerv.c @@ -0,0 +1,38 @@ +#include "common.h" + +extern void nerv_example_init(lua_State *L); +extern void nerv_matrix_init(lua_State *L); +extern void nerv_io_init(lua_State *L); + +static const luaL_Reg nerv_utils_methods[] = { + {"setmetatable", luaT_lua_setmetatable}, + {"getmetatable", luaT_lua_getmetatable}, + {"newmetatable", luaT_lua_newmetatable}, + {"typename", luaT_lua_typename}, + {NULL, NULL} +}; + +void nerv_utils_init(lua_State *L) { + luaL_register(L, NULL, nerv_utils_methods); +} + +int luaopen_libnerv(lua_State *L) { + lua_newtable(L); + /* duplicate table */ + lua_pushvalue(L, -1); + /* set table to global index */ + lua_setfield(L, LUA_GLOBALSINDEX, "nerv"); + /* A table reference still remains. + * + * The following initialization functions should obey to the rule that they + * maintain the stack properly to guarantee the stack stays the same before + * and after invoking the call (i.e. stay balanced). + * + * Also note that they can make use of the value at top of the stack which + * references to the `nerv` global table. */ + nerv_utils_init(L); + nerv_example_init(L); + nerv_matrix_init(L); + nerv_io_init(L); + return 1; +} diff --git a/nerv/nn/init.lua b/nerv/nn/init.lua new file mode 100644 index 0000000..cbaf52b --- /dev/null +++ b/nerv/nn/init.lua @@ -0,0 +1,3 @@ +nerv.include('layer_repo.lua') +nerv.include('param_repo.lua') +nerv.include('layer_dag.lua') diff --git a/nerv/nn/layer_dag.lua b/nerv/nn/layer_dag.lua new file mode 100644 index 0000000..8e30216 --- /dev/null +++ b/nerv/nn/layer_dag.lua @@ -0,0 +1,249 @@ +local DAGLayer = nerv.class("nerv.DAGLayer", "nerv.Layer") + +local function parse_id(str) + local id, port, _ + _, _, id, port = string.find(str, "([a-zA-Z0-9_]+)%[([0-9]+)%]") + if id == nil or port == nil then + _, _, id, port = string.find(str, "(.+)%[([0-9]+)%]") + if not (id == "<input>" or id == "<output>") then + nerv.error("wrong format of connection id") + end + end + port = tonumber(port) + return id, port +end + +local function discover(id, layers, layer_repo) + local ref = layers[id] + if id == "<input>" or id == "<output>" then + return nil + end + if ref == nil then + local layer = layer_repo:get_layer(id) + local dim_in, dim_out = layer:get_dim() + ref = { + layer = layer, + inputs = {}, + outputs = {}, + err_inputs = {}, + err_outputs = {}, + next_layers = {}, + input_len = #dim_in, + output_len = #dim_out, + in_deg = 0, + visited = false + } + layers[id] = ref + end + return ref +end + +function DAGLayer:__init(id, global_conf, layer_conf) + local layers = {} + local inputs = {} + local outputs = {} + local dim_in = layer_conf.dim_in + local dim_out = layer_conf.dim_out + local parsed_conn = {} + for from, to in pairs(layer_conf.connections) do + local id_from, port_from = parse_id(from) + local id_to, port_to = parse_id(to) + local ref_from = discover(id_from, layers, layer_conf.sub_layers) + local ref_to = discover(id_to, layers, layer_conf.sub_layers) + local input_dim, output_dim, _ + if ref_from and ref_from.outputs[port_from] ~= 
nil then + nerv.error("%s has already been attached", from) + end + if ref_to and ref_to.inputs[port_to] ~= nil then + nerv.error("%s has already been attached", to) + end + if id_from == "<input>" then + input_dim, _ = ref_to.layer:get_dim() + if dim_in[port_from] ~= input_dim[port_to] then + nerv.error("mismatching data dimension between %s and %s", from, to) + end + inputs[port_from] = {ref_to, port_to} + ref_to.inputs[port_to] = inputs -- just a place holder + elseif id_to == "<output>" then + _, output_dim = ref_from.layer:get_dim() + if output_dim[port_from] ~= dim_out[port_to] then + nerv.error("mismatching data dimension between %s and %s", from, to) + end + outputs[port_to] = {ref_from, port_from} + ref_from.outputs[port_from] = outputs -- just a place holder + else + _, output_dim = ref_from.layer:get_dim() + input_dim, _ = ref_to.layer:get_dim() + if output_dim[port_from] ~= input_dim[port_to] then + nerv.error("mismatching data dimension between %s and %s", from, to) + end + + table.insert(parsed_conn, + {{ref_from, port_from}, {ref_to, port_to}}) + table.insert(ref_from.next_layers, ref_to) -- add edge + ref_to.in_deg = ref_to.in_deg + 1 -- increase the in-degree of the target layer + end + end + + -- topology sort + local queue = {} + local l = 1 + local r = 1 + for id, ref in pairs(layers) do + if ref.in_deg == 0 then + table.insert(queue, ref) + nerv.info("adding source layer: %s", id) + r = r + 1 + end + end + if l == r then + nerv.error("loop detected") + end + while l < r do + local cur = queue[l] + cur.visited = true + l = l + 1 + for _, nl in pairs(cur.next_layers) do + nl.in_deg = nl.in_deg - 1 + if nl.in_deg == 0 then + table.insert(queue, nl) + r = r + 1 + end + end + end + for i = 1, #queue do + nerv.info("enqueued layer: %s", queue[i].layer.id) + end + + for id, ref in pairs(layers) do + -- check wether the graph is connected + if ref.visited == false then + nerv.warning("layer %s is ignored", id) + end + end + + self.layers = layers + self.inputs = inputs + self.outputs = outputs + self.dim_in = dim_in + self.dim_out = dim_out + self.parsed_conn = parsed_conn + self.queue = queue + self.gconf = global_conf +end + +function DAGLayer:init(batch_size) + for i, conn in ipairs(self.parsed_conn) do + local _, output_dim + local ref_from, port_from, ref_to, port_to + ref_from, port_from = unpack(conn[1]) + ref_to, port_to = unpack(conn[2]) + _, output_dim = ref_from.layer:get_dim() + local mid = self.gconf.cumat_type(batch_size, + output_dim[port_from]) + local err_mid = mid:create() + + ref_from.outputs[port_from] = mid + ref_to.inputs[port_to] = mid + + ref_from.err_inputs[port_from] = err_mid + ref_to.err_outputs[port_to] = err_mid + end + for id, ref in pairs(self.layers) do + for i = 1, ref.input_len do + if ref.inputs[i] == nil then + nerv.error("dangling input port %d of layer %s", i, id) + end + end + for i = 1, ref.output_len do + if ref.outputs[i] == nil then + nerv.error("dangling output port %d of layer %s", i, id) + end + end + -- initialize sub layers + ref.layer:init(batch_size) + end + for i = 1, #self.dim_in do + if self.inputs[i] == nil then + nerv.error("dangling port %d of layer <input>", i) + end + end + for i = 1, #self.dim_out do + if self.outputs[i] == nil then + nerv.error("dangling port %d of layer <output>", i) + end + end +end + +function DAGLayer:set_inputs(input) + for i = 1, #self.dim_in do + local layer = self.inputs[i][1] + local port = self.inputs[i][2] + layer.inputs[port] = input[i] + end +end + +function DAGLayer:set_outputs(output) 
+ for i = 1, #self.dim_out do + local layer = self.outputs[i][1] + local port = self.outputs[i][2] + layer.outputs[port] = output[i] + end +end + +function DAGLayer:set_err_inputs(bp_err) + for i = 1, #self.dim_out do + local layer = self.outputs[i][1] + local port = self.outputs[i][2] + layer.err_inputs[port] = bp_err[i] + end +end + +function DAGLayer:set_err_outputs(next_bp_err) + for i = 1, #self.dim_in do + local layer = self.inputs[i][1] + local port = self.inputs[i][2] + layer.err_outputs[port] = next_bp_err[i] + end +end + +function DAGLayer:update(bp_err, input, output) + self:set_err_inputs(bp_err) + self:set_inputs(input) + self:set_outputs(output) + -- print("update") + for id, ref in pairs(self.queue) do + -- print(ref.layer.id) + ref.layer:update(ref.err_inputs, ref.inputs, ref.outputs) + end +end + +function DAGLayer:propagate(input, output) + self:set_inputs(input) + self:set_outputs(output) + for i = 1, #self.queue do + local ref = self.queue[i] + -- print(ref.layer.id) + ref.layer:propagate(ref.inputs, ref.outputs) + end +end + +function DAGLayer:back_propagate(bp_err, next_bp_err, input, output) + self:set_err_outputs(next_bp_err) + self:set_err_inputs(bp_err) + self:set_inputs(input) + self:set_outputs(output) + for i = #self.queue, 1, -1 do + local ref = self.queue[i] + -- print(ref.layer.id) + ref.layer:back_propagate(ref.err_inputs, ref.err_outputs, ref.inputs, ref.outputs) + end +end + +function DAGLayer:get_params() + local param_repos = {} + for id, ref in pairs(self.queue) do + table.insert(param_repos, ref.layer:get_params()) + end + return nerv.ParamRepo.merge(param_repos) +end diff --git a/nerv/nn/layer_repo.lua b/nerv/nn/layer_repo.lua new file mode 100644 index 0000000..602c37c --- /dev/null +++ b/nerv/nn/layer_repo.lua @@ -0,0 +1,34 @@ +local LayerRepo = nerv.class("nerv.LayerRepo") + +function LayerRepo:__init(layer_spec, param_repo, global_conf) + local layers = {} + for ltype, llist in pairs(layer_spec) do + local layer_type = nerv.get_type(ltype) + for id, spec in pairs(llist) do + if layers[id] ~= nil then + nerv.error("a layer with id %s already exists", id) + end + nerv.info("create layer: %s", id) + if type(spec[2]) ~= "table" then + nerv.error("layer config table is need") + end + layer_config = spec[2] + if type(spec[1]) ~= "table" then + nerv.error("parameter description table is needed") + end + for pname, pid in pairs(spec[1]) do + layer_config[pname] = param_repo:get_param(pid) + end + layers[id] = layer_type(id, global_conf, layer_config) + end + end + self.layers = layers +end + +function LayerRepo:get_layer(lid) + local layer = self.layers[lid] + if layer == nil then + nerv.error("layer with id %s not found", lid) + end + return layer +end diff --git a/nerv/nn/param_repo.lua b/nerv/nn/param_repo.lua new file mode 100644 index 0000000..ab971ba --- /dev/null +++ b/nerv/nn/param_repo.lua @@ -0,0 +1,76 @@ +local ParamRepo = nerv.class("nerv.ParamRepo") +function ParamRepo:__init(plist) + self.params = {} + if plist ~= nil then + for i, p in ipairs(plist) do + self.params[p.id] = p + end + end +end + +function ParamRepo:add(pid, p) + if self.params[pid] ~= nil then + nerv.error("duplicate params with the same id: %s", pid) + end + self.params[pid] = p +end + +function ParamRepo:remove(pid, p) + if self.params[pid] == nil then + nerv.error("param %s does not exit", pid) + end + table.remove(self.params, pid) +end + +function ParamRepo.merge(repos) + local self = nerv.ParamRepo() + for i, repo in ipairs(repos) do + if not nerv.is_type(repo, 
"nerv.ParamRepo") then + nerv.error("nerv.ParamRepo objects expected, got %s", repo) + end + for pid, p in pairs(repo.params) do + self:add(pid, p) + end + end + return self +end + +function ParamRepo:import(param_files, pids, gconf) + if type(param_files) ~= "table" then + nerv.error("param file table is need") + end + for i = 1, #param_files do + local pf = nerv.ChunkFile(param_files[i], "r") + for cid, cspec in pairs(pf.metadata) do + if pids == nil or pids[cid] ~= nil then + local p = pf:read_chunk(cid, gconf) + if not nerv.is_type(p, "nerv.Param") then + nerv.error("param chunk is expected") + end + self:add(cid, p) + end + end + end +end + +function ParamRepo:export(param_file, pids) + cf = nerv.ChunkFile(param_file, "w") + if pids == nil then + for id, p in pairs(self.params) do + cf:write_chunk(p) + end + else + for i, pid in ipairs(pids) do + cf:write_chunk(self:get_param(pid)) + end + end + cf:close() +end + +function ParamRepo:get_param(pid) + local p = self.params[pid] + if p == nil then + nerv.error("param with id %s not found", pid) + end + return p +end |