59 files changed, 2008 insertions, 1196 deletions
diff --git a/.gitmodules b/.gitmodules index 9f556c5..2b346c4 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,6 +1,9 @@ [submodule "luajit-2.0"] path = luajit-2.0 - url = http://luajit.org/git/luajit-2.0.git + url = https://speechlab.sjtu.edu.cn/gitlab/nerv-dev/luajit.git [submodule "luarocks"] path = luarocks - url = https://github.com/keplerproject/luarocks.git + url = https://speechlab.sjtu.edu.cn/gitlab/nerv-dev/luarocks.git +[submodule "Penlight"] + path = Penlight + url = https://speechlab.sjtu.edu.cn/gitlab/nerv-dev/Penlight.git @@ -1,19 +1,42 @@ .PHONY: all clean install luajit luarocks speech +############## EDIT THESE LINES ##################### SHELL := /bin/bash PREFIX := $(CURDIR)/install/ -all: luajit luarocks install +#CUDA_BASE := /usr/local/cuda-7.0 +CUDA_BASE := /usr/local/cuda +BLAS_BASE := /usr/lib/ +BLAS_LDFLAGS := -L$(BLAS_BASE) -Wl,-rpath=$(BLAS_BASE) +BLAS_TYPE := atlas +KALDI_BASE := /speechlab/tools/KALDI/kaldi-master/ +####################################################### +MKL_LDFLAGS := -lmkl_rt +ATLAS_LDFLAGS := -lcblas -llapack_atlas +ifeq ($(BLAS_TYPE), mkl) +BLAS_LDFLAGS += $(MKL_LDFLAGS) +else ifeq ($(BLAS_TYPE), atlas) +BLAS_LDFLAGS += $(ATLAS_LDFLAGS) +else +$(error Invalid blas type) +endif +export CUDA_BASE +export KALDI_BASE +export BLAS_LDFLAGS + +.PHONY: nerv speech/speech_utils speech/htk_io speech/kaldi_io speech/kaldi_decode \ + nerv-clean speech/speech_utils-clean speech/htk_io-clean speech/kaldi_io-clean speech/kaldi_decode-clean \ + Penlight + +all: luajit luarocks Penlight nerv luajit: PREFIX=$(PREFIX) ./tools/build_luajit.sh luarocks: PREFIX=$(PREFIX) ./tools/build_luarocks.sh -install: - cd nerv; $(PREFIX)/bin/luarocks make CFLAGS=$(CFLAGS) -speech: - cd speech/speech_utils; $(PREFIX)/bin/luarocks make - cd speech/htk_io; $(PREFIX)/bin/luarocks make - cd speech/kaldi_io; $(PREFIX)/bin/luarocks make -clean: - cd nerv && make clean - cd speech/speech_utils && make clean - cd speech/htk_io && make clean - cd speech/kaldi_io && make clean + +speech: speech/speech_utils speech/htk_io speech/kaldi_io speech/kaldi_decode +speech-clean: speech/speech_utils-clean speech/htk_io-clean speech/kaldi_io-clean speech/kaldi_decode-clean +clean: nerv-clean speech-clean + +nerv Penlight speech/speech_utils speech/htk_io speech/kaldi_io speech/kaldi_decode: + cd $@; $(PREFIX)/bin/luarocks make +nerv-clean speech/speech_utils-clean speech/htk_io-clean speech/kaldi_io-clean speech/kaldi_decode-clean: + cd $(subst -clean,,$@); make clean LUA_BINDIR=$(PREFIX)/bin/ diff --git a/Penlight b/Penlight new file mode 160000 +Subproject 16d149338af9efc910528641c5240c5641aeb8d diff --git a/README.md b/README.md deleted file mode 100644 index fe9dfc1..0000000 --- a/README.md +++ /dev/null @@ -1,55 +0,0 @@ -#The Nerv Toolkit User Manual# -NOTE: This readme is obsolete and will be rearranged, for further information, please check http://nerv-sjtu.github.io/nerv/ - -This user manual will information about how to use __Nerv__ and __Nerv__'s interface. - -##How to make and start using## -First make sure you have __lua__ and __CUDA__ installed on your computer. -__Nerv__ is currently developed via github.You can download and make __Nerv__ by doing the following: -``` -cd ~ -git clone https://github.com/Nerv-SJTU/nerv.git -cd nerv -git submodule init && git submodule update -make -#To include some new CUDA feature(e.x. 
atomicCAS), use "make CFLAGS=-D__NERV_FUTURE_CUDA_7" - -#further, if you want the speech modules -git clone https://github.com/Nerv-SJTU/nerv-speech.git speech -make speech -``` -The `git submodule` command is for the __luajit__ repository inside __Nerv__. -Now, you can try to run some example scripts. -``` -./install/bin/nerv examples/cumatrix_example.lua -``` -To get an example of DNN(for ASR) training, run(this requires the speech modules) -You need to be at or (copy files from) `/slfs1`(SJTU speechlab cluster) to get this running. -``` -./install/bin/nerv nerv/examples/asr_trainer.lua nerv/examples/swb_baseline.lua -``` - -##How to contribute## -Fork the original repository, then use the __pull&merge__ function in github to contribute. -The pull&merge request can be found on your dashboard in github. See this [sync-help] to sync with the original repository. - -##Nerv Packages## -* __luaT__ -Nerv uses [luaT]\(a [Torch] library\) to define lua class in C. -* __[The Nerv OOP](nerv/doc/nerv_class.md)__ -Enables object-oriented programming in Nerv. -* __[The Nerv utility functions](nerv/doc/nerv.md)__ -Inlcudes some utility functions from luaT to implement __Nerv.Class__. -* __[The Nerv Matrix Package](nerv/doc/nerv_matrix.md)__ -The matrix package is a basic package in __Nerv__ that is used to store and manipulate matrices. -* __[The Nerv IO Package](nerv/doc/nerv_io.md)__ -The IO package is used to read and write parameters to file. -* __[The Nerv Parameter Package](nerv/doc/nerv_param.md)__ -The parameter package is used to store, read model parameters from file. -* __[The Nerv Layer Package](nerv/doc/nerv_layer.md)__ -The layer package is used to define propagation and backpropagation of different type of layers. -* __[The Nerv NN Package](nerv/doc/nerv_nn.md)__ -The nn package is for organizing a neural network, it contains __nerv.LayerRepo__, __nerv.ParamRepo__, and __nerv.DAGLayer__. -[luaT]:https://github.com/torch/torch7/tree/master/lib/luaT -[Torch]:https://github.com/torch -[sync-help]:https://help.github.com/articles/syncing-a-fork/ diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..c00743c --- /dev/null +++ b/README.rst @@ -0,0 +1,64 @@ +NERV Toolkit +============ + +NOTE: This readme is in-progress. + +Installation +------------ +First, make sure you have at least one implementation of BLAS and CUDA installed +on your computer. + +- Checkout NERV: + + :: + + bash + git clone https://speechlab.sjtu.edu.cn/gitlab/nerv-dev/nerv.git + +- Checkout submodules (luajit, luarocks, Penlight, etc.): + + :: + + cd nerv + git submodule init && git submodule update + +- Build NERV: you can specify either ``mkl`` or ``atlas`` to ``BLAS_TYPE``. + ``BLAS_BASE`` is the directory containing BLAS ``.so`` files. By default, + ``atlas`` is used for ``BLAS_TYPE``, ``/usr/lib/`` is used for ``BLAS_BASE``, + and ``/usr/local/cuda`` is used for ``CUDA_BASE``. + + :: + + # an example for compiling on SJTU Speechlab major cluster + make BLAS_TYPE=mkl BLAS_BASE=/home/intel/mkl/lib/intel64/ CUDA_BASE=/usr/local/cuda + +- To include some new features (e.g. ``atomicCAS`` in CUDA), add corresponding flags to + ``NERV_FEAT`` (e.g. 
``NERV_FEAT=-D__NERV_FUTURE_CUDA_7``) while making: + + :: + + make NERV_FEAT=-D__NERV_FUTURE_CUDA_7 BLAS_TYPE=mkl BLAS_BASE=/home/intel/mkl/lib/intel64/ CUDA_BASE=/usr/local/cuda + +- For speech tasks, you need to install related lua rocks (Lua packages): + + :: + + # checkout speech repository to local directory nerv/speech (suppose you're + # still at the root directory of NERV repo) + git clone https://speechlab.sjtu.edu.cn/gitlab/nerv-dev/nerv-speech.git speech + # build and install HTK I/O support, Kaldi I/O support, Kaldi decoding support, etc. + make speech BLAS_TYPE=mkl BLAS_BASE=/home/intel/mkl/lib/intel64/ + +Example & Tutorial +------------------ +For speech tasks, please refer to ``tutorial/`` in ``nerv-speech`` repository. + +Contribution +------------ +The basic rule is simple: just fork the original repository, then create a pull +request (merge request) to the administrator of the project. If you want to fix +any bugs in existing code, don't hesitate to create a pull (merge) request to +the repository with clear and detailed analysis of the problem. If you want to +add additional task-specific functionalities (modules) for speech to NERV, +please create a luarocks-compliant package and also a pull (merge) request to +the ``nerv-speech`` repository instead of ``nerv``. diff --git a/lua/config.lua b/lua/config.lua deleted file mode 100644 index 1ec1198..0000000 --- a/lua/config.lua +++ /dev/null @@ -1,67 +0,0 @@ -function get_global_conf() - local global_conf = { - lrate = 0.15, - wcost = 1e-5, - momentum = 0, - clip = 5, - cumat_type = nerv.CuMatrixFloat, - mmat_type = nerv.MMatrixFloat, - vocab_size = 10000, - nn_act_default = 0, - hidden_size = 300, - layer_num = 1, - chunk_size = 15, - batch_size = 20, - max_iter = 3, - param_random = function() return (math.random() / 5 - 0.1) end, - dropout = 0.5, - timer = nerv.Timer(), - pr = nerv.ParamRepo(), - } - return global_conf -end - -function get_layers(global_conf) - local pr = global_conf.pr - local layers = { - ['nerv.LSTMLayer'] = {}, - ['nerv.DropoutLayer'] = {}, - ['nerv.SelectLinearLayer'] = { - ['select'] = {dim_in = {1}, dim_out = {global_conf.hidden_size}, vocab = global_conf.vocab_size, pr = pr}, - }, - ['nerv.CombinerLayer'] = {}, - ['nerv.AffineLayer'] = { - output = {dim_in = {global_conf.hidden_size}, dim_out = {global_conf.vocab_size}, pr = pr} - }, - ['nerv.SoftmaxCELayer'] = { - softmax = {dim_in = {global_conf.vocab_size, global_conf.vocab_size}, dim_out = {1}, compressed = true}, - }, - } - for i = 1, global_conf.layer_num do - layers['nerv.LSTMLayer']['lstm' .. i] = {dim_in = {global_conf.hidden_size, global_conf.hidden_size, global_conf.hidden_size}, dim_out = {global_conf.hidden_size, global_conf.hidden_size}, pr = pr} - layers['nerv.DropoutLayer']['dropout' .. i] = {dim_in = {global_conf.hidden_size}, dim_out = {global_conf.hidden_size}} - layers['nerv.CombinerLayer']['dup' .. i] = {dim_in = {global_conf.hidden_size}, dim_out = {global_conf.hidden_size, global_conf.hidden_size}, lambda = {1}} - end - return layers -end - -function get_connections(global_conf) - local connections = { - {'<input>[1]', 'select[1]', 0}, - {'select[1]', 'lstm1[1]', 0}, - {'dropout' .. global_conf.layer_num .. '[1]', 'output[1]', 0}, - {'output[1]', 'softmax[1]', 0}, - {'<input>[2]', 'softmax[2]', 0}, - {'softmax[1]', '<output>[1]', 0}, - } - for i = 1, global_conf.layer_num do - table.insert(connections, {'lstm' .. i .. '[1]', 'dup' .. i .. '[1]', 0}) - table.insert(connections, {'lstm' .. i .. '[2]', 'lstm' .. i .. 
'[3]', 1}) - table.insert(connections, {'dup' .. i .. '[1]', 'lstm' .. i .. '[2]', 1}) - table.insert(connections, {'dup' .. i .. '[2]', 'dropout' .. i .. '[1]', 0}) - if i > 1 then - table.insert(connections, {'dropout' .. (i - 1) .. '[1]', 'lstm' .. i .. '[1]', 0}) - end - end - return connections -end diff --git a/lua/main.lua b/lua/main.lua deleted file mode 100644 index ce0270a..0000000 --- a/lua/main.lua +++ /dev/null @@ -1,45 +0,0 @@ -nerv.include('reader.lua') -nerv.include('timer.lua') -nerv.include('config.lua') -nerv.include(arg[1]) - -local global_conf = get_global_conf() -local timer = global_conf.timer - -timer:tic('IO') - -local data_path = 'nerv/nerv/examples/lmptb/PTBdata/' -local train_reader = nerv.Reader(data_path .. 'vocab', data_path .. 'ptb.valid.txt.adds') -local val_reader = nerv.Reader(data_path .. 'vocab', data_path .. 'ptb.valid.txt.adds') - -local train_data = train_reader:get_all_batch(global_conf) -local val_data = val_reader:get_all_batch(global_conf) - -local layers = get_layers(global_conf) -local connections = get_connections(global_conf) - -local NN = nerv.NN(global_conf, train_data, val_data, layers, connections) - -timer:toc('IO') -timer:check('IO') -io.flush() - -timer:tic('global') -local best_cv = 1e10 -for i = 1, global_conf.max_iter do - timer:tic('Epoch' .. i) - local train_ppl, val_ppl = NN:epoch() - if val_ppl < best_cv then - best_cv = val_ppl - else - global_conf.lrate = global_conf.lrate / 2.0 - end - nerv.printf('Epoch %d: %f %f %f\n', i, global_conf.lrate, train_ppl, val_ppl) - timer:toc('Epoch' .. i) - timer:check('Epoch' .. i) - io.flush() -end -timer:toc('global') -timer:check('global') -timer:check('network') -timer:check('gc') diff --git a/lua/network.lua b/lua/network.lua deleted file mode 100644 index 0c11321..0000000 --- a/lua/network.lua +++ /dev/null @@ -1,113 +0,0 @@ -nerv.include('select_linear.lua') - -local nn = nerv.class('nerv.NN') - -function nn:__init(global_conf, train_data, val_data, layers, connections) - self.gconf = global_conf - self.network = self:get_network(layers, connections) - self.train_data = self:get_data(train_data) - self.val_data = self:get_data(val_data) -end - -function nn:get_network(layers, connections) - self.gconf.dropout_rate = 0 - local layer_repo = nerv.LayerRepo(layers, self.gconf.pr, self.gconf) - local graph = nerv.GraphLayer('graph', self.gconf, - {dim_in = {1, self.gconf.vocab_size}, dim_out = {1}, - layer_repo = layer_repo, connections = connections}) - local network = nerv.Network('network', self.gconf, - {network = graph, clip = self.gconf.clip}) - network:init(self.gconf.batch_size, self.gconf.chunk_size) - return network -end - -function nn:get_data(data) - local err_output = {} - local softmax_output = {} - local output = {} - for i = 1, self.gconf.chunk_size do - err_output[i] = self.gconf.cumat_type(self.gconf.batch_size, 1) - softmax_output[i] = self.gconf.cumat_type(self.gconf.batch_size, self.gconf.vocab_size) - output[i] = self.gconf.cumat_type(self.gconf.batch_size, 1) - end - local ret = {} - for i = 1, #data do - ret[i] = {} - ret[i].input = {} - ret[i].output = {} - ret[i].err_input = {} - ret[i].err_output = {} - for t = 1, self.gconf.chunk_size do - ret[i].input[t] = {} - ret[i].output[t] = {} - ret[i].err_input[t] = {} - ret[i].err_output[t] = {} - ret[i].input[t][1] = data[i].input[t] - ret[i].input[t][2] = data[i].output[t] - ret[i].output[t][1] = output[t] - local err_input = self.gconf.mmat_type(self.gconf.batch_size, 1) - for j = 1, self.gconf.batch_size do - if t <= 
data[i].seq_len[j] then - err_input[j - 1][0] = 1 - else - err_input[j - 1][0] = 0 - end - end - ret[i].err_input[t][1] = self.gconf.cumat_type.new_from_host(err_input) - ret[i].err_output[t][1] = err_output[t] - ret[i].err_output[t][2] = softmax_output[t] - end - ret[i].seq_length = data[i].seq_len - ret[i].new_seq = {} - for j = 1, self.gconf.batch_size do - if data[i].seq_start[j] then - table.insert(ret[i].new_seq, j) - end - end - end - return ret -end - -function nn:process(data, do_train) - local timer = self.gconf.timer - local total_err = 0 - local total_frame = 0 - for id = 1, #data do - if do_train then - self.gconf.dropout_rate = self.gconf.dropout - data[id].do_train = true - else - self.gconf.dropout_rate = 0 - data[id].do_train = false - end - timer:tic('network') - self.network:mini_batch_init(data[id]) - self.network:propagate() - timer:toc('network') - for t = 1, self.gconf.chunk_size do - local tmp = data[id].output[t][1]:new_to_host() - for i = 1, self.gconf.batch_size do - if t <= data[id].seq_length[i] then - total_err = total_err + math.log10(math.exp(tmp[i - 1][0])) - total_frame = total_frame + 1 - end - end - end - if do_train then - timer:tic('network') - self.network:back_propagate() - self.network:update() - timer:toc('network') - end - timer:tic('gc') - collectgarbage('collect') - timer:toc('gc') - end - return math.pow(10, - total_err / total_frame) -end - -function nn:epoch() - local train_error = self:process(self.train_data, true) - local val_error = self:process(self.val_data, false) - return train_error, val_error -end diff --git a/lua/reader.lua b/lua/reader.lua deleted file mode 100644 index 0c7bcb6..0000000 --- a/lua/reader.lua +++ /dev/null @@ -1,113 +0,0 @@ -local Reader = nerv.class('nerv.Reader') - -function Reader:__init(vocab_file, input_file) - self:get_vocab(vocab_file) - self:get_seq(input_file) -end - -function Reader:get_vocab(vocab_file) - local f = io.open(vocab_file, 'r') - local id = 0 - self.vocab = {} - while true do - local word = f:read() - if word == nil then - break - end - self.vocab[word] = id - id = id + 1 - end - self.size = id -end - -function Reader:split(s, t) - local ret = {} - for x in (s .. t):gmatch('(.-)' .. 
t) do - table.insert(ret, x) - end - return ret -end - -function Reader:get_seq(input_file) - local f = io.open(input_file, 'r') - self.seq = {} - while true do - local seq = f:read() - if seq == nil then - break - end - seq = self:split(seq, ' ') - local tmp = {} - for i = 1, #seq do - if seq[i] ~= '' then - table.insert(tmp, self.vocab[seq[i]]) - end - end - table.insert(self.seq, tmp) - end -end - -function Reader:get_in_out(id, pos) - return self.seq[id][pos], self.seq[id][pos + 1], pos + 1 == #self.seq[id] -end - -function Reader:get_all_batch(global_conf) - local data = {} - local pos = {} - local offset = 1 - for i = 1, global_conf.batch_size do - pos[i] = nil - end - --while true do - for i = 1, 100 do - local input = {} - local output = {} - for i = 1, global_conf.chunk_size do - input[i] = global_conf.mmat_type(global_conf.batch_size, 1) - input[i]:fill(global_conf.nn_act_default) - output[i] = global_conf.mmat_type(global_conf.batch_size, 1) - output[i]:fill(global_conf.nn_act_default) - end - local seq_start = {} - local seq_end = {} - local seq_len = {} - for i = 1, global_conf.batch_size do - seq_start[i] = false - seq_end[i] = false - seq_len[i] = 0 - end - local has_new = false - for i = 1, global_conf.batch_size do - if pos[i] == nil then - if offset < #self.seq then - seq_start[i] = true - pos[i] = {offset, 1} - offset = offset + 1 - end - end - if pos[i] ~= nil then - has_new = true - for j = 1, global_conf.chunk_size do - local final - input[j][i-1][0], output[j][i-1][0], final = self:get_in_out(pos[i][1], pos[i][2]) - seq_len[i] = j - if final then - seq_end[i] = true - pos[i] = nil - break - end - pos[i][2] = pos[i][2] + 1 - end - end - end - if not has_new then - break - end - for i = 1, global_conf.chunk_size do - input[i] = global_conf.cumat_type.new_from_host(input[i]) - output[i] = global_conf.cumat_type.new_from_host(output[i]) - end - table.insert(data, {input = input, output = output, seq_start = seq_start, seq_end = seq_end, seq_len = seq_len}) - end - return data -end diff --git a/lua/select_linear.lua b/lua/select_linear.lua deleted file mode 100644 index a7e20cc..0000000 --- a/lua/select_linear.lua +++ /dev/null @@ -1,62 +0,0 @@ -local SL = nerv.class('nerv.SelectLinearLayer', 'nerv.Layer') - ---id: string ---global_conf: table ---layer_conf: table ---Get Parameters -function SL:__init(id, global_conf, layer_conf) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf - - self.vocab = layer_conf.vocab - self.ltp = self:find_param("ltp", layer_conf, global_conf, nerv.LinearTransParam, {self.vocab, self.dim_out[1]}) --layer_conf.ltp - - self:check_dim_len(1, 1) -end - ---Check parameter -function SL:init(batch_size) - if (self.dim_in[1] ~= 1) then --one word id - nerv.error("mismatching dimensions of ltp and input") - end - if (self.dim_out[1] ~= self.ltp.trans:ncol()) then - nerv.error("mismatching dimensions of bp and output") - end - - self.batch_size = bath_size - self.ltp:train_init() -end - -function SL:update(bp_err, input, output) - --use this to produce reproducable result, don't forget to set the dropout to zero! 
- --for i = 1, input[1]:nrow(), 1 do - -- local word_vec = self.ltp.trans[input[1][i - 1][0]] - -- word_vec:add(word_vec, bp_err[1][i - 1], 1, - self.gconf.lrate / self.gconf.batch_size) - --end - - --I tried the update_select_rows kernel which uses atomicAdd, but it generates unreproducable result - self.ltp.trans:update_select_rows_by_colidx(bp_err[1], input[1], - self.gconf.lrate / self.gconf.batch_size, 0) - self.ltp.trans:add(self.ltp.trans, self.ltp.trans, 1.0, - self.gconf.lrate * self.gconf.wcost) -end - -function SL:propagate(input, output) - --for i = 0, input[1]:ncol() - 1, 1 do - -- if (input[1][0][i] > 0) then - -- output[1][i]:copy_fromd(self.ltp.trans[input[1][0][i]]) - -- else - -- output[1][i]:fill(0) - -- end - --end - output[1]:copy_rows_fromd_by_colidx(self.ltp.trans, input[1]) -end - -function SL:back_propagate(bp_err, next_bp_err, input, output) - --input is compressed, do nothing -end - -function SL:get_params() - local paramRepo = nerv.ParamRepo({self.ltp}) - return paramRepo -end diff --git a/lua/timer.lua b/lua/timer.lua deleted file mode 100644 index 2c54ca8..0000000 --- a/lua/timer.lua +++ /dev/null @@ -1,33 +0,0 @@ -local Timer = nerv.class("nerv.Timer") - -function Timer:__init() - self.last = {} - self.rec = {} -end - -function Timer:tic(item) - self.last[item] = os.clock() -end - -function Timer:toc(item) - if (self.last[item] == nil) then - nerv.error("item not there") - end - if (self.rec[item] == nil) then - self.rec[item] = 0 - end - self.rec[item] = self.rec[item] + os.clock() - self.last[item] -end - -function Timer:check(item) - if self.rec[item]==nil then - nerv.error('item not there') - end - nerv.printf('"%s" lasts for %f secs.\n',item,self.rec[item]) -end - -function Timer:flush() - for key, value in pairs(self.rec) do - self.rec[key] = nil - end -end diff --git a/lua/tnn.lua b/lua/tnn.lua deleted file mode 100644 index bf9f118..0000000 --- a/lua/tnn.lua +++ /dev/null @@ -1,136 +0,0 @@ -nerv.include('select_linear.lua') - -local reader = nerv.class('nerv.TNNReader') - -function reader:__init(global_conf, data) - self.gconf = global_conf - self.offset = 0 - self.data = data -end - -function reader:get_batch(feeds) - self.offset = self.offset + 1 - if self.offset > #self.data then - return false - end - for i = 1, self.gconf.chunk_size do - feeds.inputs_m[i][1]:copy_from(self.data[self.offset].input[i]) - feeds.inputs_m[i][2]:copy_from(self.data[self.offset].output[i]:decompress(self.gconf.vocab_size)) - end - feeds.flags_now = self.data[self.offset].flags - feeds.flagsPack_now = self.data[self.offset].flagsPack - return true -end - -function reader:has_data(t, i) - return t <= self.data[self.offset].seq_len[i] -end - -function reader:get_err_input() - return self.data[self.offset].err_input -end - -local nn = nerv.class('nerv.NN') - -function nn:__init(global_conf, train_data, val_data, layers, connections) - self.gconf = global_conf - self.tnn = self:get_tnn(layers, connections) - self.train_data = self:get_data(train_data) - self.val_data = self:get_data(val_data) -end - -function nn:get_tnn(layers, connections) - self.gconf.dropout_rate = 0 - local layer_repo = nerv.LayerRepo(layers, self.gconf.pr, self.gconf) - local tnn = nerv.TNN('TNN', self.gconf, {dim_in = {1, self.gconf.vocab_size}, - dim_out = {1}, sub_layers = layer_repo, connections = connections, - clip = self.gconf.clip}) - tnn:init(self.gconf.batch_size, self.gconf.chunk_size) - return tnn -end - -function nn:get_data(data) - local ret = {} - for i = 1, #data do - ret[i] = {} - 
ret[i].input = data[i].input - ret[i].output = data[i].output - ret[i].flags = {} - ret[i].err_input = {} - for t = 1, self.gconf.chunk_size do - ret[i].flags[t] = {} - local err_input = self.gconf.mmat_type(self.gconf.batch_size, 1) - for j = 1, self.gconf.batch_size do - if t <= data[i].seq_len[j] then - ret[i].flags[t][j] = nerv.TNN.FC.SEQ_NORM - err_input[j - 1][0] = 1 - else - ret[i].flags[t][j] = 0 - err_input[j - 1][0] = 0 - end - end - ret[i].err_input[t] = self.gconf.cumat_type.new_from_host(err_input) - end - for j = 1, self.gconf.batch_size do - if data[i].seq_start[j] then - ret[i].flags[1][j] = bit.bor(ret[i].flags[1][j], nerv.TNN.FC.SEQ_START) - end - if data[i].seq_end[j] then - local t = data[i].seq_len[j] - ret[i].flags[t][j] = bit.bor(ret[i].flags[t][j], nerv.TNN.FC.SEQ_END) - end - end - ret[i].flagsPack = {} - for t = 1, self.gconf.chunk_size do - ret[i].flagsPack[t] = 0 - for j = 1, self.gconf.batch_size do - ret[i].flagsPack[t] = bit.bor(ret[i].flagsPack[t], ret[i].flags[t][j]) - end - end - ret[i].seq_len = data[i].seq_len - end - return ret -end - -function nn:process(data, do_train) - local total_err = 0 - local total_frame = 0 - local reader = nerv.TNNReader(self.gconf, data) - while true do - local r, _ = self.tnn:getfeed_from_reader(reader) - if not r then - break - end - if do_train then - self.gconf.dropout_rate = self.gconf.dropout - else - self.gconf.dropout_rate = 0 - end - self.tnn:net_propagate() - for t = 1, self.gconf.chunk_size do - local tmp = self.tnn.outputs_m[t][1]:new_to_host() - for i = 1, self.gconf.batch_size do - if reader:has_data(t, i) then - total_err = total_err + math.log10(math.exp(tmp[i - 1][0])) - total_frame = total_frame + 1 - end - end - end - if do_train then - local err_input = reader:get_err_input() - for i = 1, self.gconf.chunk_size do - self.tnn.err_inputs_m[i][1]:copy_from(err_input[i]) - end - self.tnn:net_backpropagate(false) - self.tnn:net_backpropagate(true) - end - collectgarbage('collect') - end - return math.pow(10, - total_err / total_frame) -end - -function nn:epoch() - local train_error = self:process(self.train_data, true) - local val_error = self:process(self.val_data, false) - return train_error, val_error -end diff --git a/nerv/Makefile b/nerv/Makefile index a9b4baf..421eda0 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -1,3 +1,11 @@ +ifndef LUA_BINDIR +$(error Please build the package via luarocks: `luarocks make`) +endif + +ifndef CUDA_BASE +$(error CUDA_BASE is not set) +endif + .PHONY: build install clean SHELL := /bin/bash @@ -34,18 +42,18 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \ layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \ layer/elem_mul.lua layer/lstm.lua layer/lstm_gate.lua layer/dropout.lua layer/gru.lua \ - layer/graph.lua layer/rnn.lua layer/duplicate.lua layer/identity.lua \ + layer/graph.lua layer/rnn.lua layer/duplicate.lua layer/identity.lua \ nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua nn/network.lua \ io/sgd_buffer.lua \ tnn/init.lua tnn/sutil.lua tnn/tnn.lua INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK -#CUDA_BASE := /usr/local/cuda-7.0 -CUDA_BASE := /usr/local/cuda CUDA_INCLUDE := -I $(CUDA_BASE)/include/ INCLUDE += $(CUDA_INCLUDE) -LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcublas -lcurand +CUDA_LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcuda -lcublas -lcurand 
+override CFLAGS += $(NERV_FEAT) + NVCC := $(CUDA_BASE)/bin/nvcc EMPTY := SPACE := $(EMPTY) $(EMPTY) @@ -66,11 +74,11 @@ $(LUA_DIR)/%.lua: %.lua cp $< $@ $(LIB_PATH)/libnervcore.so: $(CORE_OBJS) - gcc -shared -o $@ $^ $(LDFLAGS) -lcblas + gcc -shared -o $@ $^ $(LDFLAGS) $(CUDA_LDFLAGS) $(BLAS_LDFLAGS) $(LIB_PATH)/libluaT.so: $(LUAT_OBJS) - gcc -shared -o $@ $^ $(LDFLAGS) + gcc -shared -o $@ $^ $(INST_LIBDIR)/libnerv.so: $(NERV_OBJS) $(LIB_PATH)/libnervcore.so $(LIB_PATH)/libluaT.so - gcc -shared -o $@ $(NERV_OBJS) $(LDFLAGS) -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -lluaT + gcc -shared -o $@ $(NERV_OBJS) -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -lluaT $(OBJ_DIR)/matrix/cumatrix.o: matrix/generic/cumatrix.c matrix/generic/matrix.c $(OBJ_DIR)/matrix/mmatrix.o: matrix/generic/mmatrix.c matrix/generic/matrix.c diff --git a/nerv/doc/nerv.md b/nerv/doc/nerv.md index 28411f5..125928d 100644 --- a/nerv/doc/nerv.md +++ b/nerv/doc/nerv.md @@ -1,6 +1,6 @@ -#The Nerv utility functions# +# The Nerv utility functions Part of the [Nerv](../README.md) toolkit. -##Methods## +## Methods * __string = nerv.typename(obj a)__ A registered function, the original function is `luaT_lua_typename`. In some cases if you call `type(a)` for object of some class in __Nerv__(like __Nerv.CuMatrix__) it will only return "userdata"(because it is created in C), in this case you can use this method to get its type. @@ -14,4 +14,4 @@ A registered function, the original function is `luaT_newmetatable`, it returns * __string = nerv.setmetatable(table self, string tname)__ A registered function, the original function is `luaT_lua_setmetatable`. It assigns the metatable registered in __luaT__ by the name *tname* to the table *self*. And return *tname* to user. * __table = nerv.get_type(string typename)__ -Returns the type(`loadstring("return " .. typename)`).
\ No newline at end of file +Returns the type(`loadstring("return " .. typename)`). diff --git a/nerv/doc/nerv_class.md b/nerv/doc/nerv_class.md index 99f63e7..8314b12 100644 --- a/nerv/doc/nerv_class.md +++ b/nerv/doc/nerv_class.md @@ -1,10 +1,10 @@ -#The Nerv OOP# +# The Nerv OOP Part of the [Nerv](../README.md) toolkit. -##Methods## +## Methods * __metatable mt, metatable mpt = nerv.class(string tname, string parenttname)__ This method is used to create a class by the name `tname`, which inherits `parenttname` in __Nerv__, then you create a new instance of this class by calling `obj=tname(...)`. The `tname.__init(...)` method(if defined) will be called in the constructing. The metatable of the class and its parent class will be returned. -##Examples## +## Examples * This example implements a simple `nerv.Counter` class which is inherited by `nerv.BetterCounter`. ``` @@ -33,4 +33,4 @@ c1 = nerv.Counter(1) print(c1.c) bc1 = nerv.BetterCounter(1, 1) print(bc1.c, bc1.bc) -```
\ No newline at end of file +``` diff --git a/nerv/doc/nerv_io.md b/nerv/doc/nerv_io.md index 07589df..299362f 100644 --- a/nerv/doc/nerv_io.md +++ b/nerv/doc/nerv_io.md @@ -1,7 +1,7 @@ -#The Nerv IO Package# +# The Nerv IO Package Part of the [Nerv](../README.md) toolkit. -##Description## +## Description The main class that the user uses to store and read parameter object to and from files is __nerv.ChunkFile__. In the file, a parameter object will be saved using a standard format. First is the length(in byte) of this object, then a table which includes some meta information of the object, and a data area. Below is an example text file. ``` @@ -23,7 +23,7 @@ In the file, a parameter object will be saved using a standard format. First is 3.000000 3.000000 3.000000 ``` -##Methods## +## Methods * __ChunkFile ChunkFile(string fn, string mode)__ `mode` can be `r` or `w`, for reading or writing a file. The returned __ChunkFile__ will be ready to write or read objects which follows the __nerv.Param__ interface(using `write_chunk` and `read_chunk`). * __void ChunkFile.write_chunk(ChunkFile self, Param p)__ @@ -33,7 +33,7 @@ Read the __Param__ object by id `id` from the file `self`. It will be constructe * __void ChunkFile.close(ChunkFile self)__ Close the opened file. -##Examples## +## Examples * An example showing how to use __ChunkFile__ to store and read parameter objects. ``` require 'io' @@ -96,7 +96,7 @@ do end ``` -##Developer Notes## +## Developer Notes * There are four classes in to deal with chunk data, which are __nerv.ChunkFile__, __nerv.ChunkFileHandle__, __nerv.ChunkInfo__, __nerv.ChunkData__. Below is the underlying C structs. ``` typedef struct ChunkFileHandle { @@ -110,4 +110,5 @@ typedef struct ChunkData { char *data; } ChunkData; ``` -* In __Nerv.io__, a returned(by `ChunkFile.__init`) __nerv.ChunkFile__ will have a member `handle`, which is a __nerv.ChunkFileHandle__.
\ No newline at end of file + +* In __Nerv.io__, a returned(by `ChunkFile.__init`) __nerv.ChunkFile__ will have a member `handle`, which is a __nerv.ChunkFileHandle__. diff --git a/nerv/doc/nerv_layer.md b/nerv/doc/nerv_layer.md index de2fb12..dd7c9bb 100644 --- a/nerv/doc/nerv_layer.md +++ b/nerv/doc/nerv_layer.md @@ -1,9 +1,9 @@ -#The Nerv Layer Package# +# The Nerv Layer Package Part of the [Nerv](../README.md) toolkit. -##Description## +## Description __nerv.Layer__ is the base class and most of its methods are abstract. -###Class hierarchy and their members### +### Class hierarchy and their members * __nerv.Layer__. * `table dim_in` It specifies the dimensions of the inputs. * `table dim_out` It specifies the dimensions of the outputs. @@ -20,7 +20,7 @@ __nerv.Layer__ is the base class and most of its methods are abstract. * `int total_frams` Records how many frames have passed. * `bool compressed` The reference distribution can be a one-hot format. This feature is enabled by `layer_conf.compressed`. -##Methods## +## Methods * __void Layer.\_\_init(Layer self, string id, table global_conf, table layer_conf)__ Abstract method. The constructing method should assign `id` to `self.id` and `global_conf` to `self.gconf`, `layer_conf.dim_in` to `self.dim_in`, `layer_conf.dim_out` to `self.dim_out`. `dim_in` and `dim_out` are a list specifies the dimensions of the inputs and outputs. Also, `layer_conf` will include the parameters, which should also be properly saved. @@ -43,7 +43,7 @@ Check whether `#self.dim_in == len_in` and `#self.dim_out == len_out`, if violat Abstract method. The layer should return a list containing its parameters. -####nerv.Layer.get\_dim(self)#### +#### nerv.Layer.get\_dim(self) * Returns: `dim_in`: __table__. `dim_out`: __table__. @@ -52,7 +52,7 @@ The layer should return a list containing its parameters. * Description: Returns `self.dim_in, self.dim_out`. -##Examples## +## Examples * a basic example using __Nerv__ layers to a linear classification. ``` @@ -178,3 +178,4 @@ for l = 0, 10, 1 do end --[[end training]]-- ``` + diff --git a/nerv/doc/nerv_matrix.md b/nerv/doc/nerv_matrix.md index dfd843d..3782eb3 100644 --- a/nerv/doc/nerv_matrix.md +++ b/nerv/doc/nerv_matrix.md @@ -1,8 +1,8 @@ -#The Nerv Matrix Package# +# The Nerv Matrix Package Part of the [Nerv](../README.md) toolkit. -##Description## -###Underlying structure### +## Description +### Underlying structure In the begining is could be useful to know something about the underlying structure of a __Nerv__ matrix. Please keep in mind that matrice in __Nerv__ is row-major. Every matrix object is a encapsulation of a C struct that describes the attributes of this matrix. ``` @@ -20,12 +20,12 @@ typedef struct Matrix { It is worth mentioning that that `data_ref` is a counter which counts the number of references to its memory space, mind that it will also be increased when a row of the matrix is referenced(`col = m[2]`). A __Nerv__ matrix will deallocate its space when this counter is decreased to zero. Also note that all assigning operation in __Nerv__ is reference copy, you can use `copy_tod` or `copy_toh` method to copy value. Also, row assigning operations like `m1[2]=m2[3]` is forbidden in __Nerv__. -###Class hierarchy### +### Class hierarchy The class hierarchy of the matrix classes can be clearly observed in `matrix/init.c`. First there is a abstract base class __Nerv.Matrix__, which is inherited by __Nerv.CuMatrix__ and __Nerv.MMatrix__(also abstract). 
Finally, there is __Nerv.CuMatrixFloat__, __Nerv.CuMatrixDouble__, inheriting __Nerv.CuMatrix__, and __Nerv.MMatrixFloat__, __Nerv.MMatrixDouble__, __Nerv.MMatrixInt__ , inheriting __Nerv.MMatrix__. -##Methods## +## Methods Mind that usually a matrix object can only do calculation with matrix of its own type(a __Nerv.CuMatrixFloat__ matrix can only do add operation with a __Nerv.CuMatrixFloat__). In the methods description below, __Matrix__ could be __Nerv.CuMatrixFloat__, __Nerv.CuMatrixDouble__, __Nerv.MMatrixFloat__ or __Nerv.MMatrixDouble__. __Element_type__ could be `float` or `double`, respectively. * __Matrix = Matrix(int nrow, int ncol)__ @@ -53,6 +53,8 @@ Return a new __Matrix__ of size (1,`self.ncol`), which stores the sum of all col Return a new __Matrix__ of size (`self.nrow`,1), which stores the sum of all rows of __Matrix__ `self`. * __Matrix Matrix.rowmax(Matrix self)__ Return a new __Matrix__ of size (`self.nrow`,1), which stores the max value of all rows of __Matrix__ `self`. +* __Matrix Matrix.rowmax_idx(Matrix self)__ +Return two new __Matrix__ of size (`self.nrow`,1), which stores the max value of all rows of __Matrix__ `self`, and its corresponding column indices(start from zero). * __Matrix Matrix.trans(Matrix self)__ Return a new __Matrix__ of size (`self.ncol`,`self.nrow`), which stores the transpose of __Matrix__ `self`. * __void Matrix.copy_fromh(Matrix self, MMatrix a)__ @@ -81,8 +83,8 @@ Fill the content of __Matrix__ `self` to be `value`. Set the element of __Matrix__ `self` to be elementwise-sigmoid of `ma`. * __void Matrix.sigmoid_grad(Matrix self, Matrix err, Matrix output)__ Set the element of __Matrix__ `self`, to be `self[i][j]=err[i][j]*output[i][j]*(1-output[i][j])`. This function is used to propagate sigmoid layer error. -* __void Matrix.softmax(Matrix self, Matrix a)__ -Calculate a row-by-row softmax of __Matrix__ `a` and save the result in `self`. +* __Matrix Matrix.softmax(Matrix self, Matrix a)__ +Calculate a row-by-row softmax of __Matrix__ `a` and save the result in `self`. Returns a new `self.nrow*1` index matrix that stores the index of the maximum value of each row. * __void Matrix.mul_elem(Matrix self, Matrix ma, Matrix mb)__ Calculate element-wise multiplication of __Matrix__ `ma` and `mb`, store the result in `self`. * __void Matrix.log_elem(Matrix self, Matrix ma)__ @@ -113,7 +115,7 @@ Write `self` to the file position in `chunk`. * __void MMatrix.copy_from(MMatrix ma, MMatrix mb,[int b_bgein, int b_end, int a_begin])__ Copy a part of `mb`(rows of index `[b_begin..b_end)`) to `ma` beginning at row index `a_begin`. If not specified, `b_begin` will be `0`, `b_end` will be `b.nrow`, `a_begin` will be `0`. -##Examples## +## Examples * Use `get_dataref_value` to test __Nerv__'s matrix space allocation. ``` m = 10 @@ -134,6 +136,7 @@ print("test fm:get_dataref_value:", fm:get_dataref_value()) print(fm) print(dm) ``` + * Test some __Matrix__ calculations. ``` m = 4 @@ -167,3 +170,4 @@ print(a) a:log_elem(fs) print(a) ``` + diff --git a/nerv/doc/nerv_nn.md b/nerv/doc/nerv_nn.md index c57447d..63537fb 100644 --- a/nerv/doc/nerv_nn.md +++ b/nerv/doc/nerv_nn.md @@ -1,19 +1,19 @@ -#The Nerv NN Package# +# The Nerv NN Package Part of the [Nerv](../README.md) toolkit. -##Description## -###Class hierarchy### +## Description +### Class hierarchy it contains __nerv.LayerRepo__, __nerv.ParamRepo__, and __nerv.DAGLayer__(inherits __nerv.Layer__). 
-###Class hierarchy and their members### -####nerv.ParamRepo#### +### Class hierarchy and their members +#### nerv.ParamRepo Get parameter object by ID. * `table param_table` Contains the mapping of parameter ID to parameter file(__nerv.ChunkFile__) * __nerv.LayerRepo__ Get layer object by ID. * `table layers` Contains the mapping of layer ID to layer object. objects. -####__nerv.DAGLayer__#### +#### __nerv.DAGLayer__ Inherits __nerv.Layer__. * `layers`: __table__, a mapping from a layer ID to its "ref". A ref is a structure that contains reference to space allocations and other info of the layer. * `inputs`: __table__, a mapping from the inputs ports of the DAG layer to the input ports of the sublayer, the key is the port number, the value is `{ref, port}`. @@ -21,17 +21,17 @@ Inherits __nerv.Layer__. * `parsed_conn`: __table__, a list of parsed connections, each entry is of format `{{ref_from, port_from}, {ref_to, port_to}}`. * `queue`: __table__, a list of "ref"s, the propagation of the DAGLayer will follow this order, and back-propagation will follow a reverse order. -##Methods## +## Methods -###__nerv.ParamRepo__### +### __nerv.ParamRepo__ -####nerv.ParamRepo:\_\_init(param\_files)#### +#### nerv.ParamRepo:\_\_init(param\_files) * Parameters: `param_files`: __table__ * Description: `param_files` is a list of file names that stores parameters, the newed __ParamRepo__ will read them from file and store the mapping for future fetching. -####nerv.Param ParamRepo.get_param(ParamRepo self, string pid, table global_conf)#### +#### nerv.Param ParamRepo.get_param(ParamRepo self, string pid, table global_conf) * Returns: __nerv.Layer__ * Parameters: @@ -41,8 +41,8 @@ Inherits __nerv.Layer__. * Description: __ParamRepo__ will find the __nerv.ChunkFile__ `pf` that contains parameter of ID `pid` and return `pf:read_chunk(pid, global_conf)`. -###__nerv.LayerRepo__### -####nerv.LayerRepo:\_\_init(layer\_spec, param\_repo, global\_conf)#### +### __nerv.LayerRepo__ +#### nerv.LayerRepo:\_\_init(layer\_spec, param\_repo, global\_conf) * Returns: __nerv.LayerRepo__. * Parameters: @@ -60,7 +60,7 @@ Inherits __nerv.Layer__. __LayerRepo__ will merge `param_config` into `layer_config` and construct a layer by calling `layer_type(layerid, global_conf, layer_config)`. -####nerv.LayerRepo.get\_layer(self, lid)#### +#### nerv.LayerRepo.get\_layer(self, lid) * Returns: __nerv.LayerRepo__, the layer with ID `lid`. * Parameters: @@ -69,8 +69,8 @@ Inherits __nerv.Layer__. * Description: Returns the layer with ID `lid`. -###nerv.DAGLayer### -####nerv.DAGLayer:\_\_init(id, global\_conf, layer\_conf)#### +### nerv.DAGLayer +#### nerv.DAGLayer:\_\_init(id, global\_conf, layer\_conf) * Returns: __nerv.DAGLayer__ * Parameters: @@ -89,7 +89,7 @@ Inherits __nerv.Layer__. }}) ``` -####nerv.DAGLayer.init(self, batch\_size)#### +#### nerv.DAGLayer.init(self, batch\_size) * Parameters: `self`: __nerv.DAGLayer__ `batch_size`: __int__ @@ -97,7 +97,7 @@ Inherits __nerv.Layer__. This initialization method will allocate space for output and input matrice, and will call `init()` for each of its sub layers. -####nerv.DAGLayer.propagate(self, input, output)#### +#### nerv.DAGLayer.propagate(self, input, output) * Parameters: `self`: __nerv.DAGLayer__ `input`: __table__ @@ -105,7 +105,7 @@ Inherits __nerv.Layer__. * Description: The same function as __nerv.Layer.propagate__, do propagation for each layer in the order of `self.queue`. 
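As a quick orientation for the `DAGLayer` methods documented in this section, the sketch below chains them in their usual order. It is only an illustration: `gconf`, the layer repo, the layer ID and the dimensions are invented placeholders (modeled on the swb_baseline examples elsewhere in this commit), not part of the nerv_nn.md text itself.
```
-- Hypothetical sketch: assumes `gconf` and a `layer_repo` (built with nerv.LayerRepo as
-- documented above) that contains an "affine0" layer with dim_in = {429}, dim_out = {2048}.
local dag = nerv.DAGLayer("dag", gconf,
    {dim_in = {429}, dim_out = {2048},
     sub_layers = layer_repo,
     connections = {
         ["<input>[1]"] = "affine0[1]",
         ["affine0[1]"] = "<output>[1]"
     }})
dag:init(256)                       -- allocate input/output space for batch_size = 256
-- input, output, bp_err and next_bp_err are tables of matrices matching dim_in/dim_out
dag:propagate(input, output)                            -- forward pass, in self.queue order
dag:back_propagate(next_bp_err, bp_err, input, output)  -- backward pass, in reverse order
dag:update(bp_err, input, output)                       -- parameter update for each sub layer
```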
-####nerv.DAGLayer.back\_propagate(self, next\_bp\_err, bp\_err, input, output)#### +#### nerv.DAGLayer.back\_propagate(self, next\_bp\_err, bp\_err, input, output) * Parameters: `self`: __nerv.DAGLayer__ `next_bp_err`: __table__ @@ -115,7 +115,7 @@ Inherits __nerv.Layer__. * Description: The same function as __nerv.Layer.back_propagate__, do back-propagation for each layer in the reverse order of `self.queue`. -####nerv.DAGLayer.update(self, bp\_err, input, output)#### +#### nerv.DAGLayer.update(self, bp\_err, input, output) * Parameters: `self`: __nerv.DAGLayer__ `bp_err`: __table__ @@ -124,7 +124,7 @@ Inherits __nerv.Layer__. * Description: The same function as __nerv.Layer.update__, do update for each layer in the order of `self.queue`. -##Examples## +## Examples * aaa ``` @@ -253,4 +253,5 @@ for l = 0, 10, 1 do ce_last = softmaxL.total_ce end --[[end training]]-- -```
\ No newline at end of file +``` + diff --git a/nerv/doc/nerv_param.md b/nerv/doc/nerv_param.md index 167cb11..98793f0 100644 --- a/nerv/doc/nerv_param.md +++ b/nerv/doc/nerv_param.md @@ -1,17 +1,17 @@ -#The Nerv Parameter Package# +# The Nerv Parameter Package Part of the [Nerv](../README.md) toolkit. -##Description## -###Class hierarchy### +## Description +### Class hierarchy There is a base class __Nerv.Param__ defined in `layer/init.lua`. -###Class hierarchy and their members### +### Class hierarchy and their members * __nerv.MatrixParam__ inherits __nerv.Param__ * `Matrix trans` stores the parameter matrix. * __nerv.LinearTransParam__ inherits __Nerv.MatrixParam__. * __Nerv.BiasParam__ inherits __Nerv.MatrixParam__. -##Methods## +## Methods * __void Param.\_\_init(Param self, string id, table global_conf)__ Constructor of a __Param__, it will set `self.id` to be `id` and `self.gconf` to be `global_conf`. * __void Param.set_info(Param self, table info)__ diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua index 3fa2653..5bf28bd 100644 --- a/nerv/examples/asr_trainer.lua +++ b/nerv/examples/asr_trainer.lua @@ -1,17 +1,33 @@ -function build_trainer(ifname) - local param_repo = nerv.ParamRepo() - param_repo:import(ifname, nil, gconf) - local layer_repo = make_layer_repo(param_repo) - local network = get_network(layer_repo) - local global_transf = get_global_transf(layer_repo) - local input_order = get_input_order() +require 'lfs' +require 'pl' +local function build_trainer(ifname) + local host_param_repo = nerv.ParamRepo() local mat_type + local src_loc_type + local train_loc_type + host_param_repo:import(ifname, nil, gconf) if gconf.use_cpu then mat_type = gconf.mmat_type + src_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST + train_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST else mat_type = gconf.cumat_type + src_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST + train_loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE end - local iterative_trainer = function (prefix, scp_file, bp) + local param_repo = host_param_repo:copy(train_loc_type) + local layer_repo = make_layer_repo(param_repo) + local network = get_network(layer_repo) + local global_transf = get_global_transf(layer_repo) + local input_order = get_input_order() + local iterative_trainer = function (prefix, scp_file, bp, rebind_param_repo) + -- rebind the params if necessary + if rebind_param_repo then + host_param_repo = rebind_param_repo + param_repo = host_param_repo:copy(train_loc_type) + layer_repo:rebind(param_repo) + rebind_param_repo = nil + end gconf.randomize = bp -- build buffer local buffer = make_buffer(make_readers(scp_file, layer_repo)) @@ -64,61 +80,193 @@ function build_trainer(ifname) print_stat(layer_repo) mat_type.print_profile() mat_type.clear_profile() - if (not bp) and prefix ~= nil then - nerv.info("writing back...") - local fname = string.format("%s_cv%.3f.nerv", - prefix, get_accuracy(layer_repo)) - network:get_params():export(fname, nil) + local fname + if (not bp) then + host_param_repo = param_repo:copy(src_loc_type) + if prefix ~= nil then + nerv.info("writing back...") + fname = string.format("%s_cv%.3f.nerv", + prefix, get_accuracy(layer_repo)) + host_param_repo:export(fname, nil) + end end - return get_accuracy(layer_repo) + return get_accuracy(layer_repo), host_param_repo, fname end return iterative_trainer end -dofile(arg[1]) -start_halving_inc = 0.5 -halving_factor = 0.6 -end_halving_inc = 0.1 -min_iter = 1 -max_iter = 20 -min_halving = 5 -gconf.batch_size = 256 -gconf.buffer_size = 
81920 +local function check_and_add_defaults(spec, opts) + local function get_opt_val(k) + return opts[string.gsub(k, '_', '-')].val + end + local opt_v = get_opt_val("resume_from") + if opt_v then + gconf = dofile(opt_v) + else + for k, v in pairs(spec) do + local opt_v = get_opt_val(k) + if opt_v ~= nil then + gconf[k] = opt_v + elseif gconf[k] ~= nil then + elseif v ~= nil then + gconf[k] = v + end + end + end +end -local pf0 = gconf.initialized_param -local trainer = build_trainer(pf0) ---local trainer = build_trainer("c3.nerv") -local accu_best = trainer(nil, gconf.cv_scp, false) -local do_halving = false - -nerv.info("initial cross validation: %.3f", accu_best) -for i = 1, max_iter do - nerv.info("[NN] begin iteration %d with lrate = %.6f", i, gconf.lrate) - local accu_tr = trainer(nil, gconf.tr_scp, true) - nerv.info("[TR] training set %d: %.3f", i, accu_tr) - local accu_new = trainer( - string.format("%s_%s_iter_%d_lr%f_tr%.3f", - string.gsub( - (string.gsub(pf0[1], "(.*/)(.*)", "%2")), - "(.*)%..*", "%1"), - os.date("%Y%m%d%H%M%S"), - i, gconf.lrate, - accu_tr), - gconf.cv_scp, false) - nerv.info("[CV] cross validation %d: %.3f", i, accu_new) - -- TODO: revert the weights - local accu_diff = accu_new - accu_best - if do_halving and accu_diff < end_halving_inc and i > min_iter then - break +local function make_options(spec) + local options = {} + for k, v in pairs(spec) do + table.insert(options, + {string.gsub(k, '_', '-'), nil, type(v), default = v}) end - if accu_diff < start_halving_inc and i >= min_halving then - do_halving = true + return options +end + +local function print_help(options) + nerv.printf("Usage: <asr_trainer.lua> [options] network_config.lua\n") + nerv.print_usage(options) +end + +local function print_gconf() + local key_maxlen = 0 + for k, v in pairs(gconf) do + key_maxlen = math.max(key_maxlen, #k or 0) end - if do_halving then - gconf.lrate = gconf.lrate * halving_factor + local function pattern_gen() + return string.format("%%-%ds = %%s\n", key_maxlen) end - if accu_new > accu_best then - accu_best = accu_new + nerv.info("ready to train with the following gconf settings:") + nerv.printf(pattern_gen(), "Key", "Value") + for k, v in pairs(gconf) do + nerv.printf(pattern_gen(), k or "", v or "") end +end + +local function dump_gconf(fname) + local f = io.open(fname, "w") + f:write("return ") + f:write(table.tostring(gconf)) + f:close() +end + +local trainer_defaults = { + lrate = 0.8, + batch_size = 256, + buffer_size = 81920, + wcost = 1e-6, + momentum = 0.9, + start_halving_inc = 0.5, + halving_factor = 0.6, + end_halving_inc = 0.1, + cur_iter = 1, + min_iter = 1, + max_iter = 20, + min_halving = 5, + do_halving = false, + cumat_tname = "nerv.CuMatrixFloat", + mmat_tname = "nerv.MMatrixFloat", + debug = false, +} + +local options = make_options(trainer_defaults) +local extra_opt_spec = { + {"tr-scp", nil, "string"}, + {"cv-scp", nil, "string"}, + {"resume-from", nil, "string"}, + {"help", "h", "boolean", default = false, desc = "show this help information"}, + {"dir", nil, "string", desc = "specify the working directory"}, +} + +table.extend(options, extra_opt_spec) + +arg, opts = nerv.parse_args(arg, options) + +if #arg < 1 or opts["help"].val then + print_help(options) + return +end + +dofile(arg[1]) + +--[[ + +Rule: command-line option overrides network config overrides trainer default. +Note: config key like aaa_bbbb_cc could be overriden by specifying +--aaa-bbbb-cc to command-line arguments. 
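To make the precedence rule stated above concrete, here is a small plain-Lua illustration. It is not the actual NERV helper (`check_and_add_defaults` writes into `gconf` rather than returning a value), and the sample key and numbers are invented:
```
-- Plain-Lua rephrasing of the precedence implemented above:
-- command-line option > network config > trainer default.
local function resolve(key, defaults, gconf, opts)
    local opt = opts[string.gsub(key, '_', '-')]   -- e.g. batch_size -> "batch-size"
    if opt ~= nil and opt.val ~= nil then
        return opt.val                             -- explicit command-line flag wins
    elseif gconf[key] ~= nil then
        return gconf[key]                          -- value set in the network config file
    else
        return defaults[key]                       -- fall back to the trainer default
    end
end

-- invented sample values:
local defaults = {batch_size = 256}
local gconf    = {batch_size = 128}
print(resolve("batch_size", defaults, gconf, {["batch-size"] = {val = 64}}))  -- 64
print(resolve("batch_size", defaults, gconf, {}))                             -- 128
print(resolve("batch_size", defaults, {},    {}))                             -- 256
```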
+ +]]-- + +check_and_add_defaults(trainer_defaults, opts) +gconf.mmat_type = nerv.get_type(gconf.mmat_tname) +gconf.cumat_type = nerv.get_type(gconf.cumat_tname) +gconf.use_cpu = econf.use_cpu or false + +local pf0 = gconf.initialized_param +local date_pattern = "%Y%m%d%H%M%S" +local logfile_name = "log" +local working_dir = opts["dir"].val or string.format("nerv_%s", os.date(date_pattern)) +local rebind_param_repo = nil + +print_gconf() +if not lfs.mkdir(working_dir) then + nerv.error("[asr_trainer] working directory already exists") +end +-- copy the network config +dir.copyfile(arg[1], working_dir) +-- set logfile path +nerv.set_logfile(path.join(working_dir, logfile_name)) +path.chdir(working_dir) + +-- start the training +local trainer = build_trainer(pf0) +local pr_prev +gconf.accu_best, pr_prev = trainer(nil, gconf.cv_scp, false) +nerv.info("initial cross validation: %.3f", gconf.accu_best) +for i = gconf.cur_iter, gconf.max_iter do + local stop = false + gconf.cur_iter = i + dump_gconf(string.format("iter_%d.meta", i)) + repeat -- trick to implement `continue` statement + nerv.info("[NN] begin iteration %d with lrate = %.6f", i, gconf.lrate) + local accu_tr = trainer(nil, gconf.tr_scp, true, rebind_param_repo) + nerv.info("[TR] training set %d: %.3f", i, accu_tr) + local param_prefix = string.format("%s_%s_iter_%d_lr%f_tr%.3f", + string.gsub( + (string.gsub(pf0[1], "(.*/)(.*)", "%2")), + "(.*)%..*", "%1"), + os.date(date_pattern), + i, gconf.lrate, + accu_tr) + local accu_new, pr_new, param_fname = trainer(param_prefix, gconf.cv_scp, false) + nerv.info("[CV] cross validation %d: %.3f", i, accu_new) + local accu_prev = gconf.accu_best + if accu_new < gconf.accu_best then + nerv.info("rejecting the trained params, rollback to the previous one") + file.move(param_fname, param_fname .. 
".rejected") + rebind_param_repo = pr_prev + break -- `continue` equivalent + else + nerv.info("accepting the trained params") + gconf.accu_best = accu_new + pr_prev = pr_new + gconf.initialized_param = {path.join(path.currentdir(), param_fname)} + end + if gconf.do_halving and + gconf.accu_best - accu_prev < gconf.end_halving_inc and + i > gconf.min_iter then + stop = true + break + end + if gconf.accu_best - accu_prev < gconf.start_halving_inc and + i >= gconf.min_halving then + gconf.do_halving = true + end + if gconf.do_halving then + gconf.lrate = gconf.lrate * gconf.halving_factor + end + until true + if stop then break end -- nerv.Matrix.print_profile() end diff --git a/nerv/examples/swb_baseline.lua b/nerv/examples/swb_baseline.lua index 51052ba..0ce8468 100644 --- a/nerv/examples/swb_baseline.lua +++ b/nerv/examples/swb_baseline.lua @@ -1,7 +1,5 @@ require 'htk_io' gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, - cumat_type = nerv.CuMatrixFloat, - mmat_type = nerv.MMatrixFloat, rearrange = true, -- just to make the context order consistent with old results, deprecated frm_ext = 5, frm_trim = 5, -- trim the first and last 5 frames, TNet just does this, deprecated @@ -9,8 +7,7 @@ gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp", htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", initialized_param = {"/slfs1/users/mfy43/swb_init.nerv", - "/slfs1/users/mfy43/swb_global_transf.nerv"}, - debug = false} + "/slfs1/users/mfy43/swb_global_transf.nerv"}} function make_layer_repo(param_repo) local layer_repo = nerv.LayerRepo( @@ -18,51 +15,51 @@ function make_layer_repo(param_repo) -- global transf ["nerv.BiasLayer"] = { - blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, - blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} + blayer1 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias1"}}, + blayer2 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias2"}} }, ["nerv.WindowLayer"] = { - wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, - wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} + wlayer1 = {dim_in = {429}, dim_out = {429}, params = {window = "window1"}}, + wlayer2 = {dim_in = {429}, dim_out = {429}, params = {window = "window2"}} }, -- biased linearity ["nerv.AffineLayer"] = { - affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, - {dim_in = {429}, dim_out = {2048}}}, - affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, - {dim_in = {2048}, dim_out = {3001}}} + affine0 = {dim_in = {429}, dim_out = {2048}, + params = {ltp = "affine0_ltp", bp = "affine0_bp"}}, + affine1 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine1_ltp", bp = "affine1_bp"}}, + affine2 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine2_ltp", bp = "affine2_bp"}}, + affine3 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine3_ltp", bp = "affine3_bp"}}, + affine4 = {dim_in = 
{2048}, dim_out = {2048}, + params = {ltp = "affine4_ltp", bp = "affine4_bp"}}, + affine5 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine5_ltp", bp = "affine5_bp"}}, + affine6 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine6_ltp", bp = "affine6_bp"}}, + affine7 = {dim_in = {2048}, dim_out = {3001}, + params = {ltp = "affine7_ltp", bp = "affine7_bp"}} }, ["nerv.SigmoidLayer"] = { - sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} + sigmoid0 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid1 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid2 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid3 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid4 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid5 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid6 = {dim_in = {2048}, dim_out = {2048}} }, ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output { - ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}} + ce_crit = {dim_in = {3001, 1}, dim_out = {1}, compressed = true} }, ["nerv.SoftmaxLayer"] = -- softmax for decode output { - softmax = {{}, {dim_in = {3001}, dim_out = {3001}}} + softmax = {dim_in = {3001}, dim_out = {3001}} } }, param_repo, gconf) @@ -70,7 +67,7 @@ function make_layer_repo(param_repo) { ["nerv.DAGLayer"] = { - global_transf = {{}, { + global_transf = { dim_in = {429}, dim_out = {429}, sub_layers = layer_repo, connections = { @@ -80,8 +77,8 @@ function make_layer_repo(param_repo) ["blayer2[1]"] = "wlayer2[1]", ["wlayer2[1]"] = "<output>[1]" } - }}, - main = {{}, { + }, + main = { dim_in = {429}, dim_out = {3001}, sub_layers = layer_repo, connections = { @@ -102,7 +99,7 @@ function make_layer_repo(param_repo) ["sigmoid6[1]"] = "affine7[1]", ["affine7[1]"] = "<output>[1]" } - }} + } } }, param_repo, gconf) @@ -110,7 +107,7 @@ function make_layer_repo(param_repo) { ["nerv.DAGLayer"] = { - ce_output = {{}, { + ce_output = { dim_in = {429, 1}, dim_out = {1}, sub_layers = layer_repo, connections = { @@ -119,8 +116,8 @@ function make_layer_repo(param_repo) ["<input>[2]"] = "ce_crit[2]", ["ce_crit[1]"] = "<output>[1]" } - }}, - softmax_output = {{}, { + }, + softmax_output = { dim_in = {429}, dim_out = {3001}, sub_layers = layer_repo, connections = { @@ -128,7 +125,7 @@ function make_layer_repo(param_repo) ["main[1]"] = "softmax[1]", ["softmax[1]"] = "<output>[1]" } - }} + } } }, param_repo, gconf) @@ -173,6 +170,7 @@ function make_buffer(readers) return nerv.SGDBuffer(gconf, { buffer_size = gconf.buffer_size, + batch_size = gconf.batch_size, randomize = gconf.randomize, readers = readers, use_gpu = true @@ -184,6 +182,10 @@ function get_input_order() {id = "phone_state"}} end +function get_decode_input_order() + return {{id = "main_scp", global_transf = true}} +end + function get_accuracy(layer_repo) local ce_crit = layer_repo:get_layer("ce_crit") return ce_crit.total_correct / ce_crit.total_frames * 100 diff --git a/nerv/examples/swb_baseline2.lua b/nerv/examples/swb_baseline2.lua new file mode 100644 index 0000000..8b5ebb1 --- /dev/null +++ b/nerv/examples/swb_baseline2.lua @@ -0,0 +1,203 @@ +require 'htk_io' +gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, + rearrange = true, -- just to 
make the context order consistent with old results, deprecated + frm_ext = 5, + frm_trim = 5, -- trim the first and last 5 frames, TNet just does this, deprecated + tr_scp = "/speechlab/users/mfy43/swb50/train_bp.scp", + cv_scp = "/speechlab/users/mfy43/swb50/train_cv.scp", + htk_conf = "/speechlab/users/mfy43/swb50/plp_0_d_a.conf", + initialized_param = {"/speechlab/users/mfy43/swb50/swb_init.nerv", + "/speechlab/users/mfy43/swb50/swb_global_transf.nerv"}} + +function make_layer_repo(param_repo) + local layer_repo = nerv.LayerRepo( + { + -- global transf + ["nerv.BiasLayer"] = + { + blayer1 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias1"}}, + blayer2 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias2"}} + }, + ["nerv.WindowLayer"] = + { + wlayer1 = {dim_in = {429}, dim_out = {429}, params = {window = "window1"}}, + wlayer2 = {dim_in = {429}, dim_out = {429}, params = {window = "window2"}} + }, + -- biased linearity + ["nerv.AffineLayer"] = + { + affine0 = {dim_in = {429}, dim_out = {2048}, + params = {ltp = "affine0_ltp", bp = "affine0_bp"}}, + affine1 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine1_ltp", bp = "affine1_bp"}}, + affine2 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine2_ltp", bp = "affine2_bp"}}, + affine3 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine3_ltp", bp = "affine3_bp"}}, + affine4 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine4_ltp", bp = "affine4_bp"}}, + affine5 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine5_ltp", bp = "affine5_bp"}}, + affine6 = {dim_in = {2048}, dim_out = {2048}, + params = {ltp = "affine6_ltp", bp = "affine6_bp"}}, + affine7 = {dim_in = {2048}, dim_out = {3001}, + params = {ltp = "affine7_ltp", bp = "affine7_bp"}} + }, + ["nerv.SigmoidLayer"] = + { + sigmoid0 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid1 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid2 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid3 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid4 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid5 = {dim_in = {2048}, dim_out = {2048}}, + sigmoid6 = {dim_in = {2048}, dim_out = {2048}} + }, + ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output + { + ce_crit = {dim_in = {3001, 1}, dim_out = {1}, compressed = true} + }, + ["nerv.SoftmaxLayer"] = -- softmax for decode output + { + softmax = {dim_in = {3001}, dim_out = {3001}} + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + global_transf = { + dim_in = {429}, dim_out = {429}, + sub_layers = layer_repo, + connections = { + ["<input>[1]"] = "blayer1[1]", + ["blayer1[1]"] = "wlayer1[1]", + ["wlayer1[1]"] = "blayer2[1]", + ["blayer2[1]"] = "wlayer2[1]", + ["wlayer2[1]"] = "<output>[1]" + } + }, + main = { + dim_in = {429}, dim_out = {3001}, + sub_layers = layer_repo, + connections = { + ["<input>[1]"] = "affine0[1]", + ["affine0[1]"] = "sigmoid0[1]", + ["sigmoid0[1]"] = "affine1[1]", + ["affine1[1]"] = "sigmoid1[1]", + ["sigmoid1[1]"] = "affine2[1]", + ["affine2[1]"] = "sigmoid2[1]", + ["sigmoid2[1]"] = "affine3[1]", + ["affine3[1]"] = "sigmoid3[1]", + ["sigmoid3[1]"] = "affine4[1]", + ["affine4[1]"] = "sigmoid4[1]", + ["sigmoid4[1]"] = "affine5[1]", + ["affine5[1]"] = "sigmoid5[1]", + ["sigmoid5[1]"] = "affine6[1]", + ["affine6[1]"] = "sigmoid6[1]", + ["sigmoid6[1]"] = "affine7[1]", + ["affine7[1]"] = "<output>[1]" + } + } + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + ce_output = { + 
dim_in = {429, 1}, dim_out = {1}, + sub_layers = layer_repo, + connections = { + ["<input>[1]"] = "main[1]", + ["main[1]"] = "ce_crit[1]", + ["<input>[2]"] = "ce_crit[2]", + ["ce_crit[1]"] = "<output>[1]" + } + }, + softmax_output = { + dim_in = {429}, dim_out = {3001}, + sub_layers = layer_repo, + connections = { + ["<input>[1]"] = "main[1]", + ["main[1]"] = "softmax[1]", + ["softmax[1]"] = "<output>[1]" + } + } + } + }, param_repo, gconf) + + return layer_repo +end + +function get_network(layer_repo) + return layer_repo:get_layer("ce_output") +end + +function get_decode_network(layer_repo) + return layer_repo:get_layer("softmax_output") +end + +function get_global_transf(layer_repo) + return layer_repo:get_layer("global_transf") +end + +function make_readers(scp_file, layer_repo) + return { + {reader = nerv.TNetReader(gconf, + { + id = "main_scp", + scp_file = scp_file, + conf_file = gconf.htk_conf, + frm_ext = gconf.frm_ext, + mlfs = { + phone_state = { + file = "/speechlab/users/mfy43/swb50/ref.mlf", + format = "map", + format_arg = "/speechlab/users/mfy43/swb50/dict", + dir = "*/", + ext = "lab" + } + } + }), + data = {main_scp = 429, phone_state = 1}} + } +end + +function make_buffer(readers) + return nerv.SGDBuffer(gconf, + { + buffer_size = gconf.buffer_size, + batch_size = gconf.batch_size, + randomize = gconf.randomize, + readers = readers, + use_gpu = true + }) +end + +function get_input_order() + return {{id = "main_scp", global_transf = true}, + {id = "phone_state"}} +end + +function get_decode_input_order() + return {{id = "main_scp", global_transf = true}} +end + +function get_accuracy(layer_repo) + local ce_crit = layer_repo:get_layer("ce_crit") + return ce_crit.total_correct / ce_crit.total_frames * 100 +end + +function print_stat(layer_repo) + local ce_crit = layer_repo:get_layer("ce_crit") + nerv.info("*** training stat begin ***") + nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce) + nerv.printf("correct:\t\t%d\n", ce_crit.total_correct) + nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames) + nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames) + nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo)) + nerv.info("*** training stat end ***") +end diff --git a/nerv/examples/swb_baseline_basic.lua b/nerv/examples/swb_baseline_basic.lua deleted file mode 100644 index 71f04a3..0000000 --- a/nerv/examples/swb_baseline_basic.lua +++ /dev/null @@ -1,162 +0,0 @@ -require 'htk_io' -gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, - cumat_type = nerv.CuMatrixFloat, - mmat_type = nerv.MMatrixFloat, - frm_ext = 5, - frm_trim = 5, - tr_scp = "/slfs1/users/mfy43/swb_ivec/train_bp.scp", - cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp", - htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", - initialized_param = {"/slfs1/users/mfy43/swb_init.nerv", - "/slfs1/users/mfy43/swb_global_transf.nerv"}, - debug = false} - -function make_layer_repo(param_repo) - local layer_repo = nerv.LayerRepo( - { - -- global transf - ["nerv.BiasLayer"] = - { - blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, - blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} - }, - ["nerv.WindowLayer"] = - { - wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, - wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} - }, - -- biased linearity - ["nerv.AffineLayer"] = - { - affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"}, - {dim_in = {429}, dim_out = {2048}}}, - affine1 = {{ltp = "affine1_ltp", bp = 
"affine1_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"}, - {dim_in = {2048}, dim_out = {2048}}}, - affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"}, - {dim_in = {2048}, dim_out = {3001}}} - }, - ["nerv.SigmoidLayer"] = - { - sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}}, - sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}} - }, - ["nerv.SoftmaxCELayer"] = - { - ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}} - } - }, param_repo, gconf) - - layer_repo:add_layers( - { - ["nerv.DAGLayer"] = - { - global_transf = {{}, { - dim_in = {429}, dim_out = {429}, - sub_layers = layer_repo, - connections = { - ["<input>[1]"] = "blayer1[1]", - ["blayer1[1]"] = "wlayer1[1]", - ["wlayer1[1]"] = "blayer2[1]", - ["blayer2[1]"] = "wlayer2[1]", - ["wlayer2[1]"] = "<output>[1]" - } - }}, - main = {{}, { - dim_in = {429, 1}, dim_out = {1}, - sub_layers = layer_repo, - connections = { - ["<input>[1]"] = "affine0[1]", - ["affine0[1]"] = "sigmoid0[1]", - ["sigmoid0[1]"] = "affine1[1]", - ["affine1[1]"] = "sigmoid1[1]", - ["sigmoid1[1]"] = "affine2[1]", - ["affine2[1]"] = "sigmoid2[1]", - ["sigmoid2[1]"] = "affine3[1]", - ["affine3[1]"] = "sigmoid3[1]", - ["sigmoid3[1]"] = "affine4[1]", - ["affine4[1]"] = "sigmoid4[1]", - ["sigmoid4[1]"] = "affine5[1]", - ["affine5[1]"] = "sigmoid5[1]", - ["sigmoid5[1]"] = "affine6[1]", - ["affine6[1]"] = "sigmoid6[1]", - ["sigmoid6[1]"] = "affine7[1]", - ["affine7[1]"] = "ce_crit[1]", - ["<input>[2]"] = "ce_crit[2]", - ["ce_crit[1]"] = "<output>[1]" - } - }} - } - }, param_repo, gconf) - return layer_repo -end - -function get_network(layer_repo) - return layer_repo:get_layer("main") -end - -function make_readers(scp_file, layer_repo) - return { - {reader = nerv.TNetReader(gconf, - { - id = "main_scp", - scp_file = scp_file, - conf_file = gconf.htk_conf, - frm_ext = gconf.frm_ext, - mlfs = { - phone_state = { - file = "/slfs1/users/mfy43/swb_ivec/ref.mlf", - format = "map", - format_arg = "/slfs1/users/mfy43/swb_ivec/dict", - dir = "*/", - ext = "lab" - } - } - }), - data = {main_scp = 429, phone_state = 1}} - } -end - -function make_buffer(readers) - return nerv.SGDBuffer(gconf, - { - buffer_size = gconf.buffer_size, - randomize = gconf.randomize, - readers = readers - }) -end - -function get_input_order() - return {{id = "main_scp", global_transf = true}, - {id = "phone_state"}} -end - -function get_accuracy(layer_repo) - local ce_crit = layer_repo:get_layer("ce_crit") - return ce_crit.total_correct / ce_crit.total_frames * 100 -end - -function print_stat(layer_repo) - local ce_crit = layer_repo:get_layer("ce_crit") - nerv.info("*** training stat begin ***") - nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce) - nerv.printf("correct:\t\t%d\n", ce_crit.total_correct) - nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames) - 
nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames) - nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo)) - nerv.info("*** training stat end ***") -end diff --git a/nerv/examples/timit_baseline2.lua b/nerv/examples/timit_baseline2.lua new file mode 100644 index 0000000..2d144b5 --- /dev/null +++ b/nerv/examples/timit_baseline2.lua @@ -0,0 +1,212 @@ +require 'kaldi_io' +gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, frm_ext = 5, + tr_scp = "ark:/speechlab/tools/KALDI/kaldi-master/src/featbin/copy-feats " .. + "scp:/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/train.scp ark:- |", + cv_scp = "ark:/speechlab/tools/KALDI/kaldi-master/src/featbin/copy-feats " .. + "scp:/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/cv.scp ark:- |", + initialized_param = {"/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/nnet_init.nerv", + "/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/nnet_output.nerv", + "/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/nnet_trans.nerv"}, + decode_param = {"/speechlab/users/mfy43/timit/nnet_init_20160229015745_iter_13_lr0.013437_tr72.434_cv58.729.nerv", + "/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/nnet_trans.nerv"}} + +function make_layer_repo(param_repo) + local layer_repo = nerv.LayerRepo( + { + -- global transf + ["nerv.BiasLayer"] = + { + blayer1 = {dim_in = {440}, dim_out = {440}, params = {bias = "bias0"}} + }, + ["nerv.WindowLayer"] = + { + wlayer1 = {dim_in = {440}, dim_out = {440}, params = {window = "window0"}} + }, + -- biased linearity + ["nerv.AffineLayer"] = + { + affine0 = {dim_in = {440}, dim_out = {1024}, + params = {ltp = "affine0_ltp", bp = "affine0_bp"}}, + affine1 = {dim_in = {1024}, dim_out = {1024}, + params = {ltp = "affine1_ltp", bp = "affine1_bp"}}, + affine2 = {dim_in = {1024}, dim_out = {1024}, + params = {ltp = "affine2_ltp", bp = "affine2_bp"}}, + affine3 = {dim_in = {1024}, dim_out = {1024}, + params = {ltp = "affine3_ltp", bp = "affine3_bp"}}, + affine4 = {dim_in = {1024}, dim_out = {1024}, + params = {ltp = "affine4_ltp", bp = "affine4_bp"}}, + affine5 = {dim_in = {1024}, dim_out = {1024}, + params = {ltp = "affine5_ltp", bp = "affine5_bp"}}, + affine6 = {dim_in = {1024}, dim_out = {1959}, + params = {ltp = "affine6_ltp", bp = "affine6_bp"}} + }, + ["nerv.SigmoidLayer"] = + { + sigmoid0 = {dim_in = {1024}, dim_out = {1024}}, + sigmoid1 = {dim_in = {1024}, dim_out = {1024}}, + sigmoid2 = {dim_in = {1024}, dim_out = {1024}}, + sigmoid3 = {dim_in = {1024}, dim_out = {1024}}, + sigmoid4 = {dim_in = {1024}, dim_out = {1024}}, + sigmoid5 = {dim_in = {1024}, dim_out = {1024}} + }, + ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output + { + ce_crit = {dim_in = {1959, 1}, dim_out = {1}, compressed = true} + }, + ["nerv.SoftmaxLayer"] = -- softmax for decode output + { + softmax = {dim_in = {1959}, dim_out = {1959}} + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + global_transf = { + dim_in = {440}, dim_out = {440}, + sub_layers = layer_repo, + connections = { + ["<input>[1]"] = "blayer1[1]", + ["blayer1[1]"] = "wlayer1[1]", + ["wlayer1[1]"] = "<output>[1]" + } + }, + main = { + dim_in = {440}, dim_out = {1959}, + sub_layers = layer_repo, + connections = { + ["<input>[1]"] = "affine0[1]", + ["affine0[1]"] = "sigmoid0[1]", + ["sigmoid0[1]"] = "affine1[1]", + ["affine1[1]"] = "sigmoid1[1]", + ["sigmoid1[1]"] = "affine2[1]", + ["affine2[1]"] = "sigmoid2[1]", + ["sigmoid2[1]"] = "affine3[1]", + ["affine3[1]"] = 
"sigmoid3[1]", + ["sigmoid3[1]"] = "affine4[1]", + ["affine4[1]"] = "sigmoid4[1]", + ["sigmoid4[1]"] = "affine5[1]", + ["affine5[1]"] = "sigmoid5[1]", + ["sigmoid5[1]"] = "affine6[1]", + ["affine6[1]"] = "<output>[1]" + } + } + } + }, param_repo, gconf) + + layer_repo:add_layers( + { + ["nerv.DAGLayer"] = + { + ce_output = { + dim_in = {440, 1}, dim_out = {1}, + sub_layers = layer_repo, + connections = { + ["<input>[1]"] = "main[1]", + ["main[1]"] = "ce_crit[1]", + ["<input>[2]"] = "ce_crit[2]", + ["ce_crit[1]"] = "<output>[1]" + } + }, + softmax_output = { + dim_in = {440}, dim_out = {1959}, + sub_layers = layer_repo, + connections = { + ["<input>[1]"] = "main[1]", + ["main[1]"] = "softmax[1]", + ["softmax[1]"] = "<output>[1]" + } + } + } + }, param_repo, gconf) + + return layer_repo +end + +function get_network(layer_repo) + return layer_repo:get_layer("ce_output") +end + +function get_decode_network(layer_repo) + return layer_repo:get_layer("softmax_output") +end + +function get_global_transf(layer_repo) + return layer_repo:get_layer("global_transf") +end + +function make_readers(scp_file, layer_repo) + return { + {reader = nerv.KaldiReader(gconf, + { + id = "main_scp", + feature_rspecifier = scp_file, + conf_file = gconf.htk_conf, + frm_ext = gconf.frm_ext, + mlfs = { + phone_state = { + targets_rspecifier = "ark:/speechlab/tools/KALDI/kaldi-master/src/bin/ali-to-pdf " .. + "/speechlab/users/mfy43/timit/s5/exp/tri3_ali/final.mdl " .. + "\"ark:gunzip -c /speechlab/users/mfy43/timit/s5/exp/tri3_ali/ali.*.gz |\" " .. + "ark:- | " .. + "/speechlab/tools/KALDI/kaldi-master/src/bin/ali-to-post " .. + "ark:- ark:- |", + format = "map" + } + } + }), + data = {main_scp = 440, phone_state = 1}} + } +end + +function make_decode_readers(scp_file, layer_repo) + return { + {reader = nerv.KaldiReader(gconf, + { + id = "main_scp", + feature_rspecifier = scp_file, + conf_file = gconf.htk_conf, + frm_ext = gconf.frm_ext, + mlfs = {}, + need_key = true + }), + data = {main_scp = 440, phone_state = 1}} + } +end + +function make_buffer(readers) + return nerv.SGDBuffer(gconf, + { + buffer_size = gconf.buffer_size, + batch_size = gconf.batch_size, + randomize = gconf.randomize, + readers = readers, + use_gpu = true + }) +end + +function get_input_order() + return {{id = "main_scp", global_transf = true}, + {id = "phone_state"}} +end + +function get_decode_input_order() + return {{id = "main_scp", global_transf = true}} +end + +function get_accuracy(layer_repo) + local ce_crit = layer_repo:get_layer("ce_crit") + return ce_crit.total_correct / ce_crit.total_frames * 100 +end + +function print_stat(layer_repo) + local ce_crit = layer_repo:get_layer("ce_crit") + nerv.info("*** training stat begin ***") + nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce) + nerv.printf("correct:\t\t%d\n", ce_crit.total_correct) + nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames) + nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames) + nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo)) + nerv.info("*** training stat end ***") +end diff --git a/nerv/init.lua b/nerv/init.lua index e7d668c..da7df29 100644 --- a/nerv/init.lua +++ b/nerv/init.lua @@ -13,6 +13,10 @@ function nerv.error_method_not_implemented() nerv.error("method not implemented"); end +function nerv.set_logfile(filename) + nerv._logfile = io.open(filename, "w") +end + --- Format a string just like `sprintf` in C. -- @param fmt the format string -- @param ... 
args, the data to be formatted @@ -25,7 +29,13 @@ end -- @param fmt the format string -- @param ... args, the data to be formatted function nerv.printf(fmt, ...) - io.write(nerv.sprintf(fmt, ...)) + local line = nerv.sprintf(fmt, ...) + io.stderr:write(line) + -- duplicate the all output to the log file, if set + if nerv._logfile then + nerv._logfile:write(line) + nerv._logfile:flush() + end end --- Raise an global error with the formatted message. @@ -54,7 +64,7 @@ end function nerv.warning(fmt, ...) nerv.printf( string.format("(%s)[nerv] warning: %s\n", - os.date("%H:%M:%S.%N %F"), fmt), ...) + os.date("%H:%M:%S %F"), fmt), ...) end --- Create a class (Torch-compatible). @@ -88,24 +98,27 @@ function nerv.class(tname, parenttname) end function table.val_to_str(v) - if "string" == type(v) then - v = string.gsub(v, "\n", "\\n") - if string.match(string.gsub(v,"[^'\"]",""), '^"+$') then - return "'" .. v .. "'" + if "string" == type(v) then + v = string.gsub(v, "\n", "\\n") + if string.match(string.gsub(v,"[^'\"]",""), '^"+$') then + return "'" .. v .. "'" + end + return '"' .. string.gsub(v,'"', '\\"') .. '"' + else + return "table" == type(v) and table.tostring(v) or + (("number" == type(v) or + "string" == type(v) or + "boolean" == type(v)) and tostring(v)) or + nil -- failed to serialize end - return '"' .. string.gsub(v,'"', '\\"') .. '"' - else - return "table" == type(v) and table.tostring(v) or - tostring(v) - end end function table.key_to_str (k) - if "string" == type(k) and string.match(k, "^[_%a][_%a%d]*$") then - return k - else - return "[" .. table.val_to_str(k) .. "]" - end + if "string" == type(k) and string.match(k, "^[_%a][_%a%d]*$") then + return k + else + return "[" .. table.val_to_str(k) .. "]" + end end --- Get the string representation of a table, which can be executed as a valid @@ -114,18 +127,18 @@ end -- @return the string representation which will result in a Lua table entity -- when evaluated function table.tostring(tbl) - local result, done = {}, {} - for k, v in ipairs(tbl) do - table.insert(result, table.val_to_str(v)) - done[k] = true - end - for k, v in pairs(tbl) do - if not done[k] then - table.insert(result, - table.key_to_str(k) .. "=" .. table.val_to_str(v)) + local result, done = {}, {} + for k, v in ipairs(tbl) do + table.insert(result, table.val_to_str(v)) + done[k] = true + end + for k, v in pairs(tbl) do + if not done[k] then + table.insert(result, + table.key_to_str(k) .. "=" .. table.val_to_str(v)) + end end - end - return "{" .. table.concat(result, ",") .. "}" + return "{" .. table.concat(result, ",") .. "}" end --- Get the class by name. @@ -172,6 +185,168 @@ function nerv.include(filename) return dofile(nerv.dirname(caller) .. filename) end +--- Parse the command-line options and arguments +-- @param argv the argrument list to parsed +-- @param options The specification of options, should be a list of tables, +-- each one for exactly one available option, say `v`, with `v[1]`, `v[2]`, +-- `v[3]` indicating the full name of the option, the short form of the option +-- (when it is a boolean option) and the type of the value controlled by the +-- option. `default` and `desc` keys can also be specified to set the default +-- value and description of the option. 
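A minimal usage sketch of this interface, assuming the specification format described above and Lua's standard ``arg`` table as the argument source (the option names below are hypothetical and only illustrate the resolution of ``--opt=value``, short boolean flags and defaults)::

    -- hypothetical option specification, in the format documented above
    local options = {{"verbose", "v", "boolean", default = false, desc = "print extra information"},
                     {"batch-size", nil, "int", default = 256, desc = "frames per mini-batch"}}
    -- parse the command line; non-option tokens are collected in `args`
    local args, opts = nerv.parse_args(arg, options)
    if opts["verbose"].val then
        nerv.printf("batch size = %d\n", opts["batch-size"].val)
    end
    -- print a table of all options with their abbreviations, types, defaults and descriptions
    nerv.print_usage(options)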
+-- +-- An example of specification: +-- {{"aaa", "a", "boolean", default = false, desc = "an option called aaa"}, +-- {"bbb", "b", "boolean", default = true, desc = "bbb is set to be true if --bbb=no does not present"}, +-- {"ccc", nil, "int", default = 0, desc = "ccc expects an integeral value"}}` +-- +-- @return args, opts The non-option arguments and parsed options. `opts` is +-- again a list of tables, each of which corresponds to one table in parameter +-- `options`. The parsed value could be accessed by `opts["aaa"].val` (which is +-- `true` if "--aaa" or "-a" is specified). +function nerv.parse_args(argv, options, unordered) + local is_opt_exp = "^[-](.*)$" + local sim_opt_exp = "^[-]([a-z]+)$" + local opt_exp = "^[-][-]([^=]+)$" + local opt_with_val_exp = "^[-][-]([^=]+)=([^=]+)$" + local opts = {} + local sopts = {} + local args = {} + local arg_start = false + local function err() + nerv.error("invalid format of option specification") + end + for _, v in ipairs(options) do + if type(v) ~= "table" or + (v[1] == nil and v[2] == nil) or + v[3] == nil then + err() + end + local opt_full = v[1] + local opt_short = v[2] + local opt_type = v[3] + local opt_meta = {type = opt_type, + desc = v.desc or "", + val = v.default} + if opt_short ~= nil then + if type(opt_short) ~= "string" or #opt_short ~= 1 then err() end + if opt_type ~= "boolean" then + nerv.error("only boolean option could have short form") + end + sopts[opt_short] = opt_meta + end + if opt_full ~= nil then + if type(opt_full) ~= "string" then err() end + opts[opt_full] = opt_meta + end + end + for _, token in ipairs(argv) do + if ((not arg_start) or unordered) and token:match(is_opt_exp) then + local k = token:match(sim_opt_exp) + if k then + for c in k:gmatch"." do + if sopts[c] then + sopts[c].val = true + else + nerv.error("invalid option -%s", c) + end + end + else + local k = token:match(opt_exp) + if k then + if opts[k] == nil then + nerv.error("invalid option %s", token) + end + if opts[k].type ~= "boolean" then + nerv.error("invalid option --%s: " .. 
+ "a %s value needs to be specified", + k, opts[k].type) + else + opts[k].val = true + end + else + local k, v = token:match(opt_with_val_exp) + if k then + if opts[k] == nil then + nerv.error("invalid option %s", token) + end + if opts[k].type == "boolean" then + if v == "yes" then + opts[k].val = true + elseif v == "no" then + opts[k].val = false + else + nerv.error("boolean value should be \"yes\" or \"no\"") + end + elseif opts[k].type == "int" then + local t = tonumber(v) + opts[k].val = t + if t == nil or math.floor(t) ~= t then + nerv.error("int value is expected") + end + elseif opts[k].type == "number" then + local t = tonumber(v) + opts[k].val = t + if t == nil then + nerv.error("numeric value is expected") + end + elseif opts[k].type == "string" then + opts[k].val = v + else + nerv.error("unrecognized type %s", opts[k].type) + end + else + nerv.error("unrecognized option %s", token) + end + end + end + else + table.insert(args, token) + arg_start = true + end + end + return args, opts +end + +--- Print usage information of the command-line options +-- @param options the list of options used in `parse_args` +function nerv.print_usage(options) + local full_maxlen = 0 + local type_maxlen = 0 + local default_maxlen = 0 + for _, v in ipairs(options) do + local opt_full = v[1] + local opt_short = v[2] + local opt_type = v[3] + full_maxlen = math.max(full_maxlen, #opt_full or 0) + type_maxlen = math.max(full_maxlen, #opt_type or 0) + default_maxlen = math.max(full_maxlen, #tostring(v.default) or 0) + end + local function pattern_gen() + return string.format("\t%%-%ds\t%%-2s\t%%-%ds\t%%-%ds\t%%s\n", + full_maxlen, type_maxlen, default_maxlen) + end + nerv.printf("\n") + nerv.printf(pattern_gen(), "Option", "Abbr.", "Type", "Default", "Desc.") + for _, v in ipairs(options) do + local opt_full = v[1] + local opt_short = v[2] + local opt_type = v[3] + nerv.printf(pattern_gen(), + (opt_full and '--' .. opt_full) or "", + (opt_short and '-' .. 
opt_short) or "", + opt_type, + (v.default ~= nil and tostring(v.default)) or "", + v.desc or "") + end + nerv.printf("\n") +end + +function table.extend(tbl1, tbl2) + for _, v in ipairs(tbl2) do + table.insert(tbl1, v) + end +end + -- the following lines trigger the initialization of basic modules nerv.include('matrix/init.lua') diff --git a/nerv/io/sgd_buffer.lua b/nerv/io/sgd_buffer.lua index 3cf4f5a..d78f6d1 100644 --- a/nerv/io/sgd_buffer.lua +++ b/nerv/io/sgd_buffer.lua @@ -2,8 +2,9 @@ local SGDBuffer = nerv.class("nerv.SGDBuffer", "nerv.DataBuffer") function SGDBuffer:__init(global_conf, buffer_conf) self.gconf = global_conf + self.batch_size = buffer_conf.batch_size self.buffer_size = math.floor(buffer_conf.buffer_size / - global_conf.batch_size) * global_conf.batch_size + self.batch_size) * self.batch_size self.randomize = buffer_conf.randomize self.consume = buffer_conf.consume local cumat_type = global_conf.cumat_type @@ -112,11 +113,11 @@ function SGDBuffer:saturate() end self.rand_map = self.perm_gen(self.tail) -- generate shuffled index collectgarbage("collect") - return self.tail >= self.gconf.batch_size + return self.tail >= self.batch_size end function SGDBuffer:get_data() - local batch_size = self.gconf.batch_size + local batch_size = self.batch_size if self.head >= self.tail then -- buffer is empty local t = os.clock() if (not self:saturate()) and (not self.consume) then diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua index 4156dde..38743aa 100644 --- a/nerv/layer/affine.lua +++ b/nerv/layer/affine.lua @@ -8,21 +8,19 @@ local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer') --- A parameter that consists of a single matrix -- @type nerv.MatrixParam +function MatrixParam:check(checker) + -- check trans matrix type + checker(self.trans) +end + --- Read from a file handle. -- @param handle the file handle function MatrixParam:read(handle) self.trans = self.gconf.mmat_type.load(handle) - if not self.gconf.use_cpu then - self.trans = self.gconf.cumat_type.new_from_host(self.trans) - end end function MatrixParam:write(handle) - local trans = self.trans - if not self.gconf.use_cpu then - trans = self.trans:new_to_host() - end - trans:save(handle) + self.trans:save(handle) end function MatrixParam:train_init() @@ -30,6 +28,12 @@ function MatrixParam:train_init() self.correction:fill(0) end +function MatrixParam:copy(copier) + local target = nerv.MatrixParam(self.id, self.gconf) + target.trans = copier(self.trans) + return target +end + function MatrixParam:_update_by_gradient(gradient, alpha, beta) local gconf = self.gconf -- momentum gain @@ -77,25 +81,24 @@ end --- The constructor. function AffineLayer:__init(id, global_conf, layer_conf) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - if layer_conf.ltp ~= nil and layer_conf.ltp1 == nil then - layer_conf.ltp1 = layer_conf.ltp - end + nerv.Layer.__init(self, id, global_conf, layer_conf) + self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs + self:bind_params() +end + +function AffineLayer:bind_params() for i = 1, #self.dim_in do local pid = "ltp" .. i local pid_list = i == 1 and {pid, "ltp"} or pid - self["ltp" .. i] = self:find_param(pid_list, layer_conf, global_conf, + self["ltp" .. 
i] = self:find_param(pid_list, self.lconf, self.gconf, nerv.LinearTransParam, - {self.dim_in[i], self.dim_out[1]}) + {self.dim_in[i], self.dim_out[1]}) end self.ltp = self.ltp1 -- alias of ltp1 - self.bp = self:find_param("bp", layer_conf, global_conf, + self.bp = self:find_param("bp", self.lconf, self.gconf, nerv.BiasParam, {1, self.dim_out[1]}) - self.gconf = global_conf - self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs + end function AffineLayer:init(batch_size) @@ -142,7 +145,7 @@ function AffineLayer:back_propagate(bp_err, next_bp_err, input, output) end function AffineLayer:get_params() - local pr = nerv.ParamRepo({self.ltp1, self.bp}) + local pr = nerv.ParamRepo({self.ltp1, self.bp}, self.loc_type) for i = 2, #self.dim_in do pr:add(self["ltp" .. i].id, self["ltp" .. i]) end diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua index 924c3da..191be78 100644 --- a/nerv/layer/bias.lua +++ b/nerv/layer/bias.lua @@ -1,12 +1,15 @@ local BiasLayer = nerv.class("nerv.BiasLayer", "nerv.Layer") function BiasLayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - self.bias = layer_conf.bias - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(1, 1) + self:bind_params() +end + +function BiasLayer:bind_params() + self.bias = self:find_param("bias", self.lconf, self.gconf, + nerv.BiasParam, + {1, self.dim_out[1]}) end function BiasLayer:init() @@ -28,5 +31,5 @@ function BiasLayer:propagate(input, output) end function BiasLayer:get_params() - return nerv.ParamRepo({self.bias}) + return nerv.ParamRepo({self.bias}, self.loc_type) end diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua index 22e89a9..028c970 100644 --- a/nerv/layer/combiner.lua +++ b/nerv/layer/combiner.lua @@ -1,16 +1,8 @@ local CombinerLayer = nerv.class('nerv.CombinerLayer', 'nerv.Layer') function CombinerLayer:__init(id, global_conf, layer_conf) - self.id = id + nerv.Layer.__init(self, id, global_conf, layer_conf) self.lambda = layer_conf.lambda - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf - if self.gconf.use_cpu then - self.mat_type = self.gconf.mmat_type - else - self.mat_type = self.gconf.cumat_type - end self:check_dim_len(#self.lambda, -1) if #self.dim_in < 1 then nerv.error("no input specified") @@ -20,6 +12,10 @@ function CombinerLayer:__init(id, global_conf, layer_conf) end end +function CombinerLayer:bind_params() + -- do nothing +end + function CombinerLayer:init(batch_size) local dim = self.dim_in[1] for i = 2, #self.dim_in do @@ -66,5 +62,5 @@ function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output) end function CombinerLayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/dropout.lua b/nerv/layer/dropout.lua index 42660cc..1a379c9 100644 --- a/nerv/layer/dropout.lua +++ b/nerv/layer/dropout.lua @@ -1,22 +1,18 @@ local DropoutLayer = nerv.class("nerv.DropoutLayer", "nerv.Layer") function DropoutLayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - if self.gconf.use_cpu then - self.mat_type = self.gconf.mmat_type - else - self.mat_type = self.gconf.cumat_type - end + nerv.Layer.__init(self, id, global_conf, layer_conf) self.rate = layer_conf.dropout_rate or global_conf.dropout_rate if self.rate == nil then nerv.warning("[DropoutLayer:propagate] dropout rate is not set") end - self.dim_in = 
layer_conf.dim_in - self.dim_out = layer_conf.dim_out self:check_dim_len(1, 1) -- two inputs: nn output and label end +function DropoutLayer:bind_params() + -- do nothing +end + function DropoutLayer:init(batch_size, chunk_size) if self.dim_in[1] ~= self.dim_out[1] then nerv.error("mismatching dimensions of input and output") @@ -73,5 +69,5 @@ function DropoutLayer:back_propagate(bp_err, next_bp_err, input, output, t) end function DropoutLayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua index 8988617..137472b 100644 --- a/nerv/layer/duplicate.lua +++ b/nerv/layer/duplicate.lua @@ -1,10 +1,7 @@ local DuplicateLayer = nerv.class('nerv.DuplicateLayer', 'nerv.Layer') function DuplicateLayer:__init(id, global_conf, layer_conf) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(1, -1) if #self.dim_out < 1 then nerv.error('no output specified') @@ -40,5 +37,5 @@ function DuplicateLayer:update() end function DuplicateLayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/elem_mul.lua b/nerv/layer/elem_mul.lua index fe80a3f..f03649b 100644 --- a/nerv/layer/elem_mul.lua +++ b/nerv/layer/elem_mul.lua @@ -1,14 +1,15 @@ local ElemMulLayer = nerv.class('nerv.ElemMulLayer', 'nerv.Layer') function ElemMulLayer:__init(id, global_conf, layer_conf) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf + nerv.Layer.__init(self, id, global_conf, layer_conf) -- element-wise multiplication of input[1] and input[2] self:check_dim_len(2, 1) end +function ElemMulLayer:bind_params() + -- do nothing +end + function ElemMulLayer:init(batch_size) if self.dim_in[1] ~= self.dim_in[2] or self.dim_in[1] ~= self.dim_out[1] then @@ -34,5 +35,5 @@ function ElemMulLayer:update(bp_err, input, output) end function ElemMulLayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua index 1406eff..5f42fca 100644 --- a/nerv/layer/graph.lua +++ b/nerv/layer/graph.lua @@ -1,10 +1,7 @@ local GraphLayer = nerv.class('nerv.GraphLayer', 'nerv.Layer') function GraphLayer:__init(id, global_conf, layer_conf) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf + nerv.Layer.__init(self, id, global_conf, layer_conf) self:graph_init(layer_conf.layer_repo, layer_conf.connections) end @@ -155,5 +152,5 @@ function GraphLayer:get_params() table.insert(param_repos, ref.layer:get_params()) end end - return nerv.ParamRepo.merge(param_repos) + return nerv.ParamRepo.merge(param_repos, self.loc_type) end diff --git a/nerv/layer/gru.lua b/nerv/layer/gru.lua index e81d21a..71718d7 100644 --- a/nerv/layer/gru.lua +++ b/nerv/layer/gru.lua @@ -4,11 +4,7 @@ function GRULayer:__init(id, global_conf, layer_conf) -- input1:x -- input2:h -- input3:c (h^~) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf - + nerv.Layer.__init(self, id, global_conf, layer_conf) if self.dim_in[2] ~= self.dim_out[1] then nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)", self.dim_in[2], self.dim_out[1]) @@ -17,7 +13,7 @@ function GRULayer:__init(id, global_conf, layer_conf) -- prepare a DAGLayer to hold the lstm 
structure local pr = layer_conf.pr if pr == nil then - pr = nerv.ParamRepo() + pr = nerv.ParamRepo({}, self.loc_type) end local function ap(str) @@ -63,7 +59,7 @@ function GRULayer:__init(id, global_conf, layer_conf) }, } - local layerRepo = nerv.LayerRepo(layers, pr, global_conf) + self.lrepo = nerv.LayerRepo(layers, pr, global_conf) local connections = { ["<input>[1]"] = ap("inputXDup[1]"), @@ -97,12 +93,20 @@ function GRULayer:__init(id, global_conf, layer_conf) self.dag = nerv.DAGLayer(self.id, global_conf, {dim_in = self.dim_in, dim_out = self.dim_out, - sub_layers = layerRepo, + sub_layers = self.lrepo, connections = connections}) self:check_dim_len(2, 1) -- x, h and h end +function GRULayer:bind_params() + local pr = layer_conf.pr + if pr == nil then + pr = nerv.ParamRepo({}, self.loc_type) + end + self.lrepo:rebind(pr) +end + function GRULayer:init(batch_size, chunk_size) self.dag:init(batch_size, chunk_size) end diff --git a/nerv/layer/identity.lua b/nerv/layer/identity.lua index aeeff89..d56337d 100644 --- a/nerv/layer/identity.lua +++ b/nerv/layer/identity.lua @@ -1,10 +1,7 @@ local IdentityLayer = nerv.class('nerv.IdentityLayer', 'nerv.Layer') function IdentityLayer:__init(id, global_conf, layer_conf) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(1, 1) if self.dim_in[1] ~= self.dim_out[1] then nerv.error('mismatching dimensions of input and output') @@ -29,5 +26,5 @@ function IdentityLayer:update() end function IdentityLayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua index 4fabefa..475ef62 100644 --- a/nerv/layer/init.lua +++ b/nerv/layer/init.lua @@ -30,7 +30,18 @@ end local Layer = nerv.class('nerv.Layer') function Layer:__init(id, global_conf, layer_conf) - nerv.error_method_not_implemented() + self.id = id + self.gconf = global_conf + self.lconf = layer_conf + if self.gconf.use_cpu then + self.mat_type = self.gconf.mmat_type + self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST + else + self.mat_type = self.gconf.cumat_type + self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE + end + self.dim_in = layer_conf.dim_in + self.dim_out = layer_conf.dim_out end function Layer:init(batch_size) @@ -66,6 +77,10 @@ function Layer:get_params() nerv.error_method_not_implemented() end +function Layer:bind_params() + nerv.error_method_not_implemented() +end + function Layer:get_dim() return self.dim_in, self.dim_out end @@ -78,30 +93,33 @@ function Layer:get_sublayer(id) nerv.error('primitive layer does not have sublayers') end -function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim) - if type(pid_list) == "string" then - pid_list = {pid_list} +function Layer:find_param(plist, lconf, gconf, p_type, p_dim) + if type(plist) == "string" then + plist = {plist} end - pid_list_str = table.tostring(pid_list) - for i, pid in ipairs(pid_list) do - if lconf[pid] ~= nil then - nerv.info("param [%s] of layer [%s] found in `layer_conf`.", pid, self.id) - return lconf[pid] + if lconf.params == nil then + lconf.params = {} + end + plist_str = table.tostring(plist) + local pid + for i, pname in ipairs(plist) do + if lconf.params[pname] ~= nil then + nerv.info("param id for [%s] of layer [%s] specified in `layer_conf.params`.", pname, self.id) + pid = lconf.params[pname] end - local pid_g = self.id .. '_' .. 
pid --global identifier - local pr = lconf.pr - local p - if pr ~= nil and pr:has_param(pid_g) == true then - nerv.info("param [%s] of layer [%s] found in `layer_conf.pr`.", pid_list_str, self.id) - p = pr:get_param(pid_g) - return p + if lconf.pr:has_param(pid) then + return lconf.pr:get_param(pid) end end - nerv.info("param [%s] of layer [%s] is not found in `layer_conf` or `layer_conf.pr`, " .. - "switch to auto-generate", pid_list_str, self.id) - local pid_g = self.id .. '_' .. pid_list[1] - p = p_type(pid_g, gconf) - p.trans = gconf.cumat_type(unpack(p_dim)) + pid = self.id .. '_' .. plist[1] + if lconf.pr:has_param(pid) then + nerv.info("param id for [%s] of layer [%s] is generated automatically.", pname, self.id) + return lconf.pr:get_param(pid) + end + nerv.info("param id for [%s] of layer [%s] is not found in the specified param repo, " .. + "switch to auto-generate", plist_str, self.id) + local p = p_type(pid, gconf) + p.trans = self.mat_type(unpack(p_dim)) if type(gconf.param_random) ~= "function" then nerv.error("a param generate function is needed") end diff --git a/nerv/layer/lstm.lua b/nerv/layer/lstm.lua index caa7569..641d5dc 100644 --- a/nerv/layer/lstm.lua +++ b/nerv/layer/lstm.lua @@ -4,15 +4,11 @@ function LSTMLayer:__init(id, global_conf, layer_conf) -- input1:x -- input2:h -- input3:c - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf - + nerv.Layer.__init(self, id, global_conf, layer_conf) -- prepare a DAGLayer to hold the lstm structure local pr = layer_conf.pr if pr == nil then - pr = nerv.ParamRepo() + pr = nerv.ParamRepo({}, self.loc_type) end local function ap(str) @@ -66,7 +62,7 @@ function LSTMLayer:__init(id, global_conf, layer_conf) }, } - local layerRepo = nerv.LayerRepo(layers, pr, global_conf) + self.lrepo = nerv.LayerRepo(layers, pr, global_conf) local connections = { ["<input>[1]"] = ap("inputXDup[1]"), @@ -109,12 +105,20 @@ function LSTMLayer:__init(id, global_conf, layer_conf) self.dag = nerv.DAGLayer(self.id, global_conf, {dim_in = self.dim_in, dim_out = self.dim_out, - sub_layers = layerRepo, + sub_layers = self.lrepo, connections = connections}) self:check_dim_len(3, 2) -- x, h, c and h, c end +function LSTMLayer:bind_params() + local pr = layer_conf.pr + if pr == nil then + pr = nerv.ParamRepo({}, self.loc_type) + end + self.lrepo:rebind(pr) +end + function LSTMLayer:init(batch_size, chunk_size) self.dag:init(batch_size, chunk_size) end diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua index 1963eba..7a27bab 100644 --- a/nerv/layer/lstm_gate.lua +++ b/nerv/layer/lstm_gate.lua @@ -2,20 +2,19 @@ local LSTMGateLayer = nerv.class('nerv.LSTMGateLayer', 'nerv.Layer') -- NOTE: this is a full matrix gate function LSTMGateLayer:__init(id, global_conf, layer_conf) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf + nerv.Layer.__init(self, id, global_conf, layer_conf) + self:check_dim_len(-1, 1) --accept multiple inputs + self:bind_params() +end +function LSTMGateLayer:bind_params() for i = 1, #self.dim_in do - self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf, + self["ltp" .. i] = self:find_param("ltp" .. 
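The lookup order implemented above can be summarised with a small, hypothetical layer configuration (the ids and the repo variable ``pr`` are made up for illustration; only the resolution logic mirrors the code)::

    -- layer_conf for a layer whose id is "affine0", drawing params from the repo `pr`
    local layer_conf = {
        dim_in = {440}, dim_out = {1024},
        params = {ltp = "shared_ltp"},  -- 1. explicit mapping: "shared_ltp" is looked up in layer_conf.pr
        pr = pr,
    }
    -- 2. if no mapping is given for a name, the id `self.id .. '_' .. plist[1]`
    --    (e.g. "affine0_bp" for the bias) is tried in layer_conf.pr
    -- 3. if that also fails, a fresh param with that id is created and initialized
    --    with gconf.param_random, which must be provided as a function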
i, self.lconf, self.gconf, nerv.LinearTransParam, {self.dim_in[i], self.dim_out[1]}) end - self.bp = self:find_param("bp", layer_conf, global_conf, + self.bp = self:find_param("bp", self.lconf, self.gconf, nerv.BiasParam, {1, self.dim_out[1]}) - - self:check_dim_len(-1, 1) --accept multiple inputs end function LSTMGateLayer:init(batch_size) @@ -69,7 +68,7 @@ function LSTMGateLayer:update(bp_err, input, output) end function LSTMGateLayer:get_params() - local pr = nerv.ParamRepo({self.bp}) + local pr = nerv.ParamRepo({self.bp}, self.loc_type) for i = 1, #self.dim_in do pr:add(self["ltp" .. i].id, self["ltp" .. i]) end diff --git a/nerv/layer/mse.lua b/nerv/layer/mse.lua index 1c218d0..458d086 100644 --- a/nerv/layer/mse.lua +++ b/nerv/layer/mse.lua @@ -1,18 +1,14 @@ local MSELayer = nerv.class("nerv.MSELayer", "nerv.Layer") function MSELayer:__init(id, global_conf, layer_conf) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = global_conf - if self.gconf.use_cpu then - self.mat_type = self.gconf.mmat_type - else - self.mat_type = self.gconf.cumat_type - end + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(2, -1) end +function MSELayer:bind_params() + -- do nothing +end + function MSELayer:init(batch_size) if self.dim_in[1] ~= self.dim_in[2] then nerv.error("mismatching dimensions of previous network output and labels") @@ -61,5 +57,5 @@ function MSELayer:back_propagate(bp_err, next_bp_err, input, output) end function MSELayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua index 38f2326..e59cf5b 100644 --- a/nerv/layer/rnn.lua +++ b/nerv/layer/rnn.lua @@ -1,10 +1,7 @@ local RNNLayer = nerv.class('nerv.RNNLayer', 'nerv.GraphLayer') function RNNLayer:__init(id, global_conf, layer_conf) - self.id = id - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out - self.gconf = layer_conf.gconf + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(1, 1) local din = layer_conf.dim_in[1] @@ -12,7 +9,7 @@ function RNNLayer:__init(id, global_conf, layer_conf) local pr = layer_conf.pr if pr == nil then - pr = nerv.ParamRepo() + pr = nerv.ParamRepo({}, self.loc_type) end local layers = { diff --git a/nerv/layer/sigmoid.lua b/nerv/layer/sigmoid.lua index 0a8bcdc..a9f9749 100644 --- a/nerv/layer/sigmoid.lua +++ b/nerv/layer/sigmoid.lua @@ -1,13 +1,14 @@ local SigmoidLayer = nerv.class("nerv.SigmoidLayer", "nerv.Layer") function SigmoidLayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(1, 1) end +function SigmoidLayer:bind_params() + -- do nothing +end + function SigmoidLayer:init() if self.dim_in[1] ~= self.dim_out[1] then nerv.error("mismatching dimensions of input and output") @@ -31,5 +32,5 @@ function SigmoidLayer:back_propagate(bp_err, next_bp_err, input, output) end function SigmoidLayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/softmax.lua b/nerv/layer/softmax.lua index 4205b66..f7a5163 100644 --- a/nerv/layer/softmax.lua +++ b/nerv/layer/softmax.lua @@ -1,13 +1,14 @@ local SoftmaxLayer = nerv.class("nerv.SoftmaxLayer", "nerv.Layer") function SoftmaxLayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - self.dim_in = 
layer_conf.dim_in - self.dim_out = layer_conf.dim_out + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(1, 1) -- two inputs: nn output and label end +function SoftmaxLayer:bind_params() + -- do nothing +end + function SoftmaxLayer:init(batch_size) if self.dim_in[1] ~= self.dim_out[1] then nerv.error("mismatching dimensions of input and output") @@ -31,5 +32,5 @@ function SoftmaxLayer:back_propagate(bp_err, next_bp_err, input, output) end function SoftmaxLayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua index d7d650e..7b4a80c 100644 --- a/nerv/layer/softmax_ce.lua +++ b/nerv/layer/softmax_ce.lua @@ -1,15 +1,7 @@ local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayer", "nerv.Layer") function SoftmaxCELayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - if self.gconf.use_cpu then - self.mat_type = self.gconf.mmat_type - else - self.mat_type = self.gconf.cumat_type - end - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out + nerv.Layer.__init(self, id, global_conf, layer_conf) self.compressed = layer_conf.compressed if self.compressed == nil then self.compressed = false @@ -17,6 +9,10 @@ function SoftmaxCELayer:__init(id, global_conf, layer_conf) self:check_dim_len(2, -1) -- two inputs: nn output and label end +function SoftmaxCELayer:bind_params() + -- do nothing +end + function SoftmaxCELayer:init(batch_size, chunk_size) if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then nerv.error("mismatching dimensions of previous network output and labels") @@ -94,5 +90,5 @@ function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output, t) end function SoftmaxCELayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/tanh.lua b/nerv/layer/tanh.lua index e1c32f2..7a19fc8 100644 --- a/nerv/layer/tanh.lua +++ b/nerv/layer/tanh.lua @@ -1,13 +1,14 @@ local TanhLayer = nerv.class("nerv.TanhLayer", "nerv.Layer") function TanhLayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(1, 1) end +function TanhLayer:bind_params() + -- do nothing +end + function TanhLayer:init() if self.dim_in[1] ~= self.dim_out[1] then nerv.error("mismatching dimensions of input and output") @@ -31,5 +32,5 @@ function TanhLayer:back_propagate(bp_err, next_bp_err, input, output) end function TanhLayer:get_params() - return nerv.ParamRepo({}) + return nerv.ParamRepo({}, self.loc_type) end diff --git a/nerv/layer/window.lua b/nerv/layer/window.lua index 4933de0..364929f 100644 --- a/nerv/layer/window.lua +++ b/nerv/layer/window.lua @@ -1,12 +1,15 @@ local WindowLayer = nerv.class("nerv.WindowLayer", "nerv.Layer") function WindowLayer:__init(id, global_conf, layer_conf) - self.id = id - self.gconf = global_conf - self.window = layer_conf.window - self.dim_in = layer_conf.dim_in - self.dim_out = layer_conf.dim_out + nerv.Layer.__init(self, id, global_conf, layer_conf) self:check_dim_len(1, 1) + self:bind_params() +end + +function WindowLayer:bind_params() + self.window = self:find_param("window", self.lconf, self.gconf, + nerv.BiasParam, + {1, self.dim_out[1]}) end function WindowLayer:init() @@ -28,5 +31,5 @@ function WindowLayer:propagate(input, output) end function WindowLayer:get_params() - return 
nerv.ParamRepo({self.window}) + return nerv.ParamRepo({self.window}, self.loc_type) end diff --git a/nerv/lib/cblas.h b/nerv/lib/cblas.h new file mode 100644 index 0000000..4087ffb --- /dev/null +++ b/nerv/lib/cblas.h @@ -0,0 +1,596 @@ +#ifndef CBLAS_H + +#ifndef CBLAS_ENUM_DEFINED_H + #define CBLAS_ENUM_DEFINED_H + enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102 }; + enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, + AtlasConj=114}; + enum CBLAS_UPLO {CblasUpper=121, CblasLower=122}; + enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132}; + enum CBLAS_SIDE {CblasLeft=141, CblasRight=142}; +#endif + +#ifndef CBLAS_ENUM_ONLY +#define CBLAS_H +#define CBLAS_INDEX int + +int cblas_errprn(int ierr, int info, char *form, ...); + +/* + * =========================================================================== + * Prototypes for level 1 BLAS functions (complex are recast as routines) + * =========================================================================== + */ +float cblas_sdsdot(const int N, const float alpha, const float *X, + const int incX, const float *Y, const int incY); +double cblas_dsdot(const int N, const float *X, const int incX, const float *Y, + const int incY); +float cblas_sdot(const int N, const float *X, const int incX, + const float *Y, const int incY); +double cblas_ddot(const int N, const double *X, const int incX, + const double *Y, const int incY); +/* + * Functions having prefixes Z and C only + */ +void cblas_cdotu_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotu); +void cblas_cdotc_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotc); + +void cblas_zdotu_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotu); +void cblas_zdotc_sub(const int N, const void *X, const int incX, + const void *Y, const int incY, void *dotc); + + +/* + * Functions having prefixes S D SC DZ + */ +float cblas_snrm2(const int N, const float *X, const int incX); +float cblas_sasum(const int N, const float *X, const int incX); + +double cblas_dnrm2(const int N, const double *X, const int incX); +double cblas_dasum(const int N, const double *X, const int incX); + +float cblas_scnrm2(const int N, const void *X, const int incX); +float cblas_scasum(const int N, const void *X, const int incX); + +double cblas_dznrm2(const int N, const void *X, const int incX); +double cblas_dzasum(const int N, const void *X, const int incX); + + +/* + * Functions having standard 4 prefixes (S D C Z) + */ +CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX); +CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX); +CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX); +CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX); + +/* + * =========================================================================== + * Prototypes for level 1 BLAS routines + * =========================================================================== + */ + +/* + * Routines with standard 4 prefixes (s, d, c, z) + */ +void cblas_sswap(const int N, float *X, const int incX, + float *Y, const int incY); +void cblas_scopy(const int N, const float *X, const int incX, + float *Y, const int incY); +void cblas_saxpy(const int N, const float alpha, const float *X, + const int incX, float *Y, const int incY); +void catlas_saxpby(const int N, const float alpha, const float *X, + const int incX, const float beta, float *Y, const int 
incY); +void catlas_sset + (const int N, const float alpha, float *X, const int incX); + +void cblas_dswap(const int N, double *X, const int incX, + double *Y, const int incY); +void cblas_dcopy(const int N, const double *X, const int incX, + double *Y, const int incY); +void cblas_daxpy(const int N, const double alpha, const double *X, + const int incX, double *Y, const int incY); +void catlas_daxpby(const int N, const double alpha, const double *X, + const int incX, const double beta, double *Y, const int incY); +void catlas_dset + (const int N, const double alpha, double *X, const int incX); + +void cblas_cswap(const int N, void *X, const int incX, + void *Y, const int incY); +void cblas_ccopy(const int N, const void *X, const int incX, + void *Y, const int incY); +void cblas_caxpy(const int N, const void *alpha, const void *X, + const int incX, void *Y, const int incY); +void catlas_caxpby(const int N, const void *alpha, const void *X, + const int incX, const void *beta, void *Y, const int incY); +void catlas_cset + (const int N, const void *alpha, void *X, const int incX); + +void cblas_zswap(const int N, void *X, const int incX, + void *Y, const int incY); +void cblas_zcopy(const int N, const void *X, const int incX, + void *Y, const int incY); +void cblas_zaxpy(const int N, const void *alpha, const void *X, + const int incX, void *Y, const int incY); +void catlas_zaxpby(const int N, const void *alpha, const void *X, + const int incX, const void *beta, void *Y, const int incY); +void catlas_zset + (const int N, const void *alpha, void *X, const int incX); + + +/* + * Routines with S and D prefix only + */ +void cblas_srotg(float *a, float *b, float *c, float *s); +void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P); +void cblas_srot(const int N, float *X, const int incX, + float *Y, const int incY, const float c, const float s); +void cblas_srotm(const int N, float *X, const int incX, + float *Y, const int incY, const float *P); + +void cblas_drotg(double *a, double *b, double *c, double *s); +void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P); +void cblas_drot(const int N, double *X, const int incX, + double *Y, const int incY, const double c, const double s); +void cblas_drotm(const int N, double *X, const int incX, + double *Y, const int incY, const double *P); + + +/* + * Routines with S D C Z CS and ZD prefixes + */ +void cblas_sscal(const int N, const float alpha, float *X, const int incX); +void cblas_dscal(const int N, const double alpha, double *X, const int incX); +void cblas_cscal(const int N, const void *alpha, void *X, const int incX); +void cblas_zscal(const int N, const void *alpha, void *X, const int incX); +void cblas_csscal(const int N, const float alpha, void *X, const int incX); +void cblas_zdscal(const int N, const double alpha, void *X, const int incX); + +/* + * Extra reference routines provided by ATLAS, but not mandated by the standard + */ +void cblas_crotg(void *a, void *b, void *c, void *s); +void cblas_zrotg(void *a, void *b, void *c, void *s); +void cblas_csrot(const int N, void *X, const int incX, void *Y, const int incY, + const float c, const float s); +void cblas_zdrot(const int N, void *X, const int incX, void *Y, const int incY, + const double c, const double s); + +/* + * =========================================================================== + * Prototypes for level 2 BLAS + * =========================================================================== + */ + +/* + * Routines with 
standard 4 prefixes (S, D, C, Z) + */ +void cblas_sgemv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const float alpha, const float *A, const int lda, + const float *X, const int incX, const float beta, + float *Y, const int incY); +void cblas_sgbmv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const float alpha, + const float *A, const int lda, const float *X, + const int incX, const float beta, float *Y, const int incY); +void cblas_strmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *A, const int lda, + float *X, const int incX); +void cblas_stbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const float *A, const int lda, + float *X, const int incX); +void cblas_stpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *Ap, float *X, const int incX); +void cblas_strsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *A, const int lda, float *X, + const int incX); +void cblas_stbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const float *A, const int lda, + float *X, const int incX); +void cblas_stpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const float *Ap, float *X, const int incX); + +void cblas_dgemv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const double alpha, const double *A, const int lda, + const double *X, const int incX, const double beta, + double *Y, const int incY); +void cblas_dgbmv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const double alpha, + const double *A, const int lda, const double *X, + const int incX, const double beta, double *Y, const int incY); +void cblas_dtrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *A, const int lda, + double *X, const int incX); +void cblas_dtbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const double *A, const int lda, + double *X, const int incX); +void cblas_dtpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *Ap, double *X, const int incX); +void cblas_dtrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *A, const int lda, double *X, + const int incX); +void cblas_dtbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const double *A, const int lda, + double *X, const int incX); +void cblas_dtpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + 
const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const double *Ap, double *X, const int incX); + +void cblas_cgemv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *X, const int incX, const void *beta, + void *Y, const int incY); +void cblas_cgbmv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const void *alpha, + const void *A, const int lda, const void *X, + const int incX, const void *beta, void *Y, const int incY); +void cblas_ctrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, + void *X, const int incX); +void cblas_ctbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +void cblas_ctpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); +void cblas_ctrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, void *X, + const int incX); +void cblas_ctbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +void cblas_ctpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); + +void cblas_zgemv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *X, const int incX, const void *beta, + void *Y, const int incY); +void cblas_zgbmv(const enum CBLAS_ORDER Order, + const enum CBLAS_TRANSPOSE TransA, const int M, const int N, + const int KL, const int KU, const void *alpha, + const void *A, const int lda, const void *X, + const int incX, const void *beta, void *Y, const int incY); +void cblas_ztrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, + void *X, const int incX); +void cblas_ztbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +void cblas_ztpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); +void cblas_ztrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *A, const int lda, void *X, + const int incX); +void cblas_ztbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const int K, const void *A, const int lda, + void *X, const int incX); +void cblas_ztpsv(const enum CBLAS_ORDER Order, 
const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag, + const int N, const void *Ap, void *X, const int incX); + + +/* + * Routines with S and D prefixes only + */ +void cblas_ssymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *A, + const int lda, const float *X, const int incX, + const float beta, float *Y, const int incY); +void cblas_ssbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const float alpha, const float *A, + const int lda, const float *X, const int incX, + const float beta, float *Y, const int incY); +void cblas_sspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *Ap, + const float *X, const int incX, + const float beta, float *Y, const int incY); +void cblas_sger(const enum CBLAS_ORDER Order, const int M, const int N, + const float alpha, const float *X, const int incX, + const float *Y, const int incY, float *A, const int lda); +void cblas_ssyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, float *A, const int lda); +void cblas_sspr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, float *Ap); +void cblas_ssyr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, const float *Y, const int incY, float *A, + const int lda); +void cblas_sspr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const float *X, + const int incX, const float *Y, const int incY, float *A); + +void cblas_dsymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *A, + const int lda, const double *X, const int incX, + const double beta, double *Y, const int incY); +void cblas_dsbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const double alpha, const double *A, + const int lda, const double *X, const int incX, + const double beta, double *Y, const int incY); +void cblas_dspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *Ap, + const double *X, const int incX, + const double beta, double *Y, const int incY); +void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N, + const double alpha, const double *X, const int incX, + const double *Y, const int incY, double *A, const int lda); +void cblas_dsyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, double *A, const int lda); +void cblas_dspr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, double *Ap); +void cblas_dsyr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, const double *Y, const int incY, double *A, + const int lda); +void cblas_dspr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const double *X, + const int incX, const double *Y, const int incY, double *A); + + +/* + * Routines with C and Z prefixes only + */ +void cblas_chemv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *A, + 
const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_chbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_chpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *Ap, + const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_cgeru(const enum CBLAS_ORDER Order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_cgerc(const enum CBLAS_ORDER Order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_cher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const void *X, const int incX, + void *A, const int lda); +void cblas_chpr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const float alpha, const void *X, + const int incX, void *A); +void cblas_cher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_chpr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *Ap); + +void cblas_zhemv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_zhbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const int K, const void *alpha, const void *A, + const int lda, const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_zhpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const void *alpha, const void *Ap, + const void *X, const int incX, + const void *beta, void *Y, const int incY); +void cblas_zgeru(const enum CBLAS_ORDER Order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_zher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const void *X, const int incX, + void *A, const int lda); +void cblas_zhpr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const int N, const double alpha, const void *X, + const int incX, void *A); +void cblas_zher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *A, const int lda); +void cblas_zhpr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N, + const void *alpha, const void *X, const int incX, + const void *Y, const int incY, void *Ap); + +/* + * =========================================================================== + * Prototypes for level 3 BLAS + * =========================================================================== + */ + +/* + * Routines 
with standard 4 prefixes (S, D, C, Z) + */ +void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const float alpha, const float *A, + const int lda, const float *B, const int ldb, + const float beta, float *C, const int ldc); +void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const float alpha, const float *A, const int lda, + const float *B, const int ldb, const float beta, + float *C, const int ldc); +void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const float alpha, const float *A, const int lda, + const float beta, float *C, const int ldc); +void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const float alpha, const float *A, const int lda, + const float *B, const int ldb, const float beta, + float *C, const int ldc); +void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const float alpha, const float *A, const int lda, + float *B, const int ldb); +void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const float alpha, const float *A, const int lda, + float *B, const int ldb); + +void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const double alpha, const double *A, + const int lda, const double *B, const int ldb, + const double beta, double *C, const int ldc); +void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const double alpha, const double *A, const int lda, + const double *B, const int ldb, const double beta, + double *C, const int ldc); +void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const double alpha, const double *A, const int lda, + const double beta, double *C, const int ldc); +void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const double alpha, const double *A, const int lda, + const double *B, const int ldb, const double beta, + double *C, const int ldc); +void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const double alpha, const double *A, const int lda, + double *B, const int ldb); +void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const double alpha, const double *A, const int lda, + double *B, const int ldb); + +void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const void *alpha, const void *A, + const int lda, const void *B, const int ldb, + const void 
*beta, void *C, const int ldc); +void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *beta, void *C, const int ldc); +void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); +void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); + +void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_TRANSPOSE TransB, const int M, const int N, + const int K, const void *alpha, const void *A, + const int lda, const void *B, const int ldb, + const void *beta, void *C, const int ldc); +void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *beta, void *C, const int ldc); +void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); +void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA, + const enum CBLAS_DIAG Diag, const int M, const int N, + const void *alpha, const void *A, const int lda, + void *B, const int ldb); + + +/* + * Routines with prefixes C and Z only + */ +void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const float alpha, const void *A, const int lda, + const float beta, void *C, const int ldc); +void cblas_cher2k(const enum CBLAS_ORDER Order, const enum 
CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const float beta, + void *C, const int ldc); +void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side, + const enum CBLAS_UPLO Uplo, const int M, const int N, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const void *beta, + void *C, const int ldc); +void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const double alpha, const void *A, const int lda, + const double beta, void *C, const int ldc); +void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, + const enum CBLAS_TRANSPOSE Trans, const int N, const int K, + const void *alpha, const void *A, const int lda, + const void *B, const int ldb, const double beta, + void *C, const int ldc); + +int cblas_errprn(int ierr, int info, char *form, ...); + +#endif /* end #ifdef CBLAS_ENUM_ONLY */ +#endif diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c index ff2ea22..aec4d60 100644 --- a/nerv/lib/matrix/cumatrix.c +++ b/nerv/lib/matrix/cumatrix.c @@ -37,7 +37,9 @@ void nerv_cuda_context_accu_profile(CuContext *context, *val += delta; } -static void new_cuda_handles(CuContext *context, Status *status) { +static void new_cuda_handles(CuContext *context, int dev, Status *status) { + if (context->has_handle) return; + CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status); CUBLAS_SAFE_SYNC_CALL(cublasCreate(&(context->cublas_handle)), status); CURAND_SAFE_SYNC_CALL(curandCreateGenerator(&(context->curand_gen), CURAND_RNG_PSEUDO_DEFAULT), status); @@ -47,9 +49,12 @@ static void new_cuda_handles(CuContext *context, Status *status) { CUDA_SAFE_SYNC_CALL(cudaEventCreate(&(context->profile_start)), status); CUDA_SAFE_SYNC_CALL(cudaEventCreate(&(context->profile_stop)), status); NERV_SET_STATUS(status, NERV_NORMAL, 0); + context->has_handle = 1; } static void free_cuda_handles(CuContext *context, Status *status) { + if (!context->has_handle) return; + context->has_handle = 0; CUBLAS_SAFE_SYNC_CALL(cublasDestroy(context->cublas_handle), status); CURAND_SAFE_SYNC_CALL(curandDestroyGenerator(context->curand_gen), status); CUDA_SAFE_SYNC_CALL(cudaEventDestroy(context->profile_start), status); @@ -57,9 +62,41 @@ static void free_cuda_handles(CuContext *context, Status *status) { NERV_SET_STATUS(status, NERV_NORMAL, 0); } -CuContext *nerv_cuda_context_create(Status *status) { +static int choose_best_gpu(Status *status) { + int i, n, dev = 0; + float best_ratio = 0; + fprintf(stderr, "*** select a GPU based on available space\n"); + CUDA_SAFE_CALL_RET(cudaGetDeviceCount(&n), status); + for (i = 0; i < n; i++) + { + size_t avail, total; + float ratio; + CUDA_SAFE_SYNC_CALL_RET(cudaSetDevice(i), status); + CUDA_SAFE_SYNC_CALL_RET(cuMemGetInfo(&avail, &total), status); + ratio = (float)avail/total * 100; + fprintf(stderr, "* card %d: %.2f%%\n", i, ratio); + if (ratio > best_ratio) + { + best_ratio = ratio; + dev = i; + } + CUDA_SAFE_SYNC_CALL_RET(cudaDeviceReset(), status); + } + fprintf(stderr, "*** final decision: GPU %d\n", dev); + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return dev; +} + +CuContext *nerv_cuda_context_create(int dev, Status *status) { CuContext *context = (CuContext *)malloc(sizeof(CuContext)); - new_cuda_handles(context, status); + context->has_handle = 0; /* this line must come first */ + if (dev == -1) + { + dev 
= choose_best_gpu(status); + if (status->err_code != NERV_NORMAL) + return NULL; + } + new_cuda_handles(context, dev, status); if (status->err_code != NERV_NORMAL) return NULL; context->profile = nerv_hashmap_create(PROFILE_HASHMAP_SIZE, bkdr_hash, strcmp); @@ -78,11 +115,14 @@ void nerv_cuda_context_destroy(CuContext *context, Status *status) { void nerv_cuda_context_select_gpu(CuContext *context, int dev, Status *status) { - free_cuda_handles(context, status); + /* free_cuda_handles(context, status); if (status->err_code != NERV_NORMAL) return; - CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status); - new_cuda_handles(context, status); + */ + /* because of cudaDeviceReset */ + context->has_handle = 0; + CUDA_SAFE_SYNC_CALL(cudaDeviceReset(), status); + new_cuda_handles(context, dev, status); if (status->err_code != NERV_NORMAL) return; NERV_SET_STATUS(status, NERV_NORMAL, 0); diff --git a/nerv/lib/matrix/cumatrix.h b/nerv/lib/matrix/cumatrix.h index 280035b..fd2a5ce 100644 --- a/nerv/lib/matrix/cumatrix.h +++ b/nerv/lib/matrix/cumatrix.h @@ -5,6 +5,7 @@ #include "cuda_helper.h" typedef struct CuContext { + int has_handle; cublasHandle_t cublas_handle; cudaEvent_t profile_start, profile_stop; curandGenerator_t curand_gen; @@ -15,6 +16,6 @@ void nerv_cuda_context_print_profile(CuContext *context); void nerv_cuda_context_clear_profile(CuContext *context); void nerv_cuda_context_accu_profile(CuContext *context, const char *name, float delta); void nerv_cuda_context_select_gpu(CuContext *context, int dev, Status *status); -CuContext *nerv_cuda_context_create(Status *status); +CuContext *nerv_cuda_context_create(int dev, Status *status); void nerv_cuda_context_destroy(CuContext *contex, Status *status); #endif diff --git a/nerv/lib/matrix/generic/mmatrix.c b/nerv/lib/matrix/generic/mmatrix.c index 485d778..fb99b53 100644 --- a/nerv/lib/matrix/generic/mmatrix.c +++ b/nerv/lib/matrix/generic/mmatrix.c @@ -8,10 +8,10 @@ context, status) #define NERV_GENERIC_MATRIX #include "../../common.h" +#include "../../cblas.h" #include "../../io/chunk_file.h" #include <string.h> #include <math.h> -#include <cblas.h> #include <float.h> Matrix *nerv_matrix_(colsum)(Matrix *a, MContext *context, Status *status) { diff --git a/nerv/matrix/cumatrix.c b/nerv/matrix/cumatrix.c index f6a4ed5..b8eef9c 100644 --- a/nerv/matrix/cumatrix.c +++ b/nerv/matrix/cumatrix.c @@ -9,7 +9,7 @@ const char *nerv_cuda_context_tname = "nerv.CuContext"; int nerv_cuda_context_lua_select_gpu(lua_State *L) { Status status; nerv_cuda_context_select_gpu(luaT_checkudata(L, 1, nerv_cuda_context_tname), - luaL_checkinteger(L, 1), &status); + luaL_checkinteger(L, 2), &status); NERV_LUA_CHECK_STATUS(L, status); return 0; } @@ -26,7 +26,8 @@ int nerv_cuda_context_lua_clear_profile(lua_State *L) { int nerv_cuda_context_lua_new(lua_State *L) { Status status; - CuContext *self = nerv_cuda_context_create(&status); + int dev = lua_gettop(L) > 0 ? 
luaL_checkinteger(L, 1) : -1; + CuContext *self = nerv_cuda_context_create(dev, &status); NERV_LUA_CHECK_STATUS(L, status); luaT_pushudata(L, self, nerv_cuda_context_tname); return 1; diff --git a/nerv/matrix/generic/mmatrix.c b/nerv/matrix/generic/mmatrix.c index 69000b7..1f37173 100644 --- a/nerv/matrix/generic/mmatrix.c +++ b/nerv/matrix/generic/mmatrix.c @@ -8,10 +8,10 @@ #define MATRIX_BASE_TNAME nerv_matrix_host_tname #define NERV_GENERIC_MATRIX #include "../../lib/common.h" +#include "../../lib/cblas.h" #include "../../lib/matrix/generic/mmatrix.h" #include "../../io/chunk_file.h" #include <string.h> -#include <cblas.h> #define BLAS_OP_N CblasNoTrans static int nerv_matrix_(lua_get_blas_op)(char ch) { diff --git a/nerv/matrix/init.lua b/nerv/matrix/init.lua index ef2fb6b..cf85004 100644 --- a/nerv/matrix/init.lua +++ b/nerv/matrix/init.lua @@ -87,6 +87,17 @@ function nerv.Matrix:__mul__(b) return c end +--- A wrapper function for `copy_from` +function nerv.Matrix:copy_to(b, ...) + b:copy_from(self, ...) +end + +--- The base class for all device (in-GPU) matrices +-- @type nerv.CuMatrix + +--- A wrapper function for `copy_fromd` +nerv.CuMatrix.copy_tod = nerv.Matrix.copy_to + --- CUDA float matrices -- @type nerv.CuMatrixFloat @@ -127,6 +138,14 @@ end -- @type nerv.MMatrix --- A wrapper function for `copy_fromh` -function nerv.MMatrix:copy_toh(b, ...) +nerv.MMatrix.copy_toh = nerv.Matrix.copy_to + +--- A wrapper function for `nerv.CuMatrix` copy +function nerv.MMatrix:copy_fromd(b, ...) + b:copy_toh(self, ...) +end + +--- A wrapper function for `nerv.CuMatrix` copy +function nerv.MMatrix:copy_tod(b, ...) b:copy_fromh(self, ...) end @@ -1,13 +1,21 @@ #! /usr/bin/env luajit require 'nerv' -nerv.printf("*** NERV: A Lua-based toolkit for high-performance deep learning (alpha) ***\n") -nerv.info("automatically initialize a default CuContext...") -nerv.CuMatrix._default_context = nerv.CuContext() -nerv.info("the default CuContext is ok") +local options = {{"help", "h", "boolean", default = false, desc = "print this help message"}, + {"use-cpu", "c", "boolean", default = false, desc = "use CPU by default (instead of gpu by default)"}, + {"select-gpu", nil, "int", default = -1, desc = "select the GPU for computation, fallback to auto mode if not specified"}} +econf = {} -- environment configuration -nerv.info("automatically initialize a default MContext...") -nerv.MMatrix._default_context = nerv.MContext() -nerv.info("the default MContext is ok") +local function print_help() + nerv.printf("Usage: <nerv_prog> [options] script.lua\n") + nerv.print_usage(options) +end + +nerv.printf("*** NERV: A Lua-based toolkit for high-performance deep learning (alpha) ***\n") +arg, opts = nerv.parse_args(arg, options) +if #arg < 1 or opts["help"].val then + print_help() + return +end -- only for backward compatibilty, will be removed in the future local function _add_profile_method(cls) @@ -15,13 +23,25 @@ local function _add_profile_method(cls) cls.print_profile = function () c:print_profile() end cls.clear_profile = function () c:clear_profile() end end -_add_profile_method(nerv.CuMatrix) -_add_profile_method(nerv.MMatrix) - -if #arg < 1 then - return +if not opts["use-cpu"].val then + local dev = opts["select-gpu"].val + nerv.info("automatically initialize a default CuContext...") + nerv.CuMatrix._default_context = nerv.CuContext(dev) + nerv.info("the default CuContext is ok") + _add_profile_method(nerv.CuMatrix) + nerv.CuMatrix.select_gpu = + function (dev) nerv.CuMatrix._default_context:select_gpu(dev) 
end + econf.use_cpu = false +else + econf.use_cpu = true end + +nerv.info("automatically initialize a default MContext...") +nerv.MMatrix._default_context = nerv.MContext() +nerv.info("the default MContext is ok") +_add_profile_method(nerv.MMatrix) + local script = arg[1] local script_arg = {} for i = 2, #arg do @@ -29,5 +49,3 @@ for i = 2, #arg do end arg = script_arg dofile(script) -nerv.CuMatrix.print_profile() -nerv.MMatrix.print_profile() diff --git a/nerv/nerv-scm-1.rockspec b/nerv/nerv-scm-1.rockspec index 0e1e47f..d039e85 100644 --- a/nerv/nerv-scm-1.rockspec +++ b/nerv/nerv-scm-1.rockspec @@ -11,7 +11,8 @@ description = { license = "BSD" } dependencies = { - "lua >= 5.1" + "lua >= 5.1", + "penlight >= 1.3.2" } build = { type = "make", diff --git a/nerv/nn/layer_dag.lua b/nerv/nn/layer_dag.lua index 6896878..f999752 100644 --- a/nerv/nn/layer_dag.lua +++ b/nerv/nn/layer_dag.lua @@ -134,20 +134,16 @@ function DAGLayer:__init(id, global_conf, layer_conf) end end + nerv.Layer.__init(self, id, global_conf, layer_conf) self.layers = layers self.inputs = inputs self.outputs = outputs - self.id = id - self.dim_in = dim_in - self.dim_out = dim_out self.parsed_conn = parsed_conn self.queue = queue - self.gconf = global_conf - if self.gconf.use_cpu then - self.mat_type = self.gconf.mmat_type - else - self.mat_type = self.gconf.cumat_type - end +end + +function DAGLayer:bind_params() + -- do nothing (instead of rebinding params for each layer) end function DAGLayer:init(batch_size, chunk_size) @@ -325,7 +321,7 @@ function DAGLayer:get_params() for id, ref in pairs(self.queue) do table.insert(param_repos, ref.layer:get_params()) end - return nerv.ParamRepo.merge(param_repos) + return nerv.ParamRepo.merge(param_repos, self.loc_type) end DAGLayer.PORT_TYPES = { diff --git a/nerv/nn/layer_repo.lua b/nerv/nn/layer_repo.lua index a169b2b..acef54a 100644 --- a/nerv/nn/layer_repo.lua +++ b/nerv/nn/layer_repo.lua @@ -12,27 +12,29 @@ function LayerRepo:add_layers(layer_spec, param_repo, global_conf) if layer_type == nil then nerv.error('layer type `%s` not found', ltype) end - for id, layer_config in pairs(llist) do - if layers[id] ~= nil then - nerv.error("a layer with id %s already exists", id) - end - nerv.info("create layer: %s", id) - if type(layer_config) ~= "table" then + for id, lconf in pairs(llist) do + if type(lconf) ~= "table" then nerv.error("layer config table is need") end - if type(layer_config.params) == "table" then - for pname, pid in pairs(layer_config.params) do - layer_config[pname] = param_repo:get_param(pid) - end + if lconf.pr == nil then + lconf.pr = param_repo end - if layer_config.pr == nil then - layer_config.pr = param_repo + if layers[id] ~= nil then + nerv.error("a layer with id %s already exists", id) end - layers[id] = layer_type(id, global_conf, layer_config) + nerv.info("create layer: %s", id) + layers[id] = layer_type(id, global_conf, lconf) end end end +function LayerRepo:rebind(param_repo) + for id, layer in pairs(self.layers) do + layer.lconf.pr = param_repo + layer:bind_params() + end +end + function LayerRepo:get_layer(lid) local layer = self.layers[lid] if layer == nil then diff --git a/nerv/nn/param_repo.lua b/nerv/nn/param_repo.lua index c124e08..aba7765 100644 --- a/nerv/nn/param_repo.lua +++ b/nerv/nn/param_repo.lua @@ -1,8 +1,37 @@ local ParamRepo = nerv.class("nerv.ParamRepo") -function ParamRepo:__init(plist) + +ParamRepo.LOC_TYPES = { + ON_DEVICE = {}, + ON_HOST = {} +} + +function ParamRepo:__init(plist, loc_type) self.params = {} + self.loc_type = 
loc_type or ParamRepo.LOC_TYPES.ON_HOST + local function make_checker(tname) + return function (mat) + if not nerv.is_type(mat, tname) then + nerv.error("unexpected param type in repo specification") + end + end + end + self.make_copier = function (mat_type, copy_method) + return function (mat) + local target = mat_type(mat:nrow(), mat:ncol()) + mat[copy_method](mat, target) + return target + end + end + + if self.loc_type == ParamRepo.LOC_TYPES.ON_HOST then + self.checker = make_checker("nerv.MMatrix") + else + self.checker = make_checker("nerv.CuMatrix") + end + if plist ~= nil then for i, p in ipairs(plist) do + p:check(self.checker) self.params[p.id] = p end end @@ -12,6 +41,7 @@ function ParamRepo:add(pid, p) if self.params[pid] ~= nil then nerv.error("duplicate params with the same id: %s", pid) end + p:check(self.checker) self.params[pid] = p end @@ -22,8 +52,8 @@ function ParamRepo:remove(pid, p) table.remove(self.params, pid) end -function ParamRepo.merge(repos) - local self = nerv.ParamRepo() +function ParamRepo.merge(repos, loc_type) + local self = nerv.ParamRepo(nil, loc_type) for i, repo in ipairs(repos) do if not nerv.is_type(repo, "nerv.ParamRepo") then nerv.error("nerv.ParamRepo objects expected, got %s", repo) @@ -78,3 +108,26 @@ function ParamRepo:get_param(pid) end return p end + +function ParamRepo:copy(loc_type, pids) + local copier + local target = nerv.ParamRepo(nil, loc_type) + if loc_type == nil then + loc_type = self.loc_type + end + if loc_type == ParamRepo.LOC_TYPES.ON_HOST then + copier = self.make_copier(gconf.mmat_type, 'copy_toh') + else + copier = self.make_copier(gconf.cumat_type, 'copy_tod') + end + if pids == nil then + for id, p in pairs(self.params) do + target.params[id] = p:copy(copier) + end + else + for i, pid in ipairs(pids) do + target.params[pid] = self:get_param(pid):copy(copier) + end + end + return target +end diff --git a/nerv/test/parse_args.lua b/nerv/test/parse_args.lua new file mode 100644 index 0000000..34ad55e --- /dev/null +++ b/nerv/test/parse_args.lua @@ -0,0 +1,15 @@ +local options = {{"abandon", "a", "boolean", default = false, desc = "abandon your belief"}, + {"bullshit", "b", "boolean", default = false, desc = "start to bullshit"}, + {"cheat", "c", "boolean", default = false, desc = "try to cheat"}, + {"delete", "d", "boolean", default = false, desc = "remove everything"}, + {"hehe", "h", "boolean", default = false, desc = "233333"}, + {"oh", "o", "boolean", default = true, desc = "oh yes!"}, + {"uid", nil, "int", desc = "user uid"}, + {"str", nil, "string", desc = "test string"}} + +args, opts = nerv.parse_args({"arg1", "arg2", "-abcd", "arg3", + "--hehe", "--oh=no", "--uid=43", + "highfive", "--str=hello"}, options) + +nerv.print_usage(options) +print(table.tostring(args), table.tostring(opts)) |
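
The last hunk above adds nerv/test/parse_args.lua, which exercises the option parser that the reworked launcher now relies on. As a rough usage sketch (not part of the patch: the {long, short, type, default, desc} option format, nerv.parse_args, nerv.print_usage and the opts[...].val accessors are taken from the launcher and test code above, while the option names "verbose" and "batch-size" and the literal argument table are purely hypothetical), a script could drive the same helper like this:

    -- minimal sketch, assuming the parse_args behaviour shown in
    -- nerv/test/parse_args.lua; option names here are made up
    local options = {{"verbose", "v", "boolean", default = false, desc = "print extra information"},
                     {"batch-size", nil, "int", default = 256, desc = "rows per minibatch"}}

    -- parse a literal argument table, as the test above does
    local args, opts = nerv.parse_args({"train.lua", "--batch-size=128", "-v", "data.scp"}, options)

    print(#args)                   -- positional arguments left over after parsing
    print(opts["batch-size"].val)  -- value given on the "command line" above
    print(opts["verbose"].val)     -- boolean flag set via its short form
    nerv.print_usage(options)      -- prints the generated help text

The same pattern is what the launcher diff uses for its new --use-cpu and --select-gpu flags before handing the remaining arguments to the user script.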