-rw-r--r--  .gitmodules | 7
-rw-r--r--  Makefile | 47
m---------  Penlight | 0
-rw-r--r--  README.md | 55
-rw-r--r--  README.rst | 64
-rw-r--r--  lua/config.lua | 67
-rw-r--r--  lua/main.lua | 45
-rw-r--r--  lua/network.lua | 113
-rw-r--r--  lua/reader.lua | 113
-rw-r--r--  lua/select_linear.lua | 62
-rw-r--r--  lua/timer.lua | 33
-rw-r--r--  lua/tnn.lua | 136
-rw-r--r--  nerv/Makefile | 22
-rw-r--r--  nerv/doc/nerv.md | 6
-rw-r--r--  nerv/doc/nerv_class.md | 8
-rw-r--r--  nerv/doc/nerv_io.md | 13
-rw-r--r--  nerv/doc/nerv_layer.md | 13
-rw-r--r--  nerv/doc/nerv_matrix.md | 20
-rw-r--r--  nerv/doc/nerv_nn.md | 43
-rw-r--r--  nerv/doc/nerv_param.md | 10
-rw-r--r--  nerv/examples/asr_trainer.lua | 256
-rw-r--r--  nerv/examples/swb_baseline.lua | 84
-rw-r--r--  nerv/examples/swb_baseline2.lua | 203
-rw-r--r--  nerv/examples/swb_baseline_basic.lua | 162
-rw-r--r--  nerv/examples/timit_baseline2.lua | 212
-rw-r--r--  nerv/init.lua | 229
-rw-r--r--  nerv/io/sgd_buffer.lua | 7
-rw-r--r--  nerv/layer/affine.lua | 43
-rw-r--r--  nerv/layer/bias.lua | 15
-rw-r--r--  nerv/layer/combiner.lua | 16
-rw-r--r--  nerv/layer/dropout.lua | 16
-rw-r--r--  nerv/layer/duplicate.lua | 7
-rw-r--r--  nerv/layer/elem_mul.lua | 11
-rw-r--r--  nerv/layer/graph.lua | 7
-rw-r--r--  nerv/layer/gru.lua | 20
-rw-r--r--  nerv/layer/identity.lua | 7
-rw-r--r--  nerv/layer/init.lua | 60
-rw-r--r--  nerv/layer/lstm.lua | 20
-rw-r--r--  nerv/layer/lstm_gate.lua | 17
-rw-r--r--  nerv/layer/mse.lua | 16
-rw-r--r--  nerv/layer/rnn.lua | 7
-rw-r--r--  nerv/layer/sigmoid.lua | 11
-rw-r--r--  nerv/layer/softmax.lua | 11
-rw-r--r--  nerv/layer/softmax_ce.lua | 16
-rw-r--r--  nerv/layer/tanh.lua | 11
-rw-r--r--  nerv/layer/window.lua | 15
-rw-r--r--  nerv/lib/cblas.h | 596
-rw-r--r--  nerv/lib/matrix/cumatrix.c | 52
-rw-r--r--  nerv/lib/matrix/cumatrix.h | 3
-rw-r--r--  nerv/lib/matrix/generic/mmatrix.c | 2
-rw-r--r--  nerv/matrix/cumatrix.c | 5
-rw-r--r--  nerv/matrix/generic/mmatrix.c | 2
-rw-r--r--  nerv/matrix/init.lua | 21
-rw-r--r--  nerv/nerv | 46
-rw-r--r--  nerv/nerv-scm-1.rockspec | 3
-rw-r--r--  nerv/nn/layer_dag.lua | 16
-rw-r--r--  nerv/nn/layer_repo.lua | 28
-rw-r--r--  nerv/nn/param_repo.lua | 59
-rw-r--r--  nerv/test/parse_args.lua | 15
59 files changed, 2008 insertions, 1196 deletions
diff --git a/.gitmodules b/.gitmodules
index 9f556c5..2b346c4 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,6 +1,9 @@
[submodule "luajit-2.0"]
path = luajit-2.0
- url = http://luajit.org/git/luajit-2.0.git
+ url = https://speechlab.sjtu.edu.cn/gitlab/nerv-dev/luajit.git
[submodule "luarocks"]
path = luarocks
- url = https://github.com/keplerproject/luarocks.git
+ url = https://speechlab.sjtu.edu.cn/gitlab/nerv-dev/luarocks.git
+[submodule "Penlight"]
+ path = Penlight
+ url = https://speechlab.sjtu.edu.cn/gitlab/nerv-dev/Penlight.git
diff --git a/Makefile b/Makefile
index 0982295..28012da 100644
--- a/Makefile
+++ b/Makefile
@@ -1,19 +1,42 @@
.PHONY: all clean install luajit luarocks speech
+############## EDIT THESE LINES #####################
SHELL := /bin/bash
PREFIX := $(CURDIR)/install/
-all: luajit luarocks install
+#CUDA_BASE := /usr/local/cuda-7.0
+CUDA_BASE := /usr/local/cuda
+BLAS_BASE := /usr/lib/
+BLAS_LDFLAGS := -L$(BLAS_BASE) -Wl,-rpath=$(BLAS_BASE)
+BLAS_TYPE := atlas
+KALDI_BASE := /speechlab/tools/KALDI/kaldi-master/
+#######################################################
+MKL_LDFLAGS := -lmkl_rt
+ATLAS_LDFLAGS := -lcblas -llapack_atlas
+ifeq ($(BLAS_TYPE), mkl)
+BLAS_LDFLAGS += $(MKL_LDFLAGS)
+else ifeq ($(BLAS_TYPE), atlas)
+BLAS_LDFLAGS += $(ATLAS_LDFLAGS)
+else
+$(error Invalid blas type)
+endif
+export CUDA_BASE
+export KALDI_BASE
+export BLAS_LDFLAGS
+
+.PHONY: nerv speech/speech_utils speech/htk_io speech/kaldi_io speech/kaldi_decode \
+ nerv-clean speech/speech_utils-clean speech/htk_io-clean speech/kaldi_io-clean speech/kaldi_decode-clean \
+ Penlight
+
+all: luajit luarocks Penlight nerv
luajit:
PREFIX=$(PREFIX) ./tools/build_luajit.sh
luarocks:
PREFIX=$(PREFIX) ./tools/build_luarocks.sh
-install:
- cd nerv; $(PREFIX)/bin/luarocks make CFLAGS=$(CFLAGS)
-speech:
- cd speech/speech_utils; $(PREFIX)/bin/luarocks make
- cd speech/htk_io; $(PREFIX)/bin/luarocks make
- cd speech/kaldi_io; $(PREFIX)/bin/luarocks make
-clean:
- cd nerv && make clean
- cd speech/speech_utils && make clean
- cd speech/htk_io && make clean
- cd speech/kaldi_io && make clean
+
+speech: speech/speech_utils speech/htk_io speech/kaldi_io speech/kaldi_decode
+speech-clean: speech/speech_utils-clean speech/htk_io-clean speech/kaldi_io-clean speech/kaldi_decode-clean
+clean: nerv-clean speech-clean
+
+nerv Penlight speech/speech_utils speech/htk_io speech/kaldi_io speech/kaldi_decode:
+ cd $@; $(PREFIX)/bin/luarocks make
+nerv-clean speech/speech_utils-clean speech/htk_io-clean speech/kaldi_io-clean speech/kaldi_decode-clean:
+ cd $(subst -clean,,$@); make clean LUA_BINDIR=$(PREFIX)/bin/
diff --git a/Penlight b/Penlight
new file mode 160000
+Subproject commit 16d149338af9efc910528641c5240c5641aeb8d
diff --git a/README.md b/README.md
deleted file mode 100644
index fe9dfc1..0000000
--- a/README.md
+++ /dev/null
@@ -1,55 +0,0 @@
-#The Nerv Toolkit User Manual#
-NOTE: This readme is obsolete and will be rearranged, for further information, please check http://nerv-sjtu.github.io/nerv/
-
-This user manual will information about how to use __Nerv__ and __Nerv__'s interface.
-
-##How to make and start using##
-First make sure you have __lua__ and __CUDA__ installed on your computer.
-__Nerv__ is currently developed via github.You can download and make __Nerv__ by doing the following:
-```
-cd ~
-git clone https://github.com/Nerv-SJTU/nerv.git
-cd nerv
-git submodule init && git submodule update
-make
-#To include some new CUDA feature(e.x. atomicCAS), use "make CFLAGS=-D__NERV_FUTURE_CUDA_7"
-
-#further, if you want the speech modules
-git clone https://github.com/Nerv-SJTU/nerv-speech.git speech
-make speech
-```
-The `git submodule` command is for the __luajit__ repository inside __Nerv__.
-Now, you can try to run some example scripts.
-```
-./install/bin/nerv examples/cumatrix_example.lua
-```
-To get an example of DNN(for ASR) training, run(this requires the speech modules)
-You need to be at or (copy files from) `/slfs1`(SJTU speechlab cluster) to get this running.
-```
-./install/bin/nerv nerv/examples/asr_trainer.lua nerv/examples/swb_baseline.lua
-```
-
-##How to contribute##
-Fork the original repository, then use the __pull&merge__ function in github to contribute.
-The pull&merge request can be found on your dashboard in github. See this [sync-help] to sync with the original repository.
-
-##Nerv Packages##
-* __luaT__
-Nerv uses [luaT]\(a [Torch] library\) to define lua class in C.
-* __[The Nerv OOP](nerv/doc/nerv_class.md)__
-Enables object-oriented programming in Nerv.
-* __[The Nerv utility functions](nerv/doc/nerv.md)__
-Inlcudes some utility functions from luaT to implement __Nerv.Class__.
-* __[The Nerv Matrix Package](nerv/doc/nerv_matrix.md)__
-The matrix package is a basic package in __Nerv__ that is used to store and manipulate matrices.
-* __[The Nerv IO Package](nerv/doc/nerv_io.md)__
-The IO package is used to read and write parameters to file.
-* __[The Nerv Parameter Package](nerv/doc/nerv_param.md)__
-The parameter package is used to store, read model parameters from file.
-* __[The Nerv Layer Package](nerv/doc/nerv_layer.md)__
-The layer package is used to define propagation and backpropagation of different type of layers.
-* __[The Nerv NN Package](nerv/doc/nerv_nn.md)__
-The nn package is for organizing a neural network, it contains __nerv.LayerRepo__, __nerv.ParamRepo__, and __nerv.DAGLayer__.
-[luaT]:https://github.com/torch/torch7/tree/master/lib/luaT
-[Torch]:https://github.com/torch
-[sync-help]:https://help.github.com/articles/syncing-a-fork/
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..c00743c
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,64 @@
+NERV Toolkit
+============
+
+NOTE: This readme is in-progress.
+
+Installation
+------------
+First, make sure you have at least one implementation of BLAS and CUDA installed
+on your computer.
+
+- Checkout NERV:
+
+ ::
+
+ bash
+ git clone https://speechlab.sjtu.edu.cn/gitlab/nerv-dev/nerv.git
+
+- Checkout submodules (luajit, luarocks, Penlight, etc.):
+
+ ::
+
+ cd nerv
+ git submodule init && git submodule update
+
+- Build NERV: you can specify either ``mkl`` or ``atlas`` to ``BLAS_TYPE``.
+ ``BLAS_BASE`` is the directory containing BLAS ``.so`` files. By default,
+ ``atlas`` is used for ``BLAS_TYPE``, ``/usr/lib/`` is used for ``BLAS_BASE``,
+ and ``/usr/local/cuda`` is used for ``CUDA_BASE``.
+
+ ::
+
+ # an example for compiling on SJTU Speechlab major cluster
+ make BLAS_TYPE=mkl BLAS_BASE=/home/intel/mkl/lib/intel64/ CUDA_BASE=/usr/local/cuda
+
+- To include some new features (e.g. ``atomicCAS`` in CUDA), add corresponding flags to
+ ``NERV_FEAT`` (e.g. ``NERV_FEAT=-D__NERV_FUTURE_CUDA_7``) while making:
+
+ ::
+
+ make NERV_FEAT=-D__NERV_FUTURE_CUDA_7 BLAS_TYPE=mkl BLAS_BASE=/home/intel/mkl/lib/intel64/ CUDA_BASE=/usr/local/cuda
+
+- For speech tasks, you need to install related lua rocks (Lua packages):
+
+ ::
+
+ # checkout speech repository to local directory nerv/speech (suppose you're
+ # still at the root directory of NERV repo)
+ git clone https://speechlab.sjtu.edu.cn/gitlab/nerv-dev/nerv-speech.git speech
+ # build and install HTK I/O support, Kaldi I/O support, Kaldi decoding support, etc.
+ make speech BLAS_TYPE=mkl BLAS_BASE=/home/intel/mkl/lib/intel64/
+
+Example & Tutorial
+------------------
+For speech tasks, please refer to ``tutorial/`` in ``nerv-speech`` repository.
+
+Contribution
+------------
+The basic rule is simple: just fork the original repository, then create a pull
+request (merge request) to the administrator of the project. If you want to fix
+any bugs in existing code, don't hesitate to create a pull (merge) request to
+the repository with clear and detailed analysis of the problem. If you want to
+add additional task-specific functionalities (modules) for speech to NERV,
+please create a luarocks-compliant package and also a pull (merge) request to
+the ``nerv-speech`` repository instead of ``nerv``.
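
As a quick sanity check after the build steps in the new README.rst above, a short Lua script can be run with the freshly installed interpreter (`./install/bin/nerv`, as in the README.md removed below). This is only an illustrative sketch based on the matrix API documented in `nerv/doc/nerv_matrix.md` later in this diff; the file name `smoke_test.lua` is made up.
```
-- smoke_test.lua: hypothetical sanity check, run as ./install/bin/nerv smoke_test.lua
local m = nerv.CuMatrixFloat(4, 4)  -- CUDA-backed float matrix (see doc/nerv_matrix.md)
m:fill(1.0)                         -- set every element to 1.0
print(m)                            -- matrices print their contents directly
local s = nerv.CuMatrixFloat(4, 4)
s:softmax(m)                        -- row-by-row softmax of m, stored into s
print(s)                            -- each entry should be 0.25 for a constant input row
```
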
diff --git a/lua/config.lua b/lua/config.lua
deleted file mode 100644
index 1ec1198..0000000
--- a/lua/config.lua
+++ /dev/null
@@ -1,67 +0,0 @@
-function get_global_conf()
- local global_conf = {
- lrate = 0.15,
- wcost = 1e-5,
- momentum = 0,
- clip = 5,
- cumat_type = nerv.CuMatrixFloat,
- mmat_type = nerv.MMatrixFloat,
- vocab_size = 10000,
- nn_act_default = 0,
- hidden_size = 300,
- layer_num = 1,
- chunk_size = 15,
- batch_size = 20,
- max_iter = 3,
- param_random = function() return (math.random() / 5 - 0.1) end,
- dropout = 0.5,
- timer = nerv.Timer(),
- pr = nerv.ParamRepo(),
- }
- return global_conf
-end
-
-function get_layers(global_conf)
- local pr = global_conf.pr
- local layers = {
- ['nerv.LSTMLayer'] = {},
- ['nerv.DropoutLayer'] = {},
- ['nerv.SelectLinearLayer'] = {
- ['select'] = {dim_in = {1}, dim_out = {global_conf.hidden_size}, vocab = global_conf.vocab_size, pr = pr},
- },
- ['nerv.CombinerLayer'] = {},
- ['nerv.AffineLayer'] = {
- output = {dim_in = {global_conf.hidden_size}, dim_out = {global_conf.vocab_size}, pr = pr}
- },
- ['nerv.SoftmaxCELayer'] = {
- softmax = {dim_in = {global_conf.vocab_size, global_conf.vocab_size}, dim_out = {1}, compressed = true},
- },
- }
- for i = 1, global_conf.layer_num do
- layers['nerv.LSTMLayer']['lstm' .. i] = {dim_in = {global_conf.hidden_size, global_conf.hidden_size, global_conf.hidden_size}, dim_out = {global_conf.hidden_size, global_conf.hidden_size}, pr = pr}
- layers['nerv.DropoutLayer']['dropout' .. i] = {dim_in = {global_conf.hidden_size}, dim_out = {global_conf.hidden_size}}
- layers['nerv.CombinerLayer']['dup' .. i] = {dim_in = {global_conf.hidden_size}, dim_out = {global_conf.hidden_size, global_conf.hidden_size}, lambda = {1}}
- end
- return layers
-end
-
-function get_connections(global_conf)
- local connections = {
- {'<input>[1]', 'select[1]', 0},
- {'select[1]', 'lstm1[1]', 0},
- {'dropout' .. global_conf.layer_num .. '[1]', 'output[1]', 0},
- {'output[1]', 'softmax[1]', 0},
- {'<input>[2]', 'softmax[2]', 0},
- {'softmax[1]', '<output>[1]', 0},
- }
- for i = 1, global_conf.layer_num do
- table.insert(connections, {'lstm' .. i .. '[1]', 'dup' .. i .. '[1]', 0})
- table.insert(connections, {'lstm' .. i .. '[2]', 'lstm' .. i .. '[3]', 1})
- table.insert(connections, {'dup' .. i .. '[1]', 'lstm' .. i .. '[2]', 1})
- table.insert(connections, {'dup' .. i .. '[2]', 'dropout' .. i .. '[1]', 0})
- if i > 1 then
- table.insert(connections, {'dropout' .. (i - 1) .. '[1]', 'lstm' .. i .. '[1]', 0})
- end
- end
- return connections
-end
diff --git a/lua/main.lua b/lua/main.lua
deleted file mode 100644
index ce0270a..0000000
--- a/lua/main.lua
+++ /dev/null
@@ -1,45 +0,0 @@
-nerv.include('reader.lua')
-nerv.include('timer.lua')
-nerv.include('config.lua')
-nerv.include(arg[1])
-
-local global_conf = get_global_conf()
-local timer = global_conf.timer
-
-timer:tic('IO')
-
-local data_path = 'nerv/nerv/examples/lmptb/PTBdata/'
-local train_reader = nerv.Reader(data_path .. 'vocab', data_path .. 'ptb.valid.txt.adds')
-local val_reader = nerv.Reader(data_path .. 'vocab', data_path .. 'ptb.valid.txt.adds')
-
-local train_data = train_reader:get_all_batch(global_conf)
-local val_data = val_reader:get_all_batch(global_conf)
-
-local layers = get_layers(global_conf)
-local connections = get_connections(global_conf)
-
-local NN = nerv.NN(global_conf, train_data, val_data, layers, connections)
-
-timer:toc('IO')
-timer:check('IO')
-io.flush()
-
-timer:tic('global')
-local best_cv = 1e10
-for i = 1, global_conf.max_iter do
- timer:tic('Epoch' .. i)
- local train_ppl, val_ppl = NN:epoch()
- if val_ppl < best_cv then
- best_cv = val_ppl
- else
- global_conf.lrate = global_conf.lrate / 2.0
- end
- nerv.printf('Epoch %d: %f %f %f\n', i, global_conf.lrate, train_ppl, val_ppl)
- timer:toc('Epoch' .. i)
- timer:check('Epoch' .. i)
- io.flush()
-end
-timer:toc('global')
-timer:check('global')
-timer:check('network')
-timer:check('gc')
diff --git a/lua/network.lua b/lua/network.lua
deleted file mode 100644
index 0c11321..0000000
--- a/lua/network.lua
+++ /dev/null
@@ -1,113 +0,0 @@
-nerv.include('select_linear.lua')
-
-local nn = nerv.class('nerv.NN')
-
-function nn:__init(global_conf, train_data, val_data, layers, connections)
- self.gconf = global_conf
- self.network = self:get_network(layers, connections)
- self.train_data = self:get_data(train_data)
- self.val_data = self:get_data(val_data)
-end
-
-function nn:get_network(layers, connections)
- self.gconf.dropout_rate = 0
- local layer_repo = nerv.LayerRepo(layers, self.gconf.pr, self.gconf)
- local graph = nerv.GraphLayer('graph', self.gconf,
- {dim_in = {1, self.gconf.vocab_size}, dim_out = {1},
- layer_repo = layer_repo, connections = connections})
- local network = nerv.Network('network', self.gconf,
- {network = graph, clip = self.gconf.clip})
- network:init(self.gconf.batch_size, self.gconf.chunk_size)
- return network
-end
-
-function nn:get_data(data)
- local err_output = {}
- local softmax_output = {}
- local output = {}
- for i = 1, self.gconf.chunk_size do
- err_output[i] = self.gconf.cumat_type(self.gconf.batch_size, 1)
- softmax_output[i] = self.gconf.cumat_type(self.gconf.batch_size, self.gconf.vocab_size)
- output[i] = self.gconf.cumat_type(self.gconf.batch_size, 1)
- end
- local ret = {}
- for i = 1, #data do
- ret[i] = {}
- ret[i].input = {}
- ret[i].output = {}
- ret[i].err_input = {}
- ret[i].err_output = {}
- for t = 1, self.gconf.chunk_size do
- ret[i].input[t] = {}
- ret[i].output[t] = {}
- ret[i].err_input[t] = {}
- ret[i].err_output[t] = {}
- ret[i].input[t][1] = data[i].input[t]
- ret[i].input[t][2] = data[i].output[t]
- ret[i].output[t][1] = output[t]
- local err_input = self.gconf.mmat_type(self.gconf.batch_size, 1)
- for j = 1, self.gconf.batch_size do
- if t <= data[i].seq_len[j] then
- err_input[j - 1][0] = 1
- else
- err_input[j - 1][0] = 0
- end
- end
- ret[i].err_input[t][1] = self.gconf.cumat_type.new_from_host(err_input)
- ret[i].err_output[t][1] = err_output[t]
- ret[i].err_output[t][2] = softmax_output[t]
- end
- ret[i].seq_length = data[i].seq_len
- ret[i].new_seq = {}
- for j = 1, self.gconf.batch_size do
- if data[i].seq_start[j] then
- table.insert(ret[i].new_seq, j)
- end
- end
- end
- return ret
-end
-
-function nn:process(data, do_train)
- local timer = self.gconf.timer
- local total_err = 0
- local total_frame = 0
- for id = 1, #data do
- if do_train then
- self.gconf.dropout_rate = self.gconf.dropout
- data[id].do_train = true
- else
- self.gconf.dropout_rate = 0
- data[id].do_train = false
- end
- timer:tic('network')
- self.network:mini_batch_init(data[id])
- self.network:propagate()
- timer:toc('network')
- for t = 1, self.gconf.chunk_size do
- local tmp = data[id].output[t][1]:new_to_host()
- for i = 1, self.gconf.batch_size do
- if t <= data[id].seq_length[i] then
- total_err = total_err + math.log10(math.exp(tmp[i - 1][0]))
- total_frame = total_frame + 1
- end
- end
- end
- if do_train then
- timer:tic('network')
- self.network:back_propagate()
- self.network:update()
- timer:toc('network')
- end
- timer:tic('gc')
- collectgarbage('collect')
- timer:toc('gc')
- end
- return math.pow(10, - total_err / total_frame)
-end
-
-function nn:epoch()
- local train_error = self:process(self.train_data, true)
- local val_error = self:process(self.val_data, false)
- return train_error, val_error
-end
diff --git a/lua/reader.lua b/lua/reader.lua
deleted file mode 100644
index 0c7bcb6..0000000
--- a/lua/reader.lua
+++ /dev/null
@@ -1,113 +0,0 @@
-local Reader = nerv.class('nerv.Reader')
-
-function Reader:__init(vocab_file, input_file)
- self:get_vocab(vocab_file)
- self:get_seq(input_file)
-end
-
-function Reader:get_vocab(vocab_file)
- local f = io.open(vocab_file, 'r')
- local id = 0
- self.vocab = {}
- while true do
- local word = f:read()
- if word == nil then
- break
- end
- self.vocab[word] = id
- id = id + 1
- end
- self.size = id
-end
-
-function Reader:split(s, t)
- local ret = {}
- for x in (s .. t):gmatch('(.-)' .. t) do
- table.insert(ret, x)
- end
- return ret
-end
-
-function Reader:get_seq(input_file)
- local f = io.open(input_file, 'r')
- self.seq = {}
- while true do
- local seq = f:read()
- if seq == nil then
- break
- end
- seq = self:split(seq, ' ')
- local tmp = {}
- for i = 1, #seq do
- if seq[i] ~= '' then
- table.insert(tmp, self.vocab[seq[i]])
- end
- end
- table.insert(self.seq, tmp)
- end
-end
-
-function Reader:get_in_out(id, pos)
- return self.seq[id][pos], self.seq[id][pos + 1], pos + 1 == #self.seq[id]
-end
-
-function Reader:get_all_batch(global_conf)
- local data = {}
- local pos = {}
- local offset = 1
- for i = 1, global_conf.batch_size do
- pos[i] = nil
- end
- --while true do
- for i = 1, 100 do
- local input = {}
- local output = {}
- for i = 1, global_conf.chunk_size do
- input[i] = global_conf.mmat_type(global_conf.batch_size, 1)
- input[i]:fill(global_conf.nn_act_default)
- output[i] = global_conf.mmat_type(global_conf.batch_size, 1)
- output[i]:fill(global_conf.nn_act_default)
- end
- local seq_start = {}
- local seq_end = {}
- local seq_len = {}
- for i = 1, global_conf.batch_size do
- seq_start[i] = false
- seq_end[i] = false
- seq_len[i] = 0
- end
- local has_new = false
- for i = 1, global_conf.batch_size do
- if pos[i] == nil then
- if offset < #self.seq then
- seq_start[i] = true
- pos[i] = {offset, 1}
- offset = offset + 1
- end
- end
- if pos[i] ~= nil then
- has_new = true
- for j = 1, global_conf.chunk_size do
- local final
- input[j][i-1][0], output[j][i-1][0], final = self:get_in_out(pos[i][1], pos[i][2])
- seq_len[i] = j
- if final then
- seq_end[i] = true
- pos[i] = nil
- break
- end
- pos[i][2] = pos[i][2] + 1
- end
- end
- end
- if not has_new then
- break
- end
- for i = 1, global_conf.chunk_size do
- input[i] = global_conf.cumat_type.new_from_host(input[i])
- output[i] = global_conf.cumat_type.new_from_host(output[i])
- end
- table.insert(data, {input = input, output = output, seq_start = seq_start, seq_end = seq_end, seq_len = seq_len})
- end
- return data
-end
diff --git a/lua/select_linear.lua b/lua/select_linear.lua
deleted file mode 100644
index a7e20cc..0000000
--- a/lua/select_linear.lua
+++ /dev/null
@@ -1,62 +0,0 @@
-local SL = nerv.class('nerv.SelectLinearLayer', 'nerv.Layer')
-
---id: string
---global_conf: table
---layer_conf: table
---Get Parameters
-function SL:__init(id, global_conf, layer_conf)
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = global_conf
-
- self.vocab = layer_conf.vocab
- self.ltp = self:find_param("ltp", layer_conf, global_conf, nerv.LinearTransParam, {self.vocab, self.dim_out[1]}) --layer_conf.ltp
-
- self:check_dim_len(1, 1)
-end
-
---Check parameter
-function SL:init(batch_size)
- if (self.dim_in[1] ~= 1) then --one word id
- nerv.error("mismatching dimensions of ltp and input")
- end
- if (self.dim_out[1] ~= self.ltp.trans:ncol()) then
- nerv.error("mismatching dimensions of bp and output")
- end
-
- self.batch_size = bath_size
- self.ltp:train_init()
-end
-
-function SL:update(bp_err, input, output)
- --use this to produce reproducable result, don't forget to set the dropout to zero!
- --for i = 1, input[1]:nrow(), 1 do
- -- local word_vec = self.ltp.trans[input[1][i - 1][0]]
- -- word_vec:add(word_vec, bp_err[1][i - 1], 1, - self.gconf.lrate / self.gconf.batch_size)
- --end
-
- --I tried the update_select_rows kernel which uses atomicAdd, but it generates unreproducable result
- self.ltp.trans:update_select_rows_by_colidx(bp_err[1], input[1], - self.gconf.lrate / self.gconf.batch_size, 0)
- self.ltp.trans:add(self.ltp.trans, self.ltp.trans, 1.0, - self.gconf.lrate * self.gconf.wcost)
-end
-
-function SL:propagate(input, output)
- --for i = 0, input[1]:ncol() - 1, 1 do
- -- if (input[1][0][i] > 0) then
- -- output[1][i]:copy_fromd(self.ltp.trans[input[1][0][i]])
- -- else
- -- output[1][i]:fill(0)
- -- end
- --end
- output[1]:copy_rows_fromd_by_colidx(self.ltp.trans, input[1])
-end
-
-function SL:back_propagate(bp_err, next_bp_err, input, output)
- --input is compressed, do nothing
-end
-
-function SL:get_params()
- local paramRepo = nerv.ParamRepo({self.ltp})
- return paramRepo
-end
diff --git a/lua/timer.lua b/lua/timer.lua
deleted file mode 100644
index 2c54ca8..0000000
--- a/lua/timer.lua
+++ /dev/null
@@ -1,33 +0,0 @@
-local Timer = nerv.class("nerv.Timer")
-
-function Timer:__init()
- self.last = {}
- self.rec = {}
-end
-
-function Timer:tic(item)
- self.last[item] = os.clock()
-end
-
-function Timer:toc(item)
- if (self.last[item] == nil) then
- nerv.error("item not there")
- end
- if (self.rec[item] == nil) then
- self.rec[item] = 0
- end
- self.rec[item] = self.rec[item] + os.clock() - self.last[item]
-end
-
-function Timer:check(item)
- if self.rec[item]==nil then
- nerv.error('item not there')
- end
- nerv.printf('"%s" lasts for %f secs.\n',item,self.rec[item])
-end
-
-function Timer:flush()
- for key, value in pairs(self.rec) do
- self.rec[key] = nil
- end
-end
diff --git a/lua/tnn.lua b/lua/tnn.lua
deleted file mode 100644
index bf9f118..0000000
--- a/lua/tnn.lua
+++ /dev/null
@@ -1,136 +0,0 @@
-nerv.include('select_linear.lua')
-
-local reader = nerv.class('nerv.TNNReader')
-
-function reader:__init(global_conf, data)
- self.gconf = global_conf
- self.offset = 0
- self.data = data
-end
-
-function reader:get_batch(feeds)
- self.offset = self.offset + 1
- if self.offset > #self.data then
- return false
- end
- for i = 1, self.gconf.chunk_size do
- feeds.inputs_m[i][1]:copy_from(self.data[self.offset].input[i])
- feeds.inputs_m[i][2]:copy_from(self.data[self.offset].output[i]:decompress(self.gconf.vocab_size))
- end
- feeds.flags_now = self.data[self.offset].flags
- feeds.flagsPack_now = self.data[self.offset].flagsPack
- return true
-end
-
-function reader:has_data(t, i)
- return t <= self.data[self.offset].seq_len[i]
-end
-
-function reader:get_err_input()
- return self.data[self.offset].err_input
-end
-
-local nn = nerv.class('nerv.NN')
-
-function nn:__init(global_conf, train_data, val_data, layers, connections)
- self.gconf = global_conf
- self.tnn = self:get_tnn(layers, connections)
- self.train_data = self:get_data(train_data)
- self.val_data = self:get_data(val_data)
-end
-
-function nn:get_tnn(layers, connections)
- self.gconf.dropout_rate = 0
- local layer_repo = nerv.LayerRepo(layers, self.gconf.pr, self.gconf)
- local tnn = nerv.TNN('TNN', self.gconf, {dim_in = {1, self.gconf.vocab_size},
- dim_out = {1}, sub_layers = layer_repo, connections = connections,
- clip = self.gconf.clip})
- tnn:init(self.gconf.batch_size, self.gconf.chunk_size)
- return tnn
-end
-
-function nn:get_data(data)
- local ret = {}
- for i = 1, #data do
- ret[i] = {}
- ret[i].input = data[i].input
- ret[i].output = data[i].output
- ret[i].flags = {}
- ret[i].err_input = {}
- for t = 1, self.gconf.chunk_size do
- ret[i].flags[t] = {}
- local err_input = self.gconf.mmat_type(self.gconf.batch_size, 1)
- for j = 1, self.gconf.batch_size do
- if t <= data[i].seq_len[j] then
- ret[i].flags[t][j] = nerv.TNN.FC.SEQ_NORM
- err_input[j - 1][0] = 1
- else
- ret[i].flags[t][j] = 0
- err_input[j - 1][0] = 0
- end
- end
- ret[i].err_input[t] = self.gconf.cumat_type.new_from_host(err_input)
- end
- for j = 1, self.gconf.batch_size do
- if data[i].seq_start[j] then
- ret[i].flags[1][j] = bit.bor(ret[i].flags[1][j], nerv.TNN.FC.SEQ_START)
- end
- if data[i].seq_end[j] then
- local t = data[i].seq_len[j]
- ret[i].flags[t][j] = bit.bor(ret[i].flags[t][j], nerv.TNN.FC.SEQ_END)
- end
- end
- ret[i].flagsPack = {}
- for t = 1, self.gconf.chunk_size do
- ret[i].flagsPack[t] = 0
- for j = 1, self.gconf.batch_size do
- ret[i].flagsPack[t] = bit.bor(ret[i].flagsPack[t], ret[i].flags[t][j])
- end
- end
- ret[i].seq_len = data[i].seq_len
- end
- return ret
-end
-
-function nn:process(data, do_train)
- local total_err = 0
- local total_frame = 0
- local reader = nerv.TNNReader(self.gconf, data)
- while true do
- local r, _ = self.tnn:getfeed_from_reader(reader)
- if not r then
- break
- end
- if do_train then
- self.gconf.dropout_rate = self.gconf.dropout
- else
- self.gconf.dropout_rate = 0
- end
- self.tnn:net_propagate()
- for t = 1, self.gconf.chunk_size do
- local tmp = self.tnn.outputs_m[t][1]:new_to_host()
- for i = 1, self.gconf.batch_size do
- if reader:has_data(t, i) then
- total_err = total_err + math.log10(math.exp(tmp[i - 1][0]))
- total_frame = total_frame + 1
- end
- end
- end
- if do_train then
- local err_input = reader:get_err_input()
- for i = 1, self.gconf.chunk_size do
- self.tnn.err_inputs_m[i][1]:copy_from(err_input[i])
- end
- self.tnn:net_backpropagate(false)
- self.tnn:net_backpropagate(true)
- end
- collectgarbage('collect')
- end
- return math.pow(10, - total_err / total_frame)
-end
-
-function nn:epoch()
- local train_error = self:process(self.train_data, true)
- local val_error = self:process(self.val_data, false)
- return train_error, val_error
-end
diff --git a/nerv/Makefile b/nerv/Makefile
index a9b4baf..421eda0 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -1,3 +1,11 @@
+ifndef LUA_BINDIR
+$(error Please build the package via luarocks: `luarocks make`)
+endif
+
+ifndef CUDA_BASE
+$(error CUDA_BASE is not set)
+endif
+
.PHONY: build install clean
SHELL := /bin/bash
@@ -34,18 +42,18 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \
layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \
layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \
layer/elem_mul.lua layer/lstm.lua layer/lstm_gate.lua layer/dropout.lua layer/gru.lua \
- layer/graph.lua layer/rnn.lua layer/duplicate.lua layer/identity.lua \
+ layer/graph.lua layer/rnn.lua layer/duplicate.lua layer/identity.lua \
nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua nn/network.lua \
io/sgd_buffer.lua \
tnn/init.lua tnn/sutil.lua tnn/tnn.lua
INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK
-#CUDA_BASE := /usr/local/cuda-7.0
-CUDA_BASE := /usr/local/cuda
CUDA_INCLUDE := -I $(CUDA_BASE)/include/
INCLUDE += $(CUDA_INCLUDE)
-LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcublas -lcurand
+CUDA_LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcuda -lcublas -lcurand
+override CFLAGS += $(NERV_FEAT)
+
NVCC := $(CUDA_BASE)/bin/nvcc
EMPTY :=
SPACE := $(EMPTY) $(EMPTY)
@@ -66,11 +74,11 @@ $(LUA_DIR)/%.lua: %.lua
cp $< $@
$(LIB_PATH)/libnervcore.so: $(CORE_OBJS)
- gcc -shared -o $@ $^ $(LDFLAGS) -lcblas
+ gcc -shared -o $@ $^ $(LDFLAGS) $(CUDA_LDFLAGS) $(BLAS_LDFLAGS)
$(LIB_PATH)/libluaT.so: $(LUAT_OBJS)
- gcc -shared -o $@ $^ $(LDFLAGS)
+ gcc -shared -o $@ $^
$(INST_LIBDIR)/libnerv.so: $(NERV_OBJS) $(LIB_PATH)/libnervcore.so $(LIB_PATH)/libluaT.so
- gcc -shared -o $@ $(NERV_OBJS) $(LDFLAGS) -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -lluaT
+ gcc -shared -o $@ $(NERV_OBJS) -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -lluaT
$(OBJ_DIR)/matrix/cumatrix.o: matrix/generic/cumatrix.c matrix/generic/matrix.c
$(OBJ_DIR)/matrix/mmatrix.o: matrix/generic/mmatrix.c matrix/generic/matrix.c
diff --git a/nerv/doc/nerv.md b/nerv/doc/nerv.md
index 28411f5..125928d 100644
--- a/nerv/doc/nerv.md
+++ b/nerv/doc/nerv.md
@@ -1,6 +1,6 @@
-#The Nerv utility functions#
+# The Nerv utility functions
Part of the [Nerv](../README.md) toolkit.
-##Methods##
+## Methods
* __string = nerv.typename(obj a)__
A registered function, the original function is `luaT_lua_typename`. In some cases if you call `type(a)` for object of some class in __Nerv__(like __Nerv.CuMatrix__) it will only return "userdata"(because it is created in C), in this case you can use this method to get its type.
@@ -14,4 +14,4 @@ A registered function, the original function is `luaT_newmetatable`, it returns
* __string = nerv.setmetatable(table self, string tname)__
A registered function, the original function is `luaT_lua_setmetatable`. It assigns the metatable registered in __luaT__ by the name *tname* to the table *self*. And return *tname* to user.
* __table = nerv.get_type(string typename)__
-Returns the type(`loadstring("return " .. typename)`). \ No newline at end of file
+Returns the type(`loadstring("return " .. typename)`).
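
To make the two helpers above concrete, here is a small sketch (not taken from the repository) that relies only on the behaviour stated in this document:
```
-- Illustration of nerv.typename and nerv.get_type as described above.
local m = nerv.CuMatrixFloat(2, 2)
print(type(m))                  -- "userdata": the object is created in C
print(nerv.typename(m))         -- the luaT-registered name, e.g. "nerv.CuMatrixFloat"
local T = nerv.get_type("nerv.CuMatrixFloat")  -- same as loadstring("return nerv.CuMatrixFloat")()
local n = T(2, 2)               -- the returned type can be called like the class itself
```
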
diff --git a/nerv/doc/nerv_class.md b/nerv/doc/nerv_class.md
index 99f63e7..8314b12 100644
--- a/nerv/doc/nerv_class.md
+++ b/nerv/doc/nerv_class.md
@@ -1,10 +1,10 @@
-#The Nerv OOP#
+# The Nerv OOP
Part of the [Nerv](../README.md) toolkit.
-##Methods##
+## Methods
* __metatable mt, metatable mpt = nerv.class(string tname, string parenttname)__
This method is used to create a class by the name `tname`, which inherits `parenttname` in __Nerv__, then you create a new instance of this class by calling `obj=tname(...)`. The `tname.__init(...)` method(if defined) will be called in the constructing. The metatable of the class and its parent class will be returned.
-##Examples##
+## Examples
* This example implements a simple `nerv.Counter` class which is inherited by `nerv.BetterCounter`.
```
@@ -33,4 +33,4 @@ c1 = nerv.Counter(1)
print(c1.c)
bc1 = nerv.BetterCounter(1, 1)
print(bc1.c, bc1.bc)
-``` \ No newline at end of file
+```
diff --git a/nerv/doc/nerv_io.md b/nerv/doc/nerv_io.md
index 07589df..299362f 100644
--- a/nerv/doc/nerv_io.md
+++ b/nerv/doc/nerv_io.md
@@ -1,7 +1,7 @@
-#The Nerv IO Package#
+# The Nerv IO Package
Part of the [Nerv](../README.md) toolkit.
-##Description##
+## Description
The main class that the user uses to store and read parameter object to and from files is __nerv.ChunkFile__.
In the file, a parameter object will be saved using a standard format. First is the length(in byte) of this object, then a table which includes some meta information of the object, and a data area. Below is an example text file.
```
@@ -23,7 +23,7 @@ In the file, a parameter object will be saved using a standard format. First is
3.000000 3.000000 3.000000
```
-##Methods##
+## Methods
* __ChunkFile ChunkFile(string fn, string mode)__
`mode` can be `r` or `w`, for reading or writing a file. The returned __ChunkFile__ will be ready to write or read objects which follows the __nerv.Param__ interface(using `write_chunk` and `read_chunk`).
* __void ChunkFile.write_chunk(ChunkFile self, Param p)__
@@ -33,7 +33,7 @@ Read the __Param__ object by id `id` from the file `self`. It will be constructe
* __void ChunkFile.close(ChunkFile self)__
Close the opened file.
-##Examples##
+## Examples
* An example showing how to use __ChunkFile__ to store and read parameter objects.
```
require 'io'
@@ -96,7 +96,7 @@ do
end
```
-##Developer Notes##
+## Developer Notes
* There are four classes in to deal with chunk data, which are __nerv.ChunkFile__, __nerv.ChunkFileHandle__, __nerv.ChunkInfo__, __nerv.ChunkData__. Below is the underlying C structs.
```
typedef struct ChunkFileHandle {
@@ -110,4 +110,5 @@ typedef struct ChunkData {
char *data;
} ChunkData;
```
-* In __Nerv.io__, a returned(by `ChunkFile.__init`) __nerv.ChunkFile__ will have a member `handle`, which is a __nerv.ChunkFileHandle__. \ No newline at end of file
+
+* In __Nerv.io__, a returned(by `ChunkFile.__init`) __nerv.ChunkFile__ will have a member `handle`, which is a __nerv.ChunkFileHandle__.
diff --git a/nerv/doc/nerv_layer.md b/nerv/doc/nerv_layer.md
index de2fb12..dd7c9bb 100644
--- a/nerv/doc/nerv_layer.md
+++ b/nerv/doc/nerv_layer.md
@@ -1,9 +1,9 @@
-#The Nerv Layer Package#
+# The Nerv Layer Package
Part of the [Nerv](../README.md) toolkit.
-##Description##
+## Description
__nerv.Layer__ is the base class and most of its methods are abstract.
-###Class hierarchy and their members###
+### Class hierarchy and their members
* __nerv.Layer__.
* `table dim_in` It specifies the dimensions of the inputs.
* `table dim_out` It specifies the dimensions of the outputs.
@@ -20,7 +20,7 @@ __nerv.Layer__ is the base class and most of its methods are abstract.
* `int total_frams` Records how many frames have passed.
* `bool compressed` The reference distribution can be a one-hot format. This feature is enabled by `layer_conf.compressed`.
-##Methods##
+## Methods
* __void Layer.\_\_init(Layer self, string id, table global_conf, table layer_conf)__
Abstract method.
The constructing method should assign `id` to `self.id` and `global_conf` to `self.gconf`, `layer_conf.dim_in` to `self.dim_in`, `layer_conf.dim_out` to `self.dim_out`. `dim_in` and `dim_out` are a list specifies the dimensions of the inputs and outputs. Also, `layer_conf` will include the parameters, which should also be properly saved.
@@ -43,7 +43,7 @@ Check whether `#self.dim_in == len_in` and `#self.dim_out == len_out`, if violat
Abstract method.
The layer should return a list containing its parameters.
-####nerv.Layer.get\_dim(self)####
+#### nerv.Layer.get\_dim(self)
* Returns:
`dim_in`: __table__.
`dim_out`: __table__.
@@ -52,7 +52,7 @@ The layer should return a list containing its parameters.
* Description:
Returns `self.dim_in, self.dim_out`.
-##Examples##
+## Examples
* a basic example using __Nerv__ layers to a linear classification.
```
@@ -178,3 +178,4 @@ for l = 0, 10, 1 do
end
--[[end training]]--
```
+
diff --git a/nerv/doc/nerv_matrix.md b/nerv/doc/nerv_matrix.md
index dfd843d..3782eb3 100644
--- a/nerv/doc/nerv_matrix.md
+++ b/nerv/doc/nerv_matrix.md
@@ -1,8 +1,8 @@
-#The Nerv Matrix Package#
+# The Nerv Matrix Package
Part of the [Nerv](../README.md) toolkit.
-##Description##
-###Underlying structure###
+## Description
+### Underlying structure
In the begining is could be useful to know something about the underlying structure of a __Nerv__ matrix. Please keep in mind that matrice in __Nerv__ is row-major.
Every matrix object is a encapsulation of a C struct that describes the attributes of this matrix.
```
@@ -20,12 +20,12 @@ typedef struct Matrix {
It is worth mentioning that that `data_ref` is a counter which counts the number of references to its memory space, mind that it will also be increased when a row of the matrix is referenced(`col = m[2]`). A __Nerv__ matrix will deallocate its space when this counter is decreased to zero.
Also note that all assigning operation in __Nerv__ is reference copy, you can use `copy_tod` or `copy_toh` method to copy value. Also, row assigning operations like `m1[2]=m2[3]` is forbidden in __Nerv__.
-###Class hierarchy###
+### Class hierarchy
The class hierarchy of the matrix classes can be clearly observed in `matrix/init.c`.
First there is a abstract base class __Nerv.Matrix__, which is inherited by __Nerv.CuMatrix__ and __Nerv.MMatrix__(also abstract).
Finally, there is __Nerv.CuMatrixFloat__, __Nerv.CuMatrixDouble__, inheriting __Nerv.CuMatrix__, and __Nerv.MMatrixFloat__, __Nerv.MMatrixDouble__, __Nerv.MMatrixInt__ , inheriting __Nerv.MMatrix__.
-##Methods##
+## Methods
Mind that usually a matrix object can only do calculation with matrix of its own type(a __Nerv.CuMatrixFloat__ matrix can only do add operation with a __Nerv.CuMatrixFloat__).
In the methods description below, __Matrix__ could be __Nerv.CuMatrixFloat__, __Nerv.CuMatrixDouble__, __Nerv.MMatrixFloat__ or __Nerv.MMatrixDouble__. __Element_type__ could be `float` or `double`, respectively.
* __Matrix = Matrix(int nrow, int ncol)__
@@ -53,6 +53,8 @@ Return a new __Matrix__ of size (1,`self.ncol`), which stores the sum of all col
Return a new __Matrix__ of size (`self.nrow`,1), which stores the sum of all rows of __Matrix__ `self`.
* __Matrix Matrix.rowmax(Matrix self)__
Return a new __Matrix__ of size (`self.nrow`,1), which stores the max value of all rows of __Matrix__ `self`.
+* __Matrix Matrix.rowmax_idx(Matrix self)__
+Return two new __Matrix__ of size (`self.nrow`,1), which stores the max value of all rows of __Matrix__ `self`, and its corresponding column indices(start from zero).
* __Matrix Matrix.trans(Matrix self)__
Return a new __Matrix__ of size (`self.ncol`,`self.nrow`), which stores the transpose of __Matrix__ `self`.
* __void Matrix.copy_fromh(Matrix self, MMatrix a)__
@@ -81,8 +83,8 @@ Fill the content of __Matrix__ `self` to be `value`.
Set the element of __Matrix__ `self` to be elementwise-sigmoid of `ma`.
* __void Matrix.sigmoid_grad(Matrix self, Matrix err, Matrix output)__
Set the element of __Matrix__ `self`, to be `self[i][j]=err[i][j]*output[i][j]*(1-output[i][j])`. This function is used to propagate sigmoid layer error.
-* __void Matrix.softmax(Matrix self, Matrix a)__
-Calculate a row-by-row softmax of __Matrix__ `a` and save the result in `self`.
+* __Matrix Matrix.softmax(Matrix self, Matrix a)__
+Calculate a row-by-row softmax of __Matrix__ `a` and save the result in `self`. Returns a new `self.nrow*1` index matrix that stores the index of the maximum value of each row.
* __void Matrix.mul_elem(Matrix self, Matrix ma, Matrix mb)__
Calculate element-wise multiplication of __Matrix__ `ma` and `mb`, store the result in `self`.
* __void Matrix.log_elem(Matrix self, Matrix ma)__
@@ -113,7 +115,7 @@ Write `self` to the file position in `chunk`.
* __void MMatrix.copy_from(MMatrix ma, MMatrix mb,[int b_bgein, int b_end, int a_begin])__
Copy a part of `mb`(rows of index `[b_begin..b_end)`) to `ma` beginning at row index `a_begin`. If not specified, `b_begin` will be `0`, `b_end` will be `b.nrow`, `a_begin` will be `0`.
-##Examples##
+## Examples
* Use `get_dataref_value` to test __Nerv__'s matrix space allocation.
```
m = 10
@@ -134,6 +136,7 @@ print("test fm:get_dataref_value:", fm:get_dataref_value())
print(fm)
print(dm)
```
+
* Test some __Matrix__ calculations.
```
m = 4
@@ -167,3 +170,4 @@ print(a)
a:log_elem(fs)
print(a)
```
+
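
The two API additions documented above (`rowmax_idx` and the index matrix now returned by `softmax`) can be summarized with a short sketch; this is an illustration written from the doc entries in this diff, not code taken from the repository.
```
-- Sketch based on the updated doc/nerv_matrix.md entries.
local a = nerv.CuMatrixFloat(3, 5)
a:fill(0)                                -- placeholder contents for the example
local max_val, max_idx = a:rowmax_idx()  -- two nrow x 1 matrices: per-row max and its 0-based column index
local out = nerv.CuMatrixFloat(3, 5)
local argmax = out:softmax(a)            -- softmax of `a` into `out`; also returns an nrow x 1 argmax index matrix
```
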
diff --git a/nerv/doc/nerv_nn.md b/nerv/doc/nerv_nn.md
index c57447d..63537fb 100644
--- a/nerv/doc/nerv_nn.md
+++ b/nerv/doc/nerv_nn.md
@@ -1,19 +1,19 @@
-#The Nerv NN Package#
+# The Nerv NN Package
Part of the [Nerv](../README.md) toolkit.
-##Description##
-###Class hierarchy###
+## Description
+### Class hierarchy
it contains __nerv.LayerRepo__, __nerv.ParamRepo__, and __nerv.DAGLayer__(inherits __nerv.Layer__).
-###Class hierarchy and their members###
-####nerv.ParamRepo####
+### Class hierarchy and their members
+#### nerv.ParamRepo
Get parameter object by ID.
* `table param_table` Contains the mapping of parameter ID to parameter file(__nerv.ChunkFile__)
* __nerv.LayerRepo__ Get layer object by ID.
* `table layers` Contains the mapping of layer ID to layer object.
objects.
-####__nerv.DAGLayer__####
+#### __nerv.DAGLayer__
Inherits __nerv.Layer__.
* `layers`: __table__, a mapping from a layer ID to its "ref". A ref is a structure that contains reference to space allocations and other info of the layer.
* `inputs`: __table__, a mapping from the inputs ports of the DAG layer to the input ports of the sublayer, the key is the port number, the value is `{ref, port}`.
@@ -21,17 +21,17 @@ Inherits __nerv.Layer__.
* `parsed_conn`: __table__, a list of parsed connections, each entry is of format `{{ref_from, port_from}, {ref_to, port_to}}`.
* `queue`: __table__, a list of "ref"s, the propagation of the DAGLayer will follow this order, and back-propagation will follow a reverse order.
-##Methods##
+## Methods
-###__nerv.ParamRepo__###
+### __nerv.ParamRepo__
-####nerv.ParamRepo:\_\_init(param\_files)####
+#### nerv.ParamRepo:\_\_init(param\_files)
* Parameters:
`param_files`: __table__
* Description:
`param_files` is a list of file names that stores parameters, the newed __ParamRepo__ will read them from file and store the mapping for future fetching.
-####nerv.Param ParamRepo.get_param(ParamRepo self, string pid, table global_conf)####
+#### nerv.Param ParamRepo.get_param(ParamRepo self, string pid, table global_conf)
* Returns:
__nerv.Layer__
* Parameters:
@@ -41,8 +41,8 @@ Inherits __nerv.Layer__.
* Description:
__ParamRepo__ will find the __nerv.ChunkFile__ `pf` that contains parameter of ID `pid` and return `pf:read_chunk(pid, global_conf)`.
-###__nerv.LayerRepo__###
-####nerv.LayerRepo:\_\_init(layer\_spec, param\_repo, global\_conf)####
+### __nerv.LayerRepo__
+#### nerv.LayerRepo:\_\_init(layer\_spec, param\_repo, global\_conf)
* Returns:
__nerv.LayerRepo__.
* Parameters:
@@ -60,7 +60,7 @@ Inherits __nerv.Layer__.
__LayerRepo__ will merge `param_config` into `layer_config` and construct a layer by calling `layer_type(layerid, global_conf, layer_config)`.
-####nerv.LayerRepo.get\_layer(self, lid)####
+#### nerv.LayerRepo.get\_layer(self, lid)
* Returns:
__nerv.LayerRepo__, the layer with ID `lid`.
* Parameters:
@@ -69,8 +69,8 @@ Inherits __nerv.Layer__.
* Description:
Returns the layer with ID `lid`.
-###nerv.DAGLayer###
-####nerv.DAGLayer:\_\_init(id, global\_conf, layer\_conf)####
+### nerv.DAGLayer
+#### nerv.DAGLayer:\_\_init(id, global\_conf, layer\_conf)
* Returns:
__nerv.DAGLayer__
* Parameters:
@@ -89,7 +89,7 @@ Inherits __nerv.Layer__.
}})
```
-####nerv.DAGLayer.init(self, batch\_size)####
+#### nerv.DAGLayer.init(self, batch\_size)
* Parameters:
`self`: __nerv.DAGLayer__
`batch_size`: __int__
@@ -97,7 +97,7 @@ Inherits __nerv.Layer__.
This initialization method will allocate space for output and input matrice, and will call `init()` for each of its sub layers.
-####nerv.DAGLayer.propagate(self, input, output)####
+#### nerv.DAGLayer.propagate(self, input, output)
* Parameters:
`self`: __nerv.DAGLayer__
`input`: __table__
@@ -105,7 +105,7 @@ Inherits __nerv.Layer__.
* Description:
The same function as __nerv.Layer.propagate__, do propagation for each layer in the order of `self.queue`.
-####nerv.DAGLayer.back\_propagate(self, next\_bp\_err, bp\_err, input, output)####
+#### nerv.DAGLayer.back\_propagate(self, next\_bp\_err, bp\_err, input, output)
* Parameters:
`self`: __nerv.DAGLayer__
`next_bp_err`: __table__
@@ -115,7 +115,7 @@ Inherits __nerv.Layer__.
* Description:
The same function as __nerv.Layer.back_propagate__, do back-propagation for each layer in the reverse order of `self.queue`.
-####nerv.DAGLayer.update(self, bp\_err, input, output)####
+#### nerv.DAGLayer.update(self, bp\_err, input, output)
* Parameters:
`self`: __nerv.DAGLayer__
`bp_err`: __table__
@@ -124,7 +124,7 @@ Inherits __nerv.Layer__.
* Description:
The same function as __nerv.Layer.update__, do update for each layer in the order of `self.queue`.
-##Examples##
+## Examples
* aaa
```
@@ -253,4 +253,5 @@ for l = 0, 10, 1 do
ce_last = softmaxL.total_ce
end
--[[end training]]--
-``` \ No newline at end of file
+```
+
diff --git a/nerv/doc/nerv_param.md b/nerv/doc/nerv_param.md
index 167cb11..98793f0 100644
--- a/nerv/doc/nerv_param.md
+++ b/nerv/doc/nerv_param.md
@@ -1,17 +1,17 @@
-#The Nerv Parameter Package#
+# The Nerv Parameter Package
Part of the [Nerv](../README.md) toolkit.
-##Description##
-###Class hierarchy###
+## Description
+### Class hierarchy
There is a base class __Nerv.Param__ defined in `layer/init.lua`.
-###Class hierarchy and their members###
+### Class hierarchy and their members
* __nerv.MatrixParam__ inherits __nerv.Param__
* `Matrix trans` stores the parameter matrix.
* __nerv.LinearTransParam__ inherits __Nerv.MatrixParam__.
* __Nerv.BiasParam__ inherits __Nerv.MatrixParam__.
-##Methods##
+## Methods
* __void Param.\_\_init(Param self, string id, table global_conf)__
Constructor of a __Param__, it will set `self.id` to be `id` and `self.gconf` to be `global_conf`.
* __void Param.set_info(Param self, table info)__
diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua
index 3fa2653..5bf28bd 100644
--- a/nerv/examples/asr_trainer.lua
+++ b/nerv/examples/asr_trainer.lua
@@ -1,17 +1,33 @@
-function build_trainer(ifname)
- local param_repo = nerv.ParamRepo()
- param_repo:import(ifname, nil, gconf)
- local layer_repo = make_layer_repo(param_repo)
- local network = get_network(layer_repo)
- local global_transf = get_global_transf(layer_repo)
- local input_order = get_input_order()
+require 'lfs'
+require 'pl'
+local function build_trainer(ifname)
+ local host_param_repo = nerv.ParamRepo()
local mat_type
+ local src_loc_type
+ local train_loc_type
+ host_param_repo:import(ifname, nil, gconf)
if gconf.use_cpu then
mat_type = gconf.mmat_type
+ src_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
+ train_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
else
mat_type = gconf.cumat_type
+ src_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
+ train_loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE
end
- local iterative_trainer = function (prefix, scp_file, bp)
+ local param_repo = host_param_repo:copy(train_loc_type)
+ local layer_repo = make_layer_repo(param_repo)
+ local network = get_network(layer_repo)
+ local global_transf = get_global_transf(layer_repo)
+ local input_order = get_input_order()
+ local iterative_trainer = function (prefix, scp_file, bp, rebind_param_repo)
+ -- rebind the params if necessary
+ if rebind_param_repo then
+ host_param_repo = rebind_param_repo
+ param_repo = host_param_repo:copy(train_loc_type)
+ layer_repo:rebind(param_repo)
+ rebind_param_repo = nil
+ end
gconf.randomize = bp
-- build buffer
local buffer = make_buffer(make_readers(scp_file, layer_repo))
@@ -64,61 +80,193 @@ function build_trainer(ifname)
print_stat(layer_repo)
mat_type.print_profile()
mat_type.clear_profile()
- if (not bp) and prefix ~= nil then
- nerv.info("writing back...")
- local fname = string.format("%s_cv%.3f.nerv",
- prefix, get_accuracy(layer_repo))
- network:get_params():export(fname, nil)
+ local fname
+ if (not bp) then
+ host_param_repo = param_repo:copy(src_loc_type)
+ if prefix ~= nil then
+ nerv.info("writing back...")
+ fname = string.format("%s_cv%.3f.nerv",
+ prefix, get_accuracy(layer_repo))
+ host_param_repo:export(fname, nil)
+ end
end
- return get_accuracy(layer_repo)
+ return get_accuracy(layer_repo), host_param_repo, fname
end
return iterative_trainer
end
-dofile(arg[1])
-start_halving_inc = 0.5
-halving_factor = 0.6
-end_halving_inc = 0.1
-min_iter = 1
-max_iter = 20
-min_halving = 5
-gconf.batch_size = 256
-gconf.buffer_size = 81920
+local function check_and_add_defaults(spec, opts)
+ local function get_opt_val(k)
+ return opts[string.gsub(k, '_', '-')].val
+ end
+ local opt_v = get_opt_val("resume_from")
+ if opt_v then
+ gconf = dofile(opt_v)
+ else
+ for k, v in pairs(spec) do
+ local opt_v = get_opt_val(k)
+ if opt_v ~= nil then
+ gconf[k] = opt_v
+ elseif gconf[k] ~= nil then
+ elseif v ~= nil then
+ gconf[k] = v
+ end
+ end
+ end
+end
-local pf0 = gconf.initialized_param
-local trainer = build_trainer(pf0)
---local trainer = build_trainer("c3.nerv")
-local accu_best = trainer(nil, gconf.cv_scp, false)
-local do_halving = false
-
-nerv.info("initial cross validation: %.3f", accu_best)
-for i = 1, max_iter do
- nerv.info("[NN] begin iteration %d with lrate = %.6f", i, gconf.lrate)
- local accu_tr = trainer(nil, gconf.tr_scp, true)
- nerv.info("[TR] training set %d: %.3f", i, accu_tr)
- local accu_new = trainer(
- string.format("%s_%s_iter_%d_lr%f_tr%.3f",
- string.gsub(
- (string.gsub(pf0[1], "(.*/)(.*)", "%2")),
- "(.*)%..*", "%1"),
- os.date("%Y%m%d%H%M%S"),
- i, gconf.lrate,
- accu_tr),
- gconf.cv_scp, false)
- nerv.info("[CV] cross validation %d: %.3f", i, accu_new)
- -- TODO: revert the weights
- local accu_diff = accu_new - accu_best
- if do_halving and accu_diff < end_halving_inc and i > min_iter then
- break
+local function make_options(spec)
+ local options = {}
+ for k, v in pairs(spec) do
+ table.insert(options,
+ {string.gsub(k, '_', '-'), nil, type(v), default = v})
end
- if accu_diff < start_halving_inc and i >= min_halving then
- do_halving = true
+ return options
+end
+
+local function print_help(options)
+ nerv.printf("Usage: <asr_trainer.lua> [options] network_config.lua\n")
+ nerv.print_usage(options)
+end
+
+local function print_gconf()
+ local key_maxlen = 0
+ for k, v in pairs(gconf) do
+ key_maxlen = math.max(key_maxlen, #k or 0)
end
- if do_halving then
- gconf.lrate = gconf.lrate * halving_factor
+ local function pattern_gen()
+ return string.format("%%-%ds = %%s\n", key_maxlen)
end
- if accu_new > accu_best then
- accu_best = accu_new
+ nerv.info("ready to train with the following gconf settings:")
+ nerv.printf(pattern_gen(), "Key", "Value")
+ for k, v in pairs(gconf) do
+ nerv.printf(pattern_gen(), k or "", v or "")
end
+end
+
+local function dump_gconf(fname)
+ local f = io.open(fname, "w")
+ f:write("return ")
+ f:write(table.tostring(gconf))
+ f:close()
+end
+
+local trainer_defaults = {
+ lrate = 0.8,
+ batch_size = 256,
+ buffer_size = 81920,
+ wcost = 1e-6,
+ momentum = 0.9,
+ start_halving_inc = 0.5,
+ halving_factor = 0.6,
+ end_halving_inc = 0.1,
+ cur_iter = 1,
+ min_iter = 1,
+ max_iter = 20,
+ min_halving = 5,
+ do_halving = false,
+ cumat_tname = "nerv.CuMatrixFloat",
+ mmat_tname = "nerv.MMatrixFloat",
+ debug = false,
+}
+
+local options = make_options(trainer_defaults)
+local extra_opt_spec = {
+ {"tr-scp", nil, "string"},
+ {"cv-scp", nil, "string"},
+ {"resume-from", nil, "string"},
+ {"help", "h", "boolean", default = false, desc = "show this help information"},
+ {"dir", nil, "string", desc = "specify the working directory"},
+}
+
+table.extend(options, extra_opt_spec)
+
+arg, opts = nerv.parse_args(arg, options)
+
+if #arg < 1 or opts["help"].val then
+ print_help(options)
+ return
+end
+
+dofile(arg[1])
+
+--[[
+
+Rule: command-line option overrides network config overrides trainer default.
+Note: config key like aaa_bbbb_cc could be overriden by specifying
+--aaa-bbbb-cc to command-line arguments.
+
+]]--
+
+check_and_add_defaults(trainer_defaults, opts)
+gconf.mmat_type = nerv.get_type(gconf.mmat_tname)
+gconf.cumat_type = nerv.get_type(gconf.cumat_tname)
+gconf.use_cpu = econf.use_cpu or false
+
+local pf0 = gconf.initialized_param
+local date_pattern = "%Y%m%d%H%M%S"
+local logfile_name = "log"
+local working_dir = opts["dir"].val or string.format("nerv_%s", os.date(date_pattern))
+local rebind_param_repo = nil
+
+print_gconf()
+if not lfs.mkdir(working_dir) then
+ nerv.error("[asr_trainer] working directory already exists")
+end
+-- copy the network config
+dir.copyfile(arg[1], working_dir)
+-- set logfile path
+nerv.set_logfile(path.join(working_dir, logfile_name))
+path.chdir(working_dir)
+
+-- start the training
+local trainer = build_trainer(pf0)
+local pr_prev
+gconf.accu_best, pr_prev = trainer(nil, gconf.cv_scp, false)
+nerv.info("initial cross validation: %.3f", gconf.accu_best)
+for i = gconf.cur_iter, gconf.max_iter do
+ local stop = false
+ gconf.cur_iter = i
+ dump_gconf(string.format("iter_%d.meta", i))
+ repeat -- trick to implement `continue` statement
+ nerv.info("[NN] begin iteration %d with lrate = %.6f", i, gconf.lrate)
+ local accu_tr = trainer(nil, gconf.tr_scp, true, rebind_param_repo)
+ nerv.info("[TR] training set %d: %.3f", i, accu_tr)
+ local param_prefix = string.format("%s_%s_iter_%d_lr%f_tr%.3f",
+ string.gsub(
+ (string.gsub(pf0[1], "(.*/)(.*)", "%2")),
+ "(.*)%..*", "%1"),
+ os.date(date_pattern),
+ i, gconf.lrate,
+ accu_tr)
+ local accu_new, pr_new, param_fname = trainer(param_prefix, gconf.cv_scp, false)
+ nerv.info("[CV] cross validation %d: %.3f", i, accu_new)
+ local accu_prev = gconf.accu_best
+ if accu_new < gconf.accu_best then
+ nerv.info("rejecting the trained params, rollback to the previous one")
+ file.move(param_fname, param_fname .. ".rejected")
+ rebind_param_repo = pr_prev
+ break -- `continue` equivalent
+ else
+ nerv.info("accepting the trained params")
+ gconf.accu_best = accu_new
+ pr_prev = pr_new
+ gconf.initialized_param = {path.join(path.currentdir(), param_fname)}
+ end
+ if gconf.do_halving and
+ gconf.accu_best - accu_prev < gconf.end_halving_inc and
+ i > gconf.min_iter then
+ stop = true
+ break
+ end
+ if gconf.accu_best - accu_prev < gconf.start_halving_inc and
+ i >= gconf.min_halving then
+ gconf.do_halving = true
+ end
+ if gconf.do_halving then
+ gconf.lrate = gconf.lrate * gconf.halving_factor
+ end
+ until true
+ if stop then break end
-- nerv.Matrix.print_profile()
end
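
One detail of the refactored trainer above worth spelling out: the precedence rule in the comment block (command-line option overrides network config overrides trainer default) relies on a simple underscore-to-dash mapping of `gconf` keys, as done in `check_and_add_defaults` and `make_options`. A small hedged sketch of that mapping (the option value is made up for the example):
```
-- Hypothetical illustration of the gconf-key / command-line-flag mapping used above.
local key = "start_halving_inc"                 -- a trainer_defaults key (default 0.5)
local flag = "--" .. string.gsub(key, "_", "-") -- same gsub as in check_and_add_defaults/make_options
print(flag)                                     -- prints "--start-halving-inc"
-- so e.g. `asr_trainer.lua --start-halving-inc 0.3 config.lua` would override both the
-- network config and the built-in default for this key.
```
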
diff --git a/nerv/examples/swb_baseline.lua b/nerv/examples/swb_baseline.lua
index 51052ba..0ce8468 100644
--- a/nerv/examples/swb_baseline.lua
+++ b/nerv/examples/swb_baseline.lua
@@ -1,7 +1,5 @@
require 'htk_io'
gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
- cumat_type = nerv.CuMatrixFloat,
- mmat_type = nerv.MMatrixFloat,
rearrange = true, -- just to make the context order consistent with old results, deprecated
frm_ext = 5,
frm_trim = 5, -- trim the first and last 5 frames, TNet just does this, deprecated
@@ -9,8 +7,7 @@ gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp",
htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf",
initialized_param = {"/slfs1/users/mfy43/swb_init.nerv",
- "/slfs1/users/mfy43/swb_global_transf.nerv"},
- debug = false}
+ "/slfs1/users/mfy43/swb_global_transf.nerv"}}
function make_layer_repo(param_repo)
local layer_repo = nerv.LayerRepo(
@@ -18,51 +15,51 @@ function make_layer_repo(param_repo)
-- global transf
["nerv.BiasLayer"] =
{
- blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}},
- blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}}
+ blayer1 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias1"}},
+ blayer2 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias2"}}
},
["nerv.WindowLayer"] =
{
- wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}},
- wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}}
+ wlayer1 = {dim_in = {429}, dim_out = {429}, params = {window = "window1"}},
+ wlayer2 = {dim_in = {429}, dim_out = {429}, params = {window = "window2"}}
},
-- biased linearity
["nerv.AffineLayer"] =
{
- affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"},
- {dim_in = {429}, dim_out = {2048}}},
- affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"},
- {dim_in = {2048}, dim_out = {3001}}}
+ affine0 = {dim_in = {429}, dim_out = {2048},
+ params = {ltp = "affine0_ltp", bp = "affine0_bp"}},
+ affine1 = {dim_in = {2048}, dim_out = {2048},
+ params = {ltp = "affine1_ltp", bp = "affine1_bp"}},
+ affine2 = {dim_in = {2048}, dim_out = {2048},
+ params = {ltp = "affine2_ltp", bp = "affine2_bp"}},
+ affine3 = {dim_in = {2048}, dim_out = {2048},
+ params = {ltp = "affine3_ltp", bp = "affine3_bp"}},
+ affine4 = {dim_in = {2048}, dim_out = {2048},
+ params = {ltp = "affine4_ltp", bp = "affine4_bp"}},
+ affine5 = {dim_in = {2048}, dim_out = {2048},
+ params = {ltp = "affine5_ltp", bp = "affine5_bp"}},
+ affine6 = {dim_in = {2048}, dim_out = {2048},
+ params = {ltp = "affine6_ltp", bp = "affine6_bp"}},
+ affine7 = {dim_in = {2048}, dim_out = {3001},
+ params = {ltp = "affine7_ltp", bp = "affine7_bp"}}
},
["nerv.SigmoidLayer"] =
{
- sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}}
+ sigmoid0 = {dim_in = {2048}, dim_out = {2048}},
+ sigmoid1 = {dim_in = {2048}, dim_out = {2048}},
+ sigmoid2 = {dim_in = {2048}, dim_out = {2048}},
+ sigmoid3 = {dim_in = {2048}, dim_out = {2048}},
+ sigmoid4 = {dim_in = {2048}, dim_out = {2048}},
+ sigmoid5 = {dim_in = {2048}, dim_out = {2048}},
+ sigmoid6 = {dim_in = {2048}, dim_out = {2048}}
},
["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output
{
- ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}}
+ ce_crit = {dim_in = {3001, 1}, dim_out = {1}, compressed = true}
},
["nerv.SoftmaxLayer"] = -- softmax for decode output
{
- softmax = {{}, {dim_in = {3001}, dim_out = {3001}}}
+ softmax = {dim_in = {3001}, dim_out = {3001}}
}
}, param_repo, gconf)
@@ -70,7 +67,7 @@ function make_layer_repo(param_repo)
{
["nerv.DAGLayer"] =
{
- global_transf = {{}, {
+ global_transf = {
dim_in = {429}, dim_out = {429},
sub_layers = layer_repo,
connections = {
@@ -80,8 +77,8 @@ function make_layer_repo(param_repo)
["blayer2[1]"] = "wlayer2[1]",
["wlayer2[1]"] = "<output>[1]"
}
- }},
- main = {{}, {
+ },
+ main = {
dim_in = {429}, dim_out = {3001},
sub_layers = layer_repo,
connections = {
@@ -102,7 +99,7 @@ function make_layer_repo(param_repo)
["sigmoid6[1]"] = "affine7[1]",
["affine7[1]"] = "<output>[1]"
}
- }}
+ }
}
}, param_repo, gconf)
@@ -110,7 +107,7 @@ function make_layer_repo(param_repo)
{
["nerv.DAGLayer"] =
{
- ce_output = {{}, {
+ ce_output = {
dim_in = {429, 1}, dim_out = {1},
sub_layers = layer_repo,
connections = {
@@ -119,8 +116,8 @@ function make_layer_repo(param_repo)
["<input>[2]"] = "ce_crit[2]",
["ce_crit[1]"] = "<output>[1]"
}
- }},
- softmax_output = {{}, {
+ },
+ softmax_output = {
dim_in = {429}, dim_out = {3001},
sub_layers = layer_repo,
connections = {
@@ -128,7 +125,7 @@ function make_layer_repo(param_repo)
["main[1]"] = "softmax[1]",
["softmax[1]"] = "<output>[1]"
}
- }}
+ }
}
}, param_repo, gconf)
@@ -173,6 +170,7 @@ function make_buffer(readers)
return nerv.SGDBuffer(gconf,
{
buffer_size = gconf.buffer_size,
+ batch_size = gconf.batch_size,
randomize = gconf.randomize,
readers = readers,
use_gpu = true
@@ -184,6 +182,10 @@ function get_input_order()
{id = "phone_state"}}
end
+function get_decode_input_order()
+ return {{id = "main_scp", global_transf = true}}
+end
+
function get_accuracy(layer_repo)
local ce_crit = layer_repo:get_layer("ce_crit")
return ce_crit.total_correct / ce_crit.total_frames * 100
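The hunks above migrate every layer specification from the old two-table form (parameter ids in the first table, layer config in the second) to a single config table with an optional params field. A minimal before/after sketch of one entry, using the affine0 layer from this file:

    -- old form: {param ids}, {layer config}
    affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"},
               {dim_in = {429}, dim_out = {2048}}}

    -- new form: one table; parameter ids live under `params`
    affine0 = {dim_in = {429}, dim_out = {2048},
               params = {ltp = "affine0_ltp", bp = "affine0_bp"}}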
diff --git a/nerv/examples/swb_baseline2.lua b/nerv/examples/swb_baseline2.lua
new file mode 100644
index 0000000..8b5ebb1
--- /dev/null
+++ b/nerv/examples/swb_baseline2.lua
@@ -0,0 +1,203 @@
+require 'htk_io'
+gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
+ rearrange = true, -- just to make the context order consistent with old results, deprecated
+ frm_ext = 5,
+ frm_trim = 5, -- trim the first and last 5 frames, TNet just does this, deprecated
+ tr_scp = "/speechlab/users/mfy43/swb50/train_bp.scp",
+ cv_scp = "/speechlab/users/mfy43/swb50/train_cv.scp",
+ htk_conf = "/speechlab/users/mfy43/swb50/plp_0_d_a.conf",
+ initialized_param = {"/speechlab/users/mfy43/swb50/swb_init.nerv",
+ "/speechlab/users/mfy43/swb50/swb_global_transf.nerv"}}
+
+function make_layer_repo(param_repo)
+ local layer_repo = nerv.LayerRepo(
+ {
+ -- global transf
+ ["nerv.BiasLayer"] =
+ {
+ blayer1 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias1"}},
+ blayer2 = {dim_in = {429}, dim_out = {429}, params = {bias = "bias2"}}
+ },
+ ["nerv.WindowLayer"] =
+ {
+ wlayer1 = {dim_in = {429}, dim_out = {429}, params = {window = "window1"}},
+ wlayer2 = {dim_in = {429}, dim_out = {429}, params = {window = "window2"}}
+ },
+ -- biased linearity
+ ["nerv.AffineLayer"] =
+ {
+ affine0 = {dim_in = {429}, dim_out = {2048},
+ params = {ltp = "affine0_ltp", bp = "affine0_bp"}},
+ affine1 = {dim_in = {2048}, dim_out = {2048},
+ params = {ltp = "affine1_ltp", bp = "affine1_bp"}},
+ affine2 = {dim_in = {2048}, dim_out = {2048},
+ params = {ltp = "affine2_ltp", bp = "affine2_bp"}},
+ affine3 = {dim_in = {2048}, dim_out = {2048},
+ params = {ltp = "affine3_ltp", bp = "affine3_bp"}},
+ affine4 = {dim_in = {2048}, dim_out = {2048},
+ params = {ltp = "affine4_ltp", bp = "affine4_bp"}},
+ affine5 = {dim_in = {2048}, dim_out = {2048},
+ params = {ltp = "affine5_ltp", bp = "affine5_bp"}},
+ affine6 = {dim_in = {2048}, dim_out = {2048},
+ params = {ltp = "affine6_ltp", bp = "affine6_bp"}},
+ affine7 = {dim_in = {2048}, dim_out = {3001},
+ params = {ltp = "affine7_ltp", bp = "affine7_bp"}}
+ },
+ ["nerv.SigmoidLayer"] =
+ {
+ sigmoid0 = {dim_in = {2048}, dim_out = {2048}},
+ sigmoid1 = {dim_in = {2048}, dim_out = {2048}},
+ sigmoid2 = {dim_in = {2048}, dim_out = {2048}},
+ sigmoid3 = {dim_in = {2048}, dim_out = {2048}},
+ sigmoid4 = {dim_in = {2048}, dim_out = {2048}},
+ sigmoid5 = {dim_in = {2048}, dim_out = {2048}},
+ sigmoid6 = {dim_in = {2048}, dim_out = {2048}}
+ },
+ ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output
+ {
+ ce_crit = {dim_in = {3001, 1}, dim_out = {1}, compressed = true}
+ },
+ ["nerv.SoftmaxLayer"] = -- softmax for decode output
+ {
+ softmax = {dim_in = {3001}, dim_out = {3001}}
+ }
+ }, param_repo, gconf)
+
+ layer_repo:add_layers(
+ {
+ ["nerv.DAGLayer"] =
+ {
+ global_transf = {
+ dim_in = {429}, dim_out = {429},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "blayer1[1]",
+ ["blayer1[1]"] = "wlayer1[1]",
+ ["wlayer1[1]"] = "blayer2[1]",
+ ["blayer2[1]"] = "wlayer2[1]",
+ ["wlayer2[1]"] = "<output>[1]"
+ }
+ },
+ main = {
+ dim_in = {429}, dim_out = {3001},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "affine0[1]",
+ ["affine0[1]"] = "sigmoid0[1]",
+ ["sigmoid0[1]"] = "affine1[1]",
+ ["affine1[1]"] = "sigmoid1[1]",
+ ["sigmoid1[1]"] = "affine2[1]",
+ ["affine2[1]"] = "sigmoid2[1]",
+ ["sigmoid2[1]"] = "affine3[1]",
+ ["affine3[1]"] = "sigmoid3[1]",
+ ["sigmoid3[1]"] = "affine4[1]",
+ ["affine4[1]"] = "sigmoid4[1]",
+ ["sigmoid4[1]"] = "affine5[1]",
+ ["affine5[1]"] = "sigmoid5[1]",
+ ["sigmoid5[1]"] = "affine6[1]",
+ ["affine6[1]"] = "sigmoid6[1]",
+ ["sigmoid6[1]"] = "affine7[1]",
+ ["affine7[1]"] = "<output>[1]"
+ }
+ }
+ }
+ }, param_repo, gconf)
+
+ layer_repo:add_layers(
+ {
+ ["nerv.DAGLayer"] =
+ {
+ ce_output = {
+ dim_in = {429, 1}, dim_out = {1},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "main[1]",
+ ["main[1]"] = "ce_crit[1]",
+ ["<input>[2]"] = "ce_crit[2]",
+ ["ce_crit[1]"] = "<output>[1]"
+ }
+ },
+ softmax_output = {
+ dim_in = {429}, dim_out = {3001},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "main[1]",
+ ["main[1]"] = "softmax[1]",
+ ["softmax[1]"] = "<output>[1]"
+ }
+ }
+ }
+ }, param_repo, gconf)
+
+ return layer_repo
+end
+
+function get_network(layer_repo)
+ return layer_repo:get_layer("ce_output")
+end
+
+function get_decode_network(layer_repo)
+ return layer_repo:get_layer("softmax_output")
+end
+
+function get_global_transf(layer_repo)
+ return layer_repo:get_layer("global_transf")
+end
+
+function make_readers(scp_file, layer_repo)
+ return {
+ {reader = nerv.TNetReader(gconf,
+ {
+ id = "main_scp",
+ scp_file = scp_file,
+ conf_file = gconf.htk_conf,
+ frm_ext = gconf.frm_ext,
+ mlfs = {
+ phone_state = {
+ file = "/speechlab/users/mfy43/swb50/ref.mlf",
+ format = "map",
+ format_arg = "/speechlab/users/mfy43/swb50/dict",
+ dir = "*/",
+ ext = "lab"
+ }
+ }
+ }),
+ data = {main_scp = 429, phone_state = 1}}
+ }
+end
+
+function make_buffer(readers)
+ return nerv.SGDBuffer(gconf,
+ {
+ buffer_size = gconf.buffer_size,
+ batch_size = gconf.batch_size,
+ randomize = gconf.randomize,
+ readers = readers,
+ use_gpu = true
+ })
+end
+
+function get_input_order()
+ return {{id = "main_scp", global_transf = true},
+ {id = "phone_state"}}
+end
+
+function get_decode_input_order()
+ return {{id = "main_scp", global_transf = true}}
+end
+
+function get_accuracy(layer_repo)
+ local ce_crit = layer_repo:get_layer("ce_crit")
+ return ce_crit.total_correct / ce_crit.total_frames * 100
+end
+
+function print_stat(layer_repo)
+ local ce_crit = layer_repo:get_layer("ce_crit")
+ nerv.info("*** training stat begin ***")
+ nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce)
+ nerv.printf("correct:\t\t%d\n", ce_crit.total_correct)
+ nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames)
+ nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames)
+ nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo))
+ nerv.info("*** training stat end ***")
+end
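Both SWB configs now expose a decoding entry point next to the training one. The sketch below shows how a driver script might call these hooks; the driver itself is hypothetical and not part of this patch.

    -- hypothetical driver code, for illustration only
    -- param_repo: a nerv.ParamRepo loaded from gconf.initialized_param (construction not shown)
    local layer_repo = make_layer_repo(param_repo)
    local train_net  = get_network(layer_repo)          -- "ce_output": main net + SoftmaxCE criterion
    local decode_net = get_decode_network(layer_repo)   -- "softmax_output": main net + Softmax
    local transf     = get_global_transf(layer_repo)    -- per-frame global transform
    local buffer     = make_buffer(make_readers(gconf.tr_scp, layer_repo))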
diff --git a/nerv/examples/swb_baseline_basic.lua b/nerv/examples/swb_baseline_basic.lua
deleted file mode 100644
index 71f04a3..0000000
--- a/nerv/examples/swb_baseline_basic.lua
+++ /dev/null
@@ -1,162 +0,0 @@
-require 'htk_io'
-gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
- cumat_type = nerv.CuMatrixFloat,
- mmat_type = nerv.MMatrixFloat,
- frm_ext = 5,
- frm_trim = 5,
- tr_scp = "/slfs1/users/mfy43/swb_ivec/train_bp.scp",
- cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp",
- htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf",
- initialized_param = {"/slfs1/users/mfy43/swb_init.nerv",
- "/slfs1/users/mfy43/swb_global_transf.nerv"},
- debug = false}
-
-function make_layer_repo(param_repo)
- local layer_repo = nerv.LayerRepo(
- {
- -- global transf
- ["nerv.BiasLayer"] =
- {
- blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}},
- blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}}
- },
- ["nerv.WindowLayer"] =
- {
- wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}},
- wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}}
- },
- -- biased linearity
- ["nerv.AffineLayer"] =
- {
- affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"},
- {dim_in = {429}, dim_out = {2048}}},
- affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"},
- {dim_in = {2048}, dim_out = {2048}}},
- affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"},
- {dim_in = {2048}, dim_out = {3001}}}
- },
- ["nerv.SigmoidLayer"] =
- {
- sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}},
- sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}}
- },
- ["nerv.SoftmaxCELayer"] =
- {
- ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}}
- }
- }, param_repo, gconf)
-
- layer_repo:add_layers(
- {
- ["nerv.DAGLayer"] =
- {
- global_transf = {{}, {
- dim_in = {429}, dim_out = {429},
- sub_layers = layer_repo,
- connections = {
- ["<input>[1]"] = "blayer1[1]",
- ["blayer1[1]"] = "wlayer1[1]",
- ["wlayer1[1]"] = "blayer2[1]",
- ["blayer2[1]"] = "wlayer2[1]",
- ["wlayer2[1]"] = "<output>[1]"
- }
- }},
- main = {{}, {
- dim_in = {429, 1}, dim_out = {1},
- sub_layers = layer_repo,
- connections = {
- ["<input>[1]"] = "affine0[1]",
- ["affine0[1]"] = "sigmoid0[1]",
- ["sigmoid0[1]"] = "affine1[1]",
- ["affine1[1]"] = "sigmoid1[1]",
- ["sigmoid1[1]"] = "affine2[1]",
- ["affine2[1]"] = "sigmoid2[1]",
- ["sigmoid2[1]"] = "affine3[1]",
- ["affine3[1]"] = "sigmoid3[1]",
- ["sigmoid3[1]"] = "affine4[1]",
- ["affine4[1]"] = "sigmoid4[1]",
- ["sigmoid4[1]"] = "affine5[1]",
- ["affine5[1]"] = "sigmoid5[1]",
- ["sigmoid5[1]"] = "affine6[1]",
- ["affine6[1]"] = "sigmoid6[1]",
- ["sigmoid6[1]"] = "affine7[1]",
- ["affine7[1]"] = "ce_crit[1]",
- ["<input>[2]"] = "ce_crit[2]",
- ["ce_crit[1]"] = "<output>[1]"
- }
- }}
- }
- }, param_repo, gconf)
- return layer_repo
-end
-
-function get_network(layer_repo)
- return layer_repo:get_layer("main")
-end
-
-function make_readers(scp_file, layer_repo)
- return {
- {reader = nerv.TNetReader(gconf,
- {
- id = "main_scp",
- scp_file = scp_file,
- conf_file = gconf.htk_conf,
- frm_ext = gconf.frm_ext,
- mlfs = {
- phone_state = {
- file = "/slfs1/users/mfy43/swb_ivec/ref.mlf",
- format = "map",
- format_arg = "/slfs1/users/mfy43/swb_ivec/dict",
- dir = "*/",
- ext = "lab"
- }
- }
- }),
- data = {main_scp = 429, phone_state = 1}}
- }
-end
-
-function make_buffer(readers)
- return nerv.SGDBuffer(gconf,
- {
- buffer_size = gconf.buffer_size,
- randomize = gconf.randomize,
- readers = readers
- })
-end
-
-function get_input_order()
- return {{id = "main_scp", global_transf = true},
- {id = "phone_state"}}
-end
-
-function get_accuracy(layer_repo)
- local ce_crit = layer_repo:get_layer("ce_crit")
- return ce_crit.total_correct / ce_crit.total_frames * 100
-end
-
-function print_stat(layer_repo)
- local ce_crit = layer_repo:get_layer("ce_crit")
- nerv.info("*** training stat begin ***")
- nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce)
- nerv.printf("correct:\t\t%d\n", ce_crit.total_correct)
- nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames)
- nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames)
- nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo))
- nerv.info("*** training stat end ***")
-end
diff --git a/nerv/examples/timit_baseline2.lua b/nerv/examples/timit_baseline2.lua
new file mode 100644
index 0000000..2d144b5
--- /dev/null
+++ b/nerv/examples/timit_baseline2.lua
@@ -0,0 +1,212 @@
+require 'kaldi_io'
+gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9, frm_ext = 5,
+ tr_scp = "ark:/speechlab/tools/KALDI/kaldi-master/src/featbin/copy-feats " ..
+ "scp:/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/train.scp ark:- |",
+ cv_scp = "ark:/speechlab/tools/KALDI/kaldi-master/src/featbin/copy-feats " ..
+ "scp:/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/cv.scp ark:- |",
+ initialized_param = {"/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/nnet_init.nerv",
+ "/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/nnet_output.nerv",
+ "/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/nnet_trans.nerv"},
+ decode_param = {"/speechlab/users/mfy43/timit/nnet_init_20160229015745_iter_13_lr0.013437_tr72.434_cv58.729.nerv",
+ "/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/nnet_trans.nerv"}}
+
+function make_layer_repo(param_repo)
+ local layer_repo = nerv.LayerRepo(
+ {
+ -- global transf
+ ["nerv.BiasLayer"] =
+ {
+ blayer1 = {dim_in = {440}, dim_out = {440}, params = {bias = "bias0"}}
+ },
+ ["nerv.WindowLayer"] =
+ {
+ wlayer1 = {dim_in = {440}, dim_out = {440}, params = {window = "window0"}}
+ },
+ -- biased linearity
+ ["nerv.AffineLayer"] =
+ {
+ affine0 = {dim_in = {440}, dim_out = {1024},
+ params = {ltp = "affine0_ltp", bp = "affine0_bp"}},
+ affine1 = {dim_in = {1024}, dim_out = {1024},
+ params = {ltp = "affine1_ltp", bp = "affine1_bp"}},
+ affine2 = {dim_in = {1024}, dim_out = {1024},
+ params = {ltp = "affine2_ltp", bp = "affine2_bp"}},
+ affine3 = {dim_in = {1024}, dim_out = {1024},
+ params = {ltp = "affine3_ltp", bp = "affine3_bp"}},
+ affine4 = {dim_in = {1024}, dim_out = {1024},
+ params = {ltp = "affine4_ltp", bp = "affine4_bp"}},
+ affine5 = {dim_in = {1024}, dim_out = {1024},
+ params = {ltp = "affine5_ltp", bp = "affine5_bp"}},
+ affine6 = {dim_in = {1024}, dim_out = {1959},
+ params = {ltp = "affine6_ltp", bp = "affine6_bp"}}
+ },
+ ["nerv.SigmoidLayer"] =
+ {
+ sigmoid0 = {dim_in = {1024}, dim_out = {1024}},
+ sigmoid1 = {dim_in = {1024}, dim_out = {1024}},
+ sigmoid2 = {dim_in = {1024}, dim_out = {1024}},
+ sigmoid3 = {dim_in = {1024}, dim_out = {1024}},
+ sigmoid4 = {dim_in = {1024}, dim_out = {1024}},
+ sigmoid5 = {dim_in = {1024}, dim_out = {1024}}
+ },
+ ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output
+ {
+ ce_crit = {dim_in = {1959, 1}, dim_out = {1}, compressed = true}
+ },
+ ["nerv.SoftmaxLayer"] = -- softmax for decode output
+ {
+ softmax = {dim_in = {1959}, dim_out = {1959}}
+ }
+ }, param_repo, gconf)
+
+ layer_repo:add_layers(
+ {
+ ["nerv.DAGLayer"] =
+ {
+ global_transf = {
+ dim_in = {440}, dim_out = {440},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "blayer1[1]",
+ ["blayer1[1]"] = "wlayer1[1]",
+ ["wlayer1[1]"] = "<output>[1]"
+ }
+ },
+ main = {
+ dim_in = {440}, dim_out = {1959},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "affine0[1]",
+ ["affine0[1]"] = "sigmoid0[1]",
+ ["sigmoid0[1]"] = "affine1[1]",
+ ["affine1[1]"] = "sigmoid1[1]",
+ ["sigmoid1[1]"] = "affine2[1]",
+ ["affine2[1]"] = "sigmoid2[1]",
+ ["sigmoid2[1]"] = "affine3[1]",
+ ["affine3[1]"] = "sigmoid3[1]",
+ ["sigmoid3[1]"] = "affine4[1]",
+ ["affine4[1]"] = "sigmoid4[1]",
+ ["sigmoid4[1]"] = "affine5[1]",
+ ["affine5[1]"] = "sigmoid5[1]",
+ ["sigmoid5[1]"] = "affine6[1]",
+ ["affine6[1]"] = "<output>[1]"
+ }
+ }
+ }
+ }, param_repo, gconf)
+
+ layer_repo:add_layers(
+ {
+ ["nerv.DAGLayer"] =
+ {
+ ce_output = {
+ dim_in = {440, 1}, dim_out = {1},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "main[1]",
+ ["main[1]"] = "ce_crit[1]",
+ ["<input>[2]"] = "ce_crit[2]",
+ ["ce_crit[1]"] = "<output>[1]"
+ }
+ },
+ softmax_output = {
+ dim_in = {440}, dim_out = {1959},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "main[1]",
+ ["main[1]"] = "softmax[1]",
+ ["softmax[1]"] = "<output>[1]"
+ }
+ }
+ }
+ }, param_repo, gconf)
+
+ return layer_repo
+end
+
+function get_network(layer_repo)
+ return layer_repo:get_layer("ce_output")
+end
+
+function get_decode_network(layer_repo)
+ return layer_repo:get_layer("softmax_output")
+end
+
+function get_global_transf(layer_repo)
+ return layer_repo:get_layer("global_transf")
+end
+
+function make_readers(scp_file, layer_repo)
+ return {
+ {reader = nerv.KaldiReader(gconf,
+ {
+ id = "main_scp",
+ feature_rspecifier = scp_file,
+ conf_file = gconf.htk_conf,
+ frm_ext = gconf.frm_ext,
+ mlfs = {
+ phone_state = {
+ targets_rspecifier = "ark:/speechlab/tools/KALDI/kaldi-master/src/bin/ali-to-pdf " ..
+ "/speechlab/users/mfy43/timit/s5/exp/tri3_ali/final.mdl " ..
+ "\"ark:gunzip -c /speechlab/users/mfy43/timit/s5/exp/tri3_ali/ali.*.gz |\" " ..
+ "ark:- | " ..
+ "/speechlab/tools/KALDI/kaldi-master/src/bin/ali-to-post " ..
+ "ark:- ark:- |",
+ format = "map"
+ }
+ }
+ }),
+ data = {main_scp = 440, phone_state = 1}}
+ }
+end
+
+function make_decode_readers(scp_file, layer_repo)
+ return {
+ {reader = nerv.KaldiReader(gconf,
+ {
+ id = "main_scp",
+ feature_rspecifier = scp_file,
+ conf_file = gconf.htk_conf,
+ frm_ext = gconf.frm_ext,
+ mlfs = {},
+ need_key = true
+ }),
+ data = {main_scp = 440, phone_state = 1}}
+ }
+end
+
+function make_buffer(readers)
+ return nerv.SGDBuffer(gconf,
+ {
+ buffer_size = gconf.buffer_size,
+ batch_size = gconf.batch_size,
+ randomize = gconf.randomize,
+ readers = readers,
+ use_gpu = true
+ })
+end
+
+function get_input_order()
+ return {{id = "main_scp", global_transf = true},
+ {id = "phone_state"}}
+end
+
+function get_decode_input_order()
+ return {{id = "main_scp", global_transf = true}}
+end
+
+function get_accuracy(layer_repo)
+ local ce_crit = layer_repo:get_layer("ce_crit")
+ return ce_crit.total_correct / ce_crit.total_frames * 100
+end
+
+function print_stat(layer_repo)
+ local ce_crit = layer_repo:get_layer("ce_crit")
+ nerv.info("*** training stat begin ***")
+ nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce)
+ nerv.printf("correct:\t\t%d\n", ce_crit.total_correct)
+ nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames)
+ nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames)
+ nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo))
+ nerv.info("*** training stat end ***")
+end
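Compared with the SWB configs, this one also carries a decode_param list. The distinction, as far as it can be read from this file alone, is summarized below; the decoder script that consumes it is not part of this hunk.

    -- initialized_param: parameters loaded at the start of training
    --                    (initial net, output layer and input transform)
    -- decode_param:      a trained model from a finished run plus the same input
    --                    transform, used together with get_decode_network()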
diff --git a/nerv/init.lua b/nerv/init.lua
index e7d668c..da7df29 100644
--- a/nerv/init.lua
+++ b/nerv/init.lua
@@ -13,6 +13,10 @@ function nerv.error_method_not_implemented()
nerv.error("method not implemented");
end
+function nerv.set_logfile(filename)
+ nerv._logfile = io.open(filename, "w")
+end
+
--- Format a string just like `sprintf` in C.
-- @param fmt the format string
-- @param ... args, the data to be formatted
@@ -25,7 +29,13 @@ end
-- @param fmt the format string
-- @param ... args, the data to be formatted
function nerv.printf(fmt, ...)
- io.write(nerv.sprintf(fmt, ...))
+ local line = nerv.sprintf(fmt, ...)
+ io.stderr:write(line)
+ -- duplicate all output to the log file, if set
+ if nerv._logfile then
+ nerv._logfile:write(line)
+ nerv._logfile:flush()
+ end
end
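A minimal usage sketch of the logging change above: once a log file is set, everything that goes through nerv.printf (and the helpers built on it, such as nerv.warning) is written both to stderr and to that file. The file name is just an example.

    nerv.set_logfile("train.log")
    nerv.printf("lrate = %.3f\n", 0.8)   -- appears on stderr and in train.log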
--- Raise a global error with the formatted message.
@@ -54,7 +64,7 @@ end
function nerv.warning(fmt, ...)
nerv.printf(
string.format("(%s)[nerv] warning: %s\n",
- os.date("%H:%M:%S.%N %F"), fmt), ...)
+ os.date("%H:%M:%S %F"), fmt), ...)
end
--- Create a class (Torch-compatible).
@@ -88,24 +98,27 @@ function nerv.class(tname, parenttname)
end
function table.val_to_str(v)
- if "string" == type(v) then
- v = string.gsub(v, "\n", "\\n")
- if string.match(string.gsub(v,"[^'\"]",""), '^"+$') then
- return "'" .. v .. "'"
+ if "string" == type(v) then
+ v = string.gsub(v, "\n", "\\n")
+ if string.match(string.gsub(v,"[^'\"]",""), '^"+$') then
+ return "'" .. v .. "'"
+ end
+ return '"' .. string.gsub(v,'"', '\\"') .. '"'
+ else
+ return "table" == type(v) and table.tostring(v) or
+ (("number" == type(v) or
+ "string" == type(v) or
+ "boolean" == type(v)) and tostring(v)) or
+ nil -- failed to serialize
end
- return '"' .. string.gsub(v,'"', '\\"') .. '"'
- else
- return "table" == type(v) and table.tostring(v) or
- tostring(v)
- end
end
function table.key_to_str (k)
- if "string" == type(k) and string.match(k, "^[_%a][_%a%d]*$") then
- return k
- else
- return "[" .. table.val_to_str(k) .. "]"
- end
+ if "string" == type(k) and string.match(k, "^[_%a][_%a%d]*$") then
+ return k
+ else
+ return "[" .. table.val_to_str(k) .. "]"
+ end
end
--- Get the string representation of a table, which can be executed as a valid
@@ -114,18 +127,18 @@ end
-- @return the string representation which will result in a Lua table entity
-- when evaluated
function table.tostring(tbl)
- local result, done = {}, {}
- for k, v in ipairs(tbl) do
- table.insert(result, table.val_to_str(v))
- done[k] = true
- end
- for k, v in pairs(tbl) do
- if not done[k] then
- table.insert(result,
- table.key_to_str(k) .. "=" .. table.val_to_str(v))
+ local result, done = {}, {}
+ for k, v in ipairs(tbl) do
+ table.insert(result, table.val_to_str(v))
+ done[k] = true
+ end
+ for k, v in pairs(tbl) do
+ if not done[k] then
+ table.insert(result,
+ table.key_to_str(k) .. "=" .. table.val_to_str(v))
+ end
end
- end
- return "{" .. table.concat(result, ",") .. "}"
+ return "{" .. table.concat(result, ",") .. "}"
end
--- Get the class by name.
@@ -172,6 +185,168 @@ function nerv.include(filename)
return dofile(nerv.dirname(caller) .. filename)
end
+--- Parse the command-line options and arguments.
+-- @param argv the argument list to be parsed
+-- @param options the specification of options: a list of tables, one for each
+-- available option, say `v`, where `v[1]`, `v[2]` and `v[3]` give the full
+-- name of the option, its short form (allowed only for boolean options) and
+-- the type of the value controlled by the option. The optional keys `default`
+-- and `desc` set the default value and the description of the option.
+-- @param unordered if true, options may appear after non-option arguments;
+-- otherwise the first non-option argument ends option parsing
+--
+-- An example of a specification:
+-- {{"aaa", "a", "boolean", default = false, desc = "an option called aaa"},
+-- {"bbb", "b", "boolean", default = true, desc = "bbb is true unless --bbb=no is given"},
+-- {"ccc", nil, "int", default = 0, desc = "ccc expects an integer value"}}
+--
+-- @return args, opts the non-option arguments and the parsed options. `opts`
+-- is a table keyed by option name, with one entry per table in `options`; the
+-- parsed value can be accessed as `opts["aaa"].val` (which is `true` if
+-- "--aaa" or "-a" is specified).
+function nerv.parse_args(argv, options, unordered)
+ local is_opt_exp = "^[-](.*)$"
+ local sim_opt_exp = "^[-]([a-z]+)$"
+ local opt_exp = "^[-][-]([^=]+)$"
+ local opt_with_val_exp = "^[-][-]([^=]+)=([^=]+)$"
+ local opts = {}
+ local sopts = {}
+ local args = {}
+ local arg_start = false
+ local function err()
+ nerv.error("invalid format of option specification")
+ end
+ for _, v in ipairs(options) do
+ if type(v) ~= "table" or
+ (v[1] == nil and v[2] == nil) or
+ v[3] == nil then
+ err()
+ end
+ local opt_full = v[1]
+ local opt_short = v[2]
+ local opt_type = v[3]
+ local opt_meta = {type = opt_type,
+ desc = v.desc or "",
+ val = v.default}
+ if opt_short ~= nil then
+ if type(opt_short) ~= "string" or #opt_short ~= 1 then err() end
+ if opt_type ~= "boolean" then
+ nerv.error("only boolean option could have short form")
+ end
+ sopts[opt_short] = opt_meta
+ end
+ if opt_full ~= nil then
+ if type(opt_full) ~= "string" then err() end
+ opts[opt_full] = opt_meta
+ end
+ end
+ for _, token in ipairs(argv) do
+ if ((not arg_start) or unordered) and token:match(is_opt_exp) then
+ local k = token:match(sim_opt_exp)
+ if k then
+ for c in k:gmatch"." do
+ if sopts[c] then
+ sopts[c].val = true
+ else
+ nerv.error("invalid option -%s", c)
+ end
+ end
+ else
+ local k = token:match(opt_exp)
+ if k then
+ if opts[k] == nil then
+ nerv.error("invalid option %s", token)
+ end
+ if opts[k].type ~= "boolean" then
+ nerv.error("invalid option --%s: " ..
+ "a %s value needs to be specified",
+ k, opts[k].type)
+ else
+ opts[k].val = true
+ end
+ else
+ local k, v = token:match(opt_with_val_exp)
+ if k then
+ if opts[k] == nil then
+ nerv.error("invalid option %s", token)
+ end
+ if opts[k].type == "boolean" then
+ if v == "yes" then
+ opts[k].val = true
+ elseif v == "no" then
+ opts[k].val = false
+ else
+ nerv.error("boolean value should be \"yes\" or \"no\"")
+ end
+ elseif opts[k].type == "int" then
+ local t = tonumber(v)
+ opts[k].val = t
+ if t == nil or math.floor(t) ~= t then
+ nerv.error("int value is expected")
+ end
+ elseif opts[k].type == "number" then
+ local t = tonumber(v)
+ opts[k].val = t
+ if t == nil then
+ nerv.error("numeric value is expected")
+ end
+ elseif opts[k].type == "string" then
+ opts[k].val = v
+ else
+ nerv.error("unrecognized type %s", opts[k].type)
+ end
+ else
+ nerv.error("unrecognized option %s", token)
+ end
+ end
+ end
+ else
+ table.insert(args, token)
+ arg_start = true
+ end
+ end
+ return args, opts
+end
+
+--- Print usage information of the command-line options
+-- @param options the list of options used in `parse_args`
+function nerv.print_usage(options)
+ local full_maxlen = 0
+ local type_maxlen = 0
+ local default_maxlen = 0
+ for _, v in ipairs(options) do
+ local opt_full = v[1]
+ local opt_short = v[2]
+ local opt_type = v[3]
+ full_maxlen = math.max(full_maxlen, opt_full and #opt_full or 0)
+ type_maxlen = math.max(type_maxlen, opt_type and #opt_type or 0)
+ default_maxlen = math.max(default_maxlen, #tostring(v.default))
+ end
+ local function pattern_gen()
+ return string.format("\t%%-%ds\t%%-2s\t%%-%ds\t%%-%ds\t%%s\n",
+ full_maxlen, type_maxlen, default_maxlen)
+ end
+ nerv.printf("\n")
+ nerv.printf(pattern_gen(), "Option", "Abbr.", "Type", "Default", "Desc.")
+ for _, v in ipairs(options) do
+ local opt_full = v[1]
+ local opt_short = v[2]
+ local opt_type = v[3]
+ nerv.printf(pattern_gen(),
+ (opt_full and '--' .. opt_full) or "",
+ (opt_short and '-' .. opt_short) or "",
+ opt_type,
+ (v.default ~= nil and tostring(v.default)) or "",
+ v.desc or "")
+ end
+ nerv.printf("\n")
+end
+
+function table.extend(tbl1, tbl2)
+ for _, v in ipairs(tbl2) do
+ table.insert(tbl1, v)
+ end
+end
+
-- the following lines trigger the initialization of basic modules
nerv.include('matrix/init.lua')
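A usage sketch of the two command-line helpers added above; the option names and values are made up for illustration.

    local options = {{"help", "h", "boolean", default = false, desc = "show this help"},
                     {"lrate", nil, "number", default = 0.8, desc = "learning rate"}}
    local args, opts = nerv.parse_args(arg, options)
    if opts["help"].val then
        nerv.print_usage(options)
        return
    end
    nerv.printf("lrate = %g, first argument = %s\n",
                opts["lrate"].val, args[1] or "(none)")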
diff --git a/nerv/io/sgd_buffer.lua b/nerv/io/sgd_buffer.lua
index 3cf4f5a..d78f6d1 100644
--- a/nerv/io/sgd_buffer.lua
+++ b/nerv/io/sgd_buffer.lua
@@ -2,8 +2,9 @@ local SGDBuffer = nerv.class("nerv.SGDBuffer", "nerv.DataBuffer")
function SGDBuffer:__init(global_conf, buffer_conf)
self.gconf = global_conf
+ self.batch_size = buffer_conf.batch_size
self.buffer_size = math.floor(buffer_conf.buffer_size /
- global_conf.batch_size) * global_conf.batch_size
+ self.batch_size) * self.batch_size
self.randomize = buffer_conf.randomize
self.consume = buffer_conf.consume
local cumat_type = global_conf.cumat_type
@@ -112,11 +113,11 @@ function SGDBuffer:saturate()
end
self.rand_map = self.perm_gen(self.tail) -- generate shuffled index
collectgarbage("collect")
- return self.tail >= self.gconf.batch_size
+ return self.tail >= self.batch_size
end
function SGDBuffer:get_data()
- local batch_size = self.gconf.batch_size
+ local batch_size = self.batch_size
if self.head >= self.tail then -- buffer is empty
local t = os.clock()
if (not self:saturate()) and (not self.consume) then
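With the change above, the batch size becomes part of the buffer configuration instead of being read from gconf inside the buffer. The construction below mirrors the make_buffer helpers in the example scripts of this patch.

    -- `readers` as returned by a make_readers helper
    local buffer = nerv.SGDBuffer(gconf,
                                  {buffer_size = gconf.buffer_size,
                                   batch_size  = gconf.batch_size,
                                   randomize   = gconf.randomize,
                                   readers     = readers,
                                   use_gpu     = true})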
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 4156dde..38743aa 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -8,21 +8,19 @@ local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer')
--- A parameter that consists of a single matrix
-- @type nerv.MatrixParam
+function MatrixParam:check(checker)
+ -- check trans matrix type
+ checker(self.trans)
+end
+
--- Read from a file handle.
-- @param handle the file handle
function MatrixParam:read(handle)
self.trans = self.gconf.mmat_type.load(handle)
- if not self.gconf.use_cpu then
- self.trans = self.gconf.cumat_type.new_from_host(self.trans)
- end
end
function MatrixParam:write(handle)
- local trans = self.trans
- if not self.gconf.use_cpu then
- trans = self.trans:new_to_host()
- end
- trans:save(handle)
+ self.trans:save(handle)
end
function MatrixParam:train_init()
@@ -30,6 +28,12 @@ function MatrixParam:train_init()
self.correction:fill(0)
end
+function MatrixParam:copy(copier)
+ local target = nerv.MatrixParam(self.id, self.gconf)
+ target.trans = copier(self.trans)
+ return target
+end
+
function MatrixParam:_update_by_gradient(gradient, alpha, beta)
local gconf = self.gconf
-- momentum gain
@@ -77,25 +81,24 @@ end
--- The constructor.
function AffineLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- if layer_conf.ltp ~= nil and layer_conf.ltp1 == nil then
- layer_conf.ltp1 = layer_conf.ltp
- end
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
+ self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
+ self:bind_params()
+end
+
+function AffineLayer:bind_params()
for i = 1, #self.dim_in do
local pid = "ltp" .. i
local pid_list = i == 1 and {pid, "ltp"} or pid
- self["ltp" .. i] = self:find_param(pid_list, layer_conf, global_conf,
+ self["ltp" .. i] = self:find_param(pid_list, self.lconf, self.gconf,
nerv.LinearTransParam,
- {self.dim_in[i], self.dim_out[1]})
+ {self.dim_in[i], self.dim_out[1]})
end
self.ltp = self.ltp1 -- alias of ltp1
- self.bp = self:find_param("bp", layer_conf, global_conf,
+ self.bp = self:find_param("bp", self.lconf, self.gconf,
nerv.BiasParam,
{1, self.dim_out[1]})
- self.gconf = global_conf
- self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs
+
end
function AffineLayer:init(batch_size)
@@ -142,7 +145,7 @@ function AffineLayer:back_propagate(bp_err, next_bp_err, input, output)
end
function AffineLayer:get_params()
- local pr = nerv.ParamRepo({self.ltp1, self.bp})
+ local pr = nerv.ParamRepo({self.ltp1, self.bp}, self.loc_type)
for i = 2, #self.dim_in do
pr:add(self["ltp" .. i].id, self["ltp" .. i])
end
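MatrixParam:read/:write now keep the matrix wherever it was loaded, and the new :copy takes an explicit copier callback, so the caller decides where the copy lives. A hedged sketch, assuming some_param.trans currently sits on the device; new_to_host/new_from_host are the transfer helpers that appear in the removed read/write code.

    local host_copy   = some_param:copy(function (m) return m:new_to_host() end)
    local device_copy = host_copy:copy(function (m)
        return gconf.cumat_type.new_from_host(m)
    end)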
diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua
index 924c3da..191be78 100644
--- a/nerv/layer/bias.lua
+++ b/nerv/layer/bias.lua
@@ -1,12 +1,15 @@
local BiasLayer = nerv.class("nerv.BiasLayer", "nerv.Layer")
function BiasLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.gconf = global_conf
- self.bias = layer_conf.bias
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:check_dim_len(1, 1)
+ self:bind_params()
+end
+
+function BiasLayer:bind_params()
+ self.bias = self:find_param("bias", self.lconf, self.gconf,
+ nerv.BiasParam,
+ {1, self.dim_out[1]})
end
function BiasLayer:init()
@@ -28,5 +31,5 @@ function BiasLayer:propagate(input, output)
end
function BiasLayer:get_params()
- return nerv.ParamRepo({self.bias})
+ return nerv.ParamRepo({self.bias}, self.loc_type)
end
diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua
index 22e89a9..028c970 100644
--- a/nerv/layer/combiner.lua
+++ b/nerv/layer/combiner.lua
@@ -1,16 +1,8 @@
local CombinerLayer = nerv.class('nerv.CombinerLayer', 'nerv.Layer')
function CombinerLayer:__init(id, global_conf, layer_conf)
- self.id = id
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self.lambda = layer_conf.lambda
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = global_conf
- if self.gconf.use_cpu then
- self.mat_type = self.gconf.mmat_type
- else
- self.mat_type = self.gconf.cumat_type
- end
self:check_dim_len(#self.lambda, -1)
if #self.dim_in < 1 then
nerv.error("no input specified")
@@ -20,6 +12,10 @@ function CombinerLayer:__init(id, global_conf, layer_conf)
end
end
+function CombinerLayer:bind_params()
+ -- do nothing
+end
+
function CombinerLayer:init(batch_size)
local dim = self.dim_in[1]
for i = 2, #self.dim_in do
@@ -66,5 +62,5 @@ function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output)
end
function CombinerLayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/dropout.lua b/nerv/layer/dropout.lua
index 42660cc..1a379c9 100644
--- a/nerv/layer/dropout.lua
+++ b/nerv/layer/dropout.lua
@@ -1,22 +1,18 @@
local DropoutLayer = nerv.class("nerv.DropoutLayer", "nerv.Layer")
function DropoutLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.gconf = global_conf
- if self.gconf.use_cpu then
- self.mat_type = self.gconf.mmat_type
- else
- self.mat_type = self.gconf.cumat_type
- end
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self.rate = layer_conf.dropout_rate or global_conf.dropout_rate
if self.rate == nil then
nerv.warning("[DropoutLayer:propagate] dropout rate is not set")
end
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
self:check_dim_len(1, 1) -- one input and one output of the same dimension
end
+function DropoutLayer:bind_params()
+ -- do nothing
+end
+
function DropoutLayer:init(batch_size, chunk_size)
if self.dim_in[1] ~= self.dim_out[1] then
nerv.error("mismatching dimensions of input and output")
@@ -73,5 +69,5 @@ function DropoutLayer:back_propagate(bp_err, next_bp_err, input, output, t)
end
function DropoutLayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua
index 8988617..137472b 100644
--- a/nerv/layer/duplicate.lua
+++ b/nerv/layer/duplicate.lua
@@ -1,10 +1,7 @@
local DuplicateLayer = nerv.class('nerv.DuplicateLayer', 'nerv.Layer')
function DuplicateLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = global_conf
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:check_dim_len(1, -1)
if #self.dim_out < 1 then
nerv.error('no output specified')
@@ -40,5 +37,5 @@ function DuplicateLayer:update()
end
function DuplicateLayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/elem_mul.lua b/nerv/layer/elem_mul.lua
index fe80a3f..f03649b 100644
--- a/nerv/layer/elem_mul.lua
+++ b/nerv/layer/elem_mul.lua
@@ -1,14 +1,15 @@
local ElemMulLayer = nerv.class('nerv.ElemMulLayer', 'nerv.Layer')
function ElemMulLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = global_conf
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
-- element-wise multiplication of input[1] and input[2]
self:check_dim_len(2, 1)
end
+function ElemMulLayer:bind_params()
+ -- do nothing
+end
+
function ElemMulLayer:init(batch_size)
if self.dim_in[1] ~= self.dim_in[2] or
self.dim_in[1] ~= self.dim_out[1] then
@@ -34,5 +35,5 @@ function ElemMulLayer:update(bp_err, input, output)
end
function ElemMulLayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/graph.lua b/nerv/layer/graph.lua
index 1406eff..5f42fca 100644
--- a/nerv/layer/graph.lua
+++ b/nerv/layer/graph.lua
@@ -1,10 +1,7 @@
local GraphLayer = nerv.class('nerv.GraphLayer', 'nerv.Layer')
function GraphLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = global_conf
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:graph_init(layer_conf.layer_repo, layer_conf.connections)
end
@@ -155,5 +152,5 @@ function GraphLayer:get_params()
table.insert(param_repos, ref.layer:get_params())
end
end
- return nerv.ParamRepo.merge(param_repos)
+ return nerv.ParamRepo.merge(param_repos, self.loc_type)
end
diff --git a/nerv/layer/gru.lua b/nerv/layer/gru.lua
index e81d21a..71718d7 100644
--- a/nerv/layer/gru.lua
+++ b/nerv/layer/gru.lua
@@ -4,11 +4,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
-- input1:x
-- input2:h
-- input3:c (h^~)
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = global_conf
-
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
if self.dim_in[2] ~= self.dim_out[1] then
nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)",
self.dim_in[2], self.dim_out[1])
@@ -17,7 +13,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
-- prepare a DAGLayer to hold the lstm structure
local pr = layer_conf.pr
if pr == nil then
- pr = nerv.ParamRepo()
+ pr = nerv.ParamRepo({}, self.loc_type)
end
local function ap(str)
@@ -63,7 +59,7 @@ function GRULayer:__init(id, global_conf, layer_conf)
},
}
- local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+ self.lrepo = nerv.LayerRepo(layers, pr, global_conf)
local connections = {
["<input>[1]"] = ap("inputXDup[1]"),
@@ -97,12 +93,20 @@ function GRULayer:__init(id, global_conf, layer_conf)
self.dag = nerv.DAGLayer(self.id, global_conf,
{dim_in = self.dim_in,
dim_out = self.dim_out,
- sub_layers = layerRepo,
+ sub_layers = self.lrepo,
connections = connections})
self:check_dim_len(2, 1) -- x, h and h
end
+function GRULayer:bind_params()
+ local pr = self.lconf.pr
+ if pr == nil then
+ pr = nerv.ParamRepo({}, self.loc_type)
+ end
+ self.lrepo:rebind(pr)
+end
+
function GRULayer:init(batch_size, chunk_size)
self.dag:init(batch_size, chunk_size)
end
diff --git a/nerv/layer/identity.lua b/nerv/layer/identity.lua
index aeeff89..d56337d 100644
--- a/nerv/layer/identity.lua
+++ b/nerv/layer/identity.lua
@@ -1,10 +1,7 @@
local IdentityLayer = nerv.class('nerv.IdentityLayer', 'nerv.Layer')
function IdentityLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = global_conf
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:check_dim_len(1, 1)
if self.dim_in[1] ~= self.dim_out[1] then
nerv.error('mismatching dimensions of input and output')
@@ -29,5 +26,5 @@ function IdentityLayer:update()
end
function IdentityLayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/init.lua b/nerv/layer/init.lua
index 4fabefa..475ef62 100644
--- a/nerv/layer/init.lua
+++ b/nerv/layer/init.lua
@@ -30,7 +30,18 @@ end
local Layer = nerv.class('nerv.Layer')
function Layer:__init(id, global_conf, layer_conf)
- nerv.error_method_not_implemented()
+ self.id = id
+ self.gconf = global_conf
+ self.lconf = layer_conf
+ if self.gconf.use_cpu then
+ self.mat_type = self.gconf.mmat_type
+ self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
+ else
+ self.mat_type = self.gconf.cumat_type
+ self.loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE
+ end
+ self.dim_in = layer_conf.dim_in
+ self.dim_out = layer_conf.dim_out
end
function Layer:init(batch_size)
@@ -66,6 +77,10 @@ function Layer:get_params()
nerv.error_method_not_implemented()
end
+function Layer:bind_params()
+ nerv.error_method_not_implemented()
+end
+
function Layer:get_dim()
return self.dim_in, self.dim_out
end
@@ -78,30 +93,33 @@ function Layer:get_sublayer(id)
nerv.error('primitive layer does not have sublayers')
end
-function Layer:find_param(pid_list, lconf, gconf, p_type, p_dim)
- if type(pid_list) == "string" then
- pid_list = {pid_list}
+function Layer:find_param(plist, lconf, gconf, p_type, p_dim)
+ if type(plist) == "string" then
+ plist = {plist}
end
- pid_list_str = table.tostring(pid_list)
- for i, pid in ipairs(pid_list) do
- if lconf[pid] ~= nil then
- nerv.info("param [%s] of layer [%s] found in `layer_conf`.", pid, self.id)
- return lconf[pid]
+ if lconf.params == nil then
+ lconf.params = {}
+ end
+ local plist_str = table.tostring(plist)
+ local pid
+ for i, pname in ipairs(plist) do
+ if lconf.params[pname] ~= nil then
+ nerv.info("param id for [%s] of layer [%s] specified in `layer_conf.params`.", pname, self.id)
+ pid = lconf.params[pname]
end
- local pid_g = self.id .. '_' .. pid --global identifier
- local pr = lconf.pr
- local p
- if pr ~= nil and pr:has_param(pid_g) == true then
- nerv.info("param [%s] of layer [%s] found in `layer_conf.pr`.", pid_list_str, self.id)
- p = pr:get_param(pid_g)
- return p
+ if lconf.pr:has_param(pid) then
+ return lconf.pr:get_param(pid)
end
end
- nerv.info("param [%s] of layer [%s] is not found in `layer_conf` or `layer_conf.pr`, " ..
- "switch to auto-generate", pid_list_str, self.id)
- local pid_g = self.id .. '_' .. pid_list[1]
- p = p_type(pid_g, gconf)
- p.trans = gconf.cumat_type(unpack(p_dim))
+ pid = self.id .. '_' .. plist[1]
+ if lconf.pr:has_param(pid) then
+ nerv.info("param id for [%s] of layer [%s] is generated automatically.", pname, self.id)
+ return lconf.pr:get_param(pid)
+ end
+ nerv.info("param id for [%s] of layer [%s] is not found in the specified param repo, " ..
+ "switch to auto-generate", plist_str, self.id)
+ local p = p_type(pid, gconf)
+ p.trans = self.mat_type(unpack(p_dim))
if type(gconf.param_random) ~= "function" then
nerv.error("a param generate function is needed")
end
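To summarize the lookup order that find_param now implements (a descriptive sketch; affine0 and ltp are just example names):

    -- for a layer "affine0" requesting parameter name "ltp":
    --   1. if layer_conf.params.ltp is set, that id is looked up in layer_conf.pr;
    --   2. otherwise the auto-generated id "affine0_ltp" is looked up in layer_conf.pr;
    --   3. otherwise a new parameter with id "affine0_ltp" is created on self.mat_type
    --      and initialized with gconf.param_random.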
diff --git a/nerv/layer/lstm.lua b/nerv/layer/lstm.lua
index caa7569..641d5dc 100644
--- a/nerv/layer/lstm.lua
+++ b/nerv/layer/lstm.lua
@@ -4,15 +4,11 @@ function LSTMLayer:__init(id, global_conf, layer_conf)
-- input1:x
-- input2:h
-- input3:c
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = global_conf
-
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
-- prepare a DAGLayer to hold the lstm structure
local pr = layer_conf.pr
if pr == nil then
- pr = nerv.ParamRepo()
+ pr = nerv.ParamRepo({}, self.loc_type)
end
local function ap(str)
@@ -66,7 +62,7 @@ function LSTMLayer:__init(id, global_conf, layer_conf)
},
}
- local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+ self.lrepo = nerv.LayerRepo(layers, pr, global_conf)
local connections = {
["<input>[1]"] = ap("inputXDup[1]"),
@@ -109,12 +105,20 @@ function LSTMLayer:__init(id, global_conf, layer_conf)
self.dag = nerv.DAGLayer(self.id, global_conf,
{dim_in = self.dim_in,
dim_out = self.dim_out,
- sub_layers = layerRepo,
+ sub_layers = self.lrepo,
connections = connections})
self:check_dim_len(3, 2) -- x, h, c and h, c
end
+function LSTMLayer:bind_params()
+ local pr = self.lconf.pr
+ if pr == nil then
+ pr = nerv.ParamRepo({}, self.loc_type)
+ end
+ self.lrepo:rebind(pr)
+end
+
function LSTMLayer:init(batch_size, chunk_size)
self.dag:init(batch_size, chunk_size)
end
diff --git a/nerv/layer/lstm_gate.lua b/nerv/layer/lstm_gate.lua
index 1963eba..7a27bab 100644
--- a/nerv/layer/lstm_gate.lua
+++ b/nerv/layer/lstm_gate.lua
@@ -2,20 +2,19 @@ local LSTMGateLayer = nerv.class('nerv.LSTMGateLayer', 'nerv.Layer')
-- NOTE: this is a full matrix gate
function LSTMGateLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = global_conf
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
+ self:check_dim_len(-1, 1) --accept multiple inputs
+ self:bind_params()
+end
+function LSTMGateLayer:bind_params()
for i = 1, #self.dim_in do
- self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf,
+ self["ltp" .. i] = self:find_param("ltp" .. i, self.lconf, self.gconf,
nerv.LinearTransParam,
{self.dim_in[i], self.dim_out[1]})
end
- self.bp = self:find_param("bp", layer_conf, global_conf,
+ self.bp = self:find_param("bp", self.lconf, self.gconf,
nerv.BiasParam, {1, self.dim_out[1]})
-
- self:check_dim_len(-1, 1) --accept multiple inputs
end
function LSTMGateLayer:init(batch_size)
@@ -69,7 +68,7 @@ function LSTMGateLayer:update(bp_err, input, output)
end
function LSTMGateLayer:get_params()
- local pr = nerv.ParamRepo({self.bp})
+ local pr = nerv.ParamRepo({self.bp}, self.loc_type)
for i = 1, #self.dim_in do
pr:add(self["ltp" .. i].id, self["ltp" .. i])
end
diff --git a/nerv/layer/mse.lua b/nerv/layer/mse.lua
index 1c218d0..458d086 100644
--- a/nerv/layer/mse.lua
+++ b/nerv/layer/mse.lua
@@ -1,18 +1,14 @@
local MSELayer = nerv.class("nerv.MSELayer", "nerv.Layer")
function MSELayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = global_conf
- if self.gconf.use_cpu then
- self.mat_type = self.gconf.mmat_type
- else
- self.mat_type = self.gconf.cumat_type
- end
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:check_dim_len(2, -1)
end
+function MSELayer:bind_params()
+ -- do nothing
+end
+
function MSELayer:init(batch_size)
if self.dim_in[1] ~= self.dim_in[2] then
nerv.error("mismatching dimensions of previous network output and labels")
@@ -61,5 +57,5 @@ function MSELayer:back_propagate(bp_err, next_bp_err, input, output)
end
function MSELayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/rnn.lua b/nerv/layer/rnn.lua
index 38f2326..e59cf5b 100644
--- a/nerv/layer/rnn.lua
+++ b/nerv/layer/rnn.lua
@@ -1,10 +1,7 @@
local RNNLayer = nerv.class('nerv.RNNLayer', 'nerv.GraphLayer')
function RNNLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
- self.gconf = layer_conf.gconf
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:check_dim_len(1, 1)
local din = layer_conf.dim_in[1]
@@ -12,7 +9,7 @@ function RNNLayer:__init(id, global_conf, layer_conf)
local pr = layer_conf.pr
if pr == nil then
- pr = nerv.ParamRepo()
+ pr = nerv.ParamRepo({}, self.loc_type)
end
local layers = {
diff --git a/nerv/layer/sigmoid.lua b/nerv/layer/sigmoid.lua
index 0a8bcdc..a9f9749 100644
--- a/nerv/layer/sigmoid.lua
+++ b/nerv/layer/sigmoid.lua
@@ -1,13 +1,14 @@
local SigmoidLayer = nerv.class("nerv.SigmoidLayer", "nerv.Layer")
function SigmoidLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.gconf = global_conf
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:check_dim_len(1, 1)
end
+function SigmoidLayer:bind_params()
+ -- do nothing
+end
+
function SigmoidLayer:init()
if self.dim_in[1] ~= self.dim_out[1] then
nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function SigmoidLayer:back_propagate(bp_err, next_bp_err, input, output)
end
function SigmoidLayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/softmax.lua b/nerv/layer/softmax.lua
index 4205b66..f7a5163 100644
--- a/nerv/layer/softmax.lua
+++ b/nerv/layer/softmax.lua
@@ -1,13 +1,14 @@
local SoftmaxLayer = nerv.class("nerv.SoftmaxLayer", "nerv.Layer")
function SoftmaxLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.gconf = global_conf
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:check_dim_len(1, 1) -- one input and one output of the same dimension
end
+function SoftmaxLayer:bind_params()
+ -- do nothing
+end
+
function SoftmaxLayer:init(batch_size)
if self.dim_in[1] ~= self.dim_out[1] then
nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function SoftmaxLayer:back_propagate(bp_err, next_bp_err, input, output)
end
function SoftmaxLayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua
index d7d650e..7b4a80c 100644
--- a/nerv/layer/softmax_ce.lua
+++ b/nerv/layer/softmax_ce.lua
@@ -1,15 +1,7 @@
local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayer", "nerv.Layer")
function SoftmaxCELayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.gconf = global_conf
- if self.gconf.use_cpu then
- self.mat_type = self.gconf.mmat_type
- else
- self.mat_type = self.gconf.cumat_type
- end
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self.compressed = layer_conf.compressed
if self.compressed == nil then
self.compressed = false
@@ -17,6 +9,10 @@ function SoftmaxCELayer:__init(id, global_conf, layer_conf)
self:check_dim_len(2, -1) -- two inputs: nn output and label
end
+function SoftmaxCELayer:bind_params()
+ -- do nothing
+end
+
function SoftmaxCELayer:init(batch_size, chunk_size)
if not self.compressed and (self.dim_in[1] ~= self.dim_in[2]) then
nerv.error("mismatching dimensions of previous network output and labels")
@@ -94,5 +90,5 @@ function SoftmaxCELayer:back_propagate(bp_err, next_bp_err, input, output, t)
end
function SoftmaxCELayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/tanh.lua b/nerv/layer/tanh.lua
index e1c32f2..7a19fc8 100644
--- a/nerv/layer/tanh.lua
+++ b/nerv/layer/tanh.lua
@@ -1,13 +1,14 @@
local TanhLayer = nerv.class("nerv.TanhLayer", "nerv.Layer")
function TanhLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.gconf = global_conf
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:check_dim_len(1, 1)
end
+function TanhLayer:bind_params()
+ -- do nothing
+end
+
function TanhLayer:init()
if self.dim_in[1] ~= self.dim_out[1] then
nerv.error("mismatching dimensions of input and output")
@@ -31,5 +32,5 @@ function TanhLayer:back_propagate(bp_err, next_bp_err, input, output)
end
function TanhLayer:get_params()
- return nerv.ParamRepo({})
+ return nerv.ParamRepo({}, self.loc_type)
end
diff --git a/nerv/layer/window.lua b/nerv/layer/window.lua
index 4933de0..364929f 100644
--- a/nerv/layer/window.lua
+++ b/nerv/layer/window.lua
@@ -1,12 +1,15 @@
local WindowLayer = nerv.class("nerv.WindowLayer", "nerv.Layer")
function WindowLayer:__init(id, global_conf, layer_conf)
- self.id = id
- self.gconf = global_conf
- self.window = layer_conf.window
- self.dim_in = layer_conf.dim_in
- self.dim_out = layer_conf.dim_out
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self:check_dim_len(1, 1)
+ self:bind_params()
+end
+
+function WindowLayer:bind_params()
+ self.window = self:find_param("window", self.lconf, self.gconf,
+ nerv.BiasParam,
+ {1, self.dim_out[1]})
end
function WindowLayer:init()
@@ -28,5 +31,5 @@ function WindowLayer:propagate(input, output)
end
function WindowLayer:get_params()
- return nerv.ParamRepo({self.window})
+ return nerv.ParamRepo({self.window}, self.loc_type)
end
diff --git a/nerv/lib/cblas.h b/nerv/lib/cblas.h
new file mode 100644
index 0000000..4087ffb
--- /dev/null
+++ b/nerv/lib/cblas.h
@@ -0,0 +1,596 @@
+#ifndef CBLAS_H
+
+#ifndef CBLAS_ENUM_DEFINED_H
+ #define CBLAS_ENUM_DEFINED_H
+ enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102 };
+ enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113,
+ AtlasConj=114};
+ enum CBLAS_UPLO {CblasUpper=121, CblasLower=122};
+ enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132};
+ enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
+#endif
+
+#ifndef CBLAS_ENUM_ONLY
+#define CBLAS_H
+#define CBLAS_INDEX int
+
+int cblas_errprn(int ierr, int info, char *form, ...);
+
+/*
+ * ===========================================================================
+ * Prototypes for level 1 BLAS functions (complex are recast as routines)
+ * ===========================================================================
+ */
+float cblas_sdsdot(const int N, const float alpha, const float *X,
+ const int incX, const float *Y, const int incY);
+double cblas_dsdot(const int N, const float *X, const int incX, const float *Y,
+ const int incY);
+float cblas_sdot(const int N, const float *X, const int incX,
+ const float *Y, const int incY);
+double cblas_ddot(const int N, const double *X, const int incX,
+ const double *Y, const int incY);
+/*
+ * Functions having prefixes Z and C only
+ */
+void cblas_cdotu_sub(const int N, const void *X, const int incX,
+ const void *Y, const int incY, void *dotu);
+void cblas_cdotc_sub(const int N, const void *X, const int incX,
+ const void *Y, const int incY, void *dotc);
+
+void cblas_zdotu_sub(const int N, const void *X, const int incX,
+ const void *Y, const int incY, void *dotu);
+void cblas_zdotc_sub(const int N, const void *X, const int incX,
+ const void *Y, const int incY, void *dotc);
+
+
+/*
+ * Functions having prefixes S D SC DZ
+ */
+float cblas_snrm2(const int N, const float *X, const int incX);
+float cblas_sasum(const int N, const float *X, const int incX);
+
+double cblas_dnrm2(const int N, const double *X, const int incX);
+double cblas_dasum(const int N, const double *X, const int incX);
+
+float cblas_scnrm2(const int N, const void *X, const int incX);
+float cblas_scasum(const int N, const void *X, const int incX);
+
+double cblas_dznrm2(const int N, const void *X, const int incX);
+double cblas_dzasum(const int N, const void *X, const int incX);
+
+
+/*
+ * Functions having standard 4 prefixes (S D C Z)
+ */
+CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX);
+CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX);
+CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX);
+CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX);
+
+/*
+ * ===========================================================================
+ * Prototypes for level 1 BLAS routines
+ * ===========================================================================
+ */
+
+/*
+ * Routines with standard 4 prefixes (s, d, c, z)
+ */
+void cblas_sswap(const int N, float *X, const int incX,
+ float *Y, const int incY);
+void cblas_scopy(const int N, const float *X, const int incX,
+ float *Y, const int incY);
+void cblas_saxpy(const int N, const float alpha, const float *X,
+ const int incX, float *Y, const int incY);
+void catlas_saxpby(const int N, const float alpha, const float *X,
+ const int incX, const float beta, float *Y, const int incY);
+void catlas_sset
+ (const int N, const float alpha, float *X, const int incX);
+
+void cblas_dswap(const int N, double *X, const int incX,
+ double *Y, const int incY);
+void cblas_dcopy(const int N, const double *X, const int incX,
+ double *Y, const int incY);
+void cblas_daxpy(const int N, const double alpha, const double *X,
+ const int incX, double *Y, const int incY);
+void catlas_daxpby(const int N, const double alpha, const double *X,
+ const int incX, const double beta, double *Y, const int incY);
+void catlas_dset
+ (const int N, const double alpha, double *X, const int incX);
+
+void cblas_cswap(const int N, void *X, const int incX,
+ void *Y, const int incY);
+void cblas_ccopy(const int N, const void *X, const int incX,
+ void *Y, const int incY);
+void cblas_caxpy(const int N, const void *alpha, const void *X,
+ const int incX, void *Y, const int incY);
+void catlas_caxpby(const int N, const void *alpha, const void *X,
+ const int incX, const void *beta, void *Y, const int incY);
+void catlas_cset(const int N, const void *alpha, void *X, const int incX);
+
+void cblas_zswap(const int N, void *X, const int incX,
+ void *Y, const int incY);
+void cblas_zcopy(const int N, const void *X, const int incX,
+ void *Y, const int incY);
+void cblas_zaxpy(const int N, const void *alpha, const void *X,
+ const int incX, void *Y, const int incY);
+void catlas_zaxpby(const int N, const void *alpha, const void *X,
+ const int incX, const void *beta, void *Y, const int incY);
+void catlas_zset(const int N, const void *alpha, void *X, const int incX);
+
+
+/*
+ * Routines with S and D prefix only
+ */
+void cblas_srotg(float *a, float *b, float *c, float *s);
+void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P);
+void cblas_srot(const int N, float *X, const int incX,
+ float *Y, const int incY, const float c, const float s);
+void cblas_srotm(const int N, float *X, const int incX,
+ float *Y, const int incY, const float *P);
+
+void cblas_drotg(double *a, double *b, double *c, double *s);
+void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P);
+void cblas_drot(const int N, double *X, const int incX,
+ double *Y, const int incY, const double c, const double s);
+void cblas_drotm(const int N, double *X, const int incX,
+ double *Y, const int incY, const double *P);
+
+
+/*
+ * Routines with S D C Z CS and ZD prefixes
+ */
+void cblas_sscal(const int N, const float alpha, float *X, const int incX);
+void cblas_dscal(const int N, const double alpha, double *X, const int incX);
+void cblas_cscal(const int N, const void *alpha, void *X, const int incX);
+void cblas_zscal(const int N, const void *alpha, void *X, const int incX);
+void cblas_csscal(const int N, const float alpha, void *X, const int incX);
+void cblas_zdscal(const int N, const double alpha, void *X, const int incX);
+
+/*
+ * Extra reference routines provided by ATLAS, but not mandated by the standard
+ */
+void cblas_crotg(void *a, void *b, void *c, void *s);
+void cblas_zrotg(void *a, void *b, void *c, void *s);
+void cblas_csrot(const int N, void *X, const int incX, void *Y, const int incY,
+ const float c, const float s);
+void cblas_zdrot(const int N, void *X, const int incX, void *Y, const int incY,
+ const double c, const double s);
+
+/*
+ * ===========================================================================
+ * Prototypes for level 2 BLAS
+ * ===========================================================================
+ */
+
+/*
+ * Routines with standard 4 prefixes (S, D, C, Z)
+ */
+void cblas_sgemv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const float alpha, const float *A, const int lda,
+ const float *X, const int incX, const float beta,
+ float *Y, const int incY);
+void cblas_sgbmv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const int KL, const int KU, const float alpha,
+ const float *A, const int lda, const float *X,
+ const int incX, const float beta, float *Y, const int incY);
+void cblas_strmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const float *A, const int lda,
+ float *X, const int incX);
+void cblas_stbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const float *A, const int lda,
+ float *X, const int incX);
+void cblas_stpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const float *Ap, float *X, const int incX);
+void cblas_strsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const float *A, const int lda, float *X,
+ const int incX);
+void cblas_stbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const float *A, const int lda,
+ float *X, const int incX);
+void cblas_stpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const float *Ap, float *X, const int incX);
+
+void cblas_dgemv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const double alpha, const double *A, const int lda,
+ const double *X, const int incX, const double beta,
+ double *Y, const int incY);
+void cblas_dgbmv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const int KL, const int KU, const double alpha,
+ const double *A, const int lda, const double *X,
+ const int incX, const double beta, double *Y, const int incY);
+void cblas_dtrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const double *A, const int lda,
+ double *X, const int incX);
+void cblas_dtbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const double *A, const int lda,
+ double *X, const int incX);
+void cblas_dtpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const double *Ap, double *X, const int incX);
+void cblas_dtrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const double *A, const int lda, double *X,
+ const int incX);
+void cblas_dtbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const double *A, const int lda,
+ double *X, const int incX);
+void cblas_dtpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const double *Ap, double *X, const int incX);
+
+void cblas_cgemv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ const void *X, const int incX, const void *beta,
+ void *Y, const int incY);
+void cblas_cgbmv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const int KL, const int KU, const void *alpha,
+ const void *A, const int lda, const void *X,
+ const int incX, const void *beta, void *Y, const int incY);
+void cblas_ctrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *A, const int lda,
+ void *X, const int incX);
+void cblas_ctbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const void *A, const int lda,
+ void *X, const int incX);
+void cblas_ctpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *Ap, void *X, const int incX);
+void cblas_ctrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *A, const int lda, void *X,
+ const int incX);
+void cblas_ctbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const void *A, const int lda,
+ void *X, const int incX);
+void cblas_ctpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *Ap, void *X, const int incX);
+
+void cblas_zgemv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ const void *X, const int incX, const void *beta,
+ void *Y, const int incY);
+void cblas_zgbmv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const int KL, const int KU, const void *alpha,
+ const void *A, const int lda, const void *X,
+ const int incX, const void *beta, void *Y, const int incY);
+void cblas_ztrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *A, const int lda,
+ void *X, const int incX);
+void cblas_ztbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const void *A, const int lda,
+ void *X, const int incX);
+void cblas_ztpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *Ap, void *X, const int incX);
+void cblas_ztrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *A, const int lda, void *X,
+ const int incX);
+void cblas_ztbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const void *A, const int lda,
+ void *X, const int incX);
+void cblas_ztpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *Ap, void *X, const int incX);
+
+
+/*
+ * Routines with S and D prefixes only
+ */
+void cblas_ssymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const float *A,
+ const int lda, const float *X, const int incX,
+ const float beta, float *Y, const int incY);
+void cblas_ssbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const int K, const float alpha, const float *A,
+ const int lda, const float *X, const int incX,
+ const float beta, float *Y, const int incY);
+void cblas_sspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const float *Ap,
+ const float *X, const int incX,
+ const float beta, float *Y, const int incY);
+void cblas_sger(const enum CBLAS_ORDER Order, const int M, const int N,
+ const float alpha, const float *X, const int incX,
+ const float *Y, const int incY, float *A, const int lda);
+void cblas_ssyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const float *X,
+ const int incX, float *A, const int lda);
+void cblas_sspr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const float *X,
+ const int incX, float *Ap);
+void cblas_ssyr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const float *X,
+ const int incX, const float *Y, const int incY, float *A,
+ const int lda);
+void cblas_sspr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const float *X,
+ const int incX, const float *Y, const int incY, float *A);
+
+void cblas_dsymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const double *A,
+ const int lda, const double *X, const int incX,
+ const double beta, double *Y, const int incY);
+void cblas_dsbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const int K, const double alpha, const double *A,
+ const int lda, const double *X, const int incX,
+ const double beta, double *Y, const int incY);
+void cblas_dspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const double *Ap,
+ const double *X, const int incX,
+ const double beta, double *Y, const int incY);
+void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N,
+ const double alpha, const double *X, const int incX,
+ const double *Y, const int incY, double *A, const int lda);
+void cblas_dsyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const double *X,
+ const int incX, double *A, const int lda);
+void cblas_dspr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const double *X,
+ const int incX, double *Ap);
+void cblas_dsyr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const double *X,
+ const int incX, const double *Y, const int incY, double *A,
+ const int lda);
+void cblas_dspr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const double *X,
+ const int incX, const double *Y, const int incY, double *A);
+
+
+/*
+ * Routines with C and Z prefixes only
+ */
+void cblas_chemv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const void *alpha, const void *A,
+ const int lda, const void *X, const int incX,
+ const void *beta, void *Y, const int incY);
+void cblas_chbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const int K, const void *alpha, const void *A,
+ const int lda, const void *X, const int incX,
+ const void *beta, void *Y, const int incY);
+void cblas_chpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const void *alpha, const void *Ap,
+ const void *X, const int incX,
+ const void *beta, void *Y, const int incY);
+void cblas_cgeru(const enum CBLAS_ORDER Order, const int M, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *A, const int lda);
+void cblas_cgerc(const enum CBLAS_ORDER Order, const int M, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *A, const int lda);
+void cblas_cher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const void *X, const int incX,
+ void *A, const int lda);
+void cblas_chpr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const void *X,
+ const int incX, void *A);
+void cblas_cher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *A, const int lda);
+void cblas_chpr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *Ap);
+
+void cblas_zhemv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const void *alpha, const void *A,
+ const int lda, const void *X, const int incX,
+ const void *beta, void *Y, const int incY);
+void cblas_zhbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const int K, const void *alpha, const void *A,
+ const int lda, const void *X, const int incX,
+ const void *beta, void *Y, const int incY);
+void cblas_zhpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const void *alpha, const void *Ap,
+ const void *X, const int incX,
+ const void *beta, void *Y, const int incY);
+void cblas_zgeru(const enum CBLAS_ORDER Order, const int M, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *A, const int lda);
+void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *A, const int lda);
+void cblas_zher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const void *X, const int incX,
+ void *A, const int lda);
+void cblas_zhpr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const void *X,
+ const int incX, void *A);
+void cblas_zher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *A, const int lda);
+void cblas_zhpr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *Ap);
+
+/*
+ * ===========================================================================
+ * Prototypes for level 3 BLAS
+ * ===========================================================================
+ */
+
+/*
+ * Routines with standard 4 prefixes (S, D, C, Z)
+ */
+void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
+ const int K, const float alpha, const float *A,
+ const int lda, const float *B, const int ldb,
+ const float beta, float *C, const int ldc);
+void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const int M, const int N,
+ const float alpha, const float *A, const int lda,
+ const float *B, const int ldb, const float beta,
+ float *C, const int ldc);
+void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const float alpha, const float *A, const int lda,
+ const float beta, float *C, const int ldc);
+void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const float alpha, const float *A, const int lda,
+ const float *B, const int ldb, const float beta,
+ float *C, const int ldc);
+void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const float alpha, const float *A, const int lda,
+ float *B, const int ldb);
+void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const float alpha, const float *A, const int lda,
+ float *B, const int ldb);
+
+void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
+ const int K, const double alpha, const double *A,
+ const int lda, const double *B, const int ldb,
+ const double beta, double *C, const int ldc);
+void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const int M, const int N,
+ const double alpha, const double *A, const int lda,
+ const double *B, const int ldb, const double beta,
+ double *C, const int ldc);
+void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const double alpha, const double *A, const int lda,
+ const double beta, double *C, const int ldc);
+void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const double alpha, const double *A, const int lda,
+ const double *B, const int ldb, const double beta,
+ double *C, const int ldc);
+void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const double alpha, const double *A, const int lda,
+ double *B, const int ldb);
+void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const double alpha, const double *A, const int lda,
+ double *B, const int ldb);
+
+void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
+ const int K, const void *alpha, const void *A,
+ const int lda, const void *B, const int ldb,
+ const void *beta, void *C, const int ldc);
+void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const void *beta,
+ void *C, const int ldc);
+void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const void *alpha, const void *A, const int lda,
+ const void *beta, void *C, const int ldc);
+void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const void *beta,
+ void *C, const int ldc);
+void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ void *B, const int ldb);
+void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ void *B, const int ldb);
+
+void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
+ const int K, const void *alpha, const void *A,
+ const int lda, const void *B, const int ldb,
+ const void *beta, void *C, const int ldc);
+void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const void *beta,
+ void *C, const int ldc);
+void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const void *alpha, const void *A, const int lda,
+ const void *beta, void *C, const int ldc);
+void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const void *beta,
+ void *C, const int ldc);
+void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ void *B, const int ldb);
+void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ void *B, const int ldb);
+
+
+/*
+ * Routines with prefixes C and Z only
+ */
+void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const void *beta,
+ void *C, const int ldc);
+void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const float alpha, const void *A, const int lda,
+ const float beta, void *C, const int ldc);
+void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const float beta,
+ void *C, const int ldc);
+void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const void *beta,
+ void *C, const int ldc);
+void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const double alpha, const void *A, const int lda,
+ const double beta, void *C, const int ldc);
+void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const double beta,
+ void *C, const int ldc);
+
+int cblas_errprn(int ierr, int info, char *form, ...);
+
+#endif /* end #ifdef CBLAS_ENUM_ONLY */
+#endif
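
The prototypes above mirror the stock ATLAS cblas.h; bundling a local copy lets mmatrix.c drop the system <cblas.h> include (see the two mmatrix.c hunks below) without changing which BLAS symbols get linked. From the Lua side nothing changes: a host-matrix product presumably still lands in cblas_sgemm / cblas_dgemm. A minimal sketch, assuming the default MContext has been initialized (as the nerv driver below does) and that nerv.Matrix.__mul__ from nerv/matrix/init.lua applies to host matrices:

    local a = nerv.MMatrixFloat(2, 3)   -- host (CPU) matrices
    local b = nerv.MMatrixFloat(3, 4)
    -- ... fill a and b ...
    local c = a * b                     -- dispatched to cblas_sgemm via nerv/matrix/generic/mmatrix.c
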
diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c
index ff2ea22..aec4d60 100644
--- a/nerv/lib/matrix/cumatrix.c
+++ b/nerv/lib/matrix/cumatrix.c
@@ -37,7 +37,9 @@ void nerv_cuda_context_accu_profile(CuContext *context,
*val += delta;
}
-static void new_cuda_handles(CuContext *context, Status *status) {
+static void new_cuda_handles(CuContext *context, int dev, Status *status) {
+ if (context->has_handle) return;
+ CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status);
CUBLAS_SAFE_SYNC_CALL(cublasCreate(&(context->cublas_handle)), status);
CURAND_SAFE_SYNC_CALL(curandCreateGenerator(&(context->curand_gen),
CURAND_RNG_PSEUDO_DEFAULT), status);
@@ -47,9 +49,12 @@ static void new_cuda_handles(CuContext *context, Status *status) {
CUDA_SAFE_SYNC_CALL(cudaEventCreate(&(context->profile_start)), status);
CUDA_SAFE_SYNC_CALL(cudaEventCreate(&(context->profile_stop)), status);
NERV_SET_STATUS(status, NERV_NORMAL, 0);
+ context->has_handle = 1;
}
static void free_cuda_handles(CuContext *context, Status *status) {
+ if (!context->has_handle) return;
+ context->has_handle = 0;
CUBLAS_SAFE_SYNC_CALL(cublasDestroy(context->cublas_handle), status);
CURAND_SAFE_SYNC_CALL(curandDestroyGenerator(context->curand_gen), status);
CUDA_SAFE_SYNC_CALL(cudaEventDestroy(context->profile_start), status);
@@ -57,9 +62,41 @@ static void free_cuda_handles(CuContext *context, Status *status) {
NERV_SET_STATUS(status, NERV_NORMAL, 0);
}
-CuContext *nerv_cuda_context_create(Status *status) {
+static int choose_best_gpu(Status *status) {
+ int i, n, dev = 0;
+ float best_ratio = 0;
+ fprintf(stderr, "*** select a GPU based on available space\n");
+ CUDA_SAFE_CALL_RET(cudaGetDeviceCount(&n), status);
+ for (i = 0; i < n; i++)
+ {
+ size_t avail, total;
+ float ratio;
+ CUDA_SAFE_SYNC_CALL_RET(cudaSetDevice(i), status);
+ CUDA_SAFE_SYNC_CALL_RET(cuMemGetInfo(&avail, &total), status);
+ ratio = (float)avail/total * 100;
+ fprintf(stderr, "* card %d: %.2f%%\n", i, ratio);
+ if (ratio > best_ratio)
+ {
+ best_ratio = ratio;
+ dev = i;
+ }
+ CUDA_SAFE_SYNC_CALL_RET(cudaDeviceReset(), status);
+ }
+ fprintf(stderr, "*** final decision: GPU %d\n", dev);
+ NERV_SET_STATUS(status, NERV_NORMAL, 0);
+ return dev;
+}
+
+CuContext *nerv_cuda_context_create(int dev, Status *status) {
CuContext *context = (CuContext *)malloc(sizeof(CuContext));
- new_cuda_handles(context, status);
+ context->has_handle = 0; /* this line must come first */
+ if (dev == -1)
+ {
+ dev = choose_best_gpu(status);
+ if (status->err_code != NERV_NORMAL)
+ return NULL;
+ }
+ new_cuda_handles(context, dev, status);
if (status->err_code != NERV_NORMAL)
return NULL;
context->profile = nerv_hashmap_create(PROFILE_HASHMAP_SIZE, bkdr_hash, strcmp);
@@ -78,11 +115,14 @@ void nerv_cuda_context_destroy(CuContext *context, Status *status) {
void nerv_cuda_context_select_gpu(CuContext *context,
int dev, Status *status) {
- free_cuda_handles(context, status);
+ /* free_cuda_handles(context, status);
if (status->err_code != NERV_NORMAL)
return;
- CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status);
- new_cuda_handles(context, status);
+ */
+ /* because of cudaDeviceReset */
+ context->has_handle = 0;
+ CUDA_SAFE_SYNC_CALL(cudaDeviceReset(), status);
+ new_cuda_handles(context, dev, status);
if (status->err_code != NERV_NORMAL)
return;
NERV_SET_STATUS(status, NERV_NORMAL, 0);
diff --git a/nerv/lib/matrix/cumatrix.h b/nerv/lib/matrix/cumatrix.h
index 280035b..fd2a5ce 100644
--- a/nerv/lib/matrix/cumatrix.h
+++ b/nerv/lib/matrix/cumatrix.h
@@ -5,6 +5,7 @@
#include "cuda_helper.h"
typedef struct CuContext {
+ int has_handle;
cublasHandle_t cublas_handle;
cudaEvent_t profile_start, profile_stop;
curandGenerator_t curand_gen;
@@ -15,6 +16,6 @@ void nerv_cuda_context_print_profile(CuContext *context);
void nerv_cuda_context_clear_profile(CuContext *context);
void nerv_cuda_context_accu_profile(CuContext *context, const char *name, float delta);
void nerv_cuda_context_select_gpu(CuContext *context, int dev, Status *status);
-CuContext *nerv_cuda_context_create(Status *status);
+CuContext *nerv_cuda_context_create(int dev, Status *status);
void nerv_cuda_context_destroy(CuContext *contex, Status *status);
#endif
diff --git a/nerv/lib/matrix/generic/mmatrix.c b/nerv/lib/matrix/generic/mmatrix.c
index 485d778..fb99b53 100644
--- a/nerv/lib/matrix/generic/mmatrix.c
+++ b/nerv/lib/matrix/generic/mmatrix.c
@@ -8,10 +8,10 @@
context, status)
#define NERV_GENERIC_MATRIX
#include "../../common.h"
+#include "../../cblas.h"
#include "../../io/chunk_file.h"
#include <string.h>
#include <math.h>
-#include <cblas.h>
#include <float.h>
Matrix *nerv_matrix_(colsum)(Matrix *a, MContext *context, Status *status) {
diff --git a/nerv/matrix/cumatrix.c b/nerv/matrix/cumatrix.c
index f6a4ed5..b8eef9c 100644
--- a/nerv/matrix/cumatrix.c
+++ b/nerv/matrix/cumatrix.c
@@ -9,7 +9,7 @@ const char *nerv_cuda_context_tname = "nerv.CuContext";
int nerv_cuda_context_lua_select_gpu(lua_State *L) {
Status status;
nerv_cuda_context_select_gpu(luaT_checkudata(L, 1, nerv_cuda_context_tname),
- luaL_checkinteger(L, 1), &status);
+ luaL_checkinteger(L, 2), &status);
NERV_LUA_CHECK_STATUS(L, status);
return 0;
}
@@ -26,7 +26,8 @@ int nerv_cuda_context_lua_clear_profile(lua_State *L) {
int nerv_cuda_context_lua_new(lua_State *L) {
Status status;
- CuContext *self = nerv_cuda_context_create(&status);
+ int dev = lua_gettop(L) > 0 ? luaL_checkinteger(L, 1) : -1;
+ CuContext *self = nerv_cuda_context_create(dev, &status);
NERV_LUA_CHECK_STATUS(L, status);
luaT_pushudata(L, self, nerv_cuda_context_tname);
return 1;
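
With the binding above, nerv.CuContext takes an optional device index: omitting it (or passing -1) falls through to choose_best_gpu(), which picks the card with the highest free-memory ratio, and the select_gpu fix now reads the device id from the correct stack slot. A short sketch of the Lua-visible behaviour (the device numbers are only illustrative):

    local ctx  = nerv.CuContext(1)  -- use GPU 1 explicitly
    local auto = nerv.CuContext()   -- auto-select the card with the most free memory
    ctx:select_gpu(0)               -- reset the device and rebuild the handles on GPU 0
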
diff --git a/nerv/matrix/generic/mmatrix.c b/nerv/matrix/generic/mmatrix.c
index 69000b7..1f37173 100644
--- a/nerv/matrix/generic/mmatrix.c
+++ b/nerv/matrix/generic/mmatrix.c
@@ -8,10 +8,10 @@
#define MATRIX_BASE_TNAME nerv_matrix_host_tname
#define NERV_GENERIC_MATRIX
#include "../../lib/common.h"
+#include "../../lib/cblas.h"
#include "../../lib/matrix/generic/mmatrix.h"
#include "../../io/chunk_file.h"
#include <string.h>
-#include <cblas.h>
#define BLAS_OP_N CblasNoTrans
static int nerv_matrix_(lua_get_blas_op)(char ch) {
diff --git a/nerv/matrix/init.lua b/nerv/matrix/init.lua
index ef2fb6b..cf85004 100644
--- a/nerv/matrix/init.lua
+++ b/nerv/matrix/init.lua
@@ -87,6 +87,17 @@ function nerv.Matrix:__mul__(b)
return c
end
+--- A wrapper function for `copy_from`
+function nerv.Matrix:copy_to(b, ...)
+ b:copy_from(self, ...)
+end
+
+--- The base class for all device (in-GPU) matrices
+-- @type nerv.CuMatrix
+
+--- A wrapper function for `copy_fromd`
+nerv.CuMatrix.copy_tod = nerv.Matrix.copy_to
+
--- CUDA float matrices
-- @type nerv.CuMatrixFloat
@@ -127,6 +138,14 @@ end
-- @type nerv.MMatrix
--- A wrapper function for `copy_fromh`
-function nerv.MMatrix:copy_toh(b, ...)
+nerv.MMatrix.copy_toh = nerv.Matrix.copy_to
+
+--- A wrapper function for `nerv.CuMatrix` copy
+function nerv.MMatrix:copy_fromd(b, ...)
+ b:copy_toh(self, ...)
+end
+
+--- A wrapper function for `nerv.CuMatrix` copy
+function nerv.MMatrix:copy_tod(b, ...)
b:copy_fromh(self, ...)
end
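
The new wrappers make host/device copies symmetric, so either side of a transfer can initiate it. A minimal sketch, assuming the default CuContext and MContext created by the nerv driver below:

    local h = nerv.MMatrixFloat(4, 4)    -- host matrix
    local d = nerv.CuMatrixFloat(4, 4)   -- device matrix
    h:copy_tod(d)     -- host -> device, wraps d:copy_fromh(h)
    h:copy_fromd(d)   -- device -> host, wraps d:copy_toh(h)
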
diff --git a/nerv/nerv b/nerv/nerv
index 4dd448c..4c20ec7 100644
--- a/nerv/nerv
+++ b/nerv/nerv
@@ -1,13 +1,21 @@
#! /usr/bin/env luajit
require 'nerv'
-nerv.printf("*** NERV: A Lua-based toolkit for high-performance deep learning (alpha) ***\n")
-nerv.info("automatically initialize a default CuContext...")
-nerv.CuMatrix._default_context = nerv.CuContext()
-nerv.info("the default CuContext is ok")
+local options = {{"help", "h", "boolean", default = false, desc = "print this help message"},
+                 {"use-cpu", "c", "boolean", default = false, desc = "use the CPU instead of the GPU by default"},
+                 {"select-gpu", nil, "int", default = -1, desc = "select the GPU used for computation; fall back to automatic selection if not specified"}}
+econf = {} -- environment configuration
-nerv.info("automatically initialize a default MContext...")
-nerv.MMatrix._default_context = nerv.MContext()
-nerv.info("the default MContext is ok")
+local function print_help()
+ nerv.printf("Usage: <nerv_prog> [options] script.lua\n")
+ nerv.print_usage(options)
+end
+
+nerv.printf("*** NERV: A Lua-based toolkit for high-performance deep learning (alpha) ***\n")
+arg, opts = nerv.parse_args(arg, options)
+if #arg < 1 or opts["help"].val then
+ print_help()
+ return
+end
-- only for backward compatibility, will be removed in the future
local function _add_profile_method(cls)
@@ -15,13 +23,25 @@ local function _add_profile_method(cls)
cls.print_profile = function () c:print_profile() end
cls.clear_profile = function () c:clear_profile() end
end
-_add_profile_method(nerv.CuMatrix)
-_add_profile_method(nerv.MMatrix)
-
-if #arg < 1 then
- return
+if not opts["use-cpu"].val then
+ local dev = opts["select-gpu"].val
+ nerv.info("automatically initialize a default CuContext...")
+ nerv.CuMatrix._default_context = nerv.CuContext(dev)
+ nerv.info("the default CuContext is ok")
+ _add_profile_method(nerv.CuMatrix)
+ nerv.CuMatrix.select_gpu =
+ function (dev) nerv.CuMatrix._default_context:select_gpu(dev) end
+ econf.use_cpu = false
+else
+ econf.use_cpu = true
end
+
+nerv.info("automatically initialize a default MContext...")
+nerv.MMatrix._default_context = nerv.MContext()
+nerv.info("the default MContext is ok")
+_add_profile_method(nerv.MMatrix)
+
local script = arg[1]
local script_arg = {}
for i = 2, #arg do
@@ -29,5 +49,3 @@ for i = 2, #arg do
end
arg = script_arg
dofile(script)
-nerv.CuMatrix.print_profile()
-nerv.MMatrix.print_profile()
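
With the option table above, the driver now accepts flags before the script name; typical invocations (train.lua is a placeholder script name):

    <nerv_prog> --help                     # print the option list via nerv.print_usage
    <nerv_prog> --select-gpu=0 train.lua   # pin the default CuContext to GPU 0
    <nerv_prog> --use-cpu train.lua        # (or -c) skip CuContext creation and set econf.use_cpu

Note that the unconditional print_profile() calls at the end of the driver were dropped; profiling output is now only available through the print_profile methods attached by _add_profile_method.
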
diff --git a/nerv/nerv-scm-1.rockspec b/nerv/nerv-scm-1.rockspec
index 0e1e47f..d039e85 100644
--- a/nerv/nerv-scm-1.rockspec
+++ b/nerv/nerv-scm-1.rockspec
@@ -11,7 +11,8 @@ description = {
license = "BSD"
}
dependencies = {
- "lua >= 5.1"
+ "lua >= 5.1",
+ "penlight >= 1.3.2"
}
build = {
type = "make",
diff --git a/nerv/nn/layer_dag.lua b/nerv/nn/layer_dag.lua
index 6896878..f999752 100644
--- a/nerv/nn/layer_dag.lua
+++ b/nerv/nn/layer_dag.lua
@@ -134,20 +134,16 @@ function DAGLayer:__init(id, global_conf, layer_conf)
end
end
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
self.layers = layers
self.inputs = inputs
self.outputs = outputs
- self.id = id
- self.dim_in = dim_in
- self.dim_out = dim_out
self.parsed_conn = parsed_conn
self.queue = queue
- self.gconf = global_conf
- if self.gconf.use_cpu then
- self.mat_type = self.gconf.mmat_type
- else
- self.mat_type = self.gconf.cumat_type
- end
+end
+
+function DAGLayer:bind_params()
+ -- do nothing (instead of rebinding params for each layer)
end
function DAGLayer:init(batch_size, chunk_size)
@@ -325,7 +321,7 @@ function DAGLayer:get_params()
for id, ref in pairs(self.queue) do
table.insert(param_repos, ref.layer:get_params())
end
- return nerv.ParamRepo.merge(param_repos)
+ return nerv.ParamRepo.merge(param_repos, self.loc_type)
end
DAGLayer.PORT_TYPES = {
diff --git a/nerv/nn/layer_repo.lua b/nerv/nn/layer_repo.lua
index a169b2b..acef54a 100644
--- a/nerv/nn/layer_repo.lua
+++ b/nerv/nn/layer_repo.lua
@@ -12,27 +12,29 @@ function LayerRepo:add_layers(layer_spec, param_repo, global_conf)
if layer_type == nil then
nerv.error('layer type `%s` not found', ltype)
end
- for id, layer_config in pairs(llist) do
- if layers[id] ~= nil then
- nerv.error("a layer with id %s already exists", id)
- end
- nerv.info("create layer: %s", id)
- if type(layer_config) ~= "table" then
+ for id, lconf in pairs(llist) do
+ if type(lconf) ~= "table" then
nerv.error("layer config table is need")
end
- if type(layer_config.params) == "table" then
- for pname, pid in pairs(layer_config.params) do
- layer_config[pname] = param_repo:get_param(pid)
- end
+ if lconf.pr == nil then
+ lconf.pr = param_repo
end
- if layer_config.pr == nil then
- layer_config.pr = param_repo
+ if layers[id] ~= nil then
+ nerv.error("a layer with id %s already exists", id)
end
- layers[id] = layer_type(id, global_conf, layer_config)
+ nerv.info("create layer: %s", id)
+ layers[id] = layer_type(id, global_conf, lconf)
end
end
end
+function LayerRepo:rebind(param_repo)
+ for id, layer in pairs(self.layers) do
+ layer.lconf.pr = param_repo
+ layer:bind_params()
+ end
+end
+
function LayerRepo:get_layer(lid)
local layer = self.layers[lid]
if layer == nil then
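
rebind() complements the per-layer bind_params() hook: parameters are no longer resolved once from layer_config.params at construction time; instead each layer keeps a reference to its parameter repo (lconf.pr) and can re-resolve after the repo is swapped. A hedged sketch (lrepo and updated_param_list are hypothetical names; assumes lrepo is a nerv.LayerRepo whose layers were built by add_layers):

    local new_pr = nerv.ParamRepo(updated_param_list)  -- a list of nerv.Param objects
    lrepo:rebind(new_pr)   -- every layer's lconf.pr now points at new_pr,
                           -- and bind_params() re-fetches its parameters
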
diff --git a/nerv/nn/param_repo.lua b/nerv/nn/param_repo.lua
index c124e08..aba7765 100644
--- a/nerv/nn/param_repo.lua
+++ b/nerv/nn/param_repo.lua
@@ -1,8 +1,37 @@
local ParamRepo = nerv.class("nerv.ParamRepo")
-function ParamRepo:__init(plist)
+
+ParamRepo.LOC_TYPES = {
+ ON_DEVICE = {},
+ ON_HOST = {}
+}
+
+function ParamRepo:__init(plist, loc_type)
self.params = {}
+ self.loc_type = loc_type or ParamRepo.LOC_TYPES.ON_HOST
+ local function make_checker(tname)
+ return function (mat)
+ if not nerv.is_type(mat, tname) then
+ nerv.error("unexpected param type in repo specification")
+ end
+ end
+ end
+ self.make_copier = function (mat_type, copy_method)
+ return function (mat)
+ local target = mat_type(mat:nrow(), mat:ncol())
+ mat[copy_method](mat, target)
+ return target
+ end
+ end
+
+ if self.loc_type == ParamRepo.LOC_TYPES.ON_HOST then
+ self.checker = make_checker("nerv.MMatrix")
+ else
+ self.checker = make_checker("nerv.CuMatrix")
+ end
+
if plist ~= nil then
for i, p in ipairs(plist) do
+ p:check(self.checker)
self.params[p.id] = p
end
end
@@ -12,6 +41,7 @@ function ParamRepo:add(pid, p)
if self.params[pid] ~= nil then
nerv.error("duplicate params with the same id: %s", pid)
end
+ p:check(self.checker)
self.params[pid] = p
end
@@ -22,8 +52,8 @@ function ParamRepo:remove(pid, p)
table.remove(self.params, pid)
end
-function ParamRepo.merge(repos)
- local self = nerv.ParamRepo()
+function ParamRepo.merge(repos, loc_type)
+ local self = nerv.ParamRepo(nil, loc_type)
for i, repo in ipairs(repos) do
if not nerv.is_type(repo, "nerv.ParamRepo") then
nerv.error("nerv.ParamRepo objects expected, got %s", repo)
@@ -78,3 +108,26 @@ function ParamRepo:get_param(pid)
end
return p
end
+
+function ParamRepo:copy(loc_type, pids)
+    local copier
+    if loc_type == nil then
+        loc_type = self.loc_type
+    end
+    local target = nerv.ParamRepo(nil, loc_type)
+ if loc_type == ParamRepo.LOC_TYPES.ON_HOST then
+ copier = self.make_copier(gconf.mmat_type, 'copy_toh')
+ else
+ copier = self.make_copier(gconf.cumat_type, 'copy_tod')
+ end
+ if pids == nil then
+ for id, p in pairs(self.params) do
+ target.params[id] = p:copy(copier)
+ end
+ else
+ for i, pid in ipairs(pids) do
+ target.params[pid] = self:get_param(pid):copy(copier)
+ end
+ end
+ return target
+end
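
A ParamRepo is now tagged with where its matrices live (ON_HOST nerv.MMatrix vs ON_DEVICE nerv.CuMatrix), and copy() converts a repo from one location to the other. A sketch under the assumption that a global gconf provides mmat_type/cumat_type (as the copier factories above require) and that each nerv.Param implements the check/copy methods used here:

    local LOC = nerv.ParamRepo.LOC_TYPES
    local host_pr = nerv.ParamRepo(nil, LOC.ON_HOST)
    -- ... populate host_pr with host-side nerv.Param objects ...
    local dev_pr = host_pr:copy(LOC.ON_DEVICE)                  -- push everything onto the GPU
    local back   = dev_pr:copy(LOC.ON_HOST, {"affine0_ltp"})    -- copy back selected ids (hypothetical id)
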
diff --git a/nerv/test/parse_args.lua b/nerv/test/parse_args.lua
new file mode 100644
index 0000000..34ad55e
--- /dev/null
+++ b/nerv/test/parse_args.lua
@@ -0,0 +1,15 @@
+local options = {{"abandon", "a", "boolean", default = false, desc = "abandon your belief"},
+ {"bullshit", "b", "boolean", default = false, desc = "start to bullshit"},
+ {"cheat", "c", "boolean", default = false, desc = "try to cheat"},
+ {"delete", "d", "boolean", default = false, desc = "remove everything"},
+ {"hehe", "h", "boolean", default = false, desc = "233333"},
+ {"oh", "o", "boolean", default = true, desc = "oh yes!"},
+ {"uid", nil, "int", desc = "user uid"},
+ {"str", nil, "string", desc = "test string"}}
+
+args, opts = nerv.parse_args({"arg1", "arg2", "-abcd", "arg3",
+ "--hehe", "--oh=no", "--uid=43",
+ "highfive", "--str=hello"}, options)
+
+nerv.print_usage(options)
+print(table.tostring(args), table.tostring(opts))
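
After parsing, the remaining positional arguments are returned in args and each option is read through its val field, which is exactly how the nerv driver above consumes them; for example:

    if opts["cheat"].val then nerv.printf("cheating enabled\n") end
    print(opts["uid"].val)   -- the value passed as --uid=43
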