aboutsummaryrefslogtreecommitdiff
path: root/nerv
diff options
context:
space:
mode:
Diffstat (limited to 'nerv')
-rw-r--r--nerv/Makefile22
-rw-r--r--nerv/doc/nerv.md6
-rw-r--r--nerv/doc/nerv_class.md8
-rw-r--r--nerv/doc/nerv_io.md13
-rw-r--r--nerv/doc/nerv_layer.md13
-rw-r--r--nerv/doc/nerv_matrix.md20
-rw-r--r--nerv/doc/nerv_nn.md43
-rw-r--r--nerv/doc/nerv_param.md10
-rw-r--r--nerv/examples/asr_trainer.lua256
-rw-r--r--nerv/examples/swb_baseline.lua84
-rw-r--r--nerv/examples/swb_baseline2.lua203
-rw-r--r--nerv/examples/swb_baseline_basic.lua162
-rw-r--r--nerv/examples/timit_baseline2.lua212
-rw-r--r--nerv/init.lua229
-rw-r--r--nerv/io/sgd_buffer.lua7
-rw-r--r--nerv/layer/affine.lua43
-rw-r--r--nerv/layer/bias.lua15
-rw-r--r--nerv/layer/combiner.lua16
-rw-r--r--nerv/layer/dropout.lua16
-rw-r--r--nerv/layer/duplicate.lua7
-rw-r--r--nerv/layer/elem_mul.lua11
-rw-r--r--nerv/layer/graph.lua7
-rw-r--r--nerv/layer/gru.lua20
-rw-r--r--nerv/layer/identity.lua7
-rw-r--r--nerv/layer/init.lua60
-rw-r--r--nerv/layer/lstm.lua20
-rw-r--r--nerv/layer/lstm_gate.lua17
-rw-r--r--nerv/layer/mse.lua16
-rw-r--r--nerv/layer/rnn.lua7
-rw-r--r--nerv/layer/sigmoid.lua11
-rw-r--r--nerv/layer/softmax.lua11
-rw-r--r--nerv/layer/softmax_ce.lua16
-rw-r--r--nerv/layer/tanh.lua11
-rw-r--r--nerv/layer/window.lua15
-rw-r--r--nerv/lib/cblas.h596
-rw-r--r--nerv/lib/matrix/cumatrix.c52
-rw-r--r--nerv/lib/matrix/cumatrix.h3
-rw-r--r--nerv/lib/matrix/generic/mmatrix.c2
-rw-r--r--nerv/matrix/cumatrix.c5
-rw-r--r--nerv/matrix/generic/mmatrix.c2
-rw-r--r--nerv/matrix/init.lua21
-rw-r--r--nerv/nerv46
-rw-r--r--nerv/nerv-scm-1.rockspec3
-rw-r--r--nerv/nn/layer_dag.lua16
-rw-r--r--nerv/nn/layer_repo.lua28
-rw-r--r--nerv/nn/param_repo.lua59
-rw-r--r--nerv/test/parse_args.lua15
47 files changed, 1904 insertions, 558 deletions
diff --git a/nerv/Makefile b/nerv/Makefile
index a9b4baf..421eda0 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -1,3 +1,11 @@
+ifndef LUA_BINDIR
+$(error Please build the package via luarocks: `luarocks make`)
+endif
+
+ifndef CUDA_BASE
+$(error CUDA_BASE is not set)
+endif
+
.PHONY: build install clean
SHELL := /bin/bash
@@ -34,18 +42,18 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \
layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \
layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua \
layer/elem_mul.lua layer/lstm.lua layer/lstm_gate.lua layer/dropout.lua layer/gru.lua \
- layer/graph.lua layer/rnn.lua layer/duplicate.lua layer/identity.lua \
+ layer/graph.lua layer/rnn.lua layer/duplicate.lua layer/identity.lua \
nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua nn/network.lua \
io/sgd_buffer.lua \
tnn/init.lua tnn/sutil.lua tnn/tnn.lua
INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK
-#CUDA_BASE := /usr/local/cuda-7.0
-CUDA_BASE := /usr/local/cuda
CUDA_INCLUDE := -I $(CUDA_BASE)/include/
INCLUDE += $(CUDA_INCLUDE)
-LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcublas -lcurand
+CUDA_LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcuda -lcublas -lcurand
+override CFLAGS += $(NERV_FEAT)
+
NVCC := $(CUDA_BASE)/bin/nvcc
EMPTY :=
SPACE := $(EMPTY) $(EMPTY)
@@ -66,11 +74,11 @@ $(LUA_DIR)/%.lua: %.lua
cp $< $@
$(LIB_PATH)/libnervcore.so: $(CORE_OBJS)
- gcc -shared -o $@ $^ $(LDFLAGS) -lcblas
+ gcc -shared -o $@ $^ $(LDFLAGS) $(CUDA_LDFLAGS) $(BLAS_LDFLAGS)
$(LIB_PATH)/libluaT.so: $(LUAT_OBJS)
- gcc -shared -o $@ $^ $(LDFLAGS)
+ gcc -shared -o $@ $^
$(INST_LIBDIR)/libnerv.so: $(NERV_OBJS) $(LIB_PATH)/libnervcore.so $(LIB_PATH)/libluaT.so
- gcc -shared -o $@ $(NERV_OBJS) $(LDFLAGS) -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -lluaT
+ gcc -shared -o $@ $(NERV_OBJS) -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -lluaT
$(OBJ_DIR)/matrix/cumatrix.o: matrix/generic/cumatrix.c matrix/generic/matrix.c
$(OBJ_DIR)/matrix/mmatrix.o: matrix/generic/mmatrix.c matrix/generic/matrix.c
diff --git a/nerv/doc/nerv.md b/nerv/doc/nerv.md
index 28411f5..125928d 100644
--- a/nerv/doc/nerv.md
+++ b/nerv/doc/nerv.md
@@ -1,6 +1,6 @@
-#The Nerv utility functions#
+# The Nerv utility functions
Part of the [Nerv](../README.md) toolkit.
-##Methods##
+## Methods
* __string = nerv.typename(obj a)__
A registered function, the original function is `luaT_lua_typename`. In some cases if you call `type(a)` for object of some class in __Nerv__(like __Nerv.CuMatrix__) it will only return "userdata"(because it is created in C), in this case you can use this method to get its type.
@@ -14,4 +14,4 @@ A registered function, the original function is `luaT_newmetatable`, it returns
* __string = nerv.setmetatable(table self, string tname)__
A registered function, the original function is `luaT_lua_setmetatable`. It assigns the metatable registered in __luaT__ by the name *tname* to the table *self*. And return *tname* to user.
* __table = nerv.get_type(string typename)__
-Returns the type(`loadstring("return " .. typename)`). \ No newline at end of file
+Returns the type(`loadstring("return " .. typename)`).
diff --git a/nerv/doc/nerv_class.md b/nerv/doc/nerv_class.md
index 99f63e7..8314b12 100644
--- a/nerv/doc/nerv_class.md
+++ b/nerv/doc/nerv_class.md
@@ -1,10 +1,10 @@
-#The Nerv OOP#
+# The Nerv OOP
Part of the [Nerv](../README.md) toolkit.
-##Methods##
+## Methods
* __metatable mt, metatable mpt = nerv.class(string tname, string parenttname)__
This method is used to create a class by the name `tname`, which inherits `parenttname` in __Nerv__, then you create a new instance of this class by calling `obj=tname(...)`. The `tname.__init(...)` method(if defined) will be called in the constructing. The metatable of the class and its parent class will be returned.
-##Examples##
+## Examples
* This example implements a simple `nerv.Counter` class which is inherited by `nerv.BetterCounter`.
```
@@ -33,4 +33,4 @@ c1 = nerv.Counter(1)
print(c1.c)
bc1 = nerv.BetterCounter(1, 1)
print(bc1.c, bc1.bc)
-``` \ No newline at end of file
+```
diff --git a/nerv/doc/nerv_io.md b/nerv/doc/nerv_io.md
index 07589df..299362f 100644
--- a/nerv/doc/nerv_io.md
+++ b/nerv/doc/nerv_io.md
@@ -1,7 +1,7 @@
-#The Nerv IO Package#
+# The Nerv IO Package
Part of the [Nerv](../README.md) toolkit.
-##Description##
+## Description
The main class that the user uses to store and read parameter object to and from files is __nerv.ChunkFile__.
In the file, a parameter object will be saved using a standard format. First is the length(in byte) of this object, then a table which includes some meta information of the object, and a data area. Below is an example text file.
```
@@ -23,7 +23,7 @@ In the file, a parameter object will be saved using a standard format. First is
3.000000 3.000000 3.000000
```
-##Methods##
+## Methods
* __ChunkFile ChunkFile(string fn, string mode)__
`mode` can be `r` or `w`, for reading or writing a file. The returned __ChunkFile__ will be ready to write or read objects which follows the __nerv.Param__ interface(using `write_chunk` and `read_chunk`).
* __void ChunkFile.write_chunk(ChunkFile self, Param p)__
@@ -33,7 +33,7 @@ Read the __Param__ object by id `id` from the file `self`. It will be constructe
* __void ChunkFile.close(ChunkFile self)__
Close the opened file.
-##Examples##
+## Examples
* An example showing how to use __ChunkFile__ to store and read parameter objects.
```
require 'io'
@@ -96,7 +96,7 @@ do
end
```
-##Developer Notes##
+## Developer Notes
* There are four classes in to deal with chunk data, which are __nerv.ChunkFile__, __nerv.ChunkFileHandle__, __nerv.ChunkInfo__, __nerv.ChunkData__. Below is the underlying C structs.
```
typedef struct ChunkFileHandle {
@@ -110,4 +110,5 @@ typedef struct ChunkData {
char *data;
} ChunkData;
```
-* In __Nerv.io__, a returned(by `ChunkFile.__init`) __nerv.ChunkFile__ will have a member `handle`, which is a __nerv.ChunkFileHandle__. \ No newline at end of file
+
+* In __Nerv.io__, a returned(by `ChunkFile.__init`) __nerv.ChunkFile__ will have a member `handle`, which is a __nerv.ChunkFileHandle__.
diff --git a/nerv/doc/nerv_layer.md b/nerv/doc/nerv_layer.md
index de2fb12..dd7c9bb 100644
--- a/nerv/doc/nerv_layer.md
+++ b/nerv/doc/nerv_layer.md
@@ -1,9 +1,9 @@
-#The Nerv Layer Package#
+# The Nerv Layer Package
Part of the [Nerv](../README.md) toolkit.
-##Description##
+## Description
__nerv.Layer__ is the base class and most of its methods are abstract.
-###Class hierarchy and their members###
+### Class hierarchy and their members
* __nerv.Layer__.
* `table dim_in` It specifies the dimensions of the inputs.
* `table dim_out` It specifies the dimensions of the outputs.
@@ -20,7 +20,7 @@ __nerv.Layer__ is the base class and most of its methods are abstract.
* `int total_frams` Records how many frames have passed.
* `bool compressed` The reference distribution can be a one-hot format. This feature is enabled by `layer_conf.compressed`.
-##Methods##
+## Methods
* __void Layer.\_\_init(Layer self, string id, table global_conf, table layer_conf)__
Abstract method.
The constructing method should assign `id` to `self.id` and `global_conf` to `self.gconf`, `layer_conf.dim_in` to `self.dim_in`, `layer_conf.dim_out` to `self.dim_out`. `dim_in` and `dim_out` are a list specifies the dimensions of the inputs and outputs. Also, `layer_conf` will include the parameters, which should also be properly saved.
@@ -43,7 +43,7 @@ Check whether `#self.dim_in == len_in` and `#self.dim_out == len_out`, if violat
Abstract method.
The layer should return a list containing its parameters.
-####nerv.Layer.get\_dim(self)####
+#### nerv.Layer.get\_dim(self)
* Returns:
`dim_in`: __table__.
`dim_out`: __table__.
@@ -52,7 +52,7 @@ The layer should return a list containing its parameters.
* Description:
Returns `self.dim_in, self.dim_out`.
-##Examples##
+## Examples
* a basic example using __Nerv__ layers to a linear classification.
```
@@ -178,3 +178,4 @@ for l = 0, 10, 1 do
end
--[[end training]]--
```
+
diff --git a/nerv/doc/nerv_matrix.md b/nerv/doc/nerv_matrix.md
index dfd843d..3782eb3 100644
--- a/nerv/doc/nerv_matrix.md
+++ b/nerv/doc/nerv_matrix.md
@@ -1,8 +1,8 @@
-#The Nerv Matrix Package#
+# The Nerv Matrix Package
Part of the [Nerv](../README.md) toolkit.
-##Description##
-###Underlying structure###
+## Description
+### Underlying structure
In the begining is could be useful to know something about the underlying structure of a __Nerv__ matrix. Please keep in mind that matrice in __Nerv__ is row-major.
Every matrix object is a encapsulation of a C struct that describes the attributes of this matrix.
```
@@ -20,12 +20,12 @@ typedef struct Matrix {
It is worth mentioning that that `data_ref` is a counter which counts the number of references to its memory space, mind that it will also be increased when a row of the matrix is referenced(`col = m[2]`). A __Nerv__ matrix will deallocate its space when this counter is decreased to zero.
Also note that all assigning operation in __Nerv__ is reference copy, you can use `copy_tod` or `copy_toh` method to copy value. Also, row assigning operations like `m1[2]=m2[3]` is forbidden in __Nerv__.
-###Class hierarchy###
+### Class hierarchy
The class hierarchy of the matrix classes can be clearly observed in `matrix/init.c`.
First there is a abstract base class __Nerv.Matrix__, which is inherited by __Nerv.CuMatrix__ and __Nerv.MMatrix__(also abstract).
Finally, there is __Nerv.CuMatrixFloat__, __Nerv.CuMatrixDouble__, inheriting __Nerv.CuMatrix__, and __Nerv.MMatrixFloat__, __Nerv.MMatrixDouble__, __Nerv.MMatrixInt__ , inheriting __Nerv.MMatrix__.
-##Methods##
+## Methods
Mind that usually a matrix object can only do calculation with matrix of its own type(a __Nerv.CuMatrixFloat__ matrix can only do add operation with a __Nerv.CuMatrixFloat__).
In the methods description below, __Matrix__ could be __Nerv.CuMatrixFloat__, __Nerv.CuMatrixDouble__, __Nerv.MMatrixFloat__ or __Nerv.MMatrixDouble__. __Element_type__ could be `float` or `double`, respectively.
* __Matrix = Matrix(int nrow, int ncol)__
@@ -53,6 +53,8 @@ Return a new __Matrix__ of size (1,`self.ncol`), which stores the sum of all col
Return a new __Matrix__ of size (`self.nrow`,1), which stores the sum of all rows of __Matrix__ `self`.
* __Matrix Matrix.rowmax(Matrix self)__
Return a new __Matrix__ of size (`self.nrow`,1), which stores the max value of all rows of __Matrix__ `self`.
+* __Matrix Matrix.rowmax_idx(Matrix self)__
+Return two new __Matrix__ objects of size (`self.nrow`,1), which store the max value of each row of __Matrix__ `self` and the corresponding column indices (starting from zero).
* __Matrix Matrix.trans(Matrix self)__
Return a new __Matrix__ of size (`self.ncol`,`self.nrow`), which stores the transpose of __Matrix__ `self`.
* __void Matrix.copy_fromh(Matrix self, MMatrix a)__
@@ -81,8 +83,8 @@ Fill the content of __Matrix__ `self` to be `value`.
Set the element of __Matrix__ `self` to be elementwise-sigmoid of `ma`.
* __void Matrix.sigmoid_grad(Matrix self, Matrix err, Matrix output)__
Set the element of __Matrix__ `self`, to be `self[i][j]=err[i][j]*output[i][j]*(1-output[i][j])`. This function is used to propagate sigmoid layer error.
-* __void Matrix.softmax(Matrix self, Matrix a)__
-Calculate a row-by-row softmax of __Matrix__ `a` and save the result in `self`.
+* __Matrix Matrix.softmax(Matrix self, Matrix a)__
+Calculate a row-by-row softmax of __Matrix__ `a` and save the result in `self`. Returns a new `self.nrow*1` index matrix that stores the index of the maximum value of each row.
* __void Matrix.mul_elem(Matrix self, Matrix ma, Matrix mb)__
Calculate element-wise multiplication of __Matrix__ `ma` and `mb`, store the result in `self`.
* __void Matrix.log_elem(Matrix self, Matrix ma)__
@@ -113,7 +115,7 @@ Write `self` to the file position in `chunk`.
* __void MMatrix.copy_from(MMatrix ma, MMatrix mb,[int b_bgein, int b_end, int a_begin])__
Copy a part of `mb`(rows of index `[b_begin..b_end)`) to `ma` beginning at row index `a_begin`. If not specified, `b_begin` will be `0`, `b_end` will be `b.nrow`, `a_begin` will be `0`.
-##Examples##
+## Examples
* Use `get_dataref_value` to test __Nerv__'s matrix space allocation.
```
m = 10
@@ -134,6 +136,7 @@ print("test fm:get_dataref_value:", fm:get_dataref_value())
print(fm)
print(dm)
```
+
* Test some __Matrix__ calculations.
```
m = 4
@@ -167,3 +170,4 @@ print(a)
a:log_elem(fs)
print(a)
```
+
diff --git a/nerv/doc/nerv_nn.md b/nerv/doc/nerv_nn.md
index c57447d..63537fb 100644
--- a/nerv/doc/nerv_nn.md
+++ b/nerv/doc/nerv_nn.md
@@ -1,19 +1,19 @@
-#The Nerv NN Package#
+# The Nerv NN Package
Part of the [Nerv](../README.md) toolkit.
-##Description##
-###Class hierarchy###
+## Description
+### Class hierarchy
it contains __nerv.LayerRepo__, __nerv.ParamRepo__, and __nerv.DAGLayer__(inherits __nerv.Layer__).
-###Class hierarchy and their members###
-####nerv.ParamRepo####
+### Class hierarchy and their members
+#### nerv.ParamRepo
Get parameter object by ID.
* `table param_table` Contains the mapping of parameter ID to parameter file(__nerv.ChunkFile__)
* __nerv.LayerRepo__ Get layer object by ID.
* `table layers` Contains the mapping of layer ID to layer object.
objects.
-####__nerv.DAGLayer__####
+#### __nerv.DAGLayer__
Inherits __nerv.Layer__.
* `layers`: __table__, a mapping from a layer ID to its "ref". A ref is a structure that contains reference to space allocations and other info of the layer.
* `inputs`: __table__, a mapping from the inputs ports of the DAG layer to the input ports of the sublayer, the key is the port number, the value is `{ref, port}`.
@@ -21,17 +21,17 @@ Inherits __nerv.Layer__.
* `parsed_conn`: __table__, a list of parsed connections, each entry is of format `{{ref_from, port_from}, {ref_to, port_to}}`.
* `queue`: __table__, a list of "ref"s, the propagation of the DAGLayer will follow this order, and back-propagation will follow a reverse order.
-##Methods##
+## Methods
-###__nerv.ParamRepo__###
+### __nerv.ParamRepo__
-####nerv.ParamRepo:\_\_init(param\_files)####
+#### nerv.ParamRepo:\_\_init(param\_files)
* Parameters:
`param_files`: __table__
* Description:
`param_files` is a list of file names that stores parameters, the newed __ParamRepo__ will read them from file and store the mapping for future fetching.
-####nerv.Param ParamRepo.get_param(ParamRepo self, string pid, table global_conf)####
+#### nerv.Param ParamRepo.get_param(ParamRepo self, string pid, table global_conf)
* Returns:
__nerv.Layer__
* Parameters:
@@ -41,8 +41,8 @@ Inherits __nerv.Layer__.
* Description:
__ParamRepo__ will find the __nerv.ChunkFile__ `pf` that contains parameter of ID `pid` and return `pf:read_chunk(pid, global_conf)`.
-###__nerv.LayerRepo__###
-####nerv.LayerRepo:\_\_init(layer\_spec, param\_repo, global\_conf)####
+### __nerv.LayerRepo__
+#### nerv.LayerRepo:\_\_init(layer\_spec, param\_repo, global\_conf)
* Returns:
__nerv.LayerRepo__.
* Parameters:
@@ -60,7 +60,7 @@ Inherits __nerv.Layer__.
__LayerRepo__ will merge `param_config` into `layer_config` and construct a layer by calling `layer_type(layerid, global_conf, layer_config)`.
-####nerv.LayerRepo.get\_layer(self, lid)####
+#### nerv.LayerRepo.get\_layer(self, lid)
* Returns:
__nerv.LayerRepo__, the layer with ID `lid`.
* Parameters:
@@ -69,8 +69,8 @@ Inherits __nerv.Layer__.
* Description:
Returns the layer with ID `lid`.
-###nerv.DAGLayer###
-####nerv.DAGLayer:\_\_init(id, global\_conf, layer\_conf)####
+### nerv.DAGLayer
+#### nerv.DAGLayer:\_\_init(id, global\_conf, layer\_conf)
* Returns:
__nerv.DAGLayer__
* Parameters:
@@ -89,7 +89,7 @@ Inherits __nerv.Layer__.
}})
```
-####nerv.DAGLayer.init(self, batch\_size)####
+#### nerv.DAGLayer.init(self, batch\_size)
* Parameters:
`self`: __nerv.DAGLayer__
`batch_size`: __int__
@@ -97,7 +97,7 @@ Inherits __nerv.Layer__.
This initialization method will allocate space for output and input matrice, and will call `init()` for each of its sub layers.
-####nerv.DAGLayer.propagate(self, input, output)####
+#### nerv.DAGLayer.propagate(self, input, output)
* Parameters:
`self`: __nerv.DAGLayer__
`input`: __table__
@@ -105,7 +105,7 @@ Inherits __nerv.Layer__.
* Description:
The same function as __nerv.Layer.propagate__, do propagation for each layer in the order of `self.queue`.
-####nerv.DAGLayer.back\_propagate(self, next\_bp\_err, bp\_err, input, output)####
+#### nerv.DAGLayer.back\_propagate(self, next\_bp\_err, bp\_err, input, output)
* Parameters:
`self`: __nerv.DAGLayer__
`next_bp_err`: __table__
@@ -115,7 +115,7 @@ Inherits __nerv.Layer__.
* Description:
The same function as __nerv.Layer.back_propagate__, do back-propagation for each layer in the reverse order of `self.queue`.
-####nerv.DAGLayer.update(self, bp\_err, input, output)####
+#### nerv.DAGLayer.update(self, bp\_err, input, output)
* Parameters:
`self`: __nerv.DAGLayer__
`bp_err`: __table__
@@ -124,7 +124,7 @@ Inherits __nerv.Layer__.
* Description:
The same function as __nerv.Layer.update__, do update for each layer in the order of `self.queue`.
-##Examples##
+## Examples
* aaa
```
@@ -253,4 +253,5 @@ for l = 0, 10, 1 do
ce_last = softmaxL.total_ce
end
--[[end training]]--
-``` \ No newline at end of file
+```
+
diff --git a/nerv/doc/nerv_param.md b/nerv/doc/nerv_param.md
index 167cb11..98793f0 100644
--- a/nerv/doc/nerv_param.md
+++ b/nerv/doc/nerv_param.md
@@ -1,17 +1,17 @@
-#The Nerv Parameter Package#
+# The Nerv Parameter Package
Part of the [Nerv](../README.md) toolkit.
-##Description##
-###Class hierarchy###
+## Description
+### Class hierarchy
There is a base class __Nerv.Param__ defined in `layer/init.lua`.
-###Class hierarchy and their members###
+### Class hierarchy and their members
* __nerv.MatrixParam__ inherits __nerv.Param__
* `Matrix trans` stores the parameter matrix.
* __nerv.LinearTransParam__ inherits __Nerv.MatrixParam__.
* __Nerv.BiasParam__ inherits __Nerv.MatrixParam__.
-##Methods##
+## Methods
* __void Param.\_\_init(Param self, string id, table global_conf)__
Constructor of a __Param__, it will set `self.id` to be `id` and `self.gconf` to be `global_conf`.
* __void Param.set_info(Param self, table info)__
diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua
index 3fa2653..5bf28bd 100644
--- a/nerv/examples/asr_trainer.lua
+++ b/nerv/examples/asr_trainer.lua
@@ -1,17 +1,33 @@
-function build_trainer(ifname)
- local param_repo = nerv.ParamRepo()
- param_repo:import(ifname, nil, gconf)
- local layer_repo = make_layer_repo(param_repo)
- local network = get_network(layer_repo)
- local global_transf = get_global_transf(layer_repo)
- local input_order = get_input_order()
+require 'lfs'
+require 'pl'
+local function build_trainer(ifname)
+ local host_param_repo = nerv.ParamRepo()
local mat_type
+ local src_loc_type
+ local train_loc_type
+ host_param_repo:import(ifname, nil, gconf)
if gconf.use_cpu then
mat_type = gconf.mmat_type
+ src_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
+ train_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
else
mat_type = gconf.cumat_type
+ src_loc_type = nerv.ParamRepo.LOC_TYPES.ON_HOST
+ train_loc_type = nerv.ParamRepo.LOC_TYPES.ON_DEVICE
end
- local iterative_trainer = function (prefix, scp_file, bp)
+ local param_repo = host_param_repo:copy(train_loc_type)
+ local layer_repo = make_layer_repo(param_repo)
+ local network = get_network(layer_repo)
+ local global_transf = get_global_transf(layer_repo)
+ local input_order = get_input_order()
+ local iterative_trainer = function (prefix, scp_file, bp, rebind_param_repo)
+ -- rebind the params if necessary
+ if rebind_param_repo then
+ host_param_repo = rebind_param_repo
+ param_repo = host_param_repo:copy(train_loc_type)
+ layer_repo:rebind(param_repo)
+ rebind_param_repo = nil
+ end
gconf.randomize = bp
-- build buffer
local buffer = make_buffer(make_readers(scp_file, layer_repo))
@@ -64,61 +80,193 @@ function build_trainer(ifname)
print_stat(layer_repo)
mat_type.print_profile()
mat_type.clear_profile()
- if (not bp) and prefix ~= nil then
- nerv.info("writing back...")
- local fname = string.format("%s_cv%.3f.nerv",
- prefix, get_accuracy(layer_repo))
- network:get_params():export(fname, nil)
+ local fname
+ if (not bp) then
+ host_param_repo = param_repo:copy(src_loc_type)
+ if prefix ~= nil then
+ nerv.info("writing back...")
+ fname = string.format("%s_cv%.3f.nerv",
+ prefix, get_accuracy(layer_repo))
+ host_param_repo:export(fname, nil)
+ end
end
- return get_accuracy(layer_repo)
+ return get_accuracy(layer_repo), host_param_repo, fname
end
return iterative_trainer
end
-dofile(arg[1])
-start_halving_inc = 0.5
-halving_factor = 0.6
-end_halving_inc = 0.1
-min_iter = 1
-max_iter = 20
-min_halving = 5
-gconf.batch_size = 256
-gconf.buffer_size = 81920
+local function check_and_add_defaults(spec, opts)
+ local function get_opt_val(k)
+ return opts[string.gsub(k, '_', '-')].val
+ end
+ local opt_v = get_opt_val("resume_from")
+ if opt_v then
+ gconf = dofile(opt_v)
+ else
+ for k, v in pairs(spec) do
+ local opt_v = get_opt_val(k)
+ if opt_v ~= nil then
+ gconf[k] = opt_v
+ elseif gconf[k] ~= nil then
+ elseif v ~= nil then
+ gconf[k] = v
+ end
+ end
+ end
+end
-local pf0 = gconf.initialized_param
-local trainer = build_trainer(pf0)
---local trainer = build_trainer("c3.nerv")
-local accu_best = trainer(nil, gconf.cv_scp, false)
-local do_halving = false
-
-nerv.info("initial cross validation: %.3f", accu_best)
-for i = 1, max_iter do
- nerv.info("[NN] begin iteration %d with lrate = %.6f", i, gconf.lrate)
- local accu_tr = trainer(nil, gconf.tr_scp, true)
- nerv.info("[TR] training set %d: %.3f", i, accu_tr)
- local accu_new = trainer(
- string.format("%s_%s_iter_%d_lr%f_tr%.3f",
- string.gsub(