From 3362020a6bc43766a92882abe6d127c8bb98a628 Mon Sep 17 00:00:00 2001 From: Determinant Date: Mon, 15 Feb 2016 15:04:13 +0800 Subject: try a basic merge --- Makefile | 4 + nerv/Makefile | 13 +- nerv/config.ld | 8 + nerv/doc/lua/index.html | 71 ++ nerv/doc/lua/ldoc_pale.css | 304 +++++ nerv/doc/lua/modules/layer.affine.html | 145 +++ nerv/doc/lua/modules/matrix.html | 409 +++++++ nerv/doc/lua/modules/nerv.html | 428 +++++++ nerv/examples/asr_trainer.lua | 18 +- nerv/init.lua | 64 +- nerv/io/sgd_buffer.lua | 31 +- nerv/layer/affine.lua | 56 +- nerv/layer/bias.lua | 2 +- nerv/layer/combiner.lua | 13 +- nerv/layer/mse.lua | 17 +- nerv/layer/softmax_ce.lua | 11 +- nerv/layer/window.lua | 2 +- nerv/lib/common.c | 1 + nerv/lib/io/chunk_file.c | 3 +- nerv/lib/matrix/cukernel.h | 2 +- nerv/lib/matrix/generic/cukernel.cu | 6 + nerv/lib/matrix/generic/cumatrix.c | 2 +- nerv/lib/matrix/generic/mmatrix.c | 505 ++++++++- nerv/lib/matrix/generic/mmatrix.h | 45 +- nerv/lib/matrix/matrix.h | 2 + nerv/lib/matrix/mmatrix.c | 5 + nerv/matrix/generic/cumatrix.c | 294 +---- nerv/matrix/generic/matrix.c | 213 ++++ nerv/matrix/generic/mmatrix.c | 60 +- nerv/matrix/init.lua | 57 +- nerv/matrix/mmatrix.c | 3 + nerv/nerv-scm-1.rockspec | 2 +- nerv/nn/layer_dag.lua | 9 +- nerv/nn/param_repo.lua | 6 +- nerv/test/cumatrix_func.lua | 2 + nerv/test/cumatrix_func.out | 1952 ++++++++++++++++++++++++++++++++ nerv/test/matrix_func.lua | 168 +++ nerv/test/mmatrix_func.lua | 2 + nerv/test/mmatrix_func.out | 1952 ++++++++++++++++++++++++++++++++ 39 files changed, 6540 insertions(+), 347 deletions(-) create mode 100644 nerv/config.ld create mode 100644 nerv/doc/lua/index.html create mode 100644 nerv/doc/lua/ldoc_pale.css create mode 100644 nerv/doc/lua/modules/layer.affine.html create mode 100644 nerv/doc/lua/modules/matrix.html create mode 100644 nerv/doc/lua/modules/nerv.html create mode 100644 nerv/test/cumatrix_func.lua create mode 100644 nerv/test/cumatrix_func.out create mode 100644 nerv/test/matrix_func.lua create mode 100644 nerv/test/mmatrix_func.lua create mode 100644 nerv/test/mmatrix_func.out diff --git a/Makefile b/Makefile index 72a5915..0982295 100644 --- a/Makefile +++ b/Makefile @@ -11,5 +11,9 @@ install: speech: cd speech/speech_utils; $(PREFIX)/bin/luarocks make cd speech/htk_io; $(PREFIX)/bin/luarocks make + cd speech/kaldi_io; $(PREFIX)/bin/luarocks make clean: cd nerv && make clean + cd speech/speech_utils && make clean + cd speech/htk_io && make clean + cd speech/kaldi_io && make clean diff --git a/nerv/Makefile b/nerv/Makefile index 5c329f9..0c6b380 100644 --- a/nerv/Makefile +++ b/nerv/Makefile @@ -32,11 +32,11 @@ LIBS := $(INST_LIBDIR)/libnerv.so $(LIB_PATH)/libnervcore.so $(LIB_PATH)/libluaT LUA_LIBS := matrix/init.lua io/init.lua init.lua \ layer/init.lua layer/affine.lua layer/sigmoid.lua layer/tanh.lua layer/softmax_ce.lua layer/softmax.lua \ layer/window.lua layer/bias.lua layer/combiner.lua layer/mse.lua layer/affine_recurrent.lua \ - layer/elem_mul.lua layer/gate_fff.lua \ + layer/elem_mul.lua layer/gate_fff.lua \ nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \ io/sgd_buffer.lua \ - tnn/init.lua tnn/layer_dag_t.lua tnn/sutil.lua tnn/tnn.lua \ - tnn/layersT/dropout_t.lua tnn/layersT/lstm_t.lua tnn/layersT/softmax_ce_t.lua + tnn/init.lua tnn/layer_dag_t.lua tnn/sutil.lua tnn/tnn.lua \ + tnn/layersT/dropout_t.lua tnn/layersT/lstm_t.lua tnn/layersT/softmax_ce_t.lua INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK #CUDA_BASE := /usr/local/cuda-7.0 @@ -45,9 +45,8 @@ CUDA_INCLUDE := -I 
$(CUDA_BASE)/include/ INCLUDE += $(CUDA_INCLUDE) LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcublas -lcurand -CFLAGS := -Wall -Wextra -O2 NVCC := $(CUDA_BASE)/bin/nvcc -NVCC_FLAGS := -Xcompiler -fPIC,-Wall,-Wextra +NVCC_FLAGS := -Xcompiler -fPIC,-Wextra LUA_LIBS := $(addprefix $(LUA_DIR)/,$(LUA_LIBS)) @@ -58,12 +57,12 @@ $(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR) $(INC_SUBDIR): $(OBJ_DIR)/%.o: %.c $(patsubst /%.o,/%.c,$@) gcc -c -o $@ $< $(INCLUDE) -fPIC $(CFLAGS) $(OBJ_DIR)/lib/matrix/cukernel.o: lib/matrix/cukernel.cu - $(NVCC) -c -o $@ $< $(INCLUDE) $(NVCC_FLAGS) $(CFLAGS) + $(NVCC) -c -o $@ $< $(INCLUDE) $(NVCC_FLAGS) $(LUA_DIR)/%.lua: %.lua cp $< $@ $(LIB_PATH)/libnervcore.so: $(CORE_OBJS) - gcc -shared -o $@ $^ $(LDFLAGS) + gcc -shared -o $@ $^ $(LDFLAGS) -lcblas $(LIB_PATH)/libluaT.so: $(LUAT_OBJS) gcc -shared -o $@ $^ $(LDFLAGS) $(INST_LIBDIR)/libnerv.so: $(NERV_OBJS) $(LIB_PATH)/libnervcore.so $(LIB_PATH)/libluaT.so diff --git a/nerv/config.ld b/nerv/config.ld new file mode 100644 index 0000000..f60b53f --- /dev/null +++ b/nerv/config.ld @@ -0,0 +1,8 @@ +project = 'NERV' +title = 'NERV documentation' +description = 'A Lua-based toolkit for high-performance deep learning' +backtick_references = true +file = {'./', exclude = {'./lib/luaT'}} +dir = 'doc/lua' +style = '!pale' +kind_names = {topic = 'Manual',script = 'Programs'} diff --git a/nerv/doc/lua/index.html b/nerv/doc/lua/index.html new file mode 100644 index 0000000..56212cc --- /dev/null +++ b/nerv/doc/lua/index.html @@ -0,0 +1,71 @@ + + + + + NERV documentation + + + + +
+NERV documentation
+
+A Lua-based toolkit for high-performance deep learning
+
+Modules
+  nerv          NERV: a Lua-based toolkit for high-performance deep learning.
+  layer.affine  Parameter and layer classes related to linear transform.
+  matrix        Implements a fraction of matrix operations (methods) in Lua,
+                while others are implemented in C extension.
+
+generated by LDoc 1.4.3, last updated 2016-01-15 14:56:30
+ + diff --git a/nerv/doc/lua/ldoc_pale.css b/nerv/doc/lua/ldoc_pale.css new file mode 100644 index 0000000..b071110 --- /dev/null +++ b/nerv/doc/lua/ldoc_pale.css @@ -0,0 +1,304 @@ +/* BEGIN RESET + +Copyright (c) 2010, Yahoo! Inc. All rights reserved. +Code licensed under the BSD License: +http://developer.yahoo.com/yui/license.html +version: 2.8.2r1 +*/ +html { + color: #000; + background: #FFF; +} +body,div,dl,dt,dd,ul,ol,li,h1,h2,h3,h4,h5,h6,pre,code,form,fieldset,legend,input,button,textarea,p,blockquote,th,td { + margin: 0; + padding: 0; +} +table { + border-collapse: collapse; + border-spacing: 0; +} +fieldset,img { + border: 0; +} +address,caption,cite,code,dfn,em,strong,th,var,optgroup { + font-style: inherit; + font-weight: inherit; +} +del,ins { + text-decoration: none; +} +li { + list-style: disc; + margin-left: 20px; +} +caption,th { + text-align: left; +} +h1,h2,h3,h4,h5,h6 { + font-size: 100%; + font-weight: bold; +} +q:before,q:after { + content: ''; +} +abbr,acronym { + border: 0; + font-variant: normal; +} +sup { + vertical-align: baseline; +} +sub { + vertical-align: baseline; +} +legend { + color: #000; +} +input,button,textarea,select,optgroup,option { + font-family: inherit; + font-size: inherit; + font-style: inherit; + font-weight: inherit; +} +input,button,textarea,select {*font-size:100%; +} +/* END RESET */ + +body { + margin-left: 1em; + margin-right: 1em; + font-family: arial, helvetica, geneva, sans-serif; + background-color: #ffffff; margin: 0px; +} + +code, tt { font-family: monospace; font-size: 1.1em; } +span.parameter { font-family:monospace; } +span.parameter:after { content:":"; } +span.types:before { content:"("; } +span.types:after { content:")"; } +.type { font-weight: bold; font-style:italic } + +body, p, td, th { font-size: .95em; line-height: 1.2em;} + +p, ul { margin: 10px 0 0 0px;} + +strong { font-weight: bold;} + +em { font-style: italic;} + +h1 { + font-size: 1.5em; + margin: 0 0 20px 0; +} +h2, h3, h4 { margin: 15px 0 10px 0; } +h2 { font-size: 1.25em; } +h3 { font-size: 1.15em; } +h4 { font-size: 1.06em; } + +a:link { font-weight: bold; color: #004080; text-decoration: none; } +a:visited { font-weight: bold; color: #006699; text-decoration: none; } +a:link:hover { text-decoration: underline; } + +hr { + color:#cccccc; + background: #00007f; + height: 1px; +} + +blockquote { margin-left: 3em; } + +ul { list-style-type: disc; } + +p.name { + font-family: "Andale Mono", monospace; + padding-top: 1em; +} + +pre { + background-color: rgb(245, 245, 245); + border: 1px solid #C0C0C0; /* silver */ + padding: 10px; + margin: 10px 0 10px 0; + overflow: auto; + font-family: "Andale Mono", monospace; +} + +pre.example { + font-size: .85em; +} + +table.index { border: 1px #00007f; } +table.index td { text-align: left; vertical-align: top; } + +#container { + margin-left: 1em; + margin-right: 1em; + background-color: #ffffff; +} + +#product { + text-align: center; + border-bottom: 1px solid #cccccc; + background-color: #ffffff; +} + +#product big { + font-size: 2em; +} + +#main { + background-color:#FFFFFF; // #f0f0f0; + //border-left: 2px solid #cccccc; +} + +#navigation { + float: left; + width: 14em; + vertical-align: top; + background-color:#FFFFFF; // #f0f0f0; + border-right: 2px solid #cccccc; + overflow: visible; +} + +#navigation h2 { + background-color:#FFFFFF;//:#e7e7e7; + font-size:1.1em; + color:#000000; + text-align: left; + padding:0.2em; + //border-top:1px solid #dddddd; + border-bottom:1px solid #dddddd; +} + +#navigation ul +{ + 
font-size:1em; + list-style-type: none; + margin: 1px 1px 10px 1px; +} + +#navigation li { + text-indent: -1em; + display: block; + margin: 3px 0px 0px 22px; +} + +#navigation li li a { + margin: 0px 3px 0px -1em; +} + +#content { + margin-left: 14em; + padding: 1em; + width: 700px; + border-left: 2px solid #cccccc; + // border-right: 2px solid #cccccc; + background-color: #ffffff; +} + +#about { + clear: both; + padding: 5px; + border-top: 2px solid #cccccc; + background-color: #ffffff; +} + +@media print { + body { + font: 12pt "Times New Roman", "TimeNR", Times, serif; + } + a { font-weight: bold; color: #004080; text-decoration: underline; } + + #main { + background-color: #ffffff; + border-left: 0px; + } + + #container { + margin-left: 2%; + margin-right: 2%; + background-color: #ffffff; + } + + #content { + padding: 1em; + background-color: #ffffff; + } + + #navigation { + display: none; + } + pre.example { + font-family: "Andale Mono", monospace; + font-size: 10pt; + page-break-inside: avoid; + } +} + +table.module_list { + border-width: 1px; + border-style: solid; + border-color: #cccccc; + border-collapse: collapse; +} +table.module_list td { + border-width: 1px; + padding: 3px; + border-style: solid; + border-color: #cccccc; +} +table.module_list td.name { background-color: #f0f0f0; ; min-width: 200px; } +table.module_list td.summary { width: 100%; } + +table.function_list { + border-width: 1px; + border-style: solid; + border-color: #cccccc; + border-collapse: collapse; +} +table.function_list td { + border-width: 1px; + padding: 3px; + border-style: solid; + border-color: #cccccc; +} +table.function_list td.name { background-color: #f6f6ff; ; min-width: 200px; } +table.function_list td.summary { width: 100%; } + +dl.table dt, dl.function dt {border-top: 1px solid #ccc; padding-top: 1em;} +dl.table dd, dl.function dd {padding-bottom: 1em; margin: 10px 0 0 20px;} +dl.table h3, dl.function h3 {font-size: .95em;} + +ul.nowrap { + overflow:auto; + whitespace:nowrap; +} + +/* stop sublists from having initial vertical space */ +ul ul { margin-top: 0px; } +ol ul { margin-top: 0px; } +ol ol { margin-top: 0px; } +ul ol { margin-top: 0px; } + +/* make the target distinct; helps when we're navigating to a function */ +a:target + * { + background-color: #FF9; +} + + +/* styles for prettification of source */ +pre .comment { color: #558817; } +pre .constant { color: #a8660d; } +pre .escape { color: #844631; } +pre .keyword { color: #aa5050; font-weight: bold; } +pre .library { color: #0e7c6b; } +pre .marker { color: #512b1e; background: #fedc56; font-weight: bold; } +pre .string { color: #8080ff; } +pre .number { color: #f8660d; } +pre .operator { color: #2239a8; font-weight: bold; } +pre .preprocessor, pre .prepro { color: #a33243; } +pre .global { color: #800080; } +pre .user-keyword { color: #800080; } +pre .prompt { color: #558817; } +pre .url { color: #272fc2; text-decoration: underline; } + diff --git a/nerv/doc/lua/modules/layer.affine.html b/nerv/doc/lua/modules/layer.affine.html new file mode 100644 index 0000000..817fc37 --- /dev/null +++ b/nerv/doc/lua/modules/layer.affine.html @@ -0,0 +1,145 @@ + + + + + NERV documentation + + + + +
+Module layer.affine
+
+Parameter and layer classes related to linear transform.
+
+Class nerv.MatrixParam: a parameter that consists of a single matrix
+
+  MatrixParam:read (handle)
+    Read from a file handle.
+    Parameters:
+      handle: the file handle
+
+Class nerv.AffineLayer: a fully-connected linear transform layer
+
+  AffineLayer:__init (id, global_conf, layer_conf)
+    The constructor.
+    Parameters:
+      id, global_conf, layer_conf
+
+generated by LDoc 1.4.3, last updated 2016-01-15 14:56:30
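To make the constructor signature above concrete, here is a minimal, hypothetical Lua sketch of creating such a layer. The gconf field names follow the conventions visible elsewhere in this patch (use_cpu, cumat_type, mmat_type, batch_size), but the identifiers "affine0" and the dimensions are made up, and the parameter lookup performed by find_param (through layer_conf or a parameter repository in global_conf) is outside this excerpt, so the snippet is illustrative rather than a guaranteed-to-run recipe:

    -- hypothetical global configuration shared by all layers
    local gconf = {
        use_cpu    = false,                -- train on the GPU
        cumat_type = nerv.CuMatrixFloat,
        mmat_type  = nerv.MMatrixFloat,
        batch_size = 256,
    }
    -- a fully-connected layer mapping a 429-dim input to a 2048-dim output;
    -- the constructor looks up (or generates) the parameters ltp1 and bp
    local affine = nerv.AffineLayer("affine0", gconf,
                                    {dim_in = {429}, dim_out = {2048}})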
+ + diff --git a/nerv/doc/lua/modules/matrix.html b/nerv/doc/lua/modules/matrix.html new file mode 100644 index 0000000..2840e59 --- /dev/null +++ b/nerv/doc/lua/modules/matrix.html @@ -0,0 +1,409 @@ + + + + + NERV documentation + + + + +
+Module matrix
+
+Implements a fraction of matrix operations (methods) in Lua, while others are
+implemented in C extension.
+
+Info: Author: Ted Yin
+
+Class nerv.Matrix: the base class for all matrices
+
+  nerv.Matrix:__tostring__ ()
+    Convert the matrix object to a string.
+
+  nerv.Matrix:generate (gen)
+    Assign each element in a matrix using the value returned by a callback gen.
+    Parameters:
+      gen: the callback used to generate the values in the matrix, to which the
+           indices of row and column will be passed (e.g., gen(i, j))
+
+  nerv.Matrix:create (nrow, ncol)
+    Create a fresh new matrix of the same matrix type (as self).
+    Parameters:
+      nrow: optional, the number of rows in the created matrix if specified,
+            otherwise self:nrow() will be used
+      ncol: optional, the number of columns in the created matrix if specified,
+            otherwise self:ncol() will be used
+
+  nerv.Matrix:__add__ (b)
+    Operator overloading of +.
+
+  nerv.Matrix:__sub__ (b)
+    Operator overloading of -.
+
+  nerv.Matrix:__mul__ (b)
+    Operator overloading of *.
+
+Class nerv.CuMatrixFloat: CUDA float matrices
+
+  nerv.CuMatrixFloat.new_from_host (mat)
+    Create a CUDA matrix copy of the host matrix (in memory).
+    Parameters:
+      mat: the host matrix
+
+  nerv.CuMatrixFloat:new_to_host ()
+    Create a host matrix copy of the CUDA matrix.
+
+Class nerv.CuMatrixDouble: CUDA double matrices
+
+  nerv.CuMatrixDouble.new_from_host (mat)
+    Create a CUDA matrix copy of the host matrix (in memory).
+    Parameters:
+      mat: the host matrix
+
+  nerv.CuMatrixDouble:new_to_host ()
+    Create a host matrix copy of the CUDA matrix.
+
+Class nerv.MMatrix: the base class for all host (in-memory) matrices
+
+  nerv.MMatrix:copy_toh (b, ...)
+    A wrapper function for copy_fromh.
+    Parameters:
+      b, ...
+
+  nerv.MMatrix.print_profile ()
+    Print profiling info of host matrices.
+
+  nerv.MMatrix.clear_profile ()
+    Clear profiling info of host matrices.
+
+generated by LDoc 1.4.3, last updated 2016-01-15 14:56:30
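As a concrete illustration of the API documented above, here is a small, hedged Lua sketch exercising generate, the overloaded operators, and the host/device copies. It assumes the element-wise + and - are available for host matrices as well (the matrix_func.lua tests added by this patch exercise both the host and CUDA variants), and a working CUDA setup for the CuMatrixFloat part:

    -- host (in-memory) matrices; gen(i, j) receives the row and column indices
    local a = nerv.MMatrixFloat(2, 3)
    local b = nerv.MMatrixFloat(2, 3)
    a:generate(function (i, j) return i + j end)
    b:generate(function (i, j) return i * j end)

    print(a)                -- __tostring__ gives a readable dump
    local c = a + b         -- __add__: element-wise sum
    local d = a - b         -- __sub__: element-wise difference
    local e = a:create()    -- fresh matrix with the same shape and type as a

    -- move data to the GPU and back (requires CUDA)
    local dev  = nerv.CuMatrixFloat.new_from_host(a)
    local back = dev:new_to_host()

    nerv.MMatrix.print_profile()   -- dump profiling counters for host matrices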
+ + diff --git a/nerv/doc/lua/modules/nerv.html b/nerv/doc/lua/modules/nerv.html new file mode 100644 index 0000000..91baf9e --- /dev/null +++ b/nerv/doc/lua/modules/nerv.html @@ -0,0 +1,428 @@ + + + + + NERV documentation + + + + +
+Module nerv
+
+NERV: a Lua-based toolkit for high-performance deep learning.
+
+This file contains misc utility functions of NERV and finally initializes NERV
+by including init.lua of other basic modules.
+
+Info: Author: Ted Yin
+
+Functions
+
+  error_method_not_implemented ()
+    Dummy function. Display a friendly error message when the user attempts to
+    invoke a non-implemented function.
+
+  sprintf (fmt, ...)
+    Format a string just like sprintf in C.
+    Parameters:
+      fmt: the format string
+      ...: args, the data to be formatted
+    Returns: the formatted string
+
+  printf (fmt, ...)
+    Print a formatted string to stdout.
+    Parameters:
+      fmt: the format string
+      ...: args, the data to be formatted
+
+  error (fmt, ...)
+    Raise a global error with the formatted message.
+    Parameters:
+      fmt: the format string
+      ...: args, the data to be formatted
+
+  info (fmt, ...)
+    Print a notification message that begins with "info" and a timestamp.
+    Normal users should use this instead of nerv.printf to print any
+    notification information.
+    Parameters:
+      fmt: the format string
+      ...: args, the data to be formatted
+
+  warning (fmt, ...)
+    Print a warning message that begins with "warning" and a timestamp.
+    Normal users should use this instead of nerv.printf to print any warnings.
+    Parameters:
+      fmt: the format string
+      ...: args, the data to be formatted
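A brief usage sketch of the formatting and logging helpers above; the printed strings are illustrative, with the timestamp produced by os.date("%H:%M:%S %F") as in init.lua:

    nerv.printf("%d frames processed, accuracy %.2f%%\n", 1024, 97.53)
    local msg = nerv.sprintf("epoch %d finished", 3)   -- returns the string
    nerv.info("%s", msg)    -- e.g. "(14:56:30 2016-01-15)[nerv] info: epoch 3 finished"
    nerv.warning("learning rate %.4f may be too large", 0.8)

    -- nerv.error raises a Lua error, so wrap it in pcall when the caller can recover
    local ok, err = pcall(nerv.error, "unsupported layer type: %s", "foo")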
+  class (tname, parenttname)
+    Create a class (Torch-compatible). Use this to create a class in NERV.
+    Parameters:
+      tname: the class name
+      parenttname: the parent class name (from which it inherits)
+    Returns: the created class
+
+  table.tostring (tbl)
+    Get the string representation of a table, which can be executed as a valid
+    piece of Lua code.
+    Parameters:
+      tbl: the table
+    Returns: the string representation, which will result in a Lua table entity
+             when evaluated
+
+  get_type (tname)
+    Get the class by name.
+    Parameters:
+      tname: the name of the class
+    Returns: the class entity
+
+  is_type (obj, tname)
+    Check if the object is of a certain class.
+    Parameters:
+      obj: the object ("class instance")
+      tname: the class name ("type name")
+
+  dirname (filename)
+    Strip the last component from a file name.
+    Parameters:
+      filename: the path to a file
+    Returns: the path to the containing directory
+
+  include (filename)
+    Include a script file (chunk) into the current script. An analogy to
+    #include in C. Note that the effect is the same as executing
+    dofile(filename) at the current line.
+    Parameters:
+      filename: the path to a file
+    Returns: all values returned by the chunk
+
+generated by LDoc 1.4.3, last updated 2016-01-15 14:56:30
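The class and reflection helpers above combine as in the following hypothetical sketch; nerv.IdentityLayer is an invented name used only for illustration, not a class added by this patch, and the pattern mirrors how the existing layers (e.g. nerv.MatrixParam in affine.lua) are declared:

    local gconf = {}   -- placeholder global configuration

    -- declare a Torch-compatible subclass of nerv.Layer
    local IdentityLayer = nerv.class("nerv.IdentityLayer", "nerv.Layer")

    function IdentityLayer:__init(id, global_conf, layer_conf)
        self.id = id
        self.gconf = global_conf
        self.dim_in = layer_conf.dim_in
        self.dim_out = layer_conf.dim_out
    end

    local l = nerv.IdentityLayer("id0", gconf, {dim_in = {10}, dim_out = {10}})
    print(nerv.is_type(l, "nerv.Layer"))                             -- true
    print(nerv.get_type("nerv.IdentityLayer") == nerv.IdentityLayer) -- true

    -- a table serialized to a string that evaluates back to an equivalent table
    print(table.tostring({1, 2, foo = "bar"}))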
+ + diff --git a/nerv/examples/asr_trainer.lua b/nerv/examples/asr_trainer.lua index 69cfeed..3fa2653 100644 --- a/nerv/examples/asr_trainer.lua +++ b/nerv/examples/asr_trainer.lua @@ -5,6 +5,12 @@ function build_trainer(ifname) local network = get_network(layer_repo) local global_transf = get_global_transf(layer_repo) local input_order = get_input_order() + local mat_type + if gconf.use_cpu then + mat_type = gconf.mmat_type + else + mat_type = gconf.cumat_type + end local iterative_trainer = function (prefix, scp_file, bp) gconf.randomize = bp -- build buffer @@ -12,15 +18,15 @@ function build_trainer(ifname) -- initialize the network network:init(gconf.batch_size) gconf.cnt = 0 - err_input = {nerv.CuMatrixFloat(gconf.batch_size, 1)} + err_input = {mat_type(gconf.batch_size, 1)} err_input[1]:fill(1) for data in buffer.get_data, buffer do -- prine stat periodically gconf.cnt = gconf.cnt + 1 if gconf.cnt == 1000 then print_stat(layer_repo) - nerv.CuMatrix.print_profile() - nerv.CuMatrix.clear_profile() + mat_type.print_profile() + mat_type.clear_profile() gconf.cnt = 0 -- break end @@ -42,7 +48,7 @@ function build_trainer(ifname) end table.insert(input, transformed) end - local output = {nerv.CuMatrixFloat(gconf.batch_size, 1)} + local output = {mat_type(gconf.batch_size, 1)} err_output = {} for i = 1, #input do table.insert(err_output, input[i]:create()) @@ -56,8 +62,8 @@ function build_trainer(ifname) collectgarbage("collect") end print_stat(layer_repo) - nerv.CuMatrix.print_profile() - nerv.CuMatrix.clear_profile() + mat_type.print_profile() + mat_type.clear_profile() if (not bp) and prefix ~= nil then nerv.info("writing back...") local fname = string.format("%s_cv%.3f.nerv", diff --git a/nerv/init.lua b/nerv/init.lua index b5d20a2..6312df1 100644 --- a/nerv/init.lua +++ b/nerv/init.lua @@ -1,40 +1,67 @@ +--- NERV: a Lua-based toolkit for high-performance deep learning. +-- This file contains misc utility functions of NERV and finally initializes +-- NERV by including `init.lua` of other basic modules. +-- @author Ted Yin +-- @module nerv + require 'libnerv' +--- Dummy function. +-- Display a friendly error message when user attempts to invoke a +-- non-implemented function. function nerv.error_method_not_implemented() nerv.error("method not implemented"); end +--- Format a string just like `sprintf` in C. +-- @param fmt the format string +-- @param ... args, the data to be formatted +-- @return the formatted string function nerv.sprintf(fmt, ...) return string.format(fmt, ...) end +--- Print a formatted string to stdout. +-- @param fmt the format string +-- @param ... args, the data to be formatted function nerv.printf(fmt, ...) io.write(nerv.sprintf(fmt, ...)) end +--- Raise an global error with the formatted message. +-- @param fmt the format string +-- @param ... args, the data to be formatted function nerv.error(fmt, ...) error(nerv.sprintf("[nerv] internal error: " .. fmt .. "\n", ...)) end -function nerv.mesg_with_timestamp(fmt, ...) - nerv.printf( - string.format("(%s)[nerv] info: %s\n", - os.date("%H:%M:%S %F"), fmt), ...) -end - +--- Print a notification message that begins with "info" and a timestamp. +-- Instead of using `nerv.printf`, normal users should use this to print any +-- notification information. +-- @param fmt the format string +-- @param ... args, the data to be formatted function nerv.info(fmt, ...) nerv.printf( string.format("(%s)[nerv] info: %s\n", os.date("%H:%M:%S %F"), fmt), ...) end +--- Print a warning message that begins with "warning" and a timestamp. 
+-- Instead of using `nerv.printf`, normal users should use this to print any +-- warnings. +-- @param fmt the format string +-- @param ... args, the data to be formatted function nerv.warning(fmt, ...) nerv.printf( string.format("(%s)[nerv] warning: %s\n", os.date("%H:%M:%S %F"), fmt), ...) end --- Torch C API wrapper +--- Create a class (Torch-compatible). +-- Use this to create a class in NERV. +-- @param tname the class name +-- @param parenttname the parent class name (from which it inherits) +-- @return the created class function nerv.class(tname, parenttname) local function constructor(...) @@ -81,6 +108,11 @@ function table.key_to_str (k) end end +--- Get the string representation of a table, which can be executed as a valid +-- piece of Lua code. +-- @param tbl the table +-- @return the string representation which will result in a Lua table entity +-- when evaluated function table.tostring(tbl) local result, done = {}, {} for k, v in ipairs(tbl) do @@ -96,10 +128,16 @@ function table.tostring(tbl) return "{" .. table.concat(result, ",") .. "}" end +--- Get the class by name. +-- @param tname the name of the class +-- @return the class entity function nerv.get_type(tname) return assert(loadstring("return " .. tname))() end +--- Check if the object is of the certain class. +-- @param obj the object ("class instance") +-- @param tname the class name ("type name") function nerv.is_type(obj, tname) local mt0 = nerv.getmetatable(tname) local mt = getmetatable(obj) @@ -112,6 +150,9 @@ function nerv.is_type(obj, tname) return false end +--- Strip last component from file name. +-- @param filename the path to a file +-- @return the path to the containing directory function nerv.dirname(filename) if filename:match(".-/.-") then local name = string.gsub(filename, "(.*/)(.*)", "%1") @@ -121,11 +162,18 @@ function nerv.dirname(filename) end end +--- Include a script file (chunk) into the current script. +-- An analogy to `#include` in C. Note that the effect is the same as executing +-- `dofile(filename)` at the current line. +-- @param filename the path to a file +-- @return all values returned by the chunk function nerv.include(filename) local caller = debug.getinfo(2, "S").source:sub(2) - dofile(nerv.dirname(caller) .. filename) + return dofile(nerv.dirname(caller) .. 
filename) end +-- the following lines trigger the initialization of basic modules + nerv.include('matrix/init.lua') nerv.include('io/init.lua') nerv.include('layer/init.lua') diff --git a/nerv/io/sgd_buffer.lua b/nerv/io/sgd_buffer.lua index 74c4934..3cf4f5a 100644 --- a/nerv/io/sgd_buffer.lua +++ b/nerv/io/sgd_buffer.lua @@ -7,21 +7,38 @@ function SGDBuffer:__init(global_conf, buffer_conf) self.randomize = buffer_conf.randomize self.consume = buffer_conf.consume local cumat_type = global_conf.cumat_type + if self.gconf.use_cpu then + self.output_mat_type = self.gconf.mmat_type + else + self.output_mat_type = self.gconf.cumat_type + end if buffer_conf.use_gpu then self.mat_type = cumat_type - self.copy_rows_from_by_idx = cumat_type.copy_rows_fromd_by_idx - self.copy_from = cumat_type.copy_fromd - self.copy_from_reader = cumat_type.copy_fromh + if self.gconf.use_cpu then + -- gpu buffer -> cpu training + nerv.error("not implemeted") + else + -- gpu buffer -> gpu training + self.copy_rows_from_by_idx = cumat_type.copy_rows_fromd_by_idx + self.copy_from = cumat_type.copy_fromd + end self.perm_gen = function (x) return cumat_type.new_from_host(nerv.MMatrixFloat.perm_gen(x)) end else self.mat_type = global_conf.mmat_type - self.copy_rows_from_by_idx = cumat_type.copy_rows_fromh_by_idx - self.copy_from = cumat_type.copy_fromh + if self.gconf.use_cpu then + -- cpu buffer -> cpu training + self.copy_rows_from_by_idx = gconf.mmat_type.copy_rows_fromh_by_idx + self.copy_from = gconf.mmat_type.copy_fromh + else + -- cpu buffer -> gpu training + self.copy_rows_from_by_idx = cumat_type.copy_rows_fromh_by_idx + self.copy_from = cumat_type.copy_fromh + end self.perm_gen = nerv.MMatrixFloat.perm_gen - self.copy_from_reader = self.mat_type.copy_from end + self.copy_from_reader = self.mat_type.copy_fromh self.head = 0 self.tail = 0 self.readers = {} @@ -117,7 +134,7 @@ function SGDBuffer:get_data() local res = {} for i, reader in ipairs(self.readers) do for id, buff in pairs(reader.buffs) do - local batch = self.gconf.cumat_type(actual_batch_size, buff.width) + local batch = self.output_mat_type(actual_batch_size, buff.width) if self.randomize then self.copy_rows_from_by_idx(batch, buff.data, self.rand_map, self.head) else diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua index 566e9bc..0d4f7dd 100644 --- a/nerv/layer/affine.lua +++ b/nerv/layer/affine.lua @@ -1,15 +1,28 @@ +--- Parameter and layer classes related to linear transform. + local MatrixParam = nerv.class('nerv.MatrixParam', 'nerv.Param') local LinearTransParam = nerv.class('nerv.LinearTransParam', 'nerv.MatrixParam') local BiasParam = nerv.class('nerv.BiasParam', 'nerv.MatrixParam') local AffineLayer = nerv.class('nerv.AffineLayer', 'nerv.Layer') +--- A parameter that consists of a single matrix +-- @type nerv.MatrixParam + +--- Read from a file handle. 
+-- @param handle the file handle function MatrixParam:read(handle) - self.trans = self.gconf.cumat_type.new_from_host( - self.gconf.mmat_type.load(handle)) + self.trans = self.gconf.mmat_type.load(handle) + if not self.gconf.use_cpu then + self.trans = self.gconf.cumat_type.new_from_host(self.trans) + end end function MatrixParam:write(handle) - self.trans:new_to_host():save(handle) + local trans = self.trans + if not self.gconf.use_cpu then + trans = self.trans:new_to_host() + end + trans:save(handle) end function MatrixParam:train_init() @@ -59,15 +72,23 @@ function LinearTransParam:update_by_err_input(err, input) self:_update_by_err_input(err, input, l2, l2) end +--- A fully-connected linear transform layer. +-- @type nerv.AffineLayer + +--- The constructor. function AffineLayer:__init(id, global_conf, layer_conf) self.id = id self.dim_in = layer_conf.dim_in self.dim_out = layer_conf.dim_out - self.ltp = self:find_param("ltp", layer_conf, global_conf, nerv.LinearTransParam, {self.dim_in[1], self.dim_out[1]}) --layer_conf.ltp - for i = 2, #self.dim_in do - self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf, nerv.LinearTransParam, {self.dim_in[i], self.dim_out[1]}) + for i = 1, #self.dim_in do + self["ltp" .. i] = self:find_param("ltp" .. i, layer_conf, global_conf, + nerv.LinearTransParam, + {self.dim_in[i], self.dim_out[1]}) end - self.bp = self:find_param("bp", layer_conf, global_conf, nerv.BiasParam, {1, self.dim_out[1]}) --layer_conf.bp + self.ltp = self.ltp1 -- alias of ltp1 + self.bp = self:find_param("bp", layer_conf, global_conf, + nerv.BiasParam, + {1, self.dim_out[1]}) self.gconf = global_conf self:check_dim_len(-1, 1) -- exactly one output, allow multiple inputs end @@ -76,15 +97,7 @@ function AffineLayer:init(batch_size) if self.ltp.trans:ncol() ~= self.bp.trans:ncol() then nerv.error("mismatching dimensions of linear transform and bias paramter") end - self.bp:train_init() - if self.dim_in[1] ~= self.ltp.trans:nrow() then - nerv.error("mismatching dimensions of linear transform parameter and input") - end - if self.dim_out[1] ~= self.ltp.trans:ncol() then - nerv.error("mismatching dimensions of linear transform parameter and output") - end - self.ltp:train_init() - for i = 2, #self.dim_in do + for i = 1, #self.dim_in do if self.dim_in[i] ~= self["ltp" .. i].trans:nrow() then nerv.error("mismatching dimensions of linear transform parameter and input") end @@ -93,6 +106,7 @@ function AffineLayer:init(batch_size) end self["ltp" .. i]:train_init() end + self.bp:train_init() end function AffineLayer:batch_resize(batch_size) @@ -100,24 +114,24 @@ function AffineLayer:batch_resize(batch_size) end function AffineLayer:update(bp_err, input, output) - self.ltp:update_by_err_input(bp_err[1], input[1]) - for i = 2, #self.dim_in do + for i = 1, #self.dim_in do self["ltp" .. i]:update_by_err_input(bp_err[1], input[i]) end self.bp:update_by_gradient(bp_err[1]:colsum()) end function AffineLayer:propagate(input, output) - output[1]:mul(input[1], self.ltp.trans, 1.0, 0.0, 'N', 'N') + -- apply linear transform + output[1]:mul(input[1], self.ltp1.trans, 1.0, 0.0, 'N', 'N') for i = 2, #self.dim_in do output[1]:mul(input[i], self["ltp" .. i].trans, 1.0, 1.0, 'N', 'N') end + -- add bias output[1]:add_row(self.bp.trans, 1.0) end function AffineLayer:back_propagate(bp_err, next_bp_err, input, output) - next_bp_err[1]:mul(bp_err[1], self.ltp.trans, 1.0, 0.0, 'N', 'T') - for i = 2, #self.dim_in do + for i = 1, #self.dim_in do next_bp_err[i]:mul(bp_err[1], self["ltp" .. 
i].trans, 1.0, 0.0, 'N', 'T') end end diff --git a/nerv/layer/bias.lua b/nerv/layer/bias.lua index 7e9fd46..924c3da 100644 --- a/nerv/layer/bias.lua +++ b/nerv/layer/bias.lua @@ -23,7 +23,7 @@ function BiasLayer:batch_resize(batch_size) end function BiasLayer:propagate(input, output) - output[1]:copy_fromd(input[1]) + output[1]:copy_from(input[1]) output[1]:add_row(self.bias.trans, 1.0) end diff --git a/nerv/layer/combiner.lua b/nerv/layer/combiner.lua index 1bcfdfb..22e89a9 100644 --- a/nerv/layer/combiner.lua +++ b/nerv/layer/combiner.lua @@ -6,6 +6,11 @@ function CombinerLayer:__init(id, global_conf, layer_conf) self.dim_in = layer_conf.dim_in self.dim_out = layer_conf.dim_out self.gconf = global_conf + if self.gconf.use_cpu then + self.mat_type = self.gconf.mmat_type + else + self.mat_type = self.gconf.cumat_type + end self:check_dim_len(#self.lambda, -1) if #self.dim_in < 1 then nerv.error("no input specified") @@ -27,12 +32,12 @@ function CombinerLayer:init(batch_size) nerv.error("mismatching dimensions of inputs/outputs") end end - self.sum = self.gconf.cumat_type(batch_size, dim) + self.sum = self.mat_type(batch_size, dim) end function CombinerLayer:batch_resize(batch_size) if self.sum:nrow() ~= batch_size then - self.sum = self.gconf.cumat_type(batch_size, self.dim_in[1]) + self.sum = self.mat_type(batch_size, self.dim_in[1]) end end @@ -45,13 +50,13 @@ function CombinerLayer:propagate(input, output) output[1]:add(output[1], input[i], 1.0, self.lambda[i]) end for i = 2, #self.dim_out do - output[i]:copy_fromd(output[1]) + output[i]:copy_from(output[1]) end end function CombinerLayer:back_propagate(bp_err, next_bp_err, input, output) local sum = self.sum - sum:copy_fromd(bp_err[1]) + sum:copy_from(bp_err[1]) for i = 2, #self.dim_out do sum:add(sum, bp_err[i], 1.0, 1.0) end diff --git a/nerv/layer/mse.lua b/nerv/layer/mse.lua index 0ee3080..1c218d0 100644 --- a/nerv/layer/mse.lua +++ b/nerv/layer/mse.lua @@ -5,6 +5,11 @@ function MSELayer:__init(id, global_conf, layer_conf) self.dim_in = layer_conf.dim_in self.dim_out = layer_conf.dim_out self.gconf = global_conf + if self.gconf.use_cpu then + self.mat_type = self.gconf.mmat_type + else + self.mat_type = self.gconf.cumat_type + end self:check_dim_len(2, -1) end @@ -15,15 +20,15 @@ function MSELayer:init(batch_size) self.scale = 1 / self.dim_in[1] self.total_mse = 0.0 self.total_frames = 0 - self.mse = self.gconf.cumat_type(batch_size, self.dim_in[1]) - self.mse_sum = self.gconf.cumat_type(batch_size, 1) + self.mse = self.mat_type(batch_size, self.dim_in[1]) + self.mse_sum = self.mat_type(batch_size, 1) self.diff = self.mse:create() end function MSELayer:batch_resize(batch_size) if self.mse:nrow() ~= batch_resize then - self.mse = self.gconf.cumat_type(batch_size, self.dim_in[1]) - self.mse_sum = self.gconf.cumat_type(batch_size, 1) + self.mse = self.mat_type(batch_size, self.dim_in[1]) + self.mse_sum = self.mat_type(batch_size, 1) self.diff = self.mse:create() end end @@ -36,11 +41,11 @@ function MSELayer:propagate(input, output) local mse = self.mse local mse_sum = self.mse_sum mse:add(input[1], input[2], 1.0, -1.0) - self.diff:copy_fromd(mse) + self.diff:copy_from(mse) mse:mul_elem(mse, mse) mse_sum:add(mse_sum, mse:rowsum(mse), 0.0, self.scale) if output[1] ~= nil then - output[1]:copy_fromd(mse_sum) + output[1]:copy_from(mse_sum) end self.total_mse = self.total_mse + mse_sum:colsum()[0][0] self.total_frames = self.total_frames + mse_sum:nrow() diff --git a/nerv/layer/softmax_ce.lua b/nerv/layer/softmax_ce.lua index 
9071e86..31a2ad7 100644 --- a/nerv/layer/softmax_ce.lua +++ b/nerv/layer/softmax_ce.lua @@ -3,6 +3,11 @@ local SoftmaxCELayer = nerv.class("nerv.SoftmaxCELayer", "nerv.Layer") function SoftmaxCELayer:__init(id, global_conf, layer_conf) self.id = id self.gconf = global_conf + if self.gconf.use_cpu then + self.mat_type = self.gconf.mmat_type + else + self.mat_type = self.gconf.cumat_type + end self.dim_in = layer_conf.dim_in self.dim_out = layer_conf.dim_out self.compressed = layer_conf.compressed @@ -19,13 +24,13 @@ function SoftmaxCELayer:init(batch_size) self.total_ce = 0.0 self.total_correct = 0 self.total_frames = 0 - self.softmax = self.gconf.cumat_type(batch_size, self.dim_in[1]) + self.softmax = self.mat_type(batch_size, self.dim_in[1]) self.ce = self.softmax:create() end function SoftmaxCELayer:batch_resize(batch_size) if self.softmax:nrow() ~= batch_resize then - self.softmax = self.gconf.cumat_type(batch_size, self.dim_in[1]) + self.softmax = self.mat_type(batch_size, self.dim_in[1]) self.ce = self.softmax:create() end end @@ -46,7 +51,7 @@ function SoftmaxCELayer:propagate(input, output) ce:mul_elem(ce, label) ce = ce:rowsum() if output[1] ~= nil then - output[1]:copy_fromd(ce) + output[1]:copy_from(ce) end -- add total ce self.total_ce = self.total_ce - ce:colsum()[0][0] diff --git a/nerv/layer/window.lua b/nerv/layer/window.lua index 8eed352..4933de0 100644 --- a/nerv/layer/window.lua +++ b/nerv/layer/window.lua @@ -23,7 +23,7 @@ function WindowLayer:batch_resize(batch_size) end function WindowLayer:propagate(input, output) - output[1]:copy_fromd(input[1]) + output[1]:copy_from(input[1]) output[1]:scale_rows_by_row(self.window.trans) end diff --git a/nerv/lib/common.c b/nerv/lib/common.c index db667b2..d977f8d 100644 --- a/nerv/lib/common.c +++ b/nerv/lib/common.c @@ -41,6 +41,7 @@ int nerv_error_status(lua_State *L, Status *status) { status->file, status->lineno); else nerv_error(L, "%s @%s:%d", mmesg, status->file, status->lineno); + return 0; } int nerv_error_method_not_implemented(lua_State *L) { diff --git a/nerv/lib/io/chunk_file.c b/nerv/lib/io/chunk_file.c index 71db820..5c50ccd 100644 --- a/nerv/lib/io/chunk_file.c +++ b/nerv/lib/io/chunk_file.c @@ -2,6 +2,7 @@ #include "chunk_file.h" #include #include +#include #define PARAM_HEADER_SIZE 16 static size_t read_chunk_header_plain(FILE *fp, Status *status) { @@ -112,7 +113,7 @@ static ChunkFile *open_read(const char *fn, Status *status) { for (i = 0;; offset += chunk_len, i++) { ChunkInfo *cip; - fprintf(stdout, "reading chunk %d from %d\n", i, (int)offset); + fprintf(stderr, "reading chunk %d from %d\n", i, (int)offset); /* skip to the begining of chunk i */ if (fseeko(fp, offset, SEEK_SET) != 0) { diff --git a/nerv/lib/matrix/cukernel.h b/nerv/lib/matrix/cukernel.h index fe682d3..39d42eb 100644 --- a/nerv/lib/matrix/cukernel.h +++ b/nerv/lib/matrix/cukernel.h @@ -3,7 +3,7 @@ void cudak_(cuda_mul_elem)(const Matrix *a, const Matrix *b, Matrix *c); void cudak_(cuda_log_elem)(const Matrix *a, Matrix *b); void cudak_(cuda_sigmoid)(const Matrix *a, Matrix *b); void cudak_(cuda_sigmoid_grad)(const Matrix *output, const Matrix *err, Matrix *nerr); -void cudak_(cuda_rand_uniform)(const Matrix *a); //a's curand_gen may be modified +void cudak_(cuda_rand_uniform)(const Matrix *a); /* a's curand_gen may be modified */ void cudak_(cuda_thres_mask)(const Matrix *a, const Matrix *b, double thres, double low, double high); void cudak_(cuda_tanh)(const Matrix *a, Matrix *b); void cudak_(cuda_tanh_grad)(const Matrix *output, const 
Matrix *err, Matrix *nerr); diff --git a/nerv/lib/matrix/generic/cukernel.cu b/nerv/lib/matrix/generic/cukernel.cu index 2b696d5..7f780a8 100644 --- a/nerv/lib/matrix/generic/cukernel.cu +++ b/nerv/lib/matrix/generic/cukernel.cu @@ -356,11 +356,17 @@ __global__ void cudak_(copy_rows_by_idx)(const MATRIX_ELEM *a, MATRIX_ELEM *b, int j = blockIdx.x * blockDim.x + threadIdx.x; int i = blockIdx.y * blockDim.y + threadIdx.y; if (i >= nrow || j >= ncol) return; + /* int k = lrintf(idx[i]); if (k < 0 || k >= a_nrow) { printf("error in kernel copy_rows_by_idx k(%d) out of range\n", k); } b[j + i * stride] = a[j + k * stride]; + */ + /* NOTE: in most cases it is guaranteed + * the idx is within the range, checking + * would bring some overhead. */ + b[j + i * stride] = a[j + lrintf(idx[i]) * stride]; } __global__ void cudak_(copy_rows_by_colidx)(const MATRIX_ELEM *a, MATRIX_ELEM *b, diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c index 7582725..bf93b77 100644 --- a/nerv/lib/matrix/generic/cumatrix.c +++ b/nerv/lib/matrix/generic/cumatrix.c @@ -41,7 +41,7 @@ void nerv_matrix_(mul)(Matrix *c, const Matrix *a, const Matrix *b, int bm = b->nrow, bn = b->ncol; if (ta == CUBLAS_OP_T) SWAP(am, an); if (tb == CUBLAS_OP_T) SWAP(bm, bn); - if (an != bm) + if (an != bm || (am != c->nrow && bn != c->ncol)) NERV_EXIT_STATUS(status, MAT_WRONG_MULT_DIM, 0); /* Because matrix in Nerv is row-major, here b comes first */ PROFILE_START diff --git a/nerv/lib/matrix/generic/mmatrix.c b/nerv/lib/matrix/generic/mmatrix.c index 225079e..fa1dc5f 100644 --- a/nerv/lib/matrix/generic/mmatrix.c +++ b/nerv/lib/matrix/generic/mmatrix.c @@ -5,9 +5,477 @@ #define MATRIX_DATA_ALLOC(dptr, stride, width, height, status) \ host_matrix_(alloc)(dptr, stride, width, height, status) #define NERV_GENERIC_MATRIX +#include "../cuda_helper.h" #include "../../common.h" #include "../../io/chunk_file.h" -#include "string.h" +#include +#include +#include + +Matrix *nerv_matrix_(colsum)(Matrix *a, Status *status) { + Matrix *b = nerv_matrix_(create)(1, a->ncol, status); + if (status->err_code != NERV_NORMAL) + return NULL; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + int i, j; + size_t astride = a->stride; + memset(brow, 0, sizeof(MATRIX_ELEM) * b->ncol); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + brow[j] += arow[j]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +Matrix *nerv_matrix_(colsame)(Matrix *a, const Matrix *ref, Status *status) { + Matrix *b = nerv_matrix_(create)(1, a->ncol, status); + if (status->err_code != NERV_NORMAL) + return NULL; + CHECK_SAME_DIMENSION_RET(a, ref, status); + int i, j; + size_t astride = a->stride, cstride = ref->stride; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + const MATRIX_ELEM *crow = MATRIX_ELEM_PTR(ref); + memset(brow, 0, sizeof(MATRIX_ELEM) * b->ncol); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + { + brow[j] += (int)(arow[j] == crow[j]); + } + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + crow = MATRIX_NEXT_ROW_PTR(crow, cstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +Matrix *nerv_matrix_(rowsum)(Matrix *a, Status *status) { + Matrix *b = nerv_matrix_(create)(a->nrow, 1, status); + if (status->err_code != NERV_NORMAL) + return NULL; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + int i, j; + size_t astride = a->stride, bstride = b->stride; + 
memset(brow, 0, b->stride * b->nrow); + for (i = 0; i < a->nrow; i++) + { + for (j = 0; j < a->ncol; j++) + brow[0] += arow[j]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +Matrix *nerv_matrix_(rowmax)(Matrix *a, Status *status) { + Matrix *b = nerv_matrix_(create)(a->nrow, 1, status); + if (status->err_code != NERV_NORMAL) + return NULL; + MATRIX_ELEM *arow = MATRIX_ELEM_PTR(a), + *brow = MATRIX_ELEM_PTR(b); + int i, j; + size_t astride = a->stride, bstride = b->stride; + for (i = 0; i < a->nrow; i++) + { + brow[0] = arow[0]; + for (j = 1; j < a->ncol; j++) + if (arow[j] > brow[0]) + brow[0] = arow[j]; + arow = MATRIX_NEXT_ROW_PTR(arow, astride); + brow = MATRIX_NEXT_ROW_PTR(brow, bstride); + } + NERV_SET_STATUS(status, NERV_NORMAL, 0); + return b; +} + +void nerv_matrix_(rowmax_idx)(Matrix *a, Matrix **b, Matrix **idx, Status *status) { + *b = nerv_matrix_(create)(a->nrow, 1, status); + if (status->err_code != NERV_NORMAL) + return; + *idx = nerv_matrix_(create)(a->nrow, 1, status); + if (status->er