aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile30
-rw-r--r--nerv/Makefile11
-rw-r--r--nerv/examples/swb_baseline.lua2
-rw-r--r--nerv/examples/swb_baseline2.lua201
-rw-r--r--nerv/examples/timit_baseline2.lua215
-rw-r--r--nerv/init.lua2
-rw-r--r--nerv/lib/cblas.h596
-rw-r--r--nerv/lib/matrix/generic/mmatrix.c2
-rw-r--r--nerv/matrix/generic/mmatrix.c2
9 files changed, 1047 insertions, 14 deletions
diff --git a/Makefile b/Makefile
index 0982295..d8612dd 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,28 @@
.PHONY: all clean install luajit luarocks speech
SHELL := /bin/bash
PREFIX := $(CURDIR)/install/
+#CUDA_BASE := /usr/local/cuda-7.0
+CUDA_BASE := /usr/local/cuda
+BLAS_BASE := /usr/lib/
+BLAS_LDFLAGS := -L$(BLAS_BASE) -Wl,-rpath=$(BLAS_BASE)
+BLAS_TYPE := atlas
+KALDI_BASE := /speechlab/tools/KALDI/kaldi-master/
+
+MKL_LDFLAGS := -lmkl_rt
+ATLAS_LDFLAGS := -lcblas -llapack_atlas
+ifeq ($(BLAS_TYPE), mkl)
+BLAS_LDFLAGS += $(MKL_LDFLAGS)
+else ifeq ($(BLAS_TYPE), atlas)
+BLAS_LDFLAGS += $(ATLAS_LDFLAGS)
+else
+$(error Invalid blas type)
+endif
+export CUDA_BASE
+export KALDI_BASE
+export BLAS_LDFLAGS
+
+.PHONY: htk_io kaldi_io kaldi_decode
+
all: luajit luarocks install
luajit:
PREFIX=$(PREFIX) ./tools/build_luajit.sh
@@ -8,12 +30,12 @@ luarocks:
PREFIX=$(PREFIX) ./tools/build_luarocks.sh
install:
cd nerv; $(PREFIX)/bin/luarocks make CFLAGS=$(CFLAGS)
-speech:
- cd speech/speech_utils; $(PREFIX)/bin/luarocks make
- cd speech/htk_io; $(PREFIX)/bin/luarocks make
- cd speech/kaldi_io; $(PREFIX)/bin/luarocks make
+speech: htk_io kaldi_io kaldi_decode
+htk_io kaldi_io kaldi_decode:
+ cd speech/$@; $(PREFIX)/bin/luarocks make
clean:
cd nerv && make clean
cd speech/speech_utils && make clean
cd speech/htk_io && make clean
cd speech/kaldi_io && make clean
+ cd speech/kaldi_decode && make clean
diff --git a/nerv/Makefile b/nerv/Makefile
index a2155b9..9ef3212 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -39,12 +39,11 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \
tnn/init.lua tnn/sutil.lua tnn/tnn.lua
INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK
-#CUDA_BASE := /usr/local/cuda-7.0
-CUDA_BASE := /usr/local/cuda
CUDA_INCLUDE := -I $(CUDA_BASE)/include/
INCLUDE += $(CUDA_INCLUDE)
-LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcublas -lcurand
+CUDA_LDFLAGS := -L$(CUDA_BASE)/lib64/ -Wl,-rpath=$(CUDA_BASE)/lib64/ -lcudart -lcublas -lcurand
+
NVCC := $(CUDA_BASE)/bin/nvcc
EMPTY :=
SPACE := $(EMPTY) $(EMPTY)
@@ -65,11 +64,11 @@ $(LUA_DIR)/%.lua: %.lua
cp $< $@
$(LIB_PATH)/libnervcore.so: $(CORE_OBJS)
- gcc -shared -o $@ $^ $(LDFLAGS) -lcblas
+ gcc -shared -o $@ $^ $(LDFLAGS) $(CUDA_LDFLAGS) $(BLAS_LDFLAGS)
$(LIB_PATH)/libluaT.so: $(LUAT_OBJS)
- gcc -shared -o $@ $^ $(LDFLAGS)
+ gcc -shared -o $@ $^
$(INST_LIBDIR)/libnerv.so: $(NERV_OBJS) $(LIB_PATH)/libnervcore.so $(LIB_PATH)/libluaT.so
- gcc -shared -o $@ $(NERV_OBJS) $(LDFLAGS) -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -lluaT
+ gcc -shared -o $@ $(NERV_OBJS) -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -lluaT
$(OBJ_DIR)/matrix/cumatrix.o: matrix/generic/cumatrix.c matrix/generic/matrix.c
$(OBJ_DIR)/matrix/mmatrix.o: matrix/generic/mmatrix.c matrix/generic/matrix.c
diff --git a/nerv/examples/swb_baseline.lua b/nerv/examples/swb_baseline.lua
index 51052ba..cacc401 100644
--- a/nerv/examples/swb_baseline.lua
+++ b/nerv/examples/swb_baseline.lua
@@ -9,7 +9,7 @@ gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
cv_scp = "/slfs1/users/mfy43/swb_ivec/train_cv.scp",
htk_conf = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf",
initialized_param = {"/slfs1/users/mfy43/swb_init.nerv",
- "/slfs1/users/mfy43/swb_global_transf.nerv"},
+ "/slfs1/users/mfy43/swb_global_transf.nerv"},
debug = false}
function make_layer_repo(param_repo)
diff --git a/nerv/examples/swb_baseline2.lua b/nerv/examples/swb_baseline2.lua
new file mode 100644
index 0000000..0e2a6e0
--- /dev/null
+++ b/nerv/examples/swb_baseline2.lua
@@ -0,0 +1,201 @@
+require 'htk_io'
+gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
+ cumat_type = nerv.CuMatrixFloat,
+ mmat_type = nerv.MMatrixFloat,
+ rearrange = true, -- just to make the context order consistent with old results, deprecated
+ frm_ext = 5,
+ frm_trim = 5, -- trim the first and last 5 frames, TNet just does this, deprecated
+ tr_scp = "/speechlab/users/mfy43/swb50/train_bp.scp",
+ cv_scp = "/speechlab/users/mfy43/swb50/train_cv.scp",
+ htk_conf = "/speechlab/users/mfy43/swb50/plp_0_d_a.conf",
+ initialized_param = {"/speechlab/users/mfy43/swb50/swb_init.nerv",
+ "/speechlab/users/mfy43/swb50/swb_global_transf.nerv"},
+ debug = false}
+
+function make_layer_repo(param_repo)
+ local layer_repo = nerv.LayerRepo(
+ {
+ -- global transf
+ ["nerv.BiasLayer"] =
+ {
+ blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}},
+ blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}}
+ },
+ ["nerv.WindowLayer"] =
+ {
+ wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}},
+ wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}}
+ },
+ -- biased linearity
+ ["nerv.AffineLayer"] =
+ {
+ affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"},
+ {dim_in = {429}, dim_out = {2048}}},
+ affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"},
+ {dim_in = {2048}, dim_out = {2048}}},
+ affine7 = {{ltp = "affine7_ltp", bp = "affine7_bp"},
+ {dim_in = {2048}, dim_out = {3001}}}
+ },
+ ["nerv.SigmoidLayer"] =
+ {
+ sigmoid0 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid1 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid2 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid3 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid4 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid5 = {{}, {dim_in = {2048}, dim_out = {2048}}},
+ sigmoid6 = {{}, {dim_in = {2048}, dim_out = {2048}}}
+ },
+ ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output
+ {
+ ce_crit = {{}, {dim_in = {3001, 1}, dim_out = {1}, compressed = true}}
+ },
+ ["nerv.SoftmaxLayer"] = -- softmax for decode output
+ {
+ softmax = {{}, {dim_in = {3001}, dim_out = {3001}}}
+ }
+ }, param_repo, gconf)
+
+ layer_repo:add_layers(
+ {
+ ["nerv.DAGLayer"] =
+ {
+ global_transf = {{}, {
+ dim_in = {429}, dim_out = {429},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "blayer1[1]",
+ ["blayer1[1]"] = "wlayer1[1]",
+ ["wlayer1[1]"] = "blayer2[1]",
+ ["blayer2[1]"] = "wlayer2[1]",
+ ["wlayer2[1]"] = "<output>[1]"
+ }
+ }},
+ main = {{}, {
+ dim_in = {429}, dim_out = {3001},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "affine0[1]",
+ ["affine0[1]"] = "sigmoid0[1]",
+ ["sigmoid0[1]"] = "affine1[1]",
+ ["affine1[1]"] = "sigmoid1[1]",
+ ["sigmoid1[1]"] = "affine2[1]",
+ ["affine2[1]"] = "sigmoid2[1]",
+ ["sigmoid2[1]"] = "affine3[1]",
+ ["affine3[1]"] = "sigmoid3[1]",
+ ["sigmoid3[1]"] = "affine4[1]",
+ ["affine4[1]"] = "sigmoid4[1]",
+ ["sigmoid4[1]"] = "affine5[1]",
+ ["affine5[1]"] = "sigmoid5[1]",
+ ["sigmoid5[1]"] = "affine6[1]",
+ ["affine6[1]"] = "sigmoid6[1]",
+ ["sigmoid6[1]"] = "affine7[1]",
+ ["affine7[1]"] = "<output>[1]"
+ }
+ }}
+ }
+ }, param_repo, gconf)
+
+ layer_repo:add_layers(
+ {
+ ["nerv.DAGLayer"] =
+ {
+ ce_output = {{}, {
+ dim_in = {429, 1}, dim_out = {1},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "main[1]",
+ ["main[1]"] = "ce_crit[1]",
+ ["<input>[2]"] = "ce_crit[2]",
+ ["ce_crit[1]"] = "<output>[1]"
+ }
+ }},
+ softmax_output = {{}, {
+ dim_in = {429}, dim_out = {3001},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "main[1]",
+ ["main[1]"] = "softmax[1]",
+ ["softmax[1]"] = "<output>[1]"
+ }
+ }}
+ }
+ }, param_repo, gconf)
+
+ return layer_repo
+end
+
+function get_network(layer_repo)
+ return layer_repo:get_layer("ce_output")
+end
+
+function get_decode_network(layer_repo)
+ return layer_repo:get_layer("softmax_output")
+end
+
+function get_global_transf(layer_repo)
+ return layer_repo:get_layer("global_transf")
+end
+
+function make_readers(scp_file, layer_repo)
+ return {
+ {reader = nerv.TNetReader(gconf,
+ {
+ id = "main_scp",
+ scp_file = scp_file,
+ conf_file = gconf.htk_conf,
+ frm_ext = gconf.frm_ext,
+ mlfs = {
+ phone_state = {
+ file = "/speechlab/users/mfy43/swb50/ref.mlf",
+ format = "map",
+ format_arg = "/speechlab/users/mfy43/swb50/dict",
+ dir = "*/",
+ ext = "lab"
+ }
+ }
+ }),
+ data = {main_scp = 429, phone_state = 1}}
+ }
+end
+
+function make_buffer(readers)
+ return nerv.SGDBuffer(gconf,
+ {
+ buffer_size = gconf.buffer_size,
+ randomize = gconf.randomize,
+ readers = readers,
+ use_gpu = true
+ })
+end
+
+function get_input_order()
+ return {{id = "main_scp", global_transf = true},
+ {id = "phone_state"}}
+end
+
+function get_accuracy(layer_repo)
+ local ce_crit = layer_repo:get_layer("ce_crit")
+ return ce_crit.total_correct / ce_crit.total_frames * 100
+end
+
+function print_stat(layer_repo)
+ local ce_crit = layer_repo:get_layer("ce_crit")
+ nerv.info("*** training stat begin ***")
+ nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce)
+ nerv.printf("correct:\t\t%d\n", ce_crit.total_correct)
+ nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames)
+ nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames)
+ nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo))
+ nerv.info("*** training stat end ***")
+end
diff --git a/nerv/examples/timit_baseline2.lua b/nerv/examples/timit_baseline2.lua
new file mode 100644
index 0000000..174b9e7
--- /dev/null
+++ b/nerv/examples/timit_baseline2.lua
@@ -0,0 +1,215 @@
+require 'kaldi_io'
+gconf = {lrate = 0.8, wcost = 1e-6, momentum = 0.9,
+ cumat_type = nerv.CuMatrixFloat,
+ mmat_type = nerv.MMatrixFloat,
+ frm_ext = 5,
+ tr_scp = "ark:/speechlab/tools/KALDI/kaldi-master/src/featbin/copy-feats " ..
+ "scp:/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/train.scp ark:- |",
+ cv_scp = "ark:/speechlab/tools/KALDI/kaldi-master/src/featbin/copy-feats " ..
+ "scp:/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/cv.scp ark:- |",
+ initialized_param = {"/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/nnet_init.nerv",
+ "/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/nnet_output.nerv",
+ "/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/nnet_trans.nerv"},
+ decode_param = {"/speechlab/users/mfy43/timit/nnet_init_20160229015745_iter_13_lr0.013437_tr72.434_cv58.729.nerv",
+ "/speechlab/users/mfy43/timit/s5/exp/dnn4_nerv_prepare/nnet_trans.nerv"},
+ debug = false}
+
+function make_layer_repo(param_repo)
+ local layer_repo = nerv.LayerRepo(
+ {
+ -- global transf
+ ["nerv.BiasLayer"] =
+ {
+ blayer1 = {{bias = "bias0"}, {dim_in = {440}, dim_out = {440}}}
+ },
+ ["nerv.WindowLayer"] =
+ {
+ wlayer1 = {{window = "window0"}, {dim_in = {440}, dim_out = {440}}}
+ },
+ -- biased linearity
+ ["nerv.AffineLayer"] =
+ {
+ affine0 = {{ltp = "affine0_ltp", bp = "affine0_bp"},
+ {dim_in = {440}, dim_out = {1024}}},
+ affine1 = {{ltp = "affine1_ltp", bp = "affine1_bp"},
+ {dim_in = {1024}, dim_out = {1024}}},
+ affine2 = {{ltp = "affine2_ltp", bp = "affine2_bp"},
+ {dim_in = {1024}, dim_out = {1024}}},
+ affine3 = {{ltp = "affine3_ltp", bp = "affine3_bp"},
+ {dim_in = {1024}, dim_out = {1024}}},
+ affine4 = {{ltp = "affine4_ltp", bp = "affine4_bp"},
+ {dim_in = {1024}, dim_out = {1024}}},
+ affine5 = {{ltp = "affine5_ltp", bp = "affine5_bp"},
+ {dim_in = {1024}, dim_out = {1024}}},
+ affine6 = {{ltp = "affine6_ltp", bp = "affine6_bp"},
+ {dim_in = {1024}, dim_out = {1959}}}
+ },
+ ["nerv.SigmoidLayer"] =
+ {
+ sigmoid0 = {{}, {dim_in = {1024}, dim_out = {1024}}},
+ sigmoid1 = {{}, {dim_in = {1024}, dim_out = {1024}}},
+ sigmoid2 = {{}, {dim_in = {1024}, dim_out = {1024}}},
+ sigmoid3 = {{}, {dim_in = {1024}, dim_out = {1024}}},
+ sigmoid4 = {{}, {dim_in = {1024}, dim_out = {1024}}},
+ sigmoid5 = {{}, {dim_in = {1024}, dim_out = {1024}}}
+ },
+ ["nerv.SoftmaxCELayer"] = -- softmax + ce criterion layer for finetune output
+ {
+ ce_crit = {{}, {dim_in = {1959, 1}, dim_out = {1}, compressed = true}}
+ },
+ ["nerv.SoftmaxLayer"] = -- softmax for decode output
+ {
+ softmax = {{}, {dim_in = {1959}, dim_out = {1959}}}
+ }
+ }, param_repo, gconf)
+
+ layer_repo:add_layers(
+ {
+ ["nerv.DAGLayer"] =
+ {
+ global_transf = {{}, {
+ dim_in = {440}, dim_out = {440},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "blayer1[1]",
+ ["blayer1[1]"] = "wlayer1[1]",
+ ["wlayer1[1]"] = "<output>[1]"
+ }
+ }},
+ main = {{}, {
+ dim_in = {440}, dim_out = {1959},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "affine0[1]",
+ ["affine0[1]"] = "sigmoid0[1]",
+ ["sigmoid0[1]"] = "affine1[1]",
+ ["affine1[1]"] = "sigmoid1[1]",
+ ["sigmoid1[1]"] = "affine2[1]",
+ ["affine2[1]"] = "sigmoid2[1]",
+ ["sigmoid2[1]"] = "affine3[1]",
+ ["affine3[1]"] = "sigmoid3[1]",
+ ["sigmoid3[1]"] = "affine4[1]",
+ ["affine4[1]"] = "sigmoid4[1]",
+ ["sigmoid4[1]"] = "affine5[1]",
+ ["affine5[1]"] = "sigmoid5[1]",
+ ["sigmoid5[1]"] = "affine6[1]",
+ ["affine6[1]"] = "<output>[1]"
+ }
+ }}
+ }
+ }, param_repo, gconf)
+
+ layer_repo:add_layers(
+ {
+ ["nerv.DAGLayer"] =
+ {
+ ce_output = {{}, {
+ dim_in = {440, 1}, dim_out = {1},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "main[1]",
+ ["main[1]"] = "ce_crit[1]",
+ ["<input>[2]"] = "ce_crit[2]",
+ ["ce_crit[1]"] = "<output>[1]"
+ }
+ }},
+ softmax_output = {{}, {
+ dim_in = {440}, dim_out = {1959},
+ sub_layers = layer_repo,
+ connections = {
+ ["<input>[1]"] = "main[1]",
+ ["main[1]"] = "softmax[1]",
+ ["softmax[1]"] = "<output>[1]"
+ }
+ }}
+ }
+ }, param_repo, gconf)
+
+ return layer_repo
+end
+
+function get_network(layer_repo)
+ return layer_repo:get_layer("ce_output")
+end
+
+function get_decode_network(layer_repo)
+ return layer_repo:get_layer("softmax_output")
+end
+
+function get_global_transf(layer_repo)
+ return layer_repo:get_layer("global_transf")
+end
+
+function make_readers(scp_file, layer_repo)
+ return {
+ {reader = nerv.KaldiReader(gconf,
+ {
+ id = "main_scp",
+ feature_rspecifier = scp_file,
+ conf_file = gconf.htk_conf,
+ frm_ext = gconf.frm_ext,
+ mlfs = {
+ phone_state = {
+ targets_rspecifier = "ark:/speechlab/tools/KALDI/kaldi-master/src/bin/ali-to-pdf " ..
+ "/speechlab/users/mfy43/timit/s5/exp/tri3_ali/final.mdl " ..
+ "\"ark:gunzip -c /speechlab/users/mfy43/timit/s5/exp/tri3_ali/ali.*.gz |\" " ..
+ "ark:- | " ..
+ "/speechlab/tools/KALDI/kaldi-master/src/bin/ali-to-post " ..
+ "ark:- ark:- |",
+ format = "map"
+ }
+ }
+ }),
+ data = {main_scp = 440, phone_state = 1}}
+ }
+end
+
+function make_decode_readers(scp_file, layer_repo)
+ return {
+ {reader = nerv.KaldiReader(gconf,
+ {
+ id = "main_scp",
+ feature_rspecifier = scp_file,
+ conf_file = gconf.htk_conf,
+ frm_ext = gconf.frm_ext,
+ mlfs = {},
+ need_key = true
+ }),
+ data = {main_scp = 440, phone_state = 1}}
+ }
+end
+
+function make_buffer(readers)
+ return nerv.SGDBuffer(gconf,
+ {
+ buffer_size = gconf.buffer_size,
+ randomize = gconf.randomize,
+ readers = readers,
+ use_gpu = true
+ })
+end
+
+function get_input_order()
+ return {{id = "main_scp", global_transf = true},
+ {id = "phone_state"}}
+end
+
+function get_decode_input_order()
+ return {{id = "main_scp", global_transf = true}}
+end
+
+function get_accuracy(layer_repo)
+ local ce_crit = layer_repo:get_layer("ce_crit")
+ return ce_crit.total_correct / ce_crit.total_frames * 100
+end
+
+function print_stat(layer_repo)
+ local ce_crit = layer_repo:get_layer("ce_crit")
+ nerv.info("*** training stat begin ***")
+ nerv.printf("cross entropy:\t\t%.8f\n", ce_crit.total_ce)
+ nerv.printf("correct:\t\t%d\n", ce_crit.total_correct)
+ nerv.printf("frames:\t\t\t%d\n", ce_crit.total_frames)
+ nerv.printf("err/frm:\t\t%.8f\n", ce_crit.total_ce / ce_crit.total_frames)
+ nerv.printf("accuracy:\t\t%.3f%%\n", get_accuracy(layer_repo))
+ nerv.info("*** training stat end ***")
+end
diff --git a/nerv/init.lua b/nerv/init.lua
index e7d668c..4aa7a45 100644
--- a/nerv/init.lua
+++ b/nerv/init.lua
@@ -25,7 +25,7 @@ end
-- @param fmt the format string
-- @param ... args, the data to be formatted
function nerv.printf(fmt, ...)
- io.write(nerv.sprintf(fmt, ...))
+ io.stderr:write(nerv.sprintf(fmt, ...))
end
--- Raise an global error with the formatted message.
diff --git a/nerv/lib/cblas.h b/nerv/lib/cblas.h
new file mode 100644
index 0000000..4087ffb
--- /dev/null
+++ b/nerv/lib/cblas.h
@@ -0,0 +1,596 @@
+#ifndef CBLAS_H
+
+#ifndef CBLAS_ENUM_DEFINED_H
+ #define CBLAS_ENUM_DEFINED_H
+ enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102 };
+ enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113,
+ AtlasConj=114};
+ enum CBLAS_UPLO {CblasUpper=121, CblasLower=122};
+ enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132};
+ enum CBLAS_SIDE {CblasLeft=141, CblasRight=142};
+#endif
+
+#ifndef CBLAS_ENUM_ONLY
+#define CBLAS_H
+#define CBLAS_INDEX int
+
+int cblas_errprn(int ierr, int info, char *form, ...);
+
+/*
+ * ===========================================================================
+ * Prototypes for level 1 BLAS functions (complex are recast as routines)
+ * ===========================================================================
+ */
+float cblas_sdsdot(const int N, const float alpha, const float *X,
+ const int incX, const float *Y, const int incY);
+double cblas_dsdot(const int N, const float *X, const int incX, const float *Y,
+ const int incY);
+float cblas_sdot(const int N, const float *X, const int incX,
+ const float *Y, const int incY);
+double cblas_ddot(const int N, const double *X, const int incX,
+ const double *Y, const int incY);
+/*
+ * Functions having prefixes Z and C only
+ */
+void cblas_cdotu_sub(const int N, const void *X, const int incX,
+ const void *Y, const int incY, void *dotu);
+void cblas_cdotc_sub(const int N, const void *X, const int incX,
+ const void *Y, const int incY, void *dotc);
+
+void cblas_zdotu_sub(const int N, const void *X, const int incX,
+ const void *Y, const int incY, void *dotu);
+void cblas_zdotc_sub(const int N, const void *X, const int incX,
+ const void *Y, const int incY, void *dotc);
+
+
+/*
+ * Functions having prefixes S D SC DZ
+ */
+float cblas_snrm2(const int N, const float *X, const int incX);
+float cblas_sasum(const int N, const float *X, const int incX);
+
+double cblas_dnrm2(const int N, const double *X, const int incX);
+double cblas_dasum(const int N, const double *X, const int incX);
+
+float cblas_scnrm2(const int N, const void *X, const int incX);
+float cblas_scasum(const int N, const void *X, const int incX);
+
+double cblas_dznrm2(const int N, const void *X, const int incX);
+double cblas_dzasum(const int N, const void *X, const int incX);
+
+
+/*
+ * Functions having standard 4 prefixes (S D C Z)
+ */
+CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX);
+CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX);
+CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX);
+CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX);
+
+/*
+ * ===========================================================================
+ * Prototypes for level 1 BLAS routines
+ * ===========================================================================
+ */
+
+/*
+ * Routines with standard 4 prefixes (s, d, c, z)
+ */
+void cblas_sswap(const int N, float *X, const int incX,
+ float *Y, const int incY);
+void cblas_scopy(const int N, const float *X, const int incX,
+ float *Y, const int incY);
+void cblas_saxpy(const int N, const float alpha, const float *X,
+ const int incX, float *Y, const int incY);
+void catlas_saxpby(const int N, const float alpha, const float *X,
+ const int incX, const float beta, float *Y, const int incY);
+void catlas_sset
+ (const int N, const float alpha, float *X, const int incX);
+
+void cblas_dswap(const int N, double *X, const int incX,
+ double *Y, const int incY);
+void cblas_dcopy(const int N, const double *X, const int incX,
+ double *Y, const int incY);
+void cblas_daxpy(const int N, const double alpha, const double *X,
+ const int incX, double *Y, const int incY);
+void catlas_daxpby(const int N, const double alpha, const double *X,
+ const int incX, const double beta, double *Y, const int incY);
+void catlas_dset
+ (const int N, const double alpha, double *X, const int incX);
+
+void cblas_cswap(const int N, void *X, const int incX,
+ void *Y, const int incY);
+void cblas_ccopy(const int N, const void *X, const int incX,
+ void *Y, const int incY);
+void cblas_caxpy(const int N, const void *alpha, const void *X,
+ const int incX, void *Y, const int incY);
+void catlas_caxpby(const int N, const void *alpha, const void *X,
+ const int incX, const void *beta, void *Y, const int incY);
+void catlas_cset
+ (const int N, const void *alpha, void *X, const int incX);
+
+void cblas_zswap(const int N, void *X, const int incX,
+ void *Y, const int incY);
+void cblas_zcopy(const int N, const void *X, const int incX,
+ void *Y, const int incY);
+void cblas_zaxpy(const int N, const void *alpha, const void *X,
+ const int incX, void *Y, const int incY);
+void catlas_zaxpby(const int N, const void *alpha, const void *X,
+ const int incX, const void *beta, void *Y, const int incY);
+void catlas_zset
+ (const int N, const void *alpha, void *X, const int incX);
+
+
+/*
+ * Routines with S and D prefix only
+ */
+void cblas_srotg(float *a, float *b, float *c, float *s);
+void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P);
+void cblas_srot(const int N, float *X, const int incX,
+ float *Y, const int incY, const float c, const float s);
+void cblas_srotm(const int N, float *X, const int incX,
+ float *Y, const int incY, const float *P);
+
+void cblas_drotg(double *a, double *b, double *c, double *s);
+void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P);
+void cblas_drot(const int N, double *X, const int incX,
+ double *Y, const int incY, const double c, const double s);
+void cblas_drotm(const int N, double *X, const int incX,
+ double *Y, const int incY, const double *P);
+
+
+/*
+ * Routines with S D C Z CS and ZD prefixes
+ */
+void cblas_sscal(const int N, const float alpha, float *X, const int incX);
+void cblas_dscal(const int N, const double alpha, double *X, const int incX);
+void cblas_cscal(const int N, const void *alpha, void *X, const int incX);
+void cblas_zscal(const int N, const void *alpha, void *X, const int incX);
+void cblas_csscal(const int N, const float alpha, void *X, const int incX);
+void cblas_zdscal(const int N, const double alpha, void *X, const int incX);
+
+/*
+ * Extra reference routines provided by ATLAS, but not mandated by the standard
+ */
+void cblas_crotg(void *a, void *b, void *c, void *s);
+void cblas_zrotg(void *a, void *b, void *c, void *s);
+void cblas_csrot(const int N, void *X, const int incX, void *Y, const int incY,
+ const float c, const float s);
+void cblas_zdrot(const int N, void *X, const int incX, void *Y, const int incY,
+ const double c, const double s);
+
+/*
+ * ===========================================================================
+ * Prototypes for level 2 BLAS
+ * ===========================================================================
+ */
+
+/*
+ * Routines with standard 4 prefixes (S, D, C, Z)
+ */
+void cblas_sgemv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const float alpha, const float *A, const int lda,
+ const float *X, const int incX, const float beta,
+ float *Y, const int incY);
+void cblas_sgbmv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const int KL, const int KU, const float alpha,
+ const float *A, const int lda, const float *X,
+ const int incX, const float beta, float *Y, const int incY);
+void cblas_strmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const float *A, const int lda,
+ float *X, const int incX);
+void cblas_stbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const float *A, const int lda,
+ float *X, const int incX);
+void cblas_stpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const float *Ap, float *X, const int incX);
+void cblas_strsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const float *A, const int lda, float *X,
+ const int incX);
+void cblas_stbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const float *A, const int lda,
+ float *X, const int incX);
+void cblas_stpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const float *Ap, float *X, const int incX);
+
+void cblas_dgemv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const double alpha, const double *A, const int lda,
+ const double *X, const int incX, const double beta,
+ double *Y, const int incY);
+void cblas_dgbmv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const int KL, const int KU, const double alpha,
+ const double *A, const int lda, const double *X,
+ const int incX, const double beta, double *Y, const int incY);
+void cblas_dtrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const double *A, const int lda,
+ double *X, const int incX);
+void cblas_dtbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const double *A, const int lda,
+ double *X, const int incX);
+void cblas_dtpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const double *Ap, double *X, const int incX);
+void cblas_dtrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const double *A, const int lda, double *X,
+ const int incX);
+void cblas_dtbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const double *A, const int lda,
+ double *X, const int incX);
+void cblas_dtpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const double *Ap, double *X, const int incX);
+
+void cblas_cgemv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ const void *X, const int incX, const void *beta,
+ void *Y, const int incY);
+void cblas_cgbmv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const int KL, const int KU, const void *alpha,
+ const void *A, const int lda, const void *X,
+ const int incX, const void *beta, void *Y, const int incY);
+void cblas_ctrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *A, const int lda,
+ void *X, const int incX);
+void cblas_ctbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const void *A, const int lda,
+ void *X, const int incX);
+void cblas_ctpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *Ap, void *X, const int incX);
+void cblas_ctrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *A, const int lda, void *X,
+ const int incX);
+void cblas_ctbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const void *A, const int lda,
+ void *X, const int incX);
+void cblas_ctpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *Ap, void *X, const int incX);
+
+void cblas_zgemv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ const void *X, const int incX, const void *beta,
+ void *Y, const int incY);
+void cblas_zgbmv(const enum CBLAS_ORDER Order,
+ const enum CBLAS_TRANSPOSE TransA, const int M, const int N,
+ const int KL, const int KU, const void *alpha,
+ const void *A, const int lda, const void *X,
+ const int incX, const void *beta, void *Y, const int incY);
+void cblas_ztrmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *A, const int lda,
+ void *X, const int incX);
+void cblas_ztbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const void *A, const int lda,
+ void *X, const int incX);
+void cblas_ztpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *Ap, void *X, const int incX);
+void cblas_ztrsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *A, const int lda, void *X,
+ const int incX);
+void cblas_ztbsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const int K, const void *A, const int lda,
+ void *X, const int incX);
+void cblas_ztpsv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE TransA, const enum CBLAS_DIAG Diag,
+ const int N, const void *Ap, void *X, const int incX);
+
+
+/*
+ * Routines with S and D prefixes only
+ */
+void cblas_ssymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const float *A,
+ const int lda, const float *X, const int incX,
+ const float beta, float *Y, const int incY);
+void cblas_ssbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const int K, const float alpha, const float *A,
+ const int lda, const float *X, const int incX,
+ const float beta, float *Y, const int incY);
+void cblas_sspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const float *Ap,
+ const float *X, const int incX,
+ const float beta, float *Y, const int incY);
+void cblas_sger(const enum CBLAS_ORDER Order, const int M, const int N,
+ const float alpha, const float *X, const int incX,
+ const float *Y, const int incY, float *A, const int lda);
+void cblas_ssyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const float *X,
+ const int incX, float *A, const int lda);
+void cblas_sspr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const float *X,
+ const int incX, float *Ap);
+void cblas_ssyr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const float *X,
+ const int incX, const float *Y, const int incY, float *A,
+ const int lda);
+void cblas_sspr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const float *X,
+ const int incX, const float *Y, const int incY, float *A);
+
+void cblas_dsymv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const double *A,
+ const int lda, const double *X, const int incX,
+ const double beta, double *Y, const int incY);
+void cblas_dsbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const int K, const double alpha, const double *A,
+ const int lda, const double *X, const int incX,
+ const double beta, double *Y, const int incY);
+void cblas_dspmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const double *Ap,
+ const double *X, const int incX,
+ const double beta, double *Y, const int incY);
+void cblas_dger(const enum CBLAS_ORDER Order, const int M, const int N,
+ const double alpha, const double *X, const int incX,
+ const double *Y, const int incY, double *A, const int lda);
+void cblas_dsyr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const double *X,
+ const int incX, double *A, const int lda);
+void cblas_dspr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const double *X,
+ const int incX, double *Ap);
+void cblas_dsyr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const double *X,
+ const int incX, const double *Y, const int incY, double *A,
+ const int lda);
+void cblas_dspr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const double *X,
+ const int incX, const double *Y, const int incY, double *A);
+
+
+/*
+ * Routines with C and Z prefixes only
+ */
+void cblas_chemv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const void *alpha, const void *A,
+ const int lda, const void *X, const int incX,
+ const void *beta, void *Y, const int incY);
+void cblas_chbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const int K, const void *alpha, const void *A,
+ const int lda, const void *X, const int incX,
+ const void *beta, void *Y, const int incY);
+void cblas_chpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const void *alpha, const void *Ap,
+ const void *X, const int incX,
+ const void *beta, void *Y, const int incY);
+void cblas_cgeru(const enum CBLAS_ORDER Order, const int M, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *A, const int lda);
+void cblas_cgerc(const enum CBLAS_ORDER Order, const int M, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *A, const int lda);
+void cblas_cher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const void *X, const int incX,
+ void *A, const int lda);
+void cblas_chpr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const float alpha, const void *X,
+ const int incX, void *A);
+void cblas_cher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *A, const int lda);
+void cblas_chpr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *Ap);
+
+void cblas_zhemv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const void *alpha, const void *A,
+ const int lda, const void *X, const int incX,
+ const void *beta, void *Y, const int incY);
+void cblas_zhbmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const int K, const void *alpha, const void *A,
+ const int lda, const void *X, const int incX,
+ const void *beta, void *Y, const int incY);
+void cblas_zhpmv(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const void *alpha, const void *Ap,
+ const void *X, const int incX,
+ const void *beta, void *Y, const int incY);
+void cblas_zgeru(const enum CBLAS_ORDER Order, const int M, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *A, const int lda);
+void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *A, const int lda);
+void cblas_zher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const void *X, const int incX,
+ void *A, const int lda);
+void cblas_zhpr(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const int N, const double alpha, const void *X,
+ const int incX, void *A);
+void cblas_zher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *A, const int lda);
+void cblas_zhpr2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo, const int N,
+ const void *alpha, const void *X, const int incX,
+ const void *Y, const int incY, void *Ap);
+
+/*
+ * ===========================================================================
+ * Prototypes for level 3 BLAS
+ * ===========================================================================
+ */
+
+/*
+ * Routines with standard 4 prefixes (S, D, C, Z)
+ */
+void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
+ const int K, const float alpha, const float *A,
+ const int lda, const float *B, const int ldb,
+ const float beta, float *C, const int ldc);
+void cblas_ssymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const int M, const int N,
+ const float alpha, const float *A, const int lda,
+ const float *B, const int ldb, const float beta,
+ float *C, const int ldc);
+void cblas_ssyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const float alpha, const float *A, const int lda,
+ const float beta, float *C, const int ldc);
+void cblas_ssyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const float alpha, const float *A, const int lda,
+ const float *B, const int ldb, const float beta,
+ float *C, const int ldc);
+void cblas_strmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const float alpha, const float *A, const int lda,
+ float *B, const int ldb);
+void cblas_strsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const float alpha, const float *A, const int lda,
+ float *B, const int ldb);
+
+void cblas_dgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
+ const int K, const double alpha, const double *A,
+ const int lda, const double *B, const int ldb,
+ const double beta, double *C, const int ldc);
+void cblas_dsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const int M, const int N,
+ const double alpha, const double *A, const int lda,
+ const double *B, const int ldb, const double beta,
+ double *C, const int ldc);
+void cblas_dsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const double alpha, const double *A, const int lda,
+ const double beta, double *C, const int ldc);
+void cblas_dsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const double alpha, const double *A, const int lda,
+ const double *B, const int ldb, const double beta,
+ double *C, const int ldc);
+void cblas_dtrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const double alpha, const double *A, const int lda,
+ double *B, const int ldb);
+void cblas_dtrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const double alpha, const double *A, const int lda,
+ double *B, const int ldb);
+
+void cblas_cgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
+ const int K, const void *alpha, const void *A,
+ const int lda, const void *B, const int ldb,
+ const void *beta, void *C, const int ldc);
+void cblas_csymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const void *beta,
+ void *C, const int ldc);
+void cblas_csyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const void *alpha, const void *A, const int lda,
+ const void *beta, void *C, const int ldc);
+void cblas_csyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const void *beta,
+ void *C, const int ldc);
+void cblas_ctrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ void *B, const int ldb);
+void cblas_ctrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ void *B, const int ldb);
+
+void cblas_zgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_TRANSPOSE TransB, const int M, const int N,
+ const int K, const void *alpha, const void *A,
+ const int lda, const void *B, const int ldb,
+ const void *beta, void *C, const int ldc);
+void cblas_zsymm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const void *beta,
+ void *C, const int ldc);
+void cblas_zsyrk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const void *alpha, const void *A, const int lda,
+ const void *beta, void *C, const int ldc);
+void cblas_zsyr2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const void *beta,
+ void *C, const int ldc);
+void cblas_ztrmm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ void *B, const int ldb);
+void cblas_ztrsm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const enum CBLAS_TRANSPOSE TransA,
+ const enum CBLAS_DIAG Diag, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ void *B, const int ldb);
+
+
+/*
+ * Routines with prefixes C and Z only
+ */
+void cblas_chemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const void *beta,
+ void *C, const int ldc);
+void cblas_cherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const float alpha, const void *A, const int lda,
+ const float beta, void *C, const int ldc);
+void cblas_cher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const float beta,
+ void *C, const int ldc);
+void cblas_zhemm(const enum CBLAS_ORDER Order, const enum CBLAS_SIDE Side,
+ const enum CBLAS_UPLO Uplo, const int M, const int N,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const void *beta,
+ void *C, const int ldc);
+void cblas_zherk(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const double alpha, const void *A, const int lda,
+ const double beta, void *C, const int ldc);
+void cblas_zher2k(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
+ const enum CBLAS_TRANSPOSE Trans, const int N, const int K,
+ const void *alpha, const void *A, const int lda,
+ const void *B, const int ldb, const double beta,
+ void *C, const int ldc);
+
+int cblas_errprn(int ierr, int info, char *form, ...);
+
+#endif /* end #ifdef CBLAS_ENUM_ONLY */
+#endif
diff --git a/nerv/lib/matrix/generic/mmatrix.c b/nerv/lib/matrix/generic/mmatrix.c
index 485d778..fb99b53 100644
--- a/nerv/lib/matrix/generic/mmatrix.c
+++ b/nerv/lib/matrix/generic/mmatrix.c
@@ -8,10 +8,10 @@
context, status)
#define NERV_GENERIC_MATRIX
#include "../../common.h"
+#include "../../cblas.h"
#include "../../io/chunk_file.h"
#include <string.h>
#include <math.h>
-#include <cblas.h>
#include <float.h>
Matrix *nerv_matrix_(colsum)(Matrix *a, MContext *context, Status *status) {
diff --git a/nerv/matrix/generic/mmatrix.c b/nerv/matrix/generic/mmatrix.c
index 69000b7..1f37173 100644
--- a/nerv/matrix/generic/mmatrix.c
+++ b/nerv/matrix/generic/mmatrix.c
@@ -8,10 +8,10 @@
#define MATRIX_BASE_TNAME nerv_matrix_host_tname
#define NERV_GENERIC_MATRIX
#include "../../lib/common.h"
+#include "../../lib/cblas.h"
#include "../../lib/matrix/generic/mmatrix.h"
#include "../../io/chunk_file.h"
#include <string.h>
-#include <cblas.h>
#define BLAS_OP_N CblasNoTrans
static int nerv_matrix_(lua_get_blas_op)(char ch) {