From a74183ddb4ab8383bfe214b3745eb8a0a99ee47a Mon Sep 17 00:00:00 2001 From: Determinant Date: Thu, 25 Jun 2015 12:56:45 +0800 Subject: let HTK I/O implementation be a single package --- Makefile | 39 - examples/tnet_io_example.lua | 16 - examples/tnet_preprocessing_example.lua | 75 - examples/tnet_preprocessing_example2.lua | 68 - examples/tnet_sgd_buffer.lua | 70 - htk_io/Makefile | 40 + htk_io/examples/tnet_io_example.lua | 16 + htk_io/examples/tnet_preprocessing_example.lua | 75 + htk_io/examples/tnet_preprocessing_example2.lua | 68 + htk_io/examples/tnet_sgd_buffer.lua | 70 + htk_io/htk_io-scm-1.rockspec | 36 + htk_io/init.c | 8 + htk_io/init.lua | 62 + htk_io/src/KaldiLib/Common.cc | 277 ++++ htk_io/src/KaldiLib/Common.h | 233 +++ htk_io/src/KaldiLib/Error.h | 172 +++ htk_io/src/KaldiLib/Features.cc | 1798 +++++++++++++++++++++++ htk_io/src/KaldiLib/Features.h | 597 ++++++++ htk_io/src/KaldiLib/Labels.cc | 612 ++++++++ htk_io/src/KaldiLib/Labels.h | 90 ++ htk_io/src/KaldiLib/Makefile | 28 + htk_io/src/KaldiLib/MathAux.h | 117 ++ htk_io/src/KaldiLib/Matrix.cc | 295 ++++ htk_io/src/KaldiLib/Matrix.h | 677 +++++++++ htk_io/src/KaldiLib/Matrix.tcc | 796 ++++++++++ htk_io/src/KaldiLib/MlfStream.cc | 268 ++++ htk_io/src/KaldiLib/MlfStream.h | 639 ++++++++ htk_io/src/KaldiLib/MlfStream.tcc | 517 +++++++ htk_io/src/KaldiLib/StkMatch.cc | 582 ++++++++ htk_io/src/KaldiLib/StkMatch.h | 123 ++ htk_io/src/KaldiLib/StkStream.h | 526 +++++++ htk_io/src/KaldiLib/StkStream.tcc | 228 +++ htk_io/src/KaldiLib/Timer.cc | 5 + htk_io/src/KaldiLib/Timer.h | 103 ++ htk_io/src/KaldiLib/Tokenizer.cc | 53 + htk_io/src/KaldiLib/Tokenizer.h | 45 + htk_io/src/KaldiLib/Types.h | 78 + htk_io/src/KaldiLib/UserInterface.cc | 669 +++++++++ htk_io/src/KaldiLib/UserInterface.h | 166 +++ htk_io/src/KaldiLib/Vector.cc | 110 ++ htk_io/src/KaldiLib/Vector.h | 496 +++++++ htk_io/src/KaldiLib/Vector.tcc | 638 ++++++++ htk_io/src/KaldiLib/clapack.cc | 61 + htk_io/src/KaldiLib/clapack.h | 149 ++ htk_io/src/cwrapper.cpp | 148 ++ htk_io/src/cwrapper.h | 37 + htk_io/src/init.c | 118 ++ htk_io/src/test.c | 40 + htk_io/src/tnet.mk | 83 ++ htk_io/tools/tnet_to_nerv.c | 57 + htk_io/tools/tnet_to_nerv.cpp | 68 + init.c | 8 - init.lua | 62 - speech-scm-1.rockspec | 38 - tnet_io/KaldiLib/Common.cc | 277 ---- tnet_io/KaldiLib/Common.h | 233 --- tnet_io/KaldiLib/Error.h | 172 --- tnet_io/KaldiLib/Features.cc | 1798 ----------------------- tnet_io/KaldiLib/Features.h | 597 -------- tnet_io/KaldiLib/Labels.cc | 612 -------- tnet_io/KaldiLib/Labels.h | 90 -- tnet_io/KaldiLib/Makefile | 28 - tnet_io/KaldiLib/MathAux.h | 117 -- tnet_io/KaldiLib/Matrix.cc | 295 ---- tnet_io/KaldiLib/Matrix.h | 677 --------- tnet_io/KaldiLib/Matrix.tcc | 796 ---------- tnet_io/KaldiLib/MlfStream.cc | 268 ---- tnet_io/KaldiLib/MlfStream.h | 639 -------- tnet_io/KaldiLib/MlfStream.tcc | 517 ------- tnet_io/KaldiLib/StkMatch.cc | 582 -------- tnet_io/KaldiLib/StkMatch.h | 123 -- tnet_io/KaldiLib/StkStream.h | 526 ------- tnet_io/KaldiLib/StkStream.tcc | 228 --- tnet_io/KaldiLib/Timer.cc | 5 - tnet_io/KaldiLib/Timer.h | 103 -- tnet_io/KaldiLib/Tokenizer.cc | 53 - tnet_io/KaldiLib/Tokenizer.h | 45 - tnet_io/KaldiLib/Types.h | 78 - tnet_io/KaldiLib/UserInterface.cc | 669 --------- tnet_io/KaldiLib/UserInterface.h | 166 --- tnet_io/KaldiLib/Vector.cc | 110 -- tnet_io/KaldiLib/Vector.h | 496 ------- tnet_io/KaldiLib/Vector.tcc | 638 -------- tnet_io/KaldiLib/clapack.cc | 61 - tnet_io/KaldiLib/clapack.h | 149 -- tnet_io/cwrapper.cpp | 148 -- tnet_io/cwrapper.h | 37 - tnet_io/init.c | 118 -- tnet_io/test.c | 40 - tnet_io/tnet.mk | 83 -- tools/tnet_to_nerv.c | 57 - tools/tnet_to_nerv.cpp | 68 - 92 files changed, 12074 insertions(+), 12075 deletions(-) delete mode 100644 Makefile delete mode 100644 examples/tnet_io_example.lua delete mode 100644 examples/tnet_preprocessing_example.lua delete mode 100644 examples/tnet_preprocessing_example2.lua delete mode 100644 examples/tnet_sgd_buffer.lua create mode 100644 htk_io/Makefile create mode 100644 htk_io/examples/tnet_io_example.lua create mode 100644 htk_io/examples/tnet_preprocessing_example.lua create mode 100644 htk_io/examples/tnet_preprocessing_example2.lua create mode 100644 htk_io/examples/tnet_sgd_buffer.lua create mode 100644 htk_io/htk_io-scm-1.rockspec create mode 100644 htk_io/init.c create mode 100644 htk_io/init.lua create mode 100644 htk_io/src/KaldiLib/Common.cc create mode 100644 htk_io/src/KaldiLib/Common.h create mode 100644 htk_io/src/KaldiLib/Error.h create mode 100644 htk_io/src/KaldiLib/Features.cc create mode 100644 htk_io/src/KaldiLib/Features.h create mode 100644 htk_io/src/KaldiLib/Labels.cc create mode 100644 htk_io/src/KaldiLib/Labels.h create mode 100644 htk_io/src/KaldiLib/Makefile create mode 100644 htk_io/src/KaldiLib/MathAux.h create mode 100644 htk_io/src/KaldiLib/Matrix.cc create mode 100644 htk_io/src/KaldiLib/Matrix.h create mode 100644 htk_io/src/KaldiLib/Matrix.tcc create mode 100644 htk_io/src/KaldiLib/MlfStream.cc create mode 100644 htk_io/src/KaldiLib/MlfStream.h create mode 100644 htk_io/src/KaldiLib/MlfStream.tcc create mode 100644 htk_io/src/KaldiLib/StkMatch.cc create mode 100644 htk_io/src/KaldiLib/StkMatch.h create mode 100644 htk_io/src/KaldiLib/StkStream.h create mode 100644 htk_io/src/KaldiLib/StkStream.tcc create mode 100644 htk_io/src/KaldiLib/Timer.cc create mode 100644 htk_io/src/KaldiLib/Timer.h create mode 100644 htk_io/src/KaldiLib/Tokenizer.cc create mode 100644 htk_io/src/KaldiLib/Tokenizer.h create mode 100644 htk_io/src/KaldiLib/Types.h create mode 100644 htk_io/src/KaldiLib/UserInterface.cc create mode 100644 htk_io/src/KaldiLib/UserInterface.h create mode 100644 htk_io/src/KaldiLib/Vector.cc create mode 100644 htk_io/src/KaldiLib/Vector.h create mode 100644 htk_io/src/KaldiLib/Vector.tcc create mode 100644 htk_io/src/KaldiLib/clapack.cc create mode 100644 htk_io/src/KaldiLib/clapack.h create mode 100644 htk_io/src/cwrapper.cpp create mode 100644 htk_io/src/cwrapper.h create mode 100644 htk_io/src/init.c create mode 100644 htk_io/src/test.c create mode 100644 htk_io/src/tnet.mk create mode 100644 htk_io/tools/tnet_to_nerv.c create mode 100644 htk_io/tools/tnet_to_nerv.cpp delete mode 100644 init.c delete mode 100644 init.lua delete mode 100644 speech-scm-1.rockspec delete mode 100644 tnet_io/KaldiLib/Common.cc delete mode 100644 tnet_io/KaldiLib/Common.h delete mode 100644 tnet_io/KaldiLib/Error.h delete mode 100644 tnet_io/KaldiLib/Features.cc delete mode 100644 tnet_io/KaldiLib/Features.h delete mode 100644 tnet_io/KaldiLib/Labels.cc delete mode 100644 tnet_io/KaldiLib/Labels.h delete mode 100644 tnet_io/KaldiLib/Makefile delete mode 100644 tnet_io/KaldiLib/MathAux.h delete mode 100644 tnet_io/KaldiLib/Matrix.cc delete mode 100644 tnet_io/KaldiLib/Matrix.h delete mode 100644 tnet_io/KaldiLib/Matrix.tcc delete mode 100644 tnet_io/KaldiLib/MlfStream.cc delete mode 100644 tnet_io/KaldiLib/MlfStream.h delete mode 100644 tnet_io/KaldiLib/MlfStream.tcc delete mode 100644 tnet_io/KaldiLib/StkMatch.cc delete mode 100644 tnet_io/KaldiLib/StkMatch.h delete mode 100644 tnet_io/KaldiLib/StkStream.h delete mode 100644 tnet_io/KaldiLib/StkStream.tcc delete mode 100644 tnet_io/KaldiLib/Timer.cc delete mode 100644 tnet_io/KaldiLib/Timer.h delete mode 100644 tnet_io/KaldiLib/Tokenizer.cc delete mode 100644 tnet_io/KaldiLib/Tokenizer.h delete mode 100644 tnet_io/KaldiLib/Types.h delete mode 100644 tnet_io/KaldiLib/UserInterface.cc delete mode 100644 tnet_io/KaldiLib/UserInterface.h delete mode 100644 tnet_io/KaldiLib/Vector.cc delete mode 100644 tnet_io/KaldiLib/Vector.h delete mode 100644 tnet_io/KaldiLib/Vector.tcc delete mode 100644 tnet_io/KaldiLib/clapack.cc delete mode 100644 tnet_io/KaldiLib/clapack.h delete mode 100644 tnet_io/cwrapper.cpp delete mode 100644 tnet_io/cwrapper.h delete mode 100644 tnet_io/init.c delete mode 100644 tnet_io/test.c delete mode 100644 tnet_io/tnet.mk delete mode 100644 tools/tnet_to_nerv.c delete mode 100644 tools/tnet_to_nerv.cpp diff --git a/Makefile b/Makefile deleted file mode 100644 index a077df8..0000000 --- a/Makefile +++ /dev/null @@ -1,39 +0,0 @@ -.PHONY: tnet -SHELL := /bin/bash -BUILD_DIR := $(CURDIR)/build -OBJS := init.o tnet_io/cwrapper.o tnet_io/init.o -LIBS := libspeech.so -LUA_LIBS := init.lua -INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK - -SUBDIR := tnet_io -OBJ_DIR := $(BUILD_DIR)/objs -LUA_DIR = $(INST_LUADIR)/speech - -OBJS := $(addprefix $(OBJ_DIR)/,$(OBJS)) -LIBS := $(addprefix $(INST_LIBDIR)/,$(LIBS)) -OBJ_SUBDIR := $(addprefix $(OBJ_DIR)/,$(SUBDIR)) -LUA_SUBDIR := $(addprefix $(LUA_DIR)/,$(SUBDIR)) -LUA_LIBS := $(addprefix $(LUA_DIR)/,$(LUA_LIBS)) -LIB_PATH := $(LUA_BINDIR)/../lib - -build: $(OBJ_DIR) $(OBJ_SUBDIR) $(OBJS) tnet $(OBJ_DIR)/tnet_io/test -install: $(LUA_DIR) $(LUA_SUBDIR) $(LUA_LIBS) $(LIBS) - -$(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR): - -mkdir -p $@ -$(LUA_DIR)/%.lua: %.lua - cp $< $@ -$(LIBS): $(OBJ_DIR)/tnet_io/cwrapper.o $(OBJ_DIR)/init.o $(OBJ_DIR)/tnet_io/init.o $(OBJ_DIR)/tnet_io/libKaldiLib.a - gcc -shared -o $@ $(OBJ_DIR)/tnet_io/cwrapper.o $(OBJ_DIR)/init.o $(OBJ_DIR)/tnet_io/libKaldiLib.a $(OBJ_DIR)/tnet_io/init.o -lstdc++ -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -lluaT -$(OBJ_DIR)/tnet_io/test: $(OBJ_DIR)/tnet_io/cwrapper.o $(OBJ_DIR)/tnet_io/test.o $(OBJ_DIR)/tnet_io/libKaldiLib.a - gcc -o $@ $^ -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -Wl,-rpath=$(LUA_LIBDIR) -L$(LUA_LIBDIR) -lluajit-5.1 -lstdc++ -lm -$(OBJ_DIR)/tnet_io/cwrapper.o: tnet_io/cwrapper.cpp - g++ -o $@ -c $< -DHAVE_ATLAS -I tnet_io/KaldiLib/ -g -fPIC $(INCLUDE) -$(OBJ_DIR)/%.o: %.c - gcc -o $@ -c $< -g $(INCLUDE) -fPIC -clean: - -rm $(OBJ_DIR)/tnet_io/*.o - $(MAKE) -C tnet_io/KaldiLib/ clean -tnet: - $(MAKE) -C tnet_io/KaldiLib/ OBJ_DIR=$(OBJ_DIR)/tnet_io diff --git a/examples/tnet_io_example.lua b/examples/tnet_io_example.lua deleted file mode 100644 index eea73a5..0000000 --- a/examples/tnet_io_example.lua +++ /dev/null @@ -1,16 +0,0 @@ -require 'libspeech' -frm_ext = 5 -feat_repo = nerv.TNetFeatureRepo( - "/slfs1/users/mfy43/swb_ivec/train_bp.scp", - "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", - frm_ext) -lab_repo = nerv.TNetLabelRepo( - "/slfs1/users/mfy43/swb_ivec/ref.mlf", - "map", - "/slfs1/users/mfy43/swb_ivec/dict", - "*/", - "lab") -feat_utter = feat_repo:cur_utter() -print(feat_utter) -lab_utter = lab_repo:get_utter(feat_repo, feat_utter:nrow() - frm_ext * 2) -print(lab_utter) diff --git a/examples/tnet_preprocessing_example.lua b/examples/tnet_preprocessing_example.lua deleted file mode 100644 index 9e1c0ce..0000000 --- a/examples/tnet_preprocessing_example.lua +++ /dev/null @@ -1,75 +0,0 @@ -require 'libspeech' -frm_ext = 5 -gconf = {cumat_type = nerv.CuMatrixFloat, - batch_size = 158} -param_repo = nerv.ParamRepo({"global_transf.nerv"}) -sublayer_repo = nerv.LayerRepo( - { - ["nerv.BiasLayer"] = - { - blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, - blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} - }, - ["nerv.WindowLayer"] = - { - wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, - wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} - } - }, param_repo, gconf) - -layer_repo = nerv.LayerRepo( - { - ["nerv.DAGLayer"] = - { - main = {{}, { - dim_in = {429}, dim_out = {429}, - sub_layers = sublayer_repo, - connections = { - ["[1]"] = "blayer1[1]", - ["blayer1[1]"] = "wlayer1[1]", - ["wlayer1[1]"] = "blayer2[1]", - ["blayer2[1]"] = "wlayer2[1]", - ["wlayer2[1]"] = "[1]" - } - }} - } - }, param_repo, gconf) - -feat_repo = nerv.TNetFeatureRepo( - "/slfs1/users/mfy43/swb_ivec/train_bp.scp", - "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", - frm_ext) -lab_repo = nerv.TNetLabelRepo( - "/slfs1/users/mfy43/swb_ivec/ref.mlf", - "map", - "/slfs1/users/mfy43/swb_ivec/dict", - "*/", - "lab") -feat_utter = feat_repo:cur_utter() - --- print(feat_utter) --- lab_utter = lab_repo:get_utter(feat_repo, feat_utter:nrow() - frm_ext * 2) --- print(lab_utter) - -cf2 = nerv.ChunkFile("feat_256", "r") -input = cf2:read_chunk("input", gconf) - -step = frm_ext * 2 + 1 -expanded = nerv.CuMatrixFloat(feat_utter:nrow(), feat_utter:ncol() * step) -expanded:expand_frm(nerv.CuMatrixFloat.new_from_host(feat_utter), frm_ext) - -rearranged = expanded:create() -rearranged:rearrange_frm(expanded, step) - -output = {expanded:create()} -main = layer_repo:get_layer("main") -main:init() -main:propagate({rearranged}, output) - -for i = 0, 157 - 10 do - row_diff = input.trans[i] - output[1][i + 5] - for j = 0, row_diff:ncol() - 1 do - nerv.printf("%.8f ", row_diff[j]) - end - nerv.printf("\n") -end diff --git a/examples/tnet_preprocessing_example2.lua b/examples/tnet_preprocessing_example2.lua deleted file mode 100644 index 1215b23..0000000 --- a/examples/tnet_preprocessing_example2.lua +++ /dev/null @@ -1,68 +0,0 @@ -require 'speech.init' -gconf = {cumat_type = nerv.CuMatrixFloat, - batch_size = 158} -param_repo = nerv.ParamRepo({"global_transf.nerv"}) - -sublayer_repo = nerv.LayerRepo( - { - ["nerv.BiasLayer"] = - { - blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, - blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} - }, - ["nerv.WindowLayer"] = - { - wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, - wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} - } - }, param_repo, gconf) - -layer_repo = nerv.LayerRepo( - { - ["nerv.DAGLayer"] = - { - main = {{}, { - dim_in = {429}, dim_out = {429}, - sub_layers = sublayer_repo, - connections = { - ["[1]"] = "blayer1[1]", - ["blayer1[1]"] = "wlayer1[1]", - ["wlayer1[1]"] = "blayer2[1]", - ["blayer2[1]"] = "wlayer2[1]", - ["wlayer2[1]"] = "[1]" - } - }} - } - }, param_repo, gconf) - -reader = nerv.TNetReader({}, - { - id = "main_scp", - scp_file = "/slfs1/users/mfy43/swb_ivec/train_bp.scp", - conf_file = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", - frm_ext = 5, - mlfs = { - ref = { - file = "/slfs1/users/mfy43/swb_ivec/ref.mlf", - format = "map", - format_arg = "/slfs1/users/mfy43/swb_ivec/dict", - dir = "*/", - ext = "lab" - } - }, - global_transf = layer_repo:get_layer("main") - }) - -utter = reader:get_data() --- print(utter.main_scp) -print(utter.ref) --- cf2 = nerv.ChunkFile("feat_256", "r") --- input = cf2:read_chunk("input", gconf) - --- for i = 0, 157 - 10 do --- row_diff = input.trans[i] - utter.main_scp[i] --- for j = 0, row_diff:ncol() - 1 do --- nerv.printf("%.8f ", row_diff[j]) --- end --- nerv.printf("\n") --- end diff --git a/examples/tnet_sgd_buffer.lua b/examples/tnet_sgd_buffer.lua deleted file mode 100644 index 152d2f5..0000000 --- a/examples/tnet_sgd_buffer.lua +++ /dev/null @@ -1,70 +0,0 @@ -require 'speech.init' -gconf = {cumat_type = nerv.CuMatrixFloat, - mmat_type = nerv.MMatrixFloat, - batch_size = 256} -param_repo = nerv.ParamRepo({"global_transf.nerv"}) - -sublayer_repo = nerv.LayerRepo( - { - ["nerv.BiasLayer"] = - { - blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, - blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} - }, - ["nerv.WindowLayer"] = - { - wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, - wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} - } - }, param_repo, gconf) - -layer_repo = nerv.LayerRepo( - { - ["nerv.DAGLayer"] = - { - main = {{}, { - dim_in = {429}, dim_out = {429}, - sub_layers = sublayer_repo, - connections = { - ["[1]"] = "blayer1[1]", - ["blayer1[1]"] = "wlayer1[1]", - ["wlayer1[1]"] = "blayer2[1]", - ["blayer2[1]"] = "wlayer2[1]", - ["wlayer2[1]"] = "[1]" - } - }} - } - }, param_repo, gconf) - -tnet_reader = nerv.TNetReader({}, - { - id = "main_scp", --- scp_file = "/slfs1/users/mfy43/swb_ivec/train_bp.scp", - scp_file = "t.scp", - conf_file = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", - frm_ext = 5, - mlfs = { - ref = { - file = "/slfs1/users/mfy43/swb_ivec/ref.mlf", - format = "map", - format_arg = "/slfs1/users/mfy43/swb_ivec/dict", - dir = "*/", - ext = "lab" - } - }, - global_transf = layer_repo:get_layer("main") - }) - -buffer = nerv.SGDBuffer(gconf, - { - buffer_size = 1024, - readers = { - { reader = tnet_reader, - data = {main_scp = 429, ref = 1}} - } - }) - -for data in buffer.get_data, buffer do - print(data.main_scp) --- print(data.ref) -end diff --git a/htk_io/Makefile b/htk_io/Makefile new file mode 100644 index 0000000..d32d17a --- /dev/null +++ b/htk_io/Makefile @@ -0,0 +1,40 @@ +.PHONY: tnet +SHELL := /bin/bash +BUILD_DIR := $(CURDIR)/build +INC_PATH := $(LUA_BINDIR)/../include/ +OBJS := init.o src/cwrapper.o src/init.o +LIBS := libhtkio.so +LUA_LIBS := init.lua +INCLUDE := -I $(LUA_INCDIR) -I $(INC_PATH) -DLUA_USE_APICHECK + +SUBDIR := src +OBJ_DIR := $(BUILD_DIR)/objs +LUA_DIR = $(INST_LUADIR)/htk_io + +OBJS := $(addprefix $(OBJ_DIR)/,$(OBJS)) +LIBS := $(addprefix $(INST_LIBDIR)/,$(LIBS)) +OBJ_SUBDIR := $(addprefix $(OBJ_DIR)/,$(SUBDIR)) +LUA_SUBDIR := $(addprefix $(LUA_DIR)/,$(SUBDIR)) +LUA_LIBS := $(addprefix $(LUA_DIR)/,$(LUA_LIBS)) +LIB_PATH := $(LUA_BINDIR)/../lib + +build: $(OBJ_DIR) $(OBJ_SUBDIR) $(OBJS) tnet $(OBJ_DIR)/src/test +install: $(LUA_DIR) $(LUA_SUBDIR) $(LUA_LIBS) $(LIBS) + +$(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR): + -mkdir -p $@ +$(LUA_DIR)/%.lua: %.lua + cp $< $@ +$(LIBS): $(OBJ_DIR)/src/cwrapper.o $(OBJ_DIR)/init.o $(OBJ_DIR)/src/init.o $(OBJ_DIR)/src/libKaldiLib.a + gcc -shared -o $@ $(OBJ_DIR)/src/cwrapper.o $(OBJ_DIR)/init.o $(OBJ_DIR)/src/libKaldiLib.a $(OBJ_DIR)/src/init.o -lstdc++ -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -lluaT +$(OBJ_DIR)/src/test: $(OBJ_DIR)/src/cwrapper.o $(OBJ_DIR)/src/test.o $(OBJ_DIR)/src/libKaldiLib.a + gcc -o $@ $^ -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -Wl,-rpath=$(LUA_LIBDIR) -L$(LUA_LIBDIR) -lluajit-5.1 -lstdc++ -lm +$(OBJ_DIR)/src/cwrapper.o: src/cwrapper.cpp + g++ -o $@ -c $< -DHAVE_ATLAS -I src/KaldiLib/ -g -fPIC $(INCLUDE) +$(OBJ_DIR)/%.o: %.c + gcc -o $@ -c $< -g $(INCLUDE) -fPIC +clean: + -rm $(OBJ_DIR)/src/*.o + $(MAKE) -C src/KaldiLib/ clean +tnet: + $(MAKE) -C src/KaldiLib/ OBJ_DIR=$(OBJ_DIR)/src diff --git a/htk_io/examples/tnet_io_example.lua b/htk_io/examples/tnet_io_example.lua new file mode 100644 index 0000000..eea73a5 --- /dev/null +++ b/htk_io/examples/tnet_io_example.lua @@ -0,0 +1,16 @@ +require 'libspeech' +frm_ext = 5 +feat_repo = nerv.TNetFeatureRepo( + "/slfs1/users/mfy43/swb_ivec/train_bp.scp", + "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", + frm_ext) +lab_repo = nerv.TNetLabelRepo( + "/slfs1/users/mfy43/swb_ivec/ref.mlf", + "map", + "/slfs1/users/mfy43/swb_ivec/dict", + "*/", + "lab") +feat_utter = feat_repo:cur_utter() +print(feat_utter) +lab_utter = lab_repo:get_utter(feat_repo, feat_utter:nrow() - frm_ext * 2) +print(lab_utter) diff --git a/htk_io/examples/tnet_preprocessing_example.lua b/htk_io/examples/tnet_preprocessing_example.lua new file mode 100644 index 0000000..9e1c0ce --- /dev/null +++ b/htk_io/examples/tnet_preprocessing_example.lua @@ -0,0 +1,75 @@ +require 'libspeech' +frm_ext = 5 +gconf = {cumat_type = nerv.CuMatrixFloat, + batch_size = 158} +param_repo = nerv.ParamRepo({"global_transf.nerv"}) +sublayer_repo = nerv.LayerRepo( + { + ["nerv.BiasLayer"] = + { + blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, + blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} + }, + ["nerv.WindowLayer"] = + { + wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, + wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} + } + }, param_repo, gconf) + +layer_repo = nerv.LayerRepo( + { + ["nerv.DAGLayer"] = + { + main = {{}, { + dim_in = {429}, dim_out = {429}, + sub_layers = sublayer_repo, + connections = { + ["[1]"] = "blayer1[1]", + ["blayer1[1]"] = "wlayer1[1]", + ["wlayer1[1]"] = "blayer2[1]", + ["blayer2[1]"] = "wlayer2[1]", + ["wlayer2[1]"] = "[1]" + } + }} + } + }, param_repo, gconf) + +feat_repo = nerv.TNetFeatureRepo( + "/slfs1/users/mfy43/swb_ivec/train_bp.scp", + "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", + frm_ext) +lab_repo = nerv.TNetLabelRepo( + "/slfs1/users/mfy43/swb_ivec/ref.mlf", + "map", + "/slfs1/users/mfy43/swb_ivec/dict", + "*/", + "lab") +feat_utter = feat_repo:cur_utter() + +-- print(feat_utter) +-- lab_utter = lab_repo:get_utter(feat_repo, feat_utter:nrow() - frm_ext * 2) +-- print(lab_utter) + +cf2 = nerv.ChunkFile("feat_256", "r") +input = cf2:read_chunk("input", gconf) + +step = frm_ext * 2 + 1 +expanded = nerv.CuMatrixFloat(feat_utter:nrow(), feat_utter:ncol() * step) +expanded:expand_frm(nerv.CuMatrixFloat.new_from_host(feat_utter), frm_ext) + +rearranged = expanded:create() +rearranged:rearrange_frm(expanded, step) + +output = {expanded:create()} +main = layer_repo:get_layer("main") +main:init() +main:propagate({rearranged}, output) + +for i = 0, 157 - 10 do + row_diff = input.trans[i] - output[1][i + 5] + for j = 0, row_diff:ncol() - 1 do + nerv.printf("%.8f ", row_diff[j]) + end + nerv.printf("\n") +end diff --git a/htk_io/examples/tnet_preprocessing_example2.lua b/htk_io/examples/tnet_preprocessing_example2.lua new file mode 100644 index 0000000..1215b23 --- /dev/null +++ b/htk_io/examples/tnet_preprocessing_example2.lua @@ -0,0 +1,68 @@ +require 'speech.init' +gconf = {cumat_type = nerv.CuMatrixFloat, + batch_size = 158} +param_repo = nerv.ParamRepo({"global_transf.nerv"}) + +sublayer_repo = nerv.LayerRepo( + { + ["nerv.BiasLayer"] = + { + blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, + blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} + }, + ["nerv.WindowLayer"] = + { + wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, + wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} + } + }, param_repo, gconf) + +layer_repo = nerv.LayerRepo( + { + ["nerv.DAGLayer"] = + { + main = {{}, { + dim_in = {429}, dim_out = {429}, + sub_layers = sublayer_repo, + connections = { + ["[1]"] = "blayer1[1]", + ["blayer1[1]"] = "wlayer1[1]", + ["wlayer1[1]"] = "blayer2[1]", + ["blayer2[1]"] = "wlayer2[1]", + ["wlayer2[1]"] = "[1]" + } + }} + } + }, param_repo, gconf) + +reader = nerv.TNetReader({}, + { + id = "main_scp", + scp_file = "/slfs1/users/mfy43/swb_ivec/train_bp.scp", + conf_file = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", + frm_ext = 5, + mlfs = { + ref = { + file = "/slfs1/users/mfy43/swb_ivec/ref.mlf", + format = "map", + format_arg = "/slfs1/users/mfy43/swb_ivec/dict", + dir = "*/", + ext = "lab" + } + }, + global_transf = layer_repo:get_layer("main") + }) + +utter = reader:get_data() +-- print(utter.main_scp) +print(utter.ref) +-- cf2 = nerv.ChunkFile("feat_256", "r") +-- input = cf2:read_chunk("input", gconf) + +-- for i = 0, 157 - 10 do +-- row_diff = input.trans[i] - utter.main_scp[i] +-- for j = 0, row_diff:ncol() - 1 do +-- nerv.printf("%.8f ", row_diff[j]) +-- end +-- nerv.printf("\n") +-- end diff --git a/htk_io/examples/tnet_sgd_buffer.lua b/htk_io/examples/tnet_sgd_buffer.lua new file mode 100644 index 0000000..152d2f5 --- /dev/null +++ b/htk_io/examples/tnet_sgd_buffer.lua @@ -0,0 +1,70 @@ +require 'speech.init' +gconf = {cumat_type = nerv.CuMatrixFloat, + mmat_type = nerv.MMatrixFloat, + batch_size = 256} +param_repo = nerv.ParamRepo({"global_transf.nerv"}) + +sublayer_repo = nerv.LayerRepo( + { + ["nerv.BiasLayer"] = + { + blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}}, + blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}} + }, + ["nerv.WindowLayer"] = + { + wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}}, + wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}} + } + }, param_repo, gconf) + +layer_repo = nerv.LayerRepo( + { + ["nerv.DAGLayer"] = + { + main = {{}, { + dim_in = {429}, dim_out = {429}, + sub_layers = sublayer_repo, + connections = { + ["[1]"] = "blayer1[1]", + ["blayer1[1]"] = "wlayer1[1]", + ["wlayer1[1]"] = "blayer2[1]", + ["blayer2[1]"] = "wlayer2[1]", + ["wlayer2[1]"] = "[1]" + } + }} + } + }, param_repo, gconf) + +tnet_reader = nerv.TNetReader({}, + { + id = "main_scp", +-- scp_file = "/slfs1/users/mfy43/swb_ivec/train_bp.scp", + scp_file = "t.scp", + conf_file = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf", + frm_ext = 5, + mlfs = { + ref = { + file = "/slfs1/users/mfy43/swb_ivec/ref.mlf", + format = "map", + format_arg = "/slfs1/users/mfy43/swb_ivec/dict", + dir = "*/", + ext = "lab" + } + }, + global_transf = layer_repo:get_layer("main") + }) + +buffer = nerv.SGDBuffer(gconf, + { + buffer_size = 1024, + readers = { + { reader = tnet_reader, + data = {main_scp = 429, ref = 1}} + } + }) + +for data in buffer.get_data, buffer do + print(data.main_scp) +-- print(data.ref) +end diff --git a/htk_io/htk_io-scm-1.rockspec b/htk_io/htk_io-scm-1.rockspec new file mode 100644 index 0000000..59fa8f0 --- /dev/null +++ b/htk_io/htk_io-scm-1.rockspec @@ -0,0 +1,36 @@ +package = "htk_io" +version = "scm-1" +source = { + url = "https://github.com/Determinant/nerv-speech.git" +} +description = { + summary = "HTK I/O support for Nerv", + detailed = [[ + ]], + homepage = "https://github.com/Determinant/nerv-speech", + license = "BSD" +} +dependencies = { + "nerv >= scm-1", + "lua >= 5.1" +} +build = { + type = "make", + build_variables = { + CFLAGS="$(CFLAGS)", + LIBFLAG="$(LIBFLAG)", + LUA_LIBDIR="$(LUA_LIBDIR)", + LUA_BINDIR="$(LUA_BINDIR)", + LUA_INCDIR="$(LUA_INCDIR)", + INST_PREFIX="$(PREFIX)", + LUA="$(LUA)", + }, + install_variables = { + LUA_BINDIR="$(LUA_BINDIR)", + INST_PREFIX="$(PREFIX)", + INST_BINDIR="$(BINDIR)", + INST_LIBDIR="$(LIBDIR)", + INST_LUADIR="$(LUADIR)", + INST_CONFDIR="$(CONFDIR)", + }, +} diff --git a/htk_io/init.c b/htk_io/init.c new file mode 100644 index 0000000..edd454f --- /dev/null +++ b/htk_io/init.c @@ -0,0 +1,8 @@ +#include "../nerv/common.h" +#include + +extern void tnet_io_init(lua_State *L); +int luaopen_libhtkio(lua_State *L) { + tnet_io_init(L); + return 1; +} diff --git a/htk_io/init.lua b/htk_io/init.lua new file mode 100644 index 0000000..27ece6e --- /dev/null +++ b/htk_io/init.lua @@ -0,0 +1,62 @@ +require 'libhtkio' +local TNetReader = nerv.class("nerv.TNetReader", "nerv.DataReader") + +function TNetReader:__init(global_conf, reader_conf) + self.feat_id = reader_conf.id + self.frm_ext = reader_conf.frm_ext + self.gconf = global_conf + self.global_transf = reader_conf.global_transf + self.debug = global_conf.debug + if self.debug == nil then + self.debug = false + end + self.feat_repo = nerv.TNetFeatureRepo(reader_conf.scp_file, + reader_conf.conf_file, + reader_conf.frm_ext) + self.lab_repo = {} + for id, mlf_spec in pairs(reader_conf.mlfs) do + self.lab_repo[id] = nerv.TNetLabelRepo(mlf_spec.file, + mlf_spec.format, + mlf_spec.format_arg, + mlf_spec.dir, + mlf_spec.ext) + end +end + +function TNetReader:get_data() + if self.feat_repo:is_end() then + return nil + end + local res = {} + local frm_ext = self.frm_ext + local step = frm_ext * 2 + 1 + -- read HTK feature + local feat_utter = self.feat_repo:cur_utter(self.debug) + -- expand the feature + local expanded = self.gconf.cumat_type(feat_utter:nrow(), feat_utter:ncol() * step) + expanded:expand_frm(self.gconf.cumat_type.new_from_host(feat_utter), frm_ext) + -- rearrange the feature (``transpose'' operation in TNet) + local rearranged = expanded:create() + rearranged:rearrange_frm(expanded, step) + -- prepare for transf + local input = {rearranged} + local output = {rearranged:create()} + -- do transf + self.global_transf:init(input[1]:nrow()) + self.global_transf:propagate(input, output) + -- trim frames + expanded = self.gconf.mmat_type(output[1]:nrow() - frm_ext * 2, output[1]:ncol()) + output[1]:copy_toh(expanded, frm_ext, feat_utter:nrow() - frm_ext) + res[self.feat_id] = expanded + -- add corresponding labels + for id, repo in pairs(self.lab_repo) do + local lab_utter = repo:get_utter(self.feat_repo, + expanded:nrow(), + self.debug) + res[id] = lab_utter + end + -- move the pointer to next + self.feat_repo:next() + collectgarbage("collect") + return res +end diff --git a/htk_io/src/KaldiLib/Common.cc b/htk_io/src/KaldiLib/Common.cc new file mode 100644 index 0000000..40909ee --- /dev/null +++ b/htk_io/src/KaldiLib/Common.cc @@ -0,0 +1,277 @@ +#include +#include +#include +#include +#include + +#include "Common.h" +#include "MathAux.h" + + +/// Defines the white chars for string trimming +#if !defined(WHITE_CHARS) +# define WHITE_CHARS " \t" +#endif + +namespace TNet { + +#include + + // Allocating stream variable used by stream modifier MatrixVectorIostreamControl + const int MATRIX_IOS_FORMAT_IWORD = std::ios_base::xalloc(); + + //*************************************************************************** + //*************************************************************************** + int getHTKstr(char *str) + { + char termChar = '\0'; + char *chrptr = str; + + while (std::isspace(*chrptr)) ++chrptr; + + if (*chrptr == '\'' || *chrptr == '"') { + termChar = *chrptr; + chrptr++; + } + + for (; *chrptr; chrptr++) { + if (*chrptr == '\'' || *chrptr == '"') { + if (termChar == *chrptr) { + termChar = '\0'; + chrptr++; + break; + } + } + + if (std::isspace(*chrptr) && !termChar) { + break; + } + + if (*chrptr == '\\') { + ++chrptr; + if (*chrptr == '\0' || (*chrptr >= '0' && *chrptr <= '7' && + (*++chrptr < '0' || *chrptr > '7' || + *++chrptr < '0' || *chrptr > '7'))) { + return -1; + } + + if (*chrptr >= '0' && *chrptr <= '7') { + *chrptr = (char)((*chrptr - '0') + (chrptr[-1] - '0') * 8 + (chrptr[-2] - '0') * 64); + } + } + *str++ = *chrptr; + } + + if (termChar) { + return -2; + } + + *str = '\0'; + + return 0; + } + + + //***************************************************************************** + //***************************************************************************** + void + ParseHTKString(const std::string & rIn, std::string & rOut) + { + int ret_val; + + // the new string will be at most as long as the original, so we allocate + // space + char* new_str = new char[rIn.size() + 1]; + + char* p_htk_str = new_str; + + strcpy(p_htk_str, rIn.c_str()); + ret_val = getHTKstr(p_htk_str); + + // call the function + if (!ret_val) { + rOut = p_htk_str; + } + + delete [] new_str; + + if (ret_val) { + throw std::runtime_error("Error parsing HTK string"); + } + } + + + + //*************************************************************************** + //*************************************************************************** + bool + IsBigEndian() + { + int a = 1; + return (bool) ((char *) &a)[0] != 1; + } + + + //*************************************************************************** + //*************************************************************************** + void + MakeHtkFileName(char* pOutFileName, const char* inFileName, + const char* out_dir, const char* out_ext) + { + const char* base_name; + const char* bname_end = NULL; + const char* chrptr; + + // if (*inFileName == '*' && *++inFileName == '/') ++inFileName; + + // we don't do anything if file is stdin/out + if (!strcmp(inFileName, "-")) + { + pOutFileName[0] = '-'; + pOutFileName[1] = '\0'; + return; + } + + base_name = strrchr(inFileName, '/'); + base_name = base_name != NULL ? base_name + 1 : inFileName; + + if (out_ext) bname_end = strrchr(base_name, '.'); + if (!bname_end) bname_end = base_name + strlen(base_name); + + + if ((chrptr = strstr(inFileName, "/./")) != NULL) + { + // what is in path after /./ serve as base name + base_name = chrptr + 3; + } + /* else if (*inFileName != '/') + { + // if inFileName isn't absolut path, don't forget directory structure + base_name = inFileName; + }*/ + + *pOutFileName = '\0'; + if (out_dir) + { + if (*out_dir) + { + strcat(pOutFileName, out_dir); + strcat(pOutFileName, "/"); + } + strncat(pOutFileName, base_name, bname_end-base_name); + } + else + { + strncat(pOutFileName, inFileName, bname_end-inFileName); + } + + if (out_ext && *out_ext) + { + strcat(pOutFileName, "."); + strcat(pOutFileName, out_ext); + } + } + + + //**************************************************************************** + //**************************************************************************** + bool + CloseEnough(const float f1, const float f2, const float nRounds) + { + bool ret_val = (_ABS((f1 - f2) / (f2 == 0.0f ? 1.0f : f2)) + < (nRounds * FLT_EPSILON)); + + return ret_val; + } + + + //**************************************************************************** + //**************************************************************************** + bool + CloseEnough(const double f1, const double f2, const double nRounds) + { + bool ret_val = (_ABS((f1 - f2) / (f2 == 0.0 ? 1.0 : f2)) + < (nRounds * DBL_EPSILON)); + + return ret_val; + } + + + //**************************************************************************** + //**************************************************************************** + char* + ExpandHtkFilterCmd(const char *command, const char *filename, const char* pFilter) + { + + char *out, *outend; + const char *chrptr = command; + int ndollars = 0; + int fnlen = strlen(filename); + + while (*chrptr++) ndollars += (*chrptr == *pFilter); + + out = (char*) malloc(strlen(command) - ndollars + ndollars * fnlen + 1); + + outend = out; + + for (chrptr = command; *chrptr; chrptr++) { + if (*chrptr == *pFilter) { + strcpy(outend, filename); + outend += fnlen; + } else { + *outend++ = *chrptr; + } + } + *outend = '\0'; + return out; + } + + //*************************************************************************** + //*************************************************************************** + char * + StrToUpper(char *str) + { + char *chptr; + for (chptr = str; *chptr; chptr++) { + *chptr = (char)toupper(*chptr); + } + return str; + } + + + //**************************************************************************** + //**************************************************************************** + std::string& + Trim(std::string& rStr) + { + // WHITE_CHARS is defined in common.h + std::string::size_type pos = rStr.find_last_not_of(WHITE_CHARS); + if(pos != std::string::npos) + { + rStr.erase(pos + 1); + pos = rStr.find_first_not_of(WHITE_CHARS); + if(pos != std::string::npos) rStr.erase(0, pos); + } + else + rStr.erase(rStr.begin(), rStr.end()); + + return rStr; + } + + +} // namespace TNet + +//#ifdef CYGWIN + +void assertf(const char *c, int i, const char *msg){ + printf("Assertion \"%s\" failed: file \"%s\", line %d\n", msg?msg:"(null)", c?c:"(null)", i); + abort(); +} + + +void assertf_throw(const char *c, int i, const char *msg){ + char buf[2000]; + snprintf(buf, 1999, "Assertion \"%s\" failed, throwing exception: file \"%s\", line %d\n", msg?msg:"(null)", c?c:"(null)", i); + throw std::runtime_error((std::string)buf); +} +//#endif diff --git a/htk_io/src/KaldiLib/Common.h b/htk_io/src/KaldiLib/Common.h new file mode 100644 index 0000000..9cd9658 --- /dev/null +++ b/htk_io/src/KaldiLib/Common.h @@ -0,0 +1,233 @@ +#ifndef TNet_Common_h +#define TNet_Common_h + +#include +#include // C string stuff like strcpy +#include +#include +#include + +/* Alignment of critical dynamic data structure + * + * Not all platforms support memalign so we provide a stk_memalign wrapper + * void *stk_memalign( size_t align, size_t size, void **pp_orig ) + * *pp_orig is the pointer that has to be freed afterwards. + */ +#ifdef HAVE_POSIX_MEMALIGN +# define stk_memalign(align,size,pp_orig) \ + ( !posix_memalign( pp_orig, align, size ) ? *(pp_orig) : NULL ) +# ifdef STK_MEMALIGN_MANUAL +# undef STK_MEMALIGN_MANUAL +# endif +#elif defined(HAVE_MEMALIGN) + /* Some systems have memalign() but no declaration for it */ + //void * memalign( size_t align, size_t size ); +# define stk_memalign(align,size,pp_orig) \ + ( *(pp_orig) = memalign( align, size ) ) +# ifdef STK_MEMALIGN_MANUAL +# undef STK_MEMALIGN_MANUAL +# endif +#else /* We don't have any choice but to align manually */ +# define stk_memalign(align,size,pp_orig) \ + (( *(pp_orig) = malloc( size + align - 1 )) ? \ + (void *)( (((unsigned long)*(pp_orig)) + 15) & ~0xFUL ) : NULL ) +# define STK_MEMALIGN_MANUAL +#endif + + +#define swap8(a) { \ + char t=((char*)&a)[0]; ((char*)&a)[0]=((char*)&a)[7]; ((char*)&a)[7]=t;\ + t=((char*)&a)[1]; ((char*)&a)[1]=((char*)&a)[6]; ((char*)&a)[6]=t;\ + t=((char*)&a)[2]; ((char*)&a)[2]=((char*)&a)[5]; ((char*)&a)[5]=t;\ + t=((char*)&a)[3]; ((char*)&a)[3]=((char*)&a)[4]; ((char*)&a)[4]=t;} +#define swap4(a) { \ + char t=((char*)&a)[0]; ((char*)&a)[0]=((char*)&a)[3]; ((char*)&a)[3]=t;\ + t=((char*)&a)[1]; ((char*)&a)[1]=((char*)&a)[2]; ((char*)&a)[2]=t;} +#define swap2(a) { \ + char t=((char*)&a)[0]; ((char*)&a)[0]=((char*)&a)[1]; ((char*)&a)[1]=t;} + + +namespace TNet +{ + /** ************************************************************************** + ** ************************************************************************** + * @brief Aligns a number to a specified base + * @param n Number of type @c _T to align + * @return Aligned value of type @c _T + */ + template + inline _T + align(const _T n) + { + const _T x(_align - 1); + return (n + x) & ~(x); + } + + + /** + * @brief Returns true if architecture is big endian + */ + bool + IsBigEndian(); + + + /** + * @brief Returns true if two numbers are close enough to each other + * + * @param f1 First operand + * @param f2 Second operand + * @param nRounds Expected number of operations prior to this comparison + */ + bool + CloseEnough(const float f1, const float f2, const float nRounds); + + + /** + * @brief Returns true if two numbers are close enough to each other + * + * @param f1 First operand + * @param f2 Second operand + * @param nRounds Expected number of operations prior to this comparison + */ + bool + CloseEnough(const double f1, const double f2, const double nRounds); + + + /** + * @brief Parses a HTK-style string into a C++ std::string readable + * + * @param rIn HTK input string + * @param rOut output parsed string + */ + void + ParseHTKString(const std::string & rIn, std::string & rOut); + + + /** + * @brief Synthesize new file name based on name, path, and extension + * + * @param pOutFileName full ouptut file name + * @param pInFileName file name + * @param pOutDir directory + * @param pOutExt extension + */ + void + MakeHtkFileName(char *pOutFileName, const char* pInFileName, const char *pOutDir, + const char *pOutExt); + + + /** + * @brief Removes the leading and trailing white chars + * + * @param rStr Refference to the string to be processed + * @return Refference to the original string + * + * The white characters are determined by the @c WHITE_CHARS macro defined + * above. + */ + std::string& + Trim(std::string& rStr); + + + char* + StrToUpper(char* pStr); + + char* + ExpandHtkFilterCmd(const char *command, const char *filename, const char* pFilter); + + + template + std::string to_string(const T& val) + { + std::stringstream ss; + ss << val; + return ss.str(); + } + + inline void + ExpectKeyword(std::istream &i_stream, const char *kwd) + { + std::string token; + i_stream >> token; + if (token != kwd) { + throw std::runtime_error(std::string(kwd) + " expected"); + } + } + + extern const int MATRIX_IOS_FORMAT_IWORD; + + enum MatrixVectorIostreamControlBits { + ACCUMULATE_INPUT = 1, +// BINARY_OUTPUT = 2 + }; + + class MatrixVectorIostreamControl + { + public: + MatrixVectorIostreamControl(enum MatrixVectorIostreamControlBits bitsToBeSet, bool valueToBeSet) + : mBitsToBeSet(bitsToBeSet), mValueToBeSet(valueToBeSet) {} + + static long Flags(std::ios_base &rIos, enum MatrixVectorIostreamControlBits bits) + { return rIos.iword(MATRIX_IOS_FORMAT_IWORD); } + + long mBitsToBeSet; + bool mValueToBeSet; + + friend std::ostream & operator <<(std::ostream &rOs, const MatrixVectorIostreamControl modifier) + { + if(modifier.mValueToBeSet) { + rOs.iword(MATRIX_IOS_FORMAT_IWORD) |= modifier.mBitsToBeSet; + } else { + rOs.iword(MATRIX_IOS_FORMAT_IWORD) &= ~modifier.mBitsToBeSet; + } + return rOs; + } + + friend std::istream & operator >>(std::istream &rIs, const MatrixVectorIostreamControl modifier) + { + if(modifier.mValueToBeSet) { + rIs.iword(MATRIX_IOS_FORMAT_IWORD) |= modifier.mBitsToBeSet; + } else { + rIs.iword(MATRIX_IOS_FORMAT_IWORD) &= ~modifier.mBitsToBeSet; + } + return rIs; + } + }; + + + + +} // namespace TNet + +#ifdef __ICC +#pragma warning (disable: 383) // ICPC remark we don't want. +#pragma warning (disable: 810) // ICPC remark we don't want. +#pragma warning (disable: 981) // ICPC remark we don't want. +#pragma warning (disable: 1418) // ICPC remark we don't want. +#pragma warning (disable: 444) // ICPC remark we don't want. +#pragma warning (disable: 869) // ICPC remark we don't want. +#pragma warning (disable: 1287) // ICPC remark we don't want. +#pragma warning (disable: 279) // ICPC remark we don't want. +#pragma warning (disable: 981) // ICPC remark we don't want. +#endif + +//#ifdef CYGWIN +#if 1 +#undef assert +#ifndef NDEBUG +#define assert(e) ((e) ? (void)0 : assertf(__FILE__, __LINE__, #e)) +#else +#define assert(e) ((void)0) +#endif +void assertf(const char *c, int i, const char *msg); // Just make it possible to break into assert on gdb-- has some kind of bug on cygwin. +#else +#include +#endif + +#define assert_throw(e) ((e) ? (void)0 : assertf_throw(__FILE__, __LINE__, #e)) +void assertf_throw(const char *c, int i, const char *msg); + +#define DAN_STYLE_IO + +#endif // ifndef TNet_Common_h + diff --git a/htk_io/src/KaldiLib/Error.h b/htk_io/src/KaldiLib/Error.h new file mode 100644 index 0000000..2228dde --- /dev/null +++ b/htk_io/src/KaldiLib/Error.h @@ -0,0 +1,172 @@ +// +// C++ Interface: %{MODULE} +// +// Description: +// +// +// Author: %{AUTHOR} <%{EMAIL}>, (C) %{YEAR} +// +// Copyright: See COPYING file that comes with this distribution +// +// + +/** @file Error.h + * This header defines several types and functions relating to the + * handling of exceptions in STK. + */ + +#ifndef TNET_Error_h +#define TNET_Error_h + +#include +#include +#include +#include + +#include +#include +#include +#include + +// THESE MACROS TERRIBLY CLASH WITH STK!!!! +// WE MUST USE SAME MACROS! +// +//#define Error(msg) _Error_(__func__, __FILE__, __LINE__, msg) +//#define Warning(msg) _Warning_(__func__, __FILE__, __LINE__, msg) +//#define TraceLog(msg) _TraceLog_(__func__, __FILE__, __LINE__, msg) +// + +#ifndef Error + #define Error(...) _Error_(__func__, __FILE__, __LINE__, __VA_ARGS__) +#endif +#ifndef PError + #define PError(...) _PError_(__func__, __FILE__, __LINE__, __VA_ARGS__) +#endif +#ifndef Warning + #define Warning(...) _Warning_(__func__, __FILE__, __LINE__, __VA_ARGS__) +#endif +#ifndef TraceLog + #define TraceLog(...) _TraceLog_(__func__, __FILE__, __LINE__, __VA_ARGS__) +#endif + +namespace TNet { + + + + /** MyException + * Custom exception class, gets the stacktrace + */ + class MyException + : public std::runtime_error + { + public: + explicit MyException(const std::string& what_arg) throw(); + virtual ~MyException() throw(); + + const char* what() const throw() + { return mWhat.c_str(); } + + private: + std::string mWhat; + }; + + /** + * MyException:: implemenatation + */ + inline + MyException:: + MyException(const std::string& what_arg) throw() + : std::runtime_error(what_arg) + { + mWhat = what_arg; + mWhat += "\nTHE STACKTRACE INSIDE MyException OBJECT IS:\n"; + + void *array[10]; + size_t size; + char **strings; + size_t i; + + size = backtrace (array, 10); + strings = backtrace_symbols (array, size); + + //<< 0th string is the MyException ctor, so ignore and start by 1 + for (i = 1; i < size; i++) { + mWhat += strings[i]; + mWhat += "\n"; + } + + free (strings); + } + + + inline + MyException:: + ~MyException() throw() + { } + + + /** + * @brief Error throwing function (with backtrace) + */ + inline void + _Error_(const char *func, const char *file, int line, const std::string &msg) + { + std::stringstream ss; + ss << "ERROR (" << func << ':' << file << ':' << line << ") " << msg; + throw MyException(ss.str()); + } + + /** + * @brief Throw a formatted error + */ + inline void _PError_(const char *func, const char *file, int line, const char *fmt, ...) { + va_list ap; + char msg[256]; + va_start(ap, fmt); + vsnprintf(msg, sizeof msg, fmt, ap); + va_end(ap); + _Error_(func, file, line, msg); + } + + /** + * @brief Warning handling function + */ + inline void + _Warning_(const char *func, const char *file, int line, const std::string &msg) + { + std::cout << "WARNING (" << func << ':' << file << ':' << line << ") " << msg << std::endl; + } + + inline void + _TraceLog_(const char *func, const char *file, int line, const std::string &msg) + { + std::cout << "INFO (" << func << ':' << file << ':' << line << ") " << msg << std::endl; + std::cout.flush(); + } + + /** + * New kaldi error handling: + * + * class KaldiErrorMessage is invoked from the KALDI_ERROR macro. + * The destructor throws an exception. + */ + class KaldiErrorMessage { + public: + KaldiErrorMessage(const char *func, const char *file, int line) { + this->stream() << "ERROR (" + << func << "():" + << file << ':' << line << ") "; + } + inline std::ostream &stream() { return ss; } + ~KaldiErrorMessage() { throw MyException(ss.str()); } + private: + std::ostringstream ss; + }; + #define KALDI_ERR TNet::KaldiErrorMessage(__func__, __FILE__, __LINE__).stream() + + + +} // namespace TNet + +//#define TNET_Error_h +#endif diff --git a/htk_io/src/KaldiLib/Features.cc b/htk_io/src/KaldiLib/Features.cc new file mode 100644 index 0000000..64b63e8 --- /dev/null +++ b/htk_io/src/KaldiLib/Features.cc @@ -0,0 +1,1798 @@ + +//enable feature repository profiling +#define PROFILING 1 + +#include +#include +#include +#include + +#include "Features.h" +#include "Tokenizer.h" +#include "StkMatch.h" +#include "Types.h" + + + +namespace TNet +{ + const char + FeatureRepository:: + mpParmKindNames[13][16] = + { + {"WAVEFORM"}, + {"LPC"}, + {"LPREFC"}, + {"LPCEPSTRA"}, + {"LPDELCEP"}, + {"IREFC"}, + {"MFCC"}, + {"FBANK"}, + {"MELSPEC"}, + {"USER"}, + {"DISCRETE"}, + {"PLP"}, + {"ANON"} + }; + + //*************************************************************************** + //*************************************************************************** + + FileListElem:: + FileListElem(const std::string & rFileName) + { + std::string::size_type pos; + + mLogical = rFileName; + mWeight = 1.0; + + // some slash-backslash replacement hack + for (size_t i = 0; i < mLogical.size(); i++) { + if (mLogical[i] == '\\') { + mLogical[i] = '/'; + } + } + + // read sentence weight definition if any ( physical_file.fea[s,e]{weight} ) + if ((pos = mLogical.find('{')) != std::string::npos) + { + std::string tmp_weight(mLogical.begin() + pos + 1, mLogical.end()); + std::stringstream tmp_ss(tmp_weight); + + tmp_ss >> mWeight; + mLogical.erase(pos); + } + + // look for "=" symbol and if found, split it + if ((pos = mLogical.find('=')) != std::string::npos) + { + // copy all from mLogical[pos+1] till the end to mPhysical + mPhysical.assign(mLogical.begin() + pos + 1, mLogical.end()); + // erase all from pos + 1 till the end from mLogical + mLogical.erase(pos); + // trim the leading and trailing spaces + Trim(mPhysical); + Trim(mLogical); + } + else + { + // trim the leading and trailing spaces + Trim(mLogical); + + mPhysical = mLogical; + } + } + + + //########################################################################### + //########################################################################### + // FeatureRepository section + //########################################################################### + //########################################################################### + + //*************************************************************************** + //*************************************************************************** + void + FeatureRepository:: + ReadCepsNormFile( + const char * pFileName, + char ** pLastFileName, + BaseFloat ** vec_buff, + int sampleKind, + CNFileType type, + int coefs) + { + FILE* fp; + int i; + char s1[64]; + char s2[64]; + const char* typeStr = (type == CNF_Mean ? "MEAN" : + type == CNF_Variance ? "VARIANCE" : "VARSCALE"); + + const char* typeStr2 = (type == CNF_Mean ? "CMN" : + type == CNF_Variance ? "CVN" : "VarScale"); + + if (*pLastFileName != NULL && !strcmp(*pLastFileName, pFileName)) { + return; + } + free(*pLastFileName); + *pLastFileName=strdup(pFileName); + *vec_buff = (BaseFloat*) realloc(*vec_buff, coefs * sizeof(BaseFloat)); + + if (*pLastFileName == NULL || *vec_buff== NULL) + throw std::runtime_error("Insufficient memory"); + + if ((fp = fopen(pFileName, "r")) == NULL) { + throw std::runtime_error(std::string("Cannot open ") + typeStr2 + + " pFileName: '" + pFileName + "'"); + } + + if ((type != CNF_VarScale + && (fscanf(fp, " <%64[^>]> <%64[^>]>", s1, s2) != 2 + || strcmp(StrToUpper(s1), "CEPSNORM") + || ReadParmKind(s2, false) != sampleKind)) + || fscanf(fp, " <%64[^>]> %d", s1, &i) != 2 + || strcmp(StrToUpper(s1), typeStr) + || i != coefs) + { + ParmKind2Str(sampleKind, s2); + + //std::cout << "[[[TADY!!!!]]]" << pFileName << "\n" << std::flush; + + throw std::runtime_error(std::string("") + + (type == CNF_VarScale ? "" : " <") + + (type == CNF_VarScale ? "" : s2) + + (type == CNF_VarScale ? "" : ">") + + " <" + typeStr + " ... expected in " + typeStr2 + + " file " + pFileName); + } + + for (i = 0; i < coefs; i++) { + if (fscanf(fp, " "FLOAT_FMT, *vec_buff+i) != 1) { + if (fscanf(fp, "%64s", s2) == 1) { + throw std::runtime_error(std::string("Decimal number expected but '") + + s2 + "' found in " + typeStr2 + " file " + pFileName); + } + else if (feof(fp)) { + throw std::runtime_error(std::string("Unexpected end of ") + + typeStr2 + " file "+ pFileName); + } + else { + throw std::runtime_error(std::string("Cannot read ") + typeStr2 + + " file " + pFileName); + } + } + + if (type == CNF_Variance) + (*vec_buff)[i] = BaseFloat(1 / sqrt((*vec_buff)[i])); + else if (type == CNF_VarScale) + (*vec_buff)[i] = BaseFloat(sqrt((*vec_buff)[i])); + } + + if (fscanf(fp, "%64s", s2) == 1) + { + throw std::runtime_error(std::string("End of file expected but '") + + s2 + "' found in " + typeStr2 + " file " + pFileName); + } + + fclose(fp); + } // ReadCepsNormFile(...) + + + //*************************************************************************** + //*************************************************************************** + void + FeatureRepository:: + HtkFilter(const char* pFilter, const char* pValue, FeatureRepository& rOut) + { + std::list::iterator it; + std::string str; + + rOut.mSwapFeatures = mSwapFeatures; + rOut.mStartFrameExt = mStartFrameExt; + rOut.mEndFrameExt = mEndFrameExt; + rOut.mTargetKind = mTargetKind; + rOut.mDerivOrder = mDerivOrder; + rOut.mDerivWinLengths = mDerivWinLengths; + + rOut.mpCvgFile = mpCvgFile; + rOut.mpCmnPath = mpCmnPath; + rOut.mpCmnMask = mpCmnMask; + rOut.mpCvnPath = mpCvnPath; + rOut.mpCvnMask = mpCvnMask; + + rOut.mInputQueue.clear(); + + // go through all records and check the mask + for (it=mInputQueue.begin(); it!= mInputQueue.end(); ++it) { + if (pFilter == NULL + || (ProcessMask(it->Logical(), pFilter, str) && (str == pValue))) { + rOut.mInputQueue.push_back(*it); + } + } + + // set the queue position to the begining + rOut.mInputQueueIterator = mInputQueue.end(); + + rOut.mCurrentIndexFileName = ""; + rOut.mCurrentIndexFileDir = ""; + rOut.mCurrentIndexFileExt = ""; + + mStream.close(); + mStream.clear(); + + rOut.mpLastFileName = NULL; + rOut.mLastFileName = ""; + rOut.mpLastCmnFile = NULL; + rOut.mpLastCvnFile = NULL; + rOut.mpLastCvgFile = NULL; + rOut.mpCmn = NULL; + rOut.mpCvn = NULL; + rOut.mpCvg = NULL; + rOut.mpA = NULL; + rOut.mpB = NULL; + + } + + + //*************************************************************************** + //*************************************************************************** + void + FeatureRepository:: + HtkSelection(const char* pFilter, std::list< std::string >& rOut) + { + std::map< std::string, bool> aux_map; + std::map< std::string, bool>::iterator map_it; + std::list::iterator it; + std::string str; + + rOut.clear(); + + if(pFilter != NULL) { + // go through all records and check the mask + for (it=mInputQueue.begin(); it!= mInputQueue.end(); ++it) { + if (ProcessMask(it->Logical(), pFilter, str)) { + aux_map[str] = true; + } + } + } else { + aux_map[std::string("default speaker")] = true; + } + + for (map_it = aux_map.begin(); map_it != aux_map.end(); ++map_it) { + rOut.push_back(map_it->first); + } + } + + + //*************************************************************************** + //*************************************************************************** + int + FeatureRepository:: + ParmKind2Str(unsigned parmKind, char *pOutString) + { + // :KLUDGE: Absolutely no idea what this is... + if ((parmKind & 0x003F) >= sizeof(mpParmKindNames)/sizeof(mpParmKindNames[0])) + return 0; + + strcpy(pOutString, mpParmKindNames[parmKind & 0x003F]); + + if (parmKind & PARAMKIND_E) strcat(pOutString, "_E"); + if (parmKind & PARAMKIND_N) strcat(pOutString, "_N"); + if (parmKind & PARAMKIND_D) strcat(pOutString, "_D"); + if (parmKind & PARAMKIND_A) strcat(pOutString, "_A"); + if (parmKind & PARAMKIND_C) strcat(pOutString, "_C"); + if (parmKind & PARAMKIND_Z) strcat(pOutString, "_Z"); + if (parmKind & PARAMKIND_K) strcat(pOutString, "_K"); + if (parmKind & PARAMKIND_0) strcat(pOutString, "_0"); + if (parmKind & PARAMKIND_V) strcat(pOutString, "_V"); + if (parmKind & PARAMKIND_T) strcat(pOutString, "_T"); + + return 1; + } + + + // //*************************************************************************** + // //*************************************************************************** + // void + // AddFileListToFeatureRepositories( + // const char* pFileName, + // const char* pFilter, + // std::queue &featureRepositoryList) + // { + // IStkStream l_stream; + // std::string file_name; + // Tokenizer file_list(pFileName, ","); + // Tokenizer::iterator p_file_name; + + // //:TODO: error if empty featureRepositoryList + // + // for (p_file_name = file_list.begin(); p_file_name != file_list.end(); ++p_file_name) + // { + // // get rid of initial and trailing blanks + // Trim(*p_file_name); + + // // open file name + // l_stream.open(p_file_name->c_str(), std::ios::in, pFilter); + // + // if (!l_stream.good()) { + // //:TODO: + // // Warning or error ... Why warning? -Lukas + // throw std::runtime_error(std::string("Cannot not open list file ") + + // *p_file_name); + // } + + // // read all lines and parse them + // for(;;) + // { + // l_stream >> file_name; + // //:TODO: if(l_stream.badl()) Error() + // // Reading after last token set the fail bit + // if(l_stream.fail()) + // break; + // // we can push_back a std::string as new FileListElem object + // // is created using FileListElem(const std::string&) constructor + // // and logical and physical names are correctly extracted + // featureRepositoryList.front()->mInputQueue.push_back(file_name); + // + // //cycle in the featureRepositoryList + // featureRepositoryList.push(featureRepositoryList.front()); + // featureRepositoryList.pop(); + // } + // l_stream.close(); + // } + // } // AddFileList(const std::string & rFileName) + + + //******************************************