diff options
Diffstat (limited to 'htk_io')
46 files changed, 12074 insertions, 0 deletions
diff --git a/htk_io/Makefile b/htk_io/Makefile new file mode 100644 index 0000000..d32d17a --- /dev/null +++ b/htk_io/Makefile @@ -0,0 +1,40 @@
# Build and install the htk_io module (libhtkio.so plus its Lua front end).
#
# LUA_BINDIR, LUA_INCDIR, LUA_LIBDIR, INST_LIBDIR and INST_LUADIR are not set
# here; the rockspec passes them in through build_variables/install_variables.

# None of these goals name a real file; declaring them phony keeps a stray
# file called "build", "install" or "clean" from silently disabling the goal.
.PHONY: build install clean tnet
SHELL := /bin/bash
BUILD_DIR := $(CURDIR)/build
INC_PATH := $(LUA_BINDIR)/../include/
OBJS := init.o src/cwrapper.o src/init.o
LIBS := libhtkio.so
LUA_LIBS := init.lua
INCLUDE := -I $(LUA_INCDIR) -I $(INC_PATH) -DLUA_USE_APICHECK

SUBDIR := src
OBJ_DIR := $(BUILD_DIR)/objs
LUA_DIR = $(INST_LUADIR)/htk_io

OBJS := $(addprefix $(OBJ_DIR)/,$(OBJS))
LIBS := $(addprefix $(INST_LIBDIR)/,$(LIBS))
OBJ_SUBDIR := $(addprefix $(OBJ_DIR)/,$(SUBDIR))
LUA_SUBDIR := $(addprefix $(LUA_DIR)/,$(SUBDIR))
LUA_LIBS := $(addprefix $(LUA_DIR)/,$(LUA_LIBS))
LIB_PATH := $(LUA_BINDIR)/../lib

# "build" stays the first real target and therefore the default goal.
build: $(OBJ_DIR) $(OBJ_SUBDIR) $(OBJS) tnet $(OBJ_DIR)/src/test
install: $(LUA_DIR) $(LUA_SUBDIR) $(LUA_LIBS) $(LIBS)

# Output directories are order-only prerequisites: they must exist before
# anything is written into them, but their timestamps never force a rebuild.
# Without these edges the build only worked because "build" happened to list
# the directories first, which is not guaranteed under "make -j".
$(OBJS) $(OBJ_DIR)/src/test.o: | $(OBJ_DIR) $(OBJ_SUBDIR)
tnet: | $(OBJ_SUBDIR)
$(LUA_LIBS): | $(LUA_DIR)

$(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR):
	mkdir -p $@
$(LUA_DIR)/%.lua: %.lua
	cp $< $@
# NOTE(review): libKaldiLib.a has no rule in this file; it is expected to be
# produced by the "tnet" sub-make, so a parallel build may still race on it.
$(LIBS): $(OBJ_DIR)/src/cwrapper.o $(OBJ_DIR)/init.o $(OBJ_DIR)/src/init.o $(OBJ_DIR)/src/libKaldiLib.a
	gcc -shared -o $@ $(OBJ_DIR)/src/cwrapper.o $(OBJ_DIR)/init.o $(OBJ_DIR)/src/libKaldiLib.a $(OBJ_DIR)/src/init.o -lstdc++ -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -lluaT
$(OBJ_DIR)/src/test: $(OBJ_DIR)/src/cwrapper.o $(OBJ_DIR)/src/test.o $(OBJ_DIR)/src/libKaldiLib.a
	gcc -o $@ $^ -Wl,-rpath=$(LIB_PATH) -L$(LIB_PATH) -lnervcore -Wl,-rpath=$(LUA_LIBDIR) -L$(LUA_LIBDIR) -lluajit-5.1 -lstdc++ -lm
$(OBJ_DIR)/src/cwrapper.o: src/cwrapper.cpp
	g++ -o $@ -c $< -DHAVE_ATLAS -I src/KaldiLib/ -g -fPIC $(INCLUDE)
$(OBJ_DIR)/%.o: %.c
	gcc -o $@ -c $< -g $(INCLUDE) -fPIC
# $(RM) is "rm -f": missing objects are not an error, real failures still are.
# init.o is built directly in $(OBJ_DIR), so that directory is cleaned as well.
clean:
	$(RM) $(OBJ_DIR)/*.o $(OBJ_DIR)/src/*.o
	$(MAKE) -C src/KaldiLib/ clean
# Build the bundled KaldiLib sources, placing their output under $(OBJ_DIR)/src.
tnet:
	$(MAKE) -C src/KaldiLib/ OBJ_DIR=$(OBJ_DIR)/src
diff --git a/htk_io/examples/tnet_io_example.lua b/htk_io/examples/tnet_io_example.lua new file mode 100644 index 0000000..eea73a5 --- /dev/null +++ b/htk_io/examples/tnet_io_example.lua @@ -0,0 +1,16 @@
-- Example: read one utterance of HTK features and its labels through the
-- TNet feature/label repositories and print both.
-- NOTE(review): this still requires 'libspeech', while the C module built by
-- this package is 'libhtkio' -- confirm which name is meant to be loaded.
require 'libspeech'
frm_ext = 5
feat_repo = nerv.TNetFeatureRepo(
    "/slfs1/users/mfy43/swb_ivec/train_bp.scp",
    "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf",
    frm_ext)
lab_repo = nerv.TNetLabelRepo(
    "/slfs1/users/mfy43/swb_ivec/ref.mlf",
    "map",
    "/slfs1/users/mfy43/swb_ivec/dict",
    "*/",
    "lab")
feat_utter = feat_repo:cur_utter()
print(feat_utter)
-- the label request is frm_ext * 2 rows shorter than the feature matrix
lab_utter = lab_repo:get_utter(feat_repo, feat_utter:nrow() - frm_ext * 2)
print(lab_utter)
diff --git a/htk_io/examples/tnet_preprocessing_example.lua b/htk_io/examples/tnet_preprocessing_example.lua new file mode 100644 index 0000000..9e1c0ce --- /dev/null +++ b/htk_io/examples/tnet_preprocessing_example.lua @@ -0,0 +1,75 @@
-- Example: run the bias/window "global transform" by hand on one utterance
-- and print the difference against a reference chunk read from "feat_256".
-- NOTE(review): requires 'libspeech'; this package builds 'libhtkio' -- confirm.
require 'libspeech'
frm_ext = 5
gconf = {cumat_type = nerv.CuMatrixFloat,
         batch_size = 158}
param_repo = nerv.ParamRepo({"global_transf.nerv"})
sublayer_repo = nerv.LayerRepo(
    {
        ["nerv.BiasLayer"] =
        {
            blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}},
            blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}}
        },
        ["nerv.WindowLayer"] =
        {
            wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}},
            wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}}
        }
    }, param_repo, gconf)

layer_repo = nerv.LayerRepo(
    {
        ["nerv.DAGLayer"] =
        {
            main = {{}, {
                dim_in = {429}, dim_out = {429},
                sub_layers = sublayer_repo,
                connections = {
                    ["<input>[1]"] = "blayer1[1]",
                    ["blayer1[1]"] = "wlayer1[1]",
                    ["wlayer1[1]"] = "blayer2[1]",
                    ["blayer2[1]"] = "wlayer2[1]",
                    ["wlayer2[1]"] = "<output>[1]"
                }
            }}
        }
    }, param_repo, gconf)

feat_repo = nerv.TNetFeatureRepo(
    "/slfs1/users/mfy43/swb_ivec/train_bp.scp",
    "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf",
    frm_ext)
lab_repo = nerv.TNetLabelRepo(
    "/slfs1/users/mfy43/swb_ivec/ref.mlf",
    "map",
    "/slfs1/users/mfy43/swb_ivec/dict",
    "*/",
    "lab")
feat_utter = feat_repo:cur_utter()

-- print(feat_utter)
-- lab_utter = lab_repo:get_utter(feat_repo, feat_utter:nrow() - frm_ext * 2)
-- print(lab_utter)

cf2 = nerv.ChunkFile("feat_256", "r")
input = cf2:read_chunk("input", gconf)

-- each output row is frm_ext * 2 + 1 times as wide as an input row
step = frm_ext * 2 + 1
expanded = nerv.CuMatrixFloat(feat_utter:nrow(), feat_utter:ncol() * step)
expanded:expand_frm(nerv.CuMatrixFloat.new_from_host(feat_utter), frm_ext)

rearranged = expanded:create()
rearranged:rearrange_frm(expanded, step)

output = {expanded:create()}
main = layer_repo:get_layer("main")
main:init()
main:propagate({rearranged}, output)

-- NOTE(review): 157, 10 and 5 look like batch_size - 1, frm_ext * 2 and
-- frm_ext respectively -- confirm before changing any of those settings.
for i = 0, 157 - 10 do
    row_diff = input.trans[i] - output[1][i + 5]
    for j = 0, row_diff:ncol() - 1 do
        nerv.printf("%.8f ", row_diff[j])
    end
    nerv.printf("\n")
end
diff --git a/htk_io/examples/tnet_preprocessing_example2.lua b/htk_io/examples/tnet_preprocessing_example2.lua new file mode 100644 index 0000000..1215b23 --- /dev/null +++ b/htk_io/examples/tnet_preprocessing_example2.lua @@ -0,0 +1,68 @@
-- Example: the same preprocessing as tnet_preprocessing_example.lua, but
-- driven through nerv.TNetReader, which applies the global transform itself.
-- NOTE(review): requires 'speech.init'; this package installs its Lua code
-- under htk_io/init.lua -- confirm the module name.
require 'speech.init'
-- TNetReader:get_data calls both gconf.cumat_type and gconf.mmat_type, so
-- the global configuration has to provide both matrix types.
gconf = {cumat_type = nerv.CuMatrixFloat,
         mmat_type = nerv.MMatrixFloat,
         batch_size = 158}
param_repo = nerv.ParamRepo({"global_transf.nerv"})

sublayer_repo = nerv.LayerRepo(
    {
        ["nerv.BiasLayer"] =
        {
            blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}},
            blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}}
        },
        ["nerv.WindowLayer"] =
        {
            wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}},
            wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}}
        }
    }, param_repo, gconf)

layer_repo = nerv.LayerRepo(
    {
        ["nerv.DAGLayer"] =
        {
            main = {{}, {
                dim_in = {429}, dim_out = {429},
                sub_layers = sublayer_repo,
                connections = {
                    ["<input>[1]"] = "blayer1[1]",
                    ["blayer1[1]"] = "wlayer1[1]",
                    ["wlayer1[1]"] = "blayer2[1]",
                    ["blayer2[1]"] = "wlayer2[1]",
                    ["wlayer2[1]"] = "<output>[1]"
                }
            }}
        }
    }, param_repo, gconf)

-- Pass the real global configuration: with an empty table the reader's
-- self.gconf.cumat_type / self.gconf.mmat_type would be nil in get_data.
reader = nerv.TNetReader(gconf,
    {
        id = "main_scp",
        scp_file = "/slfs1/users/mfy43/swb_ivec/train_bp.scp",
        conf_file = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf",
        frm_ext = 5,
        mlfs = {
            ref = {
                file = "/slfs1/users/mfy43/swb_ivec/ref.mlf",
                format = "map",
                format_arg = "/slfs1/users/mfy43/swb_ivec/dict",
                dir = "*/",
                ext = "lab"
            }
        },
        global_transf = layer_repo:get_layer("main")
    })

utter = reader:get_data()
-- print(utter.main_scp)
print(utter.ref)
-- cf2 = nerv.ChunkFile("feat_256", "r")
-- input = cf2:read_chunk("input", gconf)

-- for i = 0, 157 - 10 do
--     row_diff = input.trans[i] - utter.main_scp[i]
--     for j = 0, row_diff:ncol() - 1 do
--         nerv.printf("%.8f ", row_diff[j])
--     end
--     nerv.printf("\n")
-- end
diff --git a/htk_io/examples/tnet_sgd_buffer.lua b/htk_io/examples/tnet_sgd_buffer.lua new file mode 100644 index 0000000..152d2f5 --- /dev/null +++ b/htk_io/examples/tnet_sgd_buffer.lua @@ -0,0 +1,70 @@
-- Example: feed a nerv.TNetReader into a nerv.SGDBuffer and print each batch.
-- NOTE(review): requires 'speech.init'; this package installs its Lua code
-- under htk_io/init.lua -- confirm the module name.
require 'speech.init'
gconf = {cumat_type = nerv.CuMatrixFloat,
         mmat_type = nerv.MMatrixFloat,
         batch_size = 256}
param_repo = nerv.ParamRepo({"global_transf.nerv"})

sublayer_repo = nerv.LayerRepo(
    {
        ["nerv.BiasLayer"] =
        {
            blayer1 = {{bias = "bias1"}, {dim_in = {429}, dim_out = {429}}},
            blayer2 = {{bias = "bias2"}, {dim_in = {429}, dim_out = {429}}}
        },
        ["nerv.WindowLayer"] =
        {
            wlayer1 = {{window = "window1"}, {dim_in = {429}, dim_out = {429}}},
            wlayer2 = {{window = "window2"}, {dim_in = {429}, dim_out = {429}}}
        }
    }, param_repo, gconf)

layer_repo = nerv.LayerRepo(
    {
        ["nerv.DAGLayer"] =
        {
            main = {{}, {
                dim_in = {429}, dim_out = {429},
                sub_layers = sublayer_repo,
                connections = {
                    ["<input>[1]"] = "blayer1[1]",
                    ["blayer1[1]"] = "wlayer1[1]",
                    ["wlayer1[1]"] = "blayer2[1]",
                    ["blayer2[1]"] = "wlayer2[1]",
                    ["wlayer2[1]"] = "<output>[1]"
                }
            }}
        }
    }, param_repo, gconf)

-- Pass the real global configuration: with an empty table the reader's
-- self.gconf.cumat_type / self.gconf.mmat_type would be nil in get_data.
tnet_reader = nerv.TNetReader(gconf,
    {
        id = "main_scp",
--        scp_file = "/slfs1/users/mfy43/swb_ivec/train_bp.scp",
        scp_file = "t.scp",
        conf_file = "/slfs1/users/mfy43/swb_ivec/plp_0_d_a.conf",
        frm_ext = 5,
        mlfs = {
            ref = {
                file = "/slfs1/users/mfy43/swb_ivec/ref.mlf",
                format = "map",
                format_arg = "/slfs1/users/mfy43/swb_ivec/dict",
                dir = "*/",
                ext = "lab"
            }
        },
        global_transf = layer_repo:get_layer("main")
    })

buffer = nerv.SGDBuffer(gconf,
    {
        buffer_size = 1024,
        readers = {
            { reader = tnet_reader,
              data = {main_scp = 429, ref = 1}}
        }
    })

for data in buffer.get_data, buffer do
    print(data.main_scp)
--    print(data.ref)
end
diff --git a/htk_io/htk_io-scm-1.rockspec b/htk_io/htk_io-scm-1.rockspec new file mode 100644 index 0000000..59fa8f0 --- /dev/null +++ b/htk_io/htk_io-scm-1.rockspec @@ -0,0 +1,36 @@
-- LuaRocks specification for the htk_io module; the build is delegated to
-- the Makefile, which receives the variables listed below.
package = "htk_io"
version = "scm-1"
source = {
    url = "https://github.com/Determinant/nerv-speech.git"
}
description = {
    summary = "HTK I/O support for Nerv",
    detailed = [[
    ]],
    homepage = "https://github.com/Determinant/nerv-speech",
    license = "BSD"
}
dependencies = {
    "nerv >= scm-1",
    "lua >= 5.1"
}
build = {
    type = "make",
    -- passed to "make build"
    build_variables = {
        CFLAGS="$(CFLAGS)",
        LIBFLAG="$(LIBFLAG)",
        LUA_LIBDIR="$(LUA_LIBDIR)",
        LUA_BINDIR="$(LUA_BINDIR)",
        LUA_INCDIR="$(LUA_INCDIR)",
        INST_PREFIX="$(PREFIX)",
        LUA="$(LUA)",
    },
    -- passed to "make install"
    install_variables = {
        LUA_BINDIR="$(LUA_BINDIR)",
        INST_PREFIX="$(PREFIX)",
        INST_BINDIR="$(BINDIR)",
        INST_LIBDIR="$(LIBDIR)",
        INST_LUADIR="$(LUADIR)",
        INST_CONFDIR="$(CONFDIR)",
    },
}
diff --git a/htk_io/init.c b/htk_io/init.c new file mode 100644 index 0000000..edd454f --- /dev/null +++ b/htk_io/init.c @@ -0,0 +1,8 @@
#include "../nerv/common.h"
#include <stdio.h>

extern void tnet_io_init(lua_State *L);

/* Entry point called by Lua for require 'libhtkio'; it delegates all
 * registration to tnet_io_init.
 * NOTE(review): returning 1 tells Lua that one value is on the stack --
 * confirm that tnet_io_init leaves one there. */
int luaopen_libhtkio(lua_State *L) {
    tnet_io_init(L);
    return 1;
}
diff --git a/htk_io/init.lua b/htk_io/init.lua new file mode 100644 index 0000000..27ece6e --- /dev/null +++ b/htk_io/init.lua @@ -0,0 +1,62 @@
require 'libhtkio'
local TNetReader = nerv.class("nerv.TNetReader", "nerv.DataReader")

-- Create a reader over an HTK feature list and its label files.
--   global_conf: table; `debug`, `cumat_type` and `mmat_type` are read from it
--   reader_conf: table with `id`, `frm_ext`, `scp_file`, `conf_file`,
--                `global_transf` and an optional `mlfs` map of
--                id -> {file, format, format_arg, dir, ext}
function TNetReader:__init(global_conf, reader_conf)
    self.feat_id = reader_conf.id
    self.frm_ext = reader_conf.frm_ext
    self.gconf = global_conf
    self.global_transf = reader_conf.global_transf
    self.debug = global_conf.debug
    if self.debug == nil then
        self.debug = false
    end
    self.feat_repo = nerv.TNetFeatureRepo(reader_conf.scp_file,
                                          reader_conf.conf_file,
                                          reader_conf.frm_ext)
    self.lab_repo = {}
    -- a reader without label files is valid: treat a missing `mlfs` as empty
    for id, mlf_spec in pairs(reader_conf.mlfs or {}) do
        self.lab_repo[id] = nerv.TNetLabelRepo(mlf_spec.file,
                                               mlf_spec.format,
                                               mlf_spec.format_arg,
                                               mlf_spec.dir,
                                               mlf_spec.ext)
    end
end

-- Read the current utterance, apply the global transform and return a table
-- holding the trimmed features under `feat_id` plus one entry per label
-- repository. Returns nil once the feature repository is exhausted.
function TNetReader:get_data()
    if self.feat_repo:is_end() then
        return nil
    end
    local res = {}
    local frm_ext = self.frm_ext
    local step = frm_ext * 2 + 1
    -- read HTK feature
    local feat_utter = self.feat_repo:cur_utter(self.debug)
    -- expand the feature
    local expanded = self.gconf.cumat_type(feat_utter:nrow(), feat_utter:ncol() * step)
    expanded:expand_frm(self.gconf.cumat_type.new_from_host(feat_utter), frm_ext)
    -- rearrange the feature (``transpose'' operation in TNet)
    local rearranged = expanded:create()
    rearranged:rearrange_frm(expanded, step)
    -- prepare for transf
    local input = {rearranged}
    local output = {rearranged:create()}
    -- do transf
    self.global_transf:init(input[1]:nrow())
    self.global_transf:propagate(input, output)
    -- trim frames: copy rows [frm_ext, nrow - frm_ext) into a host matrix
    local trimmed = self.gconf.mmat_type(output[1]:nrow() - frm_ext * 2, output[1]:ncol())
    output[1]:copy_toh(trimmed, frm_ext, feat_utter:nrow() - frm_ext)
    res[self.feat_id] = trimmed
    -- add corresponding labels
    for id, repo in pairs(self.lab_repo) do
        local lab_utter = repo:get_utter(self.feat_repo,
                                         trimmed:nrow(),
                                         self.debug)
        res[id] = lab_utter
    end
    -- move the pointer to next
    self.feat_repo:next()
    collectgarbage("collect")
    return res
end
diff --git a/htk_io/src/KaldiLib/Common.cc b/htk_io/src/KaldiLib/Common.cc new file mode 100644 index 0000000..40909ee --- /dev/null +++ b/htk_io/src/KaldiLib/Common.cc @@ -0,0 +1,277 @@
+#include <string> +#include <stdexcept> +#include <cmath> +#include <cfloat> +#include <cstdio> + +#include "Common.h" +#include "MathAux.h" + + +/// Defines the white chars for string trimming +#if !defined(WHITE_CHARS) +# define WHITE_CHARS " \t" +#endif + +namespace TNet { + +#include <ios> + + // Allocating stream variable used by stream modifier MatrixVectorIostreamControl + const int MATRIX_IOS_FORMAT_IWORD = std::ios_base::xalloc(); + + //*************************************************************************** + //*************************************************************************** + int getHTKstr(char *str) + { + char termChar = '\0'; + char *chrptr = str; + + while (std::isspace(*chrptr)) ++chrptr; + + if (*chrptr == '\'' || *chrptr == '"') { + termChar = *chrptr; + chrptr++; + } + + for (; *chrptr; chrptr++) { + if (*chrptr == '\'' || *chrptr == '"') { + if (termChar == *chrptr) { + termChar = '\0'; + chrptr++; + break; + } + } + + if (std::isspace(*chrptr) && !termChar) { + break; + } + + if (*chrptr == '\\') { + ++chrptr; + if (*chrptr == '\0' || (*chrptr >= '0' && *chrptr <= '7' && + (*++chrptr < '0' || *chrptr > '7' || + *++chrptr < '0' || *chrptr > '7'))) { + return -1; + } + + if (*chrptr >= '0' && *chrptr <= '7') { + *chrptr = (char)((*chrptr - '0') + (chrptr[-1] - '0') * 8 + (chrptr[-2] - '0') * 64); + } + } + *str++ = *chrptr; + } + + if (termChar) { + return -2; + } + + *str = '\0'; + + return 0; + } + + + //***************************************************************************** + //***************************************************************************** + void + ParseHTKString(const std::string & rIn, std::string & rOut) + { + int ret_val; + + // the new string will be at most as long as the original, so we allocate + // space + char* new_str = new char[rIn.size() + 1]; + + char* p_htk_str = new_str; + + strcpy(p_htk_str, rIn.c_str()); + ret_val = getHTKstr(p_htk_str); + + // call the function + if (!ret_val) { + rOut = 
p_htk_str; + } + + delete [] new_str; + + if (ret_val) { + throw std::runtime_error("Error parsing HTK string"); + } + } + + + + //*************************************************************************** + //*************************************************************************** + bool + IsBigEndian() + { + int a = 1; + return (bool) ((char *) &a)[0] != 1; + } + + + //*************************************************************************** + //*************************************************************************** + void + MakeHtkFileName(char* pOutFileName, const char* inFileName, + const char* out_dir, const char* out_ext) + { + const char* base_name; + const char* bname_end = NULL; + const char* chrptr; + + // if (*inFileName == '*' && *++inFileName == '/') ++inFileName; + + // we don't do anything if file is stdin/out + if (!strcmp(inFileName, "-")) + { + pOutFileName[0] = '-'; + pOutFileName[1] = '\0'; + return; + } + + base_name = strrchr(inFileName, '/'); + base_name = base_name != NULL ? 
base_name + 1 : inFileName; + + if (out_ext) bname_end = strrchr(base_name, '.'); + if (!bname_end) bname_end = base_name + strlen(base_name); + + + if ((chrptr = strstr(inFileName, "/./")) != NULL) + { + // what is in path after /./ serve as base name + base_name = chrptr + 3; + } + /* else if (*inFileName != '/') + { + // if inFileName isn't absolut path, don't forget directory structure + base_name = inFileName; + }*/ + + *pOutFileName = '\0'; + if (out_dir) + { + if (*out_dir) + { + strcat(pOutFileName, out_dir); + strcat(pOutFileName, "/"); + } + strncat(pOutFileName, base_name, bname_end-base_name); + } + else + { + strncat(pOutFileName, inFileName, bname_end-inFileName); + } + + if (out_ext && *out_ext) + { + strcat(pOutFileName, "."); + strcat(pOutFileName, out_ext); + } + } + + + //**************************************************************************** + //**************************************************************************** + bool + CloseEnough(const float f1, const float f2, const float nRounds) + { + bool ret_val = (_ABS((f1 - f2) / (f2 == 0.0f ? 1.0f : f2)) + < (nRounds * FLT_EPSILON)); + + return ret_val; + } + + + //**************************************************************************** + //**************************************************************************** + bool + CloseEnough(const double f1, const double f2, const double nRounds) + { + bool ret_val = (_ABS((f1 - f2) / (f2 == 0.0 ? 
1.0 : f2)) + < (nRounds * DBL_EPSILON)); + + return ret_val; + } + + + //**************************************************************************** + //**************************************************************************** + char* + ExpandHtkFilterCmd(const char *command, const char *filename, const char* pFilter) + { + + char *out, *outend; + const char *chrptr = command; + int ndollars = 0; + int fnlen = strlen(filename); + + while (*chrptr++) ndollars += (*chrptr == *pFilter); + + out = (char*) malloc(strlen(command) - ndollars + ndollars * fnlen + 1); + + outend = out; + + for (chrptr = command; *chrptr; chrptr++) { + if (*chrptr == *pFilter) { + strcpy(outend, filename); + outend += fnlen; + } else { + *outend++ = *chrptr; + } + } + *outend = '\0'; + return out; + } + + //*************************************************************************** + //*************************************************************************** + char * + StrToUpper(char *str) + { + char *chptr; + for (chptr = str; *chptr; chptr++) { + *chptr = (char)toupper(*chptr); + } + return str; + } + + + //**************************************************************************** + //**************************************************************************** + std::string& + Trim(std::string& rStr) + { + // WHITE_CHARS is defined in common.h + std::string::size_type pos = rStr.find_last_not_of(WHITE_CHARS); + if(pos != std::string::npos) + { + rStr.erase(pos + 1); + pos = rStr.find_first_not_of(WHITE_CHARS); + if(pos != std::string::npos) rStr.erase(0, pos); + } + else + rStr.erase(rStr.begin(), rStr.end()); + + return rStr; + } + + +} // namespace TNet + +//#ifdef CYGWIN + +void assertf(const char *c, int i, const char *msg){ + printf("Assertion \"%s\" failed: file \"%s\", line %d\n", msg?msg:"(null)", c?c:"(null)", i); + abort(); +} + + +void assertf_throw(const char *c, int i, const char *msg){ + char buf[2000]; + snprintf(buf, 1999, "Assertion \"%s\" failed, throwing 
exception: file \"%s\", line %d\n", msg?msg:"(null)", c?c:"(null)", i); + throw std::runtime_error((std::string)buf); +} +//#endif |