From acd1bc3cf812f69a6260179b584f2a3f0e6d6b80 Mon Sep 17 00:00:00 2001
From: Determinant
Date: Wed, 5 Aug 2015 08:03:39 +0800
Subject: put global transformation in a separate library

---
 htk_io/htk_io-scm-1.rockspec             |  1 +
 htk_io/init.lua                          | 24 +++++++--------------
 speech_utils/Makefile                    | 30 ++++++++++++++++++++++++++
 speech_utils/init.lua                    | 23 ++++++++++++++++++++
 speech_utils/speech_utils-scm-1.rockspec | 36 ++++++++++++++++++++++++++++++++
 5 files changed, 97 insertions(+), 17 deletions(-)
 create mode 100644 speech_utils/Makefile
 create mode 100644 speech_utils/init.lua
 create mode 100644 speech_utils/speech_utils-scm-1.rockspec

diff --git a/htk_io/htk_io-scm-1.rockspec b/htk_io/htk_io-scm-1.rockspec
index a251d24..ed01d0f 100644
--- a/htk_io/htk_io-scm-1.rockspec
+++ b/htk_io/htk_io-scm-1.rockspec
@@ -12,6 +12,7 @@ description = {
 }
 dependencies = {
   "nerv >= scm-1",
+  "speech_utils >= scm-1",
   "lua >= 5.1"
 }
 build = {
diff --git a/htk_io/init.lua b/htk_io/init.lua
index 27ece6e..af1f740 100644
--- a/htk_io/init.lua
+++ b/htk_io/init.lua
@@ -1,4 +1,5 @@
 require 'libhtkio'
+require 'speech_utils'
 local TNetReader = nerv.class("nerv.TNetReader", "nerv.DataReader")
 
 function TNetReader:__init(global_conf, reader_conf)
@@ -32,26 +33,15 @@ function TNetReader:get_data()
     local step = frm_ext * 2 + 1
     -- read HTK feature
     local feat_utter = self.feat_repo:cur_utter(self.debug)
-    -- expand the feature
-    local expanded = self.gconf.cumat_type(feat_utter:nrow(), feat_utter:ncol() * step)
-    expanded:expand_frm(self.gconf.cumat_type.new_from_host(feat_utter), frm_ext)
-    -- rearrange the feature (``transpose'' operation in TNet)
-    local rearranged = expanded:create()
-    rearranged:rearrange_frm(expanded, step)
-    -- prepare for transf
-    local input = {rearranged}
-    local output = {rearranged:create()}
-    -- do transf
-    self.global_transf:init(input[1]:nrow())
-    self.global_transf:propagate(input, output)
-    -- trim frames
-    expanded = self.gconf.mmat_type(output[1]:nrow() - frm_ext * 2, output[1]:ncol())
-    output[1]:copy_toh(expanded, frm_ext, feat_utter:nrow() - frm_ext)
-    res[self.feat_id] = expanded
+    -- global transf
+    local transformed = nerv.speech_utils.global_transf(feat_utter,
+                            self.global_transf, frm_ext, self.gconf)
+
+    res[self.feat_id] = transformed
     -- add corresponding labels
     for id, repo in pairs(self.lab_repo) do
         local lab_utter = repo:get_utter(self.feat_repo,
-                                        expanded:nrow(),
+                                        transformed:nrow(),
                                         self.debug)
         res[id] = lab_utter
     end
diff --git a/speech_utils/Makefile b/speech_utils/Makefile
new file mode 100644
index 0000000..b288322
--- /dev/null
+++ b/speech_utils/Makefile
@@ -0,0 +1,30 @@
+SHELL := /bin/bash
+BUILD_DIR := $(CURDIR)/build
+INC_PATH := $(LUA_BINDIR)/../include/
+OBJS :=
+LIBS :=
+LUA_LIBS := init.lua
+INCLUDE := -I $(LUA_INCDIR) -I $(INC_PATH) -DLUA_USE_APICHECK
+
+SUBDIR := src
+OBJ_DIR := $(BUILD_DIR)/objs
+LUA_DIR = $(INST_LUADIR)/speech_utils
+
+OBJS := $(addprefix $(OBJ_DIR)/,$(OBJS))
+LIBS := $(addprefix $(INST_LIBDIR)/,$(LIBS))
+OBJ_SUBDIR := $(addprefix $(OBJ_DIR)/,$(SUBDIR))
+LUA_SUBDIR := $(addprefix $(LUA_DIR)/,$(SUBDIR))
+LUA_LIBS := $(addprefix $(LUA_DIR)/,$(LUA_LIBS))
+LIB_PATH := $(LUA_BINDIR)/../lib
+
+build: $(OBJ_DIR) $(OBJ_SUBDIR) $(OBJS)
+install: $(LUA_DIR) $(LUA_SUBDIR) $(LUA_LIBS) $(LIBS)
+
+$(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR):
+	-mkdir -p $@
+$(LUA_DIR)/%.lua: %.lua
+	cp $< $@
+$(OBJ_DIR)/%.o: %.c
+	gcc -o $@ -c $< -g $(INCLUDE) -fPIC
+clean:
+	-rm $(OBJ_DIR)/src/*.o
diff --git a/speech_utils/init.lua b/speech_utils/init.lua
new file mode 100644
index 0000000..5148664
--- /dev/null
+++ b/speech_utils/init.lua
@@ -0,0 +1,23 @@
+nerv.speech_utils = {}
+
+function nerv.speech_utils.global_transf(feat_utter, global_transf, frm_ext, gconf)
+    local res = {}
+    local step = frm_ext * 2 + 1
+    -- expand the feature
+    local expanded = gconf.cumat_type(feat_utter:nrow(), feat_utter:ncol() * step)
+    expanded:expand_frm(gconf.cumat_type.new_from_host(feat_utter), frm_ext)
+    -- rearrange the feature (``transpose'' operation in TNet)
+    local rearranged = expanded:create()
+    rearranged:rearrange_frm(expanded, step)
+    -- prepare for transf
+    local input = {rearranged}
+    local output = {rearranged:create()}
+    -- do transf
+    global_transf:init(input[1]:nrow())
+    global_transf:propagate(input, output)
+    -- trim frames
+    expanded = gconf.mmat_type(output[1]:nrow() - frm_ext * 2, output[1]:ncol())
+    output[1]:copy_toh(expanded, frm_ext, feat_utter:nrow() - frm_ext)
+    collectgarbage("collect")
+    return expanded
+end
diff --git a/speech_utils/speech_utils-scm-1.rockspec b/speech_utils/speech_utils-scm-1.rockspec
new file mode 100644
index 0000000..6477822
--- /dev/null
+++ b/speech_utils/speech_utils-scm-1.rockspec
@@ -0,0 +1,36 @@
+package = "speech_utils"
+version = "scm-1"
+source = {
+  url = "https://github.com/Determinant/nerv-speech.git"
+}
+description = {
+  summary = "Speech Recognition Utilities for Nerv",
+  detailed = [[
+  ]],
+  homepage = "https://github.com/Determinant/nerv-speech",
+  license = "BSD"
+}
+dependencies = {
+  "nerv >= scm-1",
+  "lua >= 5.1"
+}
+build = {
+  type = "make",
+  build_variables = {
+    CFLAGS="$(CFLAGS)",
+    LIBFLAG="$(LIBFLAG)",
+    LUA_LIBDIR="$(LUA_LIBDIR)",
+    LUA_BINDIR="$(LUA_BINDIR)",
+    LUA_INCDIR="$(LUA_INCDIR)",
+    INST_PREFIX="$(PREFIX)",
+    LUA="$(LUA)",
+  },
+  install_variables = {
+    LUA_BINDIR="$(LUA_BINDIR)",
+    INST_PREFIX="$(PREFIX)",
+    INST_BINDIR="$(BINDIR)",
+    INST_LIBDIR="$(LIBDIR)",
+    INST_LUADIR="$(LUADIR)",
+    INST_CONFDIR="$(CONFDIR)",
+  },
+}
-- 
cgit v1.2.3-70-g09d2