summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--htk_io/htk_io-scm-1.rockspec1
-rw-r--r--htk_io/init.lua24
-rw-r--r--speech_utils/Makefile30
-rw-r--r--speech_utils/init.lua23
-rw-r--r--speech_utils/speech_utils-scm-1.rockspec36
5 files changed, 97 insertions, 17 deletions
diff --git a/htk_io/htk_io-scm-1.rockspec b/htk_io/htk_io-scm-1.rockspec
index a251d24..ed01d0f 100644
--- a/htk_io/htk_io-scm-1.rockspec
+++ b/htk_io/htk_io-scm-1.rockspec
@@ -12,6 +12,7 @@ description = {
}
dependencies = {
"nerv >= scm-1",
+ "speech_utils >= scm-1",
"lua >= 5.1"
}
build = {
diff --git a/htk_io/init.lua b/htk_io/init.lua
index 27ece6e..af1f740 100644
--- a/htk_io/init.lua
+++ b/htk_io/init.lua
@@ -1,4 +1,5 @@
require 'libhtkio'
+require 'speech_utils'
local TNetReader = nerv.class("nerv.TNetReader", "nerv.DataReader")
function TNetReader:__init(global_conf, reader_conf)
@@ -32,26 +33,15 @@ function TNetReader:get_data()
local step = frm_ext * 2 + 1
-- read HTK feature
local feat_utter = self.feat_repo:cur_utter(self.debug)
- -- expand the feature
- local expanded = self.gconf.cumat_type(feat_utter:nrow(), feat_utter:ncol() * step)
- expanded:expand_frm(self.gconf.cumat_type.new_from_host(feat_utter), frm_ext)
- -- rearrange the feature (``transpose'' operation in TNet)
- local rearranged = expanded:create()
- rearranged:rearrange_frm(expanded, step)
- -- prepare for transf
- local input = {rearranged}
- local output = {rearranged:create()}
- -- do transf
- self.global_transf:init(input[1]:nrow())
- self.global_transf:propagate(input, output)
- -- trim frames
- expanded = self.gconf.mmat_type(output[1]:nrow() - frm_ext * 2, output[1]:ncol())
- output[1]:copy_toh(expanded, frm_ext, feat_utter:nrow() - frm_ext)
- res[self.feat_id] = expanded
+ -- global transf
+ local transformed = nerv.speech_utils.global_transf(feat_utter,
+ self.global_transf, frm_ext, self.gconf)
+
+ res[self.feat_id] = transformed
-- add corresponding labels
for id, repo in pairs(self.lab_repo) do
local lab_utter = repo:get_utter(self.feat_repo,
- expanded:nrow(),
+ transformed:nrow(),
self.debug)
res[id] = lab_utter
end
diff --git a/speech_utils/Makefile b/speech_utils/Makefile
new file mode 100644
index 0000000..b288322
--- /dev/null
+++ b/speech_utils/Makefile
@@ -0,0 +1,43 @@
+# Build/install rules for the speech_utils rock.
+# LUA_BINDIR, LUA_INCDIR, INST_LUADIR and INST_LIBDIR are not set in this
+# file; the caller has to supply them.
+SHELL := /bin/bash
+BUILD_DIR := $(CURDIR)/build
+INC_PATH := $(LUA_BINDIR)/../include/
+# OBJS and LIBS are empty, so init.lua is the only thing that gets installed.
+OBJS :=
+LIBS :=
+LUA_LIBS := init.lua
+INCLUDE := -I $(LUA_INCDIR) -I $(INC_PATH) -DLUA_USE_APICHECK
+
+SUBDIR := src
+OBJ_DIR := $(BUILD_DIR)/objs
+LUA_DIR = $(INST_LUADIR)/speech_utils
+
+# Prefix the bare names above with their build/install directories.
+OBJS := $(addprefix $(OBJ_DIR)/,$(OBJS))
+LIBS := $(addprefix $(INST_LIBDIR)/,$(LIBS))
+OBJ_SUBDIR := $(addprefix $(OBJ_DIR)/,$(SUBDIR))
+LUA_SUBDIR := $(addprefix $(LUA_DIR)/,$(SUBDIR))
+LUA_LIBS := $(addprefix $(LUA_DIR)/,$(LUA_LIBS))
+LIB_PATH := $(LUA_BINDIR)/../lib
+
+# These targets are commands, not files. Declaring them phony matters here
+# because BUILD_DIR is a directory literally named "build" next to this
+# Makefile, which make would otherwise treat as the `build` target's output.
+.PHONY: build install clean
+build: $(OBJ_DIR) $(OBJ_SUBDIR) $(OBJS)
+install: $(LUA_DIR) $(LUA_SUBDIR) $(LUA_LIBS) $(LIBS)
+
+# Create build/install directories on demand; the leading '-' ignores failure.
+$(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR):
+	-mkdir -p $@
+# Install a Lua source file by copying it into LUA_DIR.
+$(LUA_DIR)/%.lua: %.lua
+	cp $< $@
+# Compile a C source into a position-independent object with debug info.
+$(OBJ_DIR)/%.o: %.c
+	gcc -o $@ -c $< -g $(INCLUDE) -fPIC
+# Remove compiled objects; -f keeps rm quiet when there is nothing to remove.
+clean:
+	-rm -f $(OBJ_DIR)/src/*.o
diff --git a/speech_utils/init.lua b/speech_utils/init.lua
new file mode 100644
index 0000000..5148664
--- /dev/null
+++ b/speech_utils/init.lua
@@ -0,0 +1,43 @@
+nerv.speech_utils = {}
+
+--- Run the global feature transform over one utterance.
+--
+-- In order: `feat_utter` is converted with `gconf.cumat_type.new_from_host`,
+-- widened by `expand_frm` into `2 * frm_ext + 1` times as many columns,
+-- rearranged by `rearrange_frm`, pushed through `global_transf`, and finally
+-- copied with `copy_toh` into a new `gconf.mmat_type` matrix that has
+-- `2 * frm_ext` fewer rows than the transform output.
+--
+-- @param feat_utter matrix exposing `nrow()`/`ncol()` and accepted by
+--        `gconf.cumat_type.new_from_host`
+-- @param global_transf object exposing `init(nrow)` and
+--        `propagate(input, output)`
+-- @param frm_ext number passed to `expand_frm` and used to trim rows
+--        (NOTE(review): presumably the context width per side, in frames,
+--        and small enough that `nrow - 2 * frm_ext` stays positive -- confirm)
+-- @param gconf table exposing the `cumat_type` and `mmat_type` constructors
+-- @return the `gconf.mmat_type` matrix holding the trimmed transform output
+function nerv.speech_utils.global_transf(feat_utter, global_transf, frm_ext, gconf)
+    local step = frm_ext * 2 + 1
+    -- expand the feature
+    local expanded = gconf.cumat_type(feat_utter:nrow(), feat_utter:ncol() * step)
+    expanded:expand_frm(gconf.cumat_type.new_from_host(feat_utter), frm_ext)
+    -- rearrange the feature (``transpose'' operation in TNet)
+    local rearranged = expanded:create()
+    rearranged:rearrange_frm(expanded, step)
+    -- prepare for transf
+    local input = {rearranged}
+    local output = {rearranged:create()}
+    -- do transf
+    global_transf:init(input[1]:nrow())
+    global_transf:propagate(input, output)
+    -- trim frames; NOTE(review): copy_toh presumably copies only the rows in
+    -- [frm_ext, nrow - frm_ext) -- confirm against the matrix API
+    local trimmed = gconf.mmat_type(output[1]:nrow() - frm_ext * 2, output[1]:ncol())
+    output[1]:copy_toh(trimmed, frm_ext, feat_utter:nrow() - frm_ext)
+    -- Release the intermediates before forcing a collection: as long as these
+    -- locals reference them, collectgarbage() cannot reclaim them.
+    expanded, rearranged, input, output = nil, nil, nil, nil
+    collectgarbage("collect")
+    return trimmed
+end
diff --git a/speech_utils/speech_utils-scm-1.rockspec b/speech_utils/speech_utils-scm-1.rockspec
new file mode 100644
index 0000000..6477822
--- /dev/null
+++ b/speech_utils/speech_utils-scm-1.rockspec
@@ -0,0 +1,36 @@
+package = "speech_utils" -- rock name; htk_io's rockspec depends on "speech_utils >= scm-1"
+version = "scm-1"
+source = {
+ url = "https://github.com/Determinant/nerv-speech.git"
+}
+description = {
+ summary = "Speech Recognition Utilities for Nerv",
+ detailed = [[
+ ]], -- long description is currently empty
+ homepage = "https://github.com/Determinant/nerv-speech",
+ license = "BSD"
+}
+dependencies = {
+ "nerv >= scm-1",
+ "lua >= 5.1"
+}
+build = {
+ type = "make", -- build and install are delegated to the Makefile
+ build_variables = { -- passed to the build step; the Makefile reads LUA_BINDIR and LUA_INCDIR
+ CFLAGS="$(CFLAGS)",
+ LIBFLAG="$(LIBFLAG)",
+ LUA_LIBDIR="$(LUA_LIBDIR)",
+ LUA_BINDIR="$(LUA_BINDIR)",
+ LUA_INCDIR="$(LUA_INCDIR)",
+ INST_PREFIX="$(PREFIX)",
+ LUA="$(LUA)",
+ },
+ install_variables = { -- passed to the install step; the Makefile reads INST_LUADIR and INST_LIBDIR
+ LUA_BINDIR="$(LUA_BINDIR)",
+ INST_PREFIX="$(PREFIX)",
+ INST_BINDIR="$(BINDIR)",
+ INST_LIBDIR="$(LIBDIR)",
+ INST_LUADIR="$(LUADIR)",
+ INST_CONFDIR="$(CONFDIR)",
+ },
+}