1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
|
require 'libkaldiio'
require 'speech_utils'
-- Class object for the Kaldi data reader, created through nerv.class under the
-- name "nerv.KaldiReader". The second argument, "nerv.DataReader", is
-- presumably the name of the parent class -- confirm against nerv.class.
local KaldiReader = nerv.class("nerv.KaldiReader", "nerv.DataReader")
--- Set up a reader over a Kaldi feature repository together with optional
-- label and lookup-feature repositories.
-- @param global_conf global configuration table; `use_cpu`, `mmat_type`,
--        `cumat_type` and `debug` are read here and the table itself is kept
--        as `self.gconf`
-- @param reader_conf reader configuration table; `id`, `frm_ext`, `need_key`,
--        `feature_rspecifier`, `mlfs` and `lookup` are read here
function KaldiReader:__init(global_conf, reader_conf)
    self.feat_id = reader_conf.id
    self.frm_ext = reader_conf.frm_ext
    -- for sequence training
    self.need_key = reader_conf.need_key
    self.gconf = global_conf

    -- `use_cpu` selects `mmat_type`; otherwise `cumat_type` is used
    if global_conf.use_cpu then
        self.mat_type = global_conf.mmat_type
    else
        self.mat_type = global_conf.cumat_type
    end

    -- an unset debug flag is stored as an explicit false
    local debug_flag = global_conf.debug
    if debug_flag == nil then
        debug_flag = false
    end
    self.debug = debug_flag

    self.feat_repo = nerv.KaldiFeatureRepo(reader_conf.feature_rspecifier)

    -- one label repository per entry of reader_conf.mlfs
    local label_repos = {}
    self.lab_repo = label_repos
    local mlfs = reader_conf.mlfs
    if mlfs then
        for label_id, spec in pairs(mlfs) do
            if spec.format == nil then
                nerv.error("format spec is expected for label %s", label_id)
            end
            label_repos[label_id] = nerv.KaldiLabelRepo(spec.targets_rspecifier,
                                                        spec.format)
        end
    end

    -- one lookup-feature repository per entry of reader_conf.lookup
    local lookup_repos = {}
    self.lookup_repo = lookup_repos
    local lookups = reader_conf.lookup
    if lookups then
        for lookup_id, spec in pairs(lookups) do
            if spec.map_rspecifier == nil then
                nerv.error("map spec is expected for lookup %s", lookup_id)
            end
            lookup_repos[lookup_id] = nerv.KaldiLookupFeatureRepo(spec.targets_rspecifier,
                                                                  spec.map_rspecifier)
        end
    end
end
--- Read the current utterance from the feature repository and assemble its
-- features, labels and lookup features into one table, then advance the
-- repository to the next utterance.
--
-- If any label repository returns nil for the utterance, the utterance is
-- reported as skipped and the following one is read instead.
--
-- @return a table holding the feature matrix under `self.feat_id`, the
--         utterance key under "key" when `self.need_key` is set, and one
--         entry per label / lookup repository id; or nil once the feature
--         repository reports its end
function KaldiReader:get_data()
    if self.feat_repo:is_end() then
        return nil
    end
    local gconf = self.gconf
    local fail_to_read_alignment = false
    local res = {}
    -- read Kaldi feature
    local raw = self.feat_repo:cur_utter(self.debug)
    if not gconf.use_cpu then
        raw = gconf.cumat_type.new_from_host(raw)
    end
    local rearranged
    if self.frm_ext and self.frm_ext > 0 then
        local step = self.frm_ext * 2 + 1
        -- expand the feature
        local expanded = self.mat_type(raw:nrow(), raw:ncol() * step)
        expanded:expand_frm(raw, self.frm_ext)
        -- rearrange the feature (``transpose'' operation in TNet)
        if gconf.rearrange then
            rearranged = expanded:create()
            rearranged:rearrange_frm(expanded, step)
        else
            rearranged = expanded
        end
    else
        rearranged = raw
    end
    local feat_utter
    if gconf.frm_trim then
        -- the result has frm_trim * 2 fewer rows than the input; the two
        -- extra arguments presumably select rows [frm_trim, nrow - frm_trim)
        -- as the copy source -- confirm against copy_toh
        feat_utter = gconf.mmat_type(rearranged:nrow() - gconf.frm_trim * 2,
                                     rearranged:ncol())
        rearranged:copy_toh(feat_utter, gconf.frm_trim,
                            rearranged:nrow() - gconf.frm_trim)
    else
        if gconf.use_cpu then
            feat_utter = rearranged
        else
            feat_utter = gconf.mmat_type(rearranged:nrow(), rearranged:ncol())
            rearranged:copy_toh(feat_utter)
        end
    end
    res[self.feat_id] = feat_utter
    if self.need_key then
        res["key"] = self.feat_repo:key()
    end
    local nframes = feat_utter:nrow()
    -- add corresponding labels
    for id, repo in pairs(self.lab_repo) do
        local lab_utter = repo:get_utter(self.feat_repo, nframes, self.debug)
        if lab_utter == nil then
            fail_to_read_alignment = true
        end
        res[id] = lab_utter
    end
    -- add corresponding lookup features
    for id, repo in pairs(self.lookup_repo) do
        local lookup_utter = repo:get_utter(self.feat_repo, nframes, self.debug)
        local nrow = lookup_utter:nrow()
        if nrow < nframes then
            -- repeat the last frame
            local nlu = lookup_utter:create(nframes)
            nlu:copy_from(lookup_utter, 0, nrow)
            for i = 0, nframes - nrow - 1 do
                nlu:copy_from(nlu, nrow - 1, nrow, nrow + i)
            end
            lookup_utter = nlu
        end
        res[id] = lookup_utter
    end
    -- Remember the key of the utterance being skipped *before* advancing:
    -- after next() the repository refers to the following utterance (or is
    -- at its end), so querying the key there would report the wrong one.
    local skipped_key
    if fail_to_read_alignment then
        skipped_key = self.feat_repo:key()
    end
    -- move the pointer to next
    self.feat_repo:next()
    collectgarbage("collect")
    if fail_to_read_alignment then
        nerv.info("[kaldi] utterance %s alignment not found, skip it.", skipped_key)
        -- tail call: a long run of skipped utterances does not grow the stack
        return self:get_data()
    end
    return res
end
|