1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
|
require 'lfs'
require 'pl'
--- Build a closure that runs one pass of the network over a data set.
--
-- Parameters are imported once from `ifname` into a fresh nerv.ParamRepo; the
-- layer repo, network, global transform and input order are then obtained
-- from make_layer_repo / get_network / get_global_transf / get_input_order.
-- NOTE(review): those helpers (and make_buffer, make_readers, print_stat,
-- get_accuracy) are not defined in the visible part of this file; presumably
-- the network config loaded with dofile provides them -- confirm.
--
-- @param ifname value forwarded to ParamRepo:import (the script passes
--               gconf.initialized_param)
-- @return function(prefix, scp_file, bp) performing one pass and returning
--         get_accuracy(layer_repo):
--           prefix   - when non-nil and bp is false, the parameters are
--                      exported to "<prefix>_cv<accuracy>.nerv"
--           scp_file - forwarded to make_readers
--           bp       - when true, back-propagate and update after each
--                      batch; also stored in gconf.randomize
local function build_trainer(ifname)
    local param_repo = nerv.ParamRepo()
    param_repo:import(ifname, nil, gconf)
    local layer_repo = make_layer_repo(param_repo)
    local network = get_network(layer_repo)
    local global_transf = get_global_transf(layer_repo)
    local input_order = get_input_order()
    -- pick the host or the CUDA matrix class
    local mat_type
    if gconf.use_cpu then
        mat_type = gconf.mmat_type
    else
        mat_type = gconf.cumat_type
    end
    local iterative_trainer = function (prefix, scp_file, bp)
        gconf.randomize = bp
        -- build buffer
        local buffer = make_buffer(make_readers(scp_file, layer_repo))
        -- initialize the network
        network:init(gconf.batch_size)
        gconf.cnt = 0
        -- kept local: these used to leak into the global table on every call
        local err_input = {mat_type(gconf.batch_size, 1)}
        err_input[1]:fill(1)
        for data in buffer.get_data, buffer do
            -- print stat periodically (every 1000 batches), then restart
            -- the counter
            gconf.cnt = gconf.cnt + 1
            if gconf.cnt == 1000 then
                print_stat(layer_repo)
                mat_type.print_profile()
                mat_type.clear_profile()
                gconf.cnt = 0
                -- break
            end
            local input = {}
            -- if gconf.cnt == 1000 then break end
            for _, e in ipairs(input_order) do
                local id = e.id
                if data[id] == nil then
                    nerv.error("input data %s not found", id)
                end
                local transformed
                if e.global_transf then
                    transformed = nerv.speech_utils.global_transf(data[id],
                                        global_transf,
                                        gconf.frm_ext or 0, 0,
                                        gconf)
                else
                    transformed = data[id]
                end
                table.insert(input, transformed)
            end
            local output = {mat_type(gconf.batch_size, 1)}
            -- one matrix per input, obtained from input[i]:create()
            local err_output = {}
            for i = 1, #input do
                table.insert(err_output, input[i]:create())
            end
            network:propagate(input, output)
            if bp then
                network:back_propagate(err_input, err_output, input, output)
                network:update(err_input, input, output)
            end
            -- collect garbage in-time to save GPU memory
            collectgarbage("collect")
        end
        print_stat(layer_repo)
        mat_type.print_profile()
        mat_type.clear_profile()
        -- parameters are only written after a non-training pass that was
        -- given a file name prefix
        if (not bp) and prefix ~= nil then
            nerv.info("writing back...")
            local fname = string.format("%s_cv%.3f.nerv",
                                        prefix, get_accuracy(layer_repo))
            network:get_params():export(fname, nil)
        end
        return get_accuracy(layer_repo)
    end
    return iterative_trainer
end
--- Resolve every setting listed in `spec` and store the result in the global
-- `gconf`.
--
-- Precedence per key: command-line option value (global `opts`, looked up
-- under the key with '_' replaced by '-'), then the value already present in
-- `gconf`, then the default from `spec`.
--
-- A value of `false` already stored in `gconf` is respected and is no longer
-- replaced by a truthy default. A spec key that has no matching entry in
-- `opts` is resolved from `gconf` / the default instead of raising an error.
--
-- NOTE(review): a falsy command-line value is still treated as "not given",
-- as before; whether parse_args can report an explicit `false` is not visible
-- from here -- confirm before tightening this tier.
--
-- @param spec table mapping config keys to their default values
local function check_and_add_defaults(spec)
    for k, v in pairs(spec) do
        -- parentheses keep only gsub's first result (the rewritten name)
        local opt_name = (string.gsub(k, '_', '-'))
        local opt = opts[opt_name]
        local chosen = opt and opt.val
        if not chosen then
            chosen = gconf[k]
            if chosen == nil then
                chosen = v
            end
        end
        gconf[k] = chosen
    end
end
--- Derive command-line option descriptors from a table of defaults.
--
-- Every key of `spec` yields one descriptor of the form
-- {name, nil, type_name, default = value}, where `name` is the key with
-- '_' replaced by '-' and `type_name` is the Lua type of the default.
-- Descriptors are appended in `pairs` order, which is unspecified.
--
-- @param spec table mapping config keys to default values
-- @return array of option descriptors
local function make_options(spec)
    local options = {}
    for key, default in pairs(spec) do
        -- parentheses drop gsub's second result (the substitution count)
        local opt_name = (string.gsub(key, '_', '-'))
        options[#options + 1] = {opt_name, nil, type(default), default = default}
    end
    return options
end
--- Print the usage banner followed by the description of every option.
-- @param options option descriptors, handed unchanged to nerv.print_usage
local function print_help(options)
    local banner = "Usage: <asr_trainer.lua> [options] network_config.lua\n"
    nerv.printf(banner)
    nerv.print_usage(options)
end
--- Print every entry of the global `gconf` as an aligned "key = value" table.
--
-- Keys and values are converted with tostring, so `false` settings are shown
-- as "false" (they used to be printed as an empty string) and non-string
-- keys or values no longer depend on how `%s` treats them. The key column is
-- padded to the longest key; rows follow `pairs` order, which is unspecified.
local function print_gconf()
    local key_maxlen = 0
    for k in pairs(gconf) do
        key_maxlen = math.max(key_maxlen, #tostring(k))
    end
    -- the row format is the same for every line, so build it only once
    local row_format = string.format("%%-%ds = %%s\n", key_maxlen)
    nerv.info("ready to train with the following gconf settings:")
    nerv.printf(row_format, "Key", "Value")
    for k, v in pairs(gconf) do
        nerv.printf(row_format, tostring(k), tostring(v))
    end
end
-- Trainer-level defaults. The table is turned into command-line options by
-- make_options and used as the last-resort value for each gconf key by
-- check_and_add_defaults.
local trainer_defaults = {
-- learning rate; multiplied by halving_factor after an iteration once
-- do_halving is set
lrate = 0.8,
-- passed to network:init and used as the row count of the matrices the
-- trainer allocates
batch_size = 256,
-- not referenced in the visible part of this file; presumably consumed by
-- the data buffer -- TODO confirm
buffer_size = 81920,
-- wcost / momentum are not referenced in the visible part of this file;
-- presumably read by the layers during update -- TODO confirm
wcost = 1e-6,
momentum = 0.9,
-- halving is switched on when the cross-validation gain of an iteration is
-- below this value (and the iteration number has reached min_halving)
start_halving_inc = 0.5,
-- factor applied to lrate after each iteration while halving is active
halving_factor = 0.6,
-- while halving, training stops when the gain is below this value (and the
-- iteration number is above min_iter)
end_halving_inc = 0.1,
-- the stop test only applies to iterations strictly after this one
min_iter = 1,
-- upper bound of the training loop
max_iter = 20,
-- first iteration at which halving may be switched on
min_halving = 5,
-- whether halving is active from the start
do_halving = false,
-- a nil field creates no table entry: the two lines below only name the
-- gconf keys the script reads; no option is generated and no default is
-- applied for them
tr_scp = nil,
cv_scp = nil,
-- matrix classes; build_trainer picks mmat_type when gconf.use_cpu is set,
-- cumat_type otherwise
cumat_type = nerv.CuMatrixFloat,
mmat_type = nerv.MMatrixFloat,
-- not referenced in the visible part of this file
debug = false
}
-- Command-line options: one per trainer default, plus --help / -h and --dir.
local options = make_options(trainer_defaults)
table.insert(options, {"help", "h", "boolean",
                       default = false, desc = "show this help information"})
table.insert(options, {"dir", nil, "string",
                       default = nil, desc = "specify the working directory"})
-- `arg` and `opts` stay global on purpose: check_and_add_defaults reads `opts`
arg, opts = nerv.parse_args(arg, options)
if #arg < 1 or opts["help"].val then
    print_help(options)
    return
end
-- run the network config given as the first positional argument
dofile(arg[1])
--[[
Rule: command-line option overrides network config overrides trainer default.
Note: config key like aaa_bbbb_cc could be overridden by specifying
--aaa-bbbb-cc to command-line arguments.
]]--
check_and_add_defaults(trainer_defaults)
local pf0 = gconf.initialized_param
local trainer = build_trainer(pf0)
-- baseline accuracy on the cross-validation set, before any training
local accu_best = trainer(nil, gconf.cv_scp, false)
local date_pattern = "%Y%m%d%H%M%S"
local logfile_name = "log"
local working_dir = opts["dir"].val or string.format("nerv_%s", os.date(date_pattern))
print_gconf()
-- lfs.mkdir can fail for reasons other than an existing directory
-- (permissions, missing parent, ...), so report the reason it returns
local created, mkdir_err = lfs.mkdir(working_dir)
if not created then
    nerv.error("[asr_trainer] cannot create working directory %s: %s",
               working_dir, tostring(mkdir_err))
end
-- copy the network config
dir.copyfile(arg[1], working_dir)
-- set logfile path
nerv.set_logfile(path.join(working_dir, logfile_name))
-- from here on, relative paths resolve inside the working directory
path.chdir(working_dir)
nerv.info("initial cross validation: %.3f", accu_best)
-- Main loop: one training pass and one cross-validation pass per iteration,
-- with learning-rate halving driven by the cross-validation gain.
for iter = 1, gconf.max_iter do
    nerv.info("[NN] begin iteration %d with lrate = %.6f", iter, gconf.lrate)
    local accu_tr = trainer(nil, gconf.tr_scp, true)
    nerv.info("[TR] training set %d: %.3f", iter, accu_tr)
    -- name prefix for the exported parameters: the first initial parameter
    -- file without its directories and extension, followed by a timestamp,
    -- the iteration number, the learning rate and the training accuracy
    local param_file = (string.gsub(pf0[1], "(.*/)(.*)", "%2"))
    local param_stem = (string.gsub(param_file, "(.*)%..*", "%1"))
    local cv_prefix = string.format("%s_%s_iter_%d_lr%f_tr%.3f",
                                    param_stem,
                                    os.date(date_pattern),
                                    iter, gconf.lrate,
                                    accu_tr)
    local accu_new = trainer(cv_prefix, gconf.cv_scp, false)
    nerv.info("[CV] cross validation %d: %.3f", iter, accu_new)
    -- TODO: revert the weights
    local accu_diff = accu_new - accu_best
    -- stop once halving no longer brings a sufficient gain
    if gconf.do_halving
        and accu_diff < gconf.end_halving_inc
        and iter > gconf.min_iter then
        break
    end
    -- switch halving on when the gain becomes too small
    if accu_diff < gconf.start_halving_inc
        and iter >= gconf.min_halving then
        gconf.do_halving = true
    end
    if gconf.do_halving then
        gconf.lrate = gconf.lrate * gconf.halving_factor
    end
    -- remember the best cross-validation accuracy seen so far
    if accu_new > accu_best then
        accu_best = accu_new
    end
    -- nerv.Matrix.print_profile()
end
|