Diffstat (limited to 'nerv/examples/ptb')
-rw-r--r--    nerv/examples/ptb/main.lua            118
-rw-r--r--    nerv/examples/ptb/reader.lua           67
-rw-r--r--    nerv/examples/ptb/select_linear.lua    63
3 files changed, 248 insertions, 0 deletions
diff --git a/nerv/examples/ptb/main.lua b/nerv/examples/ptb/main.lua
new file mode 100644
index 0000000..688716b
--- /dev/null
+++ b/nerv/examples/ptb/main.lua
@@ -0,0 +1,118 @@
+nerv.include('reader.lua')
+nerv.include('select_linear.lua')
+
+gconf = {
+    chunk_size = 5,
+    dropout_rate = 0,
+    lrate = 1.5,
+    wcost = 1e-5,
+    max_iter = 3,
+    clip = 5,
+    momentum = 0,
+    batch_size = 200,
+    test = true,
+}
+
+local hidden_size = 300
+local vocab_size = 10000
+local layer_num = 1
+local dropout_rate = 0.5
+local trainer = nerv.Trainer
+
+function trainer:make_layer_repo(param_repo)
+    local layers = {
+        ['nerv.LSTMLayer'] = {},
+        ['nerv.DropoutLayer'] = {},
+        ['nerv.SelectLinearLayer'] = {
+            ['select'] = {dim_in = {1}, dim_out = {hidden_size}, vocab = vocab_size, pr = param_repo},
+        },
+        ['nerv.AffineLayer'] = {
+            output = {dim_in = {hidden_size}, dim_out = {vocab_size}, pr = param_repo},
+        },
+        ['nerv.SoftmaxCELayer'] = {
+            softmax = {dim_in = {vocab_size, 1}, dim_out = {1}, compressed = true},
+        },
+    }
+    for i = 1, layer_num do
+        layers['nerv.LSTMLayer']['lstm' .. i] = {dim_in = {hidden_size}, dim_out = {hidden_size}, pr = param_repo}
+        layers['nerv.DropoutLayer']['dropout' .. i] = {dim_in = {hidden_size}, dim_out = {hidden_size}}
+    end
+    return nerv.LayerRepo(layers, param_repo, gconf)
+end
+
+function trainer:get_network(layer_repo)
+    local connections = {
+        {'<input>[1]', 'select[1]', 0},
+        {'select[1]', 'lstm1[1]', 0},
+        {'dropout' .. layer_num .. '[1]', 'output[1]', 0},
+        {'output[1]', 'softmax[1]', 0},
+        {'<input>[2]', 'softmax[2]', 0},
+        {'softmax[1]', '<output>[1]', 0},
+    }
+    for i = 1, layer_num do
+        table.insert(connections, {'lstm' .. i .. '[1]', 'dropout' .. i .. '[1]', 0})
+        if i > 1 then
+            table.insert(connections, {'dropout' .. (i - 1) .. '[1]', 'lstm' .. i .. '[1]', 0})
+        end
+    end
+    return nerv.GraphLayer('graph', gconf, {dim_in = {1, 1}, dim_out = {1}, layer_repo = layer_repo, connections = connections})
+end
+
+function trainer:get_input_order()
+    return {'input', 'label'}
+end
+
+function trainer:get_readers(dataset)
+    local data_path = 'nerv/nerv/examples/lmptb/PTBdata/'
+    local vocab_file = data_path .. 'vocab'
+    local train_file = data_path .. 'ptb.train.txt.adds'
+    local cv_file = data_path .. 'ptb.valid.txt.adds'
+    local test_file = data_path .. 'ptb.test.txt.adds'
+    local reader
+    if dataset == 'train' then
+        reader = nerv.Reader(vocab_file, train_file)
+    elseif dataset == 'validate' then
+        reader = nerv.Reader(vocab_file, cv_file)
+    elseif dataset == 'test' then
+        reader = nerv.Reader(vocab_file, test_file)
+    else
+        nerv.error('no such dataset')
+    end
+    return {{reader = reader, data = {input = 1, label = 1}}}
+end
+
+local total_err
+local total_frame
+
+function trainer:get_error()
+    return math.pow(10, -total_err / total_frame)
+end
+
+function trainer:epoch_preprocess(dataset, do_train)
+    if dataset == 'train' then
+        gconf.dropout_rate = dropout_rate
+        nerv.info('set dropout rate to %f', dropout_rate)
+    end
+    if dataset == 'validate' then
+        gconf.dropout_rate = 0
+        nerv.info('set dropout rate to 0')
+    end
+    if dataset == 'test' then
+        gconf.dropout_rate = 0
+        nerv.info('set dropout rate to 0')
+    end
+    total_err = 0
+    total_frame = 0
+end
+
+function trainer:mini_batch_middleprocess(cnt, info)
+    for t = 1, gconf.chunk_size do
+        local tmp = info.output[1][t]:new_to_host()
+        for i = 1, gconf.batch_size do
+            total_err = total_err + math.log10(math.exp(tmp[i - 1][0]))
+        end
+    end
+    for i = 1, gconf.batch_size do
+        total_frame = total_frame + info.seq_length[i]
+    end
+end
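
Note on the perplexity bookkeeping in main.lua: mini_batch_middleprocess converts each frame's softmax output to a base-10 logarithm (math.log10(math.exp(x)) is just x / math.log(10)), and get_error then reports 10^(-total_err / total_frame). This yields a perplexity above 1 only if the accumulated values are per-frame log-probabilities, i.e. negative numbers, which appears to be the convention here. A minimal standalone sketch of the same arithmetic, with made-up frame values standing in for tmp[i - 1][0]:

    -- hypothetical per-frame log-probabilities (natural log)
    local frame_ll = {-4.6, -5.1, -3.9}
    local total_err, total_frame = 0, #frame_ll
    for _, x in ipairs(frame_ll) do
        total_err = total_err + math.log10(math.exp(x)) -- same as x / math.log(10)
    end
    print(math.pow(10, -total_err / total_frame))       -- perplexity, roughly 93 here
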
diff --git a/nerv/examples/ptb/reader.lua b/nerv/examples/ptb/reader.lua
new file mode 100644
index 0000000..70c0c97
--- /dev/null
+++ b/nerv/examples/ptb/reader.lua
@@ -0,0 +1,67 @@
+local Reader = nerv.class('nerv.Reader')
+
+function Reader:__init(vocab_file, input_file)
+    self:get_vocab(vocab_file)
+    self:get_seq(input_file)
+    self.offset = 1
+end
+
+function Reader:get_vocab(vocab_file)
+    local f = io.open(vocab_file, 'r')
+    local id = 0
+    self.vocab = {}
+    while true do
+        local word = f:read()
+        if word == nil then
+            break
+        end
+        self.vocab[word] = id
+        id = id + 1
+    end
+    self.size = id
+end
+
+function Reader:split(s, t)
+    local ret = {}
+    for x in (s .. t):gmatch('(.-)' .. t) do
+        table.insert(ret, x)
+    end
+    return ret
+end
+
+function Reader:get_seq(input_file)
+    local f = io.open(input_file, 'r')
+    self.seq = {}
+    while true do
+        local seq = f:read()
+        if seq == nil then
+            break
+        end
+        seq = self:split(seq, ' ')
+        local tmp = {}
+        for i = 1, #seq do
+            if seq[i] ~= '' then
+                table.insert(tmp, self.vocab[seq[i]])
+            end
+        end
+        table.insert(self.seq, tmp)
+    end
+end
+
+function Reader:get_data()
+    if self.offset > #self.seq then
+        return nil
+    end
+    local tmp = self.seq[self.offset]
+    local res = {
+        input = nerv.MMatrixFloat(#tmp - 1, 1),
+        label = nerv.MMatrixFloat(#tmp - 1, 1),
+    }
+    for i = 1, #tmp - 1 do
+        res.input[i - 1][0] = tmp[i]
+        res.label[i - 1][0] = tmp[i + 1]
+    end
+    self.offset = self.offset + 1
+    return res
+end
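
Note on Reader:split in reader.lua: appending one trailing delimiter lets the lazy capture '(.-)' .. t pick up the final token as well; consecutive delimiters produce empty strings, which is why get_seq skips tokens with seq[i] ~= ''. (The delimiter is spliced into the pattern unescaped, so this only works for delimiters without Lua pattern magic characters, such as the single space used here.) A quick standalone check of that behavior:

    local function split(s, t)
        local ret = {}
        for x in (s .. t):gmatch('(.-)' .. t) do
            table.insert(ret, x)
        end
        return ret
    end

    local toks = split('the  cat sat', ' ') -- note the double space
    -- toks is {'the', '', 'cat', 'sat'}; the empty token is filtered out by get_seq
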
diff --git a/nerv/examples/ptb/select_linear.lua b/nerv/examples/ptb/select_linear.lua
new file mode 100644
index 0000000..42778f8
--- /dev/null
+++ b/nerv/examples/ptb/select_linear.lua
@@ -0,0 +1,63 @@
+local SL = nerv.class('nerv.SelectLinearLayer', 'nerv.Layer')
+
+-- id: string
+-- global_conf: table
+-- layer_conf: table
+-- get the parameters
+function SL:__init(id, global_conf, layer_conf)
+    nerv.Layer.__init(self, id, global_conf, layer_conf)
+
+    self.vocab = layer_conf.vocab
+
+    self:check_dim_len(1, 1)
+    self:bind_params()
+end
+
+function SL:bind_params()
+    self.ltp = self:find_param("ltp", self.lconf, self.gconf, nerv.LinearTransParam, {self.vocab, self.dim_out[1]}) -- layer_conf.ltp
+end
+
+-- check the parameters
+function SL:init(batch_size)
+    if self.dim_in[1] ~= 1 then -- takes one word id per frame
+        nerv.error("mismatching dimensions of ltp and input")
+    end
+    if self.dim_out[1] ~= self.ltp.trans:ncol() then
+        nerv.error("mismatching dimensions of ltp and output")
+    end
+
+    self.batch_size = batch_size
+    self.ltp:train_init()
+end
+
+function SL:update()
+    -- use this to produce a reproducible result; don't forget to set the dropout rate to zero!
+    --for i = 1, input[1]:nrow(), 1 do
+    --    local word_vec = self.ltp.trans[input[1][i - 1][0]]
+    --    word_vec:add(word_vec, bp_err[1][i - 1], 1, -self.gconf.lrate / self.gconf.batch_size)
+    --end
+
+    -- the update_select_rows kernel (which uses atomicAdd) generates unreproducible results
+    self.ltp:update_by_err_input()
+end
+
+function SL:propagate(input, output)
+    --for i = 0, input[1]:ncol() - 1, 1 do
+    --    if (input[1][0][i] > 0) then
+    --        output[1][i]:copy_fromd(self.ltp.trans[input[1][0][i]])
+    --    else
+    --        output[1][i]:fill(0)
+    --    end
+    --end
+    output[1]:copy_rows_fromd_by_colidx(self.ltp.trans, input[1])
+end
+
+function SL:back_propagate(bp_err, next_bp_err, input, output)
+    -- the input is compressed, so do nothing here
+    self.ltp:back_propagate_by_err_input(bp_err[1], input[1]:decompress(self.vocab))
+end
+
+function SL:get_params()
+    local paramRepo = nerv.ParamRepo({self.ltp}, self.loc_type)
+    return paramRepo
+end
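
Note on SelectLinearLayer: propagate is an embedding lookup -- each word id in the input column selects the corresponding row of the ltp.trans matrix, which copy_rows_fromd_by_colidx performs on the device in a single call (the commented-out loop above it shows the same selection row by row). A plain-Lua sketch of that selection with a toy embedding table; the names and values here are illustrative, not NERV API:

    -- toy stand-in for ltp.trans: emb[id] is the embedding row for word id (ids from 0)
    local emb = {
        [0] = {0.1, 0.2, 0.3},
        [1] = {0.4, 0.5, 0.6},
        [2] = {0.7, 0.8, 0.9},
    }
    local word_ids = {2, 0, 1} -- one id per frame, as carried in input[1]
    local output = {}
    for i, id in ipairs(word_ids) do
        output[i] = emb[id] -- row selection, i.e. the embedding lookup
    end
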