aboutsummaryrefslogtreecommitdiff
path: root/nerv/examples/ptb
diff options
context:
space:
mode:
Diffstat (limited to 'nerv/examples/ptb')
-rw-r--r--nerv/examples/ptb/main.lua118
-rw-r--r--nerv/examples/ptb/reader.lua67
-rw-r--r--nerv/examples/ptb/select_linear.lua63
3 files changed, 248 insertions, 0 deletions
diff --git a/nerv/examples/ptb/main.lua b/nerv/examples/ptb/main.lua
new file mode 100644
index 0000000..688716b
--- /dev/null
+++ b/nerv/examples/ptb/main.lua
@@ -0,0 +1,118 @@
+nerv.include('reader.lua')
+nerv.include('select_linear.lua')
+
+-- Global (not local) configuration table. Within this file it is handed to
+-- nerv.LayerRepo and nerv.GraphLayer, its dropout_rate field is rewritten by
+-- trainer:epoch_preprocess, and chunk_size / batch_size are read by
+-- trainer:mini_batch_middleprocess.
+gconf = {
+ chunk_size = 5, -- number of time steps mini_batch_middleprocess iterates over
+ dropout_rate = 0, -- overwritten by epoch_preprocess for 'train', 'validate' and 'test'
+ lrate = 1.5, -- not read in this file; presumably the learning rate -- TODO confirm
+ wcost = 1e-5, -- not read in this file; presumably a weight-cost term -- TODO confirm
+ max_iter = 3, -- not read in this file; presumably the epoch limit -- TODO confirm
+ clip = 5, -- not read in this file; presumably a gradient clipping bound -- TODO confirm
+ momentum = 0, -- not read in this file; consumer not visible here
+ batch_size = 200, -- number of rows summed per time step in mini_batch_middleprocess
+ test = true, -- not read in this file; presumably enables a test pass -- TODO confirm
+}
+
+-- Model dimensions shared by make_layer_repo and get_network.
+local hidden_size = 300 -- output width of 'select', width of every LSTM/dropout layer, input width of 'output'
+local vocab_size = 10000 -- vocab of 'select', output width of 'output', first input width of 'softmax'
+local layer_num = 1 -- number of lstmN/dropoutN layer pairs that are declared and wired
+local dropout_rate = 0.5 -- value copied into gconf.dropout_rate when the dataset is 'train'
+-- Alias of nerv.Trainer: the methods defined below are installed on nerv.Trainer itself.
+local trainer = nerv.Trainer
+
+--- Build the layer repository for the PTB language model.
+-- Declares one SelectLinear layer ('select'), `layer_num` pairs of LSTM and
+-- Dropout layers ('lstmN' / 'dropoutN'), one Affine layer ('output') and one
+-- SoftmaxCE layer ('softmax').
+-- @param param_repo parameter repository passed as `pr` to the layers that
+--        take parameters and to nerv.LayerRepo
+-- @return the value of nerv.LayerRepo(specs, param_repo, gconf)
+function trainer:make_layer_repo(param_repo)
+    -- Collect the per-depth layers first so the final spec table can be
+    -- written as a single constructor.
+    local lstm_specs, dropout_specs = {}, {}
+    for depth = 1, layer_num do
+        lstm_specs['lstm' .. depth] = {dim_in = {hidden_size}, dim_out = {hidden_size}, pr = param_repo}
+        dropout_specs['dropout' .. depth] = {dim_in = {hidden_size}, dim_out = {hidden_size}}
+    end
+
+    local layer_specs = {
+        ['nerv.LSTMLayer'] = lstm_specs,
+        ['nerv.DropoutLayer'] = dropout_specs,
+        ['nerv.SelectLinearLayer'] = {
+            ['select'] = {dim_in = {1}, dim_out = {hidden_size}, vocab = vocab_size, pr = param_repo},
+        },
+        ['nerv.AffineLayer'] = {
+            output = {dim_in = {hidden_size}, dim_out = {vocab_size}, pr = param_repo},
+        },
+        ['nerv.SoftmaxCELayer'] = {
+            softmax = {dim_in = {vocab_size, 1}, dim_out = {1}, compressed = true},
+        },
+    }
+    return nerv.LayerRepo(layer_specs, param_repo, gconf)
+end
+
+--- Wire the layers of the repository into the network graph.
+-- Data flow: <input>[1] -> select -> lstm1 -> dropout1 -> ... -> lstmN ->
+-- dropoutN -> output -> softmax -> <output>[1], with <input>[2] feeding the
+-- second port of softmax.
+-- @param layer_repo layer repository (see make_layer_repo) containing the
+--        layers named in the connections
+-- @return the value of nerv.GraphLayer('graph', gconf, {...})
+function trainer:get_network(layer_repo)
+    local connections = {
+        {'<input>[1]', 'select[1]', 0},
+        {'select[1]', 'lstm1[1]', 0},
+        {'dropout' .. layer_num .. '[1]', 'output[1]', 0},
+        {'output[1]', 'softmax[1]', 0},
+        {'<input>[2]', 'softmax[2]', 0},
+        {'softmax[1]', '<output>[1]', 0},
+    }
+    for i = 1, layer_num do
+        table.insert(connections, {'lstm' .. i .. '[1]', 'dropout' .. i .. '[1]', 0})
+        -- Every LSTM above the first takes its input from the dropout layer
+        -- directly below it. The condition must be `i > 1`: the previous
+        -- `i < 1` was never true for i >= 1, which left lstm2..lstmN without
+        -- any input whenever layer_num > 1.
+        if i > 1 then
+            table.insert(connections, {'dropout' .. (i - 1) .. '[1]', 'lstm' .. i .. '[1]', 0})
+        end
+    end
+    return nerv.GraphLayer('graph', gconf, {dim_in = {1, 1}, dim_out = {1}, layer_repo = layer_repo, connections = connections})
+end
+
+--- Names of the reader fields, in the order they are bound to the network
+-- inputs ('input' first, 'label' second).
+-- @return a fresh two-element array {'input', 'label'}
+function trainer:get_input_order()
+    local order = {'input', 'label'}
+    return order
+end
+
+--- Create the reader list for one of the PTB data splits.
+-- @param dataset 'train', 'validate' or 'test'; any other value is reported
+--        through nerv.error('no such dataset')
+-- @return a one-element array {{reader = <nerv.Reader>, data = {input = 1, label = 1}}}
+function trainer:get_readers(dataset)
+    local data_path = 'nerv/nerv/examples/lmptb/PTBdata/'
+    -- Corpus file name for each supported split.
+    local corpus_by_dataset = {
+        train = 'ptb.train.txt.adds',
+        validate = 'ptb.valid.txt.adds',
+        test = 'ptb.test.txt.adds',
+    }
+
+    local corpus = corpus_by_dataset[dataset]
+    local reader
+    if corpus == nil then
+        nerv.error('no such dataset')
+    else
+        reader = nerv.Reader(data_path .. 'vocab', data_path .. corpus)
+    end
+    return {{reader = reader, data = {input = 1, label = 1}}}
+end
+
+-- Running totals for the current epoch: both are reset to 0 by
+-- trainer:epoch_preprocess and accumulated by trainer:mini_batch_middleprocess.
+local total_err, total_frame
+
+--- Error figure for the current epoch.
+-- @return 10 ^ (-total_err / total_frame), where total_err is a sum of
+--         base-10 logarithms and total_frame a sum of sequence lengths
+function trainer:get_error()
+    local mean_log10 = -total_err / total_frame
+    return math.pow(10, mean_log10)
+end
+
+--- Prepare global state for a pass over `dataset`.
+-- Sets gconf.dropout_rate (the file-level dropout_rate for 'train', 0 for
+-- 'validate' and 'test'; untouched for any other name) and resets the
+-- running error and frame totals.
+-- @param dataset name of the data split about to be processed
+-- @param do_train unused here
+function trainer:epoch_preprocess(dataset, do_train)
+    if dataset == 'train' then
+        gconf.dropout_rate = dropout_rate
+        nerv.info('set dropout rate to %f', dropout_rate)
+    elseif dataset == 'validate' or dataset == 'test' then
+        gconf.dropout_rate = 0
+        nerv.info('set dropout rate to 0')
+    end
+    total_err, total_frame = 0, 0
+end
+
+--- Accumulate the error and frame totals of one mini-batch.
+-- For every time step 1..gconf.chunk_size the first network output is copied
+-- with new_to_host() and log10(exp(value)) of column 0 of each of the first
+-- gconf.batch_size rows is added to total_err. Afterwards every entry of
+-- info.seq_length[1..gconf.batch_size] is added to total_frame.
+-- @param cnt unused here
+-- @param info table providing `output` and `seq_length`
+function trainer:mini_batch_middleprocess(cnt, info)
+    for step = 1, gconf.chunk_size do
+        local host = info.output[1][step]:new_to_host()
+        -- matrix rows and columns are addressed from 0
+        for row = 0, gconf.batch_size - 1 do
+            local value = host[row][0]
+            total_err = total_err + math.log10(math.exp(value))
+        end
+    end
+    for stream = 1, gconf.batch_size do
+        total_frame = total_frame + info.seq_length[stream]
+    end
+end
diff --git a/nerv/examples/ptb/reader.lua b/nerv/examples/ptb/reader.lua
new file mode 100644
index 0000000..70c0c97
--- /dev/null
+++ b/nerv/examples/ptb/reader.lua
@@ -0,0 +1,67 @@
+local Reader = nerv.class('nerv.Reader')
+
+--- Create a reader over one corpus file.
+-- Loads the vocabulary, converts the corpus into id sequences and places the
+-- read cursor on the first sequence.
+-- @param vocab_file path of the vocabulary file (one word per line)
+-- @param input_file path of the corpus file (one sentence per line)
+function Reader:__init(vocab_file, input_file)
+    -- The cursor does not depend on the loaded data, so it can be set up front.
+    self.offset = 1
+    -- The vocabulary must be loaded before the corpus: get_seq looks words up in it.
+    self:get_vocab(vocab_file)
+    self:get_seq(input_file)
+end
+
+--- Load the vocabulary: every line of `vocab_file` is one word, and words
+-- receive consecutive ids starting at 0 in file order.
+-- Fills self.vocab (word -> id) and self.size (number of lines read).
+-- @param vocab_file path of the vocabulary file
+-- Raises an error carrying the io.open message when the file cannot be opened.
+function Reader:get_vocab(vocab_file)
+    -- assert turns a failed open into a readable error instead of a later
+    -- "attempt to index a nil value".
+    local f = assert(io.open(vocab_file, 'r'))
+    local id = 0
+    self.vocab = {}
+    for word in f:lines() do
+        self.vocab[word] = id
+        id = id + 1
+    end
+    -- Release the handle now rather than waiting for the garbage collector.
+    f:close()
+    self.size = id
+end
+
+--- Split `s` at every occurrence of `t`.
+-- `t` is inserted into a Lua pattern unescaped, so pattern magic characters
+-- in it keep their pattern meaning.
+-- @param s string to split
+-- @param t separator (used as a pattern)
+-- @return array of the pieces, including empty strings between adjacent separators
+function Reader:split(s, t)
+    local pieces = {}
+    -- Appending one separator lets the lazy capture also match the last piece.
+    local terminated = s .. t
+    local piece_pattern = '(.-)' .. t
+    for piece in string.gmatch(terminated, piece_pattern) do
+        pieces[#pieces + 1] = piece
+    end
+    return pieces
+end
+
+--- Read sentences from `input_file` and convert them to vocabulary id sequences.
+-- Each line is split on single spaces; empty tokens are skipped and words
+-- missing from self.vocab are dropped. The resulting id arrays are stored, in
+-- file order, in self.seq.
+-- @param input_file path of the corpus file (one sentence per line)
+-- @param max_lines optional maximum number of lines to read; defaults to 26,
+--        the cap that was previously hard-coded. Pass math.huge to read the
+--        whole file.
+--        NOTE(review): 26 looks like a debugging limit (the original carried a
+--        commented-out `while true do`) -- confirm whether the default should
+--        become "no limit".
+-- Raises an error carrying the io.open message when the file cannot be opened.
+function Reader:get_seq(input_file, max_lines)
+    max_lines = max_lines or 26
+    local f = assert(io.open(input_file, 'r'))
+    self.seq = {}
+    for _ = 1, max_lines do
+        local line = f:read()
+        if line == nil then
+            break
+        end
+        local ids = {}
+        for _, word in ipairs(self:split(line, ' ')) do
+            if word ~= '' then
+                -- Out-of-vocabulary words have no id and are skipped, which is
+                -- what inserting a nil value did before.
+                local id = self.vocab[word]
+                if id ~= nil then
+                    ids[#ids + 1] = id
+                end
+            end
+        end
+        table.insert(self.seq, ids)
+    end
+    -- Release the handle now rather than waiting for the garbage collector.
+    f:close()
+end
+
+--- Return the next sequence as an input/label matrix pair and advance the cursor.
+-- For a sequence of k ids both matrices are created as
+-- nerv.MMatrixFloat(k - 1, 1); row r (0-based) of `input` holds id r + 1 and
+-- row r of `label` holds id r + 2, i.e. the label is the following word.
+-- NOTE(review): a sequence with fewer than two ids requests a matrix with
+-- zero or negative rows -- confirm how nerv.MMatrixFloat handles that.
+-- @return {input = ..., label = ...}, or nil once all sequences were returned
+function Reader:get_data()
+    local cursor = self.offset
+    if cursor > #self.seq then
+        return nil
+    end
+
+    local ids = self.seq[cursor]
+    local rows = #ids - 1
+    local input = nerv.MMatrixFloat(rows, 1)
+    local label = nerv.MMatrixFloat(rows, 1)
+    for row = 0, rows - 1 do
+        input[row][0] = ids[row + 1]
+        label[row][0] = ids[row + 2]
+    end
+
+    self.offset = cursor + 1
+    return {input = input, label = label}
+end
diff --git a/nerv/examples/ptb/select_linear.lua b/nerv/examples/ptb/select_linear.lua
new file mode 100644
index 0000000..42778f8
--- /dev/null
+++ b/nerv/examples/ptb/select_linear.lua
@@ -0,0 +1,63 @@
+-- Class nerv.SelectLinearLayer, declared with 'nerv.Layer' as its parent class name.
+local SL = nerv.class('nerv.SelectLinearLayer', 'nerv.Layer')
+
+-- Constructor.
+--id: string
+--global_conf: table
+--layer_conf: table; its `vocab` field is stored as self.vocab
+--Runs the nerv.Layer constructor, checks the dimension lists with
+--check_dim_len(1, 1) and binds the layer parameter.
+function SL:__init(id, global_conf, layer_conf)
+ nerv.Layer.__init(self, id, global_conf, layer_conf)
+
+ -- must be set before bind_params, which uses self.vocab for the parameter shape
+ self.vocab = layer_conf.vocab
+
+ self:check_dim_len(1, 1)
+ self:bind_params()
+end
+
+--- Look up the layer parameter "ltp" and store it in self.ltp.
+-- The parameter is requested as a nerv.LinearTransParam with shape
+-- {self.vocab, self.dim_out[1]}.
+function SL:bind_params()
+    local shape = {self.vocab, self.dim_out[1]}
+    self.ltp = self:find_param("ltp", self.lconf, self.gconf, nerv.LinearTransParam, shape)
+end
+
+--Check parameter
+--- Validate the layer dimensions and prepare the parameter for training.
+-- @param batch_size batch size, stored in self.batch_size
+-- Reports through nerv.error when the input width is not 1 or when the
+-- output width differs from the column count of ltp.trans.
+function SL:init(batch_size)
+    if (self.dim_in[1] ~= 1) then --one word id
+        nerv.error("mismatching dimensions of ltp and input")
+    end
+    if (self.dim_out[1] ~= self.ltp.trans:ncol()) then
+        -- the check is against ltp; the message used to say "bp"
+        nerv.error("mismatching dimensions of ltp and output")
+    end
+
+    -- was `bath_size`: a misspelling that read an undefined global and
+    -- therefore always stored nil
+    self.batch_size = batch_size
+    self.ltp:train_init()
+end
+
+-- Update the layer parameter by delegating to self.ltp:update_by_err_input().
+function SL:update()
+ -- Alternative kept for reference: use this to produce reproducible results
+ -- (don't forget to set the dropout to zero!)
+ --for i = 1, input[1]:nrow(), 1 do
+ -- local word_vec = self.ltp.trans[input[1][i - 1][0]]
+ -- word_vec:add(word_vec, bp_err[1][i - 1], 1, - self.gconf.lrate / self.gconf.batch_size)
+ --end
+
+ -- The update_select_rows kernel, which uses atomicAdd, was tried but generates unreproducible results
+ self.ltp:update_by_err_input()
+end
+
+-- Forward pass: fills output[1] from self.ltp.trans using input[1] as the index.
+-- input: array of input matrices; only input[1] is used
+-- output: array of output matrices; only output[1] is written
+function SL:propagate(input, output)
+ -- Earlier element-wise version, kept for reference:
+ --for i = 0, input[1]:ncol() - 1, 1 do
+ -- if (input[1][0][i] > 0) then
+ -- output[1][i]:copy_fromd(self.ltp.trans[input[1][0][i]])
+ -- else
+ -- output[1][i]:fill(0)
+ -- end
+ --end
+ -- presumably copies, for each id in input[1], the matching row of ltp.trans -- confirm against the matrix API
+ output[1]:copy_rows_fromd_by_colidx(self.ltp.trans, input[1])
+end
+
+-- Backward pass. Nothing is written to next_bp_err; bp_err[1] and the
+-- decompressed input are handed to self.ltp:back_propagate_by_err_input.
+-- bp_err: array of incoming error matrices; only bp_err[1] is used
+-- next_bp_err: unused
+-- input: array of input matrices; only input[1] is used
+-- output: unused
+function SL:back_propagate(bp_err, next_bp_err, input, output)
+ -- input is compressed, so it is expanded with decompress(self.vocab) first
+ -- (presumably to one row of width vocab per id -- confirm against the matrix API)
+ self.ltp:back_propagate_by_err_input(bp_err[1], input[1]:decompress(self.vocab))
+end
+
+--- Expose the parameters of this layer.
+-- @return a nerv.ParamRepo built from {self.ltp} and self.loc_type
+function SL:get_params()
+    return nerv.ParamRepo({self.ltp}, self.loc_type)
+end