From 4585970021f75d4c9e7154fc1681a80efa0f48ab Mon Sep 17 00:00:00 2001 From: Qi Liu Date: Mon, 9 May 2016 21:02:57 +0800 Subject: clip on gradient now --- nerv/examples/trainer.lua | 1 + nerv/layer/affine.lua | 4 ++++ nerv/nn/network.lua | 10 ---------- nerv/nn/trainer.lua | 2 +- 4 files changed, 6 insertions(+), 11 deletions(-) diff --git a/nerv/examples/trainer.lua b/nerv/examples/trainer.lua index f6c7a5a..caed2e2 100644 --- a/nerv/examples/trainer.lua +++ b/nerv/examples/trainer.lua @@ -80,6 +80,7 @@ local trainer_defaults = { local options = make_options(trainer_defaults) local extra_opt_spec = { + {"clip", nil, "number"}, {"resume-from", nil, "string"}, {"help", "h", "boolean", default = false, desc = "show this help information"}, {"dir", nil, "string", desc = "specify the working directory"}, diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua index 1ac4681..b68cf3d 100644 --- a/nerv/layer/affine.lua +++ b/nerv/layer/affine.lua @@ -48,6 +48,10 @@ function MatrixParam:_update(alpha, beta) -- momentum gain local mmt_gain = 1.0 / (1.0 - gconf.momentum) local n = gconf.batch_size * mmt_gain + -- clip gradient + if gconf.clip then + self.correction_acc:clip(-gconf.clip, gconf.clip) + end -- perform update if gconf.momentum > 0 then self.correction:add(self.correction, self.correction_acc, gconf.momentum, 1.0) diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua index d0d5462..358b100 100644 --- a/nerv/nn/network.lua +++ b/nerv/nn/network.lua @@ -33,8 +33,6 @@ local network = nerv.class('nerv.Network') -- -- * `network`: a `nerv.Layer` instance describing the structure of the network -- to be compiled --- * `clip`: a `number` value indicating the cliping threshold (i.e. preserve --- the values within [-clip, +clip]) -- * `nn_act_default`: a `number` value indicating the value used for filling -- "holes" in activation values of a batch matrix (0 by default) @@ -49,7 +47,6 @@ function network:__init(id, global_conf, network_conf) else self.mat_type = self.gconf.cumat_type end - self.clip = network_conf.clip self.nn_act_default = network_conf.nn_act_default if self.nn_act_default == nil then self.nn_act_default = 0 @@ -660,13 +657,6 @@ function network:back_propagate() local t, id = self.queue[i].chunk, self.queue[i].id if t <= self.max_length then self.layers[id]:back_propagate(self.err_input[t][id], self.err_output[t][id], self.input[t][id], self.output[t][id], t) - -- gradient clip - if self.clip ~= nil then - local dim_in, _ = self.layers[id]:get_dim() - for j = 1, #dim_in do - self.err_output[t][id][j]:clip(-self.clip, self.clip) - end - end end -- flush border gradient if self.flush[t][id].timestamp == self.timestamp then diff --git a/nerv/nn/trainer.lua b/nerv/nn/trainer.lua index 44390ea..8357c10 100644 --- a/nerv/nn/trainer.lua +++ b/nerv/nn/trainer.lua @@ -25,7 +25,7 @@ function trainer:__init(gconf) self.input_order = self:get_input_order() self.network = nerv.Network('network', gconf, {network = graph, - clip = gconf.clip}) + nn_act_default = gconf.nn_act_default}) local network = self.network network:init(gconf.batch_size, gconf.chunk_size) -- cgit v1.2.3-70-g09d2