about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Qi Liu <[email protected]> 2016-05-09 21:02:57 +0800
committer Qi Liu <[email protected]> 2016-05-09 21:02:57 +0800
commit 4585970021f75d4c9e7154fc1681a80efa0f48ab (patch)
tree b2773916f1eb836ac3ea0ace5080b881ad2960ba
parent 03439902dbd339cfbbc684b6fcc6b1810fa02ede (diff)
clip on gradient now
-rw-r--r-- nerv/examples/trainer.lua 1
-rw-r--r-- nerv/layer/affine.lua 4
-rw-r--r-- nerv/nn/network.lua 10
-rw-r--r-- nerv/nn/trainer.lua 2
4 files changed, 6 insertions, 11 deletions
diff --git a/nerv/examples/trainer.lua b/nerv/examples/trainer.lua
index f6c7a5a..caed2e2 100644
--- a/nerv/examples/trainer.lua
+++ b/nerv/examples/trainer.lua
@@ -80,6 +80,7 @@ local trainer_defaults = {
local options = make_options(trainer_defaults)
local extra_opt_spec = {
+ {"clip", nil, "number"},
{"resume-from", nil, "string"},
{"help", "h", "boolean", default = false, desc = "show this help information"},
{"dir", nil, "string", desc = "specify the working directory"},
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 1ac4681..b68cf3d 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -48,6 +48,10 @@ function MatrixParam:_update(alpha, beta)
-- momentum gain
local mmt_gain = 1.0 / (1.0 - gconf.momentum)
local n = gconf.batch_size * mmt_gain
+ -- clip gradient
+ if gconf.clip then
+ self.correction_acc:clip(-gconf.clip, gconf.clip)
+ end
-- perform update
if gconf.momentum > 0 then
self.correction:add(self.correction, self.correction_acc, gconf.momentum, 1.0)
diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua
index d0d5462..358b100 100644
--- a/nerv/nn/network.lua
+++ b/nerv/nn/network.lua
@@ -33,8 +33,6 @@ local network = nerv.class('nerv.Network')
--
-- * `network`: a `nerv.Layer` instance describing the structure of the network
-- to be compiled
--- * `clip`: a `number` value indicating the cliping threshold (i.e. preserve
--- the values within [-clip, +clip])
-- * `nn_act_default`: a `number` value indicating the value used for filling
-- "holes" in activation values of a batch matrix (0 by default)
@@ -49,7 +47,6 @@ function network:__init(id, global_conf, network_conf)
else
self.mat_type = self.gconf.cumat_type
end
- self.clip = network_conf.clip
self.nn_act_default = network_conf.nn_act_default
if self.nn_act_default == nil then
self.nn_act_default = 0
@@ -660,13 +657,6 @@ function network:back_propagate()
local t, id = self.queue[i].chunk, self.queue[i].id
if t <= self.max_length then
self.layers[id]:back_propagate(self.err_input[t][id], self.err_output[t][id], self.input[t][id], self.output[t][id], t)
- -- gradient clip
- if self.clip ~= nil then
- local dim_in, _ = self.layers[id]:get_dim()
- for j = 1, #dim_in do
- self.err_output[t][id][j]:clip(-self.clip, self.clip)
- end
- end
end
-- flush border gradient
if self.flush[t][id].timestamp == self.timestamp then
diff --git a/nerv/nn/trainer.lua b/nerv/nn/trainer.lua
index 44390ea..8357c10 100644
--- a/nerv/nn/trainer.lua
+++ b/nerv/nn/trainer.lua
@@ -25,7 +25,7 @@ function trainer:__init(gconf)
self.input_order = self:get_input_order()
self.network = nerv.Network('network', gconf,
{network = graph,
- clip = gconf.clip})
+ nn_act_default = gconf.nn_act_default})
local network = self.network
network:init(gconf.batch_size, gconf.chunk_size)