4 files changed, 6 insertions, 11 deletions
diff --git a/nerv/examples/trainer.lua b/nerv/examples/trainer.lua
index f6c7a5a..caed2e2 100644
--- a/nerv/examples/trainer.lua
+++ b/nerv/examples/trainer.lua
@@ -80,6 +80,7 @@ local trainer_defaults = {
 
 local options = make_options(trainer_defaults)
 local extra_opt_spec = {
+    {"clip", nil, "number"},
     {"resume-from", nil, "string"},
     {"help", "h", "boolean", default = false, desc = "show this help information"},
     {"dir", nil, "string", desc = "specify the working directory"},
diff --git a/nerv/layer/affine.lua b/nerv/layer/affine.lua
index 1ac4681..b68cf3d 100644
--- a/nerv/layer/affine.lua
+++ b/nerv/layer/affine.lua
@@ -48,6 +48,10 @@ function MatrixParam:_update(alpha, beta)
     -- momentum gain
     local mmt_gain = 1.0 / (1.0 - gconf.momentum)
     local n = gconf.batch_size * mmt_gain
+    -- clip the accumulated correction to [-gconf.clip, gconf.clip]; skipped when gconf.clip is unset
+    if gconf.clip then
+        self.correction_acc:clip(-gconf.clip, gconf.clip)
+    end
     -- perform update
     if gconf.momentum > 0 then
         self.correction:add(self.correction, self.correction_acc, gconf.momentum, 1.0)
diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua
index d0d5462..358b100 100644
--- a/nerv/nn/network.lua
+++ b/nerv/nn/network.lua
@@ -33,8 +33,6 @@ local network = nerv.class('nerv.Network')
 --
 -- * `network`: a `nerv.Layer` instance describing the structure of the network
 --   to be compiled
--- * `clip`: a `number` value indicating the cliping threshold (i.e. preserve
---   the values within [-clip, +clip])
 -- * `nn_act_default`: a `number` value indicating the value used for filling
 --   "holes" in activation values of a batch matrix (0 by default)
 
@@ -49,7 +47,6 @@ function network:__init(id, global_conf, network_conf)
     else
         self.mat_type = self.gconf.cumat_type
     end
-    self.clip = network_conf.clip
     self.nn_act_default = network_conf.nn_act_default
     if self.nn_act_default == nil then
         self.nn_act_default = 0
@@ -660,13 +657,6 @@ function network:back_propagate()
         local t, id = self.queue[i].chunk, self.queue[i].id
         if t <= self.max_length then
             self.layers[id]:back_propagate(self.err_input[t][id], self.err_output[t][id], self.input[t][id], self.output[t][id], t)
-            -- gradient clip
-            if self.clip ~= nil then
-                local dim_in, _ = self.layers[id]:get_dim()
-                for j = 1, #dim_in do
-                    self.err_output[t][id][j]:clip(-self.clip, self.clip)
-                end
-            end
         end
         -- flush border gradient
         if self.flush[t][id].timestamp == self.timestamp then
diff --git a/nerv/nn/trainer.lua b/nerv/nn/trainer.lua
index 44390ea..8357c10 100644
--- a/nerv/nn/trainer.lua
+++ b/nerv/nn/trainer.lua
@@ -25,7 +25,7 @@ function trainer:__init(gconf)
     self.input_order = self:get_input_order()
     self.network = nerv.Network('network', gconf,
                                 {network = graph,
-                                 clip = gconf.clip})
+                                 nn_act_default = gconf.nn_act_default})
     local network = self.network
     network:init(gconf.batch_size, gconf.chunk_size)