4 files changed, 30 insertions, 9 deletions
diff --git a/Makefile b/Makefile
index 3995d21..9c9c86c 100644
--- a/Makefile
+++ b/Makefile
@@ -27,7 +27,7 @@ export BLAS_LDFLAGS
 		Penlight
 
 all: nerv
-submodule: luajit luajit Penlight
+submodule: luajit luarocks Penlight
 luajit:
 	PREFIX=$(PREFIX) ./tools/build_luajit.sh
 luarocks:
diff --git a/nerv/examples/network_debug/config.lua b/nerv/examples/network_debug/config.lua
index 093bde2..e20d5a9 100644
--- a/nerv/examples/network_debug/config.lua
+++ b/nerv/examples/network_debug/config.lua
@@ -12,7 +12,7 @@ function get_global_conf()
         layer_num = 1,
         chunk_size = 15,
         batch_size = 20,
-        max_iter = 1,
+        max_iter = 35,
         param_random = function() return (math.random() / 5 - 0.1) end,
         dropout_rate = 0.5,
         timer = nerv.Timer(),
diff --git a/nerv/layer/duplicate.lua b/nerv/layer/duplicate.lua
index 137472b..2621cdf 100644
--- a/nerv/layer/duplicate.lua
+++ b/nerv/layer/duplicate.lua
@@ -20,10 +20,7 @@ function DuplicateLayer:batch_resize()
 end
 
 function DuplicateLayer:propagate(input, output)
-    for i = 1, #self.dim_out do
-        output[i]:copy_from(input[1])
-        -- FIXME: use reference copy to speed up
-    end
+    -- do nothing, use reference copy in nn/network.lua
 end
 
 function DuplicateLayer:back_propagate(bp_err, next_bp_err)
diff --git a/nerv/nn/network.lua b/nerv/nn/network.lua
index 910cdad..b06028e 100644
--- a/nerv/nn/network.lua
+++ b/nerv/nn/network.lua
@@ -228,9 +228,11 @@ function network:make_initial_store()
                 err_memory[t][i][j] = self.mat_type(self.batch_size, dim_in[j])
                 err_memory[t][i][j]:fill(0)
             end
-            for j = 1, #dim_out do
-                memory[t][i][j] = self.mat_type(self.batch_size, dim_out[j])
-                memory[t][i][j]:fill(self.nn_act_default)
+            if t < 1 or t > self.chunk_size or not nerv.is_type(self.layers[i], 'nerv.DuplicateLayer') then
+                for j = 1, #dim_out do
+                    memory[t][i][j] = self.mat_type(self.batch_size, dim_out[j])
+                    memory[t][i][j]:fill(self.nn_act_default)
+                end
             end
         end
         if t < 1 or t > self.chunk_size then
@@ -288,6 +290,28 @@ function network:make_initial_store()
         end
     end
 
+    -- reference copy for duplicate layer
+    for i = 1, #self.queue do
+        local t, id = self.queue[i].chunk, self.queue[i].id
+        if nerv.is_type(self.layers[id], 'nerv.DuplicateLayer') then
+            local _, dim_out = self.layers[id]:get_dim()
+            for j = 1, #dim_out do
+                if self.output[t][id][j] ~= nil then
+                    nerv.error('duplicate output reference not nil')
+                end
+                self.output[t][id][j] = self.input[t][id][1]
+                local edge = self.output_conn[id][j]
+                local to, port, time = edge[1], edge[2], edge[3] + t
+                if time >= 1 and time <= self.chunk_size then
+                    if self.input[time][to][port] ~= nil then
+                        nerv.error('duplicate input reference not nil')
+                    end
+                    self.input[time][to][port] = self.output[t][id][j]
+                end
+            end
+        end
+    end
+
     -- check dangling reference
     for t = 1, self.chunk_size do
         for i = 1, #self.dim_in do