From 20e0c009ab387107ed1a492dd42b0253ca19ed37 Mon Sep 17 00:00:00 2001
From: txh18 <cloudygooseg@gmail.com>
Date: Wed, 20 Jan 2016 14:26:54 +0800
Subject: the prefixsum_row operation of matrix is implemented

---
 nerv/lib/matrix/generic/cukernel.cu | 50 +++++++++++++++++++++++++++++++++++++
 nerv/lib/matrix/generic/cumatrix.c  |  8 ++++++
 nerv/lib/matrix/generic/cumatrix.h  |  1 +
 nerv/matrix/generic/cumatrix.c      | 10 ++++++++
 4 files changed, 69 insertions(+)

diff --git a/nerv/lib/matrix/generic/cukernel.cu b/nerv/lib/matrix/generic/cukernel.cu
index 2b696d5..995059c 100644
--- a/nerv/lib/matrix/generic/cukernel.cu
+++ b/nerv/lib/matrix/generic/cukernel.cu
@@ -376,6 +376,22 @@ __global__ void cudak_(copy_rows_by_colidx)(const MATRIX_ELEM *a, MATRIX_ELEM *b
     b[j + i * stride] = a[j + k * stride];
 }
 
+__global__ void cudak_(prefixsum_row_reduce)(const MATRIX_ELEM *a, MATRIX_ELEM *b,
+                        int nrow, int ncol, int stride_a, int stride_b, int offset) {
+    int j = blockIdx.x * blockDim.x + threadIdx.x; // column handled by this thread
+    int i = blockIdx.y * blockDim.y + threadIdx.y; // row handled by this thread
+    long idx_a, idx_b;
+    if (i >= nrow || j >= ncol) return; // thread falls outside the nrow x ncol region
+    idx_b = j + i * stride_b; // strides are counted in elements, not bytes
+    idx_a = j + i * stride_a;
+    // One scan step: b(i,j) = a(i,j) + a(i,j-offset) when column j-offset exists, else a(i,j).
+    if (j >= offset) 
+        b[idx_b] = a[idx_a] + a[idx_a - offset]; // reads another column of a, so a and b must be different buffers
+    else
+        b[idx_b] = a[idx_a];
+}
+
+
 
 extern "C" {
 #include "../cukernel.h"
@@ -737,6 +753,40 @@ extern "C" {
         cudaStreamSynchronize(0);
     }
 
+    void cudak_(cuda_prefixsum_row)(const Matrix *a, Matrix *b) { // b(i,j) = a(i,0) + ... + a(i,j)
+        dim3 threadsPerBlock(CUDA_THREADS_N, CUDA_THREADS_N);
+        dim3 numBlocks(CEIL_DIV(b->ncol, threadsPerBlock.x),
+                CEIL_DIV(b->nrow, threadsPerBlock.y));
+        // Two pitched scratch buffers; every step reads one buffer and writes the other.
+        MATRIX_ELEM *tmp[2];
+        size_t tmp_stride[2]; // pitches in bytes; converted to element counts at each launch
+        cudaMallocPitch(tmp, tmp_stride + 0, a->ncol * sizeof(MATRIX_ELEM), a->nrow);
+        cudaMallocPitch(tmp + 1, tmp_stride + 1, a->ncol * sizeof(MATRIX_ELEM), a->nrow);
+        // First step (offset 1) reads a and writes tmp[0].
+        int offset = 1;
+        cudak_(prefixsum_row_reduce)<<<numBlocks, threadsPerBlock>>> \
+            (MATRIX_ELEM_PTR(a), tmp[0], b->nrow, b->ncol,
+            a->stride / sizeof(MATRIX_ELEM), tmp_stride[0] / sizeof(MATRIX_ELEM), offset);
+        int pin = 0, pout = 1; // tmp[pin] holds the latest partial result
+        // Middle steps: offset doubles from 2 for as long as offset <= ncol / 2.
+        for (offset = 2;offset <= a->ncol / 2;offset *= 2) {
+            cudak_(prefixsum_row_reduce)<<<numBlocks, threadsPerBlock>>> \
+                (tmp[pin], tmp[pout], b->nrow, b->ncol,
+                tmp_stride[pin] / sizeof(MATRIX_ELEM), tmp_stride[pout] / sizeof(MATRIX_ELEM), offset);
+            pin = 1 - pin; 
+            pout = 1 - pout;
+        }
+        // Last step uses the offset left by the loop (first of 2, 4, 8, ... above ncol / 2) and writes b.
+        cudak_(prefixsum_row_reduce)<<<numBlocks, threadsPerBlock>>> \
+            (tmp[pin], MATRIX_ELEM_PTR(b), b->nrow, b->ncol,
+            tmp_stride[pin] / sizeof(MATRIX_ELEM), b->stride / sizeof(MATRIX_ELEM), offset);
+        // NOTE(review): cudaMallocPitch results and kernel launch errors are never checked here.
+        cudaFree(tmp[0]);
+        cudaFree(tmp[1]);
+        // Block until the work queued on stream 0 has finished.
+        cudaStreamSynchronize(0);
+    }
+
     void cudak_(cuda_decompress)(const Matrix *a, Matrix *b) {
         dim3 threadsPerBlock(1, CUDA_THREADS_NN);
         dim3 numBlocks(1, CEIL_DIV(a->nrow, threadsPerBlock.y));
diff --git a/nerv/lib/matrix/generic/cumatrix.c b/nerv/lib/matrix/generic/cumatrix.c
index 7582725..dafadb2 100644
--- a/nerv/lib/matrix/generic/cumatrix.c
+++ b/nerv/lib/matrix/generic/cumatrix.c
@@ -486,6 +486,14 @@ void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b,
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
 }
 
+void nerv_matrix_(prefixsum_row)(Matrix *a, const Matrix *b, Status *status) { /* a receives the row-wise prefix sum of b */
+    CHECK_SAME_DIMENSION(a, b, status);
+    PROFILE_START
+    cudak_(cuda_prefixsum_row)(b, a); /* argument order is (input, output): b is read, a is written */
+    PROFILE_STOP
+    NERV_SET_STATUS(status, NERV_NORMAL, 0);
+}
+
 static void cuda_matrix_(free)(MATRIX_ELEM *ptr, Status *status) {
     CUDA_SAFE_SYNC_CALL(cudaFree(ptr), status);
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
diff --git a/nerv/lib/matrix/generic/cumatrix.h b/nerv/lib/matrix/generic/cumatrix.h
index e82dccd..41e9e35 100644
--- a/nerv/lib/matrix/generic/cumatrix.h
+++ b/nerv/lib/matrix/generic/cumatrix.h
@@ -61,3 +61,4 @@ void nerv_matrix_(scale_rows_by_col)(Matrix *a, const Matrix *b,
                                     Status *status);
 void nerv_matrix_(scale_rows_by_row)(Matrix *a, const Matrix *b,
                                     Status *status);
+void nerv_matrix_(prefixsum_row)(Matrix *a, const Matrix *b, Status *status);
diff --git a/nerv/matrix/generic/cumatrix.c b/nerv/matrix/generic/cumatrix.c
index edd7b0a..29fa7b1 100644
--- a/nerv/matrix/generic/cumatrix.c
+++ b/nerv/matrix/generic/cumatrix.c
@@ -52,6 +52,15 @@ static int nerv_matrix_(lua_sigmoid)(lua_State *L) {
     return 0;
 }
 
+static int nerv_matrix_(lua_prefixsum_row)(lua_State *L) { /* Lua binding, registered as "prefixsum_row" */
+    Status status;
+    Matrix *a = luaT_checkudata(L, 1, nerv_matrix_(tname)); /* stack slot 1: forwarded as the output matrix */
+    Matrix *b = luaT_checkudata(L, 2, nerv_matrix_(tname)); /* stack slot 2: forwarded as the input matrix */
+    nerv_matrix_(prefixsum_row)(a, b, &status);
+    NERV_LUA_CHECK_STATUS(L, status);
+    return 0; /* no values are returned to Lua */
+}
+
 static int nerv_matrix_(lua_sigmoid_grad)(lua_State *L) {
     Status status;
     Matrix *nerr = luaT_checkudata(L, 1, nerv_matrix_(tname));
@@ -407,6 +416,7 @@ static const luaL_Reg nerv_matrix_(extra_methods)[] = {
     {"rearrange_frm", nerv_matrix_(lua_rearrange_frm)},
     {"scale_rows_by_row", nerv_matrix_(lua_scale_rows_by_row)},
     {"scale_rows_by_col", nerv_matrix_(lua_scale_rows_by_col)},
+    {"prefixsum_row", nerv_matrix_(lua_prefixsum_row)},
 #ifdef __NERV_FUTURE_CUDA_7
     {"update_select_rows_by_rowidx", nerv_matrix_(lua_update_select_rows_by_rowidx)},
     {"update_select_rows_by_colidx", nerv_matrix_(lua_update_select_rows_by_colidx)},
-- 
cgit v1.2.3-70-g09d2


From e829ef9253dfece9ff9f599130bc625f1267e136 Mon Sep 17 00:00:00 2001
From: txh18 <cloudygooseg@gmail.com>
Date: Wed, 20 Jan 2016 15:15:28 +0800
Subject: used prefixsum_row operation to speed up sampling on the softmax
 output

---
 nerv/examples/lmptb/lm_sampler.lua            | 35 +++++++++++++++++++++++----
 nerv/examples/lmptb/sample_grulm_ptb_main.lua |  4 +--
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/nerv/examples/lmptb/lm_sampler.lua b/nerv/examples/lmptb/lm_sampler.lua
index c25a75c..2a4f1c3 100644
--- a/nerv/examples/lmptb/lm_sampler.lua
+++ b/nerv/examples/lmptb/lm_sampler.lua
@@ -27,7 +27,8 @@ function LMSampler:load_dagL(dagL)
     self.dagL_outputs[2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
     
     self.smout_d = global_conf.cumat_type(self.batch_size, self.vocab:size())
-    self.smout_h = global_conf.mmat_type(self.batch_size, self.vocab:size())
+    self.ssout_d = global_conf.cumat_type(self.batch_size, self.vocab:size())
+    self.ssout_h = global_conf.mmat_type(self.batch_size, self.vocab:size())
 
     self.store = {}
     for i = 1, self.batch_size do
@@ -40,9 +41,27 @@ function LMSampler:load_dagL(dagL)
     self.repo = {}
 end
 
-function LMSampler:sample_to_store(smout)
+function LMSampler:sample_to_store(ssout)
     for i = 1, self.batch_size do
         local ran = math.random()
+        local id = 1
+        local low = 0
+        local high = ssout:ncol() - 1
+        if ssout[i - 1][high] < 0.9999 or ssout[i - 1][high] > 1.0001 then
+            nerv.error("%s ERROR, softmax output summation(%f) seems to have some problem", self.log_pre, ssout[i - 1][high])
+        end
+        if ssout[i - 1][low] < ran then
+            while low + 1 < high do
+                local mid = math.floor((low + high) / 2)
+                if ssout[i - 1][mid] < ran then
+                    low = mid
+                else
+                    high = mid
+                end
+            end
+            id = high + 1
+        end
+        --[[
         local s = 0
         local id = self.vocab:size()
         for j = 0, self.vocab:size() - 1 do
@@ -52,13 +71,18 @@ function LMSampler:sample_to_store(smout)
                 break
             end
         end
+        ]]--
         if #self.store[i] >= self.chunk_size - 2 then
             id = self.sen_end_id
         end
         local tmp = {}
         tmp.w = self.vocab:get_word_id(id).str
         tmp.id = id
-        tmp.p = smout[i - 1][id - 1]
+        if id == 1 then
+            tmp.p = ssout[i - 1][id - 1]
+        else
+            tmp.p = ssout[i - 1][id - 1] - ssout[i - 1][id - 2] 
+        end
         table.insert(self.store[i], tmp)
     end
 end
@@ -74,9 +98,10 @@ function LMSampler:lm_sample_rnn_dagL(sample_num, p_conf)
         inputs[2]:copy_fromd(outputs[2]) --copy hidden activation
     
         self.smout_d:softmax(outputs[1])
-        self.smout_d:copy_toh(self.smout_h)
+        self.ssout_d:prefixsum_row(self.smout_d)
+        self.ssout_d:copy_toh(self.ssout_h)
         
-        self:sample_to_store(self.smout_h)
+        self:sample_to_store(self.ssout_h)
         for i = 1, self.batch_size do
             inputs[1][i - 1][0] = self.store[i][#self.store[i]].id - 1
             if self.store[i][#self.store[i]].id == self.sen_end_id then --meet a sentence end
diff --git a/nerv/examples/lmptb/sample_grulm_ptb_main.lua b/nerv/examples/lmptb/sample_grulm_ptb_main.lua
index 9a13d36..30dfe26 100644
--- a/nerv/examples/lmptb/sample_grulm_ptb_main.lua
+++ b/nerv/examples/lmptb/sample_grulm_ptb_main.lua
@@ -424,11 +424,11 @@ if commands["sampling"] == 1 then
     local dagL = load_net_dagL(global_conf, global_conf.fn_to_sample) 
     local sampler = nerv.LMSampler(global_conf)
     sampler:load_dagL(dagL)
-    for k = 1, 5 do
+    for k = 1, 1 do
         local res = sampler:lm_sample_rnn_dagL(10, {})
         for i = 1, #res do
             for j = 1, #res[i] do
-                nerv.printf("%s ", res[i][j].w)
+                nerv.printf("%s(%f) ", res[i][j].w, res[i][j].p)
             end
             nerv.printf("\n")
         end
-- 
cgit v1.2.3-70-g09d2


From ceda23dff9e9204ad3061bd65a096ae33d94d757 Mon Sep 17 00:00:00 2001
From: txh18 <cloudygooseg@gmail.com>
Date: Thu, 21 Jan 2016 14:33:07 +0800
Subject: tiny bug fix in LMVocab

---
 nerv/examples/lmptb/grulm_ptb_main.lua  | 4 ++--
 nerv/examples/lmptb/lmptb/lmvocab.lua   | 6 ++----
 nerv/examples/lmptb/lstmlm_ptb_main.lua | 4 ++--
 3 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/nerv/examples/lmptb/grulm_ptb_main.lua b/nerv/examples/lmptb/grulm_ptb_main.lua
index ef5d7f9..6095b12 100644
--- a/nerv/examples/lmptb/grulm_ptb_main.lua
+++ b/nerv/examples/lmptb/grulm_ptb_main.lua
@@ -388,10 +388,10 @@ nerv.LMUtil.wait(2)
 
 math.randomseed(1)
 
-local vocab = nerv.LMVocab()
+local vocab = nerv.LMVocab(global_conf)
 global_conf["vocab"] = vocab
 nerv.printf("%s building vocab...\n", global_conf.sche_log_pre)
-global_conf.vocab:build_file(global_conf.vocab_fn, false)
+global_conf.vocab:build_file(global_conf.vocab_fn)
 ppl_rec = {} 
 
 local final_iter = -1
diff --git a/nerv/examples/lmptb/lmptb/lmvocab.lua b/nerv/examples/lmptb/lmptb/lmvocab.lua
index 0e7ef3e..38bb18e 100644
--- a/nerv/examples/lmptb/lmptb/lmvocab.lua
+++ b/nerv/examples/lmptb/lmptb/lmvocab.lua
@@ -2,8 +2,6 @@ require 'lmptb.lmutil'
 
 local Vocab = nerv.class("nerv.LMVocab")
 
-local printf = nerv.printf
-
 local mysplit = function(inputstr, sep)
     if sep == nil then
         sep = "%s"
@@ -106,7 +104,7 @@ end
 --fn: string
 --Add all words in fn to the vocab
 function Vocab:build_file(fn)
-    printf("%s Vocab building on file %s...\n", self.log_pre, fn)
+    nerv.printf("%s Vocab building on file %s...\n", self.log_pre, fn)
     local file = io.open(fn, "r")
     while (true) do
         local list = nerv.LMUtil.read_line(file)
@@ -119,7 +117,7 @@ function Vocab:build_file(fn)
         end
     end
     file:close()
-    printf("%s Building finished, vocab size now is %d.\n", self.log_pre, self:size())
+    nerv.printf("%s Building finished, vocab size now is %d.\n", self.log_pre, self:size())
 end
 
 --[[test
diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua
index 9bdd5ff..5794476 100644
--- a/nerv/examples/lmptb/lstmlm_ptb_main.lua
+++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua
@@ -390,10 +390,10 @@ nerv.LMUtil.wait(2)
 
 math.randomseed(1)
 
-local vocab = nerv.LMVocab()
+local vocab = nerv.LMVocab(global_conf)
 global_conf["vocab"] = vocab
 nerv.printf("%s building vocab...\n", global_conf.sche_log_pre)
-global_conf.vocab:build_file(global_conf.vocab_fn, false)
+global_conf.vocab:build_file(global_conf.vocab_fn)
 ppl_rec = {} 
 
 local final_iter = -1
-- 
cgit v1.2.3-70-g09d2


From d2ad8abe4bb8529c5b1ef4b3a956271e7d6d813b Mon Sep 17 00:00:00 2001
From: txh18 <cloudygooseg@gmail.com>
Date: Wed, 27 Jan 2016 21:48:13 +0800
Subject: added logp addition to LMResult

---
 nerv/examples/lmptb/lmptb/lmutil.lua | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/nerv/examples/lmptb/lmptb/lmutil.lua b/nerv/examples/lmptb/lmptb/lmutil.lua
index 6d66d6e..13a5c45 100644
--- a/nerv/examples/lmptb/lmptb/lmutil.lua
+++ b/nerv/examples/lmptb/lmptb/lmutil.lua
@@ -112,10 +112,17 @@ end
 --cla:string
 --w:string
 --prob:float, the probability
-function Result:add(cla, w, prob)
-    self[cla].logp_all = self[cla].logp_all + math.log10(prob)
+function Result:add(cla, w, prob, log10ed)
+    local lp
+    if log10ed == true then
+        lp = prob
+    else
+        lp = math.log10(prob)
+    end
+
+    self[cla].logp_all = self[cla].logp_all + lp
     if (self.vocab:is_unk_str(w)) then
-        self[cla].logp_unk = self[cla].logp_unk + math.log10(prob)
+        self[cla].logp_unk = self[cla].logp_unk + lp
         self[cla].cn_unk = self[cla].cn_unk + 1
     end
     if (w == self.vocab.sen_end_token) then
-- 
cgit v1.2.3-70-g09d2


From 3f84267f9fe035ac7ed305a7d66ecab9f7ae50e3 Mon Sep 17 00:00:00 2001
From: txh18 <cloudygooseg@gmail.com>
Date: Fri, 29 Jan 2016 17:38:05 +0800
Subject: moved gru_t to tnn/layersT

---
 nerv/Makefile                             |   2 +-
 nerv/examples/lmptb/lmptb/layer/gru_t.lua | 114 ------------------------------
 nerv/examples/lmptb/lmptb/layer/init.lua  |   2 +-
 nerv/tnn/init.lua                         |   1 +
 nerv/tnn/layersT/gru_t.lua                | 114 ++++++++++++++++++++++++++++++
 5 files changed, 117 insertions(+), 116 deletions(-)
 delete mode 100644 nerv/examples/lmptb/lmptb/layer/gru_t.lua
 create mode 100644 nerv/tnn/layersT/gru_t.lua

diff --git a/nerv/Makefile b/nerv/Makefile
index 5c329f9..db5df22 100644
--- a/nerv/Makefile
+++ b/nerv/Makefile
@@ -36,7 +36,7 @@ LUA_LIBS := matrix/init.lua io/init.lua init.lua \
 			nn/init.lua nn/layer_repo.lua nn/param_repo.lua nn/layer_dag.lua \
 			io/sgd_buffer.lua \
             tnn/init.lua tnn/layer_dag_t.lua tnn/sutil.lua tnn/tnn.lua \
-            tnn/layersT/dropout_t.lua tnn/layersT/lstm_t.lua tnn/layersT/softmax_ce_t.lua
+            tnn/layersT/dropout_t.lua tnn/layersT/lstm_t.lua tnn/layersT/gru_t.lua tnn/layersT/softmax_ce_t.lua
 
 INCLUDE := -I $(LUA_INCDIR) -DLUA_USE_APICHECK
 #CUDA_BASE := /usr/local/cuda-7.0
diff --git a/nerv/examples/lmptb/lmptb/layer/gru_t.lua b/nerv/examples/lmptb/lmptb/layer/gru_t.lua
deleted file mode 100644
index 8f15cc8..0000000
--- a/nerv/examples/lmptb/lmptb/layer/gru_t.lua
+++ /dev/null
@@ -1,114 +0,0 @@
-local GRULayerT = nerv.class('nerv.GRULayerT', 'nerv.LayerT')
-
-function GRULayerT:__init(id, global_conf, layer_conf)
-    --input1:x input2:h input3:c(h^~)
-    self.id = id
-    self.dim_in = layer_conf.dim_in
-    self.dim_out = layer_conf.dim_out
-    self.gconf = global_conf
-
-    if self.dim_in[2] ~= self.dim_out[1] then
-        nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)", self.dim_in[2], self.dim_out[1])
-    end
-
-    --prepare a DAGLayerT to hold the lstm structure
-    local pr = layer_conf.pr
-    if pr == nil then
-        pr = nerv.ParamRepo()
-    end
-    
-    local function ap(str)
-        return self.id .. '.' .. str
-    end
-
-    local layers = {
-        ["nerv.CombinerLayer"] = {
-            [ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]}, 
-                ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}},
-            [ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]}, 
-                ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
-            [ap("updateGDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
-                ["dim_out"] = {self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
-            [ap("updateMergeL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]},
-                ["lambda"] = {1, -1, 1}}},
-        },
-        ["nerv.AffineLayer"] = {
-            [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]}, ["pr"] = pr}},
-        },
-        ["nerv.TanhLayer"] = {
-            [ap("mainTanhL")] = {{}, {["dim_in"] = {self.dim_out[1]}, ["dim_out"] = {self.dim_out[1]}}},
-        },
-        ["nerv.GateFLayer"] = {
-            [ap("resetGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, 
-                ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}},
-            [ap("updateGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, 
-                ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}},
-        },
-        ["nerv.ElemMulLayer"] = {
-            [ap("resetGMulL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
-            [ap("updateGMulCL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
-            [ap("updateGMulHL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
-        },
-    }
-    
-    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
-
-    local connections_t = {
-        ["<input>[1]"] = ap("inputXDup[1]"), 
-        ["<input>[2]"] = ap("inputHDup[1]"),
-
-        [ap("inputXDup[1]")] = ap("resetGateL[1]"),
-        [ap("inputHDup[1]")] = ap("resetGateL[2]"),
-        [ap("inputXDup[2]")] = ap("updateGateL[1]"),
-        [ap("inputHDup[2]")] = ap("updateGateL[2]"),
-        [ap("updateGateL[1]")] = ap("updateGDup[1]"),
-
-        [ap("resetGateL[1]")] = ap("resetGMulL[1]"),
-        [ap("inputHDup[3]")] = ap("resetGMulL[2]"),
-
-        [ap("inputXDup[3]")] = ap("mainAffineL[1]"),
-        [ap("resetGMulL[1]")] = ap("mainAffineL[2]"),
-        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
-
-        [ap("updateGDup[1]")] = ap("updateGMulHL[1]"),
-        [ap("inputHDup[4]")] = ap("updateGMulHL[2]"),
-        [ap("updateGDup[2]")] = ap("updateGMulCL[1]"),
-        [ap("mainTanhL[1]")] = ap("updateGMulCL[2]"),
- 
-        [ap("inputHDup[5]")] = ap("updateMergeL[1]"),
-        [ap("updateGMulHL[1]")] = ap("updateMergeL[2]"),
-        [ap("updateGMulCL[1]")] = ap("updateMergeL[3]"),
-
-        [ap("updateMergeL[1]")] = "<output>[1]",
-    }
-
-    self.dagL = nerv.DAGLayerT(self.id, global_conf, 
-            {["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out, ["sub_layers"] = layerRepo, 
-            ["connections"] = connections_t})
-    
-    self:check_dim_len(2, 1) -- x, h and h
-end
-
-function GRULayerT:init(batch_size, chunk_size)
-    self.dagL:init(batch_size, chunk_size)
-end
-
-function GRULayerT:batch_resize(batch_size, chunk_size)
-    self.dagL:batch_resize(batch_size, chunk_size)
-end
-
-function GRULayerT:update(bp_err, input, output, t)
-    self.dagL:update(bp_err, input, output, t)
-end
-
-function GRULayerT:propagate(input, output, t)
-    self.dagL:propagate(input, output, t)
-end
-
-function GRULayerT:back_propagate(bp_err, next_bp_err, input, output, t)
-    self.dagL:back_propagate(bp_err, next_bp_err, input, output, t)
-end
-
-function GRULayerT:get_params()
-    return self.dagL:get_params()
-end
diff --git a/nerv/examples/lmptb/lmptb/layer/init.lua b/nerv/examples/lmptb/lmptb/layer/init.lua
index b345244..ceae009 100644
--- a/nerv/examples/lmptb/lmptb/layer/init.lua
+++ b/nerv/examples/lmptb/lmptb/layer/init.lua
@@ -1,6 +1,6 @@
 require 'lmptb.layer.select_linear'
 require 'lmptb.layer.affine_recurrent_plusvec'
-require 'lmptb.layer.gru_t'
+--require 'lmptb.layer.gru_t'
 require 'lmptb.layer.lm_affine_recurrent'
 
 
diff --git a/nerv/tnn/init.lua b/nerv/tnn/init.lua
index b375fa8..7faca31 100644
--- a/nerv/tnn/init.lua
+++ b/nerv/tnn/init.lua
@@ -47,5 +47,6 @@ nerv.include('sutil.lua')
 nerv.include('tnn.lua')
 nerv.include('layersT/softmax_ce_t.lua')
 nerv.include('layersT/lstm_t.lua')
+nerv.include('layersT/gru_t.lua')
 nerv.include('layersT/dropout_t.lua')
 nerv.include('layer_dag_t.lua')
diff --git a/nerv/tnn/layersT/gru_t.lua b/nerv/tnn/layersT/gru_t.lua
new file mode 100644
index 0000000..8f15cc8
--- /dev/null
+++ b/nerv/tnn/layersT/gru_t.lua
@@ -0,0 +1,114 @@
+local GRULayerT = nerv.class('nerv.GRULayerT', 'nerv.LayerT')
+
+function GRULayerT:__init(id, global_conf, layer_conf)
+    --input1:x input2:h; there is no third input (see check_dim_len(2, 1) at the end)
+    self.id = id
+    self.dim_in = layer_conf.dim_in
+    self.dim_out = layer_conf.dim_out
+    self.gconf = global_conf
+
+    if self.dim_in[2] ~= self.dim_out[1] then
+        nerv.error("dim_in[2](%d) mismatch with dim_out[1](%d)", self.dim_in[2], self.dim_out[1])
+    end
+
+    --prepare a DAGLayerT to hold the GRU structure
+    local pr = layer_conf.pr
+    if pr == nil then
+        pr = nerv.ParamRepo()
+    end
+    
+    local function ap(str)
+        return self.id .. '.' .. str -- prefix every sub-layer name with this layer's id
+    end
+
+    local layers = {
+        ["nerv.CombinerLayer"] = {
+            [ap("inputXDup")] = {{}, {["dim_in"] = {self.dim_in[1]}, 
+                ["dim_out"] = {self.dim_in[1], self.dim_in[1], self.dim_in[1]}, ["lambda"] = {1}}},
+            [ap("inputHDup")] = {{}, {["dim_in"] = {self.dim_in[2]}, 
+                ["dim_out"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
+            [ap("updateGDup")] = {{}, {["dim_in"] = {self.dim_in[2]},
+                ["dim_out"] = {self.dim_in[2], self.dim_in[2]}, ["lambda"] = {1}}},
+            [ap("updateMergeL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]},
+                ["lambda"] = {1, -1, 1}}}, -- inputs are h, updateGate*h, updateGate*tanh (see connections); presumably summed as h - z*h + z*c — confirm CombinerLayer semantics
+        },
+        ["nerv.AffineLayer"] = {
+            [ap("mainAffineL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, ["dim_out"] = {self.dim_out[1]}, ["pr"] = pr}},
+        },
+        ["nerv.TanhLayer"] = {
+            [ap("mainTanhL")] = {{}, {["dim_in"] = {self.dim_out[1]}, ["dim_out"] = {self.dim_out[1]}}},
+        },
+        ["nerv.GateFLayer"] = {
+            [ap("resetGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, 
+                ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}},
+            [ap("updateGateL")] = {{}, {["dim_in"] = {self.dim_in[1], self.dim_in[2]}, 
+                ["dim_out"] = {self.dim_in[2]}, ["pr"] = pr}},
+        },
+        ["nerv.ElemMulLayer"] = {
+            [ap("resetGMulL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
+            [ap("updateGMulCL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
+            [ap("updateGMulHL")] = {{}, {["dim_in"] = {self.dim_in[2], self.dim_in[2]}, ["dim_out"] = {self.dim_in[2]}}},
+        },
+    }
+    
+    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+
+    local connections_t = {
+        ["<input>[1]"] = ap("inputXDup[1]"), 
+        ["<input>[2]"] = ap("inputHDup[1]"),
+
+        [ap("inputXDup[1]")] = ap("resetGateL[1]"),
+        [ap("inputHDup[1]")] = ap("resetGateL[2]"),
+        [ap("inputXDup[2]")] = ap("updateGateL[1]"),
+        [ap("inputHDup[2]")] = ap("updateGateL[2]"),
+        [ap("updateGateL[1]")] = ap("updateGDup[1]"),
+
+        [ap("resetGateL[1]")] = ap("resetGMulL[1]"),
+        [ap("inputHDup[3]")] = ap("resetGMulL[2]"),
+
+        [ap("inputXDup[3]")] = ap("mainAffineL[1]"),
+        [ap("resetGMulL[1]")] = ap("mainAffineL[2]"),
+        [ap("mainAffineL[1]")] = ap("mainTanhL[1]"),
+
+        [ap("updateGDup[1]")] = ap("updateGMulHL[1]"),
+        [ap("inputHDup[4]")] = ap("updateGMulHL[2]"),
+        [ap("updateGDup[2]")] = ap("updateGMulCL[1]"),
+        [ap("mainTanhL[1]")] = ap("updateGMulCL[2]"),
+ 
+        [ap("inputHDup[5]")] = ap("updateMergeL[1]"),
+        [ap("updateGMulHL[1]")] = ap("updateMergeL[2]"),
+        [ap("updateGMulCL[1]")] = ap("updateMergeL[3]"),
+
+        [ap("updateMergeL[1]")] = "<output>[1]",
+    }
+
+    self.dagL = nerv.DAGLayerT(self.id, global_conf, 
+            {["dim_in"] = self.dim_in, ["dim_out"] = self.dim_out, ["sub_layers"] = layerRepo, 
+            ["connections"] = connections_t})
+    
+    self:check_dim_len(2, 1) -- inputs x and h, one output
+end
+
+function GRULayerT:init(batch_size, chunk_size) -- forwards to the wrapped DAGLayerT built in __init
+    self.dagL:init(batch_size, chunk_size)
+end
+
+function GRULayerT:batch_resize(batch_size, chunk_size) -- forwards to the wrapped DAGLayerT
+    self.dagL:batch_resize(batch_size, chunk_size)
+end
+
+function GRULayerT:update(bp_err, input, output, t) -- forwards to the wrapped DAGLayerT
+    self.dagL:update(bp_err, input, output, t)
+end
+
+function GRULayerT:propagate(input, output, t) -- forwards to the wrapped DAGLayerT
+    self.dagL:propagate(input, output, t)
+end
+
+function GRULayerT:back_propagate(bp_err, next_bp_err, input, output, t) -- forwards to the wrapped DAGLayerT
+    self.dagL:back_propagate(bp_err, next_bp_err, input, output, t)
+end
+
+function GRULayerT:get_params() -- returns whatever the wrapped DAGLayerT reports
+    return self.dagL:get_params()
+end
-- 
cgit v1.2.3-70-g09d2


From b99fe996dedccada79772d0a061d6b47e54899dd Mon Sep 17 00:00:00 2001
From: txh18 <cloudygooseg@gmail.com>
Date: Fri, 29 Jan 2016 20:25:09 +0800
Subject: select gpu code from mfy

---
 nerv/examples/lmptb/grulm_ptb_main.lua | 10 +++++++++-
 nerv/lib/matrix/cumatrix.c             |  6 ++++++
 nerv/matrix/cumatrix.c                 |  9 +++++++++
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/nerv/examples/lmptb/grulm_ptb_main.lua b/nerv/examples/lmptb/grulm_ptb_main.lua
index 6095b12..838a665 100644
--- a/nerv/examples/lmptb/grulm_ptb_main.lua
+++ b/nerv/examples/lmptb/grulm_ptb_main.lua
@@ -198,6 +198,7 @@ qdata_dir = root_dir .. '/ptb/questionGen/gen'
 global_conf = {
     lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5,
     cumat_type = nerv.CuMatrixFloat,
+    select_gpu = 0,
     mmat_type = nerv.MMatrixFloat,
     nn_act_default = 0, 
 
@@ -359,7 +360,14 @@ commands = nerv.SUtil.parse_commands_set(commands_str)
 if start_lr ~= nil then
     global_conf.lrate = start_lr
 end
- 
+
+nerv.printf("detecting gconf.select_gpu...\n")
+if global_conf.select_gpu then
+    nerv.printf("select gpu to %d\n", global_conf.select_gpu)
+    global_conf.cumat_type.select_gpu(global_conf.select_gpu)
+    nerv.LMUtil.wait(1)
+end
+
 nerv.printf("%s creating work_dir(%s)...\n", global_conf.sche_log_pre, global_conf.work_dir)
 nerv.LMUtil.wait(2)
 os.execute("mkdir -p "..global_conf.work_dir)
diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c
index a8ed075..533dade 100644
--- a/nerv/lib/matrix/cumatrix.c
+++ b/nerv/lib/matrix/cumatrix.c
@@ -7,6 +7,12 @@ static cublasHandle_t cublas_handle;
 static cudaEvent_t profile_start, profile_stop;
 static HashMap *profile;
 
+void nerv_cumatrix_select_gpu(int dev, Status *status) { /* makes CUDA device `dev` current via cudaSetDevice */
+    fprintf(stderr, "** selecting GPU %d\n", dev);
+    NERV_SET_STATUS(status, NERV_NORMAL, 0); /* set before the call; presumably the macro below overwrites it on failure — confirm */
+    CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status);
+}
+
 void nerv_cumatrix_print_profile() {
     size_t i;
     fprintf(stderr, "*** [nerv cumatrix profile] **\n");
diff --git a/nerv/matrix/cumatrix.c b/nerv/matrix/cumatrix.c
index bf92f92..7f22d68 100644
--- a/nerv/matrix/cumatrix.c
+++ b/nerv/matrix/cumatrix.c
@@ -8,6 +8,14 @@ static cublasHandle_t cublas_handle;
 static cudaEvent_t profile_start, profile_stop;
 static HashMap *profile;
 
+static int select_gpu(lua_State *L) { /* Lua binding, registered as "select_gpu" */
+    Status status;
+    int dev = luaL_checkinteger(L, 1); /* argument 1: device index handed to nerv_cumatrix_select_gpu */
+    nerv_cumatrix_select_gpu(dev, &status);
+    NERV_LUA_CHECK_STATUS(L, status);
+    return 0; /* no values are returned to Lua */
+}
+
 static int print_profile(lua_State *L) {
     nerv_cumatrix_print_profile();
     return 0;
@@ -21,6 +29,7 @@ static int clear_profile(lua_State *L) {
 static const luaL_Reg cumatrix_methods[] = {
     {"print_profile", print_profile},
     {"clear_profile", clear_profile},
+    {"select_gpu", select_gpu},
     {NULL, NULL}
 };
 
-- 
cgit v1.2.3-70-g09d2


From e7240d5bfa73b441c85672b8fae9255640cfc336 Mon Sep 17 00:00:00 2001
From: txh18 <cloudygooseg@gmail.com>
Date: Sat, 30 Jan 2016 16:43:16 +0800
Subject: like data of twitter from choose2 to choose

---
 nerv/examples/lmptb/grulm_ptb_main.lua  |  2 +-
 nerv/examples/lmptb/lstmlm_ptb_main.lua |  2 +-
 nerv/examples/lmptb/rnnlm_ptb_main.lua  | 37 +++++++++++++++++++++++++++++++++
 3 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/nerv/examples/lmptb/grulm_ptb_main.lua b/nerv/examples/lmptb/grulm_ptb_main.lua
index 838a665..4a3f39f 100644
--- a/nerv/examples/lmptb/grulm_ptb_main.lua
+++ b/nerv/examples/lmptb/grulm_ptb_main.lua
@@ -260,7 +260,7 @@ global_conf = {
 elseif (set == "twitter") then
 
 data_dir = root_dir .. '/twitter_new/DATA'
-train_fn = data_dir .. '/twitter.choose2.adds'
+train_fn = data_dir .. '/twitter.choose.adds'
 valid_fn = data_dir .. '/twitter.valid.adds'
 test_fn = data_dir .. '/comm.test.choose-ppl.adds'
 vocab_fn = data_dir .. '/twitter.choose.train.vocab'
diff --git a/nerv/examples/lmptb/lstmlm_ptb_main.lua b/nerv/examples/lmptb/lstmlm_ptb_main.lua
index 5794476..b576834 100644
--- a/nerv/examples/lmptb/lstmlm_ptb_main.lua
+++ b/nerv/examples/lmptb/lstmlm_ptb_main.lua
@@ -277,7 +277,7 @@ global_conf = {
     hidden_size = 300,
     layer_num = 1,
     chunk_size = 15,
-    batch_size = 20, 
+    batch_size = 32, 
     max_iter = 35,
     lr_decay = 1.003,
     decay_iter = 10,
diff --git a/nerv/examples/lmptb/rnnlm_ptb_main.lua b/nerv/examples/lmptb/rnnlm_ptb_main.lua
index dc011fb..a1d9471 100644
--- a/nerv/examples/lmptb/rnnlm_ptb_main.lua
+++ b/nerv/examples/lmptb/rnnlm_ptb_main.lua
@@ -197,6 +197,43 @@ global_conf = {
     work_dir_base = root_dir .. '/ptb/EXP-nerv/rnnlm_tnn'
 }
 
+elseif (set == "twitter") then
+
+data_dir = root_dir .. '/twitter_new/DATA'
+train_fn = data_dir .. '/twitter.choose.adds'
+valid_fn = data_dir .. '/twitter.valid.adds'
+test_fn = data_dir .. '/comm.test.choose-ppl.adds'
+vocab_fn = data_dir .. '/twitter.choose.train.vocab'
+
+--qdata_dir = root_dir .. '/ptb/questionGen/gen'
+
+global_conf = {
+    lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5,
+    cumat_type = nerv.CuMatrixFloat,
+    mmat_type = nerv.MMatrixFloat,
+    nn_act_default = 0, 
+
+    hidden_size = 300,
+    layer_num = 1,
+    chunk_size = 15,
+    batch_size = 32, 
+    max_iter = 30,
+    lr_decay = 1.003,
+    decay_iter = 10,
+    param_random = function() return (math.random() / 5 - 0.1) end,
+    dropout_str = "0.5",
+
+    train_fn = train_fn,
+    valid_fn = valid_fn,
+    test_fn = test_fn,
+    vocab_fn = vocab_fn,
+    max_sen_len = 32,
+    sche_log_pre = "[SCHEDULER]:",
+    log_w_num = 40000, --give a message when log_w_num words have been processed
+    timer = nerv.Timer(),
+    work_dir_base = root_dir .. '/twitter_new/EXP-nerv/rnnlm_v1.0'
+}
+
 elseif (set == "msr_sc") then
 
 data_dir = '/home/slhome/txh18/workspace/sentenceCompletion/DATA_PV2'
-- 
cgit v1.2.3-70-g09d2


From bb0f58c82882d34ee1737227476167be9367433c Mon Sep 17 00:00:00 2001
From: txh18 <cloudygooseg@gmail.com>
Date: Sun, 31 Jan 2016 21:56:27 +0800
Subject: new changes to select_gpu

---
 nerv/lib/matrix/cumatrix.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/nerv/lib/matrix/cumatrix.c b/nerv/lib/matrix/cumatrix.c
index 533dade..ea7a518 100644
--- a/nerv/lib/matrix/cumatrix.c
+++ b/nerv/lib/matrix/cumatrix.c
@@ -11,6 +11,8 @@ void nerv_cumatrix_select_gpu(int dev, Status *status) {
     fprintf(stderr, "** selecting GPU %d\n", dev);
     NERV_SET_STATUS(status, NERV_NORMAL, 0);
     CUDA_SAFE_SYNC_CALL(cudaSetDevice(dev), status);
+    CUDA_SAFE_SYNC_CALL(cublasDestroy(cublas_handle), status);
+    CUDA_SAFE_SYNC_CALL(cublasCreate(&cublas_handle), status);
 }
 
 void nerv_cumatrix_print_profile() {
-- 
cgit v1.2.3-70-g09d2


From 2fc05a9b3bb28ea8cae66c82b891028cccc40e53 Mon Sep 17 00:00:00 2001
From: txh18 <cloudygooseg@gmail.com>
Date: Wed, 3 Feb 2016 14:13:33 +0800
Subject: added same_io option to lm_seq_reader

---
 nerv/examples/lmptb/lmptb/lmseqreader.lua        | 34 ++++++++++++++----------
 nerv/examples/lmptb/m-tests/lmseqreader_test.lua | 10 ++++---
 nerv/examples/lmptb/m-tests/some-text            |  2 +-
 3 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/nerv/examples/lmptb/lmptb/lmseqreader.lua b/nerv/examples/lmptb/lmptb/lmseqreader.lua
index 0f29f8b..1272929 100644
--- a/nerv/examples/lmptb/lmptb/lmseqreader.lua
+++ b/nerv/examples/lmptb/lmptb/lmseqreader.lua
@@ -28,6 +28,10 @@ function LMReader:__init(global_conf, batch_size, chunk_size, vocab, r_conf)
     if r_conf.compressed_label == true then
         self.compressed_label = true
     end
+    self.same_io = false
+    if r_conf.same_io == true then --can be used to train P(wi|w1..(i-1),(i+1)..n)
+        self.same_io = true
+    end
 end
 
 --fn: string
@@ -36,9 +40,9 @@ function LMReader:open_file(fn)
     if (self.fh ~= nil) then
         nerv.error("%s error: in open_file(fn is %s), file handle not nil.", self.log_pre, fn)
     end
-    printf("%s opening file %s...\n", self.log_pre, fn)
-    print(self.log_pre, "batch_size:", self.batch_size, "chunk_size", self.chunk_size)
-    print(self.log_pre, "se_mode:", self.se_mode)
+    nerv.printf("%s opening file %s...\n", self.log_pre, fn)
+    nerv.printf("%s batch_size:%d chunk_size:%d\n", self.log_pre, self.batch_size, self.chunk_size)
+    nerv.printf("%s se_mode:%s same_io:%s\n", self.log_pre, tostring(self.se_mode), tostring(self.same_io))
     self.fh = io.open(fn, "r")
     self.streams = {}
     for i = 1, self.batch_size, 1 do
@@ -132,12 +136,15 @@ function LMReader:get_batch(feeds)
             else
                 self:refresh_stream(i)
                 if st.store[st.head] ~= nil then
-                    inputs_s[j][i] = st.store[st.head]
-                    --inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head]).id - 1
-                    self.bak_inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head]).id - 1
+                    if self.same_io == false then 
+                        inputs_s[j][i] = st.store[st.head]
+                        self.bak_inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head]).id - 1
+                    else
+                        inputs_s[j][i] = st.store[st.head + 1]
+                        self.bak_inputs_m[j][1][i - 1][0] = self.vocab:get_word_str(st.store[st.head + 1]).id - 1                      
+                    end
                 else
                     inputs_s[j][i] = self.vocab.null_token
-                    --inputs_m[j][1][i - 1][0] = 0
                     self.bak_inputs_m[j][1][i - 1][0] = 0
                 end
                 if st.store[st.head + 1] ~= nil then
@@ -148,7 +155,7 @@ function LMReader:get_batch(feeds)
                         inputs_m[j][2][i - 1][self.vocab:get_word_str(st.store[st.head + 1]).id - 1] = 1
                     end
                 else
-                    if (inputs_s[j][i] ~= self.vocab.null_token) then
+                    if inputs_s[j][i] ~= self.vocab.null_token then
                         nerv.error("reader error : input not null but label is null_token")
                     end
                     labels_s[j][i] = self.vocab.null_token
@@ -159,6 +166,9 @@ function LMReader:get_batch(feeds)
                     end
                     flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_NORM) --has both input and label
                     got_new = true
+                    if st.store[st.head] == self.vocab.sen_end_token then
+                        flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_START)
+                    end
                     st.store[st.head] = nil
                     st.head = st.head + 1
                     if labels_s[j][i] == self.vocab.sen_end_token then
@@ -169,10 +179,7 @@ function LMReader:get_batch(feeds)
                             end_stream = true --meet sentence end, this stream ends now
                         end
                     end
-                    if inputs_s[j][i] == self.vocab.sen_end_token then
-                        flags[j][i] = bit.bor(flags[j][i], nerv.TNN.FC.SEQ_START)
-                    end
-                end 
+               end 
             end
         end
     end
@@ -190,7 +197,7 @@ function LMReader:get_batch(feeds)
 
     --check for self.al_sen_start
     for i = 1, self.batch_size do
-        if inputs_s[1][i] ~= self.vocab.sen_end_token and inputs_s[1][i] ~= self.vocab.null_token then
+        if bit.band(flags[1][i], nerv.TNN.FC.SEQ_START) == 0 and flags[1][i] > 0 then
             self.stat.al_sen_start = false
         end
     end
@@ -198,7 +205,6 @@ function LMReader:get_batch(feeds)
     if got_new == false then
         nerv.info("lmseqreader file ends, printing stats...")
         nerv.printf("al_sen_start:%s\n", tostring(self.stat.al_sen_start))
-
         return false
     else
         return true
diff --git a/nerv/examples/lmptb/m-tests/lmseqreader_test.lua b/nerv/examples/lmptb/m-tests/lmseqreader_test.lua
index 9127559..3f99741 100644
--- a/nerv/examples/lmptb/m-tests/lmseqreader_test.lua
+++ b/nerv/examples/lmptb/m-tests/lmseqreader_test.lua
@@ -7,7 +7,7 @@ local test_fn = "/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-te
 --local test_fn = "/home/slhome/txh18/workspace/nerv-project/nerv/examples/lmptb/PTBdata/ptb.train.txt"
 local vocab = nerv.LMVocab()
 vocab:build_file(test_fn)
-local chunk_size = 20
+local chunk_size = 15
 local batch_size = 3
 local global_conf = {
     lrate = 1, wcost = 1e-6, momentum = 0,
@@ -30,7 +30,8 @@ local global_conf = {
     vocab = vocab
 }
 
-local reader = nerv.LMSeqReader(global_conf, batch_size, chunk_size, vocab, {["se_mode"] = true})
+local reader = nerv.LMSeqReader(global_conf, batch_size, chunk_size, vocab, 
+        {["se_mode"] = true, ["same_io"] = true})
 reader:open_file(test_fn)
 local feeds = {}
 feeds.flags_now = {}
@@ -40,14 +41,15 @@ for j = 1, chunk_size do
     feeds.inputs_m[j] = {global_conf.cumat_type(batch_size, 1), global_conf.cumat_type(batch_size, global_conf.vocab:size())}
     feeds.flags_now[j] = {}
 end
-while (1) do
+for k = 1, 5 do
     local r = reader:get_batch(feeds)
     if (r == false) then break end
     for j = 1, chunk_size, 1 do
         for i = 1, batch_size, 1 do
-            printf("%s[L(%s)] ", feeds.inputs_s[j][i], feeds.labels_s[j][i])   --vocab:get_word_str(input[i][j]).id
+            printf("%s[L(%s)]F%d ", feeds.inputs_s[j][i], feeds.labels_s[j][i], feeds.flags_now[j][i])   --vocab:get_word_str(input[i][j]).id
         end
         printf("\n")
     end
     printf("\n")
 end
+printf("reader.sen_start %s\n", tostring(reader.stat.al_sen_start))
diff --git a/nerv/examples/lmptb/m-tests/some-text b/nerv/examples/lmptb/m-tests/some-text
index da4bea9..6756fa0 100644
--- a/nerv/examples/lmptb/m-tests/some-text
+++ b/nerv/examples/lmptb/m-tests/some-text
@@ -1,4 +1,4 @@
-</s> aa bb cc aa bb cc aa bb cc aa bb cc aa bb cc aa </s>
+</s> aa bb cc aa bb cc aa bb cc aa bb cc aa </s>
 </s> aa bb cc aa bb cc aa bb cc aa </s>
 </s> bb cc aa bb cc aa bb cc aa </s>
 </s> aa bb cc aa </s>
-- 
cgit v1.2.3-70-g09d2


From 8a6385261a71b2432cd20347286a6eb0166e32b6 Mon Sep 17 00:00:00 2001
From: txh18 <cloudygooseg@gmail.com>
Date: Fri, 5 Feb 2016 16:49:24 +0800
Subject: made lm sampling code cleaner

---
 nerv/examples/lmptb/lm_sampler.lua            |   7 +-
 nerv/examples/lmptb/sample_grulm_ptb_main.lua | 122 ++++++++++++++------------
 2 files changed, 72 insertions(+), 57 deletions(-)

diff --git a/nerv/examples/lmptb/lm_sampler.lua b/nerv/examples/lmptb/lm_sampler.lua
index 2a4f1c3..d194af9 100644
--- a/nerv/examples/lmptb/lm_sampler.lua
+++ b/nerv/examples/lmptb/lm_sampler.lua
@@ -3,18 +3,19 @@ local LMSampler = nerv.class('nerv.LMSampler')
 function LMSampler:__init(global_conf)
     self.log_pre = "LMSampler"
     self.gconf = global_conf
+    self.batch_size = self.gconf.batch_size
+    self.chunk_size = self.gconf.chunk_size --largest sample sentence length
     self.vocab = self.gconf.vocab
     self.sen_end_token = self.vocab.sen_end_token
     self.sen_end_id = self.vocab:get_word_str(self.sen_end_token).id 
 end
 
 function LMSampler:load_dagL(dagL)
-    self.batch_size = self.gconf.batch_size
-    self.chunk_size = self.gconf.chunk_size
-    
+   
     nerv.printf("%s loading dagL\n", self.log_pre)
 
     self.dagL = dagL
+    self.dagL:init(self.batch_size)
 
     self.dagL_inputs = {}
     self.dagL_inputs[1] = global_conf.cumat_type(global_conf.batch_size, 1)
diff --git a/nerv/examples/lmptb/sample_grulm_ptb_main.lua b/nerv/examples/lmptb/sample_grulm_ptb_main.lua
index 30dfe26..42a5787 100644
--- a/nerv/examples/lmptb/sample_grulm_ptb_main.lua
+++ b/nerv/examples/lmptb/sample_grulm_ptb_main.lua
@@ -134,10 +134,39 @@ function prepare_tnn(global_conf, layerRepo)
     return tnn
 end
 
-function prepare_dagL(global_conf, layerRepo)
-    nerv.printf("%s Generate and initing dagL ...\n", global_conf.sche_log_pre)
+function load_net_tnn(global_conf, fn)
+    prepare_parameters(global_conf, fn)
+    local layerRepo = prepare_layers(global_conf)
+    local tnn = prepare_tnn(global_conf, layerRepo)
+    return tnn
+end
+
+function prepare_sampler(sm_conf)
+    sm_conf.pr = nerv.ParamRepo()
+    sm_conf.pr:import({sm_conf.fn_to_sample}, nil, sm_conf)
+
+    local layers = {
+        ["nerv.GRULayerT"] = {
+            ["gruL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}, ["pr"] = sm_conf.pr}}, 
+        },
+        ["nerv.DropoutLayerT"] = {
+            ["dropoutL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}}}, 
+        },
+        ["nerv.SelectLinearLayer"] = {
+            ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {sm_conf.hidden_size}, ["vocab"] = sm_conf.vocab, ["pr"] = sm_conf.pr}},
+        },
+        ["nerv.CombinerLayer"] = {
+           ["combinerL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["lambda"] = {1}}},
+        },
+        ["nerv.AffineLayer"] = {
+            ["outputL"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.vocab:size()},  ["pr"] = sm_conf.pr}},
+        },
+        ["nerv.SoftmaxCELayerT"] = {
+            ["softmaxL"] = {{}, {["dim_in"] = {sm_conf.vocab:size(), sm_conf.vocab:size()}, ["dim_out"] = {1}}},
+        },
+    }
+    local layerRepo = nerv.LayerRepo(layers, sm_conf.pr, sm_conf)
 
-    --input: input_w, input_w, ... input_w_now, last_activation
     local connections_t = {
         ["<input>[1]"] = "selectL1[1]",
        
@@ -151,48 +180,19 @@ function prepare_dagL(global_conf, layerRepo)
         ["combinerL1[2]"] = "<output>[2]",
     }
     
-    if global_conf.layer_num > 1 then
+    if sm_conf.layer_num > 1 then
         nerv.error("multiple layer is currently not supported(not hard to implement though)")
     end
-    --[[
-    for l = 2, global_conf.layer_num do
-        table.insert(connections_t, {"dropoutL"..(l-1).."[1]", "gruL"..l.."[1]", 0})
-        table.insert(connections_t, {"gruL"..l.."[1]", "combinerL"..l.."[1]", 0})
-        table.insert(connections_t, {"combinerL"..l.."[1]", "gruL"..l.."[2]", 1})
-        table.insert(connections_t, {"combinerL"..l.."[2]", "dropoutL"..l.."[1]", 0})
-    end
-    ]]--
 
-    --[[
-    printf("%s printing DAG connections:\n", global_conf.sche_log_pre)
-    for key, value in pairs(connections_t) do
-        printf("\t%s->%s\n", key, value)
-    end
-    ]]--
-
-    local dagL = nerv.DAGLayerT("dagL", global_conf, {["dim_in"] = {1, global_conf.hidden_size}, 
-            ["dim_out"] = {global_conf.vocab:size(), global_conf.hidden_size}, ["sub_layers"] = layerRepo,
+    local dagL = nerv.DAGLayerT("dagL", sm_conf, {["dim_in"] = {1, sm_conf.hidden_size}, 
+            ["dim_out"] = {sm_conf.vocab:size(), sm_conf.hidden_size}, ["sub_layers"] = layerRepo,
             ["connections"] = connections_t
         })
+    
+    local sampler = nerv.LMSampler(sm_conf)
+    sampler:load_dagL(dagL)
 
-    dagL:init(global_conf.batch_size)
-
-    nerv.printf("%s Initing DAGL end.\n", global_conf.sche_log_pre)
-    return dagL
-end
-
-function load_net_tnn(global_conf, fn)
-    prepare_parameters(global_conf, fn)
-    local layerRepo = prepare_layers(global_conf)
-    local tnn = prepare_tnn(global_conf, layerRepo)
-    return tnn
-end
-
-function load_net_dagL(global_conf, fn)
-    prepare_parameters(global_conf, fn)
-    local layerRepo = prepare_layers(global_conf)
-    local dagL = prepare_dagL(global_conf, layerRepo)
-    return dagL
+    return sampler
 end
 
 local train_fn, valid_fn, test_fn
@@ -240,6 +240,23 @@ global_conf = {
     fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final',
 }
 
+sm_conf = {
+    cumat_type = nerv.CuMatrixFloat,
+    mmat_type = nerv.MMatrixFloat,
+    nn_act_default = 0, 
+
+    hidden_size = 300,
+    layer_num = 1,
+    batch_size = 32, 
+    chunk_size = 85, --largest sample sentence length
+    max_iter = 35,
+    max_sen_len = 90,
+    sche_log_pre = "[SAMPLER_S]:",
+
+    timer = global_conf.timer,
+    fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final',
+}
+
 elseif (set == "msr_sc") then
 
 data_dir = '/home/slhome/txh18/workspace/sentenceCompletion/DATA_PV2'
@@ -276,15 +293,13 @@ global_conf = {
 
 elseif (set == "twitter") then
 
-data_dir = root_dir .. '/twitter_new/DATA'
-train_fn = data_dir .. '/twitter.choose2.adds'
-valid_fn = data_dir .. '/twitter.valid.adds'
-test_fn = data_dir .. '/comm.test.choose-ppl.adds'
-vocab_fn = data_dir .. '/twitter.choose.train.vocab'
-
---qdata_dir = root_dir .. '/ptb/questionGen/gen'
-
-global_conf = {
+    data_dir = root_dir .. '/twitter_new/DATA'
+    train_fn = data_dir .. '/twitter.choose2.adds'
+    valid_fn = data_dir .. '/twitter.valid.adds'
+    test_fn = data_dir .. '/comm.test.choose-ppl.adds'
+    vocab_fn = data_dir .. '/twitter.choose.train.vocab'
+    --qdata_dir = root_dir .. '/ptb/questionGen/gen'
+    global_conf = {
     lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5,
     cumat_type = nerv.CuMatrixFloat,
     mmat_type = nerv.MMatrixFloat,
@@ -309,7 +324,7 @@ global_conf = {
     log_w_num = 40000, --give a message when log_w_num words have been processed
     timer = nerv.Timer(),
     work_dir_base = root_dir .. '/twitter_new/EXP-nerv/grulm_v1.0'
-}
+    }
 
 else
 
@@ -385,8 +400,8 @@ nerv.LMUtil.wait(2)
 ]]--
 
 ----------------printing options---------------------------------
-nerv.printf("%s printing global_conf...\n", global_conf.sche_log_pre)
-for id, value in pairs(global_conf) do
+nerv.printf("%s printing sm_conf...\n", sm_conf.sche_log_pre)
+for id, value in pairs(sm_conf) do
     nerv.printf("%s:\t%s\n", id, tostring(value))
 end
 nerv.LMUtil.wait(2)
@@ -405,6 +420,7 @@ math.randomseed(1)
 
 local vocab = nerv.LMVocab()
 global_conf["vocab"] = vocab
+sm_conf["vocab"] = global_conf.vocab
 nerv.printf("%s building vocab...\n", global_conf.sche_log_pre)
 global_conf.vocab:build_file(global_conf.vocab_fn, false)
 ppl_rec = {} 
@@ -421,9 +437,7 @@ end --if commands["test"]
 if commands["sampling"] == 1 then
     nerv.printf("===SAMPLE===\n") 
     global_conf.sche_log_pre = "[SCHEDULER SAMPLING]:" 
-    local dagL = load_net_dagL(global_conf, global_conf.fn_to_sample) 
-    local sampler = nerv.LMSampler(global_conf)
-    sampler:load_dagL(dagL)
+    local sampler = prepare_sampler(sm_conf)
     for k = 1, 1 do
         local res = sampler:lm_sample_rnn_dagL(10, {})
         for i = 1, #res do
-- 
cgit v1.2.3-70-g09d2


From a1d8c0a2369ea72df77821f7b298903e9470e676 Mon Sep 17 00:00:00 2001
From: txh18 <cloudygooseg@gmail.com>
Date: Fri, 5 Feb 2016 21:19:58 +0800
Subject: move sample_grulm_ptb_main.lua to m-tests/lm_sampler_test.lua

---
 nerv/examples/lmptb/m-tests/lm_sampler_test.lua | 454 ++++++++++++++++++++++++
 nerv/examples/lmptb/sample_grulm_ptb_main.lua   | 454 ------------------------
 2 files changed, 454 insertions(+), 454 deletions(-)
 create mode 100644 nerv/examples/lmptb/m-tests/lm_sampler_test.lua
 delete mode 100644 nerv/examples/lmptb/sample_grulm_ptb_main.lua

diff --git a/nerv/examples/lmptb/m-tests/lm_sampler_test.lua b/nerv/examples/lmptb/m-tests/lm_sampler_test.lua
new file mode 100644
index 0000000..42a5787
--- /dev/null
+++ b/nerv/examples/lmptb/m-tests/lm_sampler_test.lua
@@ -0,0 +1,454 @@
+require 'lmptb.lmvocab'
+require 'lmptb.lmfeeder'
+require 'lmptb.lmutil'
+require 'lmptb.layer.init'
+--require 'tnn.init'
+require 'lmptb.lmseqreader'
+require 'lm_trainer'
+require 'lm_sampler'
+
+--[[global function rename]]--
+--local printf = nerv.printf
+local LMTrainer = nerv.LMTrainer
+--[[global function rename ends]]--
+--Creates a new nerv.ParamRepo, stores it in global_conf.paramRepo and imports file fn into it; returns nil
+function prepare_parameters(global_conf, fn)
+    nerv.printf("%s preparing parameters...\n", global_conf.sche_log_pre) 
+    --the repo is kept on global_conf; prepare_layers reads it back from global_conf.paramRepo
+    global_conf.paramRepo = nerv.ParamRepo()
+    local paramRepo = global_conf.paramRepo
+    --fn: name of the parameter file handed to ParamRepo:import
+    nerv.printf("%s loading parameter from file %s...\n", global_conf.sche_log_pre, fn) 
+    paramRepo:import({fn}, nil, global_conf)
+
+    nerv.printf("%s preparing parameters end.\n", global_conf.sche_log_pre)
+
+    return nil
+end
+
+--global_conf: table
+--Returns: nerv.LayerRepo
+function prepare_layers(global_conf)
+    nerv.printf("%s preparing layers...\n", global_conf.sche_log_pre)
+    
+    local pr = global_conf.paramRepo
+
+    local du = false
+
+    --local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent, ["clip"] = 10}}
+    --local recurrentLconfig = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["clip"] = 10, ["direct_update"] = du, ["pr"] = pr}}
+
+    local layers = {
+        ["nerv.GRULayerT"] = {
+            ["gruL1"] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["pr"] = pr}}, 
+        },
+        
+        ["nerv.DropoutLayerT"] = {
+            ["dropoutL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}, 
+        },
+
+        ["nerv.SelectLinearLayer"] = {
+            ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab, ["pr"] = pr}},
+        },
+        
+        ["nerv.CombinerLayer"] = {
+            ["combinerL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}},
+        },
+
+        ["nerv.AffineLayer"] = {
+            ["outputL"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.vocab:size()}, ["direct_update"] = du, ["pr"] = pr}},
+        },
+
+        ["nerv.SoftmaxCELayerT"] = {
+            ["softmaxL"] = {{}, {["dim_in"] = {global_conf.vocab:size(), global_conf.vocab:size()}, ["dim_out"] = {1}}},
+        },
+    }
+   
+    for l = 2, global_conf.layer_num do 
+        layers["nerv.DropoutLayerT"]["dropoutL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
+        layers["nerv.GRULayerT"]["gruL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["pr"] = pr}}
+        layers["nerv.CombinerLayer"]["combinerL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}}
+    end
+    --[[ --we do not need those in the new tnn framework
+    printf("%s adding %d bptt layers...\n", global_conf.sche_log_pre, global_conf.bptt)
+    for i = 1, global_conf.bptt do
+        layers["nerv.IndRecurrentLayer"]["recurrentL" .. (i + 1)] = recurrentLconfig 
+        layers["nerv.SigmoidLayer"]["sigmoidL" .. (i + 1)] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
+        layers["nerv.SelectLinearLayer"]["selectL" .. (i + 1)] = {{["ltp"] = "ltp_ih"}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}}}
+    end
+    --]]
+
+    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
+    nerv.printf("%s preparing layers end.\n", global_conf.sche_log_pre)
+    return layerRepo
+end
+
+--global_conf: table
+--layerRepo: nerv.LayerRepo
+--Returns: a nerv.TNN
+function prepare_tnn(global_conf, layerRepo)
+    nerv.printf("%s Generate and initing TNN ...\n", global_conf.sche_log_pre)
+
+    --input: input_w, input_w, ... input_w_now, last_activation
+    local connections_t = {
+        {"<input>[1]", "selectL1[1]", 0},
+        
+        --{"selectL1[1]", "recurrentL1[1]", 0},  
+        --{"recurrentL1[1]", "sigmoidL1[1]", 0},
+        --{"sigmoidL1[1]", "combinerL1[1]", 0},
+        --{"combinerL1[1]", "recurrentL1[2]", 1},
+        
+        {"selectL1[1]", "gruL1[1]", 0},
+        {"gruL1[1]", "combinerL1[1]", 0},
+        {"combinerL1[1]", "gruL1[2]", 1},
+        {"combinerL1[2]", "dropoutL1[1]", 0},
+        
+        {"dropoutL"..global_conf.layer_num.."[1]", "outputL[1]", 0},
+        {"outputL[1]", "softmaxL[1]", 0},
+        {"<input>[2]", "softmaxL[2]", 0},
+        {"softmaxL[1]", "<output>[1]", 0}
+    }
+
+    for l = 2, global_conf.layer_num do
+        table.insert(connections_t, {"dropoutL"..(l-1).."[1]", "gruL"..l.."[1]", 0})
+        table.insert(connections_t, {"gruL"..l.."[1]", "combinerL"..l.."[1]", 0})
+        table.insert(connections_t, {"combinerL"..l.."[1]", "gruL"..l.."[2]", 1})
+        table.insert(connections_t, {"combinerL"..l.."[2]", "dropoutL"..l.."[1]", 0})
+    end
+
+    --[[
+    printf("%s printing DAG connections:\n", global_conf.sche_log_pre)
+    for key, value in pairs(connections_t) do
+        printf("\t%s->%s\n", key, value)
+    end
+    ]]--
+
+    local tnn = nerv.TNN("TNN", global_conf, {["dim_in"] = {1, global_conf.vocab:size()}, 
+            ["dim_out"] = {1}, ["sub_layers"] = layerRepo,
+            ["connections"] = connections_t, ["clip_t"] = global_conf.clip_t,
+        })
+
+    tnn:init(global_conf.batch_size, global_conf.chunk_size)
+
+    nerv.printf("%s Initing TNN end.\n", global_conf.sche_log_pre)
+    return tnn
+end
+--Imports the parameters in fn, builds the layer repo from global_conf and returns the TNN made by prepare_tnn
+function load_net_tnn(global_conf, fn)
+    prepare_parameters(global_conf, fn)
+    local layerRepo = prepare_layers(global_conf)
+    local tnn = prepare_tnn(global_conf, layerRepo)
+    return tnn
+end
+--sm_conf: table (sampler config, needs fn_to_sample, hidden_size, layer_num, vocab); Returns: a nerv.LMSampler with the DAG layer loaded
+function prepare_sampler(sm_conf)
+    sm_conf.pr = nerv.ParamRepo()
+    sm_conf.pr:import({sm_conf.fn_to_sample}, nil, sm_conf)
+    --layer definitions; layers that take parameters receive the repo through ["pr"] = sm_conf.pr
+    local layers = {
+        ["nerv.GRULayerT"] = {
+            ["gruL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}, ["pr"] = sm_conf.pr}},
+        },
+        ["nerv.DropoutLayerT"] = {
+            ["dropoutL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}}},
+        },
+        ["nerv.SelectLinearLayer"] = {
+            ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {sm_conf.hidden_size}, ["vocab"] = sm_conf.vocab, ["pr"] = sm_conf.pr}},
+        },
+        ["nerv.CombinerLayer"] = {
+           ["combinerL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["lambda"] = {1}}},
+        },
+        ["nerv.AffineLayer"] = {
+            ["outputL"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.vocab:size()},  ["pr"] = sm_conf.pr}},
+        },
+        ["nerv.SoftmaxCELayerT"] = {
+            ["softmaxL"] = {{}, {["dim_in"] = {sm_conf.vocab:size(), sm_conf.vocab:size()}, ["dim_out"] = {1}}},
+        },
+    }
+    local layerRepo = nerv.LayerRepo(layers, sm_conf.pr, sm_conf)
+    --the caller feeds gruL1[2] through <input>[2] and reads combinerL1[2] back from <output>[2]
+    local connections_t = {
+        ["<input>[1]"] = "selectL1[1]",
+
+        ["selectL1[1]"] = "gruL1[1]",
+        ["gruL1[1]"] = "combinerL1[1]",
+        ["<input>[2]"] = "gruL1[2]",
+        --{"combinerL1[2]", "dropoutL1[1]", 0},
+        --layer_num is taken from sm_conf (the config passed in), not from the global global_conf
+        ["combinerL" .. sm_conf.layer_num .. "[1]"] = "outputL[1]",
+        ["outputL[1]"] = "<output>[1]",
+        ["combinerL1[2]"] = "<output>[2]",
+    }
+
+    if sm_conf.layer_num > 1 then
+        nerv.error("multiple layer is currently not supported(not hard to implement though)")
+    end
+
+    local dagL = nerv.DAGLayerT("dagL", sm_conf, {["dim_in"] = {1, sm_conf.hidden_size}, 
+            ["dim_out"] = {sm_conf.vocab:size(), sm_conf.hidden_size}, ["sub_layers"] = layerRepo,
+            ["connections"] = connections_t
+        })
+
+    local sampler = nerv.LMSampler(sm_conf)
+    sampler:load_dagL(dagL)
+
+    return sampler
+end
+
+local train_fn, valid_fn, test_fn
+global_conf = {}
+local set = arg[1] --"test"
+
+root_dir = '/home/slhome/txh18/workspace'
+
+if (set == "ptb") then
+
+data_dir = root_dir .. '/ptb/DATA'
+train_fn = data_dir .. '/ptb.train.txt.adds'
+valid_fn = data_dir .. '/ptb.valid.txt.adds'
+test_fn = data_dir .. '/ptb.test.txt.adds'
+vocab_fn = data_dir .. '/vocab'
+
+qdata_dir = root_dir .. '/ptb/questionGen/gen'
+
+global_conf = {
+    lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5,
+    cumat_type = nerv.CuMatrixFloat,
+    mmat_type = nerv.MMatrixFloat,
+    nn_act_default = 0, 
+
+    hidden_size = 300,
+    layer_num = 1,
+    chunk_size = 15,
+    batch_size = 32, 
+    max_iter = 35,
+    lr_decay = 1.003,
+    decay_iter = 10,
+    param_random = function() return (math.random() / 5 - 0.1) end,
+    dropout_str = "0.5",
+
+    train_fn = train_fn,
+    valid_fn = valid_fn,
+    test_fn = test_fn,
+    vocab_fn = vocab_fn,
+    max_sen_len = 90,
+    sche_log_pre = "[SCHEDULER]:",
+    log_w_num = 40000, --give a message when log_w_num words have been processed
+    timer = nerv.Timer(),
+    work_dir_base = root_dir .. '/ptb/EXP-nerv/grulm_v1.0',
+    
+    fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final',
+}
+
+sm_conf = {
+    cumat_type = nerv.CuMatrixFloat,
+    mmat_type = nerv.MMatrixFloat,
+    nn_act_default = 0, 
+
+    hidden_size = 300,
+    layer_num = 1,
+    batch_size = 32, 
+    chunk_size = 85, --largest sample sentence length
+    max_iter = 35,
+    max_sen_len = 90,
+    sche_log_pre = "[SAMPLER_S]:",
+
+    timer = global_conf.timer,
+    fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final',
+}
+
+elseif (set == "msr_sc") then
+
+data_dir = '/home/slhome/txh18/workspace/sentenceCompletion/DATA_PV2'
+train_fn = data_dir .. '/normed_all.sf.len60.adds.train'
+valid_fn = data_dir .. '/normed_all.sf.len60.adds.dev'
+test_fn = data_dir .. '/answer_normed.adds'
+vocab_fn = data_dir .. '/normed_all.choose.vocab30000.addqvocab'
+
+global_conf = {
+    lrate = 1, wcost = 1e-6, momentum = 0,
+    cumat_type = nerv.CuMatrixFloat,
+    mmat_type = nerv.MMatrixFloat,
+    nn_act_default = 0, 
+
+    hidden_size = 300,
+    layer_num = 1,
+    chunk_size = 15,
+    batch_size = 10, 
+    max_iter = 30,
+    decay_iter = 10,
+    lr_decay = 1.003,
+    param_random = function() return (math.random() / 5 - 0.1) end,
+    dropout_str = "0",
+
+    train_fn = train_fn,
+    valid_fn = valid_fn,
+    test_fn = test_fn,
+    vocab_fn = vocab_fn,
+    sche_log_pre = "[SCHEDULER]:",
+    log_w_num = 400000, --give a message when log_w_num words have been processed
+    timer = nerv.Timer(),
+    work_dir_base = '/home/slhome/txh18/workspace/sentenceCompletion/EXP-Nerv/rnnlm_test'
+}
+
+elseif (set == "twitter") then
+
+    data_dir = root_dir .. '/twitter_new/DATA'
+    train_fn = data_dir .. '/twitter.choose2.adds'
+    valid_fn = data_dir .. '/twitter.valid.adds'
+    test_fn = data_dir .. '/comm.test.choose-ppl.adds'
+    vocab_fn = data_dir .. '/twitter.choose.train.vocab'
+    --qdata_dir = root_dir .. '/ptb/questionGen/gen'
+    global_conf = {
+    lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5,
+    cumat_type = nerv.CuMatrixFloat,
+    mmat_type = nerv.MMatrixFloat,
+    nn_act_default = 0, 
+
+    hidden_size = 300,
+    layer_num = 1,
+    chunk_size = 15,
+    batch_size = 32, 
+    max_iter = 30,
+    lr_decay = 1.003,
+    decay_iter = 10,
+    param_random = function() return (math.random() / 5 - 0.1) end,
+    dropout_str = "0.5",
+
+    train_fn = train_fn,
+    valid_fn = valid_fn,
+    test_fn = test_fn,
+    vocab_fn = vocab_fn,
+    max_sen_len = 32,
+    sche_log_pre = "[SCHEDULER]:",
+    log_w_num = 40000, --give a message when log_w_num words have been processed
+    timer = nerv.Timer(),
+    work_dir_base = root_dir .. '/twitter_new/EXP-nerv/grulm_v1.0'
+    }
+
+else
+
+valid_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn'
+train_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn'
+test_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn'
+vocab_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn'
+
+global_conf = {
+    lrate = 0.01, wcost = 1e-5, momentum = 0,
+    cumat_type = nerv.CuMatrixFloat,
+    mmat_type = nerv.MMatrixFloat,
+    nn_act_default = 0, 
+
+    hidden_size = 20,
+    layer_num = 1,
+    chunk_size = 2,
+    batch_size = 10, 
+    max_iter = 3,
+    param_random = function() return (math.random() / 5 - 0.1) end,
+    dropout_str = "0",
+
+    train_fn = train_fn,
+    valid_fn = valid_fn,
+    test_fn = test_fn,
+    max_sen_len = 80,
+    lr_decay = 1.003,
+    decay_iter = 10,
+    vocab_fn = vocab_fn, 
+    sche_log_pre = "[SCHEDULER]:",
+    log_w_num = 10, --give a message when log_w_num words have been processed
+    timer = nerv.Timer(),
+    work_dir_base = '/home/slhome/txh18/workspace/nerv/play/testEXP/tnn_lstmlm_test'
+}
+
+end
+
+lr_half = false --can not be local, to be set by loadstring
+start_iter = -1
+start_lr = nil
+ppl_last = 100000
+commands_str = "sampling" --"train:test"
+commands = {}
+test_iter = -1
+--for testout(question)
+q_file = "/home/slhome/txh18/workspace/ptb/questionGen/gen/ptb.test.txt.q10rs1_Msss.adds"
+
+if arg[2] ~= nil then
+    nerv.printf("%s applying arg[2](%s)...\n", global_conf.sche_log_pre, arg[2])
+    loadstring(arg[2])() 
+    nerv.LMUtil.wait(0.5)
+else
+    nerv.printf("%s no user setting, all default...\n", global_conf.sche_log_pre)
+end
+
+global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'l' .. global_conf.layer_num .. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' ..   global_conf.lrate .. 'wc' .. global_conf.wcost .. 'dr' .. global_conf.dropout_str 
+global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
+global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
+global_conf.param_fn = global_conf.work_dir .. "/params"
+global_conf.dropout_list = nerv.SUtil.parse_schedule(global_conf.dropout_str)
+global_conf.log_fn = global_conf.work_dir .. '/log_lstm_tnn_' .. commands_str ..os.date("_TT%m_%d_%X",os.time())
+global_conf.log_fn, _ = string.gsub(global_conf.log_fn, ':', '-')
+commands = nerv.SUtil.parse_commands_set(commands_str)
+
+if start_lr ~= nil then
+    global_conf.lrate = start_lr
+end
+
+--[[ 
+--redirecting log outputs!
+nerv.SUtil.log_redirect(global_conf.log_fn)
+nerv.LMUtil.wait(2)
+]]--
+
+----------------printing options---------------------------------
+nerv.printf("%s printing sm_conf...\n", sm_conf.sche_log_pre)
+for id, value in pairs(sm_conf) do
+    nerv.printf("%s:\t%s\n", id, tostring(value))
+end
+nerv.LMUtil.wait(2)
+
+nerv.printf("%s printing training scheduling options...\n", global_conf.sche_log_pre)
+nerv.printf("lr_half:\t%s\n", tostring(lr_half))
+nerv.printf("start_iter:\t%s\n", tostring(start_iter))
+nerv.printf("ppl_last:\t%s\n", tostring(ppl_last))
+nerv.printf("commands_str:\t%s\n", commands_str)
+nerv.printf("test_iter:\t%s\n", tostring(test_iter))
+nerv.printf("%s printing training scheduling end.\n", global_conf.sche_log_pre)
+nerv.LMUtil.wait(2)
+------------------printing options end------------------------------
+
+math.randomseed(1)
+
+local vocab = nerv.LMVocab()
+global_conf["vocab"] = vocab
+sm_conf["vocab"] = global_conf.vocab
+nerv.printf("%s building vocab...\n", global_conf.sche_log_pre)
+global_conf.vocab:build_file(global_conf.vocab_fn, false)
+ppl_rec = {} 
+
+local final_iter = -1
+if commands["test"] == 1 then
+    nerv.printf("===FINAL TEST===\n") 
+    global_conf.sche_log_pre = "[SCHEDULER FINAL_TEST]:" 
+    local tnn = load_net_tnn(global_conf, global_conf.fn_to_sample) 
+    global_conf.dropout_rate = 0
+    LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
+end --if commands["test"]
+
+if commands["sampling"] == 1 then
+    nerv.printf("===SAMPLE===\n") 
+    global_conf.sche_log_pre = "[SCHEDULER SAMPLING]:" 
+    local sampler = prepare_sampler(sm_conf)
+    for k = 1, 1 do
+        local res = sampler:lm_sample_rnn_dagL(10, {})
+        for i = 1, #res do
+            for j = 1, #res[i] do
+                nerv.printf("%s(%f) ", res[i][j].w, res[i][j].p)
+            end
+            nerv.printf("\n")
+        end
+    end
+    --global_conf.dropout_rate = 0
+    --LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
+end --if commands["sampling"]
+
+
diff --git a/nerv/examples/lmptb/sample_grulm_ptb_main.lua b/nerv/examples/lmptb/sample_grulm_ptb_main.lua
deleted file mode 100644
index 42a5787..0000000
--- a/nerv/examples/lmptb/sample_grulm_ptb_main.lua
+++ /dev/null
@@ -1,454 +0,0 @@
-require 'lmptb.lmvocab'
-require 'lmptb.lmfeeder'
-require 'lmptb.lmutil'
-require 'lmptb.layer.init'
---require 'tnn.init'
-require 'lmptb.lmseqreader'
-require 'lm_trainer'
-require 'lm_sampler'
-
---[[global function rename]]--
---local printf = nerv.printf
-local LMTrainer = nerv.LMTrainer
---[[global function rename ends]]--
-
-function prepare_parameters(global_conf, fn)
-    nerv.printf("%s preparing parameters...\n", global_conf.sche_log_pre) 
-    
-    global_conf.paramRepo = nerv.ParamRepo()
-    local paramRepo = global_conf.paramRepo
-
-    nerv.printf("%s loading parameter from file %s...\n", global_conf.sche_log_pre, fn) 
-    paramRepo:import({fn}, nil, global_conf)
-
-    nerv.printf("%s preparing parameters end.\n", global_conf.sche_log_pre)
-
-    return nil
-end
-
---global_conf: table
---Returns: nerv.LayerRepo
-function prepare_layers(global_conf)
-    nerv.printf("%s preparing layers...\n", global_conf.sche_log_pre)
-    
-    local pr = global_conf.paramRepo
-
-    local du = false
-
-    --local recurrentLconfig = {{["bp"] = "bp_h", ["ltp_hh"] = "ltp_hh"}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["break_id"] = global_conf.vocab:get_sen_entry().id, ["independent"] = global_conf.independent, ["clip"] = 10}}
-    --local recurrentLconfig = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["clip"] = 10, ["direct_update"] = du, ["pr"] = pr}}
-
-    local layers = {
-        ["nerv.GRULayerT"] = {
-            ["gruL1"] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["pr"] = pr}}, 
-        },
-        
-        ["nerv.DropoutLayerT"] = {
-            ["dropoutL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}, 
-        },
-
-        ["nerv.SelectLinearLayer"] = {
-            ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}, ["vocab"] = global_conf.vocab, ["pr"] = pr}},
-        },
-        
-        ["nerv.CombinerLayer"] = {
-            ["combinerL1"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}},
-        },
-
-        ["nerv.AffineLayer"] = {
-            ["outputL"] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.vocab:size()}, ["direct_update"] = du, ["pr"] = pr}},
-        },
-
-        ["nerv.SoftmaxCELayerT"] = {
-            ["softmaxL"] = {{}, {["dim_in"] = {global_conf.vocab:size(), global_conf.vocab:size()}, ["dim_out"] = {1}}},
-        },
-    }
-   
-    for l = 2, global_conf.layer_num do 
-        layers["nerv.DropoutLayerT"]["dropoutL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
-        layers["nerv.GRULayerT"]["gruL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size, global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}, ["pr"] = pr}}
-        layers["nerv.CombinerLayer"]["combinerL" .. l] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size, global_conf.hidden_size}, ["lambda"] = {1}}}
-    end
-    --[[ --we do not need those in the new tnn framework
-    printf("%s adding %d bptt layers...\n", global_conf.sche_log_pre, global_conf.bptt)
-    for i = 1, global_conf.bptt do
-        layers["nerv.IndRecurrentLayer"]["recurrentL" .. (i + 1)] = recurrentLconfig 
-        layers["nerv.SigmoidLayer"]["sigmoidL" .. (i + 1)] = {{}, {["dim_in"] = {global_conf.hidden_size}, ["dim_out"] = {global_conf.hidden_size}}}
-        layers["nerv.SelectLinearLayer"]["selectL" .. (i + 1)] = {{["ltp"] = "ltp_ih"}, {["dim_in"] = {1}, ["dim_out"] = {global_conf.hidden_size}}}
-    end
-    --]]
-
-    local layerRepo = nerv.LayerRepo(layers, pr, global_conf)
-    nerv.printf("%s preparing layers end.\n", global_conf.sche_log_pre)
-    return layerRepo
-end
-
---global_conf: table
---layerRepo: nerv.LayerRepo
---Returns: a nerv.TNN
-function prepare_tnn(global_conf, layerRepo)
-    nerv.printf("%s Generate and initing TNN ...\n", global_conf.sche_log_pre)
-
-    --input: input_w, input_w, ... input_w_now, last_activation
-    local connections_t = {
-        {"<input>[1]", "selectL1[1]", 0},
-        
-        --{"selectL1[1]", "recurrentL1[1]", 0},  
-        --{"recurrentL1[1]", "sigmoidL1[1]", 0},
-        --{"sigmoidL1[1]", "combinerL1[1]", 0},
-        --{"combinerL1[1]", "recurrentL1[2]", 1},
-        
-        {"selectL1[1]", "gruL1[1]", 0},
-        {"gruL1[1]", "combinerL1[1]", 0},
-        {"combinerL1[1]", "gruL1[2]", 1},
-        {"combinerL1[2]", "dropoutL1[1]", 0},
-        
-        {"dropoutL"..global_conf.layer_num.."[1]", "outputL[1]", 0},
-        {"outputL[1]", "softmaxL[1]", 0},
-        {"<input>[2]", "softmaxL[2]", 0},
-        {"softmaxL[1]", "<output>[1]", 0}
-    }
-
-    for l = 2, global_conf.layer_num do
-        table.insert(connections_t, {"dropoutL"..(l-1).."[1]", "gruL"..l.."[1]", 0})
-        table.insert(connections_t, {"gruL"..l.."[1]", "combinerL"..l.."[1]", 0})
-        table.insert(connections_t, {"combinerL"..l.."[1]", "gruL"..l.."[2]", 1})
-        table.insert(connections_t, {"combinerL"..l.."[2]", "dropoutL"..l.."[1]", 0})
-    end
-
-    --[[
-    printf("%s printing DAG connections:\n", global_conf.sche_log_pre)
-    for key, value in pairs(connections_t) do
-        printf("\t%s->%s\n", key, value)
-    end
-    ]]--
-
-    local tnn = nerv.TNN("TNN", global_conf, {["dim_in"] = {1, global_conf.vocab:size()}, 
-            ["dim_out"] = {1}, ["sub_layers"] = layerRepo,
-            ["connections"] = connections_t, ["clip_t"] = global_conf.clip_t,
-        })
-
-    tnn:init(global_conf.batch_size, global_conf.chunk_size)
-
-    nerv.printf("%s Initing TNN end.\n", global_conf.sche_log_pre)
-    return tnn
-end
-
-function load_net_tnn(global_conf, fn)
-    prepare_parameters(global_conf, fn)
-    local layerRepo = prepare_layers(global_conf)
-    local tnn = prepare_tnn(global_conf, layerRepo)
-    return tnn
-end
-
-function prepare_sampler(sm_conf)
-    sm_conf.pr = nerv.ParamRepo()
-    sm_conf.pr:import({sm_conf.fn_to_sample}, nil, sm_conf)
-
-    local layers = {
-        ["nerv.GRULayerT"] = {
-            ["gruL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}, ["pr"] = sm_conf.pr}}, 
-        },
-        ["nerv.DropoutLayerT"] = {
-            ["dropoutL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size}}}, 
-        },
-        ["nerv.SelectLinearLayer"] = {
-            ["selectL1"] = {{}, {["dim_in"] = {1}, ["dim_out"] = {sm_conf.hidden_size}, ["vocab"] = sm_conf.vocab, ["pr"] = sm_conf.pr}},
-        },
-        ["nerv.CombinerLayer"] = {
-           ["combinerL1"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.hidden_size, sm_conf.hidden_size}, ["lambda"] = {1}}},
-        },
-        ["nerv.AffineLayer"] = {
-            ["outputL"] = {{}, {["dim_in"] = {sm_conf.hidden_size}, ["dim_out"] = {sm_conf.vocab:size()},  ["pr"] = sm_conf.pr}},
-        },
-        ["nerv.SoftmaxCELayerT"] = {
-            ["softmaxL"] = {{}, {["dim_in"] = {sm_conf.vocab:size(), sm_conf.vocab:size()}, ["dim_out"] = {1}}},
-        },
-    }
-    local layerRepo = nerv.LayerRepo(layers, sm_conf.pr, sm_conf)
-
-    local connections_t = {
-        ["<input>[1]"] = "selectL1[1]",
-       
-        ["selectL1[1]"] = "gruL1[1]",
-        ["gruL1[1]"] = "combinerL1[1]",
-        ["<input>[2]"] = "gruL1[2]",
-        --{"combinerL1[2]", "dropoutL1[1]", 0},
-        
-        ["combinerL" .. global_conf.layer_num .. "[1]"] = "outputL[1]",
-        ["outputL[1]"] = "<output>[1]",     
-        ["combinerL1[2]"] = "<output>[2]",
-    }
-    
-    if sm_conf.layer_num > 1 then
-        nerv.error("multiple layer is currently not supported(not hard to implement though)")
-    end
-
-    local dagL = nerv.DAGLayerT("dagL", sm_conf, {["dim_in"] = {1, sm_conf.hidden_size}, 
-            ["dim_out"] = {sm_conf.vocab:size(), sm_conf.hidden_size}, ["sub_layers"] = layerRepo,
-            ["connections"] = connections_t
-        })
-    
-    local sampler = nerv.LMSampler(sm_conf)
-    sampler:load_dagL(dagL)
-
-    return sampler
-end
-
-local train_fn, valid_fn, test_fn
-global_conf = {}
-local set = arg[1] --"test"
-
-root_dir = '/home/slhome/txh18/workspace'
-
-if (set == "ptb") then
-
-data_dir = root_dir .. '/ptb/DATA'
-train_fn = data_dir .. '/ptb.train.txt.adds'
-valid_fn = data_dir .. '/ptb.valid.txt.adds'
-test_fn = data_dir .. '/ptb.test.txt.adds'
-vocab_fn = data_dir .. '/vocab'
-
-qdata_dir = root_dir .. '/ptb/questionGen/gen'
-
-global_conf = {
-    lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5,
-    cumat_type = nerv.CuMatrixFloat,
-    mmat_type = nerv.MMatrixFloat,
-    nn_act_default = 0, 
-
-    hidden_size = 300,
-    layer_num = 1,
-    chunk_size = 15,
-    batch_size = 32, 
-    max_iter = 35,
-    lr_decay = 1.003,
-    decay_iter = 10,
-    param_random = function() return (math.random() / 5 - 0.1) end,
-    dropout_str = "0.5",
-
-    train_fn = train_fn,
-    valid_fn = valid_fn,
-    test_fn = test_fn,
-    vocab_fn = vocab_fn,
-    max_sen_len = 90,
-    sche_log_pre = "[SCHEDULER]:",
-    log_w_num = 40000, --give a message when log_w_num words have been processed
-    timer = nerv.Timer(),
-    work_dir_base = root_dir .. '/ptb/EXP-nerv/grulm_v1.0',
-    
-    fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final',
-}
-
-sm_conf = {
-    cumat_type = nerv.CuMatrixFloat,
-    mmat_type = nerv.MMatrixFloat,
-    nn_act_default = 0, 
-
-    hidden_size = 300,
-    layer_num = 1,
-    batch_size = 32, 
-    chunk_size = 85, --largest sample sentence length
-    max_iter = 35,
-    max_sen_len = 90,
-    sche_log_pre = "[SAMPLER_S]:",
-
-    timer = global_conf.timer,
-    fn_to_sample = root_dir .. '/ptb/EXP-nerv/grulm_v1.0h300l1ch15ba32slr0.15wc1e-05dr0.5/params.final',
-}
-
-elseif (set == "msr_sc") then
-
-data_dir = '/home/slhome/txh18/workspace/sentenceCompletion/DATA_PV2'
-train_fn = data_dir .. '/normed_all.sf.len60.adds.train'
-valid_fn = data_dir .. '/normed_all.sf.len60.adds.dev'
-test_fn = data_dir .. '/answer_normed.adds'
-vocab_fn = data_dir .. '/normed_all.choose.vocab30000.addqvocab'
-
-global_conf = {
-    lrate = 1, wcost = 1e-6, momentum = 0,
-    cumat_type = nerv.CuMatrixFloat,
-    mmat_type = nerv.MMatrixFloat,
-    nn_act_default = 0, 
-
-    hidden_size = 300,
-    layer_num = 1,
-    chunk_size = 15,
-    batch_size = 10, 
-    max_iter = 30,
-    decay_iter = 10,
-    lr_decay = 1.003,
-    param_random = function() return (math.random() / 5 - 0.1) end,
-    dropout_str = "0",
-
-    train_fn = train_fn,
-    valid_fn = valid_fn,
-    test_fn = test_fn,
-    vocab_fn = vocab_fn,
-    sche_log_pre = "[SCHEDULER]:",
-    log_w_num = 400000, --give a message when log_w_num words have been processed
-    timer = nerv.Timer(),
-    work_dir_base = '/home/slhome/txh18/workspace/sentenceCompletion/EXP-Nerv/rnnlm_test'
-}
-
-elseif (set == "twitter") then
-
-    data_dir = root_dir .. '/twitter_new/DATA'
-    train_fn = data_dir .. '/twitter.choose2.adds'
-    valid_fn = data_dir .. '/twitter.valid.adds'
-    test_fn = data_dir .. '/comm.test.choose-ppl.adds'
-    vocab_fn = data_dir .. '/twitter.choose.train.vocab'
-    --qdata_dir = root_dir .. '/ptb/questionGen/gen'
-    global_conf = {
-    lrate = 0.15, wcost = 1e-5, momentum = 0, clip_t = 5,
-    cumat_type = nerv.CuMatrixFloat,
-    mmat_type = nerv.MMatrixFloat,
-    nn_act_default = 0, 
-
-    hidden_size = 300,
-    layer_num = 1,
-    chunk_size = 15,
-    batch_size = 32, 
-    max_iter = 30,
-    lr_decay = 1.003,
-    decay_iter = 10,
-    param_random = function() return (math.random() / 5 - 0.1) end,
-    dropout_str = "0.5",
-
-    train_fn = train_fn,
-    valid_fn = valid_fn,
-    test_fn = test_fn,
-    vocab_fn = vocab_fn,
-    max_sen_len = 32,
-    sche_log_pre = "[SCHEDULER]:",
-    log_w_num = 40000, --give a message when log_w_num words have been processed
-    timer = nerv.Timer(),
-    work_dir_base = root_dir .. '/twitter_new/EXP-nerv/grulm_v1.0'
-    }
-
-else
-
-valid_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn'
-train_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn'
-test_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn'
-vocab_fn = '/home/slhome/txh18/workspace/nerv/nerv/nerv/examples/lmptb/m-tests/some-text-chn'
-
-global_conf = {
-    lrate = 0.01, wcost = 1e-5, momentum = 0,
-    cumat_type = nerv.CuMatrixFloat,
-    mmat_type = nerv.MMatrixFloat,
-    nn_act_default = 0, 
-
-    hidden_size = 20,
-    layer_num = 1,
-    chunk_size = 2,
-    batch_size = 10, 
-    max_iter = 3,
-    param_random = function() return (math.random() / 5 - 0.1) end,
-    dropout_str = "0",
-
-    train_fn = train_fn,
-    valid_fn = valid_fn,
-    test_fn = test_fn,
-    max_sen_len = 80,
-    lr_decay = 1.003,
-    decay_iter = 10,
-    vocab_fn = vocab_fn, 
-    sche_log_pre = "[SCHEDULER]:",
-    log_w_num = 10, --give a message when log_w_num words have been processed
-    timer = nerv.Timer(),
-    work_dir_base = '/home/slhome/txh18/workspace/nerv/play/testEXP/tnn_lstmlm_test'
-}
-
-end
-
-lr_half = false --can not be local, to be set by loadstring
-start_iter = -1
-start_lr = nil
-ppl_last = 100000
-commands_str = "sampling" --"train:test"
-commands = {}
-test_iter = -1
---for testout(question)
-q_file = "/home/slhome/txh18/workspace/ptb/questionGen/gen/ptb.test.txt.q10rs1_Msss.adds"
-
-if arg[2] ~= nil then
-    nerv.printf("%s applying arg[2](%s)...\n", global_conf.sche_log_pre, arg[2])
-    loadstring(arg[2])() 
-    nerv.LMUtil.wait(0.5)
-else
-    nerv.printf("%s no user setting, all default...\n", global_conf.sche_log_pre)
-end
-
-global_conf.work_dir = global_conf.work_dir_base .. 'h' .. global_conf.hidden_size .. 'l' .. global_conf.layer_num .. 'ch' .. global_conf.chunk_size .. 'ba' .. global_conf.batch_size .. 'slr' ..   global_conf.lrate .. 'wc' .. global_conf.wcost .. 'dr' .. global_conf.dropout_str 
-global_conf.train_fn_shuf = global_conf.work_dir .. '/train_fn_shuf'
-global_conf.train_fn_shuf_bak = global_conf.train_fn_shuf .. '_bak'
-global_conf.param_fn = global_conf.work_dir .. "/params"
-global_conf.dropout_list = nerv.SUtil.parse_schedule(global_conf.dropout_str)
-global_conf.log_fn = global_conf.work_dir .. '/log_lstm_tnn_' .. commands_str ..os.date("_TT%m_%d_%X",os.time())
-global_conf.log_fn, _ = string.gsub(global_conf.log_fn, ':', '-')
-commands = nerv.SUtil.parse_commands_set(commands_str)
-
-if start_lr ~= nil then
-    global_conf.lrate = start_lr
-end
-
---[[ 
---redirecting log outputs!
-nerv.SUtil.log_redirect(global_conf.log_fn)
-nerv.LMUtil.wait(2)
-]]--
-
-----------------printing options---------------------------------
-nerv.printf("%s printing sm_conf...\n", sm_conf.sche_log_pre)
-for id, value in pairs(sm_conf) do
-    nerv.printf("%s:\t%s\n", id, tostring(value))
-end
-nerv.LMUtil.wait(2)
-
-nerv.printf("%s printing training scheduling options...\n", global_conf.sche_log_pre)
-nerv.printf("lr_half:\t%s\n", tostring(lr_half))
-nerv.printf("start_iter:\t%s\n", tostring(start_iter))
-nerv.printf("ppl_last:\t%s\n", tostring(ppl_last))
-nerv.printf("commands_str:\t%s\n", commands_str)
-nerv.printf("test_iter:\t%s\n", tostring(test_iter))
-nerv.printf("%s printing training scheduling end.\n", global_conf.sche_log_pre)
-nerv.LMUtil.wait(2)
-------------------printing options end------------------------------
-
-math.randomseed(1)
-
-local vocab = nerv.LMVocab()
-global_conf["vocab"] = vocab
-sm_conf["vocab"] = global_conf.vocab
-nerv.printf("%s building vocab...\n", global_conf.sche_log_pre)
-global_conf.vocab:build_file(global_conf.vocab_fn, false)
-ppl_rec = {} 
-
-local final_iter = -1
-if commands["test"] == 1 then
-    nerv.printf("===FINAL TEST===\n") 
-    global_conf.sche_log_pre = "[SCHEDULER FINAL_TEST]:" 
-    local tnn = load_net_tnn(global_conf, global_conf.fn_to_sample) 
-    global_conf.dropout_rate = 0
-    LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
-end --if commands["test"]
-
-if commands["sampling"] == 1 then
-    nerv.printf("===SAMPLE===\n") 
-    global_conf.sche_log_pre = "[SCHEDULER SAMPLING]:" 
-    local sampler = prepare_sampler(sm_conf)
-    for k = 1, 1 do
-        local res = sampler:lm_sample_rnn_dagL(10, {})
-        for i = 1, #res do
-            for j = 1, #res[i] do
-                nerv.printf("%s(%f) ", res[i][j].w, res[i][j].p)
-            end
-            nerv.printf("\n")
-        end
-    end
-    --global_conf.dropout_rate = 0
-    --LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
-end --if commands["sampling"]
-
-
-- 
cgit v1.2.3-70-g09d2


From 3d7a2be2d8ac3083617df2b7194921971f0ac94e Mon Sep 17 00:00:00 2001
From: txh18 <cloudygooseg@gmail.com>
Date: Fri, 5 Feb 2016 21:42:05 +0800
Subject: ..

---
 nerv/examples/lmptb/lm_sampler.lua | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/nerv/examples/lmptb/lm_sampler.lua b/nerv/examples/lmptb/lm_sampler.lua
index d194af9..9d31f17 100644
--- a/nerv/examples/lmptb/lm_sampler.lua
+++ b/nerv/examples/lmptb/lm_sampler.lua
@@ -8,10 +8,12 @@ function LMSampler:__init(global_conf)
     self.vocab = self.gconf.vocab
     self.sen_end_token = self.vocab.sen_end_token
     self.sen_end_id = self.vocab:get_word_str(self.sen_end_token).id 
+
+    self.loaded = false
+
 end
 
-function LMSampler:load_dagL(dagL)
-   
+function LMSampler:load_dagL(dagL)   
     nerv.printf("%s loading dagL\n", self.log_pre)
 
     self.dagL = dagL
@@ -40,9 +42,11 @@ function LMSampler:load_dagL(dagL)
         self.store[i][1].p = 0
     end
     self.repo = {}
+
+    self.loaded = true
 end
 
-function LMSampler:sample_to_store(ssout)
+function LMSampler:sample_to_store(ssout) --private
     for i = 1, self.batch_size do
         local ran = math.random()
         local id = 1
@@ -88,8 +92,9 @@ function LMSampler:sample_to_store(ssout)
     end
 end
 
---Returns: LMResult
 function LMSampler:lm_sample_rnn_dagL(sample_num, p_conf)
+    assert(self.loaded == true)
+
     local dagL = self.dagL
     local inputs = self.dagL_inputs
     local outputs = self.dagL_outputs
-- 
cgit v1.2.3-70-g09d2


From 7a421571300417dba0d5c703d9a460ad19aeef14 Mon Sep 17 00:00:00 2001
From: txh18 <cloudygooseg@gmail.com>
Date: Fri, 5 Feb 2016 22:46:53 +0800
Subject: enhanced m-test/lm_sample

---
 nerv/examples/lmptb/m-tests/lm_sampler_test.lua | 45 ++++++++++++++++---------
 1 file changed, 30 insertions(+), 15 deletions(-)

diff --git a/nerv/examples/lmptb/m-tests/lm_sampler_test.lua b/nerv/examples/lmptb/m-tests/lm_sampler_test.lua
index 42a5787..0313d77 100644
--- a/nerv/examples/lmptb/m-tests/lm_sampler_test.lua
+++ b/nerv/examples/lmptb/m-tests/lm_sampler_test.lua
@@ -362,15 +362,12 @@ global_conf = {
 
 end
 
-lr_half = false --can not be local, to be set by loadstring
-start_iter = -1
-start_lr = nil
-ppl_last = 100000
 commands_str = "sampling" --"train:test"
 commands = {}
-test_iter = -1
---for testout(question)
-q_file = "/home/slhome/txh18/workspace/ptb/questionGen/gen/ptb.test.txt.q10rs1_Msss.adds"
+test_iter = -1 --obsolete
+random_seed = 1
+sample_num = 10
+out_fn = nil
 
 if arg[2] ~= nil then
     nerv.printf("%s applying arg[2](%s)...\n", global_conf.sche_log_pre, arg[2])
@@ -407,16 +404,16 @@ end
 nerv.LMUtil.wait(2)
 
 nerv.printf("%s printing training scheduling options...\n", global_conf.sche_log_pre)
-nerv.printf("lr_half:\t%s\n", tostring(lr_half))
-nerv.printf("start_iter:\t%s\n", tostring(start_iter))
-nerv.printf("ppl_last:\t%s\n", tostring(ppl_last))
 nerv.printf("commands_str:\t%s\n", commands_str)
 nerv.printf("test_iter:\t%s\n", tostring(test_iter))
+nerv.printf("random_seed:\t%s\n", tostring(random_seed))
+nerv.printf("sample_num:\t%s\n", tostring(sample_num))
+nerv.printf("out_fn:\t%s\n", tostring(out_fn))
 nerv.printf("%s printing training scheduling end.\n", global_conf.sche_log_pre)
 nerv.LMUtil.wait(2)
 ------------------printing options end------------------------------
 
-math.randomseed(1)
+math.randomseed(random_seed)
 
 local vocab = nerv.LMVocab()
 global_conf["vocab"] = vocab
@@ -438,15 +435,33 @@ if commands["sampling"] == 1 then
     nerv.printf("===SAMPLE===\n") 
     global_conf.sche_log_pre = "[SCHEDULER SAMPLING]:" 
     local sampler = prepare_sampler(sm_conf)
-    for k = 1, 1 do
-        local res = sampler:lm_sample_rnn_dagL(10, {})
+    local out_fh = nil
+    if out_fn ~= nil then
+        out_fh = assert(io.open(out_fn, "w"))
+        nerv.printf("%s outputing samples to file \"%s\"...\n", global_conf.sche_log_pre, out_fn)
+    end
+    for k = 1, sample_num do
+        local res = sampler:lm_sample_rnn_dagL(1, {})
         for i = 1, #res do
+            if out_fh == nil then nerv.printf("lm_sampler_output_sample: ") end
             for j = 1, #res[i] do
-                nerv.printf("%s(%f) ", res[i][j].w, res[i][j].p)
+                if out_fh == nil then
+                    nerv.printf("%s %f ", res[i][j].w, res[i][j].p)
+                else
+                    out_fh:write(nerv.sprintf("%s %f ", res[i][j].w, res[i][j].p))
+                end
+            end
+            if out_fh == nil then
+                nerv.printf("\n")
+            else
+                out_fh:write(nerv.sprintf("\n"))
             end
-            nerv.printf("\n")
         end
+        if k % 100 == 0 and out_fh ~= nil then nerv.printf("%s %d sample done\n", global_conf.sche_log_pre, k) end
     end
+
+    if out_fh ~= nil then out_fh:close() end
+    nerv.printf("%s complete,bye\n", global_conf.sche_log_pre)
     --global_conf.dropout_rate = 0
     --LMTrainer.lm_process_file_rnn(global_conf, global_conf.test_fn, tnn, false) --false update!
 end --if commands["sampling"]
-- 
cgit v1.2.3-70-g09d2


From 8782772c4a68d45a403e610efa75a1c8f401c7e7 Mon Sep 17 00:00:00 2001
From: txh18 <cloudygooseg@gmail.com>
Date: Fri, 5 Feb 2016 23:30:22 +0800
Subject: ...

---
 nerv/examples/lmptb/m-tests/lm_sampler_test.lua | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nerv/examples/lmptb/m-tests/lm_sampler_test.lua b/nerv/examples/lmptb/m-tests/lm_sampler_test.lua
index 0313d77..effb2ad 100644
--- a/nerv/examples/lmptb/m-tests/lm_sampler_test.lua
+++ b/nerv/examples/lmptb/m-tests/lm_sampler_test.lua
@@ -457,7 +457,7 @@ if commands["sampling"] == 1 then
                 out_fh:write(nerv.sprintf("\n"))
             end
         end
-        if k % 100 == 0 and out_fh ~= nil then nerv.printf("%s %d sample done\n", global_conf.sche_log_pre, k) end
+        if k % 10000 == 0 and out_fh ~= nil then nerv.printf("%s %d sample done\n", global_conf.sche_log_pre, k) end
     end
 
     if out_fh ~= nil then out_fh:close() end
-- 
cgit v1.2.3-70-g09d2


From 152c89dc8af3d5d7ace79f65616f192a71b96b0d Mon Sep 17 00:00:00 2001
From: txh18 <cloudygooseg@gmail.com>
Date: Sat, 6 Feb 2016 21:33:41 +0800
Subject: bug fixes in lm_sampler

---
 nerv/examples/lmptb/lm_sampler.lua | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/nerv/examples/lmptb/lm_sampler.lua b/nerv/examples/lmptb/lm_sampler.lua
index 9d31f17..c9adf85 100644
--- a/nerv/examples/lmptb/lm_sampler.lua
+++ b/nerv/examples/lmptb/lm_sampler.lua
@@ -10,7 +10,6 @@ function LMSampler:__init(global_conf)
     self.sen_end_id = self.vocab:get_word_str(self.sen_end_token).id 
 
     self.loaded = false
-
 end
 
 function LMSampler:load_dagL(dagL)   
@@ -20,18 +19,18 @@ function LMSampler:load_dagL(dagL)
     self.dagL:init(self.batch_size)
 
     self.dagL_inputs = {}
-    self.dagL_inputs[1] = global_conf.cumat_type(global_conf.batch_size, 1)
+    self.dagL_inputs[1] = self.gconf.cumat_type(self.gconf.batch_size, 1)
     self.dagL_inputs[1]:fill(self.sen_end_id - 1)
-    self.dagL_inputs[2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
+    self.dagL_inputs[2] = self.gconf.cumat_type(self.gconf.batch_size, self.gconf.hidden_size)
     self.dagL_inputs[2]:fill(0)
     
     self.dagL_outputs = {}
-    self.dagL_outputs[1] = global_conf.cumat_type(global_conf.batch_size, global_conf.vocab:size())
-    self.dagL_outputs[2] = global_conf.cumat_type(global_conf.batch_size, global_conf.hidden_size)
+    self.dagL_outputs[1] = self.gconf.cumat_type(self.gconf.batch_size, self.gconf.vocab:size())
+    self.dagL_outputs[2] = self.gconf.cumat_type(self.gconf.batch_size, self.gconf.hidden_size)
     
-    self.smout_d = global_conf.cumat_type(self.batch_size, self.vocab:size())
-    self.ssout_d = global_conf.cumat_type(self.batch_size, self.vocab:size())
-    self.ssout_h = global_conf.mmat_type(self.batch_size, self.vocab:size())
+    self.smout_d = self.gconf.cumat_type(self.batch_size, self.vocab:size())
+    self.ssout_d = self.gconf.cumat_type(self.batch_size, self.vocab:size())
+    self.ssout_h = self.gconf.mmat_type(self.batch_size, self.vocab:size())
 
     self.store = {}
     for i = 1, self.batch_size do
-- 
cgit v1.2.3-70-g09d2