author    Determinant <ted.sybil@gmail.com>    2016-03-13 16:18:36 +0800
committer Determinant <ted.sybil@gmail.com>    2016-03-13 16:18:36 +0800
commit    93eb84aca23526959b76401fd6509f151a589e9a (patch)
tree      f9abef4f3bc9b49190ec2ec775344d5cdb52388c
parent    ddc4545050b41d12cfdc19cea9ba31c940d3d537 (diff)
add TNet tutorial; support converting global transf from TNet format
-rw-r--r--  htk_io/tools/tnet_to_nerv.c            57
-rw-r--r--  htk_io/tools/tnet_to_nerv.cpp          47
-rw-r--r--  kaldi_io/tools/kaldi_to_nerv.cpp       17
-rw-r--r--  tutorial/howto_pretrain_from_tnet.rst  48
4 files changed, 108 insertions, 61 deletions
diff --git a/htk_io/tools/tnet_to_nerv.c b/htk_io/tools/tnet_to_nerv.c
deleted file mode 100644
index 5774819..0000000
--- a/htk_io/tools/tnet_to_nerv.c
+++ /dev/null
@@ -1,57 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-char token[1024];
-double mat[4096][4096];
-int main() {
- FILE *fout = fopen("converted.nerv", "w");
- int cnt = 0;
- while (scanf("%s", token) != EOF)
- {
- int nrow, ncol;
- int i, j;
- if (strcmp(token, "<biasedlinearity>") == 0)
- {
- scanf("%d %d", &ncol, &nrow);
- scanf("%s %d %d", token, &ncol, &nrow);
- printf("%d %d\n", nrow, ncol);
- for (j = 0; j < ncol; j++)
- for (i = 0; i < nrow; i++)
- scanf("%lf", mat[i] + j);
- off_t base = ftello(fout);
- fprintf(fout, "%16d", 0);
- fprintf(fout, "{type=\"nerv.LinearTransParam\",id=\"affine%d_ltp\"}\n",
- cnt);
- fprintf(fout, "%d %d\n", nrow, ncol);
- for (i = 0; i < nrow; i++)
- {
- for (j = 0; j < ncol; j++)
- fprintf(fout, "%.8f ", mat[i][j]);
- fprintf(fout, "\n");
- }
- size_t length = ftello(fout) - base;
- fseeko(fout, base, SEEK_SET);
- fprintf(fout, "[%13lu]\n", length);
- fseeko(fout, 0, SEEK_END);
- if (scanf("%s %d", token, &ncol) == 2 && *token == 'v')
- {
- base = ftello(fout);
- for (j = 0; j < ncol; j++)
- scanf("%lf", mat[0] + j);
- fprintf(fout, "%16d", 0);
- fprintf(fout, "{type=\"nerv.BiasParam\",id=\"affine%d_bp\"}\n",
- cnt);
- fprintf(fout, "1 %d\n", ncol);
- for (j = 0; j < ncol; j++)
- fprintf(fout, "%.8f ", mat[0][j]);
- fprintf(fout, "\n");
- length = ftello(fout) - base;
- fseeko(fout, base, SEEK_SET);
- fprintf(fout, "[%13lu]\n", length);
- cnt++;
- fseeko(fout, 0, SEEK_END);
- }
- }
- }
- return 0;
-}
diff --git a/htk_io/tools/tnet_to_nerv.cpp b/htk_io/tools/tnet_to_nerv.cpp
index 63a104d..067097e 100644
--- a/htk_io/tools/tnet_to_nerv.cpp
+++ b/htk_io/tools/tnet_to_nerv.cpp
@@ -4,6 +4,7 @@
#include <cstring>
#include <cassert>
#include <cstdlib>
+#include <map>
char token[1024];
char output[1024];
@@ -23,6 +24,18 @@ void free_matrix(double **mat, int nrow, int ncol) {
delete [] mat;
}
+int cnt0;
+std::map<std::string, int> param_cnt;
+/* Return the index to use for the next parameter of this token type: each
+ * distinct token (e.g. "<biasedlinearity>", "<bias>") gets its own counter
+ * starting from cnt0 (the optional third command-line argument). */
+int get_param_cnt(const std::string &key) {
+    std::map<std::string, int>::iterator it = param_cnt.find(key);
+    if (it == param_cnt.end())
+    {
+        param_cnt[key] = cnt0 + 1;
+        return cnt0;
+    }
+    return it->second++;
+}
+
int main(int argc, char **argv) {
FILE *fin;
std::ofstream fout;
@@ -30,12 +43,14 @@ int main(int argc, char **argv) {
fin = fopen(argv[1], "r");
fout.open(argv[2]);
assert(fin != NULL);
- int cnt = argc > 3 ? atoi(argv[3]) : 0;
+ cnt0 = argc > 3 ? atoi(argv[3]) : 0;
+ bool shift;
while (fscanf(fin, "%s", token) != EOF)
{
int nrow, ncol;
int i, j;
double **mat;
+ int cnt = get_param_cnt(token);
if (strcmp(token, "<biasedlinearity>") == 0)
{
fscanf(fin, "%d %d", &ncol, &nrow);
@@ -84,10 +99,38 @@ int main(int argc, char **argv) {
sprintf(output, "[%13lu]\n", length);
fout << output;
fout.seekp(0, std::ios_base::end);
- cnt++;
}
free_matrix(mat, nrow, ncol);
}
+    /* global transform components: <bias> is an additive shift and
+     * <window> a per-dimension scale; both are stored as 1 x ncol
+     * vectors and written out as nerv.BiasParam chunks */
+    else if ((shift = (strcmp(token, "<bias>") == 0)) ||
+             strcmp(token, "<window>") == 0)
+ {
+ fscanf(fin, "%d %d", &ncol, &nrow);
+ printf("%d %d\n", nrow, ncol);
+ assert(nrow == ncol);
+ mat = new_matrix(1, ncol);
+ assert(fscanf(fin, "%s %d", token, &ncol) == 2 && *token == 'v');
+ for (j = 0; j < ncol; j++)
+ fscanf(fin, "%lf", mat[0] + j);
+ long base = fout.tellp();
+ sprintf(output, "%16d", 0);
+ fout << output;
+ sprintf(output, "{type=\"nerv.BiasParam\",id=\"%s%d\"}\n",
+ shift ? "bias" : "window",
+ cnt);
+ fout << output;
+ sprintf(output, "%d %d\n", 1, ncol);
+ fout << output;
+ for (j = 0; j < ncol; j++)
+ fout << mat[0][j] << " ";
+ fout << std::endl;
+ long length = fout.tellp() - base;
+ fout.seekp(base);
+ sprintf(output, "[%13lu]\n", length);
+ fout << output;
+ fout.seekp(0, std::ios_base::end);
+ free_matrix(mat, 1, ncol);
+ }
}
return 0;
}
diff --git a/kaldi_io/tools/kaldi_to_nerv.cpp b/kaldi_io/tools/kaldi_to_nerv.cpp
index f16de44..aadac53 100644
--- a/kaldi_io/tools/kaldi_to_nerv.cpp
+++ b/kaldi_io/tools/kaldi_to_nerv.cpp
@@ -4,6 +4,7 @@
#include <cstring>
#include <cassert>
#include <cstdlib>
+#include <map>
char token[1024];
char output[1024];
@@ -23,6 +24,18 @@ void free_matrix(double **mat, int nrow, int ncol) {
delete [] mat;
}
+int cnt0;
+std::map<std::string, int> param_cnt;
+/* Return the index to use for the next parameter of this token type: each
+ * distinct token (e.g. "<AffineTransform>") gets its own counter starting
+ * from cnt0 (the optional third command-line argument). */
+int get_param_cnt(const std::string &key) {
+    std::map<std::string, int>::iterator it = param_cnt.find(key);
+    if (it == param_cnt.end())
+    {
+        param_cnt[key] = cnt0 + 1;
+        return cnt0;
+    }
+    return it->second++;
+}
+
int main(int argc, char **argv) {
FILE *fin;
std::ofstream fout;
@@ -30,13 +43,14 @@ int main(int argc, char **argv) {
fin = fopen(argv[1], "r");
fout.open(argv[2]);
assert(fin != NULL);
- int cnt = argc > 3 ? atoi(argv[3]) : 0;
+ cnt0 = argc > 3 ? atoi(argv[3]) : 0;
bool shift;
while (fscanf(fin, "%s", token) != EOF)
{
int nrow, ncol;
int i, j;
double **mat;
+ int cnt = get_param_cnt(token);
if (strcmp(token, "<AffineTransform>") == 0)
{
double lrate, blrate, mnorm;
@@ -91,7 +105,6 @@ int main(int argc, char **argv) {
sprintf(output, "[%13lu]\n", length);
fout << output;
fout.seekp(0, std::ios_base::end);
- cnt++;
}
free_matrix(mat, nrow, ncol);
}
diff --git a/tutorial/howto_pretrain_from_tnet.rst b/tutorial/howto_pretrain_from_tnet.rst
new file mode 100644
index 0000000..7636478
--- /dev/null
+++ b/tutorial/howto_pretrain_from_tnet.rst
@@ -0,0 +1,48 @@
+How to Use a Pre-trained Model from TNet
+========================================
+
+:author: Ted Yin (mfy43) <ted.sybil@gmail.com>
+:abstract: Instructions on how to convert a pre-trained TNet model to NERV format,
+    train the converted model, and finally convert it back to TNet format
+    for subsequent decoding.
+
+- Note: this tutorial is the counterpart to the "Plan B" decoding approach in
+  *How to Use a Pre-trained nnet Model from Kaldi*. For more complete
+  information, please refer to that tutorial.
+
+- Note: in this tutorial, we use the following notation to denote a directory prefix:
+
+  - ``<nerv_home>``: the path to NERV (the location of the outermost directory ``nerv``)
+
+- To convert a TNet DNN model file:
+
+  ::
+
+    # compile the tool written in C++:
+    g++ -o tnet_to_nerv <nerv_home>/speech/htk_io/tools/tnet_to_nerv.cpp
+    # convert the model (the third argument is the initial number used in naming the parameters)
+    ./tnet_to_nerv <path_to_tnet_nn>.nnet <path_to_converted>.nerv 0
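+
+  For reference, the converter writes each parameter as a NERV chunk: a
+  fixed-width length field (back-patched after the chunk body is written),
+  a one-line header, the matrix dimensions, and the matrix rows. A converted
+  affine layer therefore looks roughly like this (dimensions and values are
+  illustrative):
+
+  ::
+
+    [     12345678]
+    {type="nerv.LinearTransParam",id="affine0_ltp"}
+    429 1024
+    0.00137742 -0.00394183 ...
+    [        12345]
+    {type="nerv.BiasParam",id="affine0_bp"}
+    1 1024
+    0.12504231 -0.08815310 ...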
+
+- Apply the method above to convert your global transformation file and your
+  network file to NERV chunk files, respectively.
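+
+  For example, a global transformation stored in TNet text format consists of
+  ``<bias>`` (an additive shift) and/or ``<window>`` (a per-dimension scale)
+  components. An illustrative input accepted by the converter (dimensions and
+  values are made up) looks like:
+
+  ::
+
+    <bias> 429 429
+    v 429
+    0.0 -1.2 ...
+    <window> 429 429
+    v 429
+    1.0 0.8 ...
+
+  Each component becomes a ``nerv.BiasParam`` chunk named ``bias0`` or
+  ``window0``, where the trailing number comes from the per-token counter
+  seeded by the third command-line argument.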
+
+- Train the converted parameters. A network configuration file similar to the
+  one used in the Kaldi tutorial can be found at
+  ``<nerv_home>/nerv/examples/swb_baseline2.lua``.
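+
+  The training command itself follows the Kaldi tutorial; a hypothetical
+  invocation (the ``asr_trainer.lua`` driver name is borrowed from that
+  setup and may differ in your checkout) would be:
+
+  ::
+
+    <nerv_home>/install/bin/nerv <nerv_home>/nerv/examples/asr_trainer.lua <nerv_home>/nerv/examples/swb_baseline2.lua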
+
+- Create a copy of ``<nerv_home>/speech/htk_io/tools/nerv_to_tnet.lua``.
+
+  - Modify the list named ``lnames`` so that it lists, in order, the names of
+    the layers you want to put into the output TNet parameter file (see the
+    sketch below). You may wonder why the NERV-to-TNet conversion is so
+    cumbersome. This is because a TNet nnet is a special case of the more
+    general NERV network -- it only allows stacked DNNs -- so the
+    TNet-to-NERV conversion is lossless while the other direction is not.
+    Your future NERV network may have multiple branches, which is why you
+    need to specify how to select and "stack" your layers in the TNet
+    parameter output.
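+
+    A hypothetical ``lnames`` for a plain stacked DNN (the layer names must
+    match those used in your own network configuration) might read:
+
+    ::
+
+      -- ordered list of layers to emit into the TNet file (names are illustrative)
+      lnames = {"affine0", "sigmoid0", "affine1", "sigmoid1", "affine2"}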
+
+ - Do the conversion by:
+
+ ::
+
+ <nerv_home>/install/bin/nerv --use-cpu nerv_to_tnet.lua <your_network_config>.lua <your_trained_params>.nerv <path_to_converted>.nnet
+
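+- The resulting ``<path_to_converted>.nnet`` can then be plugged back into
+  your original TNet setup for subsequent decoding.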