author     Determinant <[email protected]>    2016-03-13 16:18:36 +0800
committer  Determinant <[email protected]>    2016-03-13 16:18:36 +0800
commit     93eb84aca23526959b76401fd6509f151a589e9a
tree       f9abef4f3bc9b49190ec2ec775344d5cdb52388c
parent     ddc4545050b41d12cfdc19cea9ba31c940d3d537
add TNet tutorial; support converting global transf from TNet format
-rw-r--r--  htk_io/tools/tnet_to_nerv.c             57
-rw-r--r--  htk_io/tools/tnet_to_nerv.cpp           47
-rw-r--r--  kaldi_io/tools/kaldi_to_nerv.cpp        17
-rw-r--r--  tutorial/howto_pretrain_from_tnet.rst   48
4 files changed, 108 insertions, 61 deletions
diff --git a/htk_io/tools/tnet_to_nerv.c b/htk_io/tools/tnet_to_nerv.c
deleted file mode 100644
index 5774819..0000000
--- a/htk_io/tools/tnet_to_nerv.c
+++ /dev/null
@@ -1,57 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-char token[1024];
-double mat[4096][4096];
-int main() {
- FILE *fout = fopen("converted.nerv", "w");
- int cnt = 0;
- while (scanf("%s", token) != EOF)
- {
- int nrow, ncol;
- int i, j;
- if (strcmp(token, "<biasedlinearity>") == 0)
- {
- scanf("%d %d", &ncol, &nrow);
- scanf("%s %d %d", token, &ncol, &nrow);
- printf("%d %d\n", nrow, ncol);
- for (j = 0; j < ncol; j++)
- for (i = 0; i < nrow; i++)
- scanf("%lf", mat[i] + j);
- off_t base = ftello(fout);
- fprintf(fout, "%16d", 0);
- fprintf(fout, "{type=\"nerv.LinearTransParam\",id=\"affine%d_ltp\"}\n",
- cnt);
- fprintf(fout, "%d %d\n", nrow, ncol);
- for (i = 0; i < nrow; i++)
- {
- for (j = 0; j < ncol; j++)
- fprintf(fout, "%.8f ", mat[i][j]);
- fprintf(fout, "\n");
- }
- size_t length = ftello(fout) - base;
- fseeko(fout, base, SEEK_SET);
- fprintf(fout, "[%13lu]\n", length);
- fseeko(fout, 0, SEEK_END);
- if (scanf("%s %d", token, &ncol) == 2 && *token == 'v')
- {
- base = ftello(fout);
- for (j = 0; j < ncol; j++)
- scanf("%lf", mat[0] + j);
- fprintf(fout, "%16d", 0);
- fprintf(fout, "{type=\"nerv.BiasParam\",id=\"affine%d_bp\"}\n",
- cnt);
- fprintf(fout, "1 %d\n", ncol);
- for (j = 0; j < ncol; j++)
- fprintf(fout, "%.8f ", mat[0][j]);
- fprintf(fout, "\n");
- length = ftello(fout) - base;
- fseeko(fout, base, SEEK_SET);
- fprintf(fout, "[%13lu]\n", length);
- cnt++;
- fseeko(fout, 0, SEEK_END);
- }
- }
- }
- return 0;
-}
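
Both the deleted C version above and the C++ converters below write NERV chunks with the same back-patching pattern: emit a 16-character zero placeholder, then the metadata line and the matrix body, then seek back and overwrite the placeholder with the actual chunk length ("[%13lu]\n" is exactly 16 bytes, the same width as the "%16d" placeholder). A minimal standalone sketch of that pattern, assuming POSIX ftello/fseeko; the helper name and the sample matrix are invented for illustration:

    #include <stdio.h>

    /* Write one NERV chunk: a 16-char length placeholder, a metadata line,
     * the matrix dimensions and body, then back-patch the real length over
     * the placeholder and resume appending at the end of the file. */
    void write_chunk(FILE *fout, const char *meta,
                     const double *data, int nrow, int ncol) {
        off_t base = ftello(fout);          /* remember where the chunk starts */
        fprintf(fout, "%16d", 0);           /* 16-byte length placeholder */
        fprintf(fout, "%s\n", meta);        /* e.g. {type="...",id="..."} */
        fprintf(fout, "%d %d\n", nrow, ncol);
        for (int i = 0; i < nrow; i++) {
            for (int j = 0; j < ncol; j++)
                fprintf(fout, "%.8f ", data[i * ncol + j]);
            fprintf(fout, "\n");
        }
        size_t length = ftello(fout) - base;            /* total chunk bytes */
        fseeko(fout, base, SEEK_SET);
        fprintf(fout, "[%13lu]\n", (unsigned long)length); /* back-patch */
        fseeko(fout, 0, SEEK_END);
    }

    int main() {
        FILE *fout = fopen("demo.nerv", "w");
        if (!fout) return 1;
        double m[2][2] = {{1.0, 2.0}, {3.0, 4.0}};
        write_chunk(fout, "{type=\"nerv.LinearTransParam\",id=\"affine0_ltp\"}",
                    &m[0][0], 2, 2);
        fclose(fout);
        return 0;
    }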
diff --git a/htk_io/tools/tnet_to_nerv.cpp b/htk_io/tools/tnet_to_nerv.cpp
index 63a104d..067097e 100644
--- a/htk_io/tools/tnet_to_nerv.cpp
+++ b/htk_io/tools/tnet_to_nerv.cpp
@@ -4,6 +4,7 @@
#include <cstring>
#include <cassert>
#include <cstdlib>
+#include <map>
char token[1024];
char output[1024];
@@ -23,6 +24,18 @@ void free_matrix(double **mat, int nrow, int ncol) {
delete [] mat;
}
+int cnt0;
+std::map<std::string, int> param_cnt;
+int get_param_cnt(const std::string &key) {
+ std::map<std::string, int>::iterator it = param_cnt.find(key);
+ if (it == param_cnt.end())
+ {
+ param_cnt[key] = cnt0 + 1;
+ return cnt0;
+ }
+ return it->second++;
+}
+
int main(int argc, char **argv) {
FILE *fin;
std::ofstream fout;
@@ -30,12 +43,14 @@ int main(int argc, char **argv) {
fin = fopen(argv[1], "r");
fout.open(argv[2]);
assert(fin != NULL);
- int cnt = argc > 3 ? atoi(argv[3]) : 0;
+ cnt0 = argc > 3 ? atoi(argv[3]) : 0;
+ bool shift;
while (fscanf(fin, "%s", token) != EOF)
{
int nrow, ncol;
int i, j;
double **mat;
+ int cnt = get_param_cnt(token);
if (strcmp(token, "<biasedlinearity>") == 0)
{
fscanf(fin, "%d %d", &ncol, &nrow);
@@ -84,10 +99,38 @@ int main(int argc, char **argv) {
sprintf(output, "[%13lu]\n", length);
fout << output;
fout.seekp(0, std::ios_base::end);
- cnt++;
}
free_matrix(mat, nrow, ncol);
}
+ else if ((shift = (strcmp(token, "<bias>") == 0)) ||
+ strcmp(token, "<window>") == 0)
+ {
+ fscanf(fin, "%d %d", &ncol, &nrow);
+ printf("%d %d\n", nrow, ncol);
+ assert(nrow == ncol);
+ mat = new_matrix(1, ncol);
+ assert(fscanf(fin, "%s %d", token, &ncol) == 2 && *token == 'v');
+ for (j = 0; j < ncol; j++)
+ fscanf(fin, "%lf", mat[0] + j);
+ long base = fout.tellp();
+ sprintf(output, "%16d", 0);
+ fout << output;
+ sprintf(output, "{type=\"nerv.BiasParam\",id=\"%s%d\"}\n",
+ shift ? "bias" : "window",
+ cnt);
+ fout << output;
+ sprintf(output, "%d %d\n", 1, ncol);
+ fout << output;
+ for (j = 0; j < ncol; j++)
+ fout << mat[0][j] << " ";
+ fout << std::endl;
+ long length = fout.tellp() - base;
+ fout.seekp(base);
+ sprintf(output, "[%13lu]\n", length);
+ fout << output;
+ fout.seekp(0, std::ios_base::end);
+ free_matrix(mat, 1, ncol);
+ }
}
return 0;
}
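
The get_param_cnt helper above (mirrored in kaldi_to_nerv.cpp below) replaces the old single running cnt: each token type now advances its own counter starting at cnt0, which is what lets the new <bias>/<window> branch number its "bias%d"/"window%d" chunks independently of the affine layers. A standalone sketch of the behavior; the driver in main is invented for illustration:

    #include <cstdio>
    #include <map>
    #include <string>

    int cnt0;                              /* starting index (argv[3] in the tools) */
    std::map<std::string, int> param_cnt;  /* next index to hand out, per token type */

    /* Return the current index for `key` and advance that type's counter. */
    int get_param_cnt(const std::string &key) {
        std::map<std::string, int>::iterator it = param_cnt.find(key);
        if (it == param_cnt.end())
        {
            param_cnt[key] = cnt0 + 1;     /* first use: next one is cnt0 + 1 */
            return cnt0;
        }
        return it->second++;               /* later uses: return, then advance */
    }

    int main() {
        cnt0 = 0;
        printf("%d\n", get_param_cnt("<bias>"));    /* 0 */
        printf("%d\n", get_param_cnt("<window>"));  /* 0: independent sequence */
        printf("%d\n", get_param_cnt("<bias>"));    /* 1 */
        return 0;
    }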
diff --git a/kaldi_io/tools/kaldi_to_nerv.cpp b/kaldi_io/tools/kaldi_to_nerv.cpp
index f16de44..aadac53 100644
--- a/kaldi_io/tools/kaldi_to_nerv.cpp
+++ b/kaldi_io/tools/kaldi_to_nerv.cpp
@@ -4,6 +4,7 @@
#include <cstring>
#include <cassert>
#include <cstdlib>
+#include <map>
char token[1024];
char output[1024];
@@ -23,6 +24,18 @@ void free_matrix(double **mat, int nrow, int ncol) {
delete [] mat;
}
+int cnt0;
+std::map<std::string, int> param_cnt;
+int get_param_cnt(const std::string &key) {
+ std::map<std::string, int>::iterator it = param_cnt.find(key);
+ if (it == param_cnt.end())
+ {
+ param_cnt[key] = cnt0 + 1;
+ return cnt0;
+ }
+ return it->second++;
+}
+
int main(int argc, char **argv) {
FILE *fin;
std::ofstream fout;
@@ -30,13 +43,14 @@ int main(int argc, char **argv) {
fin = fopen(argv[1], "r");
fout.open(argv[2]);
assert(fin != NULL);
- int cnt = argc > 3 ? atoi(argv[3]) : 0;
+ cnt0 = argc > 3 ? atoi(argv[3]) : 0;
bool shift;
while (fscanf(fin, "%s", token) != EOF)
{
int nrow, ncol;
int i, j;
double **mat;
+ int cnt = get_param_cnt(token);
if (strcmp(token, "<AffineTransform>") == 0)
{
double lrate, blrate, mnorm;
@@ -91,7 +105,6 @@ int main(int argc, char **argv) {
sprintf(output, "[%13lu]\n", length);
fout << output;
fout.seekp(0, std::ios_base::end);
- cnt++;
}
free_matrix(mat, nrow, ncol);
}
diff --git a/tutorial/howto_pretrain_from_tnet.rst b/tutorial/howto_pretrain_from_tnet.rst
new file mode 100644
index 0000000..7636478
--- /dev/null
+++ b/tutorial/howto_pretrain_from_tnet.rst
@@ -0,0 +1,48 @@
+How to Use a Pre-trained Model from TNet
+========================================
+
+:author: Ted Yin (mfy43) <[email protected]>
+:abstract: Instructions on how to convert a pre-trained TNet model to the
+           NERV format, train the converted model, and finally convert it
+           back to the TNet format for subsequent decoding.
+
+- Note: this tutorial is the counterpart to "Plan B" of decoding in *How to Use
+ a Pre-trained nnet Model from Kaldi*. For more complete information, please
+ refer to that tutorial.
+
+- Note: in this tutorial, we use the following notation to denote a directory prefix:
+
+  - ``<nerv_home>``: the path of NERV (the location of the outermost directory ``nerv``)
+
+- To convert a TNet DNN model file:
+
+  ::
+
+    # compile the tool written in C++:
+    g++ -o tnet_to_nerv <nerv_home>/speech/htk_io/tools/tnet_to_nerv.cpp
+    # convert the model (the third argument indicates the initial number
+    # used in naming the parameters)
+    ./tnet_to_nerv <path_to_tnet_nn>.nnet <path_to_converted>.nerv 0
+
+- Apply the method above to convert your global transformation file and your
+  network file to NERV chunk files respectively; for example (the file names
+  here are placeholders, so substitute your own)::
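+
+    ./tnet_to_nerv <your_global_transf>.nnet global_transf.nerv 0
+    ./tnet_to_nerv <your_network>.nnet network.nerv 0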
+
+- Train the converted parameters. A network configuration file similar to the
+  one used in the Kaldi tutorial can be found at
+  ``<nerv_home>/nerv/examples/swb_baseline2.lua``.
+
+- Create a copy of ``<nerv_home>/speech/htk_io/tools/nerv_to_tnet.lua``.
+
+  - Modify the list named ``lnames`` to give the names of the layers you want
+    to put into the output TNet parameter file, in order; a hypothetical
+    example follows below. You may ask why the NERV-to-TNet conversion is so
+    cumbersome. This is because a TNet nnet is a special case of the more
+    general NERV toolkit -- it only allows stacked DNNs, so the TNet-to-NERV
+    conversion is lossless but the other direction is not. Your future NERV
+    network may have multiple branches, which is why you need to specify how
+    to select and "stack" your layers in the TNet parameter output.
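+
+    ::
+
+      -- hypothetical: the actual layer names depend on your network config
+      lnames = {"affine0", "sigmoid0", "affine1", "sigmoid1", "affine2"}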
+
+ - Do the conversion by:
+
+    ::
+
+      <nerv_home>/install/bin/nerv --use-cpu nerv_to_tnet.lua <your_network_config>.lua <your_trained_params>.nerv <path_to_converted>.nnet
+