diff options
-rw-r--r-- | htk_io/tools/tnet_to_nerv.c | 57 | ||||
-rw-r--r-- | htk_io/tools/tnet_to_nerv.cpp | 47 | ||||
-rw-r--r-- | kaldi_io/tools/kaldi_to_nerv.cpp | 17 | ||||
-rw-r--r-- | tutorial/howto_pretrain_from_tnet.rst | 48 |
4 files changed, 108 insertions, 61 deletions
diff --git a/htk_io/tools/tnet_to_nerv.c b/htk_io/tools/tnet_to_nerv.c deleted file mode 100644 index 5774819..0000000 --- a/htk_io/tools/tnet_to_nerv.c +++ /dev/null @@ -1,57 +0,0 @@ -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -char token[1024]; -double mat[4096][4096]; -int main() { - FILE *fout = fopen("converted.nerv", "w"); - int cnt = 0; - while (scanf("%s", token) != EOF) - { - int nrow, ncol; - int i, j; - if (strcmp(token, "<biasedlinearity>") == 0) - { - scanf("%d %d", &ncol, &nrow); - scanf("%s %d %d", token, &ncol, &nrow); - printf("%d %d\n", nrow, ncol); - for (j = 0; j < ncol; j++) - for (i = 0; i < nrow; i++) - scanf("%lf", mat[i] + j); - off_t base = ftello(fout); - fprintf(fout, "%16d", 0); - fprintf(fout, "{type=\"nerv.LinearTransParam\",id=\"affine%d_ltp\"}\n", - cnt); - fprintf(fout, "%d %d\n", nrow, ncol); - for (i = 0; i < nrow; i++) - { - for (j = 0; j < ncol; j++) - fprintf(fout, "%.8f ", mat[i][j]); - fprintf(fout, "\n"); - } - size_t length = ftello(fout) - base; - fseeko(fout, base, SEEK_SET); - fprintf(fout, "[%13lu]\n", length); - fseeko(fout, 0, SEEK_END); - if (scanf("%s %d", token, &ncol) == 2 && *token == 'v') - { - base = ftello(fout); - for (j = 0; j < ncol; j++) - scanf("%lf", mat[0] + j); - fprintf(fout, "%16d", 0); - fprintf(fout, "{type=\"nerv.BiasParam\",id=\"affine%d_bp\"}\n", - cnt); - fprintf(fout, "1 %d\n", ncol); - for (j = 0; j < ncol; j++) - fprintf(fout, "%.8f ", mat[0][j]); - fprintf(fout, "\n"); - length = ftello(fout) - base; - fseeko(fout, base, SEEK_SET); - fprintf(fout, "[%13lu]\n", length); - cnt++; - fseeko(fout, 0, SEEK_END); - } - } - } - return 0; -} diff --git a/htk_io/tools/tnet_to_nerv.cpp b/htk_io/tools/tnet_to_nerv.cpp index 63a104d..067097e 100644 --- a/htk_io/tools/tnet_to_nerv.cpp +++ b/htk_io/tools/tnet_to_nerv.cpp @@ -4,6 +4,7 @@ #include <cstring> #include <cassert> #include <cstdlib> +#include <map> char token[1024]; char output[1024]; @@ -23,6 +24,18 @@ void free_matrix(double 
**mat, int nrow, int ncol) { delete [] mat; } +int cnt0; +std::map<std::string, int> param_cnt; +int get_param_cnt(const std::string &key) { + std::map<std::string, int>::iterator it = param_cnt.find(key); + if (it == param_cnt.end()) + { + param_cnt[key] = cnt0 + 1; + return cnt0; + } + return it->second++; +} + int main(int argc, char **argv) { FILE *fin; std::ofstream fout; @@ -30,12 +43,14 @@ int main(int argc, char **argv) { fin = fopen(argv[1], "r"); fout.open(argv[2]); assert(fin != NULL); - int cnt = argc > 3 ? atoi(argv[3]) : 0; + cnt0 = argc > 3 ? atoi(argv[3]) : 0; + bool shift; while (fscanf(fin, "%s", token) != EOF) { int nrow, ncol; int i, j; double **mat; + int cnt = get_param_cnt(token); if (strcmp(token, "<biasedlinearity>") == 0) { fscanf(fin, "%d %d", &ncol, &nrow); @@ -84,10 +99,38 @@ int main(int argc, char **argv) { sprintf(output, "[%13lu]\n", length); fout << output; fout.seekp(0, std::ios_base::end); - cnt++; } free_matrix(mat, nrow, ncol); } + else if ((shift = (strcmp(token, "<bias>") == 0)) || + strcmp(token, "<window>") == 0) + { + fscanf(fin, "%d %d", &ncol, &nrow); + printf("%d %d\n", nrow, ncol); + assert(nrow == ncol); + mat = new_matrix(1, ncol); + assert(fscanf(fin, "%s %d", token, &ncol) == 2 && *token == 'v'); + for (j = 0; j < ncol; j++) + fscanf(fin, "%lf", mat[0] + j); + long base = fout.tellp(); + sprintf(output, "%16d", 0); + fout << output; + sprintf(output, "{type=\"nerv.BiasParam\",id=\"%s%d\"}\n", + shift ? 
"bias" : "window", + cnt); + fout << output; + sprintf(output, "%d %d\n", 1, ncol); + fout << output; + for (j = 0; j < ncol; j++) + fout << mat[0][j] << " "; + fout << std::endl; + long length = fout.tellp() - base; + fout.seekp(base); + sprintf(output, "[%13lu]\n", length); + fout << output; + fout.seekp(0, std::ios_base::end); + free_matrix(mat, 1, ncol); + } } return 0; } diff --git a/kaldi_io/tools/kaldi_to_nerv.cpp b/kaldi_io/tools/kaldi_to_nerv.cpp index f16de44..aadac53 100644 --- a/kaldi_io/tools/kaldi_to_nerv.cpp +++ b/kaldi_io/tools/kaldi_to_nerv.cpp @@ -4,6 +4,7 @@ #include <cstring> #include <cassert> #include <cstdlib> +#include <map> char token[1024]; char output[1024]; @@ -23,6 +24,18 @@ void free_matrix(double **mat, int nrow, int ncol) { delete [] mat; } +int cnt0; +std::map<std::string, int> param_cnt; +int get_param_cnt(const std::string &key) { + std::map<std::string, int>::iterator it = param_cnt.find(key); + if (it == param_cnt.end()) + { + param_cnt[key] = cnt0 + 1; + return cnt0; + } + return it->second++; +} + int main(int argc, char **argv) { FILE *fin; std::ofstream fout; @@ -30,13 +43,14 @@ int main(int argc, char **argv) { fin = fopen(argv[1], "r"); fout.open(argv[2]); assert(fin != NULL); - int cnt = argc > 3 ? atoi(argv[3]) : 0; + cnt0 = argc > 3 ? 
atoi(argv[3]) : 0; bool shift; while (fscanf(fin, "%s", token) != EOF) { int nrow, ncol; int i, j; double **mat; + int cnt = get_param_cnt(token); if (strcmp(token, "<AffineTransform>") == 0) { double lrate, blrate, mnorm; @@ -91,7 +105,6 @@ int main(int argc, char **argv) { sprintf(output, "[%13lu]\n", length); fout << output; fout.seekp(0, std::ios_base::end); - cnt++; } free_matrix(mat, nrow, ncol); } diff --git a/tutorial/howto_pretrain_from_tnet.rst b/tutorial/howto_pretrain_from_tnet.rst new file mode 100644 index 0000000..7636478 --- /dev/null +++ b/tutorial/howto_pretrain_from_tnet.rst @@ -0,0 +1,48 @@ +How to Use a Pre-trained Model from TNet +======================================== + +:author: Ted Yin (mfy43) <[email protected]> +:abstract: Instruct on how to convert a pre-trained TNet model to NERV format, + train the converted model and finally convert back to TNet format + for subsequent decoding. + +- Note: this tutorial is the counterpart to "Plan B" of decoding in *How to Use + a Pre-trained nnet Model from Kaldi*. For more complete information, please + refer to that tutorial. + +- Note: in this tutorial, we use the following notations to denote the directory prefix: + + - ``<nerv_home>``: the path of NERV (the location of the outermost directory ``nerv``) + +- To convert a TNet DNN model file: + + :: + # compile the tool written in C++: + g++ -o tnet_to_nerv <nerv_home>/speech/htk_io/tools/tnet_to_nerv.cpp + # convert the model (the third argument indicates the initial number used in naming the parameters) + ./tnet_to_nerv <path_to_tnet_nn>.nnet <path_to_converted>.nerv 0 + +- Apply the method above to convert your global transformation file and network + file to NERV chunk files respectively. + +- Train the converted parameters. Here, a network configuration file similar to + the one used in Kaldi tutorial could be found at + ``<nerv_home>/nerv/examples/swb_baseline2.lua``. + +- Create a copy of ``<nerv_home>/speech/htk_io/tools/nerv_to_tnet.lua``. 
+ + - Modify the list named ``lnames`` to list the name of layers you want to + put into the output TNet parameter file in order. You may ask why the + NERV-to-TNet conversion is so cumbersome. This is because TNet nnet is a + special case of the more general NERV toolkit -- it only allows stacked DNNs + and therefore TNet-to-NERV conversion is lossless but the other direction + is not. Your future NERV network may have multiple branches and that's + why you need to specify how to select and "stack" your layers in the TNet + parameter output. + + - Do the conversion by: + + :: + + <nerv_home>/install/bin/nerv --use-cpu nerv_to_tnet.lua <your_network_config>.lua <your_trained_params>.nerv <path_to_converted>.nnet + |