diff options
-rwxr-xr-x | kaldi_io/tools/convert_from_kaldi_pretrain.sh | 4 | ||||
-rwxr-xr-x | kaldi_io/tools/kaldi_to_nerv | bin | 18442 -> 18719 bytes | |||
-rw-r--r-- | kaldi_io/tools/kaldi_to_nerv.cpp | 27 | ||||
-rw-r--r-- | kaldi_io/tools/nerv_to_kaldi.lua | 4 | ||||
-rw-r--r-- | tutorial/howto_pretrain_from_kaldi.rst | 61 |
5 files changed, 86 insertions, 10 deletions
diff --git a/kaldi_io/tools/convert_from_kaldi_pretrain.sh b/kaldi_io/tools/convert_from_kaldi_pretrain.sh index dc8ec8e..78f532f 100755 --- a/kaldi_io/tools/convert_from_kaldi_pretrain.sh +++ b/kaldi_io/tools/convert_from_kaldi_pretrain.sh @@ -23,7 +23,6 @@ dir=$6 kaldi_to_nerv=$nerv_kaldi/tools/kaldi_to_nerv mkdir $dir -p mkdir $dir/log -p -false && { ###### PREPARE DATASETS ###### cp $data/feats.scp $dir/train_sorted.scp cp $data_cv/feats.scp $dir/cv.scp @@ -42,10 +41,9 @@ nnet_init=$dir/nnet_output.init nnet-initialize --binary=false $nnet_proto $nnet_init ###### MODEL PARAMETER CONVERSION ###### -$kaldi_to_nerv $nnet_init $dir/nnet_output.nerv +$kaldi_to_nerv $nnet_init $dir/nnet_output.nerv $hid_num $kaldi_to_nerv <(nnet-copy --binary=false $pretrain_dir/${hid_num}.dbn -) $dir/nnet_init.nerv $kaldi_to_nerv <(nnet-copy --binary=false $pretrain_dir/final.feature_transform -) $dir/nnet_trans.nerv -} ###### PREPARE FOR DECODING ##### echo "Using PDF targets from dirs '$alidir' '$alidir_cv'" # training targets in posterior format, diff --git a/kaldi_io/tools/kaldi_to_nerv b/kaldi_io/tools/kaldi_to_nerv Binary files differindex d08894d..78469f8 100755 --- a/kaldi_io/tools/kaldi_to_nerv +++ b/kaldi_io/tools/kaldi_to_nerv diff --git a/kaldi_io/tools/kaldi_to_nerv.cpp b/kaldi_io/tools/kaldi_to_nerv.cpp index 0282b7c..f16de44 100644 --- a/kaldi_io/tools/kaldi_to_nerv.cpp +++ b/kaldi_io/tools/kaldi_to_nerv.cpp @@ -3,23 +3,40 @@ #include <string> #include <cstring> #include <cassert> +#include <cstdlib> char token[1024]; char output[1024]; -double mat[4096][4096]; + +double **new_matrix(int nrow, int ncol) { + double **mat = new double *[nrow]; + int i; + for (i = 0; i < nrow; i++) + mat[i] = new double[ncol]; + return mat; +} + +void free_matrix(double **mat, int nrow, int ncol) { + int i; + for (i = 0; i < nrow; i++) + delete [] mat[i]; + delete [] mat; +} + int main(int argc, char **argv) { FILE *fin; std::ofstream fout; - assert(argc == 3); + assert(argc >= 3); fin = 
fopen(argv[1], "r"); fout.open(argv[2]); assert(fin != NULL); - int cnt = 0; + int cnt = argc > 3 ? atoi(argv[3]) : 0; bool shift; while (fscanf(fin, "%s", token) != EOF) { int nrow, ncol; int i, j; + double **mat; if (strcmp(token, "<AffineTransform>") == 0) { double lrate, blrate, mnorm; @@ -29,6 +46,7 @@ int main(int argc, char **argv) { fscanf(fin, "%s", token); assert(*token == '['); printf("%d %d\n", nrow, ncol); + mat = new_matrix(nrow, ncol); for (j = 0; j < ncol; j++) for (i = 0; i < nrow; i++) fscanf(fin, "%lf", mat[i] + j); @@ -75,12 +93,14 @@ int main(int argc, char **argv) { fout.seekp(0, std::ios_base::end); cnt++; } + free_matrix(mat, nrow, ncol); } else if ((shift = (strcmp(token, "<AddShift>") == 0)) || strcmp(token, "<Rescale>") == 0) { double lrate, blrate, mnorm; fscanf(fin, "%d %d", &ncol, &ncol); + mat = new_matrix(1, ncol); fscanf(fin, "%s %lf", token, &lrate); fscanf(fin, "%s", token); @@ -107,6 +127,7 @@ int main(int argc, char **argv) { fout.seekp(0, std::ios_base::end); fscanf(fin, "%s", token); assert(*token == ']'); + free_matrix(mat, 1, ncol); } } return 0; diff --git a/kaldi_io/tools/nerv_to_kaldi.lua b/kaldi_io/tools/nerv_to_kaldi.lua index 804f09b..fba6a6c 100644 --- a/kaldi_io/tools/nerv_to_kaldi.lua +++ b/kaldi_io/tools/nerv_to_kaldi.lua @@ -1,8 +1,8 @@ --- usage: nerv config_file nerv_param_input tnet_output +-- usage: nerv nerv_to_kaldi.lua config_file nerv_param_input kaldi_param_output dofile(arg[1]) param_repo = nerv.ParamRepo() -param_repo:import({arg[2], gconf.initialized_param[2]}, nil, gconf) +param_repo:import({arg[2]}, nil, gconf) layer_repo = make_layer_repo(param_repo) f = assert(io.open(arg[3], "w")) diff --git a/tutorial/howto_pretrain_from_kaldi.rst b/tutorial/howto_pretrain_from_kaldi.rst index 95b5f36..ff6ef3d 100644 --- a/tutorial/howto_pretrain_from_kaldi.rst +++ b/tutorial/howto_pretrain_from_kaldi.rst @@ -31,11 +31,16 @@ How to Use a Pretrained nnet Model from Kaldi by the original stage 2). 
- Run ``local/nnet/run_dnn.sh`` (first two stages). -- You'll find directory like ``dnn4_pretrain-dbn`` and ``dnn4_pretrain-dbn_dnn`` inside the ``exp/``. They correspond to stage 1 and stage 2 respectively. To use NERV to do stage 2 instead, we need the pretrained network and the global transformation from stage 1: +- You'll find directories like ``dnn4_pretrain-dbn`` and + ``dnn4_pretrain-dbn_dnn`` inside the ``exp/``. They correspond to stage 1 and + stage 2 respectively. To use NERV to do stage 2 instead, we need the + pretrained network and the global transformation from stage 1: - Check the file ``exp/dnn4_pretrain-dbn/6.dbn`` exists. (pretrained network) - Check the file ``exp/dnn4_pretrain-dbn/tr_splice5_cmvn-g.nnet`` exists. (global transformation) - - Run script from ``kaldi_io/tools/convert_from_kaldi_pretrain.sh`` to generate the parameters for the output layer and the script files for training and cross-validation set. + - Run script from ``kaldi_io/tools/convert_from_kaldi_pretrain.sh`` to + generate the parameters for the output layer and the script files for + training and cross-validation set. - The previous conversion commands will automatically give identifiers to the parameters read from the Kaldi network file. The identifiers are like, for @@ -58,3 +63,55 @@ How to Use a Pretrained nnet Model from Kaldi - followed by an argument ``timit_mybaseline.lua`` to the scheduler, specifying the network you want to train and some relevant settings, such as where to find the initialized parameters and learning rate, etc. + +- Finally, after about 13 iterations, the fine-tuning ends. There are two ways to + decode your model: + + - Plan A: + + - Open your ``timit_mybaseline.lua`` again and modify ``decode_param`` to + your final chunk file (the file with an extension ``.nerv``) and your + global transformation chunk file once used in training. This part lets + the decoder know about the set of parameters for decoding. 
+ + - Copy the script ``nerv/speech/kaldi_io/README.timit`` to your Kaldi + working directory (``timit/s5``) and modify the paths listed in the + script. + + - Run the modified ``README.timit`` in the ``s5`` directory (where there is the + ``path.sh``). + + - After decoding, run ``bash RESULT exp/dnn4_nerv`` to see the results. + + - Plan B: In this plan, we manually convert the trained model back to Kaldi + nnet format, and use Kaldi to decode. + + - Create a copy of ``nerv/speech/kaldi_io/tools/nerv_to_kaldi.lua``. + + - Modify the list named ``lnames`` to list the names of the layers you want to + put into the output Kaldi parameter file in order. (You don't actually + need to change it for this tutorial.) You may ask why the NERV-to-Kaldi + conversion is so cumbersome. This is because Kaldi nnet is a special + case of the more general NERV toolkit --- it only allows stacked DNNs and + therefore Kaldi-to-NERV conversion is lossless but the other direction is + not. Your future NERV network may have multiple branches and that's why + you need to specify how to select and "stack" your layers in the Kaldi + parameter output. + + - Do the conversion by: + + :: + + cat your_trained_params.nerv your_global_trans.nerv > all.nerv + install/bin/nerv nerv_to_kaldi.lua timit_mybaseline.lua all.nerv your_kaldi_output.nnet + + - Finally, locate the directory of stage 2: ``exp/dnn4_pretrain-dbn_dnn`` + and temporarily change the symbolic link for the final network file to the converted one: + + :: + + cd exp/dnn4_pretrain-dbn_dnn + mv final.nnet final.nnet.orig + ln -sv your_kaldi_output.nnet final.nnet + + Then proceed with a normal Kaldi decoding. |