summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDeterminant <[email protected]>2016-02-29 21:35:57 +0800
committerDeterminant <[email protected]>2016-02-29 21:35:57 +0800
commitd9e03b7d2c14549ab480d5e3b31518be4cdc8eed (patch)
treebe9507abe92f7e31e64507b4c4958f278cf64afa
parent534b039d297b9f2f83f889e2592686d79569e141 (diff)
add decode part to the nerv-kaldi tutorial
-rwxr-xr-xkaldi_io/tools/convert_from_kaldi_pretrain.sh4
-rwxr-xr-xkaldi_io/tools/kaldi_to_nervbin18442 -> 18719 bytes
-rw-r--r--kaldi_io/tools/kaldi_to_nerv.cpp27
-rw-r--r--kaldi_io/tools/nerv_to_kaldi.lua4
-rw-r--r--tutorial/howto_pretrain_from_kaldi.rst61
5 files changed, 86 insertions, 10 deletions
diff --git a/kaldi_io/tools/convert_from_kaldi_pretrain.sh b/kaldi_io/tools/convert_from_kaldi_pretrain.sh
index dc8ec8e..78f532f 100755
--- a/kaldi_io/tools/convert_from_kaldi_pretrain.sh
+++ b/kaldi_io/tools/convert_from_kaldi_pretrain.sh
@@ -23,7 +23,6 @@ dir=$6
kaldi_to_nerv=$nerv_kaldi/tools/kaldi_to_nerv
mkdir $dir -p
mkdir $dir/log -p
-false && {
###### PREPARE DATASETS ######
cp $data/feats.scp $dir/train_sorted.scp
cp $data_cv/feats.scp $dir/cv.scp
@@ -42,10 +41,9 @@ nnet_init=$dir/nnet_output.init
nnet-initialize --binary=false $nnet_proto $nnet_init
###### MODEL PARAMETER CONVERSION ######
-$kaldi_to_nerv $nnet_init $dir/nnet_output.nerv
+$kaldi_to_nerv $nnet_init $dir/nnet_output.nerv $hid_num
$kaldi_to_nerv <(nnet-copy --binary=false $pretrain_dir/${hid_num}.dbn -) $dir/nnet_init.nerv
$kaldi_to_nerv <(nnet-copy --binary=false $pretrain_dir/final.feature_transform -) $dir/nnet_trans.nerv
-}
###### PREPARE FOR DECODING #####
echo "Using PDF targets from dirs '$alidir' '$alidir_cv'"
# training targets in posterior format,
diff --git a/kaldi_io/tools/kaldi_to_nerv b/kaldi_io/tools/kaldi_to_nerv
index d08894d..78469f8 100755
--- a/kaldi_io/tools/kaldi_to_nerv
+++ b/kaldi_io/tools/kaldi_to_nerv
Binary files differ
diff --git a/kaldi_io/tools/kaldi_to_nerv.cpp b/kaldi_io/tools/kaldi_to_nerv.cpp
index 0282b7c..f16de44 100644
--- a/kaldi_io/tools/kaldi_to_nerv.cpp
+++ b/kaldi_io/tools/kaldi_to_nerv.cpp
@@ -3,23 +3,40 @@
#include <string>
#include <cstring>
#include <cassert>
+#include <cstdlib>
char token[1024];
char output[1024];
-double mat[4096][4096];
+
+double **new_matrix(int nrow, int ncol) { // allocate an nrow x ncol matrix, one heap array per row
+ double **mat = new double *[nrow]; // table of nrow row pointers
+ int i;
+ for (i = 0; i < nrow; i++)
+ mat[i] = new double[ncol]; // each row holds ncol doubles; elements are left uninitialized
+ return mat; // caller owns the result and releases it with free_matrix
+}
+
+void free_matrix(double **mat, int nrow, int ncol) { // release a matrix of nrow rows; ncol is not used here
+ int i;
+ for (i = 0; i < nrow; i++)
+ delete [] mat[i]; // free each row array first
+ delete [] mat; // then free the table of row pointers
+}
+
int main(int argc, char **argv) {
FILE *fin;
std::ofstream fout;
- assert(argc == 3);
+ assert(argc >= 3);
fin = fopen(argv[1], "r");
fout.open(argv[2]);
assert(fin != NULL);
- int cnt = 0;
+ int cnt = argc > 3 ? atoi(argv[3]) : 0;
bool shift;
while (fscanf(fin, "%s", token) != EOF)
{
int nrow, ncol;
int i, j;
+ double **mat;
if (strcmp(token, "<AffineTransform>") == 0)
{
double lrate, blrate, mnorm;
@@ -29,6 +46,7 @@ int main(int argc, char **argv) {
fscanf(fin, "%s", token);
assert(*token == '[');
printf("%d %d\n", nrow, ncol);
+ mat = new_matrix(nrow, ncol);
for (j = 0; j < ncol; j++)
for (i = 0; i < nrow; i++)
fscanf(fin, "%lf", mat[i] + j);
@@ -75,12 +93,14 @@ int main(int argc, char **argv) {
fout.seekp(0, std::ios_base::end);
cnt++;
}
+ free_matrix(mat, nrow, ncol);
}
else if ((shift = (strcmp(token, "<AddShift>") == 0)) ||
strcmp(token, "<Rescale>") == 0)
{
double lrate, blrate, mnorm;
fscanf(fin, "%d %d", &ncol, &ncol);
+ mat = new_matrix(1, ncol);
fscanf(fin, "%s %lf",
token, &lrate);
fscanf(fin, "%s", token);
@@ -107,6 +127,7 @@ int main(int argc, char **argv) {
fout.seekp(0, std::ios_base::end);
fscanf(fin, "%s", token);
assert(*token == ']');
+ free_matrix(mat, 1, ncol);
}
}
return 0;
diff --git a/kaldi_io/tools/nerv_to_kaldi.lua b/kaldi_io/tools/nerv_to_kaldi.lua
index 804f09b..fba6a6c 100644
--- a/kaldi_io/tools/nerv_to_kaldi.lua
+++ b/kaldi_io/tools/nerv_to_kaldi.lua
@@ -1,8 +1,8 @@
--- usage: nerv config_file nerv_param_input tnet_output
+-- usage: nerv nerv_to_kaldi.lua config_file nerv_param_input kaldi_param_output
dofile(arg[1])
param_repo = nerv.ParamRepo()
-param_repo:import({arg[2], gconf.initialized_param[2]}, nil, gconf)
+param_repo:import({arg[2]}, nil, gconf)
layer_repo = make_layer_repo(param_repo)
f = assert(io.open(arg[3], "w"))
diff --git a/tutorial/howto_pretrain_from_kaldi.rst b/tutorial/howto_pretrain_from_kaldi.rst
index 95b5f36..ff6ef3d 100644
--- a/tutorial/howto_pretrain_from_kaldi.rst
+++ b/tutorial/howto_pretrain_from_kaldi.rst
@@ -31,11 +31,16 @@ How to Use a Pretrained nnet Model from Kaldi
by the original stage 2).
- Run ``local/nnet/run_dnn.sh`` (first two stages).
-- You'll find directory like ``dnn4_pretrain-dbn`` and ``dnn4_pretrain-dbn_dnn`` inside the ``exp/``. They correspond to stage 1 and stage 2 respectively. To use NERV to do stage 2 instead, we need the pretrained network and the global transformation from stage 1:
+- You'll find directory like ``dnn4_pretrain-dbn`` and
+ ``dnn4_pretrain-dbn_dnn`` inside the ``exp/``. They correspond to stage 1 and
+ stage 2 respectively. To use NERV to do stage 2 instead, we need the
+ pretrained network and the global transformation from stage 1:
- Check the file ``exp/dnn4_pretrain-dbn/6.dbn`` exists. (pretrained network)
- Check the file ``exp/dnn4_pretrain-dbn/tr_splice5_cmvn-g.nnet`` exists. (global transformation)
- - Run script from ``kaldi_io/tools/convert_from_kaldi_pretrain.sh`` to generate the parameters for the output layer and the script files for training and cross-validation set.
+ - Run script from ``kaldi_io/tools/convert_from_kaldi_pretrain.sh`` to
+ generate the parameters for the output layer and the script files for
+ training and cross-validation set.
- The previous conversion commands will automatically give identifiers to the
parameters read from the Kaldi network file. The identifiers are like, for
@@ -58,3 +63,55 @@ How to Use a Pretrained nnet Model from Kaldi
- followed by an argument ``timit_mybaseline.lua`` to the scheduler,
specifying the network you want to train and some relevant settings, such
as where to find the initialized parameters and learning rate, etc.
+
+- Finally, after about 13 iterations, the finetune ends. There are two ways to
+ decode your model:
+
+ - Plan A:
+
+ - Open your ``timit_mybaseline.lua`` again and modify ``decode_param`` to
+ your final chunk file (the file with an extension ``.nerv``) and your
+ global transformation chunk file once used in training. This part lets
+ the decoder know about the set of parameters for decoding.
+
+ - Copy the script ``nerv/speech/kaldi_io/README.timit`` to your Kaldi
+ working directory (``timit/s5``) and modify the paths listed in the
+ script.
+
+ - Run the modified ``README.timit`` in ``s5`` directory (where there is the
+ ``path.sh``).
+
+ - After decoding, run ``bash RESULT exp/dnn4_nerv`` to see the results.
+
+ - Plan B: In this plan, we manually convert the trained model back to Kaldi
+ nnet format, and use Kaldi to decode.
+
+ - Create a copy of ``nerv/speech/kaldi_io/tools/nerv_to_kaldi.lua``.
+
+ - Modify the list named ``lnames`` to list the names of the layers you want
+ to put into the output Kaldi parameter file, in order. (You don't actually
+ need to change it for this tutorial.) You may ask why the NERV-to-Kaldi
+ conversion is so cumbersome. This is because Kaldi nnet is a special
+ case of the more general NERV toolkit --- it only allows stacked DNNs and
+ therefore Kaldi-to-NERV conversion is lossless but the other direction is
+ not. Your future NERV network may have multiple branches and that's why
+ you need to specify how to select and "stack" your layers in the Kaldi
+ parameter output.
+
+ - Do the conversion by:
+
+ ::
+
+ cat your_trained_params.nerv your_global_trans.nerv > all.nerv
+ install/bin/nerv nerv_to_kaldi.lua timit_mybaseline.lua all.nerv your_kaldi_output.nnet
+
+ - Finally, locate the directory of stage 2: ``exp/dnn4_pretrain-dbn_dnn``
+ and temporarily change the symbolic link for the final network file to the converted one:
+
+ ::
+
+ cd exp/dnn4_pretrain-dbn_dnn
+ mv final.nnet final.nnet.orig
+ ln -sv your_kaldi_output.nnet final.nnet
+
+ Then proceed with a normal Kaldi decoding.