diff options
Diffstat (limited to 'kaldi_io/tools')
-rwxr-xr-x | kaldi_io/tools/convert_from_kaldi_pretrain.sh | 64 | ||||
-rwxr-xr-x | kaldi_io/tools/kaldi_to_nerv | bin | 0 -> 18719 bytes | |||
-rw-r--r-- | kaldi_io/tools/kaldi_to_nerv.cpp | 57 | ||||
-rw-r--r-- | kaldi_io/tools/nerv_to_kaldi.lua | 4 |
4 files changed, 107 insertions, 18 deletions
diff --git a/kaldi_io/tools/convert_from_kaldi_pretrain.sh b/kaldi_io/tools/convert_from_kaldi_pretrain.sh new file mode 100755 index 0000000..78f532f --- /dev/null +++ b/kaldi_io/tools/convert_from_kaldi_pretrain.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +hid_dim=1024 +hid_num=6 +pretrain_dir=exp/dnn4_pretrain-dbn +nerv_kaldi=/speechlab/users/mfy43/nerv/speech/kaldi_io/ + +[ -f path.sh ] && . ./path.sh +. parse_options.sh || exit 1; + +data=$1 +data_cv=$2 +lang=$3 +alidir=$4 +alidir_cv=$5 +dir=$6 + +[[ -z $data_fmllr ]] && data_fmllr=data-fmllr-tri3 +[[ -z $alidir ]] && alidir=exp/tri3_ali +[[ -z $dir ]] && dir=exp/dnn4_nerv_prepare +[[ -z $data ]] && data=$data_fmllr/train_tr90 +[[ -z $data_cv ]] && data_cv=$data_fmllr/train_cv10 +kaldi_to_nerv=$nerv_kaldi/tools/kaldi_to_nerv +mkdir $dir -p +mkdir $dir/log -p +###### PREPARE DATASETS ###### +cp $data/feats.scp $dir/train_sorted.scp +cp $data_cv/feats.scp $dir/cv.scp +utils/shuffle_list.pl --srand ${seed:-777} <$dir/train_sorted.scp >$dir/train.scp + +feats_tr="ark:copy-feats scp:$dir/train.scp ark:- |" + +###### INITIALIZE OUTPUT LAYER ###### +[ -z $num_tgt ] && \ + num_tgt=$(hmm-info --print-args=false $alidir/final.mdl | grep pdfs | awk '{ print $NF }') +nnet_proto=$dir/nnet_output.proto +echo "# genrating network prototype $nnet_proto" +utils/nnet/make_nnet_proto.py \ + $hid_dim $num_tgt 0 $hid_dim >$nnet_proto || exit 1 +nnet_init=$dir/nnet_output.init +nnet-initialize --binary=false $nnet_proto $nnet_init + +###### MODEL PARAMETER CONVERSION ###### +$kaldi_to_nerv $nnet_init $dir/nnet_output.nerv $hid_num +$kaldi_to_nerv <(nnet-copy --binary=false $pretrain_dir/${hid_num}.dbn -) $dir/nnet_init.nerv +$kaldi_to_nerv <(nnet-copy --binary=false $pretrain_dir/final.feature_transform -) $dir/nnet_trans.nerv +###### PREPARE FOR DECODING ##### +echo "Using PDF targets from dirs '$alidir' '$alidir_cv'" +# training targets in posterior format, +labels_tr="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz |\" ark:- | ali-to-post ark:- ark:- |" +labels_cv="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir_cv/ali.*.gz |\" ark:- | ali-to-post ark:- ark:- |" +# training targets for analyze-counts, +labels_tr_pdf="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz |\" ark:- |" +labels_tr_phn="ark:ali-to-phones --per-frame=true $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz |\" ark:- |" + +# get pdf-counts, used later for decoding/aligning, +analyze-counts --verbose=1 --binary=false "$labels_tr_pdf" $dir/ali_train_pdf.counts 2>$dir/log/analyze_counts_pdf.log || exit 1 +# copy the old transition model, will be needed by decoder, +copy-transition-model --binary=false $alidir/final.mdl $dir/final.mdl || exit 1 +# copy the tree +cp $alidir/tree $dir/tree || exit 1 + +# make phone counts for analysis, +[ -e $lang/phones.txt ] && analyze-counts --verbose=1 --symbol-table=$lang/phones.txt "$labels_tr_phn" /dev/null 2>$dir/log/analyze_counts_phones.log || exit 1 diff --git a/kaldi_io/tools/kaldi_to_nerv b/kaldi_io/tools/kaldi_to_nerv Binary files differnew file mode 100755 index 0000000..78469f8 --- /dev/null +++ b/kaldi_io/tools/kaldi_to_nerv diff --git a/kaldi_io/tools/kaldi_to_nerv.cpp b/kaldi_io/tools/kaldi_to_nerv.cpp index 1edb0f2..f16de44 100644 --- a/kaldi_io/tools/kaldi_to_nerv.cpp +++ b/kaldi_io/tools/kaldi_to_nerv.cpp @@ -3,31 +3,53 @@ #include <string> #include <cstring> #include <cassert> +#include <cstdlib> char token[1024]; char output[1024]; -double mat[4096][4096]; + +double **new_matrix(int nrow, int ncol) { + double **mat = new double *[nrow]; + int i; + for (i = 0; i < nrow; i++) + mat[i] = new double[ncol]; + return mat; +} + +void free_matrix(double **mat, int nrow, int ncol) { + int i; + for (i = 0; i < nrow; i++) + delete [] mat[i]; + delete [] mat; +} + int main(int argc, char **argv) { + FILE *fin; std::ofstream fout; - fout.open(argv[1]); - int cnt = 0; + assert(argc >= 3); + fin = fopen(argv[1], "r"); + fout.open(argv[2]); + assert(fin != NULL); + int cnt = argc > 3 ? atoi(argv[3]) : 0; bool shift; - while (scanf("%s", token) != EOF) + while (fscanf(fin, "%s", token) != EOF) { int nrow, ncol; int i, j; + double **mat; if (strcmp(token, "<AffineTransform>") == 0) { double lrate, blrate, mnorm; - scanf("%d %d", &ncol, &nrow); - scanf("%s %lf %s %lf %s %lf", + fscanf(fin, "%d %d", &ncol, &nrow); + fscanf(fin, "%s %lf %s %lf %s %lf", token, &lrate, token, &blrate, token, &mnorm); - scanf("%s", token); + fscanf(fin, "%s", token); assert(*token == '['); printf("%d %d\n", nrow, ncol); + mat = new_matrix(nrow, ncol); for (j = 0; j < ncol; j++) for (i = 0; i < nrow; i++) - scanf("%lf", mat[i] + j); + fscanf(fin, "%lf", mat[i] + j); long base = fout.tellp(); sprintf(output, "%16d", 0); fout << output; @@ -47,13 +69,13 @@ int main(int argc, char **argv) { sprintf(output, "[%13lu]\n", length); fout << output; fout.seekp(0, std::ios_base::end); - scanf("%s", token); + fscanf(fin, "%s", token); assert(*token == ']'); - if (scanf("%s", token) == 1 && *token == '[') + if (fscanf(fin, "%s", token) == 1 && *token == '[') { base = fout.tellp(); for (j = 0; j < ncol; j++) - scanf("%lf", mat[0] + j); + fscanf(fin, "%lf", mat[0] + j); sprintf(output, "%16d", 0); fout << output; sprintf(output, "{type=\"nerv.BiasParam\",id=\"affine%d_bp\"}\n", @@ -71,19 +93,21 @@ int main(int argc, char **argv) { fout.seekp(0, std::ios_base::end); cnt++; } + free_matrix(mat, nrow, ncol); } else if ((shift = (strcmp(token, "<AddShift>") == 0)) || strcmp(token, "<Rescale>") == 0) { double lrate, blrate, mnorm; - scanf("%d %d", &ncol, &ncol); - scanf("%s %lf", + fscanf(fin, "%d %d", &ncol, &ncol); + mat = new_matrix(1, ncol); + fscanf(fin, "%s %lf", token, &lrate); - scanf("%s", token); + fscanf(fin, "%s", token); assert(*token == '['); printf("%d\n", ncol); for (j = 0; j < ncol; j++) - scanf("%lf", mat[0] + j); + fscanf(fin, "%lf", mat[0] + j); long base = fout.tellp(); sprintf(output, "%16d", 0); fout << output; @@ -101,8 +125,9 @@ int main(int argc, char **argv) { sprintf(output, "[%13lu]\n", length); fout << output; fout.seekp(0, std::ios_base::end); - scanf("%s", token); + fscanf(fin, "%s", token); assert(*token == ']'); + free_matrix(mat, 1, ncol); } } return 0; diff --git a/kaldi_io/tools/nerv_to_kaldi.lua b/kaldi_io/tools/nerv_to_kaldi.lua index 804f09b..fba6a6c 100644 --- a/kaldi_io/tools/nerv_to_kaldi.lua +++ b/kaldi_io/tools/nerv_to_kaldi.lua @@ -1,8 +1,8 @@ --- usage: nerv config_file nerv_param_input tnet_output +-- usage: nerv nerv_to_kaldi.lua config_file nerv_param_input kaldi_param_output dofile(arg[1]) param_repo = nerv.ParamRepo() -param_repo:import({arg[2], gconf.initialized_param[2]}, nil, gconf) +param_repo:import({arg[2]}, nil, gconf) layer_repo = make_layer_repo(param_repo) f = assert(io.open(arg[3], "w")) |