summary | refs | log | tree | commit | diff
path: root/kaldi_io/tools
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io/tools')
-rwxr-xr-x kaldi_io/tools/convert_from_kaldi_pretrain.sh 64
-rwxr-xr-x kaldi_io/tools/kaldi_to_nerv bin 0 -> 18719 bytes
-rw-r--r-- kaldi_io/tools/kaldi_to_nerv.cpp 57
-rw-r--r-- kaldi_io/tools/nerv_to_kaldi.lua 4
4 files changed, 107 insertions, 18 deletions
diff --git a/kaldi_io/tools/convert_from_kaldi_pretrain.sh b/kaldi_io/tools/convert_from_kaldi_pretrain.sh
new file mode 100755
index 0000000..78f532f
--- /dev/null
+++ b/kaldi_io/tools/convert_from_kaldi_pretrain.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+hid_dim=1024 # passed to make_nnet_proto.py as its first and fourth arguments
+hid_num=6 # selects $pretrain_dir/${hid_num}.dbn and is passed to kaldi_to_nerv as its optional third argument
+pretrain_dir=exp/dnn4_pretrain-dbn
+nerv_kaldi=/speechlab/users/mfy43/nerv/speech/kaldi_io/ # site-specific absolute path; the converter binary is looked up under it
+
+[ -f path.sh ] && . ./path.sh
+. parse_options.sh || exit 1; # NOTE(review): presumably Kaldi's option parser, letting --name value override the variables above -- confirm
+
+data=$1
+data_cv=$2
+lang=$3 # no fallback is assigned below; only used for $lang/phones.txt at the end of the script
+alidir=$4
+alidir_cv=$5 # no fallback is assigned below, unlike alidir
+dir=$6
+
+[[ -z $data_fmllr ]] && data_fmllr=data-fmllr-tri3 # each fallback in this group applies only when the variable is still empty
+[[ -z $alidir ]] && alidir=exp/tri3_ali
+[[ -z $dir ]] && dir=exp/dnn4_nerv_prepare
+[[ -z $data ]] && data=$data_fmllr/train_tr90
+[[ -z $data_cv ]] && data_cv=$data_fmllr/train_cv10
+kaldi_to_nerv=$nerv_kaldi/tools/kaldi_to_nerv
+mkdir $dir -p
+mkdir $dir/log -p # the analyze-counts commands later redirect stderr into this directory
+###### PREPARE DATASETS ######
+cp $data/feats.scp $dir/train_sorted.scp
+cp $data_cv/feats.scp $dir/cv.scp
+utils/shuffle_list.pl --srand ${seed:-777} <$dir/train_sorted.scp >$dir/train.scp # the seed falls back to 777 when $seed is unset or empty
+
+feats_tr="ark:copy-feats scp:$dir/train.scp ark:- |" # not referenced again in this script
+
+###### INITIALIZE OUTPUT LAYER ######
+[ -z $num_tgt ] && \
+ num_tgt=$(hmm-info --print-args=false $alidir/final.mdl | grep pdfs | awk '{ print $NF }') # last field of the hmm-info output line(s) containing "pdfs"; skipped when num_tgt is already set
+nnet_proto=$dir/nnet_output.proto
+echo "# genrating network prototype $nnet_proto"
+utils/nnet/make_nnet_proto.py \
+ $hid_dim $num_tgt 0 $hid_dim >$nnet_proto || exit 1 # NOTE(review): arguments presumably mean input dim, output dim, hidden-layer count (0), hidden dim -- confirm against make_nnet_proto.py
+nnet_init=$dir/nnet_output.init
+nnet-initialize --binary=false $nnet_proto $nnet_init # --binary=false: kaldi_to_nerv parses its input as whitespace-separated tokens
+
+###### MODEL PARAMETER CONVERSION ######
+$kaldi_to_nerv $nnet_init $dir/nnet_output.nerv $hid_num # the third argument sets the initial value of the converter's layer counter (cnt in kaldi_to_nerv.cpp)
+$kaldi_to_nerv <(nnet-copy --binary=false $pretrain_dir/${hid_num}.dbn -) $dir/nnet_init.nerv # process substitution: the converter opens nnet-copy's output as its input file; no third argument, so the counter starts at 0
+$kaldi_to_nerv <(nnet-copy --binary=false $pretrain_dir/final.feature_transform -) $dir/nnet_trans.nerv # same mechanism, applied to the feature transform
+###### PREPARE FOR DECODING #####
+echo "Using PDF targets from dirs '$alidir' '$alidir_cv'"
+# training targets in posterior format,
+labels_tr="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz |\" ark:- | ali-to-post ark:- ark:- |"
+labels_cv="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir_cv/ali.*.gz |\" ark:- | ali-to-post ark:- ark:- |"
+# training targets for analyze-counts,
+labels_tr_pdf="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz |\" ark:- |"
+labels_tr_phn="ark:ali-to-phones --per-frame=true $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz |\" ark:- |"
+
+# get pdf-counts, used later for decoding/aligning,
+analyze-counts --verbose=1 --binary=false "$labels_tr_pdf" $dir/ali_train_pdf.counts 2>$dir/log/analyze_counts_pdf.log || exit 1
+# copy the old transition model, will be needed by decoder,
+copy-transition-model --binary=false $alidir/final.mdl $dir/final.mdl || exit 1
+# copy the tree
+cp $alidir/tree $dir/tree || exit 1
+
+# make phone counts for analysis,
+[ -e $lang/phones.txt ] && analyze-counts --verbose=1 --symbol-table=$lang/phones.txt "$labels_tr_phn" /dev/null 2>$dir/log/analyze_counts_phones.log || exit 1
diff --git a/kaldi_io/tools/kaldi_to_nerv b/kaldi_io/tools/kaldi_to_nerv
new file mode 100755
index 0000000..78469f8
--- /dev/null
+++ b/kaldi_io/tools/kaldi_to_nerv
Binary files differ
diff --git a/kaldi_io/tools/kaldi_to_nerv.cpp b/kaldi_io/tools/kaldi_to_nerv.cpp
index 1edb0f2..f16de44 100644
--- a/kaldi_io/tools/kaldi_to_nerv.cpp
+++ b/kaldi_io/tools/kaldi_to_nerv.cpp
@@ -3,31 +3,53 @@
#include <string>
#include <cstring>
#include <cassert>
+#include <cstdlib>
char token[1024];
char output[1024];
-double mat[4096][4096];
+
+double **new_matrix(int nrow, int ncol) { // allocates nrow rows of ncol doubles each; element values are left uninitialized
+ double **mat = new double *[nrow]; // array of row pointers
+ int i;
+ for (i = 0; i < nrow; i++)
+ mat[i] = new double[ncol]; // every row is its own separate allocation
+ return mat; // the caller releases it with free_matrix(mat, nrow, ncol)
+}
+
+void free_matrix(double **mat, int nrow, int ncol) { // releases a matrix laid out as in new_matrix; ncol is accepted but not used
+ int i;
+ for (i = 0; i < nrow; i++)
+ delete [] mat[i]; // free each row first
+ delete [] mat; // then the row-pointer array itself
+}
+
int main(int argc, char **argv) {
+ FILE *fin;
std::ofstream fout;
- fout.open(argv[1]);
- int cnt = 0;
+ assert(argc >= 3);
+ fin = fopen(argv[1], "r");
+ fout.open(argv[2]);
+ assert(fin != NULL);
+ int cnt = argc > 3 ? atoi(argv[3]) : 0;
bool shift;
- while (scanf("%s", token) != EOF)
+ while (fscanf(fin, "%s", token) != EOF)
{
int nrow, ncol;
int i, j;
+ double **mat;
if (strcmp(token, "<AffineTransform>") == 0)
{
double lrate, blrate, mnorm;
- scanf("%d %d", &ncol, &nrow);
- scanf("%s %lf %s %lf %s %lf",
+ fscanf(fin, "%d %d", &ncol, &nrow);
+ fscanf(fin, "%s %lf %s %lf %s %lf",
token, &lrate, token, &blrate, token, &mnorm);
- scanf("%s", token);
+ fscanf(fin, "%s", token);
assert(*token == '[');
printf("%d %d\n", nrow, ncol);
+ mat = new_matrix(nrow, ncol);
for (j = 0; j < ncol; j++)
for (i = 0; i < nrow; i++)
- scanf("%lf", mat[i] + j);
+ fscanf(fin, "%lf", mat[i] + j);
long base = fout.tellp();
sprintf(output, "%16d", 0);
fout << output;
@@ -47,13 +69,13 @@ int main(int argc, char **argv) {
sprintf(output, "[%13lu]\n", length);
fout << output;
fout.seekp(0, std::ios_base::end);
- scanf("%s", token);
+ fscanf(fin, "%s", token);
assert(*token == ']');
- if (scanf("%s", token) == 1 && *token == '[')
+ if (fscanf(fin, "%s", token) == 1 && *token == '[')
{
base = fout.tellp();
for (j = 0; j < ncol; j++)
- scanf("%lf", mat[0] + j);
+ fscanf(fin, "%lf", mat[0] + j);
sprintf(output, "%16d", 0);
fout << output;
sprintf(output, "{type=\"nerv.BiasParam\",id=\"affine%d_bp\"}\n",
@@ -71,19 +93,21 @@ int main(int argc, char **argv) {
fout.seekp(0, std::ios_base::end);
cnt++;
}
+ free_matrix(mat, nrow, ncol);
}
else if ((shift = (strcmp(token, "<AddShift>") == 0)) ||
strcmp(token, "<Rescale>") == 0)
{
double lrate, blrate, mnorm;
- scanf("%d %d", &ncol, &ncol);
- scanf("%s %lf",
+ fscanf(fin, "%d %d", &ncol, &ncol);
+ mat = new_matrix(1, ncol);
+ fscanf(fin, "%s %lf",
token, &lrate);
- scanf("%s", token);
+ fscanf(fin, "%s", token);
assert(*token == '[');
printf("%d\n", ncol);
for (j = 0; j < ncol; j++)
- scanf("%lf", mat[0] + j);
+ fscanf(fin, "%lf", mat[0] + j);
long base = fout.tellp();
sprintf(output, "%16d", 0);
fout << output;
@@ -101,8 +125,9 @@ int main(int argc, char **argv) {
sprintf(output, "[%13lu]\n", length);
fout << output;
fout.seekp(0, std::ios_base::end);
- scanf("%s", token);
+ fscanf(fin, "%s", token);
assert(*token == ']');
+ free_matrix(mat, 1, ncol);
}
}
return 0;
diff --git a/kaldi_io/tools/nerv_to_kaldi.lua b/kaldi_io/tools/nerv_to_kaldi.lua
index 804f09b..fba6a6c 100644
--- a/kaldi_io/tools/nerv_to_kaldi.lua
+++ b/kaldi_io/tools/nerv_to_kaldi.lua
@@ -1,8 +1,8 @@
--- usage: nerv config_file nerv_param_input tnet_output
+-- usage: nerv nerv_to_kaldi.lua config_file nerv_param_input kaldi_param_output
dofile(arg[1])
param_repo = nerv.ParamRepo()
-param_repo:import({arg[2], gconf.initialized_param[2]}, nil, gconf)
+param_repo:import({arg[2]}, nil, gconf)
layer_repo = make_layer_repo(param_repo)
f = assert(io.open(arg[3], "w"))