From 1e0ac0fb5c9f517e7325deb16004de1054454da7 Mon Sep 17 00:00:00 2001
From: Determinant
Date: Mon, 29 Feb 2016 20:03:52 +0800
Subject: refactor kaldi_decode

---
 kaldi_io/Makefile                             |  11 +++--
 kaldi_io/kaldi_io-scm-1.rockspec              |   2 +-
 kaldi_io/tools/convert_from_kaldi_pretrain.sh |  66 ++++++++++++++++++++++++++
 kaldi_io/tools/kaldi_to_nerv                  | Bin 0 -> 18442 bytes
 4 files changed, 75 insertions(+), 4 deletions(-)
 create mode 100755 kaldi_io/tools/convert_from_kaldi_pretrain.sh
 create mode 100755 kaldi_io/tools/kaldi_to_nerv

(limited to 'kaldi_io')

diff --git a/kaldi_io/Makefile b/kaldi_io/Makefile
index 6d350a4..abfa8e6 100644
--- a/kaldi_io/Makefile
+++ b/kaldi_io/Makefile
@@ -1,6 +1,12 @@
-# Change KDIR to `kaldi-trunk' path (Kaldi must be compiled with --share)
-KDIR := /speechlab/tools/KALDI/kaldi-master/
+ifndef LUA_BINDIR
+$(error Please build the package via luarocks: `luarocks make`)
+endif
+ifndef KALDI_BASE
+$(error KALDI_BASE is not set)
+endif
+
+KDIR := $(KALDI_BASE)
 SHELL := /bin/bash
 BUILD_DIR := $(CURDIR)/build
 INC_PATH := $(LUA_BINDIR)/../include/
 
@@ -26,7 +32,6 @@ build: $(OBJ_DIR) $(OBJ_SUBDIR) $(OBJS) $(OBJ_DIR)/src/test
 install: $(LUA_DIR) $(LUA_SUBDIR) $(LUA_LIBS) $(LIBS)
 
 include $(KDIR)/src/kaldi.mk
-#KL := $(KDIR)/src/feat/kaldi-feat.a $(KDIR)/src/matrix/kaldi-matrix.a $(KDIR)/src/base/kaldi-base.a $(KDIR)/src/util/kaldi-util.a $(KDIR)/src/hmm/kaldi-hmm.a $(KDIR)/src/tree/kaldi-tree.a -lcblas -llapack_atlas
 KL := $(KDIR)/src/feat/kaldi-feat.a $(KDIR)/src/matrix/kaldi-matrix.a $(KDIR)/src/base/kaldi-base.a $(KDIR)/src/util/kaldi-util.a $(KDIR)/src/hmm/kaldi-hmm.a $(KDIR)/src/tree/kaldi-tree.a $(BLAS_LDFLAGS)
 $(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR):
 	-mkdir -p $@
diff --git a/kaldi_io/kaldi_io-scm-1.rockspec b/kaldi_io/kaldi_io-scm-1.rockspec
index 7c9f8d8..5a97cff 100644
--- a/kaldi_io/kaldi_io-scm-1.rockspec
+++ b/kaldi_io/kaldi_io-scm-1.rockspec
@@ -4,7 +4,7 @@ source = {
     url = "https://github.com/Nerv-SJTU/nerv-speech.git"
 }
 description = {
-    summary = "Kaldi I/O support (Kaldi I/O wrapper) for Nerv",
+    summary = "Kaldi I/O support (Kaldi I/O wrapper) for NERV",
     detailed = [[
     ]],
     homepage = "https://github.com/Nerv-SJTU/nerv-speech",
diff --git a/kaldi_io/tools/convert_from_kaldi_pretrain.sh b/kaldi_io/tools/convert_from_kaldi_pretrain.sh
new file mode 100755
index 0000000..dc8ec8e
--- /dev/null
+++ b/kaldi_io/tools/convert_from_kaldi_pretrain.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+hid_dim=1024
+hid_num=6
+pretrain_dir=exp/dnn4_pretrain-dbn
+nerv_kaldi=/speechlab/users/mfy43/nerv/speech/kaldi_io/
+
+[ -f path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+data=$1
+data_cv=$2
+lang=$3
+alidir=$4
+alidir_cv=$5
+dir=$6
+
+[[ -z $data_fmllr ]] && data_fmllr=data-fmllr-tri3
+[[ -z $alidir ]] && alidir=exp/tri3_ali
+[[ -z $dir ]] && dir=exp/dnn4_nerv_prepare
+[[ -z $data ]] && data=$data_fmllr/train_tr90
+[[ -z $data_cv ]] && data_cv=$data_fmllr/train_cv10
+kaldi_to_nerv=$nerv_kaldi/tools/kaldi_to_nerv
+mkdir $dir -p
+mkdir $dir/log -p
+false && {
+###### PREPARE DATASETS ######
+cp $data/feats.scp $dir/train_sorted.scp
+cp $data_cv/feats.scp $dir/cv.scp
+utils/shuffle_list.pl --srand ${seed:-777} <$dir/train_sorted.scp >$dir/train.scp
+
+feats_tr="ark:copy-feats scp:$dir/train.scp ark:- |"
+
+###### INITIALIZE OUTPUT LAYER ######
+[ -z $num_tgt ] && \
+    num_tgt=$(hmm-info --print-args=false $alidir/final.mdl | grep pdfs | awk '{ print $NF }')
+nnet_proto=$dir/nnet_output.proto
+echo "# generating network prototype $nnet_proto"
+utils/nnet/make_nnet_proto.py \
+    $hid_dim $num_tgt 0 $hid_dim >$nnet_proto || exit 1
+nnet_init=$dir/nnet_output.init
+nnet-initialize --binary=false $nnet_proto $nnet_init
+
+###### MODEL PARAMETER CONVERSION ######
+$kaldi_to_nerv $nnet_init $dir/nnet_output.nerv
+$kaldi_to_nerv <(nnet-copy --binary=false $pretrain_dir/${hid_num}.dbn -) $dir/nnet_init.nerv
+$kaldi_to_nerv <(nnet-copy --binary=false $pretrain_dir/final.feature_transform -) $dir/nnet_trans.nerv
+}
+###### PREPARE FOR DECODING #####
+echo "Using PDF targets from dirs '$alidir' '$alidir_cv'"
+# training targets in posterior format,
+labels_tr="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz |\" ark:- | ali-to-post ark:- ark:- |"
+labels_cv="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir_cv/ali.*.gz |\" ark:- | ali-to-post ark:- ark:- |"
+# training targets for analyze-counts,
+labels_tr_pdf="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz |\" ark:- |"
+labels_tr_phn="ark:ali-to-phones --per-frame=true $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz |\" ark:- |"
+
+# get pdf-counts, used later for decoding/aligning,
+analyze-counts --verbose=1 --binary=false "$labels_tr_pdf" $dir/ali_train_pdf.counts 2>$dir/log/analyze_counts_pdf.log || exit 1
+# copy the old transition model, will be needed by decoder,
+copy-transition-model --binary=false $alidir/final.mdl $dir/final.mdl || exit 1
+# copy the tree
+cp $alidir/tree $dir/tree || exit 1
+
+# make phone counts for analysis,
+[ -e $lang/phones.txt ] && analyze-counts --verbose=1 --symbol-table=$lang/phones.txt "$labels_tr_phn" /dev/null 2>$dir/log/analyze_counts_phones.log || exit 1
diff --git a/kaldi_io/tools/kaldi_to_nerv b/kaldi_io/tools/kaldi_to_nerv
new file mode 100755
index 0000000..d08894d
Binary files /dev/null and b/kaldi_io/tools/kaldi_to_nerv differ
--
cgit v1.2.3-70-g09d2
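
A minimal usage sketch of the refactored build and the new conversion script. It assumes the package is built via luarocks (as the new Makefile error message suggests) with KALDI_BASE pointing at a compiled Kaldi tree, and that the script is run from a Kaldi egs directory; the six positional arguments come from the script itself ($1..$6: train set, cv set, lang dir, train and cv alignment dirs, output dir), while all concrete paths below are illustrative, not taken from the patch:

    # build kaldi_io through luarocks; KALDI_BASE must point to a compiled Kaldi checkout (assumed path)
    cd kaldi_io && KALDI_BASE=/path/to/kaldi luarocks make

    # convert a Kaldi DBN pretrain into NERV format; directory names are illustrative
    kaldi_io/tools/convert_from_kaldi_pretrain.sh \
        data-fmllr-tri3/train_tr90 data-fmllr-tri3/train_cv10 \
        data/lang exp/tri3_ali exp/tri3_ali_cv exp/dnn4_nerv_prepare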