summaryrefslogtreecommitdiff
path: root/kaldi_io
diff options
context:
space:
mode:
Diffstat (limited to 'kaldi_io')
-rw-r--r--kaldi_io/Makefile11
-rw-r--r--kaldi_io/kaldi_io-scm-1.rockspec2
-rwxr-xr-xkaldi_io/tools/convert_from_kaldi_pretrain.sh66
-rwxr-xr-xkaldi_io/tools/kaldi_to_nervbin0 -> 18442 bytes
4 files changed, 75 insertions, 4 deletions
diff --git a/kaldi_io/Makefile b/kaldi_io/Makefile
index 6d350a4..abfa8e6 100644
--- a/kaldi_io/Makefile
+++ b/kaldi_io/Makefile
@@ -1,6 +1,12 @@
-# Change KDIR to `kaldi-trunk' path (Kaldi must be compiled with --share)
-KDIR := /speechlab/tools/KALDI/kaldi-master/
+ifndef LUA_BINDIR
+$(error Please build the package via luarocks: `luarocks make`)
+endif
+ifndef KALDI_BASE
+$(error KALDI_BASE is not set)
+endif
+
+KDIR := $(KALDI_BASE)
SHELL := /bin/bash
BUILD_DIR := $(CURDIR)/build
INC_PATH := $(LUA_BINDIR)/../include/
@@ -26,7 +32,6 @@ build: $(OBJ_DIR) $(OBJ_SUBDIR) $(OBJS) $(OBJ_DIR)/src/test
install: $(LUA_DIR) $(LUA_SUBDIR) $(LUA_LIBS) $(LIBS)
include $(KDIR)/src/kaldi.mk
-#KL := $(KDIR)/src/feat/kaldi-feat.a $(KDIR)/src/matrix/kaldi-matrix.a $(KDIR)/src/base/kaldi-base.a $(KDIR)/src/util/kaldi-util.a $(KDIR)/src/hmm/kaldi-hmm.a $(KDIR)/src/tree/kaldi-tree.a -lcblas -llapack_atlas
KL := $(KDIR)/src/feat/kaldi-feat.a $(KDIR)/src/matrix/kaldi-matrix.a $(KDIR)/src/base/kaldi-base.a $(KDIR)/src/util/kaldi-util.a $(KDIR)/src/hmm/kaldi-hmm.a $(KDIR)/src/tree/kaldi-tree.a $(BLAS_LDFLAGS)
$(OBJ_DIR) $(LUA_DIR) $(OBJ_SUBDIR) $(LUA_SUBDIR):
-mkdir -p $@
diff --git a/kaldi_io/kaldi_io-scm-1.rockspec b/kaldi_io/kaldi_io-scm-1.rockspec
index 7c9f8d8..5a97cff 100644
--- a/kaldi_io/kaldi_io-scm-1.rockspec
+++ b/kaldi_io/kaldi_io-scm-1.rockspec
@@ -4,7 +4,7 @@ source = {
url = "https://github.com/Nerv-SJTU/nerv-speech.git"
}
description = {
- summary = "Kaldi I/O support (Kaldi I/O wrapper) for Nerv",
+ summary = "Kaldi I/O support (Kaldi I/O wrapper) for NERV",
detailed = [[
]],
homepage = "https://github.com/Nerv-SJTU/nerv-speech",
diff --git a/kaldi_io/tools/convert_from_kaldi_pretrain.sh b/kaldi_io/tools/convert_from_kaldi_pretrain.sh
new file mode 100755
index 0000000..dc8ec8e
--- /dev/null
+++ b/kaldi_io/tools/convert_from_kaldi_pretrain.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+hid_dim=1024
+hid_num=6
+pretrain_dir=exp/dnn4_pretrain-dbn
+nerv_kaldi=/speechlab/users/mfy43/nerv/speech/kaldi_io/
+
+[ -f path.sh ] && . ./path.sh
+. parse_options.sh || exit 1;
+
+data=$1
+data_cv=$2
+lang=$3
+alidir=$4
+alidir_cv=$5
+dir=$6
+
+[[ -z $data_fmllr ]] && data_fmllr=data-fmllr-tri3
+[[ -z $alidir ]] && alidir=exp/tri3_ali
+[[ -z $dir ]] && dir=exp/dnn4_nerv_prepare
+[[ -z $data ]] && data=$data_fmllr/train_tr90
+[[ -z $data_cv ]] && data_cv=$data_fmllr/train_cv10
+kaldi_to_nerv=$nerv_kaldi/tools/kaldi_to_nerv
+mkdir $dir -p
+mkdir $dir/log -p
+false && {
+###### PREPARE DATASETS ######
+cp $data/feats.scp $dir/train_sorted.scp
+cp $data_cv/feats.scp $dir/cv.scp
+utils/shuffle_list.pl --srand ${seed:-777} <$dir/train_sorted.scp >$dir/train.scp
+
+feats_tr="ark:copy-feats scp:$dir/train.scp ark:- |"
+
+###### INITIALIZE OUTPUT LAYER ######
+[ -z $num_tgt ] && \
+ num_tgt=$(hmm-info --print-args=false $alidir/final.mdl | grep pdfs | awk '{ print $NF }')
+nnet_proto=$dir/nnet_output.proto
+echo "# generating network prototype $nnet_proto"
+utils/nnet/make_nnet_proto.py \
+ $hid_dim $num_tgt 0 $hid_dim >$nnet_proto || exit 1
+nnet_init=$dir/nnet_output.init
+nnet-initialize --binary=false $nnet_proto $nnet_init
+
+###### MODEL PARAMETER CONVERSION ######
+$kaldi_to_nerv $nnet_init $dir/nnet_output.nerv
+$kaldi_to_nerv <(nnet-copy --binary=false $pretrain_dir/${hid_num}.dbn -) $dir/nnet_init.nerv
+$kaldi_to_nerv <(nnet-copy --binary=false $pretrain_dir/final.feature_transform -) $dir/nnet_trans.nerv
+}
+###### PREPARE FOR DECODING ######
+echo "Using PDF targets from dirs '$alidir' '$alidir_cv'"
+# training targets in posterior format,
+labels_tr="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz |\" ark:- | ali-to-post ark:- ark:- |"
+labels_cv="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir_cv/ali.*.gz |\" ark:- | ali-to-post ark:- ark:- |"
+# training targets for analyze-counts,
+labels_tr_pdf="ark:ali-to-pdf $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz |\" ark:- |"
+labels_tr_phn="ark:ali-to-phones --per-frame=true $alidir/final.mdl \"ark:gunzip -c $alidir/ali.*.gz |\" ark:- |"
+
+# get pdf-counts, used later for decoding/aligning,
+analyze-counts --verbose=1 --binary=false "$labels_tr_pdf" $dir/ali_train_pdf.counts 2>$dir/log/analyze_counts_pdf.log || exit 1
+# copy the old transition model, will be needed by decoder,
+copy-transition-model --binary=false $alidir/final.mdl $dir/final.mdl || exit 1
+# copy the tree
+cp $alidir/tree $dir/tree || exit 1
+
+# make phone counts for analysis,
+[ -e $lang/phones.txt ] && analyze-counts --verbose=1 --symbol-table=$lang/phones.txt "$labels_tr_phn" /dev/null 2>$dir/log/analyze_counts_phones.log || exit 1
diff --git a/kaldi_io/tools/kaldi_to_nerv b/kaldi_io/tools/kaldi_to_nerv
new file mode 100755
index 0000000..d08894d
--- /dev/null
+++ b/kaldi_io/tools/kaldi_to_nerv
Binary files differ