From b33b3a6732c6b6a66bd5c44c615be56d66f4ed67 Mon Sep 17 00:00:00 2001 From: Yimmon Zhuang Date: Wed, 14 Oct 2015 15:37:20 +0800 Subject: support kaldi decoder --- kaldi_decode/utils/int2sym.pl | 71 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) create mode 100755 kaldi_decode/utils/int2sym.pl (limited to 'kaldi_decode/utils/int2sym.pl') diff --git a/kaldi_decode/utils/int2sym.pl b/kaldi_decode/utils/int2sym.pl new file mode 100755 index 0000000..d618939 --- /dev/null +++ b/kaldi_decode/utils/int2sym.pl @@ -0,0 +1,71 @@ +#!/usr/bin/env perl +# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey) +# Apache 2.0. + +undef $field_begin; +undef $field_end; + + +if ($ARGV[0] eq "-f") { + shift @ARGV; + $field_spec = shift @ARGV; + if ($field_spec =~ m/^\d+$/) { + $field_begin = $field_spec - 1; $field_end = $field_spec - 1; + } + if ($field_spec =~ m/^(\d*)[-:](\d*)/) { # accept e.g. 1:10 as a courtesty (properly, 1-10) + if ($1 ne "") { + $field_begin = $1 - 1; # Change to zero-based indexing. + } + if ($2 ne "") { + $field_end = $2 - 1; # Change to zero-based indexing. + } + } + if (!defined $field_begin && !defined $field_end) { + die "Bad argument to -f option: $field_spec"; + } +} +$symtab = shift @ARGV; +if(!defined $symtab) { + print STDERR "Usage: sym2int.pl [options] symtab [input] > output\n" . + "options: [-f (|-)]\n" . + "e.g.: -f 2, or -f 3-4\n"; + exit(1); +} + +open(F, "<$symtab") || die "Error opening symbol table file $symtab"; +while() { + @A = split(" ", $_); + @A == 2 || die "bad line in symbol table file: $_"; + $int2sym{$A[1]} = $A[0]; +} + +sub int2sym { + my $a = shift @_; + my $pos = shift @_; + if($a !~ m:^\d+$:) { # not all digits.. + $pos1 = $pos+1; # make it one-based. + die "int2sym.pl: found noninteger token $a [in position $pos1]\n"; + } + $s = $int2sym{$a}; + if(!defined ($s)) { + die "int2sym.pl: integer $a not in symbol table $symtab."; + } + return $s; +} + +$error = 0; +while (<>) { + @A = split(" ", $_); + for ($pos = 0; $pos <= $#A; $pos++) { + $a = $A[$pos]; + if ( (!defined $field_begin || $pos >= $field_begin) + && (!defined $field_end || $pos <= $field_end)) { + $a = int2sym($a, $pos); + } + print $a . " "; + } + print "\n"; +} + + + -- cgit v1.2.3