diff options
author | Ted Yin <[email protected]> | 2015-10-14 15:43:43 +0800 |
---|---|---|
committer | Ted Yin <[email protected]> | 2015-10-14 15:43:43 +0800 |
commit | fd2148540efd7a5e5e2c054280d53d3eb3b055cc (patch) | |
tree | 47501412a3324e4c13b1238eeb913aae02b2024a /kaldi_decode/utils/int2sym.pl | |
parent | 0dba4c998fcccb4bae29582b7d8be94de476dd0b (diff) | |
parent | b33b3a6732c6b6a66bd5c44c615be56d66f4ed67 (diff) |
Merge pull request #7 from yimmon/master
support kaldi decoder
Diffstat (limited to 'kaldi_decode/utils/int2sym.pl')
-rwxr-xr-x | kaldi_decode/utils/int2sym.pl | 71 |
1 files changed, 71 insertions, 0 deletions
#!/usr/bin/env perl
# Copyright 2010-2012 Microsoft Corporation  Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.

# int2sym.pl: replace integer ids by symbols using a symbol table.
#
# Usage: int2sym.pl [options] symtab [input] > output
#   options: [-f (<field>|<field_start>-<field_end>)]
#   e.g.:    -f 2, or -f 3-4
#
# Every symbol-table line must hold exactly two whitespace-separated fields,
# "<symbol> <integer>".  Each input line (from the remaining file arguments,
# or standard input when there are none) is split on whitespace.  Tokens whose
# one-based position lies inside the -f range (all tokens when -f is absent)
# are replaced by their symbol; every other token is copied unchanged.  Each
# output token is followed by a single space and each line ends with "\n".
# The script dies on a non-integer token inside the selected range and on an
# integer that is missing from the symbol table.

use strict;
use warnings;

my $field_begin;  # zero-based first field to translate; undef = from the start
my $field_end;    # zero-based last field to translate;  undef = up to the end

if (@ARGV && $ARGV[0] eq "-f") {
  shift @ARGV;
  my $field_spec = shift @ARGV;
  # "-f" given as the last argument: report it as a bad spec, not as a warning.
  $field_spec = "" unless defined $field_spec;

  if ($field_spec =~ m/^(\d+)$/) {
    # Single field; change to zero-based indexing.
    $field_begin = $1 - 1;
    $field_end   = $1 - 1;
  }
  elsif ($field_spec =~ m/^(\d*)[-:](\d*)$/) {
    # Accept e.g. 1:10 as a courtesy (properly, 1-10).  Either side may be
    # omitted to leave the range open; anchored so trailing junk is rejected.
    my ($first, $last) = ($1, $2);
    $field_begin = $first - 1 if $first ne "";  # Change to zero-based indexing.
    $field_end   = $last - 1  if $last ne "";   # Change to zero-based indexing.
  }

  if (!defined $field_begin && !defined $field_end) {
    die "Bad argument to -f option: $field_spec";
  }
}

my $symtab = shift @ARGV;
if (!defined $symtab) {
  print STDERR "Usage: int2sym.pl [options] symtab [input] > output\n" .
    "options: [-f (<field>|<field_start>-<field-end>)]\n" .
    "e.g.: -f 2, or -f 3-4\n";
  exit(1);
}

# Maps the integer (second column of the symbol table) to its symbol (first
# column).  A later line silently overrides an earlier one with the same integer.
my %int2sym;

my $symtab_fh;
if ($symtab eq "-") {
  # The former two-argument open read the table from standard input for "-";
  # keep that working now that the filename is no longer parsed as a mode.
  $symtab_fh = \*STDIN;
}
else {
  open($symtab_fh, "<", $symtab)
    || die "Error opening symbol table file $symtab: $!";
}
while (my $line = <$symtab_fh>) {
  my @fields = split(" ", $line);
  @fields == 2 || die "bad line in symbol table file: $line";
  $int2sym{$fields[1]} = $fields[0];
}
close($symtab_fh) unless $symtab eq "-";

# int2sym($token, $pos): return the symbol for the integer $token.
#   $token - the token to translate; must consist of digits only.
#   $pos   - zero-based position of the token in its line, used only to report
#            a one-based position in the error message.
# Dies if $token is not an integer or is not present in the symbol table.
sub int2sym {
  my ($token, $pos) = @_;
  if ($token !~ m:^\d+$:) {  # not all digits..
    my $pos1 = $pos + 1;     # make it one-based.
    die "int2sym.pl: found noninteger token $token [in position $pos1]\n";
  }
  my $sym = $int2sym{$token};
  if (!defined $sym) {
    die "int2sym.pl: integer $token not in symbol table $symtab.";
  }
  return $sym;
}

while (my $line = <>) {
  my @tokens = split(" ", $line);
  for my $pos (0 .. $#tokens) {
    my $token = $tokens[$pos];
    if ((!defined $field_begin || $pos >= $field_begin)
        && (!defined $field_end || $pos <= $field_end)) {
      $token = int2sym($token, $pos);
    }
    # Every token, including the last, is followed by one space (historical
    # output format that downstream consumers may rely on).
    print $token . " ";
  }
  print "\n";
}