blob: d6189394a5f9785d908cf7a46a675f48a8492b23 (
plain) (
tree)
|
|
#!/usr/bin/env perl
# Copyright 2010-2012 Microsoft Corporation Johns Hopkins University (Author: Daniel Povey)
# Apache 2.0.
# Optional "-f <spec>" restricts conversion to a range of whitespace-separated
# fields.  $field_begin / $field_end hold the zero-based, inclusive bounds of
# that range; a bound that is left undefined is treated as open by the main
# loop, so with no -f option every field is converted.
undef $field_begin;
undef $field_end;

# parse_field_spec($spec): convert a one-based field spec to zero-based bounds.
#   "N"            -> (N-1, N-1)
#   "A-B" or "A:B" -> (A-1, B-1)   ("A:B" is accepted as a courtesy)
#   "A-" or "-B"   -> open-ended range; the missing bound is returned as undef
# Returns a two-element list (begin, end) on success and the empty list when
# the spec is undefined, malformed, has no bound at all, or names field 0
# (fields are numbered from 1).
sub parse_field_spec {
  my ($spec) = @_;
  return () unless defined $spec;

  my ($first, $last);
  if ($spec =~ m/^(\d+)$/) {
    ($first, $last) = ($1, $1);
  } elsif ($spec =~ m/^(\d*)[-:](\d*)$/) {
    # Anchored at both ends so trailing garbage such as "3-4x" is rejected.
    ($first, $last) = ($1, $2);
  } else {
    return ();
  }

  # A bare "-" or ":" specifies nothing.
  return () if $first eq "" && $last eq "";

  # Field numbers are one-based; 0 would become index -1 and match nothing.
  for my $bound ($first, $last) {
    return () if $bound ne "" && $bound < 1;
  }

  # Change to zero-based indexing.
  return ($first eq "" ? undef : $first - 1,
          $last  eq "" ? undef : $last - 1);
}

if (@ARGV && $ARGV[0] eq "-f") {
  shift @ARGV;
  $field_spec = shift @ARGV;
  defined $field_spec || die "Option -f requires an argument";
  my @bounds = parse_field_spec($field_spec);
  @bounds == 2 || die "Bad argument to -f option: $field_spec";
  ($field_begin, $field_end) = @bounds;
}
# The next argument is the symbol table file.  Whatever remains in @ARGV
# afterwards is consumed as input by the <> operator further down.
$symtab = shift @ARGV;
if (!defined $symtab) {
  # Usage text names this script (int2sym.pl), not its sym2int.pl counterpart.
  print STDERR "Usage: int2sym.pl [options] symtab [input] > output\n" .
    "options: [-f (<field>|<field_start>-<field-end>)]\n" .
    "e.g.: -f 2, or -f 3-4\n";
  exit(1);
}
# Load the symbol table into %int2sym.  Every line must contain exactly two
# whitespace-separated fields: the symbol, then its id.  The id is the hash
# key and the symbol is the value; a repeated id overwrites the earlier entry.
my $symtab_fh;
if ($symtab eq "-") {
  # The old two-argument open treated "-" as standard input; keep that.
  $symtab_fh = \*STDIN;
} else {
  # Three-argument open: the file name is taken literally, so leading or
  # trailing whitespace and characters such as "&" cannot change the mode.
  open($symtab_fh, "<", $symtab)
    || die "Error opening symbol table file $symtab: $!";
}
while (my $line = <$symtab_fh>) {
  my @fields = split(" ", $line);
  @fields == 2 || die "bad line in symbol table file: $line";
  $int2sym{$fields[1]} = $fields[0];
}
# Only close a handle that was opened here; STDIN is left alone.
close($symtab_fh) unless $symtab eq "-";
# int2sym($token, $pos): map one integer token to its symbol.
#   $token - the token text; must consist solely of digits.
#   $pos   - zero-based position of the token on its line; used only to
#            report a one-based position in the error message.
# Returns the symbol stored in %int2sym under $token.
# Dies if $token is not all digits, or if it has no entry in %int2sym.
# All working variables are lexical, so nothing leaks into package globals
# and sort's special $a is not shadowed.
sub int2sym {
  my ($token, $pos) = @_;
  if ($token !~ m:^\d+$:) { # not all digits..
    my $pos1 = $pos + 1; # make it one-based.
    die "int2sym.pl: found noninteger token $token [in position $pos1]\n";
  }
  my $sym = $int2sym{$token};
  if (!defined($sym)) {
    die "int2sym.pl: integer $token not in symbol table $symtab.";
  }
  return $sym;
}
# Main loop: read each input line (from the files left in @ARGV, or standard
# input), split it on whitespace, and replace every token whose zero-based
# position lies within [$field_begin, $field_end] by its symbol.  An undefined
# bound is treated as open on that side.  Tokens outside the range are copied
# through unchanged.  Loop variables are lexical so the globals $a (sort's
# special variable), $pos and @A are no longer clobbered.
while (my $line = <>) {
  my @tokens = split(" ", $line);
  for my $pos (0 .. $#tokens) {
    my $in_range = (!defined $field_begin || $pos >= $field_begin)
                && (!defined $field_end   || $pos <= $field_end);
    my $token = $in_range ? int2sym($tokens[$pos], $pos) : $tokens[$pos];
    # Each token, including the last one, is followed by a single space;
    # this output format is kept exactly as it was.
    print $token . " ";
  }
  print "\n";
}
|