...

author: Determinant <[email protected]> 2016-02-29 20:06:25 +0800
committer: Determinant <[email protected]> 2016-02-29 20:06:25 +0800
commit: 534b039d297b9f2f83f889e2592686d79569e141 (patch)
tree: 66f73999d98427d2e4c2ed09eeec3d6d845fb975 /kaldi_decode/utils/run.pl
parent: 1e0ac0fb5c9f517e7325deb16004de1054454da7 (diff)
1 files changed, 0 insertions, 264 deletions
diff --git a/kaldi_decode/utils/run.pl b/kaldi_decode/utils/run.pl
deleted file mode 100755
index 6145a7a..0000000
--- a/kaldi_decode/utils/run.pl
+++ /dev/null
@@ -1,264 +0,0 @@
-#!/usr/bin/env perl
-use warnings; #sed replacement for -w perl parameter
-
-# In general, doing 
-#  run.pl some.log a b c is like running the command a b c in
-# the bash shell, and putting the standard error and output into some.log.
-# To run parallel jobs (backgrounded on the host machine), you can do (e.g.)
-#  run.pl JOB=1:4 some.JOB.log a b c JOB is like running the command a b c JOB
-# and putting it in some.JOB.log, for each one. [Note: JOB can be any identifier].
-# If any of the jobs fails, this script will fail.
-
-# A typical example is:
-#  run.pl some.log my-prog "--opt=foo bar" foo \|  other-prog baz
-# and run.pl will run something like:
-# ( my-prog '--opt=foo bar' foo |  other-prog baz ) >& some.log
-# 
-# Basically it takes the command-line arguments, quotes them
-# as necessary to preserve spaces, and evaluates them with bash.
-# In addition it puts the command line at the top of the log, and
-# the start and end times of the command at the beginning and end.
-# The reason why this is useful is so that we can create a different
-# version of this program that uses a queueing system instead.
-
-# use Data::Dumper;
-
-@ARGV < 2 && die "usage: run.pl log-file command-line arguments...";
-
-
-$max_jobs_run = -1;
-$jobstart = 1;
-$jobend = 1;
-$ignored_opts = ""; # These will be ignored.
-
-# First parse an option like JOB=1:4, and any
-# options that would normally be given to
-# queue.pl, which we will just discard.
-
-if (@ARGV > 0) {
-  while (@ARGV >= 2 && $ARGV[0] =~ m:^-:) { # parse any options
-    # that would normally go to qsub, but which will be ignored here.
-    $switch = shift @ARGV;
-    if ($switch eq "-V") {
-      $ignored_opts .= "-V ";
-    } elsif ($switch eq "--max-jobs-run" || $switch eq "-tc") {
-      # we do support the option --max-jobs-run n, and its GridEngine form -tc n.
-      $max_jobs_run = shift @ARGV;
-      if (! ($max_jobs_run > 0)) {
-        die "run.pl: invalid option --max-jobs-run $max_jobs_run";
-      }
-    } else {
-      $option = shift @ARGV;
-      if ($switch eq "-sync" && $option =~ m/^[yY]/) {
-        $ignored_opts .= "-sync "; # Note: in the
-        # corresponding code in queue.pl it says instead, just "$sync = 1;".
-      }
-      $ignored_opts .= "$switch $option ";
-      if ($switch eq "-pe") { # e.g. -pe smp 5
-        $option2 = shift @ARGV;
-        $ignored_opts .= "$option2 ";
-      }
-    }
-  }
-  if ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+):(\d+)$/) { # e.g. JOB=1:10
-    $jobname = $1;
-    $jobstart = $2;
-    $jobend = $3;
-    shift;
-    if ($jobstart > $jobend) {
-      die "run.pl: invalid job range $ARGV[0]";
-    }
-    if ($jobstart <= 0) {
-      die "run.pl: invalid job range $ARGV[0], start must be strictly positive (this is required for GridEngine compatibility).";
-    }
-  } elsif ($ARGV[0] =~ m/^([\w_][\w\d_]*)+=(\d+)$/) { # e.g. JOB=1.
-    $jobname = $1;
-    $jobstart = $2;
-    $jobend = $2;
-    shift;
-  } elsif ($ARGV[0] =~ m/.+\=.*\:.*$/) {
-    print STDERR "run.pl: Warning: suspicious first argument to run.pl: $ARGV[0]\n";
-  }
-}
-
-# Users found this message confusing so we are removing it.
-# if ($ignored_opts ne "") {
-#  print STDERR "run.pl: Warning: ignoring options \"$ignored_opts\"\n";
-# }
-
-if ($max_jobs_run == -1) { # If --max-jobs-run option not set,
-                           # then work out the number of processors if possible,
-                           # and set it based on that.
-  $max_jobs_run = 0;
-  if (open(P, "</proc/cpuinfo")) {  # Linux
-    while (<P>) { if (m/^processor/) { $max_jobs_run++; } }
-    if ($max_jobs_run == 0) {
-      print STDERR "run.pl: Warning: failed to detect any processors from /proc/cpuinfo\n";
-      $max_jobs_run = 10;  # reasonable default.
-    }
-    close(P);
-  } elsif (open(P, "sysctl -a |")) {  # BSD/Darwin
-    while (<P>) {
-      if (m/hw\.ncpu\s*[:=]\s*(\d+)/) { # hw.ncpu = 4, or hw.ncpu: 4
-        $max_jobs_run = $1;
-        last;
-      }
-    }
-    close(P);
-    if ($max_jobs_run == 0) {
-      print STDERR "run.pl: Warning: failed to detect any processors from sysctl -a\n";
-      $max_jobs_run = 10;  # reasonable default.
-    }
-  } else {
-    # allow at most 32 jobs at once, on non-UNIX systems; change this code
-    # if you need to change this default.
-    $max_jobs_run = 32;
-  }
-  # The just-computed value of $max_jobs_run is just the number of processors
-  # (or our best guess); and if it happens that the number of jobs we need to
-  # run is just slightly above $max_jobs_run, it will make sense to increase
-  # $max_jobs_run to equal the number of jobs, so we don't have a small number
-  # of leftover jobs.
-  $num_jobs = $jobend - $jobstart + 1;
-  if ($num_jobs > $max_jobs_run && $num_jobs < 1.4 * $max_jobs_run) {
-    $max_jobs_run = $num_jobs;
-  }
-}
-
-$logfile = shift @ARGV;
-
-if (defined $jobname && $logfile !~ m/$jobname/ &&
-    $jobend > $jobstart) {
-  print STDERR "run.pl: you are trying to run a parallel job but "
-    . "you are putting the output into just one log file ($logfile)\n";
-  exit(1);
-}
-
-$cmd = "";
-
-foreach $x (@ARGV) { 
-    if ($x =~ m/^\S+$/) { $cmd .=  $x . " "; }
-    elsif ($x =~ m:\":) { $cmd .= "'$x' "; }
-    else { $cmd .= "\"$x\" "; } 
-}
-
-#$Data::Dumper::Indent=0;
-$ret = 0;
-$numfail = 0;
-%active_pids=();
-
-use POSIX ":sys_wait_h";
-for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
-  if (scalar(keys %active_pids) >= $max_jobs_run) {
-    
-    # Lets wait for a change in any child's status
-    # Then we have to work out which child finished
-    $r = waitpid(-1, 0);
-    $code = $?;
-    if ($r < 0 ) { die "run.pl: Error waiting for child process"; } # should never happen.
-    if ( defined $active_pids{$r} ) {
-        $jid=$active_pids{$r};
-        $fail[$jid]=$code; 
-        if ($code !=0) { $numfail++;}
-        delete $active_pids{$r};
-        # print STDERR "Finished: $r/$jid " .  Dumper(\%active_pids) . "\n";
-    } else {
-        die "run.pl: Cannot find the PID of the chold process that just finished.";
-    }
-
-    # In theory we could do a non-blocking waitpid over all jobs running just 
-    # to find out if only one or more jobs finished during the previous waitpid()
-    # However, we just omit this and will reap the next one in the next pass
-    # through the for(;;) cycle
-  }
-  $childpid = fork();
-  if (!defined $childpid) { die "run.pl: Error forking in run.pl (writing to $logfile)"; }
-  if ($childpid == 0) { # We're in the child... this branch
-    # executes the job and returns (possibly with an error status).
-    if (defined $jobname) { 
-      $cmd =~ s/$jobname/$jobid/g;
-      $logfile =~ s/$jobname/$jobid/g;
-    }
-    system("mkdir -p `dirname $logfile` 2>/dev/null");
-    open(F, ">$logfile") || die "run.pl: Error opening log file $logfile";
-    print F "# " . $cmd . "\n";
-    print F "# Started at " . `date`;
-    $starttime = `date +'%s'`;
-    print F "#\n";
-    close(F);
-
-    # Pipe into bash.. make sure we're not using any other shell.
-    open(B, "|bash") || die "run.pl: Error opening shell command"; 
-    print B "( " . $cmd . ") 2>>$logfile >> $logfile";
-    close(B);                   # If there was an error, exit status is in $?
-    $ret = $?;
-
-    $lowbits = $ret & 127;
-    $highbits = $ret >> 8;
-    if ($lowbits != 0) { $return_str = "code $highbits; signal $lowbits" }
-    else { $return_str = "code $highbits"; }
-
-    $endtime = `date +'%s'`;
-    open(F, ">>$logfile") || die "run.pl: Error opening log file $logfile (again)";
-    $enddate = `date`;
-    chop $enddate;
-    print F "# Accounting: time=" . ($endtime - $starttime) . " threads=1\n";
-    print F "# Ended ($return_str) at " . $enddate . ", elapsed time " . ($endtime-$starttime) . " seconds\n";
-    close(F);
-    exit($ret == 0 ? 0 : 1);
-  } else {
-    $pid[$jobid] = $childpid;
-    $active_pids{$childpid} = $jobid;
-    # print STDERR "Queued: " .  Dumper(\%active_pids) . "\n";
-  }
-}
-
-# Now we have submitted all the jobs, lets wait until all the jobs finish
-foreach $child (keys %active_pids) {
-    $jobid=$active_pids{$child};
-    $r = waitpid($pid[$jobid], 0);
-    $code = $?;
-    if ($r == -1) { die "run.pl: Error waiting for child process"; } # should never happen.
-    if ($r != 0) { $fail[$jobid]=$code; $numfail++ if $code!=0; } # Completed successfully
-}
-
-# Some sanity checks:
-# The $fail array should not contain undefined codes
-# The number of non-zeros in that array  should be equal to $numfail
-# We cannot do foreach() here, as the JOB ids do not necessarily start by zero
-$failed_jids=0;
-for ($jobid = $jobstart; $jobid <= $jobend; $jobid++) {
-  $job_return = $fail[$jobid];
-  if (not defined $job_return ) {
-    # print Dumper(\@fail);
-    
-    die "run.pl: Sanity check failed: we have indication that some jobs are running " . 
-      "even after we waited for all jobs to finish" ; 
-  }
-  if ($job_return != 0 ){ $failed_jids++;}
-}
-if ($failed_jids != $numfail) {
-  die "run.pl: Sanity check failed: cannot find out how many jobs failed ($failed_jids x $numfail)."
-}
-if ($numfail > 0) { $ret = 1; }
-
-if ($ret != 0) {
-  $njobs = $jobend - $jobstart + 1;
-  if ($njobs == 1) { 
-    if (defined $jobname) {
-      $logfile =~ s/$jobname/$jobstart/; # only one numbered job, so replace name with
-                                         # that job.
-    }
-    print STDERR "run.pl: job failed, log is in $logfile\n";
-    if ($logfile =~ m/JOB/) {
-      print STDERR "run.pl: probably you forgot to put JOB=1:\$nj in your script.";
-    }
-  }
-  else {
-    $logfile =~ s/$jobname/*/g;
-    print STDERR "run.pl: $numfail / $njobs failed, log is in $logfile\n";
-  }
-}
-
-
-exit ($ret);
author	Determinant <[email protected]>	2016-02-29 20:06:25 +0800
committer	Determinant <[email protected]>	2016-02-29 20:06:25 +0800
commit	534b039d297b9f2f83f889e2592686d79569e141 (patch)
tree	66f73999d98427d2e4c2ed09eeec3d6d845fb975 /kaldi_decode/utils/run.pl
parent	1e0ac0fb5c9f517e7325deb16004de1054454da7 (diff)