#!/usr/bin/env perl
use strict;
use warnings;
# Copyright 2012 Johns Hopkins University (Author: Daniel Povey).
# 2014 Vimal Manohar (Johns Hopkins University)
# Apache 2.0.
use File::Basename;
use Cwd;
use Getopt::Long;
# queue.pl has the same functionality as run.pl, except that
# it runs the job in question on the queue (Sun GridEngine).
# This version of queue.pl uses the task array functionality
# of the grid engine. Note: it's different from the queue.pl
# in the s4 and earlier scripts.
# The script now supports configuring the queue system using a config file
# (default: conf/queue.conf, but a different file can be specified with the
# --config option) and a set of command line options.
# The current script handles:
# 1) Normal configuration arguments
# E.g. a command line option of "--gpu 1" could be converted into the option
# "-q g.q -l gpu=1" to qsub. How the CLI option is handled is determined by a
# line in the config file like
# option gpu=* -q g.q -l gpu=$0
# $0 here in the line is replaced with the argument read from the CLI and the
# resulting string is passed to qsub.
# 2) Special arguments to options such as
# option gpu=0
# If --gpu 0 is given on the command line, then no special "-q" is given.
# 3) Default argument
# default gpu=0
# If --gpu option is not passed in the command line, then the script behaves as
# if --gpu 0 was passed since 0 is specified as the default argument for that
# option
# 4) Arbitrary options and arguments.
# Any command line option starting with '--' and its argument would be handled
# as long as it is defined in the config file.
# 5) Default behavior
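# If no --config option is given and the default config file
# (conf/queue.conf) is not readable, then the script behaves as if the queue
# has the following config file (an unreadable file passed via --config is
# an error instead):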
# $ cat conf/queue.conf
# # Default configuration
# command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
# option mem=* -l mem_free=$0,ram_free=$0
# option mem=0 # Do not add anything to qsub_opts
# option num_threads=* -pe smp $0
# option num_threads=1 # Do not add anything to qsub_opts
# option max_jobs_run=* -tc $0
# default gpu=0
# option gpu=0
# option gpu=* -l gpu=$0 -q g.q
# ---- Script-wide state, filled in while the command line is parsed ----
# Switches forwarded verbatim to qsub; each entry is followed by a space.
my $qsub_opts = "";
# Becomes 1 when "-sync y" is seen on the command line.
my $sync = 0;
# Thread count; replaced by the slot count of a "-pe <env> <n>" switch.
my $num_threads = 1;
my $gpu = 0;
# Config file to read unless --config names another one.
my $config = "conf/queue.conf";
# "--name value" pairs from the command line, keyed by the name with the
# leading "--" removed and every '-' turned into '_'.
my %cli_options;
# Array-job description taken from a JOB=1:n style argument: the variable
# name, the first index and the last index.
my ($jobname, $jobstart, $jobend);
# Becomes 1 once a JOB=... argument has been recognised.
my $array_job = 0;
# Writes the command-line help text to STDERR and terminates the script with
# exit status 1. Takes no arguments and never returns. The defaults shown in
# the text are the current values of $config, $num_threads and $gpu.
sub print_usage() {
  my @usage_text = (
    "Usage: queue.pl [options] [JOB=1:n] log-file command-line arguments...\n",
    "e.g.: queue.pl foo.log echo baz\n",
    " (which will echo \"baz\", with stdout and stderr directed to foo.log)\n",
    "or: queue.pl -q all.q\@xyz foo.log echo bar \| sed s/bar/baz/ \n",
    " (which is an example of using a pipe; you can provide other escaped bash constructs)\n",
    "or: queue.pl -q all.q\@qyz JOB=1:10 foo.JOB.log echo JOB \n",
    " (which illustrates the mechanism to submit parallel jobs; note, you can use \n",
    " another string other than JOB)\n",
    "Note: if you pass the \"-sync y\" option to qsub, this script will take note\n",
    "and change its behavior. Otherwise it uses qstat to work out when the job finished\n",
    "Options:\n",
    " --config <config-file> (default: $config)\n",
    " --mem <mem-requirement> (e.g. --mem 2G, --mem 500M, \n",
    " also support K and numbers mean bytes)\n",
    " --num-threads <num-threads> (default: $num_threads)\n",
    " --max-jobs-run <num-jobs>\n",
    " --gpu <0|1> (default: $gpu)\n",
  );
  # Emit the whole message with a single print, then stop the script.
  print STDERR join("", @usage_text);
  exit 1;
}
# ---- Command-line parsing ----
# Consumes the leading qsub switches, the "--name value" config options and
# an optional JOB=start[:end] array specification from @ARGV, leaving the
# log-file and the command to run. Results are stored in $qsub_opts, $sync,
# $num_threads, %cli_options, $array_job, $jobname, $jobstart and $jobend.
# Prints the usage message (and exits) when fewer than two arguments are
# available before or after parsing.
if (@ARGV < 2) {
  print_usage();
}
# Three passes, so that the JOB=1:n argument may be interleaved with the
# option switches.
for my $pass (1 .. 3) {
  while (@ARGV >= 2 && $ARGV[0] =~ m:^-:) {
    my $switch = shift @ARGV;
    if ($switch eq "-V") {
      # -V is the only switch handled here that takes no argument.
      $qsub_opts .= "-V ";
      next;
    }
    my $argument = shift @ARGV;
    if ($argument =~ m/^--/) {
      print STDERR "WARNING: suspicious argument '$argument' to $switch; starts with '-'\n";
    }
    if ($switch eq "-sync" && $argument =~ m/^[yY]/) {
      $sync = 1;
      $qsub_opts .= "$switch $argument ";
    } elsif ($switch eq "-pe") { # e.g. -pe smp 5
      my $slots = shift @ARGV;
      if (!defined $slots) {
        # Without this check an undefined value would be interpolated into
        # the qsub options and copied into $num_threads.
        die "queue.pl: option -pe requires two arguments, e.g. -pe smp 5\n";
      }
      $qsub_opts .= "$switch $argument $slots ";
      $num_threads = $slots;
    } elsif ($switch =~ m/^--/) { # Config options
      # Convert the CLI option to a variable name by removing the leading
      # '--' and replacing every '-' with '_'.
      (my $name = $switch) =~ s/^--//;
      $name =~ s/-/_/g;
      $cli_options{$name} = $argument;
    } else { # Other qsub options - passed as is
      $qsub_opts .= "$switch $argument ";
    }
  }
  # Nothing left to inspect; the argument-count check below reports it.
  last if !@ARGV;
  # Keep a copy of the candidate: it is shifted off @ARGV on success, and
  # the error messages must name it rather than whatever follows it.
  my $job_spec = $ARGV[0];
  if ($job_spec =~ m/^(\w+)=(\d+)(?::(\d+))?$/) { # e.g. JOB=1:20 or JOB=1
    my $name = $1;
    my $start = $2;
    my $end = defined($3) ? $3 : $2;
    if ($start > $end) {
      die "queue.pl: invalid job range $job_spec";
    }
    if ($start <= 0) {
      die "queue.pl: invalid job range $job_spec, start must be strictly positive (this is a GridEngine limitation).";
    }
    $array_job = 1;
    $jobname = $name;
    $jobstart = $start;
    $jobend = $end;
    shift @ARGV;
  } elsif ($job_spec =~ m/.+\=.*\:.*$/) {
    print STDERR "Warning: suspicious first argument to queue.pl: $job_spec\n";
  }
}
if (@ARGV < 2) {
  print_usage();
}
# A --config option on the command line overrides the default config path.
if (exists $cli_options{"config"}) {
  $config = $cli_options{"config"};
}
# Built-in fallback configuration, written in the same line-oriented syntax
# as a config file ("command ...", "option NAME=VALUE ARGS", "default
# NAME=VALUE", '#' comments). The heredoc is non-interpolating, so each $0
# below is stored literally as a placeholder.
my $default_config_file = <<'END_DEFAULT_CONFIG';
# Default configuration
command qsub -v PATH -cwd -S /bin/bash -j y -l arch=*64*
option mem=* -l mem_free=$0,ram_free=$0
option mem=0 # Do not add anything to qsub_opts
option num_threads=* -pe smp $0
option num_threads=1 # Do not add anything to qsub_opts
option max_jobs_run=* -tc $0
default gpu=0
option gpu=0
option gpu=* -l gpu=$0 -q g.q
END_DEFAULT_CONFIG
# Here the configuration options specified by the user on the command line
# (e.g. --mem 2G) are converted to options to the qsub system as defined in
# the config file. (e.g. if the config file has the line
# "option mem=* -l ram_free=$0,mem_free=$0"
# and the user has specified '--mem 2G' on the command line, the options
# passed to queue system would be "-l ram_free=2G,mem_free=2G").
# A more detailed description of the ways the options would be handled is at
# the top of this file.
# Open the configuration source on the CONFIG handle.
#  - If $config can be opened, it is read as is.
#  - If it cannot be opened and the user named it with --config, this is a
#    fatal error (exit status 1).
#  - If it cannot be opened and no --config was given, the built-in default
#    configuration text is read instead and $config is set to
#    "Default config" for use in later error messages.
# The three-argument form of open is used so that the file name is never
# interpreted as a mode or a command, and the default text is read through
# an in-memory filehandle instead of a shell "echo" pipe.
my $opened_config_file = 1;
my %cli_config_options = ();
my %cli_default_options = ();
if (!open(CONFIG, '<', $config)) {
  my $open_error = "$!"; # capture before anything can overwrite $!
  $opened_config_file = 0;
  if (exists($cli_options{"config"})) {
    print STDERR "Could not open config file $config: $open_error\n";
    exit(1);
  }
  # Read the default configuration text instead.
  open(CONFIG, '<', \$default_config_file)
    or die "Unable to read the built-in default config: $!\n";
  $config = "Default config";
}
# Read the configuration from the CONFIG handle line by line.
# Recognised lines (after '#' comments and trailing whitespace are removed):
#   command <qsub command line>    -> stored in $qsub_cmd (plus a space)
#   option NAME=* <args with $0>   -> if NAME was given on the command line,
#                                     $0 is replaced by its value and the
#                                     result is stored in %cli_config_options
#   option NAME=VALUE [<args>]     -> if NAME is known (from the command line
#                                     or an earlier "default" line), <args>
#                                     is stored in %cli_default_options under
#                                     the (NAME, VALUE) key
#   default NAME=VALUE             -> sets %cli_options{NAME} unless the user
#                                     already supplied NAME
# Any other non-empty line is a fatal error, as is a config without a
# "command" line.
my $qsub_cmd = "";
my $read_command = 0;
while (my $line = <CONFIG>) {
  chomp $line;
  # $line is kept untouched for error messages; $text is what gets parsed.
  my $text = $line;
  $text =~ s/\s*#.*//;
  # Also drop trailing whitespace (including a CR from CRLF files) when no
  # comment follows, so that whitespace-only lines are skipped and stray
  # characters do not end up in the qsub command.
  $text =~ s/\s+$//;
  next if $text eq "";
  if ($text =~ /^command (.+)/) {
    $read_command = 1;
    $qsub_cmd = $1 . " ";
  } elsif ($text =~ m/^option ([^=]+)=\* (.+)$/) {
    # Config option that needs replacement with the parameter value read
    # from the CLI, e.g.: option mem=* -l mem_free=$0,ram_free=$0
    my $option = $1; # mem
    my $arg = $2;    # -l mem_free=$0,ram_free=$0
    if ($arg !~ m:\$0:) {
      die "queue.pl: unable to parse line '$line' in config file ($config): expected \$0 in the arguments\n";
    }
    if (exists $cli_options{$option}) {
      # Replace $0 with the argument read from the command line.
      # e.g. "-l mem_free=$0,ram_free=$0" -> "-l mem_free=2G,ram_free=2G"
      $arg =~ s/\$0/$cli_options{$option}/g;
      $cli_config_options{$option} = $arg;
    }
  } elsif ($text =~ m/^option ([^=]+)=(\S+)\s?(.*)$/) {
    # Config option that does not need replacement,
    # e.g. option gpu=0 -q all.q
    my $option = $1; # gpu
    my $value = $2;  # 0
    my $arg = $3;    # -q all.q
    if (exists $cli_options{$option}) {
      # The list subscript is joined with $; into a single hash key.
      $cli_default_options{($option,$value)} = $arg;
    }
  } elsif ($text =~ m/^default (\S+)=(\S+)/) {
    # Default value for an option, used only when the user did not specify
    # the option on the command line, e.g. default gpu=0
    my $option = $1; # gpu
    my $value = $2;  # 0
    if (!exists $cli_options{$option}) {
      $cli_options{$option} = $value;
    }
  } else {
    print STDERR "queue.pl: unable to parse line '$line' in config file ($config)\n";
    exit(1);
  }
}
close(CONFIG);
if ($read_command != 1) {
  print STDERR "queue.pl: config file ($config) does not contain the line \"command .*\"\n";
  exit(1);
}
for my $option (keys %cli_options) {
if ($option eq