### Status codes ###
+FAILED Job failed to execute (i.e., it returned a non-zero exit code)
+INTERR Internal error of BEX (e.g., failed to read job prolog file)
NEW Newly inserted job, which did not run yet
NOPING Host does not respond to ping
NOXFER Transfer of the job body to a temporary file on the host has failed
OK Job finished successfully (this is usually not seen in the queue, since
finished jobs are immediately deleted or moved to the history)
-FAILED Job failed to execute (i.e., it returned a non-zero exit code)
-INTERR Internal error of BEX (e.g., failed to read job prolog file)
+PREP Running preparatory commands (i.e., those present in Prep header field)
PREPFAIL Preparatory commands failed (i.e., those present in Prep header field)
REMOVED Job removed from the queue (behavior similar to OK)
+RUN Job is running
-These are present only in log files and messages sent over status FIFO:
+Additional status codes recorded in the log files:
-RUN Job is running
-SEND Sending job to the host
-PREP Running preparatory commands (i.e., those present in Prep header field)
+REQUEUE Attempted to enqueue the job, but it was already in the queue
-Status codes not tied to a specific job (sent over status FIFO):
+Additional status codes sent only over status FIFO:
-PING Trying to ping the host
+DONE Done with the host (job equals "-")
+INIT Host ready, preparing to execute jobs (job equals "-")
LOCKED Host or job not available, because it is locked by another brun
-INIT Host ready, preparing to execute jobs
-DONE Done with the host
+PING Trying to ping the host (job equals "-")
+SEND Sending job to the host
print "New job ", $job->id, "\n";
for my $m (@machines) {
if ($queue->enqueue($m, $job)) {
- $queue->write_job_status($m, $job->id, { 'Time' => time, 'Status' => 'NEW' });
+ $queue->update_job_status($m, $job->id, 'NEW');
print "\t$m\n";
} else {
+ $queue->log($m, $job->id, 'REQUEUE');
print "\t$m (already queued)\n";
}
}
print STDERR "Cannot remove $m:", $queue->job_name($j), ", it is locked\n";
$err = 1;
} else {
- $queue->log($m, $j, 'REMOVED');
- $queue->write_job_status($m, $j, { 'Time' => time, 'Status' => 'REMOVED' });
+ $queue->update_job_status($m, $j, 'REMOVED');
$queue->remove($m, $j);
print "Removed $m:", $queue->job_name($j), "\n";
}
$err = 1;
} else {
my $enq = $dest->enqueue($m, $job);
- $dest->write_job_status($m, $job->id, { 'Time' => time, 'Status' => 'NEW', 'Message' => 'Moved to this queue' });
- $queue->log($m, $j, 'REMOVED', "Moved to another queue");
- $queue->write_job_status($m, $j, { 'Time' => time, 'Status' => 'REMOVED', 'Message' => 'Moved to another queue' });
+ if ($enq) {
+ $dest->update_job_status($m, $job->id, 'NEW', 'Moved to this queue');
+ } else {
+ $dest->log($m, $job->id, 'REQUEUE', 'Moved to this queue');
+ }
+ $queue->update_job_status($m, $job->id, 'REMOVED', 'Moved from this queue');
$queue->remove($m, $j);
- print "Moved $m:", $queue->job_name($j);
+ print "Moved $m:", $dest->job_name($j);
print " (already queued)" if !$enq;
print "\n";
}
print $status_fd "! $mach $job $status\n";
}
if ($log_on_queue) {
- $log_on_queue->log($mach, $job, $status, $msg);
+ $log_on_queue->update_job_status($mach, $job, $status, $msg);
}
}
update_status($mach, $jid, 'LOCKED', undef);
next;
}
- my $stat = {
- 'Time' => time,
- };
print "### Running ", $job->name, " on $mach ###\n";
my ($s, $msg) = run_job($job, $queue, $mach);
-
- $stat->{'Status'} = $s;
- $stat->{'Message'} = $msg;
- $queue->write_job_status($mach, $jid, $stat);
-
- # Called after writing the status file, so that the front-end watching
- # our status FIFO can see the new status file.
update_status($mach, $jid, $s, $queue, $msg);
if ($s eq 'OK') {
my ($queue, $machine, $job) = @_;
my $qf = $queue->queue_file($machine, $job->id);
if (-f $qf) {
- $queue->log($machine, $job->id, 'REQUEUE');
return 0;
}
my $fn = $queue->job_file($job->id);
my $dir = $queue->host_dir($machine);
-d $dir or mkdir $dir or die "Cannot create directory $dir: $!";
symlink '../../jobs/' . $job->id . '.job', $qf or die "Cannot create $qf: $!";
- $queue->log($machine, $job->id, 'QUEUE');
return 1;
}
my $dst = "$d/$jid.$suff";
if (-f $src) {
rename $src, $dst or die "Cannot rename $src to $dst: $!";
+ } else {
+ # Might be present from the previous incarnation of the same job
+ unlink $dst;
}
}
} else {
rename "$sf.$$", $sf or die "Cannot rename $sf.$$ to $sf: $!";
}
+sub update_job_status($$$$;$) { # Record status $stat for job $jid on $machine: status file first, then log
+ my ($queue, $machine, $jid, $stat, $msg) = @_; # $msg is the optional fifth argument
+ my $s = {
+ 'Time' => time, # timestamp taken when this method is called
+ 'Status' => $stat,
+ 'Message' => $msg, # key is always present; value is undef if no message was passed
+ };
+ $queue->write_job_status($machine, $jid, $s); # hand the status record to write_job_status
+ $queue->log($machine, $jid, $stat, $msg); # then log the same status and message
+}
+
sub lock_name($$$) {
my ($queue, $machine, $jid) = @_;
my $lock = $queue->{'Name'};