/<job-id>.stat (Optional) status of the job
/<job-id>.tmp Used temporarily by brun to store the script actually
sent to the host (can be inspected if something goes wrong)
+ /<job-id>.log (Optional) transcript of output produced by the job (including
+ previous failed attempts)
<queue>/jobs/<job-id>.job All jobs issued on this queue, including those which
are no longer queued for any machine
NOPING - host does not respond to ping
NOXFER - transfer of the job body to a temporary file
on the host has failed
+ RUN - job is running (present only in log files)
OK - job executed successfully (however, the job will
be removed from the queue immediately, so you are
not likely to see this code)
- Rename machine -> host
- ssh options
- Detector of orphans (unused queue dirs, jobs on non-existent machines, non-queued jobs)
+- job failed => give a more explanatory message
chomp $rtmp;
update_status($mach, $jid, 'RUN', $queue);
- system 'ssh', '-t', $mach, "$rtmp ; e=\$? ; rm -f $rtmp ; exit \$e";
+ my $lf = $queue->log_file($mach, $jid);
+ system 'bash', '-o', 'pipefail', '-c', "ssh -t $mach '$rtmp ; e=\$? ; rm -f $rtmp ; exit \$e' 2>&1 | tee -a $lf";
if ($?) {
return ('FAILED', 'Job failed');
} else {
our $job_cnt = 0;
+# Validate a job ID: it must be non-empty and consist solely of ASCII
+# letters, digits and dashes. Job IDs are used to build file names
+# (<job-id>.job, .stat, .tmp, .log), so nothing else may slip through.
+# Returns a true value for a well-formed ID, a false value otherwise.
+sub check_id($) {
+ my ($id) = @_;
+ # \A and \z anchor to the real string boundaries; with ^...$ an ID
+ # followed by a trailing newline would still have been accepted.
+ return $id =~ /\A([0-9A-Za-z-]+)\z/;
+}
+
sub new($;$) {
my ($class, $id) = @_;
my $job = { };
bless $job;
if (defined $id) {
+ check_id($id) or die "Invalid job ID";
$job->{'ID'} = $id;
} else {
$job_cnt++;
close T;
$job->{'Subject'} //= '?';
$job->{'ID'} or die "Cannot load $file: Missing ID";
- $job->{'ID'} !~ /\.[a-z]+$/ or die "Cannot load $file: Invalid ID syntax";
+ check_id($job->{'ID'}) or die "Cannot load $file: Invalid ID syntax";
return bless $job;
}
return bless $queue;
}
+# Path of the per-job log file of job $jid on $machine:
+# <host directory of $machine>/<job-id>.log
+# The prototype counts the invocant, so three arguments are declared.
+sub log_file($$$) {
+ my ($queue, $machine, $jid) = @_;
+ return $queue->host_dir($machine) . '/' . $jid . '.log';
+}
+
# Most actions have to be logged by the caller
+# Record one event for job $jid on $mach. A line of the form
+# "<timestamp> <mach> <jid> <stat> [<msg>]" is appended to the queue-wide
+# log (<queue name>/log) and, prefixed with "### ", to the job's own log file.
+# Dies if the queue-wide log cannot be opened.
sub log($$$$;$) {
my ($queue, $mach, $jid, $stat, $msg) = @_;
- my $fh = $queue->{'LogFH'} //= new IO::File $queue->{'Name'} . '/log', '>>' or die "Cannot open log: $!";
- my $m = join(" ", POSIX::strftime("%Y-%m-%d %H:%M:%S", localtime), $mach, $jid, $stat);
+ my $t = POSIX::strftime("%Y-%m-%d %H:%M:%S", localtime);
+ my $m = join(" ", $t, $mach, $jid, $stat);
$m .= " $msg" if defined $msg;
+
+ # The queue log handle is opened in append mode on first use and then
+ # cached in the queue object under 'LogFH'
+ my $fh = $queue->{'LogFH'} //= new IO::File $queue->{'Name'} . '/log', '>>' or die "Cannot open log: $!";
print $fh "$m\n";
+
+ # Append to the per-job log file
+ # (if it cannot be opened, the entry is skipped without any error)
+ if (open L, '>>', $queue->log_file($mach, $jid)) {
+ print L "### $m\n";
+ close L;
+ }
}
sub host_dir($$) {
unlink $queue->queue_file($machine, $jid);
unlink $queue->status_file($machine, $jid);
unlink $queue->temp_file($machine, $jid);
+ unlink $queue->log_file($machine, $jid);
}
sub job_metadata($$) {