From 7dcac7d25840b8ec2f1747fda63d20e0589d6333 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Mon, 31 Oct 2011 11:24:11 +0100 Subject: [PATCH] Per-job logs --- NOTES | 3 +++ TODO | 1 + brun | 3 ++- lib/BEX/Job.pm | 8 +++++++- lib/BEX/Queue.pm | 18 ++++++++++++++++-- 5 files changed, 29 insertions(+), 4 deletions(-) diff --git a/NOTES b/NOTES index 1412bf7..567f305 100644 --- a/NOTES +++ b/NOTES @@ -6,6 +6,8 @@ /.stat (Optional) status of the job /.tmp Used temporarily by brun to store the script actually sent to the host (can be inspected if something goes wrong) + /.log (Optional) transcript of output produced by the job (including + previous failed attempts) /jobs/.job All jobs issued on this queue, including those which are no longer queued for any machine @@ -39,6 +41,7 @@ Status: Machine-readable status of the job: NOPING - host does not respond to ping NOXFER - transfer of the job body to a temporary file on the host has failed + RUN - job is running (present only in log files) OK - job executed successfully (however, the job will be removed from the queue immediately, so you are not likely to see this code) diff --git a/TODO b/TODO index 63d6f1d..6d96283 100644 --- a/TODO +++ b/TODO @@ -9,3 +9,4 @@ - Rename machine -> host - ssh options - Detector of orphans (unused queue dirs, jobs on non-existent machines, non-queued jobs) +- job failed => give a more explanatory message diff --git a/brun b/brun index 901c7d5..767eefb 100755 --- a/brun +++ b/brun @@ -78,7 +78,8 @@ sub run_job($$$) { chomp $rtmp; update_status($mach, $jid, 'RUN', $queue); - system 'ssh', '-t', $mach, "$rtmp ; e=\$? ; rm -f $rtmp ; exit \$e"; + my $lf = $queue->log_file($mach, $jid); + system 'bash', '-o', 'pipefail', '-c', "ssh -t $mach '$rtmp ; e=\$? ; rm -f $rtmp ; exit \$e' 2>&1 | tee -a $lf"; if ($?) { return ('FAILED', 'Job failed'); } else { diff --git a/lib/BEX/Job.pm b/lib/BEX/Job.pm index 7bd7a8f..1b920e4 100644 --- a/lib/BEX/Job.pm +++ b/lib/BEX/Job.pm @@ -10,11 +10,17 @@ use POSIX (); our $job_cnt = 0; +sub check_id($) { + my ($id) = @_; + return $id =~ /^([0-9A-Za-z-]+)$/; +} + sub new($;$) { my ($class, $id) = @_; my $job = { }; bless $job; if (defined $id) { + check_id($id) or die "Invalid job ID"; $job->{'ID'} = $id; } else { $job_cnt++; @@ -42,7 +48,7 @@ sub new_from_file($$;$) { close T; $job->{'Subject'} //= '?'; $job->{'ID'} or die "Cannot load $file: Missing ID"; - $job->{'ID'} !~ /\.[a-z]+$/ or die "Cannot load $file: Invalid ID syntax"; + check_id($job->{'ID'}) or die "Cannot load $file: Invalid ID syntax"; return bless $job; } diff --git a/lib/BEX/Queue.pm b/lib/BEX/Queue.pm index 9b93047..00bb6cb 100644 --- a/lib/BEX/Queue.pm +++ b/lib/BEX/Queue.pm @@ -23,13 +23,26 @@ sub new($;$) { return bless $queue; } +sub log_file($$) { + my ($queue, $machine, $jid) = @_; + return $queue->host_dir($machine) . '/' . $jid . '.log'; +} + # Most actions have to be logged by the caller sub log($$$$;$) { my ($queue, $mach, $jid, $stat, $msg) = @_; - my $fh = $queue->{'LogFH'} //= new IO::File $queue->{'Name'} . '/log', '>>' or die "Cannot open log: $!"; - my $m = join(" ", POSIX::strftime("%Y-%m-%d %H:%M:%S", localtime), $mach, $jid, $stat); + my $t = POSIX::strftime("%Y-%m-%d %H:%M:%S", localtime); + my $m = join(" ", $t, $mach, $jid, $stat); $m .= " $msg" if defined $msg; + + my $fh = $queue->{'LogFH'} //= new IO::File $queue->{'Name'} . '/log', '>>' or die "Cannot open log: $!"; print $fh "$m\n"; + + # Append to the per-job log file + if (open L, '>>', $queue->log_file($mach, $jid)) { + print L "### $m\n"; + close L; + } } sub host_dir($$) { @@ -92,6 +105,7 @@ sub remove($$) { unlink $queue->queue_file($machine, $jid); unlink $queue->status_file($machine, $jid); unlink $queue->temp_file($machine, $jid); + unlink $queue->log_file($machine, $jid); } sub job_metadata($$) { -- 2.39.2