From: Martin Mares Date: Mon, 31 Oct 2011 14:50:04 +0000 (+0100) Subject: Rationalized logging X-Git-Tag: v3.0~35 X-Git-Url: http://mj.ucw.cz/gitweb/?a=commitdiff_plain;h=dce104810522c8394ef547f3098945a58c2b2dd6;p=bex.git Rationalized logging --- diff --git a/NOTES b/NOTES index ee84961..7b40ffd 100644 --- a/NOTES +++ b/NOTES @@ -48,25 +48,26 @@ Message: (Optional) human-readable message explaining the status ### Status codes ### +FAILED Job failed to execute (i.e., it returned a non-zero exit code) +INTERR Internal error of BEX (e.g., failed to read job prolog file) NEW Newly inserted job, which did not run yet NOPING Host does not respond to ping NOXFER Transfer of the job body to a temporary file on the host has failed OK Job finished successfully (this is usually not seen in the queue, since finished jobs are immediately deleted or moved to the history) -FAILED Job failed to execute (i.e., it returned a non-zero exit code) -INTERR Internal error of BEX (e.g., failed to read job prolog file) +PREP Running preparatory commands (i.e., those present in Prep header field) PREPFAIL Preparatory commands failed (i.e., those present in Prep header field) REMOVED Job removed from the queue (behavior similar to OK) +RUN Job is running -These are present only in log files and messages sent over status FIFO: +Additional status codes recorded in the log files: -RUN Job is running -SEND Sending job to the host -PREP Running preparatory commands (i.e., those present in Prep header field) +REQUEUE Attempted to put on a queue, but it already was there -Status codes not tied to a specific job (sent over status FIFO): +Additional status codes sent only over status FIFO: -PING Trying to ping the host +DONE Done with the host (job equals "-") +INIT Host ready, preparing to execute jobs (job equals "-") LOCKED Host or job not available, because it is locked by another brun -INIT Host ready, preparing to execute jobs -DONE Done with the host +PING Trying to ping the host (job equals "-") +SEND Sending job to the host diff --git a/benq b/benq index 76d7948..1f92e94 100755 --- a/benq +++ b/benq @@ -90,9 +90,10 @@ if (defined $requeue_id) { print "New job ", $job->id, "\n"; for my $m (@machines) { if ($queue->enqueue($m, $job)) { - $queue->write_job_status($m, $job->id, { 'Time' => time, 'Status' => 'NEW' }); + $queue->update_job_status($m, $job->id, 'NEW'); print "\t$m\n"; } else { + $queue->log($m, $job->id, 'REQUEUE'); print "\t$m (already queued)\n"; } } diff --git a/bq b/bq index 478b683..6fb7a17 100755 --- a/bq +++ b/bq @@ -117,8 +117,7 @@ sub do_rm() print STDERR "Cannot remove $m:", $queue->job_name($j), ", it is locked\n"; $err = 1; } else { - $queue->log($m, $j, 'REMOVED'); - $queue->write_job_status($m, $j, { 'Time' => time, 'Status' => 'REMOVED' }); + $queue->update_job_status($m, $j, 'REMOVED'); $queue->remove($m, $j); print "Removed $m:", $queue->job_name($j), "\n"; } @@ -141,11 +140,14 @@ sub do_move_to() $err = 1; } else { my $enq = $dest->enqueue($m, $job); - $dest->write_job_status($m, $job->id, { 'Time' => time, 'Status' => 'NEW', 'Message' => 'Moved to this queue' }); - $queue->log($m, $j, 'REMOVED', "Moved to another queue"); - $queue->write_job_status($m, $j, { 'Time' => time, 'Status' => 'REMOVED', 'Message' => 'Moved to another queue' }); + if ($enq) { + $dest->update_job_status($m, $job->id, 'NEW', 'Moved to this queue'); + } else { + $dest->log($m, $job->id, 'REQUEUE', 'Moved to this queue'); + } + $queue->update_job_status($m, $job->id, 'REMOVED', 'Moved from this queue'); $queue->remove($m, $j); - print "Moved $m:", $queue->job_name($j); + print "Moved $m:", $dest->job_name($j); print " (already queued)" if !$enq; print "\n"; } diff --git a/brun b/brun index 42bdf86..387347a 100755 --- a/brun +++ b/brun @@ -38,7 +38,7 @@ sub update_status($$$$;$) { print $status_fd "! $mach $job $status\n"; } if ($log_on_queue) { - $log_on_queue->log($mach, $job, $status, $msg); + $log_on_queue->update_job_status($mach, $job, $status, $msg); } } @@ -172,18 +172,8 @@ for my $mach (@machines) { update_status($mach, $jid, 'LOCKED', undef); next; } - my $stat = { - 'Time' => time, - }; print "### Running ", $job->name, " on $mach ###\n"; my ($s, $msg) = run_job($job, $queue, $mach); - - $stat->{'Status'} = $s; - $stat->{'Message'} = $msg; - $queue->write_job_status($mach, $jid, $stat); - - # Called after writing the status file, so that the front-end watching - # our status FIFO can see the new status file. update_status($mach, $jid, $s, $queue, $msg); if ($s eq 'OK') { diff --git a/lib/BEX/Queue.pm b/lib/BEX/Queue.pm index b8fb6f1..3fb94f3 100644 --- a/lib/BEX/Queue.pm +++ b/lib/BEX/Queue.pm @@ -77,7 +77,6 @@ sub enqueue($$$) { my ($queue, $machine, $job) = @_; my $qf = $queue->queue_file($machine, $job->id); if (-f $qf) { - $queue->log($machine, $job->id, 'REQUEUE'); return 0; } my $fn = $queue->job_file($job->id); @@ -85,7 +84,6 @@ sub enqueue($$$) { my $dir = $queue->host_dir($machine); -d $dir or mkdir $dir or die "Cannot create directory $dir: $!"; symlink '../../jobs/' . $job->id . '.job', $qf or die "Cannot create $qf: $!"; - $queue->log($machine, $job->id, 'QUEUE'); return 1; } @@ -114,6 +112,9 @@ sub remove($$;$) { my $dst = "$d/$jid.$suff"; if (-f $src) { rename $src, $dst or die "Cannot rename $src to $dst: $!"; + } else { + # Might be present from the previous incarnation of the same job + unlink $dst; } } } else { @@ -164,6 +165,17 @@ sub write_job_status($$$$) { rename "$sf.$$", $sf or die "Cannot rename $sf.$$ to $sf: $!"; } +sub update_job_status($$$$;$) { + my ($queue, $machine, $jid, $stat, $msg) = @_; + my $s = { + 'Time' => time, + 'Status' => $stat, + 'Message' => $msg, + }; + $queue->write_job_status($machine, $jid, $s); + $queue->log($machine, $jid, $stat, $msg); +} + sub lock_name($$$) { my ($queue, $machine, $jid) = @_; my $lock = $queue->{'Name'};