mj.ucw.cz Git - bex.git/commitdiff
Rationalized logging
author Martin Mares <mj@ucw.cz>
Mon, 31 Oct 2011 14:50:04 +0000 (15:50 +0100)
committer Martin Mares <mj@ucw.cz>
Mon, 31 Oct 2011 14:50:04 +0000 (15:50 +0100)
NOTES
benq
bq
brun
lib/BEX/Queue.pm

diff --git a/NOTES b/NOTES
index ee84961285689704ea2b066a2d975891c962e6be..7b40ffd95260661b6785893fe6f203428cdb1e0c 100644 (file)
--- a/NOTES
+++ b/NOTES
@@ -48,25 +48,26 @@ Message: <msg>                      (Optional) human-readable message explaining the status
 
 ### Status codes ###
 
+FAILED         Job failed to execute (i.e., it returned a non-zero exit code)
+INTERR         Internal error of BEX (e.g., failed to read job prolog file)
 NEW            Newly inserted job, which did not run yet
 NOPING         Host does not respond to ping
 NOXFER         Transfer of the job body to a temporary file on the host has failed
 OK             Job finished successfully (this is usually not seen in the queue, since
                finished jobs are immediately deleted or moved to the history)
-FAILED         Job failed to execute (i.e., it returned a non-zero exit code)
-INTERR         Internal error of BEX (e.g., failed to read job prolog file)
+PREP           Running preparatory commands (i.e., those present in Prep header field)
 PREPFAIL       Preparatory commands failed (i.e., those present in Prep header field)
 REMOVED                Job removed from the queue (behavior similar to OK)
+RUN            Job is running
 
-These are present only in log files and messages sent over status FIFO:
+Additional status codes recorded in the log files:
 
-RUN            Job is running
-SEND           Sending job to the host
-PREP           Running preparatory commands (i.e., those present in Prep header field)
+REQUEUE                Attempted to put on a queue, but it already was there
 
-Status codes not tied to a specific job (sent over status FIFO):
+Additional status codes sent only over status FIFO:
 
-PING           Trying to ping the host
+DONE           Done with the host (job equals "-")
+INIT           Host ready, preparing to execute jobs (job equals "-")
 LOCKED         Host or job not available, because it is locked by another brun
-INIT           Host ready, preparing to execute jobs
-DONE           Done with the host
+PING           Trying to ping the host (job equals "-")
+SEND           Sending job to the host
diff --git a/benq b/benq
index 76d79489a3f26ff178f802e9ad140e75a151937f..1f92e947972ab616ee67ba9734b8dd333c630cf9 100755 (executable)
--- a/benq
+++ b/benq
@@ -90,9 +90,10 @@ if (defined $requeue_id) {
 print "New job ", $job->id, "\n";
 for my $m (@machines) {
        if ($queue->enqueue($m, $job)) {
-               $queue->write_job_status($m, $job->id, { 'Time' => time, 'Status' => 'NEW' });
+               $queue->update_job_status($m, $job->id, 'NEW');
                print "\t$m\n";
        } else {
+               $queue->log($m, $job->id, 'REQUEUE');
                print "\t$m (already queued)\n";
        }
 }
diff --git a/bq b/bq
index 478b68384218a52232fcc84b9e9cea4bbc71d511..6fb7a1733c2b94bcd50ed0a99cee875c246d3949 100755 (executable)
--- a/bq
+++ b/bq
@@ -117,8 +117,7 @@ sub do_rm()
                                print STDERR "Cannot remove $m:", $queue->job_name($j), ", it is locked\n";
                                $err = 1;
                        } else {
-                               $queue->log($m, $j, 'REMOVED');
-                               $queue->write_job_status($m, $j, { 'Time' => time, 'Status' => 'REMOVED' });
+                               $queue->update_job_status($m, $j, 'REMOVED');
                                $queue->remove($m, $j);
                                print "Removed $m:", $queue->job_name($j), "\n";
                        }
@@ -141,11 +140,14 @@ sub do_move_to()
                                $err = 1;
                        } else {
                                my $enq = $dest->enqueue($m, $job);
-                               $dest->write_job_status($m, $job->id, { 'Time' => time, 'Status' => 'NEW', 'Message' => 'Moved to this queue' });
-                               $queue->log($m, $j, 'REMOVED', "Moved to another queue");
-                               $queue->write_job_status($m, $j, { 'Time' => time, 'Status' => 'REMOVED', 'Message' => 'Moved to another queue' });
+                               if ($enq) {
+                                       $dest->update_job_status($m, $job->id, 'NEW', 'Moved to this queue');
+                               } else {
+                                       $dest->log($m, $job->id, 'REQUEUE', 'Moved to this queue');
+                               }
+                               $queue->update_job_status($m, $job->id, 'REMOVED', 'Moved from this queue');
                                $queue->remove($m, $j);
-                               print "Moved $m:", $queue->job_name($j);
+                               print "Moved $m:", $dest->job_name($j);
                                print " (already queued)" if !$enq;
                                print "\n";
                        }
diff --git a/brun b/brun
index 42bdf86b4e07b984632eaaca1de53417abb6040d..387347a9b0c5aed9f5b168764e9821271fe51dc7 100755 (executable)
--- a/brun
+++ b/brun
@@ -38,7 +38,7 @@ sub update_status($$$$;$) {
                print $status_fd "! $mach $job $status\n";
        }
        if ($log_on_queue) {
-               $log_on_queue->log($mach, $job, $status, $msg);
+               $log_on_queue->update_job_status($mach, $job, $status, $msg);
        }
 }
 
@@ -172,18 +172,8 @@ for my $mach (@machines) {
                        update_status($mach, $jid, 'LOCKED', undef);
                        next;
                }
-               my $stat = {
-                       'Time' => time,
-               };
                print "### Running ", $job->name, " on $mach ###\n";
                my ($s, $msg) = run_job($job, $queue, $mach);
-
-               $stat->{'Status'} = $s;
-               $stat->{'Message'} = $msg;
-               $queue->write_job_status($mach, $jid, $stat);
-
-               # Called after writing the status file, so that the front-end watching
-               # our status FIFO can see the new status file.
                update_status($mach, $jid, $s, $queue, $msg);
 
                if ($s eq 'OK') {
diff --git a/lib/BEX/Queue.pm b/lib/BEX/Queue.pm
index b8fb6f166d391a1ec1539ead112fe993fc9b144e..3fb94f358d0379c487a1d055ffeeefadc9ba1850 100644 (file)
--- a/lib/BEX/Queue.pm
+++ b/lib/BEX/Queue.pm
@@ -77,7 +77,6 @@ sub enqueue($$$) {
        my ($queue, $machine, $job) = @_;
        my $qf = $queue->queue_file($machine, $job->id);
        if (-f $qf) {
-               $queue->log($machine, $job->id, 'REQUEUE');
                return 0;
        }
        my $fn = $queue->job_file($job->id);
@@ -85,7 +84,6 @@ sub enqueue($$$) {
        my $dir = $queue->host_dir($machine);
        -d $dir or mkdir $dir or die "Cannot create directory $dir: $!";
        symlink '../../jobs/' . $job->id . '.job', $qf or die "Cannot create $qf: $!";
-       $queue->log($machine, $job->id, 'QUEUE');
        return 1;
 }
 
@@ -114,6 +112,9 @@ sub remove($$;$) {
                        my $dst = "$d/$jid.$suff";
                        if (-f $src) {
                                rename $src, $dst or die "Cannot rename $src to $dst: $!";
+                       } else {
+                               # Might be present from the previous incarnation of the same job
+                               unlink $dst;
                        }
                }
        } else {
@@ -164,6 +165,17 @@ sub write_job_status($$$$) {
        rename "$sf.$$", $sf or die "Cannot rename $sf.$$ to $sf: $!";
 }
 
+sub update_job_status($$$$;$) {
+       my ($queue, $machine, $jid, $stat, $msg) = @_;
+       my $s = {
+               'Time' => time,
+               'Status' => $stat,
+               'Message' => $msg,
+       };
+       $queue->write_job_status($machine, $jid, $s);
+       $queue->log($machine, $jid, $stat, $msg);
+}
+
 sub lock_name($$$) {
        my ($queue, $machine, $jid) = @_;
        my $lock = $queue->{'Name'};