mj.ucw.cz Git - bex.git/commitdiff
bprun: Replaced magical counters by real accounting of jobs
author Martin Mares <mj@ucw.cz>
Wed, 9 Nov 2011 22:03:11 +0000 (23:03 +0100)
committer Martin Mares <mj@ucw.cz>
Wed, 9 Nov 2011 22:03:11 +0000 (23:03 +0100)
bprun

diff --git a/bprun b/bprun
index 0a9253350cc07a9ecc21fda3a6241a6726f8ffa7..7a66f0b2fff8f57b1dab40bf366ca6faeb536832 100755 (executable)
--- a/bprun
+++ b/bprun
@@ -34,15 +34,6 @@ system 'screen', '-S', $screen_session, '-X', 'select', '.';
 !$? or die "Screen session $screen_session not found\n";
 
 my $queue = BEX::Queue->new($queue_name);
-my @machines = ();
-my %job_counter = ();
-for my $m (BEX::Config::parse_machine_list(@ARGV ? @ARGV : '*')) {
-       my @jobs = $queue->scan($m);
-       @jobs or next;
-       push @machines, $m;
-       $job_counter{$m} = @jobs;
-}
-
 my $fifo_name = $queue->{'Name'} . '/status-fifo';
 unlink $fifo_name;
 mkfifo $fifo_name, 0700 or die "Cannot create $fifo_name: $!";
@@ -50,6 +41,14 @@ open FIFO, '+<', $fifo_name or die "Cannot open $fifo_name: $!";
 
 my $ui = ($text_mode ? BEX::bprun::text->new : BEX::bprun::curses->new);
 
+my @machines = ();
+# Keep only the machines whose queue scan yields at least one job, and report every such job to the UI as READY.
+for my $mach (BEX::Config::parse_machine_list(@ARGV ? @ARGV : '*')) {
+       my @found = $queue->scan($mach) or next;
+       push @machines, $mach;
+       $ui->update($mach, $_, 'READY') for @found;
+}
+
 my %running = ();
 my $max = $BEX::Config::max_parallel_jobs;
 
@@ -116,19 +115,12 @@ my $nrows;
 my @by_row = ();
 my %by_host = ();
 
-my $total_hosts;
-my $active_hosts;
-my $done_hosts;
-my $failed_hosts;
+my %host_state;
+my %host_cnt;
 
-my $total_jobs;
-my $active_jobs;
-my $done_jobs;
-my $failed_jobs;
+my %job_state;
+my %job_cnt;
 
-my %host_active_jobs;
-my %host_done_jobs;
-my %host_failed_jobs;
 my %host_last_fail_job;
 my %host_last_fail_stat;
 
@@ -151,13 +143,11 @@ sub new($) {
                $BEX::Config::max_parallel_jobs = $nrows;
        }
 
-       $total_hosts = $active_hosts = $done_hosts = $failed_hosts = 0;
-       $total_jobs = $active_jobs = $done_jobs = $failed_jobs = 0;
-       %host_active_jobs = %host_done_jobs = %host_failed_jobs = %host_last_fail_job = %host_last_fail_stat = ();
-       for my $m (@machines) {
-               $total_hosts++;
-               $total_jobs += $job_counter{$m};
-               $host_active_jobs{$m} = $host_done_jobs{$m} = $host_failed_jobs{$m} = 0;
+       %host_state = %host_cnt = ();
+       %job_state = %job_cnt = ();
+       for my $s ('unknown', 'ready', 'running', 'done', 'failed') {
+               $host_cnt{$s} = 0;
+               $job_cnt{'*'}{$s} = 0;
        }
 
        my $ui = bless {};
@@ -182,11 +172,41 @@ sub err($$) {
        $C->refresh;
 }
 
+# Put host $mach into state $stat and keep the per-state host totals in %host_cnt in step with %host_state.
+sub set_host_status($$$) {
+       my ($ui, $mach, $stat) = @_;
+       my $old = $host_state{$mach};
+       if (!defined $old) {
+               # First time this host is seen: give it a zeroed set of per-state job counters.
+               $job_cnt{$mach}{$_} = 0 for ('unknown', 'ready', 'running', 'done', 'failed');
+       } else { $host_cnt{$old}--; }
+       $host_state{$mach} = $stat;
+       $host_cnt{$stat}++;
+}
+
+# Record that job $jid on host $mach is now in state $stat; updates the per-host tally and the global one kept under '*'.
+sub set_job_status($$$$) {
+       my ($ui, $mach, $jid, $stat) = @_;
+       my $prev_stat = $job_state{$mach}{$jid};
+       # A job seen for the first time was never counted under any state, so only take back a state that was recorded.
+       if (defined $prev_stat) { $job_cnt{$_}{$prev_stat}-- for ($mach, '*'); }
+       $job_state{$mach}{$jid} = $stat;
+       $job_cnt{$_}{$stat}++ for ($mach, '*');
+}
+
 sub refresh_status($) {
        $C->bkgdset(COLOR_PAIR(1) | A_BOLD);
-       my $waiting_hosts = $total_hosts - $active_hosts - $done_hosts - $failed_hosts;
-       my $waiting_jobs = $total_jobs - $active_jobs - $done_jobs - $failed_jobs;
-       $C->addnstr(0, 0, "BEX  Hosts: ${active_hosts}R ${done_hosts}D ${failed_hosts}E ${waiting_hosts}W  Jobs: ${active_jobs}R ${done_jobs}D ${failed_jobs}E ${waiting_jobs}W", $C->getmaxx);
+       $C->addnstr(0, 0,	# summary line at the top-left corner, limited to the window width
+               sprintf("BEX  Hosts: %dR %dD %dE %dW  Jobs: %dR %dD %dE %dW",
+                       $host_cnt{'running'},
+                       $host_cnt{'done'},
+                       $host_cnt{'failed'},
+                       $host_cnt{'ready'},	# the W figure is now the count of hosts in state 'ready'
+                       $job_cnt{'*'}{'running'},	# the '*' key holds the totals over all hosts
+                       $job_cnt{'*'}{'done'},
+                       $job_cnt{'*'}{'failed'},
+                       $job_cnt{'*'}{'ready'},	# the W figure is now the count of jobs in state 'ready'
+               ), $C->getmaxx);
        $C->clrtoeol;
        $C->refresh;
 }
@@ -232,14 +252,15 @@ sub redraw_slot($) {
        my $stat = $s->{'Status'} // "?";
        my $jid = $s->{'Job'} // "";
        my $jname = ($jid eq "" ? "" : $queue->job_name($jid));
-       if ($host_active_jobs{$mach}) {
-               if ($host_failed_jobs{$mach}) {
+       my $jcnt = $job_cnt{$mach};
+       if ($jcnt->{'running'}) {
+               if ($jcnt->{'failed'}) {
                        $C->bkgdset(COLOR_PAIR(4) | A_BOLD);
                } else {
                        $C->bkgdset(COLOR_PAIR(3) | A_BOLD);
                }
        } else {
-               if ($host_failed_jobs{$mach}) {
+               if ($jcnt->{'failed'}) {
                        $C->bkgdset(COLOR_PAIR(4));
                } else {
                        $C->bkgdset(0);
@@ -247,19 +268,19 @@ sub redraw_slot($) {
        }
        my $r = $s->{'Row'} + 1;
        $C->addstr($r, 0, sprintf("%-20.20s", $mach));
-       if ($host_failed_jobs{$mach}) {
+       if ($jcnt->{'failed'}) {
                $C->bkgdset(COLOR_PAIR(4));
-               $C->addstr(sprintf("%3dE ", $host_failed_jobs{$mach}));
+               $C->addstr(sprintf("%3dE ", $jcnt->{'failed'}));
        } else {
                $C->bkgdset(0);
                $C->addstr("     ");
        }
        $C->bkgdset(0);
-       $C->addstr(sprintf("%3dD %3dW", $host_done_jobs{$mach}, $job_counter{$mach} - $host_done_jobs{$mach} - $host_failed_jobs{$mach}));
+       $C->addstr(sprintf("%3dD %3dW", $jcnt->{'done'}, $jcnt->{'ready'}));
        if ($stat eq 'DONE') {
                if (defined $host_last_fail_stat{$mach}) {
                        $C->bkgdset(COLOR_PAIR(4));
-                       $C->addstr(sprintf("  %-8s %s", $host_last_fail_stat{$mach}, $host_last_fail_job{$mach}));
+                       $C->addstr(sprintf("  %-8s %s", $host_last_fail_stat{$mach}, $queue->job_name($host_last_fail_job{$mach})));
                }
        } else {
                my $text = sprintf("  %-8s %s", $stat, $jname);
@@ -273,45 +294,38 @@ sub update($$$$) {
        my ($ui, $mach, $jid, $stat) = @_;
        my $s = get_slot($mach);
        given ($stat) {
+               when ('READY') {
+                       # Pseudo-state generated internally
+                       $ui->set_host_status($mach, 'ready');
+                       $ui->set_job_status($mach, $jid, 'ready');
+               }
                when ('OK') {
-                       $active_jobs--;
-                       $done_jobs++;
-                       $host_active_jobs{$mach}--;
-                       $host_done_jobs{$mach}++;
+                       $ui->set_job_status($mach, $jid, 'done');
                }
                when (['FAILED', 'INTERR', 'NOPING', 'PREPFAIL']) {
-                       $active_jobs--;
-                       $failed_jobs++;
-                       $host_active_jobs{$mach}--;
-                       $host_failed_jobs{$mach}++;
+                       $ui->set_job_status($mach, $jid, 'failed');
                        $host_last_fail_job{$mach} = $jid;
                        $host_last_fail_stat{$mach} = $stat;
                }
                when ('DONE') {
-                       $active_hosts--;
-                       if ($host_failed_jobs{$mach}) {
-                               $failed_hosts++;
+                       if ($job_cnt{$mach}{'failed'}) {
+                               $ui->set_host_status($mach, 'failed');
                        } else {
-                               $done_hosts++;
+                               $ui->set_host_status($mach, 'done');
                        }
                }
                when ('INIT') {
-                       if (defined $jid) {
-                               $active_hosts++;
-                       } else {
-                               $active_jobs++;
-                               $host_active_jobs{$mach}++;
-                       }
+                       $ui->set_host_status($mach, 'running');
+                       $ui->set_job_status($mach, $jid, 'running') if defined $jid;
                }
                when ('LOCKED') {
-                       if ($jid eq '-') {
-                               $failed_jobs += $job_counter{$mach};
-                               $host_failed_jobs{$mach} += $job_counter{$mach};
+                       if (defined $jid) {
+                               $ui->set_job_status($mach, $jid, 'failed');
                        } else {
-                               $active_jobs--;
-                               $failed_jobs++;
-                               $host_active_jobs{$mach}--;
-                               $host_failed_jobs{$mach}++;
+                               for my $j (keys %{$job_state{$mach}}) {
+                                       $ui->set_job_status($mach, $jid, 'failed');
+                               }
+                               $ui->set_host_status($mach, 'failed');
                                $host_last_fail_job{$mach} = $jid;
                                $host_last_fail_stat{$mach} = $stat;
                        }