my $queue_name;
my $text_mode;
+my $debug = 0;
my $debug_children;
sub usage() {
Usage: bex prun [<options>] [[!]<machine-or-class> ...]
Options:
- --debug-children Log stdout and stderr to ./debug.log
+ --debug Log status changes to stderr
+ --debug-children Log stdout and stderr of child processes to ./debug.log
-p, --parallel=<n> Set limit on the number of jobs run in parallel
-q, --queue=<name> Run jobs in the given queue
--text Use plain-text user interface instead of curses
GetOptions(
"q|queue=s" => \$queue_name,
"text!" => \$text_mode,
+ "debug+" => \$debug,
"debug-children!" => \$debug_children,
"p|parallel=i" => \$BEX::Config::max_parallel_jobs,
"help" => \&usage,
"--queue=" . $queue->{'Name'},
$mach,
);
- push @cmd, ">debug.log", "2>&1" if $debug_children;
+ push @cmd, ">>debug.log", "2>&1" if $debug_children;
push @tm, join(" ", @cmd);
system @tm;
!$? or $ui->update($mach, undef, 'INTERR');
}
$_ = <FIFO>;
chomp;
+ print STDERR "<< $_\n" if $debug;
my ($mach, $jid, $stat) = /^! (\S+) (\S+) (\S+)$/;
if (!defined $stat) {
$ui->err("Received invalid status message <$_>");
my $C;
my $nrows;
-my @by_row = ();
-my %by_host = ();
+my @by_row;
+my %by_host;
my %host_state;
my %host_cnt;
my %host_last_fail_job;
my %host_last_fail_stat;
+my @states;
+my %state_to_pri;
+
+BEGIN {
+ @by_row = ();
+ %by_host = ();
+ @states = qw(unknown ready running done failed);
+ %state_to_pri = (
+ 'unknown' => 0,
+ 'ready' => 1,
+ 'done' => 2,
+ 'failed' => 3,
+ 'running' => 4,
+ );
+}
+
sub new($) {
$C = new Curses;
start_color;
init_pair(2, COLOR_YELLOW, COLOR_RED);
init_pair(3, COLOR_YELLOW, COLOR_BLACK);
init_pair(4, COLOR_RED, COLOR_BLACK);
+ init_pair(5, COLOR_BLUE, COLOR_BLACK);
$nrows = $C->getmaxy - 2;
if ($BEX::Config::max_parallel_jobs > $nrows) {
%host_state = %host_cnt = ();
%job_state = %job_cnt = ();
- for my $s ('unknown', 'ready', 'running', 'done', 'failed') {
+ for my $s (@states) {
$host_cnt{$s} = 0;
$job_cnt{'*'}{$s} = 0;
}
sub set_host_status($$$) {
my ($ui, $mach, $stat) = @_;
+ print STDERR "H: $mach $stat\n" if $debug;
my $prev_stat = $host_state{$mach};
if (defined $prev_stat) {
$host_cnt{$prev_stat}--;
} else {
- for my $s ('unknown', 'ready', 'running', 'done', 'failed') { $job_cnt{$mach}{$s} = 0; }
+ for my $s (@states) { $job_cnt{$mach}{$s} = 0; }
}
$host_state{$mach} = $stat;
$host_cnt{$stat}++;
sub set_job_status($$$$) {
my ($ui, $mach, $jid, $stat) = @_;
+ print STDERR "J: $mach $jid $stat\n" if $debug;
my $prev_stat = $job_state{$mach}{$jid} // 'unknown';
$job_cnt{$mach}{$prev_stat}--;
$job_cnt{'*'}{$prev_stat}--;
sub get_slot($) {
my ($mach) = @_;
- my $s;
- if (defined ($s = $by_host{$mach})) {
- delete $s->{'Gone'};
- } else {
- my ($best, $besti);
- for my $i (0..$nrows-1) {
- my $r = $by_row[$i];
- if (!defined $r) {
- $besti = $i;
- $best = undef;
- last;
- } elsif ($r->{'Gone'} && (!$best || $best->{'Gone'} > $r->{'Gone'})) {
- $besti = $i;
- $best = $r;
- }
+ my $s = $by_host{$mach};
+ if (!defined $s) {
+ $s = $by_host{$mach} = { 'Host' => $mach };
+ }
+ return $s;
+}
+
+my $place_counter;
+
+sub place_slot($) {
+ my ($s) = @_;
+ $s->{'LastUpdate'} = $place_counter++;
+ return $s if defined $s->{'Row'};
+
+ my $pri = $state_to_pri{$host_state{$s->{'Host'}}};
+ my ($best, $besti);
+ my $bestpri = 99;
+ for my $i (0..$nrows-1) {
+ my $r = $by_row[$i];
+ if (!defined $r) {
+ $besti = $i;
+ $best = undef;
+ last;
+ }
+ my $rpri = $state_to_pri{$host_state{$r->{'Host'}}};
+ print STDERR "I: ... considering ", $r->{'Host'}, " (pri $rpri, lu ", $r->{'LastUpdate'}, ")\n" if $debug > 1;
+ next if $rpri > $pri;
+
+ if ($rpri < $bestpri ||
+ $rpri == $bestpri && $r->{'LastUpdate'} < $best->{'LastUpdate'}) {
+ # Trick: $best must be defined, as otherwise $bestpri == 99
+ $best = $r;
+ $besti = $i;
+ $bestpri = $rpri;
}
+ }
+
+ if (defined $besti) {
if ($best) {
- delete $by_host{$best->{'Host'}};
+ print STDERR "I: Replacing ", $best->{'Host'}, " (pri $bestpri)\n";
+ delete $best->{'Row'};
}
- $s->{'Host'} = $mach;
+ print STDERR "I: Allocated ", $s->{'Host'}, " \@$besti (pri $pri)\n";
$s->{'Row'} = $besti;
- $by_host{$mach} = $s;
$by_row[$besti] = $s;
+ } else {
+ print STDERR "I: No place for ", $s->{'Host'}, " (pri $pri)\n" if $debug;
}
- return $s;
-}
-
-my $gone_counter = 1;
-sub delete_slot($) {
- my ($s) = @_;
- $s->{'Gone'} = $gone_counter++;
}
sub redraw_slot($) {
my ($s) = @_;
+ my $r = $s->{'Row'} // return;
+ $r++;
my $mach = $s->{'Host'};
my $stat = $s->{'Status'} // "?";
my $jid = $s->{'Job'} // "";
$C->bkgdset(0);
}
}
- my $r = $s->{'Row'} + 1;
$C->addstr($r, 0, sprintf("%-20.20s", $mach));
if ($jcnt->{'failed'}) {
$C->bkgdset(COLOR_PAIR(4));
$C->addstr(sprintf("%3dD %3dW", $jcnt->{'done'}, $jcnt->{'ready'}));
if ($stat eq 'DONE') {
if (defined $host_last_fail_stat{$mach}) {
- $C->bkgdset(COLOR_PAIR(4));
+ $C->bkgdset(($host_last_fail_stat{$mach} eq 'NOPING') ? COLOR_PAIR(5) : COLOR_PAIR(4));
$C->addstr(sprintf(" %-8s %s", $host_last_fail_stat{$mach}, $queue->job_name($host_last_fail_job{$mach})));
}
} else {
sub update($$$$) {
my ($ui, $mach, $jid, $stat) = @_;
- my $s = get_slot($mach);
given ($stat) {
when ('READY') {
# Pseudo-state generated internally
$ui->err("Received unknown job status $stat");
}
}
+ my $s = get_slot($mach);
$s->{'Job'} = $jid;
$s->{'Status'} = $stat;
+ place_slot($s);
redraw_slot($s);
- if ($stat eq 'DONE') { delete_slot($s); }
$ui->refresh_status;
}