2 # Batch EXecutor 2.0 -- Parallel Execution Using Screen
3 # (c) 2011 Martin Mares <mj@ucw.cz>
# Name of the screen session in which job windows are opened; 'BEX' unless
# overridden by --session.
16 my $screen_session = 'BEX';
# Command-line option specifications and the variables they fill in.
# NOTE(review): the option-parsing call that takes these pairs is not part
# of this excerpt.
20 "q|queue=s" => \$queue_name,
21 "session=s" => \$screen_session,
22 "text!" => \$text_mode,
24 Usage: bprun [<options>] [[!]<machine-or-class> ...]
27 -q, --queue=<name> Run jobs in the given queue
28 --session=<name> Job windows should be opened within the given screen
29 session (default: BEX)
30 --text Use textual user interface instead of curses
# Probe the screen session by sending it a 'select .' command; a non-zero
# exit status in $? is reported as the session not being found.
33 system 'screen', '-S', $screen_session, '-X', 'select', '.';
34 !$? or die "Screen session $screen_session not found\n";
# Open the queue named by $queue_name.
36 my $queue = BEX::Queue->new($queue_name);
# Scan the queue for every machine given on the command line; with no
# arguments the pattern '*' is handed to parse_machine_list instead.
39 for my $m (BEX::Config::parse_machine_list(@ARGV ? @ARGV : '*')) {
40 my @jobs = $queue->scan($m);
# Array in scalar context: store the number of jobs found for this machine.
43 $job_counter{$m} = @jobs;
# The status FIFO is called 'status-fifo' under the queue's 'Name' path.
46 my $fifo_name = $queue->{'Name'} . '/status-fifo';
# Create the FIFO with mode 0700 (accessible to the owner only).
48 mkfifo $fifo_name, 0700 or die "Cannot create $fifo_name: $!";
# Opened read-write ('+<').
# NOTE(review): presumably so that the open does not block waiting for a
# writer and reads never see EOF when a writer exits -- confirm.
49 open FIFO, '+<', $fifo_name or die "Cannot open $fifo_name: $!";
# Pick the user interface backend according to $text_mode.
51 my $ui = ($text_mode ? BEX::bprun::text->new : BEX::bprun::curses->new);
# Upper limit on the number of machines processed at the same time.
54 my $max = $BEX::Config::max_parallel_jobs;
# Main loop: runs while any machine is still marked as running or is still
# waiting in @machines.
56 while (keys %running || @machines) {
# Start the next waiting machine if fewer than $max are currently running.
57 if (@machines && keys %running < $max) {
58 my $mach = shift @machines;
59 $ui->update($mach, undef, 'START');
# Assemble the screen command line: window title set to the machine name,
# the target session (if defined), and an '-X screen' command that runs
# ./brun for this machine with the status FIFO passed on its command line.
60 my @scr = ('screen', '-t', $mach);
61 push @scr, '-S', $screen_session if defined $screen_session;
62 push @scr, '-X', 'screen', './brun', "--status-fifo=$fifo_name", $mach;
# A non-zero $? is shown in the UI as an internal error for this machine.
# NOTE(review): the statement that actually runs @scr is not part of this
# excerpt.
64 !$? or $ui->update($mach, undef, 'INTERR');
65 $running{$mach} = 'START';
# Status messages (matched against $_) have the form
# "! <machine> <job-id> <status>", each field free of whitespace.
70 my ($mach, $jid, $stat) = /^! (\S+) (\S+) (\S+)$/;
# NOTE(review): the condition guarding this error report is not visible
# here; presumably it fires when the pattern above did not match.
72 $ui->err("Received invalid status message <$_>");
# Messages for machines that are not tracked in %running are reported as
# errors.
75 if (!defined $running{$mach}) {
76 $ui->err("Received status message <$_> for a machine which does not run");
# Record the latest status of the machine and forward it to the UI; a job
# id of '-' is passed on as undef.
79 $running{$mach} = $stat;
80 $ui->update($mach, ($jid eq '-' ? undef : $jid), $stat);
# 'DONE' removes the machine from %running, which lets the loop start
# another one or terminate.
81 if ($stat eq 'DONE') {
82 delete $running{$mach};
# Plain-text user interface: status updates are printed line by line.
90 package BEX::bprun::text;
# Arguments: UI object, machine name, job id (may be undef), status string.
100 my ($ui, $mach, $jid, $stat) = @_;
# Output format: "<machine>[:<job-id>] <status>"; an undefined machine is
# printed as '-' and the ":<job-id>" part is omitted when no job id is given.
101 print +($mach // '-'), (defined($jid) ? ":$jid" : ""), " $stat\n";
# Error messages go to STDERR with an "ERROR: " prefix.
106 print STDERR "ERROR: $msg\n";
# Curses-based user interface.
109 package BEX::bprun::curses;
# Per-host bookkeeping, keyed by machine name: number of active and failed
# jobs, and the job id and status of the most recent failure.
129 my %host_active_jobs;
131 my %host_failed_jobs;
132 my %host_last_fail_job;
133 my %host_last_fail_stat;
# Refuse to run without colour support offering at least 8 colours and
# 8 colour pairs.
138 has_colors && COLORS >= 8 && COLOR_PAIRS >= 8 or die "Your terminal is too dumb for me\n";
# Colour pairs (foreground, background):
#   1 = yellow on blue, 2 = yellow on red, 3 = yellow on black,
#   4 = red on black.
144 init_pair(1, COLOR_YELLOW, COLOR_BLUE);
145 init_pair(2, COLOR_YELLOW, COLOR_RED);
146 init_pair(3, COLOR_YELLOW, COLOR_BLACK);
147 init_pair(4, COLOR_RED, COLOR_BLACK);
# Number of rows left for hosts: the window height minus two lines.
149 $nrows = $C->getmaxy - 2;
# Never run more machines in parallel than there are host rows.
150 if ($BEX::Config::max_parallel_jobs > $nrows) {
151 $BEX::Config::max_parallel_jobs = $nrows;
# Reset all global and per-host counters.
154 $total_hosts = $active_hosts = $done_hosts = $failed_hosts = 0;
155 $total_jobs = $active_jobs = $done_jobs = $failed_jobs = 0;
156 %host_active_jobs = %host_done_jobs = %host_failed_jobs = %host_last_fail_job = %host_last_fail_stat = ();
# Add up the job counts of all machines and zero their per-host counters.
157 for my $m (@machines) {
159 $total_jobs += $job_counter{$m};
160 $host_active_jobs{$m} = $host_done_jobs{$m} = $host_failed_jobs{$m} = 0;
# Quit prompt on the last screen line, drawn with colour pair 1 in bold.
170 $C->bkgdset(COLOR_PAIR(1) | A_BOLD);
171 $C->addstr($C->getmaxy-1, 0, "Press any key to quit...");
# Error message on the last screen line, drawn with colour pair 2 in bold
# and limited to the window width.
179 $C->bkgdset(COLOR_PAIR(2) | A_BOLD);
180 $C->addnstr($C->getmaxy-1, 0, "ERROR: $msg", $C->getmaxx);
# Redraw the summary line at the top of the screen (row 0).
# NOTE(review): the ($) prototype declares a single scalar argument; no use
# of that argument is visible in this excerpt.
185 sub refresh_status($) {
186 $C->bkgdset(COLOR_PAIR(1) | A_BOLD);
# Waiting = total minus active, done and failed, for hosts and jobs alike.
187 my $waiting_hosts = $total_hosts - $active_hosts - $done_hosts - $failed_hosts;
188 my $waiting_jobs = $total_jobs - $active_jobs - $done_jobs - $failed_jobs;
# Suffixes in the summary: R = active, D = done, E = failed, W = waiting.
# The text is limited to the window width.
189 $C->addnstr(0, 0, "BEX Hosts: ${active_hosts}R ${done_hosts}D ${failed_hosts}E ${waiting_hosts}W Jobs: ${active_jobs}R ${done_jobs}D ${failed_jobs}E ${waiting_jobs}W", $C->getmaxx);
# Look up the slot already assigned to this machine, if there is one.
197 if (defined ($s = $by_host{$mach})) {
# Walk over all host rows looking for one to use.
201 for my $i (0..$nrows-1) {
# Among rows whose slot is marked 'Gone', keep the one with the smallest
# 'Gone' stamp as the best candidate.
207 } elsif ($r->{'Gone'} && (!$best || $best->{'Gone'} > $r->{'Gone'})) {
# Drop the host-to-slot mapping of the slot being taken over.
213 delete $by_host{$best->{'Host'}};
# Bind the slot to the machine and to the chosen row, and register it in
# both lookup tables (by host name and by row index).
215 $s->{'Host'} = $mach;
216 $s->{'Row'} = $besti;
217 $by_host{$mach} = $s;
218 $by_row[$besti] = $s;
# Ever-increasing counter; its current value is stamped into a slot's 'Gone'
# field, so a smaller stamp means the slot was marked earlier.
# NOTE(review): the stamping presumably happens in delete_slot -- its header
# is not part of this excerpt.
223 my $gone_counter = 1;
226 $s->{'Gone'} = $gone_counter++;
# Draw the row of one host from the data stored in its slot $s.
231 my $mach = $s->{'Host'};
# A missing status is shown as "?"; a missing job id as an empty string.
232 my $stat = $s->{'Status'} // "?";
233 my $jid = $s->{'Job'} // "";
# The job name is looked up in the queue only when a job id is set.
234 my $jname = ($jid eq "" ? "" : $queue->job_name($jid));
# Colour selection: hosts with active jobs are drawn bold, in pair 4 (red on
# black) if any of their jobs failed, in pair 3 (yellow on black) otherwise.
235 if ($host_active_jobs{$mach}) {
236 if ($host_failed_jobs{$mach}) {
237 $C->bkgdset(COLOR_PAIR(4) | A_BOLD);
239 $C->bkgdset(COLOR_PAIR(3) | A_BOLD);
# Hosts with failed jobs drawn in pair 4 without bold.
242 if ($host_failed_jobs{$mach}) {
243 $C->bkgdset(COLOR_PAIR(4));
# Host rows start one line below the summary line at row 0.
248 my $r = $s->{'Row'} + 1;
# Machine name, padded or truncated to exactly 20 characters.
249 $C->addstr($r, 0, sprintf("%-20.20s", $mach));
# Number of failed jobs ("E"), in pair 4.
250 if ($host_failed_jobs{$mach}) {
251 $C->bkgdset(COLOR_PAIR(4));
252 $C->addstr(sprintf("%3dE ", $host_failed_jobs{$mach}));
# Done ("D") and waiting ("W") job counts; waiting is the machine's job
# count minus its done and failed jobs.
258 $C->addstr(sprintf("%3dD %3dW", $host_done_jobs{$mach}, $job_counter{$mach} - $host_done_jobs{$mach} - $host_failed_jobs{$mach}));
# For a finished host with a recorded failure, show the status and job id
# of its most recent failure in pair 4.
259 if ($stat eq 'DONE') {
260 if (defined $host_last_fail_stat{$mach}) {
261 $C->bkgdset(COLOR_PAIR(4));
262 $C->addstr(sprintf(" %-8s %s", $host_last_fail_stat{$mach}, $host_last_fail_job{$mach}));
# Status column text: current status (left-aligned, at least 8 characters)
# followed by the job name.
265 my $text = sprintf(" %-8s %s", $stat, $jname);
# Arguments: UI object, machine name, job id (may be undef), status string.
273 my ($ui, $mach, $jid, $stat) = @_;
# Slot for this machine, obtained from get_slot.
274 my $s = get_slot($mach);
# One job of this host moves from active to done.
279 $host_active_jobs{$mach}--;
280 $host_done_jobs{$mach}++;
# Failure statuses.
# NOTE(review): 'when' with an array reference relies on smartmatch, which
# is experimental/deprecated in recent Perl releases.
282 when (['FAILED', 'INTERR', 'NOPING', 'PREPFAIL']) {
# One job of this host moves from active to failed; remember the job id and
# status of this latest failure for display.
285 $host_active_jobs{$mach}--;
286 $host_failed_jobs{$mach}++;
287 $host_last_fail_job{$mach} = $jid;
288 $host_last_fail_stat{$mach} = $stat;
292 if ($host_failed_jobs{$mach}) {
# One more active job on this host.
303 $host_active_jobs{$mach}++;
# Count every job of this machine as failed, globally and per host.
# NOTE(review): the condition selecting this branch is not part of this
# excerpt.
308 $failed_jobs += $job_counter{$mach};
309 $host_failed_jobs{$mach} += $job_counter{$mach};
# Single job failure: same bookkeeping as in the failure branch above.
313 $host_active_jobs{$mach}--;
314 $host_failed_jobs{$mach}++;
315 $host_last_fail_job{$mach} = $jid;
316 $host_last_fail_stat{$mach} = $stat;
# Progress statuses.
319 when (['START', 'PING', 'SEND', 'RUN']) {
# Unrecognised statuses are reported through the UI's error line.
322 $ui->err("Received unknown job status $stat");
# Store the latest status in the slot.
326 $s->{'Status'} = $stat;
# On 'DONE' the slot is handed to delete_slot.
328 if ($stat eq 'DONE') { delete_slot($s); }