2 # Batch EXecutor 3.0 -- Parallel Execution Using Screen
3 # (c) 2011-2012 Martin Mares <mj@ucw.cz>
18 Usage: bex prun [<options>] [[!]<machine-or-class> ...]
21 --debug-children Log stdout and stderr to ./debug.log
22 -p, --parallel=<n> Set limit on the number of jobs run in parallel
23 -q, --queue=<name> Run jobs in the given queue
24 --text Use plain-text user interface instead of curses
# Command-line option table: each spec is bound to the variable that
# receives its value.  The "!" suffix makes --text and --debug-children
# negatable booleans; -p/--parallel writes an integer directly into
# $BEX::Config::max_parallel_jobs.
30 "q|queue=s" => \$queue_name,
31 "text!" => \$text_mode,
32 "debug-children!" => \$debug_children,
33 "p|parallel=i" => \$BEX::Config::max_parallel_jobs,
# Any option-parsing failure aborts with a pointer to --help.
35 ) or die "Try `bex prun --help' for more information.\n";
# A non-zero exit status of `tmux has-session` is treated as
# "tmux is not running" and aborts the program.
37 system 'tmux', 'has-session';
38 !$? or die "You need to start tmux first.\n";
# Open the requested queue; the status FIFO lives inside the queue's directory.
40 my $queue = BEX::Queue->new($queue_name);
41 my $fifo_name = $queue->{'Path'} . '/status-fifo';
# Create the FIFO accessible to the owner only and open it read-write.
# NOTE(review): '+<' is presumably used so that the open does not block
# waiting for a writer -- confirm.
43 mkfifo $fifo_name, 0700 or die "Cannot create $fifo_name: $!";
44 open FIFO, '+<', $fifo_name or die "Cannot open $fifo_name: $!";
# Select the user-interface implementation according to --text.
46 my $ui = ($text_mode ? BEX::bprun::text->new : BEX::bprun::curses->new);
# Walk over the machines named on the command line ('*' when none were
# given), queue each machine for execution and announce its jobs as READY.
49 for my $mach (BEX::Config::parse_machine_list(@ARGV ? @ARGV : '*')) {
50 my @jobs = $queue->scan($mach);
52 push @machines, $mach;
53 for (@jobs) { $ui->update($mach, $_, 'READY'); }
# Limit on the number of machines processed at the same time.
57 my $max = $BEX::Config::max_parallel_jobs;
# Main loop: keep going while some machine is running or still waiting.
59 while (keys %running || @machines) {
# Start the next waiting machine if we are below the parallelism limit.
60 if (@machines && keys %running < $max) {
61 my $mach = shift @machines;
62 $ui->update($mach, undef, 'START');
# The child runs in its own tmux window named after the machine; -d keeps
# the new window from becoming the current one.
63 my @tm = ('tmux', 'new-window', '-n', $mach, '-d');
# An unset PERL5LIB is treated as an empty string.
64 my $P5LIB = $ENV{"PERL5LIB"} // "";
# Pieces of the shell command line executed inside the window.
# NOTE(review): values are wrapped in single quotes and later joined with
# spaces into one string; a path containing a single quote would break
# this quoting.
66 "BEX_HOME='$BEX::Config::home'",
67 "BEX_LIB='$BEX::Config::lib'",
69 "$BEX::Config::lib/bin/bex-run",
70 "--status-fifo=$fifo_name",
71 "--queue=" . $queue->{'Name'},
# With --debug-children, the child's stdout and stderr go to ./debug.log.
74 push @cmd, ">debug.log", "2>&1" if $debug_children;
# tmux receives the whole command as a single argument.
75 push @tm, join(" ", @cmd);
# A non-zero $? is reported to the UI as INTERR for this machine.
# NOTE(review): the machine is recorded as running with state START even
# after INTERR was reported -- confirm that a DONE message still arrives
# in that case, otherwise the main loop never terminates.
77 !$? or $ui->update($mach, undef, 'INTERR');
78 $running{$mach} = 'START';
# Status messages have the form "! <machine> <job-id> <status>".
# NOTE(review): presumably $_ holds a line read from the status FIFO --
# the read itself is not visible here.
83 my ($mach, $jid, $stat) = /^! (\S+) (\S+) (\S+)$/;
85 $ui->err("Received invalid status message <$_>");
# Messages for machines we are not currently running are reported as errors.
88 if (!defined $running{$mach}) {
89 $ui->err("Received status message <$_> for a machine which does not run");
# Remember the latest status; a job id of '-' is handed to the UI as undef.
92 $running{$mach} = $stat;
93 $ui->update($mach, ($jid eq '-' ? undef : $jid), $stat);
# DONE removes the machine from the running set.
94 if ($stat eq 'DONE') {
95 delete $running{$mach};
# Plain-text user interface: status changes and errors are simply printed.
103 package BEX::bprun::text;
# Status update: prints "<machine>[:<job>] <status>" on stdout, with '-'
# standing in for an undefined machine.
113 my ($ui, $mach, $jid, $stat) = @_;
114 print +($mach // '-'), (defined($jid) ? ":$jid" : ""), " $stat\n";
# Error report: goes to stderr with an "ERROR: " prefix.
119 print STDERR "ERROR: $msg\n";
# Curses-based user interface.
122 package BEX::bprun::curses;
# Per-host record of the most recent failure: the job id and the status
# that was reported for it.
138 my %host_last_fail_job;
139 my %host_last_fail_stat;
# Colour support with at least 8 colours and 8 colour pairs is mandatory.
144 has_colors && COLORS >= 8 && COLOR_PAIRS >= 8 or die "Your terminal is too dumb for me\n";
# Colour pairs (foreground, background) used by the drawing code below.
150 init_pair(1, COLOR_YELLOW, COLOR_BLUE);
151 init_pair(2, COLOR_YELLOW, COLOR_RED);
152 init_pair(3, COLOR_YELLOW, COLOR_BLACK);
153 init_pair(4, COLOR_RED, COLOR_BLACK);
154 init_pair(5, COLOR_BLUE, COLOR_BLACK);
# Number of host rows: the screen height minus two lines.
# NOTE(review): the two lines are presumably the status line at the top
# and the message line at the bottom -- confirm.
156 $nrows = $C->getmaxy - 2;
# Never run more machines in parallel than there are rows to show them.
157 if ($BEX::Config::max_parallel_jobs > $nrows) {
158 $BEX::Config::max_parallel_jobs = $nrows;
# Start from empty host/job state and zeroed global ('*') job counters.
161 %host_state = %host_cnt = ();
162 %job_state = %job_cnt = ();
163 for my $s ('unknown', 'ready', 'running', 'done', 'failed') {
165 $job_cnt{'*'}{$s} = 0;
# Prompt shown on the last screen line, in bold colour pair 1.
175 $C->bkgdset(COLOR_PAIR(1) | A_BOLD);
176 $C->addstr($C->getmaxy-1, 0, "Press any key to quit...");
# Error messages also go to the last screen line, in bold colour pair 2,
# cut to the screen width.
184 $C->bkgdset(COLOR_PAIR(2) | A_BOLD);
185 $C->addnstr($C->getmaxy-1, 0, "ERROR: $msg", $C->getmaxx);
# Record state $stat for host $mach and keep the per-state host counters
# in step.  Arguments: ($ui, $mach, $stat).
# The sub is invoked as a method elsewhere in this file, so its ($$$)
# prototype is not consulted for those calls.
190 sub set_host_status($$$) {
191 my ($ui, $mach, $stat) = @_;
192 my $prev_stat = $host_state{$mach};
# A host that already had a state leaves its previous bucket.
193 if (defined $prev_stat) {
194 $host_cnt{$prev_stat}--;
# NOTE(review): presumably the branch for a host seen for the first time;
# it zeroes that host's per-state job counters -- confirm.
196 for my $s ('unknown', 'ready', 'running', 'done', 'failed') { $job_cnt{$mach}{$s} = 0; }
198 $host_state{$mach} = $stat;
# Move job $jid on host $mach to state $stat, updating both the per-host
# counters and the global ones kept under the '*' key.
# Arguments: ($ui, $mach, $jid, $stat).
202 sub set_job_status($$$$) {
203 my ($ui, $mach, $jid, $stat) = @_;
# A job without a recorded state is treated as coming from 'unknown'.
# NOTE(review): this decrements the 'unknown' counters for every job seen
# for the first time, so they can become negative.
204 my $prev_stat = $job_state{$mach}{$jid} // 'unknown';
205 $job_cnt{$mach}{$prev_stat}--;
206 $job_cnt{'*'}{$prev_stat}--;
207 $job_state{$mach}{$jid} = $stat;
208 $job_cnt{$mach}{$stat}++;
209 $job_cnt{'*'}{$stat}++;
# Redraw the summary line in bold colour pair 1.  For jobs, the letters in
# the format string stand for: R = running, D = done, E = failed,
# W = ready (taken from the global '*' counters).
# NOTE(review): the host counters presumably follow the same order -- only
# the 'running' argument is visible here.
212 sub refresh_status($) {
213 $C->bkgdset(COLOR_PAIR(1) | A_BOLD);
215 sprintf("BEX Hosts: %dR %dD %dE %dW Jobs: %dR %dD %dE %dW",
216 $host_cnt{'running'},
220 $job_cnt{'*'}{'running'},
221 $job_cnt{'*'}{'done'},
222 $job_cnt{'*'}{'failed'},
223 $job_cnt{'*'}{'ready'},
# Slot lookup/allocation for a host: a slot already bound to $mach is
# reused; otherwise the screen rows 0..$nrows-1 are scanned for one.
232 if (defined ($s = $by_host{$mach})) {
236 for my $i (0..$nrows-1) {
# Among rows whose slot is marked 'Gone', prefer the one with the smallest
# 'Gone' value.
242 } elsif ($r->{'Gone'} && (!$best || $best->{'Gone'} > $r->{'Gone'})) {
# The host that previously owned the chosen slot loses its mapping.
248 delete $by_host{$best->{'Host'}};
# Bind the slot to the host and register it under both indexes.
250 $s->{'Host'} = $mach;
251 $s->{'Row'} = $besti;
252 $by_host{$mach} = $s;
253 $by_row[$besti] = $s;
# Slots are stamped with a strictly increasing counter in their 'Gone'
# field, so a smaller value means the slot was stamped earlier.
258 my $gone_counter = 1;
261 $s->{'Gone'} = $gone_counter++;
# Draw the screen row of one slot: host name, job counters and the
# current (or last failed) status.
266 my $mach = $s->{'Host'};
# Missing status is shown as "?", missing job as an empty name.
267 my $stat = $s->{'Status'} // "?";
268 my $jid = $s->{'Job'} // "";
269 my $jname = ($jid eq "" ? "" : $queue->job_name($jid));
270 my $jcnt = $job_cnt{$mach};
# Row colour: bold while the host has running jobs, colour pair 4 (red on
# black) whenever the host has failed jobs, pair 3 (yellow on black)
# for a running host without failures.
271 if ($jcnt->{'running'}) {
272 if ($jcnt->{'failed'}) {
273 $C->bkgdset(COLOR_PAIR(4) | A_BOLD);
275 $C->bkgdset(COLOR_PAIR(3) | A_BOLD);
278 if ($jcnt->{'failed'}) {
279 $C->bkgdset(COLOR_PAIR(4));
# Slot row N is drawn on screen line N+1; the host name is padded and cut
# to 20 characters.
284 my $r = $s->{'Row'} + 1;
285 $C->addstr($r, 0, sprintf("%-20.20s", $mach));
# The failure count is printed in colour pair 4 when non-zero.
286 if ($jcnt->{'failed'}) {
287 $C->bkgdset(COLOR_PAIR(4));
288 $C->addstr(sprintf("%3dE ", $jcnt->{'failed'}));
294 $C->addstr(sprintf("%3dD %3dW", $jcnt->{'done'}, $jcnt->{'ready'}));
# For a finished host that recorded a failure, show the last failing
# status and job instead; NOPING uses colour pair 5, anything else pair 4.
295 if ($stat eq 'DONE') {
296 if (defined $host_last_fail_stat{$mach}) {
297 $C->bkgdset(($host_last_fail_stat{$mach} eq 'NOPING') ? COLOR_PAIR(5) : COLOR_PAIR(4));
298 $C->addstr(sprintf(" %-8s %s", $host_last_fail_stat{$mach}, $queue->job_name($host_last_fail_job{$mach})));
# Status text: the status padded to 8 characters, followed by the job name.
301 my $text = sprintf(" %-8s %s", $stat, $jname);
# Status update for the curses UI.  Arguments: ($ui, $mach, $jid, $stat),
# where $jid may be undef for messages that concern the whole machine.
# The status is translated into host/job state changes and stored in the
# machine's slot.
# NOTE(review): the dispatch uses given/when, which is experimental and
# deprecated in recent Perl releases.
309 my ($ui, $mach, $jid, $stat) = @_;
310 my $s = get_slot($mach);
313 # Pseudo-state generated internally
314 $ui->set_host_status($mach, 'ready');
315 $ui->set_job_status($mach, $jid, 'ready');
# The job finished successfully.
318 $ui->set_job_status($mach, $jid, 'done');
# Failure statuses: the job is counted as failed and remembered as the
# host's most recent failure.
320 when (['FAILED', 'INTERR', 'NOPING', 'PREPFAIL', 'NOXFER']) {
321 $ui->set_job_status($mach, $jid, 'failed');
322 $host_last_fail_job{$mach} = $jid;
323 $host_last_fail_stat{$mach} = $stat;
# The host ends up 'failed' if any of its jobs failed, 'done' otherwise.
326 if ($job_cnt{$mach}{'failed'}) {
327 $ui->set_host_status($mach, 'failed');
329 $ui->set_host_status($mach, 'done');
# The host starts running; a job is marked running only if one was named.
333 $ui->set_host_status($mach, 'running');
334 $ui->set_job_status($mach, $jid, 'running') if defined $jid;
# Host-level failure: either the named job fails ...
338 $ui->set_job_status($mach, $jid, 'failed');
# ... or every job known for this host does.  The loop variable $j must be
# passed here; the previous code passed $jid, so the iterated jobs were
# never marked.
340 for my $j (keys %{$job_state{$mach}}) {
341 $ui->set_job_status($mach, $j, 'failed');
343 $ui->set_host_status($mach, 'failed');
344 $host_last_fail_job{$mach} = $jid;
345 $host_last_fail_stat{$mach} = $stat;
# Progress statuses.
348 when (['START', 'PING', 'SEND', 'RUN']) {
# Anything else is reported as an error.
351 $ui->err("Received unknown job status $stat");
# Remember the status in the slot; a DONE host releases its slot.
355 $s->{'Status'} = $stat;
357 if ($stat eq 'DONE') { delete_slot($s); }