X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=brun;h=040b6ba40d66d6abc9f83bd8b00401062a76e069;hb=b7a75186ed33df2a064f71efbc36cbf9b5903af7;hp=324f23f6dd58c8dae0042197427503a63af3b098;hpb=acb510407eae97c8428cd40dca30fb4dacd726a7;p=bex.git diff --git a/brun b/brun index 324f23f..040b6ba 100755 --- a/brun +++ b/brun @@ -38,23 +38,68 @@ sub update_status($$$$;$) { print $status_fd "! $mach $job $status\n"; } if ($log_on_queue) { - $log_on_queue->log($mach, $job, $status, $msg); + $log_on_queue->update_job_status($mach, $job, $status, $msg); } } +my %pings; + sub ping_machine($) { my ($mach) = @_; - return 1 unless $BEX::Config::ping_hosts; - update_status($mach, '-', 'PING', undef); - my $host = BEX::Config::host_name($mach); - `ping -c1 -n $host >/dev/null 2>/dev/null`; - return !$?; + if (!defined $pings{$mach}) { + if ($BEX::Config::ping_hosts) { + update_status($mach, '-', 'PING', undef); + my $host = BEX::Config::host_name($mach); + `ping -c1 -n $host >/dev/null 2>/dev/null`; + $pings{$mach} = !$?; + } else { + $pings{$mach} = 1; + } + } + if ($pings{$mach}) { + return ('OK', undef); + } else { + return ('NOPING', 'Does not ping'); + } } -sub run_job($$$) { +sub exit_status($) { + my ($s) = @_; + if ($s >> 8) { + return "with exit code " . ($s >> 8); + } else { + return "on fatal signal " . ($s & 127); + } +} + +sub run_job_prep($$$) { + my ($job, $queue, $mach) = @_; + my $prep = $job->attr('Prep'); + defined($prep) && $prep !~ /^\s*$/ or return 'OK'; + + my $jid = $job->id; + update_status($mach, $jid, 'PREP', $queue); + my $lf = $queue->log_file($mach, $jid); + $ENV{'HOST'} = BEX::Config::host_name($mach); + system 'bash', '-o', 'pipefail', '-c', "( $prep ) 2>&1 | tee -a $lf"; + delete $ENV{'HOST'}; + if ($?) { + return ('PREPFAIL', 'Preparatory command failed ' . exit_status($?)); + } else { + return 'OK'; + } +} + +sub run_job_body($$$) { my ($job, $queue, $mach) = @_; - my $jid = $job->{'ID'}; + + if ($job->attr('body') =~ /^\s*$/s) { + # Shortcut if the body is empty + return 'OK' + } + my $host = BEX::Config::host_name($mach); + my $jid = $job->id; my $tmp = $queue->temp_file($mach, $jid); open T, '>', $tmp or die; @@ -66,7 +111,7 @@ sub run_job($$$) { print T "#!/bin/sh\n"; } print T "# BEX job ", $jid, "\n"; - print T $job->{'body'}; + print T $job->attr('body'); if (defined $BEX::Config::job_epilog) { open E, $BEX::Config::job_epilog or return ('INTERR', "Cannot open epilog: $!"); while () { print T; } @@ -76,20 +121,33 @@ sub run_job($$$) { update_status($mach, $jid, 'SEND', undef); my $cmd = 't=$(mktemp -t bex-XXXXXXXX) && cat >$t && chmod u+x $t && echo $t'; - my $rtmp = `ssh <$tmp $host '$cmd'`; + my $rtmp = `$BEX::Config::ssh_command <$tmp $host '$cmd'`; !$? && defined($rtmp) && $rtmp ne '' or return ('NOXFER', 'Transfer failed'); chomp $rtmp; update_status($mach, $jid, 'RUN', $queue); my $lf = $queue->log_file($mach, $jid); - system 'bash', '-o', 'pipefail', '-c', "ssh -t $host '$rtmp ; e=\$? ; rm -f $rtmp ; exit \$e' 2>&1 | tee -a $lf"; + system 'bash', '-o', 'pipefail', '-c', "$BEX::Config::ssh_command $host '$rtmp ; e=\$? ; rm -f $rtmp ; exit \$e' 2>&1 | tee -a $lf"; if ($?) { - return ('FAILED', 'Job failed'); + return ('FAILED', 'Job failed ' . exit_status($?)); } else { - return ('OK', undef); + return 'OK'; } } +sub run_job($$$) { + my ($job, $queue, $mach) = @_; + my ($stat, $msg); + + ($stat, $msg) = ping_machine($mach); + $stat eq 'OK' or return ($stat, $msg); + + ($stat, $msg) = run_job_prep($job, $queue, $mach); + $stat eq 'OK' or return ($stat, $msg); + + return run_job_body($job, $queue, $mach); +} + my @machines = BEX::Config::parse_machine_list(@ARGV ? @ARGV : '*'); my $queue = BEX::Queue->new($queue_name); @@ -104,35 +162,19 @@ for my $mach (@machines) { next; } update_status($mach, '-', 'INIT', undef); - my $ping; while (my $jid = shift @q) { if (defined $given_job) { $jid eq $given_job or next; } my $job = BEX::Job->new_from_file($queue->job_file($jid)); + update_status($mach, $jid, 'INIT', undef); if (!$queue->lock($mach, $jid)) { print "### Skipping locked $jid on $mach ###\n"; update_status($mach, $jid, 'LOCKED', undef); next; } - my $stat = { - 'Time' => time, - }; - print "### Running $jid (", $job->attr('Subject'), ") on $mach ###\n"; - $ping //= ping_machine($mach); - my ($s, $msg); - if (!$ping) { - ($s, $msg) = ('NOPING', 'Does not ping'); - } else { - ($s, $msg) = run_job($job, $queue, $mach); - } - - $stat->{'Status'} = $s; - $stat->{'Message'} = $msg; - $queue->write_job_status($mach, $jid, $stat); - - # Called after writing the status file, so that the front-end watching - # our status FIFO can see the new status file. + print "### Running ", $job->name, " on $mach ###\n"; + my ($s, $msg) = run_job($job, $queue, $mach); update_status($mach, $jid, $s, $queue, $msg); if ($s eq 'OK') { @@ -146,6 +188,7 @@ for my $mach (@machines) { } } } +} continue { update_status($mach, '-', 'DONE', undef); } $queue->unlock;