From a3f95cc0e2fb3c359fa989f58691ad44b39173eb Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Mon, 31 Oct 2011 14:37:58 +0100 Subject: [PATCH] Implemented preparatory commands --- NOTES | 5 ++++ TODO | 1 - brun | 87 ++++++++++++++++++++++++++++++++++++++++++++++------------- 3 files changed, 74 insertions(+), 19 deletions(-) diff --git a/NOTES b/NOTES index 561abd6..9f3dc17 100644 --- a/NOTES +++ b/NOTES @@ -32,6 +32,9 @@ Known header fields: ID: Identifier of the job, unique in the scope of a queue Subject: Subject to be displayed to the user +Prep: Run in a shell before the job body is executed; + $HOST contains the name of the host. This is useful for + example if you want to transfer data to the host by rsync. ### Status files ### @@ -52,11 +55,13 @@ OK Job finished successfully (this is usually not seen in the queue, since finished jobs are immediately deleted or moved to the history) FAILED Job failed to execute (i.e., it returned a non-zero exit code) INTERR Internal error of BEX (e.g., failed to read job prolog file) +PREPFAIL Preparatory commands failed (i.e., those present in Prep header field) These are present only in log files and messages sent over status FIFO: RUN Job is running SEND Sending job to the host +PREP Running preparatory commands (i.e., those present in Prep header field) Status codes not tied to a specific job (sent over status FIFO): diff --git a/TODO b/TODO index b2a374d..b7badfd 100644 --- a/TODO +++ b/TODO @@ -1,7 +1,6 @@ - bprun: option for setting max # of running jobs - bprun --job - bprun --curses -- rsync, rsync-only - Terminology: machine vs. 
host - ssh options - Detector of orphans (unused queue dirs, jobs on non-existent machines, non-queued jobs) diff --git a/brun b/brun index 324f23f..3e46d47 100755 --- a/brun +++ b/brun @@ -42,19 +42,64 @@ sub update_status($$$$;$) { } } +my %pings; + sub ping_machine($) { my ($mach) = @_; - return 1 unless $BEX::Config::ping_hosts; - update_status($mach, '-', 'PING', undef); - my $host = BEX::Config::host_name($mach); - `ping -c1 -n $host >/dev/null 2>/dev/null`; - return !$?; + if (!defined $pings{$mach}) { + if ($BEX::Config::ping_hosts) { + update_status($mach, '-', 'PING', undef); + my $host = BEX::Config::host_name($mach); + `ping -c1 -n $host >/dev/null 2>/dev/null`; + $pings{$mach} = !$?; + } else { + $pings{$mach} = 1; + } + } + if ($pings{$mach}) { + return ('OK', undef); + } else { + return ('NOPING', 'Does not ping'); + } } -sub run_job($$$) { +sub exit_status($) { + my ($s) = @_; + if ($s >> 8) { + return "with exit code " . ($s >> 8); + } else { + return "on fatal signal " . ($s & 127); + } +} + +sub run_job_prep($$$) { + my ($job, $queue, $mach) = @_; + my $prep = $job->attr('Prep'); + defined($prep) && $prep !~ /^\s*$/ or return 'OK'; + + my $jid = $job->id; + update_status($mach, $jid, 'PREP', $queue); + my $lf = $queue->log_file($mach, $jid); + $ENV{'HOST'} = BEX::Config::host_name($mach); + system 'bash', '-o', 'pipefail', '-c', "( $prep ) 2>&1 | tee -a $lf"; + delete $ENV{'HOST'}; + if ($?) { + return ('PREPFAIL', 'Preparatory command failed ' . 
exit_status($?)); + } else { + return 'OK'; + } +} + +sub run_job_body($$$) { my ($job, $queue, $mach) = @_; - my $jid = $job->{'ID'}; + + if ($job->attr('body') =~ /^\s*$/s) { + # Shortcut if the body is empty + return 'OK' + } + my $host = BEX::Config::host_name($mach); + my $jid = $job->id; my $tmp = $queue->temp_file($mach, $jid); open T, '>', $tmp or die; @@ -66,7 +111,7 @@ sub run_job($$$) { print T "#!/bin/sh\n"; } print T "# BEX job ", $jid, "\n"; - print T $job->{'body'}; + print T $job->attr('body'); if (defined $BEX::Config::job_epilog) { open E, $BEX::Config::job_epilog or return ('INTERR', "Cannot open epilog: $!"); while (<E>) { print T; } @@ -84,12 +129,25 @@ sub run_job($$$) { my $lf = $queue->log_file($mach, $jid); system 'bash', '-o', 'pipefail', '-c', "ssh -t $host '$rtmp ; e=\$? ; rm -f $rtmp ; exit \$e' 2>&1 | tee -a $lf"; if ($?) { - return ('FAILED', 'Job failed'); + return ('FAILED', 'Job failed ' . exit_status($?)); } else { - return ('OK', undef); + return 'OK'; } } +sub run_job($$$) { + my ($job, $queue, $mach) = @_; + my ($stat, $msg); + + ($stat, $msg) = ping_machine($mach); + $stat eq 'OK' or return ($stat, $msg); + + ($stat, $msg) = run_job_prep($job, $queue, $mach); + $stat eq 'OK' or return ($stat, $msg); + + return run_job_body($job, $queue, $mach); +} + my @machines = BEX::Config::parse_machine_list(@ARGV ? 
@ARGV : '*'); my $queue = BEX::Queue->new($queue_name); @@ -104,7 +162,6 @@ for my $mach (@machines) { next; } update_status($mach, '-', 'INIT', undef); - my $ping; while (my $jid = shift @q) { if (defined $given_job) { $jid eq $given_job or next; @@ -119,13 +176,7 @@ for my $mach (@machines) { 'Time' => time, }; print "### Running $jid (", $job->attr('Subject'), ") on $mach ###\n"; - $ping //= ping_machine($mach); - my ($s, $msg); - if (!$ping) { - ($s, $msg) = ('NOPING', 'Does not ping'); - } else { - ($s, $msg) = run_job($job, $queue, $mach); - } + my ($s, $msg) = run_job($job, $queue, $mach); $stat->{'Status'} = $s; $stat->{'Message'} = $msg; -- 2.39.2