From e6e7b89be520726f540b9b0b61793d5393b26b21 Mon Sep 17 00:00:00 2001 From: Martin Mares Date: Mon, 31 Oct 2011 10:57:35 +0100 Subject: [PATCH] Bits of documentation --- NOTES | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ TODO | 11 +++++++++++ brun | 3 --- 3 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 NOTES create mode 100644 TODO diff --git a/NOTES b/NOTES new file mode 100644 index 0000000..1412bf7 --- /dev/null +++ b/NOTES @@ -0,0 +1,48 @@ +### Structure of queue directories ### + +<queue>/hosts/<host>/ Jobs queued for the given host + (they are executed in the lexicographic order of <job-id>s) + /<job-id>.job Symlink to <queue>/jobs/<job-id>.job + /<job-id>.stat (Optional) status of the job + /<job-id>.tmp Used temporarily by brun to store the script actually + sent to the host (can be inspected if something goes wrong) + +<queue>/jobs/<job-id>.job All jobs issued on this queue, including those which + are no longer queued for any host + +<queue>/log Log of actions on this queue. Lines look like this: + YYYY-MM-DD HH:MM:SS <host> <job-id> <status> [<message>] + <status> and <message> correspond to "Status" and "Message" + in status files. + +<queue>/status-fifo FIFO used for reporting status of subprocesses by `bprun' + +### Job files ### + +Mail-like structure. First come the headers (<key>: <value>), keywords are +case-sensitive, no multi-line fields allowed, then an empty line and then the body +(i.e., commands to be executed on the remote host). + +Known header fields: + +ID: Identifier of the job, unique in the scope of a queue +Subject: Subject to be displayed to the user + +### Status files ### + +Structure identical to job headers, but they do not contain a body. 
+ +Known fields: + +Time: UNIX timestamp of the last status change +Status: Machine-readable status of the job: + NOPING - host does not respond to ping + NOXFER - transfer of the job body to a temporary file + on the host has failed + OK - job executed successfully (however, the job will + be removed from the queue immediately, so you are + not likely to see this code) + FAILED - job failed to execute (i.e., it returned + a non-zero exit code) + INTERR - internal error of BEX +Message: (Optional) human-readable message explaining the status diff --git a/TODO b/TODO new file mode 100644 index 0000000..63d6f1d --- /dev/null +++ b/TODO @@ -0,0 +1,11 @@ +- benq: options for specifying subject and other params +- benq: take job from file +- benq: requeue job +- bprun: option for setting max # of running jobs +- bprun --job +- bprun --curses +- Locking +- rsync, rsync-only +- Rename machine -> host +- ssh options +- Detector of orphans (unused queue dirs, jobs on non-existent machines, non-queued jobs) diff --git a/brun b/brun index aa8a51a..901c7d5 100755 --- a/brun +++ b/brun @@ -53,9 +53,6 @@ sub run_job($$$) { my ($job, $queue, $mach) = @_; my $jid = $job->{'ID'}; - # FIXME: rsyncing, rsync-only jobs - # FIXME: Locking - my $tmp = $queue->temp_file($mach, $jid); open T, '>', $tmp or die; if (defined $BEX::Config::job_prolog) { -- 2.39.2