- bprun --curses
- Terminology: machine vs. host
- Detector of orphans (unused queue dirs, jobs on non-existent machines, non-queued jobs)
-- job failed => give a more explanatory message
-- write_job_status should be atomic
sub write_job_status($$$$) {
my ($queue, $machine, $jid, $stat) = @_;
my $sf = $queue->status_file($machine, $jid);
- open S, '>', $sf or die "Cannot create $sf: $!";
+ # Write the status to a per-process temporary file next to $sf and then
+ # rename it over $sf, so $sf is never left truncated or half-written.
+ my $tmp = "$sf.$$";
+ open S, '>', $tmp or die "Cannot create $tmp: $!";
for my $k (sort keys %$stat) {
print S "$k: ", $stat->{$k}, "\n" if defined $stat->{$k};
}
- close S;
+ # Buffered write errors surface at close; never install a short file.
+ close S or die "Cannot write $tmp: $!";
+ rename $tmp, $sf or die "Cannot rename $tmp to $sf: $!";
}
sub lock_name($$$) {