From 0fda8189b956eb8d024625d7b2d596e9aeabb073 Mon Sep 17 00:00:00 2001 From: Tomas Gavenciak Date: Sat, 27 Nov 2010 12:54:30 +0100 Subject: [PATCH] Several changes in status.py, wrote docs Documented the new semantics of status file (unique keys, multiline entries, merge/update) Rewrite Status to reflect docs Renamed Status exception to MoeInvalidStatusFile Added Status.update according to docs --- t/doc/status.rst | 72 ++++++++++++++++++++++++++ t/moe/status.py | 129 +++++++++++++++++++++++++++-------------------- 2 files changed, 147 insertions(+), 54 deletions(-) create mode 100644 t/doc/status.rst diff --git a/t/doc/status.rst b/t/doc/status.rst new file mode 100644 index 0000000..d8ba50f --- /dev/null +++ b/t/doc/status.rst @@ -0,0 +1,72 @@ +.. _status: + +.. highlight:: none + +================ +Moe status files +================ + +.. contents:: + :local: + +------------------- +Status files syntax +------------------- + +The status files hold and transmit status information of a single task (submission). + +A status is an (unordered) dictionary of **key:value** entries and named subtrees. +Keys of both the entries and subtrees may consist only of letters, numbers, ``-`` and ``_`` +(must match ``[a-zA-Z0-9-_]+``). The keys are case-sensitive. + +The values are arbitrary byte-strings not containing character ``\0`` (ASCII char. 0). +Note that this permits almost any encoding that can avoid the character ``\0``, such as UTF-8. +When processing the values as strings, Moe decodes the strings as UTF-8 (unless stated otherwise). + +The format is line oriented (delimited by ``\n``), +whitespace characters (`` \t\r``) at the beginning of a line are ignored. +The file consists of empty lines, comments, entries and subtrees. + +Comments start with ``#`` and continue until the end of the line. +Comments must occur alone on the line. + +Entries are written as a key and a colon. Everything after the colon (incl. whitespace) +is considered to be part of the value. 
+Multi-line values continue on following lines, prefixed by a single colon. + +A subtree consists of a key followed by ``(``; the subtree is closed by ``)`` alone on its own line. + +Keys of entries and subtrees must be unique in every subtree, including the top level of a status file. + +Status file grammar +^^^^^^^^^^^^^^^^^^^ + +:: + STATUS = ELEMENT * + ELEMENT = WS '\n' | COMMENT | ENTRY | SUBTREE + SUBTREE = WS KEY '(' WS '\n' STATUS ')' WS '\n' + ENTRY = WS KEY WS ':' VALUE '\n' ( WS ':' VALUE '\n' )* + COMMENT = WS re('#[^\n\0]*\n') + KEY = re([a-zA-Z0-9-_]+) + VALUE = re([^\n\0]*) + WS = re(['\r\t ']*) + +Every ``ELEMENT`` ends with a newline. ``ENTRY`` is a (possibly multiline) **key:value** pair. + +--------------------- +Updating status files +--------------------- + +A common operation is to update status file *A* with *B*. This proceeds level by level starting with the top levels +of *A* and *B*. The result is a union of the entries, entries of *B* overwriting the corresponding entries of *A*. +Any subtrees with matching names are updated recursively. A subtree of *A* and an entry of *B* of the same name +(or vice versa) are not allowed. + + +----------------- +Module moe.status +----------------- + +.. automodule:: moe.status + + diff --git a/t/moe/status.py b/t/moe/status.py index 76681c7..86ed37e 100644 --- a/t/moe/status.py +++ b/t/moe/status.py @@ -1,64 +1,73 @@ -#!/usr/bin/env python - import sys import types import re -key_pattern = re.compile("^[A-Za-z0-9_-]+$") +key_pattern = re.compile("\A[A-Za-z0-9_-]+\Z") -class MoeStatusInvalid(Exception): +class MoeInvalidStatusFile(Exception): pass class Status: - """Moe status file.""" + """ + (One subtree of) Moe status file. 
+ """ def __init__(self): - self.stat = {} + self.d = {} def __getitem__(self, k): - if not self.stat.has_key(k): return None - v = self.stat[k] - if type(v) == types.ListType: - if len(v) > 0: return v[0] - else: return None - else: return v + return self.d[k] def __setitem__(self, k, v): - self.stat[k] = v + self.d[k] = v def keys(self): - return self.stat.keys() + return self.d.keys() - def get_list(self, k): - m = self.stat - if not m.has_key(k): - m[k] = [] - elif type(m[k]) != types.ListType: - m[k] = [m[k]] - return m[k] + def update(self, stat2): + """ + Updates values of `self` with values of `stat2`, recursively. + Directly references objects (values and subtrees) of `stat2`, so making a deep copy of `stat2` + may be necessary if you intend to modify `stat2` afterwards. + """ + + for k,v2 in stat2.d.items(): + if k not in self.d: + self[k] = v2 + else: + v = self[k] + if isinstance(v, Status) != isinstance(v2, Status): + raise TypeError("Mixing Status and value while updating key %r"%k) + if isinstance(v, Status): + v.update(v2) + else: + self[k] = v2 + def dump(self, prefix=""): """ - Dump status in metafile format. - Return a list of lines, `prefix` is indent prefix. + Dump Status in status file format. + Returns a list of lines, ``prefix`` is indentation prefix. 
""" + l = [] - for k,v in self.stat.items(): - if type(v) == types.ListType: vals = v - else: vals = [v] - for w in vals: - if isinstance(w, Status): - l.append(prefix + k + "(\n") - l.extend(self.str_lines(prefix+" ")) - l.append(prefix + ")\n") - else: - l.append(prefix + k + ":" + str(w) + "\n") + for k,v in self.d.items(): + if isinstance(v, Status): + l.append(prefix + k + " (") + l.extend(self.dump(prefix+" ")) + l.append(prefix + ")") + else: + d = str(v).split('\n') + l.append(prefix + k + ":" + d[0]) + for i in d[1:]: + l.append(prefix + ' '*len(k) + ':' + i) return l def write(self, f=None, name=None): """ - Write status (as a metafile) to `f` or file `name` or `stdout` + Write Status to File ``f`` or overwrite file ``name`` or write to ``stdout`` (otherwise). """ + if not f and name is not None: with open(name, "w") as f: for l in self.dump(): @@ -69,47 +78,59 @@ class Status: for l in self.dump(): f.write(l+"\n") - def read(self, file=None, name=None): - if file is None: - if name is not None: - file = open(name, "r") - else: - file = sys.stdin - self.stat = {} - self.do_read(file) + def read(self, f=None, name=None): + """ + Parse Status from File ``f`` or from file ``name`` or from ``stdin`` (otherwise) + Deletes all contents of the Status. 
+ """ + + self.d = {} + if not f and name is not None: + with open(name, 'r') as f: + self.do_read(f) + else: + if not f: + f = sys.stdin + self.do_read(f) def read_val(self, k, v): + """ + Internal: Safely add a new value + """ + if not key_pattern.match(k): - raise MoeStatusInvalid, "Parse error: invalid key syntax" - m = self.stat - if not m.has_key(k): - m[k] = v - else: - self.get_list(k).append(v) + raise MoeInvalidStatusFile("Parse error: invalid key %r"%k) + if k in self.d: + raise MoeInvalidStatusFile("Multiple occurences of key %r"%k) + self.d[k]=v + + def do_read(self, f): + """ + Internal: Parse an open file + """ - def do_read(self, file): stk = [] this = self - for x in file.readlines(): + for x in f.readlines(): x = x.rstrip("\n").lstrip(" \t") if x=="" or x.startswith("#"): pass else: sep = x.find(":") if sep >= 0: - k = x[:sep] + k = x[:sep].rstrip(" \t") v = x[sep+1:] this.read_val(k, v) elif x.endswith("("): - k = x[:-1] + k = x[:-1].rstrip(" \t") new = Status() this.read_val(k, new) stk.append(this) this = new elif x == ")": if len(stk) == 0: - raise MoeStatusInvalid, "Parse error: incorrect nesting" + raise MoeInvalidStatusFile("Parse error: incorrect nesting") else: this = stk.pop() else: - raise MoeStatusInvalid, "Parse error: malformed line" + raise MoeInvalidStatusFile("Parse error: malformed line") -- 2.39.2