X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=t%2Fmoe%2Fconfparser.py;h=31783403a3a78c640a5b413a62b321e25b5cd287;hb=c1ab43f6560c6ad61e51face3bfeda5dd8c703ce;hp=d7616d55980b72d7a6b157420f542a45c1805d89;hpb=748dba3fceb225606533cbd836ea4642344b96ac;p=eval.git diff --git a/t/moe/confparser.py b/t/moe/confparser.py index d7616d5..3178340 100644 --- a/t/moe/confparser.py +++ b/t/moe/confparser.py @@ -27,7 +27,7 @@ COMMENT = re('#[^\\n]*\\n') STATEMENT = CONDITION | OPERATION | SUBTREE OPERATION = WS VARNAME WS ( '=' | '+=' ) WS EXPRESSION -SUBTREE = WS VARNAME WS '{' BLOCK '}' +SUBTREE = WS VARNAME WS '{' BLOCK WS '}' CONDITION = WS 'if' FORMULA WS '{' BLOCK WS '}' FORMULA = WS (( EXPRESSION WS ( '!=' | '==' ) WS EXPRESSION ) | '(' AND WS ')' | '(' OR WS ')' | NOT ) @@ -35,6 +35,8 @@ AND = FORMULA WS 'and' FORMULA OR = FORMULA WS 'or' FORMULA NOT = WS 'not' FORMULA +NOTE: ';' or '\n' is currently required even after CONDITION and SUBTREE block + TODO: change to OPERATION only NOTE: Formula may contain additional/extra parentheses EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'" @@ -42,20 +44,22 @@ ECHAR = re('([^\\{}]|\\\\|\\{|\\}|\\n)*') VARNAME = re('[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)*') """ -import re, itertools, logging as log - -class ConfigSyntaxError(Exception): - # TODO: choose a better superclass +import re, types, itertools, logging as log +import traceback +import moe.conf as conf + + +class ConfigSyntaxError(conf.ConfigError): + def __init__(self, msg, fname='', line=None, column=None): self.msg = msg self.fname = fname self.line = line self.column = column + def __str__(self): return('ConfigSyntaxError %s:%d:%d: %s'%(self.fname, self.line, self.column, self.msg)) -"Variable name regexp, dots (separators) must be separated from edges and each other." -re_VARNAME = re.compile(r'\A([A-Za-z0-9_-]+\.)*[A-Za-z0-9_-]+\Z') class ConfigParser(object): c_varname_sep = u'.' @@ -73,55 +77,116 @@ class ConfigParser(object): c_neq = u'!=' c_set = u'=' c_append = u'+=' - def __init__(self, f, tree, fname=''): - self.f = f # Stream + + def __init__(self, s, tree, fname='', level=0): + """Create a config file parser. + `s` is either a string, unicode or an open file. File is assumed to be utf-8, string is converted to unicode. + `tree` is a ConfigTree to fill the operations into. + `fname` is an optional name of the file, for debugging and syntax errors. + `level` indicates the precedence the operations should have in the ConfigTree + """ + self.s = s # Unicode, ascii string or an open file + self.buf = u"" # Read-buffer for s file, whole unicode string for s string/unicode + if isinstance(self.s, types.StringTypes): + self.buf = unicode(self.s) + elif (not isinstance(self.s, file)) or self.s.closed: + raise TypeError("Expected unicode, str or open file.") + self.bufpos = 0 self.fname = fname # Filename self.line = 1 - self.col = 1 + self.column = 1 self.tree = tree # ConfTree to fill + self.level = level # level of the parsed operations self.prefix = '' # Prefix of variable name, may begin with '.' - self.conds = [] # Stack of nested conditions, these are chained, so only the last is necessary + self.conditions = [] # Stack of nested conditions, these are chained, so only the last is necessary + self.read_ops = [] # List of parsed operations (varname, `Operation`), returned by `self.parse()` + + def preread(self, l): + "Make sure buf contains at least `l` next characters, return True on succes and False on hitting EOF." + if isinstance(self.s, file): + self.buf = self.buf[self.bufpos:] + self.s.read(max(l, 1024)).decode('utf8') + self.bufpos = 0 + return len(self.buf) >= self.bufpos + l + def peek(self, l = 1): - "Peek and return next `l` unicode characters." - # TODO - return '' + "Peek and return next `l` unicode characters or everything until EOF." + self.preread(l) + return self.buf[self.bufpos:self.bufpos+l] + def peeks(self, s): - "Peek and compare next `len(s)` characters to `s`. Unicode." + "Peek and compare next `len(s)` characters to `s`. Converts `s` to unicode. False on hitting EOF." s = unicode(s) return self.peek(len(s)) == s - return True + def next(self, l = 1): - "Eat and return next `l` unicode characters." - # TODO - return '' + "Eat and return next `l` unicode characters. Raise exception on EOF." + if not self.preread(l): + self.syntax_error("Unexpected end of file") + s = self.buf[self.bufpos:self.bufpos+l] + self.bufpos += l + rnl = s.rfind('\n') + if rnl<0: + # no newline + self.column += l + else: + # some newlines + self.line += s.count('\n') + self.column = l - rnl - 1 + return s + def nexts(self, s): - "Compare next `len(s)` characters to `s`, eat them and return True if they match. Unicode." + """Compare next `len(s)` characters to `s`. On match, eat them and return True. Otherwise just return False. + Converts `s` to unicode. False on hitting EOF.""" s = unicode(s) - return self.next(len(s)) == s + if self.peeks(s): + self.next(len(s)) + return True + return False + def eof(self): "Check for end-of-stream." - # TODO - return False - def expected(self, s, msg=None): + return not self.preread(1) + + def expect(self, s, msg=None): "Eat and compare next `len(s)` characters to `s`. If not equal, raise an error with `msg`. Unicode." s = unicode(s) if not self.nexts(s): - raise self.syntaxError(msg or u"%r expected."%(s,)) - def syntaxError(self, msg, *args): + self.syntax_error(msg or u"%r expected."%(s,)) + + def syntax_error(self, msg, *args): "Raise a syntax error with file/line/column info" - raise ConfSyntaxError(fname=self.fname, line=self.line, column=self.column, msg=(msg%args)) + raise ConfigSyntaxError(fname=self.fname, line=self.line, column=self.column, msg=(msg%args)) + + def dbg(self): + n = None; s = '' + for i in traceback.extract_stack(): + if i[2][:2]=='p_': + s += ' ' + n = i[2] + if n: log.debug(s + n + ' ' + repr(self.peek(15)) + '...') + def parse(self): - p_BLOCK(self) + self.read_ops = [] + self.p_BLOCK() + return self.read_ops + def p_BLOCK(self): + self.dbg() # Debug self.p_WS() - while not self.eof() and not f.peek(self.c_close): + while (not self.eof()) and (not self.peeks(self.c_close)): self.p_STATEMENT() - slef.p_WS() - if not self.peek() in self.c_sep: + l0 = self.line + self.p_WS() + if self.eof() or self.peeks(self.c_close): break - self.p_SEP() + if self.line == l0: # No newline skipped in p_WS + self.expect(';') + else: + self.nexts(';') # NOTE: this is weird - can ';' occur anywhere? Or at most once, but only after any p_WS debris? self.p_WS() - def p_WS(): + + def p_WS(self): + self.dbg() # Debug while not self.eof(): if self.peek() in self.c_ws: self.next() @@ -129,12 +194,15 @@ class ConfigParser(object): self.p_COMMENT() else: break + def p_COMMENT(self): + self.dbg() # Debug self.expect(self.c_comment, "'#' expected at the beginning of a comment.") - while not self.eof() and not self.nexts(self.c_nl): - pass - self.eof() or self.expect(self.c_nl) + while (not self.eof()) and (not self.nexts(self.c_nl)): + self.next(1) + def p_STATEMENT(self): + self.dbg() # Debug self.p_WS() if self.peeks(self.c_if): self.p_CONDITION() @@ -142,13 +210,13 @@ class ConfigParser(object): # for operation or subtree, read VARNAME varname = self.p_VARNAME() self.p_WS() - if self.nexts(self.c_open): - self.p_BLOCK(varname) - self.p_WS() - self.expect(self.c_close) + if self.peeks(self.c_open): + self.p_SUBTREE(varname) else: self.p_OPERATION(varname) + def p_SUBTREE(self, varname=None): + self.dbg() # Debug if not varname: self.p_WS() varname = self.p_VARNAME() @@ -162,7 +230,9 @@ class ConfigParser(object): # close block and self.p_WS() self.expect(self.c_close) + def p_OPERATION(self, varname=None): + self.dbg() # Debug if not varname: self.p_WS() varname = self.p_VARNAME() @@ -172,21 +242,29 @@ class ConfigParser(object): elif self.nexts(self.c_append): op = 'APPEND' else: - self.syntaxError('Unknown operation.') + self.syntax_error('Unknown operation.') self.p_WS() exp = self.p_EXPRESSION() - v = self.tree.lookup((self.prefix+self.c_varname_sep+varname).lstrip(self.c_varname_sep)) + vname = (self.prefix+self.c_varname_sep+varname).lstrip(self.c_varname_sep) + v = self.tree.lookup(vname) if self.conditions: cnd = self.conditions[-1] else: cnd = None - v.add_operation(op, cnd, exp, self.priority) + op = conf.Operation(op, cnd, exp, level=self.level, + source="%s:%d:%d"%(self.fname, self.line, self.column)) + # NOTE/WARNING: The last character of operation will be reported in case of error. + v.add_operation(op) + self.read_ops.append( (vname, op) ) + def p_CONDITION(self): + self.dbg() # Debug self.p_WS() + t = u"condition at %s:%d:%d"%(self.fname, self.line, self.column) self.expect(self.c_if) self.p_WS() - f = p_FORMULA(self) - cnd = ConfigCondition(f) + f = self.p_FORMULA() + cnd = conf.ConfigCondition(f, text=t, parent=(self.conditions and self.conditions[-1]) or None) self.conditions.append(cnd) # Parse a block self.p_WS() @@ -196,15 +274,19 @@ class ConfigParser(object): self.expect(self.c_close) # Cleanup self.conditions.pop() + def p_VARNAME(self): + self.dbg() # Debug vnl = [] - while self.peek().isalnum() or self.peek() in u'-_': + while self.peek().isalnum() or self.peek() in u'-_.': vnl.append(self.next()) vn = u''.join(vnl) - if not re_VARNAME.match(vn): - self.syntax_error('Invalid variable name') + if not conf.re_VARNAME.match(vn): + self.syntax_error('Invalid variable name %r', vn) return vn + def p_EXPRESSION(self): + self.dbg() # Debug op = self.next() if op not in '\'"': self.syntax_error('Invalid start of expression') @@ -215,7 +297,7 @@ class ConfigParser(object): exl.append(self.next()) self.expect(op) s = u''.join(exl) - return ConfigExpression((s,), s) + return conf.ConfigExpression((s,), s) # Parse expression with variables exl = [op] expr = [] @@ -245,23 +327,29 @@ class ConfigParser(object): # Concatenate consecutive characters in expr expr2 = [] for i in expr: - if expr2 and isinstance(expr2[-1], unicode): + if expr2 and isinstance(expr2[-1], unicode) and isinstance(i, unicode): expr2[-1] = expr2[-1] + i else: expr2.append(i) - return ConfigExpression(tuple(expr2), exs) + return conf.ConfigExpression(tuple(expr2), exs) + def p_FORMULA(self): + self.dbg() # Debug self.p_WS() # Combined logical formula if self.nexts(u'('): f1 = self.p_FORMULA() self.p_WS() if self.nexts(self.c_and): + if self.peek(1).isalnum(): + self.syntax_error('trailing characters after %r', self.c_and) f2 = self.p_FORMULA() self.p_WS() self.expect(u')') return ('AND', f1, f2) elif self.nexts(self.c_or): + if self.peek(1).isalnum(): + self.syntax_error('trailing characters after %r', self.c_or) f2 = self.p_FORMULA() self.p_WS() self.expect(u')') @@ -272,6 +360,8 @@ class ConfigParser(object): else: self.syntax_error("Logic operator or ')' expected") elif self.nexts(self.c_not): + if self.peek().isalnum(): + self.syntax_error('trailing characters after %r', self.c_not) # 'not' formula f = self.p_FORMULA() return ('NOT', f)