X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;f=t%2Fmoe%2Fconfparser.py;h=d024912b94d9497aa679a6144603103d301ed460;hb=c861bd9c09d20e06533e122253bcd3bb94b0bef9;hp=ed73a1c804cbb781cffc9ec00d12f4e3735976b6;hpb=067c6380413ee589e57ad1d15f3914df2cfc9062;p=eval.git diff --git a/t/moe/confparser.py b/t/moe/confparser.py index ed73a1c..d024912 100644 --- a/t/moe/confparser.py +++ b/t/moe/confparser.py @@ -4,7 +4,7 @@ confparse.py Simple Moe configuration file syntax parser. -TODO: decide '()' around formulas +TODO: decide neccessity of '()' in/around formulas TODO: check escaping in expressions TODO: should whitespace (incl. '\\n') be allowed (almost) everywhere? can comment be anywhere whitespace can? @@ -27,7 +27,7 @@ COMMENT = re('#[^\\n]*\\n') STATEMENT = CONDITION | OPERATION | SUBTREE OPERATION = WS VARNAME WS ( '=' | '+=' ) WS EXPRESSION -SUBTREE = WS VARNAME WS '{' BLOCK '}' +SUBTREE = WS VARNAME WS '{' BLOCK WS '}' CONDITION = WS 'if' FORMULA WS '{' BLOCK WS '}' FORMULA = WS (( EXPRESSION WS ( '!=' | '==' ) WS EXPRESSION ) | '(' AND WS ')' | '(' OR WS ')' | NOT ) @@ -35,180 +35,263 @@ AND = FORMULA WS 'and' FORMULA OR = FORMULA WS 'or' FORMULA NOT = WS 'not' FORMULA +NOTE: ';' or '\n' is currently required even after CONDITION and SUBTREE block + TODO: change to OPERATION only NOTE: Formula may contain additional/extra parentheses -EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'" +EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'" | VARNAME ECHAR = re('([^\\{}]|\\\\|\\{|\\}|\\n)*') VARNAME = re('[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)*') """ -import re, logging as log - -class ConfSyntaxError(Exception): - # TODO: choose better superclass +import re, types, itertools, logging as log +import traceback +import moe.conf as conf + + +class ConfigSyntaxError(conf.ConfigError): + def __init__(self, msg, fname='', line=None, column=None): self.msg = msg self.fname = fname self.line = line self.column = column + def __str__(self): - return('ConfSyntaxError %s:%d:%d: %s'%(self.fname, self.line, self.column, self.msg)) - -c_varname_sep = u'.' -c_comment = u'#' -c_open = u'{' -c_close = u'}' -c_ws = u' \t\n' -c_sep = u';\n' -c_nl = u'\n' -c_if = u'if' -c_and = u'and' -c_or = u'or' -c_not = u'not' -c_eq = u'==' -c_neq = u'!=' -c_set = u'=' -c_append = u'+=' - -"Variable name regexp, dots (separators) must be separated from edges and each other." -re_VARNAME = re.compile(r'\A([A-Za-z0-9_-]+\.)*[A-Za-z0-9_-]+\Z') - -class ConfParser(object): - def __init__(self, f, tree, fname=''): - self.f = f # Stream + return('ConfigSyntaxError %s:%d:%d: %s'%(self.fname, self.line, self.column, self.msg)) + + +class ConfigParser(object): + c_varname_sep = u'.' + c_comment = u'#' + c_open = u'{' + c_close = u'}' + c_ws = u' \t\n' + c_sep = u';\n' + c_nl = u'\n' + c_if = u'if' + c_and = u'and' + c_or = u'or' + c_not = u'not' + c_eq = u'==' + c_neq = u'!=' + c_set = u'=' + c_append = u'+=' + + def __init__(self, s, tree, fname='', level=0): + """Create a config file parser. + `s` is either a string, unicode or an open file. File is assumed to be utf-8, string is converted to unicode. + `tree` is a ConfigTree to fill the operations into. + `fname` is an optional name of the file, for debugging and syntax errors. + `level` indicates the precedence the operations should have in the ConfigTree + """ + self.s = s # Unicode, ascii string or an open file + self.buf = u"" # Read-buffer for s file, whole unicode string for s string/unicode + if isinstance(self.s, types.StringTypes): + self.buf = unicode(self.s) + elif (not isinstance(self.s, file)) or self.s.closed: + raise TypeError("Expected unicode, str or open file.") + self.bufpos = 0 self.fname = fname # Filename self.line = 1 - self.col = 1 + self.column = 1 self.tree = tree # ConfTree to fill + self.level = level # level of the parsed operations self.prefix = '' # Prefix of variable name, may begin with '.' - self.conds = [] # Stack of nested conditions, these are chained, so only the last is necessary + self.conditions = [] # Stack of nested conditions, these are chained, so only the last is necessary + self.read_ops = [] # List of parsed operations (varname, `Operation`), returned by `self.parse()` + + def preread(self, l): + "Make sure buf contains at least `l` next characters, return True on succes and False on hitting EOF." + if isinstance(self.s, file): + self.buf = self.buf[self.bufpos:] + self.s.read(max(l, 1024)).decode('utf8') + self.bufpos = 0 + return len(self.buf) >= self.bufpos + l + def peek(self, l = 1): - "Peek and return next `l` unicode characters." - # TODO - return '' + "Peek and return next `l` unicode characters or everything until EOF." + self.preread(l) + return self.buf[self.bufpos:self.bufpos+l] + def peeks(self, s): - "Peek and compare next `len(s)` characters to `s`. Unicode." + "Peek and compare next `len(s)` characters to `s`. Converts `s` to unicode. False on hitting EOF." s = unicode(s) return self.peek(len(s)) == s - return True + def next(self, l = 1): - "Eat and return next `l` unicode characters." - # TODO - return '' + "Eat and return next `l` unicode characters. Raise exception on EOF." + if not self.preread(l): + self.syntax_error("Unexpected end of file") + s = self.buf[self.bufpos:self.bufpos+l] + self.bufpos += l + rnl = s.rfind('\n') + if rnl<0: + # no newline + self.column += l + else: + # some newlines + self.line += s.count('\n') + self.column = l - rnl - 1 + return s + def nexts(self, s): - "Compare next `len(s)` characters to `s`, eat them and return True if they match. Unicode." + """Compare next `len(s)` characters to `s`. On match, eat them and return True. Otherwise just return False. + Converts `s` to unicode. False on hitting EOF.""" s = unicode(s) - return self.next(len(s)) == s + if self.peeks(s): + self.next(len(s)) + return True + return False + def eof(self): "Check for end-of-stream." - # TODO - return False - def expected(self, s, msg=None): + return not self.preread(1) + + def expect(self, s, msg=None): "Eat and compare next `len(s)` characters to `s`. If not equal, raise an error with `msg`. Unicode." s = unicode(s) if not self.nexts(s): - raise self.syntaxError(msg or u"%r expected."%(s,)) - def syntaxError(self, msg, *args): + self.syntax_error(msg or u"%r expected."%(s,)) + + def syntax_error(self, msg, *args): "Raise a syntax error with file/line/column info" - raise ConfSyntaxError(fname=self.fname, line=self.line, column=self.column, msg=(msg%args)) + raise ConfigSyntaxError(fname=self.fname, line=self.line, column=self.column, msg=(msg%args)) + + def dbg(self): + n = None; s = '' + for i in traceback.extract_stack(): + if i[2][:2]=='p_': + s += ' ' + n = i[2] + if n: log.debug(s + n + ' ' + repr(self.peek(15)) + '...') + def parse(self): - p_BLOCK(self) + self.read_ops = [] + self.p_BLOCK() + return self.read_ops + def p_BLOCK(self): + self.dbg() # Debug self.p_WS() - while not self.eof() and not f.peek(c_close): + while (not self.eof()) and (not self.peeks(self.c_close)): self.p_STATEMENT() - slef.p_WS() - if not self.peek() in c_sep: + l0 = self.line + self.p_WS() + if self.eof() or self.peeks(self.c_close): break - self.p_SEP() + if self.line == l0: # No newline skipped in p_WS + self.expect(';') + else: + self.nexts(';') # NOTE: this is weird - can ';' occur anywhere? Or at most once, but only after any p_WS debris? self.p_WS() - def p_WS(): + + def p_WS(self): + self.dbg() # Debug while not self.eof(): - if self.peek() in c_ws: + if self.peek() in self.c_ws: self.next() - elif self.peeks(c_comment): + elif self.peeks(self.c_comment): self.p_COMMENT() else: break + def p_COMMENT(self): - self.expect(c_comment, "'#' expected at the beginning of a comment.") - while not self.eof() and not self.nexts(c_nl): - pass - self.eof() or self.expect(c_nl) + self.dbg() # Debug + self.expect(self.c_comment, "'#' expected at the beginning of a comment.") + while (not self.eof()) and (not self.nexts(self.c_nl)): + self.next(1) + def p_STATEMENT(self): + self.dbg() # Debug self.p_WS() - if self.peeks(c_if): + if self.peeks(self.c_if): self.p_CONDITION() else: # for operation or subtree, read VARNAME varname = self.p_VARNAME() self.p_WS() - if self.nexts(c_open): - self.p_BLOCK(varname) - self.p_WS() - self.expect(c_close) + if self.peeks(self.c_open): + self.p_SUBTREE(varname) else: self.p_OPERATION(varname) + def p_SUBTREE(self, varname=None): + self.dbg() # Debug if not varname: self.p_WS() varname = self.p_VARNAME() self.p_WS() - self.expect(c_open) + self.expect(self.c_open) # backup and extend the variable name prefix p = self.prefix - self.prefix = p + c_varname_sep + varname + self.prefix = p + self.c_varname_sep + varname self.p_BLOCK() self.prefix = p # close block and self.p_WS() - self.expect(c_close) + self.expect(self.c_close) + def p_OPERATION(self, varname=None): + self.dbg() # Debug if not varname: self.p_WS() varname = self.p_VARNAME() self.p_WS() - if self.nexts(c_set): + if self.nexts(self.c_set): op = 'SET' - elif self.nexts(c_append): + elif self.nexts(self.c_append): op = 'APPEND' else: - self.syntaxError('Unknown operation.') + self.syntax_error('Unknown operation.') self.p_WS() exp = self.p_EXPRESSION() - v = self.tree.lookup((self.prefix+c_varname_sep+varname).lstrip(c_varname_sep)) + vname = (self.prefix+self.c_varname_sep+varname).lstrip(self.c_varname_sep) + v = self.tree.lookup(vname) if self.conditions: cnd = self.conditions[-1] else: cnd = None - v.add_operation(op, cnd, exp, self.priority) + op = conf.Operation(op, cnd, exp, level=self.level, + source="%s:%d:%d"%(self.fname, self.line, self.column)) + # NOTE/WARNING: The last character of operation will be reported in case of error. + v.add_operation(op) + self.read_ops.append( (vname, op) ) + def p_CONDITION(self): + self.dbg() # Debug self.p_WS() - self.expect(c_if) + t = u"condition at %s:%d:%d"%(self.fname, self.line, self.column) + self.expect(self.c_if) self.p_WS() - f = p_FORMULA(self) - cnd = ConfigCondition(f) + f = self.p_FORMULA() + cnd = conf.ConfigCondition(f, text=t, parent=(self.conditions and self.conditions[-1]) or None) self.conditions.append(cnd) # Parse a block self.p_WS() - self.expect(c_open) + self.expect(self.c_open) self.p_BLOCK() self.p_WS() - self.expect(c_close) + self.expect(self.c_close) # Cleanup self.conditions.pop() + def p_VARNAME(self): + self.dbg() # Debug vnl = [] - while self.peek().isalnum() or self.peek() in u'-_': + while self.preread(1) and (self.peek().isalnum() or self.peek() in u'-_.'): vnl.append(self.next()) vn = u''.join(vnl) - if not re_VARNAME.match(vn): - self.syntax_error('Invalid variable name') + if not conf.re_VARNAME.match(vn): + self.syntax_error('Invalid variable name %r', vn) return vn + def p_EXPRESSION(self): + self.dbg() # Debug + if self.peek() not in '\'"': + # Expect a variable name + varname = self.p_VARNAME() + return conf.ConfigExpression((self.tree.lookup(varname),), varname) op = self.next() - if op not in '\'"': - self.syntax_error('Invalid start of expression') # Parse literal expression if op == u'\'': exl = [] @@ -216,7 +299,7 @@ class ConfParser(object): exl.append(self.next()) self.expect(op) s = u''.join(exl) - return ConfigExpression((s,), s) + return conf.ConfigExpression((s,), s) # Parse expression with variables exl = [op] expr = [] @@ -225,17 +308,17 @@ class ConfParser(object): if self.nexts(u'\\'): # Escape sequence c = self.next() - if c not in u'\\"n' + c_open + c_close: + if c not in u'\\"n' + self.c_open + self.c_close: self.syntax_error('Illeal escape sequence in expression') if c == 'n': expr.append(u'\n') else: expr.append(c) exl.append(c) - elif self.nexts(c_open): + elif self.nexts(self.c_open): # Parse a variable name in '{}' varname = self.p_VARNAME() - self.expect(c_close) + self.expect(self.c_close) exl.append(varname) expr.append(self.tree.lookup(varname)) else: @@ -246,23 +329,29 @@ class ConfParser(object): # Concatenate consecutive characters in expr expr2 = [] for i in expr: - if expr2 and isinstance(expr2[-1], unicode): + if expr2 and isinstance(expr2[-1], unicode) and isinstance(i, unicode): expr2[-1] = expr2[-1] + i else: expr2.append(i) - return ConfigExpression(tuple(expr2), exs) + return conf.ConfigExpression(expr2, exs) + def p_FORMULA(self): + self.dbg() # Debug self.p_WS() # Combined logical formula if self.nexts(u'('): f1 = self.p_FORMULA() self.p_WS() - if self.nexts(c_and): + if self.nexts(self.c_and): + if self.peek(1).isalnum(): + self.syntax_error('trailing characters after %r', self.c_and) f2 = self.p_FORMULA() self.p_WS() self.expect(u')') return ('AND', f1, f2) - elif self.nexts(c_or): + elif self.nexts(self.c_or): + if self.peek(1).isalnum(): + self.syntax_error('trailing characters after %r', self.c_or) f2 = self.p_FORMULA() self.p_WS() self.expect(u')') @@ -272,7 +361,9 @@ class ConfParser(object): return f1 else: self.syntax_error("Logic operator or ')' expected") - elif self.nexts(c_not): + elif self.nexts(self.c_not): + if self.peek().isalnum(): + self.syntax_error('trailing characters after %r', self.c_not) # 'not' formula f = self.p_FORMULA() return ('NOT', f) @@ -280,11 +371,11 @@ class ConfParser(object): # Should be (in)equality condition e1 = self.p_EXPRESSION() self.p_WS() - if self.nexts(c_eq): + if self.nexts(self.c_eq): self.p_WS() e2 = self.p_EXPRESSION() return ('==', e1, e2) - elif self.nexts(c_neq): + elif self.nexts(self.c_neq): self.p_WS() e2 = self.p_EXPRESSION() return ('!=', e1, e2)