X-Git-Url: http://mj.ucw.cz/gitweb/?a=blobdiff_plain;ds=sidebyside;f=t%2Fmoe%2Fconfparser.py;h=d024912b94d9497aa679a6144603103d301ed460;hb=c861bd9c09d20e06533e122253bcd3bb94b0bef9;hp=e6ff5660182f8d11c5e626de4187764dbd7996f1;hpb=646341eb3be8bcabb9c5f07cb9072105db5666a9;p=eval.git diff --git a/t/moe/confparser.py b/t/moe/confparser.py index e6ff566..d024912 100644 --- a/t/moe/confparser.py +++ b/t/moe/confparser.py @@ -27,7 +27,7 @@ COMMENT = re('#[^\\n]*\\n') STATEMENT = CONDITION | OPERATION | SUBTREE OPERATION = WS VARNAME WS ( '=' | '+=' ) WS EXPRESSION -SUBTREE = WS VARNAME WS '{' BLOCK '}' +SUBTREE = WS VARNAME WS '{' BLOCK WS '}' CONDITION = WS 'if' FORMULA WS '{' BLOCK WS '}' FORMULA = WS (( EXPRESSION WS ( '!=' | '==' ) WS EXPRESSION ) | '(' AND WS ')' | '(' OR WS ')' | NOT ) @@ -35,29 +35,31 @@ AND = FORMULA WS 'and' FORMULA OR = FORMULA WS 'or' FORMULA NOT = WS 'not' FORMULA +NOTE: ';' or '\n' is currently required even after CONDITION and SUBTREE block + TODO: change to OPERATION only NOTE: Formula may contain additional/extra parentheses -EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'" +EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'" | VARNAME ECHAR = re('([^\\{}]|\\\\|\\{|\\}|\\n)*') VARNAME = re('[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)*') """ import re, types, itertools, logging as log import traceback -import conf +import moe.conf as conf + + +class ConfigSyntaxError(conf.ConfigError): -class ConfigSyntaxError(Exception): - # TODO: choose a better superclass def __init__(self, msg, fname='', line=None, column=None): self.msg = msg self.fname = fname self.line = line self.column = column + def __str__(self): return('ConfigSyntaxError %s:%d:%d: %s'%(self.fname, self.line, self.column, self.msg)) -"Variable name regexp, dots (separators) must be separated from edges and each other." -re_VARNAME = re.compile(r'\A([A-Za-z0-9_-]+\.)*[A-Za-z0-9_-]+\Z') class ConfigParser(object): c_varname_sep = u'.' @@ -75,6 +77,7 @@ class ConfigParser(object): c_neq = u'!=' c_set = u'=' c_append = u'+=' + def __init__(self, s, tree, fname='', level=0): """Create a config file parser. `s` is either a string, unicode or an open file. File is assumed to be utf-8, string is converted to unicode. @@ -82,7 +85,7 @@ class ConfigParser(object): `fname` is an optional name of the file, for debugging and syntax errors. `level` indicates the precedence the operations should have in the ConfigTree """ - self.s = s # Unicode, string or an open file + self.s = s # Unicode, ascii string or an open file self.buf = u"" # Read-buffer for s file, whole unicode string for s string/unicode if isinstance(self.s, types.StringTypes): self.buf = unicode(self.s) @@ -97,24 +100,28 @@ class ConfigParser(object): self.prefix = '' # Prefix of variable name, may begin with '.' self.conditions = [] # Stack of nested conditions, these are chained, so only the last is necessary self.read_ops = [] # List of parsed operations (varname, `Operation`), returned by `self.parse()` + def preread(self, l): "Make sure buf contains at least `l` next characters, return True on succes and False on hitting EOF." if isinstance(self.s, file): self.buf = self.buf[self.bufpos:] + self.s.read(max(l, 1024)).decode('utf8') self.bufpos = 0 return len(self.buf) >= self.bufpos + l + def peek(self, l = 1): "Peek and return next `l` unicode characters or everything until EOF." self.preread(l) return self.buf[self.bufpos:self.bufpos+l] + def peeks(self, s): "Peek and compare next `len(s)` characters to `s`. Converts `s` to unicode. False on hitting EOF." s = unicode(s) return self.peek(len(s)) == s + def next(self, l = 1): "Eat and return next `l` unicode characters. Raise exception on EOF." if not self.preread(l): - raise ConfigSyntaxError("Unexpected end of file") + self.syntax_error("Unexpected end of file") s = self.buf[self.bufpos:self.bufpos+l] self.bufpos += l rnl = s.rfind('\n') @@ -126,6 +133,7 @@ class ConfigParser(object): self.line += s.count('\n') self.column = l - rnl - 1 return s + def nexts(self, s): """Compare next `len(s)` characters to `s`. On match, eat them and return True. Otherwise just return False. Converts `s` to unicode. False on hitting EOF.""" @@ -134,17 +142,21 @@ class ConfigParser(object): self.next(len(s)) return True return False + def eof(self): "Check for end-of-stream." return not self.preread(1) + def expect(self, s, msg=None): "Eat and compare next `len(s)` characters to `s`. If not equal, raise an error with `msg`. Unicode." s = unicode(s) if not self.nexts(s): self.syntax_error(msg or u"%r expected."%(s,)) + def syntax_error(self, msg, *args): "Raise a syntax error with file/line/column info" raise ConfigSyntaxError(fname=self.fname, line=self.line, column=self.column, msg=(msg%args)) + def dbg(self): n = None; s = '' for i in traceback.extract_stack(): @@ -152,10 +164,12 @@ class ConfigParser(object): s += ' ' n = i[2] if n: log.debug(s + n + ' ' + repr(self.peek(15)) + '...') + def parse(self): self.read_ops = [] self.p_BLOCK() return self.read_ops + def p_BLOCK(self): self.dbg() # Debug self.p_WS() @@ -170,6 +184,7 @@ class ConfigParser(object): else: self.nexts(';') # NOTE: this is weird - can ';' occur anywhere? Or at most once, but only after any p_WS debris? self.p_WS() + def p_WS(self): self.dbg() # Debug while not self.eof(): @@ -179,11 +194,13 @@ class ConfigParser(object): self.p_COMMENT() else: break + def p_COMMENT(self): self.dbg() # Debug self.expect(self.c_comment, "'#' expected at the beginning of a comment.") while (not self.eof()) and (not self.nexts(self.c_nl)): self.next(1) + def p_STATEMENT(self): self.dbg() # Debug self.p_WS() @@ -197,6 +214,7 @@ class ConfigParser(object): self.p_SUBTREE(varname) else: self.p_OPERATION(varname) + def p_SUBTREE(self, varname=None): self.dbg() # Debug if not varname: @@ -212,6 +230,7 @@ class ConfigParser(object): # close block and self.p_WS() self.expect(self.c_close) + def p_OPERATION(self, varname=None): self.dbg() # Debug if not varname: @@ -237,6 +256,7 @@ class ConfigParser(object): # NOTE/WARNING: The last character of operation will be reported in case of error. v.add_operation(op) self.read_ops.append( (vname, op) ) + def p_CONDITION(self): self.dbg() # Debug self.p_WS() @@ -254,20 +274,24 @@ class ConfigParser(object): self.expect(self.c_close) # Cleanup self.conditions.pop() + def p_VARNAME(self): self.dbg() # Debug vnl = [] - while self.peek().isalnum() or self.peek() in u'-_.': + while self.preread(1) and (self.peek().isalnum() or self.peek() in u'-_.'): vnl.append(self.next()) vn = u''.join(vnl) - if not re_VARNAME.match(vn): + if not conf.re_VARNAME.match(vn): self.syntax_error('Invalid variable name %r', vn) return vn + def p_EXPRESSION(self): self.dbg() # Debug + if self.peek() not in '\'"': + # Expect a variable name + varname = self.p_VARNAME() + return conf.ConfigExpression((self.tree.lookup(varname),), varname) op = self.next() - if op not in '\'"': - self.syntax_error('Invalid start of expression') # Parse literal expression if op == u'\'': exl = [] @@ -309,7 +333,8 @@ class ConfigParser(object): expr2[-1] = expr2[-1] + i else: expr2.append(i) - return conf.ConfigExpression(tuple(expr2), exs) + return conf.ConfigExpression(expr2, exs) + def p_FORMULA(self): self.dbg() # Debug self.p_WS()