Simple Moe configuration file syntax parser.
TODO: decide necessity of '()' in/around formulas
TODO: check escaping in expressions
TODO: should whitespace (incl. '\\n') be allowed (almost) everywhere?
can comment be anywhere whitespace can?
STATEMENT = CONDITION | OPERATION | SUBTREE
OPERATION = WS VARNAME WS ( '=' | '+=' ) WS EXPRESSION
SUBTREE = WS VARNAME WS '{' BLOCK WS '}'
CONDITION = WS 'if' FORMULA WS '{' BLOCK WS '}'
FORMULA = WS (( EXPRESSION WS ( '!=' | '==' ) WS EXPRESSION ) | '(' AND WS ')' | '(' OR WS ')' | NOT )
OR = FORMULA WS 'or' FORMULA
NOT = WS 'not' FORMULA
NOTE: ';' or '\n' is currently required even after CONDITION and SUBTREE block
TODO: change to OPERATION only
NOTE: Formula may contain additional/extra parentheses
EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'" | VARNAME
ECHAR = re('([^\\{}]|\\\\|\\{|\\}|\\n)*')
VARNAME = re('[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)*')
"""
-import re, logging as log
-
-class ConfSyntaxError(Exception):
- # TODO: choose better superclass
+import re, types, itertools, logging as log
+import traceback
+import moe.conf as conf
+
+
class ConfigSyntaxError(conf.ConfigError):
    """Error raised by the parser when the input is not valid config syntax.

    Stores the message together with the file name, line and column that
    were current when the error was raised.
    """

    def __init__(self, msg, fname='<unknown>', line=None, column=None):
        self.msg = msg
        self.fname = fname
        self.line = line
        self.column = column

    def __str__(self):
        # `line` and `column` default to None, for which '%d' would raise a
        # TypeError; '%s' renders integers identically and '?' marks an
        # unknown position.
        line = '?' if self.line is None else self.line
        column = '?' if self.column is None else self.column
        return 'ConfigSyntaxError %s:%s:%s: %s' % (self.fname, line, column, self.msg)
+
+
class ConfigParser(object):
    """Parser for the configuration syntax described in the module docstring.

    The class attributes below are the literal tokens of the syntax; the
    parsing methods refer to them as `self.c_*`.
    """

    # Structure
    c_varname_sep = u'.'
    c_comment = u'#'
    c_open = u'{'
    c_close = u'}'
    # Whitespace and statement separators
    c_ws = u' \t\n'
    c_sep = u';\n'
    c_nl = u'\n'
    # Keywords
    c_if = u'if'
    c_and = u'and'
    c_or = u'or'
    c_not = u'not'
    # Comparison and assignment operators
    c_eq = u'=='
    c_neq = u'!='
    c_set = u'='
    c_append = u'+='
+
def __init__(self, s, tree, fname='<unknown>', level=0):
    """Create a config file parser.

    `s` is either a string, unicode or an open file. Files and byte strings
    are assumed to be utf-8 and are converted to unicode.
    `tree` is a ConfigTree to fill the operations into.
    `fname` is an optional name of the file, for debugging and syntax errors.
    `level` indicates the precedence the operations should have in the ConfigTree.

    Raises TypeError when `s` is neither a string nor an open file.
    """
    self.s = s          # Unicode, byte string or an open file
    self.buf = u""      # Read-buffer for s file, whole unicode string for s string/unicode
    if isinstance(s, unicode):
        self.buf = unicode(s)
    elif isinstance(s, str):
        # Decode byte strings as UTF-8 (a superset of ASCII) so that string
        # input and file input accept the same data.
        self.buf = s.decode('utf8')
    elif (not isinstance(s, file)) or s.closed:
        raise TypeError("Expected unicode, str or open file.")
    self.bufpos = 0     # Index of the next unread character in `buf`
    self.fname = fname  # Filename
    self.line = 1
    self.column = 1
    self.tree = tree    # ConfTree to fill
    self.level = level  # level of the parsed operations
    self.prefix = ''    # Prefix of variable name, may begin with '.'
    self.conditions = []  # Stack of nested conditions, these are chained, so only the last is necessary
    self.read_ops = []  # List of parsed operations (varname, `Operation`), returned by `self.parse()`
+
def preread(self, l):
    """Make sure the buffer holds at least `l` unread characters.

    Returns True on success and False when the input ends first. For string
    input the buffer already holds everything, so only the length is checked.
    For file-like input (anything with a `read` method) more data is read
    only when the buffer is actually short.

    Raises UnicodeDecodeError if the file is not valid UTF-8.
    """
    import codecs  # local import: needed only for file input

    available = len(self.buf) - self.bufpos
    if available >= l or not hasattr(self.s, 'read'):
        return available >= l

    # An incremental decoder keeps the tail of a multi-byte UTF-8 sequence
    # that happens to be split across two reads, instead of failing on it.
    decoder = getattr(self, '_decoder', None)
    if decoder is None:
        decoder = self._decoder = codecs.getincrementaldecoder('utf8')()

    # Drop the already consumed part before appending new data.
    self.buf = self.buf[self.bufpos:]
    self.bufpos = 0
    while len(self.buf) < l:
        chunk = self.s.read(max(l - len(self.buf), 1024))
        if not chunk:
            # End of file: flush the decoder (raises on a truncated sequence).
            self.buf += decoder.decode(chunk, True)
            break
        self.buf += decoder.decode(chunk)
    return len(self.buf) >= l
+
def peek(self, l = 1):
    """Return the next `l` unicode characters without consuming them.

    When fewer than `l` characters remain, everything up to EOF is returned
    (possibly an empty string).
    """
    self.preread(l)
    # Read `bufpos` only after preread(), which may rebase the buffer.
    start = self.bufpos
    return self.buf[start:start + l]
+
def peeks(self, s):
    """Check whether the upcoming input starts with `s`, consuming nothing.

    `s` is converted to unicode first. Returns False when EOF comes before
    `len(s)` characters.
    """
    wanted = unicode(s)
    return self.peek(len(wanted)) == wanted
+
def next(self, l = 1):
    """Consume and return the next `l` unicode characters.

    Updates `line` and `column` to the position of the next unread
    character. Reports a syntax error (via `syntax_error`) when fewer than
    `l` characters remain.
    """
    if not self.preread(l):
        self.syntax_error("Unexpected end of file")
    start = self.bufpos
    s = self.buf[start:start + l]
    self.bufpos = start + l
    newlines = s.count('\n')
    if newlines:
        self.line += newlines
        # Columns are 1-based (the parser starts at column 1), so the
        # character right after the last newline is column 1.
        self.column = l - s.rfind('\n')
    else:
        self.column += l
    return s
+
def nexts(self, s):
    """Consume `s` if the upcoming input starts with it.

    `s` is converted to unicode first. Returns True when `s` matched and was
    consumed; otherwise nothing is consumed and False is returned (also when
    EOF comes first).
    """
    wanted = unicode(s)
    if not self.peeks(wanted):
        return False
    self.next(len(wanted))
    return True
+
def eof(self):
    """Return True when no unread character is left in the input."""
    return not self.preread(1)
+
def expect(self, s, msg=None):
    """Consume `s` from the input or report a syntax error.

    `s` is converted to unicode. When the upcoming input does not start with
    `s`, a syntax error is reported with `msg`, or with a default
    "... expected." message when `msg` is not given.
    """
    s = unicode(s)
    if not self.nexts(s):
        # The text is already final; pass it as an argument of '%s' so that
        # a literal '%' in it is not interpreted as a format directive.
        self.syntax_error(u"%s", msg or u"%r expected."%(s,))
+
def syntax_error(self, msg, *args):
    """Raise a ConfigSyntaxError with the current file/line/column info.

    `msg` is used as a %-format string only when `args` are given, so a
    plain message may contain a literal '%'.
    """
    if args:
        msg = msg % args
    raise ConfigSyntaxError(fname=self.fname, line=self.line, column=self.column, msg=msg)
+
def dbg(self):
    """Log the innermost active `p_*` parsing method and the upcoming input.

    The message is indented by one space per `p_*` frame on the call stack
    and shows the next 15 characters. Nothing is done unless DEBUG logging
    is enabled on the root logger.
    """
    # Walking the stack and peeking input is costly and this is called from
    # every parsing method, so skip all of it when the message would be
    # dropped anyway.
    if not log.getLogger().isEnabledFor(log.DEBUG):
        return
    depth = 0
    current = None
    for frame in traceback.extract_stack():
        name = frame[2]  # function name of the frame
        if name[:2] == 'p_':
            depth += 1
            current = name
    if current:
        log.debug(' ' * depth + current + ' ' + repr(self.peek(15)) + '...')
+
def parse(self):
    """Parse the whole input as a BLOCK.

    Returns the list of parsed operations as `(varname, operation)` pairs;
    the list is reset at the start of every call.
    """
    self.read_ops = []
    self.p_BLOCK()
    return self.read_ops
+
def p_BLOCK(self):
    """Parse statements until EOF or a closing '}' (which is not consumed).

    Two statements on the same line must be separated by ';'. When a newline
    was skipped after a statement, a following ';' is optional.
    """
    self.dbg() # Debug
    self.p_WS()
    while not (self.eof() or self.peeks(self.c_close)):
        self.p_STATEMENT()
        line_before = self.line
        self.p_WS()
        if self.eof() or self.peeks(self.c_close):
            break
        if self.line == line_before:
            # No newline skipped in p_WS, so an explicit separator is required
            self.expect(';')
        else:
            # NOTE: this is weird - can ';' occur anywhere? Or at most once, but only after any p_WS debris?
            self.nexts(';')
        self.p_WS()
def p_WS(self):
    """Skip any run of whitespace characters and '#' comments."""
    self.dbg() # Debug
    while not self.eof():
        if self.peek() in self.c_ws:
            self.next()
            continue
        if not self.peeks(self.c_comment):
            # Neither whitespace nor a comment: leave it for the caller
            return
        self.p_COMMENT()
+
def p_COMMENT(self):
    """Consume a comment: '#' and everything up to and including the newline.

    A comment on the last line may also be terminated by EOF.
    """
    self.dbg() # Debug
    self.expect(self.c_comment, "'#' expected at the beginning of a comment.")
    while not self.eof():
        if self.nexts(self.c_nl):
            # The terminating newline is part of the comment
            break
        self.next(1)
+
def p_STATEMENT(self):
    """Parse one statement: a CONDITION, a SUBTREE or an OPERATION.

    A statement is a condition when it starts with the keyword 'if';
    otherwise a variable name is read and a following '{' selects a
    subtree, anything else an operation.
    """
    self.dbg() # Debug
    self.p_WS()
    keyword = self.c_if
    ahead = self.peek(len(keyword) + 1)
    follower = ahead[len(keyword):]
    # 'if' counts as the keyword only when it is not merely the beginning of
    # a variable name such as 'iface' or 'if_up'.
    continues_name = follower.isalnum() or follower in (u'-', u'_', u'.')
    if ahead.startswith(keyword) and not continues_name:
        self.p_CONDITION()
        return
    # for operation or subtree, read VARNAME
    varname = self.p_VARNAME()
    self.p_WS()
    if self.peeks(self.c_open):
        self.p_SUBTREE(varname)
    else:
        self.p_OPERATION(varname)
+
def p_SUBTREE(self, varname=None):
    """Parse a subtree: VARNAME '{' BLOCK '}'.

    `varname` may be passed by a caller that has already read it; otherwise
    it is read here. While the nested block is parsed, the variable name
    prefix is extended by `varname`.
    """
    self.dbg() # Debug
    if not varname:
        self.p_WS()
        varname = self.p_VARNAME()
    self.p_WS()
    self.expect(self.c_open)
    # backup and extend the variable name prefix
    outer_prefix = self.prefix
    self.prefix = outer_prefix + self.c_varname_sep + varname
    try:
        self.p_BLOCK()
    finally:
        # restore the prefix even when the nested block fails to parse
        self.prefix = outer_prefix
    # close block
    self.p_WS()
    self.expect(self.c_close)
+
def p_OPERATION(self, varname=None):
    """Parse an operation: VARNAME ( '=' | '+=' ) EXPRESSION.

    `varname` may be passed by a caller that has already read it; otherwise
    it is read here. The resulting operation is added to the variable looked
    up in the tree under the prefixed name, and `(name, operation)` is
    appended to `self.read_ops`.
    """
    self.dbg() # Debug
    if not varname:
        self.p_WS()
        varname = self.p_VARNAME()
    self.p_WS()
    if self.nexts(self.c_set):
        kind = 'SET'
    elif self.nexts(self.c_append):
        kind = 'APPEND'
    else:
        self.syntax_error('Unknown operation.')
    self.p_WS()
    exp = self.p_EXPRESSION()
    # Full variable name: prefix + name, without the leading separator(s)
    vname = (self.prefix + self.c_varname_sep + varname).lstrip(self.c_varname_sep)
    v = self.tree.lookup(vname)
    # Only the innermost condition is needed, the conditions are chained
    cnd = self.conditions[-1] if self.conditions else None
    # NOTE/WARNING: The last character of operation will be reported in case of error.
    operation = conf.Operation(kind, cnd, exp, level=self.level,
                               source="%s:%d:%d"%(self.fname, self.line, self.column))
    v.add_operation(operation)
    self.read_ops.append( (vname, operation) )
+
def p_CONDITION(self):
    """Parse a condition: 'if' FORMULA '{' BLOCK '}'.

    The new condition gets the innermost enclosing condition (if any) as its
    parent and is kept on the `self.conditions` stack while its block is
    parsed.
    """
    self.dbg() # Debug
    self.p_WS()
    # Position of the 'if' keyword, used as the description of the condition
    t = u"condition at %s:%d:%d"%(self.fname, self.line, self.column)
    self.expect(self.c_if)
    self.p_WS()
    f = self.p_FORMULA()
    parent = self.conditions[-1] if self.conditions else None
    cnd = conf.ConfigCondition(f, text=t, parent=parent)
    self.conditions.append(cnd)
    try:
        # Parse a block
        self.p_WS()
        self.expect(self.c_open)
        self.p_BLOCK()
        self.p_WS()
        self.expect(self.c_close)
    finally:
        # Cleanup, also when the block fails to parse
        self.conditions.pop()
+
def p_VARNAME(self):
    """Parse and return a variable name.

    Consumes the longest run of alphanumeric characters, '-', '_' and '.',
    then validates it against `conf.re_VARNAME`; an invalid (or empty) name
    is reported as a syntax error.
    """
    self.dbg() # Debug
    chars = []
    while self.preread(1):
        c = self.peek()
        if not (c.isalnum() or c in u'-_.'):
            break
        chars.append(self.next())
    vn = u''.join(chars)
    if not conf.re_VARNAME.match(vn):
        self.syntax_error('Invalid variable name %r', vn)
    return vn
+
def p_EXPRESSION(self):
+ self.dbg() # Debug
+ if self.peek() not in '\'"':
+ # Expect a variable name
+ varname = self.p_VARNAME()
+ return conf.ConfigExpression((self.tree.lookup(varname),), varname)
op = self.next()
- if op not in '\'"':
- self.syntax_error('Invalid start of expression')
# Parse literal expression
if op == u'\'':
exl = []
exl.append(self.next())
self.expect(op)
s = u''.join(exl)
- return ConfigExpression((s,), s)
+ return conf.ConfigExpression((s,), s)
# Parse expression with variables
exl = [op]
expr = []
if self.nexts(u'\\'):
# Escape sequence
c = self.next()
- if c not in u'\\"n' + c_open + c_close:
+ if c not in u'\\"n' + self.c_open + self.c_close:
self.syntax_error('Illeal escape sequence in expression')
if c == 'n':
expr.append(u'\n')
else:
expr.append(c)
exl.append(c)
- elif self.nexts(c_open):
+ elif self.nexts(self.c_open):
# Parse a variable name in '{}'
varname = self.p_VARNAME()
- self.expect(c_close)
+ self.expect(self.c_close)
exl.append(varname)
expr.append(self.tree.lookup(varname))
else:
# Concatenate consecutive characters in expr
expr2 = []
for i in expr:
- if expr2 and isinstance(expr2[-1], unicode):
+ if expr2 and isinstance(expr2[-1], unicode) and isinstance(i, unicode):
expr2[-1] = expr2[-1] + i
else:
expr2.append(i)
- return ConfigExpression(tuple(expr2), exs)
+ return conf.ConfigExpression(expr2, exs)
+
def p_FORMULA(self):
+ self.dbg() # Debug
self.p_WS()
# Combined logical formula
if self.nexts(u'('):
f1 = self.p_FORMULA()
self.p_WS()
- if self.nexts(c_and):
+ if self.nexts(self.c_and):
+ if self.peek(1).isalnum():
+ self.syntax_error('trailing characters after %r', self.c_and)
f2 = self.p_FORMULA()
self.p_WS()
self.expect(u')')
return ('AND', f1, f2)
- elif self.nexts(c_or):
+ elif self.nexts(self.c_or):
+ if self.peek(1).isalnum():
+ self.syntax_error('trailing characters after %r', self.c_or)
f2 = self.p_FORMULA()
self.p_WS()
self.expect(u')')
return f1
else:
self.syntax_error("Logic operator or ')' expected")
- elif self.nexts(c_not):
+ elif self.nexts(self.c_not):
+ if self.peek().isalnum():
+ self.syntax_error('trailing characters after %r', self.c_not)
# 'not' formula
f = self.p_FORMULA()
return ('NOT', f)
# Should be (in)equality condition
e1 = self.p_EXPRESSION()
self.p_WS()
- if self.nexts(c_eq):
+ if self.nexts(self.c_eq):
self.p_WS()
e2 = self.p_EXPRESSION()
return ('==', e1, e2)
- elif self.nexts(c_neq):
+ elif self.nexts(self.c_neq):
self.p_WS()
e2 = self.p_EXPRESSION()
return ('!=', e1, e2)