mj.ucw.cz Git - moe.git/commitdiff
Complete, but untested config parser.
author: Tomas Gavenciak <gavento@matfyz.cz>
Sun, 23 May 2010 16:00:41 +0000 (12:00 -0400)
committer: Tomas Gavenciak <gavento@matfyz.cz>
Sun, 23 May 2010 16:00:41 +0000 (12:00 -0400)
t/moe/confparser.py

index 58a7dad42907b159a924d299f30fdf948021960d..ed73a1c804cbb781cffc9ec00d12f4e3735976b6 100644 (file)
@@ -11,7 +11,8 @@ TODO: should whitespace (incl. '\\n') be allowed (almost) everywhere?
 
 Generally, whitespace and comments are allowed everywhere except in variable names and inside expressions.
 Also, COMMENT must not contain '\\n'. 
-FILE, BLOCK, STATEMENT, OPERATION, SUBTREE, CONDITION, FORMULA, AND, OR and NOT eat any preceding whitespace.
+
+FILE, BLOCK, STATEMENT, OPERATION, SUBTREE, CONDITION, FORMULA, AND, OR and NOT eat any preceding whitespace. TODO: check?
 
 The configuration syntax is the following:
 
@@ -34,19 +35,259 @@ AND = FORMULA WS 'and' FORMULA
 OR = FORMULA WS 'or' FORMULA
 NOT = WS 'not' FORMULA 
 
+NOTE: Formula may contain additional/extra parentheses
+
 EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'"
 ECHAR = re('([^\\{}]|\\\\|\\{|\\}|\\n)*')
+VARNAME = re('[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)*')
 """
 
 import re, logging as log
+    
+class ConfSyntaxError(Exception):
+  """Syntax error found while parsing a configuration.
+
+  Attributes: `msg` (description of the problem), `fname` (name of the parsed file),
+  `line` and `column` (position of the problem, None when not known).
+  """
+  # TODO: choose better superclass
+  def __init__(self, msg, fname='<unknown>', line=None, column=None):
+    # Initialise the base class as well, so that `args` carries the message.
+    Exception.__init__(self, msg)
+    self.msg = msg
+    self.fname = fname
+    self.line = line
+    self.column = column
+  def __str__(self):
+    "Format as 'ConfSyntaxError FNAME:LINE:COLUMN: MSG'; an unknown line or column is printed as '?'."
+    line = self.line
+    if line is None:
+      line = '?'
+    column = self.column
+    if column is None:
+      column = '?'
+    # '%s' instead of '%d': the position may be unknown (None) and must not break the formatting.
+    return('ConfSyntaxError %s:%s:%s: %s'%(self.fname, line, column, self.msg))
 
-c_tree_sep = u'.'
+# Tokens of the configuration language; the grammar itself is in the module docstring.
+# Separator of the parts of a variable name, and the comment / block delimiters.
+c_varname_sep = u'.'
 c_comment = u'#'
 c_open = u'{'
 c_close = u'}'
+# Character sets tested with `in` by the parser (whitespace, statement separators) and the newline.
+c_ws = u' \t\n'
+c_sep = u';\n'
+c_nl = u'\n'
+# Keywords of conditions and formulas.
 c_if = u'if'
+c_and = u'and'
+c_or = u'or'
+c_not = u'not'
+# Comparison operators of formulas, followed by the operators of operations (set, append).
+c_eq = u'=='
+c_neq = u'!='
+c_set = u'='
+c_append = u'+='
 
+# ConfParser.p_VARNAME matches a complete name against this; \A and \Z anchor it to the whole string.
 "Variable name regexp, dots (separators) must be separated from edges and each other."
 re_VARNAME = re.compile(r'\A([A-Za-z0-9_-]+\.)*[A-Za-z0-9_-]+\Z')
 
+class ConfParser(object):
+  """Recursive-descent parser of the configuration syntax described in the module docstring.
+
+  Each `p_XXX` method parses the grammar rule XXX starting at the current input position.
+  All problems are reported by raising ConfSyntaxError with the current file name, line and column.
+  """
+  def __init__(self, f, tree, fname='<unknown>', priority=0):
+    """Create a parser reading the stream `f` and storing the parsed operations into `tree`.
+
+    `fname` is used only in error messages; `priority` is passed along with every parsed operation.
+    The stream is not touched until the first character is needed.
+    """
+    self.f = f         # Stream
+    self.fname = fname # Filename
+    self.line = 1      # Line of the next unread character, 1-based
+    self.col = 1       # Column of the next unread character, 1-based
+    self.tree = tree   # ConfTree to fill
+    self.prefix = ''   # Prefix of variable name, may begin with '.'
+    self.conds = []    # Stack of nested conditions, these are chained, so only the last is necessary
+    self.priority = priority # Handed to add_operation() by p_OPERATION
+    self._data = None  # Whole input as unicode, read lazily by _buffer()
+    self._pos = 0      # Index of the next unread character in _data
+  def _buffer(self):
+    "Return the whole input as unicode; the stream is read only on the first call."
+    if self._data is None:
+      data = self.f.read()
+      if not isinstance(data, unicode):
+        # NOTE(review): byte streams are assumed to be UTF-8 encoded -- confirm with the callers.
+        data = data.decode('utf-8')
+      self._data = data
+    return self._data
+  def _is_varchar(self, c):
+    "Check whether the single character `c` may occur in a VARNAME (separator included); False for ''."
+    # The explicit emptiness test matters: `'' in some_string` is always True.
+    return bool(c) and (c.isalnum() or c in u'-_' + c_varname_sep)
+  def _peeks_keyword(self, kw):
+    "Check that the input continues with the keyword `kw` that is not merely a prefix of a variable name."
+    kw = unicode(kw)
+    s = self.peek(len(kw) + 1)
+    return s[:len(kw)] == kw and not self._is_varchar(s[len(kw):])
+  def peek(self, l = 1):
+    "Peek and return next `l` unicode characters (fewer, possibly none, at the end of the input)."
+    data = self._buffer()
+    return data[self._pos:self._pos + l]
+  def peeks(self, s):
+    "Peek and compare next `len(s)` characters to `s`. Unicode."
+    s = unicode(s)
+    return self.peek(len(s)) == s
+  def next(self, l = 1):
+    "Eat and return next `l` unicode characters (fewer at the end of the input), updating line and column."
+    s = self.peek(l)
+    for c in s:
+      if c == c_nl:
+        self.line += 1
+        self.col = 1
+      else:
+        self.col += 1
+    self._pos += len(s)
+    return s
+  def nexts(self, s):
+    "Compare next `len(s)` characters to `s`. On match eat them and return True, otherwise eat nothing and return False. Unicode."
+    s = unicode(s)
+    # Callers use this as a conditional consume, so a mismatch must leave the input untouched.
+    if not self.peeks(s):
+      return False
+    self.next(len(s))
+    return True
+  def eof(self):
+    "Check for end-of-stream."
+    return self._pos >= len(self._buffer())
+  def expected(self, s, msg=None):
+    "Eat next `len(s)` characters if they equal `s`, otherwise raise ConfSyntaxError with `msg`. Unicode."
+    s = unicode(s)
+    if not self.nexts(s):
+      self.syntaxError(msg or u"%r expected."%(s,))
+  def syntaxError(self, msg, *args):
+    "Raise ConfSyntaxError with file/line/column info; `msg` is %-formatted with `args` when any are given."
+    if args:
+      # Format only on request, a ready-made message may legitimately contain '%'.
+      msg = msg%args
+    raise ConfSyntaxError(fname=self.fname, line=self.line, column=self.col, msg=msg)
+  def parse(self):
+    "Parse the whole input into the tree. Raises ConfSyntaxError, also when anything is left after the top-level block."
+    self.p_BLOCK()
+    # p_BLOCK stops in front of a stray '}' or a statement without a separator; do not ignore it silently.
+    if not self.eof():
+      self.syntaxError('Unexpected input after the end of the configuration')
+  def p_BLOCK(self):
+    """Parse a sequence of statements up to the end of input or the closing brace (which is not eaten).
+
+    Statements are separated by ';' or by a newline. The loop also stops after a statement that is not
+    followed by a separator; reporting the unexpected input is then left to the caller.
+    """
+    self.p_WS()
+    while not self.eof() and not self.peeks(c_close):
+      self.p_STATEMENT()
+      # p_WS eats newlines, so a newline separator is detected by the change of the line number.
+      line0 = self.line
+      self.p_WS()
+      if self.eof():
+        break
+      if self.peek() in c_sep:
+        self.p_SEP()
+        self.p_WS()
+      elif self.line == line0:
+        break
+  def p_SEP(self):
+    "Eat one statement separator character, raise ConfSyntaxError when there is none."
+    if self.eof() or self.peek() not in c_sep:
+      self.syntaxError('Statement separator expected')
+    self.next()
+  def p_WS(self):
+    "Skip any whitespace (newlines included) and comments."
+    while not self.eof():
+      if self.peek() in c_ws:
+        self.next()
+      elif self.peeks(c_comment):
+        self.p_COMMENT()
+      else:
+        break
+  def p_COMMENT(self):
+    "Skip a comment: from '#' up to, but not including, the end of the line (or up to the end of input)."
+    self.expected(c_comment, "'#' expected at the beginning of a comment.")
+    # The newline stays in the input: a COMMENT must not contain it and p_WS eats it anyway.
+    while not self.eof() and not self.peeks(c_nl):
+      self.next()
+  def p_STATEMENT(self):
+    "Parse one statement: a condition, a subtree or an operation."
+    self.p_WS()
+    if self._peeks_keyword(c_if):
+      self.p_CONDITION()
+    else:
+      # for operation or subtree, read VARNAME
+      varname = self.p_VARNAME()
+      self.p_WS()
+      if self.peeks(c_open):
+        # p_SUBTREE eats the braces itself and extends the variable name prefix
+        self.p_SUBTREE(varname)
+      else:
+        self.p_OPERATION(varname)
+  def p_SUBTREE(self, varname=None):
+    "Parse `VARNAME { BLOCK }`; the name is read here unless already given in `varname`."
+    if not varname:
+      self.p_WS()
+      varname = self.p_VARNAME()
+    self.p_WS()
+    self.expected(c_open)
+    # backup and extend the variable name prefix
+    p = self.prefix
+    self.prefix = p + c_varname_sep + varname
+    self.p_BLOCK()
+    self.prefix = p
+    # close block
+    self.p_WS()
+    self.expected(c_close)
+  def p_OPERATION(self, varname=None):
+    """Parse `VARNAME = EXPRESSION` or `VARNAME += EXPRESSION` and add the operation to the variable.
+
+    The name is read here unless already given in `varname`. The variable is looked up in the tree
+    under the current prefix and receives the innermost enclosing condition (or None).
+    """
+    if not varname:
+      self.p_WS()
+      varname = self.p_VARNAME()
+    self.p_WS()
+    if self.nexts(c_set):
+      op = 'SET'
+    elif self.nexts(c_append):
+      op = 'APPEND'
+    else:
+      self.syntaxError('Unknown operation.')
+    self.p_WS()
+    exp = self.p_EXPRESSION()
+    v = self.tree.lookup((self.prefix+c_varname_sep+varname).lstrip(c_varname_sep))
+    if self.conds:
+      cnd = self.conds[-1]
+    else:
+      cnd = None
+    v.add_operation(op, cnd, exp, self.priority)
+  def p_CONDITION(self):
+    "Parse `if FORMULA { BLOCK }`; the condition is on top of the condition stack while the block is parsed."
+    self.p_WS()
+    self.expected(c_if)
+    self.p_WS()
+    f = self.p_FORMULA()
+    # NOTE(review): only the formula is passed here, the enclosing condition is not; p_OPERATION uses
+    # just the innermost condition, so confirm how nested conditions are supposed to be chained.
+    cnd = ConfigCondition(f)
+    self.conds.append(cnd)
+    # Parse a block
+    self.p_WS()
+    self.expected(c_open)
+    self.p_BLOCK()
+    self.p_WS()
+    self.expected(c_close)
+    # Cleanup
+    self.conds.pop()
+  def p_VARNAME(self):
+    "Parse and return a (possibly dotted) variable name, raise ConfSyntaxError if it is not a valid one."
+    vnl = []
+    while self._is_varchar(self.peek()):
+      vnl.append(self.next())
+    vn = u''.join(vnl)
+    if not re_VARNAME.match(vn):
+      self.syntaxError('Invalid variable name')
+    return vn
+  def p_EXPRESSION(self):
+    """Parse an expression and return it as a ConfigExpression.
+
+    A single-quoted expression is taken literally. In a double-quoted one, the escapes
+    \\\\ \\" \\n \\{ \\} are recognised and `{VARNAME}` is replaced by the variable looked up in the tree.
+    """
+    op = self.peek()
+    # The emptiness test catches the end of input ('' is "in" every string).
+    if not op or op not in u'\'"':
+      self.syntaxError('Invalid start of expression')
+    self.next()
+    # Parse literal expression
+    if op == u'\'':
+      exl = []
+      while not self.peeks(op):
+        if self.eof():
+          self.syntaxError('Unterminated expression')
+        exl.append(self.next())
+      self.expected(op)
+      s = u''.join(exl)
+      return ConfigExpression((s,), s)
+    # Parse expression with variables
+    exl = [op] # Source text of the expression, quotes included
+    expr = []  # Parsed parts: characters and looked-up variables
+    while not self.peeks(op):
+      if self.eof():
+        self.syntaxError('Unterminated expression')
+      exl.append(self.peek())
+      if self.nexts(u'\\'):
+        # Escape sequence
+        c = self.next()
+        if not c or c not in u'\\"n' + c_open + c_close:
+          self.syntaxError('Illegal escape sequence in expression')
+        if c == u'n':
+          expr.append(u'\n')
+        else:
+          expr.append(c)
+        exl.append(c)
+      elif self.nexts(c_open):
+        # Parse a variable name in '{}'
+        varname = self.p_VARNAME()
+        self.expected(c_close)
+        exl.append(varname + c_close)
+        expr.append(self.tree.lookup(varname))
+      else:
+        # Regular character
+        expr.append(self.next())
+    self.expected(op)
+    exl.append(op)
+    exs = u''.join(exl)
+    # Concatenate consecutive characters in expr; variables must stay separate items
+    expr2 = []
+    for i in expr:
+      if expr2 and isinstance(i, unicode) and isinstance(expr2[-1], unicode):
+        expr2[-1] = expr2[-1] + i
+      else:
+        expr2.append(i)
+    return ConfigExpression(tuple(expr2), exs)
+  def p_FORMULA(self):
+    """Parse a logical formula and return it as a nested tuple.
+
+    The result is one of ('AND', f1, f2), ('OR', f1, f2), ('NOT', f), ('==', e1, e2) and ('!=', e1, e2),
+    where `f*` are formulas and `e*` expressions. Extra parentheses around a formula are dropped.
+    """
+    self.p_WS()
+    # Combined logical formula
+    if self.nexts(u'('):
+      f1 = self.p_FORMULA()
+      self.p_WS()
+      if self.nexts(c_and):
+        op = 'AND'
+      elif self.nexts(c_or):
+        op = 'OR'
+      elif self.nexts(u')'):
+        # Only extra parentheses
+        return f1
+      else:
+        self.syntaxError("Logic operator or ')' expected")
+      f2 = self.p_FORMULA()
+      self.p_WS()
+      self.expected(u')')
+      return (op, f1, f2)
+    if self.nexts(c_not):
+      # 'not' formula
+      return ('NOT', self.p_FORMULA())
+    # Should be (in)equality condition
+    e1 = self.p_EXPRESSION()
+    self.p_WS()
+    if self.nexts(c_eq):
+      op = '=='
+    elif self.nexts(c_neq):
+      op = '!='
+    else:
+      self.syntaxError("Comparison operator expected")
+    self.p_WS()
+    e2 = self.p_EXPRESSION()
+    return (op, e1, e2)