# Origin: git patch "Complete, but untested config parser." for
# t/moe/confparser.py by Tomas Gavenciak, Sun, 23 May 2010
# (commit 067c6380413ee589e57ad1d15f3914df2cfc9062).

"""
[... the beginning of this docstring is outside the visible excerpt ...]

TODO: should whitespace (incl. '\\n') be allowed (almost) everywhere?
Generally, whitespace and comments are allowed everywhere except in variable names and inside expressions.
Also, COMMENT must not contain '\\n'.

FILE, BLOCK, STATEMENT, OPERATION, SUBTREE, CONDITION, FORMULA, AND, OR and NOT eat any preceding whitespace. TODO: check?

The configuration syntax is the following:

[... grammar rules outside the visible excerpt ...]

AND = FORMULA WS 'and' FORMULA
OR = FORMULA WS 'or' FORMULA
NOT = WS 'not' FORMULA

NOTE: Formula may contain additional/extra parentheses

EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'"
ECHAR = re('([^\\{}]|\\\\|\\{|\\}|\\n)*')
VARNAME = re('[a-zA-Z0-9-_]+(\\.[a-zA-Z0-9-_]+)*')
"""

import logging as log
import re


class ConfSyntaxError(Exception):
    """Syntax error found while parsing a configuration file.

    Attributes:
        msg: human-readable description of the problem.
        fname: name of the parsed file ('' when unknown).
        line: line number of the error, or None when unknown.
        column: column number of the error, or None when unknown.
    """
    # TODO: choose better superclass

    def __init__(self, msg, fname='', line=None, column=None):
        # Hand every field to the base class so that `args` (and therefore
        # repr()) carries the full error, not only the positional arguments.
        super(ConfSyntaxError, self).__init__(msg, fname, line, column)
        self.msg = msg
        self.fname = fname
        self.line = line
        self.column = column

    def __str__(self):
        # `line` and `column` default to None; formatting them with '%d'
        # raised TypeError, so an unknown position is printed as '?'.
        if self.line is None:
            line = '?'
        else:
            line = self.line
        if self.column is None:
            column = '?'
        else:
            column = self.column
        return 'ConfSyntaxError %s:%s:%s: %s' % (self.fname, line, column, self.msg)


# Single-character and keyword tokens of the configuration syntax.
c_varname_sep = u'.'
c_comment = u'#'
c_open = u'{'
c_close = u'}'
c_ws = u' \t\n'
c_sep = u';\n'
c_nl = u'\n'
c_if = u'if'
c_and = u'and'
c_or = u'or'
c_not = u'not'
c_eq = u'=='
c_neq = u'!='
c_set = u'='
c_append = u'+='

# Variable name regexp, dots (separators) must be separated from edges and each other.
re_VARNAME = re.compile(r'\A([A-Za-z0-9_-]+\.)*[A-Za-z0-9_-]+\Z')

# Text type of the running interpreter (`unicode` on Python 2, `str` on 3).
_text_type = type(u'')


class ConfParser(object):
    """Recursive-descent parser that loads a configuration stream into a tree.

    The `p_*` methods are named after the grammar rules listed in the module
    docstring (BLOCK, STATEMENT, OPERATION, SUBTREE, CONDITION, FORMULA, ...).
    Parsed operations are recorded through `tree.lookup(name)` followed by
    `add_operation(...)` on the returned object.

    NOTE(review): `ConfigCondition` and `ConfigExpression` are used below but
    are not imported in the visible part of this module -- confirm where they
    are expected to come from.
    """

    # Number of characters requested from the stream per read.
    _CHUNK = 4096

    def __init__(self, f, tree, fname='', priority=0):
        """Set up a parser reading from stream `f` and filling `tree`.

        `f` must provide `read(n)`; it is assumed to return unicode text (a
        byte stream is not decoded here).  `fname` is only used in error
        messages.  `priority` is passed unchanged to every `add_operation`.

        NOTE(review): the original code read `self.priority` without ever
        setting it; the default of 0 is an assumption -- confirm.
        """
        self.f = f  # Stream
        self.fname = fname  # Filename
        self.line = 1
        self.col = 1
        self.tree = tree  # ConfTree to fill
        self.prefix = ''  # Prefix of variable name, may begin with '.'
        # Stack of nested conditions, these are chained, so only the last is necessary
        self.conds = []
        self.priority = priority
        self.buf = u''  # Text read from `f`; unread part starts at `pos`
        self.pos = 0

    # ---- low-level character access -------------------------------------

    def _fill(self, l):
        "Buffer at least `l` unread characters, as far as the stream has them."
        while len(self.buf) - self.pos < l:
            chunk = self.f.read(max(l, self._CHUNK))
            if not chunk:
                break
            # Drop the consumed prefix while appending, to keep `buf` short.
            self.buf = self.buf[self.pos:] + chunk
            self.pos = 0

    @staticmethod
    def _is_name_char(c):
        "True if the single character `c` may appear in a (dotted) VARNAME."
        # The emptiness test matters: `u'' in some_string` is always True.
        return bool(c) and (c.isalnum() or c in u'-_' + c_varname_sep)

    def _at_keyword(self, word):
        "True if `word` comes next and is not merely the start of a longer name."
        ahead = self.peek(len(word) + 1)
        if ahead[:len(word)] != word:
            return False
        return not self._is_name_char(ahead[len(word):])

    def peek(self, l=1):
        """Peek and return next `l` unicode characters.

        Fewer characters (possibly an empty string) are returned when the
        stream ends first.  Nothing is consumed.
        """
        self._fill(l)
        return self.buf[self.pos:self.pos + l]

    def peeks(self, s):
        "Peek and compare next `len(s)` characters to `s`. Unicode."
        s = _text_type(s)
        return self.peek(len(s)) == s

    def next(self, l=1):
        """Eat and return next `l` unicode characters.

        Updates `self.line` / `self.col` to the position following the
        returned text.
        """
        s = self.peek(l)
        self.pos += len(s)
        newlines = s.count(c_nl)
        if newlines:
            self.line += newlines
            # Column of the character following the last newline in `s`.
            self.col = len(s) - s.rfind(c_nl)
        else:
            self.col += len(s)
        return s

    def nexts(self, s):
        """Eat the next `len(s)` characters if they equal `s`. Unicode.

        Returns True when `s` matched and was eaten; otherwise returns False
        and consumes nothing, so alternatives can be tried one after another.
        """
        s = _text_type(s)
        if not self.peeks(s):
            return False
        self.next(len(s))
        return True

    def eof(self):
        "Check for end-of-stream."
        return not self.peek()

    def expect(self, s, msg=None):
        """Eat `s` from the input or raise a syntax error. Unicode.

        `msg` replaces the default "<s> expected." error text.
        """
        s = _text_type(s)
        if not self.nexts(s):
            self.syntaxError(msg or u"%r expected." % (s,))

    # Name under which this method was originally defined.
    expected = expect

    def syntaxError(self, msg, *args):
        """Raise a syntax error with file/line/column info.

        `msg` is %-formatted with `args` only when `args` are given, so a
        ready-made message containing '%' is passed through untouched.
        """
        if args:
            msg = msg % args
        raise ConfSyntaxError(fname=self.fname, line=self.line,
                              column=self.col, msg=msg)

    # Spelling used by most call sites.
    syntax_error = syntaxError

    # ---- grammar rules --------------------------------------------------

    def parse(self):
        "Parse the whole stream; anything left after the top-level block is an error."
        self.p_BLOCK()
        self.p_WS()
        if not self.eof():
            self.syntaxError(u"Unexpected %r.", self.peek())

    def p_BLOCK(self):
        "Parse statements separated by ';' or newline, up to '}' or end of input."
        self.p_WS()
        while not self.eof() and not self.peeks(c_close):
            self.p_STATEMENT()
            # p_WS swallows newlines too, so it reports whether it crossed
            # one; that newline already separates the statements.
            separated = self.p_WS()
            while not self.eof() and self.peek() in c_sep:
                self.p_SEP()
                separated = True
                self.p_WS()
            if not separated:
                break

    def p_SEP(self):
        "Eat a single statement separator (one character of `c_sep`)."
        if self.eof() or self.peek() not in c_sep:
            self.syntaxError(u"Statement separator expected.")
        self.next()

    def p_WS(self):
        """Skip whitespace and comments.

        Returns True if a newline was skipped (a comment counts, since it
        extends to the end of its line), False otherwise.
        """
        crossed_newline = False
        while not self.eof():
            c = self.peek()
            if c in c_ws:
                if c == c_nl:
                    crossed_newline = True
                self.next()
            elif self.peeks(c_comment):
                self.p_COMMENT()
                crossed_newline = True
            else:
                break
        return crossed_newline

    def p_COMMENT(self):
        "Eat a comment: '#' up to and including the end of the line (or input)."
        self.expect(c_comment, "'#' expected at the beginning of a comment.")
        while not self.eof() and not self.peeks(c_nl):
            self.next()
        if not self.eof():
            self.expect(c_nl)

    def p_STATEMENT(self):
        "Parse one statement: a condition, a subtree or an operation."
        self.p_WS()
        if self._at_keyword(c_if):
            self.p_CONDITION()
            return
        # for operation or subtree, read VARNAME
        varname = self.p_VARNAME()
        self.p_WS()
        if self.peeks(c_open):
            self.p_SUBTREE(varname)
        else:
            self.p_OPERATION(varname)

    def p_SUBTREE(self, varname=None):
        """Parse `VARNAME { BLOCK }`.

        `varname` may be given when the caller has already read the name.
        Inside the block, variable names are prefixed with `varname`.
        """
        if not varname:
            self.p_WS()
            varname = self.p_VARNAME()
        self.p_WS()
        self.expect(c_open)
        # backup and extend the variable name prefix
        saved_prefix = self.prefix
        self.prefix = saved_prefix + c_varname_sep + varname
        try:
            self.p_BLOCK()
        finally:
            self.prefix = saved_prefix
        # close block
        self.p_WS()
        self.expect(c_close)

    def p_OPERATION(self, varname=None):
        """Parse `VARNAME = EXPRESSION` or `VARNAME += EXPRESSION`.

        `varname` may be given when the caller has already read the name.
        The operation ('SET' or 'APPEND') is added to the variable looked up
        in the tree, together with the innermost active condition (or None).
        """
        if not varname:
            self.p_WS()
            varname = self.p_VARNAME()
        self.p_WS()
        if self.nexts(c_set):
            op = 'SET'
        elif self.nexts(c_append):
            op = 'APPEND'
        else:
            self.syntaxError('Unknown operation.')
        self.p_WS()
        exp = self.p_EXPRESSION()
        full_name = (self.prefix + c_varname_sep + varname).lstrip(c_varname_sep)
        v = self.tree.lookup(full_name)
        if self.conds:
            cnd = self.conds[-1]
        else:
            cnd = None
        v.add_operation(op, cnd, exp, self.priority)

    def p_CONDITION(self):
        "Parse `if FORMULA { BLOCK }`; the condition applies to the whole block."
        self.p_WS()
        self.expect(c_if)
        self.p_WS()
        formula = self.p_FORMULA()
        # NOTE(review): the comment on `conds` says conditions are chained,
        # yet the enclosing condition is not passed here -- confirm.
        cnd = ConfigCondition(formula)
        self.conds.append(cnd)
        try:
            # Parse a block
            self.p_WS()
            self.expect(c_open)
            self.p_BLOCK()
            self.p_WS()
            self.expect(c_close)
        finally:
            # Cleanup
            self.conds.pop()

    def p_VARNAME(self):
        "Parse and return a (possibly dotted) variable name."
        chars = []
        while self._is_name_char(self.peek()):
            chars.append(self.next())
        vn = u''.join(chars)
        if not re_VARNAME.match(vn):
            self.syntax_error('Invalid variable name')
        return vn

    def p_EXPRESSION(self):
        """Parse a quoted expression and return a `ConfigExpression`.

        A single-quoted expression is a literal on one line.  A double-quoted
        expression may contain the escapes \\\\, \\", \\n, \\{ and \\} as
        well as `{VARNAME}` references, which are looked up in the tree.
        """
        if self.eof() or self.peek() not in u'\'"':
            self.syntax_error('Invalid start of expression')
        op = self.next()
        # Parse literal expression
        if op == u'\'':
            chars = []
            while not self.peeks(op):
                # The grammar forbids newlines in literals; stopping here
                # also avoids spinning forever on an unterminated quote.
                if self.eof() or self.peeks(c_nl):
                    self.syntax_error('Unterminated literal expression')
                chars.append(self.next())
            self.expect(op)
            s = u''.join(chars)
            return ConfigExpression((s,), s)
        # Parse expression with variables
        exl = [op]  # source text of the expression
        expr = []  # parsed parts: characters and looked-up variables
        while not self.peeks(op):
            if self.eof():
                self.syntax_error('Unterminated expression')
            exl.append(self.peek())
            if self.nexts(u'\\'):
                # Escape sequence
                c = self.next()
                if not c or c not in u'\\"n' + c_open + c_close:
                    self.syntax_error('Illeal escape sequence in expression')
                if c == 'n':
                    expr.append(u'\n')
                else:
                    expr.append(c)
                exl.append(c)
            elif self.nexts(c_open):
                # Parse a variable name in '{}'
                varname = self.p_VARNAME()
                self.expect(c_close)
                exl.append(varname)
                exl.append(c_close)
                expr.append(self.tree.lookup(varname))
            else:
                # Regular character
                expr.append(self.next())
        self.expect(op)
        # Record the closing quote too, so the text mirrors the input.
        exl.append(op)
        exs = u''.join(exl)
        # Concatenate consecutive characters in expr; variable objects stay
        # separate items.
        merged = []
        for part in expr:
            if (merged and isinstance(part, _text_type)
                    and isinstance(merged[-1], _text_type)):
                merged[-1] = merged[-1] + part
            else:
                merged.append(part)
        return ConfigExpression(tuple(merged), exs)

    def p_FORMULA(self):
        """Parse a condition formula and return it as a nested tuple.

        The result is one of ('AND', f1, f2), ('OR', f1, f2), ('NOT', f),
        ('==', e1, e2) or ('!=', e1, e2); extra parentheses are dropped.
        """
        self.p_WS()
        # Combined logical formula
        if self.nexts(u'('):
            f1 = self.p_FORMULA()
            self.p_WS()
            for word, tag in ((c_and, 'AND'), (c_or, 'OR')):
                if self.nexts(word):
                    f2 = self.p_FORMULA()
                    self.p_WS()
                    self.expect(u')')
                    return (tag, f1, f2)
            if self.nexts(u')'):
                # Only extra parentheses
                return f1
            self.syntax_error("Logic operator or ')' expected")
        if self.nexts(c_not):
            # 'not' formula
            return ('NOT', self.p_FORMULA())
        # Should be (in)equality condition
        e1 = self.p_EXPRESSION()
        self.p_WS()
        for word, tag in ((c_eq, '=='), (c_neq, '!=')):
            if self.nexts(word):
                self.p_WS()
                e2 = self.p_EXPRESSION()
                return (tag, e1, e2)
        self.syntax_error("Comparation operator expected")