2 Simple Moe configuration file syntax parser.
4 Generally, whitespace and comments are allowed everywhere except in variable names and inside expressions,
5 ``\\n`` ends a ``COMMENT``.
7 ``FILE``, ``BLOCK``, ``STATEMENT``, ``OPERATION``, ``SUBTREE``, ``CONDITION``, ``FORMULA``, ``AND``, ``OR``
8 and ``NOT`` ignore any preceding whitespace.
12 The configuration syntax is the following::
15 BLOCK = WS | STATEMENT ( SEP STATEMENT )*
18 WS = ( ' ' | '\t' | '\n' | COMMENT )*
20 COMMENT = re('#[^\n]*\n')
22 STATEMENT = CONDITION | OPERATION | SUBTREE
24 OPERATION = WS VARNAME WS ( '=' | '+=' ) WS EXPRESSION
25 SUBTREE = WS VARNAME WS '{' BLOCK WS '}'
26 CONDITION = WS 'if' FORMULA WS '{' BLOCK WS '}'
28 FORMULA = WS (( EXPRESSION WS ( '!=' | '==' ) WS EXPRESSION ) |
29 '(' AND WS ')' | '(' OR WS ')' | NOT )
30 AND = FORMULA WS 'and' FORMULA
31 OR = FORMULA WS 'or' FORMULA
32 NOT = WS 'not' FORMULA
34 EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\n]*'" | VARNAME
35 ECHAR = re('([^\{}]|\\|\{|\}|\\n)*')
36 VARNAME = re('[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)*')
38 .. todo:: should whitespace (incl. '\n') be allowed (almost) everywhere?
39 can comment be anywhere whitespace can?
40 .. note:: ';' or '\n' is currently required even after CONDITION and SUBTREE block
41 .. note:: Formula can contain additional/unnecessary parentheses
44 import re, types, itertools, logging as log
47 import moe.config as cf
52 Escape any ``{``, ``}``, ``"`` and ``\\`` in the given string, making it safe for parsing.
54 s = s.replace('\\', '\\\\')
55 s = s.replace('{', '\\{')
56 s = s.replace('}', '\\}')
57 s = s.replace('"', '\\"')
60 class ConfigSyntaxError(cf.ConfigError):
62 def __init__(self, msg, source='<unknown>', line=None, column=None):
69 return('ConfigSyntaxError %s:%d:%d: %s'%(self.source, self.line, self.column, self.msg))
72 class ConfigParser(object):
89 def __init__(self, s, tree, source='<unknown>', level=0):
90 """Create a config file parser.
91 `s` is either a string, unicode or an open file. File is assumed to be utf-8, string is converted to unicode.
92 `tree` is a ConfigTree to fill the operations into.
93 `source` is an optional name of the file, for debugging and syntax errors.
94 `level` indicates the precedence the operations should have in the ConfigTree
96 self.s = s # Unicode, ascii string or an open file
97 self.buf = u"" # Read-buffer for s file, whole unicode string for s string/unicode
98 if isinstance(self.s, types.StringTypes):
99 self.buf = unicode(self.s)
100 elif (not isinstance(self.s, file)) or self.s.closed:
101 raise TypeError("Expected unicode, str or open file.")
103 self.source = source # Usually filename
106 self.tree = tree # ConfTree to fill
107 self.level = level # level of the parsed operations
108 self.prefix = '' # Prefix of variable name, may begin with '.'
109 self.conditions = [] # Stack of nested conditions, these are chained, so only the last is necessary
110 self.read_ops = [] # List of parsed operations (varname, `Operation`), returned by `self.parse()`
112 def preread(self, l):
113 "Make sure buf contains at least `l` next characters, return True on success and False on hitting EOF."
114 if isinstance(self.s, file):
115 self.buf = self.buf[self.bufpos:] + self.s.read(max(l, 1024)).decode('utf8')
117 return len(self.buf) >= self.bufpos + l
119 def peek(self, l = 1):
120 "Peek and return next `l` unicode characters or everything until EOF."
122 return self.buf[self.bufpos:self.bufpos+l]
125 "Peek and compare next `len(s)` characters to `s`. Converts `s` to unicode. False on hitting EOF."
127 return self.peek(len(s)) == s
129 def next(self, l = 1):
130 "Eat and return next `l` unicode characters. Raise exception on EOF."
131 if not self.preread(l):
132 self.syntax_error("Unexpected end of file")
133 s = self.buf[self.bufpos:self.bufpos+l]
141 self.line += s.count('\n')
142 self.column = l - rnl - 1
146 """Compare next `len(s)` characters to `s`. On match, eat them and return True. Otherwise just return False.
147 Converts `s` to unicode. False on hitting EOF."""
155 "Check for end-of-stream."
156 return not self.preread(1)
158 def expect(self, s, msg=None):
159 "Eat and compare next `len(s)` characters to `s`. If not equal, raise an error with `msg`. Unicode."
161 if not self.nexts(s):
162 self.syntax_error(msg or u"%r expected."%(s,))
def syntax_error(self, msg, *args):
    """Raise a `ConfigSyntaxError` with the current source/line/column info.

    `msg` is a %-style format string and `args` are its arguments.
    When no `args` are given, `msg` is used verbatim: callers such as
    `expect()` pass an already formatted message, which may contain a
    literal '%' and must not be run through the formatting operator again.

    Always raises `ConfigSyntaxError`; never returns.
    """
    if args:
        msg = msg % args
    raise ConfigSyntaxError(source=self.source, line=self.line, column=self.column, msg=msg)
170 for i in traceback.extract_stack():
174 if n: log.debug(s + n + ' ' + repr(self.peek(15)) + '...')
184 while (not self.eof()) and (not self.peeks(self.c_close)):
188 if self.eof() or self.peeks(self.c_close):
190 if self.line == l0: # No newline skipped in p_WS
193 self.nexts(';') # NOTE: this is weird - can ';' occur anywhere? Or at most once, but only after any p_WS debris?
198 while not self.eof():
199 if self.peek() in self.c_ws:
201 elif self.peeks(self.c_comment):
208 self.expect(self.c_comment, "'#' expected at the beginning of a comment.")
209 while (not self.eof()) and (not self.nexts(self.c_nl)):
212 def p_STATEMENT(self):
215 if self.peeks(self.c_if):
218 # for operation or subtree, read VARNAME
219 varname = self.p_VARNAME()
221 if self.peeks(self.c_open):
222 self.p_SUBTREE(varname)
224 self.p_OPERATION(varname)
226 def p_SUBTREE(self, varname=None):
230 varname = self.p_VARNAME()
232 self.expect(self.c_open)
233 # backup and extend the variable name prefix
235 self.prefix = p + self.c_varname_sep + varname
240 self.expect(self.c_close)
242 def p_OPERATION(self, varname=None):
246 varname = self.p_VARNAME()
248 if self.nexts(self.c_set):
250 elif self.nexts(self.c_append):
253 self.syntax_error('Unexpected end of file.')
255 self.syntax_error('Unknown operation: %r...', self.peek(10))
257 exp = self.p_EXPRESSION()
258 vname = (self.prefix+self.c_varname_sep+varname).lstrip(self.c_varname_sep)
259 v = self.tree.lookup(vname)
261 cnd = self.conditions[-1]
264 op = cf.Operation(op, cnd, exp, level=self.level,
265 source="%s:%d:%d"%(self.source, self.line, self.column))
266 # NOTE/WARNING: The last character of operation will be reported in case of error.
268 self.read_ops.append( (vname, op) )
270 def p_CONDITION(self):
273 t = u"condition at %s:%d:%d"%(self.source, self.line, self.column)
274 self.expect(self.c_if)
277 cnd = cf.ConfigCondition(f, text=t, parent=(self.conditions and self.conditions[-1]) or None)
278 self.conditions.append(cnd)
281 self.expect(self.c_open)
284 self.expect(self.c_close)
286 self.conditions.pop()
291 while self.preread(1) and (self.peek().isalnum() or self.peek() in u'-_.'):
292 vnl.append(self.next())
294 if not cf.re_VARNAME.match(vn):
295 self.syntax_error('Invalid variable name %r', vn)
298 def p_EXPRESSION(self):
300 if self.peek() not in '\'"':
301 # Expect a variable name
302 varname = self.p_VARNAME()
303 return cf.ConfigExpression((self.tree.lookup(varname),), varname)
305 # Parse literal expression
308 while not self.peeks(op):
309 exl.append(self.next())
312 return cf.ConfigExpression((s,), s)
313 # Parse expression with variables
316 while not self.peeks(op):
317 exl.append(self.peek())
318 if self.nexts(u'\\'):
321 if c not in u'\\"n' + self.c_open + self.c_close:
322 self.syntax_error('Illeal escape sequence in expression')
328 elif self.nexts(self.c_open):
329 # Parse a variable name in '{}'
330 varname = self.p_VARNAME()
331 self.expect(self.c_close)
333 expr.append(self.tree.lookup(varname))
336 expr.append(self.next())
339 # Concatenate consecutive characters in expr
342 if expr2 and isinstance(expr2[-1], unicode) and isinstance(i, unicode):
343 expr2[-1] = expr2[-1] + i
346 return cf.ConfigExpression(expr2, exs)
351 # Combined logical formula
353 f1 = self.p_FORMULA()
355 if self.nexts(self.c_and):
356 if self.peek(1).isalnum():
357 self.syntax_error('trailing characters after %r', self.c_and)
358 f2 = self.p_FORMULA()
361 return ('AND', f1, f2)
362 elif self.nexts(self.c_or):
363 if self.peek(1).isalnum():
364 self.syntax_error('trailing characters after %r', self.c_or)
365 f2 = self.p_FORMULA()
368 return ('OR', f1, f2)
369 elif self.nexts(u')'):
370 # Only extra parentheses
373 self.syntax_error("Logic operator or ')' expected")
374 elif self.nexts(self.c_not):
375 if self.peek().isalnum():
376 self.syntax_error('trailing characters after %r', self.c_not)
381 # Should be (in)equality condition
382 e1 = self.p_EXPRESSION()
384 if self.nexts(self.c_eq):
386 e2 = self.p_EXPRESSION()
387 return ('==', e1, e2)
388 elif self.nexts(self.c_neq):
390 e2 = self.p_EXPRESSION()
391 return ('!=', e1, e2)
393 self.syntax_error("Comparation operator expected")