5 Simple Moe configuration file syntax parser.
7 TODO: decide necessity of '()' in/around formulas
8 TODO: check escaping in expressions
9 TODO: should whitespace (incl. '\\n') be allowed (almost) everywhere?
10 can a comment appear anywhere whitespace can?
12 Generally, whitespace and comments are allowed everywhere except in variable names and inside expressions.
13 Also, COMMENT must not contain '\\n'.
15 FILE, BLOCK, STATEMENT, OPERATION, SUBTREE, CONDITION, FORMULA, AND, OR and NOT eat any preceding whitespace. TODO: check?
17 The configuration syntax is the following:
20 BLOCK = WS | STATEMENT ( SEP STATEMENT )*
23 WS = ( ' ' | '\\t' | '\\n' | COMMENT )*
25 COMMENT = re('#[^\\n]*\\n')
27 STATEMENT = CONDITION | OPERATION | SUBTREE
29 OPERATION = WS VARNAME WS ( '=' | '+=' ) WS EXPRESSION
30 SUBTREE = WS VARNAME WS '{' BLOCK WS '}'
31 CONDITION = WS 'if' FORMULA WS '{' BLOCK WS '}'
33 FORMULA = WS (( EXPRESSION WS ( '!=' | '==' ) WS EXPRESSION ) | '(' AND WS ')' | '(' OR WS ')' | NOT )
34 AND = FORMULA WS 'and' FORMULA
35 OR = FORMULA WS 'or' FORMULA
36 NOT = WS 'not' FORMULA
38 NOTE: ';' or '\n' is currently required even after CONDITION and SUBTREE block
39 TODO: change to OPERATION only
40 NOTE: Formula may contain additional/extra parentheses
42 EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'"
43 ECHAR = re('([^\\{}]|\\\\|\\{|\\}|\\n)*')
44 VARNAME = re('[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)*')
47 import re, types, itertools, logging as log
49 import moe.conf as conf
52 class ConfigSyntaxError(conf.ConfigError):
54 def __init__(self, msg, fname='<unknown>', line=None, column=None):
61 return('ConfigSyntaxError %s:%d:%d: %s'%(self.fname, self.line, self.column, self.msg))
64 class ConfigParser(object):
81 def __init__(self, s, tree, fname='<unknown>', level=0):
82 """Create a config file parser.
83 `s` is either a string, unicode or an open file. File is assumed to be utf-8, string is converted to unicode.
84 `tree` is a ConfigTree to fill the operations into.
85 `fname` is an optional name of the file, for debugging and syntax errors.
86 `level` indicates the precedence the operations should have in the ConfigTree
88 self.s = s # Unicode, ascii string or an open file
89 self.buf = u"" # Read-buffer for s file, whole unicode string for s string/unicode
90 if isinstance(self.s, types.StringTypes):
91 self.buf = unicode(self.s)
92 elif (not isinstance(self.s, file)) or self.s.closed:
93 raise TypeError("Expected unicode, str or open file.")
95 self.fname = fname # Filename
98 self.tree = tree # ConfTree to fill
99 self.level = level # level of the parsed operations
100 self.prefix = '' # Prefix of variable name, may begin with '.'
101 self.conditions = [] # Stack of nested conditions, these are chained, so only the last is necessary
102 self.read_ops = [] # List of parsed operations (varname, `Operation`), returned by `self.parse()`
104 def preread(self, l):
105 "Make sure buf contains at least `l` next characters, return True on success and False on hitting EOF."
106 if isinstance(self.s, file):
107 self.buf = self.buf[self.bufpos:] + self.s.read(max(l, 1024)).decode('utf8')
109 return len(self.buf) >= self.bufpos + l
111 def peek(self, l = 1):
112 "Peek and return next `l` unicode characters or everything until EOF."
114 return self.buf[self.bufpos:self.bufpos+l]
117 "Peek and compare next `len(s)` characters to `s`. Converts `s` to unicode. False on hitting EOF."
119 return self.peek(len(s)) == s
121 def next(self, l = 1):
122 "Eat and return next `l` unicode characters. Raise exception on EOF."
123 if not self.preread(l):
124 self.syntax_error("Unexpected end of file")
125 s = self.buf[self.bufpos:self.bufpos+l]
133 self.line += s.count('\n')
134 self.column = l - rnl - 1
138 """Compare next `len(s)` characters to `s`. On match, eat them and return True. Otherwise just return False.
139 Converts `s` to unicode. False on hitting EOF."""
147 "Check for end-of-stream."
148 return not self.preread(1)
150 def expect(self, s, msg=None):
151 "Eat and compare next `len(s)` characters to `s`. If not equal, raise an error with `msg`. Unicode."
153 if not self.nexts(s):
154 self.syntax_error(msg or u"%r expected."%(s,))
def syntax_error(self, msg, *args):
    """Raise a `ConfigSyntaxError` with the current file/line/column info.

    `msg` is the error message. If extra positional `args` are given, they
    are interpolated into `msg` with the `%` operator; with no `args` the
    message is used verbatim. This lets callers pass an already-formatted
    message (which may contain a literal '%') without triggering a
    formatting error in place of the intended syntax error.

    Always raises `ConfigSyntaxError`; never returns normally.
    """
    # Interpolate only when there is something to interpolate: `msg % ()`
    # raises TypeError/ValueError as soon as `msg` contains a stray '%'.
    text = (msg % args) if args else msg
    raise ConfigSyntaxError(fname=self.fname, line=self.line, column=self.column, msg=text)
162 for i in traceback.extract_stack():
166 if n: log.debug(s + n + ' ' + repr(self.peek(15)) + '...')
176 while (not self.eof()) and (not self.peeks(self.c_close)):
180 if self.eof() or self.peeks(self.c_close):
182 if self.line == l0: # No newline skipped in p_WS
185 self.nexts(';') # NOTE: this is weird - can ';' occur anywhere? Or at most once, but only after any p_WS debris?
190 while not self.eof():
191 if self.peek() in self.c_ws:
193 elif self.peeks(self.c_comment):
200 self.expect(self.c_comment, "'#' expected at the beginning of a comment.")
201 while (not self.eof()) and (not self.nexts(self.c_nl)):
204 def p_STATEMENT(self):
207 if self.peeks(self.c_if):
210 # for operation or subtree, read VARNAME
211 varname = self.p_VARNAME()
213 if self.peeks(self.c_open):
214 self.p_SUBTREE(varname)
216 self.p_OPERATION(varname)
218 def p_SUBTREE(self, varname=None):
222 varname = self.p_VARNAME()
224 self.expect(self.c_open)
225 # backup and extend the variable name prefix
227 self.prefix = p + self.c_varname_sep + varname
232 self.expect(self.c_close)
234 def p_OPERATION(self, varname=None):
238 varname = self.p_VARNAME()
240 if self.nexts(self.c_set):
242 elif self.nexts(self.c_append):
245 self.syntax_error('Unknown operation.')
247 exp = self.p_EXPRESSION()
248 vname = (self.prefix+self.c_varname_sep+varname).lstrip(self.c_varname_sep)
249 v = self.tree.lookup(vname)
251 cnd = self.conditions[-1]
254 op = conf.Operation(op, cnd, exp, level=self.level,
255 source="%s:%d:%d"%(self.fname, self.line, self.column))
256 # NOTE/WARNING: The last character of operation will be reported in case of error.
258 self.read_ops.append( (vname, op) )
260 def p_CONDITION(self):
263 t = u"condition at %s:%d:%d"%(self.fname, self.line, self.column)
264 self.expect(self.c_if)
267 cnd = conf.ConfigCondition(f, text=t, parent=(self.conditions and self.conditions[-1]) or None)
268 self.conditions.append(cnd)
271 self.expect(self.c_open)
274 self.expect(self.c_close)
276 self.conditions.pop()
281 while self.peek().isalnum() or self.peek() in u'-_.':
282 vnl.append(self.next())
284 if not conf.re_VARNAME.match(vn):
285 self.syntax_error('Invalid variable name %r', vn)
288 def p_EXPRESSION(self):
292 self.syntax_error('Invalid start of expression')
293 # Parse literal expression
296 while not self.peeks(op):
297 exl.append(self.next())
300 return conf.ConfigExpression((s,), s)
301 # Parse expression with variables
304 while not self.peeks(op):
305 exl.append(self.peek())
306 if self.nexts(u'\\'):
309 if c not in u'\\"n' + self.c_open + self.c_close:
310 self.syntax_error('Illeal escape sequence in expression')
316 elif self.nexts(self.c_open):
317 # Parse a variable name in '{}'
318 varname = self.p_VARNAME()
319 self.expect(self.c_close)
321 expr.append(self.tree.lookup(varname))
324 expr.append(self.next())
327 # Concatenate consecutive characters in expr
330 if expr2 and isinstance(expr2[-1], unicode) and isinstance(i, unicode):
331 expr2[-1] = expr2[-1] + i
334 return conf.ConfigExpression(tuple(expr2), exs)
339 # Combined logical formula
341 f1 = self.p_FORMULA()
343 if self.nexts(self.c_and):
344 if self.peek(1).isalnum():
345 self.syntax_error('trailing characters after %r', self.c_and)
346 f2 = self.p_FORMULA()
349 return ('AND', f1, f2)
350 elif self.nexts(self.c_or):
351 if self.peek(1).isalnum():
352 self.syntax_error('trailing characters after %r', self.c_or)
353 f2 = self.p_FORMULA()
356 return ('OR', f1, f2)
357 elif self.nexts(u')'):
358 # Only extra parentheses
361 self.syntax_error("Logic operator or ')' expected")
362 elif self.nexts(self.c_not):
363 if self.peek().isalnum():
364 self.syntax_error('trailing characters after %r', self.c_not)
369 # Should be (in)equality condition
370 e1 = self.p_EXPRESSION()
372 if self.nexts(self.c_eq):
374 e2 = self.p_EXPRESSION()
375 return ('==', e1, e2)
376 elif self.nexts(self.c_neq):
378 e2 = self.p_EXPRESSION()
379 return ('!=', e1, e2)
381 self.syntax_error("Comparation operator expected")