5 Simple Moe configuration file syntax parser.
7 TODO: decide necessity of '()' in/around formulas
8 TODO: check escaping in expressions
9 TODO: should whitespace (incl. '\\n') be allowed (almost) everywhere?
10 can comment be anywhere whitespace can?
12 Generally, whitespace and comments are allowed everywhere except in variable names and inside expressions.
13 Also, COMMENT must not contain '\\n'.
15 FILE, BLOCK, STATEMENT, OPERATION, SUBTREE, CONDITION, FORMULA, AND, OR and NOT eat any preceding whitespace. TODO: check?
17 The configuration syntax is the following:
20 BLOCK = WS | STATEMENT ( SEP STATEMENT )*
23 WS = ( ' ' | '\\t' | '\\n' | COMMENT )*
25 COMMENT = re('#[^\\n]*\\n')
27 STATEMENT = CONDITION | OPERATION | SUBTREE
29 OPERATION = WS VARNAME WS ( '=' | '+=' ) WS EXPRESSION
30 SUBTREE = WS VARNAME WS '{' BLOCK WS '}'
31 CONDITION = WS 'if' FORMULA WS '{' BLOCK WS '}'
33 FORMULA = WS (( EXPRESSION WS ( '!=' | '==' ) WS EXPRESSION ) | '(' AND WS ')' | '(' OR WS ')' | NOT )
34 AND = FORMULA WS 'and' FORMULA
35 OR = FORMULA WS 'or' FORMULA
36 NOT = WS 'not' FORMULA
38 NOTE: ';' or '\n' is currently required even after CONDITION and SUBTREE block
39 TODO: change to OPERATION only
40 NOTE: Formula may contain additional/extra parentheses
42 EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'" | VARNAME
43 ECHAR = re('([^\\{}]|\\\\|\\{|\\}|\\n)*')
44 VARNAME = re('[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)*')
47 import re, types, itertools, logging as log
49 import moe.conf as conf
52 class ConfigSyntaxError(conf.ConfigError):
54 def __init__(self, msg, fname='<unknown>', line=None, column=None):
61 return('ConfigSyntaxError %s:%d:%d: %s'%(self.fname, self.line, self.column, self.msg))
64 class ConfigParser(object):
81 def __init__(self, s, tree, fname='<unknown>', level=0):
82 """Create a config file parser.
83 `s` is either a string, unicode or an open file. File is assumed to be utf-8, string is converted to unicode.
84 `tree` is a ConfigTree to fill the operations into.
85 `fname` is an optional name of the file, for debugging and syntax errors.
86 `level` indicates the precedence the operations should have in the ConfigTree
88 self.s = s # Unicode, ascii string or an open file
89 self.buf = u"" # Read-buffer for s file, whole unicode string for s string/unicode
90 if isinstance(self.s, types.StringTypes):
91 self.buf = unicode(self.s)
92 elif (not isinstance(self.s, file)) or self.s.closed:
93 raise TypeError("Expected unicode, str or open file.")
95 self.fname = fname # Filename
98 self.tree = tree # ConfTree to fill
99 self.level = level # level of the parsed operations
100 self.prefix = '' # Prefix of variable name, may begin with '.'
101 self.conditions = [] # Stack of nested conditions, these are chained, so only the last is necessary
102 self.read_ops = [] # List of parsed operations (varname, `Operation`), returned by `self.parse()`
104 def preread(self, l):
105 "Make sure buf contains at least `l` next characters, return True on success and False on hitting EOF."
106 if isinstance(self.s, file):
107 self.buf = self.buf[self.bufpos:] + self.s.read(max(l, 1024)).decode('utf8')
109 return len(self.buf) >= self.bufpos + l
111 def peek(self, l = 1):
112 "Peek and return next `l` unicode characters or everything until EOF."
114 return self.buf[self.bufpos:self.bufpos+l]
117 "Peek and compare next `len(s)` characters to `s`. Converts `s` to unicode. False on hitting EOF."
119 return self.peek(len(s)) == s
121 def next(self, l = 1):
122 "Eat and return next `l` unicode characters. Raise exception on EOF."
123 if not self.preread(l):
124 self.syntax_error("Unexpected end of file")
125 s = self.buf[self.bufpos:self.bufpos+l]
133 self.line += s.count('\n')
134 self.column = l - rnl - 1
138 """Compare next `len(s)` characters to `s`. On match, eat them and return True. Otherwise just return False.
139 Converts `s` to unicode. False on hitting EOF."""
147 "Check for end-of-stream."
148 return not self.preread(1)
150 def expect(self, s, msg=None):
151 "Eat and compare next `len(s)` characters to `s`. If not equal, raise an error with `msg`. Unicode."
153 if not self.nexts(s):
154 self.syntax_error(msg or u"%r expected."%(s,))
156 def syntax_error(self, msg, *args):
157 "Raise a syntax error with file/line/column info"
158 raise ConfigSyntaxError(fname=self.fname, line=self.line, column=self.column, msg=(msg%args))
162 for i in traceback.extract_stack():
166 if n: log.debug(s + n + ' ' + repr(self.peek(15)) + '...')
176 while (not self.eof()) and (not self.peeks(self.c_close)):
180 if self.eof() or self.peeks(self.c_close):
182 if self.line == l0: # No newline skipped in p_WS
185 self.nexts(';') # NOTE: this is weird - can ';' occur anywhere? Or at most once, but only after any p_WS debris?
190 while not self.eof():
191 if self.peek() in self.c_ws:
193 elif self.peeks(self.c_comment):
200 self.expect(self.c_comment, "'#' expected at the beginning of a comment.")
201 while (not self.eof()) and (not self.nexts(self.c_nl)):
204 def p_STATEMENT(self):
207 if self.peeks(self.c_if):
210 # for operation or subtree, read VARNAME
211 varname = self.p_VARNAME()
213 if self.peeks(self.c_open):
214 self.p_SUBTREE(varname)
216 self.p_OPERATION(varname)
218 def p_SUBTREE(self, varname=None):
222 varname = self.p_VARNAME()
224 self.expect(self.c_open)
225 # backup and extend the variable name prefix
227 self.prefix = p + self.c_varname_sep + varname
232 self.expect(self.c_close)
234 def p_OPERATION(self, varname=None):
238 varname = self.p_VARNAME()
240 if self.nexts(self.c_set):
242 elif self.nexts(self.c_append):
245 self.syntax_error('Unknown operation.')
247 exp = self.p_EXPRESSION()
248 vname = (self.prefix+self.c_varname_sep+varname).lstrip(self.c_varname_sep)
249 v = self.tree.lookup(vname)
251 cnd = self.conditions[-1]
254 op = conf.Operation(op, cnd, exp, level=self.level,
255 source="%s:%d:%d"%(self.fname, self.line, self.column))
256 # NOTE/WARNING: The last character of operation will be reported in case of error.
258 self.read_ops.append( (vname, op) )
260 def p_CONDITION(self):
263 t = u"condition at %s:%d:%d"%(self.fname, self.line, self.column)
264 self.expect(self.c_if)
267 cnd = conf.ConfigCondition(f, text=t, parent=(self.conditions and self.conditions[-1]) or None)
268 self.conditions.append(cnd)
271 self.expect(self.c_open)
274 self.expect(self.c_close)
276 self.conditions.pop()
281 while self.preread(1) and (self.peek().isalnum() or self.peek() in u'-_.'):
282 vnl.append(self.next())
284 if not conf.re_VARNAME.match(vn):
285 self.syntax_error('Invalid variable name %r', vn)
288 def p_EXPRESSION(self):
290 if self.peek() not in '\'"':
291 # Expect a variable name
292 varname = self.p_VARNAME()
293 return conf.ConfigExpression((self.tree.lookup(varname),), varname)
295 # Parse literal expression
298 while not self.peeks(op):
299 exl.append(self.next())
302 return conf.ConfigExpression((s,), s)
303 # Parse expression with variables
306 while not self.peeks(op):
307 exl.append(self.peek())
308 if self.nexts(u'\\'):
311 if c not in u'\\"n' + self.c_open + self.c_close:
312 self.syntax_error('Illeal escape sequence in expression')
318 elif self.nexts(self.c_open):
319 # Parse a variable name in '{}'
320 varname = self.p_VARNAME()
321 self.expect(self.c_close)
323 expr.append(self.tree.lookup(varname))
326 expr.append(self.next())
329 # Concatenate consecutive characters in expr
332 if expr2 and isinstance(expr2[-1], unicode) and isinstance(i, unicode):
333 expr2[-1] = expr2[-1] + i
336 return conf.ConfigExpression(expr2, exs)
341 # Combined logical formula
343 f1 = self.p_FORMULA()
345 if self.nexts(self.c_and):
346 if self.peek(1).isalnum():
347 self.syntax_error('trailing characters after %r', self.c_and)
348 f2 = self.p_FORMULA()
351 return ('AND', f1, f2)
352 elif self.nexts(self.c_or):
353 if self.peek(1).isalnum():
354 self.syntax_error('trailing characters after %r', self.c_or)
355 f2 = self.p_FORMULA()
358 return ('OR', f1, f2)
359 elif self.nexts(u')'):
360 # Only extra parentheses
363 self.syntax_error("Logic operator or ')' expected")
364 elif self.nexts(self.c_not):
365 if self.peek().isalnum():
366 self.syntax_error('trailing characters after %r', self.c_not)
371 # Should be (in)equality condition
372 e1 = self.p_EXPRESSION()
374 if self.nexts(self.c_eq):
376 e2 = self.p_EXPRESSION()
377 return ('==', e1, e2)
378 elif self.nexts(self.c_neq):
380 e2 = self.p_EXPRESSION()
381 return ('!=', e1, e2)
383 self.syntax_error("Comparation operator expected")