5 Simple Moe configuration file syntax parser.
7 TODO: decide necessity of '()' in/around formulas
8 TODO: check escaping in expressions
9 TODO: should whitespace (incl. '\\n') be allowed (almost) everywhere?
10 can comment be anywhere whitespace can?
12 Generally, whitespace and comments are allowed everywhere except in variable names and inside expressions.
13 Also, COMMENT must not contain '\\n'.
15 FILE, BLOCK, STATEMENT, OPERATION, SUBTREE, CONDITION, FORMULA, AND, OR and NOT eat any preceding whitespace. TODO: check?
17 The configuration syntax is the following:
20 BLOCK = WS | STATEMENT ( SEP STATEMENT )*
23 WS = ( ' ' | '\\t' | '\\n' | COMMENT )*
25 COMMENT = re('#[^\\n]*\\n')
27 STATEMENT = CONDITION | OPERATION | SUBTREE
29 OPERATION = WS VARNAME WS ( '=' | '+=' ) WS EXPRESSION
30 SUBTREE = WS VARNAME WS '{' BLOCK WS '}'
31 CONDITION = WS 'if' FORMULA WS '{' BLOCK WS '}'
33 FORMULA = WS (( EXPRESSION WS ( '!=' | '==' ) WS EXPRESSION ) | '(' AND WS ')' | '(' OR WS ')' | NOT )
34 AND = FORMULA WS 'and' FORMULA
35 OR = FORMULA WS 'or' FORMULA
36 NOT = WS 'not' FORMULA
38 NOTE: ';' or '\\n' is currently required even after CONDITION and SUBTREE block
39 TODO: change to OPERATION only
40 NOTE: Formula may contain additional/extra parentheses
42 EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'" | VARNAME
43 ECHAR = re('([^\\{}]|\\\\|\\{|\\}|\\n)*')
44 VARNAME = re('[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)*')
47 import re, types, itertools, logging as log
50 import moe.config as cf
53 class ConfigSyntaxError(cf.ConfigError):
55 def __init__(self, msg, source='<unknown>', line=None, column=None):
62 return('ConfigSyntaxError %s:%d:%d: %s'%(self.source, self.line, self.column, self.msg))
65 class ConfigParser(object):
82 def __init__(self, s, tree, source='<unknown>', level=0):
83 """Create a config file parser.
84 `s` is either a string, unicode or an open file. File is assumed to be utf-8, string is converted to unicode.
85 `tree` is a ConfigTree to fill the operations into.
86 `source` is an optional name of the file, for debugging and syntax errors.
87 `level` indicates the precedence the operations should have in the ConfigTree
89 self.s = s # Unicode, ascii string or an open file
90 self.buf = u"" # Read-buffer for s file, whole unicode string for s string/unicode
91 if isinstance(self.s, types.StringTypes):
92 self.buf = unicode(self.s)
93 elif (not isinstance(self.s, file)) or self.s.closed:
94 raise TypeError("Expected unicode, str or open file.")
96 self.source = source # Usually filename
99 self.tree = tree # ConfTree to fill
100 self.level = level # level of the parsed operations
101 self.prefix = '' # Prefix of variable name, may begin with '.'
102 self.conditions = [] # Stack of nested conditions, these are chained, so only the last is necessary
103 self.read_ops = [] # List of parsed operations (varname, `Operation`), returned by `self.parse()`
105 def preread(self, l):
106 "Make sure buf contains at least `l` next characters, return True on success and False on hitting EOF."
107 if isinstance(self.s, file):
108 self.buf = self.buf[self.bufpos:] + self.s.read(max(l, 1024)).decode('utf8')
110 return len(self.buf) >= self.bufpos + l
112 def peek(self, l = 1):
113 "Peek and return next `l` unicode characters or everything until EOF."
115 return self.buf[self.bufpos:self.bufpos+l]
118 "Peek and compare next `len(s)` characters to `s`. Converts `s` to unicode. False on hitting EOF."
120 return self.peek(len(s)) == s
122 def next(self, l = 1):
123 "Eat and return next `l` unicode characters. Raise exception on EOF."
124 if not self.preread(l):
125 self.syntax_error("Unexpected end of file")
126 s = self.buf[self.bufpos:self.bufpos+l]
134 self.line += s.count('\n')
135 self.column = l - rnl - 1
139 """Compare next `len(s)` characters to `s`. On match, eat them and return True. Otherwise just return False.
140 Converts `s` to unicode. False on hitting EOF."""
148 "Check for end-of-stream."
149 return not self.preread(1)
151 def expect(self, s, msg=None):
152 "Eat and compare next `len(s)` characters to `s`. If not equal, raise an error with `msg`. Unicode."
154 if not self.nexts(s):
155 self.syntax_error(msg or u"%r expected."%(s,))
def syntax_error(self, msg, *args):
    """Raise a `ConfigSyntaxError` carrying the current source/line/column info.

    `msg` is a %-style format string interpolated with `args`.
    When no `args` are given, `msg` is used verbatim: callers sometimes pass
    a message they have already formatted themselves, and such a message may
    contain a literal '%' that would otherwise break the interpolation.

    Always raises `ConfigSyntaxError`; never returns.
    """
    # Interpolate only when there is something to interpolate.
    text = (msg % args) if args else msg
    raise ConfigSyntaxError(source=self.source, line=self.line, column=self.column, msg=text)
163 for i in traceback.extract_stack():
167 if n: log.debug(s + n + ' ' + repr(self.peek(15)) + '...')
177 while (not self.eof()) and (not self.peeks(self.c_close)):
181 if self.eof() or self.peeks(self.c_close):
183 if self.line == l0: # No newline skipped in p_WS
186 self.nexts(';') # NOTE: this is weird - can ';' occur anywhere? Or at most once, but only after any p_WS debris?
191 while not self.eof():
192 if self.peek() in self.c_ws:
194 elif self.peeks(self.c_comment):
201 self.expect(self.c_comment, "'#' expected at the beginning of a comment.")
202 while (not self.eof()) and (not self.nexts(self.c_nl)):
205 def p_STATEMENT(self):
208 if self.peeks(self.c_if):
211 # for operation or subtree, read VARNAME
212 varname = self.p_VARNAME()
214 if self.peeks(self.c_open):
215 self.p_SUBTREE(varname)
217 self.p_OPERATION(varname)
219 def p_SUBTREE(self, varname=None):
223 varname = self.p_VARNAME()
225 self.expect(self.c_open)
226 # backup and extend the variable name prefix
228 self.prefix = p + self.c_varname_sep + varname
233 self.expect(self.c_close)
235 def p_OPERATION(self, varname=None):
239 varname = self.p_VARNAME()
241 if self.nexts(self.c_set):
243 elif self.nexts(self.c_append):
246 self.syntax_error('Unknown operation.')
248 exp = self.p_EXPRESSION()
249 vname = (self.prefix+self.c_varname_sep+varname).lstrip(self.c_varname_sep)
250 v = self.tree.lookup(vname)
252 cnd = self.conditions[-1]
255 op = cf.Operation(op, cnd, exp, level=self.level,
256 source="%s:%d:%d"%(self.source, self.line, self.column))
257 # NOTE/WARNING: The last character of operation will be reported in case of error.
259 self.read_ops.append( (vname, op) )
261 def p_CONDITION(self):
264 t = u"condition at %s:%d:%d"%(self.source, self.line, self.column)
265 self.expect(self.c_if)
268 cnd = cf.ConfigCondition(f, text=t, parent=(self.conditions and self.conditions[-1]) or None)
269 self.conditions.append(cnd)
272 self.expect(self.c_open)
275 self.expect(self.c_close)
277 self.conditions.pop()
282 while self.preread(1) and (self.peek().isalnum() or self.peek() in u'-_.'):
283 vnl.append(self.next())
285 if not cf.re_VARNAME.match(vn):
286 self.syntax_error('Invalid variable name %r', vn)
289 def p_EXPRESSION(self):
291 if self.peek() not in '\'"':
292 # Expect a variable name
293 varname = self.p_VARNAME()
294 return cf.ConfigExpression((self.tree.lookup(varname),), varname)
296 # Parse literal expression
299 while not self.peeks(op):
300 exl.append(self.next())
303 return cf.ConfigExpression((s,), s)
304 # Parse expression with variables
307 while not self.peeks(op):
308 exl.append(self.peek())
309 if self.nexts(u'\\'):
312 if c not in u'\\"n' + self.c_open + self.c_close:
313 self.syntax_error('Illeal escape sequence in expression')
319 elif self.nexts(self.c_open):
320 # Parse a variable name in '{}'
321 varname = self.p_VARNAME()
322 self.expect(self.c_close)
324 expr.append(self.tree.lookup(varname))
327 expr.append(self.next())
330 # Concatenate consecutive characters in expr
333 if expr2 and isinstance(expr2[-1], unicode) and isinstance(i, unicode):
334 expr2[-1] = expr2[-1] + i
337 return cf.ConfigExpression(expr2, exs)
342 # Combined logical formula
344 f1 = self.p_FORMULA()
346 if self.nexts(self.c_and):
347 if self.peek(1).isalnum():
348 self.syntax_error('trailing characters after %r', self.c_and)
349 f2 = self.p_FORMULA()
352 return ('AND', f1, f2)
353 elif self.nexts(self.c_or):
354 if self.peek(1).isalnum():
355 self.syntax_error('trailing characters after %r', self.c_or)
356 f2 = self.p_FORMULA()
359 return ('OR', f1, f2)
360 elif self.nexts(u')'):
361 # Only extra parentheses
364 self.syntax_error("Logic operator or ')' expected")
365 elif self.nexts(self.c_not):
366 if self.peek().isalnum():
367 self.syntax_error('trailing characters after %r', self.c_not)
372 # Should be (in)equality condition
373 e1 = self.p_EXPRESSION()
375 if self.nexts(self.c_eq):
377 e2 = self.p_EXPRESSION()
378 return ('==', e1, e2)
379 elif self.nexts(self.c_neq):
381 e2 = self.p_EXPRESSION()
382 return ('!=', e1, e2)
384 self.syntax_error("Comparation operator expected")