5 Simple Moe configuration file syntax parser.
7 TODO: decide necessity of '()' in/around formulas
8 TODO: check escaping in expressions
9 TODO: should whitespace (incl. '\\n') be allowed (almost) everywhere?
10 can comment be anywhere whitespace can?
12 Generally, whitespace and comments are allowed everywhere except in variable names and inside expressions.
13 Also, COMMENT must not contain '\\n'.
15 FILE, BLOCK, STATEMENT, OPERATION, SUBTREE, CONDITION, FORMULA, AND, OR and NOT eat any preceding whitespace. TODO: check?
17 The configuration syntax is the following:
20 BLOCK = WS | STATEMENT ( SEP STATEMENT )*
23 WS = ( ' ' | '\\t' | '\\n' | COMMENT )*
25 COMMENT = re('#[^\\n]*\\n')
27 STATEMENT = CONDITION | OPERATION | SUBTREE
29 OPERATION = WS VARNAME WS ( '=' | '+=' ) WS EXPRESSION
30 SUBTREE = WS VARNAME WS '{' BLOCK '}'
31 CONDITION = WS 'if' FORMULA WS '{' BLOCK WS '}'
33 FORMULA = WS (( EXPRESSION WS ( '!=' | '==' ) WS EXPRESSION ) | '(' AND WS ')' | '(' OR WS ')' | NOT )
34 AND = FORMULA WS 'and' FORMULA
35 OR = FORMULA WS 'or' FORMULA
36 NOT = WS 'not' FORMULA
38 NOTE: Formula may contain additional/extra parentheses
40 EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'"
41 ECHAR = re('([^\\{}]|\\\\|\\{|\\}|\\n)*')
42 VARNAME = re('[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)*')
45 import re, types, itertools, logging as log
49 class ConfigSyntaxError(Exception):
50 # TODO: choose a better superclass
51 def __init__(self, msg, fname='<unknown>', line=None, column=None):
57 return('ConfigSyntaxError %s:%d:%d: %s'%(self.fname, self.line, self.column, self.msg))
# Variable name regexp: a name is one or more dot-separated segments of
# letters, digits, '_' and '-'. Dots (separators) must be separated from the
# edges and from each other, i.e. no leading, trailing or doubled dots.
re_VARNAME = re.compile(r'\A([A-Za-z0-9_-]+\.)*[A-Za-z0-9_-]+\Z')
62 class ConfigParser(object):
78 def __init__(self, s, tree, fname='<unknown>', level=0):
79 """Create a config file parser.
80 `s` is either a string, unicode or an open file. File is assumed to be utf-8, string is converted to unicode.
81 `tree` is a ConfigTree to fill the operations into.
82 `fname` is an optional name of the file, for debugging and syntax errors.
83 `level` indicates the precedence the operations should have in the ConfigTree
85 self.s = s # Unicode, string or an open file
86 self.buf = u"" # Read-buffer for s file, whole unicode string for s string/unicode
87 if isinstance(self.s, types.StringTypes):
88 self.buf = unicode(self.s)
89 elif (not isinstance(self.s, file)) or self.s.closed:
90 raise TypeError("Expected unicode, str or open file.")
92 self.fname = fname # Filename
95 self.tree = tree # ConfTree to fill
96 self.level = level # level of the parsed operations
97 self.prefix = '' # Prefix of variable name, may begin with '.'
98 self.conditions = [] # Stack of nested conditions, these are chained, so only the last is necessary
99 self.read_ops = [] # List of parsed operations (varname, `Operation`), returned by `self.parse()`
100 def preread(self, l):
101 "Make sure buf contains at least `l` next characters, return True on success and False on hitting EOF."
102 if isinstance(self.s, file):
103 self.buf = self.buf[self.bufpos:] + self.s.read(max(l, 1024)).decode('utf8')
105 return len(self.buf) >= self.bufpos + l
106 def peek(self, l = 1):
107 "Peek and return next `l` unicode characters or everything until EOF."
109 return self.buf[self.bufpos:self.bufpos+l]
111 "Peek and compare next `len(s)` characters to `s`. Converts `s` to unicode. False on hitting EOF."
113 return self.peek(len(s)) == s
114 def next(self, l = 1):
115 "Eat and return next `l` unicode characters. Raise exception on EOF."
116 if not self.preread(l):
117 raise ConfigSyntaxError("Unexpected end of file")
118 s = self.buf[self.bufpos:self.bufpos+l]
126 self.line += s.count('\n')
127 self.column = l - rnl - 1
130 """Compare next `len(s)` characters to `s`. On match, eat them and return True. Otherwise just return False.
131 Converts `s` to unicode. False on hitting EOF."""
138 "Check for end-of-stream."
139 return not self.preread(1)
140 def expect(self, s, msg=None):
141 "Eat and compare next `len(s)` characters to `s`. If not equal, raise an error with `msg`. Unicode."
143 if not self.nexts(s):
144 self.syntax_error(msg or u"%r expected."%(s,))
def syntax_error(self, msg, *args):
    """Raise a `ConfigSyntaxError` with the current file/line/column info.

    `msg` is a %-style format string and `args` are its arguments.
    When no `args` are given, `msg` is used verbatim, so an already
    formatted message that happens to contain a literal '%' is reported
    as a syntax error instead of failing inside the string formatting.

    Always raises `ConfigSyntaxError`; never returns.
    """
    # Only interpolate when there is something to interpolate.
    text = (msg % args) if args else msg
    raise ConfigSyntaxError(fname=self.fname, line=self.line, column=self.column, msg=text)
150 for i in traceback.extract_stack():
154 if n: log.debug(s + n + ' ' + repr(self.peek(15)) + '...')
162 while (not self.eof()) and (not self.peeks(self.c_close)):
166 if self.eof() or self.peeks(self.c_close):
168 if self.line == l0: # No newline skipped in p_WS
171 self.nexts(';') # NOTE: this is weird - can ';' occur anywhere? Or at most once, but only after any p_WS debris?
175 while not self.eof():
176 if self.peek() in self.c_ws:
178 elif self.peeks(self.c_comment):
184 self.expect(self.c_comment, "'#' expected at the beginning of a comment.")
185 while (not self.eof()) and (not self.nexts(self.c_nl)):
187 def p_STATEMENT(self):
190 if self.peeks(self.c_if):
193 # for operation or subtree, read VARNAME
194 varname = self.p_VARNAME()
196 if self.peeks(self.c_open):
197 self.p_SUBTREE(varname)
199 self.p_OPERATION(varname)
200 def p_SUBTREE(self, varname=None):
204 varname = self.p_VARNAME()
206 self.expect(self.c_open)
207 # backup and extend the variable name prefix
209 self.prefix = p + self.c_varname_sep + varname
214 self.expect(self.c_close)
215 def p_OPERATION(self, varname=None):
219 varname = self.p_VARNAME()
221 if self.nexts(self.c_set):
223 elif self.nexts(self.c_append):
226 self.syntax_error('Unknown operation.')
228 exp = self.p_EXPRESSION()
229 vname = (self.prefix+self.c_varname_sep+varname).lstrip(self.c_varname_sep)
230 v = self.tree.lookup(vname)
232 cnd = self.conditions[-1]
235 op = conf.Operation(op, cnd, exp, level=self.level,
236 source="%s:%d:%d"%(self.fname, self.line, self.column))
237 # NOTE/WARNING: The last character of operation will be reported in case of error.
239 self.read_ops.append( (vname, op) )
240 def p_CONDITION(self):
243 t = u"condition at %s:%d:%d"%(self.fname, self.line, self.column)
244 self.expect(self.c_if)
247 cnd = conf.ConfigCondition(f, text=t, parent=(self.conditions and self.conditions[-1]) or None)
248 self.conditions.append(cnd)
251 self.expect(self.c_open)
254 self.expect(self.c_close)
256 self.conditions.pop()
260 while self.peek().isalnum() or self.peek() in u'-_.':
261 vnl.append(self.next())
263 if not re_VARNAME.match(vn):
264 self.syntax_error('Invalid variable name %r', vn)
266 def p_EXPRESSION(self):
270 self.syntax_error('Invalid start of expression')
271 # Parse literal expression
274 while not self.peeks(op):
275 exl.append(self.next())
278 return conf.ConfigExpression((s,), s)
279 # Parse expression with variables
282 while not self.peeks(op):
283 exl.append(self.peek())
284 if self.nexts(u'\\'):
287 if c not in u'\\"n' + self.c_open + self.c_close:
288 self.syntax_error('Illeal escape sequence in expression')
294 elif self.nexts(self.c_open):
295 # Parse a variable name in '{}'
296 varname = self.p_VARNAME()
297 self.expect(self.c_close)
299 expr.append(self.tree.lookup(varname))
302 expr.append(self.next())
305 # Concatenate consecutive characters in expr
308 if expr2 and isinstance(expr2[-1], unicode) and isinstance(i, unicode):
309 expr2[-1] = expr2[-1] + i
312 return conf.ConfigExpression(tuple(expr2), exs)
316 # Combined logical formula
318 f1 = self.p_FORMULA()
320 if self.nexts(self.c_and):
321 if self.peek(1).isalnum():
322 self.syntax_error('trailing characters after %r', self.c_and)
323 f2 = self.p_FORMULA()
326 return ('AND', f1, f2)
327 elif self.nexts(self.c_or):
328 if self.peek(1).isalnum():
329 self.syntax_error('trailing characters after %r', self.c_or)
330 f2 = self.p_FORMULA()
333 return ('OR', f1, f2)
334 elif self.nexts(u')'):
335 # Only extra parentheses
338 self.syntax_error("Logic operator or ')' expected")
339 elif self.nexts(self.c_not):
340 if self.peek().isalnum():
341 self.syntax_error('trailing characters after %r', self.c_not)
346 # Should be (in)equality condition
347 e1 = self.p_EXPRESSION()
349 if self.nexts(self.c_eq):
351 e2 = self.p_EXPRESSION()
352 return ('==', e1, e2)
353 elif self.nexts(self.c_neq):
355 e2 = self.p_EXPRESSION()
356 return ('!=', e1, e2)
358 self.syntax_error("Comparation operator expected")