5 Simple Moe configuration file syntax parser.
7 TODO: decide necessity of '()' in/around formulas
8 TODO: check escaping in expressions
9 TODO: should whitespace (incl. '\\n') be allowed (almost) everywhere?
10 can comment be anywhere whitespace can?
12 Generally, whitespace and comments are allowed everywhere except in variable names and inside expressions.
13 Also, COMMENT must not contain '\\n'.
15 FILE, BLOCK, STATEMENT, OPERATION, SUBTREE, CONDITION, FORMULA, AND, OR and NOT eat any preceding whitespace. TODO: check?
17 The configuration syntax is the following:
20 BLOCK = WS | STATEMENT ( SEP STATEMENT )*
23 WS = ( ' ' | '\\t' | '\\n' | COMMENT )*
25 COMMENT = re('#[^\\n]*\\n')
27 STATEMENT = CONDITION | OPERATION | SUBTREE
29 OPERATION = WS VARNAME WS ( '=' | '+=' ) WS EXPRESSION
30 SUBTREE = WS VARNAME WS '{' BLOCK WS '}'
31 CONDITION = WS 'if' FORMULA WS '{' BLOCK WS '}'
33 FORMULA = WS (( EXPRESSION WS ( '!=' | '==' ) WS EXPRESSION ) | '(' AND WS ')' | '(' OR WS ')' | NOT )
34 AND = FORMULA WS 'and' FORMULA
35 OR = FORMULA WS 'or' FORMULA
36 NOT = WS 'not' FORMULA
38 NOTE: ';' or '\\n' is currently required even after CONDITION and SUBTREE block
39 TODO: change to OPERATION only
40 NOTE: Formula may contain additional/extra parentheses
42 EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'"
43 ECHAR = re('([^\\{}]|\\\\|\\{|\\}|\\n)*')
44 VARNAME = re('[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)*')
47 import re, types, itertools, logging as log
51 class ConfigSyntaxError(conf.ConfigError):
52 def __init__(self, msg, fname='<unknown>', line=None, column=None):
58 return('ConfigSyntaxError %s:%d:%d: %s'%(self.fname, self.line, self.column, self.msg))
60 class ConfigParser(object):
76 def __init__(self, s, tree, fname='<unknown>', level=0):
77 """Create a config file parser.
78 `s` is either a string, unicode or an open file. File is assumed to be utf-8, string is converted to unicode.
79 `tree` is a ConfigTree to fill the operations into.
80 `fname` is an optional name of the file, for debugging and syntax errors.
81 `level` indicates the precedence the operations should have in the ConfigTree
83 self.s = s # Unicode, string or an open file
84 self.buf = u"" # Read-buffer for s file, whole unicode string for s string/unicode
85 if isinstance(self.s, types.StringTypes):
86 self.buf = unicode(self.s)
87 elif (not isinstance(self.s, file)) or self.s.closed:
88 raise TypeError("Expected unicode, str or open file.")
90 self.fname = fname # Filename
93 self.tree = tree # ConfTree to fill
94 self.level = level # level of the parsed operations
95 self.prefix = '' # Prefix of variable name, may begin with '.'
96 self.conditions = [] # Stack of nested conditions, these are chained, so only the last is necessary
97 self.read_ops = [] # List of parsed operations (varname, `Operation`), returned by `self.parse()`
99 "Make sure buf contains at least `l` next characters, return True on success and False on hitting EOF."
100 if isinstance(self.s, file):
101 self.buf = self.buf[self.bufpos:] + self.s.read(max(l, 1024)).decode('utf8')
103 return len(self.buf) >= self.bufpos + l
104 def peek(self, l = 1):
105 "Peek and return next `l` unicode characters or everything until EOF."
107 return self.buf[self.bufpos:self.bufpos+l]
109 "Peek and compare next `len(s)` characters to `s`. Converts `s` to unicode. False on hitting EOF."
111 return self.peek(len(s)) == s
112 def next(self, l = 1):
113 "Eat and return next `l` unicode characters. Raise exception on EOF."
114 if not self.preread(l):
115 self.syntax_error("Unexpected end of file")
116 s = self.buf[self.bufpos:self.bufpos+l]
124 self.line += s.count('\n')
125 self.column = l - rnl - 1
128 """Compare next `len(s)` characters to `s`. On match, eat them and return True. Otherwise just return False.
129 Converts `s` to unicode. False on hitting EOF."""
136 "Check for end-of-stream."
137 return not self.preread(1)
138 def expect(self, s, msg=None):
139 "Eat and compare next `len(s)` characters to `s`. If not equal, raise an error with `msg`. Unicode."
141 if not self.nexts(s):
142 self.syntax_error(msg or u"%r expected."%(s,))
def syntax_error(self, msg, *args):
    """Raise a `ConfigSyntaxError` with the current file/line/column info.

    `msg` is a %-style format string that is interpolated with `args`.
    When no `args` are given, `msg` is used verbatim: callers such as
    `expect()` pass an already formatted message, and formatting it a
    second time would fail on any literal '%' it contains.

    Always raises `ConfigSyntaxError`; never returns.
    """
    if args:
        msg = msg % args
    raise ConfigSyntaxError(fname=self.fname, line=self.line, column=self.column, msg=msg)
148 for i in traceback.extract_stack():
152 if n: log.debug(s + n + ' ' + repr(self.peek(15)) + '...')
160 while (not self.eof()) and (not self.peeks(self.c_close)):
164 if self.eof() or self.peeks(self.c_close):
166 if self.line == l0: # No newline skipped in p_WS
169 self.nexts(';') # NOTE: this is weird - can ';' occur anywhere? Or at most once, but only after any p_WS debris?
173 while not self.eof():
174 if self.peek() in self.c_ws:
176 elif self.peeks(self.c_comment):
182 self.expect(self.c_comment, "'#' expected at the beginning of a comment.")
183 while (not self.eof()) and (not self.nexts(self.c_nl)):
185 def p_STATEMENT(self):
188 if self.peeks(self.c_if):
191 # for operation or subtree, read VARNAME
192 varname = self.p_VARNAME()
194 if self.peeks(self.c_open):
195 self.p_SUBTREE(varname)
197 self.p_OPERATION(varname)
198 def p_SUBTREE(self, varname=None):
202 varname = self.p_VARNAME()
204 self.expect(self.c_open)
205 # backup and extend the variable name prefix
207 self.prefix = p + self.c_varname_sep + varname
212 self.expect(self.c_close)
213 def p_OPERATION(self, varname=None):
217 varname = self.p_VARNAME()
219 if self.nexts(self.c_set):
221 elif self.nexts(self.c_append):
224 self.syntax_error('Unknown operation.')
226 exp = self.p_EXPRESSION()
227 vname = (self.prefix+self.c_varname_sep+varname).lstrip(self.c_varname_sep)
228 v = self.tree.lookup(vname)
230 cnd = self.conditions[-1]
233 op = conf.Operation(op, cnd, exp, level=self.level,
234 source="%s:%d:%d"%(self.fname, self.line, self.column))
235 # NOTE/WARNING: The last character of operation will be reported in case of error.
237 self.read_ops.append( (vname, op) )
238 def p_CONDITION(self):
241 t = u"condition at %s:%d:%d"%(self.fname, self.line, self.column)
242 self.expect(self.c_if)
245 cnd = conf.ConfigCondition(f, text=t, parent=(self.conditions and self.conditions[-1]) or None)
246 self.conditions.append(cnd)
249 self.expect(self.c_open)
252 self.expect(self.c_close)
254 self.conditions.pop()
258 while self.peek().isalnum() or self.peek() in u'-_.':
259 vnl.append(self.next())
261 if not conf.re_VARNAME.match(vn):
262 self.syntax_error('Invalid variable name %r', vn)
264 def p_EXPRESSION(self):
268 self.syntax_error('Invalid start of expression')
269 # Parse literal expression
272 while not self.peeks(op):
273 exl.append(self.next())
276 return conf.ConfigExpression((s,), s)
277 # Parse expression with variables
280 while not self.peeks(op):
281 exl.append(self.peek())
282 if self.nexts(u'\\'):
285 if c not in u'\\"n' + self.c_open + self.c_close:
286 self.syntax_error('Illeal escape sequence in expression')
292 elif self.nexts(self.c_open):
293 # Parse a variable name in '{}'
294 varname = self.p_VARNAME()
295 self.expect(self.c_close)
297 expr.append(self.tree.lookup(varname))
300 expr.append(self.next())
303 # Concatenate consecutive characters in expr
306 if expr2 and isinstance(expr2[-1], unicode) and isinstance(i, unicode):
307 expr2[-1] = expr2[-1] + i
310 return conf.ConfigExpression(tuple(expr2), exs)
314 # Combined logical formula
316 f1 = self.p_FORMULA()
318 if self.nexts(self.c_and):
319 if self.peek(1).isalnum():
320 self.syntax_error('trailing characters after %r', self.c_and)
321 f2 = self.p_FORMULA()
324 return ('AND', f1, f2)
325 elif self.nexts(self.c_or):
326 if self.peek(1).isalnum():
327 self.syntax_error('trailing characters after %r', self.c_or)
328 f2 = self.p_FORMULA()
331 return ('OR', f1, f2)
332 elif self.nexts(u')'):
333 # Only extra parentheses
336 self.syntax_error("Logic operator or ')' expected")
337 elif self.nexts(self.c_not):
338 if self.peek().isalnum():
339 self.syntax_error('trailing characters after %r', self.c_not)
344 # Should be (in)equality condition
345 e1 = self.p_EXPRESSION()
347 if self.nexts(self.c_eq):
349 e2 = self.p_EXPRESSION()
350 return ('==', e1, e2)
351 elif self.nexts(self.c_neq):
353 e2 = self.p_EXPRESSION()
354 return ('!=', e1, e2)
356 self.syntax_error("Comparation operator expected")