5 Simple Moe configuration file syntax parser.
7 TODO: decide necessity of '()' in/around formulas
8 TODO: check escaping in expressions
9 TODO: should whitespace (incl. '\\n') be allowed (almost) everywhere?
10 can comment be anywhere whitespace can?
12 Generally, whitespace and comments are allowed everywhere except in variable names and inside expressions.
13 Also, COMMENT must not contain '\\n'.
15 FILE, BLOCK, STATEMENT, OPERATION, SUBTREE, CONDITION, FORMULA, AND, OR and NOT eat any preceding whitespace. TODO: check?
17 The configuration syntax is the following:
20 BLOCK = WS | STATEMENT ( SEP STATEMENT )*
23 WS = ( ' ' | '\\t' | '\\n' | COMMENT )*
25 COMMENT = re('#[^\\n]*\\n')
27 STATEMENT = CONDITION | OPERATION | SUBTREE
29 OPERATION = WS VARNAME WS ( '=' | '+=' ) WS EXPRESSION
30 SUBTREE = WS VARNAME WS '{' BLOCK '}'
31 CONDITION = WS 'if' FORMULA WS '{' BLOCK WS '}'
33 FORMULA = WS (( EXPRESSION WS ( '!=' | '==' ) WS EXPRESSION ) | '(' AND WS ')' | '(' OR WS ')' | NOT )
34 AND = FORMULA WS 'and' FORMULA
35 OR = FORMULA WS 'or' FORMULA
36 NOT = WS 'not' FORMULA
38 NOTE: Formula may contain additional/extra parentheses
40 EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'"
41 ECHAR = re('([^\\{}]|\\\\|\\{|\\}|\\n)*')
42 VARNAME = re('[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)*')
45 import re, types, itertools, logging as log
49 class ConfigSyntaxError(conf.ConfigError):
50 def __init__(self, msg, fname='<unknown>', line=None, column=None):
56 return('ConfigSyntaxError %s:%d:%d: %s'%(self.fname, self.line, self.column, self.msg))
58 class ConfigParser(object):
74 def __init__(self, s, tree, fname='<unknown>', level=0):
75 """Create a config file parser.
76 `s` is either a string, unicode or an open file. File is assumed to be utf-8, string is converted to unicode.
77 `tree` is a ConfigTree to fill the operations into.
78 `fname` is an optional name of the file, for debugging and syntax errors.
79 `level` indicates the precedence the operations should have in the ConfigTree
81 self.s = s # Unicode, string or an open file
82 self.buf = u"" # Read-buffer for s file, whole unicode string for s string/unicode
83 if isinstance(self.s, types.StringTypes):
84 self.buf = unicode(self.s)
85 elif (not isinstance(self.s, file)) or self.s.closed:
86 raise TypeError("Expected unicode, str or open file.")
88 self.fname = fname # Filename
91 self.tree = tree # ConfTree to fill
92 self.level = level # level of the parsed operations
93 self.prefix = '' # Prefix of variable name, may begin with '.'
94 self.conditions = [] # Stack of nested conditions, these are chained, so only the last is necessary
95 self.read_ops = [] # List of parsed operations (varname, `Operation`), returned by `self.parse()`
97 "Make sure buf contains at least `l` next characters, return True on success and False on hitting EOF."
98 if isinstance(self.s, file):
99 self.buf = self.buf[self.bufpos:] + self.s.read(max(l, 1024)).decode('utf8')
101 return len(self.buf) >= self.bufpos + l
102 def peek(self, l = 1):
103 "Peek and return next `l` unicode characters or everything until EOF."
105 return self.buf[self.bufpos:self.bufpos+l]
107 "Peek and compare next `len(s)` characters to `s`. Converts `s` to unicode. False on hitting EOF."
109 return self.peek(len(s)) == s
110 def next(self, l = 1):
111 "Eat and return next `l` unicode characters. Raise exception on EOF."
112 if not self.preread(l):
113 self.syntax_error("Unexpected end of file")
114 s = self.buf[self.bufpos:self.bufpos+l]
122 self.line += s.count('\n')
123 self.column = l - rnl - 1
126 """Compare next `len(s)` characters to `s`. On match, eat them and return True. Otherwise just return False.
127 Converts `s` to unicode. False on hitting EOF."""
134 "Check for end-of-stream."
135 return not self.preread(1)
136 def expect(self, s, msg=None):
137 "Eat and compare next `len(s)` characters to `s`. If not equal, raise an error with `msg`. Unicode."
139 if not self.nexts(s):
140 self.syntax_error(msg or u"%r expected."%(s,))
def syntax_error(self, msg, *args):
    """Raise a `ConfigSyntaxError` carrying the current file/line/column info.

    `msg` is the error message. When extra positional `args` are given,
    `msg` is used as a %-format template for them. Without `args` the
    message is passed through verbatim, so a literal '%' in an already
    formatted message (such as the one built by `expect()`) cannot trigger
    a formatting error that would mask the real syntax error.
    """
    if args:
        msg = msg % args
    raise ConfigSyntaxError(fname=self.fname, line=self.line, column=self.column, msg=msg)
146 for i in traceback.extract_stack():
150 if n: log.debug(s + n + ' ' + repr(self.peek(15)) + '...')
158 while (not self.eof()) and (not self.peeks(self.c_close)):
162 if self.eof() or self.peeks(self.c_close):
164 if self.line == l0: # No newline skipped in p_WS
167 self.nexts(';') # NOTE: this is weird - can ';' occur anywhere? Or at most once, but only after any p_WS debris?
171 while not self.eof():
172 if self.peek() in self.c_ws:
174 elif self.peeks(self.c_comment):
180 self.expect(self.c_comment, "'#' expected at the beginning of a comment.")
181 while (not self.eof()) and (not self.nexts(self.c_nl)):
183 def p_STATEMENT(self):
186 if self.peeks(self.c_if):
189 # for operation or subtree, read VARNAME
190 varname = self.p_VARNAME()
192 if self.peeks(self.c_open):
193 self.p_SUBTREE(varname)
195 self.p_OPERATION(varname)
196 def p_SUBTREE(self, varname=None):
200 varname = self.p_VARNAME()
202 self.expect(self.c_open)
203 # backup and extend the variable name prefix
205 self.prefix = p + self.c_varname_sep + varname
210 self.expect(self.c_close)
211 def p_OPERATION(self, varname=None):
215 varname = self.p_VARNAME()
217 if self.nexts(self.c_set):
219 elif self.nexts(self.c_append):
222 self.syntax_error('Unknown operation.')
224 exp = self.p_EXPRESSION()
225 vname = (self.prefix+self.c_varname_sep+varname).lstrip(self.c_varname_sep)
226 v = self.tree.lookup(vname)
228 cnd = self.conditions[-1]
231 op = conf.Operation(op, cnd, exp, level=self.level,
232 source="%s:%d:%d"%(self.fname, self.line, self.column))
233 # NOTE/WARNING: The last character of operation will be reported in case of error.
235 self.read_ops.append( (vname, op) )
236 def p_CONDITION(self):
239 t = u"condition at %s:%d:%d"%(self.fname, self.line, self.column)
240 self.expect(self.c_if)
243 cnd = conf.ConfigCondition(f, text=t, parent=(self.conditions and self.conditions[-1]) or None)
244 self.conditions.append(cnd)
247 self.expect(self.c_open)
250 self.expect(self.c_close)
252 self.conditions.pop()
256 while self.peek().isalnum() or self.peek() in u'-_.':
257 vnl.append(self.next())
259 if not conf.re_VARNAME.match(vn):
260 self.syntax_error('Invalid variable name %r', vn)
262 def p_EXPRESSION(self):
266 self.syntax_error('Invalid start of expression')
267 # Parse literal expression
270 while not self.peeks(op):
271 exl.append(self.next())
274 return conf.ConfigExpression((s,), s)
275 # Parse expression with variables
278 while not self.peeks(op):
279 exl.append(self.peek())
280 if self.nexts(u'\\'):
283 if c not in u'\\"n' + self.c_open + self.c_close:
284 self.syntax_error('Illeal escape sequence in expression')
290 elif self.nexts(self.c_open):
291 # Parse a variable name in '{}'
292 varname = self.p_VARNAME()
293 self.expect(self.c_close)
295 expr.append(self.tree.lookup(varname))
298 expr.append(self.next())
301 # Concatenate consecutive characters in expr
304 if expr2 and isinstance(expr2[-1], unicode) and isinstance(i, unicode):
305 expr2[-1] = expr2[-1] + i
308 return conf.ConfigExpression(tuple(expr2), exs)
312 # Combined logical formula
314 f1 = self.p_FORMULA()
316 if self.nexts(self.c_and):
317 if self.peek(1).isalnum():
318 self.syntax_error('trailing characters after %r', self.c_and)
319 f2 = self.p_FORMULA()
322 return ('AND', f1, f2)
323 elif self.nexts(self.c_or):
324 if self.peek(1).isalnum():
325 self.syntax_error('trailing characters after %r', self.c_or)
326 f2 = self.p_FORMULA()
329 return ('OR', f1, f2)
330 elif self.nexts(u')'):
331 # Only extra parentheses
334 self.syntax_error("Logic operator or ')' expected")
335 elif self.nexts(self.c_not):
336 if self.peek().isalnum():
337 self.syntax_error('trailing characters after %r', self.c_not)
342 # Should be (in)equality condition
343 e1 = self.p_EXPRESSION()
345 if self.nexts(self.c_eq):
347 e2 = self.p_EXPRESSION()
348 return ('==', e1, e2)
349 elif self.nexts(self.c_neq):
351 e2 = self.p_EXPRESSION()
352 return ('!=', e1, e2)
354 self.syntax_error("Comparation operator expected")