5 Simple Moe configuration file syntax parser.
7 TODO: decide necessity of '()' in/around formulas
8 TODO: check escaping in expressions
9 TODO: should whitespace (incl. '\\n') be allowed (almost) everywhere?
10 Can a comment appear anywhere whitespace can?
12 Generally, whitespace and comments are allowed everywhere except in variable names and inside expressions.
13 Also, COMMENT must not contain '\\n'.
15 FILE, BLOCK, STATEMENT, OPERATION, SUBTREE, CONDITION, FORMULA, AND, OR and NOT eat any preceding whitespace. TODO: check?
17 The configuration syntax is the following:
20 BLOCK = WS | STATEMENT ( SEP STATEMENT )*
23 WS = ( ' ' | '\\t' | '\\n' | COMMENT )*
25 COMMENT = re('#[^\\n]*\\n')
27 STATEMENT = CONDITION | OPERATION | SUBTREE
29 OPERATION = WS VARNAME WS ( '=' | '+=' ) WS EXPRESSION
30 SUBTREE = WS VARNAME WS '{' BLOCK '}'
31 CONDITION = WS 'if' FORMULA WS '{' BLOCK WS '}'
33 FORMULA = WS (( EXPRESSION WS ( '!=' | '==' ) WS EXPRESSION ) | '(' AND WS ')' | '(' OR WS ')' | NOT )
34 AND = FORMULA WS 'and' FORMULA
35 OR = FORMULA WS 'or' FORMULA
36 NOT = WS 'not' FORMULA
38 NOTE: Formula may contain additional/extra parentheses
40 EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'"
41 ECHAR = re('([^\\{}]|\\\\|\\{|\\}|\\n)*')
42 VARNAME = re('[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)*')
45 import re, types, itertools, logging as log
49 class ConfigSyntaxError(Exception):
50 # TODO: choose a better superclass
51 def __init__(self, msg, fname='<unknown>', line=None, column=None):
57 return('ConfigSyntaxError %s:%d:%d: %s'%(self.fname, self.line, self.column, self.msg))
59 "Variable name regexp, dots (separators) must be separated from edges and each other."
60 re_VARNAME = re.compile(r'\A([A-Za-z0-9_-]+\.)*[A-Za-z0-9_-]+\Z')
62 class ConfigParser(object):
78 def __init__(self, s, tree, fname='<unknown>', level=0):
79 """Create a config file parser.
80 `s` is either a string, unicode or an open file. File is assumed to be utf-8, string is converted to unicode.
81 `tree` is a ConfigTree to fill the operations into.
82 `fname` is an optional name of the file, for debugging and syntax errors.
83 `level` indicates the precedence the operations should have in the ConfigTree
85 self.s = s # Unicode, string or an open file
86 self.buf = u"" # Read-buffer for s file, whole unicode string for s string/unicode
87 if isinstance(self.s, types.StringTypes):
88 self.buf = unicode(self.s)
89 elif (not isinstance(self.s, file)) or self.s.closed:
90 raise TypeError("Expected unicode, str or open file.")
92 self.fname = fname # Filename
95 self.tree = tree # ConfTree to fill
96 self.level = level # level of the parsed operations
97 self.prefix = '' # Prefix of variable name, may begin with '.'
98 self.conditions = [] # Stack of nested conditions, these are chained, so only the last is necessary
99 self.read_ops = [] # List of parsed operations (varname, `Operation`), returned by `self.parse()`
100 def preread(self, l):
101 "Make sure buf contains at least `l` next characters, return True on success and False on hitting EOF."
102 if isinstance(self.s, file):
103 self.buf = self.buf[self.bufpos:] + self.s.read(max(l, 1024)).decode('utf8')
105 return len(self.buf) >= self.bufpos + l
106 def peek(self, l = 1):
107 "Peek and return next `l` unicode characters or everything until EOF."
109 return self.buf[self.bufpos:self.bufpos+l]
111 "Peek and compare next `len(s)` characters to `s`. Converts `s` to unicode. False on hitting EOF."
113 return self.peek(len(s)) == s
114 def next(self, l = 1):
115 "Eat and return next `l` unicode characters. Raise exception on EOF."
116 if not self.preread(l):
117 raise ConfigSyntaxError("Unexpected end of file")
118 s = self.buf[self.bufpos:self.bufpos+l]
126 self.line += s.count('\n')
127 self.column = l - rnl - 1
130 """Compare next `len(s)` characters to `s`. On match, eat them and return True. Otherwise just return False.
131 Converts `s` to unicode. False on hitting EOF."""
138 "Check for end-of-stream."
139 return not self.preread(1)
140 def expect(self, s, msg=None):
141 "Eat and compare next `len(s)` characters to `s`. If not equal, raise an error with `msg`. Unicode."
143 if not self.nexts(s):
144 self.syntax_error(msg or u"%r expected."%(s,))
def syntax_error(self, msg, *args):
    """Raise a `ConfigSyntaxError` carrying the current file/line/column info.

    `msg` is the error message. When `args` are given, `msg` is treated as a
    %-format string and interpolated with them; with no `args` it is used
    verbatim.

    Always raises `ConfigSyntaxError`; never returns.
    """
    # Callers such as `expect()` pass an already formatted message that may
    # embed arbitrary text (e.g. the repr of an expected token). Applying
    # `msg % ()` to such a message fails on any literal '%', masking the real
    # syntax error, so only interpolate when there is something to insert.
    if args:
        msg = msg % args
    raise ConfigSyntaxError(fname=self.fname, line=self.line, column=self.column, msg=msg)
150 for i in traceback.extract_stack():
154 if n: log.debug(s + n + ' ' + repr(self.peek(15)) + '...')
162 while (not self.eof()) and (not self.peeks(self.c_close)):
166 if self.eof() or self.peeks(self.c_close):
168 if self.line == l0: # No newline skipped in p_WS
171 self.nexts(';') # NOTE: this is weird - can ';' occur anywhere? Or at most once, but only after any p_WS debris?
175 while not self.eof():
176 if self.peek() in self.c_ws:
178 elif self.peeks(self.c_comment):
184 self.expect(self.c_comment, "'#' expected at the beginning of a comment.")
185 while (not self.eof()) and (not self.nexts(self.c_nl)):
187 def p_STATEMENT(self):
190 if self.peeks(self.c_if):
193 # for operation or subtree, read VARNAME
194 varname = self.p_VARNAME()
196 if self.peeks(self.c_open):
197 self.p_SUBTREE(varname)
199 self.p_OPERATION(varname)
200 def p_SUBTREE(self, varname=None):
204 varname = self.p_VARNAME()
206 self.expect(self.c_open)
207 # backup and extend the variable name prefix
209 self.prefix = p + self.c_varname_sep + varname
214 self.expect(self.c_close)
215 def p_OPERATION(self, varname=None):
219 varname = self.p_VARNAME()
221 if self.nexts(self.c_set):
223 elif self.nexts(self.c_append):
226 self.syntax_error('Unknown operation.')
228 exp = self.p_EXPRESSION()
229 vname = (self.prefix+self.c_varname_sep+varname).lstrip(self.c_varname_sep)
230 v = self.tree.lookup(vname)
232 cnd = self.conditions[-1]
235 op = conf.Operation(op, cnd, exp, level=self.level,
236 source="%s:%d:%d"%(self.fname, self.line, self.column))
237 # NOTE/WARNING: The last character of operation will be reported in case of error.
239 self.read_ops.append( (vname, op) )
240 def p_CONDITION(self):
243 self.expect(self.c_if)
246 cnd = conf.ConfigCondition(f)
247 self.conditions.append(cnd)
250 self.expect(self.c_open)
253 self.expect(self.c_close)
255 self.conditions.pop()
259 while self.peek().isalnum() or self.peek() in u'-_.':
260 vnl.append(self.next())
262 if not re_VARNAME.match(vn):
263 self.syntax_error('Invalid variable name %r', vn)
265 def p_EXPRESSION(self):
269 self.syntax_error('Invalid start of expression')
270 # Parse literal expression
273 while not self.peeks(op):
274 exl.append(self.next())
277 return conf.ConfigExpression((s,), s)
278 # Parse expression with variables
281 while not self.peeks(op):
282 exl.append(self.peek())
283 if self.nexts(u'\\'):
286 if c not in u'\\"n' + self.c_open + self.c_close:
287 self.syntax_error('Illeal escape sequence in expression')
293 elif self.nexts(self.c_open):
294 # Parse a variable name in '{}'
295 varname = self.p_VARNAME()
296 self.expect(self.c_close)
298 expr.append(self.tree.lookup(varname))
301 expr.append(self.next())
304 # Concatenate consecutive characters in expr
307 if expr2 and isinstance(expr2[-1], unicode) and isinstance(i, unicode):
308 expr2[-1] = expr2[-1] + i
311 return conf.ConfigExpression(tuple(expr2), exs)
315 # Combined logical formula
317 f1 = self.p_FORMULA()
319 if self.nexts(self.c_and):
320 if self.peek(1).isalnum():
321 self.syntax_error('trailing characters after %r', self.c_and)
322 f2 = self.p_FORMULA()
325 return ('AND', f1, f2)
326 elif self.nexts(self.c_or):
327 if self.peek(1).isalnum():
328 self.syntax_error('trailing characters after %r', self.c_or)
329 f2 = self.p_FORMULA()
332 return ('OR', f1, f2)
333 elif self.nexts(u')'):
334 # Only extra parentheses
337 self.syntax_error("Logic operator or ')' expected")
338 elif self.nexts(self.c_not):
339 if self.peek().isalnum():
340 self.syntax_error('trailing characters after %r', self.c_not)
345 # Should be (in)equality condition
346 e1 = self.p_EXPRESSION()
348 if self.nexts(self.c_eq):
350 e2 = self.p_EXPRESSION()
351 return ('==', e1, e2)
352 elif self.nexts(self.c_neq):
354 e2 = self.p_EXPRESSION()
355 return ('!=', e1, e2)
357 self.syntax_error("Comparation operator expected")