5 Simple Moe configuration file syntax parser.
7 TODO: decide necessity of '()' in/around formulas
8 TODO: check escaping in expressions
9 TODO: should whitespace (incl. '\\n') be allowed (almost) everywhere?
10 can comment be anywhere whitespace can?
12 Generally, whitespace and comments are allowed everywhere except in variable names and inside expressions.
13 Also, COMMENT must not contain '\\n'.
15 FILE, BLOCK, STATEMENT, OPERATION, SUBTREE, CONDITION, FORMULA, AND, OR and NOT eat any preceding whitespace. TODO: check?
17 The configuration syntax is the following:
20 BLOCK = WS | STATEMENT ( SEP STATEMENT )*
23 WS = ( ' ' | '\\t' | '\\n' | COMMENT )*
25 COMMENT = re('#[^\\n]*\\n')
27 STATEMENT = CONDITION | OPERATION | SUBTREE
29 OPERATION = WS VARNAME WS ( '=' | '+=' ) WS EXPRESSION
30 SUBTREE = WS VARNAME WS '{' BLOCK '}'
31 CONDITION = WS 'if' FORMULA WS '{' BLOCK WS '}'
33 FORMULA = WS (( EXPRESSION WS ( '!=' | '==' ) WS EXPRESSION ) | '(' AND WS ')' | '(' OR WS ')' | NOT )
34 AND = FORMULA WS 'and' FORMULA
35 OR = FORMULA WS 'or' FORMULA
36 NOT = WS 'not' FORMULA
38 NOTE: Formula may contain additional/extra parentheses
40 EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'"
41 ECHAR = re('([^\\{}]|\\\\|\\{|\\}|\\n)*')
42 VARNAME = re('[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)*')
45 import re, itertools, logging as log
47 class ConfigSyntaxError(Exception):
48 # TODO: choose a better superclass
49 def __init__(self, msg, fname='<unknown>', line=None, column=None):
55 return('ConfigSyntaxError %s:%d:%d: %s'%(self.fname, self.line, self.column, self.msg))
57 "Variable name regexp, dots (separators) must be separated from edges and each other."
58 re_VARNAME = re.compile(r'\A([A-Za-z0-9_-]+\.)*[A-Za-z0-9_-]+\Z')
60 class ConfigParser(object):
76 def __init__(self, s, tree, fname='<unknown>', level=0):
77 """Create a config file parser.
78 `s` is either a string, unicode or an open file. File is assumed to be utf-8, string is converted to unicode.
79 `tree` is a ConfigTree to fill the operations into.
80 `fname` is an optional name of the file, for debugging and syntax errors.
81 `level` indicates the precedence the operations should have in the ConfigTree
83 self.s = s # Unicode, string or an open file
84 self.buf = u"" # Read-buffer for s file, whole unicode string for s string/unicode
85 if isinstance(self.s, types.StringTypes):
86 self.buf = unicode(self.s)
87 elif (not isinstance(self.s, file)) or self.s.closed:
88 raise TypeError("Expected unicode, str or open file.")
90 self.fname = fname # Filename
93 self.tree = tree # ConfTree to fill
94 self.level = level # level of the parsed operations
95 self.prefix = '' # Prefix of variable name, may begin with '.'
96 self.conds = [] # Stack of nested conditions, these are chained, so only the last is necessary
98 "Make sure buf contains at least `l` next characters, return True on success and False on hitting EOF."
99 if isinstance(self.s, file):
100 self.buf = self.buf[self.bufpos:] + self.s.read(max(l, 1024)).decode('utf8')
102 return len(self.buf) >= self.bufpos + l
103 def peek(self, l = 1):
104 "Peek and return next `l` unicode characters or everything until EOF."
108 "Peek and compare next `len(s)` characters to `s`. Converts `s` to unicode. False on hitting EOF."
110 return self.peek(len(s)) == s
112 def next(self, l = 1):
113 "Eat and return next `l` unicode characters. Raise exception on EOF."
114 if not self.preread(l):
115 raise ConfigSyntaxError("Unexpected end of file")
116 s = self.buf[self.bufpos:self.bufpos+l]
124 self.line += s.count('\n')
125 self.column = l - rnl - 1
128 """Compare next `len(s)` characters to `s`. On match, eat them and return True. Otherwise just return False.
129 Converts `s` to unicode. False on hitting EOF."""
136 "Check for end-of-stream."
137 return self.preread(1)
138 def expected(self, s, msg=None):
139 "Eat and compare next `len(s)` characters to `s`. If not equal, raise an error with `msg`. Unicode."
141 if not self.nexts(s):
142 raise self.syntaxError(msg or u"%r expected."%(s,))
def syntaxError(self, msg, *args):
    """Raise a syntax error annotated with the current file/line/column.

    `msg` is the error text. When extra positional `args` are given, `msg`
    is treated as a %-format string and interpolated with them; without
    `args` the message is used verbatim.

    Always raises ConfigSyntaxError; never returns normally.
    """
    # Interpolate only when arguments were actually supplied: callers also
    # pass already-formatted text (e.g. built with %r), which may legitimately
    # contain '%' and must not be run through the '%' operator again.
    text = (msg % args) if args else msg
    # ConfigSyntaxError is the exception class this module defines.
    raise ConfigSyntaxError(msg=text, fname=self.fname, line=self.line, column=self.column)
150 while not self.eof() and not f.peek(self.c_close):
153 if not self.peek() in self.c_sep:
158 while not self.eof():
159 if self.peek() in self.c_ws:
161 elif self.peeks(self.c_comment):
166 self.expect(self.c_comment, "'#' expected at the beginning of a comment.")
167 while not self.eof() and not self.nexts(self.c_nl):
169 def p_STATEMENT(self):
171 if self.peeks(self.c_if):
174 # for operation or subtree, read VARNAME
175 varname = self.p_VARNAME()
177 if self.nexts(self.c_open):
178 self.p_BLOCK(varname)
180 self.expect(self.c_close)
182 self.p_OPERATION(varname)
183 def p_SUBTREE(self, varname=None):
186 varname = self.p_VARNAME()
188 self.expect(self.c_open)
189 # backup and extend the variable name prefix
191 self.prefix = p + self.c_varname_sep + varname
196 self.expect(self.c_close)
197 def p_OPERATION(self, varname=None):
200 varname = self.p_VARNAME()
202 if self.nexts(self.c_set):
204 elif self.nexts(self.c_append):
207 self.syntaxError('Unknown operation.')
209 exp = self.p_EXPRESSION()
210 v = self.tree.lookup((self.prefix+self.c_varname_sep+varname).lstrip(self.c_varname_sep))
212 cnd = self.conditions[-1]
215 v.add_operation(conf.Operation(op, cnd, exp, level=self.level,
216 source="%s:%d:%d"%(self.fname, self.line, self.column)))
217 # NOTE/WARNING: The last character of operation is reported.
218 def p_CONDITION(self):
220 self.expect(self.c_if)
223 cnd = ConfigCondition(f)
224 self.conditions.append(cnd)
227 self.expect(self.c_open)
230 self.expect(self.c_close)
232 self.conditions.pop()
235 while self.peek().isalnum() or self.peek() in u'-_':
236 vnl.append(self.next())
238 if not re_VARNAME.match(vn):
239 self.syntax_error('Invalid variable name')
241 def p_EXPRESSION(self):
244 self.syntax_error('Invalid start of expression')
245 # Parse literal expression
248 while not self.peeks(op):
249 exl.append(self.next())
252 return ConfigExpression((s,), s)
253 # Parse expression with variables
256 while not self.peeks(op):
257 exl.append(self.peek())
258 if self.nexts(u'\\'):
261 if c not in u'\\"n' + self.c_open + self.c_close:
262 self.syntax_error('Illeal escape sequence in expression')
268 elif self.nexts(self.c_open):
269 # Parse a variable name in '{}'
270 varname = self.p_VARNAME()
271 self.expect(self.c_close)
273 expr.append(self.tree.lookup(varname))
276 expr.append(self.next())
279 # Concatenate consecutive characters in expr
282 if expr2 and isinstance(expr2[-1], unicode):
283 expr2[-1] = expr2[-1] + i
286 return ConfigExpression(tuple(expr2), exs)
289 # Combined logical formula
291 f1 = self.p_FORMULA()
293 if self.nexts(self.c_and):
294 f2 = self.p_FORMULA()
297 return ('AND', f1, f2)
298 elif self.nexts(self.c_or):
299 f2 = self.p_FORMULA()
302 return ('OR', f1, f2)
303 elif self.nexts(u')'):
304 # Only extra parentheses
307 self.syntax_error("Logic operator or ')' expected")
308 elif self.nexts(self.c_not):
313 # Should be (in)equality condition
314 e1 = self.p_EXPRESSION()
316 if self.nexts(self.c_eq):
318 e2 = self.p_EXPRESSION()
319 return ('==', e1, e2)
320 elif self.nexts(self.c_neq):
322 e2 = self.p_EXPRESSION()
323 return ('!=', e1, e2)
325 self.syntax_error("Comparation operator expected")