5 Simple Moe configuration file syntax parser.
7 TODO: decide necessity of '()' in/around formulas
8 TODO: check escaping in expressions
9 TODO: should whitespace (incl. '\\n') be allowed (almost) everywhere?
10 can comment be anywhere whitespace can?
12 Generally, whitespace and comments are allowed everywhere except in variable names and inside expressions.
13 Also, COMMENT must not contain '\\n'.
15 FILE, BLOCK, STATEMENT, OPERATION, SUBTREE, CONDITION, FORMULA, AND, OR and NOT eat any preceding whitespace. TODO: check?
17 The configuration syntax is the following:
20 BLOCK = WS | STATEMENT ( SEP STATEMENT )*
23 WS = ( ' ' | '\\t' | '\\n' | COMMENT )*
25 COMMENT = re('#[^\\n]*\\n')
27 STATEMENT = CONDITION | OPERATION | SUBTREE
29 OPERATION = WS VARNAME WS ( '=' | '+=' ) WS EXPRESSION
30 SUBTREE = WS VARNAME WS '{' BLOCK '}'
31 CONDITION = WS 'if' FORMULA WS '{' BLOCK WS '}'
33 FORMULA = WS (( EXPRESSION WS ( '!=' | '==' ) WS EXPRESSION ) | '(' AND WS ')' | '(' OR WS ')' | NOT )
34 AND = FORMULA WS 'and' FORMULA
35 OR = FORMULA WS 'or' FORMULA
36 NOT = WS 'not' FORMULA
38 NOTE: Formula may contain additional/extra parentheses
40 EXPRESSION = '"' ( ECHAR | '{' VARNAME '}' )* '"' | re"'[^'\\n]*'"
41 ECHAR = re('([^\\{}]|\\\\|\\{|\\}|\\n)*')
42 VARNAME = re('[a-zA-Z0-9-_]+(\.[a-zA-Z0-9-_]+)*')
45 import re, itertools, logging as log
47 class ConfigSyntaxError(Exception):
48 # TODO: choose a better superclass
49 def __init__(self, msg, fname='<unknown>', line=None, column=None):
55 return('ConfigSyntaxError %s:%d:%d: %s'%(self.fname, self.line, self.column, self.msg))
57 "Variable name regexp, dots (separators) must be separated from edges and each other."
# Anchored with \A and \Z: the pattern accepts one or more '.'-separated
# segments, each a non-empty run of ASCII letters, digits, '_' or '-'.
# A leading dot, a trailing dot and two consecutive dots are therefore rejected.
58 re_VARNAME = re.compile(r'\A([A-Za-z0-9_-]+\.)*[A-Za-z0-9_-]+\Z')
60 class ConfigParser(object):
76 def __init__(self, f, tree, fname='<unknown>'):
78 self.fname = fname # Filename
81 self.tree = tree # ConfTree to fill
82 self.prefix = '' # Prefix of variable name, may begin with '.'
83 self.conds = [] # Stack of nested conditions, these are chained, so only the last is necessary
84 def peek(self, l = 1):
85 "Peek and return next `l` unicode characters."
89 "Peek and compare next `len(s)` characters to `s`. Unicode."
91 return self.peek(len(s)) == s
93 def next(self, l = 1):
94 "Eat and return next `l` unicode characters."
98 "Compare next `len(s)` characters to `s`, eat them and return True if they match. Unicode."
100 return self.next(len(s)) == s
102 "Check for end-of-stream."
105 def expected(self, s, msg=None):
106 "Eat and compare next `len(s)` characters to `s`. If not equal, raise an error with `msg`. Unicode."
108 if not self.nexts(s):
109 raise self.syntaxError(msg or u"%r expected."%(s,))
def syntaxError(self, msg, *args):
    """Raise a syntax error carrying the parser's file/line/column info.

    `msg` is a %-style format string and `args` are the values interpolated
    into it. The position is read from `self.fname`, `self.line` and
    `self.column`.

    This method never returns: it always raises ConfigSyntaxError.
    """
    # The exception class declared in this module is ConfigSyntaxError.
    # The previously used name ConfSyntaxError is not defined in the visible
    # part of this file, so reporting a syntax error ended in a NameError.
    raise ConfigSyntaxError(fname=self.fname, line=self.line, column=self.column, msg=(msg % args))
117 while not self.eof() and not f.peek(self.c_close):
120 if not self.peek() in self.c_sep:
125 while not self.eof():
126 if self.peek() in self.c_ws:
128 elif self.peeks(self.c_comment):
133 self.expect(self.c_comment, "'#' expected at the beginning of a comment.")
134 while not self.eof() and not self.nexts(self.c_nl):
136 self.eof() or self.expect(self.c_nl)
137 def p_STATEMENT(self):
139 if self.peeks(self.c_if):
142 # for operation or subtree, read VARNAME
143 varname = self.p_VARNAME()
145 if self.nexts(self.c_open):
146 self.p_BLOCK(varname)
148 self.expect(self.c_close)
150 self.p_OPERATION(varname)
151 def p_SUBTREE(self, varname=None):
154 varname = self.p_VARNAME()
156 self.expect(self.c_open)
157 # backup and extend the variable name prefix
159 self.prefix = p + self.c_varname_sep + varname
164 self.expect(self.c_close)
165 def p_OPERATION(self, varname=None):
168 varname = self.p_VARNAME()
170 if self.nexts(self.c_set):
172 elif self.nexts(self.c_append):
175 self.syntaxError('Unknown operation.')
177 exp = self.p_EXPRESSION()
178 v = self.tree.lookup((self.prefix+self.c_varname_sep+varname).lstrip(self.c_varname_sep))
180 cnd = self.conditions[-1]
183 v.add_operation(op, cnd, exp, self.priority)
184 def p_CONDITION(self):
186 self.expect(self.c_if)
189 cnd = ConfigCondition(f)
190 self.conditions.append(cnd)
193 self.expect(self.c_open)
196 self.expect(self.c_close)
198 self.conditions.pop()
201 while self.peek().isalnum() or self.peek() in u'-_':
202 vnl.append(self.next())
204 if not re_VARNAME.match(vn):
205 self.syntax_error('Invalid variable name')
207 def p_EXPRESSION(self):
210 self.syntax_error('Invalid start of expression')
211 # Parse literal expression
214 while not self.peeks(op):
215 exl.append(self.next())
218 return ConfigExpression((s,), s)
219 # Parse expression with variables
222 while not self.peeks(op):
223 exl.append(self.peek())
224 if self.nexts(u'\\'):
227 if c not in u'\\"n' + self.c_open + self.c_close:
228 self.syntax_error('Illeal escape sequence in expression')
234 elif self.nexts(self.c_open):
235 # Parse a variable name in '{}'
236 varname = self.p_VARNAME()
237 self.expect(self.c_close)
239 expr.append(self.tree.lookup(varname))
242 expr.append(self.next())
245 # Concatenate consecutive characters in expr
248 if expr2 and isinstance(expr2[-1], unicode):
249 expr2[-1] = expr2[-1] + i
252 return ConfigExpression(tuple(expr2), exs)
255 # Combined logical formula
257 f1 = self.p_FORMULA()
259 if self.nexts(self.c_and):
260 f2 = self.p_FORMULA()
263 return ('AND', f1, f2)
264 elif self.nexts(self.c_or):
265 f2 = self.p_FORMULA()
268 return ('OR', f1, f2)
269 elif self.nexts(u')'):
270 # Only extra parentheses
273 self.syntax_error("Logic operator or ')' expected")
274 elif self.nexts(self.c_not):
279 # Should be (in)equality condition
280 e1 = self.p_EXPRESSION()
282 if self.nexts(self.c_eq):
284 e2 = self.p_EXPRESSION()
285 return ('==', e1, e2)
286 elif self.nexts(self.c_neq):
288 e2 = self.p_EXPRESSION()
289 return ('!=', e1, e2)
291 self.syntax_error("Comparation operator expected")