"Converts between Lark and Nearley grammars. Work in progress!"

import os.path
import sys

import js2py

from lark import Lark, InlineTransformer, Transformer

# Lark grammar describing the subset of Nearley syntax this tool understands:
# rule definitions, "@" directives, "@{% ... %}" JavaScript blocks and
# macro headers (which are recognised but not converted yet).
nearley_grammar = r"""
    start: (ruledef|directive)+

    directive: "@" NAME (STRING|NAME)
             | "@" JS  -> js_code
    ruledef: NAME "->" expansions
           | NAME REGEXP "->" expansions -> macro
    expansions: expansion ("|" expansion)*

    expansion: expr+ js

    ?expr: item [":" /[+*?]/]

    ?item: rule|string|regexp
         | "(" expansions ")"

    rule: NAME
    string: STRING
    regexp: REGEXP
    JS: /(?s){%.*?%}/
    js: JS?

    NAME: /[a-zA-Z_$]\w*/
    COMMENT: /\#[^\n]*/
    REGEXP: /\[.*?\]/
    STRING: /".*?"/

    %import common.WS
    %ignore WS
    %ignore COMMENT

    """

nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard')


def _get_rulename(name):
    """Return the Lark rule name used for the Nearley rule called `name`.

    The whitespace rules `_` and `__` are renamed to `_ws_maybe` and `_ws`,
    every result is prefixed with `n_`, and `$` is spelled out as
    `__DOLLAR__`.
    """
    name = {'_': '_ws_maybe', '__': '_ws'}.get(name, name)
    return 'n_' + name.replace('$', '__DOLLAR__')


class NearleyToLark(InlineTransformer):
    """Transforms a parsed Nearley rule definition into Lark grammar text.

    While transforming, it collects two side tables:

    * `functions`   - alias name -> JavaScript postprocessor, evaluated in
                      `context`.
    * `extra_rules` - generated rule name -> rule body, for string literals
                      and quantified items that are lifted into their own
                      rules.
    """

    def __init__(self, context):
        # `context` must provide `execute(code)` and expose the resulting
        # JavaScript globals as attributes (test() passes a js2py.EvalJs).
        self.context = context
        self.functions = {}
        self.extra_rules = {}

    def _new_function(self, code):
        """Evaluate the JS expression `code` and register it under a new alias.

        Returns the generated alias name (`alias_<n>`).
        """
        n = len(self.functions)
        name = 'alias_%d' % n
        assert name not in self.functions

        # Bind the expression to a global of the same name so that it can be
        # fetched back from the context as an attribute.
        code = "%s = (%s);" % (name, code)
        self.context.execute(code)
        f = getattr(self.context, name)
        self.functions[name] = f

        return name

    def _extra_rule(self, rule):
        """Store `rule` as a generated rule body and return its name."""
        name = 'xrule_%d' % len(self.extra_rules)
        assert name not in self.extra_rules
        self.extra_rules[name] = rule
        return name

    def rule(self, name):
        return _get_rulename(name)

    def ruledef(self, name, exps):
        # The leading '!' is emitted so that Lark keeps all tokens of the rule.
        return '!%s: %s' % (_get_rulename(name), exps)

    def expr(self, item, op):
        # A quantified item (`item:+`, `item:*`, `item:?`) becomes its own rule.
        rule = '(%s)%s' % (item, op)
        return self._extra_rule(rule)

    def regexp(self, r):
        return '/%s/' % r

    def string(self, s):
        # String literals are lifted into their own generated rule.
        return self._extra_rule(s)

    def expansion(self, *x):
        # The last child is always the `js` subtree; it has a child only when
        # the expansion carries a `{% ... %}` postprocessor.
        x, js = x[:-1], x[-1]
        if js.children:
            js_code, = js.children
            js_code = js_code[2:-2]     # strip the "{%" and "%}" delimiters
            alias = '-> ' + self._new_function(js_code)
        else:
            alias = ''
        return ' '.join(x) + alias

    def expansions(self, *x):
        return '%s' % ('\n    |'.join(x))

    def start(self, *rules):
        return '\n'.join(filter(None, rules))


def _nearley_to_lark(g, builtin_path, n2l):
    """Convert the Nearley grammar text `g` into a list of Lark rule strings.

    `@builtin "file"` directives are resolved relative to `builtin_path` and
    converted recursively. `@{% ... %}` blocks are executed in `n2l.context`.
    Macros are skipped. `n2l` is the shared NearleyToLark instance that
    accumulates aliases and extra rules.

    Raises AssertionError for an unknown directive and Exception for an
    unknown statement type.
    """
    rule_defs = []

    tree = nearley_grammar_parser.parse(g)
    for statement in tree.children:
        if statement.data == 'directive':
            directive, arg = statement.children
            if directive == 'builtin':
                # arg[1:-1] drops the quotes surrounding the file name.
                with open(os.path.join(builtin_path, arg[1:-1])) as f:
                    text = f.read()
                rule_defs += _nearley_to_lark(text, builtin_path, n2l)
            else:
                # Raised explicitly (instead of `assert False`) so that the
                # check is not stripped under `python -O`; the exception type
                # is kept for compatibility.
                raise AssertionError(directive)
        elif statement.data == 'js_code':
            code, = statement.children
            code = code[2:-2]           # strip the "{%" and "%}" delimiters
            n2l.context.execute(code)
        elif statement.data == 'macro':
            pass    # TODO Add support for macros!
        elif statement.data == 'ruledef':
            rule_defs.append(n2l.transform(statement))
        else:
            raise Exception("Unknown statement: %s" % statement)

    return rule_defs


def nearley_to_lark(g, builtin_path, context):
    """Convert the Nearley grammar text `g` to Lark.

    `builtin_path` is the directory searched for `@builtin` files and
    `context` is the JavaScript context in which postprocessors are evaluated.

    Returns a `(lark_grammar, transformer)` pair: the grammar text, and a
    Transformer whose attributes are the evaluated postprocessors, keyed by
    the alias names used in the grammar.
    """
    n2l = NearleyToLark(context)
    lark_g = '\n'.join(_nearley_to_lark(g, builtin_path, n2l))
    lark_g += '\n' + '\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())

    t = Transformer()
    for fname, fcode in n2l.functions.items():
        setattr(t, fname, fcode)
    # Rules without a postprocessor yield their children, or None when empty.
    setattr(t, '__default__', lambda n, c: c if c else None)

    return lark_g, t


def test(builtin_path='/home/erez/nearley/builtin'):
    """Convert a CSS colour grammar, parse two samples and print the results.

    `builtin_path` must point at a directory containing the `whitespace.ne`,
    `number.ne` and `postprocessors.ne` files referenced by the grammar.
    """
    css_example_grammar = """
# http://www.w3.org/TR/css3-color/#colorunits

    @builtin "whitespace.ne"
    @builtin "number.ne"
    @builtin "postprocessors.ne"

    csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {%
        function(d) {
            return {
                "r": parseInt(d[1]+d[2], 16),
                "g": parseInt(d[3]+d[4], 16),
                "b": parseInt(d[5]+d[6], 16),
            }
        }
    %}
              | "#" hexdigit hexdigit hexdigit {%
        function(d) {
            return {
                "r": parseInt(d[1]+d[1], 16),
                "g": parseInt(d[2]+d[2], 16),
                "b": parseInt(d[3]+d[3], 16),
            }
        }
    %}
              | "rgb"  _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %}
              | "hsl"  _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %}
              | "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %}
              | "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %}

    hexdigit -> [a-fA-F0-9]
    colnum -> unsigned_int {% id %} | percentage {%
        function(d) {return Math.floor(d[0]*255); }
    %}
    """

    context = js2py.EvalJs()
    # The grammar refers to `id` as a postprocessor, so define it up front.
    context.execute('function id(x) {return x[0]; }')

    converted_grammar, t = nearley_to_lark(css_example_grammar, builtin_path, context)
    # print(converted_grammar)

    l = Lark(converted_grammar, start='n_csscolor')
    tree = l.parse('#a199ff')
    print(t.transform(tree))
    tree = l.parse('rgb(255, 70%, 3)')
    print(t.transform(tree))


def main():
    """Read a Nearley grammar from stdin and print the converted Lark grammar.

    The first command-line argument is the Nearley library directory; its
    `builtin` sub-directory is used to resolve `@builtin` directives.
    """
    try:
        nearley_lib = sys.argv[1]
    except IndexError:
        print("Reads Nearley grammar from stdin and outputs a lark grammar.")
        print("Usage: %s <nearley_lib_path>" % sys.argv[0])
        return

    grammar = sys.stdin.read()

    # nearley_to_lark() needs a JavaScript context to evaluate postprocessors.
    # `id` is predefined exactly as test() does, so grammars that use
    # `{% id %}` can be converted.
    context = js2py.EvalJs()
    context.execute('function id(x) {return x[0]; }')

    lark_grammar, _ = nearley_to_lark(grammar, os.path.join(nearley_lib, 'builtin'), context)
    print(lark_grammar)


if __name__ == '__main__':
    main()
    # test()