"""Converts between Lark and Nearley grammars. Work in progress!

Only the Nearley -> Lark direction is implemented here.  When run as a
script, the module reads a Nearley grammar from stdin and prints the
converted Lark grammar to stdout; the single command-line argument is the
Nearley library directory, whose ``builtin`` sub-directory is searched for
``@builtin`` includes.
"""

import os.path
import sys

from lark import Lark, InlineTransformer

# Lark grammar describing the subset of the Nearley grammar syntax that this
# converter understands.  Embedded JavaScript (``{% ... %}``) is matched by the
# filtered ``_JS`` token, so it never reaches the transformer.
nearley_grammar = r"""
    start: (ruledef|directive)+

    directive: "@" NAME (STRING|NAME)
             | "@" _JS  -> js_code
    ruledef: NAME "->" expansions
           | NAME REGEXP "->" expansions -> macro
    expansions: expansion ("|" expansion)*

    expansion: expr+ _JS?

    ?expr: item [":" /[+*?]/]

    ?item: rule|string|regexp
         | "(" expansions ")"

    rule: NAME
    string: STRING
    regexp: REGEXP
    _JS: /(?s){%.*?%}/

    NAME: /[a-zA-Z_$]\w*/
    WS.ignore: /[\t \f\n]+/
    COMMENT.ignore: /\#[^\n]*/
    REGEXP: /\[.*?\]/
    STRING: /".*?"/

    """

# Nearley rule names `_` and `__` are rewritten to these names both where
# they are referenced and where they are defined.
# Kept at module level (not on the transformer class) so that the transformer
# only exposes rule callbacks.
_WHITESPACE_RULE_NAMES = {'_': '_ws_maybe', '__': '_ws'}


def _lark_rule_name(name):
    """Return the Lark-side name for the Nearley rule *name*.

    Names without a special mapping are returned unchanged.
    """
    return _WHITESPACE_RULE_NAMES.get(name, name)


class NearleyToLark(InlineTransformer):
    """Transformer that renders a parsed Nearley grammar as Lark grammar text.

    Each method corresponds to a rule (or alias) of ``nearley_grammar`` and
    returns the Lark source text for that node as a string.
    """

    def __init__(self, builtin_path):
        """Remember *builtin_path*, the directory holding ``@builtin`` files."""
        self.builtin_path = builtin_path

    def rule(self, name):
        """Render a reference to another rule."""
        return _lark_rule_name(name)

    def ruledef(self, name, exps):
        """Render a rule definition as ``name: expansions``."""
        return '%s: %s' % (_lark_rule_name(name), exps)

    def expr(self, item, op):
        """Render an item followed by its ``+``, ``*`` or ``?`` operator."""
        return '(%s)%s' % (item, op)

    def regexp(self, r):
        """Render a Nearley character class as a Lark regexp literal."""
        return '/%s/' % r

    def string(self, s):
        """Render a string literal as a sequence of one-character literals.

        The surrounding quotes are stripped and every remaining raw character
        (escape sequences are not interpreted) becomes its own quoted literal.
        """
        # TODO allow regular strings, and split them in the parser frontend
        return ' '.join('"%s"' % ch for ch in s[1:-1])

    def expansion(self, *x):
        """Join the items of one alternative with spaces."""
        return ' '.join(x)

    def expansions(self, *x):
        """Render a parenthesised list of alternatives separated by ``|``."""
        return '(%s)' % ('\n |'.join(x))

    def js_code(self):
        """Drop top-level JavaScript blocks (``@{% ... %}``)."""
        return ''

    def macro(self, *args):
        """Drop macro definitions; they are not converted."""
        return ''   # TODO support macros?!

    def directive(self, name, *args):
        """Handle an ``@name arg`` directive.

        ``@builtin`` reads the named file from ``self.builtin_path`` and
        returns its conversion; ``@preprocessor`` is ignored.

        Raises:
            Exception: if the directive is neither of the above.
        """
        if name == 'builtin':
            # Strip the first and last character of the argument
            # (the quotes, when it was written as a quoted string).
            arg = args[0][1:-1]
            with open(os.path.join(self.builtin_path, arg)) as f:
                text = f.read()
            return nearley_to_lark(text, self.builtin_path)
        elif name == 'preprocessor':
            return ''

        raise Exception('Unknown directive: %s' % name)

    def start(self, *rules):
        """Join all non-empty converted statements, one per line."""
        return '\n'.join(filter(None, rules))


def nearley_to_lark(g, builtin_path):
    """Convert the Nearley grammar text *g* into Lark grammar text.

    Args:
        g: Nearley grammar source.
        builtin_path: directory in which ``@builtin`` files are looked up.

    Returns:
        The converted grammar, as produced by ``NearleyToLark``.
    """
    parser = Lark(nearley_grammar)
    tree = parser.parse(g)
    return NearleyToLark(builtin_path).transform(tree)


def test(builtin_path='/home/erez/nearley/builtin'):
    """Convert a sample CSS-colour grammar and parse two inputs with it.

    Args:
        builtin_path: directory containing Nearley's builtin ``.ne`` files.
            The default is the original author's local checkout; pass your
            own location to run this elsewhere.
    """
    css_example_grammar = """
# http://www.w3.org/TR/css3-color/#colorunits

    @builtin "whitespace.ne"
    @builtin "number.ne"
    @builtin "postprocessors.ne"

    csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {%
        function(d) {
            return {
                "r": parseInt(d[1]+d[2], 16),
                "g": parseInt(d[3]+d[4], 16),
                "b": parseInt(d[5]+d[6], 16),
            }
        }
    %}
        | "#" hexdigit hexdigit hexdigit {%
        function(d) {
            return {
                "r": parseInt(d[1]+d[1], 16),
                "g": parseInt(d[2]+d[2], 16),
                "b": parseInt(d[3]+d[3], 16),
            }
        }
    %}
        | "rgb"  _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %}
        | "hsl"  _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %}
        | "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %}
        | "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %}

    hexdigit -> [a-fA-F0-9]
    colnum -> unsigned_int {% id %} | percentage {%
        function(d) {return Math.floor(d[0]*255); }
    %}
    """
    converted_grammar = nearley_to_lark(css_example_grammar, builtin_path)
    print(converted_grammar)

    css_parser = Lark(converted_grammar, start='csscolor', parser='earley_nolex')
    print(css_parser.parse('#a199ff').pretty())
    print(css_parser.parse('rgb(255, 70%, 3)').pretty())


def main():
    """Command-line entry point: stdin Nearley grammar -> stdout Lark grammar.

    Expects the Nearley library directory as the first argument; prints a
    usage message and returns if it is missing.
    """
    try:
        nearley_lib = sys.argv[1]
    except IndexError:
        print("Reads Nearley grammar from stdin and outputs a lark grammar.")
        # Name the required argument so the message is actually usable.
        print("Usage: %s <nearley_lib_path>" % sys.argv[0])
        return

    grammar = sys.stdin.read()
    print(nearley_to_lark(grammar, os.path.join(nearley_lib, 'builtin')))


if __name__ == '__main__':
    main()