From d4140d5e6ce0ecb8f3b729b820b66b037ef1c610 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Fri, 17 Mar 2017 12:13:33 +0200 Subject: [PATCH 1/3] Nearley -> Lark tool working for tests with js2py --- lark/tools/nearley.py | 134 +++++++++++++++++++++++++++++------------- 1 file changed, 93 insertions(+), 41 deletions(-) diff --git a/lark/tools/nearley.py b/lark/tools/nearley.py index 5613c21..5df44fc 100644 --- a/lark/tools/nearley.py +++ b/lark/tools/nearley.py @@ -3,18 +3,20 @@ import os.path import sys -from lark import Lark, InlineTransformer +import js2py + +from lark import Lark, InlineTransformer, Transformer nearley_grammar = r""" start: (ruledef|directive)+ directive: "@" NAME (STRING|NAME) - | "@" _JS -> js_code + | "@" JS -> js_code ruledef: NAME "->" expansions | NAME REGEXP "->" expansions -> macro expansions: expansion ("|" expansion)* - expansion: expr+ _JS? + expansion: expr+ js ?expr: item [":" /[+*?]/] @@ -24,7 +26,8 @@ nearley_grammar = r""" rule: NAME string: STRING regexp: REGEXP - _JS: /(?s){%.*?%}/ + JS: /(?s){%.*?%}/ + js: JS? NAME: /[a-zA-Z_$]\w*/ COMMENT: /\#[^\n]*/ @@ -37,60 +40,104 @@ nearley_grammar = r""" """ +nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard') +def _get_rulename(name): + name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name) + return 'n_' + name.replace('$', '__DOLLAR__') class NearleyToLark(InlineTransformer): - def __init__(self, builtin_path): - self.builtin_path = builtin_path + def __init__(self, context): + self.context = context + self.functions = {} + self.extra_rules = {} + + def _new_function(self, code): + n = len(self.functions) + name = 'alias_%d' % n + assert name not in self.functions + code = "%s = (%s);" % (name, code) + self.context.execute(code) + f = getattr(self.context, name) + self.functions[name] = f + + return name + + def _extra_rule(self, rule): + name = 'xrule_%d' % len(self.extra_rules) + assert name not in self.extra_rules + self.extra_rules[name] = rule + return name def rule(self, name): - # return {'_': '_WS?', '__':'_WS'}.get(name, name) - return {'_': '_ws_maybe', '__':'_ws'}.get(name, name) + return _get_rulename(name) def ruledef(self, name, exps): - name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name) - return '%s: %s' % (name, exps) + return '!%s: %s' % (_get_rulename(name), exps) def expr(self, item, op): - return '(%s)%s' % (item, op) + rule = '(%s)%s' % (item, op) + return self._extra_rule(rule) def regexp(self, r): return '/%s/' % r def string(self, s): - # TODO allow regular strings, and split them in the parser frontend - return ' '.join('"%s"'%ch for ch in s[1:-1]) + return self._extra_rule(s) def expansion(self, *x): - return ' '.join(x) + x, js = x[:-1], x[-1] + if js.children: + js_code ,= js.children + js_code = js_code[2:-2] + alias = '-> ' + self._new_function(js_code) + else: + alias = '' + return ' '.join(x) + alias def expansions(self, *x): - return '(%s)' % ('\n |'.join(x)) - - def js_code(self): - return '' - - def macro(self, *args): - return '' # TODO support macros?! - - def directive(self, name, *args): - if name == 'builtin': - arg = args[0][1:-1] - with open(os.path.join(self.builtin_path, arg)) as f: - text = f.read() - return nearley_to_lark(text, self.builtin_path) - elif name == 'preprocessor': - return '' - - raise Exception('Unknown directive: %s' % name) + return '%s' % ('\n |'.join(x)) def start(self, *rules): return '\n'.join(filter(None, rules)) -def nearley_to_lark(g, builtin_path): - parser = Lark(nearley_grammar, parser='earley', lexer='standard') - tree = parser.parse(g) - return NearleyToLark(builtin_path).transform(tree) +def _nearley_to_lark(g, builtin_path, n2l): + rule_defs = [] + + tree = nearley_grammar_parser.parse(g) + for statement in tree.children: + if statement.data == 'directive': + directive, arg = statement.children + if directive == 'builtin': + with open(os.path.join(builtin_path, arg[1:-1])) as f: + text = f.read() + rule_defs += _nearley_to_lark(text, builtin_path, n2l) + else: + assert False, directive + elif statement.data == 'js_code': + code ,= statement.children + code = code[2:-2] + n2l.context.execute(code) + elif statement.data == 'macro': + pass # TODO Add support for macros! + elif statement.data == 'ruledef': + rule_defs.append( n2l.transform(statement) ) + else: + raise Exception("Unknown statement: %s" % statement) + + return rule_defs + + +def nearley_to_lark(g, builtin_path, context): + n2l = NearleyToLark(context) + lark_g = '\n'.join(_nearley_to_lark(g, builtin_path, n2l)) + lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items()) + t = Transformer() + for fname, fcode in n2l.functions.items(): + setattr(t, fname, fcode) + setattr(t, '__default__', lambda n, c: c if c else None) + + return lark_g, t def test(): @@ -129,12 +176,17 @@ def test(): function(d) {return Math.floor(d[0]*255); } %} """ - converted_grammar = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin') - print(converted_grammar) + context = js2py.EvalJs() + context.execute('function id(x) {return x[0]; }') + + converted_grammar, t = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin', context) + # print(converted_grammar) - l = Lark(converted_grammar, start='csscolor') - print(l.parse('#a199ff').pretty()) - print(l.parse('rgb(255, 70%, 3)').pretty()) + l = Lark(converted_grammar, start='n_csscolor') + tree = l.parse('#a199ff') + print(t.transform(tree)) + tree = l.parse('rgb(255, 70%, 3)') + print(t.transform(tree)) def main(): From 007b2174df984ed288dd6d22a961ba95a98c32e1 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Sat, 18 Mar 2017 13:38:17 +0200 Subject: [PATCH 2/3] Still working on Nearley --- lark/tools/nearley.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/lark/tools/nearley.py b/lark/tools/nearley.py index 5df44fc..e63d92b 100644 --- a/lark/tools/nearley.py +++ b/lark/tools/nearley.py @@ -128,7 +128,9 @@ def _nearley_to_lark(g, builtin_path, n2l): return rule_defs -def nearley_to_lark(g, builtin_path, context): +def nearley_to_lark(g, builtin_path): + context = js2py.EvalJs() + context.execute('function id(x) {return x[0]; }') n2l = NearleyToLark(context) lark_g = '\n'.join(_nearley_to_lark(g, builtin_path, n2l)) lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items()) @@ -176,10 +178,7 @@ def test(): function(d) {return Math.floor(d[0]*255); } %} """ - context = js2py.EvalJs() - context.execute('function id(x) {return x[0]; }') - - converted_grammar, t = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin', context) + converted_grammar, t = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin') # print(converted_grammar) l = Lark(converted_grammar, start='n_csscolor') @@ -202,5 +201,5 @@ def main(): if __name__ == '__main__': - main() - # test() + # main() + test() From 7d3e00666bfdf376a113ef3df9f1fc7a87afda2b Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Tue, 28 Mar 2017 11:24:28 +0300 Subject: [PATCH 3/3] Nearley-to-Lark Works. Now using js2py as code generator --- lark/tools/nearley.py | 102 +++++++++++++++++++++++++----------------- 1 file changed, 61 insertions(+), 41 deletions(-) diff --git a/lark/tools/nearley.py b/lark/tools/nearley.py index e63d92b..9b14db6 100644 --- a/lark/tools/nearley.py +++ b/lark/tools/nearley.py @@ -3,7 +3,6 @@ import os.path import sys -import js2py from lark import Lark, InlineTransformer, Transformer @@ -44,29 +43,30 @@ nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard' def _get_rulename(name): name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name) - return 'n_' + name.replace('$', '__DOLLAR__') + return 'n_' + name.replace('$', '__DOLLAR__').lower() class NearleyToLark(InlineTransformer): - def __init__(self, context): - self.context = context - self.functions = {} + def __init__(self): + self._count = 0 self.extra_rules = {} + self.extra_rules_rev = {} + self.alias_js_code = {} def _new_function(self, code): - n = len(self.functions) - name = 'alias_%d' % n - assert name not in self.functions - code = "%s = (%s);" % (name, code) - self.context.execute(code) - f = getattr(self.context, name) - self.functions[name] = f + name = 'alias_%d' % self._count + self._count += 1 + self.alias_js_code[name] = code return name def _extra_rule(self, rule): + if rule in self.extra_rules_rev: + return self.extra_rules_rev[rule] + name = 'xrule_%d' % len(self.extra_rules) assert name not in self.extra_rules self.extra_rules[name] = rule + self.extra_rules_rev[rule] = name return name def rule(self, name): @@ -101,7 +101,7 @@ class NearleyToLark(InlineTransformer): def start(self, *rules): return '\n'.join(filter(None, rules)) -def _nearley_to_lark(g, builtin_path, n2l): +def _nearley_to_lark(g, builtin_path, n2l, js_code): rule_defs = [] tree = nearley_grammar_parser.parse(g) @@ -111,13 +111,13 @@ def _nearley_to_lark(g, builtin_path, n2l): if directive == 'builtin': with open(os.path.join(builtin_path, arg[1:-1])) as f: text = f.read() - rule_defs += _nearley_to_lark(text, builtin_path, n2l) + rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code) else: assert False, directive elif statement.data == 'js_code': code ,= statement.children code = code[2:-2] - n2l.context.execute(code) + js_code.append(code) elif statement.data == 'macro': pass # TODO Add support for macros! elif statement.data == 'ruledef': @@ -128,19 +128,40 @@ def _nearley_to_lark(g, builtin_path, n2l): return rule_defs -def nearley_to_lark(g, builtin_path): - context = js2py.EvalJs() - context.execute('function id(x) {return x[0]; }') - n2l = NearleyToLark(context) - lark_g = '\n'.join(_nearley_to_lark(g, builtin_path, n2l)) +def create_code_for_nearley_grammar(g, start, builtin_path): + import js2py + + emit_code = [] + def emit(x=None): + if x: + emit_code.append(x) + emit_code.append('\n') + + js_code = ['function id(x) {return x[0];}'] + n2l = NearleyToLark() + lark_g = '\n'.join(_nearley_to_lark(g, builtin_path, n2l, js_code)) lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items()) - t = Transformer() - for fname, fcode in n2l.functions.items(): - setattr(t, fname, fcode) - setattr(t, '__default__', lambda n, c: c if c else None) - return lark_g, t + emit('from lark import Lark, Transformer') + emit() + emit('grammar = ' + repr(lark_g)) + emit() + + for alias, code in n2l.alias_js_code.items(): + js_code.append('%s = (%s);' % (alias, code)) + + emit(js2py.translate_js('\n'.join(js_code))) + emit('class TranformNearley(Transformer):') + for alias in n2l.alias_js_code: + emit(" %s = var.get('%s').to_python()" % (alias, alias)) + emit(" __default__ = lambda self, n, c: c if c else None") + + emit() + emit('parser = Lark(grammar, start="n_%s")' % start) + emit('def parse(text):') + emit(' return TranformNearley().transform(parser.parse(text))') + return ''.join(emit_code) def test(): css_example_grammar = """ @@ -178,28 +199,27 @@ def test(): function(d) {return Math.floor(d[0]*255); } %} """ - converted_grammar, t = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin') - # print(converted_grammar) + code = create_code_for_nearley_grammar(css_example_grammar, 'csscolor', '/home/erez/nearley/builtin') + d = {} + exec (code, d) + parse = d['parse'] - l = Lark(converted_grammar, start='n_csscolor') - tree = l.parse('#a199ff') - print(t.transform(tree)) - tree = l.parse('rgb(255, 70%, 3)') - print(t.transform(tree)) + print(parse('#a199ff')) + print(parse('rgb(255, 70%, 3)')) def main(): - try: - nearley_lib = sys.argv[1] - except IndexError: - print("Reads Nearley grammar from stdin and outputs a lark grammar.") - print("Usage: %s " % sys.argv[0]) + if len(sys.argv) < 3: + print("Reads Nearley grammar (with js functions) outputs an equivalent lark parser.") + print("Usage: %s " % sys.argv[0]) return - grammar = sys.stdin.read() - print(nearley_to_lark(grammar, os.path.join(nearley_lib, 'builtin'))) + fn, start, nearley_lib = sys.argv[1:] + with open(fn) as f: + grammar = f.read() + print(create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'))) if __name__ == '__main__': - # main() - test() + main() + # test()