diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 9a0bb00..453ed45 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -219,7 +219,6 @@ class SimplifyTree(InlineTransformer): return tokenmods + [value] def get_tokens(tree, token_set): - tokens = [] for t in tree.find_data('token'): x = t.children name = x[0].value @@ -266,10 +265,13 @@ class ExtractAnonTokens(InlineTransformer): else: assert False, token + if value in self.token_reverse: # Kind of a weird placement + token_name = self.token_reverse[value] + if token_name not in self.token_set: self.token_set.add(token_name) self.tokens.append((token_name, token, [])) - assert value not in self.token_reverse + assert value not in self.token_reverse, value self.token_reverse[value] = token_name return Token('TOKEN', token_name, -1) diff --git a/tools/nearley.py b/tools/nearley.py index b45ca53..b78d3b8 100644 --- a/tools/nearley.py +++ b/tools/nearley.py @@ -1,22 +1,32 @@ "Converts between Lark and Nearley grammars. Work in progress!" +import os.path +import sys + from lark import Lark, InlineTransformer nearley_grammar = r""" start: (ruledef|directive)+ - directive: "@" NAME STRING + directive: "@" NAME (STRING|NAME) + | "@" _JS -> js_code ruledef: NAME "->" expansions + | NAME REGEXP "->" expansions -> macro expansions: expansion ("|" expansion)* - expansion: (rule|string|regexp)+ _JS? 
+ + ?expr: item [":" /[+*?]/] + + ?item: rule|string|regexp + | "(" expansions ")" rule: NAME string: STRING regexp: REGEXP _JS: /(?s){%.*?%}/ - NAME: /[a-zA-Z_]\w*/ + NAME: /[a-zA-Z_$]\w*/ WS.ignore: /[\t \f\n]+/ COMMENT.ignore: /\#[^\n]*/ REGEXP: /\[.*?\]/ @@ -27,13 +37,20 @@ nearley_grammar = r""" class NearleyToLark(InlineTransformer): + def __init__(self, builtin_path): + self.builtin_path = builtin_path def rule(self, name): - return {'_': '_WS?', '__':'_WS'}.get(name, name) + # return {'_': '_WS?', '__':'_WS'}.get(name, name) + return {'_': '_ws_maybe', '__':'_ws'}.get(name, name) def ruledef(self, name, exps): + name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name) return '%s: %s' % (name, exps) + def expr(self, item, op): + return '(%s)%s' % (item, op) + def regexp(self, r): return '/%s/' % r @@ -45,35 +62,32 @@ class NearleyToLark(InlineTransformer): return ' '.join(x) def expansions(self, *x): - return '\n |'.join(x) + return '(%s)' % ('\n |'.join(x)) + + def js_code(self): + return '' + + def macro(self, *args): + return '' # TODO support macros?! def directive(self, name, *args): if name == 'builtin': arg = args[0][1:-1] - if arg == 'whitespace.ne': - return r'_WS: /[ \t\n\v\f]/' - elif arg == 'number.ne': - return ('unsigned_int: DIGIT+\n' - 'DIGIT: /\d/\n' - 'decimal: "-"? 
DIGIT+ [/\./ DIGIT+] \n' - 'percentage: decimal "%"\n' - ) - # TODO - elif arg == 'postprocessors.ne': - pass - else: - assert False, arg - else: - assert False - pass + with open(os.path.join(self.builtin_path, arg)) as f: + text = f.read() + return nearley_to_lark(text, self.builtin_path) + elif name == 'preprocessor': + return '' + + raise Exception('Unknown directive: %s' % name) def start(self, *rules): return '\n'.join(filter(None, rules)) -def nearley_to_lark(g): +def nearley_to_lark(g, builtin_path): parser = Lark(nearley_grammar) tree = parser.parse(g) - return NearleyToLark().transform(tree) + return NearleyToLark(builtin_path).transform(tree) def test(): @@ -112,7 +126,7 @@ def test(): function(d) {return Math.floor(d[0]*255); } %} """ - converted_grammar = nearley_to_lark(css_example_grammar) + converted_grammar = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin') print(converted_grammar) l = Lark(converted_grammar, start='csscolor', parser='earley_nolex') @@ -120,5 +134,17 @@ def test(): print(l.parse('rgb(255, 70%, 3)').pretty()) +def main(): + try: + nearley_lib = sys.argv[1] + except IndexError: + print("Reads Nearley grammar from stdin and outputs a lark grammar.") + print("Usage: %s " % sys.argv[0]) + return + + grammar = sys.stdin.read() + print(nearley_to_lark(grammar, os.path.join(nearley_lib, 'builtin'))) + + if __name__ == '__main__': - test() + main()