From e151d22ea230baafef55da89173fc57fa8754346 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Thu, 2 Mar 2017 18:47:47 +0200 Subject: [PATCH] examples.reconstruct_json now working with scanless --- examples/reconstruct_json.py | 37 +++++++++++++++++++++++------------- lark/load_grammar.py | 6 +++--- lark/parse_tree_builder.py | 11 ++++++----- lark/reconstruct.py | 10 +++++----- 4 files changed, 38 insertions(+), 26 deletions(-) diff --git a/examples/reconstruct_json.py b/examples/reconstruct_json.py index 7f434fe..de4e086 100644 --- a/examples/reconstruct_json.py +++ b/examples/reconstruct_json.py @@ -12,18 +12,18 @@ from lark.reconstruct import Reconstructor from .json_parser import json_grammar -def test(): - - test_json = ''' - { - "empty_object" : {}, - "empty_array" : [], - "booleans" : { "YES" : true, "NO" : false }, - "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ], - "strings" : [ "This", [ "And" , "That" ] ], - "nothing" : null - } - ''' +test_json = ''' + { + "empty_object" : {}, + "empty_array" : [], + "booleans" : { "YES" : true, "NO" : false }, + "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ], + "strings" : [ "This", [ "And" , "That", "And a \\"b" ] ], + "nothing" : null + } +''' + +def test_scanless(): json_parser = Lark(json_grammar) tree = json_parser.parse(test_json) @@ -38,4 +38,15 @@ def test(): print (new_json) print (json.loads(new_json) == json.loads(test_json)) -test() + +def test_lalr(): + + json_parser = Lark(json_grammar, parser='lalr') + tree = json_parser.parse(test_json) + + new_json = Reconstructor(json_parser).reconstruct(tree) + print (new_json) + print (json.loads(new_json) == json.loads(test_json)) + +test_scanless() +test_lalr() diff --git a/lark/load_grammar.py b/lark/load_grammar.py index c9a2d30..f91b0b9 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -380,7 +380,7 @@ class Grammar: if name.startswith('_'): options = RuleOptions.new_from(options, filter_out=True) else: - options = RuleOptions.new_from(options, join_children=True) + options = RuleOptions.new_from(options, create_token=name) name = tokens_to_convert.get(name, name) for exp in chain( tree.find_data('expansion'), tree.find_data('expr') ): @@ -454,10 +454,10 @@ class Grammar: class RuleOptions: - def __init__(self, keep_all_tokens=False, expand1=False, join_children=False, filter_out=False): + def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False): self.keep_all_tokens = keep_all_tokens self.expand1 = expand1 - self.join_children = join_children # used for scanless postprocessing + self.create_token = create_token # used for scanless postprocessing self.filter_out = filter_out # remove this rule from the tree # used for "token"-rules in scanless diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index 8e0c62c..547deab 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -1,4 +1,5 @@ from .common import is_terminal, GrammarError +from .lexer import Token class Callback(object): pass @@ -12,9 +13,9 @@ def create_expand1_tree_builder_function(tree_builder): return tree_builder(children) return expand1 -def create_join_children(tree_builder): +def create_token_wrapper(tree_builder, name): def join_children(children): - children = [''.join(children)] + children = [Token(name, ''.join(children))] return tree_builder(children) return join_children @@ -67,7 +68,7 @@ class ParseTreeBuilder: for origin, (expansions, options) in rules.items(): keep_all_tokens = options.keep_all_tokens if options else False expand1 = options.expand1 if options else False - join_children = options.join_children if options else False + create_token = options.create_token if options else False _origin = origin @@ -85,8 +86,8 @@ class ParseTreeBuilder: if expand1: f = create_expand1_tree_builder_function(f) - if join_children: - f = create_join_children(f) + if create_token: + f = create_token_wrapper(f, create_token) alias_handler = create_rule_handler(expansion, f, keep_all_tokens, filter_out) diff --git a/lark/reconstruct.py b/lark/reconstruct.py index fd70130..b166882 100644 --- a/lark/reconstruct.py +++ b/lark/reconstruct.py @@ -20,8 +20,11 @@ def is_iter_empty(i): class Reconstructor: def __init__(self, parser): - tokens = {t.name:t for t in parser.lexer_conf.tokens} - token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in parser.lexer_conf.tokens} + # Recreate the rules to assume a standard lexer + _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever') + tokens = {t.name:t for t in _tokens} + + token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in _tokens} class MatchData(object): def __init__(self, data): @@ -71,9 +74,6 @@ class Reconstructor: return to_write - # Recreate the rules to assume a standard lexer - _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever') - d = defaultdict(list) for name, (expansions, _o) in rules.items(): for expansion, alias in expansions: