@@ -12,18 +12,18 @@ from lark.reconstruct import Reconstructor
 
 from .json_parser import json_grammar
 
-def test():
-    test_json = '''
-        {
-            "empty_object" : {},
-            "empty_array" : [],
-            "booleans" : { "YES" : true, "NO" : false },
-            "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ],
-            "strings" : [ "This", [ "And" , "That" ] ],
-            "nothing" : null
-        }
-    '''
+test_json = '''
+    {
+        "empty_object" : {},
+        "empty_array" : [],
+        "booleans" : { "YES" : true, "NO" : false },
+        "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ],
+        "strings" : [ "This", [ "And" , "That", "And a \\"b" ] ],
+        "nothing" : null
+    }
+'''
 
+def test_scanless():
     json_parser = Lark(json_grammar)
     tree = json_parser.parse(test_json)
@@ -38,4 +38,15 @@ def test():
     print (new_json)
     print (json.loads(new_json) == json.loads(test_json))
 
-test()
+def test_lalr():
+    json_parser = Lark(json_grammar, parser='lalr')
+    tree = json_parser.parse(test_json)
+
+    new_json = Reconstructor(json_parser).reconstruct(tree)
+    print (new_json)
+    print (json.loads(new_json) == json.loads(test_json))
+
+test_scanless()
+test_lalr()
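(Aside, not part of the patch: both tests perform the same round-trip, and the check they print could be factored into a small helper. The sketch below is illustrative only; `roundtrip_ok` is a hypothetical name, and it uses only the APIs already shown in the example file.)

    # Sketch only: the reconstructed text may differ in whitespace,
    # but it must decode to the same data as the original input.
    def roundtrip_ok(json_parser, text):
        tree = json_parser.parse(text)
        new_json = Reconstructor(json_parser).reconstruct(tree)
        return json.loads(new_json) == json.loads(text)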
@@ -380,7 +380,7 @@ class Grammar:
                 if name.startswith('_'):
                     options = RuleOptions.new_from(options, filter_out=True)
                 else:
-                    options = RuleOptions.new_from(options, join_children=True)
+                    options = RuleOptions.new_from(options, create_token=name)
 
                 name = tokens_to_convert.get(name, name)
                 for exp in chain( tree.find_data('expansion'), tree.find_data('expr') ):
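(Aside: a sketch of what the two branches above mean for scanless grammars, where token definitions are converted into rules. The names `_WS` and `STRING` are hypothetical examples; the behaviour described is what the new `create_token` option enables in the tree builder below.)

    # Underscore-prefixed token-rules are dropped from the tree entirely:
    #   _WS    -> RuleOptions.new_from(options, filter_out=True)
    # Every other token-rule now remembers its own name, so the tree builder
    # can rebuild a real Token from the matched characters:
    #   STRING -> RuleOptions.new_from(options, create_token='STRING')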
@@ -454,10 +454,10 @@ class Grammar:
 
 
 class RuleOptions:
-    def __init__(self, keep_all_tokens=False, expand1=False, join_children=False, filter_out=False):
+    def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False):
         self.keep_all_tokens = keep_all_tokens
         self.expand1 = expand1
-        self.join_children = join_children  # used for scanless postprocessing
+        self.create_token = create_token  # used for scanless postprocessing
         self.filter_out = filter_out  # remove this rule from the tree
                                       # used for "token"-rules in scanless
@@ -1,4 +1,5 @@
 from .common import is_terminal, GrammarError
+from .lexer import Token
 
 class Callback(object):
     pass
@@ -12,9 +13,9 @@ def create_expand1_tree_builder_function(tree_builder):
             return tree_builder(children)
     return expand1
 
-def create_join_children(tree_builder):
+def create_token_wrapper(tree_builder, name):
     def join_children(children):
-        children = [''.join(children)]
+        children = [Token(name, ''.join(children))]
         return tree_builder(children)
     return join_children
 
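(Aside: a runnable sketch of the effect of the new wrapper, with the function from the hunk above copied inline and `list` standing in for the real tree-builder callback. The `STRING` name and the sample children are made up for illustration.)

    from lark.lexer import Token

    def create_token_wrapper(tree_builder, name):
        def join_children(children):
            # The matched characters are joined and wrapped in a named Token,
            # instead of being passed on as a plain joined string.
            children = [Token(name, ''.join(children))]
            return tree_builder(children)
        return join_children

    wrapped = create_token_wrapper(list, 'STRING')
    print(wrapped(['"', 'a', 'b', '"']))   # roughly: [Token(STRING, '"ab"')]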
@@ -67,7 +68,7 @@ class ParseTreeBuilder:
         for origin, (expansions, options) in rules.items():
             keep_all_tokens = options.keep_all_tokens if options else False
             expand1 = options.expand1 if options else False
-            join_children = options.join_children if options else False
+            create_token = options.create_token if options else False
 
             _origin = origin
 
@@ -85,8 +86,8 @@ class ParseTreeBuilder:
 
                 if expand1:
                     f = create_expand1_tree_builder_function(f)
-                if join_children:
-                    f = create_join_children(f)
+                if create_token:
+                    f = create_token_wrapper(f, create_token)
 
                 alias_handler = create_rule_handler(expansion, f, keep_all_tokens, filter_out)
@@ -20,8 +20,11 @@ def is_iter_empty(i):
 
 class Reconstructor:
     def __init__(self, parser):
-        tokens = {t.name:t for t in parser.lexer_conf.tokens}
-        token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in parser.lexer_conf.tokens}
+        # Recreate the rules to assume a standard lexer
+        _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')
+
+        tokens = {t.name:t for t in _tokens}
+        token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in _tokens}
 
         class MatchData(object):
             def __init__(self, data):
@@ -71,9 +74,6 @@ class Reconstructor:
 
             return to_write
 
-        # Recreate the rules to assume a standard lexer
-        _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')
-
         d = defaultdict(list)
         for name, (expansions, _o) in rules.items():
             for expansion, alias in expansions: