
examples.reconstruct_json now working with scanless

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 years ago
parent
commit e151d22ea2
4 files changed with 38 additions and 26 deletions
1. +24 -13 examples/reconstruct_json.py
2. +3 -3 lark/load_grammar.py
3. +6 -5 lark/parse_tree_builder.py
4. +5 -5 lark/reconstruct.py

+24 -13 examples/reconstruct_json.py

@@ -12,18 +12,18 @@ from lark.reconstruct import Reconstructor
 
 from .json_parser import json_grammar
 
-def test():
-    test_json = '''
-        {
-            "empty_object" : {},
-            "empty_array" : [],
-            "booleans" : { "YES" : true, "NO" : false },
-            "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ],
-            "strings" : [ "This", [ "And" , "That" ] ],
-            "nothing" : null
-        }
-    '''
+test_json = '''
+    {
+        "empty_object" : {},
+        "empty_array" : [],
+        "booleans" : { "YES" : true, "NO" : false },
+        "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ],
+        "strings" : [ "This", [ "And" , "That", "And a \\"b" ] ],
+        "nothing" : null
+    }
+'''
+
+def test_scanless():
 
     json_parser = Lark(json_grammar)
     tree = json_parser.parse(test_json)
@@ -38,4 +38,15 @@ def test():
     print (new_json)
     print (json.loads(new_json) == json.loads(test_json))
 
-test()
+
+def test_lalr():
+
+    json_parser = Lark(json_grammar, parser='lalr')
+    tree = json_parser.parse(test_json)
+
+    new_json = Reconstructor(json_parser).reconstruct(tree)
+    print (new_json)
+    print (json.loads(new_json) == json.loads(test_json))
+
+test_scanless()
+test_lalr()
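
A note on the verification pattern in this example: reconstruction only guarantees output that parses to the same tree, not byte-identical text, which is why the final line compares json.loads results rather than raw strings. A minimal illustration of that property (the two literal strings below are made up for illustration, not actual Reconstructor output):

import json

original      = '{ "numbers" : [ 0, 1, -2 ] }'
reconstructed = '{"numbers":[0,1,-2]}'   # hypothetical reconstructor output

# Raw strings differ in whitespace, but the parsed values are equal,
# which is exactly the property the example's last print() checks.
print(reconstructed == original)                          # False
print(json.loads(reconstructed) == json.loads(original))  # True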

+3 -3 lark/load_grammar.py

@@ -380,7 +380,7 @@ class Grammar:
         if name.startswith('_'):
             options = RuleOptions.new_from(options, filter_out=True)
         else:
-            options = RuleOptions.new_from(options, join_children=True)
+            options = RuleOptions.new_from(options, create_token=name)
 
         name = tokens_to_convert.get(name, name)
         for exp in chain( tree.find_data('expansion'), tree.find_data('expr') ):
@@ -454,10 +454,10 @@ class Grammar:
 
 
 class RuleOptions:
-    def __init__(self, keep_all_tokens=False, expand1=False, join_children=False, filter_out=False):
+    def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False):
         self.keep_all_tokens = keep_all_tokens
         self.expand1 = expand1
-        self.join_children = join_children    # used for scanless postprocessing
+        self.create_token = create_token    # used for scanless postprocessing
 
         self.filter_out = filter_out    # remove this rule from the tree
                                         # used for "token"-rules in scanless
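
For readers unfamiliar with scanless mode: token definitions get compiled into ordinary rules, and these options record how to undo that in the parse tree. A runnable stand-in illustrating the two marking paths (simplified classes for illustration, not the real load_grammar code):

# Simplified stand-in for lark's RuleOptions, keeping only the two
# flags this commit touches.
class RuleOptions:
    def __init__(self, create_token=None, filter_out=False):
        self.create_token = create_token  # token name to rebuild, or None
        self.filter_out = filter_out      # drop this rule from the tree

def mark_token_rule(name):
    # Underscore-prefixed token-rules vanish from the tree entirely;
    # the rest are tagged with their own name so the tree builder can
    # reassemble them into a single Token (see parse_tree_builder.py).
    if name.startswith('_'):
        return RuleOptions(filter_out=True)
    return RuleOptions(create_token=name)

print(mark_token_rule('_WS').filter_out)       # True
print(mark_token_rule('STRING').create_token)  # STRING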


+6 -5 lark/parse_tree_builder.py

@@ -1,4 +1,5 @@
 from .common import is_terminal, GrammarError
+from .lexer import Token
 
 class Callback(object):
     pass
@@ -12,9 +13,9 @@ def create_expand1_tree_builder_function(tree_builder):
         return tree_builder(children)
     return expand1
 
-def create_join_children(tree_builder):
+def create_token_wrapper(tree_builder, name):
    def join_children(children):
-        children = [''.join(children)]
+        children = [Token(name, ''.join(children))]
        return tree_builder(children)
    return join_children

@@ -67,7 +68,7 @@ class ParseTreeBuilder:
        for origin, (expansions, options) in rules.items():
            keep_all_tokens = options.keep_all_tokens if options else False
            expand1 = options.expand1 if options else False
-            join_children = options.join_children if options else False
+            create_token = options.create_token if options else False
 
            _origin = origin
@@ -85,8 +86,8 @@ class ParseTreeBuilder:
            if expand1:
                f = create_expand1_tree_builder_function(f)
 
-            if join_children:
-                f = create_join_children(f)
+            if create_token:
+                f = create_token_wrapper(f, create_token)
 
 
            alias_handler = create_rule_handler(expansion, f, keep_all_tokens, filter_out)
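
The effect of the new wrapper, in isolation: where create_join_children collapsed a token-rule's matched characters into a plain string, create_token_wrapper produces a named Token, so scanless trees look like lexer-produced ones. A self-contained sketch (Token below is a simplified stand-in for lark.lexer.Token, which subclasses str; the real one also carries position information):

class Token(str):
    # Simplified stand-in for lark.lexer.Token.
    def __new__(cls, type_, value):
        inst = super(Token, cls).__new__(cls, value)
        inst.type = type_
        return inst

def create_token_wrapper(tree_builder, name):
    def join_children(children):
        # Join the individually-matched characters back into one Token,
        # then hand it to the original tree builder.
        children = [Token(name, ''.join(children))]
        return tree_builder(children)
    return join_children

build = create_token_wrapper(lambda children: children, 'STRING')
tok, = build(['"', 'h', 'i', '"'])
print(repr(tok), tok.type)   # '"hi"' STRING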


+5 -5 lark/reconstruct.py

@@ -20,8 +20,11 @@ def is_iter_empty(i):
 
 class Reconstructor:
    def __init__(self, parser):
-        tokens = {t.name:t for t in parser.lexer_conf.tokens}
-        token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in parser.lexer_conf.tokens}
+        # Recreate the rules to assume a standard lexer
+        _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')
+        tokens = {t.name:t for t in _tokens}
+
+        token_res = {t.name:re.compile(t.pattern.to_regexp()) for t in _tokens}
 
        class MatchData(object):
            def __init__(self, data):
@@ -71,9 +74,6 @@ class Reconstructor:
 
        return to_write
 
-        # Recreate the rules to assume a standard lexer
-        _tokens, rules, _grammar_extra = parser.grammar.compile(lexer='standard', start='whatever')
-
        d = defaultdict(list)
        for name, (expansions, _o) in rules.items():
            for expansion, alias in expansions:
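
The rationale for hoisting the compile call: a scanless parser has no standard lexer, so parser.lexer_conf is not a reliable source of token definitions; recompiling the grammar with lexer='standard' yields both the token list and the rules from one consistent compilation. Schematically, the two lookup tables built in __init__ have this shape (TokenDef below is a hypothetical stand-in for lark's compiled token objects, whose real API is t.pattern.to_regexp()):

import re

class TokenDef:
    # Hypothetical stand-in for a compiled token definition.
    def __init__(self, name, regexp):
        self.name = name
        self.regexp = regexp
    def to_regexp(self):
        return self.regexp

_tokens = [TokenDef('NUMBER', r'-?[0-9]+'), TokenDef('COMMA', r',')]

tokens = {t.name: t for t in _tokens}                             # name -> definition
token_res = {t.name: re.compile(t.to_regexp()) for t in _tokens}  # name -> regex

print(token_res['NUMBER'].match('-42').group())  # -42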

