Browse Source

Merge branch 'fix_recons'

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.6.3
Erez Shinan 6 years ago
parent
commit
633bd21e70
6 changed files with 151 additions and 28 deletions
  1. +1
    -1
      lark/exceptions.py
  2. +2
    -2
      lark/lark.py
  3. +4
    -2
      lark/load_grammar.py
  4. +27
    -23
      lark/reconstruct.py
  5. +1
    -0
      tests/__main__.py
  6. +116
    -0
      tests/test_reconstructor.py

+ 1
- 1
lark/exceptions.py View File

@@ -75,7 +75,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
self.column = getattr(token, 'column', '?')
self.considered_rules = considered_rules
self.state = state
self.pos_in_stream = token.pos_in_stream
self.pos_in_stream = getattr(token, 'pos_in_stream', None)

message = ("Unexpected token %r at line %s, column %s.\n"
"Expected: %s\n"


+ 2
- 2
lark/lark.py View File

@@ -157,9 +157,9 @@ class Lark:
self.grammar = load_grammar(grammar, self.source)

# Compile the EBNF grammar into BNF
tokens, self.rules, self.ignore_tokens = self.grammar.compile()
self.terminals, self.rules, self.ignore_tokens = self.grammar.compile()

self.lexer_conf = LexerConf(tokens, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks)
self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, self.options.lexer_callbacks)

if self.options.parser:
self.parser = self._build_parser()


+ 4
- 2
lark/load_grammar.py View File

@@ -448,8 +448,10 @@ class Grammar:
self.ignore = ignore

def compile(self):
token_defs = list(self.token_defs)
rule_defs = self.rule_defs
# We change the trees in-place (to support huge grammars)
# So deepcopy allows calling compile more than once.
token_defs = deepcopy(list(self.token_defs))
rule_defs = deepcopy(self.rule_defs)

# =================
# Compile Tokens


+ 27
- 23
lark/reconstruct.py View File

@@ -67,38 +67,42 @@ class MakeMatchTree:

class Reconstructor:
def __init__(self, parser):
# Recreate the rules to assume a standard lexer
_tokens, rules, _grammar_extra = parser.grammar.compile()
# XXX TODO calling compile twice returns different results!
tokens, rules, _grammar_extra = parser.grammar.compile()

expand1s = {r.origin for r in parser.rules if r.options and r.options.expand1}
self.write_tokens = WriteTokensTransformer({t.name:t for t in tokens})
self.rules = list(self._build_recons_rules(rules))

d = defaultdict(list)
def _build_recons_rules(self, rules):
expand1s = {r.origin for r in rules if r.options and r.options.expand1}

aliases = defaultdict(list)
for r in rules:
# Rules can match their alias
if r.alias:
alias = NonTerminal(r.alias)
d[alias].append(r.expansion)
d[r.origin].append([alias])
else:
d[r.origin].append(r.expansion)
aliases[r.origin].append( r.alias )

# Expanded rules can match their own terminal
for sym in r.expansion:
if sym in expand1s:
d[sym].append([Terminal(sym.name)])
rule_names = {r.origin for r in rules}
nonterminals = {sym for sym in rule_names
if sym.name.startswith('_') or sym in expand1s or sym in aliases }

for r in rules:
recons_exp = [sym if sym in nonterminals else Terminal(sym.name)
for sym in r.expansion if not is_discarded_terminal(sym)]

reduced_rules = defaultdict(list)
for name, expansions in d.items():
for expansion in expansions:
reduced = [sym if sym.name.startswith('_') or sym in expand1s else Terminal(sym.name)
for sym in expansion if not is_discarded_terminal(sym)]
# Skip self-recursive constructs
if recons_exp == [r.origin]:
continue

reduced_rules[name, tuple(reduced)].append(expansion)
sym = NonTerminal(r.alias) if r.alias else r.origin

self.rules = [Rule(name, list(reduced), MakeMatchTree(name.name, expansions[0]), None)
for (name, reduced), expansions in reduced_rules.items()]
yield Rule(sym, recons_exp, MakeMatchTree(sym.name, r.expansion))

self.write_tokens = WriteTokensTransformer({t.name:t for t in _tokens})
for origin, rule_aliases in aliases.items():
for alias in rule_aliases:
yield Rule(origin, [Terminal(alias)], MakeMatchTree(origin.name, [NonTerminal(alias)]))
yield Rule(origin, [Terminal(origin.name)], MakeMatchTree(origin.name, [origin]))


def _match(self, term, token):


+ 1
- 0
tests/__main__.py View File

@@ -5,6 +5,7 @@ import logging

from .test_trees import TestTrees
from .test_tools import TestStandalone
from .test_reconstructor import TestReconstructor

try:
from .test_nearley.test_nearley import TestNearley


+ 116
- 0
tests/test_reconstructor.py View File

@@ -0,0 +1,116 @@
import json
import unittest
from unittest import TestCase
from lark import Lark
from lark.reconstruct import Reconstructor


common = """
%import common (WS_INLINE, NUMBER, WORD)
%ignore WS_INLINE
"""

def _remove_ws(s):
return s.replace(' ', '').replace('\n','')

class TestReconstructor(TestCase):
    """Round-trip tests for lark.reconstruct.Reconstructor.

    Each test builds an LALR parser, parses a code snippet, reconstructs
    the text from the resulting parse tree, and checks that the round
    trip preserves the content. Comparison ignores spaces/newlines,
    since reconstruction does not promise to preserve layout.
    """

    def assert_reconstruct(self, grammar, code):
        """Parse *code* with *grammar*, reconstruct from the tree, and
        compare the two texts with all spaces and newlines stripped."""
        parser = Lark(grammar, parser='lalr')
        tree = parser.parse(code)
        new = Reconstructor(parser).reconstruct(tree)
        self.assertEqual(_remove_ws(code), _remove_ws(new))

    def test_starred_rule(self):
        # Star applied to a named rule (`item*`).
        # Raw string fixes the invalid `\s` escape of the original non-raw
        # literal (SyntaxWarning); the grammar text is byte-identical.
        g = r"""
start: item*
item: NL
| rule
rule: WORD ":" NUMBER
NL: /(\r?\n)+\s*/
""" + common

        code = """
Elephants: 12
"""

        self.assert_reconstruct(g, code)

    def test_starred_group(self):
        # Star applied to an anonymous group; `_NL` is a filtered-out
        # terminal, exercising discarded-token handling.
        g = r"""
start: (rule | _NL)*
rule: WORD ":" NUMBER
_NL: /(\r?\n)+\s*/
""" + common

        code = """
Elephants: 12
"""

        self.assert_reconstruct(g, code)

    def test_alias(self):
        # An alternative with an alias (`-> hi`), exercising the
        # alias-matching rules the Reconstructor generates.
        g = r"""
start: line*
line: NL
| rule
| "hello" -> hi
rule: WORD ":" NUMBER
NL: /(\r?\n)+\s*/
""" + common

        code = """
Elephants: 12
hello
"""

        self.assert_reconstruct(g, code)

    def test_json_example(self):
        # A realistic grammar: aliases, inlined rules (?start / ?value),
        # maybe-empty bracket contents. The round trip is compared as
        # parsed JSON values, since whitespace need not survive.
        test_json = '''
{
"empty_object" : {},
"empty_array" : [],
"booleans" : { "YES" : true, "NO" : false },
"numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ],
"strings" : [ "This", [ "And" , "That", "And a \\"b" ] ],
"nothing" : null
}
'''

        json_grammar = r"""
?start: value

?value: object
| array
| string
| SIGNED_NUMBER -> number
| "true" -> true
| "false" -> false
| "null" -> null

array : "[" [value ("," value)*] "]"
object : "{" [pair ("," pair)*] "}"
pair : string ":" value

string : ESCAPED_STRING

%import common.ESCAPED_STRING
%import common.SIGNED_NUMBER
%import common.WS

%ignore WS
"""

        json_parser = Lark(json_grammar, parser='lalr')
        tree = json_parser.parse(test_json)

        new_json = Reconstructor(json_parser).reconstruct(tree)
        self.assertEqual(json.loads(new_json), json.loads(test_json))


# Allow running this test module directly: python tests/test_reconstructor.py
if __name__ == '__main__':
    unittest.main()

Loading…
Cancel
Save