Browse Source

Failing test

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.2
julienmalard 3 years ago
parent
commit
b01c283d47
1 changed files with 37 additions and 0 deletions
  1. +37
    -0
      tests/test_reconstructor.py

+ 37
- 0
tests/test_reconstructor.py View File

@@ -140,6 +140,43 @@ class TestReconstructor(TestCase):
new_json = Reconstructor(json_parser).reconstruct(tree) new_json = Reconstructor(json_parser).reconstruct(tree)
self.assertEqual(json.loads(new_json), json.loads(test_json)) self.assertEqual(json.loads(new_json), json.loads(test_json))


def test_switch_grammar_unicode_terminal(self):
"""
This test checks that a parse tree built with a grammar containing only ascii characters can be reconstructed
with a grammar that has unicode rules (or vice versa). The original bug assigned ANON terminals to unicode
keywords, which offsets the ANON terminal count in the unicode grammar and causes subsequent identical ANON
tokens (e.g., `+=`) to mis-match between the two grammars.
"""

g1 = """
start: (NL | stmt)*
stmt: "keyword" var op var
!op: ("+=" | "-=" | "*=" | "/=")
var: WORD
NL: /(\\r?\\n)+\s*/
""" + common

g2 = """
start: (NL | stmt)*
stmt: "குறிப்பு" var op var
!op: ("+=" | "-=" | "*=" | "/=")
var: WORD
NL: /(\\r?\\n)+\s*/
""" + common

code = """
keyword x += y
"""

l1 = Lark(g1, parser='lalr')
l2 = Lark(g2, parser='lalr')
r = Reconstructor(l2)

tree = l1.parse(code)
code2 = r.reconstruct(tree)
assert l2.parse(code2) == tree




if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

Loading…
Cancel
Save