|
|
@@ -140,6 +140,43 @@ class TestReconstructor(TestCase): |
|
|
|
new_json = Reconstructor(json_parser).reconstruct(tree) |
|
|
|
self.assertEqual(json.loads(new_json), json.loads(test_json)) |
|
|
|
|
|
|
|
def test_switch_grammar_unicode_terminal(self):
    """Reconstruct an ASCII-grammar parse tree using a unicode-keyword grammar.

    This test checks that a parse tree built with a grammar containing only
    ascii characters can be reconstructed with a grammar that has unicode
    rules (or vice versa). The original bug assigned ANON terminals to
    unicode keywords, which offsets the ANON terminal count in the unicode
    grammar and causes subsequent identical ANON tokens (e.g., `+=`) to
    mis-match between the two grammars.
    """

    # NOTE: `\\s` is escaped the same way as `\\r` and `\\n` so that the
    # (non-raw) Python literal contains no invalid escape sequence. The
    # grammar text handed to Lark is unchanged: it still reads `\s`.
    g1 = """
    start: (NL | stmt)*
    stmt: "keyword" var op var
    !op: ("+=" | "-=" | "*=" | "/=")
    var: WORD
    NL: /(\\r?\\n)+\\s*/
    """ + common

    # Identical to g1 except that the statement keyword is a unicode string.
    g2 = """
    start: (NL | stmt)*
    stmt: "குறிப்பு" var op var
    !op: ("+=" | "-=" | "*=" | "/=")
    var: WORD
    NL: /(\\r?\\n)+\\s*/
    """ + common

    code = """
    keyword x += y
    """

    l1 = Lark(g1, parser='lalr')
    l2 = Lark(g2, parser='lalr')
    r = Reconstructor(l2)

    # Parse with the ASCII grammar, rebuild the text with the unicode
    # grammar, then re-parse that text with the unicode grammar: the round
    # trip must produce the same tree.
    tree = l1.parse(code)
    code2 = r.reconstruct(tree)
    # assertEqual (rather than a bare assert) matches the other tests in
    # this class and is not stripped when Python runs with -O.
    self.assertEqual(l2.parse(code2), tree)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Run the test suite when this module is executed directly as a script.
if __name__ == "__main__":
    unittest.main()