From b01c283d47d7194959d057bbaf18a25640acc92a Mon Sep 17 00:00:00 2001
From: julienmalard <julien.malard@mail.mcgill.ca>
Date: Tue, 10 Nov 2020 10:33:53 -0500
Subject: [PATCH] Failing test

---
 tests/test_reconstructor.py | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/tests/test_reconstructor.py b/tests/test_reconstructor.py
index 93c64fe..6196f4a 100644
--- a/tests/test_reconstructor.py
+++ b/tests/test_reconstructor.py
@@ -140,6 +140,43 @@ class TestReconstructor(TestCase):
         new_json = Reconstructor(json_parser).reconstruct(tree)
         self.assertEqual(json.loads(new_json), json.loads(test_json))
 
+    def test_switch_grammar_unicode_terminal(self):
+        """
+        Check that a parse tree built with an ASCII-only grammar can be reconstructed with a grammar whose keyword
+        is unicode (or vice versa). The original bug assigned ANON terminals to unicode keywords, which offsets the
+        ANON terminal count in the unicode grammar and causes subsequent identical ANON tokens (e.g., `+=`) to
+        mis-match between the two grammars.
+        """
+
+        g1 = """
+        start: (NL | stmt)*
+        stmt: "keyword" var op var
+        !op: ("+=" | "-=" | "*=" | "/=")
+        var: WORD
+        NL: /(\\r?\\n)+\\s*/
+        """ + common
+
+        g2 = """
+        start: (NL | stmt)*
+        stmt: "குறிப்பு" var op var
+        !op: ("+=" | "-=" | "*=" | "/=")
+        var: WORD
+        NL: /(\\r?\\n)+\\s*/
+        """ + common
+
+        code = """
+        keyword x += y
+        """
+
+        l1 = Lark(g1, parser='lalr')
+        l2 = Lark(g2, parser='lalr')
+        r = Reconstructor(l2)  # reconstruct with the unicode-keyword grammar
+
+        tree = l1.parse(code)  # parse with the ASCII-keyword grammar
+        code2 = r.reconstruct(tree)
+        self.assertEqual(l2.parse(code2), tree)
+
+
 
 if __name__ == '__main__':
     unittest.main()