This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

185 lines
4.6 KiB

  1. # coding=utf-8
  2. import json
  3. import sys
  4. import unittest
  5. from unittest import TestCase
  6. from lark import Lark
  7. from lark.reconstruct import Reconstructor
  8. common = """
  9. %import common (WS_INLINE, NUMBER, WORD)
  10. %ignore WS_INLINE
  11. """
  12. def _remove_ws(s):
  13. return s.replace(' ', '').replace('\n', '')
  14. class TestReconstructor(TestCase):
  15. def assert_reconstruct(self, grammar, code):
  16. parser = Lark(grammar, parser='lalr', maybe_placeholders=False)
  17. tree = parser.parse(code)
  18. new = Reconstructor(parser).reconstruct(tree)
  19. self.assertEqual(_remove_ws(code), _remove_ws(new))
  20. def test_starred_rule(self):
  21. g = """
  22. start: item*
  23. item: NL
  24. | rule
  25. rule: WORD ":" NUMBER
  26. NL: /(\\r?\\n)+\\s*/
  27. """ + common
  28. code = """
  29. Elephants: 12
  30. """
  31. self.assert_reconstruct(g, code)
  32. def test_starred_group(self):
  33. g = """
  34. start: (rule | NL)*
  35. rule: WORD ":" NUMBER
  36. NL: /(\\r?\\n)+\\s*/
  37. """ + common
  38. code = """
  39. Elephants: 12
  40. """
  41. self.assert_reconstruct(g, code)
  42. def test_alias(self):
  43. g = """
  44. start: line*
  45. line: NL
  46. | rule
  47. | "hello" -> hi
  48. rule: WORD ":" NUMBER
  49. NL: /(\\r?\\n)+\\s*/
  50. """ + common
  51. code = """
  52. Elephants: 12
  53. hello
  54. """
  55. self.assert_reconstruct(g, code)
  56. def test_keep_tokens(self):
  57. g = """
  58. start: (NL | stmt)*
  59. stmt: var op var
  60. !op: ("+" | "-" | "*" | "/")
  61. var: WORD
  62. NL: /(\\r?\\n)+\s*/
  63. """ + common
  64. code = """
  65. a+b
  66. """
  67. self.assert_reconstruct(g, code)
  68. def test_expand_rule(self):
  69. g = """
  70. ?start: (NL | mult_stmt)*
  71. ?mult_stmt: sum_stmt ["*" sum_stmt]
  72. ?sum_stmt: var ["+" var]
  73. var: WORD
  74. NL: /(\\r?\\n)+\s*/
  75. """ + common
  76. code = ['a', 'a*b', 'a+b', 'a*b+c', 'a+b*c', 'a+b*c+d']
  77. for c in code:
  78. self.assert_reconstruct(g, c)
  79. def test_json_example(self):
  80. test_json = '''
  81. {
  82. "empty_object" : {},
  83. "empty_array" : [],
  84. "booleans" : { "YES" : true, "NO" : false },
  85. "numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ],
  86. "strings" : [ "This", [ "And" , "That", "And a \\"b" ] ],
  87. "nothing" : null
  88. }
  89. '''
  90. json_grammar = r"""
  91. ?start: value
  92. ?value: object
  93. | array
  94. | string
  95. | SIGNED_NUMBER -> number
  96. | "true" -> true
  97. | "false" -> false
  98. | "null" -> null
  99. array : "[" [value ("," value)*] "]"
  100. object : "{" [pair ("," pair)*] "}"
  101. pair : string ":" value
  102. string : ESCAPED_STRING
  103. %import common.ESCAPED_STRING
  104. %import common.SIGNED_NUMBER
  105. %import common.WS
  106. %ignore WS
  107. """
  108. json_parser = Lark(json_grammar, parser='lalr', maybe_placeholders=False)
  109. tree = json_parser.parse(test_json)
  110. new_json = Reconstructor(json_parser).reconstruct(tree)
  111. self.assertEqual(json.loads(new_json), json.loads(test_json))
  112. @unittest.skipIf(sys.version_info < (3, 0), "Python 2 does not play well with Unicode.")
  113. def test_switch_grammar_unicode_terminal(self):
  114. """
  115. This test checks that a parse tree built with a grammar containing only ascii characters can be reconstructed
  116. with a grammar that has unicode rules (or vice versa). The original bug assigned ANON terminals to unicode
  117. keywords, which offsets the ANON terminal count in the unicode grammar and causes subsequent identical ANON
  118. tokens (e.g., `+=`) to mis-match between the two grammars.
  119. """
  120. g1 = """
  121. start: (NL | stmt)*
  122. stmt: "keyword" var op var
  123. !op: ("+=" | "-=" | "*=" | "/=")
  124. var: WORD
  125. NL: /(\\r?\\n)+\s*/
  126. """ + common
  127. g2 = """
  128. start: (NL | stmt)*
  129. stmt: "குறிப்பு" var op var
  130. !op: ("+=" | "-=" | "*=" | "/=")
  131. var: WORD
  132. NL: /(\\r?\\n)+\s*/
  133. """ + common
  134. code = """
  135. keyword x += y
  136. """
  137. l1 = Lark(g1, parser='lalr')
  138. l2 = Lark(g2, parser='lalr')
  139. r = Reconstructor(l2)
  140. tree = l1.parse(code)
  141. code2 = r.reconstruct(tree)
  142. assert l2.parse(code2) == tree
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()