This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

334 lines
13 KiB

  1. from __future__ import absolute_import
  2. import unittest
  3. import logging
  4. import os
  5. import sys
  6. try:
  7. from cStringIO import StringIO as cStringIO
  8. except ImportError:
  9. # Available only in Python 2.x, 3.x only has io.StringIO from below
  10. cStringIO = None
  11. from io import (
  12. StringIO as uStringIO,
  13. open,
  14. )
# Emit parser-construction debug/info output while the tests run.
logging.basicConfig(level=logging.INFO)
from lark.lark import Lark
from lark.grammar_analysis import GrammarError
from lark.parser import ParseError
# Directory containing this test file; used by _read() to locate fixtures.
# NOTE(review): assigning to __path__ shadows the module/package attribute
# of the same name — presumably intentional here, but confirm.
__path__ = os.path.dirname(__file__)
  20. def _read(n, *args):
  21. with open(os.path.join(__path__, n), *args) as f:
  22. return f.read()
  23. class TestLalr(unittest.TestCase):
  24. def test_basic1(self):
  25. g = Lark("""start: a+ b a* "b" a*
  26. b: "b"
  27. a: "a"
  28. """, parser='lalr')
  29. r = g.parse('aaabaab')
  30. self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )
  31. r = g.parse('aaabaaba')
  32. self.assertEqual( ''.join(x.data for x in r.children), 'aaabaaa' )
  33. self.assertRaises(ParseError, g.parse, 'aaabaa')
  34. def test_basic2(self):
  35. # Multiple parsers and colliding tokens
  36. g = Lark("""start: B A
  37. B: "12"
  38. A: "1" """)
  39. g2 = Lark("""start: B A
  40. B: "12"
  41. A: "2" """)
  42. x = g.parse('121')
  43. assert x.data == 'start' and x.children == ['12', '1'], x
  44. x = g2.parse('122')
  45. assert x.data == 'start' and x.children == ['12', '2'], x
  46. def test_basic3(self):
  47. "Tests that Earley and LALR parsers produce equal trees"
  48. g = Lark("""start: "(" name_list ("," "*" NAME)? ")"
  49. name_list: NAME | name_list "," NAME
  50. NAME: /\w+/ """, parser='lalr')
  51. l = g.parse('(a,b,c,*x)')
  52. g = Lark("""start: "(" name_list ("," "*" NAME)? ")"
  53. name_list: NAME | name_list "," NAME
  54. NAME: /\w+/ """)
  55. l2 = g.parse('(a,b,c,*x)')
  56. assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())
  57. @unittest.skipIf(cStringIO is None, "cStringIO not available")
  58. def test_stringio_bytes(self):
  59. """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
  60. Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))
  61. def test_stringio_unicode(self):
  62. """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
  63. Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))
  64. def test_unicode(self):
  65. g = Lark(u"""start: UNIA UNIB UNIA
  66. UNIA: /\xa3/
  67. UNIB: /\u0101/
  68. """)
  69. g.parse(u'\xa3\u0101\u00a3')
  70. def test_unicode2(self):
  71. g = Lark(r"""start: UNIA UNIB UNIA UNIC
  72. UNIA: /\xa3/
  73. UNIB: "a\u0101b\ "
  74. UNIC: /a?\u0101c\n/
  75. """)
  76. g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n')
  77. def test_recurse_expansion(self):
  78. """Verify that stack depth doesn't get exceeded on recursive rules marked for expansion."""
  79. g = Lark(r"""start: a | start a
  80. a : "a" """)
  81. # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
  82. # STree data structures, which uses recursion).
  83. g.parse("a" * (sys.getrecursionlimit() // 4))
  84. def test_expand1_lists_with_one_item(self):
  85. g = Lark(r"""start: list
  86. ?list: item+
  87. item : A
  88. A: "a"
  89. """)
  90. r = g.parse("a")
  91. # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
  92. self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))
  93. # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
  94. self.assertEqual(len(r.children), 1)
  95. def test_expand1_lists_with_one_item_2(self):
  96. g = Lark(r"""start: list
  97. ?list: item+ "!"
  98. item : A
  99. A: "a"
  100. """)
  101. r = g.parse("a!")
  102. # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
  103. self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))
  104. # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
  105. self.assertEqual(len(r.children), 1)
  106. def test_dont_expand1_lists_with_multiple_items(self):
  107. g = Lark(r"""start: list
  108. ?list: item+
  109. item : A
  110. A: "a"
  111. """)
  112. r = g.parse("aa")
  113. # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
  114. self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))
  115. # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
  116. self.assertEqual(len(r.children), 1)
  117. # Sanity check: verify that 'list' contains the two 'item's we've given it
  118. [list] = r.children
  119. self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))
  120. def test_dont_expand1_lists_with_multiple_items_2(self):
  121. g = Lark(r"""start: list
  122. ?list: item+ "!"
  123. item : A
  124. A: "a"
  125. """)
  126. r = g.parse("aa!")
  127. # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
  128. self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))
  129. # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
  130. self.assertEqual(len(r.children), 1)
  131. # Sanity check: verify that 'list' contains the two 'item's we've given it
  132. [list] = r.children
  133. self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))
  134. def test_empty_expand1_list(self):
  135. g = Lark(r"""start: list
  136. ?list: item*
  137. item : A
  138. A: "a"
  139. """)
  140. r = g.parse("")
  141. # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
  142. self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))
  143. # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
  144. self.assertEqual(len(r.children), 1)
  145. # Sanity check: verify that 'list' contains no 'item's as we've given it none
  146. [list] = r.children
  147. self.assertSequenceEqual([item.data for item in list.children], ())
  148. def test_empty_expand1_list_2(self):
  149. g = Lark(r"""start: list
  150. ?list: item* "!"?
  151. item : A
  152. A: "a"
  153. """)
  154. r = g.parse("")
  155. # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
  156. self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))
  157. # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
  158. self.assertEqual(len(r.children), 1)
  159. # Sanity check: verify that 'list' contains no 'item's as we've given it none
  160. [list] = r.children
  161. self.assertSequenceEqual([item.data for item in list.children], ())
  162. def test_empty_flatten_list(self):
  163. g = Lark(r"""start: list
  164. list: | item "," list
  165. item : A
  166. A: "a"
  167. """)
  168. r = g.parse("")
  169. # Because 'list' is a flatten rule it's top-level element should *never* be expanded
  170. self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))
  171. # Sanity check: verify that 'list' contains no 'item's as we've given it none
  172. [list] = r.children
  173. self.assertSequenceEqual([item.data for item in list.children], ())
  174. @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
  175. def test_single_item_flatten_list(self):
  176. g = Lark(r"""start: list
  177. list: | item "," list
  178. item : A
  179. A: "a"
  180. """)
  181. r = g.parse("a,")
  182. # Because 'list' is a flatten rule it's top-level element should *never* be expanded
  183. self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))
  184. # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
  185. [list] = r.children
  186. self.assertSequenceEqual([item.data for item in list.children], ('item',))
  187. @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
  188. def test_multiple_item_flatten_list(self):
  189. g = Lark(r"""start: list
  190. #list: | item "," list
  191. item : A
  192. A: "a"
  193. """)
  194. r = g.parse("a,a,")
  195. # Because 'list' is a flatten rule it's top-level element should *never* be expanded
  196. self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))
  197. # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
  198. [list] = r.children
  199. self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))
  200. @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
  201. def test_recurse_flatten(self):
  202. """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
  203. g = Lark(r"""start: a | start a
  204. a : A
  205. A : "a" """)
  206. # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
  207. # STree data structures, which uses recursion).
  208. g.parse("a" * (sys.getrecursionlimit() // 4))
  209. def test_token_collision(self):
  210. g = Lark("""start: "Hello" NAME
  211. NAME: /\w+/
  212. WS.ignore: /\s+/
  213. """, parser='lalr')
  214. x = g.parse('Hello World')
  215. self.assertSequenceEqual(x.children, ['World'])
  216. x = g.parse('Hello HelloWorld')
  217. self.assertSequenceEqual(x.children, ['HelloWorld'])
  218. def test_undefined_rule(self):
  219. self.assertRaises(GrammarError, Lark, """start: a""", parser='lalr')
  220. def test_undefined_token(self):
  221. self.assertRaises(GrammarError, Lark, """start: A""", parser='lalr')
  222. def test_rule_collision(self):
  223. g = Lark("""start: "a"+ "b"
  224. | "a"+ """, parser='lalr')
  225. x = g.parse('aaaa')
  226. x = g.parse('aaaab')
  227. def test_rule_collision2(self):
  228. g = Lark("""start: "a"* "b"
  229. | "a"+ """, parser='lalr')
  230. x = g.parse('aaaa')
  231. x = g.parse('aaaab')
  232. x = g.parse('b')
  233. def test_regex_embed(self):
  234. g = Lark("""start: A B C
  235. A: /a/
  236. B: /${A}b/
  237. C: /${B}c/
  238. """, parser='lalr')
  239. x = g.parse('aababc')
  240. def test_token_not_anon(self):
  241. """Tests that "a" is matched as A, rather than an anonymous token.
  242. That means that "a" is not filtered out, despite being an 'immediate string'.
  243. Whether or not this is the intuitive behavior, I'm not sure yet.
  244. -Erez
  245. """
  246. g = Lark("""start: "a"
  247. A: "a" """, parser='lalr')
  248. x = g.parse('a')
  249. self.assertEqual(len(x.children), 1, '"a" should not be considered anonymous')
  250. self.assertEqual(x.children[0].type, "A")
  251. def test_maybe(self):
  252. g = Lark("""start: ["a"] """, parser='lalr')
  253. x = g.parse('a')
  254. x = g.parse('')
  255. def test_start(self):
  256. g = Lark("""a: "a" a? """, parser='lalr', start='a')
  257. x = g.parse('a')
  258. x = g.parse('aa')
  259. x = g.parse('aaa')
  260. def test_alias(self):
  261. g = Lark("""start: "a" -> b """, parser='lalr')
  262. x = g.parse('a')
  263. self.assertEqual(x.data, "b")
# Run the test suite when this file is executed directly.
if __name__ == '__main__':
    unittest.main()