This repo contains code to mirror other repos, as well as the code that is being mirrored.

from __future__ import absolute_import

import unittest
import logging
import os
import sys
try:
    from cStringIO import StringIO as cStringIO
except ImportError:
    # cStringIO is available only in Python 2.x; Python 3.x only has io.StringIO (imported below)
    cStringIO = None
from io import (
    StringIO as uStringIO,
    open,
)

logging.basicConfig(level=logging.INFO)

from lark.lark import Lark
from lark.common import GrammarError, ParseError

__path__ = os.path.dirname(__file__)
def _read(n, *args):
    with open(os.path.join(__path__, n), *args) as f:
        return f.read()
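
# Note: _read is a small helper for loading files relative to this test
# module's directory. It isn't exercised in this file; presumably it is
# kept for tests that read grammar files from disk.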


class TestParsers(unittest.TestCase):
    def test_same_ast(self):
        "Tests that Earley and LALR parsers produce equal trees"
        g = Lark("""start: "(" name_list ("," "*" NAME)? ")"
                    name_list: NAME | name_list "," NAME
                    NAME: /\w+/ """, parser='lalr')
        l = g.parse('(a,b,c,*x)')

        g = Lark("""start: "(" name_list ("," "*" NAME)? ")"
                    name_list: NAME | name_list "," NAME
                    NAME: /\w+/ """)
        l2 = g.parse('(a,b,c,*x)')
        assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())


class TestEarley(unittest.TestCase):
    pass
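
# This empty TestEarley is rebound below: _make_parser_test generates a full
# TestEarley and publishes it via globals(), replacing this placeholder.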


def _make_parser_test(PARSER):
    def _Lark(grammar, **kwargs):
        return Lark(grammar, parser=PARSER, **kwargs)

    class _TestParser(unittest.TestCase):
        def test_basic1(self):
            g = _Lark("""start: a+ b a* "b" a*
                         b: "b"
                         a: "a"
                      """)
            r = g.parse('aaabaab')
            self.assertEqual(''.join(x.data for x in r.children), 'aaabaa')
            r = g.parse('aaabaaba')
            self.assertEqual(''.join(x.data for x in r.children), 'aaabaaa')

            self.assertRaises(ParseError, g.parse, 'aaabaa')
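
            # The quoted "b" in the start rule is an anonymous token, so it is
            # filtered out of the tree; only the named 'a' and 'b' rule subtrees
            # remain, which is why 'aaabaab' collapses to 'aaabaa' above.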

        def test_basic2(self):
            # Multiple parsers and colliding tokens
            g = _Lark("""start: B A
                         B: "12"
                         A: "1" """)
            g2 = _Lark("""start: B A
                          B: "12"
                          A: "2" """)
            x = g.parse('121')
            assert x.data == 'start' and x.children == ['12', '1'], x
            x = g2.parse('122')
            assert x.data == 'start' and x.children == ['12', '2'], x

        @unittest.skipIf(cStringIO is None, "cStringIO not available")
        def test_stringio_bytes(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_stringio_unicode(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_unicode(self):
            g = _Lark(u"""start: UNIA UNIB UNIA
                          UNIA: /\xa3/
                          UNIB: /\u0101/
                       """)
            g.parse(u'\xa3\u0101\u00a3')

        def test_unicode2(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "a\u0101b\ "
                          UNIC: /a?\u0101c\n/
                       """)
            g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n')

        def test_recurse_expansion(self):
            """Verify that stack depth doesn't get exceeded on recursive rules marked for expansion."""
            g = _Lark(r"""start: a | start a
                          a : "a" """)

            # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
            # STree data structures, which uses recursion).
            g.parse("a" * (sys.getrecursionlimit() // 4))

        def test_expand1_lists_with_one_item(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("a")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_expand1_lists_with_one_item_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("a!")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_dont_expand1_lists_with_multiple_items(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        def test_dont_expand1_lists_with_multiple_items_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa!")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        def test_empty_expand1_list(self):
            g = _Lark(r"""start: list
                          ?list: item*
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        def test_empty_expand1_list_2(self):
            g = _Lark(r"""start: list
                          ?list: item* "!"?
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        def test_empty_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("")
            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_single_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item',))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_multiple_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_recurse_flatten(self):
            """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
            g = _Lark(r"""start: a | start a
                          a : A
                          A : "a" """)

            # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
            # STree data structures, which uses recursion).
            g.parse("a" * (sys.getrecursionlimit() // 4))

        def test_token_collision(self):
            g = _Lark("""start: "Hello" NAME
                         NAME: /\w+/
                         WS.ignore: /\s+/
                      """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])
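
            # The collision being tested: 'HelloWorld' must come out as a single
            # NAME token (longest match wins), not as the anonymous "Hello"
            # literal followed by a NAME 'World'.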

        def test_undefined_rule(self):
            self.assertRaises(GrammarError, _Lark, """start: a""")

        def test_undefined_token(self):
            self.assertRaises(GrammarError, _Lark, """start: A""")

        def test_rule_collision(self):
            g = _Lark("""start: "a"+ "b"
                       | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')

        def test_rule_collision2(self):
            g = _Lark("""start: "a"* "b"
                       | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')
            x = g.parse('b')

        def test_regex_embed(self):
            g = _Lark("""start: A B C
                         A: /a/
                         B: /${A}b/
                         C: /${B}c/
                      """)
            x = g.parse('aababc')
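
            # ${A} splices token A's pattern into the enclosing regex, so B
            # matches 'ab' and C matches 'abc'; 'aababc' is 'a' + 'ab' + 'abc'.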

        def test_token_not_anon(self):
            """Tests that "a" is matched as A, rather than an anonymous token.
            That means that "a" is not filtered out, despite being an 'immediate string'.
            Whether or not this is the intuitive behavior, I'm not sure yet.
            -Erez
            """
            g = _Lark("""start: "a"
                         A: "a" """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 1, '"a" should not be considered anonymous')
            self.assertEqual(x.children[0].type, "A")

        def test_maybe(self):
            g = _Lark("""start: ["a"] """)
            x = g.parse('a')
            x = g.parse('')

        def test_start(self):
            g = _Lark("""a: "a" a? """, start='a')
            x = g.parse('a')
            x = g.parse('aa')
            x = g.parse('aaa')

        def test_alias(self):
            g = _Lark("""start: "a" -> b """)
            x = g.parse('a')
            self.assertEqual(x.data, "b")

        def test_lexer_token_limit(self):
            "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"
            tokens = {'A%d' % i: '"%d"' % i for i in range(300)}
            g = _Lark("""start: %s
                      %s""" % (' '.join(tokens), '\n'.join("%s: %s" % x for x in tokens.items())))
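
            # (A single compiled pattern in older Python allows at most 100
            # groups, so 300 tokens cannot share one regex; the lexer presumably
            # splits the token set across several compiled patterns.)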

        def test_float_without_lexer(self):
            g = _Lark("""start: ["+"|"-"] float
                         float: digit* "." digit+ exp?
                              | digit+ exp
                         exp: ("e"|"E") ["+"|"-"] digit+
                         digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
                      """)
            g.parse("1.2")
            g.parse("-.2e9")
            g.parse("+2e-9")
            self.assertRaises(ParseError, g.parse, "+2e-9e")

    _NAME = "Test" + PARSER.capitalize()
    _TestParser.__name__ = _NAME
    globals()[_NAME] = _TestParser


for PARSER in ['lalr', 'earley', 'lalr_contextual_lexer']:
    _make_parser_test(PARSER)
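
# _make_parser_test stamps out one concrete TestCase per backend (TestLalr,
# TestEarley, TestLalr_contextual_lexer) and publishes it via globals(), so
# unittest discovery runs the same battery of tests against every parser.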


if __name__ == '__main__':
    unittest.main()