# -*- coding: utf-8 -*-
from __future__ import absolute_import

import unittest
import logging
import os
import sys
try:
    from cStringIO import StringIO as cStringIO
except ImportError:
    # Available only in Python 2.x, 3.x only has io.StringIO from below
    cStringIO = None
from io import (
        StringIO as uStringIO,
        open,
    )

logging.basicConfig(level=logging.INFO)

from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
from lark.visitors import Transformer
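
# This module tests grammar handling and parse results across the parser/lexer
# combinations that Lark supports (Earley, CYK and LALR parsers; standard,
# contextual, dynamic and dynamic_complete lexers). Most test classes are
# generated per combination by the factory functions further down.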
__path__ = os.path.dirname(__file__)
def _read(n, *args):
    with open(os.path.join(__path__, n), *args) as f:
        return f.read()
class TestParsers(unittest.TestCase):
    def test_same_ast(self):
        "Tests that Earley and LALR parsers produce equal trees"
        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w+/ """, parser='lalr')
        l = g.parse('(a,b,c,*x)')

        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w/+ """)
        l2 = g.parse('(a,b,c,*x)')
        assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())

    def test_infinite_recurse(self):
        g = """start: a
               a: a | "a"
            """

        self.assertRaises(GrammarError, Lark, g, parser='lalr')

        # TODO: should it? shouldn't it?
        # l = Lark(g, parser='earley', lexer='dynamic')
        # self.assertRaises(ParseError, l.parse, 'a')

    def test_propagate_positions(self):
        g = Lark("""start: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )

    def test_expand1(self):
        g = Lark("""start: a
                    ?a: b
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "b" )

        g = Lark("""start: a
                    ?a: b -> c
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: B -> c
                    B: "x"
                 """)
        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )
        g = Lark("""start: a
                    ?a: b b -> c
                    b: "x"
                 """)
        r = g.parse('xx')
        self.assertEqual( r.children[0].data, "c" )

    def test_embedded_transformer(self):
        class T(Transformer):
            def a(self, children):
                return "<a>"
            def b(self, children):
                return "<b>"
            def c(self, children):
                return "<c>"

        # Test regular
        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<a>"] )

        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<a>"] )

        # Test Expand1
        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<b>"] )

        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<b>"] )

        # Test Expand1 -> Alias
        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("xx"))
        self.assertEqual( r.children, ["<c>"] )

        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("xx")
        self.assertEqual( r.children, ["<c>"] )

    def test_alias(self):
        Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """)
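
# _make_full_earley_test() generates a TestCase subclass for a single lexer
# configuration and registers it in globals() under a per-lexer name, so that
# unittest discovery runs the same Earley tests once per lexer.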
def _make_full_earley_test(LEXER):
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=LEXER, parser='earley', propagate_positions=True, **kwargs)

    class _TestFullEarley(unittest.TestCase):
        def test_anon(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = Lark(r"""start: B
                         B: ("ab"|/[^b]/)+
                      """, lexer=LEXER)

            self.assertEqual( g.parse('abc').children[0], 'abc')

        def test_earley(self):
            g = Lark("""start: A "b" c
                        A: "a"+
                        c: "abc"
                     """, parser="earley", lexer=LEXER)
            x = g.parse('aaaababc')

        def test_earley2(self):
            grammar = """
            start: statement+

            statement: "r"
                     | "c" /[a-z]/+

            %ignore " "
            """

            program = """c b r"""

            l = Lark(grammar, parser='earley', lexer=LEXER)
            l.parse(program)
        @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete lexer")
        def test_earley3(self):
            """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)

            By default, `+` should imitate regexp greedy-matching
            """
            grammar = """
            start: A A
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            self.assertEqual(set(res.children), {'aa', 'a'})
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aa', 'a'])
        def test_earley4(self):
            grammar = """
            start: A A?
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            assert set(res.children) == {'aa', 'a'} or res.children == ['aaa']
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aaa'])
        def test_earley_repeating_empty(self):
            # This was a sneaky bug!
            grammar = """
            !start: "a" empty empty "b"
            empty: empty2
            empty2:
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER)
            res = parser.parse('ab')

            empty_tree = Tree('empty', [Tree('empty2', [])])
            self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_earley_explicit_ambiguity(self):
            # This was a sneaky bug!
            grammar = """
            start: a b | ab
            a: "a"
            b: "b"
            ab: "ab"
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit')
            ambig_tree = parser.parse('ab')
            self.assertEqual( ambig_tree.data, '_ambig')
            self.assertEqual( len(ambig_tree.children), 2)

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity1(self):
            grammar = """
            start: cd+ "e"

            !cd: "c"
               | "d"
               | "cd"
            """
            l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
            ambig_tree = l.parse('cde')

            assert ambig_tree.data == '_ambig', ambig_tree
            assert len(ambig_tree.children) == 2

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity2(self):
            grammar = """
            ANY: /[a-zA-Z0-9 ]+/
            a.2: "A" b+
            b.2: "B"
            c: ANY

            start: (a|c)*
            """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse('ABX')
            expected = Tree('start', [
                    Tree('a', [
                        Tree('b', [])
                    ]),
                    Tree('c', [
                        'X'
                    ])
                ])
            self.assertEqual(res, expected)

        def test_fruitflies_ambig(self):
            grammar = """
                start: noun verb noun        -> simple
                     | noun verb "like" noun -> comparative

                noun: adj? NOUN
                verb: VERB
                adj: ADJ

                NOUN: "flies" | "bananas" | "fruit"
                VERB: "like" | "flies"
                ADJ: "fruit"

                %import common.WS
                %ignore WS
            """
            parser = Lark(grammar, ambiguity='explicit', lexer=LEXER)
            tree = parser.parse('fruit flies like bananas')

            expected = Tree('_ambig', [
                    Tree('comparative', [
                        Tree('noun', ['fruit']),
                        Tree('verb', ['flies']),
                        Tree('noun', ['bananas'])
                    ]),
                    Tree('simple', [
                        Tree('noun', [Tree('adj', ['fruit']), 'flies']),
                        Tree('verb', ['like']),
                        Tree('noun', ['bananas'])
                    ])
                ])

            # self.assertEqual(tree, expected)
            self.assertEqual(tree.data, expected.data)
            self.assertEqual(set(tree.children), set(expected.children))
        @unittest.skipIf(LEXER!='dynamic_complete', "Only relevant for the dynamic_complete lexer")
        def test_explicit_ambiguity2(self):
            grammar = r"""
            start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """cat"""

            parser = _Lark(grammar, start='start', ambiguity='explicit')
            tree = parser.parse(text)
            self.assertEqual(tree.data, '_ambig')

            combinations = {tuple(str(s) for s in t.children) for t in tree.children}
            self.assertEqual(combinations, {
                ('cat',),
                ('ca', 't'),
                ('c', 'at'),
                ('c', 'a' ,'t')
            })
        def test_term_ambig_resolve(self):
            grammar = r"""
            !start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """foo bar"""

            parser = Lark(grammar)
            tree = parser.parse(text)
            self.assertEqual(tree.children, ['foo', 'bar'])

        # @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
        # def test_not_all_derivations(self):
        #     grammar = """
        #     start: cd+ "e"
        #
        #     !cd: "c"
        #        | "d"
        #        | "cd"
        #     """
        #     l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
        #     x = l.parse('cde')
        #     assert x.data != '_ambig', x
        #     assert len(x.children) == 1

    _NAME = "TestFullEarley" + LEXER.capitalize()
    _TestFullEarley.__name__ = _NAME
    globals()[_NAME] = _TestFullEarley
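
# Likewise, _make_parser_test() builds a TestCase subclass for one
# (lexer, parser) combination and registers it in globals(); the _TO_TEST
# list at the bottom of the file drives which combinations get generated.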
def _make_parser_test(LEXER, PARSER):
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=LEXER, parser=PARSER, propagate_positions=True, **kwargs)
    def _Lark_open(gfilename, **kwargs):
        return Lark.open(gfilename, lexer=LEXER, parser=PARSER, propagate_positions=True, **kwargs)

    class _TestParser(unittest.TestCase):
        def test_basic1(self):
            g = _Lark("""start: a+ b a* "b" a*
                         b: "b"
                         a: "a"
                      """)

            r = g.parse('aaabaab')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )
            r = g.parse('aaabaaba')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaaa' )

            self.assertRaises(ParseError, g.parse, 'aaabaa')

        def test_basic2(self):
            # Multiple parsers and colliding tokens
            g = _Lark("""start: B A
                         B: "12"
                         A: "1" """)
            g2 = _Lark("""start: B A
                          B: "12"
                          A: "2" """)
            x = g.parse('121')
            assert x.data == 'start' and x.children == ['12', '1'], x
            x = g2.parse('122')
            assert x.data == 'start' and x.children == ['12', '2'], x

        @unittest.skipIf(cStringIO is None, "cStringIO not available")
        def test_stringio_bytes(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_stringio_unicode(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_unicode(self):
            g = _Lark(u"""start: UNIA UNIB UNIA
                          UNIA: /\xa3/
                          UNIB: /\u0101/
                       """)
            g.parse(u'\xa3\u0101\u00a3')

        def test_unicode2(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "a\u0101b\ "
                          UNIC: /a?\u0101c\n/
                       """)
            g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n')

        def test_unicode3(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "\u0101"
                          UNIC: /\u0203/ /\n/
                       """)
            g.parse(u'\xa3\u0101\u00a3\u0203\n')

        def test_hex_escape(self):
            g = _Lark(r"""start: A B C
                          A: "\x01"
                          B: /\x02/
                          C: "\xABCD"
                       """)
            g.parse('\x01\x02\xABCD')

        def test_unicode_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\u0061".."\u0063"
                       """)
            g.parse('abc')

        def test_hex_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\x01".."\x03"
                       """)
            g.parse('\x01\x02\x03')

        @unittest.skipIf(PARSER == 'cyk', "Takes forever")
        def test_stack_for_ebnf(self):
            """Verify that stack depth isn't an issue for EBNF grammars"""
            g = _Lark(r"""start: a+
                          a : "a" """)

            g.parse("a" * (sys.getrecursionlimit()*2 ))

        def test_expand1_lists_with_one_item(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("a")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_expand1_lists_with_one_item_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("a!")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_dont_expand1_lists_with_multiple_items(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        def test_dont_expand1_lists_with_multiple_items_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa!")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list(self):
            g = _Lark(r"""start: list
                          ?list: item*
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list_2(self):
            g = _Lark(r"""start: list
                          ?list: item* "!"?
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())
        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_single_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item',))
        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_multiple_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))
        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_recurse_flatten(self):
            """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
            g = _Lark(r"""start: a | start a
                          a : A
                          A : "a" """)

            # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
            # STree data structures, which uses recursion).
            g.parse("a" * (sys.getrecursionlimit() // 4))

        def test_token_collision(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %ignore " "
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision_WS(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %import common.WS
                          %ignore WS
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision2(self):
            g = _Lark("""
                    !start: "starts"

                    %import common.LCASE_LETTER
                    """)

            x = g.parse("starts")
            self.assertSequenceEqual(x.children, ['starts'])
        # def test_string_priority(self):
        #     g = _Lark("""start: (A | /a?bb/)+
        #                  A: "a" """)
        #     x = g.parse('abb')
        #     self.assertEqual(len(x.children), 2)
        #
        #     # This parse raises an exception because the lexer will always try to consume
        #     # "a" first and will never match the regular expression
        #     # This behavior is subject to change!!
        #     # This won't happen with ambiguity handling.
        #     g = _Lark("""start: (A | /a?ab/)+
        #                  A: "a" """)
        #     self.assertRaises(LexError, g.parse, 'aab')
        def test_undefined_rule(self):
            self.assertRaises(GrammarError, _Lark, """start: a""")

        def test_undefined_token(self):
            self.assertRaises(GrammarError, _Lark, """start: A""")

        def test_rule_collision(self):
            g = _Lark("""start: "a"+ "b"
                             | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')

        def test_rule_collision2(self):
            g = _Lark("""start: "a"* "b"
                             | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')
            x = g.parse('b')

        def test_token_not_anon(self):
            """Tests that "a" is matched as an anonymous token, and not A.
            """

            g = _Lark("""start: "a"
                         A: "a" """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 0, '"a" should be considered anonymous')

            g = _Lark("""start: "a" A
                         A: "a" """)
            x = g.parse('aa')
            self.assertEqual(len(x.children), 1, 'only "a" should be considered anonymous')
            self.assertEqual(x.children[0].type, "A")

            g = _Lark("""start: /a/
                         A: /a/ """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 1)
            self.assertEqual(x.children[0].type, "A", "A isn't associated with /a/")

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_maybe(self):
            g = _Lark("""start: ["a"] """)
            x = g.parse('a')
            x = g.parse('')

        def test_start(self):
            g = _Lark("""a: "a" a? """, start='a')
            x = g.parse('a')
            x = g.parse('aa')
            x = g.parse('aaa')

        def test_alias(self):
            g = _Lark("""start: "a" -> b """)
            x = g.parse('a')
            self.assertEqual(x.data, "b")

        def test_token_ebnf(self):
            g = _Lark("""start: A
                         A: "a"* ("b"? "c".."e")+
                      """)
            x = g.parse('abcde')
            x = g.parse('dd')

        def test_backslash(self):
            g = _Lark(r"""start: "\\" "a"
                       """)
            x = g.parse(r'\a')

            g = _Lark(r"""start: /\\/ /a/
                       """)
            x = g.parse(r'\a')

        def test_backslash2(self):
            g = _Lark(r"""start: "\"" "-"
                       """)
            x = g.parse('"-')

            g = _Lark(r"""start: /\// /-/
                       """)
            x = g.parse('/-')

        def test_special_chars(self):
            g = _Lark(r"""start: "\n"
                       """)
            x = g.parse('\n')

            g = _Lark(r"""start: /\n/
                       """)
            x = g.parse('\n')

        # def test_token_recurse(self):
        #     g = _Lark("""start: A
        #                  A: B
        #                  B: A
        #               """)

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = _Lark(r"""start: _empty a "B"
                          a: _empty "A"
                          _empty:
                       """)
            x = g.parse('AB')

        def test_regex_quote(self):
            g = r"""
            start: SINGLE_QUOTED_STRING | DOUBLE_QUOTED_STRING
            SINGLE_QUOTED_STRING : /'[^']*'/
            DOUBLE_QUOTED_STRING : /"[^"]*"/
            """

            g = _Lark(g)
            self.assertEqual( g.parse('"hello"').children, ['"hello"'])
            self.assertEqual( g.parse("'hello'").children, ["'hello'"])

        def test_lexer_token_limit(self):
            "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"
            tokens = {'A%d'%i:'"%d"'%i for i in range(300)}
            g = _Lark("""start: %s
                      %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))

        def test_float_without_lexer(self):
            expected_error = UnexpectedCharacters if LEXER.startswith('dynamic') else UnexpectedToken
            if PARSER == 'cyk':
                expected_error = ParseError

            g = _Lark("""start: ["+"|"-"] float
                         float: digit* "." digit+ exp?
                              | digit+ exp
                         exp: ("e"|"E") ["+"|"-"] digit+
                         digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
                      """)
            g.parse("1.2")
            g.parse("-.2e9")
            g.parse("+2e-9")
            self.assertRaises( expected_error, g.parse, "+2e-9e")

        def test_keep_all_tokens(self):
            l = _Lark("""start: "a"+ """, keep_all_tokens=True)
            tree = l.parse('aaa')
            self.assertEqual(tree.children, ['a', 'a', 'a'])

        def test_token_flags(self):
            l = _Lark("""!start: "a"i+
                      """
                      )
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            l = _Lark("""!start: /a/i+
                      """
                      )
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            # g = """!start: "a"i "a"
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            # g = """!start: /a/i /a/
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            g = """start: NAME "," "a"
                   NAME: /[a-z_]/i /[a-z0-9_]/i*
                """
            l = _Lark(g)
            tree = l.parse('ab,a')
            self.assertEqual(tree.children, ['ab'])
            tree = l.parse('AB,a')
            self.assertEqual(tree.children, ['AB'])

        def test_token_flags3(self):
            l = _Lark("""!start: ABC+
                         ABC: "abc"i
                      """
                      )
            tree = l.parse('aBcAbC')
            self.assertEqual(tree.children, ['aBc', 'AbC'])

        def test_token_flags2(self):
            g = """!start: ("a"i | /a/ /b/?)+
                """
            l = _Lark(g)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_twice_empty(self):
            g = """!start: [["A"]]
                """
            l = _Lark(g)
            tree = l.parse('A')
            self.assertEqual(tree.children, ['A'])

            tree = l.parse('')
            self.assertEqual(tree.children, [])

        def test_undefined_ignore(self):
            g = """!start: "A"

                %ignore B
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_alias_in_terminal(self):
            g = """start: TERM
                   TERM: "a" -> alias
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_line_and_column(self):
            g = r"""!start: "A" bc "D"
                    !bc: "B\nC"
                 """
            l = _Lark(g)
            a, bc, d = l.parse("AB\nCD").children
            self.assertEqual(a.line, 1)
            self.assertEqual(a.column, 1)

            bc, = bc.children
            self.assertEqual(bc.line, 1)
            self.assertEqual(bc.column, 2)

            self.assertEqual(d.line, 2)
            self.assertEqual(d.column, 2)

            if LEXER != 'dynamic':
                self.assertEqual(a.end_line, 1)
                self.assertEqual(a.end_column, 2)
                self.assertEqual(bc.end_line, 2)
                self.assertEqual(bc.end_column, 2)
                self.assertEqual(d.end_line, 2)
                self.assertEqual(d.end_column, 3)

        def test_reduce_cycle(self):
            """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state.
            It seems that the correct solution is to explicitly distinguish finalization in the reduce() function.
            """
            l = _Lark("""
            term: A
                | term term

            A: "a"
            """, start='term')

            tree = l.parse("aa")
            self.assertEqual(len(tree.children), 2)

        @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority")
        def test_lexer_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: A B | AB
            A.2: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['a', 'b'])
            self.assertNotEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B: "b"
            AB.3: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertNotEqual(res.children, ['a', 'b'])
            self.assertEqual(res.children, ['ab'])

        def test_import(self):
            grammar = """
            start: NUMBER WORD

            %import common.NUMBER
            %import common.WORD
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_import_rename(self):
            grammar = """
            start: N W

            %import common.NUMBER -> N
            %import common.WORD -> W
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_relative_import(self):
            l = _Lark_open('test_relative_import.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_import_rename(self):
            l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_rule_import(self):
            l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('expr', ['a', Tree('expr', ['a', 'b']), 'b']),
                'y'])

        def test_relative_rule_import_drop_ignore(self):
            # %ignore rules are dropped on import
            l = _Lark_open('test_relative_rule_import_drop_ignore.lark',
                           rel_to=__file__)
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xa abby')

        def test_relative_rule_import_subrule(self):
            l = _Lark_open('test_relative_rule_import_subrule.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', [
                        'a', Tree('grammars__ab__expr', ['a', 'b']), 'b',
                    ]),
                ]),
                'y'])

        def test_relative_rule_import_subrule_no_conflict(self):
            l = _Lark_open(
                'test_relative_rule_import_subrule_no_conflict.lark',
                rel_to=__file__)
            x = l.parse('xaby')
            self.assertEqual(x.children, [Tree('expr', [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', ['a', 'b']),
                ]),
                'y'])])
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xaxabyby')

        def test_relative_rule_import_rename(self):
            l = _Lark_open('test_relative_rule_import_rename.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('ab', ['a', Tree('ab', ['a', 'b']), 'b']),
                'y'])

        def test_multi_import(self):
            grammar = """
            start: NUMBER WORD

            %import common (NUMBER, WORD, WS)
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 toucans')
            self.assertEqual(x.children, ['12', 'toucans'])

        def test_relative_multi_import(self):
            l = _Lark_open("test_relative_multi_import.lark", rel_to=__file__)
            x = l.parse('12 capybaras')
            self.assertEqual(x.children, ['12', 'capybaras'])

        def test_import_errors(self):
            grammar = """
            start: NUMBER WORD

            %import .grammars.bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

            grammar = """
            start: NUMBER WORD

            %import bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: a | b
            a.1: "a"
            b.2: "a"
            """

            # l = Lark(grammar, parser='earley', lexer='standard')
            l = _Lark(grammar)
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'b')

            grammar = """
            start: a | b
            a.2: "a"
            b.1: "a"
            """

            l = _Lark(grammar)
            # l = Lark(grammar, parser='earley', lexer='standard')
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'a')

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization_sum(self):
            "Tests effect of priority on result"

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_: "ab"
            bb_.1: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_.1: "ab"
            bb_: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.2: "a"
            b_.1: "b"
            ab_.3: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.1: "a"
            b_.1: "b"
            ab_.4: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

        def test_utf8(self):
            g = u"""start: a
                    a: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [Tree('a', [])]))

            g = u"""start: A
                    A: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [u'\xb1a']))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_ignore(self):
            grammar = r"""
            COMMENT: /(!|(\/\/))[^\n]*/
            %ignore COMMENT
            %import common.WS -> _WS
            %import common.INT
            start: "INT"i _WS+ INT _WS*
            """
            parser = _Lark(grammar)

            tree = parser.parse("int 1 ! This is a comment\n")
            self.assertEqual(tree.children, ['1'])

            tree = parser.parse("int 1 ! This is a comment")    # A trailing ignore token can be tricky!
            self.assertEqual(tree.children, ['1'])

            parser = _Lark(r"""
                start : "a"*
                %ignore "b"
            """)
            tree = parser.parse("bb")
            self.assertEqual(tree.children, [])

        def test_regex_escaping(self):
            g = _Lark("start: /[ab]/")
            g.parse('a')
            g.parse('b')

            self.assertRaises( UnexpectedInput, g.parse, 'c')

            _Lark(r'start: /\w/').parse('a')

            g = _Lark(r'start: /\\w/')
            self.assertRaises( UnexpectedInput, g.parse, 'a')
            g.parse(r'\w')

            _Lark(r'start: /\[/').parse('[')
            _Lark(r'start: /\//').parse('/')
            _Lark(r'start: /\\/').parse('\\')
            _Lark(r'start: /\[ab]/').parse('[ab]')
            _Lark(r'start: /\\[ab]/').parse('\\a')
            _Lark(r'start: /\t/').parse('\t')
            _Lark(r'start: /\\t/').parse('\\t')
            _Lark(r'start: /\\\t/').parse('\\\t')
            _Lark(r'start: "\t"').parse('\t')
            _Lark(r'start: "\\t"').parse('\\t')
            _Lark(r'start: "\\\t"').parse('\\\t')

        def test_ranged_repeat_rules(self):
            g = u"""!start: "A"~3
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["A", "A", "A"]))
            self.assertRaises(ParseError, l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: "A"~0..2
                """
            if PARSER != 'cyk': # XXX CYK currently doesn't support empty grammars
                l = _Lark(g)
                self.assertEqual(l.parse(u''), Tree('start', []))
                self.assertEqual(l.parse(u'A'), Tree('start', ['A']))
                self.assertEqual(l.parse(u'AA'), Tree('start', ['A', 'A']))
                self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'AAA')

            g = u"""!start: "A"~3..2
                """
            self.assertRaises(GrammarError, _Lark, g)

            g = u"""!start: "A"~2..3 "B"~2
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
            self.assertEqual(l.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
            self.assertRaises(ParseError, l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        def test_ranged_repeat_terms(self):
            g = u"""!start: AAA
                    AAA: "A"~3
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"]))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: AABB CC
                    AABB: "A"~0..2 "B"~2
                    CC: "C"~1..2
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC']))
            self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C']))
            self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC']))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now") # TODO XXX
        def test_priority_vs_embedded(self):
            g = """
            A.2: "a"
            WORD: ("a".."z")+

            start: (A | WORD)+
            """
            l = _Lark(g)
            t = l.parse('abc')
            self.assertEqual(t.children, ['a', 'bc'])
            self.assertEqual(t.children[0].type, 'A')

        def test_line_counting(self):
            p = _Lark("start: /[^x]+/")

            text = 'hello\nworld'
            t = p.parse(text)
            tok = t.children[0]
            self.assertEqual(tok, text)
            self.assertEqual(tok.line, 1)
            self.assertEqual(tok.column, 1)
            if LEXER != 'dynamic':
                self.assertEqual(tok.end_line, 2)
                self.assertEqual(tok.end_column, 6)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_empty_end(self):
            p = _Lark("""
                start: b c d
                b: "B"
                c: | "C"
                d: | "D"
            """)
            res = p.parse('B')
            self.assertEqual(len(res.children), 3)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_maybe_placeholders(self):
            # Anonymous tokens shouldn't count
            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [])

            # All invisible constructs shouldn't count
            p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
                         A: "a"
                         _c: "c" """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None])
            self.assertEqual(p.parse("c").children, [None])
            self.assertEqual(p.parse("aefc").children, ['a'])

            # ? shouldn't apply
            p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])

            p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None, None])
            self.assertEqual(p.parse("a").children, ['a', None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("c").children, [None, None, 'c'])
            self.assertEqual(p.parse("ab").children, ['a', 'b', None])
            self.assertEqual(p.parse("ac").children, ['a', None, 'c'])
            self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
            self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

            p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
            self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
            self.assertEqual(p.parse("babbcabcb").children,
                [None, 'b', None,
                 'a', 'b', None,
                 None, 'b', 'c',
                 'a', 'b', 'c',
                 None, 'b', None])

            p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
            self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
            self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
            self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])

        def test_escaped_string(self):
            "Tests common.ESCAPED_STRING"
            grammar = r"""
            start: ESCAPED_STRING+

            %import common (WS_INLINE, ESCAPED_STRING)
            %ignore WS_INLINE
            """

            parser = _Lark(grammar)

            parser.parse(r'"\\" "b" "c"')
            parser.parse(r'"That" "And a \"b"')

    _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
    _TestParser.__name__ = _NAME
    globals()[_NAME] = _TestParser

# Note: You still have to import them in __main__ for the tests to run
_TO_TEST = [
        ('standard', 'earley'),
        ('standard', 'cyk'),
        ('dynamic', 'earley'),
        ('dynamic_complete', 'earley'),
        ('standard', 'lalr'),
        ('contextual', 'lalr'),
        # (None, 'earley'),
]
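
# Instantiate the generated test classes: one per (lexer, parser) combination
# listed in _TO_TEST, plus the full-Earley suites for the two dynamic lexers.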
for _LEXER, _PARSER in _TO_TEST:
    _make_parser_test(_LEXER, _PARSER)

for _LEXER in ('dynamic', 'dynamic_complete'):
    _make_full_earley_test(_LEXER)

if __name__ == '__main__':
    unittest.main()