# -*- coding: utf-8 -*-
from __future__ import absolute_import

import unittest
import logging
import os
import sys
try:
    from cStringIO import StringIO as cStringIO
except ImportError:
    # Available only in Python 2.x; Python 3.x only has io.StringIO (imported below)
    cStringIO = None
from io import (
    StringIO as uStringIO,
    open,
)

logging.basicConfig(level=logging.INFO)

from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
from lark.visitors import Transformer
from lark.grammar import Rule
from lark.lexer import TerminalDef

__path__ = os.path.dirname(__file__)
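
# Helper: read a file (e.g. a .lark grammar) relative to this test module's directory.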
def _read(n, *args):
    with open(os.path.join(__path__, n), *args) as f:
        return f.read()


class TestParsers(unittest.TestCase):
    def test_same_ast(self):
        "Tests that Earley and LALR parsers produce equal trees"
        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w+/ """, parser='lalr')
        l = g.parse('(a,b,c,*x)')

        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w/+ """)
        l2 = g.parse('(a,b,c,*x)')
        assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())

    def test_infinite_recurse(self):
        g = """start: a
               a: a | "a"
            """

        self.assertRaises(GrammarError, Lark, g, parser='lalr')

        # TODO: should it? shouldn't it?
        # l = Lark(g, parser='earley', lexer='dynamic')
        # self.assertRaises(ParseError, l.parse, 'a')

    def test_propagate_positions(self):
        g = Lark("""start: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )
    def test_expand1(self):
        g = Lark("""start: a
                    ?a: b
                    b: "x"
                 """)
        r = g.parse('x')
        self.assertEqual( r.children[0].data, "b" )

        g = Lark("""start: a
                    ?a: b -> c
                    b: "x"
                 """)
        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: B -> c
                    B: "x"
                 """)
        r = g.parse('x')    # was missing; the assertion below otherwise checked the previous parse result
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: b b -> c
                    b: "x"
                 """)
        r = g.parse('xx')
        self.assertEqual( r.children[0].data, "c" )
    def test_embedded_transformer(self):
        class T(Transformer):
            def a(self, children):
                return "<a>"
            def b(self, children):
                return "<b>"
            def c(self, children):
                return "<c>"

        # Test regular
        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<a>"] )

        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<a>"] )

        # Test Expand1
        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<b>"] )

        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<b>"] )

        # Test Expand1 -> Alias
        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("xx"))
        self.assertEqual( r.children, ["<c>"] )

        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("xx")
        self.assertEqual( r.children, ["<c>"] )

    def test_alias(self):
        Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """)
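

# Factory: builds a full-Earley TestCase for the given lexer and registers it in the
# module globals under a lexer-specific name (e.g. TestFullEarleyDynamic).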
def _make_full_earley_test(LEXER):
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=LEXER, parser='earley', propagate_positions=True, **kwargs)

    class _TestFullEarley(unittest.TestCase):
        def test_anon(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = Lark(r"""start: B
                         B: ("ab"|/[^b]/)+
                      """, lexer=LEXER)

            self.assertEqual( g.parse('abc').children[0], 'abc')

        def test_earley(self):
            g = Lark("""start: A "b" c
                        A: "a"+
                        c: "abc"
                     """, parser="earley", lexer=LEXER)
            x = g.parse('aaaababc')

        def test_earley2(self):
            grammar = """
            start: statement+

            statement: "r"
                     | "c" /[a-z]/+

            %ignore " "
            """

            program = """c b r"""

            l = Lark(grammar, parser='earley', lexer=LEXER)
            l.parse(program)
        @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete lexer")
        def test_earley3(self):
            """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)

            By default, `+` should imitate regexp greedy-matching
            """
            grammar = """
            start: A A
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            self.assertEqual(set(res.children), {'aa', 'a'})
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aa', 'a'])

        def test_earley4(self):
            grammar = """
            start: A A?
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            assert set(res.children) == {'aa', 'a'} or res.children == ['aaa']
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aaa'])
        def test_earley_repeating_empty(self):
            # This was a sneaky bug!

            grammar = """
            !start: "a" empty empty "b"
            empty: empty2
            empty2:
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER)
            res = parser.parse('ab')

            empty_tree = Tree('empty', [Tree('empty2', [])])
            self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_earley_explicit_ambiguity(self):
            # This was a sneaky bug!

            grammar = """
            start: a b | ab
            a: "a"
            b: "b"
            ab: "ab"
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit')
            ambig_tree = parser.parse('ab')
            self.assertEqual( ambig_tree.data, '_ambig')
            self.assertEqual( len(ambig_tree.children), 2)

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity1(self):
            grammar = """
            start: cd+ "e"

            !cd: "c"
               | "d"
               | "cd"
            """
            l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
            ambig_tree = l.parse('cde')

            assert ambig_tree.data == '_ambig', ambig_tree
            assert len(ambig_tree.children) == 2

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity2(self):
            grammar = """
            ANY: /[a-zA-Z0-9 ]+/
            a.2: "A" b+
            b.2: "B"
            c: ANY

            start: (a|c)*
            """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse('ABX')
            expected = Tree('start', [
                Tree('a', [
                    Tree('b', [])
                ]),
                Tree('c', [
                    'X'
                ])
            ])
            self.assertEqual(res, expected)

        def test_fruitflies_ambig(self):
            grammar = """
            start: noun verb noun        -> simple
                 | noun verb "like" noun -> comparative

            noun: adj? NOUN
            verb: VERB
            adj: ADJ

            NOUN: "flies" | "bananas" | "fruit"
            VERB: "like" | "flies"
            ADJ: "fruit"

            %import common.WS
            %ignore WS
            """
            parser = Lark(grammar, ambiguity='explicit', lexer=LEXER)
            tree = parser.parse('fruit flies like bananas')

            expected = Tree('_ambig', [
                Tree('comparative', [
                    Tree('noun', ['fruit']),
                    Tree('verb', ['flies']),
                    Tree('noun', ['bananas'])
                ]),
                Tree('simple', [
                    Tree('noun', [Tree('adj', ['fruit']), 'flies']),
                    Tree('verb', ['like']),
                    Tree('noun', ['bananas'])
                ])
            ])

            # self.assertEqual(tree, expected)
            self.assertEqual(tree.data, expected.data)
            self.assertEqual(set(tree.children), set(expected.children))
        @unittest.skipIf(LEXER!='dynamic_complete', "Only relevant for the dynamic_complete lexer")
        def test_explicit_ambiguity2(self):
            grammar = r"""
            start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """cat"""

            parser = _Lark(grammar, start='start', ambiguity='explicit')
            tree = parser.parse(text)
            self.assertEqual(tree.data, '_ambig')

            combinations = {tuple(str(s) for s in t.children) for t in tree.children}
            self.assertEqual(combinations, {
                ('cat',),
                ('ca', 't'),
                ('c', 'at'),
                ('c', 'a', 't')
            })
        def test_term_ambig_resolve(self):
            grammar = r"""
            !start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """foo bar"""

            parser = Lark(grammar)
            tree = parser.parse(text)
            self.assertEqual(tree.children, ['foo', 'bar'])

        # @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
        # def test_not_all_derivations(self):
        #     grammar = """
        #     start: cd+ "e"
        #
        #     !cd: "c"
        #        | "d"
        #        | "cd"
        #     """
        #     l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
        #     x = l.parse('cde')
        #     assert x.data != '_ambig', x
        #     assert len(x.children) == 1

    _NAME = "TestFullEarley" + LEXER.capitalize()
    _TestFullEarley.__name__ = _NAME
    globals()[_NAME] = _TestFullEarley
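

# Factory: builds a TestCase for the given (lexer, parser) combination and registers it
# in the module globals (e.g. TestLalrStandard). The combinations actually exercised
# are listed in _TO_TEST at the bottom of this file.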
def _make_parser_test(LEXER, PARSER):
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=LEXER, parser=PARSER, propagate_positions=True, **kwargs)
    def _Lark_open(gfilename, **kwargs):
        return Lark.open(gfilename, lexer=LEXER, parser=PARSER, propagate_positions=True, **kwargs)

    class _TestParser(unittest.TestCase):
        def test_basic1(self):
            g = _Lark("""start: a+ b a* "b" a*
                         b: "b"
                         a: "a"
                      """)

            r = g.parse('aaabaab')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )
            r = g.parse('aaabaaba')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaaa' )

            self.assertRaises(ParseError, g.parse, 'aaabaa')

        def test_basic2(self):
            # Multiple parsers and colliding tokens
            g = _Lark("""start: B A
                         B: "12"
                         A: "1" """)
            g2 = _Lark("""start: B A
                          B: "12"
                          A: "2" """)
            x = g.parse('121')
            assert x.data == 'start' and x.children == ['12', '1'], x
            x = g2.parse('122')
            assert x.data == 'start' and x.children == ['12', '2'], x

        @unittest.skipIf(cStringIO is None, "cStringIO not available")
        def test_stringio_bytes(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_stringio_unicode(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_unicode(self):
            g = _Lark(u"""start: UNIA UNIB UNIA
                          UNIA: /\xa3/
                          UNIB: /\u0101/
                       """)
            g.parse(u'\xa3\u0101\u00a3')

        def test_unicode2(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "a\u0101b\ "
                          UNIC: /a?\u0101c\n/
                       """)
            g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n')

        def test_unicode3(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "\u0101"
                          UNIC: /\u0203/ /\n/
                       """)
            g.parse(u'\xa3\u0101\u00a3\u0203\n')

        def test_hex_escape(self):
            g = _Lark(r"""start: A B C
                          A: "\x01"
                          B: /\x02/
                          C: "\xABCD"
                       """)
            g.parse('\x01\x02\xABCD')

        def test_unicode_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\u0061".."\u0063"
                       """)
            g.parse('abc')

        def test_hex_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\x01".."\x03"
                       """)
            g.parse('\x01\x02\x03')

        @unittest.skipIf(PARSER == 'cyk', "Takes forever")
        def test_stack_for_ebnf(self):
            """Verify that stack depth isn't an issue for EBNF grammars"""
            g = _Lark(r"""start: a+
                          a : "a" """)

            g.parse("a" * (sys.getrecursionlimit() * 2))
        def test_expand1_lists_with_one_item(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("a")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_expand1_lists_with_one_item_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("a!")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_dont_expand1_lists_with_multiple_items(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        def test_dont_expand1_lists_with_multiple_items_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa!")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list(self):
            g = _Lark(r"""start: list
                          ?list: item*
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list_2(self):
            g = _Lark(r"""start: list
                          ?list: item* "!"?
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())
        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_single_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item',))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_multiple_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_recurse_flatten(self):
            """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
            g = _Lark(r"""start: a | start a
                          a : A
                          A : "a" """)

            # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
            # STree data structures, which uses recursion).
            g.parse("a" * (sys.getrecursionlimit() // 4))
        def test_token_collision(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %ignore " "
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision_WS(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %import common.WS
                          %ignore WS
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision2(self):
            g = _Lark("""
                    !start: "starts"

                    %import common.LCASE_LETTER
                    """)

            x = g.parse("starts")
            self.assertSequenceEqual(x.children, ['starts'])
        # def test_string_priority(self):
        #     g = _Lark("""start: (A | /a?bb/)+
        #                  A: "a" """)
        #     x = g.parse('abb')
        #     self.assertEqual(len(x.children), 2)
        #
        #     # This parse raises an exception because the lexer will always try to consume
        #     # "a" first and will never match the regular expression
        #     # This behavior is subject to change!!
        #     # This won't happen with ambiguity handling.
        #     g = _Lark("""start: (A | /a?ab/)+
        #                  A: "a" """)
        #     self.assertRaises(LexError, g.parse, 'aab')
        def test_undefined_rule(self):
            self.assertRaises(GrammarError, _Lark, """start: a""")

        def test_undefined_token(self):
            self.assertRaises(GrammarError, _Lark, """start: A""")

        def test_rule_collision(self):
            g = _Lark("""start: "a"+ "b"
                              | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')

        def test_rule_collision2(self):
            g = _Lark("""start: "a"* "b"
                              | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')
            x = g.parse('b')

        def test_token_not_anon(self):
            """Tests that "a" is matched as an anonymous token, and not A.
            """
            g = _Lark("""start: "a"
                         A: "a" """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 0, '"a" should be considered anonymous')

            g = _Lark("""start: "a" A
                         A: "a" """)
            x = g.parse('aa')
            self.assertEqual(len(x.children), 1, 'only "a" should be considered anonymous')
            self.assertEqual(x.children[0].type, "A")

            g = _Lark("""start: /a/
                         A: /a/ """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 1)
            self.assertEqual(x.children[0].type, "A", "A isn't associated with /a/")

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_maybe(self):
            g = _Lark("""start: ["a"] """)
            x = g.parse('a')
            x = g.parse('')

        def test_start(self):
            g = _Lark("""a: "a" a? """, start='a')
            x = g.parse('a')
            x = g.parse('aa')
            x = g.parse('aaa')

        def test_alias(self):
            g = _Lark("""start: "a" -> b """)
            x = g.parse('a')
            self.assertEqual(x.data, "b")

        def test_token_ebnf(self):
            g = _Lark("""start: A
                         A: "a"* ("b"? "c".."e")+
                      """)
            x = g.parse('abcde')
            x = g.parse('dd')

        def test_backslash(self):
            g = _Lark(r"""start: "\\" "a"
                       """)
            x = g.parse(r'\a')

            g = _Lark(r"""start: /\\/ /a/
                       """)
            x = g.parse(r'\a')

        def test_backslash2(self):
            g = _Lark(r"""start: "\"" "-"
                       """)
            x = g.parse('"-')

            g = _Lark(r"""start: /\// /-/
                       """)
            x = g.parse('/-')

        def test_special_chars(self):
            g = _Lark(r"""start: "\n"
                       """)
            x = g.parse('\n')

            g = _Lark(r"""start: /\n/
                       """)
            x = g.parse('\n')

        # def test_token_recurse(self):
        #     g = _Lark("""start: A
        #                  A: B
        #                  B: A
        #               """)
        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = _Lark(r"""start: _empty a "B"
                          a: _empty "A"
                          _empty:
                       """)
            x = g.parse('AB')

        def test_regex_quote(self):
            g = r"""
            start: SINGLE_QUOTED_STRING | DOUBLE_QUOTED_STRING
            SINGLE_QUOTED_STRING : /'[^']*'/
            DOUBLE_QUOTED_STRING : /"[^"]*"/
            """
            g = _Lark(g)
            self.assertEqual( g.parse('"hello"').children, ['"hello"'])
            self.assertEqual( g.parse("'hello'").children, ["'hello'"])

        def test_lexer_token_limit(self):
            "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"
            tokens = {'A%d'%i:'"%d"'%i for i in range(300)}
            g = _Lark("""start: %s
                      %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))
        def test_float_without_lexer(self):
            expected_error = UnexpectedCharacters if LEXER.startswith('dynamic') else UnexpectedToken
            if PARSER == 'cyk':
                expected_error = ParseError

            g = _Lark("""start: ["+"|"-"] float
                         float: digit* "." digit+ exp?
                              | digit+ exp
                         exp: ("e"|"E") ["+"|"-"] digit+
                         digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
                      """)
            g.parse("1.2")
            g.parse("-.2e9")
            g.parse("+2e-9")
            self.assertRaises( expected_error, g.parse, "+2e-9e")

        def test_keep_all_tokens(self):
            l = _Lark("""start: "a"+ """, keep_all_tokens=True)
            tree = l.parse('aaa')
            self.assertEqual(tree.children, ['a', 'a', 'a'])

        def test_token_flags(self):
            l = _Lark("""!start: "a"i+
                      """)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            l = _Lark("""!start: /a/i+
                      """)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            # g = """!start: "a"i "a"
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            # g = """!start: /a/i /a/
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            g = """start: NAME "," "a"
                   NAME: /[a-z_]/i /[a-z0-9_]/i*
                """
            l = _Lark(g)
            tree = l.parse('ab,a')
            self.assertEqual(tree.children, ['ab'])
            tree = l.parse('AB,a')
            self.assertEqual(tree.children, ['AB'])

        def test_token_flags3(self):
            l = _Lark("""!start: ABC+
                         ABC: "abc"i
                      """)
            tree = l.parse('aBcAbC')
            self.assertEqual(tree.children, ['aBc', 'AbC'])

        def test_token_flags2(self):
            g = """!start: ("a"i | /a/ /b/?)+
                """
            l = _Lark(g)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])
        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_twice_empty(self):
            g = """!start: [["A"]]
                """
            l = _Lark(g)
            tree = l.parse('A')
            self.assertEqual(tree.children, ['A'])

            tree = l.parse('')
            self.assertEqual(tree.children, [])

        def test_undefined_ignore(self):
            g = """!start: "A"

                %ignore B
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_alias_in_terminal(self):
            g = """start: TERM
                   TERM: "a" -> alias
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_line_and_column(self):
            g = r"""!start: "A" bc "D"
                    !bc: "B\nC"
                """
            l = _Lark(g)
            a, bc, d = l.parse("AB\nCD").children
            self.assertEqual(a.line, 1)
            self.assertEqual(a.column, 1)

            bc, = bc.children
            self.assertEqual(bc.line, 1)
            self.assertEqual(bc.column, 2)

            self.assertEqual(d.line, 2)
            self.assertEqual(d.column, 2)

            if LEXER != 'dynamic':
                self.assertEqual(a.end_line, 1)
                self.assertEqual(a.end_column, 2)
                self.assertEqual(bc.end_line, 2)
                self.assertEqual(bc.end_column, 2)
                self.assertEqual(d.end_line, 2)
                self.assertEqual(d.end_column, 3)
        def test_reduce_cycle(self):
            """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state.
            It seems that the correct solution is to explicitly distinguish finalization in the reduce() function.
            """
            l = _Lark("""
            term: A
                | term term

            A: "a"
            """, start='term')

            tree = l.parse("aa")
            self.assertEqual(len(tree.children), 2)
        @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority")
        def test_lexer_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: A B | AB
            A.2: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['a', 'b'])
            self.assertNotEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B: "b"
            AB.3: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertNotEqual(res.children, ['a', 'b'])
            self.assertEqual(res.children, ['ab'])

        def test_import(self):
            grammar = """
            start: NUMBER WORD

            %import common.NUMBER
            %import common.WORD
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_import_rename(self):
            grammar = """
            start: N W

            %import common.NUMBER -> N
            %import common.WORD -> W
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_relative_import(self):
            l = _Lark_open('test_relative_import.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_import_rename(self):
            l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_rule_import(self):
            l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('expr', ['a', Tree('expr', ['a', 'b']), 'b']),
                'y'])

        def test_relative_rule_import_drop_ignore(self):
            # %ignore rules are dropped on import
            l = _Lark_open('test_relative_rule_import_drop_ignore.lark',
                           rel_to=__file__)
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xa abby')

        def test_relative_rule_import_subrule(self):
            l = _Lark_open('test_relative_rule_import_subrule.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', [
                        'a', Tree('grammars__ab__expr', ['a', 'b']), 'b',
                    ]),
                ]),
                'y'])

        def test_relative_rule_import_subrule_no_conflict(self):
            l = _Lark_open(
                'test_relative_rule_import_subrule_no_conflict.lark',
                rel_to=__file__)
            x = l.parse('xaby')
            self.assertEqual(x.children, [Tree('expr', [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', ['a', 'b']),
                ]),
                'y'])])
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xaxabyby')

        def test_relative_rule_import_rename(self):
            l = _Lark_open('test_relative_rule_import_rename.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('ab', ['a', Tree('ab', ['a', 'b']), 'b']),
                'y'])

        def test_multi_import(self):
            grammar = """
            start: NUMBER WORD

            %import common (NUMBER, WORD, WS)
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 toucans')
            self.assertEqual(x.children, ['12', 'toucans'])

        def test_relative_multi_import(self):
            l = _Lark_open("test_relative_multi_import.lark", rel_to=__file__)
            x = l.parse('12 capybaras')
            self.assertEqual(x.children, ['12', 'capybaras'])

        def test_import_errors(self):
            grammar = """
            start: NUMBER WORD

            %import .grammars.bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

            grammar = """
            start: NUMBER WORD

            %import bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: a | b
            a.1: "a"
            b.2: "a"
            """

            # l = Lark(grammar, parser='earley', lexer='standard')
            l = _Lark(grammar)
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'b')

            grammar = """
            start: a | b
            a.2: "a"
            b.1: "a"
            """

            l = _Lark(grammar)
            # l = Lark(grammar, parser='earley', lexer='standard')
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'a')

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization_sum(self):
            "Tests effect of priority on result"

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_: "ab"
            bb_.1: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_.1: "ab"
            bb_: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.2: "a"
            b_.1: "b"
            ab_.3: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.1: "a"
            b_.1: "b"
            ab_.4: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')
        def test_utf8(self):
            g = u"""start: a
                    a: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [Tree('a', [])]))

            g = u"""start: A
                    A: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [u'\xb1a']))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_ignore(self):
            grammar = r"""
            COMMENT: /(!|(\/\/))[^\n]*/
            %ignore COMMENT
            %import common.WS -> _WS
            %import common.INT
            start: "INT"i _WS+ INT _WS*
            """
            parser = _Lark(grammar)

            tree = parser.parse("int 1 ! This is a comment\n")
            self.assertEqual(tree.children, ['1'])

            tree = parser.parse("int 1 ! This is a comment")    # A trailing ignore token can be tricky!
            self.assertEqual(tree.children, ['1'])

            parser = _Lark(r"""
                start : "a"*
                %ignore "b"
            """)
            tree = parser.parse("bb")
            self.assertEqual(tree.children, [])

        def test_regex_escaping(self):
            g = _Lark("start: /[ab]/")
            g.parse('a')
            g.parse('b')

            self.assertRaises( UnexpectedInput, g.parse, 'c')

            _Lark(r'start: /\w/').parse('a')

            g = _Lark(r'start: /\\w/')
            self.assertRaises( UnexpectedInput, g.parse, 'a')
            g.parse(r'\w')

            _Lark(r'start: /\[/').parse('[')
            _Lark(r'start: /\//').parse('/')
            _Lark(r'start: /\\/').parse('\\')
            _Lark(r'start: /\[ab]/').parse('[ab]')
            _Lark(r'start: /\\[ab]/').parse('\\a')
            _Lark(r'start: /\t/').parse('\t')
            _Lark(r'start: /\\t/').parse('\\t')
            _Lark(r'start: /\\\t/').parse('\\\t')
            _Lark(r'start: "\t"').parse('\t')
            _Lark(r'start: "\\t"').parse('\\t')
            _Lark(r'start: "\\\t"').parse('\\\t')

        def test_ranged_repeat_rules(self):
            g = u"""!start: "A"~3
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["A", "A", "A"]))
            self.assertRaises(ParseError, l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: "A"~0..2
                """
            if PARSER != 'cyk': # XXX CYK currently doesn't support empty grammars
                l = _Lark(g)
                self.assertEqual(l.parse(u''), Tree('start', []))
                self.assertEqual(l.parse(u'A'), Tree('start', ['A']))
                self.assertEqual(l.parse(u'AA'), Tree('start', ['A', 'A']))
                self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'AAA')

            g = u"""!start: "A"~3..2
                """
            self.assertRaises(GrammarError, _Lark, g)

            g = u"""!start: "A"~2..3 "B"~2
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
            self.assertEqual(l.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
            self.assertRaises(ParseError, l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        def test_ranged_repeat_terms(self):
            g = u"""!start: AAA
                    AAA: "A"~3
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"]))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: AABB CC
                    AABB: "A"~0..2 "B"~2
                    CC: "C"~1..2
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC']))
            self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C']))
            self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC']))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')
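
        # Terminal priority (A.2) should beat the default longest-match preference,
        # so 'abc' lexes as A('a') followed by WORD('bc') rather than WORD('abc').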
        @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now") # TODO XXX
        def test_priority_vs_embedded(self):
            g = """
            A.2: "a"
            WORD: ("a".."z")+

            start: (A | WORD)+
            """
            l = _Lark(g)
            t = l.parse('abc')
            self.assertEqual(t.children, ['a', 'bc'])
            self.assertEqual(t.children[0].type, 'A')

        def test_line_counting(self):
            p = _Lark("start: /[^x]+/")

            text = 'hello\nworld'
            t = p.parse(text)
            tok = t.children[0]
            self.assertEqual(tok, text)
            self.assertEqual(tok.line, 1)
            self.assertEqual(tok.column, 1)
            if LEXER != 'dynamic':    # fixed: was the module-level _LEXER, not the factory's LEXER argument
                self.assertEqual(tok.end_line, 2)
                self.assertEqual(tok.end_column, 6)
        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_empty_end(self):
            p = _Lark("""
                start: b c d
                b: "B"
                c: | "C"
                d: | "D"
            """)
            res = p.parse('B')
            self.assertEqual(len(res.children), 3)
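
        # With maybe_placeholders=True, an optional bracketed item ([x]) that doesn't
        # match leaves a None placeholder among the children, while anonymous tokens
        # and plain "?"-optionals never produce placeholders.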
        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_maybe_placeholders(self):
            # Anonymous tokens shouldn't count
            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [])

            # All invisible constructs shouldn't count
            p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
                         A: "a"
                         _c: "c" """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None])
            self.assertEqual(p.parse("c").children, [None])
            self.assertEqual(p.parse("aefc").children, ['a'])

            # ? shouldn't apply
            p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])

            p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None, None])
            self.assertEqual(p.parse("a").children, ['a', None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("c").children, [None, None, 'c'])
            self.assertEqual(p.parse("ab").children, ['a', 'b', None])
            self.assertEqual(p.parse("ac").children, ['a', None, 'c'])
            self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
            self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

            p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
            self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
            self.assertEqual(p.parse("babbcabcb").children,
                [None, 'b', None,
                 'a', 'b', None,
                 None, 'b', 'c',
                 'a', 'b', 'c',
                 None, 'b', None])

            p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
            self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
            self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
            self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])
        def test_escaped_string(self):
            "Tests common.ESCAPED_STRING"
            grammar = r"""
            start: ESCAPED_STRING+

            %import common (WS_INLINE, ESCAPED_STRING)
            %ignore WS_INLINE
            """

            parser = _Lark(grammar)
            parser.parse(r'"\\" "b" "c"')
            parser.parse(r'"That" "And a \"b"')
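
        # serialize()/deserialize() must round-trip the parser; memo_serialize()
        # additionally factors shared objects (here Rule and TerminalDef instances)
        # out into a separate memo dict.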
        @unittest.skipIf(PARSER!='lalr', "Serialize currently only works for LALR parsers (though it should be easy to extend)")
        def test_serialize(self):
            grammar = """
            start: "A" b "C"
            b: "B"
            """
            parser = _Lark(grammar)
            d = parser.serialize()
            parser2 = Lark.deserialize(d, {}, {})
            self.assertEqual(parser2.parse('ABC'), Tree('start', [Tree('b', [])]) )

            namespace = {'Rule': Rule, 'TerminalDef': TerminalDef}
            d, m = parser.memo_serialize(namespace.values())
            parser3 = Lark.deserialize(d, namespace, m)
            self.assertEqual(parser3.parse('ABC'), Tree('start', [Tree('b', [])]) )
    _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
    _TestParser.__name__ = _NAME
    globals()[_NAME] = _TestParser


# Note: You still have to import them in __main__ for the tests to run
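# Each entry is a (lexer, parser) pair; each pair gets its own generated TestCase class.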
_TO_TEST = [
        ('standard', 'earley'),
        ('standard', 'cyk'),
        ('dynamic', 'earley'),
        ('dynamic_complete', 'earley'),
        ('standard', 'lalr'),
        ('contextual', 'lalr'),
        # (None, 'earley'),
]

for _LEXER, _PARSER in _TO_TEST:
    _make_parser_test(_LEXER, _PARSER)

for _LEXER in ('dynamic', 'dynamic_complete'):
    _make_full_earley_test(_LEXER)

if __name__ == '__main__':
    unittest.main()