# -*- coding: utf-8 -*-
from __future__ import absolute_import

import re
import unittest
import logging
import os
import sys
from copy import copy, deepcopy

try:
    from cStringIO import StringIO as cStringIO
except ImportError:
    # Available only in Python 2.x; Python 3.x has only io.StringIO (imported below)
    cStringIO = None

from io import (
        StringIO as uStringIO,
        BytesIO,
        open,
    )

logging.basicConfig(level=logging.INFO)

try:
    import regex
except ImportError:
    regex = None

from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer

__path__ = os.path.dirname(__file__)

def _read(n, *args):
    with open(os.path.join(__path__, n), *args) as f:
        return f.read()


class TestParsers(unittest.TestCase):
    def test_big_list(self):
        Lark(r"""
            start: {}
        """.format(
            "|".join(['"%s"' % i for i in range(250)])
        ))

    def test_same_ast(self):
        "Tests that Earley and LALR parsers produce equal trees"
        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w+/ """, parser='lalr')
        l = g.parse('(a,b,c,*x)')

        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w/+ """)
        l2 = g.parse('(a,b,c,*x)')
        assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())

    def test_infinite_recurse(self):
        g = """start: a
               a: a | "a"
            """

        self.assertRaises(GrammarError, Lark, g, parser='lalr')

        # TODO: should it? shouldn't it?
        # l = Lark(g, parser='earley', lexer='dynamic')
        # self.assertRaises(ParseError, l.parse, 'a')

    def test_propagate_positions(self):
        g = Lark("""start: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )

        g = Lark("""start: x
                    x: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )

    def test_expand1(self):
        g = Lark("""start: a
                    ?a: b
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "b" )

        g = Lark("""start: a
                    ?a: b -> c
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: B -> c
                    B: "x"
                 """)

        r = g.parse('x')    # re-parse with the new grammar, so the assertion below actually tests it
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: b b -> c
                    b: "x"
                 """)

        r = g.parse('xx')
        self.assertEqual( r.children[0].data, "c" )

    def test_comment_in_rule_definition(self):
        g = Lark("""start: a
               a: "a"
                // A comment
                // Another comment
                | "b"
                // Still more
               c: "unrelated"
            """)
        r = g.parse('b')
        self.assertEqual( r.children[0].data, "a" )

    def test_visit_tokens(self):
        class T(Transformer):
            def a(self, children):
                return children[0] + "!"
            def A(self, tok):
                return tok.update(value=tok.upper())

        # Test regular
        g = """start: a
               a : A
               A: "x"
            """
        p = Lark(g, parser='lalr')
        r = T(False).transform(p.parse("x"))
        self.assertEqual( r.children, ["x!"] )
        r = T().transform(p.parse("x"))
        self.assertEqual( r.children, ["X!"] )

        # Test internal transformer
        p = Lark(g, parser='lalr', transformer=T())
        r = p.parse("x")
        self.assertEqual( r.children, ["X!"] )

    def test_vargs_meta(self):

        @v_args(meta=True)
        class T1(Transformer):
            def a(self, children, meta):
                assert not children
                return meta.line

            def start(self, children, meta):
                return children

        @v_args(meta=True, inline=True)
        class T2(Transformer):
            def a(self, meta):
                return meta.line

            def start(self, meta, *res):
                return list(res)

        for T in (T1, T2):
            for internal in [False, True]:
                try:
                    g = Lark(r"""start: a+
                                 a : "x" _NL?
                                 _NL: /\n/+
                              """, parser='lalr', transformer=T() if internal else None, propagate_positions=True)
                except NotImplementedError:
                    assert internal
                    continue

                res = g.parse("xx\nx\nxxx\n\n\nxx")
                assert not internal
                res = T().transform(res)

                self.assertEqual(res, [1, 1, 2, 3, 3, 3, 6, 6])

    def test_vargs_tree(self):
        tree = Lark('''
            start: a a a
            !a: "A"
        ''').parse('AAA')
        tree_copy = deepcopy(tree)

        @v_args(tree=True)
        class T(Transformer):
            def a(self, tree):
                return 1
            def start(self, tree):
                return tree.children

        res = T().transform(tree)
        self.assertEqual(res, [1, 1, 1])
        self.assertEqual(tree, tree_copy)

    def test_embedded_transformer(self):
        class T(Transformer):
            def a(self, children):
                return "<a>"
            def b(self, children):
                return "<b>"
            def c(self, children):
                return "<c>"

        # Test regular
        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<a>"] )

        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<a>"] )

        # Test Expand1
        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<b>"] )

        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<b>"] )

        # Test Expand1 -> Alias
        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("xx"))
        self.assertEqual( r.children, ["<c>"] )

        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("xx")
        self.assertEqual( r.children, ["<c>"] )

    def test_embedded_transformer_inplace(self):
        @v_args(tree=True)
        class T1(Transformer_InPlace):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        @v_args(tree=True)
        class T2(Transformer):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        class T3(Transformer):
            @v_args(tree=True)
            def a(self, tree):
                assert isinstance(tree, Tree)
                tree.children.append("tested")
                return tree

            @v_args(tree=True)
            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        for t in [T1(), T2(), T3()]:
            for internal in [False, True]:
                g = Lark("""start: a b
                            a : "x"
                            b : "y"
                         """, parser='lalr', transformer=t if internal else None)
                r = g.parse("xy")
                if not internal:
                    r = t.transform(r)

                a, b = r.children
                self.assertEqual(a.children, ["tested"])
                self.assertEqual(b.children, ["tested2"])

    def test_alias(self):
        Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """)


def _make_full_earley_test(LEXER):
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=LEXER, parser='earley', propagate_positions=True, **kwargs)

    class _TestFullEarley(unittest.TestCase):
        def test_anon(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = Lark(r"""start: B
                         B: ("ab"|/[^b]/)+
                      """, lexer=LEXER)

            self.assertEqual( g.parse('abc').children[0], 'abc')

        def test_earley(self):
            g = Lark("""start: A "b" c
                        A: "a"+
                        c: "abc"
                     """, parser="earley", lexer=LEXER)
            x = g.parse('aaaababc')

        def test_earley2(self):
            grammar = """
                start: statement+
                statement: "r"
                         | "c" /[a-z]/+
                %ignore " "
                """
            program = """c b r"""

            l = Lark(grammar, parser='earley', lexer=LEXER)
            l.parse(program)

        @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete parser")
        def test_earley3(self):
            """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)

            By default, `+` should imitate regexp greedy-matching
            """
            grammar = """
                start: A A
                A: "a"+
                """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            self.assertEqual(set(res.children), {'aa', 'a'})
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aa', 'a'])

        def test_earley4(self):
            grammar = """
                start: A A?
                A: "a"+
                """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            assert set(res.children) == {'aa', 'a'} or res.children == ['aaa']
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aaa'])

        def test_earley_repeating_empty(self):
            # This was a sneaky bug!
            grammar = """
                !start: "a" empty empty "b"
                empty: empty2
                empty2:
                """
            parser = Lark(grammar, parser='earley', lexer=LEXER)
            res = parser.parse('ab')

            empty_tree = Tree('empty', [Tree('empty2', [])])
            self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_earley_explicit_ambiguity(self):
            # This was a sneaky bug!
            grammar = """
                start: a b | ab
                a: "a"
                b: "b"
                ab: "ab"
                """
            parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit')
            ambig_tree = parser.parse('ab')
            self.assertEqual( ambig_tree.data, '_ambig')
            self.assertEqual( len(ambig_tree.children), 2)

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity1(self):
            grammar = """
                start: cd+ "e"
                !cd: "c"
                   | "d"
                   | "cd"
                """
            l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
            ambig_tree = l.parse('cde')

            assert ambig_tree.data == '_ambig', ambig_tree
            assert len(ambig_tree.children) == 2

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity2(self):
            grammar = """
                ANY: /[a-zA-Z0-9 ]+/
                a.2: "A" b+
                b.2: "B"
                c: ANY
                start: (a|c)*
                """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse('ABX')
            expected = Tree('start', [
                    Tree('a', [
                        Tree('b', [])
                    ]),
                    Tree('c', [
                        'X'
                    ])
                ])
            self.assertEqual(res, expected)

        def test_fruitflies_ambig(self):
            grammar = """
                start: noun verb noun -> simple
                     | noun verb "like" noun -> comparative
                noun: adj? NOUN
                verb: VERB
                adj: ADJ
                NOUN: "flies" | "bananas" | "fruit"
                VERB: "like" | "flies"
                ADJ: "fruit"
                %import common.WS
                %ignore WS
                """
            parser = Lark(grammar, ambiguity='explicit', lexer=LEXER)
            tree = parser.parse('fruit flies like bananas')

            expected = Tree('_ambig', [
                    Tree('comparative', [
                        Tree('noun', ['fruit']),
                        Tree('verb', ['flies']),
                        Tree('noun', ['bananas'])
                    ]),
                    Tree('simple', [
                        Tree('noun', [Tree('adj', ['fruit']), 'flies']),
                        Tree('verb', ['like']),
                        Tree('noun', ['bananas'])
                    ])
                ])

            # self.assertEqual(tree, expected)
            self.assertEqual(tree.data, expected.data)
            self.assertEqual(set(tree.children), set(expected.children))

        @unittest.skipIf(LEXER!='dynamic_complete', "Only relevant for the dynamic_complete parser")
        def test_explicit_ambiguity2(self):
            grammar = r"""
                start: NAME+
                NAME: /\w+/
                %ignore " "
                """
            text = """cat"""

            parser = _Lark(grammar, start='start', ambiguity='explicit')
            tree = parser.parse(text)
            self.assertEqual(tree.data, '_ambig')

            combinations = {tuple(str(s) for s in t.children) for t in tree.children}
            self.assertEqual(combinations, {
                ('cat',),
                ('ca', 't'),
                ('c', 'at'),
                ('c', 'a', 't')
            })

        def test_term_ambig_resolve(self):
            grammar = r"""
                !start: NAME+
                NAME: /\w+/
                %ignore " "
                """
            text = """foo bar"""

            parser = Lark(grammar)
            tree = parser.parse(text)
            self.assertEqual(tree.children, ['foo', 'bar'])

        # @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
        # def test_not_all_derivations(self):
        #     grammar = """
        #     start: cd+ "e"
        #     !cd: "c"
        #        | "d"
        #        | "cd"
        #     """
        #     l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
        #     x = l.parse('cde')
        #     assert x.data != '_ambig', x
        #     assert len(x.children) == 1

    _NAME = "TestFullEarley" + LEXER.capitalize()
    _TestFullEarley.__name__ = _NAME
    globals()[_NAME] = _TestFullEarley


class CustomLexer(Lexer):
    """
    Purpose of this custom lexer is to test the integration,
    so it uses the TraditionalLexer as its implementation, without custom lexing behaviour.
    """
    def __init__(self, lexer_conf):
        self.lexer = TraditionalLexer(copy(lexer_conf))

    def lex(self, *args, **kwargs):
        return self.lexer.lex(*args, **kwargs)
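
# _make_parser_test (below) plugs this class in whenever its LEXER argument is the
# string 'custom', passing it to Lark as `lexer=CustomLexer`, which exercises the
# custom-lexer integration path.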

def _make_parser_test(LEXER, PARSER):
    lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER

    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)

    def _Lark_open(gfilename, **kwargs):
        return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)

    class _TestParser(unittest.TestCase):
        def test_basic1(self):
            g = _Lark("""start: a+ b a* "b" a*
                         b: "b"
                         a: "a"
                      """)
            r = g.parse('aaabaab')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )
            r = g.parse('aaabaaba')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaaa' )

            self.assertRaises(ParseError, g.parse, 'aaabaa')

        def test_basic2(self):
            # Multiple parsers and colliding tokens
            g = _Lark("""start: B A
                         B: "12"
                         A: "1" """)
            g2 = _Lark("""start: B A
                          B: "12"
                          A: "2" """)
            x = g.parse('121')
            assert x.data == 'start' and x.children == ['12', '1'], x
            x = g2.parse('122')
            assert x.data == 'start' and x.children == ['12', '2'], x

        @unittest.skipIf(cStringIO is None, "cStringIO not available")
        def test_stringio_bytes(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_stringio_unicode(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_unicode(self):
            g = _Lark(u"""start: UNIA UNIB UNIA
                          UNIA: /\xa3/
                          UNIB: /\u0101/
                       """)
            g.parse(u'\xa3\u0101\u00a3')

        def test_unicode2(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "a\u0101b\ "
                          UNIC: /a?\u0101c\n/
                       """)
            g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n')

        def test_unicode3(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "\u0101"
                          UNIC: /\u0203/ /\n/
                       """)
            g.parse(u'\xa3\u0101\u00a3\u0203\n')

        def test_hex_escape(self):
            g = _Lark(r"""start: A B C
                          A: "\x01"
                          B: /\x02/
                          C: "\xABCD"
                       """)
            g.parse('\x01\x02\xABCD')

        def test_unicode_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\u0061".."\u0063"
                       """)
            g.parse('abc')

        def test_hex_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\x01".."\x03"
                       """)
            g.parse('\x01\x02\x03')

        @unittest.skipIf(PARSER == 'cyk', "Takes forever")
        def test_stack_for_ebnf(self):
            """Verify that stack depth isn't an issue for EBNF grammars"""
            g = _Lark(r"""start: a+
                          a : "a" """)
            g.parse("a" * (sys.getrecursionlimit()*2 ))

        def test_expand1_lists_with_one_item(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("a")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_expand1_lists_with_one_item_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("a!")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_dont_expand1_lists_with_multiple_items(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        def test_dont_expand1_lists_with_multiple_items_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa!")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list(self):
            g = _Lark(r"""start: list
                          ?list: item*
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list_2(self):
            g = _Lark(r"""start: list
                          ?list: item* "!"?
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # Because 'list' is a flatten rule, its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_single_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,")

            # Because 'list' is a flatten rule, its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item',))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_multiple_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,a,")

            # Because 'list' is a flatten rule, its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_recurse_flatten(self):
            """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
            g = _Lark(r"""start: a | start a
                          a : A
                          A : "a" """)

            # Force the parser to write to the debug log, but prevent writing it to the terminal (uses repr() on the
            # half-built tree data structures, which uses recursion).
            g.parse("a" * (sys.getrecursionlimit() // 4))

        def test_token_collision(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %ignore " "
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision_WS(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %import common.WS
                          %ignore WS
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision2(self):
            g = _Lark("""
                    !start: "starts"
                    %import common.LCASE_LETTER
                    """)
            x = g.parse("starts")
            self.assertSequenceEqual(x.children, ['starts'])

        def test_templates(self):
            g = _Lark(r"""
                       start: "[" sep{NUMBER, ","} "]"
                       sep{item, delim}: item (delim item)*
                       NUMBER: /\d+/
                       %ignore " "
                       """)
            x = g.parse("[1, 2, 3, 4]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1', '2', '3', '4'])])
            x = g.parse("[1]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1'])])

        def test_templates_recursion(self):
            g = _Lark(r"""
                       start: "[" _sep{NUMBER, ","} "]"
                       _sep{item, delim}: item | _sep{item, delim} delim item
                       NUMBER: /\d+/
                       %ignore " "
                       """)
            x = g.parse("[1, 2, 3, 4]")
            self.assertSequenceEqual(x.children, ['1', '2', '3', '4'])
            x = g.parse("[1]")
            self.assertSequenceEqual(x.children, ['1'])

        def test_templates_import(self):
            g = _Lark_open("test_templates_import.lark", rel_to=__file__)
            x = g.parse("[1, 2, 3, 4]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1', '2', '3', '4'])])
            x = g.parse("[1]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1'])])

        def test_templates_alias(self):
            g = _Lark(r"""
                       start: expr{"C"}
                       expr{t}: "A" t
                              | "B" t -> b
                       """)
            x = g.parse("AC")
            self.assertSequenceEqual(x.children, [Tree('expr', [])])
            x = g.parse("BC")
            self.assertSequenceEqual(x.children, [Tree('b', [])])

        def test_templates_modifiers(self):
            g = _Lark(r"""
                       start: expr{"B"}
                       !expr{t}: "A" t
                       """)
            x = g.parse("AB")
            self.assertSequenceEqual(x.children, [Tree('expr', ["A", "B"])])
            g = _Lark(r"""
                       start: _expr{"B"}
                       !_expr{t}: "A" t
                       """)
            x = g.parse("AB")
            self.assertSequenceEqual(x.children, ["A", "B"])
            g = _Lark(r"""
                       start: expr{b}
                       b: "B"
                       ?expr{t}: "A" t
                       """)
            x = g.parse("AB")
            self.assertSequenceEqual(x.children, [Tree('b', [])])

        def test_templates_templates(self):
            g = _Lark('''start: a{b}
                         a{t}: t{"a"}
                         b{x}: x''')
            x = g.parse('a')
            self.assertSequenceEqual(x.children, [Tree('a', [Tree('b', [])])])

        def test_g_regex_flags(self):
            g = _Lark("""
                    start: "a" /b+/ C
                    C: "C" | D
                    D: "D" E
                    E: "e"
                    """, g_regex_flags=re.I)
            x1 = g.parse("ABBc")
            x2 = g.parse("abdE")

        # def test_string_priority(self):
        #     g = _Lark("""start: (A | /a?bb/)+
        #                  A: "a" """)
        #     x = g.parse('abb')
        #     self.assertEqual(len(x.children), 2)
        #
        #     # This parse raises an exception because the lexer will always try to consume
        #     # "a" first and will never match the regular expression
        #     # This behavior is subject to change!!
        #     # This won't happen with ambiguity handling.
        #     g = _Lark("""start: (A | /a?ab/)+
        #                  A: "a" """)
        #     self.assertRaises(LexError, g.parse, 'aab')

        def test_undefined_rule(self):
            self.assertRaises(GrammarError, _Lark, """start: a""")

        def test_undefined_token(self):
            self.assertRaises(GrammarError, _Lark, """start: A""")

        def test_rule_collision(self):
            g = _Lark("""start: "a"+ "b"
                              | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')

        def test_rule_collision2(self):
            g = _Lark("""start: "a"* "b"
                              | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')
            x = g.parse('b')

        def test_token_not_anon(self):
            """Tests that "a" is matched as an anonymous token, and not A.
            """
            g = _Lark("""start: "a"
                         A: "a" """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 0, '"a" should be considered anonymous')

            g = _Lark("""start: "a" A
                         A: "a" """)
            x = g.parse('aa')
            self.assertEqual(len(x.children), 1, 'only "a" should be considered anonymous')
            self.assertEqual(x.children[0].type, "A")

            g = _Lark("""start: /a/
                         A: /a/ """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 1)
            self.assertEqual(x.children[0].type, "A", "A isn't associated with /a/")

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_maybe(self):
            g = _Lark("""start: ["a"] """)
            x = g.parse('a')
            x = g.parse('')

        def test_start(self):
            g = _Lark("""a: "a" a? """, start='a')
            x = g.parse('a')
            x = g.parse('aa')
            x = g.parse('aaa')

        def test_alias(self):
            g = _Lark("""start: "a" -> b """)
            x = g.parse('a')
            self.assertEqual(x.data, "b")

        def test_token_ebnf(self):
            g = _Lark("""start: A
                         A: "a"* ("b"? "c".."e")+
                      """)
            x = g.parse('abcde')
            x = g.parse('dd')

        def test_backslash(self):
            g = _Lark(r"""start: "\\" "a"
                       """)
            x = g.parse(r'\a')

            g = _Lark(r"""start: /\\/ /a/
                       """)
            x = g.parse(r'\a')

        def test_backslash2(self):
            g = _Lark(r"""start: "\"" "-"
                       """)
            x = g.parse('"-')

            g = _Lark(r"""start: /\// /-/
                       """)
            x = g.parse('/-')

        def test_special_chars(self):
            g = _Lark(r"""start: "\n"
                       """)
            x = g.parse('\n')

            g = _Lark(r"""start: /\n/
                       """)
            x = g.parse('\n')

        # def test_token_recurse(self):
        #     g = _Lark("""start: A
        #                  A: B
        #                  B: A
        #               """)

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = _Lark(r"""start: _empty a "B"
                          a: _empty "A"
                          _empty:
                       """)
            x = g.parse('AB')

        def test_regex_quote(self):
            g = r"""
                start: SINGLE_QUOTED_STRING | DOUBLE_QUOTED_STRING
                SINGLE_QUOTED_STRING : /'[^']*'/
                DOUBLE_QUOTED_STRING : /"[^"]*"/
                """
            g = _Lark(g)
            self.assertEqual( g.parse('"hello"').children, ['"hello"'])
            self.assertEqual( g.parse("'hello'").children, ["'hello'"])

        def test_lexer_token_limit(self):
            "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"
            tokens = {'A%d' % i: '"%d"' % i for i in range(300)}
            g = _Lark("""start: %s
                      %s""" % (' '.join(tokens), '\n'.join("%s: %s" % x for x in tokens.items())))

        def test_float_without_lexer(self):
            expected_error = UnexpectedCharacters if LEXER.startswith('dynamic') else UnexpectedToken
            if PARSER == 'cyk':
                expected_error = ParseError

            g = _Lark("""start: ["+"|"-"] float
                         float: digit* "." digit+ exp?
                              | digit+ exp
                         exp: ("e"|"E") ["+"|"-"] digit+
                         digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
                      """)
            g.parse("1.2")
            g.parse("-.2e9")
            g.parse("+2e-9")
            self.assertRaises( expected_error, g.parse, "+2e-9e")

        def test_keep_all_tokens(self):
            l = _Lark("""start: "a"+ """, keep_all_tokens=True)
            tree = l.parse('aaa')
            self.assertEqual(tree.children, ['a', 'a', 'a'])

        def test_token_flags(self):
            l = _Lark("""!start: "a"i+
                      """)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            l = _Lark("""!start: /a/i+
                      """)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            # g = """!start: "a"i "a"
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            # g = """!start: /a/i /a/
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            g = """start: NAME "," "a"
                   NAME: /[a-z_]/i /[a-z0-9_]/i*
                """
            l = _Lark(g)
            tree = l.parse('ab,a')
            self.assertEqual(tree.children, ['ab'])
            tree = l.parse('AB,a')
            self.assertEqual(tree.children, ['AB'])

        def test_token_flags3(self):
            l = _Lark("""!start: ABC+
                         ABC: "abc"i
                      """)
            tree = l.parse('aBcAbC')
            self.assertEqual(tree.children, ['aBc', 'AbC'])

        def test_token_flags2(self):
            g = """!start: ("a"i | /a/ /b/?)+
                """
            l = _Lark(g)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_twice_empty(self):
            g = """!start: ("A"?)?
                """
            l = _Lark(g)
            tree = l.parse('A')
            self.assertEqual(tree.children, ['A'])
            tree = l.parse('')
            self.assertEqual(tree.children, [])

        def test_undefined_ignore(self):
            g = """!start: "A"
                   %ignore B
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_alias_in_terminal(self):
            g = """start: TERM
                   TERM: "a" -> alias
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_line_and_column(self):
            g = r"""!start: "A" bc "D"
                    !bc: "B\nC"
                 """
            l = _Lark(g)
            a, bc, d = l.parse("AB\nCD").children
            self.assertEqual(a.line, 1)
            self.assertEqual(a.column, 1)

            bc, = bc.children
            self.assertEqual(bc.line, 1)
            self.assertEqual(bc.column, 2)

            self.assertEqual(d.line, 2)
            self.assertEqual(d.column, 2)

            if LEXER != 'dynamic':
                self.assertEqual(a.end_line, 1)
                self.assertEqual(a.end_column, 2)
                self.assertEqual(bc.end_line, 2)
                self.assertEqual(bc.end_column, 2)
                self.assertEqual(d.end_line, 2)
                self.assertEqual(d.end_column, 3)

        def test_reduce_cycle(self):
            """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state.
            It seems that the correct solution is to explicitly distinguish finalization in the reduce() function.
            """
            l = _Lark("""
                term: A
                    | term term
                A: "a"
                """, start='term')
            tree = l.parse("aa")
            self.assertEqual(len(tree.children), 2)

        @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority")
        def test_lexer_prioritization(self):
            "Tests effect of priority on result"
            grammar = """
                start: A B | AB
                A.2: "a"
                B: "b"
                AB: "ab"
                """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertEqual(res.children, ['a', 'b'])
            self.assertNotEqual(res.children, ['ab'])

            grammar = """
                start: A B | AB
                A: "a"
                B: "b"
                AB.3: "ab"
                """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertNotEqual(res.children, ['a', 'b'])
            self.assertEqual(res.children, ['ab'])

            grammar = """
                start: A B | AB
                A: "a"
                B.-20: "b"
                AB.-10: "ab"
                """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertEqual(res.children, ['a', 'b'])

            grammar = """
                start: A B | AB
                A.-99999999999999999999999: "a"
                B: "b"
                AB: "ab"
                """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertEqual(res.children, ['ab'])

        def test_import(self):
            grammar = """
                start: NUMBER WORD
                %import common.NUMBER
                %import common.WORD
                %import common.WS
                %ignore WS
                """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_import_rename(self):
            grammar = """
                start: N W
                %import common.NUMBER -> N
                %import common.WORD -> W
                %import common.WS
                %ignore WS
                """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_relative_import(self):
            l = _Lark_open('test_relative_import.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_import_unicode(self):
            l = _Lark_open('test_relative_import_unicode.lark', rel_to=__file__)
            x = l.parse(u'Ø')
            self.assertEqual(x.children, [u'Ø'])

        def test_relative_import_rename(self):
            l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_rule_import(self):
            l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('expr', ['a', Tree('expr', ['a', 'b']), 'b']),
                'y'])

        def test_relative_rule_import_drop_ignore(self):
            # %ignore rules are dropped on import
            l = _Lark_open('test_relative_rule_import_drop_ignore.lark',
                           rel_to=__file__)
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xa abby')

        def test_relative_rule_import_subrule(self):
            l = _Lark_open('test_relative_rule_import_subrule.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', [
                        'a', Tree('grammars__ab__expr', ['a', 'b']), 'b',
                    ]),
                ]),
                'y'])

        def test_relative_rule_import_subrule_no_conflict(self):
            l = _Lark_open(
                'test_relative_rule_import_subrule_no_conflict.lark',
                rel_to=__file__)
            x = l.parse('xaby')
            self.assertEqual(x.children, [Tree('expr', [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', ['a', 'b']),
                ]),
                'y'])])
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xaxabyby')

        def test_relative_rule_import_rename(self):
            l = _Lark_open('test_relative_rule_import_rename.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('ab', ['a', Tree('ab', ['a', 'b']), 'b']),
                'y'])

        def test_multi_import(self):
            grammar = """
                start: NUMBER WORD
                %import common (NUMBER, WORD, WS)
                %ignore WS
                """
            l = _Lark(grammar)
            x = l.parse('12 toucans')
            self.assertEqual(x.children, ['12', 'toucans'])

        def test_relative_multi_import(self):
            l = _Lark_open("test_relative_multi_import.lark", rel_to=__file__)
            x = l.parse('12 capybaras')
            self.assertEqual(x.children, ['12', 'capybaras'])

        def test_relative_import_preserves_leading_underscore(self):
            l = _Lark_open("test_relative_import_preserves_leading_underscore.lark", rel_to=__file__)
            x = l.parse('Ax')
            self.assertEqual(next(x.find_data('c')).children, ['A'])

        def test_relative_import_of_nested_grammar(self):
            l = _Lark_open("grammars/test_relative_import_of_nested_grammar.lark", rel_to=__file__)
            x = l.parse('N')
            self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])

        def test_relative_import_rules_dependencies_imported_only_once(self):
            l = _Lark_open("test_relative_import_rules_dependencies_imported_only_once.lark", rel_to=__file__)
            x = l.parse('AAA')
            self.assertEqual(next(x.find_data('a')).children, ['A'])
            self.assertEqual(next(x.find_data('b')).children, ['A'])
            self.assertEqual(next(x.find_data('d')).children, ['A'])

        def test_import_errors(self):
            grammar = """
                start: NUMBER WORD
                %import .grammars.bad_test.NUMBER
                """
            self.assertRaises(IOError, _Lark, grammar)

            grammar = """
                start: NUMBER WORD
                %import bad_test.NUMBER
                """
            self.assertRaises(IOError, _Lark, grammar)

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization(self):
            "Tests effect of priority on result"
            grammar = """
                start: a | b
                a.1: "a"
                b.2: "a"
                """
            # l = Lark(grammar, parser='earley', lexer='standard')
            l = _Lark(grammar)
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'b')

            grammar = """
                start: a | b
                a.2: "a"
                b.1: "a"
                """
            l = _Lark(grammar)
            # l = Lark(grammar, parser='earley', lexer='standard')
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'a')

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization_sum(self):
            "Tests effect of priority on result"
            grammar = """
                start: ab_ b_ a_ | indirection
                indirection: a_ bb_ a_
                a_: "a"
                b_: "b"
                ab_: "ab"
                bb_.1: "bb"
                """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
                start: ab_ b_ a_ | indirection
                indirection: a_ bb_ a_
                a_: "a"
                b_: "b"
                ab_.1: "ab"
                bb_: "bb"
                """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

            grammar = """
                start: ab_ b_ a_ | indirection
                indirection: a_ bb_ a_
                a_.2: "a"
                b_.1: "b"
                ab_.3: "ab"
                bb_.3: "bb"
                """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
                start: ab_ b_ a_ | indirection
                indirection: a_ bb_ a_
                a_.1: "a"
                b_.1: "b"
                ab_.4: "ab"
                bb_.3: "bb"
                """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

        def test_utf8(self):
            g = u"""start: a
                    a: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [Tree('a', [])]))

            g = u"""start: A
                    A: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [u'\xb1a']))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_ignore(self):
            grammar = r"""
                COMMENT: /(!|(\/\/))[^\n]*/
                %ignore COMMENT
                %import common.WS -> _WS
                %import common.INT
                start: "INT"i _WS+ INT _WS*
                """
            parser = _Lark(grammar)

            tree = parser.parse("int 1 ! This is a comment\n")
            self.assertEqual(tree.children, ['1'])

            tree = parser.parse("int 1 ! This is a comment")    # A trailing ignore token can be tricky!
            self.assertEqual(tree.children, ['1'])

            parser = _Lark(r"""
                start : "a"*
                %ignore "b"
                """)
            tree = parser.parse("bb")
            self.assertEqual(tree.children, [])

        def test_regex_escaping(self):
            g = _Lark("start: /[ab]/")
            g.parse('a')
            g.parse('b')

            self.assertRaises( UnexpectedInput, g.parse, 'c')

            _Lark(r'start: /\w/').parse('a')

            g = _Lark(r'start: /\\w/')
            self.assertRaises( UnexpectedInput, g.parse, 'a')
            g.parse(r'\w')

            _Lark(r'start: /\[/').parse('[')
            _Lark(r'start: /\//').parse('/')
            _Lark(r'start: /\\/').parse('\\')
            _Lark(r'start: /\[ab]/').parse('[ab]')
            _Lark(r'start: /\\[ab]/').parse('\\a')
            _Lark(r'start: /\t/').parse('\t')
            _Lark(r'start: /\\t/').parse('\\t')
            _Lark(r'start: /\\\t/').parse('\\\t')
            _Lark(r'start: "\t"').parse('\t')
            _Lark(r'start: "\\t"').parse('\\t')
            _Lark(r'start: "\\\t"').parse('\\\t')

        def test_ranged_repeat_rules(self):
            g = u"""!start: "A"~3
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["A", "A", "A"]))
            self.assertRaises(ParseError, l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: "A"~0..2
                 """
            if PARSER != 'cyk':    # XXX CYK currently doesn't support empty grammars
                l = _Lark(g)
                self.assertEqual(l.parse(u''), Tree('start', []))
                self.assertEqual(l.parse(u'A'), Tree('start', ['A']))
                self.assertEqual(l.parse(u'AA'), Tree('start', ['A', 'A']))
                self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'AAA')

            g = u"""!start: "A"~3..2
                 """
            self.assertRaises(GrammarError, _Lark, g)

            g = u"""!start: "A"~2..3 "B"~2
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
            self.assertEqual(l.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
            self.assertRaises(ParseError, l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        def test_ranged_repeat_terms(self):
            g = u"""!start: AAA
                    AAA: "A"~3
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"]))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: AABB CC
                    AABB: "A"~0..2 "B"~2
                    CC: "C"~1..2
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC']))
            self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C']))
            self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC']))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now") # TODO XXX
        def test_priority_vs_embedded(self):
            g = """
                A.2: "a"
                WORD: ("a".."z")+
                start: (A | WORD)+
                """
            l = _Lark(g)
            t = l.parse('abc')
            self.assertEqual(t.children, ['a', 'bc'])
            self.assertEqual(t.children[0].type, 'A')

        def test_line_counting(self):
            p = _Lark("start: /[^x]+/")

            text = 'hello\nworld'
            t = p.parse(text)
            tok = t.children[0]
            self.assertEqual(tok, text)
            self.assertEqual(tok.line, 1)
            self.assertEqual(tok.column, 1)
            if LEXER != 'dynamic':
                self.assertEqual(tok.end_line, 2)
                self.assertEqual(tok.end_column, 6)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_empty_end(self):
            p = _Lark("""
                start: b c d
                b: "B"
                c: | "C"
                d: | "D"
                """)
            res = p.parse('B')
            self.assertEqual(len(res.children), 3)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_maybe_placeholders(self):
            # Anonymous tokens shouldn't count
            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [])

            # All invisible constructs shouldn't count
            p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
                         A: "a"
                         _c: "c" """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None])
            self.assertEqual(p.parse("c").children, [None])
            self.assertEqual(p.parse("aefc").children, ['a'])

            # ? shouldn't apply
            p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])

            p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None, None])
            self.assertEqual(p.parse("a").children, ['a', None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("c").children, [None, None, 'c'])
            self.assertEqual(p.parse("ab").children, ['a', 'b', None])
            self.assertEqual(p.parse("ac").children, ['a', None, 'c'])
            self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
            self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

            p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
            self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
            self.assertEqual(p.parse("babbcabcb").children,
                [None, 'b', None,
                 'a', 'b', None,
                 None, 'b', 'c',
                 'a', 'b', 'c',
                 None, 'b', None])

            p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
            self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
            self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
            self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])

        def test_escaped_string(self):
            "Tests common.ESCAPED_STRING"
            grammar = r"""
                start: ESCAPED_STRING+
                %import common (WS_INLINE, ESCAPED_STRING)
                %ignore WS_INLINE
                """
            parser = _Lark(grammar)
            parser.parse(r'"\\" "b" "c"')
            parser.parse(r'"That" "And a \"b"')

        def test_meddling_unused(self):
            "Unless 'unused' is removed, LALR analysis will fail on reduce-reduce collision"
            grammar = """
                start: EKS* x
                x: EKS
                unused: x*
                EKS: "x"
                """
            parser = _Lark(grammar)

        @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
        def test_serialize(self):
            grammar = """
                start: _ANY b "C"
                _ANY: /./
                b: "B"
                """
            parser = _Lark(grammar)
            s = BytesIO()
            parser.save(s)
            s.seek(0)
            parser2 = Lark.load(s)
            self.assertEqual(parser2.parse('ABC'), Tree('start', [Tree('b', [])]) )

        def test_multi_start(self):
            parser = _Lark('''
                a: "x" "a"?
                b: "x" "b"?
                ''', start=['a', 'b'])

            self.assertEqual(parser.parse('xa', 'a'), Tree('a', []))
            self.assertEqual(parser.parse('xb', 'b'), Tree('b', []))

        def test_lexer_detect_newline_tokens(self):
            # Detect newlines in regular tokens
            g = _Lark(r"""start: "go" tail*
                          !tail : SA "@" | SB "@" | SC "@" | SD "@"
                          SA : "a" /\n/
                          SB : /b./s
                          SC : "c" /[^a-z]/
                          SD : "d" /\s/
                       """)
            a, b, c, d = [x.children[1] for x in g.parse('goa\n@b\n@c\n@d\n@').children]
            self.assertEqual(a.line, 2)
            self.assertEqual(b.line, 3)
            self.assertEqual(c.line, 4)
            self.assertEqual(d.line, 5)

            # Detect newlines in ignored tokens
            for regexp in ['/\\n/', '/[^a-z]/', '/\\s/']:
                g = _Lark('''!start: "a" "a"
                             %ignore {}'''.format(regexp))
                a, b = g.parse('a\na').children
                self.assertEqual(a.line, 1)
                self.assertEqual(b.line, 2)

        @unittest.skipIf(not regex or sys.version_info[0] == 2, 'Unicode and Python 2 do not play nicely together.')
        def test_unicode_class(self):
            "Tests that character classes from the `regex` module work correctly."
            g = _Lark(r"""?start: NAME
                          NAME: ID_START ID_CONTINUE*
                          ID_START: /[\p{Lu}\p{Ll}\p{Lt}\p{Lm}\p{Lo}\p{Nl}_]+/
                          ID_CONTINUE: ID_START | /[\p{Mn}\p{Mc}\p{Nd}\p{Pc}]+/""", regex=True)
            self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')

        @unittest.skipIf(not regex or sys.version_info[0] == 2, 'Unicode and Python 2 do not play nicely together.')
        def test_unicode_word(self):
            "Tests that a pattern affected by a persistent bug in the `re` module works when `regex` is enabled."
            g = _Lark(r"""?start: NAME
                          NAME: /[\w]+/
                       """, regex=True)
            self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')

    _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
    _TestParser.__name__ = _NAME
    _TestParser.__qualname__ = "tests.test_parser." + _NAME
    globals()[_NAME] = _TestParser


# Note: You still have to import them in __main__ for the tests to run
_TO_TEST = [
        ('standard', 'earley'),
        ('standard', 'cyk'),
        ('dynamic', 'earley'),
        ('dynamic_complete', 'earley'),
        ('standard', 'lalr'),
        ('contextual', 'lalr'),
        ('custom', 'lalr'),
        # (None, 'earley'),
]

for _LEXER, _PARSER in _TO_TEST:
    _make_parser_test(_LEXER, _PARSER)

for _LEXER in ('dynamic', 'dynamic_complete'):
    _make_full_earley_test(_LEXER)
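
# The loops above generate classes such as TestLalrStandard (from the
# ('standard', 'lalr') entry) and TestFullEarleyDynamic. A test runner that
# imports this module as `tests.test_parser` (the __qualname__ assigned above)
# would pick them up with, e.g.:
#
#     from tests.test_parser import TestLalrStandard, TestFullEarleyDynamic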

if __name__ == '__main__':
    unittest.main()