# -*- coding: utf-8 -*-
from __future__ import absolute_import

try:
    import regex as re
except ImportError:
    import re
import unittest
import logging
import os
import sys
from copy import deepcopy
try:
    from cStringIO import StringIO as cStringIO
except ImportError:
    # Available only in Python 2.x; Python 3.x only has io.StringIO, imported below
    cStringIO = None
from io import (
        StringIO as uStringIO,
        BytesIO,
        open,
    )

logging.basicConfig(level=logging.INFO)

from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer

__path__ = os.path.dirname(__file__)
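
# Helper: read a fixture file relative to this test module.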
def _read(n, *args):
    with open(os.path.join(__path__, n), *args) as f:
        return f.read()


class TestParsers(unittest.TestCase):
    def test_big_list(self):
        Lark(r"""
            start: {}
        """.format(
            "|".join(['"%s"'%i for i in range(250)])
        ))

    def test_same_ast(self):
        "Tests that Earley and LALR parsers produce equal trees"
        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w+/ """, parser='lalr')
        l = g.parse('(a,b,c,*x)')

        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w/+ """)
        l2 = g.parse('(a,b,c,*x)')
        assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())

    def test_infinite_recurse(self):
        g = """start: a
               a: a | "a"
            """

        self.assertRaises(GrammarError, Lark, g, parser='lalr')

        # TODO: should it? shouldn't it?
        # l = Lark(g, parser='earley', lexer='dynamic')
        # self.assertRaises(ParseError, l.parse, 'a')

    def test_propagate_positions(self):
        g = Lark("""start: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )

        g = Lark("""start: x
                    x: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )

    def test_expand1(self):
        g = Lark("""start: a
                    ?a: b
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "b" )

        g = Lark("""start: a
                    ?a: b -> c
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: B -> c
                    B: "x"
                 """)
        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: b b -> c
                    b: "x"
                 """)

        r = g.parse('xx')
        self.assertEqual( r.children[0].data, "c" )

    def test_comment_in_rule_definition(self):
        g = Lark("""start: a
               a: "a"
                // A comment
                // Another comment
                | "b"
                // Still more

               c: "unrelated"
            """)
        r = g.parse('b')
        self.assertEqual( r.children[0].data, "a" )

    def test_visit_tokens(self):
        class T(Transformer):
            def a(self, children):
                return children[0] + "!"

            def A(self, tok):
                return tok.update(value=tok.upper())

        # Test regular
        g = """start: a
            a : A
            A: "x"
            """
        p = Lark(g, parser='lalr')
        r = T(False).transform(p.parse("x"))
        self.assertEqual( r.children, ["x!"] )
        r = T().transform(p.parse("x"))
        self.assertEqual( r.children, ["X!"] )

        # Test internal transformer
        p = Lark(g, parser='lalr', transformer=T())
        r = p.parse("x")
        self.assertEqual( r.children, ["X!"] )

    def test_vargs_meta(self):

        @v_args(meta=True)
        class T1(Transformer):
            def a(self, children, meta):
                assert not children
                return meta.line

            def start(self, children, meta):
                return children

        @v_args(meta=True, inline=True)
        class T2(Transformer):
            def a(self, meta):
                return meta.line

            def start(self, meta, *res):
                return list(res)

        for T in (T1, T2):
            for internal in [False, True]:
                try:
                    g = Lark(r"""start: a+
                                 a : "x" _NL?
                                 _NL: /\n/+
                              """, parser='lalr', transformer=T() if internal else None, propagate_positions=True)
                except NotImplementedError:
                    assert internal
                    continue
                res = g.parse("xx\nx\nxxx\n\n\nxx")
                assert not internal
                res = T().transform(res)
                self.assertEqual(res, [1, 1, 2, 3, 3, 3, 6, 6])

    def test_vargs_tree(self):
        tree = Lark('''
            start: a a a
            !a: "A"
        ''').parse('AAA')
        tree_copy = deepcopy(tree)

        @v_args(tree=True)
        class T(Transformer):
            def a(self, tree):
                return 1

            def start(self, tree):
                return tree.children

        res = T().transform(tree)
        self.assertEqual(res, [1, 1, 1])
        self.assertEqual(tree, tree_copy)

    def test_embedded_transformer(self):
        class T(Transformer):
            def a(self, children):
                return "<a>"
            def b(self, children):
                return "<b>"
            def c(self, children):
                return "<c>"

        # Test regular
        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<a>"] )

        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<a>"] )

        # Test Expand1
        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<b>"] )

        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<b>"] )

        # Test Expand1 -> Alias
        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("xx"))
        self.assertEqual( r.children, ["<c>"] )

        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("xx")
        self.assertEqual( r.children, ["<c>"] )

    def test_embedded_transformer_inplace(self):
        @v_args(tree=True)
        class T1(Transformer_InPlace):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        @v_args(tree=True)
        class T2(Transformer):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        class T3(Transformer):
            @v_args(tree=True)
            def a(self, tree):
                assert isinstance(tree, Tree)
                tree.children.append("tested")
                return tree

            @v_args(tree=True)
            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        for t in [T1(), T2(), T3()]:
            for internal in [False, True]:
                g = Lark("""start: a b
                            a : "x"
                            b : "y"
                         """, parser='lalr', transformer=t if internal else None)
                r = g.parse("xy")
                if not internal:
                    r = t.transform(r)

                a, b = r.children
                self.assertEqual(a.children, ["tested"])
                self.assertEqual(b.children, ["tested2"])

    def test_alias(self):
        Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """)


def _make_full_earley_test(LEXER):
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=LEXER, parser='earley', propagate_positions=True, **kwargs)

    class _TestFullEarley(unittest.TestCase):
        def test_anon(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = Lark(r"""start: B
                         B: ("ab"|/[^b]/)+
                      """, lexer=LEXER)

            self.assertEqual( g.parse('abc').children[0], 'abc')

        def test_earley(self):
            g = Lark("""start: A "b" c
                        A: "a"+
                        c: "abc"
                     """, parser="earley", lexer=LEXER)
            x = g.parse('aaaababc')

        def test_earley2(self):
            grammar = """
            start: statement+

            statement: "r"
                     | "c" /[a-z]/+

            %ignore " "
            """

            program = """c b r"""

            l = Lark(grammar, parser='earley', lexer=LEXER)
            l.parse(program)

        @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete lexer")
        def test_earley3(self):
            """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)

            By default, `+` should imitate regexp greedy-matching
            """
            grammar = """
            start: A A
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            self.assertEqual(set(res.children), {'aa', 'a'})
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aa', 'a'])

        def test_earley4(self):
            grammar = """
            start: A A?
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            assert set(res.children) == {'aa', 'a'} or res.children == ['aaa']
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aaa'])

        def test_earley_repeating_empty(self):
            # This was a sneaky bug!

            grammar = """
            !start: "a" empty empty "b"
            empty: empty2
            empty2:
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER)
            res = parser.parse('ab')

            empty_tree = Tree('empty', [Tree('empty2', [])])
            self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_earley_explicit_ambiguity(self):
            # This was a sneaky bug!

            grammar = """
            start: a b | ab
            a: "a"
            b: "b"
            ab: "ab"
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit')
            ambig_tree = parser.parse('ab')
            self.assertEqual( ambig_tree.data, '_ambig')
            self.assertEqual( len(ambig_tree.children), 2)

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity1(self):
            grammar = """
            start: cd+ "e"

            !cd: "c"
               | "d"
               | "cd"
            """

            l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
            ambig_tree = l.parse('cde')

            assert ambig_tree.data == '_ambig', ambig_tree
            assert len(ambig_tree.children) == 2

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity2(self):
            grammar = """
            ANY: /[a-zA-Z0-9 ]+/
            a.2: "A" b+
            b.2: "B"
            c: ANY

            start: (a|c)*
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse('ABX')
            expected = Tree('start', [
                    Tree('a', [
                        Tree('b', [])
                    ]),
                    Tree('c', [
                        'X'
                    ])
                ])
            self.assertEqual(res, expected)

        def test_fruitflies_ambig(self):
            grammar = """
                start: noun verb noun        -> simple
                     | noun verb "like" noun -> comparative

                noun: adj? NOUN
                verb: VERB
                adj: ADJ

                NOUN: "flies" | "bananas" | "fruit"
                VERB: "like" | "flies"
                ADJ: "fruit"

                %import common.WS
                %ignore WS
            """
            parser = Lark(grammar, ambiguity='explicit', lexer=LEXER)
            tree = parser.parse('fruit flies like bananas')

            expected = Tree('_ambig', [
                    Tree('comparative', [
                        Tree('noun', ['fruit']),
                        Tree('verb', ['flies']),
                        Tree('noun', ['bananas'])
                    ]),
                    Tree('simple', [
                        Tree('noun', [Tree('adj', ['fruit']), 'flies']),
                        Tree('verb', ['like']),
                        Tree('noun', ['bananas'])
                    ])
                ])

            # self.assertEqual(tree, expected)
            self.assertEqual(tree.data, expected.data)
            self.assertEqual(set(tree.children), set(expected.children))

        @unittest.skipIf(LEXER!='dynamic_complete', "Only relevant for the dynamic_complete lexer")
        def test_explicit_ambiguity2(self):
            grammar = r"""
            start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """cat"""

            parser = _Lark(grammar, start='start', ambiguity='explicit')
            tree = parser.parse(text)
            self.assertEqual(tree.data, '_ambig')

            combinations = {tuple(str(s) for s in t.children) for t in tree.children}
            self.assertEqual(combinations, {
                ('cat',),
                ('ca', 't'),
                ('c', 'at'),
                ('c', 'a', 't'),
            })

        def test_term_ambig_resolve(self):
            grammar = r"""
            !start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """foo bar"""

            parser = Lark(grammar)
            tree = parser.parse(text)
            self.assertEqual(tree.children, ['foo', 'bar'])

        # @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
        # def test_not_all_derivations(self):
        #     grammar = """
        #     start: cd+ "e"
        #     !cd: "c"
        #        | "d"
        #        | "cd"
        #     """
        #     l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
        #     x = l.parse('cde')
        #     assert x.data != '_ambig', x
        #     assert len(x.children) == 1

    _NAME = "TestFullEarley" + LEXER.capitalize()
    _TestFullEarley.__name__ = _NAME
    globals()[_NAME] = _TestFullEarley
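
# _make_full_earley_test (above) and _make_parser_test (below) stamp out one
# TestCase subclass per configuration and register it in the module globals,
# where the unittest runner can find it. The invocations are at the bottom of
# this file.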

class CustomLexer(Lexer):
    """
    The purpose of this custom lexer is to test integration,
    so it uses TraditionalLexer as its implementation, without custom lexing behaviour.
    """
    def __init__(self, lexer_conf, re_):
        self.lexer = TraditionalLexer(lexer_conf.tokens, re_, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks, g_regex_flags=lexer_conf.g_regex_flags)

    def lex(self, *args, **kwargs):
        return self.lexer.lex(*args, **kwargs)
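
# A custom lexer is plugged in by passing the class itself, e.g.:
#     Lark(grammar, parser='lalr', lexer=CustomLexer)
# which is what _make_parser_test below does when LEXER == 'custom'.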

def _make_parser_test(LEXER, PARSER):
    lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER

    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)

    def _Lark_open(gfilename, **kwargs):
        return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)

    class _TestParser(unittest.TestCase):
        def test_basic1(self):
            g = _Lark("""start: a+ b a* "b" a*
                         b: "b"
                         a: "a"
                      """)

            r = g.parse('aaabaab')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )
            r = g.parse('aaabaaba')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaaa' )

            self.assertRaises(ParseError, g.parse, 'aaabaa')

        def test_basic2(self):
            # Multiple parsers and colliding tokens
            g = _Lark("""start: B A
                         B: "12"
                         A: "1" """)
            g2 = _Lark("""start: B A
                          B: "12"
                          A: "2" """)
            x = g.parse('121')
            assert x.data == 'start' and x.children == ['12', '1'], x
            x = g2.parse('122')
            assert x.data == 'start' and x.children == ['12', '2'], x

        @unittest.skipIf(cStringIO is None, "cStringIO not available")
        def test_stringio_bytes(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_stringio_unicode(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_unicode(self):
            g = _Lark(u"""start: UNIA UNIB UNIA
                          UNIA: /\xa3/
                          UNIB: /\u0101/
                       """)
            g.parse(u'\xa3\u0101\u00a3')

        def test_unicode2(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "a\u0101b\ "
                          UNIC: /a?\u0101c\n/
                       """)
            g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n')

        def test_unicode3(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "\u0101"
                          UNIC: /\u0203/ /\n/
                       """)
            g.parse(u'\xa3\u0101\u00a3\u0203\n')

        def test_hex_escape(self):
            g = _Lark(r"""start: A B C
                          A: "\x01"
                          B: /\x02/
                          C: "\xABCD"
                       """)
            g.parse('\x01\x02\xABCD')

        def test_unicode_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\u0061".."\u0063"
                       """)
            g.parse('abc')

        def test_hex_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\x01".."\x03"
                       """)
            g.parse('\x01\x02\x03')

        @unittest.skipIf(PARSER == 'cyk', "Takes forever")
        def test_stack_for_ebnf(self):
            """Verify that stack depth isn't an issue for EBNF grammars"""
            g = _Lark(r"""start: a+
                          a : "a" """)

            g.parse("a" * (sys.getrecursionlimit()*2 ))

        def test_expand1_lists_with_one_item(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("a")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_expand1_lists_with_one_item_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("a!")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_dont_expand1_lists_with_multiple_items(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        def test_dont_expand1_lists_with_multiple_items_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa!")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list(self):
            g = _Lark(r"""start: list
                          ?list: item*
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list_2(self):
            g = _Lark(r"""start: list
                          ?list: item* "!"?
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # Because 'list' is a flatten rule, its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_single_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,")

            # Because 'list' is a flatten rule, its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item',))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_multiple_item_flatten_list(self):
            g = _Lark(r"""start: list
                          #list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,a,")

            # Because 'list' is a flatten rule, its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_recurse_flatten(self):
            """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
            g = _Lark(r"""start: a | start a
                          a : A
                          A : "a" """)

            # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
            # STree data structures, which uses recursion).
            g.parse("a" * (sys.getrecursionlimit() // 4))

        def test_token_collision(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %ignore " "
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision_WS(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %import common.WS
                          %ignore WS
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision2(self):
            g = _Lark("""
                    !start: "starts"
                    %import common.LCASE_LETTER
                    """)

            x = g.parse("starts")
            self.assertSequenceEqual(x.children, ['starts'])

        def test_templates(self):
            g = _Lark(r"""
                       start: "[" sep{NUMBER, ","} "]"
                       sep{item, delim}: item (delim item)*
                       NUMBER: /\d+/
                       %ignore " "
                       """)
            x = g.parse("[1, 2, 3, 4]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1', '2', '3', '4'])])
            x = g.parse("[1]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1'])])
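
            # Templates are instantiated per call site: sep{NUMBER, ","} expands
            # the parameterized rule with item=NUMBER and delim=",".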

        def test_templates_recursion(self):
            g = _Lark(r"""
                       start: "[" _sep{NUMBER, ","} "]"
                       _sep{item, delim}: item | _sep{item, delim} delim item
                       NUMBER: /\d+/
                       %ignore " "
                       """)
            x = g.parse("[1, 2, 3, 4]")
            self.assertSequenceEqual(x.children, ['1', '2', '3', '4'])
            x = g.parse("[1]")
            self.assertSequenceEqual(x.children, ['1'])

        def test_templates_import(self):
            g = _Lark_open("test_templates_import.lark", rel_to=__file__)
            x = g.parse("[1, 2, 3, 4]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1', '2', '3', '4'])])
            x = g.parse("[1]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1'])])

        def test_templates_alias(self):
            g = _Lark(r"""
                       start: expr{"C"}
                       expr{t}: "A" t
                              | "B" t -> b
                       """)
            x = g.parse("AC")
            self.assertSequenceEqual(x.children, [Tree('expr', [])])
            x = g.parse("BC")
            self.assertSequenceEqual(x.children, [Tree('b', [])])

        def test_templates_modifiers(self):
            g = _Lark(r"""
                       start: expr{"B"}
                       !expr{t}: "A" t
                       """)
            x = g.parse("AB")
            self.assertSequenceEqual(x.children, [Tree('expr', ["A", "B"])])
            g = _Lark(r"""
                       start: _expr{"B"}
                       !_expr{t}: "A" t
                       """)
            x = g.parse("AB")
            self.assertSequenceEqual(x.children, ["A", "B"])
            g = _Lark(r"""
                       start: expr{b}
                       b: "B"
                       ?expr{t}: "A" t
                       """)
            x = g.parse("AB")
            self.assertSequenceEqual(x.children, [Tree('b', [])])

        def test_templates_templates(self):
            g = _Lark('''start: a{b}
                         a{t}: t{"a"}
                         b{x}: x''')
            x = g.parse('a')
            self.assertSequenceEqual(x.children, [Tree('a', [Tree('b', [])])])

        def test_g_regex_flags(self):
            g = _Lark("""
                    start: "a" /b+/ C
                    C: "C" | D
                    D: "D" E
                    E: "e"
                    """, g_regex_flags=re.I)
            x1 = g.parse("ABBc")
            x2 = g.parse("abdE")

        # def test_string_priority(self):
        #     g = _Lark("""start: (A | /a?bb/)+
        #                  A: "a" """)
        #     x = g.parse('abb')
        #     self.assertEqual(len(x.children), 2)
        #
        #     # This parse raises an exception because the lexer will always try to consume
        #     # "a" first and will never match the regular expression
        #     # This behavior is subject to change!!
        #     # This won't happen with ambiguity handling.
        #     g = _Lark("""start: (A | /a?ab/)+
        #                  A: "a" """)
        #     self.assertRaises(LexError, g.parse, 'aab')

        def test_undefined_rule(self):
            self.assertRaises(GrammarError, _Lark, """start: a""")

        def test_undefined_token(self):
            self.assertRaises(GrammarError, _Lark, """start: A""")

        def test_rule_collision(self):
            g = _Lark("""start: "a"+ "b"
                             | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')

        def test_rule_collision2(self):
            g = _Lark("""start: "a"* "b"
                             | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')
            x = g.parse('b')

        def test_token_not_anon(self):
            """Tests that "a" is matched as an anonymous token, and not A.
            """
            g = _Lark("""start: "a"
                         A: "a" """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 0, '"a" should be considered anonymous')

            g = _Lark("""start: "a" A
                         A: "a" """)
            x = g.parse('aa')
            self.assertEqual(len(x.children), 1, 'only "a" should be considered anonymous')
            self.assertEqual(x.children[0].type, "A")

            g = _Lark("""start: /a/
                         A: /a/ """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 1)
            self.assertEqual(x.children[0].type, "A", "A isn't associated with /a/")

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_maybe(self):
            g = _Lark("""start: ["a"] """)
            x = g.parse('a')
            x = g.parse('')

        def test_start(self):
            g = _Lark("""a: "a" a? """, start='a')
            x = g.parse('a')
            x = g.parse('aa')
            x = g.parse('aaa')

        def test_alias(self):
            g = _Lark("""start: "a" -> b """)
            x = g.parse('a')
            self.assertEqual(x.data, "b")

        def test_token_ebnf(self):
            g = _Lark("""start: A
                         A: "a"* ("b"? "c".."e")+
                      """)
            x = g.parse('abcde')
            x = g.parse('dd')

        def test_backslash(self):
            g = _Lark(r"""start: "\\" "a"
                       """)
            x = g.parse(r'\a')

            g = _Lark(r"""start: /\\/ /a/
                       """)
            x = g.parse(r'\a')

        def test_backslash2(self):
            g = _Lark(r"""start: "\"" "-"
                       """)
            x = g.parse('"-')

            g = _Lark(r"""start: /\// /-/
                       """)
            x = g.parse('/-')

        def test_special_chars(self):
            g = _Lark(r"""start: "\n"
                       """)
            x = g.parse('\n')

            g = _Lark(r"""start: /\n/
                       """)
            x = g.parse('\n')

        # def test_token_recurse(self):
        #     g = _Lark("""start: A
        #                  A: B
        #                  B: A
        #               """)

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = _Lark(r"""start: _empty a "B"
                          a: _empty "A"
                          _empty:
                       """)
            x = g.parse('AB')

        def test_regex_quote(self):
            g = r"""
            start: SINGLE_QUOTED_STRING | DOUBLE_QUOTED_STRING
            SINGLE_QUOTED_STRING : /'[^']*'/
            DOUBLE_QUOTED_STRING : /"[^"]*"/
            """
            g = _Lark(g)
            self.assertEqual( g.parse('"hello"').children, ['"hello"'])
            self.assertEqual( g.parse("'hello'").children, ["'hello'"])

        def test_lexer_token_limit(self):
            "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"
            tokens = {'A%d'%i:'"%d"'%i for i in range(300)}
            g = _Lark("""start: %s
                      %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))

        def test_float_without_lexer(self):
            expected_error = UnexpectedCharacters if LEXER.startswith('dynamic') else UnexpectedToken
            if PARSER == 'cyk':
                expected_error = ParseError

            g = _Lark("""start: ["+"|"-"] float
                         float: digit* "." digit+ exp?
                              | digit+ exp
                         exp: ("e"|"E") ["+"|"-"] digit+
                         digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
                      """)
            g.parse("1.2")
            g.parse("-.2e9")
            g.parse("+2e-9")
            self.assertRaises( expected_error, g.parse, "+2e-9e")

        def test_keep_all_tokens(self):
            l = _Lark("""start: "a"+ """, keep_all_tokens=True)
            tree = l.parse('aaa')
            self.assertEqual(tree.children, ['a', 'a', 'a'])
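
            # keep_all_tokens=True retains anonymous tokens like "a" that would
            # otherwise be filtered out of the tree.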

        def test_token_flags(self):
            l = _Lark("""!start: "a"i+
                      """)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            l = _Lark("""!start: /a/i+
                      """)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            # g = """!start: "a"i "a"
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            # g = """!start: /a/i /a/
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            g = """start: NAME "," "a"
                   NAME: /[a-z_]/i /[a-z0-9_]/i*
                """
            l = _Lark(g)
            tree = l.parse('ab,a')
            self.assertEqual(tree.children, ['ab'])
            tree = l.parse('AB,a')
            self.assertEqual(tree.children, ['AB'])

        def test_token_flags3(self):
            l = _Lark("""!start: ABC+
                         ABC: "abc"i
                      """)
            tree = l.parse('aBcAbC')
            self.assertEqual(tree.children, ['aBc', 'AbC'])

        def test_token_flags2(self):
            g = """!start: ("a"i | /a/ /b/?)+
                """
            l = _Lark(g)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_twice_empty(self):
            g = """!start: ("A"?)?
                """
            l = _Lark(g)
            tree = l.parse('A')
            self.assertEqual(tree.children, ['A'])
            tree = l.parse('')
            self.assertEqual(tree.children, [])

        def test_undefined_ignore(self):
            g = """!start: "A"
                   %ignore B
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_alias_in_terminal(self):
            g = """start: TERM
                   TERM: "a" -> alias
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_line_and_column(self):
            g = r"""!start: "A" bc "D"
                    !bc: "B\nC"
                """
            l = _Lark(g)
            a, bc, d = l.parse("AB\nCD").children
            self.assertEqual(a.line, 1)
            self.assertEqual(a.column, 1)

            bc, = bc.children
            self.assertEqual(bc.line, 1)
            self.assertEqual(bc.column, 2)

            self.assertEqual(d.line, 2)
            self.assertEqual(d.column, 2)

            if LEXER != 'dynamic':
                self.assertEqual(a.end_line, 1)
                self.assertEqual(a.end_column, 2)
                self.assertEqual(bc.end_line, 2)
                self.assertEqual(bc.end_column, 2)
                self.assertEqual(d.end_line, 2)
                self.assertEqual(d.end_column, 3)

        def test_reduce_cycle(self):
            """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state.
            It seems that the correct solution is to explicitly distinguish finalization in the reduce() function.
            """
            l = _Lark("""
                term: A
                    | term term

                A: "a"
                """, start='term')

            tree = l.parse("aa")
            self.assertEqual(len(tree.children), 2)

        @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority")
        def test_lexer_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: A B | AB
            A.2: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['a', 'b'])
            self.assertNotEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B: "b"
            AB.3: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertNotEqual(res.children, ['a', 'b'])
            self.assertEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B.-20: "b"
            AB.-10: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertEqual(res.children, ['a', 'b'])

            grammar = """
            start: A B | AB
            A.-99999999999999999999999: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['ab'])
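
            # Summary: the standard lexer prefers the terminal with the higher
            # `.N` priority; priorities may be negative and arbitrarily large.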

        def test_import(self):
            grammar = """
            start: NUMBER WORD

            %import common.NUMBER
            %import common.WORD
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_import_rename(self):
            grammar = """
            start: N W

            %import common.NUMBER -> N
            %import common.WORD -> W
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_relative_import(self):
            l = _Lark_open('test_relative_import.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_import_unicode(self):
            l = _Lark_open('test_relative_import_unicode.lark', rel_to=__file__)
            x = l.parse(u'Ø')
            self.assertEqual(x.children, [u'Ø'])

        def test_relative_import_rename(self):
            l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_rule_import(self):
            l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('expr', ['a', Tree('expr', ['a', 'b']), 'b']),
                'y'])

        def test_relative_rule_import_drop_ignore(self):
            # %ignore rules are dropped on import
            l = _Lark_open('test_relative_rule_import_drop_ignore.lark',
                           rel_to=__file__)
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xa abby')

        def test_relative_rule_import_subrule(self):
            l = _Lark_open('test_relative_rule_import_subrule.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', [
                        'a', Tree('grammars__ab__expr', ['a', 'b']), 'b',
                    ]),
                ]),
                'y'])

        def test_relative_rule_import_subrule_no_conflict(self):
            l = _Lark_open(
                'test_relative_rule_import_subrule_no_conflict.lark',
                rel_to=__file__)
            x = l.parse('xaby')
            self.assertEqual(x.children, [Tree('expr', [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', ['a', 'b']),
                ]),
                'y'])])
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xaxabyby')

        def test_relative_rule_import_rename(self):
            l = _Lark_open('test_relative_rule_import_rename.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('ab', ['a', Tree('ab', ['a', 'b']), 'b']),
                'y'])

        def test_multi_import(self):
            grammar = """
            start: NUMBER WORD

            %import common (NUMBER, WORD, WS)
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 toucans')
            self.assertEqual(x.children, ['12', 'toucans'])

        def test_relative_multi_import(self):
            l = _Lark_open("test_relative_multi_import.lark", rel_to=__file__)
            x = l.parse('12 capybaras')
            self.assertEqual(x.children, ['12', 'capybaras'])

        def test_relative_import_preserves_leading_underscore(self):
            l = _Lark_open("test_relative_import_preserves_leading_underscore.lark", rel_to=__file__)
            x = l.parse('Ax')
            self.assertEqual(next(x.find_data('c')).children, ['A'])

        def test_relative_import_of_nested_grammar(self):
            l = _Lark_open("grammars/test_relative_import_of_nested_grammar.lark", rel_to=__file__)
            x = l.parse('N')
            self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])

        def test_relative_import_rules_dependencies_imported_only_once(self):
            l = _Lark_open("test_relative_import_rules_dependencies_imported_only_once.lark", rel_to=__file__)
            x = l.parse('AAA')
            self.assertEqual(next(x.find_data('a')).children, ['A'])
            self.assertEqual(next(x.find_data('b')).children, ['A'])
            self.assertEqual(next(x.find_data('d')).children, ['A'])

        def test_import_errors(self):
            grammar = """
            start: NUMBER WORD

            %import .grammars.bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

            grammar = """
            start: NUMBER WORD

            %import bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: a | b
            a.1: "a"
            b.2: "a"
            """

            # l = Lark(grammar, parser='earley', lexer='standard')
            l = _Lark(grammar)
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'b')

            grammar = """
            start: a | b
            a.2: "a"
            b.1: "a"
            """

            l = _Lark(grammar)
            # l = Lark(grammar, parser='earley', lexer='standard')
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'a')

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization_sum(self):
            "Tests effect of priority on result"

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_: "ab"
            bb_.1: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_.1: "ab"
            bb_: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.2: "a"
            b_.1: "b"
            ab_.3: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.1: "a"
            b_.1: "b"
            ab_.4: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

        def test_utf8(self):
            g = u"""start: a
                    a: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [Tree('a', [])]))

            g = u"""start: A
                    A: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [u'\xb1a']))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_ignore(self):
            grammar = r"""
            COMMENT: /(!|(\/\/))[^\n]*/
            %ignore COMMENT
            %import common.WS -> _WS
            %import common.INT
            start: "INT"i _WS+ INT _WS*
            """

            parser = _Lark(grammar)

            tree = parser.parse("int 1 ! This is a comment\n")
            self.assertEqual(tree.children, ['1'])

            tree = parser.parse("int 1 ! This is a comment")    # A trailing ignore token can be tricky!
            self.assertEqual(tree.children, ['1'])

            parser = _Lark(r"""
                start : "a"*
                %ignore "b"
            """)
            tree = parser.parse("bb")
            self.assertEqual(tree.children, [])

        def test_regex_escaping(self):
            g = _Lark("start: /[ab]/")
            g.parse('a')
            g.parse('b')

            self.assertRaises( UnexpectedInput, g.parse, 'c')

            _Lark(r'start: /\w/').parse('a')

            g = _Lark(r'start: /\\w/')
            self.assertRaises( UnexpectedInput, g.parse, 'a')
            g.parse(r'\w')

            _Lark(r'start: /\[/').parse('[')
            _Lark(r'start: /\//').parse('/')
            _Lark(r'start: /\\/').parse('\\')
            _Lark(r'start: /\[ab]/').parse('[ab]')
            _Lark(r'start: /\\[ab]/').parse('\\a')
            _Lark(r'start: /\t/').parse('\t')
            _Lark(r'start: /\\t/').parse('\\t')
            _Lark(r'start: /\\\t/').parse('\\\t')
            _Lark(r'start: "\t"').parse('\t')
            _Lark(r'start: "\\t"').parse('\\t')
            _Lark(r'start: "\\\t"').parse('\\\t')
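
            # Note the two escaping levels at play above: the Python raw string
            # passes backslashes through to the grammar, and the grammar's
            # regexps and string literals unescape them once more.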

        def test_ranged_repeat_rules(self):
            g = u"""!start: "A"~3
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["A", "A", "A"]))
            self.assertRaises(ParseError, l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: "A"~0..2
                """
            if PARSER != 'cyk': # XXX CYK currently doesn't support empty grammars
                l = _Lark(g)
                self.assertEqual(l.parse(u''), Tree('start', []))
                self.assertEqual(l.parse(u'A'), Tree('start', ['A']))
                self.assertEqual(l.parse(u'AA'), Tree('start', ['A', 'A']))
                self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'AAA')

            g = u"""!start: "A"~3..2
                """
            self.assertRaises(GrammarError, _Lark, g)

            g = u"""!start: "A"~2..3 "B"~2
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
            self.assertEqual(l.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
            self.assertRaises(ParseError, l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        def test_ranged_repeat_terms(self):
            g = u"""!start: AAA
                    AAA: "A"~3
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"]))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: AABB CC
                    AABB: "A"~0..2 "B"~2
                    CC: "C"~1..2
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC']))
            self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C']))
            self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC']))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now") # TODO XXX
        def test_priority_vs_embedded(self):
            g = """
            A.2: "a"
            WORD: ("a".."z")+

            start: (A | WORD)+
            """
            l = _Lark(g)
            t = l.parse('abc')
            self.assertEqual(t.children, ['a', 'bc'])
            self.assertEqual(t.children[0].type, 'A')

        def test_line_counting(self):
            p = _Lark("start: /[^x]+/")

            text = 'hello\nworld'
            t = p.parse(text)
            tok = t.children[0]
            self.assertEqual(tok, text)
            self.assertEqual(tok.line, 1)
            self.assertEqual(tok.column, 1)
            if LEXER != 'dynamic':
                self.assertEqual(tok.end_line, 2)
                self.assertEqual(tok.end_column, 6)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_empty_end(self):
            p = _Lark("""
                start: b c d
                b: "B"
                c: | "C"
                d: | "D"
            """)
            res = p.parse('B')
            self.assertEqual(len(res.children), 3)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_maybe_placeholders(self):
            # Anonymous tokens shouldn't count
            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [])

            # All invisible constructs shouldn't count
            p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
                         A: "a"
                         _c: "c" """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None])
            self.assertEqual(p.parse("c").children, [None])
            self.assertEqual(p.parse("aefc").children, ['a'])

            # ? shouldn't apply
            p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])

            p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None, None])
            self.assertEqual(p.parse("a").children, ['a', None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("c").children, [None, None, 'c'])
            self.assertEqual(p.parse("ab").children, ['a', 'b', None])
            self.assertEqual(p.parse("ac").children, ['a', None, 'c'])
            self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
            self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

            p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
            self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
            self.assertEqual(p.parse("babbcabcb").children,
                [None, 'b', None,
                 'a', 'b', None,
                 None, 'b', 'c',
                 'a', 'b', 'c',
                 None, 'b', None])

            p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
            self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
            self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
            self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])
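
            # In short: with maybe_placeholders=True, every []-optional that fails
            # to match contributes a None child, while anonymous tokens never count.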

        def test_escaped_string(self):
            "Tests common.ESCAPED_STRING"
            grammar = r"""
            start: ESCAPED_STRING+

            %import common (WS_INLINE, ESCAPED_STRING)
            %ignore WS_INLINE
            """

            parser = _Lark(grammar)

            parser.parse(r'"\\" "b" "c"')
            parser.parse(r'"That" "And a \"b"')

        def test_meddling_unused(self):
            "Unless 'unused' is removed, LALR analysis will fail on reduce-reduce collision"

            grammar = """
                start: EKS* x
                x: EKS
                unused: x*
                EKS: "x"
            """
            parser = _Lark(grammar)

        @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
        def test_serialize(self):
            grammar = """
                start: _ANY b "C"
                _ANY: /./
                b: "B"
            """
            parser = _Lark(grammar)
            s = BytesIO()
            parser.save(s)
            s.seek(0)
            parser2 = Lark.load(s)
            self.assertEqual(parser2.parse('ABC'), Tree('start', [Tree('b', [])]) )
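
            # The save()/load() round-trip restores a working parser without
            # re-reading the grammar text.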

        def test_multi_start(self):
            parser = _Lark('''
                a: "x" "a"?
                b: "x" "b"?
                ''', start=['a', 'b'])

            self.assertEqual(parser.parse('xa', 'a'), Tree('a', []))
            self.assertEqual(parser.parse('xb', 'b'), Tree('b', []))

        def test_lexer_detect_newline_tokens(self):
            # Detect newlines in regular tokens
            g = _Lark(r"""start: "go" tail*
            !tail : SA "@" | SB "@" | SC "@" | SD "@"
            SA : "a" /\n/
            SB : /b./s
            SC : "c" /[^a-z]/
            SD : "d" /\s/
            """)
            a, b, c, d = [x.children[1] for x in g.parse('goa\n@b\n@c\n@d\n@').children]
            self.assertEqual(a.line, 2)
            self.assertEqual(b.line, 3)
            self.assertEqual(c.line, 4)
            self.assertEqual(d.line, 5)

            # Detect newlines in ignored tokens
            for regexp in ['/\\n/', '/[^a-z]/', '/\\s/']:
                g = _Lark('''!start: "a" "a"
                             %ignore {}'''.format(regexp))
                a, b = g.parse('a\na').children
                self.assertEqual(a.line, 1)
                self.assertEqual(b.line, 2)

    _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
    _TestParser.__name__ = _NAME
    _TestParser.__qualname__ = "tests.test_parser." + _NAME
    globals()[_NAME] = _TestParser


# Note: You still have to import them in __main__ for the tests to run
_TO_TEST = [
        ('standard', 'earley'),
        ('standard', 'cyk'),
        ('dynamic', 'earley'),
        ('dynamic_complete', 'earley'),
        ('standard', 'lalr'),
        ('contextual', 'lalr'),
        ('custom', 'lalr'),
        # (None, 'earley'),
]

for _LEXER, _PARSER in _TO_TEST:
    _make_parser_test(_LEXER, _PARSER)

for _LEXER in ('dynamic', 'dynamic_complete'):
    _make_full_earley_test(_LEXER)

if __name__ == '__main__':
    unittest.main()