# -*- coding: utf-8 -*-
from __future__ import absolute_import

import re
import unittest
import logging
import os
import sys
from copy import deepcopy
try:
    from cStringIO import StringIO as cStringIO
except ImportError:
    # Available only in Python 2.x, 3.x only has io.StringIO from below
    cStringIO = None
from io import (
        StringIO as uStringIO,
        open,
    )

logging.basicConfig(level=logging.INFO)

from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer

__path__ = os.path.dirname(__file__)
def _read(n, *args):
    with open(os.path.join(__path__, n), *args) as f:
        return f.read()


class TestParsers(unittest.TestCase):
    def test_same_ast(self):
        "Tests that Earley and LALR parsers produce equal trees"
        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w+/ """, parser='lalr')
        l = g.parse('(a,b,c,*x)')

        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w/+ """)
        l2 = g.parse('(a,b,c,*x)')
        assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())

    def test_infinite_recurse(self):
        g = """start: a
               a: a | "a"
            """

        self.assertRaises(GrammarError, Lark, g, parser='lalr')

        # TODO: should it? shouldn't it?
        # l = Lark(g, parser='earley', lexer='dynamic')
        # self.assertRaises(ParseError, l.parse, 'a')

    def test_propagate_positions(self):
        g = Lark("""start: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )

        g = Lark("""start: x
                    x: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )

    def test_expand1(self):
        g = Lark("""start: a
                    ?a: b
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "b" )

        g = Lark("""start: a
                    ?a: b -> c
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: B -> c
                    B: "x"
                 """)
        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: b b -> c
                    b: "x"
                 """)

        r = g.parse('xx')
        self.assertEqual( r.children[0].data, "c" )

    def test_comment_in_rule_definition(self):
        g = Lark("""start: a
               a: "a"
                // A comment
                // Another comment
                | "b"
                // Still more

               c: "unrelated"
            """)
        r = g.parse('b')
        self.assertEqual( r.children[0].data, "a" )

    def test_visit_tokens(self):
        class T(Transformer):
            def a(self, children):
                return children[0] + "!"
            def A(self, tok):
                return tok.update(value=tok.upper())

        # Test regular
        g = """start: a
               a : A
               A: "x"
            """
        p = Lark(g, parser='lalr')
        r = T(False).transform(p.parse("x"))
        self.assertEqual( r.children, ["x!"] )
        r = T().transform(p.parse("x"))
        self.assertEqual( r.children, ["X!"] )

        # Test internal transformer
        p = Lark(g, parser='lalr', transformer=T())
        r = p.parse("x")
        self.assertEqual( r.children, ["X!"] )
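        # Note: Transformer's first __init__ argument is visit_tokens, so
        # T(False) skips the A() token callback and a() receives the raw
        # token, while T() (the default, visit_tokens=True) upper-cases it
        # first -- hence "x!" vs "X!" above.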

    def test_vargs_meta(self):

        @v_args(meta=True)
        class T1(Transformer):
            def a(self, children, meta):
                assert not children
                return meta.line

            def start(self, children, meta):
                return children

        @v_args(meta=True, inline=True)
        class T2(Transformer):
            def a(self, meta):
                return meta.line

            def start(self, meta, *res):
                return list(res)

        for T in (T1, T2):
            for internal in [False, True]:
                try:
                    g = Lark(r"""start: a+
                                 a : "x" _NL?
                                 _NL: /\n/+
                              """, parser='lalr', transformer=T() if internal else None, propagate_positions=True)
                except NotImplementedError:
                    assert internal
                    continue

                res = g.parse("xx\nx\nxxx\n\n\nxx")
                assert not internal
                res = T().transform(res)

                self.assertEqual(res, [1, 1, 2, 3, 3, 3, 6, 6])

    def test_vargs_tree(self):
        tree = Lark('''
            start: a a a
            !a: "A"
        ''').parse('AAA')
        tree_copy = deepcopy(tree)

        @v_args(tree=True)
        class T(Transformer):
            def a(self, tree):
                return 1
            def start(self, tree):
                return tree.children

        res = T().transform(tree)
        self.assertEqual(res, [1, 1, 1])
        self.assertEqual(tree, tree_copy)
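        # A plain Transformer builds a new tree and must leave its input
        # untouched, which is what the comparison against the deepcopy above
        # verifies; Transformer_InPlace is the mutating variant.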

    def test_embedded_transformer(self):
        class T(Transformer):
            def a(self, children):
                return "<a>"
            def b(self, children):
                return "<b>"
            def c(self, children):
                return "<c>"

        # Test regular
        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<a>"] )

        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<a>"] )

        # Test Expand1
        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<b>"] )

        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<b>"] )

        # Test Expand1 -> Alias
        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("xx"))
        self.assertEqual( r.children, ["<c>"] )

        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("xx")
        self.assertEqual( r.children, ["<c>"] )

    def test_embedded_transformer_inplace(self):
        @v_args(tree=True)
        class T1(Transformer_InPlace):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        @v_args(tree=True)
        class T2(Transformer):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        class T3(Transformer):
            @v_args(tree=True)
            def a(self, tree):
                assert isinstance(tree, Tree)
                tree.children.append("tested")
                return tree

            @v_args(tree=True)
            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        for t in [T1(), T2(), T3()]:
            for internal in [False, True]:
                g = Lark("""start: a b
                            a : "x"
                            b : "y"
                         """, parser='lalr', transformer=t if internal else None)
                r = g.parse("xy")
                if not internal:
                    r = t.transform(r)

                a, b = r.children
                self.assertEqual(a.children, ["tested"])
                self.assertEqual(b.children, ["tested2"])

    def test_alias(self):
        Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """)
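
# The two factories below stamp out one unittest class per configuration --
# _make_full_earley_test per lexer, _make_parser_test per (lexer, parser)
# pair -- rename it, and register it in globals() so the unittest loader
# picks up every combination as a separate test case.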

def _make_full_earley_test(LEXER):
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=LEXER, parser='earley', propagate_positions=True, **kwargs)

    class _TestFullEarley(unittest.TestCase):
        def test_anon(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = Lark(r"""start: B
                         B: ("ab"|/[^b]/)+
                      """, lexer=LEXER)

            self.assertEqual( g.parse('abc').children[0], 'abc')

        def test_earley(self):
            g = Lark("""start: A "b" c
                        A: "a"+
                        c: "abc"
                     """, parser="earley", lexer=LEXER)
            x = g.parse('aaaababc')

        def test_earley2(self):
            grammar = """
            start: statement+

            statement: "r"
                     | "c" /[a-z]/+

            %ignore " "
            """

            program = """c b r"""

            l = Lark(grammar, parser='earley', lexer=LEXER)
            l.parse(program)

        @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete lexer")
        def test_earley3(self):
            """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)

            By default, `+` should imitate regexp greedy-matching
            """
            grammar = """
            start: A A
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            self.assertEqual(set(res.children), {'aa', 'a'})
            # XXX TODO fix Earley to maintain correct order
            # i.e. make it imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aa', 'a'])

        def test_earley4(self):
            grammar = """
            start: A A?
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            assert set(res.children) == {'aa', 'a'} or res.children == ['aaa']
            # XXX TODO fix Earley to maintain correct order
            # i.e. make it imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aaa'])

        def test_earley_repeating_empty(self):
            # This was a sneaky bug!
            grammar = """
            !start: "a" empty empty "b"
            empty: empty2
            empty2:
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER)
            res = parser.parse('ab')

            empty_tree = Tree('empty', [Tree('empty2', [])])
            self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_earley_explicit_ambiguity(self):
            # This was a sneaky bug!
            grammar = """
            start: a b | ab
            a: "a"
            b: "b"
            ab: "ab"
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit')
            ambig_tree = parser.parse('ab')
            self.assertEqual( ambig_tree.data, '_ambig')
            self.assertEqual( len(ambig_tree.children), 2)

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity1(self):
            grammar = """
            start: cd+ "e"

            !cd: "c"
               | "d"
               | "cd"
            """
            l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
            ambig_tree = l.parse('cde')

            assert ambig_tree.data == '_ambig', ambig_tree
            assert len(ambig_tree.children) == 2

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity2(self):
            grammar = """
            ANY: /[a-zA-Z0-9 ]+/
            a.2: "A" b+
            b.2: "B"
            c: ANY

            start: (a|c)*
            """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse('ABX')
            expected = Tree('start', [
                    Tree('a', [
                        Tree('b', [])
                    ]),
                    Tree('c', [
                        'X'
                    ])
                ])
            self.assertEqual(res, expected)

        def test_fruitflies_ambig(self):
            grammar = """
                start: noun verb noun           -> simple
                     | noun verb "like" noun    -> comparative

                noun: adj? NOUN
                verb: VERB
                adj: ADJ

                NOUN: "flies" | "bananas" | "fruit"
                VERB: "like" | "flies"
                ADJ: "fruit"

                %import common.WS
                %ignore WS
            """
            parser = Lark(grammar, ambiguity='explicit', lexer=LEXER)
            tree = parser.parse('fruit flies like bananas')

            expected = Tree('_ambig', [
                    Tree('comparative', [
                        Tree('noun', ['fruit']),
                        Tree('verb', ['flies']),
                        Tree('noun', ['bananas'])
                    ]),
                    Tree('simple', [
                        Tree('noun', [Tree('adj', ['fruit']), 'flies']),
                        Tree('verb', ['like']),
                        Tree('noun', ['bananas'])
                    ])
                ])

            # self.assertEqual(tree, expected)
            self.assertEqual(tree.data, expected.data)
            self.assertEqual(set(tree.children), set(expected.children))

        @unittest.skipIf(LEXER!='dynamic_complete', "Only relevant for the dynamic_complete lexer")
        def test_explicit_ambiguity2(self):
            grammar = r"""
            start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """cat"""

            parser = _Lark(grammar, start='start', ambiguity='explicit')
            tree = parser.parse(text)
            self.assertEqual(tree.data, '_ambig')

            combinations = {tuple(str(s) for s in t.children) for t in tree.children}
            self.assertEqual(combinations, {
                ('cat',),
                ('ca', 't'),
                ('c', 'at'),
                ('c', 'a', 't')
            })
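            # The dynamic_complete lexer tries every possible terminal match
            # length, so with explicit ambiguity a single word yields one
            # child per way of segmenting it into NAME tokens -- all four
            # splits of "cat" above.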

        def test_term_ambig_resolve(self):
            grammar = r"""
            !start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """foo bar"""

            parser = Lark(grammar)
            tree = parser.parse(text)
            self.assertEqual(tree.children, ['foo', 'bar'])

        # @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
        # def test_not_all_derivations(self):
        #     grammar = """
        #     start: cd+ "e"
        #
        #     !cd: "c"
        #        | "d"
        #        | "cd"
        #     """
        #     l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
        #     x = l.parse('cde')
        #     assert x.data != '_ambig', x
        #     assert len(x.children) == 1

    _NAME = "TestFullEarley" + LEXER.capitalize()
    _TestFullEarley.__name__ = _NAME
    globals()[_NAME] = _TestFullEarley

class CustomLexer(Lexer):
    """
    Purpose of this custom lexer is to test the integration,
    so it uses TraditionalLexer as the implementation, without custom lexing behaviour.
    """
    def __init__(self, lexer_conf):
        self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks, g_regex_flags=lexer_conf.g_regex_flags)
    def lex(self, *args, **kwargs):
        return self.lexer.lex(*args, **kwargs)
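
# A custom lexer is plugged in by passing the class itself, e.g.
# Lark(grammar, parser='lalr', lexer=CustomLexer) -- which is what
# _make_parser_test below does for the 'custom' configuration.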

def _make_parser_test(LEXER, PARSER):
    lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)
    def _Lark_open(gfilename, **kwargs):
        return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)

    class _TestParser(unittest.TestCase):
        def test_basic1(self):
            g = _Lark("""start: a+ b a* "b" a*
                         b: "b"
                         a: "a"
                      """)

            r = g.parse('aaabaab')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )
            r = g.parse('aaabaaba')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaaa' )

            self.assertRaises(ParseError, g.parse, 'aaabaa')

        def test_basic2(self):
            # Multiple parsers and colliding tokens
            g = _Lark("""start: B A
                         B: "12"
                         A: "1" """)
            g2 = _Lark("""start: B A
                          B: "12"
                          A: "2" """)
            x = g.parse('121')
            assert x.data == 'start' and x.children == ['12', '1'], x
            x = g2.parse('122')
            assert x.data == 'start' and x.children == ['12', '2'], x

        @unittest.skipIf(cStringIO is None, "cStringIO not available")
        def test_stringio_bytes(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_stringio_unicode(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_unicode(self):
            g = _Lark(u"""start: UNIA UNIB UNIA
                          UNIA: /\xa3/
                          UNIB: /\u0101/
                       """)
            g.parse(u'\xa3\u0101\u00a3')

        def test_unicode2(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "a\u0101b\ "
                          UNIC: /a?\u0101c\n/
                       """)
            g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n')

        def test_unicode3(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "\u0101"
                          UNIC: /\u0203/ /\n/
                       """)
            g.parse(u'\xa3\u0101\u00a3\u0203\n')

        def test_hex_escape(self):
            g = _Lark(r"""start: A B C
                          A: "\x01"
                          B: /\x02/
                          C: "\xABCD"
                       """)
            g.parse('\x01\x02\xABCD')

        def test_unicode_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\u0061".."\u0063"
                       """)
            g.parse('abc')

        def test_hex_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\x01".."\x03"
                       """)
            g.parse('\x01\x02\x03')

        @unittest.skipIf(PARSER == 'cyk', "Takes forever")
        def test_stack_for_ebnf(self):
            """Verify that stack depth isn't an issue for EBNF grammars"""
            g = _Lark(r"""start: a+
                          a : "a" """)

            g.parse("a" * (sys.getrecursionlimit()*2 ))

        def test_expand1_lists_with_one_item(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("a")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_expand1_lists_with_one_item_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("a!")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_dont_expand1_lists_with_multiple_items(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        def test_dont_expand1_lists_with_multiple_items_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa!")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list(self):
            g = _Lark(r"""start: list
                          ?list: item*
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list_2(self):
            g = _Lark(r"""start: list
                          ?list: item* "!"?
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_single_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item',))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_multiple_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_recurse_flatten(self):
            """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
            g = _Lark(r"""start: a | start a
                          a : A
                          A : "a" """)

            # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
            # STree data structures, which uses recursion).
            g.parse("a" * (sys.getrecursionlimit() // 4))

        def test_token_collision(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %ignore " "
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision_WS(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %import common.WS
                          %ignore WS
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision2(self):
            g = _Lark("""
                    !start: "starts"

                    %import common.LCASE_LETTER
                    """)

            x = g.parse("starts")
            self.assertSequenceEqual(x.children, ['starts'])

        def test_templates(self):
            g = _Lark(r"""
                       start: "[" sep{NUMBER, ","} "]"
                       sep{item, delim}: item (delim item)*
                       NUMBER: /\d+/
                       %ignore " "
                       """)
            x = g.parse("[1, 2, 3, 4]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1', '2', '3', '4'])])
            x = g.parse("[1]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1'])])

        def test_templates_recursion(self):
            g = _Lark(r"""
                       start: "[" _sep{NUMBER, ","} "]"
                       _sep{item, delim}: item | _sep{item, delim} delim item
                       NUMBER: /\d+/
                       %ignore " "
                       """)
            x = g.parse("[1, 2, 3, 4]")
            self.assertSequenceEqual(x.children, ['1', '2', '3', '4'])
            x = g.parse("[1]")
            self.assertSequenceEqual(x.children, ['1'])

        def test_templates_import(self):
            g = _Lark_open("test_templates_import.lark", rel_to=__file__)
            x = g.parse("[1, 2, 3, 4]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1', '2', '3', '4'])])
            x = g.parse("[1]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1'])])

        def test_templates_alias(self):
            g = _Lark(r"""
                       start: expr{"C"}
                       expr{t}: "A" t
                              | "B" t -> b
                       """)
            x = g.parse("AC")
            self.assertSequenceEqual(x.children, [Tree('expr', [])])
            x = g.parse("BC")
            self.assertSequenceEqual(x.children, [Tree('b', [])])

        def test_templates_modifiers(self):
            g = _Lark(r"""
                       start: expr{"B"}
                       !expr{t}: "A" t
                       """)
            x = g.parse("AB")
            self.assertSequenceEqual(x.children, [Tree('expr', ["A", "B"])])
            g = _Lark(r"""
                       start: _expr{"B"}
                       !_expr{t}: "A" t
                       """)
            x = g.parse("AB")
            self.assertSequenceEqual(x.children, ["A", "B"])
            g = _Lark(r"""
                       start: expr{b}
                       b: "B"
                       ?expr{t}: "A" t
                       """)
            x = g.parse("AB")
            self.assertSequenceEqual(x.children, [Tree('b',[])])
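            # The three cases above combine rule modifiers with templates:
            # '!' keeps all tokens (so "A" and "B" appear), '_' additionally
            # inlines the rule's children into its parent, and '?' expands
            # the rule when it has a single child -- here leaving Tree('b').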

        def test_templates_templates(self):
            g = _Lark('''start: a{b}
                         a{t}: t{"a"}
                         b{x}: x''')
            x = g.parse('a')
            self.assertSequenceEqual(x.children, [Tree('a', [Tree('b',[])])])

        def test_g_regex_flags(self):
            g = _Lark("""
                    start: "a" /b+/ C
                    C: "C" | D
                    D: "D" E
                    E: "e"
                    """, g_regex_flags=re.I)
            x1 = g.parse("ABBc")
            x2 = g.parse("abdE")

        # def test_string_priority(self):
        #     g = _Lark("""start: (A | /a?bb/)+
        #                  A: "a" """)
        #     x = g.parse('abb')
        #     self.assertEqual(len(x.children), 2)
        #
        #     # This parse raises an exception because the lexer will always try to consume
        #     # "a" first and will never match the regular expression
        #     # This behavior is subject to change!!
        #     # This won't happen with ambiguity handling.
        #     g = _Lark("""start: (A | /a?ab/)+
        #                  A: "a" """)
        #     self.assertRaises(LexError, g.parse, 'aab')

        def test_undefined_rule(self):
            self.assertRaises(GrammarError, _Lark, """start: a""")

        def test_undefined_token(self):
            self.assertRaises(GrammarError, _Lark, """start: A""")

        def test_rule_collision(self):
            g = _Lark("""start: "a"+ "b"
                              | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')

        def test_rule_collision2(self):
            g = _Lark("""start: "a"* "b"
                              | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')
            x = g.parse('b')

        def test_token_not_anon(self):
            """Tests that "a" is matched as an anonymous token, and not A.
            """
            g = _Lark("""start: "a"
                         A: "a" """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 0, '"a" should be considered anonymous')

            g = _Lark("""start: "a" A
                         A: "a" """)
            x = g.parse('aa')
            self.assertEqual(len(x.children), 1, 'only "a" should be considered anonymous')
            self.assertEqual(x.children[0].type, "A")

            g = _Lark("""start: /a/
                         A: /a/ """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 1)
            self.assertEqual(x.children[0].type, "A", "A isn't associated with /a/")

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_maybe(self):
            g = _Lark("""start: ["a"] """)
            x = g.parse('a')
            x = g.parse('')

        def test_start(self):
            g = _Lark("""a: "a" a? """, start='a')
            x = g.parse('a')
            x = g.parse('aa')
            x = g.parse('aaa')

        def test_alias(self):
            g = _Lark("""start: "a" -> b """)
            x = g.parse('a')
            self.assertEqual(x.data, "b")

        def test_token_ebnf(self):
            g = _Lark("""start: A
                         A: "a"* ("b"? "c".."e")+
                      """)
            x = g.parse('abcde')
            x = g.parse('dd')

        def test_backslash(self):
            g = _Lark(r"""start: "\\" "a"
                       """)
            x = g.parse(r'\a')

            g = _Lark(r"""start: /\\/ /a/
                       """)
            x = g.parse(r'\a')

        def test_backslash2(self):
            g = _Lark(r"""start: "\"" "-"
                       """)
            x = g.parse('"-')

            g = _Lark(r"""start: /\// /-/
                       """)
            x = g.parse('/-')

        def test_special_chars(self):
            g = _Lark(r"""start: "\n"
                       """)
            x = g.parse('\n')

            g = _Lark(r"""start: /\n/
                       """)
            x = g.parse('\n')

        # def test_token_recurse(self):
        #     g = _Lark("""start: A
        #                  A: B
        #                  B: A
        #               """)

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = _Lark(r"""start: _empty a "B"
                          a: _empty "A"
                          _empty:
                       """)
            x = g.parse('AB')

        def test_regex_quote(self):
            g = r"""
            start: SINGLE_QUOTED_STRING | DOUBLE_QUOTED_STRING
            SINGLE_QUOTED_STRING : /'[^']*'/
            DOUBLE_QUOTED_STRING : /"[^"]*"/
            """

            g = _Lark(g)
            self.assertEqual( g.parse('"hello"').children, ['"hello"'])
            self.assertEqual( g.parse("'hello'").children, ["'hello'"])

        def test_lexer_token_limit(self):
            "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"
            tokens = {'A%d'%i:'"%d"'%i for i in range(300)}
            g = _Lark("""start: %s
                      %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))

        def test_float_without_lexer(self):
            expected_error = UnexpectedCharacters if LEXER.startswith('dynamic') else UnexpectedToken
            if PARSER == 'cyk':
                expected_error = ParseError

            g = _Lark("""start: ["+"|"-"] float
                         float: digit* "." digit+ exp?
                              | digit+ exp
                         exp: ("e"|"E") ["+"|"-"] digit+
                         digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
                      """)
            g.parse("1.2")
            g.parse("-.2e9")
            g.parse("+2e-9")
            self.assertRaises( expected_error, g.parse, "+2e-9e")

        def test_keep_all_tokens(self):
            l = _Lark("""start: "a"+ """, keep_all_tokens=True)
            tree = l.parse('aaa')
            self.assertEqual(tree.children, ['a', 'a', 'a'])

        def test_token_flags(self):
            l = _Lark("""!start: "a"i+
                      """
                      )
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            l = _Lark("""!start: /a/i+
                      """
                      )
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            # g = """!start: "a"i "a"
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            # g = """!start: /a/i /a/
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            g = """start: NAME "," "a"
                   NAME: /[a-z_]/i /[a-z0-9_]/i*
                """
            l = _Lark(g)
            tree = l.parse('ab,a')
            self.assertEqual(tree.children, ['ab'])
            tree = l.parse('AB,a')
            self.assertEqual(tree.children, ['AB'])

        def test_token_flags3(self):
            l = _Lark("""!start: ABC+
                         ABC: "abc"i
                      """
                      )
            tree = l.parse('aBcAbC')
            self.assertEqual(tree.children, ['aBc', 'AbC'])

        def test_token_flags2(self):
            g = """!start: ("a"i | /a/ /b/?)+
                """
            l = _Lark(g)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_twice_empty(self):
            g = """!start: ("A"?)?
                """
            l = _Lark(g)
            tree = l.parse('A')
            self.assertEqual(tree.children, ['A'])
            tree = l.parse('')
            self.assertEqual(tree.children, [])

        def test_undefined_ignore(self):
            g = """!start: "A"

                %ignore B
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_alias_in_terminal(self):
            g = """start: TERM
                   TERM: "a" -> alias
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_line_and_column(self):
            g = r"""!start: "A" bc "D"
                    !bc: "B\nC"
                 """
            l = _Lark(g)
            a, bc, d = l.parse("AB\nCD").children
            self.assertEqual(a.line, 1)
            self.assertEqual(a.column, 1)

            bc, = bc.children
            self.assertEqual(bc.line, 1)
            self.assertEqual(bc.column, 2)

            self.assertEqual(d.line, 2)
            self.assertEqual(d.column, 2)

            if LEXER != 'dynamic':
                self.assertEqual(a.end_line, 1)
                self.assertEqual(a.end_column, 2)
                self.assertEqual(bc.end_line, 2)
                self.assertEqual(bc.end_column, 2)
                self.assertEqual(d.end_line, 2)
                self.assertEqual(d.end_column, 3)

        def test_reduce_cycle(self):
            """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state.
            It seems that the correct solution is to explicitly distinguish finalization in the reduce() function.
            """
            l = _Lark("""
                term: A
                    | term term

                A: "a"
                """, start='term')
            tree = l.parse("aa")
            self.assertEqual(len(tree.children), 2)

        @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority")
        def test_lexer_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: A B | AB
            A.2: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['a', 'b'])
            self.assertNotEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B: "b"
            AB.3: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertNotEqual(res.children, ['a', 'b'])
            self.assertEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B.-20: "b"
            AB.-10: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertEqual(res.children, ['a', 'b'])

            grammar = """
            start: A B | AB
            A.-99999999999999999999999: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['ab'])
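            # The standard lexer prefers the match with the highest explicit
            # priority and falls back to the longest match, so raising A's
            # priority (or sinking AB's below B's) splits "ab" into two
            # tokens, while an absurdly low priority on A hands the win back
            # to the longer AB match.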

        def test_import(self):
            grammar = """
            start: NUMBER WORD

            %import common.NUMBER
            %import common.WORD
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_import_rename(self):
            grammar = """
            start: N W

            %import common.NUMBER -> N
            %import common.WORD -> W
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_relative_import(self):
            l = _Lark_open('test_relative_import.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_import_unicode(self):
            l = _Lark_open('test_relative_import_unicode.lark', rel_to=__file__)
            x = l.parse(u'Ø')
            self.assertEqual(x.children, [u'Ø'])

        def test_relative_import_rename(self):
            l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_rule_import(self):
            l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('expr', ['a', Tree('expr', ['a', 'b']), 'b']),
                'y'])

        def test_relative_rule_import_drop_ignore(self):
            # %ignore rules are dropped on import
            l = _Lark_open('test_relative_rule_import_drop_ignore.lark',
                           rel_to=__file__)
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xa abby')

        def test_relative_rule_import_subrule(self):
            l = _Lark_open('test_relative_rule_import_subrule.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', [
                        'a', Tree('grammars__ab__expr', ['a', 'b']), 'b',
                    ]),
                ]),
                'y'])

        def test_relative_rule_import_subrule_no_conflict(self):
            l = _Lark_open(
                'test_relative_rule_import_subrule_no_conflict.lark',
                rel_to=__file__)
            x = l.parse('xaby')
            self.assertEqual(x.children, [Tree('expr', [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', ['a', 'b']),
                ]),
                'y'])])
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xaxabyby')

        def test_relative_rule_import_rename(self):
            l = _Lark_open('test_relative_rule_import_rename.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('ab', ['a', Tree('ab', ['a', 'b']), 'b']),
                'y'])

        def test_multi_import(self):
            grammar = """
            start: NUMBER WORD

            %import common (NUMBER, WORD, WS)
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 toucans')
            self.assertEqual(x.children, ['12', 'toucans'])

        def test_relative_multi_import(self):
            l = _Lark_open("test_relative_multi_import.lark", rel_to=__file__)
            x = l.parse('12 capybaras')
            self.assertEqual(x.children, ['12', 'capybaras'])

        def test_relative_import_preserves_leading_underscore(self):
            l = _Lark_open("test_relative_import_preserves_leading_underscore.lark", rel_to=__file__)
            x = l.parse('Ax')
            self.assertEqual(next(x.find_data('c')).children, ['A'])

        def test_relative_import_of_nested_grammar(self):
            l = _Lark_open("grammars/test_relative_import_of_nested_grammar.lark", rel_to=__file__)
            x = l.parse('N')
            self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])

        def test_relative_import_rules_dependencies_imported_only_once(self):
            l = _Lark_open("test_relative_import_rules_dependencies_imported_only_once.lark", rel_to=__file__)
            x = l.parse('AAA')
            self.assertEqual(next(x.find_data('a')).children, ['A'])
            self.assertEqual(next(x.find_data('b')).children, ['A'])
            self.assertEqual(next(x.find_data('d')).children, ['A'])

        def test_import_errors(self):
            grammar = """
            start: NUMBER WORD

            %import .grammars.bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

            grammar = """
            start: NUMBER WORD

            %import bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: a | b
            a.1: "a"
            b.2: "a"
            """

            # l = Lark(grammar, parser='earley', lexer='standard')
            l = _Lark(grammar)
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'b')

            grammar = """
            start: a | b
            a.2: "a"
            b.1: "a"
            """

            l = _Lark(grammar)
            # l = Lark(grammar, parser='earley', lexer='standard')
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'a')

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization_sum(self):
            "Tests effect of priority on result"

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_: "ab"
            bb_.1: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_.1: "ab"
            bb_: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.2: "a"
            b_.1: "b"
            ab_.3: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.1: "a"
            b_.1: "b"
            ab_.4: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

        def test_utf8(self):
            g = u"""start: a
                    a: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [Tree('a', [])]))

            g = u"""start: A
                    A: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [u'\xb1a']))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_ignore(self):
            grammar = r"""
            COMMENT: /(!|(\/\/))[^\n]*/
            %ignore COMMENT
            %import common.WS -> _WS
            %import common.INT
            start: "INT"i _WS+ INT _WS*
            """

            parser = _Lark(grammar)

            tree = parser.parse("int 1 ! This is a comment\n")
            self.assertEqual(tree.children, ['1'])

            tree = parser.parse("int 1 ! This is a comment")    # A trailing ignore token can be tricky!
            self.assertEqual(tree.children, ['1'])

            parser = _Lark(r"""
                start : "a"*
                %ignore "b"
            """)
            tree = parser.parse("bb")
            self.assertEqual(tree.children, [])

        def test_regex_escaping(self):
            g = _Lark("start: /[ab]/")
            g.parse('a')
            g.parse('b')

            self.assertRaises( UnexpectedInput, g.parse, 'c')

            _Lark(r'start: /\w/').parse('a')

            g = _Lark(r'start: /\\w/')
            self.assertRaises( UnexpectedInput, g.parse, 'a')
            g.parse(r'\w')

            _Lark(r'start: /\[/').parse('[')
            _Lark(r'start: /\//').parse('/')
            _Lark(r'start: /\\/').parse('\\')
            _Lark(r'start: /\[ab]/').parse('[ab]')
            _Lark(r'start: /\\[ab]/').parse('\\a')
            _Lark(r'start: /\t/').parse('\t')
            _Lark(r'start: /\\t/').parse('\\t')
            _Lark(r'start: /\\\t/').parse('\\\t')
            _Lark(r'start: "\t"').parse('\t')
            _Lark(r'start: "\\t"').parse('\\t')
            _Lark(r'start: "\\\t"').parse('\\\t')

        def test_ranged_repeat_rules(self):
            g = u"""!start: "A"~3
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["A", "A", "A"]))
            self.assertRaises(ParseError, l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: "A"~0..2
                 """
            if PARSER != 'cyk': # XXX CYK currently doesn't support empty grammars
                l = _Lark(g)
                self.assertEqual(l.parse(u''), Tree('start', []))
                self.assertEqual(l.parse(u'A'), Tree('start', ['A']))
                self.assertEqual(l.parse(u'AA'), Tree('start', ['A', 'A']))
                self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'AAA')

            g = u"""!start: "A"~3..2
                 """
            self.assertRaises(GrammarError, _Lark, g)

            g = u"""!start: "A"~2..3 "B"~2
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
            self.assertEqual(l.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
            self.assertRaises(ParseError, l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        def test_ranged_repeat_terms(self):
            g = u"""!start: AAA
                    AAA: "A"~3
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"]))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: AABB CC
                    AABB: "A"~0..2 "B"~2
                    CC: "C"~1..2
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC']))
            self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C']))
            self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC']))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')
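            # "~" is lark's ranged-repeat operator: X~3 matches exactly three
            # repetitions and X~n..m between n and m, inside both rules and
            # terminal definitions, as the two tests above exercise.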

        @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now") # TODO XXX
        def test_priority_vs_embedded(self):
            g = """
            A.2: "a"
            WORD: ("a".."z")+

            start: (A | WORD)+
            """
            l = _Lark(g)
            t = l.parse('abc')
            self.assertEqual(t.children, ['a', 'bc'])
            self.assertEqual(t.children[0].type, 'A')

        def test_line_counting(self):
            p = _Lark("start: /[^x]+/")

            text = 'hello\nworld'
            t = p.parse(text)
            tok = t.children[0]
            self.assertEqual(tok, text)
            self.assertEqual(tok.line, 1)
            self.assertEqual(tok.column, 1)
            if LEXER != 'dynamic':
                self.assertEqual(tok.end_line, 2)
                self.assertEqual(tok.end_column, 6)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_empty_end(self):
            p = _Lark("""
                start: b c d
                b: "B"
                c: | "C"
                d: | "D"
            """)
            res = p.parse('B')
            self.assertEqual(len(res.children), 3)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_maybe_placeholders(self):
            # Anonymous tokens shouldn't count
            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [])

            # All invisible constructs shouldn't count
            p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
                         A: "a"
                         _c: "c" """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None])
            self.assertEqual(p.parse("c").children, [None])
            self.assertEqual(p.parse("aefc").children, ['a'])

            # ? shouldn't apply
            p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])

            p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None, None])
            self.assertEqual(p.parse("a").children, ['a', None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("c").children, [None, None, 'c'])
            self.assertEqual(p.parse("ab").children, ['a', 'b', None])
            self.assertEqual(p.parse("ac").children, ['a', None, 'c'])
            self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
            self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

            p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
            self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
            self.assertEqual(p.parse("babbcabcb").children,
                [None, 'b', None,
                 'a', 'b', None,
                 None, 'b', 'c',
                 'a', 'b', 'c',
                 None, 'b', None])

            p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
            self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
            self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
            self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])

        def test_escaped_string(self):
            "Tests common.ESCAPED_STRING"
            grammar = r"""
            start: ESCAPED_STRING+

            %import common (WS_INLINE, ESCAPED_STRING)
            %ignore WS_INLINE
            """

            parser = _Lark(grammar)

            parser.parse(r'"\\" "b" "c"')
            parser.parse(r'"That" "And a \"b"')

        def test_meddling_unused(self):
            "Unless 'unused' is removed, LALR analysis will fail on reduce-reduce collision"

            grammar = """
                start: EKS* x
                x: EKS
                unused: x*
                EKS: "x"
            """
            parser = _Lark(grammar)

        @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
        def test_serialize(self):
            grammar = """
                start: _ANY b "C"
                _ANY: /./
                b: "B"
            """
            parser = _Lark(grammar)
            d = parser.serialize()
            parser2 = Lark.deserialize(d, {}, {})
            self.assertEqual(parser2.parse('ABC'), Tree('start', [Tree('b', [])]) )

            namespace = {'Rule': Rule, 'TerminalDef': TerminalDef}
            d, m = parser.memo_serialize(namespace.values())
            parser3 = Lark.deserialize(d, namespace, m)
            self.assertEqual(parser3.parse('ABC'), Tree('start', [Tree('b', [])]) )

        def test_multi_start(self):
            parser = _Lark('''
                a: "x" "a"?
                b: "x" "b"?
                ''', start=['a', 'b'])

            self.assertEqual(parser.parse('xa', 'a'), Tree('a', []))
            self.assertEqual(parser.parse('xb', 'b'), Tree('b', []))
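            # When start is a list, parse() accepts the start symbol as its
            # second argument, as in parser.parse('xa', 'a') above.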

        def test_lexer_detect_newline_tokens(self):
            # Detect newlines in regular tokens
            g = _Lark(r"""start: "go" tail*
                          !tail : SA "@" | SB "@" | SC "@" | SD "@"
                          SA : "a" /\n/
                          SB : /b./s
                          SC : "c" /[^a-z]/
                          SD : "d" /\s/
                       """)
            a, b, c, d = [x.children[1] for x in g.parse('goa\n@b\n@c\n@d\n@').children]
            self.assertEqual(a.line, 2)
            self.assertEqual(b.line, 3)
            self.assertEqual(c.line, 4)
            self.assertEqual(d.line, 5)

            # Detect newlines in ignored tokens
            for regexp in ['/\\n/', '/[^a-z]/', '/\\s/']:
                g = _Lark('''!start: "a" "a"
                             %ignore {}'''.format(regexp))
                a, b = g.parse('a\na').children
                self.assertEqual(a.line, 1)
                self.assertEqual(b.line, 2)
  1419. _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
  1420. _TestParser.__name__ = _NAME
  1421. _TestParser.__qualname__ = "tests.test_parser." + _NAME
  1422. globals()[_NAME] = _TestParser
  1423. # Note: You still have to import them in __main__ for the tests to run
  1424. _TO_TEST = [
  1425. ('standard', 'earley'),
  1426. ('standard', 'cyk'),
  1427. ('dynamic', 'earley'),
  1428. ('dynamic_complete', 'earley'),
  1429. ('standard', 'lalr'),
  1430. ('contextual', 'lalr'),
  1431. ('custom', 'lalr'),
  1432. # (None, 'earley'),
  1433. ]
  1434. for _LEXER, _PARSER in _TO_TEST:
  1435. _make_parser_test(_LEXER, _PARSER)
  1436. for _LEXER in ('dynamic', 'dynamic_complete'):
  1437. _make_full_earley_test(_LEXER)
  1438. if __name__ == '__main__':
  1439. unittest.main()