This repo contains code to mirror other repos. It also contains the code that is getting mirrored.

# -*- coding: utf-8 -*-
from __future__ import absolute_import

import re
import unittest
import logging
import os
import sys
from copy import deepcopy
try:
    from cStringIO import StringIO as cStringIO
except ImportError:
    # Available only in Python 2.x, 3.x only has io.StringIO from below
    cStringIO = None
from io import (
    StringIO as uStringIO,
    BytesIO,
    open,
)

logging.basicConfig(level=logging.INFO)

from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer

__path__ = os.path.dirname(__file__)

def _read(n, *args):
    with open(os.path.join(__path__, n), *args) as f:
        return f.read()

class TestParsers(unittest.TestCase):
    def test_same_ast(self):
        "Tests that Earley and LALR parsers produce equal trees"
        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w+/ """, parser='lalr')
        l = g.parse('(a,b,c,*x)')

        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w/+ """)
        l2 = g.parse('(a,b,c,*x)')
        assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())

    def test_infinite_recurse(self):
        g = """start: a
               a: a | "a"
            """
        self.assertRaises(GrammarError, Lark, g, parser='lalr')

        # TODO: should it? shouldn't it?
        # l = Lark(g, parser='earley', lexer='dynamic')
        # self.assertRaises(ParseError, l.parse, 'a')

    def test_propagate_positions(self):
        g = Lark("""start: a
                    a: "a"
                 """, propagate_positions=True)
        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )

        g = Lark("""start: x
                    x: a
                    a: "a"
                 """, propagate_positions=True)
        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )

    def test_expand1(self):
        g = Lark("""start: a
                    ?a: b
                    b: "x"
                 """)
        r = g.parse('x')
        self.assertEqual( r.children[0].data, "b" )

        g = Lark("""start: a
                    ?a: b -> c
                    b: "x"
                 """)
        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: B -> c
                    B: "x"
                 """)
        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: b b -> c
                    b: "x"
                 """)
        r = g.parse('xx')
        self.assertEqual( r.children[0].data, "c" )

    def test_comment_in_rule_definition(self):
        g = Lark("""start: a
               a: "a"
                // A comment
                // Another comment
                | "b"
                // Still more
               c: "unrelated"
            """)
        r = g.parse('b')
        self.assertEqual( r.children[0].data, "a" )

    def test_visit_tokens(self):
        class T(Transformer):
            def a(self, children):
                return children[0] + "!"
            def A(self, tok):
                return tok.update(value=tok.upper())

        # Test regular
        g = """start: a
               a : A
               A: "x"
            """
        p = Lark(g, parser='lalr')
        r = T(False).transform(p.parse("x"))
        self.assertEqual( r.children, ["x!"] )
        r = T().transform(p.parse("x"))
        self.assertEqual( r.children, ["X!"] )

        # Test internal transformer
        p = Lark(g, parser='lalr', transformer=T())
        r = p.parse("x")
        self.assertEqual( r.children, ["X!"] )
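
        # Note: the positional argument in T(False) above is Transformer's
        # visit_tokens flag; with it disabled, the terminal callback A() is never
        # called, which is why the token keeps its original lowercase value.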

    def test_vargs_meta(self):
        @v_args(meta=True)
        class T1(Transformer):
            def a(self, children, meta):
                assert not children
                return meta.line

            def start(self, children, meta):
                return children

        @v_args(meta=True, inline=True)
        class T2(Transformer):
            def a(self, meta):
                return meta.line

            def start(self, meta, *res):
                return list(res)

        for T in (T1, T2):
            for internal in [False, True]:
                try:
                    g = Lark(r"""start: a+
                                 a : "x" _NL?
                                 _NL: /\n/+
                              """, parser='lalr', transformer=T() if internal else None, propagate_positions=True)
                except NotImplementedError:
                    assert internal
                    continue
                res = g.parse("xx\nx\nxxx\n\n\nxx")
                assert not internal
                res = T().transform(res)
                self.assertEqual(res, [1, 1, 2, 3, 3, 3, 6, 6])

    def test_vargs_tree(self):
        tree = Lark('''
            start: a a a
            !a: "A"
        ''').parse('AAA')
        tree_copy = deepcopy(tree)

        @v_args(tree=True)
        class T(Transformer):
            def a(self, tree):
                return 1
            def start(self, tree):
                return tree.children

        res = T().transform(tree)
        self.assertEqual(res, [1, 1, 1])
        self.assertEqual(tree, tree_copy)

    def test_embedded_transformer(self):
        class T(Transformer):
            def a(self, children):
                return "<a>"
            def b(self, children):
                return "<b>"
            def c(self, children):
                return "<c>"

        # Test regular
        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<a>"] )

        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<a>"] )

        # Test Expand1
        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<b>"] )

        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<b>"] )

        # Test Expand1 -> Alias
        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("xx"))
        self.assertEqual( r.children, ["<c>"] )

        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("xx")
        self.assertEqual( r.children, ["<c>"] )

    def test_embedded_transformer_inplace(self):
        @v_args(tree=True)
        class T1(Transformer_InPlace):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        @v_args(tree=True)
        class T2(Transformer):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        class T3(Transformer):
            @v_args(tree=True)
            def a(self, tree):
                assert isinstance(tree, Tree)
                tree.children.append("tested")
                return tree

            @v_args(tree=True)
            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        for t in [T1(), T2(), T3()]:
            for internal in [False, True]:
                g = Lark("""start: a b
                            a : "x"
                            b : "y"
                         """, parser='lalr', transformer=t if internal else None)
                r = g.parse("xy")
                if not internal:
                    r = t.transform(r)

                a, b = r.children
                self.assertEqual(a.children, ["tested"])
                self.assertEqual(b.children, ["tested2"])

    def test_alias(self):
        Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """)
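
    # For readers: the "-> d" alias above renames the tree node the rule produces,
    # so parsing a minimal input such as "bex" would yield a tree labeled 'd'
    # rather than 'start' (test_alias in _TestParser below checks the same
    # mechanism on a smaller grammar).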

def _make_full_earley_test(LEXER):
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=LEXER, parser='earley', propagate_positions=True, **kwargs)

    class _TestFullEarley(unittest.TestCase):
        def test_anon(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = Lark(r"""start: B
                         B: ("ab"|/[^b]/)+
                      """, lexer=LEXER)

            self.assertEqual( g.parse('abc').children[0], 'abc')

        def test_earley(self):
            g = Lark("""start: A "b" c
                        A: "a"+
                        c: "abc"
                     """, parser="earley", lexer=LEXER)
            x = g.parse('aaaababc')

        def test_earley2(self):
            grammar = """
            start: statement+
            statement: "r"
                     | "c" /[a-z]/+
            %ignore " "
            """
            program = """c b r"""
            l = Lark(grammar, parser='earley', lexer=LEXER)
            l.parse(program)

        @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete lexer")
        def test_earley3(self):
            """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)

            By default, `+` should imitate regexp greedy-matching
            """
            grammar = """
            start: A A
            A: "a"+
            """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            self.assertEqual(set(res.children), {'aa', 'a'})

            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aa', 'a'])

        def test_earley4(self):
            grammar = """
            start: A A?
            A: "a"+
            """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            assert set(res.children) == {'aa', 'a'} or res.children == ['aaa']

            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aaa'])

        def test_earley_repeating_empty(self):
            # This was a sneaky bug!
            grammar = """
            !start: "a" empty empty "b"
            empty: empty2
            empty2:
            """
            parser = Lark(grammar, parser='earley', lexer=LEXER)
            res = parser.parse('ab')

            empty_tree = Tree('empty', [Tree('empty2', [])])
            self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_earley_explicit_ambiguity(self):
            # This was a sneaky bug!
            grammar = """
            start: a b | ab
            a: "a"
            b: "b"
            ab: "ab"
            """
            parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit')
            ambig_tree = parser.parse('ab')
            self.assertEqual( ambig_tree.data, '_ambig')
            self.assertEqual( len(ambig_tree.children), 2)

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity1(self):
            grammar = """
            start: cd+ "e"

            !cd: "c"
               | "d"
               | "cd"
            """
            l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
            ambig_tree = l.parse('cde')

            assert ambig_tree.data == '_ambig', ambig_tree
            assert len(ambig_tree.children) == 2

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity2(self):
            grammar = """
            ANY: /[a-zA-Z0-9 ]+/
            a.2: "A" b+
            b.2: "B"
            c: ANY

            start: (a|c)*
            """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse('ABX')
            expected = Tree('start', [
                Tree('a', [
                    Tree('b', [])
                ]),
                Tree('c', [
                    'X'
                ])
            ])
            self.assertEqual(res, expected)

        def test_fruitflies_ambig(self):
            grammar = """
                start: noun verb noun        -> simple
                     | noun verb "like" noun -> comparative

                noun: adj? NOUN
                verb: VERB
                adj: ADJ

                NOUN: "flies" | "bananas" | "fruit"
                VERB: "like" | "flies"
                ADJ: "fruit"

                %import common.WS
                %ignore WS
            """
            parser = Lark(grammar, ambiguity='explicit', lexer=LEXER)
            tree = parser.parse('fruit flies like bananas')

            expected = Tree('_ambig', [
                Tree('comparative', [
                    Tree('noun', ['fruit']),
                    Tree('verb', ['flies']),
                    Tree('noun', ['bananas'])
                ]),
                Tree('simple', [
                    Tree('noun', [Tree('adj', ['fruit']), 'flies']),
                    Tree('verb', ['like']),
                    Tree('noun', ['bananas'])
                ])
            ])

            # self.assertEqual(tree, expected)
            self.assertEqual(tree.data, expected.data)
            self.assertEqual(set(tree.children), set(expected.children))

        @unittest.skipIf(LEXER!='dynamic_complete', "Only relevant for the dynamic_complete lexer")
        def test_explicit_ambiguity2(self):
            grammar = r"""
            start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """cat"""

            parser = _Lark(grammar, start='start', ambiguity='explicit')
            tree = parser.parse(text)
            self.assertEqual(tree.data, '_ambig')

            combinations = {tuple(str(s) for s in t.children) for t in tree.children}
            self.assertEqual(combinations, {
                ('cat',),
                ('ca', 't'),
                ('c', 'at'),
                ('c', 'a' ,'t')
            })

        def test_term_ambig_resolve(self):
            grammar = r"""
            !start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """foo bar"""

            parser = Lark(grammar)
            tree = parser.parse(text)
            self.assertEqual(tree.children, ['foo', 'bar'])

        # @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
        # def test_not_all_derivations(self):
        #     grammar = """
        #     start: cd+ "e"
        #
        #     !cd: "c"
        #        | "d"
        #        | "cd"
        #     """
        #     l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
        #     x = l.parse('cde')
        #     assert x.data != '_ambig', x
        #     assert len(x.children) == 1

    _NAME = "TestFullEarley" + LEXER.capitalize()
    _TestFullEarley.__name__ = _NAME
    globals()[_NAME] = _TestFullEarley

class CustomLexer(Lexer):
    """
    Purpose of this custom lexer is to test the integration,
    so it uses the TraditionalLexer as its implementation, without custom lexing behaviour.
    """
    def __init__(self, lexer_conf):
        self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks, g_regex_flags=lexer_conf.g_regex_flags)

    def lex(self, *args, **kwargs):
        return self.lexer.lex(*args, **kwargs)
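
# The custom lexer is wired in by _make_parser_test below: when LEXER == 'custom',
# the class itself is passed to Lark as the `lexer` argument, and Lark instantiates
# it with the lexer configuration (see lexer_class_or_name).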

def _make_parser_test(LEXER, PARSER):
    lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)
    def _Lark_open(gfilename, **kwargs):
        return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)

    class _TestParser(unittest.TestCase):
        def test_basic1(self):
            g = _Lark("""start: a+ b a* "b" a*
                         b: "b"
                         a: "a"
                      """)
            r = g.parse('aaabaab')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )
            r = g.parse('aaabaaba')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaaa' )

            self.assertRaises(ParseError, g.parse, 'aaabaa')

        def test_basic2(self):
            # Multiple parsers and colliding tokens
            g = _Lark("""start: B A
                         B: "12"
                         A: "1" """)
            g2 = _Lark("""start: B A
                          B: "12"
                          A: "2" """)
            x = g.parse('121')
            assert x.data == 'start' and x.children == ['12', '1'], x
            x = g2.parse('122')
            assert x.data == 'start' and x.children == ['12', '2'], x

        @unittest.skipIf(cStringIO is None, "cStringIO not available")
        def test_stringio_bytes(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_stringio_unicode(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_unicode(self):
            g = _Lark(u"""start: UNIA UNIB UNIA
                          UNIA: /\xa3/
                          UNIB: /\u0101/
                       """)
            g.parse(u'\xa3\u0101\u00a3')

        def test_unicode2(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "a\u0101b\ "
                          UNIC: /a?\u0101c\n/
                       """)
            g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n')

        def test_unicode3(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "\u0101"
                          UNIC: /\u0203/ /\n/
                       """)
            g.parse(u'\xa3\u0101\u00a3\u0203\n')

        def test_hex_escape(self):
            g = _Lark(r"""start: A B C
                          A: "\x01"
                          B: /\x02/
                          C: "\xABCD"
                       """)
            g.parse('\x01\x02\xABCD')

        def test_unicode_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\u0061".."\u0063"
                       """)
            g.parse('abc')

        def test_hex_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\x01".."\x03"
                       """)
            g.parse('\x01\x02\x03')

        @unittest.skipIf(PARSER == 'cyk', "Takes forever")
        def test_stack_for_ebnf(self):
            """Verify that stack depth isn't an issue for EBNF grammars"""
            g = _Lark(r"""start: a+
                          a : "a" """)
            g.parse("a" * (sys.getrecursionlimit() * 2))

        def test_expand1_lists_with_one_item(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("a")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_expand1_lists_with_one_item_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("a!")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_dont_expand1_lists_with_multiple_items(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        def test_dont_expand1_lists_with_multiple_items_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa!")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list(self):
            g = _Lark(r"""start: list
                          ?list: item*
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list_2(self):
            g = _Lark(r"""start: list
                          ?list: item* "!"?
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_single_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item',))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_multiple_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_recurse_flatten(self):
            """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
            g = _Lark(r"""start: a | start a
                          a : A
                          A : "a" """)

            # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
            # STree data structures, which uses recursion).
            g.parse("a" * (sys.getrecursionlimit() // 4))

        def test_token_collision(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %ignore " "
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision_WS(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %import common.WS
                          %ignore WS
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision2(self):
            g = _Lark("""
                    !start: "starts"
                    %import common.LCASE_LETTER
                    """)
            x = g.parse("starts")
            self.assertSequenceEqual(x.children, ['starts'])

        def test_templates(self):
            g = _Lark(r"""
                       start: "[" sep{NUMBER, ","} "]"
                       sep{item, delim}: item (delim item)*
                       NUMBER: /\d+/
                       %ignore " "
                       """)
            x = g.parse("[1, 2, 3, 4]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1', '2', '3', '4'])])
            x = g.parse("[1]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1'])])

        def test_templates_recursion(self):
            g = _Lark(r"""
                       start: "[" _sep{NUMBER, ","} "]"
                       _sep{item, delim}: item | _sep{item, delim} delim item
                       NUMBER: /\d+/
                       %ignore " "
                       """)
            x = g.parse("[1, 2, 3, 4]")
            self.assertSequenceEqual(x.children, ['1', '2', '3', '4'])
            x = g.parse("[1]")
            self.assertSequenceEqual(x.children, ['1'])

        def test_templates_import(self):
            g = _Lark_open("test_templates_import.lark", rel_to=__file__)
            x = g.parse("[1, 2, 3, 4]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1', '2', '3', '4'])])
            x = g.parse("[1]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1'])])

        def test_templates_alias(self):
            g = _Lark(r"""
                       start: expr{"C"}
                       expr{t}: "A" t
                              | "B" t -> b
                       """)
            x = g.parse("AC")
            self.assertSequenceEqual(x.children, [Tree('expr', [])])
            x = g.parse("BC")
            self.assertSequenceEqual(x.children, [Tree('b', [])])

        def test_templates_modifiers(self):
            g = _Lark(r"""
                       start: expr{"B"}
                       !expr{t}: "A" t
                       """)
            x = g.parse("AB")
            self.assertSequenceEqual(x.children, [Tree('expr', ["A", "B"])])
            g = _Lark(r"""
                       start: _expr{"B"}
                       !_expr{t}: "A" t
                       """)
            x = g.parse("AB")
            self.assertSequenceEqual(x.children, ["A", "B"])
            g = _Lark(r"""
                       start: expr{b}
                       b: "B"
                       ?expr{t}: "A" t
                       """)
            x = g.parse("AB")
            self.assertSequenceEqual(x.children, [Tree('b',[])])

        def test_templates_templates(self):
            g = _Lark('''start: a{b}
                         a{t}: t{"a"}
                         b{x}: x''')
            x = g.parse('a')
            self.assertSequenceEqual(x.children, [Tree('a', [Tree('b',[])])])

        def test_g_regex_flags(self):
            g = _Lark("""
                    start: "a" /b+/ C
                    C: "C" | D
                    D: "D" E
                    E: "e"
                    """, g_regex_flags=re.I)
            x1 = g.parse("ABBc")
            x2 = g.parse("abdE")

        # def test_string_priority(self):
        #     g = _Lark("""start: (A | /a?bb/)+
        #                  A: "a" """)
        #     x = g.parse('abb')
        #     self.assertEqual(len(x.children), 2)
        #
        #     # This parse raises an exception because the lexer will always try to consume
        #     # "a" first and will never match the regular expression
        #     # This behavior is subject to change!!
        #     # This won't happen with ambiguity handling.
        #     g = _Lark("""start: (A | /a?ab/)+
        #                  A: "a" """)
        #     self.assertRaises(LexError, g.parse, 'aab')

        def test_undefined_rule(self):
            self.assertRaises(GrammarError, _Lark, """start: a""")

        def test_undefined_token(self):
            self.assertRaises(GrammarError, _Lark, """start: A""")

        def test_rule_collision(self):
            g = _Lark("""start: "a"+ "b"
                               | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')

        def test_rule_collision2(self):
            g = _Lark("""start: "a"* "b"
                               | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')
            x = g.parse('b')

        def test_token_not_anon(self):
            """Tests that "a" is matched as an anonymous token, and not A.
            """
            g = _Lark("""start: "a"
                         A: "a" """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 0, '"a" should be considered anonymous')

            g = _Lark("""start: "a" A
                         A: "a" """)
            x = g.parse('aa')
            self.assertEqual(len(x.children), 1, 'only "a" should be considered anonymous')
            self.assertEqual(x.children[0].type, "A")

            g = _Lark("""start: /a/
                         A: /a/ """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 1)
            self.assertEqual(x.children[0].type, "A", "A isn't associated with /a/")

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_maybe(self):
            g = _Lark("""start: ["a"] """)
            x = g.parse('a')
            x = g.parse('')

        def test_start(self):
            g = _Lark("""a: "a" a? """, start='a')
            x = g.parse('a')
            x = g.parse('aa')
            x = g.parse('aaa')

        def test_alias(self):
            g = _Lark("""start: "a" -> b """)
            x = g.parse('a')
            self.assertEqual(x.data, "b")

        def test_token_ebnf(self):
            g = _Lark("""start: A
                         A: "a"* ("b"? "c".."e")+
                      """)
            x = g.parse('abcde')
            x = g.parse('dd')

        def test_backslash(self):
            g = _Lark(r"""start: "\\" "a"
                       """)
            x = g.parse(r'\a')

            g = _Lark(r"""start: /\\/ /a/
                       """)
            x = g.parse(r'\a')

        def test_backslash2(self):
            g = _Lark(r"""start: "\"" "-"
                       """)
            x = g.parse('"-')

            g = _Lark(r"""start: /\// /-/
                       """)
            x = g.parse('/-')

        def test_special_chars(self):
            g = _Lark(r"""start: "\n"
                       """)
            x = g.parse('\n')

            g = _Lark(r"""start: /\n/
                       """)
            x = g.parse('\n')

        # def test_token_recurse(self):
        #     g = _Lark("""start: A
        #                  A: B
        #                  B: A
        #               """)

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = _Lark(r"""start: _empty a "B"
                          a: _empty "A"
                          _empty:
                       """)
            x = g.parse('AB')

        def test_regex_quote(self):
            g = r"""
            start: SINGLE_QUOTED_STRING | DOUBLE_QUOTED_STRING
            SINGLE_QUOTED_STRING : /'[^']*'/
            DOUBLE_QUOTED_STRING : /"[^"]*"/
            """
            g = _Lark(g)
            self.assertEqual( g.parse('"hello"').children, ['"hello"'])
            self.assertEqual( g.parse("'hello'").children, ["'hello'"])

        def test_lexer_token_limit(self):
            "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"
            tokens = {'A%d'%i:'"%d"'%i for i in range(300)}
            g = _Lark("""start: %s
                      %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))

        def test_float_without_lexer(self):
            expected_error = UnexpectedCharacters if LEXER.startswith('dynamic') else UnexpectedToken
            if PARSER == 'cyk':
                expected_error = ParseError

            g = _Lark("""start: ["+"|"-"] float
                         float: digit* "." digit+ exp?
                              | digit+ exp
                         exp: ("e"|"E") ["+"|"-"] digit+
                         digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
                      """)
            g.parse("1.2")
            g.parse("-.2e9")
            g.parse("+2e-9")
            self.assertRaises( expected_error, g.parse, "+2e-9e")

        def test_keep_all_tokens(self):
            l = _Lark("""start: "a"+ """, keep_all_tokens=True)
            tree = l.parse('aaa')
            self.assertEqual(tree.children, ['a', 'a', 'a'])

        def test_token_flags(self):
            l = _Lark("""!start: "a"i+
                      """)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            l = _Lark("""!start: /a/i+
                      """)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            # g = """!start: "a"i "a"
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            # g = """!start: /a/i /a/
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            g = """start: NAME "," "a"
                   NAME: /[a-z_]/i /[a-z0-9_]/i*
                """
            l = _Lark(g)
            tree = l.parse('ab,a')
            self.assertEqual(tree.children, ['ab'])
            tree = l.parse('AB,a')
            self.assertEqual(tree.children, ['AB'])

        def test_token_flags3(self):
            l = _Lark("""!start: ABC+
                         ABC: "abc"i
                      """)
            tree = l.parse('aBcAbC')
            self.assertEqual(tree.children, ['aBc', 'AbC'])

        def test_token_flags2(self):
            g = """!start: ("a"i | /a/ /b/?)+
                """
            l = _Lark(g)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_twice_empty(self):
            g = """!start: ("A"?)?
                """
            l = _Lark(g)
            tree = l.parse('A')
            self.assertEqual(tree.children, ['A'])
            tree = l.parse('')
            self.assertEqual(tree.children, [])

        def test_undefined_ignore(self):
            g = """!start: "A"
                   %ignore B
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_alias_in_terminal(self):
            g = """start: TERM
                   TERM: "a" -> alias
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_line_and_column(self):
            g = r"""!start: "A" bc "D"
                    !bc: "B\nC"
                 """
            l = _Lark(g)
            a, bc, d = l.parse("AB\nCD").children
            self.assertEqual(a.line, 1)
            self.assertEqual(a.column, 1)

            bc, = bc.children
            self.assertEqual(bc.line, 1)
            self.assertEqual(bc.column, 2)

            self.assertEqual(d.line, 2)
            self.assertEqual(d.column, 2)

            if LEXER != 'dynamic':
                self.assertEqual(a.end_line, 1)
                self.assertEqual(a.end_column, 2)
                self.assertEqual(bc.end_line, 2)
                self.assertEqual(bc.end_column, 2)
                self.assertEqual(d.end_line, 2)
                self.assertEqual(d.end_column, 3)

        def test_reduce_cycle(self):
            """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state.
            It seems that the correct solution is to explicitly distinguish finalization in the reduce() function.
            """
            l = _Lark("""
                term: A
                    | term term

                A: "a"
                """, start='term')

            tree = l.parse("aa")
            self.assertEqual(len(tree.children), 2)

        @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority")
        def test_lexer_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: A B | AB
            A.2: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['a', 'b'])
            self.assertNotEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B: "b"
            AB.3: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertNotEqual(res.children, ['a', 'b'])
            self.assertEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B.-20: "b"
            AB.-10: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertEqual(res.children, ['a', 'b'])

            grammar = """
            start: A B | AB
            A.-99999999999999999999999: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertEqual(res.children, ['ab'])
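
            # To summarize the cases above: the standard lexer selects the
            # terminal with the highest priority first and only falls back to
            # preferring the longer match to break ties, which is why the huge
            # negative priority on A makes 'ab' lex as a single AB token.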

        def test_import(self):
            grammar = """
            start: NUMBER WORD

            %import common.NUMBER
            %import common.WORD
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_import_rename(self):
            grammar = """
            start: N W

            %import common.NUMBER -> N
            %import common.WORD -> W
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_relative_import(self):
            l = _Lark_open('test_relative_import.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_import_unicode(self):
            l = _Lark_open('test_relative_import_unicode.lark', rel_to=__file__)
            x = l.parse(u'Ø')
            self.assertEqual(x.children, [u'Ø'])

        def test_relative_import_rename(self):
            l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_rule_import(self):
            l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('expr', ['a', Tree('expr', ['a', 'b']), 'b']),
                'y'])

        def test_relative_rule_import_drop_ignore(self):
            # %ignore rules are dropped on import
            l = _Lark_open('test_relative_rule_import_drop_ignore.lark',
                           rel_to=__file__)
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xa abby')

        def test_relative_rule_import_subrule(self):
            l = _Lark_open('test_relative_rule_import_subrule.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', [
                        'a', Tree('grammars__ab__expr', ['a', 'b']), 'b',
                    ]),
                ]),
                'y'])

        def test_relative_rule_import_subrule_no_conflict(self):
            l = _Lark_open(
                'test_relative_rule_import_subrule_no_conflict.lark',
                rel_to=__file__)
            x = l.parse('xaby')
            self.assertEqual(x.children, [Tree('expr', [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', ['a', 'b']),
                ]),
                'y'])])
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xaxabyby')

        def test_relative_rule_import_rename(self):
            l = _Lark_open('test_relative_rule_import_rename.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('ab', ['a', Tree('ab', ['a', 'b']), 'b']),
                'y'])

        def test_multi_import(self):
            grammar = """
            start: NUMBER WORD

            %import common (NUMBER, WORD, WS)
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 toucans')
            self.assertEqual(x.children, ['12', 'toucans'])

        def test_relative_multi_import(self):
            l = _Lark_open("test_relative_multi_import.lark", rel_to=__file__)
            x = l.parse('12 capybaras')
            self.assertEqual(x.children, ['12', 'capybaras'])

        def test_relative_import_preserves_leading_underscore(self):
            l = _Lark_open("test_relative_import_preserves_leading_underscore.lark", rel_to=__file__)
            x = l.parse('Ax')
            self.assertEqual(next(x.find_data('c')).children, ['A'])

        def test_relative_import_of_nested_grammar(self):
            l = _Lark_open("grammars/test_relative_import_of_nested_grammar.lark", rel_to=__file__)
            x = l.parse('N')
            self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])

        def test_relative_import_rules_dependencies_imported_only_once(self):
            l = _Lark_open("test_relative_import_rules_dependencies_imported_only_once.lark", rel_to=__file__)
            x = l.parse('AAA')
            self.assertEqual(next(x.find_data('a')).children, ['A'])
            self.assertEqual(next(x.find_data('b')).children, ['A'])
            self.assertEqual(next(x.find_data('d')).children, ['A'])

        def test_import_errors(self):
            grammar = """
            start: NUMBER WORD

            %import .grammars.bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

            grammar = """
            start: NUMBER WORD

            %import bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: a | b
            a.1: "a"
            b.2: "a"
            """

            # l = Lark(grammar, parser='earley', lexer='standard')
            l = _Lark(grammar)
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'b')

            grammar = """
            start: a | b
            a.2: "a"
            b.1: "a"
            """

            l = _Lark(grammar)
            # l = Lark(grammar, parser='earley', lexer='standard')
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'a')

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization_sum(self):
            "Tests effect of priority on result"

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_: "ab"
            bb_.1: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_.1: "ab"
            bb_: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.2: "a"
            b_.1: "b"
            ab_.3: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.1: "a"
            b_.1: "b"
            ab_.4: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

        def test_utf8(self):
            g = u"""start: a
                    a: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [Tree('a', [])]))

            g = u"""start: A
                    A: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [u'\xb1a']))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_ignore(self):
            grammar = r"""
            COMMENT: /(!|(\/\/))[^\n]*/
            %ignore COMMENT
            %import common.WS -> _WS
            %import common.INT
            start: "INT"i _WS+ INT _WS*
            """
            parser = _Lark(grammar)

            tree = parser.parse("int 1 ! This is a comment\n")
            self.assertEqual(tree.children, ['1'])

            tree = parser.parse("int 1 ! This is a comment")    # A trailing ignore token can be tricky!
            self.assertEqual(tree.children, ['1'])

            parser = _Lark(r"""
                start : "a"*
                %ignore "b"
            """)
            tree = parser.parse("bb")
            self.assertEqual(tree.children, [])

        def test_regex_escaping(self):
            g = _Lark("start: /[ab]/")
            g.parse('a')
            g.parse('b')

            self.assertRaises( UnexpectedInput, g.parse, 'c')

            _Lark(r'start: /\w/').parse('a')

            g = _Lark(r'start: /\\w/')
            self.assertRaises( UnexpectedInput, g.parse, 'a')
            g.parse(r'\w')

            _Lark(r'start: /\[/').parse('[')
            _Lark(r'start: /\//').parse('/')
            _Lark(r'start: /\\/').parse('\\')
            _Lark(r'start: /\[ab]/').parse('[ab]')
            _Lark(r'start: /\\[ab]/').parse('\\a')
            _Lark(r'start: /\t/').parse('\t')
            _Lark(r'start: /\\t/').parse('\\t')
            _Lark(r'start: /\\\t/').parse('\\\t')
            _Lark(r'start: "\t"').parse('\t')
            _Lark(r'start: "\\t"').parse('\\t')
            _Lark(r'start: "\\\t"').parse('\\\t')

        def test_ranged_repeat_rules(self):
            g = u"""!start: "A"~3
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["A", "A", "A"]))
            self.assertRaises(ParseError, l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: "A"~0..2
                 """
            if PARSER != 'cyk':  # XXX CYK currently doesn't support empty grammars
                l = _Lark(g)
                self.assertEqual(l.parse(u''), Tree('start', []))
                self.assertEqual(l.parse(u'A'), Tree('start', ['A']))
                self.assertEqual(l.parse(u'AA'), Tree('start', ['A', 'A']))
                self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'AAA')

            g = u"""!start: "A"~3..2
                 """
            self.assertRaises(GrammarError, _Lark, g)

            g = u"""!start: "A"~2..3 "B"~2
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
            self.assertEqual(l.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
            self.assertRaises(ParseError, l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        def test_ranged_repeat_terms(self):
            g = u"""!start: AAA
                    AAA: "A"~3
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"]))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: AABB CC
                    AABB: "A"~0..2 "B"~2
                    CC: "C"~1..2
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC']))
            self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C']))
            self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC']))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now")  # TODO XXX
        def test_priority_vs_embedded(self):
            g = """
            A.2: "a"
            WORD: ("a".."z")+

            start: (A | WORD)+
            """
            l = _Lark(g)
            t = l.parse('abc')
            self.assertEqual(t.children, ['a', 'bc'])
            self.assertEqual(t.children[0].type, 'A')

        def test_line_counting(self):
            p = _Lark("start: /[^x]+/")

            text = 'hello\nworld'
            t = p.parse(text)
            tok = t.children[0]
            self.assertEqual(tok, text)
            self.assertEqual(tok.line, 1)
            self.assertEqual(tok.column, 1)
            if LEXER != 'dynamic':
                self.assertEqual(tok.end_line, 2)
                self.assertEqual(tok.end_column, 6)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_empty_end(self):
            p = _Lark("""
                start: b c d
                b: "B"
                c: | "C"
                d: | "D"
            """)
            res = p.parse('B')
            self.assertEqual(len(res.children), 3)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_maybe_placeholders(self):
            # Anonymous tokens shouldn't count
            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [])

            # All invisible constructs shouldn't count
            p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
                         A: "a"
                         _c: "c" """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None])
            self.assertEqual(p.parse("c").children, [None])
            self.assertEqual(p.parse("aefc").children, ['a'])

            # ? shouldn't apply
            p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])

            p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None, None])
            self.assertEqual(p.parse("a").children, ['a', None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("c").children, [None, None, 'c'])
            self.assertEqual(p.parse("ab").children, ['a', 'b', None])
            self.assertEqual(p.parse("ac").children, ['a', None, 'c'])
            self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
            self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

            p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
            self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
            self.assertEqual(p.parse("babbcabcb").children,
                [None, 'b', None,
                 'a', 'b', None,
                 None, 'b', 'c',
                 'a', 'b', 'c',
                 None, 'b', None])

            p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
            self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
            self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
            self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])
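
            # In short: with maybe_placeholders=True, an optional [..] that fails
            # to match leaves a None placeholder for each token it would have kept
            # (purely invisible contents leave none), while a `?` optional never
            # reserves a slot.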

        def test_escaped_string(self):
            "Tests common.ESCAPED_STRING"
            grammar = r"""
            start: ESCAPED_STRING+

            %import common (WS_INLINE, ESCAPED_STRING)
            %ignore WS_INLINE
            """
            parser = _Lark(grammar)

            parser.parse(r'"\\" "b" "c"')
            parser.parse(r'"That" "And a \"b"')

        def test_meddling_unused(self):
            "Unless 'unused' is removed, LALR analysis will fail on reduce-reduce collision"
            grammar = """
                start: EKS* x
                x: EKS
                unused: x*
                EKS: "x"
            """
            parser = _Lark(grammar)

        @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
        def test_serialize(self):
            grammar = """
                start: _ANY b "C"
                _ANY: /./
                b: "B"
            """
            parser = _Lark(grammar)
            s = BytesIO()
            parser.save(s)
            s.seek(0)
            parser2 = Lark.load(s)
            self.assertEqual(parser2.parse('ABC'), Tree('start', [Tree('b', [])]) )

        def test_multi_start(self):
            parser = _Lark('''
                a: "x" "a"?
                b: "x" "b"?
            ''', start=['a', 'b'])

            self.assertEqual(parser.parse('xa', 'a'), Tree('a', []))
            self.assertEqual(parser.parse('xb', 'b'), Tree('b', []))

        def test_lexer_detect_newline_tokens(self):
            # Detect newlines in regular tokens
            g = _Lark(r"""start: "go" tail*
                          !tail : SA "@" | SB "@" | SC "@" | SD "@"
                          SA : "a" /\n/
                          SB : /b./s
                          SC : "c" /[^a-z]/
                          SD : "d" /\s/
                       """)
            a, b, c, d = [x.children[1] for x in g.parse('goa\n@b\n@c\n@d\n@').children]
            self.assertEqual(a.line, 2)
            self.assertEqual(b.line, 3)
            self.assertEqual(c.line, 4)
            self.assertEqual(d.line, 5)

            # Detect newlines in ignored tokens
            # (loop variable renamed from `re` to avoid shadowing the re module)
            for regex in ['/\\n/', '/[^a-z]/', '/\\s/']:
                g = _Lark('''!start: "a" "a"
                             %ignore {}'''.format(regex))
                a, b = g.parse('a\na').children
                self.assertEqual(a.line, 1)
                self.assertEqual(b.line, 2)

    _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
    _TestParser.__name__ = _NAME
    _TestParser.__qualname__ = "tests.test_parser." + _NAME
    globals()[_NAME] = _TestParser

# Note: You still have to import them in __main__ for the tests to run
_TO_TEST = [
    ('standard', 'earley'),
    ('standard', 'cyk'),
    ('dynamic', 'earley'),
    ('dynamic_complete', 'earley'),
    ('standard', 'lalr'),
    ('contextual', 'lalr'),
    ('custom', 'lalr'),
    # (None, 'earley'),
]

for _LEXER, _PARSER in _TO_TEST:
    _make_parser_test(_LEXER, _PARSER)

for _LEXER in ('dynamic', 'dynamic_complete'):
    _make_full_earley_test(_LEXER)

if __name__ == '__main__':
    unittest.main()