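# Tests for Lark's parsers (Earley, CYK, LALR) across the available lexer
# configurations. Most of the test-case classes are generated at import time
# by the factory functions below and registered in the module globals.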
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import unittest
import logging
import os
import sys
from copy import deepcopy

try:
    from cStringIO import StringIO as cStringIO
except ImportError:
    # Available only in Python 2.x, 3.x only has io.StringIO from below
    cStringIO = None
from io import (
    StringIO as uStringIO,
    open,
)

logging.basicConfig(level=logging.INFO)

from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer

__path__ = os.path.dirname(__file__)

def _read(n, *args):
    with open(os.path.join(__path__, n), *args) as f:
        return f.read()


class TestParsers(unittest.TestCase):
    def test_same_ast(self):
        "Tests that Earley and LALR parsers produce equal trees"
        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w+/ """, parser='lalr')
        l = g.parse('(a,b,c,*x)')

        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w/+ """)
        l2 = g.parse('(a,b,c,*x)')
        assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())

    def test_infinite_recurse(self):
        g = """start: a
               a: a | "a"
            """
        self.assertRaises(GrammarError, Lark, g, parser='lalr')

        # TODO: should it? shouldn't it?
        # l = Lark(g, parser='earley', lexer='dynamic')
        # self.assertRaises(ParseError, l.parse, 'a')

    def test_propagate_positions(self):
        g = Lark("""start: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )

        g = Lark("""start: x
                    x: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )

    def test_expand1(self):
        g = Lark("""start: a
                    ?a: b
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "b" )

        g = Lark("""start: a
                    ?a: b -> c
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: B -> c
                    B: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: b b -> c
                    b: "x"
                 """)

        r = g.parse('xx')
        self.assertEqual( r.children[0].data, "c" )

    def test_comment_in_rule_definition(self):
        g = Lark("""start: a
               a: "a"
                // A comment
                // Another comment
                | "b"
                // Still more
               c: "unrelated"
            """)
        r = g.parse('b')
        self.assertEqual( r.children[0].data, "a" )

    def test_visit_tokens(self):
        class T(Transformer):
            def a(self, children):
                return children[0] + "!"
            def A(self, tok):
                return tok.update(value=tok.upper())

        # Test regular
        g = """start: a
               a : A
               A: "x"
            """
        p = Lark(g, parser='lalr')
        r = T(False).transform(p.parse("x"))
        self.assertEqual( r.children, ["x!"] )
        r = T().transform(p.parse("x"))
        self.assertEqual( r.children, ["X!"] )

        # Test internal transformer
        p = Lark(g, parser='lalr', transformer=T())
        r = p.parse("x")
        self.assertEqual( r.children, ["X!"] )

    def test_vargs_meta(self):
        @v_args(meta=True)
        class T1(Transformer):
            def a(self, children, meta):
                assert not children
                return meta.line

            def start(self, children, meta):
                return children

        @v_args(meta=True, inline=True)
        class T2(Transformer):
            def a(self, meta):
                return meta.line

            def start(self, meta, *res):
                return list(res)

        for T in (T1, T2):
            for internal in [False, True]:
                try:
                    g = Lark(r"""start: a+
                                 a : "x" _NL?
                                 _NL: /\n/+
                              """, parser='lalr', transformer=T() if internal else None, propagate_positions=True)
                except NotImplementedError:
                    assert internal
                    continue
                res = g.parse("xx\nx\nxxx\n\n\nxx")
                assert not internal
                res = T().transform(res)

                self.assertEqual(res, [1, 1, 2, 3, 3, 3, 6, 6])

    def test_vargs_tree(self):
        tree = Lark('''
            start: a a a
            !a: "A"
        ''').parse('AAA')
        tree_copy = deepcopy(tree)

        @v_args(tree=True)
        class T(Transformer):
            def a(self, tree):
                return 1
            def start(self, tree):
                return tree.children

        res = T().transform(tree)
        self.assertEqual(res, [1, 1, 1])
        self.assertEqual(tree, tree_copy)

    def test_embedded_transformer(self):
        class T(Transformer):
            def a(self, children):
                return "<a>"
            def b(self, children):
                return "<b>"
            def c(self, children):
                return "<c>"

        # Test regular
        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<a>"] )

        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<a>"] )

        # Test Expand1
        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<b>"] )

        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<b>"] )

        # Test Expand1 -> Alias
        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("xx"))
        self.assertEqual( r.children, ["<c>"] )

        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("xx")
        self.assertEqual( r.children, ["<c>"] )

    def test_embedded_transformer_inplace(self):
        @v_args(tree=True)
        class T1(Transformer_InPlace):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        @v_args(tree=True)
        class T2(Transformer):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        class T3(Transformer):
            @v_args(tree=True)
            def a(self, tree):
                assert isinstance(tree, Tree)
                tree.children.append("tested")
                return tree

            @v_args(tree=True)
            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        for t in [T1(), T2(), T3()]:
            for internal in [False, True]:
                g = Lark("""start: a b
                            a : "x"
                            b : "y"
                         """, parser='lalr', transformer=t if internal else None)
                r = g.parse("xy")
                if not internal:
                    r = t.transform(r)

                a, b = r.children
                self.assertEqual(a.children, ["tested"])
                self.assertEqual(b.children, ["tested2"])

    def test_alias(self):
        Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """)
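

# _make_full_earley_test builds a TestCase subclass bound to the given Earley
# lexer configuration and registers it in the module globals, so the unittest
# loader discovers one full Earley suite per lexer.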
def _make_full_earley_test(LEXER):
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=LEXER, parser='earley', propagate_positions=True, **kwargs)

    class _TestFullEarley(unittest.TestCase):
        def test_anon(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = Lark(r"""start: B
                         B: ("ab"|/[^b]/)+
                      """, lexer=LEXER)

            self.assertEqual( g.parse('abc').children[0], 'abc')

        def test_earley(self):
            g = Lark("""start: A "b" c
                        A: "a"+
                        c: "abc"
                     """, parser="earley", lexer=LEXER)
            x = g.parse('aaaababc')

        def test_earley2(self):
            grammar = """
            start: statement+

            statement: "r"
                     | "c" /[a-z]/+

            %ignore " "
            """

            program = """c b r"""

            l = Lark(grammar, parser='earley', lexer=LEXER)
            l.parse(program)

        @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete parser")
        def test_earley3(self):
            """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)

            By default, `+` should imitate regexp greedy-matching
            """
            grammar = """
            start: A A
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            self.assertEqual(set(res.children), {'aa', 'a'})
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should do a greedy search for terminals, but a lazy search for rules
            # self.assertEqual(res.children, ['aa', 'a'])

        def test_earley4(self):
            grammar = """
            start: A A?
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            assert set(res.children) == {'aa', 'a'} or res.children == ['aaa']
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should do a greedy search for terminals, but a lazy search for rules
            # self.assertEqual(res.children, ['aaa'])

        def test_earley_repeating_empty(self):
            # This was a sneaky bug!
            grammar = """
            !start: "a" empty empty "b"
            empty: empty2
            empty2:
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER)
            res = parser.parse('ab')

            empty_tree = Tree('empty', [Tree('empty2', [])])
            self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_earley_explicit_ambiguity(self):
            # This was a sneaky bug!
            grammar = """
            start: a b | ab
            a: "a"
            b: "b"
            ab: "ab"
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit')
            ambig_tree = parser.parse('ab')
            self.assertEqual( ambig_tree.data, '_ambig')
            self.assertEqual( len(ambig_tree.children), 2)

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity1(self):
            grammar = """
            start: cd+ "e"

            !cd: "c"
               | "d"
               | "cd"
            """
            l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
            ambig_tree = l.parse('cde')

            assert ambig_tree.data == '_ambig', ambig_tree
            assert len(ambig_tree.children) == 2

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity2(self):
            grammar = """
            ANY: /[a-zA-Z0-9 ]+/
            a.2: "A" b+
            b.2: "B"
            c: ANY

            start: (a|c)*
            """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse('ABX')
            expected = Tree('start', [
                Tree('a', [
                    Tree('b', [])
                ]),
                Tree('c', [
                    'X'
                ])
            ])
            self.assertEqual(res, expected)

        def test_fruitflies_ambig(self):
            grammar = """
                start: noun verb noun          -> simple
                     | noun verb "like" noun   -> comparative

                noun: adj? NOUN
                verb: VERB
                adj: ADJ

                NOUN: "flies" | "bananas" | "fruit"
                VERB: "like" | "flies"
                ADJ: "fruit"

                %import common.WS
                %ignore WS
            """
            parser = Lark(grammar, ambiguity='explicit', lexer=LEXER)
            tree = parser.parse('fruit flies like bananas')

            expected = Tree('_ambig', [
                Tree('comparative', [
                    Tree('noun', ['fruit']),
                    Tree('verb', ['flies']),
                    Tree('noun', ['bananas'])
                ]),
                Tree('simple', [
                    Tree('noun', [Tree('adj', ['fruit']), 'flies']),
                    Tree('verb', ['like']),
                    Tree('noun', ['bananas'])
                ])
            ])

            # self.assertEqual(tree, expected)
            self.assertEqual(tree.data, expected.data)
            self.assertEqual(set(tree.children), set(expected.children))

        @unittest.skipIf(LEXER!='dynamic_complete', "Only relevant for the dynamic_complete parser")
        def test_explicit_ambiguity2(self):
            grammar = r"""
            start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """cat"""

            parser = _Lark(grammar, start='start', ambiguity='explicit')
            tree = parser.parse(text)
            self.assertEqual(tree.data, '_ambig')

            combinations = {tuple(str(s) for s in t.children) for t in tree.children}
            self.assertEqual(combinations, {
                ('cat',),
                ('ca', 't'),
                ('c', 'at'),
                ('c', 'a' ,'t')
            })

        def test_term_ambig_resolve(self):
            grammar = r"""
            !start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """foo bar"""

            parser = Lark(grammar)
            tree = parser.parse(text)
            self.assertEqual(tree.children, ['foo', 'bar'])

        # @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
        # def test_not_all_derivations(self):
        #     grammar = """
        #     start: cd+ "e"
        #
        #     !cd: "c"
        #        | "d"
        #        | "cd"
        #     """
        #     l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
        #     x = l.parse('cde')
        #     assert x.data != '_ambig', x
        #     assert len(x.children) == 1

    _NAME = "TestFullEarley" + LEXER.capitalize()
    _TestFullEarley.__name__ = _NAME
    globals()[_NAME] = _TestFullEarley


class CustomLexer(Lexer):
    """
    Purpose of this custom lexer is to test the integration,
    so it uses the TraditionalLexer as its implementation, without custom lexing behaviour.
    """
    def __init__(self, lexer_conf):
        self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks)
    def lex(self, *args, **kwargs):
        return self.lexer.lex(*args, **kwargs)
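

# _make_parser_test builds a TestCase subclass for one (lexer, parser)
# combination (see _TO_TEST at the bottom of the file) and registers it in
# the module globals.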
def _make_parser_test(LEXER, PARSER):
    lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)
    def _Lark_open(gfilename, **kwargs):
        return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)

    class _TestParser(unittest.TestCase):
        def test_basic1(self):
            g = _Lark("""start: a+ b a* "b" a*
                         b: "b"
                         a: "a"
                      """)

            r = g.parse('aaabaab')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )
            r = g.parse('aaabaaba')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaaa' )

            self.assertRaises(ParseError, g.parse, 'aaabaa')

        def test_basic2(self):
            # Multiple parsers and colliding tokens
            g = _Lark("""start: B A
                         B: "12"
                         A: "1" """)
            g2 = _Lark("""start: B A
                          B: "12"
                          A: "2" """)
            x = g.parse('121')
            assert x.data == 'start' and x.children == ['12', '1'], x
            x = g2.parse('122')
            assert x.data == 'start' and x.children == ['12', '2'], x

        @unittest.skipIf(cStringIO is None, "cStringIO not available")
        def test_stringio_bytes(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_stringio_unicode(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_unicode(self):
            g = _Lark(u"""start: UNIA UNIB UNIA
                          UNIA: /\xa3/
                          UNIB: /\u0101/
                       """)
            g.parse(u'\xa3\u0101\u00a3')

        def test_unicode2(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "a\u0101b\ "
                          UNIC: /a?\u0101c\n/
                       """)
            g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n')

        def test_unicode3(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "\u0101"
                          UNIC: /\u0203/ /\n/
                       """)
            g.parse(u'\xa3\u0101\u00a3\u0203\n')

        def test_hex_escape(self):
            g = _Lark(r"""start: A B C
                          A: "\x01"
                          B: /\x02/
                          C: "\xABCD"
                       """)
            g.parse('\x01\x02\xABCD')

        def test_unicode_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\u0061".."\u0063"
                       """)
            g.parse('abc')

        def test_hex_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\x01".."\x03"
                       """)
            g.parse('\x01\x02\x03')

        @unittest.skipIf(PARSER == 'cyk', "Takes forever")
        def test_stack_for_ebnf(self):
            """Verify that stack depth isn't an issue for EBNF grammars"""
            g = _Lark(r"""start: a+
                          a : "a" """)
            g.parse("a" * (sys.getrecursionlimit()*2 ))

        def test_expand1_lists_with_one_item(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("a")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_expand1_lists_with_one_item_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("a!")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_dont_expand1_lists_with_multiple_items(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        def test_dont_expand1_lists_with_multiple_items_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa!")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list(self):
            g = _Lark(r"""start: list
                          ?list: item*
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list_2(self):
            g = _Lark(r"""start: list
                          ?list: item* "!"?
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # Because 'list' is a flatten rule, its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_single_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,")

            # Because 'list' is a flatten rule, its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item',))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_multiple_item_flatten_list(self):
            g = _Lark(r"""start: list
                          #list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,a,")

            # Because 'list' is a flatten rule, its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_recurse_flatten(self):
            """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
            g = _Lark(r"""start: a | start a
                          a : A
                          A : "a" """)

            # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
            # STree data structures, which uses recursion).
            g.parse("a" * (sys.getrecursionlimit() // 4))

        def test_token_collision(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %ignore " "
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision_WS(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %import common.WS
                          %ignore WS
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision2(self):
            g = _Lark("""
                    !start: "starts"

                    %import common.LCASE_LETTER
                    """)

            x = g.parse("starts")
            self.assertSequenceEqual(x.children, ['starts'])

        # def test_string_priority(self):
        #     g = _Lark("""start: (A | /a?bb/)+
        #                  A: "a" """)
        #     x = g.parse('abb')
        #     self.assertEqual(len(x.children), 2)
        #
        #     # This parse raises an exception because the lexer will always try to consume
        #     # "a" first and will never match the regular expression
        #     # This behavior is subject to change!!
        #     # This won't happen with ambiguity handling.
        #     g = _Lark("""start: (A | /a?ab/)+
        #                  A: "a" """)
        #     self.assertRaises(LexError, g.parse, 'aab')

        def test_undefined_rule(self):
            self.assertRaises(GrammarError, _Lark, """start: a""")

        def test_undefined_token(self):
            self.assertRaises(GrammarError, _Lark, """start: A""")

        def test_rule_collision(self):
            g = _Lark("""start: "a"+ "b"
                              | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')

        def test_rule_collision2(self):
            g = _Lark("""start: "a"* "b"
                              | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')
            x = g.parse('b')

        def test_token_not_anon(self):
            """Tests that "a" is matched as an anonymous token, and not A.
            """
            g = _Lark("""start: "a"
                         A: "a" """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 0, '"a" should be considered anonymous')

            g = _Lark("""start: "a" A
                         A: "a" """)
            x = g.parse('aa')
            self.assertEqual(len(x.children), 1, 'only "a" should be considered anonymous')
            self.assertEqual(x.children[0].type, "A")

            g = _Lark("""start: /a/
                         A: /a/ """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 1)
            self.assertEqual(x.children[0].type, "A", "A isn't associated with /a/")

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_maybe(self):
            g = _Lark("""start: ["a"] """)
            x = g.parse('a')
            x = g.parse('')

        def test_start(self):
            g = _Lark("""a: "a" a? """, start='a')
            x = g.parse('a')
            x = g.parse('aa')
            x = g.parse('aaa')

        def test_alias(self):
            g = _Lark("""start: "a" -> b """)
            x = g.parse('a')
            self.assertEqual(x.data, "b")

        def test_token_ebnf(self):
            g = _Lark("""start: A
                         A: "a"* ("b"? "c".."e")+
                      """)
            x = g.parse('abcde')
            x = g.parse('dd')

        def test_backslash(self):
            g = _Lark(r"""start: "\\" "a"
                       """)
            x = g.parse(r'\a')

            g = _Lark(r"""start: /\\/ /a/
                       """)
            x = g.parse(r'\a')

        def test_backslash2(self):
            g = _Lark(r"""start: "\"" "-"
                       """)
            x = g.parse('"-')

            g = _Lark(r"""start: /\// /-/
                       """)
            x = g.parse('/-')

        def test_special_chars(self):
            g = _Lark(r"""start: "\n"
                       """)
            x = g.parse('\n')

            g = _Lark(r"""start: /\n/
                       """)
            x = g.parse('\n')

        # def test_token_recurse(self):
        #     g = _Lark("""start: A
        #                  A: B
        #                  B: A
        #               """)

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = _Lark(r"""start: _empty a "B"
                          a: _empty "A"
                          _empty:
                       """)
            x = g.parse('AB')

        def test_regex_quote(self):
            g = r"""
            start: SINGLE_QUOTED_STRING | DOUBLE_QUOTED_STRING
            SINGLE_QUOTED_STRING : /'[^']*'/
            DOUBLE_QUOTED_STRING : /"[^"]*"/
            """
            g = _Lark(g)
            self.assertEqual( g.parse('"hello"').children, ['"hello"'])
            self.assertEqual( g.parse("'hello'").children, ["'hello'"])

        def test_lexer_token_limit(self):
            "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"
            tokens = {'A%d'%i:'"%d"'%i for i in range(300)}
            g = _Lark("""start: %s
                      %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))

        def test_float_without_lexer(self):
            expected_error = UnexpectedCharacters if LEXER.startswith('dynamic') else UnexpectedToken
            if PARSER == 'cyk':
                expected_error = ParseError

            g = _Lark("""start: ["+"|"-"] float
                         float: digit* "." digit+ exp?
                              | digit+ exp
                         exp: ("e"|"E") ["+"|"-"] digit+
                         digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
                      """)
            g.parse("1.2")
            g.parse("-.2e9")
            g.parse("+2e-9")
            self.assertRaises( expected_error, g.parse, "+2e-9e")

        def test_keep_all_tokens(self):
            l = _Lark("""start: "a"+ """, keep_all_tokens=True)
            tree = l.parse('aaa')
            self.assertEqual(tree.children, ['a', 'a', 'a'])

        def test_token_flags(self):
            l = _Lark("""!start: "a"i+
                      """
                      )
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            l = _Lark("""!start: /a/i+
                      """
                      )
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            # g = """!start: "a"i "a"
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            # g = """!start: /a/i /a/
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            g = """start: NAME "," "a"
                   NAME: /[a-z_]/i /[a-z0-9_]/i*
                """
            l = _Lark(g)
            tree = l.parse('ab,a')
            self.assertEqual(tree.children, ['ab'])
            tree = l.parse('AB,a')
            self.assertEqual(tree.children, ['AB'])

        def test_token_flags3(self):
            l = _Lark("""!start: ABC+
                      ABC: "abc"i
                      """
                      )
            tree = l.parse('aBcAbC')
            self.assertEqual(tree.children, ['aBc', 'AbC'])

        def test_token_flags2(self):
            g = """!start: ("a"i | /a/ /b/?)+
                """
            l = _Lark(g)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_twice_empty(self):
            g = """!start: ("A"?)?
                """
            l = _Lark(g)
            tree = l.parse('A')
            self.assertEqual(tree.children, ['A'])
            tree = l.parse('')
            self.assertEqual(tree.children, [])

        def test_undefined_ignore(self):
            g = """!start: "A"

                %ignore B
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_alias_in_terminal(self):
            g = """start: TERM
                   TERM: "a" -> alias
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_line_and_column(self):
            g = r"""!start: "A" bc "D"
                    !bc: "B\nC"
                """
            l = _Lark(g)
            a, bc, d = l.parse("AB\nCD").children
            self.assertEqual(a.line, 1)
            self.assertEqual(a.column, 1)

            bc, = bc.children
            self.assertEqual(bc.line, 1)
            self.assertEqual(bc.column, 2)

            self.assertEqual(d.line, 2)
            self.assertEqual(d.column, 2)

            if LEXER != 'dynamic':
                self.assertEqual(a.end_line, 1)
                self.assertEqual(a.end_column, 2)
                self.assertEqual(bc.end_line, 2)
                self.assertEqual(bc.end_column, 2)
                self.assertEqual(d.end_line, 2)
                self.assertEqual(d.end_column, 3)

        def test_reduce_cycle(self):
            """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state.
            It seems that the correct solution is to explicitly distinguish finalization in the reduce() function.
            """
            l = _Lark("""
                term: A
                    | term term

                A: "a"
                """, start='term')
            tree = l.parse("aa")
            self.assertEqual(len(tree.children), 2)

        @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority")
        def test_lexer_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: A B | AB
            A.2: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['a', 'b'])
            self.assertNotEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B: "b"
            AB.3: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertNotEqual(res.children, ['a', 'b'])
            self.assertEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B.-20: "b"
            AB.-10: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertEqual(res.children, ['a', 'b'])

            grammar = """
            start: A B | AB
            A.-99999999999999999999999: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['ab'])

        def test_import(self):
            grammar = """
            start: NUMBER WORD

            %import common.NUMBER
            %import common.WORD
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_import_rename(self):
            grammar = """
            start: N W

            %import common.NUMBER -> N
            %import common.WORD -> W
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_relative_import(self):
            l = _Lark_open('test_relative_import.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_import_unicode(self):
            l = _Lark_open('test_relative_import_unicode.lark', rel_to=__file__)
            x = l.parse(u'Ø')
            self.assertEqual(x.children, [u'Ø'])

        def test_relative_import_rename(self):
            l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_rule_import(self):
            l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('expr', ['a', Tree('expr', ['a', 'b']), 'b']),
                'y'])

        def test_relative_rule_import_drop_ignore(self):
            # %ignore rules are dropped on import
            l = _Lark_open('test_relative_rule_import_drop_ignore.lark',
                           rel_to=__file__)
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xa abby')

        def test_relative_rule_import_subrule(self):
            l = _Lark_open('test_relative_rule_import_subrule.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', [
                        'a', Tree('grammars__ab__expr', ['a', 'b']), 'b',
                    ]),
                ]),
                'y'])

        def test_relative_rule_import_subrule_no_conflict(self):
            l = _Lark_open(
                'test_relative_rule_import_subrule_no_conflict.lark',
                rel_to=__file__)
            x = l.parse('xaby')
            self.assertEqual(x.children, [Tree('expr', [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', ['a', 'b']),
                ]),
                'y'])])
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xaxabyby')

        def test_relative_rule_import_rename(self):
            l = _Lark_open('test_relative_rule_import_rename.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('ab', ['a', Tree('ab', ['a', 'b']), 'b']),
                'y'])

        def test_multi_import(self):
            grammar = """
            start: NUMBER WORD

            %import common (NUMBER, WORD, WS)
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 toucans')
            self.assertEqual(x.children, ['12', 'toucans'])

        def test_relative_multi_import(self):
            l = _Lark_open("test_relative_multi_import.lark", rel_to=__file__)
            x = l.parse('12 capybaras')
            self.assertEqual(x.children, ['12', 'capybaras'])

        def test_relative_import_preserves_leading_underscore(self):
            l = _Lark_open("test_relative_import_preserves_leading_underscore.lark", rel_to=__file__)
            x = l.parse('Ax')
            self.assertEqual(next(x.find_data('c')).children, ['A'])

        def test_relative_import_of_nested_grammar(self):
            l = _Lark_open("grammars/test_relative_import_of_nested_grammar.lark", rel_to=__file__)
            x = l.parse('N')
            self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])

        def test_relative_import_rules_dependencies_imported_only_once(self):
            l = _Lark_open("test_relative_import_rules_dependencies_imported_only_once.lark", rel_to=__file__)
            x = l.parse('AAA')
            self.assertEqual(next(x.find_data('a')).children, ['A'])
            self.assertEqual(next(x.find_data('b')).children, ['A'])
            self.assertEqual(next(x.find_data('d')).children, ['A'])

        def test_import_errors(self):
            grammar = """
            start: NUMBER WORD

            %import .grammars.bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

            grammar = """
            start: NUMBER WORD

            %import bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: a | b
            a.1: "a"
            b.2: "a"
            """

            # l = Lark(grammar, parser='earley', lexer='standard')
            l = _Lark(grammar)
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'b')

            grammar = """
            start: a | b
            a.2: "a"
            b.1: "a"
            """

            l = _Lark(grammar)
            # l = Lark(grammar, parser='earley', lexer='standard')
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'a')

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization_sum(self):
            "Tests effect of priority on result"

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_: "ab"
            bb_.1: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_.1: "ab"
            bb_: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.2: "a"
            b_.1: "b"
            ab_.3: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.1: "a"
            b_.1: "b"
            ab_.4: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

        def test_utf8(self):
            g = u"""start: a
                    a: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [Tree('a', [])]))

            g = u"""start: A
                    A: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [u'\xb1a']))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_ignore(self):
            grammar = r"""
            COMMENT: /(!|(\/\/))[^\n]*/
            %ignore COMMENT
            %import common.WS -> _WS
            %import common.INT
            start: "INT"i _WS+ INT _WS*
            """

            parser = _Lark(grammar)

            tree = parser.parse("int 1 ! This is a comment\n")
            self.assertEqual(tree.children, ['1'])

            tree = parser.parse("int 1 ! This is a comment")    # A trailing ignore token can be tricky!
            self.assertEqual(tree.children, ['1'])

            parser = _Lark(r"""
                start : "a"*
                %ignore "b"
            """)
            tree = parser.parse("bb")
            self.assertEqual(tree.children, [])

        def test_regex_escaping(self):
            g = _Lark("start: /[ab]/")
            g.parse('a')
            g.parse('b')

            self.assertRaises( UnexpectedInput, g.parse, 'c')

            _Lark(r'start: /\w/').parse('a')

            g = _Lark(r'start: /\\w/')
            self.assertRaises( UnexpectedInput, g.parse, 'a')
            g.parse(r'\w')

            _Lark(r'start: /\[/').parse('[')
            _Lark(r'start: /\//').parse('/')
            _Lark(r'start: /\\/').parse('\\')
            _Lark(r'start: /\[ab]/').parse('[ab]')
            _Lark(r'start: /\\[ab]/').parse('\\a')
            _Lark(r'start: /\t/').parse('\t')
            _Lark(r'start: /\\t/').parse('\\t')
            _Lark(r'start: /\\\t/').parse('\\\t')
            _Lark(r'start: "\t"').parse('\t')
            _Lark(r'start: "\\t"').parse('\\t')
            _Lark(r'start: "\\\t"').parse('\\\t')

        def test_ranged_repeat_rules(self):
            g = u"""!start: "A"~3
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["A", "A", "A"]))
            self.assertRaises(ParseError, l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: "A"~0..2
                 """
            if PARSER != 'cyk': # XXX CYK currently doesn't support empty grammars
                l = _Lark(g)
                self.assertEqual(l.parse(u''), Tree('start', []))
                self.assertEqual(l.parse(u'A'), Tree('start', ['A']))
                self.assertEqual(l.parse(u'AA'), Tree('start', ['A', 'A']))
                self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'AAA')

            g = u"""!start: "A"~3..2
                 """
            self.assertRaises(GrammarError, _Lark, g)

            g = u"""!start: "A"~2..3 "B"~2
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
            self.assertEqual(l.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
            self.assertRaises(ParseError, l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        def test_ranged_repeat_terms(self):
            g = u"""!start: AAA
                    AAA: "A"~3
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"]))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: AABB CC
                    AABB: "A"~0..2 "B"~2
                    CC: "C"~1..2
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC']))
            self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C']))
            self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC']))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now") # TODO XXX
        def test_priority_vs_embedded(self):
            g = """
            A.2: "a"
            WORD: ("a".."z")+

            start: (A | WORD)+
            """
            l = _Lark(g)
            t = l.parse('abc')
            self.assertEqual(t.children, ['a', 'bc'])
            self.assertEqual(t.children[0].type, 'A')

        def test_line_counting(self):
            p = _Lark("start: /[^x]+/")

            text = 'hello\nworld'
            t = p.parse(text)
            tok = t.children[0]
            self.assertEqual(tok, text)
            self.assertEqual(tok.line, 1)
            self.assertEqual(tok.column, 1)
            if LEXER != 'dynamic':
                self.assertEqual(tok.end_line, 2)
                self.assertEqual(tok.end_column, 6)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_empty_end(self):
            p = _Lark("""
                start: b c d
                b: "B"
                c: | "C"
                d: | "D"
            """)
            res = p.parse('B')
            self.assertEqual(len(res.children), 3)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_maybe_placeholders(self):
            # Anonymous tokens shouldn't count
            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [])

            # All invisible constructs shouldn't count
            p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
                         A: "a"
                         _c: "c" """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None])
            self.assertEqual(p.parse("c").children, [None])
            self.assertEqual(p.parse("aefc").children, ['a'])

            # ? shouldn't apply
            p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])

            p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None, None])
            self.assertEqual(p.parse("a").children, ['a', None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("c").children, [None, None, 'c'])
            self.assertEqual(p.parse("ab").children, ['a', 'b', None])
            self.assertEqual(p.parse("ac").children, ['a', None, 'c'])
            self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
            self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

            p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
            self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
            self.assertEqual(p.parse("babbcabcb").children,
                [None, 'b', None,
                 'a', 'b', None,
                 None, 'b', 'c',
                 'a', 'b', 'c',
                 None, 'b', None])

            p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
            self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
            self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
            self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])

        def test_escaped_string(self):
            "Tests common.ESCAPED_STRING"
            grammar = r"""
            start: ESCAPED_STRING+

            %import common (WS_INLINE, ESCAPED_STRING)
            %ignore WS_INLINE
            """

            parser = _Lark(grammar)
            parser.parse(r'"\\" "b" "c"')
            parser.parse(r'"That" "And a \"b"')

        def test_meddling_unused(self):
            "Unless 'unused' is removed, LALR analysis will fail on reduce-reduce collision"

            grammar = """
                start: EKS* x
                x: EKS
                unused: x*
                EKS: "x"
            """
            parser = _Lark(grammar)

        @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
        def test_serialize(self):
            grammar = """
                start: _ANY b "C"
                _ANY: /./
                b: "B"
            """
            parser = _Lark(grammar)
            d = parser.serialize()
            parser2 = Lark.deserialize(d, {}, {})
            self.assertEqual(parser2.parse('ABC'), Tree('start', [Tree('b', [])]) )

            namespace = {'Rule': Rule, 'TerminalDef': TerminalDef}
            d, m = parser.memo_serialize(namespace.values())
            parser3 = Lark.deserialize(d, namespace, m)
            self.assertEqual(parser3.parse('ABC'), Tree('start', [Tree('b', [])]) )

        def test_multi_start(self):
            parser = _Lark('''
                a: "x" "a"?
                b: "x" "b"?
            ''', start=['a', 'b'])

            self.assertEqual(parser.parse('xa', 'a'), Tree('a', []))
            self.assertEqual(parser.parse('xb', 'b'), Tree('b', []))

        def test_lexer_detect_newline_tokens(self):
            # Detect newlines in regular tokens
            g = _Lark(r"""start: "go" tail*
                          !tail : SA "@" | SB "@" | SC "@" | SD "@"
                          SA : "a" /\n/
                          SB : /b./s
                          SC : "c" /[^a-z]/
                          SD : "d" /\s/
                       """)
            a, b, c, d = [x.children[1] for x in g.parse('goa\n@b\n@c\n@d\n@').children]
            self.assertEqual(a.line, 2)
            self.assertEqual(b.line, 3)
            self.assertEqual(c.line, 4)
            self.assertEqual(d.line, 5)

            # Detect newlines in ignored tokens
            for re in ['/\\n/', '/[^a-z]/', '/\\s/']:
                g = _Lark('''!start: "a" "a"
                             %ignore {}'''.format(re))
                a, b = g.parse('a\na').children
                self.assertEqual(a.line, 1)
                self.assertEqual(b.line, 2)

    _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
    _TestParser.__name__ = _NAME
    globals()[_NAME] = _TestParser
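

# Each (lexer, parser) pair below is expanded by _make_parser_test into a
# generated TestCase class named "Test" + parser.capitalize() + lexer.capitalize(),
# e.g. TestEarleyStandard.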
# Note: You still have to import them in __main__ for the tests to run
_TO_TEST = [
        ('standard', 'earley'),
        ('standard', 'cyk'),
        ('dynamic', 'earley'),
        ('dynamic_complete', 'earley'),
        ('standard', 'lalr'),
        ('contextual', 'lalr'),
        ('custom', 'lalr'),
        # (None, 'earley'),
]

for _LEXER, _PARSER in _TO_TEST:
    _make_parser_test(_LEXER, _PARSER)

for _LEXER in ('dynamic', 'dynamic_complete'):
    _make_full_earley_test(_LEXER)

if __name__ == '__main__':
    unittest.main()