# -*- coding: utf-8 -*-
from __future__ import absolute_import

import re
import unittest
import logging
import os
import sys
from copy import deepcopy

try:
    from cStringIO import StringIO as cStringIO
except ImportError:
    # Available only in Python 2.x; Python 3.x only has io.StringIO, imported below
    cStringIO = None
from io import (
        StringIO as uStringIO,
        BytesIO,
        open,
    )

logging.basicConfig(level=logging.INFO)

from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer

__path__ = os.path.dirname(__file__)
def _read(n, *args):
    with open(os.path.join(__path__, n), *args) as f:
        return f.read()
class TestParsers(unittest.TestCase):
    def test_big_list(self):
        Lark(r"""
            start: {}
        """.format(
            "|".join(['"%s"'%i for i in range(250)])
        ))

    def test_same_ast(self):
        "Tests that Earley and LALR parsers produce equal trees"
        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w+/ """, parser='lalr')
        l = g.parse('(a,b,c,*x)')

        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w/+ """)
        l2 = g.parse('(a,b,c,*x)')
        assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())

    def test_infinite_recurse(self):
        g = """start: a
               a: a | "a"
            """

        self.assertRaises(GrammarError, Lark, g, parser='lalr')

        # TODO: should it? shouldn't it?
        # l = Lark(g, parser='earley', lexer='dynamic')
        # self.assertRaises(ParseError, l.parse, 'a')

    def test_propagate_positions(self):
        g = Lark("""start: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )

        g = Lark("""start: x
                    x: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )

    def test_expand1(self):
        g = Lark("""start: a
                    ?a: b
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "b" )

        g = Lark("""start: a
                    ?a: b -> c
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )
        g = Lark("""start: a
                    ?a: B -> c
                    B: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )
        g = Lark("""start: a
                    ?a: b b -> c
                    b: "x"
                 """)

        r = g.parse('xx')
        self.assertEqual( r.children[0].data, "c" )

    def test_comment_in_rule_definition(self):
        g = Lark("""start: a
                    a: "a"
                    // A comment
                    // Another comment
                    | "b"
                    // Still more
                    c: "unrelated"
                 """)
        r = g.parse('b')
        self.assertEqual( r.children[0].data, "a" )

    def test_visit_tokens(self):
        class T(Transformer):
            def a(self, children):
                return children[0] + "!"
            def A(self, tok):
                return tok.update(value=tok.upper())

        # Test regular
        g = """start: a
               a : A
               A: "x"
            """
        p = Lark(g, parser='lalr')
        r = T(False).transform(p.parse("x"))
        self.assertEqual( r.children, ["x!"] )
        r = T().transform(p.parse("x"))
        self.assertEqual( r.children, ["X!"] )

        # Test internal transformer
        p = Lark(g, parser='lalr', transformer=T())
        r = p.parse("x")
        self.assertEqual( r.children, ["X!"] )

    def test_vargs_meta(self):

        @v_args(meta=True)
        class T1(Transformer):
            def a(self, children, meta):
                assert not children
                return meta.line

            def start(self, children, meta):
                return children

        @v_args(meta=True, inline=True)
        class T2(Transformer):
            def a(self, meta):
                return meta.line

            def start(self, meta, *res):
                return list(res)

        for T in (T1, T2):
            for internal in [False, True]:
                try:
                    g = Lark(r"""start: a+
                                 a : "x" _NL?
                                 _NL: /\n/+
                              """, parser='lalr', transformer=T() if internal else None, propagate_positions=True)
                except NotImplementedError:
                    assert internal
                    continue

                res = g.parse("xx\nx\nxxx\n\n\nxx")
                assert not internal
                res = T().transform(res)

                self.assertEqual(res, [1, 1, 2, 3, 3, 3, 6, 6])

    def test_vargs_tree(self):
        tree = Lark('''
            start: a a a
            !a: "A"
        ''').parse('AAA')
        tree_copy = deepcopy(tree)

        @v_args(tree=True)
        class T(Transformer):
            def a(self, tree):
                return 1
            def start(self, tree):
                return tree.children

        res = T().transform(tree)
        self.assertEqual(res, [1, 1, 1])
        self.assertEqual(tree, tree_copy)

    def test_embedded_transformer(self):
        class T(Transformer):
            def a(self, children):
                return "<a>"
            def b(self, children):
                return "<b>"
            def c(self, children):
                return "<c>"

        # Test regular
        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<a>"] )

        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<a>"] )

        # Test Expand1
        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<b>"] )

        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<b>"] )

        # Test Expand1 -> Alias
        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("xx"))
        self.assertEqual( r.children, ["<c>"] )

        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("xx")
        self.assertEqual( r.children, ["<c>"] )

    def test_embedded_transformer_inplace(self):
        @v_args(tree=True)
        class T1(Transformer_InPlace):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        @v_args(tree=True)
        class T2(Transformer):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        class T3(Transformer):
            @v_args(tree=True)
            def a(self, tree):
                assert isinstance(tree, Tree)
                tree.children.append("tested")
                return tree

            @v_args(tree=True)
            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        for t in [T1(), T2(), T3()]:
            for internal in [False, True]:
                g = Lark("""start: a b
                            a : "x"
                            b : "y"
                         """, parser='lalr', transformer=t if internal else None)
                r = g.parse("xy")
                if not internal:
                    r = t.transform(r)

                a, b = r.children
                self.assertEqual(a.children, ["tested"])
                self.assertEqual(b.children, ["tested2"])

    def test_alias(self):
        Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """)


def _make_full_earley_test(LEXER):
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=LEXER, parser='earley', propagate_positions=True, **kwargs)

    class _TestFullEarley(unittest.TestCase):
        def test_anon(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = Lark(r"""start: B
                         B: ("ab"|/[^b]/)+
                      """, lexer=LEXER)

            self.assertEqual( g.parse('abc').children[0], 'abc')

        def test_earley(self):
            g = Lark("""start: A "b" c
                        A: "a"+
                        c: "abc"
                     """, parser="earley", lexer=LEXER)
            x = g.parse('aaaababc')

        def test_earley2(self):
            grammar = """
            start: statement+
            statement: "r"
                     | "c" /[a-z]/+
            %ignore " "
            """

            program = """c b r"""

            l = Lark(grammar, parser='earley', lexer=LEXER)
            l.parse(program)
        @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete parser")
        def test_earley3(self):
            """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)

            By default, `+` should imitate regexp greedy-matching
            """
            grammar = """
            start: A A
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            self.assertEqual(set(res.children), {'aa', 'a'})
            # XXX TODO fix Earley to maintain correct order
            # i.e. imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aa', 'a'])

        def test_earley4(self):
            grammar = """
            start: A A?
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            assert set(res.children) == {'aa', 'a'} or res.children == ['aaa']
            # XXX TODO fix Earley to maintain correct order
            # i.e. imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aaa'])
        def test_earley_repeating_empty(self):
            # This was a sneaky bug!
            grammar = """
            !start: "a" empty empty "b"
            empty: empty2
            empty2:
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER)
            res = parser.parse('ab')

            empty_tree = Tree('empty', [Tree('empty2', [])])
            self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_earley_explicit_ambiguity(self):
            # This was a sneaky bug!
            grammar = """
            start: a b | ab
            a: "a"
            b: "b"
            ab: "ab"
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit')
            ambig_tree = parser.parse('ab')
            self.assertEqual( ambig_tree.data, '_ambig')
            self.assertEqual( len(ambig_tree.children), 2)

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity1(self):
            grammar = """
            start: cd+ "e"
            !cd: "c"
               | "d"
               | "cd"
            """
            l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
            ambig_tree = l.parse('cde')

            assert ambig_tree.data == '_ambig', ambig_tree
            assert len(ambig_tree.children) == 2

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity2(self):
            grammar = """
            ANY: /[a-zA-Z0-9 ]+/
            a.2: "A" b+
            b.2: "B"
            c: ANY
            start: (a|c)*
            """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse('ABX')
            expected = Tree('start', [
                    Tree('a', [
                        Tree('b', [])
                    ]),
                    Tree('c', [
                        'X'
                    ])
                ])
            self.assertEqual(res, expected)

        def test_fruitflies_ambig(self):
            grammar = """
                start: noun verb noun        -> simple
                     | noun verb "like" noun -> comparative
                noun: adj? NOUN
                verb: VERB
                adj: ADJ
                NOUN: "flies" | "bananas" | "fruit"
                VERB: "like" | "flies"
                ADJ: "fruit"
                %import common.WS
                %ignore WS
            """
            parser = Lark(grammar, ambiguity='explicit', lexer=LEXER)
            tree = parser.parse('fruit flies like bananas')

            expected = Tree('_ambig', [
                    Tree('comparative', [
                        Tree('noun', ['fruit']),
                        Tree('verb', ['flies']),
                        Tree('noun', ['bananas'])
                    ]),
                    Tree('simple', [
                        Tree('noun', [Tree('adj', ['fruit']), 'flies']),
                        Tree('verb', ['like']),
                        Tree('noun', ['bananas'])
                    ])
                ])

            # self.assertEqual(tree, expected)
            self.assertEqual(tree.data, expected.data)
            self.assertEqual(set(tree.children), set(expected.children))

        @unittest.skipIf(LEXER!='dynamic_complete', "Only relevant for the dynamic_complete parser")
        def test_explicit_ambiguity2(self):
            grammar = r"""
            start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """cat"""

            parser = _Lark(grammar, start='start', ambiguity='explicit')
            tree = parser.parse(text)
            self.assertEqual(tree.data, '_ambig')

            combinations = {tuple(str(s) for s in t.children) for t in tree.children}
            self.assertEqual(combinations, {
                ('cat',),
                ('ca', 't'),
                ('c', 'at'),
                ('c', 'a', 't')
            })

        def test_term_ambig_resolve(self):
            grammar = r"""
            !start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """foo bar"""

            parser = Lark(grammar)
            tree = parser.parse(text)
            self.assertEqual(tree.children, ['foo', 'bar'])

        # @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
        # def test_not_all_derivations(self):
        #     grammar = """
        #     start: cd+ "e"
        #     !cd: "c"
        #        | "d"
        #        | "cd"
        #     """
        #     l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
        #     x = l.parse('cde')
        #     assert x.data != '_ambig', x
        #     assert len(x.children) == 1

    _NAME = "TestFullEarley" + LEXER.capitalize()
    _TestFullEarley.__name__ = _NAME
    globals()[_NAME] = _TestFullEarley


class CustomLexer(Lexer):
    """
    Purpose of this custom lexer is to test the integration,
    so it uses the TraditionalLexer as its implementation, without custom lexing behaviour.
    """
    def __init__(self, lexer_conf):
        self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks, g_regex_flags=lexer_conf.g_regex_flags)
    def lex(self, *args, **kwargs):
        return self.lexer.lex(*args, **kwargs)
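
# A minimal sketch of how a custom lexer gets plugged in (this is what the
# ('custom', 'lalr') entry in _TO_TEST below exercises): the class itself is
# passed as `lexer=`, and Lark instantiates it with the lexer configuration,
# then calls its lex() method on each input:
#
#     parser = Lark('start: "a"+', parser='lalr', lexer=CustomLexer)
#     parser.parse('aaa')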
def _make_parser_test(LEXER, PARSER):
    lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)
    def _Lark_open(gfilename, **kwargs):
        return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)

    class _TestParser(unittest.TestCase):
        def test_basic1(self):
            g = _Lark("""start: a+ b a* "b" a*
                         b: "b"
                         a: "a"
                      """)

            r = g.parse('aaabaab')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )
            r = g.parse('aaabaaba')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaaa' )

            self.assertRaises(ParseError, g.parse, 'aaabaa')

        def test_basic2(self):
            # Multiple parsers and colliding tokens
            g = _Lark("""start: B A
                         B: "12"
                         A: "1" """)
            g2 = _Lark("""start: B A
                          B: "12"
                          A: "2" """)
            x = g.parse('121')
            assert x.data == 'start' and x.children == ['12', '1'], x
            x = g2.parse('122')
            assert x.data == 'start' and x.children == ['12', '2'], x

        @unittest.skipIf(cStringIO is None, "cStringIO not available")
        def test_stringio_bytes(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_stringio_unicode(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_unicode(self):
            g = _Lark(u"""start: UNIA UNIB UNIA
                          UNIA: /\xa3/
                          UNIB: /\u0101/
                       """)
            g.parse(u'\xa3\u0101\u00a3')

        def test_unicode2(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "a\u0101b\ "
                          UNIC: /a?\u0101c\n/
                       """)
            g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n')

        def test_unicode3(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "\u0101"
                          UNIC: /\u0203/ /\n/
                       """)
            g.parse(u'\xa3\u0101\u00a3\u0203\n')

        def test_hex_escape(self):
            g = _Lark(r"""start: A B C
                          A: "\x01"
                          B: /\x02/
                          C: "\xABCD"
                       """)
            g.parse('\x01\x02\xABCD')

        def test_unicode_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\u0061".."\u0063"
                       """)
            g.parse('abc')

        def test_hex_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\x01".."\x03"
                       """)
            g.parse('\x01\x02\x03')

        @unittest.skipIf(PARSER == 'cyk', "Takes forever")
        def test_stack_for_ebnf(self):
            """Verify that stack depth isn't an issue for EBNF grammars"""
            g = _Lark(r"""start: a+
                          a : "a" """)

            g.parse("a" * (sys.getrecursionlimit()*2 ))

        def test_expand1_lists_with_one_item(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("a")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_expand1_lists_with_one_item_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("a!")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_dont_expand1_lists_with_multiple_items(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        def test_dont_expand1_lists_with_multiple_items_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa!")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list(self):
            g = _Lark(r"""start: list
                          ?list: item*
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list_2(self):
            g = _Lark(r"""start: list
                          ?list: item* "!"?
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())
        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_single_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item',))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_multiple_item_flatten_list(self):
            g = _Lark(r"""start: list
                          #list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))
        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_recurse_flatten(self):
            """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
            g = _Lark(r"""start: a | start a
                          a : A
                          A : "a" """)

            # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
            # STree data structures, which uses recursion).
            g.parse("a" * (sys.getrecursionlimit() // 4))

        def test_token_collision(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %ignore " "
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision_WS(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %import common.WS
                          %ignore WS
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision2(self):
            g = _Lark("""
                    !start: "starts"
                    %import common.LCASE_LETTER
                    """)

            x = g.parse("starts")
            self.assertSequenceEqual(x.children, ['starts'])

        def test_templates(self):
            g = _Lark(r"""
                       start: "[" sep{NUMBER, ","} "]"
                       sep{item, delim}: item (delim item)*
                       NUMBER: /\d+/
                       %ignore " "
                       """)
            x = g.parse("[1, 2, 3, 4]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1', '2', '3', '4'])])
            x = g.parse("[1]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1'])])

        def test_templates_recursion(self):
            g = _Lark(r"""
                       start: "[" _sep{NUMBER, ","} "]"
                       _sep{item, delim}: item | _sep{item, delim} delim item
                       NUMBER: /\d+/
                       %ignore " "
                       """)
            x = g.parse("[1, 2, 3, 4]")
            self.assertSequenceEqual(x.children, ['1', '2', '3', '4'])
            x = g.parse("[1]")
            self.assertSequenceEqual(x.children, ['1'])

        def test_templates_import(self):
            g = _Lark_open("test_templates_import.lark", rel_to=__file__)
            x = g.parse("[1, 2, 3, 4]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1', '2', '3', '4'])])
            x = g.parse("[1]")
            self.assertSequenceEqual(x.children, [Tree('sep', ['1'])])

        def test_templates_alias(self):
            g = _Lark(r"""
                       start: expr{"C"}
                       expr{t}: "A" t
                              | "B" t -> b
                       """)
            x = g.parse("AC")
            self.assertSequenceEqual(x.children, [Tree('expr', [])])
            x = g.parse("BC")
            self.assertSequenceEqual(x.children, [Tree('b', [])])

        def test_templates_modifiers(self):
            g = _Lark(r"""
                       start: expr{"B"}
                       !expr{t}: "A" t
                       """)
            x = g.parse("AB")
            self.assertSequenceEqual(x.children, [Tree('expr', ["A", "B"])])
            g = _Lark(r"""
                       start: _expr{"B"}
                       !_expr{t}: "A" t
                       """)
            x = g.parse("AB")
            self.assertSequenceEqual(x.children, ["A", "B"])
            g = _Lark(r"""
                       start: expr{b}
                       b: "B"
                       ?expr{t}: "A" t
                       """)
            x = g.parse("AB")
            self.assertSequenceEqual(x.children, [Tree('b',[])])

        def test_templates_templates(self):
            g = _Lark('''start: a{b}
                         a{t}: t{"a"}
                         b{x}: x''')
            x = g.parse('a')
            self.assertSequenceEqual(x.children, [Tree('a', [Tree('b',[])])])

        def test_g_regex_flags(self):
            g = _Lark("""
                    start: "a" /b+/ C
                    C: "C" | D
                    D: "D" E
                    E: "e"
                    """, g_regex_flags=re.I)
            x1 = g.parse("ABBc")
            x2 = g.parse("abdE")
        # def test_string_priority(self):
        #     g = _Lark("""start: (A | /a?bb/)+
        #                  A: "a" """)
        #     x = g.parse('abb')
        #     self.assertEqual(len(x.children), 2)
        #
        #     # This parse raises an exception because the lexer will always try to consume
        #     # "a" first and will never match the regular expression
        #     # This behavior is subject to change!!
        #     # This won't happen with ambiguity handling.
        #     g = _Lark("""start: (A | /a?ab/)+
        #                  A: "a" """)
        #     self.assertRaises(LexError, g.parse, 'aab')
        def test_undefined_rule(self):
            self.assertRaises(GrammarError, _Lark, """start: a""")

        def test_undefined_token(self):
            self.assertRaises(GrammarError, _Lark, """start: A""")

        def test_rule_collision(self):
            g = _Lark("""start: "a"+ "b"
                              | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')

        def test_rule_collision2(self):
            g = _Lark("""start: "a"* "b"
                              | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')
            x = g.parse('b')

        def test_token_not_anon(self):
            """Tests that "a" is matched as an anonymous token, and not A.
            """

            g = _Lark("""start: "a"
                         A: "a" """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 0, '"a" should be considered anonymous')

            g = _Lark("""start: "a" A
                         A: "a" """)
            x = g.parse('aa')
            self.assertEqual(len(x.children), 1, 'only "a" should be considered anonymous')
            self.assertEqual(x.children[0].type, "A")

            g = _Lark("""start: /a/
                         A: /a/ """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 1)
            self.assertEqual(x.children[0].type, "A", "A isn't associated with /a/")

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_maybe(self):
            g = _Lark("""start: ["a"] """)
            x = g.parse('a')
            x = g.parse('')

        def test_start(self):
            g = _Lark("""a: "a" a? """, start='a')
            x = g.parse('a')
            x = g.parse('aa')
            x = g.parse('aaa')

        def test_alias(self):
            g = _Lark("""start: "a" -> b """)
            x = g.parse('a')
            self.assertEqual(x.data, "b")

        def test_token_ebnf(self):
            g = _Lark("""start: A
                         A: "a"* ("b"? "c".."e")+
                      """)
            x = g.parse('abcde')
            x = g.parse('dd')

        def test_backslash(self):
            g = _Lark(r"""start: "\\" "a"
                       """)
            x = g.parse(r'\a')

            g = _Lark(r"""start: /\\/ /a/
                       """)
            x = g.parse(r'\a')

        def test_backslash2(self):
            g = _Lark(r"""start: "\"" "-"
                       """)
            x = g.parse('"-')

            g = _Lark(r"""start: /\// /-/
                       """)
            x = g.parse('/-')

        def test_special_chars(self):
            g = _Lark(r"""start: "\n"
                       """)
            x = g.parse('\n')

            g = _Lark(r"""start: /\n/
                       """)
            x = g.parse('\n')

        # def test_token_recurse(self):
        #     g = _Lark("""start: A
        #                  A: B
        #                  B: A
        #               """)

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = _Lark(r"""start: _empty a "B"
                          a: _empty "A"
                          _empty:
                       """)
            x = g.parse('AB')

        def test_regex_quote(self):
            g = r"""
            start: SINGLE_QUOTED_STRING | DOUBLE_QUOTED_STRING
            SINGLE_QUOTED_STRING : /'[^']*'/
            DOUBLE_QUOTED_STRING : /"[^"]*"/
            """
            g = _Lark(g)
            self.assertEqual( g.parse('"hello"').children, ['"hello"'])
            self.assertEqual( g.parse("'hello'").children, ["'hello'"])

        def test_lexer_token_limit(self):
            "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"
            tokens = {'A%d'%i:'"%d"'%i for i in range(300)}
            g = _Lark("""start: %s
                      %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))
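
        # For reference, a sketch of the stdlib behaviour being worked around
        # (the exact exception and message depend on the Python version, and
        # recent versions have lifted the 100-group cap):
        #     import re
        #     re.compile('|'.join('(%d)' % i for i in range(300)))  # fails where the cap applies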
        def test_float_without_lexer(self):
            expected_error = UnexpectedCharacters if LEXER.startswith('dynamic') else UnexpectedToken
            if PARSER == 'cyk':
                expected_error = ParseError

            g = _Lark("""start: ["+"|"-"] float
                         float: digit* "." digit+ exp?
                              | digit+ exp
                         exp: ("e"|"E") ["+"|"-"] digit+
                         digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
                      """)
            g.parse("1.2")
            g.parse("-.2e9")
            g.parse("+2e-9")
            self.assertRaises( expected_error, g.parse, "+2e-9e")

        def test_keep_all_tokens(self):
            l = _Lark("""start: "a"+ """, keep_all_tokens=True)
            tree = l.parse('aaa')
            self.assertEqual(tree.children, ['a', 'a', 'a'])

        def test_token_flags(self):
            l = _Lark("""!start: "a"i+
                      """
                      )
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            l = _Lark("""!start: /a/i+
                      """
                      )
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            # g = """!start: "a"i "a"
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            # g = """!start: /a/i /a/
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            g = """start: NAME "," "a"
                   NAME: /[a-z_]/i /[a-z0-9_]/i*
                """
            l = _Lark(g)
            tree = l.parse('ab,a')
            self.assertEqual(tree.children, ['ab'])
            tree = l.parse('AB,a')
            self.assertEqual(tree.children, ['AB'])

        def test_token_flags3(self):
            l = _Lark("""!start: ABC+
                         ABC: "abc"i
                      """
                      )
            tree = l.parse('aBcAbC')
            self.assertEqual(tree.children, ['aBc', 'AbC'])

        def test_token_flags2(self):
            g = """!start: ("a"i | /a/ /b/?)+
                """
            l = _Lark(g)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_twice_empty(self):
            g = """!start: ("A"?)?
                """
            l = _Lark(g)
            tree = l.parse('A')
            self.assertEqual(tree.children, ['A'])
            tree = l.parse('')
            self.assertEqual(tree.children, [])

        def test_undefined_ignore(self):
            g = """!start: "A"
                   %ignore B
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_alias_in_terminal(self):
            g = """start: TERM
                   TERM: "a" -> alias
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_line_and_column(self):
            g = r"""!start: "A" bc "D"
                    !bc: "B\nC"
                 """
            l = _Lark(g)
            a, bc, d = l.parse("AB\nCD").children
            self.assertEqual(a.line, 1)
            self.assertEqual(a.column, 1)

            bc ,= bc.children
            self.assertEqual(bc.line, 1)
            self.assertEqual(bc.column, 2)

            self.assertEqual(d.line, 2)
            self.assertEqual(d.column, 2)

            if LEXER != 'dynamic':
                self.assertEqual(a.end_line, 1)
                self.assertEqual(a.end_column, 2)
                self.assertEqual(bc.end_line, 2)
                self.assertEqual(bc.end_column, 2)
                self.assertEqual(d.end_line, 2)
                self.assertEqual(d.end_column, 3)
        def test_reduce_cycle(self):
            """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state.
            It seems that the correct solution is to explicitly distinguish finalization in the reduce() function.
            """

            l = _Lark("""
                term: A
                    | term term
                A: "a"
                """, start='term')
            tree = l.parse("aa")
            self.assertEqual(len(tree.children), 2)
        @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority")
        def test_lexer_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: A B | AB
            A.2: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['a', 'b'])
            self.assertNotEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B: "b"
            AB.3: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertNotEqual(res.children, ['a', 'b'])
            self.assertEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B.-20: "b"
            AB.-10: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertEqual(res.children, ['a', 'b'])

            grammar = """
            start: A B | AB
            A.-99999999999999999999999: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['ab'])

        def test_import(self):
            grammar = """
            start: NUMBER WORD

            %import common.NUMBER
            %import common.WORD
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_import_rename(self):
            grammar = """
            start: N W

            %import common.NUMBER -> N
            %import common.WORD -> W
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_relative_import(self):
            l = _Lark_open('test_relative_import.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_import_unicode(self):
            l = _Lark_open('test_relative_import_unicode.lark', rel_to=__file__)
            x = l.parse(u'Ø')
            self.assertEqual(x.children, [u'Ø'])

        def test_relative_import_rename(self):
            l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_rule_import(self):
            l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('expr', ['a', Tree('expr', ['a', 'b']), 'b']),
                'y'])

        def test_relative_rule_import_drop_ignore(self):
            # %ignore rules are dropped on import
            l = _Lark_open('test_relative_rule_import_drop_ignore.lark',
                           rel_to=__file__)
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xa abby')

        def test_relative_rule_import_subrule(self):
            l = _Lark_open('test_relative_rule_import_subrule.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', [
                        'a', Tree('grammars__ab__expr', ['a', 'b']), 'b',
                    ]),
                ]),
                'y'])

        def test_relative_rule_import_subrule_no_conflict(self):
            l = _Lark_open(
                'test_relative_rule_import_subrule_no_conflict.lark',
                rel_to=__file__)
            x = l.parse('xaby')
            self.assertEqual(x.children, [Tree('expr', [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', ['a', 'b']),
                ]),
                'y'])])
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xaxabyby')

        def test_relative_rule_import_rename(self):
            l = _Lark_open('test_relative_rule_import_rename.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('ab', ['a', Tree('ab', ['a', 'b']), 'b']),
                'y'])

        def test_multi_import(self):
            grammar = """
            start: NUMBER WORD

            %import common (NUMBER, WORD, WS)
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 toucans')
            self.assertEqual(x.children, ['12', 'toucans'])

        def test_relative_multi_import(self):
            l = _Lark_open("test_relative_multi_import.lark", rel_to=__file__)
            x = l.parse('12 capybaras')
            self.assertEqual(x.children, ['12', 'capybaras'])

        def test_relative_import_preserves_leading_underscore(self):
            l = _Lark_open("test_relative_import_preserves_leading_underscore.lark", rel_to=__file__)
            x = l.parse('Ax')
            self.assertEqual(next(x.find_data('c')).children, ['A'])

        def test_relative_import_of_nested_grammar(self):
            l = _Lark_open("grammars/test_relative_import_of_nested_grammar.lark", rel_to=__file__)
            x = l.parse('N')
            self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])

        def test_relative_import_rules_dependencies_imported_only_once(self):
            l = _Lark_open("test_relative_import_rules_dependencies_imported_only_once.lark", rel_to=__file__)
            x = l.parse('AAA')
            self.assertEqual(next(x.find_data('a')).children, ['A'])
            self.assertEqual(next(x.find_data('b')).children, ['A'])
            self.assertEqual(next(x.find_data('d')).children, ['A'])

        def test_import_errors(self):
            grammar = """
            start: NUMBER WORD

            %import .grammars.bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

            grammar = """
            start: NUMBER WORD

            %import bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: a | b
            a.1: "a"
            b.2: "a"
            """

            # l = Lark(grammar, parser='earley', lexer='standard')
            l = _Lark(grammar)
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'b')

            grammar = """
            start: a | b
            a.2: "a"
            b.1: "a"
            """

            l = _Lark(grammar)
            # l = Lark(grammar, parser='earley', lexer='standard')
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'a')

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization_sum(self):
            "Tests effect of priority on result"

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_: "ab"
            bb_.1: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_.1: "ab"
            bb_: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.2: "a"
            b_.1: "b"
            ab_.3: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.1: "a"
            b_.1: "b"
            ab_.4: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

        def test_utf8(self):
            g = u"""start: a
                    a: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [Tree('a', [])]))

            g = u"""start: A
                    A: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [u'\xb1a']))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_ignore(self):
            grammar = r"""
            COMMENT: /(!|(\/\/))[^\n]*/
            %ignore COMMENT
            %import common.WS -> _WS
            %import common.INT
            start: "INT"i _WS+ INT _WS*
            """

            parser = _Lark(grammar)

            tree = parser.parse("int 1 ! This is a comment\n")
            self.assertEqual(tree.children, ['1'])

            tree = parser.parse("int 1 ! This is a comment")    # A trailing ignore token can be tricky!
            self.assertEqual(tree.children, ['1'])

            parser = _Lark(r"""
                start : "a"*
                %ignore "b"
            """)
            tree = parser.parse("bb")
            self.assertEqual(tree.children, [])

        def test_regex_escaping(self):
            g = _Lark("start: /[ab]/")
            g.parse('a')
            g.parse('b')

            self.assertRaises( UnexpectedInput, g.parse, 'c')

            _Lark(r'start: /\w/').parse('a')

            g = _Lark(r'start: /\\w/')
            self.assertRaises( UnexpectedInput, g.parse, 'a')
            g.parse(r'\w')

            _Lark(r'start: /\[/').parse('[')
            _Lark(r'start: /\//').parse('/')
            _Lark(r'start: /\\/').parse('\\')
            _Lark(r'start: /\[ab]/').parse('[ab]')
            _Lark(r'start: /\\[ab]/').parse('\\a')
            _Lark(r'start: /\t/').parse('\t')
            _Lark(r'start: /\\t/').parse('\\t')
            _Lark(r'start: /\\\t/').parse('\\\t')
            _Lark(r'start: "\t"').parse('\t')
            _Lark(r'start: "\\t"').parse('\\t')
            _Lark(r'start: "\\\t"').parse('\\\t')

        def test_ranged_repeat_rules(self):
            g = u"""!start: "A"~3
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["A", "A", "A"]))
            self.assertRaises(ParseError, l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: "A"~0..2
                 """
            if PARSER != 'cyk': # XXX CYK currently doesn't support empty grammars
                l = _Lark(g)
                self.assertEqual(l.parse(u''), Tree('start', []))
                self.assertEqual(l.parse(u'A'), Tree('start', ['A']))
                self.assertEqual(l.parse(u'AA'), Tree('start', ['A', 'A']))
                self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'AAA')

            g = u"""!start: "A"~3..2
                 """
            self.assertRaises(GrammarError, _Lark, g)

            g = u"""!start: "A"~2..3 "B"~2
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
            self.assertEqual(l.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
            self.assertRaises(ParseError, l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        def test_ranged_repeat_terms(self):
            g = u"""!start: AAA
                    AAA: "A"~3
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"]))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: AABB CC
                    AABB: "A"~0..2 "B"~2
                    CC: "C"~1..2
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC']))
            self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C']))
            self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC']))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now") # TODO XXX
        def test_priority_vs_embedded(self):
            g = """
            A.2: "a"
            WORD: ("a".."z")+

            start: (A | WORD)+
            """
            l = _Lark(g)
            t = l.parse('abc')
            self.assertEqual(t.children, ['a', 'bc'])
            self.assertEqual(t.children[0].type, 'A')

        def test_line_counting(self):
            p = _Lark("start: /[^x]+/")

            text = 'hello\nworld'
            t = p.parse(text)
            tok = t.children[0]
            self.assertEqual(tok, text)
            self.assertEqual(tok.line, 1)
            self.assertEqual(tok.column, 1)
            if LEXER != 'dynamic':
                self.assertEqual(tok.end_line, 2)
                self.assertEqual(tok.end_column, 6)
        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_empty_end(self):
            p = _Lark("""
                start: b c d
                b: "B"
                c: | "C"
                d: | "D"
            """)
            res = p.parse('B')
            self.assertEqual(len(res.children), 3)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_maybe_placeholders(self):
            # Anonymous tokens shouldn't count
            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [])

            # All invisible constructs shouldn't count
            p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
                         A: "a"
                         _c: "c" """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None])
            self.assertEqual(p.parse("c").children, [None])
            self.assertEqual(p.parse("aefc").children, ['a'])

            # ? shouldn't apply
            p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])

            p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None, None])
            self.assertEqual(p.parse("a").children, ['a', None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("c").children, [None, None, 'c'])
            self.assertEqual(p.parse("ab").children, ['a', 'b', None])
            self.assertEqual(p.parse("ac").children, ['a', None, 'c'])
            self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
            self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

            p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
            self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
            self.assertEqual(p.parse("babbcabcb").children,
                [None, 'b', None,
                 'a', 'b', None,
                 None, 'b', 'c',
                 'a', 'b', 'c',
                 None, 'b', None])

            p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
            self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
            self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
            self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])

        def test_escaped_string(self):
            "Tests common.ESCAPED_STRING"
            grammar = r"""
            start: ESCAPED_STRING+

            %import common (WS_INLINE, ESCAPED_STRING)
            %ignore WS_INLINE
            """

            parser = _Lark(grammar)

            parser.parse(r'"\\" "b" "c"')
            parser.parse(r'"That" "And a \"b"')

        def test_meddling_unused(self):
            "Unless 'unused' is removed, LALR analysis will fail on reduce-reduce collision"

            grammar = """
                start: EKS* x
                x: EKS
                unused: x*
                EKS: "x"
            """
            parser = _Lark(grammar)

        @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
        def test_serialize(self):
            grammar = """
                start: _ANY b "C"
                _ANY: /./
                b: "B"
            """
            parser = _Lark(grammar)
            s = BytesIO()
            parser.save(s)
            s.seek(0)
            parser2 = Lark.load(s)
            self.assertEqual(parser2.parse('ABC'), Tree('start', [Tree('b', [])]) )

        def test_multi_start(self):
            parser = _Lark('''
                a: "x" "a"?
                b: "x" "b"?
                ''', start=['a', 'b'])

            self.assertEqual(parser.parse('xa', 'a'), Tree('a', []))
            self.assertEqual(parser.parse('xb', 'b'), Tree('b', []))

        def test_lexer_detect_newline_tokens(self):
            # Detect newlines in regular tokens
            g = _Lark(r"""start: "go" tail*
                !tail : SA "@" | SB "@" | SC "@" | SD "@"
                SA : "a" /\n/
                SB : /b./s
                SC : "c" /[^a-z]/
                SD : "d" /\s/
                """)
            a,b,c,d = [x.children[1] for x in g.parse('goa\n@b\n@c\n@d\n@').children]
            self.assertEqual(a.line, 2)
            self.assertEqual(b.line, 3)
            self.assertEqual(c.line, 4)
            self.assertEqual(d.line, 5)

            # Detect newlines in ignored tokens
            for re in ['/\\n/', '/[^a-z]/', '/\\s/']:
                g = _Lark('''!start: "a" "a"
                             %ignore {}'''.format(re))
                a, b = g.parse('a\na').children
                self.assertEqual(a.line, 1)
                self.assertEqual(b.line, 2)

    _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
    _TestParser.__name__ = _NAME
    _TestParser.__qualname__ = "tests.test_parser." + _NAME
    globals()[_NAME] = _TestParser

# Note: You still have to import them in __main__ for the tests to run
_TO_TEST = [
        ('standard', 'earley'),
        ('standard', 'cyk'),
        ('dynamic', 'earley'),
        ('dynamic_complete', 'earley'),
        ('standard', 'lalr'),
        ('contextual', 'lalr'),
        ('custom', 'lalr'),
        # (None, 'earley'),
]
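
# A sketch of the import the note above refers to: a hypothetical __main__
# module would pull the generated classes into scope before calling unittest:
#
#     from tests.test_parser import *   # brings TestLalrStandard, TestEarleyDynamic, etc. into scope
#     unittest.main()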
for _LEXER, _PARSER in _TO_TEST:
    _make_parser_test(_LEXER, _PARSER)

for _LEXER in ('dynamic', 'dynamic_complete'):
    _make_full_earley_test(_LEXER)

if __name__ == '__main__':
    unittest.main()