# -*- coding: utf-8 -*-
from __future__ import absolute_import

import unittest
import logging
import os
import sys
try:
    from cStringIO import StringIO as cStringIO
except ImportError:
    # Available only in Python 2.x, 3.x only has io.StringIO from below
    cStringIO = None
from io import (
    StringIO as uStringIO,
    open,
)

logging.basicConfig(level=logging.INFO)

from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer

__path__ = os.path.dirname(__file__)
def _read(n, *args):
    with open(os.path.join(__path__, n), *args) as f:
        return f.read()


class TestParsers(unittest.TestCase):
    def test_same_ast(self):
        "Tests that Earley and LALR parsers produce equal trees"
        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w+/ """, parser='lalr')
        l = g.parse('(a,b,c,*x)')

        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w/+ """)
        l2 = g.parse('(a,b,c,*x)')
        assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())

    def test_infinite_recurse(self):
        g = """start: a
               a: a | "a"
            """

        self.assertRaises(GrammarError, Lark, g, parser='lalr')

        # TODO: should it? shouldn't it?
        # l = Lark(g, parser='earley', lexer='dynamic')
        # self.assertRaises(ParseError, l.parse, 'a')
    def test_propagate_positions(self):
        g = Lark("""start: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )
    def test_expand1(self):
        g = Lark("""start: a
                    ?a: b
                    b: "x"
                 """)
        r = g.parse('x')
        self.assertEqual( r.children[0].data, "b" )

        g = Lark("""start: a
                    ?a: b -> c
                    b: "x"
                 """)
        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: B -> c
                    B: "x"
                 """)
        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: b b -> c
                    b: "x"
                 """)
        r = g.parse('xx')
        self.assertEqual( r.children[0].data, "c" )
    def test_visit_tokens(self):
        class T(Transformer):
            def a(self, children):
                return children[0] + "!"
            def A(self, tok):
                return tok.upper()

        # Test regular
        g = Lark("""start: a
                    a : A
                    A: "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["x!"] )
        r = T(True).transform(g.parse("x"))
        self.assertEqual( r.children, ["X!"] )
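    # Note (an assumption about this Lark version's Transformer signature): the
    # positional flag in T(True) is visit_tokens. With it enabled, the terminal
    # callback A uppercases the token before the rule callback a appends "!",
    # which is why the result flips from "x!" to "X!".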
    def test_embedded_transformer(self):
        class T(Transformer):
            def a(self, children):
                return "<a>"
            def b(self, children):
                return "<b>"
            def c(self, children):
                return "<c>"

        # Test regular
        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<a>"] )

        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<a>"] )

        # Test Expand1
        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<b>"] )

        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<b>"] )

        # Test Expand1 -> Alias
        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("xx"))
        self.assertEqual( r.children, ["<c>"] )

        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("xx")
        self.assertEqual( r.children, ["<c>"] )
    def test_embedded_transformer_inplace(self):
        @v_args(tree=True)
        class T1(Transformer_InPlace):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        @v_args(tree=True)
        class T2(Transformer):
            def a(self, tree):
                assert isinstance(tree, Tree)
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        class T3(Transformer):
            @v_args(tree=True)
            def a(self, tree):
                assert isinstance(tree, Tree)
                tree.children.append("tested")
                return tree

            @v_args(tree=True)
            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        for t in [T1(), T2(), T3()]:
            for internal in [False, True]:
                g = Lark("""start: a b
                            a : "x"
                            b : "y"
                         """, parser='lalr', transformer=t if internal else None)
                r = g.parse("xy")
                if not internal:
                    r = t.transform(r)

                a, b = r.children
                self.assertEqual(a.children, ["tested"])
                self.assertEqual(b.children, ["tested2"])
    def test_alias(self):
        Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """)

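# A minimal usage sketch (not part of the suite), restating what TestParsers
# asserts above: a `?rule` is inlined into its parent when it has a single
# child, and `-> name` renames the resulting branch.
def _demo_expand1_alias():
    g = Lark("""start: a
                ?a: b -> c
                b: "x"
             """)
    return g.parse('x')   # the child of 'start' comes out named 'c' (cf. test_expand1)
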
def _make_full_earley_test(LEXER):
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=LEXER, parser='earley', propagate_positions=True, **kwargs)

    class _TestFullEarley(unittest.TestCase):
        def test_anon(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = Lark(r"""start: B
                         B: ("ab"|/[^b]/)+
                      """, lexer=LEXER)

            self.assertEqual( g.parse('abc').children[0], 'abc')

        def test_earley(self):
            g = Lark("""start: A "b" c
                        A: "a"+
                        c: "abc"
                     """, parser="earley", lexer=LEXER)
            x = g.parse('aaaababc')

        def test_earley2(self):
            grammar = """
            start: statement+
            statement: "r"
                     | "c" /[a-z]/+
            %ignore " "
            """
            program = """c b r"""

            l = Lark(grammar, parser='earley', lexer=LEXER)
            l.parse(program)
        @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete parser")
        def test_earley3(self):
            """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)

            By default, `+` should imitate regexp greedy-matching
            """
            grammar = """
            start: A A
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            self.assertEqual(set(res.children), {'aa', 'a'})
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate a greedy search for terminals, but a lazy search for rules
            # self.assertEqual(res.children, ['aa', 'a'])
        def test_earley4(self):
            grammar = """
            start: A A?
            A: "a"+
            """

            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            assert set(res.children) == {'aa', 'a'} or res.children == ['aaa']
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate a greedy search for terminals, but a lazy search for rules
            # self.assertEqual(res.children, ['aaa'])
        def test_earley_repeating_empty(self):
            # This was a sneaky bug!
            grammar = """
            !start: "a" empty empty "b"
            empty: empty2
            empty2:
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER)
            res = parser.parse('ab')

            empty_tree = Tree('empty', [Tree('empty2', [])])
            self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_earley_explicit_ambiguity(self):
            # This was a sneaky bug!
            grammar = """
            start: a b | ab
            a: "a"
            b: "b"
            ab: "ab"
            """

            parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit')
            ambig_tree = parser.parse('ab')
            self.assertEqual( ambig_tree.data, '_ambig')
            self.assertEqual( len(ambig_tree.children), 2)

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity1(self):
            grammar = """
            start: cd+ "e"
            !cd: "c"
               | "d"
               | "cd"
            """
            l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
            ambig_tree = l.parse('cde')

            assert ambig_tree.data == '_ambig', ambig_tree
            assert len(ambig_tree.children) == 2

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity2(self):
            grammar = """
            ANY: /[a-zA-Z0-9 ]+/
            a.2: "A" b+
            b.2: "B"
            c: ANY
            start: (a|c)*
            """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse('ABX')
            expected = Tree('start', [
                Tree('a', [
                    Tree('b', [])
                ]),
                Tree('c', [
                    'X'
                ])
            ])
            self.assertEqual(res, expected)
        def test_fruitflies_ambig(self):
            grammar = """
                start: noun verb noun        -> simple
                     | noun verb "like" noun -> comparative

                noun: adj? NOUN
                verb: VERB
                adj: ADJ

                NOUN: "flies" | "bananas" | "fruit"
                VERB: "like" | "flies"
                ADJ: "fruit"

                %import common.WS
                %ignore WS
            """
            parser = Lark(grammar, ambiguity='explicit', lexer=LEXER)
            tree = parser.parse('fruit flies like bananas')

            expected = Tree('_ambig', [
                Tree('comparative', [
                    Tree('noun', ['fruit']),
                    Tree('verb', ['flies']),
                    Tree('noun', ['bananas'])
                ]),
                Tree('simple', [
                    Tree('noun', [Tree('adj', ['fruit']), 'flies']),
                    Tree('verb', ['like']),
                    Tree('noun', ['bananas'])
                ])
            ])

            # self.assertEqual(tree, expected)
            self.assertEqual(tree.data, expected.data)
            self.assertEqual(set(tree.children), set(expected.children))
        @unittest.skipIf(LEXER!='dynamic_complete', "Only relevant for the dynamic_complete parser")
        def test_explicit_ambiguity2(self):
            grammar = r"""
            start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """cat"""

            parser = _Lark(grammar, start='start', ambiguity='explicit')
            tree = parser.parse(text)
            self.assertEqual(tree.data, '_ambig')

            combinations = {tuple(str(s) for s in t.children) for t in tree.children}
            self.assertEqual(combinations, {
                ('cat',),
                ('ca', 't'),
                ('c', 'at'),
                ('c', 'a', 't')
            })

        def test_term_ambig_resolve(self):
            grammar = r"""
            !start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """foo bar"""

            parser = Lark(grammar)
            tree = parser.parse(text)
            self.assertEqual(tree.children, ['foo', 'bar'])

        # @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
        # def test_not_all_derivations(self):
        #     grammar = """
        #     start: cd+ "e"
        #     !cd: "c"
        #        | "d"
        #        | "cd"
        #     """
        #     l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
        #     x = l.parse('cde')
        #     assert x.data != '_ambig', x
        #     assert len(x.children) == 1

    _NAME = "TestFullEarley" + LEXER.capitalize()
    _TestFullEarley.__name__ = _NAME
    globals()[_NAME] = _TestFullEarley

class CustomLexer(Lexer):
    """
    Purpose of this custom lexer is to test the integration,
    so it uses the TraditionalLexer as its implementation, without custom lexing behaviour.
    """
    def __init__(self, lexer_conf):
        self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks)
    def lex(self, *args, **kwargs):
        return self.lexer.lex(*args, **kwargs)

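# A minimal usage sketch (not part of the suite): a custom lexer class is
# handed to Lark directly as the `lexer` argument, exactly as the factory
# below does through `lexer_class_or_name`.
def _demo_custom_lexer():
    parser = Lark('start: "a"', parser='lalr', lexer=CustomLexer)
    return parser.parse('a')
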
def _make_parser_test(LEXER, PARSER):
    lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)
    def _Lark_open(gfilename, **kwargs):
        return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)

    class _TestParser(unittest.TestCase):
        def test_basic1(self):
            g = _Lark("""start: a+ b a* "b" a*
                        b: "b"
                        a: "a"
                     """)

            r = g.parse('aaabaab')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )
            r = g.parse('aaabaaba')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaaa' )

            self.assertRaises(ParseError, g.parse, 'aaabaa')

        def test_basic2(self):
            # Multiple parsers and colliding tokens
            g = _Lark("""start: B A
                         B: "12"
                         A: "1" """)
            g2 = _Lark("""start: B A
                          B: "12"
                          A: "2" """)
            x = g.parse('121')
            assert x.data == 'start' and x.children == ['12', '1'], x
            x = g2.parse('122')
            assert x.data == 'start' and x.children == ['12', '2'], x
        @unittest.skipIf(cStringIO is None, "cStringIO not available")
        def test_stringio_bytes(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_stringio_unicode(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_unicode(self):
            g = _Lark(u"""start: UNIA UNIB UNIA
                          UNIA: /\xa3/
                          UNIB: /\u0101/
                       """)
            g.parse(u'\xa3\u0101\u00a3')

        def test_unicode2(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "a\u0101b\ "
                          UNIC: /a?\u0101c\n/
                       """)
            g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n')

        def test_unicode3(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "\u0101"
                          UNIC: /\u0203/ /\n/
                       """)
            g.parse(u'\xa3\u0101\u00a3\u0203\n')

        def test_hex_escape(self):
            g = _Lark(r"""start: A B C
                          A: "\x01"
                          B: /\x02/
                          C: "\xABCD"
                       """)
            g.parse('\x01\x02\xABCD')

        def test_unicode_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\u0061".."\u0063"
                       """)
            g.parse('abc')

        def test_hex_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\x01".."\x03"
                       """)
            g.parse('\x01\x02\x03')
        @unittest.skipIf(PARSER == 'cyk', "Takes forever")
        def test_stack_for_ebnf(self):
            """Verify that stack depth isn't an issue for EBNF grammars"""
            g = _Lark(r"""start: a+
                          a : "a" """)

            g.parse("a" * (sys.getrecursionlimit()*2 ))

        def test_expand1_lists_with_one_item(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("a")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)
        def test_expand1_lists_with_one_item_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("a!")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_dont_expand1_lists_with_multiple_items(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        def test_dont_expand1_lists_with_multiple_items_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa!")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))
        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list(self):
            g = _Lark(r"""start: list
                          ?list: item*
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list_2(self):
            g = _Lark(r"""start: list
                          ?list: item* "!"?
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())
        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_single_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item',))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_multiple_item_flatten_list(self):
            g = _Lark(r"""start: list
                          #list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_recurse_flatten(self):
            """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
            g = _Lark(r"""start: a | start a
                          a : A
                          A : "a" """)

            # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
            # STree data structures, which uses recursion).
            g.parse("a" * (sys.getrecursionlimit() // 4))
        def test_token_collision(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %ignore " "
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision_WS(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %import common.WS
                          %ignore WS
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision2(self):
            g = _Lark("""
                    !start: "starts"
                    %import common.LCASE_LETTER
                    """)

            x = g.parse("starts")
            self.assertSequenceEqual(x.children, ['starts'])

        # def test_string_priority(self):
        #     g = _Lark("""start: (A | /a?bb/)+
        #                  A: "a" """)
        #     x = g.parse('abb')
        #     self.assertEqual(len(x.children), 2)
        #
        #     # This parse raises an exception because the lexer will always try to consume
        #     # "a" first and will never match the regular expression
        #     # This behavior is subject to change!!
        #     # This won't happen with ambiguity handling.
        #     g = _Lark("""start: (A | /a?ab/)+
        #                  A: "a" """)
        #     self.assertRaises(LexError, g.parse, 'aab')
        def test_undefined_rule(self):
            self.assertRaises(GrammarError, _Lark, """start: a""")

        def test_undefined_token(self):
            self.assertRaises(GrammarError, _Lark, """start: A""")

        def test_rule_collision(self):
            g = _Lark("""start: "a"+ "b"
                             | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')

        def test_rule_collision2(self):
            g = _Lark("""start: "a"* "b"
                             | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')
            x = g.parse('b')

        def test_token_not_anon(self):
            """Tests that "a" is matched as an anonymous token, and not A.
            """
            g = _Lark("""start: "a"
                         A: "a" """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 0, '"a" should be considered anonymous')

            g = _Lark("""start: "a" A
                         A: "a" """)
            x = g.parse('aa')
            self.assertEqual(len(x.children), 1, 'only "a" should be considered anonymous')
            self.assertEqual(x.children[0].type, "A")

            g = _Lark("""start: /a/
                         A: /a/ """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 1)
            self.assertEqual(x.children[0].type, "A", "A isn't associated with /a/")
        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_maybe(self):
            g = _Lark("""start: ["a"] """)
            x = g.parse('a')
            x = g.parse('')

        def test_start(self):
            g = _Lark("""a: "a" a? """, start='a')
            x = g.parse('a')
            x = g.parse('aa')
            x = g.parse('aaa')

        def test_alias(self):
            g = _Lark("""start: "a" -> b """)
            x = g.parse('a')
            self.assertEqual(x.data, "b")

        def test_token_ebnf(self):
            g = _Lark("""start: A
                         A: "a"* ("b"? "c".."e")+
                      """)
            x = g.parse('abcde')
            x = g.parse('dd')

        def test_backslash(self):
            g = _Lark(r"""start: "\\" "a"
                       """)
            x = g.parse(r'\a')

            g = _Lark(r"""start: /\\/ /a/
                       """)
            x = g.parse(r'\a')

        def test_backslash2(self):
            g = _Lark(r"""start: "\"" "-"
                       """)
            x = g.parse('"-')

            g = _Lark(r"""start: /\// /-/
                       """)
            x = g.parse('/-')

        def test_special_chars(self):
            g = _Lark(r"""start: "\n"
                       """)
            x = g.parse('\n')

            g = _Lark(r"""start: /\n/
                       """)
            x = g.parse('\n')
        # def test_token_recurse(self):
        #     g = _Lark("""start: A
        #                  A: B
        #                  B: A
        #               """)

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = _Lark(r"""start: _empty a "B"
                          a: _empty "A"
                          _empty:
                       """)
            x = g.parse('AB')

        def test_regex_quote(self):
            g = r"""
            start: SINGLE_QUOTED_STRING | DOUBLE_QUOTED_STRING
            SINGLE_QUOTED_STRING : /'[^']*'/
            DOUBLE_QUOTED_STRING : /"[^"]*"/
            """
            g = _Lark(g)
            self.assertEqual( g.parse('"hello"').children, ['"hello"'])
            self.assertEqual( g.parse("'hello'").children, ["'hello'"])

        def test_lexer_token_limit(self):
            "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"
            tokens = {'A%d'%i:'"%d"'%i for i in range(300)}
            g = _Lark("""start: %s
                      %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))
        def test_float_without_lexer(self):
            expected_error = UnexpectedCharacters if LEXER.startswith('dynamic') else UnexpectedToken
            if PARSER == 'cyk':
                expected_error = ParseError

            g = _Lark("""start: ["+"|"-"] float
                         float: digit* "." digit+ exp?
                              | digit+ exp
                         exp: ("e"|"E") ["+"|"-"] digit+
                         digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
                      """)
            g.parse("1.2")
            g.parse("-.2e9")
            g.parse("+2e-9")
            self.assertRaises( expected_error, g.parse, "+2e-9e")
        def test_keep_all_tokens(self):
            l = _Lark("""start: "a"+ """, keep_all_tokens=True)
            tree = l.parse('aaa')
            self.assertEqual(tree.children, ['a', 'a', 'a'])

        def test_token_flags(self):
            l = _Lark("""!start: "a"i+
                      """)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            l = _Lark("""!start: /a/i+
                      """)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            # g = """!start: "a"i "a"
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            # g = """!start: /a/i /a/
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            g = """start: NAME "," "a"
                   NAME: /[a-z_]/i /[a-z0-9_]/i*
                """
            l = _Lark(g)
            tree = l.parse('ab,a')
            self.assertEqual(tree.children, ['ab'])
            tree = l.parse('AB,a')
            self.assertEqual(tree.children, ['AB'])

        def test_token_flags3(self):
            l = _Lark("""!start: ABC+
                         ABC: "abc"i
                      """)
            tree = l.parse('aBcAbC')
            self.assertEqual(tree.children, ['aBc', 'AbC'])

        def test_token_flags2(self):
            g = """!start: ("a"i | /a/ /b/?)+
                """
            l = _Lark(g)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_twice_empty(self):
            g = """!start: [["A"]]
                """
            l = _Lark(g)
            tree = l.parse('A')
            self.assertEqual(tree.children, ['A'])

            tree = l.parse('')
            self.assertEqual(tree.children, [])
        def test_undefined_ignore(self):
            g = """!start: "A"
                %ignore B
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_alias_in_terminal(self):
            g = """start: TERM
                TERM: "a" -> alias
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_line_and_column(self):
            g = r"""!start: "A" bc "D"
                !bc: "B\nC"
                """
            l = _Lark(g)
            a, bc, d = l.parse("AB\nCD").children
            self.assertEqual(a.line, 1)
            self.assertEqual(a.column, 1)

            bc, = bc.children
            self.assertEqual(bc.line, 1)
            self.assertEqual(bc.column, 2)

            self.assertEqual(d.line, 2)
            self.assertEqual(d.column, 2)

            if LEXER != 'dynamic':
                self.assertEqual(a.end_line, 1)
                self.assertEqual(a.end_column, 2)
                self.assertEqual(bc.end_line, 2)
                self.assertEqual(bc.end_column, 2)
                self.assertEqual(d.end_line, 2)
                self.assertEqual(d.end_column, 3)
        def test_reduce_cycle(self):
            """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state.
            It seems that the correct solution is to explicitly distinguish finalization in the reduce() function.
            """
            l = _Lark("""
                term: A
                    | term term
                A: "a"
                """, start='term')

            tree = l.parse("aa")
            self.assertEqual(len(tree.children), 2)
        @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority")
        def test_lexer_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: A B | AB
            A.2: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['a', 'b'])
            self.assertNotEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B: "b"
            AB.3: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertNotEqual(res.children, ['a', 'b'])
            self.assertEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B.-20: "b"
            AB.-10: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertEqual(res.children, ['a', 'b'])

            grammar = """
            start: A B | AB
            A.-99999999999999999999999: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['ab'])
        def test_import(self):
            grammar = """
            start: NUMBER WORD
            %import common.NUMBER
            %import common.WORD
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_import_rename(self):
            grammar = """
            start: N W
            %import common.NUMBER -> N
            %import common.WORD -> W
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_relative_import(self):
            l = _Lark_open('test_relative_import.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_import_rename(self):
            l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_rule_import(self):
            l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('expr', ['a', Tree('expr', ['a', 'b']), 'b']),
                'y'])

        def test_relative_rule_import_drop_ignore(self):
            # %ignore rules are dropped on import
            l = _Lark_open('test_relative_rule_import_drop_ignore.lark',
                           rel_to=__file__)
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xa abby')

        def test_relative_rule_import_subrule(self):
            l = _Lark_open('test_relative_rule_import_subrule.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', [
                        'a', Tree('grammars__ab__expr', ['a', 'b']), 'b',
                    ]),
                ]),
                'y'])

        def test_relative_rule_import_subrule_no_conflict(self):
            l = _Lark_open(
                'test_relative_rule_import_subrule_no_conflict.lark',
                rel_to=__file__)
            x = l.parse('xaby')
            self.assertEqual(x.children, [Tree('expr', [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', ['a', 'b']),
                ]),
                'y'])])
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xaxabyby')

        def test_relative_rule_import_rename(self):
            l = _Lark_open('test_relative_rule_import_rename.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('ab', ['a', Tree('ab', ['a', 'b']), 'b']),
                'y'])

        def test_multi_import(self):
            grammar = """
            start: NUMBER WORD
            %import common (NUMBER, WORD, WS)
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 toucans')
            self.assertEqual(x.children, ['12', 'toucans'])

        def test_relative_multi_import(self):
            l = _Lark_open("test_relative_multi_import.lark", rel_to=__file__)
            x = l.parse('12 capybaras')
            self.assertEqual(x.children, ['12', 'capybaras'])

        def test_relative_import_preserves_leading_underscore(self):
            l = _Lark_open("test_relative_import_preserves_leading_underscore.lark", rel_to=__file__)
            x = l.parse('Ax')
            self.assertEqual(next(x.find_data('c')).children, ['A'])

        def test_relative_import_of_nested_grammar(self):
            l = _Lark_open("grammars/test_relative_import_of_nested_grammar.lark", rel_to=__file__)
            x = l.parse('N')
            self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])

        def test_relative_import_rules_dependencies_imported_only_once(self):
            l = _Lark_open("test_relative_import_rules_dependencies_imported_only_once.lark", rel_to=__file__)
            x = l.parse('AAA')
            self.assertEqual(next(x.find_data('a')).children, ['A'])
            self.assertEqual(next(x.find_data('b')).children, ['A'])
            self.assertEqual(next(x.find_data('d')).children, ['A'])

        def test_import_errors(self):
            grammar = """
            start: NUMBER WORD
            %import .grammars.bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

            grammar = """
            start: NUMBER WORD
            %import bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)
        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: a | b
            a.1: "a"
            b.2: "a"
            """

            # l = Lark(grammar, parser='earley', lexer='standard')
            l = _Lark(grammar)
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'b')

            grammar = """
            start: a | b
            a.2: "a"
            b.1: "a"
            """

            l = _Lark(grammar)
            # l = Lark(grammar, parser='earley', lexer='standard')
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'a')

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization_sum(self):
            "Tests effect of priority on result"

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_: "ab"
            bb_.1: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_.1: "ab"
            bb_: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.2: "a"
            b_.1: "b"
            ab_.3: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.1: "a"
            b_.1: "b"
            ab_.4: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')
        def test_utf8(self):
            g = u"""start: a
                    a: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [Tree('a', [])]))

            g = u"""start: A
                    A: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [u'\xb1a']))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_ignore(self):
            grammar = r"""
            COMMENT: /(!|(\/\/))[^\n]*/
            %ignore COMMENT
            %import common.WS -> _WS
            %import common.INT
            start: "INT"i _WS+ INT _WS*
            """
            parser = _Lark(grammar)

            tree = parser.parse("int 1 ! This is a comment\n")
            self.assertEqual(tree.children, ['1'])

            tree = parser.parse("int 1 ! This is a comment")    # A trailing ignore token can be tricky!
            self.assertEqual(tree.children, ['1'])

            parser = _Lark(r"""
                start : "a"*
                %ignore "b"
            """)
            tree = parser.parse("bb")
            self.assertEqual(tree.children, [])

        def test_regex_escaping(self):
            g = _Lark("start: /[ab]/")
            g.parse('a')
            g.parse('b')
            self.assertRaises( UnexpectedInput, g.parse, 'c')

            _Lark(r'start: /\w/').parse('a')

            g = _Lark(r'start: /\\w/')
            self.assertRaises( UnexpectedInput, g.parse, 'a')
            g.parse(r'\w')

            _Lark(r'start: /\[/').parse('[')
            _Lark(r'start: /\//').parse('/')
            _Lark(r'start: /\\/').parse('\\')
            _Lark(r'start: /\[ab]/').parse('[ab]')
            _Lark(r'start: /\\[ab]/').parse('\\a')
            _Lark(r'start: /\t/').parse('\t')
            _Lark(r'start: /\\t/').parse('\\t')
            _Lark(r'start: /\\\t/').parse('\\\t')
            _Lark(r'start: "\t"').parse('\t')
            _Lark(r'start: "\\t"').parse('\\t')
            _Lark(r'start: "\\\t"').parse('\\\t')
        def test_ranged_repeat_rules(self):
            g = u"""!start: "A"~3
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["A", "A", "A"]))
            self.assertRaises(ParseError, l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: "A"~0..2
                """
            if PARSER != 'cyk':    # XXX CYK currently doesn't support empty grammars
                l = _Lark(g)
                self.assertEqual(l.parse(u''), Tree('start', []))
                self.assertEqual(l.parse(u'A'), Tree('start', ['A']))
                self.assertEqual(l.parse(u'AA'), Tree('start', ['A', 'A']))
                self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'AAA')

            g = u"""!start: "A"~3..2
                """
            self.assertRaises(GrammarError, _Lark, g)

            g = u"""!start: "A"~2..3 "B"~2
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
            self.assertEqual(l.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
            self.assertRaises(ParseError, l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        def test_ranged_repeat_terms(self):
            g = u"""!start: AAA
                    AAA: "A"~3
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"]))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: AABB CC
                    AABB: "A"~0..2 "B"~2
                    CC: "C"~1..2
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC']))
            self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C']))
            self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC']))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')
        @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now")  # TODO XXX
        def test_priority_vs_embedded(self):
            g = """
            A.2: "a"
            WORD: ("a".."z")+
            start: (A | WORD)+
            """
            l = _Lark(g)
            t = l.parse('abc')
            self.assertEqual(t.children, ['a', 'bc'])
            self.assertEqual(t.children[0].type, 'A')
        def test_line_counting(self):
            p = _Lark("start: /[^x]+/")

            text = 'hello\nworld'
            t = p.parse(text)
            tok = t.children[0]
            self.assertEqual(tok, text)
            self.assertEqual(tok.line, 1)
            self.assertEqual(tok.column, 1)
            if LEXER != 'dynamic':
                self.assertEqual(tok.end_line, 2)
                self.assertEqual(tok.end_column, 6)
        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_empty_end(self):
            p = _Lark("""
                start: b c d
                b: "B"
                c: | "C"
                d: | "D"
            """)
            res = p.parse('B')
            self.assertEqual(len(res.children), 3)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_maybe_placeholders(self):
            # Anonymous tokens shouldn't count
            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [])

            # All invisible constructs shouldn't count
            p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
                         A: "a"
                         _c: "c" """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None])
            self.assertEqual(p.parse("c").children, [None])
            self.assertEqual(p.parse("aefc").children, ['a'])

            # ? shouldn't apply
            p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])

            p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None, None])
            self.assertEqual(p.parse("a").children, ['a', None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("c").children, [None, None, 'c'])
            self.assertEqual(p.parse("ab").children, ['a', 'b', None])
            self.assertEqual(p.parse("ac").children, ['a', None, 'c'])
            self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
            self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

            p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
            self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
            self.assertEqual(p.parse("babbcabcb").children,
                [None, 'b', None,
                 'a', 'b', None,
                 None, 'b', 'c',
                 'a', 'b', 'c',
                 None, 'b', None])

            p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
            self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
            self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
            self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])
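        # Summary of the behaviour exercised above: with maybe_placeholders=True,
        # every bracketed [x] contributes exactly one child, either the matched
        # value or None, so the tree shape stays fixed regardless of input.
        # A one-line sketch, using the same _Lark helper as above:
        #   _Lark('!start: ["a"] "b"', maybe_placeholders=True).parse('b').children == [None, 'b']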
        def test_escaped_string(self):
            "Tests common.ESCAPED_STRING"
            grammar = r"""
            start: ESCAPED_STRING+
            %import common (WS_INLINE, ESCAPED_STRING)
            %ignore WS_INLINE
            """
            parser = _Lark(grammar)
            parser.parse(r'"\\" "b" "c"')
            parser.parse(r'"That" "And a \"b"')

        def test_meddling_unused(self):
            "Unless 'unused' is removed, LALR analysis will fail on reduce-reduce collision"
            grammar = """
                start: EKS* x
                x: EKS
                unused: x*
                EKS: "x"
            """
            parser = _Lark(grammar)

        @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
        def test_serialize(self):
            grammar = """
                start: _ANY b "C"
                _ANY: /./
                b: "B"
            """
            parser = _Lark(grammar)
            d = parser.serialize()
            parser2 = Lark.deserialize(d, {}, {})
            self.assertEqual(parser2.parse('ABC'), Tree('start', [Tree('b', [])]) )

            namespace = {'Rule': Rule, 'TerminalDef': TerminalDef}
            d, m = parser.memo_serialize(namespace.values())
            parser3 = Lark.deserialize(d, namespace, m)
            self.assertEqual(parser3.parse('ABC'), Tree('start', [Tree('b', [])]) )

        def test_multi_start(self):
            parser = _Lark('''
                a: "x" "a"?
                b: "x" "b"?
            ''', start=['a', 'b'])

            self.assertEqual(parser.parse('xa', 'a'), Tree('a', []))
            self.assertEqual(parser.parse('xb', 'b'), Tree('b', []))

        def test_lexer_detect_newline_tokens(self):
            # Detect newlines in regular tokens
            g = _Lark(r"""start: "go" tail*
                !tail : SA "@" | SB "@" | SC "@" | SD "@"
                SA : "a" /\n/
                SB : /b./s
                SC : "c" /[^a-z]/
                SD : "d" /\s/
            """)
            a, b, c, d = [x.children[1] for x in g.parse('goa\n@b\n@c\n@d\n@').children]
            self.assertEqual(a.line, 2)
            self.assertEqual(b.line, 3)
            self.assertEqual(c.line, 4)
            self.assertEqual(d.line, 5)

            # Detect newlines in ignored tokens
            for re in ['/\\n/', '/[^a-z]/', '/\\s/']:
                g = _Lark('''!start: "a" "a"
                             %ignore {}'''.format(re))
                a, b = g.parse('a\na').children
                self.assertEqual(a.line, 1)
                self.assertEqual(b.line, 2)
    _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
    _TestParser.__name__ = _NAME
    globals()[_NAME] = _TestParser

# Note: You still have to import them in __main__ for the tests to run
# (see the sketch after the loops below)
_TO_TEST = [
        ('standard', 'earley'),
        ('standard', 'cyk'),
        ('dynamic', 'earley'),
        ('dynamic_complete', 'earley'),
        ('standard', 'lalr'),
        ('contextual', 'lalr'),
        ('custom', 'lalr'),
        # (None, 'earley'),
]
for _LEXER, _PARSER in _TO_TEST:
    _make_parser_test(_LEXER, _PARSER)

for _LEXER in ('dynamic', 'dynamic_complete'):
    _make_full_earley_test(_LEXER)
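# Sketch (the module path is an assumption about the project layout): a runner
# in another module must pull the generated classes into its own namespace for
# unittest discovery, e.g.:
#
#     from .test_parsers import *   # exposes TestEarleyStandard, TestLalrContextual, ...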
if __name__ == '__main__':
    unittest.main()