# -*- coding: utf-8 -*-
from __future__ import absolute_import

import unittest
import logging
import os
import sys
try:
    from cStringIO import StringIO as cStringIO
except ImportError:
    # cStringIO is available only in Python 2.x; Python 3.x only has io.StringIO (imported below)
    cStringIO = None
from io import (
    StringIO as uStringIO,
    open,
)

logging.basicConfig(level=logging.INFO)

from lark.lark import Lark
from lark.exceptions import GrammarError, ParseError, UnexpectedToken, UnexpectedInput, UnexpectedCharacters
from lark.tree import Tree
from lark.visitors import Transformer, Transformer_InPlace, v_args
from lark.grammar import Rule
from lark.lexer import TerminalDef, Lexer, TraditionalLexer

__path__ = os.path.dirname(__file__)
def _read(n, *args):
    with open(os.path.join(__path__, n), *args) as f:
        return f.read()
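
# Small helper for reading files relative to this test module's directory.
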
class TestParsers(unittest.TestCase):
    def test_same_ast(self):
        "Tests that Earley and LALR parsers produce equal trees"
        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w+/ """, parser='lalr')
        l = g.parse('(a,b,c,*x)')

        g = Lark(r"""start: "(" name_list ("," "*" NAME)? ")"
                     name_list: NAME | name_list "," NAME
                     NAME: /\w/+ """)
        l2 = g.parse('(a,b,c,*x)')
        assert l == l2, '%s != %s' % (l.pretty(), l2.pretty())

    def test_infinite_recurse(self):
        g = """start: a
               a: a | "a"
            """
        self.assertRaises(GrammarError, Lark, g, parser='lalr')

        # TODO: should it? shouldn't it?
        # l = Lark(g, parser='earley', lexer='dynamic')
        # self.assertRaises(ParseError, l.parse, 'a')

    def test_propagate_positions(self):
        g = Lark("""start: a
                    a: "a"
                 """, propagate_positions=True)

        r = g.parse('a')
        self.assertEqual( r.children[0].meta.line, 1 )

    def test_expand1(self):
        g = Lark("""start: a
                    ?a: b
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "b" )

        g = Lark("""start: a
                    ?a: b -> c
                    b: "x"
                 """)

        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: B -> c
                    B: "x"
                 """)
        r = g.parse('x')
        self.assertEqual( r.children[0].data, "c" )

        g = Lark("""start: a
                    ?a: b b -> c
                    b: "x"
                 """)

        r = g.parse('xx')
        self.assertEqual( r.children[0].data, "c" )

    def test_embedded_transformer(self):
        class T(Transformer):
            def a(self, children):
                return "<a>"
            def b(self, children):
                return "<b>"
            def c(self, children):
                return "<c>"

        # Test regular
        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<a>"] )

        g = Lark("""start: a
                    a : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<a>"] )

        # Test Expand1
        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("x"))
        self.assertEqual( r.children, ["<b>"] )

        g = Lark("""start: a
                    ?a : b
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("x")
        self.assertEqual( r.children, ["<b>"] )

        # Test Expand1 -> Alias
        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr')
        r = T().transform(g.parse("xx"))
        self.assertEqual( r.children, ["<c>"] )

        g = Lark("""start: a
                    ?a : b b -> c
                    b : "x"
                 """, parser='lalr', transformer=T())
        r = g.parse("xx")
        self.assertEqual( r.children, ["<c>"] )

    def test_embedded_transformer_inplace(self):
        @v_args(tree=True)
        class T1(Transformer_InPlace):
            def a(self, tree):
                assert isinstance(tree, Tree), tree
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        @v_args(tree=True)
        class T2(Transformer):
            def a(self, tree):
                assert isinstance(tree, Tree)
                tree.children.append("tested")
                return tree

            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        class T3(Transformer):
            @v_args(tree=True)
            def a(self, tree):
                assert isinstance(tree, Tree)
                tree.children.append("tested")
                return tree

            @v_args(tree=True)
            def b(self, tree):
                return Tree(tree.data, tree.children + ['tested2'])

        for t in [T1(), T2(), T3()]:
            for internal in [False, True]:
                g = Lark("""start: a b
                            a : "x"
                            b : "y"
                         """, parser='lalr', transformer=t if internal else None)
                r = g.parse("xy")
                if not internal:
                    r = t.transform(r)

                a, b = r.children
                self.assertEqual(a.children, ["tested"])
                self.assertEqual(b.children, ["tested2"])

    def test_alias(self):
        Lark("""start: ["a"] "b" ["c"] "e" ["f"] ["g"] ["h"] "x" -> d """)

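
# Test factories: each _make_*_test function below builds a TestCase subclass
# closed over one lexer/parser configuration and registers it in globals(), so
# the same tests run once per configuration under unittest discovery.
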
def _make_full_earley_test(LEXER):
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=LEXER, parser='earley', propagate_positions=True, **kwargs)

    class _TestFullEarley(unittest.TestCase):
        def test_anon(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = Lark(r"""start: B
                         B: ("ab"|/[^b]/)+
                      """, lexer=LEXER)

            self.assertEqual( g.parse('abc').children[0], 'abc')

        def test_earley(self):
            g = Lark("""start: A "b" c
                        A: "a"+
                        c: "abc"
                        """, parser="earley", lexer=LEXER)
            x = g.parse('aaaababc')

        def test_earley2(self):
            grammar = """
            start: statement+
            statement: "r"
                     | "c" /[a-z]/+
            %ignore " "
            """
            program = """c b r"""

            l = Lark(grammar, parser='earley', lexer=LEXER)
            l.parse(program)

        @unittest.skipIf(LEXER=='dynamic', "Only relevant for the dynamic_complete lexer")
        def test_earley3(self):
            """Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)

            By default, `+` should imitate regexp greedy-matching
            """
            grammar = """
            start: A A
            A: "a"+
            """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            self.assertEqual(set(res.children), {'aa', 'a'})
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aa', 'a'])

        def test_earley4(self):
            grammar = """
            start: A A?
            A: "a"+
            """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse("aaa")
            assert set(res.children) == {'aa', 'a'} or res.children == ['aaa']
            # XXX TODO fix Earley to maintain correct order
            # i.e. it should imitate greedy search for terminals, but lazy search for rules
            # self.assertEqual(res.children, ['aaa'])

        def test_earley_repeating_empty(self):
            # This was a sneaky bug!
            grammar = """
            !start: "a" empty empty "b"
            empty: empty2
            empty2:
            """
            parser = Lark(grammar, parser='earley', lexer=LEXER)
            res = parser.parse('ab')

            empty_tree = Tree('empty', [Tree('empty2', [])])
            self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b'])

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_earley_explicit_ambiguity(self):
            # This was a sneaky bug!
            grammar = """
            start: a b | ab
            a: "a"
            b: "b"
            ab: "ab"
            """
            parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit')
            ambig_tree = parser.parse('ab')
            self.assertEqual( ambig_tree.data, '_ambig')
            self.assertEqual( len(ambig_tree.children), 2)

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity1(self):
            grammar = """
            start: cd+ "e"
            !cd: "c"
               | "d"
               | "cd"
            """
            l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER)
            ambig_tree = l.parse('cde')

            assert ambig_tree.data == '_ambig', ambig_tree
            assert len(ambig_tree.children) == 2

        @unittest.skipIf(LEXER=='standard', "Requires dynamic lexer")
        def test_ambiguity2(self):
            grammar = """
            ANY: /[a-zA-Z0-9 ]+/
            a.2: "A" b+
            b.2: "B"
            c: ANY

            start: (a|c)*
            """
            l = Lark(grammar, parser='earley', lexer=LEXER)
            res = l.parse('ABX')
            expected = Tree('start', [
                Tree('a', [
                    Tree('b', [])
                ]),
                Tree('c', [
                    'X'
                ])
            ])
            self.assertEqual(res, expected)

        def test_fruitflies_ambig(self):
            grammar = """
                start: noun verb noun        -> simple
                     | noun verb "like" noun -> comparative

                noun: adj? NOUN
                verb: VERB
                adj: ADJ

                NOUN: "flies" | "bananas" | "fruit"
                VERB: "like" | "flies"
                ADJ: "fruit"

                %import common.WS
                %ignore WS
            """
            parser = Lark(grammar, ambiguity='explicit', lexer=LEXER)
            tree = parser.parse('fruit flies like bananas')

            expected = Tree('_ambig', [
                Tree('comparative', [
                    Tree('noun', ['fruit']),
                    Tree('verb', ['flies']),
                    Tree('noun', ['bananas'])
                ]),
                Tree('simple', [
                    Tree('noun', [Tree('adj', ['fruit']), 'flies']),
                    Tree('verb', ['like']),
                    Tree('noun', ['bananas'])
                ])
            ])

            # self.assertEqual(tree, expected)
            self.assertEqual(tree.data, expected.data)
            self.assertEqual(set(tree.children), set(expected.children))

        @unittest.skipIf(LEXER!='dynamic_complete', "Only relevant for the dynamic_complete lexer")
        def test_explicit_ambiguity2(self):
            grammar = r"""
            start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """cat"""

            parser = _Lark(grammar, start='start', ambiguity='explicit')
            tree = parser.parse(text)
            self.assertEqual(tree.data, '_ambig')

            combinations = {tuple(str(s) for s in t.children) for t in tree.children}
            self.assertEqual(combinations, {
                ('cat',),
                ('ca', 't'),
                ('c', 'at'),
                ('c', 'a' ,'t')
            })

        def test_term_ambig_resolve(self):
            grammar = r"""
            !start: NAME+
            NAME: /\w+/
            %ignore " "
            """
            text = """foo bar"""

            parser = Lark(grammar)
            tree = parser.parse(text)
            self.assertEqual(tree.children, ['foo', 'bar'])

        # @unittest.skipIf(LEXER=='dynamic', "Not implemented in Dynamic Earley yet") # TODO
        # def test_not_all_derivations(self):
        #     grammar = """
        #     start: cd+ "e"
        #     !cd: "c"
        #        | "d"
        #        | "cd"
        #     """
        #     l = Lark(grammar, parser='earley', ambiguity='explicit', lexer=LEXER, earley__all_derivations=False)
        #     x = l.parse('cde')
        #     assert x.data != '_ambig', x
        #     assert len(x.children) == 1

    _NAME = "TestFullEarley" + LEXER.capitalize()
    _TestFullEarley.__name__ = _NAME
    globals()[_NAME] = _TestFullEarley
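    # The generated class is published under a lexer-specific name, e.g.
    # TestFullEarleyDynamic, so each tested lexer gets its own TestCase.
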
class CustomLexer(Lexer):
    """
    The purpose of this custom lexer is to test the integration,
    so it uses the TraditionalLexer as its implementation, without custom lexing behaviour.
    """
    def __init__(self, lexer_conf):
        self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks)
    def lex(self, *args, **kwargs):
        return self.lexer.lex(*args, **kwargs)
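
# A rough usage sketch (matching what _make_parser_test does below for the
# 'custom' entry): the lexer class itself is passed to Lark, which instantiates
# it with the lexer configuration:
#
#     parser = Lark(grammar, parser='lalr', lexer=CustomLexer)
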
def _make_parser_test(LEXER, PARSER):
    lexer_class_or_name = CustomLexer if LEXER == 'custom' else LEXER
    def _Lark(grammar, **kwargs):
        return Lark(grammar, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)
    def _Lark_open(gfilename, **kwargs):
        return Lark.open(gfilename, lexer=lexer_class_or_name, parser=PARSER, propagate_positions=True, **kwargs)

    class _TestParser(unittest.TestCase):
        def test_basic1(self):
            g = _Lark("""start: a+ b a* "b" a*
                         b: "b"
                         a: "a"
                      """)

            r = g.parse('aaabaab')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaa' )
            r = g.parse('aaabaaba')
            self.assertEqual( ''.join(x.data for x in r.children), 'aaabaaa' )

            self.assertRaises(ParseError, g.parse, 'aaabaa')

        def test_basic2(self):
            # Multiple parsers and colliding tokens
            g = _Lark("""start: B A
                         B: "12"
                         A: "1" """)
            g2 = _Lark("""start: B A
                          B: "12"
                          A: "2" """)
            x = g.parse('121')
            assert x.data == 'start' and x.children == ['12', '1'], x
            x = g2.parse('122')
            assert x.data == 'start' and x.children == ['12', '2'], x

        @unittest.skipIf(cStringIO is None, "cStringIO not available")
        def test_stringio_bytes(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(cStringIO(b'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_stringio_unicode(self):
            """Verify that a Lark can be created from file-like objects other than Python's standard 'file' object"""
            _Lark(uStringIO(u'start: a+ b a* "b" a*\n b: "b"\n a: "a" '))

        def test_unicode(self):
            g = _Lark(u"""start: UNIA UNIB UNIA
                          UNIA: /\xa3/
                          UNIB: /\u0101/
                       """)
            g.parse(u'\xa3\u0101\u00a3')

        def test_unicode2(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "a\u0101b\ "
                          UNIC: /a?\u0101c\n/
                       """)
            g.parse(u'\xa3a\u0101b\\ \u00a3\u0101c\n')

        def test_unicode3(self):
            g = _Lark(r"""start: UNIA UNIB UNIA UNIC
                          UNIA: /\xa3/
                          UNIB: "\u0101"
                          UNIC: /\u0203/ /\n/
                       """)
            g.parse(u'\xa3\u0101\u00a3\u0203\n')

        def test_hex_escape(self):
            g = _Lark(r"""start: A B C
                          A: "\x01"
                          B: /\x02/
                          C: "\xABCD"
                       """)
            g.parse('\x01\x02\xABCD')

        def test_unicode_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\u0061".."\u0063"
                       """)
            g.parse('abc')

        def test_hex_literal_range_escape(self):
            g = _Lark(r"""start: A+
                          A: "\x01".."\x03"
                       """)
            g.parse('\x01\x02\x03')

        @unittest.skipIf(PARSER == 'cyk', "Takes forever")
        def test_stack_for_ebnf(self):
            """Verify that stack depth isn't an issue for EBNF grammars"""
            g = _Lark(r"""start: a+
                          a : "a" """)

            g.parse("a" * (sys.getrecursionlimit()*2 ))

        def test_expand1_lists_with_one_item(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("a")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_expand1_lists_with_one_item_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("a!")

            # because 'list' is an expand-if-contains-one rule and we only provided one element it should have expanded to 'item'
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('item',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

        def test_dont_expand1_lists_with_multiple_items(self):
            g = _Lark(r"""start: list
                          ?list: item+
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        def test_dont_expand1_lists_with_multiple_items_2(self):
            g = _Lark(r"""start: list
                          ?list: item+ "!"
                          item : A
                          A: "a"
                       """)
            r = g.parse("aa!")

            # because 'list' is an expand-if-contains-one rule and we've provided more than one element it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list(self):
            g = _Lark(r"""start: list
                          ?list: item*
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_expand1_list_2(self):
            g = _Lark(r"""start: list
                          ?list: item* "!"?
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # because 'list' is an expand-if-contains-one rule and we've provided less than one element (i.e. none) it should *not* have expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # regardless of the amount of items: there should be only *one* child in 'start' because 'list' isn't an expand-all rule
            self.assertEqual(len(r.children), 1)

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains no 'item's as we've given it none
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ())

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_single_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the one 'item' we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item',))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_multiple_item_flatten_list(self):
            g = _Lark(r"""start: list
                          list: | item "," list
                          item : A
                          A: "a"
                       """)
            r = g.parse("a,a,")

            # Because 'list' is a flatten rule its top-level element should *never* be expanded
            self.assertSequenceEqual([subtree.data for subtree in r.children], ('list',))

            # Sanity check: verify that 'list' contains exactly the two 'item's we've given it
            [list] = r.children
            self.assertSequenceEqual([item.data for item in list.children], ('item', 'item'))

        @unittest.skipIf(True, "Flattening list isn't implemented (and may never be)")
        def test_recurse_flatten(self):
            """Verify that stack depth doesn't get exceeded on recursive rules marked for flattening."""
            g = _Lark(r"""start: a | start a
                          a : A
                          A : "a" """)

            # Force PLY to write to the debug log, but prevent writing it to the terminal (uses repr() on the half-built
            # STree data structures, which uses recursion).
            g.parse("a" * (sys.getrecursionlimit() // 4))

        def test_token_collision(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %ignore " "
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision_WS(self):
            g = _Lark(r"""start: "Hello" NAME
                          NAME: /\w/+
                          %import common.WS
                          %ignore WS
                       """)
            x = g.parse('Hello World')
            self.assertSequenceEqual(x.children, ['World'])
            x = g.parse('Hello HelloWorld')
            self.assertSequenceEqual(x.children, ['HelloWorld'])

        def test_token_collision2(self):
            g = _Lark("""
                    !start: "starts"

                    %import common.LCASE_LETTER
                    """)

            x = g.parse("starts")
            self.assertSequenceEqual(x.children, ['starts'])

        # def test_string_priority(self):
        #     g = _Lark("""start: (A | /a?bb/)+
        #                  A: "a" """)
        #     x = g.parse('abb')
        #     self.assertEqual(len(x.children), 2)
        #
        #     # This parse raises an exception because the lexer will always try to consume
        #     # "a" first and will never match the regular expression
        #     # This behavior is subject to change!!
        #     # This won't happen with ambiguity handling.
        #     g = _Lark("""start: (A | /a?ab/)+
        #                  A: "a" """)
        #     self.assertRaises(LexError, g.parse, 'aab')

        def test_undefined_rule(self):
            self.assertRaises(GrammarError, _Lark, """start: a""")

        def test_undefined_token(self):
            self.assertRaises(GrammarError, _Lark, """start: A""")

        def test_rule_collision(self):
            g = _Lark("""start: "a"+ "b"
                             | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')

        def test_rule_collision2(self):
            g = _Lark("""start: "a"* "b"
                             | "a"+ """)
            x = g.parse('aaaa')
            x = g.parse('aaaab')
            x = g.parse('b')

        def test_token_not_anon(self):
            """Tests that "a" is matched as an anonymous token, and not A.
            """
            g = _Lark("""start: "a"
                         A: "a" """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 0, '"a" should be considered anonymous')

            g = _Lark("""start: "a" A
                         A: "a" """)
            x = g.parse('aa')
            self.assertEqual(len(x.children), 1, 'only "a" should be considered anonymous')
            self.assertEqual(x.children[0].type, "A")

            g = _Lark("""start: /a/
                         A: /a/ """)
            x = g.parse('a')
            self.assertEqual(len(x.children), 1)
            self.assertEqual(x.children[0].type, "A", "A isn't associated with /a/")

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_maybe(self):
            g = _Lark("""start: ["a"] """)
            x = g.parse('a')
            x = g.parse('')

        def test_start(self):
            g = _Lark("""a: "a" a? """, start='a')
            x = g.parse('a')
            x = g.parse('aa')
            x = g.parse('aaa')

        def test_alias(self):
            g = _Lark("""start: "a" -> b """)
            x = g.parse('a')
            self.assertEqual(x.data, "b")

        def test_token_ebnf(self):
            g = _Lark("""start: A
                         A: "a"* ("b"? "c".."e")+
                      """)
            x = g.parse('abcde')
            x = g.parse('dd')

        def test_backslash(self):
            g = _Lark(r"""start: "\\" "a"
                       """)
            x = g.parse(r'\a')

            g = _Lark(r"""start: /\\/ /a/
                       """)
            x = g.parse(r'\a')

        def test_backslash2(self):
            g = _Lark(r"""start: "\"" "-"
                       """)
            x = g.parse('"-')

            g = _Lark(r"""start: /\// /-/
                       """)
            x = g.parse('/-')

        def test_special_chars(self):
            g = _Lark(r"""start: "\n"
                       """)
            x = g.parse('\n')

            g = _Lark(r"""start: /\n/
                       """)
            x = g.parse('\n')

        # def test_token_recurse(self):
        #     g = _Lark("""start: A
        #                  A: B
        #                  B: A
        #               """)

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_empty(self):
            # Fails an Earley implementation without special handling for empty rules,
            # or re-processing of already completed rules.
            g = _Lark(r"""start: _empty a "B"
                          a: _empty "A"
                          _empty:
                       """)
            x = g.parse('AB')

        def test_regex_quote(self):
            g = r"""
            start: SINGLE_QUOTED_STRING | DOUBLE_QUOTED_STRING
            SINGLE_QUOTED_STRING : /'[^']*'/
            DOUBLE_QUOTED_STRING : /"[^"]*"/
            """
            g = _Lark(g)
            self.assertEqual( g.parse('"hello"').children, ['"hello"'])
            self.assertEqual( g.parse("'hello'").children, ["'hello'"])

        def test_lexer_token_limit(self):
            "Python has a stupid limit of 100 groups in a regular expression. Test that we handle this limitation"
            tokens = {'A%d'%i:'"%d"'%i for i in range(300)}
            g = _Lark("""start: %s
                      %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))

        def test_float_without_lexer(self):
            expected_error = UnexpectedCharacters if LEXER.startswith('dynamic') else UnexpectedToken
            if PARSER == 'cyk':
                expected_error = ParseError

            g = _Lark("""start: ["+"|"-"] float
                         float: digit* "." digit+ exp?
                              | digit+ exp
                         exp: ("e"|"E") ["+"|"-"] digit+
                         digit: "0"|"1"|"2"|"3"|"4"|"5"|"6"|"7"|"8"|"9"
                      """)
            g.parse("1.2")
            g.parse("-.2e9")
            g.parse("+2e-9")
            self.assertRaises( expected_error, g.parse, "+2e-9e")

        def test_keep_all_tokens(self):
            l = _Lark("""start: "a"+ """, keep_all_tokens=True)
            tree = l.parse('aaa')
            self.assertEqual(tree.children, ['a', 'a', 'a'])

        def test_token_flags(self):
            l = _Lark("""!start: "a"i+
                      """
                      )
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            l = _Lark("""!start: /a/i+
                      """
                      )
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

            # g = """!start: "a"i "a"
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            # g = """!start: /a/i /a/
            #     """
            # self.assertRaises(GrammarError, _Lark, g)

            g = """start: NAME "," "a"
                   NAME: /[a-z_]/i /[a-z0-9_]/i*
                """
            l = _Lark(g)
            tree = l.parse('ab,a')
            self.assertEqual(tree.children, ['ab'])
            tree = l.parse('AB,a')
            self.assertEqual(tree.children, ['AB'])

        def test_token_flags3(self):
            l = _Lark("""!start: ABC+
                         ABC: "abc"i
                      """
                      )
            tree = l.parse('aBcAbC')
            self.assertEqual(tree.children, ['aBc', 'AbC'])

        def test_token_flags2(self):
            g = """!start: ("a"i | /a/ /b/?)+
                """
            l = _Lark(g)
            tree = l.parse('aA')
            self.assertEqual(tree.children, ['a', 'A'])

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_twice_empty(self):
            g = """!start: [["A"]]
                """
            l = _Lark(g)
            tree = l.parse('A')
            self.assertEqual(tree.children, ['A'])

            tree = l.parse('')
            self.assertEqual(tree.children, [])

        def test_undefined_ignore(self):
            g = """!start: "A"

                %ignore B
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_alias_in_terminal(self):
            g = """start: TERM
                TERM: "a" -> alias
                """
            self.assertRaises( GrammarError, _Lark, g)

        def test_line_and_column(self):
            g = r"""!start: "A" bc "D"
                !bc: "B\nC"
                """
            l = _Lark(g)
            a, bc, d = l.parse("AB\nCD").children
            self.assertEqual(a.line, 1)
            self.assertEqual(a.column, 1)

            bc, = bc.children
            self.assertEqual(bc.line, 1)
            self.assertEqual(bc.column, 2)

            self.assertEqual(d.line, 2)
            self.assertEqual(d.column, 2)

            if LEXER != 'dynamic':
                self.assertEqual(a.end_line, 1)
                self.assertEqual(a.end_column, 2)
                self.assertEqual(bc.end_line, 2)
                self.assertEqual(bc.end_column, 2)
                self.assertEqual(d.end_line, 2)
                self.assertEqual(d.end_column, 3)

        def test_reduce_cycle(self):
            """Tests an edge-condition in the LALR parser, in which a transition state looks exactly like the end state.
            It seems that the correct solution is to explicitly distinguish finalization in the reduce() function.
            """
            l = _Lark("""
                term: A
                    | term term

                A: "a"
                """, start='term')

            tree = l.parse("aa")
            self.assertEqual(len(tree.children), 2)

        @unittest.skipIf(LEXER != 'standard', "Only standard lexers care about token priority")
        def test_lexer_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: A B | AB
            A.2: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertEqual(res.children, ['a', 'b'])
            self.assertNotEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B: "b"
            AB.3: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")

            self.assertNotEqual(res.children, ['a', 'b'])
            self.assertEqual(res.children, ['ab'])

            grammar = """
            start: A B | AB
            A: "a"
            B.-20: "b"
            AB.-10: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertEqual(res.children, ['a', 'b'])

            grammar = """
            start: A B | AB
            A.-99999999999999999999999: "a"
            B: "b"
            AB: "ab"
            """
            l = _Lark(grammar)
            res = l.parse("ab")
            self.assertEqual(res.children, ['ab'])

        def test_import(self):
            grammar = """
            start: NUMBER WORD

            %import common.NUMBER
            %import common.WORD
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_import_rename(self):
            grammar = """
            start: N W

            %import common.NUMBER -> N
            %import common.WORD -> W
            %import common.WS
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 elephants')
            self.assertEqual(x.children, ['12', 'elephants'])

        def test_relative_import(self):
            l = _Lark_open('test_relative_import.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_import_rename(self):
            l = _Lark_open('test_relative_import_rename.lark', rel_to=__file__)
            x = l.parse('12 lions')
            self.assertEqual(x.children, ['12', 'lions'])

        def test_relative_rule_import(self):
            l = _Lark_open('test_relative_rule_import.lark', rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('expr', ['a', Tree('expr', ['a', 'b']), 'b']),
                'y'])

        def test_relative_rule_import_drop_ignore(self):
            # %ignore rules are dropped on import
            l = _Lark_open('test_relative_rule_import_drop_ignore.lark',
                           rel_to=__file__)
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xa abby')

        def test_relative_rule_import_subrule(self):
            l = _Lark_open('test_relative_rule_import_subrule.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', [
                        'a', Tree('grammars__ab__expr', ['a', 'b']), 'b',
                    ]),
                ]),
                'y'])

        def test_relative_rule_import_subrule_no_conflict(self):
            l = _Lark_open(
                'test_relative_rule_import_subrule_no_conflict.lark',
                rel_to=__file__)
            x = l.parse('xaby')
            self.assertEqual(x.children, [Tree('expr', [
                'x',
                Tree('startab', [
                    Tree('grammars__ab__expr', ['a', 'b']),
                ]),
                'y'])])
            self.assertRaises((ParseError, UnexpectedInput),
                              l.parse, 'xaxabyby')

        def test_relative_rule_import_rename(self):
            l = _Lark_open('test_relative_rule_import_rename.lark',
                           rel_to=__file__)
            x = l.parse('xaabby')
            self.assertEqual(x.children, [
                'x',
                Tree('ab', ['a', Tree('ab', ['a', 'b']), 'b']),
                'y'])

        def test_multi_import(self):
            grammar = """
            start: NUMBER WORD

            %import common (NUMBER, WORD, WS)
            %ignore WS
            """
            l = _Lark(grammar)
            x = l.parse('12 toucans')
            self.assertEqual(x.children, ['12', 'toucans'])

        def test_relative_multi_import(self):
            l = _Lark_open("test_relative_multi_import.lark", rel_to=__file__)
            x = l.parse('12 capybaras')
            self.assertEqual(x.children, ['12', 'capybaras'])

        def test_relative_import_preserves_leading_underscore(self):
            l = _Lark_open("test_relative_import_preserves_leading_underscore.lark", rel_to=__file__)
            x = l.parse('Ax')
            self.assertEqual(next(x.find_data('c')).children, ['A'])

        def test_relative_import_of_nested_grammar(self):
            l = _Lark_open("grammars/test_relative_import_of_nested_grammar.lark", rel_to=__file__)
            x = l.parse('N')
            self.assertEqual(next(x.find_data('rule_to_import')).children, ['N'])

        def test_relative_import_rules_dependencies_imported_only_once(self):
            l = _Lark_open("test_relative_import_rules_dependencies_imported_only_once.lark", rel_to=__file__)
            x = l.parse('AAA')
            self.assertEqual(next(x.find_data('a')).children, ['A'])
            self.assertEqual(next(x.find_data('b')).children, ['A'])
            self.assertEqual(next(x.find_data('d')).children, ['A'])

        def test_import_errors(self):
            grammar = """
            start: NUMBER WORD

            %import .grammars.bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

            grammar = """
            start: NUMBER WORD

            %import bad_test.NUMBER
            """
            self.assertRaises(IOError, _Lark, grammar)

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization(self):
            "Tests effect of priority on result"

            grammar = """
            start: a | b
            a.1: "a"
            b.2: "a"
            """

            # l = Lark(grammar, parser='earley', lexer='standard')
            l = _Lark(grammar)
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'b')

            grammar = """
            start: a | b
            a.2: "a"
            b.1: "a"
            """

            l = _Lark(grammar)
            # l = Lark(grammar, parser='earley', lexer='standard')
            res = l.parse("a")
            self.assertEqual(res.children[0].data, 'a')

        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
        def test_earley_prioritization_sum(self):
            "Tests effect of priority on result"

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_: "ab"
            bb_.1: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_: "a"
            b_: "b"
            ab_.1: "ab"
            bb_: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.2: "a"
            b_.1: "b"
            ab_.3: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'ab_b_a_')

            grammar = """
            start: ab_ b_ a_ | indirection
            indirection: a_ bb_ a_
            a_.1: "a"
            b_.1: "b"
            ab_.4: "ab"
            bb_.3: "bb"
            """
            l = Lark(grammar, priority="invert")
            res = l.parse('abba')
            self.assertEqual(''.join(child.data for child in res.children), 'indirection')

        def test_utf8(self):
            g = u"""start: a
                    a: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [Tree('a', [])]))

            g = u"""start: A
                    A: "±a"
                 """
            l = _Lark(g)
            self.assertEqual(l.parse(u'±a'), Tree('start', [u'\xb1a']))

        @unittest.skipIf(PARSER == 'cyk', "No empty rules")
        def test_ignore(self):
            grammar = r"""
            COMMENT: /(!|(\/\/))[^\n]*/
            %ignore COMMENT
            %import common.WS -> _WS
            %import common.INT
            start: "INT"i _WS+ INT _WS*
            """
            parser = _Lark(grammar)

            tree = parser.parse("int 1 ! This is a comment\n")
            self.assertEqual(tree.children, ['1'])

            tree = parser.parse("int 1 ! This is a comment")    # A trailing ignore token can be tricky!
            self.assertEqual(tree.children, ['1'])

            parser = _Lark(r"""
                start : "a"*
                %ignore "b"
            """)
            tree = parser.parse("bb")
            self.assertEqual(tree.children, [])

        def test_regex_escaping(self):
            g = _Lark("start: /[ab]/")
            g.parse('a')
            g.parse('b')

            self.assertRaises( UnexpectedInput, g.parse, 'c')

            _Lark(r'start: /\w/').parse('a')

            g = _Lark(r'start: /\\w/')
            self.assertRaises( UnexpectedInput, g.parse, 'a')
            g.parse(r'\w')

            _Lark(r'start: /\[/').parse('[')
            _Lark(r'start: /\//').parse('/')
            _Lark(r'start: /\\/').parse('\\')
            _Lark(r'start: /\[ab]/').parse('[ab]')
            _Lark(r'start: /\\[ab]/').parse('\\a')
            _Lark(r'start: /\t/').parse('\t')
            _Lark(r'start: /\\t/').parse('\\t')
            _Lark(r'start: /\\\t/').parse('\\\t')
            _Lark(r'start: "\t"').parse('\t')
            _Lark(r'start: "\\t"').parse('\\t')
            _Lark(r'start: "\\\t"').parse('\\\t')

        def test_ranged_repeat_rules(self):
            g = u"""!start: "A"~3
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["A", "A", "A"]))
            self.assertRaises(ParseError, l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: "A"~0..2
                """
            if PARSER != 'cyk': # XXX CYK currently doesn't support empty grammars
                l = _Lark(g)
                self.assertEqual(l.parse(u''), Tree('start', []))
                self.assertEqual(l.parse(u'A'), Tree('start', ['A']))
                self.assertEqual(l.parse(u'AA'), Tree('start', ['A', 'A']))
                self.assertRaises((UnexpectedToken, UnexpectedInput), l.parse, u'AAA')

            g = u"""!start: "A"~3..2
                """
            self.assertRaises(GrammarError, _Lark, g)

            g = u"""!start: "A"~2..3 "B"~2
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABB'), Tree('start', ['A', 'A', 'B', 'B']))
            self.assertEqual(l.parse(u'AAABB'), Tree('start', ['A', 'A', 'A', 'B', 'B']))
            self.assertRaises(ParseError, l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        def test_ranged_repeat_terms(self):
            g = u"""!start: AAA
                    AAA: "A"~3
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AAA'), Tree('start', ["AAA"]))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AA')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAA')

            g = u"""!start: AABB CC
                    AABB: "A"~0..2 "B"~2
                    CC: "C"~1..2
                """
            l = _Lark(g)
            self.assertEqual(l.parse(u'AABBCC'), Tree('start', ['AABB', 'CC']))
            self.assertEqual(l.parse(u'BBC'), Tree('start', ['BB', 'C']))
            self.assertEqual(l.parse(u'ABBCC'), Tree('start', ['ABB', 'CC']))
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAABBB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'ABB')
            self.assertRaises((ParseError, UnexpectedInput), l.parse, u'AAAABB')

        @unittest.skipIf(PARSER=='earley', "Priority not handled correctly right now") # TODO XXX
        def test_priority_vs_embedded(self):
            g = """
            A.2: "a"
            WORD: ("a".."z")+

            start: (A | WORD)+
            """
            l = _Lark(g)
            t = l.parse('abc')
            self.assertEqual(t.children, ['a', 'bc'])
            self.assertEqual(t.children[0].type, 'A')

        def test_line_counting(self):
            p = _Lark("start: /[^x]+/")

            text = 'hello\nworld'
            t = p.parse(text)
            tok = t.children[0]
            self.assertEqual(tok, text)
            self.assertEqual(tok.line, 1)
            self.assertEqual(tok.column, 1)
            if LEXER != 'dynamic':
                self.assertEqual(tok.end_line, 2)
                self.assertEqual(tok.end_column, 6)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_empty_end(self):
            p = _Lark("""
                start: b c d
                b: "B"
                c: | "C"
                d: | "D"
            """)
            res = p.parse('B')
            self.assertEqual(len(res.children), 3)

        @unittest.skipIf(PARSER=='cyk', "Empty rules")
        def test_maybe_placeholders(self):
            # Anonymous tokens shouldn't count
            p = _Lark("""start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [])

            # All invisible constructs shouldn't count
            p = _Lark("""start: [A] ["b"] [_c] ["e" "f" _c]
                         A: "a"
                         _c: "c" """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None])
            self.assertEqual(p.parse("c").children, [None])
            self.assertEqual(p.parse("aefc").children, ['a'])

            # ? shouldn't apply
            p = _Lark("""!start: ["a"] "b"? ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])

            p = _Lark("""!start: ["a"] ["b"] ["c"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("").children, [None, None, None])
            self.assertEqual(p.parse("a").children, ['a', None, None])
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("c").children, [None, None, 'c'])
            self.assertEqual(p.parse("ab").children, ['a', 'b', None])
            self.assertEqual(p.parse("ac").children, ['a', None, 'c'])
            self.assertEqual(p.parse("bc").children, [None, 'b', 'c'])
            self.assertEqual(p.parse("abc").children, ['a', 'b', 'c'])

            p = _Lark("""!start: (["a"] "b" ["c"])+ """, maybe_placeholders=True)
            self.assertEqual(p.parse("b").children, [None, 'b', None])
            self.assertEqual(p.parse("bb").children, [None, 'b', None, None, 'b', None])
            self.assertEqual(p.parse("abbc").children, ['a', 'b', None, None, 'b', 'c'])
            self.assertEqual(p.parse("babbcabcb").children,
                [None, 'b', None,
                 'a', 'b', None,
                 None, 'b', 'c',
                 'a', 'b', 'c',
                 None, 'b', None])

            p = _Lark("""!start: ["a"] ["c"] "b"+ ["a"] ["d"] """, maybe_placeholders=True)
            self.assertEqual(p.parse("bb").children, [None, None, 'b', 'b', None, None])
            self.assertEqual(p.parse("bd").children, [None, None, 'b', None, 'd'])
            self.assertEqual(p.parse("abba").children, ['a', None, 'b', 'b', 'a', None])
            self.assertEqual(p.parse("cbbbb").children, [None, 'c', 'b', 'b', 'b', 'b', None, None])

        def test_escaped_string(self):
            "Tests common.ESCAPED_STRING"
            grammar = r"""
            start: ESCAPED_STRING+

            %import common (WS_INLINE, ESCAPED_STRING)
            %ignore WS_INLINE
            """
            parser = _Lark(grammar)

            parser.parse(r'"\\" "b" "c"')
            parser.parse(r'"That" "And a \"b"')

        def test_meddling_unused(self):
            "Unless 'unused' is removed, LALR analysis will fail on reduce-reduce collision"
            grammar = """
                start: EKS* x
                x: EKS
                unused: x*
                EKS: "x"
            """
            parser = _Lark(grammar)

        @unittest.skipIf(PARSER!='lalr' or LEXER=='custom', "Serialize currently only works for LALR parsers without custom lexers (though it should be easy to extend)")
        def test_serialize(self):
            grammar = """
            start: _ANY b "C"
            _ANY: /./
            b: "B"
            """
            parser = _Lark(grammar)
            d = parser.serialize()
            parser2 = Lark.deserialize(d, {}, {})
            self.assertEqual(parser2.parse('ABC'), Tree('start', [Tree('b', [])]) )

            namespace = {'Rule': Rule, 'TerminalDef': TerminalDef}
            d, m = parser.memo_serialize(namespace.values())
            parser3 = Lark.deserialize(d, namespace, m)
            self.assertEqual(parser3.parse('ABC'), Tree('start', [Tree('b', [])]) )

        def test_multi_start(self):
            parser = _Lark('''
                a: "x" "a"?
                b: "x" "b"?
                ''', start=['a', 'b'])

            self.assertEqual(parser.parse('xa', 'a'), Tree('a', []))
            self.assertEqual(parser.parse('xb', 'b'), Tree('b', []))

        def test_lexer_detect_newline_tokens(self):
            # Detect newlines in regular tokens
            g = _Lark(r"""start: "go" tail*
            !tail : SA "@" | SB "@" | SC "@" | SD "@"
            SA : "a" /\n/
            SB : /b./s
            SC : "c" /[^a-z]/
            SD : "d" /\s/
            """)
            a, b, c, d = [x.children[1] for x in g.parse('goa\n@b\n@c\n@d\n@').children]
            self.assertEqual(a.line, 2)
            self.assertEqual(b.line, 3)
            self.assertEqual(c.line, 4)
            self.assertEqual(d.line, 5)

            # Detect newlines in ignored tokens
            for re in ['/\\n/', '/[^a-z]/', '/\\s/']:
                g = _Lark('''!start: "a" "a"
                             %ignore {}'''.format(re))
                a, b = g.parse('a\na').children
                self.assertEqual(a.line, 1)
                self.assertEqual(b.line, 2)

    _NAME = "Test" + PARSER.capitalize() + LEXER.capitalize()
    _TestParser.__name__ = _NAME
    globals()[_NAME] = _TestParser

# Note: You still have to import them in __main__ for the tests to run
_TO_TEST = [
        ('standard', 'earley'),
        ('standard', 'cyk'),
        ('dynamic', 'earley'),
        ('dynamic_complete', 'earley'),
        ('standard', 'lalr'),
        ('contextual', 'lalr'),
        ('custom', 'lalr'),
        # (None, 'earley'),
]

for _LEXER, _PARSER in _TO_TEST:
    _make_parser_test(_LEXER, _PARSER)

for _LEXER in ('dynamic', 'dynamic_complete'):
    _make_full_earley_test(_LEXER)
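
# The generated classes live in this module's globals under names such as
# TestLalrStandard or TestFullEarleyDynamic; an external runner would import
# them from this module by those names, e.g. (hypothetical module name):
#
#     from test_parser import TestLalrStandard
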
if __name__ == '__main__':
    unittest.main()