|
- from __future__ import absolute_import
-
- import sys
- from unittest import TestCase, main
-
- from lark import Lark, Token, Tree, ParseError, UnexpectedInput
- from lark.load_grammar import GrammarError, GRAMMAR_ERRORS, find_grammar_errors
- from lark.load_grammar import FromPackageLoader
-
-
class TestGrammar(TestCase):
    """Tests for grammar loading: error reporting, %import/%override/%extend,
    and ranged-repeat (``~``) expansion.

    Several tests pass ``source_path=__file__`` so that relative ``%import``
    statements are resolved from this test file's location.
    """

    def test_errors(self):
        """Every example in GRAMMAR_ERRORS raises GrammarError containing its message."""
        for msg, examples in GRAMMAR_ERRORS:
            for example in examples:
                # Keep the try body minimal: only grammar construction may raise.
                try:
                    Lark(example)
                except GrammarError as e:
                    self.assertIn(msg, str(e))
                else:
                    self.fail("example did not raise an error: %r" % (example,))

    def test_empty_literal(self):
        """An empty string literal in a rule is rejected."""
        # Issue #888
        self.assertRaises(GrammarError, Lark, "start: \"\"")

    def test_override_rule(self):
        """%override on an imported template rule; bad overrides raise GrammarError."""
        # Overrides the 'sep' template in the existing grammar to add an optional
        # terminating delimiter, thus extending it beyond its original capacity.
        p = Lark("""
            %import .test_templates_import (start, sep)

            %override sep{item, delim}: item (delim item)* delim?
            %ignore " "
        """, source_path=__file__)

        without_trailing = p.parse('[1, 2, 3]')
        with_trailing = p.parse('[1, 2, 3, ]')
        self.assertEqual(without_trailing, with_trailing)

        # Override declares only {item} but its body still uses 'delim'.
        self.assertRaises(GrammarError, Lark, """
            %import .test_templates_import (start, sep)

            %override sep{item}: item (delim item)* delim?
        """, source_path=__file__)

        # Same override, this time with nothing imported to override.
        self.assertRaises(GrammarError, Lark, """
            %override sep{item}: item (delim item)* delim?
        """, source_path=__file__)

    def test_override_terminal(self):
        """%override on imported terminals A and B changes what they match."""
        p = Lark("""

            %import .grammars.ab (startab, A, B)

            %override A: "c"
            %override B: "d"
        """, start='startab', source_path=__file__)

        tree = p.parse('cd')
        self.assertEqual(tree.children[0].children,
                         [Token('A', 'c'), Token('B', 'd')])

    def test_extend_rule(self):
        """%extend adds an alternative to an imported rule; extending an unknown rule fails."""
        p = Lark("""
            %import .grammars.ab (startab, A, B, expr)

            %extend expr: B A
        """, start='startab', source_path=__file__)
        tree = p.parse('abab')
        self.assertEqual(tree.children[0].children,
                         ['a', Tree('expr', ['b', 'a']), 'b'])

        # Nothing named 'expr' is defined or imported here.
        self.assertRaises(GrammarError, Lark, """
            %extend expr: B A
        """)

    def test_extend_term(self):
        """%extend adds an alternative to an imported terminal."""
        p = Lark("""
            %import .grammars.ab (startab, A, B, expr)

            %extend A: "c"
        """, start='startab', source_path=__file__)
        tree = p.parse('acbb')
        self.assertEqual(tree.children[0].children,
                         ['a', Tree('expr', ['c', 'b']), 'b'])

    def test_extend_twice(self):
        """The same rule can be extended more than once."""
        p = Lark("""
            start: x+

            x: "a"
            %extend x: "b"
            %extend x: "c"
        """)

        # Both inputs are seven characters drawn from "a"/"b"/"c".
        self.assertEqual(p.parse("abccbba"), p.parse("cbabbbb"))

    def test_undefined_ignore(self):
        """%ignore of an undefined terminal, or of a rule, raises GrammarError."""
        g = """!start: "A"

            %ignore B
            """
        self.assertRaises(GrammarError, Lark, g)

        g = """!start: "A"

            %ignore start
            """
        self.assertRaises(GrammarError, Lark, g)

    def test_alias_in_terminal(self):
        """An alias (``->``) inside a terminal definition is rejected."""
        g = """start: TERM
            TERM: "a" -> alias
            """
        self.assertRaises(GrammarError, Lark, g)

    def test_undefined_rule(self):
        """Referencing an undefined rule raises GrammarError."""
        self.assertRaises(GrammarError, Lark, """start: a""")

    def test_undefined_term(self):
        """Referencing an undefined terminal raises GrammarError."""
        self.assertRaises(GrammarError, Lark, """start: A""")

    def test_token_multiline_only_works_with_x_flag(self):
        """A regexp spanning several lines with only the ``i`` flag is rejected."""
        g = r"""start: ABC
                ABC: / a b c
                        d
                        e f
                    /i
                """
        self.assertRaises(GrammarError, Lark, g)

    def test_import_custom_sources(self):
        """%import resolves through a FromPackageLoader given in import_paths."""
        custom_loader = FromPackageLoader('tests', ('grammars', ))

        grammar = """
        start: startab

        %import ab.startab
        """

        p = Lark(grammar, import_paths=[custom_loader])
        expected = Tree('start', [
            Tree('startab', [
                Tree('ab__expr', [Token('ab__A', 'a'), Token('ab__B', 'b')]),
            ]),
        ])
        self.assertEqual(p.parse('ab'), expected)

    def test_import_custom_sources2(self):
        """A grammar loaded via a custom loader can be parsed and searched."""
        custom_loader = FromPackageLoader('tests', ('grammars', ))

        grammar = """
        start: rule_to_import

        %import test_relative_import_of_nested_grammar__grammar_to_import.rule_to_import
        """
        p = Lark(grammar, import_paths=[custom_loader])
        tree = p.parse('N')
        self.assertEqual(next(tree.find_data('rule_to_import')).children, ['N'])

    def test_import_custom_sources3(self):
        """A relative %import works with a custom loader plus source_path."""
        custom_loader2 = FromPackageLoader('tests')
        grammar = """
        %import .test_relative_import (start, WS)
        %ignore WS
        """
        # Import relative to the current file.
        p = Lark(grammar, import_paths=[custom_loader2], source_path=__file__)
        tree = p.parse('12 capybaras')
        self.assertEqual(tree.children, ['12', 'capybaras'])

    def test_find_grammar_errors(self):
        """find_grammar_errors reports the expected line number for each error."""
        text = """
        a: rule
        b rule
        c: rule
        B.: "hello" f
        D: "okay"
        """

        self.assertEqual([e.line for e, _s in find_grammar_errors(text)], [3, 5])

        text = """
        a: rule
        b rule
            | ok
        c: rule
        B.: "hello" f
        D: "okay"
        """

        self.assertEqual([e.line for e, _s in find_grammar_errors(text)], [3, 4, 6])

        text = """
        a: rule @#$#@$@&&
        b: rule
            | ok
        c: rule
        B: "hello" f @
        D: "okay"
        """

        self.assertEqual([e.line for e, _s in find_grammar_errors(text)], [2, 6])

    def test_ranged_repeat_terms(self):
        """``~n`` and ``~n..m`` inside terminals accept exactly the allowed counts."""
        g = u"""!start: AAA
                AAA: "A"~3
            """
        parser = Lark(g, parser='lalr')
        self.assertEqual(parser.parse(u'AAA'), Tree('start', ["AAA"]))
        self.assertRaises((ParseError, UnexpectedInput), parser.parse, u'AA')
        self.assertRaises((ParseError, UnexpectedInput), parser.parse, u'AAAA')

        g = u"""!start: AABB CC
                AABB: "A"~0..2 "B"~2
                CC: "C"~1..2
            """
        parser = Lark(g, parser='lalr')
        self.assertEqual(parser.parse(u'AABBCC'), Tree('start', ['AABB', 'CC']))
        self.assertEqual(parser.parse(u'BBC'), Tree('start', ['BB', 'C']))
        self.assertEqual(parser.parse(u'ABBCC'), Tree('start', ['ABB', 'CC']))
        self.assertRaises((ParseError, UnexpectedInput), parser.parse, u'AAAB')
        self.assertRaises((ParseError, UnexpectedInput), parser.parse, u'AAABBB')
        self.assertRaises((ParseError, UnexpectedInput), parser.parse, u'ABB')
        self.assertRaises((ParseError, UnexpectedInput), parser.parse, u'AAAABB')

    def test_ranged_repeat_large(self):
        """Large ``~n`` / ``~n..m`` repeats in rules parse exactly the allowed counts."""
        g = u"""!start: "A"~60
                """
        parser = Lark(g, parser='lalr')
        self.assertGreater(len(parser.rules), 1,
                           "Expected that more than one rule will be generated")
        self.assertEqual(parser.parse(u'A' * 60), Tree('start', ["A"] * 60))
        self.assertRaises(ParseError, parser.parse, u'A' * 59)
        self.assertRaises((ParseError, UnexpectedInput), parser.parse, u'A' * 61)

        g = u"""!start: "A"~15..100
                """
        parser = Lark(g, parser='lalr')
        for count in range(110):
            text = u'A' * count
            if 15 <= count <= 100:
                self.assertEqual(parser.parse(text), Tree('start', ['A'] * count))
            else:
                self.assertRaises(UnexpectedInput, parser.parse, text)

        # 8191 is a Mersenne prime
        g = u"""start: "A"~8191
                """
        parser = Lark(g, parser='lalr')
        # No '!' prefix on the rule here, and the expected tree has no children.
        self.assertEqual(parser.parse(u'A' * 8191), Tree('start', []))
        self.assertRaises(UnexpectedInput, parser.parse, u'A' * 8190)
        self.assertRaises(UnexpectedInput, parser.parse, u'A' * 8192)

    def test_large_terminal(self):
        """A terminal with 1000 literal alternatives matches every number 0..999."""
        # TODO: The `reversed` below is required because otherwise the regex engine
        # is happy with just parsing 9 from the string 999 instead of consuming
        # the longest match.
        g = "start: NUMBERS\n"
        g += "NUMBERS: " + '|'.join('"%s"' % i for i in reversed(range(0, 1000)))

        parser = Lark(g, parser='lalr')
        for i in (0, 9, 99, 999):
            self.assertEqual(parser.parse(str(i)), Tree('start', [str(i)]))
        for i in (-1, 1000):
            self.assertRaises(UnexpectedInput, parser.parse, str(i))
-
-
# Run this module's tests when it is executed directly as a script.
if __name__ == "__main__":
    main()
-
-
|