Share GrammarLoader's example-based error table with a new regression test.

NOTE(review): this patch was re-flowed from a whitespace-collapsed copy. The indentation of
context lines is reconstructed; use `git apply --ignore-whitespace` if a hunk is rejected.
The `index` hashes are those of the original patch and do not reflect the revised added lines.

diff --git a/lark/load_grammar.py b/lark/load_grammar.py
index 7e83e59..8b2eaa8 100644
--- a/lark/load_grammar.py
+++ b/lark/load_grammar.py
@@ -789,6 +789,23 @@ def _find_used_symbols(tree):
               for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))}
 
 class GrammarLoader:
+    # Maps each user-facing error message to example grammars that should produce it.
+    # Passed to UnexpectedToken.match_examples() in the parse-error handler to choose the
+    # message reported to the user; tests/test_grammar.py checks every example against its message.
+    ERRORS = {
+        'Unclosed parenthesis': ['a: (\n'],
+        'Unmatched closing parenthesis': ['a: )\n', 'a: [)\n', 'a: (]\n'],
+        'Expecting rule or terminal definition (missing colon)': ['a\n', 'A\n', 'a->\n', 'A->\n', 'a A\n'],
+        'Illegal name for rules or terminals': ['Aa:\n'],
+        'Alias expects lowercase name': ['a: -> "a"\n'],
+        'Unexpected colon': ['a::\n', 'a: b:\n', 'a: B:\n', 'a: "a":\n'],
+        'Misplaced operator': ['a: b??', 'a: b(?)', 'a:+\n', 'a:?\n', 'a:*\n', 'a:|*\n'],
+        'Expecting option ("|") or a new rule or terminal definition': ['a:a\n()\n'],
+        'Terminal names cannot contain dots': ['A.B\n'],
+        '%import expects a name': ['%import "a"\n'],
+        '%ignore expects a value': ['%ignore %import\n'],
+    }
+
     def __init__(self, re_module):
         terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()]
 
@@ -814,19 +831,9 @@ class GrammarLoader:
                                (e.line, e.column, grammar_name, context))
         except UnexpectedToken as e:
             context = e.get_context(grammar_text)
-            error = e.match_examples(self.parser.parse, {
-                'Unclosed parenthesis': ['a: (\n'],
-                'Umatched closing parenthesis': ['a: )\n', 'a: [)\n', 'a: (]\n'],
-                'Expecting rule or terminal definition (missing colon)': ['a\n', 'a->\n', 'A->\n', 'a A\n'],
-                'Alias expects lowercase name': ['a: -> "a"\n'],
-                'Unexpected colon': ['a::\n', 'a: b:\n', 'a: B:\n', 'a: "a":\n'],
-                'Misplaced operator': ['a: b??', 'a: b(?)', 'a:+\n', 'a:?\n', 'a:*\n', 'a:|*\n'],
-                'Expecting option ("|") or a new rule or terminal definition': ['a:a\n()\n'],
-                '%import expects a name': ['%import "a"\n'],
-                '%ignore expects a value': ['%ignore %import\n'],
-            })
+            error = e.match_examples(self.parser.parse, self.ERRORS, use_accepts=True)
             if error:
-                raise GrammarError("%s at line %s column %s\n\n%s" % (error, e.line, e.column, context))
+                raise GrammarError("%s, at line %s column %s\n\n%s" % (error, e.line, e.column, context))
             elif 'STRING' in e.expected:
                 raise GrammarError("Expecting a value at line %s column %s\n\n%s" % (e.line, e.column, context))
             raise
diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py
index 94c32cc..737cb02 100644
--- a/lark/parsers/grammar_analysis.py
+++ b/lark/parsers/grammar_analysis.py
@@ -138,7 +138,7 @@ class GrammarAnalyzer(object):
         for r in rules:
             for sym in r.expansion:
                 if not (sym.is_term or sym in self.rules_by_origin):
-                    raise GrammarError("Using an undefined rule: %s" % sym) # TODO test validation
+                    raise GrammarError("Using an undefined rule: %s" % sym)
 
         self.start_states = {start: self.expand_rule(root_rule.origin)
                              for start, root_rule in root_rules.items()}
diff --git a/tests/__main__.py b/tests/__main__.py
index 9ef9f1b..5ec89e3 100644
--- a/tests/__main__.py
+++ b/tests/__main__.py
@@ -7,6 +7,7 @@ from lark import logger
 from .test_trees import TestTrees
 from .test_tools import TestStandalone
 from .test_cache import TestCache
+from .test_grammar import TestGrammar
 from .test_reconstructor import TestReconstructor
 
 try:
diff --git a/tests/test_grammar.py b/tests/test_grammar.py
new file mode 100644
index 0000000..88c8e22
--- /dev/null
+++ b/tests/test_grammar.py
@@ -0,0 +1,27 @@
+from __future__ import absolute_import
+
+import sys
+from unittest import TestCase, main
+
+from lark import Lark
+from lark.load_grammar import GrammarLoader, GrammarError
+
+
+class TestGrammar(TestCase):
+    """Checks the error messages reported for malformed grammars."""
+
+    def test_errors(self):
+        """Each example in GrammarLoader.ERRORS must fail with the message it is listed under."""
+        for msg, examples in GrammarLoader.ERRORS.items():
+            for example in examples:
+                try:
+                    Lark(example)
+                except GrammarError as e:
+                    # Unlike a bare assert, this check is not stripped under `python -O`.
+                    self.assertIn(msg, str(e))
+                else:
+                    self.fail("Example did not raise a GrammarError: %r" % (example,))
+
+
+if __name__ == '__main__':
+    main()