Improved error reporting in Earley

7 lat temu · 1602482608
--- a/README.md
+++ b/README.md
@@ -43,7 +43,7 @@ Notice punctuation doesn't appear in the resulting tree. It's automatically filt

 ## Learn more about using Lark

 - Read the [tutorial](/docs/json_tutorial.md), which shows how to write a JSON parser in Lark.
 - **Read the [tutorial](/docs/json_tutorial.md)**, which shows how to write a JSON parser in Lark.
 - Read the [reference](/docs/reference.md)
 - Browse the [examples](/examples), which include a calculator, and a Python-code parser.
 - Check out the [tests](/tests/test_parser.py) for more examples.
@@ -72,6 +72,7 @@ These features are planned to be implemented in the near future:
 - Grammar composition (in cases that the tokens can reliably signify a grammar change)
 - Parser generator - create a small parser, independent of Lark, to embed in your project.
 - Optimizations in both the parsers and the lexer
 - Better handling of ambiguity

 ## Comparison to other parsers

--- a/lark/common.py
+++ b/lark/common.py
@@ -6,6 +6,22 @@ class ParseError(Exception):
    pass


 class UnexpectedToken(ParseError):
    """Parse error raised when the parser meets a token it cannot accept.

    Attributes:
        token: the offending token, as passed in.
        expected: whatever the caller supplied as the acceptable alternatives.
        line, column: read from the token; '?' when the token lacks them.
    """

    def __init__(self, token, expected, seq, index):
        """Store the error details and build the human-readable message.

        token    -- the token that could not be matched (must expose .value)
        expected -- description/collection of what would have been accepted
        seq      -- the full token sequence being parsed
        index    -- position of `token` within `seq`; used for the context
        """
        self.token = token
        self.expected = expected
        # Not every token object carries position info, so fall back to '?'.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        # Show up to five tokens starting at the failure point.
        context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column, expected, context))

        # super() must name this class (not the parent) so that the parent's
        # own __init__ is not skipped in the MRO.
        super(UnexpectedToken, self).__init__(message)



 def is_terminal(sym):
    """Return True if the symbol name `sym` denotes a terminal.

    A symbol is treated as a terminal when it is all upper-case or when it
    begins with '$'. An empty string is reported as non-terminal rather than
    raising IndexError.
    """
    # startswith() is safe on '' whereas indexing sym[0] is not.
    return sym.isupper() or sym.startswith('$')

--- a/lark/parsers/earley.py
+++ b/lark/parsers/earley.py
@@ -1,7 +1,7 @@
 "My name is Earley"

 from ..utils import classify, STRING_TYPE
 from ..common import ParseError
 from ..common import ParseError, UnexpectedToken

 try:
    xrange
@@ -14,6 +14,7 @@ class MatchFailed(object):
 class AbortParseMatch(Exception):
    pass


 class Rule(object):
    def __init__(self, name, symbols, postprocess):
        self.name = name
@@ -34,6 +35,8 @@ class State(object):
            if self.is_literal:
                self.expect_symbol = self.expect_symbol['literal']
            assert isinstance(self.expect_symbol, STRING_TYPE), self.expect_symbol
        else:
            self.is_literal = False

    def next_state(self, data):
        """Build the successor State that has additionally consumed `data`.

        The successor keeps this state's rule and reference, has `expect`
        advanced by one, and carries `self.data` concatenated with `[data]`.
        """
        consumed = self.data + [data]
        return State(self.rule, self.expect + 1, self.reference, consumed)
@@ -136,7 +139,8 @@ class Parser(object):
            self.advance_to(table, pos + 1, set())

            if not table[-1]:
                raise ParseError('Error at line {t.line}:{t.column}'.format(t=stream[pos]))
                expected = {s.expect_symbol for s in table[-2] if s.is_literal}
                raise UnexpectedToken(stream[pos], expected, stream, pos)

        res = list(self.finish(table))
        if not res:
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -1,21 +1,5 @@
 from .lalr_analysis import ACTION_SHIFT
 from ..common import ParseError

 class UnexpectedToken(ParseError):
    def __init__(self, token, expected, seq, index):
        self.token = token
        self.expected = expected
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        message = ("Unexpected input %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column, expected, context))

        super(ParseError, self).__init__(message)


 from ..common import ParseError, UnexpectedToken


 class Parser(object):