Small adjustments to PR

4 years ago · 02d57bc32a
--- a/lark-stubs/parsers/lalr_puppet.pyi
+++ b/lark-stubs/parsers/lalr_puppet.pyi
@@ -5,8 +5,9 @@ from lark import Token, Tree

 class ParserPuppet(object):
    """
    Represents a LalrParser that can be stepped through.
    Shouldn't be instantiated by hand, but is accessible as `UnexpectedToken.puppet`
    Provides an interface to interactively step through the parser (LALR(1) only for now)

    Accessible via `UnexpectedToken.puppet` (raised by the parser on token error)
    """
    def feed_token(self, token: Token): ...

--- a/lark/exceptions.py
+++ b/lark/exceptions.py
@@ -45,7 +45,7 @@ class UnexpectedInput(LarkError):
            example that best matches the current error.
        """
        assert self.state is not None, "Not supported for this exception"
        

        if isinstance(examples, dict):
            examples = examples.items()

@@ -57,7 +57,11 @@ class UnexpectedInput(LarkError):
                try:
                    parse_fn(malformed)
                except UnexpectedInput as ut:
                    if ut.state == self.state and (not use_accepts or ut.accepts == self.accepts):
                    if ut.state == self.state:
                        if use_accepts and ut.accepts != self.accepts:
                            logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
                                        (self.state, self.accepts, ut.accepts, i, j))
                            continue
                        try:
                            if ut.token == self.token:  # Try exact match first
                                logging.debug("Exact Match at example [%s][%s]" % (i, j))
@@ -74,27 +78,25 @@ class UnexpectedInput(LarkError):
                        if not candidate[0]:
                            logging.debug("Same State match at example [%s][%s]" % (i, j))
                            candidate = label, False
                    elif ut.state == self.state:
                        logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
                                      (self.state, self.accepts, ut.accepts, i, j))

        return candidate[0]


 class UnexpectedCharacters(LexError, UnexpectedInput):
    def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None):
        self.line = line
        self.column = column
        self.pos_in_stream = lex_pos
        self.state = state

        self.allowed = allowed
        self.considered_tokens = considered_tokens

        if isinstance(seq, bytes):
            message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos:lex_pos+1].decode("ascii", "backslashreplace"), line, column)
        else:
            message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column)

        self.line = line
        self.column = column
        self.allowed = allowed
        self.considered_tokens = considered_tokens
        self.pos_in_stream = lex_pos
        self.state = state

        message += '\n\n' + self.get_context(seq)
        if allowed:
            message += '\nExpecting: %s\n' % allowed
@@ -106,16 +108,20 @@ class UnexpectedCharacters(LexError, UnexpectedInput):


 class UnexpectedToken(ParseError, UnexpectedInput):
    def __init__(self, token, expected, considered_rules=None, state=None, puppet=None, accepts=None):
        self.token = token
        self.expected = expected     # XXX str shouldn't necessary
    def __init__(self, token, expected, considered_rules=None, state=None, puppet=None):
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')
        self.considered_rules = considered_rules
        self.state = state
        self.pos_in_stream = getattr(token, 'pos_in_stream', None)
        self.state = state

        self.token = token
        self.expected = expected     # XXX deprecate? `accepts` is better
        self.considered_rules = considered_rules
        self.puppet = puppet
        self.accepts = accepts

        # TODO Only calculate `accepts()` when we need to display it to the user
        # This will improve performance when doing automatic error handling
        self.accepts = puppet and puppet.accepts()

        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected one of: \n\t* %s\n"
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -62,10 +62,9 @@ class _Parser:
                expected = {s for s in states[state].keys() if s.isupper()}
                try:
                    puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state)
                    accepts = puppet.accepts()
                except NameError:
                    puppet = accepts = None
                raise UnexpectedToken(token, expected, state=state, puppet=puppet, accepts=accepts)
                except NameError:   # For standalone parser
                    puppet = None
                raise UnexpectedToken(token, expected, state=state, puppet=puppet)

        def reduce(rule):
            size = len(rule.expansion)