Refactored ParserPuppet, added stubs

4 years ago · cb2d9cded0
--- a/lark-stubs/exceptions.pyi
+++ b/lark-stubs/exceptions.pyi
@@ -3,7 +3,7 @@
 from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set
 from .tree import Tree
 from .lexer import Token

 from .parsers.lalr_puppet import ParserPuppet

 class LarkError(Exception):
    # Base exception class of this stub; UnexpectedInput derives from it.
    pass
@@ -38,16 +38,16 @@ class UnexpectedInput(LarkError):
            parse_fn: Callable[[str], Tree],
            examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
            token_type_match_fallback: bool = False,
            print_debug_info: bool = True
            use_accepts: bool = False,
    ) -> T:
        ...


 class UnexpectedToken(ParseError, UnexpectedInput):
    expected: List[str]
    expected: Set[str]
    considered_rules: Set[str]
    puppet: Any
    accepts: List[str]
    puppet: ParserPuppet
    accepts: Set[str]

 class UnexpectedCharacters(LexError, UnexpectedInput):
    allowed: Set[str]
--- a/lark-stubs/parsers/__init__.pyi
+++ b/lark-stubs/parsers/__init__.pyi
--- a/lark-stubs/parsers/lalr_puppet.pyi
+++ b/lark-stubs/parsers/lalr_puppet.pyi
@@ -0,0 +1,21 @@
 from typing import Set, Dict, Any

 from lark import Token, Tree


 class ParserPuppet(object):
    """
    Represents a LalrParser that can be stepped through.
    Shouldn't be instantiated by hand, but is accessible as `UnexpectedToken.puppet`
    """
    def feed_token(self, token: Token): ...

    def copy(self) -> ParserPuppet: ...

    # Returns a text listing of the current choices and the state-stack size.
    def pretty(self) -> str: ...

    # Returns the parse-table entry for the state on top of the state stack.
    def choices(self) -> Dict[str, Any]: ...

    # Returns the names from choices() that a copy of the puppet can be fed
    # (as an empty-valued Token) without raising KeyError.
    def accepts(self) -> Set[str]: ...

    # Calls the underlying parser's parse() with the puppet's stored stream,
    # start, set_state callback and stacks.
    def resume_parse(self) -> Tree: ...
--- a/lark/exceptions.py
+++ b/lark/exceptions.py
@@ -1,3 +1,5 @@
 import logging

 from .utils import STRING_TYPE

 ###{standalone
@@ -37,7 +39,7 @@ class UnexpectedInput(LarkError):
            after = text[pos:end].split(b'\n', 1)[0]
            return (before + after + b'\n' + b' ' * len(before) + b'^\n').decode("ascii", "backslashreplace")

    def match_examples(self, parse_fn, examples, token_type_match_fallback=False, print_debug_info=True):
    def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False):
        """ Given a parser instance and a dictionary mapping some label with
            some malformed syntax examples, it'll return the label for the
            example that bests matches the current error.
@@ -55,27 +57,26 @@ class UnexpectedInput(LarkError):
                try:
                    parse_fn(malformed)
                except UnexpectedInput as ut:
                    if ut.state == self.state and ut.accepts == self.accepts:
                    if ut.state == self.state and (not use_accepts or ut.accepts == self.accepts):
                        try:
                            if ut.token == self.token:  # Try exact match first
                                if print_debug_info:
                                    print("Exact Match at %d, with example %d" % (i, j), (ut.token, self.token, ut.state, self.state))
                                logging.debug("Exact Match at example [%s][%s]" % (i, j))
                                return label

                            if token_type_match_fallback:
                                # Fallback to token types match
                                if (ut.token.type == self.token.type) and not candidate[-1]:
                                    if print_debug_info:
                                        print("Token Type Fallback at %d, with example %d" % (i, j))
                                    logging.debug("Token Type Fallback at example [%s][%s]" % (i, j))
                                    candidate = label, True

                        except AttributeError:
                            pass
                        if not candidate[0]:
                            if print_debug_info:
                                print("Defaulted at %d, with example %d" % (i, j))
                            logging.debug("Same State match at example [%s][%s]" % (i, j))
                            candidate = label, False

                    elif ut.state == self.state:
                        logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
                                      (self.state, self.accepts, ut.accepts, i, j))
        return candidate[0]


--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -59,18 +59,10 @@ class _Parser:
            try:
                return states[state][token.type]
            except KeyError:
                expected = [s for s in states[state].keys() if s.isupper()]
                expected = {s for s in states[state].keys() if s.isupper()}
                try:
                    puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state)
                    accepts = []
                    for t in expected:
                        new_puppet = puppet.copy()
                        try:
                            new_puppet.feed_token(Token(t, ''))
                        except KeyError:
                            pass
                        else:
                            accepts.append(t)
                    accepts = puppet.accepts()
                except NameError:
                    puppet = accepts = None
                raise UnexpectedToken(token, expected, state=state, puppet=puppet, accepts=accepts)
--- a/lark/parsers/lalr_puppet.py
+++ b/lark/parsers/lalr_puppet.py
@@ -3,6 +3,8 @@
 from copy import deepcopy

 from .lalr_analysis import Shift, Reduce
 from .. import Token


 class ParserPuppet(object):
    def __init__(self, parser, state_stack, value_stack, start, stream, set_state):
@@ -67,13 +69,26 @@ class ParserPuppet(object):
        )

    def pretty(self):
        print("Puppet choices:")
        out = ["Puppet choices:"]
        for k, v in self.choices().items():
            print('\t-', k, '->', v)
        print('stack size:', len(self._state_stack))
            out.append('\t- %s -> %s' % (k, v))
        out.append('stack size: %s' % len(self._state_stack))
        return '\n'.join(out)

    def choices(self):
        return self.parser.parse_table.states[self._state_stack[-1]]

    def accepts(self):
        """Return the set of terminal names the parser can accept right now.

        Every upper-case key of ``choices()`` is probed by feeding an
        empty-valued ``Token`` of that type to a *copy* of this puppet, so
        the puppet itself is left untouched. A candidate is kept only when
        ``feed_token`` does not raise ``KeyError``.

        Returns:
            set of str: the terminal names that were fed successfully.
        """
        accepts = set()
        for t in self.choices():
            # The state row may also hold keys that are not terminals; only
            # upper-case names are probed, matching the `isupper()` filter
            # the LALR parser applies when it builds `expected`.
            if not t.isupper():
                continue
            new_puppet = self.copy()
            try:
                new_puppet.feed_token(Token(t, ''))
            except KeyError:
                # This terminal cannot be consumed from the current state.
                pass
            else:
                accepts.add(t)
        return accepts

    def resume_parse(self):
        return self.parser.parse(self._stream, self._start, self._set_state, self._value_stack, self._state_stack)