@@ -3,7 +3,7 @@ | |||
from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set | |||
from .tree import Tree | |||
from .lexer import Token | |||
from .parsers.lalr_puppet import ParserPuppet | |||
class LarkError(Exception):
    """Exception base class; `UnexpectedInput` is declared as a subclass of it."""
@@ -38,16 +38,16 @@ class UnexpectedInput(LarkError): | |||
parse_fn: Callable[[str], Tree], | |||
examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], | |||
token_type_match_fallback: bool = False, | |||
print_debug_info: bool = True | |||
use_accepts: bool = False, | |||
) -> T: | |||
... | |||
class UnexpectedToken(ParseError, UnexpectedInput):
    # Upper-case keys of the parse-table row for the state in which the
    # unexpected token was seen (the LALR parser builds this as a set).
    expected: Set[str]
    considered_rules: Set[str]
    # Puppet built by the LALR parser from its stacks at the point of failure.
    # NOTE(review): the parser falls back to `puppet = accepts = None` when
    # ParserPuppet is unavailable (NameError) -- confirm whether both
    # attributes should be declared Optional.
    puppet: ParserPuppet
    # Result of `puppet.accepts()` at the point of failure.
    accepts: Set[str]
class UnexpectedCharacters(LexError, UnexpectedInput): | |||
allowed: Set[str] | |||
@@ -0,0 +1,21 @@ | |||
from typing import Set, Dict, Any | |||
from lark import Token, Tree | |||
class ParserPuppet(object):
    """
    Represents a LalrParser that can be stepped through.

    Shouldn't be instantiated by hand, but is accessible as `UnexpectedToken.puppet`.
    """
    def feed_token(self, token: Token): ...
    def copy(self) -> ParserPuppet: ...
    # Returns a multi-line, human-readable listing of the current choices.
    def pretty(self) -> str: ...
    # Returns the parse-table row for the state on top of the state stack.
    def choices(self) -> Dict[str, Any]: ...
    # Returns the names that a copy of this puppet could be fed successfully.
    def accepts(self) -> Set[str]: ...
    # Hands the stored stream and stacks back to the parser's `parse`.
    def resume_parse(self) -> Tree: ...
@@ -1,3 +1,5 @@ | |||
import logging | |||
from .utils import STRING_TYPE | |||
###{standalone | |||
@@ -37,7 +39,7 @@ class UnexpectedInput(LarkError): | |||
after = text[pos:end].split(b'\n', 1)[0] | |||
return (before + after + b'\n' + b' ' * len(before) + b'^\n').decode("ascii", "backslashreplace") | |||
def match_examples(self, parse_fn, examples, token_type_match_fallback=False, print_debug_info=True): | |||
def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False): | |||
""" Given a parser instance and a dictionary mapping some label with | |||
some malformed syntax examples, it'll return the label for the | |||
example that best matches the current error. | |||
@@ -55,27 +57,26 @@ class UnexpectedInput(LarkError): | |||
try: | |||
parse_fn(malformed) | |||
except UnexpectedInput as ut: | |||
if ut.state == self.state and ut.accepts == self.accepts: | |||
if ut.state == self.state and (not use_accepts or ut.accepts == self.accepts): | |||
try: | |||
if ut.token == self.token: # Try exact match first | |||
if print_debug_info: | |||
print("Exact Match at %d, with example %d" % (i, j), (ut.token, self.token, ut.state, self.state)) | |||
logging.debug("Exact Match at example [%s][%s]" % (i, j)) | |||
return label | |||
if token_type_match_fallback: | |||
# Fallback to token types match | |||
if (ut.token.type == self.token.type) and not candidate[-1]: | |||
if print_debug_info: | |||
print("Token Type Fallback at %d, with example %d" % (i, j)) | |||
logging.debug("Token Type Fallback at example [%s][%s]" % (i, j)) | |||
candidate = label, True | |||
except AttributeError: | |||
pass | |||
if not candidate[0]: | |||
if print_debug_info: | |||
print("Defaulted at %d, with example %d" % (i, j)) | |||
logging.debug("Same State match at example [%s][%s]" % (i, j)) | |||
candidate = label, False | |||
elif ut.state == self.state: | |||
logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % | |||
(self.state, self.accepts, ut.accepts, i, j)) | |||
return candidate[0] | |||
@@ -59,18 +59,10 @@ class _Parser: | |||
try: | |||
return states[state][token.type] | |||
except KeyError: | |||
expected = [s for s in states[state].keys() if s.isupper()] | |||
expected = {s for s in states[state].keys() if s.isupper()} | |||
try: | |||
puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state) | |||
accepts = [] | |||
for t in expected: | |||
new_puppet = puppet.copy() | |||
try: | |||
new_puppet.feed_token(Token(t, '')) | |||
except KeyError: | |||
pass | |||
else: | |||
accepts.append(t) | |||
accepts = puppet.accepts() | |||
except NameError: | |||
puppet = accepts = None | |||
raise UnexpectedToken(token, expected, state=state, puppet=puppet, accepts=accepts) | |||
@@ -3,6 +3,8 @@ | |||
from copy import deepcopy | |||
from .lalr_analysis import Shift, Reduce | |||
from .. import Token | |||
class ParserPuppet(object): | |||
def __init__(self, parser, state_stack, value_stack, start, stream, set_state): | |||
@@ -67,13 +69,26 @@ class ParserPuppet(object): | |||
) | |||
def pretty(self):
    """Return a human-readable summary of the puppet's current choices.

    The text consists of a "Puppet choices:" header, one tab-indented
    "- key -> value" line per entry of `self.choices()`, and a final line
    giving the length of the state stack.

    Nothing is written to stdout: the report is only returned, so callers
    decide whether to print or log it.
    """
    lines = ["Puppet choices:"]
    lines.extend('\t- %s -> %s' % (k, v) for k, v in self.choices().items())
    lines.append('stack size: %s' % len(self._state_stack))
    return '\n'.join(lines)
def choices(self):
    """Return the parse-table row for the state on top of the state stack."""
    states = self.parser.parse_table.states
    top_state = self._state_stack[-1]
    return states[top_state]
def accepts(self):
    """Return the set of terminal names this puppet can be fed right now.

    Each upper-case key of `self.choices()` is tried on a fresh copy of the
    puppet, so the puppet itself is never advanced. A name is kept when
    feeding a token of that type does not raise `KeyError`.
    """
    accepted = set()
    for name in self.choices():
        # The table row is also keyed by lower-case names (presumably rule
        # names). Only upper-case keys are probed, matching the `s.isupper()`
        # filter the LALR parser uses when it builds `expected`.
        if not name.isupper():
            continue
        probe = self.copy()
        try:
            probe.feed_token(Token(name, ''))
        except KeyError:
            pass
        else:
            accepted.add(name)
    return accepted
def resume_parse(self):
    """Call the parser's `parse` with the stored stream, start, set_state
    callback and both stacks, and return whatever it returns."""
    parse = self.parser.parse
    return parse(
        self._stream,
        self._start,
        self._set_state,
        self._value_stack,
        self._state_stack,
    )