@@ -3,7 +3,7 @@ | |||||
from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set | from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set | ||||
from .tree import Tree | from .tree import Tree | ||||
from .lexer import Token | from .lexer import Token | ||||
from .parsers.lalr_puppet import ParserPuppet | |||||
class LarkError(Exception):
    """Root exception type of this module; `UnexpectedInput` derives from it."""
    pass
@@ -38,16 +38,16 @@ class UnexpectedInput(LarkError): | |||||
parse_fn: Callable[[str], Tree], | parse_fn: Callable[[str], Tree], | ||||
examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], | examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], | ||||
token_type_match_fallback: bool = False, | token_type_match_fallback: bool = False, | ||||
print_debug_info: bool = True | |||||
use_accepts: bool = False, | |||||
) -> T: | ) -> T: | ||||
... | ... | ||||
class UnexpectedToken(ParseError, UnexpectedInput):
    # Upper-case symbol names that have an entry in the parse table for the
    # state in which the unexpected token was seen.
    expected: Set[str]
    considered_rules: Set[str]
    # The LALR parser passes None for both of these when ParserPuppet is not
    # defined at the point of failure, so neither is guaranteed to be set.
    puppet: Union[ParserPuppet, None]
    accepts: Union[Set[str], None]
class UnexpectedCharacters(LexError, UnexpectedInput): | class UnexpectedCharacters(LexError, UnexpectedInput): | ||||
allowed: Set[str] | allowed: Set[str] | ||||
@@ -0,0 +1,21 @@ | |||||
from typing import Set, Dict, Any | |||||
from lark import Token, Tree | |||||
class ParserPuppet(object):
    """
    Represents a LalrParser that can be stepped through.
    Shouldn't be instantiated by hand, but is accessible as `UnexpectedToken.puppet`
    """
    # Advance the puppet as if `token` had just been read from the input.
    def feed_token(self, token: Token): ...

    # Independent puppet that can be fed tokens without touching this one.
    def copy(self) -> "ParserPuppet": ...

    # Human-readable listing of the current choices and the stack size.
    def pretty(self) -> str: ...

    # Parse-table entry for the state on top of the state stack.
    def choices(self) -> Dict[str, Any]: ...

    # Names from `choices()` that a copy of the puppet can be fed successfully.
    def accepts(self) -> Set[str]: ...

    # Hand the stored stream and stacks back to the parser and keep parsing.
    def resume_parse(self) -> Tree: ...
@@ -1,3 +1,5 @@ | |||||
import logging | |||||
from .utils import STRING_TYPE | from .utils import STRING_TYPE | ||||
###{standalone | ###{standalone | ||||
@@ -37,7 +39,7 @@ class UnexpectedInput(LarkError): | |||||
after = text[pos:end].split(b'\n', 1)[0] | after = text[pos:end].split(b'\n', 1)[0] | ||||
return (before + after + b'\n' + b' ' * len(before) + b'^\n').decode("ascii", "backslashreplace") | return (before + after + b'\n' + b' ' * len(before) + b'^\n').decode("ascii", "backslashreplace") | ||||
def match_examples(self, parse_fn, examples, token_type_match_fallback=False, print_debug_info=True): | |||||
def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False): | |||||
""" Given a parser instance and a dictionary mapping some label with | """ Given a parser instance and a dictionary mapping some label with | ||||
some malformed syntax examples, it'll return the label for the | some malformed syntax examples, it'll return the label for the | ||||
example that best matches the current error. | example that best matches the current error. | ||||
@@ -55,27 +57,26 @@ class UnexpectedInput(LarkError): | |||||
try: | try: | ||||
parse_fn(malformed) | parse_fn(malformed) | ||||
except UnexpectedInput as ut: | except UnexpectedInput as ut: | ||||
if ut.state == self.state and ut.accepts == self.accepts: | |||||
if ut.state == self.state and (not use_accepts or ut.accepts == self.accepts): | |||||
try: | try: | ||||
if ut.token == self.token: # Try exact match first | if ut.token == self.token: # Try exact match first | ||||
if print_debug_info: | |||||
print("Exact Match at %d, with example %d" % (i, j), (ut.token, self.token, ut.state, self.state)) | |||||
logging.debug("Exact Match at example [%s][%s]" % (i, j)) | |||||
return label | return label | ||||
if token_type_match_fallback: | if token_type_match_fallback: | ||||
# Fallback to token types match | # Fallback to token types match | ||||
if (ut.token.type == self.token.type) and not candidate[-1]: | if (ut.token.type == self.token.type) and not candidate[-1]: | ||||
if print_debug_info: | |||||
print("Token Type Fallback at %d, with example %d" % (i, j)) | |||||
logging.debug("Token Type Fallback at example [%s][%s]" % (i, j)) | |||||
candidate = label, True | candidate = label, True | ||||
except AttributeError: | except AttributeError: | ||||
pass | pass | ||||
if not candidate[0]: | if not candidate[0]: | ||||
if print_debug_info: | |||||
print("Defaulted at %d, with example %d" % (i, j)) | |||||
logging.debug("Same State match at example [%s][%s]" % (i, j)) | |||||
candidate = label, False | candidate = label, False | ||||
elif ut.state == self.state: | |||||
logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % | |||||
(self.state, self.accepts, ut.accepts, i, j)) | |||||
return candidate[0] | return candidate[0] | ||||
@@ -59,18 +59,10 @@ class _Parser: | |||||
try: | try: | ||||
return states[state][token.type] | return states[state][token.type] | ||||
except KeyError: | except KeyError: | ||||
expected = [s for s in states[state].keys() if s.isupper()] | |||||
expected = {s for s in states[state].keys() if s.isupper()} | |||||
try: | try: | ||||
puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state) | puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state) | ||||
accepts = [] | |||||
for t in expected: | |||||
new_puppet = puppet.copy() | |||||
try: | |||||
new_puppet.feed_token(Token(t, '')) | |||||
except KeyError: | |||||
pass | |||||
else: | |||||
accepts.append(t) | |||||
accepts = puppet.accepts() | |||||
except NameError: | except NameError: | ||||
puppet = accepts = None | puppet = accepts = None | ||||
raise UnexpectedToken(token, expected, state=state, puppet=puppet, accepts=accepts) | raise UnexpectedToken(token, expected, state=state, puppet=puppet, accepts=accepts) | ||||
@@ -3,6 +3,8 @@ | |||||
from copy import deepcopy | from copy import deepcopy | ||||
from .lalr_analysis import Shift, Reduce | from .lalr_analysis import Shift, Reduce | ||||
from .. import Token | |||||
class ParserPuppet(object): | class ParserPuppet(object): | ||||
def __init__(self, parser, state_stack, value_stack, start, stream, set_state): | def __init__(self, parser, state_stack, value_stack, start, stream, set_state): | ||||
@@ -67,13 +69,26 @@ class ParserPuppet(object): | |||||
) | ) | ||||
def pretty(self):
    """Return a printable summary of the puppet's current position.

    The text lists every entry of `self.choices()` as ``key -> value`` and
    ends with the current size of the state stack. Nothing is printed; the
    caller decides what to do with the returned string.
    """
    out = ["Puppet choices:"]
    out.extend('\t- %s -> %s' % (k, v) for k, v in self.choices().items())
    out.append('stack size: %s' % len(self._state_stack))
    return '\n'.join(out)
def choices(self):
    """Return the parse-table entry for the state on top of the state stack."""
    current_state = self._state_stack[-1]
    return self.parser.parse_table.states[current_state]
def accepts(self):
    """Return the set of token types the parser can consume in its current state.

    Each candidate is tried on a throwaway copy of the puppet, so the state
    of this puppet is left untouched. A candidate counts as accepted when
    feeding it does not raise KeyError.
    """
    accepted = set()
    for name in self.choices():
        # The state's table is keyed by rule names as well as token types.
        # Only upper-case names are token types (the parser applies the same
        # filter when it builds `expected`), so anything else is skipped
        # rather than fed to the parser as a fake token.
        if not name.isupper():
            continue
        probe = self.copy()
        try:
            probe.feed_token(Token(name, ''))
        except KeyError:
            pass
        else:
            accepted.add(name)
    return accepted
def resume_parse(self):
    """Continue parsing from the puppet's current position.

    Passes the stored stream, start symbol, state callback and both stacks
    to the parser's `parse` method and returns whatever it returns.
    """
    return self.parser.parse(
        self._stream,
        self._start,
        self._set_state,
        self._value_stack,
        self._state_stack,
    )