| @@ -3,7 +3,7 @@ | |||
| from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set | |||
| from .tree import Tree | |||
| from .lexer import Token | |||
| from .parsers.lalr_puppet import ParserPuppet | |||
class LarkError(Exception):
    """Root exception type; other error classes here (e.g. UnexpectedInput) derive from it."""
| @@ -38,16 +38,16 @@ class UnexpectedInput(LarkError): | |||
| parse_fn: Callable[[str], Tree], | |||
| examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], | |||
| token_type_match_fallback: bool = False, | |||
| print_debug_info: bool = True | |||
| use_accepts: bool = False, | |||
| ) -> T: | |||
| ... | |||
class UnexpectedToken(ParseError, UnexpectedInput):
    # Upper-case (terminal) keys of the parse-table row for the state in
    # which the unexpected token was met.
    expected: Set[str]
    considered_rules: Set[str]
    # Puppet built at the failure point. NOTE(review): the parser falls back
    # to None for both `puppet` and `accepts` when ParserPuppet is not
    # defined (NameError path) -- confirm whether Optional[...] is wanted.
    puppet: ParserPuppet
    # Result of `puppet.accepts()` at the failure point.
    accepts: Set[str]
| class UnexpectedCharacters(LexError, UnexpectedInput): | |||
| allowed: Set[str] | |||
| @@ -0,0 +1,21 @@ | |||
| from typing import Set, Dict, Any | |||
| from lark import Token, Tree | |||
class ParserPuppet(object):
    """
    Represents a LalrParser that can be stepped through.
    Shouldn't be instantiated by hand, but is accessible as `UnexpectedToken.puppet`
    """
    def feed_token(self, token: Token): ...
    def copy(self) -> ParserPuppet: ...
    # Multi-line, human-readable listing of the current choices and stack size.
    def pretty(self) -> str: ...
    # Parse-table row for the state on top of the state stack.
    def choices(self) -> Dict[str, Any]: ...
    # Names from `choices()` that a copy of the puppet consumes without error.
    def accepts(self) -> Set[str]: ...
    # Hands the stored stream and stacks back to the parser's `parse`.
    def resume_parse(self) -> Tree: ...
| @@ -1,3 +1,5 @@ | |||
| import logging | |||
| from .utils import STRING_TYPE | |||
| ###{standalone | |||
| @@ -37,7 +39,7 @@ class UnexpectedInput(LarkError): | |||
| after = text[pos:end].split(b'\n', 1)[0] | |||
| return (before + after + b'\n' + b' ' * len(before) + b'^\n').decode("ascii", "backslashreplace") | |||
| def match_examples(self, parse_fn, examples, token_type_match_fallback=False, print_debug_info=True): | |||
| def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False): | |||
| """ Given a parser instance and a dictionary mapping some label with | |||
| some malformed syntax examples, it'll return the label for the | |||
| example that best matches the current error. | |||
| @@ -55,27 +57,26 @@ class UnexpectedInput(LarkError): | |||
| try: | |||
| parse_fn(malformed) | |||
| except UnexpectedInput as ut: | |||
| if ut.state == self.state and ut.accepts == self.accepts: | |||
| if ut.state == self.state and (not use_accepts or ut.accepts == self.accepts): | |||
| try: | |||
| if ut.token == self.token: # Try exact match first | |||
| if print_debug_info: | |||
| print("Exact Match at %d, with example %d" % (i, j), (ut.token, self.token, ut.state, self.state)) | |||
| logging.debug("Exact Match at example [%s][%s]" % (i, j)) | |||
| return label | |||
| if token_type_match_fallback: | |||
| # Fallback to token types match | |||
| if (ut.token.type == self.token.type) and not candidate[-1]: | |||
| if print_debug_info: | |||
| print("Token Type Fallback at %d, with example %d" % (i, j)) | |||
| logging.debug("Token Type Fallback at example [%s][%s]" % (i, j)) | |||
| candidate = label, True | |||
| except AttributeError: | |||
| pass | |||
| if not candidate[0]: | |||
| if print_debug_info: | |||
| print("Defaulted at %d, with example %d" % (i, j)) | |||
| logging.debug("Same State match at example [%s][%s]" % (i, j)) | |||
| candidate = label, False | |||
| elif ut.state == self.state: | |||
| logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % | |||
| (self.state, self.accepts, ut.accepts, i, j)) | |||
| return candidate[0] | |||
| @@ -59,18 +59,10 @@ class _Parser: | |||
| try: | |||
| return states[state][token.type] | |||
| except KeyError: | |||
| expected = [s for s in states[state].keys() if s.isupper()] | |||
| expected = {s for s in states[state].keys() if s.isupper()} | |||
| try: | |||
| puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state) | |||
| accepts = [] | |||
| for t in expected: | |||
| new_puppet = puppet.copy() | |||
| try: | |||
| new_puppet.feed_token(Token(t, '')) | |||
| except KeyError: | |||
| pass | |||
| else: | |||
| accepts.append(t) | |||
| accepts = puppet.accepts() | |||
| except NameError: | |||
| puppet = accepts = None | |||
| raise UnexpectedToken(token, expected, state=state, puppet=puppet, accepts=accepts) | |||
| @@ -3,6 +3,8 @@ | |||
| from copy import deepcopy | |||
| from .lalr_analysis import Shift, Reduce | |||
| from .. import Token | |||
| class ParserPuppet(object): | |||
| def __init__(self, parser, state_stack, value_stack, start, stream, set_state): | |||
| @@ -67,13 +69,26 @@ class ParserPuppet(object): | |||
| ) | |||
def pretty(self):
    """Return a human-readable summary of the puppet's current position.

    The text starts with a ``Puppet choices:`` header, lists every entry of
    ``self.choices()`` as ``key -> value``, and ends with the current length
    of the state stack. Nothing is written to stdout; the caller decides
    what to do with the returned string.
    """
    lines = ["Puppet choices:"]
    lines.extend('\t- %s -> %s' % (k, v) for k, v in self.choices().items())
    lines.append('stack size: %s' % len(self._state_stack))
    return '\n'.join(lines)
def choices(self):
    """Return the parse-table row belonging to the state on top of the state stack."""
    current_state = self._state_stack[-1]
    table = self.parser.parse_table
    return table.states[current_state]
def accepts(self):
    """Return the set of terminal names that can be fed to the parser next.

    Every upper-case key of ``self.choices()`` is wrapped in an empty-valued
    ``Token`` and fed to a copy of this puppet (not to the puppet itself).
    Names whose trial feed raises ``KeyError`` are dropped; the rest are
    returned.
    """
    accepted = set()
    for name in self.choices():
        # Only upper-case keys are tried: the parser uses the same test to
        # build `expected`, and the inline loop this method replaces only
        # ever iterated over `expected`.
        if not name.isupper():
            continue
        trial = self.copy()
        try:
            trial.feed_token(Token(name, ''))
        except KeyError:
            continue
        accepted.add(name)
    return accepted
def resume_parse(self):
    """Call the parser's ``parse`` with the stored stream, start, set_state and both stacks."""
    parse_args = (
        self._stream,
        self._start,
        self._set_state,
        self._value_stack,
        self._state_stack,
    )
    return self.parser.parse(*parse_args)