From cb2d9cded072e0f150b0d6d349fd431369b83a93 Mon Sep 17 00:00:00 2001 From: MegaIng1 Date: Thu, 13 Aug 2020 03:51:01 +0200 Subject: [PATCH] Refactored ParserPuppet, added stubs --- lark-stubs/exceptions.pyi | 10 +++++----- lark-stubs/parsers/__init__.pyi | 0 lark-stubs/parsers/lalr_puppet.pyi | 21 +++++++++++++++++++++ lark/exceptions.py | 19 ++++++++++--------- lark/parsers/lalr_parser.py | 12 ++---------- lark/parsers/lalr_puppet.py | 21 ++++++++++++++++++--- 6 files changed, 56 insertions(+), 27 deletions(-) create mode 100644 lark-stubs/parsers/__init__.pyi create mode 100644 lark-stubs/parsers/lalr_puppet.pyi diff --git a/lark-stubs/exceptions.pyi b/lark-stubs/exceptions.pyi index 67c39fb..268844c 100644 --- a/lark-stubs/exceptions.pyi +++ b/lark-stubs/exceptions.pyi @@ -3,7 +3,7 @@ from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set from .tree import Tree from .lexer import Token - +from .parsers.lalr_puppet import ParserPuppet class LarkError(Exception): pass @@ -38,16 +38,16 @@ class UnexpectedInput(LarkError): parse_fn: Callable[[str], Tree], examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], token_type_match_fallback: bool = False, - print_debug_info: bool = True + use_accepts: bool = False, ) -> T: ... 
class UnexpectedToken(ParseError, UnexpectedInput): - expected: List[str] + expected: Set[str] considered_rules: Set[str] - puppet: Any - accepts: List[str] + puppet: ParserPuppet + accepts: Set[str] class UnexpectedCharacters(LexError, UnexpectedInput): allowed: Set[str] diff --git a/lark-stubs/parsers/__init__.pyi b/lark-stubs/parsers/__init__.pyi new file mode 100644 index 0000000..e69de29 diff --git a/lark-stubs/parsers/lalr_puppet.pyi b/lark-stubs/parsers/lalr_puppet.pyi new file mode 100644 index 0000000..c138c32 --- /dev/null +++ b/lark-stubs/parsers/lalr_puppet.pyi @@ -0,0 +1,21 @@ +from typing import Set, Dict, Any + +from lark import Token, Tree + + +class ParserPuppet(object): + """ + Represents a LalrParser that can be stepped through. + Shouldn't be instantiated by hand, but is accessible as `UnexpectedToken.puppet` + """ + def feed_token(self, token: Token): ... + + def copy(self) -> ParserPuppet: ... + + def pretty(self) -> str: ... + + def choices(self) -> Dict[str, Any]: ... + + def accepts(self) -> Set[str]: ... + + def resume_parse(self) -> Tree: ... diff --git a/lark/exceptions.py b/lark/exceptions.py index 92ef64e..03f3da4 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -1,3 +1,5 @@ +import logging + from .utils import STRING_TYPE ###{standalone @@ -37,7 +39,7 @@ class UnexpectedInput(LarkError): after = text[pos:end].split(b'\n', 1)[0] return (before + after + b'\n' + b' ' * len(before) + b'^\n').decode("ascii", "backslashreplace") - def match_examples(self, parse_fn, examples, token_type_match_fallback=False, print_debug_info=True): + def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False): """ Given a parser instance and a dictionary mapping some label with some malformed syntax examples, it'll return the label for the example that bests matches the current error. 
@@ -55,27 +57,26 @@ class UnexpectedInput(LarkError): try: parse_fn(malformed) except UnexpectedInput as ut: - if ut.state == self.state and ut.accepts == self.accepts: + if ut.state == self.state and (not use_accepts or ut.accepts == self.accepts): try: if ut.token == self.token: # Try exact match first - if print_debug_info: - print("Exact Match at %d, with example %d" % (i, j), (ut.token, self.token, ut.state, self.state)) + logging.debug("Exact Match at example [%s][%s]" % (i, j)) return label if token_type_match_fallback: # Fallback to token types match if (ut.token.type == self.token.type) and not candidate[-1]: - if print_debug_info: - print("Token Type Fallback at %d, with example %d" % (i, j)) + logging.debug("Token Type Fallback at example [%s][%s]" % (i, j)) candidate = label, True except AttributeError: pass if not candidate[0]: - if print_debug_info: - print("Defaulted at %d, with example %d" % (i, j)) + logging.debug("Same State match at example [%s][%s]" % (i, j)) candidate = label, False - + elif ut.state == self.state: + logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" % + (self.state, self.accepts, ut.accepts, i, j)) return candidate[0] diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index f61e093..ba75606 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -59,18 +59,10 @@ class _Parser: try: return states[state][token.type] except KeyError: - expected = [s for s in states[state].keys() if s.isupper()] + expected = {s for s in states[state].keys() if s.isupper()} try: puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state) - accepts = [] - for t in expected: - new_puppet = puppet.copy() - try: - new_puppet.feed_token(Token(t, '')) - except KeyError: - pass - else: - accepts.append(t) + accepts = puppet.accepts() except NameError: puppet = accepts = None raise UnexpectedToken(token, expected, state=state, puppet=puppet, accepts=accepts) diff 
--git a/lark/parsers/lalr_puppet.py b/lark/parsers/lalr_puppet.py index 2b350bf..24c77a1 100644 --- a/lark/parsers/lalr_puppet.py +++ b/lark/parsers/lalr_puppet.py @@ -3,6 +3,8 @@ from copy import deepcopy from .lalr_analysis import Shift, Reduce +from .. import Token + class ParserPuppet(object): def __init__(self, parser, state_stack, value_stack, start, stream, set_state): @@ -67,13 +69,26 @@ class ParserPuppet(object): ) def pretty(self): - print("Puppet choices:") + out = ["Puppet choices:"] for k, v in self.choices().items(): - print('\t-', k, '->', v) - print('stack size:', len(self._state_stack)) + out.append('\t- %s -> %s' % (k, v)) + out.append('stack size: %s' % len(self._state_stack)) + return '\n'.join(out) def choices(self): return self.parser.parse_table.states[self._state_stack[-1]] + def accepts(self): + accepts = set() + for t in self.choices(): + new_puppet = self.copy() + try: + new_puppet.feed_token(Token(t, '')) + except KeyError: + pass + else: + accepts.add(t) + return accepts + def resume_parse(self): return self.parser.parse(self._stream, self._start, self._set_state, self._value_stack, self._state_stack)