From 2a1b03bc653aeaa5a42a859d5e1b84df332182fa Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 15 Apr 2021 12:57:12 -0500 Subject: [PATCH] Rename ParserPuppet -> InteractiveParser --- docs/classes.rst | 11 +++---- lark-stubs/lark.pyi | 2 +- lark/exceptions.py | 24 ++++++++++----- lark/lark.py | 4 +-- lark/load_grammar.py | 12 ++++---- lark/parser_frontends.py | 6 ++-- lark/parsers/lalr_parser.py | 28 ++++++++--------- lark/parsers/lalr_puppet.py | 60 +++++++++++++++++++++++-------------- tests/test_parser.py | 36 +++++++++++----------- 9 files changed, 103 insertions(+), 80 deletions(-) diff --git a/docs/classes.rst b/docs/classes.rst index a70fb59..8f64083 100644 --- a/docs/classes.rst +++ b/docs/classes.rst @@ -66,10 +66,11 @@ UnexpectedInput .. autoclass:: lark.exceptions.UnexpectedCharacters -.. _parserpuppet: +InteractiveParser +----------------- -ParserPuppet ------------- +.. autoclass:: lark.parsers.lalr_puppet.InteractiveParser + :members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts -.. autoclass:: lark.parsers.lalr_puppet.ParserPuppet - :members: choices, feed_token, copy, pretty, resume_parse +.. autoclass:: lark.parsers.lalr_puppet.ImmutableInteractiveParser + :members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts \ No newline at end of file diff --git a/lark-stubs/lark.pyi b/lark-stubs/lark.pyi index 6fd3139..10a5572 100644 --- a/lark-stubs/lark.pyi +++ b/lark-stubs/lark.pyi @@ -91,7 +91,7 @@ class Lark: def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: ... - def get_puppet(self, text: str = None, start: Optional[str] = None) -> ParserPuppet: + def parse_interactive(self, text: str = None, start: Optional[str] = None) -> ParserPuppet: ... 
@classmethod diff --git a/lark/exceptions.py b/lark/exceptions.py index d9204b9..757600d 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -1,3 +1,5 @@ +from warnings import warn + from .utils import STRING_TYPE, logger, NO_VALUE @@ -177,14 +179,16 @@ class UnexpectedCharacters(LexError, UnexpectedInput): class UnexpectedToken(ParseError, UnexpectedInput): - """When the parser throws UnexpectedToken, it instantiates a puppet - with its internal state. Users can then interactively set the puppet to - the desired puppet state, and resume regular parsing. + """An exception that is raised by the parser, when the token it received + doesn't match any valid step forward. + + The parser provides an interactive instance through `interactive_parser`, + which is initialized to the point of failure, and can be used for debugging and error handling. - see: :ref:`ParserPuppet`. + see: :ref:`InteractiveParser`. """ - def __init__(self, token, expected, considered_rules=None, state=None, puppet=None, terminals_by_name=None, token_history=None): + def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None): # TODO considered_rules and expected can be figured out using state self.line = getattr(token, 'line', '?') self.column = getattr(token, 'column', '?') @@ -195,7 +199,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): self.expected = expected # XXX deprecate? 
`accepts` is better self._accepts = NO_VALUE self.considered_rules = considered_rules - self.puppet = puppet + self.interactive_parser = interactive_parser self._terminals_by_name = terminals_by_name self.token_history = token_history @@ -204,7 +208,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): @property def accepts(self): if self._accepts is NO_VALUE: - self._accepts = self.puppet and self.puppet.accepts() + self._accepts = self.interactive_parser and self.interactive_parser.accepts() return self._accepts def __str__(self): @@ -215,6 +219,12 @@ class UnexpectedToken(ParseError, UnexpectedInput): return message + @property + def puppet(self): + warn("UnexpectedToken.puppet attribute has been renamed to interactive_parser", DeprecationWarning) + return self.interactive_parser + + class VisitError(LarkError): """VisitError is raised when visitors are interrupted by an exception diff --git a/lark/lark.py b/lark/lark.py index 5ea6b5c..82c2d36 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -532,8 +532,8 @@ class Lark(Serialize): "Get information about a terminal" return self._terminals_dict[name] - def get_puppet(self, text=None, start=None): - return self.parser.get_puppet(text, start=start) + def parse_interactive(self, text=None, start=None): + return self.parser.parse_interactive(text, start=start) def parse(self, text, start=None, on_error=None): """Parse the given text, according to the options provided. 
diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 63369a9..4e8d298 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -866,7 +866,7 @@ def _error_repr(error): else: return str(error) -def _search_puppet(puppet, predicate): +def _search_interactive_parser(interactive_parser, predicate): def expand(node): path, p = node for choice in p.choices(): @@ -878,7 +878,7 @@ def _search_puppet(puppet, predicate): else: yield path + (choice,), new_p - for path, p in bfs_all_unique([((), puppet)], expand): + for path, p in bfs_all_unique([((), interactive_parser)], expand): if predicate(p): return path, p @@ -888,10 +888,10 @@ def find_grammar_errors(text, start='start'): errors.append((e, _error_repr(e))) # recover to a new line - token_path, _ = _search_puppet(e.puppet.as_immutable(), lambda p: '_NL' in p.choices()) + token_path, _ = _search_interactive_parser(e.interactive_parser.as_immutable(), lambda p: '_NL' in p.choices()) for token_type in token_path: - e.puppet.feed_token(Token(token_type, '')) - e.puppet.feed_token(Token('_NL', '\n')) + e.interactive_parser.feed_token(Token(token_type, '')) + e.interactive_parser.feed_token(Token('_NL', '\n')) return True _tree = _get_parser().parse(text + '\n', start, on_error=on_error) @@ -900,7 +900,7 @@ def find_grammar_errors(text, start='start'): errors = [el[0] for el in errors_by_line.values()] # already sorted for e in errors: - e[0].puppet = None + e[0].interactive_parser = None return errors diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index d334947..e066d9a 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -106,12 +106,12 @@ class ParsingFrontend(Serialize): kw = {} if on_error is None else {'on_error': on_error} return self.parser.parse(stream, start, **kw) - def get_puppet(self, text=None, start=None): + def parse_interactive(self, text=None, start=None): start = self._verify_start(start) if self.parser_conf.parser_type != 'lalr': - raise 
ConfigurationError("Can only create a Puppet for parser='lalr' at the moment.") + raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ") stream = text if self.skip_lexer else LexerThread(self.lexer, text) - return self.parser.get_puppet(stream, start) + return self.parser.parse_interactive(stream, start) def get_frontend(parser, lexer): diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index a7a4074..43eb861 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -8,7 +8,7 @@ from ..lexer import Token from ..utils import Serialize from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable -from .lalr_puppet import ParserPuppet +from .lalr_puppet import InteractiveParser from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken ###{standalone @@ -33,9 +33,8 @@ class LALR_Parser(Serialize): def serialize(self, memo): return self._parse_table.serialize(memo) - def get_puppet(self, lexer, start): - return self.parser.get_puppet(lexer, start) - + def parse_interactive(self, lexer, start): + return self.parser.parse(lexer, start, start_interactive=True) def parse(self, lexer, start, on_error=None): try: @@ -46,7 +45,7 @@ class LALR_Parser(Serialize): while True: if isinstance(e, UnexpectedCharacters): - s = e.puppet.lexer_state.state + s = e.interactive_parser.lexer_state.state p = s.line_ctr.char_pos if not on_error(e): @@ -58,9 +57,11 @@ class LALR_Parser(Serialize): s.line_ctr.feed(s.text[p:p+1]) try: - return e.puppet.resume_parse() + return e.interactive_parser.resume_parse() except UnexpectedToken as e2: - if isinstance(e, UnexpectedToken) and e.token.type == e2.token.type == '$END' and e.puppet == e2.puppet: + if (isinstance(e, UnexpectedToken) + and e.token.type == e2.token.type == '$END' + and e.interactive_parser == e2.interactive_parser): # Prevent infinite loop raise e2 e = e2 @@ -125,7 +126,7 @@ class ParserState(object): action, arg = 
states[state][token.type] except KeyError: expected = {s for s in states[state].keys() if s.isupper()} - raise UnexpectedToken(token, expected, state=self, puppet=None) + raise UnexpectedToken(token, expected, state=self, interactive_parser=None) assert arg != end_state @@ -162,14 +163,11 @@ class _Parser(object): self.callbacks = callbacks self.debug = debug - def get_puppet(self, lexer, start, value_stack=None, state_stack=None): - parse_conf = ParseConf(self.parse_table, self.callbacks, start) - parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) - return ParserPuppet(self, parser_state, parser_state.lexer) - - def parse(self, lexer, start, value_stack=None, state_stack=None): + def parse(self, lexer, start, value_stack=None, state_stack=None, start_interactive=False): parse_conf = ParseConf(self.parse_table, self.callbacks, start) parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) + if start_interactive: + return InteractiveParser(self, parser_state, parser_state.lexer) return self.parse_from_state(parser_state) @@ -184,7 +182,7 @@ class _Parser(object): return state.feed_token(token, True) except UnexpectedInput as e: try: - e.puppet = ParserPuppet(self, state, state.lexer) + e.interactive_parser = InteractiveParser(self, state, state.lexer) except NameError: pass raise e diff --git a/lark/parsers/lalr_puppet.py b/lark/parsers/lalr_puppet.py index 8e5a315..ce596b5 100644 --- a/lark/parsers/lalr_puppet.py +++ b/lark/parsers/lalr_puppet.py @@ -1,16 +1,15 @@ -# This module provide a LALR puppet, which is used to debugging and error handling +# This module provides a LALR interactive parser, which is used for debugging and error handling from copy import copy -from .lalr_analysis import Shift, Reduce from .. import Token from ..exceptions import UnexpectedToken -class ParserPuppet(object): - """ParserPuppet gives you advanced control over error handling when parsing with LALR. 
+class InteractiveParser(object): + """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR. - For a simpler, more streamlined interface, see the ``on_error`` argument to ``Lark.parse()``. + For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``. """ def __init__(self, parser, parser_state, lexer_state): self.parser = parser @@ -25,9 +24,9 @@ class ParserPuppet(object): return self.parser_state.feed_token(token, token.type == '$END') def exhaust_lexer(self): - """Try to feed the rest of the lexer state into the puppet. + """Try to feed the rest of the lexer state into the interactive parser. - Note that this modifies the puppet in place and does not feed an '$END' Token""" + Note that this modifies the instance in place and does not feed an '$END' Token""" for token in self.lexer_state.lex(self.parser_state): self.parser_state.feed_token(token) @@ -38,9 +37,9 @@ class ParserPuppet(object): def __copy__(self): - """Create a new puppet with a separate state. + """Create a new interactive parser with a separate state. - Calls to feed_token() won't affect the old puppet, and vice-versa. + Calls to feed_token() won't affect the old instance, and vice-versa. 
""" return type(self)( self.parser, @@ -52,18 +51,19 @@ class ParserPuppet(object): return copy(self) def __eq__(self, other): - if not isinstance(other, ParserPuppet): + if not isinstance(other, InteractiveParser): return False return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state def as_immutable(self): + """Convert to an ``ImmutableInteractiveParser``.""" p = copy(self) - return ImmutableParserPuppet(p.parser, p.parser_state, p.lexer_state) + return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state) def pretty(self): """Print the output of ``choices()`` in a way that's easier to read.""" - out = ["Puppet choices:"] + out = ["Parser choices:"] for k, v in self.choices().items(): out.append('\t- %s -> %s' % (k, v)) out.append('stack size: %s' % len(self.parser_state.state_stack)) @@ -79,12 +79,13 @@ class ParserPuppet(object): return self.parser_state.parse_conf.parse_table.states[self.parser_state.position] def accepts(self): + """Returns the set of possible tokens that will advance the parser into a new valid state.""" accepts = set() for t in self.choices(): if t.isupper(): # is terminal? - new_puppet = copy(self) + new_cursor = copy(self) try: - new_puppet.feed_token(Token(t, '')) + new_cursor.feed_token(Token(t, '')) except UnexpectedToken: pass else: @@ -92,12 +93,16 @@ class ParserPuppet(object): return accepts def resume_parse(self): - """Resume parsing from the current puppet state.""" + """Resume automated parsing from the current state.""" return self.parser.parse_from_state(self.parser_state) -class ImmutableParserPuppet(ParserPuppet): +class ImmutableInteractiveParser(InteractiveParser): + """Same as ``InteractiveParser``, but operations create a new instance instead + of changing it in-place. 
+ """ + result = None def __hash__(self): @@ -105,14 +110,23 @@ class ImmutableParserPuppet(ParserPuppet): def feed_token(self, token): c = copy(self) - c.result = ParserPuppet.feed_token(c, token) + c.result = InteractiveParser.feed_token(c, token) return c def exhaust_lexer(self): - """Try to feed the rest of the lexer state into the puppet. + """Try to feed the rest of the lexer state into the parser. + + Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token""" + cursor = self.as_mutable() + cursor.exhaust_lexer() + return cursor.as_immutable() + + def as_mutable(self): + """Convert to an ``InteractiveParser``.""" + p = copy(self) + return InteractiveParser(p.parser, p.parser_state, p.lexer_state) + - Note that this returns a new ImmutableParserPuppet and does not feed an '$END' Token""" - res = copy(self) - for token in res.lexer_state.lex(res.parser_state): - res = res.parser_state.feed_token(token) - return res +# Deprecated class names for the interactive parser +ParserPuppet = InteractiveParser +ImmutableParserPuppet = ImmutableInteractiveParser diff --git a/tests/test_parser.py b/tests/test_parser.py index 90ee1ee..07cf79c 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -2404,32 +2404,32 @@ def _make_parser_test(LEXER, PARSER): B: "b" ''') - puppet = g.get_puppet() + ip = g.parse_interactive() - self.assertRaises(UnexpectedToken, puppet.feed_eof) - self.assertRaises(TypeError, puppet.exhaust_lexer) - puppet.feed_token(Token('A', 'a')) - res = puppet.feed_eof() + self.assertRaises(UnexpectedToken, ip.feed_eof) + self.assertRaises(TypeError, ip.exhaust_lexer) + ip.feed_token(Token('A', 'a')) + res = ip.feed_eof() self.assertEqual(res, Tree('start', ['a'])) - puppet = g.get_puppet("ab") + ip = g.parse_interactive("ab") - puppet.exhaust_lexer() + ip.exhaust_lexer() - puppet_copy = puppet.copy() - self.assertEqual(puppet_copy.parser_state, puppet.parser_state) - self.assertEqual(puppet_copy.lexer_state.state, 
puppet.lexer_state.state) - self.assertIsNot(puppet_copy.parser_state, puppet.parser_state) - self.assertIsNot(puppet_copy.lexer_state.state, puppet.lexer_state.state) - self.assertIsNot(puppet_copy.lexer_state.state.line_ctr, puppet.lexer_state.state.line_ctr) + ip_copy = ip.copy() + self.assertEqual(ip_copy.parser_state, ip.parser_state) + self.assertEqual(ip_copy.lexer_state.state, ip.lexer_state.state) + self.assertIsNot(ip_copy.parser_state, ip.parser_state) + self.assertIsNot(ip_copy.lexer_state.state, ip.lexer_state.state) + self.assertIsNot(ip_copy.lexer_state.state.line_ctr, ip.lexer_state.state.line_ctr) - res = puppet.feed_eof(puppet.lexer_state.state.last_token) + res = ip.feed_eof(ip.lexer_state.state.last_token) self.assertEqual(res, Tree('start', ['a', 'b'])) - self.assertRaises(UnexpectedToken ,puppet.feed_eof) + self.assertRaises(UnexpectedToken ,ip.feed_eof) - self.assertRaises(UnexpectedToken, puppet_copy.feed_token, Token('A', 'a')) - puppet_copy.feed_token(Token('B', 'b')) - res = puppet_copy.feed_eof() + self.assertRaises(UnexpectedToken, ip_copy.feed_token, Token('A', 'a')) + ip_copy.feed_token(Token('B', 'b')) + res = ip_copy.feed_eof() self.assertEqual(res, Tree('start', ['a', 'b', 'b'])) @unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now")