From 22c289126f1763fd3b7eb1e73808434dad25a239 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Thu, 15 Apr 2021 13:08:48 -0500 Subject: [PATCH] Rename ParserPuppet -> InteractiveParser --- docs/classes.rst | 4 +- docs/features.md | 2 +- .../{error_puppet.py => error_handling.py} | 12 +- lark-stubs/exceptions.pyi | 4 +- lark-stubs/lark.pyi | 4 +- lark-stubs/parsers/lalr_puppet.pyi | 43 ------ lark/lark.py | 2 +- lark/parsers/lalr_interactive_parser.py | 132 +++++++++++++++++ lark/parsers/lalr_parser.py | 2 +- lark/parsers/lalr_puppet.py | 135 +----------------- tests/test_parser.py | 12 +- 11 files changed, 156 insertions(+), 196 deletions(-) rename examples/advanced/{error_puppet.py => error_handling.py} (67%) delete mode 100644 lark-stubs/parsers/lalr_puppet.pyi create mode 100644 lark/parsers/lalr_interactive_parser.py diff --git a/docs/classes.rst b/docs/classes.rst index 8f64083..2ff46a2 100644 --- a/docs/classes.rst +++ b/docs/classes.rst @@ -69,8 +69,8 @@ UnexpectedInput InteractiveParser ----------------- -.. autoclass:: lark.parsers.lalr_puppet.InteractiveParser +.. autoclass:: lark.parsers.lalr_interactive_parser.InteractiveParser :members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts -.. autoclass:: lark.parsers.lalr_puppet.ImmutableInteractiveParser +.. autoclass:: lark.parsers.lalr_interactive_parser.ImmutableInteractiveParser :members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts \ No newline at end of file diff --git a/docs/features.md b/docs/features.md index fb272aa..a187957 100644 --- a/docs/features.md +++ b/docs/features.md @@ -8,7 +8,7 @@ - EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md)) - Builds a parse-tree (AST) automagically based on the grammar - Stand-alone parser generator - create a small independent parser to embed in your project. 
- - Flexible error handling by using a "puppet parser" mechanism (LALR only) + - Flexible error handling by using an interactive parser interface (LALR only) - Automatic line & column tracking (for both tokens and matched rules) - Automatic terminal collision resolution - Standard library of terminals (strings, numbers, names, etc.) diff --git a/examples/advanced/error_puppet.py b/examples/advanced/error_handling.py similarity index 67% rename from examples/advanced/error_puppet.py rename to examples/advanced/error_handling.py index 1b1bfbc..1fb9be3 100644 --- a/examples/advanced/error_puppet.py +++ b/examples/advanced/error_handling.py @@ -1,11 +1,11 @@ """ -Error handling with a puppet -================================== +Error handling using an interactive parser +========================================== -This example demonstrates error handling using a parsing puppet in LALR +This example demonstrates error handling using an interactive parser in LALR When the parser encounters an UnexpectedToken exception, it creates a -parsing puppet with the current parse-state, and lets you control how +new interactive parser with the current parse-state, and lets you control how to proceed step-by-step. When you've achieved the correct parse-state, you can resume the run by returning True. """ @@ -20,8 +20,8 @@ def ignore_errors(e): return True elif e.token.type == 'SIGNED_NUMBER': # Try to feed a comma and retry the number - e.puppet.feed_token(Token('COMMA', ',')) - e.puppet.feed_token(e.token) + e.interactive_parser.feed_token(Token('COMMA', ',')) + e.interactive_parser.feed_token(e.token) return True # Unhandled error. 
Will stop parse and raise exception diff --git a/lark-stubs/exceptions.pyi b/lark-stubs/exceptions.pyi index c2f3a6e..1c04fa8 100644 --- a/lark-stubs/exceptions.pyi +++ b/lark-stubs/exceptions.pyi @@ -3,7 +3,7 @@ from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set from .tree import Tree from .lexer import Token -from .parsers.lalr_puppet import ParserPuppet +from .parsers.lalr_interactive_parser import InteractiveParser class LarkError(Exception): pass @@ -52,7 +52,7 @@ class UnexpectedInput(LarkError): class UnexpectedToken(ParseError, UnexpectedInput): expected: Set[str] considered_rules: Set[str] - puppet: ParserPuppet + interactive_parser: InteractiveParser accepts: Set[str] class UnexpectedCharacters(LexError, UnexpectedInput): diff --git a/lark-stubs/lark.pyi b/lark-stubs/lark.pyi index 10a5572..155b774 100644 --- a/lark-stubs/lark.pyi +++ b/lark-stubs/lark.pyi @@ -5,7 +5,7 @@ from typing import ( Literal, Protocol, Tuple, Iterable, ) -from .parsers.lalr_puppet import ParserPuppet +from .parsers.lalr_interactive_parser import InteractiveParser from .visitors import Transformer from .lexer import Token, Lexer, TerminalDef from .tree import Tree @@ -91,7 +91,7 @@ class Lark: def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: ... - def parse_interactive(self, text: str = None, start: Optional[str] = None) -> ParserPuppet: + def parse_interactive(self, text: str = None, start: Optional[str] = None) -> InteractiveParser: ... 
@classmethod diff --git a/lark-stubs/parsers/lalr_puppet.pyi b/lark-stubs/parsers/lalr_puppet.pyi deleted file mode 100644 index d50ff15..0000000 --- a/lark-stubs/parsers/lalr_puppet.pyi +++ /dev/null @@ -1,43 +0,0 @@ -from typing import Set, Dict, Any - -from lark import Token, Tree - - -class ParserPuppet(object): - """ - Provides an interface to interactively step through the parser (LALR(1) only for now) - - Accessible via `UnexpectedToken.puppet` (raised by the parser on token error) - """ - parser: Any - parser_state: Any - lexer_state: Any - - - def feed_token(self, token: Token) -> Any: ... - - def exhaust_lexer(self) -> None: ... - - def feed_eof(self, last_token: Token = None) -> Any: ... - - def copy(self) -> ParserPuppet: ... - - def as_immutable(self) -> ImmutableParserPuppet: ... - - def pretty(self) -> str: ... - - def choices(self) -> Dict[str, Any]: ... - - def accepts(self) -> Set[str]: ... - - def resume_parse(self) -> Tree: ... - - -class ImmutableParserPuppet(ParserPuppet): - result: Any = None - - def feed_token(self, token: Token) -> ImmutableParserPuppet: ... - - def exhaust_lexer(self) -> ImmutableParserPuppet: ... - - def feed_eof(self, last_token: Token = None) -> ImmutableParserPuppet: ... diff --git a/lark/lark.py b/lark/lark.py index 82c2d36..df51e43 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -542,7 +542,7 @@ class Lark(Serialize): text (str): Text to be parsed. start (str, optional): Required if Lark was given multiple possible start symbols (using the start option). on_error (function, optional): if provided, will be called on UnexpectedToken error. Return true to resume parsing. - LALR only. See examples/advanced/error_puppet.py for an example of how to use on_error. + LALR only. See examples/advanced/error_handling.py for an example of how to use on_error. 
Returns: If a transformer is supplied to ``__init__``, returns whatever is the diff --git a/lark/parsers/lalr_interactive_parser.py b/lark/parsers/lalr_interactive_parser.py new file mode 100644 index 0000000..ce596b5 --- /dev/null +++ b/lark/parsers/lalr_interactive_parser.py @@ -0,0 +1,132 @@ +# This module provides a LALR interactive parser, which is used for debugging and error handling + +from copy import copy + +from .. import Token +from ..exceptions import UnexpectedToken + + +class InteractiveParser(object): + """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR. + + For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``. + """ + def __init__(self, parser, parser_state, lexer_state): + self.parser = parser + self.parser_state = parser_state + self.lexer_state = lexer_state + + def feed_token(self, token): + """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer. + + Note that ``token`` has to be an instance of ``Token``. + """ + return self.parser_state.feed_token(token, token.type == '$END') + + def exhaust_lexer(self): + """Try to feed the rest of the lexer state into the interactive parser. + + Note that this modifies the instance in place and does not feed an '$END' Token""" + for token in self.lexer_state.lex(self.parser_state): + self.parser_state.feed_token(token) + + def feed_eof(self, last_token=None): + """Feed a '$END' Token. Borrows from 'last_token' if given.""" + eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else Token('$END', '', 0, 1, 1) + return self.feed_token(eof) + + + def __copy__(self): + """Create a new interactive parser with a separate state. + + Calls to feed_token() won't affect the old instance, and vice-versa. 
+ """ + return type(self)( + self.parser, + copy(self.parser_state), + copy(self.lexer_state), + ) + + def copy(self): + return copy(self) + + def __eq__(self, other): + if not isinstance(other, InteractiveParser): + return False + + return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state + + def as_immutable(self): + """Convert to an ``ImmutableInteractiveParser``.""" + p = copy(self) + return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state) + + def pretty(self): + """Print the output of ``choices()`` in a way that's easier to read.""" + out = ["Parser choices:"] + for k, v in self.choices().items(): + out.append('\t- %s -> %s' % (k, v)) + out.append('stack size: %s' % len(self.parser_state.state_stack)) + return '\n'.join(out) + + def choices(self): + """Returns a dictionary of token types, matched to their action in the parser. + + Only returns token types that are accepted by the current state. + + Updated by ``feed_token()``. + """ + return self.parser_state.parse_conf.parse_table.states[self.parser_state.position] + + def accepts(self): + """Returns the set of possible tokens that will advance the parser into a new valid state.""" + accepts = set() + for t in self.choices(): + if t.isupper(): # is terminal? + new_cursor = copy(self) + try: + new_cursor.feed_token(Token(t, '')) + except UnexpectedToken: + pass + else: + accepts.add(t) + return accepts + + def resume_parse(self): + """Resume automated parsing from the current state.""" + return self.parser.parse_from_state(self.parser_state) + + + +class ImmutableInteractiveParser(InteractiveParser): + """Same as ``InteractiveParser``, but operations create a new instance instead + of changing it in-place. 
+ """ + + result = None + + def __hash__(self): + return hash((self.parser_state, self.lexer_state)) + + def feed_token(self, token): + c = copy(self) + c.result = InteractiveParser.feed_token(c, token) + return c + + def exhaust_lexer(self): + """Try to feed the rest of the lexer state into the parser. + + Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token""" + cursor = self.as_mutable() + cursor.exhaust_lexer() + return cursor.as_immutable() + + def as_mutable(self): + """Convert to an ``InteractiveParser``.""" + p = copy(self) + return InteractiveParser(p.parser, p.parser_state, p.lexer_state) + + +# Deprecated class names for the interactive parser +ParserPuppet = InteractiveParser +ImmutableParserPuppet = ImmutableInteractiveParser diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 43eb861..fe40791 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -8,7 +8,7 @@ from ..lexer import Token from ..utils import Serialize from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable -from .lalr_puppet import InteractiveParser +from .lalr_interactive_parser import InteractiveParser from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken ###{standalone diff --git a/lark/parsers/lalr_puppet.py b/lark/parsers/lalr_puppet.py index ce596b5..6ea6d89 100644 --- a/lark/parsers/lalr_puppet.py +++ b/lark/parsers/lalr_puppet.py @@ -1,132 +1,3 @@ -# This module provides a LALR interactive parser, which is used for debugging and error handling - -from copy import copy - -from .. import Token -from ..exceptions import UnexpectedToken - - -class InteractiveParser(object): - """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR. - - For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``. 
- """ - def __init__(self, parser, parser_state, lexer_state): - self.parser = parser - self.parser_state = parser_state - self.lexer_state = lexer_state - - def feed_token(self, token): - """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer. - - Note that ``token`` has to be an instance of ``Token``. - """ - return self.parser_state.feed_token(token, token.type == '$END') - - def exhaust_lexer(self): - """Try to feed the rest of the lexer state into the interactive parser. - - Note that this modifies the instance in place and does not feed an '$END' Token""" - for token in self.lexer_state.lex(self.parser_state): - self.parser_state.feed_token(token) - - def feed_eof(self, last_token=None): - """Feed a '$END' Token. Borrows from 'last_token' if given.""" - eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else Token('$END', '', 0, 1, 1) - return self.feed_token(eof) - - - def __copy__(self): - """Create a new interactive parser with a separate state. - - Calls to feed_token() won't affect the old instance, and vice-versa. 
- """ - return type(self)( - self.parser, - copy(self.parser_state), - copy(self.lexer_state), - ) - - def copy(self): - return copy(self) - - def __eq__(self, other): - if not isinstance(other, InteractiveParser): - return False - - return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state - - def as_immutable(self): - """Convert to an ``ImmutableInteractiveParser``.""" - p = copy(self) - return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state) - - def pretty(self): - """Print the output of ``choices()`` in a way that's easier to read.""" - out = ["Parser choices:"] - for k, v in self.choices().items(): - out.append('\t- %s -> %s' % (k, v)) - out.append('stack size: %s' % len(self.parser_state.state_stack)) - return '\n'.join(out) - - def choices(self): - """Returns a dictionary of token types, matched to their action in the parser. - - Only returns token types that are accepted by the current state. - - Updated by ``feed_token()``. - """ - return self.parser_state.parse_conf.parse_table.states[self.parser_state.position] - - def accepts(self): - """Returns the set of possible tokens that will advance the parser into a new valid state.""" - accepts = set() - for t in self.choices(): - if t.isupper(): # is terminal? - new_cursor = copy(self) - try: - new_cursor.feed_token(Token(t, '')) - except UnexpectedToken: - pass - else: - accepts.add(t) - return accepts - - def resume_parse(self): - """Resume automated parsing from the current state.""" - return self.parser.parse_from_state(self.parser_state) - - - -class ImmutableInteractiveParser(InteractiveParser): - """Same as ``InteractiveParser``, but operations create a new instance instead - of changing it in-place. 
- """ - - result = None - - def __hash__(self): - return hash((self.parser_state, self.lexer_state)) - - def feed_token(self, token): - c = copy(self) - c.result = InteractiveParser.feed_token(c, token) - return c - - def exhaust_lexer(self): - """Try to feed the rest of the lexer state into the parser. - - Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token""" - cursor = self.as_mutable() - cursor.exhaust_lexer() - return cursor.as_immutable() - - def as_mutable(self): - """Convert to an ``InteractiveParser``.""" - p = copy(self) - return InteractiveParser(p.parser, p.parser_state, p.lexer_state) - - -# Deprecated class names for the interactive parser -ParserPuppet = InteractiveParser -ImmutableParserPuppet = ImmutableInteractiveParser +# Deprecated + +from .lalr_interactive_parser import ParserPuppet, ImmutableParserPuppet \ No newline at end of file diff --git a/tests/test_parser.py b/tests/test_parser.py index 07cf79c..18b70fc 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -2395,8 +2395,8 @@ def _make_parser_test(LEXER, PARSER): """, regex=True) self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') - @unittest.skipIf(PARSER!='lalr', "Puppet is only implemented for LALR at the moment") - def test_parser_puppet(self): + @unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment") + def test_parser_interactive_parser(self): g = _Lark(r''' start: A+ B* @@ -2432,8 +2432,8 @@ def _make_parser_test(LEXER, PARSER): res = ip_copy.feed_eof() self.assertEqual(res, Tree('start', ['a', 'b', 'b'])) - @unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now") - def test_error_with_puppet(self): + @unittest.skipIf(PARSER!='lalr', "interactive_parser error handling only works with LALR for now") + def test_error_with_interactive_parser(self): def ignore_errors(e): if isinstance(e, UnexpectedCharacters): # Skip bad character @@ -2445,8 +2445,8 @@ def 
_make_parser_test(LEXER, PARSER): return True elif e.token.type == 'SIGNED_NUMBER': # Try to feed a comma and retry the number - e.puppet.feed_token(Token('COMMA', ',')) - e.puppet.feed_token(e.token) + e.interactive_parser.feed_token(Token('COMMA', ',')) + e.interactive_parser.feed_token(e.token) return True