| @@ -69,8 +69,8 @@ UnexpectedInput | |||
| InteractiveParser | |||
| ----------------- | |||
| .. autoclass:: lark.parsers.lalr_puppet.InteractiveParser | |||
| .. autoclass:: lark.parsers.lalr_interactive_parser.InteractiveParser | |||
| :members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts | |||
| .. autoclass:: lark.parsers.lalr_puppet.ImmutableInteractiveParser | |||
| .. autoclass:: lark.parsers.lalr_interactive_parser.ImmutableInteractiveParser | |||
| :members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts | |||
| @@ -8,7 +8,7 @@ | |||
| - EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md)) | |||
| - Builds a parse-tree (AST) automagically based on the grammar | |||
| - Stand-alone parser generator - create a small independent parser to embed in your project. | |||
| - Flexible error handling by using a "puppet parser" mechanism (LALR only) | |||
| - Flexible error handling by using an interactive parser interface (LALR only) | |||
| - Automatic line & column tracking (for both tokens and matched rules) | |||
| - Automatic terminal collision resolution | |||
| - Standard library of terminals (strings, numbers, names, etc.) | |||
| @@ -1,11 +1,11 @@ | |||
| """ | |||
| Error handling with a puppet | |||
| ================================== | |||
| Error handling using an interactive parser | |||
| ========================================== | |||
| This example demonstrates error handling using a parsing puppet in LALR | |||
| This example demonstrates error handling using an interactive parser in LALR. | |||
| When the parser encounters an UnexpectedToken exception, it creates a | |||
| parsing puppet with the current parse-state, and lets you control how | |||
| interactive parser with the current parse-state, and lets you control how | |||
| to proceed step-by-step. When you've achieved the correct parse-state, | |||
| you can resume the run by returning True. | |||
| """ | |||
| @@ -20,8 +20,8 @@ def ignore_errors(e): | |||
| return True | |||
| elif e.token.type == 'SIGNED_NUMBER': | |||
| # Try to feed a comma and retry the number | |||
| e.puppet.feed_token(Token('COMMA', ',')) | |||
| e.puppet.feed_token(e.token) | |||
| e.interactive_parser.feed_token(Token('COMMA', ',')) | |||
| e.interactive_parser.feed_token(e.token) | |||
| return True | |||
| # Unhandled error. Will stop parse and raise exception | |||
| @@ -3,7 +3,7 @@ | |||
| from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set | |||
| from .tree import Tree | |||
| from .lexer import Token | |||
| from .parsers.lalr_puppet import ParserPuppet | |||
| from .parsers.lalr_interactive_parser import InteractiveParser | |||
# Base exception type; UnexpectedInput (declared further down) derives from it.
class LarkError(Exception):
    pass
| @@ -52,7 +52,7 @@ class UnexpectedInput(LarkError): | |||
class UnexpectedToken(ParseError, UnexpectedInput):
    expected: Set[str]
    considered_rules: Set[str]
    # NOTE(review): `puppet` looks like the older name for `interactive_parser`;
    # presumably kept for backward compatibility - confirm before removing.
    puppet: ParserPuppet
    interactive_parser: InteractiveParser
    accepts: Set[str]
| class UnexpectedCharacters(LexError, UnexpectedInput): | |||
| @@ -5,7 +5,7 @@ from typing import ( | |||
| Literal, Protocol, Tuple, Iterable, | |||
| ) | |||
| from .parsers.lalr_puppet import ParserPuppet | |||
| from .parsers.lalr_interactive_parser import InteractiveParser | |||
| from .visitors import Transformer | |||
| from .lexer import Token, Lexer, TerminalDef | |||
| from .tree import Tree | |||
| @@ -91,7 +91,7 @@ class Lark: | |||
| def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: | |||
| ... | |||
| def parse_interactive(self, text: str = None, start: Optional[str] = None) -> ParserPuppet: | |||
| def parse_interactive(self, text: str = None, start: Optional[str] = None) -> InteractiveParser: | |||
| ... | |||
| @classmethod | |||
| @@ -1,43 +0,0 @@ | |||
| from typing import Set, Dict, Any | |||
| from lark import Token, Tree | |||
class ParserPuppet(object):
    """
    Provides an interface to interactively step through the parser (LALR(1) only for now)

    Accessible via `UnexpectedToken.puppet` (raised by the parser on token error)
    """

    parser: Any
    parser_state: Any
    lexer_state: Any

    def feed_token(self, token: Token) -> Any: ...

    def exhaust_lexer(self) -> None: ...

    def feed_eof(self, last_token: Token = None) -> Any: ...

    # Return annotations that name a class not yet fully defined at this point
    # are quoted so they are valid forward references.
    def copy(self) -> 'ParserPuppet': ...

    def as_immutable(self) -> 'ImmutableParserPuppet': ...

    def pretty(self) -> str: ...

    def choices(self) -> Dict[str, Any]: ...

    def accepts(self) -> Set[str]: ...

    def resume_parse(self) -> Tree: ...
class ImmutableParserPuppet(ParserPuppet):
    # Declared with a default of None at class level.
    result: Any = None

    # Unlike the base-class signatures, these three return a puppet instance.
    def feed_token(self, token: Token) -> 'ImmutableParserPuppet': ...

    def exhaust_lexer(self) -> 'ImmutableParserPuppet': ...

    def feed_eof(self, last_token: Token = None) -> 'ImmutableParserPuppet': ...
| @@ -542,7 +542,7 @@ class Lark(Serialize): | |||
| text (str): Text to be parsed. | |||
| start (str, optional): Required if Lark was given multiple possible start symbols (using the start option). | |||
| on_error (function, optional): if provided, will be called on UnexpectedToken error. Return true to resume parsing. | |||
| LALR only. See examples/advanced/error_puppet.py for an example of how to use on_error. | |||
| LALR only. See examples/advanced/error_handling.py for an example of how to use on_error. | |||
| Returns: | |||
| If a transformer is supplied to ``__init__``, returns whatever is the | |||
| @@ -0,0 +1,132 @@ | |||
| # This module provides a LALR interactive parser, which is used for debugging and error handling | |||
| from copy import copy | |||
| from .. import Token | |||
| from ..exceptions import UnexpectedToken | |||
class InteractiveParser(object):
    """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.

    For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
    """

    def __init__(self, parser, parser_state, lexer_state):
        # Stored as-is; every method below delegates to one of these three.
        self.parser = parser
        self.parser_state = parser_state
        self.lexer_state = lexer_state

    def feed_token(self, token):
        """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.

        Note that ``token`` has to be an instance of ``Token``.

        Returns whatever ``parser_state.feed_token()`` returns.
        """
        # The second argument is True only for the '$END' token.
        return self.parser_state.feed_token(token, token.type == '$END')

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the interactive parser.

        Note that this modifies the instance in place and does not feed an '$END' Token.
        """
        for token in self.lexer_state.lex(self.parser_state):
            self.parser_state.feed_token(token)

    def feed_eof(self, last_token=None):
        """Feed a '$END' Token. Borrows from 'last_token' if given."""
        if last_token is not None:
            eof = Token.new_borrow_pos('$END', '', last_token)
        else:
            eof = Token('$END', '', 0, 1, 1)
        return self.feed_token(eof)

    def __copy__(self):
        """Create a new interactive parser with a separate state.

        Calls to feed_token() won't affect the old instance, and vice-versa.
        """
        # The parser object itself is shared; only the two states are copied.
        return type(self)(
            self.parser,
            copy(self.parser_state),
            copy(self.lexer_state),
        )

    def copy(self):
        """Return a copy of this instance (see ``__copy__``)."""
        # Inside the method body, ``copy`` resolves to the module-level function.
        return copy(self)

    def __eq__(self, other):
        """Two interactive parsers are equal when their parser and lexer states are equal."""
        if not isinstance(other, InteractiveParser):
            # Let Python try the reflected comparison; ``==`` still evaluates
            # to False when the other operand does not handle it either.
            return NotImplemented
        return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state

    def as_immutable(self):
        """Convert to an ``ImmutableInteractiveParser``."""
        p = copy(self)
        return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state)

    def pretty(self):
        """Return the output of ``choices()`` as a string that's easier to read."""
        out = ["Parser choices:"]
        out.extend('\t- %s -> %s' % (k, v) for k, v in self.choices().items())
        out.append('stack size: %s' % len(self.parser_state.state_stack))
        return '\n'.join(out)

    def choices(self):
        """Returns a dictionary of token types, matched to their action in the parser.

        Only returns token types that are accepted by the current state.

        Updated by ``feed_token()``.
        """
        return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]

    def accepts(self):
        """Returns the set of possible tokens that will advance the parser into a new valid state."""
        accepts = set()
        for t in self.choices():
            if t.isupper():  # is terminal?
                # Probe on a copy so this instance's state is left untouched.
                new_cursor = copy(self)
                try:
                    new_cursor.feed_token(Token(t, ''))
                except UnexpectedToken:
                    pass
                else:
                    accepts.add(t)
        return accepts

    def resume_parse(self):
        """Resume automated parsing from the current state."""
        return self.parser.parse_from_state(self.parser_state)
class ImmutableInteractiveParser(InteractiveParser):
    """Same as ``InteractiveParser``, but operations create a new instance instead
    of changing it in-place.
    """

    # Set on the copy returned by feed_token() to the value the underlying feed returned.
    result = None

    def __hash__(self):
        # Hashes the same two fields that InteractiveParser.__eq__ compares.
        return hash((self.parser_state, self.lexer_state))

    def feed_token(self, token):
        """Return a copy of this parser advanced by ``token``; this instance is not modified.

        The value returned by ``InteractiveParser.feed_token`` is stored on the
        copy's ``result`` attribute.
        """
        c = copy(self)
        # Call the base implementation explicitly so the copy is mutated in place.
        c.result = InteractiveParser.feed_token(c, token)
        return c

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the parser.

        Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token.
        """
        cursor = self.as_mutable()
        cursor.exhaust_lexer()
        return cursor.as_immutable()

    def as_mutable(self):
        """Convert to an ``InteractiveParser``."""
        p = copy(self)
        return InteractiveParser(p.parser, p.parser_state, p.lexer_state)
# Deprecated class names for the interactive parser
ParserPuppet = InteractiveParser
ImmutableParserPuppet = ImmutableInteractiveParser
| @@ -8,7 +8,7 @@ from ..lexer import Token | |||
| from ..utils import Serialize | |||
| from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable | |||
| from .lalr_puppet import InteractiveParser | |||
| from .lalr_interactive_parser import InteractiveParser | |||
| from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken | |||
| ###{standalone | |||
| @@ -1,132 +1,3 @@ | |||
| # This module provides a LALR interactive parser, which is used for debugging and error handling | |||
| from copy import copy | |||
| from .. import Token | |||
| from ..exceptions import UnexpectedToken | |||
class InteractiveParser(object):
    """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.

    For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
    """

    def __init__(self, parser, parser_state, lexer_state):
        # Stored as-is; every method below delegates to one of these three.
        self.parser = parser
        self.parser_state = parser_state
        self.lexer_state = lexer_state

    def feed_token(self, token):
        """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.

        Note that ``token`` has to be an instance of ``Token``.

        Returns whatever ``parser_state.feed_token()`` returns.
        """
        # The second argument is True only for the '$END' token.
        return self.parser_state.feed_token(token, token.type == '$END')

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the interactive parser.

        Note that this modifies the instance in place and does not feed an '$END' Token.
        """
        for token in self.lexer_state.lex(self.parser_state):
            self.parser_state.feed_token(token)

    def feed_eof(self, last_token=None):
        """Feed a '$END' Token. Borrows from 'last_token' if given."""
        if last_token is not None:
            eof = Token.new_borrow_pos('$END', '', last_token)
        else:
            eof = Token('$END', '', 0, 1, 1)
        return self.feed_token(eof)

    def __copy__(self):
        """Create a new interactive parser with a separate state.

        Calls to feed_token() won't affect the old instance, and vice-versa.
        """
        # The parser object itself is shared; only the two states are copied.
        return type(self)(
            self.parser,
            copy(self.parser_state),
            copy(self.lexer_state),
        )

    def copy(self):
        """Return a copy of this instance (see ``__copy__``)."""
        # Inside the method body, ``copy`` resolves to the module-level function.
        return copy(self)

    def __eq__(self, other):
        """Two interactive parsers are equal when their parser and lexer states are equal."""
        if not isinstance(other, InteractiveParser):
            # Let Python try the reflected comparison; ``==`` still evaluates
            # to False when the other operand does not handle it either.
            return NotImplemented
        return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state

    def as_immutable(self):
        """Convert to an ``ImmutableInteractiveParser``."""
        p = copy(self)
        return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state)

    def pretty(self):
        """Return the output of ``choices()`` as a string that's easier to read."""
        out = ["Parser choices:"]
        out.extend('\t- %s -> %s' % (k, v) for k, v in self.choices().items())
        out.append('stack size: %s' % len(self.parser_state.state_stack))
        return '\n'.join(out)

    def choices(self):
        """Returns a dictionary of token types, matched to their action in the parser.

        Only returns token types that are accepted by the current state.

        Updated by ``feed_token()``.
        """
        return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]

    def accepts(self):
        """Returns the set of possible tokens that will advance the parser into a new valid state."""
        accepts = set()
        for t in self.choices():
            if t.isupper():  # is terminal?
                # Probe on a copy so this instance's state is left untouched.
                new_cursor = copy(self)
                try:
                    new_cursor.feed_token(Token(t, ''))
                except UnexpectedToken:
                    pass
                else:
                    accepts.add(t)
        return accepts

    def resume_parse(self):
        """Resume automated parsing from the current state."""
        return self.parser.parse_from_state(self.parser_state)
class ImmutableInteractiveParser(InteractiveParser):
    """Same as ``InteractiveParser``, but operations create a new instance instead
    of changing it in-place.
    """

    # Set on the copy returned by feed_token() to the value the underlying feed returned.
    result = None

    def __hash__(self):
        # Hashes the same two fields that InteractiveParser.__eq__ compares.
        return hash((self.parser_state, self.lexer_state))

    def feed_token(self, token):
        """Return a copy of this parser advanced by ``token``; this instance is not modified.

        The value returned by ``InteractiveParser.feed_token`` is stored on the
        copy's ``result`` attribute.
        """
        c = copy(self)
        # Call the base implementation explicitly so the copy is mutated in place.
        c.result = InteractiveParser.feed_token(c, token)
        return c

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the parser.

        Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token.
        """
        cursor = self.as_mutable()
        cursor.exhaust_lexer()
        return cursor.as_immutable()

    def as_mutable(self):
        """Convert to an ``InteractiveParser``."""
        p = copy(self)
        return InteractiveParser(p.parser, p.parser_state, p.lexer_state)
# Deprecated class names for the interactive parser
ParserPuppet = InteractiveParser
ImmutableParserPuppet = ImmutableInteractiveParser
| # Deprecated | |||
| from .lalr_interactive_parser import ParserPuppet, ImmutableParserPuppet | |||
| @@ -2395,8 +2395,8 @@ def _make_parser_test(LEXER, PARSER): | |||
| """, regex=True) | |||
| self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') | |||
| @unittest.skipIf(PARSER!='lalr', "Puppet is only implemented for LALR at the moment") | |||
| def test_parser_puppet(self): | |||
| @unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment") | |||
| def test_parser_interactive_parser(self): | |||
| g = _Lark(r''' | |||
| start: A+ B* | |||
| @@ -2432,8 +2432,8 @@ def _make_parser_test(LEXER, PARSER): | |||
| res = ip_copy.feed_eof() | |||
| self.assertEqual(res, Tree('start', ['a', 'b', 'b'])) | |||
| @unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now") | |||
| def test_error_with_puppet(self): | |||
| @unittest.skipIf(PARSER!='lalr', "interactive_parser error handling only works with LALR for now") | |||
| def test_error_with_interactive_parser(self): | |||
| def ignore_errors(e): | |||
| if isinstance(e, UnexpectedCharacters): | |||
| # Skip bad character | |||
| @@ -2445,8 +2445,8 @@ def _make_parser_test(LEXER, PARSER): | |||
| return True | |||
| elif e.token.type == 'SIGNED_NUMBER': | |||
| # Try to feed a comma and retry the number | |||
| e.puppet.feed_token(Token('COMMA', ',')) | |||
| e.puppet.feed_token(e.token) | |||
| e.interactive_parser.feed_token(Token('COMMA', ',')) | |||
| e.interactive_parser.feed_token(e.token) | |||
| return True | |||