@@ -69,8 +69,8 @@ UnexpectedInput | |||
InteractiveParser | |||
----------------- | |||
.. autoclass:: lark.parsers.lalr_puppet.InteractiveParser | |||
.. autoclass:: lark.parsers.lalr_interactive_parser.InteractiveParser | |||
:members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts | |||
.. autoclass:: lark.parsers.lalr_puppet.ImmutableInteractiveParser | |||
.. autoclass:: lark.parsers.lalr_interactive_parser.ImmutableInteractiveParser | |||
:members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts |
@@ -8,7 +8,7 @@ | |||
- EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md)) | |||
- Builds a parse-tree (AST) automagically based on the grammar | |||
- Stand-alone parser generator - create a small independent parser to embed in your project. | |||
- Flexible error handling by using a "puppet parser" mechanism (LALR only) | |||
- Flexible error handling by using an interactive parser interface (LALR only) | |||
- Automatic line & column tracking (for both tokens and matched rules) | |||
- Automatic terminal collision resolution | |||
- Standard library of terminals (strings, numbers, names, etc.) | |||
@@ -1,11 +1,11 @@ | |||
""" | |||
Error handling with a puppet | |||
================================== | |||
Error handling using an interactive parser | |||
========================================== | |||
This example demonstrates error handling using a parsing puppet in LALR | |||
This example demonstrates error handling using an interactive parser in LALR. | |||
When the parser encounters an UnexpectedToken exception, it creates a | |||
parsing puppet with the current parse-state, and lets you control how | |||
new interactive parser with the current parse-state, and lets you control how | |||
to proceed step-by-step. When you've achieved the correct parse-state, | |||
you can resume the run by returning True. | |||
""" | |||
@@ -20,8 +20,8 @@ def ignore_errors(e): | |||
return True | |||
elif e.token.type == 'SIGNED_NUMBER': | |||
# Try to feed a comma and retry the number | |||
e.puppet.feed_token(Token('COMMA', ',')) | |||
e.puppet.feed_token(e.token) | |||
e.interactive_parser.feed_token(Token('COMMA', ',')) | |||
e.interactive_parser.feed_token(e.token) | |||
return True | |||
# Unhandled error. Will stop parse and raise exception |
@@ -3,7 +3,7 @@ | |||
from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set | |||
from .tree import Tree | |||
from .lexer import Token | |||
from .parsers.lalr_puppet import ParserPuppet | |||
from .parsers.lalr_interactive_parser import InteractiveParser | |||
class LarkError(Exception): | |||
pass | |||
@@ -52,7 +52,7 @@ class UnexpectedInput(LarkError): | |||
class UnexpectedToken(ParseError, UnexpectedInput): | |||
expected: Set[str] | |||
considered_rules: Set[str] | |||
puppet: ParserPuppet | |||
interactive_parser: InteractiveParser | |||
accepts: Set[str] | |||
class UnexpectedCharacters(LexError, UnexpectedInput): | |||
@@ -5,7 +5,7 @@ from typing import ( | |||
Literal, Protocol, Tuple, Iterable, | |||
) | |||
from .parsers.lalr_puppet import ParserPuppet | |||
from .parsers.lalr_interactive_parser import InteractiveParser | |||
from .visitors import Transformer | |||
from .lexer import Token, Lexer, TerminalDef | |||
from .tree import Tree | |||
@@ -91,7 +91,7 @@ class Lark: | |||
def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: | |||
... | |||
def parse_interactive(self, text: str = None, start: Optional[str] = None) -> ParserPuppet: | |||
def parse_interactive(self, text: str = None, start: Optional[str] = None) -> InteractiveParser: | |||
... | |||
@classmethod | |||
@@ -1,43 +0,0 @@ | |||
from typing import Set, Dict, Any | |||
from lark import Token, Tree | |||
class ParserPuppet(object):
    """
    Interface for stepping through the parser interactively (LALR(1) only for now).

    Accessible through the `puppet` attribute of the `UnexpectedToken`
    exception that the parser raises on a token error.
    """

    # The stub leaves the concrete types of these three attributes open.
    parser: Any
    parser_state: Any
    lexer_state: Any

    def feed_token(self, token: Token) -> Any:
        ...

    def exhaust_lexer(self) -> None:
        ...

    def feed_eof(self, last_token: Token = None) -> Any:
        ...

    def copy(self) -> ParserPuppet:
        ...

    def as_immutable(self) -> ImmutableParserPuppet:
        ...

    def pretty(self) -> str:
        ...

    def choices(self) -> Dict[str, Any]:
        ...

    def accepts(self) -> Set[str]:
        ...

    def resume_parse(self) -> Tree:
        ...
class ImmutableParserPuppet(ParserPuppet):
    # Typed as Any and initialised to None at class level.
    result: Any = None

    # The feeding operations are typed to return an ImmutableParserPuppet.
    def feed_token(self, token: Token) -> ImmutableParserPuppet:
        ...

    def exhaust_lexer(self) -> ImmutableParserPuppet:
        ...

    def feed_eof(self, last_token: Token = None) -> ImmutableParserPuppet:
        ...
@@ -542,7 +542,7 @@ class Lark(Serialize): | |||
text (str): Text to be parsed. | |||
start (str, optional): Required if Lark was given multiple possible start symbols (using the start option). | |||
on_error (function, optional): if provided, will be called on UnexpectedToken error. Return true to resume parsing. | |||
LALR only. See examples/advanced/error_puppet.py for an example of how to use on_error. | |||
LALR only. See examples/advanced/error_handling.py for an example of how to use on_error. | |||
Returns: | |||
If a transformer is supplied to ``__init__``, returns whatever is the | |||
@@ -0,0 +1,132 @@ | |||
# This module provides a LALR interactive parser, which is used for debugging and error handling | |||
from copy import copy | |||
from .. import Token | |||
from ..exceptions import UnexpectedToken | |||
class InteractiveParser(object):
    """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.

    For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.

    Attributes:
        parser: Object whose ``parse_from_state()`` is used by ``resume_parse()``.
        parser_state: Current parser state; tokens are fed into it.
        lexer_state: Lexer state that ``exhaust_lexer()`` draws the remaining tokens from.
    """

    def __init__(self, parser, parser_state, lexer_state):
        self.parser = parser
        self.parser_state = parser_state
        self.lexer_state = lexer_state

    def feed_token(self, token):
        """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.

        Note that ``token`` has to be an instance of ``Token``.

        Returns whatever ``parser_state.feed_token()`` returns.
        """
        # The second argument tells the parser state whether this is the '$END' token.
        return self.parser_state.feed_token(token, token.type == '$END')

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the interactive parser.

        Note that this modifies the instance in place and does not feed an '$END' Token.
        """
        for token in self.lexer_state.lex(self.parser_state):
            self.parser_state.feed_token(token)

    def feed_eof(self, last_token=None):
        """Feed a '$END' Token. Borrows from 'last_token' if given."""
        if last_token is not None:
            eof = Token.new_borrow_pos('$END', '', last_token)
        else:
            # No token to borrow a position from: use fixed placeholder position arguments.
            eof = Token('$END', '', 0, 1, 1)
        return self.feed_token(eof)

    def __copy__(self):
        """Create a new interactive parser with a separate state.

        Calls to feed_token() won't affect the old instance, and vice-versa.
        """
        # type(self) keeps the concrete class, so subclasses copy to themselves.
        # The parser object is shared; only the two state objects are copied.
        return type(self)(
            self.parser,
            copy(self.parser_state),
            copy(self.lexer_state),
        )

    def copy(self):
        """Return a copy of this interactive parser (see ``__copy__``)."""
        return copy(self)

    def __eq__(self, other):
        """Two interactive parsers are equal when their parser and lexer states are equal.

        The ``parser`` attribute does not take part in the comparison.
        """
        if not isinstance(other, InteractiveParser):
            # Let Python try the reflected comparison; ``==`` still yields False
            # when the other operand cannot handle the comparison either.
            return NotImplemented
        return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state

    def as_immutable(self):
        """Convert to an ``ImmutableInteractiveParser``."""
        # Work on a copy so the immutable instance does not share state with this one.
        p = copy(self)
        return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state)

    def pretty(self):
        """Return the output of ``choices()`` as a string that's easier to read."""
        out = ["Parser choices:"]
        out.extend('\t- %s -> %s' % (k, v) for k, v in self.choices().items())
        out.append('stack size: %s' % len(self.parser_state.state_stack))
        return '\n'.join(out)

    def choices(self):
        """Returns a dictionary of token types, matched to their action in the parser.

        Only returns token types that are accepted by the current state.

        Updated by ``feed_token()``.
        """
        return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]

    def accepts(self):
        """Returns the set of possible tokens that will advance the parser into a new valid state."""
        accepts = set()
        for t in self.choices():
            if not t.isupper():  # only terminals are considered
                continue
            # Probe on a copy so this instance's state is left untouched.
            new_cursor = copy(self)
            try:
                new_cursor.feed_token(Token(t, ''))
            except UnexpectedToken:
                continue
            accepts.add(t)
        return accepts

    def resume_parse(self):
        """Resume automated parsing from the current state."""
        return self.parser.parse_from_state(self.parser_state)
class ImmutableInteractiveParser(InteractiveParser):
    """Variant of ``InteractiveParser`` whose operations return a new instance
    rather than modifying the one they are called on.
    """

    # Set on the copy returned by feed_token() to the value the underlying feed produced.
    result = None

    def __hash__(self):
        state_key = (self.parser_state, self.lexer_state)
        return hash(state_key)

    def feed_token(self, token):
        """Return a copy of this parser that has been fed ``token``.

        The value returned by ``InteractiveParser.feed_token()`` is stored on
        the copy's ``result`` attribute.
        """
        advanced = copy(self)
        advanced.result = InteractiveParser.feed_token(advanced, token)
        return advanced

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the parser.

        Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token"""
        # Do the in-place feeding on a mutable conversion, then convert back.
        mutable = self.as_mutable()
        mutable.exhaust_lexer()
        return mutable.as_immutable()

    def as_mutable(self):
        """Convert to an ``InteractiveParser``."""
        duplicate = copy(self)
        return InteractiveParser(
            duplicate.parser,
            duplicate.parser_state,
            duplicate.lexer_state,
        )
# Deprecated class names for the interactive parser.
# Each old "puppet" name is bound to the corresponding interactive-parser class.
ParserPuppet = InteractiveParser
ImmutableParserPuppet = ImmutableInteractiveParser
@@ -8,7 +8,7 @@ from ..lexer import Token | |||
from ..utils import Serialize | |||
from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable | |||
from .lalr_puppet import InteractiveParser | |||
from .lalr_interactive_parser import InteractiveParser | |||
from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken | |||
###{standalone | |||
@@ -1,132 +1,3 @@ | |||
# This module provides a LALR interactive parser, which is used for debugging and error handling | |||
from copy import copy | |||
from .. import Token | |||
from ..exceptions import UnexpectedToken | |||
class InteractiveParser(object):
    """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.

    For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.

    Attributes:
        parser: Object whose ``parse_from_state()`` is used by ``resume_parse()``.
        parser_state: Current parser state; tokens are fed into it.
        lexer_state: Lexer state that ``exhaust_lexer()`` draws the remaining tokens from.
    """

    def __init__(self, parser, parser_state, lexer_state):
        self.parser = parser
        self.parser_state = parser_state
        self.lexer_state = lexer_state

    def feed_token(self, token):
        """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.

        Note that ``token`` has to be an instance of ``Token``.

        Returns whatever ``parser_state.feed_token()`` returns.
        """
        # The second argument tells the parser state whether this is the '$END' token.
        return self.parser_state.feed_token(token, token.type == '$END')

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the interactive parser.

        Note that this modifies the instance in place and does not feed an '$END' Token.
        """
        for token in self.lexer_state.lex(self.parser_state):
            self.parser_state.feed_token(token)

    def feed_eof(self, last_token=None):
        """Feed a '$END' Token. Borrows from 'last_token' if given."""
        if last_token is not None:
            eof = Token.new_borrow_pos('$END', '', last_token)
        else:
            # No token to borrow a position from: use fixed placeholder position arguments.
            eof = Token('$END', '', 0, 1, 1)
        return self.feed_token(eof)

    def __copy__(self):
        """Create a new interactive parser with a separate state.

        Calls to feed_token() won't affect the old instance, and vice-versa.
        """
        # type(self) keeps the concrete class, so subclasses copy to themselves.
        # The parser object is shared; only the two state objects are copied.
        return type(self)(
            self.parser,
            copy(self.parser_state),
            copy(self.lexer_state),
        )

    def copy(self):
        """Return a copy of this interactive parser (see ``__copy__``)."""
        return copy(self)

    def __eq__(self, other):
        """Two interactive parsers are equal when their parser and lexer states are equal.

        The ``parser`` attribute does not take part in the comparison.
        """
        if not isinstance(other, InteractiveParser):
            # Let Python try the reflected comparison; ``==`` still yields False
            # when the other operand cannot handle the comparison either.
            return NotImplemented
        return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state

    def as_immutable(self):
        """Convert to an ``ImmutableInteractiveParser``."""
        # Work on a copy so the immutable instance does not share state with this one.
        p = copy(self)
        return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state)

    def pretty(self):
        """Return the output of ``choices()`` as a string that's easier to read."""
        out = ["Parser choices:"]
        out.extend('\t- %s -> %s' % (k, v) for k, v in self.choices().items())
        out.append('stack size: %s' % len(self.parser_state.state_stack))
        return '\n'.join(out)

    def choices(self):
        """Returns a dictionary of token types, matched to their action in the parser.

        Only returns token types that are accepted by the current state.

        Updated by ``feed_token()``.
        """
        return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]

    def accepts(self):
        """Returns the set of possible tokens that will advance the parser into a new valid state."""
        accepts = set()
        for t in self.choices():
            if not t.isupper():  # only terminals are considered
                continue
            # Probe on a copy so this instance's state is left untouched.
            new_cursor = copy(self)
            try:
                new_cursor.feed_token(Token(t, ''))
            except UnexpectedToken:
                continue
            accepts.add(t)
        return accepts

    def resume_parse(self):
        """Resume automated parsing from the current state."""
        return self.parser.parse_from_state(self.parser_state)
class ImmutableInteractiveParser(InteractiveParser):
    """Variant of ``InteractiveParser`` whose operations return a new instance
    rather than modifying the one they are called on.
    """

    # Set on the copy returned by feed_token() to the value the underlying feed produced.
    result = None

    def __hash__(self):
        state_key = (self.parser_state, self.lexer_state)
        return hash(state_key)

    def feed_token(self, token):
        """Return a copy of this parser that has been fed ``token``.

        The value returned by ``InteractiveParser.feed_token()`` is stored on
        the copy's ``result`` attribute.
        """
        advanced = copy(self)
        advanced.result = InteractiveParser.feed_token(advanced, token)
        return advanced

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the parser.

        Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token"""
        # Do the in-place feeding on a mutable conversion, then convert back.
        mutable = self.as_mutable()
        mutable.exhaust_lexer()
        return mutable.as_immutable()

    def as_mutable(self):
        """Convert to an ``InteractiveParser``."""
        duplicate = copy(self)
        return InteractiveParser(
            duplicate.parser,
            duplicate.parser_state,
            duplicate.lexer_state,
        )
# Deprecated class names for the interactive parser.
# Each old "puppet" name is bound to the corresponding interactive-parser class.
ParserPuppet = InteractiveParser
ImmutableParserPuppet = ImmutableInteractiveParser
# Deprecated | |||
from .lalr_interactive_parser import ParserPuppet, ImmutableParserPuppet |
@@ -2395,8 +2395,8 @@ def _make_parser_test(LEXER, PARSER): | |||
""", regex=True) | |||
self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') | |||
@unittest.skipIf(PARSER!='lalr', "Puppet is only implemented for LALR at the moment") | |||
def test_parser_puppet(self): | |||
@unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment") | |||
def test_parser_interactive_parser(self): | |||
g = _Lark(r''' | |||
start: A+ B* | |||
@@ -2432,8 +2432,8 @@ def _make_parser_test(LEXER, PARSER): | |||
res = ip_copy.feed_eof() | |||
self.assertEqual(res, Tree('start', ['a', 'b', 'b'])) | |||
@unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now") | |||
def test_error_with_puppet(self): | |||
@unittest.skipIf(PARSER!='lalr', "interactive_parser error handling only works with LALR for now") | |||
def test_error_with_interactive_parser(self): | |||
def ignore_errors(e): | |||
if isinstance(e, UnexpectedCharacters): | |||
# Skip bad character | |||
@@ -2445,8 +2445,8 @@ def _make_parser_test(LEXER, PARSER): | |||
return True | |||
elif e.token.type == 'SIGNED_NUMBER': | |||
# Try to feed a comma and retry the number | |||
e.puppet.feed_token(Token('COMMA', ',')) | |||
e.puppet.feed_token(e.token) | |||
e.interactive_parser.feed_token(Token('COMMA', ',')) | |||
e.interactive_parser.feed_token(e.token) | |||
return True | |||