@@ -69,8 +69,8 @@ UnexpectedInput | |||||
InteractiveParser | InteractiveParser | ||||
----------------- | ----------------- | ||||
.. autoclass:: lark.parsers.lalr_puppet.InteractiveParser | |||||
.. autoclass:: lark.parsers.lalr_interactive_parser.InteractiveParser | |||||
:members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts | :members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts | ||||
.. autoclass:: lark.parsers.lalr_puppet.ImmutableInteractiveParser | |||||
.. autoclass:: lark.parsers.lalr_interactive_parser.ImmutableInteractiveParser | |||||
:members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts | :members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts |
@@ -8,7 +8,7 @@ | |||||
- EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md)) | - EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md)) | ||||
- Builds a parse-tree (AST) automagically based on the grammar | - Builds a parse-tree (AST) automagically based on the grammar | ||||
- Stand-alone parser generator - create a small independent parser to embed in your project. | - Stand-alone parser generator - create a small independent parser to embed in your project. | ||||
- Flexible error handling by using a "puppet parser" mechanism (LALR only) | |||||
- Flexible error handling by using an interactive parser interface (LALR only) | |||||
- Automatic line & column tracking (for both tokens and matched rules) | - Automatic line & column tracking (for both tokens and matched rules) | ||||
- Automatic terminal collision resolution | - Automatic terminal collision resolution | ||||
- Standard library of terminals (strings, numbers, names, etc.) | - Standard library of terminals (strings, numbers, names, etc.) | ||||
@@ -1,11 +1,11 @@ | |||||
""" | """ | ||||
Error handling with a puppet | |||||
================================== | |||||
Error handling using an interactive parser | |||||
========================================== | |||||
This example demonstrates error handling using a parsing puppet in LALR | |||||
This example demonstrates error handling using an interactive parser in LALR | |||||
When the parser encounters an UnexpectedToken exception, it creates a | When the parser encounters an UnexpectedToken exception, it creates a | ||||
parsing puppet with the current parse-state, and lets you control how | |||||
new interactive parser with the current parse-state, and lets you control how | |||||
to proceed step-by-step. When you've achieved the correct parse-state, | to proceed step-by-step. When you've achieved the correct parse-state, | ||||
you can resume the run by returning True. | you can resume the run by returning True. | ||||
""" | """ | ||||
@@ -20,8 +20,8 @@ def ignore_errors(e): | |||||
return True | return True | ||||
elif e.token.type == 'SIGNED_NUMBER': | elif e.token.type == 'SIGNED_NUMBER': | ||||
# Try to feed a comma and retry the number | # Try to feed a comma and retry the number | ||||
e.puppet.feed_token(Token('COMMA', ',')) | |||||
e.puppet.feed_token(e.token) | |||||
e.interactive_parser.feed_token(Token('COMMA', ',')) | |||||
e.interactive_parser.feed_token(e.token) | |||||
return True | return True | ||||
# Unhandled error. Will stop parse and raise exception | # Unhandled error. Will stop parse and raise exception |
@@ -3,7 +3,7 @@ | |||||
from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set | from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set | ||||
from .tree import Tree | from .tree import Tree | ||||
from .lexer import Token | from .lexer import Token | ||||
from .parsers.lalr_puppet import ParserPuppet | |||||
from .parsers.lalr_interactive_parser import InteractiveParser | |||||
class LarkError(Exception): | class LarkError(Exception): | ||||
pass | pass | ||||
@@ -52,7 +52,7 @@ class UnexpectedInput(LarkError): | |||||
class UnexpectedToken(ParseError, UnexpectedInput): | class UnexpectedToken(ParseError, UnexpectedInput): | ||||
expected: Set[str] | expected: Set[str] | ||||
considered_rules: Set[str] | considered_rules: Set[str] | ||||
puppet: ParserPuppet | |||||
interactive_parser: InteractiveParser | |||||
accepts: Set[str] | accepts: Set[str] | ||||
class UnexpectedCharacters(LexError, UnexpectedInput): | class UnexpectedCharacters(LexError, UnexpectedInput): | ||||
@@ -5,7 +5,7 @@ from typing import ( | |||||
Literal, Protocol, Tuple, Iterable, | Literal, Protocol, Tuple, Iterable, | ||||
) | ) | ||||
from .parsers.lalr_puppet import ParserPuppet | |||||
from .parsers.lalr_interactive_parser import InteractiveParser | |||||
from .visitors import Transformer | from .visitors import Transformer | ||||
from .lexer import Token, Lexer, TerminalDef | from .lexer import Token, Lexer, TerminalDef | ||||
from .tree import Tree | from .tree import Tree | ||||
@@ -91,7 +91,7 @@ class Lark: | |||||
def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: | def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: | ||||
... | ... | ||||
def parse_interactive(self, text: str = None, start: Optional[str] = None) -> ParserPuppet: | |||||
def parse_interactive(self, text: str = None, start: Optional[str] = None) -> InteractiveParser: | |||||
... | ... | ||||
@classmethod | @classmethod | ||||
@@ -1,43 +0,0 @@ | |||||
from typing import Set, Dict, Any | |||||
from lark import Token, Tree | |||||
class ParserPuppet(object): | |||||
""" | |||||
Provides an interface to interactively step through the parser (LALR(1) only for now) | |||||
Accessible via `UnexpectedToken.puppet` (raised by the parser on token error) | |||||
""" | |||||
parser: Any | |||||
parser_state: Any | |||||
lexer_state: Any | |||||
def feed_token(self, token: Token) -> Any: ... | |||||
def exhaust_lexer(self) -> None: ... | |||||
def feed_eof(self, last_token: Token = None) -> Any: ... | |||||
def copy(self) -> ParserPuppet: ... | |||||
def as_immutable(self) -> ImmutableParserPuppet: ... | |||||
def pretty(self) -> str: ... | |||||
def choices(self) -> Dict[str, Any]: ... | |||||
def accepts(self) -> Set[str]: ... | |||||
def resume_parse(self) -> Tree: ... | |||||
class ImmutableParserPuppet(ParserPuppet): | |||||
result: Any = None | |||||
def feed_token(self, token: Token) -> ImmutableParserPuppet: ... | |||||
def exhaust_lexer(self) -> ImmutableParserPuppet: ... | |||||
def feed_eof(self, last_token: Token = None) -> ImmutableParserPuppet: ... |
@@ -542,7 +542,7 @@ class Lark(Serialize): | |||||
text (str): Text to be parsed. | text (str): Text to be parsed. | ||||
start (str, optional): Required if Lark was given multiple possible start symbols (using the start option). | start (str, optional): Required if Lark was given multiple possible start symbols (using the start option). | ||||
on_error (function, optional): if provided, will be called on UnexpectedToken error. Return true to resume parsing. | on_error (function, optional): if provided, will be called on UnexpectedToken error. Return true to resume parsing. | ||||
LALR only. See examples/advanced/error_puppet.py for an example of how to use on_error. | |||||
LALR only. See examples/advanced/error_handling.py for an example of how to use on_error. | |||||
Returns: | Returns: | ||||
If a transformer is supplied to ``__init__``, returns whatever is the | If a transformer is supplied to ``__init__``, returns whatever is the | ||||
@@ -0,0 +1,132 @@ | |||||
# This module provides a LALR interactive parser, which is used for debugging and error handling | |||||
from copy import copy | |||||
from .. import Token | |||||
from ..exceptions import UnexpectedToken | |||||
class InteractiveParser(object):
    """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.

    For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
    """

    def __init__(self, parser, parser_state, lexer_state):
        """Store the parser together with the parser and lexer states to step through.

        The three arguments are kept as-is on the instance; they are not copied here.
        """
        self.parser = parser
        self.parser_state = parser_state
        self.lexer_state = lexer_state

    def feed_token(self, token):
        """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.

        Note that ``token`` has to be an instance of ``Token``.

        Returns whatever the underlying parser state returns for this token.
        """
        # The second argument tells the parser state whether this is the end-of-input token.
        return self.parser_state.feed_token(token, token.type == '$END')

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the interactive parser.

        Note that this modifies the instance in place and does not feed an '$END' Token
        """
        for token in self.lexer_state.lex(self.parser_state):
            self.parser_state.feed_token(token)

    def feed_eof(self, last_token=None):
        """Feed a '$END' Token. Borrows from 'last_token' if given."""
        # NOTE(review): the literal (0, 1, 1) arguments are presumably a default
        # position/line/column for the synthetic token -- confirm against ``Token``.
        eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else Token('$END', '', 0, 1, 1)
        return self.feed_token(eof)

    def __copy__(self):
        """Create a new interactive parser with a separate state.

        Calls to feed_token() won't affect the old instance, and vice-versa.
        """
        # ``type(self)`` keeps the concrete class, so subclasses copy to their own type.
        # The parser object itself is shared; only the two states are copied.
        return type(self)(
            self.parser,
            copy(self.parser_state),
            copy(self.lexer_state),
        )

    def copy(self):
        """Return a copy of this instance (see ``__copy__``)."""
        return copy(self)

    def __eq__(self, other):
        """Two interactive parsers are equal when both their parser and lexer states are equal."""
        if not isinstance(other, InteractiveParser):
            # Let Python try the reflected comparison before falling back to "not equal".
            return NotImplemented

        return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state

    def as_immutable(self):
        """Convert to an ``ImmutableInteractiveParser``."""
        # Work on a copy so the immutable instance does not share state with this one.
        p = copy(self)
        return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state)

    def pretty(self):
        """Return the output of ``choices()`` as a string that's easier to read."""
        out = ["Parser choices:"]
        for k, v in self.choices().items():
            out.append('\t- %s -> %s' % (k, v))
        out.append('stack size: %s' % len(self.parser_state.state_stack))
        return '\n'.join(out)

    def choices(self):
        """Returns a dictionary of token types, matched to their action in the parser.

        Only returns token types that are accepted by the current state.

        Updated by ``feed_token()``.
        """
        return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]

    def accepts(self):
        """Returns the set of possible tokens that will advance the parser into a new valid state."""
        accepts = set()
        for t in self.choices():
            if t.isupper():  # is terminal?
                # Probe on a copy so that this instance's state is left untouched.
                new_cursor = copy(self)
                try:
                    new_cursor.feed_token(Token(t, ''))
                except UnexpectedToken:
                    pass
                else:
                    accepts.add(t)
        return accepts

    def resume_parse(self):
        """Resume automated parsing from the current state."""
        return self.parser.parse_from_state(self.parser_state)
class ImmutableInteractiveParser(InteractiveParser):
    """Same as ``InteractiveParser``, but operations create a new instance instead
    of changing it in-place.
    """

    # Set by ``feed_token()`` on the instance it returns, to the value the
    # underlying (mutable) ``feed_token`` produced; stays None until then.
    result = None

    def __hash__(self):
        """Hash on the same pair of states that equality compares."""
        return hash((self.parser_state, self.lexer_state))

    def feed_token(self, token):
        """Feed ``token`` to a copy of this parser and return that copy.

        This instance is left unchanged; the value returned by the underlying
        ``InteractiveParser.feed_token`` is stored on the copy's ``result``.
        """
        c = copy(self)
        c.result = InteractiveParser.feed_token(c, token)
        return c

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the parser.

        Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token
        """
        # Do the in-place work on a mutable copy, then convert the result back.
        cursor = self.as_mutable()
        cursor.exhaust_lexer()
        return cursor.as_immutable()

    def as_mutable(self):
        """Convert to an ``InteractiveParser``."""
        # Copy first so the mutable parser does not share state with this instance.
        p = copy(self)
        return InteractiveParser(p.parser, p.parser_state, p.lexer_state)
# Deprecated class names for the interactive parser.
# These are plain aliases of the classes defined above, under their former "puppet" names.
ParserPuppet = InteractiveParser
ImmutableParserPuppet = ImmutableInteractiveParser
@@ -8,7 +8,7 @@ from ..lexer import Token | |||||
from ..utils import Serialize | from ..utils import Serialize | ||||
from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable | from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable | ||||
from .lalr_puppet import InteractiveParser | |||||
from .lalr_interactive_parser import InteractiveParser | |||||
from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken | from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken | ||||
###{standalone | ###{standalone | ||||
@@ -1,132 +1,3 @@ | |||||
# This module provides a LALR interactive parser, which is used for debugging and error handling | |||||
from copy import copy | |||||
from .. import Token | |||||
from ..exceptions import UnexpectedToken | |||||
class InteractiveParser(object): | |||||
"""InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR. | |||||
For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``. | |||||
""" | |||||
def __init__(self, parser, parser_state, lexer_state): | |||||
self.parser = parser | |||||
self.parser_state = parser_state | |||||
self.lexer_state = lexer_state | |||||
def feed_token(self, token): | |||||
"""Feed the parser with a token, and advance it to the next state, as if it received it from the lexer. | |||||
Note that ``token`` has to be an instance of ``Token``. | |||||
""" | |||||
return self.parser_state.feed_token(token, token.type == '$END') | |||||
def exhaust_lexer(self): | |||||
"""Try to feed the rest of the lexer state into the interactive parser. | |||||
Note that this modifies the instance in place and does not feed an '$END' Token""" | |||||
for token in self.lexer_state.lex(self.parser_state): | |||||
self.parser_state.feed_token(token) | |||||
def feed_eof(self, last_token=None): | |||||
"""Feed a '$END' Token. Borrows from 'last_token' if given.""" | |||||
eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else Token('$END', '', 0, 1, 1) | |||||
return self.feed_token(eof) | |||||
def __copy__(self): | |||||
"""Create a new interactive parser with a separate state. | |||||
Calls to feed_token() won't affect the old instance, and vice-versa. | |||||
""" | |||||
return type(self)( | |||||
self.parser, | |||||
copy(self.parser_state), | |||||
copy(self.lexer_state), | |||||
) | |||||
def copy(self): | |||||
return copy(self) | |||||
def __eq__(self, other): | |||||
if not isinstance(other, InteractiveParser): | |||||
return False | |||||
return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state | |||||
def as_immutable(self): | |||||
"""Convert to an ``ImmutableInteractiveParser``.""" | |||||
p = copy(self) | |||||
return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state) | |||||
def pretty(self): | |||||
"""Print the output of ``choices()`` in a way that's easier to read.""" | |||||
out = ["Parser choices:"] | |||||
for k, v in self.choices().items(): | |||||
out.append('\t- %s -> %s' % (k, v)) | |||||
out.append('stack size: %s' % len(self.parser_state.state_stack)) | |||||
return '\n'.join(out) | |||||
def choices(self): | |||||
"""Returns a dictionary of token types, matched to their action in the parser. | |||||
Only returns token types that are accepted by the current state. | |||||
Updated by ``feed_token()``. | |||||
""" | |||||
return self.parser_state.parse_conf.parse_table.states[self.parser_state.position] | |||||
def accepts(self): | |||||
"""Returns the set of possible tokens that will advance the parser into a new valid state.""" | |||||
accepts = set() | |||||
for t in self.choices(): | |||||
if t.isupper(): # is terminal? | |||||
new_cursor = copy(self) | |||||
try: | |||||
new_cursor.feed_token(Token(t, '')) | |||||
except UnexpectedToken: | |||||
pass | |||||
else: | |||||
accepts.add(t) | |||||
return accepts | |||||
def resume_parse(self): | |||||
"""Resume automated parsing from the current state.""" | |||||
return self.parser.parse_from_state(self.parser_state) | |||||
class ImmutableInteractiveParser(InteractiveParser): | |||||
"""Same as ``InteractiveParser``, but operations create a new instance instead | |||||
of changing it in-place. | |||||
""" | |||||
result = None | |||||
def __hash__(self): | |||||
return hash((self.parser_state, self.lexer_state)) | |||||
def feed_token(self, token): | |||||
c = copy(self) | |||||
c.result = InteractiveParser.feed_token(c, token) | |||||
return c | |||||
def exhaust_lexer(self): | |||||
"""Try to feed the rest of the lexer state into the parser. | |||||
Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token""" | |||||
cursor = self.as_mutable() | |||||
cursor.exhaust_lexer() | |||||
return cursor.as_immutable() | |||||
def as_mutable(self): | |||||
"""Convert to an ``InteractiveParser``.""" | |||||
p = copy(self) | |||||
return InteractiveParser(p.parser, p.parser_state, p.lexer_state) | |||||
# Deprecated class names for the interactive parser | |||||
ParserPuppet = InteractiveParser | |||||
ImmutableParserPuppet = ImmutableInteractiveParser | |||||
# Deprecated | |||||
from .lalr_interactive_parser import ParserPuppet, ImmutableParserPuppet |
@@ -2395,8 +2395,8 @@ def _make_parser_test(LEXER, PARSER): | |||||
""", regex=True) | """, regex=True) | ||||
self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') | self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') | ||||
@unittest.skipIf(PARSER!='lalr', "Puppet is only implemented for LALR at the moment") | |||||
def test_parser_puppet(self): | |||||
@unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment") | |||||
def test_parser_interactive_parser(self): | |||||
g = _Lark(r''' | g = _Lark(r''' | ||||
start: A+ B* | start: A+ B* | ||||
@@ -2432,8 +2432,8 @@ def _make_parser_test(LEXER, PARSER): | |||||
res = ip_copy.feed_eof() | res = ip_copy.feed_eof() | ||||
self.assertEqual(res, Tree('start', ['a', 'b', 'b'])) | self.assertEqual(res, Tree('start', ['a', 'b', 'b'])) | ||||
@unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now") | |||||
def test_error_with_puppet(self): | |||||
@unittest.skipIf(PARSER!='lalr', "interactive_parser error handling only works with LALR for now") | |||||
def test_error_with_interactive_parser(self): | |||||
def ignore_errors(e): | def ignore_errors(e): | ||||
if isinstance(e, UnexpectedCharacters): | if isinstance(e, UnexpectedCharacters): | ||||
# Skip bad character | # Skip bad character | ||||
@@ -2445,8 +2445,8 @@ def _make_parser_test(LEXER, PARSER): | |||||
return True | return True | ||||
elif e.token.type == 'SIGNED_NUMBER': | elif e.token.type == 'SIGNED_NUMBER': | ||||
# Try to feed a comma and retry the number | # Try to feed a comma and retry the number | ||||
e.puppet.feed_token(Token('COMMA', ',')) | |||||
e.puppet.feed_token(e.token) | |||||
e.interactive_parser.feed_token(Token('COMMA', ',')) | |||||
e.interactive_parser.feed_token(e.token) | |||||
return True | return True | ||||