@@ -66,10 +66,11 @@ UnexpectedInput | |||||
.. autoclass:: lark.exceptions.UnexpectedCharacters | .. autoclass:: lark.exceptions.UnexpectedCharacters | ||||
.. _parserpuppet: | |||||
InteractiveParser | |||||
----------------- | |||||
ParserPuppet | |||||
------------ | |||||
.. autoclass:: lark.parsers.lalr_puppet.InteractiveParser | |||||
:members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts | |||||
.. autoclass:: lark.parsers.lalr_puppet.ParserPuppet | |||||
:members: choices, feed_token, copy, pretty, resume_parse | |||||
.. autoclass:: lark.parsers.lalr_puppet.ImmutableInteractiveParser | |||||
:members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts |
@@ -91,7 +91,7 @@ class Lark: | |||||
def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: | def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: | ||||
... | ... | ||||
def get_puppet(self, text: str = None, start: Optional[str] = None) -> ParserPuppet: | |||||
def parse_interactive(self, text: str = None, start: Optional[str] = None) -> ParserPuppet: | |||||
... | ... | ||||
@classmethod | @classmethod | ||||
@@ -1,3 +1,5 @@ | |||||
from warnings import warn | |||||
from .utils import STRING_TYPE, logger, NO_VALUE | from .utils import STRING_TYPE, logger, NO_VALUE | ||||
@@ -177,14 +179,16 @@ class UnexpectedCharacters(LexError, UnexpectedInput): | |||||
class UnexpectedToken(ParseError, UnexpectedInput): | class UnexpectedToken(ParseError, UnexpectedInput): | ||||
"""When the parser throws UnexpectedToken, it instantiates a puppet | |||||
with its internal state. Users can then interactively set the puppet to | |||||
the desired puppet state, and resume regular parsing. | |||||
"""An exception that is raised by the parser, when the token it received | |||||
doesn't match any valid step forward. | |||||
The parser provides an interactive instance through `interactive_parser`, | |||||
which is initialized to the point of failure, and can be used for debugging and error handling. | |||||
see: :ref:`ParserPuppet`. | |||||
see: :ref:`InteractiveParser`. | |||||
""" | """ | ||||
def __init__(self, token, expected, considered_rules=None, state=None, puppet=None, terminals_by_name=None, token_history=None): | |||||
def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None): | |||||
# TODO considered_rules and expected can be figured out using state | # TODO considered_rules and expected can be figured out using state | ||||
self.line = getattr(token, 'line', '?') | self.line = getattr(token, 'line', '?') | ||||
self.column = getattr(token, 'column', '?') | self.column = getattr(token, 'column', '?') | ||||
@@ -195,7 +199,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||||
self.expected = expected # XXX deprecate? `accepts` is better | self.expected = expected # XXX deprecate? `accepts` is better | ||||
self._accepts = NO_VALUE | self._accepts = NO_VALUE | ||||
self.considered_rules = considered_rules | self.considered_rules = considered_rules | ||||
self.puppet = puppet | |||||
self.interactive_parser = interactive_parser | |||||
self._terminals_by_name = terminals_by_name | self._terminals_by_name = terminals_by_name | ||||
self.token_history = token_history | self.token_history = token_history | ||||
@@ -204,7 +208,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||||
@property | @property | ||||
def accepts(self): | def accepts(self): | ||||
if self._accepts is NO_VALUE: | if self._accepts is NO_VALUE: | ||||
self._accepts = self.puppet and self.puppet.accepts() | |||||
self._accepts = self.interactive_parser and self.interactive_parser.accepts() | |||||
return self._accepts | return self._accepts | ||||
def __str__(self): | def __str__(self): | ||||
@@ -215,6 +219,12 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||||
return message | return message | ||||
@property | |||||
def puppet(self): | |||||
warn("UnexpectedToken.puppet attribute has been renamed to interactive_parser", DeprecationWarning) | |||||
return self.interactive_parser | |||||
class VisitError(LarkError): | class VisitError(LarkError): | ||||
"""VisitError is raised when visitors are interrupted by an exception | """VisitError is raised when visitors are interrupted by an exception | ||||
@@ -532,8 +532,8 @@ class Lark(Serialize): | |||||
"Get information about a terminal" | "Get information about a terminal" | ||||
return self._terminals_dict[name] | return self._terminals_dict[name] | ||||
def get_puppet(self, text=None, start=None): | |||||
return self.parser.get_puppet(text, start=start) | |||||
def parse_interactive(self, text=None, start=None): | |||||
return self.parser.parse_interactive(text, start=start) | |||||
def parse(self, text, start=None, on_error=None): | def parse(self, text, start=None, on_error=None): | ||||
"""Parse the given text, according to the options provided. | """Parse the given text, according to the options provided. | ||||
@@ -866,7 +866,7 @@ def _error_repr(error): | |||||
else: | else: | ||||
return str(error) | return str(error) | ||||
def _search_puppet(puppet, predicate): | |||||
def _search_interactive_parser(interactive_parser, predicate): | |||||
def expand(node): | def expand(node): | ||||
path, p = node | path, p = node | ||||
for choice in p.choices(): | for choice in p.choices(): | ||||
@@ -878,7 +878,7 @@ def _search_puppet(puppet, predicate): | |||||
else: | else: | ||||
yield path + (choice,), new_p | yield path + (choice,), new_p | ||||
for path, p in bfs_all_unique([((), puppet)], expand): | |||||
for path, p in bfs_all_unique([((), interactive_parser)], expand): | |||||
if predicate(p): | if predicate(p): | ||||
return path, p | return path, p | ||||
@@ -888,10 +888,10 @@ def find_grammar_errors(text, start='start'): | |||||
errors.append((e, _error_repr(e))) | errors.append((e, _error_repr(e))) | ||||
# recover to a new line | # recover to a new line | ||||
token_path, _ = _search_puppet(e.puppet.as_immutable(), lambda p: '_NL' in p.choices()) | |||||
token_path, _ = _search_interactive_parser(e.interactive_parser.as_immutable(), lambda p: '_NL' in p.choices()) | |||||
for token_type in token_path: | for token_type in token_path: | ||||
e.puppet.feed_token(Token(token_type, '')) | |||||
e.puppet.feed_token(Token('_NL', '\n')) | |||||
e.interactive_parser.feed_token(Token(token_type, '')) | |||||
e.interactive_parser.feed_token(Token('_NL', '\n')) | |||||
return True | return True | ||||
_tree = _get_parser().parse(text + '\n', start, on_error=on_error) | _tree = _get_parser().parse(text + '\n', start, on_error=on_error) | ||||
@@ -900,7 +900,7 @@ def find_grammar_errors(text, start='start'): | |||||
errors = [el[0] for el in errors_by_line.values()] # already sorted | errors = [el[0] for el in errors_by_line.values()] # already sorted | ||||
for e in errors: | for e in errors: | ||||
e[0].puppet = None | |||||
e[0].interactive_parser = None | |||||
return errors | return errors | ||||
@@ -106,12 +106,12 @@ class ParsingFrontend(Serialize): | |||||
kw = {} if on_error is None else {'on_error': on_error} | kw = {} if on_error is None else {'on_error': on_error} | ||||
return self.parser.parse(stream, start, **kw) | return self.parser.parse(stream, start, **kw) | ||||
def get_puppet(self, text=None, start=None): | |||||
def parse_interactive(self, text=None, start=None): | |||||
start = self._verify_start(start) | start = self._verify_start(start) | ||||
if self.parser_conf.parser_type != 'lalr': | if self.parser_conf.parser_type != 'lalr': | ||||
raise ConfigurationError("Can only create a Puppet for parser='lalr' at the moment.") | |||||
raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ") | |||||
stream = text if self.skip_lexer else LexerThread(self.lexer, text) | stream = text if self.skip_lexer else LexerThread(self.lexer, text) | ||||
return self.parser.get_puppet(stream, start) | |||||
return self.parser.parse_interactive(stream, start) | |||||
def get_frontend(parser, lexer): | def get_frontend(parser, lexer): | ||||
@@ -8,7 +8,7 @@ from ..lexer import Token | |||||
from ..utils import Serialize | from ..utils import Serialize | ||||
from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable | from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable | ||||
from .lalr_puppet import ParserPuppet | |||||
from .lalr_puppet import InteractiveParser | |||||
from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken | from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken | ||||
###{standalone | ###{standalone | ||||
@@ -33,9 +33,8 @@ class LALR_Parser(Serialize): | |||||
def serialize(self, memo): | def serialize(self, memo): | ||||
return self._parse_table.serialize(memo) | return self._parse_table.serialize(memo) | ||||
def get_puppet(self, lexer, start): | |||||
return self.parser.get_puppet(lexer, start) | |||||
def parse_interactive(self, lexer, start): | |||||
return self.parser.parse(lexer, start, start_interactive=True) | |||||
def parse(self, lexer, start, on_error=None): | def parse(self, lexer, start, on_error=None): | ||||
try: | try: | ||||
@@ -46,7 +45,7 @@ class LALR_Parser(Serialize): | |||||
while True: | while True: | ||||
if isinstance(e, UnexpectedCharacters): | if isinstance(e, UnexpectedCharacters): | ||||
s = e.puppet.lexer_state.state | |||||
s = e.interactive_parser.lexer_state.state | |||||
p = s.line_ctr.char_pos | p = s.line_ctr.char_pos | ||||
if not on_error(e): | if not on_error(e): | ||||
@@ -58,9 +57,11 @@ class LALR_Parser(Serialize): | |||||
s.line_ctr.feed(s.text[p:p+1]) | s.line_ctr.feed(s.text[p:p+1]) | ||||
try: | try: | ||||
return e.puppet.resume_parse() | |||||
return e.interactive_parser.resume_parse() | |||||
except UnexpectedToken as e2: | except UnexpectedToken as e2: | ||||
if isinstance(e, UnexpectedToken) and e.token.type == e2.token.type == '$END' and e.puppet == e2.puppet: | |||||
if (isinstance(e, UnexpectedToken) | |||||
and e.token.type == e2.token.type == '$END' | |||||
and e.interactive_parser == e2.interactive_parser): | |||||
# Prevent infinite loop | # Prevent infinite loop | ||||
raise e2 | raise e2 | ||||
e = e2 | e = e2 | ||||
@@ -125,7 +126,7 @@ class ParserState(object): | |||||
action, arg = states[state][token.type] | action, arg = states[state][token.type] | ||||
except KeyError: | except KeyError: | ||||
expected = {s for s in states[state].keys() if s.isupper()} | expected = {s for s in states[state].keys() if s.isupper()} | ||||
raise UnexpectedToken(token, expected, state=self, puppet=None) | |||||
raise UnexpectedToken(token, expected, state=self, interactive_parser=None) | |||||
assert arg != end_state | assert arg != end_state | ||||
@@ -162,14 +163,11 @@ class _Parser(object): | |||||
self.callbacks = callbacks | self.callbacks = callbacks | ||||
self.debug = debug | self.debug = debug | ||||
def get_puppet(self, lexer, start, value_stack=None, state_stack=None): | |||||
parse_conf = ParseConf(self.parse_table, self.callbacks, start) | |||||
parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) | |||||
return ParserPuppet(self, parser_state, parser_state.lexer) | |||||
def parse(self, lexer, start, value_stack=None, state_stack=None): | |||||
def parse(self, lexer, start, value_stack=None, state_stack=None, start_interactive=False): | |||||
parse_conf = ParseConf(self.parse_table, self.callbacks, start) | parse_conf = ParseConf(self.parse_table, self.callbacks, start) | ||||
parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) | parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) | ||||
if start_interactive: | |||||
return InteractiveParser(self, parser_state, parser_state.lexer) | |||||
return self.parse_from_state(parser_state) | return self.parse_from_state(parser_state) | ||||
@@ -184,7 +182,7 @@ class _Parser(object): | |||||
return state.feed_token(token, True) | return state.feed_token(token, True) | ||||
except UnexpectedInput as e: | except UnexpectedInput as e: | ||||
try: | try: | ||||
e.puppet = ParserPuppet(self, state, state.lexer) | |||||
e.interactive_parser = InteractiveParser(self, state, state.lexer) | |||||
except NameError: | except NameError: | ||||
pass | pass | ||||
raise e | raise e | ||||
@@ -1,16 +1,15 @@ | |||||
# This module provide a LALR puppet, which is used to debugging and error handling | |||||
# This module provides a LALR interactive parser, which is used for debugging and error handling | |||||
from copy import copy | from copy import copy | ||||
from .lalr_analysis import Shift, Reduce | |||||
from .. import Token | from .. import Token | ||||
from ..exceptions import UnexpectedToken | from ..exceptions import UnexpectedToken | ||||
class ParserPuppet(object): | |||||
"""ParserPuppet gives you advanced control over error handling when parsing with LALR. | |||||
class InteractiveParser(object): | |||||
"""InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR. | |||||
For a simpler, more streamlined interface, see the ``on_error`` argument to ``Lark.parse()``. | |||||
For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``. | |||||
""" | """ | ||||
def __init__(self, parser, parser_state, lexer_state): | def __init__(self, parser, parser_state, lexer_state): | ||||
self.parser = parser | self.parser = parser | ||||
@@ -25,9 +24,9 @@ class ParserPuppet(object): | |||||
return self.parser_state.feed_token(token, token.type == '$END') | return self.parser_state.feed_token(token, token.type == '$END') | ||||
def exhaust_lexer(self): | def exhaust_lexer(self): | ||||
"""Try to feed the rest of the lexer state into the puppet. | |||||
"""Try to feed the rest of the lexer state into the interactive parser. | |||||
Note that this modifies the puppet in place and does not feed an '$END' Token""" | |||||
Note that this modifies the instance in place and does not feed an '$END' Token""" | |||||
for token in self.lexer_state.lex(self.parser_state): | for token in self.lexer_state.lex(self.parser_state): | ||||
self.parser_state.feed_token(token) | self.parser_state.feed_token(token) | ||||
@@ -38,9 +37,9 @@ class ParserPuppet(object): | |||||
def __copy__(self): | def __copy__(self): | ||||
"""Create a new puppet with a separate state. | |||||
"""Create a new interactive parser with a separate state. | |||||
Calls to feed_token() won't affect the old puppet, and vice-versa. | |||||
Calls to feed_token() won't affect the old instance, and vice-versa. | |||||
""" | """ | ||||
return type(self)( | return type(self)( | ||||
self.parser, | self.parser, | ||||
@@ -52,18 +51,19 @@ class ParserPuppet(object): | |||||
return copy(self) | return copy(self) | ||||
def __eq__(self, other): | def __eq__(self, other): | ||||
if not isinstance(other, ParserPuppet): | |||||
if not isinstance(other, InteractiveParser): | |||||
return False | return False | ||||
return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state | return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state | ||||
def as_immutable(self): | def as_immutable(self): | ||||
"""Convert to an ``ImmutableInteractiveParser``.""" | |||||
p = copy(self) | p = copy(self) | ||||
return ImmutableParserPuppet(p.parser, p.parser_state, p.lexer_state) | |||||
return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state) | |||||
def pretty(self): | def pretty(self): | ||||
"""Print the output of ``choices()`` in a way that's easier to read.""" | """Print the output of ``choices()`` in a way that's easier to read.""" | ||||
out = ["Puppet choices:"] | |||||
out = ["Parser choices:"] | |||||
for k, v in self.choices().items(): | for k, v in self.choices().items(): | ||||
out.append('\t- %s -> %s' % (k, v)) | out.append('\t- %s -> %s' % (k, v)) | ||||
out.append('stack size: %s' % len(self.parser_state.state_stack)) | out.append('stack size: %s' % len(self.parser_state.state_stack)) | ||||
@@ -79,12 +79,13 @@ class ParserPuppet(object): | |||||
return self.parser_state.parse_conf.parse_table.states[self.parser_state.position] | return self.parser_state.parse_conf.parse_table.states[self.parser_state.position] | ||||
def accepts(self): | def accepts(self): | ||||
"""Returns the set of possible tokens that will advance the parser into a new valid state.""" | |||||
accepts = set() | accepts = set() | ||||
for t in self.choices(): | for t in self.choices(): | ||||
if t.isupper(): # is terminal? | if t.isupper(): # is terminal? | ||||
new_puppet = copy(self) | |||||
new_cursor = copy(self) | |||||
try: | try: | ||||
new_puppet.feed_token(Token(t, '')) | |||||
new_cursor.feed_token(Token(t, '')) | |||||
except UnexpectedToken: | except UnexpectedToken: | ||||
pass | pass | ||||
else: | else: | ||||
@@ -92,12 +93,16 @@ class ParserPuppet(object): | |||||
return accepts | return accepts | ||||
def resume_parse(self): | def resume_parse(self): | ||||
"""Resume parsing from the current puppet state.""" | |||||
"""Resume automated parsing from the current state.""" | |||||
return self.parser.parse_from_state(self.parser_state) | return self.parser.parse_from_state(self.parser_state) | ||||
class ImmutableParserPuppet(ParserPuppet): | |||||
class ImmutableInteractiveParser(InteractiveParser): | |||||
"""Same as ``InteractiveParser``, but operations create a new instance instead | |||||
of changing it in-place. | |||||
""" | |||||
result = None | result = None | ||||
def __hash__(self): | def __hash__(self): | ||||
@@ -105,14 +110,23 @@ class ImmutableParserPuppet(ParserPuppet): | |||||
def feed_token(self, token): | def feed_token(self, token): | ||||
c = copy(self) | c = copy(self) | ||||
c.result = ParserPuppet.feed_token(c, token) | |||||
c.result = InteractiveParser.feed_token(c, token) | |||||
return c | return c | ||||
def exhaust_lexer(self): | def exhaust_lexer(self): | ||||
"""Try to feed the rest of the lexer state into the puppet. | |||||
"""Try to feed the rest of the lexer state into the parser. | |||||
Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token""" | |||||
cursor = self.as_mutable() | |||||
cursor.exhaust_lexer() | |||||
return cursor.as_immutable() | |||||
def as_mutable(self): | |||||
"""Convert to an ``InteractiveParser``.""" | |||||
p = copy(self) | |||||
return InteractiveParser(p.parser, p.parser_state, p.lexer_state) | |||||
Note that this returns a new ImmutableParserPuppet and does not feed an '$END' Token""" | |||||
res = copy(self) | |||||
for token in res.lexer_state.lex(res.parser_state): | |||||
res = res.parser_state.feed_token(token) | |||||
return res | |||||
# Deprecated class names for the interactive parser | |||||
ParserPuppet = InteractiveParser | |||||
ImmutableParserPuppet = ImmutableInteractiveParser |
@@ -2404,32 +2404,32 @@ def _make_parser_test(LEXER, PARSER): | |||||
B: "b" | B: "b" | ||||
''') | ''') | ||||
puppet = g.get_puppet() | |||||
ip = g.parse_interactive() | |||||
self.assertRaises(UnexpectedToken, puppet.feed_eof) | |||||
self.assertRaises(TypeError, puppet.exhaust_lexer) | |||||
puppet.feed_token(Token('A', 'a')) | |||||
res = puppet.feed_eof() | |||||
self.assertRaises(UnexpectedToken, ip.feed_eof) | |||||
self.assertRaises(TypeError, ip.exhaust_lexer) | |||||
ip.feed_token(Token('A', 'a')) | |||||
res = ip.feed_eof() | |||||
self.assertEqual(res, Tree('start', ['a'])) | self.assertEqual(res, Tree('start', ['a'])) | ||||
puppet = g.get_puppet("ab") | |||||
ip = g.parse_interactive("ab") | |||||
puppet.exhaust_lexer() | |||||
ip.exhaust_lexer() | |||||
puppet_copy = puppet.copy() | |||||
self.assertEqual(puppet_copy.parser_state, puppet.parser_state) | |||||
self.assertEqual(puppet_copy.lexer_state.state, puppet.lexer_state.state) | |||||
self.assertIsNot(puppet_copy.parser_state, puppet.parser_state) | |||||
self.assertIsNot(puppet_copy.lexer_state.state, puppet.lexer_state.state) | |||||
self.assertIsNot(puppet_copy.lexer_state.state.line_ctr, puppet.lexer_state.state.line_ctr) | |||||
ip_copy = ip.copy() | |||||
self.assertEqual(ip_copy.parser_state, ip.parser_state) | |||||
self.assertEqual(ip_copy.lexer_state.state, ip.lexer_state.state) | |||||
self.assertIsNot(ip_copy.parser_state, ip.parser_state) | |||||
self.assertIsNot(ip_copy.lexer_state.state, ip.lexer_state.state) | |||||
self.assertIsNot(ip_copy.lexer_state.state.line_ctr, ip.lexer_state.state.line_ctr) | |||||
res = puppet.feed_eof(puppet.lexer_state.state.last_token) | |||||
res = ip.feed_eof(ip.lexer_state.state.last_token) | |||||
self.assertEqual(res, Tree('start', ['a', 'b'])) | self.assertEqual(res, Tree('start', ['a', 'b'])) | ||||
self.assertRaises(UnexpectedToken ,puppet.feed_eof) | |||||
self.assertRaises(UnexpectedToken ,ip.feed_eof) | |||||
self.assertRaises(UnexpectedToken, puppet_copy.feed_token, Token('A', 'a')) | |||||
puppet_copy.feed_token(Token('B', 'b')) | |||||
res = puppet_copy.feed_eof() | |||||
self.assertRaises(UnexpectedToken, ip_copy.feed_token, Token('A', 'a')) | |||||
ip_copy.feed_token(Token('B', 'b')) | |||||
res = ip_copy.feed_eof() | |||||
self.assertEqual(res, Tree('start', ['a', 'b', 'b'])) | self.assertEqual(res, Tree('start', ['a', 'b', 'b'])) | ||||
@unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now") | @unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now") | ||||