Browse Source

Rename ParserPuppet -> InteractiveParser

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.3
Erez Sh 3 years ago
parent
commit
2a1b03bc65
9 changed files with 103 additions and 80 deletions
  1. +6
    -5
      docs/classes.rst
  2. +1
    -1
      lark-stubs/lark.pyi
  3. +17
    -7
      lark/exceptions.py
  4. +2
    -2
      lark/lark.py
  5. +6
    -6
      lark/load_grammar.py
  6. +3
    -3
      lark/parser_frontends.py
  7. +13
    -15
      lark/parsers/lalr_parser.py
  8. +37
    -23
      lark/parsers/lalr_puppet.py
  9. +18
    -18
      tests/test_parser.py

+ 6
- 5
docs/classes.rst View File

@@ -66,10 +66,11 @@ UnexpectedInput


.. autoclass:: lark.exceptions.UnexpectedCharacters .. autoclass:: lark.exceptions.UnexpectedCharacters


.. _parserpuppet:
InteractiveParser
-----------------


ParserPuppet
------------
.. autoclass:: lark.parsers.lalr_puppet.InteractiveParser
:members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts


.. autoclass:: lark.parsers.lalr_puppet.ParserPuppet
:members: choices, feed_token, copy, pretty, resume_parse
.. autoclass:: lark.parsers.lalr_puppet.ImmutableInteractiveParser
:members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts

+ 1
- 1
lark-stubs/lark.pyi View File

@@ -91,7 +91,7 @@ class Lark:
def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree:
... ...


def get_puppet(self, text: str = None, start: Optional[str] = None) -> ParserPuppet:
def parse_interactive(self, text: str = None, start: Optional[str] = None) -> ParserPuppet:
... ...


@classmethod @classmethod


+ 17
- 7
lark/exceptions.py View File

@@ -1,3 +1,5 @@
from warnings import warn

from .utils import STRING_TYPE, logger, NO_VALUE from .utils import STRING_TYPE, logger, NO_VALUE




@@ -177,14 +179,16 @@ class UnexpectedCharacters(LexError, UnexpectedInput):




class UnexpectedToken(ParseError, UnexpectedInput): class UnexpectedToken(ParseError, UnexpectedInput):
"""When the parser throws UnexpectedToken, it instantiates a puppet
with its internal state. Users can then interactively set the puppet to
the desired puppet state, and resume regular parsing.
"""An exception that is raised by the parser, when the token it received
doesn't match any valid step forward.

The parser provides an interactive instance through `interactive_parser`,
which is initialized to the point of failure, and can be used for debugging and error handling.


see: :ref:`ParserPuppet`.
see: :ref:`InteractiveParser`.
""" """


def __init__(self, token, expected, considered_rules=None, state=None, puppet=None, terminals_by_name=None, token_history=None):
def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
# TODO considered_rules and expected can be figured out using state # TODO considered_rules and expected can be figured out using state
self.line = getattr(token, 'line', '?') self.line = getattr(token, 'line', '?')
self.column = getattr(token, 'column', '?') self.column = getattr(token, 'column', '?')
@@ -195,7 +199,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
self.expected = expected # XXX deprecate? `accepts` is better self.expected = expected # XXX deprecate? `accepts` is better
self._accepts = NO_VALUE self._accepts = NO_VALUE
self.considered_rules = considered_rules self.considered_rules = considered_rules
self.puppet = puppet
self.interactive_parser = interactive_parser
self._terminals_by_name = terminals_by_name self._terminals_by_name = terminals_by_name
self.token_history = token_history self.token_history = token_history


@@ -204,7 +208,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
@property @property
def accepts(self): def accepts(self):
if self._accepts is NO_VALUE: if self._accepts is NO_VALUE:
self._accepts = self.puppet and self.puppet.accepts()
self._accepts = self.interactive_parser and self.interactive_parser.accepts()
return self._accepts return self._accepts


def __str__(self): def __str__(self):
@@ -215,6 +219,12 @@ class UnexpectedToken(ParseError, UnexpectedInput):


return message return message


@property
def puppet(self):
warn("UnexpectedToken.puppet attribute has been renamed to interactive_parser", DeprecationWarning)
return self.interactive_parser



class VisitError(LarkError): class VisitError(LarkError):
"""VisitError is raised when visitors are interrupted by an exception """VisitError is raised when visitors are interrupted by an exception


+ 2
- 2
lark/lark.py View File

@@ -532,8 +532,8 @@ class Lark(Serialize):
"Get information about a terminal" "Get information about a terminal"
return self._terminals_dict[name] return self._terminals_dict[name]
def get_puppet(self, text=None, start=None):
return self.parser.get_puppet(text, start=start)
def parse_interactive(self, text=None, start=None):
return self.parser.parse_interactive(text, start=start)


def parse(self, text, start=None, on_error=None): def parse(self, text, start=None, on_error=None):
"""Parse the given text, according to the options provided. """Parse the given text, according to the options provided.


+ 6
- 6
lark/load_grammar.py View File

@@ -866,7 +866,7 @@ def _error_repr(error):
else: else:
return str(error) return str(error)


def _search_puppet(puppet, predicate):
def _search_interactive_parser(interactive_parser, predicate):
def expand(node): def expand(node):
path, p = node path, p = node
for choice in p.choices(): for choice in p.choices():
@@ -878,7 +878,7 @@ def _search_puppet(puppet, predicate):
else: else:
yield path + (choice,), new_p yield path + (choice,), new_p


for path, p in bfs_all_unique([((), puppet)], expand):
for path, p in bfs_all_unique([((), interactive_parser)], expand):
if predicate(p): if predicate(p):
return path, p return path, p


@@ -888,10 +888,10 @@ def find_grammar_errors(text, start='start'):
errors.append((e, _error_repr(e))) errors.append((e, _error_repr(e)))


# recover to a new line # recover to a new line
token_path, _ = _search_puppet(e.puppet.as_immutable(), lambda p: '_NL' in p.choices())
token_path, _ = _search_interactive_parser(e.interactive_parser.as_immutable(), lambda p: '_NL' in p.choices())
for token_type in token_path: for token_type in token_path:
e.puppet.feed_token(Token(token_type, ''))
e.puppet.feed_token(Token('_NL', '\n'))
e.interactive_parser.feed_token(Token(token_type, ''))
e.interactive_parser.feed_token(Token('_NL', '\n'))
return True return True


_tree = _get_parser().parse(text + '\n', start, on_error=on_error) _tree = _get_parser().parse(text + '\n', start, on_error=on_error)
@@ -900,7 +900,7 @@ def find_grammar_errors(text, start='start'):
errors = [el[0] for el in errors_by_line.values()] # already sorted errors = [el[0] for el in errors_by_line.values()] # already sorted


for e in errors: for e in errors:
e[0].puppet = None
e[0].interactive_parser = None
return errors return errors






+ 3
- 3
lark/parser_frontends.py View File

@@ -106,12 +106,12 @@ class ParsingFrontend(Serialize):
kw = {} if on_error is None else {'on_error': on_error} kw = {} if on_error is None else {'on_error': on_error}
return self.parser.parse(stream, start, **kw) return self.parser.parse(stream, start, **kw)
def get_puppet(self, text=None, start=None):
def parse_interactive(self, text=None, start=None):
start = self._verify_start(start) start = self._verify_start(start)
if self.parser_conf.parser_type != 'lalr': if self.parser_conf.parser_type != 'lalr':
raise ConfigurationError("Can only create a Puppet for parser='lalr' at the moment.")
raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ")
stream = text if self.skip_lexer else LexerThread(self.lexer, text) stream = text if self.skip_lexer else LexerThread(self.lexer, text)
return self.parser.get_puppet(stream, start)
return self.parser.parse_interactive(stream, start)




def get_frontend(parser, lexer): def get_frontend(parser, lexer):


+ 13
- 15
lark/parsers/lalr_parser.py View File

@@ -8,7 +8,7 @@ from ..lexer import Token
from ..utils import Serialize from ..utils import Serialize


from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
from .lalr_puppet import ParserPuppet
from .lalr_puppet import InteractiveParser
from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken


###{standalone ###{standalone
@@ -33,9 +33,8 @@ class LALR_Parser(Serialize):
def serialize(self, memo): def serialize(self, memo):
return self._parse_table.serialize(memo) return self._parse_table.serialize(memo)
def get_puppet(self, lexer, start):
return self.parser.get_puppet(lexer, start)
def parse_interactive(self, lexer, start):
return self.parser.parse(lexer, start, start_interactive=True)


def parse(self, lexer, start, on_error=None): def parse(self, lexer, start, on_error=None):
try: try:
@@ -46,7 +45,7 @@ class LALR_Parser(Serialize):


while True: while True:
if isinstance(e, UnexpectedCharacters): if isinstance(e, UnexpectedCharacters):
s = e.puppet.lexer_state.state
s = e.interactive_parser.lexer_state.state
p = s.line_ctr.char_pos p = s.line_ctr.char_pos


if not on_error(e): if not on_error(e):
@@ -58,9 +57,11 @@ class LALR_Parser(Serialize):
s.line_ctr.feed(s.text[p:p+1]) s.line_ctr.feed(s.text[p:p+1])


try: try:
return e.puppet.resume_parse()
return e.interactive_parser.resume_parse()
except UnexpectedToken as e2: except UnexpectedToken as e2:
if isinstance(e, UnexpectedToken) and e.token.type == e2.token.type == '$END' and e.puppet == e2.puppet:
if (isinstance(e, UnexpectedToken)
and e.token.type == e2.token.type == '$END'
and e.interactive_parser == e2.interactive_parser):
# Prevent infinite loop # Prevent infinite loop
raise e2 raise e2
e = e2 e = e2
@@ -125,7 +126,7 @@ class ParserState(object):
action, arg = states[state][token.type] action, arg = states[state][token.type]
except KeyError: except KeyError:
expected = {s for s in states[state].keys() if s.isupper()} expected = {s for s in states[state].keys() if s.isupper()}
raise UnexpectedToken(token, expected, state=self, puppet=None)
raise UnexpectedToken(token, expected, state=self, interactive_parser=None)


assert arg != end_state assert arg != end_state


@@ -162,14 +163,11 @@ class _Parser(object):
self.callbacks = callbacks self.callbacks = callbacks
self.debug = debug self.debug = debug


def get_puppet(self, lexer, start, value_stack=None, state_stack=None):
parse_conf = ParseConf(self.parse_table, self.callbacks, start)
parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
return ParserPuppet(self, parser_state, parser_state.lexer)
def parse(self, lexer, start, value_stack=None, state_stack=None):
def parse(self, lexer, start, value_stack=None, state_stack=None, start_interactive=False):
parse_conf = ParseConf(self.parse_table, self.callbacks, start) parse_conf = ParseConf(self.parse_table, self.callbacks, start)
parser_state = ParserState(parse_conf, lexer, state_stack, value_stack) parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
if start_interactive:
return InteractiveParser(self, parser_state, parser_state.lexer)
return self.parse_from_state(parser_state) return self.parse_from_state(parser_state)


@@ -184,7 +182,7 @@ class _Parser(object):
return state.feed_token(token, True) return state.feed_token(token, True)
except UnexpectedInput as e: except UnexpectedInput as e:
try: try:
e.puppet = ParserPuppet(self, state, state.lexer)
e.interactive_parser = InteractiveParser(self, state, state.lexer)
except NameError: except NameError:
pass pass
raise e raise e


+ 37
- 23
lark/parsers/lalr_puppet.py View File

@@ -1,16 +1,15 @@
# This module provide a LALR puppet, which is used to debugging and error handling
# This module provides a LALR interactive parser, which is used for debugging and error handling


from copy import copy from copy import copy


from .lalr_analysis import Shift, Reduce
from .. import Token from .. import Token
from ..exceptions import UnexpectedToken from ..exceptions import UnexpectedToken




class ParserPuppet(object):
"""ParserPuppet gives you advanced control over error handling when parsing with LALR.
class InteractiveParser(object):
"""InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.


For a simpler, more streamlined interface, see the ``on_error`` argument to ``Lark.parse()``.
For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
""" """
def __init__(self, parser, parser_state, lexer_state): def __init__(self, parser, parser_state, lexer_state):
self.parser = parser self.parser = parser
@@ -25,9 +24,9 @@ class ParserPuppet(object):
return self.parser_state.feed_token(token, token.type == '$END') return self.parser_state.feed_token(token, token.type == '$END')
def exhaust_lexer(self): def exhaust_lexer(self):
"""Try to feed the rest of the lexer state into the puppet.
"""Try to feed the rest of the lexer state into the interactive parser.
Note that this modifies the puppet in place and does not feed an '$END' Token"""
Note that this modifies the instance in place and does not feed an '$END' Token"""
for token in self.lexer_state.lex(self.parser_state): for token in self.lexer_state.lex(self.parser_state):
self.parser_state.feed_token(token) self.parser_state.feed_token(token)
@@ -38,9 +37,9 @@ class ParserPuppet(object):




def __copy__(self): def __copy__(self):
"""Create a new puppet with a separate state.
"""Create a new interactive parser with a separate state.


Calls to feed_token() won't affect the old puppet, and vice-versa.
Calls to feed_token() won't affect the old instance, and vice-versa.
""" """
return type(self)( return type(self)(
self.parser, self.parser,
@@ -52,18 +51,19 @@ class ParserPuppet(object):
return copy(self) return copy(self)


def __eq__(self, other): def __eq__(self, other):
if not isinstance(other, ParserPuppet):
if not isinstance(other, InteractiveParser):
return False return False


return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state


def as_immutable(self): def as_immutable(self):
"""Convert to an ``ImmutableInteractiveParser``."""
p = copy(self) p = copy(self)
return ImmutableParserPuppet(p.parser, p.parser_state, p.lexer_state)
return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state)


def pretty(self): def pretty(self):
"""Print the output of ``choices()`` in a way that's easier to read.""" """Print the output of ``choices()`` in a way that's easier to read."""
out = ["Puppet choices:"]
out = ["Parser choices:"]
for k, v in self.choices().items(): for k, v in self.choices().items():
out.append('\t- %s -> %s' % (k, v)) out.append('\t- %s -> %s' % (k, v))
out.append('stack size: %s' % len(self.parser_state.state_stack)) out.append('stack size: %s' % len(self.parser_state.state_stack))
@@ -79,12 +79,13 @@ class ParserPuppet(object):
return self.parser_state.parse_conf.parse_table.states[self.parser_state.position] return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]


def accepts(self): def accepts(self):
"""Returns the set of possible tokens that will advance the parser into a new valid state."""
accepts = set() accepts = set()
for t in self.choices(): for t in self.choices():
if t.isupper(): # is terminal? if t.isupper(): # is terminal?
new_puppet = copy(self)
new_cursor = copy(self)
try: try:
new_puppet.feed_token(Token(t, ''))
new_cursor.feed_token(Token(t, ''))
except UnexpectedToken: except UnexpectedToken:
pass pass
else: else:
@@ -92,12 +93,16 @@ class ParserPuppet(object):
return accepts return accepts


def resume_parse(self): def resume_parse(self):
"""Resume parsing from the current puppet state."""
"""Resume automated parsing from the current state."""
return self.parser.parse_from_state(self.parser_state) return self.parser.parse_from_state(self.parser_state)






class ImmutableParserPuppet(ParserPuppet):
class ImmutableInteractiveParser(InteractiveParser):
"""Same as ``InteractiveParser``, but operations create a new instance instead
of changing it in-place.
"""

result = None result = None


def __hash__(self): def __hash__(self):
@@ -105,14 +110,23 @@ class ImmutableParserPuppet(ParserPuppet):


def feed_token(self, token): def feed_token(self, token):
c = copy(self) c = copy(self)
c.result = ParserPuppet.feed_token(c, token)
c.result = InteractiveParser.feed_token(c, token)
return c return c


def exhaust_lexer(self): def exhaust_lexer(self):
"""Try to feed the rest of the lexer state into the puppet.
"""Try to feed the rest of the lexer state into the parser.

Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token"""
cursor = self.as_mutable()
cursor.exhaust_lexer()
return cursor.as_immutable()

def as_mutable(self):
"""Convert to an ``InteractiveParser``."""
p = copy(self)
return InteractiveParser(p.parser, p.parser_state, p.lexer_state)



Note that this returns a new ImmutableParserPuppet and does not feed an '$END' Token"""
res = copy(self)
for token in res.lexer_state.lex(res.parser_state):
res = res.parser_state.feed_token(token)
return res
# Deprecated class names for the interactive parser
ParserPuppet = InteractiveParser
ImmutableParserPuppet = ImmutableInteractiveParser

+ 18
- 18
tests/test_parser.py View File

@@ -2404,32 +2404,32 @@ def _make_parser_test(LEXER, PARSER):
B: "b" B: "b"
''') ''')
puppet = g.get_puppet()
ip = g.parse_interactive()


self.assertRaises(UnexpectedToken, puppet.feed_eof)
self.assertRaises(TypeError, puppet.exhaust_lexer)
puppet.feed_token(Token('A', 'a'))
res = puppet.feed_eof()
self.assertRaises(UnexpectedToken, ip.feed_eof)
self.assertRaises(TypeError, ip.exhaust_lexer)
ip.feed_token(Token('A', 'a'))
res = ip.feed_eof()
self.assertEqual(res, Tree('start', ['a'])) self.assertEqual(res, Tree('start', ['a']))


puppet = g.get_puppet("ab")
ip = g.parse_interactive("ab")


puppet.exhaust_lexer()
ip.exhaust_lexer()


puppet_copy = puppet.copy()
self.assertEqual(puppet_copy.parser_state, puppet.parser_state)
self.assertEqual(puppet_copy.lexer_state.state, puppet.lexer_state.state)
self.assertIsNot(puppet_copy.parser_state, puppet.parser_state)
self.assertIsNot(puppet_copy.lexer_state.state, puppet.lexer_state.state)
self.assertIsNot(puppet_copy.lexer_state.state.line_ctr, puppet.lexer_state.state.line_ctr)
ip_copy = ip.copy()
self.assertEqual(ip_copy.parser_state, ip.parser_state)
self.assertEqual(ip_copy.lexer_state.state, ip.lexer_state.state)
self.assertIsNot(ip_copy.parser_state, ip.parser_state)
self.assertIsNot(ip_copy.lexer_state.state, ip.lexer_state.state)
self.assertIsNot(ip_copy.lexer_state.state.line_ctr, ip.lexer_state.state.line_ctr)


res = puppet.feed_eof(puppet.lexer_state.state.last_token)
res = ip.feed_eof(ip.lexer_state.state.last_token)
self.assertEqual(res, Tree('start', ['a', 'b'])) self.assertEqual(res, Tree('start', ['a', 'b']))
self.assertRaises(UnexpectedToken ,puppet.feed_eof)
self.assertRaises(UnexpectedToken ,ip.feed_eof)
self.assertRaises(UnexpectedToken, puppet_copy.feed_token, Token('A', 'a'))
puppet_copy.feed_token(Token('B', 'b'))
res = puppet_copy.feed_eof()
self.assertRaises(UnexpectedToken, ip_copy.feed_token, Token('A', 'a'))
ip_copy.feed_token(Token('B', 'b'))
res = ip_copy.feed_eof()
self.assertEqual(res, Tree('start', ['a', 'b', 'b'])) self.assertEqual(res, Tree('start', ['a', 'b', 'b']))


@unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now") @unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now")


Loading…
Cancel
Save