ソースを参照

Rename ParserPuppet -> InteractiveParser

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.3
Erez Sh 3年前
コミット
2a1b03bc65
9個のファイルの変更103行の追加80行の削除
  1. +6
    -5
      docs/classes.rst
  2. +1
    -1
      lark-stubs/lark.pyi
  3. +17
    -7
      lark/exceptions.py
  4. +2
    -2
      lark/lark.py
  5. +6
    -6
      lark/load_grammar.py
  6. +3
    -3
      lark/parser_frontends.py
  7. +13
    -15
      lark/parsers/lalr_parser.py
  8. +37
    -23
      lark/parsers/lalr_puppet.py
  9. +18
    -18
      tests/test_parser.py

+ 6
- 5
docs/classes.rst ファイルの表示

@@ -66,10 +66,11 @@ UnexpectedInput

.. autoclass:: lark.exceptions.UnexpectedCharacters

.. _parserpuppet:
InteractiveParser
-----------------

ParserPuppet
------------
.. autoclass:: lark.parsers.lalr_puppet.InteractiveParser
:members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts

.. autoclass:: lark.parsers.lalr_puppet.ParserPuppet
:members: choices, feed_token, copy, pretty, resume_parse
.. autoclass:: lark.parsers.lalr_puppet.ImmutableInteractiveParser
:members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts

+ 1
- 1
lark-stubs/lark.pyi ファイルの表示

@@ -91,7 +91,7 @@ class Lark:
def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree:
...

def get_puppet(self, text: str = None, start: Optional[str] = None) -> ParserPuppet:
def parse_interactive(self, text: str = None, start: Optional[str] = None) -> ParserPuppet:
...

@classmethod


+ 17
- 7
lark/exceptions.py ファイルの表示

@@ -1,3 +1,5 @@
from warnings import warn

from .utils import STRING_TYPE, logger, NO_VALUE


@@ -177,14 +179,16 @@ class UnexpectedCharacters(LexError, UnexpectedInput):


class UnexpectedToken(ParseError, UnexpectedInput):
"""When the parser throws UnexpectedToken, it instantiates a puppet
with its internal state. Users can then interactively set the puppet to
the desired puppet state, and resume regular parsing.
"""An exception that is raised by the parser, when the token it received
doesn't match any valid step forward.

The parser provides an interactive instance through `interactive_parser`,
which is initialized to the point of failure, and can be used for debugging and error handling.

see: :ref:`ParserPuppet`.
see: :ref:`InteractiveParser`.
"""

def __init__(self, token, expected, considered_rules=None, state=None, puppet=None, terminals_by_name=None, token_history=None):
def __init__(self, token, expected, considered_rules=None, state=None, interactive_parser=None, terminals_by_name=None, token_history=None):
# TODO considered_rules and expected can be figured out using state
self.line = getattr(token, 'line', '?')
self.column = getattr(token, 'column', '?')
@@ -195,7 +199,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
self.expected = expected # XXX deprecate? `accepts` is better
self._accepts = NO_VALUE
self.considered_rules = considered_rules
self.puppet = puppet
self.interactive_parser = interactive_parser
self._terminals_by_name = terminals_by_name
self.token_history = token_history

@@ -204,7 +208,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
@property
def accepts(self):
if self._accepts is NO_VALUE:
self._accepts = self.puppet and self.puppet.accepts()
self._accepts = self.interactive_parser and self.interactive_parser.accepts()
return self._accepts

def __str__(self):
@@ -215,6 +219,12 @@ class UnexpectedToken(ParseError, UnexpectedInput):

return message

@property
def puppet(self):
    """Deprecated alias for the ``interactive_parser`` attribute.

    Emits a ``DeprecationWarning`` and returns ``self.interactive_parser``.
    """
    # stacklevel=2 attributes the warning to the code that accessed `.puppet`,
    # rather than to this property, so users can locate the call to update.
    warn("UnexpectedToken.puppet attribute has been renamed to interactive_parser", DeprecationWarning, stacklevel=2)
    return self.interactive_parser


class VisitError(LarkError):
"""VisitError is raised when visitors are interrupted by an exception


+ 2
- 2
lark/lark.py ファイルの表示

@@ -532,8 +532,8 @@ class Lark(Serialize):
"Get information about a terminal"
return self._terminals_dict[name]
def get_puppet(self, text=None, start=None):
return self.parser.get_puppet(text, start=start)
def parse_interactive(self, text=None, start=None):
return self.parser.parse_interactive(text, start=start)

def parse(self, text, start=None, on_error=None):
"""Parse the given text, according to the options provided.


+ 6
- 6
lark/load_grammar.py ファイルの表示

@@ -866,7 +866,7 @@ def _error_repr(error):
else:
return str(error)

def _search_puppet(puppet, predicate):
def _search_interactive_parser(interactive_parser, predicate):
def expand(node):
path, p = node
for choice in p.choices():
@@ -878,7 +878,7 @@ def _search_puppet(puppet, predicate):
else:
yield path + (choice,), new_p

for path, p in bfs_all_unique([((), puppet)], expand):
for path, p in bfs_all_unique([((), interactive_parser)], expand):
if predicate(p):
return path, p

@@ -888,10 +888,10 @@ def find_grammar_errors(text, start='start'):
errors.append((e, _error_repr(e)))

# recover to a new line
token_path, _ = _search_puppet(e.puppet.as_immutable(), lambda p: '_NL' in p.choices())
token_path, _ = _search_interactive_parser(e.interactive_parser.as_immutable(), lambda p: '_NL' in p.choices())
for token_type in token_path:
e.puppet.feed_token(Token(token_type, ''))
e.puppet.feed_token(Token('_NL', '\n'))
e.interactive_parser.feed_token(Token(token_type, ''))
e.interactive_parser.feed_token(Token('_NL', '\n'))
return True

_tree = _get_parser().parse(text + '\n', start, on_error=on_error)
@@ -900,7 +900,7 @@ def find_grammar_errors(text, start='start'):
errors = [el[0] for el in errors_by_line.values()] # already sorted

for e in errors:
e[0].puppet = None
e[0].interactive_parser = None
return errors




+ 3
- 3
lark/parser_frontends.py ファイルの表示

@@ -106,12 +106,12 @@ class ParsingFrontend(Serialize):
kw = {} if on_error is None else {'on_error': on_error}
return self.parser.parse(stream, start, **kw)
def get_puppet(self, text=None, start=None):
def parse_interactive(self, text=None, start=None):
start = self._verify_start(start)
if self.parser_conf.parser_type != 'lalr':
raise ConfigurationError("Can only create a Puppet for parser='lalr' at the moment.")
raise ConfigurationError("parse_interactive() currently only works with parser='lalr' ")
stream = text if self.skip_lexer else LexerThread(self.lexer, text)
return self.parser.get_puppet(stream, start)
return self.parser.parse_interactive(stream, start)


def get_frontend(parser, lexer):


+ 13
- 15
lark/parsers/lalr_parser.py ファイルの表示

@@ -8,7 +8,7 @@ from ..lexer import Token
from ..utils import Serialize

from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
from .lalr_puppet import ParserPuppet
from .lalr_puppet import InteractiveParser
from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken

###{standalone
@@ -33,9 +33,8 @@ class LALR_Parser(Serialize):
def serialize(self, memo):
return self._parse_table.serialize(memo)
def get_puppet(self, lexer, start):
return self.parser.get_puppet(lexer, start)
def parse_interactive(self, lexer, start):
return self.parser.parse(lexer, start, start_interactive=True)

def parse(self, lexer, start, on_error=None):
try:
@@ -46,7 +45,7 @@ class LALR_Parser(Serialize):

while True:
if isinstance(e, UnexpectedCharacters):
s = e.puppet.lexer_state.state
s = e.interactive_parser.lexer_state.state
p = s.line_ctr.char_pos

if not on_error(e):
@@ -58,9 +57,11 @@ class LALR_Parser(Serialize):
s.line_ctr.feed(s.text[p:p+1])

try:
return e.puppet.resume_parse()
return e.interactive_parser.resume_parse()
except UnexpectedToken as e2:
if isinstance(e, UnexpectedToken) and e.token.type == e2.token.type == '$END' and e.puppet == e2.puppet:
if (isinstance(e, UnexpectedToken)
and e.token.type == e2.token.type == '$END'
and e.interactive_parser == e2.interactive_parser):
# Prevent infinite loop
raise e2
e = e2
@@ -125,7 +126,7 @@ class ParserState(object):
action, arg = states[state][token.type]
except KeyError:
expected = {s for s in states[state].keys() if s.isupper()}
raise UnexpectedToken(token, expected, state=self, puppet=None)
raise UnexpectedToken(token, expected, state=self, interactive_parser=None)

assert arg != end_state

@@ -162,14 +163,11 @@ class _Parser(object):
self.callbacks = callbacks
self.debug = debug

def get_puppet(self, lexer, start, value_stack=None, state_stack=None):
parse_conf = ParseConf(self.parse_table, self.callbacks, start)
parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
return ParserPuppet(self, parser_state, parser_state.lexer)
def parse(self, lexer, start, value_stack=None, state_stack=None):
def parse(self, lexer, start, value_stack=None, state_stack=None, start_interactive=False):
parse_conf = ParseConf(self.parse_table, self.callbacks, start)
parser_state = ParserState(parse_conf, lexer, state_stack, value_stack)
if start_interactive:
return InteractiveParser(self, parser_state, parser_state.lexer)
return self.parse_from_state(parser_state)

@@ -184,7 +182,7 @@ class _Parser(object):
return state.feed_token(token, True)
except UnexpectedInput as e:
try:
e.puppet = ParserPuppet(self, state, state.lexer)
e.interactive_parser = InteractiveParser(self, state, state.lexer)
except NameError:
pass
raise e


+ 37
- 23
lark/parsers/lalr_puppet.py ファイルの表示

@@ -1,16 +1,15 @@
# This module provide a LALR puppet, which is used to debugging and error handling
# This module provides a LALR interactive parser, which is used for debugging and error handling

from copy import copy

from .lalr_analysis import Shift, Reduce
from .. import Token
from ..exceptions import UnexpectedToken


class ParserPuppet(object):
"""ParserPuppet gives you advanced control over error handling when parsing with LALR.
class InteractiveParser(object):
"""InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.

For a simpler, more streamlined interface, see the ``on_error`` argument to ``Lark.parse()``.
For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
"""
def __init__(self, parser, parser_state, lexer_state):
self.parser = parser
@@ -25,9 +24,9 @@ class ParserPuppet(object):
return self.parser_state.feed_token(token, token.type == '$END')
def exhaust_lexer(self):
"""Try to feed the rest of the lexer state into the puppet.
"""Try to feed the rest of the lexer state into the interactive parser.
Note that this modifies the puppet in place and does not feed an '$END' Token"""
Note that this modifies the instance in place and does not feed an '$END' Token"""
for token in self.lexer_state.lex(self.parser_state):
self.parser_state.feed_token(token)
@@ -38,9 +37,9 @@ class ParserPuppet(object):


def __copy__(self):
"""Create a new puppet with a separate state.
"""Create a new interactive parser with a separate state.

Calls to feed_token() won't affect the old puppet, and vice-versa.
Calls to feed_token() won't affect the old instance, and vice-versa.
"""
return type(self)(
self.parser,
@@ -52,18 +51,19 @@ class ParserPuppet(object):
return copy(self)

def __eq__(self, other):
if not isinstance(other, ParserPuppet):
if not isinstance(other, InteractiveParser):
return False

return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state

def as_immutable(self):
"""Convert to an ``ImmutableInteractiveParser``."""
p = copy(self)
return ImmutableParserPuppet(p.parser, p.parser_state, p.lexer_state)
return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state)

def pretty(self):
"""Print the output of ``choices()`` in a way that's easier to read."""
out = ["Puppet choices:"]
out = ["Parser choices:"]
for k, v in self.choices().items():
out.append('\t- %s -> %s' % (k, v))
out.append('stack size: %s' % len(self.parser_state.state_stack))
@@ -79,12 +79,13 @@ class ParserPuppet(object):
return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]

def accepts(self):
"""Returns the set of possible tokens that will advance the parser into a new valid state."""
accepts = set()
for t in self.choices():
if t.isupper(): # is terminal?
new_puppet = copy(self)
new_cursor = copy(self)
try:
new_puppet.feed_token(Token(t, ''))
new_cursor.feed_token(Token(t, ''))
except UnexpectedToken:
pass
else:
@@ -92,12 +93,16 @@ class ParserPuppet(object):
return accepts

def resume_parse(self):
"""Resume parsing from the current puppet state."""
"""Resume automated parsing from the current state."""
return self.parser.parse_from_state(self.parser_state)



class ImmutableParserPuppet(ParserPuppet):
class ImmutableInteractiveParser(InteractiveParser):
"""Same as ``InteractiveParser``, but operations create a new instance instead
of changing it in-place.
"""

result = None

def __hash__(self):
@@ -105,14 +110,23 @@ class ImmutableParserPuppet(ParserPuppet):

def feed_token(self, token):
c = copy(self)
c.result = ParserPuppet.feed_token(c, token)
c.result = InteractiveParser.feed_token(c, token)
return c

def exhaust_lexer(self):
"""Try to feed the rest of the lexer state into the puppet.
"""Try to feed the rest of the lexer state into the parser.

Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token"""
cursor = self.as_mutable()
cursor.exhaust_lexer()
return cursor.as_immutable()

def as_mutable(self):
"""Convert to an ``InteractiveParser``."""
p = copy(self)
return InteractiveParser(p.parser, p.parser_state, p.lexer_state)


Note that this returns a new ImmutableParserPuppet and does not feed an '$END' Token"""
res = copy(self)
for token in res.lexer_state.lex(res.parser_state):
res = res.parser_state.feed_token(token)
return res
# Deprecated class names for the interactive parser
ParserPuppet = InteractiveParser
ImmutableParserPuppet = ImmutableInteractiveParser

+ 18
- 18
tests/test_parser.py ファイルの表示

@@ -2404,32 +2404,32 @@ def _make_parser_test(LEXER, PARSER):
B: "b"
''')
puppet = g.get_puppet()
ip = g.parse_interactive()

self.assertRaises(UnexpectedToken, puppet.feed_eof)
self.assertRaises(TypeError, puppet.exhaust_lexer)
puppet.feed_token(Token('A', 'a'))
res = puppet.feed_eof()
self.assertRaises(UnexpectedToken, ip.feed_eof)
self.assertRaises(TypeError, ip.exhaust_lexer)
ip.feed_token(Token('A', 'a'))
res = ip.feed_eof()
self.assertEqual(res, Tree('start', ['a']))

puppet = g.get_puppet("ab")
ip = g.parse_interactive("ab")

puppet.exhaust_lexer()
ip.exhaust_lexer()

puppet_copy = puppet.copy()
self.assertEqual(puppet_copy.parser_state, puppet.parser_state)
self.assertEqual(puppet_copy.lexer_state.state, puppet.lexer_state.state)
self.assertIsNot(puppet_copy.parser_state, puppet.parser_state)
self.assertIsNot(puppet_copy.lexer_state.state, puppet.lexer_state.state)
self.assertIsNot(puppet_copy.lexer_state.state.line_ctr, puppet.lexer_state.state.line_ctr)
ip_copy = ip.copy()
self.assertEqual(ip_copy.parser_state, ip.parser_state)
self.assertEqual(ip_copy.lexer_state.state, ip.lexer_state.state)
self.assertIsNot(ip_copy.parser_state, ip.parser_state)
self.assertIsNot(ip_copy.lexer_state.state, ip.lexer_state.state)
self.assertIsNot(ip_copy.lexer_state.state.line_ctr, ip.lexer_state.state.line_ctr)

res = puppet.feed_eof(puppet.lexer_state.state.last_token)
res = ip.feed_eof(ip.lexer_state.state.last_token)
self.assertEqual(res, Tree('start', ['a', 'b']))
self.assertRaises(UnexpectedToken ,puppet.feed_eof)
self.assertRaises(UnexpectedToken ,ip.feed_eof)
self.assertRaises(UnexpectedToken, puppet_copy.feed_token, Token('A', 'a'))
puppet_copy.feed_token(Token('B', 'b'))
res = puppet_copy.feed_eof()
self.assertRaises(UnexpectedToken, ip_copy.feed_token, Token('A', 'a'))
ip_copy.feed_token(Token('B', 'b'))
res = ip_copy.feed_eof()
self.assertEqual(res, Tree('start', ['a', 'b', 'b']))

@unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now")


読み込み中…
キャンセル
保存