Browse Source

Rename ParserPuppet -> InteractiveParser

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.3
Erez Sh 4 years ago
parent
commit
22c289126f
11 changed files with 156 additions and 196 deletions
  1. +2
    -2
      docs/classes.rst
  2. +1
    -1
      docs/features.md
  3. +6
    -6
      examples/advanced/error_handling.py
  4. +2
    -2
      lark-stubs/exceptions.pyi
  5. +2
    -2
      lark-stubs/lark.pyi
  6. +0
    -43
      lark-stubs/parsers/lalr_puppet.pyi
  7. +1
    -1
      lark/lark.py
  8. +132
    -0
      lark/parsers/lalr_interactive_parser.py
  9. +1
    -1
      lark/parsers/lalr_parser.py
  10. +3
    -132
      lark/parsers/lalr_puppet.py
  11. +6
    -6
      tests/test_parser.py

+ 2
- 2
docs/classes.rst View File

@@ -69,8 +69,8 @@ UnexpectedInput
InteractiveParser
-----------------

.. autoclass:: lark.parsers.lalr_puppet.InteractiveParser
.. autoclass:: lark.parsers.lalr_interactive_parser.InteractiveParser
:members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts

.. autoclass:: lark.parsers.lalr_puppet.ImmutableInteractiveParser
.. autoclass:: lark.parsers.lalr_interactive_parser.ImmutableInteractiveParser
:members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts

+ 1
- 1
docs/features.md View File

@@ -8,7 +8,7 @@
- EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md))
- Builds a parse-tree (AST) automagically based on the grammar
- Stand-alone parser generator - create a small independent parser to embed in your project.
- Flexible error handling by using a "puppet parser" mechanism (LALR only)
- Flexible error handling by using an interactive parser interface (LALR only)
- Automatic line & column tracking (for both tokens and matched rules)
- Automatic terminal collision resolution
- Standard library of terminals (strings, numbers, names, etc.)


examples/advanced/error_puppet.py → examples/advanced/error_handling.py View File

@@ -1,11 +1,11 @@
"""
Error handling with a puppet
==================================
Error handling using an interactive parser
==========================================

This example demonstrates error handling using a parsing puppet in LALR
This example demonstrates error handling using an interactive parser in LALR

When the parser encounters an UnexpectedToken exception, it creates a
parsing puppet with the current parse-state, and lets you control how
an interactive parser with the current parse-state, and lets you control how
to proceed step-by-step. When you've achieved the correct parse-state,
you can resume the run by returning True.
"""
@@ -20,8 +20,8 @@ def ignore_errors(e):
return True
elif e.token.type == 'SIGNED_NUMBER':
# Try to feed a comma and retry the number
e.puppet.feed_token(Token('COMMA', ','))
e.puppet.feed_token(e.token)
e.interactive_parser.feed_token(Token('COMMA', ','))
e.interactive_parser.feed_token(e.token)
return True

# Unhandled error. Will stop parse and raise exception

+ 2
- 2
lark-stubs/exceptions.pyi View File

@@ -3,7 +3,7 @@
from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set
from .tree import Tree
from .lexer import Token
from .parsers.lalr_puppet import ParserPuppet
from .parsers.lalr_interactive_parser import InteractiveParser

class LarkError(Exception):
pass
@@ -52,7 +52,7 @@ class UnexpectedInput(LarkError):
class UnexpectedToken(ParseError, UnexpectedInput):
expected: Set[str]
considered_rules: Set[str]
puppet: ParserPuppet
interactive_parser: InteractiveParser
accepts: Set[str]

class UnexpectedCharacters(LexError, UnexpectedInput):


+ 2
- 2
lark-stubs/lark.pyi View File

@@ -5,7 +5,7 @@ from typing import (
Literal, Protocol, Tuple, Iterable,
)

from .parsers.lalr_puppet import ParserPuppet
from .parsers.lalr_interactive_parser import InteractiveParser
from .visitors import Transformer
from .lexer import Token, Lexer, TerminalDef
from .tree import Tree
@@ -91,7 +91,7 @@ class Lark:
def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree:
...

def parse_interactive(self, text: str = None, start: Optional[str] = None) -> ParserPuppet:
def parse_interactive(self, text: str = None, start: Optional[str] = None) -> InteractiveParser:
...

@classmethod


+ 0
- 43
lark-stubs/parsers/lalr_puppet.pyi View File

@@ -1,43 +0,0 @@
from typing import Set, Dict, Any

from lark import Token, Tree


class ParserPuppet(object):
"""
Provides an interface to interactively step through the parser (LALR(1) only for now)

Accessible via `UnexpectedToken.puppet` (raised by the parser on token error)
"""
parser: Any
parser_state: Any
lexer_state: Any

def feed_token(self, token: Token) -> Any: ...

def exhaust_lexer(self) -> None: ...

def feed_eof(self, last_token: Token = None) -> Any: ...

def copy(self) -> ParserPuppet: ...
def as_immutable(self) -> ImmutableParserPuppet: ...

def pretty(self) -> str: ...

def choices(self) -> Dict[str, Any]: ...

def accepts(self) -> Set[str]: ...

def resume_parse(self) -> Tree: ...


class ImmutableParserPuppet(ParserPuppet):
result: Any = None

def feed_token(self, token: Token) -> ImmutableParserPuppet: ...

def exhaust_lexer(self) -> ImmutableParserPuppet: ...

def feed_eof(self, last_token: Token = None) -> ImmutableParserPuppet: ...

+ 1
- 1
lark/lark.py View File

@@ -542,7 +542,7 @@ class Lark(Serialize):
text (str): Text to be parsed.
start (str, optional): Required if Lark was given multiple possible start symbols (using the start option).
on_error (function, optional): if provided, will be called on UnexpectedToken error. Return true to resume parsing.
LALR only. See examples/advanced/error_puppet.py for an example of how to use on_error.
LALR only. See examples/advanced/error_handling.py for an example of how to use on_error.

Returns:
If a transformer is supplied to ``__init__``, returns whatever is the


+ 132
- 0
lark/parsers/lalr_interactive_parser.py View File

@@ -0,0 +1,132 @@
# This module provides a LALR interactive parser, which is used for debugging and error handling

from copy import copy

from .. import Token
from ..exceptions import UnexpectedToken


class InteractiveParser(object):
    """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.

    For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
    """

    def __init__(self, parser, parser_state, lexer_state):
        """Store the parser together with the parser/lexer states to step through."""
        self.parser = parser
        self.parser_state = parser_state
        self.lexer_state = lexer_state

    def feed_token(self, token):
        """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.

        Note that ``token`` has to be an instance of ``Token``.

        Returns whatever the underlying parser state returns for this token.
        """
        # The second argument flags whether this token is the end-of-input marker.
        return self.parser_state.feed_token(token, token.type == '$END')

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the interactive parser.

        Note that this modifies the instance in place and does not feed an '$END' Token.
        """
        for token in self.lexer_state.lex(self.parser_state):
            self.parser_state.feed_token(token)

    def feed_eof(self, last_token=None):
        """Feed a '$END' Token. Borrows from 'last_token' if given."""
        if last_token is not None:
            eof = Token.new_borrow_pos('$END', '', last_token)
        else:
            eof = Token('$END', '', 0, 1, 1)
        return self.feed_token(eof)

    def __copy__(self):
        """Create a new interactive parser with a separate state.

        Calls to feed_token() won't affect the old instance, and vice-versa.
        """
        # type(self) keeps the concrete class, so subclasses copy to their own type.
        return type(self)(
            self.parser,
            copy(self.parser_state),
            copy(self.lexer_state),
        )

    def copy(self):
        """Return a copy of this parser (see ``__copy__``)."""
        return copy(self)

    def __eq__(self, other):
        """Compare by parser state and lexer state; the ``parser`` attribute is not compared.

        Returns ``NotImplemented`` for objects that are not interactive parsers,
        so Python can try the other operand's comparison before settling on
        ``False``.
        """
        if not isinstance(other, InteractiveParser):
            return NotImplemented

        return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state

    def as_immutable(self):
        """Convert to an ``ImmutableInteractiveParser``."""
        # Copy first so the immutable instance does not share state with this one.
        p = copy(self)
        return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state)

    def pretty(self):
        """Return the output of ``choices()`` as a string that's easier to read."""
        out = ["Parser choices:"]
        out.extend('\t- %s -> %s' % (k, v) for k, v in self.choices().items())
        out.append('stack size: %s' % len(self.parser_state.state_stack))
        return '\n'.join(out)

    def choices(self):
        """Returns a dictionary of token types, matched to their action in the parser.

        Only returns token types that are accepted by the current state.

        Updated by ``feed_token()``.
        """
        return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]

    def accepts(self):
        """Returns the set of possible tokens that will advance the parser into a new valid state."""
        accepts = set()
        for t in self.choices():
            if t.isupper():  # is terminal?
                # Probe on a copy so this instance's state is left untouched.
                new_cursor = copy(self)
                try:
                    new_cursor.feed_token(Token(t, ''))
                except UnexpectedToken:
                    pass
                else:
                    accepts.add(t)
        return accepts

    def resume_parse(self):
        """Resume automated parsing from the current state."""
        return self.parser.parse_from_state(self.parser_state)



class ImmutableInteractiveParser(InteractiveParser):
    """Variant of ``InteractiveParser`` whose operations leave the instance untouched.

    Rather than changing state in place, each operation hands back a new instance.
    """

    # Value produced by the most recent feed_token() call; None until a token is fed.
    result = None

    def __hash__(self):
        """Hash on the (parser_state, lexer_state) pair."""
        state_pair = (self.parser_state, self.lexer_state)
        return hash(state_pair)

    def feed_token(self, token):
        """Feed ``token`` to a copy of this parser and return that copy.

        The value produced by feeding the token is stored on the copy's
        ``result`` attribute.
        """
        advanced = copy(self)
        # Call the base implementation explicitly: the overridden method would copy again.
        advanced.result = InteractiveParser.feed_token(advanced, token)
        return advanced

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the parser.

        Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token.
        """
        mutable = self.as_mutable()
        mutable.exhaust_lexer()
        return mutable.as_immutable()

    def as_mutable(self):
        """Convert to an ``InteractiveParser``."""
        clone = copy(self)
        return InteractiveParser(clone.parser, clone.parser_state, clone.lexer_state)


# Deprecated class names for the interactive parser.
# These are plain aliases of the classes above, so the old "puppet" names keep resolving.
ParserPuppet = InteractiveParser
ImmutableParserPuppet = ImmutableInteractiveParser

+ 1
- 1
lark/parsers/lalr_parser.py View File

@@ -8,7 +8,7 @@ from ..lexer import Token
from ..utils import Serialize

from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
from .lalr_puppet import InteractiveParser
from .lalr_interactive_parser import InteractiveParser
from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken

###{standalone


+ 3
- 132
lark/parsers/lalr_puppet.py View File

@@ -1,132 +1,3 @@
# This module provides a LALR interactive parser, which is used for debugging and error handling

from copy import copy

from .. import Token
from ..exceptions import UnexpectedToken


class InteractiveParser(object):
"""InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.

For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
"""
def __init__(self, parser, parser_state, lexer_state):
self.parser = parser
self.parser_state = parser_state
self.lexer_state = lexer_state

def feed_token(self, token):
"""Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.

Note that ``token`` has to be an instance of ``Token``.
"""
return self.parser_state.feed_token(token, token.type == '$END')
def exhaust_lexer(self):
"""Try to feed the rest of the lexer state into the interactive parser.
Note that this modifies the instance in place and does not feed an '$END' Token"""
for token in self.lexer_state.lex(self.parser_state):
self.parser_state.feed_token(token)
def feed_eof(self, last_token=None):
"""Feed a '$END' Token. Borrows from 'last_token' if given."""
eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else Token('$END', '', 0, 1, 1)
return self.feed_token(eof)


def __copy__(self):
"""Create a new interactive parser with a separate state.

Calls to feed_token() won't affect the old instance, and vice-versa.
"""
return type(self)(
self.parser,
copy(self.parser_state),
copy(self.lexer_state),
)

def copy(self):
return copy(self)

def __eq__(self, other):
if not isinstance(other, InteractiveParser):
return False

return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state

def as_immutable(self):
"""Convert to an ``ImmutableInteractiveParser``."""
p = copy(self)
return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state)

def pretty(self):
"""Print the output of ``choices()`` in a way that's easier to read."""
out = ["Parser choices:"]
for k, v in self.choices().items():
out.append('\t- %s -> %s' % (k, v))
out.append('stack size: %s' % len(self.parser_state.state_stack))
return '\n'.join(out)

def choices(self):
"""Returns a dictionary of token types, matched to their action in the parser.

Only returns token types that are accepted by the current state.

Updated by ``feed_token()``.
"""
return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]

def accepts(self):
"""Returns the set of possible tokens that will advance the parser into a new valid state."""
accepts = set()
for t in self.choices():
if t.isupper(): # is terminal?
new_cursor = copy(self)
try:
new_cursor.feed_token(Token(t, ''))
except UnexpectedToken:
pass
else:
accepts.add(t)
return accepts

def resume_parse(self):
"""Resume automated parsing from the current state."""
return self.parser.parse_from_state(self.parser_state)



class ImmutableInteractiveParser(InteractiveParser):
"""Same as ``InteractiveParser``, but operations create a new instance instead
of changing it in-place.
"""

result = None

def __hash__(self):
return hash((self.parser_state, self.lexer_state))

def feed_token(self, token):
c = copy(self)
c.result = InteractiveParser.feed_token(c, token)
return c

def exhaust_lexer(self):
"""Try to feed the rest of the lexer state into the parser.

Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token"""
cursor = self.as_mutable()
cursor.exhaust_lexer()
return cursor.as_immutable()

def as_mutable(self):
"""Convert to an ``InteractiveParser``."""
p = copy(self)
return InteractiveParser(p.parser, p.parser_state, p.lexer_state)


# Deprecated class names for the interactive parser
ParserPuppet = InteractiveParser
ImmutableParserPuppet = ImmutableInteractiveParser
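# Deprecated module: the implementation now lives in lalr_interactive_parser.
# The old puppet class names are re-exported here for backward compatibility.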
from .lalr_interactive_parser import ParserPuppet, ImmutableParserPuppet

+ 6
- 6
tests/test_parser.py View File

@@ -2395,8 +2395,8 @@ def _make_parser_test(LEXER, PARSER):
""", regex=True)
self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')

@unittest.skipIf(PARSER!='lalr', "Puppet is only implemented for LALR at the moment")
def test_parser_puppet(self):
@unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment")
def test_parser_interactive_parser(self):

g = _Lark(r'''
start: A+ B*
@@ -2432,8 +2432,8 @@ def _make_parser_test(LEXER, PARSER):
res = ip_copy.feed_eof()
self.assertEqual(res, Tree('start', ['a', 'b', 'b']))

@unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now")
def test_error_with_puppet(self):
@unittest.skipIf(PARSER!='lalr', "interactive_parser error handling only works with LALR for now")
def test_error_with_interactive_parser(self):
def ignore_errors(e):
if isinstance(e, UnexpectedCharacters):
# Skip bad character
@@ -2445,8 +2445,8 @@ def _make_parser_test(LEXER, PARSER):
return True
elif e.token.type == 'SIGNED_NUMBER':
# Try to feed a comma and retry the number
e.puppet.feed_token(Token('COMMA', ','))
e.puppet.feed_token(e.token)
e.interactive_parser.feed_token(Token('COMMA', ','))
e.interactive_parser.feed_token(e.token)

return True



Loading…
Cancel
Save