Browse Source

Rename ParserPuppet -> InteractiveParser

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.3
Erez Sh 4 years ago
parent
commit
22c289126f
11 changed files with 156 additions and 196 deletions
  1. +2
    -2
      docs/classes.rst
  2. +1
    -1
      docs/features.md
  3. +6
    -6
      examples/advanced/error_handling.py
  4. +2
    -2
      lark-stubs/exceptions.pyi
  5. +2
    -2
      lark-stubs/lark.pyi
  6. +0
    -43
      lark-stubs/parsers/lalr_puppet.pyi
  7. +1
    -1
      lark/lark.py
  8. +132
    -0
      lark/parsers/lalr_interactive_parser.py
  9. +1
    -1
      lark/parsers/lalr_parser.py
  10. +3
    -132
      lark/parsers/lalr_puppet.py
  11. +6
    -6
      tests/test_parser.py

+ 2
- 2
docs/classes.rst View File

@@ -69,8 +69,8 @@ UnexpectedInput
InteractiveParser InteractiveParser
----------------- -----------------


.. autoclass:: lark.parsers.lalr_puppet.InteractiveParser
.. autoclass:: lark.parsers.lalr_interactive_parser.InteractiveParser
:members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts :members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts


.. autoclass:: lark.parsers.lalr_puppet.ImmutableInteractiveParser
.. autoclass:: lark.parsers.lalr_interactive_parser.ImmutableInteractiveParser
:members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts :members: choices, feed_token, copy, pretty, resume_parse, exhaust_lexer, accepts

+ 1
- 1
docs/features.md View File

@@ -8,7 +8,7 @@
- EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md)) - EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md))
- Builds a parse-tree (AST) automagically based on the grammar - Builds a parse-tree (AST) automagically based on the grammar
- Stand-alone parser generator - create a small independent parser to embed in your project. - Stand-alone parser generator - create a small independent parser to embed in your project.
- Flexible error handling by using a "puppet parser" mechanism (LALR only)
- Flexible error handling by using an interactive parser interface (LALR only)
- Automatic line & column tracking (for both tokens and matched rules) - Automatic line & column tracking (for both tokens and matched rules)
- Automatic terminal collision resolution - Automatic terminal collision resolution
- Standard library of terminals (strings, numbers, names, etc.) - Standard library of terminals (strings, numbers, names, etc.)


examples/advanced/error_puppet.py → examples/advanced/error_handling.py View File

@@ -1,11 +1,11 @@
""" """
Error handling with a puppet
==================================
Error handling using an interactive parser
==========================================


This example demonstrates error handling using a parsing puppet in LALR
This example demonstrates error handling using an interactive parser in LALR mode.


When the parser encounters an UnexpectedToken exception, it creates a When the parser encounters an UnexpectedToken exception, it creates a
parsing puppet with the current parse-state, and lets you control how
an interactive parser with the current parse-state, and lets you control how
to proceed step-by-step. When you've achieved the correct parse-state, to proceed step-by-step. When you've achieved the correct parse-state,
you can resume the run by returning True. you can resume the run by returning True.
""" """
@@ -20,8 +20,8 @@ def ignore_errors(e):
return True return True
elif e.token.type == 'SIGNED_NUMBER': elif e.token.type == 'SIGNED_NUMBER':
# Try to feed a comma and retry the number # Try to feed a comma and retry the number
e.puppet.feed_token(Token('COMMA', ','))
e.puppet.feed_token(e.token)
e.interactive_parser.feed_token(Token('COMMA', ','))
e.interactive_parser.feed_token(e.token)
return True return True


# Unhandled error. Will stop parse and raise exception # Unhandled error. Will stop parse and raise exception

+ 2
- 2
lark-stubs/exceptions.pyi View File

@@ -3,7 +3,7 @@
from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set
from .tree import Tree from .tree import Tree
from .lexer import Token from .lexer import Token
from .parsers.lalr_puppet import ParserPuppet
from .parsers.lalr_interactive_parser import InteractiveParser


class LarkError(Exception): class LarkError(Exception):
pass pass
@@ -52,7 +52,7 @@ class UnexpectedInput(LarkError):
class UnexpectedToken(ParseError, UnexpectedInput): class UnexpectedToken(ParseError, UnexpectedInput):
expected: Set[str] expected: Set[str]
considered_rules: Set[str] considered_rules: Set[str]
puppet: ParserPuppet
interactive_parser: InteractiveParser
accepts: Set[str] accepts: Set[str]


class UnexpectedCharacters(LexError, UnexpectedInput): class UnexpectedCharacters(LexError, UnexpectedInput):


+ 2
- 2
lark-stubs/lark.pyi View File

@@ -5,7 +5,7 @@ from typing import (
Literal, Protocol, Tuple, Iterable, Literal, Protocol, Tuple, Iterable,
) )


from .parsers.lalr_puppet import ParserPuppet
from .parsers.lalr_interactive_parser import InteractiveParser
from .visitors import Transformer from .visitors import Transformer
from .lexer import Token, Lexer, TerminalDef from .lexer import Token, Lexer, TerminalDef
from .tree import Tree from .tree import Tree
@@ -91,7 +91,7 @@ class Lark:
def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree: def parse(self, text: str, start: Optional[str] = None, on_error: Callable[[UnexpectedInput], bool] = None) -> Tree:
... ...


def parse_interactive(self, text: str = None, start: Optional[str] = None) -> ParserPuppet:
def parse_interactive(self, text: str = None, start: Optional[str] = None) -> InteractiveParser:
... ...


@classmethod @classmethod


+ 0
- 43
lark-stubs/parsers/lalr_puppet.pyi View File

@@ -1,43 +0,0 @@
from typing import Set, Dict, Any

from lark import Token, Tree


class ParserPuppet(object):
"""
Provides an interface to interactively step through the parser (LALR(1) only for now)

Accessible via `UnexpectedToken.puppet` (raised by the parser on token error)
"""
parser: Any
parser_state: Any
lexer_state: Any

def feed_token(self, token: Token) -> Any: ...

def exhaust_lexer(self) -> None: ...

def feed_eof(self, last_token: Token = None) -> Any: ...

def copy(self) -> ParserPuppet: ...
def as_immutable(self) -> ImmutableParserPuppet: ...

def pretty(self) -> str: ...

def choices(self) -> Dict[str, Any]: ...

def accepts(self) -> Set[str]: ...

def resume_parse(self) -> Tree: ...


class ImmutableParserPuppet(ParserPuppet):
result: Any = None

def feed_token(self, token: Token) -> ImmutableParserPuppet: ...

def exhaust_lexer(self) -> ImmutableParserPuppet: ...

def feed_eof(self, last_token: Token = None) -> ImmutableParserPuppet: ...

+ 1
- 1
lark/lark.py View File

@@ -542,7 +542,7 @@ class Lark(Serialize):
text (str): Text to be parsed. text (str): Text to be parsed.
start (str, optional): Required if Lark was given multiple possible start symbols (using the start option). start (str, optional): Required if Lark was given multiple possible start symbols (using the start option).
on_error (function, optional): if provided, will be called on UnexpectedToken error. Return true to resume parsing. on_error (function, optional): if provided, will be called on UnexpectedToken error. Return true to resume parsing.
LALR only. See examples/advanced/error_puppet.py for an example of how to use on_error.
LALR only. See examples/advanced/error_handling.py for an example of how to use on_error.


Returns: Returns:
If a transformer is supplied to ``__init__``, returns whatever is the If a transformer is supplied to ``__init__``, returns whatever is the


+ 132
- 0
lark/parsers/lalr_interactive_parser.py View File

@@ -0,0 +1,132 @@
# This module provides a LALR interactive parser, which is used for debugging and error handling

from copy import copy

from .. import Token
from ..exceptions import UnexpectedToken


class InteractiveParser(object):
    """InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.

    For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.

    Attributes:
        parser: Object whose ``parse_from_state()`` is used by ``resume_parse()``.
        parser_state: Current parser state; tokens are fed to its ``feed_token()``.
        lexer_state: Lexer state; its ``lex()`` yields the remaining tokens.
    """

    def __init__(self, parser, parser_state, lexer_state):
        self.parser = parser
        self.parser_state = parser_state
        self.lexer_state = lexer_state

    def feed_token(self, token):
        """Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.

        Note that ``token`` has to be an instance of ``Token``.

        Returns:
            Whatever ``parser_state.feed_token()`` returns.
        """
        # The second argument tells the parser state whether this is the end-of-input token.
        is_end = token.type == '$END'
        return self.parser_state.feed_token(token, is_end)

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the interactive parser.

        Note that this modifies the instance in place and does not feed an '$END' Token.
        """
        for token in self.lexer_state.lex(self.parser_state):
            self.parser_state.feed_token(token)

    def feed_eof(self, last_token=None):
        """Feed a '$END' Token. Borrows from 'last_token' if given.

        Returns:
            The result of ``feed_token()`` for the '$END' token.
        """
        if last_token is not None:
            eof = Token.new_borrow_pos('$END', '', last_token)
        else:
            # NOTE(review): the trailing 0, 1, 1 are presumably the start position,
            # line and column of an empty input -- confirm against Token's signature.
            eof = Token('$END', '', 0, 1, 1)
        return self.feed_token(eof)

    def __copy__(self):
        """Create a new interactive parser with a separate state.

        Calls to feed_token() won't affect the old instance, and vice-versa.
        """
        # type(self) keeps subclasses (e.g. the immutable variant) as their own type.
        return type(self)(
            self.parser,
            copy(self.parser_state),
            copy(self.lexer_state),
        )

    def copy(self):
        """Return a copy of this parser; same as ``copy.copy(self)``."""
        return copy(self)

    def __eq__(self, other):
        """Two interactive parsers are equal when their parser and lexer states are equal."""
        if not isinstance(other, InteractiveParser):
            # Let Python try the reflected comparison; ``==`` still ends up False
            # for unrelated types.
            return NotImplemented

        return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state

    def as_immutable(self):
        """Convert to an ``ImmutableInteractiveParser``."""
        # Copy first so the immutable parser does not share state with this one.
        p = copy(self)
        return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state)

    def pretty(self):
        """Return the output of ``choices()`` as a string that's easier to read.

        The last line reports the size of the parser's state stack.
        """
        out = ["Parser choices:"]
        out.extend('\t- %s -> %s' % (k, v) for k, v in self.choices().items())
        out.append('stack size: %s' % len(self.parser_state.state_stack))
        return '\n'.join(out)

    def choices(self):
        """Returns a dictionary of token types, matched to their action in the parser.

        Only returns token types that are accepted by the current state.

        Updated by ``feed_token()``.
        """
        parse_table = self.parser_state.parse_conf.parse_table
        return parse_table.states[self.parser_state.position]

    def accepts(self):
        """Returns the set of possible tokens that will advance the parser into a new valid state."""
        accepts = set()
        for t in self.choices():
            if t.isupper():  # is terminal?
                # Probe on a copy so this parser's own state is left untouched.
                new_cursor = copy(self)
                try:
                    new_cursor.feed_token(Token(t, ''))
                except UnexpectedToken:
                    pass
                else:
                    accepts.add(t)
        return accepts

    def resume_parse(self):
        """Resume automated parsing from the current state.

        Returns:
            Whatever ``parser.parse_from_state()`` returns for the current parser state.
        """
        return self.parser.parse_from_state(self.parser_state)



class ImmutableInteractiveParser(InteractiveParser):
    """Same as ``InteractiveParser``, but operations create a new instance instead
    of changing it in-place.
    """

    # Set by feed_token() on the copy it returns; stays None otherwise.
    result = None

    def __hash__(self):
        """Hash on the parser and lexer states."""
        state_key = (self.parser_state, self.lexer_state)
        return hash(state_key)

    def feed_token(self, token):
        """Feed ``token`` to a copy of this parser and return that copy.

        The value produced by feeding the token is stored on the copy's ``result``.
        """
        advanced = copy(self)
        advanced.result = InteractiveParser.feed_token(advanced, token)
        return advanced

    def exhaust_lexer(self):
        """Try to feed the rest of the lexer state into the parser.

        Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token"""
        # Do the in-place work on a mutable copy, then convert the result back.
        mutable = self.as_mutable()
        mutable.exhaust_lexer()
        return mutable.as_immutable()

    def as_mutable(self):
        """Convert to an ``InteractiveParser``."""
        snapshot = copy(self)
        return InteractiveParser(snapshot.parser, snapshot.parser_state, snapshot.lexer_state)


# Deprecated class names for the interactive parser.
# These are plain aliases, so the old and new names refer to the same class objects.
ParserPuppet = InteractiveParser
ImmutableParserPuppet = ImmutableInteractiveParser

+ 1
- 1
lark/parsers/lalr_parser.py View File

@@ -8,7 +8,7 @@ from ..lexer import Token
from ..utils import Serialize from ..utils import Serialize


from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
from .lalr_puppet import InteractiveParser
from .lalr_interactive_parser import InteractiveParser
from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken from lark.exceptions import UnexpectedCharacters, UnexpectedInput, UnexpectedToken


###{standalone ###{standalone


+ 3
- 132
lark/parsers/lalr_puppet.py View File

@@ -1,132 +1,3 @@
# This module provides a LALR interactive parser, which is used for debugging and error handling

from copy import copy

from .. import Token
from ..exceptions import UnexpectedToken


class InteractiveParser(object):
"""InteractiveParser gives you advanced control over parsing and error handling when parsing with LALR.

For a simpler interface, see the ``on_error`` argument to ``Lark.parse()``.
"""
def __init__(self, parser, parser_state, lexer_state):
self.parser = parser
self.parser_state = parser_state
self.lexer_state = lexer_state

def feed_token(self, token):
"""Feed the parser with a token, and advance it to the next state, as if it received it from the lexer.

Note that ``token`` has to be an instance of ``Token``.
"""
return self.parser_state.feed_token(token, token.type == '$END')
def exhaust_lexer(self):
"""Try to feed the rest of the lexer state into the interactive parser.
Note that this modifies the instance in place and does not feed an '$END' Token"""
for token in self.lexer_state.lex(self.parser_state):
self.parser_state.feed_token(token)
def feed_eof(self, last_token=None):
"""Feed a '$END' Token. Borrows from 'last_token' if given."""
eof = Token.new_borrow_pos('$END', '', last_token) if last_token is not None else Token('$END', '', 0, 1, 1)
return self.feed_token(eof)


def __copy__(self):
"""Create a new interactive parser with a separate state.

Calls to feed_token() won't affect the old instance, and vice-versa.
"""
return type(self)(
self.parser,
copy(self.parser_state),
copy(self.lexer_state),
)

def copy(self):
return copy(self)

def __eq__(self, other):
if not isinstance(other, InteractiveParser):
return False

return self.parser_state == other.parser_state and self.lexer_state == other.lexer_state

def as_immutable(self):
"""Convert to an ``ImmutableInteractiveParser``."""
p = copy(self)
return ImmutableInteractiveParser(p.parser, p.parser_state, p.lexer_state)

def pretty(self):
"""Print the output of ``choices()`` in a way that's easier to read."""
out = ["Parser choices:"]
for k, v in self.choices().items():
out.append('\t- %s -> %s' % (k, v))
out.append('stack size: %s' % len(self.parser_state.state_stack))
return '\n'.join(out)

def choices(self):
"""Returns a dictionary of token types, matched to their action in the parser.

Only returns token types that are accepted by the current state.

Updated by ``feed_token()``.
"""
return self.parser_state.parse_conf.parse_table.states[self.parser_state.position]

def accepts(self):
"""Returns the set of possible tokens that will advance the parser into a new valid state."""
accepts = set()
for t in self.choices():
if t.isupper(): # is terminal?
new_cursor = copy(self)
try:
new_cursor.feed_token(Token(t, ''))
except UnexpectedToken:
pass
else:
accepts.add(t)
return accepts

def resume_parse(self):
"""Resume automated parsing from the current state."""
return self.parser.parse_from_state(self.parser_state)



class ImmutableInteractiveParser(InteractiveParser):
"""Same as ``InteractiveParser``, but operations create a new instance instead
of changing it in-place.
"""

result = None

def __hash__(self):
return hash((self.parser_state, self.lexer_state))

def feed_token(self, token):
c = copy(self)
c.result = InteractiveParser.feed_token(c, token)
return c

def exhaust_lexer(self):
"""Try to feed the rest of the lexer state into the parser.

Note that this returns a new ImmutableInteractiveParser and does not feed an '$END' Token"""
cursor = self.as_mutable()
cursor.exhaust_lexer()
return cursor.as_immutable()

def as_mutable(self):
"""Convert to an ``InteractiveParser``."""
p = copy(self)
return InteractiveParser(p.parser, p.parser_state, p.lexer_state)


# Deprecated class names for the interactive parser
ParserPuppet = InteractiveParser
ImmutableParserPuppet = ImmutableInteractiveParser
# Deprecated: this module now only re-exports the old puppet class names
# from lalr_interactive_parser.
from .lalr_interactive_parser import ParserPuppet, ImmutableParserPuppet

+ 6
- 6
tests/test_parser.py View File

@@ -2395,8 +2395,8 @@ def _make_parser_test(LEXER, PARSER):
""", regex=True) """, regex=True)
self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்') self.assertEqual(g.parse('வணக்கம்'), 'வணக்கம்')


@unittest.skipIf(PARSER!='lalr', "Puppet is only implemented for LALR at the moment")
def test_parser_puppet(self):
@unittest.skipIf(PARSER!='lalr', "interactive_parser is only implemented for LALR at the moment")
def test_parser_interactive_parser(self):


g = _Lark(r''' g = _Lark(r'''
start: A+ B* start: A+ B*
@@ -2432,8 +2432,8 @@ def _make_parser_test(LEXER, PARSER):
res = ip_copy.feed_eof() res = ip_copy.feed_eof()
self.assertEqual(res, Tree('start', ['a', 'b', 'b'])) self.assertEqual(res, Tree('start', ['a', 'b', 'b']))


@unittest.skipIf(PARSER!='lalr', "Puppet error handling only works with LALR for now")
def test_error_with_puppet(self):
@unittest.skipIf(PARSER!='lalr', "interactive_parser error handling only works with LALR for now")
def test_error_with_interactive_parser(self):
def ignore_errors(e): def ignore_errors(e):
if isinstance(e, UnexpectedCharacters): if isinstance(e, UnexpectedCharacters):
# Skip bad character # Skip bad character
@@ -2445,8 +2445,8 @@ def _make_parser_test(LEXER, PARSER):
return True return True
elif e.token.type == 'SIGNED_NUMBER': elif e.token.type == 'SIGNED_NUMBER':
# Try to feed a comma and retry the number # Try to feed a comma and retry the number
e.puppet.feed_token(Token('COMMA', ','))
e.puppet.feed_token(e.token)
e.interactive_parser.feed_token(Token('COMMA', ','))
e.interactive_parser.feed_token(e.token)


return True return True




Loading…
Cancel
Save