Browse Source

Refactored ParserPuppet, added stubs

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.10.0
MegaIng1 4 years ago
parent
commit
cb2d9cded0
6 changed files with 56 additions and 27 deletions
  1. +5
    -5
      lark-stubs/exceptions.pyi
  2. +0
    -0
      lark-stubs/parsers/__init__.pyi
  3. +21
    -0
      lark-stubs/parsers/lalr_puppet.pyi
  4. +10
    -9
      lark/exceptions.py
  5. +2
    -10
      lark/parsers/lalr_parser.py
  6. +18
    -3
      lark/parsers/lalr_puppet.py

+ 5
- 5
lark-stubs/exceptions.pyi View File

@@ -3,7 +3,7 @@
from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set from typing import Dict, Iterable, Callable, Union, TypeVar, Tuple, Any, List, Set
from .tree import Tree from .tree import Tree
from .lexer import Token from .lexer import Token
from .parsers.lalr_puppet import ParserPuppet


class LarkError(Exception): class LarkError(Exception):
pass pass
@@ -38,16 +38,16 @@ class UnexpectedInput(LarkError):
parse_fn: Callable[[str], Tree], parse_fn: Callable[[str], Tree],
examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]], examples: Union[Dict[T, Iterable[str]], Iterable[Tuple[T, Iterable[str]]]],
token_type_match_fallback: bool = False, token_type_match_fallback: bool = False,
print_debug_info: bool = True
use_accepts: bool = False,
) -> T: ) -> T:
... ...




class UnexpectedToken(ParseError, UnexpectedInput): class UnexpectedToken(ParseError, UnexpectedInput):
expected: List[str]
expected: Set[str]
considered_rules: Set[str] considered_rules: Set[str]
puppet: Any
accepts: List[str]
puppet: ParserPuppet
accepts: Set[str]


class UnexpectedCharacters(LexError, UnexpectedInput): class UnexpectedCharacters(LexError, UnexpectedInput):
allowed: Set[str] allowed: Set[str]


+ 0
- 0
lark-stubs/parsers/__init__.pyi View File


+ 21
- 0
lark-stubs/parsers/lalr_puppet.pyi View File

@@ -0,0 +1,21 @@
from typing import Set, Dict, Any

from lark import Token, Tree


class ParserPuppet(object):
"""
Represents a LalrParser that can be stepped through.
Shouldn't be instantiated by hand, but is accessible as `UnexpectedToken.puppet`
"""
# Feed a single token to the puppet. `accepts` treats a KeyError raised
# here as "this token is not accepted in the current state".
def feed_token(self, token: Token): ...

# Return a copy of this puppet, so tokens can be tried without touching
# the original.
def copy(self) -> ParserPuppet: ...

# Return a printable summary: one line per available choice, followed by
# the current state-stack size.
def pretty(self) -> str: ...

# Return the parse-table entry for the state on top of the state stack,
# keyed by name.
def choices(self) -> Dict[str, Any]: ...

# Return the names from `choices()` that a copy of this puppet can be fed
# without `feed_token` raising KeyError.
def accepts(self) -> Set[str]: ...

# Continue the parse from the puppet's saved stacks and token stream.
def resume_parse(self) -> Tree: ...

+ 10
- 9
lark/exceptions.py View File

@@ -1,3 +1,5 @@
import logging

from .utils import STRING_TYPE from .utils import STRING_TYPE


###{standalone ###{standalone
@@ -37,7 +39,7 @@ class UnexpectedInput(LarkError):
after = text[pos:end].split(b'\n', 1)[0] after = text[pos:end].split(b'\n', 1)[0]
return (before + after + b'\n' + b' ' * len(before) + b'^\n').decode("ascii", "backslashreplace") return (before + after + b'\n' + b' ' * len(before) + b'^\n').decode("ascii", "backslashreplace")


def match_examples(self, parse_fn, examples, token_type_match_fallback=False, print_debug_info=True):
def match_examples(self, parse_fn, examples, token_type_match_fallback=False, use_accepts=False):
""" Given a parser instance and a dictionary mapping some label with """ Given a parser instance and a dictionary mapping some label with
some malformed syntax examples, it'll return the label for the some malformed syntax examples, it'll return the label for the
example that best matches the current error. example that best matches the current error.
@@ -55,27 +57,26 @@ class UnexpectedInput(LarkError):
try: try:
parse_fn(malformed) parse_fn(malformed)
except UnexpectedInput as ut: except UnexpectedInput as ut:
if ut.state == self.state and ut.accepts == self.accepts:
if ut.state == self.state and (not use_accepts or ut.accepts == self.accepts):
try: try:
if ut.token == self.token: # Try exact match first if ut.token == self.token: # Try exact match first
if print_debug_info:
print("Exact Match at %d, with example %d" % (i, j), (ut.token, self.token, ut.state, self.state))
logging.debug("Exact Match at example [%s][%s]" % (i, j))
return label return label


if token_type_match_fallback: if token_type_match_fallback:
# Fallback to token types match # Fallback to token types match
if (ut.token.type == self.token.type) and not candidate[-1]: if (ut.token.type == self.token.type) and not candidate[-1]:
if print_debug_info:
print("Token Type Fallback at %d, with example %d" % (i, j))
logging.debug("Token Type Fallback at example [%s][%s]" % (i, j))
candidate = label, True candidate = label, True


except AttributeError: except AttributeError:
pass pass
if not candidate[0]: if not candidate[0]:
if print_debug_info:
print("Defaulted at %d, with example %d" % (i, j))
logging.debug("Same State match at example [%s][%s]" % (i, j))
candidate = label, False candidate = label, False

elif ut.state == self.state:
logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
(self.state, self.accepts, ut.accepts, i, j))
return candidate[0] return candidate[0]






+ 2
- 10
lark/parsers/lalr_parser.py View File

@@ -59,18 +59,10 @@ class _Parser:
try: try:
return states[state][token.type] return states[state][token.type]
except KeyError: except KeyError:
expected = [s for s in states[state].keys() if s.isupper()]
expected = {s for s in states[state].keys() if s.isupper()}
try: try:
puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state) puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state)
accepts = []
for t in expected:
new_puppet = puppet.copy()
try:
new_puppet.feed_token(Token(t, ''))
except KeyError:
pass
else:
accepts.append(t)
accepts = puppet.accepts()
except NameError: except NameError:
puppet = accepts = None puppet = accepts = None
raise UnexpectedToken(token, expected, state=state, puppet=puppet, accepts=accepts) raise UnexpectedToken(token, expected, state=state, puppet=puppet, accepts=accepts)


+ 18
- 3
lark/parsers/lalr_puppet.py View File

@@ -3,6 +3,8 @@
from copy import deepcopy from copy import deepcopy


from .lalr_analysis import Shift, Reduce from .lalr_analysis import Shift, Reduce
from .. import Token



class ParserPuppet(object): class ParserPuppet(object):
def __init__(self, parser, state_stack, value_stack, start, stream, set_state): def __init__(self, parser, state_stack, value_stack, start, stream, set_state):
@@ -67,13 +69,26 @@ class ParserPuppet(object):
) )


def pretty(self): def pretty(self):
print("Puppet choices:")
out = ["Puppet choices:"]
for k, v in self.choices().items(): for k, v in self.choices().items():
print('\t-', k, '->', v)
print('stack size:', len(self._state_stack))
out.append('\t- %s -> %s' % (k, v))
out.append('stack size: %s' % len(self._state_stack))
return '\n'.join(out)


def choices(self): def choices(self):
return self.parser.parse_table.states[self._state_stack[-1]] return self.parser.parse_table.states[self._state_stack[-1]]


def accepts(self):
    """Return the set of terminal names that can be fed in the current state.

    Each candidate is tried on a copy of the puppet, so this puppet's own
    stacks are left untouched. A candidate counts as accepted when
    ``feed_token`` completes without raising ``KeyError``.
    """
    accepts = set()
    for t in self.choices():
        # The parse-table row also holds entries keyed by rule names. Only
        # upper-case keys are terminals; the parser builds its `expected`
        # set from the same row with this same filter. Feeding a rule name
        # as a fake token would wrongly report it as an accepted terminal.
        if not t.isupper():
            continue
        new_puppet = self.copy()
        try:
            new_puppet.feed_token(Token(t, ''))
        except KeyError:
            pass
        else:
            accepts.add(t)
    return accepts

def resume_parse(self): def resume_parse(self):
return self.parser.parse(self._stream, self._start, self._set_state, self._value_stack, self._state_stack) return self.parser.parse(self._stream, self._start, self._set_state, self._value_stack, self._state_stack)

Loading…
Cancel
Save