Browse Source

Small adjustments to PR

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.10.0
Erez Sh 5 years ago
parent
commit
02d57bc32a
3 changed files with 30 additions and 24 deletions
  1. +3
    -2
      lark-stubs/parsers/lalr_puppet.pyi
  2. +24
    -18
      lark/exceptions.py
  3. +3
    -4
      lark/parsers/lalr_parser.py

+ 3
- 2
lark-stubs/parsers/lalr_puppet.pyi View File

@@ -5,8 +5,9 @@ from lark import Token, Tree


class ParserPuppet(object): class ParserPuppet(object):
""" """
Represents a LalrParser that can be step through.
Shouldn't instantiated by hand, but is accessible as `UnexpectedToken.puppet`
Provides an interface to interactively step through the parser (LALR(1) only for now)

Accessible via `UnexpectedToken.puppet` (raised by the parser on token error)
""" """
def feed_token(self, token: Token): ... def feed_token(self, token: Token): ...




+ 24
- 18
lark/exceptions.py View File

@@ -45,7 +45,7 @@ class UnexpectedInput(LarkError):
example that bests matches the current error. example that bests matches the current error.
""" """
assert self.state is not None, "Not supported for this exception" assert self.state is not None, "Not supported for this exception"
if isinstance(examples, dict): if isinstance(examples, dict):
examples = examples.items() examples = examples.items()


@@ -57,7 +57,11 @@ class UnexpectedInput(LarkError):
try: try:
parse_fn(malformed) parse_fn(malformed)
except UnexpectedInput as ut: except UnexpectedInput as ut:
if ut.state == self.state and (not use_accepts or ut.accepts == self.accepts):
if ut.state == self.state:
if use_accepts and ut.accepts != self.accepts:
logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
(self.state, self.accepts, ut.accepts, i, j))
continue
try: try:
if ut.token == self.token: # Try exact match first if ut.token == self.token: # Try exact match first
logging.debug("Exact Match at example [%s][%s]" % (i, j)) logging.debug("Exact Match at example [%s][%s]" % (i, j))
@@ -74,27 +78,25 @@ class UnexpectedInput(LarkError):
if not candidate[0]: if not candidate[0]:
logging.debug("Same State match at example [%s][%s]" % (i, j)) logging.debug("Same State match at example [%s][%s]" % (i, j))
candidate = label, False candidate = label, False
elif ut.state == self.state:
logging.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
(self.state, self.accepts, ut.accepts, i, j))

return candidate[0] return candidate[0]




class UnexpectedCharacters(LexError, UnexpectedInput): class UnexpectedCharacters(LexError, UnexpectedInput):
def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None): def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None):
self.line = line
self.column = column
self.pos_in_stream = lex_pos
self.state = state

self.allowed = allowed
self.considered_tokens = considered_tokens


if isinstance(seq, bytes): if isinstance(seq, bytes):
message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos:lex_pos+1].decode("ascii", "backslashreplace"), line, column) message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos:lex_pos+1].decode("ascii", "backslashreplace"), line, column)
else: else:
message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column) message = "No terminal defined for '%s' at line %d col %d" % (seq[lex_pos], line, column)


self.line = line
self.column = column
self.allowed = allowed
self.considered_tokens = considered_tokens
self.pos_in_stream = lex_pos
self.state = state

message += '\n\n' + self.get_context(seq) message += '\n\n' + self.get_context(seq)
if allowed: if allowed:
message += '\nExpecting: %s\n' % allowed message += '\nExpecting: %s\n' % allowed
@@ -106,16 +108,20 @@ class UnexpectedCharacters(LexError, UnexpectedInput):




class UnexpectedToken(ParseError, UnexpectedInput): class UnexpectedToken(ParseError, UnexpectedInput):
def __init__(self, token, expected, considered_rules=None, state=None, puppet=None, accepts=None):
self.token = token
self.expected = expected # XXX str shouldn't necessary
def __init__(self, token, expected, considered_rules=None, state=None, puppet=None):
self.line = getattr(token, 'line', '?') self.line = getattr(token, 'line', '?')
self.column = getattr(token, 'column', '?') self.column = getattr(token, 'column', '?')
self.considered_rules = considered_rules
self.state = state
self.pos_in_stream = getattr(token, 'pos_in_stream', None) self.pos_in_stream = getattr(token, 'pos_in_stream', None)
self.state = state

self.token = token
self.expected = expected # XXX deprecate? `accepts` is better
self.considered_rules = considered_rules
self.puppet = puppet self.puppet = puppet
self.accepts = accepts

# TODO Only calculate `accepts()` when we need to display it to the user
# This will improve performance when doing automatic error handling
self.accepts = puppet and puppet.accepts()


message = ("Unexpected token %r at line %s, column %s.\n" message = ("Unexpected token %r at line %s, column %s.\n"
"Expected one of: \n\t* %s\n" "Expected one of: \n\t* %s\n"


+ 3
- 4
lark/parsers/lalr_parser.py View File

@@ -62,10 +62,9 @@ class _Parser:
expected = {s for s in states[state].keys() if s.isupper()} expected = {s for s in states[state].keys() if s.isupper()}
try: try:
puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state) puppet = ParserPuppet(self, state_stack, value_stack, start, stream, set_state)
accepts = puppet.accepts()
except NameError:
puppet = accepts = None
raise UnexpectedToken(token, expected, state=state, puppet=puppet, accepts=accepts)
except NameError: # For standalone parser
puppet = None
raise UnexpectedToken(token, expected, state=state, puppet=puppet)


def reduce(rule): def reduce(rule):
size = len(rule.expansion) size = len(rule.expansion)


Loading…
Cancel
Save