Browse Source

Added support for error handling, using a puppet parser.

TODO: Add docs
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.9.0
Erez Sh 5 years ago
parent
commit
66a073d0aa
3 changed files with 82 additions and 22 deletions
  1. +34
    -0
      examples/error_puppet.py
  2. +19
    -4
      lark/lark.py
  3. +29
    -18
      lark/parsers/lalr_parser.py

+ 34
- 0
examples/error_puppet.py View File

@@ -0,0 +1,34 @@
#
# This example demonstrates error handling using a parsing puppet in LALR
#
# When the parser hits a token it does not expect, it raises UnexpectedToken
# and attaches a parsing puppet holding the current parse-state, which lets
# you control how to proceed step-by-step. Once you've reached a correct
# parse-state, resume the run by returning True from the on_error callback.
#

from lark import UnexpectedToken, Token

from .json_parser import json_parser

def ignore_errors(e):
    """Error callback for ``parse(..., on_error=...)`` that repairs comma mistakes.

    Parameters:
        e: the UnexpectedToken error. This callback reads ``e.token`` (the
           offending token) and ``e.puppet`` (the parsing puppet used to
           feed tokens by hand).

    Returns True to resume parsing, or False to stop the parse and let the
    exception be raised.
    """
    if e.token.type == 'COMMA':
        # Skip comma: resume without feeding the offending token
        return True
    elif e.token.type == 'SIGNED_NUMBER':
        # Try to feed a comma and retry the number
        e.puppet.feed_token(Token('COMMA', ','))
        e.puppet.feed_token(e.token)
        return True

    # Unhandled error. Will stop parse and raise exception
    return False


def main():
    """Parse a JSON array with missing and surplus commas, then print the result."""
    # Missing commas (between 0/1 and 5/6) and extra commas are both
    # repaired by the ignore_errors callback.
    s = "[0 1, 2,, 3,,, 4, 5 6 ]"
    res = json_parser.parse(s, on_error=ignore_errors)
    print(res)  # prints [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0]


# Only run the demo when executed as a script, not when imported.
if __name__ == '__main__':
    main()


+ 19
- 4
lark/lark.py View File

@@ -9,7 +9,7 @@ from .load_grammar import load_grammar
from .tree import Tree
from .common import LexerConf, ParserConf

from .lexer import Lexer, TraditionalLexer, TerminalDef
from .lexer import Lexer, TraditionalLexer, TerminalDef, UnexpectedToken
from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import get_frontend
from .grammar import Rule
@@ -359,13 +359,28 @@ class Lark(Serialize):
"Get information about a terminal"
return self._terminals_dict[name]

def parse(self, text, start=None):
def parse(self, text, start=None, on_error=None):
"""Parse the given text, according to the options provided.

The 'start' parameter is required if Lark was given multiple possible start symbols (using the start option).
Parameters:
start: str - required if Lark was given multiple possible start symbols (using the start option).
on_error: function - if provided, will be called on UnexpectedToken error. Return true to resume parsing.

Returns a tree, unless specified otherwise.
"""
return self.parser.parse(text, start=start)
try:
return self.parser.parse(text, start=start)
except UnexpectedToken as e:
if on_error is None:
raise

while True:
if not on_error(e):
raise e
try:
return e.puppet.resume_parse()
except UnexpectedToken as e2:
e = e2


###}

+ 29
- 18
lark/parsers/lalr_parser.py View File

@@ -41,15 +41,15 @@ class _Parser:
self.callbacks = callbacks
self.debug = debug

def parse(self, seq, start, set_state=None):
def parse(self, seq, start, set_state=None, value_stack=None, state_stack=None):
token = None
stream = iter(seq)
states = self.parse_table.states
start_state = self.parse_table.start_states[start]
end_state = self.parse_table.end_states[start]

state_stack = [start_state]
value_stack = []
state_stack = state_stack or [start_state]
value_stack = value_stack or []

if set_state: set_state(start_state)

@@ -59,7 +59,7 @@ class _Parser:
return states[state][token.type]
except KeyError:
expected = [s for s in states[state].keys() if s.isupper()]
raise UnexpectedToken(token, expected, state=state, puppet=_ParserPuppet(self, state_stack, value_stack, start))
raise UnexpectedToken(token, expected, state=state, puppet=_ParserPuppet(self, state_stack, value_stack, start, stream, set_state))

def reduce(rule):
size = len(rule.expansion)
@@ -116,25 +116,24 @@ class _Parser:


class _ParserPuppet:
def __init__(self, parser, state_stack, value_stack, start):
def __init__(self, parser, state_stack, value_stack, start, stream, set_state):
self.parser = parser
self.state_stack = state_stack
self.value_stack = value_stack
self.start = start
self._state_stack = state_stack
self._value_stack = value_stack
self._start = start
self._stream = stream
self._set_state = set_state

def feed_token(self, token):
end_state = self.parser.parse_table.end_states[self.start]
state_stack = self.state_stack
value_stack = self.value_stack
end_state = self.parser.parse_table.end_states[self._start]
state_stack = self._state_stack
value_stack = self._value_stack

state = state_stack[-1]
action, arg = self.parser.parse_table.states[state][token.type]
assert arg != end_state

if action is Shift:
state_stack.append(arg)
value_stack.append(token)
else:
while action is Reduce:
rule = arg
size = len(rule.expansion)
if size:
@@ -151,8 +150,20 @@ class _ParserPuppet:
state_stack.append(new_state)
value_stack.append(value)

if state_stack[-1] == end_state:
return value_stack[-1]
if state_stack[-1] == end_state:
return value_stack[-1]

state = state_stack[-1]
action, arg = self.parser.parse_table.states[state][token.type]
assert arg != end_state

assert action is Shift
state_stack.append(arg)
value_stack.append(token)


def choices(self):
return self.parser.parse_table.states[self.state_stack[-1]]
return self.parser.parse_table.states[self._state_stack[-1]]

def resume_parse(self):
    """Continue the interrupted parse from this puppet's saved state.

    Calls the parser's ``parse`` with the stored token stream, start symbol,
    ``set_state`` callback, value stack and state stack, and returns
    whatever that call returns.
    """
    # The optional arguments are passed by keyword: parse() takes
    # value_stack before state_stack, the reverse of this puppet's own
    # constructor order, so positional passing is easy to get wrong.
    return self.parser.parse(
        self._stream,
        self._start,
        set_state=self._set_state,
        value_stack=self._value_stack,
        state_stack=self._state_stack,
    )

Loading…
Cancel
Save