@@ -25,12 +25,21 @@ Example:
Lark(...)
```
#### parse(self, text)
#### parse(self, text, start=None, on_error=None)
Return a complete parse tree for the text (of type Tree)
Parse the given text, according to the options provided.
Returns a complete parse tree for the text (of type Tree)
If a transformer is supplied to `__init__`, returns whatever is the result of the transformation.
Parameters:
* start: str - required if Lark was given multiple possible start symbols (using the start option).
* on_error: function - if provided, will be called on UnexpectedToken error. Return True to resume parsing. LALR only.
(See `examples/error_puppet.py` for an example of how to use `on_error`.)
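A minimal usage sketch of `on_error` (the grammar and handler below are illustrative, not part of Lark):

```python
from lark import Lark

# Illustrative grammar: a comma-separated list of integers.
# lexer='standard' keeps tokenization independent of the parse state,
# so a misplaced token reaches the parser as an UnexpectedToken.
number_list = Lark(r"""
    start: INT ("," INT)*
    %import common.INT
    %import common.WS
    %ignore WS
""", parser='lalr', lexer='standard')

def skip_extra_commas(e):
    # Lark names the anonymous "," terminal COMMA.
    return e.token.type == 'COMMA'   # True: drop the stray comma and resume

tree = number_list.parse("1, 2,, 3", on_error=skip_extra_commas)
print(tree.pretty())
```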
#### save(self, f) / load(cls, f)
@@ -160,6 +169,8 @@ See the [visitors page](visitors.md)
## UnexpectedToken
TODO: Explain puppet mechanism (related to on_error)
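Until that explanation is written, here is a rough sketch of how the puppet attached to the exception can be driven by hand. The grammar and token values are purely illustrative; only `puppet`, `choices()`, `feed_token()` and `resume_parse()` come from this change.

```python
from lark import Lark, Token, UnexpectedToken

# Illustrative two-token grammar; lexer='standard' keeps tokenization
# independent of the parse state, so the stray token reaches the parser.
pair = Lark("""
    start: A B
    A: "a"
    B: "b"
""", parser='lalr', lexer='standard')

try:
    pair.parse("aa")                      # the second token should have been "b"
except UnexpectedToken as e:
    print(e.puppet.choices())             # the parse-table row: what is acceptable here
    e.puppet.feed_token(Token('B', 'b'))  # pretend the missing "b" was there
    print(e.puppet.resume_parse().pretty())  # finish the parse from the repaired state
```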
## UnexpectedException
- `UnexpectedInput`
@@ -6,6 +6,7 @@
- EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md))
- Builds a parse-tree (AST) automagically based on the grammar
- Stand-alone parser generator - create a small independent parser to embed in your project.
- Flexible error handling by using a "puppet parser" mechanism (LALR only)
- Automatic line & column tracking (for both tokens and matched rules)
- Automatic terminal collision resolution
- Standard library of terminals (strings, numbers, names, etc.)
@@ -0,0 +1,34 @@
#
# This example demonstrates error handling using a parsing puppet in LALR
#
# When the parser encounters an UnexpectedToken exception, it creates a
# parsing puppet with the current parse-state, and lets you control how
# to proceed step-by-step. When you've achieved the correct parse-state,
# you can resume the run by returning True.
#

from lark import UnexpectedToken, Token

from .json_parser import json_parser


def ignore_errors(e):
    if e.token.type == 'COMMA':
        # Skip comma
        return True
    elif e.token.type == 'SIGNED_NUMBER':
        # Try to feed a comma and retry the number
        e.puppet.feed_token(Token('COMMA', ','))
        e.puppet.feed_token(e.token)
        return True

    # Unhandled error. Will stop parse and raise exception
    return False


def main():
    s = "[0 1, 2,, 3,,, 4, 5 6 ]"
    res = json_parser.parse(s, on_error=ignore_errors)
    print(res)      # prints [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0]

main()
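A related pattern that the same hook makes possible (a sketch, not part of this PR): instead of repairing the input, record every error and let the parse continue on a best-effort basis, so the caller can report them all afterwards.

```python
def collecting_handler(errors):
    def on_error(e):
        errors.append(e)   # keep the UnexpectedToken around for reporting
        return True        # resume; the offending token is simply dropped
    return on_error

errors = []
result = json_parser.parse("[1 2, 3]", on_error=collecting_handler(errors))
print(result)              # prints something like [1.0, 3.0]; the stray 2 was dropped
print(len(errors), "error(s) recovered")
```

Note that blindly returning True can loop forever if the parse can make no progress (for example, an error raised at the very end of the input), so a real handler should usually be more selective.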
@@ -81,7 +81,7 @@ class UnexpectedCharacters(LexError, UnexpectedInput):
class UnexpectedToken(ParseError, UnexpectedInput):
    def __init__(self, token, expected, considered_rules=None, state=None):
    def __init__(self, token, expected, considered_rules=None, state=None, puppet=None):
        self.token = token
        self.expected = expected     # XXX str shouldn't necessary
        self.line = getattr(token, 'line', '?')
@@ -89,6 +89,7 @@ class UnexpectedToken(ParseError, UnexpectedInput):
        self.considered_rules = considered_rules
        self.state = state
        self.pos_in_stream = getattr(token, 'pos_in_stream', None)
        self.puppet = puppet

        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected one of: \n\t* %s\n"
@@ -9,7 +9,7 @@ from .load_grammar import load_grammar
from .tree import Tree
from .common import LexerConf, ParserConf

from .lexer import Lexer, TraditionalLexer, TerminalDef
from .lexer import Lexer, TraditionalLexer, TerminalDef, UnexpectedToken
from .parse_tree_builder import ParseTreeBuilder
from .parser_frontends import get_frontend
from .grammar import Rule
@@ -359,13 +359,28 @@ class Lark(Serialize):
        "Get information about a terminal"
        return self._terminals_dict[name]

    def parse(self, text, start=None):
    def parse(self, text, start=None, on_error=None):
        """Parse the given text, according to the options provided.

        The 'start' parameter is required if Lark was given multiple possible start symbols (using the start option).

        Parameters:
            start: str - required if Lark was given multiple possible start symbols (using the start option).
            on_error: function - if provided, will be called on UnexpectedToken error. Return True to resume parsing. LALR only.

        Returns a tree, unless specified otherwise.
        """
        return self.parser.parse(text, start=start)
        try:
            return self.parser.parse(text, start=start)
        except UnexpectedToken as e:
            if on_error is None:
                raise

            while True:
                if not on_error(e):
                    raise e
                try:
                    return e.puppet.resume_parse()
                except UnexpectedToken as e2:
                    e = e2

###}
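Because of the `while True` loop above, `on_error` is invoked again for every further `UnexpectedToken`, so the handler can also decide when to stop recovering. A hypothetical helper (not part of the library) that gives up after a fixed number of errors:

```python
def limited_recovery(max_errors):
    """Build an on_error callback that stops recovering after max_errors failures."""
    count = [0]
    def on_error(e):
        count[0] += 1
        # Returning False makes Lark.parse re-raise the current UnexpectedToken.
        return count[0] <= max_errors
    return on_error

# Usage sketch (parser and text are placeholders):
# tree = my_lalr_parser.parse(text, on_error=limited_recovery(3))
```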
@@ -41,15 +41,15 @@ class _Parser:
        self.callbacks = callbacks
        self.debug = debug

    def parse(self, seq, start, set_state=None):
    def parse(self, seq, start, set_state=None, value_stack=None, state_stack=None):
        token = None
        stream = iter(seq)
        states = self.parse_table.states
        start_state = self.parse_table.start_states[start]
        end_state = self.parse_table.end_states[start]

        state_stack = [start_state]
        value_stack = []
        state_stack = state_stack or [start_state]
        value_stack = value_stack or []

        if set_state: set_state(start_state)
@@ -59,7 +59,7 @@
                return states[state][token.type]
            except KeyError:
                expected = [s for s in states[state].keys() if s.isupper()]
                raise UnexpectedToken(token, expected, state=state)
                raise UnexpectedToken(token, expected, state=state, puppet=_ParserPuppet(self, state_stack, value_stack, start, stream, set_state))

        def reduce(rule):
            size = len(rule.expansion)
@@ -111,3 +111,59 @@
        return value_stack[-1]
###}
class _ParserPuppet:
    def __init__(self, parser, state_stack, value_stack, start, stream, set_state):
        self.parser = parser
        self._state_stack = state_stack
        self._value_stack = value_stack
        self._start = start
        self._stream = stream
        self._set_state = set_state

    def feed_token(self, token):
        end_state = self.parser.parse_table.end_states[self._start]
        state_stack = self._state_stack
        value_stack = self._value_stack

        state = state_stack[-1]
        action, arg = self.parser.parse_table.states[state][token.type]
        assert arg != end_state

        while action is Reduce:
            rule = arg
            size = len(rule.expansion)
            if size:
                s = value_stack[-size:]
                del state_stack[-size:]
                del value_stack[-size:]
            else:
                s = []

            value = self.parser.callbacks[rule](s)

            _action, new_state = self.parser.parse_table.states[state_stack[-1]][rule.origin.name]
            assert _action is Shift
            state_stack.append(new_state)
            value_stack.append(value)

            if state_stack[-1] == end_state:
                return value_stack[-1]

            state = state_stack[-1]
            action, arg = self.parser.parse_table.states[state][token.type]
            assert arg != end_state

        assert action is Shift
        state_stack.append(arg)
        value_stack.append(token)

    def choices(self):
        return self.parser.parse_table.states[self._state_stack[-1]]

    def resume_parse(self):
        return self.parser.parse(self._stream, self._start, self._set_state, self._value_stack, self._state_stack)
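Beyond `resume_parse`, the puppet's `choices()` and `feed_token()` allow more surgical recovery. One possible strategy built on this API (a sketch, not something this PR ships): when exactly one terminal is acceptable at the point of failure, fabricate it and replay the token that caused the error.

```python
from lark import Token

def insert_single_expected(e):
    # Terminal names in the parse-table row are uppercase; skip the end marker.
    expected = [t for t in e.puppet.choices() if t.isupper() and t != '$END']
    if len(expected) == 1:
        e.puppet.feed_token(Token(expected[0], ''))  # fabricated token with an empty value
        e.puppet.feed_token(e.token)                 # replay the token that triggered the error
        return True
    return False   # ambiguous situation: let the UnexpectedToken propagate

# Usage sketch (my_lalr_parser is a placeholder):
# tree = my_lalr_parser.parse(text, on_error=insert_single_expected)
```

This is deliberately naive: the fabricated token carries no useful value, and replaying `e.token` can itself fail if it is still not acceptable in the repaired state, so a production handler would want to guard both calls.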