| @@ -25,12 +25,21 @@ Example: | |||
| Lark(...) | |||
| ``` | |||
| #### parse(self, text) | |||
| #### parse(self, text, start=None, on_error=None) | |||
| Return a complete parse tree for the text (of type Tree) | |||
| Parse the given text, according to the options provided. | |||
| Returns a complete parse tree for the text (of type Tree) | |||
| If a transformer is supplied to `__init__`, returns whatever is the result of the transformation. | |||
| Parameters: | |||
| * start: str - required if Lark was given multiple possible start symbols (using the start option). | |||
| * on_error: function - if provided, it is called with the UnexpectedToken exception each time one is raised. Return True to resume parsing; return a false value to re-raise the error. LALR only. | |||
| (See `examples/error_puppet.py` for an example of how to use `on_error`.) | |||
| #### save(self, f) / load(cls, f) | |||
| @@ -160,6 +169,8 @@ See the [visitors page](visitors.md) | |||
| ## UnexpectedToken | |||
| TODO: Explain puppet mechanism (related to on_error) | |||
| ## UnexpectedException | |||
| - `UnexpectedInput` | |||
| @@ -6,6 +6,7 @@ | |||
| - EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md)) | |||
| - Builds a parse-tree (AST) automagically based on the grammar | |||
| - Stand-alone parser generator - create a small independent parser to embed in your project. | |||
| - Flexible error handling by using a "puppet parser" mechanism (LALR only) | |||
| - Automatic line & column tracking (for both tokens and matched rules) | |||
| - Automatic terminal collision resolution | |||
| - Standard library of terminals (strings, numbers, names, etc.) | |||
| @@ -0,0 +1,34 @@ | |||
| # | |||
| # This example demonstrates error handling using a parsing puppet in LALR | |||
| # | |||
| # When the parser encounters an UnexpectedToken exception, it creates a | |||
| # parsing puppet with the current parse-state, and lets you control how | |||
| # to proceed step-by-step. When you've achieved the correct parse-state, | |||
| # you can resume the run by returning True. | |||
| # | |||
| from lark import UnexpectedToken, Token | |||
| from .json_parser import json_parser | |||
def ignore_errors(e):
    """Error handler passed as ``on_error``: recover from stray or missing commas.

    ``e`` is the error raised by the parser; its ``token`` is the offending
    token and its ``puppet`` lets us feed tokens into the suspended parse.
    Returns True to resume parsing, False to let the error propagate.
    """
    kind = e.token.type

    if kind == 'COMMA':
        # Superfluous comma: drop it and carry on with the rest of the input.
        return True

    if kind != 'SIGNED_NUMBER':
        # Unhandled error. Will stop parse and raise exception
        return False

    # A number showed up where a separator was expected: feed the missing
    # comma first, then retry the number itself.
    puppet = e.puppet
    puppet.feed_token(Token('COMMA', ','))
    puppet.feed_token(e.token)
    return True
def main():
    """Parse a deliberately malformed JSON array, recovering with ignore_errors."""
    s = "[0 1, 2,, 3,,, 4, 5 6 ]"
    res = json_parser.parse(s, on_error=ignore_errors)
    # The leading 0 is shifted before the first error occurs (at the "1"),
    # so it is expected to be part of the recovered result as well.
    print(res)      # prints [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0]

main()
| @@ -81,7 +81,7 @@ class UnexpectedCharacters(LexError, UnexpectedInput): | |||
| class UnexpectedToken(ParseError, UnexpectedInput): | |||
| def __init__(self, token, expected, considered_rules=None, state=None): | |||
| def __init__(self, token, expected, considered_rules=None, state=None, puppet=None): | |||
| self.token = token | |||
| self.expected = expected # XXX str shouldn't be necessary | |||
| self.line = getattr(token, 'line', '?') | |||
| @@ -89,6 +89,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||
| self.considered_rules = considered_rules | |||
| self.state = state | |||
| self.pos_in_stream = getattr(token, 'pos_in_stream', None) | |||
| self.puppet = puppet | |||
| message = ("Unexpected token %r at line %s, column %s.\n" | |||
| "Expected one of: \n\t* %s\n" | |||
| @@ -9,7 +9,7 @@ from .load_grammar import load_grammar | |||
| from .tree import Tree | |||
| from .common import LexerConf, ParserConf | |||
| from .lexer import Lexer, TraditionalLexer, TerminalDef | |||
| from .lexer import Lexer, TraditionalLexer, TerminalDef, UnexpectedToken | |||
| from .parse_tree_builder import ParseTreeBuilder | |||
| from .parser_frontends import get_frontend | |||
| from .grammar import Rule | |||
| @@ -359,13 +359,28 @@ class Lark(Serialize): | |||
| "Get information about a terminal" | |||
| return self._terminals_dict[name] | |||
def parse(self, text, start=None, on_error=None):
    """Parse the given text, according to the options provided.

    Parameters:
        start: str - required if Lark was given multiple possible start symbols (using the start option).
        on_error: function - if provided, will be called with the UnexpectedToken error each time
                  one is raised. Return true to resume parsing, false to re-raise the error. LALR only.

    Returns a tree, unless specified otherwise.

    Raises UnexpectedToken if no handler is given, if the handler declines to
    resume, or if the error carries no puppet to resume from.
    """
    try:
        return self.parser.parse(text, start=start)
    except UnexpectedToken as e:
        if on_error is None:
            raise

        while True:
            if not on_error(e):
                raise e

            puppet = getattr(e, 'puppet', None)
            if puppet is None:
                # Errors raised without a puppet (it defaults to None) cannot
                # be resumed. Surface the real parse error instead of an
                # AttributeError on None.
                raise e

            try:
                return puppet.resume_parse()
            except UnexpectedToken as e2:
                # A new error occurred further along; hand it to the handler.
                e = e2
| ###} | |||
| @@ -41,15 +41,15 @@ class _Parser: | |||
| self.callbacks = callbacks | |||
| self.debug = debug | |||
| def parse(self, seq, start, set_state=None): | |||
| def parse(self, seq, start, set_state=None, value_stack=None, state_stack=None): | |||
| token = None | |||
| stream = iter(seq) | |||
| states = self.parse_table.states | |||
| start_state = self.parse_table.start_states[start] | |||
| end_state = self.parse_table.end_states[start] | |||
| state_stack = [start_state] | |||
| value_stack = [] | |||
| state_stack = state_stack or [start_state] | |||
| value_stack = value_stack or [] | |||
| if set_state: set_state(start_state) | |||
| @@ -59,7 +59,7 @@ class _Parser: | |||
| return states[state][token.type] | |||
| except KeyError: | |||
| expected = [s for s in states[state].keys() if s.isupper()] | |||
| raise UnexpectedToken(token, expected, state=state) | |||
| raise UnexpectedToken(token, expected, state=state, puppet=_ParserPuppet(self, state_stack, value_stack, start, stream, set_state)) | |||
| def reduce(rule): | |||
| size = len(rule.expansion) | |||
| @@ -111,3 +111,59 @@ class _Parser: | |||
| return value_stack[-1] | |||
| ###} | |||
class _ParserPuppet:
    """Handle on a suspended LALR parse, carried by UnexpectedToken errors.

    Keeps references (not copies) to the parser's state and value stacks and
    to the remaining token stream, so an error handler can push tokens by
    hand with feed_token() and then continue the run with resume_parse().
    """

    def __init__(self, parser, state_stack, value_stack, start, stream, set_state):
        self.parser = parser
        self._state_stack, self._value_stack = state_stack, value_stack
        self._start = start
        self._stream = stream
        self._set_state = set_state

    def feed_token(self, token):
        """Advance the suspended parse by a single token.

        Applies every reduction the token triggers in the current state and
        then shifts the token. If a reduction reaches the end state, the value
        on top of the value stack is returned; otherwise returns None.
        A failed parse-table lookup for the token type propagates unchanged.
        """
        end_state = self.parser.parse_table.end_states[self._start]
        states_stk = self._state_stack
        values_stk = self._value_stack

        while True:
            top = states_stk[-1]
            action, arg = self.parser.parse_table.states[top][token.type]
            assert arg != end_state
            if action is not Reduce:
                break

            # Reduce: pop the rule's children and replace them with its value.
            rule = arg
            size = len(rule.expansion)
            if size:
                children = values_stk[-size:]
                del states_stk[-size:]
                del values_stk[-size:]
            else:
                children = []

            value = self.parser.callbacks[rule](children)

            # Goto: the exposed state decides where the rule's origin leads.
            goto_action, goto_state = self.parser.parse_table.states[states_stk[-1]][rule.origin.name]
            assert goto_action is Shift
            states_stk.append(goto_state)
            values_stk.append(value)

            if goto_state == end_state:
                return value

        assert action is Shift
        states_stk.append(arg)
        values_stk.append(token)

    def choices(self):
        """Return the parse-table entry for the state on top of the state stack."""
        table_states = self.parser.parse_table.states
        return table_states[self._state_stack[-1]]

    def resume_parse(self):
        """Continue the parse on the remaining stream, reusing the saved stacks."""
        return self.parser.parse(
            self._stream,
            self._start,
            self._set_state,
            self._value_stack,
            self._state_stack,
        )