| @@ -25,12 +25,21 @@ Example: | |||||
| Lark(...) | Lark(...) | ||||
| ``` | ``` | ||||
| #### parse(self, text) | |||||
| #### parse(self, text, start=None, on_error=None) | |||||
| Return a complete parse tree for the text (of type Tree) | |||||
| Parse the given text, according to the options provided. | |||||
| Returns a complete parse tree for the text (of type Tree) | |||||
| If a transformer is supplied to `__init__`, returns whatever is the result of the transformation. | If a transformer is supplied to `__init__`, returns whatever is the result of the transformation. | ||||
| Parameters: | |||||
| * start: str - required if Lark was given multiple possible start symbols (using the start option). | |||||
| * on_error: function - if provided, it is called with the UnexpectedToken exception each time one is raised. Return True to resume parsing, or False to re-raise the error. LALR only. | |||||
| (See `examples/error_puppet.py` for an example of how to use `on_error`.) | |||||
| #### save(self, f) / load(cls, f) | #### save(self, f) / load(cls, f) | ||||
| @@ -160,6 +169,8 @@ See the [visitors page](visitors.md) | |||||
| ## UnexpectedToken | ## UnexpectedToken | ||||
| TODO: Explain puppet mechanism (related to on_error) | |||||
| ## UnexpectedException | ## UnexpectedException | ||||
| - `UnexpectedInput` | - `UnexpectedInput` | ||||
| @@ -6,6 +6,7 @@ | |||||
| - EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md)) | - EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md)) | ||||
| - Builds a parse-tree (AST) automagically based on the grammar | - Builds a parse-tree (AST) automagically based on the grammar | ||||
| - Stand-alone parser generator - create a small independent parser to embed in your project. | - Stand-alone parser generator - create a small independent parser to embed in your project. | ||||
| - Flexible error handling by using a "puppet parser" mechanism (LALR only) | |||||
| - Automatic line & column tracking (for both tokens and matched rules) | - Automatic line & column tracking (for both tokens and matched rules) | ||||
| - Automatic terminal collision resolution | - Automatic terminal collision resolution | ||||
| - Standard library of terminals (strings, numbers, names, etc.) | - Standard library of terminals (strings, numbers, names, etc.) | ||||
| @@ -0,0 +1,34 @@ | |||||
| # | |||||
| # This example demonstrates error handling using a parsing puppet in LALR | |||||
| # | |||||
| # When the parser encounters an UnexpectedToken exception, it creates a | |||||
| # parsing puppet with the current parse-state, and lets you control how | |||||
| # to proceed step-by-step. When you've achieved the correct parse-state, | |||||
| # you can resume the run by returning True. | |||||
| # | |||||
| from lark import UnexpectedToken, Token | |||||
| from .json_parser import json_parser | |||||
def ignore_errors(e):
    """on_error callback: recover from stray commas and missing commas.

    Receives the UnexpectedToken error ``e``. Returns True to let the parse
    resume, or False to stop the parse and have the error raised.
    """
    kind = e.token.type

    if kind == 'SIGNED_NUMBER':
        # A number where none was expected: act as if a comma preceded it,
        # then feed the number itself again.
        puppet = e.puppet
        puppet.feed_token(Token('COMMA', ','))
        puppet.feed_token(e.token)
        return True

    if kind == 'COMMA':
        # Superfluous comma: feed nothing and just carry on.
        return True

    # Anything else is not handled here.
    return False
def main():
    """Parse a malformed JSON list, recovering from errors with ignore_errors."""
    # The input has missing commas ("0 1", "5 6") and repeated commas.
    s = "[0 1, 2,, 3,,, 4, 5 6 ]"
    res = json_parser.parse(s, on_error=ignore_errors)
    # The leading 0 is parsed normally (the first error is raised on the 1),
    # so it is part of the result.
    print(res)      # prints [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
| main() | |||||
| @@ -81,7 +81,7 @@ class UnexpectedCharacters(LexError, UnexpectedInput): | |||||
| class UnexpectedToken(ParseError, UnexpectedInput): | class UnexpectedToken(ParseError, UnexpectedInput): | ||||
| def __init__(self, token, expected, considered_rules=None, state=None): | |||||
| def __init__(self, token, expected, considered_rules=None, state=None, puppet=None): | |||||
| self.token = token | self.token = token | ||||
| self.expected = expected # XXX str shouldn't necessary | self.expected = expected # XXX str shouldn't necessary | ||||
| self.line = getattr(token, 'line', '?') | self.line = getattr(token, 'line', '?') | ||||
| @@ -89,6 +89,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): | |||||
| self.considered_rules = considered_rules | self.considered_rules = considered_rules | ||||
| self.state = state | self.state = state | ||||
| self.pos_in_stream = getattr(token, 'pos_in_stream', None) | self.pos_in_stream = getattr(token, 'pos_in_stream', None) | ||||
| self.puppet = puppet | |||||
| message = ("Unexpected token %r at line %s, column %s.\n" | message = ("Unexpected token %r at line %s, column %s.\n" | ||||
| "Expected one of: \n\t* %s\n" | "Expected one of: \n\t* %s\n" | ||||
| @@ -9,7 +9,7 @@ from .load_grammar import load_grammar | |||||
| from .tree import Tree | from .tree import Tree | ||||
| from .common import LexerConf, ParserConf | from .common import LexerConf, ParserConf | ||||
| from .lexer import Lexer, TraditionalLexer, TerminalDef | |||||
| from .lexer import Lexer, TraditionalLexer, TerminalDef, UnexpectedToken | |||||
| from .parse_tree_builder import ParseTreeBuilder | from .parse_tree_builder import ParseTreeBuilder | ||||
| from .parser_frontends import get_frontend | from .parser_frontends import get_frontend | ||||
| from .grammar import Rule | from .grammar import Rule | ||||
| @@ -359,13 +359,28 @@ class Lark(Serialize): | |||||
| "Get information about a terminal" | "Get information about a terminal" | ||||
| return self._terminals_dict[name] | return self._terminals_dict[name] | ||||
| def parse(self, text, start=None): | |||||
| def parse(self, text, start=None, on_error=None): | |||||
| """Parse the given text, according to the options provided. | """Parse the given text, according to the options provided. | ||||
| The 'start' parameter is required if Lark was given multiple possible start symbols (using the start option). | |||||
| Parameters: | |||||
| start: str - required if Lark was given multiple possible start symbols (using the start option). | |||||
| on_error: function - if provided, will be called on UnexpectedToken error. Return true to resume parsing. LALR only. | |||||
| Returns a tree, unless specified otherwise. | Returns a tree, unless specified otherwise. | ||||
| """ | """ | ||||
| return self.parser.parse(text, start=start) | |||||
| try: | |||||
| return self.parser.parse(text, start=start) | |||||
| except UnexpectedToken as e: | |||||
| if on_error is None: | |||||
| raise | |||||
| while True: | |||||
| if not on_error(e): | |||||
| raise e | |||||
| try: | |||||
| return e.puppet.resume_parse() | |||||
| except UnexpectedToken as e2: | |||||
| e = e2 | |||||
| ###} | ###} | ||||
| @@ -41,15 +41,15 @@ class _Parser: | |||||
| self.callbacks = callbacks | self.callbacks = callbacks | ||||
| self.debug = debug | self.debug = debug | ||||
| def parse(self, seq, start, set_state=None): | |||||
| def parse(self, seq, start, set_state=None, value_stack=None, state_stack=None): | |||||
| token = None | token = None | ||||
| stream = iter(seq) | stream = iter(seq) | ||||
| states = self.parse_table.states | states = self.parse_table.states | ||||
| start_state = self.parse_table.start_states[start] | start_state = self.parse_table.start_states[start] | ||||
| end_state = self.parse_table.end_states[start] | end_state = self.parse_table.end_states[start] | ||||
| state_stack = [start_state] | |||||
| value_stack = [] | |||||
| state_stack = state_stack or [start_state] | |||||
| value_stack = value_stack or [] | |||||
| if set_state: set_state(start_state) | if set_state: set_state(start_state) | ||||
| @@ -59,7 +59,7 @@ class _Parser: | |||||
| return states[state][token.type] | return states[state][token.type] | ||||
| except KeyError: | except KeyError: | ||||
| expected = [s for s in states[state].keys() if s.isupper()] | expected = [s for s in states[state].keys() if s.isupper()] | ||||
| raise UnexpectedToken(token, expected, state=state) | |||||
| raise UnexpectedToken(token, expected, state=state, puppet=_ParserPuppet(self, state_stack, value_stack, start, stream, set_state)) | |||||
| def reduce(rule): | def reduce(rule): | ||||
| size = len(rule.expansion) | size = len(rule.expansion) | ||||
| @@ -111,3 +111,59 @@ class _Parser: | |||||
| return value_stack[-1] | return value_stack[-1] | ||||
| ###} | ###} | ||||
| class _ParserPuppet: | |||||
| def __init__(self, parser, state_stack, value_stack, start, stream, set_state): | |||||
| self.parser = parser | |||||
| self._state_stack = state_stack | |||||
| self._value_stack = value_stack | |||||
| self._start = start | |||||
| self._stream = stream | |||||
| self._set_state = set_state | |||||
| def feed_token(self, token): | |||||
| end_state = self.parser.parse_table.end_states[self._start] | |||||
| state_stack = self._state_stack | |||||
| value_stack = self._value_stack | |||||
| state = state_stack[-1] | |||||
| action, arg = self.parser.parse_table.states[state][token.type] | |||||
| assert arg != end_state | |||||
| while action is Reduce: | |||||
| rule = arg | |||||
| size = len(rule.expansion) | |||||
| if size: | |||||
| s = value_stack[-size:] | |||||
| del state_stack[-size:] | |||||
| del value_stack[-size:] | |||||
| else: | |||||
| s = [] | |||||
| value = self.parser.callbacks[rule](s) | |||||
| _action, new_state = self.parser.parse_table.states[state_stack[-1]][rule.origin.name] | |||||
| assert _action is Shift | |||||
| state_stack.append(new_state) | |||||
| value_stack.append(value) | |||||
| if state_stack[-1] == end_state: | |||||
| return value_stack[-1] | |||||
| state = state_stack[-1] | |||||
| action, arg = self.parser.parse_table.states[state][token.type] | |||||
| assert arg != end_state | |||||
| assert action is Shift | |||||
| state_stack.append(arg) | |||||
| value_stack.append(token) | |||||
| def choices(self): | |||||
| return self.parser.parse_table.states[self._state_stack[-1]] | |||||
| def resume_parse(self): | |||||
| return self.parser.parse(self._stream, self._start, self._set_state, self._value_stack, self._state_stack) | |||||