From 4463524b3a164b9a02679ad80ff076fb3013ced3 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Sun, 21 Jun 2020 09:18:56 +0300 Subject: [PATCH 1/3] Puppet initial --- lark/exceptions.py | 3 ++- lark/parsers/lalr_parser.py | 47 ++++++++++++++++++++++++++++++++++++- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/lark/exceptions.py b/lark/exceptions.py index cf03746..017e504 100644 --- a/lark/exceptions.py +++ b/lark/exceptions.py @@ -81,7 +81,7 @@ class UnexpectedCharacters(LexError, UnexpectedInput): class UnexpectedToken(ParseError, UnexpectedInput): - def __init__(self, token, expected, considered_rules=None, state=None): + def __init__(self, token, expected, considered_rules=None, state=None, puppet=None): self.token = token self.expected = expected # XXX str shouldn't necessary self.line = getattr(token, 'line', '?') @@ -89,6 +89,7 @@ class UnexpectedToken(ParseError, UnexpectedInput): self.considered_rules = considered_rules self.state = state self.pos_in_stream = getattr(token, 'pos_in_stream', None) + self.puppet = puppet message = ("Unexpected token %r at line %s, column %s.\n" "Expected one of: \n\t* %s\n" diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index e15b954..991789b 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -59,7 +59,7 @@ class _Parser: return states[state][token.type] except KeyError: expected = [s for s in states[state].keys() if s.isupper()] - raise UnexpectedToken(token, expected, state=state) + raise UnexpectedToken(token, expected, state=state, puppet=_ParserPuppet(self, state_stack, value_stack, start)) def reduce(rule): size = len(rule.expansion) @@ -111,3 +111,48 @@ class _Parser: return value_stack[-1] ###} + + + + +class _ParserPuppet: + def __init__(self, parser, state_stack, value_stack, start): + self.parser = parser + self.state_stack = state_stack + self.value_stack = value_stack + self.start = start + + def feed_token(self, token): + end_state = 
self.parser.parse_table.end_states[self.start] + state_stack = self.state_stack + value_stack = self.value_stack + + state = state_stack[-1] + action, arg = self.parser.parse_table.states[state][token.type] + assert arg != end_state + + if action is Shift: + state_stack.append(arg) + value_stack.append(token) + else: + rule = arg + size = len(rule.expansion) + if size: + s = value_stack[-size:] + del state_stack[-size:] + del value_stack[-size:] + else: + s = [] + + value = self.parser.callbacks[rule](s) + + _action, new_state = self.parser.parse_table.states[state_stack[-1]][rule.origin.name] + assert _action is Shift + state_stack.append(new_state) + value_stack.append(value) + + if state_stack[-1] == end_state: + return value_stack[-1] + + def choices(self): + return self.parser.parse_table.states[self.state_stack[-1]] \ No newline at end of file From 66a073d0aa6733c160b83cc4fb2d6f13b7ea8dc1 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Fri, 26 Jun 2020 00:23:38 +0300 Subject: [PATCH 2/3] Added support for error handling, using a puppet parser. TODO: Add docs --- examples/error_puppet.py | 34 +++++++++++++++++++++++++++ lark/lark.py | 23 ++++++++++++++---- lark/parsers/lalr_parser.py | 47 +++++++++++++++++++++++-------------- 3 files changed, 82 insertions(+), 22 deletions(-) create mode 100644 examples/error_puppet.py diff --git a/examples/error_puppet.py b/examples/error_puppet.py new file mode 100644 index 0000000..a5e0857 --- /dev/null +++ b/examples/error_puppet.py @@ -0,0 +1,34 @@ +# +# This example demonstrates error handling using a parsing puppet in LALR +# +# When the parser encounters an UnexpectedToken exception, it creates a +# parsing puppet with the current parse-state, and lets you control how +# to proceed step-by-step. When you've achieved the correct parse-state, +# you can resume the run by returning True. 
+# + +from lark import UnexpectedToken, Token + +from .json_parser import json_parser + +def ignore_errors(e): + if e.token.type == 'COMMA': + # Skip comma + return True + elif e.token.type == 'SIGNED_NUMBER': + # Try to feed a comma and retry the number + e.puppet.feed_token(Token('COMMA', ',')) + e.puppet.feed_token(e.token) + return True + + # Unhandled error. Will stop parse and raise exception + return False + + +def main(): + s = "[0 1, 2,, 3,,, 4, 5 6 ]" + res = json_parser.parse(s, on_error=ignore_errors) + print(res) # prints [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0] + +main() + diff --git a/lark/lark.py b/lark/lark.py index 3855191..f5d957e 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -9,7 +9,7 @@ from .load_grammar import load_grammar from .tree import Tree from .common import LexerConf, ParserConf -from .lexer import Lexer, TraditionalLexer, TerminalDef +from .lexer import Lexer, TraditionalLexer, TerminalDef, UnexpectedToken from .parse_tree_builder import ParseTreeBuilder from .parser_frontends import get_frontend from .grammar import Rule @@ -359,13 +359,28 @@ class Lark(Serialize): "Get information about a terminal" return self._terminals_dict[name] - def parse(self, text, start=None): + def parse(self, text, start=None, on_error=None): """Parse the given text, according to the options provided. - The 'start' parameter is required if Lark was given multiple possible start symbols (using the start option). + Parameters: + start: str - required if Lark was given multiple possible start symbols (using the start option). + on_error: function - if provided, will be called on UnexpectedToken error. Return true to resume parsing. Returns a tree, unless specified otherwise. 
""" - return self.parser.parse(text, start=start) + try: + return self.parser.parse(text, start=start) + except UnexpectedToken as e: + if on_error is None: + raise + + while True: + if not on_error(e): + raise e + try: + return e.puppet.resume_parse() + except UnexpectedToken as e2: + e = e2 + ###} diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index 991789b..7d5cf3b 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -41,15 +41,15 @@ class _Parser: self.callbacks = callbacks self.debug = debug - def parse(self, seq, start, set_state=None): + def parse(self, seq, start, set_state=None, value_stack=None, state_stack=None): token = None stream = iter(seq) states = self.parse_table.states start_state = self.parse_table.start_states[start] end_state = self.parse_table.end_states[start] - state_stack = [start_state] - value_stack = [] + state_stack = state_stack or [start_state] + value_stack = value_stack or [] if set_state: set_state(start_state) @@ -59,7 +59,7 @@ class _Parser: return states[state][token.type] except KeyError: expected = [s for s in states[state].keys() if s.isupper()] - raise UnexpectedToken(token, expected, state=state, puppet=_ParserPuppet(self, state_stack, value_stack, start)) + raise UnexpectedToken(token, expected, state=state, puppet=_ParserPuppet(self, state_stack, value_stack, start, stream, set_state)) def reduce(rule): size = len(rule.expansion) @@ -116,25 +116,24 @@ class _Parser: class _ParserPuppet: - def __init__(self, parser, state_stack, value_stack, start): + def __init__(self, parser, state_stack, value_stack, start, stream, set_state): self.parser = parser - self.state_stack = state_stack - self.value_stack = value_stack - self.start = start + self._state_stack = state_stack + self._value_stack = value_stack + self._start = start + self._stream = stream + self._set_state = set_state def feed_token(self, token): - end_state = self.parser.parse_table.end_states[self.start] - 
state_stack = self.state_stack - value_stack = self.value_stack + end_state = self.parser.parse_table.end_states[self._start] + state_stack = self._state_stack + value_stack = self._value_stack state = state_stack[-1] action, arg = self.parser.parse_table.states[state][token.type] assert arg != end_state - if action is Shift: - state_stack.append(arg) - value_stack.append(token) - else: + while action is Reduce: rule = arg size = len(rule.expansion) if size: @@ -151,8 +150,20 @@ class _ParserPuppet: state_stack.append(new_state) value_stack.append(value) - if state_stack[-1] == end_state: - return value_stack[-1] + if state_stack[-1] == end_state: + return value_stack[-1] + + state = state_stack[-1] + action, arg = self.parser.parse_table.states[state][token.type] + assert arg != end_state + + assert action is Shift + state_stack.append(arg) + value_stack.append(token) + def choices(self): - return self.parser.parse_table.states[self.state_stack[-1]] \ No newline at end of file + return self.parser.parse_table.states[self._state_stack[-1]] + + def resume_parse(self): + return self.parser.parse(self._stream, self._start, self._set_state, self._value_stack, self._state_stack) \ No newline at end of file From 3b3a8c1c924f4db9b27f7ee740ffc16f5d0b4eb9 Mon Sep 17 00:00:00 2001 From: Erez Sh Date: Fri, 26 Jun 2020 00:37:37 +0300 Subject: [PATCH 3/3] Added docs for on_error --- docs/classes.md | 15 +++++++++++++-- docs/features.md | 1 + lark/lark.py | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/docs/classes.md b/docs/classes.md index 60d08ef..084cda6 100644 --- a/docs/classes.md +++ b/docs/classes.md @@ -25,12 +25,21 @@ Example: Lark(...) ``` -#### parse(self, text) +#### parse(self, text, start=None, on_error=None) -Return a complete parse tree for the text (of type Tree) +Parse the given text, according to the options provided. 
+ +Returns a complete parse tree for the text (of type Tree) If a transformer is supplied to `__init__`, returns whatever is the result of the transformation. +Parameters: + +* start: str - required if Lark was given multiple possible start symbols (using the start option). + +* on_error: function - if provided, will be called on UnexpectedToken error. Return true to resume parsing. LALR only. + +(See `examples/error_puppet.py` for an example of how to use `on_error`.) #### save(self, f) / load(cls, f) @@ -160,6 +169,8 @@ See the [visitors page](visitors.md) ## UnexpectedToken +TODO: Explain puppet mechanism (related to on_error) + ## UnexpectedException - `UnexpectedInput` diff --git a/docs/features.md b/docs/features.md index 5dff9f4..d8a4340 100644 --- a/docs/features.md +++ b/docs/features.md @@ -6,6 +6,7 @@ - EBNF-inspired grammar, with extra features (See: [Grammar Reference](grammar.md)) - Builds a parse-tree (AST) automagically based on the grammar - Stand-alone parser generator - create a small independent parser to embed in your project. + - Flexible error handling by using a "puppet parser" mechanism (LALR only) - Automatic line & column tracking (for both tokens and matched rules) - Automatic terminal collision resolution - Standard library of terminals (strings, numbers, names, etc.) diff --git a/lark/lark.py b/lark/lark.py index f5d957e..f7d12fc 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -364,7 +364,7 @@ class Lark(Serialize): Parameters: start: str - required if Lark was given multiple possible start symbols (using the start option). - on_error: function - if provided, will be called on UnexpectedToken error. Return true to resume parsing. + on_error: function - if provided, will be called on UnexpectedToken error. Return true to resume parsing. LALR only. Returns a tree, unless specified otherwise. """