From e69d567bce7376fd968471f3907c0d99bb9a46d3 Mon Sep 17 00:00:00 2001 From: DrSlump Date: Sun, 15 Apr 2018 12:42:13 +0200 Subject: [PATCH 1/2] example driven parser errors --- lark/common.py | 30 ++++++++++++++++++++++++++++-- lark/parsers/lalr_parser.py | 4 +--- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/lark/common.py b/lark/common.py index 7611a2c..4091136 100644 --- a/lark/common.py +++ b/lark/common.py @@ -17,12 +17,13 @@ class ParseError(Exception): pass class UnexpectedToken(ParseError): - def __init__(self, token, expected, seq, index, considered_rules=None): + def __init__(self, token, expected, seq, index, considered_rules=None, state=None): self.token = token self.expected = expected self.line = getattr(token, 'line', '?') self.column = getattr(token, 'column', '?') self.considered_rules = considered_rules + self.state = state try: context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]]) @@ -36,7 +37,32 @@ class UnexpectedToken(ParseError): super(UnexpectedToken, self).__init__(message) - + def match_examples(self, parse_fn, examples): + """ Given a parser instance and a dictionary mapping some label with + some malformed syntax examples, it'll return the label for the + example that bests matches the current error. + """ + if not self.state: + return None + + candidate = None + for label,example in examples.items(): + if not isinstance(example, (tuple, list)): + example = [example] + + for malformed in example: + try: + parse_fn(malformed) + except UnexpectedToken as ut: + if ut.state == self.state: + if ut.token == self.token: + return label + elif not candidate: + candidate = label + except: + pass + + return candidate ###} diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index a20db07..baea614 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -2,7 +2,6 @@ """ # Author: Erez Shinan (2017) # Email : erezshin@gmail.com - from ..common import UnexpectedToken from .lalr_analysis import LALR_Analyzer, Shift @@ -47,8 +46,7 @@ class _Parser: return states[state][key] except KeyError: expected = states[state].keys() - - raise UnexpectedToken(token, expected, seq, i) + raise UnexpectedToken(token, expected, seq, i, state=state) def reduce(rule): size = len(rule.expansion) From 880f42dd1273f30f76f9f2c9ab116b26d923a684 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Wed, 18 Apr 2018 12:33:47 +0300 Subject: [PATCH 2/2] Corrections to PR and added get_context --- lark/common.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/lark/common.py b/lark/common.py index 4091136..84a4139 100644 --- a/lark/common.py +++ b/lark/common.py @@ -1,7 +1,7 @@ import re import sys -from .utils import get_regexp_width +from .utils import get_regexp_width, STRING_TYPE Py36 = (sys.version_info[:2] >= (3, 6)) @@ -42,27 +42,31 @@ class UnexpectedToken(ParseError): some malformed syntax examples, it'll return the label for the example that bests matches the current error. """ - if not self.state: - return None + assert self.state, "Not supported for this exception" candidate = None - for label,example in examples.items(): - if not isinstance(example, (tuple, list)): - example = [example] + for label, example in examples.items(): + assert not isinstance(example, STRING_TYPE) for malformed in example: try: parse_fn(malformed) except UnexpectedToken as ut: if ut.state == self.state: - if ut.token == self.token: + if ut.token == self.token: # Try exact match first return label elif not candidate: candidate = label - except: - pass return candidate + + def get_context(self, text, span=10): + pos = self.token.pos_in_stream + start = max(pos - span, 0) + end = pos + span + before = text[start:pos].rsplit('\n', 1)[-1] + after = text[pos:end].split('\n', 1)[0] + return before + after + '\n' + ' ' * len(before) + '^\n' ###}