diff --git a/README.md b/README.md index 3b89c13..43edb79 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,7 @@ Notice punctuation doesn't appear in the resulting tree. It's automatically filt ## Learn more about using Lark - - Read the [tutorial](/docs/json_tutorial.md), which shows how to write a JSON parser in Lark. + - **Read the [tutorial](/docs/json_tutorial.md)**, which shows how to write a JSON parser in Lark. - Read the [reference](/docs/reference.md) - Browse the [examples](/examples), which include a calculator, and a Python-code parser. - Check out the [tests](/tests/test_parser.py) for more examples. @@ -72,6 +72,7 @@ These features are planned to be implemented in the near future: - Grammar composition (in cases that the tokens can reliably signify a grammar change) - Parser generator - create a small parser, indepdendent of Lark, to embed in your project. - Optimizations in both the parsers and the lexer + - Better handling of ambiguity ## Comparison to other parsers diff --git a/lark/common.py b/lark/common.py index c4fbf6c..8c82019 100644 --- a/lark/common.py +++ b/lark/common.py @@ -6,6 +6,22 @@ class ParseError(Exception): pass +class UnexpectedToken(ParseError): + def __init__(self, token, expected, seq, index): + self.token = token + self.expected = expected + self.line = getattr(token, 'line', '?') + self.column = getattr(token, 'column', '?') + + context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]]) + message = ("Unexpected token %r at line %s, column %s.\n" + "Expected: %s\n" + "Context: %s" % (token.value, self.line, self.column, expected, context)) + + super(UnexpectedToken, self).__init__(message) + + + def is_terminal(sym): return sym.isupper() or sym[0] == '$' diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index 177b8d2..dc0cf97 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -1,7 +1,7 @@ "My name is Earley" from ..utils import classify, STRING_TYPE -from ..common import
ParseError +from ..common import ParseError, UnexpectedToken try: xrange @@ -14,6 +14,7 @@ class MatchFailed(object): class AbortParseMatch(Exception): pass + class Rule(object): def __init__(self, name, symbols, postprocess): self.name = name @@ -34,6 +35,8 @@ class State(object): if self.is_literal: self.expect_symbol = self.expect_symbol['literal'] assert isinstance(self.expect_symbol, STRING_TYPE), self.expect_symbol + else: + self.is_literal = False def next_state(self, data): return State(self.rule, self.expect+1, self.reference, self.data + [data]) @@ -136,7 +139,8 @@ class Parser(object): self.advance_to(table, pos + 1, set()) if not table[-1]: - raise ParseError('Error at line {t.line}:{t.column}'.format(t=stream[pos])) + expected = {s.expect_symbol for s in table[-2] if s.is_literal} + raise UnexpectedToken(stream[pos], expected, stream, pos) res = list(self.finish(table)) if not res: diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index ada9b1e..cafc9a1 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -1,21 +1,5 @@ from .lalr_analysis import ACTION_SHIFT -from ..common import ParseError - -class UnexpectedToken(ParseError): - def __init__(self, token, expected, seq, index): - self.token = token - self.expected = expected - self.line = getattr(token, 'line', '?') - self.column = getattr(token, 'column', '?') - - context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]]) - message = ("Unexpected input %r at line %s, column %s.\n" - "Expected: %s\n" - "Context: %s" % (token.value, self.line, self.column, expected, context)) - - super(ParseError, self).__init__(message) - - +from ..common import ParseError, UnexpectedToken class Parser(object):