| @@ -43,7 +43,7 @@ Notice punctuation doesn't appear in the resulting tree. It's automatically filt | |||||
| ## Learn more about using Lark | ## Learn more about using Lark | ||||
| - Read the [tutorial](/docs/json_tutorial.md), which shows how to write a JSON parser in Lark. | |||||
| - **Read the [tutorial](/docs/json_tutorial.md)**, which shows how to write a JSON parser in Lark. | |||||
| - Read the [reference](/docs/reference.md) | - Read the [reference](/docs/reference.md) | ||||
| - Browse the [examples](/examples), which include a calculator, and a Python-code parser. | - Browse the [examples](/examples), which include a calculator, and a Python-code parser. | ||||
| - Check out the [tests](/tests/test_parser.py) for more examples. | - Check out the [tests](/tests/test_parser.py) for more examples. | ||||
| @@ -72,6 +72,7 @@ These features are planned to be implemented in the near future: | |||||
| - Grammar composition (in cases that the tokens can reliably signify a grammar change) | - Grammar composition (in cases that the tokens can reliably signify a grammar change) | ||||
| - Parser generator - create a small parser, independent of Lark, to embed in your project. | - Parser generator - create a small parser, independent of Lark, to embed in your project. | ||||
| - Optimizations in both the parsers and the lexer | - Optimizations in both the parsers and the lexer | ||||
| - Better handling of ambiguity | |||||
| ## Comparison to other parsers | ## Comparison to other parsers | ||||
| @@ -6,6 +6,22 @@ class ParseError(Exception): | |||||
| pass | pass | ||||
class UnexpectedToken(ParseError):
    """Parse error raised for a token the parser cannot accept.

    Attributes:
        token: the offending token.
        expected: the symbols that were acceptable at this point.
        line, column: position read from the token, or '?' when the token
            does not carry that attribute.
    """

    def __init__(self, token, expected, seq, index):
        """Store the error details and build the human-readable message.

        token    -- the unexpected token; its ``value`` is quoted in the message
        expected -- the acceptable symbols, interpolated into the message as-is
        seq      -- the token sequence being parsed
        index    -- position in ``seq`` from which up to five tokens are
                    quoted as context (each needs ``value`` and ``type``)
        """
        self.token = token
        self.expected = expected
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        context = ' '.join('%r(%s)' % (t.value, t.type)
                           for t in seq[index:index + 5])
        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column,
                                    expected, context))
        # Name this class (not the parent) so ParseError.__init__ stays in
        # the initializer chain if it is ever given one.
        super(UnexpectedToken, self).__init__(message)
def is_terminal(sym):
    """Return True if the symbol name ``sym`` denotes a terminal.

    A name counts as a terminal when it is all upper-case (``str.isupper``)
    or when it begins with ``'$'``. An empty name is not a terminal.
    """
    # startswith() instead of sym[0]: an empty name yields False rather
    # than raising IndexError.
    return sym.isupper() or sym.startswith('$')
| @@ -1,7 +1,7 @@ | |||||
| "My name is Earley" | "My name is Earley" | ||||
| from ..utils import classify, STRING_TYPE | from ..utils import classify, STRING_TYPE | ||||
| from ..common import ParseError | |||||
| from ..common import ParseError, UnexpectedToken | |||||
| try: | try: | ||||
| xrange | xrange | ||||
| @@ -14,6 +14,7 @@ class MatchFailed(object): | |||||
class AbortParseMatch(Exception):
    """Exception subclass that adds no behaviour of its own.

    NOTE(review): presumably raised to abandon a match while parsing; the
    code that raises and catches it is not visible here -- confirm.
    """
| class Rule(object): | class Rule(object): | ||||
| def __init__(self, name, symbols, postprocess): | def __init__(self, name, symbols, postprocess): | ||||
| self.name = name | self.name = name | ||||
| @@ -34,6 +35,8 @@ class State(object): | |||||
| if self.is_literal: | if self.is_literal: | ||||
| self.expect_symbol = self.expect_symbol['literal'] | self.expect_symbol = self.expect_symbol['literal'] | ||||
| assert isinstance(self.expect_symbol, STRING_TYPE), self.expect_symbol | assert isinstance(self.expect_symbol, STRING_TYPE), self.expect_symbol | ||||
| else: | |||||
| self.is_literal = False | |||||
| def next_state(self, data): | def next_state(self, data): | ||||
| return State(self.rule, self.expect+1, self.reference, self.data + [data]) | return State(self.rule, self.expect+1, self.reference, self.data + [data]) | ||||
| @@ -136,7 +139,8 @@ class Parser(object): | |||||
| self.advance_to(table, pos + 1, set()) | self.advance_to(table, pos + 1, set()) | ||||
| if not table[-1]: | if not table[-1]: | ||||
| raise ParseError('Error at line {t.line}:{t.column}'.format(t=stream[pos])) | |||||
| expected = {s.expect_symbol for s in table[-2] if s.is_literal} | |||||
| raise UnexpectedToken(stream[pos], expected, stream, pos) | |||||
| res = list(self.finish(table)) | res = list(self.finish(table)) | ||||
| if not res: | if not res: | ||||
| @@ -1,21 +1,5 @@ | |||||
| from .lalr_analysis import ACTION_SHIFT | from .lalr_analysis import ACTION_SHIFT | ||||
| from ..common import ParseError | |||||
class UnexpectedToken(ParseError):
    """Parse error raised for a token the parser cannot accept.

    Attributes:
        token: the offending token.
        expected: the symbols that were acceptable at this point.
        line, column: position read from the token, or '?' when the token
            does not carry that attribute.
    """

    def __init__(self, token, expected, seq, index):
        """Store the error details and build the human-readable message.

        token    -- the unexpected token; its ``value`` is quoted in the message
        expected -- the acceptable symbols, interpolated into the message as-is
        seq      -- the token sequence being parsed
        index    -- position in ``seq`` from which up to five tokens are
                    quoted as context (each needs ``value`` and ``type``)
        """
        self.token = token
        self.expected = expected
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        context = ' '.join('%r(%s)' % (t.value, t.type)
                           for t in seq[index:index + 5])
        message = ("Unexpected input %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column,
                                    expected, context))
        # Name this class (not the parent) so ParseError.__init__ stays in
        # the initializer chain if it is ever given one.
        super(UnexpectedToken, self).__init__(message)
| from ..common import ParseError, UnexpectedToken | |||||
| class Parser(object): | class Parser(object): | ||||