@@ -43,7 +43,7 @@ Notice punctuation doesn't appear in the resulting tree. It's automatically filt | |||||
## Learn more about using Lark | ## Learn more about using Lark | ||||
- Read the [tutorial](/docs/json_tutorial.md), which shows how to write a JSON parser in Lark. | |||||
- **Read the [tutorial](/docs/json_tutorial.md)**, which shows how to write a JSON parser in Lark. | |||||
- Read the [reference](/docs/reference.md) | - Read the [reference](/docs/reference.md) | ||||
- Browse the [examples](/examples), which include a calculator and a Python-code parser. | - Browse the [examples](/examples), which include a calculator and a Python-code parser. | ||||
- Check out the [tests](/tests/test_parser.py) for more examples. | - Check out the [tests](/tests/test_parser.py) for more examples. | ||||
@@ -72,6 +72,7 @@ These features are planned to be implemented in the near future: | |||||
- Grammar composition (in cases that the tokens can reliably signify a grammar change) | - Grammar composition (in cases that the tokens can reliably signify a grammar change) | ||||
- Parser generator - create a small parser, independent of Lark, to embed in your project. | - Parser generator - create a small parser, independent of Lark, to embed in your project. | ||||
- Optimizations in both the parsers and the lexer | - Optimizations in both the parsers and the lexer | ||||
- Better handling of ambiguity | |||||
## Comparison to other parsers | ## Comparison to other parsers | ||||
@@ -6,6 +6,22 @@ class ParseError(Exception): | |||||
pass | pass | ||||
class UnexpectedToken(ParseError):
    """Parse error raised for a token the parser cannot accept.

    Instance attributes:
        token    -- the offending token, exactly as passed in
        expected -- the acceptable symbols, exactly as passed in
        line     -- the token's ``line`` attribute, or '?' if it has none
        column   -- the token's ``column`` attribute, or '?' if it has none
    """

    def __init__(self, token, expected, seq, index):
        """Store the error details and build the exception message.

        token    -- the unexpected token; must expose ``.value``
        expected -- what would have been accepted (formatted with %s)
        seq      -- the token sequence being parsed; items must expose
                    ``.value`` and ``.type``
        index    -- start of the context window within ``seq``
        """
        self.token = token
        self.expected = expected
        # Position attributes are optional on tokens; fall back to '?'.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        # Show at most five tokens, starting at ``index``, as context.
        context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column, expected, context))

        # Name this class (not ParseError) so the MRO lookup starts right
        # after UnexpectedToken and never skips a ParseError.__init__.
        super(UnexpectedToken, self).__init__(message)
def is_terminal(sym):
    """Return True if the symbol name ``sym`` denotes a terminal.

    A symbol is treated as a terminal when ``sym.isupper()`` is true or
    when it begins with '$'. An empty string is not a terminal.
    """
    # startswith() rather than sym[0]: indexing raised IndexError on ''.
    return sym.isupper() or sym.startswith('$')
@@ -1,7 +1,7 @@ | |||||
"My name is Earley" | "My name is Earley" | ||||
from ..utils import classify, STRING_TYPE | from ..utils import classify, STRING_TYPE | ||||
from ..common import ParseError | |||||
from ..common import ParseError, UnexpectedToken | |||||
try: | try: | ||||
xrange | xrange | ||||
@@ -14,6 +14,7 @@ class MatchFailed(object): | |||||
class AbortParseMatch(Exception): | class AbortParseMatch(Exception): | ||||
pass | pass | ||||
class Rule(object): | class Rule(object): | ||||
def __init__(self, name, symbols, postprocess): | def __init__(self, name, symbols, postprocess): | ||||
self.name = name | self.name = name | ||||
@@ -34,6 +35,8 @@ class State(object): | |||||
if self.is_literal: | if self.is_literal: | ||||
self.expect_symbol = self.expect_symbol['literal'] | self.expect_symbol = self.expect_symbol['literal'] | ||||
assert isinstance(self.expect_symbol, STRING_TYPE), self.expect_symbol | assert isinstance(self.expect_symbol, STRING_TYPE), self.expect_symbol | ||||
else: | |||||
self.is_literal = False | |||||
def next_state(self, data):
    """Return a new State for the same rule and reference, one step ahead.

    The new state's data is a fresh list: this state's data followed by
    ``data``. This state itself is left unmodified.
    """
    collected = self.data + [data]
    return State(self.rule, self.expect + 1, self.reference, collected)
@@ -136,7 +139,8 @@ class Parser(object): | |||||
self.advance_to(table, pos + 1, set()) | self.advance_to(table, pos + 1, set()) | ||||
if not table[-1]: | if not table[-1]: | ||||
raise ParseError('Error at line {t.line}:{t.column}'.format(t=stream[pos])) | |||||
expected = {s.expect_symbol for s in table[-2] if s.is_literal} | |||||
raise UnexpectedToken(stream[pos], expected, stream, pos) | |||||
res = list(self.finish(table)) | res = list(self.finish(table)) | ||||
if not res: | if not res: | ||||
@@ -1,21 +1,5 @@ | |||||
from .lalr_analysis import ACTION_SHIFT | from .lalr_analysis import ACTION_SHIFT | ||||
from ..common import ParseError | |||||
class UnexpectedToken(ParseError):
    """Parse error raised for input the parser cannot accept.

    Instance attributes:
        token    -- the offending token, exactly as passed in
        expected -- the acceptable symbols, exactly as passed in
        line     -- the token's ``line`` attribute, or '?' if it has none
        column   -- the token's ``column`` attribute, or '?' if it has none
    """

    def __init__(self, token, expected, seq, index):
        """Store the error details and build the exception message.

        token    -- the unexpected token; must expose ``.value``
        expected -- what would have been accepted (formatted with %s)
        seq      -- the token sequence being parsed; items must expose
                    ``.value`` and ``.type``
        index    -- start of the context window within ``seq``
        """
        self.token = token
        self.expected = expected
        # Position attributes are optional on tokens; fall back to '?'.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        # Show at most five tokens, starting at ``index``, as context.
        context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        message = ("Unexpected input %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column, expected, context))

        # Name this class (not ParseError) so the MRO lookup starts right
        # after UnexpectedToken and never skips a ParseError.__init__.
        super(UnexpectedToken, self).__init__(message)
from ..common import ParseError, UnexpectedToken | |||||
class Parser(object): | class Parser(object): | ||||