Browse Source

Improved error reporting in Earley

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 years ago
parent
commit
1602482608
4 changed files with 25 additions and 20 deletions
  1. +2
    -1
      README.md
  2. +16
    -0
      lark/common.py
  3. +6
    -2
      lark/parsers/earley.py
  4. +1
    -17
      lark/parsers/lalr_parser.py

+ 2
- 1
README.md View File

@@ -43,7 +43,7 @@ Notice punctuation doesn't appear in the resulting tree. It's automatically filt


## Learn more about using Lark ## Learn more about using Lark


- Read the [tutorial](/docs/json_tutorial.md), which shows how to write a JSON parser in Lark.
- **Read the [tutorial](/docs/json_tutorial.md)**, which shows how to write a JSON parser in Lark.
- Read the [reference](/docs/reference.md) - Read the [reference](/docs/reference.md)
- Browse the [examples](/examples), which include a calculator, and a Python-code parser. - Browse the [examples](/examples), which include a calculator, and a Python-code parser.
- Check out the [tests](/tests/test_parser.py) for more examples. - Check out the [tests](/tests/test_parser.py) for more examples.
@@ -72,6 +72,7 @@ These features are planned to be implemented in the near future:
- Grammar composition (in cases that the tokens can reliably signify a grammar change) - Grammar composition (in cases that the tokens can reliably signify a grammar change)
- Parser generator - create a small parser, independent of Lark, to embed in your project. - Parser generator - create a small parser, independent of Lark, to embed in your project.
- Optimizations in both the parsers and the lexer - Optimizations in both the parsers and the lexer
- Better handling of ambiguity


## Comparison to other parsers ## Comparison to other parsers




+ 16
- 0
lark/common.py View File

@@ -6,6 +6,22 @@ class ParseError(Exception):
pass pass




class UnexpectedToken(ParseError):
    """Parse error raised for a token the parser cannot accept.

    Attributes:
        token: the offending token, as passed in.
        expected: the acceptable symbols, as passed in.
        line: ``token.line``, or ``'?'`` when the token has no such attribute.
        column: ``token.column``, or ``'?'`` when the token has no such
            attribute.
    """

    def __init__(self, token, expected, seq, index):
        """Store the failure details and build a human-readable message.

        Args:
            token: the unexpected token; its ``value`` is shown in the
                message.
            expected: the symbols that would have been accepted; rendered
                with ``%s`` in the message.
            seq: the sequence of tokens being parsed; each item shown in the
                context needs ``value`` and ``type`` attributes.
            index: start of the context window in ``seq``; at most five
                tokens from there are shown.
        """
        self.token = token
        self.expected = expected
        # Position attributes are optional on tokens, so fall back to '?'.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        # Slicing never raises, so a window running past the end of `seq`
        # simply yields fewer than five tokens.
        context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column, expected, context))

        # Name this class (not the parent) so that ParseError stays in the
        # initialisation chain; super(ParseError, ...) would skip it.
        super(UnexpectedToken, self).__init__(message)



def is_terminal(sym): def is_terminal(sym):
return sym.isupper() or sym[0] == '$' return sym.isupper() or sym[0] == '$'



+ 6
- 2
lark/parsers/earley.py View File

@@ -1,7 +1,7 @@
"My name is Earley" "My name is Earley"


from ..utils import classify, STRING_TYPE from ..utils import classify, STRING_TYPE
from ..common import ParseError
from ..common import ParseError, UnexpectedToken


try: try:
xrange xrange
@@ -14,6 +14,7 @@ class MatchFailed(object):
class AbortParseMatch(Exception): class AbortParseMatch(Exception):
pass pass



class Rule(object): class Rule(object):
def __init__(self, name, symbols, postprocess): def __init__(self, name, symbols, postprocess):
self.name = name self.name = name
@@ -34,6 +35,8 @@ class State(object):
if self.is_literal: if self.is_literal:
self.expect_symbol = self.expect_symbol['literal'] self.expect_symbol = self.expect_symbol['literal']
assert isinstance(self.expect_symbol, STRING_TYPE), self.expect_symbol assert isinstance(self.expect_symbol, STRING_TYPE), self.expect_symbol
else:
self.is_literal = False


def next_state(self, data): def next_state(self, data):
return State(self.rule, self.expect+1, self.reference, self.data + [data]) return State(self.rule, self.expect+1, self.reference, self.data + [data])
@@ -136,7 +139,8 @@ class Parser(object):
self.advance_to(table, pos + 1, set()) self.advance_to(table, pos + 1, set())


if not table[-1]: if not table[-1]:
raise ParseError('Error at line {t.line}:{t.column}'.format(t=stream[pos]))
expected = {s.expect_symbol for s in table[-2] if s.is_literal}
raise UnexpectedToken(stream[pos], expected, stream, pos)


res = list(self.finish(table)) res = list(self.finish(table))
if not res: if not res:


+ 1
- 17
lark/parsers/lalr_parser.py View File

@@ -1,21 +1,5 @@
from .lalr_analysis import ACTION_SHIFT from .lalr_analysis import ACTION_SHIFT
from ..common import ParseError

class UnexpectedToken(ParseError):
    """Parse error raised for input the parser cannot accept.

    Attributes:
        token: the offending token, as passed in.
        expected: the acceptable symbols, as passed in.
        line: ``token.line``, or ``'?'`` when the token has no such attribute.
        column: ``token.column``, or ``'?'`` when the token has no such
            attribute.
    """

    def __init__(self, token, expected, seq, index):
        """Store the failure details and build a human-readable message.

        Args:
            token: the unexpected token; its ``value`` is shown in the
                message.
            expected: the symbols that would have been accepted; rendered
                with ``%s`` in the message.
            seq: the sequence of tokens being parsed; each item shown in the
                context needs ``value`` and ``type`` attributes.
            index: start of the context window in ``seq``; at most five
                tokens from there are shown.
        """
        self.token = token
        self.expected = expected
        # Position attributes are optional on tokens, so fall back to '?'.
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        # Slicing never raises, so a window running past the end of `seq`
        # simply yields fewer than five tokens.
        context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        message = ("Unexpected input %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column, expected, context))

        # Name this class (not the parent) so that ParseError stays in the
        # initialisation chain; super(ParseError, ...) would skip it.
        super(UnexpectedToken, self).__init__(message)


from ..common import ParseError, UnexpectedToken




class Parser(object): class Parser(object):


Loading…
Cancel
Save