Przeglądaj źródła

Improved error reporting in Earley

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 lat temu
rodzic
commit
1602482608
4 zmienionych plików z 25 dodań i 20 usunięć
  1. +2
    -1
      README.md
  2. +16
    -0
      lark/common.py
  3. +6
    -2
      lark/parsers/earley.py
  4. +1
    -17
      lark/parsers/lalr_parser.py

+ 2
- 1
README.md Wyświetl plik

@@ -43,7 +43,7 @@ Notice punctuation doesn't appear in the resulting tree. It's automatically filt

## Learn more about using Lark

- Read the [tutorial](/docs/json_tutorial.md), which shows how to write a JSON parser in Lark.
- **Read the [tutorial](/docs/json_tutorial.md)**, which shows how to write a JSON parser in Lark.
- Read the [reference](/docs/reference.md)
- Browse the [examples](/examples), which include a calculator, and a Python-code parser.
- Check out the [tests](/tests/test_parser.py) for more examples.
@@ -72,6 +72,7 @@ These features are planned to be implemented in the near future:
- Grammar composition (in cases that the tokens can reliably signify a grammar change)
- Parser generator - create a small parser, independent of Lark, to embed in your project.
- Optimizations in both the parsers and the lexer
- Better handling of ambiguity

## Comparison to other parsers



+ 16
- 0
lark/common.py Wyświetl plik

@@ -6,6 +6,22 @@ class ParseError(Exception):
pass


class UnexpectedToken(ParseError):
    """Parse error raised for a token the parser could not accept.

    Attributes:
        token: The offending token, as passed in.
        expected: The expected symbols, as passed in.
        line: ``token.line``, or ``'?'`` if the token has no such attribute.
        column: ``token.column``, or ``'?'`` if the token has no such attribute.
    """

    def __init__(self, token, expected, seq, index):
        """Store the error details and build a human-readable message.

        Args:
            token: The unexpected token; its ``value`` is shown in the message.
            expected: What was expected instead (rendered with ``%s``).
            seq: Sequence of tokens; items must expose ``value`` and ``type``.
            index: Offset into ``seq`` from which the context is taken.
        """
        self.token = token
        self.expected = expected
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        # Show at most five tokens of context, starting at ``index``.
        context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column, expected, context))

        # super() must name this class, not its parent; naming ParseError
        # would skip ParseError.__init__ in the method resolution order.
        super(UnexpectedToken, self).__init__(message)



def is_terminal(sym):
    """Return True if the symbol name ``sym`` denotes a terminal.

    A symbol is treated as a terminal when ``sym.isupper()`` holds or when
    it starts with ``'$'``. An empty string is not a terminal.
    """
    # startswith() is safe on an empty string, unlike indexing sym[0].
    return sym.isupper() or sym.startswith('$')


+ 6
- 2
lark/parsers/earley.py Wyświetl plik

@@ -1,7 +1,7 @@
"My name is Earley"

from ..utils import classify, STRING_TYPE
from ..common import ParseError
from ..common import ParseError, UnexpectedToken

try:
xrange
@@ -14,6 +14,7 @@ class MatchFailed(object):
class AbortParseMatch(Exception):
    """Exception type declared by the Earley parser module.

    NOTE(review): where this is raised and caught is not visible in this
    excerpt -- confirm against the full module.
    """


class Rule(object):
def __init__(self, name, symbols, postprocess):
self.name = name
@@ -34,6 +35,8 @@ class State(object):
if self.is_literal:
self.expect_symbol = self.expect_symbol['literal']
assert isinstance(self.expect_symbol, STRING_TYPE), self.expect_symbol
else:
self.is_literal = False

def next_state(self, data):
    """Return a new State derived from this one.

    The new State keeps this state's ``rule`` and ``reference``, has
    ``expect`` incremented by one, and carries ``self.data + [data]``.
    """
    advanced = self.expect + 1
    collected = self.data + [data]
    return State(self.rule, advanced, self.reference, collected)
@@ -136,7 +139,8 @@ class Parser(object):
self.advance_to(table, pos + 1, set())

if not table[-1]:
raise ParseError('Error at line {t.line}:{t.column}'.format(t=stream[pos]))
expected = {s.expect_symbol for s in table[-2] if s.is_literal}
raise UnexpectedToken(stream[pos], expected, stream, pos)

res = list(self.finish(table))
if not res:


+ 1
- 17
lark/parsers/lalr_parser.py Wyświetl plik

@@ -1,21 +1,5 @@
from .lalr_analysis import ACTION_SHIFT
from ..common import ParseError

class UnexpectedToken(ParseError):
    """Parse error raised for a token the parser could not accept.

    Attributes:
        token: The offending token, as passed in.
        expected: The expected symbols, as passed in.
        line: ``token.line``, or ``'?'`` if the token has no such attribute.
        column: ``token.column``, or ``'?'`` if the token has no such attribute.
    """

    def __init__(self, token, expected, seq, index):
        """Store the error details and build a human-readable message.

        Args:
            token: The unexpected token; its ``value`` is shown in the message.
            expected: What was expected instead (rendered with ``%s``).
            seq: Sequence of tokens; items must expose ``value`` and ``type``.
            index: Offset into ``seq`` from which the context is taken.
        """
        self.token = token
        self.expected = expected
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        # Show at most five tokens of context, starting at ``index``.
        context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        message = ("Unexpected input %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column, expected, context))

        # super() must name this class, not its parent; naming ParseError
        # would skip ParseError.__init__ in the method resolution order.
        super(UnexpectedToken, self).__init__(message)


from ..common import ParseError, UnexpectedToken


class Parser(object):


Ładowanie…
Anuluj
Zapisz