瀏覽代碼

Earley error reporting - initial (Issue #760)

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.11.2
Erez Sh 3 年之前
父節點
當前提交
f285cda4f2
共有 6 個檔案被更改，包括 98 行新增和 13 行删除
  1. +79
    -0
      examples/advanced/error_reporting_earley.py
  2. +1
    -1
      examples/advanced/error_reporting_lalr.py
  3. +1
    -1
      lark/__init__.py
  4. +15
    -9
      lark/exceptions.py
  5. +1
    -1
      lark/parsers/earley.py
  6. +1
    -1
      lark/parsers/xearley.py

+ 79
- 0
examples/advanced/error_reporting_earley.py 查看文件

@@ -0,0 +1,79 @@
"""
Example-Driven Error Reporting
==============================

A demonstration of example-driven error reporting with the Earley parser
(See also: error_reporting_lalr.py)
"""
from lark import Lark, UnexpectedInput

from _json_parser import json_grammar # Using the grammar from the json_parser example

# Shared parser instance used by parse() below. No `parser=` argument is
# given, so Lark's default parser is used; the module docstring describes
# this example as targeting the Earley parser.
json_parser = Lark(json_grammar)

class JsonSyntaxError(SyntaxError):
    """Base class for the labelled JSON syntax errors in this example.

    Instances are created with three positional arguments,
    ``(context, line, column)``, which ``__str__`` unpacks from ``self.args``.
    Subclasses override ``label`` to name the specific kind of error.
    """

    # Fallback label so that the base class itself can be formatted;
    # without it, str() on a direct instance raises AttributeError.
    label = 'Syntax Error'

    def __str__(self):
        """Return '<label> at line <line>, column <column>.' followed by the context."""
        context, line, column = self.args
        return '%s at line %s, column %s.\n\n%s' % (self.label, line, column, context)

class JsonMissingValue(JsonSyntaxError):
    """JSON syntax error reported with the label 'Missing Value'.

    NOTE(review): the example table in parse() in this file does not map any
    examples to this class, so parse() never raises it as written.
    """

    label = 'Missing Value'

class JsonMissingOpening(JsonSyntaxError):
    """JSON syntax error reported with the label 'Missing Opening'."""

    label = 'Missing Opening'

class JsonMissingClosing(JsonSyntaxError):
    """JSON syntax error reported with the label 'Missing Closing'."""

    label = 'Missing Closing'

class JsonMissingComma(JsonSyntaxError):
    """JSON syntax error reported with the label 'Missing Comma'."""

    label = 'Missing Comma'

class JsonTrailingComma(JsonSyntaxError):
    """JSON syntax error reported with the label 'Trailing Comma'."""

    label = 'Trailing Comma'


def parse(json_text):
    """Parse ``json_text`` with ``json_parser`` and return the parse result.

    If parsing fails with ``UnexpectedInput``, the error is matched against a
    table of known-malformed example inputs using ``match_examples``. When an
    example matches, the associated ``JsonSyntaxError`` subclass is raised,
    built from the error's context, line and column. When nothing matches,
    the original ``UnexpectedInput`` is re-raised unchanged.
    """
    try:
        # The original bound the result to an unused local and returned None.
        return json_parser.parse(json_text)
    except UnexpectedInput as u:
        exc_class = u.match_examples(json_parser.parse, {
            JsonMissingOpening: ['{"foo": ]}',
                                 '{"foor": }}',
                                 '{"foo": }'],
            JsonMissingClosing: ['{"foo": [}',
                                 '{',
                                 '{"a": 1',
                                 '[1'],
            JsonMissingComma: ['[1 2]',
                               '[false 1]',
                               '["b" 1]',
                               '{"a":true 1:4}',
                               '{"a":1 1:4}',
                               '{"a":"b" 1:4}'],
            JsonTrailingComma: ['[,]',
                                '[1,]',
                                '[1,2,]',
                                '{"foo":1,}',
                                '{"foo":false,"bar":true,}']
        }, use_accepts=True)
        if not exc_class:
            # No example matched: propagate the parser's own error.
            raise
        raise exc_class(u.get_context(json_text), u.line, u.column)


def test():
    """Feed two malformed documents to parse() and print the labelled errors.

    Each input is paired with the exception class it is expected to raise;
    only that class is caught, so any other exception propagates.
    """
    cases = (
        ('{"example1": "value"', JsonMissingClosing),
        ('{"example2": ] ', JsonMissingOpening),
    )
    for malformed, expected_error in cases:
        try:
            parse(malformed)
        except expected_error as err:
            print(err)


# Run the demonstration only when this file is executed as a script.
if __name__ == '__main__':
    test()



+ 1
- 1
examples/advanced/error_reporting_lalr.py 查看文件

@@ -3,7 +3,7 @@ Example-Driven Error Reporting
==============================

A demonstration of example-driven error reporting with the LALR parser
(See also: error_reporting_earley.py)
"""
from lark import Lark, UnexpectedInput



+ 1
- 1
lark/__init__.py 查看文件

@@ -3,7 +3,7 @@ from .tree import Tree
from .visitors import Transformer, Visitor, v_args, Discard, Transformer_NonRecursive
from .visitors import InlineTransformer, inline_args # XXX Deprecated
from .exceptions import (ParseError, LexError, GrammarError, UnexpectedToken,
UnexpectedInput, UnexpectedCharacters, LarkError)
UnexpectedInput, UnexpectedCharacters, UnexpectedEOF, LarkError)
from .lexer import Token
from .lark import Lark



+ 15
- 9
lark/exceptions.py 查看文件

@@ -19,14 +19,6 @@ class LexError(LarkError):
pass


class UnexpectedEOF(ParseError):
def __init__(self, expected):
self.expected = expected

message = ("Unexpected end-of-input. Expected one of: \n\t* %s\n" % '\n\t* '.join(x.name for x in self.expected))
super(UnexpectedEOF, self).__init__(message)


class UnexpectedInput(LarkError):
"""UnexpectedInput Error.

@@ -47,6 +39,7 @@ class UnexpectedInput(LarkError):
The parser doesn't hold a copy of the text it has to parse,
so you have to provide it again
"""
assert self.pos_in_stream is not None, self
pos = self.pos_in_stream
start = max(pos - span, 0)
end = pos + span
@@ -91,7 +84,7 @@ class UnexpectedInput(LarkError):
parse_fn(malformed)
except UnexpectedInput as ut:
if ut.state == self.state:
if use_accepts and ut.accepts != self.accepts:
if use_accepts and hasattr(self, 'accepts') and ut.accepts != self.accepts:
logger.debug("Different accepts with same state[%d]: %s != %s at example [%s][%s]" %
(self.state, self.accepts, ut.accepts, i, j))
continue
@@ -114,6 +107,19 @@ class UnexpectedInput(LarkError):

return candidate[0]

class UnexpectedEOF(ParseError, UnexpectedInput):
    """Error raised for an unexpected end of input.

    ``expected`` is an iterable of objects with a ``name`` attribute; the
    names are listed in the error message. ``state`` is stored as given.
    """

    def __init__(self, expected, state=None):
        # Imported inside the constructor, exactly as the original does.
        from .lexer import Token

        self.expected = expected
        self.state = state

        # The constructor receives no position information, so a placeholder
        # "<EOF>" token and -1 for every position field are stored instead.
        self.token = Token("<EOF>", "")
        self.pos_in_stream = -1
        self.line = -1
        self.column = -1

        expected_names = '\n\t* '.join(term.name for term in self.expected)
        message = "Unexpected end-of-input. Expected one of: \n\t* %s\n" % expected_names
        super(UnexpectedEOF, self).__init__(message)


class UnexpectedCharacters(LexError, UnexpectedInput):
def __init__(self, seq, lex_pos, line, column, allowed=None, considered_tokens=None, state=None, token_history=None):


+ 1
- 1
lark/parsers/earley.py 查看文件

@@ -299,7 +299,7 @@ class Parser:
solutions = [n.node for n in columns[-1] if n.is_complete and n.node is not None and n.s == start_symbol and n.start == 0]
if not solutions:
expected_terminals = [t.expect for t in to_scan]
raise UnexpectedEOF(expected_terminals)
raise UnexpectedEOF(expected_terminals, state={i.s for i in to_scan})

if self.debug:
from .earley_forest import ForestToPyDotVisitor


+ 1
- 1
lark/parsers/xearley.py 查看文件

@@ -113,7 +113,7 @@ class Parser(BaseParser):
del delayed_matches[i+1] # No longer needed, so unburden memory

if not next_set and not delayed_matches and not next_to_scan:
raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan))
raise UnexpectedCharacters(stream, i, text_line, text_column, {item.expect.name for item in to_scan}, set(to_scan), state={i.s for i in next_to_scan})

return next_to_scan



Loading…
取消
儲存