Browse Source

Added UnexpectedInput exception (with line & column) to xearley (Issue #43)

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 years ago
parent
commit
34449651bf
4 changed files with 19 additions and 14 deletions
  1. lark/lexer.py (+1, -1)
  2. lark/parsers/earley.py (+7, -4)
  3. lark/parsers/xearley.py (+5, -2)
  4. tests/test_parser.py (+6, -7)

+ 1
- 1
lark/lexer.py View File

@@ -11,7 +11,7 @@ class LexError(Exception):
class UnexpectedInput(LexError):
def __init__(self, seq, lex_pos, line, column, allowed=None):
context = seq[lex_pos:lex_pos+5]
message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line)
message = "No token defined for: '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column)

super(UnexpectedInput, self).__init__(message)



+ 7
- 4
lark/parsers/earley.py View File

@@ -137,14 +137,17 @@ class Column:
if isinstance(item.expect, Terminal):
self.to_scan.append(item)
else:
if item not in self.added:
self.added.add(item)
self.to_predict.append(item)
if item in self.added:
continue
self.added.add(item)
self.to_predict.append(item)

self.item_count += 1 # Only count if actually added

def __nonzero__(self):

def __bool__(self):
return bool(self.item_count)
__nonzero__ = __bool__ # Py2 backwards-compatibility

class Parser:
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None):


+ 5
- 2
lark/parsers/xearley.py View File

@@ -21,7 +21,7 @@
from collections import defaultdict

from ..common import ParseError, UnexpectedToken, Terminal
from ..lexer import Token
from ..lexer import Token, UnexpectedInput
from ..tree import Tree
from .grammar_analysis import GrammarAnalyzer

@@ -115,6 +115,9 @@ class Parser:
next_set.add(delayed_matches[i+1])
del delayed_matches[i+1] # No longer needed, so unburden memory

if not next_set and not delayed_matches:
raise UnexpectedInput(stream, i, text_line, text_column, to_scan)

return next_set

# Main loop starts
@@ -128,7 +131,7 @@ class Parser:

if token == '\n':
text_line += 1
text_column = 0
text_column = 1
else:
text_column += 1



+ 6
- 7
tests/test_parser.py View File

@@ -18,7 +18,7 @@ from io import (
logging.basicConfig(level=logging.INFO)

from lark.lark import Lark
from lark.common import GrammarError, ParseError
from lark.common import GrammarError, ParseError, UnexpectedToken
from lark.lexer import LexError, UnexpectedInput
from lark.tree import Tree, Transformer

@@ -718,6 +718,8 @@ def _make_parser_test(LEXER, PARSER):
%s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))

def test_float_without_lexer(self):
expected_error = UnexpectedInput if LEXER == 'dynamic' else UnexpectedToken

g = _Lark("""start: ["+"|"-"] float
float: digit* "." digit+ exp?
| digit+ exp
@@ -727,7 +729,7 @@ def _make_parser_test(LEXER, PARSER):
g.parse("1.2")
g.parse("-.2e9")
g.parse("+2e-9")
self.assertRaises(ParseError, g.parse, "+2e-9e")
self.assertRaises( expected_error, g.parse, "+2e-9e")

def test_keep_all_tokens(self):
l = _Lark("""start: "a"+ """, keep_all_tokens=True)
@@ -963,19 +965,16 @@ def _make_parser_test(LEXER, PARSER):

@unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions")
def test_regex_escaping(self):
expected_error = ParseError if LEXER == 'dynamic' else UnexpectedInput
# TODO Make dynamic parser raise UnexpectedInput if nothing scans?

g = _Lark("start: /[ab]/")
g.parse('a')
g.parse('b')

self.assertRaises( expected_error, g.parse, 'c')
self.assertRaises( UnexpectedInput, g.parse, 'c')

_Lark(r'start: /\w/').parse('a')

g = _Lark(r'start: /\\w/')
self.assertRaises( expected_error, g.parse, 'a')
self.assertRaises( UnexpectedInput, g.parse, 'a')
g.parse(r'\w')

_Lark(r'start: /\[/').parse('[')


Loading…
Cancel
Save