Browse Source

Added UnexpectedInput exception (with line & column) to xearley (Issue #43)

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 8 years ago
parent
commit
34449651bf
4 changed files with 19 additions and 14 deletions
  1. lark/lexer.py (+1, -1)
  2. lark/parsers/earley.py (+7, -4)
  3. lark/parsers/xearley.py (+5, -2)
  4. tests/test_parser.py (+6, -7)

+ 1
- 1
lark/lexer.py View File

@@ -11,7 +11,7 @@ class LexError(Exception):
class UnexpectedInput(LexError): class UnexpectedInput(LexError):
def __init__(self, seq, lex_pos, line, column, allowed=None): def __init__(self, seq, lex_pos, line, column, allowed=None):
context = seq[lex_pos:lex_pos+5] context = seq[lex_pos:lex_pos+5]
message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line)
message = "No token defined for: '%s' in %r at line %d col %d" % (seq[lex_pos], context, line, column)


super(UnexpectedInput, self).__init__(message) super(UnexpectedInput, self).__init__(message)




+ 7
- 4
lark/parsers/earley.py View File

@@ -137,14 +137,17 @@ class Column:
if isinstance(item.expect, Terminal): if isinstance(item.expect, Terminal):
self.to_scan.append(item) self.to_scan.append(item)
else: else:
if item not in self.added:
self.added.add(item)
self.to_predict.append(item)
if item in self.added:
continue
self.added.add(item)
self.to_predict.append(item)


self.item_count += 1 # Only count if actually added self.item_count += 1 # Only count if actually added


def __nonzero__(self):

def __bool__(self):
return bool(self.item_count) return bool(self.item_count)
__nonzero__ = __bool__ # Py2 backwards-compatibility


class Parser: class Parser:
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None): def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None):


+ 5
- 2
lark/parsers/xearley.py View File

@@ -21,7 +21,7 @@
from collections import defaultdict from collections import defaultdict


from ..common import ParseError, UnexpectedToken, Terminal from ..common import ParseError, UnexpectedToken, Terminal
from ..lexer import Token
from ..lexer import Token, UnexpectedInput
from ..tree import Tree from ..tree import Tree
from .grammar_analysis import GrammarAnalyzer from .grammar_analysis import GrammarAnalyzer


@@ -115,6 +115,9 @@ class Parser:
next_set.add(delayed_matches[i+1]) next_set.add(delayed_matches[i+1])
del delayed_matches[i+1] # No longer needed, so unburden memory del delayed_matches[i+1] # No longer needed, so unburden memory


if not next_set and not delayed_matches:
raise UnexpectedInput(stream, i, text_line, text_column, to_scan)

return next_set return next_set


# Main loop starts # Main loop starts
@@ -128,7 +131,7 @@ class Parser:


if token == '\n': if token == '\n':
text_line += 1 text_line += 1
text_column = 0
text_column = 1
else: else:
text_column += 1 text_column += 1




+ 6
- 7
tests/test_parser.py View File

@@ -18,7 +18,7 @@ from io import (
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)


from lark.lark import Lark from lark.lark import Lark
from lark.common import GrammarError, ParseError
from lark.common import GrammarError, ParseError, UnexpectedToken
from lark.lexer import LexError, UnexpectedInput from lark.lexer import LexError, UnexpectedInput
from lark.tree import Tree, Transformer from lark.tree import Tree, Transformer


@@ -718,6 +718,8 @@ def _make_parser_test(LEXER, PARSER):
%s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items()))) %s""" % (' '.join(tokens), '\n'.join("%s: %s"%x for x in tokens.items())))


def test_float_without_lexer(self): def test_float_without_lexer(self):
expected_error = UnexpectedInput if LEXER == 'dynamic' else UnexpectedToken

g = _Lark("""start: ["+"|"-"] float g = _Lark("""start: ["+"|"-"] float
float: digit* "." digit+ exp? float: digit* "." digit+ exp?
| digit+ exp | digit+ exp
@@ -727,7 +729,7 @@ def _make_parser_test(LEXER, PARSER):
g.parse("1.2") g.parse("1.2")
g.parse("-.2e9") g.parse("-.2e9")
g.parse("+2e-9") g.parse("+2e-9")
self.assertRaises(ParseError, g.parse, "+2e-9e")
self.assertRaises( expected_error, g.parse, "+2e-9e")


def test_keep_all_tokens(self): def test_keep_all_tokens(self):
l = _Lark("""start: "a"+ """, keep_all_tokens=True) l = _Lark("""start: "a"+ """, keep_all_tokens=True)
@@ -963,19 +965,16 @@ def _make_parser_test(LEXER, PARSER):


@unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions") @unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions")
def test_regex_escaping(self): def test_regex_escaping(self):
expected_error = ParseError if LEXER == 'dynamic' else UnexpectedInput
# TODO Make dynamic parser raise UnexpectedInput if nothing scans?

g = _Lark("start: /[ab]/") g = _Lark("start: /[ab]/")
g.parse('a') g.parse('a')
g.parse('b') g.parse('b')


self.assertRaises( expected_error, g.parse, 'c')
self.assertRaises( UnexpectedInput, g.parse, 'c')


_Lark(r'start: /\w/').parse('a') _Lark(r'start: /\w/').parse('a')


g = _Lark(r'start: /\\w/') g = _Lark(r'start: /\\w/')
self.assertRaises( expected_error, g.parse, 'a')
self.assertRaises( UnexpectedInput, g.parse, 'a')
g.parse(r'\w') g.parse(r'\w')


_Lark(r'start: /\[/').parse('[') _Lark(r'start: /\[/').parse('[')


Loading…
Cancel
Save