@@ -9,7 +9,7 @@ class LexError(Exception): | |||||
pass | pass | ||||
class UnexpectedInput(LexError): | class UnexpectedInput(LexError): | ||||
def __init__(self, seq, lex_pos, line, column): | |||||
def __init__(self, seq, lex_pos, line, column, allowed=None): | |||||
context = seq[lex_pos:lex_pos+5] | context = seq[lex_pos:lex_pos+5] | ||||
message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line) | message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line) | ||||
@@ -18,6 +18,7 @@ class UnexpectedInput(LexError): | |||||
self.line = line | self.line = line | ||||
self.column = column | self.column = column | ||||
self.context = context | self.context = context | ||||
self.allowed = allowed | |||||
class Token(Str): | class Token(Str): | ||||
def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None): | def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None): | ||||
@@ -238,7 +239,6 @@ class ContextualLexer: | |||||
break | break | ||||
else: | else: | ||||
if lex_pos < len(stream): | if lex_pos < len(stream): | ||||
print("Allowed tokens:", lexer.tokens) | |||||
raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos) | |||||
raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos, lexer.tokens) | |||||
break | break | ||||
@@ -290,6 +290,31 @@ class ExtractAnonTokens(InlineTransformer): | |||||
def _rfind(s, choices): | def _rfind(s, choices): | ||||
return max(s.rfind(c) for c in choices) | return max(s.rfind(c) for c in choices) | ||||
def _fix_escaping(s): | |||||
s = s.replace('\\"', '"') | |||||
w = '' | |||||
i = iter(s) | |||||
for n in i: | |||||
w += n | |||||
if n == '\\': | |||||
n2 = next(i) | |||||
if n2 == '\\': | |||||
w += '\\\\' | |||||
elif n2 not in 'unftr': | |||||
w += '\\' | |||||
w += n2 | |||||
to_eval = "u'''%s'''" % w | |||||
try: | |||||
s = literal_eval(to_eval) | |||||
except SyntaxError as e: | |||||
raise ValueError(v, e) | |||||
return s | |||||
def _literal_to_pattern(literal): | def _literal_to_pattern(literal): | ||||
v = literal.value | v = literal.value | ||||
flag_start = _rfind(v, '/"')+1 | flag_start = _rfind(v, '/"')+1 | ||||
@@ -300,13 +325,12 @@ def _literal_to_pattern(literal): | |||||
v = v[:flag_start] | v = v[:flag_start] | ||||
assert v[0] == v[-1] and v[0] in '"/' | assert v[0] == v[-1] and v[0] in '"/' | ||||
x = v[1:-1] | x = v[1:-1] | ||||
x = re.sub(r'(\\[wd/ .]|\\\[|\\\])', r'\\\1', x) | |||||
x = x.replace("'", r"\'") | |||||
to_eval = "u'''%s'''" % x | |||||
try: | |||||
s = literal_eval(to_eval) | |||||
except SyntaxError as e: | |||||
raise ValueError(v, e) | |||||
s = _fix_escaping(x) | |||||
if v[0] == '"': | |||||
s = s.replace('\\\\', '\\') | |||||
return { 'STRING': PatternStr, | return { 'STRING': PatternStr, | ||||
'REGEXP': PatternRE }[literal.type](s, flags or None) | 'REGEXP': PatternRE }[literal.type](s, flags or None) | ||||
@@ -19,7 +19,7 @@ logging.basicConfig(level=logging.INFO) | |||||
from lark.lark import Lark | from lark.lark import Lark | ||||
from lark.common import GrammarError, ParseError | from lark.common import GrammarError, ParseError | ||||
from lark.lexer import LexError | |||||
from lark.lexer import LexError, UnexpectedInput | |||||
from lark.tree import Tree, Transformer | from lark.tree import Tree, Transformer | ||||
__path__ = os.path.dirname(__file__) | __path__ = os.path.dirname(__file__) | ||||
@@ -673,7 +673,7 @@ def _make_parser_test(LEXER, PARSER): | |||||
""") | """) | ||||
x = g.parse(r'\a') | x = g.parse(r'\a') | ||||
g = _Lark(r"""start: /\\\\/ /a/ | |||||
g = _Lark(r"""start: /\\/ /a/ | |||||
""") | """) | ||||
x = g.parse(r'\a') | x = g.parse(r'\a') | ||||
@@ -961,6 +961,49 @@ def _make_parser_test(LEXER, PARSER): | |||||
self.assertEqual(tree.children, ['1']) | self.assertEqual(tree.children, ['1']) | ||||
@unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions")
def test_regex_escaping(self):
    """Exercise the escaping levels of regexp and string terminals.

    NOTE(review): assumes ``_Lark``, ``LEXER``, ``ParseError`` and
    ``UnexpectedInput`` come from the enclosing test-factory scope — confirm
    against the full file.
    """
    # The dynamic lexer reports a failed scan as ParseError; the other
    # lexers raise UnexpectedInput directly.
    expected_error = ParseError if LEXER == 'dynamic' else UnexpectedInput
    # TODO Make dynamic parser raise UnexpectedInput if nothing scans?
    # Baseline: a plain character class works and rejects non-members.
    g = _Lark("start: /[ab]/")
    g.parse('a')
    g.parse('b')
    self.assertRaises( expected_error, g.parse, 'c')
    # One backslash is a regex escape: \w matches a word character...
    _Lark(r'start: /\w/').parse('a')
    # ...two backslashes match the literal two-character text "\w".
    g = _Lark(r'start: /\\w/')
    self.assertRaises( expected_error, g.parse, 'a')
    g.parse(r'\w')
    # Escaped regex metacharacters match themselves.
    _Lark(r'start: /\[/').parse('[')
    _Lark(r'start: /\//').parse('/')
    _Lark(r'start: /\\/').parse('\\')
    _Lark(r'start: /\[ab]/').parse('[ab]')
    _Lark(r'start: /\\[ab]/').parse('\\a')
    # Three escaping levels in regexps: tab, backslash-t, backslash+tab.
    _Lark(r'start: /\t/').parse('\t')
    _Lark(r'start: /\\t/').parse('\\t')
    _Lark(r'start: /\\\t/').parse('\\\t')
    # The same three escaping levels, in string terminals.
    _Lark(r'start: "\t"').parse('\t')
    _Lark(r'start: "\\t"').parse('\\t')
    _Lark(r'start: "\\\t"').parse('\\\t')
_NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize() | _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize() | ||||
_TestParser.__name__ = _NAME | _TestParser.__name__ = _NAME | ||||