@@ -9,7 +9,7 @@ class LexError(Exception):
     pass


 class UnexpectedInput(LexError):
-    def __init__(self, seq, lex_pos, line, column):
+    def __init__(self, seq, lex_pos, line, column, allowed=None):
         context = seq[lex_pos:lex_pos+5]
         message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line)
@@ -18,6 +18,7 @@ class UnexpectedInput(LexError):
         self.line = line
         self.column = column
         self.context = context
+        self.allowed = allowed

 class Token(Str):
     def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None):
@@ -238,7 +239,6 @@ class ContextualLexer:
                    break
            else:
                if lex_pos < len(stream):
-                    print("Allowed tokens:", lexer.tokens)
-                    raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos)
+                    raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos, lexer.tokens)
                break
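Rather than printing the candidate terminals to stdout, the lexer now attaches them to the exception; since `allowed` defaults to None, other raise sites keep working unchanged. A minimal sketch of how a caller might inspect the new attribute (grammar, input, and constructor options invented for illustration):

    from lark.lark import Lark
    from lark.lexer import UnexpectedInput

    parser = Lark('start: "a"+', parser='lalr', lexer='contextual')

    try:
        parser.parse('aa$')  # '$' matches no terminal
    except UnexpectedInput as e:
        # e.allowed holds the token definitions the contextual lexer
        # would have accepted at the failure point (None when the
        # raiser didn't supply them).
        print(e.line, e.column, e.allowed)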
@@ -290,6 +290,31 @@ class ExtractAnonTokens(InlineTransformer):

 def _rfind(s, choices):
     return max(s.rfind(c) for c in choices)

+def _fix_escaping(s):
+    s = s.replace('\\"', '"')
+    w = ''
+    i = iter(s)
+    for n in i:
+        w += n
+        if n == '\\':
+            n2 = next(i)
+            if n2 == '\\':
+                w += '\\\\'
+            elif n2 not in 'unftr':
+                w += '\\'
+            w += n2
+    to_eval = "u'''%s'''" % w
+    try:
+        s = literal_eval(to_eval)
+    except SyntaxError as e:
+        raise ValueError(s, e)
+    return s

 def _literal_to_pattern(literal):
     v = literal.value
     flag_start = _rfind(v, '/"')+1
@@ -300,13 +325,12 @@ def _literal_to_pattern(literal):
     v = v[:flag_start]
     assert v[0] == v[-1] and v[0] in '"/'
     x = v[1:-1]
-    x = re.sub(r'(\\[wd/ .]|\\\[|\\\])', r'\\\1', x)
-    x = x.replace("'", r"\'")
-
-    to_eval = "u'''%s'''" % x
-    try:
-        s = literal_eval(to_eval)
-    except SyntaxError as e:
-        raise ValueError(v, e)
+    s = _fix_escaping(x)
+
+    if v[0] == '"':
+        s = s.replace('\\\\', '\\')

     return { 'STRING': PatternStr,
              'REGEXP': PatternRE }[literal.type](s, flags or None)
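Taken together, these hunks centralize escape handling. Traced by hand from the code above (a sketch worth running to confirm): _fix_escaping interprets the escapes it recognizes (\u \n \f \t \r) via literal_eval, keeps the backslash of any other escape, and leaves an escaped backslash doubled; _literal_to_pattern then collapses the doubled backslash for STRING literals only, since REGEXP literals should hand the regex engine an escaped backslash:

    x = r'\\t'                             # body of a grammar literal, quotes stripped
    s = _fix_escaping(x)
    assert s == r'\\t'                     # escaped backslash stays doubled

    assert _fix_escaping(r'\n') == '\n'    # recognized escape: interpreted
    assert _fix_escaping(r'\w') == r'\w'   # unknown escape: backslash kept

    # STRING literal "\\t" -> the 2-character string backslash + 't':
    assert s.replace('\\\\', '\\') == '\\t'

    # REGEXP literal /\\t/ -> pattern r'\\t', i.e. match backslash then 't':
    import re
    assert re.match(s, '\\t')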
@@ -19,7 +19,7 @@ logging.basicConfig(level=logging.INFO)

 from lark.lark import Lark
 from lark.common import GrammarError, ParseError
-from lark.lexer import LexError
+from lark.lexer import LexError, UnexpectedInput
 from lark.tree import Tree, Transformer

 __path__ = os.path.dirname(__file__)
@@ -673,7 +673,7 @@ def _make_parser_test(LEXER, PARSER):
                        """)
             x = g.parse(r'\a')

-            g = _Lark(r"""start: /\\\\/ /a/
+            g = _Lark(r"""start: /\\/ /a/
                        """)
             x = g.parse(r'\a')
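The expectation tightens to match: grammar regexps now follow the same convention as Python raw-string regexps, where a doubled backslash matches one literal backslash, so /\\/ rather than /\\\\/ matches the input \a. The Python analogy:

    import re
    # In a raw pattern, two backslashes match one literal backslash:
    assert re.match(r'\\a', r'\a')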
@@ -961,6 +961,49 @@ def _make_parser_test(LEXER, PARSER):
             self.assertEqual(tree.children, ['1'])

+        @unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions")
+        def test_regex_escaping(self):
+            expected_error = ParseError if LEXER == 'dynamic' else UnexpectedInput
+            # TODO Make dynamic parser raise UnexpectedInput if nothing scans?
+            g = _Lark("start: /[ab]/")
+            g.parse('a')
+            g.parse('b')
+            self.assertRaises( expected_error, g.parse, 'c')
+
+            _Lark(r'start: /\w/').parse('a')
+
+            g = _Lark(r'start: /\\w/')
+            self.assertRaises( expected_error, g.parse, 'a')
+            g.parse(r'\w')
+
+            _Lark(r'start: /\[/').parse('[')
+            _Lark(r'start: /\//').parse('/')
+            _Lark(r'start: /\\/').parse('\\')
+            _Lark(r'start: /\[ab]/').parse('[ab]')
+            _Lark(r'start: /\\[ab]/').parse('\\a')
+
+            _Lark(r'start: /\t/').parse('\t')
+            _Lark(r'start: /\\t/').parse('\\t')
+            _Lark(r'start: /\\\t/').parse('\\\t')
+
+            _Lark(r'start: "\t"').parse('\t')
+            _Lark(r'start: "\\t"').parse('\\t')
+            _Lark(r'start: "\\\t"').parse('\\\t')

     _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize()
     _TestParser.__name__ = _NAME
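For reference, the naming scheme above yields one generated test class per parser/lexer pairing; a quick sketch of the names it produces (the pairing list here is assumed for illustration):

    for parser, lexer in [('lalr', 'contextual'), ('lalr', 'standard'),
                          ('earley', None)]:
        name = "Test" + parser.capitalize() + (lexer or 'Scanless').capitalize()
        print(name)  # TestLalrContextual, TestLalrStandard, TestEarleyScanless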