| @@ -9,7 +9,7 @@ class LexError(Exception): | |||
| pass | |||
| class UnexpectedInput(LexError): | |||
| def __init__(self, seq, lex_pos, line, column): | |||
| def __init__(self, seq, lex_pos, line, column, allowed=None): | |||
| context = seq[lex_pos:lex_pos+5] | |||
| message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line) | |||
| @@ -18,6 +18,7 @@ class UnexpectedInput(LexError): | |||
| self.line = line | |||
| self.column = column | |||
| self.context = context | |||
| self.allowed = allowed | |||
| class Token(Str): | |||
| def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None): | |||
| @@ -238,7 +239,6 @@ class ContextualLexer: | |||
| break | |||
| else: | |||
| if lex_pos < len(stream): | |||
| print("Allowed tokens:", lexer.tokens) | |||
| raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos) | |||
| raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos, lexer.tokens) | |||
| break | |||
| @@ -290,6 +290,31 @@ class ExtractAnonTokens(InlineTransformer): | |||
| def _rfind(s, choices): | |||
| return max(s.rfind(c) for c in choices) | |||
| def _fix_escaping(s): | |||
| s = s.replace('\\"', '"') | |||
| w = '' | |||
| i = iter(s) | |||
| for n in i: | |||
| w += n | |||
| if n == '\\': | |||
| n2 = next(i) | |||
| if n2 == '\\': | |||
| w += '\\\\' | |||
| elif n2 not in 'unftr': | |||
| w += '\\' | |||
| w += n2 | |||
| to_eval = "u'''%s'''" % w | |||
| try: | |||
| s = literal_eval(to_eval) | |||
| except SyntaxError as e: | |||
| raise ValueError(v, e) | |||
| return s | |||
| def _literal_to_pattern(literal): | |||
| v = literal.value | |||
| flag_start = _rfind(v, '/"')+1 | |||
| @@ -300,13 +325,12 @@ def _literal_to_pattern(literal): | |||
| v = v[:flag_start] | |||
| assert v[0] == v[-1] and v[0] in '"/' | |||
| x = v[1:-1] | |||
| x = re.sub(r'(\\[wd/ .]|\\\[|\\\])', r'\\\1', x) | |||
| x = x.replace("'", r"\'") | |||
| to_eval = "u'''%s'''" % x | |||
| try: | |||
| s = literal_eval(to_eval) | |||
| except SyntaxError as e: | |||
| raise ValueError(v, e) | |||
| s = _fix_escaping(x) | |||
| if v[0] == '"': | |||
| s = s.replace('\\\\', '\\') | |||
| return { 'STRING': PatternStr, | |||
| 'REGEXP': PatternRE }[literal.type](s, flags or None) | |||
| @@ -19,7 +19,7 @@ logging.basicConfig(level=logging.INFO) | |||
| from lark.lark import Lark | |||
| from lark.common import GrammarError, ParseError | |||
| from lark.lexer import LexError | |||
| from lark.lexer import LexError, UnexpectedInput | |||
| from lark.tree import Tree, Transformer | |||
| __path__ = os.path.dirname(__file__) | |||
| @@ -673,7 +673,7 @@ def _make_parser_test(LEXER, PARSER): | |||
| """) | |||
| x = g.parse(r'\a') | |||
| g = _Lark(r"""start: /\\\\/ /a/ | |||
| g = _Lark(r"""start: /\\/ /a/ | |||
| """) | |||
| x = g.parse(r'\a') | |||
| @@ -961,6 +961,49 @@ def _make_parser_test(LEXER, PARSER): | |||
| self.assertEqual(tree.children, ['1']) | |||
@unittest.skipIf(LEXER is None, "Scanless doesn't support regular expressions")
def test_regex_escaping(self):
    """Check how backslash escapes in regexp and string literals are matched.

    Each grammar is written as a raw Python string, so the backslashes
    shown are exactly the ones the grammar loader receives.
    """
    expected_error = ParseError if LEXER == 'dynamic' else UnexpectedInput
    # TODO Make dynamic parser raise UnexpectedInput if nothing scans?

    # A plain character class matches its members and rejects anything else.
    g = _Lark("start: /[ab]/")
    g.parse('a')
    g.parse('b')

    self.assertRaises( expected_error, g.parse, 'c')

    # \w is a regexp class; \\w is a literal backslash followed by 'w'.
    _Lark(r'start: /\w/').parse('a')

    g = _Lark(r'start: /\\w/')
    self.assertRaises( expected_error, g.parse, 'a')
    g.parse(r'\w')

    # Escaped regexp metacharacters and delimiters match themselves.
    _Lark(r'start: /\[/').parse('[')
    _Lark(r'start: /\//').parse('/')
    _Lark(r'start: /\\/').parse('\\')
    _Lark(r'start: /\[ab]/').parse('[ab]')
    _Lark(r'start: /\\[ab]/').parse('\\a')

    # Tab escapes inside regexps, with zero, one and two leading backslashes.
    _Lark(r'start: /\t/').parse('\t')
    _Lark(r'start: /\\t/').parse('\\t')
    _Lark(r'start: /\\\t/').parse('\\\t')

    # The same tab escapes inside string literals.
    _Lark(r'start: "\t"').parse('\t')
    _Lark(r'start: "\\t"').parse('\\t')
    _Lark(r'start: "\\\t"').parse('\\\t')
| _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize() | |||
| _TestParser.__name__ = _NAME | |||