@@ -9,7 +9,7 @@ class LexError(Exception):
     pass


 class UnexpectedInput(LexError):
-    def __init__(self, seq, lex_pos, line, column):
+    def __init__(self, seq, lex_pos, line, column, allowed=None):
         context = seq[lex_pos:lex_pos+5]
         message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line)
@@ -18,6 +18,7 @@ class UnexpectedInput(LexError):
         self.line = line
         self.column = column
         self.context = context
+        self.allowed = allowed

 class Token(Str):
     def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None):
@@ -238,7 +239,6 @@ class ContextualLexer:
                    break
            else:
                if lex_pos < len(stream):
-                    print("Allowed tokens:", lexer.tokens)
-                    raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos)
+                    raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos, lexer.tokens)
                break
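Rather than printing the candidate terminals to stdout, the lexer now attaches them to the exception; since `allowed` defaults to None, other raise sites keep working unchanged. A minimal sketch of how a caller might inspect the new attribute (grammar, input, and constructor options invented for illustration):

    from lark.lark import Lark
    from lark.lexer import UnexpectedInput

    parser = Lark('start: "a"+', parser='lalr', lexer='contextual')

    try:
        parser.parse('aa$')  # '$' matches no terminal
    except UnexpectedInput as e:
        # e.allowed holds the token definitions the contextual lexer
        # would have accepted at the failure point (None when the
        # raiser didn't supply them).
        print(e.line, e.column, e.allowed)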
@@ -290,6 +290,31 @@ class ExtractAnonTokens(InlineTransformer):

 def _rfind(s, choices):
     return max(s.rfind(c) for c in choices)

+def _fix_escaping(s):
+    s = s.replace('\\"', '"')
+    w = ''
+    i = iter(s)
+    for n in i:
+        w += n
+        if n == '\\':
+            n2 = next(i)
+            if n2 == '\\':
+                w += '\\\\'
+            elif n2 not in 'unftr':
+                w += '\\'
+            w += n2
+    to_eval = "u'''%s'''" % w
+    try:
+        s = literal_eval(to_eval)
+    except SyntaxError as e:
+        raise ValueError(s, e)
+    return s

 def _literal_to_pattern(literal):
     v = literal.value
     flag_start = _rfind(v, '/"')+1
@@ -300,13 +325,12 @@ def _literal_to_pattern(literal):
     v = v[:flag_start]
     assert v[0] == v[-1] and v[0] in '"/'
     x = v[1:-1]
-    x = re.sub(r'(\\[wd/ .]|\\\[|\\\])', r'\\\1', x)
-    x = x.replace("'", r"\'")
-
-    to_eval = "u'''%s'''" % x
-    try:
-        s = literal_eval(to_eval)
-    except SyntaxError as e:
-        raise ValueError(v, e)
+    s = _fix_escaping(x)
+
+    if v[0] == '"':
+        s = s.replace('\\\\', '\\')

     return { 'STRING': PatternStr,
              'REGEXP': PatternRE }[literal.type](s, flags or None)
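Taken together, these hunks centralize escape handling. Traced by hand from the code above (a sketch worth running to confirm): _fix_escaping interprets the escapes it recognizes (\u \n \f \t \r) via literal_eval, keeps the backslash of any other escape, and leaves an escaped backslash doubled; _literal_to_pattern then collapses the doubled backslash for STRING literals only, since REGEXP literals should hand the regex engine an escaped backslash:

    x = r'\\t'                             # body of a grammar literal, quotes stripped
    s = _fix_escaping(x)
    assert s == r'\\t'                     # escaped backslash stays doubled

    assert _fix_escaping(r'\n') == '\n'    # recognized escape: interpreted
    assert _fix_escaping(r'\w') == r'\w'   # unknown escape: backslash kept

    # STRING literal "\\t" -> the 2-character string backslash + 't':
    assert s.replace('\\\\', '\\') == '\\t'

    # REGEXP literal /\\t/ -> pattern r'\\t', i.e. match backslash then 't':
    import re
    assert re.match(s, '\\t')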
@@ -19,7 +19,7 @@ logging.basicConfig(level=logging.INFO)

 from lark.lark import Lark
 from lark.common import GrammarError, ParseError
-from lark.lexer import LexError
+from lark.lexer import LexError, UnexpectedInput
 from lark.tree import Tree, Transformer

 __path__ = os.path.dirname(__file__)
@@ -673,7 +673,7 @@ def _make_parser_test(LEXER, PARSER):
                        """)
             x = g.parse(r'\a')

-            g = _Lark(r"""start: /\\\\/ /a/
+            g = _Lark(r"""start: /\\/ /a/
                        """)
             x = g.parse(r'\a')
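The expectation tightens to match: grammar regexps now follow the same convention as Python raw-string regexps, where a doubled backslash matches one literal backslash, so /\\/ rather than /\\\\/ matches the input \a. The Python analogy:

    import re
    # In a raw pattern, two backslashes match one literal backslash:
    assert re.match(r'\\a', r'\a')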
@@ -961,6 +961,49 @@ def _make_parser_test(LEXER, PARSER):
             self.assertEqual(tree.children, ['1'])

+        @unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions")
+        def test_regex_escaping(self):
+            expected_error = ParseError if LEXER == 'dynamic' else UnexpectedInput
+            # TODO Make dynamic parser raise UnexpectedInput if nothing scans?
+            g = _Lark("start: /[ab]/")
+            g.parse('a')
+            g.parse('b')
+            self.assertRaises( expected_error, g.parse, 'c')
+
+            _Lark(r'start: /\w/').parse('a')
+
+            g = _Lark(r'start: /\\w/')
+            self.assertRaises( expected_error, g.parse, 'a')
+            g.parse(r'\w')
+
+            _Lark(r'start: /\[/').parse('[')
+            _Lark(r'start: /\//').parse('/')
+            _Lark(r'start: /\\/').parse('\\')
+            _Lark(r'start: /\[ab]/').parse('[ab]')
+            _Lark(r'start: /\\[ab]/').parse('\\a')
+
+            _Lark(r'start: /\t/').parse('\t')
+            _Lark(r'start: /\\t/').parse('\\t')
+            _Lark(r'start: /\\\t/').parse('\\\t')
+
+            _Lark(r'start: "\t"').parse('\t')
+            _Lark(r'start: "\\t"').parse('\\t')
+            _Lark(r'start: "\\\t"').parse('\\\t')

     _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize()
     _TestParser.__name__ = _NAME
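For reference, the naming scheme above yields one generated test class per parser/lexer pairing; a quick sketch of the names it produces (the pairing list here is assumed for illustration):

    for parser, lexer in [('lalr', 'contextual'), ('lalr', 'standard'),
                          ('earley', None)]:
        name = "Test" + parser.capitalize() + (lexer or 'Scanless').capitalize()
        print(name)  # TestLalrContextual, TestLalrStandard, TestEarleyScanless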