Parcourir la source

Fixed escaping for all tests

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan il y a 6 ans
Parent
révision
08a8a747b8
3 fichiers modifiés avec 79 ajouts et 12 suppressions
  1. +3
    -3
      lark/lexer.py
  2. +31
    -7
      lark/load_grammar.py
  3. +45
    -2
      tests/test_parser.py

+ 3
- 3
lark/lexer.py Voir le fichier

@@ -9,7 +9,7 @@ class LexError(Exception):
pass

class UnexpectedInput(LexError):
def __init__(self, seq, lex_pos, line, column):
def __init__(self, seq, lex_pos, line, column, allowed=None):
context = seq[lex_pos:lex_pos+5]
message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line)

@@ -18,6 +18,7 @@ class UnexpectedInput(LexError):
self.line = line
self.column = column
self.context = context
self.allowed = allowed

class Token(Str):
def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None):
@@ -238,7 +239,6 @@ class ContextualLexer:
break
else:
if lex_pos < len(stream):
print("Allowed tokens:", lexer.tokens)
raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos)
raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos, lexer.tokens)
break


+ 31
- 7
lark/load_grammar.py Voir le fichier

@@ -290,6 +290,31 @@ class ExtractAnonTokens(InlineTransformer):
def _rfind(s, choices):
return max(s.rfind(c) for c in choices)



def _fix_escaping(s):
s = s.replace('\\"', '"')
w = ''
i = iter(s)
for n in i:
w += n
if n == '\\':
n2 = next(i)
if n2 == '\\':
w += '\\\\'
elif n2 not in 'unftr':
w += '\\'
w += n2

to_eval = "u'''%s'''" % w
try:
s = literal_eval(to_eval)
except SyntaxError as e:
raise ValueError(v, e)

return s


def _literal_to_pattern(literal):
v = literal.value
flag_start = _rfind(v, '/"')+1
@@ -300,13 +325,12 @@ def _literal_to_pattern(literal):
v = v[:flag_start]
assert v[0] == v[-1] and v[0] in '"/'
x = v[1:-1]
x = re.sub(r'(\\[wd/ .]|\\\[|\\\])', r'\\\1', x)
x = x.replace("'", r"\'")
to_eval = "u'''%s'''" % x
try:
s = literal_eval(to_eval)
except SyntaxError as e:
raise ValueError(v, e)

s = _fix_escaping(x)

if v[0] == '"':
s = s.replace('\\\\', '\\')

return { 'STRING': PatternStr,
'REGEXP': PatternRE }[literal.type](s, flags or None)



+ 45
- 2
tests/test_parser.py Voir le fichier

@@ -19,7 +19,7 @@ logging.basicConfig(level=logging.INFO)

from lark.lark import Lark
from lark.common import GrammarError, ParseError
from lark.lexer import LexError
from lark.lexer import LexError, UnexpectedInput
from lark.tree import Tree, Transformer

__path__ = os.path.dirname(__file__)
@@ -673,7 +673,7 @@ def _make_parser_test(LEXER, PARSER):
""")
x = g.parse(r'\a')

g = _Lark(r"""start: /\\\\/ /a/
g = _Lark(r"""start: /\\/ /a/
""")
x = g.parse(r'\a')

@@ -961,6 +961,49 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(tree.children, ['1'])


@unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions")
def test_regex_escaping(self):
    """Exercise backslash handling in regexp and string terminals.

    Each single-escaped form (``\\w``, ``\\[``, ``\\t`` ...) must keep its
    regexp/string meaning, while each double-escaped form (``\\\\w``,
    ``\\\\t`` ...) must match a literal backslash followed by the character.
    """
    # The dynamic lexer surfaces a scan failure as ParseError rather than
    # UnexpectedInput — see the TODO below.
    expected_error = ParseError if LEXER == 'dynamic' else UnexpectedInput
    # TODO Make dynamic parser raise UnexpectedInput if nothing scans?

    # Plain character class: matches exactly 'a' or 'b'.
    g = _Lark("start: /[ab]/")
    g.parse('a')
    g.parse('b')

    self.assertRaises( expected_error, g.parse, 'c')

    # \w stays a word-character escape inside the regexp.
    _Lark(r'start: /\w/').parse('a')

    # \\w is a literal backslash followed by 'w', so 'a' must NOT match.
    g = _Lark(r'start: /\\w/')
    self.assertRaises( expected_error, g.parse, 'a')
    g.parse(r'\w')

    # Escaped metacharacters match themselves.
    _Lark(r'start: /\[/').parse('[')

    _Lark(r'start: /\//').parse('/')

    _Lark(r'start: /\\/').parse('\\')

    # Escaped '[' followed by literal 'ab]' — no character class here.
    _Lark(r'start: /\[ab]/').parse('[ab]')

    # Literal backslash, then a real character class [ab].
    _Lark(r'start: /\\[ab]/').parse('\\a')

    # Tab escape vs. literal-backslash-plus-'t' vs. backslash-plus-tab.
    _Lark(r'start: /\t/').parse('\t')

    _Lark(r'start: /\\t/').parse('\\t')

    _Lark(r'start: /\\\t/').parse('\\\t')

    # Same three cases for string (non-regexp) terminals.
    _Lark(r'start: "\t"').parse('\t')

    _Lark(r'start: "\\t"').parse('\\t')

    _Lark(r'start: "\\\t"').parse('\\\t')






_NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize()
_TestParser.__name__ = _NAME


Chargement…
Annuler
Enregistrer