Browse Source

Fixed escaping for all tests

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 6 years ago
parent
commit
08a8a747b8
3 changed files with 79 additions and 12 deletions
  1. +3
    -3
      lark/lexer.py
  2. +31
    -7
      lark/load_grammar.py
  3. +45
    -2
      tests/test_parser.py

+ 3
- 3
lark/lexer.py View File

@@ -9,7 +9,7 @@ class LexError(Exception):
pass pass


class UnexpectedInput(LexError): class UnexpectedInput(LexError):
def __init__(self, seq, lex_pos, line, column):
def __init__(self, seq, lex_pos, line, column, allowed=None):
context = seq[lex_pos:lex_pos+5] context = seq[lex_pos:lex_pos+5]
message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line) message = "No token defined for: '%s' in %r at line %d" % (seq[lex_pos], context, line)


@@ -18,6 +18,7 @@ class UnexpectedInput(LexError):
self.line = line self.line = line
self.column = column self.column = column
self.context = context self.context = context
self.allowed = allowed


class Token(Str): class Token(Str):
def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None): def __new__(cls, type_, value, pos_in_stream=None, line=None, column=None):
@@ -238,7 +239,6 @@ class ContextualLexer:
break break
else: else:
if lex_pos < len(stream): if lex_pos < len(stream):
print("Allowed tokens:", lexer.tokens)
raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos)
raise UnexpectedInput(stream, lex_pos, line, lex_pos - col_start_pos, lexer.tokens)
break break



+ 31
- 7
lark/load_grammar.py View File

@@ -290,6 +290,31 @@ class ExtractAnonTokens(InlineTransformer):
def _rfind(s, choices): def _rfind(s, choices):
return max(s.rfind(c) for c in choices) return max(s.rfind(c) for c in choices)




def _fix_escaping(s):
s = s.replace('\\"', '"')
w = ''
i = iter(s)
for n in i:
w += n
if n == '\\':
n2 = next(i)
if n2 == '\\':
w += '\\\\'
elif n2 not in 'unftr':
w += '\\'
w += n2

to_eval = "u'''%s'''" % w
try:
s = literal_eval(to_eval)
except SyntaxError as e:
raise ValueError(v, e)

return s


def _literal_to_pattern(literal): def _literal_to_pattern(literal):
v = literal.value v = literal.value
flag_start = _rfind(v, '/"')+1 flag_start = _rfind(v, '/"')+1
@@ -300,13 +325,12 @@ def _literal_to_pattern(literal):
v = v[:flag_start] v = v[:flag_start]
assert v[0] == v[-1] and v[0] in '"/' assert v[0] == v[-1] and v[0] in '"/'
x = v[1:-1] x = v[1:-1]
x = re.sub(r'(\\[wd/ .]|\\\[|\\\])', r'\\\1', x)
x = x.replace("'", r"\'")
to_eval = "u'''%s'''" % x
try:
s = literal_eval(to_eval)
except SyntaxError as e:
raise ValueError(v, e)

s = _fix_escaping(x)

if v[0] == '"':
s = s.replace('\\\\', '\\')

return { 'STRING': PatternStr, return { 'STRING': PatternStr,
'REGEXP': PatternRE }[literal.type](s, flags or None) 'REGEXP': PatternRE }[literal.type](s, flags or None)




+ 45
- 2
tests/test_parser.py View File

@@ -19,7 +19,7 @@ logging.basicConfig(level=logging.INFO)


from lark.lark import Lark from lark.lark import Lark
from lark.common import GrammarError, ParseError from lark.common import GrammarError, ParseError
from lark.lexer import LexError
from lark.lexer import LexError, UnexpectedInput
from lark.tree import Tree, Transformer from lark.tree import Tree, Transformer


__path__ = os.path.dirname(__file__) __path__ = os.path.dirname(__file__)
@@ -673,7 +673,7 @@ def _make_parser_test(LEXER, PARSER):
""") """)
x = g.parse(r'\a') x = g.parse(r'\a')


g = _Lark(r"""start: /\\\\/ /a/
g = _Lark(r"""start: /\\/ /a/
""") """)
x = g.parse(r'\a') x = g.parse(r'\a')


@@ -961,6 +961,49 @@ def _make_parser_test(LEXER, PARSER):
self.assertEqual(tree.children, ['1']) self.assertEqual(tree.children, ['1'])




# Regression test for grammar-literal escaping: regex (/…/) and string
# ("…") terminals with various backslash escapes must round-trip through
# the grammar loader and match the expected input.
@unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions")
def test_regex_escaping(self):
    # The dynamic lexer surfaces a failed scan as ParseError; the other
    # lexers raise UnexpectedInput directly from the lexing stage.
    expected_error = ParseError if LEXER == 'dynamic' else UnexpectedInput
    # TODO Make dynamic parser raise UnexpectedInput if nothing scans?

    # Plain character class: matches exactly one of 'a' or 'b'.
    g = _Lark("start: /[ab]/")
    g.parse('a')
    g.parse('b')

    self.assertRaises( expected_error, g.parse, 'c')

    # \w stays a regex word-character escape.
    _Lark(r'start: /\w/').parse('a')

    # \\w in the grammar is a literal backslash followed by 'w',
    # so a bare 'a' must NOT match.
    g = _Lark(r'start: /\\w/')
    self.assertRaises( expected_error, g.parse, 'a')
    g.parse(r'\w')

    # Escaped regex metacharacters match themselves literally.
    _Lark(r'start: /\[/').parse('[')

    _Lark(r'start: /\//').parse('/')

    _Lark(r'start: /\\/').parse('\\')

    # \[ escapes the bracket, so "[ab]" is matched literally.
    _Lark(r'start: /\[ab]/').parse('[ab]')

    # \\ is a literal backslash, then [ab] is a real character class.
    _Lark(r'start: /\\[ab]/').parse('\\a')

    # Tab escapes: \t is a tab, \\t is backslash+'t', \\\t is
    # backslash+tab — in both regex and string terminals.
    _Lark(r'start: /\t/').parse('\t')

    _Lark(r'start: /\\t/').parse('\\t')

    _Lark(r'start: /\\\t/').parse('\\\t')

    _Lark(r'start: "\t"').parse('\t')

    _Lark(r'start: "\\t"').parse('\\t')

    _Lark(r'start: "\\\t"').parse('\\\t')







_NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize() _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize()
_TestParser.__name__ = _NAME _TestParser.__name__ = _NAME


Loading…
Cancel
Save