Browse Source

Merge pull request #645 from decorator-factory/verbose-regex-flag--decorator-factory

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.10.0
Erez Shinan 5 years ago
committed by GitHub
parent
commit
6925b9be0d
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 41 additions and 6 deletions
  1. lark/load_grammar.py (+15, -6)
  2. tests/test_parser.py (+26, -0)

+ 15
- 6
lark/load_grammar.py View File

@@ -85,7 +85,7 @@ TERMINALS = {
'RULE': '!?[_?]?[a-z][_a-z0-9]*', 'RULE': '!?[_?]?[a-z][_a-z0-9]*',
'TERMINAL': '_?[A-Z][_A-Z0-9]*', 'TERMINAL': '_?[A-Z][_A-Z0-9]*',
'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?',
'REGEXP': r'/(?!/)(\\/|\\\\|[^/\n])*?/[%s]*' % _RE_FLAGS,
'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS,
'_NL': r'(\r?\n)+\s*', '_NL': r'(\r?\n)+\s*',
'WS': r'[ \t]+', 'WS': r'[ \t]+',
'COMMENT': r'\s*//[^\n]*', 'COMMENT': r'\s*//[^\n]*',
@@ -336,7 +336,7 @@ class PrepareAnonTerminals(Transformer_InPlace):
term_name = None term_name = None


elif isinstance(p, PatternRE): elif isinstance(p, PatternRE):
if p in self.term_reverse: # Kind of a wierd placement.name
if p in self.term_reverse: # Kind of a weird placement.name
term_name = self.term_reverse[p].name term_name = self.term_reverse[p].name
else: else:
assert False, p assert False, p
@@ -409,6 +409,13 @@ def _literal_to_pattern(literal):
flags = v[flag_start:] flags = v[flag_start:]
assert all(f in _RE_FLAGS for f in flags), flags assert all(f in _RE_FLAGS for f in flags), flags


if literal.type == 'STRING' and '\n' in v:
raise GrammarError('You cannot put newlines in string literals')

if literal.type == 'REGEXP' and '\n' in v and 'x' not in flags:
raise GrammarError('You can only use newlines in regular expressions '
'with the `x` (verbose) flag')

v = v[:flag_start] v = v[:flag_start]
assert v[0] == v[-1] and v[0] in '"/' assert v[0] == v[-1] and v[0] in '"/'
x = v[1:-1] x = v[1:-1]
@@ -417,9 +424,11 @@ def _literal_to_pattern(literal):


if literal.type == 'STRING': if literal.type == 'STRING':
s = s.replace('\\\\', '\\') s = s.replace('\\\\', '\\')

return { 'STRING': PatternStr,
'REGEXP': PatternRE }[literal.type](s, flags)
return PatternStr(s, flags)
elif literal.type == 'REGEXP':
return PatternRE(s, flags)
else:
assert False, 'Invariant failed: literal.type not in ["STRING", "REGEXP"]'




@inline_args @inline_args
@@ -841,7 +850,7 @@ class GrammarLoader:
if len(stmt.children) > 1: if len(stmt.children) > 1:
path_node, arg1 = stmt.children path_node, arg1 = stmt.children
else: else:
path_node, = stmt.children
path_node ,= stmt.children
arg1 = None arg1 = None


if isinstance(arg1, Tree): # Multi import if isinstance(arg1, Tree): # Multi import


+ 26
- 0
tests/test_parser.py View File

@@ -1262,6 +1262,32 @@ def _make_parser_test(LEXER, PARSER):
tree = l.parse('aA') tree = l.parse('aA')
self.assertEqual(tree.children, ['a', 'A']) self.assertEqual(tree.children, ['a', 'A'])


def test_token_flags_verbose(self):
g = _Lark(r"""start: NL | ABC
ABC: / [a-z] /x
NL: /\n/
""")
x = g.parse('a')
self.assertEqual(x.children, ['a'])

def test_token_flags_verbose_multiline(self):
g = _Lark(r"""start: ABC
ABC: / a b c
d
e f
/x
""")
x = g.parse('abcdef')
self.assertEqual(x.children, ['abcdef'])

def test_token_multiline_only_works_with_x_flag(self):
g = r"""start: ABC
ABC: / a b c
d
e f
/i
"""
self.assertRaises( GrammarError, _Lark, g)


@unittest.skipIf(PARSER == 'cyk', "No empty rules") @unittest.skipIf(PARSER == 'cyk', "No empty rules")
def test_twice_empty(self): def test_twice_empty(self):


Loading…
Cancel
Save