Browse Source

allow multiline regexes with 'x' (verbose) flag

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.10.0
decorator-factory 4 years ago
parent
commit
9923987e94
2 changed files with 36 additions and 3 deletions
  1. +10
    -3
      lark/load_grammar.py
  2. +26
    -0
      tests/test_parser.py

+ 10
- 3
lark/load_grammar.py View File

@@ -13,7 +13,7 @@ from .parser_frontends import LALR_TraditionalLexer
from .common import LexerConf, ParserConf from .common import LexerConf, ParserConf
from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol from .grammar import RuleOptions, Rule, Terminal, NonTerminal, Symbol
from .utils import classify, suppress, dedup_list, Str from .utils import classify, suppress, dedup_list, Str
from .exceptions import GrammarError, UnexpectedCharacters, UnexpectedToken
from .exceptions import GrammarError, LarkError, UnexpectedCharacters, UnexpectedToken


from .tree import Tree, SlottedTree as ST from .tree import Tree, SlottedTree as ST
from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive from .visitors import Transformer, Visitor, v_args, Transformer_InPlace, Transformer_NonRecursive
@@ -85,7 +85,7 @@ TERMINALS = {
'RULE': '!?[_?]?[a-z][_a-z0-9]*', 'RULE': '!?[_?]?[a-z][_a-z0-9]*',
'TERMINAL': '_?[A-Z][_A-Z0-9]*', 'TERMINAL': '_?[A-Z][_A-Z0-9]*',
'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?',
'REGEXP': r'/(?!/)(\\/|\\\\|[^/\n])*?/[%s]*' % _RE_FLAGS,
'REGEXP': r'/(?!/)(\\/|\\\\|[^/])*?/[%s]*' % _RE_FLAGS,
'_NL': r'(\r?\n)+\s*', '_NL': r'(\r?\n)+\s*',
'WS': r'[ \t]+', 'WS': r'[ \t]+',
'COMMENT': r'\s*//[^\n]*', 'COMMENT': r'\s*//[^\n]*',
@@ -336,7 +336,7 @@ class PrepareAnonTerminals(Transformer_InPlace):
term_name = None term_name = None


elif isinstance(p, PatternRE): elif isinstance(p, PatternRE):
if p in self.term_reverse: # Kind of a wierd placement.name
if p in self.term_reverse: # Kind of a weird placement.name
term_name = self.term_reverse[p].name term_name = self.term_reverse[p].name
else: else:
assert False, p assert False, p
@@ -409,6 +409,13 @@ def _literal_to_pattern(literal):
flags = v[flag_start:] flags = v[flag_start:]
assert all(f in _RE_FLAGS for f in flags), flags assert all(f in _RE_FLAGS for f in flags), flags


if literal.type == 'STRING' and '\n' in v:
raise GrammarError('You cannot put newlines in string literals')

if literal.type == 'REGEXP' and '\n' in v and 'x' not in flags:
raise GrammarError('You can only use newlines in regular expressions '
'with the `x` (verbose) flag')

v = v[:flag_start] v = v[:flag_start]
assert v[0] == v[-1] and v[0] in '"/' assert v[0] == v[-1] and v[0] in '"/'
x = v[1:-1] x = v[1:-1]


+ 26
- 0
tests/test_parser.py View File

@@ -1262,6 +1262,32 @@ def _make_parser_test(LEXER, PARSER):
tree = l.parse('aA') tree = l.parse('aA')
self.assertEqual(tree.children, ['a', 'A']) self.assertEqual(tree.children, ['a', 'A'])


def test_token_flags_verbose(self):
g = _Lark(r"""start: NL | ABC
ABC: / [a-z] /x
NL: /\n/
""")
x = g.parse('a')
self.assertEqual(x.children, ['a'])

def test_token_flags_verbose_multiline(self):
g = _Lark(r"""start: ABC
ABC: / a b c
d
e f
/x
""")
x = g.parse('abcdef')
self.assertEqual(x.children, ['abcdef'])

def test_token_multiline_only_works_with_x_flag(self):
g = r"""start: ABC
ABC: / a b c
d
e f
/i
"""
self.assertRaises( GrammarError, _Lark, g)


@unittest.skipIf(PARSER == 'cyk', "No empty rules") @unittest.skipIf(PARSER == 'cyk', "No empty rules")
def test_twice_empty(self): def test_twice_empty(self):


Loading…
Cancel
Save