Browse Source

Merge branch 'MegaIng-join_regex_flags'

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.10.0
Erez Sh 4 years ago
parent
commit
b5abf2d7af
4 changed files with 52 additions and 10 deletions
  1. +2
    -2
      docs/grammar.md
  2. +1
    -1
      lark/lark.py
  3. +21
    -7
      lark/load_grammar.py
  4. +28
    -0
      tests/test_parser.py

+ 2
- 2
docs/grammar.md View File

@@ -101,9 +101,9 @@ some_rule: my_template{arg1, arg2, ...}

Example:
```ebnf
_seperated{x, sep}: x (sep x)* // Define a sequence of 'x sep x sep x ...'
_separated{x, sep}: x (sep x)* // Define a sequence of 'x sep x sep x ...'

num_list: "[" _seperated{NUMBER, ","} "]" // Will match "[1, 2, 3]" etc.
num_list: "[" _separated{NUMBER, ","} "]" // Will match "[1, 2, 3]" etc.
```

### Priority


+ 1
- 1
lark/lark.py View File

@@ -294,7 +294,7 @@ class Lark(Serialize):
__serialize_fields__ = 'parser', 'rules', 'options'

def _build_lexer(self):
return TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks, g_regex_flags=self.lexer_conf.g_regex_flags)
return TraditionalLexer(self.lexer_conf)

def _prepare_callbacks(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer)


+ 21
- 7
lark/load_grammar.py View File

@@ -5,7 +5,7 @@ import sys
from copy import copy, deepcopy
from io import open

from .utils import bfs, eval_escaping
from .utils import bfs, eval_escaping, Py36
from .lexer import Token, TerminalDef, PatternStr, PatternRE

from .parse_tree_builder import ParseTreeBuilder
@@ -432,6 +432,20 @@ class PrepareLiterals(Transformer_InPlace):
return ST('pattern', [PatternRE(regexp)])


def _make_joined_pattern(regexp, flags_set):
    """Wrap an already-joined regexp source in a single ``PatternRE``.

    ``regexp`` is the combined regexp text; ``flags_set`` holds the distinct
    flag values of the patterns that were joined to produce it.

    Python 3.6 introduced a syntax that restricts flags to a specific regexp
    group (already used by ``lexer.Pattern._get_flags``), so on those versions
    the joined pattern is created without flags of its own. Older versions can
    only use global flags, so the joined terminals must not disagree.

    Raises:
        GrammarError: when ``Py36`` is false and ``flags_set`` holds more than
            one distinct flags value.
    """
    if Py36:
        # Flags are scoped per group inside the regexp; nothing global to set.
        return PatternRE(regexp, ())

    distinct_count = len(flags_set)
    if distinct_count > 1:
        raise GrammarError("Lark doesn't support joining terminals with conflicting flags in python <3.6!")

    if distinct_count == 1:
        # Exactly one flags value is shared by every part: apply it globally.
        (shared_flags,) = flags_set
        return PatternRE(regexp, shared_flags)

    return PatternRE(regexp, ())

class TerminalTreeToPattern(Transformer):
def pattern(self, ps):
p ,= ps
@@ -441,16 +455,16 @@ class TerminalTreeToPattern(Transformer):
assert items
if len(items) == 1:
return items[0]
if len({i.flags for i in items}) > 1:
raise GrammarError("Lark doesn't support joining terminals with conflicting flags!")
return PatternRE(''.join(i.to_regexp() for i in items), items[0].flags if items else ())
pattern = ''.join(i.to_regexp() for i in items)
return _make_joined_pattern(pattern, {i.flags for i in items})

def expansions(self, exps):
if len(exps) == 1:
return exps[0]
if len({i.flags for i in exps}) > 1:
raise GrammarError("Lark doesn't support joining terminals with conflicting flags!")
return PatternRE('(?:%s)' % ('|'.join(i.to_regexp() for i in exps)), exps[0].flags)
pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps))
return _make_joined_pattern(pattern, {i.flags for i in exps})

def expr(self, args):
inner, op = args[:2]


+ 28
- 0
tests/test_parser.py View File

@@ -7,6 +7,9 @@ import logging
import os
import sys
from copy import copy, deepcopy

from lark.utils import Py36

try:
from cStringIO import StringIO as cStringIO
except ImportError:
@@ -1062,6 +1065,31 @@ def _make_parser_test(LEXER, PARSER):
g = _Lark(g)
self.assertEqual( g.parse('"hello"').children, ['"hello"'])
self.assertEqual( g.parse("'hello'").children, ["'hello'"])
# Checks that terminals with different regexp flags can be joined into one
# terminal while each part keeps its own flags. Skipped when Py36 is false.
@unittest.skipIf(not Py36, "Required re syntax only exists in python3.6+")
def test_join_regex_flags(self):
# Case 1: concatenation. A is B followed by C; only B carries the `s` flag.
g = r"""
start: A
A: B C
B: /./s
C: /./
"""
g = _Lark(g)
# NOTE(review): A consumes two characters (B then C), so the single-space
# literals below look like whitespace collapsed in this view - confirm
# against the original file.
self.assertEqual(g.parse(" ").children,[" "])
# A newline is accepted in B's position (flag `s`) ...
self.assertEqual(g.parse("\n ").children,["\n "])
# ... but not in C's position, which has no flags.
self.assertRaises(UnexpectedCharacters, g.parse, "\n\n")
# Case 2: alternation. A is B or C; only B carries the `i` flag.
g = r"""
start: A
A: B | C
B: "b"i
C: "c"
"""
g = _Lark(g)
# "b" matches in either case because of B's `i` flag.
self.assertEqual(g.parse("b").children,["b"])
self.assertEqual(g.parse("B").children,["B"])
# "c" has no flag, so only the lowercase form is accepted.
self.assertEqual(g.parse("c").children,["c"])
self.assertRaises(UnexpectedCharacters, g.parse, "C")


def test_lexer_token_limit(self):


Loading…
Cancel
Save