@@ -101,9 +101,9 @@ some_rule: my_template{arg1, arg2, ...} | |||||
Example: | Example: | ||||
```ebnf | ```ebnf | ||||
_seperated{x, sep}: x (sep x)* // Define a sequence of 'x sep x sep x ...' | |||||
_separated{x, sep}: x (sep x)* // Define a sequence of 'x sep x sep x ...' | |||||
num_list: "[" _seperated{NUMBER, ","} "]" // Will match "[1, 2, 3]" etc. | |||||
num_list: "[" _separated{NUMBER, ","} "]" // Will match "[1, 2, 3]" etc. | |||||
``` | ``` | ||||
### Priority | ### Priority | ||||
@@ -294,7 +294,7 @@ class Lark(Serialize): | |||||
__serialize_fields__ = 'parser', 'rules', 'options' | __serialize_fields__ = 'parser', 'rules', 'options' | ||||
def _build_lexer(self): | def _build_lexer(self): | ||||
return TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks, g_regex_flags=self.lexer_conf.g_regex_flags) | |||||
return TraditionalLexer(self.lexer_conf) | |||||
def _prepare_callbacks(self): | def _prepare_callbacks(self): | ||||
self.parser_class = get_frontend(self.options.parser, self.options.lexer) | self.parser_class = get_frontend(self.options.parser, self.options.lexer) | ||||
@@ -5,7 +5,7 @@ import sys | |||||
from copy import copy, deepcopy | from copy import copy, deepcopy | ||||
from io import open | from io import open | ||||
from .utils import bfs, eval_escaping | |||||
from .utils import bfs, eval_escaping, Py36 | |||||
from .lexer import Token, TerminalDef, PatternStr, PatternRE | from .lexer import Token, TerminalDef, PatternStr, PatternRE | ||||
from .parse_tree_builder import ParseTreeBuilder | from .parse_tree_builder import ParseTreeBuilder | ||||
@@ -432,6 +432,20 @@ class PrepareLiterals(Transformer_InPlace): | |||||
return ST('pattern', [PatternRE(regexp)]) | return ST('pattern', [PatternRE(regexp)]) | ||||
def _make_joined_pattern(regexp, flags_set):
    """Wrap an already-joined regexp string in a ``PatternRE``.

    ``regexp`` is the merged regexp source of several terminals and
    ``flags_set`` is the collection of distinct flag values those
    terminals carried.

    Python 3.6 introduced a flag syntax that restricts flags to a specific
    regexp group (already used in ``lexer.Pattern._get_flags``), so on
    3.6+ the joined pattern is created with no global flags. Older
    versions only have global flags, so the merged terminals must not
    disagree on them.

    Raises ``GrammarError`` when running on Python < 3.6 and
    ``flags_set`` holds more than one distinct flag value.
    """
    if not Py36:
        distinct = len(flags_set)
        if distinct > 1:
            raise GrammarError("Lark doesn't support joining terminals with conflicting flags in python <3.6!")
        if distinct == 1:
            # Exactly one flag value is shared by every terminal: it is
            # safe to apply it globally to the joined pattern.
            (shared_flags,) = flags_set
            return PatternRE(regexp, shared_flags)

    # Either scoped flags are available (3.6+) or there were no flags.
    return PatternRE(regexp, ())
class TerminalTreeToPattern(Transformer): | class TerminalTreeToPattern(Transformer): | ||||
def pattern(self, ps): | def pattern(self, ps): | ||||
p ,= ps | p ,= ps | ||||
@@ -441,16 +455,16 @@ class TerminalTreeToPattern(Transformer): | |||||
assert items | assert items | ||||
if len(items) == 1: | if len(items) == 1: | ||||
return items[0] | return items[0] | ||||
if len({i.flags for i in items}) > 1: | |||||
raise GrammarError("Lark doesn't support joining terminals with conflicting flags!") | |||||
return PatternRE(''.join(i.to_regexp() for i in items), items[0].flags if items else ()) | |||||
pattern = ''.join(i.to_regexp() for i in items) | |||||
return _make_joined_pattern(pattern, {i.flags for i in items}) | |||||
def expansions(self, exps): | def expansions(self, exps): | ||||
if len(exps) == 1: | if len(exps) == 1: | ||||
return exps[0] | return exps[0] | ||||
if len({i.flags for i in exps}) > 1: | |||||
raise GrammarError("Lark doesn't support joining terminals with conflicting flags!") | |||||
return PatternRE('(?:%s)' % ('|'.join(i.to_regexp() for i in exps)), exps[0].flags) | |||||
pattern = '(?:%s)' % ('|'.join(i.to_regexp() for i in exps)) | |||||
return _make_joined_pattern(pattern, {i.flags for i in exps}) | |||||
def expr(self, args): | def expr(self, args): | ||||
inner, op = args[:2] | inner, op = args[:2] | ||||
@@ -7,6 +7,9 @@ import logging | |||||
import os | import os | ||||
import sys | import sys | ||||
from copy import copy, deepcopy | from copy import copy, deepcopy | ||||
from lark.utils import Py36 | |||||
try: | try: | ||||
from cStringIO import StringIO as cStringIO | from cStringIO import StringIO as cStringIO | ||||
except ImportError: | except ImportError: | ||||
@@ -1062,6 +1065,31 @@ def _make_parser_test(LEXER, PARSER): | |||||
g = _Lark(g) | g = _Lark(g) | ||||
self.assertEqual( g.parse('"hello"').children, ['"hello"']) | self.assertEqual( g.parse('"hello"').children, ['"hello"']) | ||||
self.assertEqual( g.parse("'hello'").children, ["'hello'"]) | self.assertEqual( g.parse("'hello'").children, ["'hello'"]) | ||||
@unittest.skipIf(not Py36, "Required re syntax only exists in python3.6+")
def test_join_regex_flags(self):
    """Joined terminals must keep their own regexp flags.

    Covers both ways of joining terminals: concatenation (``B C``) and
    alternation (``B | C``), each mixing a flagged and an unflagged
    terminal.
    """
    # Concatenation: B uses the /s flag (so its '.' accepts a newline),
    # C does not.
    concat_grammar = r"""
        start: A
        A: B C
        B: /./s
        C: /./
    """
    concat_parser = _Lark(concat_grammar)
    for text in (" ", "\n "):
        self.assertEqual(concat_parser.parse(text).children, [text])
    # The second character is matched by C, which must not accept "\n".
    self.assertRaises(UnexpectedCharacters, concat_parser.parse, "\n\n")

    # Alternation: B is case-insensitive, C is case-sensitive.
    alt_grammar = r"""
        start: A
        A: B | C
        B: "b"i
        C: "c"
    """
    alt_parser = _Lark(alt_grammar)
    for text in ("b", "B", "c"):
        self.assertEqual(alt_parser.parse(text).children, [text])
    # The 'i' flag on B must not leak onto C.
    self.assertRaises(UnexpectedCharacters, alt_parser.parse, "C")
def test_lexer_token_limit(self): | def test_lexer_token_limit(self): | ||||