From 2efbc08854fe5a468b6bbda10bf038d06a8aac00 Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Thu, 16 Nov 2017 09:47:46 +0200 Subject: [PATCH] Support for multiple flags --- examples/python2.g | 12 ++++++------ examples/python3.g | 16 ++++++++-------- lark/common.py | 10 ++++------ lark/load_grammar.py | 25 ++++++++++++++++--------- 4 files changed, 34 insertions(+), 29 deletions(-) diff --git a/examples/python2.g b/examples/python2.g index a429512..aa5dd9e 100644 --- a/examples/python2.g +++ b/examples/python2.g @@ -149,16 +149,16 @@ COMMENT: /#[^\n]*/ _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ %ignore /[\t \f]+/ // WS -%ignore /\\[\t \f]*\r?\n/ // LINE_CONT +%ignore /\\[\t \f]*\r?\n/ // LINE_CONT %ignore COMMENT -STRING : /(?i)[ub]?r?("(?!"").*?(? FLOAT %import common.INT -> _INT %import common.CNAME -> NAME diff --git a/examples/python3.g b/examples/python3.g index c27b7ec..27b0ab4 100644 --- a/examples/python3.g +++ b/examples/python3.g @@ -177,14 +177,14 @@ _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ // STRING : /[ub]?r?("(?!"").*?(?" _INDENT: "" diff --git a/lark/common.py b/lark/common.py index 45aa0cd..50bea90 100644 --- a/lark/common.py +++ b/lark/common.py @@ -68,16 +68,14 @@ class Pattern(object): if Py36: # Python 3.6 changed syntax for flags in regular expression def _get_flags(self, value): - if self.flags: - assert len(self.flags) == 1 - return ('(?%s:%s)' % (self.flags[0], value)) + for f in self.flags or (): + value = ('(?%s:%s)' % (f, value)) return value else: def _get_flags(self, value): - if self.flags: - assert len(self.flags) == 1 - return ('(?%s)' % self.flags) + value + for f in self.flags or (): + value = ('(?%s)' % f) + value return value class PatternStr(Pattern): diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 6f2e102..9bf6f95 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -72,7 +72,7 @@ TOKENS = { 'RULE': '!?[_?]?[a-z][_a-z0-9]*', 'TOKEN': '_?[A-Z][_A-Z0-9]*', 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', - 'REGEXP': r'/(?!/)(\\/|\\\\|[^/\n])*?/[%s]?' % _RE_FLAGS, + 'REGEXP': r'/(?!/)(\\/|\\\\|[^/\n])*?/[%s]*' % _RE_FLAGS, '_NL': r'(\r?\n)+\s*', 'WS': r'[ \t]+', 'COMMENT': r'//[^\n]*', @@ -287,21 +287,28 @@ class ExtractAnonTokens(InlineTransformer): return Token('TOKEN', token_name, -1) +def _rfind(s, choices): + return max(s.rfind(c) for c in choices) + def _literal_to_pattern(literal): v = literal.value - if v[-1] in _RE_FLAGS: - flags = v[-1] - v = v[:-1] - else: - flags = None + flag_start = _rfind(v, '/"')+1 + assert flag_start > 0 + flags = v[flag_start:] + assert all(f in _RE_FLAGS for f in flags), flags + v = v[:flag_start] assert v[0] == v[-1] and v[0] in '"/' x = v[1:-1] - x = re.sub(r'(\\[wd/ ]|\\\[|\\\])', r'\\\1', x) + x = re.sub(r'(\\[wd/ .]|\\\[|\\\])', r'\\\1', x) x = x.replace("'", r"\'") - s = literal_eval("u'''%s'''" % x) + to_eval = "u'''%s'''" % x + try: + s = literal_eval(to_eval) + except SyntaxError as e: + raise ValueError(v, e) return { 'STRING': PatternStr, - 'REGEXP': PatternRE }[literal.type](s, flags) + 'REGEXP': PatternRE }[literal.type](s, flags or None) class PrepareLiterals(InlineTransformer):