@@ -149,16 +149,16 @@ COMMENT: /#[^\n]*/ | |||||
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+

%ignore /[\t \f]+/ // WS
%ignore /\\[\t \f]*\r?\n/ // LINE_CONT
%ignore COMMENT

// Regexp flags are written as a suffix after the closing slash (e.g. /.../is);
// the inline (?i)/(?s) prefixes are no longer used.
STRING : /[ub]?r?("(?!"").*?(?<!\\\\)(\\\\\\\\)*?"|'(?!'').*?(?<!\\\\)(\\\\\\\\)*?')/i
LONG_STRING.2: /[ub]?r?(""".*?(?<!\\\\)(\\\\\\\\)*?"""|'''.*?(?<!\\\\)(\\\\\\\\)*?''')/is

DEC_NUMBER: /[1-9]\d*l?/i
HEX_NUMBER: /0x[\da-f]*l?/i
OCT_NUMBER: /0o?[0-7]*l?/i

%import common.FLOAT -> FLOAT
%import common.INT -> _INT
%import common.CNAME -> NAME
@@ -177,14 +177,14 @@ _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ | |||||
// STRING : /[ub]?r?("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/
// LONG_STRING: /(?s)[ub]?r?(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/

// Regexp flags are written as a suffix after the closing slash (e.g. /.../is);
// the inline (?i)/(?s) prefixes are no longer used.
STRING : /[ubf]?r?("(?!"").*?(?<!\\\\)(\\\\\\\\)*?"|'(?!'').*?(?<!\\\\)(\\\\\\\\)*?')/i
LONG_STRING: /[ubf]?r?(""".*?(?<!\\\\)(\\\\\\\\)*?"""|'''.*?(?<!\\\\)(\\\\\\\\)*?''')/is

DEC_NUMBER: /[1-9]\d*l?/i
HEX_NUMBER: /0x[\da-f]*l?/i
OCT_NUMBER: /0o?[0-7]*l?/i
FLOAT_NUMBER: /((\d+\.\d*|\.\d+)(e[-+]?\d+)?|\d+(e[-+]?\d+))/i
IMAG_NUMBER: /\d+j|${FLOAT_NUMBER}j/i

_DEDENT: "<DEDENT>"
_INDENT: "<INDENT>"
@@ -68,16 +68,14 @@ class Pattern(object): | |||||
if Py36: | if Py36: | ||||
# Python 3.6 changed syntax for flags in regular expression | # Python 3.6 changed syntax for flags in regular expression | ||||
def _get_flags(self, value):
    """Wrap ``value`` in one scoped inline-flag group per flag.

    Each item of ``self.flags`` adds a ``(?<flag>:...)`` group around the
    pattern built so far, so any number of flags is accepted. When
    ``self.flags`` is empty or None, ``value`` is returned unchanged.
    """
    for f in self.flags or ():
        value = '(?%s:%s)' % (f, value)
    return value
else: | else: | ||||
def _get_flags(self, value):
    """Prefix ``value`` with one inline-flag group ``(?<flag>)`` per flag.

    Each item of ``self.flags`` is prepended in turn, so any number of
    flags is accepted. When ``self.flags`` is empty or None, ``value`` is
    returned unchanged.
    """
    for f in self.flags or ():
        value = ('(?%s)' % f) + value
    return value
class PatternStr(Pattern): | class PatternStr(Pattern): | ||||
@@ -72,7 +72,7 @@ TOKENS = { | |||||
'RULE': '!?[_?]?[a-z][_a-z0-9]*', | 'RULE': '!?[_?]?[a-z][_a-z0-9]*', | ||||
'TOKEN': '_?[A-Z][_A-Z0-9]*', | 'TOKEN': '_?[A-Z][_A-Z0-9]*', | ||||
'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', | 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', | ||||
'REGEXP': r'/(?!/)(\\/|\\\\|[^/\n])*?/[%s]?' % _RE_FLAGS, | |||||
'REGEXP': r'/(?!/)(\\/|\\\\|[^/\n])*?/[%s]*' % _RE_FLAGS, | |||||
'_NL': r'(\r?\n)+\s*', | '_NL': r'(\r?\n)+\s*', | ||||
'WS': r'[ \t]+', | 'WS': r'[ \t]+', | ||||
'COMMENT': r'//[^\n]*', | 'COMMENT': r'//[^\n]*', | ||||
@@ -287,21 +287,28 @@ class ExtractAnonTokens(InlineTransformer): | |||||
return Token('TOKEN', token_name, -1) | return Token('TOKEN', token_name, -1) | ||||
def _rfind(s, choices):
    """Return the largest ``s.rfind(c)`` over every ``c`` in ``choices``.

    Returns -1 when none of the choices occurs in ``s``, and also when
    ``choices`` is empty (instead of letting ``max()`` fail on an empty
    sequence).
    """
    # The list-with-fallback form avoids max(..., default=...), which older
    # interpreters do not accept.
    return max([s.rfind(c) for c in choices] or [-1])
def _literal_to_pattern(literal):
    """Build a pattern object from a STRING or REGEXP literal token.

    ``literal.value`` is the quoted source text: a body delimited by ``"``
    or ``/``, followed by zero or more flag characters from ``_RE_FLAGS``
    (for example ``"abc"i`` or ``/a+/is``).

    Returns ``PatternStr(s, flags)`` when ``literal.type`` is ``'STRING'``
    and ``PatternRE(s, flags)`` when it is ``'REGEXP'``, where ``s`` is the
    unescaped body and ``flags`` is the flag suffix, or None when there is
    no suffix.

    Raises ValueError when the body cannot be evaluated as a Python string
    literal.
    """
    v = literal.value

    # Everything after the last delimiter is the flag suffix; it may hold
    # several characters, so slice it off rather than testing v[-1] alone.
    flag_start = _rfind(v, '/"') + 1
    assert flag_start > 0
    flags = v[flag_start:]
    assert all(f in _RE_FLAGS for f in flags), flags

    v = v[:flag_start]
    assert v[0] == v[-1] and v[0] in '"/'
    x = v[1:-1]

    # Double the backslash of these escapes (\w \d \/ "\ " \. \[ \]) so that
    # literal_eval hands them back with a single backslash intact. This must
    # run exactly once: the doubled form would match again on a second pass.
    x = re.sub(r'(\\[wd/ .]|\\\[|\\\])', r'\\\1', x)
    x = x.replace("'", r"\'")

    to_eval = "u'''%s'''" % x
    try:
        s = literal_eval(to_eval)
    except SyntaxError as e:
        # Report the offending literal together with the parser's complaint.
        raise ValueError(v, e)

    pattern_cls = {'STRING': PatternStr, 'REGEXP': PatternRE}[literal.type]
    # An empty suffix is normalised to None.
    return pattern_cls(s, flags or None)
class PrepareLiterals(InlineTransformer): | class PrepareLiterals(InlineTransformer): | ||||