| @@ -149,16 +149,16 @@ COMMENT: /#[^\n]*/ | |||||
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+

%ignore /[\t \f]+/  // WS
%ignore /\\[\t \f]*\r?\n/   // LINE_CONT
%ignore COMMENT

// Regexp flags are written after the closing slash (e.g. /.../i, /.../is)
// rather than as an inline (?i) / (?s) prefix inside the pattern.
STRING : /[ub]?r?("(?!"").*?(?<!\\\\)(\\\\\\\\)*?"|'(?!'').*?(?<!\\\\)(\\\\\\\\)*?')/i
LONG_STRING.2: /[ub]?r?(""".*?(?<!\\\\)(\\\\\\\\)*?"""|'''.*?(?<!\\\\)(\\\\\\\\)*?''')/is

DEC_NUMBER: /[1-9]\d*l?/i
HEX_NUMBER: /0x[\da-f]*l?/i
OCT_NUMBER: /0o?[0-7]*l?/i

%import common.FLOAT -> FLOAT
%import common.INT -> _INT
%import common.CNAME -> NAME
| @@ -177,14 +177,14 @@ _NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+ | |||||
// Legacy definitions, kept commented out for reference:
// STRING : /[ub]?r?("(?!"").*?(?<!\\)(\\\\)*?"|'(?!'').*?(?<!\\)(\\\\)*?')/
// LONG_STRING: /(?s)[ub]?r?(""".*?(?<!\\)(\\\\)*?"""|'''.*?(?<!\\)(\\\\)*?''')/

// Regexp flags are written after the closing slash (e.g. /.../i, /.../is)
// rather than as an inline (?i) / (?s) prefix inside the pattern.
STRING : /[ubf]?r?("(?!"").*?(?<!\\\\)(\\\\\\\\)*?"|'(?!'').*?(?<!\\\\)(\\\\\\\\)*?')/i
LONG_STRING: /[ubf]?r?(""".*?(?<!\\\\)(\\\\\\\\)*?"""|'''.*?(?<!\\\\)(\\\\\\\\)*?''')/is

DEC_NUMBER: /[1-9]\d*l?/i
HEX_NUMBER: /0x[\da-f]*l?/i
OCT_NUMBER: /0o?[0-7]*l?/i
FLOAT_NUMBER: /((\d+\.\d*|\.\d+)(e[-+]?\d+)?|\d+(e[-+]?\d+))/i
IMAG_NUMBER: /\d+j|${FLOAT_NUMBER}j/i

_DEDENT: "<DEDENT>"
_INDENT: "<INDENT>"
| @@ -68,16 +68,14 @@ class Pattern(object): | |||||
if Py36:
    # Python 3.6 changed syntax for flags in regular expression
    def _get_flags(self, value):
        """Return ``value`` wrapped in one scoped flag group per flag.

        Every entry of ``self.flags`` wraps the pattern built so far as
        ``(?<flag>:<pattern>)``, so several flags nest around ``value``.
        When ``self.flags`` is empty or None, ``value`` is returned unchanged.
        """
        for f in self.flags or ():
            value = ('(?%s:%s)' % (f, value))
        return value

else:
    def _get_flags(self, value):
        """Return ``value`` with one ``(?<flag>)`` prefix per flag.

        Every entry of ``self.flags`` is prepended to the pattern built so
        far.  When ``self.flags`` is empty or None, ``value`` is returned
        unchanged.
        """
        for f in self.flags or ():
            value = ('(?%s)' % f) + value
        return value
| class PatternStr(Pattern): | class PatternStr(Pattern): | ||||
| @@ -72,7 +72,7 @@ TOKENS = { | |||||
| 'RULE': '!?[_?]?[a-z][_a-z0-9]*', | 'RULE': '!?[_?]?[a-z][_a-z0-9]*', | ||||
| 'TOKEN': '_?[A-Z][_A-Z0-9]*', | 'TOKEN': '_?[A-Z][_A-Z0-9]*', | ||||
| 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', | 'STRING': r'"(\\"|\\\\|[^"\n])*?"i?', | ||||
| 'REGEXP': r'/(?!/)(\\/|\\\\|[^/\n])*?/[%s]?' % _RE_FLAGS, | |||||
| 'REGEXP': r'/(?!/)(\\/|\\\\|[^/\n])*?/[%s]*' % _RE_FLAGS, | |||||
| '_NL': r'(\r?\n)+\s*', | '_NL': r'(\r?\n)+\s*', | ||||
| 'WS': r'[ \t]+', | 'WS': r'[ \t]+', | ||||
| 'COMMENT': r'//[^\n]*', | 'COMMENT': r'//[^\n]*', | ||||
| @@ -287,21 +287,28 @@ class ExtractAnonTokens(InlineTransformer): | |||||
| return Token('TOKEN', token_name, -1) | return Token('TOKEN', token_name, -1) | ||||
| def _rfind(s, choices): | |||||
| return max(s.rfind(c) for c in choices) | |||||
def _literal_to_pattern(literal):
    """Build a pattern object from a STRING or REGEXP literal token.

    ``literal.value`` is the raw literal text: a body delimited by ``"`` or
    ``/``, optionally followed by flag characters.  ``literal.type`` selects
    the result class: 'STRING' -> PatternStr, 'REGEXP' -> PatternRE.

    Returns the pattern constructed from the unescaped body and the flags
    (None when the literal carries no flags).

    Raises ValueError if the body cannot be evaluated as a string literal.
    """
    v = literal.value

    # Everything after the last delimiter is the (possibly empty) flag suffix.
    flag_start = _rfind(v, '/"')+1
    assert flag_start > 0
    flags = v[flag_start:]
    assert all(f in _RE_FLAGS for f in flags), flags

    v = v[:flag_start]
    assert v[0] == v[-1] and v[0] in '"/'
    x = v[1:-1]

    # Double the backslash of these escapes so they survive literal_eval
    # below as a backslash sequence instead of being interpreted by it.
    x = re.sub(r'(\\[wd/ .]|\\\[|\\\])', r'\\\1', x)
    # The body is embedded in a '''-quoted literal, so escape single quotes.
    x = x.replace("'", r"\'")

    to_eval = "u'''%s'''" % x
    try:
        s = literal_eval(to_eval)
    except SyntaxError as e:
        raise ValueError(v, e)

    return { 'STRING': PatternStr,
             'REGEXP': PatternRE }[literal.type](s, flags or None)
| class PrepareLiterals(InlineTransformer): | class PrepareLiterals(InlineTransformer): | ||||