|
|
@@ -3,7 +3,7 @@
 import re
 
 from .utils import Str, classify, get_regexp_width, Py36, Serialize
-from .exceptions import UnexpectedCharacters, LexError
+from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
 
 ###{standalone
|
|
@@ -43,7 +43,7 @@ class PatternStr(Pattern):
     __serialize_fields__ = 'value', 'flags'
 
     type = "str"
 
     def to_regexp(self):
         return self._get_flags(re.escape(self.value))
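Note on the context lines above: `PatternStr.to_regexp` builds a regexp from a literal terminal simply by escaping it, so regex metacharacters in the literal match themselves. A minimal standalone illustration with plain `re` (sketch only, without the class or its `_get_flags` flag handling):

    import re

    # The literal "a+b" must match the three characters a, +, b, so the '+'
    # has to be escaped before the literal can be embedded in a larger regexp.
    pattern = re.escape('a+b')            # -> 'a\\+b'
    assert re.match(pattern, 'a+b')
    assert not re.match(pattern, 'aab')   # the unescaped regex 'a+b' would match this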
|
|
|
|
|
|
@@ -166,37 +166,32 @@ class _Lex:
         while line_ctr.char_pos < len(stream):
             lexer = self.lexer
-            for mre, type_from_index in lexer.mres:
-                m = mre.match(stream, line_ctr.char_pos)
-                if not m:
-                    continue
-
-                t = None
-                value = m.group(0)
-                type_ = type_from_index[m.lastindex]
-                if type_ not in ignore_types:
-                    t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
-                    if t.type in lexer.callback:
-                        t = lexer.callback[t.type](t)
-                        if not isinstance(t, Token):
-                            raise ValueError("Callbacks must return a token (returned %r)" % t)
-                    last_token = t
-                    yield t
-                else:
-                    if type_ in lexer.callback:
-                        t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
-                        lexer.callback[type_](t)
-
-                line_ctr.feed(value, type_ in newline_types)
-                if t:
-                    t.end_line = line_ctr.line
-                    t.end_column = line_ctr.column
-
-                break
-            else:
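+            # lexer.match() (added to TraditionalLexer below) returns
+            # (value, type_) on success, or None when no terminal matches here.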
+            res = lexer.match(stream, line_ctr.char_pos)
+            if not res:
                 allowed = {v for m, tfi in lexer.mres for v in tfi.values()}
                 raise UnexpectedCharacters(stream, line_ctr.char_pos, line_ctr.line, line_ctr.column, allowed=allowed, state=self.state, token_history=last_token and [last_token])
+
+            value, type_ = res
+
+            t = None
+            if type_ not in ignore_types:
+                t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
+                if t.type in lexer.callback:
+                    t = lexer.callback[t.type](t)
+                    if not isinstance(t, Token):
+                        raise ValueError("Callbacks must return a token (returned %r)" % t)
+                last_token = t
+                yield t
+            else:
+                if type_ in lexer.callback:
+                    t = Token(type_, value, line_ctr.char_pos, line_ctr.line, line_ctr.column)
+                    lexer.callback[type_](t)
+
+            line_ctr.feed(value, type_ in newline_types)
+            if t:
+                t.end_line = line_ctr.line
+                t.end_column = line_ctr.column
 
 
 class UnlessCallback:
     def __init__(self, mres):
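The `allowed` set above and the `type_from_index[m.lastindex]` lookup both lean on the shape of `lexer.mres`. A hand-rolled sketch of that shape (illustration only; lark's actual `build_mres` also splits the terminals into chunks to stay under the group limit of Python's `re` module, which is why `mres` is a list of pairs):

    import re

    # Each terminal becomes a named group in one big alternation; m.lastindex
    # then identifies which group (hence which terminal) actually matched.
    terminals = {'NUMBER': r'\d+', 'NAME': r'[a-z]+', 'PLUS': r'\+'}
    mre = re.compile('|'.join('(?P<%s>%s)' % t for t in terminals.items()))
    type_from_index = {i: name for name, i in mre.groupindex.items()}

    m = mre.match('42+x')
    assert (m.group(0), type_from_index[m.lastindex]) == ('42', 'NUMBER')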
|
|
@@ -330,6 +325,11 @@ class TraditionalLexer(Lexer):
 
         self.mres = build_mres(terminals)
 
+    def match(self, stream, pos):
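+        # Try the compiled terminal regexps in order; falls through to
+        # None when none of them match at this position.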
+        for mre, type_from_index in self.mres:
+            m = mre.match(stream, pos)
+            if m:
+                return m.group(0), type_from_index[m.lastindex]
 
     def lex(self, stream):
         return _Lex(self).lex(stream, self.newline_types, self.ignore_types)
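A quick contract check for the new method, using a hand-built stand-in for `self.mres` (hypothetical table, mirroring the sketch above):

    import re

    mres = [(re.compile(r'(?P<NUMBER>\d+)|(?P<PLUS>\+)'), {1: 'NUMBER', 2: 'PLUS'})]

    def match(stream, pos):
        # Same shape as TraditionalLexer.match: first hit wins, None otherwise.
        for mre, type_from_index in mres:
            m = mre.match(stream, pos)
            if m:
                return m.group(0), type_from_index[m.lastindex]

    assert match('12+3', 0) == ('12', 'NUMBER')
    assert match('12+3', 2) == ('+', 'PLUS')
    assert match('hello', 0) is None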
|
|
@@ -367,9 +367,22 @@ class ContextualLexer(Lexer):
 
     def lex(self, stream):
         l = _Lex(self.lexers[self.parser_state], self.parser_state)
-        for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types):
-            yield x
-            l.lexer = self.lexers[self.parser_state]
-            l.state = self.parser_state
+        try:
+            for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types):
+                yield x
+                l.lexer = self.lexers[self.parser_state]
+                l.state = self.parser_state
+        except UnexpectedCharacters as e:
+            # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined,
+            # but not in the current context.
+            # This tests the input against the global context, to provide a nicer error.
+            root_match = self.root_lexer.match(stream, e.pos_in_stream)
+            if not root_match:
+                raise
+
+            value, type_ = root_match
+            t = Token(type_, value, e.pos_in_stream, e.line, e.column)
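+            # Only the terminals acceptable in the current parser state are
+            # reported as expected, not every terminal in the grammar.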
+            expected = {v for m, tfi in l.lexer.mres for v in tfi.values()}
+            raise UnexpectedToken(t, expected)
 
 ###}
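Net effect: input that fails to lex because a terminal is disallowed in the current context (rather than unknown to the grammar) now raises `UnexpectedToken`, carrying the offending token and the contextually expected terminals. A hedged end-to-end sketch (invented grammar; assumes a lark build that includes this change):

    from lark import Lark
    from lark.exceptions import UnexpectedToken

    # ';' is a real terminal of this grammar, but it is never legal right
    # after '('. Before this change, that input failed with a generic
    # UnexpectedCharacters error.
    parser = Lark('''
        start: "(" NAME ")" ";"
        NAME: /[a-z]+/
    ''', parser='lalr', lexer='contextual')

    try:
        parser.parse('(;')
    except UnexpectedToken as e:
        print(e.token, e.expected)   # ';' plus the terminals valid after '(' (e.g. NAME)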