|
@@ -92,7 +92,7 @@ class TerminalDef(Serialize): |
|
|
|
|
|
|
|
|
def __repr__(self): |
|
|
def __repr__(self): |
|
|
return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern) |
|
|
return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def user_repr(self): |
|
|
def user_repr(self): |
|
|
if self.name.startswith('__'): # We represent a generated terminal |
|
|
if self.name.startswith('__'): # We represent a generated terminal |
|
|
return self.pattern.raw or self.name |
|
|
return self.pattern.raw or self.name |
|
@@ -317,7 +317,7 @@ class TraditionalLexer(Lexer): |
|
|
self.user_callbacks = conf.callbacks |
|
|
self.user_callbacks = conf.callbacks |
|
|
self.g_regex_flags = conf.g_regex_flags |
|
|
self.g_regex_flags = conf.g_regex_flags |
|
|
self.use_bytes = conf.use_bytes |
|
|
self.use_bytes = conf.use_bytes |
|
|
self.terminals_by_names = conf.terminals_by_names |
|
|
|
|
|
|
|
|
self.terminals_by_name = conf.terminals_by_name |
|
|
|
|
|
|
|
|
self._mres = None |
|
|
self._mres = None |
|
|
|
|
|
|
|
@@ -361,7 +361,7 @@ class TraditionalLexer(Lexer): |
|
|
allowed = {"<END-OF-FILE>"} |
|
|
allowed = {"<END-OF-FILE>"} |
|
|
raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column, |
|
|
raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column, |
|
|
allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token], |
|
|
allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token], |
|
|
state=parser_state, terminals_by_name=self.terminals_by_names) |
|
|
|
|
|
|
|
|
state=parser_state, terminals_by_name=self.terminals_by_name) |
|
|
|
|
|
|
|
|
value, type_ = res |
|
|
value, type_ = res |
|
|
|
|
|
|
|
@@ -403,7 +403,7 @@ class ContextualLexer(Lexer): |
|
|
|
|
|
|
|
|
def __init__(self, conf, states, always_accept=()): |
|
|
def __init__(self, conf, states, always_accept=()): |
|
|
terminals = list(conf.terminals) |
|
|
terminals = list(conf.terminals) |
|
|
tokens_by_name = conf.terminals_by_names |
|
|
|
|
|
|
|
|
terminals_by_name = conf.terminals_by_name |
|
|
|
|
|
|
|
|
trad_conf = copy(conf) |
|
|
trad_conf = copy(conf) |
|
|
trad_conf.terminals = terminals |
|
|
trad_conf.terminals = terminals |
|
@@ -416,9 +416,8 @@ class ContextualLexer(Lexer): |
|
|
lexer = lexer_by_tokens[key] |
|
|
lexer = lexer_by_tokens[key] |
|
|
except KeyError: |
|
|
except KeyError: |
|
|
accepts = set(accepts) | set(conf.ignore) | set(always_accept) |
|
|
accepts = set(accepts) | set(conf.ignore) | set(always_accept) |
|
|
state_tokens = [tokens_by_name[n] for n in accepts if n and n in tokens_by_name] |
|
|
|
|
|
lexer_conf = copy(trad_conf) |
|
|
lexer_conf = copy(trad_conf) |
|
|
lexer_conf.terminals = state_tokens |
|
|
|
|
|
|
|
|
lexer_conf.terminals = [terminals_by_name[n] for n in accepts if n in terminals_by_name] |
|
|
lexer = TraditionalLexer(lexer_conf) |
|
|
lexer = TraditionalLexer(lexer_conf) |
|
|
lexer_by_tokens[key] = lexer |
|
|
lexer_by_tokens[key] = lexer |
|
|
|
|
|
|
|
@@ -440,13 +439,12 @@ class ContextualLexer(Lexer): |
|
|
except UnexpectedCharacters as e: |
|
|
except UnexpectedCharacters as e: |
|
|
# In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context. |
|
|
# In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context. |
|
|
# This tests the input against the global context, to provide a nicer error. |
|
|
# This tests the input against the global context, to provide a nicer error. |
|
|
last_token = lexer_state.last_token # self.root_lexer.next_token will change this to the wrong token |
|
|
|
|
|
try: |
|
|
try: |
|
|
|
|
|
last_token = lexer_state.last_token # Save last_token. Calling root_lexer.next_token will change this to the wrong token |
|
|
token = self.root_lexer.next_token(lexer_state, parser_state) |
|
|
token = self.root_lexer.next_token(lexer_state, parser_state) |
|
|
|
|
|
raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[last_token], terminals_by_name=self.root_lexer.terminals_by_name) |
|
|
except UnexpectedCharacters: |
|
|
except UnexpectedCharacters: |
|
|
raise e# Don't raise the exception that the root lexer raise. It has the wrong expected set. |
|
|
|
|
|
else: |
|
|
|
|
|
raise UnexpectedToken(token, e.allowed, state=parser_state, token_history=[last_token], terminals_by_name=self.root_lexer.terminals_by_names) |
|
|
|
|
|
|
|
|
raise e # Raise the original UnexpectedCharacters. The root lexer raises it with the wrong expected set. |
|
|
|
|
|
|
|
|
class LexerThread: |
|
|
class LexerThread: |
|
|
"""A thread that ties a lexer instance and a lexer state, to be used by the parser""" |
|
|
"""A thread that ties a lexer instance and a lexer state, to be used by the parser""" |
|
|