diff --git a/lark-stubs/lexer.pyi b/lark-stubs/lexer.pyi index a654b0f..3f246fb 100644 --- a/lark-stubs/lexer.pyi +++ b/lark-stubs/lexer.pyi @@ -139,7 +139,7 @@ class TraditionalLexer(Lexer): def lex(self, stream: str) -> Iterator[Token]: ... - def next_token(self, lex_state: Any) -> Token: + def next_token(self, lex_state: Any, parser_state: Any = None) -> Token: ... class ContextualLexer(Lexer): diff --git a/lark/lexer.py b/lark/lexer.py index 4c420e7..6d69ec9 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -338,12 +338,12 @@ class TraditionalLexer(Lexer): if m: return m.group(0), type_from_index[m.lastindex] - def lex(self, state, _parser_state): + def lex(self, state, parser_state): with suppress(EOFError): while True: - yield self.next_token(state) + yield self.next_token(state, parser_state) - def next_token(self, lex_state): + def next_token(self, lex_state, parser_state=None): line_ctr = lex_state.line_ctr while line_ctr.char_pos < len(lex_state.text): res = self.match(lex_state.text, line_ctr.char_pos) @@ -352,7 +352,8 @@ class TraditionalLexer(Lexer): if not allowed: allowed = {""} raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column, - allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token]) + allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token], + state=(parser_state and parser_state.position)) value, type_ = res @@ -428,13 +429,13 @@ class ContextualLexer(Lexer): try: while True: lexer = self.lexers[parser_state.position] - yield lexer.next_token(lexer_state) + yield lexer.next_token(lexer_state, parser_state) except EOFError: pass except UnexpectedCharacters as e: # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context. # This tests the input against the global context, to provide a nicer error. - token = self.root_lexer.next_token(lexer_state) + token = self.root_lexer.next_token(lexer_state, parser_state) raise UnexpectedToken(token, e.allowed, state=parser_state.position)