
regression-fix for #760

MegaIng1, 3 years ago
commit 4e442bc0b8
2 changed files with 8 additions and 7 deletions:
  1. lark-stubs/lexer.pyi  (+1 −1)
  2. lark/lexer.py  (+7 −6)

lark-stubs/lexer.pyi  (+1 −1)

@@ -139,7 +139,7 @@ class TraditionalLexer(Lexer):
     def lex(self, stream: str) -> Iterator[Token]:
         ...
 
-    def next_token(self, lex_state: Any) -> Token:
+    def next_token(self, lex_state: Any, parser_state: Any = None) -> Token:
         ...
 
 class ContextualLexer(Lexer):


lark/lexer.py  (+7 −6)

@@ -338,12 +338,12 @@ class TraditionalLexer(Lexer):
         if m:
             return m.group(0), type_from_index[m.lastindex]
 
-    def lex(self, state, _parser_state):
+    def lex(self, state, parser_state):
         with suppress(EOFError):
             while True:
-                yield self.next_token(state)
+                yield self.next_token(state, parser_state)
 
-    def next_token(self, lex_state):
+    def next_token(self, lex_state, parser_state=None):
         line_ctr = lex_state.line_ctr
         while line_ctr.char_pos < len(lex_state.text):
             res = self.match(lex_state.text, line_ctr.char_pos)
@@ -352,7 +352,8 @@ class TraditionalLexer(Lexer):
                 if not allowed:
                     allowed = {"<END-OF-FILE>"}
                 raise UnexpectedCharacters(lex_state.text, line_ctr.char_pos, line_ctr.line, line_ctr.column,
-                                           allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token])
+                                           allowed=allowed, token_history=lex_state.last_token and [lex_state.last_token],
+                                           state=(parser_state and parser_state.position))
 
             value, type_ = res


@@ -428,13 +429,13 @@ class ContextualLexer(Lexer):
         try:
             while True:
                 lexer = self.lexers[parser_state.position]
-                yield lexer.next_token(lexer_state)
+                yield lexer.next_token(lexer_state, parser_state)
         except EOFError:
             pass
         except UnexpectedCharacters as e:
             # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined, but not in the current context.
             # This tests the input against the global context, to provide a nicer error.
-            token = self.root_lexer.next_token(lexer_state)
+            token = self.root_lexer.next_token(lexer_state, parser_state)
             raise UnexpectedToken(token, e.allowed, state=parser_state.position)
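The practical effect of the change: TraditionalLexer.next_token now accepts the parser state and forwards its position into UnexpectedCharacters, so a lexer error raised mid-parse carries the parser state again. Below is a minimal sketch of how that might surface to a caller, assuming lark 0.11.2 with the LALR parser and contextual lexer, and assuming the value is stored on the exception as e.state; the toy grammar and input are made up for illustration and are not part of this commit.

from lark import Lark
from lark.exceptions import UnexpectedCharacters

# Toy grammar, chosen only to force a lexer error partway through a parse.
parser = Lark(r'''
    start: "a" "b"
    %ignore " "
''', parser="lalr", lexer="contextual")

try:
    parser.parse("a c")   # "c" is not a valid terminal in any context
except UnexpectedCharacters as e:
    # With this fix, the exception is constructed with
    # state=(parser_state and parser_state.position); assumption: the
    # exception keeps it as e.state for error-reporting helpers to use.
    print("parser state at error:", e.state)

Keeping the new parameter optional (parser_state=None) means existing callers that invoke next_token without a parser state continue to work unchanged.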





