
Fixed multithreading bug in ContextualLexer (Issue #493)

Erez Sh, 5 years ago
commit 8842928963
2 changed files with 19 additions and 16 deletions
  1. lark/lexer.py             +7  -12
  2. lark/parser_frontends.py  +12 -4

lark/lexer.py  +7 -12

@@ -288,10 +288,7 @@ class Lexer(object):
 
     Method Signatures:
         lex(self, stream) -> Iterator[Token]
-
-        set_parser_state(self, state)        # Optional
     """
-    set_parser_state = NotImplemented
     lex = NotImplemented
 
 
@@ -349,6 +346,7 @@ class TraditionalLexer(Lexer):
 
 
 class ContextualLexer(Lexer):
+
     def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}):
         tokens_by_name = {}
         for t in terminals:
@@ -371,18 +369,15 @@ class ContextualLexer(Lexer):
 
         self.root_lexer = TraditionalLexer(terminals, ignore=ignore, user_callbacks=user_callbacks)
 
-        self.set_parser_state(None) # Needs to be set on the outside
-
-    def set_parser_state(self, state):
-        self.parser_state = state
-
-    def lex(self, stream):
-        l = _Lex(self.lexers[self.parser_state], self.parser_state)
+    def lex(self, stream, get_parser_state):
+        parser_state = get_parser_state()
+        l = _Lex(self.lexers[parser_state], parser_state)
         try:
             for x in l.lex(stream, self.root_lexer.newline_types, self.root_lexer.ignore_types):
                 yield x
-                l.lexer = self.lexers[self.parser_state]
-                l.state = self.parser_state
+                parser_state = get_parser_state()
+                l.lexer = self.lexers[parser_state]
+                l.state = parser_state # For debug only, no need to worry about multithreading
         except UnexpectedCharacters as e:
             # In the contextual lexer, UnexpectedCharacters can mean that the terminal is defined,
             # but not in the current context.
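The fix removes the lexer's only piece of mutable per-parse state: instead of reading self.parser_state, which two threads sharing one ContextualLexer could overwrite for each other mid-parse, lex() now re-queries a caller-supplied get_parser_state callable after every token. A minimal sketch of that pattern, using made-up names (contextual_lex, the str.upper/str.lower "sub-lexers") that are not part of lark:

# Sketch only (assumed names, not lark's API). The parser state lives with the
# caller, not on the lexer instance, so concurrent parses that share one lexer
# object cannot clobber each other's state.
def contextual_lex(stream, lexers, get_parser_state):
    for ch in stream:
        state = get_parser_state()   # freshest state for *this* parse
        yield lexers[state](ch)      # pick the sub-lexer for that state

if __name__ == '__main__':
    lexers = {'upper': str.upper, 'lower': str.lower}
    sequence = iter(['lower', 'upper', 'lower'])
    def get_parser_state():          # stands in for the parser advancing its state
        return next(sequence)
    print(list(contextual_lex('AbC', lexers, get_parser_state)))  # ['a', 'B', 'c']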


lark/parser_frontends.py  +12 -4

@@ -79,14 +79,13 @@ class WithLexer(_ParserFrontend):
     def _serialize(self, data, memo):
         data['parser'] = data['parser'].serialize(memo)
 
-    def lex(self, text):
-        stream = self.lexer.lex(text)
+    def lex(self, *args):
+        stream = self.lexer.lex(*args)
         return self.postlex.process(stream) if self.postlex else stream
 
     def parse(self, text, start=None):
         token_stream = self.lex(text)
-        sps = self.lexer.set_parser_state
-        return self._parse(token_stream, start, *[sps] if sps is not NotImplemented else [])
+        return self._parse(token_stream, start)
 
     def init_traditional_lexer(self):
         self.lexer = TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks)
@@ -114,6 +113,15 @@ class LALR_ContextualLexer(LALR_WithLexer):
                                      ignore=self.lexer_conf.ignore,
                                      always_accept=always_accept,
                                      user_callbacks=self.lexer_conf.callbacks)
+
+
+    def parse(self, text, start=None):
+        parser_state = [None]
+        def set_parser_state(s):
+            parser_state[0] = s
+
+        token_stream = self.lex(text, lambda: parser_state[0])
+        return self._parse(token_stream, start, set_parser_state)
 ###}
 
 class LALR_CustomLexer(LALR_WithLexer):
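With the frontend change above, each parse() call creates its own one-element parser_state cell and hands the contextual lexer a closure over it, so no per-parse state is kept on objects that threads share. A usage sketch of the scenario this commit addresses; the grammar and thread count are illustrative, not taken from Issue #493, and it assumes a lark build that includes this fix:

from concurrent.futures import ThreadPoolExecutor
from lark import Lark

# One parser with the contextual lexer, shared by several threads. Each
# parse() call now owns its parser-state cell, so the calls don't interfere.
grammar = '''
start: WORD+
%import common.WORD
%import common.WS
%ignore WS
'''
parser = Lark(grammar, parser='lalr', lexer='contextual')

texts = ['one', 'one two', 'one two three', 'one two three four']
with ThreadPoolExecutor(max_workers=4) as pool:
    trees = list(pool.map(parser.parse, texts))
print([len(t.children) for t in trees])   # [1, 2, 3, 4]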

