- Refactored lexer interface into LexerConf - Lexer now compiles regexps only when used (especially useful for ContextualLexer) - Lexer now doesn't validate on deserialize (noticeable speedup) tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.10.0
| @@ -26,6 +26,13 @@ python_parser2 = Lark.open('python2.lark', parser='lalr', **kwargs) | |||||
| python_parser3 = Lark.open('python3.lark',parser='lalr', **kwargs) | python_parser3 = Lark.open('python3.lark',parser='lalr', **kwargs) | ||||
| python_parser2_earley = Lark.open('python2.lark', parser='earley', lexer='standard', **kwargs) | python_parser2_earley = Lark.open('python2.lark', parser='earley', lexer='standard', **kwargs) | ||||
| try: | |||||
| xrange | |||||
| except NameError: | |||||
| chosen_parser = python_parser3 | |||||
| else: | |||||
| chosen_parser = python_parser2 | |||||
| def _read(fn, *args): | def _read(fn, *args): | ||||
| kwargs = {'encoding': 'iso-8859-1'} | kwargs = {'encoding': 'iso-8859-1'} | ||||
| @@ -42,24 +49,13 @@ def _get_lib_path(): | |||||
| return [x for x in sys.path if x.endswith('%s.%s' % sys.version_info[:2])][0] | return [x for x in sys.path if x.endswith('%s.%s' % sys.version_info[:2])][0] | ||||
| def test_python_lib(): | def test_python_lib(): | ||||
| path = _get_lib_path() | path = _get_lib_path() | ||||
| start = time.time() | start = time.time() | ||||
| files = glob.glob(path+'/*.py') | files = glob.glob(path+'/*.py') | ||||
| for f in files: | for f in files: | ||||
| print( f ) | print( f ) | ||||
| try: | |||||
| # print list(python_parser.lex(_read(os.path.join(path, f)) + '\n')) | |||||
| try: | |||||
| xrange | |||||
| except NameError: | |||||
| python_parser3.parse(_read(os.path.join(path, f)) + '\n') | |||||
| else: | |||||
| python_parser2.parse(_read(os.path.join(path, f)) + '\n') | |||||
| except: | |||||
| print ('At %s' % f) | |||||
| raise | |||||
| chosen_parser.parse(_read(os.path.join(path, f)) + '\n') | |||||
| end = time.time() | end = time.time() | ||||
| print( "test_python_lib (%d files), time: %s secs"%(len(files), end-start) ) | print( "test_python_lib (%d files), time: %s secs"%(len(files), end-start) ) | ||||
| @@ -7,12 +7,14 @@ class LexerConf(Serialize): | |||||
| __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags' | __serialize_fields__ = 'tokens', 'ignore', 'g_regex_flags' | ||||
| __serialize_namespace__ = TerminalDef, | __serialize_namespace__ = TerminalDef, | ||||
| def __init__(self, tokens, ignore=(), postlex=None, callbacks=None, g_regex_flags=0): | |||||
| self.tokens = tokens | |||||
| def __init__(self, tokens, re_module, ignore=(), postlex=None, callbacks=None, g_regex_flags=0, skip_validation=False): | |||||
| self.tokens = tokens # TODO should be terminals | |||||
| self.ignore = ignore | self.ignore = ignore | ||||
| self.postlex = postlex | self.postlex = postlex | ||||
| self.callbacks = callbacks or {} | self.callbacks = callbacks or {} | ||||
| self.g_regex_flags = g_regex_flags | self.g_regex_flags = g_regex_flags | ||||
| self.re_module = re_module | |||||
| self.skip_validation = skip_validation | |||||
| def _deserialize(self): | def _deserialize(self): | ||||
| self.callbacks = {} # TODO | self.callbacks = {} # TODO | ||||
| @@ -166,11 +166,11 @@ class Lark(Serialize): | |||||
| use_regex = self.options.regex | use_regex = self.options.regex | ||||
| if use_regex: | if use_regex: | ||||
| if regex: | if regex: | ||||
| self.re = regex | |||||
| re_module = regex | |||||
| else: | else: | ||||
| raise ImportError('`regex` module must be installed if calling `Lark(regex=True)`.') | raise ImportError('`regex` module must be installed if calling `Lark(regex=True)`.') | ||||
| else: | else: | ||||
| self.re = re | |||||
| re_module = re | |||||
| # Some, but not all file-like objects have a 'name' attribute | # Some, but not all file-like objects have a 'name' attribute | ||||
| try: | try: | ||||
| @@ -243,7 +243,7 @@ class Lark(Serialize): | |||||
| assert self.options.ambiguity in ('resolve', 'explicit', 'auto', ) | assert self.options.ambiguity in ('resolve', 'explicit', 'auto', ) | ||||
| # Parse the grammar file and compose the grammars (TODO) | # Parse the grammar file and compose the grammars (TODO) | ||||
| self.grammar = load_grammar(grammar, self.source, self.re) | |||||
| self.grammar = load_grammar(grammar, self.source, re_module) | |||||
| # Compile the EBNF grammar into BNF | # Compile the EBNF grammar into BNF | ||||
| self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start) | self.terminals, self.rules, self.ignore_tokens = self.grammar.compile(self.options.start) | ||||
| @@ -276,7 +276,7 @@ class Lark(Serialize): | |||||
| if hasattr(t, term.name): | if hasattr(t, term.name): | ||||
| lexer_callbacks[term.name] = getattr(t, term.name) | lexer_callbacks[term.name] = getattr(t, term.name) | ||||
| self.lexer_conf = LexerConf(self.terminals, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags) | |||||
| self.lexer_conf = LexerConf(self.terminals, re_module, self.ignore_tokens, self.options.postlex, lexer_callbacks, self.options.g_regex_flags) | |||||
| if self.options.parser: | if self.options.parser: | ||||
| self.parser = self._build_parser() | self.parser = self._build_parser() | ||||
| @@ -304,7 +304,7 @@ class Lark(Serialize): | |||||
| def _build_parser(self): | def _build_parser(self): | ||||
| self._prepare_callbacks() | self._prepare_callbacks() | ||||
| parser_conf = ParserConf(self.rules, self._callbacks, self.options.start) | parser_conf = ParserConf(self.rules, self._callbacks, self.options.start) | ||||
| return self.parser_class(self.lexer_conf, parser_conf, self.re, options=self.options) | |||||
| return self.parser_class(self.lexer_conf, parser_conf, options=self.options) | |||||
| def save(self, f): | def save(self, f): | ||||
| data, m = self.memo_serialize([TerminalDef, Rule]) | data, m = self.memo_serialize([TerminalDef, Rule]) | ||||
| @@ -331,11 +331,11 @@ class Lark(Serialize): | |||||
| if postlex is not None: | if postlex is not None: | ||||
| options['postlex'] = postlex | options['postlex'] = postlex | ||||
| self.options = LarkOptions.deserialize(options, memo) | self.options = LarkOptions.deserialize(options, memo) | ||||
| self.re = regex if self.options.regex else re | |||||
| re_module = regex if self.options.regex else re | |||||
| self.rules = [Rule.deserialize(r, memo) for r in data['rules']] | self.rules = [Rule.deserialize(r, memo) for r in data['rules']] | ||||
| self.source = '<deserialized>' | self.source = '<deserialized>' | ||||
| self._prepare_callbacks() | self._prepare_callbacks() | ||||
| self.parser = self.parser_class.deserialize(data['parser'], memo, self._callbacks, self.options.postlex, self.re) | |||||
| self.parser = self.parser_class.deserialize(data['parser'], memo, self._callbacks, self.options.postlex, re_module) | |||||
| return self | return self | ||||
| @classmethod | @classmethod | ||||
| @@ -6,6 +6,7 @@ from .utils import Str, classify, get_regexp_width, Py36, Serialize | |||||
| from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken | from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken | ||||
| ###{standalone | ###{standalone | ||||
| from copy import copy | |||||
| class Pattern(Serialize): | class Pattern(Serialize): | ||||
| @@ -88,7 +89,6 @@ class TerminalDef(Serialize): | |||||
| return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern) | return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern) | ||||
| class Token(Str): | class Token(Str): | ||||
| __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') | __slots__ = ('type', 'pos_in_stream', 'value', 'line', 'column', 'end_line', 'end_column', 'end_pos') | ||||
| @@ -294,35 +294,39 @@ class Lexer(object): | |||||
| class TraditionalLexer(Lexer): | class TraditionalLexer(Lexer): | ||||
| def __init__(self, terminals, re_, ignore=(), user_callbacks={}, g_regex_flags=0): | |||||
| def __init__(self, conf): | |||||
| terminals = list(conf.tokens) | |||||
| assert all(isinstance(t, TerminalDef) for t in terminals), terminals | assert all(isinstance(t, TerminalDef) for t in terminals), terminals | ||||
| terminals = list(terminals) | |||||
| self.re = conf.re_module | |||||
| self.re = re_ | |||||
| # Sanitization | |||||
| for t in terminals: | |||||
| try: | |||||
| self.re.compile(t.pattern.to_regexp(), g_regex_flags) | |||||
| except self.re.error: | |||||
| raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern)) | |||||
| if not conf.skip_validation: | |||||
| # Sanitization | |||||
| for t in terminals: | |||||
| try: | |||||
| self.re.compile(t.pattern.to_regexp(), conf.g_regex_flags) | |||||
| except self.re.error: | |||||
| raise LexError("Cannot compile token %s: %s" % (t.name, t.pattern)) | |||||
| if t.pattern.min_width == 0: | |||||
| raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern)) | |||||
| if t.pattern.min_width == 0: | |||||
| raise LexError("Lexer does not allow zero-width terminals. (%s: %s)" % (t.name, t.pattern)) | |||||
| assert set(ignore) <= {t.name for t in terminals} | |||||
| assert set(conf.ignore) <= {t.name for t in terminals} | |||||
| # Init | # Init | ||||
| self.newline_types = [t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())] | self.newline_types = [t.name for t in terminals if _regexp_has_newline(t.pattern.to_regexp())] | ||||
| self.ignore_types = list(ignore) | |||||
| self.ignore_types = list(conf.ignore) | |||||
| terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name)) | terminals.sort(key=lambda x:(-x.priority, -x.pattern.max_width, -len(x.pattern.value), x.name)) | ||||
| self.terminals = terminals | self.terminals = terminals | ||||
| self.user_callbacks = user_callbacks | |||||
| self.build(g_regex_flags) | |||||
| self.user_callbacks = conf.callbacks | |||||
| self.g_regex_flags = conf.g_regex_flags | |||||
| self._mres = None | |||||
| # self.build(g_regex_flags) | |||||
| def build(self, g_regex_flags=0): | |||||
| terminals, self.callback = _create_unless(self.terminals, g_regex_flags, re_=self.re) | |||||
| def _build(self): | |||||
| terminals, self.callback = _create_unless(self.terminals, self.g_regex_flags, re_=self.re) | |||||
| assert all(self.callback.values()) | assert all(self.callback.values()) | ||||
| for type_, f in self.user_callbacks.items(): | for type_, f in self.user_callbacks.items(): | ||||
| @@ -332,7 +336,13 @@ class TraditionalLexer(Lexer): | |||||
| else: | else: | ||||
| self.callback[type_] = f | self.callback[type_] = f | ||||
| self.mres = build_mres(terminals, g_regex_flags, self.re) | |||||
| self._mres = build_mres(terminals, self.g_regex_flags, self.re) | |||||
| @property | |||||
| def mres(self): | |||||
| if self._mres is None: | |||||
| self._build() | |||||
| return self._mres | |||||
| def match(self, stream, pos): | def match(self, stream, pos): | ||||
| for mre, type_from_index in self.mres: | for mre, type_from_index in self.mres: | ||||
| @@ -348,13 +358,15 @@ class TraditionalLexer(Lexer): | |||||
| class ContextualLexer(Lexer): | class ContextualLexer(Lexer): | ||||
| def __init__(self, terminals, states, re_, ignore=(), always_accept=(), user_callbacks={}, g_regex_flags=0): | |||||
| self.re = re_ | |||||
| def __init__(self, conf, states, always_accept=()): | |||||
| terminals = list(conf.tokens) | |||||
| tokens_by_name = {} | tokens_by_name = {} | ||||
| for t in terminals: | for t in terminals: | ||||
| assert t.name not in tokens_by_name, t | assert t.name not in tokens_by_name, t | ||||
| tokens_by_name[t.name] = t | tokens_by_name[t.name] = t | ||||
| trad_conf = type(conf)(terminals, conf.re_module, conf.ignore, callbacks=conf.callbacks, g_regex_flags=conf.g_regex_flags, skip_validation=conf.skip_validation) | |||||
| lexer_by_tokens = {} | lexer_by_tokens = {} | ||||
| self.lexers = {} | self.lexers = {} | ||||
| for state, accepts in states.items(): | for state, accepts in states.items(): | ||||
| @@ -362,14 +374,17 @@ class ContextualLexer(Lexer): | |||||
| try: | try: | ||||
| lexer = lexer_by_tokens[key] | lexer = lexer_by_tokens[key] | ||||
| except KeyError: | except KeyError: | ||||
| accepts = set(accepts) | set(ignore) | set(always_accept) | |||||
| accepts = set(accepts) | set(conf.ignore) | set(always_accept) | |||||
| state_tokens = [tokens_by_name[n] for n in accepts if n and n in tokens_by_name] | state_tokens = [tokens_by_name[n] for n in accepts if n and n in tokens_by_name] | ||||
| lexer = TraditionalLexer(state_tokens, re_=self.re, ignore=ignore, user_callbacks=user_callbacks, g_regex_flags=g_regex_flags) | |||||
| lexer_conf = copy(trad_conf) | |||||
| lexer_conf.tokens = state_tokens | |||||
| lexer = TraditionalLexer(lexer_conf) | |||||
| lexer_by_tokens[key] = lexer | lexer_by_tokens[key] = lexer | ||||
| self.lexers[state] = lexer | self.lexers[state] = lexer | ||||
| self.root_lexer = TraditionalLexer(terminals, re_=self.re, ignore=ignore, user_callbacks=user_callbacks, g_regex_flags=g_regex_flags) | |||||
| assert trad_conf.tokens is terminals | |||||
| self.root_lexer = TraditionalLexer(trad_conf) | |||||
| def lex(self, stream, get_parser_state): | def lex(self, stream, get_parser_state): | ||||
| parser_state = get_parser_state() | parser_state = get_parser_state() | ||||
| @@ -755,19 +755,19 @@ def _find_used_symbols(tree): | |||||
| for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))} | for t in x.scan_values(lambda t: t.type in ('RULE', 'TERMINAL'))} | ||||
| class GrammarLoader: | class GrammarLoader: | ||||
| def __init__(self, re_): | |||||
| self.re = re_ | |||||
| def __init__(self, re_module): | |||||
| terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] | terminals = [TerminalDef(name, PatternRE(value)) for name, value in TERMINALS.items()] | ||||
| rules = [options_from_rule(name, None, x) for name, x in RULES.items()] | rules = [options_from_rule(name, None, x) for name, x in RULES.items()] | ||||
| rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, _p, xs, o in rules for i, x in enumerate(xs)] | rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), i, None, o) for r, _p, xs, o in rules for i, x in enumerate(xs)] | ||||
| callback = ParseTreeBuilder(rules, ST).create_callback() | callback = ParseTreeBuilder(rules, ST).create_callback() | ||||
| lexer_conf = LexerConf(terminals, ['WS', 'COMMENT']) | |||||
| lexer_conf = LexerConf(terminals, re_module, ['WS', 'COMMENT']) | |||||
| parser_conf = ParserConf(rules, callback, ['start']) | parser_conf = ParserConf(rules, callback, ['start']) | ||||
| self.parser = LALR_TraditionalLexer(lexer_conf, parser_conf, re_) | |||||
| self.parser = LALR_TraditionalLexer(lexer_conf, parser_conf) | |||||
| self.canonize_tree = CanonizeTree() | self.canonize_tree = CanonizeTree() | ||||
| self.re_module = re_module | |||||
| def load_grammar(self, grammar_text, grammar_name='<?>'): | def load_grammar(self, grammar_text, grammar_name='<?>'): | ||||
| "Parse grammar_text, verify, and create Grammar object. Display nice messages on error." | "Parse grammar_text, verify, and create Grammar object. Display nice messages on error." | ||||
| @@ -863,7 +863,7 @@ class GrammarLoader: | |||||
| # import grammars | # import grammars | ||||
| for dotted_path, (base_paths, aliases) in imports.items(): | for dotted_path, (base_paths, aliases) in imports.items(): | ||||
| grammar_path = os.path.join(*dotted_path) + EXT | grammar_path = os.path.join(*dotted_path) + EXT | ||||
| g = import_grammar(grammar_path, self.re, base_paths=base_paths) | |||||
| g = import_grammar(grammar_path, self.re_module, base_paths=base_paths) | |||||
| new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases) | new_td, new_rd = import_from_grammar_into_namespace(g, '__'.join(dotted_path), aliases) | ||||
| term_defs += new_td | term_defs += new_td | ||||
| @@ -62,18 +62,18 @@ class WithLexer(_ParserFrontend): | |||||
| __serialize_fields__ = 'parser', 'lexer_conf', 'start' | __serialize_fields__ = 'parser', 'lexer_conf', 'start' | ||||
| __serialize_namespace__ = LexerConf, | __serialize_namespace__ = LexerConf, | ||||
| def __init__(self, lexer_conf, parser_conf, re_, options=None): | |||||
| def __init__(self, lexer_conf, parser_conf, options=None): | |||||
| self.lexer_conf = lexer_conf | self.lexer_conf = lexer_conf | ||||
| self.start = parser_conf.start | self.start = parser_conf.start | ||||
| self.postlex = lexer_conf.postlex | self.postlex = lexer_conf.postlex | ||||
| self.re = re_ | |||||
| @classmethod | @classmethod | ||||
| def deserialize(cls, data, memo, callbacks, postlex, re_): | |||||
| def deserialize(cls, data, memo, callbacks, postlex, re_module): | |||||
| inst = super(WithLexer, cls).deserialize(data, memo) | inst = super(WithLexer, cls).deserialize(data, memo) | ||||
| inst.re = re_ | |||||
| inst.postlex = postlex | inst.postlex = postlex | ||||
| inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks) | inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks) | ||||
| inst.lexer_conf.re_module = re_module | |||||
| inst.lexer_conf.skip_validation=True | |||||
| inst.init_lexer() | inst.init_lexer() | ||||
| return inst | return inst | ||||
| @@ -89,18 +89,17 @@ class WithLexer(_ParserFrontend): | |||||
| return self._parse(token_stream, start) | return self._parse(token_stream, start) | ||||
| def init_traditional_lexer(self): | def init_traditional_lexer(self): | ||||
| self.lexer = TraditionalLexer(self.lexer_conf.tokens, re_=self.re, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks, g_regex_flags=self.lexer_conf.g_regex_flags) | |||||
| self.lexer = TraditionalLexer(self.lexer_conf) | |||||
| class LALR_WithLexer(WithLexer): | class LALR_WithLexer(WithLexer): | ||||
| def __init__(self, lexer_conf, parser_conf, re_, options=None): | |||||
| def __init__(self, lexer_conf, parser_conf, options=None): | |||||
| debug = options.debug if options else False | debug = options.debug if options else False | ||||
| self.re = re_ | |||||
| self.parser = LALR_Parser(parser_conf, debug=debug) | self.parser = LALR_Parser(parser_conf, debug=debug) | ||||
| WithLexer.__init__(self, lexer_conf, parser_conf, re_, options) | |||||
| WithLexer.__init__(self, lexer_conf, parser_conf, options) | |||||
| self.init_lexer() | self.init_lexer() | ||||
| def init_lexer(self): | |||||
| def init_lexer(self, **kw): | |||||
| raise NotImplementedError() | raise NotImplementedError() | ||||
| class LALR_TraditionalLexer(LALR_WithLexer): | class LALR_TraditionalLexer(LALR_WithLexer): | ||||
| @@ -111,12 +110,7 @@ class LALR_ContextualLexer(LALR_WithLexer): | |||||
| def init_lexer(self): | def init_lexer(self): | ||||
| states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()} | states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()} | ||||
| always_accept = self.postlex.always_accept if self.postlex else () | always_accept = self.postlex.always_accept if self.postlex else () | ||||
| self.lexer = ContextualLexer(self.lexer_conf.tokens, states, | |||||
| re_=self.re, | |||||
| ignore=self.lexer_conf.ignore, | |||||
| always_accept=always_accept, | |||||
| user_callbacks=self.lexer_conf.callbacks, | |||||
| g_regex_flags=self.lexer_conf.g_regex_flags) | |||||
| self.lexer = ContextualLexer(self.lexer_conf, states, always_accept=always_accept) | |||||
| def parse(self, text, start=None): | def parse(self, text, start=None): | ||||
| @@ -129,11 +123,11 @@ class LALR_ContextualLexer(LALR_WithLexer): | |||||
| ###} | ###} | ||||
| class LALR_CustomLexer(LALR_WithLexer): | class LALR_CustomLexer(LALR_WithLexer): | ||||
| def __init__(self, lexer_cls, lexer_conf, parser_conf, re_, options=None): | |||||
| self.lexer = lexer_cls(lexer_conf, re_=re_) | |||||
| def __init__(self, lexer_cls, lexer_conf, parser_conf, options=None): | |||||
| self.lexer = lexer_cls(lexer_conf) | |||||
| debug = options.debug if options else False | debug = options.debug if options else False | ||||
| self.parser = LALR_Parser(parser_conf, debug=debug) | self.parser = LALR_Parser(parser_conf, debug=debug) | ||||
| WithLexer.__init__(self, lexer_conf, parser_conf, re_, options) | |||||
| WithLexer.__init__(self, lexer_conf, parser_conf, options) | |||||
| def tokenize_text(text): | def tokenize_text(text): | ||||
| @@ -146,8 +140,8 @@ def tokenize_text(text): | |||||
| yield Token('CHAR', ch, line=line, column=i - col_start_pos) | yield Token('CHAR', ch, line=line, column=i - col_start_pos) | ||||
| class Earley(WithLexer): | class Earley(WithLexer): | ||||
| def __init__(self, lexer_conf, parser_conf, re_, options=None): | |||||
| WithLexer.__init__(self, lexer_conf, parser_conf, re_, options) | |||||
| def __init__(self, lexer_conf, parser_conf, options=None): | |||||
| WithLexer.__init__(self, lexer_conf, parser_conf, options) | |||||
| self.init_traditional_lexer() | self.init_traditional_lexer() | ||||
| resolve_ambiguity = options.ambiguity == 'resolve' | resolve_ambiguity = options.ambiguity == 'resolve' | ||||
| @@ -159,9 +153,7 @@ class Earley(WithLexer): | |||||
| class XEarley(_ParserFrontend): | class XEarley(_ParserFrontend): | ||||
| def __init__(self, lexer_conf, parser_conf, re_, options=None, **kw): | |||||
| self.re = re_ | |||||
| def __init__(self, lexer_conf, parser_conf, options=None, **kw): | |||||
| self.token_by_name = {t.name:t for t in lexer_conf.tokens} | self.token_by_name = {t.name:t for t in lexer_conf.tokens} | ||||
| self.start = parser_conf.start | self.start = parser_conf.start | ||||
| @@ -193,7 +185,7 @@ class XEarley(_ParserFrontend): | |||||
| if width == 0: | if width == 0: | ||||
| raise ValueError("Dynamic Earley doesn't allow zero-width regexps", t) | raise ValueError("Dynamic Earley doesn't allow zero-width regexps", t) | ||||
| self.regexps[t.name] = self.re.compile(regexp, lexer_conf.g_regex_flags) | |||||
| self.regexps[t.name] = lexer_conf.re_module.compile(regexp, lexer_conf.g_regex_flags) | |||||
| def parse(self, text, start): | def parse(self, text, start): | ||||
| return self._parse(text, start) | return self._parse(text, start) | ||||
| @@ -206,8 +198,8 @@ class XEarley_CompleteLex(XEarley): | |||||
| class CYK(WithLexer): | class CYK(WithLexer): | ||||
| def __init__(self, lexer_conf, parser_conf, re_, options=None): | |||||
| WithLexer.__init__(self, lexer_conf, parser_conf, re_, options) | |||||
| def __init__(self, lexer_conf, parser_conf, options=None): | |||||
| WithLexer.__init__(self, lexer_conf, parser_conf, options) | |||||
| self.init_traditional_lexer() | self.init_traditional_lexer() | ||||
| self._analysis = GrammarAnalyzer(parser_conf) | self._analysis = GrammarAnalyzer(parser_conf) | ||||
| @@ -6,7 +6,7 @@ import unittest | |||||
| import logging | import logging | ||||
| import os | import os | ||||
| import sys | import sys | ||||
| from copy import deepcopy | |||||
| from copy import copy, deepcopy | |||||
| try: | try: | ||||
| from cStringIO import StringIO as cStringIO | from cStringIO import StringIO as cStringIO | ||||
| except ImportError: | except ImportError: | ||||
| @@ -553,8 +553,8 @@ class CustomLexer(Lexer): | |||||
| Purpose of this custom lexer is to test the integration, | Purpose of this custom lexer is to test the integration, | ||||
| so it uses the traditionalparser as implementation without custom lexing behaviour. | so it uses the traditionalparser as implementation without custom lexing behaviour. | ||||
| """ | """ | ||||
| def __init__(self, lexer_conf, re_): | |||||
| self.lexer = TraditionalLexer(lexer_conf.tokens, re_, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks, g_regex_flags=lexer_conf.g_regex_flags) | |||||
| def __init__(self, lexer_conf): | |||||
| self.lexer = TraditionalLexer(copy(lexer_conf)) | |||||
| def lex(self, *args, **kwargs): | def lex(self, *args, **kwargs): | ||||
| return self.lexer.lex(*args, **kwargs) | return self.lexer.lex(*args, **kwargs) | ||||