
Fix to new serializer code (Discussed in issue #349)

Erez Shinan · commit a798dec779 · 5 years ago
7 changed files with 102 additions and 52 deletions
  1. lark/common.py (+12, -1)
  2. lark/lexer.py (+1, -11)
  3. lark/parser_frontends.py (+37, -30)
  4. lark/parsers/lalr_parser.py (+2, -1)
  5. lark/tools/standalone.py (+1, -0)
  6. lark/utils.py (+13, -9)
  7. tests/test_tools.py (+36, -0)
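
For context: this commit moves serializer state off the lexer classes and onto LexerConf, which now inherits the Serialize mixin touched in the lark/utils.py hunk below. As a rough guide to that pattern, here is a minimal sketch — a simplified stand-in, not lark's actual implementation, which also threads a memo dict and a namespace of nested Serialize types:

# Simplified stand-in for lark.utils.Serialize (illustrative toy only).
class Serialize(object):
    def serialize(self):
        # Persist only the fields the class declares.
        return {f: getattr(self, f) for f in self.__serialize_fields__}

    @classmethod
    def deserialize(cls, data):
        inst = cls.__new__(cls)
        for f in cls.__serialize_fields__:
            setattr(inst, f, data[f])
        postprocess = getattr(inst, '_deserialize', None)
        if postprocess:
            postprocess()   # rebuild whatever was not serialized
        return inst

class LexerConf(Serialize):
    __serialize_fields__ = 'tokens', 'ignore'

    def __init__(self, tokens, ignore=()):
        self.tokens = tokens    # real lark stores TerminalDef objects here
        self.ignore = ignore
        self.callbacks = {}

    def _deserialize(self):
        self.callbacks = {}     # callbacks are not serializable; reset on load

conf = LexerConf(['NAME', 'NUMBER'], ignore=('WS',))
restored = LexerConf.deserialize(conf.serialize())
assert restored.tokens == conf.tokens and restored.callbacks == {}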

lark/common.py (+12, -1)

@@ -1,12 +1,23 @@
+from .utils import Serialize
 from .lexer import TerminalDef
 
-class LexerConf:
+###{standalone
+
+class LexerConf(Serialize):
+    __serialize_fields__ = 'tokens', 'ignore'
+    __serialize_namespace__ = TerminalDef,
+
     def __init__(self, tokens, ignore=(), postlex=None, callbacks=None):
         self.tokens = tokens
         self.ignore = ignore
         self.postlex = postlex
         self.callbacks = callbacks or {}
 
+    def _deserialize(self):
+        self.callbacks = {} # TODO
+
+###}
 
 class ParserConf:
     def __init__(self, rules, callbacks, start):
         self.rules = rules


lark/lexer.py (+1, -11)

@@ -261,7 +261,7 @@ def _regexp_has_newline(r):
     """
     return '\n' in r or '\\n' in r or '[^' in r or ('(?s' in r and '.' in r)
 
-class Lexer(Serialize):
+class Lexer(object):
     """Lexer interface
 
     Method Signatures:
@@ -274,13 +274,6 @@ class Lexer(Serialize):
 
 
 class TraditionalLexer(Lexer):
-    __serialize_fields__ = 'terminals', 'ignore_types', 'newline_types'
-    __serialize_namespace__ = TerminalDef,
-
-    def _deserialize(self):
-        self.user_callbacks = {} # TODO implement
-        self.build()
-
 
     def __init__(self, terminals, ignore=(), user_callbacks={}):
         assert all(isinstance(t, TerminalDef) for t in terminals), terminals
@@ -329,9 +322,6 @@ class TraditionalLexer(Lexer):
 
 
 class ContextualLexer(Lexer):
-    __serialize_fields__ = 'root_lexer', 'lexers'
-    __serialize_namespace__ = TraditionalLexer,
-
     def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}):
         tokens_by_name = {}
         for t in terminals:


lark/parser_frontends.py (+37, -30)

@@ -8,6 +8,7 @@ from .parsers import earley, xearley, cyk
 from .parsers.lalr_parser import LALR_Parser
 from .grammar import Rule
 from .tree import Tree
+from .common import LexerConf
 
 ###{standalone
 
@@ -50,34 +51,24 @@ class WithLexer(Serialize):
     parser = None
     lexer_conf = None
 
-    __serialize_fields__ = 'parser', 'lexer'
-    __serialize_namespace__ = Rule, ContextualLexer, TraditionalLexer
+    __serialize_fields__ = 'parser', 'lexer_conf'
+    __serialize_namespace__ = LexerConf,
 
+    def __init__(self, lexer_conf, parser_conf, options=None):
+        self.lexer_conf = lexer_conf
+        self.postlex = lexer_conf.postlex
+
     @classmethod
     def deserialize(cls, data, memo, callbacks, postlex):
         inst = super(WithLexer, cls).deserialize(data, memo)
         inst.postlex = postlex
         inst.parser = LALR_Parser.deserialize(inst.parser, memo, callbacks)
+        inst.init_lexer()
         return inst
 
     def _serialize(self, data, memo):
         data['parser'] = data['parser'].serialize(memo)
 
-    def init_traditional_lexer(self, lexer_conf):
-        self.lexer_conf = lexer_conf
-        self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks)
-        self.postlex = lexer_conf.postlex
-
-    def init_contextual_lexer(self, lexer_conf):
-        self.lexer_conf = lexer_conf
-        self.postlex = lexer_conf.postlex
-        states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()}
-        always_accept = self.postlex.always_accept if self.postlex else ()
-        self.lexer = ContextualLexer(lexer_conf.tokens, states,
-                                     ignore=lexer_conf.ignore,
-                                     always_accept=always_accept,
-                                     user_callbacks=lexer_conf.callbacks)
-
     def lex(self, text):
         stream = self.lexer.lex(text)
         return self.postlex.process(stream) if self.postlex else stream
@@ -87,26 +78,40 @@ class WithLexer(Serialize):
         sps = self.lexer.set_parser_state
         return self.parser.parse(token_stream, *[sps] if sps is not NotImplemented else [])
 
+    def init_traditional_lexer(self):
+        self.lexer = TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks)
 
-class LALR_TraditionalLexer(WithLexer):
+class LALR_WithLexer(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
         debug = options.debug if options else False
         self.parser = LALR_Parser(parser_conf, debug=debug)
-        self.init_traditional_lexer(lexer_conf)
+        WithLexer.__init__(self, lexer_conf, parser_conf, options)
 
-class LALR_ContextualLexer(WithLexer):
-    def __init__(self, lexer_conf, parser_conf, options=None):
-        debug = options.debug if options else False
-        self.parser = LALR_Parser(parser_conf, debug=debug)
-        self.init_contextual_lexer(lexer_conf)
+        self.init_lexer()
+
+    def init_lexer(self):
+        raise NotImplementedError()
+
+class LALR_TraditionalLexer(LALR_WithLexer):
+    def init_lexer(self):
+        self.init_traditional_lexer()
+
+class LALR_ContextualLexer(LALR_WithLexer):
+    def init_lexer(self):
+        states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()}
+        always_accept = self.postlex.always_accept if self.postlex else ()
+        self.lexer = ContextualLexer(self.lexer_conf.tokens, states,
+                                     ignore=self.lexer_conf.ignore,
+                                     always_accept=always_accept,
+                                     user_callbacks=self.lexer_conf.callbacks)
 ###}
 
-class LALR_CustomLexer(WithLexer):
+class LALR_CustomLexer(LALR_WithLexer):
     def __init__(self, lexer_cls, lexer_conf, parser_conf, options=None):
-        self.parser = LALR_Parser(parser_conf)
-        self.lexer_conf = lexer_conf
-        self.lexer = lexer_cls(lexer_conf)
+        pass # TODO
+
+    def init_lexer(self):
+        self.lexer = lexer_cls(self.lexer_conf)
 
 def tokenize_text(text):
     line = 1
@@ -119,7 +124,8 @@ def tokenize_text(text):
 
 class Earley(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
-        self.init_traditional_lexer(lexer_conf)
+        WithLexer.__init__(self, lexer_conf, parser_conf, options)
+        self.init_traditional_lexer()
 
         resolve_ambiguity = options.ambiguity == 'resolve'
         self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity)
@@ -172,7 +178,8 @@ class XEarley_CompleteLex(XEarley):
 class CYK(WithLexer):
 
     def __init__(self, lexer_conf, parser_conf, options=None):
-        self.init_traditional_lexer(lexer_conf)
+        WithLexer.__init__(self, lexer_conf, parser_conf, options)
+        self.init_traditional_lexer()
 
         self._analysis = GrammarAnalyzer(parser_conf)
         self._parser = cyk.Parser(parser_conf.rules, parser_conf.start)
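
A note on the reshuffling above: it is a template-method refactor. WithLexer.__init__ now only records the config, and lexer construction is deferred to an init_lexer() hook that LALR_WithLexer calls from __init__ and that WithLexer.deserialize() can call again after loading. A toy sketch of the shape (stand-in classes, not the real TraditionalLexer/ContextualLexer):

class WithLexer:
    def __init__(self, lexer_conf):
        self.lexer_conf = lexer_conf   # kept: this is what gets serialized now

class LALR_WithLexer(WithLexer):
    def __init__(self, lexer_conf):
        WithLexer.__init__(self, lexer_conf)
        self.init_lexer()              # also invoked after deserialization

    def init_lexer(self):
        raise NotImplementedError()

class LALR_TraditionalLexer(LALR_WithLexer):
    def init_lexer(self):
        # stand-in for building a TraditionalLexer from self.lexer_conf
        self.lexer = ('traditional', self.lexer_conf['tokens'])

frontend = LALR_TraditionalLexer({'tokens': ['NAME', 'NUMBER']})
print(frontend.lexer)   # ('traditional', ['NAME', 'NUMBER'])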


lark/parsers/lalr_parser.py (+2, -1)

@@ -25,7 +25,8 @@ class LALR_Parser(object):
     @classmethod
     def deserialize(cls, data, memo, callbacks):
         inst = cls.__new__(cls)
-        inst.parser = _Parser(IntParseTable.deserialize(data, memo), callbacks)
+        inst._parse_table = IntParseTable.deserialize(data, memo)
+        inst.parser = _Parser(inst._parse_table, callbacks)
         return inst
 
     def serialize(self, memo):
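
This two-line change keeps the deserialized parse table reachable as parser._parse_table, which LALR_ContextualLexer.init_lexer() (in the parser_frontends.py hunk above) consults to learn which terminals each LALR state accepts. A toy illustration with made-up states:

# Made-up state table; the real IntParseTable maps state -> {token_name: action}.
class FakeTable:
    states = {0: {'NAME': 'shift', 'NUMBER': 'shift'}, 1: {'PLUS': 'shift'}}

class FakeParser:
    def __init__(self):
        self._parse_table = FakeTable()   # kept on the instance, as in the fix

parser = FakeParser()
accepts = {idx: list(t.keys()) for idx, t in parser._parse_table.states.items()}
print(accepts)   # {0: ['NAME', 'NUMBER'], 1: ['PLUS']}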


lark/tools/standalone.py (+1, -0)

@@ -65,6 +65,7 @@ EXTRACT_STANDALONE_FILES = [
     'indenter.py',
     'grammar.py',
     'lexer.py',
+    'common.py',
     'parse_tree_builder.py',
     'parsers/lalr_parser.py',
     'parsers/lalr_analysis.py',


lark/utils.py (+13, -9)

@@ -103,7 +103,10 @@ class Serialize(object):
 
         inst = cls.__new__(cls)
         for f in fields:
-            setattr(inst, f, _deserialize(data[f], namespace, memo))
+            try:
+                setattr(inst, f, _deserialize(data[f], namespace, memo))
+            except KeyError as e:
+                raise KeyError("Cannot find key for class", cls, e)
         postprocess = getattr(inst, '_deserialize', None)
         if postprocess:
             postprocess()
@@ -164,6 +167,15 @@ def smart_decorator(f, create_decorator):
 
 import sys, re
 Py36 = (sys.version_info[:2] >= (3, 6))
+
+import sre_parse
+import sre_constants
+def get_regexp_width(regexp):
+    try:
+        return sre_parse.parse(regexp).getwidth()
+    except sre_constants.error:
+        raise ValueError(regexp)
 
 ###}


@@ -209,14 +221,6 @@ except NameError:
         return -1
 
 
-import sre_parse
-import sre_constants
-def get_regexp_width(regexp):
-    try:
-        return sre_parse.parse(regexp).getwidth()
-    except sre_constants.error:
-        raise ValueError(regexp)
-
 
 class Enumerator(Serialize):
     def __init__(self):
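
get_regexp_width itself is unchanged here; it only moves inside the ###{standalone} block so that generated standalone parsers carry it. For reference, it wraps the stdlib sre_parse module (an internal module, deprecated since Python 3.11), whose getwidth() reports the minimum and maximum width a pattern can match:

import sre_parse

print(sre_parse.parse(r'ab').getwidth())   # (2, 2)
print(sre_parse.parse(r'a+').getwidth())   # (1, 4294967295), i.e. unbounded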


tests/test_tools.py (+36, -0)

@@ -13,6 +13,9 @@ try:
 except ImportError:
     from io import StringIO
 
+
+
+
 class TestStandalone(TestCase):
     def setUp(self):
         pass
@@ -74,6 +77,39 @@ class TestStandalone(TestCase):
         x = l2.parse('ABAB')
         self.assertEqual(x, ['a', 'b'])
 
+    def test_postlex(self):
+        from lark.indenter import Indenter
+        class MyIndenter(Indenter):
+            NL_type = '_NEWLINE'
+            OPEN_PAREN_types = ['LPAR', 'LSQB', 'LBRACE']
+            CLOSE_PAREN_types = ['RPAR', 'RSQB', 'RBRACE']
+            INDENT_type = '_INDENT'
+            DEDENT_type = '_DEDENT'
+            tab_len = 8
+
+        grammar = r"""
+        start: "(" ")" _NEWLINE
+        _NEWLINE: /\n/
+        """
+
+        # from lark import Lark
+        # l = Lark(grammar, parser='lalr', lexer='contextual', postlex=MyIndenter())
+        # x = l.parse('(\n)\n')
+        # print('@@', x)
+
+
+        context = self._create_standalone(grammar)
+        _Lark = context['Lark_StandAlone']
+
+        # l = _Lark(postlex=MyIndenter())
+        # x = l.parse('()\n')
+        # print(x)
+        l = _Lark(postlex=MyIndenter())
+        x = l.parse('(\n)\n')
+        print(x)
+
+
+
 
 if __name__ == '__main__':
     unittest.main()

