diff --git a/lark/grammar.py b/lark/grammar.py
index e171d52..8691f10 100644
--- a/lark/grammar.py
+++ b/lark/grammar.py
@@ -30,10 +30,16 @@ class Terminal(Symbol):
     def fullrepr(self):
         return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)
 
+    def serialize(self):
+        return ['T', self.name, self.filter_out]
+
 
 class NonTerminal(Symbol):
     is_term = False
 
+    def serialize(self):
+        return ['NT', self.name]
+
 
 class Rule(object):
     """
         origin : a symbol
@@ -64,6 +70,11 @@ class Rule(object):
             return False
         return self.origin == other.origin and self.expansion == other.expansion
 
+    def serialize(self):
+        return [self.origin.serialize(), [s.serialize() for s in self.expansion], self.alias, self.options.serialize() if self.options else None]
+
+    # TODO deserialize: the inverse of serialize(), rebuilding origin/expansion
+    # from the ['T', ...] / ['NT', ...] tags once Symbol grows a deserialize.
+
 
 class RuleOptions:
     def __init__(self, keep_all_tokens=False, expand1=False, priority=None):
@@ -78,3 +89,6 @@ class RuleOptions:
             self.expand1,
             self.priority,
         )
+
+    def serialize(self):
+        # NOTE: relies on self.empty_indices, which the __init__ signature
+        # above does not accept; it must be attached to the instance elsewhere.
+        return [self.keep_all_tokens, self.expand1, self.priority, list(self.empty_indices)]
\ No newline at end of file
diff --git a/lark/lark.py b/lark/lark.py
index 178141c..eb73271 100644
--- a/lark/lark.py
+++ b/lark/lark.py
@@ -208,6 +208,7 @@ class Lark:
 
         return self.parser_class(self.lexer_conf, parser_conf, options=self.options)
 
+    @classmethod
     def open(cls, grammar_filename, rel_to=None, **options):
         """Create an instance of Lark with the grammar given by its filename
diff --git a/lark/lexer.py b/lark/lexer.py
index e6e9e9e..00ff35c 100644
--- a/lark/lexer.py
+++ b/lark/lexer.py
@@ -65,6 +65,9 @@ class TerminalDef(object):
     def __repr__(self):
         return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
 
+    def serialize(self):
+        # NOTE: self.pattern is a Pattern object; it will need its own
+        # serialize() before this payload is JSON-safe.
+        return [self.name, self.pattern, self.priority]
+
 
 ###{standalone
 
@@ -307,6 +310,13 @@ class TraditionalLexer(Lexer):
     def lex(self, stream):
         return _Lex(self).lex(stream, self.newline_types, self.ignore_types)
 
+    def serialize(self):
+        return {
+            'terminals': [t.serialize() for t in self.terminals],
+            'ignore_types': self.ignore_types,
+            'newline_types': self.newline_types,
+        }
+
 
 class ContextualLexer(Lexer):
     def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}):
@@ -343,4 +353,6 @@ class ContextualLexer(Lexer):
         l.lexer = self.lexers[self.parser_state]
         l.state = self.parser_state
 
+    def serialize(self):
+        return {state: lexer.serialize() for state, lexer in self.lexers.items()}
diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py
index c351ddc..b93592c 100644
--- a/lark/parser_frontends.py
+++ b/lark/parser_frontends.py
@@ -7,7 +7,7 @@ from .lexer import TraditionalLexer, ContextualLexer, Lexer, Token
 from .parsers import lalr_parser, earley, xearley, cyk
 from .tree import Tree
 
-class WithLexer:
+class WithLexer(object):
     lexer = None
     parser = None
     lexer_conf = None
@@ -36,6 +36,20 @@ class WithLexer:
         sps = self.lexer.set_parser_state
         return self.parser.parse(token_stream, *[sps] if sps is not NotImplemented else [])
 
+    def serialize(self):
+        return {
+            # 'class': type(self).__name__,
+            'parser': self.parser.serialize(),
+            'lexer': self.lexer.serialize(),
+        }
+
+    @classmethod
+    def deserialize(cls, data):
+        inst = cls.__new__(cls)
+        inst.parser = lalr_parser.Parser.deserialize(data['parser'])
+        # TODO: the Lexer base class has no deserialize() yet; lexer
+        # reconstruction is still missing.
+        inst.lexer = Lexer.deserialize(data['lexer'])
+        return inst
+
+
 class LALR_TraditionalLexer(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
         debug = options.debug if options else False
diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py
index c30a92e..4de9496 100644
--- a/lark/parsers/lalr_parser.py
+++ b/lark/parsers/lalr_parser.py
@@ -4,10 +4,30 @@
 # Email : erezshin@gmail.com
 from ..exceptions import UnexpectedToken
 from ..lexer import Token
+from ..grammar import Rule
 
-from .lalr_analysis import LALR_Analyzer, Shift
+from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
 
-class Parser:
+
+class Enumerator:
+    """Interns items to consecutive integers, for compact serialization."""
+
+    def __init__(self):
+        self.enums = {}
+
+    def get(self, item):
+        if item not in self.enums:
+            self.enums[item] = len(self.enums)
+        return self.enums[item]
+
+    def __len__(self):
+        return len(self.enums)
+
+    def reversed(self):
+        r = {v: k for k, v in self.enums.items()}
+        assert len(r) == len(self.enums)
+        return r
+
+
+class Parser(object):
     def __init__(self, parser_conf, debug=False):
         assert all(r.options is None or r.options.priority is None
                    for r in parser_conf.rules), "LALR doesn't yet support prioritization"
@@ -20,6 +40,41 @@ class Parser:
         self.parser = _Parser(analysis.parse_table, callbacks)
+        self._parse_table = analysis.parse_table   # kept so serialize() can reach it
         self.parse = self.parser.parse
 
+    def serialize(self):
+        tokens = Enumerator()
+        rules = Enumerator()
+
+        # Shift is encoded as (0, next_state), Reduce as (1, rule_index).
+        states = {
+            state: {tokens.get(token): ((1, rules.get(arg)) if action is Reduce else (0, arg))
+                    for token, (action, arg) in actions.items()}
+            for state, actions in self._parse_table.states.items()
+        }
+
+        return {
+            'tokens': tokens.reversed(),
+            'rules': {idx: r.serialize() for idx, r in rules.reversed().items()},
+            'states': states,
+            'start_state': self._parse_table.start_state,
+            'end_state': self._parse_table.end_state,
+        }
+
+    @classmethod
+    def deserialize(cls, data):
+        tokens = data['tokens']
+        rules = {idx: Rule.deserialize(r) for idx, r in data['rules'].items()}
+        states = {
+            state: {tokens[token]: ((Reduce, rules[arg]) if action == 1 else (Shift, arg))
+                    for token, (action, arg) in actions.items()}
+            for state, actions in data['states'].items()
+        }
+        parse_table = IntParseTable(states, data['start_state'], data['end_state'])
+        inst = cls.__new__(cls)
+        inst._parse_table = parse_table
+        # TODO: _Parser also needs the callbacks, which are not part of the
+        # serialized payload yet, so inst.parse cannot be rebuilt here.
+        return inst
+
 
 ###{standalone
 
 class _Parser:
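
For reviewers, a minimal standalone sketch of what the Enumerator-based encoding in Parser.serialize() produces. The token names, states, and rule tag below are invented for illustration; only the Enumerator class is taken from the diff.

class Enumerator:
    """Interns items to consecutive integers (copied from the diff)."""
    def __init__(self):
        self.enums = {}

    def get(self, item):
        if item not in self.enums:
            self.enums[item] = len(self.enums)
        return self.enums[item]

    def reversed(self):
        return {v: k for k, v in self.enums.items()}


tokens = Enumerator()

# state -> {token name -> (action, argument)}; Shift is encoded as tag 0,
# Reduce as tag 1, mirroring Parser.serialize() above.
states = {
    0: {'NAME': ('Shift', 1), 'NUMBER': ('Shift', 2)},
    1: {'NUMBER': ('Reduce', 'rule_a')},
}
encoded = {
    state: {tokens.get(tok): ((0, arg) if action == 'Shift' else (1, arg))
            for tok, (action, arg) in acts.items()}
    for state, acts in states.items()
}

print(encoded)            # {0: {0: (0, 1), 1: (0, 2)}, 1: {1: (1, 'rule_a')}}
print(tokens.reversed())  # {0: 'NAME', 1: 'NUMBER'} -- the 'tokens' payload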
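
The commented-out Rule.deserialize hints at the missing inverse. A sketch of the symbol-level half, assuming the Terminal(name, filter_out=False) and NonTerminal(name) constructors from lark/grammar.py; deserialize_symbol itself is a hypothetical helper, not part of the diff.

from lark.grammar import Terminal, NonTerminal

def deserialize_symbol(data):
    # data is ['T', name, filter_out] or ['NT', name], as produced by
    # Terminal.serialize() / NonTerminal.serialize() in the diff.
    if data[0] == 'T':
        return Terminal(data[1], filter_out=data[2])
    assert data[0] == 'NT', data
    return NonTerminal(data[1])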
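
Finally, a hedged sketch of persisting the serialized parser to disk. save_parser/load_parser are hypothetical helper names, not lark API; the payload keys match Parser.serialize() above, and deserialize is assumed to be the (still incomplete) classmethod from the diff. One wrinkle worth noting: JSON stringifies integer dict keys, so they must be restored on load.

import json
from lark.parsers.lalr_parser import Parser

def save_parser(parser, path):
    # serialize() returns plain dicts/lists/ints, so json.dump works directly.
    with open(path, 'w') as f:
        json.dump(parser.serialize(), f)

def _int_keys(d):
    # json.dump turns int dict keys into strings; undo that on the way back.
    return {int(k): v for k, v in d.items()}

def load_parser(path):
    with open(path) as f:
        data = json.load(f)
    data['tokens'] = _int_keys(data['tokens'])
    data['rules'] = _int_keys(data['rules'])
    data['states'] = {state: _int_keys(actions)
                      for state, actions in _int_keys(data['states']).items()}
    return Parser.deserialize(data)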