@@ -30,10 +30,16 @@ class Terminal(Symbol):
    def fullrepr(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)

    def serialize(self):
        return ['T', self.name, self.filter_out]


class NonTerminal(Symbol):
    is_term = False

    def serialize(self):
        return ['NT', self.name]


class Rule(object):
    """
        origin : a symbol
@@ -64,6 +70,11 @@ class Rule(object):
            return False
        return self.origin == other.origin and self.expansion == other.expansion

    def serialize(self):
        return [self.origin.serialize(), [s.serialize() for s in self.expansion], self.alias, self.options.serialize() if self.options else None]

    # TODO: deserialize -- the inverse of serialize() (see the sketch after
    # the RuleOptions hunk below)
class RuleOptions:
    def __init__(self, keep_all_tokens=False, expand1=False, priority=None):
@@ -78,3 +89,6 @@ class RuleOptions:
            self.expand1,
            self.priority,
        )

    def serialize(self):
        return [self.keep_all_tokens, self.expand1, self.priority, list(self.empty_indices)]
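The `Rule.deserialize` above is left as a TODO. A minimal sketch of the missing inverses, written here as free functions (the names `deserialize_symbol` etc. are hypothetical) and assuming the constructor signatures shown in this file: `Terminal(name, filter_out)`, `NonTerminal(name)`, `Rule(origin, expansion, alias, options)`:

```python
from lark.grammar import Terminal, NonTerminal, Rule, RuleOptions

def deserialize_symbol(data):
    # Inverse of Symbol.serialize(): ['T', name, filter_out] or ['NT', name]
    if data[0] == 'T':
        return Terminal(data[1], filter_out=data[2])
    return NonTerminal(data[1])

def deserialize_rule_options(data):
    keep_all_tokens, expand1, priority, empty_indices = data
    opts = RuleOptions(keep_all_tokens, expand1, priority)
    opts.empty_indices = empty_indices   # serialized, but not an __init__ argument
    return opts

def deserialize_rule(data):
    origin, expansion, alias, options = data
    return Rule(deserialize_symbol(origin),
                [deserialize_symbol(s) for s in expansion],
                alias,
                deserialize_rule_options(options) if options else None)
```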
@@ -208,6 +208,7 @@ class Lark:
        return self.parser_class(self.lexer_conf, parser_conf, options=self.options)

    @classmethod
    def open(cls, grammar_filename, rel_to=None, **options):
        """Create an instance of Lark with the grammar given by its filename
@@ -65,6 +65,9 @@ class TerminalDef(object):
    def __repr__(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)

    def serialize(self):
        return [self.name, self.pattern, self.priority]

###{standalone
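Since `TerminalDef.serialize()` is positional, the constructor already acts as its inverse. One caveat: the middle element is a `Pattern` object (`PatternStr`/`PatternRE`), not a plain string, so the payload is not directly JSON-safe. A hypothetical round trip:

```python
from lark.lexer import TerminalDef, PatternStr

td = TerminalDef('COMMA', PatternStr(','))
name, pattern, priority = td.serialize()
restored = TerminalDef(name, pattern, priority)   # inverse via the constructor
assert repr(restored) == repr(td)
```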
@@ -307,6 +310,13 @@ class TraditionalLexer(Lexer):
    def lex(self, stream):
        return _Lex(self).lex(stream, self.newline_types, self.ignore_types)

    def serialize(self):
        return {
            'terminals': [t.serialize() for t in self.terminals],
            'ignore_types': self.ignore_types,
            'newline_types': self.newline_types,
        }
class ContextualLexer(Lexer):
    def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}):
@@ -343,4 +353,6 @@ class ContextualLexer(Lexer):
        l.lexer = self.lexers[self.parser_state]
        l.state = self.parser_state

    def serialize(self):
        return {state: lexer.serialize() for state, lexer in self.lexers.items()}
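Both lexers gain `serialize()` here, but no inverse yet (`WithLexer.deserialize` below calls a `Lexer.deserialize` that this diff never defines). A minimal sketch of what the `TraditionalLexer` side might look like, assuming the payload shape produced above:

```python
from lark.lexer import TraditionalLexer, TerminalDef

def deserialize_traditional_lexer(data):
    # Hypothetical inverse of TraditionalLexer.serialize() above.
    inst = TraditionalLexer.__new__(TraditionalLexer)
    inst.terminals = [TerminalDef(*t) for t in data['terminals']]
    inst.ignore_types = data['ignore_types']
    inst.newline_types = data['newline_types']
    # The compiled regexes (and any user callbacks) are not serialized, so
    # they must be rebuilt before inst.lex() will work.
    return inst
```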
@@ -7,7 +7,7 @@ from .lexer import TraditionalLexer, ContextualLexer, Lexer, Token
from .parsers import lalr_parser, earley, xearley, cyk
from .tree import Tree

-class WithLexer:
+class WithLexer(object):
    lexer = None
    parser = None
    lexer_conf = None
@@ -36,6 +36,20 @@ class WithLexer:
        sps = self.lexer.set_parser_state
        return self.parser.parse(token_stream, *[sps] if sps is not NotImplemented else [])

    def serialize(self):
        return {
            # 'class': type(self).__name__,
            'parser': self.parser.serialize(),
            'lexer': self.lexer.serialize(),
        }

    @classmethod
    def deserialize(cls, data):
        inst = cls.__new__(cls)
        inst.parser = lalr_parser.Parser.deserialize(data['parser'])
        inst.lexer = Lexer.deserialize(data['lexer'])
        return inst
class LALR_TraditionalLexer(WithLexer):
    def __init__(self, lexer_conf, parser_conf, options=None):
        debug = options.debug if options else False
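A hypothetical round trip through `WithLexer.serialize`/`deserialize`, once the lexer inverse exists (see the sketch above). One real pitfall if the payload goes through JSON: the parser's state and token keys are integers, and `json.dumps` turns integer dict keys into strings (and tuples into lists), so a JSON round trip needs a key-conversion pass:

```python
import json

payload = frontend.serialize()            # frontend: a built LALR_TraditionalLexer
blob = json.dumps(payload, default=repr)  # Pattern objects aren't JSON-native
data = json.loads(blob)                   # int dict keys came back as strings!
# restored = LALR_TraditionalLexer.deserialize(data)  # needs str->int key fix-up first
```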
@@ -4,10 +4,30 @@
# Email : erezshin@gmail.com

from ..exceptions import UnexpectedToken
from ..lexer import Token
+from ..grammar import Rule

-from .lalr_analysis import LALR_Analyzer, Shift
+from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable

-class Parser:
class Enumerator:
    def __init__(self):
        self.enums = {}

    def get(self, item):
        if item not in self.enums:
            self.enums[item] = len(self.enums)
        return self.enums[item]

    def __len__(self):
        return len(self.enums)

    def reversed(self):
        r = {v: k for k, v in self.enums.items()}
        assert len(r) == len(self.enums)
        return r
+class Parser(object):
    def __init__(self, parser_conf, debug=False):
        assert all(r.options is None or r.options.priority is None
                   for r in parser_conf.rules), "LALR doesn't yet support prioritization"
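`Enumerator` interns hashable items to dense integer ids; `serialize()` below uses it to replace token names and `Rule` objects with small integers in the emitted parse table. For example:

```python
e = Enumerator()
assert e.get('NUMBER') == 0     # first item gets id 0
assert e.get('PLUS') == 1
assert e.get('NUMBER') == 0     # repeated lookups reuse the same id
assert len(e) == 2
assert e.reversed() == {0: 'NUMBER', 1: 'PLUS'}
```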
@@ -20,6 +40,38 @@ class Parser:
        self._parse_table = analysis.parse_table   # kept for serialize() below
        self.parser = _Parser(analysis.parse_table, callbacks)
        self.parse = self.parser.parse
    def serialize(self):
        tokens = Enumerator()
        rules = Enumerator()
        states = {
            state: {tokens.get(token): ((1, rules.get(arg)) if action is Reduce else (0, arg))
                    for token, (action, arg) in actions.items()}
            for state, actions in self._parse_table.states.items()
        }
        return {
            'tokens': tokens.reversed(),
            'rules': {idx: r.serialize() for idx, r in rules.reversed().items()},
            'states': states,
            'start_state': self._parse_table.start_state,
            'end_state': self._parse_table.end_state,
        }
    @classmethod
    def deserialize(cls, data):
        tokens = data['tokens']
        rules = {idx: Rule.deserialize(r) for idx, r in data['rules'].items()}
        states = {
            state: {tokens[token]: ((Reduce, rules[arg]) if action == 1 else (Shift, arg))
                    for token, (action, arg) in actions.items()}
            for state, actions in data['states'].items()
        }
        parse_table = IntParseTable(states, data['start_state'], data['end_state'])
        inst = cls.__new__(cls)
        inst._parse_table = parse_table
        # NOTE: callbacks are not part of the payload, so inst.parser/inst.parse
        # still need to be rebuilt (e.g. _Parser(parse_table, callbacks)) before use.
        return inst
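To make the table encoding concrete: each action pair is flattened to `(0, next_state)` for Shift and `(1, rule_id)` for Reduce, with tokens and rules swapped for their `Enumerator` ids. A toy single-state illustration (values hypothetical):

```python
# Before: one state's actions, keyed by token name
#   {'NUMBER': (Shift, 7), '$END': (Reduce, <Rule #0>)}
# After encoding, with 'tokens' = {0: 'NUMBER', 1: '$END'}:
encoded_state = {0: (0, 7),    # Shift to state 7 on NUMBER (token id 0)
                 1: (1, 0)}    # Reduce by rule id 0 on $END (token id 1)
```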
###{standalone

class _Parser: