@@ -30,10 +30,16 @@ class Terminal(Symbol):
    def fullrepr(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)

+    def serialize(self):
+        return ['T', self.name, self.filter_out]
+
class NonTerminal(Symbol):
    is_term = False

+    def serialize(self):
+        return ['NT', self.name]
+
class Rule(object):
    """
        origin : a symbol
@@ -64,6 +70,11 @@ class Rule(object):
            return False
        return self.origin == other.origin and self.expansion == other.expansion

+    def serialize(self):
+        return [self.origin.serialize(), [s.serialize() for s in self.expansion], self.alias, self.options.serialize() if self.options else None]
+    # TODO: a deserialize() counterpart (the inverse of the list above);
+    # not implemented in this change.

class RuleOptions:
    def __init__(self, keep_all_tokens=False, expand1=False, priority=None, empty_indices=()):
@@ -78,3 +89,6 @@ class RuleOptions:
            self.expand1,
            self.priority,
        )

+    def serialize(self):
+        return [self.keep_all_tokens, self.expand1, self.priority, list(self.empty_indices)]
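The 'T'/'NT' tags make the symbol lists self-describing, so the inverse mapping is mechanical even though this change leaves it as a TODO. A minimal sketch of what it could look like (`deserialize_symbol` and `deserialize_rule` are illustrative names, not part of this diff; they assume the constructor signatures shown in grammar.py):

```python
from lark.grammar import Terminal, NonTerminal, Rule, RuleOptions

def deserialize_symbol(data):
    # Inverse of Terminal.serialize() / NonTerminal.serialize() above.
    tag = data[0]
    if tag == 'T':
        return Terminal(data[1], filter_out=data[2])
    if tag == 'NT':
        return NonTerminal(data[1])
    raise ValueError('unknown symbol tag: %r' % (tag,))

def deserialize_rule(data):
    # Inverse of Rule.serialize(): [origin, expansion, alias, options].
    origin, expansion, alias, options = data
    return Rule(deserialize_symbol(origin),
                [deserialize_symbol(s) for s in expansion],
                alias=alias,
                options=RuleOptions(*options) if options else None)
```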
@@ -208,6 +208,7 @@ class Lark:
        return self.parser_class(self.lexer_conf, parser_conf, options=self.options)

    @classmethod
    def open(cls, grammar_filename, rel_to=None, **options):
        """Create an instance of Lark with the grammar given by its filename
@@ -65,6 +65,9 @@ class TerminalDef(object):
    def __repr__(self):
        return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)

+    def serialize(self):
+        return [self.name, self.pattern, self.priority]

###{standalone
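One thing to watch: `self.pattern` is a Pattern object (PatternStr or PatternRE), so the triple returned here is not yet JSON-safe. A possible flattening step, assuming both pattern classes expose `.value` and `.flags` as they do in lark's lexer module (`serialize_pattern` is an illustrative helper, not part of this diff):

```python
from lark.lexer import PatternStr

def serialize_pattern(p):
    # PatternStr -> tagged literal; PatternRE -> tagged regexp + flags.
    if isinstance(p, PatternStr):
        return ['str', p.value]
    return ['re', p.value, list(p.flags)]
```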
@@ -307,6 +310,13 @@ class TraditionalLexer(Lexer):
    def lex(self, stream):
        return _Lex(self).lex(stream, self.newline_types, self.ignore_types)

+    def serialize(self):
+        return {
+            'terminals': [t.serialize() for t in self.terminals],
+            'ignore_types': self.ignore_types,
+            'newline_types': self.newline_types,
+        }

class ContextualLexer(Lexer):
    def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}):
@@ -343,4 +353,6 @@ class ContextualLexer(Lexer):
        l.lexer = self.lexers[self.parser_state]
        l.state = self.parser_state

+    def serialize(self):
+        return {state: lexer.serialize() for state, lexer in self.lexers.items()}
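Both lexer payloads are pure data: TraditionalLexer snapshots its terminal definitions plus the ignore/newline type lists, and ContextualLexer nests one such table per parser state. Compiled regexes and user callbacks are deliberately left out and must be rebuilt on load. A usage sketch (`save_lexer` is an illustrative helper; it assumes the nested TerminalDef entries have been made JSON-safe first, e.g. with `serialize_pattern` above):

```python
import json

def save_lexer(lexer, path):
    # lexer.serialize() returns nested dicts/lists of plain data.
    with open(path, 'w') as f:
        json.dump(lexer.serialize(), f)
```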
@@ -7,7 +7,7 @@ from .lexer import TraditionalLexer, ContextualLexer, Lexer, Token
from .parsers import lalr_parser, earley, xearley, cyk
from .tree import Tree

-class WithLexer:
+class WithLexer(object):
    lexer = None
    parser = None
    lexer_conf = None
@@ -36,6 +36,20 @@ class WithLexer:
        sps = self.lexer.set_parser_state
        return self.parser.parse(token_stream, *[sps] if sps is not NotImplemented else [])

+    def serialize(self):
+        return {
+            'parser': self.parser.serialize(),
+            'lexer': self.lexer.serialize(),
+        }
+
+    @classmethod
+    def deserialize(cls, data):
+        inst = cls.__new__(cls)   # skip __init__: no grammar analysis on load
+        inst.parser = lalr_parser.Parser.deserialize(data['parser'])
+        inst.lexer = Lexer.deserialize(data['lexer'])
+        return inst

class LALR_TraditionalLexer(WithLexer):
    def __init__(self, lexer_conf, parser_conf, options=None):
        debug = options.debug if options else False
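`WithLexer.serialize` simply composes the parser and lexer payloads, while `deserialize` hard-codes `lalr_parser.Parser` (so only LALR frontends round-trip for now) and calls a `Lexer.deserialize` that this diff does not define yet. Under those assumptions, a round-trip sketch (`frontend` stands for a WithLexer that Lark built in the usual way):

```python
# Hypothetical round-trip; assumes Lexer.deserialize exists.
data = frontend.serialize()              # nested plain dicts and lists
restored = WithLexer.deserialize(data)   # fresh instance, __init__ skipped
```

Note the design choice in `deserialize`: `cls.__new__(cls)` bypasses `__init__` on purpose, so loading a serialized frontend never repeats the grammar analysis.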
@@ -4,10 +4,30 @@
# Email : erezshin@gmail.com

from ..exceptions import UnexpectedToken
from ..lexer import Token
+from ..grammar import Rule

-from .lalr_analysis import LALR_Analyzer, Shift
+from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable

-class Parser:
+class Enumerator:
+    def __init__(self):
+        self.enums = {}
+
+    def get(self, item):
+        if item not in self.enums:
+            self.enums[item] = len(self.enums)
+        return self.enums[item]
+
+    def __len__(self):
+        return len(self.enums)
+
+    def reversed(self):
+        r = {v: k for k, v in self.enums.items()}
+        assert len(r) == len(self.enums)
+        return r
+
+
+class Parser(object):
    def __init__(self, parser_conf, debug=False):
        assert all(r.options is None or r.options.priority is None
                   for r in parser_conf.rules), "LALR doesn't yet support prioritization"
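The new `Enumerator` interns hashable items into dense integer indices, so the serialized state table below can refer to tokens and rules by number instead of repeating them, and `reversed()` recovers the index-to-item map for the payload. Its behavior in isolation:

```python
# Indices are assigned in first-seen order and are stable on re-lookup.
e = Enumerator()
assert e.get('NAME') == 0
assert e.get('NUMBER') == 1
assert e.get('NAME') == 0
assert len(e) == 2
assert e.reversed() == {0: 'NAME', 1: 'NUMBER'}
```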
@@ -20,6 +40,38 @@ class Parser:
        self.parser = _Parser(analysis.parse_table, callbacks)
+        self._parse_table = analysis.parse_table   # kept for serialize()
        self.parse = self.parser.parse

+    def serialize(self):
+        # Intern token names and rules so states can refer to them by index.
+        tokens = Enumerator()
+        rules = Enumerator()
+
+        # Encode each action compactly: Shift -> (0, next_state),
+        # Reduce -> (1, rule_index).
+        states = {
+            state: {tokens.get(token): ((1, rules.get(arg)) if action is Reduce else (0, arg))
+                    for token, (action, arg) in actions.items()}
+            for state, actions in self._parse_table.states.items()
+        }
+
+        return {
+            'tokens': tokens.reversed(),
+            'rules': {idx: r.serialize() for idx, r in rules.reversed().items()},
+            'states': states,
+            'start_state': self._parse_table.start_state,
+            'end_state': self._parse_table.end_state,
+        }
+    @classmethod
+    def deserialize(cls, data):
+        tokens = data['tokens']
+        # Rule.deserialize is still a TODO in grammar.py; see the sketch there.
+        rules = {idx: Rule.deserialize(r) for idx, r in data['rules'].items()}
+        states = {
+            state: {tokens[token]: ((Reduce, rules[arg]) if action == 1 else (Shift, arg))
+                    for token, (action, arg) in actions.items()}
+            for state, actions in data['states'].items()
+        }
+        parse_table = IntParseTable(states, data['start_state'], data['end_state'])
+        # NOTE: callbacks are not serialized; the caller must rebuild the
+        # inner _Parser with fresh callbacks before parsing.
+        inst = cls.__new__(cls)
+        inst._parse_table = parse_table
+        return inst

###{standalone

class _Parser:
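The `(0, next_state)` / `(1, rule_index)` encoding plus the interned token and rule tables keeps the payload compact, but one caveat applies if it is dumped as JSON: JSON stringifies the integer dict keys (state ids, token indices, rule indices), so a JSON-based loader must coerce them back before calling `Parser.deserialize`. A sketch under that assumption (`load_parser` is an illustrative name, not part of this diff):

```python
import json
from lark.parsers.lalr_parser import Parser

def load_parser(path):
    with open(path) as f:
        data = json.load(f)
    # json.dump turned the integer keys into strings; undo that.
    data['tokens'] = {int(k): v for k, v in data['tokens'].items()}
    data['rules'] = {int(k): v for k, v in data['rules'].items()}
    data['states'] = {
        int(state): {int(tok): tuple(act) for tok, act in actions.items()}
        for state, actions in data['states'].items()
    }
    return Parser.deserialize(data)
```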