@@ -19,6 +19,15 @@ class Symbol(object): | |||
fullrepr = property(__repr__) | |||
@classmethod
def deserialize(cls, data):
    """Rebuild a Terminal or NonTerminal from its serialized list form.

    data[0] is the class tag ('T' or 'NT'); the remaining items are the
    positional constructor arguments of the matching class.
    """
    tag = data[0]
    if tag == 'T':
        return Terminal(*data[1:])
    if tag == 'NT':
        return NonTerminal(*data[1:])
    raise KeyError(tag)
class Terminal(Symbol): | |||
is_term = True | |||
@@ -71,17 +80,26 @@ class Rule(object): | |||
return self.origin == other.origin and self.expansion == other.expansion | |||
def serialize(self):
    """Return a JSON-friendly list representation of this rule.

    Layout: [origin, expansion symbols, order, alias, options-or-None] --
    exactly the 5-item layout that Rule.deserialize unpacks.
    """
    # Stale pre-`order` variant and its commented-out duplicate removed;
    # list comprehension instead of list(genexpr).
    return [
        self.origin.serialize(),
        [s.serialize() for s in self.expansion],
        self.order,
        self.alias,
        self.options.serialize() if self.options else None,
    ]
@classmethod
def deserialize(cls, data):
    """Inverse of Rule.serialize: rebuild a Rule from its 5-item list."""
    origin, expansion, order, alias, options = data
    symbols = [Symbol.deserialize(item) for item in expansion]
    rule_options = RuleOptions.deserialize(options) if options else None
    return cls(Symbol.deserialize(origin), symbols, order, alias, rule_options)
class RuleOptions:
    """Per-rule tree-building options (token keeping, inlining, priority)."""

    def __init__(self, keep_all_tokens=False, expand1=False, priority=None, empty_indices=()):
        self.keep_all_tokens = keep_all_tokens
        self.expand1 = expand1
        self.priority = priority
        # Positions of placeholder (empty) children; round-tripped through
        # serialize()/deserialize() below instead of being reset to ().
        self.empty_indices = empty_indices

    def __repr__(self):
        # NOTE(review): the argument list of this format string was elided in
        # the diff; reconstructed from the three %r slots -- confirm against
        # the original file.
        return 'RuleOptions(%r, %r, %r)' % (
            self.keep_all_tokens,
            self.expand1,
            self.priority,
        )

    def serialize(self):
        """Return a JSON-friendly list; inverse of deserialize()."""
        return [self.keep_all_tokens, self.expand1, self.priority, list(self.empty_indices)]

    @classmethod
    def deserialize(cls, data):
        """Rebuild RuleOptions from the list produced by serialize()."""
        return cls(*data)
@@ -51,24 +51,39 @@ class LarkOptions(object): | |||
if __doc__: | |||
__doc__ += OPTIONS_DOC | |||
# Canonical option names with their default values. __init__ iterates this
# table, pops matching user-supplied options, and coerces a supplied value
# to bool whenever the default is a bool.
_defaults = {
    'debug': False,
    'keep_all_tokens': False,
    'tree_class': Tree,
    'cache_grammar': False,
    'postlex': None,
    'parser': 'earley',
    'lexer': 'auto',
    'transformer': None,
    'start': 'start',
    'profile': False,
    'priority': 'auto',
    'ambiguity': 'auto',
    'propagate_positions': False,
    'lexer_callbacks': {},
    'maybe_placeholders': False,
}
def __init__(self, options_dict): | |||
o = dict(options_dict) | |||
self.debug = bool(o.pop('debug', False)) | |||
self.keep_all_tokens = bool(o.pop('keep_all_tokens', False)) | |||
self.tree_class = o.pop('tree_class', Tree) | |||
self.cache_grammar = o.pop('cache_grammar', False) | |||
self.postlex = o.pop('postlex', None) | |||
self.parser = o.pop('parser', 'earley') | |||
self.lexer = o.pop('lexer', 'auto') | |||
self.transformer = o.pop('transformer', None) | |||
self.start = o.pop('start', 'start') | |||
self.profile = o.pop('profile', False) | |||
self.priority = o.pop('priority', 'auto') | |||
self.ambiguity = o.pop('ambiguity', 'auto') | |||
self.propagate_positions = o.pop('propagate_positions', False) | |||
self.lexer_callbacks = o.pop('lexer_callbacks', {}) | |||
self.maybe_placeholders = o.pop('maybe_placeholders', False) | |||
options = {} | |||
for name, default in self._defaults.items(): | |||
if name in o: | |||
value = o.pop(name) | |||
if isinstance(default, bool): | |||
value = bool(value) | |||
else: | |||
value = default | |||
options[name] = value | |||
self.__dict__['options'] = options | |||
assert self.parser in ('earley', 'lalr', 'cyk', None) | |||
@@ -79,6 +94,18 @@ class LarkOptions(object): | |||
if o: | |||
raise ValueError("Unknown options: %s" % o.keys()) | |||
def __getattr__(self, name):
    # Only reached when normal attribute lookup fails: expose entries of the
    # options dict (placed in __dict__ directly by __init__) as attributes.
    return self.options[name]
def __setattr__(self, name, value):
    # Redirect all attribute writes into the options dict; only __init__
    # writes to the instance __dict__ directly (for 'options' itself).
    self.options[name] = value
def serialize(self):
    """Return the options as a plain dict suitable for serialization.

    A copy is returned so the serialized payload does not alias the live
    options mapping, which callers can still mutate afterwards via
    attribute assignment (see __setattr__).
    """
    return dict(self.options)
@classmethod
def deserialize(cls, data):
    # data is the dict produced by serialize(); __init__ copies it and
    # validates the entries, so deserialization is normal construction.
    return cls(data)
class Profiler: | |||
def __init__(self): | |||
@@ -208,6 +235,28 @@ class Lark: | |||
return self.parser_class(self.lexer_conf, parser_conf, options=self.options) | |||
def serialize(self):
    """Serialize the parser, its rules and its options into a single dict."""
    payload = {}
    payload['parser'] = self.parser.serialize()
    payload['rules'] = [rule.serialize() for rule in self.rules]
    payload['options'] = self.options.serialize()
    return payload
@classmethod
def deserialize(cls, data):
    """Rebuild a Lark instance from the dict produced by serialize().

    Bypasses __init__ (no grammar compilation): rules, options and the
    parser frontend are reconstructed from their serialized forms.
    """
    from .grammar import Rule
    inst = cls.__new__(cls)
    rules = [Rule.deserialize(r) for r in data['rules']]
    options = LarkOptions.deserialize(data['options'])
    ptb = ParseTreeBuilder(rules, options.tree_class, options.propagate_positions,
                           options.keep_all_tokens,
                           options.parser != 'lalr' and options.ambiguity == 'explicit',
                           options.maybe_placeholders)
    callbacks = ptb.create_callback(None)
    parser_class = get_frontend(options.parser, options.lexer)
    inst.parser = parser_class.deserialize(data['parser'], callbacks)
    # Fix: keep rules/options on the instance so the round trip works --
    # serialize() reads self.rules and self.options.
    inst.rules = rules
    inst.options = options
    return inst
@classmethod | |||
def open(cls, grammar_filename, rel_to=None, **options): | |||
@@ -68,6 +68,10 @@ class TerminalDef(object): | |||
def serialize(self):
    """Return [name, pattern, priority] for this terminal definition."""
    name, pattern, priority = self.name, self.pattern, self.priority
    return [name, pattern, priority]
@classmethod
def deserialize(cls, data):
    # data is the [name, pattern, priority] list from serialize();
    # splat it back into the constructor.
    return cls(*data)
###{standalone | |||
@@ -268,6 +272,14 @@ class Lexer: | |||
set_parser_state = NotImplemented | |||
lex = NotImplemented | |||
@classmethod
def deserialize(cls, data):
    """Dispatch deserialization to the concrete lexer class named by data['type']."""
    lexer_type = data['type']
    if lexer_type == 'traditional':
        return TraditionalLexer.deserialize(data)
    if lexer_type == 'contextual':
        return ContextualLexer.deserialize(data)
    raise KeyError(lexer_type)
class TraditionalLexer(Lexer): | |||
def __init__(self, terminals, ignore=(), user_callbacks={}): | |||
assert all(isinstance(t, TerminalDef) for t in terminals), terminals | |||
@@ -312,11 +324,22 @@ class TraditionalLexer(Lexer): | |||
def serialize(self):
    """Serialize this lexer: terminal defs plus the token-type lists needed to re-lex."""
    data = {'type': 'traditional'}
    data['terminals'] = [terminal.serialize() for terminal in self.terminals]
    data['ignore_types'] = self.ignore_types
    data['newline_types'] = self.newline_types
    return data
@classmethod
def deserialize(cls, data):
    """Rebuild a TraditionalLexer without running __init__.

    The compiled regex groups (mres) are regenerated from the
    deserialized terminal definitions.
    """
    inst = cls.__new__(cls)
    terminals = [TerminalDef.deserialize(t) for t in data['terminals']]
    inst.terminals = terminals
    inst.mres = build_mres(terminals)
    inst.ignore_types = data['ignore_types']
    inst.newline_types = data['newline_types']
    inst.callback = {}  # TODO: user callbacks are not serialized yet
    return inst
class ContextualLexer(Lexer): | |||
def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}): | |||
@@ -354,5 +377,15 @@ class ContextualLexer(Lexer): | |||
l.state = self.parser_state | |||
def serialize(self):
    """Serialize the root lexer and every per-state contextual lexer."""
    per_state = {state: lexer.serialize() for state, lexer in self.lexers.items()}
    return {
        'type': 'contextual',
        'root_lexer': self.root_lexer.serialize(),
        'lexers': per_state,
    }
@classmethod
def deserialize(cls, data):
    """Inverse of serialize(): rebuild the root lexer and per-state lexers."""
    inst = cls.__new__(cls)
    inst.root_lexer = TraditionalLexer.deserialize(data['root_lexer'])
    inst.lexers = {state: Lexer.deserialize(payload)
                   for state, payload in data['lexers'].items()}
    return inst
@@ -209,12 +209,12 @@ class ParseTreeBuilder: | |||
keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False) | |||
expand_single_child = options.expand1 if options else False | |||
wrapper_chain = filter(None, [ | |||
wrapper_chain = list(filter(None, [ | |||
(expand_single_child and not rule.alias) and ExpandSingleChild, | |||
maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders and options else None), | |||
self.propagate_positions and PropagatePositions, | |||
self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens), | |||
]) | |||
])) | |||
yield rule, wrapper_chain | |||
@@ -15,11 +15,13 @@ class WithLexer(object): | |||
def init_traditional_lexer(self, lexer_conf):
    """Set up a TraditionalLexer from the given lexer configuration."""
    self.lexer_conf = lexer_conf
    self.postlex = lexer_conf.postlex
    self.lexer = TraditionalLexer(lexer_conf.tokens,
                                  ignore=lexer_conf.ignore,
                                  user_callbacks=lexer_conf.callbacks)
def init_contextual_lexer(self, lexer_conf): | |||
self.lexer_conf = lexer_conf | |||
self.postlex = lexer_conf.postlex | |||
states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()} | |||
always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else () | |||
always_accept = self.postlex.always_accept if self.postlex else () | |||
self.lexer = ContextualLexer(lexer_conf.tokens, states, | |||
ignore=lexer_conf.ignore, | |||
always_accept=always_accept, | |||
@@ -27,8 +29,8 @@ class WithLexer(object): | |||
def lex(self, text):
    """Tokenize text, running the post-lexer over the stream if one is set."""
    token_stream = self.lexer.lex(text)
    if not self.postlex:
        return token_stream
    return self.postlex.process(token_stream)
def parse(self, text): | |||
@@ -38,15 +40,19 @@ class WithLexer(object): | |||
def serialize(self):
    """Serialize parser and lexer, tagging the payload with the concrete
    frontend class name so deserialization can pick the right class."""
    payload = {'type': type(self).__name__}
    payload['parser'] = self.parser.serialize()
    payload['lexer'] = self.lexer.serialize()
    return payload
@classmethod
def deserialize(cls, data, callbacks):
    """Rebuild a frontend from the payload produced by serialize().

    data['type'] names the concrete WithLexer subclass that produced the
    payload; callbacks are the freshly built parse-tree callbacks passed
    down to the LALR parser.
    """
    class_ = globals()[data['type']]
    # Guard the globals() lookup (previously flagged '# XXX unsafe'):
    # only instantiate frontend classes, never an arbitrary module-level
    # name smuggled in through serialized data.
    if not (isinstance(class_, type) and issubclass(class_, WithLexer)):
        raise ValueError('Unknown frontend type: %r' % data['type'])
    parser = lalr_parser.Parser.deserialize(data['parser'], callbacks)
    assert parser
    inst = class_.__new__(class_)
    inst.parser = parser
    inst.lexer = Lexer.deserialize(data['lexer'])
    inst.postlex = None  # TODO: post-lexers are not serialized yet
    return inst
@@ -59,7 +59,7 @@ class Parser(object): | |||
} | |||
@classmethod | |||
def deserialize(cls, data): | |||
def deserialize(cls, data, callbacks): | |||
tokens = data['tokens'] | |||
rules = {idx: Rule.deserialize(r) for idx, r in data['rules'].items()} | |||
states = { | |||
@@ -68,7 +68,10 @@ class Parser(object): | |||
for state, actions in data['states'].items() | |||
} | |||
parse_table = IntParseTable(states, data['start_state'], data['end_state']) | |||
print(parse_table) | |||
inst = cls.__new__(cls) | |||
inst.parser = _Parser(parse_table, callbacks) | |||
inst.parse = inst.parser.parse | |||
return inst | |||