
Basic serialize/deserialize working!

Erez Shinan 6 years ago
commit 335206911d
6 changed files with 146 additions and 33 deletions
  1. lark/grammar.py             +28  -6
  2. lark/lark.py                +64  -15
  3. lark/lexer.py               +34  -1
  4. lark/parse_tree_builder.py  +2   -2
  5. lark/parser_frontends.py    +13  -7
  6. lark/parsers/lalr_parser.py +5   -2
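
At a glance, serialization hangs off three top-level keys. A shape summary, derived from the diffs below (value shapes are summarized, not exact):

    # Lark.serialize() ->
    # {
    #     'parser':  {'type': ..., 'parser': ..., 'lexer': ...},   # frontend payload
    #     'rules':   [[origin, expansion, order, alias, options], ...],
    #     'options': {...},                                        # LarkOptions dict
    # }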

lark/grammar.py (+28, -6)

@@ -19,6 +19,15 @@ class Symbol(object):
 
     fullrepr = property(__repr__)
 
+    @classmethod
+    def deserialize(cls, data):
+        class_ = {
+            'T': Terminal,
+            'NT': NonTerminal,
+        }[data[0]]
+        return class_(*data[1:])
+
+
 class Terminal(Symbol):
     is_term = True
 
@@ -71,17 +80,26 @@ class Rule(object):
         return self.origin == other.origin and self.expansion == other.expansion
 
     def serialize(self):
-        return [self.origin.serialize(), list(s.serialize() for s in self.expansion), self.alias, self.options.serialize() if self.options else None]
-        # def deserialize(self):
-        #     return [self.origin.serialize(), list(s.serialize() for s in self.expansion), self.alias, self.options.serialize() if self.options else None]
+        return [self.origin.serialize(), list(s.serialize() for s in self.expansion), self.order, self.alias, self.options.serialize() if self.options else None]
+
+    @classmethod
+    def deserialize(cls, data):
+        origin, expansion, order, alias, options = data
+        return cls(
+            Symbol.deserialize(origin),
+            [Symbol.deserialize(s) for s in expansion],
+            order,
+            alias,
+            RuleOptions.deserialize(options) if options else None
+        )
 
 
 class RuleOptions:
-    def __init__(self, keep_all_tokens=False, expand1=False, priority=None):
+    def __init__(self, keep_all_tokens=False, expand1=False, priority=None, empty_indices=()):
        self.keep_all_tokens = keep_all_tokens
        self.expand1 = expand1
        self.priority = priority
-        self.empty_indices = ()
+        self.empty_indices = empty_indices
 
     def __repr__(self):
         return 'RuleOptions(%r, %r, %r)' % (
@@ -91,4 +109,8 @@ class RuleOptions:
         )
 
     def serialize(self):
         return [self.keep_all_tokens, self.expand1, self.priority, list(self.empty_indices)]
+
+    @classmethod
+    def deserialize(cls, data):
+        return cls(*data)
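
Note: Symbol.deserialize dispatches on a leading tag, which implies Symbol.serialize emits tagged lists. A minimal round-trip sketch (the tag values come from the lookup table above; everything else here is illustrative):

    from lark.grammar import Symbol, Terminal, NonTerminal

    t = Symbol.deserialize(['T', 'NUMBER'])    # -> Terminal('NUMBER')
    nt = Symbol.deserialize(['NT', 'expr'])    # -> NonTerminal('expr')
    assert t.is_term and not nt.is_term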

lark/lark.py (+64, -15)

@@ -51,24 +51,39 @@ class LarkOptions(object):
     if __doc__:
         __doc__ += OPTIONS_DOC
 
+    _defaults = {
+        'debug': False,
+        'keep_all_tokens': False,
+        'tree_class': Tree,
+        'cache_grammar': False,
+        'postlex': None,
+        'parser': 'earley',
+        'lexer': 'auto',
+        'transformer': None,
+        'start': 'start',
+        'profile': False,
+        'priority': 'auto',
+        'ambiguity': 'auto',
+        'propagate_positions': False,
+        'lexer_callbacks': {},
+        'maybe_placeholders': False,
+    }
+
     def __init__(self, options_dict):
         o = dict(options_dict)
 
-        self.debug = bool(o.pop('debug', False))
-        self.keep_all_tokens = bool(o.pop('keep_all_tokens', False))
-        self.tree_class = o.pop('tree_class', Tree)
-        self.cache_grammar = o.pop('cache_grammar', False)
-        self.postlex = o.pop('postlex', None)
-        self.parser = o.pop('parser', 'earley')
-        self.lexer = o.pop('lexer', 'auto')
-        self.transformer = o.pop('transformer', None)
-        self.start = o.pop('start', 'start')
-        self.profile = o.pop('profile', False)
-        self.priority = o.pop('priority', 'auto')
-        self.ambiguity = o.pop('ambiguity', 'auto')
-        self.propagate_positions = o.pop('propagate_positions', False)
-        self.lexer_callbacks = o.pop('lexer_callbacks', {})
-        self.maybe_placeholders = o.pop('maybe_placeholders', False)
+        options = {}
+        for name, default in self._defaults.items():
+            if name in o:
+                value = o.pop(name)
+                if isinstance(default, bool):
+                    value = bool(value)
+            else:
+                value = default
+
+            options[name] = value
+
+        self.__dict__['options'] = options
 
         assert self.parser in ('earley', 'lalr', 'cyk', None)
 
@@ -79,6 +94,18 @@ class LarkOptions(object):
         if o:
             raise ValueError("Unknown options: %s" % o.keys())
 
+    def __getattr__(self, name):
+        return self.options[name]
+    def __setattr__(self, name, value):
+        self.options[name] = value
+
+    def serialize(self):
+        return self.options
+
+    @classmethod
+    def deserialize(cls, data):
+        return cls(data)
+
 
 class Profiler:
     def __init__(self):
@@ -208,6 +235,28 @@ class Lark:
 
         return self.parser_class(self.lexer_conf, parser_conf, options=self.options)
 
+    def serialize(self):
+        return {
+            'parser': self.parser.serialize(),
+            'rules': [r.serialize() for r in self.rules],
+            'options': self.options.serialize(),
+        }
+    @classmethod
+    def deserialize(cls, data):
+        from .grammar import Rule
+        inst = cls.__new__(cls)
+
+        rules = [Rule.deserialize(r) for r in data['rules']]
+        options = LarkOptions.deserialize(data['options'])
+
+        ptb = ParseTreeBuilder(rules, options.tree_class, options.propagate_positions, options.keep_all_tokens, options.parser!='lalr' and options.ambiguity=='explicit', options.maybe_placeholders)
+        callbacks = ptb.create_callback(None)
+
+        parser_class = get_frontend(options.parser, options.lexer)
+        inst.parser = parser_class.deserialize(data['parser'], callbacks)
+        return inst
+
+
     @classmethod
     def open(cls, grammar_filename, rel_to=None, **options):
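
Taken together, Lark.serialize and Lark.deserialize give a load path that skips grammar analysis entirely. A hedged usage sketch (the grammar and options are illustrative; note that TerminalDef.serialize still embeds pattern objects, so the payload is not plain JSON yet):

    from lark import Lark

    parser = Lark('start: "a"+', parser='lalr')
    data = parser.serialize()           # {'parser': ..., 'rules': ..., 'options': ...}
    restored = Lark.deserialize(data)   # rebuilds callbacks and the LALR frontend
    restored.parse('aaa')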


lark/lexer.py (+34, -1)

@@ -68,6 +68,10 @@ class TerminalDef(object):
     def serialize(self):
         return [self.name, self.pattern, self.priority]
 
+    @classmethod
+    def deserialize(cls, data):
+        return cls(*data)
+
 
 
 ###{standalone
@@ -268,6 +272,14 @@ class Lexer:
     set_parser_state = NotImplemented
     lex = NotImplemented
 
+    @classmethod
+    def deserialize(cls, data):
+        class_ = {
+            'traditional': TraditionalLexer,
+            'contextual': ContextualLexer,
+        }[data['type']]
+        return class_.deserialize(data)
+
 
 class TraditionalLexer(Lexer):
     def __init__(self, terminals, ignore=(), user_callbacks={}):
         assert all(isinstance(t, TerminalDef) for t in terminals), terminals
@@ -312,11 +324,22 @@ class TraditionalLexer(Lexer):
 
     def serialize(self):
         return {
+            'type': 'traditional',
             'terminals': [t.serialize() for t in self.terminals],
             'ignore_types': self.ignore_types,
             'newline_types': self.newline_types,
         }
 
+    @classmethod
+    def deserialize(cls, data):
+        inst = cls.__new__(cls)
+        inst.terminals = [TerminalDef.deserialize(t) for t in data['terminals']]
+        inst.mres = build_mres(inst.terminals)
+        inst.ignore_types = data['ignore_types']
+        inst.newline_types = data['newline_types']
+        inst.callback = {}  # TODO implement
+        return inst
+
 
 class ContextualLexer(Lexer):
     def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}):
@@ -354,5 +377,15 @@ class ContextualLexer(Lexer):
         l.state = self.parser_state
 
     def serialize(self):
-        return {state: lexer.serialize() for state, lexer in self.lexers.items()}
+        return {
+            'type': 'contextual',
+            'root_lexer': self.root_lexer.serialize(),
+            'lexers': {state: lexer.serialize() for state, lexer in self.lexers.items()}
+        }
+
+    @classmethod
+    def deserialize(cls, data):
+        inst = cls.__new__(cls)
+        inst.lexers = {state:Lexer.deserialize(lexer) for state, lexer in data['lexers'].items()}
+        inst.root_lexer = TraditionalLexer.deserialize(data['root_lexer'])
+        return inst
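
The lexer side mirrors Symbol.deserialize: each serialize() payload now carries a 'type' tag, and the Lexer base class routes it to the right subclass. A small sketch of the round trip (note that inst.callback = {} means user lexer_callbacks are dropped on load for now, per the TODO):

    from lark.lexer import Lexer

    def lexer_roundtrip(lexer):
        # Works for either subclass: the payload names its own 'type'
        # ('traditional' or 'contextual').
        return Lexer.deserialize(lexer.serialize())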

lark/parse_tree_builder.py (+2, -2)

@@ -209,12 +209,12 @@ class ParseTreeBuilder:
             keep_all_tokens = self.always_keep_all_tokens or (options.keep_all_tokens if options else False)
             expand_single_child = options.expand1 if options else False
 
-            wrapper_chain = filter(None, [
+            wrapper_chain = list(filter(None, [
                 (expand_single_child and not rule.alias) and ExpandSingleChild,
                 maybe_create_child_filter(rule.expansion, keep_all_tokens, self.ambiguous, options.empty_indices if self.maybe_placeholders and options else None),
                 self.propagate_positions and PropagatePositions,
                 self.ambiguous and maybe_create_ambiguous_expander(self.tree_class, rule.expansion, keep_all_tokens),
-            ])
+            ]))
 
             yield rule, wrapper_chain
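
The list(...) wrapper matters now that the callback machinery can run more than once (deserialize rebuilds callbacks from the same rules): in Python 3 a bare filter object is a one-shot iterator.

    chain = filter(None, [None, len, None, str])
    assert list(chain) == [len, str]
    assert list(chain) == []   # exhausted; a second pass silently sees nothing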



lark/parser_frontends.py (+13, -7)

@@ -15,11 +15,13 @@ class WithLexer(object):
     def init_traditional_lexer(self, lexer_conf):
         self.lexer_conf = lexer_conf
         self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks)
+        self.postlex = lexer_conf.postlex
 
     def init_contextual_lexer(self, lexer_conf):
         self.lexer_conf = lexer_conf
+        self.postlex = lexer_conf.postlex
         states = {idx:list(t.keys()) for idx, t in self.parser._parse_table.states.items()}
-        always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
+        always_accept = self.postlex.always_accept if self.postlex else ()
         self.lexer = ContextualLexer(lexer_conf.tokens, states,
                                      ignore=lexer_conf.ignore,
                                      always_accept=always_accept,
@@ -27,8 +29,8 @@
 
     def lex(self, text):
         stream = self.lexer.lex(text)
-        if self.lexer_conf.postlex:
-            return self.lexer_conf.postlex.process(stream)
+        if self.postlex:
+            return self.postlex.process(stream)
         return stream
 
     def parse(self, text):
@@ -38,15 +40,19 @@
 
     def serialize(self):
         return {
-            # 'class': type(self).__name__,
+            'type': type(self).__name__,
             'parser': self.parser.serialize(),
             'lexer': self.lexer.serialize(),
         }
     @classmethod
-    def deserialize(cls, data):
-        inst = cls.__new__(cls)
-        inst.parser = lalr_parser.Parser.deserialize(data['parser'])
+    def deserialize(cls, data, callbacks):
+        class_ = globals()[data['type']]  # XXX unsafe
+        parser = lalr_parser.Parser.deserialize(data['parser'], callbacks)
+        assert parser
+        inst = class_.__new__(class_)
+        inst.parser = parser
         inst.lexer = Lexer.deserialize(data['lexer'])
+        inst.postlex = None  # TODO
         return inst
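
The globals()[data['type']] lookup (flagged "XXX unsafe" above) resolves any name in the module, not just frontend classes. A hypothetical hardening, assuming the usual frontend class names defined elsewhere in this module (e.g. LALR_TraditionalLexer, LALR_ContextualLexer):

    _KNOWN_FRONTENDS = {c.__name__: c
                        for c in (LALR_TraditionalLexer, LALR_ContextualLexer)}

    class_ = _KNOWN_FRONTENDS[data['type']]   # KeyError instead of arbitrary lookup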




lark/parsers/lalr_parser.py (+5, -2)

@@ -59,7 +59,7 @@ class Parser(object):
         }
 
     @classmethod
-    def deserialize(cls, data):
+    def deserialize(cls, data, callbacks):
         tokens = data['tokens']
         rules = {idx: Rule.deserialize(r) for idx, r in data['rules'].items()}
         states = {
@@ -68,7 +68,10 @@
             for state, actions in data['states'].items()
         }
         parse_table = IntParseTable(states, data['start_state'], data['end_state'])
+        print(parse_table)
         inst = cls.__new__(cls)
+        inst.parser = _Parser(parse_table, callbacks)
+        inst.parse = inst.parser.parse
         return inst
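
The reconstructed Parser wires the plain-data parse table straight into the runtime _Parser, with tree-building callbacks injected by the caller rather than stored in the payload (the stray print(parse_table) looks like leftover debugging). Expected payload shape, inferred from the deserialize body; value details are assumptions, since the serialize side is outside this hunk:

    # data = {
    #     'tokens': [...],                       # terminal names/definitions
    #     'rules': {idx: <Rule payload>},        # fed to Rule.deserialize
    #     'states': {state: {token: action}},    # rebuilt into IntParseTable
    #     'start_state': ...,
    #     'end_state': ...,
    # }
    # Parser.deserialize(data, callbacks).parse(...)   # parse is bound to _Parser.parse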




