@@ -1,4 +1,6 @@
-class Symbol(object):
+from .utils import Serialize
+class Symbol(Serialize):
     is_term = NotImplemented
     def __init__(self, name):
@@ -19,16 +21,10 @@ class Symbol(object):
     fullrepr = property(__repr__)
-    @classmethod
-    def deserialize(cls, data):
-        class_ = {
-            'T': Terminal,
-            'NT': NonTerminal,
-        }[data[0]]
-        return class_(*data[1:])
 class Terminal(Symbol):
+    __serialize_fields__ = 'name', 'filter_out'
     is_term = True
     def __init__(self, name, filter_out=False):
@@ -39,23 +35,25 @@ class Terminal(Symbol):
     def fullrepr(self):
         return '%s(%r, %r)' % (type(self).__name__, self.name, self.filter_out)
-    def serialize(self):
-        return ['T', self.name, self.filter_out]
 class NonTerminal(Symbol):
+    __serialize_fields__ = 'name',
     is_term = False
-    def serialize(self):
-        return ['NT', self.name]
-class Rule(object):
+class Rule(Serialize):
     """
         origin : a symbol
        expansion : a list of symbols
        order : index of this expansion amongst all rules of the same name
    """
     __slots__ = ('origin', 'expansion', 'alias', 'options', 'order', '_hash')
+    __serialize_fields__ = 'origin', 'expansion', 'order', 'alias', 'options'
+    __serialize_namespace__ = lambda: (Terminal, NonTerminal, RuleOptions)
     def __init__(self, origin, expansion, order=0, alias=None, options=None):
         self.origin = origin
         self.expansion = expansion
@@ -64,6 +62,8 @@ class Rule(object):
         self.options = options
         self._hash = hash((self.origin, tuple(self.expansion)))
+    def _deserialize(self):
+        self._hash = hash((self.origin, tuple(self.expansion)))
     def __str__(self):
         return '<%s : %s>' % (self.origin.name, ' '.join(x.name for x in self.expansion))
@@ -79,22 +79,11 @@ class Rule(object):
             return False
         return self.origin == other.origin and self.expansion == other.expansion
-    def serialize(self):
-        return [self.origin.serialize(), list(s.serialize() for s in self.expansion), self.order, self.alias, self.options.serialize() if self.options else None]
-    @classmethod
-    def deserialize(cls, data):
-        origin, expansion, order, alias, options = data
-        return cls(
-            Symbol.deserialize(origin),
-            [Symbol.deserialize(s) for s in expansion],
-            order,
-            alias,
-            RuleOptions.deserialize(options) if options else None
-        )
-class RuleOptions:
+class RuleOptions(Serialize):
+    __serialize_fields__ = 'keep_all_tokens', 'expand1', 'priority', 'empty_indices'
     def __init__(self, keep_all_tokens=False, expand1=False, priority=None, empty_indices=()):
         self.keep_all_tokens = keep_all_tokens
         self.expand1 = expand1
@@ -107,10 +96,3 @@ class RuleOptions:
             self.expand1,
             self.priority,
         )
-    def serialize(self):
-        return [self.keep_all_tokens, self.expand1, self.priority, list(self.empty_indices)]
-    @classmethod
-    def deserialize(cls, data):
-        return cls(*data)
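With these changes to the grammar classes, serialization becomes declarative: each class lists its fields in __serialize_fields__ and inherits serialize()/deserialize() from the Serialize mixin added in utils.py at the end of this diff, which emits a plain dict tagged with __type__. Rule additionally uses the _deserialize() hook to recompute its cached hash after loading. A minimal sketch of the intended round trip, assuming the mixin behaves as shown below (the commented dict is illustrative, not exact output):

    t = Terminal('NUMBER')
    data = t.serialize()
    # roughly {'name': 'NUMBER', 'filter_out': False, '__type__': 'Terminal'}
    t2 = Terminal.deserialize(data)   # rebuilt via cls.__new__; __init__ is not called
    assert t2.name == 'NUMBER' and t2.filter_out is False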
@@ -5,7 +5,7 @@ import time
 from collections import defaultdict
 from io import open
-from .utils import STRING_TYPE
+from .utils import STRING_TYPE, Serialize
 from .load_grammar import load_grammar
 from .tree import Tree
 from .common import LexerConf, ParserConf
@@ -13,9 +13,10 @@ from .common import LexerConf, ParserConf
 from .lexer import Lexer, TraditionalLexer
 from .parse_tree_builder import ParseTreeBuilder
 from .parser_frontends import get_frontend
+from .grammar import Rule
-class LarkOptions(object):
+class LarkOptions(Serialize):
     """Specifies the options for Lark
     """
@@ -132,7 +133,7 @@ class Profiler:
         return wrapper
-class Lark:
+class Lark(Serialize):
     def __init__(self, grammar, **options):
         """
         grammar : a string or file-object containing the grammar spec (using Lark's ebnf syntax)
@@ -223,6 +224,8 @@ class Lark:
     if __init__.__doc__:
         __init__.__doc__ += "\nOPTIONS:" + LarkOptions.OPTIONS_DOC
+    __serialize_fields__ = 'parser', 'rules', 'options'
     def _build_lexer(self):
         return TraditionalLexer(self.lexer_conf.tokens, ignore=self.lexer_conf.ignore, user_callbacks=self.lexer_conf.callbacks)
@@ -236,16 +239,8 @@ class Lark:
         parser_conf = ParserConf(self.rules, self._callbacks, self.options.start)
         return self.parser_class(self.lexer_conf, parser_conf, options=self.options)
-    def serialize(self):
-        return {
-            'parser': self.parser.serialize(),
-            'rules': [r.serialize() for r in self.rules],
-            'options': self.options.serialize(),
-        }
     @classmethod
     def deserialize(cls, data):
-        from .grammar import Rule
         inst = cls.__new__(cls)
         inst.options = LarkOptions.deserialize(data['options'])
         inst.rules = [Rule.deserialize(r) for r in data['rules']]
@@ -2,10 +2,12 @@
 import re
-from .utils import Str, classify, get_regexp_width, Py36
+from .utils import Str, classify, get_regexp_width, Py36, Serialize
 from .exceptions import UnexpectedCharacters, LexError
-class Pattern(object):
+class Pattern(Serialize):
+    __serialize_fields__ = 'value', 'flags'
     def __init__(self, value, flags=()):
         self.value = value
         self.flags = frozenset(flags)
@@ -35,15 +37,6 @@ class Pattern(object):
             value = ('(?%s)' % f) + value
         return value
-    @classmethod
-    def deserialize(cls, data):
-        class_ = {
-            's': PatternStr,
-            're': PatternRE,
-        }[data[0]]
-        value, flags = data[1:]
-        return class_(value, frozenset(flags))
 class PatternStr(Pattern):
     def to_regexp(self):
@@ -54,9 +47,6 @@ class PatternStr(Pattern):
         return len(self.value)
     max_width = min_width
-    def serialize(self):
-        return ['s', self.value, list(self.flags)]
 class PatternRE(Pattern):
     def to_regexp(self):
         return self._get_flags(self.value)
@@ -68,10 +58,11 @@ class PatternRE(Pattern):
     def max_width(self):
         return get_regexp_width(self.to_regexp())[1]
-    def serialize(self):
-        return ['re', self.value, list(self.flags)]
-class TerminalDef(object):
+class TerminalDef(Serialize):
+    __serialize_fields__ = 'name', 'pattern', 'priority'
+    __serialize_namespace__ = lambda: (PatternStr, PatternRE)
     def __init__(self, name, pattern, priority=1):
         assert isinstance(pattern, Pattern), pattern
         self.name = name
@@ -81,14 +72,6 @@ class TerminalDef(object):
     def __repr__(self):
         return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)
-    def serialize(self):
-        return [self.name, self.pattern.serialize(), self.priority]
-    @classmethod
-    def deserialize(cls, data):
-        name, pattern, priority = data
-        return cls(name, Pattern.deserialize(pattern), priority)
 ###{standalone
@@ -278,7 +261,7 @@ def _regexp_has_newline(r):
     """
     return '\n' in r or '\\n' in r or '[^' in r or ('(?s' in r and '.' in r)
-class Lexer:
+class Lexer(Serialize):
     """Lexer interface
     Method Signatures:
@@ -289,15 +272,16 @@ class Lexer:
     set_parser_state = NotImplemented
     lex = NotImplemented
-    @classmethod
-    def deserialize(cls, data):
-        class_ = {
-            'traditional': TraditionalLexer,
-            'contextual': ContextualLexer,
-        }[data['type']]
-        return class_.deserialize(data)
 class TraditionalLexer(Lexer):
+    __serialize_fields__ = 'terminals', 'ignore_types', 'newline_types'
+    __serialize_namespace__ = lambda: (TerminalDef,)
+    def _deserialize(self):
+        self.mres = build_mres(self.terminals)
+        self.callback = {} # TODO implement
     def __init__(self, terminals, ignore=(), user_callbacks={}):
         assert all(isinstance(t, TerminalDef) for t in terminals), terminals
@@ -339,26 +323,13 @@ class TraditionalLexer(Lexer):
     def lex(self, stream):
         return _Lex(self).lex(stream, self.newline_types, self.ignore_types)
-    def serialize(self):
-        return {
-            'type': 'traditional',
-            'terminals': [t.serialize() for t in self.terminals],
-            'ignore_types': self.ignore_types,
-            'newline_types': self.newline_types,
-        }
-    @classmethod
-    def deserialize(cls, data):
-        inst = cls.__new__(cls)
-        inst.terminals = [TerminalDef.deserialize(t) for t in data['terminals']]
-        inst.mres = build_mres(inst.terminals)
-        inst.ignore_types = data['ignore_types']
-        inst.newline_types = data['newline_types']
-        inst.callback = {} # TODO implement
-        return inst
 class ContextualLexer(Lexer):
+    __serialize_fields__ = 'root_lexer', 'lexers'
+    __serialize_namespace__ = lambda: (TraditionalLexer,)
     def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}):
         tokens_by_name = {}
         for t in terminals:
@@ -392,17 +363,3 @@ class ContextualLexer(Lexer):
             yield x
             l.lexer = self.lexers[self.parser_state]
            l.state = self.parser_state
-    def serialize(self):
-        return {
-            'type': 'contextual',
-            'root_lexer': self.root_lexer.serialize(),
-            'lexers': {state: lexer.serialize() for state, lexer in self.lexers.items()}
-        }
-    @classmethod
-    def deserialize(cls, data):
-        inst = cls.__new__(cls)
-        inst.lexers = {state:Lexer.deserialize(lexer) for state, lexer in data['lexers'].items()}
-        inst.root_lexer = TraditionalLexer.deserialize(data['root_lexer'])
-        return inst
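Nested objects are resolved through __serialize_namespace__: when a serialized field is itself a dict carrying __type__, _deserialize() looks that name up in the namespace declared by the owning class. TraditionalLexer likewise uses the _deserialize() hook to rebuild its compiled regexps (mres) after loading. A hedged sketch for TerminalDef, assuming the utils.Serialize code at the end of this diff (exact dict layout may differ; flags round-trip as a plain list, per the TODO in utils):

    td = TerminalDef('COMMA', PatternStr(','))
    data = td.serialize()
    # roughly {'name': 'COMMA',
    #          'pattern': {'value': ',', 'flags': [], '__type__': 'PatternStr'},
    #          'priority': 1, '__type__': 'TerminalDef'}
    td2 = TerminalDef.deserialize(data)
    # 'PatternStr' is looked up in TerminalDef.__serialize_namespace__,
    # so td2.pattern comes back as a PatternStr instance.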
@@ -1,17 +1,31 @@
 import re
 from functools import partial
-from .utils import get_regexp_width
+from .utils import get_regexp_width, Serialize
 from .parsers.grammar_analysis import GrammarAnalyzer
 from .lexer import TraditionalLexer, ContextualLexer, Lexer, Token
 from .parsers import lalr_parser, earley, xearley, cyk
+from .grammar import Rule
 from .tree import Tree
-class WithLexer(object):
+class WithLexer(Serialize):
     lexer = None
     parser = None
     lexer_conf = None
+    __serialize_fields__ = 'parser', 'lexer'
+    __serialize_namespace__ = lambda: (Rule, ContextualLexer, LALR_ContextualLexer)
+    @classmethod
+    def deserialize(cls, data, callbacks):
+        inst = super(WithLexer, cls).deserialize(data)
+        inst.postlex = None # TODO
+        inst.parser = lalr_parser.Parser.deserialize(inst.parser, callbacks)
+        return inst
+    def _serialize(self, data):
+        data['parser'] = data['parser'].serialize()
     def init_traditional_lexer(self, lexer_conf):
         self.lexer_conf = lexer_conf
         self.lexer = TraditionalLexer(lexer_conf.tokens, ignore=lexer_conf.ignore, user_callbacks=lexer_conf.callbacks)
@@ -36,26 +50,6 @@ class WithLexer(object):
         sps = self.lexer.set_parser_state
         return self.parser.parse(token_stream, *[sps] if sps is not NotImplemented else [])
-    def serialize(self):
-        return {
-            'type': type(self).__name__,
-            'parser': self.parser.serialize(),
-            'lexer': self.lexer.serialize(),
-        }
-    @classmethod
-    def deserialize(cls, data, callbacks):
-        class_ = {
-            'LALR_TraditionalLexer': LALR_TraditionalLexer,
-            'LALR_ContextualLexer': LALR_ContextualLexer,
-        }[data['type']]
-        parser = lalr_parser.Parser.deserialize(data['parser'], callbacks)
-        assert parser
-        inst = class_.__new__(class_)
-        inst.parser = parser
-        inst.lexer = Lexer.deserialize(data['lexer'])
-        inst.postlex = None # TODO
-        return inst
 class LALR_TraditionalLexer(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
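WithLexer keeps the declarative field list ('parser', 'lexer') but layers hooks on top: _serialize() swaps the live LALR parser for its serialized parse table, and the overridden deserialize() rebuilds a lalr_parser.Parser from that table once the runtime callbacks are available (callbacks are deliberately not part of the payload). A rough sketch of the call pattern; frontend and callbacks are placeholder names, not identifiers from this diff:

    payload = frontend.serialize()
    # roughly {'parser': {...parse-table dict...}, 'lexer': {...}, '__type__': 'LALR_ContextualLexer'}
    frontend2 = LALR_ContextualLexer.deserialize(payload, callbacks)
    # super().deserialize() restores the plain fields; the classmethod then replaces
    # frontend2.parser (still a parse-table dict at that point) with a real Parser.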
@@ -9,7 +9,7 @@ For now, shift/reduce conflicts are automatically resolved as shifts.
 import logging
 from collections import defaultdict
-from ..utils import classify, classify_bool, bfs, fzset
+from ..utils import classify, classify_bool, bfs, fzset, Serialize, Enumerator
 from ..exceptions import GrammarError
 from .grammar_analysis import GrammarAnalyzer, Terminal
@@ -31,6 +31,36 @@ class ParseTable:
         self.start_state = start_state
         self.end_state = end_state
+    def serialize(self):
+        tokens = Enumerator()
+        rules = Enumerator()
+        states = {
+            state: {tokens.get(token): ((1, rules.get(arg)) if action is Reduce else (0, arg))
+                    for token, (action, arg) in actions.items()}
+            for state, actions in self.states.items()
+        }
+        return {
+            'tokens': tokens.reversed(),
+            'rules': {idx: r.serialize() for idx, r in rules.reversed().items()},
+            'states': states,
+            'start_state': self.start_state,
+            'end_state': self.end_state,
+        }
+    @classmethod
+    def deserialize(cls, data):
+        tokens = data['tokens']
+        rules = data['rules']
+        states = {
+            state: {tokens[token]: ((Reduce, rules[arg]) if action==1 else (Shift, arg))
+                    for token, (action, arg) in actions.items()}
+            for state, actions in data['states'].items()
+        }
+        return cls(states, data['start_state'], data['end_state'])
 class IntParseTable(ParseTable):
     @classmethod
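The parse table dominates the payload size, so ParseTable.serialize() interns token names and rules through Enumerator and stores each action as a compact pair: (0, next_state) for Shift and (1, rule_index) for Reduce; deserialize() maps them back to (Shift, state) / (Reduce, rule). A hypothetical encoded table for a toy grammar, to show the shape only (all values made up):

    encoded = {
        'tokens': {0: 'NUMBER', 1: '$END'},          # index -> token name
        'rules': {0: {'__type__': 'Rule'}},          # index -> serialized Rule (fields omitted)
        'states': {0: {0: (0, 1)}, 1: {1: (1, 0)}},  # (0, s) = Shift to state s, (1, r) = Reduce by rule r
        'start_state': 0,
        'end_state': 1,
    }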
@@ -5,12 +5,12 @@
 from ..exceptions import UnexpectedToken
 from ..lexer import Token
 from ..grammar import Rule
-from ..utils import Enumerator
+from ..utils import Enumerator, Serialize
 from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable
-class Parser(object):
+class Parser:
     def __init__(self, parser_conf, debug=False):
         assert all(r.options is None or r.options.priority is None
                    for r in parser_conf.rules), "LALR doesn't yet support prioritization"
@@ -21,42 +21,18 @@ class Parser(object):
         self._parse_table = analysis.parse_table
         self.parser_conf = parser_conf
         self.parser = _Parser(analysis.parse_table, callbacks)
-        self.parse = self.parser.parse
-    def serialize(self):
-        tokens = Enumerator()
-        rules = Enumerator()
-        states = {
-            state: {tokens.get(token): ((1, rules.get(arg)) if action is Reduce else (0, arg))
-                    for token, (action, arg) in actions.items()}
-            for state, actions in self._parse_table.states.items()
-        }
-        return {
-            'tokens': tokens.reversed(),
-            'rules': {idx: r.serialize() for idx, r in rules.reversed().items()},
-            'states': states,
-            'start_state': self._parse_table.start_state,
-            'end_state': self._parse_table.end_state,
-        }
     @classmethod
     def deserialize(cls, data, callbacks):
-        tokens = data['tokens']
-        rules = {idx: Rule.deserialize(r) for idx, r in data['rules'].items()}
-        states = {
-            state: {tokens[token]: ((Reduce, rules[arg]) if action==1 else (Shift, arg))
-                    for token, (action, arg) in actions.items()}
-            for state, actions in data['states'].items()
-        }
-        parse_table = IntParseTable(states, data['start_state'], data['end_state'])
         inst = cls.__new__(cls)
-        inst.parser = _Parser(parse_table, callbacks)
-        inst.parse = inst.parser.parse
+        inst.parser = _Parser(IntParseTable.deserialize(data), callbacks)
         return inst
+    def serialize(self):
+        return self._parse_table.serialize()
+    def parse(self, *args):
+        return self.parser.parse(*args)
 ###{standalone
@@ -44,6 +44,54 @@ def bfs(initial, expand):
+def _serialize(value):
+    if isinstance(value, Serialize):
+        return value.serialize()
+    elif isinstance(value, list):
+        return [_serialize(elem) for elem in value]
+    elif isinstance(value, frozenset):
+        return list(value) # TODO reversible?
+    elif isinstance(value, dict):
+        return {key:_serialize(elem) for key, elem in value.items()}
+    return value
+def _deserialize(data, namespace):
+    if isinstance(data, dict):
+        if '__type__' in data: # Object
+            class_ = namespace[data['__type__']]
+            return class_.deserialize(data)
+        return {key:_deserialize(value, namespace) for key, value in data.items()}
+    elif isinstance(data, list):
+        return [_deserialize(value, namespace) for value in data]
+    return data
+class Serialize(object):
+    def serialize(self):
+        fields = getattr(self, '__serialize_fields__')
+        res = {f: _serialize(getattr(self, f)) for f in fields}
+        res['__type__'] = type(self).__name__
+        postprocess = getattr(self, '_serialize', None)
+        if postprocess:
+            postprocess(res)
+        return res
+    @classmethod
+    def deserialize(cls, data):
+        namespace = getattr(cls, '__serialize_namespace__', dict)
+        namespace = {c.__name__:c for c in namespace()}
+        fields = getattr(cls, '__serialize_fields__')
+        inst = cls.__new__(cls)
+        for f in fields:
+            setattr(inst, f, _deserialize(data[f], namespace))
+        postprocess = getattr(inst, '_deserialize', None)
+        if postprocess:
+            postprocess()
+        return inst
 ###{standalone
 try:
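The mixin itself is generic; nothing in it is lark-specific. A toy class, not part of this diff, illustrates the contract it expects of its users, assuming the utils code above:

    class Point(Serialize):
        __serialize_fields__ = 'x', 'y'
        def __init__(self, x, y):
            self.x = x
            self.y = y

    p = Point(1, 2)
    data = p.serialize()            # {'x': 1, 'y': 2, '__type__': 'Point'}
    q = Point.deserialize(data)     # built with cls.__new__; __init__ is never called
    assert (q.x, q.y) == (1, 2)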