Przeglądaj źródła

Serialized lark is now json compatible

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.7.1
Erez Shinan 5 lat temu
rodzic
commit
066303fdab
5 zmienionych plików z 46 dodań i 23 usunięć
  1. +3
    -2
      lark/lark.py
  2. +19
    -2
      lark/lexer.py
  3. +4
    -1
      lark/parser_frontends.py
  4. +1
    -18
      lark/parsers/lalr_parser.py
  5. +19
    -0
      lark/utils.py

+ 3
- 2
lark/lark.py Wyświetl plik

@@ -54,7 +54,7 @@ class LarkOptions(object):
_defaults = {
'debug': False,
'keep_all_tokens': False,
'tree_class': Tree,
'tree_class': None,
'cache_grammar': False,
'postlex': None,
'parser': 'earley',
@@ -97,6 +97,7 @@ class LarkOptions(object):
# Fallback attribute lookup: Python calls __getattr__ only after normal
# attribute resolution has failed, and the name is then looked up in
# self.options by subscript.
# NOTE(review): a missing name therefore surfaces as whatever the subscript
# raises (KeyError if options is a plain dict), not AttributeError - confirm
# that callers using hasattr()/getattr(default) are fine with that.
def __getattr__(self, name):
return self.options[name]
# Redirects every attribute assignment into self.options instead of the
# instance itself. Only names already present in self.options are accepted.
# The membership check is an assert, so it is skipped when Python runs
# with -O and unknown names would then be stored silently.
# NOTE(review): how self.options itself gets bound is not visible here
# (presumably via __dict__ in __init__) - confirm before changing this hook.
def __setattr__(self, name, value):
assert name in self.options
self.options[name] = value

def serialize(self):
@@ -227,7 +228,7 @@ class Lark:

def _prepare_callbacks(self):
    """Select the parser frontend and build the tree-construction callbacks.

    Sets three attributes on the instance:

    * ``parser_class`` - the frontend returned by ``get_frontend`` for the
      configured parser/lexer pair.
    * ``_parse_tree_builder`` - a ``ParseTreeBuilder`` for ``self.rules``.
    * ``_callbacks`` - the callbacks created by that builder for the
      configured transformer.
    """
    options = self.options
    self.parser_class = get_frontend(options.parser, options.lexer)

    # The tree_class option may be None (its default), so fall back to Tree.
    # The builder is constructed exactly once; an earlier duplicate
    # construction without the fallback was immediately overwritten.
    tree_class = options.tree_class or Tree
    # Explicit ambiguity handling is only requested for non-LALR parsers.
    explicit_ambiguity = options.parser != 'lalr' and options.ambiguity == 'explicit'
    self._parse_tree_builder = ParseTreeBuilder(
        self.rules,
        tree_class,
        options.propagate_positions,
        options.keep_all_tokens,
        explicit_ambiguity,
        options.maybe_placeholders
    )
    self._callbacks = self._parse_tree_builder.create_callback(options.transformer)

def _build_parser(self):


+ 19
- 2
lark/lexer.py Wyświetl plik

@@ -35,6 +35,16 @@ class Pattern(object):
value = ('(?%s)' % f) + value
return value

@classmethod
def deserialize(cls, data):
    """Rebuild a pattern from its serialized ``[tag, value, flags]`` form.

    The tag selects the concrete class (``'s'`` -> ``PatternStr``,
    ``'re'`` -> ``PatternRE``); the flags sequence is converted back to a
    ``frozenset``. An unknown tag raises ``KeyError``.
    """
    pattern_types = {
        's': PatternStr,
        're': PatternRE,
    }
    pattern_cls = pattern_types[data[0]]
    # Everything after the tag must be exactly (value, flags).
    value, flags = data[1:]
    flag_set = frozenset(flags)
    return pattern_cls(value, flag_set)


class PatternStr(Pattern):
def to_regexp(self):
return self._get_flags(re.escape(self.value))
@@ -44,6 +54,9 @@ class PatternStr(Pattern):
return len(self.value)
max_width = min_width

def serialize(self):
    """Return the JSON-compatible form ``['s', value, flags]``.

    The flags are emitted as a sorted list: iterating a set of strings
    yields an order that can change between interpreter runs, which would
    make the serialized output non-reproducible.
    """
    return ['s', self.value, sorted(self.flags)]

class PatternRE(Pattern):
def to_regexp(self):
return self._get_flags(self.value)
@@ -55,6 +68,9 @@ class PatternRE(Pattern):
def max_width(self):
return get_regexp_width(self.to_regexp())[1]

def serialize(self):
    """Return the JSON-compatible form ``['re', value, flags]``.

    The flags are emitted as a sorted list: iterating a set of strings
    yields an order that can change between interpreter runs, which would
    make the serialized output non-reproducible.
    """
    return ['re', self.value, sorted(self.flags)]

class TerminalDef(object):
def __init__(self, name, pattern, priority=1):
assert isinstance(pattern, Pattern), pattern
@@ -66,11 +82,12 @@ class TerminalDef(object):
return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)

def serialize(self):
    """Return the JSON-compatible form ``[name, pattern, priority]``.

    The pattern is stored in its own serialized form (via
    ``pattern.serialize()``) rather than as the pattern object, so the
    result contains only plain data.
    """
    return [self.name, self.pattern.serialize(), self.priority]

@classmethod
def deserialize(cls, data):
    """Rebuild a terminal definition from ``[name, pattern, priority]``.

    ``pattern`` is expected in serialized form and is turned back into a
    pattern object with ``Pattern.deserialize`` before the instance is
    constructed. A ``data`` sequence of any other length raises
    ``ValueError`` from the unpacking.
    """
    name, pattern, priority = data
    return cls(name, Pattern.deserialize(pattern), priority)





+ 4
- 1
lark/parser_frontends.py Wyświetl plik

@@ -46,7 +46,10 @@ class WithLexer(object):
}
@classmethod
def deserialize(cls, data, callbacks):
class_ = globals()[data['type']] # XXX unsafe
class_ = {
'LALR_TraditionalLexer': LALR_TraditionalLexer,
'LALR_ContextualLexer': LALR_ContextualLexer,
}[data['type']] # XXX unsafe
parser = lalr_parser.Parser.deserialize(data['parser'], callbacks)
assert parser
inst = class_.__new__(class_)


+ 1
- 18
lark/parsers/lalr_parser.py Wyświetl plik

@@ -5,28 +5,11 @@
from ..exceptions import UnexpectedToken
from ..lexer import Token
from ..grammar import Rule
from ..utils import Enumerator

from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable


class Enumerator:
    """Hands out consecutive integer ids, starting at 0, in first-seen order."""

    def __init__(self):
        # Maps each item to the id it was given.
        self.enums = {}

    def get(self, item):
        """Return the id of ``item``, assigning the next free id if it is new."""
        # The default is only stored when the item is absent, so known
        # items keep their id.
        return self.enums.setdefault(item, len(self.enums))

    def __len__(self):
        """Return how many distinct items have been given an id."""
        return len(self.enums)

    def reversed(self):
        """Return a new dict mapping each id back to its item."""
        by_id = {}
        for item, index in self.enums.items():
            by_id[index] = item
        # Ids are unique, so inverting the mapping must not lose entries.
        assert len(by_id) == len(self.enums)
        return by_id


class Parser(object):
def __init__(self, parser_conf, debug=False):
assert all(r.options is None or r.options.priority is None


+ 19
- 0
lark/utils.py Wyświetl plik

@@ -128,3 +128,22 @@ def get_regexp_width(regexp):
return sre_parse.parse(regexp).getwidth()
except sre_constants.error:
raise ValueError(regexp)


class Enumerator:
    """Hands out consecutive integer ids, starting at 0, in first-seen order."""

    def __init__(self):
        # Maps each item to the id it was given.
        self.enums = {}

    def get(self, item):
        """Return the id of ``item``, assigning the next free id if it is new."""
        # The default is only stored when the item is absent, so known
        # items keep their id.
        return self.enums.setdefault(item, len(self.enums))

    def __len__(self):
        """Return how many distinct items have been given an id."""
        return len(self.enums)

    def reversed(self):
        """Return a new dict mapping each id back to its item."""
        by_id = {}
        for item, index in self.enums.items():
            by_id[index] = item
        # Ids are unique, so inverting the mapping must not lose entries.
        assert len(by_id) == len(self.enums)
        return by_id


Ładowanie…
Anuluj
Zapisz