Browse Source

Mid work

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.7.1
Erez Shinan 6 years ago
parent
commit
120d5b9ffa
5 changed files with 96 additions and 3 deletions
  1. +14
    -0
      lark/grammar.py
  2. +1
    -0
      lark/lark.py
  3. +12
    -0
      lark/lexer.py
  4. +15
    -1
      lark/parser_frontends.py
  5. +54
    -2
      lark/parsers/lalr_parser.py

+ 14
- 0
lark/grammar.py View File

@@ -30,10 +30,16 @@ class Terminal(Symbol):
def fullrepr(self):
    """Verbose repr that also shows the filter_out flag."""
    return '{}({!r}, {!r})'.format(type(self).__name__, self.name, self.filter_out)

def serialize(self):
    """Serialize to a plain list; the 'T' tag marks a Terminal."""
    data = ['T', self.name, self.filter_out]
    return data


class NonTerminal(Symbol):
    """A grammar non-terminal symbol."""
    # Marks this symbol as a non-terminal (presumably Terminal sets
    # is_term = True — confirm against the Terminal class above).
    is_term = False

    def serialize(self):
        # 'NT' tag distinguishes this from the 'T' tag used by Terminal.serialize
        return ['NT', self.name]

class Rule(object):
"""
origin : a symbol
@@ -64,6 +70,11 @@ class Rule(object):
return False
return self.origin == other.origin and self.expansion == other.expansion

def serialize(self):
    """Serialize this rule to nested plain lists.

    Layout: [origin, [expansion symbols...], alias, options-or-None],
    where origin and each expansion symbol are serialized recursively
    and options is serialized only when present.
    """
    # List comprehension instead of list(generator) — same result, clearer.
    return [
        self.origin.serialize(),
        [s.serialize() for s in self.expansion],
        self.alias,
        self.options.serialize() if self.options else None,
    ]
# TODO: implement the matching deserialize() (dead commented-out stub removed)


class RuleOptions:
def __init__(self, keep_all_tokens=False, expand1=False, priority=None):
@@ -78,3 +89,6 @@ class RuleOptions:
self.expand1,
self.priority,
)

def serialize(self):
    """Flatten the rule options into a plain list.

    NOTE(review): self.empty_indices is not assigned in the __init__
    signature visible above — presumably set elsewhere; confirm.
    """
    empty = list(self.empty_indices)
    return [self.keep_all_tokens, self.expand1, self.priority, empty]

+ 1
- 0
lark/lark.py View File

@@ -208,6 +208,7 @@ class Lark:

return self.parser_class(self.lexer_conf, parser_conf, options=self.options)


@classmethod
def open(cls, grammar_filename, rel_to=None, **options):
"""Create an instance of Lark with the grammar given by its filename


+ 12
- 0
lark/lexer.py View File

@@ -65,6 +65,9 @@ class TerminalDef(object):
def __repr__(self):
    # Debug representation: e.g. TerminalDef('NAME', <pattern repr>)
    return '%s(%r, %r)' % (type(self).__name__, self.name, self.pattern)

def serialize(self):
    """Serialize to [name, pattern, priority].

    NOTE(review): self.pattern is emitted as-is; if it is a Pattern
    object rather than a plain value it may need its own serialization
    step — confirm against the pattern classes.
    """
    fields = (self.name, self.pattern, self.priority)
    return list(fields)



###{standalone
@@ -307,6 +310,13 @@ class TraditionalLexer(Lexer):
def lex(self, stream):
    """Tokenize *stream* using this lexer's newline/ignore type lists."""
    lexer_run = _Lex(self)
    return lexer_run.lex(stream, self.newline_types, self.ignore_types)

def serialize(self):
    """Dump the lexer configuration as a plain dict (terminals serialized recursively)."""
    serialized_terminals = [t.serialize() for t in self.terminals]
    return dict(
        terminals=serialized_terminals,
        ignore_types=self.ignore_types,
        newline_types=self.newline_types,
    )


class ContextualLexer(Lexer):
def __init__(self, terminals, states, ignore=(), always_accept=(), user_callbacks={}):
@@ -343,4 +353,6 @@ class ContextualLexer(Lexer):
l.lexer = self.lexers[self.parser_state]
l.state = self.parser_state

def serialize(self):
    """Serialize every per-state sub-lexer, keyed by parser state."""
    out = {}
    for state, sub_lexer in self.lexers.items():
        out[state] = sub_lexer.serialize()
    return out


+ 15
- 1
lark/parser_frontends.py View File

@@ -7,7 +7,7 @@ from .lexer import TraditionalLexer, ContextualLexer, Lexer, Token
from .parsers import lalr_parser, earley, xearley, cyk
from .tree import Tree

class WithLexer:
class WithLexer(object):
lexer = None
parser = None
lexer_conf = None
@@ -36,6 +36,20 @@ class WithLexer:
sps = self.lexer.set_parser_state
return self.parser.parse(token_stream, *[sps] if sps is not NotImplemented else [])

def serialize(self):
    """Bundle the serialized parser and lexer into one dict."""
    return dict(
        parser=self.parser.serialize(),
        lexer=self.lexer.serialize(),
    )
@classmethod
def deserialize(cls, data):
    """Rebuild a WithLexer frontend from serialize() output.

    NOTE(review): hardcodes lalr_parser.Parser even though serialize()
    has a commented-out 'class' tag — confirm non-LALR frontends are out
    of scope here. Also assumes Lexer.deserialize exists (not visible in
    this diff), and the instance is created without lexer_conf — verify
    parse() still works on a deserialized frontend.
    """
    inst = cls.__new__(cls)
    inst.parser = lalr_parser.Parser.deserialize(data['parser'])
    inst.lexer = Lexer.deserialize(data['lexer'])
    return inst


class LALR_TraditionalLexer(WithLexer):
def __init__(self, lexer_conf, parser_conf, options=None):
debug = options.debug if options else False


+ 54
- 2
lark/parsers/lalr_parser.py View File

@@ -4,10 +4,30 @@
# Email : erezshin@gmail.com
from ..exceptions import UnexpectedToken
from ..lexer import Token
from ..grammar import Rule

from .lalr_analysis import LALR_Analyzer, Shift
from .lalr_analysis import LALR_Analyzer, Shift, Reduce, IntParseTable

class Parser:

class Enumerator:
    """Assigns a stable, dense integer id (0, 1, 2, ...) to each distinct item,
    in first-seen order."""

    def __init__(self):
        # item -> id; ids are 0..len-1 in insertion order
        self.enums = {}

    def get(self, item):
        """Return the id for *item*, allocating the next free id on first sight."""
        # len() is evaluated before the lookup, so an unseen item receives
        # the current size as its id -- same as the explicit membership test.
        return self.enums.setdefault(item, len(self.enums))

    def __len__(self):
        return len(self.enums)

    def reversed(self):
        """Return the inverse mapping id -> item (ids are unique by construction)."""
        inverse = dict((v, k) for k, v in self.enums.items())
        assert len(inverse) == len(self.enums)
        return inverse


class Parser(object):
def __init__(self, parser_conf, debug=False):
assert all(r.options is None or r.options.priority is None
for r in parser_conf.rules), "LALR doesn't yet support prioritization"
@@ -20,6 +40,38 @@ class Parser:
self.parser = _Parser(analysis.parse_table, callbacks)
self.parse = self.parser.parse

def serialize(self):
    """Dump the LALR parse table as plain data.

    Tokens and rules are interned through Enumerator so the states table
    stores small integers; each action is encoded as (1, rule_id) for
    Reduce and (0, target_state) for Shift.

    NOTE(review): reads self._parse_table, which is not assigned in the
    __init__ visible in this diff — confirm where it is set.
    """
    token_ids = Enumerator()
    rule_ids = Enumerator()

    states = {}
    for state, actions in self._parse_table.states.items():
        encoded = {}
        for token, (action, arg) in actions.items():
            if action is Reduce:
                encoded[token_ids.get(token)] = (1, rule_ids.get(arg))
            else:
                encoded[token_ids.get(token)] = (0, arg)
        states[state] = encoded

    return {
        'tokens': token_ids.reversed(),
        'rules': {idx: r.serialize() for idx, r in rule_ids.reversed().items()},
        'states': states,
        'start_state': self._parse_table.start_state,
        'end_state': self._parse_table.end_state,
    }
@classmethod
def deserialize(cls, data):
    """Reconstruct a Parser from the dict produced by serialize().

    Decodes the interned token/rule tables and rebuilds the parse table;
    action code 1 maps back to Reduce, anything else to Shift.

    Fixes: removed leftover debug print() and added the missing return —
    callers (WithLexer.deserialize) expect a Parser instance back.
    """
    tokens = data['tokens']
    # NOTE(review): relies on Rule.deserialize, which is still commented
    # out in grammar.py in this same commit — confirm it gets implemented.
    rules = {idx: Rule.deserialize(r) for idx, r in data['rules'].items()}
    states = {
        state: {tokens[token]: ((Reduce, rules[arg]) if action == 1 else (Shift, arg))
                for token, (action, arg) in actions.items()}
        for state, actions in data['states'].items()
    }
    parse_table = IntParseTable(states, data['start_state'], data['end_state'])

    inst = cls.__new__(cls)  # bypass __init__: no grammar analysis on load
    inst._parse_table = parse_table
    # NOTE(review): callbacks are not part of the serialized data, so
    # inst.parser (_Parser) cannot be rebuilt here yet — work in progress;
    # confirm the intended wiring before use.
    return inst



###{standalone

class _Parser:


Loading…
Cancel
Save