Browse Source

Post-refactor cleanup

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.2
Erez Shinan 6 years ago
parent
commit
39e58cb8fd
5 changed files with 45 additions and 97 deletions
  1. lark/parser_frontends.py          +21  -35
  2. lark/parsers/earley.py            +10  -43
  3. lark/parsers/grammar_analysis.py   +4   -3
  4. lark/parsers/lalr_parser.py        +1   -1
  5. lark/parsers/xearley.py            +9  -15

+21 -35  lark/parser_frontends.py  View File

@@ -7,10 +7,16 @@ from .common import is_terminal, GrammarError, ParserConf
 from .parsers import lalr_parser, earley, xearley, resolve_ambig
 
 class WithLexer:
-    def __init__(self, lexer_conf):
+    def init_traditional_lexer(self, lexer_conf):
         self.lexer_conf = lexer_conf
         self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)
 
+    def init_contextual_lexer(self, lexer_conf, parser_conf):
+        self.lexer_conf = lexer_conf
+        d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()}
+        always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
+        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept)
+
     def lex(self, text):
         stream = self.lexer.lex(text)
         if self.lexer_conf.postlex:
@@ -21,32 +27,22 @@ class WithLexer:
 
 class LALR(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
-        WithLexer.__init__(self, lexer_conf)
-
-        self.parser_conf = parser_conf
         self.parser = lalr_parser.Parser(parser_conf)
+        self.init_traditional_lexer(lexer_conf)
 
     def parse(self, text):
-        tokens = self.lex(text)
-        return self.parser.parse(tokens)
+        token_stream = self.lex(text)
+        return self.parser.parse(token_stream)
 
 
-class LALR_ContextualLexer:
+class LALR_ContextualLexer(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
-        self.lexer_conf = lexer_conf
-        self.parser_conf = parser_conf
-
         self.parser = lalr_parser.Parser(parser_conf)
-
-        d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()}
-        always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
-        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept)
+        self.init_contextual_lexer(lexer_conf, parser_conf)
 
     def parse(self, text):
-        tokens = self.lexer.lex(text)
-        if self.lexer_conf.postlex:
-            tokens = self.lexer_conf.postlex.process(tokens)
-        return self.parser.parse(tokens, self.lexer.set_parser_state)
+        token_stream = self.lex(text)
+        return self.parser.parse(token_stream, self.lexer.set_parser_state)
 
 def get_ambiguity_resolver(options):
     if not options or options.ambiguity == 'resolve':
@@ -58,24 +54,19 @@ def get_ambiguity_resolver(options):
         raise ValueError(options)
 
 def tokenize_text(text):
-    new_text = []
     line = 1
     col_start_pos = 0
     for i, ch in enumerate(text):
         if '\n' in ch:
             line += ch.count('\n')
             col_start_pos = i + ch.rindex('\n')
-        new_text.append(Token('CHAR', ch, line=line, column=i - col_start_pos))
-    return new_text
+        yield Token('CHAR', ch, line=line, column=i - col_start_pos)
 
 class Earley_NoLex:
     def __init__(self, lexer_conf, parser_conf, options=None):
         self._prepare_match(lexer_conf)
 
-        self.parser = earley.Parser(parser_conf.rules,
-                                    parser_conf.start,
-                                    parser_conf.callback,
-                                    self.match,
+        self.parser = earley.Parser(parser_conf, self.match,
                                     resolve_ambiguity=get_ambiguity_resolver(options))


@@ -92,17 +83,14 @@ class Earley_NoLex:
             self.regexps[t.name] = re.compile(regexp)
 
     def parse(self, text):
-        new_text = tokenize_text(text)
-        return self.parser.parse(new_text)
+        token_stream = tokenize_text(text)
+        return self.parser.parse(token_stream)
 
 class Earley(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
-        WithLexer.__init__(self, lexer_conf)
+        self.init_traditional_lexer(lexer_conf)
 
-        self.parser = earley.Parser(parser_conf.rules,
-                                    parser_conf.start,
-                                    parser_conf.callback,
-                                    self.match,
+        self.parser = earley.Parser(parser_conf, self.match,
                                     resolve_ambiguity=get_ambiguity_resolver(options))
 
     def match(self, term, token):
@@ -119,9 +107,7 @@ class XEarley:
 
         self._prepare_match(lexer_conf)
 
-        self.parser = xearley.Parser(parser_conf.rules,
-                                     parser_conf.start,
-                                     parser_conf.callback,
+        self.parser = xearley.Parser(parser_conf,
                                      self.match,
                                      resolve_ambiguity=get_ambiguity_resolver(options),
                                      ignore=lexer_conf.ignore,

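Note: this file's diff collapses the (rules, start, callback) argument triple into a single ParserConf and hoists lexer construction into the WithLexer mixin, so LALR and LALR_ContextualLexer now share lex(). The dict d built by init_contextual_lexer maps each LALR parse-table state to the token types acceptable in that state, which is what lets the contextual lexer narrow its candidate terminals as the parser moves. A minimal sketch of that mapping (toy parse table and surrounding names are illustrative, not lark's actual structures):

    # Hypothetical parse-table data: state index -> {token_type: action}.
    parse_table_states = {
        0: {'NAME': 'shift', 'NUMBER': 'shift'},
        1: {'PLUS': 'shift', '$END': 'reduce'},
    }

    # Same shape as the comprehension in init_contextual_lexer above:
    # each state keeps only the token types it can accept.
    d = {idx: t.keys() for idx, t in parse_table_states.items()}

    assert set(d[1]) == {'PLUS', '$END'}   # in state 1, only PLUS or end-of-input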

+10 -43  lark/parsers/earley.py  View File

@@ -18,9 +18,6 @@ from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse
 from .grammar_analysis import GrammarAnalyzer
 
 
-class EndToken:
-    type = '$END'
-
 class Derivation(Tree):
     _hash = None

@@ -36,8 +33,6 @@ class Derivation(Tree):
             self._hash = Tree.__hash__(self)
         return self._hash
 
-END_TOKEN = EndToken()
-
 class Item(object):
     "An Earley Item, the atom of the algorithm."

@@ -60,11 +55,8 @@ class Item(object):
         new_tree = Derivation(self.rule, self.tree.children + [tree])
         return self.__class__(self.rule, self.ptr+1, self.start, new_tree)
 
-    def similar(self, other):
-        return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule
-
     def __eq__(self, other):
-        return self.similar(other)   #and (self.tree == other.tree)
+        return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule
 
     def __hash__(self):
         return hash((self.rule, self.ptr, id(self.start)))   # Always runs Derivation.__hash__
@@ -152,27 +144,24 @@ class Column:
     __nonzero__ = __bool__   # Py2 backwards-compatibility
 
 class Parser:
-    def __init__(self, rules, start_symbol, callback, term_matcher, resolve_ambiguity=None):
-        self.analysis = GrammarAnalyzer(rules, start_symbol)
-        self.start_symbol = start_symbol
+    def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None):
+        self.analysis = GrammarAnalyzer(parser_conf)
+        self.parser_conf = parser_conf
         self.resolve_ambiguity = resolve_ambiguity
 
+        self.FIRST = self.analysis.FIRST
         self.postprocess = {}
         self.predictions = {}
-        self.FIRST = {}
-        for rule in self.analysis.rules:
-            a = rule.alias
-            self.postprocess[rule] = a if callable(a) else (a and getattr(callback, a))
+        for rule in parser_conf.rules:
+            self.postprocess[rule] = getattr(parser_conf.callback, rule.alias)
             self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]
 
-            self.FIRST[rule.origin] = self.analysis.FIRST[rule.origin]
-
         self.term_matcher = term_matcher
 
 
     def parse(self, stream, start_symbol=None):
         # Define parser functions
-        start_symbol = start_symbol or self.start_symbol
+        start_symbol = start_symbol or self.parser_conf.start
 
         _Item = Item
         match = self.term_matcher
@@ -198,9 +187,8 @@ class Parser:
 
                 for item in to_reduce:
                     new_items = list(complete(item))
-                    for new_item in new_items:
-                        if new_item.similar(item):
-                            raise ParseError('Infinite recursion detected! (rule %s)' % new_item.rule)
+                    if item in new_items:
+                        raise ParseError('Infinite recursion detected! (rule %s)' % item.rule)
                     column.add(new_items)
 
             def scan(i, token, column):
@@ -252,24 +240,3 @@ class ApplyCallbacks(Transformer_NoRecurse):
             return callback(children)
         else:
             return Tree(rule.origin, children)
-
-# RULES = [
-#         ('a', ['d']),
-#         ('d', ['b']),
-#         ('b', ['C']),
-#         ('b', ['b', 'C']),
-#         ('b', ['C', 'b']),
-# ]
-# p = Parser(RULES, 'a')
-# for x in p.parse('CC'):
-#     print x.pretty()
-
-#---------------
-# RULES = [
-#         ('s', ['a', 'a']),
-#         ('a', ['b', 'b']),
-#         ('b', ['C'], lambda (x,): x),
-#         ('b', ['b', 'C']),
-# ]
-# p = Parser(RULES, 's', {})
-# print p.parse('CCCCC').pretty()
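Note: folding similar() into __eq__ is what makes the shorter infinite-recursion check work, since list membership (item in new_items) calls __eq__ on each element. A simplified stand-in for earley.Item (fields trimmed for illustration, not the real class) shows the mechanics:

    class Item:
        def __init__(self, rule, ptr, start):
            self.rule, self.ptr, self.start = rule, ptr, start
        def __eq__(self, other):
            # Same fields as the diff: start by identity, ptr and rule by value.
            return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule
        def __hash__(self):
            return hash((self.rule, self.ptr, id(self.start)))

    col = object()                     # stand-in for a Column; compared by identity
    item = Item('b: b C', 1, col)
    new_items = [Item('b: b C', 1, col)]
    assert item in new_items           # __eq__ fires here -> cycle detected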

+4 -3  lark/parsers/grammar_analysis.py  View File

@@ -94,13 +94,14 @@ def calculate_sets(rules):
 
 
 class GrammarAnalyzer(object):
-    def __init__(self, rules, start_symbol, debug=False):
+    def __init__(self, parser_conf, debug=False):
+        rules = parser_conf.rules
         assert len(rules) == len(set(rules))
 
-        self.start_symbol = start_symbol
+        self.start_symbol = parser_conf.start
         self.debug = debug
 
-        root_rule = Rule('$root', [start_symbol, '$END'])
+        root_rule = Rule('$root', [self.start_symbol, '$END'])
 
         self.rules_by_origin = {r.origin: [] for r in rules}
         for r in rules:

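Note: with the config object in hand, the analyzer still augments the grammar with a synthetic $root rule, giving every analysis a single entry point and an explicit end-of-input anchor. A rough sketch of the augmentation (a stripped-down Rule stand-in, not lark's class):

    class Rule:
        # Stripped-down stand-in; lark's Rule also carries alias and options.
        def __init__(self, origin, expansion):
            self.origin, self.expansion = origin, expansion

    def augment(rules, start):
        # One synthetic entry rule: $root -> <start> $END
        return rules + [Rule('$root', [start, '$END'])]

    grammar = augment([Rule('expr', ['NUMBER'])], 'expr')
    assert grammar[-1].expansion == ['expr', '$END']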

+1 -1  lark/parsers/lalr_parser.py  View File

@@ -15,7 +15,7 @@ class Parser:
     def __init__(self, parser_conf):
         assert all(r.options is None or r.options.priority is None
                    for r in parser_conf.rules), "LALR doesn't yet support prioritization"
-        self.analysis = analysis = LALR_Analyzer(parser_conf.rules, parser_conf.start)
+        self.analysis = analysis = LALR_Analyzer(parser_conf)
         analysis.compute_lookahead()
         callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
                      for rule in analysis.rules}

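Note: the callbacks comprehension above tries the rule's alias first, falls back to its origin, and defaults to None rather than raising. A small sketch of that lookup chain (hypothetical callback holder, not the one lark generates):

    class Callbacks:
        def start(self, children): return ('start', children)
        def add(self, children): return ('add', children)

    class RuleStub:
        def __init__(self, origin, alias=None):
            self.origin, self.alias = origin, alias

    cb = Callbacks()
    rules = [RuleStub('start'), RuleStub('expr', alias='add'), RuleStub('atom')]
    # Alias wins over origin; missing handlers become None instead of AttributeError.
    callbacks = {r: getattr(cb, r.alias or r.origin, None) for r in rules}
    assert callbacks[rules[1]] == cb.add and callbacks[rules[2]] is None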

+9 -15  lark/parsers/xearley.py  View File

@@ -28,31 +28,26 @@ from .grammar_analysis import GrammarAnalyzer
 from .earley import ApplyCallbacks, Item, Column
 
 class Parser:
-    def __init__(self, rules, start_symbol, callback, term_matcher, resolve_ambiguity=None, ignore=(), predict_all=False):
-        self.analysis = GrammarAnalyzer(rules, start_symbol)
-        self.start_symbol = start_symbol
+    def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None, ignore=(), predict_all=False):
+        self.analysis = GrammarAnalyzer(parser_conf)
+        self.parser_conf = parser_conf
         self.resolve_ambiguity = resolve_ambiguity
         self.ignore = list(ignore)
         self.predict_all = predict_all
 
+        self.FIRST = self.analysis.FIRST
         self.postprocess = {}
         self.predictions = {}
-        self.FIRST = {}
-
-        for rule in self.analysis.rules:
-            a = rule.alias
-            self.postprocess[rule] = a if callable(a) else (a and getattr(callback, a))
+        for rule in parser_conf.rules:
+            self.postprocess[rule] = getattr(parser_conf.callback, rule.alias)
             self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]
 
-            self.FIRST[rule.origin] = self.analysis.FIRST[rule.origin]
-
         self.term_matcher = term_matcher
 
 
     def parse(self, stream, start_symbol=None):
         # Define parser functions
-        start_symbol = start_symbol or self.start_symbol
+        start_symbol = start_symbol or self.parser_conf.start
         delayed_matches = defaultdict(list)
         match = self.term_matcher

@@ -79,9 +74,8 @@ class Parser:
                 column.add( predict(nonterm, column) )
             for item in to_reduce:
                 new_items = list(complete(item))
-                for new_item in new_items:
-                    if new_item.similar(item):
-                        raise ParseError('Infinite recursion detected! (rule %s)' % new_item.rule)
+                if item in new_items:
+                    raise ParseError('Infinite recursion detected! (rule %s)' % item.rule)
                 column.add(new_items)
 
         def scan(i, token, column):

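Note: in both Earley variants, the line self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)] precomputes, per nonterminal, every rule reachable through chains of leading nonterminals, so the predictor is a dict lookup at parse time instead of a recursive expansion. A toy version of that closure (hypothetical expand_rule over a simplified grammar, not lark's implementation):

    RULES = {
        'a': [['b', 'C']],
        'b': [['D'], ['a']],
    }

    def expand_rule(origin, rules=RULES):
        # Collect all rules reachable by predicting `origin` transitively
        # through the first symbol of each expansion.
        seen, stack, out = set(), [origin], []
        while stack:
            sym = stack.pop()
            if sym in seen or sym not in rules:
                continue            # terminal or already expanded
            seen.add(sym)
            for expansion in rules[sym]:
                out.append((sym, expansion))
                stack.append(expansion[0])   # predict through leading symbol
        return out

    predictions = {nt: expand_rule(nt) for nt in RULES}
    assert ('b', ['D']) in predictions['a']   # predicting `a` also predicts `b`'s rules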
