Ver a proveniência

Idiomatic function names and a few other style fixes

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.3
Erez Shinan há 6 anos
ascendente
cometimento
648099d7b4
1 ficheiro alterado com 33 adições e 38 eliminações
  1. +33
    -38
      lark/parsers/cyk.py

+ 33
- 38
lark/parsers/cyk.py Ver ficheiro

@@ -78,11 +78,10 @@ class Grammar(object):
"""Context-free grammar.""" """Context-free grammar."""


def __init__(self, rules): def __init__(self, rules):
super(Grammar, self).__init__()
self.rules = rules
self.rules = frozenset(rules)


def __eq__(self, other): def __eq__(self, other):
return set(self.rules) == set(other.rules)
return self.rules == other.rules


def __str__(self): def __str__(self):
return '\n' + '\n'.join(sorted(repr(x) for x in self.rules)) + '\n' return '\n' + '\n'.join(sorted(repr(x) for x in self.rules)) + '\n'
@@ -111,11 +110,11 @@ class Parser(object):
def __init__(self, rules, start): def __init__(self, rules, start):
super(Parser, self).__init__() super(Parser, self).__init__()
self.orig_rules = {rule.alias: rule for rule in rules} self.orig_rules = {rule.alias: rule for rule in rules}
rules = [self._ToRule(rule) for rule in rules]
self.grammar = ToCnf(Grammar(rules))
rules = [self._to_rule(rule) for rule in rules]
self.grammar = to_cnf(Grammar(rules))
self.start = NT(start) self.start = NT(start)


def _ToRule(self, lark_rule):
def _to_rule(self, lark_rule):
"""Converts a lark rule, (lhs, rhs, callback, options), to a Rule.""" """Converts a lark rule, (lhs, rhs, callback, options), to a Rule."""
return Rule( return Rule(
NT(lark_rule.origin), [ NT(lark_rule.origin), [
@@ -129,26 +128,26 @@ class Parser(object):
if all(r.lhs != self.start for r in table[(0, len(tokenized) - 1)]): if all(r.lhs != self.start for r in table[(0, len(tokenized) - 1)]):
raise ParseError('Parsing failed.') raise ParseError('Parsing failed.')
parse = trees[(0, len(tokenized) - 1)][NT(self.start)] parse = trees[(0, len(tokenized) - 1)][NT(self.start)]
return self._ToTree(RevertCnf(parse))
return self._to_tree(revert_cnf(parse))


def _ToTree(self, rule_node):
def _to_tree(self, rule_node):
"""Converts a RuleNode parse tree to a lark Tree.""" """Converts a RuleNode parse tree to a lark Tree."""
orig_rule = self.orig_rules[rule_node.rule.alias] orig_rule = self.orig_rules[rule_node.rule.alias]
children = [] children = []
for i, child in enumerate(rule_node.children): for i, child in enumerate(rule_node.children):
if isinstance(child, RuleNode): if isinstance(child, RuleNode):
children.append(self._ToTree(child))
children.append(self._to_tree(child))
else: else:
assert isinstance(child.s, Token) assert isinstance(child.s, Token)
children.append(child.s) children.append(child.s)
return Tree(orig_rule.origin, children, rule=orig_rule) return Tree(orig_rule.origin, children, rule=orig_rule)




def PrintParse(node, indent=0):
def print_parse(node, indent=0):
if isinstance(node, RuleNode): if isinstance(node, RuleNode):
print(' ' * (indent * 2) + str(node.rule.lhs)) print(' ' * (indent * 2) + str(node.rule.lhs))
for child in node.children: for child in node.children:
PrintParse(child, indent + 1)
print_parse(child, indent + 1)
else: else:
print(' ' * (indent * 2) + str(node.s)) print(' ' * (indent * 2) + str(node.s))


@@ -247,7 +246,7 @@ class UnitSkipRule(Rule):
__hash__ = Rule.__hash__ __hash__ = Rule.__hash__




def BuildUnitSkipRule(unit_rule, target_rule):
def build_unit_skiprule(unit_rule, target_rule):
skipped_rules = [] skipped_rules = []
if isinstance(unit_rule, UnitSkipRule): if isinstance(unit_rule, UnitSkipRule):
skipped_rules += unit_rule.skipped_rules skipped_rules += unit_rule.skipped_rules
@@ -258,7 +257,7 @@ def BuildUnitSkipRule(unit_rule, target_rule):
weight=unit_rule.weight + target_rule.weight, alias=unit_rule.alias) weight=unit_rule.weight + target_rule.weight, alias=unit_rule.alias)




def GetAnyNtUnitRule(g):
def get_any_nt_unit_rule(g):
"""Returns a non-terminal unit rule from 'g', or None if there is none.""" """Returns a non-terminal unit rule from 'g', or None if there is none."""
for rule in g.rules: for rule in g.rules:
if len(rule.rhs) == 1 and isinstance(rule.rhs[0], NT): if len(rule.rhs) == 1 and isinstance(rule.rhs[0], NT):
@@ -266,28 +265,25 @@ def GetAnyNtUnitRule(g):
return None return None




def RemoveUnitRule(g, rule):
def _remove_unit_rule(g, rule):
"""Removes 'rule' from 'g' without changing the language produced by 'g'.""" """Removes 'rule' from 'g' without changing the language produced by 'g'."""
new_rules = [x for x in g.rules if x != rule] new_rules = [x for x in g.rules if x != rule]
refs = [x for x in g.rules if x.lhs == rule.rhs[0]] refs = [x for x in g.rules if x.lhs == rule.rhs[0]]
for ref in refs:
new_rules.append(BuildUnitSkipRule(rule, ref))
new_rules += [build_unit_skiprule(rule, ref) for ref in refs]
return Grammar(new_rules) return Grammar(new_rules)




def Split(rule):
def _split(rule):
"""Splits a rule whose len(rhs) > 2 into shorter rules.""" """Splits a rule whose len(rhs) > 2 into shorter rules."""
rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs) rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs)
rule_name = '__SP_%s' % (rule_str) + '_%d' rule_name = '__SP_%s' % (rule_str) + '_%d'
new_rules = [Rule(rule.lhs, [rule.rhs[0], NT(rule_name % 1)], weight=rule.weight, alias=rule.alias)]
yield Rule(rule.lhs, [rule.rhs[0], NT(rule_name % 1)], weight=rule.weight, alias=rule.alias)
for i in xrange(1, len(rule.rhs) - 2): for i in xrange(1, len(rule.rhs) - 2):
new_rules.append( Rule(NT(rule_name % i),
[rule.rhs[i], NT(rule_name % (i + 1))], weight=0, alias='Split'))
new_rules.append(Rule(NT(rule_name % (len(rule.rhs) - 2)), rule.rhs[-2:], weight=0, alias='Split'))
return new_rules
yield Rule(NT(rule_name % i), [rule.rhs[i], NT(rule_name % (i + 1))], weight=0, alias='Split')
yield Rule(NT(rule_name % (len(rule.rhs) - 2)), rule.rhs[-2:], weight=0, alias='Split')




def Term(g):
def _term(g):
"""Applies the TERM rule on 'g' (see top comment).""" """Applies the TERM rule on 'g' (see top comment)."""
all_t = {x for rule in g.rules for x in rule.rhs if isinstance(x, T)} all_t = {x for rule in g.rules for x in rule.rhs if isinstance(x, T)}
t_rules = {t: Rule(NT('__T_%s' % str(t)), [t], weight=0, alias='Term') for t in all_t} t_rules = {t: Rule(NT('__T_%s' % str(t)), [t], weight=0, alias='Term') for t in all_t}
@@ -302,46 +298,46 @@ def Term(g):
return Grammar(new_rules) return Grammar(new_rules)




def Bin(g):
def _bin(g):
"""Applies the BIN rule to 'g' (see top comment).""" """Applies the BIN rule to 'g' (see top comment)."""
new_rules = [] new_rules = []
for rule in g.rules: for rule in g.rules:
if len(rule.rhs) > 2: if len(rule.rhs) > 2:
new_rules.extend(Split(rule))
new_rules += _split(rule)
else: else:
new_rules.append(rule) new_rules.append(rule)
return Grammar(new_rules) return Grammar(new_rules)




def Unit(g):
def _unit(g):
"""Applies the UNIT rule to 'g' (see top comment).""" """Applies the UNIT rule to 'g' (see top comment)."""
nt_unit_rule = GetAnyNtUnitRule(g)
nt_unit_rule = get_any_nt_unit_rule(g)
while nt_unit_rule: while nt_unit_rule:
g = RemoveUnitRule(g, nt_unit_rule)
nt_unit_rule = GetAnyNtUnitRule(g)
g = _remove_unit_rule(g, nt_unit_rule)
nt_unit_rule = get_any_nt_unit_rule(g)
return g return g




def ToCnf(g):
def to_cnf(g):
"""Creates a CNF grammar from a general context-free grammar 'g'.""" """Creates a CNF grammar from a general context-free grammar 'g'."""
g = Unit(Bin(Term(g)))
g = _unit(_bin(_term(g)))
return CnfWrapper(g) return CnfWrapper(g)




def UnrollUnitSkipRule(lhs, orig_rhs, skipped_rules, children, weight, alias):
def unroll_unit_skiprule(lhs, orig_rhs, skipped_rules, children, weight, alias):
if not skipped_rules: if not skipped_rules:
return RuleNode(Rule(lhs, orig_rhs, weight=weight, alias=alias), children, weight=weight) return RuleNode(Rule(lhs, orig_rhs, weight=weight, alias=alias), children, weight=weight)
else: else:
weight = weight - skipped_rules[0].weight weight = weight - skipped_rules[0].weight
return RuleNode( return RuleNode(
Rule(lhs, [skipped_rules[0].lhs], weight=weight, alias=alias), [ Rule(lhs, [skipped_rules[0].lhs], weight=weight, alias=alias), [
UnrollUnitSkipRule(skipped_rules[0].lhs, orig_rhs,
unroll_unit_skiprule(skipped_rules[0].lhs, orig_rhs,
skipped_rules[1:], children, skipped_rules[1:], children,
skipped_rules[0].weight, skipped_rules[0].alias) skipped_rules[0].weight, skipped_rules[0].alias)
], weight=weight) ], weight=weight)




def RevertCnf(node):
def revert_cnf(node):
"""Reverts a parse tree (RuleNode) to its original non-CNF form (Node).""" """Reverts a parse tree (RuleNode) to its original non-CNF form (Node)."""
if isinstance(node, T): if isinstance(node, T):
return node return node
@@ -350,16 +346,15 @@ def RevertCnf(node):
return node.children[0] return node.children[0]
else: else:
children = [] children = []
reverted_children = [RevertCnf(x) for x in node.children]
for child in reverted_children:
for child in map(revert_cnf, node.children):
# Reverts BIN rule. # Reverts BIN rule.
if isinstance(child, RuleNode) and child.rule.lhs.s.startswith('__SP_'): if isinstance(child, RuleNode) and child.rule.lhs.s.startswith('__SP_'):
children.extend(child.children)
children += child.children
else: else:
children.append(child) children.append(child)
# Reverts UNIT rule. # Reverts UNIT rule.
if isinstance(node.rule, UnitSkipRule): if isinstance(node.rule, UnitSkipRule):
return UnrollUnitSkipRule(node.rule.lhs, node.rule.rhs,
return unroll_unit_skiprule(node.rule.lhs, node.rule.rhs,
node.rule.skipped_rules, children, node.rule.skipped_rules, children,
node.rule.weight, node.rule.alias) node.rule.weight, node.rule.alias)
else: else:


Carregando…
Cancelar
Guardar