| @@ -78,11 +78,10 @@ class Grammar(object): | |||||
| """Context-free grammar.""" | """Context-free grammar.""" | ||||
| def __init__(self, rules): | def __init__(self, rules): | ||||
| super(Grammar, self).__init__() | |||||
| self.rules = rules | |||||
| self.rules = frozenset(rules) | |||||
| def __eq__(self, other): | def __eq__(self, other): | ||||
| return set(self.rules) == set(other.rules) | |||||
| return self.rules == other.rules | |||||
| def __str__(self): | def __str__(self): | ||||
| return '\n' + '\n'.join(sorted(repr(x) for x in self.rules)) + '\n' | return '\n' + '\n'.join(sorted(repr(x) for x in self.rules)) + '\n' | ||||
| @@ -111,11 +110,11 @@ class Parser(object): | |||||
| def __init__(self, rules, start): | def __init__(self, rules, start): | ||||
| super(Parser, self).__init__() | super(Parser, self).__init__() | ||||
| self.orig_rules = {rule.alias: rule for rule in rules} | self.orig_rules = {rule.alias: rule for rule in rules} | ||||
| rules = [self._ToRule(rule) for rule in rules] | |||||
| self.grammar = ToCnf(Grammar(rules)) | |||||
| rules = [self._to_rule(rule) for rule in rules] | |||||
| self.grammar = to_cnf(Grammar(rules)) | |||||
| self.start = NT(start) | self.start = NT(start) | ||||
| def _ToRule(self, lark_rule): | |||||
| def _to_rule(self, lark_rule): | |||||
| """Converts a lark rule, (lhs, rhs, callback, options), to a Rule.""" | """Converts a lark rule, (lhs, rhs, callback, options), to a Rule.""" | ||||
| return Rule( | return Rule( | ||||
| NT(lark_rule.origin), [ | NT(lark_rule.origin), [ | ||||
| @@ -129,26 +128,26 @@ class Parser(object): | |||||
| if all(r.lhs != self.start for r in table[(0, len(tokenized) - 1)]): | if all(r.lhs != self.start for r in table[(0, len(tokenized) - 1)]): | ||||
| raise ParseError('Parsing failed.') | raise ParseError('Parsing failed.') | ||||
| parse = trees[(0, len(tokenized) - 1)][NT(self.start)] | parse = trees[(0, len(tokenized) - 1)][NT(self.start)] | ||||
| return self._ToTree(RevertCnf(parse)) | |||||
| return self._to_tree(revert_cnf(parse)) | |||||
| def _ToTree(self, rule_node): | |||||
| def _to_tree(self, rule_node): | |||||
| """Converts a RuleNode parse tree to a lark Tree.""" | """Converts a RuleNode parse tree to a lark Tree.""" | ||||
| orig_rule = self.orig_rules[rule_node.rule.alias] | orig_rule = self.orig_rules[rule_node.rule.alias] | ||||
| children = [] | children = [] | ||||
| for i, child in enumerate(rule_node.children): | for i, child in enumerate(rule_node.children): | ||||
| if isinstance(child, RuleNode): | if isinstance(child, RuleNode): | ||||
| children.append(self._ToTree(child)) | |||||
| children.append(self._to_tree(child)) | |||||
| else: | else: | ||||
| assert isinstance(child.s, Token) | assert isinstance(child.s, Token) | ||||
| children.append(child.s) | children.append(child.s) | ||||
| return Tree(orig_rule.origin, children, rule=orig_rule) | return Tree(orig_rule.origin, children, rule=orig_rule) | ||||
| def PrintParse(node, indent=0): | |||||
| def print_parse(node, indent=0): | |||||
| if isinstance(node, RuleNode): | if isinstance(node, RuleNode): | ||||
| print(' ' * (indent * 2) + str(node.rule.lhs)) | print(' ' * (indent * 2) + str(node.rule.lhs)) | ||||
| for child in node.children: | for child in node.children: | ||||
| PrintParse(child, indent + 1) | |||||
| print_parse(child, indent + 1) | |||||
| else: | else: | ||||
| print(' ' * (indent * 2) + str(node.s)) | print(' ' * (indent * 2) + str(node.s)) | ||||
| @@ -247,7 +246,7 @@ class UnitSkipRule(Rule): | |||||
| __hash__ = Rule.__hash__ | __hash__ = Rule.__hash__ | ||||
| def BuildUnitSkipRule(unit_rule, target_rule): | |||||
| def build_unit_skiprule(unit_rule, target_rule): | |||||
| skipped_rules = [] | skipped_rules = [] | ||||
| if isinstance(unit_rule, UnitSkipRule): | if isinstance(unit_rule, UnitSkipRule): | ||||
| skipped_rules += unit_rule.skipped_rules | skipped_rules += unit_rule.skipped_rules | ||||
| @@ -258,7 +257,7 @@ def BuildUnitSkipRule(unit_rule, target_rule): | |||||
| weight=unit_rule.weight + target_rule.weight, alias=unit_rule.alias) | weight=unit_rule.weight + target_rule.weight, alias=unit_rule.alias) | ||||
| def GetAnyNtUnitRule(g): | |||||
| def get_any_nt_unit_rule(g): | |||||
| """Returns a non-terminal unit rule from 'g', or None if there is none.""" | """Returns a non-terminal unit rule from 'g', or None if there is none.""" | ||||
| for rule in g.rules: | for rule in g.rules: | ||||
| if len(rule.rhs) == 1 and isinstance(rule.rhs[0], NT): | if len(rule.rhs) == 1 and isinstance(rule.rhs[0], NT): | ||||
| @@ -266,28 +265,25 @@ def GetAnyNtUnitRule(g): | |||||
| return None | return None | ||||
| def RemoveUnitRule(g, rule): | |||||
| def _remove_unit_rule(g, rule): | |||||
| """Removes 'rule' from 'g' without changing the langugage produced by 'g'.""" | """Removes 'rule' from 'g' without changing the langugage produced by 'g'.""" | ||||
| new_rules = [x for x in g.rules if x != rule] | new_rules = [x for x in g.rules if x != rule] | ||||
| refs = [x for x in g.rules if x.lhs == rule.rhs[0]] | refs = [x for x in g.rules if x.lhs == rule.rhs[0]] | ||||
| for ref in refs: | |||||
| new_rules.append(BuildUnitSkipRule(rule, ref)) | |||||
| new_rules += [build_unit_skiprule(rule, ref) for ref in refs] | |||||
| return Grammar(new_rules) | return Grammar(new_rules) | ||||
| def Split(rule): | |||||
| def _split(rule): | |||||
| """Splits a rule whose len(rhs) > 2 into shorter rules.""" | """Splits a rule whose len(rhs) > 2 into shorter rules.""" | ||||
| rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs) | rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs) | ||||
| rule_name = '__SP_%s' % (rule_str) + '_%d' | rule_name = '__SP_%s' % (rule_str) + '_%d' | ||||
| new_rules = [Rule(rule.lhs, [rule.rhs[0], NT(rule_name % 1)], weight=rule.weight, alias=rule.alias)] | |||||
| yield Rule(rule.lhs, [rule.rhs[0], NT(rule_name % 1)], weight=rule.weight, alias=rule.alias) | |||||
| for i in xrange(1, len(rule.rhs) - 2): | for i in xrange(1, len(rule.rhs) - 2): | ||||
| new_rules.append( Rule(NT(rule_name % i), | |||||
| [rule.rhs[i], NT(rule_name % (i + 1))], weight=0, alias='Split')) | |||||
| new_rules.append(Rule(NT(rule_name % (len(rule.rhs) - 2)), rule.rhs[-2:], weight=0, alias='Split')) | |||||
| return new_rules | |||||
| yield Rule(NT(rule_name % i), [rule.rhs[i], NT(rule_name % (i + 1))], weight=0, alias='Split') | |||||
| yield Rule(NT(rule_name % (len(rule.rhs) - 2)), rule.rhs[-2:], weight=0, alias='Split') | |||||
| def Term(g): | |||||
| def _term(g): | |||||
| """Applies the TERM rule on 'g' (see top comment).""" | """Applies the TERM rule on 'g' (see top comment).""" | ||||
| all_t = {x for rule in g.rules for x in rule.rhs if isinstance(x, T)} | all_t = {x for rule in g.rules for x in rule.rhs if isinstance(x, T)} | ||||
| t_rules = {t: Rule(NT('__T_%s' % str(t)), [t], weight=0, alias='Term') for t in all_t} | t_rules = {t: Rule(NT('__T_%s' % str(t)), [t], weight=0, alias='Term') for t in all_t} | ||||
| @@ -302,46 +298,46 @@ def Term(g): | |||||
| return Grammar(new_rules) | return Grammar(new_rules) | ||||
| def Bin(g): | |||||
| def _bin(g): | |||||
| """Applies the BIN rule to 'g' (see top comment).""" | """Applies the BIN rule to 'g' (see top comment).""" | ||||
| new_rules = [] | new_rules = [] | ||||
| for rule in g.rules: | for rule in g.rules: | ||||
| if len(rule.rhs) > 2: | if len(rule.rhs) > 2: | ||||
| new_rules.extend(Split(rule)) | |||||
| new_rules += _split(rule) | |||||
| else: | else: | ||||
| new_rules.append(rule) | new_rules.append(rule) | ||||
| return Grammar(new_rules) | return Grammar(new_rules) | ||||
| def Unit(g): | |||||
| def _unit(g): | |||||
| """Applies the UNIT rule to 'g' (see top comment).""" | """Applies the UNIT rule to 'g' (see top comment).""" | ||||
| nt_unit_rule = GetAnyNtUnitRule(g) | |||||
| nt_unit_rule = get_any_nt_unit_rule(g) | |||||
| while nt_unit_rule: | while nt_unit_rule: | ||||
| g = RemoveUnitRule(g, nt_unit_rule) | |||||
| nt_unit_rule = GetAnyNtUnitRule(g) | |||||
| g = _remove_unit_rule(g, nt_unit_rule) | |||||
| nt_unit_rule = get_any_nt_unit_rule(g) | |||||
| return g | return g | ||||
| def ToCnf(g): | |||||
| def to_cnf(g): | |||||
| """Creates a CNF grammar from a general context-free grammar 'g'.""" | """Creates a CNF grammar from a general context-free grammar 'g'.""" | ||||
| g = Unit(Bin(Term(g))) | |||||
| g = _unit(_bin(_term(g))) | |||||
| return CnfWrapper(g) | return CnfWrapper(g) | ||||
| def UnrollUnitSkipRule(lhs, orig_rhs, skipped_rules, children, weight, alias): | |||||
| def unroll_unit_skiprule(lhs, orig_rhs, skipped_rules, children, weight, alias): | |||||
| if not skipped_rules: | if not skipped_rules: | ||||
| return RuleNode(Rule(lhs, orig_rhs, weight=weight, alias=alias), children, weight=weight) | return RuleNode(Rule(lhs, orig_rhs, weight=weight, alias=alias), children, weight=weight) | ||||
| else: | else: | ||||
| weight = weight - skipped_rules[0].weight | weight = weight - skipped_rules[0].weight | ||||
| return RuleNode( | return RuleNode( | ||||
| Rule(lhs, [skipped_rules[0].lhs], weight=weight, alias=alias), [ | Rule(lhs, [skipped_rules[0].lhs], weight=weight, alias=alias), [ | ||||
| UnrollUnitSkipRule(skipped_rules[0].lhs, orig_rhs, | |||||
| unroll_unit_skiprule(skipped_rules[0].lhs, orig_rhs, | |||||
| skipped_rules[1:], children, | skipped_rules[1:], children, | ||||
| skipped_rules[0].weight, skipped_rules[0].alias) | skipped_rules[0].weight, skipped_rules[0].alias) | ||||
| ], weight=weight) | ], weight=weight) | ||||
| def RevertCnf(node): | |||||
| def revert_cnf(node): | |||||
| """Reverts a parse tree (RuleNode) to its original non-CNF form (Node).""" | """Reverts a parse tree (RuleNode) to its original non-CNF form (Node).""" | ||||
| if isinstance(node, T): | if isinstance(node, T): | ||||
| return node | return node | ||||
| @@ -350,16 +346,15 @@ def RevertCnf(node): | |||||
| return node.children[0] | return node.children[0] | ||||
| else: | else: | ||||
| children = [] | children = [] | ||||
| reverted_children = [RevertCnf(x) for x in node.children] | |||||
| for child in reverted_children: | |||||
| for child in map(revert_cnf, node.children): | |||||
| # Reverts BIN rule. | # Reverts BIN rule. | ||||
| if isinstance(child, RuleNode) and child.rule.lhs.s.startswith('__SP_'): | if isinstance(child, RuleNode) and child.rule.lhs.s.startswith('__SP_'): | ||||
| children.extend(child.children) | |||||
| children += child.children | |||||
| else: | else: | ||||
| children.append(child) | children.append(child) | ||||
| # Reverts UNIT rule. | # Reverts UNIT rule. | ||||
| if isinstance(node.rule, UnitSkipRule): | if isinstance(node.rule, UnitSkipRule): | ||||
| return UnrollUnitSkipRule(node.rule.lhs, node.rule.rhs, | |||||
| return unroll_unit_skiprule(node.rule.lhs, node.rule.rhs, | |||||
| node.rule.skipped_rules, children, | node.rule.skipped_rules, children, | ||||
| node.rule.weight, node.rule.alias) | node.rule.weight, node.rule.alias) | ||||
| else: | else: | ||||