Adds a CYK parser to lark
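Selecting the new parser is a matter of passing parser='cyk' to Lark; as the option handling below shows, the lexer then defaults to 'standard'. A minimal usage sketch, with a toy grammar invented purely for illustration (the CYK implementation has no support for empty rules, so the grammar avoids them):

    from lark import Lark

    # hypothetical grammar, for illustration only
    g = Lark('''
        start: NAME "=" NUMBER
        NAME: /[a-z]+/
        NUMBER: /[0-9]+/
        %ignore " "
    ''', parser='cyk')

    print(g.parse("answer = 42").pretty())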
@@ -59,7 +59,7 @@ class LarkOptions(object):
        self.propagate_positions = o.pop('propagate_positions', False)
        self.earley__predict_all = o.pop('earley__predict_all', False)

        assert self.parser in ('earley', 'lalr', None)
        assert self.parser in ('earley', 'lalr', 'cyk', None)

        if self.parser == 'earley' and self.transformer:
            raise ValueError('Cannot specify an embedded transformer when using the Earley algorithm.'
@@ -131,6 +131,8 @@ class Lark:
                self.options.lexer = 'standard'
            elif self.options.parser == 'earley':
                self.options.lexer = 'dynamic'
            elif self.options.parser == 'cyk':
                self.options.lexer = 'standard'
            else:
                assert False, self.options.parser
        lexer = self.options.lexer
@@ -140,7 +142,9 @@ class Lark:
            if self.options.parser == 'earley':
                self.options.ambiguity = 'resolve'
            else:
                assert self.options.parser == 'earley', "Only Earley supports disambiguation right now"
                disambig_parsers = ['earley', 'cyk']
                assert self.options.parser in disambig_parsers, (
                    'Only %s supports disambiguation right now') % ', '.join(disambig_parsers)
            assert self.options.ambiguity in ('resolve', 'explicit', 'auto', 'resolve__antiscore_sum')

        # Parse the grammar file and compose the grammars (TODO)
@@ -1,10 +1,13 @@
import re

from .utils import get_regexp_width
from .parsers.grammar_analysis import GrammarAnalyzer
from .lexer import Lexer, ContextualLexer, Token

from .common import is_terminal, GrammarError, ParserConf
from .parsers import lalr_parser, earley, xearley, resolve_ambig
from .common import GrammarError
from .common import is_terminal, GrammarError
from .parsers import lalr_parser, earley, xearley, resolve_ambig, cyk
from .tree import Tree

class WithLexer:
    def init_traditional_lexer(self, lexer_conf):
@@ -134,6 +137,55 @@ class XEarley:
    def parse(self, text):
        return self.parser.parse(text)


class Earley(WithLexer):
    def __init__(self, lexer_conf, parser_conf, options=None):
        self.init_traditional_lexer(lexer_conf)

        self.parser = earley.Parser(parser_conf, self.match,
                                    resolve_ambiguity=get_ambiguity_resolver(options))

    def match(self, term, token):
        return term == token.type

    def parse(self, text):
        tokens = self.lex(text)
        return self.parser.parse(tokens)


class CYK(WithLexer):
    def __init__(self, lexer_conf, parser_conf, options=None):
        self.init_traditional_lexer(lexer_conf)

        self._analysis = GrammarAnalyzer(parser_conf)
        self._parser = cyk.Parser(self._analysis.rules, parser_conf.start)

        self._postprocess = {}
        for rule in self._analysis.rules:
            a = rule.alias
            self._postprocess[a] = a if callable(a) else (a and getattr(parser_conf.callback, a))

    def parse(self, text):
        tokens = list(self.lex(text))
        parse = self._parser.parse(tokens)
        parse = self._transform(parse)
        return parse

    def _transform(self, tree):
        subtrees = list(tree.iter_subtrees())
        for subtree in subtrees:
            subtree.children = [self._apply_callback(c) if isinstance(c, Tree) else c for c in subtree.children]

        return self._apply_callback(tree)

    def _apply_callback(self, tree):
        children = tree.children
        callback = self._postprocess[tree.rule.alias]
        assert callback, tree.rule.alias
        r = callback(children)
        return r


def get_frontend(parser, lexer):
    if parser=='lalr':
        if lexer is None:
@@ -155,6 +207,11 @@ def get_frontend(parser, lexer):
            raise ValueError('The Earley parser does not support the contextual parser')
        else:
            raise ValueError('Unknown lexer: %s' % lexer)
    elif parser == 'cyk':
        if lexer == 'standard':
            return CYK
        else:
            raise ValueError('The CYK parser requires using the standard lexer.')
    else:
        raise ValueError('Unknown parser: %s' % parser)
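For reference, a quick sketch of how the new branch gets exercised; the import path follows the file layout in this diff, and the second call is only there to show the failure mode:

    from lark.parser_frontends import get_frontend

    frontend_cls = get_frontend('cyk', 'standard')   # returns the new CYK frontend class
    get_frontend('cyk', 'contextual')                # raises ValueError: CYK needs the standard lexer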
@@ -0,0 +1,361 @@
"""This module implements a CYK parser."""

from collections import defaultdict
import itertools

from ..common import ParseError, is_terminal
from ..lexer import Token
from ..tree import Tree

try:
    xrange
except NameError:
    xrange = range


class Symbol(object):
    """Any grammar symbol."""

    def __init__(self, s):
        self.s = s

    def __repr__(self):
        return '%s(%s)' % (type(self).__name__, str(self))

    def __str__(self):
        return str(self.s)

    def __eq__(self, other):
        return self.s == str(other)

    def __ne__(self, other):
        return not self.__eq__(other)

    def __hash__(self):
        return hash((type(self), str(self.s)))


class T(Symbol):
    """Terminal."""

    def match(self, s):
        return self.s == s.type


class NT(Symbol):
    """Non-terminal."""
    pass


class Rule(object):
    """Context-free grammar rule."""

    def __init__(self, lhs, rhs, weight, alias):
        super(Rule, self).__init__()
        assert isinstance(lhs, NT), lhs
        assert all(isinstance(x, NT) or isinstance(x, T) for x in rhs), rhs
        self.lhs = lhs
        self.rhs = rhs
        self.weight = weight
        self.alias = alias

    def __str__(self):
        return '%s -> %s' % (str(self.lhs), ' '.join(str(x) for x in self.rhs))

    def __repr__(self):
        return str(self)

    def __hash__(self):
        return hash((self.lhs, tuple(self.rhs)))

    def __eq__(self, other):
        return self.lhs == other.lhs and self.rhs == other.rhs

    def __ne__(self, other):
        return not (self == other)


class Grammar(object):
    """Context-free grammar."""

    def __init__(self, rules):
        self.rules = frozenset(rules)

    def __eq__(self, other):
        return self.rules == other.rules

    def __str__(self):
        return '\n' + '\n'.join(sorted(repr(x) for x in self.rules)) + '\n'

    def __repr__(self):
        return str(self)


# Parse tree data structures
class RuleNode(object):
    """A node in the parse tree, which also contains the full rhs rule."""

    def __init__(self, rule, children, weight=0):
        self.rule = rule
        self.children = children
        self.weight = weight

    def __repr__(self):
        return 'RuleNode(%s, [%s])' % (repr(self.rule.lhs), ', '.join(str(x) for x in self.children))


class Parser(object):
    """Parser wrapper."""

    def __init__(self, rules, start):
        super(Parser, self).__init__()
        self.orig_rules = {rule.alias: rule for rule in rules}
        rules = [self._to_rule(rule) for rule in rules]
        self.grammar = to_cnf(Grammar(rules))
        self.start = NT(start)

    def _to_rule(self, lark_rule):
        """Converts a lark rule, (lhs, rhs, callback, options), to a Rule."""
        return Rule(
            NT(lark_rule.origin), [
                T(x) if is_terminal(x) else NT(x) for x in lark_rule.expansion
            ], weight=lark_rule.options.priority if lark_rule.options and lark_rule.options.priority else 0, alias=lark_rule.alias)

    def parse(self, tokenized):  # pylint: disable=invalid-name
        """Parses input, which is a list of tokens."""
        table, trees = _parse(tokenized, self.grammar)
        # Check if the parse succeeded.
        if all(r.lhs != self.start for r in table[(0, len(tokenized) - 1)]):
            raise ParseError('Parsing failed.')
        parse = trees[(0, len(tokenized) - 1)][NT(self.start)]
        return self._to_tree(revert_cnf(parse))

    def _to_tree(self, rule_node):
        """Converts a RuleNode parse tree to a lark Tree."""
        orig_rule = self.orig_rules[rule_node.rule.alias]
        children = []
        for i, child in enumerate(rule_node.children):
            if isinstance(child, RuleNode):
                children.append(self._to_tree(child))
            else:
                assert isinstance(child.s, Token)
                children.append(child.s)
        return Tree(orig_rule.origin, children, rule=orig_rule)


def print_parse(node, indent=0):
    if isinstance(node, RuleNode):
        print(' ' * (indent * 2) + str(node.rule.lhs))
        for child in node.children:
            print_parse(child, indent + 1)
    else:
        print(' ' * (indent * 2) + str(node.s))


def _parse(s, g):
    """Parses sentence 's' using CNF grammar 'g'."""
    # The CYK table. Indexed with a 2-tuple: (start pos, end pos)
    table = defaultdict(set)
    # Top-level structure is similar to the CYK table. Each cell is a dict from
    # rule name to the best (lightest) tree for that rule.
    trees = defaultdict(dict)
    # Populate base case with existing terminal production rules
    for i, w in enumerate(s):
        for terminal, rules in g.terminal_rules.items():
            if terminal.match(w):
                for rule in rules:
                    table[(i, i)].add(rule)
                    if (rule.lhs not in trees[(i, i)] or
                            rule.weight < trees[(i, i)][rule.lhs].weight):
                        trees[(i, i)][rule.lhs] = RuleNode(rule, [T(w)], weight=rule.weight)

    # Iterate over lengths of sub-sentences
    for l in xrange(2, len(s) + 1):
        # Iterate over sub-sentences with the given length
        for i in xrange(len(s) - l + 1):
            # Choose partition of the sub-sentence in [1, l)
            for p in xrange(i + 1, i + l):
                span1 = (i, p - 1)
                span2 = (p, i + l - 1)
                for r1, r2 in itertools.product(table[span1], table[span2]):
                    for rule in g.nonterminal_rules.get((r1.lhs, r2.lhs), []):
                        table[(i, i + l - 1)].add(rule)
                        r1_tree = trees[span1][r1.lhs]
                        r2_tree = trees[span2][r2.lhs]
                        rule_total_weight = rule.weight + r1_tree.weight + r2_tree.weight
                        if (rule.lhs not in trees[(i, i + l - 1)]
                                or rule_total_weight < trees[(i, i + l - 1)][rule.lhs].weight):
                            trees[(i, i + l - 1)][rule.lhs] = RuleNode(rule, [r1_tree, r2_tree], weight=rule_total_weight)
    return table, trees
# This section implements a context-free grammar converter to Chomsky normal form.
# It also implements a conversion of parse trees from their CNF form back to the
# original grammar.
#
# Overview:
# Applies the following operations in this order:
# * TERM: Eliminates non-solitary terminals from all rules
# * BIN: Eliminates rules with more than 2 symbols on their right-hand side
# * UNIT: Eliminates non-terminal unit rules
#
# The following grammar characteristics aren't featured:
# * Start symbol appears on RHS
# * Empty rules (epsilon rules)
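# As a rough illustration (the rule and terminal names below are invented, not
# taken from any real grammar), a rule such as
#     start -> item "," item
# comes out of this pipeline roughly as:
#     TERM:  start -> item __T_COMMA item       plus  __T_COMMA -> ","
#     BIN:   start -> item __SP_..._1           plus  __SP_..._1 -> __T_COMMA item
#     UNIT:  a unit rule like  item -> word  is folded into the rules for 'word'
#            and recorded as a UnitSkipRule, so revert_cnf() can undo the fold.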
class CnfWrapper(object):
    """CNF wrapper for grammar.

    Validates that the input grammar is CNF and provides helper data structures.
    """

    def __init__(self, grammar):
        super(CnfWrapper, self).__init__()
        self.grammar = grammar
        self.rules = grammar.rules
        self.terminal_rules = defaultdict(list)
        self.nonterminal_rules = defaultdict(list)
        for r in self.rules:
            # Validate that the grammar is CNF and populate auxiliary data structures.
            assert isinstance(r.lhs, NT), r
            assert len(r.rhs) in [1, 2], r
            if len(r.rhs) == 1 and isinstance(r.rhs[0], T):
                self.terminal_rules[r.rhs[0]].append(r)
            elif len(r.rhs) == 2 and all(isinstance(x, NT) for x in r.rhs):
                self.nonterminal_rules[tuple(r.rhs)].append(r)
            else:
                assert False, r

    def __eq__(self, other):
        return self.grammar == other.grammar

    def __repr__(self):
        return repr(self.grammar)


class UnitSkipRule(Rule):
    """A rule that records NTs that were skipped during transformation."""

    def __init__(self, lhs, rhs, skipped_rules, weight, alias):
        super(UnitSkipRule, self).__init__(lhs, rhs, weight, alias)
        self.skipped_rules = skipped_rules

    def __eq__(self, other):
        return isinstance(other, type(self)) and self.skipped_rules == other.skipped_rules

    __hash__ = Rule.__hash__


def build_unit_skiprule(unit_rule, target_rule):
    skipped_rules = []
    if isinstance(unit_rule, UnitSkipRule):
        skipped_rules += unit_rule.skipped_rules
    skipped_rules.append(target_rule)
    if isinstance(target_rule, UnitSkipRule):
        skipped_rules += target_rule.skipped_rules
    return UnitSkipRule(unit_rule.lhs, target_rule.rhs, skipped_rules,
                        weight=unit_rule.weight + target_rule.weight, alias=unit_rule.alias)


def get_any_nt_unit_rule(g):
    """Returns a non-terminal unit rule from 'g', or None if there is none."""
    for rule in g.rules:
        if len(rule.rhs) == 1 and isinstance(rule.rhs[0], NT):
            return rule
    return None
"""Removes 'rule' from 'g' without changing the langugage produced by 'g'.""" | |||||
new_rules = [x for x in g.rules if x != rule] | |||||
refs = [x for x in g.rules if x.lhs == rule.rhs[0]] | |||||
new_rules += [build_unit_skiprule(rule, ref) for ref in refs] | |||||
return Grammar(new_rules) | |||||
def _split(rule):
    """Splits a rule whose len(rhs) > 2 into shorter rules."""
    rule_str = str(rule.lhs) + '__' + '_'.join(str(x) for x in rule.rhs)
    rule_name = '__SP_%s' % (rule_str) + '_%d'
    yield Rule(rule.lhs, [rule.rhs[0], NT(rule_name % 1)], weight=rule.weight, alias=rule.alias)
    for i in xrange(1, len(rule.rhs) - 2):
        yield Rule(NT(rule_name % i), [rule.rhs[i], NT(rule_name % (i + 1))], weight=0, alias='Split')
    yield Rule(NT(rule_name % (len(rule.rhs) - 2)), rule.rhs[-2:], weight=0, alias='Split')


def _term(g):
    """Applies the TERM rule on 'g' (see top comment)."""
    all_t = {x for rule in g.rules for x in rule.rhs if isinstance(x, T)}
    t_rules = {t: Rule(NT('__T_%s' % str(t)), [t], weight=0, alias='Term') for t in all_t}
    new_rules = []
    for rule in g.rules:
        if len(rule.rhs) > 1 and any(isinstance(x, T) for x in rule.rhs):
            new_rhs = [t_rules[x].lhs if isinstance(x, T) else x for x in rule.rhs]
            new_rules.append(Rule(rule.lhs, new_rhs, weight=rule.weight, alias=rule.alias))
            new_rules.extend(v for k, v in t_rules.items() if k in rule.rhs)
        else:
            new_rules.append(rule)
    return Grammar(new_rules)


def _bin(g):
    """Applies the BIN rule to 'g' (see top comment)."""
    new_rules = []
    for rule in g.rules:
        if len(rule.rhs) > 2:
            new_rules += _split(rule)
        else:
            new_rules.append(rule)
    return Grammar(new_rules)


def _unit(g):
    """Applies the UNIT rule to 'g' (see top comment)."""
    nt_unit_rule = get_any_nt_unit_rule(g)
    while nt_unit_rule:
        g = _remove_unit_rule(g, nt_unit_rule)
        nt_unit_rule = get_any_nt_unit_rule(g)
    return g


def to_cnf(g):
    """Creates a CNF grammar from a general context-free grammar 'g'."""
    g = _unit(_bin(_term(g)))
    return CnfWrapper(g)


def unroll_unit_skiprule(lhs, orig_rhs, skipped_rules, children, weight, alias):
    if not skipped_rules:
        return RuleNode(Rule(lhs, orig_rhs, weight=weight, alias=alias), children, weight=weight)
    else:
        weight = weight - skipped_rules[0].weight
        return RuleNode(
            Rule(lhs, [skipped_rules[0].lhs], weight=weight, alias=alias), [
                unroll_unit_skiprule(skipped_rules[0].lhs, orig_rhs,
                                     skipped_rules[1:], children,
                                     skipped_rules[0].weight, skipped_rules[0].alias)
            ], weight=weight)


def revert_cnf(node):
    """Reverts a parse tree (RuleNode) to its original non-CNF form (Node)."""
    if isinstance(node, T):
        return node
    # Reverts TERM rule.
    if node.rule.lhs.s.startswith('__T_'):
        return node.children[0]
    else:
        children = []
        for child in map(revert_cnf, node.children):
            # Reverts BIN rule.
            if isinstance(child, RuleNode) and child.rule.lhs.s.startswith('__SP_'):
                children += child.children
            else:
                children.append(child)
        # Reverts UNIT rule.
        if isinstance(node.rule, UnitSkipRule):
            return unroll_unit_skiprule(node.rule.lhs, node.rule.rhs,
                                        node.rule.skipped_rules, children,
                                        node.rule.weight, node.rule.alias)
        else:
            return RuleNode(node.rule, children)
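To make the pieces above concrete, here is a small, hedged sketch that drives the module directly; the grammar and token names are invented for illustration, and only helpers defined in this file (plus Token from lark.lexer) are used:

    from lark.lexer import Token
    from lark.parsers.cyk import NT, T, Rule, Grammar, to_cnf, _parse

    # Toy grammar (illustrative): start -> NAME EQ NAME
    rules = [Rule(NT('start'), [T('NAME'), T('EQ'), T('NAME')], weight=0, alias='start')]
    cnf = to_cnf(Grammar(rules))   # TERM introduces __T_* rules, BIN introduces __SP_* rules

    tokens = [Token('NAME', 'x'), Token('EQ', '='), Token('NAME', 'y')]
    table, trees = _parse(tokens, cnf)

    # The cell spanning the whole input, (0, len(tokens) - 1), holds the lightest
    # derivation found for each non-terminal; 'start' should be among them.
    print(trees[(0, 2)][NT('start')])

In the real flow, revert_cnf() and Parser._to_tree() are what turn such a RuleNode back into a regular lark Tree before the frontend applies the rule callbacks.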
@@ -22,8 +22,7 @@ class Derivation(Tree):
    _hash = None

    def __init__(self, rule, items=None):
        Tree.__init__(self, 'drv', items or [])
        self.rule = rule
        Tree.__init__(self, 'drv', items or [], rule=rule)

    def _pretty_label(self):    # Nicer pretty for debugging the parser
        return self.rule.origin if self.rule else self.data
@@ -9,9 +9,10 @@ from .utils import inline_args
###{standalone
class Tree(object):
    def __init__(self, data, children):
    def __init__(self, data, children, rule=None):
        self.data = data
        self.children = list(children)
        self.rule = rule

    def __repr__(self):
        return 'Tree(%s, %s)' % (self.data, self.children)
@@ -16,6 +16,7 @@ except ImportError:
from .test_parser import (
        TestLalrStandard,
        TestEarleyStandard,
        TestCykStandard,
        TestLalrContextual,
        TestEarleyScanless,
        TestEarleyDynamic,
@@ -382,6 +382,7 @@ def _make_parser_test(LEXER, PARSER):
        g.parse(u'\xa3\u0101\u00a3\u0203\n')

    @unittest.skipIf(PARSER == 'cyk', "Takes forever")
    def test_stack_for_ebnf(self):
        """Verify that stack depth isn't an issue for EBNF grammars"""
        g = _Lark(r"""start: a+
@@ -455,6 +456,7 @@ def _make_parser_test(LEXER, PARSER):
    @unittest.skipIf(PARSER == 'cyk', "No empty rules")
    def test_empty_expand1_list(self):
        g = _Lark(r"""start: list
                      ?list: item*
@@ -473,6 +475,7 @@ def _make_parser_test(LEXER, PARSER):
        [list] = r.children
        self.assertSequenceEqual([item.data for item in list.children], ())

    @unittest.skipIf(PARSER == 'cyk', "No empty rules")
    def test_empty_expand1_list_2(self):
        g = _Lark(r"""start: list
                      ?list: item* "!"?
@@ -492,6 +495,7 @@ def _make_parser_test(LEXER, PARSER):
        self.assertSequenceEqual([item.data for item in list.children], ())

    @unittest.skipIf(PARSER == 'cyk', "No empty rules")
    def test_empty_flatten_list(self):
        g = _Lark(r"""start: list
                      list: | item "," list
@@ -645,6 +649,7 @@ def _make_parser_test(LEXER, PARSER):
        self.assertEqual(len(x.children), 1, '/a/ should not be considered anonymous')
        self.assertEqual(x.children[0].type, "A")

    @unittest.skipIf(PARSER == 'cyk', "No empty rules")
    def test_maybe(self):
        g = _Lark("""start: ["a"] """)
        x = g.parse('a')
@@ -702,6 +707,7 @@ def _make_parser_test(LEXER, PARSER):
    # B: A
    # """)

    @unittest.skipIf(PARSER == 'cyk', "No empty rules")
    def test_empty(self):
        # Fails an Earley implementation without special handling for empty rules,
        # or re-processing of already completed rules.
@@ -732,6 +738,8 @@ def _make_parser_test(LEXER, PARSER):
    def test_float_without_lexer(self):
        expected_error = UnexpectedInput if LEXER == 'dynamic' else UnexpectedToken
        if PARSER == 'cyk':
            expected_error = ParseError

        g = _Lark("""start: ["+"|"-"] float
                     float: digit* "." digit+ exp?
@@ -796,6 +804,7 @@ def _make_parser_test(LEXER, PARSER):
        self.assertEqual(tree.children, ['a', 'A'])

    @unittest.skipIf(PARSER == 'cyk', "No empty rules")
    def test_twice_empty(self):
        g = """!start: [["A"]]
            """
@@ -1001,6 +1010,7 @@ def _make_parser_test(LEXER, PARSER):
    @unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions")
    @unittest.skipIf(PARSER == 'cyk', "No empty rules")
    def test_ignore(self):
        grammar = r"""
            COMMENT: /(!|(\/\/))[^\n]*/
@@ -1026,7 +1036,6 @@ def _make_parser_test(LEXER, PARSER):
        self.assertEqual(tree.children, [])

    @unittest.skipIf(LEXER==None, "Scanless doesn't support regular expressions")
    def test_regex_escaping(self):
        g = _Lark("start: /[ab]/")
@@ -1075,6 +1084,7 @@ def _make_parser_test(LEXER, PARSER):
# Note: You still have to import them in __main__ for the tests to run
_TO_TEST = [
        ('standard', 'earley'),
        ('standard', 'cyk'),
        ('dynamic', 'earley'),
        ('standard', 'lalr'),
        ('contextual', 'lalr'),