| @@ -47,12 +47,12 @@ class TreeToJson(Transformer): | |||||
| true = lambda self, _: True | true = lambda self, _: True | ||||
| false = lambda self, _: False | false = lambda self, _: False | ||||
| json_parser = Lark(json_grammar, parser='earley', lexer='dynamic') | |||||
| def parse(x): | |||||
| return TreeToJson().transform(json_parser.parse(x)) | |||||
| # json_parser = Lark(json_grammar, parser='earley', lexer='standard') | |||||
| # def parse(x): | |||||
| # return TreeToJson().transform(json_parser.parse(x)) | |||||
| # json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson()) | |||||
| # parse = json_parser.parse | |||||
| json_parser = Lark(json_grammar, parser='lalr', transformer=TreeToJson()) | |||||
| parse = json_parser.parse | |||||
| def test(): | def test(): | ||||
| test_json = ''' | test_json = ''' | ||||
| @@ -4,7 +4,7 @@ import sre_parse | |||||
| from .lexer import Lexer, ContextualLexer, Token | from .lexer import Lexer, ContextualLexer, Token | ||||
| from .common import is_terminal, GrammarError, ParserConf, Terminal_Regexp, Terminal_Token | from .common import is_terminal, GrammarError, ParserConf, Terminal_Regexp, Terminal_Token | ||||
| from .parsers import lalr_parser, old_earley, nearley, earley | |||||
| from .parsers import lalr_parser, earley | |||||
| from .tree import Transformer | from .tree import Transformer | ||||
| from .parsers import xearley | from .parsers import xearley | ||||
| @@ -49,47 +49,6 @@ class LALR_ContextualLexer: | |||||
| tokens = self.lexer_conf.postlex.process(tokens) | tokens = self.lexer_conf.postlex.process(tokens) | ||||
| return self.parser.parse(tokens, self.lexer.set_parser_state) | return self.parser.parse(tokens, self.lexer.set_parser_state) | ||||
| class Nearley(WithLexer): | |||||
| def __init__(self, lexer_conf, parser_conf): | |||||
| WithLexer.__init__(self, lexer_conf) | |||||
| rules = [{'name':n, | |||||
| 'symbols': self._prepare_expansion(x), | |||||
| 'postprocess': getattr(parser_conf.callback, a)} | |||||
| for n,x,a in parser_conf.rules] | |||||
| self.parser = nearley.Parser(rules, parser_conf.start) | |||||
| def _prepare_expansion(self, expansion): | |||||
| return [(sym, None) if is_terminal(sym) else sym for sym in expansion] | |||||
| def parse(self, text): | |||||
| tokens = list(self.lex(text)) | |||||
| res = self.parser.parse(tokens) | |||||
| assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' | |||||
| return res[0] | |||||
| class OldEarley(WithLexer): | |||||
| def __init__(self, lexer_conf, parser_conf): | |||||
| WithLexer.__init__(self, lexer_conf) | |||||
| rules = [(n, self._prepare_expansion(x), a) for n,x,a in parser_conf.rules] | |||||
| self.parser = old_earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start)) | |||||
| def _prepare_expansion(self, expansion): | |||||
| return [(sym,) if is_terminal(sym) else sym for sym in expansion] | |||||
| def parse(self, text): | |||||
| tokens = list(self.lex(text)) | |||||
| res = self.parser.parse(tokens) | |||||
| assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' | |||||
| return res[0] | |||||
| def tokenize_text(text): | def tokenize_text(text): | ||||
| new_text = [] | new_text = [] | ||||
| line = 1 | line = 1 | ||||
| @@ -101,32 +60,6 @@ def tokenize_text(text): | |||||
| new_text.append(Token('CHAR', ch, line=line, column=i - col_start_pos)) | new_text.append(Token('CHAR', ch, line=line, column=i - col_start_pos)) | ||||
| return new_text | return new_text | ||||
| class OldEarley_NoLex: | |||||
| def __init__(self, lexer_conf, parser_conf): | |||||
| self.token_by_name = {t.name:t for t in lexer_conf.tokens} | |||||
| rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in parser_conf.rules] | |||||
| self.parser = old_earley.Parser(ParserConf(rules, parser_conf.callback, parser_conf.start)) | |||||
| def _prepare_expansion(self, expansion): | |||||
| for sym in expansion: | |||||
| if is_terminal(sym): | |||||
| regexp = self.token_by_name[sym].pattern.to_regexp() | |||||
| width = sre_parse.parse(regexp).getwidth() | |||||
| if width != (1,1): | |||||
| raise GrammarError('Scanless parsing (lexer=None) requires all tokens to have a width of 1 (terminal %s: %s is %s)' % (sym, regexp, width)) | |||||
| yield (re.compile(regexp).match, regexp) | |||||
| else: | |||||
| yield sym | |||||
| def parse(self, text): | |||||
| new_text = tokenize_text(text) | |||||
| res = self.parser.parse(new_text) | |||||
| assert len(res) ==1 , 'Ambiguious Parse! Not handled yet' | |||||
| return res[0] | |||||
| class Earley_NoLex: | class Earley_NoLex: | ||||
| def __init__(self, lexer_conf, parser_conf, options=None): | def __init__(self, lexer_conf, parser_conf, options=None): | ||||
| self.token_by_name = {t.name:t for t in lexer_conf.tokens} | self.token_by_name = {t.name:t for t in lexer_conf.tokens} | ||||
| @@ -178,7 +111,7 @@ class XEarley: | |||||
| def __init__(self, lexer_conf, parser_conf, options=None): | def __init__(self, lexer_conf, parser_conf, options=None): | ||||
| self.token_by_name = {t.name:t for t in lexer_conf.tokens} | self.token_by_name = {t.name:t for t in lexer_conf.tokens} | ||||
| rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in parser_conf.rules] | |||||
| rules = [(n, list(self._prepare_expansion(x)), a, o) for n,x,a,o in parser_conf.rules] | |||||
| resolve_ambiguity = (options.ambiguity=='resolve') if options else True | resolve_ambiguity = (options.ambiguity=='resolve') if options else True | ||||
| ignore = [Terminal_Regexp(self.token_by_name[x].pattern.to_regexp()) for x in lexer_conf.ignore] | ignore = [Terminal_Regexp(self.token_by_name[x].pattern.to_regexp()) for x in lexer_conf.ignore] | ||||
| @@ -195,6 +128,7 @@ class XEarley: | |||||
| if is_terminal(sym): | if is_terminal(sym): | ||||
| regexp = self.token_by_name[sym].pattern.to_regexp() | regexp = self.token_by_name[sym].pattern.to_regexp() | ||||
| width = sre_parse.parse(regexp).getwidth() | width = sre_parse.parse(regexp).getwidth() | ||||
| assert width | |||||
| yield Terminal_Regexp(regexp) | yield Terminal_Regexp(regexp) | ||||
| else: | else: | ||||
| yield sym | yield sym | ||||
| @@ -238,13 +238,13 @@ def _compare_rules(rule1, rule2): | |||||
| def _compare_drv(tree1, tree2): | def _compare_drv(tree1, tree2): | ||||
| if not (isinstance(tree1, Tree) and isinstance(tree2, Tree)): | if not (isinstance(tree1, Tree) and isinstance(tree2, Tree)): | ||||
| return compare(tree1, tree2) | |||||
| return -compare(tree1, tree2) | |||||
| try: | try: | ||||
| rule1, rule2 = tree1.rule, tree2.rule | rule1, rule2 = tree1.rule, tree2.rule | ||||
| except AttributeError: | except AttributeError: | ||||
| # Probably trees that don't take part in this parse (better way to distinguish?) | # Probably trees that don't take part in this parse (better way to distinguish?) | ||||
| return compare(tree1, tree2) | |||||
| return -compare(tree1, tree2) | |||||
| # XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse, | # XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse, | ||||
| # when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be | # when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be | ||||
| @@ -264,7 +264,7 @@ def _compare_drv(tree1, tree2): | |||||
| if c: | if c: | ||||
| return c | return c | ||||
| return compare(len(tree1.children), len(tree2.children)) | |||||
| return -compare(len(tree1.children), len(tree2.children)) | |||||
| def _resolve_ambig(tree): | def _resolve_ambig(tree): | ||||
| @@ -1,4 +1,4 @@ | |||||
| "This module implements an Earley Parser" | |||||
| "This module implements an experimental Earley Parser with a dynamic lexer" | |||||
| # The parser uses a parse-forest to keep track of derivations and ambiguities. | # The parser uses a parse-forest to keep track of derivations and ambiguities. | ||||
| # When the parse ends successfully, a disambiguation stage resolves all ambiguity | # When the parse ends successfully, a disambiguation stage resolves all ambiguity | ||||
| @@ -10,121 +10,21 @@ | |||||
| # The algorithm keeps track of each state set, using a corresponding Column instance. | # The algorithm keeps track of each state set, using a corresponding Column instance. | ||||
| # Column keeps track of new items using NewsList instances. | # Column keeps track of new items using NewsList instances. | ||||
| # | # | ||||
| # Instead of running a lexer beforehand, or using a costly char-by-char method, this parser | |||||
| # uses regular expressions by necessity, achieving high performance while maintaining all of | |||||
| # Earley's power in parsing any CFG. | |||||
| # | |||||
| # | |||||
| # Author: Erez Shinan (2017) | # Author: Erez Shinan (2017) | ||||
| # Email : erezshin@gmail.com | # Email : erezshin@gmail.com | ||||
| from functools import cmp_to_key | |||||
| from collections import defaultdict | from collections import defaultdict | ||||
| from ..utils import compare | |||||
| from ..common import ParseError, UnexpectedToken, Terminal | from ..common import ParseError, UnexpectedToken, Terminal | ||||
| from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse | |||||
| from ..tree import Tree | |||||
| from .grammar_analysis import GrammarAnalyzer | from .grammar_analysis import GrammarAnalyzer | ||||
| class EndToken: | |||||
| type = '$end' | |||||
| class Derivation(Tree): | |||||
| def __init__(self, rule, items=None): | |||||
| Tree.__init__(self, 'drv', items or []) | |||||
| self.rule = rule | |||||
| END_TOKEN = EndToken() | |||||
| class Item(object): | |||||
| "An Earley Item, the atom of the algorithm." | |||||
| def __init__(self, rule, ptr, start, tree): | |||||
| self.rule = rule | |||||
| self.ptr = ptr | |||||
| self.start = start | |||||
| self.tree = tree if tree is not None else Derivation(self.rule) | |||||
| @property | |||||
| def expect(self): | |||||
| return self.rule.expansion[self.ptr] | |||||
| @property | |||||
| def is_complete(self): | |||||
| return self.ptr == len(self.rule.expansion) | |||||
| def advance(self, tree): | |||||
| assert self.tree.data == 'drv' | |||||
| new_tree = Derivation(self.rule, self.tree.children + [tree]) | |||||
| return Item(self.rule, self.ptr+1, self.start, new_tree) | |||||
| def __eq__(self, other): | |||||
| return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule | |||||
| def __hash__(self): | |||||
| return hash((self.rule, self.ptr, id(self.start))) | |||||
| def __repr__(self): | |||||
| before = list(map(str, self.rule.expansion[:self.ptr])) | |||||
| after = list(map(str, self.rule.expansion[self.ptr:])) | |||||
| return '<(%d) %s : %s * %s>' % (id(self.start), self.rule.origin, ' '.join(before), ' '.join(after)) | |||||
| class NewsList(list): | |||||
| "Keeps track of newly added items (append-only)" | |||||
| def __init__(self, initial=None): | |||||
| list.__init__(self, initial or []) | |||||
| self.last_iter = 0 | |||||
| def get_news(self): | |||||
| i = self.last_iter | |||||
| self.last_iter = len(self) | |||||
| return self[i:] | |||||
| class Column: | |||||
| "An entry in the table, aka Earley Chart. Contains lists of items." | |||||
| def __init__(self, i): | |||||
| self.i = i | |||||
| self.to_reduce = NewsList() | |||||
| self.to_predict = NewsList() | |||||
| self.to_scan = NewsList() | |||||
| self.item_count = 0 | |||||
| self.added = set() | |||||
| self.completed = {} | |||||
| def add(self, items): | |||||
| """Sort items into scan/predict/reduce newslists | |||||
| Makes sure only unique items are added. | |||||
| """ | |||||
| for item in items: | |||||
| if item.is_complete: | |||||
| # XXX Potential bug: What happens if there's ambiguity in an empty rule? | |||||
| if item.rule.expansion and item in self.completed: | |||||
| old_tree = self.completed[item].tree | |||||
| if old_tree.data != '_ambig': | |||||
| new_tree = old_tree.copy() | |||||
| new_tree.rule = old_tree.rule | |||||
| old_tree.set('_ambig', [new_tree]) | |||||
| if item.tree.children[0] is old_tree: # XXX a little hacky! | |||||
| raise ParseError("Infinite recursion in grammar!") | |||||
| old_tree.children.append(item.tree) | |||||
| else: | |||||
| self.completed[item] = item | |||||
| self.to_reduce.append(item) | |||||
| else: | |||||
| if item not in self.added: | |||||
| self.added.add(item) | |||||
| if isinstance(item.expect, Terminal): | |||||
| self.to_scan.append(item) | |||||
| else: | |||||
| self.to_predict.append(item) | |||||
| self.item_count += 1 # Only count if actually added | |||||
| def __nonzero__(self): | |||||
| return bool(self.item_count) | |||||
| from earley import ResolveAmbig, ApplyCallbacks, Item, NewsList, Derivation, END_TOKEN, Column | |||||
| class Parser: | class Parser: | ||||
| def __init__(self, rules, start_symbol, callback, resolve_ambiguity=True, ignore=()): | def __init__(self, rules, start_symbol, callback, resolve_ambiguity=True, ignore=()): | ||||
| @@ -144,7 +44,7 @@ class Parser: | |||||
| def parse(self, stream, start_symbol=None): | def parse(self, stream, start_symbol=None): | ||||
| # Define parser functions | # Define parser functions | ||||
| start_symbol = start_symbol or self.start_symbol | start_symbol = start_symbol or self.start_symbol | ||||
| matched_terminals = defaultdict(list) | |||||
| delayed_matches = defaultdict(list) | |||||
| def predict(nonterm, column): | def predict(nonterm, column): | ||||
| assert not isinstance(nonterm, Terminal), nonterm | assert not isinstance(nonterm, Terminal), nonterm | ||||
| @@ -178,16 +78,17 @@ class Parser: | |||||
| for item in to_scan: | for item in to_scan: | ||||
| m = item.expect.match(stream, i) | m = item.expect.match(stream, i) | ||||
| if m: | if m: | ||||
| matched_terminals[m.end()].append(item.advance(m.group(0))) | |||||
| delayed_matches[m.end()].append(item.advance(m.group(0))) | |||||
| s = m.group(0) | s = m.group(0) | ||||
| for j in range(1, len(s)): | for j in range(1, len(s)): | ||||
| m = item.expect.match(s[:-j]) | m = item.expect.match(s[:-j]) | ||||
| if m: | if m: | ||||
| matched_terminals[m.end()].append(item.advance(m.group(0))) | |||||
| delayed_matches[m.end()].append(item.advance(m.group(0))) | |||||
| next_set = Column(i+1) | next_set = Column(i+1) | ||||
| next_set.add(matched_terminals[i+1]) | |||||
| next_set.add(delayed_matches[i+1]) | |||||
| del delayed_matches[i+1] # No longer needed, so unburden memory | |||||
| return next_set | return next_set | ||||
| @@ -220,73 +121,3 @@ class Parser: | |||||
| return ApplyCallbacks(self.postprocess).transform(tree) | return ApplyCallbacks(self.postprocess).transform(tree) | ||||
| class ApplyCallbacks(Transformer_NoRecurse): | |||||
| def __init__(self, postprocess): | |||||
| self.postprocess = postprocess | |||||
| def drv(self, tree): | |||||
| children = tree.children | |||||
| callback = self.postprocess[tree.rule] | |||||
| if callback: | |||||
| return callback(children) | |||||
| else: | |||||
| return Tree(rule.origin, children) | |||||
| def _compare_rules(rule1, rule2): | |||||
| assert rule1.origin == rule2.origin | |||||
| c = compare( len(rule1.expansion), len(rule2.expansion)) | |||||
| if rule1.origin.startswith('__'): # XXX hack! We need to set priority in parser, not here | |||||
| c = -c | |||||
| return c | |||||
| def _compare_drv(tree1, tree2): | |||||
| if not (isinstance(tree1, Tree) and isinstance(tree2, Tree)): | |||||
| return -compare(tree1, tree2) | |||||
| c = _compare_rules(tree1.rule, tree2.rule) | |||||
| if c: | |||||
| return c | |||||
| # rules are "equal", so compare trees | |||||
| for t1, t2 in zip(tree1.children, tree2.children): | |||||
| c = _compare_drv(t1, t2) | |||||
| if c: | |||||
| return c | |||||
| return -compare(len(tree1.children), len(tree2.children)) | |||||
| class ResolveAmbig(Visitor_NoRecurse): | |||||
| """Resolves ambiguity in resulting parse tree. | |||||
| Minimizes rule length, maximizes match length. | |||||
| """ | |||||
| def _ambig(self, tree): | |||||
| best = min(tree.children, key=cmp_to_key(_compare_drv)) | |||||
| assert best.data == 'drv' | |||||
| tree.set('drv', best.children) | |||||
| tree.rule = best.rule # needed for applying callbacks | |||||
| # RULES = [ | |||||
| # ('a', ['d']), | |||||
| # ('d', ['b']), | |||||
| # ('b', ['C']), | |||||
| # ('b', ['b', 'C']), | |||||
| # ('b', ['C', 'b']), | |||||
| # ] | |||||
| # p = Parser(RULES, 'a') | |||||
| # for x in p.parse('CC'): | |||||
| # print x.pretty() | |||||
| #--------------- | |||||
| # RULES = [ | |||||
| # ('s', ['a', 'a']), | |||||
| # ('a', ['b', 'b']), | |||||
| # ('b', ['C'], lambda (x,): x), | |||||
| # ('b', ['b', 'C']), | |||||
| # ] | |||||
| # p = Parser(RULES, 's', {}) | |||||
| # print p.parse('CCCCC').pretty() | |||||
| @@ -4,10 +4,23 @@ import unittest | |||||
| import logging | import logging | ||||
| from .test_trees import TestTrees | from .test_trees import TestTrees | ||||
| # from .test_selectors import TestSelectors | # from .test_selectors import TestSelectors | ||||
| from .test_parser import TestLalrStandard, TestEarleyStandard, TestLalrContextual, TestParsers, TestEarleyScanless, TestEarley, TestEarleyDynamic | |||||
| # from .test_grammars import TestPythonG, TestConfigG | # from .test_grammars import TestPythonG, TestConfigG | ||||
| from .test_parser import ( | |||||
| TestLalrStandard, | |||||
| TestEarleyStandard, | |||||
| TestLalrContextual, | |||||
| TestEarleyScanless, | |||||
| TestEarleyDynamic, | |||||
| TestFullEarleyScanless, | |||||
| TestFullEarleyDynamic, | |||||
| TestParsers, | |||||
| ) | |||||
| logging.basicConfig(level=logging.INFO) | logging.basicConfig(level=logging.INFO) | ||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| @@ -51,90 +51,95 @@ class TestParsers(unittest.TestCase): | |||||
| self.assertRaises(ParseError, l.parse, 'a') | self.assertRaises(ParseError, l.parse, 'a') | ||||
| class TestEarley(unittest.TestCase): | |||||
| def test_anon_in_scanless(self): | |||||
| # Fails an Earley implementation without special handling for empty rules, | |||||
| # or re-processing of already completed rules. | |||||
| g = Lark(r"""start: B | |||||
| B: ("ab"|/[^b]/)* | |||||
| """, lexer='dynamic') | |||||
| def _make_full_earley_test(LEXER): | |||||
| class _TestFullEarley(unittest.TestCase): | |||||
| def test_anon_in_scanless(self): | |||||
| # Fails an Earley implementation without special handling for empty rules, | |||||
| # or re-processing of already completed rules. | |||||
| g = Lark(r"""start: B | |||||
| B: ("ab"|/[^b]/)* | |||||
| """, lexer=LEXER) | |||||
| self.assertEqual( g.parse('abc').children[0], 'abc') | |||||
| self.assertEqual( g.parse('abc').children[0], 'abc') | |||||
| def test_earley_scanless(self): | |||||
| g = Lark("""start: A "b" c | |||||
| A: "a"+ | |||||
| c: "abc" | |||||
| """, parser="earley", lexer='dynamic') | |||||
| x = g.parse('aaaababc') | |||||
| def test_earley_scanless(self): | |||||
| g = Lark("""start: A "b" c | |||||
| A: "a"+ | |||||
| c: "abc" | |||||
| """, parser="earley", lexer=LEXER) | |||||
| x = g.parse('aaaababc') | |||||
| def test_earley_scanless2(self): | |||||
| grammar = """ | |||||
| start: statement+ | |||||
| def test_earley_scanless2(self): | |||||
| grammar = """ | |||||
| start: statement+ | |||||
| statement: "r" | |||||
| | "c" /[a-z]/+ | |||||
| statement: "r" | |||||
| | "c" /[a-z]/+ | |||||
| %ignore " " | |||||
| """ | |||||
| %ignore " " | |||||
| """ | |||||
| program = """c b r""" | |||||
| program = """c b r""" | |||||
| l = Lark(grammar, parser='earley', lexer='dynamic') | |||||
| l.parse(program) | |||||
| l = Lark(grammar, parser='earley', lexer=LEXER) | |||||
| l.parse(program) | |||||
| def test_earley_scanless3(self): | |||||
| "Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)" | |||||
| def test_earley_scanless3(self): | |||||
| "Tests prioritization and disambiguation for pseudo-terminals (there should be only one result)" | |||||
| grammar = """ | |||||
| start: A A | |||||
| A: "a"+ | |||||
| """ | |||||
| grammar = """ | |||||
| start: A A | |||||
| A: "a"+ | |||||
| """ | |||||
| l = Lark(grammar, parser='earley', lexer='dynamic') | |||||
| res = l.parse("aaa") | |||||
| self.assertEqual(res.children, ['aa', 'a']) | |||||
| l = Lark(grammar, parser='earley', lexer=LEXER) | |||||
| res = l.parse("aaa") | |||||
| self.assertEqual(res.children, ['aa', 'a']) | |||||
| def test_earley_scanless4(self): | |||||
| grammar = """ | |||||
| start: A A? | |||||
| A: "a"+ | |||||
| """ | |||||
| def test_earley_scanless4(self): | |||||
| grammar = """ | |||||
| start: A A? | |||||
| A: "a"+ | |||||
| """ | |||||
| l = Lark(grammar, parser='earley', lexer=LEXER) | |||||
| res = l.parse("aaa") | |||||
| self.assertEqual(res.children, ['aaa']) | |||||
| l = Lark(grammar, parser='earley', lexer='dynamic') | |||||
| res = l.parse("aaa") | |||||
| self.assertEqual(res.children, ['aaa']) | |||||
| def test_earley_repeating_empty(self): | |||||
| # This was a sneaky bug! | |||||
| def test_earley_repeating_empty(self): | |||||
| # This was a sneaky bug! | |||||
| grammar = """ | |||||
| !start: "a" empty empty "b" | |||||
| empty: empty2 | |||||
| empty2: | |||||
| """ | |||||
| grammar = """ | |||||
| !start: "a" empty empty "b" | |||||
| empty: empty2 | |||||
| empty2: | |||||
| """ | |||||
| parser = Lark(grammar, parser='earley', lexer=LEXER) | |||||
| res = parser.parse('ab') | |||||
| parser = Lark(grammar, parser='earley', lexer='dynamic') | |||||
| res = parser.parse('ab') | |||||
| empty_tree = Tree('empty', [Tree('empty2', [])]) | |||||
| self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b']) | |||||
| empty_tree = Tree('empty', [Tree('empty2', [])]) | |||||
| self.assertSequenceEqual(res.children, ['a', empty_tree, empty_tree, 'b']) | |||||
| def test_earley_explicit_ambiguity(self): | |||||
| # This was a sneaky bug! | |||||
| def test_earley_explicit_ambiguity(self): | |||||
| # This was a sneaky bug! | |||||
| grammar = """ | |||||
| start: a b | ab | |||||
| a: "a" | |||||
| b: "b" | |||||
| ab: "ab" | |||||
| """ | |||||
| grammar = """ | |||||
| start: a b | ab | |||||
| a: "a" | |||||
| b: "b" | |||||
| ab: "ab" | |||||
| """ | |||||
| parser = Lark(grammar, parser='earley', lexer=LEXER, ambiguity='explicit') | |||||
| res = parser.parse('ab') | |||||
| parser = Lark(grammar, parser='earley', lexer='dynamic', ambiguity='explicit') | |||||
| res = parser.parse('ab') | |||||
| self.assertEqual( res.data, '_ambig') | |||||
| self.assertEqual( len(res.children), 2) | |||||
| self.assertEqual( res.data, '_ambig') | |||||
| self.assertEqual( len(res.children), 2) | |||||
| _NAME = "TestFullEarley" + (LEXER or 'Scanless').capitalize() | |||||
| _TestFullEarley.__name__ = _NAME | |||||
| globals()[_NAME] = _TestFullEarley | |||||
| def _make_parser_test(LEXER, PARSER): | def _make_parser_test(LEXER, PARSER): | ||||
| @@ -444,7 +449,7 @@ def _make_parser_test(LEXER, PARSER): | |||||
| """) | """) | ||||
| x = g.parse('aababc') | x = g.parse('aababc') | ||||
| @unittest.skipIf(LEXER is None, "Known bug with scanless parsing") # TODO | |||||
| @unittest.skipIf(LEXER in (None, 'dynamic'), "Known bug with scanless parsing") # TODO | |||||
| def test_token_not_anon(self): | def test_token_not_anon(self): | ||||
| """Tests that "a" is matched as A, rather than an anonymous token. | """Tests that "a" is matched as A, rather than an anonymous token. | ||||
| @@ -664,6 +669,8 @@ _TO_TEST = [ | |||||
| for _LEXER, _PARSER in _TO_TEST: | for _LEXER, _PARSER in _TO_TEST: | ||||
| _make_parser_test(_LEXER, _PARSER) | _make_parser_test(_LEXER, _PARSER) | ||||
| for _LEXER in (None, 'dynamic'): | |||||
| _make_full_earley_test(_LEXER) | |||||
| if __name__ == '__main__': | if __name__ == '__main__': | ||||
| unittest.main() | unittest.main() | ||||