@@ -41,7 +41,7 @@ class LexerConf:
 
 class ParserConf:
     def __init__(self, rules, callback, start):
-        assert all(len(r)==3 for r in rules)
+        assert all(len(r) == 4 for r in rules)
         self.rules = rules
         self.callback = callback
         self.start = start
@@ -75,6 +75,7 @@ TOKENS = {
     '_TO': '->',
     '_IGNORE': r'%ignore',
     '_IMPORT': r'%import',
+    'NUMBER': r'\d+',
 }
 
 RULES = {
@@ -82,7 +83,8 @@ RULES = {
     '_list': ['_item', '_list _item'],
     '_item': ['rule', 'token', 'statement', '_NL'],
 
-    'rule': ['RULE _COLON expansions _NL'],
+    'rule': ['RULE _COLON expansions _NL',
+             'RULE _DOT NUMBER _COLON expansions _NL'],
     'expansions': ['alias',
                    'expansions _OR alias',
                    'expansions _NL _OR alias'],
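This new alternative lets a rule name carry a ".NUMBER" priority suffix. A minimal sketch of the grammar syntax this enables, mirroring the test added at the end of this diff (it assumes the patched lark is importable as `lark`):

    from lark import Lark

    grammar = """
    start: a | b
    a.1: "a"
    b.2: "a"
    """

    parser = Lark(grammar, parser='earley', lexer='standard')
    tree = parser.parse("a")
    print(tree.children[0].data)   # 'b' -- the higher-priority rule wins the ambiguity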
@@ -470,21 +472,29 @@ class Grammar:
 
 class RuleOptions:
-    def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False):
+    def __init__(self, keep_all_tokens=False, expand1=False, create_token=None, filter_out=False, priority=None):
         self.keep_all_tokens = keep_all_tokens
         self.expand1 = expand1
         self.create_token = create_token  # used for scanless postprocessing
+        self.priority = priority
         self.filter_out = filter_out  # remove this rule from the tree
                                       # used for "token"-rules in scanless
 
     @classmethod
-    def from_rule(cls, name, expansions):
+    def from_rule(cls, name, *x):
+        if len(x) > 1:
+            priority, expansions = x
+            priority = int(priority)
+        else:
+            expansions, = x
+            priority = None
+
         keep_all_tokens = name.startswith('!')
         name = name.lstrip('!')
         expand1 = name.startswith('?')
         name = name.lstrip('?')
-        return name, expansions, cls(keep_all_tokens, expand1)
+        return name, expansions, cls(keep_all_tokens, expand1, priority=priority)
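For reference, the two call shapes `from_rule` now accepts match the `(name, expansions)` and `(name, priority, expansions)` tuples produced by the loader below. Roughly (illustrative values only; the real `expansions` argument is a parse tree, not a list):

    name, expansions, opts = RuleOptions.from_rule('?sum', ['product'])
    assert opts.priority is None and opts.expand1           # no priority given

    name, expansions, opts = RuleOptions.from_rule('sum', '2', ['product'])
    assert opts.priority == 2                                # "sum.2: ..." in the grammar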
@@ -605,7 +615,7 @@ class GrammarLoader:
                 raise GrammarError("Token '%s' defined more than once" % name)
             token_names.add(name)
 
-        rules = [RuleOptions.from_rule(name, x) for name, x in rule_defs]
+        rules = [RuleOptions.from_rule(*x) for x in rule_defs]
 
         rule_names = set()
         for name, _x, _o in rules:
@@ -123,6 +123,6 @@ class ParseTreeBuilder:
                    raise GrammarError("Rule expansion '%s' already exists in rule %s" % (' '.join(expansion), origin))
                setattr(callback, callback_name, alias_handler)
 
-            new_rules.append(( _origin, expansion, callback_name ))
+            new_rules.append(( _origin, expansion, callback_name, options ))
 
        return new_rules, callback
@@ -129,7 +129,7 @@ class Earley_NoLex:
     def __init__(self, lexer_conf, parser_conf, options=None):
         self.token_by_name = {t.name:t for t in lexer_conf.tokens}
 
-        rules = [(n, list(self._prepare_expansion(x)), a) for n,x,a in parser_conf.rules]
+        rules = [(n, list(self._prepare_expansion(x)), a, o) for n,x,a,o in parser_conf.rules]
 
         resolve_ambiguity = (options.ambiguity=='resolve') if options else True
         self.parser = earley.Parser(rules,
@@ -156,7 +156,7 @@ class Earley(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
         WithLexer.__init__(self, lexer_conf)
 
-        rules = [(n, self._prepare_expansion(x), a) for n,x,a in parser_conf.rules]
+        rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules]
 
         resolve_ambiguity = (options.ambiguity=='resolve') if options else True
         self.parser = earley.Parser(rules,
@@ -224,6 +224,11 @@ class ApplyCallbacks(Transformer_NoRecurse):
         return Tree(rule.origin, children)
 
 def _compare_rules(rule1, rule2):
+    if rule1.options and rule2.options:
+        if rule1.options.priority is not None and rule2.options.priority is not None:
+            assert rule1.options.priority != rule2.options.priority, "Priority is the same between both rules: %s == %s" % (rule1, rule2)
+            return -compare(rule1.options.priority, rule2.options.priority)
+
     if rule1.origin != rule2.origin:
         return 0
 
     c = compare( len(rule1.expansion), len(rule2.expansion))
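`compare` itself is not shown in this diff; it is presumably lark's cmp-style helper. A stand-in, included only as an assumption to make the negated comparison concrete:

    # Assumed cmp-style helper, equivalent in spirit to the one lark already uses:
    def compare(a, b):
        return -1 if a < b else (1 if a > b else 0)

    # -compare(1, 2) == 1, so rules carrying different priorities are ordered by
    # priority before the origin/expansion-length heuristics below are consulted;
    # the new test at the bottom of this diff checks that the higher-numbered rule
    # is the one that ends up in the resulting parse tree.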
@@ -7,10 +7,11 @@ class Rule(object):
         origin : a symbol
         expansion : a list of symbols
     """
-    def __init__(self, origin, expansion, alias=None):
+    def __init__(self, origin, expansion, alias=None, options=None):
         self.origin = origin
         self.expansion = expansion
         self.alias = alias
+        self.options = options
 
     def __repr__(self):
         return '<%s : %s>' % (self.origin, ' '.join(map(str,self.expansion)))
@@ -111,12 +112,12 @@ class GrammarAnalyzer(object):
         self.debug = debug
 
         rule_tuples = list(rule_tuples)
         rule_tuples.append(('$root', [start_symbol, '$end']))
-        rule_tuples = [(t[0], t[1], None) if len(t)==2 else t for t in rule_tuples]
+        rule_tuples = [(t[0], t[1], None, None) if len(t)==2 else t for t in rule_tuples]
 
         self.rules = set()
-        self.rules_by_origin = {o: [] for o, _x, _a in rule_tuples}
-        for origin, exp, alias in rule_tuples:
-            r = Rule( origin, exp, alias )
+        self.rules_by_origin = {o: [] for o, _x, _a, _opt in rule_tuples}
+        for origin, exp, alias, options in rule_tuples:
+            r = Rule( origin, exp, alias, options )
             self.rules.add(r)
             self.rules_by_origin[origin].append(r)
@@ -9,6 +9,7 @@ from .lalr_analysis import LALR_Analyzer, ACTION_SHIFT
 
 class Parser(object):
     def __init__(self, parser_conf):
+        assert all(o is None or o.priority is None for n,x,a,o in parser_conf.rules), "LALR doesn't yet support prioritization"
         self.analysis = LALR_Analyzer(parser_conf.rules, parser_conf.start)
         self.analysis.compute_lookahead()
         self.callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
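A rough sketch of the guard tripping, assuming the patched lark is importable (the grammar here is hypothetical and the exact construction path may differ between versions):

    from lark import Lark

    try:
        Lark('start: a\na.2: "x"\n', parser='lalr')
    except AssertionError as e:
        print(e)   # expected: LALR doesn't yet support prioritization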
@@ -621,6 +621,30 @@ def _make_parser_test(LEXER, PARSER):
             self.assertEqual(len(tree.children), 2)
 
+        @unittest.skipIf(PARSER != 'earley', "Currently only Earley supports priority in rules")
+        def test_earley_prioritization(self):
+            "Tests effect of priority on result"
+
+            grammar = """
+            start: a | b
+            a.1: "a"
+            b.2: "a"
+            """
+
+            l = Lark(grammar, parser='earley', lexer='standard')
+            res = l.parse("a")
+            self.assertEqual(res.children[0].data, 'b')
+
+            grammar = """
+            start: a | b
+            a.2: "a"
+            b.1: "a"
+            """
+
+            l = Lark(grammar, parser='earley', lexer='standard')
+            res = l.parse("a")
+            self.assertEqual(res.children[0].data, 'a')
+
     _NAME = "Test" + PARSER.capitalize() + (LEXER or 'Scanless').capitalize()