diff --git a/examples/python_parser.py b/examples/python_parser.py index ddbd5c4..f738a35 100644 --- a/examples/python_parser.py +++ b/examples/python_parser.py @@ -78,6 +78,6 @@ def test_earley_equals_lalr(): if __name__ == '__main__': test_python_lib() - test_earley_equals_lalr() + # test_earley_equals_lalr() # python_parser3.parse(_read(sys.argv[1]) + '\n') diff --git a/lark/grammar.py b/lark/grammar.py index d257bc4..2689389 100644 --- a/lark/grammar.py +++ b/lark/grammar.py @@ -1,3 +1,25 @@ +class Symbol(object): + is_term = NotImplemented + + def __init__(self, name): + self.name = name + + def __eq__(self, other): + assert isinstance(other, Symbol), other + return self.is_term == other.is_term and self.name == other.name + + def __hash__(self): + return hash(self.name) + +class Terminal(Symbol): + is_term = True + + @property + def filter_out(self): + return self.name.startswith('_') + +class NonTerminal(Symbol): + is_term = False class Rule(object): """ diff --git a/lark/lexer.py b/lark/lexer.py index 19e1be4..e7af2a2 100644 --- a/lark/lexer.py +++ b/lark/lexer.py @@ -3,7 +3,7 @@ import re from .utils import Str, classify -from .common import is_terminal, PatternStr, PatternRE, TokenDef +from .common import PatternStr, PatternRE, TokenDef ###{standalone class LexError(Exception): @@ -234,7 +234,7 @@ class ContextualLexer: lexer = lexer_by_tokens[key] except KeyError: accepts = set(accepts) | set(ignore) | set(always_accept) - state_tokens = [tokens_by_name[n] for n in accepts if is_terminal(n) and n!='$END'] + state_tokens = [tokens_by_name[n] for n in accepts if n.is_term and n.name!='$END'] lexer = Lexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks) lexer_by_tokens[key] = lexer diff --git a/lark/load_grammar.py b/lark/load_grammar.py index 43d1bf5..6800801 100644 --- a/lark/load_grammar.py +++ b/lark/load_grammar.py @@ -12,7 +12,7 @@ from .parse_tree_builder import ParseTreeBuilder from .parser_frontends import LALR from .parsers.lalr_parser import UnexpectedToken from .common import is_terminal, GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef -from .grammar import RuleOptions, Rule +from .grammar import RuleOptions, Rule, Terminal, NonTerminal from .tree import Tree, Transformer, InlineTransformer, Visitor, SlottedTree as ST @@ -523,7 +523,9 @@ class Grammar: if alias and name.startswith('_'): raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) - rule = Rule(name, expansion, alias, options) + expansion = [Terminal(x) if is_terminal(x) else NonTerminal(x) for x in expansion] + + rule = Rule(NonTerminal(name), expansion, alias, options) compiled_rules.append(rule) return tokens, compiled_rules, self.ignore @@ -578,12 +580,16 @@ def options_from_rule(name, *x): return name, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority) + +def symbols_from_strcase(expansion): + return [Terminal(x) if is_terminal(x) else NonTerminal(x) for x in expansion] + class GrammarLoader: def __init__(self): tokens = [TokenDef(name, PatternRE(value)) for name, value in TOKENS.items()] - rules = [options_from_rule(name, x) for name, x in RULES.items()] - rules = [Rule(r, x.split(), None, o) for r, xs, o in rules for x in xs] + rules = [options_from_rule(name, x) for name, x in RULES.items()] + rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), None, o) for r, xs, o in rules for x in xs] callback = ParseTreeBuilder(rules, ST).create_callback() lexer_conf = LexerConf(tokens, ['WS', 'COMMENT']) diff --git a/lark/parse_tree_builder.py b/lark/parse_tree_builder.py index 7c74178..54a1bac 100644 --- a/lark/parse_tree_builder.py +++ b/lark/parse_tree_builder.py @@ -84,7 +84,7 @@ class ChildFilterLALR(ChildFilter): return self.node_builder(filtered) def _should_expand(sym): - return not is_terminal(sym) and sym.startswith('_') + return not sym.is_term and sym.name.startswith('_') def maybe_create_child_filter(expansion, filter_out, ambiguous): to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion) if sym not in filter_out] @@ -109,8 +109,8 @@ class ParseTreeBuilder: def _init_builders(self, rules): filter_out = {rule.origin for rule in rules if rule.options and rule.options.filter_out} - filter_out |= {sym for rule in rules for sym in rule.expansion if is_terminal(sym) and sym.startswith('_')} - assert all(x.startswith('_') for x in filter_out) + filter_out |= {sym for rule in rules for sym in rule.expansion if sym.is_term and sym.filter_out} + assert all(t.filter_out for t in filter_out) for rule in rules: options = rule.options @@ -132,9 +132,9 @@ class ParseTreeBuilder: callback = Callback() for rule, wrapper_chain in self.rule_builders: - internal_callback_name = '_callback_%s_%s' % (rule.origin, '_'.join(rule.expansion)) + internal_callback_name = '_callback_%s_%s' % (rule.origin, '_'.join(x.name for x in rule.expansion)) - user_callback_name = rule.alias or rule.origin + user_callback_name = rule.alias or rule.origin.name try: f = transformer._get_func(user_callback_name) except AttributeError: diff --git a/lark/parsers/grammar_analysis.py b/lark/parsers/grammar_analysis.py index f34d5c1..f49e4bc 100644 --- a/lark/parsers/grammar_analysis.py +++ b/lark/parsers/grammar_analysis.py @@ -1,7 +1,7 @@ from ..utils import bfs, fzset, classify -from ..common import GrammarError, is_terminal -from ..grammar import Rule +from ..common import GrammarError +from ..grammar import Rule, Terminal, NonTerminal class RulePtr(object): @@ -67,7 +67,7 @@ def calculate_sets(rules): FIRST = {} FOLLOW = {} for sym in symbols: - FIRST[sym]={sym} if is_terminal(sym) else set() + FIRST[sym]={sym} if sym.is_term else set() FOLLOW[sym]=set() # Calculate NULLABLE and FIRST @@ -108,16 +108,16 @@ class GrammarAnalyzer(object): def __init__(self, parser_conf, debug=False): self.debug = debug - rules = parser_conf.rules + [Rule('$root', [parser_conf.start, '$END'])] + rules = parser_conf.rules + [Rule(NonTerminal('$root'), [NonTerminal(parser_conf.start), Terminal('$END')])] self.rules_by_origin = classify(rules, lambda r: r.origin) assert len(rules) == len(set(rules)) for r in rules: for sym in r.expansion: - if not (is_terminal(sym) or sym in self.rules_by_origin): + if not (sym.is_term or sym in self.rules_by_origin): raise GrammarError("Using an undefined rule: %s" % sym) # TODO test validation - self.start_state = self.expand_rule('$root') + self.start_state = self.expand_rule(NonTerminal('$root')) self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules) @@ -125,7 +125,7 @@ class GrammarAnalyzer(object): "Returns all init_ptrs accessible by rule (recursive)" init_ptrs = set() def _expand_rule(rule): - assert not is_terminal(rule), rule + assert not rule.is_term, rule for r in self.rules_by_origin[rule]: init_ptr = RulePtr(r, 0) @@ -133,7 +133,7 @@ class GrammarAnalyzer(object): if r.expansion: # if not empty rule new_r = init_ptr.next - if not is_terminal(new_r): + if not new_r.is_term: yield new_r for _ in bfs([rule], _expand_rule): @@ -142,8 +142,8 @@ class GrammarAnalyzer(object): return fzset(init_ptrs) def _first(self, r): - if is_terminal(r): + if r.is_term: return {r} else: - return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)} + return {rp.next for rp in self.expand_rule(r) if rp.next.is_term} diff --git a/lark/parsers/lalr_analysis.py b/lark/parsers/lalr_analysis.py index 4af28f9..6903be9 100644 --- a/lark/parsers/lalr_analysis.py +++ b/lark/parsers/lalr_analysis.py @@ -10,9 +10,9 @@ import logging from collections import defaultdict from ..utils import classify, classify_bool, bfs, fzset -from ..common import GrammarError, is_terminal +from ..common import GrammarError -from .grammar_analysis import GrammarAnalyzer +from .grammar_analysis import GrammarAnalyzer, Terminal class Action: def __init__(self, name): @@ -70,12 +70,12 @@ class LALR_Analyzer(GrammarAnalyzer): rps = {rp.advance(sym) for rp in rps} for rp in set(rps): - if not rp.is_satisfied and not is_terminal(rp.next): + if not rp.is_satisfied and not rp.next.is_term: rps |= self.expand_rule(rp.next) new_state = fzset(rps) lookahead[sym].append((Shift, new_state)) - if sym == '$END': + if sym == Terminal('$END'): self.end_states.append( new_state ) yield new_state @@ -93,7 +93,7 @@ class LALR_Analyzer(GrammarAnalyzer): if not len(v) == 1: raise GrammarError("Collision in %s: %s" %(k, ', '.join(['\n * %s: %s' % x for x in v]))) - self.states[state] = {k:v[0] for k, v in lookahead.items()} + self.states[state] = {k.name:v[0] for k, v in lookahead.items()} for _ in bfs([self.start_state], step): pass diff --git a/lark/parsers/lalr_parser.py b/lark/parsers/lalr_parser.py index baea614..164a227 100644 --- a/lark/parsers/lalr_parser.py +++ b/lark/parsers/lalr_parser.py @@ -59,7 +59,7 @@ class _Parser: value = self.callbacks[rule](s) - _action, new_state = get_action(rule.origin) + _action, new_state = get_action(rule.origin.name) assert _action is Shift state_stack.append(new_state) value_stack.append(value)