@@ -78,6 +78,6 @@ def test_earley_equals_lalr(): | |||||
if __name__ == '__main__': | if __name__ == '__main__': | ||||
test_python_lib() | test_python_lib() | ||||
test_earley_equals_lalr() | |||||
# test_earley_equals_lalr() | |||||
# python_parser3.parse(_read(sys.argv[1]) + '\n') | # python_parser3.parse(_read(sys.argv[1]) + '\n') | ||||
@@ -1,3 +1,25 @@ | |||||
class Symbol(object): | |||||
is_term = NotImplemented | |||||
def __init__(self, name): | |||||
self.name = name | |||||
def __eq__(self, other): | |||||
assert isinstance(other, Symbol), other | |||||
return self.is_term == other.is_term and self.name == other.name | |||||
def __hash__(self): | |||||
return hash(self.name) | |||||
class Terminal(Symbol): | |||||
is_term = True | |||||
@property | |||||
def filter_out(self): | |||||
return self.name.startswith('_') | |||||
class NonTerminal(Symbol): | |||||
is_term = False | |||||
class Rule(object): | class Rule(object): | ||||
""" | """ | ||||
@@ -3,7 +3,7 @@ | |||||
import re | import re | ||||
from .utils import Str, classify | from .utils import Str, classify | ||||
from .common import is_terminal, PatternStr, PatternRE, TokenDef | |||||
from .common import PatternStr, PatternRE, TokenDef | |||||
###{standalone | ###{standalone | ||||
class LexError(Exception): | class LexError(Exception): | ||||
@@ -234,7 +234,7 @@ class ContextualLexer: | |||||
lexer = lexer_by_tokens[key] | lexer = lexer_by_tokens[key] | ||||
except KeyError: | except KeyError: | ||||
accepts = set(accepts) | set(ignore) | set(always_accept) | accepts = set(accepts) | set(ignore) | set(always_accept) | ||||
state_tokens = [tokens_by_name[n] for n in accepts if is_terminal(n) and n!='$END'] | |||||
state_tokens = [tokens_by_name[n] for n in accepts if n.is_term and n.name!='$END'] | |||||
lexer = Lexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks) | lexer = Lexer(state_tokens, ignore=ignore, user_callbacks=user_callbacks) | ||||
lexer_by_tokens[key] = lexer | lexer_by_tokens[key] = lexer | ||||
@@ -12,7 +12,7 @@ from .parse_tree_builder import ParseTreeBuilder | |||||
from .parser_frontends import LALR | from .parser_frontends import LALR | ||||
from .parsers.lalr_parser import UnexpectedToken | from .parsers.lalr_parser import UnexpectedToken | ||||
from .common import is_terminal, GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef | from .common import is_terminal, GrammarError, LexerConf, ParserConf, PatternStr, PatternRE, TokenDef | ||||
from .grammar import RuleOptions, Rule | |||||
from .grammar import RuleOptions, Rule, Terminal, NonTerminal | |||||
from .tree import Tree, Transformer, InlineTransformer, Visitor, SlottedTree as ST | from .tree import Tree, Transformer, InlineTransformer, Visitor, SlottedTree as ST | ||||
@@ -523,7 +523,9 @@ class Grammar: | |||||
if alias and name.startswith('_'): | if alias and name.startswith('_'): | ||||
raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) | raise GrammarError("Rule %s is marked for expansion (it starts with an underscore) and isn't allowed to have aliases (alias=%s)" % (name, alias)) | ||||
rule = Rule(name, expansion, alias, options) | |||||
expansion = [Terminal(x) if is_terminal(x) else NonTerminal(x) for x in expansion] | |||||
rule = Rule(NonTerminal(name), expansion, alias, options) | |||||
compiled_rules.append(rule) | compiled_rules.append(rule) | ||||
return tokens, compiled_rules, self.ignore | return tokens, compiled_rules, self.ignore | ||||
@@ -578,12 +580,16 @@ def options_from_rule(name, *x): | |||||
return name, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority) | return name, expansions, RuleOptions(keep_all_tokens, expand1, priority=priority) | ||||
def symbols_from_strcase(expansion): | |||||
return [Terminal(x) if is_terminal(x) else NonTerminal(x) for x in expansion] | |||||
class GrammarLoader: | class GrammarLoader: | ||||
def __init__(self): | def __init__(self): | ||||
tokens = [TokenDef(name, PatternRE(value)) for name, value in TOKENS.items()] | tokens = [TokenDef(name, PatternRE(value)) for name, value in TOKENS.items()] | ||||
rules = [options_from_rule(name, x) for name, x in RULES.items()] | |||||
rules = [Rule(r, x.split(), None, o) for r, xs, o in rules for x in xs] | |||||
rules = [options_from_rule(name, x) for name, x in RULES.items()] | |||||
rules = [Rule(NonTerminal(r), symbols_from_strcase(x.split()), None, o) for r, xs, o in rules for x in xs] | |||||
callback = ParseTreeBuilder(rules, ST).create_callback() | callback = ParseTreeBuilder(rules, ST).create_callback() | ||||
lexer_conf = LexerConf(tokens, ['WS', 'COMMENT']) | lexer_conf = LexerConf(tokens, ['WS', 'COMMENT']) | ||||
@@ -84,7 +84,7 @@ class ChildFilterLALR(ChildFilter): | |||||
return self.node_builder(filtered) | return self.node_builder(filtered) | ||||
def _should_expand(sym): | def _should_expand(sym): | ||||
return not is_terminal(sym) and sym.startswith('_') | |||||
return not sym.is_term and sym.name.startswith('_') | |||||
def maybe_create_child_filter(expansion, filter_out, ambiguous): | def maybe_create_child_filter(expansion, filter_out, ambiguous): | ||||
to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion) if sym not in filter_out] | to_include = [(i, _should_expand(sym)) for i, sym in enumerate(expansion) if sym not in filter_out] | ||||
@@ -109,8 +109,8 @@ class ParseTreeBuilder: | |||||
def _init_builders(self, rules): | def _init_builders(self, rules): | ||||
filter_out = {rule.origin for rule in rules if rule.options and rule.options.filter_out} | filter_out = {rule.origin for rule in rules if rule.options and rule.options.filter_out} | ||||
filter_out |= {sym for rule in rules for sym in rule.expansion if is_terminal(sym) and sym.startswith('_')} | |||||
assert all(x.startswith('_') for x in filter_out) | |||||
filter_out |= {sym for rule in rules for sym in rule.expansion if sym.is_term and sym.filter_out} | |||||
assert all(t.filter_out for t in filter_out) | |||||
for rule in rules: | for rule in rules: | ||||
options = rule.options | options = rule.options | ||||
@@ -132,9 +132,9 @@ class ParseTreeBuilder: | |||||
callback = Callback() | callback = Callback() | ||||
for rule, wrapper_chain in self.rule_builders: | for rule, wrapper_chain in self.rule_builders: | ||||
internal_callback_name = '_callback_%s_%s' % (rule.origin, '_'.join(rule.expansion)) | |||||
internal_callback_name = '_callback_%s_%s' % (rule.origin, '_'.join(x.name for x in rule.expansion)) | |||||
user_callback_name = rule.alias or rule.origin | |||||
user_callback_name = rule.alias or rule.origin.name | |||||
try: | try: | ||||
f = transformer._get_func(user_callback_name) | f = transformer._get_func(user_callback_name) | ||||
except AttributeError: | except AttributeError: | ||||
@@ -1,7 +1,7 @@ | |||||
from ..utils import bfs, fzset, classify | from ..utils import bfs, fzset, classify | ||||
from ..common import GrammarError, is_terminal | |||||
from ..grammar import Rule | |||||
from ..common import GrammarError | |||||
from ..grammar import Rule, Terminal, NonTerminal | |||||
class RulePtr(object): | class RulePtr(object): | ||||
@@ -67,7 +67,7 @@ def calculate_sets(rules): | |||||
FIRST = {} | FIRST = {} | ||||
FOLLOW = {} | FOLLOW = {} | ||||
for sym in symbols: | for sym in symbols: | ||||
FIRST[sym]={sym} if is_terminal(sym) else set() | |||||
FIRST[sym]={sym} if sym.is_term else set() | |||||
FOLLOW[sym]=set() | FOLLOW[sym]=set() | ||||
# Calculate NULLABLE and FIRST | # Calculate NULLABLE and FIRST | ||||
@@ -108,16 +108,16 @@ class GrammarAnalyzer(object): | |||||
def __init__(self, parser_conf, debug=False): | def __init__(self, parser_conf, debug=False): | ||||
self.debug = debug | self.debug = debug | ||||
rules = parser_conf.rules + [Rule('$root', [parser_conf.start, '$END'])] | |||||
rules = parser_conf.rules + [Rule(NonTerminal('$root'), [NonTerminal(parser_conf.start), Terminal('$END')])] | |||||
self.rules_by_origin = classify(rules, lambda r: r.origin) | self.rules_by_origin = classify(rules, lambda r: r.origin) | ||||
assert len(rules) == len(set(rules)) | assert len(rules) == len(set(rules)) | ||||
for r in rules: | for r in rules: | ||||
for sym in r.expansion: | for sym in r.expansion: | ||||
if not (is_terminal(sym) or sym in self.rules_by_origin): | |||||
if not (sym.is_term or sym in self.rules_by_origin): | |||||
raise GrammarError("Using an undefined rule: %s" % sym) # TODO test validation | raise GrammarError("Using an undefined rule: %s" % sym) # TODO test validation | ||||
self.start_state = self.expand_rule('$root') | |||||
self.start_state = self.expand_rule(NonTerminal('$root')) | |||||
self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules) | self.FIRST, self.FOLLOW, self.NULLABLE = calculate_sets(rules) | ||||
@@ -125,7 +125,7 @@ class GrammarAnalyzer(object): | |||||
"Returns all init_ptrs accessible by rule (recursive)" | "Returns all init_ptrs accessible by rule (recursive)" | ||||
init_ptrs = set() | init_ptrs = set() | ||||
def _expand_rule(rule): | def _expand_rule(rule): | ||||
assert not is_terminal(rule), rule | |||||
assert not rule.is_term, rule | |||||
for r in self.rules_by_origin[rule]: | for r in self.rules_by_origin[rule]: | ||||
init_ptr = RulePtr(r, 0) | init_ptr = RulePtr(r, 0) | ||||
@@ -133,7 +133,7 @@ class GrammarAnalyzer(object): | |||||
if r.expansion: # if not empty rule | if r.expansion: # if not empty rule | ||||
new_r = init_ptr.next | new_r = init_ptr.next | ||||
if not is_terminal(new_r): | |||||
if not new_r.is_term: | |||||
yield new_r | yield new_r | ||||
for _ in bfs([rule], _expand_rule): | for _ in bfs([rule], _expand_rule): | ||||
@@ -142,8 +142,8 @@ class GrammarAnalyzer(object): | |||||
return fzset(init_ptrs) | return fzset(init_ptrs) | ||||
def _first(self, r): | def _first(self, r): | ||||
if is_terminal(r): | |||||
if r.is_term: | |||||
return {r} | return {r} | ||||
else: | else: | ||||
return {rp.next for rp in self.expand_rule(r) if is_terminal(rp.next)} | |||||
return {rp.next for rp in self.expand_rule(r) if rp.next.is_term} | |||||
@@ -10,9 +10,9 @@ import logging | |||||
from collections import defaultdict | from collections import defaultdict | ||||
from ..utils import classify, classify_bool, bfs, fzset | from ..utils import classify, classify_bool, bfs, fzset | ||||
from ..common import GrammarError, is_terminal | |||||
from ..common import GrammarError | |||||
from .grammar_analysis import GrammarAnalyzer | |||||
from .grammar_analysis import GrammarAnalyzer, Terminal | |||||
class Action: | class Action: | ||||
def __init__(self, name): | def __init__(self, name): | ||||
@@ -70,12 +70,12 @@ class LALR_Analyzer(GrammarAnalyzer): | |||||
rps = {rp.advance(sym) for rp in rps} | rps = {rp.advance(sym) for rp in rps} | ||||
for rp in set(rps): | for rp in set(rps): | ||||
if not rp.is_satisfied and not is_terminal(rp.next): | |||||
if not rp.is_satisfied and not rp.next.is_term: | |||||
rps |= self.expand_rule(rp.next) | rps |= self.expand_rule(rp.next) | ||||
new_state = fzset(rps) | new_state = fzset(rps) | ||||
lookahead[sym].append((Shift, new_state)) | lookahead[sym].append((Shift, new_state)) | ||||
if sym == '$END': | |||||
if sym == Terminal('$END'): | |||||
self.end_states.append( new_state ) | self.end_states.append( new_state ) | ||||
yield new_state | yield new_state | ||||
@@ -93,7 +93,7 @@ class LALR_Analyzer(GrammarAnalyzer): | |||||
if not len(v) == 1: | if not len(v) == 1: | ||||
raise GrammarError("Collision in %s: %s" %(k, ', '.join(['\n * %s: %s' % x for x in v]))) | raise GrammarError("Collision in %s: %s" %(k, ', '.join(['\n * %s: %s' % x for x in v]))) | ||||
self.states[state] = {k:v[0] for k, v in lookahead.items()} | |||||
self.states[state] = {k.name:v[0] for k, v in lookahead.items()} | |||||
for _ in bfs([self.start_state], step): | for _ in bfs([self.start_state], step): | ||||
pass | pass | ||||
@@ -59,7 +59,7 @@ class _Parser: | |||||
value = self.callbacks[rule](s) | value = self.callbacks[rule](s) | ||||
_action, new_state = get_action(rule.origin) | |||||
_action, new_state = get_action(rule.origin.name) | |||||
assert _action is Shift | assert _action is Shift | ||||
state_stack.append(new_state) | state_stack.append(new_state) | ||||
value_stack.append(value) | value_stack.append(value) | ||||