@@ -7,10 +7,16 @@ from .common import is_terminal, GrammarError, ParserConf
 from .parsers import lalr_parser, earley, xearley, resolve_ambig

 class WithLexer:
-    def __init__(self, lexer_conf):
+    def init_traditional_lexer(self, lexer_conf):
         self.lexer_conf = lexer_conf
         self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)

+    def init_contextual_lexer(self, lexer_conf, parser_conf):
+        self.lexer_conf = lexer_conf
+        d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()}
+        always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
+        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept)
+
     def lex(self, text):
         stream = self.lexer.lex(text)
         if self.lexer_conf.postlex:
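Note on the new `init_contextual_lexer`: `d` maps each LALR parse-table state id to the token types that state can shift, so the contextual lexer only ever tries terminals the parser can currently accept. A minimal self-contained sketch of the idea (toy names and table, not lark's actual classes):

```python
import re

# Hypothetical toy parse table: state id -> {acceptable token type: next state}
PARSE_TABLE = {
    0: {'NAME': 1},
    1: {'EQUALS': 2},
    2: {'NAME': 3, 'NUMBER': 3},
}

TOKEN_PATTERNS = {
    'NAME': re.compile(r'[a-z]+'),
    'NUMBER': re.compile(r'[0-9]+'),
    'EQUALS': re.compile(r'='),
}

# Same shape as `d` above: each state maps to the token types it can shift
allowed = {state: set(transitions) for state, transitions in PARSE_TABLE.items()}

def contextual_lex(text, get_parser_state):
    """Yield (type, value) pairs, trying only terminals that are legal in
    the parser's current state (queried via a callback, much as the real
    lexer is kept in sync via set_parser_state)."""
    pos = 0
    while pos < len(text):
        if text[pos].isspace():          # crude stand-in for `ignore`
            pos += 1
            continue
        candidates = allowed[get_parser_state()]
        for ttype in candidates:
            m = TOKEN_PATTERNS[ttype].match(text, pos)
            if m:
                yield ttype, m.group()
                pos = m.end()
                break
        else:
            raise ValueError('no terminal matches at position %d' % pos)
```

`always_accept` would simply be unioned into `candidates`: a post-lexer (an indentation tracker, say) can emit token types that no individual parse state predicts on its own.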
@@ -21,32 +27,22 @@ class WithLexer:

 class LALR(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
-        WithLexer.__init__(self, lexer_conf)
-        self.parser_conf = parser_conf
         self.parser = lalr_parser.Parser(parser_conf)
+        self.init_traditional_lexer(lexer_conf)

     def parse(self, text):
-        tokens = self.lex(text)
-        return self.parser.parse(tokens)
+        token_stream = self.lex(text)
+        return self.parser.parse(token_stream)

-class LALR_ContextualLexer:
+class LALR_ContextualLexer(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
-        self.lexer_conf = lexer_conf
-        self.parser_conf = parser_conf
         self.parser = lalr_parser.Parser(parser_conf)
-        d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()}
-        always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
-        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept)
+        self.init_contextual_lexer(lexer_conf, parser_conf)

     def parse(self, text):
-        tokens = self.lexer.lex(text)
-        if self.lexer_conf.postlex:
-            tokens = self.lexer_conf.postlex.process(tokens)
-        return self.parser.parse(tokens, self.lexer.set_parser_state)
+        token_stream = self.lex(text)
+        return self.parser.parse(token_stream, self.lexer.set_parser_state)

 def get_ambiguity_resolver(options):
     if not options or options.ambiguity == 'resolve':
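Since `LALR_ContextualLexer` now inherits from `WithLexer`, both frontends share `lex()`, which applies the post-lexer in one place instead of duplicating the `postlex.process` call inside `parse()`. The post-lexer protocol that shared path relies on is just two members, sketched here with a hypothetical processor (not one of lark's shipped post-lexers):

```python
from collections import namedtuple

Token = namedtuple('Token', 'type value')   # stand-in for lark's Token

class UpcaseNames:
    """Hypothetical post-lexer. `always_accept` lists token types the
    contextual lexer must keep legal in every state; `process` rewrites
    the token stream lazily."""
    always_accept = ()

    def process(self, stream):
        for tok in stream:
            yield Token(tok.type, tok.value.upper()) if tok.type == 'NAME' else tok
```

`WithLexer.lex()` is now the single place that applies it: lex the text, then run the stream through `postlex.process` if one is configured.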
@@ -58,24 +54,19 @@ def get_ambiguity_resolver(options):
         raise ValueError(options)

 def tokenize_text(text):
-    new_text = []
     line = 1
     col_start_pos = 0
     for i, ch in enumerate(text):
         if '\n' in ch:
             line += ch.count('\n')
             col_start_pos = i + ch.rindex('\n')
-        new_text.append(Token('CHAR', ch, line=line, column=i - col_start_pos))
-    return new_text
+        yield Token('CHAR', ch, line=line, column=i - col_start_pos)

 class Earley_NoLex:
     def __init__(self, lexer_conf, parser_conf, options=None):
         self._prepare_match(lexer_conf)
-        self.parser = earley.Parser(parser_conf.rules,
-                                    parser_conf.start,
-                                    parser_conf.callback,
-                                    self.match,
+        self.parser = earley.Parser(parser_conf, self.match,
                                     resolve_ambiguity=get_ambiguity_resolver(options))
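Turning `tokenize_text` into a generator makes the lexer-less Earley path lazy: tokens are produced as the parser consumes them, instead of materializing a list with one `Token` per character. The line/column bookkeeping is easiest to see on a tiny input; here is a self-contained mirror of the function with plain tuples standing in for `Token`:

```python
def tokenize_text(text):
    line = 1
    col_start_pos = 0
    for i, ch in enumerate(text):
        if '\n' in ch:
            line += ch.count('\n')
            col_start_pos = i + ch.rindex('\n')
        yield ('CHAR', ch, line, i - col_start_pos)

print(list(tokenize_text('ab\ncd')))
# [('CHAR', 'a', 1, 0), ('CHAR', 'b', 1, 1), ('CHAR', '\n', 2, 0),
#  ('CHAR', 'c', 2, 1), ('CHAR', 'd', 2, 2)]
```

Note the generator preserves the list version's arithmetic exactly, including the quirk that line 1 starts at column 0 while characters after a newline start at column 1.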
@@ -92,17 +83,14 @@ class Earley_NoLex:
         self.regexps[t.name] = re.compile(regexp)

     def parse(self, text):
-        new_text = tokenize_text(text)
-        return self.parser.parse(new_text)
+        token_stream = tokenize_text(text)
+        return self.parser.parse(token_stream)

 class Earley(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
-        WithLexer.__init__(self, lexer_conf)
+        self.init_traditional_lexer(lexer_conf)

-        self.parser = earley.Parser(parser_conf.rules,
-                                    parser_conf.start,
-                                    parser_conf.callback,
-                                    self.match,
+        self.parser = earley.Parser(parser_conf, self.match,
                                     resolve_ambiguity=get_ambiguity_resolver(options))

     def match(self, term, token):
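The recurring theme of this diff: the engines now take the whole `ParserConf` (imported from `.common` at the top of the first hunk) instead of unpacked `(rules, start, callback)` triples. A minimal stand-in showing the three attributes the new code reads:

```python
class ParserConf:
    """Minimal stand-in for lark's ParserConf; only the attributes the
    engines read in this diff."""
    def __init__(self, rules, callback, start):
        self.rules = rules        # list of Rule objects
        self.callback = callback  # holder of one method per rule alias
        self.start = start        # name of the start symbol

# Before: earley.Parser(parser_conf.rules, parser_conf.start,
#                       parser_conf.callback, self.match, ...)
# After:  earley.Parser(parser_conf, self.match, ...)
```

Passing one object keeps the engine signatures stable as the configuration grows.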
@@ -119,9 +107,7 @@ class XEarley:
         self._prepare_match(lexer_conf)

-        self.parser = xearley.Parser(parser_conf.rules,
-                                     parser_conf.start,
-                                     parser_conf.callback,
+        self.parser = xearley.Parser(parser_conf,
                                      self.match,
                                      resolve_ambiguity=get_ambiguity_resolver(options),
                                      ignore=lexer_conf.ignore,
@@ -18,9 +18,6 @@ from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse
 from .grammar_analysis import GrammarAnalyzer

-class EndToken:
-    type = '$END'

 class Derivation(Tree):
     _hash = None
@@ -36,8 +33,6 @@ class Derivation(Tree):
         self._hash = Tree.__hash__(self)
         return self._hash

-END_TOKEN = EndToken()

 class Item(object):
     "An Earley Item, the atom of the algorithm."
@@ -60,11 +55,8 @@ class Item(object):
         new_tree = Derivation(self.rule, self.tree.children + [tree])
         return self.__class__(self.rule, self.ptr+1, self.start, new_tree)

-    def similar(self, other):
-        return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule
-
     def __eq__(self, other):
-        return self.similar(other)   #and (self.tree == other.tree)
+        return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule

     def __hash__(self):
         return hash((self.rule, self.ptr, id(self.start)))   # Always runs Derivation.__hash__
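Folding `similar()` into `__eq__` keeps the equality/hash contract visible in one place: both ignore the derivation tree and key on `(rule, ptr, start)`, comparing the start column by identity. Two items carrying different partial trees therefore compare equal, which is exactly what column deduplication and the `item in new_items` check below depend on. A toy demonstration:

```python
class Item:
    """Toy version of the Earley item above: equality and hash ignore
    the derivation tree and compare the start column by identity."""
    def __init__(self, rule, ptr, start, tree):
        self.rule, self.ptr, self.start, self.tree = rule, ptr, start, tree

    def __eq__(self, other):
        return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule

    def __hash__(self):
        return hash((self.rule, self.ptr, id(self.start)))

col = object()                        # stand-in for a start Column
a = Item('expr', 1, col, tree=['+'])
b = Item('expr', 1, col, tree=['*'])
assert a == b and hash(a) == hash(b)  # different trees, same item
```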
@@ -152,27 +144,24 @@ class Column:
     __nonzero__ = __bool__   # Py2 backwards-compatibility

 class Parser:
-    def __init__(self, rules, start_symbol, callback, term_matcher, resolve_ambiguity=None):
-        self.analysis = GrammarAnalyzer(rules, start_symbol)
-        self.start_symbol = start_symbol
+    def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None):
+        self.analysis = GrammarAnalyzer(parser_conf)
+        self.parser_conf = parser_conf
         self.resolve_ambiguity = resolve_ambiguity
+        self.FIRST = self.analysis.FIRST

         self.postprocess = {}
         self.predictions = {}
-        self.FIRST = {}
-        for rule in self.analysis.rules:
-            a = rule.alias
-            self.postprocess[rule] = a if callable(a) else (a and getattr(callback, a))
+        for rule in parser_conf.rules:
+            self.postprocess[rule] = getattr(parser_conf.callback, rule.alias)
             self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]
-            self.FIRST[rule.origin] = self.analysis.FIRST[rule.origin]

         self.term_matcher = term_matcher

     def parse(self, stream, start_symbol=None):
         # Define parser functions
-        start_symbol = start_symbol or self.start_symbol
+        start_symbol = start_symbol or self.parser_conf.start

         _Item = Item
         match = self.term_matcher
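Two simplifications in this hunk: `self.FIRST` now aliases the analyzer's complete FIRST table instead of being copied rule by rule, and the callback lookup drops the `callable(a)` special case because generated callbacks are now always methods on `parser_conf.callback`, keyed by `rule.alias`. The dispatch convention the new loop assumes, with hypothetical stand-ins:

```python
class Callbacks:
    """Hypothetical generated-callback holder: one method per rule alias."""
    def add(self, children):
        return ('+', children)

class Rule:
    """Minimal stand-in for lark's Rule."""
    def __init__(self, origin, alias):
        self.origin, self.alias = origin, alias

callback, rule = Callbacks(), Rule('sum', 'add')
postprocess = {rule: getattr(callback, rule.alias)}   # as in the loop above
assert postprocess[rule]([1, 2]) == ('+', [1, 2])
```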
@@ -198,9 +187,8 @@ class Parser: | |||||
for item in to_reduce: | for item in to_reduce: | ||||
new_items = list(complete(item)) | new_items = list(complete(item)) | ||||
for new_item in new_items: | |||||
if new_item.similar(item): | |||||
raise ParseError('Infinite recursion detected! (rule %s)' % new_item.rule) | |||||
if item in new_items: | |||||
raise ParseError('Infinite recursion detected! (rule %s)' % item.rule) | |||||
column.add(new_items) | column.add(new_items) | ||||
def scan(i, token, column): | def scan(i, token, column): | ||||
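The rewritten guard uses `list.__contains__`, which tests by value via `Item.__eq__` above: if completing an item re-derives an item equal to itself (same rule, pointer and start column), the completer would loop forever. A plain-tuple analogue of the condition:

```python
# (rule, ptr, start) triples standing in for Items; `in` compares by value
item = ('a', 1, 0)              # item being completed, for a cycle like "a: a"
new_items = [('a', 1, 0)]       # complete() re-derived an equal item
assert item in new_items        # -> ParseError('Infinite recursion detected!')
```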
@@ -252,24 +240,3 @@ class ApplyCallbacks(Transformer_NoRecurse): | |||||
return callback(children) | return callback(children) | ||||
else: | else: | ||||
return Tree(rule.origin, children) | return Tree(rule.origin, children) | ||||
# RULES = [ | |||||
# ('a', ['d']), | |||||
# ('d', ['b']), | |||||
# ('b', ['C']), | |||||
# ('b', ['b', 'C']), | |||||
# ('b', ['C', 'b']), | |||||
# ] | |||||
# p = Parser(RULES, 'a') | |||||
# for x in p.parse('CC'): | |||||
# print x.pretty() | |||||
#--------------- | |||||
# RULES = [ | |||||
# ('s', ['a', 'a']), | |||||
# ('a', ['b', 'b']), | |||||
# ('b', ['C'], lambda (x,): x), | |||||
# ('b', ['b', 'C']), | |||||
# ] | |||||
# p = Parser(RULES, 's', {}) | |||||
# print p.parse('CCCCC').pretty() |
@@ -94,13 +94,14 @@ def calculate_sets(rules):

 class GrammarAnalyzer(object):
-    def __init__(self, rules, start_symbol, debug=False):
+    def __init__(self, parser_conf, debug=False):
+        rules = parser_conf.rules
         assert len(rules) == len(set(rules))
-        self.start_symbol = start_symbol
+        self.start_symbol = parser_conf.start
         self.debug = debug

-        root_rule = Rule('$root', [start_symbol, '$END'])
+        root_rule = Rule('$root', [self.start_symbol, '$END'])

         self.rules_by_origin = {r.origin: [] for r in rules}
         for r in rules:
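Reading `rules` and `start` off `parser_conf` leaves the analyzer's logic untouched: it still wraps the user grammar in a synthetic `$root -> <start> $END` rule, giving the analyses a unique start production and an explicit end marker. Toy illustration with a minimal `Rule` stand-in:

```python
class Rule:
    """Minimal stand-in for lark's Rule."""
    def __init__(self, origin, expansion):
        self.origin, self.expansion = origin, expansion
    def __repr__(self):
        return '%s -> %s' % (self.origin, ' '.join(self.expansion))

start_symbol = 'expr'
root_rule = Rule('$root', [start_symbol, '$END'])
print(root_rule)                # $root -> expr $END
```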
@@ -15,7 +15,7 @@ class Parser:
     def __init__(self, parser_conf):
         assert all(r.options is None or r.options.priority is None
                    for r in parser_conf.rules), "LALR doesn't yet support prioritization"
-        self.analysis = analysis = LALR_Analyzer(parser_conf.rules, parser_conf.start)
+        self.analysis = analysis = LALR_Analyzer(parser_conf)
         analysis.compute_lookahead()
         callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
                      for rule in analysis.rules}
@@ -28,31 +28,26 @@ from .grammar_analysis import GrammarAnalyzer
 from .earley import ApplyCallbacks, Item, Column

 class Parser:
-    def __init__(self, rules, start_symbol, callback, term_matcher, resolve_ambiguity=None, ignore=(), predict_all=False):
-        self.analysis = GrammarAnalyzer(rules, start_symbol)
-        self.start_symbol = start_symbol
+    def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None, ignore=(), predict_all=False):
+        self.analysis = GrammarAnalyzer(parser_conf)
+        self.parser_conf = parser_conf
         self.resolve_ambiguity = resolve_ambiguity
         self.ignore = list(ignore)
         self.predict_all = predict_all
+        self.FIRST = self.analysis.FIRST

         self.postprocess = {}
         self.predictions = {}
-        self.FIRST = {}
-        for rule in self.analysis.rules:
-            a = rule.alias
-            self.postprocess[rule] = a if callable(a) else (a and getattr(callback, a))
+        for rule in parser_conf.rules:
+            self.postprocess[rule] = getattr(parser_conf.callback, rule.alias)
             self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]
-            self.FIRST[rule.origin] = self.analysis.FIRST[rule.origin]

         self.term_matcher = term_matcher

     def parse(self, stream, start_symbol=None):
         # Define parser functions
-        start_symbol = start_symbol or self.start_symbol
+        start_symbol = start_symbol or self.parser_conf.start

         delayed_matches = defaultdict(list)
         match = self.term_matcher
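`xearley`, the scannerless variant, gets the same `ParserConf` treatment as `earley` while keeping its extras (`ignore`, `predict_all`, and the `delayed_matches` buffer visible above). For readers new to the module, `delayed_matches` groups pending matches by the input position where they end; a simplified illustration of that bookkeeping pattern (not the engine's actual logic):

```python
from collections import defaultdict

delayed_matches = defaultdict(list)
# Two terminals that both end at input position 5 (matching is per-character):
delayed_matches[5].append(('NAME', 'hello'))
delayed_matches[5].append(('WORD', 'hello'))
print(delayed_matches[5])   # both candidates handled when the scan reaches 5
```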
@@ -79,9 +74,8 @@ class Parser:
                 column.add( predict(nonterm, column) )
             for item in to_reduce:
                 new_items = list(complete(item))
-                for new_item in new_items:
-                    if new_item.similar(item):
-                        raise ParseError('Infinite recursion detected! (rule %s)' % new_item.rule)
+                if item in new_items:
+                    raise ParseError('Infinite recursion detected! (rule %s)' % item.rule)
                 column.add(new_items)

     def scan(i, token, column):