@@ -7,10 +7,16 @@ from .common import is_terminal, GrammarError, ParserConf | |||
from .parsers import lalr_parser, earley, xearley, resolve_ambig | |||
# Base class that owns the lexer for a parser frontend (LALR, Earley and
# LALR_ContextualLexer derive from it in this diff). The visible lines offer
# two ways to build self.lexer and the beginning of lex().
# NOTE(review): this span is flattened diff output - indentation is lost and
# removed/added lines sit side by side (e.g. the two consecutive `def`
# headers below). Confirm which lines are live before relying on it.
class WithLexer: | |||
def __init__(self, lexer_conf): | |||
# Build a plain Lexer from lexer_conf.tokens / lexer_conf.ignore and keep
# lexer_conf on the instance; lex() reads self.lexer_conf.postlex.
def init_traditional_lexer(self, lexer_conf): | |||
self.lexer_conf = lexer_conf | |||
self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore) | |||
# Build a ContextualLexer. This reads self.parser, so the parser must be
# assigned before the call. parser_conf is not used in the lines shown.
def init_contextual_lexer(self, lexer_conf, parser_conf): | |||
self.lexer_conf = lexer_conf | |||
# Map each parse-table state index to the keys of that state's row.
d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()} | |||
# Taken from the postlex object when one is configured, else an empty tuple.
always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else () | |||
self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept) | |||
# Run self.lexer over text. The postlex branch that starts here continues
# past the hunk boundary and is not visible in this view.
def lex(self, text): | |||
stream = self.lexer.lex(text) | |||
if self.lexer_conf.postlex: | |||
@@ -21,32 +27,22 @@ class WithLexer: | |||
# LALR frontend: combines lalr_parser.Parser with the lexer provided by
# WithLexer.
# NOTE(review): flattened diff - `WithLexer.__init__(...)` versus
# `self.init_traditional_lexer(...)`, and the `tokens` versus `token_stream`
# bodies of parse(), look like the before/after of one change. Confirm which
# variant is current.
class LALR(WithLexer): | |||
# Stores parser_conf, builds the LALR parser from it and sets up the lexer.
# `options` is accepted but not used in the lines shown.
def __init__(self, lexer_conf, parser_conf, options=None): | |||
WithLexer.__init__(self, lexer_conf) | |||
self.parser_conf = parser_conf | |||
self.parser = lalr_parser.Parser(parser_conf) | |||
self.init_traditional_lexer(lexer_conf) | |||
# Lex `text` via self.lex() and return whatever self.parser.parse() returns
# for the resulting token stream.
def parse(self, text): | |||
tokens = self.lex(text) | |||
return self.parser.parse(tokens) | |||
token_stream = self.lex(text) | |||
return self.parser.parse(token_stream) | |||
# LALR frontend that uses a ContextualLexer built from the parser's parse
# table.
# NOTE(review): flattened diff - two class headers (with and without the
# WithLexer base) and two ways of building the lexer (inline construction
# versus the init_contextual_lexer() call) appear together. They look like
# the before/after of one refactoring; confirm which is current.
class LALR_ContextualLexer: | |||
class LALR_ContextualLexer(WithLexer): | |||
# The parser is created before the lexer because the lexer setup reads
# self.parser.analysis.parse_table. `options` is unused in the lines shown.
def __init__(self, lexer_conf, parser_conf, options=None): | |||
self.lexer_conf = lexer_conf | |||
self.parser_conf = parser_conf | |||
self.parser = lalr_parser.Parser(parser_conf) | |||
# Map each parse-table state index to the keys of that state's row.
d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()} | |||
# Taken from the postlex object when one is configured, else an empty tuple.
always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else () | |||
self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept) | |||
self.init_contextual_lexer(lexer_conf, parser_conf) | |||
# Lex `text` (optionally passing the tokens through postlex.process) and
# parse the result. self.lexer.set_parser_state is handed to the parser as
# a second argument - presumably so the parser can report its current state
# to the lexer; confirm against lalr_parser.Parser.parse.
def parse(self, text): | |||
tokens = self.lexer.lex(text) | |||
if self.lexer_conf.postlex: | |||
tokens = self.lexer_conf.postlex.process(tokens) | |||
return self.parser.parse(tokens, self.lexer.set_parser_state) | |||
token_stream = self.lex(text) | |||
return self.parser.parse(token_stream, self.lexer.set_parser_state) | |||
def get_ambiguity_resolver(options): | |||
if not options or options.ambiguity == 'resolve': | |||
@@ -58,24 +54,19 @@ def get_ambiguity_resolver(options): | |||
raise ValueError(options) | |||
# Produce one 'CHAR' Token per element of `text`, each tagged with a line
# number (starting at 1) and a column computed as i - col_start_pos.
# NOTE(review): flattened diff - the list-building lines (`new_text = []`,
# `.append(...)`, `return new_text`) and the `yield` line appear to be the
# before/after forms of the same loop body. Confirm which is current.
def tokenize_text(text): | |||
new_text = [] | |||
line = 1 | |||
# Index that serves as the origin for the column computation below.
col_start_pos = 0 | |||
for i, ch in enumerate(text): | |||
# When the element contains a newline, advance the line counter and move
# the column origin to the position of its last newline.
if '\n' in ch: | |||
line += ch.count('\n') | |||
col_start_pos = i + ch.rindex('\n') | |||
new_text.append(Token('CHAR', ch, line=line, column=i - col_start_pos)) | |||
return new_text | |||
yield Token('CHAR', ch, line=line, column=i - col_start_pos) | |||
# Earley frontend that does not derive from WithLexer.
# NOTE(review): flattened diff - two forms of the earley.Parser(...) call are
# interleaved: one passes rules/start/callback separately, the other passes
# parser_conf as a whole. Confirm which is current.
class Earley_NoLex: | |||
def __init__(self, lexer_conf, parser_conf, options=None): | |||
# _prepare_match is defined outside the lines visible here.
self._prepare_match(lexer_conf) | |||
# Both call forms pass self.match as the terminal matcher and the resolver
# chosen by get_ambiguity_resolver(options).
self.parser = earley.Parser(parser_conf.rules, | |||
parser_conf.start, | |||
parser_conf.callback, | |||
self.match, | |||
self.parser = earley.Parser(parser_conf, self.match, | |||
resolve_ambiguity=get_ambiguity_resolver(options)) | |||
@@ -92,17 +83,14 @@ class Earley_NoLex: | |||
self.regexps[t.name] = re.compile(regexp) | |||
# Convert `text` with tokenize_text() and return the result of
# self.parser.parse() on it.
# NOTE(review): flattened diff - the `new_text` and `token_stream` pairs look
# like the before/after spelling of the same two statements.
def parse(self, text): | |||
new_text = tokenize_text(text) | |||
return self.parser.parse(new_text) | |||
token_stream = tokenize_text(text) | |||
return self.parser.parse(token_stream) | |||
# Earley frontend that takes its lexer from WithLexer.
# NOTE(review): flattened diff - `WithLexer.__init__(...)` versus
# `self.init_traditional_lexer(...)`, and two forms of the earley.Parser(...)
# call, are interleaved. Confirm which variant is current.
class Earley(WithLexer): | |||
def __init__(self, lexer_conf, parser_conf, options=None): | |||
WithLexer.__init__(self, lexer_conf) | |||
self.init_traditional_lexer(lexer_conf) | |||
# Both call forms pass self.match as the terminal matcher and the resolver
# chosen by get_ambiguity_resolver(options).
self.parser = earley.Parser(parser_conf.rules, | |||
parser_conf.start, | |||
parser_conf.callback, | |||
self.match, | |||
self.parser = earley.Parser(parser_conf, self.match, | |||
resolve_ambiguity=get_ambiguity_resolver(options)) | |||
def match(self, term, token): | |||
@@ -119,9 +107,7 @@ class XEarley: | |||
self._prepare_match(lexer_conf) | |||
self.parser = xearley.Parser(parser_conf.rules, | |||
parser_conf.start, | |||
parser_conf.callback, | |||
self.parser = xearley.Parser(parser_conf, | |||
self.match, | |||
resolve_ambiguity=get_ambiguity_resolver(options), | |||
ignore=lexer_conf.ignore, | |||
@@ -18,9 +18,6 @@ from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse | |||
from .grammar_analysis import GrammarAnalyzer | |||
# Marker class whose only visible member is the class attribute
# type = '$END'.
# NOTE(review): the enclosing hunk header shrinks from 9 to 6 lines, so this
# class may be one of the removed lines - confirm.
class EndToken: | |||
type = '$END' | |||
class Derivation(Tree): | |||
_hash = None | |||
@@ -36,8 +33,6 @@ class Derivation(Tree): | |||
self._hash = Tree.__hash__(self) | |||
return self._hash | |||
# Module-level instance of EndToken.
# NOTE(review): the enclosing hunk header shrinks from 8 to 6 lines, so this
# assignment may be one of the removed lines - confirm.
END_TOKEN = EndToken() | |||
class Item(object): | |||
"An Earley Item, the atom of the algorithm." | |||
@@ -60,11 +55,8 @@ class Item(object): | |||
new_tree = Derivation(self.rule, self.tree.children + [tree]) | |||
return self.__class__(self.rule, self.ptr+1, self.start, new_tree) | |||
# Item comparison: two items match when they share the same `start` object
# (identity, via `is`) and have equal `ptr` and `rule`. The tree is not
# compared.
# NOTE(review): flattened diff - `similar()` plus an `__eq__` delegating to
# it, and an `__eq__` containing the same expression inline, appear together.
# They look like the before/after of folding similar() into __eq__.
def similar(self, other): | |||
return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule | |||
def __eq__(self, other): | |||
return self.similar(other) #and (self.tree == other.tree) | |||
return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule | |||
# Hash over (rule, ptr, id(start)), the same three fields that the equality
# expression above compares.
# NOTE(review): the hashed tuple does not include self.tree, so the trailing
# remark about Derivation.__hash__ may be stale - confirm.
def __hash__(self): | |||
return hash((self.rule, self.ptr, id(self.start))) # Always runs Derivation.__hash__ | |||
@@ -152,27 +144,24 @@ class Column: | |||
__nonzero__ = __bool__ # Py2 backwards-compatibility | |||
# Earley parser: constructor only. parse() begins after this span and is cut
# by hunk boundaries.
# NOTE(review): flattened diff - two signatures are interleaved. One takes
# (rules, start_symbol, callback, ...), the other takes a single parser_conf.
# The lines that read `rules`, `start_symbol` or `callback` belong with the
# first; those that read `parser_conf` belong with the second.
class Parser: | |||
def __init__(self, rules, start_symbol, callback, term_matcher, resolve_ambiguity=None): | |||
self.analysis = GrammarAnalyzer(rules, start_symbol) | |||
self.start_symbol = start_symbol | |||
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None): | |||
self.analysis = GrammarAnalyzer(parser_conf) | |||
self.parser_conf = parser_conf | |||
self.resolve_ambiguity = resolve_ambiguity | |||
# parser_conf form: reuse the analyzer's FIRST mapping as-is.
self.FIRST = self.analysis.FIRST | |||
# rule -> callback used for post-processing.
self.postprocess = {} | |||
# rule origin -> list of rules returned by analysis.expand_rule(origin).
self.predictions = {} | |||
# Other form: FIRST is rebuilt per rule origin inside the loop below.
self.FIRST = {} | |||
for rule in self.analysis.rules: | |||
a = rule.alias | |||
# Accepts a callable alias directly, looks a truthy alias up on `callback`,
# and otherwise stores the falsy alias itself.
self.postprocess[rule] = a if callable(a) else (a and getattr(callback, a)) | |||
for rule in parser_conf.rules: | |||
# NOTE(review): unconditional getattr - assumes every rule.alias is an
# attribute name present on parser_conf.callback; confirm.
self.postprocess[rule] = getattr(parser_conf.callback, rule.alias) | |||
self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)] | |||
self.FIRST[rule.origin] = self.analysis.FIRST[rule.origin] | |||
self.term_matcher = term_matcher | |||
def parse(self, stream, start_symbol=None): | |||
# Define parser functions | |||
start_symbol = start_symbol or self.start_symbol | |||
start_symbol = start_symbol or self.parser_conf.start | |||
_Item = Item | |||
match = self.term_matcher | |||
@@ -198,9 +187,8 @@ class Parser: | |||
for item in to_reduce: | |||
new_items = list(complete(item)) | |||
for new_item in new_items: | |||
if new_item.similar(item): | |||
raise ParseError('Infinite recursion detected! (rule %s)' % new_item.rule) | |||
if item in new_items: | |||
raise ParseError('Infinite recursion detected! (rule %s)' % item.rule) | |||
column.add(new_items) | |||
def scan(i, token, column): | |||
@@ -252,24 +240,3 @@ class ApplyCallbacks(Transformer_NoRecurse): | |||
return callback(children) | |||
else: | |||
return Tree(rule.origin, children) | |||
# RULES = [ | |||
# ('a', ['d']), | |||
# ('d', ['b']), | |||
# ('b', ['C']), | |||
# ('b', ['b', 'C']), | |||
# ('b', ['C', 'b']), | |||
# ] | |||
# p = Parser(RULES, 'a') | |||
# for x in p.parse('CC'): | |||
# print x.pretty() | |||
#--------------- | |||
# RULES = [ | |||
# ('s', ['a', 'a']), | |||
# ('a', ['b', 'b']), | |||
# ('b', ['C'], lambda (x,): x), | |||
# ('b', ['b', 'C']), | |||
# ] | |||
# p = Parser(RULES, 's', {}) | |||
# print p.parse('CCCCC').pretty() |
@@ -94,13 +94,14 @@ def calculate_sets(rules): | |||
class GrammarAnalyzer(object): | |||
def __init__(self, rules, start_symbol, debug=False): | |||
def __init__(self, parser_conf, debug=False): | |||
rules = parser_conf.rules | |||
assert len(rules) == len(set(rules)) | |||
self.start_symbol = start_symbol | |||
self.start_symbol = parser_conf.start | |||
self.debug = debug | |||
root_rule = Rule('$root', [start_symbol, '$END']) | |||
root_rule = Rule('$root', [self.start_symbol, '$END']) | |||
self.rules_by_origin = {r.origin: [] for r in rules} | |||
for r in rules: | |||
@@ -15,7 +15,7 @@ class Parser: | |||
def __init__(self, parser_conf): | |||
assert all(r.options is None or r.options.priority is None | |||
for r in parser_conf.rules), "LALR doesn't yet support prioritization" | |||
self.analysis = analysis = LALR_Analyzer(parser_conf.rules, parser_conf.start) | |||
self.analysis = analysis = LALR_Analyzer(parser_conf) | |||
analysis.compute_lookahead() | |||
callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None) | |||
for rule in analysis.rules} | |||
@@ -28,31 +28,26 @@ from .grammar_analysis import GrammarAnalyzer | |||
from .earley import ApplyCallbacks, Item, Column | |||
# XEarley parser: constructor only. parse() begins after this span and is
# cut by hunk boundaries.
# NOTE(review): flattened diff - two signatures are interleaved. One takes
# (rules, start_symbol, callback, ...), the other takes a single parser_conf.
# The lines that read `rules`, `start_symbol` or `callback` belong with the
# first; those that read `parser_conf` belong with the second.
class Parser: | |||
def __init__(self, rules, start_symbol, callback, term_matcher, resolve_ambiguity=None, ignore=(), predict_all=False): | |||
self.analysis = GrammarAnalyzer(rules, start_symbol) | |||
self.start_symbol = start_symbol | |||
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None, ignore=(), predict_all=False): | |||
self.analysis = GrammarAnalyzer(parser_conf) | |||
self.parser_conf = parser_conf | |||
self.resolve_ambiguity = resolve_ambiguity | |||
# Copied into a list, so the caller's iterable is not shared.
self.ignore = list(ignore) | |||
self.predict_all = predict_all | |||
# parser_conf form: reuse the analyzer's FIRST mapping as-is.
self.FIRST = self.analysis.FIRST | |||
# rule -> callback used for post-processing.
self.postprocess = {} | |||
# rule origin -> list of rules returned by analysis.expand_rule(origin).
self.predictions = {} | |||
# Other form: FIRST is rebuilt per rule origin inside the loop below.
self.FIRST = {} | |||
for rule in self.analysis.rules: | |||
a = rule.alias | |||
# Accepts a callable alias directly, looks a truthy alias up on `callback`,
# and otherwise stores the falsy alias itself.
self.postprocess[rule] = a if callable(a) else (a and getattr(callback, a)) | |||
for rule in parser_conf.rules: | |||
# NOTE(review): unconditional getattr - assumes every rule.alias is an
# attribute name present on parser_conf.callback; confirm.
self.postprocess[rule] = getattr(parser_conf.callback, rule.alias) | |||
self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)] | |||
self.FIRST[rule.origin] = self.analysis.FIRST[rule.origin] | |||
self.term_matcher = term_matcher | |||
def parse(self, stream, start_symbol=None): | |||
# Define parser functions | |||
start_symbol = start_symbol or self.start_symbol | |||
start_symbol = start_symbol or self.parser_conf.start | |||
delayed_matches = defaultdict(list) | |||
match = self.term_matcher | |||
@@ -79,9 +74,8 @@ class Parser: | |||
column.add( predict(nonterm, column) ) | |||
for item in to_reduce: | |||
new_items = list(complete(item)) | |||
for new_item in new_items: | |||
if new_item.similar(item): | |||
raise ParseError('Infinite recursion detected! (rule %s)' % new_item.rule) | |||
if item in new_items: | |||
raise ParseError('Infinite recursion detected! (rule %s)' % item.rule) | |||
column.add(new_items) | |||
def scan(i, token, column): | |||