@@ -7,10 +7,16 @@ from .common import is_terminal, GrammarError, ParserConf
 from .parsers import lalr_parser, earley, xearley, resolve_ambig
 
 class WithLexer:
-    def __init__(self, lexer_conf):
+    def init_traditional_lexer(self, lexer_conf):
         self.lexer_conf = lexer_conf
         self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)
 
+    def init_contextual_lexer(self, lexer_conf, parser_conf):
+        self.lexer_conf = lexer_conf
+        d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()}
+        always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
+        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept)
+
     def lex(self, text):
         stream = self.lexer.lex(text)
         if self.lexer_conf.postlex:
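The two `init_*` helpers above centralize lexer setup that was previously duplicated across frontends. Note the ordering constraint this introduces: `init_contextual_lexer` reads `self.parser.analysis.parse_table`, so a frontend must construct its parser before calling it. A minimal sketch of a frontend wired up this way (hypothetical class name; the real callers are `LALR` and `LALR_ContextualLexer` below, and the sketch assumes this module's imports):

```python
# Hypothetical frontend built on the WithLexer mixin. The parser is
# assigned first, because init_contextual_lexer derives the per-state
# allowed-token sets from self.parser.analysis.parse_table.
class MyFrontend(WithLexer):
    def __init__(self, lexer_conf, parser_conf, options=None):
        self.parser = lalr_parser.Parser(parser_conf)        # must come first
        self.init_contextual_lexer(lexer_conf, parser_conf)  # reads self.parser
```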
@@ -21,32 +27,22 @@ class WithLexer:
 
 class LALR(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
-        WithLexer.__init__(self, lexer_conf)
-        self.parser_conf = parser_conf
         self.parser = lalr_parser.Parser(parser_conf)
+        self.init_traditional_lexer(lexer_conf)
 
     def parse(self, text):
-        tokens = self.lex(text)
-        return self.parser.parse(tokens)
+        token_stream = self.lex(text)
+        return self.parser.parse(token_stream)
 
-class LALR_ContextualLexer:
+class LALR_ContextualLexer(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
-        self.lexer_conf = lexer_conf
-        self.parser_conf = parser_conf
         self.parser = lalr_parser.Parser(parser_conf)
-        d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()}
-        always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
-        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept)
+        self.init_contextual_lexer(lexer_conf, parser_conf)
 
     def parse(self, text):
-        tokens = self.lexer.lex(text)
-        if self.lexer_conf.postlex:
-            tokens = self.lexer_conf.postlex.process(tokens)
-        return self.parser.parse(tokens, self.lexer.set_parser_state)
+        token_stream = self.lex(text)
+        return self.parser.parse(token_stream, self.lexer.set_parser_state)
 
 def get_ambiguity_resolver(options):
     if not options or options.ambiguity == 'resolve':
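Both `parse()` methods now funnel through `WithLexer.lex()`, so post-lexing is applied in one place instead of being hand-rolled per frontend. A sketch of the contract a postlex object has to satisfy for this path (an illustrative stand-in, not lark's API; lark's `Indenter` is the canonical real implementation):

```python
# Illustrative stand-in for the postlex contract that WithLexer.lex()
# and init_contextual_lexer rely on: a .process() generator over the
# token stream, plus .always_accept naming token types the contextual
# lexer must accept in every parser state.
class NoopPostLex:
    always_accept = ()

    def process(self, stream):
        for tok in stream:
            yield tok
```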
@@ -58,24 +54,19 @@ def get_ambiguity_resolver(options):
     raise ValueError(options)
 
 def tokenize_text(text):
-    new_text = []
     line = 1
     col_start_pos = 0
     for i, ch in enumerate(text):
         if '\n' in ch:
             line += ch.count('\n')
             col_start_pos = i + ch.rindex('\n')
-        new_text.append(Token('CHAR', ch, line=line, column=i - col_start_pos))
-    return new_text
+        yield Token('CHAR', ch, line=line, column=i - col_start_pos)
 
 class Earley_NoLex:
     def __init__(self, lexer_conf, parser_conf, options=None):
         self._prepare_match(lexer_conf)
 
-        self.parser = earley.Parser(parser_conf.rules,
-                                    parser_conf.start,
-                                    parser_conf.callback,
-                                    self.match,
+        self.parser = earley.Parser(parser_conf, self.match,
                                     resolve_ambiguity=get_ambiguity_resolver(options))
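Replacing the accumulated list with `yield` turns `tokenize_text` into a generator: the old version built one `CHAR` token per input character up front, while the new one produces tokens only as the Earley parser consumes them. A self-contained sketch of the same shape (stand-in `Token`; lark's real `Token` is a string subclass with metadata):

```python
from collections import namedtuple

# Stand-in for lark's Token, which is really a str subclass.
Token = namedtuple('Token', 'type value line column')

def tokenize_text(text):
    line = 1
    col_start_pos = 0
    for i, ch in enumerate(text):
        if ch == '\n':         # ch is a single character here
            line += 1
            col_start_pos = i  # same arithmetic as the original's i + ch.rindex('\n')
        yield Token('CHAR', ch, line, i - col_start_pos)

# Lazy: no list of len(text) tokens is ever materialized.
first = next(tokenize_text('ab\ncd'))
```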
@@ -92,17 +83,14 @@ class Earley_NoLex:
         self.regexps[t.name] = re.compile(regexp)
 
     def parse(self, text):
-        new_text = tokenize_text(text)
-        return self.parser.parse(new_text)
+        token_stream = tokenize_text(text)
+        return self.parser.parse(token_stream)
 
 class Earley(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
-        WithLexer.__init__(self, lexer_conf)
+        self.init_traditional_lexer(lexer_conf)
 
-        self.parser = earley.Parser(parser_conf.rules,
-                                    parser_conf.start,
-                                    parser_conf.callback,
-                                    self.match,
+        self.parser = earley.Parser(parser_conf, self.match,
                                     resolve_ambiguity=get_ambiguity_resolver(options))
 
     def match(self, term, token):
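The pattern repeated across `Earley_NoLex`, `Earley`, and `XEarley`: the `(rules, start, callback)` argument triple collapses into the single `ParserConf` already imported at the top of this file. A rough, hedged stand-in for the bundle being passed around (field names match how the parsers use it; see lark's `common.py` for the actual definition):

```python
# Hedged sketch of the ParserConf bundle; the parsers below only rely
# on the three attributes parser_conf.rules / .callback / .start.
class ParserConfSketch:
    def __init__(self, rules, callback, start):
        self.rules = rules        # list of Rule objects
        self.callback = callback  # object whose attributes are per-rule callbacks, keyed by alias
        self.start = start        # name of the start symbol
```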
@@ -119,9 +107,7 @@ class XEarley:
         self._prepare_match(lexer_conf)
 
-        self.parser = xearley.Parser(parser_conf.rules,
-                                     parser_conf.start,
-                                     parser_conf.callback,
+        self.parser = xearley.Parser(parser_conf,
                                      self.match,
                                      resolve_ambiguity=get_ambiguity_resolver(options),
                                      ignore=lexer_conf.ignore,
@@ -18,9 +18,6 @@ from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse
 from .grammar_analysis import GrammarAnalyzer
 
-class EndToken:
-    type = '$END'
-
 class Derivation(Tree):
     _hash = None
 
@@ -36,8 +33,6 @@ class Derivation(Tree):
             self._hash = Tree.__hash__(self)
         return self._hash
 
-END_TOKEN = EndToken()
-
 class Item(object):
     "An Earley Item, the atom of the algorithm."
@@ -60,11 +55,8 @@ class Item(object):
         new_tree = Derivation(self.rule, self.tree.children + [tree])
         return self.__class__(self.rule, self.ptr+1, self.start, new_tree)
 
-    def similar(self, other):
-        return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule
-
     def __eq__(self, other):
-        return self.similar(other)   #and (self.tree == other.tree)
+        return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule
 
     def __hash__(self):
         return hash((self.rule, self.ptr, id(self.start)))   # Always runs Derivation.__hash__
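Folding `similar()` into `__eq__` is what lets the parser loops below write `item in new_items`, since list containment calls `__eq__`. The `__eq__`/`__hash__` pair stays consistent: equality compares the start column by identity (`is`), so the hash correspondingly uses `id(self.start)`. A toy check of that contract (hypothetical minimal class, mirroring the two methods shown above):

```python
# Minimal stand-in demonstrating the eq/hash contract Item relies on:
# identity comparison of .start pairs with hashing id(.start).
class MiniItem:
    def __init__(self, rule, ptr, start):
        self.rule, self.ptr, self.start = rule, ptr, start

    def __eq__(self, other):
        return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule

    def __hash__(self):
        return hash((self.rule, self.ptr, id(self.start)))

col = object()
assert MiniItem('rule', 0, col) == MiniItem('rule', 0, col)        # same column object
assert MiniItem('rule', 0, col) != MiniItem('rule', 0, object())   # equal fields, different column
assert hash(MiniItem('rule', 0, col)) == hash(MiniItem('rule', 0, col))
```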
@@ -152,27 +144,24 @@ class Column:
     __nonzero__ = __bool__    # Py2 backwards-compatibility
 
 class Parser:
-    def __init__(self, rules, start_symbol, callback, term_matcher, resolve_ambiguity=None):
-        self.analysis = GrammarAnalyzer(rules, start_symbol)
-        self.start_symbol = start_symbol
+    def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None):
+        self.analysis = GrammarAnalyzer(parser_conf)
+        self.parser_conf = parser_conf
         self.resolve_ambiguity = resolve_ambiguity
+        self.FIRST = self.analysis.FIRST
 
         self.postprocess = {}
         self.predictions = {}
-        self.FIRST = {}
-        for rule in self.analysis.rules:
-            a = rule.alias
-            self.postprocess[rule] = a if callable(a) else (a and getattr(callback, a))
+        for rule in parser_conf.rules:
+            self.postprocess[rule] = getattr(parser_conf.callback, rule.alias)
             self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]
-            self.FIRST[rule.origin] = self.analysis.FIRST[rule.origin]
 
         self.term_matcher = term_matcher
 
     def parse(self, stream, start_symbol=None):
         # Define parser functions
-        start_symbol = start_symbol or self.start_symbol
+        start_symbol = start_symbol or self.parser_conf.start
 
         _Item = Item
         match = self.term_matcher
@@ -198,9 +187,8 @@ class Parser:
                 for item in to_reduce:
                     new_items = list(complete(item))
-                    for new_item in new_items:
-                        if new_item.similar(item):
-                            raise ParseError('Infinite recursion detected! (rule %s)' % new_item.rule)
+                    if item in new_items:
+                        raise ParseError('Infinite recursion detected! (rule %s)' % item.rule)
                     column.add(new_items)
 
         def scan(i, token, column):
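With `similar()` gone, the explicit loop collapses into a containment test: `item in new_items` walks the list calling `Item.__eq__`, which is exactly the old per-element `similar()` check. Spelled out for comparison (stand-in `ParseError`; lark imports the real one from its common module):

```python
class ParseError(Exception):     # stand-in for lark's ParseError
    pass

def detect_infinite_recursion(item, new_items):
    # Equivalent to the removed loop: list containment invokes
    # Item.__eq__ on each element, i.e. the old similar() test.
    if item in new_items:
        raise ParseError('Infinite recursion detected! (rule %s)' % item.rule)
```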
@@ -252,24 +240,3 @@ class ApplyCallbacks(Transformer_NoRecurse):
             return callback(children)
         else:
             return Tree(rule.origin, children)
-
-# RULES = [
-#     ('a', ['d']),
-#     ('d', ['b']),
-#     ('b', ['C']),
-#     ('b', ['b', 'C']),
-#     ('b', ['C', 'b']),
-# ]
-# p = Parser(RULES, 'a')
-# for x in p.parse('CC'):
-#     print x.pretty()
-
-#---------------
-# RULES = [
-#     ('s', ['a', 'a']),
-#     ('a', ['b', 'b']),
-#     ('b', ['C'], lambda (x,): x),
-#     ('b', ['b', 'C']),
-# ]
-# p = Parser(RULES, 's', {})
-# print p.parse('CCCCC').pretty()
@@ -94,13 +94,14 @@ def calculate_sets(rules):
 
 class GrammarAnalyzer(object):
-    def __init__(self, rules, start_symbol, debug=False):
+    def __init__(self, parser_conf, debug=False):
+        rules = parser_conf.rules
         assert len(rules) == len(set(rules))
-        self.start_symbol = start_symbol
+        self.start_symbol = parser_conf.start
         self.debug = debug
 
-        root_rule = Rule('$root', [start_symbol, '$END'])
+        root_rule = Rule('$root', [self.start_symbol, '$END'])
 
         self.rules_by_origin = {r.origin: [] for r in rules}
         for r in rules:
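`GrammarAnalyzer` keeps its two jobs, now fed from `parser_conf`: it augments the grammar with a synthetic root (`$root -> <start> $END`, giving the parser a single entry point and an explicit end-of-input marker), and it indexes rules by their left-hand side. The indexing step that the two context lines above begin, completed as a standalone sketch:

```python
# Sketch completing the rules_by_origin construction shown above:
# map each LHS nonterminal (rule.origin) to every rule expanding it.
def index_by_origin(rules):
    rules_by_origin = {r.origin: [] for r in rules}
    for r in rules:
        rules_by_origin[r.origin].append(r)
    return rules_by_origin
```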
@@ -15,7 +15,7 @@ class Parser:
     def __init__(self, parser_conf):
         assert all(r.options is None or r.options.priority is None
                    for r in parser_conf.rules), "LALR doesn't yet support prioritization"
-        self.analysis = analysis = LALR_Analyzer(parser_conf.rules, parser_conf.start)
+        self.analysis = analysis = LALR_Analyzer(parser_conf)
         analysis.compute_lookahead()
         callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
                      for rule in analysis.rules}
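Worth noting the asymmetry in callback lookup: the LALR parser falls back from a rule's alias to its origin and tolerates a missing callback (`getattr(..., None)`), while the refactored Earley/XEarley constructors use a strict `getattr(parser_conf.callback, rule.alias)` that raises when the alias is absent. The LALR lookup, isolated as a sketch:

```python
# LALR's lenient callback lookup: try the alias, fall back to the
# rule's origin, and return None rather than raising when neither
# attribute exists on the callback object.
def lookup_callback(callback_obj, rule):
    return getattr(callback_obj, rule.alias or rule.origin, None)
```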
@@ -28,31 +28,26 @@ from .grammar_analysis import GrammarAnalyzer
 from .earley import ApplyCallbacks, Item, Column
 
 class Parser:
-    def __init__(self, rules, start_symbol, callback, term_matcher, resolve_ambiguity=None, ignore=(), predict_all=False):
-        self.analysis = GrammarAnalyzer(rules, start_symbol)
-        self.start_symbol = start_symbol
+    def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None, ignore=(), predict_all=False):
+        self.analysis = GrammarAnalyzer(parser_conf)
+        self.parser_conf = parser_conf
         self.resolve_ambiguity = resolve_ambiguity
         self.ignore = list(ignore)
         self.predict_all = predict_all
+        self.FIRST = self.analysis.FIRST
 
         self.postprocess = {}
         self.predictions = {}
-        self.FIRST = {}
-        for rule in self.analysis.rules:
-            a = rule.alias
-            self.postprocess[rule] = a if callable(a) else (a and getattr(callback, a))
+        for rule in parser_conf.rules:
+            self.postprocess[rule] = getattr(parser_conf.callback, rule.alias)
             self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]
-            self.FIRST[rule.origin] = self.analysis.FIRST[rule.origin]
 
         self.term_matcher = term_matcher
 
     def parse(self, stream, start_symbol=None):
         # Define parser functions
-        start_symbol = start_symbol or self.start_symbol
+        start_symbol = start_symbol or self.parser_conf.start
 
         delayed_matches = defaultdict(list)
         match = self.term_matcher
 
@@ -79,9 +74,8 @@ class Parser:
                     column.add( predict(nonterm, column) )
                 for item in to_reduce:
                     new_items = list(complete(item))
-                    for new_item in new_items:
-                        if new_item.similar(item):
-                            raise ParseError('Infinite recursion detected! (rule %s)' % new_item.rule)
+                    if item in new_items:
+                        raise ParseError('Infinite recursion detected! (rule %s)' % item.rule)
                     column.add(new_items)
 
         def scan(i, token, column):