| @@ -41,11 +41,11 @@ class UnexpectedToken(ParseError): | |||
class LexerConf:
    """Plain container for the settings passed to a lexer.

    Each constructor argument is stored unchanged on the attribute of the
    same name, except ``callbacks``, which defaults to a new empty dict.
    """

    def __init__(self, tokens, ignore=(), postlex=None, callbacks=None):
        """Store the lexer settings.

        :param tokens: token definitions, stored as ``self.tokens``.
        :param ignore: stored as ``self.ignore``; defaults to an empty tuple.
        :param postlex: stored as ``self.postlex``; defaults to ``None``.
        :param callbacks: mapping stored as ``self.callbacks``. When omitted
            (or ``None``) each instance gets its own fresh dict.
        """
        self.tokens = tokens
        self.ignore = ignore
        self.postlex = postlex
        # Test against None rather than truthiness: a caller may pass an empty
        # dict that it fills in later, and that object must be kept as-is.
        # A literal ``{}`` default would be shared between all instances.
        self.callbacks = {} if callbacks is None else callbacks
| class ParserConf: | |||
| def __init__(self, rules, callback, start): | |||
| @@ -139,36 +139,36 @@ class XEarley: | |||
class CYK(WithLexer):
    """Parser frontend that feeds lexed tokens to ``cyk.Parser`` and then
    applies the per-rule callbacks to the resulting tree."""

    def __init__(self, lexer_conf, parser_conf, options=None):
        """Set up the lexer, the CYK parser and the alias -> callback table.

        :param lexer_conf: passed to ``init_traditional_lexer``.
        :param parser_conf: supplies ``rules``, ``start`` and ``callback``.
        :param options: accepted but not used by this constructor.
        """
        self.init_traditional_lexer(lexer_conf)

        self._analysis = GrammarAnalyzer(parser_conf)
        self._parser = cyk.Parser(parser_conf.rules, parser_conf.start)

        # Map every rule alias to its callback. A callable alias is used
        # directly; a truthy non-callable alias is looked up as an attribute
        # of ``parser_conf.callback``; a falsy alias is stored unchanged.
        self._postprocess = {}
        for rule in parser_conf.rules:
            a = rule.alias
            self._postprocess[a] = a if callable(a) else (a and getattr(parser_conf.callback, a))

    def parse(self, text):
        """Lex ``text``, parse the token list and return the transformed result."""
        tokens = list(self.lex(text))
        parse = self._parser.parse(tokens)
        parse = self._transform(parse)
        return parse

    def _transform(self, tree):
        """Apply callbacks to every ``Tree`` child, then to ``tree`` itself.

        The subtree list is materialised before any ``children`` list is
        replaced.
        NOTE(review): presumably ``iter_subtrees`` yields children before
        their parents, so each callback sees processed children - confirm.
        """
        subtrees = list(tree.iter_subtrees())
        for subtree in subtrees:
            subtree.children = [self._apply_callback(c) if isinstance(c, Tree) else c for c in subtree.children]

        return self._apply_callback(tree)

    def _apply_callback(self, tree):
        """Return the result of the callback registered for ``tree.rule.alias``,
        called with the node's children."""
        children = tree.children
        callback = self._postprocess[tree.rule.alias]
        # Every rule that reaches this point is expected to have a callback.
        assert callback, tree.rule.alias
        r = callback(children)
        return r
| def get_frontend(parser, lexer): | |||
| @@ -139,7 +139,7 @@ class Parser(object): | |||
| """Converts a RuleNode parse tree to a lark Tree.""" | |||
| orig_rule = self.orig_rules[rule_node.rule.alias] | |||
| children = [] | |||
| for i, child in enumerate(rule_node.children): | |||
| for child in rule_node.children: | |||
| if isinstance(child, RuleNode): | |||
| children.append(self._to_tree(child)) | |||
| else: | |||
| @@ -4,7 +4,7 @@ | |||
| # When the parse ends successfully, a disambiguation stage resolves all ambiguity | |||
| # (right now ambiguity resolution is not developed beyond the needs of lark) | |||
| # Afterwards the parse tree is reduced (transformed) according to user callbacks. | |||
| # I use the no-recursion version of Transformer and Visitor, because the tree might be | |||
| # I use the no-recursion version of Transformer, because the tree might be | |||
| # deeper than Python's recursion limit (a bit absurd, but that's life) | |||
| # | |||
| # The algorithm keeps track of each state set, using a corresponding Column instance. | |||
| @@ -14,7 +14,7 @@ | |||
| # Email : erezshin@gmail.com | |||
| from ..common import ParseError, UnexpectedToken, is_terminal | |||
| from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse | |||
| from ..tree import Tree, Transformer_NoRecurse | |||
| from .grammar_analysis import GrammarAnalyzer | |||
| @@ -234,9 +234,4 @@ class ApplyCallbacks(Transformer_NoRecurse): | |||
| self.postprocess = postprocess | |||
def drv(self, tree):
    """Reduce one derivation node using the callback registered for its rule.

    Looks up ``self.postprocess[tree.rule]`` and, when that entry is truthy,
    returns the result of calling it with ``tree.children``. When the entry
    is falsy the node is rebuilt as a plain ``Tree`` named after the rule's
    origin.
    """
    callback = self.postprocess[tree.rule]
    if callback:
        return callback(tree.children)
    # The fallback previously referenced an undefined name ``rule``, which
    # raised NameError; the rule is reachable through the node itself.
    return Tree(tree.rule.origin, tree.children)
| @@ -20,7 +20,7 @@ | |||
| from collections import defaultdict | |||
| from ..common import ParseError, UnexpectedToken, is_terminal | |||
| from ..common import ParseError, is_terminal | |||
| from ..lexer import Token, UnexpectedInput | |||
| from ..tree import Tree | |||
| from .grammar_analysis import GrammarAnalyzer | |||
| @@ -78,7 +78,7 @@ class Parser: | |||
| raise ParseError('Infinite recursion detected! (rule %s)' % item.rule) | |||
| column.add(new_items) | |||
| def scan(i, token, column): | |||
| def scan(i, column): | |||
| to_scan = column.to_scan | |||
| for x in self.ignore: | |||
| @@ -123,7 +123,7 @@ class Parser: | |||
| column = column0 | |||
| for i, token in enumerate(stream): | |||
| predict_and_complete(column) | |||
| column = scan(i, token, column) | |||
| column = scan(i, column) | |||
| if token == '\n': | |||
| text_line += 1 | |||
| @@ -5,7 +5,7 @@ import sys | |||
| import codecs | |||
| from lark import Lark, InlineTransformer, Transformer | |||
| from lark import Lark, InlineTransformer | |||
| nearley_grammar = r""" | |||
| start: (ruledef|directive)+ | |||
| @@ -172,7 +172,7 @@ def create_code_for_nearley_grammar(g, start, builtin_path, folder_path): | |||
def main(fn, start, nearley_lib):
    """Read the grammar file ``fn`` and return the code generated for it.

    :param fn: path of the grammar file; it is read as UTF-8.
    :param start: passed through to ``create_code_for_nearley_grammar``.
    :param nearley_lib: directory whose ``builtin`` sub-directory is passed
        as the builtin path.
    :return: whatever ``create_code_for_nearley_grammar`` returns.
    """
    with codecs.open(fn, encoding='utf8') as f:
        grammar = f.read()

    builtin_path = os.path.join(nearley_lib, 'builtin')
    # The absolute directory containing the grammar file.
    folder_path = os.path.abspath(os.path.dirname(fn))
    return create_code_for_nearley_grammar(grammar, start, builtin_path, folder_path)
| if __name__ == '__main__': | |||