@@ -41,11 +41,11 @@ class UnexpectedToken(ParseError): | |||||
class LexerConf: | class LexerConf: | ||||
def __init__(self, tokens, ignore=(), postlex=None, callbacks={}): | |||||
def __init__(self, tokens, ignore=(), postlex=None, callbacks=None): | |||||
self.tokens = tokens | self.tokens = tokens | ||||
self.ignore = ignore | self.ignore = ignore | ||||
self.postlex = postlex | self.postlex = postlex | ||||
self.callbacks = callbacks | |||||
self.callbacks = callbacks or {} | |||||
class ParserConf: | class ParserConf: | ||||
def __init__(self, rules, callback, start): | def __init__(self, rules, callback, start): | ||||
@@ -139,36 +139,36 @@ class XEarley: | |||||
class CYK(WithLexer): | class CYK(WithLexer): | ||||
def __init__(self, lexer_conf, parser_conf, options=None): | |||||
self.init_traditional_lexer(lexer_conf) | |||||
self._analysis = GrammarAnalyzer(parser_conf) | |||||
self._parser = cyk.Parser(parser_conf.rules, parser_conf.start) | |||||
self._postprocess = {} | |||||
for rule in parser_conf.rules: | |||||
a = rule.alias | |||||
self._postprocess[a] = a if callable(a) else (a and getattr(parser_conf.callback, a)) | |||||
def parse(self, text): | |||||
tokens = list(self.lex(text)) | |||||
parse = self._parser.parse(tokens) | |||||
parse = self._transform(parse) | |||||
return parse | |||||
def _transform(self, tree): | |||||
subtrees = list(tree.iter_subtrees()) | |||||
for subtree in subtrees: | |||||
subtree.children = [self._apply_callback(c) if isinstance(c, Tree) else c for c in subtree.children] | |||||
return self._apply_callback(tree) | |||||
def _apply_callback(self, tree): | |||||
children = tree.children | |||||
callback = self._postprocess[tree.rule.alias] | |||||
assert callback, tree.rule.alias | |||||
r = callback(children) | |||||
return r | |||||
def __init__(self, lexer_conf, parser_conf, options=None): | |||||
self.init_traditional_lexer(lexer_conf) | |||||
self._analysis = GrammarAnalyzer(parser_conf) | |||||
self._parser = cyk.Parser(parser_conf.rules, parser_conf.start) | |||||
self._postprocess = {} | |||||
for rule in parser_conf.rules: | |||||
a = rule.alias | |||||
self._postprocess[a] = a if callable(a) else (a and getattr(parser_conf.callback, a)) | |||||
def parse(self, text): | |||||
tokens = list(self.lex(text)) | |||||
parse = self._parser.parse(tokens) | |||||
parse = self._transform(parse) | |||||
return parse | |||||
def _transform(self, tree): | |||||
subtrees = list(tree.iter_subtrees()) | |||||
for subtree in subtrees: | |||||
subtree.children = [self._apply_callback(c) if isinstance(c, Tree) else c for c in subtree.children] | |||||
return self._apply_callback(tree) | |||||
def _apply_callback(self, tree): | |||||
children = tree.children | |||||
callback = self._postprocess[tree.rule.alias] | |||||
assert callback, tree.rule.alias | |||||
r = callback(children) | |||||
return r | |||||
def get_frontend(parser, lexer): | def get_frontend(parser, lexer): | ||||
@@ -139,7 +139,7 @@ class Parser(object): | |||||
"""Converts a RuleNode parse tree to a lark Tree.""" | """Converts a RuleNode parse tree to a lark Tree.""" | ||||
orig_rule = self.orig_rules[rule_node.rule.alias] | orig_rule = self.orig_rules[rule_node.rule.alias] | ||||
children = [] | children = [] | ||||
for i, child in enumerate(rule_node.children): | |||||
for child in rule_node.children: | |||||
if isinstance(child, RuleNode): | if isinstance(child, RuleNode): | ||||
children.append(self._to_tree(child)) | children.append(self._to_tree(child)) | ||||
else: | else: | ||||
@@ -4,7 +4,7 @@ | |||||
# When the parse ends successfully, a disambiguation stage resolves all ambiguity | # When the parse ends successfully, a disambiguation stage resolves all ambiguity | ||||
# (right now ambiguity resolution is not developed beyond the needs of lark) | # (right now ambiguity resolution is not developed beyond the needs of lark) | ||||
# Afterwards the parse tree is reduced (transformed) according to user callbacks. | # Afterwards the parse tree is reduced (transformed) according to user callbacks. | ||||
# I use the no-recursion version of Transformer and Visitor, because the tree might be | |||||
# I use the no-recursion version of Transformer, because the tree might be | |||||
# deeper than Python's recursion limit (a bit absurd, but that's life) | # deeper than Python's recursion limit (a bit absurd, but that's life) | ||||
# | # | ||||
# The algorithm keeps track of each state set, using a corresponding Column instance. | # The algorithm keeps track of each state set, using a corresponding Column instance. | ||||
@@ -14,7 +14,7 @@ | |||||
# Email : erezshin@gmail.com | # Email : erezshin@gmail.com | ||||
from ..common import ParseError, UnexpectedToken, is_terminal | from ..common import ParseError, UnexpectedToken, is_terminal | ||||
from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse | |||||
from ..tree import Tree, Transformer_NoRecurse | |||||
from .grammar_analysis import GrammarAnalyzer | from .grammar_analysis import GrammarAnalyzer | ||||
@@ -234,9 +234,4 @@ class ApplyCallbacks(Transformer_NoRecurse): | |||||
self.postprocess = postprocess | self.postprocess = postprocess | ||||
def drv(self, tree): | def drv(self, tree): | ||||
children = tree.children | |||||
callback = self.postprocess[tree.rule] | |||||
if callback: | |||||
return callback(children) | |||||
else: | |||||
return Tree(rule.origin, children) | |||||
return self.postprocess[tree.rule](tree.children) |
@@ -20,7 +20,7 @@ | |||||
from collections import defaultdict | from collections import defaultdict | ||||
from ..common import ParseError, UnexpectedToken, is_terminal | |||||
from ..common import ParseError, is_terminal | |||||
from ..lexer import Token, UnexpectedInput | from ..lexer import Token, UnexpectedInput | ||||
from ..tree import Tree | from ..tree import Tree | ||||
from .grammar_analysis import GrammarAnalyzer | from .grammar_analysis import GrammarAnalyzer | ||||
@@ -78,7 +78,7 @@ class Parser: | |||||
raise ParseError('Infinite recursion detected! (rule %s)' % item.rule) | raise ParseError('Infinite recursion detected! (rule %s)' % item.rule) | ||||
column.add(new_items) | column.add(new_items) | ||||
def scan(i, token, column): | |||||
def scan(i, column): | |||||
to_scan = column.to_scan | to_scan = column.to_scan | ||||
for x in self.ignore: | for x in self.ignore: | ||||
@@ -123,7 +123,7 @@ class Parser: | |||||
column = column0 | column = column0 | ||||
for i, token in enumerate(stream): | for i, token in enumerate(stream): | ||||
predict_and_complete(column) | predict_and_complete(column) | ||||
column = scan(i, token, column) | |||||
column = scan(i, column) | |||||
if token == '\n': | if token == '\n': | ||||
text_line += 1 | text_line += 1 | ||||
@@ -5,7 +5,7 @@ import sys | |||||
import codecs | import codecs | ||||
from lark import Lark, InlineTransformer, Transformer | |||||
from lark import Lark, InlineTransformer | |||||
nearley_grammar = r""" | nearley_grammar = r""" | ||||
start: (ruledef|directive)+ | start: (ruledef|directive)+ | ||||
@@ -172,7 +172,7 @@ def create_code_for_nearley_grammar(g, start, builtin_path, folder_path): | |||||
def main(fn, start, nearley_lib): | def main(fn, start, nearley_lib): | ||||
with codecs.open(fn, encoding='utf8') as f: | with codecs.open(fn, encoding='utf8') as f: | ||||
grammar = f.read() | grammar = f.read() | ||||
return (create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)))) | |||||
return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn))) | |||||
if __name__ == '__main__': | if __name__ == '__main__': | ||||