@@ -41,11 +41,11 @@ class UnexpectedToken(ParseError):
class LexerConf:
def __init__(self, tokens, ignore=(), postlex=None, callbacks={}):
def __init__(self, tokens, ignore=(), postlex=None, callbacks=None):
self.tokens = tokens
self.ignore = ignore
self.postlex = postlex
self.callbacks = callbacks
self.callbacks = callbacks or {}
class ParserConf:
def __init__(self, rules, callback, start):
@@ -139,36 +139,36 @@ class XEarley:
class CYK(WithLexer):
def __init__(self, lexer_conf, parser_conf, options=None):
self.init_traditional_lexer(lexer_conf)
self._analysis = GrammarAnalyzer(parser_conf)
self._parser = cyk.Parser(parser_conf.rules, parser_conf.start)
self._postprocess = {}
for rule in parser_conf.rules:
a = rule.alias
self._postprocess[a] = a if callable(a) else (a and getattr(parser_conf.callback, a))
def parse(self, text):
tokens = list(self.lex(text))
parse = self._parser.parse(tokens)
parse = self._transform(parse)
return parse
def _transform(self, tree):
subtrees = list(tree.iter_subtrees())
for subtree in subtrees:
subtree.children = [self._apply_callback(c) if isinstance(c, Tree) else c for c in subtree.children]
return self._apply_callback(tree)
def _apply_callback(self, tree):
children = tree.children
callback = self._postprocess[tree.rule.alias]
assert callback, tree.rule.alias
r = callback(children)
return r
def __init__(self, lexer_conf, parser_conf, options=None):
self.init_traditional_lexer(lexer_conf)
self._analysis = GrammarAnalyzer(parser_conf)
self._parser = cyk.Parser(parser_conf.rules, parser_conf.start)
self._postprocess = {}
for rule in parser_conf.rules:
a = rule.alias
self._postprocess[a] = a if callable(a) else (a and getattr(parser_conf.callback, a))
def parse(self, text):
tokens = list(self.lex(text))
parse = self._parser.parse(tokens)
parse = self._transform(parse)
return parse
def _transform(self, tree):
subtrees = list(tree.iter_subtrees())
for subtree in subtrees:
subtree.children = [self._apply_callback(c) if isinstance(c, Tree) else c for c in subtree.children]
return self._apply_callback(tree)
def _apply_callback(self, tree):
children = tree.children
callback = self._postprocess[tree.rule.alias]
assert callback, tree.rule.alias
r = callback(children)
return r
def get_frontend(parser, lexer):
@@ -139,7 +139,7 @@ class Parser(object):
"""Converts a RuleNode parse tree to a lark Tree."""
orig_rule = self.orig_rules[rule_node.rule.alias]
children = []
for i, child in enumerate(rule_node.children):
for child in rule_node.children:
if isinstance(child, RuleNode):
children.append(self._to_tree(child))
else:
@@ -4,7 +4,7 @@
# When the parse ends successfully, a disambiguation stage resolves all ambiguity
# (right now ambiguity resolution is not developed beyond the needs of lark)
# Afterwards the parse tree is reduced (transformed) according to user callbacks.
# I use the no-recursion version of Transformer and Visitor, because the tree might be
# I use the no-recursion version of Transformer, because the tree might be
# deeper than Python's recursion limit (a bit absurd, but that's life)
#
# The algorithm keeps track of each state set, using a corresponding Column instance.
@@ -14,7 +14,7 @@
# Email : erezshin@gmail.com
from ..common import ParseError, UnexpectedToken, is_terminal
from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse
from ..tree import Tree, Transformer_NoRecurse
from .grammar_analysis import GrammarAnalyzer
@@ -234,9 +234,4 @@ class ApplyCallbacks(Transformer_NoRecurse):
self.postprocess = postprocess
def drv(self, tree):
children = tree.children
callback = self.postprocess[tree.rule]
if callback:
return callback(children)
else:
return Tree(rule.origin, children)
return self.postprocess[tree.rule](tree.children)
@@ -20,7 +20,7 @@
from collections import defaultdict
from ..common import ParseError, UnexpectedToken, is_terminal
from ..common import ParseError, is_terminal
from ..lexer import Token, UnexpectedInput
from ..tree import Tree
from .grammar_analysis import GrammarAnalyzer
@@ -78,7 +78,7 @@ class Parser:
raise ParseError('Infinite recursion detected! (rule %s)' % item.rule)
column.add(new_items)
def scan(i, token, column):
def scan(i, column):
to_scan = column.to_scan
for x in self.ignore:
@@ -123,7 +123,7 @@ class Parser:
column = column0
for i, token in enumerate(stream):
predict_and_complete(column)
column = scan(i, token, column)
column = scan(i, column)
if token == '\n':
text_line += 1
@@ -5,7 +5,7 @@ import sys
import codecs
from lark import Lark, InlineTransformer, Transformer
from lark import Lark, InlineTransformer
nearley_grammar = r"""
start: (ruledef|directive)+
@@ -172,7 +172,7 @@ def create_code_for_nearley_grammar(g, start, builtin_path, folder_path):
def main(fn, start, nearley_lib):
with codecs.open(fn, encoding='utf8') as f:
grammar = f.read()
return (create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn))))
return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)))
if __name__ == '__main__':