diff --git a/lark/lark.py b/lark/lark.py index d82e723..27ec9b0 100644 --- a/lark/lark.py +++ b/lark/lark.py @@ -75,6 +75,7 @@ class LarkOptions(Serialize): - "resolve" - The parser will automatically choose the simplest derivation (it chooses consistently: greedy for tokens, non-greedy for rules) - "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest). + - "forest": The parser will return the root of the shared packed parse forest. **=== Misc. / Domain Specific Options ===** @@ -262,7 +263,7 @@ class Lark(Serialize): assert self.options.priority in ('auto', None, 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority) assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"' - assert self.options.ambiguity in ('resolve', 'explicit', 'auto', ) + assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', ) # Parse the grammar file and compose the grammars (TODO) self.grammar = load_grammar(grammar, self.source, re_module) @@ -317,8 +318,11 @@ class Lark(Serialize): def _prepare_callbacks(self): self.parser_class = get_frontend(self.options.parser, self.options.lexer) - self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders) - self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) + self._callbacks = None + # we don't need these callbacks if we aren't building a tree + if self.options.ambiguity != 'forest': + self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders) + self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer) def _build_parser(self): self._prepare_callbacks() diff --git a/lark/parser_frontends.py b/lark/parser_frontends.py index b993b9f..eb2b615 100644 --- a/lark/parser_frontends.py +++ b/lark/parser_frontends.py @@ -165,7 +165,8 @@ class Earley(WithLexer): resolve_ambiguity = options.ambiguity == 'resolve' debug = options.debug if options else False - self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity, debug=debug) + tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None + self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class) def match(self, term, token): return term.name == token.type @@ -179,11 +180,13 @@ class XEarley(_ParserFrontend): self._prepare_match(lexer_conf) resolve_ambiguity = options.ambiguity == 'resolve' debug = options.debug if options else False + tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None self.parser = xearley.Parser(parser_conf, self.match, ignore=lexer_conf.ignore, resolve_ambiguity=resolve_ambiguity, debug=debug, + tree_class=tree_class, **kw ) diff --git a/lark/parsers/earley.py b/lark/parsers/earley.py index a997485..42542c2 100644 --- a/lark/parsers/earley.py +++ b/lark/parsers/earley.py @@ -22,11 +22,12 @@ from .earley_common import Item, TransitiveItem from .earley_forest import ForestSumVisitor, SymbolNode, ForestToParseTree class Parser: - def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, debug=False): + def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, debug=False, tree_class=Tree): analysis = GrammarAnalyzer(parser_conf) self.parser_conf = parser_conf self.resolve_ambiguity = resolve_ambiguity self.debug = debug + self.tree_class = tree_class self.FIRST = analysis.FIRST self.NULLABLE = analysis.NULLABLE @@ -313,10 +314,13 @@ class Parser: elif len(solutions) > 1: assert False, 'Earley should not generate multiple start symbol items!' - # Perform our SPPF -> AST - # TODO: Pass the correct tree class to constructor - transformer = ForestToParseTree(Tree, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity) - return transformer.transform(solutions[0]) + if self.tree_class is not None: + # Perform our SPPF -> AST conversion + transformer = ForestToParseTree(self.tree_class, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity) + return transformer.transform(solutions[0]) + + # return the root of the SPPF + return solutions[0] class ApplyCallbacks(Transformer_InPlace): def __init__(self, postprocess): diff --git a/lark/parsers/xearley.py b/lark/parsers/xearley.py index 855625a..256fc2c 100644 --- a/lark/parsers/xearley.py +++ b/lark/parsers/xearley.py @@ -16,6 +16,7 @@ Earley's power in parsing any CFG. from collections import defaultdict +from ..tree import Tree from ..exceptions import UnexpectedCharacters from ..lexer import Token from ..grammar import Terminal @@ -24,8 +25,8 @@ from .earley_forest import SymbolNode class Parser(BaseParser): - def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, ignore = (), complete_lex = False, debug=False): - BaseParser.__init__(self, parser_conf, term_matcher, resolve_ambiguity, debug) + def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, ignore = (), complete_lex = False, debug=False, tree_class=Tree): + BaseParser.__init__(self, parser_conf, term_matcher, resolve_ambiguity, debug, tree_class) self.ignore = [Terminal(t) for t in ignore] self.complete_lex = complete_lex @@ -148,4 +149,4 @@ class Parser(BaseParser): ## Column is now the final column in the parse. assert i == len(columns)-1 - return to_scan \ No newline at end of file + return to_scan