Ver a proveniência

Add ambiguity='forest' option

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.10.0
Chanic Panic há 4 anos
ascendente
cometimento
9c967fafb0
4 ficheiros alterados com 24 adições e 12 eliminações
  1. +7
    -3
      lark/lark.py
  2. +4
    -1
      lark/parser_frontends.py
  3. +9
    -5
      lark/parsers/earley.py
  4. +4
    -3
      lark/parsers/xearley.py

+ 7
- 3
lark/lark.py Ver ficheiro

@@ -75,6 +75,7 @@ class LarkOptions(Serialize):
- "resolve": The parser will automatically choose the simplest derivation
(it chooses consistently: greedy for tokens, non-greedy for rules)
- "explicit": The parser will return all derivations wrapped in "_ambig" tree nodes (i.e. a forest).
- "forest": The parser will return the root of the shared packed parse forest.

**=== Misc. / Domain Specific Options ===**

@@ -262,7 +263,7 @@ class Lark(Serialize):

assert self.options.priority in ('auto', None, 'normal', 'invert'), 'invalid priority option specified: {}. options are auto, none, normal, invert.'.format(self.options.priority)
assert self.options.ambiguity not in ('resolve__antiscore_sum', ), 'resolve__antiscore_sum has been replaced with the option priority="invert"'
assert self.options.ambiguity in ('resolve', 'explicit', 'auto', )
assert self.options.ambiguity in ('resolve', 'explicit', 'forest', 'auto', )

# Parse the grammar file and compose the grammars (TODO)
self.grammar = load_grammar(grammar, self.source, re_module)
@@ -317,8 +318,11 @@ class Lark(Serialize):

def _prepare_callbacks(self):
self.parser_class = get_frontend(self.options.parser, self.options.lexer)
self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)
self._callbacks = None
# we don't need these callbacks if we aren't building a tree
if self.options.ambiguity != 'forest':
self._parse_tree_builder = ParseTreeBuilder(self.rules, self.options.tree_class or Tree, self.options.propagate_positions, self.options.keep_all_tokens, self.options.parser!='lalr' and self.options.ambiguity=='explicit', self.options.maybe_placeholders)
self._callbacks = self._parse_tree_builder.create_callback(self.options.transformer)

def _build_parser(self):
self._prepare_callbacks()


+ 4
- 1
lark/parser_frontends.py Ver ficheiro

@@ -165,7 +165,8 @@ class Earley(WithLexer):

resolve_ambiguity = options.ambiguity == 'resolve'
debug = options.debug if options else False
self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity, debug=debug)
tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None
self.parser = earley.Parser(parser_conf, self.match, resolve_ambiguity=resolve_ambiguity, debug=debug, tree_class=tree_class)

def match(self, term, token):
return term.name == token.type
@@ -179,11 +180,13 @@ class XEarley(_ParserFrontend):
self._prepare_match(lexer_conf)
resolve_ambiguity = options.ambiguity == 'resolve'
debug = options.debug if options else False
tree_class = options.tree_class or Tree if options.ambiguity != 'forest' else None
self.parser = xearley.Parser(parser_conf,
self.match,
ignore=lexer_conf.ignore,
resolve_ambiguity=resolve_ambiguity,
debug=debug,
tree_class=tree_class,
**kw
)



+ 9
- 5
lark/parsers/earley.py Ver ficheiro

@@ -22,11 +22,12 @@ from .earley_common import Item, TransitiveItem
from .earley_forest import ForestSumVisitor, SymbolNode, ForestToParseTree

class Parser:
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, debug=False):
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, debug=False, tree_class=Tree):
analysis = GrammarAnalyzer(parser_conf)
self.parser_conf = parser_conf
self.resolve_ambiguity = resolve_ambiguity
self.debug = debug
self.tree_class = tree_class

self.FIRST = analysis.FIRST
self.NULLABLE = analysis.NULLABLE
@@ -313,10 +314,13 @@ class Parser:
elif len(solutions) > 1:
assert False, 'Earley should not generate multiple start symbol items!'

# Perform our SPPF -> AST
# TODO: Pass the correct tree class to constructor
transformer = ForestToParseTree(Tree, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity)
return transformer.transform(solutions[0])
if self.tree_class is not None:
# Perform our SPPF -> AST conversion
transformer = ForestToParseTree(self.tree_class, self.callbacks, self.forest_sum_visitor and self.forest_sum_visitor(), self.resolve_ambiguity)
return transformer.transform(solutions[0])

# return the root of the SPPF
return solutions[0]

class ApplyCallbacks(Transformer_InPlace):
def __init__(self, postprocess):


+ 4
- 3
lark/parsers/xearley.py Ver ficheiro

@@ -16,6 +16,7 @@ Earley's power in parsing any CFG.

from collections import defaultdict

from ..tree import Tree
from ..exceptions import UnexpectedCharacters
from ..lexer import Token
from ..grammar import Terminal
@@ -24,8 +25,8 @@ from .earley_forest import SymbolNode


class Parser(BaseParser):
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, ignore = (), complete_lex = False, debug=False):
BaseParser.__init__(self, parser_conf, term_matcher, resolve_ambiguity, debug)
def __init__(self, parser_conf, term_matcher, resolve_ambiguity=True, ignore = (), complete_lex = False, debug=False, tree_class=Tree):
BaseParser.__init__(self, parser_conf, term_matcher, resolve_ambiguity, debug, tree_class)
self.ignore = [Terminal(t) for t in ignore]
self.complete_lex = complete_lex

@@ -148,4 +149,4 @@ class Parser(BaseParser):

## Column is now the final column in the parse.
assert i == len(columns)-1
return to_scan
return to_scan

Carregando…
Cancelar
Guardar