Browse Source

Refactoring for ambiguity resolution

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 years ago
parent
commit
d9b490f322
5 changed files with 85 additions and 78 deletions
  1. +1
    -1
      lark/lark.py
  2. +11
    -8
      lark/parser_frontends.py
  3. +2
    -66
      lark/parsers/earley.py
  4. +68
    -0
      lark/parsers/resolve_ambig.py
  5. +3
    -3
      lark/parsers/xearley.py

+ 1
- 1
lark/lark.py View File

@@ -139,7 +139,7 @@ class Lark:
if self.options.parser == 'earley': if self.options.parser == 'earley':
self.options.ambiguity = 'resolve' self.options.ambiguity = 'resolve'
else: else:
assert self.options.parser == 'earley'
assert self.options.parser == 'earley', "Only Earley supports disambiguation right now"
assert self.options.ambiguity in ('resolve', 'explicit', 'auto') assert self.options.ambiguity in ('resolve', 'explicit', 'auto')


# Parse the grammar file and compose the grammars (TODO) # Parse the grammar file and compose the grammars (TODO)


+ 11
- 8
lark/parser_frontends.py View File

@@ -4,7 +4,7 @@ import sre_parse
from .lexer import Lexer, ContextualLexer, Token from .lexer import Lexer, ContextualLexer, Token


from .common import is_terminal, GrammarError, ParserConf, Terminal_Regexp, Terminal_Token from .common import is_terminal, GrammarError, ParserConf, Terminal_Regexp, Terminal_Token
from .parsers import lalr_parser, earley, xearley
from .parsers import lalr_parser, earley, xearley, resolve_ambig


class WithLexer: class WithLexer:
def __init__(self, lexer_conf): def __init__(self, lexer_conf):
@@ -48,6 +48,12 @@ class LALR_ContextualLexer:
tokens = self.lexer_conf.postlex.process(tokens) tokens = self.lexer_conf.postlex.process(tokens)
return self.parser.parse(tokens, self.lexer.set_parser_state) return self.parser.parse(tokens, self.lexer.set_parser_state)


def get_ambiguity_resolver(options):
    """Pick the callable the Earley front-ends pass as ``resolve_ambiguity``.

    :param options: object exposing an ``ambiguity`` attribute, or a falsy
        value (e.g. ``None``) to request the default behaviour.
    :return: ``resolve_ambig.resolve_ambig`` when ``options`` is falsy or its
        ``ambiguity`` is ``'resolve'``; ``None`` when it is ``'explicit'``
        (the parsers only run the resolver when it is truthy, so ``None``
        leaves ``_ambig`` nodes in the tree).
    :raises ValueError: for any other ``ambiguity`` value.
    """
    # No options at all means "use the default", which is automatic resolution.
    if not options:
        return resolve_ambig.resolve_ambig

    ambiguity = options.ambiguity
    if ambiguity == 'resolve':
        return resolve_ambig.resolve_ambig
    if ambiguity == 'explicit':
        return None

    # Name the offending value instead of dumping the whole options object.
    raise ValueError("Unsupported ambiguity option: %r (expected 'resolve' or 'explicit')" % (ambiguity,))


def tokenize_text(text): def tokenize_text(text):
new_text = [] new_text = []
@@ -66,11 +72,10 @@ class Earley_NoLex:


rules = [(n, list(self._prepare_expansion(x)), a, o) for n,x,a,o in parser_conf.rules] rules = [(n, list(self._prepare_expansion(x)), a, o) for n,x,a,o in parser_conf.rules]


resolve_ambiguity = (options.ambiguity=='resolve') if options else True
self.parser = earley.Parser(rules,
self.parser = earley.Parser(rules,
parser_conf.start, parser_conf.start,
parser_conf.callback, parser_conf.callback,
resolve_ambiguity=resolve_ambiguity)
resolve_ambiguity=get_ambiguity_resolver(options))


def _prepare_expansion(self, expansion): def _prepare_expansion(self, expansion):
for sym in expansion: for sym in expansion:
@@ -93,11 +98,10 @@ class Earley(WithLexer):


rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules] rules = [(n, self._prepare_expansion(x), a, o) for n,x,a,o in parser_conf.rules]


resolve_ambiguity = (options.ambiguity=='resolve') if options else True
self.parser = earley.Parser(rules, self.parser = earley.Parser(rules,
parser_conf.start, parser_conf.start,
parser_conf.callback, parser_conf.callback,
resolve_ambiguity=resolve_ambiguity)
resolve_ambiguity=get_ambiguity_resolver(options))


def _prepare_expansion(self, expansion): def _prepare_expansion(self, expansion):
return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion] return [Terminal_Token(sym) if is_terminal(sym) else sym for sym in expansion]
@@ -113,13 +117,12 @@ class XEarley:


rules = [(n, list(self._prepare_expansion(x)), a, o) for n,x,a,o in parser_conf.rules] rules = [(n, list(self._prepare_expansion(x)), a, o) for n,x,a,o in parser_conf.rules]


resolve_ambiguity = (options.ambiguity=='resolve') if options else True
ignore = [Terminal_Regexp(x, self.token_by_name[x].pattern.to_regexp()) for x in lexer_conf.ignore] ignore = [Terminal_Regexp(x, self.token_by_name[x].pattern.to_regexp()) for x in lexer_conf.ignore]


self.parser = xearley.Parser(rules, self.parser = xearley.Parser(rules,
parser_conf.start, parser_conf.start,
parser_conf.callback, parser_conf.callback,
resolve_ambiguity=resolve_ambiguity,
resolve_ambiguity=get_ambiguity_resolver(options),
ignore=ignore, ignore=ignore,
) )




+ 2
- 66
lark/parsers/earley.py View File

@@ -13,9 +13,6 @@
# Author: Erez Shinan (2017) # Author: Erez Shinan (2017)
# Email : erezshin@gmail.com # Email : erezshin@gmail.com


from functools import cmp_to_key

from ..utils import compare
from ..common import ParseError, UnexpectedToken, Terminal from ..common import ParseError, UnexpectedToken, Terminal
from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse
from .grammar_analysis import GrammarAnalyzer from .grammar_analysis import GrammarAnalyzer
@@ -136,7 +133,7 @@ class Column:
return bool(self.item_count) return bool(self.item_count)


class Parser: class Parser:
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=True):
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None):
self.analysis = GrammarAnalyzer(rules, start_symbol) self.analysis = GrammarAnalyzer(rules, start_symbol)
self.start_symbol = start_symbol self.start_symbol = start_symbol
self.resolve_ambiguity = resolve_ambiguity self.resolve_ambiguity = resolve_ambiguity
@@ -213,10 +210,9 @@ class Parser:
tree = Tree('_ambig', solutions) tree = Tree('_ambig', solutions)


if self.resolve_ambiguity: if self.resolve_ambiguity:
ResolveAmbig().visit(tree)
tree = self.resolve_ambiguity(tree)


return ApplyCallbacks(self.postprocess).transform(tree) return ApplyCallbacks(self.postprocess).transform(tree)




class ApplyCallbacks(Transformer_NoRecurse): class ApplyCallbacks(Transformer_NoRecurse):
@@ -231,66 +227,6 @@ class ApplyCallbacks(Transformer_NoRecurse):
else: else:
return Tree(rule.origin, children) return Tree(rule.origin, children)


def _compare_rules(rule1, rule2):
    """cmp-style ordering of two rules, used when ranking derivations.

    Same origin: order by expansion length via ``compare`` (sign flipped for
    origins starting with ``'__'``).
    Different origins: order by the negated ``compare`` of their priorities
    when both rules carry options with a non-None priority; otherwise 0.

    NOTE(review): ``compare`` comes from ``..utils`` and is presumably a
    cmp-like -1/0/1 helper -- confirm.
    """
    if rule1.origin == rule2.origin:
        length_order = compare( len(rule1.expansion), len(rule2.expansion))
        # XXX hack! We need to set priority in parser, not here
        return -length_order if rule1.origin.startswith('__') else length_order

    opts1, opts2 = rule1.options, rule2.options
    if opts1 and opts2:
        prio1, prio2 = opts1.priority, opts2.priority
        if prio1 is not None and prio2 is not None:
            assert prio1 != prio2, "Priority is the same between both rules: %s == %s" % (rule1, rule2)
            return -compare(prio1, prio2)

    # Different origins without two explicit priorities: no preference.
    return 0

def _compare_drv(tree1, tree2):
    """cmp-style ordering of two derivation nodes (used through cmp_to_key).

    Non-Tree operands, and trees lacking a ``rule`` attribute, fall back to
    the negated ``compare`` of the operands themselves. Otherwise the rules
    are compared first, then the children pairwise, then the child counts.
    """
    both_are_trees = isinstance(tree1, Tree) and isinstance(tree2, Tree)
    if not both_are_trees:
        return -compare(tree1, tree2)

    try:
        # Only probing that both nodes carry a rule; values are re-read below.
        tree1.rule
        tree2.rule
    except AttributeError:
        # Probably trees that don't take part in this parse (better way to distinguish?)
        return -compare(tree1, tree2)

    # XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse,
    #     when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be
    #     computationally inefficient. So we handle it here.
    for node in (tree1, tree2):
        if node.data == '_ambig':
            _resolve_ambig(node)

    # Rules are read again here because resolving may have replaced them.
    verdict = _compare_rules(tree1.rule, tree2.rule)
    if verdict:
        return verdict

    # rules are "equal", so compare trees
    for left, right in zip(tree1.children, tree2.children):
        verdict = _compare_drv(left, right)
        if verdict:
            return verdict

    return compare(len(tree1.children), len(tree2.children))


def _resolve_ambig(tree):
    """Collapse an ``_ambig`` node in place into its best-ranked alternative.

    The child that ranks lowest under ``_compare_drv`` is selected; the node
    then takes over that child's children and ``rule``.
    """
    assert tree.data == '_ambig'

    ranking_key = cmp_to_key(_compare_drv)
    winner = min(tree.children, key=ranking_key)
    assert winner.data == 'drv'

    tree.set('drv', winner.children)
    tree.rule = winner.rule    # needed for applying callbacks

    assert tree.data != '_ambig'

class ResolveAmbig(Visitor_NoRecurse):
    """Visitor whose ``_ambig`` handler resolves the node it is given.

    NOTE(review): presumably Visitor_NoRecurse dispatches on the node's
    ``data`` name, so this runs for every ``_ambig`` node -- confirm.
    """

    def _ambig(self, tree):
        # Delegates to the module-level helper, which mutates the node in place.
        _resolve_ambig(tree)


# RULES = [ # RULES = [
# ('a', ['d']), # ('a', ['d']),
# ('d', ['b']), # ('d', ['b']),


+ 68
- 0
lark/parsers/resolve_ambig.py View File

@@ -0,0 +1,68 @@
from ..utils import compare
from functools import cmp_to_key

from ..tree import Tree, Visitor_NoRecurse

def _compare_rules(rule1, rule2):
    """cmp-style ordering of two rules, used when ranking derivations.

    Same origin: order by expansion length via ``compare`` (sign flipped for
    origins starting with ``'__'``).
    Different origins: order by the negated ``compare`` of their priorities
    when both rules carry options with a non-None priority; otherwise 0.

    NOTE(review): ``compare`` comes from ``..utils`` and is presumably a
    cmp-like -1/0/1 helper -- confirm.
    """
    if rule1.origin == rule2.origin:
        length_order = compare( len(rule1.expansion), len(rule2.expansion))
        # XXX hack! We need to set priority in parser, not here
        return -length_order if rule1.origin.startswith('__') else length_order

    opts1, opts2 = rule1.options, rule2.options
    if opts1 and opts2:
        prio1, prio2 = opts1.priority, opts2.priority
        if prio1 is not None and prio2 is not None:
            assert prio1 != prio2, "Priority is the same between both rules: %s == %s" % (rule1, rule2)
            return -compare(prio1, prio2)

    # Different origins without two explicit priorities: no preference.
    return 0

def _compare_drv(tree1, tree2):
    """cmp-style ordering of two derivation nodes (used through cmp_to_key).

    Non-Tree operands, and trees lacking a ``rule`` attribute, fall back to
    the negated ``compare`` of the operands themselves. Otherwise the rules
    are compared first, then the children pairwise, then the child counts.
    """
    both_are_trees = isinstance(tree1, Tree) and isinstance(tree2, Tree)
    if not both_are_trees:
        return -compare(tree1, tree2)

    try:
        # Only probing that both nodes carry a rule; values are re-read below.
        tree1.rule
        tree2.rule
    except AttributeError:
        # Probably trees that don't take part in this parse (better way to distinguish?)
        return -compare(tree1, tree2)

    # XXX These artifacts can appear due to imperfections in the ordering of Visitor_NoRecurse,
    #     when confronted with duplicate (same-id) nodes. Fixing this ordering is possible, but would be
    #     computationally inefficient. So we handle it here.
    for node in (tree1, tree2):
        if node.data == '_ambig':
            _resolve_ambig(node)

    # Rules are read again here because resolving may have replaced them.
    verdict = _compare_rules(tree1.rule, tree2.rule)
    if verdict:
        return verdict

    # rules are "equal", so compare trees
    for left, right in zip(tree1.children, tree2.children):
        verdict = _compare_drv(left, right)
        if verdict:
            return verdict

    return compare(len(tree1.children), len(tree2.children))


def _resolve_ambig(tree):
    """Collapse an ``_ambig`` node in place into its best-ranked alternative.

    The child that ranks lowest under ``_compare_drv`` is selected; the node
    then takes over that child's children and ``rule``.
    """
    assert tree.data == '_ambig'

    ranking_key = cmp_to_key(_compare_drv)
    winner = min(tree.children, key=ranking_key)
    assert winner.data == 'drv'

    tree.set('drv', winner.children)
    tree.rule = winner.rule    # needed for applying callbacks

    assert tree.data != '_ambig'

class ResolveAmbig(Visitor_NoRecurse):
    """Visitor whose ``_ambig`` handler resolves the node it is given.

    NOTE(review): presumably Visitor_NoRecurse dispatches on the node's
    ``data`` name, so this runs for every ``_ambig`` node -- confirm.
    """

    def _ambig(self, tree):
        # Delegates to the module-level helper, which mutates the node in place.
        _resolve_ambig(tree)


def resolve_ambig(tree):
    """Run ``ResolveAmbig`` over ``tree`` and hand the same tree back.

    Returning the tree lets callers use this as a plain
    ``tree = resolver(tree)`` callback.
    """
    resolver = ResolveAmbig()
    resolver.visit(tree)
    return tree

+ 3
- 3
lark/parsers/xearley.py View File

@@ -25,10 +25,10 @@ from ..lexer import Token
from ..tree import Tree from ..tree import Tree
from .grammar_analysis import GrammarAnalyzer from .grammar_analysis import GrammarAnalyzer


from .earley import ResolveAmbig, ApplyCallbacks, Item, NewsList, Derivation, END_TOKEN, Column
from .earley import ApplyCallbacks, Item, Column


class Parser: class Parser:
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=True, ignore=()):
def __init__(self, rules, start_symbol, callback, resolve_ambiguity=None, ignore=()):
self.analysis = GrammarAnalyzer(rules, start_symbol) self.analysis = GrammarAnalyzer(rules, start_symbol)
self.start_symbol = start_symbol self.start_symbol = start_symbol
self.resolve_ambiguity = resolve_ambiguity self.resolve_ambiguity = resolve_ambiguity
@@ -132,7 +132,7 @@ class Parser:
tree = Tree('_ambig', solutions) tree = Tree('_ambig', solutions)


if self.resolve_ambiguity: if self.resolve_ambiguity:
ResolveAmbig().visit(tree)
tree = self.resolve_ambiguity(tree)


return ApplyCallbacks(self.postprocess).transform(tree) return ApplyCallbacks(self.postprocess).transform(tree)




Loading…
Cancel
Save