
Post-refactor cleanup

Erez Shinan, 6 years ago
parent commit 39e58cb8fd
5 changed files with 45 additions and 97 deletions:

  1. lark/parser_frontends.py          (+21, -35)
  2. lark/parsers/earley.py            (+10, -43)
  3. lark/parsers/grammar_analysis.py  (+4, -3)
  4. lark/parsers/lalr_parser.py       (+1, -1)
  5. lark/parsers/xearley.py           (+9, -15)

lark/parser_frontends.py  (+21, -35)

@@ -7,10 +7,16 @@ from .common import is_terminal, GrammarError, ParserConf
 from .parsers import lalr_parser, earley, xearley, resolve_ambig


 class WithLexer:
-    def __init__(self, lexer_conf):
+    def init_traditional_lexer(self, lexer_conf):
         self.lexer_conf = lexer_conf
         self.lexer = Lexer(lexer_conf.tokens, ignore=lexer_conf.ignore)

+    def init_contextual_lexer(self, lexer_conf, parser_conf):
+        self.lexer_conf = lexer_conf
+        d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()}
+        always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
+        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept)
+
     def lex(self, text):
         stream = self.lexer.lex(text)
         if self.lexer_conf.postlex:
@@ -21,32 +27,22 @@ class WithLexer:

 class LALR(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
-        WithLexer.__init__(self, lexer_conf)
-
-        self.parser_conf = parser_conf
         self.parser = lalr_parser.Parser(parser_conf)
+        self.init_traditional_lexer(lexer_conf)

     def parse(self, text):
-        tokens = self.lex(text)
-        return self.parser.parse(tokens)
+        token_stream = self.lex(text)
+        return self.parser.parse(token_stream)


-class LALR_ContextualLexer:
+class LALR_ContextualLexer(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
-        self.lexer_conf = lexer_conf
-        self.parser_conf = parser_conf
-
         self.parser = lalr_parser.Parser(parser_conf)
-
-        d = {idx:t.keys() for idx, t in self.parser.analysis.parse_table.states.items()}
-        always_accept = lexer_conf.postlex.always_accept if lexer_conf.postlex else ()
-        self.lexer = ContextualLexer(lexer_conf.tokens, d, ignore=lexer_conf.ignore, always_accept=always_accept)
+        self.init_contextual_lexer(lexer_conf, parser_conf)

     def parse(self, text):
-        tokens = self.lexer.lex(text)
-        if self.lexer_conf.postlex:
-            tokens = self.lexer_conf.postlex.process(tokens)
-        return self.parser.parse(tokens, self.lexer.set_parser_state)
+        token_stream = self.lex(text)
+        return self.parser.parse(token_stream, self.lexer.set_parser_state)


 def get_ambiguity_resolver(options):
     if not options or options.ambiguity == 'resolve':
@@ -58,24 +54,19 @@ def get_ambiguity_resolver(options):
         raise ValueError(options)


 def tokenize_text(text):
-    new_text = []
     line = 1
     col_start_pos = 0
     for i, ch in enumerate(text):
         if '\n' in ch:
             line += ch.count('\n')
             col_start_pos = i + ch.rindex('\n')
-        new_text.append(Token('CHAR', ch, line=line, column=i - col_start_pos))
-    return new_text
+        yield Token('CHAR', ch, line=line, column=i - col_start_pos)


 class Earley_NoLex:
     def __init__(self, lexer_conf, parser_conf, options=None):
         self._prepare_match(lexer_conf)

-        self.parser = earley.Parser(parser_conf.rules,
-                                    parser_conf.start,
-                                    parser_conf.callback,
-                                    self.match,
+        self.parser = earley.Parser(parser_conf, self.match,
                                     resolve_ambiguity=get_ambiguity_resolver(options))

@@ -92,17 +83,14 @@ class Earley_NoLex:
             self.regexps[t.name] = re.compile(regexp)

     def parse(self, text):
-        new_text = tokenize_text(text)
-        return self.parser.parse(new_text)
+        token_stream = tokenize_text(text)
+        return self.parser.parse(token_stream)


 class Earley(WithLexer):
     def __init__(self, lexer_conf, parser_conf, options=None):
-        WithLexer.__init__(self, lexer_conf)
+        self.init_traditional_lexer(lexer_conf)

-        self.parser = earley.Parser(parser_conf.rules,
-                                    parser_conf.start,
-                                    parser_conf.callback,
-                                    self.match,
+        self.parser = earley.Parser(parser_conf, self.match,
                                     resolve_ambiguity=get_ambiguity_resolver(options))

     def match(self, term, token):
@@ -119,9 +107,7 @@ class XEarley:

         self._prepare_match(lexer_conf)

-        self.parser = xearley.Parser(parser_conf.rules,
-                                     parser_conf.start,
-                                     parser_conf.callback,
+        self.parser = xearley.Parser(parser_conf,
                                      self.match,
                                      resolve_ambiguity=get_ambiguity_resolver(options),
                                      ignore=lexer_conf.ignore,
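
Note: the net effect of this file's changes is that both LALR frontends now share lexer setup and the postlex-aware lex() through the WithLexer mixin, and tokenize_text becomes a generator, so Earley_NoLex consumes a lazy token stream instead of a pre-built list. Below is a minimal, self-contained sketch of the mixin pattern; StubLexer and StubParser are stand-ins for illustration, not lark's real Lexer and lalr_parser.Parser.

    from types import SimpleNamespace

    class StubLexer:                        # stand-in, not lark's Lexer
        def lex(self, text):
            return iter(text.split())       # "tokens" are just words here

    class StubParser:                       # stand-in, not lalr_parser.Parser
        def parse(self, token_stream):
            return list(token_stream)

    class WithLexer:
        def init_traditional_lexer(self, lexer_conf):
            self.lexer_conf = lexer_conf
            self.lexer = StubLexer()

        def lex(self, text):
            stream = self.lexer.lex(text)
            # the postlex check now lives in one place instead of per-frontend
            if self.lexer_conf.postlex:
                return self.lexer_conf.postlex.process(stream)
            return stream

    class LALR(WithLexer):
        def __init__(self, lexer_conf, parser_conf):
            self.parser = StubParser()
            self.init_traditional_lexer(lexer_conf)

        def parse(self, text):
            return self.parser.parse(self.lex(text))

    print(LALR(SimpleNamespace(postlex=None), None).parse("a b c"))  # ['a', 'b', 'c']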


lark/parsers/earley.py  (+10, -43)

@@ -18,9 +18,6 @@ from ..tree import Tree, Visitor_NoRecurse, Transformer_NoRecurse
 from .grammar_analysis import GrammarAnalyzer


-class EndToken:
-    type = '$END'
-
 class Derivation(Tree):
     _hash = None

@@ -36,8 +33,6 @@ class Derivation(Tree):
             self._hash = Tree.__hash__(self)
         return self._hash

-END_TOKEN = EndToken()
-
 class Item(object):
     "An Earley Item, the atom of the algorithm."

@@ -60,11 +55,8 @@ class Item(object):
             new_tree = Derivation(self.rule, self.tree.children + [tree])
         return self.__class__(self.rule, self.ptr+1, self.start, new_tree)

-    def similar(self, other):
-        return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule
-
     def __eq__(self, other):
-        return self.similar(other)   #and (self.tree == other.tree)
+        return self.start is other.start and self.ptr == other.ptr and self.rule == other.rule

     def __hash__(self):
         return hash((self.rule, self.ptr, id(self.start)))   # Always runs Derivation.__hash__
@@ -152,27 +144,24 @@ class Column:
     __nonzero__ = __bool__  # Py2 backwards-compatibility

 class Parser:
-    def __init__(self, rules, start_symbol, callback, term_matcher, resolve_ambiguity=None):
-        self.analysis = GrammarAnalyzer(rules, start_symbol)
-        self.start_symbol = start_symbol
+    def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None):
+        self.analysis = GrammarAnalyzer(parser_conf)
+        self.parser_conf = parser_conf
         self.resolve_ambiguity = resolve_ambiguity

+        self.FIRST = self.analysis.FIRST
         self.postprocess = {}
         self.predictions = {}
-        self.FIRST = {}
-        for rule in self.analysis.rules:
-            a = rule.alias
-            self.postprocess[rule] = a if callable(a) else (a and getattr(callback, a))
+        for rule in parser_conf.rules:
+            self.postprocess[rule] = getattr(parser_conf.callback, rule.alias)
             self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]

-            self.FIRST[rule.origin] = self.analysis.FIRST[rule.origin]
-
         self.term_matcher = term_matcher


     def parse(self, stream, start_symbol=None):
         # Define parser functions
-        start_symbol = start_symbol or self.start_symbol
+        start_symbol = start_symbol or self.parser_conf.start

         _Item = Item
         match = self.term_matcher
@@ -198,9 +187,8 @@ class Parser:

             for item in to_reduce:
                 new_items = list(complete(item))
-                for new_item in new_items:
-                    if new_item.similar(item):
-                        raise ParseError('Infinite recursion detected! (rule %s)' % new_item.rule)
+                if item in new_items:
+                    raise ParseError('Infinite recursion detected! (rule %s)' % item.rule)
                 column.add(new_items)

         def scan(i, token, column):
@@ -252,24 +240,3 @@ class ApplyCallbacks(Transformer_NoRecurse):
             return callback(children)
         else:
             return Tree(rule.origin, children)
-
-# RULES = [
-#     ('a', ['d']),
-#     ('d', ['b']),
-#     ('b', ['C']),
-#     ('b', ['b', 'C']),
-#     ('b', ['C', 'b']),
-# ]
-# p = Parser(RULES, 'a')
-# for x in p.parse('CC'):
-#     print x.pretty()
-
-#---------------
-# RULES = [
-#     ('s', ['a', 'a']),
-#     ('a', ['b', 'b']),
-#     ('b', ['C'], lambda (x,): x),
-#     ('b', ['b', 'C']),
-# ]
-# p = Parser(RULES, 's', {})
-# print p.parse('CCCCC').pretty()
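
Note: with similar() folded into __eq__, the infinite-recursion guard becomes a plain membership test, since `item in new_items` invokes Item.__eq__ against each completed item. A stripped-down sketch of that equality rule (illustration only, not the real lark Item, which also carries a derivation tree):

    class Item:                             # stripped-down, for illustration
        def __init__(self, rule, ptr, start):
            self.rule, self.ptr, self.start = rule, ptr, start

        def __eq__(self, other):
            # same rule, same dot position, same start column (by identity)
            return (self.start is other.start and self.ptr == other.ptr
                    and self.rule == other.rule)

        def __hash__(self):
            return hash((self.rule, self.ptr, id(self.start)))

    col = object()                          # stand-in for an Earley column
    a, b = Item('b -> b C', 1, col), Item('b -> b C', 1, col)
    assert a == b and a in [b]              # `in` delegates to __eq__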

lark/parsers/grammar_analysis.py  (+4, -3)

@@ -94,13 +94,14 @@ def calculate_sets(rules):


 class GrammarAnalyzer(object):
-    def __init__(self, rules, start_symbol, debug=False):
+    def __init__(self, parser_conf, debug=False):
+        rules = parser_conf.rules
         assert len(rules) == len(set(rules))

-        self.start_symbol = start_symbol
+        self.start_symbol = parser_conf.start
         self.debug = debug

-        root_rule = Rule('$root', [start_symbol, '$END'])
+        root_rule = Rule('$root', [self.start_symbol, '$END'])

         self.rules_by_origin = {r.origin: [] for r in rules}
         for r in rules:
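
Note: this is the recurring move of the commit: GrammarAnalyzer, the Earley parsers, and LALR_Analyzer all stop taking (rules, start_symbol, callback) separately and read them off a single parser_conf. ParserConf itself comes from lark.common (see the first hunk's context line); a minimal stand-in with the three attributes these diffs actually read might look like the following, where Callbacks is a hypothetical callback holder:

    from collections import namedtuple

    # Stand-in exposing .rules, .start, .callback; lark's real ParserConf
    # (lark/common.py) may hold more than this.
    ParserConf = namedtuple('ParserConf', ['rules', 'start', 'callback'])

    class Callbacks:                        # hypothetical callback holder
        def expr(self, children):
            return children

    conf = ParserConf(rules=[], start='start', callback=Callbacks())
    print(conf.start)                       # analyzers now read start/rules off one object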


lark/parsers/lalr_parser.py  (+1, -1)

@@ -15,7 +15,7 @@ class Parser:
     def __init__(self, parser_conf):
         assert all(r.options is None or r.options.priority is None
                    for r in parser_conf.rules), "LALR doesn't yet support prioritization"
-        self.analysis = analysis = LALR_Analyzer(parser_conf.rules, parser_conf.start)
+        self.analysis = analysis = LALR_Analyzer(parser_conf)
         analysis.compute_lookahead()
         callbacks = {rule: getattr(parser_conf.callback, rule.alias or rule.origin, None)
                      for rule in analysis.rules}
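
Note: one asymmetry visible across these files: LALR keeps the lenient three-argument getattr (a missing callback falls back to None, and rule.origin is tried when alias is unset), while the rewritten Earley loops use the strict two-argument form and therefore require the callback object to have an attribute per rule alias. The difference in plain Python, with a hypothetical callback holder:

    class Callbacks:                        # hypothetical callback holder
        def add(self, children):
            return sum(children)

    cb = Callbacks()
    print(getattr(cb, 'add')([1, 2]))       # 3: strict form, Earley-style
    print(getattr(cb, 'missing', None))     # None: lenient form, LALR-style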


lark/parsers/xearley.py  (+9, -15)

@@ -28,31 +28,26 @@ from .grammar_analysis import GrammarAnalyzer
 from .earley import ApplyCallbacks, Item, Column


 class Parser:
-    def __init__(self, rules, start_symbol, callback, term_matcher, resolve_ambiguity=None, ignore=(), predict_all=False):
-        self.analysis = GrammarAnalyzer(rules, start_symbol)
-        self.start_symbol = start_symbol
+    def __init__(self, parser_conf, term_matcher, resolve_ambiguity=None, ignore=(), predict_all=False):
+        self.analysis = GrammarAnalyzer(parser_conf)
+        self.parser_conf = parser_conf
         self.resolve_ambiguity = resolve_ambiguity
         self.ignore = list(ignore)
         self.predict_all = predict_all

+        self.FIRST = self.analysis.FIRST
         self.postprocess = {}
         self.predictions = {}
-        self.FIRST = {}
-
-        for rule in self.analysis.rules:
-            a = rule.alias
-            self.postprocess[rule] = a if callable(a) else (a and getattr(callback, a))
+        for rule in parser_conf.rules:
+            self.postprocess[rule] = getattr(parser_conf.callback, rule.alias)
             self.predictions[rule.origin] = [x.rule for x in self.analysis.expand_rule(rule.origin)]

-            self.FIRST[rule.origin] = self.analysis.FIRST[rule.origin]
-
         self.term_matcher = term_matcher


     def parse(self, stream, start_symbol=None):
         # Define parser functions
-        start_symbol = start_symbol or self.start_symbol
+        start_symbol = start_symbol or self.parser_conf.start
         delayed_matches = defaultdict(list)
         match = self.term_matcher

@@ -79,9 +74,8 @@ class Parser:
                 column.add( predict(nonterm, column) )
             for item in to_reduce:
                 new_items = list(complete(item))
-                for new_item in new_items:
-                    if new_item.similar(item):
-                        raise ParseError('Infinite recursion detected! (rule %s)' % new_item.rule)
+                if item in new_items:
+                    raise ParseError('Infinite recursion detected! (rule %s)' % item.rule)
                 column.add(new_items)

         def scan(i, token, column):
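
Note: xearley mirrors the earley changes, including replacing the per-rule FIRST copy with a single reference to the analyzer's dict. Aliasing is equivalent here because the parser only reads FIRST. A sketch of the simplification, where analysis is a stand-in namespace rather than lark's GrammarAnalyzer:

    from types import SimpleNamespace

    analysis = SimpleNamespace(FIRST={'start': {'NUMBER'}, 'expr': {'NUMBER'}})

    # old: rebuild the mapping entry by entry
    first = {}
    for origin in analysis.FIRST:
        first[origin] = analysis.FIRST[origin]

    # new: share one reference; same lookups, less code -- though any
    # mutation would now be visible to the analyzer as well
    first = analysis.FIRST
    assert first['expr'] == {'NUMBER'}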

