Procházet zdrojové kódy

Reconstruct working again

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan před 7 lety
rodič
revize
4076256faa
Změněny 3 soubory: 22 přidání a 18 odebrání
  1. +1
    -1
      lark/parser_frontends.py
  2. +11
    -7
      lark/parsers/earley.py
  3. +10
    -10
      lark/reconstruct.py

+ 1
- 1
lark/parser_frontends.py Zobrazit soubor

@@ -140,7 +140,7 @@ class Earley_NoLex:
yield sym

def parse(self, text):
res = self.parser.parse([Token(x,x) for x in text]) # A little hacky perhaps!
res = self.parser.parse(text)
assert len(res) ==1 , 'Ambiguious Parse! Not handled yet'
return res[0]



+ 11
- 7
lark/parsers/earley.py Zobrazit soubor

@@ -1,6 +1,9 @@
from ..common import ParseError, UnexpectedToken, is_terminal
from .grammar_analysis import GrammarAnalyzer

class EndToken(str):
    """Sentinel passed to the Earley parser after the last input token.

    It subclasses ``str`` and carries a ``type`` attribute, so code that
    inspects ``token.type`` can handle it like any other token. Built with
    no arguments it is an empty string.
    """

    # Token-type name the parser compares against to detect end of input.
    type = '$end'

class Item:
def __init__(self, rule, ptr, start, data):
self.rule = rule
@@ -34,7 +37,8 @@ class Parser:
self.predictions = {}
for rule in self.analysis.rules:
if rule.origin != '$root': # XXX kinda ugly
self.postprocess[rule] = getattr(parser_conf.callback, rule.alias)
a = rule.alias
self.postprocess[rule] = a if callable(a) else getattr(parser_conf.callback, a)
self.predictions[rule.origin] = [(x.rule, x.index) for x in self.analysis.expand_rule(rule.origin)]

def parse(self, stream):
@@ -49,7 +53,7 @@ class Parser:
return {old_item.advance(item.data) for old_item in table[item.start]
if not old_item.is_complete and old_item.expect == item.rule.origin}

def process_column(i, term):
def process_column(i, token):
assert i == len(table)-1
cur_set = table[i]
next_set = set()
@@ -63,7 +67,7 @@ class Parser:
else:
if is_terminal(item.expect):
# scan
match = item.expect[0](term) if callable(item.expect[0]) else item.expect[0] == term
match = item.expect[0](token) if callable(item.expect[0]) else item.expect[0] == token.type
if match:
next_set.add(item.advance(stream[i]))
else:
@@ -74,9 +78,9 @@ class Parser:
cur_set |= to_process


if not next_set and term != '$end':
if not next_set and token.type != '$end':
expect = filter(is_terminal, [x.expect for x in cur_set if not x.is_complete])
raise UnexpectedToken(term, expect, stream, i)
raise UnexpectedToken(token, expect, stream, i)

table.append(next_set)

@@ -84,9 +88,9 @@ class Parser:
table = [predict(self.start, 0)]

for i, char in enumerate(stream):
process_column(i, char.type)
process_column(i, char)

process_column(len(stream), '$end')
process_column(len(stream), EndToken())

# Parse ended. Now build a parse tree
solutions = [n.data for n in table[len(stream)]


+ 10
- 10
lark/reconstruct.py Zobrazit soubor

@@ -2,7 +2,7 @@ import re
from collections import defaultdict

from .tree import Tree
from .common import is_terminal
from .common import is_terminal, ParserConf
from .lexer import Token, TokenDef__Str
from .parsers import earley
from .lark import Lark
@@ -29,12 +29,15 @@ class Reconstructor:
self.data = data

class MatchTerminal(MatchData):
def match(self, other):
def __call__(self, other):
return token_res[self.data].match(other) is not None

class MatchTree(MatchData):
def match(self, other):
return self.data == other.data
def __call__(self, other):
try:
return self.data == other.data
except AttributeError:
return False

class WriteTokens:
def __init__(self, name, expansion):
@@ -80,20 +83,17 @@ class Reconstructor:
for name, expansions in d.items():
for expansion in expansions:
reduced = [sym if sym.startswith('_') or sym in expand1s else
(sym, MatchTerminal(sym) if is_terminal(sym) else MatchTree(sym))
(MatchTerminal(sym) if is_terminal(sym) else MatchTree(sym),)
for sym in expansion if not is_discarded_terminal(sym)]

name = name.lstrip('!').lstrip('?')

rules.append({'name': name,
'symbols': reduced,
'postprocess': WriteTokens(name, expansion).f
})
rules.append((name, reduced, WriteTokens(name, expansion).f))
self.rules = rules


def _reconstruct(self, tree):
parser = earley.Parser(self.rules, tree.data)
parser = earley.Parser(ParserConf(self.rules, {}, tree.data))

res ,= parser.parse(tree.children) # XXX ambiguity?
for item in res:


Načítá se…
Zrušit
Uložit