瀏覽代碼

Initial reconstruct, working with example!

Definitely needs more work though
tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7 年之前
父節點
當前提交
43da0ddad9
共有 2 個檔案被更改,包括 142 行新增和 0 行刪除
  1. +33
    -0
      examples/reconstruct_json.py
  2. +109
    -0
      lark/reconstruct.py

+ 33
- 0
examples/reconstruct_json.py 查看文件

@@ -0,0 +1,33 @@
import json
from .json_parser import json_grammar

from lark import Lark
from lark.reconstruct import Reconstructor

def test():
    """Round-trip a sample JSON document through parsing and reconstruction.

    Parses ``test_json`` with a ``Lark`` parser built from ``json_grammar``,
    turns the resulting tree back into text with ``Reconstructor``, prints
    that text, and then prints whether it decodes (via ``json.loads``) to the
    same value as the original document.
    """
    test_json = '''
{
"empty_object" : {},
"empty_array" : [],
"booleans" : { "YES" : true, "NO" : false },
"numbers" : [ 0, 1, -2, 3.3, 4.4e5, 6.6e-7 ],
"strings" : [ "This", [ "And" , "That" ] ],
"nothing" : null
}
'''

    json_parser = Lark(json_grammar)
    tree = json_parser.parse(test_json)

    # To experiment, mutate the tree here before reconstructing it, e.g.
    # rename every node found by tree.find_data('true') to 'false', or
    # inspect the tree with tree.pretty().

    new_json = Reconstructor(json_parser).reconstruct(tree)
    # print() with a single argument behaves the same on Python 2 and 3,
    # unlike the print statement, which is a syntax error on Python 3.
    print(new_json)
    print(json.loads(new_json) == json.loads(test_json))

# Run the round-trip demo immediately; the call is unguarded, so it also runs
# whenever this module is imported.
test()

+ 109
- 0
lark/reconstruct.py 查看文件

@@ -0,0 +1,109 @@
import re
from collections import defaultdict

from .tree import Tree
from .common import is_terminal
from .lexer import Token, TokenDef__Str
from .parsers import earley
from .lark import Lark



def is_discarded_terminal(t):
    """Tell whether symbol ``t`` is a terminal whose name starts with ``_``.

    ``Reconstructor`` leaves such symbols out of the rules it matches against
    tree children and writes them back from their token definition instead.

    Returns the (falsy) result of ``is_terminal(t)`` when ``t`` is not a
    terminal, otherwise the result of ``t.startswith('_')``.
    """
    terminal = is_terminal(t)
    if terminal:
        return t.startswith('_')
    return terminal

def is_iter_empty(i):
    """Return True if iterator ``i`` has no more items, False otherwise.

    NOTE: this check is destructive. When ``i`` is not exhausted, one item is
    pulled from it and thrown away, so call it only once you are done with
    the iterator. ``i`` must be an iterator (something ``next()`` accepts),
    not a plain iterable.
    """
    # A fresh private object can never be produced by ``i`` itself, so
    # getting it back from next() means the iterator was exhausted.
    exhausted = object()
    return next(i, exhausted) is exhausted

class Reconstructor:
    """Turn a parse tree back into text, using the grammar of a parser.

    The constructor converts the parser's grammar rules into a rule table
    (``self.rules``) whose "input" is a tree node's list of children instead
    of characters.  ``reconstruct`` then runs an Earley parser over each
    node's children to find which rule produced the node, and lets that
    rule's post-processing callback re-insert the terminals that were
    dropped from the tree.
    """

    def __init__(self, parser):
        """Build the reconstruction rule table from ``parser``.

        ``parser`` must provide ``lexer_conf.tokens`` (token definitions
        exposing ``name`` and ``to_regexp()``) and ``rules`` (a mapping from
        rule name to an iterable of ``(expansion, alias)`` pairs, where an
        expansion is a sequence of symbol names).
        """
        token_defs = parser.lexer_conf.tokens
        tokens = {t.name: t for t in token_defs}
        token_res = {t.name: re.compile(t.to_regexp()) for t in token_defs}

        class MatchData:
            """Base matcher; ``data`` is the symbol name to match against."""

            def __init__(self, data):
                self.data = data

        class MatchTerminal(MatchData):
            """Accepts a child whose text matches the terminal's regexp."""

            def match(self, other):
                # Children are a mix of tokens and subtrees.  A subtree is
                # never a terminal, and handing it to the regexp would raise
                # instead of simply reporting "no match".
                if isinstance(other, Tree):
                    return False
                return token_res[self.data].match(other) is not None

        class MatchTree(MatchData):
            """Accepts a child whose ``data`` equals the rule name."""

            def match(self, other):
                # A child without a ``data`` attribute (presumably a token,
                # which is identified by ``type`` elsewhere in this class)
                # cannot be this subtree: report a mismatch instead of
                # raising AttributeError.
                try:
                    other_data = other.data
                except AttributeError:
                    return False
                return self.data == other_data

        class WriteTokens:
            """Post-processing callback for one (rule name, expansion)."""

            def __init__(self, name, expansion):
                self.name = name
                self.expansion = expansion

            def f(self, args):
                """Return the items to write for one matched expansion.

                ``args`` holds the matched children in order.  Discarded
                terminals are not among them, so their literal value is
                taken from the token definition; every other symbol
                consumes the next item of ``args``.
                """
                args2 = iter(args)
                to_write = []
                for sym in self.expansion:
                    if is_discarded_terminal(sym):
                        t = tokens[sym]
                        # Only string-defined tokens carry a literal value
                        # that can be written back.
                        assert isinstance(t, TokenDef__Str)
                        to_write.append(t.value)
                    else:
                        x = next(args2)
                        if isinstance(x, list):
                            # Result of a nested rule that was already
                            # post-processed: splice its items in.
                            to_write += x
                        else:
                            if isinstance(x, Token):
                                assert x.type == sym, x
                            else:
                                assert x.data == sym, x
                            to_write.append(x)

                # Every matched child must have been consumed.
                assert is_iter_empty(args2)

                return to_write

        # Group expansions by rule name.  An aliased expansion becomes a
        # rule of its own (named after the alias), and the original rule
        # refers to it through a one-symbol expansion.
        d = defaultdict(list)
        for name, expansions in parser.rules.items():
            for expansion, alias in expansions:
                if alias:
                    d[alias].append(expansion)
                    d[name].append([alias])
                else:
                    d[name].append(expansion)

        def strip_flags(rule_name):
            # Rule names may carry '!' / '?' option prefixes.
            return rule_name.lstrip('!').lstrip('?')

        # Names of rules flagged with '?'.  They are referenced as ordinary
        # nonterminals below rather than matched as tree nodes (presumably
        # because such a rule does not always produce a node of its own).
        expand1s = {strip_flags(name) for name in d
                    if name.startswith(('?', '!?'))}  # XXX Ugly code

        rules = []
        for name, expansions in d.items():
            clean_name = strip_flags(name)
            for expansion in expansions:
                # Discarded terminals never appear among the children, so
                # they are dropped from the symbols to match.  Symbols
                # starting with '_' and '?'-rules stay plain nonterminal
                # references; everything else becomes a (name, matcher)
                # pair.
                reduced = [
                    sym if sym.startswith('_') or sym in expand1s else
                    (sym, MatchTerminal(sym) if is_terminal(sym) else MatchTree(sym))
                    for sym in expansion if not is_discarded_terminal(sym)
                ]

                rules.append({'name': clean_name,
                              'symbols': reduced,
                              'postprocess': WriteTokens(clean_name, expansion).f
                              })
        self.rules = rules

    def _reconstruct(self, tree):
        """Yield, in order, the text pieces that make up ``tree``.

        Raises ValueError (from the unpacking below) unless the Earley
        parser returns exactly one result for the node's children.
        """
        # tree.data is passed as the second argument, presumably the start
        # symbol for matching this node's children.
        parser = earley.Parser(self.rules, tree.data)

        res ,= parser.parse(tree.children)  # XXX ambiguity?
        for item in res:
            if isinstance(item, Tree):
                # Explicit loop instead of ``yield from`` to stay compatible
                # with Python 2.
                for x in self._reconstruct(item):
                    yield x
            else:
                yield item

    def reconstruct(self, tree):
        """Return the text reconstructed from ``tree`` as a single string."""
        return ''.join(self._reconstruct(tree))



Loading…
取消
儲存