ソースを参照

Fixed bug in load_grammar. Improved nearley converter

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 7年前
コミット
3d4ee92829
2個のファイルの変更55行の追加27行の削除
  1. +4
    -2
      lark/load_grammar.py
  2. +51
    -25
      tools/nearley.py

+ 4
- 2
lark/load_grammar.py ファイルの表示

@@ -219,7 +219,6 @@ class SimplifyTree(InlineTransformer):
return tokenmods + [value]

def get_tokens(tree, token_set):
tokens = []
for t in tree.find_data('token'):
x = t.children
name = x[0].value
@@ -266,10 +265,13 @@ class ExtractAnonTokens(InlineTransformer):
else:
assert False, token

if value in self.token_reverse: # Kind of a weird placement
token_name = self.token_reverse[value]

if token_name not in self.token_set:
self.token_set.add(token_name)
self.tokens.append((token_name, token, []))
assert value not in self.token_reverse
assert value not in self.token_reverse, value
self.token_reverse[value] = token_name

return Token('TOKEN', token_name, -1)


+ 51
- 25
tools/nearley.py ファイルの表示

@@ -1,22 +1,32 @@
"Converts between Lark and Nearley grammars. Work in progress!"

import os.path
import sys

from lark import Lark, InlineTransformer

nearley_grammar = r"""
start: (ruledef|directive)+

directive: "@" NAME STRING
directive: "@" NAME (STRING|NAME)
| "@" _JS -> js_code
ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)*

expansion: (rule|string|regexp)+ _JS?
expansion: expr+ _JS?

?expr: item [":" /[+*?]/]

?item: rule|string|regexp
| "(" expansions ")"

rule: NAME
string: STRING
regexp: REGEXP
_JS: /(?s){%.*?%}/

NAME: /[a-zA-Z_]\w*/
NAME: /[a-zA-Z_$]\w*/
WS.ignore: /[\t \f\n]+/
COMMENT.ignore: /\#[^\n]*/
REGEXP: /\[.*?\]/
@@ -27,13 +37,20 @@ nearley_grammar = r"""


class NearleyToLark(InlineTransformer):
def __init__(self, builtin_path):
self.builtin_path = builtin_path

def rule(self, name):
return {'_': '_WS?', '__':'_WS'}.get(name, name)
# return {'_': '_WS?', '__':'_WS'}.get(name, name)
return {'_': '_ws_maybe', '__':'_ws'}.get(name, name)

def ruledef(self, name, exps):
name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
return '%s: %s' % (name, exps)

def expr(self, item, op):
return '(%s)%s' % (item, op)

def regexp(self, r):
return '/%s/' % r

@@ -45,35 +62,32 @@ class NearleyToLark(InlineTransformer):
return ' '.join(x)

def expansions(self, *x):
return '\n |'.join(x)
return '(%s)' % ('\n |'.join(x))

def js_code(self):
return ''

def macro(self, *args):
return '' # TODO support macros?!

def directive(self, name, *args):
if name == 'builtin':
arg = args[0][1:-1]
if arg == 'whitespace.ne':
return r'_WS: /[ \t\n\v\f]/'
elif arg == 'number.ne':
return ('unsigned_int: DIGIT+\n'
'DIGIT: /\d/\n'
'decimal: "-"? DIGIT+ [/\./ DIGIT+] \n'
'percentage: decimal "%"\n'
)
# TODO
elif arg == 'postprocessors.ne':
pass
else:
assert False, arg
else:
assert False
pass
with open(os.path.join(self.builtin_path, arg)) as f:
text = f.read()
return nearley_to_lark(text, self.builtin_path)
elif name == 'preprocessor':
return ''

raise Exception('Unknown directive: %s' % name)

def start(self, *rules):
return '\n'.join(filter(None, rules))

def nearley_to_lark(g):
def nearley_to_lark(g, builtin_path):
parser = Lark(nearley_grammar)
tree = parser.parse(g)
return NearleyToLark().transform(tree)
return NearleyToLark(builtin_path).transform(tree)


def test():
@@ -112,7 +126,7 @@ def test():
function(d) {return Math.floor(d[0]*255); }
%}
"""
converted_grammar = nearley_to_lark(css_example_grammar)
converted_grammar = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin')
print(converted_grammar)

l = Lark(converted_grammar, start='csscolor', parser='earley_nolex')
@@ -120,5 +134,17 @@ def test():
print(l.parse('rgb(255, 70%, 3)').pretty())


def main():
    """Convert a Nearley grammar read from stdin into a Lark grammar on stdout.

    The first command-line argument is the path to the Nearley library; its
    ``builtin`` subdirectory is passed to ``nearley_to_lark`` as the builtin
    path. When the argument is missing, a usage message is written to stderr
    and the function returns without reading stdin.
    """
    try:
        nearley_lib = sys.argv[1]
    except IndexError:
        # stdout is the data channel for the converted grammar, so diagnostics
        # go to stderr; otherwise `nearley.py > out.lark` would silently save
        # the usage text as if it were a grammar.
        sys.stderr.write("Reads Nearley grammar from stdin and outputs a lark grammar.\n")
        sys.stderr.write("Usage: %s <nearley_lib_path>\n" % sys.argv[0])
        return

    grammar = sys.stdin.read()
    print(nearley_to_lark(grammar, os.path.join(nearley_lib, 'builtin')))


if __name__ == '__main__':
test()
main()

読み込み中…
キャンセル
保存