Browse Source

Fixed bug in load_grammar. Improved nearley converter

tags/gm/2021-09-23T00Z/github.com--lark-parser-lark/0.5.1
Erez Shinan 9 years ago
parent
commit
3d4ee92829
2 changed files with 55 additions and 27 deletions
  1. +4
    -2
      lark/load_grammar.py
  2. +51
    -25
      tools/nearley.py

+ 4
- 2
lark/load_grammar.py View File

@@ -219,7 +219,6 @@ class SimplifyTree(InlineTransformer):
return tokenmods + [value] return tokenmods + [value]


def get_tokens(tree, token_set): def get_tokens(tree, token_set):
tokens = []
for t in tree.find_data('token'): for t in tree.find_data('token'):
x = t.children x = t.children
name = x[0].value name = x[0].value
@@ -266,10 +265,13 @@ class ExtractAnonTokens(InlineTransformer):
else: else:
assert False, token assert False, token


if value in self.token_reverse: # Kind of a weird placement
token_name = self.token_reverse[value]

if token_name not in self.token_set: if token_name not in self.token_set:
self.token_set.add(token_name) self.token_set.add(token_name)
self.tokens.append((token_name, token, [])) self.tokens.append((token_name, token, []))
assert value not in self.token_reverse
assert value not in self.token_reverse, value
self.token_reverse[value] = token_name self.token_reverse[value] = token_name


return Token('TOKEN', token_name, -1) return Token('TOKEN', token_name, -1)


+ 51
- 25
tools/nearley.py View File

@@ -1,22 +1,32 @@
"Converts between Lark and Nearley grammars. Work in progress!" "Converts between Lark and Nearley grammars. Work in progress!"


import os.path
import sys

from lark import Lark, InlineTransformer from lark import Lark, InlineTransformer


nearley_grammar = r""" nearley_grammar = r"""
start: (ruledef|directive)+ start: (ruledef|directive)+


directive: "@" NAME STRING
directive: "@" NAME (STRING|NAME)
| "@" _JS -> js_code
ruledef: NAME "->" expansions ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)* expansions: expansion ("|" expansion)*


expansion: (rule|string|regexp)+ _JS?
expansion: expr+ _JS?

?expr: item [":" /[+*?]/]

?item: rule|string|regexp
| "(" expansions ")"


rule: NAME rule: NAME
string: STRING string: STRING
regexp: REGEXP regexp: REGEXP
_JS: /(?s){%.*?%}/ _JS: /(?s){%.*?%}/


NAME: /[a-zA-Z_]\w*/
NAME: /[a-zA-Z_$]\w*/
WS.ignore: /[\t \f\n]+/ WS.ignore: /[\t \f\n]+/
COMMENT.ignore: /\#[^\n]*/ COMMENT.ignore: /\#[^\n]*/
REGEXP: /\[.*?\]/ REGEXP: /\[.*?\]/
@@ -27,13 +37,20 @@ nearley_grammar = r"""




class NearleyToLark(InlineTransformer): class NearleyToLark(InlineTransformer):
def __init__(self, builtin_path):
self.builtin_path = builtin_path


def rule(self, name): def rule(self, name):
return {'_': '_WS?', '__':'_WS'}.get(name, name)
# return {'_': '_WS?', '__':'_WS'}.get(name, name)
return {'_': '_ws_maybe', '__':'_ws'}.get(name, name)


def ruledef(self, name, exps): def ruledef(self, name, exps):
name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
return '%s: %s' % (name, exps) return '%s: %s' % (name, exps)


def expr(self, item, op):
return '(%s)%s' % (item, op)

def regexp(self, r): def regexp(self, r):
return '/%s/' % r return '/%s/' % r


@@ -45,35 +62,32 @@ class NearleyToLark(InlineTransformer):
return ' '.join(x) return ' '.join(x)


def expansions(self, *x): def expansions(self, *x):
return '\n |'.join(x)
return '(%s)' % ('\n |'.join(x))

def js_code(self):
return ''

def macro(self, *args):
return '' # TODO support macros?!


def directive(self, name, *args): def directive(self, name, *args):
if name == 'builtin': if name == 'builtin':
arg = args[0][1:-1] arg = args[0][1:-1]
if arg == 'whitespace.ne':
return r'_WS: /[ \t\n\v\f]/'
elif arg == 'number.ne':
return ('unsigned_int: DIGIT+\n'
'DIGIT: /\d/\n'
'decimal: "-"? DIGIT+ [/\./ DIGIT+] \n'
'percentage: decimal "%"\n'
)
# TODO
elif arg == 'postprocessors.ne':
pass
else:
assert False, arg
else:
assert False
pass
with open(os.path.join(self.builtin_path, arg)) as f:
text = f.read()
return nearley_to_lark(text, self.builtin_path)
elif name == 'preprocessor':
return ''

raise Exception('Unknown directive: %s' % name)


def start(self, *rules): def start(self, *rules):
return '\n'.join(filter(None, rules)) return '\n'.join(filter(None, rules))


def nearley_to_lark(g):
def nearley_to_lark(g, builtin_path):
parser = Lark(nearley_grammar) parser = Lark(nearley_grammar)
tree = parser.parse(g) tree = parser.parse(g)
return NearleyToLark().transform(tree)
return NearleyToLark(builtin_path).transform(tree)




def test(): def test():
@@ -112,7 +126,7 @@ def test():
function(d) {return Math.floor(d[0]*255); } function(d) {return Math.floor(d[0]*255); }
%} %}
""" """
converted_grammar = nearley_to_lark(css_example_grammar)
converted_grammar = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin')
print(converted_grammar) print(converted_grammar)


l = Lark(converted_grammar, start='csscolor', parser='earley_nolex') l = Lark(converted_grammar, start='csscolor', parser='earley_nolex')
@@ -120,5 +134,17 @@ def test():
print(l.parse('rgb(255, 70%, 3)').pretty()) print(l.parse('rgb(255, 70%, 3)').pretty())




def main():
    """Command-line entry point: convert a Nearley grammar to a Lark grammar.

    The first command-line argument is the path to the Nearley library; its
    ``builtin`` sub-directory is passed to ``nearley_to_lark`` as the builtin
    path. The Nearley grammar is read from stdin and the converted grammar
    is printed to stdout. When the argument is missing, a short usage
    message is printed instead and nothing is read.
    """
    # No library path given: explain how to call the tool and stop.
    if len(sys.argv) < 2:
        print("Reads Nearley grammar from stdin and outputs a lark grammar.")
        print("Usage: %s <nearley_lib_path>" % sys.argv[0])
        return

    lib_root = sys.argv[1]
    nearley_source = sys.stdin.read()
    builtin_dir = os.path.join(lib_root, 'builtin')
    print(nearley_to_lark(nearley_source, builtin_dir))


if __name__ == '__main__': if __name__ == '__main__':
test()
main()

Loading…
Cancel
Save