"""Converts between Lark and Nearley grammars. Work in progress!"""
import os.path
import re
import sys

from lark import Lark, InlineTransformer
# Lark grammar for the Nearley syntax this module reads: rule definitions,
# "@" directives, macro definitions and {% ... %} JavaScript blocks.
# _JS starts with an underscore, so the JavaScript text is dropped from the
# parse tree; whitespace and "#" comments are ignored by the lexer.
nearley_grammar = r"""
    start: (ruledef|directive)+
    directive: "@" NAME (STRING|NAME)
             | "@" _JS  -> js_code
    ruledef: NAME "->" expansions
           | NAME REGEXP "->" expansions -> macro
    expansions: expansion ("|" expansion)*
    expansion: expr+ _JS?
    ?expr: item [":" /[+*?]/]
    ?item: rule|string|regexp
         | "(" expansions ")"
    rule: NAME
    string: STRING
    regexp: REGEXP
    _JS: /(?s){%.*?%}/
    NAME: /[a-zA-Z_$]\w*/
    WS.ignore: /[\t \f\n]+/
    COMMENT.ignore: /\#[^\n]*/
    REGEXP: /\[.*?\]/
    STRING: /".*?"/
"""
class NearleyToLark(InlineTransformer):
    """Render a parsed Nearley grammar as Lark grammar text.

    Each method handles the rule (or alias) of the same name in
    ``nearley_grammar`` and returns a string fragment; ``start`` joins the
    top-level fragments into the final grammar text.
    """

    # Rule names "_" and "__" are renamed in the output.  They look like the
    # whitespace rules of the Nearley builtin library -- confirm against
    # whitespace.ne.
    _RULE_ALIASES = {'_': '_ws_maybe', '__': '_ws'}

    # One unit of a Nearley string body: a backslash escape or a single char.
    _STRING_UNIT = re.compile(r'\\.|.')

    def __init__(self, builtin_path):
        """Remember *builtin_path*, the directory searched by ``@builtin``."""
        # Let the base transformer set up any state of its own.
        super(NearleyToLark, self).__init__()
        self.builtin_path = builtin_path

    def rule(self, name):
        """Return the Lark name for a referenced rule."""
        return self._RULE_ALIASES.get(name, name)

    def ruledef(self, name, exps):
        """Return a ``name: expansions`` Lark rule definition."""
        return '%s: %s' % (self._RULE_ALIASES.get(name, name), exps)

    def expr(self, item, op=None):
        """Return *item* wrapped in parentheses and followed by *op*.

        *op* is one of ``+``, ``*`` or ``?``.  When it is missing or None the
        item is returned unchanged instead of rendering ``(item)None``.
        NOTE(review): some Lark versions may pass None for an unmatched
        ``[...]`` optional -- confirm against the installed version.
        """
        if op is None:
            return item
        return '(%s)%s' % (item, op)

    def regexp(self, r):
        """Return the Nearley character class wrapped as a Lark regexp."""
        return '/%s/' % r

    def string(self, s):
        """Return the quoted string *s* as a sequence of one-unit literals.

        A backslash escape such as ``\\n`` stays together as one literal;
        splitting it would emit the invalid literal ``"\\"``.
        """
        # TODO allow regular strings, and split them in the parser frontend
        units = self._STRING_UNIT.findall(s[1:-1])
        return ' '.join('"%s"' % unit for unit in units)

    def expansion(self, *x):
        """Join the items of one alternative with spaces."""
        return ' '.join(x)

    def expansions(self, *x):
        """Return the alternatives joined by ``|`` inside parentheses."""
        return '(%s)' % ('\n    |'.join(x))

    def js_code(self):
        """Drop a stand-alone JavaScript block."""
        return ''

    def macro(self, *args):
        """Drop macro definitions."""
        return ''   # TODO support macros?!

    def directive(self, name, *args):
        """Handle an ``@name argument`` directive.

        ``@builtin`` reads the named file from ``self.builtin_path`` and
        returns its conversion; ``@preprocessor`` is ignored.

        Raises Exception for any other directive name.
        """
        if name == 'builtin':
            arg = args[0][1:-1]     # strip the surrounding quotes
            with open(os.path.join(self.builtin_path, arg)) as f:
                text = f.read()
            return nearley_to_lark(text, self.builtin_path)
        elif name == 'preprocessor':
            return ''

        raise Exception('Unknown directive: %s' % name)

    def start(self, *rules):
        """Join the non-empty top-level fragments, one per line."""
        return '\n'.join(fragment for fragment in rules if fragment)
def nearley_to_lark(g, builtin_path):
    """Translate Nearley grammar text into Lark grammar text.

    g: source text of the Nearley grammar.
    builtin_path: directory in which files named by ``@builtin`` are looked
        up.

    Returns the converted grammar as a string.
    """
    # A fresh parser for the Nearley syntax is built on every call.
    tree = Lark(nearley_grammar).parse(g)
    return NearleyToLark(builtin_path).transform(tree)
def test(builtin_path='/home/erez/nearley/builtin'):
    """Convert a sample CSS-colour Nearley grammar and parse two inputs.

    Prints the converted Lark grammar, then the parse trees of ``#a199ff``
    and ``rgb(255, 70%, 3)``.

    builtin_path: directory holding the ``.ne`` files that the sample pulls
        in through ``@builtin``.  The default is the path the function was
        originally hard-coded to.
    """
    css_example_grammar = """
# http://www.w3.org/TR/css3-color/#colorunits
@builtin "whitespace.ne"
@builtin "number.ne"
@builtin "postprocessors.ne"
csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {%
    function(d) {
        return {
            "r": parseInt(d[1]+d[2], 16),
            "g": parseInt(d[3]+d[4], 16),
            "b": parseInt(d[5]+d[6], 16),
        }
    }
%}
    | "#" hexdigit hexdigit hexdigit {%
    function(d) {
        return {
            "r": parseInt(d[1]+d[1], 16),
            "g": parseInt(d[2]+d[2], 16),
            "b": parseInt(d[3]+d[3], 16),
        }
    }
%}
    | "rgb" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %}
    | "hsl" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %}
    | "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %}
    | "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %}
hexdigit -> [a-fA-F0-9]
colnum -> unsigned_int {% id %} | percentage {%
    function(d) {return Math.floor(d[0]*255); }
%}
"""
    converted_grammar = nearley_to_lark(css_example_grammar, builtin_path)
    print(converted_grammar)

    parser = Lark(converted_grammar, start='csscolor', parser='earley_nolex')
    print(parser.parse('#a199ff').pretty())
    print(parser.parse('rgb(255, 70%, 3)').pretty())
def main(argv=None):
    """Command-line entry point: convert a Nearley grammar read from stdin.

    argv: argument list to use instead of ``sys.argv``; ``argv[0]`` is the
        program name and ``argv[1]`` the Nearley library path.  Defaults to
        ``sys.argv``.

    Prints the converted Lark grammar to stdout.  When the library path is
    missing, prints a usage message and returns without reading stdin.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 2:
        print("Reads Nearley grammar from stdin and outputs a lark grammar.")
        print("Usage: %s <nearley_lib_path>" % argv[0])
        return

    nearley_lib = argv[1]
    grammar = sys.stdin.read()
    # The builtin .ne files live in the "builtin" directory of the library.
    print(nearley_to_lark(grammar, os.path.join(nearley_lib, 'builtin')))
# Run the converter only when this file is executed as a script.
if __name__ == '__main__':
    main()