From b7dcfbca5870c33373790d55b6471bd332a269ab Mon Sep 17 00:00:00 2001 From: Erez Shinan Date: Wed, 15 Feb 2017 14:50:47 +0200 Subject: [PATCH] Tools can now be installed via pip --- lark/__init__.py | 2 +- lark/tools/__init__.py | 0 lark/tools/nearley.py | 150 +++++++++++++++++++++++++++++++++++++++++ setup.py | 2 +- 4 files changed, 152 insertions(+), 2 deletions(-) create mode 100644 lark/tools/__init__.py create mode 100644 lark/tools/nearley.py diff --git a/lark/__init__.py b/lark/__init__.py index 69b5730..c7dd915 100644 --- a/lark/__init__.py +++ b/lark/__init__.py @@ -3,4 +3,4 @@ from .common import ParseError, GrammarError from .lark import Lark from .utils import inline_args -__version__ = "0.1.1" +__version__ = "0.1.2" diff --git a/lark/tools/__init__.py b/lark/tools/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lark/tools/nearley.py b/lark/tools/nearley.py new file mode 100644 index 0000000..b78d3b8 --- /dev/null +++ b/lark/tools/nearley.py @@ -0,0 +1,150 @@ +"Converts between Lark and Nearley grammars. Work in progress!" + +import os.path +import sys + +from lark import Lark, InlineTransformer + +nearley_grammar = r""" + start: (ruledef|directive)+ + + directive: "@" NAME (STRING|NAME) + | "@" _JS -> js_code + ruledef: NAME "->" expansions + | NAME REGEXP "->" expansions -> macro + expansions: expansion ("|" expansion)* + + expansion: expr+ _JS? 
+ + ?expr: item [":" /[+*?]/] + + ?item: rule|string|regexp + | "(" expansions ")" + + rule: NAME + string: STRING + regexp: REGEXP + _JS: /(?s){%.*?%}/ + + NAME: /[a-zA-Z_$]\w*/ + WS.ignore: /[\t \f\n]+/ + COMMENT.ignore: /\#[^\n]*/ + REGEXP: /\[.*?\]/ + STRING: /".*?"/ + + """ + + + +class NearleyToLark(InlineTransformer): + def __init__(self, builtin_path): + self.builtin_path = builtin_path + + def rule(self, name): + # return {'_': '_WS?', '__':'_WS'}.get(name, name) + return {'_': '_ws_maybe', '__':'_ws'}.get(name, name) + + def ruledef(self, name, exps): + name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name) + return '%s: %s' % (name, exps) + + def expr(self, item, op): + return '(%s)%s' % (item, op) + + def regexp(self, r): + return '/%s/' % r + + def string(self, s): + # TODO allow regular strings, and split them in the parser frontend + return ' '.join('"%s"'%ch for ch in s[1:-1]) + + def expansion(self, *x): + return ' '.join(x) + + def expansions(self, *x): + return '(%s)' % ('\n |'.join(x)) + + def js_code(self): + return '' + + def macro(self, *args): + return '' # TODO support macros?! 
+ + def directive(self, name, *args): + if name == 'builtin': + arg = args[0][1:-1] + with open(os.path.join(self.builtin_path, arg)) as f: + text = f.read() + return nearley_to_lark(text, self.builtin_path) + elif name == 'preprocessor': + return '' + + raise Exception('Unknown directive: %s' % name) + + def start(self, *rules): + return '\n'.join(filter(None, rules)) + +def nearley_to_lark(g, builtin_path): + parser = Lark(nearley_grammar) + tree = parser.parse(g) + return NearleyToLark(builtin_path).transform(tree) + + +def test(): + css_example_grammar = """ +# http://www.w3.org/TR/css3-color/#colorunits + + @builtin "whitespace.ne" + @builtin "number.ne" + @builtin "postprocessors.ne" + + csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {% + function(d) { + return { + "r": parseInt(d[1]+d[2], 16), + "g": parseInt(d[3]+d[4], 16), + "b": parseInt(d[5]+d[6], 16), + } + } + %} + | "#" hexdigit hexdigit hexdigit {% + function(d) { + return { + "r": parseInt(d[1]+d[1], 16), + "g": parseInt(d[2]+d[2], 16), + "b": parseInt(d[3]+d[3], 16), + } + } + %} + | "rgb" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %} + | "hsl" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %} + | "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %} + | "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %} + + hexdigit -> [a-fA-F0-9] + colnum -> unsigned_int {% id %} | percentage {% + function(d) {return Math.floor(d[0]*255); } + %} + """ + converted_grammar = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin') + print(converted_grammar) + + l = Lark(converted_grammar, start='csscolor', parser='earley_nolex') + print(l.parse('#a199ff').pretty()) + print(l.parse('rgb(255, 70%, 3)').pretty()) + + +def main(): + try: + nearley_lib = sys.argv[1] + except 
IndexError: + print("Reads Nearley grammar from stdin and outputs a lark grammar.") + print("Usage: %s <nearley_lib_path>" % sys.argv[0]) + return + + grammar = sys.stdin.read() + print(nearley_to_lark(grammar, os.path.join(nearley_lib, 'builtin'))) + + +if __name__ == '__main__': + main() diff --git a/setup.py b/setup.py index 49db097..ae065fe 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ __version__ ,= re.findall('__version__ = "(.*)"', open('lark/__init__.py').read( setup( name = "lark-parser", version = __version__, - packages = ['lark', 'lark.parsers'], + packages = ['lark', 'lark.parsers', 'lark.tools'], requires = [], install_requires = [],