Added Nearley-to-lark converter

7 anni fa · 387b701670
--- a/lark/common.py
+++ b/lark/common.py
@@ -13,10 +13,13 @@ class UnexpectedToken(ParseError):
        self.line = getattr(token, 'line', '?')
        self.column = getattr(token, 'column', '?')

        context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        try:
            context = ' '.join(['%r(%s)' % (t.value, t.type) for t in seq[index:index+5]])
        except AttributeError:
            context = seq[index:index+5]
        message = ("Unexpected token %r at line %s, column %s.\n"
                   "Expected: %s\n"
                   "Context: %s" % (token.value, self.line, self.column, expected, context))
                   "Context: %s" % (token, self.line, self.column, expected, context))

        super(ParseError, self).__init__(message)

--- a/tools/init.py
+++ b/tools/init.py
--- a/tools/nearley.py
+++ b/tools/nearley.py
@@ -0,0 +1,124 @@
 "Converts between Lark and Nearley grammars. Work in progress!"

 from lark import Lark, InlineTransformer

 # Lark grammar for the subset of Nearley syntax handled by this converter.
 # A Nearley file is a sequence of directives (`@NAME "string"`) and rule
 # definitions (`NAME -> expansion | expansion ...`).  Each expansion is one
 # or more rule names, double-quoted strings or `[...]` character classes,
 # optionally followed by a `{% ... %}` block (matched by `_JS`).
 # Whitespace and `#` comments are declared with `.ignore`.
 nearley_grammar = r"""
    start: (ruledef|directive)+

    directive: "@" NAME STRING
    ruledef: NAME "->" expansions
    expansions: expansion ("|" expansion)*

    expansion: (rule|string|regexp)+ _JS?

    rule: NAME
    string: STRING
    regexp: REGEXP
    _JS: /(?s){%.*?%}/

    NAME: /[a-zA-Z_]\w*/
    WS.ignore: /[\t \f\n]+/
    COMMENT.ignore: /\#[^\n]*/
    REGEXP: /\[.*?\]/
    STRING: /".*?"/

    """



 class NearleyToLark(InlineTransformer):
    """Rewrite a tree parsed with ``nearley_grammar`` as Lark grammar text.

    Each method is named after a rule of ``nearley_grammar`` and returns the
    Lark source fragment (a string) for that node; ``start`` joins the
    fragments into the final grammar text.
    """

    def rule(self, name):
        """Return the Lark name for a Nearley rule reference.

        Nearley's ``_`` and ``__`` are mapped to ``_WS?`` and ``_WS``; every
        other name is passed through unchanged.
        """
        return {'_': '_WS?', '__': '_WS'}.get(name, name)

    def ruledef(self, name, exps):
        """Format a rule definition as ``name: expansions``."""
        return '%s: %s' % (name, exps)

    def regexp(self, r):
        """Wrap a ``[...]`` character class in slashes (``/[...]/``)."""
        return '/%s/' % r

    def string(self, s):
        """Split a quoted string into one quoted literal per character.

        ``s`` includes its surrounding double quotes, which are stripped
        before splitting.
        """
        # TODO allow regular strings, and split them in the parser frontend
        # NOTE: characters are emitted verbatim, one literal each, so a
        # backslash escape in the input ends up split across two literals.
        return ' '.join('"%s"' % ch for ch in s[1:-1])

    def expansion(self, *x):
        """Join the items of one alternative with single spaces."""
        return ' '.join(x)

    def expansions(self, *x):
        """Join the alternatives of a rule, one ``|`` branch per line."""
        return '\n    |'.join(x)

    def directive(self, name, *args):
        """Translate an ``@name "argument"`` directive.

        Only ``@builtin`` is supported, and only for ``whitespace.ne``,
        ``number.ne`` and ``postprocessors.ne``.

        Returns:
            The replacement Lark definitions as a string, or ``None`` when
            the directive produces no output (``start`` drops empty results).

        Raises:
            ValueError: for an unsupported directive or builtin.  An explicit
                raise is used instead of ``assert`` so the check is not
                stripped when Python runs with ``-O``.
        """
        if name != 'builtin':
            raise ValueError('Unsupported Nearley directive: @%s' % name)

        # The argument token still carries its surrounding double quotes.
        arg = args[0][1:-1]
        if arg == 'whitespace.ne':
            return r'_WS: /[ \t\n\v\f]/'
        if arg == 'number.ne':
            # TODO
            # Backslashes are doubled so that "\n" stays a real newline while
            # the regexps keep a literal backslash (no invalid escapes).
            return ('unsigned_int: DIGIT+\n'
                    'DIGIT: /\\d/\n'
                    'decimal: "-"? DIGIT+ [/\\./ DIGIT+] \n'
                    'percentage: decimal "%"\n'
                    )
        if arg == 'postprocessors.ne':
            # Nothing is emitted for this builtin.
            return None
        raise ValueError('Unsupported Nearley builtin: %r' % arg)

    def start(self, *rules):
        """Join all converted definitions, skipping empty/``None`` entries."""
        return '\n'.join(filter(None, rules))

 def nearley_to_lark(g):
    """Convert Nearley grammar source ``g`` into Lark grammar text.

    The text is parsed with ``nearley_grammar`` and the resulting tree is
    rendered by ``NearleyToLark``.
    """
    nearley_tree = Lark(nearley_grammar).parse(g)
    converter = NearleyToLark()
    return converter.transform(nearley_tree)


 def test():
    """Convert a sample Nearley CSS-color grammar and parse two inputs.

    Prints the converted Lark grammar, then the parse trees for a hex color
    and an ``rgb(...)`` color.  ``print`` is called with a single
    parenthesised argument so the output is the same on Python 2 and 3
    (the former print statements were a SyntaxError on Python 3).
    """
    css_example_grammar = """
 # http://www.w3.org/TR/css3-color/#colorunits

    @builtin "whitespace.ne"
    @builtin "number.ne"
    @builtin "postprocessors.ne"

    csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {%
        function(d) {
            return {
                "r": parseInt(d[1]+d[2], 16),
                "g": parseInt(d[3]+d[4], 16),
                "b": parseInt(d[5]+d[6], 16),
            }
        }
    %}
              | "#" hexdigit hexdigit hexdigit {%
        function(d) {
            return {
                "r": parseInt(d[1]+d[1], 16),
                "g": parseInt(d[2]+d[2], 16),
                "b": parseInt(d[3]+d[3], 16),
            }
        }
    %}
              | "rgb"  _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %}
              | "hsl"  _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %}
              | "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %}
              | "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %}

    hexdigit -> [a-fA-F0-9]
    colnum -> unsigned_int {% id %} | percentage {%
        function(d) {return Math.floor(d[0]*255); }
    %}
    """
    converted_grammar = nearley_to_lark(css_example_grammar)
    print(converted_grammar)

    # Parse sample colors with the converted grammar, starting at `csscolor`.
    l = Lark(converted_grammar, start='csscolor', parser='earley_nolex')
    print(l.parse('#a199ff').pretty())
    print(l.parse('rgb(255, 70%, 3)').pretty())
    print l.parse('rgb(255, 70%, 3)').pretty()


 # Run the conversion demo only when this file is executed as a script.
 if __name__ == '__main__':
    test()