This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

207 lines
5.9 KiB

  1. "Converts between Lark and Nearley grammars. Work in progress!"
  2. import os.path
  3. import sys
  4. import js2py
  5. from lark import Lark, InlineTransformer, Transformer
# Lark grammar for the subset of Nearley syntax this module reads.
#   * A file is a sequence of rule definitions and directives.
#   * A directive is "@" followed by a NAME and a STRING or NAME argument;
#     "@" followed directly by a JS block yields a `js_code` node instead.
#   * A rule definition is `NAME -> expansions`; the `NAME REGEXP -> expansions`
#     form yields a `macro` node.
#   * An expansion is one or more items (rule name, string, regexp or a
#     parenthesised group), each optionally followed by ":" and one of + * ?,
#     and ends with an optional JS block.
#   * JS blocks are delimited by {% and %} and may span several lines.
#   * Whitespace and "#" comments are ignored.
nearley_grammar = r"""
start: (ruledef|directive)+
directive: "@" NAME (STRING|NAME)
| "@" JS -> js_code
ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)*
expansion: expr+ js
?expr: item [":" /[+*?]/]
?item: rule|string|regexp
| "(" expansions ")"
rule: NAME
string: STRING
regexp: REGEXP
JS: /(?s){%.*?%}/
js: JS?
NAME: /[a-zA-Z_$]\w*/
COMMENT: /\#[^\n]*/
REGEXP: /\[.*?\]/
STRING: /".*?"/
%import common.WS
%ignore WS
%ignore COMMENT
"""
# Parser for Nearley source text, built once at import time and shared by
# every call to _nearley_to_lark().
nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard')
  31. def _get_rulename(name):
  32. name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
  33. return 'n_' + name.replace('$', '__DOLLAR__')
  34. class NearleyToLark(InlineTransformer):
  35. def __init__(self, context):
  36. self.context = context
  37. self.functions = {}
  38. self.extra_rules = {}
  39. def _new_function(self, code):
  40. n = len(self.functions)
  41. name = 'alias_%d' % n
  42. assert name not in self.functions
  43. code = "%s = (%s);" % (name, code)
  44. self.context.execute(code)
  45. f = getattr(self.context, name)
  46. self.functions[name] = f
  47. return name
  48. def _extra_rule(self, rule):
  49. name = 'xrule_%d' % len(self.extra_rules)
  50. assert name not in self.extra_rules
  51. self.extra_rules[name] = rule
  52. return name
  53. def rule(self, name):
  54. return _get_rulename(name)
  55. def ruledef(self, name, exps):
  56. return '!%s: %s' % (_get_rulename(name), exps)
  57. def expr(self, item, op):
  58. rule = '(%s)%s' % (item, op)
  59. return self._extra_rule(rule)
  60. def regexp(self, r):
  61. return '/%s/' % r
  62. def string(self, s):
  63. return self._extra_rule(s)
  64. def expansion(self, *x):
  65. x, js = x[:-1], x[-1]
  66. if js.children:
  67. js_code ,= js.children
  68. js_code = js_code[2:-2]
  69. alias = '-> ' + self._new_function(js_code)
  70. else:
  71. alias = ''
  72. return ' '.join(x) + alias
  73. def expansions(self, *x):
  74. return '%s' % ('\n |'.join(x))
  75. def start(self, *rules):
  76. return '\n'.join(filter(None, rules))
  77. def _nearley_to_lark(g, builtin_path, n2l):
  78. rule_defs = []
  79. tree = nearley_grammar_parser.parse(g)
  80. for statement in tree.children:
  81. if statement.data == 'directive':
  82. directive, arg = statement.children
  83. if directive == 'builtin':
  84. with open(os.path.join(builtin_path, arg[1:-1])) as f:
  85. text = f.read()
  86. rule_defs += _nearley_to_lark(text, builtin_path, n2l)
  87. else:
  88. assert False, directive
  89. elif statement.data == 'js_code':
  90. code ,= statement.children
  91. code = code[2:-2]
  92. n2l.context.execute(code)
  93. elif statement.data == 'macro':
  94. pass # TODO Add support for macros!
  95. elif statement.data == 'ruledef':
  96. rule_defs.append( n2l.transform(statement) )
  97. else:
  98. raise Exception("Unknown statement: %s" % statement)
  99. return rule_defs
  100. def nearley_to_lark(g, builtin_path, context):
  101. n2l = NearleyToLark(context)
  102. lark_g = '\n'.join(_nearley_to_lark(g, builtin_path, n2l))
  103. lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
  104. t = Transformer()
  105. for fname, fcode in n2l.functions.items():
  106. setattr(t, fname, fcode)
  107. setattr(t, '__default__', lambda n, c: c if c else None)
  108. return lark_g, t
  109. def test():
  110. css_example_grammar = """
  111. # http://www.w3.org/TR/css3-color/#colorunits
  112. @builtin "whitespace.ne"
  113. @builtin "number.ne"
  114. @builtin "postprocessors.ne"
  115. csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {%
  116. function(d) {
  117. return {
  118. "r": parseInt(d[1]+d[2], 16),
  119. "g": parseInt(d[3]+d[4], 16),
  120. "b": parseInt(d[5]+d[6], 16),
  121. }
  122. }
  123. %}
  124. | "#" hexdigit hexdigit hexdigit {%
  125. function(d) {
  126. return {
  127. "r": parseInt(d[1]+d[1], 16),
  128. "g": parseInt(d[2]+d[2], 16),
  129. "b": parseInt(d[3]+d[3], 16),
  130. }
  131. }
  132. %}
  133. | "rgb" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %}
  134. | "hsl" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %}
  135. | "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %}
  136. | "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %}
  137. hexdigit -> [a-fA-F0-9]
  138. colnum -> unsigned_int {% id %} | percentage {%
  139. function(d) {return Math.floor(d[0]*255); }
  140. %}
  141. """
  142. context = js2py.EvalJs()
  143. context.execute('function id(x) {return x[0]; }')
  144. converted_grammar, t = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin', context)
  145. # print(converted_grammar)
  146. l = Lark(converted_grammar, start='n_csscolor')
  147. tree = l.parse('#a199ff')
  148. print(t.transform(tree))
  149. tree = l.parse('rgb(255, 70%, 3)')
  150. print(t.transform(tree))
  151. def main():
  152. try:
  153. nearley_lib = sys.argv[1]
  154. except IndexError:
  155. print("Reads Nearley grammar from stdin and outputs a lark grammar.")
  156. print("Usage: %s <nearley_lib_path>" % sys.argv[0])
  157. return
  158. grammar = sys.stdin.read()
  159. print(nearley_to_lark(grammar, os.path.join(nearley_lib, 'builtin')))
# Run the command-line converter only when this file is executed as a script.
if __name__ == '__main__':
    main()
    # test()  # manual smoke test, disabled by default