This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

226 lines
6.5 KiB

  1. "Converts between Lark and Nearley grammars. Work in progress!"
  2. import os.path
  3. import sys
  4. from lark import Lark, InlineTransformer, Transformer
# Lark grammar for the subset of Nearley syntax this converter reads:
# "@" directives, "@{% ... %}" JavaScript blocks, rule definitions whose
# expansions may end with a {% ... %} JavaScript block, and macro definitions
# (a NAME followed by a bracketed REGEXP token before "->").
# Whitespace and "#" comments are ignored.
nearley_grammar = r"""
start: (ruledef|directive)+
directive: "@" NAME (STRING|NAME)
| "@" JS -> js_code
ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)*
expansion: expr+ js
?expr: item [":" /[+*?]/]
?item: rule|string|regexp
| "(" expansions ")"
rule: NAME
string: STRING
regexp: REGEXP
JS: /(?s){%.*?%}/
js: JS?
NAME: /[a-zA-Z_$]\w*/
COMMENT: /\#[^\n]*/
REGEXP: /\[.*?\]/
STRING: /".*?"/
%import common.WS
%ignore WS
%ignore COMMENT
"""

# Parser for Nearley source text, built once at import time with Lark's
# Earley parser and the 'standard' lexer.
nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard')
  30. def _get_rulename(name):
  31. name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
  32. return 'n_' + name.replace('$', '__DOLLAR__').lower()
  33. class NearleyToLark(InlineTransformer):
  34. def __init__(self):
  35. self._count = 0
  36. self.extra_rules = {}
  37. self.extra_rules_rev = {}
  38. self.alias_js_code = {}
  39. def _new_function(self, code):
  40. name = 'alias_%d' % self._count
  41. self._count += 1
  42. self.alias_js_code[name] = code
  43. return name
  44. def _extra_rule(self, rule):
  45. if rule in self.extra_rules_rev:
  46. return self.extra_rules_rev[rule]
  47. name = 'xrule_%d' % len(self.extra_rules)
  48. assert name not in self.extra_rules
  49. self.extra_rules[name] = rule
  50. self.extra_rules_rev[rule] = name
  51. return name
  52. def rule(self, name):
  53. return _get_rulename(name)
  54. def ruledef(self, name, exps):
  55. return '!%s: %s' % (_get_rulename(name), exps)
  56. def expr(self, item, op):
  57. rule = '(%s)%s' % (item, op)
  58. return self._extra_rule(rule)
  59. def regexp(self, r):
  60. return '/%s/' % r
  61. def string(self, s):
  62. return self._extra_rule(s)
  63. def expansion(self, *x):
  64. x, js = x[:-1], x[-1]
  65. if js.children:
  66. js_code ,= js.children
  67. js_code = js_code[2:-2]
  68. alias = '-> ' + self._new_function(js_code)
  69. else:
  70. alias = ''
  71. return ' '.join(x) + alias
  72. def expansions(self, *x):
  73. return '%s' % ('\n |'.join(x))
  74. def start(self, *rules):
  75. return '\n'.join(filter(None, rules))
  76. def _nearley_to_lark(g, builtin_path, n2l, js_code):
  77. rule_defs = []
  78. tree = nearley_grammar_parser.parse(g)
  79. for statement in tree.children:
  80. if statement.data == 'directive':
  81. directive, arg = statement.children
  82. if directive == 'builtin':
  83. with open(os.path.join(builtin_path, arg[1:-1])) as f:
  84. text = f.read()
  85. rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code)
  86. else:
  87. assert False, directive
  88. elif statement.data == 'js_code':
  89. code ,= statement.children
  90. code = code[2:-2]
  91. js_code.append(code)
  92. elif statement.data == 'macro':
  93. pass # TODO Add support for macros!
  94. elif statement.data == 'ruledef':
  95. rule_defs.append( n2l.transform(statement) )
  96. else:
  97. raise Exception("Unknown statement: %s" % statement)
  98. return rule_defs
  99. def create_code_for_nearley_grammar(g, start, builtin_path):
  100. import js2py
  101. emit_code = []
  102. def emit(x=None):
  103. if x:
  104. emit_code.append(x)
  105. emit_code.append('\n')
  106. js_code = ['function id(x) {return x[0];}']
  107. n2l = NearleyToLark()
  108. lark_g = '\n'.join(_nearley_to_lark(g, builtin_path, n2l, js_code))
  109. lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
  110. emit('from lark import Lark, Transformer')
  111. emit()
  112. emit('grammar = ' + repr(lark_g))
  113. emit()
  114. for alias, code in n2l.alias_js_code.items():
  115. js_code.append('%s = (%s);' % (alias, code))
  116. emit(js2py.translate_js('\n'.join(js_code)))
  117. emit('class TranformNearley(Transformer):')
  118. for alias in n2l.alias_js_code:
  119. emit(" %s = var.get('%s').to_python()" % (alias, alias))
  120. emit(" __default__ = lambda self, n, c: c if c else None")
  121. emit()
  122. emit('parser = Lark(grammar, start="n_%s")' % start)
  123. emit('def parse(text):')
  124. emit(' return TranformNearley().transform(parser.parse(text))')
  125. return ''.join(emit_code)
def test():
    """Ad-hoc smoke test using an example CSS colour grammar.

    Converts the grammar, executes the generated code and prints the result of
    parsing two colour strings.  It reads Nearley's builtin grammars from a
    hard-coded local path, so it only works on a machine where that path
    exists.
    """
    css_example_grammar = """
# http://www.w3.org/TR/css3-color/#colorunits
@builtin "whitespace.ne"
@builtin "number.ne"
@builtin "postprocessors.ne"
csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {%
function(d) {
return {
"r": parseInt(d[1]+d[2], 16),
"g": parseInt(d[3]+d[4], 16),
"b": parseInt(d[5]+d[6], 16),
}
}
%}
| "#" hexdigit hexdigit hexdigit {%
function(d) {
return {
"r": parseInt(d[1]+d[1], 16),
"g": parseInt(d[2]+d[2], 16),
"b": parseInt(d[3]+d[3], 16),
}
}
%}
| "rgb" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %}
| "hsl" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %}
| "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %}
| "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %}
hexdigit -> [a-fA-F0-9]
colnum -> unsigned_int {% id %} | percentage {%
function(d) {return Math.floor(d[0]*255); }
%}
"""
    code = create_code_for_nearley_grammar(css_example_grammar, 'csscolor', '/home/erez/nearley/builtin')
    # Namespace in which the generated module is executed.
    d = {}
    exec (code, d)
    # The generated code defines parse(text).
    parse = d['parse']

    print(parse('#a199ff'))
    print(parse('rgb(255, 70%, 3)'))
  164. print(parse('rgb(255, 70%, 3)'))
  165. def main():
  166. if len(sys.argv) < 3:
  167. print("Reads Nearley grammar (with js functions) outputs an equivalent lark parser.")
  168. print("Usage: %s <nearley_grammar_path> <start_rule> <nearley_lib_path>" % sys.argv[0])
  169. return
  170. fn, start, nearley_lib = sys.argv[1:]
  171. with open(fn) as f:
  172. grammar = f.read()
  173. print(create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin')))
# Run the command-line converter only when this file is executed as a script.
if __name__ == '__main__':
    main()
    # test()