This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

203 lines
6.1 KiB

  1. "Converts Nearley grammars to Lark"
  2. import os.path
  3. import sys
  4. import codecs
  5. import argparse
  6. from lark import Lark, Transformer, v_args
# Lark grammar for the part of Nearley's grammar syntax this converter reads:
# rule definitions ("name -> ..."), "@" directives, "@{% ... %}" top-level
# JavaScript blocks, per-expansion "{% ... %}" post-processors, macros
# (recognised here, skipped by _nearley_to_lark) and "#" comments.
nearley_grammar = r"""
start: (ruledef|directive)+
directive: "@" NAME (STRING|NAME)
| "@" JS -> js_code
ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)*
expansion: expr+ js
?expr: item (":" /[+*?]/)?
?item: rule|string|regexp|null
| "(" expansions ")"
rule: NAME
string: STRING
regexp: REGEXP
null: "null"
JS: /{%.*?%}/s
js: JS?
NAME: /[a-zA-Z_$]\w*/
COMMENT: /#[^\n]*/
REGEXP: /\[.*?\]/
STRING: _STRING "i"?
%import common.ESCAPED_STRING -> _STRING
%import common.WS
%ignore WS
%ignore COMMENT
"""

# Parser for Nearley source text. Built once at import time and reused by
# _nearley_to_lark() for the main grammar and for every included file.
# NOTE(review): newer Lark releases appear to call this lexer 'basic' rather
# than 'standard' -- confirm against the Lark version this file targets.
nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard')
  34. def _get_rulename(name):
  35. name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
  36. return 'n_' + name.replace('$', '__DOLLAR__').lower()
  37. @v_args(inline=True)
  38. class NearleyToLark(Transformer):
  39. def __init__(self):
  40. self._count = 0
  41. self.extra_rules = {}
  42. self.extra_rules_rev = {}
  43. self.alias_js_code = {}
  44. def _new_function(self, code):
  45. name = 'alias_%d' % self._count
  46. self._count += 1
  47. self.alias_js_code[name] = code
  48. return name
  49. def _extra_rule(self, rule):
  50. if rule in self.extra_rules_rev:
  51. return self.extra_rules_rev[rule]
  52. name = 'xrule_%d' % len(self.extra_rules)
  53. assert name not in self.extra_rules
  54. self.extra_rules[name] = rule
  55. self.extra_rules_rev[rule] = name
  56. return name
  57. def rule(self, name):
  58. return _get_rulename(name)
  59. def ruledef(self, name, exps):
  60. return '!%s: %s' % (_get_rulename(name), exps)
  61. def expr(self, item, op):
  62. rule = '(%s)%s' % (item, op)
  63. return self._extra_rule(rule)
  64. def regexp(self, r):
  65. return '/%s/' % r
  66. def null(self):
  67. return ''
  68. def string(self, s):
  69. return self._extra_rule(s)
  70. def expansion(self, *x):
  71. x, js = x[:-1], x[-1]
  72. if js.children:
  73. js_code ,= js.children
  74. js_code = js_code[2:-2]
  75. alias = '-> ' + self._new_function(js_code)
  76. else:
  77. alias = ''
  78. return ' '.join(x) + alias
  79. def expansions(self, *x):
  80. return '%s' % ('\n |'.join(x))
  81. def start(self, *rules):
  82. return '\n'.join(filter(None, rules))
  83. def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
  84. rule_defs = []
  85. tree = nearley_grammar_parser.parse(g)
  86. for statement in tree.children:
  87. if statement.data == 'directive':
  88. directive, arg = statement.children
  89. if directive in ('builtin', 'include'):
  90. folder = builtin_path if directive == 'builtin' else folder_path
  91. path = os.path.join(folder, arg[1:-1])
  92. if path not in includes:
  93. includes.add(path)
  94. with codecs.open(path, encoding='utf8') as f:
  95. text = f.read()
  96. rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
  97. else:
  98. assert False, directive
  99. elif statement.data == 'js_code':
  100. code ,= statement.children
  101. code = code[2:-2]
  102. js_code.append(code)
  103. elif statement.data == 'macro':
  104. pass # TODO Add support for macros!
  105. elif statement.data == 'ruledef':
  106. rule_defs.append( n2l.transform(statement) )
  107. else:
  108. raise Exception("Unknown statement: %s" % statement)
  109. return rule_defs
  110. def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False):
  111. import js2py
  112. emit_code = []
  113. def emit(x=None):
  114. if x:
  115. emit_code.append(x)
  116. emit_code.append('\n')
  117. js_code = ['function id(x) {return x[0];}']
  118. n2l = NearleyToLark()
  119. rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
  120. lark_g = '\n'.join(rule_defs)
  121. lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
  122. emit('from lark import Lark, Transformer')
  123. emit()
  124. emit('grammar = ' + repr(lark_g))
  125. emit()
  126. for alias, code in n2l.alias_js_code.items():
  127. js_code.append('%s = (%s);' % (alias, code))
  128. if es6:
  129. emit(js2py.translate_js6('\n'.join(js_code)))
  130. else:
  131. emit(js2py.translate_js('\n'.join(js_code)))
  132. emit('class TransformNearley(Transformer):')
  133. for alias in n2l.alias_js_code:
  134. emit(" %s = var.get('%s').to_python()" % (alias, alias))
  135. emit(" __default__ = lambda self, n, c, m: c if c else None")
  136. emit()
  137. emit('parser = Lark(grammar, start="n_%s", maybe_placeholders=False)' % start)
  138. emit('def parse(text):')
  139. emit(' return TransformNearley().transform(parser.parse(text))')
  140. return ''.join(emit_code)
  141. def main(fn, start, nearley_lib, es6=False):
  142. with codecs.open(fn, encoding='utf8') as f:
  143. grammar = f.read()
  144. return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)), es6=es6)
  145. def get_arg_parser():
  146. parser = argparse.ArgumentParser(description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.')
  147. parser.add_argument('nearley_grammar', help='Path to the file containing the nearley grammar')
  148. parser.add_argument('start_rule', help='Rule within the nearley grammar to make the base rule')
  149. parser.add_argument('nearley_lib', help='Path to root directory of nearley codebase (used for including builtins)')
  150. parser.add_argument('--es6', help='Enable experimental ES6 support', action='store_true')
  151. return parser
  152. if __name__ == '__main__':
  153. parser = get_arg_parser()
  154. if len(sys.argv)==1:
  155. parser.print_help(sys.stderr)
  156. sys.exit(1)
  157. args = parser.parse_args()
  158. print(main(fn=args.nearley_grammar, start=args.start_rule, nearley_lib=args.nearley_lib, es6=args.es6))