This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

202 行
6.1 KiB

  1. "Converts Nearley grammars to Lark"
  2. import os.path
  3. import sys
  4. import codecs
  5. import argparse
  6. from lark import Lark, InlineTransformer
# Lark grammar for the Nearley syntax this converter reads.
# A Nearley file is a sequence of:
#   * directives      -- "@" NAME followed by a STRING or NAME argument,
#   * raw JS blocks   -- "@" followed by a {% ... %} block (aliased to `js_code`),
#   * rule definitions -- NAME "->" expansions, each expansion optionally
#     followed by a {% ... %} JavaScript block,
#   * the NAME REGEXP "->" form, which is aliased to `macro`.
# Whitespace and "#" comments are ignored.
nearley_grammar = r"""
start: (ruledef|directive)+
directive: "@" NAME (STRING|NAME)
| "@" JS -> js_code
ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)*
expansion: expr+ js
?expr: item (":" /[+*?]/)?
?item: rule|string|regexp|null
| "(" expansions ")"
rule: NAME
string: STRING
regexp: REGEXP
null: "null"
JS: /{%.*?%}/s
js: JS?
NAME: /[a-zA-Z_$]\w*/
COMMENT: /#[^\n]*/
REGEXP: /\[.*?\]/
STRING: _STRING "i"?
%import common.ESCAPED_STRING -> _STRING
%import common.WS
%ignore WS
%ignore COMMENT
"""

# Parser for Nearley source text, constructed once when the module is imported.
nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard')
  34. def _get_rulename(name):
  35. name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
  36. return 'n_' + name.replace('$', '__DOLLAR__').lower()
  37. class NearleyToLark(InlineTransformer):
  38. def __init__(self):
  39. self._count = 0
  40. self.extra_rules = {}
  41. self.extra_rules_rev = {}
  42. self.alias_js_code = {}
  43. def _new_function(self, code):
  44. name = 'alias_%d' % self._count
  45. self._count += 1
  46. self.alias_js_code[name] = code
  47. return name
  48. def _extra_rule(self, rule):
  49. if rule in self.extra_rules_rev:
  50. return self.extra_rules_rev[rule]
  51. name = 'xrule_%d' % len(self.extra_rules)
  52. assert name not in self.extra_rules
  53. self.extra_rules[name] = rule
  54. self.extra_rules_rev[rule] = name
  55. return name
  56. def rule(self, name):
  57. return _get_rulename(name)
  58. def ruledef(self, name, exps):
  59. return '!%s: %s' % (_get_rulename(name), exps)
  60. def expr(self, item, op):
  61. rule = '(%s)%s' % (item, op)
  62. return self._extra_rule(rule)
  63. def regexp(self, r):
  64. return '/%s/' % r
  65. def null(self):
  66. return ''
  67. def string(self, s):
  68. return self._extra_rule(s)
  69. def expansion(self, *x):
  70. x, js = x[:-1], x[-1]
  71. if js.children:
  72. js_code ,= js.children
  73. js_code = js_code[2:-2]
  74. alias = '-> ' + self._new_function(js_code)
  75. else:
  76. alias = ''
  77. return ' '.join(x) + alias
  78. def expansions(self, *x):
  79. return '%s' % ('\n |'.join(x))
  80. def start(self, *rules):
  81. return '\n'.join(filter(None, rules))
  82. def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
  83. rule_defs = []
  84. tree = nearley_grammar_parser.parse(g)
  85. for statement in tree.children:
  86. if statement.data == 'directive':
  87. directive, arg = statement.children
  88. if directive in ('builtin', 'include'):
  89. folder = builtin_path if directive == 'builtin' else folder_path
  90. path = os.path.join(folder, arg[1:-1])
  91. if path not in includes:
  92. includes.add(path)
  93. with codecs.open(path, encoding='utf8') as f:
  94. text = f.read()
  95. rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
  96. else:
  97. assert False, directive
  98. elif statement.data == 'js_code':
  99. code ,= statement.children
  100. code = code[2:-2]
  101. js_code.append(code)
  102. elif statement.data == 'macro':
  103. pass # TODO Add support for macros!
  104. elif statement.data == 'ruledef':
  105. rule_defs.append( n2l.transform(statement) )
  106. else:
  107. raise Exception("Unknown statement: %s" % statement)
  108. return rule_defs
  109. def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False):
  110. import js2py
  111. emit_code = []
  112. def emit(x=None):
  113. if x:
  114. emit_code.append(x)
  115. emit_code.append('\n')
  116. js_code = ['function id(x) {return x[0];}']
  117. n2l = NearleyToLark()
  118. rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
  119. lark_g = '\n'.join(rule_defs)
  120. lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
  121. emit('from lark import Lark, Transformer')
  122. emit()
  123. emit('grammar = ' + repr(lark_g))
  124. emit()
  125. for alias, code in n2l.alias_js_code.items():
  126. js_code.append('%s = (%s);' % (alias, code))
  127. if es6:
  128. emit(js2py.translate_js6('\n'.join(js_code)))
  129. else:
  130. emit(js2py.translate_js('\n'.join(js_code)))
  131. emit('class TransformNearley(Transformer):')
  132. for alias in n2l.alias_js_code:
  133. emit(" %s = var.get('%s').to_python()" % (alias, alias))
  134. emit(" __default__ = lambda self, n, c, m: c if c else None")
  135. emit()
  136. emit('parser = Lark(grammar, start="n_%s", maybe_placeholders=False)' % start)
  137. emit('def parse(text):')
  138. emit(' return TransformNearley().transform(parser.parse(text))')
  139. return ''.join(emit_code)
  140. def main(fn, start, nearley_lib, es6=False):
  141. with codecs.open(fn, encoding='utf8') as f:
  142. grammar = f.read()
  143. return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)), es6=es6)
  144. def get_arg_parser():
  145. parser = argparse.ArgumentParser(description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.')
  146. parser.add_argument('nearley_grammar', help='Path to the file containing the nearley grammar')
  147. parser.add_argument('start_rule', help='Rule within the nearley grammar to make the base rule')
  148. parser.add_argument('nearley_lib', help='Path to root directory of nearley codebase (used for including builtins)')
  149. parser.add_argument('--es6', help='Enable experimental ES6 support', action='store_true')
  150. return parser
  151. if __name__ == '__main__':
  152. parser = get_arg_parser()
  153. if len(sys.argv)==1:
  154. parser.print_help(sys.stderr)
  155. sys.exit(1)
  156. args = parser.parse_args()
  157. print(main(fn=args.nearley_grammar, start=args.start_rule, nearley_lib=args.nearley_lib, es6=args.es6))