This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

187 行
5.4 KiB

  1. "Converts between Lark and Nearley grammars. Work in progress!"
  2. import os.path
  3. import sys
  4. import codecs
  5. from lark import Lark, InlineTransformer
# Lark grammar describing the Nearley syntax this tool reads:
#   * directives:  "@" NAME followed by a STRING or NAME, or "@" followed by
#     a raw JS block (aliased to `js_code`);
#   * rule definitions:  NAME "->" expansions; the NAME REGEXP "->" form is
#     aliased to `macro`;
#   * each expansion is one or more items (rule names, strings, regexps or
#     parenthesised groups, optionally suffixed with ":+", ":*" or ":?")
#     followed by an optional JS block delimited by "{%" and "%}".
# Whitespace and "#" comments are ignored.
nearley_grammar = r"""
start: (ruledef|directive)+
directive: "@" NAME (STRING|NAME)
| "@" JS -> js_code
ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)*
expansion: expr+ js
?expr: item [":" /[+*?]/]
?item: rule|string|regexp
| "(" expansions ")"
rule: NAME
string: STRING
regexp: REGEXP
JS: /{%.*?%}/s
js: JS?
NAME: /[a-zA-Z_$]\w*/
COMMENT: /#[^\n]*/
REGEXP: /\[.*?\]/
STRING: /".*?"/
%import common.WS
%ignore WS
%ignore COMMENT
"""

# Parser for Nearley source text, built once when the module is loaded
# (Earley algorithm with Lark's standard lexer).
nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard')
  31. def _get_rulename(name):
  32. name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
  33. return 'n_' + name.replace('$', '__DOLLAR__').lower()
  34. class NearleyToLark(InlineTransformer):
  35. def __init__(self):
  36. self._count = 0
  37. self.extra_rules = {}
  38. self.extra_rules_rev = {}
  39. self.alias_js_code = {}
  40. def _new_function(self, code):
  41. name = 'alias_%d' % self._count
  42. self._count += 1
  43. self.alias_js_code[name] = code
  44. return name
  45. def _extra_rule(self, rule):
  46. if rule in self.extra_rules_rev:
  47. return self.extra_rules_rev[rule]
  48. name = 'xrule_%d' % len(self.extra_rules)
  49. assert name not in self.extra_rules
  50. self.extra_rules[name] = rule
  51. self.extra_rules_rev[rule] = name
  52. return name
  53. def rule(self, name):
  54. return _get_rulename(name)
  55. def ruledef(self, name, exps):
  56. return '!%s: %s' % (_get_rulename(name), exps)
  57. def expr(self, item, op):
  58. rule = '(%s)%s' % (item, op)
  59. return self._extra_rule(rule)
  60. def regexp(self, r):
  61. return '/%s/' % r
  62. def string(self, s):
  63. return self._extra_rule(s)
  64. def expansion(self, *x):
  65. x, js = x[:-1], x[-1]
  66. if js.children:
  67. js_code ,= js.children
  68. js_code = js_code[2:-2]
  69. alias = '-> ' + self._new_function(js_code)
  70. else:
  71. alias = ''
  72. return ' '.join(x) + alias
  73. def expansions(self, *x):
  74. return '%s' % ('\n |'.join(x))
  75. def start(self, *rules):
  76. return '\n'.join(filter(None, rules))
  77. def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
  78. rule_defs = []
  79. tree = nearley_grammar_parser.parse(g)
  80. for statement in tree.children:
  81. if statement.data == 'directive':
  82. directive, arg = statement.children
  83. if directive in ('builtin', 'include'):
  84. folder = builtin_path if directive == 'builtin' else folder_path
  85. path = os.path.join(folder, arg[1:-1])
  86. if path not in includes:
  87. includes.add(path)
  88. with codecs.open(path, encoding='utf8') as f:
  89. text = f.read()
  90. rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
  91. else:
  92. assert False, directive
  93. elif statement.data == 'js_code':
  94. code ,= statement.children
  95. code = code[2:-2]
  96. js_code.append(code)
  97. elif statement.data == 'macro':
  98. pass # TODO Add support for macros!
  99. elif statement.data == 'ruledef':
  100. rule_defs.append( n2l.transform(statement) )
  101. else:
  102. raise Exception("Unknown statement: %s" % statement)
  103. return rule_defs
  104. def create_code_for_nearley_grammar(g, start, builtin_path, folder_path):
  105. import js2py
  106. emit_code = []
  107. def emit(x=None):
  108. if x:
  109. emit_code.append(x)
  110. emit_code.append('\n')
  111. js_code = ['function id(x) {return x[0];}']
  112. n2l = NearleyToLark()
  113. rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
  114. lark_g = '\n'.join(rule_defs)
  115. lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
  116. emit('from lark import Lark, Transformer')
  117. emit()
  118. emit('grammar = ' + repr(lark_g))
  119. emit()
  120. for alias, code in n2l.alias_js_code.items():
  121. js_code.append('%s = (%s);' % (alias, code))
  122. emit(js2py.translate_js('\n'.join(js_code)))
  123. emit('class TransformNearley(Transformer):')
  124. for alias in n2l.alias_js_code:
  125. emit(" %s = var.get('%s').to_python()" % (alias, alias))
  126. emit(" __default__ = lambda self, n, c: c if c else None")
  127. emit()
  128. emit('parser = Lark(grammar, start="n_%s")' % start)
  129. emit('def parse(text):')
  130. emit(' return TransformNearley().transform(parser.parse(text))')
  131. return ''.join(emit_code)
  132. def main(fn, start, nearley_lib):
  133. with codecs.open(fn, encoding='utf8') as f:
  134. grammar = f.read()
  135. return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)))
  136. if __name__ == '__main__':
  137. if len(sys.argv) < 4:
  138. print("Reads Nearley grammar (with js functions) outputs an equivalent lark parser.")
  139. print("Usage: %s <nearley_grammar_path> <start_rule> <nearley_lib_path>" % sys.argv[0])
  140. sys.exit(1)
  141. fn, start, nearley_lib = sys.argv[1:]
  142. print(main(fn, start, nearley_lib))