This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

197 lines
6.0 KiB

  1. "Converts Nearley grammars to Lark"
  2. import os.path
  3. import sys
  4. import codecs
  5. import argparse
  6. from lark import Lark, InlineTransformer
# Lark grammar for the subset of Nearley syntax this converter reads.
#
#   * A file (`start`) is one or more rule definitions or directives.
#   * A directive is "@" NAME followed by a STRING or NAME argument; an "@"
#     followed directly by a JS block is aliased to `js_code`.
#   * A rule definition is `NAME -> expansions`; the form with a REGEXP
#     between the name and the arrow is aliased to `macro`.
#   * Each expansion is one or more expressions followed by an optional JS
#     block (`js: JS?`), where a JS block is text delimited by `{%` and `%}`.
#   * An expression is an item optionally followed by ":" and one of + * ?.
#   * Whitespace and `#` comments running to end of line are ignored.
nearley_grammar = r"""
start: (ruledef|directive)+
directive: "@" NAME (STRING|NAME)
| "@" JS -> js_code
ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)*
expansion: expr+ js
?expr: item (":" /[+*?]/)?
?item: rule|string|regexp|null
| "(" expansions ")"
rule: NAME
string: STRING
regexp: REGEXP
null: "null"
JS: /{%.*?%}/s
js: JS?
NAME: /[a-zA-Z_$]\w*/
COMMENT: /#[^\n]*/
REGEXP: /\[.*?\]/
%import common.ESCAPED_STRING -> STRING
%import common.WS
%ignore WS
%ignore COMMENT
"""

# Parser for Nearley source text, constructed once when the module is loaded.
nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard')
  33. def _get_rulename(name):
  34. name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
  35. return 'n_' + name.replace('$', '__DOLLAR__').lower()
  36. class NearleyToLark(InlineTransformer):
  37. def __init__(self):
  38. self._count = 0
  39. self.extra_rules = {}
  40. self.extra_rules_rev = {}
  41. self.alias_js_code = {}
  42. def _new_function(self, code):
  43. name = 'alias_%d' % self._count
  44. self._count += 1
  45. self.alias_js_code[name] = code
  46. return name
  47. def _extra_rule(self, rule):
  48. if rule in self.extra_rules_rev:
  49. return self.extra_rules_rev[rule]
  50. name = 'xrule_%d' % len(self.extra_rules)
  51. assert name not in self.extra_rules
  52. self.extra_rules[name] = rule
  53. self.extra_rules_rev[rule] = name
  54. return name
  55. def rule(self, name):
  56. return _get_rulename(name)
  57. def ruledef(self, name, exps):
  58. return '!%s: %s' % (_get_rulename(name), exps)
  59. def expr(self, item, op):
  60. rule = '(%s)%s' % (item, op)
  61. return self._extra_rule(rule)
  62. def regexp(self, r):
  63. return '/%s/' % r
  64. def null(self):
  65. return ''
  66. def string(self, s):
  67. return self._extra_rule(s)
  68. def expansion(self, *x):
  69. x, js = x[:-1], x[-1]
  70. if js.children:
  71. js_code ,= js.children
  72. js_code = js_code[2:-2]
  73. alias = '-> ' + self._new_function(js_code)
  74. else:
  75. alias = ''
  76. return ' '.join(x) + alias
  77. def expansions(self, *x):
  78. return '%s' % ('\n |'.join(x))
  79. def start(self, *rules):
  80. return '\n'.join(filter(None, rules))
  81. def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
  82. rule_defs = []
  83. tree = nearley_grammar_parser.parse(g)
  84. for statement in tree.children:
  85. if statement.data == 'directive':
  86. directive, arg = statement.children
  87. if directive in ('builtin', 'include'):
  88. folder = builtin_path if directive == 'builtin' else folder_path
  89. path = os.path.join(folder, arg[1:-1])
  90. if path not in includes:
  91. includes.add(path)
  92. with codecs.open(path, encoding='utf8') as f:
  93. text = f.read()
  94. rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
  95. else:
  96. assert False, directive
  97. elif statement.data == 'js_code':
  98. code ,= statement.children
  99. code = code[2:-2]
  100. js_code.append(code)
  101. elif statement.data == 'macro':
  102. pass # TODO Add support for macros!
  103. elif statement.data == 'ruledef':
  104. rule_defs.append( n2l.transform(statement) )
  105. else:
  106. raise Exception("Unknown statement: %s" % statement)
  107. return rule_defs
  108. def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False):
  109. import js2py
  110. emit_code = []
  111. def emit(x=None):
  112. if x:
  113. emit_code.append(x)
  114. emit_code.append('\n')
  115. js_code = ['function id(x) {return x[0];}']
  116. n2l = NearleyToLark()
  117. rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
  118. lark_g = '\n'.join(rule_defs)
  119. lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
  120. emit('from lark import Lark, Transformer')
  121. emit()
  122. emit('grammar = ' + repr(lark_g))
  123. emit()
  124. for alias, code in n2l.alias_js_code.items():
  125. js_code.append('%s = (%s);' % (alias, code))
  126. if es6:
  127. emit(js2py.translate_js6('\n'.join(js_code)))
  128. else:
  129. emit(js2py.translate_js('\n'.join(js_code)))
  130. emit('class TransformNearley(Transformer):')
  131. for alias in n2l.alias_js_code:
  132. emit(" %s = var.get('%s').to_python()" % (alias, alias))
  133. emit(" __default__ = lambda self, n, c, m: c if c else None")
  134. emit()
  135. emit('parser = Lark(grammar, start="n_%s", maybe_placeholders=False)' % start)
  136. emit('def parse(text):')
  137. emit(' return TransformNearley().transform(parser.parse(text))')
  138. return ''.join(emit_code)
  139. def main(fn, start, nearley_lib, es6=False):
  140. with codecs.open(fn, encoding='utf8') as f:
  141. grammar = f.read()
  142. return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)), es6=es6)
  143. def get_arg_parser():
  144. parser = argparse.ArgumentParser('Reads Nearley grammar (with js functions) outputs an equivalent lark parser.')
  145. parser.add_argument('nearley_grammar', help='Path to the file containing the nearley grammar')
  146. parser.add_argument('start_rule', help='Rule within the nearley grammar to make the base rule')
  147. parser.add_argument('nearley_lib', help='Path to root directory of nearley codebase (used for including builtins)')
  148. parser.add_argument('--es6', help='Enable experimental ES6 support', action='store_true')
  149. return parser
  150. if __name__ == '__main__':
  151. parser = get_arg_parser()
  152. args = parser.parse_args()
  153. print(main(fn=args.nearley_grammar, start=args.start_rule, nearley_lib=args.nearley_lib, es6=args.es6))