This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

199 lines
6.0 KiB

  1. "Converts Nearley grammars to Lark"
  2. import os.path
  3. import sys
  4. import codecs
  5. import argparse
  6. from lark import Lark, InlineTransformer
  7. nearley_grammar = r"""
  8. start: (ruledef|directive)+
  9. directive: "@" NAME (STRING|NAME)
  10. | "@" JS -> js_code
  11. ruledef: NAME "->" expansions
  12. | NAME REGEXP "->" expansions -> macro
  13. expansions: expansion ("|" expansion)*
  14. expansion: expr+ js
  15. ?expr: item (":" /[+*?]/)?
  16. ?item: rule|string|regexp|null
  17. | "(" expansions ")"
  18. rule: NAME
  19. string: STRING
  20. regexp: REGEXP
  21. null: "null"
  22. JS: /{%.*?%}/s
  23. js: JS?
  24. NAME: /[a-zA-Z_$]\w*/
  25. COMMENT: /#[^\n]*/
  26. REGEXP: /\[.*?\]/
  27. STRING: _STRING "i"?
  28. %import common.ESCAPED_STRING -> _STRING
  29. %import common.WS
  30. %ignore WS
  31. %ignore COMMENT
  32. """
  33. nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard')
  34. def _get_rulename(name):
  35. name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
  36. return 'n_' + name.replace('$', '__DOLLAR__').lower()
  37. class NearleyToLark(InlineTransformer):
  38. def __init__(self):
  39. self._count = 0
  40. self.extra_rules = {}
  41. self.extra_rules_rev = {}
  42. self.alias_js_code = {}
  43. def _new_function(self, code):
  44. name = 'alias_%d' % self._count
  45. self._count += 1
  46. self.alias_js_code[name] = code
  47. return name
  48. def _extra_rule(self, rule):
  49. if rule in self.extra_rules_rev:
  50. return self.extra_rules_rev[rule]
  51. name = 'xrule_%d' % len(self.extra_rules)
  52. assert name not in self.extra_rules
  53. self.extra_rules[name] = rule
  54. self.extra_rules_rev[rule] = name
  55. return name
  56. def rule(self, name):
  57. return _get_rulename(name)
  58. def ruledef(self, name, exps):
  59. return '!%s: %s' % (_get_rulename(name), exps)
  60. def expr(self, item, op):
  61. rule = '(%s)%s' % (item, op)
  62. return self._extra_rule(rule)
  63. def regexp(self, r):
  64. return '/%s/' % r
  65. def null(self):
  66. return ''
  67. def string(self, s):
  68. return self._extra_rule(s)
  69. def expansion(self, *x):
  70. x, js = x[:-1], x[-1]
  71. if js.children:
  72. js_code ,= js.children
  73. js_code = js_code[2:-2]
  74. alias = '-> ' + self._new_function(js_code)
  75. else:
  76. alias = ''
  77. return ' '.join(x) + alias
  78. def expansions(self, *x):
  79. return '%s' % ('\n |'.join(x))
  80. def start(self, *rules):
  81. return '\n'.join(filter(None, rules))
  82. def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
  83. rule_defs = []
  84. tree = nearley_grammar_parser.parse(g)
  85. for statement in tree.children:
  86. if statement.data == 'directive':
  87. directive, arg = statement.children
  88. if directive in ('builtin', 'include'):
  89. folder = builtin_path if directive == 'builtin' else folder_path
  90. path = os.path.join(folder, arg[1:-1])
  91. if path not in includes:
  92. includes.add(path)
  93. with codecs.open(path, encoding='utf8') as f:
  94. text = f.read()
  95. rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
  96. else:
  97. assert False, directive
  98. elif statement.data == 'js_code':
  99. code ,= statement.children
  100. code = code[2:-2]
  101. js_code.append(code)
  102. elif statement.data == 'macro':
  103. pass # TODO Add support for macros!
  104. elif statement.data == 'ruledef':
  105. rule_defs.append( n2l.transform(statement) )
  106. else:
  107. raise Exception("Unknown statement: %s" % statement)
  108. return rule_defs
  109. def create_code_for_nearley_grammar(g, start, builtin_path, folder_path, es6=False):
  110. import js2py
  111. emit_code = []
  112. def emit(x=None):
  113. if x:
  114. emit_code.append(x)
  115. emit_code.append('\n')
  116. js_code = ['function id(x) {return x[0];}']
  117. n2l = NearleyToLark()
  118. rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
  119. lark_g = '\n'.join(rule_defs)
  120. lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
  121. emit('from lark import Lark, Transformer')
  122. emit()
  123. emit('grammar = ' + repr(lark_g))
  124. emit()
  125. for alias, code in n2l.alias_js_code.items():
  126. js_code.append('%s = (%s);' % (alias, code))
  127. if es6:
  128. emit(js2py.translate_js6('\n'.join(js_code)))
  129. else:
  130. emit(js2py.translate_js('\n'.join(js_code)))
  131. emit('class TransformNearley(Transformer):')
  132. for alias in n2l.alias_js_code:
  133. emit(" %s = var.get('%s').to_python()" % (alias, alias))
  134. emit(" __default__ = lambda self, n, c, m: c if c else None")
  135. emit()
  136. emit('parser = Lark(grammar, start="n_%s", maybe_placeholders=False)' % start)
  137. emit('def parse(text):')
  138. emit(' return TransformNearley().transform(parser.parse(text))')
  139. return ''.join(emit_code)
  140. def main(fn, start, nearley_lib, es6=False):
  141. with codecs.open(fn, encoding='utf8') as f:
  142. grammar = f.read()
  143. return create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn)), es6=es6)
  144. def get_arg_parser():
  145. parser = argparse.ArgumentParser(description='Reads a Nearley grammar (with js functions), and outputs an equivalent lark parser.')
  146. parser.add_argument('nearley_grammar', help='Path to the file containing the nearley grammar')
  147. parser.add_argument('start_rule', help='Rule within the nearley grammar to make the base rule')
  148. parser.add_argument('nearley_lib', help='Path to root directory of nearley codebase (used for including builtins)')
  149. parser.add_argument('--es6', help='Enable experimental ES6 support', action='store_true')
  150. return parser
  151. if __name__ == '__main__':
  152. parser = get_arg_parser()
  153. args = parser.parse_args()
  154. print(main(fn=args.nearley_grammar, start=args.start_rule, nearley_lib=args.nearley_lib, es6=args.es6))