This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Nevar pievienot vairāk kā 25 tēmas Tēmai ir jāsākas ar burtu vai ciparu, tā var saturēt domu zīmes ('-') un var būt līdz 35 simboliem gara.

187 rindas
5.4 KiB

  1. "Converts between Lark and Nearley grammars. Work in progress!"
  2. import os.path
  3. import sys
  4. import codecs
  5. from lark import Lark, InlineTransformer, Transformer
# Lark grammar for the Nearley grammar syntax handled by this module:
# rule definitions ("name -> expansions"), "@" directives, top-level
# "@{% ... %}" JavaScript blocks (aliased to js_code), and macro definitions
# (aliased to macro; parsed here but skipped by _nearley_to_lark).
nearley_grammar = r"""
start: (ruledef|directive)+
directive: "@" NAME (STRING|NAME)
| "@" JS -> js_code
ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)*
expansion: expr+ js
?expr: item [":" /[+*?]/]
?item: rule|string|regexp
| "(" expansions ")"
rule: NAME
string: STRING
regexp: REGEXP
JS: /{%.*?%}/s
js: JS?
NAME: /[a-zA-Z_$]\w*/
COMMENT: /#[^\n]*/
REGEXP: /\[.*?\]/
STRING: /".*?"/
%import common.WS
%ignore WS
%ignore COMMENT
"""

# Parser for Nearley source text, built once when the module is imported.
nearley_grammar_parser = Lark(nearley_grammar, parser='earley', lexer='standard')
  31. def _get_rulename(name):
  32. name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
  33. return 'n_' + name.replace('$', '__DOLLAR__').lower()
  34. class NearleyToLark(InlineTransformer):
  35. def __init__(self):
  36. self._count = 0
  37. self.extra_rules = {}
  38. self.extra_rules_rev = {}
  39. self.alias_js_code = {}
  40. def _new_function(self, code):
  41. name = 'alias_%d' % self._count
  42. self._count += 1
  43. self.alias_js_code[name] = code
  44. return name
  45. def _extra_rule(self, rule):
  46. if rule in self.extra_rules_rev:
  47. return self.extra_rules_rev[rule]
  48. name = 'xrule_%d' % len(self.extra_rules)
  49. assert name not in self.extra_rules
  50. self.extra_rules[name] = rule
  51. self.extra_rules_rev[rule] = name
  52. return name
  53. def rule(self, name):
  54. return _get_rulename(name)
  55. def ruledef(self, name, exps):
  56. return '!%s: %s' % (_get_rulename(name), exps)
  57. def expr(self, item, op):
  58. rule = '(%s)%s' % (item, op)
  59. return self._extra_rule(rule)
  60. def regexp(self, r):
  61. return '/%s/' % r
  62. def string(self, s):
  63. return self._extra_rule(s)
  64. def expansion(self, *x):
  65. x, js = x[:-1], x[-1]
  66. if js.children:
  67. js_code ,= js.children
  68. js_code = js_code[2:-2]
  69. alias = '-> ' + self._new_function(js_code)
  70. else:
  71. alias = ''
  72. return ' '.join(x) + alias
  73. def expansions(self, *x):
  74. return '%s' % ('\n |'.join(x))
  75. def start(self, *rules):
  76. return '\n'.join(filter(None, rules))
  77. def _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, includes):
  78. rule_defs = []
  79. tree = nearley_grammar_parser.parse(g)
  80. for statement in tree.children:
  81. if statement.data == 'directive':
  82. directive, arg = statement.children
  83. if directive in ('builtin', 'include'):
  84. folder = builtin_path if directive == 'builtin' else folder_path
  85. path = os.path.join(folder, arg[1:-1])
  86. if path not in includes:
  87. includes.add(path)
  88. with codecs.open(path, encoding='utf8') as f:
  89. text = f.read()
  90. rule_defs += _nearley_to_lark(text, builtin_path, n2l, js_code, os.path.abspath(os.path.dirname(path)), includes)
  91. else:
  92. assert False, directive
  93. elif statement.data == 'js_code':
  94. code ,= statement.children
  95. code = code[2:-2]
  96. js_code.append(code)
  97. elif statement.data == 'macro':
  98. pass # TODO Add support for macros!
  99. elif statement.data == 'ruledef':
  100. rule_defs.append( n2l.transform(statement) )
  101. else:
  102. raise Exception("Unknown statement: %s" % statement)
  103. return rule_defs
  104. def create_code_for_nearley_grammar(g, start, builtin_path, folder_path):
  105. import js2py
  106. emit_code = []
  107. def emit(x=None):
  108. if x:
  109. emit_code.append(x)
  110. emit_code.append('\n')
  111. js_code = ['function id(x) {return x[0];}']
  112. n2l = NearleyToLark()
  113. rule_defs = _nearley_to_lark(g, builtin_path, n2l, js_code, folder_path, set())
  114. lark_g = '\n'.join(rule_defs)
  115. lark_g += '\n'+'\n'.join('!%s: %s' % item for item in n2l.extra_rules.items())
  116. emit('from lark import Lark, Transformer')
  117. emit()
  118. emit('grammar = ' + repr(lark_g))
  119. emit()
  120. for alias, code in n2l.alias_js_code.items():
  121. js_code.append('%s = (%s);' % (alias, code))
  122. emit(js2py.translate_js('\n'.join(js_code)))
  123. emit('class TransformNearley(Transformer):')
  124. for alias in n2l.alias_js_code:
  125. emit(" %s = var.get('%s').to_python()" % (alias, alias))
  126. emit(" __default__ = lambda self, n, c: c if c else None")
  127. emit()
  128. emit('parser = Lark(grammar, start="n_%s")' % start)
  129. emit('def parse(text):')
  130. emit(' return TransformNearley().transform(parser.parse(text))')
  131. return ''.join(emit_code)
  132. def main(fn, start, nearley_lib):
  133. with codecs.open(fn, encoding='utf8') as f:
  134. grammar = f.read()
  135. return (create_code_for_nearley_grammar(grammar, start, os.path.join(nearley_lib, 'builtin'), os.path.abspath(os.path.dirname(fn))))
  136. if __name__ == '__main__':
  137. if len(sys.argv) < 4:
  138. print("Reads Nearley grammar (with js functions) outputs an equivalent lark parser.")
  139. print("Usage: %s <nearley_grammar_path> <start_rule> <nearley_lib_path>" % sys.argv[0])
  140. sys.exit(1)
  141. fn, start, nearley_lib = sys.argv[1:]
  142. print(main(fn, start, nearley_lib))