This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 

151 行
4.2 KiB

  1. "Converts between Lark and Nearley grammars. Work in progress!"
  2. import os.path
  3. import sys
  4. from lark import Lark, InlineTransformer
# Lark grammar for the subset of Nearley syntax this converter understands.
# A Nearley file is a sequence of rule definitions (`NAME -> expansions`) and
# `@` directives.  `{% ... %}` JavaScript blocks are matched by `_JS`, and a
# rule head followed by a bracketed REGEXP is aliased to `macro`.
# The rule and alias names here are the method names NearleyToLark implements.
# NOTE(review): the `.ignore` suffix on WS and COMMENT is presumably the
# token-flag syntax of the Lark version this file targets -- confirm.
nearley_grammar = r"""
start: (ruledef|directive)+
directive: "@" NAME (STRING|NAME)
| "@" _JS -> js_code
ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)*
expansion: expr+ _JS?
?expr: item [":" /[+*?]/]
?item: rule|string|regexp
| "(" expansions ")"
rule: NAME
string: STRING
regexp: REGEXP
_JS: /(?s){%.*?%}/
NAME: /[a-zA-Z_$]\w*/
WS.ignore: /[\t \f\n]+/
COMMENT.ignore: /\#[^\n]*/
REGEXP: /\[.*?\]/
STRING: /".*?"/
"""
  26. class NearleyToLark(InlineTransformer):
  27. def __init__(self, builtin_path):
  28. self.builtin_path = builtin_path
  29. def rule(self, name):
  30. # return {'_': '_WS?', '__':'_WS'}.get(name, name)
  31. return {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
  32. def ruledef(self, name, exps):
  33. name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
  34. return '%s: %s' % (name, exps)
  35. def expr(self, item, op):
  36. return '(%s)%s' % (item, op)
  37. def regexp(self, r):
  38. return '/%s/' % r
  39. def string(self, s):
  40. # TODO allow regular strings, and split them in the parser frontend
  41. return ' '.join('"%s"'%ch for ch in s[1:-1])
  42. def expansion(self, *x):
  43. return ' '.join(x)
  44. def expansions(self, *x):
  45. return '(%s)' % ('\n |'.join(x))
  46. def js_code(self):
  47. return ''
  48. def macro(self, *args):
  49. return '' # TODO support macros?!
  50. def directive(self, name, *args):
  51. if name == 'builtin':
  52. arg = args[0][1:-1]
  53. with open(os.path.join(self.builtin_path, arg)) as f:
  54. text = f.read()
  55. return nearley_to_lark(text, self.builtin_path)
  56. elif name == 'preprocessor':
  57. return ''
  58. raise Exception('Unknown directive: %s' % name)
  59. def start(self, *rules):
  60. return '\n'.join(filter(None, rules))
  61. def nearley_to_lark(g, builtin_path):
  62. parser = Lark(nearley_grammar)
  63. tree = parser.parse(g)
  64. return NearleyToLark(builtin_path).transform(tree)
  65. def test():
  66. css_example_grammar = """
  67. # http://www.w3.org/TR/css3-color/#colorunits
  68. @builtin "whitespace.ne"
  69. @builtin "number.ne"
  70. @builtin "postprocessors.ne"
  71. csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {%
  72. function(d) {
  73. return {
  74. "r": parseInt(d[1]+d[2], 16),
  75. "g": parseInt(d[3]+d[4], 16),
  76. "b": parseInt(d[5]+d[6], 16),
  77. }
  78. }
  79. %}
  80. | "#" hexdigit hexdigit hexdigit {%
  81. function(d) {
  82. return {
  83. "r": parseInt(d[1]+d[1], 16),
  84. "g": parseInt(d[2]+d[2], 16),
  85. "b": parseInt(d[3]+d[3], 16),
  86. }
  87. }
  88. %}
  89. | "rgb" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %}
  90. | "hsl" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %}
  91. | "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %}
  92. | "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %}
  93. hexdigit -> [a-fA-F0-9]
  94. colnum -> unsigned_int {% id %} | percentage {%
  95. function(d) {return Math.floor(d[0]*255); }
  96. %}
  97. """
  98. converted_grammar = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin')
  99. print(converted_grammar)
  100. l = Lark(converted_grammar, start='csscolor', parser='earley_nolex')
  101. print(l.parse('#a199ff').pretty())
  102. print(l.parse('rgb(255, 70%, 3)').pretty())
  103. def main():
  104. try:
  105. nearley_lib = sys.argv[1]
  106. except IndexError:
  107. print("Reads Nearley grammar from stdin and outputs a lark grammar.")
  108. print("Usage: %s <nearley_lib_path>" % sys.argv[0])
  109. return
  110. grammar = sys.stdin.read()
  111. print(nearley_to_lark(grammar, os.path.join(nearley_lib, 'builtin')))
# Run the converter only when this file is executed as a script.
if __name__ == '__main__':
    main()