This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

151 lines
4.2 KiB

  1. "Converts between Lark and Nearley grammars. Work in progress!"
  2. import os.path
  3. import sys
  4. from lark import Lark, InlineTransformer
# Lark grammar describing the subset of Nearley syntax this converter reads.
# nearley_to_lark() compiles it with Lark(nearley_grammar) and parses the
# Nearley source with the result.
#   - A file is one or more rule definitions ("name -> expansions") and
#     "@" directives.
#   - A rule definition whose name is followed by a REGEXP is aliased to
#     "macro"; an "@" followed by a JavaScript block is aliased to "js_code".
#   - "{% ... %}" JavaScript blocks are matched by the _JS terminal.
#   - WS and COMMENT carry an ".ignore" suffix (presumably Lark's marker for
#     tokens to skip -- confirm against the Lark version in use).
nearley_grammar = r"""
start: (ruledef|directive)+
directive: "@" NAME (STRING|NAME)
| "@" _JS -> js_code
ruledef: NAME "->" expansions
| NAME REGEXP "->" expansions -> macro
expansions: expansion ("|" expansion)*
expansion: expr+ _JS?
?expr: item [":" /[+*?]/]
?item: rule|string|regexp
| "(" expansions ")"
rule: NAME
string: STRING
regexp: REGEXP
_JS: /(?s){%.*?%}/
NAME: /[a-zA-Z_$]\w*/
WS.ignore: /[\t \f\n]+/
COMMENT.ignore: /\#[^\n]*/
REGEXP: /\[.*?\]/
STRING: /".*?"/
"""
  26. class NearleyToLark(InlineTransformer):
  27. def __init__(self, builtin_path):
  28. self.builtin_path = builtin_path
  29. def rule(self, name):
  30. # return {'_': '_WS?', '__':'_WS'}.get(name, name)
  31. return {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
  32. def ruledef(self, name, exps):
  33. name = {'_': '_ws_maybe', '__':'_ws'}.get(name, name)
  34. return '%s: %s' % (name, exps)
  35. def expr(self, item, op):
  36. return '(%s)%s' % (item, op)
  37. def regexp(self, r):
  38. return '/%s/' % r
  39. def string(self, s):
  40. # TODO allow regular strings, and split them in the parser frontend
  41. return ' '.join('"%s"'%ch for ch in s[1:-1])
  42. def expansion(self, *x):
  43. return ' '.join(x)
  44. def expansions(self, *x):
  45. return '(%s)' % ('\n |'.join(x))
  46. def js_code(self):
  47. return ''
  48. def macro(self, *args):
  49. return '' # TODO support macros?!
  50. def directive(self, name, *args):
  51. if name == 'builtin':
  52. arg = args[0][1:-1]
  53. with open(os.path.join(self.builtin_path, arg)) as f:
  54. text = f.read()
  55. return nearley_to_lark(text, self.builtin_path)
  56. elif name == 'preprocessor':
  57. return ''
  58. raise Exception('Unknown directive: %s' % name)
  59. def start(self, *rules):
  60. return '\n'.join(filter(None, rules))
  61. def nearley_to_lark(g, builtin_path):
  62. parser = Lark(nearley_grammar)
  63. tree = parser.parse(g)
  64. return NearleyToLark(builtin_path).transform(tree)
  65. def test():
  66. css_example_grammar = """
  67. # http://www.w3.org/TR/css3-color/#colorunits
  68. @builtin "whitespace.ne"
  69. @builtin "number.ne"
  70. @builtin "postprocessors.ne"
  71. csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {%
  72. function(d) {
  73. return {
  74. "r": parseInt(d[1]+d[2], 16),
  75. "g": parseInt(d[3]+d[4], 16),
  76. "b": parseInt(d[5]+d[6], 16),
  77. }
  78. }
  79. %}
  80. | "#" hexdigit hexdigit hexdigit {%
  81. function(d) {
  82. return {
  83. "r": parseInt(d[1]+d[1], 16),
  84. "g": parseInt(d[2]+d[2], 16),
  85. "b": parseInt(d[3]+d[3], 16),
  86. }
  87. }
  88. %}
  89. | "rgb" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %}
  90. | "hsl" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %}
  91. | "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %}
  92. | "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %}
  93. hexdigit -> [a-fA-F0-9]
  94. colnum -> unsigned_int {% id %} | percentage {%
  95. function(d) {return Math.floor(d[0]*255); }
  96. %}
  97. """
  98. converted_grammar = nearley_to_lark(css_example_grammar, '/home/erez/nearley/builtin')
  99. print(converted_grammar)
  100. l = Lark(converted_grammar, start='csscolor', parser='earley_nolex')
  101. print(l.parse('#a199ff').pretty())
  102. print(l.parse('rgb(255, 70%, 3)').pretty())
  103. def main():
  104. try:
  105. nearley_lib = sys.argv[1]
  106. except IndexError:
  107. print("Reads Nearley grammar from stdin and outputs a lark grammar.")
  108. print("Usage: %s <nearley_lib_path>" % sys.argv[0])
  109. return
  110. grammar = sys.stdin.read()
  111. print(nearley_to_lark(grammar, os.path.join(nearley_lib, 'builtin')))
  112. if __name__ == '__main__':
  113. main()