This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

125 lines
3.5 KiB

  1. "Converts between Lark and Nearley grammars. Work in progress!"
  2. from lark import Lark, InlineTransformer
# Lark grammar for the subset of Nearley syntax this module can read; it is
# handed to Lark(...) in nearley_to_lark().
#
# A Nearley file is a sequence of rule definitions (NAME "->" alternatives
# separated by "|") and "@" directives.  Each alternative is a run of rule
# names, quoted strings and [...] character classes, optionally followed by
# a {% ... %} block matched by _JS.
# NOTE(review): the ".ignore" suffix on WS and COMMENT presumably tells Lark
# to skip those tokens -- confirm against the Lark version in use.
nearley_grammar = r"""
start: (ruledef|directive)+
directive: "@" NAME STRING
ruledef: NAME "->" expansions
expansions: expansion ("|" expansion)*
expansion: (rule|string|regexp)+ _JS?
rule: NAME
string: STRING
regexp: REGEXP
_JS: /(?s){%.*?%}/
NAME: /[a-zA-Z_]\w*/
WS.ignore: /[\t \f\n]+/
COMMENT.ignore: /\#[^\n]*/
REGEXP: /\[.*?\]/
STRING: /".*?"/
"""
  19. class NearleyToLark(InlineTransformer):
  20. def rule(self, name):
  21. return {'_': '_WS?', '__':'_WS'}.get(name, name)
  22. def ruledef(self, name, exps):
  23. return '%s: %s' % (name, exps)
  24. def regexp(self, r):
  25. return '/%s/' % r
  26. def string(self, s):
  27. # TODO allow regular strings, and split them in the parser frontend
  28. return ' '.join('"%s"'%ch for ch in s[1:-1])
  29. def expansion(self, *x):
  30. return ' '.join(x)
  31. def expansions(self, *x):
  32. return '\n |'.join(x)
  33. def directive(self, name, *args):
  34. if name == 'builtin':
  35. arg = args[0][1:-1]
  36. if arg == 'whitespace.ne':
  37. return r'_WS: /[ \t\n\v\f]/'
  38. elif arg == 'number.ne':
  39. return ('unsigned_int: DIGIT+\n'
  40. 'DIGIT: /\d/\n'
  41. 'decimal: "-"? DIGIT+ [/\./ DIGIT+] \n'
  42. 'percentage: decimal "%"\n'
  43. )
  44. # TODO
  45. elif arg == 'postprocessors.ne':
  46. pass
  47. else:
  48. assert False, arg
  49. else:
  50. assert False
  51. pass
  52. def start(self, *rules):
  53. return '\n'.join(filter(None, rules))
  54. def nearley_to_lark(g):
  55. parser = Lark(nearley_grammar)
  56. tree = parser.parse(g)
  57. return NearleyToLark().transform(tree)
  58. def test():
  59. css_example_grammar = """
  60. # http://www.w3.org/TR/css3-color/#colorunits
  61. @builtin "whitespace.ne"
  62. @builtin "number.ne"
  63. @builtin "postprocessors.ne"
  64. csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {%
  65. function(d) {
  66. return {
  67. "r": parseInt(d[1]+d[2], 16),
  68. "g": parseInt(d[3]+d[4], 16),
  69. "b": parseInt(d[5]+d[6], 16),
  70. }
  71. }
  72. %}
  73. | "#" hexdigit hexdigit hexdigit {%
  74. function(d) {
  75. return {
  76. "r": parseInt(d[1]+d[1], 16),
  77. "g": parseInt(d[2]+d[2], 16),
  78. "b": parseInt(d[3]+d[3], 16),
  79. }
  80. }
  81. %}
  82. | "rgb" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %}
  83. | "hsl" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %}
  84. | "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %}
  85. | "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %}
  86. hexdigit -> [a-fA-F0-9]
  87. colnum -> unsigned_int {% id %} | percentage {%
  88. function(d) {return Math.floor(d[0]*255); }
  89. %}
  90. """
  91. converted_grammar = nearley_to_lark(css_example_grammar)
  92. print(converted_grammar)
  93. l = Lark(converted_grammar, start='csscolor', parser='earley_nolex')
  94. print(l.parse('#a199ff').pretty())
  95. print(l.parse('rgb(255, 70%, 3)').pretty())
  96. if __name__ == '__main__':
  97. test()