This repo contains code to mirror other repos. It also contains the code that is getting mirrored.
Você não pode selecionar mais de 25 tópicos. Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

125 linhas
3.5 KiB

  1. "Converts between Lark and Nearley grammars. Work in progress!"
  2. from lark import Lark, InlineTransformer
# Lark grammar used to parse Nearley grammar source text.
#
# A Nearley file is read as a sequence of directives (`@NAME "string"`) and
# rule definitions (`NAME -> expansions`). Each expansion is a run of rule
# names, quoted strings and bracketed character classes, optionally followed
# by a `{% ... %}` block matched by `_JS` (the `(?s)` flag lets that block
# span several lines). Whitespace and `#` comments are declared as ignored.
nearley_grammar = r"""
start: (ruledef|directive)+
directive: "@" NAME STRING
ruledef: NAME "->" expansions
expansions: expansion ("|" expansion)*
expansion: (rule|string|regexp)+ _JS?
rule: NAME
string: STRING
regexp: REGEXP
_JS: /(?s){%.*?%}/
NAME: /[a-zA-Z_]\w*/
WS.ignore: /[\t \f\n]+/
COMMENT.ignore: /\#[^\n]*/
REGEXP: /\[.*?\]/
STRING: /".*?"/
"""
  19. class NearleyToLark(InlineTransformer):
  20. def rule(self, name):
  21. return {'_': '_WS?', '__':'_WS'}.get(name, name)
  22. def ruledef(self, name, exps):
  23. return '%s: %s' % (name, exps)
  24. def regexp(self, r):
  25. return '/%s/' % r
  26. def string(self, s):
  27. # TODO allow regular strings, and split them in the parser frontend
  28. return ' '.join('"%s"'%ch for ch in s[1:-1])
  29. def expansion(self, *x):
  30. return ' '.join(x)
  31. def expansions(self, *x):
  32. return '\n |'.join(x)
  33. def directive(self, name, *args):
  34. if name == 'builtin':
  35. arg = args[0][1:-1]
  36. if arg == '':
  37. return r'_WS: /[ \t\n\v\f]/'
  38. elif arg == '':
  39. return ('unsigned_int: DIGIT+\n'
  40. 'DIGIT: /\d/\n'
  41. 'decimal: "-"? DIGIT+ [/\./ DIGIT+] \n'
  42. 'percentage: decimal "%"\n'
  43. )
  44. # TODO
  45. elif arg == '':
  46. pass
  47. else:
  48. assert False, arg
  49. else:
  50. assert False
  51. pass
  52. def start(self, *rules):
  53. return '\n'.join(filter(None, rules))
  54. def nearley_to_lark(g):
  55. parser = Lark(nearley_grammar)
  56. tree = parser.parse(g)
  57. return NearleyToLark().transform(tree)
  58. def test():
  59. css_example_grammar = """
  60. #
  61. @builtin ""
  62. @builtin ""
  63. @builtin ""
  64. csscolor -> "#" hexdigit hexdigit hexdigit hexdigit hexdigit hexdigit {%
  65. function(d) {
  66. return {
  67. "r": parseInt(d[1]+d[2], 16),
  68. "g": parseInt(d[3]+d[4], 16),
  69. "b": parseInt(d[5]+d[6], 16),
  70. }
  71. }
  72. %}
  73. | "#" hexdigit hexdigit hexdigit {%
  74. function(d) {
  75. return {
  76. "r": parseInt(d[1]+d[1], 16),
  77. "g": parseInt(d[2]+d[2], 16),
  78. "b": parseInt(d[3]+d[3], 16),
  79. }
  80. }
  81. %}
  82. | "rgb" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"r": 4, "g": 8, "b": 12}) %}
  83. | "hsl" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ ")" {% $({"h": 4, "s": 8, "l": 12}) %}
  84. | "rgba" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"r": 4, "g": 8, "b": 12, "a": 16}) %}
  85. | "hsla" _ "(" _ colnum _ "," _ colnum _ "," _ colnum _ "," _ decimal _ ")" {% $({"h": 4, "s": 8, "l": 12, "a": 16}) %}
  86. hexdigit -> [a-fA-F0-9]
  87. colnum -> unsigned_int {% id %} | percentage {%
  88. function(d) {return Math.floor(d[0]*255); }
  89. %}
  90. """
  91. converted_grammar = nearley_to_lark(css_example_grammar)
  92. print(converted_grammar)
  93. l = Lark(converted_grammar, start='csscolor', parser='earley_nolex')
  94. print(l.parse('#a199ff').pretty())
  95. print(l.parse('rgb(255, 70%, 3)').pretty())
# Run the conversion demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    test()