The blog.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 

243 lines
6.0 KiB

  1. from jinja2 import contextfilter, contextfunction, environmentfilter
  2. from jinja2.ext import Extension
  3. import functools
  4. import json
  5. import pathlib
  6. import re
  7. import unittest
  8. import hyde.model
  9. from io import StringIO
  10. from lxml import etree
  11. #from commando.util import getLoggerWithNullHandler
  12. #from logging import DEBUG
  13. #logger = getLoggerWithNullHandler('hyde.server')
  14. #logger.setLevel(DEBUG)
  15. def tbwrapper(f):
  16. @functools.wraps(f)
  17. def wrapper(*args, **kwargs):
  18. try:
  19. return f(*args, **kwargs)
  20. except:
  21. import traceback
  22. traceback.print_exc()
  23. raise
  24. return wrapper
  25. @contextfunction
  26. @tbwrapper
  27. def do_get_tweet_thread(context, start, stop):
  28. #print('gtt:', repr(start), repr(stop))
  29. nodes = []
  30. nextnode = stop
  31. while True:
  32. #print('proc:', nextnode)
  33. curnode = context['site'].content.node_from_relative_path(
  34. 'twitter').get_resource('%d.yaml' % nextnode)
  35. #print('cn:', repr(curnode))
  36. nodes.append(curnode)
  37. if nextnode == start:
  38. break
  39. nextnode = curnode.meta.in_reply_to_status_id
  40. return iter(reversed(nodes))
  41. @contextfilter
  42. def do_attr_escape(context, value):
  43. #print('ae:', repr(value))
  44. # jinja2 requires <, >, and new line to be escaped, which isn't
  45. # required per html spec:
  46. # https://html.spec.whatwg.org/multipage/syntax.html#attributes-2
  47. return value.replace('&', '&amp;').replace('"', '&quot;').replace('<', '&lt;').replace('>', '&gt;').replace('\n', ' ')
  48. @contextfilter
  49. def make_path(context, value):
  50. #print('mp:', repr(value))
  51. return pathlib.PurePosixPath(value)
  52. @contextfilter
  53. def do_gettweettext(context, value):
  54. if isinstance(value, str):
  55. raise TypeError('got string expected tweet object')
  56. #print('rd:', type(value), repr(value), type(value.entities))
  57. if hasattr(value, 'extended_entities'):
  58. mediaindices = [ x.indices for x in value.extended_entities.media ]
  59. else:
  60. mediaindices = [ ]
  61. mentionsindices = [ x.indices for x in value.entities.user_mentions ]
  62. delindices = mediaindices + mentionsindices
  63. text = value.full_text
  64. for start, stop in reversed(sorted(delindices)):
  65. text = text[:start] + text[stop:]
  66. text = text.strip()
  67. return ''.join(('<p>%s</p>' % x for x in text.split('\n') if x))
  68. class TweetExtension(Extension):
  69. """
  70. A wrapper around the rellinktoabs filter for syntactic sugar.
  71. """
  72. tags = { 'gettweettext' }
  73. def __init__(self, env):
  74. super(TweetExtension, self).__init__(env)
  75. env.filters['gettweettext'] = do_gettweettext
  76. env.filters['make_path'] = make_path
  77. env.filters['attr_escape'] = do_attr_escape
  78. env.globals['get_tweet_thread'] = do_get_tweet_thread
  79. def parse(self, parser):
  80. """
  81. Parses the statements and defers to the callback
  82. for rellinktoabs processing.
  83. """
  84. lineno = next(parser.stream).lineno
  85. body = parser.parse_statements(['name:endcleantweet'], drop_needle=True)
  86. return nodes.CallBlock(
  87. self.call_method('_render_cleantweet'),
  88. [], [], body).set_lineno(lineno)
  89. def _render_cleantweet(self, caller=None):
  90. """
  91. Calls the cleantweet filter to transform the output.
  92. """
  93. if not caller:
  94. return ''
  95. output = caller().strip()
  96. return do_gettweettext(self.environment, output)
  97. @contextfilter
  98. def rellinktoabs(context, value):
  99. env = context.environment
  100. # get the path for this context
  101. rel_path = context['resource'].relative_path
  102. content_url = env.globals['content_url'](context, rel_path)
  103. # Note that this could be parsed w/ fragment_fromstring
  104. # (https://lxml.de/3.1/api/private/lxml.html-module.html#fragment_fromstring)
  105. # But is would require using create_parent, and stripping that
  106. # instead, or fragments_fromstring, but iterating over those
  107. # to strings. Not one is a solution to the problem.
  108. html = etree.HTML(value)
  109. # get all the fragment urls
  110. r = html.xpath("//a[@href[starts-with(.,'#')]]")
  111. for i in r:
  112. # prefix them w/ the content_url
  113. i.attrib['href'] = content_url + i.attrib['href']
  114. res = etree.tostring(html, encoding='unicode', method='html')
  115. # lxml.HTML wraps the html w/ html/body tags, strip them
  116. # if present
  117. startstr = '<html><body>'
  118. endstr = '</body></html>'
  119. startpos = 0
  120. endpos = None
  121. if res.startswith(startstr):
  122. startpos = len(startstr)
  123. if res.endswith(endstr):
  124. endpos = -len(endstr)
  125. res = res[startpos:endpos]
  126. return res
  127. # mostly copied from hyde.ext.templates.jinja.py Markdown
  128. # and using docs from:
  129. # https://jinja.palletsprojects.com/en/2.10.x/extensions/#example-extension
  130. # to get the filter installed
  131. class RelLinktoAbs(Extension):
  132. """
  133. A wrapper around the rellinktoabs filter for syntactic sugar.
  134. """
  135. tags = { 'rellinktoabs' }
  136. def __init__(self, env):
  137. super(RelLinktoAbs, self).__init__(env)
  138. env.filters['rellinktoabs'] = rellinktoabs
  139. def parse(self, parser):
  140. """
  141. Parses the statements and defers to the callback
  142. for rellinktoabs processing.
  143. """
  144. lineno = next(parser.stream).lineno
  145. body = parser.parse_statements(['name:endrellinktoabs'], drop_needle=True)
  146. return nodes.CallBlock(
  147. self.call_method('_render_rellinktoabs'),
  148. [], [], body).set_lineno(lineno)
  149. def _render_rellinktoabs(self, caller=None):
  150. """
  151. Calls the rellinktoabs filter to transform the output.
  152. """
  153. if not caller:
  154. return ''
  155. output = caller().strip()
  156. return rellinktoabs(self.environment, output)
  157. class Tests(unittest.TestCase):
  158. _fnameparser = re.compile('funcs_(.*)_[^_]+\\.json')
  159. def test_fixtures(self):
  160. for i in sorted(pathlib.Path('fixtures').glob(
  161. 'funcs_*_*.json')):
  162. with self.subTest(file=i):
  163. mat = self._fnameparser.match(i.name)
  164. self.assertIsNotNone(mat)
  165. with i.open() as fp:
  166. obj = json.load(fp)
  167. if obj['do_expando']:
  168. obj['args'][1] = hyde.model.Expando(
  169. obj['args'][1])
  170. fun = globals()[mat.group(1)]
  171. self.assertEqual(fun(*obj['args']), obj['res'])
  172. def test_errors(self):
  173. # make sure we get a more useful error message
  174. with self.assertRaises(TypeError):
  175. do_gettweettext(None, 'foo')
  176. def test_basepath(self):
  177. v = 'https://foo/bar/baz'
  178. self.assertEqual(make_path(None, v), pathlib.PurePosixPath(v))
  179. def test_attr_escape(self):
  180. self.assertEqual(do_attr_escape(None, 'some & < > \n\n ""'), 'some &amp; &lt; &gt; &quot;&quot;')