jmg
/
blog


			
				
					
						
						
							
							from jinja2 import contextfilter, contextfunction, environmentfilter
from jinja2.ext import Extension

import functools
import json
import pathlib
import re
import unittest

import hyde.model

from io import StringIO
from lxml import etree

#from commando.util import getLoggerWithNullHandler
#from logging import DEBUG

#logger = getLoggerWithNullHandler('hyde.server')
#logger.setLevel(DEBUG)

def tbwrapper(f):
	"""
	Decorator that reports the traceback of any exception raised by f.

	The wrapped function behaves exactly like f: arguments are passed
	through and the result is returned.  When f raises, the traceback
	is printed with traceback.print_exc() and the same exception is
	then re-raised to the caller.
	"""

	@functools.wraps(f)
	def wrapper(*args, **kwargs):
		try:
			result = f(*args, **kwargs)
		except BaseException:
			# Print the traceback, then let the original exception
			# continue to propagate unchanged.
			from traceback import print_exc
			print_exc()
			raise
		else:
			return result

	return wrapper

@contextfunction
@tbwrapper
def do_get_tweet_thread(context, start, stop):
	"""
	Return an iterator over the tweet resources of a reply thread.

	The thread is walked backwards: beginning with tweet id stop, the
	meta.in_reply_to_status_id of each tweet is followed until the
	tweet with id start has been collected.  The collected resources
	are returned oldest first, that is from start to stop, both
	inclusive.

	Each tweet is looked up as the resource '<id>.yaml' of the
	'twitter' node of context['site'].content.

	Raises LookupError when a tweet of the chain has no resource or
	when the chain ends before start is reached.
	"""

	# The 'twitter' node is the same for every tweet of the thread, so
	# look it up once instead of once per iteration.
	twitter = context['site'].content.node_from_relative_path('twitter')

	thread = []
	tweetid = stop

	while True:
		if tweetid is None:
			# The previous tweet is not a reply, so start is not an
			# ancestor of stop.
			raise LookupError(
			    'tweet thread ending at %r does not reach %r' %
			    (stop, start))

		curnode = twitter.get_resource('%d.yaml' % tweetid)
		if curnode is None:
			# NOTE(review): presumably get_resource returns None for
			# an unknown name; without this check the failure would
			# only show up as an AttributeError on .meta below.
			raise LookupError(
			    'no resource for tweet %d (thread %r..%r)' %
			    (tweetid, start, stop))

		thread.append(curnode)

		if tweetid == start:
			break

		tweetid = curnode.meta.in_reply_to_status_id

	# Collected newest first, hand back oldest first.
	return iter(reversed(thread))

@contextfilter
def do_attr_escape(context, value):
	"""
	Escape value for use inside a double quoted HTML attribute.

	'&', '"', '<' and '>' are replaced by their entities and every
	new line is replaced by a single space.  context is not used.
	"""

	# jinja2 requires <, >, and new line to be escaped, which isn't
	# required per html spec:
	# https://html.spec.whatwg.org/multipage/syntax.html#attributes-2

	# '&' has to come first so that the '&' of the entities added by
	# the later replacements is not escaped a second time.
	replacements = (
		('&', '&amp;'),
		('"', '&quot;'),
		('<', '&lt;'),
		('>', '&gt;'),
		('\n', ' '),
	)

	escaped = value
	for raw, substitute in replacements:
		escaped = escaped.replace(raw, substitute)

	return escaped

@contextfilter
def make_path(context, value):
	"""
	Wrap value in a pathlib.PurePosixPath.

	context is accepted because of the contextfilter calling
	convention, it is not used.
	"""

	path = pathlib.PurePosixPath(value)

	return path

@contextfilter
def do_gettweettext(context, value):
	"""
	Render the text of a tweet object as HTML paragraphs.

	value is a tweet object providing full_text, entities.user_mentions
	and optionally extended_entities.media; every mention and media
	item carries an indices pair (start, stop) into full_text.

	The text covered by those index ranges is removed, the remainder
	is stripped of surrounding whitespace and each non-empty line is
	wrapped in <p>...</p>.  The paragraphs are returned concatenated
	as a single string.  context is not used.

	Raises TypeError when value is a string instead of a tweet object.
	"""

	if isinstance(value, str):
		raise TypeError('got string expected tweet object')

	if hasattr(value, 'extended_entities'):
		mediaindices = [ x.indices for x in value.extended_entities.media ]
	else:
		mediaindices = [ ]

	mentionsindices = [ x.indices for x in value.entities.user_mentions ]

	# Several entities may carry the very same range (presumably the
	# case for tweets with more than one photo, all sharing one URL).
	# Deleting an identical range more than once would eat the text
	# following it, so every distinct range is removed only once.
	delranges = { tuple(x) for x in mediaindices + mentionsindices }

	text = value.full_text

	# Delete from the end of the text towards the beginning so that
	# the ranges still to be processed keep their positions.
	for start, stop in sorted(delranges, reverse=True):
		text = text[:start] + text[stop:]

	text = text.strip()

	return ''.join('<p>%s</p>' % x for x in text.split('\n') if x)

class TweetExtension(Extension):
	"""
	Jinja2 extension providing the tweet helpers.

	Installs the gettweettext, make_path and attr_escape filters and
	the get_tweet_thread global on the environment, and declares a
	gettweettext block tag.
	"""
	tags = { 'gettweettext' }

	def __init__(self, env):
		super(TweetExtension, self).__init__(env)

		env.filters['gettweettext'] = do_gettweettext
		env.filters['make_path'] = make_path
		env.filters['attr_escape'] = do_attr_escape
		env.globals['get_tweet_thread'] = do_get_tweet_thread

	def parse(self, parser):
		"""
		Parses the statements up to the end tag and defers to the
		callback for gettweettext processing.
		"""

		# The imports at the top of this file do not provide
		# jinja2.nodes, so it is imported here; without it building
		# the CallBlock below fails with a NameError.
		from jinja2 import nodes

		lineno = next(parser.stream).lineno

		# NOTE(review): the block is opened by the 'gettweettext' tag
		# but closed by 'endcleantweet' -- confirm that this naming is
		# intended before changing it, templates may depend on it.
		body = parser.parse_statements(['name:endcleantweet'], drop_needle=True)

		return nodes.CallBlock(
		    self.call_method('_render_cleantweet'),
		    [], [], body).set_lineno(lineno)

	def _render_cleantweet(self, caller=None):
		"""
		Calls the gettweettext filter to transform the output of the
		block body.  Returns '' when there is no caller.
		"""
		if not caller:
			return ''
		output = caller().strip()

		# NOTE(review): output is the rendered block body, i.e. a
		# string, and do_gettweettext raises TypeError for strings, so
		# the block tag presumably cannot succeed as written -- only
		# the filter form, applied to a tweet object, can.  Confirm
		# whether the tag is still wanted.
		return do_gettweettext(self.environment, output)

@contextfilter
def rellinktoabs(context, value):
	"""
	Prefix the fragment-only links of an HTML snippet with the URL of
	the page the snippet belongs to.

	value is parsed as HTML and every <a> whose href starts with '#'
	gets the content URL of context['resource'] put in front of it.
	The URL is obtained from the content_url global of the
	environment.  The re-serialized HTML is returned, without the
	<html><body> wrapper added by the parser.

	Blank input, and input the parser builds no tree for, is returned
	unchanged.
	"""

	# There is nothing to rewrite in blank input, and etree.HTML does
	# not produce an element tree for it.
	if not value.strip():
		return value

	env = context.environment

	# get the path for this context
	rel_path = context['resource'].relative_path
	content_url = env.globals['content_url'](context, rel_path)

	# Note that this could be parsed w/ fragment_fromstring
	# (https://lxml.de/3.1/api/private/lxml.html-module.html#fragment_fromstring)
	# But it would require using create_parent and stripping that
	# instead, or fragments_fromstring and serializing each of the
	# fragments to a string.  Neither is a clean solution to the
	# problem.
	html = etree.HTML(value)

	# NOTE(review): presumably etree.HTML returns None when the input
	# holds no element at all -- confirm against the lxml version used.
	if html is None:
		return value

	# get all the fragment urls and prefix them w/ the content_url
	for anchor in html.xpath("//a[@href[starts-with(.,'#')]]"):
		anchor.attrib['href'] = content_url + anchor.attrib['href']

	res = etree.tostring(html, encoding='unicode', method='html')

	# lxml.HTML wraps the html w/ html/body tags, strip them
	# if present

	startstr = '<html><body>'
	endstr = '</body></html>'

	startpos = 0
	endpos = None
	if res.startswith(startstr):
		startpos = len(startstr)
	if res.endswith(endstr):
		endpos = -len(endstr)

	return res[startpos:endpos]

# mostly copied from hyde.ext.templates.jinja.py Markdown
# and using docs from:
# https://jinja.palletsprojects.com/en/2.10.x/extensions/#example-extension
# to get the filter installed

class RelLinktoAbs(Extension):
	"""
	A wrapper around the rellinktoabs filter for syntactic sugar.

	Installs the rellinktoabs filter on the environment and declares
	the rellinktoabs ... endrellinktoabs block tag, which applies the
	filter to the rendered body of the block.
	"""
	tags = { 'rellinktoabs' }

	def __init__(self, env):
		super(RelLinktoAbs, self).__init__(env)

		env.filters['rellinktoabs'] = rellinktoabs

	def parse(self, parser):
		"""
		Parses the statements and defers to the callback
		for rellinktoabs processing.
		"""

		# The imports at the top of this file do not provide
		# jinja2.nodes, so it is imported here; without it building
		# the CallBlock below fails with a NameError.
		from jinja2 import nodes

		lineno = next(parser.stream).lineno
		body = parser.parse_statements(['name:endrellinktoabs'], drop_needle=True)

		# rellinktoabs is a context filter (it reads context['resource']
		# and context.environment), so hand the template context of
		# the render to the callback.
		return nodes.CallBlock(
		    self.call_method('_render_rellinktoabs',
		        [nodes.ContextReference()]),
		    [], [], body).set_lineno(lineno)

	def _render_rellinktoabs(self, context, caller=None):
		"""
		Calls the rellinktoabs filter to transform the output.

		context is the template context of the current render, caller
		renders the body of the block.  Returns '' when there is no
		caller.
		"""
		if not caller:
			return ''
		output = caller().strip()
		return rellinktoabs(context, output)

class Tests(unittest.TestCase):
	"""
	Unit tests for the template helpers of this module.
	"""

	# fixture file names are funcs_<function name>_<label>.json
	_fnameparser = re.compile('funcs_(.*)_[^_]+\\.json')

	def test_fixtures(self):
		"""
		Run every JSON fixture below fixtures/ against the function
		named in its file name.

		A fixture provides the arguments ('args'), the expected result
		('res') and 'do_expando', which selects whether the second
		argument is wrapped in a hyde.model.Expando before the call.
		"""
		fixtures = sorted(pathlib.Path('fixtures').glob('funcs_*_*.json'))

		for fixture in fixtures:
			with self.subTest(file=fixture):
				match = self._fnameparser.match(fixture.name)
				self.assertIsNotNone(match)

				with fixture.open() as fp:
					spec = json.load(fp)

				if spec['do_expando']:
					spec['args'][1] = hyde.model.Expando(spec['args'][1])

				# the function under test is looked up by name in
				# this module
				func = globals()[match.group(1)]
				result = func(*spec['args'])

				self.assertEqual(result, spec['res'])

	def test_errors(self):
		"""
		A string instead of a tweet object is rejected.
		"""
		# make sure we get a more useful error message
		with self.assertRaises(TypeError):
			do_gettweettext(None, 'foo')

	def test_basepath(self):
		"""
		make_path wraps its argument in a PurePosixPath.
		"""
		url = 'https://foo/bar/baz'
		expected = pathlib.PurePosixPath(url)

		self.assertEqual(make_path(None, url), expected)

	def test_attr_escape(self):
		"""
		&, <, >, double quotes and new lines are all escaped.
		"""
		escaped = do_attr_escape(None, 'some & < > \n\n ""')

		self.assertEqual(escaped, 'some &amp; &lt; &gt;    &quot;&quot;')