"""Jinja2 filters, globals and extensions used by the site templates.

Provides:

* ``get_tweet_thread`` -- template global that walks a reply chain of
  archived tweets.
* ``attr_escape``, ``make_path``, ``gettweettext``, ``rellinktoabs`` --
  template filters.
* ``TweetExtension`` / ``RelLinktoAbs`` -- Jinja2 extensions that register
  the above on an environment.
"""

import functools
import json
import pathlib
import re
import unittest
from io import StringIO

import hyde.model
from jinja2 import contextfilter, contextfunction, environmentfilter
from jinja2 import nodes
from jinja2.ext import Extension
from lxml import etree

#from commando.util import getLoggerWithNullHandler
#from logging import DEBUG
#logger = getLoggerWithNullHandler('hyde.server')
#logger.setLevel(DEBUG)


def tbwrapper(f):
    """Decorate *f* so that any exception prints a traceback before
    propagating.

    The exception is always re-raised unchanged; the traceback is printed
    so the failure is visible on stderr even if a caller later handles or
    reformats the exception.
    """
    @functools.wraps(f)
    def wrapper(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except Exception:
            import traceback
            traceback.print_exc()
            raise

    return wrapper


@contextfunction
@tbwrapper
def do_get_tweet_thread(context, start, stop):
    """Return an iterator over the tweet resources from *start* to *stop*.

    The chain is discovered backwards: beginning at the tweet with id
    *stop*, each tweet's ``meta.in_reply_to_status_id`` is followed until
    the tweet with id *start* has been collected.  The result is yielded
    in thread order (*start* first, *stop* last).

    Tweets are looked up as ``<id>.yaml`` resources under the ``twitter``
    content node of ``context['site']``.
    """
    # The node holding the tweets does not change while walking the chain.
    twitter_node = context['site'].content.node_from_relative_path('twitter')

    thread = []
    nextnode = stop
    while True:
        curnode = twitter_node.get_resource('%d.yaml' % nextnode)
        thread.append(curnode)

        if nextnode == start:
            break

        nextnode = curnode.meta.in_reply_to_status_id

    return iter(reversed(thread))


# Order matters: '&' has to be replaced first so that the ampersands
# introduced by the later replacements are not escaped a second time.
_ATTR_ESCAPES = (
    ('&', '&amp;'),
    ('"', '&quot;'),
    ('<', '&lt;'),
    ('>', '&gt;'),
    ('\n', '&#10;'),
)


@contextfilter
def do_attr_escape(context, value):
    """Escape *value* for use inside a double-quoted HTML attribute.

    *context* is accepted because this is registered as a context filter;
    it is not used.
    """
    # jinja2 requires <, >, and new line to be escaped, which isn't
    # required per html spec:
    # https://html.spec.whatwg.org/multipage/syntax.html#attributes-2
    for char, entity in _ATTR_ESCAPES:
        value = value.replace(char, entity)

    return value


@contextfilter
def make_path(context, value):
    """Return *value* wrapped in a ``pathlib.PurePosixPath``.

    *context* is not used.
    """
    return pathlib.PurePosixPath(value)


@contextfilter
def do_gettweettext(context, value):
    """Return the display text of tweet object *value* as HTML paragraphs.

    The character ranges covered by media entities (when the tweet has
    ``extended_entities``) and by user mentions are cut out of
    ``value.full_text``; the remainder is stripped and every non-empty
    line is wrapped in its own ``<p>`` element.

    Raises ``TypeError`` if *value* is a plain string instead of a tweet
    object.  *context* is not used.
    """
    if isinstance(value, str):
        raise TypeError('got string expected tweet object')

    if hasattr(value, 'extended_entities'):
        mediaindices = [x.indices for x in value.extended_entities.media]
    else:
        mediaindices = []

    mentionsindices = [x.indices for x in value.entities.user_mentions]

    delindices = mediaindices + mentionsindices

    text = value.full_text

    # Remove the ranges from the end of the text backwards so that the
    # offsets of the ranges still to be removed stay valid.
    for start, stop in sorted(delindices, reverse=True):
        text = text[:start] + text[stop:]

    text = text.strip()

    return ''.join('<p>%s</p>' % x for x in text.split('\n') if x)


class TweetExtension(Extension):
    """
    Registers the tweet related filters and globals on the environment
    and provides a block tag wrapper around the gettweettext filter.
    """

    tags = {'gettweettext'}

    def __init__(self, env):
        super().__init__(env)

        env.filters['gettweettext'] = do_gettweettext
        env.filters['make_path'] = make_path
        env.filters['attr_escape'] = do_attr_escape
        env.globals['get_tweet_thread'] = do_get_tweet_thread

    def parse(self, parser):
        """
        Parses the statements up to ``endcleantweet`` and defers to the
        callback for gettweettext processing.
        """
        lineno = next(parser.stream).lineno
        body = parser.parse_statements(['name:endcleantweet'],
                                       drop_needle=True)

        return nodes.CallBlock(
            self.call_method('_render_cleantweet'),
            [], [], body).set_lineno(lineno)

    def _render_cleantweet(self, caller=None):
        """
        Calls the gettweettext filter to transform the output.
        """
        if not caller:
            return ''

        output = caller().strip()

        # NOTE(review): caller() yields the rendered block as a string,
        # and do_gettweettext raises TypeError for string input, so the
        # block form of this tag cannot currently succeed -- confirm
        # what this tag is meant to do.
        return do_gettweettext(self.environment, output)


@contextfilter
def rellinktoabs(context, value):
    """Prefix fragment-only links in the HTML *value* with the page URL.

    Every ``<a>`` element whose ``href`` starts with ``#`` gets the
    content URL of ``context['resource']`` prepended, so the link still
    resolves when the HTML is shown somewhere other than its own page.
    Returns the rewritten HTML as a string.
    """
    env = context.environment

    # get the path for this context
    rel_path = context['resource'].relative_path
    content_url = env.globals['content_url'](context, rel_path)

    # Note that this could be parsed w/ fragment_fromstring
    # (https://lxml.de/3.1/api/private/lxml.html-module.html#fragment_fromstring)
    # But it would require using create_parent, and stripping that
    # instead, or fragments_fromstring, but iterating over those
    # to strings.  Neither one is a solution to the problem.
    html = etree.HTML(value)

    # get all the fragment urls
    for anchor in html.xpath("//a[@href[starts-with(.,'#')]]"):
        # prefix them w/ the content_url
        anchor.attrib['href'] = content_url + anchor.attrib['href']

    res = etree.tostring(html, encoding='unicode', method='html')

    # lxml.HTML wraps the html w/ html/body tags, strip them
    # if present
    startstr = '<html><body>'
    endstr = '</body></html>'

    if res.startswith(startstr):
        res = res[len(startstr):]
    if res.endswith(endstr):
        res = res[:-len(endstr)]

    return res


# mostly copied from hyde.ext.templates.jinja.py Markdown
# and using docs from:
# https://jinja.palletsprojects.com/en/2.10.x/extensions/#example-extension
# to get the filter installed
class RelLinktoAbs(Extension):
    """
    A wrapper around the rellinktoabs filter for syntactic sugar.
    """

    tags = {'rellinktoabs'}

    def __init__(self, env):
        super().__init__(env)

        env.filters['rellinktoabs'] = rellinktoabs

    def parse(self, parser):
        """
        Parses the statements and defers to the callback
        for rellinktoabs processing.
        """
        lineno = next(parser.stream).lineno
        body = parser.parse_statements(['name:endrellinktoabs'],
                                       drop_needle=True)

        # The filter needs the template context (for the current
        # resource), so hand it to the callback explicitly.
        return nodes.CallBlock(
            self.call_method('_render_rellinktoabs',
                             [nodes.ContextReference()]),
            [], [], body).set_lineno(lineno)

    def _render_rellinktoabs(self, context, caller=None):
        """
        Calls the rellinktoabs filter to transform the output.
        """
        if not caller:
            return ''

        output = caller().strip()

        return rellinktoabs(context, output)


class Tests(unittest.TestCase):
    # fixture files are named funcs_<function name>_<label>.json
    _fnameparser = re.compile(r'funcs_(.*)_[^_]+\.json')

    def test_fixtures(self):
        for i in sorted(pathlib.Path('fixtures').glob('funcs_*_*.json')):
            with self.subTest(file=i):
                mat = self._fnameparser.match(i.name)
                self.assertIsNotNone(mat)

                with i.open() as fp:
                    obj = json.load(fp)

                if obj['do_expando']:
                    obj['args'][1] = hyde.model.Expando(obj['args'][1])

                # the function under test is looked up by the name
                # embedded in the fixture's file name
                fun = globals()[mat.group(1)]
                self.assertEqual(fun(*obj['args']), obj['res'])

    def test_errors(self):
        # make sure we get a more useful error message
        with self.assertRaises(TypeError):
            do_gettweettext(None, 'foo')

    def test_basepath(self):
        v = 'https://foo/bar/baz'
        self.assertEqual(make_path(None, v), pathlib.PurePosixPath(v))

    def test_attr_escape(self):
        self.assertEqual(
            do_attr_escape(None, 'some & < > \n\n ""'),
            'some &amp; &lt; &gt; &#10;&#10; &quot;&quot;')