Browse Source

Rework ImageSizerPlugin to use an ad hoc parser instead of BeautifulSoup.

BeautifulSoup is not compatible with HTML 5 and may mangle your
document. We provide a simple parser that should work in most cases.
main
Vincent Bernat 13 years ago
parent
commit
0b9a848e96
2 changed files with 138 additions and 43 deletions
  1. +98
    -43
      hyde/ext/plugins/images.py
  2. +40
    -0
      hyde/tests/ext/test_images.py

+ 98
- 43
hyde/ext/plugins/images.py View File

@@ -7,7 +7,6 @@ from hyde.plugin import Plugin


import re import re
import Image import Image
from BeautifulSoup import BeautifulSoup


class ImageSizerPlugin(Plugin): class ImageSizerPlugin(Plugin):
""" """
@@ -19,10 +18,58 @@ class ImageSizerPlugin(Plugin):
super(ImageSizerPlugin, self).__init__(site) super(ImageSizerPlugin, self).__init__(site)
self.cache = {} self.cache = {}


def _handle_img(self, resource, src, width, height):
    """
    Determine what should be added to an img tag.

    `resource` is the resource whose text contains the tag. `src`,
    `width` and `height` are the attribute values found in the tag, or
    None for attributes that are absent.

    Returns the attribute text to insert in the tag (with a trailing
    space): the missing `height`, the missing `width`, or both. Returns
    an empty string when nothing should or can be added: both sizes are
    already given, `src` is missing or rejected, the image cannot be
    found or read, or the size given in the tag is not an integer.
    """
    if height is not None and width is not None:
        return ""  # Both sizes already specified
    if src is None:
        self.logger.warn("[%s] has an img tag without src attribute" % resource)
        return ""
    # NOTE(review): the cache is keyed on the raw src value, so the same
    # relative src used from two different folders shares one entry --
    # confirm this is acceptable.
    if src not in self.cache:
        if not re.match(r"(/[^/]|[^/]).*", src):
            # Not a local link
            return ""
        if src.startswith("/"):
            # Absolute resource
            path = src.lstrip("/")
            image = self.site.content.resource_from_relative_deploy_path(path)
        else:
            # Relative resource
            path = resource.node.source_folder.child(src)
            image = self.site.content.resource_from_path(path)
        if image is None:
            self.logger.warn(
                "[%s] has an unknown image" % resource)
            return ""
        if image.source_file.kind not in ['png', 'jpg', 'jpeg', 'gif']:
            self.logger.warn(
                "[%s] has an img tag not linking to an image" % resource)
            return ""
        # Now, get the size of the image
        try:
            self.cache[src] = Image.open(image.path).size
        except IOError:
            self.logger.warn(
                "Unable to process image [%s]" % image)
            # Remember the failure so the image is not opened again
            self.cache[src] = (None, None)
            return ""
        self.logger.debug("Image [%s] is %s" % (src,
                                                self.cache[src]))
    new_width, new_height = self.cache[src]
    if new_width is None or new_height is None:
        return ""
    try:
        # Only one dimension is given: scale the other one to keep the
        # aspect ratio. Floor division keeps the result an integer.
        if width is not None:
            return 'height="%d" ' % (int(width) * new_height // new_width)
        if height is not None:
            return 'width="%d" ' % (int(height) * new_width // new_height)
    except (ValueError, ZeroDivisionError):
        # The tag carries a size such as "100%" or "auto", or the image
        # has a zero dimension: leave the tag untouched rather than
        # aborting the whole generation.
        self.logger.debug(
            "[%s] has an img tag whose missing size cannot be computed"
            % resource)
        return ""
    return 'height="%d" width="%d" ' % (new_height, new_width)

def text_resource_complete(self, resource, text): def text_resource_complete(self, resource, text):
""" """
When the resource is generated, search for img tag and specify When the resource is generated, search for img tag and specify
their sizes. their sizes.

Some img tags may be missed, this is not a perfect parser.
""" """
try: try:
mode = self.site.config.mode mode = self.site.config.mode
@@ -36,51 +83,59 @@ class ImageSizerPlugin(Plugin):
self.logger.debug("Skipping sizer in development mode.") self.logger.debug("Skipping sizer in development mode.")
return return


soup = BeautifulSoup(text)
for img in soup.findAll('img'):
if img.has_key('width') and img.has_key('height'):
continue
if not img.has_key('src'):
self.logger.warn("[%s] has an img tag without src attribute" % resource)
pos = 0 # Position in text
img = None # Position of current img tag
state = "find-img"
while pos < len(text):
if state == "find-img":
img = text.find("<img", pos)
if img == -1:
break # No more img tag
pos = img + len("<img")
if not text[pos].isspace():
continue # Not an img tag
pos = pos + 1
tags = {"src": "",
"width": "",
"height": ""}
state = "find-attr"
continue continue
if not img['src'] in self.cache:
if not re.match(r"(/[^/]|[^/]).*", img['src']):
# Not a local link
if state == "find-attr":
if text[pos] == ">":
# We get our img tag
insert = self._handle_img(resource,
tags["src"] or None,
tags["width"] or None,
tags["height"] or None)
img = img + len("<img ")
text = "".join([text[:img], insert, text[img:]])
state = "find-img"
pos = pos + 1
continue continue
if img['src'].startswith("/"):
# Absolute resource
path = img['src'].lstrip("/")
image = self.site.content.resource_from_relative_deploy_path(path)
else:
# Relative resource
path = resource.node.source_folder.child(img['src'])
image = self.site.content.resource_from_path(path)
if image is None:
self.logger.warn(
"[%s] has an unknown image" % resource)
attr = None
for tag in tags:
if text[pos:(pos+len(tag)+1)] == ("%s=" % tag):
attr = tag
pos = pos + len(tag) + 1
break
if not attr:
pos = pos + 1
continue continue
if image.source_file.kind not in ['png', 'jpg', 'jpeg', 'gif']:
self.logger.warn(
"[%s] has an img tag not linking to an image" % resource)
if text[pos] in ["'", '"']:
pos = pos + 1
state = "get-value"
continue
if state == "get-value":
if text[pos] == ">":
state = "find-attr"
continue continue
# Now, get the size of the image
try:
self.cache[img['src']] = Image.open(image.path).size
except IOError:
self.logger.warn(
"Unable to process image [%s]" % image)
self.cache[img['src']] = (None, None)
if text[pos] in ["'", '"'] or text[pos].isspace():
# We got our value
pos = pos + 1
state = "find-attr"
continue continue
self.logger.debug("Image [%s] is %s" % (img['src'],
self.cache[img['src']]))
width, height = self.cache[img['src']]
if width is None:
tags[attr] = tags[attr] + text[pos]
pos = pos + 1
continue continue
if img.has_key('width'):
height = int(img['width'])*height/width
width = int(img['width'])
elif img.has_key('height'):
width = int(img['height'])*width/height
height = int(img['height'])
img['height'], img['width'] = height, width
return unicode(soup)

return text

+ 40
- 0
hyde/tests/ext/test_images.py View File

@@ -81,4 +81,44 @@ class TestImageSizer(object):
""" """
html = self._generic_test_image(text) html = self._generic_test_image(text)


def test_size_image_multiline(self):
    """An img tag spread over several lines must receive both sizes."""
    markup = u"""
<img
src="/media/img/%s"
>
""" % IMAGE_NAME
    rendered = self._generic_test_image(markup)
    expected_width = ' width="%d"' % IMAGE_SIZE[0]
    expected_height = ' height="%d"' % IMAGE_SIZE[1]
    assert expected_width in rendered
    assert expected_height in rendered

def test_size_multiple_images(self):
    """Every one of four img tags gets sized and the text between them is kept."""
    markup = u"""
<img src="/media/img/%s">
<img src="/media/img/%s">Hello <img src="/media/img/%s">
<img src="/media/img/%s">Bye
""" % ((IMAGE_NAME,)*4)
    rendered = self._generic_test_image(markup)
    expected_width = ' width="%d"' % IMAGE_SIZE[0]
    expected_height = ' height="%d"' % IMAGE_SIZE[1]
    assert expected_width in rendered
    assert expected_height in rendered
    assert 'Hello ' in rendered
    assert 'Bye' in rendered
    # Splitting on the tag opener yields one fragment per img tag (plus
    # the leading text), so count the fragments carrying each attribute.
    with_width = [chunk for chunk in rendered.split("<img")
                  if ' width=' in chunk]
    assert len(with_width) == 4
    with_height = [chunk for chunk in rendered.split("<img")
                   if ' height=' in chunk]
    assert len(with_height) == 4


def test_size_malformed1(self):
    """An img tag whose src value lacks its closing quote is still sized."""
    markup = u"""
<img src="/media/img/%s>
""" % IMAGE_NAME
    rendered = self._generic_test_image(markup)
    expected_width = ' width="%d"' % IMAGE_SIZE[0]
    expected_height = ' height="%d"' % IMAGE_SIZE[1]
    assert expected_width in rendered
    assert expected_height in rendered

def test_size_malformed2(self):
    """An unterminated src value followed by another attribute is still sized."""
    markup = u"""
<img src="/media/img/%s alt="hello">
""" % IMAGE_NAME
    rendered = self._generic_test_image(markup)
    expected_width = ' width="%d"' % IMAGE_SIZE[0]
    expected_height = ' height="%d"' % IMAGE_SIZE[1]
    assert expected_width in rendered
    assert expected_height in rendered

Loading…
Cancel
Save