Browse Source

Rework ImageSizerPlugin to use an ad hoc parser instead of BeautifulSoup.

BeautifulSoup is not compatible with HTML 5 and may mangle your
document. We provide a simple parser that should work in most cases.
main
Vincent Bernat 14 years ago
parent
commit
0b9a848e96
2 changed files with 138 additions and 43 deletions
  1. +98
    -43
      hyde/ext/plugins/images.py
  2. +40
    -0
      hyde/tests/ext/test_images.py

+ 98
- 43
hyde/ext/plugins/images.py View File

@@ -7,7 +7,6 @@ from hyde.plugin import Plugin

import re
import Image
from BeautifulSoup import BeautifulSoup

class ImageSizerPlugin(Plugin):
"""
@@ -19,10 +18,58 @@ class ImageSizerPlugin(Plugin):
super(ImageSizerPlugin, self).__init__(site)
self.cache = {}

def _handle_img(self, resource, src, width, height):
"""Determine what should be added to an img tag"""
if height is not None and width is not None:
return "" # Nothing
if src is None:
self.logger.warn("[%s] has an img tag without src attribute" % resource)
return "" # Nothing
if src not in self.cache:
if not re.match(r"(/[^/]|[^/]).*", src):
# Not a local link
return "" # Nothing
if src.startswith("/"):
# Absolute resource
path = src.lstrip("/")
image = self.site.content.resource_from_relative_deploy_path(path)
else:
# Relative resource
path = resource.node.source_folder.child(src)
image = self.site.content.resource_from_path(path)
if image is None:
self.logger.warn(
"[%s] has an unknown image" % resource)
return "" # Nothing
if image.source_file.kind not in ['png', 'jpg', 'jpeg', 'gif']:
self.logger.warn(
"[%s] has an img tag not linking to an image" % resource)
return "" # Nothing
# Now, get the size of the image
try:
self.cache[src] = Image.open(image.path).size
except IOError:
self.logger.warn(
"Unable to process image [%s]" % image)
self.cache[src] = (None, None)
return "" # Nothing
self.logger.debug("Image [%s] is %s" % (src,
self.cache[src]))
new_width, new_height = self.cache[src]
if new_width is None or new_height is None:
return "" # Nothing
if width is not None:
return 'height="%d" ' % (int(width)*new_height/new_width)
elif height is not None:
return 'width="%d" ' % (int(height)*new_width/new_height)
return 'height="%d" width="%d" ' % (new_height, new_width)

def text_resource_complete(self, resource, text):
"""
When the resource is generated, search for img tag and specify
their sizes.

Some img tags may be missed, this is not a perfect parser.
"""
try:
mode = self.site.config.mode
@@ -36,51 +83,59 @@ class ImageSizerPlugin(Plugin):
self.logger.debug("Skipping sizer in development mode.")
return

soup = BeautifulSoup(text)
for img in soup.findAll('img'):
if img.has_key('width') and img.has_key('height'):
continue
if not img.has_key('src'):
self.logger.warn("[%s] has an img tag without src attribute" % resource)
pos = 0 # Position in text
img = None # Position of current img tag
state = "find-img"
while pos < len(text):
if state == "find-img":
img = text.find("<img", pos)
if img == -1:
break # No more img tag
pos = img + len("<img")
if not text[pos].isspace():
continue # Not an img tag
pos = pos + 1
tags = {"src": "",
"width": "",
"height": ""}
state = "find-attr"
continue
if not img['src'] in self.cache:
if not re.match(r"(/[^/]|[^/]).*", img['src']):
# Not a local link
if state == "find-attr":
if text[pos] == ">":
# We get our img tag
insert = self._handle_img(resource,
tags["src"] or None,
tags["width"] or None,
tags["height"] or None)
img = img + len("<img ")
text = "".join([text[:img], insert, text[img:]])
state = "find-img"
pos = pos + 1
continue
if img['src'].startswith("/"):
# Absolute resource
path = img['src'].lstrip("/")
image = self.site.content.resource_from_relative_deploy_path(path)
else:
# Relative resource
path = resource.node.source_folder.child(img['src'])
image = self.site.content.resource_from_path(path)
if image is None:
self.logger.warn(
"[%s] has an unknown image" % resource)
attr = None
for tag in tags:
if text[pos:(pos+len(tag)+1)] == ("%s=" % tag):
attr = tag
pos = pos + len(tag) + 1
break
if not attr:
pos = pos + 1
continue
if image.source_file.kind not in ['png', 'jpg', 'jpeg', 'gif']:
self.logger.warn(
"[%s] has an img tag not linking to an image" % resource)
if text[pos] in ["'", '"']:
pos = pos + 1
state = "get-value"
continue
if state == "get-value":
if text[pos] == ">":
state = "find-attr"
continue
# Now, get the size of the image
try:
self.cache[img['src']] = Image.open(image.path).size
except IOError:
self.logger.warn(
"Unable to process image [%s]" % image)
self.cache[img['src']] = (None, None)
if text[pos] in ["'", '"'] or text[pos].isspace():
# We got our value
pos = pos + 1
state = "find-attr"
continue
self.logger.debug("Image [%s] is %s" % (img['src'],
self.cache[img['src']]))
width, height = self.cache[img['src']]
if width is None:
tags[attr] = tags[attr] + text[pos]
pos = pos + 1
continue
if img.has_key('width'):
height = int(img['width'])*height/width
width = int(img['width'])
elif img.has_key('height'):
width = int(img['height'])*width/height
height = int(img['height'])
img['height'], img['width'] = height, width
return unicode(soup)

return text

+ 40
- 0
hyde/tests/ext/test_images.py View File

@@ -81,4 +81,44 @@ class TestImageSizer(object):
"""
html = self._generic_test_image(text)

def test_size_image_multiline(self):
    """An img tag split across several lines receives width and height."""
    markup = u"""
<img
src="/media/img/%s"
>
""" % IMAGE_NAME
    rendered = self._generic_test_image(markup)
    width_attr = ' width="%d"' % IMAGE_SIZE[0]
    assert width_attr in rendered
    height_attr = ' height="%d"' % IMAGE_SIZE[1]
    assert height_attr in rendered

def test_size_multiple_images(self):
    """Four img tags in one document are each given width and height.

    The surrounding text ("Hello ", "Bye") must still be present in the
    output.
    """
    markup = u"""
<img src="/media/img/%s">
<img src="/media/img/%s">Hello <img src="/media/img/%s">
<img src="/media/img/%s">Bye
""" % ((IMAGE_NAME,)*4)
    rendered = self._generic_test_image(markup)
    width_attr = ' width="%d"' % IMAGE_SIZE[0]
    assert width_attr in rendered
    height_attr = ' height="%d"' % IMAGE_SIZE[1]
    assert height_attr in rendered
    assert 'Hello ' in rendered
    assert 'Bye' in rendered
    # Splitting on "<img" yields one fragment per tag (plus the leading
    # text); count the fragments that carry each attribute.
    with_width = [piece for piece in rendered.split("<img")
                  if ' width=' in piece]
    assert len(with_width) == 4
    with_height = [piece for piece in rendered.split("<img")
                   if ' height=' in piece]
    assert len(with_height) == 4

def test_size_malformed1(self):
    """An img tag whose src value lacks its closing quote is still sized."""
    markup = u"""
<img src="/media/img/%s>
""" % IMAGE_NAME
    rendered = self._generic_test_image(markup)
    width_attr = ' width="%d"' % IMAGE_SIZE[0]
    assert width_attr in rendered
    height_attr = ' height="%d"' % IMAGE_SIZE[1]
    assert height_attr in rendered

def test_size_malformed2(self):
    """A src missing its closing quote, followed by another attribute, is still sized."""
    markup = u"""
<img src="/media/img/%s alt="hello">
""" % IMAGE_NAME
    rendered = self._generic_test_image(markup)
    width_attr = ' width="%d"' % IMAGE_SIZE[0]
    assert width_attr in rendered
    height_attr = ' height="%d"' % IMAGE_SIZE[1]
    assert height_attr in rendered

||||||
x
 
000:0
Loading…
Cancel
Save