From fb968685a9b1328e0a9ba46e7d4a032b600e3b17 Mon Sep 17 00:00:00 2001
From: Vincent Bernat <bernat@luffy.cx>
Date: Sun, 8 May 2011 12:56:57 +0200
Subject: [PATCH 1/2] Image sizer plugin: add image dimensions in HTML code

This plugin adds `width` and `height` attributes to `img` tags when
they are not present. When only one of them is present, it adds the
missing one (respecting proportions).
---
 hyde/ext/plugins/images.py    | 86 +++++++++++++++++++++++++++++++++++
 hyde/tests/ext/test_images.py | 84 ++++++++++++++++++++++++++++++++++
 2 files changed, 170 insertions(+)
 create mode 100644 hyde/ext/plugins/images.py
 create mode 100644 hyde/tests/ext/test_images.py

diff --git a/hyde/ext/plugins/images.py b/hyde/ext/plugins/images.py
new file mode 100644
index 0000000..86ad7f8
--- /dev/null
+++ b/hyde/ext/plugins/images.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+"""
+Contains classes to handle images related things
+"""
+
+from hyde.plugin import Plugin
+
+import re
+import Image
+from BeautifulSoup import BeautifulSoup
+
+class ImageSizerPlugin(Plugin):
+    """
+    Each HTML page is modified to add width and height for images if
+    they are not already specified.
+    """
+
+    def __init__(self, site):
+        super(ImageSizerPlugin, self).__init__(site)
+        self.cache = {}
+
+    def text_resource_complete(self, resource, text):
+        """
+        When the resource is generated, search for img tag and specify
+        their sizes.
+        """
+        try:
+            mode = self.site.config.mode
+        except AttributeError:
+            mode = "production"
+
+        if not resource.source_file.kind == 'html':
+            return
+
+        if mode.startswith('dev'):
+            self.logger.debug("Skipping sizer in development mode.")
+            return
+
+        soup = BeautifulSoup(text)
+        for img in soup.findAll('img'):
+            if img.has_key('width') and img.has_key('height'):
+                continue
+            if not img.has_key('src'):
+                self.logger.warn("[%s] has an img tag without src attribute" % resource)
+                continue
+            if not img['src'] in self.cache:
+                if not re.match(r"(/[^/]|[^/]).*", img['src']):
+                    # Not a local link
+                    continue
+                if img['src'].startswith("/"):
+                    # Absolute resource
+                    path = img['src'].lstrip("/")
+                    image = self.site.content.resource_from_relative_deploy_path(path)
+                else:
+                    # Relative resource
+                    path = resource.node.source_folder.child(img['src'])
+                    image = self.site.content.resource_from_path(path)
+                if image is None:
+                    self.logger.warn(
+                        "[%s] has an unknown image" % resource)
+                    continue
+                if image.source_file.kind not in ['png', 'jpg', 'jpeg', 'gif']:
+                    self.logger.warn(
+                        "[%s] has an img tag not linking to an image" % resource)
+                    continue
+                # Now, get the size of the image
+                try:
+                    self.cache[img['src']] = Image.open(image.path).size
+                except IOError:
+                    self.logger.warn(
+                        "Unable to process image [%s]" % image)
+                    self.cache[img['src']] = (None, None)
+                    continue
+                self.logger.debug("Image [%s] is %s" % (img['src'],
+                                                        self.cache[img['src']]))
+            width, height = self.cache[img['src']]
+            if width is None:
+                continue
+            if img.has_key('width'):
+                height = int(img['width'])*height/width
+                width = int(img['width'])
+            elif img.has_key('height'):
+                width = int(img['height'])*width/height
+                height = int(img['height'])
+            img['height'], img['width'] = height, width
+        return unicode(soup)
diff --git a/hyde/tests/ext/test_images.py b/hyde/tests/ext/test_images.py
new file mode 100644
index 0000000..08f9c3b
--- /dev/null
+++ b/hyde/tests/ext/test_images.py
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+"""
+Use nose
+`$ pip install nose`
+`$ nosetests`
+"""
+from hyde.fs import File, Folder
+from hyde.generator import Generator
+from hyde.site import Site
+
+from pyquery import PyQuery
+
+TEST_SITE = File(__file__).parent.parent.child_folder('_test')
+IMAGE_SOURCE = File(__file__).parent.child_folder('optipng')
+IMAGE_NAME = "hyde-lt-b.png"
+IMAGE_SIZE = (538, 132)
+
+class TestImageSizer(object):
+
+    def setUp(self):
+        TEST_SITE.make()
+        TEST_SITE.parent.child_folder(
+                    'sites/test_jinja').copy_contents_to(TEST_SITE)
+        IMAGES = TEST_SITE.child_folder('content/media/img')
+        IMAGES.make()
+        IMAGE_SOURCE.copy_contents_to(IMAGES)
+
+    def tearDown(self):
+        TEST_SITE.delete()
+
+    def _generic_test_image(self, text):
+        site = Site(TEST_SITE)
+        site.config.mode = "production"
+        site.config.plugins = ['hyde.ext.plugins.images.ImageSizerPlugin']
+        tlink = File(site.content.source_folder.child('timg.html'))
+        tlink.write(text)
+        gen = Generator(site)
+        gen.generate_all()
+        f = File(site.config.deploy_root_path.child(tlink.name))
+        assert f.exists
+        html = f.read_all()
+        assert html
+        print html
+        return html
+
+    def test_size_image(self):
+        text = u"""
+<img src="/media/img/%s">
+""" % IMAGE_NAME
+        html = self._generic_test_image(text)
+        assert ' width="%d"' % IMAGE_SIZE[0] in html
+        assert ' height="%d"' % IMAGE_SIZE[1] in html
+
+    def test_size_image_relative(self):
+        text = u"""
+<img src="media/img/%s">
+""" % IMAGE_NAME
+        html = self._generic_test_image(text)
+        assert ' width="%d"' % IMAGE_SIZE[0] in html
+        assert ' height="%d"' % IMAGE_SIZE[1] in html
+
+    def test_size_image_no_resize(self):
+        text = u"""
+<img src="/media/img/%s" width="2000" height="150">
+""" % IMAGE_NAME
+        html = self._generic_test_image(text)
+        assert ' width="%d"' % IMAGE_SIZE[0] not in html
+        assert ' height="%d"' % IMAGE_SIZE[1] not in html
+
+    def test_size_image_size_proportional(self):
+        text = u"""
+<img src="/media/img/%s" width="%d">
+""" % (IMAGE_NAME,  IMAGE_SIZE[0]*2)
+        html = self._generic_test_image(text)
+        assert ' width="%d"' % (IMAGE_SIZE[0]*2) in html
+        assert ' height="%d"' % (IMAGE_SIZE[1]*2) in html
+
+    def test_size_image_not_exists(self):
+        text = u"""
+<img src="/media/img/hyde-logo-no.png">
+"""
+        html = self._generic_test_image(text)
+
+

From 0b9a848e9619ce15dc89484ca0cb6374d3932853 Mon Sep 17 00:00:00 2001
From: Vincent Bernat <bernat@luffy.cx>
Date: Sun, 8 May 2011 15:09:31 +0200
Subject: [PATCH 2/2] Rework ImageSizerPlugin to not use beautiful soup but an
 adhoc parser.

BeautifulSoup is not compatible with HTML 5 and may mangle your
document. We provide a simple parser that should work in most cases.
---
 hyde/ext/plugins/images.py    | 141 +++++++++++++++++++++++-----------
 hyde/tests/ext/test_images.py |  40 ++++++++++
 2 files changed, 138 insertions(+), 43 deletions(-)

diff --git a/hyde/ext/plugins/images.py b/hyde/ext/plugins/images.py
index 86ad7f8..e47d5e7 100644
--- a/hyde/ext/plugins/images.py
+++ b/hyde/ext/plugins/images.py
@@ -7,7 +7,6 @@ from hyde.plugin import Plugin
 
 import re
 import Image
-from BeautifulSoup import BeautifulSoup
 
 class ImageSizerPlugin(Plugin):
     """
@@ -19,10 +18,58 @@ class ImageSizerPlugin(Plugin):
         super(ImageSizerPlugin, self).__init__(site)
         self.cache = {}
 
+    def _handle_img(self, resource, src, width, height):
+        """
+        Return the attribute text to insert into an img tag ("" for none).
+
+        `src`, `width`, `height`: raw attribute values, or None if absent.
+        """
+        if height is not None and width is not None:
+            return ""           # Both dimensions are already there
+        if src is None:
+            self.logger.warn("[%s] has an img tag without src attribute" % resource)
+            return ""
+        if src not in self.cache:
+            if not re.match(r"(/[^/]|[^/]).*", src):
+                return ""       # Not a local link
+            if src.startswith("/"):     # Absolute resource
+                path = src.lstrip("/")
+                image = self.site.content.resource_from_relative_deploy_path(path)
+            else:                       # Relative resource
+                path = resource.node.source_folder.child(src)
+                image = self.site.content.resource_from_path(path)
+            if image is None:
+                self.logger.warn("[%s] has an unknown image" % resource)
+                return ""
+            if image.source_file.kind not in ['png', 'jpg', 'jpeg', 'gif']:
+                self.logger.warn(
+                        "[%s] has an img tag not linking to an image" % resource)
+                return ""
+            try:
+                self.cache[src] = Image.open(image.path).size
+            except IOError:
+                self.logger.warn("Unable to process image [%s]" % image)
+                self.cache[src] = (None, None)   # Remember the failure
+                return ""
+            self.logger.debug("Image [%s] is %s" % (src, self.cache[src]))
+        new_width, new_height = self.cache[src]
+        if new_width is None or new_height is None:
+            return ""           # Image known to be unreadable
+        try:
+            if width is not None:
+                return 'height="%d" ' % (int(width)*new_height/new_width)
+            elif height is not None:
+                return 'width="%d" ' % (int(height)*new_width/new_height)
+        except ValueError:
+            return ""           # Not a pixel count (e.g. "50%"): cannot scale
+        return 'height="%d" width="%d" ' % (new_height, new_width)
+
     def text_resource_complete(self, resource, text):
         """
         When the resource is generated, search for img tag and specify
         their sizes.
+
+        Some img tags may be missed, this is not a perfect parser.
         """
         try:
             mode = self.site.config.mode
@@ -36,51 +83,59 @@ class ImageSizerPlugin(Plugin):
             self.logger.debug("Skipping sizer in development mode.")
             return
 
-        soup = BeautifulSoup(text)
-        for img in soup.findAll('img'):
-            if img.has_key('width') and img.has_key('height'):
-                continue
-            if not img.has_key('src'):
-                self.logger.warn("[%s] has an img tag without src attribute" % resource)
+        pos = 0                 # Position in text
+        img = None              # Position of current img tag
+        state = "find-img"
+        while pos < len(text):
+            if state == "find-img":
+                img = text.find("<img", pos)
+                if img == -1:
+                    break           # No more img tag
+                pos = img + len("<img")
+                if not text[pos].isspace():
+                    continue        # Not an img tag
+                pos = pos + 1
+                tags = {"src": "",
+                        "width": "",
+                        "height": ""}
+                state = "find-attr"
                 continue
-            if not img['src'] in self.cache:
-                if not re.match(r"(/[^/]|[^/]).*", img['src']):
-                    # Not a local link
+            if state == "find-attr":
+                if text[pos] == ">":
+                    # We get our img tag
+                    insert = self._handle_img(resource,
+                                              tags["src"] or None,
+                                              tags["width"] or None,
+                                              tags["height"] or None)
+                    img = img + len("<img ")
+                    text = "".join([text[:img], insert, text[img:]])
+                    state = "find-img"
+                    pos = pos + 1
                     continue
-                if img['src'].startswith("/"):
-                    # Absolute resource
-                    path = img['src'].lstrip("/")
-                    image = self.site.content.resource_from_relative_deploy_path(path)
-                else:
-                    # Relative resource
-                    path = resource.node.source_folder.child(img['src'])
-                    image = self.site.content.resource_from_path(path)
-                if image is None:
-                    self.logger.warn(
-                        "[%s] has an unknown image" % resource)
+                attr = None
+                for tag in tags:
+                    if text[pos:(pos+len(tag)+1)] == ("%s=" % tag):
+                        attr = tag
+                        pos = pos + len(tag) + 1
+                        break
+                if not attr:
+                    pos = pos + 1
                     continue
-                if image.source_file.kind not in ['png', 'jpg', 'jpeg', 'gif']:
-                    self.logger.warn(
-                        "[%s] has an img tag not linking to an image" % resource)
+                if text[pos] in ["'", '"']:
+                    pos = pos + 1
+                state = "get-value"
+                continue
+            if state == "get-value":
+                if text[pos] == ">":
+                    state = "find-attr"
                     continue
-                # Now, get the size of the image
-                try:
-                    self.cache[img['src']] = Image.open(image.path).size
-                except IOError:
-                    self.logger.warn(
-                        "Unable to process image [%s]" % image)
-                    self.cache[img['src']] = (None, None)
+                if text[pos] in ["'", '"'] or text[pos].isspace():
+                    # We got our value
+                    pos = pos + 1
+                    state = "find-attr"
                     continue
-                self.logger.debug("Image [%s] is %s" % (img['src'],
-                                                        self.cache[img['src']]))
-            width, height = self.cache[img['src']]
-            if width is None:
+                tags[attr] = tags[attr] + text[pos]
+                pos = pos + 1
                 continue
-            if img.has_key('width'):
-                height = int(img['width'])*height/width
-                width = int(img['width'])
-            elif img.has_key('height'):
-                width = int(img['height'])*width/height
-                height = int(img['height'])
-            img['height'], img['width'] = height, width
-        return unicode(soup)
+
+        return text
diff --git a/hyde/tests/ext/test_images.py b/hyde/tests/ext/test_images.py
index 08f9c3b..7586f6c 100644
--- a/hyde/tests/ext/test_images.py
+++ b/hyde/tests/ext/test_images.py
@@ -81,4 +81,44 @@ class TestImageSizer(object):
 """
         html = self._generic_test_image(text)
 
+    def test_size_image_multiline(self):
+        # An img tag spread over several lines must still get its sizes.
+        rendered = self._generic_test_image(u"""
+     <img 
+src="/media/img/%s"
+>
+""" % IMAGE_NAME)
+        assert ' width="%d"' % IMAGE_SIZE[0] in rendered
+        assert ' height="%d"' % IMAGE_SIZE[1] in rendered
+
+    def test_size_multiple_images(self):
+        # Four img tags mixed with text: each must get both dimensions.
+        rendered = self._generic_test_image(u"""
+<img src="/media/img/%s">
+<img src="/media/img/%s">Hello <img src="/media/img/%s">
+<img src="/media/img/%s">Bye
+""" % ((IMAGE_NAME,)*4))
+        assert ' width="%d"' % IMAGE_SIZE[0] in rendered
+        assert ' height="%d"' % IMAGE_SIZE[1] in rendered
+        assert 'Hello ' in rendered
+        assert 'Bye' in rendered
+        fragments = rendered.split("<img")
+        # Count the fragments carrying each attribute: one per img tag.
+        assert sum(1 for part in fragments if ' width=' in part) == 4
+        assert sum(1 for part in fragments if ' height=' in part) == 4
 
+    def test_size_malformed1(self):
+        # The src value is missing its closing quote right before '>'.
+        rendered = self._generic_test_image(u"""
+<img src="/media/img/%s>
+""" % IMAGE_NAME)
+        assert ' width="%d"' % IMAGE_SIZE[0] in rendered
+        assert ' height="%d"' % IMAGE_SIZE[1] in rendered
+
+    def test_size_malformed2(self):
+        # The src value is missing its closing quote before another attribute.
+        rendered = self._generic_test_image(u"""
+<img src="/media/img/%s alt="hello">
+""" % IMAGE_NAME)
+        assert ' width="%d"' % IMAGE_SIZE[0] in rendered
+        assert ' height="%d"' % IMAGE_SIZE[1] in rendered