User:Birgit Bachler/ass02
Epub Assignment 2: 4chan picture-book + embedded fonts: here
It combines a feed from Popsugar with a 4chan image channel and embeds the Teenage Girl.ttf font:
import time, os, re
import urllib, urllib2
# What do we want to get
# NOTE: the thread URL is empty here; set it to the address of the image
# thread to scrape before running, otherwise the request below fails.
threadurl = ""

# Who are we? Send a browser-like User-agent header with the request.
header = {"User-agent": "Mozilla/5.0 (X11; U; Linux i686; en-US; rv: Gecko/20100106 Ubuntu/9.10 (karmic) Firefox/3.5.7"}

# Make the request and store the result (the raw HTML of the thread page)
request = urllib2.Request(url=threadurl, headers=header)
response = urllib2.urlopen(request)
try:
    imgthread = response.read()
finally:
    # Always release the connection, even if reading fails.
    response.close()

# find all the image URLs
imgurl = re.compile(r"http://\w+\.4chan\.org/\w+/src/\d+\.(?:jpg|jpeg|png|gif|svg)")
imgurls = imgurl.findall(imgthread)

# We need a folder to store the images
targetfolder = "images"
if not os.path.isdir(targetfolder):
    os.mkdir(targetfolder)

# We read through the list of Image URLs and download them in the directory.
# set() drops duplicates so every picture is fetched only once.
for url in set(imgurls):
    filename = url.split('/')[-1]
    print("downloading " + filename)
    urllib.urlretrieve(str(url), os.path.join(targetfolder, filename))
# feedparser fetches and parses the RSS/Atom feed.
import feedparser
# Parse the feed; the titles of its entries are used as captions for the
# pictures when the book content is generated further down.
# NOTE(review): the feed URL is an empty string here - presumably the Popsugar
# feed address is meant to go here; confirm and fill it in before running.
sugar = feedparser.parse("")
import shutil
# Remove previous epub files; shutil.copytree() below refuses to copy into a
# directory that already exists.
if os.path.isdir("/tmp/epub"):
    shutil.rmtree("/tmp/epub")

# Copy ePub skeleton
shutil.copytree("epub-raw-files", "/tmp/epub")

# Copy images
for image in os.listdir("images"):
    shutil.copyfile("images/" + image, "/tmp/epub/OEBPS/images/" + image)
    # print image

### Update the container's file listing

# Every picture in the book except the cover, which has its own fixed manifest
# entry. Listing the directory once keeps the manifest and the page content in
# the same order, and filtering the cover out *before* pairing pictures with
# feed entries means no headline gets dropped.
pictures = [name for name in os.listdir("/tmp/epub/OEBPS/images/")
            if name != "cover.png"]

# File extensions whose MIME subtype differs from the extension itself.
subtypes = {"jpg": "jpeg", "svg": "svg+xml"}

# Create item list (manifest entries, one per picture)
manifestparts = []
for image in pictures:
    extension = image.split('.')[-1].lower()
    mediatype = "image/" + subtypes.get(extension, extension)
    # XML ids must not start with a digit, hence the "img-" prefix.
    manifestparts.append('<item id="img-' + image + '" href="images/' + image
                         + '" media-type="' + mediatype + '"/>')
imageitems = "".join(manifestparts)

# Create content list: one headline from the feed above each picture.
# zip() stops at the shorter of the two sequences.
contentparts = []
for (i, image) in zip(sugar.entries, pictures):
    # Escape markup characters so the headline cannot break the XHTML;
    # "&" has to be replaced first.
    title = i["title"].replace("&", "&amp;").replace("<", "&lt;").encode("utf-8")
    contentparts.append('<p><h2>' + title + '</h2><img src ="images/' + image + '"/></p>')
contentitems = "".join(contentparts)
# Add images to listing: regenerate content.opf (the package document) from
# the fixed skeleton entries plus one manifest item per picture (imageitems).
# The <metadata>, <manifest>, <spine> and <guide> wrappers and the namespace
# declarations are required by the OPF 2.0 package format; the NCX table of
# contents must be declared as application/x-dtbncx+xml.
content = """<?xml version='1.0' encoding='utf-8'?>
<package xmlns="http://www.idpf.org/2007/opf"
unique-identifier="bookid" version="2.0">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
<dc:title>Here we are</dc:title>
<dc:identifier id="bookid">urn:uuid:12345</dc:identifier>
<meta name="cover" content="cover-image" />
</metadata>
<manifest>
<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
<item id="cover" href="title.html" media-type="application/xhtml+xml"/>
<item id="content" href="content.html" media-type="application/xhtml+xml"/>
<item id="cover-image" href="images/cover.png" media-type="image/png"/>
<item id="css" href="stylesheet.css" media-type="text/css"/>
<item id="myfont" href="teenage.ttf" media-type="application/x-font-truetype"/>
""" + imageitems + """
</manifest>
<spine toc="ncx">
<itemref idref="cover" linear="no"/>
<itemref idref="content"/>
</spine>
<guide>
<reference href="title.html" type="cover" title="Cover"/>
</guide>
</package>
"""

# The with-block closes the file even if the write fails.
with open("/tmp/epub/OEBPS/content.opf", "w") as opf:
    opf.write(content)
### Place the images in the book

# Add images in main html file: wrap the generated headline/picture pairs
# (contentitems) in a minimal XHTML page that links the book's stylesheet.
content = """<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Here we go:</title>
<link type="text/css" rel="stylesheet" media="all" href="stylesheet.css" />
</head>
<body>
<h1>What's on today:</h1>
<div style="text-align:center">""" + contentitems + """</div>
</body>
</html>
"""

# The with-block closes the file even if the write fails.
with open("/tmp/epub/OEBPS/content.html", "w") as html:
    html.write(content)

print("done - container ready")

# Move into the container directory.
# NOTE(review): the original line here was not valid Python and only showed
# "cd /tmp/epub"; presumably a packaging (zip) command followed - confirm and
# add it if the .epub archive should be built by this script.
os.chdir("/tmp/epub")
Epub Assignment 1: ecub-book out of essay here
Epub Hacking Session 1 outcome here