4ChanImgDl

From XPUB & Lens-Based wiki
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

Download all images from a 4Chan thread

#!/usr/bin/python
import time, os, re
import urllib, urllib2

# What do we want to get
threadurl = "http://boards.4chan.org/tv/res/7240846"
# Who are we?
header = {"User-agent": "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.7) Gecko/20100106 Ubuntu/9.10 (karmic) Firefox/3.5.7"}

# Make the request and store the result
request = urllib2.Request(url=threadurl, headers=header)
response = urllib2.urlopen(request)

imgthread = response.read()

# find all the image URLs
imgurl = re.compile("http://\w+\.4chan\.org/\w+/src/\d+\.(?:jpg|jpeg|png|gif|svg)")
imgurls = imgurl.findall(imgthread)

# We need a folder to store the images
targetfolder = threadurl.split('/')[-1]
if not os.path.isdir(targetfolder):
    os.mkdir(targetfolder)

# We read through the list of Image URLs and download them in the directory
for url in list(set(imgurls)):
    filename = url.split('/')[-1]
    print "downloading " + filename
    urllib.urlretrieve(str(url), targetfolder + "/" + filename)
    time.sleep(0.25)