4ChanImgDl

From XPUB & Lens-Based wiki
Revision as of 20:31, 23 September 2010 by Migratebot (talk | contribs) (Created page with "= Download all images from a 4Chan thread = <source lang="python"> #!/usr/bin/python import time, os, re import urllib, urllib2 # What do we want to get threadurl = "htt...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Download all images from a 4Chan thread

#!/usr/bin/python
import time, os, re
import urllib, urllib2

# Download every image linked from one 4chan thread into a local folder
# named after the thread number (the last path component of the thread URL).

# What do we want to get
threadurl = "http://boards.4chan.org/tv/res/7240846"
# Who are we? Identify as a desktop Firefox browser rather than the
# default Python user agent.
header = {"User-agent": "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.7) Gecko/20100106 Ubuntu/9.10 (karmic) Firefox/3.5.7"}

# Make the request and store the result
request = urllib2.Request(url=threadurl, headers=header)
response = urllib2.urlopen(request)
try:
    imgthread = response.read()
finally:
    # Always release the connection, even if reading the page fails
    response.close()

# Find all the image URLs. Raw string so the regex escapes (\w, \d, \.)
# reach the regex engine untouched.
imgurl = re.compile(r"http://\w+\.4chan\.org/\w+/src/\d+\.(?:jpg|jpeg|png|gif|svg)")
imgurls = imgurl.findall(imgthread)

# We need a folder to store the images. Strip any trailing slash first,
# otherwise the last path component would be an empty string.
targetfolder = threadurl.rstrip('/').split('/')[-1]
if not os.path.isdir(targetfolder):
    os.mkdir(targetfolder)

# We read through the image URLs and download them into the directory.
# The set drops duplicates: the same URL can be matched more than once
# in the page source.
for url in set(imgurls):
    filename = url.split('/')[-1]
    print("downloading " + filename)
    urllib.urlretrieve(str(url), os.path.join(targetfolder, filename))
    # Short pause between downloads so we do not hammer the server
    time.sleep(0.25)