4ChanImgDl
Revision as of 20:31, 23 September 2010 by Migratebot (talk | contribs) (Created page with "= Download all images from a 4Chan thread =
<source lang="python"> #!/usr/bin/python
import time, os, re
import urllib, urllib2
# What do we want to get
threadurl = "htt...")
Download all images from a 4Chan thread
#!/usr/bin/python
# Download every image linked from a single 4chan thread into a local folder
# named after the thread number. Runs on both Python 2 and Python 3.
import contextlib
import os
import re
import time

try:  # Python 2 module layout
    from urllib import urlretrieve
    from urllib2 import Request, urlopen
except ImportError:  # Python 3 module layout
    from urllib.request import Request, urlopen, urlretrieve

# What do we want to get
threadurl = "http://boards.4chan.org/tv/res/7240846"
# Who are we? Sent with the thread page request only; the image downloads
# go through urlretrieve, which uses the library's default headers.
header = {"User-agent": "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.1.7) Gecko/20100106 Ubuntu/9.10 (karmic) Firefox/3.5.7"}
# Pattern for full-size image URLs (raw string so the backslash escapes reach
# the regex engine untouched).
imgurl = re.compile(r"http://\w+\.4chan\.org/\w+/src/\d+\.(?:jpg|jpeg|png|gif|svg)")


def fetch_thread(url, headers):
    """Request *url* with the given *headers* and return the page as text.

    The response is always closed, and undecodable bytes are replaced rather
    than raising, since only the ASCII image URLs are of interest.
    """
    request = Request(url=url, headers=headers)
    with contextlib.closing(urlopen(request)) as response:
        raw = response.read()
    return raw.decode("utf-8", "replace")


def extract_image_urls(html):
    """Return the image URLs found in *html*, de-duplicated, in page order."""
    seen = set()
    unique = []
    for url in imgurl.findall(html):
        if url not in seen:
            seen.add(url)
            unique.append(url)
    return unique


def download_images(urls, targetfolder, delay=0.25):
    """Save each URL in *urls* into *targetfolder*, creating it if needed.

    Sleeps *delay* seconds after every download to avoid hammering the server.
    """
    if not os.path.isdir(targetfolder):
        os.mkdir(targetfolder)
    for url in urls:
        filename = url.split('/')[-1]
        print("downloading " + filename)
        urlretrieve(str(url), os.path.join(targetfolder, filename))
        time.sleep(delay)


def main():
    """Fetch the configured thread and download all of its images."""
    imgthread = fetch_thread(threadurl, header)
    # The folder is named after the thread number; strip a trailing slash so
    # the last path component is never empty.
    targetfolder = threadurl.rstrip('/').split('/')[-1]
    download_images(extract_image_urls(imgthread), targetfolder)


if __name__ == "__main__":
    main()