PythonFlickrImageFeed
From XPUB & Lens-Based wiki
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import urllib2, urlparse, feedparser, re, os, sys
# from xml.sax.saxutils import quoteattr
# Captures the value of a double-quoted src attribute that follows "<img"
# (case-insensitive). "." does not match newlines here, so "<img" and its
# src attribute have to be on the same line of the searched text.
imgpat = re.compile(r"<img.+?src=\"(.+?)\"", re.I)
def downloadURL (url, foldername=""):
    """
    Download url to a local file and return the number of bytes written.

    The request is sent with a browser-like User-Agent header. The local
    file is named after the last component of the URL's path and is created
    inside foldername (or relative to the current directory when foldername
    is empty); an existing file with that name is overwritten. A URL whose
    path ends in "/" has no last component, so opening the output file fails.

    url        -- address of the resource to fetch
    foldername -- directory to save into; this function does not create it
    """
    request = urllib2.Request(url)
    user_agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.14) Gecko/20080418 Ubuntu/7.10 (gutsy) Firefox/2.0.0.14"
    request.add_header("User-Agent", user_agent)
    pagefile = urllib2.urlopen(request)
    try:
        # make a filename based on the URL, inside foldername (if given)
        urlpath = urlparse.urlparse(url)[2]
        filename = os.path.join(foldername, os.path.basename(urlpath))
        total = 0
        with open(filename, "wb") as out:
            while True:
                data = pagefile.read(1024)
                if not data:
                    break
                # each chunk is written exactly once, so the saved file
                # matches the response body and the returned count
                out.write(data)
                total += len(data)
    finally:
        # release the connection even if reading or writing failed
        pagefile.close()
    return total
def flickrSearch (word):
    """
    Return the image URLs found in Flickr's public photo feed for a tag.

    word -- tag to search for. NOTE: it is inserted into the query string
            as-is, so it has to be URL-safe already (no spaces, "&", ...).

    Returns a list holding, for each feed entry, the first <img> src found
    in the entry's summary. Entries that have no summary or no <img> tag
    are skipped instead of aborting the search with KeyError/IndexError.
    """
    FLICKR = """http://api.flickr.com/services/feeds/photos_public.gne?tags=%s&lang=en-us&format=rss_200"""
    url = FLICKR % word
    feed = feedparser.parse(url)
    ret = []
    for e in feed.entries:
        detail = e.get('summary_detail')
        if detail is None:
            continue
        # only the first image of the summary is wanted
        found = imgpat.search(detail.value)
        if found:
            ret.append(found.group(1))
    return ret
if __name__ == "__main__":
    # Command line use: pass one tag; every image URL found for it is
    # printed and downloaded into the "images" folder.
    if len(sys.argv) < 2:
        # exit with a usage hint instead of an IndexError traceback
        sys.exit("usage: %s TAG" % sys.argv[0])
    results = flickrSearch(sys.argv[1])
    foldername = "images"
    if foldername and not os.path.isdir(foldername):
        os.mkdir(foldername)
    for r in results:
        # parenthesised form prints the same in Python 2 and parses in Python 3
        print(r)
        downloadURL(r, foldername)