PythonFlickrImageFeed

From Media Design: Networked & Lens-Based wiki
Jump to navigation Jump to search
#!/usr/bin/env python
#-*- coding:utf-8 -*-

import urllib2, urlparse, feedparser, re, os, sys
# from xml.sax.saxutils import quoteattr

# Matches an <img ...> tag (case-insensitively) and captures the value of
# its double-quoted src attribute.
imgpat = re.compile(r"<img.+?src=\"(.+?)\"", flags=re.IGNORECASE)

def downloadURL(url, foldername=""):
    """
    Download url and save it to a file inside foldername.

    The request is sent with a browser-like User-Agent header.  The local
    file name is the last component of the URL's path (the query string is
    not part of it), joined onto foldername; the file is opened in "wb"
    mode, so an existing file of that name is overwritten.

    url        -- address of the resource to fetch
    foldername -- directory to save into; it must already exist
                  ("" saves relative to the current directory)

    Returns the number of bytes written.
    Errors raised by urllib2.urlopen() or open() propagate to the caller.
    """
    request = urllib2.Request(url)
    user_agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.14) Gecko/20080418 Ubuntu/7.10 (gutsy) Firefox/2.0.0.14"
    request.add_header("User-Agent", user_agent)

    # make a filename based on the URL, inside foldername (if given)
    urlpath = urlparse.urlparse(url)[2]
    filename = os.path.join(foldername, os.path.basename(urlpath))

    pagefile = urllib2.urlopen(request)
    # try/finally instead of "with": Python 2 urllib2 responses do not
    # support the context-manager protocol.
    try:
        with open(filename, "wb") as out:
            total = 0
            # copy in 1 KiB chunks so large files are never held in memory
            while True:
                data = pagefile.read(1024)
                if not data:
                    break
                total += len(data)
                out.write(data)
    finally:
        pagefile.close()
    return total

def flickrSearch(word):
    """
    Query Flickr's public photo feed for a tag and collect image URLs.

    word -- tag to search for; it is URL-escaped before being placed in
            the feed URL's query string

    Returns a list holding, for each feed entry, the first <img> src found
    in the entry's summary, in feed order.  Entries that have no summary
    or whose summary contains no matching <img> tag are skipped.
    """
    # Python 2 home of quote(); imported locally so this function does not
    # depend on an extra module-level import.
    from urllib import quote

    FLICKR = """http://api.flickr.com/services/feeds/photos_public.gne?tags=%s&lang=en-us&format=rss_200"""
    # escape the tag so spaces, "&", "#" etc. cannot corrupt the query string
    url = FLICKR % quote(word)
    feed = feedparser.parse(url)
    ret = []
    for e in feed.entries:
        detail = e.get('summary_detail')
        if detail is None:
            continue
        found = imgpat.search(detail.value)
        if found:
            ret.append(found.group(1))
    return ret


if __name__ == "__main__":
    # Command-line entry point: search Flickr for the tag given as the first
    # argument and download every image found into the "images" folder.
    if len(sys.argv) < 2:
        # sys.exit() with a string prints it to stderr and exits with status 1
        sys.exit("usage: %s TAG" % sys.argv[0])

    results = flickrSearch(sys.argv[1])
    foldername = "images"

    # create the target folder on first run
    if foldername and not os.path.isdir(foldername):
        os.mkdir(foldername)
    for r in results:
        # echo each image URL before fetching it
        print(r)
        downloadURL(r, foldername)