Remixing an RSS feed with Python

From XPUB & Lens-Based wiki
Revision as of 15:20, 27 October 2011 by Michael Murtaugh (talk | contribs) (Created page with "<source lang="python"> import lxml.etree, urllib2, codecs # Precede by a quick demo of open-ing a file and parsing it?! # in fact ... use wget + file for testing! # Read from l...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
import lxml.etree, urllib2, codecs

# Precede by a quick demo of open-ing a file and parsing it?!
# in fact ... use wget + file for testing!

# Read from live URL
# f = urllib2.urlopen("http://www.openclipart.org/media/feed/rss/woman")

# Read from local file
import sys
# The feed path is the first command-line argument; fail with a readable
# message instead of a bare IndexError when it is missing.
if len(sys.argv) < 2:
    sys.exit("usage: python %s FEED_FILE" % sys.argv[0])

# Open the file in binary mode and let the XML parser work out the encoding
# from the document itself: lxml advises against decoding XML to unicode
# before handing it to the parser. The with-block also guarantees the file
# is closed as soon as parsing is finished.
with open(sys.argv[1], "rb") as f:
    # Read in the XML file
    doc = lxml.etree.parse(f)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(doc)

# Again, a brief review of file system paths (absolute vs. relative).
# XPath = the same idea of "paths", but addressing nodes inside a document
# rather than files on a disk!

# Prefix -> namespace URI map handed to xpath(..., namespaces=NS) so that
# prefixed names such as media:thumbnail and dc:creator can be resolved.
NS = dict(
    media='http://search.yahoo.com/mrss/',
    dc='http://purl.org/dc/elements/1.1/',
    cc='http://creativecommons.org/ns#',
    atom='http://www.w3.org/2005/Atom',
)

from xml.sax.saxutils import escape


def _first(results, default=None):
    """Return the first XPath result, or *default* when nothing matched."""
    return results[0] if results else default


def _attr(value):
    """Escape *value* for use inside a double-quoted HTML attribute."""
    return escape(value, {'"': "&quot;"})


# Do something with each item individually: extract a few fields and print
# an HTML snippet that links the thumbnail and title to the item's page.
items = doc.xpath("//item")  # evaluate the query once and reuse the result
print("%d items" % len(items))
for item in items:
    # Each xpath() call returns a list, which is empty when the item lacks
    # that element; _first() avoids an IndexError in that case.
    svg = _first(item.xpath(".//enclosure/@url"))
    thumbnail_url = _first(
        item.xpath(".//media:thumbnail/@url", namespaces=NS))
    creator = _first(item.xpath(".//dc:creator/text()", namespaces=NS))
    title = _first(item.xpath(".//title/text()"), u"")
    link = _first(item.xpath(".//link/text()"))

    # Without a link target or a thumbnail there is nothing to render.
    if link is None or thumbnail_url is None:
        continue

    # The feed content is untrusted, so escape it before it goes into HTML.
    # A unicode template keeps non-ASCII titles from being forced through
    # the ASCII codec on Python 2.
    print(u"""<div>
<a href="{1}"><img src="{2}" />{0}</a>
</div>""".format(escape(title), _attr(link), _attr(thumbnail_url)))

# urls = doc.xpath("//enclosure/@url")