User:Eleanorg/RSS Manipulation
From XPUB & Lens-Based wiki
import lxml.etree, urllib2, codecs
# TODO: precede this with a quick demo of opening a local file and parsing it?
# In fact, use wget to save the feed to a file and test against that!
# Read from live URL
f = urllib2.urlopen("http://london.indymedia.org/articles.rss")
# Read from local file
#import sys
#f = codecs.open(sys.argv[1], encoding="utf-8")
# Read in the XML file
try:
    # Parse the whole feed from the open file-like object into an element tree.
    doc = lxml.etree.parse(f)
finally:
    # Always release the HTTP connection (or file handle), even if parsing fails.
    f.close()
# Single-argument print(...) prints the same as the Python 2 print statement.
print(doc)
# Again, a brief review of file system paths (absolute vs. relative).
# XPath = "super paths" for navigating documents, not filesystems!
# Mapping of XML namespace prefix -> namespace URI.
# NOTE(review): not referenced by the queries visible in this script;
# presumably intended for doc.xpath(..., namespaces=NS) -- confirm.
NS = dict(
    media='http://search.yahoo.com/mrss/',
    dc='http://purl.org/dc/elements/1.1/',
    cc='http://creativecommons.org/ns#',
    atom='http://www.w3.org/2005/Atom',
)
# grab top 3 items
# Run the XPath query once and slice, so a feed with fewer than three
# <item> descriptions yields a shorter list instead of raising IndexError.
stories = doc.xpath(".//item/description/text()")[:3]
print(stories)
# generate new text with dadadodo from these 3 items
# NOTE(review): this loop only prints each description; presumably the output
# is meant to be fed to dadadodo externally -- confirm.
for story in stories:
    print(story)