# User:Eleanorg/RSS Manipulation

import lxml.etree, urllib2, codecs
 
# Precede by a quick demo of open-ing a file and parsing it?!
# in fact ... use wget + file for testing!
 
# Read from live URL
f = urllib2.urlopen("http://london.indymedia.org/articles.rss")
 
# Read from local file
#import sys
#f = codecs.open(sys.argv[1], encoding="utf-8")

# Read in the XML file, then close the source handle whether parsing
# succeeded or failed, so it is not left open for the rest of the run.
try:
    doc = lxml.etree.parse(f)
finally:
    f.close()
 
# Single-argument print(...) prints the same thing as the Python 2
# print statement used elsewhere in this script.
print(doc)
 
# Again brief review of file system paths (absolute vs. relative)
# XPATH = super paths for documents, not filesystems!
 
# Prefix -> namespace URI map for namespaced feed elements, in the
# shape lxml's xpath(..., namespaces=NS) accepts.
NS = dict(
    media='http://search.yahoo.com/mrss/',
    dc='http://purl.org/dc/elements/1.1/',
    cc='http://creativecommons.org/ns#',
    atom='http://www.w3.org/2005/Atom',
)

# grab top 3 items
# Run the XPath query once and slice the result; slicing (unlike
# indexing 0..2) also copes with a feed that has fewer than 3 items.
stories = doc.xpath(".//item/description/text()")[:3]

print(stories)

# generate new text with dadadodo from these 3 items

for story in stories:
    print(story)