|
|
Line 14: |
Line 14: |
|
| |
|
| == Examples == | | == Examples == |
| === Die Zeit ===
| | [[Die Zeit]] |
| A little script that shows the current categories of the RSS feed of the German newspaper "Die Zeit"
| |
| | |
| http://pzwart3.wdka.hro.nl/~mwocher/cgi-bin/Rss_Zeit6.cgi
| |
| | |
| <source lang="python">
| |
| | |
| | |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""CGI script: visualise how often each section occurs in the "Die Zeit" feed.

The script downloads the RSS feed, takes the first <category> of every
<item>, counts the items per known section and prints an HTML page in which
each section name is rendered with a font size proportional to its count.
"""

import codecs  # unused below; kept from the original import list
import sys

# Live feed that is analysed.
FEED_URL = "http://newsfeed.zeit.de/index"

# Sections that are reported, in display order. Items whose category is not
# listed here are ignored.
CATEGORIES = (
    "Politik",
    "Wirtschaft",
    "Gesellschaft",
    "Kultur",
    "Meinung",
    "Wissen",
    "Digital",
    "Studium",
    "Karriere",
    "Lebensart",
    "Reisen",
    "Auto",
    "Sport",
)

# Font-size pixels added per feed item found in a section.
Faktor = 10

# Namespace prefixes, presumably intended for namespaced XPath queries on the
# feed (e.g. doc.xpath("//media:content", namespaces=NS)); not used below.
NS = {
    'media': 'http://search.yahoo.com/mrss/',
    'dc': 'http://purl.org/dc/elements/1.1/',
    'cc': 'http://creativecommons.org/ns#',
    'atom': 'http://www.w3.org/2005/Atom',
}


def open_feed(url=FEED_URL):
    """Open *url* and return the file-like HTTP response.

    The caller is responsible for closing the returned object.
    """
    try:
        from urllib2 import urlopen  # Python 2, as in the original script
    except ImportError:
        from urllib.request import urlopen  # Python 3
    return urlopen(url)


def first_categories(doc):
    """Yield the first category text of every <item> in the parsed feed.

    Items without any category text are skipped instead of raising
    IndexError.
    """
    for item in doc.xpath("//item"):
        texts = item.xpath(".//category/text()")
        if texts:
            yield texts[0]


def count_categories(categories):
    """Return a dict mapping each name in CATEGORIES to its number of hits.

    *categories* is any iterable of category strings; names that are not in
    CATEGORIES are ignored. Every known section is present in the result,
    with 0 when it never occurred.
    """
    counts = dict.fromkeys(CATEGORIES, 0)
    for category in categories:
        if category in counts:
            counts[category] += 1
    return counts


def render_page(counts, factor=Faktor):
    """Return the HTML page for *counts* as a string.

    One <div> is produced per section, in CATEGORIES order, with a font size
    of ``count * factor`` pixels. The explicit ``px`` unit is needed because
    unitless non-zero lengths are only honoured in quirks mode.
    """
    lines = [
        "<html>",
        "<head><title>Sample CGI Script</title></head>",
        "<body>",
    ]
    for name in CATEGORIES:
        lines.append(
            '<div style="font-size:{0}px;">{1}</div>'.format(
                counts.get(name, 0) * factor, name
            )
        )
    lines.append("</body></html>")
    return "\n".join(lines) + "\n"


def main():
    """Fetch the feed, count the sections and write the CGI response."""
    # Imported here so the pure helpers above stay importable without lxml.
    import lxml.etree

    feed = open_feed()
    try:
        doc = lxml.etree.parse(feed)
    finally:
        # Always release the HTTP connection, even if parsing fails.
        feed.close()

    counts = count_categories(first_categories(doc))

    # A CGI response must start with a header block followed by a blank line.
    sys.stdout.write("Content-Type: text/html; charset=utf-8\n\n")
    sys.stdout.write(render_page(counts))


if __name__ == "__main__":
    main()
| |
RSS feeds are a way of publishing lists on the web, such as the latest posts to a blog or the audio files of a podcast. RSS originally meant RDF Site Summary, was popularized by Dave Winer and the blogging community as Really Simple Syndication, and is now said to stand for Rich Site Summary. RSS is designed to make it easy for software, like a "pod catcher" or a feed reader, to automatically collect and download information from websites that a user has "subscribed" to. Feeds can also be useful for writing scripts that use public websites as services, for instance to request the latest images added to Flickr with a given tag, or to search a set of news sites for their latest headlines.
Some examples of public feeds
Examples
Die Zeit