Die Zeit
Revision as of 21:00, 7 October 2012 by Michael Murtaugh (talk | contribs)
A little script that shows the current categories in the RSS feed of the German newspaper "Die Zeit".
http://pzwart3.wdka.hro.nl/~mwocher/cgi-bin/Rss_Zeit6.cgi
#!/usr/bin/env python
#-*- coding:utf-8 -*-
#import cgi
import lxml.etree, urllib2, codecs
import sys

# Feed categories in the order they are displayed on the page
# (previously tracked by hand as category_1 .. category_13).
CATEGORIES = (
    "Politik",
    "Wirtschaft",
    "Gesellschaft",
    "Kultur",
    "Meinung",
    "Wissen",
    "Digital",
    "Studium",
    "Karriere",
    "Lebensart",
    "Reisen",
    "Auto",
    "Sport",
)

# Scale factor: font size (in pixels) added per feed item in a category.
Faktor = 10

# Live RSS feed that is fetched on every request.
FEED_URL = "http://newsfeed.zeit.de/index"

# Namespace prefixes for XPath queries. The queries below only touch the
# un-namespaced <item> and <category> elements and therefore do not pass
# this mapping; it is kept for queries that need prefixed elements.
NS = {
    'media': 'http://search.yahoo.com/mrss/',
    'dc': 'http://purl.org/dc/elements/1.1/',
    'cc': 'http://creativecommons.org/ns#',
    'atom': 'http://www.w3.org/2005/Atom',
}

PAGE_HEAD = """
<html>
<head><title>Sample CGI Script</title></head>
<body>
"""

PAGE_FOOT = """</body></html>"""


def count_categories(names, categories=CATEGORIES):
    """Count how often each known category occurs in *names*.

    names      -- iterable of category strings (one per feed item).
    categories -- the category names to count; anything else is ignored.

    Returns a dict mapping every name in *categories* to its count
    (0 for categories that never occur).
    """
    counts = dict.fromkeys(categories, 0)
    for name in names:
        if name in counts:
            counts[name] += 1
    return counts


def render_category_divs(counts, factor=Faktor, categories=CATEGORIES):
    """Return one HTML <div> line per category, sized by its count.

    counts     -- dict as returned by count_categories().
    factor     -- pixels of font size per counted item.
    categories -- category names, in output order.

    A category with a count of zero gets font-size 0 and is therefore
    not visible, exactly as before.
    """
    # An explicit "px" unit is emitted because CSS lengths other than 0
    # are only valid with a unit.
    template = """<div style="font-size:{0}px;" >{1}</div>"""
    return [
        template.format(counts.get(name, 0) * factor, name)
        for name in categories
    ]


def first_categories(doc):
    """Yield the first <category> text of every <item> in *doc*.

    doc -- a parsed document offering an lxml-style ``xpath()`` method.

    Items without any <category> text are skipped instead of raising
    IndexError.
    """
    # XPath addresses nodes inside the document, not filesystem paths.
    for item in doc.xpath("//item"):
        found = item.xpath(".//category/text()")
        if found:
            yield found[0]


def main():
    """Fetch the feed and print the category cloud as a CGI response."""
    # A CGI response must start with its header block followed by a blank
    # line; without it the server cannot tell the body is HTML.
    print("Content-Type: text/html")
    print("")
    print(PAGE_HEAD)

    # Read from the live URL and always release the connection.
    feed = urllib2.urlopen(FEED_URL)
    try:
        doc = lxml.etree.parse(feed)
    finally:
        feed.close()

    counts = count_categories(first_categories(doc))
    for line in render_category_divs(counts):
        print(line)
    print(PAGE_FOOT)


if __name__ == "__main__":
    main()
# urls = doc.xpath("//enclosure/@url")