Die Zeit

From XPUB & Lens-Based wiki

A little script to see the current categories of the RSS feed of the German newspaper "Die Zeit".

http://pzwart3.wdka.hro.nl/~mwocher/cgi-bin/Rss_Zeit6.cgi

 
#!/usr/bin/env python
#-*- coding:utf-8 -*-

#import cgi
import lxml.etree, urllib2, codecs
  
# A CGI response must start with a header block that is terminated by an
# empty line; without a Content-Type the server cannot tell the browser
# that the body is HTML.
# Single-argument print(...) behaves the same as a Python 2 print statement.
print("Content-Type: text/html; charset=utf-8")
print("")

# Opening part of the HTML document; the closing tags are printed at the
# end of the script.
print("""
  <html>
   
   <head><title>Sample CGI Script</title></head>
   
   <body>
	""")

# One article counter per feed section. Counter name -> section label:
#   category_1   Politik
#   category_2   Wirtschaft
#   category_3   Gesellschaft
#   category_4   Kultur
#   category_5   Meinung
#   category_6   Wissen
#   category_7   Digital
#   category_8   Studium
#   category_9   Karriere
#   category_10  Lebensart
#   category_11  Reisen
#   category_12  Auto
#   category_13  Sport
# Every counter starts at zero.
(
    category_1, category_2, category_3, category_4, category_5,
    category_6, category_7, category_8, category_9, category_10,
    category_11, category_12, category_13,
) = (0,) * 13

# Factor: each counter is multiplied by this value to obtain the font size
# of its section label.
Faktor = 10

# Fetch the live RSS feed of Die Zeit.
f = urllib2.urlopen("http://newsfeed.zeit.de/index")

# NOTE(review): sys is imported here but not used by the visible script.
import sys

# Parse the response into an lxml element tree. The handle is closed in
# all cases so the HTTP connection is released even if parsing fails.
try:
    doc = lxml.etree.parse(f)
finally:
    f.close()

# XPath expressions address nodes inside a document, in the same way that
# absolute and relative paths address files in a file system.
 
# XML namespace prefix -> namespace URI.
# NOTE(review): not referenced by the visible script; presumably meant to be
# passed as the `namespaces` argument of xpath() calls -- confirm.
NS = dict(
    media='http://search.yahoo.com/mrss/',
    dc='http://purl.org/dc/elements/1.1/',
    cc='http://creativecommons.org/ns#',
    atom='http://www.w3.org/2005/Atom',
)
 
# Tally the <item> elements of the feed by the text of their first
# <category> descendant.
articles_per_section = {}
for item in doc.xpath("//item"):
    labels = item.xpath(".//category/text()")
    if not labels:
        # No category text: taking [0] would raise IndexError and abort the
        # whole page, so such items are simply left uncounted.
        continue
    section = labels[0]
    articles_per_section[section] = articles_per_section.get(section, 0) + 1

# Fold the tallies into the per-section counters. Sections that do not occur
# in the feed contribute 0; categories outside this list are ignored.
category_1 += articles_per_section.get("Politik", 0)
category_2 += articles_per_section.get("Wirtschaft", 0)
category_3 += articles_per_section.get("Gesellschaft", 0)
category_4 += articles_per_section.get("Kultur", 0)
category_5 += articles_per_section.get("Meinung", 0)
category_6 += articles_per_section.get("Wissen", 0)
category_7 += articles_per_section.get("Digital", 0)
category_8 += articles_per_section.get("Studium", 0)
category_9 += articles_per_section.get("Karriere", 0)
category_10 += articles_per_section.get("Lebensart", 0)
category_11 += articles_per_section.get("Reisen", 0)
category_12 += articles_per_section.get("Auto", 0)
category_13 += articles_per_section.get("Sport", 0)



# Render one <div> per section. The font size is the section's article
# count multiplied by Faktor, so sections with more articles appear larger
# (a count of 0 yields a font size of 0).
for count, label in (
    (category_1, "Politik"),
    (category_2, "Wirtschaft"),
    (category_3, "Gesellschaft"),
    (category_4, "Kultur"),
    (category_5, "Meinung"),
    (category_6, "Wissen"),
    (category_7, "Digital"),
    (category_8, "Studium"),
    (category_9, "Karriere"),
    (category_10, "Lebensart"),
    (category_11, "Reisen"),
    (category_12, "Auto"),
    (category_13, "Sport"),
):
    # The explicit "px" unit makes the value a valid CSS length; a bare
    # non-zero number is only accepted by browsers in quirks mode.
    # Single-argument print(...) behaves the same as a Python 2 print
    # statement.
    print('<div style="font-size:{0}px;" >{1}</div>'.format(count * Faktor, label))

# Close the document opened at the top of the script.
print("""</body></html>""")

# urls = doc.xpath("//enclosure/@url")