RSS Feed

From XPUB & Lens-Based wiki
Revision as of 21:30, 18 January 2012 by Marie Wocher (talk | contribs) (Created page with "A little script to see the current categories of the RSS Feed of german Newspaper "Die Zeit" http://pzwart3.wdka.hro.nl/~mwocher/cgi-bin/Rss_Zeit6.cgi <source lang="python"> ...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

A little script that shows the current categories of the RSS feed of the German newspaper "Die Zeit".

http://pzwart3.wdka.hro.nl/~mwocher/cgi-bin/Rss_Zeit6.cgi

<source lang="python">


#!/usr/bin/env python
# -*- coding:utf-8 -*-
# import cgi

import lxml.etree, urllib2, codecs

# A CGI response has to begin with a header block that is terminated by a
# blank line. Without it, the leading blank line of the page template below
# is read as the end of an empty header block and the content type is left
# to the web server.
print("Content-Type: text/html")
print("")

# Opening part of the HTML page; the matching closing tags are printed at the
# very end of the script.
print("""

 <html>
  
  <head><title>Sample CGI Script</title></head>
  
  <body>

""")

# category_1=Politik
# category_2=Wirtschaft
# category_3=Gesellschaft
# category_4=Kultur
# category_5=Meinung
# category_6=Wissen
# category_7=Digital
# category_8=Studium
# category_9=Karriere
# category_10=Lebensart
# category_11=Reisen
# category_12=Auto
# category_13=Sport
  1. Counters

category_1=0 category_2=0 category_3=0 category_4=0 category_5=0 category_6=0 category_7=0 category_8=0 category_9=0 category_10=0 category_11=0 category_12=0 category_13=0

  1. Faktor

Faktor=10

  1. Read from live URL

f = urllib2.urlopen("http://newsfeed.zeit.de/index")

  1. Read from local file

import sys

  1. Read in the XML file

doc = lxml.etree.parse(f)


# Emit the label "Gesellschaft", padded by two blank lines on either side,
# before the feed items are counted.
# NOTE(review): any markup that originally surrounded this label is not
# visible here -- confirm against the deployed script.
print("\n\nGesellschaft\n\n")

# Again brief review of file system paths (absolute vs. relative)
# XPATH = super paths for documents, not filesystems!

# Prefix -> namespace URI table (Media RSS, Dublin Core, Creative Commons,
# Atom).
# NOTE(review): not passed to any xpath() call in the visible script.
NS = dict(
    media='http://search.yahoo.com/mrss/',
    dc='http://purl.org/dc/elements/1.1/',
    cc='http://creativecommons.org/ns#',
    atom='http://www.w3.org/2005/Atom'
)

  1. Doing something which each item individually (maybe extracting the names

for item in doc.xpath("//item"): category = item.xpath(".//category/text()")[0]

if category == "Politik": category_1=category_1+1

if category == "Wirtschaft": category_2=category_2+1

if category == "Gesellschaft": category_3=category_3+1

if category == "Kultur": category_4=category_4+1

if category == "Meinung": category_5=category_5+1

if category == "Wissen": category_6=category_6+1

if category == "Digital": category_7=category_7+1

if category == "Studium": category_8=category_8+1

if category == "Karriere": category_9=category_9+1

if category == "Lebensart": category_10=category_10+1

if category == "Reisen": category_11=category_11+1

if category == "Auto": category_12=category_12+1

if category == "Sport": category_13=category_13+1


# Print one labelled section per category, in the same order as the
# category_1 .. category_13 counters. Each label is padded by two blank lines
# on either side.
# NOTE(review): neither the counters nor Faktor appear in the output that is
# visible here; presumably the markup that used them was lost -- confirm
# against the deployed script before relying on this output.
for section in (
    "Politik",
    "Wirtschaft",
    "Gesellschaft",
    "Kultur",
    "Meinung",
    "Wissen",
    "Digital",
    "Studium",
    "Karriere",
    "Lebensart",
    "Reisen",
    "Auto",
    "Sport",
):
    print("\n\n%s\n\n" % section)

# Close the <body> and <html> elements opened at the top of the script.
print("""</body></html>""")

# urls = doc.xpath("//enclosure/@url")