User:Megan Hoogenboom/ass2

From XPUB & Lens-Based wiki

Problem 1

import feedparser

# Fetch the NRC Next blog feed and print each entry title as a word
# palindrome: the title's words followed by the same words in reverse order,
# with the last word acting as the pivot (it appears only once).
feed = feedparser.parse("http://feeds.nrcnext.nl/nrcnext-blog")

for entry in feed.entries:
    title_words = entry.title.split()
    mirrored = list(reversed(title_words))
    # Leave the final word out of the forward half; the mirrored half
    # already starts with it.
    palindrome = " ".join(title_words[:-1] + mirrored)
    print(palindrome)


Problem 2

import feedparser
import urllib2, urlparse, os, sys
import html5lib


def openURL(url):
    """
    Open url while sending a browser-like User-Agent header.

    Returns a tuple (page, actualurl): the open response object and the URL
    the response reports via geturl(), which may differ from url when the
    request was redirected. The response is returned still open.
    """
    req = urllib2.Request(url)
    req.add_header(
        "User-Agent",
        "Mozilla/5.0 (X11; U; Linux x86_64; fr; rv:1.9.1.5) Gecko/20091109 Ubuntu/9.10 (karmic) Firefox/3.5.5",
    )
    response = urllib2.urlopen(req)
    return (response, response.geturl())


# Build an html5lib parser that produces a DOM tree, then fetch and parse the
# NRC Next blog feed with it.
parser = html5lib.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder("dom"))

(f, url) = openURL("http://feeds.nrcnext.nl/nrcnext-blog")
try:
    tree = parser.parse(f)
finally:
    # Release the HTTP response even when parsing raises.
    f.close()
# Merge adjacent text nodes in the parsed tree.
tree.normalize()

# Write an XHTML page to stdout containing every <img> element found in the
# parsed document.
print("""
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
    <title>comic</title>
    <link type="text/css" rel="stylesheet" media="all" href="stylesheet.css" />
  </head>
  <body>""")

imgs1 = tree.getElementsByTagName("img")

# NOTE(review): a zip() loop pairing two lists was sketched here, but the
# lists it referred to (list1/list2, l1/l2) were never defined, so it could
# only raise NameError. Reintroduce it once the sequences to pair up exist.

for tag in imgs1:
    # Serialise each image element back to markup.
    print(tag.toxml())
print("""
  </body>
</html>""")