User:Megan Hoogenboom/ass2
< User:Megan Hoogenboom
Revision as of 20:35, 23 September 2010 by Migratebot (talk | contribs) (Created page with "
== Problem 1 ==
<source lang="text"> import feedparser
d = feedparser.parse("http://feeds.nrcnext.nl/nrcnext-blog")
for item in d.entries:
#print item.title
...")
Problem 1
import feedparser
def mirror_title(title):
    """Return *title* followed by its own words in reverse order.

    The title is split on whitespace. The last word is the pivot and is
    emitted only once, so ``"a b c"`` becomes ``"a b c b a"``. A title
    with no words yields an empty string.
    """
    words = title.split()
    # Drop the final word from the forward half so it is not repeated at
    # the point where the forward and reversed halves meet.
    return " ".join(words[:-1] + words[::-1])


if __name__ == "__main__":
    # Print the mirrored title of every entry in the feed.
    d = feedparser.parse("http://feeds.nrcnext.nl/nrcnext-blog")
    for item in d.entries:
        # print(...) with a single argument behaves the same on
        # Python 2 and Python 3.
        print(mirror_title(item.title))
Problem 2
import feedparser
import urllib2, urlparse, os, sys
import html5lib
def openURL(url):
    """
    Open *url* while presenting a desktop-browser User-Agent header.

    Returns a tuple (page, actualurl):
      page      -- the open response object from urllib2.urlopen;
                   this function does not close it
      actualurl -- the URL reported by the response's geturl(), which
                   may be different from *url* in the case of a redirect
    """
    browser_id = "Mozilla/5.0 (X11; U; Linux x86_64; fr; rv:1.9.1.5) Gecko/20091109 Ubuntu/9.10 (karmic) Firefox/3.5.5"
    # Supplying the header through the constructor is equivalent to
    # calling add_header() on the request afterwards.
    response = urllib2.urlopen(
        urllib2.Request(url, headers={"User-Agent": browser_id})
    )
    return (response, response.geturl())
# Parse the fetched document with html5lib's "dom" tree builder so the
# result can be queried with getElementsByTagName() below.
parser = html5lib.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder("dom"))
(f, url) = openURL("http://feeds.nrcnext.nl/nrcnext-blog")
try:
    tree = parser.parse(f)
finally:
    # Close the response even when parsing raises.
    f.close()
tree.normalize()

# Opening part of the generated page. print(...) with a single argument
# behaves the same on Python 2 and Python 3.
print("""
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>comic</title>
<link type="text/css" rel="stylesheet" media="all" href="stylesheet.css" />
</head>
<body>""")

imgs1 = tree.getElementsByTagName("img")

# NOTE(review): the earlier draft had a stray ':' after the assignment
# above (a syntax error) and a zip(list1, list2) / zip(l1, l2) loop whose
# operands were never defined (NameError). Both were removed.
# TODO: if the plan was to pair these images with a second list, define
# that list and reintroduce the zip() loop.

# Emit every <img> element found in the parsed document as markup.
for tag in imgs1:
    print(tag.toxml())

# Closing part of the generated page.
print("""
</body>
</html>""")