User:Albert Jongstra/duplicators/td problemset
< User:Albert Jongstra | duplicators
Revision as of 20:33, 23 September 2010 by Migratebot (talk | contribs) (Created page with "== PALINDROME HEADLINES ==
<source lang="text"> import feedparser
d = feedparser.parse("http://blogtotheoldskool.com/?feed=rss2")
for item in d.entries:
#print it...")
PALINDROME HEADLINES
import feedparser
# Fetch the blog's RSS feed and print every headline as a word-level
# palindrome: the title's words followed by the same words in reverse.
feed = feedparser.parse("http://blogtotheoldskool.com/?feed=rss2")
for entry in feed.entries:
    tokens = entry.title.split()
    # The forward half omits the last word so that it appears only once,
    # as the pivot of the palindrome ("a b c" -> "a b c b a").
    mirrored = tokens[:-1] + list(reversed(tokens))
    print(" ".join(mirrored))
WEB COMICS
import urllib2, urlparse, html5lib
def absolutizeURL(href, base):
    """
    returns href unchanged when it already starts with "http://"
    (compared case-insensitively)
    otherwise returns href resolved against base via urlparse.urljoin
    """
    already_absolute = href.lower().startswith("http://")
    return href if already_absolute else urlparse.urljoin(base, href)
def openURL(url):
    """
    opens url with a browser-like User-Agent header
    returns (page, actualurl)
    page is the open response object; it is not closed here, so the
    caller has to close it
    actualurl is what the response reports through geturl() and may be
    different from url in the case of a redirect
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; U; Linux x86_64; fr; rv:1.9.1.5) Gecko/20091109 Ubuntu/9.10 (karmic) Firefox/3.5.5",
    }
    response = urllib2.urlopen(urllib2.Request(url, headers=headers))
    return (response, response.geturl())
# Build an HTML page on stdout that interleaves the images found on two
# web pages: one image from the first site, then one from the second,
# each forced to 200x200 pixels.
import random
from xml.sax.saxutils import escape

# Additional replacements for escape(): the src values come from remote
# pages, so quotes must not be able to terminate the attribute they are
# written into.
_QUOTE_ENTITIES = {"'": "&#39;", '"': "&quot;"}

parser = html5lib.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder("dom"))

# First page: parse it and collect its <img> elements.
(f, url1) = openURL("http://www.cocoaboost.com")
try:
    tree1 = parser.parse(f)
finally:
    # Release the connection even when parsing fails.
    f.close()
tree1.normalize()
tags1 = tree1.getElementsByTagName("img")

# Second page: same treatment.
(f, url2) = openURL("http://www.volkskrant.nl")
try:
    tree2 = parser.parse(f)
finally:
    f.close()
tree2.normalize()
tags2 = tree2.getElementsByTagName("img")

print("""<html><body>""")

# Only the first page's images are shuffled; the second page's images
# keep their document order.
random.shuffle(tags1)

# zip() stops at the shorter list, so surplus images of the longer page
# are not printed.
for (tag1, tag2) in zip(tags1, tags2):
    for (tag, page_url) in ((tag1, url1), (tag2, url2)):
        # Emit a fresh <img> that keeps only the (absolutized) src of the
        # source tag; its other attributes are not carried over.
        src = absolutizeURL(tag.getAttribute("src"), page_url)
        print("<img src='" + escape(src, _QUOTE_ENTITIES) + "' height='200' width='200' />")

# Close the elements in the reverse order of opening (the original
# emitted </html></body>, which is malformed).
print("""</body></html>""")