Prototyping 3 June 2013
From XPUB & Lens-Based wiki
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import cgi
import urllib
import urllib2
import urlparse
from xml.etree import ElementTree

import html5lib

import cgitb; cgitb.enable()
print "Content-type: text/html;charset=utf-8"
print
q = cgi.FieldStorage()
url = q.getvalue("url","http://pzwart3.wdka.hro.nl/wiki")
mode = q.getvalue("mode", "")
f = urllib2.urlopen(url)
ct = f.info().get("content-type")
if ct.startswith("text/html"):
t = html5lib.parse(f,treebuilder="etree",namespaceHTMLElements=False)
print """<style>
div { width: 320px; float: left; }
</style>"""
print """<div style="width: 100%; z-index: 1000000; position: absolute; background: black; color: white; padding-bottom: 10px">"""
print '<a href="?url='+url+'&mode=h1">h1</a>'
print '<a href="?url='+url+'&mode=p">p</a>'
print "</div>"
print "<hr />"
if mode == "p":
for p in t.iter("p"):
print '<div style="border: 1px dotted black;">'
print ElementTree.tostring(p)
print "</div>"
elif mode == "h1":
for h1 in t.iter("h1"):
print '<div style="border: 1px dotted black;">'
print ElementTree.tostring(h1)
print "</div>"
for h1 in t.iter("h2"):
print '<div style="border: 1px dotted black;">'
print ElementTree.tostring(h1)
print "</div>"
for h1 in t.iter("h3"):
print '<div style="border: 1px dotted black;">'
print ElementTree.tostring(h1)
print "</div>"
for h1 in t.iter("h4"):
print '<div style="border: 1px dotted black;">'
print ElementTree.tostring(h1)
print "</div>"
for h1 in t.iter("h5"):
print '<div style="border: 1px dotted black;">'
print ElementTree.tostring(h1)
print "</div>"
else:
## MANIPULATE THE TREE
for a in t.iter("a"):
href = a.get("href")
href = urlparse.urljoin(url, href)
a.attrib['href'] = "?url="+href
# $(a).attr()
# print '<a href="?url={0}">{0}</a><br>'.format(href)
## DUMP THE TREE
print ElementTree.tostring(t, method="html")