Prototyping 3 June 2013

From XPUB & Lens-Based wiki
Revision as of 15:13, 3 June 2013 by Michael Murtaugh (talk | contribs) (Created page with "<source lang="python"> #!/usr/bin/env python #-*- coding:utf-8 -*- import cgi, urllib2, html5lib, urlparse import cgitb; cgitb.enable() from xml.etree import ElementTree prin...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import cgi
import urllib
import urllib2
import urlparse
import cgitb; cgitb.enable()
from xml.etree import ElementTree

import html5lib

# CGI page "proxy" (Python 2). Fetches the page named by the ``url`` query
# parameter and, depending on the ``mode`` query parameter, prints
#   mode=p    every <p> element in its own dotted box,
#   mode=h1   every <h1>..<h5> element in its own dotted box,
#   other     the whole page, with its links rewritten so that following
#             them loads the target through this script again.
#
# print(...) is always called with exactly one argument so that it behaves
# the same as the Python 2 print statement.

DEFAULT_URL = "http://pzwart3.wdka.hro.nl/wiki"
FETCHABLE_SCHEMES = ("http", "https")
HEADING_TAGS = ("h1", "h2", "h3", "h4", "h5")


def proxied_href(target):
    """Return a ``?url=...`` link that loads *target* through this script.

    The target is fully percent-encoded: without that, any ``&`` or ``#``
    inside it would cut the ``url`` parameter short, and raw text would be
    injected into the surrounding HTML attribute.
    """
    # Attribute values parsed from the page may be unicode, which
    # urllib.quote() cannot handle for non-ASCII characters on Python 2.
    if not isinstance(target, str):
        target = target.encode("utf-8")
    return "?url=" + urllib.quote(target, safe="")


def print_boxed(tree, tag):
    """Print every *tag* element found in *tree* inside a dotted-border box."""
    for element in tree.iter(tag):
        print('<div style="border: 1px dotted black;">')
        # method="html" (as used for the full-page dump) keeps empty elements
        # such as <a name="x"></a> from being written as self-closed XML tags.
        print(ElementTree.tostring(element, method="html"))
        print("</div>")


def rewrite_links(tree, base):
    """Point every followable <a href> in *tree* back at this script.

    Relative links are resolved against *base* first.
    """
    for a in tree.iter("a"):
        href = a.get("href")
        if href is None or href.startswith("#"):
            # Named anchors and in-page jumps are not pages to fetch.
            continue
        target = urlparse.urljoin(base, href)
        if urlparse.urlparse(target).scheme.lower() not in FETCHABLE_SCHEMES:
            # mailto:, javascript:, ... links are left exactly as they are.
            continue
        a.set("href", proxied_href(target))


def main():
    """Handle one CGI request: fetch the page and print the selected view."""
    print("Content-type: text/html;charset=utf-8")
    print("")

    q = cgi.FieldStorage()
    # getfirst() yields a single string even when a parameter is repeated
    # (getvalue() would return a list in that case).
    url = q.getfirst("url", DEFAULT_URL)
    mode = q.getfirst("mode", "")

    # urllib2 can also open file:// and ftp:// URLs; refuse anything but
    # http(s) so the script cannot be pointed at files on the server.
    if urlparse.urlparse(url).scheme.lower() not in FETCHABLE_SCHEMES:
        print("<p>Only http and https URLs can be fetched.</p>")
        return

    # Fetch errors are deliberately left to cgitb, as before.
    f = urllib2.urlopen(url)
    try:
        # The Content-Type header may be missing, in which case get() gives None.
        ct = (f.info().get("content-type") or "").lower()
        if not ct.startswith("text/html"):
            print("<p>Cannot display content of type "
                  + cgi.escape(ct or "unknown") + "</p>")
            return
        t = html5lib.parse(f, treebuilder="etree", namespaceHTMLElements=False)
        # After redirects, relative links must be resolved against the URL
        # the page was finally served from.
        base = f.geturl()
    finally:
        f.close()

    print("""<style>
div { width: 320px; float: left; }
</style>""")

    # Toolbar for switching between the views of the current page.
    self_link = proxied_href(url)
    print("""<div style="width: 100%; z-index: 1000000; position: absolute; background: black; color: white; padding-bottom: 10px">""")
    print('<a href="' + self_link + '&amp;mode=h1">h1</a>')
    print('<a href="' + self_link + '&amp;mode=p">p</a>')
    print("</div>")
    print("<hr />")

    if mode == "p":
        print_boxed(t, "p")
    elif mode == "h1":
        # Same grouping as before: all h1 elements, then all h2, and so on.
        for tag in HEADING_TAGS:
            print_boxed(t, tag)
    else:
        ## MANIPULATE THE TREE
        rewrite_links(t, base)
        ## DUMP THE TREE
        print(ElementTree.tostring(t, method="html"))


if __name__ == "__main__":
    main()