User:Simon/Library of Contingencies: Difference between revisions

Latest revision as of 15:48, 19 June 2020

About the Library of Contingencies

a card within the library, both obverse and reverse sides

The Library of Contingencies is a digital collection of the cards produced for the text Tasks of the Contingent Librarian. The interface consists of a website with two iframes, in which the "obverse" and the "reverse" of each card are displayed. On the obverse is a task and its description, and on the reverse are images and related snippets. It follows a similar logic and offers similar affordances to the printed cards, but the addition of snippets makes reference to concrete examples of the practice I have been developing with the bootleg library.

The content for the site is pulled from the pzwiki, with the "obverse" listed in my namespace of the wiki (User:Simon), and the "reverse" in the discussion section (User_talk:Simon).

All the obverse cards are transcluded in the Tasks of the Contingent Librarian page, and all the reverse cards are listed in Category: Library Snippets.

To pull the content from the wiki, I'm using a python script called "pulltasks.py", which looks for any pages and images in these sections of the wiki, converting the text from mediawiki format to HTML, and rewriting links to images for thumbnail and full size versions:

import argparse, os, sys
from mwclient import Site
from urllib.parse import quote as urlquote
import html5lib
from xml.etree import ElementTree as ET


# Widths requested from the wiki (as the imageinfo "iiurlwidth" value) for the
# two scaled versions of every image: THUMB_SIZE becomes the <img> src shown
# on the card, FULL_SIZE becomes the target of the surrounding link.
THUMB_SIZE = 320
FULL_SIZE = 640

# MediaWiki namespace number for Category pages.
# NOTE(review): not referenced anywhere in the code visible here.
NS_CATEGORY = 14

# Command-line options: which wiki to read from (--host / --path), which
# category lists the cards (--category) and where the generated HTML files
# are written (--output).
p = argparse.ArgumentParser(description="Dump wiki files to html")
p.add_argument("--host",  metavar='', default="pzwiki.wdka.nl", help='wiki host')
p.add_argument("--path", metavar='', default="/mw-mediadesign/", help="Wiki path. Should end with /")
p.add_argument("--output", default="pages", help="Output path for pages")
p.add_argument("--category", default="Tasks of the Contingent Librarian", help="Category to query")

# Parsed at import time; the rest of the script reads its settings from `args`.
args = p.parse_args()
# print(args)
# site and login

def catmembers (c):
    """Return the members of category `c`, requesting only type "page".

    Builds a 'categorymembers' list query (as a generator, prefix 'cm') from
    the category object's own helpers, asking for ids and titles sorted
    ascending by sortkey.

    c -- a category object providing get_prefix, generate_kwargs, get_list,
         site and name (presumably an mwclient Category; confirm against caller).
    """
    generator_prefix = c.get_prefix('cm', True)
    query_options = dict(
        prop='ids|title',
        namespace=None,
        sort='sortkey',
        dir='asc',
        start=None,
        end=None,
        title=c.name,
        type="page",
    )
    api_kwargs = dict(c.generate_kwargs(generator_prefix, **query_options))
    list_factory = c.get_list(True)
    return list_factory(c.site, 'categorymembers', 'cm', **api_kwargs)

# MediaWiki namespace numbers used to tell card fronts from card backs.
NS_MAIN = 0
NS_TALK = 1
NS_USER = 2
NS_USER_TALK = 3

def path4page(p):
    """Return the local HTML filename for a wiki page.

    Only the last "/"-separated part of the page title is kept, spaces become
    underscores, and pages in the User_talk namespace (the card backs) get a
    "_rvrs" suffix before the ".html" extension.
    """
    basename = p.page_title.rsplit("/", 1)[-1].replace(" ", "_")
    suffix = "_rvrs" if p.namespace == NS_USER_TALK else ""
    return basename + suffix + ".html"

def href4page(p):
    """Return the URL-quoted local filename for a wiki page.

    This is the value of path4page(p) percent-encoded with urlquote so it can
    be used as an href in the generated HTML.
    """
    # The original called the undefined name `path4path` and discarded the
    # quoted result; use path4page and actually return the value.
    return urlquote(path4page(p))

def filenameforlink(href):
    """Map a wiki link href to the local filename of the dumped page.

    Keeps only the text after the last "/" of `href` and appends ".html".
    todo: deal with namespaces?
    """
    last_segment = href.rsplit("/", 1)[-1]
    return last_segment + ".html"

def _scaledimageurl(title, width):
    """Return the wiki's URL for image `title` scaled to `width`, or None.

    Queries the imageinfo API on the module-level `site`. None is returned
    when the response carries no usable imageinfo entry (for example when the
    file page does not exist).
    """
    r = site.api("query", prop="imageinfo", titles=title, iiprop="url", iiurlwidth=str(width), formatversion=2)
    try:
        return r['query']['pages'][0]['imageinfo'][0]['thumburl']
    except (KeyError, IndexError):
        return None


def rewriteimagelink(a):
    """Point an image link at scaled versions of the image, in place.

    `a` is the link element wrapping an <img>. Its href is replaced with the
    FULL_SIZE-wide rendition and the <img> src with the THUMB_SIZE-wide one;
    the img's width, height and srcset attributes are dropped so they do not
    contradict the new src. Links without an href or an <img> child, and
    images the wiki has no imageinfo for, are left untouched.
    """
    from urllib.parse import unquote

    href = a.attrib.get("href")
    img = a.find("img")
    if not href or img is None:
        # Nothing to rewrite; the original raised here.
        return

    # The href is percent-encoded, while the API expects the plain page title.
    title = unquote(href.rsplit("/", 1)[-1])
    print ("rewriteimagelink", title)

    thumburl = _scaledimageurl(title, THUMB_SIZE)
    fullsizeurl = _scaledimageurl(title, FULL_SIZE)
    if thumburl is None or fullsizeurl is None:
        print ("rewriteimagelink: no image info for", title, file=sys.stderr)
        return

    a.attrib['href'] = fullsizeurl
    img.attrib['src'] = thumburl
    for stale in ("width", "height", "srcset"):
        img.attrib.pop(stale, None)
    print ("rewriteimagelink", thumburl, fullsizeurl)

def rewritelinks(html):
    """Rewrite the links in an HTML fragment for the local dump.

    Every element carrying an href is visited:
    - elements whose class contains "external" are left alone;
    - elements whose class is exactly "image" are handed to rewriteimagelink;
    - other hrefs starting with "/mediadesign/" are replaced by the local
      filename from filenameforlink.
    Returns the fragment serialised back to an HTML string.
    """
    fragment = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
    for node in fragment.findall(".//*[@href]"):
        css_class = node.attrib.get("class", "")
        if "external" in css_class:
            # leave external links alone
            continue
        if css_class == "image":
            # link to presentation version of image, img.src becomes a thumbnail
            rewriteimagelink(node)
            continue
        target = node.attrib.get("href")
        if target.startswith("/mediadesign/"):
            node.attrib['href'] = filenameforlink(target)
    return ET.tostring(fragment, method="html", encoding="unicode")


# Static page chrome shared by card fronts and backs.
PAGE_HEADER = """<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Tasks of the Contingent Librarian</title>
<link rel="stylesheet" type="text/css" href="tasks.css">
<script src="tasks.js"></script>
</head>
<body>
"""

PAGE_FOOTER = """
</body>
</html>"""


def writecardpage(path, cssclass, pagename):
    """Render wiki page `pagename` and save it as a standalone HTML file.

    The parsed wiki HTML has its links rewritten and is wrapped in a
    <div class="cssclass"> between PAGE_HEADER and PAGE_FOOTER.
    """
    # Fetch and convert before opening the file, so a failed request does not
    # leave a truncated page behind.
    htmlsrc = rewritelinks(site.parse(page=pagename)['text']['*'])
    # The page declares charset utf-8, so write it as utf-8 regardless of the
    # platform's default encoding.
    with open(path, "w", encoding="utf-8") as f:
        print (PAGE_HEADER, file=f)
        print ("""<div class="{}">{}</div>""".format(cssclass, htmlsrc), file=f)
        print (PAGE_FOOTER, file=f)


site = Site(host=args.host, path=args.path)
tasks=site.Categories[args.category]
# Make sure the output folder exists before writing into it.
os.makedirs(args.output, exist_ok=True)
# FOR EVERY CARD
for card in catmembers(tasks):
    # FRONT
    cardpath = os.path.join(args.output, path4page(card))
    print ("Saving page to {}".format(cardpath))
    writecardpage(cardpath, "card", card.name)

    # BACK: the reverse of a card is the matching page in User_talk
    talk = site.pages["User_talk:"+card.page_title]
    if talk.exists:
        print ("OUTPUTTING CARD BACK {}".format(talk.page_title))
        talkpath = os.path.join(args.output, path4page(talk))
        writecardpage(talkpath, "cardback", talk.name)