User:Simon/Library of Contingencies: Difference between revisions
No edit summary |
|||
Line 7: | Line 7: | ||
All the obverse cards are transcluded in the [[User:Simon/Tasks_of_the_Contingent_Librarian_cards|Tasks of the Contingent Librarian]] page, and all the reverse cards are listed in [[:Category: Library Snippets]] | All the obverse cards are transcluded in the [[User:Simon/Tasks_of_the_Contingent_Librarian_cards|Tasks of the Contingent Librarian]] page, and all the reverse cards are listed in [[:Category: Library Snippets]] | ||
To pull the content from the wiki, I'm using a python script called "pulltasks.py": | |||
<syntaxhighlight lang="python" line='line'> | |||
import argparse, os, sys | |||
from mwclient import Site | |||
from urllib.parse import quote as urlquote | |||
import html5lib | |||
from xml.etree import ElementTree as ET | |||
THUMB_SIZE = 320 | |||
FULL_SIZE = 640 | |||
NS_CATEGORY = 14 | |||
p = argparse.ArgumentParser(description="Dump wiki files to html") | |||
p.add_argument("--host", metavar='', default="pzwiki.wdka.nl", help='wiki host') | |||
p.add_argument("--path", metavar='', default="/mw-mediadesign/", help="Wiki path. Should end with /") | |||
p.add_argument("--output", default="pages", help="Output path for pages") | |||
p.add_argument("--category", default="Tasks of the Contingent Librarian", help="Category to query") | |||
args = p.parse_args() | |||
# print(args) | |||
# site and login | |||
def catmembers (c): | |||
prefix = c.get_prefix('cm', True) | |||
kwargs = dict(c.generate_kwargs(prefix, prop='ids|title', namespace=None, | |||
sort='sortkey', dir='asc', start=None, end=None, | |||
title=c.name, type="page")) | |||
return c.get_list(True)(c.site, 'categorymembers', 'cm', **kwargs) | |||
NS_MAIN = 0 | |||
NS_TALK = 1 | |||
NS_USER = 2 | |||
NS_USER_TALK = 3 | |||
def path4page(p): | |||
""" REturns the local path for a page """ | |||
ret = p.page_title | |||
if "/" in ret: | |||
ret = ret.split("/")[-1] | |||
ret = ret.replace(" ", "_") | |||
if p.namespace == NS_USER_TALK: | |||
ret = ret + "_rvrs" | |||
return ret + ".html" | |||
def href4page(p): | |||
p = path4path(p) | |||
ret = urlquote(p) | |||
def filenameforlink(href): | |||
""" todo: deal with namespaces? """ | |||
path = href | |||
if "/" in href: | |||
path = path.split("/")[-1] | |||
path = path+".html" | |||
return path | |||
def rewriteimagelink(a): | |||
href = a.attrib.get("href") | |||
path = href | |||
if "/" in href: | |||
path = path.split("/")[-1] | |||
print ("rewriteimagelink", path) | |||
r = site.api("query", prop="imageinfo", titles=path, iiprop="url", iiurlwidth=str(THUMB_SIZE), formatversion=2) | |||
iinfo = r['query']['pages'][0]['imageinfo'][0] | |||
thumburl = iinfo['thumburl'] | |||
#fullsizeurl = iinfo['url'] | |||
#filepageurl = iinfo['descriptionurl'] | |||
r = site.api("query", prop="imageinfo", titles=path, iiprop="url", iiurlwidth=str(FULL_SIZE), formatversion=2) | |||
iinfo = r['query']['pages'][0]['imageinfo'][0] | |||
fullsizeurl = iinfo['thumburl'] | |||
#fullsizeurl = iinfo['url'] | |||
#filepageurl = iinfo['descriptionurl'] | |||
a.attrib['href'] = fullsizeurl | |||
img = a.find("img") | |||
img.attrib['src'] = thumburl | |||
if "width" in img.attrib: | |||
del img.attrib["width"] | |||
if "height" in img.attrib: | |||
del img.attrib["height"] | |||
if "srcset" in img.attrib: | |||
del img.attrib["srcset"] | |||
print ("rewriteimagelink", thumburl, fullsizeurl) | |||
def rewritelinks(html): | |||
t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False) | |||
for a in t.findall(".//*[@href]"): | |||
linkclass = a.attrib.get("class", "") | |||
href = a.attrib.get("href") | |||
if "external" in linkclass: | |||
# leave external links alone | |||
continue | |||
# print ("LINK", href) | |||
if linkclass == "image": | |||
# link to presentation version of image | |||
# change img.src to a thumbnail | |||
rewriteimagelink(a) | |||
elif href.startswith("/mediadesign/"): | |||
new_href = filenameforlink(href) | |||
# print ("Rewriting link {} to {}".format(href, new_href), file=sys.stderr) | |||
a.attrib['href'] = new_href | |||
html = ET.tostring(t, method="html", encoding="unicode") | |||
return html | |||
site = Site(host=args.host, path=args.path) | |||
tasks=site.Categories[args.category] | |||
# FOR EVERY CARD | |||
for card in catmembers(tasks): | |||
# FRONT | |||
cardfilename = path4page(card) | |||
cardpath = os.path.join(args.output, cardfilename) | |||
print ("Saving page to {}".format(cardpath)) | |||
with open(cardpath, "w") as f: | |||
print ("""<!DOCTYPE html> | |||
<!DOCTYPE html> | |||
<html> | |||
<head> | |||
<meta charset="utf-8"> | |||
<title>Tasks of the Contingent Librarian</title> | |||
<link rel="stylesheet" type="text/css" href="tasks.css"> | |||
<script src="tasks.js"></script> | |||
</head> | |||
<body> | |||
""", file=f) | |||
htmlsrc = site.parse(page=card.name)['text']['*'] | |||
htmlsrc = rewritelinks(htmlsrc) | |||
print ("""<div class="card">{}</div>""".format(htmlsrc), file=f) | |||
print (""" | |||
</body> | |||
</html>""", file=f) | |||
# BACK | |||
talk = site.pages["User_talk:"+card.page_title] | |||
if talk.exists: | |||
print ("OUTPUTTING CARD BACK {}".format(talk.page_title)) | |||
talkfilename = path4page(talk) | |||
talkpath = os.path.join(args.output, talkfilename) | |||
with open(talkpath, "w") as f: | |||
print ("""<!DOCTYPE html> | |||
<!DOCTYPE html> | |||
<html> | |||
<head> | |||
<meta charset="utf-8"> | |||
<title>Tasks of the Contingent Librarian</title> | |||
<link rel="stylesheet" type="text/css" href="tasks.css"> | |||
<script src="tasks.js"></script> | |||
</head> | |||
<body> | |||
""", file=f) | |||
htmlsrc = site.parse(page=talk.name)['text']['*'] | |||
htmlsrc = rewritelinks(htmlsrc) | |||
print ("""<div class="cardback">{}</div>""".format(htmlsrc), file=f) | |||
print (""" | |||
</body> | |||
</html>""", file=f) | |||
</syntaxhighlight> |
Revision as of 15:46, 19 June 2020
About the Library of Contingencies
The Library of Contingencies is a digital collection of the cards produced for the text Tasks of the Contingent Librarian. The interface consists of a website with two iframes, in which the "obverse" and the "reverse" of each card are displayed. On the obverse is a task and its description, and on the reverse are images and related snippets. It follows a similar logic and offers similar affordances to the printed cards, but, with the addition of snippets, it also makes reference to concrete examples of the practice I have been developing with the bootleg library.
The content for the site is pulled from the pziwiki, with the "obverse" listed in my namespace of the wiki (User:Simon), and the "reverse" in the discussion section (User_talk:Simon).
All the obverse cards are transcluded in the Tasks of the Contingent Librarian page, and all the reverse cards are listed in Category: Library Snippets.
To pull the content from the wiki, I'm using a python script called "pulltasks.py":
import argparse, os, sys
from mwclient import Site
from urllib.parse import quote as urlquote
import html5lib
from xml.etree import ElementTree as ET
# Pixel widths: THUMB_SIZE for inline thumbnails, FULL_SIZE for the enlarged view.
THUMB_SIZE, FULL_SIZE = 320, 640
# MediaWiki namespace id of category pages.
NS_CATEGORY = 14

# Command-line options; every one has a default, so the script runs without arguments.
p = argparse.ArgumentParser(description="Dump wiki files to html")
p.add_argument(
    "--host",
    default="pzwiki.wdka.nl",
    metavar='',
    help='wiki host',
)
p.add_argument(
    "--path",
    default="/mw-mediadesign/",
    metavar='',
    help="Wiki path. Should end with /",
)
p.add_argument(
    "--output",
    default="pages",
    help="Output path for pages",
)
p.add_argument(
    "--category",
    default="Tasks of the Contingent Librarian",
    help="Category to query",
)
args = p.parse_args()
def catmembers(c):
    """Return the listing of the members of category *c*.

    Issues a ``categorymembers`` query through the list helpers available on
    the category object, asking for ids and titles in ascending sort-key
    order and passing ``type="page"`` along with the request.
    """
    cm_prefix = c.get_prefix('cm', True)
    query_options = {
        "prop": 'ids|title',
        "namespace": None,
        "sort": 'sortkey',
        "dir": 'asc',
        "start": None,
        "end": None,
        "title": c.name,
        "type": "page",
    }
    request_kwargs = dict(c.generate_kwargs(cm_prefix, **query_options))
    list_factory = c.get_list(True)
    return list_factory(c.site, 'categorymembers', 'cm', **request_kwargs)
# MediaWiki namespace ids: main, talk, user and user-talk.
NS_MAIN, NS_TALK, NS_USER, NS_USER_TALK = 0, 1, 2, 3
def path4page(p):
    """Return the local HTML file name for wiki page *p*.

    Only the last "/"-separated part of the page title is kept and its spaces
    become underscores.  Pages in the user-talk namespace get an "_rvrs"
    suffix in front of the ".html" extension.
    """
    leaf = p.page_title.rsplit("/", 1)[-1]
    stem = leaf.replace(" ", "_")
    suffix = "_rvrs" if p.namespace == NS_USER_TALK else ""
    return stem + suffix + ".html"
def href4page(p):
    """Return the URL-quoted local file name for wiki page *p*.

    The file name is the one ``path4page`` produces for the page; quoting
    makes it safe to use as an ``href`` value.
    """
    return urlquote(path4page(p))
def filenameforlink(href):
    """Return the local HTML file name that a wiki-internal *href* maps to.

    The last "/"-separated segment of the link target is kept and ".html" is
    appended.  A "#fragment" is split off first and re-attached after the
    extension, so a section link such as "Page#Sec" becomes "Page.html#Sec"
    rather than "Page#Sec.html".

    TODO: deal with namespaces?
    """
    # Split at the first "#" so a "/" inside the fragment cannot be mistaken
    # for a path separator.
    target, hash_sign, fragment = href.partition("#")
    leaf = target.rsplit("/", 1)[-1]
    return leaf + ".html" + hash_sign + fragment
def _scaled_image_url(title, width):
    """Return the URL of wiki file *title* scaled to *width* pixels.

    Returns None when the API response carries no image info (or no scaled
    URL) for the title, e.g. because the file page does not exist.
    """
    r = site.api("query", prop="imageinfo", titles=title, iiprop="url",
                 iiurlwidth=str(width), formatversion=2)
    imageinfo = r['query']['pages'][0].get('imageinfo')
    if not imageinfo:
        return None
    return imageinfo[0].get('thumburl')


def rewriteimagelink(a):
    """Point an image link at scaled copies of the file served by the wiki.

    *a* is the link element.  Its ``href`` is replaced by the URL of the
    FULL_SIZE-wide rendition, and the ``src`` of its ``<img>`` child by the
    URL of the THUMB_SIZE-wide rendition.  The image's ``width``, ``height``
    and ``srcset`` attributes are removed so they cannot override the new
    source.  If the wiki returns no image info, the link is left unchanged
    and a note is written to stderr.
    """
    from urllib.parse import unquote

    href = a.attrib.get("href")
    # The href is percent-encoded, but the API expects the plain page title.
    title = unquote(href.rsplit("/", 1)[-1])
    print ("rewriteimagelink", title)
    thumburl = _scaled_image_url(title, THUMB_SIZE)
    fullsizeurl = _scaled_image_url(title, FULL_SIZE) if thumburl is not None else None
    if thumburl is None or fullsizeurl is None:
        print ("rewriteimagelink: no image info for {}, link left unchanged".format(title), file=sys.stderr)
        return
    a.attrib['href'] = fullsizeurl
    img = a.find("img")
    if img is not None:
        img.attrib['src'] = thumburl
        for stale in ("width", "height", "srcset"):
            img.attrib.pop(stale, None)
    print ("rewriteimagelink", thumburl, fullsizeurl)
def rewritelinks(html):
    """Rewrite the links of an HTML fragment for the local dump.

    Every element carrying an ``href`` is visited.  Links whose class
    contains "external" are kept as they are, links whose class is exactly
    "image" are handed to ``rewriteimagelink``, and any other link starting
    with "/mediadesign/" is pointed at the file named by ``filenameforlink``.
    Returns the fragment serialised back to a string.
    """
    fragment = html5lib.parseFragment(html, treebuilder="etree", namespaceHTMLElements=False)
    for link in fragment.findall(".//*[@href]"):
        css_class = link.attrib.get("class", "")
        if "external" in css_class:
            # external links stay untouched
            continue
        if css_class == "image":
            # link to the presentation version, show a thumbnail
            rewriteimagelink(link)
            continue
        target = link.attrib.get("href")
        if target.startswith("/mediadesign/"):
            link.attrib['href'] = filenameforlink(target)
    return ET.tostring(fragment, method="html", encoding="unicode")
# HTML skeleton shared by card fronts and backs; "{}" receives the card markup.
_PAGE_TEMPLATE = """<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Tasks of the Contingent Librarian</title>
<link rel="stylesheet" type="text/css" href="tasks.css">
<script src="tasks.js"></script>
</head>
<body>

{}

</body>
</html>
"""


def _write_card(page, outpath, css_class):
    """Fetch wiki *page*, rewrite its links and save it to *outpath*.

    The rendered page is wrapped in a ``<div>`` of class *css_class* inside
    the shared page skeleton.
    """
    # Fetch and rewrite first, so a failing request does not leave a
    # half-written file behind.
    htmlsrc = rewritelinks(site.parse(page=page.name)['text']['*'])
    card_div = """<div class="{}">{}</div>""".format(css_class, htmlsrc)
    # The page declares utf-8, so write utf-8 whatever the locale default is.
    with open(outpath, "w", encoding="utf-8") as f:
        f.write(_PAGE_TEMPLATE.format(card_div))


site = Site(host=args.host, path=args.path)
tasks = site.Categories[args.category]
os.makedirs(args.output, exist_ok=True)

# FOR EVERY CARD
for card in catmembers(tasks):
    # FRONT
    cardpath = os.path.join(args.output, path4page(card))
    print ("Saving page to {}".format(cardpath))
    _write_card(card, cardpath, "card")
    # BACK: the card's reverse is the matching page in the User_talk namespace
    talk = site.pages["User_talk:" + card.page_title]
    if talk.exists:
        print ("OUTPUTTING CARD BACK {}".format(talk.page_title))
        talkpath = os.path.join(args.output, path4page(talk))
        _write_card(talk, talkpath, "cardback")