User:Simon/Library of Contingencies: Difference between revisions
No edit summary |
No edit summary |
||
(2 intermediate revisions by the same user not shown) | |||
Line 6: | Line 6: | ||
The content for the site is pulled from the pziwiki, with the "obverse" listed in my namespace of the wiki (User:Simon), and the "reverse" in the discussion section (User_talk:Simon). | The content for the site is pulled from the pziwiki, with the "obverse" listed in my namespace of the wiki (User:Simon), and the "reverse" in the discussion section (User_talk:Simon). | ||
All the obverse cards are transcluded in the Tasks of the Contingent Librarian page, and all the reverse cards are listed in [Category: Library Snippets] | All the obverse cards are transcluded in the [[User:Simon/Tasks_of_the_Contingent_Librarian_cards|Tasks of the Contingent Librarian]] page, and all the reverse cards are listed in [[:Category: Library Snippets]] | ||
To pull the content from the wiki, I'm using a python script called "pulltasks.py", which looks for any pages and images in these sections of the wiki, converting the text from mediawiki format to HTML, and rewriting links to images for thumbnail and full size versions: | |||
<syntaxhighlight lang="python" line='line'> | |||
import argparse, os, sys | |||
from mwclient import Site | |||
from urllib.parse import quote as urlquote | |||
import html5lib | |||
from xml.etree import ElementTree as ET | |||
THUMB_SIZE = 320 | |||
FULL_SIZE = 640 | |||
NS_CATEGORY = 14 | |||
p = argparse.ArgumentParser(description="Dump wiki files to html") | |||
p.add_argument("--host", metavar='', default="pzwiki.wdka.nl", help='wiki host') | |||
p.add_argument("--path", metavar='', default="/mw-mediadesign/", help="Wiki path. Should end with /") | |||
p.add_argument("--output", default="pages", help="Output path for pages") | |||
p.add_argument("--category", default="Tasks of the Contingent Librarian", help="Category to query") | |||
args = p.parse_args() | |||
# print(args) | |||
# site and login | |||
def catmembers (c): | |||
prefix = c.get_prefix('cm', True) | |||
kwargs = dict(c.generate_kwargs(prefix, prop='ids|title', namespace=None, | |||
sort='sortkey', dir='asc', start=None, end=None, | |||
title=c.name, type="page")) | |||
return c.get_list(True)(c.site, 'categorymembers', 'cm', **kwargs) | |||
NS_MAIN = 0 | |||
NS_TALK = 1 | |||
NS_USER = 2 | |||
NS_USER_TALK = 3 | |||
def path4page(p): | |||
""" REturns the local path for a page """ | |||
ret = p.page_title | |||
if "/" in ret: | |||
ret = ret.split("/")[-1] | |||
ret = ret.replace(" ", "_") | |||
if p.namespace == NS_USER_TALK: | |||
ret = ret + "_rvrs" | |||
return ret + ".html" | |||
def href4page(p): | |||
p = path4path(p) | |||
ret = urlquote(p) | |||
def filenameforlink(href): | |||
""" todo: deal with namespaces? """ | |||
path = href | |||
if "/" in href: | |||
path = path.split("/")[-1] | |||
path = path+".html" | |||
return path | |||
def rewriteimagelink(a): | |||
href = a.attrib.get("href") | |||
path = href | |||
if "/" in href: | |||
path = path.split("/")[-1] | |||
print ("rewriteimagelink", path) | |||
r = site.api("query", prop="imageinfo", titles=path, iiprop="url", iiurlwidth=str(THUMB_SIZE), formatversion=2) | |||
iinfo = r['query']['pages'][0]['imageinfo'][0] | |||
thumburl = iinfo['thumburl'] | |||
#fullsizeurl = iinfo['url'] | |||
#filepageurl = iinfo['descriptionurl'] | |||
r = site.api("query", prop="imageinfo", titles=path, iiprop="url", iiurlwidth=str(FULL_SIZE), formatversion=2) | |||
iinfo = r['query']['pages'][0]['imageinfo'][0] | |||
fullsizeurl = iinfo['thumburl'] | |||
#fullsizeurl = iinfo['url'] | |||
#filepageurl = iinfo['descriptionurl'] | |||
a.attrib['href'] = fullsizeurl | |||
img = a.find("img") | |||
img.attrib['src'] = thumburl | |||
if "width" in img.attrib: | |||
del img.attrib["width"] | |||
if "height" in img.attrib: | |||
del img.attrib["height"] | |||
if "srcset" in img.attrib: | |||
del img.attrib["srcset"] | |||
print ("rewriteimagelink", thumburl, fullsizeurl) | |||
def rewritelinks(html): | |||
t = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False) | |||
for a in t.findall(".//*[@href]"): | |||
linkclass = a.attrib.get("class", "") | |||
href = a.attrib.get("href") | |||
if "external" in linkclass: | |||
# leave external links alone | |||
continue | |||
# print ("LINK", href) | |||
if linkclass == "image": | |||
# link to presentation version of image | |||
# change img.src to a thumbnail | |||
rewriteimagelink(a) | |||
elif href.startswith("/mediadesign/"): | |||
new_href = filenameforlink(href) | |||
# print ("Rewriting link {} to {}".format(href, new_href), file=sys.stderr) | |||
a.attrib['href'] = new_href | |||
html = ET.tostring(t, method="html", encoding="unicode") | |||
return html | |||
site = Site(host=args.host, path=args.path) | |||
tasks=site.Categories[args.category] | |||
# FOR EVERY CARD | |||
for card in catmembers(tasks): | |||
# FRONT | |||
cardfilename = path4page(card) | |||
cardpath = os.path.join(args.output, cardfilename) | |||
print ("Saving page to {}".format(cardpath)) | |||
with open(cardpath, "w") as f: | |||
print ("""<!DOCTYPE html> | |||
<!DOCTYPE html> | |||
<html> | |||
<head> | |||
<meta charset="utf-8"> | |||
<title>Tasks of the Contingent Librarian</title> | |||
<link rel="stylesheet" type="text/css" href="tasks.css"> | |||
<script src="tasks.js"></script> | |||
</head> | |||
<body> | |||
""", file=f) | |||
htmlsrc = site.parse(page=card.name)['text']['*'] | |||
htmlsrc = rewritelinks(htmlsrc) | |||
print ("""<div class="card">{}</div>""".format(htmlsrc), file=f) | |||
print (""" | |||
</body> | |||
</html>""", file=f) | |||
# BACK | |||
talk = site.pages["User_talk:"+card.page_title] | |||
if talk.exists: | |||
print ("OUTPUTTING CARD BACK {}".format(talk.page_title)) | |||
talkfilename = path4page(talk) | |||
talkpath = os.path.join(args.output, talkfilename) | |||
with open(talkpath, "w") as f: | |||
print ("""<!DOCTYPE html> | |||
<!DOCTYPE html> | |||
<html> | |||
<head> | |||
<meta charset="utf-8"> | |||
<title>Tasks of the Contingent Librarian</title> | |||
<link rel="stylesheet" type="text/css" href="tasks.css"> | |||
<script src="tasks.js"></script> | |||
</head> | |||
<body> | |||
""", file=f) | |||
htmlsrc = site.parse(page=talk.name)['text']['*'] | |||
htmlsrc = rewritelinks(htmlsrc) | |||
print ("""<div class="cardback">{}</div>""".format(htmlsrc), file=f) | |||
print (""" | |||
</body> | |||
</html>""", file=f) | |||
</syntaxhighlight> |
Latest revision as of 15:48, 19 June 2020
About the Library of Contingencies
The Library of Contingencies is a digital collection of the cards produced for the text Tasks of the Contingent Librarian. The interface consists of a website with two iframes, in which the "obverse" and the "reverse" of each card are displayed. On the obverse is a task and its description, and on the reverse are images and related snippets. It follows a similar logic and offers similar affordances to the printed cards but, with the addition of snippets, makes reference to concrete examples of the practice I have been developing with the bootleg library.
The content for the site is pulled from the pziwiki, with the "obverse" listed in my namespace of the wiki (User:Simon), and the "reverse" in the discussion section (User_talk:Simon).
All the obverse cards are transcluded in the Tasks of the Contingent Librarian page, and all the reverse cards are listed in Category: Library Snippets
To pull the content from the wiki, I'm using a Python script called "pulltasks.py", which looks for any pages and images in these sections of the wiki, converts the text from MediaWiki format to HTML, and rewrites image links to point to thumbnail and full-size versions:
import argparse, os, sys
from mwclient import Site
from urllib.parse import quote as urlquote
import html5lib
from xml.etree import ElementTree as ET
# Widths passed to the wiki API (as "iiurlwidth") for the two image renditions.
THUMB_SIZE = 320
FULL_SIZE = 640
# Not referenced anywhere else in the visible part of this script.
NS_CATEGORY = 14

# Command-line interface: every option has a default, so the script can be
# run without any arguments.
p = argparse.ArgumentParser(description="Dump wiki files to html")
p.add_argument(
    "--host",
    metavar='',
    default="pzwiki.wdka.nl",
    help='wiki host',
)
p.add_argument(
    "--path",
    metavar='',
    default="/mw-mediadesign/",
    help="Wiki path. Should end with /",
)
p.add_argument(
    "--output",
    default="pages",
    help="Output path for pages",
)
p.add_argument(
    "--category",
    default="Tasks of the Contingent Librarian",
    help="Category to query",
)
args = p.parse_args()
# print(args)

# site and login
def catmembers(c):
    """Query the "categorymembers" list for category *c*.

    The request is built with the helpers exposed on *c* itself
    (``get_prefix``, ``generate_kwargs``, ``get_list``) and asks for ids and
    titles, sorted ascending by sort key, with ``type="page"``.

    NOTE(review): presumably ``c`` is an mwclient category object and
    ``type="page"`` keeps sub-categories and files out of the result --
    confirm against the mwclient version in use.
    """
    cm_prefix = c.get_prefix('cm', True)
    query_args = dict(
        c.generate_kwargs(
            cm_prefix,
            prop='ids|title',
            namespace=None,
            sort='sortkey',
            dir='asc',
            start=None,
            end=None,
            title=c.name,
            type="page",
        )
    )
    list_factory = c.get_list(True)
    return list_factory(c.site, 'categorymembers', 'cm', **query_args)
# Namespace numbers for the kinds of wiki page this script distinguishes.
NS_MAIN = 0
NS_TALK = 1
NS_USER = 2
NS_USER_TALK = 3


def path4page(p):
    """Return the local HTML file name for page *p*.

    Only the part of ``p.page_title`` after the last "/" is kept, spaces are
    turned into underscores, and a page whose ``p.namespace`` equals
    NS_USER_TALK gets a "_rvrs" suffix before the ".html" extension.
    """
    leaf = p.page_title.rsplit("/", 1)[-1]
    stem = leaf.replace(" ", "_")
    suffix = "_rvrs" if p.namespace == NS_USER_TALK else ""
    return stem + suffix + ".html"
def href4page(p):
    """Return the URL-quoted local file name for page *p*.

    The file name comes from ``path4page``; it is then percent-encoded with
    ``urlquote`` so it can be used as an ``href`` value.

    The previous version called the undefined name ``path4path`` and never
    returned the quoted result, so it could only raise NameError.
    """
    return urlquote(path4page(p))
def filenameforlink(href):
    """Map a wiki link *href* to the name of the locally dumped HTML file.

    The last "/"-separated segment of the link path is used as the file name
    and ".html" is appended to it.  A "#fragment" on the link is kept *after*
    the extension ("Foo#Notes" -> "Foo.html#Notes"); previously the extension
    was appended to the fragment, which produced a link to a non-existent
    file.

    todo: deal with namespaces?
    """
    # Split the fragment off first so the extension lands on the file name.
    target, hash_sign, fragment = href.partition("#")
    name = target.split("/")[-1]
    return name + ".html" + hash_sign + fragment
def _imageurl(title, width):
    """Ask the wiki for the URL of *title* scaled to *width*; None if unavailable."""
    r = site.api("query", prop="imageinfo", titles=title, iiprop="url",
                 iiurlwidth=str(width), formatversion=2)
    pages = r.get('query', {}).get('pages') or [{}]
    infos = pages[0].get('imageinfo')
    if not infos:
        # The wiki returned no image info for this title (e.g. missing file).
        return None
    return infos[0].get('thumburl')
    #fullsizeurl = iinfo['url']
    #filepageurl = iinfo['descriptionurl']


def rewriteimagelink(a):
    """Point an image link at scaled renditions served by the wiki.

    *a* is a link element with an ``<img>`` child.  The last path segment of
    its ``href`` is used as the file title for two ``imageinfo`` queries: the
    THUMB_SIZE rendition becomes ``img.src`` and the FULL_SIZE rendition
    becomes the link target.  ``width``, ``height`` and ``srcset`` are removed
    from the ``<img>`` so they cannot contradict the new ``src``.

    The element is left untouched (and a note is printed to stderr) when it
    has no ``href``, no ``<img>`` child, or the wiki reports no image info;
    previously those cases raised after the link was half rewritten.
    """
    from urllib.parse import unquote

    href = a.attrib.get("href")
    img = a.find("img")
    if not href or img is None:
        print ("rewriteimagelink: skipping link without href or <img>", file=sys.stderr)
        return
    # The href is percent-encoded, but the API expects the plain page title.
    path = unquote(href.split("/")[-1])
    print ("rewriteimagelink", path)
    thumburl = _imageurl(path, THUMB_SIZE)
    fullsizeurl = _imageurl(path, FULL_SIZE)
    if thumburl is None or fullsizeurl is None:
        print ("rewriteimagelink: no image info for {}".format(path), file=sys.stderr)
        return
    a.attrib['href'] = fullsizeurl
    img.attrib['src'] = thumburl
    for stale in ("width", "height", "srcset"):
        img.attrib.pop(stale, None)
    print ("rewriteimagelink", thumburl, fullsizeurl)
def rewritelinks(html):
    """Return *html* with its wiki links rewritten for the local dump.

    The fragment is parsed with html5lib and every element carrying an
    ``href`` attribute is visited:

    * elements whose class contains "external" are left alone;
    * elements whose class is exactly "image" are handed to
      ``rewriteimagelink``;
    * other hrefs starting with "/mediadesign/" are replaced by the result
      of ``filenameforlink``.

    The modified tree is serialised back to an HTML string.
    """
    tree = html5lib.parseFragment(html, treebuilder = "etree", namespaceHTMLElements = False)
    for node in tree.findall(".//*[@href]"):
        css_class = node.attrib.get("class", "")
        target = node.attrib.get("href")
        if "external" in css_class:
            # leave external links alone
            continue
        # print ("LINK", target)
        if css_class == "image":
            # link to presentation version of image
            # change img.src to a thumbnail
            rewriteimagelink(node)
            continue
        if target.startswith("/mediadesign/"):
            # print ("Rewriting link {} to {}".format(target, filenameforlink(target)), file=sys.stderr)
            node.attrib['href'] = filenameforlink(target)
    return ET.tostring(tree, method="html", encoding="unicode")
# Static wrapper written around every dumped card.  The doctype is emitted
# once (it used to be written twice per file).
_PAGE_HEAD = """<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Tasks of the Contingent Librarian</title>
<link rel="stylesheet" type="text/css" href="tasks.css">
<script src="tasks.js"></script>
</head>
<body>
"""
_PAGE_FOOT = """
</body>
</html>"""


def _writepage(path, title, cssclass):
    """Render wiki page *title* and save it at *path* inside a div of class *cssclass*."""
    # Fetch and rewrite before opening the file, so a failed request does not
    # leave a truncated page behind.
    htmlsrc = site.parse(page=title)['text']['*']
    htmlsrc = rewritelinks(htmlsrc)
    # The page declares charset utf-8, so write it as utf-8 whatever the locale.
    with open(path, "w", encoding="utf-8") as f:
        print (_PAGE_HEAD, file=f)
        print ("""<div class="{}">{}</div>""".format(cssclass, htmlsrc), file=f)
        print (_PAGE_FOOT, file=f)


site = Site(host=args.host, path=args.path)
tasks=site.Categories[args.category]
# Make sure the output directory exists before the first page is written.
os.makedirs(args.output, exist_ok=True)
# FOR EVERY CARD
for card in catmembers(tasks):
    # FRONT
    cardfilename = path4page(card)
    cardpath = os.path.join(args.output, cardfilename)
    print ("Saving page to {}".format(cardpath))
    _writepage(cardpath, card.name, "card")
    # BACK: the matching User_talk page, when it exists, is the card's reverse.
    talk = site.pages["User_talk:"+card.page_title]
    if talk.exists:
        print ("OUTPUTTING CARD BACK {}".format(talk.page_title))
        talkfilename = path4page(talk)
        talkpath = os.path.join(args.output, talkfilename)
        _writepage(talkpath, talk.name, "cardback")