User:Eleanorg/2.1/Prototypes/transcription

From XPUB & Lens-Based wiki

Asking people to transcribe text and submit it to a central document. Will they transcribe verbatim?
(Answer: maybe.)

Transcribe1.png Transcribe2.png


Code

make db of imgs waiting to be transcribed

#!/usr/bin/python
#-*- coding:utf-8 -*-

import pymongo
from pymongo import Connection


#======== create db with img files

connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection  

for x in range(0,11):
	title = "text" + str(x)
	fileName = "img" + str(x) + ".jpg"
	
	
	sentence = {'title': title, 'file': fileName, 'status': "waiting", 'text':" " }
	collection.insert(sentence)

input form

Show an img waiting to be transcribed, with input form for transcription.

#!/usr/bin/python
#-*- coding:utf-8 -*-

 
import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection
import random

#========= get untranscribed img file

connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection 

waiting = []
for entry in myDB.collection.find({"status": "waiting"}):
    waiting.append(entry)			# add this hash to 'waiting' list
#print waiting
 
howMany = len(waiting)				# find out how many items in 'waiting' list
#print howMany
random = random.randint(0,howMany)		# pick a random number in this range
#print random
sentence = waiting[random]			# ...and choose the sentence at this index



#========== show input form

print "Content-Type: text/html"
print 
print """
<!DOCTYPE html>
<html>
  <head>
    <title></title>
    <style type="text/css">
    	.textImg { height: 300px; float: left;}
    	.textInput { height: 100px; width: 500px; margin: 0px 0px 30px 0px; }
    </style>

  </head>
  
<body>

<form action="updatedb.cgi" name="inputForm">	
	<img src=" """ + sentence['file'] + """ "  class="textImg"/> 
	<br />
	Transcription: <br />
	<textarea name="text" class="textInput" rows="3"></textarea><br />
	Your name (optional):<br />
	<input type="text" name="author" value="">
	<input style="" name="title" value=" """ + sentence['title'] + """ ">
	<input type="submit" value="Submit" />
</form>

</body>

</html>"""

proces input form

Change status of that sentence in db to 'finished', & add the transcribed text & author's name.

#!/usr/bin/python
#-*- coding:utf-8 -*-

import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection

#======== get text from input form

form = cgi.FieldStorage()			# Grabs whatever input comes from form
text = form.getvalue("text", "(blank transcription)")		
title = form.getvalue("title")
name = form.getvalue("author", "anon")


#========= put it in the db

connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection  

collection.update( {'title': title}, {"$set":{'status': 'done', 'text': text, 'author': name}} )


#========= print thank you
print "Content-Type: text/html"
print 
print """
<!DOCTYPE html>
<html>
  <head>
    <title></title>
  </head>
  
<body>thanks, you added this transcription:<br/> <br />
<em>""" + text + """</em><br /><br />
for """ + title + """
View the other <a href="showTexts.cgi">transcribed text so far</a>.


</body>

</html>
""""

display transcribed texts

CGI displays an html doc of all db entries. Using a loop to stop entries printing in the strange order they're stored in db, starting with number 8 rather than 0 - ?

#!/usr/bin/python
#-*- coding:utf-8 -*-

import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection



connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection 


#========= show all db entries
# tr appears blank for those not yet transcribed.

print "Content-Type: text/html"
print 
print """
<!DOCTYPE html>
<html>
  <head>
    <title></title>
    <style type="text/css">
    	table {margin: auto;}
    	tr {height:40px;}
    	.text {font-size: 30px; width: 450px; text-align:right;}
    	.name {font-size:9px; min-width: 200px; padding: 10px;}
    </style>

  </head>
  
<body>
<table>"""

# loop to make sure db entries are printed in sensible order, 0-10
for x in range(0,11):
	entry = myDB.collection.find_one({'title': 'text' + str(x)})
	print """<tr>
			<td class="text">""" + entry['text'] + """</td>
			<td class="name">(""" + entry['author'] + """)</td>
	      </tr>"""
	
	
print """</table>
</body>

</html>"""