User:Eleanorg/2.1/Prototypes/transcription
Asking people to transcribe text and submit it to a central document. Will they transcribe verbatim?
make db of imgs waiting to be transcribed
#!/usr/bin/python
#-*- coding:utf-8 -*-
import pymongo
from pymongo import Connection
#======== create db with img files
# Connect to the local MongoDB server and select the collection that
# holds one record per image.
connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection

# Seed records text0..text10, each pointing at the matching imgN.jpg and
# marked as still waiting for a transcription.
for index in range(11):
    collection.insert({
        'title': "text" + str(index),
        'file': "img" + str(index) + ".jpg",
        'status': "waiting",
        'text': " ",
    })
input form
Show an img waiting to be transcribed, with input form for transcription.
#!/usr/bin/python
#-*- coding:utf-8 -*-
import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection
import random
#========= get untranscribed img file
connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection

# Every entry whose status is still "waiting" is a candidate.
waiting = list(collection.find({"status": "waiting"}))

if not waiting:
    # Nothing left to transcribe: answer with a short page instead of
    # crashing on an empty list.
    print("Content-Type: text/html")
    print("")
    print("<!DOCTYPE html><html><body>"
          "Nothing is waiting to be transcribed."
          "</body></html>")
    raise SystemExit(0)

# random.choice picks a valid element directly. The previous
# randint(0, len(waiting)) included the upper bound, so it could index one
# past the end of the list, and its result was stored in a variable named
# `random`, shadowing the module.
sentence = random.choice(waiting)
#========== show input form
# Values are escaped (quotes included) because they are placed inside
# HTML attributes. They are inserted without surrounding spaces: the title
# is sent back by the form and must match the stored title exactly.
formValues = {
    'file': cgi.escape(sentence['file'], True),
    'title': cgi.escape(sentence['title'], True),
}

print("Content-Type: text/html")
print("")
print("""
<!DOCTYPE html>
<html>
<head>
<title></title>
<style type="text/css">
.textImg { height: 300px; float: left;}
.textInput { height: 100px; width: 500px; margin: 0px 0px 30px 0px; }
</style>
</head>
<body>
<form action="updatedb.cgi" name="inputForm">
<img src="%(file)s" class="textImg"/>
<br />
Transcription: <br />
<textarea name="text" class="textInput" rows="3"></textarea><br />
Your name (optional):<br />
<input type="text" name="author" value="">
<input style="" name="title" value="%(title)s">
<input type="submit" value="Submit" />
</form>
</body>
</html>""" % formValues)
process input form
Change the status of that img in the db to 'done' and store the submitted text.
(this version just prints the text submitted)
#!/usr/bin/python
#-*- coding:utf-8 -*-
import cgi
import cgitb; cgitb.enable()
#import pymongo
#from pymongo import Connection
#======== get text from input form
form = cgi.FieldStorage() # Grabs whatever input comes from form
# getfirst always returns a single string, even if a field is repeated.
text = form.getfirst("text", "//this part was transcribed blank//")
# Default to an empty string so a missing title cannot break the string
# concatenation below; strip padding so the title can match the db entry.
title = form.getfirst("title", "").strip()
#========= put it in the db
#connection = Connection()
#myDB = connection['consentTexts1']
#collection = myDB.collection
#if title:
# collection.update( {'title': title}, {"$set":{'status': 'done', 'text': text}} )
#========= print thank you
# A CGI response must start with a header block followed by a blank line.
print("Content-Type: text/html")
print("")
# Both values come straight from the request, so escape them before
# echoing them into the page.
print("""
<!DOCTYPE html>
<html>
<head>
<title></title>
</head>
<body>""" + cgi.escape(title) + "<br />" + cgi.escape(text) + """
</body>
</html>
""")
display transcribed texts
CGI displays an html doc of all transcribed db entries.
#!/usr/bin/python
#-*- coding:utf-8 -*-
import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection
#========= connect to the db
connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection
#========= show all transcribed texts
print("Content-Type: text/html")
print("")
print("""
<!DOCTYPE html>
<html>
<head>
<title></title>
<style type="text/css">
table {margin: auto;}
tr {height:40px;}
.text {font-size: 20px; width: 450px; }
.name {font-size:10px; min-width: 200px; background-color: #feeefe; padding: 5px;}
</style>
</head>
<body>
<table>""")
# Skip entries that are still waiting: only transcribed texts belong here.
for entry in collection.find({"status": {"$ne": "waiting"}}):
    # 'author' is optional (entries may not carry it at all), so fall back
    # to an empty string instead of raising KeyError. Both values are
    # user-submitted and therefore escaped.
    transcription = cgi.escape(entry.get('text') or "")
    author = cgi.escape(entry.get('author') or "")
    print("""<tr><td class="text">""" + transcription
          + """</td> <td class="name">""" + author + "</td></tr>")
print("""</table>
</body>
</html>""")