User:Eleanorg/2.1/Prototypes/transcription: Difference between revisions
No edit summary |
|||
(13 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
Asking people to transcribe text and submit it to a central document. Will they transcribe verbatim? | Asking people to transcribe text and submit it to a central document. Will they transcribe verbatim? | ||
<br />(Answer: maybe.) | |||
[[File:transcribe1.png]] | |||
[[File:transcribe2.png]] | |||
==Code== | |||
===make db of imgs waiting to be transcribed=== | ===make db of imgs waiting to be transcribed=== | ||
Line 22: | Line 29: | ||
sentence = {'title': title, 'file': fileName, 'status': "waiting", 'text':" " } | sentence = {'title': title, 'file': fileName, 'status': "waiting", 'text':" " } | ||
collection.insert(sentence) | collection.insert(sentence) | ||
Line 30: | Line 36: | ||
===input form=== | ===input form=== | ||
Show an img waiting to be transcribed, with input form for transcription. | Show an img waiting to be transcribed, with input form for transcription. | ||
<source lang=" | <source lang="python"> | ||
#!/usr/bin/python | |||
#-*- coding:utf-8 -*- | |||
import cgi | |||
import cgitb; cgitb.enable() | |||
import pymongo | |||
from pymongo import Connection | |||
import random | |||
#========= get untranscribed img file | |||
connection = Connection() | |||
myDB = connection['consentTexts1'] | |||
collection = myDB.collection | |||
waiting = [] | |||
for entry in myDB.collection.find({"status": "waiting"}): | |||
waiting.append(entry) # add this hash to 'waiting' list | |||
#print waiting | |||
howMany = len(waiting) # find out how many items in 'waiting' list | |||
#print howMany | |||
random = random.randint(0,howMany) # pick a random number in this range | |||
#print random | |||
sentence = waiting[random] # ...and choose the sentence at this index | |||
#========== show input form | |||
print "Content-Type: text/html" | |||
print | |||
print """ | |||
<!DOCTYPE html> | |||
<html> | |||
<head> | |||
<title></title> | |||
<style type="text/css"> | |||
.textImg { height: 300px; float: left;} | |||
.textInput { height: 100px; width: 500px; margin: 0px 0px 30px 0px; } | |||
</style> | |||
</head> | |||
<body> | |||
<form action="updatedb.cgi" name="inputForm"> | |||
<img src=" """ + sentence['file'] + """ " class="textImg"/> | |||
<br /> | |||
Transcription: <br /> | |||
<textarea name="text" class="textInput" rows="3"></textarea><br /> | |||
Your name (optional):<br /> | |||
<input type="text" name="author" value=""> | |||
<input style="" name="title" value=" """ + sentence['title'] + """ "> | |||
<input type="submit" value="Submit" /> | |||
</form> | |||
</body> | |||
</html>""" | |||
</source> | </source> | ||
===proces input form=== | ===proces input form=== | ||
Change status of that | Change status of that sentence in db to 'finished', & add the transcribed text & author's name. | ||
<source lang="python"> | |||
#!/usr/bin/python | |||
#-*- coding:utf-8 -*- | |||
import cgi | |||
import cgitb; cgitb.enable() | |||
import pymongo | |||
from pymongo import Connection | |||
#======== get text from input form | |||
form = cgi.FieldStorage() # Grabs whatever input comes from form | |||
text = form.getvalue("text", "(blank transcription)") | |||
title = form.getvalue("title") | |||
name = form.getvalue("author", "anon") | |||
#========= put it in the db | |||
connection = Connection() | |||
myDB = connection['consentTexts1'] | |||
collection = myDB.collection | |||
collection.update( {'title': title}, {"$set":{'status': 'done', 'text': text, 'author': name}} ) | |||
#========= print thank you | |||
print "Content-Type: text/html" | |||
print | |||
print """ | |||
<!DOCTYPE html> | |||
<html> | |||
<head> | |||
<title></title> | |||
</head> | |||
<body>thanks, you added this transcription:<br/> <br /> | |||
<em>""" + text + """</em><br /><br /> | |||
for """ + title + """ | |||
View the other <a href="showTexts.cgi">transcribed text so far</a>. | |||
</body> | |||
</html> | |||
"""" | |||
</source> | |||
===display transcribed texts=== | ===display transcribed texts=== | ||
CGI displays an html doc of all transcribed db entries. | CGI displays an html doc of all db entries. | ||
Using a loop to stop entries printing in the strange order they're stored in db, starting with number 8 rather than 0 - ? | |||
<source lang="python"> | |||
#!/usr/bin/python | |||
#-*- coding:utf-8 -*- | |||
import cgi | |||
import cgitb; cgitb.enable() | |||
import pymongo | |||
from pymongo import Connection | |||
connection = Connection() | |||
myDB = connection['consentTexts1'] | |||
collection = myDB.collection | |||
#========= show all db entries | |||
# tr appears blank for those not yet transcribed. | |||
print "Content-Type: text/html" | |||
print | |||
print """ | |||
<!DOCTYPE html> | |||
<html> | |||
<head> | |||
<title></title> | |||
<style type="text/css"> | |||
table {margin: auto;} | |||
tr {height:40px;} | |||
.text {font-size: 30px; width: 450px; text-align:right;} | |||
.name {font-size:9px; min-width: 200px; padding: 10px;} | |||
</style> | |||
</head> | |||
<body> | |||
<table>""" | |||
# loop to make sure db entries are printed in sensible order, 0-10 | |||
for x in range(0,11): | |||
entry = myDB.collection.find_one({'title': 'text' + str(x)}) | |||
print """<tr> | |||
<td class="text">""" + entry['text'] + """</td> | |||
<td class="name">(""" + entry['author'] + """)</td> | |||
</tr>""" | |||
print """</table> | |||
</body> | |||
</html>""" | |||
</source> |
Latest revision as of 12:38, 26 September 2012
Asking people to transcribe text and submit it to a central document. Will they transcribe verbatim?
(Answer: maybe.)
Code
make db of imgs waiting to be transcribed
#!/usr/bin/python
#-*- coding:utf-8 -*-
import pymongo
from pymongo import Connection
#======== create db with img files
# Seed the 'consentTexts1' database: one record per image, numbered 0-10.
# Every record starts in the "waiting" state with a single-space placeholder
# where the transcription will later be stored.
imageCount = 11
records = Connection()['consentTexts1'].collection
for index in range(imageCount):
    records.insert({
        'title': "text%d" % index,
        'file': "img%d.jpg" % index,
        'status': "waiting",
        'text': " ",
    })
input form
Show an img waiting to be transcribed, with input form for transcription.
#!/usr/bin/python
#-*- coding:utf-8 -*-
import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection
import random
#========= get untranscribed img file
connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection
waiting = []
for entry in myDB.collection.find({"status": "waiting"}):
waiting.append(entry) # add this hash to 'waiting' list
#print waiting
howMany = len(waiting) # find out how many items in 'waiting' list
#print howMany
random = random.randint(0,howMany) # pick a random number in this range
#print random
sentence = waiting[random] # ...and choose the sentence at this index
#========== show input form
print "Content-Type: text/html"
print
print """
<!DOCTYPE html>
<html>
<head>
<title></title>
<style type="text/css">
.textImg { height: 300px; float: left;}
.textInput { height: 100px; width: 500px; margin: 0px 0px 30px 0px; }
</style>
</head>
<body>
<form action="updatedb.cgi" name="inputForm">
<img src=" """ + sentence['file'] + """ " class="textImg"/>
<br />
Transcription: <br />
<textarea name="text" class="textInput" rows="3"></textarea><br />
Your name (optional):<br />
<input type="text" name="author" value="">
<input style="" name="title" value=" """ + sentence['title'] + """ ">
<input type="submit" value="Submit" />
</form>
</body>
</html>"""
process input form
Change the status of that sentence in the db to 'done', and add the transcribed text and the author's name.
#!/usr/bin/python
#-*- coding:utf-8 -*-
import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection
#======== get text from input form
form = cgi.FieldStorage() # Grabs whatever input comes from form
text = form.getvalue("text", "(blank transcription)")
title = form.getvalue("title")
name = form.getvalue("author", "anon")
#========= put it in the db
connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection
collection.update( {'title': title}, {"$set":{'status': 'done', 'text': text, 'author': name}} )
#========= print thank you
print "Content-Type: text/html"
print
print """
<!DOCTYPE html>
<html>
<head>
<title></title>
</head>
<body>thanks, you added this transcription:<br/> <br />
<em>""" + text + """</em><br /><br />
for """ + title + """
View the other <a href="showTexts.cgi">transcribed text so far</a>.
</body>
</html>
""""
display transcribed texts
A CGI script displays an HTML document of all db entries. It fetches the entries one by one in a loop, because otherwise they print in the strange order in which they are stored in the db, starting with number 8 rather than 0 (reason unknown).
#!/usr/bin/python
#-*- coding:utf-8 -*-
import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection
connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection
#========= show all db entries
# tr appears blank for those not yet transcribed.
print "Content-Type: text/html"
print
print """
<!DOCTYPE html>
<html>
<head>
<title></title>
<style type="text/css">
table {margin: auto;}
tr {height:40px;}
.text {font-size: 30px; width: 450px; text-align:right;}
.name {font-size:9px; min-width: 200px; padding: 10px;}
</style>
</head>
<body>
<table>"""
# loop to make sure db entries are printed in sensible order, 0-10
for x in range(0,11):
entry = myDB.collection.find_one({'title': 'text' + str(x)})
print """<tr>
<td class="text">""" + entry['text'] + """</td>
<td class="name">(""" + entry['author'] + """)</td>
</tr>"""
print """</table>
</body>
</html>"""