User:Eleanorg/2.1/Prototypes/transcription: Difference between revisions

From XPUB & Lens-Based wiki
No edit summary
 
(7 intermediate revisions by the same user not shown)
Line 1: Line 1:
Asking people to transcribe text and submit it to a central document. Will they transcribe verbatim?
Asking people to transcribe text and submit it to a central document. Will they transcribe verbatim?
<br />(Answer: maybe.)
[[File:transcribe1.png]]
[[File:transcribe2.png]]
==Code==


===make db of imgs waiting to be transcribed===
===make db of imgs waiting to be transcribed===
Line 97: Line 104:
Change status of that sentence in db to 'finished', & add the transcribed text & author's name.
Change status of that sentence in db to 'finished', & add the transcribed text & author's name.


(this version just prints the text submitted)
<source lang="python">
<source lang="python">
#!/usr/bin/python
#!/usr/bin/python
Line 149: Line 154:
===display transcribed texts===
===display transcribed texts===
CGI displays an html doc of all db entries.
CGI displays an html doc of all db entries.
Currently printing in the strange order they're stored in db, starting with number 8 rather than 0 - ?
Using a loop to stop entries printing in the strange order they're stored in db, starting with number 8 rather than 0 - ?
<source lang="python">
<source lang="python">
#!/usr/bin/python
#!/usr/bin/python
Line 159: Line 164:
from pymongo import Connection
from pymongo import Connection


#========= get untranscribed img file
 


connection = Connection()
connection = Connection()
Line 166: Line 171:




#========= show all transcribed texts
#========= show all db entries
# tr appears blank for those not yet transcribed.


print "Content-Type: text/html"
print "Content-Type: text/html"
Line 178: Line 184:
     table {margin: auto;}
     table {margin: auto;}
     tr {height:40px;}
     tr {height:40px;}
     .text {font-size: 20px; width: 450px; text-align:right;}
     .text {font-size: 30px; width: 450px; text-align:right;}
     .name {font-size:10px; min-width: 200px; background-color: #feeefe; padding: 5px;}
     .name {font-size:9px; min-width: 200px; padding: 10px;}
     </style>
     </style>


Line 187: Line 193:
<table>"""
<table>"""


 
# loop to make sure db entries are printed in sensible order, 0-10
for entry in myDB.collection.find():
for x in range(0,11):
entry = myDB.collection.find_one({'title': 'text' + str(x)})
print """<tr>
print """<tr>
<td class="text">""" + entry['title'] + """</td>
<td class="text">""" + entry['text'] + """</td>
<td class="name">""" + entry['author'] + """</td>
<td class="name">(""" + entry['author'] + """)</td>
      </tr>"""
      </tr>"""
Line 197: Line 204:
print """</table>
print """</table>
</body>
</body>
</html>"""


</source>
</source>

Latest revision as of 12:38, 26 September 2012

Asking people to transcribe text and submit it to a central document. Will they transcribe verbatim?
(Answer: maybe.)

Transcribe1.png Transcribe2.png


Code

make db of imgs waiting to be transcribed

#!/usr/bin/python
#-*- coding:utf-8 -*-

import pymongo
from pymongo import Connection


#======== create db with img files

# Connect to MongoDB and select the collection that stores one record
# per image to be transcribed.
connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection

# Insert eleven records (text0/img0.jpg .. text10/img10.jpg), each marked
# as "waiting" and carrying a single-space placeholder transcription.
for index in range(11):
    suffix = str(index)
    collection.insert({
        'title': "text" + suffix,
        'file': "img" + suffix + ".jpg",
        'status': "waiting",
        'text': " ",
    })

input form

Show an img waiting to be transcribed, with input form for transcription.

#!/usr/bin/python
#-*- coding:utf-8 -*-

 
import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection
import random

#========= get untranscribed img file

def pick_waiting(waiting):
    """Return a randomly chosen entry from *waiting*, or None if it is empty.

    random.choice can only return an existing element. The previous
    random.randint(0, len(waiting)) has an inclusive upper bound, so it
    occasionally produced an index one past the end of the list (and the
    result was stored in a variable that shadowed the random module).
    """
    if not waiting:
        return None
    return random.choice(waiting)


#========== show input form

# %(file)s and %(title)s are filled in by render_form(). The values are
# inserted without padding spaces: the browser submits the title field
# verbatim, and a padded title no longer equals the one stored in the db.
FORM_PAGE = """
<!DOCTYPE html>
<html>
  <head>
    <title></title>
    <style type="text/css">
    	.textImg { height: 300px; float: left;}
    	.textInput { height: 100px; width: 500px; margin: 0px 0px 30px 0px; }
    </style>

  </head>

<body>

<form action="updatedb.cgi" name="inputForm">
	<img src="%(file)s" class="textImg"/>
	<br />
	Transcription: <br />
	<textarea name="text" class="textInput" rows="3"></textarea><br />
	Your name (optional):<br />
	<input type="text" name="author" value="">
	<input style="" name="title" value="%(title)s">
	<input type="submit" value="Submit" />
</form>

</body>

</html>"""

# Shown instead of the form when no entry has status "waiting".
NOTHING_WAITING_PAGE = """
<!DOCTYPE html>
<html>
  <head>
    <title></title>
  </head>

<body>
All texts have been transcribed.
View the <a href="showTexts.cgi">transcribed text so far</a>.
</body>

</html>"""


def render_form(sentence):
    """Return the HTML input form for one db entry.

    sentence -- dict with the keys 'file' (image shown to the visitor)
                and 'title' (sent back with the form to identify the entry).
    """
    return FORM_PAGE % {'file': sentence['file'], 'title': sentence['title']}


def main():
    """Print a CGI response with the form for a random untranscribed entry."""
    connection = Connection()
    myDB = connection['consentTexts1']
    waiting = list(myDB.collection.find({"status": "waiting"}))
    sentence = pick_waiting(waiting)

    # Single-argument print(...) behaves the same in Python 2 and 3.
    print("Content-Type: text/html")
    print("")
    if sentence is None:
        print(NOTHING_WAITING_PAGE)
    else:
        print(render_form(sentence))


if __name__ == "__main__":
    main()

process input form

Change the status of that sentence in the db to 'done', and add the transcribed text and the author's name.

#!/usr/bin/python
#-*- coding:utf-8 -*-

import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection

# The submitted text is echoed back into the page, so it must be escaped.
try:
    from html import escape as _escape_html     # Python 3
except ImportError:
    from cgi import escape as _escape_html      # Python 2


#======== get text from input form

def clean_title(raw):
    """Return the submitted sentence title without surrounding whitespace.

    A title that arrives padded with spaces would not equal the stored
    title, and the exact-match update would then change nothing. A missing
    field (None) becomes the empty string.
    """
    if raw is None:
        return ""
    return raw.strip()


#========= print thank you

# %(text)s and %(title)s are filled in, already escaped, by render_thanks().
THANKS_PAGE = """
<!DOCTYPE html>
<html>
  <head>
    <title></title>
  </head>

<body>thanks, you added this transcription:<br/> <br />
<em>%(text)s</em><br /><br />
for %(title)s
View the other <a href="showTexts.cgi">transcribed text so far</a>.


</body>

</html>
"""

# Shown when the request carries no usable title, so nothing can be saved.
NO_TITLE_PAGE = """
<!DOCTYPE html>
<html>
  <head>
    <title></title>
  </head>

<body>Sorry, the form did not say which text was transcribed, so nothing was saved.
</body>

</html>
"""


def render_thanks(text, title):
    """Return the thank-you page, with *text* and *title* HTML-escaped.

    Both values come straight from the request; escaping them keeps
    submitted markup from being interpreted by the browser.
    """
    return THANKS_PAGE % {
        'text': _escape_html(text, True),
        'title': _escape_html(title, True),
    }


def main():
    """Store the submitted transcription and print a CGI response."""
    form = cgi.FieldStorage()           # Grabs whatever input comes from form
    text = form.getfirst("text", "(blank transcription)")
    title = clean_title(form.getfirst("title"))
    name = form.getfirst("author", "anon")

    #========= put it in the db
    if title:
        connection = Connection()
        myDB = connection['consentTexts1']
        collection = myDB.collection
        collection.update(
            {'title': title},
            {"$set": {'status': 'done', 'text': text, 'author': name}},
        )
        page = render_thanks(text, title)
    else:
        page = NO_TITLE_PAGE

    # Single-argument print(...) behaves the same in Python 2 and 3.
    print("Content-Type: text/html")
    print("")
    print(page)


if __name__ == "__main__":
    main()

display transcribed texts

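A CGI script displays an HTML document of all db entries. It looks each entry up by title in a loop, because iterating over the collection directly printed the entries in the strange order in which they are stored in the db (starting with number 8 rather than 0, for reasons not yet understood).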

#!/usr/bin/python
#-*- coding:utf-8 -*-

import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection



# Transcriptions and author names are visitor input, so they are escaped
# before being written into the page.
try:
    from html import escape as _escape_html     # Python 3
except ImportError:
    from cgi import escape as _escape_html      # Python 2


#========= show all db entries
# tr appears blank for those not yet transcribed.

PAGE_HEAD = """
<!DOCTYPE html>
<html>
  <head>
    <title></title>
    <style type="text/css">
    	table {margin: auto;}
    	tr {height:40px;}
    	.text {font-size: 30px; width: 450px; text-align:right;}
    	.name {font-size:9px; min-width: 200px; padding: 10px;}
    </style>

  </head>

<body>
<table>"""

PAGE_FOOT = """</table>
</body>

</html>"""


def render_row(entry):
    """Return the <tr> markup for one db entry.

    entry -- dict as returned by find_one(), or None when no entry exists.
             'text' and 'author' are optional: entries that have not been
             transcribed yet have no 'author' key, and get an empty name
             cell instead of raising KeyError.
    """
    if entry is None:
        entry = {}
    text = _escape_html(entry.get('text', ''), True)
    author = entry.get('author')
    if author:
        name_cell = "(" + _escape_html(author, True) + ")"
    else:
        name_cell = ""
    return """<tr>
			<td class="text">""" + text + """</td>
			<td class="name">""" + name_cell + """</td>
	      </tr>"""


def main():
    """Print a CGI response listing every entry, text0 through text10."""
    connection = Connection()
    myDB = connection['consentTexts1']
    collection = myDB.collection

    # Single-argument print(...) behaves the same in Python 2 and 3.
    print("Content-Type: text/html")
    print("")
    print(PAGE_HEAD)

    # loop to make sure db entries are printed in sensible order, 0-10
    for x in range(0, 11):
        print(render_row(collection.find_one({'title': 'text' + str(x)})))

    print(PAGE_FOOT)


if __name__ == "__main__":
    main()