User:Eleanorg/2.1/Prototypes/transcription: Difference between revisions

From XPUB & Lens-Based wiki
Line 149: Line 149:
===display transcribed texts===
===display transcribed texts===
CGI displays an html doc of all db entries.
CGI displays an html doc of all db entries.
Currently printing in the strange order they're stored in db, starting with number 8 rather than 0 - ?
Using a loop to stop entries printing in the strange order they're stored in db, starting with number 8 rather than 0 - ?
<source lang="python">
<source lang="python">
#!/usr/bin/python
#!/usr/bin/python
Line 178: Line 178:
     table {margin: auto;}
     table {margin: auto;}
     tr {height:40px;}
     tr {height:40px;}
     .text {font-size: 20px; width: 450px; text-align:right;}
     .text {font-size: 30px; width: 450px; text-align:right;}
     .name {font-size:10px; min-width: 200px; background-color: #feeefe; padding: 5px;}
     .name {font-size:9px; min-width: 200px; padding: 10px;}
     </style>
     </style>


Line 187: Line 187:
<table>"""
<table>"""


 
# loop to make sure db entries are printed in sensible order, 0-10
for entry in myDB.collection.find():
for x in range(0,11):
entry = myDB.collection.find_one({'title': 'text' + str(x)})
print """<tr>
print """<tr>
<td class="text">""" + entry['title'] + """</td>
<td class="text">""" + entry['text'] + """</td>
<td class="name">""" + entry['author'] + """</td>
<td class="name">(""" + entry['author'] + """)</td>
      </tr>"""
      </tr>"""
Line 197: Line 198:
print """</table>
print """</table>
</body>
</body>
</html>"""


</source>
</source>

Revision as of 01:15, 22 September 2012

Asking people to transcribe text and submit it to a central document. Will they transcribe verbatim?

make db of imgs waiting to be transcribed

#!/usr/bin/python
#-*- coding:utf-8 -*-

import pymongo
from pymongo import Connection


#======== create db with img files

# Connection() is called with no arguments, so pymongo's default host/port
# are used; the records live in the 'collection' collection of the
# 'consentTexts1' database.
connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection

# One record per image, numbered 0 through 10. Each starts out 'waiting'
# with a single-space placeholder for the transcription.
for index in range(11):
    record = {
        'title': "text%d" % index,
        'file': "img%d.jpg" % index,
        'status': "waiting",
        'text': " ",
    }
    collection.insert(record)

input form

Show an img waiting to be transcribed, with input form for transcription.

#!/usr/bin/python
#-*- coding:utf-8 -*-

 
import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection
import random

#========= get untranscribed img file

connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection

# Every record still marked 'waiting' is a candidate for transcription.
waiting = list(collection.find({"status": "waiting"}))

# random.choice always picks a valid element. The earlier
# random.randint(0, len(waiting)) included the upper bound, so it could
# index one past the end of the list (and always did when the list was
# empty); it also rebound the module name 'random' to an integer.
sentence = random.choice(waiting) if waiting else None


#========== show input form

print("Content-Type: text/html")
print("")

if sentence is None:
    # Nothing left to transcribe: say so instead of crashing.
    print("""
<!DOCTYPE html>
<html>
  <head>
    <title></title>
  </head>

<body>
Nothing is waiting to be transcribed.
View the <a href="showTexts.cgi">transcribed text so far</a>.
</body>

</html>""")
else:
    # Values are escaped for use inside double-quoted attributes, and are
    # inserted without surrounding spaces: the spaces previously placed
    # around the title were submitted as part of the field value, so the
    # receiving script looked up ' textN ' rather than 'textN'.
    fields = {
        'file': cgi.escape(sentence['file'], quote=True),
        'title': cgi.escape(sentence['title'], quote=True),
    }
    print("""
<!DOCTYPE html>
<html>
  <head>
    <title></title>
    <style type="text/css">
        .textImg { height: 300px; float: left;}
        .textInput { height: 100px; width: 500px; margin: 0px 0px 30px 0px; }
    </style>

  </head>

<body>

<form action="updatedb.cgi" name="inputForm">
    <img src="%(file)s"  class="textImg"/>
    <br />
    Transcription: <br />
    <textarea name="text" class="textInput" rows="3"></textarea><br />
    Your name (optional):<br />
    <input type="text" name="author" value="">
    <input style="" name="title" value="%(title)s">
    <input type="submit" value="Submit" />
</form>

</body>

</html>""" % fields)

process input form

Change the status of that sentence in the db to 'done', and add the transcribed text and the author's name.


(this version just prints the text submitted)

#!/usr/bin/python
#-*- coding:utf-8 -*-

import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection

#======== get text from input form

form = cgi.FieldStorage()			# Grabs whatever input comes from form
# getfirst() always yields a single string (getvalue() returns a list when a
# field is repeated), so the string concatenation below cannot fail on type.
text = form.getfirst("text", "(blank transcription)")
# Strip surrounding whitespace so a padded value (e.g. ' text3 ') still
# matches the title stored in the db; a missing field becomes ''.
title = form.getfirst("title", "").strip()
name = form.getfirst("author", "anon")


#========= put it in the db

connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection

# Only touch the db when the request actually names a text.
if title:
    collection.update(
        {'title': title},
        {"$set": {'status': 'done', 'text': text, 'author': name}}
    )


#========= print thank you
print("Content-Type: text/html")
print("")

if title:
    # text and title come straight from the request, so escape them before
    # echoing them back into the page.
    print("""
<!DOCTYPE html>
<html>
  <head>
    <title></title>
  </head>
  
<body>thanks, you added this transcription:<br/> <br />
<em>""" + cgi.escape(text) + """</em><br /><br />
for """ + cgi.escape(title) + """
View the other <a href="showTexts.cgi">transcribed text so far</a>.


</body>

</html>
""")
else:
    print("""
<!DOCTYPE html>
<html>
  <head>
    <title></title>
  </head>
  
<body>Sorry, the form did not say which text was transcribed, so nothing was saved.<br /><br />
View the <a href="showTexts.cgi">transcribed text so far</a>.
</body>

</html>
""")

display transcribed texts

CGI displays an HTML doc of all db entries. It loops over the titles text0 to text10 and looks each one up, so that entries print in numerical order rather than in the strange order they are stored in the db (which started with number 8 rather than 0).

#!/usr/bin/python
#-*- coding:utf-8 -*-

import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection

#========= connect to db

connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection


#========= show all transcribed texts

print("Content-Type: text/html")
print("")
print("""
<!DOCTYPE html>
<html>
  <head>
    <title></title>
    <style type="text/css">
        table {margin: auto;}
        tr {height:40px;}
        .text {font-size: 30px; width: 450px; text-align:right;}
        .name {font-size:9px; min-width: 200px; padding: 10px;}
    </style>

  </head>
  
<body>
<table>""")

# loop to make sure db entries are printed in sensible order, 0-10
for x in range(0, 11):
    entry = collection.find_one({'title': 'text' + str(x)})
    # find_one returns None when no record has this title. Records seeded
    # as 'waiting' have no 'author' field and only a blank 'text', so only
    # records marked 'done' are shown.
    if entry is None or entry.get('status') != 'done':
        continue
    # Both values were typed in by visitors; escape them before putting
    # them into the page.
    # NOTE(review): under Python 2 these values presumably come back from
    # pymongo as unicode; non-ASCII transcriptions may need an explicit
    # UTF-8 encode before printing - confirm.
    row = {
        'text': cgi.escape(entry.get('text', '')),
        'author': cgi.escape(entry.get('author', 'anon')),
    }
    print("""<tr>
            <td class="text">%(text)s</td>
            <td class="name">(%(author)s)</td>
          </tr>""" % row)


print("""</table>
</body>

</html>""")