User:Eleanorg/2.1/Prototypes/transcription: Difference between revisions

From XPUB & Lens-Based wiki
Line 148: Line 148:


===display transcribed texts===
===display transcribed texts===
CGI displays an html doc of all transcribed db entries.
CGI displays an html doc of all db entries.
Currently printing in the strange order they're stored in db, starting with number 8 rather than 0 - ?
<source lang="python">
<source lang="python">
#!/usr/bin/python
#!/usr/bin/python
Line 177: Line 178:
     table {margin: auto;}
     table {margin: auto;}
     tr {height:40px;}
     tr {height:40px;}
     .text {font-size: 20px; width: 450px; }
     .text {font-size: 20px; width: 450px; text-align:right;}
     .name {font-size:10px; min-width: 200px; background-color: #feeefe; padding: 5px;}
     .name {font-size:10px; min-width: 200px; background-color: #feeefe; padding: 5px;}
     </style>
     </style>
Line 185: Line 186:
<body>
<body>
<table>"""
<table>"""


for entry in myDB.collection.find():
for entry in myDB.collection.find():
print """<tr><td class="text">""" + entry['text'] + """</td> <td class="name">""" + entry['author'] + "</td></tr>"
print """<tr>
print"""</table>
<td class="text">""" + entry['title'] + """</td>
<td class="name">""" + entry['author'] + """</td>
      </tr>"""
print """</table>
</body>
</body>
</html>"""


</source>
</source>

Revision as of 02:04, 22 September 2012

Asking people to transcribe text and submit it to a central document. Will they transcribe verbatim?

make db of imgs waiting to be transcribed

#!/usr/bin/python
#-*- coding:utf-8 -*-

import pymongo
from pymongo import Connection


#======== seed the db with one placeholder record per image file

# Records live in the 'collection' collection of the 'consentTexts1' database.
connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection

# One record for each of the image numbers 0 through 10. Every record starts
# out with status "waiting" and a single-space placeholder as its text.
for x in range(11):
    title = "text%d" % x
    fileName = "img%d.jpg" % x
    sentence = {
        'title': title,
        'file': fileName,
        'status': "waiting",
        'text': " ",
    }
    collection.insert(sentence)

input form

Show an img waiting to be transcribed, with input form for transcription.

#!/usr/bin/python
#-*- coding:utf-8 -*-

 
import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection
import random

#========= get untranscribed img file

connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection

# Every record whose status is still "waiting" is a candidate.
waiting = list(collection.find({"status": "waiting"}))

# random.choice only ever returns an element that exists. The previous
# random.randint(0, howMany) is inclusive at both ends, so it could produce an
# index one past the end of the list, and it crashed outright when nothing was
# waiting. It also rebound the name 'random', hiding the module.
if waiting:
    sentence = random.choice(waiting)
else:
    sentence = None


#========== show input form (or a notice when nothing is left to transcribe)

print("Content-Type: text/html")
print("")

if sentence is None:
    print("""
<!DOCTYPE html>
<html>
  <head>
    <title></title>
  </head>

<body>
All texts have been transcribed.
View the <a href="showTexts.cgi">transcribed text so far</a>.
</body>

</html>""")
else:
    # The file name and title are inserted without surrounding spaces: the
    # title is sent back verbatim by the browser and is later used as an exact
    # match key in the db, so any padding would stop the record being found.
    print("""
<!DOCTYPE html>
<html>
  <head>
    <title></title>
    <style type="text/css">
        .textImg { height: 300px; float: left;}
        .textInput { height: 100px; width: 500px; margin: 0px 0px 30px 0px; }
    </style>

  </head>

<body>

<form action="updatedb.cgi" name="inputForm">
    <img src=\"""" + sentence['file'] + """\" class="textImg"/>
    <br />
    Transcription: <br />
    <textarea name="text" class="textInput" rows="3"></textarea><br />
    Your name (optional):<br />
    <input type="text" name="author" value="">
    <input style="" name="title" value=\"""" + sentence['title'] + """\">
    <input type="submit" value="Submit" />
</form>

</body>

</html>""")

process input form

Change status of that sentence in db to 'finished', & add the transcribed text & author's name.


(this version just prints the text submitted)

#!/usr/bin/python
#-*- coding:utf-8 -*-

import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection

#======== get text from input form

form = cgi.FieldStorage()   # Grabs whatever input comes from form

# getfirst always yields a single string (or the default), even if a field
# name was submitted more than once.
text = form.getfirst("text", "(blank transcription)")
name = form.getfirst("author", "anon")
# Strip surrounding whitespace so the value can match the stored title
# exactly; an absent field becomes "" instead of None.
title = form.getfirst("title", "").strip()


#========= put it in the db

connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection

# Without a title there is no record to address, so nothing is written.
if title:
    collection.update(
        {'title': title},
        {"$set": {'status': 'done', 'text': text, 'author': name}},
    )


#========= print thank you (or explain why nothing was saved)
print("Content-Type: text/html")
print("")

if title:
    # Both values come straight from the request, so they are HTML-escaped
    # before being echoed back into the page.
    print("""
<!DOCTYPE html>
<html>
  <head>
    <title></title>
  </head>
  
<body>thanks, you added this transcription:<br/> <br />
<em>""" + cgi.escape(text) + """</em><br /><br />
for """ + cgi.escape(title) + """
View the other <a href="showTexts.cgi">transcribed text so far</a>.


</body>

</html>
""")
else:
    print("""
<!DOCTYPE html>
<html>
  <head>
    <title></title>
  </head>
  
<body>No text title was submitted, so the transcription could not be saved.<br />
View the <a href="showTexts.cgi">transcribed text so far</a>.
</body>

</html>
""")

display transcribed texts

CGI displays an html doc of all db entries. Currently printing in the strange order they're stored in db, starting with number 8 rather than 0 - ?

#!/usr/bin/python
#-*- coding:utf-8 -*-

import cgi
import cgitb; cgitb.enable()
import pymongo
from pymongo import Connection

#========= connect to the db

connection = Connection()
myDB = connection['consentTexts1']
collection = myDB.collection


#========= show all db entries (title and, where present, author)

print("Content-Type: text/html")
print("")
print("""
<!DOCTYPE html>
<html>
  <head>
    <title></title>
    <style type="text/css">
        table {margin: auto;}
        tr {height:40px;}
        .text {font-size: 20px; width: 450px; text-align:right;}
        .name {font-size:10px; min-width: 200px; background-color: #feeefe; padding: 5px;}
    </style>

  </head>
  
<body>
<table>""")

# An unsorted find() returns documents in whatever order the server stores
# them, which need not be the order they were inserted in. Sorting on _id is
# expected to restore creation order (text0 first) -- this relies on the
# default ObjectId ids being generated in increasing order.
# NOTE(review): values read back from the db may be unicode; confirm that
# non-ASCII author names print correctly under the web server's environment.
for entry in collection.find().sort('_id', pymongo.ASCENDING):
    # Records that have not been transcribed yet have no 'author' key, so
    # fall back to an empty cell instead of raising KeyError.
    # The author is user-supplied, so both cells are HTML-escaped.
    title = cgi.escape(entry.get('title', ''))
    author = cgi.escape(entry.get('author', ''))
    print("""<tr>
            <td class="text">""" + title + """</td>
            <td class="name">""" + author + """</td>
          </tr>""")
	
	
print """</table>
</body>