User:Manetta/prototyping/conversational-interfaces-WordNet-tour: Difference between revisions

Revision as of 08:47, 29 June 2015

WordNet tour latest version

#!/usr/bin/python
import sys, cgi, re
import cgitb; cgitb.enable()
from nltk.corpus import wordnet as wn
from itertools import islice

input = cgi.FieldStorage()
word = input.getvalue("word","entity")
# reading from the URL (like argv is reading from the command line)
# --> http://url.org/read.cgi?line=1000
# '1' above is the default value

print "Content-type: text/html; charset=utf-8"
# needs to be the first line, as this is saying to the browser how to write
print
# empty line to end the header
print '<head>'
print '<link rel="stylesheet" type="text/css" href="../stylesheet.css">'
print '</head>'
print'''
<body>
<div id="wrapper">
	<div id="left">
'''

if word:
	print "<h1>"+word+"</h1>"

synsetList = []
synsets = wn.synsets(word)
for synset in synsets:
	synsetList.append(synset)

for synsetItem in synsetList:
	
	synset = str(synsetItem)
	synset = re.sub(r"\..*\.\d\d.[)]", "", synset).replace('Synset(\'','')
	print '<br /><br /><b>synseItem</b>:', synset

	definition = synsetItem.definition() 
	print '<br /><br /><b>definition</b>:', definition
	
	hypernym = str(synsetItem.hypernyms())
	hypernym = re.sub(r"\..*\.\d\d.[)]", "", hypernym).replace('Synset(\'','').replace("[","").replace("]","")
	print '<br /><br /><b>is part of (hypernym)</b>:', hypernym

	print '<br /><br /><b>is a kind of (hyponym)</b>:<br /><br />'
	hyponyms = synsetItem.hyponyms()
	for hyponym in hyponyms:
		hyponym = str(hyponym)
		hyponym = re.sub(r"\..*\.\d\d.[)]", "", hyponym).replace('Synset(\'','')
		print '''
			<div class="button">
				<form action= ""> 
					<input type="submit" name="word" value={0} />
				</form>
			</div>
		'''.format(hyponym)
	print '<hr>'

print '''
	<div id="right">
'''
entity = 'entity'
if word == entity:
	print 'entity is the root of WordNet'

abstraction = 'abstraction'
if word == abstraction:
	print 'abstraction is 33% of being entity'
'''
	</div>
	</div>
</div>
</body>
'''

WordNet tour version 2

using cgi and python -m CGIHTTPServer to display the script in the browser

#!/usr/bin/env python
from pattern.en import wordnet
import sys, os, re
import time
import cgi
import cgitb; cgitb.enable()

# Common Gateway Interface (CGI) is a standard method used to generate dynamic content on Web pages and Web applications. CGI, when implemented on a Web server, provides an interface between the Web server and programs that generate the Web content. These programs are known as CGI scripts or simply CGIs; they are usually written in a scripting language, but can be written in any programming language.
# --> CGI is an interface between webcontent & webserver !

# CGI extends this system by allowing the owner of the Web server to designate a directory within the document collection as containing ***executable scripts*** (or binary files) instead of pre-written pages; this is known as a CGI directory.
# forwarding a URL to an executable script

# anything that the script sends to standard output is passed to the Web client instead of being shown on-screen in a terminal window.

print 'Content-type:text/html'
print
print '<h1>WordNet tour</h1>'
print '<link rel="stylesheet" href="../stylesheet.css">'


print '<p>welcome, this is a WordNet tour</p>'
print '<p>(WordNet is an English lexical database, created in 1985 in the Cognitive Science Laboratory of the Princeton University.)</p>'
print '<p>*we will take 5 stops. the tour starts at the top of the dataset: \'entity\'. abstract as it is, it embraces all the other elements of the dataset. <em>search for its definition in the WordNet dataset, and hit enter or submit</em> *</p>'

line = cgi.FieldStorage()
s = line.getvalue("highest-point-of-abstraction","")
print '''<form action= "">
	what is <input type="text" name="highest-point-of-abstraction" value={0} />  ?<br>
	<input type="submit">
	</form>'''.format(s)
try:
	# print s
	synset = wordnet.synsets(s)
	# print synset
	for i in range(len(synset)):
		w = synset[i]
		definition = w.gloss
		print '<p><i>'+definition+'<i></p>'
except ValueError:
	print ''

line = cgi.FieldStorage()
noun = line.getvalue("noun","")
print '''<form action= "">
	<p>**that\'s how '''+s+''' is a <u>description / speech act / characterization / label / particularization / sketch</u>. let\'s dive further and <em>continue to the next stop, by picking a word from the description: <input type="text" name="noun" value={0} /> hit enter or <input type="submit"></em>**</p>
	</form>'''.format(noun)
try:
	# print s
	synset = wordnet.synsets(noun)
	# print synset
	for i in range(len(synset)):
		w = synset[i]
		hyponyms = w.hyponyms()
		hyponyms = str(hyponyms).strip("[]").replace("[","").replace("],","").replace("Synset(u'","").replace("')","")
		print '<p><i>'+hyponyms+'<i></p>'
except ValueError:
	print ''


line = cgi.FieldStorage()
hyponym = line.getvalue("hyponym","")
print '''<form action= "">
	<p>***words in WordNet are structured, categorized and connected. above words are <u> more specific than a given word</u> <em>pick one of them, type it down here: <input type="text" name="hyponym" value={0} /> and hit enter or <input type="submit"></em>***</p>
	</form>'''.format(hyponym)
try:
	# print s
	synset = wordnet.synsets(hyponym)
	# print synset
	for i in range(len(synset)):
		w = synset[i]
		synonym = w.synonyms
		synonym = str(synonym).strip("[]").replace("[","").replace("u'","").replace("',",",").replace("'","")
		print '<p><i>'+synonym+'<i></p>'
except ValueError:
	print ''


line = cgi.FieldStorage()
entity = line.getvalue("entity","")
print '''<form action= "">
	<p>****above words contain two words, as <u>two words that can be interchanged in a context, are said to be synonymous relative to that context</u>.****</p>
	</form>'''.format(entity)
try:
	print '<p>*****an [entity] <i>'+s+'</i> is described as a [noun] <i>'+noun+'</i> which is a <u>word more generic than</u> [hyponym] <i>'+hyponym+'</i> which could be interchanged with [synonym] <i>'+synonym+'</i> relative to that context*****</p>'
except ValueError:
	print ''

print 'thanks to WordNet'

WordNet tour version 1

from pattern.en import wordnet
import sys, os, re
import time
from terminal_text_color import TextColor

tc = TextColor()
print tc.bold_blue("\n*welcome, this is a WordNet tour.*\n")
time.sleep(0)
print tc.bold_blue("*WordNet is an English lexical database, created in 1985 in the Cognitive Science Laboratory of the Princeton University.*\n")
time.sleep(0)
print tc.bold_blue("*we'll take 5 stops. you can choose where we stop. and, where we begin: name an object, and hit enter*\n")

objectname = sys.stdin.readline()
# objectname = objectname.replace("\n", "")

print tc.bold_blue("\n*if it could be called an 'entity', type: 'entity' and hit enter*\n")

definitions = []
for i in range (5):
	line = sys.stdin.readline()
	# readline --> reads line by line
	# read --> reads entire file
	line = line.replace("\n", "")
	# print line
	synset = wordnet.synsets(line)

	categoriesLower = []
	for i in range(len(synset)):
		# print i
		s = synset[i]
		# print s 

		# print 'Definition:', s.gloss
		# print '  Synonyms:', s.synonyms, '\n'
		# print ' Hypernyms:', s.hypernyms(), '\n'
		# print '  Hyponyms:', categoryLower
		# print '  Holonyms:', s.holonyms(), '\n'
		# print '  Meronyms:', s.meronyms(), '\n'

		categoryLower = s.hyponyms()
		categoriesLower.append(categoryLower)

		definition = s.gloss 
		print tc.bold_yellow('a '+line+' could be seen as '+definition)
		definitions.extend([definition])


	categoriesLower = str(categoriesLower).strip("[]").replace("[","").replace("],","").replace("\')","\n").replace("\'","")
	categoriesLower = categoriesLower.replace("Synset(u", "").replace(",,","").replace("  "," ").replace(", ","").replace("\n ","\n")
	print tc.bold_red("\n"+categoriesLower)
	print tc.bold_blue("*where could you relate the object to? don't be too strict. pick a term, and hit enter*\n")

print tc.bold_blue("*we're now at the end of the WordNet tour. Your WordNet tour could be described as:*\n")
listlen = len(definitions)
for i in range(len(definitions)):
	d = definitions[i]
	print 'it was', tc.bold_green(d)


# reference: 20 questions game, that names a object after 20 questions
# barend: maybe make it more 
# WordNet makes a lot of quite bold statements when you search for 'women' or 'man'; an interesting way of looking at such an 'objective' dataset