i-could-have-written-that workshop: binary oppositions, interface code

html

<!DOCTYPE html>
<html>
<head>
	<meta charset="utf-8">
	<!-- name is text supplied through the form; the template environment is created without autoescaping, so escape it here -->
	<title>{{ name|e }}</title>
	<link rel="stylesheet" type="text/css" href="../css/stylesheet.css">
	<script type="text/javascript" src="../js/jquery-2.1.3.js"></script>
</head>
<body>
	<div id="wrapper">
	
		<!-- page header: title bar with the editable oracle name, plus the logo -->
		<div id="header">
			<div class="bar">
				<div>oracle construction form</div>
				<!-- NOTE(review): this field sits outside the form element below, so it looks like its value is never posted with the form - confirm -->
				<input id="name" type="text" name="name" value="{{ name|e }}">
			</div>
			<div id="blackshape"></div>
			<img src="../img/oracle.svg" alt="oracle">
		</div>

		<form name="general" method="POST" action="" >

			<!-- every value echoed below is user or tweet text: escape it so it cannot break out of the attribute / textarea -->
			<div id="start" class="container">
				<input id="classifier-a" type="text" name="a" value="{{ a|e }}">
				<input id="classifier-b" type="text" name="b" value="{{ b|e }}">
			</div>

			<!-- *** collect training material *** -->
			<div class="bar">collect training material - use the twitter API</div>
			<div id="twitter-api"> 
				how many tweets will your oracle need to be trained? (max. of 3000 per 15 min. due to the twitter API)
					<input type="text" name="request" value="{{ request|e }}"> x 3
					<input type="submit" name="submitbutton" value="request"> 
				<br>
				<div class="a">#{{ a|e }}</div>
				<div class="b">#{{ b|e }}</div>
			</div>
			<textarea class="tweets-container font-small" name="tweetsA">{{ tweetsA|e }}</textarea> 
			<textarea class="tweets-container font-small" name="tweetsB">{{ tweetsB|e }}</textarea> 

			<!-- *** from text to filtered text *** -->
			<div class="bar">from text to a filtered text</div>
			<div id="parsing">
				<!-- the radios stay direct siblings (no label wrappers): the stylesheet spaces them with :first-of-type -->
				<!-- the submitted choice is re-checked after the round trip; the CGI script turns 'all' into an empty wordtype -->
				<div id="word-types">
					<input type="radio" name="wordtype" value="all"{% if not wordtype %} checked{% endif %}>include all word types
					<input type="radio" name="wordtype" value="NN"{% if wordtype == 'NN' %} checked{% endif %}>only nouns
					<input type="radio" name="wordtype" value="VB"{% if wordtype == 'VB' %} checked{% endif %}>only verbs
					<input type="radio" name="wordtype" value="JJ"{% if wordtype == 'JJ' %} checked{% endif %}>only adjectives
				</div>
				<hr>
				<!-- marked as beta by the stylesheet: the choice is posted but not applied yet -->
				<div id="filtering" class="beta">
					<input type="radio" name="filter" value="stemmer">stemmer
					<input type="radio" name="filter" value="lemma">lemma
					<input type="radio" name="filter" value="none">none
				</div>
				<hr>
				<div id="stopwords" class="beta">
					delete stopwords by editing the list of stopwords that is included in Pattern.
				</div>
				<hr>
				<!-- <a id="wordtype" href=""><input type="submit" name="wordtypesubmit" value="preview"></a>  -->
				<!-- NOTE(review): id word-types is used twice in this section; kept because the stylesheet margin rule targets it -->
				<div id="word-types">your selection: {{ wordtype|e }}</div>
			</div>

			<!-- *** from preview filtered text *** -->
			<!-- preview of the filtered text, one column per category (a on the left, b on the right) -->
			<!-- the CGI script hands filteredA / filteredB over as lists; filtered-out words arrive wrapped in a span with class hide -->
			<!-- NOTE(review): ids trainingset-a / trainingset-b are reused by the trainingset section further down; ids should be unique -->
			<div class="bar">preview filtered trainingset</div>
			<div id="trainingset-a" class="trainingset font-small">
				<div>
					{{ a }}:
				</div>
				<div class="trainingset-container a">
					{{ filteredA }}
				</div>
			</div>
			<div id="trainingset-b" class="trainingset font-small">
				<div>
					{{ b }}:
				</div>
				<div class="trainingset-container b">
					{{ filteredB }}
				</div>
			</div>

			<!-- *** from filtered text to bag-of-words *** -->
			<div class="bar">from filtered text to a bag-of-words</div>
			<div id="vector">
				<!-- the radios stay direct siblings (no label wrappers): the stylesheet spaces them with :first-of-type -->
				<!-- the submitted weighting is re-checked after the round trip -->
				<div id="weights">
					<input type="radio" name="weight" value="TF"{% if weight == 'TF' %} checked{% endif %}>TF
					<input type="radio" name="weight" value="TFIDF"{% if weight == 'TFIDF' %} checked{% endif %}>TFIDF
					<input type="radio" name="weight" value="BINARY"{% if weight == 'BINARY' %} checked{% endif %}>BINARY
				</div>
				<hr>
				<!-- NOTE(review): id weights is used twice in this section; kept because the stylesheet margin rule targets it -->
				<div id="weights">your selection: {{ weight|e }}</div>
				<hr>
				<input type="submit" name="submitbutton" value="create your trainingset"> 
			</div>
			<!-- bag-of-words output, one column per category -->
			<!-- bagA / bagB are the lists of document vectors collected by the CGI script -->
			<!-- printa / printb are the elements the print buttons below hand to printDiv -->
			<!-- NOTE(review): ids trainingset-a / trainingset-b are already used by the preview section above; ids should be unique -->
			<div class="bar">trainingset</div>
			<div id="trainingset-a" class="trainingset font-small">
				<div>
					{{ a }}
				</div>
				<div id="printa" class="trainingset-container a">
					{{ bagA }}
				</div>
			</div>
			<div id="trainingset-b" class="trainingset font-small">
				<div>
					{{ b }}
				</div>
				<div id="printb" class="trainingset-container b">
					{{ bagB }}
				</div>
			</div>

			<!-- one print button per category; the target is passed as an explicit selector instead of relying on the implicit window.printa / window.printb globals -->
			<div class="bar">print your trainingset</div>
			<div class="print-container a">
				<input type="button" value="print" onclick="printDiv('#printa');">
			</div>
			<div class="print-container b">
				<input type="button" value="print" onclick="printDiv('#printb');">
			</div>

			<div class="bar"></div>

		</form>

	</div>

</body>
<script type="text/javascript">
/**
 * Print only the contents of one element.
 *
 * The rest of the page is hidden while the print dialog is open and shown
 * again afterwards. The page is no longer serialised and rebuilt through
 * .html(), so text typed into the form and the chosen radio buttons survive
 * printing.
 *
 * @param divName  selector string or DOM element whose contents are printed
 */
function printDiv(divName) {
	var $body = $('body');
	// only touch what is currently visible, so show() restores exactly this set
	var $visible = $body.children(':visible');
	// copy the child nodes only, like the former .html() call did
	var $printable = $('<div>').append($(divName).contents().clone());

	$visible.hide();
	$body.append($printable);
	try {
		window.print();
	} finally {
		// always bring the page back, even if printing throws
		$printable.remove();
		$visible.show();
	}
}

// keep the query string of the a#wordtype link in sync with the chosen word type
// NOTE(review): the only a#wordtype element in the template is commented out, so this currently matches nothing
$('input[type=radio][name=wordtype]').on('change', function () {
	var query = '?wordtype=' + $(this).attr('value');
	$('a#wordtype').attr('href', query);
});
</script>
</html>

cgi

#!/usr/bin/env python

import cgi
import cgitb; cgitb.enable()
import csv
from jinja2 import FileSystemLoader, Environment
from pattern.web import Twitter
from pattern.en import Sentence, parse, tokenize
from pattern.search import search
from pattern.vector import Document, Model, KNN, TFIDF,  TF

# parse the submitted form and load the page template from the current directory
input = cgi.FieldStorage()
env = Environment(loader=FileSystemLoader("."))
template = env.get_template("interface.html")

# shorthand: value of a form field, or the given default when it was not sent
_field = input.getvalue

# ******************************************************************
# get values from interface form

# header
name = _field("name", "my oracle")

# set binary opposition: the two hashtags / categories
a = _field("a", "immoral")
b = _field("b", "moral")

# collect tweets: requested amount and the text of both textareas
request = _field("request", "")
tweetsA = _field("tweetsA", "none yet")
tweetsB = _field("tweetsB", "none yet")

# filtering: part-of-speech choice, filter choice, result lists
wordtype = _field("wordtype", "")
filter = _field("filter", "")
filteredA = _field("filteredA", [])
filteredB = _field("filteredB", [])

# bag-of-words: weighting choice and result lists
weight = _field("weight", "")
bagA = _field("bagA", [])
bagB = _field("bagB", [])

# ******************************************************************
# work with the values in python here


# --- tweets ---
APItweetsA = []
APItweetsB = []

def callAPI():
	"""Search Twitter for both hashtags and sort the results per category.

	Reads the module-level a, b and request. Every non-empty tweet is
	lower-cased and appended to APItweetsA when it contains the hashtag
	of a, otherwise to APItweetsB. Returns None.

	request arrives as text from the form; when it is not a positive
	whole number nothing is requested.
	"""
	try:
		count = int(request)
	except (TypeError, ValueError):
		return
	if count < 1:
		return

	# tweets are lower-cased below, so compare against a lower-cased hashtag
	hashtagA = '#' + a.lower()
	query = '#' + a + ' OR #' + b

	t = Twitter()
	# NOTE(review): the form says "x 3" next to the amount, but this loop asks for pages 1 and 2 only - confirm which is intended
	for page in range(1, 3):
		for tweet in t.search(query, start=page, count=count, cached=True):
			s = tweet.text.lower()
			if not s:
				continue
			if hashtagA in s:
				APItweetsA.append(s)
			else:
				APItweetsB.append(s)


# --- filtering ---
def _filter_tweets(text, tag):
	"""Split text into sentences and mark the words that do not match tag.

	text -- string of tweets
	tag  -- search pattern for the word type to keep ('NN', 'VB', 'JJ'),
	        or '' to keep everything

	Returns (sentences, filtered). Without a tag, filtered holds the
	sentences themselves. With a tag, filtered is a flat list of words:
	a word that matches the tag is kept as it is, every other word is
	wrapped in <span class="hide"> so the page can fade it out. Each word
	ends up in the list exactly once.
	"""
	sentences = tokenize(text, punctuation=".,;:!?()[]{}`''\"@#$^&*+-|=~_") # from string of tweets to sentences
	if not tag:
		return sentences, list(sentences)

	filtered = []
	for sentence in sentences:
		words = sentence.split(' ')				# list of words from sentence
		tree = Sentence(parse(sentence))		# parse tree with part-of-speech tags
		matches = set(match[0].string for match in search(tag, tree))	# words of the requested type
		for word in words:
			if word in matches:
				filtered.append(word)
			else:
				filtered.append('<span class="hide">'+word+'</span>')
	return sentences, filtered


# 'all' means: no part-of-speech filter
if wordtype == 'all':
	wordtype = ''

# filter category A
tweetsA, _keptA = _filter_tweets(tweetsA, wordtype)
filteredA.extend(_keptA)

# filter category B
tweetsB, _keptB = _filter_tweets(tweetsB, wordtype)
filteredB.extend(_keptB)


# --- bag-of-words ---
if weight:
	# BINARY is not among the names imported from pattern.vector at the top
	# of the file, so bring it into scope here
	from pattern.vector import BINARY

	# map the form value onto a weighting scheme; None for an unknown value
	w = {'TFIDF': TFIDF, 'TF': TF, 'BINARY': BINARY}.get(weight)

	if w is not None:
		# entries wrapped by the filtering step are filtered-out words: leave
		# them (and their markup) out of the model
		hidden = '<span class="hide">'

		m = Model(weight=w)
		for tweet in filteredA:
			if not tweet.startswith(hidden):
				m.append(Document(tweet, type=a, stemmer=None)) #! what is filtered here is important!!!!!!
		for tweet in filteredB:
			if not tweet.startswith(hidden):
				m.append(Document(tweet, type=b, stemmer=None))

		# collect the weighted vectors per category
		for document in m:
			if document.type == a:
				bagA.append(document.vector)
			if document.type == b:
				bagB.append(document.vector)


# ******************************************************************
# place the values back in the tvars dictionary

# values handed to the template, keyed by the names used in interface.html
tvars = {}

# header: pass on the name read from the form instead of a fixed string
tvars["name"] = name

# set binary opposition
tvars["a"] = a
tvars["b"] = b

# collect tweets
tvars["request"] = request
# only call the API when an amount and both hashtags are given
if request != '' and a != '' and b != '':
	tvars["callAPI"] = callAPI()
# one tweet per line, so the words of neighbouring tweets do not run together
# NOTE(review): without an API call both lists are empty and the textareas are rendered empty - confirm intended
tvars["tweetsA"] = '\n'.join(APItweetsA)
tvars["tweetsB"] = '\n'.join(APItweetsB)

# filtering
tvars["wordtype"] = wordtype
tvars["filter"] = filter
tvars["filteredA"] = filteredA
tvars["filteredB"] = filteredB

# bag-of-words
tvars["weight"] = weight
tvars["bagA"] = bagA
tvars["bagB"] = bagB


# ******************************************************************
# send values back to the interface 

print "Content-type: text/html;charset=utf-8"
print
print template.render(tvars).encode("utf-8")

css

/* page-wide defaults: typeface, base size and the accent colour */
body{
	font-family: "FreeSans", sans-serif;	/* generic fallback, same stack as .font-small */
	font-size: 18px;
	color:rgba(45,6,179,1);
}
/* bordered, centred page frame */
#wrapper{
	position: relative;
	width: calc(100% - 14px);
	max-width: 1600px;
	top:5px;
	margin:0px auto 20px auto;
	padding:0px;
	border:2px solid rgba(45,6,179,1);
}

/* filled section title strip */
.bar{
	width: calc(100% - 15px);
	height: 25px;
	background-color: rgba(45,6,179,1);
	color:white;
	padding:5px 5px 5px 10px;
}

	/* header area: fixed height, filled right half, centred logo, name field in the title strip */
	#header{
		height: 300px;
		width: 100%;
		border-bottom: 2px solid #2d06b3;
	}
		#header #blackshape{
			position: absolute;
			top: 35px;
			left: 50%;
			width: 50%;
			height: 265px;
			background-color: #2d06b3;
		}
		#header img{
			position: absolute;
			left: calc(50% - 77.5px);
			top: 90px;
		}
		input#name{
			position: absolute;
			left: 50%;
			top: 0;
			width: calc(50% - 15px);
			color: white;
			background-color: #2d06b3;
			border-bottom: 2px solid white;
		}

/* the two category fields, side by side */
input#classifier-a, input#classifier-b{
	display: inline-block;
	width: calc(50% - 45px);
	margin: 10px 10px 20px;
	padding: 10px;
	color: #2d06b3;
}
	input#classifier-b{
		margin-left: 0;
		padding-right: 20px;
	}

/* tweet request row, hashtag captions and the two tweet textareas */
#twitter-api{
	padding: 10px 10px 0;
}
	#twitter-api input[type="text"]{
		margin-left: 30px;
	}
	#twitter-api .a, #twitter-api .b{
		display: inline-block;
		position: relative;
		top: 0;
		width: calc(50% - 22.5px);
	}
		#twitter-api .b{
			left: 10px;
		}
	textarea.tweets-container {
		display: inline-block;
		width: calc(50% - 23.5px);
		height: 500px;
		margin: 5px 0 0;
		padding: 0 10px;
		overflow-y: scroll;
		border: none;
		color: #2d06b3;
	}

/* filter and weighting options */
#parsing{
}
	#parsing #word-types, #parsing #filtering, #vector #weights, #stopwords{
		margin: 10px;
	}
	/* radios are spread out; the first one of each row sits at the left edge */
	#parsing input[type="radio"], #vector input[type="radio"]{
		margin: 5px 15px 5px 190px;
	}
		#parsing input[type="radio"]:first-of-type, #vector input[type="radio"]:first-of-type{
			margin: 5px 15px 5px 5px;
		}
	/* greyed-out options, with a note appended after them */
	.beta{
		color: #dcdcdc;
	}
		.beta::after{
			content: '(not included yet in this beta version)';
		}
	/* filtered-out words: almost white */
	.hide{
		color: #fafafa;
	}

/* two-column training set panels */
.trainingset{
	width: calc(50% - 15px);
	height: 450px;
	display: inline-block;
	vertical-align: top;
	padding:5px;
}
	.trainingset#trainingset-b{
		border-left:2px solid rgba(45,6,179,1);
	}
	.trainingset-container{
		width: 100%;
		height: 439px;
		padding-right: 10px;
		overflow-y:auto;
	}
		.trainingset-container.b{
			padding-right: 8px;
		}
/* two-column print button row */
.print-container{
	position: relative;
	display: inline-block;
	/* keep the borders inside the calculated width so both columns still fit on one row */
	box-sizing: border-box;
	width: calc(50% - 5px);
	height: 60px;
	/* a comma-separated property list is not valid CSS: one declaration per side */
	border-top: 2px solid rgba(45,6,179,1);
	border-right: 2px solid rgba(45,6,179,1);
	border-left: 2px solid rgba(45,6,179,1);
}
	.print-container.b{
		border-left:2px solid rgba(45,6,179,1);
	}


/*general elements*/

/* smaller type for the tweet and training set text */
.font-small{
	font-size: 14px;
	font-family: "FreeSans", sans-serif;
}

/* full-width rule in the accent colour */
hr{
	margin: 0;
	width: 100%;
	border: 0;
	border-bottom: 2px solid #2d06b3;
}
/*
textarea{
	width: 100%;
	height: auto;
	border:none;
}*/
input{
	font-size: 18px;
}
	/* text fields: underline only */
	input[type="text"]{
		color: #2d06b3;
		border: none;
		border-bottom: 2px solid #2d06b3;
	}
	/* grey buttons */
	input[type="button"], input[type="submit"]{
		margin: 10px;
		padding: 5px;
		color: #2d06b3;
		background-color: #dcdcdc;
		border: 1px solid #c8c8c8;
	}

	input[type="button"]:hover, input[type="submit"]:hover{
		cursor: pointer;
	}