<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>{{ name|e }}</title>
  <link rel="stylesheet" href="../css/stylesheet.css">
  <!-- loaded synchronously on purpose: the inline script at the end of the page uses $ right away -->
  <script src="../js/jquery-2.1.3.js"></script>
</head>
<body>
<div id="wrapper">
<div id="header">
  <div class="bar">
    <div>oracle construction form</div>
    <!-- NOTE(review): this field sits outside the <form> below, so its value is not part of the POST -->
    <input id="name" type="text" name="name" value="{{ name|e }}" aria-label="oracle name">
  </div>
  <div id="blackshape"></div>
  <!-- decorative logo: empty alt keeps it out of the accessibility tree -->
  <img src="../img/oracle.svg" alt="">
</div>
<form name="general" method="POST" action="" >
<!-- the two opposing categories (hashtags) the oracle is trained on -->
<div id="start" class="container">
  <input id="classifier-a" type="text" name="a" value="{{ a|e }}" aria-label="first category (a)">
  <input id="classifier-b" type="text" name="b" value="{{ b|e }}" aria-label="second category (b)">
</div>
<!-- *** collect training material *** -->
<div class="bar">collect training material - use the twitter API</div>
<div id="twitter-api">
  <label for="request">how many tweets will your oracle need to be trained? (max. of 3000 per 15 min. due to the twitter API)</label>
  <input id="request" type="text" name="request" inputmode="numeric" value="{{ request|e }}"> x 3
  <input type="submit" name="submitbutton" value="request">
  <br>
  <div class="a">#{{ a|e }}</div>
  <div class="b">#{{ b|e }}</div>
</div>
<!-- escaped so that markup inside a tweet (e.g. a closing textarea tag) cannot break out of the field -->
<textarea class="tweets-container font-small" name="tweetsA" aria-label="tweets for #{{ a|e }}">{{ tweetsA|e }}</textarea>
<textarea class="tweets-container font-small" name="tweetsB" aria-label="tweets for #{{ b|e }}">{{ tweetsB|e }}</textarea>
<!-- *** from text to filtered text *** -->
<div class="bar">from text to a filtered text</div>
<div id="parsing">
  <!-- labels follow their inputs as siblings (not wrappers) so the stylesheet's
       input:first-of-type spacing rule keeps matching only the first radio -->
  <div id="word-types" role="radiogroup" aria-label="word types">
    <input type="radio" id="wordtype-all" name="wordtype" value="all"><label for="wordtype-all">include all word types</label>
    <input type="radio" id="wordtype-nn" name="wordtype" value="NN"><label for="wordtype-nn">only nouns</label>
    <input type="radio" id="wordtype-vb" name="wordtype" value="VB"><label for="wordtype-vb">only verbs</label>
    <input type="radio" id="wordtype-jj" name="wordtype" value="JJ"><label for="wordtype-jj">only adjectives</label>
  </div>
  <hr>
  <div id="filtering" class="beta" role="radiogroup" aria-label="filter">
    <input type="radio" id="filter-stemmer" name="filter" value="stemmer"><label for="filter-stemmer">stemmer</label>
    <input type="radio" id="filter-lemma" name="filter" value="lemma"><label for="filter-lemma">lemma</label>
    <input type="radio" id="filter-none" name="filter" value="none"><label for="filter-none">none</label>
  </div>
  <hr>
  <div id="stopwords" class="beta">
    delete stopwords by editing the list of stopwords that is included in Pattern.
  </div>
  <hr>
  <!-- <a id="wordtype" href=""><input type="submit" name="wordtypesubmit" value="preview"></a> -->
  <!-- NOTE(review): this id repeats "word-types" from above; it is kept because the
       stylesheet applies its margin through #parsing #word-types -->
  <div id="word-types">your selection: {{ wordtype|e }}</div>
</div>
<!-- *** from preview filtered text *** -->
<div class="bar">preview filtered trainingset</div>
<div id="trainingset-a" class="trainingset font-small">
  <div>
    {{ a|e }}:
  </div>
  <!-- the list is joined into running text instead of printing its Python repr;
       entries are left unescaped because hidden words arrive wrapped in <span class="hide"> -->
  <div class="trainingset-container a">
    {{ filteredA|join(' ') }}
  </div>
</div>
<div id="trainingset-b" class="trainingset font-small">
  <div>
    {{ b|e }}:
  </div>
  <div class="trainingset-container b">
    {{ filteredB|join(' ') }}
  </div>
</div>
<!-- *** from filtered text to bag-of-words *** -->
<div class="bar">from filtered text to a bag-of-words</div>
<div id="vector">
  <!-- labels follow their inputs as siblings so the stylesheet's input:first-of-type
       spacing rule keeps matching only the first radio -->
  <div id="weights" role="radiogroup" aria-label="term weighting">
    <input type="radio" id="weight-tf" name="weight" value="TF"><label for="weight-tf">TF</label>
    <input type="radio" id="weight-tfidf" name="weight" value="TFIDF"><label for="weight-tfidf">TFIDF</label>
    <input type="radio" id="weight-binary" name="weight" value="BINARY"><label for="weight-binary">BINARY</label>
  </div>
  <hr>
  <!-- NOTE(review): this id repeats "weights" from above; it is kept because the
       stylesheet applies its margin through #vector #weights -->
  <div id="weights">your selection: {{ weight|e }}</div>
  <hr>
  <input type="submit" name="submitbutton" value="create your trainingset">
</div>
<div class="bar">trainingset</div>
<!-- NOTE(review): the ids trainingset-a / trainingset-b are also used by the preview
     section earlier in the form; they are kept here because the stylesheet draws the
     divider border through .trainingset#trainingset-b -->
<div id="trainingset-a" class="trainingset font-small">
  <div>
    {{ a|e }}
  </div>
  <!-- #printa / #printb are the regions handed to printDiv() by the print buttons -->
  <div id="printa" class="trainingset-container a">
    {{ bagA }}
  </div>
</div>
<div id="trainingset-b" class="trainingset font-small">
  <div>
    {{ b|e }}
  </div>
  <div id="printb" class="trainingset-container b">
    {{ bagB }}
  </div>
</div>
<div class="bar">print your trainingset</div>
<!-- the target is passed as a selector string rather than relying on the browser
     exposing element ids as global variables -->
<div class="print-container a">
  <input type="button" value="print" aria-label="print trainingset {{ a|e }}" onclick="printDiv('#printa');">
</div>
<div class="print-container b">
  <input type="button" value="print" aria-label="print trainingset {{ b|e }}" onclick="printDiv('#printb');">
</div>
<div class="bar"></div>
</form>
</div>
</body>
<script type="text/javascript">
/**
 * Print only the contents of one element.
 *
 * The page body is temporarily replaced by the element's inner HTML, the
 * browser print dialog is opened, and the original body is put back.
 *
 * The original nodes are parked in a document fragment and re-attached
 * afterwards (instead of round-tripping the body through an HTML string), so
 * typed form values, radio selections and bound event handlers survive
 * printing and inline scripts are not executed a second time.
 *
 * @param {string|Element} divName selector or element accepted by jQuery's $().
 */
function printDiv(divName) {
  var printContents = $(divName).html();
  if (printContents === undefined) {
    // nothing matched: leave the page untouched and print it as it is
    window.print();
    return;
  }
  var body = document.body;
  var parked = document.createDocumentFragment();
  while (body.firstChild) {
    parked.appendChild(body.firstChild);
  }
  body.innerHTML = printContents;
  try {
    window.print();
  } finally {
    // always restore the page, even if printing throws
    body.innerHTML = '';
    body.appendChild(parked);
  }
}
// Whenever a word-type radio button changes, point the a#wordtype link at
// ?wordtype=<value of the radio that fired the event>.
$('input[type=radio][name=wordtype]').on('change', function () {
  var selected = this.getAttribute('value');
  $('a#wordtype').attr('href', '?wordtype=' + selected);
});
</script>
</html>
#!/usr/bin/env python
import cgi
import cgitb; cgitb.enable()
import csv
from jinja2 import FileSystemLoader, Environment
from pattern.web import Twitter
from pattern.en import Sentence, parse, tokenize
from pattern.search import search
from pattern.vector import Document, Model, KNN, TFIDF, TF
# Parse the submitted CGI form and load the page template from the current
# working directory.
input = cgi.FieldStorage()
env = Environment(loader=FileSystemLoader("."))
template = env.get_template("interface.html")
# ******************************************************************
# get values from interface form; each field falls back to the given default
# when it is missing from the request
# header
name = input.getvalue("name", "my oracle")
# the two opposing labels
a, b = input.getvalue("a", "immoral"), input.getvalue("b", "moral")
# tweet collection: requested amount and the tweet text per label
request = input.getvalue("request", "")
tweetsA, tweetsB = input.getvalue("tweetsA", "none yet"), input.getvalue("tweetsB", "none yet")
# filtering options and the filtered output lists
wordtype, filter = input.getvalue("wordtype", ""), input.getvalue("filter", "")
filteredA, filteredB = input.getvalue("filteredA", []), input.getvalue("filteredB", [])
# bag-of-words: weighting scheme and the resulting vectors
weight = input.getvalue("weight", "")
bagA, bagB = input.getvalue("bagA", []), input.getvalue("bagB", [])
# ******************************************************************
# work with the values in python here
# --- tweets ---
APItweetsA = []
APItweetsB = []


def callAPI(pages=3):
    """Search Twitter for '#a OR #b' and sort the results into the two lists.

    Each result's text is lowercased. Tweets containing the '#a' hashtag are
    appended to APItweetsA, every other non-empty tweet to APItweetsB.

    pages -- number of result pages to request; defaults to 3 to match the
             "x 3" shown next to the amount field in the form (the previous
             range(1, 3) only fetched pages 1 and 2).

    Reads the module-level form values `a`, `b` and `request`. Returns None.
    If `request` is not a positive whole number nothing is fetched.
    """
    # the form delivers the amount as text; the search call gets a number
    try:
        count = int(request)
    except (TypeError, ValueError):
        return
    if count < 1:
        return
    # tweet text is lowercased below, so the hashtag must be lowercased too
    tag_a = '#' + a.lower()
    t = Twitter()
    for page in range(1, pages + 1):
        for tweet in t.search('#'+a+' OR #'+b, start=page, count=count, cached=True):
            s = tweet.text.lower()
            if not s:
                continue
            if tag_a in s:
                APItweetsA.append(s)
            else:
                APItweetsB.append(s)
# --- filtering ---
# characters handed to tokenize() as punctuation
_PUNCTUATION = ".,;:!?()[]{}`''\"@#$^&*+-|=~_"


def _filter_tweets(text, tag, out):
    """Split `text` into sentences and append the filtered result to `out`.

    text -- the tweets of one category as a single string
    tag  -- part-of-speech search pattern ('NN', 'VB', 'JJ'), or '' for no filter
    out  -- list that receives the result (filteredA or filteredB)

    Without a tag every sentence is appended unchanged. With a tag the result
    is built word by word: a word that equals one of the matches found by
    search() is appended as-is, every other word is appended wrapped in
    <span class="hide"> so it renders with the .hide style.

    Returns the list of sentences produced by tokenize().
    """
    sentences = tokenize(text, punctuation=_PUNCTUATION)
    if not tag:
        out.extend(sentences)
        return sentences
    for sentence in sentences:
        words = sentence.split(' ')
        tree = Sentence(parse(sentence))  # parse tree with part-of-speech tags
        matches = [match[0].string for match in search(tag, tree)]
        # classify every word; the earlier for/else/break ladder left the
        # word loop as soon as the first match was found
        for word in words:
            if word in matches:
                out.append(word)
            else:
                out.append('<span class="hide">'+word+'</span>')
    return sentences


if wordtype == 'all':
    wordtype = ''
# both categories go through the same helper
tweetsA = _filter_tweets(tweetsA, wordtype, filteredA)
tweetsB = _filter_tweets(tweetsB, wordtype, filteredB)
# --- bag-of-words ---
# Build one Model over both categories with the chosen weighting scheme and
# collect each document's vector per category in bagA / bagB.
if weight:
    w = None
    if weight == 'TFIDF':
        w = TFIDF
    elif weight == 'TF':
        w = TF
    elif weight == 'BINARY':
        # BINARY is not part of the import list at the top of the script, so it
        # is imported here, only when it is actually selected
        from pattern.vector import BINARY
        w = BINARY
    # an unrecognised weight value is ignored instead of failing on an unbound w
    if w is not None:
        hidden = '<span class="hide">'
        m = Model(weight=w)
        for tweet in filteredA:
            # entries wrapped for hidden display are markup, not training text
            if not tweet.startswith(hidden):
                m.append(Document(tweet, type=a, stemmer=None))
        for tweet in filteredB:
            if not tweet.startswith(hidden):
                m.append(Document(tweet, type=b, stemmer=None))
        # vectors are read only after every document has been added to the model
        for document in m:
            if document.type == a:
                bagA.append(document.vector)
            if document.type == b:
                bagB.append(document.vector)
# ******************************************************************
# place the values back in the tvars dictionary
tvars = {}
# header: use the value read from the form (it already defaults to "my oracle")
tvars["name"] = name
# set binary opposition
tvars["a"] = a
tvars["b"] = b
# collect tweets
tvars["request"] = request
if request != '' and a != '' and b != '':
    tvars["callAPI"] = callAPI()
    # one tweet per line; joining without a separator glued the last word of a
    # tweet to the first word of the next one
    tvars["tweetsA"] = '\n'.join(APItweetsA)
    tvars["tweetsB"] = '\n'.join(APItweetsB)
else:
    # nothing fetched: hand the submitted text back so the textareas are not wiped
    tvars["tweetsA"] = input.getvalue("tweetsA", "none yet")
    tvars["tweetsB"] = input.getvalue("tweetsB", "none yet")
# filtering
tvars["wordtype"] = wordtype
tvars["filter"] = filter
tvars["filteredA"] = filteredA
tvars["filteredB"] = filteredB
# bag-of-words
tvars["weight"] = weight
tvars["bagA"] = bagA
tvars["bagB"] = bagB
# ******************************************************************
# send values back to the interface
print "Content-type: text/html;charset=utf-8"
print
print template.render(tvars).encode("utf-8")
/* Page defaults. The generic sans-serif fallback matches .font-small and is
   used when FreeSans is not installed. */
body {
  font-family: "FreeSans", sans-serif;
  font-size: 18px;
  color: rgba(45,6,179,1);
}
/* Centered page frame with the blue outline. */
#wrapper {
  position: relative;
  top: 5px;
  width: calc(100% - 14px);
  max-width: 1600px;
  margin: 0 auto 20px;
  padding: 0;
  border: 2px solid rgba(45, 6, 179, 1);
}

/* Blue section title bars. */
.bar {
  width: calc(100% - 15px);
  height: 25px;
  padding: 5px 5px 5px 10px;
  color: white;
  background-color: rgba(45, 6, 179, 1);
}

/* Header: title bar, blue right half, logo on the centre line. */
#header {
  width: 100%;
  height: 300px;
  border-bottom: 2px solid rgba(45, 6, 179, 1);
}

#header #blackshape {
  position: absolute;
  top: 35px;
  left: 50%;
  width: 50%;
  height: 265px;
  background-color: rgba(45, 6, 179, 1);
}

#header img {
  position: absolute;
  top: 90px;
  left: calc(50% - 77.5px);
}

/* Oracle name field, placed over the right half of the title bar. */
input#name {
  position: absolute;
  top: 0;
  left: 50%;
  width: calc(50% - 15px);
  color: white;
  background-color: rgba(45, 6, 179, 1);
  border-bottom: 2px solid white;
}

/* The two category fields, side by side. */
input#classifier-a,
input#classifier-b {
  display: inline-block;
  width: calc(50% - 45px);
  margin: 10px 10px 20px;
  padding: 10px;
  color: rgba(45, 6, 179, 1);
}

input#classifier-b {
  margin-left: 0;
  padding-right: 20px;
}

/* Twitter request row and the two hashtag captions beneath it. */
#twitter-api {
  padding: 10px 10px 0;
}

#twitter-api input[type="text"] {
  margin-left: 30px;
}

#twitter-api .a,
#twitter-api .b {
  position: relative;
  top: 0;
  display: inline-block;
  width: calc(50% - 22.5px);
}

#twitter-api .b {
  left: 10px;
}

/* Tweet text areas, one per category. */
textarea.tweets-container {
  display: inline-block;
  width: calc(50% - 23.5px);
  height: 500px;
  margin: 5px 0 0;
  padding: 0 10px;
  overflow-y: scroll;
  border: none;
  color: rgba(45, 6, 179, 1);
}
/* Option rows of the parsing and vector sections. */
#parsing #word-types,
#parsing #filtering,
#vector #weights,
#stopwords {
  margin: 10px;
}

/* Radio buttons are spread out; the first one in a row hugs the left edge. */
#parsing input[type="radio"],
#vector input[type="radio"] {
  margin: 5px 15px 5px 190px;
}

#parsing input[type="radio"]:first-of-type,
#vector input[type="radio"]:first-of-type {
  margin: 5px 15px 5px 5px;
}

/* Greyed-out options, followed by an explanatory suffix. */
.beta {
  color: rgba(220, 220, 220, 1);
}

.beta::after {
  content: '(not included yet in this beta version)';
}

/* Near-white text for words that were filtered out. */
.hide {
  color: rgba(250, 250, 250, 1);
}

/* Two-column training set panels. */
.trainingset {
  display: inline-block;
  width: calc(50% - 15px);
  height: 450px;
  padding: 5px;
  vertical-align: top;
}

.trainingset#trainingset-b {
  border-left: 2px solid rgba(45, 6, 179, 1);
}

.trainingset-container {
  width: 100%;
  height: 439px;
  padding-right: 10px;
  overflow-y: auto;
}

.trainingset-container.b {
  padding-right: 8px;
}
/* Print button cells, one per category.
   The former "border-top,border-right,border-left: 2px solid ..." line is not a
   valid CSS declaration (property names cannot be comma-separated), so browsers
   ignored it. It is removed rather than expanded: adding the borders would widen
   each cell beyond calc(50% - 5px) and change the rendered layout. */
.print-container {
  position: relative;
  display: inline-block;
  width: calc(50% - 5px);
  height: 60px;
}

.print-container.b {
  border-left: 2px solid rgba(45,6,179,1);
}
/* general elements */

/* Smaller type for the tweet and training set panels. */
.font-small {
  font-family: "FreeSans", sans-serif;
  font-size: 14px;
}

/* Horizontal rules drawn as a single blue line. */
hr {
  width: 100%;
  margin: 0;
  border: 0;
  border-bottom: 2px solid rgba(45, 6, 179, 1);
}

/*
textarea{
width: 100%;
height: auto;
border:none;
}*/

/* Form controls. */
input {
  font-size: 18px;
}

input[type="text"] {
  border: none;
  border-bottom: 2px solid rgba(45, 6, 179, 1);
  color: rgba(45, 6, 179, 1);
}

input[type="button"],
input[type="submit"] {
  margin: 10px;
  padding: 5px;
  color: rgba(45, 6, 179, 1);
  background-color: rgba(220, 220, 220, 1);
  border: 1px solid rgba(200, 200, 200, 1);
}

input[type="button"]:hover,
input[type="submit"]:hover {
  cursor: pointer;
}