User:Lidia.Pereira/PTSP/SSC: Difference between revisions

Revision as of 23:26, 27 June 2014

This test is an adaptation of the gender Turing test to distinguish between a man and a woman, which itself derives from the original Turing test to distinguish between a human and a machine. However, as Andrew Hodges put it, Turing's experiments are problematic due to their lack of understanding of the impact of sex, society and politics on what people might think, that is, they lack the realization that gender is nothing but a social construction. That said, this adaptation features sets of skills scraped from LinkedIn and your role is to drag them to the column where you think they belong. How do you perceive these skills?

1st Step: Scrape it like it's hot

import html5lib
from urlparse import urljoin, urldefrag
import urllib2, urllib
from urllib2 import urlopen
from xml.etree import ElementTree as ET
import time, json, argparse


# Command-line interface: the crawl is seeded with a single URL.
parser = argparse.ArgumentParser()
# The seed is mandatory: without it the crawl queue would start with None
# and the first urlopen() call would fail with an unhandled error.
parser.add_argument("-url", help="start crawling!", required=True)
args = parser.parse_args()

# URL prefixes used to classify every link found while crawling.
preffix1 = args.url  # links under the seed are crawled further
preffix2 = "http://www.linkedin.com/pub/dir/"  # links collected in middle_ground
preffix3 = "http://www.linkedin.com/pub/"  # links collected in personal_pages

# Work queues; history holds every URL already queued, to avoid duplicates.
start_urls = [preffix1]
middle_ground = []
personal_pages = []
history = []

# Output files, both opened for append so repeated runs accumulate records:
# dbase gets one url-encoded record per profile, datab a plain-text dump.
dbase = open("linkedin_database.txt", "a")
datab = open("tryout_databse.txt", "a")

# Per-profile scratch state used by the scraping loop.
dictionary = {}
same_name = ''  # last first name sent to the gender lookup
same_sex = ''   # answer the lookup returned for same_name
listing = ''
found = False



while start_urls:
    url = start_urls[0]
    print url
    start_urls = start_urls[1:]
    try:
        f = urlopen(url)
        parsed = html5lib.parse(f, namespaceHTMLElements=False)
        ol = parsed.findall(".//ol") + parsed.findall(".//ul")
        for o in ol:
            if o.attrib.get("class") == "primary" or o.attrib.get("class") == "directory":
                main_index = o
                if main_index == None:
                    continue 
                alink = main_index.findall(".//li/a")
                for b in alink:
                    if b.get("href") == None:
                        continue
                    joinity = urljoin(f.geturl(), b.attrib.get("href"))
                    joinity = urldefrag(joinity)[0]
                    if joinity not in history and joinity not in start_urls and joinity.startswith(preffix1):
                        start_urls.append(joinity)
                        history.append(joinity)
                    elif joinity not in history and joinity not in middle_ground and joinity.startswith(preffix2):
                        middle_ground.append(joinity)
                        history.append(joinity)
                    elif joinity not in history and joinity not in personal_pages and joinity.startswith(preffix3) and preffix2 not in joinity:
                        personal_pages.append(joinity)
                        history.append(joinity)
    except urllib2.URLError:
        print "Nee!"

if middle_ground:
    for mg in middle_ground:
        url = middle_ground[0]
        middle_ground[1:]
        try:
            m = urlopen(url)
            parsol = html5lib.parse(m, namespaceHTMLElements=False)
            li = parsol.findall(".//li")
            for i in li:
                if i.attrib.get("class") == "vcard":
                    blink = i.findall(".//a")
                    for bl in blink:
                        href = bl.attrib.get("href")
                        joini = urljoin(mg,href)
                        if joini not in history and joini not in personal_pages and joini.startswith(preffix3):
                            personal_pages.append(joini)
        except urllib2.URLError:
            print "Nee Nee Nee!"


if personal_pages:
    for p in personal_pages:
        url = personal_pages[0]
        print url
        personal_pages = personal_pages[1:]
        try:
            n = urlopen(url)
            parsing = html5lib.parse(n, namespaceHTMLElements=False)
            lo = parsing.findall(".//ol")
            name = parsing.findall(".//span")
            for l in lo:
                if l.attrib.get("id") == "skills-list":
                    found = True
                    datab.write("SKILLS: ")
                    list_item = l.findall(".//li/span")
                    for l_i in list_item:
                        listing += l_i.text.strip() + ", "
                        if l_i != list_item[len(list_item)-1]:
                            datab.write(l_i.text.encode("utf-8").strip() + ", ")
                        elif l_i == list_item[len(list_item)-1]:
                            datab.write(l_i.text.encode("utf-8").strip())
                            datab.write("\n")
            if found:
                for na in name:
                    if na.attrib.get("class") == "given-name":
                        given_name = na.text.encode("utf-8").lower().split()
                        given_name = given_name[0]
                        dictionary["name"] = given_name
                        datab.write("NAME:"+ given_name + '\n')
                        global same_name, same_sex
                        if given_name != same_name:
                            print given_name, "virou"
                            same_name = given_name
                            feed = urllib2.urlopen("https://gender-api.com/get?name="+ given_name +"&key=NhFeoBVUosjBsrAJAb")
                            if feed == None:
                                continue
                            data = json.load(feed)
                            sex = data['gender']
                            same_sex = sex
                            datab.write("SEX:"+ sex + '\n')
                            dictionary["sex"] = sex
                        else:
                            print given_name , "ta na mesma"
                            datab.write("SEX:"+ same_sex + '\n')
                            dictionary["sex"] = same_sex
                datab.write("\n")
                dictionary["skills"] = listing.encode("utf-8").rstrip(", ")
                dbase.write(urllib.urlencode(dictionary) + "\n")
                print dictionary  
            found = False
            listing = ''
        except urllib2.URLError:
            print "Nee!"

2nd Step: Process the Database and add id's

import uuid, urlparse, urllib

# Copy every scraped record into the vote database, giving each record a
# unique id plus masculine ("m") and feminine ("f") vote counters at zero.
with open("linkedin_database.txt", "r") as dbase, \
        open("newLinkedin_database.txt", "w") as datab:
    for line in dbase:
        d = urlparse.parse_qs(line.rstrip())
        if not d:
            # Blank or unparsable line: nothing to carry over.
            continue
        # Add each field only when it is missing, so existing ids and
        # counters are kept as they are.
        if "id" not in d:
            d["id"] = [str(uuid.uuid1())]
        d.setdefault("m", ["0"])
        d.setdefault("f", ["0"])
        # parse_qs returns lists of values; doseq=True writes the values
        # themselves instead of the text of the Python list ("['...']").
        datab.write(urllib.urlencode(d, doseq=True) + "\n")

3rd Step: Create the Interface

#!/usr/bin/env python
#-*- coding:utf-8 -*-

import cgi, urllib
import cgitb; cgitb.enable()
import urlparse, time, random
import urllib2

datab = open("newLinkedin_database.txt", "r")

lines = datab.readlines()
random.shuffle(lines)
lista = []
count = 1


print "Content-Type: text/html"
print 
print """
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset='UTF-8'>
    <title> Gender Turing Test </title>
    <link rel='stylesheet' href='../jquery-ui-1.10.4.custom/css/ui-lightness/jquery-ui-1.10.4.custom.css'>
    <script src='../jquery-ui-1.10.4.custom/js/jquery-1.10.2.js'></script>
    <script src='../jquery-ui-1.10.4.custom/js/jquery-ui-1.10.4.custom.js'></script>
     <style>
    #enquadrado{position:absolute; margin-left:3%; margin-top:1.5%; width:40%} 
    #lesdraggables{ width: 300px; position:absolute; margin-left:47%;}
    #lesdroppables{position:absolute; margin-left:70%; z-index:-999}
    #column{font-family:sans-serif; text-align:center}
    .draggable { width: 103px; height: 20px; padding: 0.5em; margin: 10px 10px 10px 0; float:left}
    #male { width: 150px; height: 550px; padding: 0.5em; margin-right: 10px; float: left }
    #female { width: 150px; height: 550px; padding: 0.5em; margin-right: 10px; float: left }
    p {font-family:georgia; font-size:11px, text-align:center}
    #explanation {font-family:sans-serif; font-size:13px; color: #009999; line-height:17px }
    #title {font-family:sans-serif; font-size: 23px; color: #FF5050}
    select { width: 100px; }
    #visualizer{font-family:sans-serif; font-size:13px; color: #099; text-transform:uppercase; text-decoration: none;}
    #visualizer:hover{color:#FF5050}
    </style>
    <script src= '../genderturingtestskillset.js'>  </script>
    </head>
    <body>
    <div id = 'enquadrado'>
    <h1 id = 'title'> Gender Turing Test </h1>
    <p id = 'explanation'> This test is an adaptation of the gender Turing test to distinguish between a man and a woman, which itself derives from the original
    Turing test to distinguish between a human and machine. However, as Andrew Hodges put it, Turing's experiments are problematic due to their
    lack of understanding of the impact of sex, society and politics on what people might think, that is, they lack the realization that gender 
    is nothing but a social construction. 
    That said, this adaptation features sets of skills scraped from Linkedin and your role is to drag them to
    the column where you think they belong. How do you perceive these skills? (<a href= 'genderturingresults.cgi' id='visualizer'> <strong>Visualizer</strong> </a>)</p>
    <button id='done'>Done!</button> <br>
    
    </div>
    <div id='lesdraggables'>
    
    """

for line in lines:
    if count < 21:
        d = urlparse.parse_qs(line.rstrip())
        skills = d["skills"][0]
        id = str(d['id'][0])
        id = id.strip("['']")
        print "<div id='"+id+"' class='ui-widget-content draggable'>"
        print "<select>"
        print  "<option value='skillset'>Skill Set %s</option>" % count
        for s in skills.split(","):
            if "['" in s:
                s=s.strip("['")
            if "']" in s:
                s=s.strip("']")
            print "<option disabled value ='skill'>"+ s +"</option>"
        print "</select></div>"
    else:
        break
    count += 1

print """</div>
    <div id ='lesdroppables'>
    <div id='male' class='ui-widget-header'>
    <p id = 'column'>Masculine</p>
    </div>
    <div id='female' class='ui-widget-header'>
    <p id = 'column'>Feminine</p>
    </div>
    </div></body>
    </html>
    """

4th Step: Make it interactive and communicate with the back-end

$(function () {
    // Highlight the heading of the column that received a card and move the
    // card's tag from the other gender to the chosen one.
    function assign(column, card, chosen, other) {
        $(column).find('p').addClass('ui-state-highlight');
        $(card).removeClass(other).addClass(chosen);
    }

    // Ids of all cards currently tagged with the given label.
    function idsTagged(label) {
        var ids = [];
        $(".draggable." + label).each(function () {
            console.log(label, this);
            ids.push($(this).attr("id"));
        });
        return ids;
    }

    // Every skill-set card can be dragged around the page.
    $('.ui-widget-content').each(function (index, card) {
        $(card).draggable();
    });

    $("#male").droppable({
        drop: function (event, ui) {
            console.log("DROP", this, ui);
            assign(this, ui.draggable, 'male', 'female');
        }
    });

    $("#female").droppable({
        drop: function (event, ui) {
            console.log("DROP", this);
            assign(this, ui.draggable, 'female', 'male');
        }
    });

    // "Done!" posts the ids of the cards in each column to the vote back-end.
    $('#done').click(function () {
        var votes = {
            male: idsTagged("male"),
            female: idsTagged("female")
        };

        $.ajax({
            type: "POST",
            url: "/cgi-bin/genderturingtestvotes.cgi",
            data: votes,
            success: function () {
                alert("Thank you!");
            },
            error: function (xhr, status) {
                console.log(status, "error");
            }
        });
    });
});

5th Step: The Back-end which processes the votes

#!/usr/bin/env python
#-*- coding:utf-8 -*-

import cgi, urllib, sys
import json, urlparse

form = cgi.FieldStorage()
malevalue = form.getlist("male[]")
femalevalue = form.getlist("female[]")

dbase = open("newLinkedin_database.txt","r")
lines = dbase.readlines()
datab = open("newLinkedin_database.txt","w")
newfile= open("TestnewFile.txt","a")
dictionary = {}

for line in lines:
    d = urlparse.parse_qs(line.rstrip())
    dictionary["id"]= d["id"][0]
    dictionary["skills"]= d["skills"][0]
    for mv in malevalue:
        if mv in d["id"][0]:
            mvalue = d["m"][0].strip("['']")
            mvalue = int(mvalue) + 1
            dictionary["m"] = [str(mvalue)]
        else:
            dictionary["m"] = d["m"][0]
            
    for fm in femalevalue:
        if fm in d["id"][0]:
            fvalue = d["f"][0].strip("['']")
            fvalue = int(fvalue) + 1
            dictionary["f"] = [str(fvalue)]
        else:
            dictionary["f"] = d["f"][0]
    del line
    datab.write(urllib.urlencode(dictionary)+"\n")

6th Step: The Visualizer

#!/usr/bin/env python
#-*- coding:utf-8 -*-

import cgi, urllib, sys
import json, urlparse


voteDb = open("newLinkedin_database.txt","r")
lines = voteDb.readlines()
count = 1

print "Content-Type: text/html"
print 
print """
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset='UTF-8'>
    <script src='/jquery-ui-1.10.4.custom/js/jquery-1.10.2.js'></script>
    <title> Gender Turing Test Visualizer </title>
    <script>

    </script>
    <style>
    
    #mas{
        width:50%;
        float:left;
        height:755px;
        height: 98vh;
        
    }
    #fem {
        width:49%;
        float:left;
        height: 98vh;
        border-right: 2px dashed #FF5050;
    }
    select { width: 100px; }

    h1{
    font-family:sans-serif; font-size: 23px; color: #FF5050; text-align: center;
    }
    #container {width:99%; position:absolute; margin-top:7vh; height:91vh}
    </style>



    </head>
    <body>
    <div id = 'fem'>
    <h1> Feminine </h1>
    </div>
    <div id = 'mas'>
    <h1> Masculine </h1>
    </div>
    <div id='container'>
    """

for line in lines:
    d = urlparse.parse_qs(line.rstrip())
    skills = d["skills"][0]
    if int(d["f"][0].strip("['']")) > 0 or int(d["m"][0].strip("['']")) > 0:
        feminine = int(d["f"][0].strip("['']"))
        masculine = int(d["m"][0].strip("['']"))
        total = int(masculine) + int(feminine)
        percent = str(91*masculine/total)
        print "<div class='mimi'style='margin-left:"+percent+"%;'>"
        print "<select class ='margina'>"
        print "<option value='skillset'>Skill Set %s</option>" % count
        for s in skills.split(","):
            if "['" in s:
                s=s.strip("['")
            if "']" in s:
                s=s.strip("']")
            print "<option disabled value ='skill'>"+ s +"</option>"
        print "</select> </div>"
    count += 1
    
print """</div>
<script>
var coisinhas = document.getElementsByClassName('margina');
for(var i = 0; i < coisinhas.length; i ++) {
    var valor = Math.floor((Math.random() * 44) + 1);
    coisinhas[i].style.marginTop = valor+'px';
}
</script>"""
print "</body>"
print "</html>"

@@ Line 139: / Line 139: @@
          except urllib2.URLError:
              print "Nee!"
-</syntax>
+</syntaxhighlight>
 '''2nd Step: Process the Database and add id's'''
@@ Line 157: / Line 157: @@
          d["f"] = [str(0)]
      datab.write(urllib.urlencode(d)+"\n")
-</syntax>
+</syntaxhighlight>
 '''3rd Step: Create the Interface'''
@@ Line 253: / Line 253: @@
      </html>
      """
-</syntax>
+</syntaxhighlight>
 '''4th Step: Make it interactive and communicate with the back-end'''
@@ Line 316: / Line 316: @@
              } );
      });
-</syntax>
+</syntaxhighlight>
 '''5th Step: The Back-end which processes the votes'''
@@ Line 357: / Line 357: @@
      del line
      datab.write(urllib.urlencode(dictionary)+"\n")
-</syntax>
+</syntaxhighlight>
 '''6th Step: The Visualizer'''
@@ Line 451: / Line 451: @@
 print "</body>"
 print "</html>"
-</syntax>
+</syntaxhighlight>
 </div>