User:Lidia.Pereira/PNMII/RYOG: Difference between revisions

Latest revision as of 20:36, 30 March 2014

Tea Time Talks is a chatroom in which you can interact with authors whose literary work is in the public domain. You start the conversation by searching for a specific word. The conversation then unfolds by picking a random word from your reply to the passage your search returned, and so on. When you're done chatting with Balzac or, say, Dostoevsky, you can save your interaction, thus generating a new narrative.

STEPS:

1. Create a database of URLs pointing to the authors' work (the spider crawls online-literature.com to build it).

import urllib2, urlparse, urllib
import html5lib

# Crawl state shared by linkme() and the main loop.
x = 0                                       # pages handled so far
history = []                                # every href that has ever been queued
yup = "http://www.online-literature.com"    # prefix a link must have to be queued
urls = [yup + "/author_index.php"]          # pending pages, seeded with the author index
rb = "rabo.txt"
rabo = open(rb, "a")                        # append-mode output for chapter URLs

def linkme(groupname,classname):
    """Queue every not-yet-seen on-site link found under matching elements.

    groupname -- iterable of parsed elements (e.g. a findall() result)
    classname -- only elements whose "class" attribute equals this are scanned

    For each matching element, the <a> descendants of its children are
    inspected. An href is appended to both `urls` and `history` when it
    starts with `yup` and is not already in `history`. Returns None.
    """
    # Nothing here touches the network, so the former
    # `except httplib.IncompleteRead` guard could never fire; and because
    # httplib is not imported it would itself have raised NameError as soon
    # as any exception reached it. It has been removed.
    for group in groupname:
        if group.attrib.get("class") != classname:
            continue
        for mu in group:
            for link in mu.findall(".//a"):
                href = link.attrib.get("href")
                if href and href.startswith(yup) and href not in history:
                    urls.append(href)
                    history.append(href)

import sys
import httplib

# Main crawl loop: fetch each queued page, queue the links it exposes, and
# record the page URL in `rabo` when the page holds a chapter text
# (a <div id="chaptext">).
while urls:
    # Take the oldest queued URL. The previous `urls[x]` followed by
    # `urls.pop()` read one entry but discarded a different one (the last),
    # so pages were skipped and the index eventually ran past the list.
    url = urls.pop(0)
    try:
        f = urllib2.urlopen(url).read()
    except (urllib2.URLError, httplib.HTTPException) as err:
        # A single unreachable or truncated page must not abort the crawl.
        sys.stderr.write("skipping %s: %s\n" % (url, err))
        x = x + 1
        continue
    parse = html5lib.parse(f, namespaceHTMLElements=False)
    if x == 0:
        # The first page is the author index: links live in <tr class="even">.
        linkme(parse.findall(".//tr"), "even")
    else:
        linkme(parse.findall(".//ul"), "side-links")
    for mimi in parse.findall(".//div"):
        if mimi.attrib.get("id") == "chaptext":
            rabo.write(url.encode("utf-8")+"\n")
            break  # one index entry per page is enough
    x = x + 1

# Flush and release the index file once the queue is exhausted.
rabo.close()

2. Write the front page script.

#!/usr/bin/env python
#-*- coding:utf-8 -*-
import cgi, urllib
import cgitb; cgitb.enable()
import urllib2, urlparse
import html5lib
import random, uuid, os

print "Content-Type: text/html"
print 
print """<!DOCTYPE html>
<html>
<head>
    <meta charset='utf-8' />
    <link rel='stylesheet' type='text/css' href='/teatimetalks.css'>
    <link href='http://fonts.googleapis.com/css?family=Crimson+Text' rel='stylesheet' type='text/css'>
    <title>Tea Time Talks</title>
</head>
<img src ='/teatimetalk.gif' class ='header'/>
<body onload='document.getElementById('q').focus()''>
<form action='/cgi-bin/firstwebcrawler.cgi' class ='page' >
<input type='text' name='q' class ='april' placeholder='Search' size ='44'/> <br>
<input type='submit' value='' class ='search'/>
</form>
</body>
</html>"""

# Generate a new session id each time the front page is accessed and store
# it as the sole content of the sessions file.
session = str(uuid.uuid1())
# "w" replaces the previous id and creates the file when it is missing.
# The former "r+" mode raised IOError on a missing file and overwrote the
# old content in place from offset 0. The `with` block also guarantees the
# id is flushed to disk before the script goes on.
with open("TeaTimeTalk/sessions.txt", "w") as sessions:
    sessions.write(session)

# Shuffle the URL index on every visit to avoid extreme repetition in the
# search results.
with open("rabo.txt") as index:
    shuffled = index.readlines()
# Make sure every entry ends with a newline: an unterminated last line would
# otherwise be fused with whichever entry lands after it once shuffled.
shuffled = [entry if entry.endswith("\n") else entry + "\n" for entry in shuffled]
random.shuffle(shuffled)
# Close the write handle explicitly instead of relying on garbage collection
# to flush it.
with open("rabo.txt", "w") as rewritten:
    rewritten.writelines(shuffled)

3. Write the chatroom script.

#!/usr/bin/env python
#-*- coding:utf-8 -*-
import cgi, urllib
import cgitb; cgitb.enable()
import urllib2, urlparse
from urlparse import urlparse
import html5lib
import random

form = cgi.FieldStorage()

# Search word(s) typed on the front page, normalised to lower case.
# getfirst() always yields a single string; getvalue() returns a list when
# the parameter is repeated (?q=a&q=b), which made .strip() raise.
q = form.getfirst("q","").strip().lower()
# Left open on purpose: id() reads the session id from this handle.
sessions = open("TeaTimeTalk/sessions.txt")
def id():
    """Return the current session id: the last non-blank line of `sessions`.

    Reads the remaining lines of the module-level `sessions` file object and
    returns the last one that is not blank, with surrounding whitespace
    removed, or "" when there is none. Stripping matters because the result
    is used to build a file name; a trailing newline would end up in it.

    NOTE: the name shadows the builtin id(); it is kept because the rest of
    the script calls it under this name.
    """
    sesson = ""
    for line in sessions.readlines():
        stripped = line.strip()
        if stripped:
            sesson = stripped
    return sesson

# Per-request state for the chatroom page.
sessao = str(id())
# getfirst() returns one string even if the parameter is repeated in the
# query; getvalue() would return a list and break the .strip() call.
reply = form.getfirst("reply","").strip().lower()
save = form.getfirst("save")
# Store of saved narratives (appended to when the user presses "save").
armazem = open("TeaTimeTalk/armazem.txt","a")
# The URL index is read fully into memory, so its handle can be released.
april = open("rabo.txt")
lines = april.readlines()
april.close()

# Transcript of the current session; new turns are appended to it.
belezaFinal = open("TeaTimeTalk/"+sessao+".txt","a")
print "Content-Type: text/html"
print 
print """
    <!DOCTYPE html>
    <html>
    <head>
    <meta charset = utf-8 >
    <title>Tea Time Talks</title>
    <link href='http://fonts.googleapis.com/css?family=Crimson+Text' rel='stylesheet' type='text/css'>
    <link rel='stylesheet' type='text/css' href='/teatime.css'   
    </head>
    <body onload='document.getElementById("reply").focus()'>
    <a href = 'frontpage.cgi'><img src = '/teatimebanner.png' class= 'banner'/></a>
    <form>
    <input type='submit' name ='save' value='save' class ='save' \
    title ='Save your contribution' onClick='alert("Thank you for contributting!")'/>
    </form>
    <a href = 'teatimenarratives.cgi' class ='narratives'></a>
    <div class = 'teatime'>
"""

# Scratch lists for one request: `trespossible` / `possible` receive the
# candidate words picked by select(); `openlist` / `openlista` receive the
# matching sentences collected by cleanup().
trespossible, possible = [], []
openlista, openlist = [], []

def cleanup(name,list):
    """Return one "author: sentence." line quoting a sentence that contains `name`.

    name -- the word to look for (matched case-sensitively as a substring)
    list -- output list; every matching sentence found is appended to it

    The pages listed in the module-level `lines` index are fetched one by
    one until a page is found whose <div id="chaptext"> has a child element
    whose text contains `name`. That text is split on "." and the pieces
    containing `name` are collected; one of them is picked at random. The
    author is taken from the first path segment of the page URL.

    Raises IndexError when nothing can be quoted. Callers catch that to
    show the "cannot find anything" message.
    """
    author = None
    for line in lines:
        # Index entries carry their trailing newline; do not send it along.
        page_url = line.strip()
        if not page_url:
            continue
        try:
            miau = urllib2.urlopen(page_url).read()
        except urllib2.URLError:
            # A dead link in the index should not end the conversation.
            continue
        parse = html5lib.parse(miau, namespaceHTMLElements=False)
        found = False
        for mimi in parse.findall(".//div"):
            if mimi.attrib.get("id") != "chaptext":
                continue
            for search in mimi:
                searchme = search.text
                if searchme is None or name not in searchme:
                    continue
                found = True
                list.extend(y for y in searchme.split(".") if name in y)
        if found:
            author = urlparse(page_url).path.strip("/").split("/")[0]
            # Stop at the first matching page instead of walking the rest
            # of the index for nothing.
            break
    if author is None or not list:
        # Also covers an empty index, where `author` used to be unbound.
        raise IndexError("no indexed page contains %r" % (name,))
    talk = author + ": " + random.choice(list).encode("utf-8").replace('"'," ") + "."
    talk = talk.replace("\n"," ")
    talk = talk.replace("Mr.","Cavendish.")
    return talk.replace("Mrs.","Chansey.")
def select(frm,lista):
    """Append each whitespace-separated word of `frm` that has 4+ characters to `lista`.

    The list is extended in place, keeping the words in their original
    order; nothing is returned.
    """
    lista.extend(word for word in frm.strip().split() if len(word) >= 4)

if q:
    select(q,trespossible)
    nm = random.choice(trespossible)
    talk = cleanup(nm,openlist)
    print """<p>"""+talk+"""</p>"""
    belezaFinal.write(talk + "\n")

beleza = open("TeaTimeTalk/"+sessao+".txt")
pois = beleza.readlines()
x = 0
for line in pois:
    if x%2:
        print """<p class='reply'>you: """+line+"""</p>"""
    else:
        print """<p>"""+line+"""</p>"""
    x = x + 1

if reply:
    select(reply,possible)
    rs = random.choice(possible)
    try:
        merci = cleanup(rs,openlista)
        print """<p class ='reply'>you: """+reply+"""</p>"""
        print """<p>"""+merci+"""</p>"""
        belezaFinal.write(reply+"\n")
        belezaFinal.write(merci+"\n")
    except IndexError:
        print """<p>I'm sorry, we cannot find anything...</p>"""


# Save the conversation as a narrative: every turn becomes a "-" line in the
# narratives store, followed by a dotted separator.
if save:
    for position, line in enumerate(pois):
        # Only the author's turns (even positions) carry an "author: "
        # prefix to drop. Split once, so colons inside the sentence survive,
        # and leave the user's replies untouched even if they contain ":".
        if position % 2 == 0 and ":" in line:
            armazem.write("-"+line.split(":", 1)[1].lstrip(" "))
        else:
            armazem.write("-"+ line)
    armazem.write("\n..............................................\
    ...........................................\n")

print """
<form>
<input type='text' name ='reply' id='reply' size='77' placeholder='Reply...' />    
<input type='submit' value='Send!' /> <br>
</form>
</div>
</body>
</html>"""

4. Write the narratives page.

#!/usr/bin/env python
#-*- coding:utf-8 -*-
import cgi, urllib
import cgitb; cgitb.enable()

# Read every saved narrative line, then release the file handle right away
# (the handle used to stay open for the rest of the request).
with open("TeaTimeTalk/armazem.txt") as armazem:
    lines = armazem.readlines()

print "Content-Type: text/html"
print 
print """
    <!DOCTYPE html>
    <html>
    <head>
    <title>Tea Time Narratives</title>
    <link href='http://fonts.googleapis.com/css?family=Crimson+Text' rel='stylesheet' type='text/css'>
    <link rel='stylesheet' type='text/css' href='/teatime.css'>
    </head>
    <body> 
    <a href = 'frontpage.cgi'><img src = '/teanarrativesbanner.png' class= 'banner2'/></a>
    <div class='narrative'>
    """


for line in lines:
    if "..................................................\
    ......................................." not in line and line != "\n":
        print """<p class = 'teanarrative'>- """+line+"""</p>"""
    else:
        print """<p class = 'teanarrative'>"""+line+"""</p>"""


print """</div>
    </body>
    </html>"""