User:Lidia.Pereira/PNMII/RYOG
< User:Lidia.Pereira | PNMII
Tea Time Talks is a chatroom in which you can interact with authors whose literary work is in the public domain. You start the conversation by searching for a specific word. The conversation then unfolds by picking a random word from your reply to each result and using it as the next search, and so on. When you're done chit-chatting with Balzac or, say, Dostoevsky, you can save your interaction, thus generating a new narrative.
STEPS:
1. Create a database of URLs pointing to the authors' work (the spider crawls online-literature.com in order to do so).
import urllib2, urlparse, urllib
import html5lib
# Site root; linkme() only queues hrefs that start with this prefix.
yup = "http://www.online-literature.com"
# Crawl queue, seeded with the author index page.
urls = ["http://www.online-literature.com/author_index.php"]
# Every href that has already been queued, so it is not queued twice.
history = []
# Output index: the crawl loop appends one URL per line for each page
# that contains a div with id "chaptext".
rb = "rabo.txt"
rabo = open(rb,"a")
# Number of pages processed so far; 0 identifies the author index page.
x = 0
def linkme(groupname,classname):
    """Queue every new on-site link found inside elements of a given class.

    groupname -- iterable of parsed elements (e.g. all <tr> or <ul> nodes).
    classname -- only elements whose "class" attribute equals this value
                 are inspected.

    For each matching element, the <a> descendants of its children are
    scanned. An href is appended to the module-level ``urls`` queue and to
    ``history`` when it is non-empty, starts with ``yup`` and has not been
    queued before. Returns None.
    """
    # NOTE: the original wrapped this body in
    # ``except httplib.IncompleteRead``. Nothing here performs network I/O
    # and ``httplib`` was never imported, so the handler could only turn an
    # unrelated error into a NameError; it has been removed.
    for group in groupname:
        if group.attrib.get("class") != classname:
            continue
        for mu in group:
            for link in mu.findall(".//a"):
                href = link.attrib.get("href")
                if href and href not in history and href.startswith(yup):
                    urls.append(href)
                    history.append(href)
while urls:
url = urls[x]
urls.pop()
f = urllib2.urlopen(url).read()
parse = html5lib.parse(f, namespaceHTMLElements=False)
group1 = parse.findall (".//tr")
group2 = parse.findall (".//ul")
mimimi = parse.findall(".//div")
if x == 0:
linkme(group1,"even")
else:
linkme(group2,"side-links")
if mimimi:
for mimi in mimimi:
if mimi.attrib.get("id") == "chaptext":
rabo.write(url.encode("utf-8")+"\n")
x = x + 1
2. Write the front page script.
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import cgi, urllib
import cgitb; cgitb.enable()
import urllib2, urlparse
import html5lib
import random, uuid, os
print "Content-Type: text/html"
print
print """<!DOCTYPE html>
<html>
<head>
<meta charset='utf-8' />
<link rel='stylesheet' type='text/css' href='/teatimetalks.css'>
<link href='http://fonts.googleapis.com/css?family=Crimson+Text' rel='stylesheet' type='text/css'>
<title>Tea Time Talks</title>
</head>
<img src ='/teatimetalk.gif' class ='header'/>
<body onload='document.getElementById('q').focus()''>
<form action='/cgi-bin/firstwebcrawler.cgi' class ='page' >
<input type='text' name='q' class ='april' placeholder='Search' size ='44'/> <br>
<input type='submit' value='' class ='search'/>
</form>
</body>
</html>"""
#generates a new session id each time the page is accessed
session = str(uuid.uuid1())
# "w" replaces the whole file, so it always holds exactly the newest id.
# The original "r+" required the file to exist and only overwrote its first
# bytes, leaving any stale trailing lines in place.
with open("TeaTimeTalk/sessions.txt", "w") as sessions:
    sessions.write(session)
#shuffles the index so that results do not repeat too much
with open("rabo.txt") as index:
    shuffled = index.readlines()
random.shuffle(shuffled)
# Close the file explicitly so the shuffled index is fully written out.
with open("rabo.txt","w") as index:
    index.writelines(shuffled)
3. Write the chatroom script.
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import cgi, urllib
import cgitb; cgitb.enable()
import urllib2, urlparse
from urlparse import urlparse
import html5lib
import random
# Parsed query string / POST data of the current request.
form = cgi.FieldStorage()
# Search term. getfirst() always returns a single string, whereas
# getvalue() returns a list when the field is repeated (?q=a&q=b), which
# would make .strip() fail.
q = form.getfirst("q","").strip().lower()
# File holding the session id written by the front page.
sessions = open("TeaTimeTalk/sessions.txt")
def id():
    """Return the session id stored last in the ``sessions`` file.

    Reads the remaining lines of the module-level ``sessions`` file object
    and returns the last non-blank one with surrounding whitespace removed,
    or "" when there is none. Stripping matters because the result is used
    to build a file name, where a trailing newline would be wrong.
    """
    sesson = ""
    for line in sessions.readlines():
        candidate = line.strip()
        if candidate:
            sesson = candidate
    return sesson
# Session id; names the per-session transcript file below.
sessao = str(id())
# getfirst() yields one string even if the field is repeated in the request
# (getvalue() would return a list and break .strip()).
reply = form.getfirst("reply","").strip().lower()
save = form.getfirst("save")
# Shared store that saved conversations are appended to.
armazem = open("TeaTimeTalk/armazem.txt","a")
# Index of text URLs, one per line; read fully, then closed.
with open("rabo.txt") as april:
    lines = april.readlines()
# Transcript of the current session, opened for appending.
belezaFinal = open("TeaTimeTalk/"+sessao+".txt","a")
print "Content-Type: text/html"
print
print """
<!DOCTYPE html>
<html>
<head>
<meta charset = utf-8 >
<title>Tea Time Talks</title>
<link href='http://fonts.googleapis.com/css?family=Crimson+Text' rel='stylesheet' type='text/css'>
<link rel='stylesheet' type='text/css' href='/teatime.css'
</head>
<body onload='document.getElementById("reply").focus()'>
<a href = 'frontpage.cgi'><img src = '/teatimebanner.png' class= 'banner'/></a>
<form>
<input type='submit' name ='save' value='save' class ='save' \
title ='Save your contribution' onClick='alert("Thank you for contributting!")'/>
</form>
<a href = 'teatimenarratives.cgi' class ='narratives'></a>
<div class = 'teatime'>
"""
# Sentence buffers handed to cleanup(): openlist for the initial search,
# openlista for a reply.
openlist = []
openlista = []
# Candidate words collected by select(): possible from the reply,
# trespossible from the initial search term.
possible = []
trespossible = []
def cleanup(name,list):
    """Find a sentence containing ``name`` and format it as an author line.

    Pages listed in the module-level ``lines`` index are fetched one by one
    until one has a "chaptext" div with a child whose text contains
    ``name``. Every "."-separated fragment of such text that contains
    ``name`` is appended to ``list``.

    name -- word to look for.
    list -- list that receives the matching fragments (mutated in place).

    Returns "<author>: <random fragment>." with double quotes and newlines
    replaced by spaces and "Mr."/"Mrs." replaced by "Cavendish."/"Chansey.".
    The author is the first path component of the last page fetched.

    Raises IndexError when ``list`` ends up empty (nothing matched, or the
    index is empty); callers use this as the "nothing found" signal.
    """
    # Defined up front so an empty index raises IndexError below rather than
    # NameError on an unbound ``author``.
    author = ""
    for line in lines:
        # Index lines carry a trailing newline; drop it so it cannot leak
        # into the author name, and skip blank lines.
        address = line.strip()
        if not address:
            continue
        miau = urllib2.urlopen(address).read()
        parse = html5lib.parse(miau, namespaceHTMLElements=False)
        author = urlparse(address).path.strip("/").split("/")[0]
        found = False
        for mimi in parse.findall(".//div"):
            if mimi.attrib.get("id") != "chaptext":
                continue
            for search in mimi:
                searchme = search.text
                if searchme is not None and name in searchme:
                    found = True
                    for y in searchme.split("."):
                        if name in y:
                            list.append(y)
        if found:
            # First matching page wins; stop instead of iterating over the
            # rest of the index with a flag.
            break
    talk = author + ": " + random.choice(list).encode("utf-8").replace('"'," ") + "."
    talk = talk.replace("\n"," ")
    talk = talk.replace("Mr.","Cavendish.")
    return talk.replace("Mrs.","Chansey.")
def select(frm,lista):
    """Append the candidate search words of ``frm`` to ``lista``.

    frm   -- text typed by the user.
    lista -- list that receives the candidates (mutated in place).

    Words of four or more characters are preferred. If the text has no such
    word (e.g. the search "tea"), all of its words are used instead, so the
    caller's random.choice() does not fail on an empty list. Whitespace-only
    text adds nothing. Returns None.
    """
    words = frm.split()
    longer = [rp for rp in words if len(rp) >= 4]
    lista.extend(longer or words)
if q:
select(q,trespossible)
nm = random.choice(trespossible)
talk = cleanup(nm,openlist)
print """<p>"""+talk+"""</p>"""
belezaFinal.write(talk + "\n")
beleza = open("TeaTimeTalk/"+sessao+".txt")
pois = beleza.readlines()
x = 0
for line in pois:
if x%2:
print """<p class='reply'>you: """+line+"""</p>"""
else:
print """<p>"""+line+"""</p>"""
x = x + 1
if reply:
select(reply,possible)
rs = random.choice(possible)
try:
merci = cleanup(rs,openlista)
print """<p class ='reply'>you: """+reply+"""</p>"""
print """<p>"""+merci+"""</p>"""
belezaFinal.write(reply+"\n")
belezaFinal.write(merci+"\n")
except IndexError:
print """<p>I'm sorry, we cannot find anything...</p>"""
# Save: append the transcript to the shared store, then a separator line.
if save:
    for line in pois:
        if ":" in line:
            # Drop only the leading "<speaker>:" prefix. Splitting on every
            # colon and keeping item [1] cut the sentence at its next colon
            # and lost the trailing newline.
            ls=line.split(":", 1)
            armazem.write("-"+ls[1].lstrip(" "))
        else:
            armazem.write("-"+ line)
    armazem.write("\n..............................................\
...........................................\n")
# Reply form and closing tags. The form has no action attribute, so it
# submits back to this same script.
print """
<form>
<input type='text' name ='reply' id='reply' size='77' placeholder='Reply...' />
<input type='submit' value='Send!' /> <br>
</form>
</div>
</body>
</html>"""
4. Write the narratives page.
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import cgi, urllib
import cgitb; cgitb.enable()
# Load every saved line. A missing store is treated as "nothing saved yet",
# so the page renders empty instead of failing.
try:
    armazem = open("TeaTimeTalk/armazem.txt")
except IOError:
    lines = []
else:
    with armazem:
        lines = armazem.readlines()
# CGI response: header, blank separator line, then the top of the page up to
# the opening of the narrative container.
print "Content-Type: text/html"
print
print """
<!DOCTYPE html>
<html>
<head>
<title>Tea Time Narratives</title>
<link href='http://fonts.googleapis.com/css?family=Crimson+Text' rel='stylesheet' type='text/css'>
<link rel='stylesheet' type='text/css' href='/teatime.css'>
</head>
<body>
<a href = 'frontpage.cgi'><img src = '/teanarrativesbanner.png' class= 'banner2'/></a>
<div class='narrative'>
"""
for line in lines:
if "..................................................\
......................................." not in line and line != "\n":
print """<p class = 'teanarrative'>- """+line+"""</p>"""
else:
print """<p class = 'teanarrative'>"""+line+"""</p>"""
print """</div>
</body>
</html>"""