User:Emanuele Bonetti/ProblemSet2.3
< User:Emanuele Bonetti
Revision as of 20:34, 23 September 2010 by Migratebot (talk | contribs) (Created page with "Work Cloud - comment out the three different section to have three different work cloud
<source lang="text"> import codecs
import re
from pprint import pprint
import random...")
Word Cloud - comment out the three different sections in turn to get three different word clouds
import codecs
import re
from pprint import pprint
import random
# Read the whole text file into memory, decoded as UTF-8.  The with-statement
# closes the file handle as soon as the contents have been read, instead of
# leaving it open for the rest of the run.
with codecs.open("redcircle2.txt", "r", "utf-8") as source_file:
    t = source_file.read()
# A word is any run of letters, apostrophes or hyphens delimited by word
# boundaries; re.I lets the a-z class match capital letters as well.
words = re.findall(r"\b[a-z'-]+\b", t, re.I)

# freq maps every distinct word, spelled exactly as it was matched, to the
# number of times it was found.
freq = {}
for token in words:
    freq[token] = freq.get(token, 0) + 1
# Page head: one stylesheet rule that upper-cases the body text and sets the
# default font size, font family and padding.
HEAD_HTML = """
<head>
<style type="text/css">
body {text-transform:uppercase;
font-size:56px;
font-family:"Courier New", Courier, monospace;
padding:20px;
}
</style>
</head>"""

# Title box pinned to the top-right corner, with a link to the source text.
# The stacking order is declared as "z-index:100"; the earlier spelling
# "z/index=100" is not a valid CSS declaration, so browsers dropped it.
TITLE_HTML = """<p style="z-index:100; color:red; position:fixed; top:20px;right:20px;font-size:20px; text-align:right;vertical-align:top; text-decoration:underline;"><strong>The Adventure <br />of the red circle</strong><br />Sir A. Conan Doyle<br /><br /><span style="font-size:12px;"><a href="http://www.gutenberg.org/files/2345/2345.txt">original source</a></span></p>"""

print(HEAD_HTML)
print(TITLE_HTML)
#------------------------------------------------------
# Section to print the table
# Each word counted more than once gets its own row: the word on the left and
# a bar made of one "I" per occurrence on the right.  Words counted exactly
# once are collected and printed together in a single closing row.
print("<table>")
print('<tr><td style="font-size:20px; text-align:right;vertical-align:top; text-decoration:underline;">Words appearing more than once<br /></td><td></td></tr>')
singlewords = []
for term, count in freq.items():
    if count <= 1:
        singlewords.append(term)
        continue
    bar = 'I' * count
    print("<tr>")
    print('<td style="font-size:12px;text-align:right;vertical-align:top;">' + term + ' |</td><td style="font-size:12px;vertical-align:top;color:red;">' + bar + '</td>')
    print("</tr>")
print('<tr><td style="font-size:12px;text-align:right;vertical-align:top; text-decoration:underline"><br />other words in the text</td><td style="font-size:12px;"><br />')
for term in singlewords:
    print(term + " |")
print("</td></tr>")
print("</table>")
#-------------------------------------------------------------------------------
# Shared set-up for the tag clouds: the font-size bounds, the highest word
# count, and the words grouped by how many times they occur.
smallest_size = 8
biggest_size = 100

# Seeding the candidates with 0 leaves max_value at 0 when freq is empty.
max_value = max([0] + list(freq.values()))

list_of_freq = []  # not referenced again in the visible script
cl = []  # not referenced again in the visible script

# (count, word) pairs in descending order: highest count first, and within
# one count the words in reverse string order.
l = sorted(((count, term) for term, count in freq.items()), reverse=True)

# ll maps a count to the list of words that occur exactly that many times.
ll = {}
for count, term in l:
    ll.setdefault(count, []).append(term)

# The distinct counts, largest first.
numbs = sorted(ll, reverse=True)
#----------------------------------------------------------------------------
# Section to print the first tag cloud
# One column of words per distinct count.  The highest count is printed at
# left:900px and every following count 100px further left; the words of a
# column are stacked vertically around top:400px.
c = 900  # left offset (px) of the column currently being printed
i = 100  # horizontal step (px) between two neighbouring columns
size_span = biggest_size - smallest_size
for n in numbs:
    if max_value > 1:
        # Linear scale: count 1 -> smallest_size, max_value -> biggest_size.
        # The multiplication happens before the integer division so the
        # step is not truncated to 0 when max_value - 1 exceeds size_span,
        # and (n - 1) keeps the result from going past biggest_size.
        font_size = smallest_size + size_span * (n - 1) // (max_value - 1)
    else:
        # Every word has the same count, so there is nothing to scale (and
        # max_value - 1 would be a zero divisor).
        font_size = smallest_size
    words = ll[n]
    words.sort()
    # Start half a column above the 400px line so the stack is centred on it.
    h = 400 - font_size * (len(words) // 2)
    for w in words:
        print('<span style="font-size:' + str(font_size)
              + 'px; position:fixed; top:' + str(h)
              + 'px;left:' + str(c) + 'px;">' + w + '</span>')
        h = h + font_size
    c = c - i
#---------------------------------------------------------------
# Section to print the second tag cloud
# Every word is placed at a random position inside the rectangle
# x0..x1 by y0..y1 (pixels); its font size grows with its count.
x0 = 0
x1 = 900
y0 = 0
y1 = 600
c = 0  # counts the processed frequency groups; only the disabled code uses it
size_span = biggest_size - smallest_size
for n in numbs:
    if max_value > 1:
        # Linear scale: count 1 -> smallest_size, max_value -> biggest_size.
        # The multiplication happens before the integer division so the
        # step is not truncated to 0 when max_value - 1 exceeds size_span,
        # and (n - 1) keeps the result from going past biggest_size.
        font_size = smallest_size + size_span * (n - 1) // (max_value - 1)
    else:
        # Every word has the same count, so there is nothing to scale (and
        # max_value - 1 would be a zero divisor).
        font_size = smallest_size
    words = ll[n]
    words.sort()
    for w in words:
        print('<span style="font-size:' + str(font_size)
              + 'px; position:fixed; top:' + str(random.randint(y0, y1))
              + 'px;left:' + str(random.randint(x0, x1))
              + 'px;">' + w + '</span>')
    c = c + 1
    # Disabled experiment (was switched off with a string literal):
    #   y0=y0-50*c
    #   #y1=y1+20
    #   x0=x0-50*c
    #   #x1=x1+20
#---------------------------------------------------------------
Graph
import codecs
import re
from pprint import pprint
import random
import pygraphviz
# Read the whole text file into memory, decoded as UTF-8.  The with-statement
# closes the file handle as soon as the contents have been read, instead of
# leaving it open for the rest of the run.
with codecs.open("redcircle2.txt", "r", "utf-8") as source_file:
    t = source_file.read()
# A word is any run of letters, apostrophes or hyphens delimited by word
# boundaries; re.I lets the a-z class match capital letters as well.
words = re.findall(r"\b[a-z'-]+\b", t, re.I)

# Pair each word with the word that comes directly after it; the last word
# has no successor and therefore starts no pair.
pairs = list(zip(words, words[1:]))
# graph maps each word to the list of distinct words that directly follow it
# in the text, in the order those followers were first seen.
graph = {}
list_of_words = []  # not referenced again in the visible script
for first, follower in pairs:
    followers = graph.setdefault(first, [])
    # A membership test keeps each follower unique.  Comparing the whole list
    # against [follower] only caught repeats while the list held a single
    # entry, so repeats were appended again once a word had two followers.
    if follower not in followers:
        followers.append(follower)
# Build a directed graph with one red edge from every word to each of the
# words recorded after it, then render it to graph0.png using the "circo"
# layout program.
g = pygraphviz.AGraph(directed=True)
for source, targets in graph.items():
    for target in targets:
        g.add_edge(source, target, color='red')
g.draw('graph0.png', prog="circo")