User:Luisa Moura/prototyping/python/search
< User:Luisa Moura | prototyping/python
Revision as of 13:34, 9 April 2014 by Luisa Moura (talk | contribs) (Created page with "-- SEARCH ENGINE -- The code bellow allows a comparative research between two texts through the choice of one or two keywords. The code will grab in each text a few lines con...")
-- SEARCH ENGINE --
The code below allows a comparative search of two texts using one or two keywords. In each text, the code grabs a few lines of context around the keywords (every five-line window in which they occur together) and sets those passages, from both sources, next to each other. The objective is to create a shortcut into each author's approach to certain words or expressions.
#!/usr/bin/env python
"""CGI search page that compares two texts side by side.

The script reads the query parameters ``q1`` and ``q2``, looks in each text
for every window of ``WINDOW`` consecutive lines that contains both keywords,
and prints the matching lines of both texts as two HTML columns with the
keywords upper-cased.  A missing keyword is treated as the empty string,
which matches every window, so a single keyword can be searched on its own.
"""
import sys

# Number of consecutive lines that are examined (and shown) together.
WINDOW = 5

# Printed between groups of hit lines that are not adjacent in the text.
SEPARATOR = "========================="

TEXT_A = "machiavelli.txt"
TEXT_B = "more.txt"

PAGE_HEAD = """
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>RESEARCH</title>
<link rel="stylesheet" type="text/css" href="../styles.css">
</head>
<body onload="document.getElementById('q1').focus()">
<form action="/cgi-bin/machiavelli.py" class="top">
<input type="submit" name="s" value="Search" />
<input type="text" id="q1" name="q1" /><br />
<input type="text" id="q2" name="q2" /><br />
</form>
"""

COLUMN_A_OPEN = """
<div class="container">
<div class="title_A">
<h1>
the prince - machiavelli
</h1>
</div>
<div class="coluna_A">
"""

# Closes column A before opening column B.
COLUMN_B_OPEN = """
</div>
</div>
<div class="container">
<div class="title_B">
<h1>
utopia - thomas more
</h1>
</div>
<div class="coluna_B">
"""

PAGE_FOOT = """
</div>
</div>
<div class="limit_float"></div>
</body>
</html>
"""


def read_lines(path):
    """Return the lines of the file at *path*, closing the file afterwards."""
    with open(path) as handle:
        return handle.readlines()


def find_hits(lines, v1, v2, window=WINDOW):
    """Return the indices of all lines that belong to a matching window.

    A window is *window* consecutive lines joined together; it matches when
    both *v1* and *v2* occur somewhere in the joined text.  An empty keyword
    occurs in every string, so it never restricts the search.

    The last window (ending on the final line) is included, and a text
    shorter than *window* lines is searched as one single window.
    """
    hits = set()
    total = len(lines)
    last_start = max(total - window, 0)
    for start in range(last_start + 1):
        block = "".join(lines[start:start + window])
        if v1 in block and v2 in block:
            # Clamp to the text length so short texts yield valid indices.
            hits.update(range(start, min(start + window, total)))
    return hits


def highlight(line, v1, v2):
    """Return *line* with every occurrence of *v1* and *v2* upper-cased."""
    return line.replace(v1, v1.upper()).replace(v2, v2.upper())


def format_hits(lines, hits, v1, v2):
    """Return the output lines for one column.

    Each hit line is stripped, highlighted and followed by ``<br>``.  A
    separator is emitted before a hit whose index is more than one past the
    previously printed hit (the "previous" index starts at 0, so a first
    hit on line 0 or 1 gets no separator).

    NOTE: the text is emitted as-is, without HTML escaping.
    """
    out = []
    lastline = 0
    for n in sorted(hits):
        if n > lastline + 1:
            out.append(SEPARATOR)
            out.append("<br>")
        out.append(highlight(lines[n].strip(), v1, v2))
        out.append("<br>")
        lastline = n
    return out


def main():
    """Read both texts and the query, then write the complete HTTP response."""
    # Imported here so the helpers above stay importable without the CGI
    # modules; cgitb is kept from the original script but is not enabled.
    import cgi
    import cgitb  # noqa: F401

    lines = read_lines(TEXT_A)
    lines_2 = read_lines(TEXT_B)

    form = cgi.FieldStorage()
    # getfirst always yields a single string, even for repeated parameters.
    v1 = form.getfirst("q1", "")
    v2 = form.getfirst("q2", "")

    page = [
        "Content-type: text/html;charset=utf-8",
        "",  # blank line terminating the HTTP headers
        PAGE_HEAD,
        COLUMN_A_OPEN,
    ]
    page.extend(format_hits(lines, find_hits(lines, v1, v2), v1, v2))
    page.append(COLUMN_B_OPEN)
    page.extend(format_hits(lines_2, find_hits(lines_2, v1, v2), v1, v2))
    page.append(PAGE_FOOT)

    # One newline after every part, as individual print statements would do.
    sys.stdout.write("".join(part + "\n" for part in page))


if __name__ == "__main__":
    main()

# special thanks to Lucia