|
|
Line 1: |
Line 1: |
| -- SEARCH ENGINE --
| |
|
| |
|
| The code below allows a comparative search across two texts using one or two keywords. It grabs from each text a few lines containing the words (five before and five after) and sets those paragraphs, from both sources, next to each other. The objective is to create a shortcut into each author's approach to certain words or expressions.
| |
|
| |
| <source lang="python">
| |
|
| |
#!/usr/bin/env python
"""CGI page that shows, side by side, passages of two texts matching keywords.

The script reads ``machiavelli.txt`` and ``more.txt``, takes up to two
keywords from the ``q1`` and ``q2`` form fields, and prints an HTML page with
one column per text.  A passage is shown when a window of ``WINDOW``
consecutive lines contains both keywords; matched keywords are printed in
upper case.

The syntax is deliberately valid on both Python 2 and Python 3 (single-argument
``print(...)`` calls, no f-strings).
"""

# Number of consecutive lines that are searched (and displayed) together.
WINDOW = 5

# Printed between two passages that are not adjacent in the source text.
SEPARATOR = "========================="

PAGE_HEAD = """
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title>RESEARCH</title>
<link rel="stylesheet" type="text/css" href="../styles.css">
</head>
<body onload="document.getElementById('q1').focus()">
<form action="/cgi-bin/machiavelli.py" class="top">
<input type="submit" name="s" value="Search" />
<input type="text" id="q1" name="q1" /><br />
<input type="text" id="q2" name="q2" /><br />
</form>
"""

COLUMN_A_OPEN = """
<div class="container">
<div class="title_A">
<h1>
the prince - machiavelli
</h1>
</div>
<div class="coluna_A">
"""

COLUMN_B_OPEN = """
</div>
</div>
<div class="container">
<div class="title_B">
<h1>
utopia - thomas more
</h1>
</div>
<div class="coluna_B">
"""

PAGE_FOOT = """
</div>
</div>
<div class="limit_float"></div>
</body>
</html>
"""


def read_lines(path):
    """Return the lines of the file at *path*, closing the file afterwards."""
    with open(path) as handle:
        return handle.readlines()


def find_hits(lines, v1, v2, window=WINDOW):
    """Return the set of line indices belonging to a matching window.

    A window is ``window`` consecutive lines joined together; it matches when
    both *v1* and *v2* occur in it as substrings.  An empty keyword is a
    substring of every window, so passing ``""`` for one keyword searches for
    the other alone, and passing ``""`` for both selects every line.

    Every window of the text is examined, including the one that ends on the
    last line; a text shorter than ``window`` lines is examined as a single
    window.  Returned indices are always valid indices into *lines*.
    """
    total = len(lines)
    hits = set()
    # "+ 1" keeps the final full window; max(..., 1) still scans short texts.
    for start in range(max(total - window + 1, 1)):
        block = "".join(lines[start:start + window])
        if v1 in block and v2 in block:
            hits.update(range(start, min(start + window, total)))
    return hits


def render_hits(lines, hits, v1, v2):
    """Return the HTML fragments for the selected lines, in text order.

    Each selected line is stripped, has *v1* and *v2* replaced by their
    upper-case form, and is followed by ``<br>``.  A separator is emitted
    before a line whose index is more than one past the previously emitted
    index (counting from 0 at the start).

    NOTE(review): lines are emitted without HTML escaping, so markup
    characters present in the source texts reach the page unchanged.
    """
    fragments = []
    lastline = 0
    for index in sorted(hits):
        if index > lastline + 1:
            fragments.append(SEPARATOR)
            fragments.append("<br>")
        text = lines[index].strip()
        fragments.append(text.replace(v1, v1.upper()).replace(v2, v2.upper()))
        fragments.append("<br>")
        lastline = index
    return fragments


def main():
    """Handle one CGI request: read the query, search both texts, print HTML."""
    import cgi
    import cgitb  # imported by the original script; not enabled here

    lines = read_lines("machiavelli.txt")
    lines_2 = read_lines("more.txt")

    # getfirst always yields a single string, even if a field is repeated.
    form = cgi.FieldStorage()
    v1 = form.getfirst("q1", "")
    v2 = form.getfirst("q2", "")

    print("Content-type: text/html;charset=utf-8")
    print("")
    print(PAGE_HEAD)

    print(COLUMN_A_OPEN)
    for fragment in render_hits(lines, find_hits(lines, v1, v2), v1, v2):
        print(fragment)

    print(COLUMN_B_OPEN)
    for fragment in render_hits(lines_2, find_hits(lines_2, v1, v2), v1, v2):
        print(fragment)

    print(PAGE_FOOT)


if __name__ == "__main__":
    main()

# special thanks to Lucia
| |
| </source>
| |
|
| |
| [[File:Search engine 01.jpg|framed|left]]
| |