Interfacing the law prototyping Zalan Szakacs: Difference between revisions
No edit summary |
|||
Line 9: | Line 9: | ||
[[File:Zalansearch1.png|660px|thumbnail|right]] | [[File:Zalansearch1.png|660px|thumbnail|right]] | ||
[[File:Zalansearch2.png|660px|thumbnail|right]] | [[File:Zalansearch2.png|660px|thumbnail|right]] | ||
<source lang=python> | |||
#!//usr/local/bin/python3 | |||
import cgi | |||
import cgitb; cgitb.enable() | |||
import nltk | |||
import re | |||
print ("Content-type:text/html;charset=utf-8") | |||
print () | |||
#cgi.print_environ() | |||
f = cgi.FieldStorage() | |||
submit1 = f.getvalue("submit1", "") | |||
submit2 = f.getvalue("submit2", "") | |||
text = f.getvalue("text", "") | |||
### SORTING | |||
import os | |||
import csv | |||
import string | |||
import pandas as pd | |||
import sys | |||
### SEARCHING | |||
#input keyword you want to search | |||
keyword = text | |||
print ("""<!DOCTYPE html> | |||
<html> | |||
<head> | |||
<title>Search</title> | |||
<meta charset="utf-8"> | |||
</head> | |||
<body> | |||
<p style='font-size: 20pt; font-family: Courier'>Search by keyword</p> | |||
<form method="get"> | |||
<textarea name="text" style="background: yellow; font-size: 10pt; width: 370px; height: 28px;" autofocus></textarea> | |||
<input type="submit" name="submit" value="Search" style='font-size: 9pt; height: 32px; vertical-align:top;'> | |||
</form> | |||
<p style='font-size: 9pt; font-family: Courier'> | |||
webring <br> | |||
<a href="http://145.24.204.185:8000/form.html">joca</a> | |||
<a href="http://145.24.198.145:8000/form.html">alice</a> | |||
<a href="http://145.24.246.69:8000/form.html">michael</a> | |||
<a href="http://145.24.165.175:8000/form.html">ange</a> | |||
<a href="http://145.24.254.39:8000/form.html">zalan</a> | |||
</p> | |||
</body> | |||
</html>""") | |||
x = 0 | |||
if text : | |||
#read csv, and split on "," the line | |||
csv_file = csv.reader(open('tfidf.csv', "r"), delimiter=",") | |||
col_names = next(csv_file) | |||
#loop through csv list | |||
for row in csv_file: | |||
#if current rows value is equal to input, print that row | |||
if keyword == row[0] : | |||
tfidf_list = list(zip(col_names, row)) | |||
del tfidf_list[0] | |||
sorted_by_second = sorted(tfidf_list, key=lambda x:float(x[1]), reverse=True) | |||
print ("<p></p>") | |||
print ("--------------------------------------------------------------------------------------") | |||
print ("<p style='font-size: 20pt; font-family: Courier'>Results</p>") | |||
for item in sorted_by_second: | |||
x = x+1 | |||
print ("--------------------------------------------------------------------------------------") | |||
print ("<br></br>") | |||
print(x, item) | |||
n = item[0] | |||
f = open("cgi-bin/texts/{}".format(n), "r") | |||
sents = nltk.sent_tokenize(f.read()) | |||
for sentence in sents: | |||
if re.search(r'\b({})\b'.format(text), sentence): | |||
print ("<br></br>") | |||
print(sentence) | |||
f.close() | |||
print ("<br></br>") | |||
</source> | |||
== Workshop Tunneling – Marcel Mars (memory of the world) == | == Workshop Tunneling – Marcel Mars (memory of the world) == |
Revision as of 13:26, 6 June 2018
CGI & TF-IDF search engine
Comma separated catalogue (Andre + Michael)
Workshop: – Dusan Barok (monoskop)
#!//usr/local/bin/python3
import cgi
import cgitb; cgitb.enable()
import nltk
import re
print ("Content-type:text/html;charset=utf-8")
print ()
#cgi.print_environ()
f = cgi.FieldStorage()
submit1 = f.getvalue("submit1", "")
submit2 = f.getvalue("submit2", "")
text = f.getvalue("text", "")
### SORTING
import os
import csv
import string
import pandas as pd
import sys
### SEARCHING
#input keyword you want to search
keyword = text
print ("""<!DOCTYPE html>
<html>
<head>
<title>Search</title>
<meta charset="utf-8">
</head>
<body>
<p style='font-size: 20pt; font-family: Courier'>Search by keyword</p>
<form method="get">
<textarea name="text" style="background: yellow; font-size: 10pt; width: 370px; height: 28px;" autofocus></textarea>
<input type="submit" name="submit" value="Search" style='font-size: 9pt; height: 32px; vertical-align:top;'>
</form>
<p style='font-size: 9pt; font-family: Courier'>
webring <br>
<a href="http://145.24.204.185:8000/form.html">joca</a>
<a href="http://145.24.198.145:8000/form.html">alice</a>
<a href="http://145.24.246.69:8000/form.html">michael</a>
<a href="http://145.24.165.175:8000/form.html">ange</a>
<a href="http://145.24.254.39:8000/form.html">zalan</a>
</p>
</body>
</html>""")
x = 0
if text :
#read csv, and split on "," the line
csv_file = csv.reader(open('tfidf.csv', "r"), delimiter=",")
col_names = next(csv_file)
#loop through csv list
for row in csv_file:
#if current rows value is equal to input, print that row
if keyword == row[0] :
tfidf_list = list(zip(col_names, row))
del tfidf_list[0]
sorted_by_second = sorted(tfidf_list, key=lambda x:float(x[1]), reverse=True)
print ("<p></p>")
print ("--------------------------------------------------------------------------------------")
print ("<p style='font-size: 20pt; font-family: Courier'>Results</p>")
for item in sorted_by_second:
x = x+1
print ("--------------------------------------------------------------------------------------")
print ("<br></br>")
print(x, item)
n = item[0]
f = open("cgi-bin/texts/{}".format(n), "r")
sents = nltk.sent_tokenize(f.read())
for sentence in sents:
if re.search(r'\b({})\b'.format(text), sentence):
print ("<br></br>")
print(sentence)
f.close()
print ("<br></br>")