User:Angeliki/Prototyping 3

#!/usr/bin/env python3

import cgi
import cgitb; cgitb.enable()  # for seeing errors in the code
import pandas as pd 
import csv
import sys, os, re, nltk, glob


import html  # stdlib; escapes everything interpolated into the HTML page

# CSV whose first column is compared against the query and whose header row
# labels the remaining (score) columns.
TFIDF_CSV = "tfidf.csv"
# Directory whose *.txt files are searched for sentences containing the query.
TEXTS_DIR = "texts"

# Placeholders: {0} ranked scores, {1} matching sentences, {2} the query text.
PAGE_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
	<title></title>
	<meta charset="utf-8">
</head>
<body>
<form method="get">
	<input type="submit" name="submit" value="search" >

   <textarea name="text"> {2} </textarea>

	<textarea name="result">  </textarea>

   {1}


   {0}

</form>
</body>
</html>"""


def score_value(pair):
    """Return the numeric value of a (label, score) pair for sorting.

    Cells that cannot be parsed as a number sort last instead of raising.
    """
    try:
        return float(pair[1])
    except (TypeError, ValueError):
        return float("-inf")


def rank_scores(header, row):
    """Zip the header labels with one CSV row and rank them by score.

    The first column (the query itself) is dropped. Scores stay as the
    strings read from the CSV, but they are ordered by numeric value:
    a plain string sort would put "9.5" ahead of "10.25".

    Args:
        header: list of column labels (first entry labels the query column).
        row: list of cells for the matched query, aligned with ``header``.

    Returns:
        List of ``(label, score)`` tuples, highest score first.
    """
    pairs = list(zip(header, row))[1:]
    return sorted(pairs, key=score_value, reverse=True)


def load_ranked_scores(query, csv_path=TFIDF_CSV):
    """Look up ``query`` in the first column of the CSV and rank its scores.

    Args:
        query: text to compare against the first cell of every row.
        csv_path: path of the comma-separated score table.

    Returns:
        Ranked ``(label, score)`` tuples, or an empty list when the file has
        no header or no row matches.

    Raises:
        OSError: if the CSV file cannot be opened.
    """
    with open(csv_path, "r", newline="") as handle:
        reader = csv.reader(handle, delimiter=",")
        # Read the header, put all labels into a list.
        header = next(reader, None)
        if header is None:
            return []
        matched = None
        for row in reader:
            # If the current row's first value equals the input, keep that
            # row. No break: like the original loop, the last match wins.
            if row and row[0] == query:
                matched = row
    if matched is None:
        return []
    return rank_scores(header, matched)


def query_pattern(query):
    """Compile a whole-word pattern for ``query`` taken literally.

    The query is user input, so it is escaped: characters such as ``(`` or
    ``+`` must match themselves rather than be read as regex syntax.
    NOTE: ``\\b`` only matches next to a word character, so a query that
    starts or ends with punctuation may not match where a reader expects.
    """
    return re.compile(r"\b({})\b".format(re.escape(query)))


def find_sentences(query, texts_dir=TEXTS_DIR):
    """Collect every sentence in ``texts_dir``/*.txt that contains ``query``.

    Files are globbed by joined path instead of ``os.chdir`` so the process
    working directory is left alone.

    Args:
        query: word or phrase to look for (matched as a whole word).
        texts_dir: directory holding the ``.txt`` files to search.

    Returns:
        List of matching sentences, in the order files and sentences are read.
    """
    pattern = query_pattern(query)  # compiled once, reused for every sentence
    found = []
    for path in glob.glob(os.path.join(texts_dir, "*.txt")):
        with open(path, "r") as handle:
            sentences = nltk.sent_tokenize(handle.read())
        found.extend(s for s in sentences if pattern.search(s))
    return found


def render_page(output, extract, text):
    """Fill the HTML page with the results and the echoed query.

    All three values are HTML-escaped before interpolation: ``text`` comes
    straight from the request, and the others come from files on disk.

    Args:
        output: ranked scores (or a placeholder string) shown at ``{0}``.
        extract: matching sentences (or a placeholder string) at ``{1}``.
        text: the query, echoed back into the ``text`` textarea.

    Returns:
        The complete HTML document as a string.
    """
    return PAGE_TEMPLATE.format(
        html.escape(str(output)),
        html.escape(str(extract)),
        html.escape(str(text)),
    )


def pair_lines(output, extract):
    """Return the lines printed after the page: score, sentence, score, ...

    Replaces the draft ``for i>=0`` loop and the hard-coded ``[0]``/``[1]``
    prints. Pairing stops at the shorter of the two sequences, so it can
    never index past the end. Each item is HTML-escaped.
    """
    return [
        html.escape(str(item))
        for pair in zip(output, extract)
        for item in pair
    ]


def main():
    """Handle one CGI request: read the query, search, and print the page."""
    print("Content-Type: text/html; charset=utf-8")
    print()

    # NOTE(review): the cgi module is deprecated since Python 3.11 and removed
    # in 3.13; kept here because the file already depends on it.
    form = cgi.FieldStorage()
    # getfirst() always yields a single value even if "text" is repeated.
    # strip() removes the padding spaces the template puts around {2}, which
    # would otherwise come back on resubmit and break the exact CSV match.
    text = (form.getfirst("text", "") or "").strip()

    # Defaults shown when nothing has been searched yet.
    output = "type here"
    extract = ""
    if text:
        output = load_ranked_scores(text)
        extract = find_sentences(text)

    print(render_page(output, extract, text))

    # Trailing score/sentence lines. As in the original, they are emitted
    # after the closing </html> tag.
    for line in pair_lines(output, extract):
        print(line)


if __name__ == "__main__":
    main()