User:Angeliki/Prototyping 3

From XPUB & Lens-Based wiki
< User:Angeliki
Revision as of 13:33, 18 June 2018 by Angeliki (talk | contribs) (Created page with "<code> #!/usr/bin/env python3 import cgi import cgitb; cgitb.enable() #for seeing at errors in the code import pandas as pd import csv import sys, os, re, nltk, glob prin...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

#!/usr/bin/env python3

import cgi
import cgitb
import csv
import glob
import html
import math
import os
import re
import sys

import nltk
import pandas as pd

cgitb.enable()  # for seeing errors in the code


CSV_PATH = "tfidf.csv"
TEXTS_DIR = "texts"

# Page layout. Placeholders: {0} ranked scores, {1} extracted sentences,
# {2} the query term. All three are HTML-escaped before substitution.
PAGE_TEMPLATE = """<!DOCTYPE html>
<html>
<head>
<title></title>
<meta charset="utf-8">
</head>
<body>
<form method="get">
<input type="submit" name="submit" value="search" >

   <textarea name="text"> {2} </textarea>

<textarea name="result"> </textarea>

   {1}

{0}

</form>
</body>
</html>"""


def score_value(raw):
    """Return *raw* as a float sort key; unparsable or NaN scores rank last."""
    try:
        value = float(raw)
    except (TypeError, ValueError):
        return float("-inf")
    return float("-inf") if math.isnan(value) else value


def rank_scores(header, scores):
    """Pair each column label with its score, highest score first.

    The first column holds the query term itself, so it is dropped from
    both lists. Scores are returned as the strings read from the CSV; they
    are converted to numbers only for ordering, because a plain string sort
    would rank "1e-05" above "0.5".
    """
    pairs = list(zip(header[1:], scores[1:]))
    return sorted(pairs, key=lambda pair: score_value(pair[1]), reverse=True)


def lookup_scores(text, csv_path=CSV_PATH):
    """Return the ranked (label, score) pairs for *text* from the CSV table.

    Returns an empty list when the file has no header row or no row whose
    first cell equals *text*.
    """
    with open(csv_path, "r", newline="") as handle:
        reader = csv.reader(handle, delimiter=",")
        # Read the header, which holds all the labels.
        header = next(reader, None)
        if header is None:
            return []
        scores = None
        for row in reader:
            # No early exit: if several rows match, the last one wins.
            # Blank rows are skipped instead of raising IndexError.
            if row and row[0] == text:
                scores = row
    if scores is None:
        return []
    return rank_scores(header, scores)


def matching_sentences(sentences, text):
    """Return the sentences that contain *text* delimited by word boundaries.

    *text* is matched literally (regex metacharacters are escaped). An empty
    *text* yields no matches rather than matching every sentence.
    """
    if not text:
        return []
    pattern = re.compile(r"\b({})\b".format(re.escape(text)))
    return [sentence for sentence in sentences if pattern.search(sentence)]


def extract_sentences(text, texts_dir=TEXTS_DIR):
    """Collect sentences mentioning *text* from every ``*.txt`` in *texts_dir*."""
    found = []
    if not text:
        return found
    # Build paths instead of os.chdir() so the working directory is untouched;
    # sorting makes the order of results independent of the filesystem.
    for path in sorted(glob.glob(os.path.join(texts_dir, "*.txt"))):
        with open(path, "r") as handle:
            sentences = nltk.sent_tokenize(handle.read())
        found.extend(matching_sentences(sentences, text))
    return found


def render_page(output, extract, text):
    """Return the HTML page with all dynamic values escaped."""
    return PAGE_TEMPLATE.format(
        html.escape(str(output)),
        html.escape(str(extract)),
        html.escape(text),
    )


def main():
    """Handle one CGI request: read the query, look it up, print the page."""
    print("Content-type:text/html;charset=utf-8")
    print()

    form = cgi.FieldStorage()
    # getfirst() always returns a single string. strip() removes the padding
    # the textarea in PAGE_TEMPLATE adds around a re-submitted term.
    text = form.getfirst("text", "").strip()

    output = lookup_scores(text)
    extract = extract_sentences(text)

    print(render_page(output, extract, text))

    # Echo the top two scores and sentences after the page, interleaved,
    # skipping any that do not exist.
    for index in range(2):
        if index < len(output):
            print(html.escape(str(output[index])))
        if index < len(extract):
            print(html.escape(extract[index]))


if __name__ == "__main__":
    main()