User:Angeliki/Prototyping 3: Difference between revisions

Revision as of 19:10, 21 June 2018

Cataloguing

sorting.cgi

#!/usr/bin/env python3
import cgi
import cgitb; cgitb.enable()  #for seeing at errors in the code
import pandas as pd 
import csv
import sys, os, re, nltk, glob

# CGI search page: looks up the submitted word in tfidf.csv, ranks the
# per-column scores for that word, and lists every sentence from texts/*.txt
# that contains the word.
#
# NOTE(review): the `cgi` module is deprecated (PEP 594) and removed in
# Python 3.13 - confirm the interpreter this script is deployed on.

# Response page. `</br>` is not valid HTML, so plain `<br>` is used. The top
# result/sentence pairs are placed inside <body> rather than after </html>.
PAGE_TEMPLATE = """
<!DOCTYPE html>
<html>
<head>
	<title></title>
	<meta charset="utf-8">
</head>
<body>
<form method="get">
	<input type="submit" name="submit" value="search" >
    <textarea name="text"> {text} </textarea>
	<textarea name="result">  </textarea><br><br>
    {extract}<br><br>
    {output}
</form>
{top_pairs}
</body>
</html>"""


def _score_value(raw):
    """Return *raw* as a float for sorting; unparsable cells sort last."""
    try:
        return float(raw)
    except (TypeError, ValueError):
        return float("-inf")


def find_scores(rows, term):
    """Return the last row whose first cell equals *term*, or None.

    Empty rows (blank CSV lines) are skipped instead of raising IndexError.
    """
    match = None
    for row in rows:
        if row and row[0] == term:
            match = row
    return match


def rank_scores(header, scores):
    """Pair each header label with its score and sort by score, highest first.

    The first pair (the query column) is dropped. Scores stay as the strings
    read from the CSV but are compared numerically, so "10.0" ranks above
    "9.0". Returns an empty list when *scores* is None (no matching row).
    """
    if scores is None:
        return []
    labelled = list(zip(header, scores))[1:]
    return sorted(labelled, key=lambda pair: _score_value(pair[1]), reverse=True)


def matching_sentences(term, sentences):
    """Return the sentences that contain *term* between word boundaries.

    The term is escaped so that user input is matched literally and cannot
    inject regular-expression syntax. An empty term matches nothing.
    """
    if not term:
        return []
    pattern = re.compile(r'\b({})\b'.format(re.escape(term)))
    return [sentence for sentence in sentences if pattern.search(sentence)]


def collect_extracts(term, folder="texts"):
    """Collect the sentences containing *term* from every .txt file in *folder*.

    Files are addressed by path, so the working directory is left unchanged.
    """
    found = []
    if not term:
        return found
    for path in glob.glob(os.path.join(folder, "*.txt")):
        with open(path, "r") as searchfile:
            sentences = nltk.sent_tokenize(searchfile.read())
        found.extend(matching_sentences(term, sentences))
    return found


def render_page(term, ranked, extracts):
    """Build the HTML response; every dynamic value is HTML-escaped."""
    import html  # local import: only this function needs it

    def esc(value):
        return html.escape(str(value))

    # Show the two best (label, score) pairs next to the first two sentences,
    # but only as many as actually exist.
    top_lines = []
    for index in range(2):
        if index < len(ranked):
            top_lines.append(esc(ranked[index]))
        if index < len(extracts):
            top_lines.append(esc(extracts[index]))

    return PAGE_TEMPLATE.format(
        text=esc(term),
        extract=esc(extracts),
        output=esc(ranked),
        top_pairs="\n".join(top_lines),
    )


def cgi_main():
    """Handle one CGI request: read the query, rank, extract, print the page."""
    print("Content-type:text/html;charset=utf-8")
    print()

    form = cgi.FieldStorage()
    # The template pads the textarea with spaces, which come back with the
    # next submit; strip them so the word can still equal a CSV cell.
    term = (form.getfirst("text", "") or "").strip()

    with open('tfidf.csv', "r", newline="") as handle:
        reader = csv.reader(handle, delimiter=",")
        # The first line holds the column labels.
        header = next(reader, [])
        scores = find_scores(reader, term)

    ranked = rank_scores(header, scores)
    extracts = collect_extracts(term)
    print(render_page(term, ranked, extracts))


if __name__ == "__main__":
    cgi_main()

sorting.html

<!DOCTYPE html>
<html>
<head>
	<title></title>
	<meta charset="utf-8">
</head>
<body>
<!-- Static entry page: the form below sends its fields with GET to cgi-bin/sorting.cgi. -->
<form method="get" action="cgi-bin/sorting.cgi">
	<input type="submit" name="submit" value="search" >
	<!-- "text" is the field the CGI script reads as the search word. -->
	<textarea name="text"> Type here. </textarea>
	<!-- "result" is submitted too, but the visible CGI script never reads it. -->
	<textarea name="result"> extracts </textarea>
</form>
</body>
</html>

Most downloaded books

import csv
from collections import defaultdict, Counter
# Prints, for a fixed set of countries, the ten most downloaded books.
# content.csv supplies the titles (second column); analyze.csv is
# tab-separated with the columns: book id, country, download count.

# (country as it appears in analyze.csv, heading printed for its report)
COUNTRY_REPORTS = (
    ('Brazil', '10 most downloaded books in Brazil:'),
    ('Netherlands', '10 most downloaded books in the Netherlands:'),
    ('Greece', '10 most downloaded books in the Greece:'),
    ('Indonesia', '10 most downloaded books in the Indonesia:'),
    ('Germany', '10 most downloaded books in the Germany:'),
    ('Austria', '10 most downloaded books in the Austria:'),
    ('Belgium', '10 most downloaded books in the Belgium:'),
    ('Hungary', '10 most downloaded books in the Hungary:'),
    ('Romania', '10 most downloaded books in the Romania:'),
)

# Unused by the visible code; kept so any later code that expects these
# module-level counters still finds them.
id_counts = Counter()
country_counts = Counter()


def load_catalogue(path='content.csv'):
    """Read *path* as CSV and return all rows, stripping NUL characters first.

    Blank lines are kept as empty rows so that row positions stay aligned
    with the book ids used to index the catalogue.
    """
    with open(path, newline='') as csvfile:
        return list(csv.reader(line.replace('\0', '') for line in csvfile))


def group_downloads(rows, countries):
    """Group (book id, downloads) pairs by country in a single pass.

    Only rows whose second cell is one of *countries* are kept. Rows with
    fewer than three cells or a non-integer download count are skipped
    instead of aborting the whole report.
    """
    wanted = set(countries)
    grouped = defaultdict(list)
    for row in rows:
        if len(row) < 3 or row[1] not in wanted:
            continue
        try:
            int(row[2])
        except ValueError:
            continue
        grouped[row[1]].append((row[0], row[2]))
    return dict(grouped)


def top_downloads(entries, limit=10):
    """Return the *limit* entries with the highest download count, highest first."""
    return sorted(entries, key=lambda entry: int(entry[1]), reverse=True)[:limit]


def book_title(catalogue, book_id):
    """Look up the title for *book_id*, a 1-based row position in *catalogue*.

    Returns a placeholder when the id is not a number, falls outside the
    catalogue (id 0 would otherwise wrap around to the last row), or the row
    has no title column.
    """
    try:
        position = int(book_id) - 1
    except ValueError:
        position = -1
    if 0 <= position < len(catalogue) and len(catalogue[position]) > 1:
        return catalogue[position][1]
    return "unknown title (id {})".format(book_id)


def format_report(heading, entries, catalogue):
    """Return the printable lines for one country: heading, then one line per book."""
    lines = [heading]
    for ide, downloads in entries:
        lines.append(downloads + "   " + book_title(catalogue, ide))
    return lines


def books_main():
    """Load both CSV files and print the per-country top-ten reports."""
    catalogue = load_catalogue('content.csv')
    for entry in catalogue:
        if entry:
            print(entry[0])

    with open('analyze.csv', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter='\t')
        grouped = group_downloads(
            reader, [country for country, _ in COUNTRY_REPORTS]
        )

    for position, (country, heading) in enumerate(COUNTRY_REPORTS):
        # Separator goes between reports, not after the last one.
        if position:
            print("-----------------------------")
        best = top_downloads(grouped.get(country, []))
        for line in format_report(heading, best, catalogue):
            print(line)


if __name__ == "__main__":
    books_main()