User:Birgit bachler/openevents/
Description
Bonuskaart-friends is a social network that connects people according to their shopping behaviour.
Just provide the system with your Bonuskaart number and, optionally, your name, and you will find
like-minded shoppers. Your profile does not require any further maintenance, since it is generated
entirely from the shopping data you give to Albert Heijn by swiping the barcode of your card
every time you do your groceries. Enjoy photo galleries of your purchased products, have a look at
your detailed shopping list and get to know your new friends.
Like any customer card, the Albert Heijn Bonuskaart is an effective tool for data mining and creating
customer profiles. The Dutch supermarket chain Albert Heijn, with its popular Bonuskaart, allows
visitors to its website to view their previous purchases just by filling in the unique number
from the back of their card. By questioning the relevance and sensitivity of this data, the social
network Bonuskaart-friends, whose look is modelled on Facebook, tries to illustrate the possible
abuse of that data and the false conclusions about a person that can be drawn from it.
To counteract this mechanism, the card number 2620496071032 is available as a sticker to paste onto
your personal card, in order to conceal your shopping behaviour and share one identity with many other
shoppers.
Documentation
Source Code
ah04.py:
#!/usr/local/bin/python
import html5lib, urllib2, urlparse
import os, glob
from os import system, sys
import subprocess
import time, random
cardnumber=""
def runBash(cmd):
    """Run cmd through the shell and return its standard output.

    cmd is handed to the shell verbatim (shell=True), so the caller is
    responsible for quoting any arguments embedded in it.

    Returns the captured stdout with leading and trailing whitespace
    stripped.  stderr is not captured and the exit status is ignored.
    """
    p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
    # communicate() drains the pipe, closes it and waits for the child, so
    # neither an open file descriptor nor an unreaped process is left behind.
    out = p.communicate()[0]
    return out.strip()  # This is the stdout from the shell command
def absolutize(href, base):
    """Return href as an absolute URL.

    An href that already begins with "http://" (compared
    case-insensitively) is handed back untouched; anything else is
    resolved against base with urlparse.urljoin.
    """
    already_absolute = href.lower().startswith("http://")
    if already_absolute:
        return href
    return urlparse.urljoin(base, href)
def openURL(url, data):
    """
    Open url and return (pagefile, realurl).

    Sets a browser-like User-Agent header.  realurl is taken from the
    response's geturl() and may differ from url in the case of a redirect.

    If url contains a "?", everything after the first "?" replaces data.
    The request is sent with data as its body when data is non-empty;
    otherwise it is sent without a body.  The Request object is built
    from the full, unsplit url, so the query string also remains part of
    the requested URL.

    The caller is responsible for closing the returned pagefile.
    Errors raised by urllib2.urlopen are not caught here.
    """
    request = urllib2.Request(url)
    user_agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.14) Gecko/20080418 Ubuntu/7.10 (gutsy) Firefox/2.0.0.14"
    request.add_header("User-Agent", user_agent)
    if "?" in url:
        # Split on the first "?" only: a query string may itself contain
        # "?", and an unbounded split would then not unpack into two parts.
        data = url.split("?", 1)[1]
    if data:
        pagefile = urllib2.urlopen(request, data)
    else:
        pagefile = urllib2.urlopen(request)
    realurl = pagefile.geturl()
    return (pagefile, realurl)
def downloadURL(url, foldername):
    """
    Download url and save it inside foldername.

    The file is named after the last component of the URL's path (query
    string and fragment are not part of the name) and is written in
    binary mode, overwriting any existing file of that name.
    foldername must already exist.

    Sets a browser-like User-Agent header on the request.
    Returns nothing; errors from urllib2.urlopen and open() propagate
    to the caller.
    """
    request = urllib2.Request(url)
    user_agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.14) Gecko/20080418 Ubuntu/7.10 (gutsy) Firefox/2.0.0.14"
    request.add_header("User-Agent", user_agent)
    pagefile = urllib2.urlopen(request)
    try:
        data = pagefile.read()
    finally:
        # Release the connection even if reading fails.
        pagefile.close()
    urlpath = urlparse.urlparse(url)[2]
    filename = os.path.join(foldername, os.path.split(urlpath)[1])
    out = open(filename, "wb")
    try:
        out.write(data)
    finally:
        # Close explicitly so the image is fully on disk before the caller
        # hands the folder to external tools.
        out.close()
def topProducts():
    """Write the N most frequent words of alle.txt to the statistics page.

    Every whitespace-separated word in alle.txt is stripped of surrounding
    ASCII punctuation and lower-cased, then counted.  The N (20) most
    frequent words are written to the module-level html2 file as
    "word: count" entries, each followed by "<br>", ordered by descending
    count and then alphabetically.

    Relies on the global html2 being open for writing.
    """
    import codecs
    from string import punctuation
    N = 20
    words = {}
    # alle.txt is written as UTF-8 by the scraping loop, and html2 is a
    # UTF-8 codecs writer.  Decode here so that non-ASCII product names are
    # passed on as text instead of raw bytes the writer cannot handle.
    source = codecs.open("alle.txt", "r", encoding="utf-8")
    try:
        for line in source:
            for word in line.split():
                word = word.strip(punctuation).lower()
                words[word] = words.get(word, 0) + 1
    finally:
        source.close()
    # Highest count first; ties are broken alphabetically.
    top_words = sorted(words.items(),
                       key=lambda item: (-item[1], item[0]))[:N]
    for word, frequency in top_words:
        alle = "%s: %d" % (word, frequency) + "\n"
        html2.write(alle + "<br>" + "\n")
# Optionally register a new card before scraping starts.  The user may type
# "<13-digit card number> <name>"; the entry is put on the first line of
# cardnumbers.txt so that it is processed first.
print("*" * 50)
print("*" * 50)
print("New Bonuskaartfriend? Otherwise just press enter. Example:")
print("2601234567890 YourName")
# Strip surrounding whitespace: the main loop slices the card number out of
# fixed columns, and whitespace-only input must count as "no new card".
newCard = raw_input().strip()
print("*" * 50)
print("*" * 50)
if newCard != "":
    # read the current contents of the file; on the very first run it does
    # not exist yet, which simply means there is nothing to keep
    if os.path.exists("cardnumbers.txt"):
        f = open("cardnumbers.txt")
        try:
            text = f.read()
        finally:
            f.close()
    else:
        text = ""
    # open the file again for writing
    f = open("cardnumbers.txt", "w")
    try:
        f.write(str(newCard) + "\n")
        # write the original contents
        f.write(text)
    finally:
        f.close()
# Main loop: for every card listed in cardnumbers.txt
#   1. fetch the "previously bought" pages and download the product images,
#   2. write the product names to <card>/<card>.txt and to alle.txt,
#   3. build an image montage and an animated GIF with ImageMagick,
#   4. score the shopping list against every other card with compare.sh,
#   5. write the profile page <card>/index.html.
import codecs
import socket
from xml.sax.saxutils import escape as _escape

print("*" * 50)
print("START")
print("*" * 50)


def _prependLine(filename, text):
    """Insert text as the new first line of the UTF-8 file filename.

    A missing file is treated as empty, so the first run does not fail.
    """
    if os.path.exists(filename):
        f = codecs.open(filename, "r", encoding="utf-8")
        try:
            previous = f.read()
        finally:
            f.close()
    else:
        previous = u""
    f = codecs.open(filename, "w", encoding="utf-8")
    try:
        f.write(text + "\n")
        f.write(previous)
    finally:
        f.close()


# Directory that is searched for the per-card folders of all known cards.
# NOTE(review): compare.sh and all output files use paths relative to the
# current directory, so the script presumably has to be started from this
# directory -- confirm before changing it.
path = '/home/merglind/Documents/OpenEvents/try04'

txx = open("cardnumbers.txt", "r")
try:
    for line in txx:
        print("*" * 50)
        print("Busy with Bonuskaartfriend " + line)
        print("*" * 50)
        url = "http://www.ah.nl/previouslybought/PreviouslyBought.do"
        line = line.rstrip("\r\n")
        # Fixed layout: 13 digits, one separator character, optional name.
        line2 = line[0:13]
        print(line + " " + line2)
        cardnumber = line2
        if len(cardnumber) != 13 or not cardnumber.isdigit():
            # The card number is pasted into shell commands and file paths
            # below.  An empty or malformed value would make them act on
            # "/output" or on arbitrary paths, so skip such lines.
            print("Skipping malformed line (expected a 13-digit card number)")
            continue
        system("mkdir " + cardnumber)
        system("mkdir " + cardnumber + "/output")

        f, url = openURL(url, "cardNumber=" + cardnumber)
        try:
            parser = html5lib.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder("BeautifulSoup"))
            firstPage = parser.parse(f)
        finally:
            f.close()

        # Anchors with class "fillet" are followed as the links to the item
        # pages (per the original comment -- confirm against the site markup).
        loadedURL = {}
        txt = codecs.open(cardnumber + "/" + cardnumber + ".txt", "w", encoding="utf-8")
        try:
            for a in firstPage.findAll("a", {"class": "fillet"}):
                linkurl = absolutize(a["href"], url)
                if linkurl in loadedURL:
                    continue
                # LOAD URL...
                print("PAGE  " + linkurl)
                f2, pageurl = openURL(linkurl, None)
                try:
                    parser = html5lib.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder("BeautifulSoup"))
                    page = parser.parse(f2)
                finally:
                    f2.close()
                for img in page.findAll("img"):
                    src = img.get("src")
                    if not src:
                        continue
                    src = absolutize(src, pageurl)
                    # Random pause of up to one second between downloads.
                    time.sleep(random.random())
                    try:
                        downloadURL(src, cardnumber + "/output")
                    except urllib2.URLError as msg:
                        print("ncfiles: Urllib2 error (%s)" % msg)
                    except socket.error as err:
                        print("ncfiles: Socket error for host %s (%s)"
                              % (urlparse.urlparse(src)[1], err))
                for h3 in page.findAll("h3"):
                    if not h3.contents:
                        continue
                    value = h3.contents[0]
                    # "Afdeling" headings are left out of the lists.
                    if value != "Afdeling":
                        txt.write(u"%s\n" % value)
                        _prependLine("alle.txt", u"%s" % value)
                # Remember both the link and the URL it resolved to, so a
                # redirected page is not fetched twice.
                loadedURL[linkurl] = True
                loadedURL[pageurl] = True
        finally:
            # Closed before compare.sh reads the list further down.
            txt.close()

        # Only the "*_80.jpg" images are used for the montage and the
        # animated GIF; both temporary folders are removed afterwards.
        system("mkdir "+str(cardnumber)+"/products")
        system("mv "+str(cardnumber)+"/output/*_80.jpg "+str(cardnumber)+"/products")
        system("montage "+str(cardnumber)+"/products/*.jpg "+str(cardnumber)+"/"+str(cardnumber)+".jpg")
        system("convert -delay 100 "+str(cardnumber)+"/products/*.jpg "+str(cardnumber)+"/"+str(cardnumber)+".gif")
        system("rm -r "+str(cardnumber)+"/output")
        system("rm -r "+str(cardnumber)+"/products")

        # Compare this card with every other card folder (names start with 26).
        friends = []
        for infile in glob.glob(os.path.join(path, '26*')):
            compareNumber = os.path.split(infile)[1]
            if compareNumber == cardnumber:
                continue
            cmd = "./compare.sh "+str(cardnumber)+"/"+str(cardnumber)+".txt "+str(compareNumber)+"/"+str(compareNumber)+".txt"
            try:
                value = round(float(runBash(cmd)), 2)
            except ValueError:
                # compare.sh printed nothing usable (e.g. a list is missing
                # or empty); treat the pair as "no match".
                continue
            # Only matches above 10% count as friends.
            if value > 10.0:
                friends.append((value, compareNumber))
        friends.sort(reverse=True)

        #################HTML STARTS HERE#########################
        nickname = line[14:] or cardnumber
        if isinstance(nickname, bytes):
            # Python 2 reads the card list as byte strings; decode so the
            # UTF-8 writer below accepts names with non-ASCII characters.
            nickname = nickname.decode("utf-8", "replace")
        # The name is free text typed by a user: escape it for HTML.
        nickname = _escape(nickname)

        lines = [
            "<html><head><title>" + nickname + "'s Profile</title>",
            """<link rel="stylesheet" href="../style.css"></head>""",
            "\n"
            "<body>\n"
            "<table bgcolor=\"#00A0E2\"width=\"100%\" height=\"40px \"border=\"0\">\n"
            "<tr>\n"
            "<td width=\"25%\"><img src=\"../ah_friends_t.gif\"></td>\n"
            "<td><h1>\n",
            "" + cardnumber + "'s Profile</></h1></td>",
            "\n"
            "</tr>\n"
            "</table>\n"
            "<div class=\"outer\"><div class=\"inner\">\n",
            "<p><h2>This is " + nickname + "</h2>",
            "<div><a href='../"+cardnumber+"/"+cardnumber+".jpg'><img src='../"+cardnumber+"/"+cardnumber+".gif'></a></div>",
            "<h3>" + nickname + "'s BEST BONUS-FRIENDS:</h3>",
            "<ul>",
        ]
        # friends only holds matches above 10%, best match first.
        for value, card in friends:
            score = "%0.2f" % value
            lines.append("<li><a href='../"+card+"/index.html'>"+card+"</a> "+score+"% Match</li>")
        lines.append("</ul>")
        lines.append("<h3><a href='../"+cardnumber+"/"+cardnumber+".txt'>"+nickname+"'s Shopping List</a></h3>")
        lines.append("</div></div></body></html>")

        html = codecs.open(cardnumber + "/index.html", "w", encoding="utf-8")
        try:
            html.write("\n".join(lines) + "\n")
        finally:
            html.close()
        cardnumber = ""
finally:
    txx.close()

print("*" * 50)
print("END")
print("*" * 50)
import codecs

# Build the statistics front page (index.html): a header followed by the
# list that topProducts() writes into the module-level html2 file.
html2 = codecs.open("index.html", "w", encoding="utf-8")
try:
    html2.write("<html><head><title>Statistics</title>" + "\n")
    html2.write("""<link rel="stylesheet" href="style.css"></head>""" + "\n")
    html2.write(
        "\n"
        "<body>\n"
        "<table bgcolor=\"#00A0E2\"width=\"100%\" height=\"40px \"border=\"0\">\n"
        "<tr>\n"
        "<td width=\"25%\"><img src=\"ah_friends_t.gif\"></td>\n"
        "<td><h1>\n"
        "\n"
    )
    html2.write("Top 20</></h1></td>" + "\n")
    html2.write(
        "\n"
        "</tr>\n"
        "</table>\n"
        "<div class=\"outer\"><div class=\"inner\">\n"
        "\n"
    )
    html2.write("<p><h2>Today's Top 20:</h2>" + "\n")
    topProducts()
    # Close the elements opened above, the same way the profile pages end.
    html2.write("</div></div></body></html>" + "\n")
finally:
    # Close explicitly so the page is completely written to disk.
    html2.close()
compare.sh:
#!/bin/bash
# Print the percentage of shopping list $1 that is shared with list $2.
#
# usage: compare.sh LIST1 LIST2
#
# The result is MATCHES*100/TOTAL, where TOTAL is the number of lines in
# LIST1 (duplicates included) and MATCHES is the number of distinct lines
# that occur in both lists.  Prints 0 when LIST1 is empty or unreadable.

if [ "$#" -ne 2 ]; then
    echo "usage: $0 LIST1 LIST2" >&2
    exit 1
fi

# Quote the file names so paths with spaces or glob characters work.
TOTAL=$(wc -l < "$1" | tr -d '[:space:]')
#echo $TOTAL

# comm -12 prints only the lines common to both sorted, de-duplicated lists.
# Process substitution avoids fixed temp files in /tmp, which two runs at
# the same time would overwrite for each other.
MATCHES=$(comm -12 <(sort -u "$1") <(sort -u "$2") | wc -l | tr -d '[:space:]')

# Guard against division by zero, which would make bc print nothing.
if [ "${TOTAL:-0}" -eq 0 ]; then
    echo "0"
else
    echo "$MATCHES*100/$TOTAL" | bc -l
fi