User:Jasper van Loenen/Prototyping/wikistats: Difference between revisions

From XPUB & Lens-Based wiki
No edit summary
No edit summary
 
(5 intermediate revisions by the same user not shown)
Line 1: Line 1:
__NOINDEX__
==Wikistats==
==Wikistats==
===Update: Check the animated version [http://pzwart3.wdka.hro.nl/~jvloenen/cgi-bin/feed/stats.cgi?frame=0 HERE!]===


This script keeps track of all changes on the wiki and generates a list of most active users.
This script keeps track of all changes on the wiki and generates a list of most active users.
The script uses python to read the wiki rss feed, saves the changes to a SQLite database and writes the stats to a .txt file which can be found [http://pzwart3.wdka.hro.nl/~jvloenen/stats.txt here]. This file is automatically updated.
The script uses python to read the wiki rss feed, saves the changes to a SQLite database and writes the stats to a .txt file which can be found [http://pzwart3.wdka.hro.nl/~jvloenen/stats.txt here]. This file is automatically updated using a cron job.


===RSS parser===
===RSS parser===
Line 26: Line 29:
doc = lxml.etree.parse(f)     
doc = lxml.etree.parse(f)     


#iterate trough the updates
for item in doc.xpath("/rss/channel/item"):
for item in doc.xpath("/rss/channel/item"):
     title = item.xpath("title/text()")[0]
     title = item.xpath("title/text()")[0]
Line 32: Line 36:
     pubDate = item.xpath("pubDate/text()")[0]
     pubDate = item.xpath("pubDate/text()")[0]
     userName = item.xpath("*[local-name()='creator']/text()")[0]
     userName = item.xpath("*[local-name()='creator']/text()")[0]
    #check if the user already exists in the database, if not, create
     cursor.execute("SELECT id FROM wiki_users WHERE user_name = ?", [userName])
     cursor.execute("SELECT id FROM wiki_users WHERE user_name = ?", [userName])
     data=cursor.fetchall()
     data=cursor.fetchall()
Line 49: Line 54:
         new_update_amount = data[0][1]+descriptionLength     
         new_update_amount = data[0][1]+descriptionLength     


    #check if the update has already been tracked, if not, save
     cursor.execute("SELECT id FROM wiki_updates WHERE user_id = ? AND update_date = ?", (userId, pubDate))
     cursor.execute("SELECT id FROM wiki_updates WHERE user_id = ? AND update_date = ?", (userId, pubDate))
     data=cursor.fetchall()
     data=cursor.fetchall()
Line 68: Line 74:
cursor = connection.cursor()
cursor = connection.cursor()


cursor.execute("SELECT update_times FROM wiki_users ORDER BY update_times DESC LIMIT 1")
#grab and print the stats
data=cursor.fetchall()
 
cursor.execute("SELECT DISTINCT id, user_name, update_times, update_amount FROM wiki_users ORDER BY update_times DESC")
cursor.execute("SELECT DISTINCT id, user_name, update_times, update_amount FROM wiki_users ORDER BY update_times DESC")
data=cursor.fetchall()
data=cursor.fetchall()

Latest revision as of 02:31, 14 March 2012

Wikistats

Update: Check the animated version HERE!

This script keeps track of all changes on the wiki and generates a list of most active users. The script uses python to read the wiki rss feed, saves the changes to a SQLite database and writes the stats to a .txt file which can be found here. This file is automatically updated using a cron job.

RSS parser

import urllib2
import lxml.etree
from sqlite3 import *

#initate mysqlite database
connection = connect('/home/jvloenen/public_html/cgi-bin/feed/wikitracker.db')
cursor = connection.cursor()
#create tables - run once
#cursor.execute('''CREATE TABLE wiki_users (id integer primary key, user_name text, update_times integer, update_amount integer)''')
#cursor.execute('''CREATE TABLE wiki_updates (id integer primary key, user_id integer, update_date text)''')
#connection.commit()

rssFeedUrl = "http://pzwart3.wdka.hro.nl/mediawiki/index.php?title=Special:RecentChanges&feed=rss"

#open rss feed
print "Getting feed"
f = urllib2.urlopen(rssFeedUrl)
print "Parsing"
doc = lxml.etree.parse(f)    

#iterate trough the updates
for item in doc.xpath("/rss/channel/item"):
    title = item.xpath("title/text()")[0]
    description = item.xpath("description/text()")[0]
    descriptionLength = len(description)
    pubDate = item.xpath("pubDate/text()")[0]
    userName = item.xpath("*[local-name()='creator']/text()")[0]
    #check if the user already exists in the database, if not, create
    cursor.execute("SELECT id FROM wiki_users WHERE user_name = ?", [userName])
    data=cursor.fetchall()
    if len(data)==0:
        print('There is no user %s, so he/she will be added'%userName)
        cursor.execute("INSERT INTO wiki_users (user_name,  update_times, update_amount) values (?, ?, ?)", (userName, 1, descriptionLength))
        cursor.execute("SELECT last_insert_rowid() FROM wiki_users")
        data=cursor.fetchall() #overrules the previous value of 'data'
        userId = data[0][0]
        new_update_time = 1
        new_update_amount = descriptionLength
    else:
        userId = data[0][0]
        cursor.execute("SELECT update_times, update_amount FROM wiki_users WHERE id = ?", ([userId]))
        data=cursor.fetchall()
        new_update_time = data[0][0]+1
        new_update_amount = data[0][1]+descriptionLength    

    #check if the update has already been tracked, if not, save
    cursor.execute("SELECT id FROM wiki_updates WHERE user_id = ? AND update_date = ?", (userId, pubDate))
    data=cursor.fetchall()
    if len(data)==0:
        print "Saving new entry for user "+str(userId)
        cursor.execute("INSERT INTO wiki_updates (user_id, update_date) values (?, ?)", (userId, pubDate))
        cursor.execute("UPDATE wiki_users SET update_times =?, update_amount = ? WHERE id = ?", (new_update_time, new_update_amount, userId))
        connection.commit()

print "Done"

Stats retreiver

from sqlite3 import *

#initate mysqlite database
connection = connect('/home/jvloenen/public_html/cgi-bin/feed/wikitracker.db')
cursor = connection.cursor()

#grab and print the stats
cursor.execute("SELECT DISTINCT id, user_name, update_times, update_amount FROM wiki_users ORDER BY update_times DESC")
data=cursor.fetchall()
if len(data)!=0:
    userNumber = 0
    for user in data:
	printText = ""
	for i in range(user[2]):
		printText = printText + "*"
	print "%20s %0s" % (user[1], printText)