Wikipedia-Counter

From XPUB & Lens-Based wiki

A script that counts how many edits were made per day to one particular Wikipedia page. A single run can only count up to 500 edits, starting from the revision (and therefore the date) you choose by adjusting the start ID (START-ID) in the request URL.

<source lang="python">

#!/usr/bin/python

import datetime
import json
import os
import re
import sys
import urllib2
from pprint import pprint

import html5lib
import lxml
import lxml.cssselect

def getCSS (url, selector):
    """Fetch *url* and return the elements matching a CSS selector.

    The page is downloaded with a browser-like User-Agent header, parsed
    with html5lib into an lxml tree (without HTML namespaces, so plain tag
    names can be used in the selector), and queried with lxml.cssselect.

    url      -- address of the page to download
    selector -- CSS selector string
    Returns a list of the matching elements.
    """
    htmlparser = html5lib.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder("lxml"), namespaceHTMLElements=False)
    request = urllib2.Request(url)
    request.add_header("User-Agent", "Mozilla/5.0 (X11; U; Linux x86_64; fr; rv:1.9.1.5) Gecko/20091109 Ubuntu/9.10 (karmic) Firefox/3.5.5")
    f = urllib2.urlopen(request)
    try:
        page = htmlparser.parse(f)
    finally:
        # Always release the connection, even when parsing fails.
        f.close()

    # Keep the compiled selector under its own name instead of rebinding
    # the string parameter to an object of a different type.
    matcher = lxml.cssselect.CSSSelector(selector)
    return list(matcher(page))


# Ask the Wikipedia API for the revisions of one page as JSON. PAGE-TITLE and
# START-ID are placeholders to fill in before running; rvlimit=500 caps the
# number of revisions requested.
f = urllib2.urlopen("http://en.wikipedia.org/w/api.php?&action=query&titles=PAGE-TITLE&rvstartid=START-ID&prop=revisions&rvlimit=500&format=json")
try:
    data = json.load(f)
finally:
    # Close the HTTP response once the JSON has been read (or reading failed).
    f.close()

# Dump every fetched revision: its date as "year month day", then its
# revision id, then a starred separator line. "PAGE-ID" is a placeholder for
# the page's key in the API response.
for r in data['query']['pages']["PAGE-ID"]["revisions"]:
    revid = r["revid"]
    timestamp = r["timestamp"]

    # The slices pick the year, month and day fields out of the timestamp
    # string (presumably ISO 8601, "YYYY-MM-DD..." -- confirm against the API).
    print("%s %s %s" % (timestamp[0:4], timestamp[5:7], timestamp[8:10]))
    print(revid)
    print("**************************")


print("==============================")

# Count the fetched revisions per calendar day, then report every day from
# the most recent one down to the oldest one. Each day is printed on one line
# followed by its edit count on the next; days without edits are reported as 0.
revisions = data['query']['pages']["PAGE-ID"]["revisions"]

# Single pass over the revisions: date -> number of edits on that date.
edits_per_day = {}
for r in revisions:
    timestamp = r["timestamp"]
    dtt = datetime.date(int(timestamp[0:4]), int(timestamp[5:7]), int(timestamp[8:10]))
    edits_per_day[dtt] = edits_per_day.get(dtt, 0) + 1

# Nothing to report when the API returned no revisions.
if edits_per_day:
    # Derive the range from the data itself instead of indexing a fixed
    # position (the original used [499], which fails when fewer than 500
    # revisions come back) or relying on the order of the response.
    firstdate = max(edits_per_day)
    lastdate = min(edits_per_day)

    # NOTE: when the request limit cut the history off, the count for the
    # oldest day (lastdate) may be incomplete.
    currentdate = firstdate
    while currentdate >= lastdate:
        print(currentdate)
        print(edits_per_day.get(currentdate, 0))
        currentdate = currentdate - datetime.timedelta(days=1)