Wikipedia-Counter
A script that counts how many edits were made on each day to one particular Wikipedia page. The script can only count 500 edits at a time, starting from the revision of a date that you choose by adjusting the ID in the script.
<source lang="python">
#!/usr/bin/python
import collections
import datetime
import json
import os
import re
import sys
import urllib2
from pprint import pprint

import html5lib
import lxml
import lxml.cssselect
def getCSS(url, selector):
    """Download *url* and return the elements matching a CSS selector.

    The page is requested with a browser-like User-Agent header, parsed by
    html5lib into an lxml tree (with HTML namespacing of elements turned
    off), and then queried with ``lxml.cssselect.CSSSelector``.

    Args:
        url: Address of the HTML page to fetch.
        selector: CSS selector expression, as a string.

    Returns:
        A list of the elements of the parsed page matched by *selector*.
    """
    htmlparser = html5lib.HTMLParser(
        tree=html5lib.treebuilders.getTreeBuilder("lxml"),
        namespaceHTMLElements=False)

    request = urllib2.Request(url)
    request.add_header("User-Agent", "Mozilla/5.0 (X11; U; Linux x86_64; fr; rv:1.9.1.5) Gecko/20091109 Ubuntu/9.10 (karmic) Firefox/3.5.5")

    response = urllib2.urlopen(request)
    try:
        page = htmlparser.parse(response)
    finally:
        # Close explicitly so the connection is released even when parsing
        # raises.
        response.close()

    # Use a separate name so the caller's selector string is not rebound to
    # an object of a different type.
    matcher = lxml.cssselect.CSSSelector(selector)
    return list(matcher(page))
# Query the Wikipedia API for up to 500 revisions of one page and print how
# many of those revisions fall on each calendar day of the covered range.
#
# NOTE(review): PAGE-TITLE, START-ID and PAGE-ID look like placeholders that
# must be replaced with real values before running -- confirm.
API_URL = "http://en.wikipedia.org/w/api.php?&action=query&titles=PAGE-TITLE&rvstartid=START-ID&prop=revisions&rvlimit=500&format=json"
PAGE_ID = "PAGE-ID"


def _revision_date(revision):
    """Return the calendar date encoded in a revision's "timestamp" field.

    The year, month and day are read from character positions 0-3, 5-6 and
    8-9 of the timestamp string.
    """
    stamp = revision["timestamp"]
    return datetime.date(int(stamp[0:4]), int(stamp[5:7]), int(stamp[8:10]))


f = urllib2.urlopen(API_URL)
try:
    data = json.load(f)
finally:
    # Release the connection whether or not the JSON could be decoded.
    f.close()

revisions = data['query']['pages'][PAGE_ID]["revisions"]

# Dump every revision: its date (year month day) followed by its id.
for r in revisions:
    timestamp = r["timestamp"]
    print("%s %s %s" % (timestamp[0:4], timestamp[5:7], timestamp[8:10]))
    print(r["revid"])
    print("**************************")

print("==============================")

# Nothing to tally when the API returned no revisions.
if revisions:
    # Count the edits per day in a single pass instead of rescanning the
    # whole revision list once for every day.
    edits_per_day = collections.Counter(_revision_date(r) for r in revisions)

    # Use the last element rather than index 499, so that a page with fewer
    # than 500 returned revisions does not raise IndexError.
    firstdate = _revision_date(revisions[0])
    lastdate = _revision_date(revisions[-1])

    # Walk from the later date back to the earlier one, whichever order the
    # revisions arrived in, so the loop always terminates.
    oldest = min(firstdate, lastdate)
    one_day = datetime.timedelta(days=1)
    currentdate = max(firstdate, lastdate)
    while currentdate >= oldest:
        print(currentdate)
        # Days without any edit are reported as 0.
        print(edits_per_day[currentdate])
        currentdate = currentdate - one_day