Twitter Bot: Difference between revisions

From XPUB & Lens-Based wiki
No edit summary
No edit summary
Line 68: Line 68:


[[Category:Cookbook]]
[[Category:Cookbook]]
[[Category:Python]]
[[Category:Scraping]]

Revision as of 11:51, 30 April 2012

A lot of data is available from Twitter via a public API (no API key is required to use it).

The easiest way is to load data using JSON:

import urllib2, json
# Timeline endpoint; the account is selected with the screen_name query parameter.
url = "http://api.twitter.com/1/statuses/user_timeline.json?screen_name=" + "TRACKGent"

# Close the HTTP response explicitly once the JSON body has been decoded,
# even if decoding fails, instead of leaving the connection open.
response = urllib2.urlopen(url)
try:
    data = json.load(response)
finally:
    response.close()

print(len(data))
# The code indexes data like a list; an empty result would make data[0]
# raise IndexError, so only inspect the first item when there is one.
if data:
    tweet = data[0]
    print(tweet.keys())


You can also use feedparser:

import feedparser
 
# Print the title of every entry in the Atom search feed for "feel".
url = "http://search.twitter.com/search.atom?q=feel"
feed = feedparser.parse(url)
for entry in feed.entries:
    # Encode to UTF-8 bytes before printing (titles are presumably unicode).
    print(entry.title.encode("utf-8"))
import feedparser
 
# Print every whitespace-separated word of each entry title on its own line.
url = "http://search.twitter.com/search.atom?q=feel"
feed = feedparser.parse(url)
for entry in feed.entries:
    words = entry.title.split()
    for token in words:
        # Encode to UTF-8 bytes before printing (titles are presumably unicode).
        print(token.encode("utf-8"))

An older example using JSON:

from urllib import urlencode
import urllib2
import json


def openURL(url, user_agent="Mozilla/5.0 (X11; U; Linux x86_64; fr; rv:1.9.1.5) Gecko/20091109 Ubuntu/9.10 (karmic) Firefox/3.5.5"):
    """
    Open url and return the response together with the address it came from.

    A "User-Agent" header carrying user_agent is sent unless user_agent is
    falsy, in which case no such header is added here.

    Returns: tuple (response, final_url). final_url is whatever
    response.geturl() reports, which may be different from url when the
    request was redirected.
    """
    # Passing headers to the constructor is equivalent to calling add_header.
    headers = {"User-Agent": user_agent} if user_agent else {}
    response = urllib2.urlopen(urllib2.Request(url, headers=headers))
    return (response, response.geturl())

def getJSON(url):
    """
    Fetch url with openURL and return its body decoded from JSON.

    The response is closed after the body has been read, whether or not
    reading or decoding succeeds. Exceptions from openURL, read() or
    json.loads propagate to the caller unchanged.
    """
    # The final (possibly redirected) URL is not needed here.
    (response, _realurl) = openURL(url)
    try:
        return json.loads(response.read())
    finally:
        # Release the connection instead of leaving it to garbage collection.
        response.close()

# Base URL of the JSON search endpoint queried below.
TWITTER_SEARCH = "http://search.twitter.com/search.json"

# urlencode escapes the query parameters before they are appended to the URL.
data = getJSON(TWITTER_SEARCH + "?" + urlencode({'q': 'Rotterdam'}))
for r in data['results']:
    # Encode explicitly: printing a unicode value containing non-ASCII
    # characters raises UnicodeEncodeError when stdout has no encoding
    # (for example when the output is piped).
    print(r['text'].encode("utf-8"))