Building a web crawler with Django
Revision as of 21:39, 14 January 2011 by Michael Murtaugh (talk | contribs) (Created page with "An example of using Django to crawl Flickr. == Models == Models are defined for a Photo, a Comment, and Exif data. models.py <source lang="python"> from django.db import model...")
An example of using Django to crawl Flickr.
Models
Models are defined for a Photo, a Comment, and Exif data.
models.py
from django.db import models
class Photo(models.Model):
    """A crawled Flickr photo.

    Stores the photo's Flickr id and descriptive text, plus the
    farm/server/secret values that image_url() combines into a static
    image URL.
    """

    flickrid = models.CharField(max_length=255, blank=True)
    title = models.CharField(max_length=255, blank=True)
    description = models.TextField(blank=True)
    page_url = models.URLField(blank=True, verify_exists=False)
    # the necessary info to construct the image URLs
    server = models.CharField(max_length=255, blank=True)
    farm = models.CharField(max_length=255, blank=True)
    secret = models.CharField(max_length=255, blank=True)

    def __unicode__(self):
        """Return the Flickr id as text."""
        # Interpolate into a unicode literal instead of calling str():
        # under Python 2, str() raises UnicodeEncodeError when the value
        # holds non-ASCII characters.
        return u"%s" % self.flickrid

    def image_url(self, code="t"):
        """Build the static image URL for this photo.

        code is the suffix placed before ".jpg" (presumably a Flickr
        size code, with "t" the thumbnail -- confirm against the Flickr
        URL documentation).
        """
        return "http://farm%(farm)s.static.flickr.com/%(server)s/%(id)s_%(secret)s_%(code)s.jpg" % {
            'farm': self.farm,
            'server': self.server,
            'secret': self.secret,
            'id': self.flickrid,
            'code': code,
        }

    def admin_thumbnail(self):
        """Return an <img> tag for the "t" image, for use in admin lists."""
        from django.utils.html import escape

        # allow_tags makes the admin render this string as raw HTML, and
        # the URL is assembled from crawled field values, so escape it
        # before embedding it in the attribute.
        return '<img src="%s" />' % escape(self.image_url(code="t"))
    admin_thumbnail.allow_tags = True
class Exif(models.Model):
    """One Exif entry (tagspace, tag, content) attached to a Photo."""

    # Reverse accessor on Photo is "exif" (photo.exif.all()).
    photo = models.ForeignKey(Photo, related_name="exif")
    tag = models.CharField(max_length=255)
    tagspace = models.CharField(max_length=255)
    content = models.CharField(max_length=255)

    def __unicode__(self):
        """Return "tagspace:tag" as a readable label for this entry."""
        # Photo and Comment both define __unicode__; without one here,
        # Exif rows fall back to Django's generic default label.
        return u"%s:%s" % (self.tagspace, self.tag)
class Comment(models.Model):
    """A comment attached to a Photo, with its author's id and name."""

    # Reverse accessor on Photo is "comments" (photo.comments.all()).
    photo = models.ForeignKey(Photo, related_name="comments")
    authorid = models.CharField(max_length=255)
    authorname = models.CharField(max_length=255)
    body = models.TextField(blank=True)

    def __unicode__(self):
        """Label the comment by the Flickr id of the photo it belongs to."""
        flickr_id = self.photo.flickrid
        return "Comment on %s" % flickr_id
Admin
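Registering custom ModelAdmin classes makes the automatically generated admin views much more useful, for example by showing a thumbnail column for photos and adding filters and search to the Exif list.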
admin.py
from django.contrib import admin
from models import *
class PhotoAdmin(admin.ModelAdmin):
    """Admin options for Photo: list columns include the thumbnail."""

    list_display = (
        "flickrid",
        "admin_thumbnail",
        "title",
        "description",
        "page_url",
    )


admin.site.register(Photo, PhotoAdmin)
class CommentAdmin(admin.ModelAdmin):
    """Admin options for Comment: list the parent photo and the body."""

    list_display = (
        "photo",
        "body",
    )


admin.site.register(Comment, CommentAdmin)
class ExifAdmin(admin.ModelAdmin):
    """Admin options for Exif: list columns, sidebar filters and search."""

    list_display = (
        "photo",
        "tagspace",
        "tag",
        "content",
    )
    list_filter = (
        "tagspace",
        "tag",
    )
    search_fields = (
        "tag",
        "content",
        "tagspace",
    )


admin.site.register(Exif, ExifAdmin)