Building a web crawler with Django

From XPUB & Lens-Based wiki
Revision as of 22:39, 14 January 2011 by Michael Murtaugh (talk | contribs) (Created page with "An example of using Django to crawl Flickr. == Models == Models are defined for a Photo, a Comment, and Exif data. models.py <source lang="python"> from django.db import model...")
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

An example of using Django to crawl Flickr.

Models

Models are defined for a Photo, a Comment, and Exif data.

models.py

from django.db import models

class Photo(models.Model):
    """A crawled Flickr photo.

    Holds the photo's descriptive fields plus the ``farm``, ``server`` and
    ``secret`` values from which its static image URLs are built.
    """

    flickrid = models.CharField(max_length=255, blank=True)
    title = models.CharField(max_length=255, blank=True)
    description = models.TextField(blank=True)
    page_url = models.URLField(blank=True, verify_exists=False)

    # the necessary info to construct the image URLs
    server = models.CharField(max_length=255, blank=True)
    farm = models.CharField(max_length=255, blank=True)
    secret = models.CharField(max_length=255, blank=True)

    def __unicode__(self):
        """Return the Flickr id as the object's text representation."""
        # Interpolate into a unicode literal instead of calling str(): under
        # Python 2, str() on non-ASCII text raises UnicodeEncodeError, and
        # __unicode__ is expected to return unicode rather than bytes.
        return u"%s" % self.flickrid

    def image_url(self, code="t"):
        """Build the static image URL for this photo.

        ``code`` is the suffix placed before ``.jpg`` in the file name; the
        default ``"t"`` is the one used for the admin thumbnail.
        """
        return "http://farm%(farm)s.static.flickr.com/%(server)s/%(id)s_%(secret)s_%(code)s.jpg" % {
            'farm': self.farm,
            'server': self.server,
            'secret': self.secret,
            'id': self.flickrid,
            'code': code
        }

    def admin_thumbnail(self):
        """Return an ``<img>`` tag showing the ``"t"`` image for admin lists."""
        # Imported locally so this method does not rely on file-level imports.
        from django.utils.html import escape

        # The URL parts come from crawled data and the result is rendered
        # unescaped (allow_tags below), so escape the attribute value to keep
        # a stray quote or angle bracket from breaking out of the tag.
        return '<img src="' + escape(self.image_url(code="t")) + '" />'
    admin_thumbnail.allow_tags = True

class Exif(models.Model):
    """One piece of Exif data for a photo, stored as three strings.

    Reachable from a photo through the ``exif`` related name.
    """

    photo = models.ForeignKey(Photo, related_name="exif")
    tag = models.CharField(max_length=255)
    tagspace = models.CharField(max_length=255)
    content = models.CharField(max_length=255)

    def __unicode__(self):
        """Return ``tagspace:tag = content`` as the text representation.

        Added for consistency with Photo and Comment, which both define
        ``__unicode__``. Only this row's own fields are used, so no extra
        query for the related photo is triggered.
        """
        return u"%s:%s = %s" % (self.tagspace, self.tag, self.content)
  
class Comment(models.Model):
    """A comment attached to a photo.

    Reachable from a photo through the ``comments`` related name.
    """

    photo = models.ForeignKey(Photo, related_name="comments")
    # presumably Flickr's id and display name for the comment's author;
    # verify against the crawler that fills them in
    authorid = models.CharField(max_length=255)
    authorname = models.CharField(max_length=255)
    body = models.TextField(blank=True)

    def __unicode__(self):
        """Return a label naming the Flickr id of the commented photo."""
        flickr_id = self.photo.flickrid
        return "Comment on %s" % flickr_id

Admin

A custom admin makes the automatically generated admin views much more useful.

admin.py

from django.contrib import admin
from models import *

class PhotoAdmin(admin.ModelAdmin):
    """Admin options for Photo: columns shown in the change list."""

    # "admin_thumbnail" names a method on Photo rather than a model field.
    list_display = (
        "flickrid",
        "admin_thumbnail",
        "title",
        "description",
        "page_url",
    )


admin.site.register(Photo, PhotoAdmin)

class CommentAdmin(admin.ModelAdmin):
    """Admin options for Comment: show the photo and the comment text."""

    list_display = (
        "photo",
        "body",
    )


admin.site.register(Comment, CommentAdmin)

class ExifAdmin(admin.ModelAdmin):
    """Admin options for Exif: list columns, sidebar filters and search."""

    list_display = (
        "photo",
        "tagspace",
        "tag",
        "content",
    )
    list_filter = (
        "tagspace",
        "tag",
    )
    search_fields = (
        "tag",
        "content",
        "tagspace",
    )


admin.site.register(Exif, ExifAdmin)