User:Alexander Roidl/get cover

From Media Design: Networked & Lens-Based wiki
Jump to navigation Jump to search

Python script that extracts the first page of a PDF and saves it as a PNG cover image, in full size and as a small thumbnail.

import PyPDF2
from wand.image import Image
import io
import os


def pdf_page_to_png(src_pdf, pagenum=0, resolution=72):
    """Render one page of a PDF as a PNG-format Wand image.

    The requested page is copied into a one-page PDF held in memory, and
    that in-memory PDF is then loaded by Wand.

    Args:
        src_pdf: A ``PyPDF2.PdfFileReader`` to take the page from.
        pagenum: Index of the page to render, as understood by
            ``PdfFileReader.getPage`` (defaults to 0, the first page).
        resolution: Resolution handed to ``wand.image.Image`` when the
            PDF is loaded.

    Returns:
        A ``wand.image.Image`` whose format is set to PNG. The caller
        owns the image and is responsible for closing it.
    """
    # Build a single-page PDF containing only the requested page.
    dst_pdf = PyPDF2.PdfFileWriter()
    dst_pdf.addPage(src_pdf.getPage(pagenum))

    # Serialise it to memory and rewind so Wand reads from the start.
    pdf_bytes = io.BytesIO()
    dst_pdf.write(pdf_bytes)
    pdf_bytes.seek(0)

    img = Image(file=pdf_bytes, resolution=resolution)
    # Image.convert() returns a *new* image and leaves this one untouched,
    # so calling it without using the result is a no-op. Set the format in
    # place instead so the returned image really is a PNG.
    img.format = "png"

    return img


def get_cover(file_path, filename):
    """Render the first page of a PDF as a cover image plus a thumbnail.

    Two PNG files are written under ``app/uploads/cover/``:

    * ``<filename>_cover.png`` -- the first page rendered at resolution 200.
    * ``<filename>cover_small.png`` -- the same image after being shrunk
      with the resize geometry ``"200"``.

    Args:
        file_path: Path of the PDF file to read.
        filename: Base name used to build the two output file names.

    Returns:
        The name of the full-size cover file (``<filename>_cover.png``),
        without the directory part.
    """
    print(file_path)

    cover_dir = "app/uploads/cover/"
    big_filename = cover_dir + filename + "_cover.png"
    # NOTE(review): unlike the full-size name, there is no underscore before
    # "cover_small". Kept exactly as-is because code outside this file may
    # depend on the existing name.
    small_filename = cover_dir + filename + "cover_small" + ".png"

    # Context managers make sure both the PDF file handle and the Wand image
    # (which holds native resources) are released, even if saving fails.
    with open(file_path, "rb") as pdf_file:
        src_pdf = PyPDF2.PdfFileReader(pdf_file)

        with pdf_page_to_png(src_pdf, pagenum=0, resolution=200) as img:
            img.save(filename=big_filename)

            # Shrink the same image in place, then save it as the thumbnail.
            img.transform("", "200")
            img.save(filename=small_filename)

    return filename + "_cover.png"