User:Alexander Roidl/get cover

From XPUB & Lens-Based wiki
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

Python script to extract the first page of a PDF

import PyPDF2
from wand.image import Image
import io
import os


def pdf_page_to_png(src_pdf, pagenum=0, resolution=72):
    """Render a single page of a PDF as a PNG-format Wand image.

    The requested page is copied into a new one-page PDF held in memory,
    which is then read by Wand at the given resolution.

    Args:
        src_pdf: An open ``PyPDF2.PdfFileReader`` to take the page from.
        pagenum: Zero-based index of the page to render.
        resolution: Resolution passed to ``wand.image.Image`` when reading
            the PDF.

    Returns:
        A ``wand.image.Image`` whose format is PNG. The caller owns it and
        should close it (or use it as a context manager) when done.
    """
    # Build a one-page PDF containing only the requested page.
    dst_pdf = PyPDF2.PdfFileWriter()
    dst_pdf.addPage(src_pdf.getPage(pagenum))

    # Serialize it to memory and rewind so Wand reads from the start.
    pdf_bytes = io.BytesIO()
    dst_pdf.write(pdf_bytes)
    pdf_bytes.seek(0)

    # Image.convert() does not modify the image in place; it returns a new
    # converted image. Return that copy and release the PDF-format original.
    with Image(file=pdf_bytes, resolution=resolution) as pdf_img:
        return pdf_img.convert("png")


def get_cover(file_path, filename):
    """Write a large and a small PNG cover made from a PDF's first page.

    Two files are written under ``app/uploads/cover/``:

    * ``<filename>_cover.png`` -- the first page rendered at resolution 200.
    * ``<filename>cover_small.png`` -- the same image after resizing with
      the geometry ``"200"``.

    Args:
        file_path: Path of the source PDF file.
        filename: Base name used to build the output file names.

    Returns:
        The file name (without directory) of the large cover,
        ``<filename>_cover.png``.
    """
    print(file_path)

    cover_name = filename + "_cover.png"
    big_filename = "app/uploads/cover/" + cover_name
    # NOTE(review): unlike the large cover, this name has no underscore
    # before "cover_small". Kept as is because other code may rely on it.
    small_filename = "app/uploads/cover/" + filename + "cover_small" + ".png"

    # Keep the PDF open only while its page is being read, and make sure
    # both the file handle and the Wand image are released afterwards.
    with open(file_path, "rb") as src_file:
        src_pdf = PyPDF2.PdfFileReader(src_file)

        with pdf_page_to_png(src_pdf, pagenum=0, resolution=200) as img:
            img.save(filename=big_filename)

            # Shrink the same image in place (no crop, resize geometry
            # "200") and save it as the small cover.
            img.transform("", "200")
            img.save(filename=small_filename)

    return cover_name