Latest revision as of 19:27, 16 June 2020

STEPS

Republishing is separated into 6 steps:

1. Move the book from the webserver to a work directory

1.1 Replacing all spaces with underscores

2. Creating the watermark from the gathered form in Tactical Watermarks

2.1 Create the watermark in pdf with reportlab

2.2 Convert to a png

3. Append the watermark to the pdf

3.1 Burst the pdf cover

3.2 Rotate the watermark with PIL

3.3 Overlay the watermark with PIL

3.4 OCR the new cover

3.5 Resize the OCRed cover to fit the book

3.6 Merge the cover and the pdf into one

4. OCR the pdf if not OCRed already
5. Delete all the unwanted traces
6. Save the file in a directory open to Library Genesis Staff

FLOW

RUN.SH

To activate the stream I use ./run.sh

# Run the republishing pipeline five times in a row. Every pass calls the
# step scripts in the order listed; the exit status of a step is not checked,
# so the following steps run even when an earlier one failed.
# NOTE(review): the script names presumably map to sections 1, 2, 3, 5 and 6
# of this page (deletetraces.sh runs before republish.sh) - confirm.
for i in {1..5}
do
  ./movebookfolder.sh
  ./watermarkformtxt.sh
  ./appendwatermarktopdf.sh
  ./deletetraces.sh
  ./republish.sh
done

1. Moving the book from the webserver to a work place

# Move the most recently modified upload from the webserver drop directory
# into the work directory. Spaces in names are replaced with underscores first
# because the later steps locate files through unquoted `ls | head` lookups.
src=/home/psc/tacticalbooks/republish
dest=/home/psc/scripts/autorepublish/inprogress

# Stop if the drop directory is missing; otherwise the renames below would run
# in whatever directory the script was started from.
cd "$src" || exit 1
for name in *; do
  # Skip the literal "*" left by an empty directory.
  [ -e "$name" ] || continue
  # Only rename when the name actually contains a space (mv onto the same
  # name just prints an error).
  [ "$name" = "${name// /_}" ] || mv -- "$name" "${name// /_}"
done

# Newest entry by modification time.
newest=$(ls -td -- "$src"/* 2>/dev/null | head -n 1)

# Nothing waiting: stop here. With an empty lookup `cd` would go to $HOME and
# the second rename loop would rename the files found there.
[ -n "$newest" ] && [ -d "$newest" ] || exit 0

# Same clean-up for the files inside the upload directory.
cd "$newest" || exit 1
for name in *; do
  [ -e "$name" ] || continue
  [ "$name" = "${name// /_}" ] || mv -- "$name" "${name// /_}"
done

mv -- "$newest" "$dest"

2. Creating the watermark from the gathered form in Tactical Watermarks

Bash

# Turn the uploader's form (.txt) of the newest book in the work directory
# into watermark.png and place that image next to the book.
work=/home/psc/scripts/autorepublish/inprogress
tool=/home/psc/scripts/autorepublish/watermark

# Newest book directory; stop quietly when the work directory is empty
# (an empty lookup would otherwise send `cd` to $HOME).
book=$(ls -td -- "$work"/* 2>/dev/null | head -n 1)
[ -n "$book" ] || exit 0
cd "$book" || exit 1

# Newest .txt in the book directory is taken to be the form.
form=$(ls -td -- *.txt 2>/dev/null | head -n 1)
[ -n "$form" ] || { echo "no form .txt found in $book" >&2; exit 1; }

# watermark.py reads ./watermark.txt from its own directory.
mv -- "$form" "$tool/watermark.txt" || exit 1
cd "$tool" || exit 1

# Render the form to watermark.pdf; always remove the form afterwards.
python3 watermark.py || { rm -f watermark.txt; exit 1; }
rm watermark.txt

# Rasterise the PDF (ImageMagick) and hand the PNG back to the book directory.
convert -density 300 -trim watermark.pdf -quality 100 watermark.png || exit 1
mv -- "$tool/watermark.png" "$book"

Python watermark.py

from reportlab.pdfgen import canvas
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfmetrics
from reportlab.lib import colors
from reportlab.lib.colors import pink, green, brown, white, black
import textwrap
from textwrap import wrap
from reportlab.lib.units import inch
from reportlab.lib.pagesizes import letter
from reportlab.lib.pagesizes import A4
from reportlab.lib.units import inch
from reportlab.lib.units import cm
from reportlab.lib.colors import HexColor
import datetime
from reportlab.lib.utils import ImageReader
import ast

# Read the submitted form and parse it as a Python literal; the rest of the
# script indexes the result as watermark[0] .. watermark[4].
with open('watermark.txt', 'r') as form_file:
    form_source = form_file.read()
watermark = ast.literal_eval(form_source)

#The Ruler
def DrawTheRuler(pdf):
    """Draw ruler tick marks along the top and left edges of the page.

    Calls ``pdf.drawString`` once per tick: a ``'|'`` at y=960 for every x
    from 30 to 630 in steps of 30, followed by a ``'—'`` at x=10 for every y
    from 940 down to 40 in steps of 30 (52 calls in total).

    Args:
        pdf: Object exposing ``drawString(x, y, text)``; the script passes
            its reportlab canvas.
    """
    # Top edge, left to right.
    for x in range(30, 631, 30):
        pdf.drawString(x, 960, '|')

    # Left edge, top to bottom.
    for y in range(940, 39, -30):
        pdf.drawString(10, y, '—')

# Fixed output name and captions.
fileName = "watermark.pdf"
documentTitle = "TACTICAL WATERMARKS"
title = "TACTICAL WATERMARKS"
subTitle = "REPUBLISHED THROUGH"

# Page size in points (width, height).
PAGE_WIDTH, PAGE_HEIGHT = 660, 1000


def _draw_line(pdf, font, size, x, y, text):
    """Draw one line of black text at (x, y) using a registered font."""
    pdf.setFont(font, size)
    pdf.setFillColorRGB(0, 0, 0)
    pdf.drawString(x, y, text)


# Create the file with its final page size from the start, so that the size
# is already in place when the first shapes are drawn.
pdf = canvas.Canvas(fileName, pagesize=(PAGE_WIDTH, PAGE_HEIGHT))

# Background: fill the whole page.
pdf.setFillColor(HexColor(0xceff00))
pdf.rect(0, 0, PAGE_WIDTH, PAGE_HEIGHT, fill=1)

# Back to black for everything that follows.
pdf.setFillColorRGB(0, 0, 0)

# Ruler ticks (drawn before any setFont call, i.e. in the canvas default font).
DrawTheRuler(pdf)

# Document metadata.
pdf.setTitle(documentTitle)

# Logos, read from the script's working directory.
logojump = ImageReader('logojump.png')
pdf.drawImage(logojump, 560, 10, width=(300 / 3), height=(300 / 3), mask='auto')

logostar = ImageReader('logostar.png')
pdf.drawImage(logostar, 100, 400, width=300, height=300, mask='auto')

# Fonts: internal name -> TTF file in the working directory.
for font_name, font_file in (
    ('header', 'LyonJeanTrue.ttf'),
    ('subtitle', 'Favorit_Medium.ttf'),
    ('body', 'Favorit_Regular.ttf'),
    ('bodyitalic', 'Favorit_Regular_Italic.ttf'),
):
    pdfmetrics.registerFont(TTFont(font_name, font_file))

# Title and subtitle at the bottom of the page.
pdf.setFont('header', 45)
pdf.drawString(40, 30, title)
_draw_line(pdf, 'subtitle', 14, 40, 80, subTitle)

# Creation timestamp at the top of the page.
timestamp = datetime.datetime.now()
_draw_line(pdf, 'subtitle', 10, 30, 980, str(timestamp))

# Section header.
header = "UPLOADERS SIGNATURE"
_draw_line(pdf, 'subtitle', 35, 30, 920, header)

# Single-line questions: (question text, y of the question). The answer is
# watermark[index], drawn 20 points below its question.
questions = (
    ("Name—Nickname—Pseudonim of the uploader", 890),
    ("Did you digitise the file?", 835),
    ("How long did it take to scan?", 780),
    ("Where was the source found?", 725),
)
for index, (question, question_y) in enumerate(questions):
    _draw_line(pdf, 'bodyitalic', 14, 60, question_y, question)
    _draw_line(pdf, 'body', 14, 30, question_y - 20, watermark[index])

# Free-text question: three prompt lines, then the wrapped answer.
pdf.setFont('bodyitalic', 14)
pdf.setFillColorRGB(0, 0, 0)
for prompt_y, prompt in (
    (670, "Why are you sharing this file?"),
    (655, "You can tell an anecdote"),
    (640, "You can leave a personal message!"),
):
    pdf.drawString(60, prompt_y, prompt)

# Wrap the answer (watermark[4]) at 75 characters per line.
textLines = wrap(watermark[4], 75)

text = pdf.beginText(40, 615)
text.setFont("body", 14)
text.setFillColor(colors.black)
for line in textLines:
    text.textLine(line)
pdf.drawText(text)

# Write watermark.pdf.
pdf.save()

3. Append the watermark to the pdf

Bash

# Build a watermarked, OCRed cover for the newest book in the work directory
# and put it in front of the book's PDF.
work=/home/psc/scripts/autorepublish/inprogress
overlay=/home/psc/scripts/autorepublish/overlay

# Newest book directory; stop quietly when there is none (an empty lookup
# would otherwise send `cd` to $HOME).
book=$(ls -td -- "$work"/* 2>/dev/null | head -n 1)
[ -n "$book" ] || exit 0
cd "$book" || exit 1

# Newest PDF is the book, newest PNG is the watermark made in step 2.
pdf=$(ls -td -- *.pdf 2>/dev/null | head -n 1)
png=$(ls -td -- *.png 2>/dev/null | head -n 1)
[ -n "$pdf" ] && [ -n "$png" ] || { echo "missing pdf or watermark png in $book" >&2; exit 1; }

# Work on copies inside the overlay directory; the scripts there expect the
# book as target.pdf.
cp -- "$pdf" "$overlay/target.pdf" || exit 1
cp -- "$png" "$overlay" || exit 1
cd "$overlay" || exit 1

# Every step needs the output of the previous one, so stop at the first
# failure instead of replacing the book with a partial result.
python3 burstcover.py || exit 1
python3 rotatelogo.py || exit 1
python3 overlaylogo.py || exit 1
tesseract page1.png out pdf || exit 1
python3 resizecover.py || exit 1
gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite -sOutputFile=name.pdf cover.pdf target.pdf || exit 1

# Replace the very PDF that was copied above (not whichever PDF happens to be
# the newest across all book directories).
mv -- name.pdf "$book/$pdf"

3.1 Burst the pdf cover

from pdf2image import convert_from_path

# Render the cover (first page) of target.pdf to page1.png at 300 dpi.
dpi = 300
pdf_file = 'target.pdf'

# Only page 1 is needed, so limit the conversion to it instead of rasterising
# the whole book.
pages = convert_from_path(pdf_file, dpi, first_page=1, last_page=1)

page = pages[0]
page.save('page1.png', 'PNG')

3.2 Rotate the watermark with PIL

from PIL import Image
import PIL.ImageOps

# Load the watermark, tilt it by 10 degrees (expand=True grows the canvas so
# the corners are not cut off) and write the result to rotated.png.
watermark_image = Image.open('watermark.png')
tilted = watermark_image.rotate(10, expand=True)
tilted.save('rotated.png')

3.3 Overlay the watermark with PIL

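3.4 OCR the new cover

Note: the OCR itself is the `tesseract page1.png out pdf` line in the Bash script of step 3. The Python script shown below scales the rotated watermark and pastes it onto the cover, i.e. it is the overlay of step 3.3.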

from PIL import Image
import os

# Scale the rotated watermark to 60% of the cover width and paste it onto the
# cover image (page1.png), which is overwritten in place.
filepath = 'page1.png'

# Only the size is needed here, so close the file again right away.
with Image.open(filepath) as cover:
    basewidth, height = cover.size
# Target watermark width in whole pixels (truncated).
base = int(basewidth * 0.6)

# Rescale the logo, keeping its aspect ratio.
finallogo = Image.open("rotated.png")
wpercent = base / float(finallogo.size[0])
hsize = int(float(finallogo.size[1]) * wpercent)
# Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same filter.
finallogo = finallogo.resize((base, hsize), Image.LANCZOS)

# Kept on disk because the clean-up step deletes this file by name.
finallogo.save('rotatedwatermark.png')

background = Image.open("page1.png")
foreground = Image.open('rotatedwatermark.png')

# The RGBA copy is passed as the paste mask.
background.paste(foreground, (40, 70), foreground.convert('RGBA'))
background.save("page1.png")

3.5 Resize the OCRed cover to fit the book

from pdfrw import PdfReader
from PIL import Image
import os
from time import sleep
from pdf2image import convert_from_path
import cv2
import numpy as np
from PyPDF2 import PdfFileReader, PdfFileWriter

# Scale the OCRed cover (out.pdf) to the page size of the book (target.pdf)
# and write it to cover.pdf.
pdf = PdfReader('target.pdf')
measures = pdf.pages[0].MediaBox

# MediaBox is [x0, y0, x1, y1]; the size is the difference of the corners,
# truncated to whole points.
# NOTE(review): assumes pdfrw returns the entries as numeric strings and that
# the first page carries its own MediaBox - confirm for inherited boxes.
basewidth = int(float(measures[2]) - float(measures[0]))
baseheight = int(float(measures[3]) - float(measures[1]))
print(basewidth)
print(baseheight)

fileName = "out.pdf"

# Keep the input open until the output is written, then close it.
with open(fileName, 'rb') as ocr_file:
    pdfFile = PdfFileReader(ocr_file)
    # Getting only first page!
    newPage = pdfFile.getPage(0)

    newHeight = baseheight
    newWidth = basewidth
    newPage.scaleTo(newWidth, newHeight)

    writer = PdfFileWriter()
    writer.addPage(newPage)

    with open('cover.pdf', 'wb') as f:
        writer.write(f)

4. OCR the pdf if not OCRed already

5. Delete all the unwanted traces

# Remove the intermediate files of steps 2 and 3 and archive the watermarked
# cover image under a timestamped name.

# Guard every cd: without it a failed cd would let rm run in the wrong
# directory. -f keeps rm quiet when a file was never created.
cd /home/psc/scripts/autorepublish/watermark/ || exit 1
rm -f watermark.pdf

cd /home/psc/scripts/autorepublish/overlay/ || exit 1
rm -f watermark.png rotated.png rotatedwatermark.png target.pdf cover.pdf out.pdf

# Timestamp: day-month abbreviation-hour_minute_second.
now=$(date +%d-%b-%H_%M_%S)
mv page1.png /home/psc/tacticalbooks/covers/"$now".png

6. Save the file in a directory open to Library Genesis Staff

# Hand the finished PDF of the newest book over to the staff directory and
# remove the book's work directory.
work=/home/psc/scripts/autorepublish/inprogress

# Newest book directory; stop quietly when there is none (an empty lookup
# would otherwise send `cd` to $HOME and run the rm there).
book=$(ls -td -- "$work"/* 2>/dev/null | head -n 1)
[ -n "$book" ] && [ -d "$book" ] || exit 0

# The watermark image is no longer needed.
rm -f -- "$book/watermark.png"

# Newest PDF inside this book directory.
pdf=$(ls -td -- "$book"/*.pdf 2>/dev/null | head -n 1)
[ -n "$pdf" ] || { echo "no pdf found in $book" >&2; exit 1; }

# Delete the work directory only after the PDF has really been moved;
# otherwise a failed mv would lose the book.
mv -- "$pdf" /home/libgen/books || exit 1
rm -r -- "$book"

User:Pedro Sá Couto/TW/REPUBLISHING FLOW: Difference between revisions

Latest revision as of 19:27, 16 June 2020

Contents

STEPS

Republishing is separated into 6 steps:

FLOW

RUN.SH

To activate the stream I use ./run.sh

1. Moving the book from the webserver to a work place

2. Creating the watermark from the gathered form in Tactical Watermarks

Bash

Python watermark.py

3. Append the watermark to the pdf

Bash

3.1 Burst the pdf cover

3.2 Rotate the watermark with PIL

3.3 Overlay the watermark with PIL

3.4 OCR the new cover

3.5 Resize the OCRed cover to fit the book

4. OCR the pdf if not OCRed already

5. Delete all the unwanted traces

6. Save the file in a directory open to Library Genesis Staff

@@ Line 1: / Line 1: @@
 =STEPS=
-The process to republishing is separated into 6 steps:<br>
+====Republishing is separated into 6 steps:====
-'''1.''' Moving the book from the webserver to a work place<br>
+'''1.''' Move the book from the webserver to a work directory<br>
 :'''1.1''' Replacing all spaces with underscores<br>
 '''2.''' Creating the watermark from the gathered form in Tactical Watermarks<br>
@@ Line 7: / Line 7: @@
 :'''2.2''' Convert to a png<br>
 '''3.''' Append the watermark to the pdf<br>
-:'''3.1''' Burst the pdf into pages<br>
+:'''3.1''' Burst the pdf cover<br>
 :'''3.2''' Rotate the watermark with PIL<br>
 :'''3.3''' Overlay the watermark with PIL<br>
-:'''3.4''' Merge all images into a PDF<br>
+:'''3.4''' OCR the new cover<br>
+:'''3.5''' Resize the OCRed cover to fit the book<br>
+:'''3.6''' Merge the cover and the pdf into one<br>
 '''4.''' OCR the pdf if not OCRed already<br>
-'''5.''' Save the file in a directory open to Library Genesis Staff<br>
+'''5.''' Delete all the unwanted traces<br>
-'''6.''' Delete all the unwanted traces<br>
+'''6.''' Save the file in a directory open to Library Genesis Staff<br>
 <br>
-=RUN.SH=
+=FLOW=
+==RUN.SH==
 ====To activate the stream I use ./run.sh====
 <source lang="python">
-sudo chmod 777 *
+for i in {1..5}
-./movebookfolder.sh
+do
-./watermarkformtxt.sh
+  ./movebookfolder.sh
-./appendwatermarktopdf.sh
+  ./watermarkformtxt.sh
-./republish.sh
+  ./appendwatermarktopdf.sh
-./deletetraces.sh
+  ./deletetraces.sh
+  ./republish.sh
+done
 </source>
 <br>
+==1. Moving the book from the webserver to a work place==
+<source lang="python">
+cd /home/psc/tacticalbooks/republish/
+for name in *; do mv "$name" "${name// /_}"; done
+cd `ls -td -- /home/psc/tacticalbooks/republish/* | head -n 1`
+for name in *; do mv "$name" "${name// /_}"; done
+mv `ls -td -- /home/psc/tacticalbooks/republish/* | head -n 1` /home/psc/scripts/autorepublish/inprogress
+</source>
+<br>
+==2. Creating the watermark from the gathered form in Tactical Watermarks==
+====Bash====
+<source lang="python">
+cd `ls -td -- /home/psc/scripts/autorepublish/inprogress/* | head -n 1`
+mv `ls -td -- *.txt | head -n 1` watermark.txt
+mv `ls -td -- *.txt | head -n 1` /home/psc/scripts/autorepublish/watermark
+cd /home/psc/scripts/autorepublish/watermark
+python3 watermark.py
+rm watermark.txt
+convert -density 300 -trim watermark.pdf -quality 100 watermark.png
+mv /home/psc/scripts/autorepublish/watermark/watermark.png `ls -td -- /home/psc/scripts/autorepublish/inprogress/* | head -n 1`
+</source>
+<br>
+====Python watermark.py====
+<source lang="python">
+from reportlab.pdfgen import canvas
+from reportlab.pdfbase.ttfonts import TTFont
+from reportlab.pdfbase import pdfmetrics
+from reportlab.lib import colors
+from reportlab.lib.colors import pink, green, brown, white, black
+import textwrap
+from textwrap import wrap
+from reportlab.lib.units import inch
+from reportlab.lib.pagesizes import letter
+from reportlab.lib.pagesizes import A4
+from reportlab.lib.units import inch
+from reportlab.lib.units import cm
+from reportlab.lib.colors import HexColor
+import datetime
+from reportlab.lib.utils import ImageReader
+import ast
+with open('watermark.txt', 'r') as f:
+    watermark = ast.literal_eval(f.read())
+#The Ruler
+def DrawTheRuler(pdf):
+    pdf.drawString(30,960, '|')
+    pdf.drawString(60,960, '|')
+    pdf.drawString(90,960, '|')
+    pdf.drawString(120,960, '|')
+    pdf.drawString(150,960, '|')
+    pdf.drawString(180,960, '|')
+    pdf.drawString(210,960, '|')
+    pdf.drawString(240,960, '|')
+    pdf.drawString(270,960, '|')
+    pdf.drawString(300,960, '|')
+    pdf.drawString(330,960, '|')
+    pdf.drawString(360,960, '|')
+    pdf.drawString(390,960, '|')
+    pdf.drawString(420,960, '|')
+    pdf.drawString(450,960, '|')
+    pdf.drawString(480,960, '|')
+    pdf.drawString(510,960, '|')
+    pdf.drawString(540,960, '|')
+    pdf.drawString(570,960, '|')
+    pdf.drawString(600,960, '|')
+    pdf.drawString(630,960, '|')
+    pdf.drawString(10,940, '—')
+    pdf.drawString(10,910, '—')
+    pdf.drawString(10,880, '—')
+    pdf.drawString(10,850, '—')
+    pdf.drawString(10,820, '—')
+    pdf.drawString(10,790, '—')
+    pdf.drawString(10,760, '—')
+    pdf.drawString(10,730, '—')
+    pdf.drawString(10,700, '—')
+    pdf.drawString(10,670, '—')
+    pdf.drawString(10,640, '—')
+    pdf.drawString(10,610, '—')
+    pdf.drawString(10,580, '—')
+    pdf.drawString(10,550, '—')
+    pdf.drawString(10,520, '—')
+    pdf.drawString(10,490, '—')
+    pdf.drawString(10,460, '—')
+    pdf.drawString(10,430, '—')
+    pdf.drawString(10,400, '—')
+    pdf.drawString(10,370, '—')
+    pdf.drawString(10,340, '—')
+    pdf.drawString(10,310, '—')
+    pdf.drawString(10,280, '—')
+    pdf.drawString(10,250, '—')
+    pdf.drawString(10,220, '—')
+    pdf.drawString(10,190, '—')
+    pdf.drawString(10,160, '—')
+    pdf.drawString(10,130, '—')
+    pdf.drawString(10,100, '—')
+    pdf.drawString(10,70, '—')
+    pdf.drawString(10,40, '—')
+# Unchanged
+fileName = "watermark.pdf"
+documentTitle = "TACTICAL WATERMARKS"
+title = "TACTICAL WATERMARKS"
+subTitle = "REPUBLISHED THROUGH"
+#Create the file
+pdf = canvas.Canvas(fileName)
+#Set the background
+pdf.setFillColor(HexColor(0xceff00))
+pdf.rect(0,0,660, 1000,fill=1)
+#Change color back to black
+pdf.setFillColorRGB(0,0,0)
+##Draw the Ruler
+DrawTheRuler(pdf)
+# Set the title
+pdf.setTitle(documentTitle)
+#Set the Height and Width
+pdf.setPageSize((660, 1000))
+#DRAW LOGO
+logojump = ImageReader('logojump.png')
+pdf.drawImage(logojump, 560, 10, width=(300/3),height=(300/3), mask='auto')
+logostar = ImageReader('logostar.png')
+pdf.drawImage(logostar, 100, 400, width=(300),height=(300), mask='auto')
+#TITLE
+# Register a new font
+pdfmetrics.registerFont(
+    TTFont('header', 'LyonJeanTrue.ttf')
+)
+# Draw
+pdf.setFont('header', 45)
+pdf.drawString(40, 30, title)
+#SUBTITLE
+# Register a new font for the subtitle
+pdfmetrics.registerFont(
+    TTFont('subtitle', 'Favorit_Medium.ttf')
+)
+# Draw
+pdf.setFont('subtitle', 14)
+pdf.setFillColorRGB(0, 0, 0)
+pdf.drawString(40, 80, subTitle)
+# DATE
+date = datetime.datetime.now()
+# Draw
+pdf.setFont('subtitle', 10)
+pdf.setFillColorRGB(0, 0, 0)
+pdf.drawString(30, 980, str(date))
+# HEADER
+header = "UPLOADERS SIGNATURE"
+pdf.setFont('subtitle', 35)
+pdf.setFillColorRGB(0, 0, 0)
+pdf.drawString(30, 920, header)
+# BODY
+# Register a new font for the body
+pdfmetrics.registerFont(
+    TTFont('body', 'Favorit_Regular.ttf')
+)
+pdfmetrics.registerFont(
+    TTFont('bodyitalic', 'Favorit_Regular_Italic.ttf')
+)
+# ID
+# Q
+id = "Name—Nickname—Pseudonim of the uploader"
+pdf.setFont('bodyitalic', 14)
+pdf.setFillColorRGB(0, 0, 0)
+pdf.drawString(60, 890, id)
+#A
+id_answer = watermark[0]
+pdf.setFont('body', 14)
+pdf.setFillColorRGB(0, 0, 0)
+pdf.drawString(30, 870, id_answer)
+# DID YOU DIGITISE?
+# Q
+digitise = "Did you digitise the file?"
+pdf.setFont('bodyitalic', 14)
+pdf.setFillColorRGB(0, 0, 0)
+pdf.drawString(60, 835, digitise)
+#A
+digitise_answer = watermark[1]
+pdf.setFont('body', 14)
+pdf.setFillColorRGB(0, 0, 0)
+pdf.drawString(30, 815, digitise_answer)
-=RESULTS IN EACH STEP=
+# HOW LONG?
-'''0.''' Starting with a Paper from JSTOR<br>
+# Q
-[[File:42938075.pdf|thumb|Calibration of Watermark soil moisture sensors for soil matric potential and temperature.pdf]]
+howlong = "How long did it take to scan?"
-'''1.''' Bursting the PDF into PNGs<br>
+pdf.setFont('bodyitalic', 14)
-====PDF is seperated into pages====
+pdf.setFillColorRGB(0, 0, 0)
-<gallery>
+pdf.drawString(60, 780, howlong)
-File:wiki_page1.png
-File:wiki_page2.png
-File:wiki_page3.png
-File:wiki_page4.png
-File:wiki_page5.png
-File:wiki_page6.png
-</gallery>
-'''2.''' Overlaying the cover<br>
+#A
+howlong_answer = watermark[2]
-====The cover is overlayed and dewatermarked====
+pdf.setFont('body', 14)
-<gallery>
+pdf.setFillColorRGB(0, 0, 0)
-File:wiki_page1_water.png
+pdf.drawString(30, 760, howlong_answer)
-</gallery>
-'''3.''' Overlaying the pages<br>
+# WHERE DID YOU FIND IT?
+# Q
+where = "Where was the source found?"
-====The pages are overlayed and dewatermarked====
+pdf.setFont('bodyitalic', 14)
-<gallery>
+pdf.setFillColorRGB(0, 0, 0)
-File:wiki_page2_water.png
+pdf.drawString(60, 725, where)
-File:wiki_page3_water.png
-File:wiki_page4_water.png
-File:wiki_page5_water.png
-File:wiki_page6_water.png
-</gallery>
-'''4.''' OCR again<br>
+#A
-====You have a De-watermarked, searchable PDF====
+where_answer = watermark[3]
-[[File:42938075_dewater.pdf|thumb|De Watermarked Calibration of Watermark soil moisture sensors for soil matric potential and temperature.pdf]]
+pdf.setFont('body', 14)
+pdf.setFillColorRGB(0, 0, 0)
+pdf.drawString(30, 705, where_answer)
+#ANECDOTE
+# Q
+sharing = "Why are you sharing this file?"
+anecdote = "You can tell an anecdote"
+personal = "You can leave a personal message!"
+pdf.setFont('bodyitalic', 14)
+pdf.setFillColorRGB(0, 0, 0)
+pdf.drawString(60, 670, sharing)
+pdf.drawString(60, 655, anecdote)
+pdf.drawString(60, 640, personal)
+#A
+strs = watermark[4]
+#Wrap the lines
+textLines = wrap(strs, 75)
+text = pdf.beginText(40, 615)
+text.setFont("body", 14)
+text.setFillColor(colors.black)
+for line in textLines:
+    text.textLine(line)
+# Draw
+pdf.drawText(text)
+# Save the pdf
+pdf.save()
+</source>
 <br>
-=1. Bursting the PDF into png=
+==3. Append the watermark to the pdf==
+==Bash==
 <source lang="python">
-#Based in the code in https://iq.opengenus.org/pdf_to_image_in_python/
+cd `ls -td -- /home/psc/scripts/autorepublish/inprogress/* | head -n 1`
+cp `ls -td -- *.pdf | head -n 1` /home/psc/scripts/autorepublish/overlay
+cp `ls -td -- *.png | head -n 1` /home/psc/scripts/autorepublish/overlay
+cd /home/psc/scripts/autorepublish/overlay
+mv `ls -td -- *.pdf | head -n 1` target.pdf
+python3 burstcover.py
+python3 rotatelogo.py
+python3 overlaylogo.py
+tesseract page1.png out pdf
+python3 resizecover.py
+gs -dBATCH -dNOPAUSE -q -sDEVICE=pdfwrite -sOutputFile=name.pdf cover.pdf target.pdf
+var1=`ls -td -- /home/psc/scripts/autorepublish/inprogress/*/*.pdf | head -n 1`
+mv name.pdf $var1
+</source>
+<br>
-import pdf2image
+==3.1 Burst the pdf cover==
-from PIL import Image
+<source lang="python">
-import time
+from pdf2image import convert_from_path
-#DECLARE CONSTANTS
+dpi = 300
-PDF_PATH = "target.pdf"
+pdf_file = 'target.pdf'
-DPI = 200
+pages = convert_from_path(pdf_file ,dpi )
-FIRST_PAGE = None
-LAST_PAGE = None
-FORMAT = 'png'
-THREAD_COUNT = 1
-USERPWD = None
-USE_CROPBOX = False
-STRICT = False
-def pdftopil():
+page = pages[0]
-    #This method reads a pdf and converts it into a sequence of images
+page.save('page1.png'.format(0), 'PNG')
-    #PDF_PATH sets the path to the PDF file
+</source>
-    #dpi parameter assists in adjusting the resolution of the image
+<br>
-    #first_page parameter allows you to set a first page to be processed by pdftoppm
+==3.2 Rotate the watermark with PIL==
-    #last_page parameter allows you to set a last page to be processed by pdftoppm
+<source lang="python">
-    #fmt parameter allows to set the format of pdftoppm conversion (PpmImageFile, TIFF)
+from PIL import Image
-    #thread_count parameter allows you to set how many thread will be used for conversion.
+import PIL.ImageOps
-    #userpw parameter allows you to set a password to unlock the converted PDF
-    #use_cropbox parameter allows you to use the crop box instead of the media box when converting
-    #strict parameter allows you to catch pdftoppm syntax error with a custom type PDFSyntaxError
-    start_time = time.time()
+#open both the watermark
-    pil_images = pdf2image.convert_from_path(PDF_PATH, dpi=DPI, first_page=FIRST_PAGE, last_page=LAST_PAGE, fmt=FORMAT, thread_count=THREAD_COUNT, userpw=USERPWD, use_cropbox=USE_CROPBOX, strict=STRICT)
+logo = Image.open('watermark.png')
-    print ("Time taken : " + str(time.time() - start_time))
-    return pil_images
-def save_images(pil_images):
+#rotate the watermark
-    d = 1
+rotatedlogo = logo.rotate(10, expand=True)
-    for image in pil_images:
+rotatedlogo.save('rotated.png')
-        image.save(("split/page%d"%d) + ".png")
+</source>
-        d += 1
+<br>
+==3.3 Overlay the watermark with PIL==
-if __name__ == "__main__":
+<source lang="python">
-    pil_images = pdftopil()
-    save_images(pil_images)
 </source>
 <br>
+==3.4 OCR the new cover==
-=2. Overlaying the cover=
 <source lang="python">
 from PIL import Image
+import os
-background = Image.open("split/page1.png")
+filepath = 'page1.png'
+basewidth, height = Image.open(filepath).size
+basenumber, ext1 = os.path.splitext(str(basewidth*0.6))
+base = int(basenumber)
 #rescaling the logo
-basewidth = (background.size[0])
+finallogo = Image.open("rotated.png")
-finalcover = Image.open("cover.png")
+wpercent = (base/float(finallogo.size[0]))
-wpercent = (basewidth/float(finalcover.size[0]))
+hsize = int((float(finallogo.size[1])*float(wpercent)))
-hsize = int((float(finalcover.size[1])*float(wpercent)))
+finallogo = finallogo.resize((base,hsize), Image.ANTIALIAS)
-finalcover = finalcover.resize((basewidth,hsize), Image.ANTIALIAS)
-finalcover.save("cover_rescale.png")
+finallogo.save('rotatedwatermark.png')
-foreground = Image.open("cover_rescale.png")
+background = Image.open("page1.png")
+foreground = Image.open('rotatedwatermark.png')
-background.paste(foreground, (0, -180), foreground.convert('RGBA'))
+background.paste(foreground, (40, 70), foreground.convert('RGBA'))
-background.save("split/page1.png")
+background.save("page1.png")
 </source>
 <br>
+==3.5 Resize the OCRed cover to fit the book==
-=3. Overlaying the pages=
-====This happens through ./jstor.sh====
 <source lang="python">
+from pdfrw import PdfReader
 from PIL import Image
+import os
+from time import sleep
+from pdf2image import convert_from_path
+import cv2
+import numpy as np
+from PyPDF2 import PdfFileReader, PdfFileWriter
-base = Image.open("split/page2.png")
+pdf = PdfReader('target.pdf')
+measures = pdf.pages[0].MediaBox
+basenumberwidth, ext1 = os.path.splitext(measures[2])
+basenumberheight, ext2 = os.path.splitext(measures[3])
+basewidth = int(basenumberwidth)
+baseheight = int(basenumberheight)
+print(basewidth)
+print(baseheight)
-#rescaling the logo
+fileName = ("out.pdf")
-basewidth = (base.size[0])
-finalpage = Image.open("pages.png")
-wpercent = (basewidth/float(finalpage.size[0]))
-hsize = int((float(finalpage.size[1])*float(wpercent)))
-finalpage = finalpage.resize((basewidth,hsize), Image.ANTIALIAS)
-finalpage.save("page_rescale.png")
-foreground = Image.open("page_rescale.png")
+pdfFile = PdfFileReader(open(fileName, 'rb'))
+# Getting only first page!
+newPage = pdfFile.getPage(0)
-i = 2
+newHeight = baseheight
+newWidth = basewidth
-while True:
+newPage.scaleTo(newWidth, newHeight)
-    try:
-        background = Image.open("split/page%i.png"%i)
-        background.paste(foreground, (0, -140), foreground.convert('RGBA'))
+writer = PdfFileWriter()
-        background.save("split/page%i.png"%i)
+writer.addPage(newPage)
-        i+=1
+with open('cover.pdf', 'wb') as f:
+    writer.write(f)
+</source>
+<br>
-    except:
+==4. OCR the pdf if not OCRed already==
-        print("DID MY JOB!")
+<source lang="python">
-        break
+</source>
+<br>
+==5. Delete all the unwanted traces==
+<source lang="python">
+cd /home/psc/scripts/autorepublish/watermark/
+rm watermark.pdf
+cd /home/psc/scripts/autorepublish/overlay/
+rm watermark.png rotated.png rotatedwatermark.png target.pdf cover.pdf out.pdf
+now=$(date +%d-%b-%H_%M_%S)
+mv page1.png /home/psc/tacticalbooks/covers/"$now".png
 </source>
 <br>
-=4. OCR again=
+==6. Save the file in a directory open to Library Genesis Staff==
-====This happens through ./jstor.sh====
 <source lang="python">
-ocrmypdf `ls -td -- /Users/PSC/Desktop/JSTOR/ready/* | head -n 1` `ls -td -- /Users/PSC/Desktop/JSTOR/ready/* | head -n 1`
+cd `ls -td -- /home/psc/scripts/autorepublish/inprogress/* | head -n 1`
+rm watermark.png
+mv `ls -td -- /home/psc/scripts/autorepublish/inprogress/*/*.pdf | head -n 1` /home/libgen/books
+rm -r `ls -td -- /home/psc/scripts/autorepublish/inprogress/* | head -n 1`
 </source>
 <br>