User:Eleanorg/2.1/Placard Generator
No time to make a placard for that demo? Generate one from the latest slogans courtesy of Socialist Worker:
http://pzwart3.wdka.hro.nl/~egreenhalgh/urllib/socialistScrape1.html
After taking this photo I asked the model, "do you agree with it?". He said: "Agree that media still peddle lies? Totally! Isn't that what media is all about?"
Code
Scrape slogans, print to screen and generate a .pdf in one script, using Urllib, Beautiful Soup & ReportLab.
#!/usr/bin/python
#-*- coding:utf-8 -*-
from reportlab.lib.pagesizes import A3
from reportlab.lib.enums import TA_CENTER # lets you use the 'ta_center' format to centre-align text
from reportlab.platypus import Paragraph # allows text to be flowable
from reportlab.platypus import SimpleDocTemplate # lets you set up margins, page size, filename etc.
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle # lets you apply custom styles to text
#import cgi
#import cgitb; cgitb.enable()
from urllib import urlopen
import urllib2
from BeautifulSoup import BeautifulSoup
#---------- scrape page contents with urllib ------------------ #
url = "http://www.socialistworker.co.uk/section.php?id=19"
mySlogan = mySlogan = "Covering Up Abuse to Maintain Their System" # default old slogan in case of scraping errors
from urllib2 import Request, urlopen, URLError
request = Request(url)
try:
response = urlopen(request)
except URLError, e:
if hasattr(e, 'reason'): # if error ('e') has 'reason' attribute (ie if it's a URLError)
# print 'There was a URLError: ' + str(e.reason)
pass
elif hassattr(e, 'code'): # if error has 'code' attribute (ie if it's an HTTPError)
# print 'there was an HTTPError: ' + str(e.code)
pass
else:
#print 'everything worked'
response = urllib2.urlopen(request) # this is the response object (the url given)
webpage = response.read()
#----------- extract slogan with Beautiful Soup -------------- #
soup = BeautifulSoup(webpage)
# get content of the <a> tags inside <h4 class="hilihead">
headingSoup = soup.findAll('h4', { "class" : "hilihead" })
if headingSoup:
slogans = []
for i in range(0,2):
slogan = headingSoup[i].contents[0].contents[0]
slogans.append(slogan)
mySlogan = slogans[0]
else:
pass
# use an old slogan if none can be found on the page
upperSlogan = mySlogan.upper() # transform to uppercase
###----------------print slogan---------------------##
htmlHeader = """<!DOCTYPE html>
<html>
<head>
<style type="text/css">
body {background-color: #333;}
div {background-color:#fff;width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;font-size: 900%; line-height:90%;text-transform:uppercase;font-weight:bold;text-align:center; font-family: Arial, Helvetica, sans-serif;}
</style>
</head>
<body>"""
htmlFooter = """
</body>
</html>"""
print "Content-Type: text/html"
print
print htmlHeader
print "<div>"
print mySlogan
print "</div>"
print <a href="../../../2.1/placards/placard.pdf">Download .pdf</a>
print htmlFooter
###---------------make the pdf----------------------###
doc = SimpleDocTemplate("../../../2.1/urllib/placard.pdf", pagesize=A3, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18)
#doc = SimpleDocTemplate("test.pdf", pagesize=A3, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18)
# WISH LIST: anyone know how to auto-resize text w/ ReportLab to fit within 1 page?
styles = getSampleStyleSheet()
styles.add(ParagraphStyle(name="spaced", leading=130, alignment=TA_CENTER )) # make style called 'spaced', to set leading & alignment
story = []
text = '<font size=110 fontName="Helvetica-Bold">' +upperSlogan+'</font>'
#TODO how to catch errors if pdf can't be created?
story.append(Paragraph(text, styles["spaced"]))
doc.build(story)
Initial trials
Scrape slogans from SW
#!/usr/bin/python
#-*- coding:utf-8 -*-
from urllib import urlopen
from BeautifulSoup import BeautifulSoup
url = "http://www.socialistworker.co.uk/section.php?id=19"
webpage = urlopen(url).read()
# parse it with Beautiful Soup to extract p tags
soup = BeautifulSoup(webpage)
# get content of the <a> tags inside <h4 class="hilihead">
headingSoup = soup.findAll('h4', { "class" : "hilihead" })
# print text within first two h4 tags
#for i in range(0,2):
# print headingSoup[i].contents[0].contents[0]
# append slogans to a list for later use
slogans = []
for i in range(0,2):
slogan = headingSoup[i].contents[0].contents[0]
slogans.append(slogan)
print slogans
web-only implementation
Look at it here: http://pzwart3.wdka.hro.nl/~egreenhalgh/urllib/socialistScrape1.html — it scrapes slogans and displays them to the user in the browser. It will eventually have a 'print' button to generate a pdf file.
.html
<!DOCTYPE html>
<html>
<head>
<!-- Landing page: a single button that submits to the socialistScrape1.cgi script. -->
<style type="text/css">
body {background-color: #333;}
div {width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;color: #fff; font-size: 900%; line-height:90%;text-align:center; font-family: Arial, Helvetica, sans-serif;}
</style>
</head>
<body>
<div>
<span style="font-size: 100px; font-weight: bold;">
Need a placard? Get one here.
</span>
<br />
<form action="../cgi-bin/urllib/scrapedPlacards/socialistScrape1.cgi" name="inputForm">
<input type="submit" value="Get placard!">
</form>
</div>
</body>
</html>
.cgi
#!/usr/bin/python
#-*- coding:utf-8 -*-
import cgi
import cgitb; cgitb.enable()
from urllib import urlopen
from BeautifulSoup import BeautifulSoup
url = "http://www.socialistworker.co.uk/section.php?id=19"
webpage = urlopen(url).read()
# parse it with Beautiful Soup to extract p tags
soup = BeautifulSoup(webpage)
# get content of the <a> tags inside <h4 class="hilihead">
headingSoup = soup.findAll('h4', { "class" : "hilihead" })
# print text within first two h4 tags
#for i in range(0,2):
# print headingSoup[i].contents[0].contents[0]
# append text to a list for later use
slogans = []
for i in range(0,2):
slogan = headingSoup[i].contents[0].contents[0]
slogans.append(slogan)
# print one of them onscreen
htmlHeader = """<!DOCTYPE html>
<html>
<head>
<style type="text/css">
body {background-color: #333;}
div {background-color:#fff;width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;font-size: 900%; line-height:90%;text-transform:uppercase;font-weight:bold;text-align:center; font-family: Arial, Helvetica, sans-serif;}
</style>
</head>
<body>"""
htmlFooter = """
</body>
</html>"""
print "Content-Type: text/html"
print
print htmlHeader
print "<div>"
print slogans[0]
print "</div>"
print htmlFooter
making PDFs
Anyone know a good way of making nice looking pdfs?
The internets suggested making a .ps file and using Ghostscript's ps2pdf function to convert to .pdf.
generate a .ps file
This contains instructions on fonts etc.
%!
% Minimal PostScript test file: draws one string and emits the page.
/Helvetica findfont 300 scalefont setfont   % look up Helvetica, scale it by 300, make it the current font
300 300 moveto                              % move the current point to (300, 300)
(Hello, world!) show                        % paint the string starting at the current point
showpage                                    % output the page
convert to .pdf
In bash, do something like this:
# convert test.ps to test.pdf
ps2pdf test.ps test.pdf
This also works:
# Convert every .ps file below the current directory to a .pdf next to it.
# "${ONELINE%.ps}.pdf" replaces only the trailing ".ps"; the previous sed
# pattern 's/.ps/.pdf/g' matched any character followed by "ps" anywhere in
# the path (e.g. "./ups/a.ps" became "./.pdf/a.pdf"). IFS= and -r keep
# leading whitespace and backslashes in file names intact.
find . -type f -name "*.ps" | while IFS= read -r ONELINE; do ps2pdf "$ONELINE" "${ONELINE%.ps}.pdf"; done
pdf generation with ps2pdf
Go here, click link, get your custom placard: http://pzwart3.wdka.hro.nl/~egreenhalgh/cgi-bin/urllib/scrapedPlacards/socialistScrapePdf.cgi
(still working out how to make the pdfs look nice)
#!/usr/bin/python
#-*- coding:utf-8 -*-
import cgi
import cgitb; cgitb.enable()
from urllib import urlopen
from BeautifulSoup import BeautifulSoup
import os
#url = "http://www.socialistworker.co.uk/section.php?id=19"
#webpage = urlopen(url).read()
## parse it with Beautiful Soup to extract p tags
#soup = BeautifulSoup(webpage)
## get content of the <a> tags inside <h4 class="hilihead">
#headingSoup = soup.findAll('h4', { "class" : "hilihead" })
## append slogan to a list
#slogans = []
#for i in range(0,2):
# slogan = headingSoup[i].contents[0].contents[0]
# slogans.append(slogan)
#mySlogan = slogans[0]
mySlogan = "yo world"
# make .ps with slogan in
f = open("../../../urllib/placard.ps", "w")
ps = """%!
/Helvetica findfont 72 scalefont setfont
10 600 moveto
(""" + mySlogan + """) show
showpage"""
f.write(ps)
f.close()
print ps
# convert to .pdf with a syscall
command = "ps2pdf ../../../urllib/placard.ps ../../../urllib/placard.pdf"
os.popen(command)
# give user link to download pdf
htmlHeader = """<!DOCTYPE html>
<html>
<head>
<style type="text/css">
body {background-color: #333;}
div {width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;font-size: 900%; line-height:90%;text-transform:uppercase;font-weight:bold;text-align:center; font-family: Arial, Helvetica, sans-serif;}
a {text-decoration: none; color: #fff;}
</style>
</head>
<body>"""
htmlFooter = """
</body>
</html>"""
print "Content-Type: text/html"
print
print htmlHeader
print "<div>"
print "<a href='../../../urllib/placard.pdf'>MAKE ME A PLACARD</a>"
print "</div>"
print htmlFooter
pdf generation with ReportLab
Run this script on the command line and it will overwrite whatever is in the existing pdf file with the new content in the 'slogan' variable.
from reportlab.lib.pagesizes import A3
from reportlab.lib.enums import TA_CENTER # lets you use the 'ta_center' format to centre-align text
from reportlab.platypus import Paragraph # allows text to be flowable
from reportlab.platypus import SimpleDocTemplate # lets you set up margins, page size, filename etc.
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle # lets you apply custom styles to text
# Build an A3 pdf containing the slogan as a single centred paragraph.
slogan = "put yr text here"

# 'spaced' is a custom paragraph style that sets the leading and the
# alignment of the text; it is registered on the sample stylesheet.
styles = getSampleStyleSheet()
spacedStyle = ParagraphStyle(name="spaced", leading=170, alignment=TA_CENTER)
styles.add(spacedStyle)

# the slogan wrapped in inline <font> markup, as one flowable
markup = "".join(['<font size=150>', slogan, '</font>'])
flowables = [Paragraph(markup, styles["spaced"])]

# page size, margins and output file name
placard = SimpleDocTemplate(
    "../../../urllib/test.pdf",
    pagesize=A3,
    rightMargin=72,
    leftMargin=72,
    topMargin=72,
    bottomMargin=18,
)
placard.build(flowables)