User:Eleanorg/2.1/Placard Generator: Difference between revisions
(Created page with "No time to make a placard for that demo? Generate one from the latest slogans courtesy of Socialist Worker. ==Code== ===Scrape them slogans=== <source lang="python"> #!/usr/...") |
No edit summary |
||
(40 intermediate revisions by 2 users not shown) | |||
Line 1: | Line 1: | ||
No time to make a placard for that demo? Generate one from the latest slogans courtesy of Socialist Worker. | No time to make a placard for that demo? Generate one from the latest slogans courtesy of Socialist Worker: <br /> | ||
http://pzwart3.wdka.hro.nl/~egreenhalgh/2.1/placards/instantPlacard.html <br /> | |||
[[file:scrapedPlacards1.jpg]] | |||
After taking this photo I asked the model, "do you agree with it?". He said: "Agree that media still peddle lies? Totally! Isn't that what media is all about?" | |||
==Code== | ==Code== | ||
Scrape slogans, print to screen and generate a .pdf in one script, using Urllib, Beautiful Soup & ReportLab. | |||
<Source lang="python"> | |||
#!/usr/bin/python | |||
#-*- coding:utf-8 -*- | |||
from reportlab.lib.pagesizes import A3 | |||
from reportlab.lib.enums import TA_CENTER # lets you use the 'ta_center' format to centre-align text | |||
from reportlab.platypus import Paragraph # allows text to be flowable | |||
from reportlab.platypus import SimpleDocTemplate # lets you set up margins, page size, filename etc. | |||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle # lets you apply custom styles to text | |||
import cgi | |||
import cgitb; cgitb.enable() | |||
from urllib import urlopen | |||
import urllib2 | |||
from BeautifulSoup import BeautifulSoup | |||
#---------- scrape page contents with urllib ------------------ # | |||
url = "http://www.socialistworker.co.uk/section.php?id=19" | |||
mySlogan = mySlogan = "Covering Up Abuse to Maintain Their System" # default old slogan in case of scraping errors | |||
from urllib2 import Request, urlopen, URLError | |||
request = Request(url) | |||
try: | |||
response = urlopen(request) | |||
except URLError, e: | |||
if hasattr(e, 'reason'): # if error ('e') has 'reason' attribute (ie if it's a URLError) | |||
# print 'There was a URLError: ' + str(e.reason) | |||
pass | |||
elif hassattr(e, 'code'): # if error has 'code' attribute (ie if it's an HTTPError) | |||
# print 'there was an HTTPError: ' + str(e.code) | |||
pass | |||
else: | |||
#print 'everything worked' | |||
response = urllib2.urlopen(request) # this is the response object (the url given) | |||
webpage = response.read() | |||
#----------- extract slogan with Beautiful Soup -------------- # | |||
soup = BeautifulSoup(webpage) | |||
# get content of the <a> tags inside <h4 class="hilihead"> | |||
headingSoup = soup.findAll('h4', { "class" : "hilihead" }) | |||
if headingSoup: | |||
slogans = [] | |||
for i in range(0,2): | |||
slogan = headingSoup[i].contents[0].contents[0] | |||
slogans.append(slogan) | |||
mySlogan = slogans[0] | |||
else: | |||
pass | |||
# use an old slogan if none can be found on the page | |||
===Scrape | upperSlogan = mySlogan.upper() # transform to uppercase | ||
###----------------print slogan---------------------## | |||
htmlHeader = """<!DOCTYPE html> | |||
<html> | |||
<head> | |||
<style type="text/css"> | |||
body {background-color: #333;} | |||
div {background-color:#fff;width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;font-size: 900%; line-height:90%;text-transform:uppercase;font-weight:bold;text-align:center; font-family: Arial, Helvetica, sans-serif;} | |||
</style> | |||
</head> | |||
<body>""" | |||
htmlFooter = """ | |||
</body> | |||
</html>""" | |||
print "Content-Type: text/html" | |||
print | |||
print htmlHeader | |||
print "<div>" | |||
print mySlogan | |||
print "</div>" | |||
print <a href="../../../2.1/placards/placard.pdf">Download .pdf</a> | |||
print htmlFooter | |||
###---------------make the pdf----------------------### | |||
doc = SimpleDocTemplate("../../../2.1/urllib/placard.pdf", pagesize=A3, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18) | |||
#doc = SimpleDocTemplate("test.pdf", pagesize=A3, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18) | |||
# WISH LIST: anyone know how to auto-resize text w/ ReportLab to fit within 1 page? | |||
styles = getSampleStyleSheet() | |||
styles.add(ParagraphStyle(name="spaced", leading=130, alignment=TA_CENTER )) # make style called 'spaced', to set leading & alignment | |||
story = [] | |||
text = '<font size=110 fontName="Helvetica-Bold">' +upperSlogan+'</font>' | |||
#TODO how to catch errors if pdf can't be created? | |||
story.append(Paragraph(text, styles["spaced"])) | |||
doc.build(story) | |||
</source> | |||
==Initial trials== | |||
===Scrape slogans from SW=== | |||
<source lang="python"> | <source lang="python"> | ||
#!/usr/bin/python | #!/usr/bin/python | ||
Line 31: | Line 138: | ||
slogans.append(slogan) | slogans.append(slogan) | ||
print slogans | print slogans | ||
</source> | |||
===web-only implementation=== | |||
Look at it here: http://pzwart3.wdka.hro.nl/~egreenhalgh/urllib/socialistScrape1.html | |||
Scrapes slogans & displays them to the user in the browser. Will eventually have a 'print' button to generate a pdf file. | |||
.html | |||
<source lang="html4strict"> | |||
<!DOCTYPE html> | |||
<html> | |||
<head> | |||
<style type="text/css"> | |||
body {background-color: #333;} | |||
div {width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;color: #fff; font-size: 900%; line-height:90%;text-align:center; font-family: Arial, Helvetica, sans-serif; | |||
</style> | |||
</head> | |||
<body> | |||
<div> | |||
<span style="font-size: 100px; font-weight: bold;"> | |||
Need a placard? Get one here. | |||
</span> | |||
<br /> | |||
<form action="../cgi-bin/urllib/scrapedPlacards/socialistScrape1.cgi" name="inputForm"> | |||
<input type="submit" value="Get placard!"> | |||
</form> | |||
</div> | |||
</body> | |||
</html> | |||
</source> | |||
.cgi | |||
<source lang="python"> | |||
#!/usr/bin/python | |||
#-*- coding:utf-8 -*- | |||
import cgi | |||
import cgitb; cgitb.enable() | |||
from urllib import urlopen | |||
from BeautifulSoup import BeautifulSoup | |||
url = "http://www.socialistworker.co.uk/section.php?id=19" | |||
webpage = urlopen(url).read() | |||
# parse it with Beautiful Soup to extract p tags | |||
soup = BeautifulSoup(webpage) | |||
# get content of the <a> tags inside <h4 class="hilihead"> | |||
headingSoup = soup.findAll('h4', { "class" : "hilihead" }) | |||
# print text within first two h4 tags | |||
#for i in range(0,2): | |||
# print headingSoup[i].contents[0].contents[0] | |||
# append text to a list for later use | |||
slogans = [] | |||
for i in range(0,2): | |||
slogan = headingSoup[i].contents[0].contents[0] | |||
slogans.append(slogan) | |||
# print one of them onscreen | |||
htmlHeader = """<!DOCTYPE html> | |||
<html> | |||
<head> | |||
<style type="text/css"> | |||
body {background-color: #333;} | |||
div {background-color:#fff;width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;font-size: 900%; line-height:90%;text-transform:uppercase;font-weight:bold;text-align:center; font-family: Arial, Helvetica, sans-serif;} | |||
</style> | |||
</head> | |||
<body>""" | |||
htmlFooter = """ | |||
</body> | |||
</html>""" | |||
print "Content-Type: text/html" | |||
print | |||
print htmlHeader | |||
print "<div>" | |||
print slogans[0] | |||
print "</div>" | |||
print htmlFooter | |||
</source> | |||
==making PDFs== | |||
Anyone know a good way of making nice looking pdfs? | |||
The internets suggested making a .ps file and using Ghostscript's ps2pdf function to convert to .pdf. | |||
===generate a .ps file=== | |||
This contains instructions on fonts etc. | |||
<source lang="python"> | |||
%! | |||
/Helvetica findfont 300 scalefont setfont | |||
300 300 moveto | |||
(Hello, world!) show | |||
showpage | |||
</source> | |||
===convert to .pdf=== | |||
In bash, do something like this: | |||
<source lang="bash"> | |||
ps2pdf test.ps test.pdf | |||
</source> | |||
This also works: | |||
<source lang="bash"> | |||
find . -type f -name "*.ps" | while read ONELINE; do ps2pdf "$ONELINE" "$(echo "$ONELINE" | sed 's/.ps/.pdf/g')"; done | |||
</source> | |||
==pdf generation with ps2pdf== | |||
Go here, click link, get your custom placard: | |||
http://pzwart3.wdka.hro.nl/~egreenhalgh/cgi-bin/urllib/scrapedPlacards/socialistScrapePdf.cgi | |||
(still working out how to make the pdfs look nice) | |||
<source lang="python"> | |||
#!/usr/bin/python | |||
#-*- coding:utf-8 -*- | |||
import cgi | |||
import cgitb; cgitb.enable() | |||
from urllib import urlopen | |||
from BeautifulSoup import BeautifulSoup | |||
import os | |||
#url = "http://www.socialistworker.co.uk/section.php?id=19" | |||
#webpage = urlopen(url).read() | |||
## parse it with Beautiful Soup to extract p tags | |||
#soup = BeautifulSoup(webpage) | |||
## get content of the <a> tags inside <h4 class="hilihead"> | |||
#headingSoup = soup.findAll('h4', { "class" : "hilihead" }) | |||
## append slogan to a list | |||
#slogans = [] | |||
#for i in range(0,2): | |||
# slogan = headingSoup[i].contents[0].contents[0] | |||
# slogans.append(slogan) | |||
#mySlogan = slogans[0] | |||
mySlogan = "yo world" | |||
# make .ps with slogan in | |||
f = open("../../../urllib/placard.ps", "w") | |||
ps = """%! | |||
/Helvetica findfont 72 scalefont setfont | |||
10 600 moveto | |||
(""" + mySlogan + """) show | |||
showpage""" | |||
f.write(ps) | |||
f.close() | |||
print ps | |||
# convert to .pdf with a syscall | |||
command = "ps2pdf ../../../urllib/placard.ps ../../../urllib/placard.pdf" | |||
os.popen(command) | |||
# give user link to download pdf | |||
htmlHeader = """<!DOCTYPE html> | |||
<html> | |||
<head> | |||
<style type="text/css"> | |||
body {background-color: #333;} | |||
div {width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;font-size: 900%; line-height:90%;text-transform:uppercase;font-weight:bold;text-align:center; font-family: Arial, Helvetica, sans-serif;} | |||
a {text-decoration: none; color: #fff;} | |||
</style> | |||
</head> | |||
<body>""" | |||
htmlFooter = """ | |||
</body> | |||
</html>""" | |||
print "Content-Type: text/html" | |||
print | |||
print htmlHeader | |||
print "<div>" | |||
print "<a href='../../../urllib/placard.pdf'>MAKE ME A PLACARD</a>" | |||
print "</div>" | |||
print htmlFooter | |||
</source> | |||
==pdf generation with ReportLab== | |||
Run this script on the command line and it will overwrite whatever is in the existing pdf file with the new content in the 'slogan' variable | |||
<source lang="python"> | |||
from reportlab.lib.pagesizes import A3 | |||
from reportlab.lib.enums import TA_CENTER # lets you use the 'ta_center' format to centre-align text | |||
from reportlab.platypus import Paragraph # allows text to be flowable | |||
from reportlab.platypus import SimpleDocTemplate # lets you set up margins, page size, filename etc. | |||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle # lets you apply custom styles to text | |||
doc = SimpleDocTemplate("../../../urllib/test.pdf", pagesize=A3, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18) | |||
styles = getSampleStyleSheet() | |||
styles.add(ParagraphStyle(name="spaced", leading=170, alignment=TA_CENTER )) # here we make a style called 'spaced', which sets the leading and alignment of the text | |||
story = [] | |||
slogan = "put yr text here" | |||
text = '<font size=150>' +slogan+'</font>' | |||
story.append(Paragraph(text, styles["spaced"])) | |||
doc.build(story) | |||
</source> | </source> |
Latest revision as of 21:05, 9 December 2012
No time to make a placard for that demo? Generate one from the latest slogans courtesy of Socialist Worker:
http://pzwart3.wdka.hro.nl/~egreenhalgh/2.1/placards/instantPlacard.html
After taking this photo I asked the model, "do you agree with it?". He said: "Agree that media still peddle lies? Totally! Isn't that what media is all about?"
Code
Scrape slogans, print them to the screen and generate a .pdf in one script, using urllib2, Beautiful Soup & ReportLab.
#!/usr/bin/python
#-*- coding:utf-8 -*-
from reportlab.lib.pagesizes import A3
from reportlab.lib.enums import TA_CENTER # lets you use the 'ta_center' format to centre-align text
from reportlab.platypus import Paragraph # allows text to be flowable
from reportlab.platypus import SimpleDocTemplate # lets you set up margins, page size, filename etc.
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle # lets you apply custom styles to text
import cgi
import cgitb; cgitb.enable()
from urllib import urlopen
import urllib2
from BeautifulSoup import BeautifulSoup
#---------- scrape page contents with urllib ------------------ #
url = "http://www.socialistworker.co.uk/section.php?id=19"
mySlogan = mySlogan = "Covering Up Abuse to Maintain Their System" # default old slogan in case of scraping errors
from urllib2 import Request, urlopen, URLError
request = Request(url)
try:
response = urlopen(request)
except URLError, e:
if hasattr(e, 'reason'): # if error ('e') has 'reason' attribute (ie if it's a URLError)
# print 'There was a URLError: ' + str(e.reason)
pass
elif hassattr(e, 'code'): # if error has 'code' attribute (ie if it's an HTTPError)
# print 'there was an HTTPError: ' + str(e.code)
pass
else:
#print 'everything worked'
response = urllib2.urlopen(request) # this is the response object (the url given)
webpage = response.read()
#----------- extract slogan with Beautiful Soup -------------- #
soup = BeautifulSoup(webpage)
# get content of the <a> tags inside <h4 class="hilihead">
headingSoup = soup.findAll('h4', { "class" : "hilihead" })
if headingSoup:
slogans = []
for i in range(0,2):
slogan = headingSoup[i].contents[0].contents[0]
slogans.append(slogan)
mySlogan = slogans[0]
else:
pass
# use an old slogan if none can be found on the page
upperSlogan = mySlogan.upper() # transform to uppercase
###----------------print slogan---------------------##
htmlHeader = """<!DOCTYPE html>
<html>
<head>
<style type="text/css">
body {background-color: #333;}
div {background-color:#fff;width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;font-size: 900%; line-height:90%;text-transform:uppercase;font-weight:bold;text-align:center; font-family: Arial, Helvetica, sans-serif;}
</style>
</head>
<body>"""
htmlFooter = """
</body>
</html>"""
print "Content-Type: text/html"
print
print htmlHeader
print "<div>"
print mySlogan
print "</div>"
print <a href="../../../2.1/placards/placard.pdf">Download .pdf</a>
print htmlFooter
###---------------make the pdf----------------------###
doc = SimpleDocTemplate("../../../2.1/urllib/placard.pdf", pagesize=A3, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18)
#doc = SimpleDocTemplate("test.pdf", pagesize=A3, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18)
# WISH LIST: anyone know how to auto-resize text w/ ReportLab to fit within 1 page?
styles = getSampleStyleSheet()
styles.add(ParagraphStyle(name="spaced", leading=130, alignment=TA_CENTER )) # make style called 'spaced', to set leading & alignment
story = []
text = '<font size=110 fontName="Helvetica-Bold">' +upperSlogan+'</font>'
#TODO how to catch errors if pdf can't be created?
story.append(Paragraph(text, styles["spaced"]))
doc.build(story)
Initial trials
Scrape slogans from SW
#!/usr/bin/python
#-*- coding:utf-8 -*-
from urllib import urlopen
from BeautifulSoup import BeautifulSoup
url = "http://www.socialistworker.co.uk/section.php?id=19"
webpage = urlopen(url).read()
# parse it with Beautiful Soup to extract p tags
soup = BeautifulSoup(webpage)
# get content of the <a> tags inside <h4 class="hilihead">
headingSoup = soup.findAll('h4', { "class" : "hilihead" })
# print text within first two h4 tags
#for i in range(0,2):
# print headingSoup[i].contents[0].contents[0]
# append slogans to a list for later use
slogans = []
for i in range(0,2):
slogan = headingSoup[i].contents[0].contents[0]
slogans.append(slogan)
print slogans
web-only implementation
Look at it here: http://pzwart3.wdka.hro.nl/~egreenhalgh/urllib/socialistScrape1.html Scrapes slogans & displays them to the user in the browser. Will eventually have a 'print' button to generate a pdf file.
.html
<!DOCTYPE html>
<!-- Landing page: a single button that submits to the slogan-scraping CGI script. -->
<html>
<head>
<style type="text/css">
body {background-color: #333;}
/* the closing brace of this rule was missing, leaving the style block malformed */
div {width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;color: #fff; font-size: 900%; line-height:90%;text-align:center; font-family: Arial, Helvetica, sans-serif;}
</style>
</head>
<body>
<div>
  <span style="font-size: 100px; font-weight: bold;">
  Need a placard? Get one here.
  </span>
  <br />
  <form action="../cgi-bin/urllib/scrapedPlacards/socialistScrape1.cgi" name="inputForm">
    <input type="submit" value="Get placard!">
  </form>
</div>
</body>
</html>
.cgi
#!/usr/bin/python
#-*- coding:utf-8 -*-
import cgi
import cgitb; cgitb.enable()
from urllib import urlopen
from BeautifulSoup import BeautifulSoup
url = "http://www.socialistworker.co.uk/section.php?id=19"
webpage = urlopen(url).read()
# parse it with Beautiful Soup to extract p tags
soup = BeautifulSoup(webpage)
# get content of the <a> tags inside <h4 class="hilihead">
headingSoup = soup.findAll('h4', { "class" : "hilihead" })
# print text within first two h4 tags
#for i in range(0,2):
# print headingSoup[i].contents[0].contents[0]
# append text to a list for later use
slogans = []
for i in range(0,2):
slogan = headingSoup[i].contents[0].contents[0]
slogans.append(slogan)
# print one of them onscreen
htmlHeader = """<!DOCTYPE html>
<html>
<head>
<style type="text/css">
body {background-color: #333;}
div {background-color:#fff;width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;font-size: 900%; line-height:90%;text-transform:uppercase;font-weight:bold;text-align:center; font-family: Arial, Helvetica, sans-serif;}
</style>
</head>
<body>"""
htmlFooter = """
</body>
</html>"""
print "Content-Type: text/html"
print
print htmlHeader
print "<div>"
print slogans[0]
print "</div>"
print htmlFooter
making PDFs
Anyone know a good way of making nice looking pdfs?
The internets suggested making a .ps file and using Ghostscript's ps2pdf function to convert to .pdf.
generate a .ps file
This contains instructions on fonts etc.
%!
% Minimal one-page PostScript test file.
% Select the Helvetica font, scale it to 300 units and make it the current font.
/Helvetica findfont 300 scalefont setfont
% Move the current point to x=300, y=300.
300 300 moveto
% Paint the string starting at the current point.
(Hello, world!) show
% Emit the finished page.
showpage
convert to .pdf
In bash, do something like this:
# Convert test.ps to test.pdf with Ghostscript's ps2pdf.
ps2pdf test.ps test.pdf
This also works:
# Convert every .ps file below the current directory to a .pdf next to it.
# IFS= and -r keep leading whitespace and backslashes in file names intact.
# ${ONELINE%.ps} strips only the trailing ".ps"; the previous sed pattern
# 's/.ps/.pdf/g' also rewrote any "<character>ps" elsewhere in the path.
find . -type f -name "*.ps" | while IFS= read -r ONELINE; do ps2pdf "$ONELINE" "${ONELINE%.ps}.pdf"; done
pdf generation with ps2pdf
Go here, click link, get your custom placard: http://pzwart3.wdka.hro.nl/~egreenhalgh/cgi-bin/urllib/scrapedPlacards/socialistScrapePdf.cgi
(still working out how to make the pdfs look nice)
#!/usr/bin/python
#-*- coding:utf-8 -*-
import cgi
import cgitb; cgitb.enable()
from urllib import urlopen
from BeautifulSoup import BeautifulSoup
import os
#url = "http://www.socialistworker.co.uk/section.php?id=19"
#webpage = urlopen(url).read()
## parse it with Beautiful Soup to extract p tags
#soup = BeautifulSoup(webpage)
## get content of the <a> tags inside <h4 class="hilihead">
#headingSoup = soup.findAll('h4', { "class" : "hilihead" })
## append slogan to a list
#slogans = []
#for i in range(0,2):
# slogan = headingSoup[i].contents[0].contents[0]
# slogans.append(slogan)
#mySlogan = slogans[0]
mySlogan = "yo world"
# make .ps with slogan in
f = open("../../../urllib/placard.ps", "w")
ps = """%!
/Helvetica findfont 72 scalefont setfont
10 600 moveto
(""" + mySlogan + """) show
showpage"""
f.write(ps)
f.close()
print ps
# convert to .pdf with a syscall
command = "ps2pdf ../../../urllib/placard.ps ../../../urllib/placard.pdf"
os.popen(command)
# give user link to download pdf
htmlHeader = """<!DOCTYPE html>
<html>
<head>
<style type="text/css">
body {background-color: #333;}
div {width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;font-size: 900%; line-height:90%;text-transform:uppercase;font-weight:bold;text-align:center; font-family: Arial, Helvetica, sans-serif;}
a {text-decoration: none; color: #fff;}
</style>
</head>
<body>"""
htmlFooter = """
</body>
</html>"""
print "Content-Type: text/html"
print
print htmlHeader
print "<div>"
print "<a href='../../../urllib/placard.pdf'>MAKE ME A PLACARD</a>"
print "</div>"
print htmlFooter
pdf generation with ReportLab
Run this script on the command line and it will overwrite the existing PDF file with the text held in the 'slogan' variable.
from reportlab.lib.pagesizes import A3
from reportlab.lib.enums import TA_CENTER # lets you use the 'ta_center' format to centre-align text
from reportlab.platypus import Paragraph # allows text to be flowable
from reportlab.platypus import SimpleDocTemplate # lets you set up margins, page size, filename etc.
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle # lets you apply custom styles to text
# Build a one-paragraph A3 PDF whose only content is the text held in 'slogan'.
slogan = "put yr text here"

# page setup: output file, page size and margins
pdfDoc = SimpleDocTemplate(
    "../../../urllib/test.pdf",
    pagesize=A3,
    rightMargin=72,
    leftMargin=72,
    topMargin=72,
    bottomMargin=18,
)

# register a paragraph style called 'spaced' that sets the leading and centres the text
sheet = getSampleStyleSheet()
spacedStyle = ParagraphStyle(name="spaced", leading=170, alignment=TA_CENTER)
sheet.add(spacedStyle)

# wrap the slogan in a font tag and lay it out as the document's single flowable
markup = '<font size=150>' + slogan + '</font>'
flowables = [Paragraph(markup, sheet["spaced"])]
pdfDoc.build(flowables)