User:Eleanorg/2.1/Placard Generator: Difference between revisions

From XPUB & Lens-Based wiki
No edit summary
No edit summary
 
Line 1: Line 1:
No time to make a placard for that demo? Generate one from the latest slogans courtesy of Socialist Worker:  <br />
No time to make a placard for that demo? Generate one from the latest slogans courtesy of Socialist Worker:  <br />
http://pzwart3.wdka.hro.nl/~egreenhalgh/2.1/placards/instantPlacard.html
http://pzwart3.wdka.hro.nl/~egreenhalgh/2.1/placards/instantPlacard.html <br />
[[file:scrapedPlacards1.jpg]]
[[file:scrapedPlacards1.jpg]]



Latest revision as of 21:05, 9 December 2012

No time to make a placard for that demo? Generate one from the latest slogans courtesy of Socialist Worker:
http://pzwart3.wdka.hro.nl/~egreenhalgh/2.1/placards/instantPlacard.html
ScrapedPlacards1.jpg

After taking this photo I asked the model, "do you agree with it?". He said: "Agree that media still peddle lies? Totally! Isn't that what media is all about?"

Code

Scrape slogans, print to screen and generate a .pdf in one script, using Urllib, Beautiful Soup & ReportLab.

#!/usr/bin/python
#-*- coding:utf-8 -*-

from reportlab.lib.pagesizes import A3
from reportlab.lib.enums import TA_CENTER	                                              	# lets you use the 'ta_center' format to centre-align text
from reportlab.platypus import Paragraph	                                              	# allows text to be flowable
from reportlab.platypus import SimpleDocTemplate                                      		# lets you set up margins, page size, filename etc.
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle	      			# lets you apply custom styles to text

import cgi
import cgitb; cgitb.enable()
 
from urllib import urlopen
import urllib2
from BeautifulSoup import BeautifulSoup

#---------- scrape page contents with urllib ------------------ #

url = "http://www.socialistworker.co.uk/section.php?id=19"
mySlogan = mySlogan = "Covering Up Abuse to Maintain Their System" # default old slogan in case of scraping errors

from urllib2 import Request, urlopen, URLError
request = Request(url)
try:
	response = urlopen(request)
except URLError, e:
	if hasattr(e, 'reason'):		# if error ('e') has 'reason' attribute (ie if it's a URLError)
#		print 'There was a URLError: ' + str(e.reason)
		pass	
	elif hassattr(e, 'code'):		# if error has 'code' attribute (ie if it's an HTTPError)
#		print 'there was an HTTPError: ' + str(e.code)
		pass
else:
	#print 'everything worked'
	response = urllib2.urlopen(request)	# this is the response object (the url given)
	webpage = response.read()
	
#----------- extract slogan with Beautiful Soup -------------- #

soup = BeautifulSoup(webpage)
 
# get content of the <a> tags inside <h4 class="hilihead">
headingSoup = soup.findAll('h4', { "class" : "hilihead" })
if headingSoup:
	slogans = []
	for i in range(0,2):
		slogan = headingSoup[i].contents[0].contents[0]
		slogans.append(slogan)
	mySlogan = slogans[0]
else:
	pass
	# use an old slogan if none can be found on the page

upperSlogan = mySlogan.upper()	# transform to uppercase
	
###----------------print slogan---------------------##


htmlHeader = """<!DOCTYPE html>
<html>
  <head>
  	<style type="text/css">
  	body {background-color: #333;}
  	div {background-color:#fff;width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;font-size: 900%; line-height:90%;text-transform:uppercase;font-weight:bold;text-align:center; font-family: Arial, Helvetica, sans-serif;}
  	</style>
  </head>
  <body>"""
 
htmlFooter = """
    </body>
</html>"""
 
print "Content-Type: text/html"
print
print htmlHeader
 
print "<div>"
print mySlogan
print "</div>"
 
print <a href="../../../2.1/placards/placard.pdf">Download .pdf</a>
print htmlFooter

###---------------make the pdf----------------------###

doc = SimpleDocTemplate("../../../2.1/urllib/placard.pdf", pagesize=A3, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18)
#doc = SimpleDocTemplate("test.pdf", pagesize=A3, rightMargin=72, leftMargin=72, topMargin=72, bottomMargin=18)

# WISH LIST: anyone know how to auto-resize text w/ ReportLab to fit within 1 page?
styles = getSampleStyleSheet()
styles.add(ParagraphStyle(name="spaced", leading=130, alignment=TA_CENTER ))   # make style called 'spaced', to set leading & alignment
story = []
text = '<font size=110 fontName="Helvetica-Bold">' +upperSlogan+'</font>'

#TODO how to catch errors if pdf can't be created?
story.append(Paragraph(text, styles["spaced"]))
doc.build(story)

Initial trials

Scrape slogans from SW

#!/usr/bin/python
#-*- coding:utf-8 -*-

from urllib import urlopen
from BeautifulSoup import BeautifulSoup

url = "http://www.socialistworker.co.uk/section.php?id=19"
webpage = urlopen(url).read()

# parse it with Beautiful Soup to extract p tags
soup = BeautifulSoup(webpage)

# get content of the <a> tags inside <h4 class="hilihead">

headingSoup = soup.findAll('h4', { "class" : "hilihead" })

# print text within first two h4 tags
#for i in range(0,2):
#	print headingSoup[i].contents[0].contents[0]
	
# append slogans to a list for later use
slogans = []
for i in range(0,2):
	slogan = headingSoup[i].contents[0].contents[0]
	slogans.append(slogan)
print slogans

web-only implementation

Look at it here: http://pzwart3.wdka.hro.nl/~egreenhalgh/urllib/socialistScrape1.html. It scrapes slogans and displays them to the user in the browser. It will eventually have a 'print' button to generate a .pdf file.

.html

<!DOCTYPE html>
<!-- Landing page: a single button that submits to the slogan-scraping CGI script. -->
<html>
  <head>
  	<style type="text/css">
  		body {background-color: #333;}
  		div {width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;color: #fff; font-size: 900%; line-height:90%;text-align:center; font-family: Arial, Helvetica, sans-serif;}
  	</style>
  </head>
  <body>
  
  <div>
  	<span style="font-size: 100px; font-weight: bold;">
  		Need a placard? Get one here. 
  	</span>
  	<br />
 	<form action="../cgi-bin/urllib/scrapedPlacards/socialistScrape1.cgi" name="inputForm">  
  		<input type="submit" value="Get placard!">
	</form>
  </div>
  </body>
</html>

.cgi

#!/usr/bin/python
#-*- coding:utf-8 -*-

import cgi
import cgitb; cgitb.enable()


from urllib import urlopen
from BeautifulSoup import BeautifulSoup

url = "http://www.socialistworker.co.uk/section.php?id=19"
webpage = urlopen(url).read()

# parse it with Beautiful Soup to extract p tags
soup = BeautifulSoup(webpage)

# get content of the <a> tags inside <h4 class="hilihead">

headingSoup = soup.findAll('h4', { "class" : "hilihead" })

# print text within first two h4 tags
#for i in range(0,2):
#	print headingSoup[i].contents[0].contents[0]
	
# append text to a list for later use
slogans = []
for i in range(0,2):
	slogan = headingSoup[i].contents[0].contents[0]
	slogans.append(slogan)

# print one of them onscreen

htmlHeader = """<!DOCTYPE html>
<html>
  <head>
  	<style type="text/css">
  	body {background-color: #333;}
  	div {background-color:#fff;width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;font-size: 900%; line-height:90%;text-transform:uppercase;font-weight:bold;text-align:center; font-family: Arial, Helvetica, sans-serif;}
  	</style>
  </head>
  <body>"""
 
htmlFooter = """
    </body>
</html>"""
 
print "Content-Type: text/html"
print
print htmlHeader

print "<div>"
print slogans[0]
print "</div>"
	
print htmlFooter

making PDFs

Anyone know a good way of making nice looking pdfs?

The internets suggested making a .ps file and using Ghostscript's ps2pdf tool to convert it to .pdf.

generate a .ps file

This contains instructions on fonts etc.

%!
% Minimal one-page PostScript program that draws "Hello, world!" in Helvetica.
% look up the Helvetica font, scale it to 300 units and make it the current font
/Helvetica findfont 300 scalefont setfont
% move the current point to x=300, y=300
300 300 moveto
% paint the string starting at the current point
(Hello, world!) show
% output the finished page
showpage

convert to .pdf

In bash, do something like this:

# convert test.ps into test.pdf
ps2pdf test.ps test.pdf

This also works:

# Convert every .ps file below the current directory into a .pdf next to it.
# "IFS= read -r" keeps leading spaces and backslashes in file names intact.
# ${ONELINE%.ps}.pdf swaps only the trailing ".ps"; the previous sed pattern
# 's/.ps/.pdf/g' treated "." as a wildcard and rewrote every match, so a name
# like maps.ps became m.pdf.pdf.
find . -type f -name "*.ps" | while IFS= read -r ONELINE; do ps2pdf "$ONELINE" "${ONELINE%.ps}.pdf"; done

pdf generation with ps2pdf

Go here, click link, get your custom placard: http://pzwart3.wdka.hro.nl/~egreenhalgh/cgi-bin/urllib/scrapedPlacards/socialistScrapePdf.cgi

(still working out how to make the pdfs look nice)

#!/usr/bin/python
#-*- coding:utf-8 -*-

import cgi
import cgitb; cgitb.enable()


from urllib import urlopen
from BeautifulSoup import BeautifulSoup
import os

#url = "http://www.socialistworker.co.uk/section.php?id=19"
#webpage = urlopen(url).read()

## parse it with Beautiful Soup to extract p tags
#soup = BeautifulSoup(webpage)

## get content of the <a> tags inside <h4 class="hilihead">

#headingSoup = soup.findAll('h4', { "class" : "hilihead" })

## append slogan to a list 
#slogans = []
#for i in range(0,2):
#	slogan = headingSoup[i].contents[0].contents[0]
#	slogans.append(slogan)
#mySlogan = slogans[0]
mySlogan = "yo world"

# make .ps with slogan in
f = open("../../../urllib/placard.ps", "w")
ps = """%!
/Helvetica findfont 72 scalefont setfont
10 600 moveto
(""" + mySlogan + """) show
showpage"""

f.write(ps)
f.close()
print ps

# convert to .pdf with a syscall
command = "ps2pdf ../../../urllib/placard.ps ../../../urllib/placard.pdf"
os.popen(command)

# give user link to download pdf

htmlHeader = """<!DOCTYPE html>
<html>
  <head>
  	<style type="text/css">
  	body {background-color: #333;}
  	div {width:800px; margin:20px auto 40px auto; padding: 100px;border:1px solid #bbb;font-size: 900%; line-height:90%;text-transform:uppercase;font-weight:bold;text-align:center; font-family: Arial, Helvetica, sans-serif;}
  	a {text-decoration: none; color: #fff;}
  	</style>
  </head>
  <body>"""
 
htmlFooter = """
    </body>
</html>"""
 
print "Content-Type: text/html"
print
print htmlHeader

print "<div>"
print "<a href='../../../urllib/placard.pdf'>MAKE ME A PLACARD</a>"
print "</div>"
	
print htmlFooter

pdf generation with ReportLab

Run this script on the command line and it will overwrite the existing .pdf file with the new content from the 'slogan' variable.

from reportlab.lib.pagesizes import A3
from reportlab.lib.enums import TA_CENTER	                                               # lets you use the 'ta_center' format to centre-align text
from reportlab.platypus import Paragraph	                                               # allows text to be flowable
from reportlab.platypus import SimpleDocTemplate                                        # lets you set up margins, page size, filename etc.
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle	       # lets you apply custom styles to text


# Page setup: an A3 document written to test.pdf with the margins given here.
doc = SimpleDocTemplate(
    "../../../urllib/test.pdf",
    pagesize=A3,
    rightMargin=72,
    leftMargin=72,
    topMargin=72,
    bottomMargin=18
)

# Add a custom style named 'spaced' to the sample stylesheet; it carries the
# leading and the centred alignment used for the placard text.
styles = getSampleStyleSheet()
spacedStyle = ParagraphStyle(name="spaced", leading=170, alignment=TA_CENTER)
styles.add(spacedStyle)

# Wrap the slogan in a <font> tag that sets its size.
slogan = "put yr text here"
text = "".join(['<font size=150>', slogan, '</font>'])

# The story is the list of flowables the document is built from: one paragraph.
story = [Paragraph(text, styles["spaced"])]
doc.build(story)