2010 1.09: Difference between revisions

From XPUB & Lens-Based wiki
No edit summary
No edit summary
 
(2 intermediate revisions by one other user not shown)
Line 1: Line 1:
== Display the titles of an RSS feed ==
* [[Displaying the titles of an RSS feed]]
* [[Turning a text in an alphabetical list of unique words]]
* [[Displaying a list of words from a text followed by the number of times they appear]]


<source lang="python">
#!/usr/bin/env python
from optparse import OptionParser
parser = OptionParser()
parser.add_option("-u", "--url", dest="url", default="http://feeds.bbci.co.uk/news/rss.xml", help="the url to read from")
parser.add_option("-n", "--numlines", type="int", dest="num", default=1000, help="how many lines to display")
(options, args) = parser.parse_args()
import feedparser
feed = feedparser.parse(options.url)
for e in feed.entries[:options.num]:
    print e.title.encode("utf-8")
</source>
<source lang="python">
#!/usr/bin/env python
from optparse import OptionParser
parser = OptionParser()
parser.add_option("-u", "--url", dest="url", default="http://feeds.bbci.co.uk/news/rss.xml", help="the url to read from")
parser.add_option("-n", "--numlines", type="int", dest="num", default=1000, help="how many lines to display")
(options, args) = parser.parse_args()
import feedparser
feed = feedparser.parse(options.url)
for e in feed.entries[:options.num]:
    print e.title.encode("utf-8")
</source>
== Words ==
Turns a text into an alphabetical list of unique words. Attempts to strip punctuation and lowercases everything.
<source lang="python">
#!/usr/bin/env python
import sys, string
words = {}
for line in sys.stdin:
    for word in line.split():
        word = word.lower().strip(string.punctuation)
        words[word] = words.get(word, 0) + 1
for word in sorted(words.keys()):
    print word,
print
</source>
== Word counts ==
Grab words (as above) and display one per line followed by the number of times the word appears.
<source lang="python">
#!/usr/bin/env python
import sys, string
words = {}
for line in sys.stdin:
    for word in line.split():
        word = word.lower().strip(string.punctuation)
        words[word] = words.get(word, 0) + 1
for (word, count) in sorted(words.items()):
    print word, count
</source>


== Permutations ==
== Permutations ==
Line 144: Line 76:
</source>
</source>


[[http://pzwart3.wdka.hro.nl/~mmurtaugh/cgi-bin/headlines.cgi?n=0 | See Live (1)!]][[http://pzwart3.wdka.hro.nl/~mmurtaugh/cgi-bin/headlines.cgi?n=1 | See Live (2)!]][[http://pzwart3.wdka.hro.nl/~mmurtaugh/cgi-bin/headlines.cgi?n=2 | See Live (3)!]]
[http://pzwart3.wdka.hro.nl/~mmurtaugh/cgi-bin/headlines.cgi?n=0 1][http://pzwart3.wdka.hro.nl/~mmurtaugh/cgi-bin/headlines.cgi?n=1 2][http://pzwart3.wdka.hro.nl/~mmurtaugh/cgi-bin/headlines.cgi?n=2 3]

Latest revision as of 13:15, 16 March 2011


Permutations

bla bla

#!/usr/bin/python

import sys, codecs, string, random

sys.stdin = codecs.getreader("utf-8")(sys.stdin)
count = {}
for line in sys.stdin:
	for w in line.split():
		w = w.strip(string.punctuation).lower()
		if w:
			count[w] = count.get(w,0)+1

#for w,c in sorted(count.items()):
#	print w,c

words = count.keys()
#words.sort()
random.shuffle(words)
words = words[0:5]

import itertools

for ws in itertools.permutations(words):
	for w in ws:
		print w,
	print

Headline Permutation CGI!

#!/usr/bin/env python
#-*- coding:utf-8 -*-

print "Content-type: text/html"
print

import cgi
args = cgi.FieldStorage()
n = int(args.getvalue("n", "0"))

print """
<body>
"""

import feedparser
feed = feedparser.parse("http://feeds.bbci.co.uk/news/rss.xml")

import itertools
words = feed.entries[n].title.split()
orderings = list(itertools.permutations(words))
for ws in orderings:
    print " ".join(ws)
    print "<br />"
    
#for e in feed.entries[:options.num]:
#    print "<p>"
#    print e.title.encode("utf-8")
#    print "</p>"
    
print """
</body>
"""

123