2010 1.09: Difference between revisions
No edit summary |
No edit summary |
||
Line 14: | Line 14: | ||
print e.title.encode("utf-8") | print e.title.encode("utf-8") | ||
</source> | </source> | ||
== Words == | |||
Turns a text into an alphabetical list of unique words. Attempts to strip punctuation and lowercases everything. | |||
<source lang="python"> | |||
#!/usr/bin/env python | |||
import sys, string | |||
words = {} | |||
for line in sys.stdin: | |||
for word in line.split(): | |||
word = word.lower().strip(string.punctuation) | |||
words[word] = words.get(word, 0) + 1 | |||
for word in sorted(words.keys()): | |||
print word, | |||
print | |||
</source> | |||
== Word counts == | == Word counts == |
Revision as of 22:24, 6 December 2010
Read a feed with a URL from the command line
#!/usr/bin/env python
# Print the title of each entry in a feed. The feed URL comes from the first
# command-line argument; a default URL is used when none is given.
import sys
import feedparser

if len(sys.argv) > 1:
    url = sys.argv[1]
else:
    url = "http://feeds.bbci.co.uk/news/rss.xml"

feed = feedparser.parse(url)
for entry in feed.entries:
    # Titles are encoded to UTF-8 before being printed.
    print(entry.title.encode("utf-8"))
Words
Turns a text into an alphabetical list of unique words. Attempts to strip punctuation and lowercases everything.
#!/usr/bin/env python
"""Print the unique words read from standard input in alphabetical order."""
import string
import sys


def unique_words(lines):
    """Return the sorted, lowercased unique words found in *lines*.

    Each line is split on whitespace and leading/trailing punctuation is
    stripped from every token. Tokens that consist only of punctuation
    (for example "--") are dropped rather than listed as an empty word.

    lines -- any iterable of strings, such as an open file or sys.stdin.
    """
    found = set()
    for line in lines:
        for token in line.split():
            word = token.lower().strip(string.punctuation)
            if word:
                found.add(word)
    return sorted(found)


if __name__ == "__main__":
    # All words go on a single space-separated line.
    print(" ".join(unique_words(sys.stdin)))
Word counts
#!/usr/bin/env python
"""Print each word read from standard input together with its count."""
import string
import sys


def count_words(lines):
    """Return a dict mapping each lowercased word in *lines* to its count.

    Each line is split on whitespace and leading/trailing punctuation is
    stripped from every token. Tokens that consist only of punctuation
    (for example "--") are skipped so that no empty word is counted.

    lines -- any iterable of strings, such as an open file or sys.stdin.
    """
    counts = {}
    for line in lines:
        for token in line.split():
            word = token.lower().strip(string.punctuation)
            if not word:
                continue
            counts[word] = counts.get(word, 0) + 1
    return counts


if __name__ == "__main__":
    # One "word count" pair per line, in alphabetical order of the words.
    for word, count in sorted(count_words(sys.stdin).items()):
        print("%s %d" % (word, count))