Difference between revisions of "2010 1.09"
Jump to navigation
Jump to search
Line 14: | Line 14: | ||
print e.title.encode("utf-8") | print e.title.encode("utf-8") | ||
</source> | </source> | ||
+ | |||
+ | == Words == | ||
+ | |||
+ | Turns a text into an alphabetical list of unique words. Attempts to strip punctuation and lowercases everything. | ||
+ | |||
+ | <source lang="python"> | ||
+ | #!/usr/bin/env python | ||
+ | |||
+ | import sys, string | ||
+ | |||
+ | words = {} | ||
+ | for line in sys.stdin: | ||
+ | for word in line.split(): | ||
+ | word = word.lower().strip(string.punctuation) | ||
+ | words[word] = words.get(word, 0) + 1 | ||
+ | |||
+ | for word in sorted(words.keys()): | ||
+ | print word, | ||
+ | print | ||
+ | </source> | ||
+ | |||
== Word counts == | == Word counts == |
Revision as of 23:24, 6 December 2010
Read a feed with a URL from the command line
#!/usr/bin/env python
import sys, feedparser
try:
url = sys.argv[1]
except IndexError:
url = "http://feeds.bbci.co.uk/news/rss.xml"
feed = feedparser.parse(url)
for e in feed.entries:
print e.title.encode("utf-8")
Words
Turns a text into an alphabetical list of unique words. Attempts to strip punctuation and lowercases everything.
#!/usr/bin/env python
import sys, string
def unique_words(lines):
    """Return the distinct words found in *lines*, sorted alphabetically.

    Each line is split on whitespace; every token is lowercased and has
    leading/trailing punctuation (``string.punctuation``) removed.
    Punctuation inside a token (e.g. the apostrophe in "don't") is kept.

    lines -- any iterable of strings, such as an open file or ``sys.stdin``.
    """
    found = set()
    for line in lines:
        for token in line.split():
            word = token.lower().strip(string.punctuation)
            # A token made only of punctuation ("--", "...") strips down to
            # the empty string; that is not a word, so leave it out.
            if word:
                found.add(word)
    return sorted(found)


def main():
    """Read text from standard input and print its unique words on one line."""
    # One space-separated line ending in a newline. sys.stdout.write is used
    # so the script runs unchanged under both Python 2 and Python 3.
    sys.stdout.write(" ".join(unique_words(sys.stdin)) + "\n")


if __name__ == "__main__":
    main()
Word counts
#!/usr/bin/env python
import sys, string
def word_counts(lines):
    """Return ``(word, count)`` pairs for *lines*, sorted by word.

    Each line is split on whitespace; every token is lowercased and has
    leading/trailing punctuation (``string.punctuation``) removed before
    it is tallied.

    lines -- any iterable of strings, such as an open file or ``sys.stdin``.
    """
    counts = {}
    for line in lines:
        for token in line.split():
            word = token.lower().strip(string.punctuation)
            # A token made only of punctuation ("--", "...") strips down to
            # the empty string; skip it instead of counting a blank "word".
            if not word:
                continue
            counts[word] = counts.get(word, 0) + 1
    return sorted(counts.items())


def main():
    """Read text from standard input and print one "word count" line per word."""
    # sys.stdout.write keeps the script runnable under both Python 2 and
    # Python 3 while producing the same "word count" line layout.
    for word, count in word_counts(sys.stdin):
        sys.stdout.write("%s %d\n" % (word, count))


if __name__ == "__main__":
    main()