2009 206: Difference between revisions
(One intermediate revision by the same user not shown) | |||
Line 3: | Line 3: | ||
== Acquiring == | == Acquiring == | ||
[[ | Today we are working with the text of [[File:Poe.zip | 10 poems]] by Edgar Allan Poe, from [http://www.gutenberg.org/etext/10031 Project Gutenberg]. | ||
== Processing == | |||
== | <source lang=python> | ||
import sys, re | |||
wc = {} | |||
for line in sys.stdin: | |||
line = line.rstrip() | |||
words = re.split("[^a-zA-Z]*", line) | |||
for word in words: | |||
word=word.lower() | |||
if word: | |||
wc[word]=wc.get(word, 0)+1 | |||
allwords = wc.keys() | |||
allwords.sort() | |||
for word in allwords: | |||
print word, wc[word] | |||
</source> | |||
== Visualising == | == Visualising == | ||
== Interacting == | == Interacting == |
Revision as of 14:14, 3 March 2009
Toward a navigable text
Acquiring
Today we are working with the text of 10 poems by Edgar Allan Poe, from Project Gutenberg.
Processing
"""Count how often each word occurs in the text read from standard input.

A "word" is a maximal run of ASCII letters; case is ignored.  The result is
printed to standard output as one ``word count`` pair per line, sorted
alphabetically by word.

Usage:
    python wordcount.py < poems.txt
"""
import re
import sys

# One or more ASCII letters.  The original split on "[^a-zA-Z]*", a pattern
# that can match the empty string: Python 2 ignored those empty matches, but
# Python 3.7+ splits on them and would break every word into single letters.
# Matching the words directly avoids the problem on every Python version.
WORD_RE = re.compile(r"[a-zA-Z]+")


def count_words(lines):
    """Return a dict mapping each lower-cased word in *lines* to its count.

    *lines* is any iterable of strings (for example an open text file).
    Everything that is not an ASCII letter acts as a word separator.
    """
    wc = {}
    for line in lines:
        for word in WORD_RE.findall(line):
            word = word.lower()
            wc[word] = wc.get(word, 0) + 1
    return wc


def format_counts(wc):
    """Return the ``word count`` report rows for *wc*, sorted by word."""
    # sorted() works on dict keys in both Python 2 and 3, unlike
    # wc.keys().sort(), which fails on Python 3's key views.
    return ["%s %d" % (word, wc[word]) for word in sorted(wc)]


def main():
    """Read text from stdin and print the sorted word counts to stdout."""
    for row in format_counts(count_words(sys.stdin)):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(row)


if __name__ == "__main__":
    main()