Removing common words / stopwords
Revision as of 12:52, 16 March 2011 by Aymeric Mansoux (talk | contribs) (Created page with "<source lang="python"> from nltk.corpus import stopwords english_stops = set(stopwords.words("english")) words = "Stopwords are common words that are often handy to remove or ign...")
from nltk.corpus import stopwords
# Build the stopword set once so every membership test below is a cheap
# set lookup instead of a scan through a list.
english_stops = set(stopwords.words("english"))

# str.split() with no arguments splits on runs of whitespace only.
words = "Stopwords are common words that are often handy to remove or ignore when processing text".split()

# Compare case-insensitively: NLTK's English stopword list is lowercase, so a
# capitalised token such as "The" would otherwise slip through the filter.
# The surviving tokens keep their original casing.
words = [w for w in words if w.lower() not in english_stops]

# The function-call form of print works on both Python 2 and Python 3;
# the bare `print words` statement is a SyntaxError on Python 3.
print(words)