# filtre~20171217-110857.py (415 B)

  1. from nltk.tokenize import sent_tokenize, word_tokenize
  2. from nltk.corpus import stopwords
  3. data = "All work and no play makes jack dull boy. All work and no play makes jack a dull boy."
  4. stopWords = set(stopwords.words('english'))
  5. words = word_tokenize(data)
  6. wordsFiltered = []
  7. for w in words:
  8. if w not in stopWords:
  9. wordsFiltered.append(w)
  10. print(wordsFiltered)
  11. print(len(stopWords))
  12. print(stopWords)