# filtre~20171217-112637.py
"""Filter French stop words out of a short text with NLTK.

The script tokenizes a sample sentence, drops every token found in NLTK's
French stop-word list, then prints the remaining tokens, the size of the
stop-word list, and the list itself.

Running it requires the NLTK ``stopwords`` corpus and the ``punkt``
tokenizer models to be installed (see ``nltk.download``).
"""
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords

# Sample text to filter (opening lines of Apollinaire's "Le Pont Mirabeau").
data = "Sous le pont Mirabeau coule la Seine Et nos amours Faut-il qu'il m'en souvienne..."

# A set gives O(1) membership tests while filtering.
stopWords = set(stopwords.words('french'))

# Use the French Punkt model rather than the English default, since the
# text being split is French.
words = word_tokenize(data, language="french")

# NLTK's stop-word lists are lowercase, so compare the lowercased token;
# otherwise capitalized stop words such as "Et" slip through the filter.
# The original casing of the tokens that are kept is preserved.
wordsFiltered = [w for w in words if w.lower() not in stopWords]

print(wordsFiltered)
print(len(stopWords))
print(stopWords)