# filtre.py

import re

from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
  4. data = "Sous le pont Mirabeau coule la Seine Et nos amours Faut-il qu'il m'en souvienne..."
  5. stopWords = set(stopwords.words('french'))
  6. words = word_tokenize(data)
  7. wordsFiltered = []
  8. for w in words:
  9. if w not in stopWords:
  10. wordsFiltered.append(w)
  11. print(wordsFiltered)
  12. #print(len(stopWords))
  13. #print(stopWords)