123456789101112131415 |
# Demo: remove French stop words from a short text using NLTK.
# Needs the NLTK "stopwords" corpus and the Punkt tokenizer data to be
# installed (e.g. via nltk.download).
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords

data = "Sous le pont Mirabeau coule la Seine Et nos amours Faut-il qu'il m'en souvienne..."

# A set gives constant-time membership tests in the filter below.
stopWords = set(stopwords.words('french'))

# Use the French Punkt model instead of word_tokenize's English default.
words = word_tokenize(data, language='french')

# NLTK's stop-word lists are lowercase, so compare on the lowercased token;
# otherwise capitalised stop words such as "Et" would be kept. The original
# casing of the surviving tokens is preserved in the output.
# NOTE(review): elided forms like "qu'il" may come out of the tokenizer as a
# single token and therefore not match the stop-word set -- verify if needed.
wordsFiltered = [w for w in words if w.lower() not in stopWords]

print(wordsFiltered)
print(len(stopWords))
print(stopWords)
|