"""Tokenize a short French text and print its tokens with stop words removed."""

import re

from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize

data = "Sous le pont Mirabeau coule la Seine Et nos amours Faut-il qu'il m'en souvienne..."

# Build the stop-word collection once as a set so each lookup below is O(1).
stopWords = set(stopwords.words('french'))

# Select the French Punkt model explicitly; word_tokenize defaults to English.
words = word_tokenize(data, language='french')

# The NLTK stop-word list is lowercase, so compare on the lowercased token;
# a case-sensitive test lets capitalised stop words such as "Et" slip through.
# The original casing of the surviving tokens is kept in the output.
wordsFiltered = [w for w in words if w.lower() not in stopWords]

print(wordsFiltered)