1234567891011121314151617181920212223242526272829303132333435363738 |
"""Remove stopwords from a sentence using a word list read from a file."""


def load_stopwords(path="english.txt"):
    """Return the set of stopwords stored one per line in *path*.

    Surrounding whitespace is stripped from every line, blank lines are
    skipped, and each word is case-folded so later lookups can ignore case.
    A set is used (rather than a list) because it keeps only unique words
    and makes membership tests cheap.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    stopwords = set()
    # Explicit encoding: without it the platform's locale encoding is used,
    # so the same file could decode differently from one machine to another.
    # NOTE(review): assumes the word list is UTF-8 (or plain ASCII) - confirm.
    with open(path, "r", encoding="utf-8") as source:
        for line in source:
            word = line.strip()
            # A blank line would otherwise register "" as a stopword.
            if word:
                stopwords.add(word.casefold())
    return stopwords


def remove_stopwords(sentence, stopwords):
    """Return *sentence* with every word found in *stopwords* removed.

    The sentence is split on single spaces and re-joined with single spaces,
    so punctuation stays attached to its word ("home." is not "home").
    Comparison is case-insensitive: "I" is dropped when "i" is a stopword.
    """
    # Normalise here as well so callers may pass any iterable of words,
    # not only the already case-folded set built by load_stopwords().
    normalized = {stopword.casefold() for stopword in stopwords}
    words = sentence.split(" ")
    filtered_words = [word for word in words if word.casefold() not in normalized]
    return " ".join(filtered_words)


def main():
    """Filter the example sentence and print it before and after."""
    stopwords = load_stopwords("english.txt")
    sentence = 'I was at Synesthésie last night and took a bus to go home.'
    print("phrase originale:", sentence)
    new_sentence = remove_stopwords(sentence, stopwords)
    print("phrase réécrite:", new_sentence)


if __name__ == "__main__":
    main()
|