# stop_words~20171216-161354.py (1.1 KB)

  1. # A set is a list with unique words
  2. stopwords = set()
  3. # define list of filtered words
  4. filtered_words = []
  5. # read stopwords from file & save them in a list
  6. # read from file
  7. with open("english.txt", "r") as source:
  8. # for each line
  9. for line in source:
  10. # clean returns
  11. line = line.strip()
  12. # add word to set stopwords (cfr difference with list: list.append())
  13. stopwords.add(line)
  14. # define your sentence / string
  15. sentence = 'I was at Synesthésie last night and took a bus to go home.'
  16. # print sentence
  17. print("phrase originale:", sentence)
  18. # convert string to list of words
  19. words = sentence.split(" ")
  20. # for each word of list, check if word is in stopwords, if it isn't, add word to filtered wordlist
  21. for word in words:
  22. if word not in stopwords:
  23. filtered_words.append(word)
  24. # this is the same, but shorter + no need to declare filtered_words as list in the beginning:
  25. #filtered_words = [word for word in words if word not in stopwords]
  26. # turn wordlist into string of characters
  27. new_sentence = " ".join(filtered_words)
  28. # print new sentence
  29. print("phrase réécrite:", new_sentence)