12345678910111213141516171819202122232425262728293031323334353637383940 |
#!/usr/bin/python
# this is a shebang: https://en.wikipedia.org/wiki/Shebang_%28Unix%29
"""Collect the words of a text that contain the letters of "perec" in order.

Every whitespace-separated word of the source text is tested against a
regular expression. Each matching word is printed in the terminal and
written, stripped of surrounding punctuation, to the destination file
(one word per line, below a title line).

Made for OLA #5, Paris, 15-17 December 2017
"""
# import external modules
import re
import string

# define textfiles
SOURCE_PATH = "../data/1984_all.txt"
DESTINATION_PATH = "../data/perec.txt"

# define regular expression: the lowercase letters p, e, r, e, c must occur
# in that order; each may be repeated, and only word characters (\w) may sit
# between them. The match is case-sensitive.
regex = r'(\w*p+\w*e+\w*r+\w*e+\w*c+)'


def find_perec_words(lines):
    """Yield every word of *lines* in which the pattern is found.

    lines -- any iterable of strings (an open text file works).

    Words are split on any run of whitespace, so a yielded word never
    carries a trailing newline or tab. Surrounding punctuation is left
    untouched; use strip_punctuation() to remove it.
    """
    # compile once instead of handing the raw pattern to re.search per word
    pattern = re.compile(regex)
    for line in lines:
        # split() without argument splits on spaces, tabs and newlines alike
        for word in line.split():
            if pattern.search(word):
                yield word


def strip_punctuation(word):
    """Return *word* without leading and trailing ASCII punctuation."""
    return word.strip(string.punctuation)


def main(source_path=SOURCE_PATH, destination_path=DESTINATION_PATH):
    """Read the source text and write the matching words to the destination.

    Each matching word is printed as found, then written without
    surrounding punctuation on its own line. Both files are closed even
    if an error occurs halfway.
    """
    # NOTE(review): the data file is assumed to be UTF-8 encoded -- confirm
    with open(source_path, 'r', encoding='utf-8') as source, \
            open(destination_path, 'w', encoding='utf-8') as destination:
        # write title to destination
        destination.write("Source: George Orwell's 1984\n\n\n")
        # search for pattern in source, print in terminal & write to destination
        for word in find_perec_words(source):
            print(word)
            destination.write(strip_punctuation(word) + '\n')


if __name__ == "__main__":
    main()
|