bachir
/
ola5doc


			
							12345678910111213141516171819202122232425262728293031323334353637383940
							#!/usr/bin/python
# this is a shebang: https://en.wikipedia.org/wiki/Shebang_%28Unix%29

'''
This script looks at each word in a given text; if the word contains the letters of "Perec" in order (p, e, r, e, c), the word is written to another text file.
Made for OLA #5, Paris, 15-17 décembre 2017
'''

# import external modules
import re
import string

# define textfiles (paths are relative to the directory the script is run from)
SOURCE_PATH = "../data/1984_all.txt"
DESTINATION_PATH = "../data/perec.txt"

# define regular expression: the letters p, e, r, e, c in that order, each one
# possibly repeated and separated by any number of word characters.
# Matching is case-sensitive (lowercase letters only).
regex = r'(\w*p+\w*e+\w*r+\w*e+\w*c+)'
# compiled once so the loop does not look the pattern up again for every word
PATTERN = re.compile(regex)

# characters removed from both ends of a word before it is written to file:
# backslash, full stop, comma and space
PUNCTUATION = '\\., '


def find_perec_words(lines):
    """Yield every word in *lines* that contains the Perec pattern.

    *lines* is any iterable of strings (an open text file works). Words are
    yielded in reading order, with their punctuation still attached.
    """
    for line in lines:
        # split() without an argument splits on any run of whitespace, so the
        # newline at the end of a line never stays glued to the last word and
        # no empty strings are produced
        for word in line.split():
            # look if pattern is in word
            if PATTERN.search(word):
                yield word


def clean_word(word):
    """Return *word* without leading/trailing punctuation (see PUNCTUATION)."""
    return word.strip(PUNCTUATION)


def main():
    """Read the source text, print each matching word and write it to file."""
    # 'with' closes both files even if an error occurs while processing
    with open(SOURCE_PATH, 'r') as source:
        with open(DESTINATION_PATH, 'w') as destination:
            # write title to destination
            destination.write("Source: George Orwell's 1984\n\n\n")

            # search for pattern in source, print in terminal & write to destination
            for word in find_perec_words(source):
                # print word in terminal
                print(word)
                # write word to file without punctuation, one word per line
                destination.write(clean_word(word) + '\n')


if __name__ == "__main__":
    main()