"""Collect words that contain the letters p, e, r, e, c in that order.

Reads a plain-text source file, keeps every whitespace-separated token that
matches ``regex``, and prints the matches as one run of HTML ``<p>`` elements.
"""
import re

# Text to scan. An alternative input tried previously was "joyce.txt".
SOURCE_PATH = "scripts/frankenstein_gutenberg.txt"

# A token matches when it contains 'p', 'e', 'r', 'e', 'c' in that order,
# each possibly repeated, with only word characters in between.
# An alternative pattern tried previously (letters e, l, i, f):
#     r'(\w*e+\w*l+\w*i+\w*f+\w*)'
regex = r'(\w*p+\w*e+\w*r+\w*e+\w*c+)'


def find_matching_words(lines, pattern=regex):
    """Return every token in *lines* that matches *pattern*.

    Args:
        lines: Iterable of text lines (an open text file works).
        pattern: Regular expression applied to each token with ``search``.

    Returns:
        List of matching tokens in order of appearance. Tokens keep any
        punctuation attached to them but never contain whitespace.
    """
    matcher = re.compile(pattern)  # compile once, not once per word
    matches = []
    for line in lines:
        # split() with no argument splits on any whitespace run, so the
        # line's trailing newline does not end up inside the last word.
        for word in line.split():
            if matcher.search(word):
                matches.append(word)
    return matches


def render_paragraphs(words):
    """Wrap each word in ``<p>...</p>`` and concatenate the results."""
    return ''.join('<p>' + word + '</p>' for word in words)


def main():
    """Scan SOURCE_PATH and print the matching words as HTML paragraphs."""
    # The context manager closes the file even if reading fails.
    with open(SOURCE_PATH, "r") as source:
        words = find_matching_words(source)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(render_paragraphs(words))


if __name__ == "__main__":
    main()