#!/usr/bin/env python
# This script automates the following Oulipo constraint:
# http://oulipo.net/fr/contraintes/litterature-definitionnelle
# The output is written to a txt file and to a logbook in ConTeXt.
# Copyright (C) 2016 Constant, Algolit, An Mertens
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details: <http://www.gnu.org/licenses/>.
from __future__ import division

import nltk.data
from nltk.corpus import wordnet as wn
from pattern.en import tag
from random import choice
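
# NOTE: the punkt sentence-tokenizer model must be installed once before this
# script can run; if it is missing, the standard NLTK call below fetches it.
# nltk.download('punkt')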

# VARIABLES
# text files
source = open("frankenstein_for_machines.txt", 'r')
destination = open("litterature_definitionelle.txt", "wt")

## SCRIPT
# select 4 sentences from the source
# split the source text into a list of sentences
finding_sentences = nltk.data.load('tokenizers/punkt/english.pickle')
sentences_list = []
with source as text:
    for line in text:
        # tokenize() returns the sentences found on this line; extend the
        # list so the sentences of every line are kept, not only the last
        sentences_list.extend(finding_sentences.tokenize(line.strip()))
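# For illustration: a line reading "It was dark. I was afraid." should come
# back from tokenize() as ['It was dark.', 'I was afraid.'].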

# pick 4 random sentences
selected_sentences = []
for _ in range(4):
    selected_sentences.append(choice(sentences_list))
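# NOTE: choice() samples with replacement, so the same sentence can be picked
# more than once; random.sample(sentences_list, 4) would give 4 distinct ones.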

# tokenize each sentence and get a Part-of-Speech tag for each word
definitions = []
for sentence in selected_sentences:
    # tag() returns a list of (word, POS-tag) pairs for the sentence
    collection = list(tag(sentence, tokenize=True, encoding='utf-8'))
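    # For illustration (assumed tags): tag("The cat sat.") should return
    # something like [('The', 'DT'), ('cat', 'NN'), ('sat', 'VBD'), ('.', '.')].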
    for element in collection:
        # look for nouns and replace them with their first WordNet definition
        if element[1] == "NN":
            synsets = wn.synsets(element[0])
            if synsets:
                definitions.append("<")
                definitions.append(synsets[0].definition())
                definitions.append(">")
            else:
                # nouns without a WordNet entry stay as plain words
                definitions.append(element[0])
        else:
            # non-nouns are left as words
            definitions.append(element[0])
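# For illustration (assumed output): wn.synsets('book')[0].definition() returns
# a gloss along the lines of 'a written work or composition that has been
# published'.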

# write the original and the transformed sentences
# print(" ".join(definitions))
with destination as text:
    text.write("ORIGINAL TEXT\n\n\n")
    for sentence in selected_sentences:
        text.write(sentence + "\n")
    text.write("\n\n")
    text.write("\n\nLITTERATURE DEFINITIONELLE\n\n\n")
    text.write(" ".join(definitions))

# no explicit close() is needed: the with-statements above close both files
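
# The commented-out logbook section below calls writetologbook(), which is not
# defined in this script. A minimal sketch of such a helper, assuming the
# logbook is a ConTeXt source file appended to on each run (the filename
# "logbook.tex" is an assumption, not taken from the original):
def writetologbook(content):
    # append a piece of ConTeXt markup to the logbook file
    with open("logbook.tex", "a") as logbook:
        logbook.write(content)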

# -------------------------------------------
# # Write in the logbook (ConTeXt)
# # print chapters
# # writetologbook('\setuppagenumber[state=start]')
# writetologbook('\n\section{LITTERATURE DEFINITIONELLE}\n')
# # print_sentences(spring_chapter)
# writetologbook('\nORIGINAL TEXT\crlf\crlf\n')
# for sentence in selected_sentences:
#     writetologbook(sentence + "\n")
# writetologbook("\crlf\crlf\n\n\n")
# writetologbook('\nLITTERATURE DEFINITIONELLE\crlf\crlf\n')
# writetologbook(" ".join(definitions))