python2_litterature_definitionelle~20171003-113906.py 3.3 KB

#!/usr/bin/env python
# This script automatises the following Oulipo constraint:
# http://oulipo.net/fr/contraintes/litterature-definitionnelle
# The output is printed in a text file and in a Logbook in ConTeXt
# Copyright (C) 2016 Constant, Algolit, An Mertens
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details: <http://www.gnu.org/licenses/>.
from __future__ import division
import nltk
from nltk.corpus import wordnet as wn
from pattern.en import tag
import nltk.data
from random import choice
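# NOTE: the script assumes the NLTK 'punkt' tokenizer and 'wordnet' corpus
# are already installed; if they are not, they can be fetched once with:
#   import nltk; nltk.download('punkt'); nltk.download('wordnet')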
# VARIABLES
# textfiles
source = open("frankenstein_for_machines.txt", 'r')
destination = open("litterature_definitionelle.txt", "wt")
## SCRIPT
# select 4 sentences from source
## split source text into list of sentences
finding_sentences = nltk.data.load('tokenizers/punkt/english.pickle')
sentences_list = []
with source as text:
    for line in text:
        # tokenize each line into sentences and collect them all,
        # extending the list so sentences from every line are kept
        sentences_list.extend(finding_sentences.tokenize(line.strip()))
# pick 4 random sentences
selected_sentences = []
number = 0
while number < 4:
    selected_sentences.append(choice(sentences_list))
    number += 1
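# NOTE: choice() can pick the same sentence more than once; if four
# distinct sentences are wanted, random.sample(sentences_list, 4) would
# guarantee that (an alternative, not what this script does).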
# tokenize source and get Part-of-Speech tags for each word
definitions = []
for sentence in selected_sentences:
    # create tuple of tuples with pairs of word + POS-tag
    collection = tag(sentence, tokenize=True, encoding='utf-8')
    # transform tuple into list to be able to manipulate it
    collection = list(collection)
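    # For illustration (assuming pattern.en's default tagger), tag() yields
    # pairs roughly like: [('the', 'DT'), ('monster', 'NN'), ('sleeps', 'VBZ')]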
    # for each pair:
    for element in collection:
        # look for nouns & replace them with their definition
        if element[1] == "NN":
            if wn.synsets(element[0]):
                synset = wn.synsets(element[0])
                definitions.append("<")
                definitions.append(synset[0].definition())
                definitions.append(">")
            else:
                # nouns without a WordNet entry are kept as words
                definitions.append(element[0])
        else:
            # non-nouns are left as words
            definitions.append(element[0])
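# For example, run through pattern + WordNet, a sentence like
# "the monster sleeps" would come out roughly as:
# "the < an imaginary creature usually having various human and animal parts > sleeps"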
# write the transformed sentence
#print(" ".join(definitions))
with destination as text:
    text.write("ORIGINAL TEXT\n\n\n")
    for sentence in selected_sentences:
        text.write(sentence+"\n")
    text.write("\n\n")
    text.write("\n\nLITTERATURE DEFINITIONELLE\n\n\n")
    text.write(" ".join(definitions))
# the with-blocks above already closed both files,
# so no explicit close() calls are needed
  74. # -------------------------------------------
# # Write in logbook
# # print chapters
# #writetologbook('\setuppagenumber[state=start]')
# writetologbook('\n\section{LITTERATURE DEFINITIONELLE}\n')
# # print_sentences(spring_chapter)
# writetologbook('\nORIGINAL TEXT\crlf\crlf\n')
# for sentence in selected_sentences:
#     writetologbook(sentence+"\n")
# writetologbook("\crlf\crlf\n\n\n")
# writetologbook('\nLITTERATURE DEFINITIONELLE\crlf\crlf\n')
# writetologbook(" ".join(definitions))
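# NOTE: writetologbook() is not defined in this file; presumably it lives
# in the shared logbook utilities this script was run alongside.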