#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Author: Bachir Soussi Chiadmi <bach>
# @Date: 23-05-2017
# @Email: bachir@figureslibres.io
# @Filename: md2html.py
# @Last modified by: bach
# @Last modified time: 03-06-2017
# @License: GPL-V3
from __future__ import absolute_import, print_function, division, unicode_literals

import io
import json
import os
import re

import pypandoc
import pyphen
from bs4 import BeautifulSoup
from PyQt5.QtCore import QFileSystemWatcher
  18. class Compiler():
  19. def __init__(self,core):
  20. self.core = core
  21. self.initWatching()
  22. self.compileContents()
  23. def initWatching(self):
  24. self.refreshPaths()
  25. self.fs_watcher = QFileSystemWatcher(self.paths)
  26. # self.fs_watcher.directoryChanged.connect(self.directory_changed)
  27. self.fs_watcher.fileChanged.connect(self.onMdFileChanged)
  28. def onMdFileChanged(self):
  29. print("onMdFileChanged")
  30. try:
  31. self.compileContents()
  32. except Exception as e:
  33. print("Error compiling MD files", e)
  34. pass
  35. def refreshPaths(self):
  36. jsonfilepath = os.path.join(self.core.cwd,'.config/summary.json')
  37. sum_json = open(jsonfilepath).read()
  38. self.sum = json.loads(sum_json)
  39. self.paths = [os.path.join(self.core.cwd,'contents')]
  40. for item in self.sum:
  41. self.paths.append(os.path.join(self.core.cwd,'contents',item['file']))
  42. def reload(self):
  43. self.fs_watcher.removePaths(self.paths)
  44. self.refreshPaths()
  45. self.fs_watcher.addPaths(self.paths)
  46. self.compileContents()
  47. def compileContents(self):
  48. print('Compiling md')
  49. # hyphenator
  50. self._H_FR = pyphen.Pyphen(lang='fr')
  51. # TODO: add to settings language choice
  52. # create main html dom from template
  53. template_f = open(os.path.join(self.core.appcwd,"templates/main.tpl.html"), "r")
  54. template_html = template_f.read()
  55. template_dom = BeautifulSoup(template_html, 'html.parser')
  56. # get story div
  57. story_dom = template_dom.find('div', {"id":"flow-main"})
  58. # page index
  59. pi = 0
  60. # loop through pages following summary (pages liste)
  61. for p in self.sum:
  62. # print(toc[p])
  63. pagename = p['title']
  64. pageid = re.sub('[^a-z0-9]+', '-', pagename.lower())
  65. print(pageid)
  66. # files
  67. in_f = os.path.join(self.core.cwd, "contents", p['file'])
  68. if not os.path.isfile(in_f):
  69. print("Source path is not a file, can't generate html : "+in_f)
  70. continue
  71. # print('in_f : '+in_f)
  72. pdoc_args = ['--mathjax']
  73. pdoc_filters = []
  74. output = pypandoc.convert_file(in_f,
  75. to='html5',
  76. format='markdown+smart+header_attributes+link_attributes+bracketed_spans',
  77. extra_args=pdoc_args,
  78. filters=pdoc_filters)
  79. output_dom = BeautifulSoup(output, 'html.parser')
  80. # hyphenate paragraphes
  81. for node in output_dom.find_all('p'):
  82. self.hyphenate(node)
  83. # append html story page to template_dom
  84. story_page = BeautifulSoup(
  85. '<div class="story-page story-page-'+str(pi)+'" id="'+pageid+'"></div>',
  86. 'html.parser'
  87. )
  88. story_page.div.append(output_dom)
  89. story_dom.append(story_page)
  90. pi = pi+1
  91. # create main html file from filled template html dom
  92. book_html_f = os.path.join(self.core.cwd,'index.html')
  93. with open(book_html_f, 'w') as fp:
  94. fp.write(template_dom.prettify())
  95. # __ __ __
  96. # / / / /_ ______ / /_ ___ ____ _____
  97. # / /_/ / / / / __ \/ __ \/ _ \/ __ \/ ___/
  98. # / __ / /_/ / /_/ / / / / __/ / / (__ )
  99. # /_/ /_/\__, / .___/_/ /_/\___/_/ /_/____/
  100. # /____/_/
  101. def hyphenate(self, node):
  102. # print("hyphenate")
  103. nodetext = node.get_text()
  104. # print(nodetext)
  105. nodestr = str(node)
  106. # print(nodestr)
  107. for word in nodetext.split(' '):
  108. # do not hyphenate if it's not a real word
  109. if len(word) < 5 or re.search('\w+', word) == None:
  110. continue
  111. # cleaning word
  112. # remove all non-alphanumerical characteres duplicated or more
  113. word = re.sub('\W{2,}', '', word)
  114. # remove all non-alphanumerical at the begining of word
  115. word = re.sub('^\W', '', word)
  116. # remove all non-alphanumerical at the end of word
  117. word = re.sub('\W$', '', word)
  118. # remove all word remaing having special chars
  119. if re.search('\W+', word):
  120. continue
  121. # hyphenate word
  122. word_hyphenated = self._H_FR.inserted(word)
  123. # remove hyphen precedeted by less than 3 letters
  124. word_hyphenated = re.sub(r'^(\w{,2})-', r'\1', word_hyphenated)
  125. # remove hyphen followed by less than 3 letters
  126. word_hyphenated = re.sub(r'-(\w{,2})$', r'\1', word_hyphenated)
  127. # replace scores by html elemt &shy;
  128. word_hyphenated = re.sub(r'(\w)-(\w)', r'\1&shy;\2', word_hyphenated)
  129. # replace double scores by score+$shy;
  130. word_hyphenated = re.sub(r'--', r'-&shy;', word_hyphenated)
  131. # TODO: attention au date 1950-1960, le tiret disparait
  132. # print(word_hyphenated)
  133. if re.search('\b+', word):
  134. print(word+" | "+word_hyphenated)
  135. try:
  136. # replace word by hyhanated_word on source
  137. nodestr = re.sub(word, word_hyphenated, nodestr)
  138. # replaced_str_dom = BeautifulSoup(replaced_str, 'html.parser')
  139. # node.string = replaced_str
  140. # node.string.replace_with(node.string)
  141. except Exception as e:
  142. print(_ERROR_PREF+'Replacement error with \033[1m'+word+'\033[0m | \033[1m'+word_hyphenated+"\033[0m")
  143. print(e)
  144. print(node.string)
  145. print('[//]')
  146. pass
  147. # add none breaking spaces
  148. nbspzr_before = ['»', '\!', '\?', ':', ';']
  149. for char in nbspzr_before:
  150. nodestr = re.sub(r'(\w|>)\s('+char+')', r'\1&nbsp;\2', nodestr)
  151. nbspzr_after = ['«']
  152. for char in nbspzr_after:
  153. nodestr = re.sub(r'('+char+')\s(\w|<)', r'\1&nbsp;\2', nodestr)
  154. # print(nodestr)
  155. # replace node by hyphenated one
  156. node.replace_with(nodestr)