#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Author: Bachir Soussi Chiadmi <bach>
# @Date: 23-05-2017
# @Email: bachir@figureslibres.io
# @Filename: md2html.py
# @Last modified by: bach
# @Last modified time: 03-06-2017
# @License: GPL-V3
from __future__ import absolute_import, print_function, division, unicode_literals

import io
import json
import os
import re

import pypandoc
import pyphen
from bs4 import BeautifulSoup
from PyQt5.QtCore import QFileSystemWatcher
  18. class Compiler():
  19. def __init__(self,core):
  20. self.core = core
  21. self.initWatching()
  22. self.compileContents()
  23. def initWatching(self):
  24. self.refreshPaths()
  25. self.fs_watcher = QFileSystemWatcher(self.paths)
  26. # self.fs_watcher.directoryChanged.connect(self.directory_changed)
  27. self.fs_watcher.fileChanged.connect(self.onMdFileChanged)
  28. def onMdFileChanged(self):
  29. print("onMdFileChanged")
  30. try:
  31. self.compileContents()
  32. except Exception as e:
  33. print("Error compiling MD files", e)
  34. pass
  35. def refreshPaths(self):
  36. jsonfilepath = os.path.join(self.core.cwd,'.config/summary.json')
  37. sum_json = open(jsonfilepath).read()
  38. self.sum = json.loads(sum_json)
  39. self.paths = [os.path.join(self.core.cwd,'contents')]
  40. for item in self.sum:
  41. self.paths.append(os.path.join(self.core.cwd,'contents',item['file']))
  42. def reload(self):
  43. self.fs_watcher.removePaths(self.paths)
  44. self.refreshPaths()
  45. self.fs_watcher.addPaths(self.paths)
  46. self.compileContents()
  47. def compileContents(self):
  48. print('Compiling md')
  49. # hyphenator
  50. self._H_FR = pyphen.Pyphen(lang='fr')
  51. # TODO: add to settings language choice
  52. # create main html dom from template
  53. template_f = open(os.path.join(self.core.appcwd,"templates/main.tpl.html"), "r")
  54. template_html = template_f.read()
  55. template_dom = BeautifulSoup(template_html, 'html.parser')
  56. # get story div
  57. story_dom = template_dom.find('div', {"id":"flow-main"})
  58. # page index
  59. pi = 0
  60. # loop through pages following summary (pages liste)
  61. for p in self.sum:
  62. # print(toc[p])
  63. pagename = p['title']
  64. pageid = re.sub('[^a-z0-9]+', '-', pagename.lower())
  65. print(pageid)
  66. # files
  67. in_f = os.path.join(self.core.cwd, "contents", p['file'])
  68. if not os.path.isfile(in_f):
  69. print("Source path is not a file, can't generate html : "+in_f)
  70. continue
  71. # print('in_f : '+in_f)
  72. pdoc_args = ['--mathjax']
  73. pdoc_filters = []
  74. # convert markdown from file to html
  75. output = pypandoc.convert_file(in_f,
  76. to='html5',
  77. format='markdown+smart+header_attributes+link_attributes+bracketed_spans',
  78. extra_args=pdoc_args,
  79. filters=pdoc_filters)
  80. # print(output)
  81. # convert html string to parseable html dom
  82. output_dom = BeautifulSoup(output, 'html.parser')
  83. # hyphenate paragraphes
  84. # for node in output_dom.find_all('p'):
  85. # self.hyphenate(node)
  86. # append html story page to template_dom
  87. # create a page dom
  88. story_page = BeautifulSoup(
  89. '<div class="story-page story-page-'+str(pi)+'" id="'+pageid+'"></div>',
  90. 'html.parser'
  91. )
  92. # append to page dom the converted content
  93. story_page.div.append(output_dom)
  94. # append to global dom content the page with contents
  95. story_dom.append(story_page)
  96. # increment pahe index
  97. pi = pi+1
  98. # create main html file from filled template html dom
  99. book_html_f = os.path.join(self.core.cwd,'index.html')
  100. with open(book_html_f, 'w') as fp:
  101. fp.write(template_dom.prettify())
  102. # __ __ __
  103. # / / / /_ ______ / /_ ___ ____ _____
  104. # / /_/ / / / / __ \/ __ \/ _ \/ __ \/ ___/
  105. # / __ / /_/ / /_/ / / / / __/ / / (__ )
  106. # /_/ /_/\__, / .___/_/ /_/\___/_/ /_/____/
  107. # /____/_/
  108. def hyphenate(self, node):
  109. # print("hyphenate")
  110. nodetext = node.get_text()
  111. # print(nodetext)
  112. nodestr = str(node)
  113. # print(nodestr)
  114. for word in nodetext.split(' '):
  115. # do not hyphenate if it's not a real word
  116. if len(word) < 5 or re.search('\w+', word) == None:
  117. continue
  118. # cleaning word
  119. # remove all non-alphanumerical characteres duplicated or more
  120. word = re.sub('\W{2,}', '', word)
  121. # remove all non-alphanumerical at the begining of word
  122. word = re.sub('^\W', '', word)
  123. # remove all non-alphanumerical at the end of word
  124. word = re.sub('\W$', '', word)
  125. # remove all word remaing having special chars
  126. if re.search('\W+', word):
  127. continue
  128. # hyphenate word
  129. word_hyphenated = self._H_FR.inserted(word)
  130. # remove hyphen precedeted by less than 3 letters
  131. word_hyphenated = re.sub(r'^(\w{,2})-', r'\1', word_hyphenated)
  132. # remove hyphen followed by less than 3 letters
  133. word_hyphenated = re.sub(r'-(\w{,2})$', r'\1', word_hyphenated)
  134. # replace scores by html elemt &shy;
  135. word_hyphenated = re.sub(r'(\w)-(\w)', r'\1&shy;\2', word_hyphenated)
  136. # replace double scores by score+$shy;
  137. word_hyphenated = re.sub(r'--', r'-&shy;', word_hyphenated)
  138. # TODO: attention au date 1950-1960, le tiret disparait
  139. # print(word_hyphenated)
  140. if re.search('\b+', word):
  141. print(word+" | "+word_hyphenated)
  142. try:
  143. # replace word by hyhanated_word on source
  144. nodestr = re.sub(word, word_hyphenated, nodestr)
  145. # replaced_str_dom = BeautifulSoup(replaced_str, 'html.parser')
  146. # node.string = replaced_str
  147. # node.string.replace_with(node.string)
  148. except Exception as e:
  149. print(_ERROR_PREF+'Replacement error with \033[1m'+word+'\033[0m | \033[1m'+word_hyphenated+"\033[0m")
  150. print(e)
  151. print(node.string)
  152. print('[//]')
  153. pass
  154. # add none breaking spaces
  155. nbspzr_before = ['»', '\!', '\?', ':', ';']
  156. for char in nbspzr_before:
  157. nodestr = re.sub(r'(\w|>)\s('+char+')', r'\1&nbsp;\2', nodestr)
  158. nbspzr_after = ['«']
  159. for char in nbspzr_after:
  160. nodestr = re.sub(r'('+char+')\s(\w|<)', r'\1&nbsp;\2', nodestr)
  161. # print(nodestr)
  162. # replace node by hyphenated one
  163. node.replace_with(nodestr)