7 tahun lalu · 08361ddc99
--- a/libriis/classes/md2html.py
+++ b/libriis/classes/md2html.py
@@ -17,6 +17,7 @@ from PyQt5.QtCore import QFileSystemWatcher
 
															 import json
														
 
															 from bs4 import BeautifulSoup
														
 
															 import pypandoc
														
 
															+import pyphen
														
 
															 class Compiler():
														
@@ -57,6 +58,10 @@ class Compiler():
 
															    def compileContents(self):
														
 
															       print('Compiling md')
														
 
															+      # hyphenator
														
 
															+      self._H_FR = pyphen.Pyphen(lang='fr')
														
 
															+      # TODO: add to settings language choice
														
 
															+
														
 
															       # create main html dom from template
														
 
															       template_f = open(os.path.join(self.core.appcwd,"templates/main.tpl.html"), "r")
														
 
															       template_html = template_f.read()
														
@@ -92,7 +97,9 @@ class Compiler():
 
															          output_dom = BeautifulSoup(output, 'html.parser')
														
 
															-         # TODO: hyphenate paragraph
														
 
															+         # hyphenate paragraphes
														
 
															+         for node in output_dom.find_all('p'):
														
 
															+            self.hyphenate(node)
														
 
															          # append html story page to template_dom
														
 
															          story_page = BeautifulSoup(
														
@@ -109,3 +116,77 @@ class Compiler():
 
															       book_html_f = os.path.join(self.core.cwd,'index.html')
														
 
															       with open(book_html_f, 'w') as fp:
														
 
															          fp.write(template_dom.prettify())
														
 
															+
														
 
															+
														
 
															+   #     __  __            __
														
 
															+   #    / / / /_  ______  / /_  ___  ____  _____
														
 
															+   #   / /_/ / / / / __ \/ __ \/ _ \/ __ \/ ___/
														
 
															+   #  / __  / /_/ / /_/ / / / /  __/ / / (__  )
														
 
															+   # /_/ /_/\__, / .___/_/ /_/\___/_/ /_/____/
														
 
															+   #       /____/_/
														
 
															+   def hyphenate(self, node):
														
 
															+      # print("hyphenate")
														
 
															+      nodetext = node.get_text()
														
 
															+      # print(nodetext)
														
 
															+      nodestr = str(node)
														
 
															+      # print(nodestr)
														
 
															+      for word in nodetext.split(' '):
														
 
															+
														
 
															+         # do not hyphenate if it's not a real word
														
 
															+         if len(word) < 5 or re.search('\w+', word) == None:
														
 
															+            continue
														
 
															+
														
 
															+         # cleaning word
														
 
															+         # remove all non-alphanumerical characteres duplicated or more
														
 
															+         word = re.sub('\W{2,}', '', word)
														
 
															+         # remove all non-alphanumerical at the begining of word
														
 
															+         word = re.sub('^\W', '', word)
														
 
															+         # remove all non-alphanumerical at the end of word
														
 
															+         word = re.sub('\W$', '', word)
														
 
															+
														
 
															+         # remove all word remaing having special chars
														
 
															+         if re.search('\W+', word):
														
 
															+            continue
														
 
															+
														
 
															+         # hyphenate word
														
 
															+         word_hyphenated = self._H_FR.inserted(word)
														
 
															+         # remove hyphen precedeted by less than 3 letters
														
 
															+         word_hyphenated = re.sub(r'^(\w{,2})-', r'\1', word_hyphenated)
														
 
															+         # remove hyphen followed by less than 3 letters
														
 
															+         word_hyphenated = re.sub(r'-(\w{,2})$', r'\1', word_hyphenated)
														
 
															+         # replace scores by html elemt &shy;
														
 
															+         word_hyphenated = re.sub(r'(\w)-(\w)', r'\1&shy;\2', word_hyphenated)
														
 
															+         # replace double scores by score+$shy;
														
 
															+         word_hyphenated = re.sub(r'--', r'-&shy;', word_hyphenated)
														
 
															+         # TODO: attention au date 1950-1960, le tiret disparait
														
 
															+
														
 
															+         # print(word_hyphenated)
														
 
															+
														
 
															+         if re.search('\b+', word):
														
 
															+            print(word+" | "+word_hyphenated)
														
 
															+
														
 
															+         try:
														
 
															+            # replace word by hyhanated_word on source
														
 
															+            nodestr = re.sub(word, word_hyphenated, nodestr)
														
 
															+            # replaced_str_dom = BeautifulSoup(replaced_str, 'html.parser')
														
 
															+            # node.string = replaced_str
														
 
															+            # node.string.replace_with(node.string)
														
 
															+         except Exception as e:
														
 
															+            print(_ERROR_PREF+'Replacement error with \033[1m'+word+'\033[0m | \033[1m'+word_hyphenated+"\033[0m")
														
 
															+            print(e)
														
 
															+            print(node.string)
														
 
															+            print('[//]')
														
 
															+            pass
														
 
															+
														
 
															+      # add none breaking spaces
														
 
															+      nbspzr_before = ['»', '\!', '\?', ':', ';']
														
 
															+      for char in nbspzr_before:
														
 
															+         nodestr = re.sub(r'(\w|>)\s('+char+')', r'\1&nbsp;\2', nodestr)
														
 
															+
														
 
															+      nbspzr_after = ['«']
														
 
															+      for char in nbspzr_after:
														
 
															+         nodestr = re.sub(r'('+char+')\s(\w|<)', r'\1&nbsp;\2', nodestr)
														
 
															+
														
 
															+      # print(nodestr)
														
 
															+      # replace node by hyphenated one
														
 
															+      node.replace_with(nodestr)