8 years ago · 08361ddc99
--- a/libriis/classes/md2html.py
+++ b/libriis/classes/md2html.py
@@ -17,6 +17,7 @@ from PyQt5.QtCore import QFileSystemWatcher
 
				 import json
			
 
				 from bs4 import BeautifulSoup
			
 
				 import pypandoc
			
 
				+import pyphen
			
 
				 
			
 
				 
			
 
				 class Compiler():
			
@@ -57,6 +58,10 @@ class Compiler():
 
				    def compileContents(self):
			
 
				       print('Compiling md')
			
 
				 
			
 
				+      # hyphenator
			
 
				+      self._H_FR = pyphen.Pyphen(lang='fr')
			
 
				+      # TODO: add to settings language choice
			
 
				+
			
 
				       # create main html dom from template
			
 
				       template_f = open(os.path.join(self.core.appcwd,"templates/main.tpl.html"), "r")
			
 
				       template_html = template_f.read()
			
@@ -92,7 +97,9 @@ class Compiler():
 
				 
			
 
				          output_dom = BeautifulSoup(output, 'html.parser')
			
 
				 
			
 
				-         # TODO: hyphenate paragraph
			
 
				+         # hyphenate paragraphes
			
 
				+         for node in output_dom.find_all('p'):
			
 
				+            self.hyphenate(node)
			
 
				 
			
 
				          # append html story page to template_dom
			
 
				          story_page = BeautifulSoup(
			
@@ -109,3 +116,77 @@ class Compiler():
 
				       book_html_f = os.path.join(self.core.cwd,'index.html')
			
 
				       with open(book_html_f, 'w') as fp:
			
 
				          fp.write(template_dom.prettify())
			
 
				+
			
 
				+
			
 
				+   #     __  __            __
			
 
				+   #    / / / /_  ______  / /_  ___  ____  _____
			
 
				+   #   / /_/ / / / / __ \/ __ \/ _ \/ __ \/ ___/
			
 
				+   #  / __  / /_/ / /_/ / / / /  __/ / / (__  )
			
 
				+   # /_/ /_/\__, / .___/_/ /_/\___/_/ /_/____/
			
 
				+   #       /____/_/
			
 
				+   def hyphenate(self, node):
			
 
				+      # print("hyphenate")
			
 
				+      nodetext = node.get_text()
			
 
				+      # print(nodetext)
			
 
				+      nodestr = str(node)
			
 
				+      # print(nodestr)
			
 
				+      for word in nodetext.split(' '):
			
 
				+
			
 
				+         # do not hyphenate if it's not a real word
			
 
				+         if len(word) < 5 or re.search('\w+', word) == None:
			
 
				+            continue
			
 
				+
			
 
				+         # cleaning word
			
 
				+         # remove all non-alphanumerical characteres duplicated or more
			
 
				+         word = re.sub('\W{2,}', '', word)
			
 
				+         # remove all non-alphanumerical at the begining of word
			
 
				+         word = re.sub('^\W', '', word)
			
 
				+         # remove all non-alphanumerical at the end of word
			
 
				+         word = re.sub('\W$', '', word)
			
 
				+
			
 
				+         # remove all word remaing having special chars
			
 
				+         if re.search('\W+', word):
			
 
				+            continue
			
 
				+
			
 
				+         # hyphenate word
			
 
				+         word_hyphenated = self._H_FR.inserted(word)
			
 
				+         # remove hyphen precedeted by less than 3 letters
			
 
				+         word_hyphenated = re.sub(r'^(\w{,2})-', r'\1', word_hyphenated)
			
 
				+         # remove hyphen followed by less than 3 letters
			
 
				+         word_hyphenated = re.sub(r'-(\w{,2})$', r'\1', word_hyphenated)
			
 
				+         # replace scores by html elemt &shy;
			
 
				+         word_hyphenated = re.sub(r'(\w)-(\w)', r'\1&shy;\2', word_hyphenated)
			
 
				+         # replace double scores by score+$shy;
			
 
				+         word_hyphenated = re.sub(r'--', r'-&shy;', word_hyphenated)
			
 
				+         # TODO: attention au date 1950-1960, le tiret disparait
			
 
				+
			
 
				+         # print(word_hyphenated)
			
 
				+
			
 
				+         if re.search('\b+', word):
			
 
				+            print(word+" | "+word_hyphenated)
			
 
				+
			
 
				+         try:
			
 
				+            # replace word by hyhanated_word on source
			
 
				+            nodestr = re.sub(word, word_hyphenated, nodestr)
			
 
				+            # replaced_str_dom = BeautifulSoup(replaced_str, 'html.parser')
			
 
				+            # node.string = replaced_str
			
 
				+            # node.string.replace_with(node.string)
			
 
				+         except Exception as e:
			
 
				+            print(_ERROR_PREF+'Replacement error with \033[1m'+word+'\033[0m | \033[1m'+word_hyphenated+"\033[0m")
			
 
				+            print(e)
			
 
				+            print(node.string)
			
 
				+            print('[//]')
			
 
				+            pass
			
 
				+
			
 
				+      # add none breaking spaces
			
 
				+      nbspzr_before = ['»', '\!', '\?', ':', ';']
			
 
				+      for char in nbspzr_before:
			
 
				+         nodestr = re.sub(r'(\w|>)\s('+char+')', r'\1&nbsp;\2', nodestr)
			
 
				+
			
 
				+      nbspzr_after = ['«']
			
 
				+      for char in nbspzr_after:
			
 
				+         nodestr = re.sub(r'('+char+')\s(\w|<)', r'\1&nbsp;\2', nodestr)
			
 
				+
			
 
				+      # print(nodestr)
			
 
				+      # replace node by hyphenated one
			
 
				+      node.replace_with(nodestr)