|
@@ -17,6 +17,7 @@ from PyQt5.QtCore import QFileSystemWatcher
|
|
import json
|
|
import json
|
|
from bs4 import BeautifulSoup
|
|
from bs4 import BeautifulSoup
|
|
import pypandoc
|
|
import pypandoc
|
|
|
|
+import pyphen
|
|
|
|
|
|
|
|
|
|
class Compiler():
|
|
class Compiler():
|
|
@@ -57,6 +58,10 @@ class Compiler():
|
|
def compileContents(self):
|
|
def compileContents(self):
|
|
print('Compiling md')
|
|
print('Compiling md')
|
|
|
|
|
|
|
|
+ # hyphenator
|
|
|
|
+ self._H_FR = pyphen.Pyphen(lang='fr')
|
|
|
|
+ # TODO: let the user choose the hyphenation language in the settings
|
|
|
|
+
|
|
# create main html dom from template
|
|
# create main html dom from template
|
|
template_f = open(os.path.join(self.core.appcwd,"templates/main.tpl.html"), "r")
|
|
template_f = open(os.path.join(self.core.appcwd,"templates/main.tpl.html"), "r")
|
|
template_html = template_f.read()
|
|
template_html = template_f.read()
|
|
@@ -92,7 +97,9 @@ class Compiler():
|
|
|
|
|
|
output_dom = BeautifulSoup(output, 'html.parser')
|
|
output_dom = BeautifulSoup(output, 'html.parser')
|
|
|
|
|
|
- # TODO: hyphenate paragraph
|
|
|
|
|
|
+ # hyphenate paragraphs
|
|
|
|
+ for node in output_dom.find_all('p'):
|
|
|
|
+ self.hyphenate(node)
|
|
|
|
|
|
# append html story page to template_dom
|
|
# append html story page to template_dom
|
|
story_page = BeautifulSoup(
|
|
story_page = BeautifulSoup(
|
|
@@ -109,3 +116,77 @@ class Compiler():
|
|
book_html_f = os.path.join(self.core.cwd,'index.html')
|
|
book_html_f = os.path.join(self.core.cwd,'index.html')
|
|
with open(book_html_f, 'w') as fp:
|
|
with open(book_html_f, 'w') as fp:
|
|
fp.write(template_dom.prettify())
|
|
fp.write(template_dom.prettify())
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+ # __ __ __
|
|
|
|
+ # / / / /_ ______ / /_ ___ ____ _____
|
|
|
|
+ # / /_/ / / / / __ \/ __ \/ _ \/ __ \/ ___/
|
|
|
|
+ # / __ / /_/ / /_/ / / / / __/ / / (__ )
|
|
|
|
+ # /_/ /_/\__, / .___/_/ /_/\___/_/ /_/____/
|
|
|
|
+ # /____/_/
|
|
|
|
def hyphenate(self, node):
    """Add soft hyphens and non-breaking spaces to one HTML element, in place.

    The visible text of ``node`` is scanned for words, each eligible word is
    hyphenated with ``self._H_FR`` (a ``pyphen.Pyphen`` instance created in
    ``compileContents``), and every break point is written as ``&shy;``.
    French punctuation spacing is then enforced with ``&nbsp;``. Finally the
    element is swapped in its tree for a freshly parsed copy of the result.

    Args:
        node: the BeautifulSoup element to process (``compileContents``
            passes every ``<p>`` of the rendered page). It is replaced in
            its parent tree, so the caller's reference is detached after
            the call whenever something changed.

    Returns:
        None.
    """
    soft_hyphen = '&shy;'
    # Never leave fewer than this many letters on either side of a break.
    min_fragment = 3

    # -- 1. decide how each word of the visible text is hyphenated ----------
    hyphenated_by_word = {}
    # split() rather than split(' '): rendered paragraphs are usually
    # wrapped, and a word glued to a newline must still be recognised.
    for token in node.get_text().split():
        # too short to be worth breaking, or pure punctuation
        if len(token) < 5 or re.search(r'\w', token) is None:
            continue

        # strip punctuation: any run of 2+ non-word characters, then a
        # single leading and a single trailing one
        word = re.sub(r'\W{2,}', '', token)
        word = re.sub(r'^\W', '', word)
        word = re.sub(r'\W$', '', word)

        # words still holding a special character (apostrophe, dash, date
        # ranges such as 1950-1960, ...) are left untouched
        if not word or word in hyphenated_by_word or re.search(r'\W', word):
            continue

        # ``word`` is made of word characters only, so every '-' in the
        # result was inserted by the hyphenator and marks a syllable border.
        syllables = self._H_FR.inserted(word).split('-')
        total = sum(len(syllable) for syllable in syllables)
        pieces = []
        position = 0
        for syllable in syllables[:-1]:
            pieces.append(syllable)
            position += len(syllable)
            # keep the break only if both sides are long enough
            if position >= min_fragment and total - position >= min_fragment:
                pieces.append(soft_hyphen)
        pieces.append(syllables[-1])
        hyphenated_by_word[word] = ''.join(pieces)

    # -- 2. rewrite the markup ---------------------------------------------
    original_markup = str(node)

    def _swap(match):
        # whole-word lookup: unknown words are returned unchanged
        found = match.group(0)
        return hyphenated_by_word.get(found, found)

    # The capturing group keeps the tags in the result: even indexes are
    # text, odd indexes are tags. Only text is rewritten, in a single pass,
    # so tag/attribute names and already-inserted entities are never hit.
    chunks = re.split(r'(<[^>]*>)', original_markup)
    for index in range(0, len(chunks), 2):
        chunks[index] = re.sub(r'\w+', _swap, chunks[index])
    markup = ''.join(chunks)

    # non-breaking space before » ! ? : ; (also right after a closing tag)
    markup = re.sub(r'(\w|>)\s([»!?:;])', r'\1&nbsp;\2', markup)
    # non-breaking space after « (also right before an opening tag)
    markup = re.sub(r'(«)\s(\w|<)', r'\1&nbsp;\2', markup)

    if markup == original_markup:
        return

    # -- 3. put the result back into the tree ------------------------------
    # The markup has to be parsed again: handing the raw string to
    # replace_with() would insert it as text, and '<', '>' and '&' would be
    # escaped when the document is written out.
    fragment = BeautifulSoup(markup, 'html.parser')
    replacement = fragment.find(node.name)
    node.replace_with(replacement if replacement is not None else fragment)
|