#!/usr/bin/python
# -*- coding: utf-8 -*-

# @Author: Bachir Soussi Chiadmi
# @Date:   27-03-2017
# @Email:  bachir@figureslibres.io
# @Last modified by:   bach
# @Last modified time: 21-04-2017
# @License: GPL-V3

"""Build one HTML file per book from the markdown sources in ``book-src``.

For every directory under ``book-src`` that has a ``SUMMARY.md``, the pages
linked from that summary are converted to HTML with pandoc, injected into
``templates/main.tpl.html`` and written to ``build/<book>/<book>.html``.
A ``build/toc.json`` index listing every generated book is written last.
"""

import sys
import os
import shutil
import markdown
# import mistune
from bs4 import BeautifulSoup
import pypandoc
import json
import re

_BOOKS_SRC = 'book-src'
_BUILD_d = "build"
_TOC = []
# CUR_PATH = os.path.dirname(os.path.abspath(__file__))

# Wrapper element created around each converted page.
# NOTE(review): the original literal was lost in this copy of the file; only
# the fact that it contains a <div> is certain (the code accesses
# ``story_page.div``). Restore any class/id attributes the stylesheet needs.
_STORY_PAGE_HTML = '<div></div>'


def main():
    """Build every book found in ``_BOOKS_SRC`` and write ``toc.json``."""
    print("Building books")

    os.makedirs(_BUILD_d, exist_ok=True)

    # loop through books sources (sorted so toc.json is stable between runs)
    for book in sorted(os.listdir(_BOOKS_SRC)):
        if os.path.isdir(os.path.join(_BOOKS_SRC, book)):
            parse_book(book)

    # ensure_ascii=False emits raw non-ASCII characters, so the encoding of
    # the output file must be explicit rather than locale-dependent.
    toc_path = os.path.join(_BUILD_d, 'toc.json')
    with open(toc_path, 'w', encoding='utf-8') as fp:
        json.dump(_TOC, fp, ensure_ascii=False, indent="\t")


def parse_book(book):
    """Read a book's ``SUMMARY.md`` and generate its HTML build.

    Args:
        book: name of the book directory inside ``_BOOKS_SRC``.

    Returns:
        None. The book is skipped (with a printed message) when it has no
        ``SUMMARY.md`` or when the summary contains no list.
    """
    book_name = book.replace('.git', '')
    print("- - -")
    print(book_name)
    print("- - -")

    # table of content (ordered list of markdown files)
    sum_p = os.path.join(_BOOKS_SRC, book, "SUMMARY.md")
    if not os.path.isfile(sum_p):
        print("No summary file, can't generate html")
        return

    with open(sum_p, encoding='utf-8') as sum_f:
        sum_str = sum_f.read()

    # convert md to html, then build a dom from it so it can be walked
    sum_html = markdown.markdown(sum_str)
    sum_dom = BeautifulSoup(sum_html, 'html.parser')

    # a summary without any list has no <ul>; nothing to build in that case
    if sum_dom.ul is None:
        print("No list found in summary file, can't generate html")
        return

    # parse html dom to get file list in the right order
    toc = parse_summary(sum_dom.ul, {})

    # generate final html build for html2print
    generate_html(book, toc, book_name)


def parse_summary(ul, toc):
    """Collect the links of a summary list, in document order.

    Only the direct ``<li>`` children of ``ul`` and the direct ``<a>``
    children of each ``<li>`` are considered; nested lists are not walked.

    Args:
        ul: BeautifulSoup tag of the summary's top-level list.
        toc: dict to fill; entries are written under integer keys starting
            at 0.

    Returns:
        The same ``toc`` dict, mapping index to
        ``{'label': <link text>, 'file': <href without one leading slash>}``.
    """
    i = 0
    for li in ul.find_all('li', recursive=False):
        for a in li.find_all('a', recursive=False):
            href = a.get('href')
            if not href:
                # anchor without a target: nothing to convert
                continue
            # drop a leading slash so os.path.join keeps the book directory
            href = re.sub(r'^/', '', href)
            toc[i] = {
                'label': a.get_text(strip=True),
                'file': href
            }
            i = i + 1
    return toc


def generate_html(book, toc, book_name):
    """Convert a book's pages to HTML and write the assembled book file.

    Each entry in ``toc`` is converted with pandoc, wrapped in a ``<div>``
    and appended to the ``div#my-story`` element of the main template. The
    result is written to ``<_BUILD_d>/<book_name>/<book_name>.html`` and an
    entry for the book is appended to the module-level ``_TOC`` list.

    Args:
        book: name of the book directory inside ``_BOOKS_SRC``.
        toc: dict as returned by ``parse_summary``.
        book_name: display name, also used for the output directory and file.
    """
    # build directory destination (start from a clean directory)
    book_build_d = os.path.join(_BUILD_d, book_name)
    if os.path.isdir(book_build_d):
        shutil.rmtree(book_build_d, ignore_errors=True)
    # exist_ok: rmtree above ignores errors, so the directory may survive
    os.makedirs(book_build_d, exist_ok=True)

    # create main html dom from template
    with open("templates/main.tpl.html", "r", encoding='utf-8') as template_f:
        template_html = template_f.read()
    template_dom = BeautifulSoup(template_html, 'html.parser')

    # replace title (assigning .string also works when <title> is empty)
    template_dom.html.head.title.string = book_name

    # get story div
    story_dom = template_dom.find('div', {"id": "my-story"})

    # per-page output directory; currently created but left empty because
    # pages are only appended to the main html file
    book_build_d_pages = os.path.join(book_build_d, 'pages')
    os.makedirs(book_build_d_pages, exist_ok=True)

    # NOTE(review): '--smart' was removed in pandoc 2.0 in favour of the
    # '+smart' extension; confirm which pandoc version this script targets.
    pdoc_args = ['--mathjax', '--smart']

    # loop through pages to convert them to html and add it to main html file
    for entry in toc.values():
        in_f = os.path.join(_BOOKS_SRC, book, entry['file'])
        if not os.path.isfile(in_f):
            print("Source path is not a file, can't generate html : "+in_f)
            continue

        output = pypandoc.convert_file(in_f,
                                       to='html5',
                                       format='md',
                                       extra_args=pdoc_args)

        # append html story page to template_dom; the parser is given
        # explicitly so fragments are never wrapped in <html><body>
        story_page = BeautifulSoup(_STORY_PAGE_HTML, 'html.parser')
        story_page.div.append(BeautifulSoup(output, 'html.parser'))
        story_dom.append(story_page)

    # create main html file from filled template html dom
    book_html_f = os.path.join(book_build_d, book_name+'.html')
    with open(book_html_f, 'w', encoding='utf-8') as fp:
        fp.write(template_dom.prettify())

    # register the book in the global table of content
    _TOC.append({
        'label': book_name,
        'file': book_html_f
    })


if __name__ == "__main__":
    main()