123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172 |
- #!/usr/bin/python
- # -*- coding: utf-8 -*-
- # @Author: Bachir Soussi Chiadmi <bach>
- # @Date: 27-03-2017
- # @Email: bachir@figureslibres.io
- # @Last modified by: bach
- # @Last modified time: 21-04-2017
- # @License: GPL-V3
- import sys, os, shutil
- import markdown
- # import mistune
- from bs4 import BeautifulSoup
- import pypandoc
- import json
- import re
# directory that holds the book sources (SUMMARY.md, markdown pages, images)
_BOOK_SRC = 'book-src'
# directory that receives the generated html file and the copied images
_BUILD_d = "build"
# CUR_PATH = os.path.dirname(os.path.abspath(__file__))
# module-level statement: printed as soon as the file is loaded, before main()
print("Building book")
def main():
    """Rebuild the book from scratch.

    Removes the build directory (``_BUILD_d``) when it exists, makes sure an
    empty one is available and then runs ``parse_book`` on ``_BOOK_SRC``.
    """
    # clean build directory
    if os.path.isdir(_BUILD_d):
        shutil.rmtree(_BUILD_d, ignore_errors=True)
    # rmtree() runs with ignore_errors=True, so the directory can survive a
    # partial removal; only create it when it is really gone so that this
    # best-effort clean does not end in a FileExistsError.
    if not os.path.isdir(_BUILD_d):
        os.mkdir(_BUILD_d)
    parse_book(_BOOK_SRC)
def parse_book(book):
    """Read the book's table of contents and trigger the html build.

    ``SUMMARY.md`` (looked up in ``_BOOK_SRC``) is converted from markdown to
    html, the first ``<ul>`` of the result is turned into a table of contents
    by ``parse_summary`` and that table is handed to ``generate_html``.

    :param book: book identifier; it is only forwarded to ``generate_html``,
        the summary path itself is resolved against ``_BOOK_SRC``.
    :return: None. A message is printed and nothing is generated when the
        summary file is missing or does not contain a list.
    """
    print("Parse book")
    # table of content (ordered list of markdown files)
    sum_p = os.path.join(_BOOK_SRC, "SUMMARY.md")
    if not os.path.isfile(sum_p):
        print("No summary file, can't generate html")
        return
    # context manager so the file handle is closed once the text is read
    with open(sum_p) as sum_f:
        sum_str = sum_f.read()
    # convert md to html
    sum_html = markdown.markdown(sum_str)
    # create dom from html string (as it will be parsable)
    sum_dom = BeautifulSoup(sum_html, 'html.parser')
    summary_list = sum_dom.ul
    if summary_list is None:
        # a summary without any list has no page order to build from
        print("No list in summary file, can't generate html")
        return
    # parse html dom to get file list in the right order
    toc = parse_summary(summary_list, {})
    # generate final html build for html2print
    generate_html(book, toc)
def parse_summary(ul, toc):
    """Collect the linked pages of a summary list, in document order.

    Only the direct ``<li>`` children of ``ul`` and the ``<a>`` elements that
    are direct children of those items are considered.

    :param ul: the ``<ul>`` element of the rendered summary (any object with
        a BeautifulSoup-like ``find_all``); ``None`` is accepted and adds no
        entry.
    :param toc: dict that receives the entries; it is filled in place, keyed
        by consecutive integers starting at 0.
    :return: ``toc``; every entry is ``{'label': <link text>, 'file': <href
        without its leading slash>}``.
    """
    print("Parse summary")
    if ul is None:
        # no list found in the summary: nothing to add
        return toc
    i = 0
    for li in ul.find_all('li', recursive=False):
        for a in li.find_all('a', recursive=False):
            href = a.get('href')
            if href is None:
                # an anchor without target cannot name a page file
                continue
            toc[i] = {
                'label': a.get_text(strip=True),
                # hrefs are stored relative to the book source directory
                'file': re.sub(r'^/', '', href),
            }
            i = i + 1
    return toc
def _copy_page_images(page_dom):
    """Rewrite the image paths of one converted page and copy the files.

    Every ``<img>`` gets its ``src`` stripped of a leading slash; when the
    resulting path is a file below ``_BOOK_SRC`` it is copied to the same
    relative path below ``_BUILD_d``.
    """
    for img in page_dom.find_all('img'):
        src = img.get('src')
        if src is None:
            # <img> without a src attribute: nothing to rewrite or copy
            continue
        att_src = re.sub(r"^\/", "", src)
        img['src'] = att_src
        src_img = os.path.join(_BOOK_SRC, att_src)
        if not os.path.isfile(src_img):
            print("Source path is not a file, can't copy img : "+src_img)
            continue
        dest_img = os.path.join(_BUILD_d, att_src)
        dest_path = os.path.dirname(dest_img)
        if not os.path.isdir(dest_path):
            os.makedirs(dest_path)
        shutil.copyfile(src_img, dest_img)


def generate_html(book, toc):
    """Convert every page of the table of contents and write stories.html.

    The template ``templates/main.tpl.html`` is loaded, each page listed in
    ``toc`` is converted from markdown to html5 with pandoc, wrapped in a
    ``<div class="story-page story-page-N" id="...">`` and appended to the
    template's ``div#flow-main``. The filled template is written to
    ``stories.html`` in ``_BUILD_d``.

    :param book: book identifier; not used by the current implementation.
    :param toc: dict of ``{'label': ..., 'file': ...}`` entries as built by
        ``parse_summary``; pages are emitted in the dict's iteration order.
    :return: None. Pages whose source file is missing are reported and
        skipped; nothing is written when the template has no
        ``div#flow-main``.
    """
    print("Generate html")
    # create main html dom from template (handle closed right after reading)
    with open("templates/main.tpl.html", "r") as template_f:
        template_html = template_f.read()
    template_dom = BeautifulSoup(template_html, 'html.parser')
    # get story div
    story_dom = template_dom.find('div', {"id": "flow-main"})
    if story_dom is None:
        # fail early instead of crashing after every page has been converted
        print("No flow-main div in template, can't generate html")
        return

    # pandoc options are the same for every page
    # NOTE(review): '--smart' is no longer a command line option in
    # pandoc >= 2.0 (replaced by the '+smart' extension) - confirm the pandoc
    # version this build is meant to run with.
    pdoc_args = ['--mathjax',
                 '--smart']
    pdoc_filters = []

    # loop through pages to convert them to html and add it to main html file
    pi = 0  # counts only the pages that were actually converted
    for page in toc.values():
        pagename = page['label']
        # html id: lowercase label, every other run of characters becomes '-'
        pageid = re.sub('[^a-z0-9]+', '-', pagename.lower())
        print(pageid)
        # files
        in_f = os.path.join(_BOOK_SRC, page['file'])
        if not os.path.isfile(in_f):
            print("Source path is not a file, can't generate html : "+in_f)
            continue
        output = pypandoc.convert_file(
            in_f,
            to='html5',
            format='markdown+header_attributes+link_attributes+bracketed_spans',
            extra_args=pdoc_args,
            filters=pdoc_filters)
        output_dom = BeautifulSoup(output, 'html.parser')
        # copy images
        _copy_page_images(output_dom)
        # append html story page to template_dom
        story_page = BeautifulSoup(
            '<div class="story-page story-page-{0}" id="{1}"></div>'.format(pi, pageid),
            'html.parser')
        story_page.div.append(output_dom)
        story_dom.append(story_page)
        pi = pi + 1

    # create main html file from filled template html dom
    book_html_f = os.path.join(_BUILD_d, 'stories.html')
    with open(book_html_f, 'w') as fp:
        fp.write(template_dom.prettify())
# run the build only when this file is executed as a script
if __name__ == "__main__":
    main()
|