bachir
/
gitbook-html2print


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
							#!/usr/bin/python
# -*- coding: utf-8 -*-


# @Author: Bachir Soussi Chiadmi <bach>
# @Date:   27-03-2017
# @Email:  bachir@figureslibres.io
# @Last modified by:   bach
# @Last modified time: 21-04-2017
# @License: GPL-V3


import sys, os
import shutil
import markdown
# import mistune
from bs4 import BeautifulSoup
import pypandoc
import json
import re

# Directory scanned by main(): each sub-directory is treated as one book source.
_BOOKS_SRC = 'book-src'
# Output directory: receives one sub-directory per book plus toc.json.
_BUILD_d = "build"
# One {'label', 'file'} entry per generated book, appended by generate_html()
# and dumped to toc.json by main().
_TOC = []
# CUR_PATH = os.path.dirname(os.path.abspath(__file__))

def main():
   """Build every book found in the sources directory, then write the global TOC.

   Each sub-directory of ``_BOOKS_SRC`` is handed to ``parse_book``. Once all
   books are processed, the accumulated ``_TOC`` list is serialized to
   ``toc.json`` inside ``_BUILD_d``.

   Raises:
      OSError: if ``_BOOKS_SRC`` cannot be listed or the build directory /
         toc file cannot be created.
   """
   print("Building books")
   if not os.path.isdir(_BUILD_d):
      os.mkdir(_BUILD_d)

   # loop through books sources; sorted because os.listdir() returns entries
   # in arbitrary order, which would make the order of toc.json vary between
   # runs and machines
   for book in sorted(os.listdir(_BOOKS_SRC)):
      if os.path.isdir(os.path.join(_BOOKS_SRC, book)):
         parse_book(book)

   # ensure_ascii=False emits raw non-ASCII characters, so the file must be
   # opened with an explicit encoding instead of the locale default
   toc_p = os.path.join(_BUILD_d, 'toc.json')
   with open(toc_p, 'w', encoding='utf-8') as fp:
      json.dump(_TOC, fp, ensure_ascii=False, indent="\t")


def parse_book(book):
   """Read a book's SUMMARY.md and generate its html build.

   The summary is converted from markdown to html, its first bullet list is
   parsed into an ordered table of content by ``parse_summary``, and the
   result is passed to ``generate_html``. Books without a usable summary are
   skipped with a message.

   Args:
      book: name of the book directory inside ``_BOOKS_SRC``.
   """
   # strip only a trailing '.git'; str.replace() would also remove '.git'
   # from the middle of a name (e.g. 'my.github.io' -> 'myhub.io')
   book_name = book[:-len('.git')] if book.endswith('.git') else book
   print("- - -")
   print(book_name)
   print("- - -")

   # table of content (ordered list of markdown files)
   sum_p = os.path.join(_BOOKS_SRC, book, "SUMMARY.md")
   if not os.path.isfile(sum_p):
      print("No summary file, can't generate html")
      return

   # context manager so the file handle is always released
   with open(sum_p, encoding='utf-8') as sum_f:
      sum_str = sum_f.read()

   # convert md to html
   sum_html = markdown.markdown(sum_str)
   # create dom from html string (as it will be parsable)
   sum_dom = BeautifulSoup(sum_html, 'html.parser')

   # a summary without any bullet list has no <ul>; bail out instead of
   # crashing on None inside parse_summary
   if sum_dom.ul is None:
      print("No list found in summary file, can't generate html")
      return

   # parse html dom to get file list in the right order
   toc = parse_summary(sum_dom.ul, {})

   # generate final html build for html2print
   generate_html(book, toc, book_name)

def parse_summary(ul, toc):
   """Fill ``toc`` with the links found in the direct items of a list element.

   Only the direct ``<li>`` children of ``ul`` and the direct ``<a>`` children
   of each item are looked at. Entries are keyed by the position of the item
   in the list; an item without a direct link leaves its position unused, and
   when an item holds several direct links the last one is kept.

   Args:
      ul: element exposing ``find_all`` (the summary's list node).
      toc: dict to populate; it is modified in place.

   Returns:
      The same ``toc`` dict, mapping item position to
      ``{'label': <link text>, 'file': <href without one leading slash>}``.
   """
   for position, item in enumerate(ul.find_all('li', recursive=False)):
      for link in item.find_all('a', recursive=False):
         # drop a single leading slash from the link target
         target = re.sub(r'^/', '', link['href'])
         toc[position] = {'label': link.get_text(strip=True), 'file': target}

   return toc


def generate_html(book, toc, book_name):
   """Merge a book's pages into one markdown file and render it to html.

   The pages referenced by ``toc`` are concatenated, in order, into
   ``<_BUILD_d>/<book_name>/<book_name>.md``. pandoc then converts that file
   to a sibling ``.html`` file and the book is appended to the module-level
   ``_TOC`` list. Pages that do not exist are reported and skipped; if no
   page exists at all the book is skipped.

   Args:
      book: name of the book directory inside ``_BOOKS_SRC``.
      toc: mapping whose values are dicts with a ``'file'`` key holding a
         page path relative to the book directory.
      book_name: name used for the build sub-directory and the output files.
   """
   # build directory destination (start from a clean one)
   book_build_d = os.path.join(_BUILD_d, book_name)
   if os.path.isdir(book_build_d):
      shutil.rmtree(book_build_d, ignore_errors=True)
   # exist_ok: the best-effort rmtree above may have left the directory behind
   os.makedirs(book_build_d, exist_ok=True)

   # main markdown book file where all pages will be merged
   book_md_f = os.path.join(book_build_d, book_name + '.md')

   # keep only the pages that actually exist, in toc order
   pages = []
   for page in toc.values():
      in_f = os.path.join(_BOOKS_SRC, book, page['file'])
      if not os.path.isfile(in_f):
         print("Source path is not a file, can't generate html : "+in_f)
         continue
      pages.append(in_f)

   # without any page there is no markdown file for pandoc to convert
   if not pages:
      print("No source page found, can't generate html : "+book_name)
      return

   # open the merged file once; every handle is closed by its context manager
   with open(book_md_f, 'w', encoding='utf-8') as fp:
      for in_f in pages:
         with open(in_f, 'r', encoding='utf-8') as md_f:
            content = md_f.read()
         fp.write(content)
         # separate pages with a blank line, otherwise the first line of a
         # page is glued to the last line of the previous one when that page
         # does not end with a newline
         if not content.endswith('\n'):
            fp.write('\n')
         fp.write('\n')

   # generate html with pandoc

   # create the html file name; splitext only swaps the final extension,
   # whereas str.replace would rewrite every '.md' found in the path
   html_f = os.path.splitext(book_md_f)[0] + '.html'

   # pandoc options
   # NOTE(review): pandoc >= 2.0 dropped --smart in favour of the "+smart"
   # format extension -- confirm against the pandoc version in use
   pdoc_args = ['-s',
                '--mathjax',
                '--smart',
                '--css=../../assets/fonts/amiri/amiri.css',
                '--css=../../assets/css/dist/main.css',
                '--include-before-body=templates/top.tpl.html',
                '--include-after-body=templates/bot.tpl.html',
                '--include-after-body=assets/lib/jquery.min.js',
                '--include-after-body=assets/js/setup.js',
                '--include-after-body=assets/js/html2print.js',
                '--include-after-body=assets/js/script.js',
                '--include-after-body=templates/end.tpl.html']

   # pandoc command line (the result is written straight to html_f)
   pypandoc.convert_file(book_md_f,
                         to='html5',
                         format='md',
                         extra_args=pdoc_args,
                         outputfile=html_f)

   # register the book in the global table of content; the list is only
   # mutated, never rebound, so no `global` statement is needed
   _TOC.append({
      'label': book_name,
      'file': html_f
   })


# Run the build only when the file is executed as a script, not on import.
if __name__ == "__main__":
   main()