#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Build HTML versions of the markdown books found under ``BOOKS_SRC``.

Each sub-directory of ``BOOKS_SRC`` is treated as one book.  The book's
``SUMMARY.md`` is read as its table of contents; every markdown file linked
from it is converted to HTML with pandoc into ``BUILD_d/<book name>/`` and the
ordered list of generated pages is saved next to them as ``railway.json``.
"""

import json
import os
import re
import shutil
import sys

import markdown
import pypandoc
from bs4 import BeautifulSoup

BOOKS_SRC = 'book-src'
BUILD_d = "build"


def main():
    """Build every book directory found in ``BOOKS_SRC``."""
    if not os.path.isdir(BOOKS_SRC):
        # Fail with a readable message instead of a raw listdir traceback.
        sys.exit("Books source directory '%s' not found" % BOOKS_SRC)

    os.makedirs(BUILD_d, exist_ok=True)

    # Sorted so that books are always processed in the same order.
    for book in sorted(os.listdir(BOOKS_SRC)):
        if os.path.isdir(os.path.join(BOOKS_SRC, book)):
            parse_book(book)


def parse_book(book):
    """Rebuild the HTML output of a single book.

    Args:
        book: Name of the book's directory inside ``BOOKS_SRC``.

    The build directory of the book is wiped and recreated first.  When the
    book has no ``SUMMARY.md`` (or the summary contains no list) a message is
    printed and nothing is generated.
    """
    book_name = book.replace('.git', '')
    print("- - -")
    print(book_name)
    print("- - -")

    # Build destination: start from an empty directory.
    book_build_d = os.path.join(BUILD_d, book_name)
    if os.path.isdir(book_build_d):
        shutil.rmtree(book_build_d, ignore_errors=True)
    # rmtree errors are ignored above, so the directory may still be there.
    os.makedirs(book_build_d, exist_ok=True)

    # Table of contents (ordered list of markdown files).
    sum_p = os.path.join(BOOKS_SRC, book, "SUMMARY.md")
    if not os.path.isfile(sum_p):
        print("No summary file, can't generate html")
        return

    # NOTE(review): the summary is assumed to be UTF-8, like the markdown
    # sources handed to pandoc -- confirm for legacy books.
    with open(sum_p, encoding='utf-8') as sum_f:
        sum_str = sum_f.read()

    sum_html = markdown.markdown(sum_str)
    sum_dom = BeautifulSoup(sum_html, 'html.parser')
    if sum_dom.ul is None:
        print("No list found in summary file, can't generate html")
        return

    toc = parse_summary(sum_dom.ul, {})
    print(toc)

    generate_html(book, toc, book_build_d)


def parse_summary(ul, toc):
    """Collect the links of a summary list into ``toc``.

    Only the direct ``<li>`` children of ``ul`` and the ``<a>`` elements that
    are direct children of those items are considered.

    Args:
        ul: The ``<ul>`` element of the rendered summary, or ``None``.
        toc: Dict to fill; entries are stored under integer keys counting
            up from 0, in document order.

    Returns:
        ``toc``, where each entry is ``{'label': <link text>,
        'file': <href without one leading slash>}``.
    """
    if ul is None:
        return toc

    i = 0
    for li in ul.find_all('li', recursive=False):
        for a in li.find_all('a', recursive=False):
            href = a.get('href')
            if not href:
                # An anchor without a target cannot be converted; skip it.
                continue
            toc[i] = {
                'label': a.get_text(strip=True),
                'file': re.sub(r'^/', '', href),
            }
            i += 1
    return toc


def _html_filename(md_f):
    """Return the flat HTML file name used for the markdown path ``md_f``.

    ``.md`` becomes ``.html``, ``README`` becomes ``index`` and path
    separators are replaced by ``-`` so that every page of a book lands
    directly in the book's build directory.
    """
    html_f = md_f.replace('.md', '.html')
    html_f = html_f.replace('README', 'index')
    return html_f.replace('/', '-')


def generate_html(book, toc, book_build_d):
    """Convert every file of ``toc`` to HTML and write ``railway.json``.

    Args:
        book: Name of the book's directory inside ``BOOKS_SRC``.
        toc: Table of contents as returned by ``parse_summary``.
        book_build_d: Directory receiving the HTML files and the railway.
    """
    # pandoc options
    # NOTE(review): pandoc 2.0 replaced the `--smart` option with the
    # `+smart` reader extension -- confirm the installed pandoc accepts it.
    pdoc_args = ['--mathjax', '--smart']
    filters = []

    railway = []
    for entry in toc.values():
        md_f = entry['file']
        print(md_f)

        html_f = _html_filename(md_f)
        in_f = os.path.join(BOOKS_SRC, book, md_f)
        print(in_f)
        out_f = os.path.join(book_build_d, html_f)
        print(out_f)

        # pandoc writes straight to out_f, so the return value is not needed.
        pypandoc.convert_file(in_f,
                              to='html5',
                              format='md',
                              extra_args=pdoc_args,
                              filters=filters,
                              outputfile=out_f)

        # Save reference in railway.
        railway.append({'label': entry['label'], 'file': html_f})

    # Save railway as json file.  Labels are dumped with ensure_ascii=False,
    # so the file encoding is set explicitly rather than left to the locale.
    railway_p = os.path.join(book_build_d, 'railway.json')
    with open(railway_p, 'w', encoding='utf-8') as fp:
        json.dump(railway, fp, ensure_ascii=False, indent="\t")


if __name__ == "__main__":
    main()