#!/usr/bin/python
# -*- coding: utf-8 -*-
import json
import os
import re
import shutil
import sys

import markdown
import pypandoc
from bs4 import BeautifulSoup
# import mistune
# import json
# import getopt
# import urllib
# Directory holding one sub-directory per book (markdown sources).
BOOKS_SRC = 'book-src'

# Directory receiving the generated output, one sub-directory per book.
BUILD_d = "build"
def main():
    """Build every book found under BOOKS_SRC.

    Makes sure the BUILD_d output directory exists, then calls
    parse_book() for each sub-directory of BOOKS_SRC. Plain files in
    BOOKS_SRC are ignored.

    Raises:
        OSError: if BOOKS_SRC cannot be listed or BUILD_d cannot be
            created.
    """
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(BUILD_d, exist_ok=True)

    # os.listdir() returns entries in arbitrary order; sort them so the
    # build order (and the console output) is reproducible.
    for book in sorted(os.listdir(BOOKS_SRC)):
        if os.path.isdir(os.path.join(BOOKS_SRC, book)):
            parse_book(book)
def parse_book(book):
    """Build a single book from its SUMMARY.md table of contents.

    The book's build directory (BUILD_d/<book name>) is wiped and
    recreated, then SUMMARY.md is rendered to HTML, parsed into a table of
    contents with parse_summary(), and handed to generate_html().

    Args:
        book: name of the book's directory inside BOOKS_SRC.

    Returns:
        None. Returns early, after printing a message, when the book has
        no SUMMARY.md or when the summary contains no list.
    """
    book_name = book.replace('.git', '')
    print("- - -")
    print(book_name)
    print("- - -")

    # build destination: start from a clean directory on every run
    book_build_d = os.path.join(BUILD_d, book_name)
    if os.path.isdir(book_build_d):
        shutil.rmtree(book_build_d, ignore_errors=True)
    # rmtree above is best-effort, so the directory may still exist;
    # exist_ok keeps that from raising here.
    os.makedirs(book_build_d, exist_ok=True)

    # table of content (ordered list of markdown files)
    sum_p = os.path.join(BOOKS_SRC, book, "SUMMARY.md")
    if not os.path.isfile(sum_p):
        print("No summary file, can't generate html")
        return

    # Read as UTF-8 explicitly (not the locale default) and close the
    # handle deterministically.
    with open(sum_p, encoding='utf-8') as sum_f:
        sum_str = sum_f.read()

    sum_dom = BeautifulSoup(markdown.markdown(sum_str), 'html.parser')

    # .ul is None when the rendered summary contains no <ul> at all.
    summary_list = sum_dom.ul
    if summary_list is None:
        print("No list found in summary file, can't generate html")
        return

    toc = parse_summary(summary_list, {})
    print(toc)

    generate_html(book, toc, book_build_d)
def parse_summary(ul, toc):
    """Collect the links of a summary list into an ordered table of contents.

    Only anchors that are direct children of the list's direct <li>
    children are considered; nested lists are not descended into.

    Args:
        ul: the parsed <ul> element of the summary (an object offering
            find_all(name, recursive=False)), or None when the summary
            has no list.
        toc: dict to fill. Entries are stored under integer keys starting
            at 0, in document order.

    Returns:
        The same toc dict, where each entry is
        {'label': <link text>, 'file': <href without a leading slash>}.
    """
    # Nothing to parse: leave toc untouched instead of crashing on None.
    if ul is None:
        return toc

    i = 0
    for li in ul.find_all('li', recursive=False):
        for a in li.find_all('a', recursive=False):
            # An anchor without a target cannot be converted; skip it
            # rather than raising KeyError.
            href = a.get('href')
            if not href:
                continue
            toc[i] = {
                'label': a.get_text(strip=True),
                # hrefs are relative to the book root; drop a leading '/'
                'file': re.sub(r'^/', '', href),
            }
            i += 1
    return toc
def generate_html(book, toc, book_build_d):
    """Convert every page listed in toc to HTML and write railway.json.

    Each markdown file is converted with pandoc (through pypandoc) into
    book_build_d. The output name is derived from the source path:
    '.md' becomes '.html', 'README' becomes 'index' and '/' becomes '-',
    so all pages land flat in the build directory. The ordered list of
    generated pages is then saved as railway.json in the same directory.

    Args:
        book: name of the book's directory inside BOOKS_SRC.
        toc: dict of {'label': ..., 'file': ...} entries, as returned by
            parse_summary().
        book_build_d: existing directory receiving the generated files.
    """
    railway = []

    # pandoc options are identical for every page, so build them once.
    # NOTE(review): '--smart' was dropped from pandoc's command line in
    # 2.0 (replaced by the '+smart' format extension) - confirm against
    # the pandoc version this script is meant to run with.
    filters = []
    pdoc_args = ['--mathjax',
                 '--smart']

    for entry in toc.values():
        md_f = entry['file']
        print(md_f)

        # files
        html_f = md_f.replace('.md', '.html')
        html_f = html_f.replace('README', 'index')
        html_f = html_f.replace('/', '-')

        in_f = os.path.join(BOOKS_SRC, book, md_f)
        print(in_f)
        out_f = os.path.join(book_build_d, html_f)
        print(out_f)

        # A summary link pointing at a missing file should not abort the
        # whole book; skip it and keep it out of the railway.
        if not os.path.isfile(in_f):
            print("Source file not found, skipping: " + in_f)
            continue

        # generate html with pandoc (result is written to out_f)
        pypandoc.convert_file(in_f,
                              to='html5',
                              format='md',
                              extra_args=pdoc_args,
                              filters=filters,
                              outputfile=out_f)

        # save reference in railway
        railway.append({'label': entry['label'], 'file': html_f})

    # save railway as json file; ensure_ascii=False emits raw non-ASCII
    # characters, so the file must be opened as UTF-8 explicitly.
    railway_p = os.path.join(book_build_d, 'railway.json')
    with open(railway_p, 'w', encoding='utf-8') as fp:
        json.dump(railway, fp, ensure_ascii=False, indent="\t")
# Script entry point: build all books only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()