build.py 3.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import sys, os
  4. import shutil
  5. import markdown
  6. # import mistune
  7. from bs4 import BeautifulSoup
  8. import pypandoc
  9. import json
  10. import re
  11. # import json
  12. # import getopt
  13. # import urllib
# Relative path of the directory that holds one sub-directory per book source.
BOOKS_SRC = 'book-src'
# Relative path of the directory that receives one output sub-directory per book.
BUILD_d = "build"
# Absolute path of the directory containing this script.
CUR_PATH = os.path.dirname(os.path.abspath(__file__))
  17. def main():
  18. if not os.path.isdir(BUILD_d):
  19. os.mkdir(BUILD_d)
  20. for book in os.listdir(BOOKS_SRC):
  21. if os.path.isdir(os.path.join(BOOKS_SRC, book)):
  22. # print(book)
  23. parse_book(book)
  24. def parse_book(book):
  25. book_name = book.replace('.git', '')
  26. print("- - -")
  27. print(book_name)
  28. print("- - -")
  29. # build destination
  30. book_build_d = os.path.join(BUILD_d,book_name)
  31. if os.path.isdir(book_build_d):
  32. shutil.rmtree(book_build_d, ignore_errors=True)
  33. os.mkdir(book_build_d)
  34. # table of content (ordered list of markdown files)
  35. sum_p = os.path.join(BOOKS_SRC, book, "SUMMARY.md")
  36. if not os.path.isfile(sum_p):
  37. print("No summary file, can't generate html")
  38. return
  39. sum_f = open(sum_p)
  40. sum_str = sum_f.read()
  41. # print(sum_str)
  42. sum_html = markdown.markdown(sum_str)
  43. # print(sum_html)
  44. sum_dom = BeautifulSoup(sum_html, 'html.parser')
  45. # print(sum_dom)
  46. toc = parse_summary(sum_dom.ul, {})
  47. # print(toc)
  48. generate_html(book, toc, book_build_d)
  49. def parse_summary(ul, toc):
  50. i=0
  51. for li in ul.find_all('li',recursive=False):
  52. # print('li')
  53. for a in li.find_all('a',recursive=False):
  54. # print(a.get_text(strip=True))
  55. # print(a['href'])
  56. href = a['href']
  57. href = re.sub(r'^/', '', href)
  58. toc[i] = {
  59. 'label':a.get_text(strip=True),
  60. 'file':href
  61. }
  62. i = i+1
  63. return toc
  64. def generate_html(book, toc, book_build_d):
  65. railway = []
  66. for p in toc:
  67. # print(toc[p]['file'])
  68. # generate html with pandoc
  69. # files
  70. md_f = toc[p]['file']
  71. html_f = md_f.replace('.md', '.html')
  72. html_f = html_f.replace('README', 'index')
  73. html_f = html_f.replace('/', '-')
  74. in_f = os.path.join(BOOKS_SRC, book, md_f)
  75. # print(in_f)
  76. out_f = os.path.join(book_build_d,html_f)
  77. # print(out_f)
  78. # pandoc options
  79. # filters = []
  80. pdoc_args = ['-s',
  81. '--mathjax',
  82. '--smart',
  83. '--css=assets/fonts/amiri/amiri.css',
  84. '--css=assets/css/dist/styles.css',
  85. '--include-before-body=templates/top.tpl.html',
  86. '--include-after-body=templates/bot.tpl.html',
  87. '--include-after-body=assets/js/jquery.min.js',
  88. '--include-after-body=assets/js/script.js',
  89. '--include-after-body=templates/end.tpl.html']
  90. # pandoc command line
  91. # print(pypandoc.get_pandoc_version())
  92. output = pypandoc.convert_file(in_f,
  93. to='html5',
  94. format='md',
  95. extra_args=pdoc_args,
  96. # filters=filters,
  97. outputfile=out_f)
  98. # save reference in railway
  99. railway.append({'label':toc[p]['label'], 'file':html_f})
  100. # save railway as json file
  101. with open(book_build_d+'/railway.json', 'w') as fp:
  102. json.dump(railway, fp, ensure_ascii=False, indent="\t")
  103. if __name__ == "__main__":
  104. main()