build.py 2.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. import sys, os
  4. import shutil
  5. import markdown
  6. # import mistune
  7. from bs4 import BeautifulSoup
  8. import pypandoc
  9. import json
  10. import re
  11. # import json
  12. # import getopt
  13. # import urllib
  14. BOOKS_SRC = 'book-src'
  15. BUILD_d = "build"
  16. def main():
  17. if not os.path.isdir(BUILD_d):
  18. os.mkdir(BUILD_d)
  19. for book in os.listdir(BOOKS_SRC):
  20. if os.path.isdir(os.path.join(BOOKS_SRC, book)):
  21. # print(book)
  22. parse_book(book)
  23. def parse_book(book):
  24. book_name = book.replace('.git', '')
  25. print("- - -")
  26. print(book_name)
  27. print("- - -")
  28. # build destination
  29. book_build_d = os.path.join(BUILD_d,book_name)
  30. if os.path.isdir(book_build_d):
  31. shutil.rmtree(book_build_d, ignore_errors=True)
  32. os.mkdir(book_build_d)
  33. # table of content (ordered list of markdown files)
  34. sum_p = os.path.join(BOOKS_SRC, book, "SUMMARY.md")
  35. if not os.path.isfile(sum_p):
  36. print("No summary file, can't generate html")
  37. return
  38. sum_f = open(sum_p)
  39. sum_str = sum_f.read()
  40. # print(sum_str)
  41. sum_html = markdown.markdown(sum_str)
  42. # print(sum_html)
  43. sum_dom = BeautifulSoup(sum_html, 'html.parser')
  44. # print(sum_dom)
  45. toc = parse_summary(sum_dom.ul, {})
  46. print(toc)
  47. generate_html(book, toc, book_build_d)
  48. def parse_summary(ul, toc):
  49. i=0
  50. for li in ul.find_all('li',recursive=False):
  51. # print('li')
  52. for a in li.find_all('a',recursive=False):
  53. # print(a.get_text(strip=True))
  54. # print(a['href'])
  55. href = a['href']
  56. href = re.sub(r'^/', '', href)
  57. toc[i] = {
  58. 'label':a.get_text(strip=True),
  59. 'file':href
  60. }
  61. i = i+1
  62. return toc
  63. def generate_html(book, toc, book_build_d):
  64. railway = []
  65. for p in toc:
  66. print(toc[p]['file'])
  67. # generate html with pandoc
  68. # files
  69. md_f = toc[p]['file']
  70. html_f = md_f.replace('.md', '.html')
  71. html_f = html_f.replace('README', 'index')
  72. html_f = html_f.replace('/', '-')
  73. in_f = os.path.join(BOOKS_SRC, book, md_f)
  74. print(in_f)
  75. out_f = os.path.join(book_build_d,html_f)
  76. print(out_f)
  77. # pandoc options
  78. filters = []
  79. pdoc_args = ['--mathjax',
  80. '--smart']
  81. # pandoc command line
  82. output = pypandoc.convert_file(in_f,
  83. to='html5',
  84. format='md',
  85. extra_args=pdoc_args,
  86. filters=filters,
  87. outputfile=out_f)
  88. # save reference in railway
  89. railway.append({'label':toc[p]['label'], 'file':html_f})
  90. # save railway as json file
  91. with open(book_build_d+'/railway.json', 'w') as fp:
  92. json.dump(railway, fp, ensure_ascii=False, indent="\t")
  93. if __name__ == "__main__":
  94. main()