build.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. #!/usr/bin/python
  2. # -*- coding: utf-8 -*-
  3. # @Author: Bachir Soussi Chiadmi <bach>
  4. # @Date: 27-03-2017
  5. # @Email: bachir@figureslibres.io
  6. # @Last modified by: bach
  7. # @Last modified time: 21-04-2017
  8. # @License: GPL-V3
  9. import sys, os
  10. import shutil
  11. import markdown
  12. # import mistune
  13. from bs4 import BeautifulSoup
  14. import pypandoc
  15. import json
  16. import re
  17. BOOKS_SRC = 'book-src'
  18. BUILD_d = "build"
  19. CUR_PATH = os.path.dirname(os.path.abspath(__file__))
  20. _TOC = []
  21. def main():
  22. print("Building books")
  23. if not os.path.isdir(BUILD_d):
  24. os.mkdir(BUILD_d)
  25. # loop through books sources
  26. for book in os.listdir(BOOKS_SRC):
  27. if os.path.isdir(os.path.join(BOOKS_SRC, book)):
  28. # print(book)
  29. parse_book(book)
  30. with open(BUILD_d+'/toc.json', 'w') as fp:
  31. json.dump(_TOC, fp, ensure_ascii=False, indent="\t")
  32. def parse_book(book):
  33. book_name = book.replace('.git', '')
  34. print("- - -")
  35. print(book_name)
  36. print("- - -")
  37. # table of content (ordered list of markdown files)
  38. sum_p = os.path.join(BOOKS_SRC, book, "SUMMARY.md")
  39. if not os.path.isfile(sum_p):
  40. print("No summary file, can't generate html")
  41. return
  42. sum_f = open(sum_p)
  43. sum_str = sum_f.read()
  44. # print(sum_str)
  45. # convert md to html
  46. sum_html = markdown.markdown(sum_str)
  47. # print(sum_html)
  48. # create dom from html string (as it will be parsable)
  49. sum_dom = BeautifulSoup(sum_html, 'html.parser')
  50. # print(sum_dom)
  51. # parse html dom to get file list in the right order
  52. toc = parse_summary(sum_dom.ul, {})
  53. # print(toc)
  54. # generate final html build for html2print
  55. generate_html(book, toc, book_name)
  56. def parse_summary(ul, toc):
  57. i=0
  58. for li in ul.find_all('li',recursive=False):
  59. # print('li')
  60. for a in li.find_all('a',recursive=False):
  61. # print(a.get_text(strip=True))
  62. # print(a['href'])
  63. href = a['href']
  64. href = re.sub(r'^/', '', href)
  65. toc[i] = {
  66. 'label':a.get_text(strip=True),
  67. 'file':href
  68. }
  69. i = i+1
  70. return toc
  71. def generate_html(book, toc, book_name):
  72. # build destination
  73. book_build_d = os.path.join(BUILD_d,book_name)
  74. if os.path.isdir(book_build_d):
  75. shutil.rmtree(book_build_d, ignore_errors=True)
  76. os.mkdir(book_build_d)
  77. book_toc = {
  78. 'label':book_name,
  79. 'pages':[]
  80. }
  81. for p in toc:
  82. # print(toc[p]['file'])
  83. # generate html with pandoc
  84. # files
  85. md_f = toc[p]['file']
  86. in_f = os.path.join(BOOKS_SRC, book, md_f)
  87. if not os.path.isfile(in_f):
  88. print("Source path is not a file, can't generate html : "+in_f)
  89. continue
  90. # print('in_f : '+in_f)
  91. html_f = md_f.replace('.md', '.html')
  92. html_f = html_f.replace('README', 'index')
  93. html_f = html_f.replace('/', '-')
  94. out_f = os.path.join(book_build_d,html_f)
  95. # print('out_f : '+out_f)
  96. # pandoc options
  97. # filters = []
  98. pdoc_args = ['-s',
  99. '--mathjax',
  100. '--smart',
  101. '--css=../../assets/fonts/amiri/amiri.css',
  102. '--css=../../assets/css/dist/main.css',
  103. '--include-before-body=templates/top.tpl.html',
  104. '--include-after-body=templates/bot.tpl.html',
  105. '--include-after-body=assets/lib/jquery.min.js',
  106. '--include-after-body=assets/js/setup.js',
  107. '--include-after-body=assets/js/html2print.js',
  108. '--include-after-body=assets/js/script.js',
  109. '--include-after-body=templates/end.tpl.html']
  110. # pandoc command line
  111. # print(pypandoc.get_pandoc_version())
  112. output = pypandoc.convert_file(in_f,
  113. to='html5',
  114. format='md',
  115. extra_args=pdoc_args,
  116. # filters=filters,
  117. outputfile=out_f)
  118. # save reference in railway
  119. book_toc['pages'].append({'label':toc[p]['label'], 'file':html_f})
  120. global _TOC
  121. _TOC.append(book_toc)
  122. # save railway as json file
  123. # with open(book_build_d+'/railway.json', 'w') as fp:
  124. # json.dump(railway, fp, ensure_ascii=False, indent="\t")
  125. if __name__ == "__main__":
  126. main()