#!/usr/bin/python
# -*- coding: utf-8 -*-
# @Author: Bachir Soussi Chiadmi <bach>
# @Date: 27-03-2017
# @Email: bachir@figureslibres.io
# @Last modified by: bach
# @Last modified time: 21-04-2017
# @License: GPL-V3
import json
import os
import re
import shutil
import sys

import markdown
# import mistune
import pypandoc
from bs4 import BeautifulSoup
  16. _BOOK_SRC = 'book-src'
  17. _BUILD_d = "build"
  18. # CUR_PATH = os.path.dirname(os.path.abspath(__file__))
  19. print("Building book")
  20. def main():
  21. # clean build directory
  22. if os.path.isdir(_BUILD_d):
  23. shutil.rmtree(_BUILD_d, ignore_errors=True)
  24. os.mkdir(_BUILD_d)
  25. parse_book(_BOOK_SRC)
  26. def parse_book(book):
  27. # book_name = book.replace('.git', '')
  28. # print("- - -")
  29. print("Parse book")
  30. # print("- - -")
  31. # table of content (ordered list of markdown files)
  32. sum_p = os.path.join(_BOOK_SRC, "SUMMARY.md")
  33. if not os.path.isfile(sum_p):
  34. print("No summary file, can't generate html")
  35. return
  36. sum_f = open(sum_p)
  37. sum_str = sum_f.read()
  38. # print(sum_str)
  39. # convert md to html
  40. sum_html = markdown.markdown(sum_str)
  41. # print(sum_html)
  42. # create dom from html string (as it will be parsable)
  43. sum_dom = BeautifulSoup(sum_html, 'html.parser')
  44. # print(sum_dom)
  45. # parse html dom to get file list in the right order
  46. toc = parse_summary(sum_dom.ul, {})
  47. # print(toc)
  48. # generate final html build for html2print
  49. generate_html(book, toc)
  50. def parse_summary(ul, toc):
  51. print("Parse summary")
  52. i=0
  53. for li in ul.find_all('li',recursive=False):
  54. # print('li')
  55. for a in li.find_all('a',recursive=False):
  56. # print(a.get_text(strip=True))
  57. # print(a['href'])
  58. href = a['href']
  59. href = re.sub(r'^/', '', href)
  60. toc[i] = {
  61. 'label':a.get_text(strip=True),
  62. 'file':href
  63. }
  64. i = i+1
  65. return toc
  66. def generate_html(book, toc):
  67. print("Generate html")
  68. #
  69. # create main html dom from template
  70. template_f = open("templates/main.tpl.html", "r")
  71. template_html = template_f.read()
  72. template_dom = BeautifulSoup(template_html, 'html.parser')
  73. # replace title
  74. # template_dom.html.head.title.contents[0].replaceWith(book_name)
  75. # get story div
  76. story_dom = template_dom.find('div', {"id":"flow-main"})
  77. #
  78. # loop through pages to convert them to html and add it to main html file
  79. # book_build_d_pages = os.path.join(_BUILD_d,'pages')
  80. # os.mkdir(book_build_d_pages)
  81. pi = 0
  82. for p in toc:
  83. # print(toc[p])
  84. pagename = toc[p]['label']
  85. pageid = re.sub('[^a-z0-9]+', '-', pagename.lower())
  86. print(pageid)
  87. # files
  88. in_f = os.path.join(_BOOK_SRC, toc[p]['file'])
  89. if not os.path.isfile(in_f):
  90. print("Source path is not a file, can't generate html : "+in_f)
  91. continue
  92. # print('in_f : '+in_f)
  93. # out_f = os.path.join(book_build_d_pages, toc[p]['file'].replace('/', '-').replace('.md', '.html'))
  94. # print('out_f : '+out_f)
  95. pdoc_args = ['--mathjax',
  96. '--smart']
  97. pdoc_filters = []
  98. output = pypandoc.convert_file(in_f,
  99. to='html5',
  100. format='markdown+header_attributes+link_attributes+bracketed_spans',
  101. extra_args=pdoc_args,
  102. filters=pdoc_filters)
  103. # outputfile=out_f)
  104. output_dom = BeautifulSoup(output, 'html.parser')
  105. # copy images
  106. for img in output_dom.find_all('img'):
  107. # print('-- img ',img)
  108. att_src = re.sub(r"^\/", "", img['src'])
  109. img['src'] = att_src
  110. # domimg = output_dom.find('img', {'src':img['src']})
  111. # domimg['src'] = att_src
  112. # print(domimg)
  113. src_img = os.path.join(_BOOK_SRC, att_src)
  114. # print('- - '+src_img)
  115. if not os.path.isfile(src_img):
  116. print("Source path is not a file, can't copy img : "+src_img)
  117. continue
  118. dest_img = os.path.join(_BUILD_d, att_src)
  119. # print('- - '+dest_img)
  120. dest_path, dest_file = os.path.split(dest_img)
  121. if not os.path.isdir(dest_path):
  122. os.makedirs(dest_path)
  123. shutil.copyfile(src_img, dest_img)
  124. # append html story page to template_dom
  125. story_page = BeautifulSoup('<div class="story-page story-page-'+str(pi)+'" id="'+pageid+'"></div>', 'html.parser')
  126. story_page.div.append(output_dom)
  127. story_dom.append(story_page)
  128. pi = pi+1
  129. # create main html file from filled template html dom
  130. book_html_f = os.path.join(_BUILD_d,'stories.html')
  131. with open(book_html_f, 'w') as fp:
  132. fp.write(template_dom.prettify())
  133. if __name__ == "__main__":
  134. main()