Browse Source

First draft of a Python builder converting GitBook Markdown to HTML with pypandoc

Bachir Soussi Chiadmi 7 years ago
parent
commit
c81294b930
2 changed files with 116 additions and 0 deletions
  1. 1 0
      .gitignore
  2. 115 0
      build.py

+ 1 - 0
.gitignore

@@ -2,3 +2,4 @@ ospkit.src
 OSPKit
 node_modules
 book-src/*.git
+build

+ 115 - 0
build.py

@@ -0,0 +1,115 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import sys, os
+import shutil
+import markdown
+# import mistune
+from bs4 import BeautifulSoup
+import pypandoc
+import json
+import re
+
+# import json
+# import getopt
+# import urllib
+
+# directory holding the book sources, one sub-directory per book
+BOOKS_SRC = 'book-src'
+# directory receiving the generated output, one sub-directory per book
+BUILD_d = "build"
+
+def main():
+   """Build every book found in BOOKS_SRC.
+
+   Each sub-directory of BOOKS_SRC is treated as one book and handed to
+   parse_book(); plain files are ignored. BUILD_d is created when it
+   does not exist yet.
+   """
+   # exist_ok makes this idempotent and avoids the check-then-create race
+   os.makedirs(BUILD_d, exist_ok=True)
+
+   # sorted() so books are always built in the same order, whatever
+   # order the filesystem lists them in
+   for book in sorted(os.listdir(BOOKS_SRC)):
+      if os.path.isdir(os.path.join(BOOKS_SRC, book)):
+         parse_book(book)
+
+
+def parse_book(book):
+   """Build one book: read its SUMMARY.md and render the pages it lists.
+
+   book -- name of the book's directory inside BOOKS_SRC; a trailing
+           '.git' is dropped to get the name of the build directory.
+
+   The book's directory under BUILD_d is emptied and recreated on every
+   call. Returns None. When the book has no SUMMARY.md, or the summary
+   contains no list, a message is printed and no html is generated.
+   """
+   # strip only a trailing '.git'; str.replace would also remove a
+   # '.git' found in the middle of the name (e.g. 'my.github-book')
+   suffix = '.git'
+   book_name = book[:-len(suffix)] if book.endswith(suffix) else book
+   print("- - -")
+   print(book_name)
+   print("- - -")
+
+   # build destination
+   book_build_d = os.path.join(BUILD_d, book_name)
+   if os.path.isdir(book_build_d):
+      shutil.rmtree(book_build_d, ignore_errors=True)
+   # exist_ok: rmtree errors are ignored above, so the directory may
+   # still be there
+   os.makedirs(book_build_d, exist_ok=True)
+
+   # table of content (ordered list of markdown files)
+   sum_p = os.path.join(BOOKS_SRC, book, "SUMMARY.md")
+   if not os.path.isfile(sum_p):
+      print("No summary file, can't generate html")
+      return
+
+   # the context manager closes the file; the encoding is explicit so
+   # the result does not depend on the locale of the machine
+   with open(sum_p, encoding='utf-8') as sum_f:
+      sum_str = sum_f.read()
+
+   sum_html = markdown.markdown(sum_str)
+   sum_dom = BeautifulSoup(sum_html, 'html.parser')
+   # .ul is None when the summary holds no list at all
+   if sum_dom.ul is None:
+      print("No list in summary file, can't generate html")
+      return
+
+   toc = parse_summary(sum_dom.ul, {})
+   print(toc)
+
+   generate_html(book, toc, book_build_d)
+
+def parse_summary(ul, toc):
+   """Fill toc with the links of the top-level items of ul.
+
+   ul  -- list element of the parsed summary (queried with find_all)
+   toc -- dict to fill; it is modified in place and also returned
+
+   Only <li> that are direct children of ul, and only <a> that are
+   direct children of those <li>, are looked at. An entry is stored
+   under the position of its <li> (counted from 0) as
+   {'label': link text, 'file': href without a leading '/'}.
+   An <li> without a direct link leaves its position unused; when an
+   <li> has several direct links the last one is kept.
+   """
+   items = ul.find_all('li', recursive=False)
+   for position, item in enumerate(items):
+      for link in item.find_all('a', recursive=False):
+         # hrefs may be written from the book root ('/page.md')
+         target = re.sub(r'^/', '', link['href'])
+         toc[position] = {
+            'label': link.get_text(strip=True),
+            'file': target
+         }
+
+   return toc
+
+def generate_html(book, toc, book_build_d):
+   """Convert every page listed in toc to html and write railway.json.
+
+   book         -- name of the book's directory inside BOOKS_SRC
+   toc          -- dict as returned by parse_summary(): every value is
+                   a dict with a 'label' and a 'file' (markdown path
+                   relative to the book directory)
+   book_build_d -- existing directory receiving the output
+
+   Each page is converted with pypandoc into a flat file name inside
+   book_build_d ('chapter/README.md' becomes 'chapter-index.html').
+   The converted pages are then saved, in toc order, as a list of
+   {'label', 'file'} in book_build_d/railway.json. Entries whose
+   markdown file does not exist are reported and skipped.
+   """
+   # pandoc options, the same for every page
+   # NOTE(review): '--smart' was removed in pandoc 2.0 in favour of the
+   # 'smart' reader extension -- confirm the targeted pandoc version
+   filters = []
+   pdoc_args = ['--mathjax',
+                '--smart']
+
+   railway = []
+   for entry in toc.values():
+      md_f = entry['file']
+      print(md_f)
+
+      # files
+      # swap the extension only: str.replace would also rewrite a '.md'
+      # found in the middle of the path
+      root, ext = os.path.splitext(md_f)
+      html_f = root + '.html' if ext == '.md' else md_f
+      html_f = html_f.replace('README', 'index')
+      # flatten sub-directories into the file name
+      html_f = html_f.replace('/', '-')
+      in_f = os.path.join(BOOKS_SRC, book, md_f)
+      print(in_f)
+      out_f = os.path.join(book_build_d, html_f)
+      print(out_f)
+
+      # a summary entry without a file must not abort the whole build
+      if not os.path.isfile(in_f):
+         print("Missing markdown file, skipped: " + in_f)
+         continue
+
+      # generate html with pandoc (written straight to out_f)
+      pypandoc.convert_file(in_f,
+                            to='html5',
+                            format='md',
+                            extra_args=pdoc_args,
+                            filters=filters,
+                            outputfile=out_f)
+
+      # save reference in railway
+      railway.append({'label': entry['label'], 'file': html_f})
+
+   # save railway as json file; ensure_ascii=False writes labels as
+   # they are, so the file encoding has to be set explicitly
+   railway_p = os.path.join(book_build_d, 'railway.json')
+   with open(railway_p, 'w', encoding='utf-8') as fp:
+      json.dump(railway, fp, ensure_ascii=False, indent="\t")
+
+
+# run the build only when executed as a script, not when imported
+if __name__ == "__main__":
+   main()