# nb_utils.py
# -*- coding: utf-8 -*-
import json
import os
import re
import shutil
import sys
from datetime import datetime
from itertools import chain, zip_longest
from pathlib import Path
from subprocess import run, CalledProcessError
from tempfile import TemporaryDirectory

from jinja2 import Template
from lxml import etree
from nbformat import read, NO_CONVERT
  14. title_font_size = 11
  15. item_font_size = 9
  16. head_margin = 3
  17. text_margin = 2
  18. SVG_TEXT = '{http://www.w3.org/2000/svg}text'
  19. SVG_RECT = '{http://www.w3.org/2000/svg}rect'
  20. def parse_headers(nb_path):
  21. nb = read(str(nb_path), as_version=NO_CONVERT)
  22. # Notebookのセルからmarkdownの部分を取り出し、行ごとのリストにする
  23. lines = [
  24. line.strip()
  25. for line in chain.from_iterable(
  26. cell['source'].split('\n')
  27. for cell in nb.cells
  28. if cell['cell_type'] == 'markdown'
  29. )
  30. if len(line.strip()) > 0 and not line.startswith('---')
  31. ]
  32. # h1, h2 の行とその次行の最初の1文を取り出す
  33. headers = [
  34. (' '.join(line0.split()[1:]),
  35. line1.split("。")[0] if line1 is not None else '')
  36. for (line0, line1) in zip_longest(lines, lines[1:])
  37. if line0.startswith('# ') or line0.startswith('## ')
  38. ]
  39. # 最初の見出しはtitle, 残りはheadersとして返す
  40. return {
  41. 'path': nb_path.as_posix(),
  42. 'title': {
  43. 'text': _to_title_text(nb_path, headers[0][0]),
  44. 'summary': headers[0][1],
  45. },
  46. 'headers': [
  47. {
  48. 'text': text,
  49. 'summary': (
  50. summary if not re.match(r'(?:#|!\[)', summary) else ''),
  51. }
  52. for (text, summary) in headers[1:]
  53. ],
  54. }
  55. def _to_title_text(nb_path, text):
  56. no = nb_path.name.split('-')[0]
  57. title = text if not text.startswith('About:') else text[6:]
  58. return f'{title}'
  59. def _get_notebook_headers(nb_dir):
  60. return dict([
  61. (nb.name, parse_headers(nb))
  62. for nb in nb_dir.glob("*.ipynb")
  63. ])
  64. def notebooks_toc(nb_dir):
  65. nb_headers = sorted(
  66. _get_notebook_headers(Path(nb_dir)).items(),
  67. key=lambda x: x[0])
  68. return "\n".join(chain.from_iterable([
  69. [
  70. f'* [{headers["title"]["text"]}]({nb_dir}/{str(nb)})'
  71. ] + list(chain.from_iterable([
  72. [
  73. f' - {header["text"]}',
  74. (f' - {header["summary"]}'
  75. if len(header["summary"]) > 0 else ''),
  76. ]
  77. for header in headers['headers']
  78. ]))
  79. for nb, headers in nb_headers
  80. ]))
  81. import json
  82. JSON = ""
  83. def load_json(PATH):
  84. with open(PATH) as f:
  85. JSON = json.load(f)
  86. return JSON
  87. def generate_svg_diag(
  88. output='WORKFLOW/images/notebooks.svg',
  89. diag='WORKFLOW/images/notebooks.diag',
  90. dir_util='WORKFLOW/FLOW/util',
  91. dir_01='WORKFLOW/FLOW/01_preparation_phase',
  92. dir_02='WORKFLOW/FLOW/02_experimental_phase',
  93. dir_03='WORKFLOW/FLOW/03_after_research_phase',
  94. font='.fonts/ipag.ttf',
  95. ):
  96. with TemporaryDirectory() as workdir:
  97. skeleton = Path(workdir) / 'skeleton.svg'
  98. _generate_skeleton(skeleton, Path(diag), Path(font))
  99. _embed_detail_information(Path(output), skeleton, Path(dir_util), Path(dir_01), Path(dir_02), Path(dir_03))
  100. return output
  101. def _generate_skeleton(output, diag, font):
  102. run(['blockdiag', '-f', font, '-Tsvg', '-o', output, diag], check=True)
  103. def setup_python_path():
  104. ver = sys.version_info
  105. lib_path = f'~/.local/lib/python{ver.major}.{ver.minor}/site-packages'
  106. lib_path = str(Path(lib_path).expanduser())
  107. if lib_path not in sys.path:
  108. sys.path.append(lib_path)
  109. def _embed_detail_information(output, skeleton, dir_util, dir_01, dir_02, dir_03):
  110. # Notebookのヘッダ取得
  111. nb_headers = _get_notebook_headers(dir_util)
  112. nb_headers.update(_get_notebook_headers(dir_01))
  113. nb_headers.update(_get_notebook_headers(dir_02))
  114. nb_headers.update(_get_notebook_headers(dir_03))
  115. # 雛形の読み込み
  116. tree = etree.parse(str(skeleton))
  117. # 雛形をNotebook情報で置き換え
  118. for elem in list(tree.findall(SVG_TEXT)):
  119. if _is_target_rect(elem, nb_headers.keys()):
  120. nb_name = _find_matching_notebook(nb_headers.keys(), elem.text)
  121. _embed_info_in_one_rect(elem, nb_headers, Path('WORKFLOW/FLOW'), nb_name)
  122. # SVGの保存
  123. output.parent.mkdir(parents=True, exist_ok=True)
  124. with output.open(mode='wb') as f:
  125. f.write(etree.tostring(tree, method='xml', pretty_print=True))
  126. def _is_target_rect(elem, notebooks):
  127. return (
  128. elem.getprevious() is not None and
  129. elem.getprevious().tag == SVG_RECT and
  130. len(elem.text) > 0 and
  131. _find_matching_notebook(notebooks, elem.text) is not None)
  132. def _find_matching_notebook(notebooks, title):
  133. for nb in notebooks:
  134. if nb.startswith(title):
  135. return nb
  136. def _embed_info_in_one_rect(elem, nb_headers, nb_dir, nb_name):
  137. headers = nb_headers[nb_name]
  138. nb_file = nb_headers[nb_name]['path']
  139. nb_file = nb_file.replace('WORKFLOW/FLOW/', '')
  140. rect_elem = elem.getprevious()
  141. rect = (
  142. (int(rect_elem.attrib['x']), int(rect_elem.attrib['y'])),
  143. (int(rect_elem.attrib['width']), int(rect_elem.attrib['height'])))
  144. childpos = elem.getparent().index(elem)
  145. parent_elem = elem.getparent()
  146. remove_texts(elem)
  147. title = headers['title']['text']
  148. if elem.text.find(':') >= 0:
  149. title = title + ' - ' + elem.text.split(':')[1]
  150. line_num = insert_title(parent_elem, childpos, rect, title, str(nb_file))
  151. insert_headers(parent_elem, childpos, rect, headers['headers'], line_num)
  152. def remove_texts(elem):
  153. old_text = elem
  154. while old_text is not None:
  155. if (old_text.getnext() is not None and
  156. old_text.getnext().tag == SVG_TEXT):
  157. next_text = old_text.getnext()
  158. else:
  159. next_text = None
  160. old_text.getparent().remove(old_text)
  161. old_text = next_text
  162. def insert_title(parent_elem, childpos, rect, title, link):
  163. height_title = (
  164. text_margin + (title_font_size + text_margin) * 2 + head_margin * 2)
  165. lines = split_title(title)
  166. if len(lines) == 2:
  167. text_elem = create_text(rect, title_font_size, font_weight='bold')
  168. text_elem.text = lines[0]
  169. text_elem.attrib['y'] = str(
  170. rect[0][1] + head_margin + text_margin + title_font_size)
  171. text_elems = [text_elem]
  172. text_elem = create_text(rect, title_font_size, font_weight='bold')
  173. text_elem.text = lines[1]
  174. text_elem.attrib['y'] = str(
  175. rect[0][1] + height_title - text_margin - head_margin)
  176. text_elems.append(text_elem)
  177. else:
  178. text_elem = create_text(rect, title_font_size, font_weight='bold')
  179. text_elem.text = title
  180. text_elem.attrib['y'] = str(
  181. rect[0][1] + height_title // 2 + title_font_size // 2)
  182. text_elems = [text_elem]
  183. parent_elem.insert(childpos, create_anchor(text_elems, link))
  184. return len(lines)
  185. def insert_headers(parent_elem, childpos, rect, headers, title_lines):
  186. offset_y = (
  187. text_margin +
  188. (title_font_size + text_margin) * (title_lines + 1) +
  189. head_margin * 2 + text_margin)
  190. for i, header in enumerate(headers):
  191. text_elem = create_text(rect, item_font_size)
  192. text_elem.text = header['text']
  193. text_elem.attrib['y'] = str(
  194. rect[0][1] + offset_y + (item_font_size + text_margin) * i +
  195. item_font_size)
  196. parent_elem.insert(childpos, text_elem)
  197. def split_title(title):
  198. if u':' in title:
  199. return [title[:title.index(u':') + 1], title[title.index(u':') + 1:]]
  200. elif len(title) >= 15:
  201. words = re.split(r'([-((])', title, 1)
  202. ret = words[0:1] + [''.join(x) for x in zip(words[1::2], words[2::2])]
  203. return [re.sub(r'^--', '- ', x) for x in ret]
  204. else:
  205. return [title]
  206. def create_text(rect, font_size, font_weight='normal', font_style='normal'):
  207. text_elem = etree.Element(SVG_TEXT)
  208. text_elem.attrib['fill'] = 'rgb(0,0,0)'
  209. text_elem.attrib['font-family'] = 'sans-serif'
  210. text_elem.attrib['font-size'] = str(font_size)
  211. text_elem.attrib['font-style'] = font_style
  212. text_elem.attrib['font-weight'] = font_weight
  213. text_elem.attrib['font-anchor'] = 'middle'
  214. text_elem.attrib['x'] = str(rect[0][0] + text_margin)
  215. text_elem.attrib['width'] = str(rect[1][0] - text_margin * 2)
  216. return text_elem
  217. def create_anchor(elems, link):
  218. a_elem = etree.Element('a')
  219. a_elem.attrib['{http://www.w3.org/1999/xlink}href'] = link
  220. for elem in elems:
  221. a_elem.append(elem)
  222. return a_elem
  223. # refs: https://note.nkmk.me/python-if-name-main/
  224. # maDMP.ipynbからコマンドライン引数でdiagファイルのパスが渡されてくる
  225. if __name__ == '__main__':
  226. generate_svg_diag(diag=sys.argv[1])