# nb_utils.py (8.7 KB)
  1. # -*- coding: utf-8 -*-
  2. import os
  3. import re
  4. import sys
  5. import shutil
  6. from subprocess import run, CalledProcessError
  7. from tempfile import TemporaryDirectory
  8. from pathlib import Path
  9. from lxml import etree
  10. from nbformat import read, NO_CONVERT
  11. from itertools import chain, zip_longest
  12. from jinja2 import Template
  13. from datetime import datetime
  # Font sizes used for the notebook title and for each heading item.
  title_font_size = 12
  item_font_size = 9

  # Spacing constants used when computing the y position of text lines.
  head_margin = 3
  text_margin = 2

  # Fill colours for the title lines and for the heading items.
  title_font_color = 'rgb(255,140,0)'
  text_font_color = 'rgb(0,0,0)'

  # Fully qualified (Clark notation) tag names of the SVG elements handled here.
  _SVG_NS = 'http://www.w3.org/2000/svg'
  SVG_TEXT = '{' + _SVG_NS + '}text'
  SVG_RECT = '{' + _SVG_NS + '}rect'
  def parse_headers(nb_path):
      """Extract the title and section headings of a notebook file.

      The markdown cells are flattened into stripped, non-empty lines. Every
      line starting with ``# `` or ``## `` is a heading; its summary is the
      text of the following line up to the first ``。``.

      Args:
          nb_path: ``pathlib.Path`` of the notebook to read.

      Returns:
          A dict with three keys:
            * ``'path'``: ``nb_path.as_posix()``
            * ``'title'``: ``{'text', 'summary'}`` of the first heading. When
              the notebook has no heading at all, the file stem is used as the
              title text and the summary is empty.
            * ``'headers'``: list of ``{'text', 'summary'}`` for the remaining
              headings. A summary that itself starts with ``#`` or ``![``
              (another heading or an image) is replaced by ``''``.
      """
      notebook = read(str(nb_path), as_version=NO_CONVERT)

      # Pull the markdown parts out of the notebook cells as a list of lines.
      lines = []
      for cell in notebook.cells:
          if cell['cell_type'] != 'markdown':
              continue
          for raw_line in cell['source'].split('\n'):
              stripped = raw_line.strip()
              # The '---' test is deliberately made on the raw line, exactly
              # as before, so only unindented rules are dropped.
              if stripped and not raw_line.startswith('---'):
                  lines.append(stripped)

      # Take the h1/h2 lines together with the first sentence of the next line.
      headings = []
      for line, following in zip_longest(lines, lines[1:]):
          if not line.startswith(('# ', '## ')):
              continue
          text = ' '.join(line.split()[1:])
          summary = following.split("。")[0] if following is not None else ''
          headings.append((text, summary))

      # The first heading is the title, the rest are returned as headers.
      # A notebook without any heading used to raise IndexError here; fall
      # back to the file stem so one bare notebook does not abort the caller.
      if headings:
          title_text, title_summary = headings[0]
      else:
          title_text, title_summary = nb_path.stem, ''

      return {
          'path': nb_path.as_posix(),
          'title': {
              'text': _to_title_text(nb_path, title_text),
              'summary': title_summary,
          },
          'headers': [
              {
                  'text': text,
                  'summary': (
                      '' if re.match(r'(?:#|!\[)', summary) else summary),
              }
              for (text, summary) in headings[1:]
          ],
      }
  57. def _to_title_text(nb_path, text):
  58. no = nb_path.name.split('-')[0]
  59. title = text if not text.startswith('About:') else text[6:]
  60. return f'{title}'
  61. def _get_notebook_headers(nb_dir):
  62. return dict([
  63. (nb.name, parse_headers(nb))
  64. for nb in nb_dir.glob("*.ipynb")
  65. ])
  def notebooks_toc(nb_dir):
      """Build a Markdown table of contents for the notebooks in *nb_dir*.

      Notebooks are listed in file-name order. Each notebook contributes one
      link line, followed by two lines per heading: the heading text and its
      summary. A heading without a summary contributes an empty line instead.

      Args:
          nb_dir: Directory containing the notebooks; also used verbatim as
              the link prefix.

      Returns:
          The lines joined with newlines.
      """
      parsed = _get_notebook_headers(Path(nb_dir))
      toc_lines = []
      for nb_name in sorted(parsed):
          info = parsed[nb_name]
          toc_lines.append(f'* [{info["title"]["text"]}]({nb_dir}/{nb_name})')
          for header in info['headers']:
              toc_lines.append(f' - {header["text"]}')
              if len(header["summary"]) > 0:
                  toc_lines.append(f' - {header["summary"]}')
              else:
                  toc_lines.append('')
      return "\n".join(toc_lines)
  import json

  # Module-level placeholder kept for backward compatibility; load_json()
  # never modifies it.
  JSON = ""


  def load_json(PATH):
      """Read a JSON file and return the decoded object.

      The file is opened explicitly as UTF-8 so the result does not depend on
      the platform's default locale encoding.

      Args:
          PATH: Path of the JSON file.

      Returns:
          The object produced by ``json.load``.
      """
      with open(PATH, encoding='utf-8') as f:
          return json.load(f)
  def generate_svg_diag(
          output='EX-WORKFLOW/images/notebooks.svg',
          diag='EX-WORKFLOW/images/notebooks.diag',
          font='.fonts/ipag.ttf',
          dir_util='EX-WORKFLOW/util',
          dir_experiment='EX-WORKFLOW',
  ):
      """Generate the notebook overview SVG from a blockdiag source file.

      A skeleton SVG is rendered into a temporary directory and then
      rewritten with the notebook titles and headings before being saved.

      Args:
          output: Path of the SVG file to write.
          diag: Path of the blockdiag source file.
          font: Path of the font file handed to blockdiag.
          dir_util: Directory of the utility notebooks.
          dir_experiment: Directory of the experiment notebooks.

      Returns:
          *output*, unchanged.
      """
      with TemporaryDirectory() as workdir:
          skeleton_path = Path(workdir, 'skeleton.svg')
          _generate_skeleton(skeleton_path, Path(diag), Path(font))
          _embed_detail_information(
              Path(output),
              skeleton_path,
              Path(dir_util),
              Path(dir_experiment),
          )
      return output
  def _generate_skeleton(output, diag, font):
      """Run ``blockdiag`` to render *diag* as an SVG file at *output*.

      Raises:
          CalledProcessError: if the command exits with a non-zero status
              (``check=True``).
      """
      command = ['blockdiag', '-f', font, '-Tsvg', '-o', output, diag]
      run(command, check=True)
  def setup_python_path():
      """Append the per-user ``site-packages`` directory to ``sys.path``.

      The directory is ``~/.local/lib/python<major>.<minor>/site-packages``
      for the running interpreter; nothing happens if it is already listed.
      """
      major, minor = sys.version_info.major, sys.version_info.minor
      user_site = Path(f'~/.local/lib/python{major}.{minor}/site-packages')
      user_site = str(user_site.expanduser())
      if user_site in sys.path:
          return
      sys.path.append(user_site)
  def _embed_detail_information(output, skeleton, dir_util, dir_experiment):
      """Rewrite the skeleton SVG with notebook titles/headings and save it.

      Args:
          output: ``pathlib.Path`` of the SVG file to write; missing parent
              directories are created.
          skeleton: ``pathlib.Path`` of the skeleton SVG to read.
          dir_util: Directory of the utility notebooks.
          dir_experiment: Directory of the experiment notebooks. On a file
              name clash, its entry replaces the one from *dir_util*.
      """
      # Collect the notebook headers.
      nb_headers = _get_notebook_headers(dir_util)
      nb_headers.update(_get_notebook_headers(dir_experiment))

      # Load the skeleton.
      tree = etree.parse(str(skeleton))

      # Replace the skeleton labels with the notebook information.
      # Iterate over a snapshot because embedding adds and removes elements.
      for elem in list(tree.findall(SVG_TEXT)):
          if _is_target_rect(elem, nb_headers.keys()):
              nb_name = _find_matching_notebook(nb_headers.keys(), elem.text)
              # Pass the caller's directory rather than a hard-coded
              # 'EX-WORKFLOW' so the dir_experiment argument is honoured.
              _embed_info_in_one_rect(elem, nb_headers, dir_experiment, nb_name)

      # Save the SVG.
      output.parent.mkdir(parents=True, exist_ok=True)
      with output.open(mode='wb') as f:
          f.write(etree.tostring(tree, method='xml', pretty_print=True))
  def _is_target_rect(elem, notebooks):
      """Tell whether *elem* is a label that should be replaced.

      That is the case when the element directly follows a ``rect`` element,
      has non-empty text, and that text is the prefix of a notebook name.

      Args:
          elem: The SVG ``text`` element to inspect.
          notebooks: Iterable of notebook file names.

      Returns:
          ``True`` or ``False``.
      """
      previous = elem.getprevious()
      if previous is None or previous.tag != SVG_RECT:
          return False
      # elem.text is None for an element without text; the former
      # len(elem.text) raised TypeError in that case.
      if not elem.text:
          return False
      return _find_matching_notebook(notebooks, elem.text) is not None
  130. def _find_matching_notebook(notebooks, title):
  131. for nb in notebooks:
  132. if nb.startswith(title):
  133. return nb
  def _embed_info_in_one_rect(elem, nb_headers, nb_dir, nb_name):
      """Replace one label with the title and headings of its notebook.

      Args:
          elem: The ``text`` element that follows the target ``rect``.
          nb_headers: dict of notebook name -> parsed headers.
          nb_dir: Not used by this function.
          nb_name: Key of the notebook in *nb_headers*.
      """
      info = nb_headers[nb_name]
      nb_file = info['path']

      # Geometry of the rectangle as ((x, y), (width, height)).
      geometry = elem.getprevious().attrib
      origin = (int(geometry['x']), int(geometry['y']))
      size = (int(geometry['width']), int(geometry['height']))
      rect = (origin, size)

      # Remember where the label sat before it is removed from the tree.
      parent_elem = elem.getparent()
      childpos = parent_elem.index(elem)
      remove_texts(elem)

      # A label of the form "<name>:<suffix>" appends the suffix to the title.
      label = elem.text
      title = info['title']['text']
      if ':' in label:
          title = title + ' - ' + label.split(':')[1]

      line_num = insert_title(parent_elem, childpos, rect, title, str(nb_file))
      insert_headers(parent_elem, childpos, rect, info['headers'], line_num)
  def remove_texts(elem):
      """Remove *elem* and the ``text`` siblings that directly follow it.

      Removal stops at the first following sibling that is not an SVG
      ``text`` element (or at the end of the parent).
      """
      current = elem
      while current is not None:
          # Look up the successor before detaching the current element.
          follower = current.getnext()
          if follower is not None and follower.tag != SVG_TEXT:
              follower = None
          current.getparent().remove(current)
          current = follower
  def insert_title(parent_elem, childpos, rect, title, link):
      """Insert the linked notebook title at *childpos* of *parent_elem*.

      The title is laid out on two lines when ``split_title`` yields exactly
      two parts, otherwise on a single vertically centred line.

      Args:
          parent_elem: Element that receives the new anchor element.
          childpos: Index at which the anchor is inserted.
          rect: ``((x, y), (width, height))`` of the surrounding rectangle.
          title: Title text.
          link: Link target for the anchor.

      Returns:
          The number of parts returned by ``split_title(title)``.
      """
      top = rect[0][1]
      height_title = (
          text_margin + (title_font_size + text_margin) * 2 + head_margin * 2)

      def title_line(text, y):
          # One bold title line at vertical position y.
          node = create_text(
              rect, title_font_size,
              font_weight='bold', font_color=title_font_color)
          node.text = text
          node.attrib['y'] = str(y)
          return node

      lines = split_title(title)
      if len(lines) == 2:
          upper_y = top + head_margin + text_margin + title_font_size
          lower_y = top + height_title - text_margin - head_margin
          text_elems = [
              title_line(lines[0], upper_y),
              title_line(lines[1], lower_y),
          ]
      else:
          centre_y = top + height_title // 2 + title_font_size // 2
          text_elems = [title_line(title, centre_y)]

      parent_elem.insert(childpos, create_anchor(text_elems, link))
      return len(lines)
  def insert_headers(parent_elem, childpos, rect, headers, title_lines):
      """Insert one ``text`` element per heading below the title area.

      Every element is inserted at the same *childpos*; the vertical position
      is carried by its ``y`` attribute.

      Args:
          parent_elem: Element that receives the new ``text`` elements.
          childpos: Index used for every insertion.
          rect: ``((x, y), (width, height))`` of the surrounding rectangle.
          headers: List of dicts with a ``'text'`` key.
          title_lines: Number of title lines, as returned by ``insert_title``.
      """
      offset_y = (
          text_margin +
          (title_font_size + text_margin) * (title_lines + 1) +
          head_margin * 2 + text_margin)
      line_height = item_font_size + text_margin
      first_y = rect[0][1] + offset_y + item_font_size

      for index, header in enumerate(headers):
          item = create_text(rect, item_font_size, font_color=text_font_color)
          item.text = header['text']
          item.attrib['y'] = str(first_y + line_height * index)
          parent_elem.insert(childpos, item)
  def split_title(title):
      """Split a title into at most two display lines.

      * A title containing ``:`` is split after the first colon.
      * Otherwise a title of 15 or more characters is split before the first
        ``-``, ``(`` or ``(``; a second part starting with ``--`` is rewritten
        to start with ``- ``. Without such a delimiter it stays in one piece.
      * Shorter titles are returned as a single part.

      Args:
          title: Title text.

      Returns:
          A list with one or two strings.
      """
      if ':' in title:
          head, colon, tail = title.partition(':')
          return [head + colon, tail]
      if len(title) < 15:
          return [title]

      # maxsplit is passed by keyword: the positional form is deprecated.
      parts = re.split(r'([-((])', title, maxsplit=1)
      if len(parts) == 1:
          pieces = parts
      else:
          # Re-attach the captured delimiter to the text that follows it.
          pieces = [parts[0], parts[1] + parts[2]]
      return [re.sub(r'^--', '- ', piece) for piece in pieces]
  def create_text(rect, font_size, font_color, font_weight='normal', font_style='normal'):
      """Create an SVG ``text`` element positioned at the left of *rect*.

      Args:
          rect: ``((x, y), (width, height))`` of the surrounding rectangle.
          font_size: Value for ``font-size``.
          font_color: Value for ``fill``.
          font_weight: Value for ``font-weight``.
          font_style: Value for ``font-style``.

      Returns:
          The new element; its text and ``y`` attribute are left to the caller.
      """
      (left, _top), (width, _height) = rect
      # NOTE(review): 'font-anchor' and 'width' are emitted exactly as before;
      # confirm whether 'text-anchor' was intended before changing them, since
      # that would alter how the text is positioned.
      attributes = (
          ('fill', font_color),
          ('font-family', 'sans-serif'),
          ('font-size', str(font_size)),
          ('font-style', font_style),
          ('font-weight', font_weight),
          ('font-anchor', 'middle'),
          ('x', str(left + text_margin)),
          ('width', str(width - text_margin * 2)),
      )
      text_elem = etree.Element(SVG_TEXT)
      for name, value in attributes:
          text_elem.attrib[name] = value
      return text_elem
  def create_anchor(elems, link):
      """Wrap *elems* in an ``a`` element whose xlink ``href`` is *link*.

      Args:
          elems: Elements to append, in order, as children of the anchor.
          link: Link target.

      Returns:
          The new ``a`` element.
      """
      anchor = etree.Element('a')
      anchor.set('{http://www.w3.org/1999/xlink}href', link)
      anchor.extend(elems)
      return anchor
  # refs: https://note.nkmk.me/python-if-name-main/
  # maDMP.ipynb passes the path of the diag file as a command-line argument.
  if __name__ == '__main__':
      diag_path = sys.argv[1]
      generate_svg_diag(diag=diag_path)