nb_utils.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. # -*- coding: utf-8 -*-
  2. import os
  3. import re
  4. import sys
  5. import shutil
  6. from subprocess import run, CalledProcessError
  7. from tempfile import TemporaryDirectory
  8. from pathlib import Path
  9. from lxml import etree
  10. from nbformat import read, NO_CONVERT
  11. from itertools import chain, zip_longest
  12. from jinja2 import Template
  13. from datetime import datetime
# Font sizes written to the SVG ``font-size`` attribute of generated text.
title_font_size = 11  # notebook title lines (used by insert_title)
item_font_size = 9  # header lines listed below the title (insert_headers)
# Margins used when computing the x/y position and width of each text line
# relative to its rectangle.
head_margin = 3
text_margin = 2
# Namespace-qualified tag names of the SVG <text> and <rect> elements.
SVG_TEXT = '{http://www.w3.org/2000/svg}text'
SVG_RECT = '{http://www.w3.org/2000/svg}rect'
  20. def parse_headers(nb_path):
  21. nb = read(str(nb_path), as_version=NO_CONVERT)
  22. # Notebookのセルからmarkdownの部分を取り出し、行ごとのリストにする
  23. lines = [
  24. line.strip()
  25. for line in chain.from_iterable(
  26. cell['source'].split('\n')
  27. for cell in nb.cells
  28. if cell['cell_type'] == 'markdown'
  29. )
  30. if len(line.strip()) > 0 and not line.startswith('---')
  31. ]
  32. # h1, h2 の行とその次行の最初の1文を取り出す
  33. headers = [
  34. (' '.join(line0.split()[1:]),
  35. line1.split("。")[0] if line1 is not None else '')
  36. for (line0, line1) in zip_longest(lines, lines[1:])
  37. if line0.startswith('# ') or line0.startswith('## ')
  38. ]
  39. # 最初の見出しはtitle, 残りはheadersとして返す
  40. return {
  41. 'title': {
  42. 'text': _to_title_text(nb_path, headers[0][0]),
  43. 'summary': headers[0][1],
  44. },
  45. 'headers': [
  46. {
  47. 'text': text,
  48. 'summary': (
  49. summary if not re.match(r'(?:#|!\[)', summary) else ''),
  50. }
  51. for (text, summary) in headers[1:]
  52. ],
  53. }
  54. def _to_title_text(nb_path, text):
  55. no = nb_path.name.split('-')[0]
  56. title = text if not text.startswith('About:') else text[6:]
  57. return f'{title}'
  58. def _get_notebook_headers(nb_dir):
  59. return dict([
  60. (nb.name, parse_headers(nb))
  61. for nb in nb_dir.glob("*.ipynb")
  62. ])
  63. def notebooks_toc(nb_dir):
  64. nb_headers = sorted(
  65. _get_notebook_headers(Path(nb_dir)).items(),
  66. key=lambda x: x[0])
  67. return "\n".join(chain.from_iterable([
  68. [
  69. f'* [{headers["title"]["text"]}]({nb_dir}/{str(nb)})'
  70. ] + list(chain.from_iterable([
  71. [
  72. f' - {header["text"]}',
  73. (f' - {header["summary"]}'
  74. if len(header["summary"]) > 0 else ''),
  75. ]
  76. for header in headers['headers']
  77. ]))
  78. for nb, headers in nb_headers
  79. ]))
  80. import json
# Module-level placeholder. load_json() hands the parsed data back to its
# caller as a return value; it does not store anything in this global.
JSON = ""
  82. def load_json(PATH):
  83. with open(PATH) as f:
  84. JSON = json.load(f)
  85. return JSON
  86. def setup_diag():
  87. setup_blockdiag()
  88. setup_lxml()
  89. def setup_lxml():
  90. if not check_lxml():
  91. install_lxml()
  92. return check_lxml()
  93. def check_lxml():
  94. try:
  95. import lxml
  96. return True
  97. except ModuleNotFoundError:
  98. return False
  99. def install_lxml():
  100. run('pip install -q --user lxml', shell=True)
  101. setup_python_path()
  102. def setup_blockdiag():
  103. if not check_blockdiag():
  104. install_blockdiag()
  105. return check_blockdiag()
  106. def check_blockdiag():
  107. try:
  108. run('blockdiag -h', shell=True, check=True)
  109. return True
  110. except CalledProcessError:
  111. return False
  112. def install_blockdiag():
  113. run('pip install -q --user blockdiag', shell=True)
  114. paths = os.environ['PATH'].split(':')
  115. local_bin = str(Path('~/.local/bin').expanduser())
  116. if local_bin not in paths:
  117. paths.append(local_bin)
  118. os.environ['PATH'] = ':'.join(paths)
  119. if not check_blockdiag():
  120. install_blockdiag()
  121. def generate_svg_diag(
  122. output='/home/jovyan/WORKFLOW/images/notebooks.svg',
  123. diag='images/notebooks.diag',
  124. nb_dir='/home/jovyan/FLOW',
  125. font='/home/jovyan/.fonts/ipag.ttf',
  126. ):
  127. with TemporaryDirectory() as workdir:
  128. skeleton = Path(workdir) / 'skeleton.svg'
  129. _generate_skeleton(skeleton, Path(diag), Path(font))
  130. _embed_detail_information(Path(output), skeleton, Path(nb_dir))
  131. return output
  132. def _generate_skeleton(output, diag, font):
  133. run(['blockdiag', '-f', font, '-Tsvg', '-o', output, diag], check=True)
  134. def setup_python_path():
  135. ver = sys.version_info
  136. lib_path = f'~/.local/lib/python{ver.major}.{ver.minor}/site-packages'
  137. lib_path = str(Path(lib_path).expanduser())
  138. if lib_path not in sys.path:
  139. sys.path.append(lib_path)
  140. def _embed_detail_information(output, skeleton, nb_dir):
  141. # Notebookのヘッダ取得
  142. nb_headers = _get_notebook_headers(nb_dir)
  143. # 雛形の読み込み
  144. tree = etree.parse(str(skeleton))
  145. # 雛形をNotebook情報で置き換え
  146. for elem in list(tree.findall(SVG_TEXT)):
  147. if _is_target_rect(elem, nb_headers.keys()):
  148. nb_name = _find_matching_notebook(nb_headers.keys(), elem.text)
  149. _embed_info_in_one_rect(elem, nb_headers, nb_dir, nb_name)
  150. # SVGの保存
  151. output.parent.mkdir(parents=True, exist_ok=True)
  152. with output.open(mode='wb') as f:
  153. f.write(etree.tostring(tree, method='xml', pretty_print=True))
  154. def _is_target_rect(elem, notebooks):
  155. return (
  156. elem.getprevious() is not None and
  157. elem.getprevious().tag == SVG_RECT and
  158. len(elem.text) > 0 and
  159. _find_matching_notebook(notebooks, elem.text) is not None)
  160. def _find_matching_notebook(notebooks, prefix):
  161. nb_prefix = prefix if prefix.find(':') < 0 else prefix.split(':')[0]
  162. for nb in notebooks:
  163. if nb.startswith(nb_prefix):
  164. return nb
  165. def _embed_info_in_one_rect(elem, nb_headers, nb_dir, nb_name):
  166. headers = nb_headers[nb_name]
  167. nb_file = nb_dir / nb_name
  168. rect_elem = elem.getprevious()
  169. rect = (
  170. (int(rect_elem.attrib['x']), int(rect_elem.attrib['y'])),
  171. (int(rect_elem.attrib['width']), int(rect_elem.attrib['height'])))
  172. childpos = elem.getparent().index(elem)
  173. parent_elem = elem.getparent()
  174. remove_texts(elem)
  175. title = headers['title']['text']
  176. if elem.text.find(':') >= 0:
  177. title = title + ' - ' + elem.text.split(':')[1]
  178. line_num = insert_title(parent_elem, childpos, rect, title, str(nb_file))
  179. insert_headers(parent_elem, childpos, rect, headers['headers'], line_num)
  180. def remove_texts(elem):
  181. old_text = elem
  182. while old_text is not None:
  183. if (old_text.getnext() is not None and
  184. old_text.getnext().tag == SVG_TEXT):
  185. next_text = old_text.getnext()
  186. else:
  187. next_text = None
  188. old_text.getparent().remove(old_text)
  189. old_text = next_text
  190. def insert_title(parent_elem, childpos, rect, title, link):
  191. height_title = (
  192. text_margin + (title_font_size + text_margin) * 2 + head_margin * 2)
  193. lines = split_title(title)
  194. if len(lines) == 2:
  195. text_elem = create_text(rect, title_font_size, font_weight='bold')
  196. text_elem.text = lines[0]
  197. text_elem.attrib['y'] = str(
  198. rect[0][1] + head_margin + text_margin + title_font_size)
  199. text_elems = [text_elem]
  200. text_elem = create_text(rect, title_font_size, font_weight='bold')
  201. text_elem.text = lines[1]
  202. text_elem.attrib['y'] = str(
  203. rect[0][1] + height_title - text_margin - head_margin)
  204. text_elems.append(text_elem)
  205. else:
  206. text_elem = create_text(rect, title_font_size, font_weight='bold')
  207. text_elem.text = title
  208. text_elem.attrib['y'] = str(
  209. rect[0][1] + height_title // 2 + title_font_size // 2)
  210. text_elems = [text_elem]
  211. parent_elem.insert(childpos, create_anchor(text_elems, link))
  212. return len(lines)
  213. def insert_headers(parent_elem, childpos, rect, headers, title_lines):
  214. offset_y = (
  215. text_margin +
  216. (title_font_size + text_margin) * (title_lines + 1) +
  217. head_margin * 2 + text_margin)
  218. for i, header in enumerate(headers):
  219. text_elem = create_text(rect, item_font_size)
  220. text_elem.text = header['text']
  221. text_elem.attrib['y'] = str(
  222. rect[0][1] + offset_y + (item_font_size + text_margin) * i +
  223. item_font_size)
  224. parent_elem.insert(childpos, text_elem)
  225. def split_title(title):
  226. if u':' in title:
  227. return [title[:title.index(u':') + 1], title[title.index(u':') + 1:]]
  228. elif len(title) >= 15:
  229. words = re.split(r'([-((])', title, 1)
  230. ret = words[0:1] + [''.join(x) for x in zip(words[1::2], words[2::2])]
  231. return [re.sub(r'^--', '- ', x) for x in ret]
  232. else:
  233. return [title]
  234. def create_text(rect, font_size, font_weight='normal', font_style='normal'):
  235. text_elem = etree.Element(SVG_TEXT)
  236. text_elem.attrib['fill'] = 'rgb(0,0,0)'
  237. text_elem.attrib['font-family'] = 'sans-serif'
  238. text_elem.attrib['font-size'] = str(font_size)
  239. text_elem.attrib['font-style'] = font_style
  240. text_elem.attrib['font-weight'] = font_weight
  241. text_elem.attrib['font-anchor'] = 'middle'
  242. text_elem.attrib['x'] = str(rect[0][0] + text_margin)
  243. text_elem.attrib['width'] = str(rect[1][0] - text_margin * 2)
  244. return text_elem
  245. def create_anchor(elems, link):
  246. a_elem = etree.Element('a')
  247. a_elem.attrib['{http://www.w3.org/1999/xlink}href'] = link
  248. for elem in elems:
  249. a_elem.append(elem)
  250. return a_elem