Scheduled service maintenance on November 22


On Friday, November 22, 2024, between 06:00 CET and 18:00 CET, GIN services will undergo planned maintenance. Extended service interruptions should be expected. We will try to keep downtimes to a minimum, but recommend that users avoid critical tasks, large data uploads, or DOI requests during this time.

We apologize for any inconvenience.

fsbrowser.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438
  1. from functools import lru_cache
  2. import logging
  3. from pathlib import Path
  4. from typing import List
  5. from PySide6.QtCore import (
  6. QFileSystemWatcher,
  7. QObject,
  8. Qt,
  9. QTimer,
  10. Signal,
  11. Slot,
  12. )
  13. from PySide6.QtWidgets import (
  14. QMenu,
  15. QTreeWidget,
  16. )
  17. from datalad.interface.base import Interface
  18. from datalad.utils import get_dataset_root
  19. from datalad.dataset.gitrepo import GitRepo
  20. from .cmd_actions import add_cmd_actions_to_menu
  21. from .fsbrowser_item import FSBrowserItem
  22. from .lsdir import GooeyLsDir
  23. from .status_light import GooeyStatusLight
  lgr = logging.getLogger('datalad.gooey.fsbrowser')


  class GooeyFilesystemBrowser(QObject):
      """Keep a ``QTreeWidget`` in sync with a directory tree on disk.

      Tree nodes are `FSBrowserItem` instances. Expanding a node requests a
      background ``gooey_lsdir`` run to populate it and registers the
      directory with a `QFileSystemWatcher`. Populated directories are
      queued for a delayed "annotation" pass which assigns a state to their
      non-directory children (``gooey_status_light`` inside a dataset,
      plain ``untracked`` outside of one).
      """
      # TODO Establish ENUM for columns

      # carries the FSBrowserItem that should be queued for annotation
      item_requires_annotation = Signal(FSBrowserItem)

      # DONE
      def __init__(self, app, path: Path, treewidget: QTreeWidget):
          """Wire up tree widget, file system watcher and annotation timer.

          Parameters
          ----------
          app:
            Application object. Used here via its ``execute_dataladcmd``
            signal and its ``_cmdexec`` and ``_cmdui`` attributes.
          path: Path
            Directory represented by the root item of the tree.
          treewidget: QTreeWidget
            Widget that is populated and whose signals are connected.
          """
          super().__init__()

          tw = treewidget
          # TODO must setColumnNumber()

          self._app = app
          self._fswatcher = QFileSystemWatcher(parent=app)
          self.item_requires_annotation.connect(
              self._queue_item_for_annotation)

          tw.setHeaderLabels(['Name', 'Type', 'State'])
          # established defined sorting order of the tree
          tw.sortItems(1, Qt.AscendingOrder)

          # establish the root item, based on a fake lsdir result
          # the info needed is so simple, it is not worth a command
          # execution
          root = FSBrowserItem.from_lsdir_result(
              dict(
                  path=path,
                  type='dataset' if GitRepo.is_valid(path) else 'directory',
              ),
              parent=tw,
          )
          # set the tooltip to the full path, otherwise only names are shown
          root.setToolTip(0, str(path))
          tw.addTopLevelItem(root)
          self._root_item = root

          tw.customContextMenuRequested.connect(
              self._custom_context_menu)

          self._tree = tw

          # whenever a treeview node is expanded, add the path to the
          # fswatcher
          tw.itemExpanded.connect(self._watch_dir)
          # and also populate it with items for contained paths
          tw.itemExpanded.connect(self._populate_item)
          tw.itemCollapsed.connect(self._unwatch_dir)
          self._fswatcher.directoryChanged.connect(self._inspect_changed_dir)

          # items of directories to be annotated, populated by
          # _queue_item_for_annotation()
          self._annotation_queue = set()
          # msec
          self._annotation_timer_interval = 3000
          self._annotation_timer = QTimer(self)
          self._annotation_timer.timeout.connect(
              self._process_item_annotation_queue)
          self._annotation_timer.start(self._annotation_timer_interval)

          self._app._cmdexec.results_received.connect(
              self._cmdexec_results_handler)

      def _populate_item(self, item):
          """Populate `item` on first expansion; no-op if it has children."""
          if item.childCount():
              return

          # only parse, if there are no children yet
          # kick off lsdir command in the background
          self._populate_and_annotate(item, no_existing_children=True)

      def _populate_and_annotate(self, item, no_existing_children):
          """Request an lsdir run for `item` and queue it for annotation.

          Parameters
          ----------
          item: FSBrowserItem
            Directory/dataset item whose content shall be (re)listed.
          no_existing_children: bool
            Forwarded with each result as ``gooey_no_existing_item``, so
            that the result receiver can skip searching for existing items.
          """
          self._app.execute_dataladcmd.emit(
              'gooey_lsdir',
              dict(
                  path=item.pathobj,
                  result_renderer='disabled',
                  on_failure='ignore',
                  return_type='generator',
              ),
              dict(
                  preferred_result_interval=0.2,
                  result_override=dict(
                      gooey_parent_item=item,
                      gooey_no_existing_item=no_existing_children,
                  ),
              ),
          )

          # for now we register the parent for an annotation update
          # but we could also report the specific path and let the
          # annotation code figure out the optimal way.
          # at present however, we get here for items of a whole dir
          # being reported at once.
          self._queue_item_for_annotation(item)

      @Slot(Interface, list)
      def _cmdexec_results_handler(self, cls, res):
          """Dispatch command results to the receiver matching `cls`.

          Results of commands other than `GooeyLsDir` and
          `GooeyStatusLight` are ignored.
          """
          if cls == GooeyLsDir:
              res_handler = self._lsdir_result_receiver
          elif cls == GooeyStatusLight:
              res_handler = self._status_result_receiver
          else:
              lgr.debug('FSBrowser has no handler for result from %s', cls)
              return

          for r in res:
              res_handler(r)

      def _lsdir_result_receiver(self, res):
          """Create or update the tree item described by one lsdir result."""
          if res.get('action') != 'gooey-lsdir':
              # not what we are looking for
              return

          target_item = None
          target_item_parent = res.get('gooey_parent_item')
          no_existing_item = res.get('gooey_no_existing_item', False)

          ipath = Path(res['path'])
          if target_item_parent is None:
              # we did not get it delivered in the result, search for it
              try:
                  target_item_parent = self._get_item_from_path(ipath.parent)
              except ValueError:
                  # ok, now we have no clue what this lsdir result is about
                  # its parent is not in the tree
                  return

          if (no_existing_item and target_item_parent
                  and target_item_parent.pathobj == ipath):
              # sender claims that the item does not exist and provided a
              # parent item. reject a result if it matches the parent to
              # avoid duplicating the item as a child, and to also prevent
              # an unintended item update
              return

          if not no_existing_item:
              # we have no indication that the item this is about does not
              # already exist, search for it
              try:
                  # give the parent as a starting item, to speed things up
                  target_item = self._get_item_from_path(
                      ipath, target_item_parent)
              except ValueError:
                  # it is quite possible that the item does not exist yet.
                  # but such cases are expensive, and the triggering code
                  # could consider sending the 'gooey_no_existing_item' flag
                  pass

          if target_item is None:
              # we don't have such an item yet -> make one
              target_item = FSBrowserItem.from_lsdir_result(
                  res, target_item_parent)
          else:
              # we do have this already, good occasion to update it
              target_item.update_from_lsdir_result(res)

      def _get_item_from_path(self, path: Path, root: FSBrowserItem = None):
          """Return the tree item for `path`, searching below `root`.

          Lookups are deliberately NOT memoized: tree items get removed and
          re-created when the file system changes, and a cached lookup
          would keep handing out items that are no longer part of the tree.

          Parameters
          ----------
          path: Path
            Path to look up.
          root: FSBrowserItem, optional
            Item to start the search from. Defaults to the tree's root item.

          Raises
          ------
          ValueError
            If `path` is not located underneath the start item, or no
            matching item exists.
          """
          # this is a key function in terms of result UI snappiness
          # it must be as fast as anyhow possible
          item = self._root_item if root is None else root
          ipath = item.pathobj
          if path == ipath:
              return item
          # otherwise look for the item with the right name at the
          # respective level
          try:
              return self._get_item_from_trace(
                  item, path.relative_to(ipath).parts)
          except ValueError as e:
              raise ValueError(f'Cannot find item for {path}') from e

      def _get_item_from_trace(self, root: FSBrowserItem, trace: List):
          """Descend from `root` along the child names given in `trace`.

          Raises
          ------
          ValueError
            If a lookup along the way yields ``None``.
          """
          item = root
          for p in trace:
              item = item[p]
              if item is None:
                  raise ValueError(f'Cannot find item for {trace}')
          return item

      def _queue_item_for_annotation(self, item):
          """This is not thread-safe

          `item` should be of type 'directory' or 'dataset' for meaningful
          behavior.
          """
          # wait for at least half a sec longer after a new request came in
          # to avoid DDOS'ing the facility?
          if self._annotation_timer.remainingTime() < 500:
              self._annotation_timer.start(500)
          self._annotation_queue.add(item)

      def _process_item_annotation_queue(self):
          """Annotate all queued items, unless commands are still running.

          Called on timeout of the annotation timer. While commands are
          executing, processing is postponed by a second, so that tree
          population can finish first.
          """
          if not self._annotation_queue:
              return
          if self._app._cmdexec.n_running:
              # stuff is still running
              # make sure the population of the tree items is done too!
              self._annotation_timer.start(1000)
              return

          # there is stuff to annotate, make sure we do not trigger more
          # annotations while this one is running
          self._annotation_timer.stop()
          try:
              lgr.log(
                  9,
                  "GooeyFilesystemBrowser: annotating %i queued item(s)",
                  len(self._annotation_queue),
              )
              # TODO stuff could be optimized here: collapsing multiple
              # directories belonging to the same dataset into a single
              # `status` call...
              while self._annotation_queue:
                  # the queue is a set, hence items come out in arbitrary
                  # order.
                  # TODO ideally the most recently queued item would go
                  # first (assumption: when expanding tree nodes one after
                  # another, attention is on the last expanded one)
                  self._annotate_item(self._annotation_queue.pop())
          finally:
              # restart annotation watcher, also when annotation crashed,
              # otherwise no queued item would ever be processed again
              self._annotation_timer.start(self._annotation_timer_interval)

      def _annotate_item(self, item):
          """Determine the state of the non-directory children of `item`."""
          lgr.log(9, "GooeyFilesystemBrowser: annotate %r", item)
          ipath = item.pathobj
          dsroot = get_dataset_root(ipath)
          if dsroot is None:
              # no containing dataset, by definition everything is untracked
              for child in item.children_():
                  # get type, only annotate non-directory items
                  if child.datalad_type != 'directory':
                      child.update_from_status_result(
                          dict(state='untracked'))
              return

          # do not run, if there are no relevant paths to inspect
          if not any(c.datalad_type != 'directory'
                     for c in item.children_()):
              return

          # trigger datalad-gooey-status-light execution
          # giving the target directory as a `path` argument should
          # avoid undesired recursion into subDIRECTORIES
          self._app.execute_dataladcmd.emit(
              'gooey_status_light',
              dict(
                  dataset=dsroot,
                  path=[ipath],
                  #annex='basic',
                  result_renderer='disabled',
                  on_failure='ignore',
                  return_type='generator',
              ),
              dict(
                  preferred_result_interval=3.0,
                  result_override=dict(
                      gooey_parent_item=item,
                  ),
              ),
          )

      def _status_result_receiver(self, res):
          """Apply one status result to the matching child of its parent."""
          if res.get('action') != 'status':
              # not what we are looking for
              return
          path = res.get('path')
          parent_item = res.get('gooey_parent_item')
          if path is None or parent_item is None:
              # nothing that we could handle
              return
          try:
              target_item = self._get_item_from_trace(
                  parent_item,
                  # the parent will only ever be the literal parent directory
                  [Path(path).name],
              )
          except ValueError:
              # the corresponding item is no longer around
              return

          target_item.update_from_status_result(res)

      # DONE
      def _watch_dir(self, item):
          """Start watching the directory of `item` for changes."""
          path = item.pathobj
          lgr.log(
              9,
              "GooeyFilesystemBrowser._watch_dir(%r)",
              path,
          )
          self._fswatcher.addPath(str(path))
          if item.datalad_type == 'dataset':
              # for a repository, also watch its .git to become aware of more
              # Git operation outcomes. specifically watch the HEADS to catch
              # updates on any branch
              self._fswatcher.addPath(str(path / '.git' / 'refs' / 'heads'))

      # DONE
      # https://github.com/datalad/datalad-gooey/issues/50
      def _unwatch_dir(self, item):
          """Stop watching everything `_watch_dir()` registered for `item`."""
          path = str(item.pathobj)
          removed = self._fswatcher.removePath(path)
          lgr.log(
              9,
              "GooeyFilesystemBrowser._unwatch_dir(%r) -> %r",
              path,
              removed,
          )
          if item.datalad_type == 'dataset':
              # counterpart of the extra watch set up in _watch_dir()
              self._fswatcher.removePath(
                  str(item.pathobj / '.git' / 'refs' / 'heads'))

      # DONE
      def _inspect_changed_dir(self, path: str):
          """React to a change of the watched directory `path`.

          Items without a file system counterpart are removed from the
          tree, and a re-listing of the directory is requested. A change of
          a dataset's ``.git/refs/heads`` is handled as a change of the
          dataset root directory.
          """
          pathobj = Path(path)
          # look for special case of the internals of a dataset having changed
          path_parts = pathobj.parts
          if len(path_parts) > 3 \
                  and path_parts[-3:] == ('.git', 'refs', 'heads'):
              # yep, happened -- inspect corresponding dataset root.
              # pass a `str`, like the watcher does, because the path may
              # be handed back to the watcher below
              self._inspect_changed_dir(str(pathobj.parent.parent.parent))
              return

          lgr.log(9, "GooeyFilesystemBrowser._inspect_changed_dir(%r)", pathobj)
          # we need to know the item in the tree corresponding
          # to the changed directory
          try:
              item = self._get_item_from_path(pathobj)
          except ValueError:
              # the changed dir has no (longer) a matching entry in the
              # tree model. make sure to take it off the watch list
              self._fswatcher.removePath(path)
              lgr.log(9, "_inspect_changed_dir() -> not in view (anymore), "
                         "removed from watcher")
              return

          parent = item.parent()
          if not pathobj.exists():
              if parent is None:
                  # TODO we could have lost the root dir -> special action
                  raise NotImplementedError
              parent.removeChild(item)
              lgr.log(8, "-> _inspect_changed_dir() -> item removed")
              return

          # we will kick off a `lsdir` run to update the widget, but it could
          # not detect items that no longer have a file system counterpart
          # so we remove them here and now.
          # iterate over a snapshot, because children get removed on the go
          for child in list(item.children_()):
              try:
                  # same as lexists() but with pathlib
                  child.pathobj.lstat()
              except (OSError, ValueError):
                  item.removeChild(child)
          # now re-explore
          self._populate_and_annotate(item, no_existing_children=False)
          lgr.log(9, "_inspect_changed_dir() -> requested update")

      # DONE
      def _custom_context_menu(self, onpoint):
          """Present a context menu for the item click in the directory browser
          """
          # get the tree item for the coordinate that received the
          # context menu request
          item = self._tree.itemAt(onpoint)
          if not item:
              # prevent context menus when the request did not actually
              # land on an item
              return
          # what kind of path is this item representing
          path_type = item.datalad_type
          if path_type is None:
              # we don't know what to do with this (but it also is not expected
              # to happen)
              return
          ipath = item.pathobj
          cmdkwargs = dict()
          context = QMenu(parent=self._tree)
          if path_type == 'dataset':
              from .active_api import dataset_api as cmdapi
              submenu = context.addMenu('Dataset commands')
              cmdkwargs['dataset'] = ipath
          elif path_type == 'directory':
              dsroot = get_dataset_root(ipath)
              # pass the directory path to the command's `path` argument
              cmdkwargs['path'] = ipath
              if dsroot:
                  from .active_api import directory_in_ds_api as cmdapi
                  # also pass dsroot
                  cmdkwargs['dataset'] = dsroot
              else:
                  from .active_api import directory_api as cmdapi
              submenu = context.addMenu('Directory commands')
          elif path_type in ('file', 'symlink'):
              dsroot = get_dataset_root(ipath)
              cmdkwargs['path'] = ipath
              if dsroot:
                  from .active_api import file_in_ds_api as cmdapi
                  cmdkwargs['dataset'] = dsroot
              else:
                  from .active_api import file_api as cmdapi
              submenu = context.addMenu('File commands')
          else:
              # no command API is known for this type of item, and there
              # would be nothing to put into a menu
              lgr.debug(
                  'No context menu available for item type %r', path_type)
              return
          # TODO context menu for annex'ed files

          add_cmd_actions_to_menu(
              self._tree, self._app._cmdui.configure,
              cmdapi,
              submenu,
              cmdkwargs,
          )

          if not context.isEmpty():
              # present the menu at the clicked point
              context.exec(self._tree.viewport().mapToGlobal(onpoint))