Scheduled service maintenance on November 22


On Friday, November 22, 2024, between 06:00 CET and 18:00 CET, GIN services will undergo planned maintenance. Extended service interruptions should be expected. We will try to keep downtimes to a minimum, but recommend that users avoid critical tasks, large data uploads, or DOI requests during this time.

We apologize for any inconvenience.

status_light.py 9.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. """DataLad GUI status helper"""
  2. __docformat__ = 'restructuredtext'
  3. import logging
  4. from pathlib import (
  5. Path,
  6. PurePosixPath,
  7. )
  8. from datalad.interface.base import Interface
  9. from datalad.interface.base import build_doc
  10. from datalad.support.param import Parameter
  11. from datalad.interface.utils import eval_results
  12. from datalad.distribution.dataset import (
  13. EnsureDataset,
  14. resolve_path,
  15. require_dataset,
  16. )
  17. from datalad.support.constraints import (
  18. EnsureNone,
  19. )
  20. from datalad.utils import ensure_list
  21. from .lsdir import GooeyLsDir
  22. lgr = logging.getLogger('datalad.ext.gooey.status_light')
@build_doc
class GooeyStatusLight(Interface):
    """Internal helper for datalad-gooey

    Yields a status-annotated listing of a single directory in a dataset.
    Not a general-purpose status command.
    """
    _params_ = dict(
        dataset=Parameter(
            args=("-d", "--dataset"),
            doc="""specify the dataset to query. If
            no dataset is given, an attempt is made to identify the dataset
            based on the current working directory""",
            constraints=EnsureDataset() | EnsureNone()),
        path=Parameter(
            args=("path", ),
            doc="""""",
        )
    )

    @staticmethod
    @eval_results
    def __call__(dataset=None,
                 # exactly one directory path is expected (see assert below)
                 path: "Path | str | None" = None,
                 ):
        # This needs to be kept simple and as fast as anyhow possible.
        # Anything that is not absolutely crucial to have should have
        # an inexpensive switch to turn it off (or be off by default).
        # This command is an internal helper of gooey, it has no ambition
        # to generalize, although the components it uses internally
        # might have applicability in a broader scope.
        ds = require_dataset(
            dataset,
            # in-principle a good thing, but off for speed
            check_installed=False,
            purpose='report status',
        )
        repo = ds.repo
        repo_path = repo.pathobj
        # normalize paths according to standard datalad rules
        paths = [resolve_path(p, dataset) for p in ensure_list(path)]
        # recode paths with repo reference for low-level API
        repo_paths = [repo_path / p.relative_to(ds.pathobj) for p in paths]
        # this helper only handles a single directory query at a time
        assert len(paths) == 1
        # mapping:: repo_path -> current type
        modified = _get_worktree_modifications(
            repo,
            # put in repo paths!!
            repo_paths,
        )
        # put in repo paths!!
        untracked = _get_untracked(repo, repo_paths)
        # put in repo paths!!
        # annex info is only available when the repo object has the
        # annex API (plain Git repos do not)
        annex = _get_annexinfo(repo, repo_paths[0]) \
            if hasattr(repo, 'call_annex_records') else {}

        # sentinel to distinguish "no modification recorded" from a
        # recorded deletion (which is represented by `None`)
        class _NoValue:
            pass

        for r in GooeyLsDir.__call__(
                # we put in repo paths! match against those!
                repo_paths[0],
                return_type='generator',
                on_failure='ignore',
                result_renderer='disabled'):
            # the status mapping uses Path objects
            path = Path(r['path'])
            moreprops = dict(
                action='status',
                refds=ds.path,
            )
            modtype = modified.get(path, _NoValue)
            if modtype is not _NoValue:
                # we have a modification
                moreprops['state'] = \
                    'deleted' if modtype is None else 'modified'
                if modtype:
                    # if it is None (deleted), keep the old one
                    # as an annotation of what it was previously.
                    # apply directly, to simplify logic below
                    r['type'] = modtype
            if 'state' not in moreprops and r['type'] != 'directory':
                # there is not really a state for a directory in Git.
                # assigning one in this annotate-a-single-dir approach
                # would make the impression that everything underneath
                # is clean, which we simply do not know
                # there was no modification detected, so we either
                # have it clean or untracked
                moreprops['state'] = \
                    'untracked' if path in untracked else 'clean'
            # recode path into the dataset domain
            moreprops['path'] = str(ds.pathobj / path.relative_to(repo_path))
            r.update(moreprops)
            # pull in annex info, if there is any
            r.update(annex.get(path, {}))
            if 'key' in r and r.get('type') == 'symlink':
                # a symlink with a key is an annexed file
                r['type'] = 'file'
            yield r
# lifted from https://github.com/datalad/datalad/pull/6797
# mode identifiers used by Git (ls-files, ls-tree), mapped to
# type identifiers as used in command results
GIT_MODE_TYPE_MAP = {
    # regular file
    '100644': 'file',
    # we do not distinguish executables
    '100755': 'file',
    # tree
    '040000': 'directory',
    '120000': 'symlink',
    # submodule commit (a subdataset in datalad terms)
    '160000': 'dataset',
}
  126. # lifted from https://github.com/datalad/datalad/pull/6797
  127. def _get_worktree_modifications(self, paths=None):
  128. """Report working tree modifications
  129. Parameters
  130. ----------
  131. paths : list or None
  132. If given, limits the query to the specified paths. To query all
  133. paths specify `None`, not an empty list.
  134. Returns
  135. -------
  136. dict
  137. Mapping of modified Paths to type labels from GIT_MODE_TYPE_MAP.
  138. Deleted paths have type `None` assigned.
  139. """
  140. # because of the way git considers smudge filters in modification
  141. # detection we have to consult two commands to get a full picture, see
  142. # https://github.com/datalad/datalad/issues/6791#issuecomment-1193145967
  143. # low-level code cannot handle pathobjs
  144. consider_paths = [str(p) for p in paths] if paths else None
  145. # first ask diff-files which can report typechanges. it gives a list with
  146. # interspersed diff info and filenames
  147. mod = list(self.call_git_items_(
  148. ['diff-files',
  149. # without this, diff-files would run a full status (recursively)
  150. # but we are at most interested in a subproject commit
  151. # change within the scope of this repo
  152. '--ignore-submodules=dirty',
  153. # hopefully making things faster by turning off features
  154. # we would not benefit from (at least for now)
  155. '--no-renames',
  156. '-z'
  157. ],
  158. files=consider_paths,
  159. sep='\0',
  160. read_only=True,
  161. ))
  162. # convert into a mapping path to type
  163. modified = dict(zip(
  164. # paths are every other element, starting from the second
  165. mod[1::2],
  166. # mark `None` for deletions, and take mode reports otherwise
  167. # (for simplicity keep leading ':' in prev mode for now)
  168. (None if spec.endswith('D') else spec.split(' ', maxsplit=2)[:2]
  169. for spec in mod[::2])
  170. ))
  171. # `diff-files` cannot give us the full answer to "what is modified"
  172. # because it won't consider what smudge filters could do, for this
  173. # we need `ls-files --modified` to exclude any paths that are not
  174. # actually modified
  175. modified_files = set(
  176. p for p in self.call_git_items_(
  177. # we need not look for deleted files, diff-files did that
  178. ['ls-files', '-z', '-m'],
  179. files=consider_paths,
  180. sep='\0',
  181. read_only=True,
  182. )
  183. # skip empty lines
  184. if p
  185. )
  186. modified = {
  187. # map to the current type, in case of a typechange
  188. # keep None for a deletion
  189. k: v if v is None else v[1]
  190. for k, v in modified.items()
  191. # a deletion
  192. if v is None
  193. # a typechange, strip the leading ":" for a valid comparison
  194. or v[0][1:] != v[1]
  195. # a plain modification after running possible smudge filters
  196. or k in modified_files
  197. }
  198. # convenience-map to type labels, leave raw mode if unrecognized
  199. # (which really should not happen)
  200. modified = {
  201. self.pathobj / PurePosixPath(k):
  202. GIT_MODE_TYPE_MAP.get(v, v) for k, v in modified.items()
  203. }
  204. return modified
  205. # lifted from https://github.com/datalad/datalad/pull/6797
  206. def _get_untracked(self, paths=None):
  207. """Report untracked content in the working tree
  208. Parameters
  209. ----------
  210. paths : list or None
  211. If given, limits the query to the specified paths. To query all
  212. paths specify `None`, not an empty list.
  213. Returns
  214. -------
  215. set
  216. of Path objects
  217. """
  218. # because of the way git considers smudge filters in modification
  219. # detection we have to consult two commands to get a full picture, see
  220. # https://github.com/datalad/datalad/issues/6791#issuecomment-1193145967
  221. # low-level code cannot handle pathobjs
  222. consider_paths = [str(p) for p in paths] if paths else None
  223. untracked_files = set(
  224. self.pathobj / PurePosixPath(p)
  225. for p in self.call_git_items_(
  226. ['ls-files', '-z', '-o'],
  227. files=consider_paths,
  228. sep='\0',
  229. read_only=True,
  230. )
  231. # skip empty lines
  232. if p
  233. )
  234. return untracked_files
  235. def _get_annexinfo(self, path):
  236. rpath = str(path.relative_to(self.path))
  237. match_prefix = f'{rpath}/' if rpath != '.' else ''
  238. return {
  239. self.pathobj / PurePosixPath(r['file']):
  240. # include the hashdirs, to enable a consumer to do a
  241. # "have-locally" check
  242. {
  243. k: r[k]
  244. for k in ('bytesize', 'key', 'hashdirlower', 'hashdirmixed')
  245. # for now exclude, but what is likely happening below is
  246. # that we hit an untracked file
  247. # lsdir() will report that too, so not much is lost,
  248. # but maybe other errors can happen too
  249. if k in r
  250. }
  251. for r in self.call_annex_records(
  252. ['find',
  253. # include any
  254. '--include', f'{match_prefix}*',
  255. # exclude any records within subdirs of rpath
  256. '--exclude', f'{match_prefix}*/*',
  257. ],
  258. files=[rpath],
  259. )
  260. }