"""DataLad GUI status helper""" __docformat__ = 'restructuredtext' import logging from pathlib import ( Path, PurePosixPath, ) from datalad.interface.base import Interface from datalad.interface.base import build_doc from datalad.support.param import Parameter from datalad.interface.utils import eval_results from datalad.distribution.dataset import ( EnsureDataset, resolve_path, require_dataset, ) from datalad.support.constraints import ( EnsureNone, ) from datalad.utils import ensure_list from .lsdir import GooeyLsDir lgr = logging.getLogger('datalad.ext.gooey.status_light') @build_doc class GooeyStatusLight(Interface): """Internal helper for datalad-gooey""" _params_ = dict( dataset=Parameter( args=("-d", "--dataset"), doc="""specify the dataset to query. If no dataset is given, an attempt is made to identify the dataset based on the current working directory""", constraints=EnsureDataset() | EnsureNone()), path=Parameter( args=("path", ), doc="""""", ) ) @staticmethod @eval_results def __call__(dataset=None, path: Path or str or None = None, ): # This needs to be keep simple and as fast as anyhow possible. # anything that is not absolutely crucial to have should have # an inexpensive switch to turn it off (or be off by default. # This command is an internal helper of gooey, it has no ambition # to generalize, although the components it uses internally # might have applicability in a broader scope. ds = require_dataset( dataset, # in-principle a good thing, but off for speed check_installed=False, purpose='report status', ) repo = ds.repo repo_path = repo.pathobj # normalize paths according to standard datalad rules paths = [resolve_path(p, dataset) for p in ensure_list(path)] # recode paths with repo reference for low-level API repo_paths = [repo_path / p.relative_to(ds.pathobj) for p in paths] assert len(paths) == 1 # mapping:: repo_path -> current type modified = _get_worktree_modifications( repo, # put in repo paths!! repo_paths, ) # put in repo paths!! untracked = _get_untracked(repo, repo_paths) # put in repo paths!! annex = _get_annexinfo(repo, repo_paths[0]) \ if hasattr(repo, 'call_annex_records') else {} class _NoValue: pass for r in GooeyLsDir.__call__( # we put in repo paths! match against those! repo_paths[0], return_type='generator', on_failure='ignore', result_renderer='disabled'): # the status mapping use Path objects path = Path(r['path']) moreprops = dict( action='status', refds=ds.path, ) modtype = modified.get(path, _NoValue) if modtype is not _NoValue: # we have a modification moreprops['state'] = \ 'deleted' if modtype is None else 'modified' if modtype: # if it is None (deleted), keep the old one # as an annotation of what it was previously. # apply directly, to simplify logic below r['type'] = modtype if 'state' not in moreprops and r['type'] != 'directory': # there is not really a state for a directory in Git. # assigning one in this annotate-a-single-dir approach # would make the impression that everything underneath # is clean, which we simply do not know # there was no modification detected, so we either # have it clean or untracked moreprops['state'] = \ 'untracked' if path in untracked else 'clean' # recode path into the dataset domain moreprops['path'] = str(ds.pathobj / path.relative_to(repo_path)) r.update(moreprops) # pull in annex info, if there is any r.update(annex.get(path, {})) if 'key' in r and r.get('type') == 'symlink': # a symlink with a key is an annexed file r['type'] = 'file' yield r # lifted from https://github.com/datalad/datalad/pull/6797 # mode identifiers used by Git (ls-files, ls-tree), mapped to # type identifiers as used in command results GIT_MODE_TYPE_MAP = { '100644': 'file', # we do not distinguish executables '100755': 'file', '040000': 'directory', '120000': 'symlink', '160000': 'dataset', } # lifted from https://github.com/datalad/datalad/pull/6797 def _get_worktree_modifications(self, paths=None): """Report working tree modifications Parameters ---------- paths : list or None If given, limits the query to the specified paths. To query all paths specify `None`, not an empty list. Returns ------- dict Mapping of modified Paths to type labels from GIT_MODE_TYPE_MAP. Deleted paths have type `None` assigned. """ # because of the way git considers smudge filters in modification # detection we have to consult two commands to get a full picture, see # https://github.com/datalad/datalad/issues/6791#issuecomment-1193145967 # low-level code cannot handle pathobjs consider_paths = [str(p) for p in paths] if paths else None # first ask diff-files which can report typechanges. it gives a list with # interspersed diff info and filenames mod = list(self.call_git_items_( ['diff-files', # without this, diff-files would run a full status (recursively) # but we are at most interested in a subproject commit # change within the scope of this repo '--ignore-submodules=dirty', # hopefully making things faster by turning off features # we would not benefit from (at least for now) '--no-renames', '-z' ], files=consider_paths, sep='\0', read_only=True, )) # convert into a mapping path to type modified = dict(zip( # paths are every other element, starting from the second mod[1::2], # mark `None` for deletions, and take mode reports otherwise # (for simplicity keep leading ':' in prev mode for now) (None if spec.endswith('D') else spec.split(' ', maxsplit=2)[:2] for spec in mod[::2]) )) # `diff-files` cannot give us the full answer to "what is modified" # because it won't consider what smudge filters could do, for this # we need `ls-files --modified` to exclude any paths that are not # actually modified modified_files = set( p for p in self.call_git_items_( # we need not look for deleted files, diff-files did that ['ls-files', '-z', '-m'], files=consider_paths, sep='\0', read_only=True, ) # skip empty lines if p ) modified = { # map to the current type, in case of a typechange # keep None for a deletion k: v if v is None else v[1] for k, v in modified.items() # a deletion if v is None # a typechange, strip the leading ":" for a valid comparison or v[0][1:] != v[1] # a plain modification after running possible smudge filters or k in modified_files } # convenience-map to type labels, leave raw mode if unrecognized # (which really should not happen) modified = { self.pathobj / PurePosixPath(k): GIT_MODE_TYPE_MAP.get(v, v) for k, v in modified.items() } return modified # lifted from https://github.com/datalad/datalad/pull/6797 def _get_untracked(self, paths=None): """Report untracked content in the working tree Parameters ---------- paths : list or None If given, limits the query to the specified paths. To query all paths specify `None`, not an empty list. Returns ------- set of Path objects """ # because of the way git considers smudge filters in modification # detection we have to consult two commands to get a full picture, see # https://github.com/datalad/datalad/issues/6791#issuecomment-1193145967 # low-level code cannot handle pathobjs consider_paths = [str(p) for p in paths] if paths else None untracked_files = set( self.pathobj / PurePosixPath(p) for p in self.call_git_items_( ['ls-files', '-z', '-o'], files=consider_paths, sep='\0', read_only=True, ) # skip empty lines if p ) return untracked_files def _get_annexinfo(self, path): rpath = str(path.relative_to(self.path)) match_prefix = f'{rpath}/' if rpath != '.' else '' return { self.pathobj / PurePosixPath(r['file']): # include the hashdirs, to enable a consumer to do a # "have-locally" check { k: r[k] for k in ('bytesize', 'key', 'hashdirlower', 'hashdirmixed') # for now exclude, but what is likely happening below is # that we hit an untracked file # lsdir() will report that too, so not much is lost, # but maybe other errors can happen too if k in r } for r in self.call_annex_records( ['find', # include any '--include', f'{match_prefix}*', # exclude any records within subdirs of rpath '--exclude', f'{match_prefix}*/*', ], files=[rpath], ) }