utils.py 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. import os
  2. import sys
  3. from typing import Optional, Generator, Tuple
  4. from uuid import UUID
  5. from tools.metadata_creator.execute import checked_execute
# Name of the hidden sub-directory whose presence inside a directory marks
# that directory as a DataLad dataset (this is what has_datalad_dir() looks for).
DATALAD_DATASET_HIDDEN_DIR_NAME = ".datalad"
  7. def get_dataset_id(path) -> Optional[UUID]:
  8. config_file_path = path + "/.datalad/config"
  9. try:
  10. with open(config_file_path) as f:
  11. for line in f.readlines():
  12. elements = line.split()
  13. if elements[:2] == ["id", "="]:
  14. return UUID(elements[2])
  15. print("WARNING: no dataset id in config file: " + config_file_path, file=sys.stderr)
  16. return None
  17. except FileNotFoundError:
  18. print("WARNING: could not open config file: " + config_file_path, file=sys.stderr)
  19. return None
  20. def has_datalad_dir(path: str) -> bool:
  21. return any(
  22. filter(
  23. lambda e: e.is_dir(follow_symlinks=False) and e.name == DATALAD_DATASET_HIDDEN_DIR_NAME,
  24. os.scandir(path)))
  25. def is_dataset_dir(entry: os.DirEntry) -> bool:
  26. return entry.is_dir(follow_symlinks=False) and has_datalad_dir(entry.path)
  27. def should_follow(entry: os.DirEntry, ignore_dot_dirs) -> bool:
  28. return (
  29. entry.is_dir(follow_symlinks=False)
  30. and not entry.name.startswith(".") or ignore_dot_dirs is False)
  31. def get_dataset_version(path) -> Optional[str]:
  32. git_dir = path + "/.git"
  33. try:
  34. return checked_execute(
  35. ["git", f"--git-dir", git_dir, "log", "-1", "--pretty=format:%H"]
  36. )[0].strip()
  37. except RuntimeError:
  38. return None
  39. def read_datasets(path: str, ignore_dot_dirs: bool = True) -> Generator[Tuple[str, os.DirEntry], None, None]:
  40. """ Return all datasets and paths """
  41. path = path.rstrip("/")
  42. if has_datalad_dir(path):
  43. path_entry = tuple(filter(lambda e: path.endswith(e.name), os.scandir(path + "/..")))[0]
  44. yield "", path_entry
  45. entries = list(os.scandir(path))
  46. while entries:
  47. entry = entries.pop()
  48. if is_dataset_dir(entry):
  49. yield entry.path[len(path) + 1:], entry
  50. if should_follow(entry, ignore_dot_dirs):
  51. entries.extend(list(os.scandir(entry.path)))