filetreecreator.py 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394
  1. import os
  2. from typing import Generator, Tuple
  3. from dataladmetadatamodel import JSONObject
  4. from dataladmetadatamodel.filetree import FileTree
  5. from dataladmetadatamodel.metadata import ExtractorConfiguration
  # Name of the hidden sub-directory that is_dataset_dir() looks for when
  # deciding whether a directory is a dataset directory.
  DATALAD_DATASET_HIDDEN_DIR_NAME = ".datalad"


  def is_dataset_dir(entry: os.DirEntry) -> bool:
      """Return True if the directory behind *entry* directly contains a
      real (non-symlink) directory named DATALAD_DATASET_HIDDEN_DIR_NAME.

      Only the immediate children of ``entry.path`` are inspected.  A regular
      file or a symlink carrying that name does not count.

      :param entry: directory entry to inspect; ``entry.path`` is handed to
          ``os.scandir``, which raises ``OSError`` if it cannot be listed
          (for example because it is not a directory).
      :return: True if such a child directory exists, False otherwise.
      """
      # any() stops at the first hit, which would leave a bare scandir
      # iterator open; the context manager guarantees it is closed.
      with os.scandir(entry.path) as children:
          return any(
              child.name == DATALAD_DATASET_HIDDEN_DIR_NAME
              and child.is_dir(follow_symlinks=False)
              for child in children
          )
  def should_follow(entry: os.DirEntry, ignore_dot_dirs) -> bool:
      """Decide whether a directory walk should descend into *entry*.

      The answer is True only when all of the following hold:

      * *entry* is a directory itself (symlinks are not followed),
      * its name does not start with "." -- unless ``ignore_dot_dirs`` is
        exactly ``False``, in which case dot-directories are allowed,
      * ``is_dataset_dir(entry)`` is false.

      :param entry: directory entry under consideration.
      :param ignore_dot_dirs: dot-directories are rejected unless this is the
          object ``False`` (identity check, not truthiness).
      :return: True if the entry should be descended into.
      """
      if not entry.is_dir(follow_symlinks=False):
          return False
      hidden = entry.name.startswith(".")
      if hidden and ignore_dot_dirs is not False:
          return False
      return not is_dataset_dir(entry)
  def read_files(path: str, ignore_dot_dirs: bool = True) -> Generator[Tuple[str, os.DirEntry], None, None]:
      """Yield every non-directory entry found below *path*.

      Directories for which ``should_follow`` returns True are descended into.
      Every other entry is yielded if it is not a directory (symlinks are
      followed for this check) and its name does not start with ".".  Entries
      whose name starts with "." are never yielded, whatever the value of
      ``ignore_dot_dirs``; that flag is only passed on to ``should_follow``.

      Pending entries are kept on a stack, so results come out in
      depth-first, last-listed-first order rather than sorted.

      :param path: root directory of the walk; a trailing path separator is
          accepted.
      :param ignore_dot_dirs: forwarded to ``should_follow``.
      :return: generator of ``(relative_path, entry)`` tuples, where
          ``relative_path`` is ``entry.path`` expressed relative to *path*.
      """
      with os.scandir(path) as top_level:
          pending = list(top_level)
      while pending:
          entry = pending.pop()
          if should_follow(entry, ignore_dot_dirs):
              with os.scandir(entry.path) as children:
                  pending.extend(children)
          elif not entry.is_dir() and not entry.name.startswith("."):
              # relpath, not entry.path[len(path) + 1:], so that roots such as
              # "dir/" or "/" do not lose the first character of each result.
              yield os.path.relpath(entry.path, path), entry
  def get_extractor_run(path: str,
                        entry: os.DirEntry,
                        parameter_set_count: int) -> JSONObject:
      """Build the test metadata record for one file and one parameter set.

      :param path: value stored unchanged under the "path" key.
      :param entry: directory entry whose stat result (symlinks not followed)
          supplies the "size", "atime", "ctime" and "mtime" values.
      :param parameter_set_count: number interpolated into the "info" text.
      :return: dict with the keys "info", "path", "size", "atime", "ctime"
          and "mtime", in that order.
      """
      file_stat = entry.stat(follow_symlinks=False)
      run_record = {
          "info": f"file-level test metadata for parameter set #{parameter_set_count}",
          "path": path,
      }
      # Copy st_size, st_atime, st_ctime and st_mtime under their short names.
      for field in ("size", "atime", "ctime", "mtime"):
          run_record[field] = getattr(file_stat, "st_" + field)
      return run_record
  def create_file_tree(mapper_family: str,
                       realm: str,
                       root_dir: str,
                       parameter_set_count: int
                       ) -> FileTree:
      """Create a new FileTree and fill it via ``update_file_tree``.

      :param mapper_family: passed to the FileTree constructor and to
          ``update_file_tree``.
      :param realm: passed to the FileTree constructor and to
          ``update_file_tree``.
      :param root_dir: directory whose files are added to the tree.
      :param parameter_set_count: number of extractor runs recorded per file.
      :return: the newly created, populated FileTree.
      """
      new_tree = FileTree(mapper_family, realm)
      update_file_tree(
          mapper_family=mapper_family,
          realm=realm,
          file_tree=new_tree,
          root_dir=root_dir,
          parameter_set_count=parameter_set_count,
      )
      return new_tree
  def update_file_tree(mapper_family: str,
                       realm: str,
                       file_tree: FileTree,
                       root_dir: str,
                       parameter_set_count: int):
      """Add extractor-run records for every file below *root_dir* to *file_tree*.

      For each ``(path, entry)`` pair produced by ``read_files(root_dir)`` and
      for each index in ``range(parameter_set_count)``, one call to
      ``file_tree.add_extractor_run`` is made.  Each call receives an
      ExtractorConfiguration built from two index-dependent parameters and
      the record returned by ``get_extractor_run``.

      :param mapper_family: forwarded to ``add_extractor_run``.
      :param realm: forwarded to ``add_extractor_run``.
      :param file_tree: tree that receives the records; modified in place.
      :param root_dir: directory that is scanned for files.
      :param parameter_set_count: number of records added per file.
      """
      for relative_path, dir_entry in read_files(root_dir):
          for set_index in range(parameter_set_count):
              extractor_parameters = {
                  "fs_parameter_0": f"value_0.{set_index}",
                  "fs_parameter_1": f"value_1.{set_index}"
              }
              extractor_config = ExtractorConfiguration("1.0.0", extractor_parameters)
              run_record = get_extractor_run(relative_path, dir_entry, set_index)
              # The fourth positional argument is None; its meaning is defined
              # by FileTree.add_extractor_run, which is not visible in this file.
              file_tree.add_extractor_run(
                  mapper_family,
                  realm,
                  relative_path,
                  None,
                  "file-core-extractor",
                  "metadata_creator script",
                  "support@datalad.org",
                  extractor_config,
                  run_record)