12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 |
- import os
- from typing import Generator, Tuple
- from dataladmetadatamodel import JSONObject
- from dataladmetadatamodel.filetree import FileTree
- from dataladmetadatamodel.metadata import ExtractorConfiguration
# Directory name that is_dataset_dir() looks for inside a directory.
DATALAD_DATASET_HIDDEN_DIR_NAME = ".datalad"


def is_dataset_dir(entry: os.DirEntry) -> bool:
    """Return True if the directory at ``entry`` directly contains a dataset marker.

    The marker is a real sub-directory (symbolic links are not followed)
    whose name equals ``DATALAD_DATASET_HIDDEN_DIR_NAME``.

    Args:
        entry: Directory entry to inspect; ``entry.path`` is scanned.

    Returns:
        True if a matching sub-directory exists, otherwise False.

    Raises:
        OSError: If ``entry.path`` cannot be scanned (propagated from
            ``os.scandir``).
    """
    # any() stops at the first match, which would leave the scandir iterator
    # open; the context manager guarantees it is closed in every case.
    with os.scandir(entry.path) as children:
        return any(
            child.name == DATALAD_DATASET_HIDDEN_DIR_NAME
            and child.is_dir(follow_symlinks=False)
            for child in children
        )
def should_follow(entry: os.DirEntry, ignore_dot_dirs) -> bool:
    """Decide whether a directory traversal should descend into ``entry``.

    Args:
        entry: Directory entry under consideration.
        ignore_dot_dirs: Unless this is exactly ``False``, directories whose
            name starts with "." are not followed.

    Returns:
        True only if ``entry`` is a real directory (not a symbolic link),
        passes the dot-directory rule above, and is not recognized as a
        dataset directory by ``is_dataset_dir``.
    """
    # Symbolic links and non-directories are never followed.
    if not entry.is_dir(follow_symlinks=False):
        return False
    # The identity check is deliberate: only the literal False disables
    # the dot-directory filter.
    if entry.name.startswith(".") and ignore_dot_dirs is not False:
        return False
    return not is_dataset_dir(entry)
def read_files(path: str, ignore_dot_dirs: bool = True) -> Generator[Tuple[str, os.DirEntry], None, None]:
    """Yield all non-hidden files found below ``path``.

    Directories are descended into when ``should_follow`` accepts them.
    Every other entry is yielded if it is not a directory (symbolic links
    are followed for this check) and its name does not start with ".".

    Args:
        path: Root directory of the traversal. A trailing path separator
            is allowed.
        ignore_dot_dirs: Passed to ``should_follow``; controls whether
            directories whose name starts with "." are skipped.

    Yields:
        Tuples ``(relative_path, entry)`` where ``relative_path`` is the
        entry's path with the ``path`` prefix removed.

    Raises:
        OSError: If a directory cannot be scanned (propagated from
            ``os.scandir``).
    """
    # DirEntry.path is equivalent to os.path.join(scanned_path, name), so
    # joining with "" yields exactly the prefix to strip. Using
    # len(path) + 1 would drop the first character of every relative path
    # when ``path`` already ends with a separator.
    prefix_length = len(os.path.join(path, ""))

    with os.scandir(path) as top_level:
        pending = list(top_level)

    while pending:
        entry = pending.pop()
        if should_follow(entry, ignore_dot_dirs):
            # Close the iterator deterministically, even if scanning fails.
            with os.scandir(entry.path) as children:
                pending.extend(children)
        elif not entry.is_dir() and not entry.name.startswith("."):
            yield entry.path[prefix_length:], entry
def get_extractor_run(path: str,
                      entry: os.DirEntry,
                      parameter_set_count: int) -> JSONObject:
    """Build the test metadata record for one file and one parameter set.

    Args:
        path: Path stored verbatim under the "path" key.
        entry: Directory entry of the file; it is stat'ed without following
            symbolic links.
        parameter_set_count: Number of the parameter set, embedded in the
            "info" text.

    Returns:
        A dict with the keys "info", "path", "size", "atime", "ctime" and
        "mtime", the last four taken from the entry's stat result.
    """
    file_stat = entry.stat(follow_symlinks=False)
    run_record = {
        "info": f"file-level test metadata for parameter set #{parameter_set_count}",
        "path": path,
    }
    # Each key maps to the stat attribute of the same name with an "st_" prefix.
    for field in ("size", "atime", "ctime", "mtime"):
        run_record[field] = getattr(file_stat, "st_" + field)
    return run_record
def create_file_tree(mapper_family: str,
                     realm: str,
                     root_dir: str,
                     parameter_set_count: int
                     ) -> FileTree:
    """Create a new FileTree and fill it from the files below ``root_dir``.

    Args:
        mapper_family: Passed to the ``FileTree`` constructor and to
            ``update_file_tree``.
        realm: Passed to the ``FileTree`` constructor and to
            ``update_file_tree``.
        root_dir: Directory whose files are added to the tree.
        parameter_set_count: Number of parameter sets recorded per file.

    Returns:
        The newly created and populated ``FileTree``.
    """
    tree = FileTree(mapper_family, realm)
    update_file_tree(
        mapper_family=mapper_family,
        realm=realm,
        file_tree=tree,
        root_dir=root_dir,
        parameter_set_count=parameter_set_count,
    )
    return tree
def update_file_tree(mapper_family: str,
                     realm: str,
                     file_tree: FileTree,
                     root_dir: str,
                     parameter_set_count: int):
    """Add test extractor runs to ``file_tree`` for every file below ``root_dir``.

    For each file yielded by ``read_files(root_dir)`` and each index in
    ``range(parameter_set_count)``, one extractor run is added with a
    two-entry parameter dict derived from that index.

    Args:
        mapper_family: Forwarded to ``file_tree.add_extractor_run``.
        realm: Forwarded to ``file_tree.add_extractor_run``.
        file_tree: Tree that receives the extractor runs.
        root_dir: Directory whose files are traversed.
        parameter_set_count: Number of parameter sets recorded per file.
    """
    for relative_path, dir_entry in read_files(root_dir):
        for set_index in range(parameter_set_count):
            configuration = ExtractorConfiguration(
                "1.0.0",
                {
                    "fs_parameter_0": f"value_0.{set_index}",
                    "fs_parameter_1": f"value_1.{set_index}",
                },
            )
            run_record = get_extractor_run(relative_path, dir_entry, set_index)
            # NOTE(review): the positional arguments after the path are
            # presumably a time stamp (None), extractor name, author name
            # and author email -- confirm against
            # FileTree.add_extractor_run.
            file_tree.add_extractor_run(
                mapper_family,
                realm,
                relative_path,
                None,
                "file-core-extractor",
                "metadata_creator script",
                "support@datalad.org",
                configuration,
                run_record,
            )
|