objectreference.py 2.6 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. import enum
  2. import logging
  3. from pathlib import Path
  4. from typing import (
  5. Dict,
  6. List,
  7. Tuple,
  8. )
  9. from dataladmetadatamodel.mapper.reference import none_location
  10. class GitReference(enum.Enum):
  11. TREE_VERSION_LIST = "refs/datalad/dataset-tree-version-list"
  12. UUID_SET = "refs/datalad/dataset-uuid-set"
  13. TREES = "refs/datalad/object-references/trees"
  14. BLOBS = "refs/datalad/object-references/blobs"
  15. logger = logging.getLogger("datalad.metalad.gitmapper.objectreference")
  16. cached_object_references: Dict[str, List[Tuple[str, str, str, str]]] = dict()
  17. def add_object_reference(git_reference: GitReference,
  18. flag: str,
  19. object_type: str,
  20. object_hash: str):
  21. if object_hash == none_location:
  22. logger.warning("attempt to add a None-reference")
  23. return
  24. if git_reference.value not in cached_object_references:
  25. cached_object_references[git_reference.value] = []
  26. cache_entry = (
  27. flag,
  28. object_type,
  29. object_hash,
  30. "object_reference:" + object_hash
  31. )
  32. if cache_entry not in cached_object_references[git_reference.value]:
  33. cached_object_references[git_reference.value].append(cache_entry)
  34. def flush_object_references(realm: Path):
  35. global cached_object_references
  36. from dataladmetadatamodel.mapper.gitmapper.utils import locked_backend
  37. from dataladmetadatamodel.mapper.gitmapper.gitbackend.subprocess import (
  38. git_ls_tree,
  39. git_update_ref,
  40. git_save_tree,
  41. )
  42. with locked_backend(realm):
  43. for git_reference, cached_tree_entries in cached_object_references.items():
  44. try:
  45. existing_tree_entries = [
  46. tuple(line.split())
  47. for line in git_ls_tree(str(realm), git_reference)
  48. ]
  49. except RuntimeError:
  50. existing_tree_entries = []
  51. existing_tree_entries_set = set(existing_tree_entries)
  52. existing_tree_entries_set |= set(cached_tree_entries)
  53. tree_hash = git_save_tree(str(realm), existing_tree_entries_set)
  54. git_update_ref(str(realm), git_reference, tree_hash)
  55. cached_object_references = dict()
  56. def add_tree_reference(git_reference: GitReference, object_hash: str):
  57. add_object_reference(git_reference, "040000", "tree", object_hash)
  58. def add_blob_reference(git_reference: GitReference, object_hash: str):
  59. add_object_reference(git_reference, "100644", "blob", object_hash)
  60. def remove_object_reference(*args, **kwargs):
  61. raise NotImplementedError