Scheduled service maintenance on November 22


On Friday, November 22, 2024, between 06:00 CET and 18:00 CET, GIN services will undergo planned maintenance. Extended service interruptions should be expected. We will try to keep downtimes to a minimum, but recommend that users avoid critical tasks, large data uploads, or DOI requests during this time.

We apologize for any inconvenience.

metadatasource.py 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. """
  2. A representation of metadata sources.
  3. A MetadataSource object makes the underlying
  4. storage system visible. E.g. a MetadataSource
  5. object for git-stored metadata, supports the
  6. implementation to read, write, and copy
  7. this data from, to, and between git
  8. repositories. (This is in contrast to
  9. the metadata model classes, which never
  10. deal with a backend, e.g. a git-repository,
  11. for their persistence, instead they are
  12. mapped by mappers onto a specific backend.)
  13. """
  14. import abc
  15. import json
  16. import logging
  17. import subprocess
  18. from copy import deepcopy
  19. from pathlib import Path
  20. from typing import IO, Optional
  21. from dataladmetadatamodel import (
  22. JSONObject,
  23. check_serialized_version,
  24. version_string
  25. )
  # Module-level logger; the metadata source classes in this file report
  # deserialization problems through it via logger.error().
  26. logger = logging.getLogger("datalad.metadata.model")
  class MetadataSource(abc.ABC):
      """
      Abstract base for all metadata source handlers.

      Concrete handlers implement writing, copying, and JSON
      serialization. This class contributes the JSON string helpers
      and the factory that picks the concrete handler class based on
      the value stored under ``TYPE_KEY`` in a JSON object.
      """

      # Key in the serialized JSON object that names the concrete handler.
      TYPE_KEY = "metadata_source_type"

      @abc.abstractmethod
      def write_object_to(self, file_descriptor):
          """Write the object of this source to ``file_descriptor``."""
          raise NotImplementedError

      @abc.abstractmethod
      def copy_object_to(self, destination: Path):
          """Copy the object of this source to ``destination``."""
          raise NotImplementedError

      @abc.abstractmethod
      def to_json_obj(self) -> JSONObject:
          """Return a JSON-serializable description of this source."""
          raise NotImplementedError

      def to_json_str(self) -> str:
          """Return the result of ``to_json_obj()`` encoded as a JSON string."""
          json_obj = self.to_json_obj()
          return json.dumps(json_obj)

      @staticmethod
      def from_json_obj(json_obj: JSONObject) -> Optional["MetadataSource"]:
          """
          Build the concrete metadata source described by ``json_obj``.

          The handler class is selected by comparing the value stored
          under ``TYPE_KEY`` with the ``TYPE`` of each known handler.
          If the key is missing or the type is unknown, an error is
          logged and ``None`` is returned.
          """
          type_name = json_obj.get(MetadataSource.TYPE_KEY)
          if type_name is not None:
              # Known handlers are tried in a fixed order.
              for handler in (LocalGitMetadataSource, ImmediateMetadataSource):
                  if type_name == handler.TYPE:
                      return handler.from_json_obj(json_obj)
              logger.error(f"unknown metadata source type: `{type_name}´")
              return None

          logger.error(
              f"key `{MetadataSource.TYPE_KEY}´ not found in "
              f"json object: {json.dumps(json_obj)}.")
          return None

      @staticmethod
      def from_json_str(json_string: str) -> Optional["MetadataSource"]:
          """Parse ``json_string`` and delegate to ``from_json_obj()``."""
          parsed = json.loads(json_string)
          return MetadataSource.from_json_obj(parsed)
  class LocalGitMetadataSource(MetadataSource):
      """
      Metadata source that refers to a blob in a local git repository.

      The blob is identified by ``object_reference`` and is read from
      the ``.git`` directory located in ``git_repository_path``.
      """

      TYPE = "LocalGitMetadataSource"

      def __init__(self,
                   git_repository_path: Path,
                   object_reference: str
                   ):
          """
          :param git_repository_path: path of the repository work tree,
              i.e. the directory that contains ``.git``
          :param object_reference: git reference of the blob; surrounding
              whitespace is stripped
          """
          assert isinstance(object_reference, str)
          super().__init__()
          self.git_repository_path = git_repository_path
          self.object_reference = object_reference.strip()

      def __eq__(self, other):
          """Equal if ``other`` is of this class with same path and reference."""
          return (
              isinstance(other, LocalGitMetadataSource)
              and self.git_repository_path == other.git_repository_path
              and self.object_reference == other.object_reference)

      @staticmethod
      def _git_command(repository: Path, *arguments: str) -> list:
          """Return the argument list for a git call on ``repository``."""
          # An argument list (instead of a shell string) keeps paths with
          # spaces intact and prevents the shell from interpreting any
          # characters contained in the path or in the object reference.
          return ["git", "--git-dir", str(repository / ".git"), *arguments]

      def write_object_to(self, file_descriptor: IO):
          """
          Write the content of the referenced blob to ``file_descriptor``.

          :raises RuntimeError: if ``git cat-file`` exits with a non-zero
              return code
          """
          command = self._git_command(
              self.git_repository_path,
              "cat-file", "blob", self.object_reference)
          result = subprocess.run(command, stdout=file_descriptor)
          if result.returncode != 0:
              raise RuntimeError(
                  f"subprocess returned {result.returncode}, "
                  f"command: {' '.join(command)}")

      def copy_object_to(self, destination_repository: Path) -> str:
          """
          copy an object from the LocalGitMetadataSource
          instance into the git repository given by
          destination_repository.

          :return: the reference of the object written to the destination
          :raises subprocess.CalledProcessError: if reading the blob from
              the source or writing it to the destination fails
          """
          read_command = self._git_command(
              self.git_repository_path,
              "cat-file", "blob", self.object_reference)
          write_command = self._git_command(
              destination_repository,
              "hash-object", "-w", "--stdin")

          # Stream the blob from one git process into the other without
          # involving a shell, equivalent to `cat-file ... | hash-object ...`.
          with subprocess.Popen(read_command, stdout=subprocess.PIPE) as reader:
              copied_object_reference = subprocess.check_output(
                  write_command,
                  stdin=reader.stdout).decode().strip()

          # A shell pipeline reports only the status of its last command;
          # check the reading side explicitly so a failed read is reported
          # as such.
          if reader.returncode != 0:
              raise subprocess.CalledProcessError(
                  reader.returncode, read_command)

          assert copied_object_reference == self.object_reference
          return copied_object_reference

      def to_json_obj(self) -> JSONObject:
          """Return the JSON-serializable description of this source."""
          return {
              "@": dict(
                  type="LocalGitMetadataSource",
                  version=version_string
              ),
              MetadataSource.TYPE_KEY: LocalGitMetadataSource.TYPE,
              "git_repository_path": self.git_repository_path.as_posix(),
              "object_reference": self.object_reference}

      @staticmethod
      def from_json_obj(json_obj: JSONObject) -> Optional["LocalGitMetadataSource"]:
          """
          Create an instance from the output of ``to_json_obj()``.

          A missing key is logged and yields ``None``. A mismatching type
          marker raises ``AssertionError``, which is not handled here.
          """
          try:
              assert json_obj["@"]["type"] == "LocalGitMetadataSource"
              # NOTE(review): presumably validates the "version" entry
              # written by to_json_obj() -- confirm in dataladmetadatamodel.
              check_serialized_version(json_obj)
              assert json_obj[MetadataSource.TYPE_KEY] == LocalGitMetadataSource.TYPE
              return LocalGitMetadataSource(
                  Path(json_obj["git_repository_path"]),
                  json_obj["object_reference"])
          except KeyError as key_error:
              logger.error(
                  f"could not read LocalGitMetadataSource from {json_obj}, "
                  f"reason: {key_error}")
              return None
  class ImmediateMetadataSource(MetadataSource):
      """
      Metadata source that carries its content directly in the instance.
      """

      TYPE = "ImmediateMetadataSource"

      def __init__(self, content: JSONObject):
          """Store ``content`` as the payload of this source."""
          super().__init__()
          self.content = content

      def __eq__(self, other):
          """Equal if ``other`` is of this class and has equal content."""
          if not isinstance(other, ImmediateMetadataSource):
              return False
          return self.content == other.content

      def copy_object_to(self, destination: Path):
          """Do nothing; this implementation performs no copy."""
          pass

      def write_object_to(self, file_descriptor: IO):
          """Serialize the content as JSON into ``file_descriptor``."""
          json.dump(self.content, file_descriptor)

      def deepcopy(self):
          """Return a new instance holding a deep copy of the content."""
          duplicated_content = deepcopy(self.content)
          return ImmediateMetadataSource(duplicated_content)

      def to_json_obj(self) -> JSONObject:
          """Return the JSON-serializable description of this source."""
          header = {
              "type": "ImmediateMetadataSource",
              "version": version_string,
          }
          return {
              "@": header,
              MetadataSource.TYPE_KEY: ImmediateMetadataSource.TYPE,
              "content": self.content,
          }

      @staticmethod
      def from_json_obj(json_obj: JSONObject) -> Optional["ImmediateMetadataSource"]:
          """
          Create an instance from the output of ``to_json_obj()``.

          A missing key is logged and yields ``None``. A mismatching type
          marker raises ``AssertionError``, which is not handled here.
          """
          try:
              assert json_obj["@"]["type"] == "ImmediateMetadataSource"
              check_serialized_version(json_obj)
              assert json_obj[MetadataSource.TYPE_KEY] == ImmediateMetadataSource.TYPE
              payload = json_obj["content"]
          except KeyError as key_error:
              logger.error(
                  f"could not read ImmediateMetadataSource from {json_obj}, "
                  f"reason: {key_error}")
              return None
          return ImmediateMetadataSource(payload)