Scheduled service maintenance on November 22


On Friday, November 22, 2024, between 06:00 CET and 18:00 CET, GIN services will undergo planned maintenance. Extended service interruptions should be expected. We will try to keep downtimes to a minimum, but recommend that users avoid critical tasks, large data uploads, or DOI requests during this time.

We apologize for any inconvenience.

create.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. import logging
  2. import sys
  3. from time import time
  4. from typing import Dict, List, Tuple, Union
  5. from uuid import UUID
  6. from dataladmetadatamodel.datasettree import DatasetTree
  7. from dataladmetadatamodel.filetree import FileTree
  8. from dataladmetadatamodel.metadata import ExtractorConfiguration, Metadata
  9. from dataladmetadatamodel.metadatarootrecord import MetadataRootRecord
  10. from dataladmetadatamodel.uuidset import UUIDSet
  11. from dataladmetadatamodel.versionlist import TreeVersionList, VersionList, VersionRecord
# Recursive type alias for any JSON-serializable value (object, array, scalar).
JSONObject = Union[List["JSONObject"], Dict[str, "JSONObject"], int, float, str]

# Module-level logger for the metadata-creator tool.
MDC_LOGGER = logging.getLogger("metadata_creator")

# Synthetic extractor-format names attached to file-level metadata entries.
FILE_LEVEL_FORMATS = ["file-format-1", "file-format-2"]

# Synthetic extractor-format names intended for dataset-level metadata entries.
DATASET_LEVEL_FORMATS = ["dataset-format-1", "dataset-format-2"]
  16. def _create_tree_paths(tree_spec: List[Tuple[int, int]], upper_levels: List[int]) -> List[str]:
  17. upper_level_postfix = (
  18. "." + ".".join(map(str, upper_levels))
  19. if upper_levels
  20. else ""
  21. )
  22. if len(tree_spec) == 1:
  23. return [
  24. f"dataset{upper_level_postfix}.{node_number}"
  25. for node_number in range(tree_spec[0][0])
  26. ]
  27. # If we create a path that identifies a dataset, add it the the result list
  28. result = [
  29. f"dataset{upper_level_postfix}.{node_number}"
  30. for node_number in range(tree_spec[0][0])
  31. if node_number < tree_spec[0][1]
  32. ]
  33. return result + [
  34. f"dataset{upper_level_postfix}.{node_number}/" + sub_spec
  35. if node_number < tree_spec[0][1]
  36. else f"dir{upper_level_postfix}.{node_number}/" + sub_spec
  37. for node_number in range(tree_spec[0][0])
  38. for sub_spec in _create_tree_paths(tree_spec[1:], upper_levels + [node_number])
  39. ]
  40. def _create_file_paths(tree_spec: List[int], upper_levels: List[int]) -> List[str]:
  41. upper_level_postfix = (
  42. "." + ".".join(map(str, upper_levels))
  43. if upper_levels
  44. else ""
  45. )
  46. node_count = tree_spec[0]
  47. if len(tree_spec) == 1:
  48. return [
  49. f"file{upper_level_postfix}.{node_number}"
  50. for node_number in range(node_count)]
  51. return [
  52. f"dir{upper_level_postfix}.{node_index}/{sub_tree}"
  53. for node_index in range(node_count)
  54. for sub_tree in _create_file_paths(tree_spec[1:], upper_levels + [node_index])
  55. ]
  56. def _create_metadata(mapper_family: str,
  57. realm: str,
  58. path: str,
  59. formats: List[str],
  60. parameter_count: int,
  61. invocation_count: int) -> Metadata:
  62. metadata = Metadata(mapper_family, realm)
  63. for format in formats:
  64. parameter_lists = [
  65. {
  66. f"{format}-parameter-{invocation}.{i}": f"v-{format}.{invocation}.{i}"
  67. for i in range(parameter_count)
  68. }
  69. for invocation in range(invocation_count)
  70. ]
  71. for parameter_list in parameter_lists:
  72. extractor_configuration = ExtractorConfiguration(
  73. "1.0",
  74. parameter_list
  75. )
  76. metadata.add_extractor_run(
  77. int(time()),
  78. format,
  79. "Auto-created by create.py",
  80. "christian.moench@web.de",
  81. extractor_configuration,
  82. f'{{"type": "inline", "content": '
  83. f'"{format}({path})"}}'
  84. )
  85. return metadata
  86. def _create_file_tree(mapper_family, realm) -> FileTree:
  87. file_tree = FileTree(mapper_family, realm)
  88. file_paths = _create_file_paths([3, 4, 10], [])
  89. for path in file_paths:
  90. metadata = _create_metadata(
  91. mapper_family,
  92. realm,
  93. path,
  94. FILE_LEVEL_FORMATS,
  95. 3,
  96. 2
  97. )
  98. file_tree.add_metadata(path, metadata)
  99. return file_tree
  100. def _create_metadata_root_record(mapper_family: str,
  101. realm: str,
  102. path: str,
  103. uuid: UUID,
  104. primary_data_version: str) -> MetadataRootRecord:
  105. file_tree = _create_file_tree(mapper_family, realm)
  106. dataset_level_metadata = _create_metadata(
  107. mapper_family,
  108. realm,
  109. path,
  110. ["dataset_format_1", "dataset_format_2"],
  111. 4,
  112. 2
  113. )
  114. metadata_root_record = MetadataRootRecord(
  115. mapper_family,
  116. realm,
  117. uuid,
  118. primary_data_version,
  119. Connector.from_object(dataset_level_metadata),
  120. Connector.from_object(file_tree)
  121. )
  122. return metadata_root_record
  123. def _create_dataset_tree(realm) -> DatasetTree:
  124. dataset_paths = _create_tree_paths([(3, 1), (2, 1), (3, 3)], [])
  125. dataset_tree = DatasetTree(realm)
  126. for index, path in enumerate([""] + dataset_paths):
  127. dataset_tree.add_dataset(
  128. path,
  129. _create_metadata_root_record(
  130. realm,
  131. path,
  132. UUID(f"0000000000000000000000000000{index:04x}"),
  133. "000102030405060708090a0b0c0d0e0f1011{index:04x}".format(index=index)
  134. )
  135. )
  136. return dataset_tree
def main(argv):
    """Create a synthetic metadata store and persist it.

    Expects ``argv == [program_name, mapper_family, realm]`` and prints the
    save() results of the generated UUID set and tree-version list.
    """
    _, mapper_family, realm = argv
    time_counter = 0
    version_counter = 0
    def get_time_str(counter: int) -> str:
        # Fixed base timestamp keeps generated version records deterministic.
        return str(123456789 + counter)
    def get_primary_data_version(counter: int) -> str:
        # Synthetic 36-hex-digit primary data version, unique per counter.
        return "000102030405060708090a0b0c0d0e0f1011{counter:04x}".format(counter=counter)
    dataset_tree = _create_dataset_tree(mapper_family, realm)
    # Version list keyed by the tree's dataset version; the None slot is
    # the path argument of VersionRecord (no path for the tree itself).
    dataset_tree_version_list = TreeVersionList(initial_set={
        dataset_tree.value.dataset_version: VersionRecord(
            get_time_str(time_counter),
            None,
            Connector.from_object(dataset_tree)
        )
    })
    # Extract the UUID Set from the dataset tree:
    # NOTE(review): dataset_tree.dataset_paths is assumed to yield
    # (path, metadata_root_record) pairs — confirm against DatasetTree.
    uuid_version_lists = {}
    for path, metadata_root_record in dataset_tree.dataset_paths:
        uuid_version_lists[metadata_root_record.dataset_identifier] = Connector.from_object(
            VersionList(initial_set={
                get_primary_data_version(version_counter): VersionRecord(
                    get_time_str(time_counter),
                    path,
                    Connector.from_object(metadata_root_record)
                )
            })
        )
        version_counter += 1
        time_counter += 1
    uuid_set = UUIDSet(
        mapper_family,
        realm,
        uuid_version_lists
    )
    print(uuid_set.save())
    print(dataset_tree_version_list.save())
if __name__ == "__main__":
    # Usage: create.py <mapper_family> <realm>
    main(sys.argv)