# create.py
import itertools
import logging
import sys
from time import time
from typing import Dict, List, Tuple, Union
from uuid import UUID

from dataladmetadatamodel.connector import Connector
from dataladmetadatamodel.datasettree import DatasetTree
from dataladmetadatamodel.filetree import FileTree
from dataladmetadatamodel.metadata import ExtractorConfiguration, Metadata
from dataladmetadatamodel.metadatarootrecord import MetadataRootRecord
from dataladmetadatamodel.uuidset import UUIDSet
from dataladmetadatamodel.versionlist import TreeVersionList, VersionList, VersionRecord
# Recursive type alias for JSON-like values: nested lists and string-keyed
# dicts whose leaves are int, float or str.
JSONObject = Union[List["JSONObject"], Dict[str, "JSONObject"], int, float, str]

# Logger registered under the name "metadata_creator".
MDC_LOGGER = logging.getLogger("metadata_creator")

# Format names for which metadata is generated on every file path
# (passed to _create_metadata by _create_file_tree).
FILE_LEVEL_FORMATS = ["file-format-1", "file-format-2"]

# Format names intended for dataset-level metadata.
DATASET_LEVEL_FORMATS = ["dataset-format-1", "dataset-format-2"]
  17. def _create_tree_paths(tree_spec: List[Tuple[int, int]], upper_levels: List[int]) -> List[str]:
  18. upper_level_postfix = (
  19. "." + ".".join(map(str, upper_levels))
  20. if upper_levels
  21. else ""
  22. )
  23. if len(tree_spec) == 1:
  24. return [
  25. f"dataset{upper_level_postfix}.{node_number}"
  26. for node_number in range(tree_spec[0][0])
  27. ]
  28. # If we create a path that identifies a dataset, add it the the result list
  29. result = [
  30. f"dataset{upper_level_postfix}.{node_number}"
  31. for node_number in range(tree_spec[0][0])
  32. if node_number < tree_spec[0][1]
  33. ]
  34. return result + [
  35. f"dataset{upper_level_postfix}.{node_number}/" + sub_spec
  36. if node_number < tree_spec[0][1]
  37. else f"dir{upper_level_postfix}.{node_number}/" + sub_spec
  38. for node_number in range(tree_spec[0][0])
  39. for sub_spec in _create_tree_paths(tree_spec[1:], upper_levels + [node_number])
  40. ]
  41. def _create_file_paths(tree_spec: List[int], upper_levels: List[int]) -> List[str]:
  42. upper_level_postfix = (
  43. "." + ".".join(map(str, upper_levels))
  44. if upper_levels
  45. else ""
  46. )
  47. node_count = tree_spec[0]
  48. if len(tree_spec) == 1:
  49. return [
  50. f"file{upper_level_postfix}.{node_number}"
  51. for node_number in range(node_count)]
  52. return [
  53. f"dir{upper_level_postfix}.{node_index}/{sub_tree}"
  54. for node_index in range(node_count)
  55. for sub_tree in _create_file_paths(tree_spec[1:], upper_levels + [node_index])
  56. ]
  57. def _create_metadata(mapper_family: str,
  58. realm: str,
  59. path: str,
  60. formats: List[str],
  61. parameter_count: int,
  62. invocation_count: int) -> Metadata:
  63. metadata = Metadata(mapper_family, realm)
  64. for format in formats:
  65. parameter_lists = [
  66. {
  67. f"{format}-parameter-{invocation}.{i}": f"v-{format}.{invocation}.{i}"
  68. for i in range(parameter_count)
  69. }
  70. for invocation in range(invocation_count)
  71. ]
  72. for parameter_list in parameter_lists:
  73. extractor_configuration = ExtractorConfiguration(
  74. "1.0",
  75. parameter_list
  76. )
  77. metadata.add_extractor_run(
  78. int(time()),
  79. format,
  80. "Auto-created by create.py",
  81. "christian.moench@web.de",
  82. extractor_configuration,
  83. f'{{"type": "inline", "content": '
  84. f'"{format}({path})"}}'
  85. )
  86. return metadata
  87. def _create_file_tree(mapper_family, realm) -> FileTree:
  88. file_tree = FileTree(mapper_family, realm)
  89. file_paths = _create_file_paths([3, 4, 10], [])
  90. for path in file_paths:
  91. metadata = _create_metadata(
  92. mapper_family,
  93. realm,
  94. path,
  95. FILE_LEVEL_FORMATS,
  96. 3,
  97. 2
  98. )
  99. file_tree.add_metadata(path, metadata)
  100. return file_tree
  101. def _create_metadata_root_record(mapper_family: str,
  102. realm: str,
  103. path: str,
  104. uuid: UUID,
  105. primary_data_version: str) -> MetadataRootRecord:
  106. file_tree = _create_file_tree(mapper_family, realm)
  107. dataset_level_metadata = _create_metadata(
  108. mapper_family,
  109. realm,
  110. path,
  111. ["dataset_format_1", "dataset_format_2"],
  112. 4,
  113. 2
  114. )
  115. metadata_root_record = MetadataRootRecord(
  116. mapper_family,
  117. realm,
  118. uuid,
  119. primary_data_version,
  120. Connector.from_object(dataset_level_metadata),
  121. Connector.from_object(file_tree)
  122. )
  123. return metadata_root_record
  124. def _create_dataset_tree(mapper_family, realm) -> DatasetTree:
  125. dataset_paths = _create_tree_paths([(3, 1), (2, 1), (3, 3)], [])
  126. dataset_tree = DatasetTree(mapper_family, realm)
  127. for index, path in enumerate([""] + dataset_paths):
  128. dataset_tree.add_dataset(
  129. path,
  130. _create_metadata_root_record(
  131. mapper_family,
  132. realm,
  133. path,
  134. UUID(f"0000000000000000000000000000{index:04x}"),
  135. "000102030405060708090a0b0c0d0e0f1011{index:04x}".format(index=index)
  136. )
  137. )
  138. return dataset_tree
  139. def main(argv):
  140. _, mapper_family, realm = argv
  141. time_counter = 0
  142. version_counter = 0
  143. def get_time_str(counter: int) -> str:
  144. return str(123456789 + counter)
  145. def get_primary_data_version(counter: int) -> str:
  146. return "000102030405060708090a0b0c0d0e0f1011{counter:04x}".format(counter=counter)
  147. dataset_tree = _create_dataset_tree(mapper_family, realm)
  148. dataset_tree_version_list = TreeVersionList(initial_set={
  149. dataset_tree.value.dataset_version: VersionRecord(
  150. get_time_str(time_counter),
  151. None,
  152. Connector.from_object(dataset_tree)
  153. )
  154. })
  155. # Extract the UUID Set from the dataset tree:
  156. uuid_version_lists = {}
  157. datasets = dataset_tree.get_dataset_paths()
  158. for path, metadata_root_record in datasets:
  159. uuid_version_lists[metadata_root_record.dataset_identifier] = Connector.from_object(
  160. VersionList(initial_set={
  161. get_primary_data_version(version_counter): VersionRecord(
  162. get_time_str(time_counter),
  163. path,
  164. Connector.from_object(metadata_root_record)
  165. )
  166. })
  167. )
  168. version_counter += 1
  169. time_counter += 1
  170. uuid_set = UUIDSet(
  171. mapper_family,
  172. realm,
  173. uuid_version_lists
  174. )
  175. print(uuid_set.save())
  176. print(dataset_tree_version_list.save())
# Script entry point: hand the complete command line (including the program
# name in position 0) to main() when the file is executed directly.
if __name__ == "__main__":
    main(sys.argv)