Scheduled service maintenance on November 22


On Friday, November 22, 2024, between 06:00 CET and 18:00 CET, GIN services will undergo planned maintenance. Extended service interruptions should be expected. We will try to keep downtimes to a minimum, but recommend that users avoid critical tasks, large data uploads, or DOI requests during this time.

We apologize for any inconvenience.

test_filetree.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. import subprocess
  2. import tempfile
  3. import time
  4. import unittest
  5. from pathlib import Path
  6. from dataladmetadatamodel.filetree import FileTree
  7. from dataladmetadatamodel.metadata import (
  8. ExtractorConfiguration,
  9. Metadata,
  10. MetadataInstance
  11. )
  12. from dataladmetadatamodel.metadatapath import MetadataPath
  13. from dataladmetadatamodel.mapper.gitmapper.objectreference import flush_object_references
  14. from dataladmetadatamodel.tests.utils import (
  15. assert_file_trees_equal,
  16. create_file_tree_with_metadata
  17. )
  # Metadata paths used as fixture entries by the tests in this module.
  default_paths = [
      MetadataPath(location)
      for location in ("a/b/c", "a/b/a", "b", "c/d/e", "a/x")
  ]
  class TestFileTree(unittest.TestCase):
      """Tests for FileTree operations that do not write the tree out."""

      def test_add_metadata(self):
          """Check the entries reported for a tree built from default_paths.

          get_paths_recursive() must report exactly the default paths, and
          the metadata of every reported entry must equal Metadata().
          """
          file_tree = create_file_tree_with_metadata(
              default_paths,
              [Metadata() for _ in default_paths])

          # Materialise once: the entries are inspected twice below.
          returned_entries = tuple(file_tree.get_paths_recursive())

          returned_paths = [entry[0] for entry in returned_entries]
          self.assertEqual(sorted(default_paths), sorted(returned_paths))

          # Index instead of unpacking so that only the first two elements
          # of an entry are relied upon; the path itself is not needed here.
          for entry in returned_entries:
              self.assertEqual(entry[1], Metadata())

      def test_add_extractor_run(self):
          """Check that an added extractor run is stored under its path.

          After add_extractor_run() on the first default path, the metadata
          for that path must list the extractor configuration as its
          parameter set and hold a matching MetadataInstance as the first
          instance.
          """
          file_tree = create_file_tree_with_metadata(
              default_paths,
              [Metadata() for _ in default_paths])

          target_path = default_paths[0]
          # Second positional argument of add_extractor_run() and first of
          # MetadataInstance(); presumably a timestamp -- TODO confirm.
          time_stamp = 1.2
          author_name = "Karl-Test"
          author_email = author_name + "@test.com"
          extractor_name = "test_extractor"
          extractor_configuration = ExtractorConfiguration(
              "extractor_version_1",
              {"key1": "value1"})
          metadata_content = {"key0": "this is metadata"}

          file_tree.add_extractor_run(
              target_path,
              time_stamp,
              extractor_name,
              author_name,
              author_email,
              extractor_configuration,
              metadata_content)

          metadata = file_tree.get_metadata(target_path)
          self.assertIsNotNone(metadata)

          stored_metadata = metadata.extractor_runs_for_extractor(
              extractor_name)
          self.assertEqual(
              stored_metadata.parameter_set,
              [extractor_configuration])
          self.assertEqual(
              stored_metadata.instances[0],
              MetadataInstance(
                  time_stamp,
                  author_name,
                  author_email,
                  extractor_configuration,
                  metadata_content))
  class TestMapping(unittest.TestCase):
      """Tests that write FileTree objects to a git repository and read them back."""

      def test_adding(self):
          """Check that paths added after a round trip survive a second one.

          A tree with the default paths is written out and read in again,
          ten more paths are added, and after another write-out/read-in all
          default and additional paths must be reported.
          """
          with tempfile.TemporaryDirectory() as metadata_store:
              # check=True: fail here, not later in write_out(), if the
              # repository could not be created.
              subprocess.run(["git", "init", metadata_store], check=True)

              file_tree = create_file_tree_with_metadata(
                  default_paths,
                  [Metadata() for _ in default_paths])
              reference = file_tree.write_out(metadata_store)

              file_tree = FileTree(reference=reference).read_in()
              additional_paths = [
                  MetadataPath(f"x/y.{n}")
                  for n in range(10)]
              for additional_path in additional_paths:
                  file_tree.add_metadata(additional_path, Metadata())
              # The second write-out is called without a destination.
              reference = file_tree.write_out()

              file_tree = FileTree(reference=reference).read_in()
              read_paths = [
                  pair[0]
                  for pair in file_tree.get_paths_recursive()]
              for path in default_paths + additional_paths:
                  self.assertIn(path, read_paths)

      def test_adding_to_massive_tree(self):
          """Time building, writing, re-reading and extending a 1000-entry tree.

          The durations are only printed. The test contains no assertions,
          so it passes as long as no step raises.
          """
          with tempfile.TemporaryDirectory() as metadata_store:
              subprocess.run(["git", "init", metadata_store], check=True)

              file_tree = FileTree()

              # perf_counter() is monotonic, so the measured intervals are
              # not distorted by wall-clock adjustments.
              start_time = time.perf_counter()
              for first_part in range(10):
                  for second_part in range(10):
                      for third_part in range(10):
                          metadata_path = MetadataPath(f"{first_part:03}/"
                                                       f"{second_part:03}/"
                                                       f"{third_part:03}")
                          file_tree.add_metadata(metadata_path, Metadata())
              initialisation_duration = time.perf_counter() - start_time
              # ".4f" (four decimals) replaces the former "4f", which only
              # set a minimum width of 4 and therefore had no effect.
              print(f"Initialised: {initialisation_duration:.4f}")

              start_time = time.perf_counter()
              reference = file_tree.write_out(metadata_store)
              write_out_duration = time.perf_counter() - start_time
              print(f"Written out: {write_out_duration:.4f}")

              start_time = time.perf_counter()
              file_tree = FileTree(reference=reference).read_in()
              read_in_duration = time.perf_counter() - start_time
              print(f"Read in: {read_in_duration:.4f}")

              # NOTE(review): "5/5/xxx" does not match the zero-padded
              # "005/005/..." names created above, so this presumably adds
              # new intermediate nodes rather than extending an existing
              # directory -- confirm that this is intended.
              start_time = time.perf_counter()
              file_tree.add_metadata(MetadataPath("5/5/xxx"), Metadata())
              add_duration = time.perf_counter() - start_time
              print(f"Added single entry: {add_duration:.4f}")

              start_time = time.perf_counter()
              file_tree.write_out()
              write_out_2nd_duration = time.perf_counter() - start_time
              print(f"Written out single entry: {write_out_2nd_duration:.4f}")

      def test_shallow_file_tree_mapping(self):
          """Check that file tree content is not mapped by default.

          A tree with entries "a" and "b" is written out, and after reading
          the reference into a new FileTree the child node "a" must not be
          flagged as mapped.
          """
          with tempfile.TemporaryDirectory() as metadata_store:
              subprocess.run(["git", "init", metadata_store], check=True)

              paths = [
                  MetadataPath("a"),
                  MetadataPath("b")]

              file_tree = FileTree()
              for path in paths:
                  metadata = Metadata()
                  file_tree.add_metadata(path, metadata)
                  file_tree.unget_metadata(metadata, metadata_store)
              reference = file_tree.write_out(metadata_store)
              flush_object_references(Path(metadata_store))

              new_file_tree = FileTree(reference=reference).read_in()
              self.assertFalse(new_file_tree.mtree.child_nodes["a"].mapped)
  class TestDeepCopy(unittest.TestCase):
      """Tests for FileTree.deepcopy() with a second git repository as destination."""

      def test_copy_from_memory(self):
          """Deep-copy a tree that was never written out and compare both trees."""
          with \
                  tempfile.TemporaryDirectory() as original_dir, \
                  tempfile.TemporaryDirectory() as copy_dir:

              # check=True: abort immediately if a repository cannot be
              # created instead of failing later inside deepcopy().
              subprocess.run(["git", "init", original_dir], check=True)
              subprocess.run(["git", "init", copy_dir], check=True)

              file_tree = FileTree()
              for path in ["/a/b/c/d", "/a/b/d", "/a/x"]:
                  file_tree.add_metadata(
                      MetadataPath(path),
                      Metadata())

              file_tree_copy = file_tree.deepcopy(new_destination=copy_dir)
              assert_file_trees_equal(self, file_tree, file_tree_copy, True)

      def test_copy_from_backend(self):
          """Deep-copy a tree that was written out first and compare both trees.

          Each metadata object is passed to unget_metadata() and the tree is
          written to original_dir before deepcopy() is called; the copy is
          read in before the comparison.
          """
          with \
                  tempfile.TemporaryDirectory() as original_dir, \
                  tempfile.TemporaryDirectory() as copy_dir:

              subprocess.run(["git", "init", original_dir], check=True)
              subprocess.run(["git", "init", copy_dir], check=True)

              paths = [
                  MetadataPath("a/b/c/d"),
                  MetadataPath("a/b/d"),
                  MetadataPath("a/x")]

              file_tree = FileTree()
              for path in paths:
                  metadata = Metadata()
                  file_tree.add_metadata(path, metadata)
                  file_tree.unget_metadata(metadata, original_dir)
              file_tree.write_out(original_dir)

              file_tree_copy = file_tree.deepcopy(new_destination=copy_dir)
              file_tree_copy.read_in()
              assert_file_trees_equal(self, file_tree, file_tree_copy, True)
  # Run the test cases of this module when the file is executed as a script.
  if __name__ == "__main__":
      unittest.main()