test_datasettree.py 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227
  1. import subprocess
  2. import tempfile
  3. import unittest
  4. from unittest.mock import patch
  5. from dataladmetadatamodel.datasettree import DatasetTree
  6. from dataladmetadatamodel.mapper.gitmapper.gitblobcache import hash_blob
  7. from dataladmetadatamodel.metadatapath import MetadataPath
  8. from dataladmetadatamodel.metadatarootrecord import MetadataRootRecord
  9. from dataladmetadatamodel.tests.utils import (
  10. get_location,
  11. get_uuid,
  12. assert_dataset_trees_equal,
  13. create_dataset_tree
  14. )
  15. file_test_paths = [
  16. MetadataPath("a/b/c"),
  17. MetadataPath("a/b/a"),
  18. MetadataPath("b"),
  19. MetadataPath("c/d/e"),
  20. MetadataPath("a/x")]
  21. dataset_test_paths = [
  22. MetadataPath("d1"),
  23. MetadataPath("d1/d1.1"),
  24. MetadataPath("d2"),
  25. MetadataPath("d2/d2.1/d2.1.1"),
  26. MetadataPath("d3/d3.1")]
  27. uuid_0 = get_uuid(0)
  28. class TestDatasetTree(unittest.TestCase):
  29. def test_add_metadata(self):
  30. paths = [
  31. MetadataPath("a/b/c"),
  32. MetadataPath("a/b/a"),
  33. MetadataPath("b"),
  34. MetadataPath("c/d/e")]
  35. mrr = MetadataRootRecord(uuid_0, "00112233", None, None)
  36. dataset_tree = create_dataset_tree(
  37. dataset_tree_paths=paths,
  38. initial_mrr=mrr)
  39. returned_entries = tuple(dataset_tree.dataset_paths)
  40. returned_paths = [entry[0] for entry in returned_entries]
  41. self.assertEqual(sorted(paths), sorted(returned_paths))
  42. for entry in returned_entries:
  43. self.assertEqual(entry[1], mrr)
  44. def test_root_node(self):
  45. dataset_tree = DatasetTree()
  46. mrr = MetadataRootRecord(uuid_0, "00112233", None, None)
  47. dataset_tree.add_dataset(MetadataPath(""), mrr)
  48. self.assertEqual(dataset_tree.get_metadata_root_record(MetadataPath("")), mrr)
  49. returned_entries = tuple(dataset_tree.dataset_paths)
  50. self.assertEqual(len(returned_entries), 1)
  51. self.assertEqual(returned_entries[0][0], MetadataPath(""))
  52. self.assertEqual(returned_entries[0][1], mrr)
  53. class TestReferenceCreation(unittest.TestCase):
  54. def test_object_reference_creation(self):
  55. dataset_tree = create_dataset_tree(dataset_test_paths)
  56. with \
  57. patch("dataladmetadatamodel.mapper.gitmapper."
  58. "metadatamapper.git_save_str") as save_str, \
  59. patch("dataladmetadatamodel.mapper.gitmapper."
  60. "mtreenodemapper.git_save_tree_node") as save_tree_node, \
  61. patch("dataladmetadatamodel.mapper.gitmapper."
  62. "metadatarootrecordmapper.git_save_json") as save_json, \
  63. patch("dataladmetadatamodel.mapper.gitmapper."
  64. "gitblobcache.git_save_file_list") as file_list_save, \
  65. patch("dataladmetadatamodel.mtreeproxy."
  66. "add_tree_reference") as add_tree_ref:
  67. save_str.return_value = get_location(1)
  68. save_tree_node.return_value = get_location(2)
  69. save_json.return_value = get_location(3)
  70. file_list_save.side_effect = lambda r, l: [
  71. hash_blob(open(e, "rb").read())
  72. for e in l
  73. ]
  74. dataset_tree.write_out("/tmp/t1")
  75. # We expect one call for the dataset-tree itself
  76. # and one call for each file-tree, one of which
  77. # is anchored at each dataset path
  78. self.assertEqual(
  79. add_tree_ref.call_count,
  80. 1 + len(dataset_test_paths)
  81. )
  82. class TestDeepCopy(unittest.TestCase):
  83. def test_copy_from_memory(self):
  84. with \
  85. tempfile.TemporaryDirectory() as original_dir, \
  86. tempfile.TemporaryDirectory() as copy_dir:
  87. subprocess.run(["git", "init", original_dir])
  88. subprocess.run(["git", "init", copy_dir])
  89. dataset_tree = create_dataset_tree(dataset_test_paths,
  90. file_test_paths)
  91. dataset_tree_copy = dataset_tree.deepcopy("git", copy_dir)
  92. dataset_tree_copy.read_in()
  93. assert_dataset_trees_equal(
  94. self,
  95. dataset_tree,
  96. dataset_tree_copy,
  97. True)
  98. def test_copy_from_backend(self):
  99. with \
  100. tempfile.TemporaryDirectory() as original_dir, \
  101. tempfile.TemporaryDirectory() as copy_dir:
  102. subprocess.run(["git", "init", original_dir])
  103. subprocess.run(["git", "init", copy_dir])
  104. dataset_tree = create_dataset_tree(dataset_test_paths,
  105. file_test_paths)
  106. dataset_tree.write_out(original_dir)
  107. dataset_tree_copy = dataset_tree.deepcopy("git", copy_dir)
  108. dataset_tree_copy.read_in()
  109. assert_dataset_trees_equal(
  110. self,
  111. dataset_tree,
  112. dataset_tree_copy,
  113. True)
  114. class TestSubTreeManipulation(unittest.TestCase):
  115. def get_mrr(self, n: int = 0) -> MetadataRootRecord:
  116. return MetadataRootRecord(uuid_0, f"00112233-{n}", None, None)
  117. def test_subtree_adding(self):
  118. mrr_1 = self.get_mrr(1)
  119. mrr_2 = self.get_mrr(2)
  120. tree = DatasetTree()
  121. tree.add_dataset(MetadataPath("a/b/c"), mrr_1)
  122. subtree = DatasetTree()
  123. subtree.add_dataset(MetadataPath("d/e/f"), mrr_2)
  124. tree.add_subtree(subtree, MetadataPath("a/x"))
  125. mrr = tree.get_metadata_root_record(MetadataPath("a/b/c"))
  126. self.assertEqual(mrr, mrr_1)
  127. mrr = tree.get_metadata_root_record(MetadataPath("a/x/d/e/f"))
  128. self.assertEqual(mrr, mrr_2)
  129. def test_subtree_adding_with_conversion(self):
  130. mrr_1 = self.get_mrr()
  131. mrr_2 = self.get_mrr()
  132. tree = DatasetTree()
  133. tree.add_dataset(MetadataPath("a/b/c"), mrr_1)
  134. subtree = DatasetTree()
  135. subtree.add_dataset(MetadataPath("e/f"), mrr_2)
  136. tree.add_subtree(subtree, MetadataPath("a/b/c/d"))
  137. node = tree.get_metadata_root_record(MetadataPath("a/b/c"))
  138. self.assertEqual(node, mrr_1)
  139. node = tree.get_metadata_root_record(MetadataPath("a/b/c/d/e/f"))
  140. self.assertEqual(node, mrr_2)
  141. def test_subtree_adding_on_existing_path(self):
  142. tree = DatasetTree()
  143. tree.add_dataset(MetadataPath("a/b/c/d"), self.get_mrr())
  144. subtree = DatasetTree()
  145. subtree.add_dataset(MetadataPath("e/f"), self.get_mrr())
  146. self.assertRaises(
  147. ValueError,
  148. tree.add_subtree,
  149. subtree, MetadataPath("a/b/c/d"))
  150. def test_subtree_deletion(self):
  151. mrr_1 = self.get_mrr()
  152. mrr_2 = self.get_mrr()
  153. tree = DatasetTree()
  154. tree.add_dataset(MetadataPath("a/b/c"), mrr_1)
  155. tree.add_dataset(MetadataPath("a/b/c/d/e/f"), mrr_2)
  156. self.assertIsNotNone(tree.get_metadata_root_record(MetadataPath("a/b/c/d/e/f")))
  157. tree.delete_subtree(MetadataPath("a/b/c/d/e/f"))
  158. self.assertIsNone(tree.get_metadata_root_record(MetadataPath("a/b/c/d/e/f")))
  159. self.assertIsNotNone(tree.mtree.get_object_at_path(MetadataPath("a/b/c/d/e")))
  160. tree.delete_subtree(MetadataPath("a/b/c"))
  161. self.assertIsNotNone(tree.mtree.get_object_at_path(MetadataPath("a/b")))
  162. self.assertIsNone(tree.get_metadata_root_record(MetadataPath("a/b/c")))
  163. self.assertIsNone(tree.get_metadata_root_record(MetadataPath("a/b/c/d")))
  164. self.assertIsNone(tree.get_metadata_root_record(MetadataPath("a/b/c/d/e")))
  165. if __name__ == '__main__':
  166. unittest.main()