Browse source

NF: add initial model implementation

Christian Mönch 3 years ago
parent
commit
28ea17e08a

+ 85 - 0
model/connector.py

@@ -0,0 +1,85 @@
+from typing import Any, Optional
+
+from model.mapper import get_mapper
+from model.mapper.reference import Reference
+
+
+class ConnectedObject:
+    def pre_save(self, _family, _realm):
+        pass
+
+    def post_load(self, _family, _realm):
+        pass
+
+
+class Connector:
+    def __init__(self,
+                 reference: Optional[Reference],
+                 obj: ConnectedObject,
+                 is_mapped: bool,
+                 is_modified: bool):
+        self.reference = reference
+        self.object = obj
+        self.is_mapped = is_mapped
+        self.is_modified = is_modified
+
+    @classmethod
+    def from_reference(cls, reference):
+        return cls(reference, None, False, False)
+
+    @classmethod
+    def from_object(cls, obj):
+        return cls(None, obj, True, True)
+
+    @classmethod
+    def from_referenced_object(cls, reference, obj):
+        return cls(reference, obj, True, False)
+
+    def load(self, family, realm) -> Any:     # TODO: rename to load_object
+        if not self.is_mapped:
+            assert self.reference is not None
+            self.object = get_mapper(
+                self.reference.mapper_family,
+                self.reference.class_name)(realm).map(self.reference)
+            self.object.post_load(family, realm)
+            self.is_mapped = True
+        return self.object
+
+    def save(self, family, realm, force_write=False) -> Reference:   # TODO: rename to save_object
+        if self.is_mapped:
+            if self.is_modified or force_write:
+                class_name = type(self.object).__name__
+                self.object.pre_save(family, realm)
+                self.reference = Reference(
+                    family,
+                    class_name,
+                    get_mapper(
+                        family,
+                        class_name
+                    )(realm).unmap(self.object)
+                )
+                self.is_modified = False
+            return self.reference
+        raise ValueError("No object is loaded or set")
+
+    def set(self, obj):
+        self.object = obj
+        self.reference = None
+        self.is_mapped = True
+        self.is_modified = True
+
+    def purge(self):
+        if self.is_mapped and self.is_modified:
+            raise ValueError("Cannot purge unsaved modified object")
+        self.object = None
+        self.is_mapped = False
+        self.is_modified = False
+
+    def __str__(self):
+        return self.__repr__()
+
+    def __repr__(self):
+        return (
+            f"Connector(reference={repr(self.reference)}, "
+            f"obj={repr(self.object)}, is_mapped={repr(self.is_mapped)}, "
+            f"is_modified={self.is_modified})")

+ 20 - 0
model/connectordict.py

@@ -0,0 +1,20 @@
+"""
+A ConnectorDict contains a set of connectors that
+are mapped onto keys, much like a dictionary.
+
+The connector dict supports saving the bottom
+halves of all its connectors.
+"""
+
+
+from .connector import ConnectedObject as _ConnectedObject
+
+
+class ConnectorDict(dict, _ConnectedObject):
+    def __init__(self):
+        super().__init__()
+
+    def save_bottom_half(self, family, realm, force_write: bool = False):
+        for key, connector in self.items():
+            print(f"ConnectorDict.save_bottom_half: saving {key}, {connector}")  # TODO: remove me
+            connector.save(family, realm, force_write)
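
A short sketch of ConnectorDict usage, under the same assumption of an initialized git repository at /tmp/example-repo:

from model.connector import Connector
from model.connectordict import ConnectorDict
from model.text import Text

connectors = ConnectorDict()
connectors["greeting"] = Connector.from_object(Text("hello"))
# persist the bottom half, i.e. the referenced objects themselves
connectors.save_bottom_half("git", "/tmp/example-repo")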

+ 20 - 0
model/connectorlist.py

@@ -0,0 +1,20 @@
+"""
+A ConnectorList contains a list of connectors,
+much like a list.
+
+The connector list supports saving the bottom
+halves of all its connectors.
+"""
+
+
+from .connector import ConnectedObject as _ConnectedObject
+
+
+class ConnectorList(list, _ConnectedObject):
+    def __init__(self):
+        super().__init__()
+
+    def save_bottom_half(self, family, realm, force_write: bool = False):
+        for index, connector in enumerate(self):
+            print(f"ConnectorList.save_bottom_half: saving element #{index}: {connector}")  # TODO: remove me
+            connector.save(family, realm, force_write)

+ 48 - 0
model/filetree.py

@@ -0,0 +1,48 @@
+from typing import Optional
+
+from .connector import ConnectedObject, Connector
+from .mapper import get_mapper
+from .metadata import Metadata
+from .treenode import TreeNode
+from .mapper.reference import Reference
+
+
+class FileTree(ConnectedObject, TreeNode):
+    def __init__(self,
+                 mapper_family: str,
+                 realm: str):
+
+        super(FileTree, self).__init__()
+        self.mapper_family = mapper_family
+        self.realm = realm
+
+    def add_directory(self, path):
+        self.add_node(path, TreeNode())
+
+    def add_metadata(self, path: str, metadata: Optional[Metadata] = None):
+        self.add_node_hierarchy(path, TreeNode(value=Connector.from_object(metadata)))
+
+    def get_metadata(self, path: str):
+        return self.get_node_at_path(path).value.load(self.mapper_family, self.realm)
+
+    def set_metadata(self, path: str, metadata: Metadata):
+        self.get_node_at_path(path).value.set(metadata)
+
+    def unget_metadata(self, path: str, force_write: bool = False):
+        value = self.get_node_at_path(path).value
+        value.save(self.mapper_family, self.realm, force_write)
+        value.purge()
+
+    def save(self, force_write: bool = False) -> Reference:
+        """
+        Persist all file node values, i.e. all mapped metadata,
+        if they are modified or force_write is given. Then save
+        the tree itself with the class mapper.
+        """
+        file_node_set = self.get_paths_recursive(False)
+        for _, _, file_node in file_node_set:
+            file_node.value.save(self.mapper_family, self.realm, force_write)
+        return Reference(
+            self.mapper_family,
+            "FileTree",
+            get_mapper(self.mapper_family, "FileTree")(self.realm).unmap(self))

+ 22 - 0
model/mapper/__init__.py

@@ -0,0 +1,22 @@
+
+from .memorymapper import MEMORY_MAPPER_FAMILY
+from .gitmapper import GIT_MAPPER_FAMILY
+
+
+MAPPER_FAMILIES = {
+    "memory": MEMORY_MAPPER_FAMILY,
+    "git": GIT_MAPPER_FAMILY
+}
+
+
+def get_mapper(mapper_family: str, class_name: str):
+    family_class_mappers = MAPPER_FAMILIES.get(mapper_family, None)
+    if family_class_mappers is None:
+        raise ValueError(f"Unknown mapper family: {mapper_family}")
+
+    try:
+        # Item access (not .get) lets defaultdict-based families,
+        # e.g. MEMORY_MAPPER_FAMILY, supply a default mapper.
+        return family_class_mappers[class_name]
+    except KeyError:
+        raise ValueError(
+            f"No mapper for class: '{class_name}' "
+            f"in mapper family: '{mapper_family}'")
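
An illustrative lookup through get_mapper; the realm path is a hypothetical git working tree:

from model.mapper import get_mapper

mapper_class = get_mapper("git", "Text")     # resolves to TextGitMapper
mapper = mapper_class("/tmp/example-repo")   # instantiate with a realm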

+ 36 - 0
model/mapper/basemapper.py

@@ -0,0 +1,36 @@
+from abc import ABCMeta, abstractmethod
+from typing import Any, Optional
+
+
+from .reference import Reference
+
+
+class BaseMapper(metaclass=ABCMeta):
+    """
+    Base class for mapper classes
+
+    Mapper classes are responsible for retrieving
+    an object based on a reference and for persisting
+    an object and returning a reference to the
+    persisted object
+    """
+
+    def __init__(self, realm: Optional[str] = None):
+        """
+        realm defines the storage container for
+        the elements that are mapped. Currently
+        a model instance can only be stored in
+        a single container, i.e. all objects of
+        the model are stored in the same container,
+        although different parts may be present
+        in memory at different times.
+        """
+        self.realm = realm
+
+    @abstractmethod
+    def map(self, reference: Reference) -> Any:
+        raise NotImplementedError
+
+    @abstractmethod
+    def unmap(self, obj) -> str:
+        raise NotImplementedError
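
A minimal sketch of a custom mapper against this interface (hypothetical and purely illustrative): map resolves a Reference to an object, unmap persists an object and returns its location string.

from typing import Any

from model.mapper.basemapper import BaseMapper
from model.mapper.reference import Reference


class DictMapper(BaseMapper):
    """Keep objects in a class-level dict, keyed by an increasing counter."""
    _store = {}

    def map(self, reference: Reference) -> Any:
        return DictMapper._store[reference.location]

    def unmap(self, obj) -> str:
        location = str(len(DictMapper._store))
        DictMapper._store[location] = obj
        return location


mapper = DictMapper()
location = mapper.unmap("some object")
assert mapper.map(Reference("memory", "str", location)) == "some object"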

+ 19 - 0
model/mapper/gitmapper/__init__.py

@@ -0,0 +1,19 @@
+
+from .filetreemapper import FileTreeGitMapper
+from .metadatamapper import MetadataGitMapper
+from .metadatarootrecordmapper import MetadataRootRecordGitMapper
+from .referencemapper import ReferenceGitMapper
+from .textmapper import TextGitMapper
+from .uuidsetmapper import UUIDSetGitMapper
+from .versionlistmapper import VersionListGitMapper
+
+
+GIT_MAPPER_FAMILY = {
+    "FileTree": FileTreeGitMapper,
+    "Metadata": MetadataGitMapper,
+    "MetadataRootRecord": MetadataRootRecordGitMapper,
+    "Reference": ReferenceGitMapper,
+    "Text": TextGitMapper,
+    "UUIDSet": UUIDSetGitMapper,
+    "VersionList": VersionListGitMapper
+}

+ 38 - 0
model/mapper/gitmapper/execute.py

@@ -0,0 +1,38 @@
+import shlex
+import subprocess
+from typing import Any, List, Optional, Tuple, Union
+
+
+def execute_with_output(arguments: Union[str, List[str]],
+                        file_descriptor: Any,
+                        stdin_content: Optional[Union[str, bytes]] = None) -> Any:
+
+    return subprocess.run(
+        shlex.split(arguments) if isinstance(arguments, str) else arguments,
+        input=stdin_content.encode() if isinstance(stdin_content, str) else stdin_content,
+        stdout=file_descriptor
+    )
+
+
+def execute(arguments: Union[str, List[str]],
+            stdin_content: Optional[Union[str, bytes]] = None) -> Any:
+
+    return subprocess.run(
+        shlex.split(arguments) if isinstance(arguments, str) else arguments,
+        input=stdin_content.encode() if isinstance(stdin_content, str) else stdin_content,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE)
+
+
+def checked_execute(arguments: Union[str, List[str]],
+                    stdin_content: Optional[Union[str, bytes]] = None) -> Tuple[List[str], List[str]]:
+
+    result = execute(arguments, stdin_content)
+    if result.returncode != 0:
+        raise RuntimeError(
+            f"Command failed (exit code: {result.returncode}) {' '.join(arguments)}:\n"
+            f"STDOUT:\n"
+            f"{result.stdout.decode()}"
+            f"STDERR:\n"
+            f"{result.stderr.decode()}")
+    return result.stdout.decode().splitlines(), result.stderr.decode().splitlines()
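
A usage sketch for checked_execute (assumes git is installed on the system):

from model.mapper.gitmapper.execute import checked_execute

stdout_lines, stderr_lines = checked_execute(["git", "--version"])
print(stdout_lines[0])    # e.g. "git version 2.30.2"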

+ 40 - 0
model/mapper/gitmapper/filetreemapper.py

@@ -0,0 +1,40 @@
+
+
+from .gittools import git_load_str, git_ls_tree_recursive, git_save_str, git_save_tree
+from ..basemapper import BaseMapper
+from ..reference import Reference
+
+
+class FileTreeGitMapper(BaseMapper):
+
+    def _save(self, node: "TreeNode") -> str:
+        from model.connector import Connector
+        dir_entries = []
+        for name, child_node in node.child_nodes.items():
+            if child_node.is_leaf_node():
+                assert isinstance(child_node.value, Connector)
+                # Save connectors reference.
+                location = git_save_str(self.realm, child_node.value.reference.to_json())
+                dir_entries.append(("100644", "blob", location, name))
+            else:
+                dir_entries.append(("040000", "tree", self._save(child_node), name))
+        return git_save_tree(self.realm, dir_entries)
+
+    def map(self, ref: Reference) -> "FileTree":
+        from model.connector import Connector
+        from model.filetree import FileTree
+        from model.treenode import TreeNode
+        file_tree = FileTree("git", self.realm)
+        for line in git_ls_tree_recursive(self.realm, ref.location):
+            _, _, location, path = line.split()
+            connector = Connector.from_reference(
+                Reference.from_json(
+                    git_load_str(self.realm, location)))
+            file_tree.add_node_hierarchy(path, TreeNode(connector))
+        return file_tree
+
+    def unmap(self, obj) -> str:
+        """ Save FileTree as git file tree """
+        from model.filetree import FileTree
+        assert isinstance(obj, FileTree)
+        return self._save(obj)

+ 56 - 0
model/mapper/gitmapper/gittools.py

@@ -0,0 +1,56 @@
+import json
+from typing import Dict, List, Tuple, Union
+
+from .execute import checked_execute
+
+
+def git_command_line(repo_dir: str, command: str, arguments: List[str]) -> List[str]:
+    return [
+               "git",
+               "-P",
+               "--git-dir",
+               repo_dir + "/.git",
+               command
+           ] + arguments
+
+
+def git_text_result(cmd_line):
+    result = checked_execute(cmd_line)[0]
+    return "\n".join(result)
+
+
+def git_load_str(repo_dir, object_reference) -> str:
+    cmd_line = git_command_line(repo_dir, "show", [object_reference])
+    return git_text_result(cmd_line)
+
+
+def git_load_json(repo_dir, object_reference) -> Union[Dict, List]:
+    return json.loads(git_load_str(repo_dir, object_reference))
+
+
+def git_ls_tree(repo_dir, object_reference) -> List[str]:
+    cmd_line = git_command_line(repo_dir, "ls-tree", [object_reference])
+    return checked_execute(cmd_line)[0]
+
+
+def git_ls_tree_recursive(repo_dir, object_reference) -> List[str]:
+    cmd_line = git_command_line(repo_dir, "ls-tree", ["-r", object_reference])
+    return checked_execute(cmd_line)[0]
+
+
+def git_save_str(repo_dir, content: str) -> str:
+    cmd_line = git_command_line(repo_dir, "hash-object", ["-w", "--stdin"])
+    return checked_execute(cmd_line, stdin_content=content)[0][0]
+
+
+def git_save_json(repo_dir, json_object: Union[Dict, List]) -> str:
+    return git_save_str(repo_dir, json.dumps(json_object))
+
+
+def git_save_tree(repo_dir, entry_list: List[Tuple[str, str, str, str]]) -> str:
+    tree_spec = "\n".join([
+        f"{flag} {node_type} {object_hash}\t{name}"
+        for flag, node_type, object_hash, name in entry_list
+    ]) + "\n"
+    cmd_line = git_command_line(repo_dir, "mktree", ["--missing", ])
+    return checked_execute(cmd_line, stdin_content=tree_spec)[0][0]
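
A round-trip sketch for the blob helpers above, assuming /tmp/example-repo is an initialized git repository (hypothetical path):

from model.mapper.gitmapper.gittools import (
    git_load_json, git_load_str, git_save_json, git_save_str)

repo = "/tmp/example-repo"
object_hash = git_save_str(repo, "hello, world")     # writes a blob, returns its hash
assert git_load_str(repo, object_hash) == "hello, world"

json_hash = git_save_json(repo, {"key": "value"})
assert git_load_json(repo, json_hash) == {"key": "value"}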

+ 20 - 0
model/mapper/gitmapper/metadatamapper.py

@@ -0,0 +1,20 @@
+
+from .gittools import git_load_str, git_save_str
+from ..basemapper import BaseMapper
+from ..reference import Reference
+
+
+class MetadataGitMapper(BaseMapper):
+
+    def map(self, ref: Reference) -> "Metadata":
+        from model.metadata import Metadata
+        return Metadata.from_json(
+            git_load_str(self.realm, ref.location)
+        )
+
+    def unmap(self, obj) -> str:
+        from model.metadata import Metadata
+        assert isinstance(obj, Metadata)
+        return git_save_str(self.realm, obj.to_json())
+
+

+ 45 - 0
model/mapper/gitmapper/metadatarootrecordmapper.py

@@ -0,0 +1,45 @@
+from typing import Any
+
+from .gittools import git_load_json, git_save_json
+from ..basemapper import BaseMapper
+from ..reference import Reference
+
+
+class MetadataRootRecordGitMapper(BaseMapper):
+
+    def map(self, ref: Reference) -> Any:
+        from model.connector import Connector
+        from model.metadatarootrecord import MetadataRootRecord
+
+        assert isinstance(ref, Reference)
+        assert ref.mapper_family == "git"
+
+        json_object = git_load_json(self.realm, ref.location)
+        return MetadataRootRecord(
+            "git",
+            self.realm,
+            json_object["dataset_identifier"],
+            Connector.from_reference(
+                Reference.from_json(json_object["dataset_level_metadata"])
+            ),
+            Connector.from_reference(
+                Reference.from_json(json_object["file_level_metadata"])
+            )
+        )
+
+    def unmap(self, obj) -> str:
+        from model.metadatarootrecord import MetadataRootRecord
+        assert isinstance(obj, MetadataRootRecord)
+        json_object = {
+            "dataset_identifier": obj.dataset_identifier,
+            "dataset_level_metadata": obj.dataset_level_metadata.save(
+                "git",
+                self.realm
+            ).to_json(),
+            "file_level_metadata": obj.file_tree.save(
+                "git",
+                self.realm
+            ).to_json()
+        }
+        return git_save_json(self.realm, json_object)
+

+ 19 - 0
model/mapper/gitmapper/referencemapper.py

@@ -0,0 +1,19 @@
+from typing import Any
+
+from .gittools import git_load_str, git_save_str
+from ..basemapper import BaseMapper
+from ..reference import Reference
+
+
+class ReferenceGitMapper(BaseMapper):
+
+    def map(self, ref: Reference) -> Reference:
+        assert isinstance(ref, Reference)
+        assert ref.mapper_family == "git"
+        ref_json_str = git_load_str(self.realm, ref.location)
+        return Reference.from_json(ref_json_str)
+
+    def unmap(self, ref: Any) -> str:
+        assert isinstance(ref, Reference)
+        return git_save_str(self.realm, ref.to_json())
+

+ 17 - 0
model/mapper/gitmapper/textmapper.py

@@ -0,0 +1,17 @@
+
+from .gittools import git_load_str, git_save_str
+from ..basemapper import BaseMapper
+from ..reference import Reference
+
+
+class TextGitMapper(BaseMapper):
+
+    def map(self, ref: Reference) -> "Text":
+        from model.text import Text
+        content = git_load_str(self.realm, ref.location)
+        return Text(content)
+
+    def unmap(self, text) -> str:
+        from model.text import Text
+        assert isinstance(text, Text)
+        return git_save_str(self.realm, text.content)

+ 48 - 0
model/mapper/gitmapper/uuidsetmapper.py

@@ -0,0 +1,48 @@
+from typing import Any
+from uuid import UUID
+
+from .execute import checked_execute
+from .gittools import git_command_line, git_ls_tree, git_save_tree
+from ..basemapper import BaseMapper
+from ..reference import Reference
+
+
+class UUIDSetGitMapper(BaseMapper):
+
+    def map(self, ref: Reference) -> Any:
+        from model.connector import Connector
+        from model.uuidset import UUIDSet
+        assert isinstance(ref, Reference)
+        assert ref.mapper_family == "git"
+
+        initial_set = {
+            UUID(line.split()[3]): Connector.from_reference(
+                Reference("git", "VersionList", line.split()[2])
+            )
+            for line in git_ls_tree(self.realm, ref.location)
+        }
+        return UUIDSet("git", self.realm, initial_set)
+
+    def unmap(self, uuid_set: Any) -> Reference:
+        """
+        Store the data in the UUIDSet, including
+        the top-half of the connectors.
+        """
+        # Import UUIDSet here to prevent recursive imports
+        from model.uuidset import UUIDSet
+        assert isinstance(uuid_set, UUIDSet)
+
+        top_half = [
+            ("100644", "blob", version_list_connector.reference.location, str(uuid))
+            for uuid, version_list_connector in uuid_set.uuid_set.items()
+        ]
+        if not top_half:
+            raise ValueError("Cannot unmap an empty UUID")
+
+        location = git_save_tree(self.realm, top_half)
+        cmd_line = git_command_line(
+            self.realm,
+            "update-ref",
+            ["refs/develop/dataset-set", location])
+        checked_execute(cmd_line)
+        return Reference("git", "UUIDSet", "refs/develop/dataset-set")

+ 49 - 0
model/mapper/gitmapper/versionlistmapper.py

@@ -0,0 +1,49 @@
+from typing import Any
+
+from .gittools import git_load_json, git_save_json
+from ..basemapper import BaseMapper
+from ..reference import Reference
+
+
+class VersionListGitMapper(BaseMapper):
+    """
+    Map version lists to git objects.
+    The objects are blobs containing json strings that
+    define a list of primary data-metadata associations.
+    """
+
+    def map(self, ref: Reference) -> Any:
+        from model.connector import Connector
+        from model.versionlist import VersionRecord, VersionList
+        assert isinstance(ref, Reference)
+        assert ref.mapper_family == "git"
+
+        json_object = git_load_json(self.realm, ref.location)
+        version_records = {
+            pdm_assoc["primary_data_version"]: VersionRecord(
+                pdm_assoc["time_stamp"],
+                pdm_assoc["path"],
+                Connector.from_reference(
+                    Reference.from_json(pdm_assoc["metadata_root"])
+                )
+            )
+            for pdm_assoc in json_object
+        }
+        return VersionList("git", self.realm, version_records)
+
+    def unmap(self, obj: Any) -> str:
+        from model.versionlist import VersionList
+        assert isinstance(obj, VersionList)
+        json_object = [
+            {
+                "primary_data_version": primary_data_version,
+                "time_stamp": version_record.time_stamp,
+                "path": version_record.path,
+                "metadata_root": version_record.mrr_connector.save(
+                    "git",
+                    self.realm
+                ).to_json()
+            }
+            for primary_data_version, version_record in obj.version_set.items()
+        ]
+        return git_save_json(self.realm, json_object)

+ 37 - 0
model/mapper/memorymapper.py

@@ -0,0 +1,37 @@
+from collections import defaultdict
+
+from .basemapper import BaseMapper as _BaseMapper
+from .reference import Reference as _Reference
+
+
+class MemoryMapper(_BaseMapper):
+    instance = None
+
+    def __init__(self):
+        super().__init__("memory")
+        self.objects = dict()
+        self.index = 1000
+
+    def __call__(self, *args, **kwargs):
+        return self
+
+    def map(self, reference: _Reference):
+        index = int(reference.location)
+        print(f"mapper: loading reference {reference}: {self.objects[index]}")
+        return self.objects[index]
+
+    def unmap(self, obj) -> str:
+        location = str(self.index)
+        print(f"mapper: saving object {obj}: {location}")
+        self.objects[self.index] = obj
+        self.index += 1
+        return location
+
+    @classmethod
+    def get_instance(cls):
+        if MemoryMapper.instance is None:
+            MemoryMapper.instance = MemoryMapper()
+        return MemoryMapper.instance
+
+
+MEMORY_MAPPER_FAMILY = defaultdict(MemoryMapper)

+ 42 - 0
model/mapper/reference.py

@@ -0,0 +1,42 @@
+import json
+from typing import Optional
+
+
+class Reference:
+    def __init__(self,
+                 mapper_family: str,
+                 class_name: str,
+                 location: Optional[str] = None):
+        self.mapper_family = mapper_family
+        self.class_name = class_name
+        self.location = location
+
+    def __str__(self):
+        return self.__repr__()
+
+    def __repr__(self):
+        return (
+            f"Reference(mapper_family='{self.mapper_family}', "
+            f"class_name='{self.class_name}', "
+            f"location={repr(self.location)})")
+
+    def to_json(self):  # TODO: rename to to_json_str
+        return json.dumps({
+            "@": dict(
+                type="Reference",
+                version="1.0",
+            ),
+            **dict(
+                mapper_family=self.mapper_family,
+                class_name=self.class_name,
+                location=self.location)})
+
+    @classmethod
+    def from_json(cls, json_str: str) -> "Reference":   # TODO: rename to from_json_str
+        obj = json.loads(json_str)
+        assert obj["@"]["type"] == "Reference"
+        assert obj["@"]["version"] == "1.0"
+        return cls(
+            obj["mapper_family"],
+            obj["class_name"],
+            obj["location"])

+ 192 - 0
model/metadata.py

@@ -0,0 +1,192 @@
+import json
+import time
+from typing import Dict, List, Optional, Set, Union
+
+from .connector import ConnectedObject
+from .mapper import get_mapper
+from .mapper.reference import Reference
+
+
+JSONObject = Union[List["JSONObject"], Dict[str, "JSONObject"], int, float, str]
+
+
+class ExtractorConfiguration:
+    """
+    Holds a single configuration for an extractor.
+    Ensures that at least a version number is given.
+    """
+    def __init__(self,
+                 version: str,
+                 parameter: Dict[str, JSONObject]):
+        self.version = version
+        self.parameter = parameter
+
+    def to_json_obj(self) -> JSONObject:
+        return {
+            "@": dict(
+                type="ExtractorConfiguration",
+                version="1.0"
+            ),
+            "version": self.version,
+            "parameter": self.parameter
+        }
+
+    def to_json_str(self) -> str:
+        return json.dumps(self.to_json_obj())
+
+    @classmethod
+    def from_json_obj(cls, obj: JSONObject) -> "ExtractorConfiguration":
+        assert obj["@"]["type"] == "ExtractorConfiguration"
+        assert obj["@"]["version"] == "1.0"
+        return cls(
+            obj["version"],
+            obj["parameter"]
+        )
+
+    @classmethod
+    def from_json_str(cls, json_str: str) -> "ExtractorConfiguration":
+        return cls.from_json_obj(json.loads(json_str))
+
+
+class MetadataInstance:
+    """
+    A single metadata instance. It is associated
+    with provenance information, i.e. time stamp,
+    author, author_email, with a configuration, i.e.
+    parameters, and with a source that points to
+    the metadata itself.
+    """
+    def __init__(self,
+                 time_stamp,
+                 author_name,
+                 author_email,
+                 configuration: ExtractorConfiguration,
+                 metadata_location: str):
+
+        self.time_stamp = time_stamp
+        self.author_name = author_name
+        self.author_email = author_email
+        self.configuration = configuration
+        self.metadata_location = metadata_location
+
+    def to_json_obj(self) -> JSONObject:
+        return {
+            "@": dict(
+                type="MetadataInstance",
+                version="1.0"
+            ),
+            "time_stamp": self.time_stamp,
+            "author": self.author_name,
+            "author_email": self.author_email,
+            "configuration": self.configuration.to_json_obj(),
+            "metadata_location": self.metadata_location
+        }
+
+    def to_json_str(self) -> str:
+        return json.dumps(self.to_json_obj())
+
+    @classmethod
+    def from_json_obj(cls, obj: JSONObject) -> "MetadataInstance":
+        assert obj["@"]["type"] == "MetadataInstance"
+        assert obj["@"]["version"] == "1.0"
+        return cls(
+            obj["time_stamp"],
+            obj["author"],
+            obj["author_email"],
+            ExtractorConfiguration.from_json_obj(obj["configuration"]),
+            obj["metadata_location"]
+        )
+
+    @classmethod
+    def from_json_str(cls, json_str: str) -> "MetadataInstance":
+        return cls.from_json_obj(json.loads(json_str))
+
+
+class Metadata(ConnectedObject):
+    """
+    Holds entries for all metadata of a single object.
+    Metadata is identified on the first level by its
+    format-name, i.e. the extractor-name. For each
+    extractor there is a set of configurations and
+    associated metadata, i.e. objects that contain
+    the extractor result, aka the real metadata.
+    """
+    def __init__(self,
+                 mapper_family: str,
+                 realm: str,
+                 initial_instances: Optional[Dict[str, Set[MetadataInstance]]] = None):
+
+        self.mapper_family = mapper_family
+        self.realm = realm
+        self.instances = initial_instances or dict()
+
+    def to_json(self) -> str:
+        return json.dumps({
+            "@": dict(
+                type="Metadata",
+                version="1.0"
+            ),
+            "mapper_family": self.mapper_family,
+            "realm": self.realm,
+            "instances": {
+                format_name: [
+                    instance.to_json_obj()
+                    for instance in instance_set
+                ]
+                for format_name, instance_set in self.instances.items()
+            }
+        })
+
+    def save(self) -> Reference:
+        return Reference(
+            self.mapper_family,
+            "Metadata",
+            get_mapper(self.mapper_family, "Metadata")(self.realm).unmap(self))
+
+    def get_extractor_names(self):
+        return self.instances.keys()
+
+    def get_extractor_runs(self, extractor_name: str) -> Set[MetadataInstance]:
+        return self.instances[extractor_name]
+
+    def add_extractor_run(self,
+                          time_stamp: Optional[int],
+                          extractor_name: str,
+                          author_name: str,
+                          author_email: str,
+                          configuration: ExtractorConfiguration,
+                          metadata_location: str):
+
+        instance_set = self.instances.get(extractor_name, set())
+        instance_set.add(
+            MetadataInstance(
+                (
+                    time_stamp
+                    if time_stamp is not None
+                    else int(time.time())
+                ),
+                author_name,
+                author_email,
+                configuration,
+                metadata_location
+            )
+        )
+        self.instances[extractor_name] = instance_set
+
+    @classmethod
+    def from_json(cls, json_str: str):
+        obj = json.loads(json_str)
+        assert obj["@"]["type"] == "Metadata"
+        assert obj["@"]["version"] == "1.0"
+        instances = {
+            format_name: set([
+                MetadataInstance.from_json_obj(instance_json)
+                for instance_json in instance_list_json
+            ])
+            for format_name, instance_list_json in obj["instances"].items()
+        }
+        return cls(
+            obj["mapper_family"],
+            obj["realm"],
+            instances
+        )
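
A sketch of recording an extractor run; extractor name, author, and metadata location are hypothetical, and the realm string is only stored, not accessed:

from model.metadata import ExtractorConfiguration, Metadata

metadata = Metadata("git", "/tmp/example-repo")
configuration = ExtractorConfiguration("1.0", {"parameter_a": "value_a"})
metadata.add_extractor_run(
    None,                       # None: use the current time
    "core-extractor",           # hypothetical extractor name
    "Jane Doe",
    "jane.doe@example.com",
    configuration,
    "object-hash-or-path")      # hypothetical metadata location
restored = Metadata.from_json(metadata.to_json())
assert "core-extractor" in restored.get_extractor_names()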

+ 48 - 0
model/metadatarootrecord.py

@@ -0,0 +1,48 @@
+from uuid import UUID
+
+from .connector import ConnectedObject, Connector
+from .mapper import get_mapper
+from .mapper.reference import Reference
+
+
+class MetadataRootRecord(ConnectedObject):
+    def __init__(self,
+                 mapper_family,
+                 realm,
+                 dataset_identifier: UUID,
+                 dataset_level_metadata: Connector,
+                 file_tree: Connector):
+
+        self.mapper_family = mapper_family
+        self.realm = realm
+        self.dataset_identifier = dataset_identifier
+        self.dataset_level_metadata = dataset_level_metadata
+        self.file_tree = file_tree
+
+    def save(self, force_write: bool = False) -> Reference:
+        """
+        This method persists the bottom-half of the dataset-level
+        metadata connector and of the file tree connector. Then it
+        saves the properties of the MetadataRootRecord and the
+        top-half of the connectors with the appropriate class mapper.
+        """
+        self.file_tree.save(self.mapper_family, self.realm, force_write)
+        self.dataset_level_metadata.save(self.mapper_family, self.realm, force_write)
+        return Reference(
+            self.mapper_family,
+            "MetadataRootRecord",
+            get_mapper(
+                self.mapper_family,
+                "MetadataRootRecord")(self.realm).unmap(self))
+
+    def set_file_tree(self, file_tree: ConnectedObject):
+        self.file_tree = Connector.from_object(file_tree)
+
+    def get_file_tree(self):
+        return self.file_tree.load(self.mapper_family, self.realm)
+
+    def set_dataset_level_metadata(self, dataset_level_metadata: ConnectedObject):
+        self.dataset_level_metadata = Connector.from_object(dataset_level_metadata)
+
+    def get_dataset_level_metadata(self):
+        return self.dataset_level_metadata.load(self.mapper_family, self.realm)

+ 32 - 0
model/tests/test_tree_node.py

@@ -0,0 +1,32 @@
+import unittest
+
+from model.treenode import TreeNode
+
+
+class TestHierarchy(unittest.TestCase):
+    def test_hierarchical_adding(self):
+        paths = ["a/b/c", "a/b/a", "b", "c/d/e"]
+        tree = TreeNode()
+
+        for path in paths:
+            tree.add_node_hierarchy(path, TreeNode(value=path))
+
+        leaf_path_infos = tree.get_paths_recursive(False)
+        leaf_path_names = [info[0] for info in leaf_path_infos]
+        self.assertEqual(sorted(paths), sorted(leaf_path_names))
+        self.assertIsNotNone(tree.get_node_at_path("a"))
+        self.assertIsNotNone(tree.get_node_at_path("a/b"))
+        self.assertIsNotNone(tree.get_node_at_path("a/b/c"))
+        self.assertIsNone(tree.get_node_at_path("a/b/c/d"))
+        self.assertIsNone(tree.get_node_at_path("a/b/x"))
+
+    def test_existing_path(self):
+        tree = TreeNode()
+        tree.add_node_hierarchy("a/b/c", TreeNode(value="test-value"))
+        self.assertRaises(ValueError, tree.add_node_hierarchy, "a/b/c", TreeNode(value="test-value"))
+        self.assertRaises(ValueError, tree.add_node_hierarchy, "a/b", TreeNode(value="test-value"))
+        self.assertRaises(ValueError, tree.add_node_hierarchy, "a", TreeNode(value="test-value"))
+
+
+if __name__ == '__main__':
+    unittest.main()

+ 13 - 0
model/text.py

@@ -0,0 +1,13 @@
+"""
+Instances of the Text class just contain
+text. Their main use is as a dummy element
+during model development.
+"""
+from dataclasses import dataclass
+
+from model.connector import ConnectedObject
+
+
+@dataclass
+class Text(ConnectedObject):
+    content: str

+ 78 - 0
model/treenode.py

@@ -0,0 +1,78 @@
+from typing import Any, List, Optional, Tuple
+
+
+class TreeNode:
+    def __init__(self,
+                 value: Optional[Any] = None):
+        self.child_nodes = dict()
+        self.value = value
+
+    def is_leaf_node(self):
+        return len(self.child_nodes) == 0
+
+    def add_node(self, name: str, new_node: "TreeNode"):
+        self.add_nodes([(name, new_node)])
+
+    def add_nodes(self, new_nodes: List[Tuple[str, "TreeNode"]]):
+        new_names = set(map(lambda new_entry: new_entry[0], new_nodes))
+        duplicated_names = set(self.child_nodes.keys()) & new_names
+        if duplicated_names:
+            raise ValueError("Name(s) already exist(s): " + ", ".join(duplicated_names))
+        self.child_nodes = {
+            **self.child_nodes,
+            **dict(new_nodes)
+        }
+
+    def add_node_hierarchy(self, path: str, new_node: "TreeNode"):
+        self._add_node_hierarchy(path.split("/"), new_node)
+
+    def _add_node_hierarchy(self, path_elements: List[str], new_node: "TreeNode"):
+        if len(path_elements) == 1:
+            self.add_node(path_elements[0], new_node)
+        else:
+            sub_node = self.child_nodes.get(path_elements[0], None)
+            if not sub_node:
+                sub_node = TreeNode()
+                self.add_node(path_elements[0], sub_node)
+            elif sub_node.is_leaf_node():
+                raise ValueError(
+                    f"Cannot replace leaf node with name {path_elements[0]} with a directory node")
+            sub_node._add_node_hierarchy(path_elements[1:], new_node)
+
+    def add_node_hierarchies(self, new_node_hierarchies: List[Tuple[str, "TreeNode"]]):
+        for path_node_tuple in new_node_hierarchies:
+            self.add_node_hierarchy(*path_node_tuple)
+
+    def get_sub_node(self, name: str):
+        return self.child_nodes[name]
+
+    def get_node_at_path(self, path: str = "") -> Optional["TreeNode"]:
+        """ Simple linear path-search """
+        path_elements = path.split("/") if path != "" else []
+        current_node = self
+        for element in path_elements:
+            try:
+                current_node = current_node.get_sub_node(element)
+            except KeyError:
+                return None
+        return current_node
+
+    def get_paths(self):
+        return tuple(self.child_nodes.keys())
+
+    def get_paths_recursive(self, show_intermediate: Optional[bool] = False) -> List[Tuple[str, bool, Any]]:
+        if show_intermediate or self.is_leaf_node():
+            result = [("", not self.is_leaf_node(), self)]
+        else:
+            result = []
+        for child_name, child_node in self.child_nodes.items():
+            child_node_infos = child_node.get_paths_recursive(show_intermediate)
+            result += [
+                (
+                    child_name + ("/" + child_node_info[0] if child_node_info[0] else ""),
+                    child_node_info[1],
+                    child_node_info[2]
+                )
+                for child_node_info in child_node_infos
+            ]
+        return result
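
A small sketch of the traversal result format, i.e. (path, is-a-directory flag, node) tuples:

from model.treenode import TreeNode

tree = TreeNode()
tree.add_node_hierarchy("a/b/c", TreeNode(value="leaf value"))
for path, is_directory, node in tree.get_paths_recursive(show_intermediate=True):
    print(f"{path!r}: directory={is_directory}, value={node.value}")
# prints entries for '', 'a', 'a/b' (directories) and 'a/b/c' (the leaf)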

+ 62 - 0
model/uuidset.py

@@ -0,0 +1,62 @@
+from typing import Dict, Optional
+from uuid import UUID
+
+from .mapper import get_mapper
+from .mapper.reference import Reference
+from .versionlist import VersionList
+from .connector import ConnectedObject, Connector
+from .connectordict import ConnectorDict
+
+
+class UUIDSet(ConnectedObject):
+    def __init__(self,
+                 mapper_family: str,
+                 realm: str,
+                 initial_set: Optional[Dict[UUID, Connector]] = None):
+        self.mapper_family = mapper_family
+        self.realm = realm
+        self.uuid_set = ConnectorDict()
+        if initial_set:
+            self.uuid_set.update(initial_set)
+
+    def save(self, force_write: bool = False) -> Reference:
+        """
+        This method persists the bottom-half of all modified
+        connectors by delegating it to the ConnectorDict. Then
+        it saves the properties of the UUIDSet and the top-half
+        of the connectors with the appropriate class mapper.
+        """
+        self.uuid_set.save_bottom_half(self.mapper_family, self.realm, force_write)
+        return Reference(
+            self.mapper_family,
+            "UUIDSet",
+            get_mapper(self.mapper_family, "UUIDSet")(self.realm).unmap(self))
+
+    def uuids(self):
+        return self.uuid_set.keys()
+
+    def set_version_list(self,
+                         uuid: UUID,
+                         version_list: VersionList):
+        """
+        Set a new or updated version list for the uuid.
+        Existing references are deleted.
+        The entry is marked as dirty.
+        """
+        self.uuid_set[uuid] = Connector.from_object(version_list)
+
+    def get_version_list(self, uuid):
+        """
+        Get the version list for uuid. If it is not mapped yet,
+        it will be mapped.
+        """
+        return self.uuid_set[uuid].load(self.mapper_family, self.realm)
+
+    def unget_version_list(self, uuid, force_write: bool = False):
+        """
+        Remove a version list from memory. First, persist the
+        current status, if it was changed or force_write
+        is true.
+        """
+        self.uuid_set[uuid].save(self.mapper_family, self.realm, force_write)
+        self.uuid_set[uuid].purge()
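
A usage sketch for UUIDSet; the realm is a hypothetical git working tree, and no git access happens before save or an actual re-mapping:

from uuid import uuid4

from model.uuidset import UUIDSet
from model.versionlist import VersionList

realm = "/tmp/example-repo"
uuid_set = UUIDSet("git", realm)
dataset_id = uuid4()
uuid_set.set_version_list(dataset_id, VersionList("git", realm))
version_list = uuid_set.get_version_list(dataset_id)   # already mapped, returned directly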

+ 92 - 0
model/versionlist.py

@@ -0,0 +1,92 @@
+from typing import Dict, Optional
+
+from .connector import ConnectedObject, Connector
+from .mapper import get_mapper
+from .text import Text
+from .mapper.reference import Reference
+
+
+MetadataRootRecord = Text
+
+
+class VersionRecord:
+    def __init__(self,
+                 time_stamp: str,
+                 path: Optional[str],
+                 mrr_connector: Connector):
+        self.time_stamp = time_stamp
+        self.path = path
+        self.mrr_connector = mrr_connector
+
+
+class VersionList(ConnectedObject):
+    def __init__(self,
+                 mapper_family: str,
+                 realm: str,
+                 initial_set: Optional[Dict[str, VersionRecord]] = None):
+        self.mapper_family = mapper_family
+        self.realm = realm
+        self.version_set = initial_set or dict()
+
+    def _get_version_record(self, primary_data_version) -> VersionRecord:
+        return self.version_set[primary_data_version]
+
+    def _get_mrr_connector(self, primary_data_version) -> Connector:
+        return self._get_version_record(primary_data_version).mrr_connector
+
+    def save(self, force_write: bool = False) -> Reference:
+        """
+        This method persists the bottom-half of all modified
+        connectors by saving each version record's connector. Then
+        it saves the properties of the VersionList and the top-half
+        of the connectors with the appropriate class mapper.
+        """
+        for primary_data_version, version_record in self.version_set.items():
+            version_record.mrr_connector.save(self.mapper_family, self.realm, force_write)
+        return Reference(
+            self.mapper_family,
+            "VersionList",
+            get_mapper(self.mapper_family, "VersionList")(self.realm).unmap(self))
+
+    def versions(self):
+        return self.version_set.keys()
+
+    def set_metadata_root_record(self,
+                                 primary_data_version: str,
+                                 time_stamp: str,
+                                 path: str,
+                                 metadata_root_record: MetadataRootRecord):
+        """
+        Set a new or updated metadata root record.
+        Existing references are deleted.
+        The entry is marked as dirty.
+        """
+        self.version_set[primary_data_version] = VersionRecord(
+            time_stamp,
+            path,
+            Connector.from_object(metadata_root_record))
+
+    def get_metadata_root_record(self, primary_data_version: str):
+        """
+        Get the metadata root record, its timestamp and path for the given version.
+        If it is not mapped yet, it will be mapped.
+        """
+        version_record = self._get_version_record(primary_data_version)
+        return (
+            version_record.time_stamp,
+            version_record.path,
+            version_record.mrr_connector.load(
+                self.mapper_family,
+                self.realm))
+
+    def unget_metadata_root_record(self,
+                                   primary_data_version: str,
+                                   force_write: bool = False):
+        """
+        Remove a metadata record from memory. First, persist the
+        current status, if it was changed or if force_write
+        is true.
+        """
+        mrr_connector = self._get_mrr_connector(primary_data_version)
+        mrr_connector.save(self.mapper_family, self.realm, force_write)
+        mrr_connector.purge()
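
A usage sketch for VersionList; version, time stamp, and path values are hypothetical, and MetadataRootRecord is currently aliased to Text in this module:

from model.text import Text
from model.versionlist import VersionList

version_list = VersionList("git", "/tmp/example-repo")
version_list.set_metadata_root_record(
    "000102abcdef",                  # hypothetical primary data version
    "2021-05-01T12:00:00",
    "sub/dataset",
    Text("placeholder metadata root record"))
time_stamp, path, mrr = version_list.get_metadata_root_record("000102abcdef")
assert mrr.content == "placeholder metadata root record"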