Recursive to/from_dict
Adds a create_from_dict function that takes the output of
HasDict.to_dict and turns it into a live object, similar to our earlier
to_object() method on ProjectHDFio.
Adds an instantiate class method to HasDict to support this; it allows
HasDictfromHDF to work and will be useful for dataclasses in the future.
HasDict.to_dict now iterates over the contents returned by _to_dict and
automatically converts any HasDict/HasHDF objects it finds.
I haven't used this in downstream code yet to keep the change small, but
in principle this will allow GenericJob/DataContainer to stop calling
to_dict on their children explicitly and let the generic interface
handle it.

The rest of the changes are renaming everything to _from_dict/_to_dict
and normalizing the argument name to obj_dict.
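
To make the round trip concrete, here is a minimal sketch (not part of this commit; MyData is a hypothetical subclass, it assumes _to_dict/_from_dict are the only abstract methods, as in this diff, and that the recorded TYPE is importable at restore time):

from pyiron_base.interfaces.has_dict import HasDict, create_from_dict


class MyData(HasDict):
    """Hypothetical example class, for illustration only."""

    def __init__(self, value=0):
        self.value = value

    def _to_dict(self):
        # plain payload only; to_dict() adds the NAME/TYPE/... metadata and
        # converts any HasDict/HasHDF values it finds
        return {"value": self.value}

    def _from_dict(self, obj_dict, version=None):
        self.value = obj_dict["value"]


obj_dict = MyData(42).to_dict()        # plain dict carrying TYPE information
restored = create_from_dict(obj_dict)  # instantiate(), then from_dict()
assert restored.value == 42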
pmrv committed Aug 16, 2024
1 parent 5c946df commit af49e2c
Showing 11 changed files with 137 additions and 80 deletions.
63 changes: 57 additions & 6 deletions pyiron_base/interfaces/has_dict.py
@@ -19,7 +19,7 @@
 from typing import Any

 from pyiron_base.interfaces.has_hdf import HasHDF
-from pyiron_base.storage.hdfio import DummyHDFio
+from pyiron_base.storage.hdfio import DummyHDFio, _extract_module_class_name, _import_class

 __author__ = "Jan Janssen"
 __copyright__ = (
@@ -32,12 +32,44 @@
 __status__ = "production"
 __date__ = "Dec 20, 2023"


+def create_from_dict(obj_dict):
+    """
+    Create and restore an object previously written as a dictionary.
+
+    Args:
+        obj_dict (dict): must be the output of HasDict.to_dict()
+
+    Returns:
+        object: restored object
+    """
+    if "TYPE" not in obj_dict:
+        raise ValueError(
+            "invalid obj_dict! must contain type information and be the output of HasDict.to_dict!"
+        )
+    type_field = obj_dict["TYPE"]
+    module_path, class_name = _extract_module_class_name(type_field)
+    class_object = _import_class(module_path, class_name)
+    version = obj_dict.get("VERSION", None)
+    obj = class_object.instantiate(obj_dict, version)
+    obj.from_dict(obj_dict, version)
+    return obj
+
+
 class HasDict(ABC):
     __dict_version__ = "0.1.0"

-    @abstractmethod
+    @classmethod
+    def instantiate(cls, obj_dict: dict, version: str = None) -> "Self":
+        return cls()
+
     def from_dict(self, obj_dict: dict, version: str = None):
+        def load(inner_dict):
+            if not isinstance(inner_dict, dict):
+                return inner_dict
+            if not all(k in inner_dict for k in ("NAME", "TYPE", "OBJECT", "DICT_VERSION")):
+                return {k: load(v) for k, v in inner_dict.items()}
+            return create_from_dict(inner_dict)
+
+        self._from_dict({k: load(v) for k, v in obj_dict.items()}, version)
+
+    @abstractmethod
+    def _from_dict(self, obj_dict: dict, version: str = None):
         pass

     @abstractmethod
@@ -66,7 +98,17 @@ def _type_to_dict(self):
         return type_dict

     def to_dict(self):
-        return self._to_dict() | self._type_to_dict()
+        type_dict = self._type_to_dict()
+        data_dict = {}
+        child_dict = {}
+        for k, v in self._to_dict().items():
+            if isinstance(v, HasDict):
+                child_dict[k] = v.to_dict()
+            elif isinstance(v, HasHDF):
+                child_dict[k] = HasDictfromHDF.to_dict(v)
+            else:
+                data_dict[k] = v
+        return data_dict | self._join_children_dict(child_dict) | type_dict

     @staticmethod
     def _join_children_dict(children: dict[str, dict[str, Any]]) -> dict[str, Any]:
@@ -114,12 +156,21 @@ class HasDictfromHDF(HasDict, HasHDF):
     their children to implement it.
     """

-    def from_dict(self, obj_dict: dict, version: str = None):
+    @classmethod
+    def instantiate(cls, obj_dict: dict, version: str = None) -> "Self":
+        hdf = DummyHDFio(None, "/", obj_dict)
+        return cls(**cls.from_hdf_args(hdf))
+
+    def _from_dict(self, obj_dict: dict, version: str = None):
         # DummyHDFio(project=None) looks a bit weird, but it was added there
         # only to support saving/loading jobs which already use the HasDict
         # interface
-        hdf = DummyHDFio(None, "/", obj_dict)
-        self.from_hdf(hdf, group_name=self._get_hdf_group_name())
+        group_name = self._get_hdf_group_name()
+        if group_name is not None:
+            hdf = DummyHDFio(None, "/", {group_name: obj_dict})
+        else:
+            hdf = DummyHDFio(None, "/", obj_dict)
+        self.from_hdf(hdf)

     def _to_dict(self):
         hdf = DummyHDFio(None, "/")
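
The effect of the new to_dict recursion on nested objects, sketched with hypothetical classes:

class Inner(HasDict):
    def __init__(self, x=0):
        self.x = x

    def _to_dict(self):
        return {"x": self.x}

    def _from_dict(self, obj_dict, version=None):
        self.x = obj_dict["x"]


class Outer(HasDict):
    def __init__(self):
        self.inner = Inner()

    def _to_dict(self):
        # return the child object itself; to_dict() now serializes it, so
        # Outer no longer has to call self.inner.to_dict() explicitly
        return {"inner": self.inner}

    def _from_dict(self, obj_dict, version=None):
        # on load, from_dict() has already turned any nested dict carrying
        # NAME/TYPE/OBJECT/DICT_VERSION back into a live object
        self.inner = obj_dict["inner"]

Outer().to_dict() then stores the child under flattened keys (e.g. "inner/x" plus the child's type metadata; the exact layout is an assumption based on _join_children_dict, whose body is not shown in this hunk).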
24 changes: 12 additions & 12 deletions pyiron_base/jobs/datamining.py
@@ -657,8 +657,8 @@ def _save_output(self):
                 hdf5_output.file_name, key=hdf5_output.h5_path + "/table"
             )

-    def to_dict(self):
-        job_dict = super().to_dict()
+    def _to_dict(self):
+        job_dict = super()._to_dict()
         job_dict["input/bool_dict"] = {
             "enforce_update": self._enforce_update,
             "convert_to_object": self._pyiron_table.convert_to_object,
@@ -683,10 +683,10 @@ def to_dict(self):
         )
         return job_dict

-    def from_dict(self, job_dict):
-        super().from_dict(job_dict=job_dict)
-        if "project" in job_dict["input"].keys():
-            project_dict = job_dict["input"]["project"]
+    def _from_dict(self, obj_dict, version=None):
+        super()._from_dict(obj_dict=obj_dict, version=version)
+        if "project" in obj_dict["input"].keys():
+            project_dict = obj_dict["input"]["project"]
             if os.path.exists(project_dict["path"]):
                 project = self.project.__class__(
                     path=project_dict["path"],
@@ -700,18 +700,18 @@ def from_dict(self, job_dict):
                 self._logger.warning(
                     f"Could not instantiate analysis_project, no such path {project_dict['path']}."
                 )
-        if "filter" in job_dict["input"].keys():
+        if "filter" in obj_dict["input"].keys():
             self.pyiron_table.filter_function = _from_pickle(
-                job_dict["input"], "filter"
+                obj_dict["input"], "filter"
             )
-        if "db_filter" in job_dict["input"].keys():
+        if "db_filter" in obj_dict["input"].keys():
             self.pyiron_table.db_filter_function = _from_pickle(
-                job_dict["input"], "db_filter"
+                obj_dict["input"], "db_filter"
             )
-        bool_dict = job_dict["input"]["bool_dict"]
+        bool_dict = obj_dict["input"]["bool_dict"]
         self._enforce_update = bool_dict["enforce_update"]
         self._pyiron_table.convert_to_object = bool_dict["convert_to_object"]
-        self._pyiron_table.add._from_hdf(job_dict["input"])
+        self._pyiron_table.add._from_hdf(obj_dict["input"])

     def to_hdf(self, hdf=None, group_name=None):
         """
18 changes: 9 additions & 9 deletions pyiron_base/jobs/flex/executablecontainer.py
@@ -141,14 +141,14 @@ def write_input_combo_funct(working_directory, input_dict):
             collect_output_funct=self._collect_output_funct,
         )

-    def to_dict(self) -> dict:
+    def _to_dict(self) -> dict:
         """
         Convert the job object to a dictionary representation.

         Returns:
             dict: A dictionary representation of the job object.
         """
-        job_dict = super().to_dict()
+        job_dict = super()._to_dict()
         if self._write_input_funct is not None:
             job_dict["write_input_function"] = np.void(
                 cloudpickle.dumps(self._write_input_funct)
@@ -159,20 +159,20 @@ def to_dict(self):
             )
         return job_dict

-    def from_dict(self, job_dict: dict):
+    def _from_dict(self, obj_dict: dict, version=None):
         """
         Load the job attributes from a dictionary representation.

         Args:
-            job_dict (dict): A dictionary containing the job attributes.
+            obj_dict (dict): A dictionary containing the job attributes.
         """
-        super().from_dict(job_dict=job_dict)
-        if "write_input_function" in job_dict.keys():
+        super()._from_dict(obj_dict=obj_dict)
+        if "write_input_function" in obj_dict.keys():
             self._write_input_funct = cloudpickle.loads(
-                job_dict["write_input_function"]
+                obj_dict["write_input_function"]
             )
-        if "write_input_function" in job_dict.keys():
+        if "collect_output_function" in obj_dict.keys():
             self._collect_output_funct = cloudpickle.loads(
-                job_dict["collect_output_function"]
+                obj_dict["collect_output_function"]
             )
14 changes: 7 additions & 7 deletions pyiron_base/jobs/flex/pythonfunctioncontainer.py
@@ -71,31 +71,31 @@ def __call__(self, *args, **kwargs):
         self.run()
         return self.output["result"]

-    def to_dict(self) -> dict:
+    def _to_dict(self) -> dict:
         """
         Convert the job object to a dictionary representation.

         Returns:
             dict: The dictionary representation of the job object.
         """
-        job_dict = super().to_dict()
+        job_dict = super()._to_dict()
         job_dict["function"] = np.void(cloudpickle.dumps(self._function))
         job_dict["_automatically_rename_on_save_using_input"] = (
             self._automatically_rename_on_save_using_input
         )
         return job_dict

-    def from_dict(self, job_dict: dict) -> None:
+    def _from_dict(self, obj_dict: dict, version=None) -> None:
         """
         Load the job object from a dictionary representation.

         Args:
-            job_dict (dict): The dictionary representation of the job object.
+            obj_dict (dict): The dictionary representation of the job object.
         """
-        super().from_dict(job_dict=job_dict)
-        self._function = cloudpickle.loads(job_dict["function"])
+        super()._from_dict(obj_dict=obj_dict)
+        self._function = cloudpickle.loads(obj_dict["function"])
         self._automatically_rename_on_save_using_input = bool(
-            job_dict["_automatically_rename_on_save_using_input"]
+            obj_dict["_automatically_rename_on_save_using_input"]
         )

     def save(self):
16 changes: 11 additions & 5 deletions pyiron_base/jobs/job/extension/executable.py
@@ -206,10 +208,14 @@ def executable_path(self, new_path):
         else:
             self.storage.mpi = False

+    @classmethod
+    def instantiate(cls, obj_dict: dict, version: str = None) -> "Self":
+        return cls(codename=obj_dict["name"])
+
     def _to_dict(self):
         return asdict(self.storage)

-    def from_dict(self, executable_dict):
+    def _from_dict(self, obj_dict, version=None):
         data_container_keys = [
             "version",
             "name",
@@ -220,12 +226,12 @@ def from_dict(self, executable_dict):
         ]
         executable_class_dict = {}
         # Backwards compatibility; dict state used to be nested one level deeper
-        if "executable" in executable_dict.keys() and isinstance(
-            executable_dict["executable"], dict
+        if "executable" in obj_dict.keys() and isinstance(
+            obj_dict["executable"], dict
         ):
-            executable_dict = executable_dict["executable"]
+            obj_dict = obj_dict["executable"]
         for key in data_container_keys:
-            executable_class_dict[key] = executable_dict.get(key, None)
+            executable_class_dict[key] = obj_dict.get(key, None)
         self.storage = ExecutableDataClass(**executable_class_dict)

     def get_input_for_subprocess_call(self, cores, threads, gpus=None):
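
Executable.instantiate above is the pattern for classes whose __init__ takes required arguments: recover them from obj_dict so create_from_dict can construct the object before from_dict fills in the rest. The same idea as a minimal sketch (hypothetical class, not from this commit):

class Tagged(HasDict):
    def __init__(self, tag):
        # required argument, so the default instantiate() -> cls() would fail
        self.tag = tag
        self.payload = None

    @classmethod
    def instantiate(cls, obj_dict, version=None):
        # mirror Executable.instantiate: construct from the stored state
        return cls(tag=obj_dict["tag"])

    def _to_dict(self):
        return {"tag": self.tag, "payload": self.payload}

    def _from_dict(self, obj_dict, version=None):
        self.tag = obj_dict["tag"]
        self.payload = obj_dict["payload"]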
24 changes: 12 additions & 12 deletions pyiron_base/jobs/job/extension/server/generic.py
@@ -566,23 +566,23 @@ def _to_dict(self):
             return asdict(self._data)
         return server_dict

-    def from_dict(self, server_dict):
+    def _from_dict(self, obj_dict, version=None):
         # backwards compatibility
-        if "new_h5" in server_dict.keys():
-            server_dict["new_hdf"] = server_dict.pop("new_h5") == 1
+        if "new_h5" in obj_dict.keys():
+            obj_dict["new_hdf"] = obj_dict.pop("new_h5") == 1
         for key in ["conda_environment_name", "conda_environment_path", "qid"]:
-            if key not in server_dict.keys():
-                server_dict[key] = None
-        if "accept_crash" not in server_dict.keys():
-            server_dict["accept_crash"] = False
-        if "additional_arguments" not in server_dict.keys():
-            server_dict["additional_arguments"] = {}
+            if key not in obj_dict.keys():
+                obj_dict[key] = None
+        if "accept_crash" not in obj_dict.keys():
+            obj_dict["accept_crash"] = False
+        if "additional_arguments" not in obj_dict.keys():
+            obj_dict["additional_arguments"] = {}

         # Reload dataclass
         for key in ["NAME", "TYPE", "OBJECT", "VERSION", "DICT_VERSION"]:
-            if key in server_dict.keys():
-                del server_dict[key]
-        self._data = ServerDataClass(**server_dict)
+            if key in obj_dict.keys():
+                del obj_dict[key]
+        self._data = ServerDataClass(**obj_dict)
         self._run_mode = Runmode(mode=self._data.run_mode)

     @deprecate(message="Use job.server.to_dict() instead of to_hdf()", version=0.9)
18 changes: 9 additions & 9 deletions pyiron_base/jobs/job/generic.py
@@ -1174,14 +1174,14 @@ def _to_dict(self):
         data_dict["files_to_compress"] = self._files_to_remove
         return data_dict

-    def from_dict(self, job_dict):
-        self._type_from_dict(type_dict=job_dict)
-        if "import_directory" in job_dict.keys():
-            self._import_directory = job_dict["import_directory"]
-        self._server.from_dict(server_dict=job_dict["server"])
-        if "executable" in job_dict.keys() and job_dict["executable"] is not None:
-            self.executable.from_dict(job_dict["executable"])
-        input_dict = job_dict["input"]
+    def _from_dict(self, obj_dict, version=None):
+        self._type_from_dict(type_dict=obj_dict)
+        if "import_directory" in obj_dict.keys():
+            self._import_directory = obj_dict["import_directory"]
+        self._server = obj_dict["server"]
+        if "executable" in obj_dict.keys() and obj_dict["executable"] is not None:
+            self._executable = obj_dict["executable"]
+        input_dict = obj_dict["input"]
         if "generic_dict" in input_dict.keys():
             generic_dict = input_dict["generic_dict"]
             self._restart_file_list = generic_dict["restart_file_list"]
@@ -1242,7 +1242,7 @@ def from_hdf(self, hdf=None, group_name=None):
             exe_dict = self._hdf5["executable/executable"].to_object().to_builtin()
             exe_dict["READ_ONLY"] = self._hdf5["executable/executable/READ_ONLY"]
             job_dict["executable"] = {"executable": exe_dict}
-        self.from_dict(job_dict=job_dict)
+        self.from_dict(obj_dict=job_dict)

     def save(self):
         """
12 changes: 6 additions & 6 deletions pyiron_base/jobs/job/template.py
@@ -78,14 +78,14 @@ def input(self):
     def output(self):
         return self.storage.output

-    def to_dict(self):
-        job_dict = super().to_dict()
-        job_dict["input/data"] = self.storage.input.to_builtin()
+    def _to_dict(self):
+        job_dict = super()._to_dict()
+        job_dict["input/data"] = self.storage.input.to_dict()
         return job_dict

-    def from_dict(self, job_dict):
-        super().from_dict(job_dict=job_dict)
-        input_dict = job_dict["input"]
+    def _from_dict(self, obj_dict, version=None):
+        super()._from_dict(obj_dict=obj_dict, version=version)
+        input_dict = obj_dict["input"]
         if "data" in input_dict.keys():
             self.storage.input.update(input_dict["data"])