WIP: improve virtual snapshot code
psy0rz committed Sep 21, 2024
1 parent c5f1e38 commit 6e8cb79
Showing 4 changed files with 133 additions and 83 deletions.
2 changes: 1 addition & 1 deletion tests/test_zfsautobackup32.py
@@ -163,9 +163,9 @@ def test_transfer_thinning(self):
with mocktime("20010202000000"):
self.assertFalse(ZfsAutobackup("test2 --allow-empty".split(" ")).run())

#will become common snapshot
with OutputIO() as buf:
with redirect_stdout(buf):
# now do thinning and transfer all at once
with mocktime("20010203000000"):
self.assertFalse(ZfsAutobackup("--keep-source=1d10d --keep-target=1m10m --allow-empty --verbose --clear-mountpoint --other-snapshots test2 test_target1".split(" ")).run())

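For reference, the two schedule flags in the command above use zfs-autobackup's thinning-rule syntax, where each rule reads "<block size><ttl>". A small paraphrase of what they mean, for reference only (this table is not code from the repo):

schedules = {
    "--keep-source=1d10d": "keep one snapshot per day, for 10 days",
    "--keep-target=1m10m": "keep one snapshot per month, for 10 months",
}
for flag, meaning in schedules.items():
    print(flag, "->", meaning)
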
40 changes: 23 additions & 17 deletions zfs_autobackup/Thinner.py
@@ -70,24 +70,30 @@ def thin(self, objects, keep_objects, now):

# traverse objects
for thisobject in objects:
# important they are ints!
timestamp = int(thisobject.timestamp)
age = int(now) - timestamp

# store in the correct time blocks, per period-size, if not too old yet
# e.g.: look if there is ANY timeblock that wants to keep this object
keep = False
for rule in self.rules:
if age <= rule.ttl:
block_nr = int(timestamp / rule.period)
if block_nr not in time_blocks[rule.period]:
time_blocks[rule.period][block_nr] = True
keep = True

# keep it according to schedule, or keep it because it is in the keep_objects list
if keep or thisobject in keep_objects or thisobject in always_keep_objects:

# ignore stuff without a timestamp, always keep those.
if thisobject.timestamp is None:
keeps.append(thisobject)
else:
removes.append(thisobject)

# important they are ints!
timestamp = int(thisobject.timestamp)
age = int(now) - timestamp

# store in the correct time blocks, per period-size, if not too old yet
# e.g.: look if there is ANY timeblock that wants to keep this object
keep = False
for rule in self.rules:
if age <= rule.ttl:
block_nr = int(timestamp / rule.period)
if block_nr not in time_blocks[rule.period]:
time_blocks[rule.period][block_nr] = True
keep = True

# keep it according to schedule, or keep it because it is in the keep_objects list
if keep or thisobject in keep_objects or thisobject in always_keep_objects:
keeps.append(thisobject)
else:
removes.append(thisobject)

return keeps, removes
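
To see the whole algorithm in one place, here is a minimal standalone sketch of the bucketing logic above, including the new rule that objects without a timestamp are always kept. ThinnerRule and Obj are simplified stand-ins (periods and TTLs in seconds), not the repo's classes:

from collections import namedtuple

ThinnerRule = namedtuple("ThinnerRule", ["period", "ttl"])  # both in seconds
Obj = namedtuple("Obj", ["name", "timestamp"])

def thin(objects, rules, now, keep_objects=()):
    # one dict of occupied time blocks per rule period
    time_blocks = {rule.period: {} for rule in rules}
    keeps, removes = [], []
    for obj in objects:
        # new in this commit: no timestamp means "not ours", always keep
        if obj.timestamp is None:
            keeps.append(obj)
            continue
        timestamp = int(obj.timestamp)
        age = int(now) - timestamp
        keep = False
        for rule in rules:
            if age <= rule.ttl:
                block_nr = timestamp // rule.period
                if block_nr not in time_blocks[rule.period]:
                    time_blocks[rule.period][block_nr] = True
                    keep = True  # first object in this time block: keep it
        (keeps if keep or obj in keep_objects else removes).append(obj)
    return keeps, removes

# keep one snapshot per day for three days; "foreign" has no timestamp
day = 24 * 3600
objs = [Obj("foreign", None), Obj("a", 0), Obj("b", 10), Obj("c", day)]
print(thin(objs, [ThinnerRule(day, 3 * day)], now=day))
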
144 changes: 83 additions & 61 deletions zfs_autobackup/ZfsDataset.py
@@ -22,20 +22,20 @@ class ZfsDataset:
def __init__(self, zfs_node, name, force_exists=None):
"""
Args:
:type zfs_node: ZfsNode.ZfsNode
:type zfs_node: ZfsNode
:type name: str
:type force_exists: bool
"""
self.zfs_node = zfs_node
self.name = name # full name
self._virtual_snapshots = []
self.invalidate()
self.force_exists = force_exists

def __repr__(self):
return "{}: {}".format(self.zfs_node, self.name)

def __str__(self):

return self.name

def __eq__(self, obj):
@@ -76,7 +76,6 @@ def invalidate(self):
"""clear caches"""
CachedProperty.clear(self)
self.force_exists = None
self._virtual_snapshots = []

def split_path(self):
"""return the path elements as an array"""
@@ -379,7 +378,7 @@ def is_changed(self, min_changed_bytes=1):
return True

def is_ours(self):
"""return true if this snapshot name has format"""
"""return true if this snapshot name belong to the current backup_name and snapshot formatting"""
try:
test = self.timestamp
except ValueError as e:
@@ -413,8 +412,14 @@ def release(self):
@property
def timestamp(self):
"""get timestamp from snapshot name. Only works for our own snapshots
with the correct format.
with the correct format. Snapshots that are not ours always return None
:rtype: int|None
"""

if not self.is_ours():
return None

dt = datetime.strptime(self.snapshot_name, self.zfs_node.snapshot_time_format)
if sys.version_info[0] >= 3:
from datetime import timezone
@@ -432,17 +437,6 @@ def timestamp(self):
seconds = time.mktime(dt.timetuple())
return seconds

def from_names(self, names, force_exists=None):
"""convert a list[names] to a list ZfsDatasets for this zfs_node
Args:
:type names: list[str]
"""
ret = []
for name in names:
ret.append(self.zfs_node.get_dataset(name, force_exists))

return ret

# def add_virtual_snapshot(self, snapshot):
# """pretend a snapshot exists (usefull in test mode)"""
@@ -463,17 +457,13 @@ def snapshots(self):
:rtype: ZfsDataset
"""

#FIXME: don't check for existence. (currently needed for _add_virtual_snapshots)
if not self.exists:
return []

self.debug("Getting snapshots")

cmd = [
"zfs", "list", "-d", "1", "-r", "-t", "snapshot", "-H", "-o", "name", self.name
]

return self.from_names(self.zfs_node.run(cmd=cmd, readonly=True), force_exists=True)
return self.zfs_node.get_datasets(self.zfs_node.run(cmd=cmd, readonly=True), force_exists=True)
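
This commit replaces the per-dataset from_names() helper with a node-level get_datasets(). Its implementation is not shown in this diff; inferred from the call sites above (names in, ZfsDataset objects out, optional force_exists), a minimal shape could be:

def get_datasets(self, names, force_exists=None):
    """Convert a list of dataset/snapshot names to ZfsDataset objects (sketch)."""
    return [self.get_dataset(name, force_exists) for name in names]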

@property
def our_snapshots(self):
@@ -572,7 +562,7 @@ def recursive_datasets(self, types="filesystem,volume"):
"zfs", "list", "-r", "-t", types, "-o", "name", "-H", self.name
])

return self.from_names(names[1:], force_exists=True)
return self.zfs_node.get_datasets(names[1:], force_exists=True)

@CachedProperty
def datasets(self, types="filesystem,volume"):
@@ -588,7 +578,7 @@ def datasets(self, types="filesystem,volume"):
"zfs", "list", "-r", "-t", types, "-o", "name", "-H", "-d", "1", self.name
])

return self.from_names(names[1:], force_exists=True)
return self.zfs_node.get_datasets(names[1:], force_exists=True)

def send_pipe(self, features, prev_snapshot, resume_token, show_progress, raw, send_properties, write_embedded,
send_pipes, zfs_compressed):
@@ -865,7 +855,7 @@ def thin_list(self, keeps=None, ignores=None):

snapshots = [snapshot for snapshot in self.our_snapshots if snapshot not in ignores]

return self.zfs_node.thin(snapshots, keep_objects=keeps)
return self.zfs_node.thin_list(snapshots, keep_snapshots=keeps)

def thin(self, skip_holds=False):
"""destroys snapshots according to thin_list, except last snapshot
@@ -887,19 +877,20 @@ def find_common_snapshot(self, target_dataset, guid_check):
an initial transfer
Args:
:rtype: ZfsDataset|None
:type guid_check: bool
:type target_dataset: ZfsDataset
"""

if not target_dataset.snapshots:
if not target_dataset.exists or not target_dataset.snapshots:
# target has nothing yet
return None
else:
for source_snapshot in reversed(self.snapshots):
target_snapshot = target_dataset.find_snapshot(source_snapshot)
if target_snapshot:
if guid_check and source_snapshot.properties['guid'] != target_snapshot.properties['guid']:
target_snapshot.warning("Common snapshot has invalid guid, ignoring.")
target_snapshot.warning("Common snapshots have mismatching GUID, ignoring.")
else:
target_snapshot.debug("common snapshot")
return source_snapshot
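
A condensed, runnable sketch of the search above: walk the source snapshots newest-first and accept a name match only when the GUIDs agree (guid_check). Snap and DS are dummy stand-ins carrying just the fields this logic uses:

class Snap:
    def __init__(self, name, guid):
        self.snapshot_name, self.properties = name, {"guid": guid}

class DS:
    def __init__(self, snapshots):
        self.snapshots = snapshots
        self.exists = bool(snapshots)

    def find_snapshot(self, other):
        return next((s for s in self.snapshots
                     if s.snapshot_name == other.snapshot_name), None)

def find_common_snapshot(source, target, guid_check=True):
    if not target.exists or not target.snapshots:
        return None  # target has nothing yet: initial transfer needed
    for source_snapshot in reversed(source.snapshots):
        target_snapshot = target.find_snapshot(source_snapshot)
        if target_snapshot is None:
            continue
        if guid_check and source_snapshot.properties["guid"] != target_snapshot.properties["guid"]:
            continue  # same name, different contents: not a usable common point
        return source_snapshot
    return None

source = DS([Snap("day1", 1), Snap("day2", 2)])
target = DS([Snap("day1", 1), Snap("day2", 99)])  # day2 diverged on target
print(find_common_snapshot(source, target).snapshot_name)  # -> day1
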
@@ -911,6 +902,7 @@ def find_start_snapshot(self, common_snapshot, also_other_snapshots):
find it.
Args:
:rtype: ZfsDataset|None
:type common_snapshot: ZfsDataset
:type also_other_snapshots: bool
"""
@@ -976,35 +968,35 @@ def get_allowed_properties(self, filter_properties, set_properties):

return allowed_filter_properties, allowed_set_properties

def _add_virtual_snapshots(self, source_dataset, source_start_snapshot, also_other_snapshots):
"""add snapshots from source to our snapshot list. (just the in memory
list, no disk operations)
Args:
:type source_dataset: ZfsDataset
:type source_start_snapshot: ZfsDataset
:type also_other_snapshots: bool
"""

self.debug("Creating virtual target snapshots")
snapshot = source_start_snapshot
while snapshot:
# create virtual target snapshot
# NOTE: with force_exists we're telling the dataset it doesn't exist yet (i.e. it's virtual)
virtual_snapshot = self.zfs_node.get_dataset(self.filesystem_name + "@" + snapshot.snapshot_name,
force_exists=False)
self.snapshots.append(virtual_snapshot)
snapshot = source_dataset.find_next_snapshot(snapshot, also_other_snapshots)

def _pre_clean(self, common_snapshot, target_dataset, source_obsoletes, target_obsoletes, target_keeps):
# def _add_virtual_snapshots(self, source_dataset, source_start_snapshot, also_other_snapshots):
# """add snapshots from source to our snapshot list. (just the in memory
# list, no disk operations)
#
# Args:
# :type source_dataset: ZfsDataset
# :type source_start_snapshot: ZfsDataset
# :type also_other_snapshots: bool
# """
#
# self.debug("Creating virtual target snapshots")
# snapshot = source_start_snapshot
# while snapshot:
# # create virtual target snapshot
# # NOTE: with force_exists we're telling the dataset it doesn't exist yet (i.e. it's virtual)
# virtual_snapshot = self.zfs_node.get_dataset(self.filesystem_name + "@" + snapshot.snapshot_name,
# force_exists=False)
# self.snapshots.append(virtual_snapshot)
# snapshot = source_dataset.find_next_snapshot(snapshot, also_other_snapshots)

def _pre_clean(self, common_snapshot, target_dataset, source_obsoletes, target_obsoletes, target_transfers):
"""cleanup old stuff before starting snapshot syncing
Args:
:type common_snapshot: ZfsDataset
:type target_dataset: ZfsDataset
:type source_obsoletes: list[ZfsDataset]
:type target_obsoletes: list[ZfsDataset]
:type target_keeps: list[ZfsDataset]
:type target_transfers: list[ZfsDataset]
"""

# on source: destroy all obsoletes before common (since we can't send them anyway)
@@ -1020,7 +1012,7 @@ def _pre_clean(self, common_snapshot, target_dataset, source_obsoletes, target_o
# never destroy common snapshot
else:
target_snapshot = target_dataset.find_snapshot(source_snapshot)
if (source_snapshot in source_obsoletes) and (before_common or (target_snapshot not in target_keeps)):
if (source_snapshot in source_obsoletes) and (before_common or (target_snapshot not in target_transfers)):
source_snapshot.destroy()

# on target: destroy everything that's obsolete, except common_snapshot
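
The pre-clean rules of this hunk, restated as one compact function: source-side obsoletes from before the common snapshot always go, later ones only when their counterpart is not scheduled for transfer. The target-side loop is cut off by the hunk above, so its body here follows the comment and is an assumption; this operates on ZfsDataset-like objects as in the sketch after find_common_snapshot:

def pre_clean(source, target, common, source_obsoletes, target_obsoletes, target_transfers):
    before_common = True
    for source_snapshot in source.snapshots:
        if common and source_snapshot.snapshot_name == common.snapshot_name:
            before_common = False  # never destroy the common snapshot
            continue
        target_snapshot = target.find_snapshot(source_snapshot)
        # before common we can't send it anyway; after common, only destroy
        # when the matching target snapshot won't be transferred
        if source_snapshot in source_obsoletes and (before_common or target_snapshot not in target_transfers):
            source_snapshot.destroy()
    # target side (assumed from the comment): drop obsoletes except common
    for target_snapshot in target_obsoletes:
        if not (common and target_snapshot.snapshot_name == common.snapshot_name):
            target_snapshot.destroy()
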
@@ -1053,14 +1045,24 @@ def _validate_resume_token(self, target_dataset, start_snapshot):
return resume_token

def _plan_sync(self, target_dataset, also_other_snapshots, guid_check, raw):
"""plan where to start syncing and what to sync and what to keep
"""Determine at what snapshot to start syncing to target_dataset and what to sync and what to keep.
Args:
:rtype: ( ZfsDataset, ZfsDataset, list[ZfsDataset], list[ZfsDataset], list[ZfsDataset], list[ZfsDataset] )
:type target_dataset: ZfsDataset
:type also_other_snapshots: bool
:type guid_check: bool
:type raw: bool
Returns:
tuple: A tuple containing:
- ZfsDataset: The common snapshot
- ZfsDataset: The start snapshot
- list[ZfsDataset]: Our obsolete source snapshots, after the transfer is done (will be thinned ASAP)
- list[ZfsDataset]: Our obsolete target snapshots, after the transfer is done (will be thinned ASAP)
- list[ZfsDataset]: Target snapshots that still need to be transferred
- list[ZfsDataset]: Incompatible target snapshots: snapshots after the common snapshot that are in the way (need to be destroyed to continue)
"""

# determine common and start snapshot
Expand All @@ -1069,20 +1071,39 @@ def _plan_sync(self, target_dataset, also_other_snapshots, guid_check, raw):
start_snapshot = self.find_start_snapshot(common_snapshot, also_other_snapshots)
incompatible_target_snapshots = target_dataset.find_incompatible_snapshots(common_snapshot, raw)

# let thinner decide what's obsolete on source
# let thinner decide what's obsolete on source after the transfer is done, keeping the last snapshot as common.
source_obsoletes = []
if self.our_snapshots:
source_obsoletes = self.thin_list(keeps=[self.our_snapshots[-1]])[1]

# let thinner decide keeps/obsoletes on target, AFTER the transfer would be done (by using virtual snapshots)
target_dataset._add_virtual_snapshots(self, start_snapshot, also_other_snapshots)
target_keeps = []
target_obsoletes = []
if target_dataset.our_snapshots:
(target_keeps, target_obsoletes) = target_dataset.thin_list(keeps=[target_dataset.our_snapshots[-1]],
ignores=incompatible_target_snapshots)
# A list of all our possible target snapshots (existing - incompatible + transferable from source)
# We will use this list to let the thinner decide what to transfer to the target, and which target snapshots to destroy.

# start with snapshots that already exist, minus incompatibles
if target_dataset.exists:
possible_target_snapshots = [snapshot for snapshot in target_dataset.snapshots if snapshot not in incompatible_target_snapshots]
else:
possible_target_snapshots = []

# Add all snapshots from the source to the target list, as virtual snapshots that don't exist yet (force_exists=False)
source_snapshot = start_snapshot
while source_snapshot:
if also_other_snapshots or source_snapshot.is_ours():
# virtual target snapshot
target_snapshot=target_dataset.zfs_node.get_dataset(target_dataset.filesystem_name + "@" + source_snapshot.snapshot_name, force_exists=False)
possible_target_snapshots.append(target_snapshot)
source_snapshot = self.find_next_snapshot(source_snapshot, False)

#Now the thinner can decide which snapshots we want on the target, by looking at the whole picture:
(target_keeps, target_obsoletes)=target_dataset.zfs_node.thin_list(possible_target_snapshots, keep_snapshots=[possible_target_snapshots[-1]])

#Create a list of all the target snapshots we want, that don't exist yet
target_transfers=[]
for target_keep in target_keeps:
if not target_keep.exists:
target_transfers.append(target_keep)

return common_snapshot, start_snapshot, source_obsoletes, target_obsoletes, target_keeps, incompatible_target_snapshots
return common_snapshot, start_snapshot, source_obsoletes, target_obsoletes, target_transfers, incompatible_target_snapshots
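
A tiny runnable demo of why the reworked planner feeds virtual snapshots to the thinner: by thinning the would-be final state of the target, one pass yields both what to destroy and what still has to be transferred. Snap and the two-newest "thinner" below are dummy stand-ins, not the repo's classes:

class Snap:
    def __init__(self, name, exists):
        self.name, self.exists = name, exists
    def __repr__(self):
        return ("" if self.exists else "virtual ") + self.name

existing = [Snap("@day1", True)]                        # already on target
virtual = [Snap("@day2", False), Snap("@day3", False)]  # exist only on source
possible_target_snapshots = existing + virtual

# stand-in for thin_list(): keep only the two newest snapshots
target_keeps = possible_target_snapshots[-2:]
target_obsoletes = possible_target_snapshots[:-2]

target_transfers = [s for s in target_keeps if not s.exists]
print("destroy on target:", target_obsoletes)   # [@day1]
print("transfer to target:", target_transfers)  # [virtual @day2, virtual @day3]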

def handle_incompatible_snapshots(self, incompatible_target_snapshots, destroy_incompatible):
"""destroy incompatbile snapshots on target before sync, or inform user
@@ -1147,7 +1168,8 @@ def sync_snapshots(self, target_dataset, features, show_progress, filter_propert
# keep data encrypted by sending it raw (including properties)
raw = True

(common_snapshot, start_snapshot, source_obsoletes, target_obsoletes, target_keeps,
# note: only target_obsoletes is used during sync, to check if the target doesn't want the snapshot
(common_snapshot, start_snapshot, source_obsoletes, target_obsoletes, target_transfers,
incompatible_target_snapshots) = \
self._plan_sync(target_dataset=target_dataset, also_other_snapshots=also_other_snapshots,
guid_check=guid_check, raw=raw)
@@ -1156,7 +1178,7 @@ def sync_snapshots(self, target_dataset, features, show_progress, filter_propert
# Also useful with no_send to still clean up stuff.
self._pre_clean(
common_snapshot=common_snapshot, target_dataset=target_dataset,
target_keeps=target_keeps, target_obsoletes=target_obsoletes, source_obsoletes=source_obsoletes)
target_transfers=target_transfers, target_obsoletes=target_obsoletes, source_obsoletes=source_obsoletes)

# handle incompatible stuff on target
target_dataset.handle_incompatible_snapshots(incompatible_target_snapshots, destroy_incompatible)
