WIP: improve virtual snapshot code
psy0rz committed Sep 21, 2024
1 parent c5f1e38 commit 6e8cb79
Showing 4 changed files with 133 additions and 83 deletions.
2 changes: 1 addition & 1 deletion tests/test_zfsautobackup32.py
@@ -163,9 +163,9 @@ def test_transfer_thinning(self):
with mocktime("20010202000000"):
self.assertFalse(ZfsAutobackup("test2 --allow-empty".split(" ")).run())

#will become common snapshot
with OutputIO() as buf:
with redirect_stdout(buf):
# now do thinning and transfer all at once
with mocktime("20010203000000"):
self.assertFalse(ZfsAutobackup("--keep-source=1d10d --keep-target=1m10m --allow-empty --verbose --clear-mountpoint --other-snapshots test2 test_target1".split(" ")).run())

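For reference, the two schedule flags in the command above use zfs-autobackup's thinning-rule syntax, where each rule reads "<block size><ttl>". A small paraphrase of what they mean, for reference only (this table is not code from the repo):

schedules = {
    "--keep-source=1d10d": "keep one snapshot per day, for 10 days",
    "--keep-target=1m10m": "keep one snapshot per month, for 10 months",
}
for flag, meaning in schedules.items():
    print(flag, "->", meaning)
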
40 changes: 23 additions & 17 deletions zfs_autobackup/Thinner.py
@@ -70,24 +70,30 @@ def thin(self, objects, keep_objects, now):

# traverse objects
for thisobject in objects:
# important they are ints!
timestamp = int(thisobject.timestamp)
age = int(now) - timestamp

# store in the correct time blocks, per period-size, if not too old yet
# e.g.: look if there is ANY timeblock that wants to keep this object
keep = False
for rule in self.rules:
if age <= rule.ttl:
block_nr = int(timestamp / rule.period)
if block_nr not in time_blocks[rule.period]:
time_blocks[rule.period][block_nr] = True
keep = True

# keep it according to schedule, or keep it because it is in the keep_objects list
if keep or thisobject in keep_objects or thisobject in always_keep_objects:

# ignore stuff without a timestamp, always keep those.
if thisobject.timestamp is None:
keeps.append(thisobject)
else:
removes.append(thisobject)

# important they are ints!
timestamp = int(thisobject.timestamp)
age = int(now) - timestamp

# store in the correct time blocks, per period-size, if not too old yet
# e.g.: look if there is ANY timeblock that wants to keep this object
keep = False
for rule in self.rules:
if age <= rule.ttl:
block_nr = int(timestamp / rule.period)
if block_nr not in time_blocks[rule.period]:
time_blocks[rule.period][block_nr] = True
keep = True

# keep it according to schedule, or keep it because it is in the keep_objects list
if keep or thisobject in keep_objects or thisobject in always_keep_objects:
keeps.append(thisobject)
else:
removes.append(thisobject)

return keeps, removes
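
To see the whole algorithm in one place, here is a minimal standalone sketch of the bucketing logic above, including the new rule that objects without a timestamp are always kept. ThinnerRule and Obj are simplified stand-ins (periods and TTLs in seconds), not the repo's classes:

from collections import namedtuple

ThinnerRule = namedtuple("ThinnerRule", ["period", "ttl"])  # both in seconds
Obj = namedtuple("Obj", ["name", "timestamp"])

def thin(objects, rules, now, keep_objects=()):
    # one dict of occupied time blocks per rule period
    time_blocks = {rule.period: {} for rule in rules}
    keeps, removes = [], []
    for obj in objects:
        # new in this commit: no timestamp means "not ours", always keep
        if obj.timestamp is None:
            keeps.append(obj)
            continue
        timestamp = int(obj.timestamp)
        age = int(now) - timestamp
        keep = False
        for rule in rules:
            if age <= rule.ttl:
                block_nr = timestamp // rule.period
                if block_nr not in time_blocks[rule.period]:
                    time_blocks[rule.period][block_nr] = True
                    keep = True  # first object in this time block: keep it
        (keeps if keep or obj in keep_objects else removes).append(obj)
    return keeps, removes

# keep one snapshot per day for three days; "foreign" has no timestamp
day = 24 * 3600
objs = [Obj("foreign", None), Obj("a", 0), Obj("b", 10), Obj("c", day)]
print(thin(objs, [ThinnerRule(day, 3 * day)], now=day))
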
144 changes: 83 additions & 61 deletions zfs_autobackup/ZfsDataset.py
@@ -22,20 +22,20 @@ class ZfsDataset:
def __init__(self, zfs_node, name, force_exists=None):
"""
Args:
:type zfs_node: ZfsNode.ZfsNode
:type zfs_node: ZfsNode
:type name: str
:type force_exists: bool
"""
self.zfs_node = zfs_node
self.name = name # full name
self._virtual_snapshots = []
self.invalidate()
self.force_exists = force_exists

def __repr__(self):
return "{}: {}".format(self.zfs_node, self.name)

def __str__(self):

return self.name

def __eq__(self, obj):
@@ -76,7 +76,6 @@ def invalidate(self):
"""clear caches"""
CachedProperty.clear(self)
self.force_exists = None
self._virtual_snapshots = []

def split_path(self):
"""return the path elements as an array"""
@@ -379,7 +378,7 @@ def is_changed(self, min_changed_bytes=1):
return True

def is_ours(self):
"""return true if this snapshot name has format"""
"""return true if this snapshot name belong to the current backup_name and snapshot formatting"""
try:
test = self.timestamp
except ValueError as e:
@@ -413,8 +412,14 @@ def release(self):
@property
def timestamp(self):
"""get timestamp from snapshot name. Only works for our own snapshots
with the correct format.
with the correct format. Snapshots that are not ours always return None
:rtype: int|None
"""

if not self.is_ours():
return None

dt = datetime.strptime(self.snapshot_name, self.zfs_node.snapshot_time_format)
if sys.version_info[0] >= 3:
from datetime import timezone
@@ -432,17 +437,6 @@ def timestamp(self):
seconds = time.mktime(dt.timetuple())
return seconds

def from_names(self, names, force_exists=None):
"""convert a list[names] to a list ZfsDatasets for this zfs_node
Args:
:type names: list[str]
"""
ret = []
for name in names:
ret.append(self.zfs_node.get_dataset(name, force_exists))

return ret

# def add_virtual_snapshot(self, snapshot):
# """pretend a snapshot exists (usefull in test mode)"""
@@ -463,17 +457,13 @@ def snapshots(self):
:rtype: ZfsDataset
"""

#FIXME: don't check for existence. (currently needed for _add_virtual_snapshots)
if not self.exists:
return []

self.debug("Getting snapshots")

cmd = [
"zfs", "list", "-d", "1", "-r", "-t", "snapshot", "-H", "-o", "name", self.name
]

return self.from_names(self.zfs_node.run(cmd=cmd, readonly=True), force_exists=True)
return self.zfs_node.get_datasets(self.zfs_node.run(cmd=cmd, readonly=True), force_exists=True)
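
This commit replaces the per-dataset from_names() helper with a node-level get_datasets(). Its implementation is not shown in this diff; inferred from the call sites above (names in, ZfsDataset objects out, optional force_exists), a minimal shape could be:

def get_datasets(self, names, force_exists=None):
    """Convert a list of dataset/snapshot names to ZfsDataset objects (sketch)."""
    return [self.get_dataset(name, force_exists) for name in names]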

@property
def our_snapshots(self):
@@ -572,7 +562,7 @@ def recursive_datasets(self, types="filesystem,volume"):
"zfs", "list", "-r", "-t", types, "-o", "name", "-H", self.name
])

return self.from_names(names[1:], force_exists=True)
return self.zfs_node.get_datasets(names[1:], force_exists=True)

@CachedProperty
def datasets(self, types="filesystem,volume"):
@@ -588,7 +578,7 @@ def datasets(self, types="filesystem,volume"):
"zfs", "list", "-r", "-t", types, "-o", "name", "-H", "-d", "1", self.name
])

return self.from_names(names[1:], force_exists=True)
return self.zfs_node.get_datasets(names[1:], force_exists=True)

def send_pipe(self, features, prev_snapshot, resume_token, show_progress, raw, send_properties, write_embedded,
send_pipes, zfs_compressed):
@@ -865,7 +855,7 @@ def thin_list(self, keeps=None, ignores=None):

snapshots = [snapshot for snapshot in self.our_snapshots if snapshot not in ignores]

return self.zfs_node.thin(snapshots, keep_objects=keeps)
return self.zfs_node.thin_list(snapshots, keep_snapshots=keeps)

def thin(self, skip_holds=False):
"""destroys snapshots according to thin_list, except last snapshot
@@ -887,19 +877,20 @@ def find_common_snapshot(self, target_dataset, guid_check):
an initial transfer
Args:
:rtype: ZfsDataset|None
:type guid_check: bool
:type target_dataset: ZfsDataset
"""

if not target_dataset.snapshots:
if not target_dataset.exists or not target_dataset.snapshots:
# target has nothing yet
return None
else:
for source_snapshot in reversed(self.snapshots):
target_snapshot = target_dataset.find_snapshot(source_snapshot)
if target_snapshot:
if guid_check and source_snapshot.properties['guid'] != target_snapshot.properties['guid']:
target_snapshot.warning("Common snapshot has invalid guid, ignoring.")
target_snapshot.warning("Common snapshots have mismatching GUID, ignoring.")
else:
target_snapshot.debug("common snapshot")
return source_snapshot
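
A condensed, runnable sketch of the search above: walk the source snapshots newest-first and accept a name match only when the GUIDs agree (guid_check). Snap and DS are dummy stand-ins carrying just the fields this logic uses:

class Snap:
    def __init__(self, name, guid):
        self.snapshot_name, self.properties = name, {"guid": guid}

class DS:
    def __init__(self, snapshots):
        self.snapshots = snapshots
        self.exists = bool(snapshots)

    def find_snapshot(self, other):
        return next((s for s in self.snapshots
                     if s.snapshot_name == other.snapshot_name), None)

def find_common_snapshot(source, target, guid_check=True):
    if not target.exists or not target.snapshots:
        return None  # target has nothing yet: initial transfer needed
    for source_snapshot in reversed(source.snapshots):
        target_snapshot = target.find_snapshot(source_snapshot)
        if target_snapshot is None:
            continue
        if guid_check and source_snapshot.properties["guid"] != target_snapshot.properties["guid"]:
            continue  # same name, different contents: not a usable common point
        return source_snapshot
    return None

source = DS([Snap("day1", 1), Snap("day2", 2)])
target = DS([Snap("day1", 1), Snap("day2", 99)])  # day2 diverged on target
print(find_common_snapshot(source, target).snapshot_name)  # -> day1
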
@@ -911,6 +902,7 @@ def find_start_snapshot(self, common_snapshot, also_other_snapshots):
find it.
Args:
:rtype: ZfsDataset|None
:type common_snapshot: ZfsDataset
:type also_other_snapshots: bool
"""
@@ -976,35 +968,35 @@ def get_allowed_properties(self, filter_properties, set_properties):

return allowed_filter_properties, allowed_set_properties

def _add_virtual_snapshots(self, source_dataset, source_start_snapshot, also_other_snapshots):
"""add snapshots from source to our snapshot list. (just the in memory
list, no disk operations)
Args:
:type source_dataset: ZfsDataset
:type source_start_snapshot: ZfsDataset
:type also_other_snapshots: bool
"""

self.debug("Creating virtual target snapshots")
snapshot = source_start_snapshot
while snapshot:
# create virtual target snapshot
# NOTE: with force_exists we're telling the dataset it doesn't exist yet (i.e. it's virtual)
virtual_snapshot = self.zfs_node.get_dataset(self.filesystem_name + "@" + snapshot.snapshot_name,
force_exists=False)
self.snapshots.append(virtual_snapshot)
snapshot = source_dataset.find_next_snapshot(snapshot, also_other_snapshots)

def _pre_clean(self, common_snapshot, target_dataset, source_obsoletes, target_obsoletes, target_keeps):
# def _add_virtual_snapshots(self, source_dataset, source_start_snapshot, also_other_snapshots):
# """add snapshots from source to our snapshot list. (just the in memory
# list, no disk operations)
#
# Args:
# :type source_dataset: ZfsDataset
# :type source_start_snapshot: ZfsDataset
# :type also_other_snapshots: bool
# """
#
# self.debug("Creating virtual target snapshots")
# snapshot = source_start_snapshot
# while snapshot:
# # create virtual target snapshot
# # NOTE: with force_exists we're telling the dataset it doesn't exist yet (i.e. it's virtual)
# virtual_snapshot = self.zfs_node.get_dataset(self.filesystem_name + "@" + snapshot.snapshot_name,
# force_exists=False)
# self.snapshots.append(virtual_snapshot)
# snapshot = source_dataset.find_next_snapshot(snapshot, also_other_snapshots)

def _pre_clean(self, common_snapshot, target_dataset, source_obsoletes, target_obsoletes, target_transfers):
"""cleanup old stuff before starting snapshot syncing
Args:
:type common_snapshot: ZfsDataset
:type target_dataset: ZfsDataset
:type source_obsoletes: list[ZfsDataset]
:type target_obsoletes: list[ZfsDataset]
:type target_keeps: list[ZfsDataset]
:type target_transfers: list[ZfsDataset]
"""

# on source: destroy all obsoletes before common (since we can't send them anyway)
@@ -1020,7 +1012,7 @@ def _pre_clean(self, common_snapshot, target_dataset, source_obsoletes, target_o
# never destroy common snapshot
else:
target_snapshot = target_dataset.find_snapshot(source_snapshot)
if (source_snapshot in source_obsoletes) and (before_common or (target_snapshot not in target_keeps)):
if (source_snapshot in source_obsoletes) and (before_common or (target_snapshot not in target_transfers)):
source_snapshot.destroy()

# on target: destroy everything that's obsolete, except common_snapshot
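
The pre-clean rules of this hunk, restated as one compact function: source-side obsoletes from before the common snapshot always go, later ones only when their counterpart is not scheduled for transfer. The target-side loop is cut off by the hunk above, so its body here follows the comment and is an assumption; this operates on ZfsDataset-like objects as in the sketch after find_common_snapshot:

def pre_clean(source, target, common, source_obsoletes, target_obsoletes, target_transfers):
    before_common = True
    for source_snapshot in source.snapshots:
        if common and source_snapshot.snapshot_name == common.snapshot_name:
            before_common = False  # never destroy the common snapshot
            continue
        target_snapshot = target.find_snapshot(source_snapshot)
        # before common we can't send it anyway; after common, only destroy
        # when the matching target snapshot won't be transferred
        if source_snapshot in source_obsoletes and (before_common or target_snapshot not in target_transfers):
            source_snapshot.destroy()
    # target side (assumed from the comment): drop obsoletes except common
    for target_snapshot in target_obsoletes:
        if not (common and target_snapshot.snapshot_name == common.snapshot_name):
            target_snapshot.destroy()
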
@@ -1053,14 +1045,24 @@ def _validate_resume_token(self, target_dataset, start_snapshot):
return resume_token

def _plan_sync(self, target_dataset, also_other_snapshots, guid_check, raw):
"""plan where to start syncing and what to sync and what to keep
"""Determine at what snapshot to start syncing to target_dataset and what to sync and what to keep.
Args:
:rtype: ( ZfsDataset, ZfsDataset, list[ZfsDataset], list[ZfsDataset], list[ZfsDataset], list[ZfsDataset] )
:type target_dataset: ZfsDataset
:type also_other_snapshots: bool
:type guid_check: bool
:type raw: bool
Returns:
tuple: A tuple containing:
- ZfsDataset: The common snapshot
- ZfsDataset: The start snapshot
- list[ZfsDataset]: Our obsolete source snapshots, after the transfer is done (will be thinned ASAP)
- list[ZfsDataset]: Our obsolete target snapshots, after the transfer is done (will be thinned ASAP)
- list[ZfsDataset]: Target snapshots that still need to be transferred
- list[ZfsDataset]: Incompatible target snapshots: snapshots after the common snapshot that are in the way (need to be destroyed to continue)
"""

# determine common and start snapshot
Expand All @@ -1069,20 +1071,39 @@ def _plan_sync(self, target_dataset, also_other_snapshots, guid_check, raw):
start_snapshot = self.find_start_snapshot(common_snapshot, also_other_snapshots)
incompatible_target_snapshots = target_dataset.find_incompatible_snapshots(common_snapshot, raw)

# let thinner decide what's obsolete on source
# let thinner decide what's obsolete on source after the transfer is done, keeping the last snapshot as common.
source_obsoletes = []
if self.our_snapshots:
source_obsoletes = self.thin_list(keeps=[self.our_snapshots[-1]])[1]

# let thinner decide keeps/obsoletes on target, AFTER the transfer would be done (by using virtual snapshots)
target_dataset._add_virtual_snapshots(self, start_snapshot, also_other_snapshots)
target_keeps = []
target_obsoletes = []
if target_dataset.our_snapshots:
(target_keeps, target_obsoletes) = target_dataset.thin_list(keeps=[target_dataset.our_snapshots[-1]],
ignores=incompatible_target_snapshots)
# A list of all our possible target snapshots (existing - incompatible + transferable from source)
# We will use this list to let the thinner decide what to transfer to the target, and which target snapshots to destroy.

# start with snapshots that already exist, minus incompatibles
if target_dataset.exists:
possible_target_snapshots = [snapshot for snapshot in target_dataset.snapshots if snapshot not in incompatible_target_snapshots]
else:
possible_target_snapshots = []

# Add all snapshots from the source to the target list, as virtual snapshots that don't exist yet (force_exists=False)
source_snapshot = start_snapshot
while source_snapshot:
if also_other_snapshots or source_snapshot.is_ours():
# virtual target snapshot
target_snapshot=target_dataset.zfs_node.get_dataset(target_dataset.filesystem_name + "@" + source_snapshot.snapshot_name, force_exists=False)
possible_target_snapshots.append(target_snapshot)
source_snapshot = self.find_next_snapshot(source_snapshot, False)

#Now the thinner can decide which snapshots we want on the target, by looking at the whole picture:
(target_keeps, target_obsoletes)=target_dataset.zfs_node.thin_list(possible_target_snapshots, keep_snapshots=[possible_target_snapshots[-1]])

#Create a list of all the target snapshots we want, that don't exist yet
target_transfers=[]
for target_keep in target_keeps:
if not target_keep.exists:
target_transfers.append(target_keep)

return common_snapshot, start_snapshot, source_obsoletes, target_obsoletes, target_keeps, incompatible_target_snapshots
return common_snapshot, start_snapshot, source_obsoletes, target_obsoletes, target_transfers, incompatible_target_snapshots
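
A tiny runnable demo of why the reworked planner feeds virtual snapshots to the thinner: by thinning the would-be final state of the target, one pass yields both what to destroy and what still has to be transferred. Snap and the two-newest "thinner" below are dummy stand-ins, not the repo's classes:

class Snap:
    def __init__(self, name, exists):
        self.name, self.exists = name, exists
    def __repr__(self):
        return ("" if self.exists else "virtual ") + self.name

existing = [Snap("@day1", True)]                        # already on target
virtual = [Snap("@day2", False), Snap("@day3", False)]  # exist only on source
possible_target_snapshots = existing + virtual

# stand-in for thin_list(): keep only the two newest snapshots
target_keeps = possible_target_snapshots[-2:]
target_obsoletes = possible_target_snapshots[:-2]

target_transfers = [s for s in target_keeps if not s.exists]
print("destroy on target:", target_obsoletes)   # [@day1]
print("transfer to target:", target_transfers)  # [virtual @day2, virtual @day3]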

def handle_incompatible_snapshots(self, incompatible_target_snapshots, destroy_incompatible):
"""destroy incompatbile snapshots on target before sync, or inform user
@@ -1147,7 +1168,8 @@ def sync_snapshots(self, target_dataset, features, show_progress, filter_propert
# keep data encrypted by sending it raw (including properties)
raw = True

(common_snapshot, start_snapshot, source_obsoletes, target_obsoletes, target_keeps,
# note: only target_obsoletes is used during sync, to check if the target doesn't want the snapshot
(common_snapshot, start_snapshot, source_obsoletes, target_obsoletes, target_transfers,
incompatible_target_snapshots) = \
self._plan_sync(target_dataset=target_dataset, also_other_snapshots=also_other_snapshots,
guid_check=guid_check, raw=raw)
@@ -1156,7 +1178,7 @@ def sync_snapshots(self, target_dataset, features, show_progress, filter_propert
# Also useful with no_send to still clean up stuff.
self._pre_clean(
common_snapshot=common_snapshot, target_dataset=target_dataset,
target_keeps=target_keeps, target_obsoletes=target_obsoletes, source_obsoletes=source_obsoletes)
target_transfers=target_transfers, target_obsoletes=target_obsoletes, source_obsoletes=source_obsoletes)

# handle incompatible stuff on target
target_dataset.handle_incompatible_snapshots(incompatible_target_snapshots, destroy_incompatible)
