Files
Matthias Blankertz 70d36ac45f Fix snapshot retention times
The current period should not count, so e.g. monthly=3 results in the
snapshots of the 3 _previous_ months being kept (plus the snapshot for the
current month).
2021-10-04 17:57:15 +02:00

179 lines
7.2 KiB
Python

from datetime import datetime, timezone
from dateutil.relativedelta import relativedelta
import subprocess
import sys
from pprint import pprint
# TODO: Argument parsing
# Until argument parsing exists, these module-level flags are edited by hand.
# verbose: print progress messages to stdout (run banner, datasets skipped
# because nothing was written, planned snapshot creation and destruction).
verbose = True
# dry_run: only report which snapshots would be created or destroyed; the
# `zfs snapshot` and `zfs destroy` commands are not executed.
dry_run = False
def keep_time(now, snapshots, convert, delta, count):
    """Select the snapshots to retain for one kind of retention period.

    The oldest snapshot of each period is kept, starting ``count`` periods
    before the period containing ``now`` and continuing up to (and including)
    the current period.

    Args:
        now: Timezone-aware datetime of the current run.
        snapshots: Iterable of snapshot names of the form
            ``zfs-smart-snap-YYYY-mm-dd-HHMM``; the timestamp is interpreted
            as UTC. The order does not matter and the iterable is not
            modified.
        convert: Callable mapping a datetime to the start of the period that
            contains it (e.g. truncation to the first day of the month).
        delta: Length of one period. Must support ``delta * count`` and
            addition to / subtraction from a datetime.
        count: Number of past periods to retain. Values below 1 disable this
            kind of period and yield an empty result.

    Returns:
        Set of snapshot names to keep.

    Raises:
        ValueError: If a snapshot name that gets examined does not match the
            expected name format.
    """
    keep = set()
    if count < 1:
        return keep
    # The current period does not count towards `count`: go back `count`
    # whole periods from the start of the current one.
    start = convert(now) - delta * count
    # The selection below only works oldest-first. Names carry zero-padded
    # timestamps, so lexicographic order is chronological order; sorting a
    # copy here removes the hidden requirement that callers pre-sort.
    for snapshot in sorted(snapshots):
        snapdate = datetime.strptime(snapshot, 'zfs-smart-snap-%Y-%m-%d-%H%M').replace(tzinfo=timezone.utc)
        if snapdate >= start:
            keep.add(snapshot)
            # Ignore further snapshots until the next period begins.
            start = convert(snapdate) + delta
            if start > now:
                # The next period lies in the future; nothing more to keep.
                break
    return keep
def keep_year(now, snapshots, count):
    """Yearly retention: run keep_time with periods that start on 1 January at 00:00."""
    def start_of_year(moment):
        return moment.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0)

    one_year = relativedelta(years=1)
    return keep_time(now, snapshots, start_of_year, one_year, count)
def keep_month(now, snapshots, count):
    """Monthly retention: run keep_time with periods that start on the 1st of a month at 00:00."""
    def start_of_month(moment):
        return moment.replace(day=1, hour=0, minute=0, second=0, microsecond=0)

    one_month = relativedelta(months=1)
    return keep_time(now, snapshots, start_of_month, one_month, count)
def keep_day(now, snapshots, count):
    """Daily retention: run keep_time with periods that start at 00:00 of each day."""
    def start_of_day(moment):
        return moment.replace(hour=0, minute=0, second=0, microsecond=0)

    one_day = relativedelta(days=1)
    return keep_time(now, snapshots, start_of_day, one_day, count)
def keep_hour(now, snapshots, count):
    """Hourly retention: run keep_time with periods that start at the top of each hour."""
    def start_of_hour(moment):
        return moment.replace(minute=0, second=0, microsecond=0)

    one_hour = relativedelta(hours=1)
    return keep_time(now, snapshots, start_of_hour, one_hour, count)
def keep_frequent(now, snapshots, count):
    """Frequent retention: run keep_time with 15-minute periods aligned to :00, :15, :30 and :45."""
    def start_of_quarter_hour(moment):
        # Round the minute down to the enclosing quarter-hour boundary.
        aligned_minute = moment.minute - moment.minute % 15
        return moment.replace(minute=aligned_minute, second=0, microsecond=0)

    quarter_hour = relativedelta(minutes=15)
    return keep_time(now, snapshots, start_of_quarter_hour, quarter_hour, count)
def snapshots_to_keep(now, snapshots, yearly=0, monthly=0, daily=0, hourly=0, frequently=0):
    """Return the union of the snapshots retained by every period type.

    Args:
        now: Timezone-aware datetime of the current run.
        snapshots: List of snapshot names; it is sorted in place.
        yearly, monthly, daily, hourly, frequently: Retention count passed to
            the matching ``keep_*`` selector.

    Returns:
        Set of snapshot names that at least one selector wants to keep.
    """
    # In-place on purpose: the caller's list is left sorted afterwards.
    snapshots.sort()
    policies = (
        (keep_year, yearly),
        (keep_month, monthly),
        (keep_day, daily),
        (keep_hour, hourly),
        (keep_frequent, frequently),
    )
    retained = set()
    for selector, count in policies:
        retained |= selector(now, snapshots, count)
    return retained
def parse_int(fields, name, fieldname):
    """Read an integer property value from one row of ``zfs list`` output.

    Args:
        fields: Mapping of property name to its raw string value.
        name: Dataset name; only used in the warning message.
        fieldname: Key of the property to read from ``fields``.

    Returns:
        0 when the value is ``'-'`` (presumably zfs's placeholder for an unset
        property -- confirm), the parsed integer when the value is numeric,
        and -1 (after printing a warning to stderr) when it cannot be parsed.
    """
    raw = fields[fieldname]
    if raw == '-':
        return 0
    try:
        value = int(raw)
    except ValueError:
        print(f"Warning: Could not parse value {raw} for fs/zvol {name} prop {fieldname}",
              file=sys.stderr)
        value = -1
    return value
def zfs_list(fieldnames, args):
    """Run ``zfs list -H`` and parse its tab-separated output.

    Args:
        fieldnames: Property names to request in addition to ``name``.
        args: Extra command-line arguments appended to the ``zfs list`` call.

    Returns:
        If ``fieldnames`` is empty, a list of the names in output order.
        Otherwise a dict mapping each name to a dict of
        ``{fieldname: value-as-string}``.

    Raises:
        subprocess.CalledProcessError: If ``zfs`` exits with a non-zero status.
    """
    columns = ','.join(['name'] + fieldnames)
    command = ['/usr/sbin/zfs', 'list', '-H', '-o', columns] + args
    completed = subprocess.run(command, capture_output=True, check=True, encoding='UTF-8')
    # One row per output line; the first column is always the name.
    rows = (line.split('\t') for line in completed.stdout.splitlines())
    if not fieldnames:
        return [row[0] for row in rows]
    return {row[0]: dict(zip(fieldnames, row[1:])) for row in rows}
def read_snapshot_props():
    """Read the snapshot configuration properties of all filesystems and volumes.

    Returns:
        Dict mapping each dataset name to a dict with the key ``'snapshot'``
        (True only when ``org.blankertz:snapshot`` equals "true", ignoring
        case) and one integer per period (``'yearly'``, ``'monthly'``,
        ``'daily'``, ``'hourly'``, ``'frequently'``) as produced by parse_int.
    """
    periods = ['yearly', 'monthly', 'daily', 'hourly', 'frequently']
    period_props = {period: 'org.blankertz:snapshot:' + period for period in periods}
    fieldnames = ['org.blankertz:snapshot'] + list(period_props.values())
    listing = zfs_list(fieldnames, ['-t', 'filesystem,volume', '-r'])

    result = {}
    for name, fields in listing.items():
        entry = {'snapshot': fields['org.blankertz:snapshot'].lower() == 'true'}
        for period, prop in period_props.items():
            entry[period] = parse_int(fields, name, prop)
        result[name] = entry
    return result
def read_snapshots(fs_to_snap):
    """List the existing snapshots created by this tool for the given datasets.

    Only snapshots whose name matches ``zfs-smart-snap-YYYY-mm-dd-HHMM``
    exactly are returned. Names that merely share the prefix (for example a
    manually created ``zfs-smart-snap-manual``) are ignored, because the
    retention code parses every returned name with that format and a single
    stray name would otherwise abort the whole run with a ValueError.

    Args:
        fs_to_snap: List of dataset names to query. When empty, ``zfs`` is
            not invoked at all.

    Returns:
        Dict mapping dataset name to the list of its matching snapshot names
        (the part after ``@``), in ``zfs list`` output order. Datasets
        without matching snapshots are absent from the dict.
    """
    if not fs_to_snap:
        return {}
    result = {}
    for name in zfs_list([], ['-t', 'snapshot'] + fs_to_snap):
        fs, _, snap = name.partition('@')
        try:
            # Validation only; the parsed value is not needed here.
            datetime.strptime(snap, 'zfs-smart-snap-%Y-%m-%d-%H%M')
        except ValueError:
            continue
        result.setdefault(fs, []).append(snap)
    return result
def find_latest_snapshots(existing_snaps):
    """Return the last snapshot name, in sorted order, for every dataset.

    Args:
        existing_snaps: Dict mapping dataset name to a non-empty list of
            snapshot names. Every list is sorted in place.

    Returns:
        Dict mapping dataset name to the greatest snapshot name in its list.
    """
    latest = {}
    for fs in existing_snaps:
        names = existing_snaps[fs]
        # In-place sort: the caller's lists stay sorted after this call.
        names.sort()
        latest[fs] = names[-1]
    return latest
def written_since_last(fs, snap):
    """Tell whether a dataset has been written to since the given snapshot.

    Queries the ``written@<snap>`` property of ``fs`` via ``zfs list -Hp``.

    Args:
        fs: Dataset name.
        snap: Snapshot name (without the dataset part) to compare against.

    Returns:
        True if the reported value is greater than zero, otherwise False.
        In verbose mode a message is printed when nothing was written.

    Raises:
        subprocess.CalledProcessError: If ``zfs`` exits with a non-zero status.
        ValueError: If the command output is not an integer.
    """
    command = ['/usr/sbin/zfs', 'list', '-Hp', '-o', f'written@{snap}', fs]
    completed = subprocess.run(command, check=True, capture_output=True, encoding="UTF-8")
    written = int(completed.stdout)
    if written == 0 and verbose:
        print(f"{fs} not written since {snap}, skipping")
    return written > 0
def get_pool(fs):
    """Return the part of a dataset name before the first ``/`` (the whole name if there is none)."""
    pool, _, _ = fs.partition('/')
    return pool
def main():
    """Create new snapshots where needed and destroy those past retention.

    Steps:
      1. Read the snapshot properties of all datasets and select those with
         snapshots enabled.
      2. Snapshot every selected dataset that has no snapshot yet or was
         written to since its latest one, with one ``zfs snapshot`` call per
         pool.
      3. For every selected dataset that has snapshots, work out which ones
         the retention policy keeps (the latest one is always kept) and
         destroy the rest, but only on pools that received a new snapshot in
         this run.

    With ``dry_run`` set, nothing is created or destroyed; the planned
    actions are printed instead.
    """
    now = datetime.now(timezone.utc).replace(second=0, microsecond=0)
    if verbose:
        print(f"zfs_smart_snapshot running at {now.strftime('%Y-%m-%d-%H%M')}")

    props = read_snapshot_props()
    fs_to_snap = [fs for fs, fs_props in props.items() if fs_props['snapshot']]
    if not fs_to_snap:
        # Nothing is configured for snapshots, so there is nothing to create or clean.
        return

    existing_snaps = read_snapshots(fs_to_snap)
    latest_snaps = find_latest_snapshots(existing_snaps)
    fs_to_really_snap = [fs for fs in fs_to_snap
                         if fs not in latest_snaps or written_since_last(fs, latest_snaps[fs])]
    pools = {get_pool(fs) for fs in fs_to_really_snap}

    if fs_to_really_snap:
        snapname = 'zfs-smart-snap-' + now.strftime('%Y-%m-%d-%H%M')
        snaps_to_create = [f'{fs}@{snapname}' for fs in fs_to_really_snap]
        if dry_run or verbose:
            phrase = "Would" if dry_run else "Will"
            print(f"{phrase} create snapshots " + ", ".join(snaps_to_create))
        if not dry_run:
            for pool in pools:
                # Select by exact pool name. A plain string-prefix test would
                # also match datasets of a pool whose name merely starts with
                # this one (e.g. 'tank' vs. 'tank2') and mix pools in one call.
                pool_snaps = [f'{fs}@{snapname}' for fs in fs_to_really_snap if get_pool(fs) == pool]
                subprocess.run(['/usr/sbin/zfs', 'snapshot'] + pool_snaps, check=True)
        # NOTE(review): assumes the new snapshot is also recorded in dry-run
        # mode, so that the cleanup preview below matches a real run -- confirm.
        for fs in fs_to_really_snap:
            existing_snaps[fs] = existing_snaps.get(fs, []) + [snapname]

    to_delete = []
    for fs in fs_to_snap:
        if fs not in existing_snaps:
            continue
        snaps = existing_snaps[fs]
        policy = props[fs]
        to_keep = snapshots_to_keep(now, snaps, policy['yearly'], policy['monthly'], policy['daily'],
                                    policy['hourly'], policy['frequently'])
        # Must be read after snapshots_to_keep(), which sorts the list in place.
        latest = snaps[-1]
        # Always keep the latest snapshot. If it were deleted (e.g. due to an
        # incoherent snapshot history configuration) this could confuse the
        # 'written since last snapshot' logic and cause a cycle of
        # creating/deleting/creating the snapshot on an idle file system.
        if latest not in to_keep:
            print(f"Warning: Latest snap {fs}@{latest} not preserved by retention policy, keeping anyways",
                  file=sys.stderr)
            to_keep.add(latest)
        # Don't clean up old snapshots if no new snapshots were created on the
        # pool (to prevent disk spinup only for snapshot deletion). The old
        # snapshots will be cleaned once any dataset on the pool is changed
        # and a new snapshot is created.
        if get_pool(fs) not in pools:
            if verbose:
                print(f"Skipping snapshot cleaning for {fs} because pool {get_pool(fs)} was not written")
            continue
        to_delete += (f'{fs}@{snap}' for snap in set(snaps).difference(to_keep))

    if not dry_run:
        if verbose and to_delete:
            print("Will destroy snapshots " + ", ".join(sorted(to_delete)))
        for delete_snap in to_delete:
            subprocess.run(['/usr/sbin/zfs', 'destroy', delete_snap], check=True)
    elif to_delete:
        print("Would destroy snapshots " + ", ".join(sorted(to_delete)))
# Run the snapshot/cleanup pass only when executed as a script, not on import.
if __name__ == "__main__":
    main()