"""Create and prune ZFS snapshots driven by per-dataset user properties.

Datasets opt in through the ``org.blankertz:snapshot`` property; retention
counts are read from ``org.blankertz:snapshot:<period>``.  Snapshots are named
``zfs-smart-snap-YYYY-MM-DD-HHMM`` with the timestamp taken in UTC.
"""
from datetime import datetime, timezone
from pprint import pprint
import subprocess
import sys

from dateutil.relativedelta import relativedelta

# TODO: Argument parsing
verbose = True
dry_run = False

ZFS = '/usr/sbin/zfs'
SNAPSHOT_PREFIX = 'zfs-smart-snap-'
# strptime/strftime pattern of a complete snapshot name (prefix + UTC minute).
SNAPSHOT_FORMAT = SNAPSHOT_PREFIX + '%Y-%m-%d-%H%M'


def parse_snapshot_time(snapshot):
    """Return the timezone-aware (UTC) timestamp encoded in a snapshot name.

    Raises ValueError if the name does not match SNAPSHOT_FORMAT.
    """
    return datetime.strptime(snapshot, SNAPSHOT_FORMAT).replace(tzinfo=timezone.utc)


def is_managed_snapshot(snapshot):
    """Return True if the snapshot name carries a parseable timestamp."""
    try:
        parse_snapshot_time(snapshot)
    except ValueError:
        return False
    return True


def keep_time(now, snapshots, convert, delta, count):
    """Select the snapshots to retain for one retention period type.

    snapshots is walked in the given order and is expected to be sorted
    ascending (snapshots_to_keep sorts before calling).  The window opens
    `count` periods before the start of the current period.  The first
    snapshot at or after the window start is kept, then the window start
    jumps to the beginning of the period following that snapshot, so at most
    one snapshot per period is retained.

    now       -- current time (timezone-aware)
    snapshots -- snapshot names in SNAPSHOT_FORMAT
    convert   -- function truncating a datetime to the start of its period
    delta     -- relativedelta spanning one period
    count     -- number of past periods to cover; values < 1 keep nothing

    Returns a set of snapshot names.  Raises ValueError on a malformed name.
    """
    keep = set()
    if count < 1:
        return keep

    start = convert(now) - delta * count
    for snapshot in snapshots:
        snapdate = parse_snapshot_time(snapshot)
        if snapdate < start:
            continue
        keep.add(snapshot)
        start = convert(snapdate) + delta
        if start > now:
            # The next period lies in the future; nothing more can match.
            break
    return keep


def keep_year(now, snapshots, count):
    """Apply keep_time with calendar years as the period."""
    return keep_time(
        now, snapshots,
        lambda d: d.replace(month=1, day=1, hour=0, minute=0, second=0, microsecond=0),
        relativedelta(years=1), count)


def keep_month(now, snapshots, count):
    """Apply keep_time with calendar months as the period."""
    return keep_time(
        now, snapshots,
        lambda d: d.replace(day=1, hour=0, minute=0, second=0, microsecond=0),
        relativedelta(months=1), count)


def keep_day(now, snapshots, count):
    """Apply keep_time with days as the period."""
    return keep_time(
        now, snapshots,
        lambda d: d.replace(hour=0, minute=0, second=0, microsecond=0),
        relativedelta(days=1), count)


def keep_hour(now, snapshots, count):
    """Apply keep_time with hours as the period."""
    return keep_time(
        now, snapshots,
        lambda d: d.replace(minute=0, second=0, microsecond=0),
        relativedelta(hours=1), count)


def keep_frequent(now, snapshots, count):
    """Apply keep_time with 15-minute slots as the period."""
    return keep_time(
        now, snapshots,
        lambda d: d.replace(minute=d.minute // 15 * 15, second=0, microsecond=0),
        relativedelta(minutes=15), count)


def snapshots_to_keep(now, snapshots, yearly=0, monthly=0, daily=0, hourly=0, frequently=0):
    """Return the union of the snapshots retained by every period type.

    NOTE: snapshots is sorted in place; main() relies on the last element
    being the newest snapshot afterwards.
    """
    snapshots.sort()
    keep = set()
    keep.update(keep_year(now, snapshots, yearly))
    keep.update(keep_month(now, snapshots, monthly))
    keep.update(keep_day(now, snapshots, daily))
    keep.update(keep_hour(now, snapshots, hourly))
    keep.update(keep_frequent(now, snapshots, frequently))
    return keep


def parse_int(fields, name, fieldname):
    """Parse an integer property value.

    Returns 0 for an unset property ('-') and -1, after printing a warning
    to stderr, when the value is not an integer.
    """
    raw = fields[fieldname]
    if raw == '-':
        return 0
    try:
        return int(raw)
    except ValueError:
        print(f"Warning: Could not parse value {fields[fieldname]} for fs/zvol {name} prop {fieldname}",
              file=sys.stderr)
        return -1


def zfs_list(fieldnames, args):
    """Run `zfs list -H` and parse its tab-separated output.

    With an empty fieldnames list, returns a list of dataset names.
    Otherwise returns a dict mapping each name to {fieldname: value}.
    Raises subprocess.CalledProcessError if zfs exits non-zero.
    """
    columns = ','.join(['name'] + fieldnames)
    proc = subprocess.run([ZFS, 'list', '-H', '-o', columns] + args,
                          capture_output=True, check=True, encoding='UTF-8')
    rows = [line.split('\t') for line in proc.stdout.splitlines()]
    if not fieldnames:
        return [fields[0] for fields in rows]
    return {fields[0]: dict(zip(fieldnames, fields[1:])) for fields in rows}


def read_snapshot_props():
    """Read the snapshot configuration of all filesystems and volumes.

    Returns {name: {'snapshot': bool, 'yearly': int, 'monthly': int,
    'daily': int, 'hourly': int, 'frequently': int}}.
    """
    periods = ['yearly', 'monthly', 'daily', 'hourly', 'frequently']
    base = 'org.blankertz:snapshot'
    fieldnames = [base] + [f'{base}:{period}' for period in periods]
    raw_list = zfs_list(fieldnames, ['-t', 'filesystem,volume', '-r'])

    result = {}
    for name, fields in raw_list.items():
        entry = {'snapshot': fields[base].lower() == 'true'}
        for period in periods:
            entry[period] = parse_int(fields, name, f'{base}:{period}')
        result[name] = entry
    return result


def read_snapshots(fs_to_snap):
    """Return {dataset: [snapshot names]} of the managed snapshots.

    Only snapshots whose name matches SNAPSHOT_FORMAT are returned.  A
    snapshot that merely carries the 'zfs-smart-snap' prefix is reported on
    stderr and ignored; previously it made the retention logic fail with a
    ValueError.  Ignored snapshots are never candidates for deletion.
    """
    raw_list = zfs_list([], ['-t', 'snapshot'] + fs_to_snap) if fs_to_snap else []
    result = {}
    for name in raw_list:
        fs, snap = name.split('@')
        if not snap.startswith('zfs-smart-snap'):
            continue
        if not is_managed_snapshot(snap):
            print(f"Warning: Ignoring snapshot {name} with unparseable timestamp", file=sys.stderr)
            continue
        result.setdefault(fs, []).append(snap)
    return result


def find_latest_snapshots(existing_snaps):
    """Return {dataset: newest snapshot name}; sorts each list in place."""
    result = {}
    for fs, snaps in existing_snaps.items():
        snaps.sort()
        result[fs] = snaps[-1]
    return result


def written_since_last(fs, snap):
    """Return True if `zfs list -o written@snap` reports a non-zero value for fs."""
    proc = subprocess.run([ZFS, 'list', '-Hp', '-o', f'written@{snap}', fs],
                          check=True, capture_output=True, encoding="UTF-8")
    res = int(proc.stdout)
    if verbose and res == 0:
        print(f"{fs} not written since {snap}, skipping")
    return res > 0


def get_pool(fs):
    """Return the pool name, i.e. the first path component of a dataset name."""
    return fs.split('/')[0]


def group_by_pool(filesystems):
    """Group dataset names by their exact pool name, preserving input order.

    Matching on the exact pool name (rather than a string prefix) keeps
    datasets of pool 'tank2' out of the group for pool 'tank'.
    """
    groups = {}
    for fs in filesystems:
        groups.setdefault(get_pool(fs), []).append(fs)
    return groups


def main():
    """Snapshot every enabled dataset that changed, then prune old snapshots."""
    now = datetime.now(timezone.utc).replace(second=0, microsecond=0)
    if verbose:
        print(f"zfs_smart_snapshot running at {now.strftime('%Y-%m-%d-%H%M')}")

    props = read_snapshot_props()
    fs_to_snap = [fs for fs, fs_props in props.items() if fs_props['snapshot']]
    if not fs_to_snap:
        return

    existing_snaps = read_snapshots(fs_to_snap)
    latest_snaps = find_latest_snapshots(existing_snaps)
    fs_to_really_snap = [fs for fs in fs_to_snap
                         if fs not in latest_snaps or written_since_last(fs, latest_snaps[fs])]
    fs_by_pool = group_by_pool(fs_to_really_snap)

    if fs_to_really_snap:
        snapname = now.strftime(SNAPSHOT_FORMAT)
        snaps_to_create = [f'{fs}@{snapname}' for fs in fs_to_really_snap]
        if dry_run or verbose:
            phrase = "Would" if dry_run else "Will"
            print(f"{phrase} create snapshots " + ", ".join(snaps_to_create))
        if not dry_run:
            # One `zfs snapshot` invocation per pool.
            for pool_filesystems in fs_by_pool.values():
                subprocess.run([ZFS, 'snapshot'] + [f'{fs}@{snapname}' for fs in pool_filesystems],
                               check=True)
        # Record the new snapshot (also in dry-run mode) so the retention pass
        # below sees the same state a real run would.
        for fs in fs_to_really_snap:
            existing_snaps.setdefault(fs, []).append(snapname)

    to_delete = []
    for fs in fs_to_snap:
        if fs not in existing_snaps:
            continue
        snaps = existing_snaps[fs]
        to_keep = snapshots_to_keep(now, snaps, props[fs]['yearly'], props[fs]['monthly'],
                                    props[fs]['daily'], props[fs]['hourly'], props[fs]['frequently'])
        # Always keep the latest snapshot. If it were deleted (e.g. due to an
        # incoherent snapshot history configuration) this could confuse the
        # 'written since last snapshot' logic and cause a cycle of
        # creating/deleting/creating the snapshot on an idle file system.
        latest = snaps[-1]  # snapshots_to_keep() sorted the list in place
        if latest not in to_keep:
            print(f"Warning: Latest snap {fs}@{latest} not preserved by retention policy, keeping anyways",
                  file=sys.stderr)
            to_keep.add(latest)
        # Don't clean up old snapshots if no new snapshots were created on the
        # pool (to prevent disk spin-up only for snapshot deletion). The old
        # snapshots will be cleaned once any dataset on the pool is changed
        # and a new snapshot is created.
        if get_pool(fs) not in fs_by_pool:
            if verbose:
                print(f"Skipping snapshot cleaning for {fs} because pool {get_pool(fs)} was not written")
            continue
        to_delete += (f'{fs}@{snap}' for snap in sorted(set(snaps).difference(to_keep)))

    if not dry_run:
        if verbose and to_delete:
            print("Will destroy snapshots " + ", ".join(sorted(to_delete)))
        for delete_snap in to_delete:
            subprocess.run([ZFS, 'destroy', delete_snap], check=True)
    elif to_delete:
        print("Would destroy snapshots " + ", ".join(sorted(to_delete)))


if __name__ == "__main__":
    main()