#!/bin/sh

HOSTNAME=$(cat /etc/hostname)

tar cfvz proxmox-${HOSTNAME}-config.tgz \
    /etc/hosts \
    /etc/hostname \
    /etc/resolv.conf \
    /etc/ceph \
    /etc/corosync \
    /etc/ssh \
    /etc/network \
    /var/lib/ceph \
    /var/lib/pve-cluster

#
# I managed to restore everything after a very stressful 24hrs!
#
# For those reading in the future: don't bother backing up /etc/pve, no matter what anyone says.
# It's useful to have, no doubt - you can cherry-pick files you might need - but it's an ineffective
# disaster recovery strategy. /etc/pve is simply the FUSE mount of the SQLite database: you can't
# write to it properly while it's mounted, and you can't access it at all when it isn't.
# Instead, back up the database file /var/lib/pve-cluster/config.db and use that to restore the config.
#
# TL;DR: To completely restore all nodes, I made the following backups and copied them to a fresh install:
# /etc/hosts, /etc/hostname, /etc/resolv.conf, /etc/ceph, /etc/corosync, /etc/ssh (particularly the host keys),
# /etc/network, /var/lib/ceph, /var/lib/pve-cluster. I stopped PVE first to avoid conflicts, with
# "systemctl stop pve-cluster pvedaemon pveproxy pvestatd". (A commented restore sketch is at the end of this file.)
#
# After restoring these files and rebooting, the VM/CT, storage, etc. configs are all back. Ceph required
# some extra work in my case:
#
# Enable the no-subscription repository and use "pveceph install --repository no-subscription" to install Ceph (or use the web UI)
# Manually start and enable the manager and monitor on each node using systemctl start/enable ceph-mgr@/ceph-mon@
# Check your OSDs are detected by running "ceph-volume lvm list"
# Rejoin the OSDs to the cluster using "ceph-volume lvm activate --all"
# Profit
#
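#
# Restore sketch, pieced together from the notes above - a minimal outline, assuming
# the archive was made by this script and that you run it as root on the fresh install:
#
#   systemctl stop pve-cluster pvedaemon pveproxy pvestatd   # stop PVE so /var/lib/pve-cluster isn't in use
#   tar xvzf proxmox-${HOSTNAME}-config.tgz -C /             # archive paths are relative to /
#   reboot                                                   # pve-cluster rebuilds /etc/pve from config.db on boot
#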
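#
# Ceph recovery sketch covering the four steps above. Hedged: "pve1" is a placeholder
# mon/mgr instance name - substitute each node's own name when you run it there:
#
#   pveceph install --repository no-subscription         # reinstall the Ceph packages
#   systemctl enable --now ceph-mon@pve1 ceph-mgr@pve1   # start + enable mon and mgr on this node
#   ceph-volume lvm list                                 # confirm the existing OSDs are detected
#   ceph-volume lvm activate --all                       # rejoin all OSDs to the cluster
#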