update beta 1.2.2.2

This commit is contained in:
MacRimi
2026-06-11 17:24:20 +02:00
parent f9cf931828
commit 6094ab8e1c
2 changed files with 279 additions and 0 deletions

View File

@@ -1278,6 +1278,23 @@ _rs_apply() {
dst="/$rel"
[[ -e "$src" ]] || { ((skipped++)); continue; }
# Smart-restore hardware-drift skip list (populated by
# _rs_run_complete_guided when hb_assess_hardware_drift flags
# paths that would break on this host's hardware). Each path
# in $RS_SKIP_PATHS is one absolute path per line. Matching is
# exact-or-descendant so "/etc/zfs/zpool.cache" listed in the
# skip set covers itself when rel == "etc/zfs/zpool.cache".
if [[ -n "${RS_SKIP_PATHS:-}" ]]; then
local _abs="/$rel" _skip=""
while IFS= read -r _skip; do
[[ -z "$_skip" ]] && continue
if [[ "$_abs" == "$_skip" || "$_abs" == "$_skip"/* ]]; then
((skipped++))
continue 2
fi
done <<<"$RS_SKIP_PATHS"
fi
# Never restore cluster virtual filesystem data live.
# Extract it for manual recovery in maintenance mode.
# Path note: this used to live under /root/proxmenux-recovery/,
@@ -1589,6 +1606,19 @@ _rs_collect_pending_paths() {
;;
esac
[[ -z "$rel" || -n "${seen[$rel]}" ]] && continue
# Drop hardware-drift skips (see RS_SKIP_PATHS comment in _rs_apply).
if [[ -n "${RS_SKIP_PATHS:-}" ]]; then
local _abs="/$rel" _skip="" _drop=0
while IFS= read -r _skip; do
[[ -z "$_skip" ]] && continue
if [[ "$_abs" == "$_skip" || "$_abs" == "$_skip"/* ]]; then
_drop=1; break
fi
done <<<"$RS_SKIP_PATHS"
(( _drop )) && continue
fi
seen["$rel"]=1
out+=("$rel")
done
@@ -1745,6 +1775,55 @@ _rs_run_complete_guided() {
local -a all_paths=()
hb_load_restore_paths "$staging_root" all_paths
# ── Smart restore plan ──────────────────────────────────
# Compare the backup metadata against the live host and surface
# anything that would be unsafe to restore as-is (ZFS pool GUID
# changed, fstab UUIDs gone, NVIDIA driver state for a host with
# no NVIDIA card, ...). Only opens an extra dialog when there's
# actually drift — same-hardware/same-host restores skip it.
export RS_SKIP_PATHS=""
local -a drift_lines=()
mapfile -t drift_lines < <(hb_assess_hardware_drift "$staging_root" 2>/dev/null)
if (( ${#drift_lines[@]} > 0 )); then
local skip_paths=""
local skip_components=""
local plan_body
plan_body="\Zb$(translate "Smart restore plan — hardware compatibility check")\ZB"$'\n\n'
plan_body+="$(translate "The backup metadata was compared against this host. The following items will be SKIPPED to keep the boot safe:")"$'\n\n'
local line key action reason
for line in "${drift_lines[@]}"; do
IFS=$'\t' read -r key action reason <<<"$line"
[[ "$action" != "skip" ]] && continue
if [[ "$key" == component:* ]]; then
local cname="${key#component:}"
skip_components+="${cname} "
plan_body+=" \Z1•\Zn $(translate "Component:") \Zb${cname}\ZB"$'\n'
plan_body+=" ${reason}"$'\n\n'
else
skip_paths+="${key}"$'\n'
plan_body+=" \Z1•\Zn $(translate "Path:") \Zb${key}\ZB"$'\n'
plan_body+=" ${reason}"$'\n\n'
fi
done
plan_body+="$(translate "PVE will regenerate these files automatically for the current hardware. The rest of the backup will be applied normally.")"$'\n\n'
plan_body+="\Zb$(translate "Continue with safe restore?")\ZB"
if ! dialog --backtitle "ProxMenux" --colors \
--title "$(translate "Restore plan — compatibility check")" \
--yesno "$plan_body" 24 90; then
return 1
fi
# Persist for _rs_apply / _rs_collect_pending_paths to honor.
# We only store paths (not component:* entries) — component
# auto-reinstall already self-skips when the GPU/TPU isn't on
# this host, so we just surfaced it in the dialog for clarity.
RS_SKIP_PATHS="${skip_paths%$'\n'}"
export RS_SKIP_PATHS
fi
# Build the rich confirmation body. Replaces the previous 4-strategy
# menu — by design a Proxmox host restore always requires a reboot
# for predictable end state (pmxcfs live writes + initramfs + driver

View File

@@ -103,7 +103,10 @@ hb_default_profile_paths() {
# ── Common Proxmox tooling (skipped if not present) ──
"/etc/systemd/system" # custom units (including log2ram.service if installed)
"/etc/systemd/journald.conf" # journal retention tuning from post-install
"/etc/log2ram.conf"
"/etc/logrotate.conf"
"/etc/logrotate.d" # post-install drops log2ram + custom logrotate here
"/etc/lm-sensors"
"/etc/sensors3.conf"
"/etc/fail2ban"
@@ -174,6 +177,189 @@ hb_path_warning() {
esac
}
# ==========================================================
# HARDWARE DRIFT ASSESSMENT (smart restore)
# ==========================================================
# Compares the backup metadata captured by hb_prepare_staging
# against the live target host to detect when applying certain
# paths would break the boot (orphan ZFS pool GUID, stale fstab
# UUIDs, ...) or pointlessly reinstall components for hardware
# that's no longer present (NVIDIA driver on a host with no
# NVIDIA card).
#
# Output format on stdout — one line per assessment, tab-separated:
#
# PATH_OR_KEY \t ACTION \t REASON
#
# Where ACTION is one of:
# skip → the restore flow should EXCLUDE this from apply
# warn → restore but surface the warning in the dialog
# ok → no drift detected (omitted from output)
#
# Callers consume this to build the "Restore plan" dialog and to
# filter the hot/pending path lists. The function never modifies
# state, never prompts — pure analysis.
# Print, one per line, the block-device references found in the first
# column of an fstab file: the value of UUID=..., the value of
# PARTUUID=..., or a literal /dev/... path. Comment lines, blank lines
# and any other source (proc, tmpfs, LABEL=, ...) produce no output.
# A missing file is not an error: nothing is printed and 0 is returned.
_hb_fstab_uuids() {
    local fstab_path="$1"
    if [[ ! -f "$fstab_path" ]]; then
        return 0
    fi
    # substr() offsets: "UUID=" is 5 chars, "PARTUUID=" is 9 chars.
    awk '
        /^[[:space:]]*(#|$)/ { next }
        $1 ~ /^UUID=/        { print substr($1, 6);  next }
        $1 ~ /^PARTUUID=/    { print substr($1, 10); next }
        $1 ~ /^\/dev\//      { print $1 }
    ' "$fstab_path"
}
# List the filesystem UUID of every block device blkid reports on the
# live host, one value per line. When blkid is not installed the
# function prints nothing and still succeeds.
_hb_live_uuids() {
    if ! command -v blkid >/dev/null 2>&1; then
        return 0
    fi
    blkid -s UUID -o value 2>/dev/null
}
# Print "name guid" for each ZFS pool imported on the live host, one
# pool per line in zpool's scripted (-H) format. When the zpool tool is
# not installed the function prints nothing and still succeeds.
_hb_live_zpool_guids() {
    if ! command -v zpool >/dev/null 2>&1; then
        return 0
    fi
    zpool list -H -o name,guid 2>/dev/null
}
hb_assess_hardware_drift() {
    # Analyse a staged backup against the live host and print one line per
    # item that should not be applied here:
    #
    #     PATH_OR_KEY <TAB> ACTION <TAB> REASON
    #
    # Arguments: $1 - staging root (expects metadata/ and rootfs/ below it)
    # Outputs:   assessment lines on stdout; nothing when no drift is found
    # Returns:   always 0 — the function only reports, it never fails,
    #            prompts or modifies state.
    local staging_root="$1"
    local meta="$staging_root/metadata"
    local rootfs="$staging_root/rootfs"

    # ── ZFS pool GUID drift ──────────────────────────────
    # If the backup had ZFS pools, compare each (name, guid) pair
    # against what's on this host. A pool with the same NAME but a
    # different GUID is the "fresh PVE install with same pool name"
    # case — restoring /etc/zfs/zpool.cache would point ZFS at a
    # ghost pool and drop boot to emergency.
    if [[ -f "$meta/zpool.guids" ]] && [[ -s "$meta/zpool.guids" ]]; then
        local bk_name bk_guid live_map live_guid
        local pool_mismatch="" pool_missing=""
        live_map=$(_hb_live_zpool_guids)
        while IFS=$'\t ' read -r bk_name bk_guid || [[ -n "$bk_name" ]]; do
            [[ -z "$bk_name" ]] && continue
            # The metadata file is captured with stderr merged in, so it
            # may hold an error message instead of pool rows. A real row
            # always carries a purely numeric GUID; ignore anything else
            # rather than reporting words of an error text as pools.
            [[ "$bk_guid" =~ ^[0-9]+$ ]] || continue
            live_guid=$(awk -v n="$bk_name" '$1==n {print $2; exit}' <<<"$live_map")
            if [[ -z "$live_guid" ]]; then
                pool_missing+="$bk_name "
            elif [[ "$live_guid" != "$bk_guid" ]]; then
                pool_mismatch+="$bk_name(${bk_guid:0:8}…→${live_guid:0:8}…) "
            fi
        done < "$meta/zpool.guids"
        if [[ -n "$pool_missing" ]]; then
            printf '%s\t%s\t%s\n' "/etc/zfs/zpool.cache" "skip" \
                "$(hb_translate "Backup pools not present on this host:") ${pool_missing% }"
        elif [[ -n "$pool_mismatch" ]]; then
            printf '%s\t%s\t%s\n' "/etc/zfs/zpool.cache" "skip" \
                "$(hb_translate "Pool name matches but GUID differs (fresh ZFS install):") ${pool_mismatch% }"
        fi
    fi

    # ── Boot partition UUID drift ────────────────────────
    # /etc/kernel/proxmox-boot-uuids lists the EFI vfat UUIDs that
    # proxmox-boot-tool replicates the bootloader onto. If those
    # UUIDs don't exist on this host, applying the file makes
    # subsequent `proxmox-boot-tool refresh` fail.
    local boot_uuid_file="$rootfs/etc/kernel/proxmox-boot-uuids"
    if [[ -f "$boot_uuid_file" ]] && [[ -s "$boot_uuid_file" ]]; then
        local live_uuids missing_boot="" u
        live_uuids=$(_hb_live_uuids)
        while IFS= read -r u || [[ -n "$u" ]]; do
            # Drop every whitespace character (spaces, tabs, stray CR)
            # so the exact-line comparison below is not defeated by it.
            u="${u//[[:space:]]/}"
            [[ -z "$u" || "$u" == "#"* ]] && continue
            if ! grep -Fxq -- "$u" <<<"$live_uuids"; then
                missing_boot+="$u "
            fi
        done < "$boot_uuid_file"
        if [[ -n "$missing_boot" ]]; then
            printf '%s\t%s\t%s\n' "/etc/kernel/proxmox-boot-uuids" "skip" \
                "$(hb_translate "Boot partition UUIDs from backup not found on this host:") ${missing_boot% }"
        fi
    fi

    # ── fstab UUID drift ─────────────────────────────────
    # Skip ONLY if at least one UUID/PARTUUID/dev in the backup's fstab
    # can't be resolved on the live host. A clean PVE+ZFS root install
    # typically has just `proc /proc proc defaults 0 0`, no UUIDs —
    # that yields zero references and the check is a no-op.
    local fstab="$rootfs/etc/fstab"
    if [[ -f "$fstab" ]]; then
        local live_ids missing_fstab="" ref
        live_ids=$(_hb_live_uuids)
        # _hb_fstab_uuids also returns PARTUUID= values with the prefix
        # stripped, so the live set must contain partition UUIDs too;
        # otherwise every PARTUUID entry would be reported as missing.
        if command -v blkid >/dev/null 2>&1; then
            live_ids+=$'\n'"$(blkid -s PARTUUID -o value 2>/dev/null)"
        fi
        while IFS= read -r ref; do
            [[ -z "$ref" ]] && continue
            if [[ "$ref" == /dev/* ]]; then
                [[ -b "$ref" ]] || missing_fstab+="$ref "
            else
                grep -Fxq -- "$ref" <<<"$live_ids" || missing_fstab+="$ref "
            fi
        done < <(_hb_fstab_uuids "$fstab")
        if [[ -n "$missing_fstab" ]]; then
            printf '%s\t%s\t%s\n' "/etc/fstab" "skip" \
                "$(hb_translate "fstab references UUIDs/devices not present on this host:") ${missing_fstab% }"
        fi
    fi

    # ── Component reinstall drift (GPU / TPU presence) ───
    # components_status.json declares what proxmenux installed on
    # the source (nvidia_driver, amdgpu_top, intel_gpu_tools,
    # coral_driver). If the target hardware no longer has the
    # corresponding device, the post-boot dispatcher would try to
    # reinstall a driver for a card that isn't there. The installer
    # itself short-circuits in that case (detect_*_gpus), but
    # surfacing this in the dialog is cleaner than letting the user
    # discover it from the postboot log.
    local comp_file="$rootfs/usr/local/share/proxmenux/components_status.json"
    if [[ -f "$comp_file" ]] && command -v jq >/dev/null 2>&1 && command -v lspci >/dev/null 2>&1; then
        local live_pci installed_components comp pattern
        live_pci=$(lspci -nn 2>/dev/null)
        installed_components=$(jq -r 'to_entries[] | select(.value.status=="installed") | .key' "$comp_file" 2>/dev/null)
        while IFS= read -r comp; do
            [[ -z "$comp" ]] && continue
            pattern=""
            case "$comp" in
                nvidia_driver)   pattern='NVIDIA' ;;
                amdgpu_top)      pattern='Advanced Micro Devices.*\[AMD/ATI\]' ;;
                # lspci prints the device class BEFORE the vendor, so the
                # name-based alternative only hits when the model name
                # itself says Graphics/Display. The second alternative
                # matches any display-class device (03xx) with Intel's
                # PCI vendor id, whatever the model is called.
                intel_gpu_tools) pattern='Intel.*(VGA|Display|Graphics)|\[03[0-9a-f]{2}\]:.*\[8086:[0-9a-f]{4}\]' ;;
                coral_driver)    pattern='Global Unichip|Google.*Edge TPU' ;;
                *)               continue ;;
            esac
            if ! grep -qiE -- "$pattern" <<<"$live_pci"; then
                printf 'component:%s\t%s\t%s\n' "$comp" "skip" \
                    "$(hb_translate "Component was installed on the backup source but no matching hardware was found on this host.")"
            fi
        done <<<"$installed_components"
    fi

    return 0
}
# Predicate wrapper around hb_assess_hardware_drift: succeeds (0) when
# the assessment output contains at least one entry whose ACTION field
# is "skip", fails (1) otherwise. The restore flow uses it to decide
# whether the smart-restore dialog needs to be shown at all.
hb_has_hardware_drift() {
    local staging_root="$1"
    local report
    report=$(hb_assess_hardware_drift "$staging_root" 2>/dev/null) || true
    # An entry is a skip when "skip" sits between two tabs, i.e. it is
    # the middle field of a KEY<TAB>ACTION<TAB>REASON line.
    if [[ "$report" == *$'\t'skip$'\t'* ]]; then
        return 0
    fi
    return 1
}
# ==========================================================
# PROFILE PATH SELECTION
# ==========================================================
@@ -405,6 +591,20 @@ hb_prepare_staging() {
command -v pct >/dev/null 2>&1 && pct list > "$meta/pct-list.txt" 2>&1 || true
command -v zpool >/dev/null 2>&1 && zpool status > "$meta/zpool.txt" 2>&1 || true
# Extra hardware fingerprints used by hb_compat_check on restore to
# detect drift that would make some paths unsafe to apply:
# * zpool.guids → pool name + GUID. Same pool name on a fresh
# install gets a NEW GUID; restoring /etc/zfs/zpool.cache with
# the old GUID then drops the boot into emergency mode.
# * blkid.txt → all block-device UUIDs, used to verify
# /etc/fstab and /etc/kernel/proxmox-boot-uuids still resolve.
# * lspci.txt → presence test for GPUs / TPUs / NICs referenced
# by components_status.json (so we don't try to reinstall an
# NVIDIA driver on a host with no NVIDIA card any more).
command -v zpool >/dev/null 2>&1 && zpool list -H -o name,guid > "$meta/zpool.guids" 2>&1 || true
command -v blkid >/dev/null 2>&1 && blkid -s UUID -s TYPE > "$meta/blkid.txt" 2>&1 || true
command -v lspci >/dev/null 2>&1 && lspci -nn > "$meta/lspci.txt" 2>&1 || true
# Package inventory — captures what's installed on the source
# host so the restore flow can offer to reinstall missing user
# packages on the target. Solves the "config restored but the