2026-06-09 00:13:24 +02:00
|
|
|
|
#!/bin/bash
|
|
|
|
|
|
# ==========================================================
|
|
|
|
|
|
# ProxMenux - Apply Cluster Configs (post-boot)
|
|
|
|
|
|
# ==========================================================
|
|
|
|
|
|
# Fires AFTER pve-cluster.service is up, when /etc/pve is
|
|
|
|
|
|
# the live pmxcfs FUSE mount. We can write individual files
|
|
|
|
|
|
# to /etc/pve at this point and they propagate through the
|
|
|
|
|
|
# cluster filesystem normally — no need to stop pve-cluster
|
|
|
|
|
|
# (which would be unsafe at this stage of boot).
|
|
|
|
|
|
#
|
|
|
|
|
|
# Trigger: apply_pending_restore.sh writes a marker file at
|
|
|
|
|
|
# /var/lib/proxmenux/cluster-apply-pending whose contents is
|
|
|
|
|
|
# the absolute path of the recovery dir containing the
|
|
|
|
|
|
# extracted /etc/pve content. The systemd unit has
|
|
|
|
|
|
# ConditionPathExists=<marker>, so on a normal boot (no
|
|
|
|
|
|
# marker), the unit short-circuits and does nothing.
|
|
|
|
|
|
|
|
|
|
|
|
set +u
|
|
|
|
|
|
|
|
|
|
|
|
MARKER="${PMX_CLUSTER_APPLY_MARKER:-/var/lib/proxmenux/cluster-apply-pending}"
|
|
|
|
|
|
LOG_DIR="${PMX_LOG_DIR:-/var/log/proxmenux}"
|
|
|
|
|
|
|
|
|
|
|
|
mkdir -p "$LOG_DIR" >/dev/null 2>&1 || true
|
|
|
|
|
|
LOG_FILE="${LOG_DIR}/proxmenux-cluster-postboot-$(date +%Y%m%d_%H%M%S).log"
|
|
|
|
|
|
exec >>"$LOG_FILE" 2>&1
|
|
|
|
|
|
|
|
|
|
|
|
echo "=== ProxMenux cluster post-boot apply at $(date -Iseconds) ==="
|
|
|
|
|
|
|
|
|
|
|
|
if [[ ! -f "$MARKER" ]]; then
|
|
|
|
|
|
echo "No marker found at $MARKER — nothing to apply."
|
|
|
|
|
|
exit 0
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
|
|
# Marker is env-style key=value, written by apply_pending_restore.sh.
|
|
|
|
|
|
# Defaults so a malformed marker still gives us safe behaviour.
|
|
|
|
|
|
RECOVERY_ROOT=""
|
|
|
|
|
|
PENDING_DIR=""
|
|
|
|
|
|
NEEDS_INITRAMFS=0
|
|
|
|
|
|
NEEDS_GRUB=0
|
|
|
|
|
|
# shellcheck source=/dev/null
|
|
|
|
|
|
source "$MARKER"
|
|
|
|
|
|
echo "Recovery root: $RECOVERY_ROOT"
|
|
|
|
|
|
echo "Pending dir: $PENDING_DIR"
|
|
|
|
|
|
echo "Needs initramfs: $NEEDS_INITRAMFS"
|
|
|
|
|
|
echo "Needs grub: $NEEDS_GRUB"
|
|
|
|
|
|
|
|
|
|
|
|
if [[ -z "$RECOVERY_ROOT" || ! -d "$RECOVERY_ROOT" ]]; then
|
|
|
|
|
|
echo "Recovery root invalid — aborting cleanly."
|
|
|
|
|
|
rm -f "$MARKER"
|
|
|
|
|
|
exit 0
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
|
|
SOURCE_PVE="$RECOVERY_ROOT/etc/pve"
|
|
|
|
|
|
if [[ ! -d "$SOURCE_PVE" ]]; then
|
|
|
|
|
|
echo "No /etc/pve content in recovery dir — nothing to do."
|
|
|
|
|
|
rm -f "$MARKER"
|
|
|
|
|
|
exit 0
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
|
|
# Wait for pmxcfs to be fully writable. The After=pve-cluster.service
|
|
|
|
|
|
# in our unit gets us past the service-start point, but on slow boots
|
|
|
|
|
|
# the FUSE mount can take a few extra seconds to settle.
|
|
|
|
|
|
echo "Waiting for /etc/pve to be writable..."
|
|
|
|
|
|
for i in {1..60}; do
|
|
|
|
|
|
if [[ -d /etc/pve ]] \
|
|
|
|
|
|
&& touch "/etc/pve/.proxmenux-test-$$" 2>/dev/null; then
|
|
|
|
|
|
rm -f "/etc/pve/.proxmenux-test-$$" 2>/dev/null
|
|
|
|
|
|
echo "/etc/pve writable after ${i}s"
|
|
|
|
|
|
break
|
|
|
|
|
|
fi
|
|
|
|
|
|
sleep 1
|
|
|
|
|
|
done
|
|
|
|
|
|
|
|
|
|
|
|
# ── Detect source node name for cross-host node rename ────
|
|
|
|
|
|
# The source backup's node dir is whatever the source host
|
|
|
|
|
|
# was called; we copy its contents into THIS host's node
|
|
|
|
|
|
# dir. Two sources for the source hostname, in order of
|
|
|
|
|
|
# preference:
|
|
|
|
|
|
# 1. metadata/run_info.env from the pending dir (definitive)
|
|
|
|
|
|
# 2. The first (and usually only) dir under nodes/ in the
|
|
|
|
|
|
# source backup — works when metadata is missing
|
|
|
|
|
|
SRC_NODE=""
|
|
|
|
|
|
if [[ -n "$PENDING_DIR" ]]; then
|
|
|
|
|
|
META_RUN_INFO=$(find "$PENDING_DIR" -maxdepth 3 -name run_info.env 2>/dev/null | head -1)
|
|
|
|
|
|
if [[ -n "$META_RUN_INFO" && -f "$META_RUN_INFO" ]]; then
|
|
|
|
|
|
SRC_NODE=$(grep -m1 '^hostname=' "$META_RUN_INFO" 2>/dev/null | cut -d= -f2- | tr -d '[:space:]')
|
|
|
|
|
|
fi
|
|
|
|
|
|
fi
|
|
|
|
|
|
if [[ -z "$SRC_NODE" && -d "$SOURCE_PVE/nodes" ]]; then
|
|
|
|
|
|
SRC_NODE=$(find "$SOURCE_PVE/nodes" -mindepth 1 -maxdepth 1 -type d 2>/dev/null | head -1)
|
|
|
|
|
|
SRC_NODE=$(basename "$SRC_NODE" 2>/dev/null)
|
|
|
|
|
|
fi
|
|
|
|
|
|
CUR_NODE=$(hostname)
|
|
|
|
|
|
echo "Source node: ${SRC_NODE:-(unknown)} / Current node: ${CUR_NODE}"
|
|
|
|
|
|
|
|
|
|
|
|
# ── Apply EVERY top-level file in /etc/pve ────────────────
|
|
|
|
|
|
# Anything that's a regular file at the root of /etc/pve
|
|
|
|
|
|
# (datacenter.cfg, storage.cfg, user.cfg, domains.cfg,
|
|
|
|
|
|
# vzdump.cron, jobs.cfg, replication.cfg, ceph.conf,
|
|
|
|
|
|
# corosync.conf if cluster, etc). pmxcfs symlinks like
|
|
|
|
|
|
# /etc/pve/local, /etc/pve/lxc, /etc/pve/qemu-server,
|
|
|
|
|
|
# /etc/pve/openvz are auto-created by pmxcfs and we skip
|
|
|
|
|
|
# them — copying over them throws "Operation not permitted".
|
|
|
|
|
|
echo ""
|
|
|
|
|
|
echo "── Global config files ──"
|
|
|
|
|
|
copied_global=0
|
|
|
|
|
|
PMX_SYMLINKS_SKIP="local lxc qemu-server openvz"
|
|
|
|
|
|
for src in "$SOURCE_PVE"/*; do
|
|
|
|
|
|
[[ -f "$src" ]] || continue
|
|
|
|
|
|
name=$(basename "$src")
|
|
|
|
|
|
# Skip files that mirror pmxcfs symlinks
|
|
|
|
|
|
skip=0
|
|
|
|
|
|
for s in $PMX_SYMLINKS_SKIP; do
|
|
|
|
|
|
[[ "$name" == "$s" ]] && { skip=1; break; }
|
|
|
|
|
|
done
|
|
|
|
|
|
(( skip )) && continue
|
|
|
|
|
|
if cp -f "$src" "/etc/pve/$name" 2>&1; then
|
|
|
|
|
|
echo " ✓ $name"
|
|
|
|
|
|
((copied_global++))
|
|
|
|
|
|
else
|
|
|
|
|
|
echo " ✗ $name (cp failed)"
|
|
|
|
|
|
fi
|
|
|
|
|
|
done
|
|
|
|
|
|
|
|
|
|
|
|
# ── Subdirectories we want to preserve verbatim ───────────
|
|
|
|
|
|
# Each gets contents copied flat (no recursive dir copy of
|
|
|
|
|
|
# symlinks). These are the "shared cluster state" dirs.
|
|
|
|
|
|
echo ""
|
|
|
|
|
|
echo "── Cluster subdirectories ──"
|
|
|
|
|
|
copied_subdirs=0
|
|
|
|
|
|
for subdir in firewall sdn mapping virtual-guest priv ha; do
|
|
|
|
|
|
src_dir="$SOURCE_PVE/$subdir"
|
|
|
|
|
|
[[ -d "$src_dir" ]] || continue
|
|
|
|
|
|
mkdir -p "/etc/pve/$subdir" 2>/dev/null || true
|
|
|
|
|
|
while IFS= read -r f; do
|
|
|
|
|
|
rel="${f#"$src_dir"/}"
|
|
|
|
|
|
dst="/etc/pve/$subdir/$rel"
|
|
|
|
|
|
if [[ -d "$f" ]]; then
|
|
|
|
|
|
mkdir -p "$dst" 2>/dev/null || true
|
|
|
|
|
|
elif [[ -f "$f" ]]; then
|
|
|
|
|
|
mkdir -p "$(dirname "$dst")" 2>/dev/null || true
|
|
|
|
|
|
cp -f "$f" "$dst" 2>/dev/null && ((copied_subdirs++))
|
|
|
|
|
|
fi
|
|
|
|
|
|
done < <(find "$src_dir" -mindepth 1 2>/dev/null)
|
|
|
|
|
|
echo " ✓ $subdir/ (subtree)"
|
|
|
|
|
|
done
|
|
|
|
|
|
|
|
|
|
|
|
# ── Apply guest configs into THIS node's dir ──────────────
|
|
|
|
|
|
# This is the bit that makes `pct list` / `qm list` show
|
|
|
|
|
|
# the restored guests. We deliberately copy from the
|
|
|
|
|
|
# source's node dir into the current host's node dir, so
|
|
|
|
|
|
# cross-host restores Just Work without renaming anything.
|
|
|
|
|
|
echo ""
|
|
|
|
|
|
echo "── Guest configs (LXC + QEMU) ──"
|
|
|
|
|
|
copied_guests=0
|
|
|
|
|
|
skipped_guests=0
|
|
|
|
|
|
if [[ -n "$SRC_NODE" ]] && [[ -d "$SOURCE_PVE/nodes/$SRC_NODE" ]]; then
|
|
|
|
|
|
for kind in lxc qemu-server; do
|
|
|
|
|
|
src_dir="$SOURCE_PVE/nodes/$SRC_NODE/$kind"
|
|
|
|
|
|
dst_dir="/etc/pve/nodes/$CUR_NODE/$kind"
|
|
|
|
|
|
[[ -d "$src_dir" ]] || continue
|
|
|
|
|
|
mkdir -p "$dst_dir" 2>/dev/null || true
|
|
|
|
|
|
for conf in "$src_dir"/*.conf; do
|
|
|
|
|
|
[[ -f "$conf" ]] || continue
|
|
|
|
|
|
vmid=$(basename "$conf" .conf)
|
|
|
|
|
|
if [[ -e "$dst_dir/$vmid.conf" ]]; then
|
|
|
|
|
|
echo " ⚠ $kind/$vmid.conf already exists on this host — skipping (avoid clash)"
|
|
|
|
|
|
((skipped_guests++))
|
|
|
|
|
|
continue
|
|
|
|
|
|
fi
|
|
|
|
|
|
if cp -f "$conf" "$dst_dir/$vmid.conf" 2>&1; then
|
|
|
|
|
|
echo " ✓ $kind/$vmid.conf"
|
|
|
|
|
|
((copied_guests++))
|
|
|
|
|
|
else
|
|
|
|
|
|
echo " ✗ $kind/$vmid.conf (cp failed)"
|
|
|
|
|
|
fi
|
|
|
|
|
|
done
|
|
|
|
|
|
done
|
|
|
|
|
|
else
|
|
|
|
|
|
echo " (no source node dir to copy from)"
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
2026-06-09 19:14:27 +02:00
|
|
|
|
# ── LXC bind-mount stub directories ───────────────────────
|
|
|
|
|
|
# LXC containers with `mp<n>: /path,mp=...` bind-mount entries fail the
|
|
|
|
|
|
# pre-start hook (status 2) if `/path` doesn't exist on the host. After a
|
|
|
|
|
|
# cross-host restore the source's bind-mount paths (custom NAS mounts, second
|
|
|
|
|
|
# disk paths, etc.) generally don't exist on the target's fresh install yet.
|
|
|
|
|
|
# We create empty stubs so `onboot: 1` containers start; the operator wires
|
|
|
|
|
|
# the real data source afterwards. PVE-managed storages (`/mnt/pve/*`) and
|
|
|
|
|
|
# /dev/* are skipped — PVE handles the first, kernel handles the second.
|
|
|
|
|
|
echo ""
|
|
|
|
|
|
echo "── LXC bind-mount stubs ──"
|
|
|
|
|
|
stub_created=0
|
|
|
|
|
|
stub_skipped=0
|
|
|
|
|
|
if compgen -G "/etc/pve/nodes/$CUR_NODE/lxc/*.conf" >/dev/null 2>&1; then
|
|
|
|
|
|
for conf in /etc/pve/nodes/"$CUR_NODE"/lxc/*.conf; do
|
|
|
|
|
|
[[ -f "$conf" ]] || continue
|
|
|
|
|
|
while IFS= read -r line; do
|
|
|
|
|
|
if [[ "$line" =~ ^mp[0-9]+:[[:space:]]*(/[^,]+), ]]; then
|
|
|
|
|
|
src="${BASH_REMATCH[1]}"
|
|
|
|
|
|
[[ "$src" == /mnt/pve/* ]] && continue
|
|
|
|
|
|
[[ "$src" == /dev/* ]] && continue
|
|
|
|
|
|
if [[ -e "$src" ]]; then
|
|
|
|
|
|
((stub_skipped++))
|
|
|
|
|
|
continue
|
|
|
|
|
|
fi
|
|
|
|
|
|
if mkdir -p "$src" 2>/dev/null; then
|
|
|
|
|
|
echo " + stub $src (from $(basename "$conf"))"
|
|
|
|
|
|
((stub_created++))
|
|
|
|
|
|
fi
|
|
|
|
|
|
fi
|
|
|
|
|
|
done < "$conf"
|
|
|
|
|
|
done
|
|
|
|
|
|
fi
|
|
|
|
|
|
echo "Stubs: created=$stub_created, already-present=$stub_skipped"
|
|
|
|
|
|
|
|
|
|
|
|
# ── Stale node-dir cleanup ────────────────────────────────
|
|
|
|
|
|
# Fresh PVE install creates /etc/pve/nodes/<install-hostname>/. After our
|
|
|
|
|
|
# restore changes the hostname back to the source's, pve-cluster boots into
|
|
|
|
|
|
# the source's node dir but leaves the install-hostname dir orphaned. The
|
|
|
|
|
|
# web UI then shows a phantom offline node. Only remove dirs whose lxc/
|
|
|
|
|
|
# qemu-server/ are empty — never trample a real second cluster member.
|
|
|
|
|
|
echo ""
|
|
|
|
|
|
echo "── Stale node-dir cleanup ──"
|
|
|
|
|
|
removed_nodes=0
|
|
|
|
|
|
for nodedir in /etc/pve/nodes/*/; do
|
|
|
|
|
|
n=$(basename "$nodedir")
|
|
|
|
|
|
[[ "$n" == "$CUR_NODE" ]] && continue
|
|
|
|
|
|
lxc_empty=1; qemu_empty=1
|
|
|
|
|
|
[[ -d "$nodedir/lxc" ]] && [[ -n "$(ls -A "$nodedir/lxc" 2>/dev/null)" ]] && lxc_empty=0
|
|
|
|
|
|
[[ -d "$nodedir/qemu-server" ]] && [[ -n "$(ls -A "$nodedir/qemu-server" 2>/dev/null)" ]] && qemu_empty=0
|
|
|
|
|
|
if (( lxc_empty && qemu_empty )); then
|
|
|
|
|
|
if rm -rf "$nodedir" 2>/dev/null; then
|
|
|
|
|
|
echo " ✓ removed stale node dir: $n"
|
|
|
|
|
|
((removed_nodes++))
|
|
|
|
|
|
else
|
|
|
|
|
|
echo " ✗ rm failed for $n (pmxcfs may have it busy)"
|
|
|
|
|
|
fi
|
|
|
|
|
|
else
|
|
|
|
|
|
echo " ⚠ kept $n (has guest configs — looks like a real cluster member)"
|
|
|
|
|
|
fi
|
|
|
|
|
|
done
|
|
|
|
|
|
echo "Stale node dirs removed: $removed_nodes"
|
|
|
|
|
|
|
2026-06-09 00:13:24 +02:00
|
|
|
|
# ── Done with cluster config apply ─────────────────────────
|
|
|
|
|
|
echo ""
|
|
|
|
|
|
echo "Cluster summary: globals=$copied_global, subdirs=$copied_subdirs, guests=$copied_guests, guest-clashes-skipped=$skipped_guests"
|
|
|
|
|
|
|
|
|
|
|
|
# Remove the marker NOW (before the slow maintenance step
|
|
|
|
|
|
# below) so if the operator reboots mid-maintenance, we
|
|
|
|
|
|
# don't redo the (idempotent but wasteful) cluster apply.
|
|
|
|
|
|
# Maintenance below is also idempotent on re-run but takes
|
|
|
|
|
|
# 10+ min, so we'd rather not repeat it either — see the
|
|
|
|
|
|
# marker handling in the maintenance block.
|
|
|
|
|
|
rm -f "$MARKER"
|
|
|
|
|
|
|
|
|
|
|
|
# ── Post-restore maintenance (slow, deferrable) ────────────
|
|
|
|
|
|
# After a host-config restore, we need to:
|
|
|
|
|
|
# - update-initramfs -u -k all → so /etc/modules /etc/modprobe.d
|
|
|
|
|
|
# /etc/initramfs-tools changes get baked into the initramfs
|
|
|
|
|
|
# of every installed kernel for the NEXT boot.
|
|
|
|
|
|
# - update-grub → so /etc/default/grub changes land in
|
|
|
|
|
|
# /boot/grub/grub.cfg for the NEXT boot.
|
|
|
|
|
|
#
|
|
|
|
|
|
# These are EXPENSIVE (initramfs build per kernel × 3 = 5-10 min;
|
|
|
|
|
|
# grub a few seconds) but the user's system is already fully up
|
|
|
|
|
|
# at this point: they can SSH in, use PVE, do anything — these
|
|
|
|
|
|
# run in the background and finish whenever they finish. The
|
|
|
|
|
|
# unit's TimeoutStartSec=900 (set in apply_pending_restore.sh)
|
|
|
|
|
|
# gives us a 15-min cushion. We log progress to the same log
|
|
|
|
|
|
# file so the operator can `tail -f` if curious.
|
|
|
|
|
|
echo ""
|
|
|
|
|
|
echo "── Post-restore maintenance ──"
|
|
|
|
|
|
# Only do these if the apply_pending_restore.sh's path-trigger
|
|
|
|
|
|
# analysis said they're needed. On a restore that didn't touch
|
|
|
|
|
|
# /etc/modules /etc/default/grub etc., both flags are 0 and we
|
|
|
|
|
|
# skip the slow rebuild entirely.
|
|
|
|
|
|
MAINT_MARKER="/var/lib/proxmenux/post-restore-maintenance-pending"
|
|
|
|
|
|
if [[ "$NEEDS_INITRAMFS" == "1" ]] || [[ "$NEEDS_GRUB" == "1" ]]; then
|
|
|
|
|
|
mkdir -p /var/lib/proxmenux >/dev/null 2>&1 || true
|
|
|
|
|
|
printf 'started: %s\n' "$(date -Iseconds)" > "$MAINT_MARKER"
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
|
|
if [[ "$NEEDS_INITRAMFS" == "1" ]] && command -v update-initramfs >/dev/null 2>&1; then
|
|
|
|
|
|
echo "Running: update-initramfs -u -k all (5-10 min — restore touched initramfs inputs)"
|
|
|
|
|
|
if update-initramfs -u -k all 2>&1 | tail -10; then
|
|
|
|
|
|
echo " ✓ update-initramfs done"
|
|
|
|
|
|
else
|
|
|
|
|
|
echo " ✗ update-initramfs failed (system still boots; re-run manually)"
|
|
|
|
|
|
fi
|
|
|
|
|
|
else
|
|
|
|
|
|
echo "Skipping update-initramfs (restore didn't touch modules/initramfs-tools/crypttab)"
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
|
|
if [[ "$NEEDS_GRUB" == "1" ]] && command -v update-grub >/dev/null 2>&1; then
|
|
|
|
|
|
echo "Running: update-grub"
|
|
|
|
|
|
if update-grub 2>&1 | tail -3; then
|
|
|
|
|
|
echo " ✓ update-grub done"
|
|
|
|
|
|
else
|
|
|
|
|
|
echo " ✗ update-grub failed (re-run manually)"
|
|
|
|
|
|
fi
|
|
|
|
|
|
else
|
|
|
|
|
|
echo "Skipping update-grub (restore didn't touch /etc/default/grub or /etc/kernel)"
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
|
|
# Clean up the maintenance marker now that we're done.
|
|
|
|
|
|
rm -f "$MAINT_MARKER"
|
|
|
|
|
|
|
2026-06-09 17:42:51 +02:00
|
|
|
|
# ── Component auto-reinstall (driven by components_status.json) ──
|
|
|
|
|
|
# The host-config restore brings back ProxMenux state (including
|
|
|
|
|
|
# components_status.json) but NOT the binary artifacts those
|
|
|
|
|
|
# components installed outside of apt — driver modules under
|
|
|
|
|
|
# /lib/modules/<kernel>/, binaries in /usr/bin/<tool>, downloaded
|
|
|
|
|
|
# .deb files, DKMS source trees, etc. For each component the
|
|
|
|
|
|
# restore state says was installed, we kick off its native
|
|
|
|
|
|
# installer in `--auto-reinstall` mode so it replays the install
|
|
|
|
|
|
# without dialogs. The installer's own logic handles "already
|
|
|
|
|
|
# present → no-op", so this is idempotent.
|
|
|
|
|
|
#
|
|
|
|
|
|
# Apt-only components are still handled by the
|
|
|
|
|
|
# packages.manual.list pass done earlier in the restore flow
|
|
|
|
|
|
# (they're in `apt-mark showmanual`). Running the installer here
|
|
|
|
|
|
# for them is harmless overhead (the installer just sees the
|
|
|
|
|
|
# package is present and exits 0), so we don't try to filter.
|
|
|
|
|
|
#
|
|
|
|
|
|
# To register a NEW component for auto-reinstall: add it to the
|
|
|
|
|
|
# COMPONENT_INSTALLERS array below as "component_key:relative
|
|
|
|
|
|
# script path". The script must accept `--auto-reinstall` and
|
|
|
|
|
|
# read its own state from components_status.json.
|
|
|
|
|
|
COMPONENTS_STATUS="/usr/local/share/proxmenux/components_status.json"
|
|
|
|
|
|
COMPONENT_INSTALLERS=(
|
|
|
|
|
|
"nvidia_driver:gpu_tpu/nvidia_installer.sh"
|
|
|
|
|
|
"amdgpu_top:gpu_tpu/amd_gpu_tools.sh"
|
|
|
|
|
|
"intel_gpu_tools:gpu_tpu/intel_gpu_tools.sh"
|
|
|
|
|
|
"coral_driver:gpu_tpu/install_coral.sh"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
if command -v jq >/dev/null 2>&1 && [[ -f "$COMPONENTS_STATUS" ]]; then
|
|
|
|
|
|
echo ""
|
|
|
|
|
|
echo "── Component auto-reinstall ──"
|
|
|
|
|
|
SCRIPTS_BASE="/usr/local/share/proxmenux/scripts"
|
|
|
|
|
|
for entry in "${COMPONENT_INSTALLERS[@]}"; do
|
|
|
|
|
|
comp="${entry%%:*}"
|
|
|
|
|
|
installer="$SCRIPTS_BASE/${entry#*:}"
|
|
|
|
|
|
|
|
|
|
|
|
comp_status=$(jq -r ".${comp}.status // \"\"" "$COMPONENTS_STATUS" 2>/dev/null)
|
|
|
|
|
|
if [[ "$comp_status" != "installed" ]]; then
|
|
|
|
|
|
continue # Was never installed on the source, or was uninstalled — skip.
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
|
|
if [[ ! -f "$installer" ]]; then
|
|
|
|
|
|
echo " ✗ $comp: installer missing at $installer — skipping"
|
|
|
|
|
|
continue
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
|
|
echo ""
|
|
|
|
|
|
echo " → $comp (running $installer --auto-reinstall)"
|
|
|
|
|
|
# Run with limited output capture. The installer logs in full to
|
|
|
|
|
|
# its own log file; we only echo a tail here for the operator.
|
|
|
|
|
|
bash "$installer" --auto-reinstall 2>&1 | sed -e 's/^/ /' | tail -15
|
|
|
|
|
|
rc=${PIPESTATUS[0]}
|
|
|
|
|
|
if (( rc == 0 )); then
|
|
|
|
|
|
echo " ✓ $comp ok"
|
|
|
|
|
|
else
|
|
|
|
|
|
echo " ✗ $comp installer exited $rc — see its own log"
|
|
|
|
|
|
fi
|
|
|
|
|
|
done
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
2026-06-10 19:05:13 +02:00
|
|
|
|
POSTBOOT_END_EPOCH=$(date +%s)
|
|
|
|
|
|
POSTBOOT_DURATION=$((POSTBOOT_END_EPOCH - $(stat -c %Y "$LOG_FILE")))
|
|
|
|
|
|
POSTBOOT_DURATION_FMT=$(printf '%dm%02ds' $((POSTBOOT_DURATION / 60)) $((POSTBOOT_DURATION % 60)))
|
|
|
|
|
|
|
|
|
|
|
|
# ── Notify ProxMenux Monitor that we're done ───────────────────
|
|
|
|
|
|
# Routes through the user's configured channels (Telegram, Discord,
|
|
|
|
|
|
# ntfy, etc.). Localhost-only endpoint, no auth needed. We try
|
|
|
|
|
|
# briefly — if the Monitor isn't running, just log and move on.
|
|
|
|
|
|
COMPONENTS_REINSTALLED_CSV=""
|
|
|
|
|
|
if command -v jq >/dev/null 2>&1 && [[ -f "$COMPONENTS_STATUS" ]]; then
|
|
|
|
|
|
COMPONENTS_REINSTALLED_CSV=$(
|
|
|
|
|
|
for entry in "${COMPONENT_INSTALLERS[@]}"; do
|
|
|
|
|
|
comp="${entry%%:*}"
|
|
|
|
|
|
s=$(jq -r ".${comp}.status // \"\"" "$COMPONENTS_STATUS" 2>/dev/null)
|
|
|
|
|
|
[[ "$s" == "installed" ]] && printf '%s,' "$comp"
|
|
|
|
|
|
done | sed 's/,$//'
|
|
|
|
|
|
)
|
|
|
|
|
|
[[ -z "$COMPONENTS_REINSTALLED_CSV" ]] && COMPONENTS_REINSTALLED_CSV="none"
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
|
|
if command -v curl >/dev/null 2>&1; then
|
|
|
|
|
|
PAYLOAD=$(printf '{"hostname":"%s","guests":"%s","stubs":"%s","stale_nodes":"%s","components":"%s","duration":"%s"}' \
|
|
|
|
|
|
"$(hostname)" \
|
|
|
|
|
|
"${copied_guests:-0}" \
|
|
|
|
|
|
"${stub_created:-0}" \
|
|
|
|
|
|
"${removed_nodes:-0}" \
|
|
|
|
|
|
"${COMPONENTS_REINSTALLED_CSV:-none}" \
|
|
|
|
|
|
"$POSTBOOT_DURATION_FMT")
|
|
|
|
|
|
NOTIFY_HTTP=$(curl -s -o /dev/null -w '%{http_code}' \
|
|
|
|
|
|
-X POST "http://127.0.0.1:8008/api/internal/restore-event" \
|
|
|
|
|
|
-H "Content-Type: application/json" \
|
|
|
|
|
|
-d "$PAYLOAD" \
|
|
|
|
|
|
--max-time 5 2>/dev/null || echo "000")
|
|
|
|
|
|
if [[ "$NOTIFY_HTTP" == "200" ]]; then
|
|
|
|
|
|
echo "Notification sent (HTTP 200)"
|
|
|
|
|
|
else
|
|
|
|
|
|
echo "Notification skipped (Monitor not reachable or disabled — HTTP $NOTIFY_HTTP)"
|
|
|
|
|
|
fi
|
|
|
|
|
|
fi
|
|
|
|
|
|
|
2026-06-09 00:13:24 +02:00
|
|
|
|
echo ""
|
2026-06-10 19:05:13 +02:00
|
|
|
|
echo "=== Apply finished at $(date -Iseconds) — total ${POSTBOOT_DURATION_FMT} ==="
|
2026-06-09 00:13:24 +02:00
|
|
|
|
echo "Log: $LOG_FILE"
|