Update 1.2.2.1 beta

This commit is contained in:
MacRimi
2026-06-09 17:42:51 +02:00
parent 61ff665cec
commit f0b8474350
5 changed files with 262 additions and 2 deletions

View File

@@ -244,6 +244,67 @@ fi
# Clean up the maintenance marker now that we're done.
rm -f -- "$MAINT_MARKER"

# ── Component auto-reinstall (driven by components_status.json) ──
# The host-config restore brings back ProxMenux state (including
# components_status.json) but NOT the binary artifacts those
# components installed outside of apt — driver modules under
# /lib/modules/<kernel>/, binaries in /usr/bin/<tool>, downloaded
# .deb files, DKMS source trees, etc. For each component the
# restored state marks as installed, we run its native installer in
# `--auto-reinstall` mode so it replays the install without dialogs.
# Each installer is expected to treat "already present" as a no-op,
# which is what makes this pass safe to repeat.
#
# Apt-only components are still handled by the
# packages.manual.list pass done earlier in the restore flow
# (they're in `apt-mark showmanual`). Running the installer here
# for them is harmless overhead, so we don't try to filter.
#
# To register a NEW component for auto-reinstall: add it to the
# COMPONENT_INSTALLERS array below as "component_key:relative
# script path". The script must accept `--auto-reinstall` and
# read its own state from components_status.json.
COMPONENTS_STATUS="/usr/local/share/proxmenux/components_status.json"
COMPONENT_INSTALLERS=(
    "nvidia_driver:gpu_tpu/nvidia_installer.sh"
    "amdgpu_top:gpu_tpu/amd_gpu_tools.sh"
    "intel_gpu_tools:gpu_tpu/intel_gpu_tools.sh"
    "coral_driver:gpu_tpu/install_coral.sh"
)
if command -v jq >/dev/null 2>&1 && [[ -f "$COMPONENTS_STATUS" ]]; then
    echo ""
    echo "── Component auto-reinstall ──"
    SCRIPTS_BASE="/usr/local/share/proxmenux/scripts"
    for entry in "${COMPONENT_INSTALLERS[@]}"; do
        # Entry format is "key:relative/path"; split on the first colon.
        comp="${entry%%:*}"
        installer="$SCRIPTS_BASE/${entry#*:}"
        # Hand the key to jq as data (--arg) rather than splicing it into
        # the filter text, so keys that are not identifier-shaped (for
        # example ones containing a hyphen) are still looked up correctly
        # instead of being mis-parsed and silently skipped.
        comp_status=$(jq -r --arg key "$comp" '.[$key].status // ""' "$COMPONENTS_STATUS" 2>/dev/null)
        if [[ "$comp_status" != "installed" ]]; then
            continue # Was never installed on the source, or was uninstalled — skip.
        fi
        if [[ ! -f "$installer" ]]; then
            echo "$comp: installer missing at $installer — skipping"
            continue
        fi
        echo ""
        echo "$comp (running $installer --auto-reinstall)"
        # Run with limited output capture. The installer logs in full to
        # its own log file; we only echo a tail here for the operator.
        bash "$installer" --auto-reinstall 2>&1 | sed -e 's/^/ /' | tail -15
        # PIPESTATUS[0] is the installer's own exit status, not that of
        # sed/tail; it must be read immediately after the pipeline.
        rc=${PIPESTATUS[0]}
        if (( rc == 0 )); then
            echo "$comp ok"
        else
            echo "$comp installer exited $rc — see its own log"
        fi
    done
elif [[ -f "$COMPONENTS_STATUS" ]]; then
    # State exists but cannot be read: say so instead of skipping silently.
    echo ""
    echo "── Component auto-reinstall skipped: jq not available ──"
fi
echo ""
echo "=== Apply finished at $(date -Iseconds) ==="
echo "Log: $LOG_FILE"

View File

@@ -257,7 +257,39 @@ main() {
fi
}
# ==========================================================
# Non-interactive auto-reinstall (post-restore hook)
# ==========================================================
# Entry point for `--auto-reinstall`. Per the restore flow, this is
# run from apply_cluster_postboot.sh when components_status marks
# amdgpu_top as installed. The .deb fetched from GitHub is not in
# packages.manual.list, so when the tool is missing on the target
# we re-fetch and install it. No dialogs.
#
# Globals read: LOG_FILE (truncated, then appended to),
#               COMPONENTS_STATUS_FILE
# Returns:
#   0  nothing to do (not marked installed, or already present)
#   1  jq or the state file is unavailable
#   2  latest-release lookup failed
#   otherwise the exit status of install_amdgpu_top
auto_reinstall_from_state() {
    : >"$LOG_FILE"
    echo "=== amd_gpu_tools auto_reinstall $(date -Iseconds) ===" >>"$LOG_FILE"

    # Record WHY we bail out: the caller only reports the exit code and
    # points the operator at this log.
    if ! command -v jq >/dev/null 2>&1; then
        echo "jq not available — cannot read components_status.json" >>"$LOG_FILE"
        return 1
    fi
    if [[ ! -f "$COMPONENTS_STATUS_FILE" ]]; then
        echo "No components_status.json at $COMPONENTS_STATUS_FILE" >>"$LOG_FILE"
        return 1
    fi

    local s
    s=$(jq -r '.amdgpu_top.status // ""' "$COMPONENTS_STATUS_FILE" 2>/dev/null)
    if [[ "$s" != "installed" ]]; then
        echo "not installed in state ($s)" >>"$LOG_FILE"
        return 0
    fi

    # Either the binary on PATH or the registered package counts as present.
    if command -v amdgpu_top >/dev/null 2>&1 || dpkg -s amdgpu-top >/dev/null 2>&1; then
        echo "already present — no-op" >>"$LOG_FILE"
        return 0
    fi

    export DEBIAN_FRONTEND=noninteractive
    install_dependencies >>"$LOG_FILE" 2>&1
    if ! get_latest_release >>"$LOG_FILE" 2>&1; then
        echo "Failed to fetch latest release info" >>"$LOG_FILE"
        return 2
    fi
    # Last command: its exit status becomes the function's status.
    install_amdgpu_top
}
# Entry point. Dispatch only when the file is executed directly, so
# sourcing it for its functions has no side effects.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
    case "${1:-}" in
        --auto-reinstall)
            # Post-restore hook: replay the install without dialogs and
            # hand its exit status back to the caller.
            auto_reinstall_from_state
            exit $?
            ;;
    esac
    main
fi

View File

@@ -749,4 +749,57 @@ main() {
esac
}
# ==========================================================
# Non-interactive auto-reinstall (post-restore hook)
# ==========================================================
# Entry point for `--auto-reinstall`. Per the restore flow, this is
# run from apply_cluster_postboot.sh when components_status marks
# coral_driver as installed. Coral has two independent install
# branches: the PCIe/M.2 gasket+apex DKMS modules (kernel-level)
# and the USB libedgetpu user-space runtime. Each branch is
# replayed only when matching hardware is detected on this host and
# that branch is not already installed.
#
# Globals read: LOG_FILE (truncated, then appended to),
#               COMPONENTS_STATUS_FILE, and the CORAL_* variables
#               evaluated after detect_coral_hardware and
#               detect_coral_install_state have run.
# Returns:
#   0  nothing to do, or every attempted branch succeeded
#   1  jq or the state file is unavailable
#   2  at least one install branch failed
auto_reinstall_from_state() {
    : >"$LOG_FILE"
    echo "=== install_coral auto_reinstall $(date -Iseconds) ===" >>"$LOG_FILE"

    # Record WHY we bail out: the caller only reports the exit code and
    # points the operator at this log.
    if ! command -v jq >/dev/null 2>&1; then
        echo "jq not available — cannot read components_status.json" >>"$LOG_FILE"
        return 1
    fi
    if [[ ! -f "$COMPONENTS_STATUS_FILE" ]]; then
        echo "No components_status.json at $COMPONENTS_STATUS_FILE" >>"$LOG_FILE"
        return 1
    fi

    local s
    local rc=0
    s=$(jq -r '.coral_driver.status // ""' "$COMPONENTS_STATUS_FILE" 2>/dev/null)
    if [[ "$s" != "installed" ]]; then
        echo "not installed in state ($s)" >>"$LOG_FILE"
        return 0
    fi

    detect_coral_hardware
    detect_coral_install_state

    # No Coral hardware on this host? Skip — nothing to install.
    if (( CORAL_PCIE_COUNT == 0 && CORAL_USB_COUNT == 0 )); then
        echo "no Coral hardware on this host — skipping" >>"$LOG_FILE"
        return 0
    fi

    # Already healthy on every branch that has matching hardware → bail out.
    # The *_INSTALLED flags are run as commands (presumably `true`/`false`).
    if { (( CORAL_PCIE_COUNT == 0 )) || $CORAL_PCIE_INSTALLED; } \
        && { (( CORAL_USB_COUNT == 0 )) || $CORAL_USB_INSTALLED; }; then
        echo "already healthy — no-op" >>"$LOG_FILE"
        return 0
    fi

    export DEBIAN_FRONTEND=noninteractive

    # A failure in one branch must not stop the other from being tried,
    # but it must still surface in the exit status so the caller does not
    # report "ok" for a broken install.
    if (( CORAL_PCIE_COUNT > 0 )) && ! $CORAL_PCIE_INSTALLED; then
        echo "Installing gasket+apex DKMS modules..." >>"$LOG_FILE"
        if ! install_gasket_apex_dkms >>"$LOG_FILE" 2>&1; then
            echo "PCIe branch failed" >>"$LOG_FILE"
            rc=2
        fi
    fi
    if (( CORAL_USB_COUNT > 0 )) && ! $CORAL_USB_INSTALLED; then
        echo "Installing libedgetpu USB runtime..." >>"$LOG_FILE"
        if ! install_libedgetpu_runtime >>"$LOG_FILE" 2>&1; then
            echo "USB branch failed" >>"$LOG_FILE"
            rc=2
        fi
    fi

    return "$rc"
}
# Entry dispatch: `--auto-reinstall` runs the non-interactive
# post-restore hook and exits with its status; anything else falls
# through to the interactive main flow.
case "${1:-}" in
    --auto-reinstall)
        auto_reinstall_from_state
        exit $?
        ;;
esac
main

View File

@@ -194,7 +194,29 @@ main() {
fi
}
# Script entry points: the auto-reinstall hook, then the main-function dispatch
# ==========================================================
# Non-interactive auto-reinstall (post-restore hook)
# ==========================================================
# Entry point for `--auto-reinstall`. Reinstalls intel-gpu-tools
# without dialogs when the state file marks intel_gpu_tools as
# installed but dpkg does not know the package.
#
# Globals read: LOG_FILE (truncated, then appended to),
#               COMPONENTS_STATUS_FILE
# Returns:
#   0  nothing to do (not marked installed, or already present)
#   1  jq or the state file is unavailable
#   otherwise the exit status of install_intel_gpu_tools
auto_reinstall_from_state() {
    : >"$LOG_FILE"
    echo "=== intel_gpu_tools auto_reinstall $(date -Iseconds) ===" >>"$LOG_FILE"

    # Record WHY we bail out: the caller only reports the exit code and
    # points the operator at this log.
    if ! command -v jq >/dev/null 2>&1; then
        echo "jq not available — cannot read components_status.json" >>"$LOG_FILE"
        return 1
    fi
    if [[ ! -f "$COMPONENTS_STATUS_FILE" ]]; then
        echo "No components_status.json at $COMPONENTS_STATUS_FILE" >>"$LOG_FILE"
        return 1
    fi

    local s
    s=$(jq -r '.intel_gpu_tools.status // ""' "$COMPONENTS_STATUS_FILE" 2>/dev/null)
    if [[ "$s" != "installed" ]]; then
        echo "not installed in state ($s)" >>"$LOG_FILE"
        return 0
    fi

    if dpkg -s intel-gpu-tools >/dev/null 2>&1; then
        echo "already present — no-op" >>"$LOG_FILE"
        return 0
    fi

    export DEBIAN_FRONTEND=noninteractive
    apt-get update -qq >>"$LOG_FILE" 2>&1
    # Last command: its exit status becomes the function's status.
    install_intel_gpu_tools
}
# Entry point. Dispatch only when the file is executed directly, so
# sourcing it for its functions has no side effects.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
    case "${1:-}" in
        --auto-reinstall)
            # Post-restore hook: replay the install without dialogs and
            # hand its exit status back to the caller.
            auto_reinstall_from_state
            exit $?
            ;;
    esac
    main
fi

View File

@@ -1565,6 +1565,98 @@ main() {
esac
}
# ==========================================================
# Non-interactive auto-reinstall entry point
# ==========================================================
# Run (via `--auto-reinstall`) by apply_cluster_postboot.sh after a
# host-config restore, when components_status.json lists
# nvidia_driver as installed. A restore brings back the configs but
# not the binary driver under /lib/modules/<kernel>/, so this
# replays the install path the user originally ran via `menu → 2`,
# pinned to the version recorded in the state file, with no
# dialogs.
#
# Globals read:    LOG_FILE (truncated, then appended to),
#                  COMPONENTS_STATUS_FILE, and NVIDIA_GPU_PRESENT /
#                  CURRENT_DRIVER_INSTALLED / CURRENT_DRIVER_VERSION
#                  as evaluated after the detect_* helpers run.
# Globals written: DRIVER_VERSION
#
# Exit codes:
#   0  installed (or no-op — GPU absent / driver already present /
#      not marked installed in state)
#   1  jq or state file unavailable, or no version recorded
#   2  download or install failed
auto_reinstall_from_state() {
    local state_status state_version

    : >"$LOG_FILE"
    echo "=== auto_reinstall_from_state started $(date -Iseconds) ===" >>"$LOG_FILE"

    # --- Preconditions: we need jq and a state file to read. ---
    command -v jq >/dev/null 2>&1 || {
        echo "jq not available — cannot read components_status.json" | tee -a "$LOG_FILE"
        return 1
    }
    [[ -f "$COMPONENTS_STATUS_FILE" ]] || {
        echo "No components_status.json at $COMPONENTS_STATUS_FILE" | tee -a "$LOG_FILE"
        return 1
    }

    # --- What does the restored state say? ---
    state_status=$(jq -r '.nvidia_driver.status // ""' "$COMPONENTS_STATUS_FILE" 2>/dev/null)
    state_version=$(jq -r '.nvidia_driver.version // ""' "$COMPONENTS_STATUS_FILE" 2>/dev/null)

    if [[ "$state_status" != "installed" ]]; then
        echo "nvidia_driver not marked installed in state ($state_status) — nothing to do" | tee -a "$LOG_FILE"
        return 0
    fi
    case "$state_version" in
        "" | null)
            echo "nvidia_driver marked installed but no version recorded — aborting" | tee -a "$LOG_FILE"
            return 1
            ;;
    esac
    echo "Recorded driver: $state_version" >>"$LOG_FILE"

    # --- What does the live host look like? ---
    detect_nvidia_gpus
    $NVIDIA_GPU_PRESENT || {
        echo "No NVIDIA GPU detected on this host — skipping reinstall" | tee -a "$LOG_FILE"
        return 0
    }

    detect_driver_status
    if $CURRENT_DRIVER_INSTALLED && [[ "$CURRENT_DRIVER_VERSION" == "$state_version" ]]; then
        echo "Driver $state_version already installed and matches state — no-op" | tee -a "$LOG_FILE"
        return 0
    fi

    # --- Replay the interactive install path, without the dialogs. ---
    DRIVER_VERSION="$state_version"
    echo "Reinstalling NVIDIA driver $DRIVER_VERSION non-interactively..." | tee -a "$LOG_FILE"

    ensure_workdir
    {
        ensure_repos_and_headers
        blacklist_nouveau
        ensure_modules_config
    } >>"$LOG_FILE" 2>&1

    # A driver of another version is present: remove it before installing.
    if $CURRENT_DRIVER_INSTALLED; then
        echo "Different version currently installed; cleaning up first..." | tee -a "$LOG_FILE"
        complete_nvidia_uninstall >>"$LOG_FILE" 2>&1
    fi

    download_nvidia_installer >>"$LOG_FILE" 2>&1 || {
        echo "Download failed — see $LOG_FILE" | tee -a "$LOG_FILE"
        return 2
    }
    run_nvidia_installer >>"$LOG_FILE" 2>&1 || {
        echo "Install failed — see $LOG_FILE" | tee -a "$LOG_FILE"
        return 2
    }

    install_udev_rules_and_persistenced >>"$LOG_FILE" 2>&1

    # Record success — overwrites whatever the restore put there
    # (same version key, fresh timestamp). The helper is optional, so
    # only call it when it is defined.
    if declare -F update_component_status >/dev/null 2>&1; then
        update_component_status "nvidia_driver" "installed" "$DRIVER_VERSION" "gpu" '{"patched":false}' >>"$LOG_FILE" 2>&1
    fi

    echo "✓ NVIDIA driver $DRIVER_VERSION reinstalled" | tee -a "$LOG_FILE"
    return 0
}
# Entry point. Dispatch only when the file is executed directly, so
# sourcing it for its functions has no side effects.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
    case "${1:-}" in
        --auto-reinstall)
            # Post-restore hook: replay the install without dialogs and
            # hand its exit status back to the caller.
            auto_reinstall_from_state
            exit $?
            ;;
    esac
    main
fi