mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-06-11 11:06:24 +00:00
Update 1.2.2.1 beta
This commit is contained in:
@@ -244,6 +244,67 @@ fi
|
||||
# Clean up the maintenance marker now that we're done.
rm -f -- "$MAINT_MARKER"

# ── Component auto-reinstall (driven by components_status.json) ──
# The host-config restore brings back ProxMenux state (including
# components_status.json) but NOT the binary artifacts those
# components installed outside of apt — driver modules under
# /lib/modules/<kernel>/, binaries in /usr/bin/<tool>, downloaded
# .deb files, DKMS source trees, etc. For each component the
# restore state says was installed, we kick off its native
# installer in `--auto-reinstall` mode so it replays the install
# without dialogs. The installer's own logic handles "already
# present → no-op", so this is idempotent.
#
# Apt-only components are still handled by the
# packages.manual.list pass done earlier in the restore flow
# (they're in `apt-mark showmanual`). Running the installer here
# for them is harmless overhead (the installer just sees the
# package is present and exits 0), so we don't try to filter.
#
# To register a NEW component for auto-reinstall: add it to the
# COMPONENT_INSTALLERS array below as "component_key:relative
# script path". The script must accept `--auto-reinstall` and
# read its own state from components_status.json.
COMPONENTS_STATUS="/usr/local/share/proxmenux/components_status.json"
COMPONENT_INSTALLERS=(
    "nvidia_driver:gpu_tpu/nvidia_installer.sh"
    "amdgpu_top:gpu_tpu/amd_gpu_tools.sh"
    "intel_gpu_tools:gpu_tpu/intel_gpu_tools.sh"
    "coral_driver:gpu_tpu/install_coral.sh"
)

if [[ ! -f "$COMPONENTS_STATUS" ]]; then
    # No state file: nothing is recorded as installed, so there is
    # nothing to replay.
    :
elif ! command -v jq >/dev/null 2>&1; then
    # State exists but cannot be parsed. Say so instead of silently
    # skipping every component.
    echo ""
    echo "── Component auto-reinstall ──"
    echo " ✗ jq not found — cannot read $COMPONENTS_STATUS, skipping component auto-reinstall"
else
    echo ""
    echo "── Component auto-reinstall ──"
    SCRIPTS_BASE="/usr/local/share/proxmenux/scripts"
    for entry in "${COMPONENT_INSTALLERS[@]}"; do
        # Entry format is "component_key:relative/script/path".
        comp="${entry%%:*}"
        installer="$SCRIPTS_BASE/${entry#*:}"

        # Pass the key as data (--arg) rather than splicing it into the
        # jq program, so keys containing '-', '.', etc. still resolve.
        comp_status=$(jq -r --arg c "$comp" '.[$c].status // ""' "$COMPONENTS_STATUS" 2>/dev/null)
        if [[ "$comp_status" != "installed" ]]; then
            continue # Was never installed on the source, or was uninstalled — skip.
        fi

        if [[ ! -f "$installer" ]]; then
            echo " ✗ $comp: installer missing at $installer — skipping"
            continue
        fi

        echo ""
        echo " → $comp (running $installer --auto-reinstall)"
        # Run with limited output capture. The installer logs in full to
        # its own log file; we only echo a tail here for the operator.
        bash "$installer" --auto-reinstall 2>&1 | sed -e 's/^/ /' | tail -15
        # PIPESTATUS[0] is the installer's own exit status, not that of
        # sed/tail; it must be read immediately after the pipeline.
        rc=${PIPESTATUS[0]}
        if (( rc == 0 )); then
            echo " ✓ $comp ok"
        else
            echo " ✗ $comp installer exited $rc — see its own log"
        fi
    done
fi

echo ""
echo "=== Apply finished at $(date -Iseconds) ==="
echo "Log: $LOG_FILE"
|
||||
|
||||
@@ -257,7 +257,39 @@ main() {
|
||||
fi
|
||||
}
|
||||
|
||||
# ==========================================================
# Non-interactive auto-reinstall (post-restore hook)
# ==========================================================
# Entry point for `--auto-reinstall`, run by
# apply_cluster_postboot.sh after a host restore. Reinstalls
# amdgpu_top when components_status.json records it as installed
# but neither the binary nor the amdgpu-top package is present
# (typical for a fresh PVE install + host restore — the .deb
# downloaded from GitHub is not in packages.manual.list, so we
# re-fetch and install it). No dialogs.
#
# Globals:  LOG_FILE (truncated, then appended to),
#           COMPONENTS_STATUS_FILE (read)
# Returns:  0 nothing to do (not recorded as installed, or
#             already present)
#           1 jq or the state file is missing (reason is logged)
#           2 latest-release lookup failed
#           otherwise the exit status of install_amdgpu_top
auto_reinstall_from_state() {
    : >"$LOG_FILE"
    echo "=== amd_gpu_tools auto_reinstall $(date -Iseconds) ===" >>"$LOG_FILE"

    # The caller only reports "exited 1 — see its own log", so the log
    # has to say why.
    if ! command -v jq >/dev/null 2>&1; then
        echo "jq not available — cannot read components_status.json" >>"$LOG_FILE"
        return 1
    fi
    if [[ ! -f "$COMPONENTS_STATUS_FILE" ]]; then
        echo "No components_status.json at $COMPONENTS_STATUS_FILE" >>"$LOG_FILE"
        return 1
    fi

    local s
    s=$(jq -r '.amdgpu_top.status // ""' "$COMPONENTS_STATUS_FILE" 2>/dev/null)
    if [[ "$s" != "installed" ]]; then
        echo "not installed in state ($s)" >>"$LOG_FILE"
        return 0
    fi

    if command -v amdgpu_top >/dev/null 2>&1 || dpkg -s amdgpu-top >/dev/null 2>&1; then
        echo "already present — no-op" >>"$LOG_FILE"
        return 0
    fi

    export DEBIAN_FRONTEND=noninteractive
    install_dependencies >>"$LOG_FILE" 2>&1
    if ! get_latest_release >>"$LOG_FILE" 2>&1; then
        echo "Failed to fetch latest release info" >>"$LOG_FILE"
        return 2
    fi
    # Last command: its exit status becomes the function's status.
    install_amdgpu_top
}
|
||||
|
||||
# Entry point — only when executed directly, not when sourced.
# `--auto-reinstall` selects the non-interactive path and exits with
# its status; anything else starts the interactive main flow.
if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
    case "${1:-}" in
        --auto-reinstall)
            auto_reinstall_from_state
            exit $?
            ;;
        *)
            main
            ;;
    esac
fi
|
||||
@@ -749,4 +749,57 @@ main() {
|
||||
esac
|
||||
}
|
||||
|
||||
# ==========================================================
# Non-interactive auto-reinstall (post-restore hook)
# ==========================================================
# Entry point for `--auto-reinstall`, run by
# apply_cluster_postboot.sh after a host restore when
# components_status.json records coral_driver as installed.
# Coral has two independent install branches: the PCIe/M.2
# gasket+apex DKMS modules (kernel-level) and the USB libedgetpu
# user-space runtime. Each branch is replayed only when matching
# hardware is present on this host and that branch is not already
# installed, so a card that moved to another host is simply
# skipped.
#
# Globals:  LOG_FILE (truncated, then appended to),
#           COMPONENTS_STATUS_FILE (read),
#           CORAL_PCIE_COUNT / CORAL_USB_COUNT and
#           CORAL_PCIE_INSTALLED / CORAL_USB_INSTALLED (read after
#           detect_coral_hardware / detect_coral_install_state,
#           which are expected to set them)
# Returns:  0 nothing to do, or every attempted branch succeeded
#           1 jq or the state file is missing (reason is logged)
#           2 at least one install branch failed
auto_reinstall_from_state() {
    : >"$LOG_FILE"
    echo "=== install_coral auto_reinstall $(date -Iseconds) ===" >>"$LOG_FILE"

    # The caller only reports "exited 1 — see its own log", so the log
    # has to say why.
    if ! command -v jq >/dev/null 2>&1; then
        echo "jq not available — cannot read components_status.json" >>"$LOG_FILE"
        return 1
    fi
    if [[ ! -f "$COMPONENTS_STATUS_FILE" ]]; then
        echo "No components_status.json at $COMPONENTS_STATUS_FILE" >>"$LOG_FILE"
        return 1
    fi

    local s
    s=$(jq -r '.coral_driver.status // ""' "$COMPONENTS_STATUS_FILE" 2>/dev/null)
    if [[ "$s" != "installed" ]]; then
        echo "not installed in state ($s)" >>"$LOG_FILE"
        return 0
    fi

    detect_coral_hardware
    detect_coral_install_state

    # No Coral hardware on this host? Skip — nothing to install.
    if (( CORAL_PCIE_COUNT == 0 && CORAL_USB_COUNT == 0 )); then
        echo "no Coral hardware on this host — skipping" >>"$LOG_FILE"
        return 0
    fi
    # Already healthy on every branch that has matching hardware → bail out.
    if { (( CORAL_PCIE_COUNT == 0 )) || $CORAL_PCIE_INSTALLED; } \
        && { (( CORAL_USB_COUNT == 0 )) || $CORAL_USB_INSTALLED; }; then
        echo "already healthy — no-op" >>"$LOG_FILE"
        return 0
    fi

    export DEBIAN_FRONTEND=noninteractive

    # Both branches are always attempted; a failure in one must not stop
    # the other, but it must still reach the caller via the exit status.
    local failed=0

    if (( CORAL_PCIE_COUNT > 0 )) && ! $CORAL_PCIE_INSTALLED; then
        echo "Installing gasket+apex DKMS modules..." >>"$LOG_FILE"
        if ! install_gasket_apex_dkms >>"$LOG_FILE" 2>&1; then
            echo "PCIe branch failed" >>"$LOG_FILE"
            failed=1
        fi
    fi
    if (( CORAL_USB_COUNT > 0 )) && ! $CORAL_USB_INSTALLED; then
        echo "Installing libedgetpu USB runtime..." >>"$LOG_FILE"
        if ! install_libedgetpu_runtime >>"$LOG_FILE" 2>&1; then
            echo "USB branch failed" >>"$LOG_FILE"
            failed=1
        fi
    fi

    if (( failed != 0 )); then
        return 2
    fi
    return 0
}
|
||||
|
||||
# `--auto-reinstall` selects the non-interactive path and exits with
# its status; any other invocation falls through to the interactive
# main flow.
case "${1:-}" in
    --auto-reinstall)
        auto_reinstall_from_state
        exit $?
        ;;
esac

main
|
||||
|
||||
@@ -194,7 +194,29 @@ main() {
|
||||
fi
|
||||
}
|
||||
|
||||
# Run main function
|
||||
# ==========================================================
# Non-interactive auto-reinstall (post-restore hook)
# ==========================================================
# Entry point for `--auto-reinstall`. Reinstalls intel-gpu-tools
# when components_status.json records intel_gpu_tools as installed
# but the package is not present on this host. No dialogs.
#
# Globals:  LOG_FILE (truncated, then appended to),
#           COMPONENTS_STATUS_FILE (read)
# Returns:  0 nothing to do (not recorded as installed, or the
#             package is already present)
#           1 jq or the state file is missing (reason is logged)
#           otherwise the exit status of install_intel_gpu_tools
auto_reinstall_from_state() {
    : >"$LOG_FILE"
    echo "=== intel_gpu_tools auto_reinstall $(date -Iseconds) ===" >>"$LOG_FILE"

    # Log why we bail out, so a non-zero exit can be diagnosed from the
    # log file alone.
    if ! command -v jq >/dev/null 2>&1; then
        echo "jq not available — cannot read components_status.json" >>"$LOG_FILE"
        return 1
    fi
    if [[ ! -f "$COMPONENTS_STATUS_FILE" ]]; then
        echo "No components_status.json at $COMPONENTS_STATUS_FILE" >>"$LOG_FILE"
        return 1
    fi

    local s
    s=$(jq -r '.intel_gpu_tools.status // ""' "$COMPONENTS_STATUS_FILE" 2>/dev/null)
    if [[ "$s" != "installed" ]]; then
        echo "not installed in state ($s)" >>"$LOG_FILE"
        return 0
    fi

    if dpkg -s intel-gpu-tools >/dev/null 2>&1; then
        echo "already present — no-op" >>"$LOG_FILE"
        return 0
    fi

    export DEBIAN_FRONTEND=noninteractive
    apt-get update -qq >>"$LOG_FILE" 2>&1
    # Last command: its exit status becomes the function's status.
    install_intel_gpu_tools
}
|
||||
|
||||
# Entry point — only when executed directly, not when sourced.
# `--auto-reinstall` selects the non-interactive path and exits with
# its status; anything else starts the interactive main flow.
if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
    case "${1:-}" in
        --auto-reinstall)
            auto_reinstall_from_state
            exit $?
            ;;
        *)
            main
            ;;
    esac
fi
|
||||
@@ -1565,6 +1565,98 @@ main() {
|
||||
esac
|
||||
}
|
||||
|
||||
# ==========================================================
# Non-interactive auto-reinstall entry point
# ==========================================================
# Entry point for `--auto-reinstall`, invoked after a host-config
# restore by apply_cluster_postboot.sh when components_status.json
# reports nvidia_driver as installed. The restore brings back the
# configs but not the binary driver under /lib/modules/<kernel>/,
# so this replays the install with the recorded version and no
# dialogs. It is a no-op when no NVIDIA GPU is detected or when
# the recorded version is already installed.
#
# Globals:  LOG_FILE (truncated, then appended to),
#           COMPONENTS_STATUS_FILE (read),
#           NVIDIA_GPU_PRESENT (read after detect_nvidia_gpus),
#           CURRENT_DRIVER_INSTALLED / CURRENT_DRIVER_VERSION
#           (read after detect_driver_status),
#           DRIVER_VERSION (set to the recorded version)
#
# Exit codes:
#   0 installed, or nothing to do (state not "installed", GPU
#     absent, or recorded version already installed)
#   1 jq missing, state file missing, or state says installed
#     but records no version
#   2 download or install failed
auto_reinstall_from_state() {
    : >"$LOG_FILE"
    echo "=== auto_reinstall_from_state started $(date -Iseconds) ===" >>"$LOG_FILE"

    if ! command -v jq >/dev/null 2>&1; then
        echo "jq not available — cannot read components_status.json" | tee -a "$LOG_FILE"
        return 1
    fi
    if [[ ! -f "$COMPONENTS_STATUS_FILE" ]]; then
        echo "No components_status.json at $COMPONENTS_STATUS_FILE" | tee -a "$LOG_FILE"
        return 1
    fi

    local recorded_status recorded_version
    recorded_status=$(jq -r '.nvidia_driver.status // ""' "$COMPONENTS_STATUS_FILE" 2>/dev/null)
    recorded_version=$(jq -r '.nvidia_driver.version // ""' "$COMPONENTS_STATUS_FILE" 2>/dev/null)

    if [[ "$recorded_status" != "installed" ]]; then
        echo "nvidia_driver not marked installed in state ($recorded_status) — nothing to do" | tee -a "$LOG_FILE"
        return 0
    fi
    if [[ -z "$recorded_version" || "$recorded_version" == "null" ]]; then
        echo "nvidia_driver marked installed but no version recorded — aborting" | tee -a "$LOG_FILE"
        return 1
    fi
    echo "Recorded driver: $recorded_version" >>"$LOG_FILE"

    # The detection flags are executed as commands (true/false). An
    # unset flag would expand to an empty command, which exits 0 and
    # reads as "true"; default to false so a detector that sets nothing
    # cannot trigger an install or an uninstall.
    detect_nvidia_gpus
    if ! ${NVIDIA_GPU_PRESENT:-false}; then
        echo "No NVIDIA GPU detected on this host — skipping reinstall" | tee -a "$LOG_FILE"
        return 0
    fi
    detect_driver_status
    if ${CURRENT_DRIVER_INSTALLED:-false} && [[ "${CURRENT_DRIVER_VERSION:-}" == "$recorded_version" ]]; then
        echo "Driver $recorded_version already installed and matches state — no-op" | tee -a "$LOG_FILE"
        return 0
    fi

    DRIVER_VERSION="$recorded_version"

    # Same install path as the interactive main() flow, minus all
    # dialogs and confirmations.
    echo "Reinstalling NVIDIA driver $DRIVER_VERSION non-interactively..." | tee -a "$LOG_FILE"
    ensure_workdir
    ensure_repos_and_headers >>"$LOG_FILE" 2>&1
    blacklist_nouveau >>"$LOG_FILE" 2>&1
    ensure_modules_config >>"$LOG_FILE" 2>&1

    if ${CURRENT_DRIVER_INSTALLED:-false}; then
        echo "Different version currently installed; cleaning up first..." | tee -a "$LOG_FILE"
        complete_nvidia_uninstall >>"$LOG_FILE" 2>&1
    fi

    if ! download_nvidia_installer >>"$LOG_FILE" 2>&1; then
        echo "Download failed — see $LOG_FILE" | tee -a "$LOG_FILE"
        return 2
    fi
    if ! run_nvidia_installer >>"$LOG_FILE" 2>&1; then
        echo "Install failed — see $LOG_FILE" | tee -a "$LOG_FILE"
        return 2
    fi
    install_udev_rules_and_persistenced >>"$LOG_FILE" 2>&1

    # Record success — overwrites whatever the restore put there
    # (same version key, fresh timestamp). Skipped when the helper
    # is not defined in this shell.
    if declare -F update_component_status >/dev/null 2>&1; then
        update_component_status "nvidia_driver" "installed" "$DRIVER_VERSION" "gpu" '{"patched":false}' >>"$LOG_FILE" 2>&1
    fi

    echo "✓ NVIDIA driver $DRIVER_VERSION reinstalled" | tee -a "$LOG_FILE"
    return 0
}
|
||||
|
||||
# Entry point — only when executed directly, not when sourced.
# `--auto-reinstall` selects the non-interactive path and exits with
# its status; anything else starts the interactive main flow.
if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
    case "${1:-}" in
        --auto-reinstall)
            auto_reinstall_from_state
            exit $?
            ;;
        *)
            main
            ;;
    esac
fi
|
||||
fi
|
||||
Reference in New Issue
Block a user