mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-06-11 11:06:24 +00:00
Update 1.2.2.1 beta
This commit is contained in:
@@ -244,6 +244,67 @@ fi
|
|||||||
# Clean up the maintenance marker now that we're done.
|
# Clean up the maintenance marker now that we're done.
|
||||||
rm -f "$MAINT_MARKER"
|
rm -f "$MAINT_MARKER"
|
||||||
|
|
||||||
|
# ── Component auto-reinstall (driven by components_status.json) ──
|
||||||
|
# The host-config restore brings back ProxMenux state (including
|
||||||
|
# components_status.json) but NOT the binary artifacts those
|
||||||
|
# components installed outside of apt — driver modules under
|
||||||
|
# /lib/modules/<kernel>/, binaries in /usr/bin/<tool>, downloaded
|
||||||
|
# .deb files, DKMS source trees, etc. For each component the
|
||||||
|
# restore state says was installed, we kick off its native
|
||||||
|
# installer in `--auto-reinstall` mode so it replays the install
|
||||||
|
# without dialogs. The installer's own logic handles "already
|
||||||
|
# present → no-op", so this is idempotent.
|
||||||
|
#
|
||||||
|
# Apt-only components are still handled by the
|
||||||
|
# packages.manual.list pass done earlier in the restore flow
|
||||||
|
# (they're in `apt-mark showmanual`). Running the installer here
|
||||||
|
# for them is harmless overhead (the installer just sees the
|
||||||
|
# package is present and exits 0), so we don't try to filter.
|
||||||
|
#
|
||||||
|
# To register a NEW component for auto-reinstall: add it to the
|
||||||
|
# COMPONENT_INSTALLERS array below as "component_key:relative
|
||||||
|
# script path". The script must accept `--auto-reinstall` and
|
||||||
|
# read its own state from components_status.json.
|
||||||
|
COMPONENTS_STATUS="/usr/local/share/proxmenux/components_status.json"
# Registry of "component_key:installer path relative to SCRIPTS_BASE".
COMPONENT_INSTALLERS=(
    "nvidia_driver:gpu_tpu/nvidia_installer.sh"
    "amdgpu_top:gpu_tpu/amd_gpu_tools.sh"
    "intel_gpu_tools:gpu_tpu/intel_gpu_tools.sh"
    "coral_driver:gpu_tpu/install_coral.sh"
)

# Replay the native installer of every component that the restored state
# file marks as "installed". Failures are reported to the operator but are
# never fatal for the apply run.
if [[ -f "$COMPONENTS_STATUS" ]] && ! command -v jq >/dev/null 2>&1; then
    # A state file exists but cannot be parsed: say so instead of silently
    # skipping the whole pass.
    echo ""
    echo "── Component auto-reinstall ──"
    echo " ✗ jq not available — cannot read $COMPONENTS_STATUS, skipping"
elif [[ -f "$COMPONENTS_STATUS" ]]; then
    echo ""
    echo "── Component auto-reinstall ──"
    SCRIPTS_BASE="/usr/local/share/proxmenux/scripts"
    for entry in "${COMPONENT_INSTALLERS[@]}"; do
        comp="${entry%%:*}"
        installer="$SCRIPTS_BASE/${entry#*:}"

        # The key is handed to jq as data (--arg) instead of being spliced
        # into the filter text, so keys that are not bare identifiers
        # (e.g. containing '-') are looked up correctly.
        comp_status=$(jq -r --arg key "$comp" '.[$key].status // ""' "$COMPONENTS_STATUS" 2>/dev/null)
        if [[ "$comp_status" != "installed" ]]; then
            continue # Was never installed on the source, or was uninstalled — skip.
        fi

        if [[ ! -f "$installer" ]]; then
            echo " ✗ $comp: installer missing at $installer — skipping"
            continue
        fi

        echo ""
        echo " → $comp (running $installer --auto-reinstall)"
        # Only the last 15 (indented) lines of the installer's output are
        # echoed here for the operator.
        bash "$installer" --auto-reinstall 2>&1 | sed -e 's/^/ /' | tail -15
        # PIPESTATUS[0] is the installer's own exit code, not sed's or tail's.
        rc=${PIPESTATUS[0]}
        if (( rc == 0 )); then
            echo " ✓ $comp ok"
        else
            echo " ✗ $comp installer exited $rc — see its own log"
        fi
    done
fi
|
||||||
|
|
||||||
echo ""
|
echo ""
|
||||||
echo "=== Apply finished at $(date -Iseconds) ==="
|
echo "=== Apply finished at $(date -Iseconds) ==="
|
||||||
echo "Log: $LOG_FILE"
|
echo "Log: $LOG_FILE"
|
||||||
|
|||||||
@@ -257,7 +257,39 @@ main() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ==========================================================
|
||||||
|
# Non-interactive auto-reinstall (post-restore hook)
|
||||||
|
# ==========================================================
|
||||||
|
# Called from apply_cluster_postboot.sh when components_status
|
||||||
|
# says amdgpu_top was installed on the source but its binary is
|
||||||
|
# missing on the target (typical for a fresh PVE install + host
|
||||||
|
# restore — the .deb downloaded from GitHub is not in
|
||||||
|
# packages.manual.list, so we re-fetch and install it). No
|
||||||
|
# dialogs.
|
||||||
|
auto_reinstall_from_state() {
    # Non-interactive replay of the amdgpu_top install, driven by the
    # restored components_status.json.
    # Globals: LOG_FILE (truncated, then appended), COMPONENTS_STATUS_FILE
    #          (read), DEBIAN_FRONTEND (exported).
    # Returns: 0 when there is nothing to do; 1 when jq or the state file is
    #          unavailable; 2 when the release lookup fails; otherwise the
    #          exit status of install_amdgpu_top.
    : >"$LOG_FILE"
    echo "=== amd_gpu_tools auto_reinstall $(date -Iseconds) ===" >>"$LOG_FILE"

    # Record why we bail out so an exit code of 1 is not a mystery.
    if ! command -v jq >/dev/null 2>&1; then
        echo "jq not available — cannot read components_status.json" | tee -a "$LOG_FILE"
        return 1
    fi
    if [[ ! -f "$COMPONENTS_STATUS_FILE" ]]; then
        echo "No components_status.json at $COMPONENTS_STATUS_FILE" | tee -a "$LOG_FILE"
        return 1
    fi

    local s
    s=$(jq -r '.amdgpu_top.status // ""' "$COMPONENTS_STATUS_FILE" 2>/dev/null)
    if [[ "$s" != "installed" ]]; then
        echo "not installed in state ($s)" >>"$LOG_FILE"
        return 0
    fi

    # Either the binary on PATH or the registered package counts as present.
    if command -v amdgpu_top >/dev/null 2>&1 || dpkg -s amdgpu-top >/dev/null 2>&1; then
        echo "already present — no-op" >>"$LOG_FILE"
        return 0
    fi

    export DEBIAN_FRONTEND=noninteractive
    # Best effort: a dependency hiccup is logged, the install is still tried.
    if ! install_dependencies >>"$LOG_FILE" 2>&1; then
        echo "warning: install_dependencies failed — continuing" >>"$LOG_FILE"
    fi
    if ! get_latest_release >>"$LOG_FILE" 2>&1; then
        echo "Failed to fetch latest release info" >>"$LOG_FILE"
        return 2
    fi
    install_amdgpu_top
}
|
||||||
|
|
||||||
# Run main function
|
# Run main function
|
||||||
# Act only when the file is executed directly; stay inert when sourced.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
    case "${1:-}" in
        --auto-reinstall)
            # Post-restore hook: replay the install without dialogs and
            # hand its exit status back to the caller.
            auto_reinstall_from_state
            exit "$?"
            ;;
    esac
    main
fi
|
||||||
@@ -749,4 +749,57 @@ main() {
|
|||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ==========================================================
|
||||||
|
# Non-interactive auto-reinstall (post-restore hook)
|
||||||
|
# ==========================================================
|
||||||
|
# Called from apply_cluster_postboot.sh when components_status
|
||||||
|
# says coral_driver was installed on the source. Coral has two
|
||||||
|
# install branches that are independent: the PCIe/M.2 gasket+apex
|
||||||
|
# DKMS modules (kernel-level) and the USB libedgetpu user-space
|
||||||
|
# runtime. We replay both if either was previously installed and
|
||||||
|
# the corresponding hardware is now present — the hardware
|
||||||
|
# detection in detect_coral_hardware naturally short-circuits if
|
||||||
|
# the user moved the card to a different host or it's not in
|
||||||
|
# this slot any more.
|
||||||
|
auto_reinstall_from_state() {
    # Non-interactive replay of the Coral install branches (PCIe gasket+apex
    # DKMS modules, USB libedgetpu runtime), driven by components_status.json.
    # Globals: LOG_FILE (truncated, then appended), COMPONENTS_STATUS_FILE
    #          (read), CORAL_PCIE_COUNT / CORAL_USB_COUNT /
    #          CORAL_PCIE_INSTALLED / CORAL_USB_INSTALLED (read after the
    #          detect_* helpers run), DEBIAN_FRONTEND (exported).
    # Returns: 0 nothing to do, or every attempted branch succeeded
    #          1 jq or the state file is unavailable
    #          2 at least one install branch failed
    : >"$LOG_FILE"
    echo "=== install_coral auto_reinstall $(date -Iseconds) ===" >>"$LOG_FILE"

    if ! command -v jq >/dev/null 2>&1; then
        echo "jq not available — cannot read components_status.json" | tee -a "$LOG_FILE"
        return 1
    fi
    if [[ ! -f "$COMPONENTS_STATUS_FILE" ]]; then
        echo "No components_status.json at $COMPONENTS_STATUS_FILE" | tee -a "$LOG_FILE"
        return 1
    fi

    local s rc=0
    s=$(jq -r '.coral_driver.status // ""' "$COMPONENTS_STATUS_FILE" 2>/dev/null)
    if [[ "$s" != "installed" ]]; then
        echo "not installed in state ($s)" >>"$LOG_FILE"
        return 0
    fi

    detect_coral_hardware
    detect_coral_install_state

    # No Coral hardware on this host? Skip — nothing to install.
    if (( CORAL_PCIE_COUNT == 0 && CORAL_USB_COUNT == 0 )); then
        echo "no Coral hardware on this host — skipping" >>"$LOG_FILE"
        return 0
    fi
    # Already healthy on every branch that has matching hardware → bail out.
    # (The *_INSTALLED flags hold the command names `true` / `false`.)
    if { (( CORAL_PCIE_COUNT == 0 )) || $CORAL_PCIE_INSTALLED; } \
        && { (( CORAL_USB_COUNT == 0 )) || $CORAL_USB_INSTALLED; }; then
        echo "already healthy — no-op" >>"$LOG_FILE"
        return 0
    fi

    export DEBIAN_FRONTEND=noninteractive

    # Both branches are always attempted; a failure in one is remembered in
    # rc so the caller no longer reports success for a failed reinstall.
    if (( CORAL_PCIE_COUNT > 0 )) && ! $CORAL_PCIE_INSTALLED; then
        echo "Installing gasket+apex DKMS modules..." >>"$LOG_FILE"
        if ! install_gasket_apex_dkms >>"$LOG_FILE" 2>&1; then
            echo "PCIe branch failed" >>"$LOG_FILE"
            rc=2
        fi
    fi
    if (( CORAL_USB_COUNT > 0 )) && ! $CORAL_USB_INSTALLED; then
        echo "Installing libedgetpu USB runtime..." >>"$LOG_FILE"
        if ! install_libedgetpu_runtime >>"$LOG_FILE" 2>&1; then
            echo "USB branch failed" >>"$LOG_FILE"
            rc=2
        fi
    fi
    return "$rc"
}
|
||||||
|
|
||||||
|
# Entry dispatch: the post-restore hook replays the install without dialogs
# and exits with its status; any other invocation runs the interactive flow.
case "${1:-}" in
    --auto-reinstall)
        auto_reinstall_from_state
        exit "$?"
        ;;
esac

main
|
||||||
|
|||||||
@@ -194,7 +194,29 @@ main() {
|
|||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
# Run main function
|
# ==========================================================
|
||||||
|
# Non-interactive auto-reinstall (post-restore hook)
|
||||||
|
# ==========================================================
|
||||||
|
auto_reinstall_from_state() {
    # Non-interactive replay of the intel-gpu-tools install, driven by the
    # restored components_status.json.
    # Globals: LOG_FILE (truncated, then appended), COMPONENTS_STATUS_FILE
    #          (read), DEBIAN_FRONTEND (exported).
    # Returns: 0 when there is nothing to do; 1 when jq or the state file is
    #          unavailable; otherwise the exit status of
    #          install_intel_gpu_tools.
    : >"$LOG_FILE"
    echo "=== intel_gpu_tools auto_reinstall $(date -Iseconds) ===" >>"$LOG_FILE"

    # Record why we bail out so an exit code of 1 is not a mystery.
    if ! command -v jq >/dev/null 2>&1; then
        echo "jq not available — cannot read components_status.json" | tee -a "$LOG_FILE"
        return 1
    fi
    if [[ ! -f "$COMPONENTS_STATUS_FILE" ]]; then
        echo "No components_status.json at $COMPONENTS_STATUS_FILE" | tee -a "$LOG_FILE"
        return 1
    fi

    local s
    s=$(jq -r '.intel_gpu_tools.status // ""' "$COMPONENTS_STATUS_FILE" 2>/dev/null)
    if [[ "$s" != "installed" ]]; then
        echo "not installed in state ($s)" >>"$LOG_FILE"
        return 0
    fi
    if dpkg -s intel-gpu-tools >/dev/null 2>&1; then
        echo "already present — no-op" >>"$LOG_FILE"
        return 0
    fi

    export DEBIAN_FRONTEND=noninteractive
    # Best effort: a failed index refresh is logged, the install is still
    # attempted against whatever package lists are already cached.
    if ! apt-get update -qq >>"$LOG_FILE" 2>&1; then
        echo "warning: apt-get update failed — continuing" >>"$LOG_FILE"
    fi
    install_intel_gpu_tools
}
|
||||||
|
|
||||||
# Act only when the file is executed directly; stay inert when sourced.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
    case "${1:-}" in
        --auto-reinstall)
            # Post-restore hook: replay the install without dialogs and
            # hand its exit status back to the caller.
            auto_reinstall_from_state
            exit "$?"
            ;;
    esac
    main
fi
|
||||||
@@ -1565,6 +1565,98 @@ main() {
|
|||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# ==========================================================
|
||||||
|
# Non-interactive auto-reinstall entry point
|
||||||
|
# ==========================================================
|
||||||
|
# Invoked after a host-config restore by apply_cluster_postboot.sh
|
||||||
|
# when components_status.json reports nvidia_driver as installed
|
||||||
|
# but the kernel module isn't loaded on the live system (i.e. the
|
||||||
|
# restore brought back the configs but not the binary driver from
|
||||||
|
# /lib/modules/<kernel>/). Replays the install path the user
|
||||||
|
# originally ran via `menu → 2`, using the recorded version, with
|
||||||
|
# no dialogs.
|
||||||
|
#
|
||||||
|
# Exit codes:
|
||||||
|
# 0 installed (or no-op — GPU absent / driver already present)
|
||||||
|
# 1 jq or state file missing, or marked installed without a recorded version
|
||||||
|
# 2 install failed
|
||||||
|
auto_reinstall_from_state() {
    # Non-interactive replay of the NVIDIA driver install using the version
    # recorded in components_status.json.
    # Globals: LOG_FILE (truncated, then appended), COMPONENTS_STATUS_FILE
    #          (read), NVIDIA_GPU_PRESENT / CURRENT_DRIVER_INSTALLED /
    #          CURRENT_DRIVER_VERSION (read after the detect_* helpers run),
    #          DRIVER_VERSION (written).
    # Returns: 0 installed, or nothing to do
    #          1 jq / state file missing, or no version recorded
    #          2 download or install failed
    : >"$LOG_FILE"
    echo "=== auto_reinstall_from_state started $(date -Iseconds) ===" >>"$LOG_FILE"

    if ! command -v jq >/dev/null 2>&1; then
        echo "jq not available — cannot read components_status.json" | tee -a "$LOG_FILE"
        return 1
    fi
    if [[ ! -f "$COMPONENTS_STATUS_FILE" ]]; then
        echo "No components_status.json at $COMPONENTS_STATUS_FILE" | tee -a "$LOG_FILE"
        return 1
    fi

    local recorded_status recorded_version step
    recorded_status=$(jq -r '.nvidia_driver.status // ""' "$COMPONENTS_STATUS_FILE" 2>/dev/null)
    recorded_version=$(jq -r '.nvidia_driver.version // ""' "$COMPONENTS_STATUS_FILE" 2>/dev/null)

    if [[ "$recorded_status" != "installed" ]]; then
        echo "nvidia_driver not marked installed in state ($recorded_status) — nothing to do" | tee -a "$LOG_FILE"
        return 0
    fi
    if [[ -z "$recorded_version" || "$recorded_version" == "null" ]]; then
        echo "nvidia_driver marked installed but no version recorded — aborting" | tee -a "$LOG_FILE"
        return 1
    fi
    echo "Recorded driver: $recorded_version" >>"$LOG_FILE"

    # The detect_* helpers publish their findings through globals that hold
    # the command names `true` / `false`.
    detect_nvidia_gpus
    if ! $NVIDIA_GPU_PRESENT; then
        echo "No NVIDIA GPU detected on this host — skipping reinstall" | tee -a "$LOG_FILE"
        return 0
    fi
    detect_driver_status
    if $CURRENT_DRIVER_INSTALLED && [[ "$CURRENT_DRIVER_VERSION" == "$recorded_version" ]]; then
        echo "Driver $recorded_version already installed and matches state — no-op" | tee -a "$LOG_FILE"
        return 0
    fi

    DRIVER_VERSION="$recorded_version"

    # Same install path as the interactive main() flow, minus all dialogs
    # and confirmations. Preparatory steps stay best-effort, but a failure
    # is now written to the log instead of vanishing.
    echo "Reinstalling NVIDIA driver $DRIVER_VERSION non-interactively..." | tee -a "$LOG_FILE"
    ensure_workdir \
        || echo "warning: ensure_workdir exited $? — continuing" >>"$LOG_FILE"
    for step in ensure_repos_and_headers blacklist_nouveau ensure_modules_config; do
        "$step" >>"$LOG_FILE" 2>&1 \
            || echo "warning: $step exited $? — continuing" >>"$LOG_FILE"
    done

    if $CURRENT_DRIVER_INSTALLED; then
        echo "Different version currently installed; cleaning up first..." | tee -a "$LOG_FILE"
        complete_nvidia_uninstall >>"$LOG_FILE" 2>&1 \
            || echo "warning: complete_nvidia_uninstall exited $? — continuing" >>"$LOG_FILE"
    fi

    if ! download_nvidia_installer >>"$LOG_FILE" 2>&1; then
        echo "Download failed — see $LOG_FILE" | tee -a "$LOG_FILE"
        return 2
    fi
    if ! run_nvidia_installer >>"$LOG_FILE" 2>&1; then
        echo "Install failed — see $LOG_FILE" | tee -a "$LOG_FILE"
        return 2
    fi
    install_udev_rules_and_persistenced >>"$LOG_FILE" 2>&1 \
        || echo "warning: install_udev_rules_and_persistenced exited $? — continuing" >>"$LOG_FILE"

    # Record success — overwrites whatever the restore put there
    # (same version key, fresh timestamp). Skipped when the helper is not
    # defined in this shell.
    if declare -F update_component_status >/dev/null 2>&1; then
        update_component_status "nvidia_driver" "installed" "$DRIVER_VERSION" "gpu" '{"patched":false}' >>"$LOG_FILE" 2>&1
    fi

    echo "✓ NVIDIA driver $DRIVER_VERSION reinstalled" | tee -a "$LOG_FILE"
    return 0
}
|
||||||
|
|
||||||
# Act only when the file is executed directly; stay inert when sourced.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
    case "${1:-}" in
        --auto-reinstall)
            # Post-restore hook: replay the install without dialogs and
            # hand its exit status back to the caller.
            auto_reinstall_from_state
            exit "$?"
            ;;
    esac
    main
fi
|
||||||
Reference in New Issue
Block a user