From 10ce9dbcde6ecdfb25709b761b56f355c04ca316 Mon Sep 17 00:00:00 2001 From: MacRimi Date: Mon, 6 Apr 2026 13:39:07 +0200 Subject: [PATCH] add_controller_nvme_vm.sh --- scripts/global/gpu_hook_guard_helpers.sh | 254 ++++++ scripts/global/vm_storage_helpers.sh | 128 +++ scripts/gpu_tpu/add_gpu_lxc.sh | 168 +--- scripts/gpu_tpu/add_gpu_vm.sh | 239 +++++- scripts/gpu_tpu/switch_gpu_mode.sh | 945 ++++++++++++++++++++++ scripts/menus/hw_grafics_menu.sh | 6 + scripts/menus/storage_menu.sh | 60 +- scripts/storage/add_controller_nvme_vm.sh | 401 +++++++++ scripts/storage/disk-passthrough.sh | 5 + scripts/storage/disk-passthrough_ct.sh | 2 +- scripts/vm/disk_selector.sh | 58 +- scripts/vm/synology.sh | 140 +++- scripts/vm/vm_creator.sh | 81 ++ scripts/vm/zimaos.sh | 140 +++- 14 files changed, 2390 insertions(+), 237 deletions(-) create mode 100644 scripts/global/gpu_hook_guard_helpers.sh create mode 100755 scripts/gpu_tpu/switch_gpu_mode.sh create mode 100755 scripts/storage/add_controller_nvme_vm.sh diff --git a/scripts/global/gpu_hook_guard_helpers.sh b/scripts/global/gpu_hook_guard_helpers.sh new file mode 100644 index 00000000..64b546ff --- /dev/null +++ b/scripts/global/gpu_hook_guard_helpers.sh @@ -0,0 +1,254 @@ +#!/usr/bin/env bash + +if [[ -n "${__PROXMENUX_GPU_HOOK_GUARD_HELPERS__}" ]]; then + return 0 +fi +__PROXMENUX_GPU_HOOK_GUARD_HELPERS__=1 + +PROXMENUX_GPU_HOOK_STORAGE_REF="local:snippets/proxmenux-gpu-guard.sh" +PROXMENUX_GPU_HOOK_ABS_PATH="/var/lib/vz/snippets/proxmenux-gpu-guard.sh" + +_gpu_guard_msg_warn() { + if declare -F msg_warn >/dev/null 2>&1; then + msg_warn "$1" + else + echo "[WARN] $1" >&2 + fi +} + +_gpu_guard_msg_ok() { + if declare -F msg_ok >/dev/null 2>&1; then + msg_ok "$1" + else + echo "[OK] $1" + fi +} + +_gpu_guard_has_vm_gpu() { + local vmid="$1" + qm config "$vmid" 2>/dev/null | grep -qE '^hostpci[0-9]+:' +} + +_gpu_guard_has_lxc_gpu() { + local ctid="$1" + local conf="/etc/pve/lxc/${ctid}.conf" + [[ -f "$conf" ]] || return 1 + grep -qE 'dev[0-9]+:.*(/dev/dri|/dev/nvidia|/dev/kfd)|lxc\.mount\.entry:.*dev/dri' "$conf" 2>/dev/null +} + +ensure_proxmenux_gpu_guard_hookscript() { + mkdir -p /var/lib/vz/snippets 2>/dev/null || true + + cat >"$PROXMENUX_GPU_HOOK_ABS_PATH" <<'HOOKEOF' +#!/usr/bin/env bash +set -u + +arg1="${1:-}" +arg2="${2:-}" +case "$arg1" in + pre-start|post-start|pre-stop|post-stop) + phase="$arg1" + guest_id="$arg2" + ;; + *) + guest_id="$arg1" + phase="$arg2" + ;; +esac +[[ "$phase" == "pre-start" ]] || exit 0 + +vm_conf="/etc/pve/qemu-server/${guest_id}.conf" +ct_conf="/etc/pve/lxc/${guest_id}.conf" + +if [[ -f "$vm_conf" ]]; then + mapfile -t hostpci_lines < <(grep -E '^hostpci[0-9]+:' "$vm_conf" 2>/dev/null || true) + [[ ${#hostpci_lines[@]} -eq 0 ]] && exit 0 + + # Build slot list used by this VM and block if any running VM already uses same slot. + slot_keys=() + for line in "${hostpci_lines[@]}"; do + val="${line#*: }" + [[ "$val" == *"mapping="* ]] && continue + first_field="${val%%,*}" + IFS=';' read -r -a ids <<< "$first_field" + for id in "${ids[@]}"; do + id="${id#host=}" + id="${id// /}" + [[ -z "$id" ]] && continue + if [[ "$id" =~ ^[0-9a-fA-F]{2}:[0-9a-fA-F]{2}$ ]]; then + key="${id,,}" + else + [[ "$id" =~ ^0000: ]] || id="0000:${id}" + key="${id#0000:}" + key="${key%.*}" + key="${key,,}" + fi + dup=0 + for existing in "${slot_keys[@]}"; do + [[ "$existing" == "$key" ]] && dup=1 && break + done + [[ "$dup" -eq 0 ]] && slot_keys+=("$key") + done + done + + if [[ ${#slot_keys[@]} -gt 0 ]]; then + conflict_details="" + for other_conf in /etc/pve/qemu-server/*.conf; do + [[ -f "$other_conf" ]] || continue + other_vmid="$(basename "$other_conf" .conf)" + [[ "$other_vmid" == "$guest_id" ]] && continue + qm status "$other_vmid" 2>/dev/null | grep -q "status: running" || continue + + for key in "${slot_keys[@]}"; do + if grep -qE "^hostpci[0-9]+:.*(0000:)?${key}(\\.[0-7])?([,[:space:]]|$)" "$other_conf" 2>/dev/null; then + other_name="$(awk '/^name:/ {print $2}' "$other_conf" 2>/dev/null)" + [[ -z "$other_name" ]] && other_name="VM-${other_vmid}" + conflict_details+=$'\n'"- ${key} in use by VM ${other_vmid} (${other_name})" + break + fi + done + done + + if [[ -n "$conflict_details" ]]; then + echo "ProxMenux GPU Guard: VM ${guest_id} blocked at pre-start." >&2 + echo "A hostpci device slot is already in use by another running VM." >&2 + printf '%s\n' "$conflict_details" >&2 + echo "Stop the source VM or remove/move the shared hostpci assignment." >&2 + exit 1 + fi + fi + + failed=0 + details="" + for line in "${hostpci_lines[@]}"; do + val="${line#*: }" + [[ "$val" == *"mapping="* ]] && continue + + first_field="${val%%,*}" + IFS=';' read -r -a ids <<< "$first_field" + for id in "${ids[@]}"; do + id="${id#host=}" + id="${id// /}" + [[ -z "$id" ]] && continue + + # Slot-only syntax (e.g. 01:00) is accepted by Proxmox. + if [[ "$id" =~ ^[0-9a-fA-F]{2}:[0-9a-fA-F]{2}$ ]]; then + slot_ok=false + for dev in /sys/bus/pci/devices/0000:${id}.*; do + [[ -e "$dev" ]] || continue + drv="$(basename "$(readlink "$dev/driver" 2>/dev/null)" 2>/dev/null)" + [[ "$drv" == "vfio-pci" ]] && slot_ok=true && break + done + if [[ "$slot_ok" != "true" ]]; then + failed=1 + details+=$'\n'"- ${id}: not bound to vfio-pci" + fi + continue + fi + + [[ "$id" =~ ^0000: ]] || id="0000:${id}" + dev_path="/sys/bus/pci/devices/${id}" + if [[ ! -d "$dev_path" ]]; then + failed=1 + details+=$'\n'"- ${id}: PCI device not found" + continue + fi + drv="$(basename "$(readlink "$dev_path/driver" 2>/dev/null)" 2>/dev/null)" + if [[ "$drv" != "vfio-pci" ]]; then + failed=1 + details+=$'\n'"- ${id}: driver=${drv:-none}" + fi + done + done + + if [[ "$failed" -eq 1 ]]; then + echo "ProxMenux GPU Guard: VM ${guest_id} blocked at pre-start." >&2 + echo "GPU passthrough device is not ready for VM mode (vfio-pci required)." >&2 + printf '%s\n' "$details" >&2 + echo "Switch mode to GPU -> VM from ProxMenux: GPUs and Coral-TPU Menu." >&2 + exit 1 + fi + exit 0 +fi + +if [[ -f "$ct_conf" ]]; then + mapfile -t gpu_dev_paths < <( + { + grep -E '^dev[0-9]+:' "$ct_conf" 2>/dev/null | sed -E 's/^dev[0-9]+:[[:space:]]*([^,[:space:]]+).*/\1/' + grep -E '^lxc\.mount\.entry:' "$ct_conf" 2>/dev/null | sed -E 's/^lxc\.mount\.entry:[[:space:]]*([^[:space:]]+).*/\1/' + } | grep -E '^/dev/(dri|nvidia|kfd)' | sort -u + ) + + [[ ${#gpu_dev_paths[@]} -eq 0 ]] && exit 0 + + missing="" + for dev in "${gpu_dev_paths[@]}"; do + [[ -e "$dev" ]] || missing+=$'\n'"- ${dev} unavailable" + done + + if [[ -n "$missing" ]]; then + echo "ProxMenux GPU Guard: LXC ${guest_id} blocked at pre-start." >&2 + echo "Configured GPU devices are unavailable in host device nodes." >&2 + printf '%s\n' "$missing" >&2 + echo "Switch mode to GPU -> LXC from ProxMenux: GPUs and Coral-TPU Menu." >&2 + exit 1 + fi + exit 0 +fi + +exit 0 +HOOKEOF + + chmod 755 "$PROXMENUX_GPU_HOOK_ABS_PATH" 2>/dev/null || true +} + +attach_proxmenux_gpu_guard_to_vm() { + local vmid="$1" + _gpu_guard_has_vm_gpu "$vmid" || return 0 + + local current + current=$(qm config "$vmid" 2>/dev/null | awk '/^hookscript:/ {print $2}') + if [[ "$current" == "$PROXMENUX_GPU_HOOK_STORAGE_REF" ]]; then + return 0 + fi + + if qm set "$vmid" --hookscript "$PROXMENUX_GPU_HOOK_STORAGE_REF" >/dev/null 2>&1; then + _gpu_guard_msg_ok "GPU guard hook attached to VM ${vmid}" + else + _gpu_guard_msg_warn "Could not attach GPU guard hook to VM ${vmid}. Ensure 'local' storage supports snippets." + fi +} + +attach_proxmenux_gpu_guard_to_lxc() { + local ctid="$1" + _gpu_guard_has_lxc_gpu "$ctid" || return 0 + + local current + current=$(pct config "$ctid" 2>/dev/null | awk '/^hookscript:/ {print $2}') + if [[ "$current" == "$PROXMENUX_GPU_HOOK_STORAGE_REF" ]]; then + return 0 + fi + + if pct set "$ctid" -hookscript "$PROXMENUX_GPU_HOOK_STORAGE_REF" >/dev/null 2>&1; then + _gpu_guard_msg_ok "GPU guard hook attached to LXC ${ctid}" + else + _gpu_guard_msg_warn "Could not attach GPU guard hook to LXC ${ctid}. Ensure 'local' storage supports snippets." + fi +} + +sync_proxmenux_gpu_guard_hooks() { + ensure_proxmenux_gpu_guard_hookscript + + local vmid ctid + for conf in /etc/pve/qemu-server/*.conf; do + [[ -f "$conf" ]] || continue + vmid=$(basename "$conf" .conf) + _gpu_guard_has_vm_gpu "$vmid" && attach_proxmenux_gpu_guard_to_vm "$vmid" + done + + for conf in /etc/pve/lxc/*.conf; do + [[ -f "$conf" ]] || continue + ctid=$(basename "$conf" .conf) + _gpu_guard_has_lxc_gpu "$ctid" && attach_proxmenux_gpu_guard_to_lxc "$ctid" + done +} diff --git a/scripts/global/vm_storage_helpers.sh b/scripts/global/vm_storage_helpers.sh index 187eebc5..cf9e95f2 100644 --- a/scripts/global/vm_storage_helpers.sh +++ b/scripts/global/vm_storage_helpers.sh @@ -136,3 +136,131 @@ function _vm_is_q35() { machine_line=$(qm config "$vmid" 2>/dev/null | awk -F': ' '/^machine:/ {print $2}') [[ "$machine_line" == *q35* ]] } + +function _shorten_text() { + local text="$1" + local max_len="${2:-42}" + [[ -z "$text" ]] && { echo ""; return; } + if (( ${#text} > max_len )); then + echo "${text:0:$((max_len-3))}..." + else + echo "$text" + fi +} + +function _pci_slot_base() { + local pci_full="$1" + local slot + slot="${pci_full#0000:}" + slot="${slot%.*}" + echo "$slot" +} + +function _vm_status_is_running() { + local vmid="$1" + qm status "$vmid" 2>/dev/null | grep -q "status: running" +} + +function _vm_onboot_is_enabled() { + local vmid="$1" + qm config "$vmid" 2>/dev/null | grep -qE '^onboot:\s*1' +} + +function _vm_name_by_id() { + local vmid="$1" + local conf="/etc/pve/qemu-server/${vmid}.conf" + local vm_name + vm_name=$(awk '/^name:/ {print $2}' "$conf" 2>/dev/null) + [[ -z "$vm_name" ]] && vm_name="VM-${vmid}" + echo "$vm_name" +} + +function _vm_has_pci_slot() { + local vmid="$1" + local slot_base="$2" + local conf="/etc/pve/qemu-server/${vmid}.conf" + [[ -f "$conf" ]] || return 1 + grep -qE "^hostpci[0-9]+:.*(0000:)?${slot_base}(\\.[0-7])?([,[:space:]]|$)" "$conf" +} + +function _pci_assigned_vm_ids() { + local pci_full="$1" + local exclude_vmid="${2:-}" + local slot_base conf vmid + slot_base=$(_pci_slot_base "$pci_full") + + for conf in /etc/pve/qemu-server/*.conf; do + [[ -f "$conf" ]] || continue + vmid=$(basename "$conf" .conf) + [[ -n "$exclude_vmid" && "$vmid" == "$exclude_vmid" ]] && continue + if grep -qE "^hostpci[0-9]+:.*(0000:)?${slot_base}(\\.[0-7])?([,[:space:]]|$)" "$conf"; then + echo "$vmid" + fi + done +} + +function _remove_pci_slot_from_vm_config() { + local vmid="$1" + local slot_base="$2" + local conf="/etc/pve/qemu-server/${vmid}.conf" + [[ -f "$conf" ]] || return 1 + local tmpf + tmpf=$(mktemp) + awk -v slot="$slot_base" ' + $0 ~ "^hostpci[0-9]+:.*(0000:)?" slot "(\\.[0-7])?([,[:space:]]|$)" {next} + {print} + ' "$conf" > "$tmpf" && cat "$tmpf" > "$conf" + rm -f "$tmpf" +} + +function _pci_assigned_vm_summary() { + local pci_full="$1" + local slot_base conf vmid vm_name running onboot + local -a refs=() + local running_count=0 onboot_count=0 + + slot_base="${pci_full#0000:}" + slot_base="${slot_base%.*}" + + for conf in /etc/pve/qemu-server/*.conf; do + [[ -f "$conf" ]] || continue + + if ! grep -qE "^hostpci[0-9]+:.*(0000:)?${slot_base}(\\.[0-7])?([,[:space:]]|$)" "$conf"; then + continue + fi + + vmid=$(basename "$conf" .conf) + vm_name=$(awk '/^name:/ {print $2}' "$conf" 2>/dev/null) + [[ -z "$vm_name" ]] && vm_name="VM-${vmid}" + + if qm status "$vmid" 2>/dev/null | grep -q "status: running"; then + running="running" + running_count=$((running_count + 1)) + else + running="stopped" + fi + + if grep -qE "^onboot:\s*1" "$conf" 2>/dev/null; then + onboot="1" + onboot_count=$((onboot_count + 1)) + else + onboot="0" + fi + + refs+=("${vmid}[${running},onboot=${onboot}]") + done + + [[ ${#refs[@]} -eq 0 ]] && return 1 + + local joined summary + joined=$(IFS=', '; echo "${refs[*]}") + summary="$(translate "Assigned to VM(s)"): ${joined}" + if [[ "$running_count" -gt 0 ]]; then + summary+=" ($(translate "running"): ${running_count})" + fi + if [[ "$onboot_count" -gt 0 ]]; then + summary+=", onboot=1: ${onboot_count}" + fi + echo "$summary" + return 0 +} diff --git a/scripts/gpu_tpu/add_gpu_lxc.sh b/scripts/gpu_tpu/add_gpu_lxc.sh index ed2e43e5..e3b397e8 100644 --- a/scripts/gpu_tpu/add_gpu_lxc.sh +++ b/scripts/gpu_tpu/add_gpu_lxc.sh @@ -28,6 +28,11 @@ NVIDIA_VID_DID="" if [[ -f "$UTILS_FILE" ]]; then source "$UTILS_FILE" fi +if [[ -f "$LOCAL_SCRIPTS/global/gpu_hook_guard_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS/global/gpu_hook_guard_helpers.sh" +elif [[ -f "$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)/global/gpu_hook_guard_helpers.sh" ]]; then + source "$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)/global/gpu_hook_guard_helpers.sh" +fi load_language initialize_cache @@ -827,7 +832,7 @@ _remove_vfio_modules() { } # Detects if any selected GPU is currently in GPU → VM mode (VFIO binding). -# If so, guides the user through the switch to GPU → LXC mode and exits. +# If so, delegates switch handling to switch_gpu_mode.sh and exits. check_vfio_switch_mode() { local vfio_conf="/etc/modprobe.d/vfio.conf" [[ ! -f "$vfio_conf" ]] && return 0 @@ -857,155 +862,39 @@ check_vfio_switch_mode() { [[ ${#vfio_types[@]} -eq 0 ]] && return 0 - # ── One or more GPUs are in GPU → VM mode ───────────────────── - local msg - msg="\n$(translate 'The following GPU(s) are currently in GPU → VM passthrough mode (VFIO)'):\n\n" + msg="\n$(translate 'The following selected GPU(s) are currently in GPU -> VM mode (vfio-pci):')\n\n" for i in "${!vfio_types[@]}"; do msg+=" • ${vfio_names[$i]} (${vfio_pcis[$i]})\n" done - msg+="\n$(translate 'VFIO gives the VM exclusive ownership of the GPU. The native driver is blacklisted.')\n" - msg+="$(translate 'To use this GPU in an LXC container the VFIO binding must be removed')\n" - msg+="$(translate 'and the native driver reloaded. This requires a system reboot.')\n" - - # Check for VM configs that still reference this GPU - local -a affected_vms=() affected_vm_names=() - for pci in "${vfio_pcis[@]}"; do - local pci_slot="${pci#0000:}" - pci_slot="${pci_slot%.*}" - for conf in /etc/pve/qemu-server/*.conf; do - [[ -f "$conf" ]] || continue - if grep -qE "hostpci[0-9]+:.*${pci_slot}" "$conf"; then - local vmid vm_name - vmid=$(basename "$conf" .conf) - vm_name=$(grep "^name:" "$conf" 2>/dev/null | awk '{print $2}') - local dup=false - for v in "${affected_vms[@]}"; do [[ "$v" == "$vmid" ]] && dup=true && break; done - $dup || { affected_vms+=("$vmid"); affected_vm_names+=("${vm_name:-VM-${vmid}}"); } - fi - done - done - - if [[ ${#affected_vms[@]} -gt 0 ]]; then - msg+="\n\Z1\Zb$(translate 'Warning: This GPU is assigned to the following VM(s)'):\Zn\n\n" - for i in "${!affected_vms[@]}"; do - msg+=" • VM ${affected_vms[$i]} (${affected_vm_names[$i]})\n" - done - msg+="\n$(translate 'Starting those VMs after the switch will cause errors on the system and in the VM.')\n" - msg+="$(translate 'You will be asked to stop them or remove the GPU from their config.')\n" - fi - - msg+="\n$(translate 'Do you want to switch to GPU → LXC mode?')" + msg+="\n$(translate 'To continue with Add GPU to LXC, first switch the host to GPU -> LXC mode and reboot.')\n" + msg+="$(translate 'Do you want to open Switch GPU Mode now?')" dialog --backtitle "ProxMenux" --colors \ - --title "$(translate 'GPU → VM Mode Detected')" \ - --yesno "$msg" 26 80 + --title "$(translate 'GPU -> VM Mode Detected')" \ + --yesno "$msg" 18 84 [[ $? -ne 0 ]] && exit 0 - # ── User confirmed switch — enter processing phase ───────── - LXC_SWITCH_MODE=true - - # Handle VM conflicts: stop VM or remove GPU from config - if [[ ${#affected_vms[@]} -gt 0 ]]; then - local vm_msg - vm_msg="\n$(translate 'The following VM(s) have this GPU assigned'):\n\n" - for i in "${!affected_vms[@]}"; do - vm_msg+=" • VM ${affected_vms[$i]} (${affected_vm_names[$i]})\n" - done - vm_msg+="\n$(translate 'YES — Stop the VM(s) now (GPU entry stays in config, reusable for passthrough later)')\n" - vm_msg+="$(translate 'NO — Remove GPU from VM config (VM can start normally without GPU)')" - - dialog --backtitle "ProxMenux" --colors \ - --title "$(translate 'VM Conflict: Choose Action')" \ - --yesno "$vm_msg" 18 78 - local vm_action=$? - - show_proxmenux_logo - msg_title "$(_get_lxc_run_title)" - - for i in "${!affected_vms[@]}"; do - local vmid="${affected_vms[$i]}" - if [[ $vm_action -eq 0 ]]; then - if qm status "$vmid" 2>/dev/null | grep -q "running"; then - msg_info "$(translate 'Stopping VM') ${vmid}..." - qm stop "$vmid" >>"$LOG_FILE" 2>&1 || true - msg_ok "$(translate 'VM') ${vmid} $(translate 'stopped.')" - else - msg_ok "$(translate 'VM') ${vmid} $(translate 'is already stopped.')" - fi - else - local src_conf="/etc/pve/qemu-server/${vmid}.conf" - if [[ -f "$src_conf" ]]; then - for pci in "${vfio_pcis[@]}"; do - local pci_slot="${pci#0000:}"; pci_slot="${pci_slot%.*}" - sed -i "/^hostpci[0-9]\+:.*${pci_slot}/d" "$src_conf" - done - msg_ok "$(translate 'GPU removed from VM') ${vmid} $(translate 'configuration.')" - fi - fi - done + local switch_script="$LOCAL_SCRIPTS/gpu_tpu/switch_gpu_mode.sh" + local local_switch_script + local_switch_script="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/switch_gpu_mode.sh" + if [[ ! -f "$switch_script" && -f "$local_switch_script" ]]; then + switch_script="$local_switch_script" fi - # ── Remove VFIO config for each affected GPU ─────────────── - msg_info "$(translate 'Removing VFIO configuration...')" - - local -a all_ids_to_remove=() - for i in "${!vfio_types[@]}"; do - local gpu_type="${vfio_types[$i]}" - local pci="${vfio_pcis[$i]}" - - # Collect all IOMMU group IDs (GPU + audio function, etc.) - local -a group_ids=() - mapfile -t group_ids < <(_get_iommu_group_ids "$pci") - if [[ ${#group_ids[@]} -gt 0 ]]; then - all_ids_to_remove+=("${group_ids[@]}") - else - # IOMMU not active: fall back to the GPU's own vendor:device ID - case "$gpu_type" in - intel) all_ids_to_remove+=("$INTEL_VID_DID") ;; - amd) all_ids_to_remove+=("$AMD_VID_DID") ;; - nvidia) all_ids_to_remove+=("$NVIDIA_VID_DID") ;; - esac - fi - - _remove_gpu_blacklist "$gpu_type" - msg_ok "$(translate 'Driver blacklist removed for') ${gpu_type}" - - if [[ "$gpu_type" == "amd" ]]; then - _remove_amd_softdep - msg_ok "$(translate 'AMD softdep entries removed')" - fi - done - - local remaining_count - remaining_count=$(_remove_vfio_ids "${all_ids_to_remove[@]}") - msg_ok "$(translate 'VFIO device IDs removed from /etc/modprobe.d/vfio.conf')" - - if [[ "$remaining_count" -eq 0 ]]; then - _remove_vfio_modules - msg_ok "$(translate 'VFIO modules removed from /etc/modules')" - else - msg_ok "$(translate 'VFIO modules kept (other GPUs remain in VFIO mode)')" + if [[ ! -f "$switch_script" ]]; then + dialog --backtitle "ProxMenux" \ + --title "$(translate 'Switch Script Not Found')" \ + --msgbox "\n$(translate 'switch_gpu_mode.sh was not found.')\n\n$(translate 'Expected path:')\n${LOCAL_SCRIPTS}/gpu_tpu/switch_gpu_mode.sh" 10 84 + exit 0 fi - msg_info "$(translate 'Updating initramfs (this may take a minute)...')" - update-initramfs -u -k all >>"$LOG_FILE" 2>&1 - msg_ok "$(translate 'initramfs updated')" + bash "$switch_script" - echo - msg_success "$(translate 'GPU → LXC switch complete. A reboot is required to load the native GPU driver.')" - echo - - whiptail --title "$(translate 'Reboot Required')" \ - --yesno "$(translate 'A reboot is required to complete the switch to GPU → LXC mode. Do you want to restart now?')" 10 74 - if [[ $? -eq 0 ]]; then - msg_warn "$(translate 'Rebooting the system...')" - reboot - else - msg_info2 "$(translate 'Please reboot manually before adding the GPU to an LXC container.')" - msg_success "$(translate 'Press Enter to continue...')" - read -r - fi + dialog --backtitle "ProxMenux" --colors \ + --title "$(translate 'Next Step Required')" \ + --msgbox "\n$(translate 'After switching mode, reboot the host if requested.')\n\n$(translate 'Then run this option again:')\n\n Add GPU to LXC\n\n$(translate 'This guarantees that device nodes are available before applying LXC GPU config.')" \ + 12 84 exit 0 } @@ -1034,6 +923,11 @@ main() { msg_title "$(_get_lxc_run_title)" configure_passthrough "$CONTAINER_ID" + if declare -F attach_proxmenux_gpu_guard_to_lxc >/dev/null 2>&1; then + ensure_proxmenux_gpu_guard_hookscript + attach_proxmenux_gpu_guard_to_lxc "$CONTAINER_ID" + sync_proxmenux_gpu_guard_hooks + fi if start_container_and_wait "$CONTAINER_ID"; then install_drivers "$CONTAINER_ID" diff --git a/scripts/gpu_tpu/add_gpu_vm.sh b/scripts/gpu_tpu/add_gpu_vm.sh index c92b6008..2ea54d77 100644 --- a/scripts/gpu_tpu/add_gpu_vm.sh +++ b/scripts/gpu_tpu/add_gpu_vm.sh @@ -46,6 +46,11 @@ if [[ -f "$LOCAL_SCRIPTS_LOCAL/global/pci_passthrough_helpers.sh" ]]; then elif [[ -f "$LOCAL_SCRIPTS_DEFAULT/global/pci_passthrough_helpers.sh" ]]; then source "$LOCAL_SCRIPTS_DEFAULT/global/pci_passthrough_helpers.sh" fi +if [[ -f "$LOCAL_SCRIPTS_LOCAL/global/gpu_hook_guard_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_LOCAL/global/gpu_hook_guard_helpers.sh" +elif [[ -f "$LOCAL_SCRIPTS_DEFAULT/global/gpu_hook_guard_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_DEFAULT/global/gpu_hook_guard_helpers.sh" +fi load_language initialize_cache @@ -89,6 +94,12 @@ PRESELECT_VMID="" WIZARD_CALL=false GPU_WIZARD_RESULT_FILE="" +declare -a LXC_AFFECTED_CTIDS=() +declare -a LXC_AFFECTED_NAMES=() +declare -a LXC_AFFECTED_RUNNING=() # 1 or 0 +declare -a LXC_AFFECTED_ONBOOT=() # 1 or 0 +LXC_SWITCH_ACTION="" # keep_gpu_disable_onboot | remove_gpu_keep_onboot + # ========================================================== # Helpers @@ -133,6 +144,42 @@ _vm_onboot_enabled() { qm config "$vmid" 2>/dev/null | grep -qE "^onboot:\s*1" } +_ct_is_running() { + local ctid="$1" + pct status "$ctid" 2>/dev/null | grep -q "status: running" +} + +_ct_onboot_enabled() { + local ctid="$1" + pct config "$ctid" 2>/dev/null | grep -qE "^onboot:\s*1" +} + +_lxc_conf_uses_selected_gpu() { + local conf="$1" + case "$SELECTED_GPU" in + nvidia) + grep -qE "dev[0-9]+:.*(/dev/nvidia|/dev/nvidia-caps)" "$conf" 2>/dev/null + ;; + amd) + grep -qE "dev[0-9]+:.*(/dev/dri|/dev/kfd)|lxc\.mount\.entry:.*dev/dri" "$conf" 2>/dev/null + ;; + intel) + grep -qE "dev[0-9]+:.*(/dev/dri)|lxc\.mount\.entry:.*dev/dri" "$conf" 2>/dev/null + ;; + *) + grep -qE "dev[0-9]+:.*(/dev/dri|/dev/nvidia|/dev/kfd)|lxc\.mount\.entry:.*dev/dri" "$conf" 2>/dev/null + ;; + esac +} + +_lxc_switch_action_label() { + case "$LXC_SWITCH_ACTION" in + keep_gpu_disable_onboot) echo "$(translate 'Keep GPU in LXC config + disable Start on boot')" ;; + remove_gpu_keep_onboot) echo "$(translate 'Remove GPU from LXC config + keep Start on boot unchanged')" ;; + *) echo "$(translate 'No specific LXC action selected')" ;; + esac +} + _set_wizard_result() { local result="$1" [[ -z "${GPU_WIZARD_RESULT_FILE:-}" ]] && return 0 @@ -562,6 +609,7 @@ warn_single_gpu() { msg+=" • Proxmox Web UI (https)\n" msg+=" • Serial console\n\n" msg+="$(translate 'The VM guest will have exclusive access to the GPU.')\n\n" + msg+="\Z3$(translate 'Important: some GPUs may still fail in passthrough and can affect host stability or overall performance depending on hardware/firmware quality.')\Zn\n\n" msg+="$(translate 'Make sure you have SSH or Web UI access before rebooting.')\n\n" msg+="$(translate 'Do you want to continue?')" @@ -645,11 +693,31 @@ _detect_intel_gpu_subtype() { check_intel_vm_compatibility() { local pci_full="$SELECTED_GPU_PCI" - local gpu_subtype reset_method power_state + local gpu_subtype reset_method power_state vendor device viddid gpu_subtype=$(_detect_intel_gpu_subtype) reset_method=$(_check_pci_reset_method "$pci_full") power_state=$(cat "/sys/bus/pci/devices/${pci_full}/power_state" 2>/dev/null | tr -d '[:space:]') + vendor=$(cat "/sys/bus/pci/devices/${pci_full}/vendor" 2>/dev/null | sed 's/0x//' | tr '[:upper:]' '[:lower:]') + device=$(cat "/sys/bus/pci/devices/${pci_full}/device" 2>/dev/null | sed 's/0x//' | tr '[:upper:]' '[:lower:]') + viddid="${vendor}:${device}" + + # ── BLOCKER: Known unsupported Intel Apollo Lake iGPU IDs ──────────── + if [[ "$viddid" == "8086:5a84" || "$viddid" == "8086:5a85" ]]; then + local msg + msg="\n\Zb\Z1$(translate 'GPU Not Compatible with VM Passthrough')\Zn\n\n" + msg+="$(translate 'The selected Intel GPU belongs to Apollo Lake generation and is blocked by policy for VM passthrough due to host instability risk.')\n\n" + msg+=" ${SELECTED_GPU_NAME}\n" + msg+=" ${SELECTED_GPU_PCI}\n" + msg+=" \ZbID: ${viddid}\Zn\n\n" + msg+="$(translate 'This GPU is considered incompatible with GPU passthrough to a VM in ProxMenux.')\n\n" + msg+="$(translate 'Recommended: use GPU with LXC workloads instead of VM passthrough on this hardware.')" + + dialog --backtitle "ProxMenux" --colors \ + --title "$(translate 'Blocked GPU ID')" \ + --msgbox "$msg" 20 84 + exit 0 + fi # ── BLOCKER: Intel GPU in D3cold ────────────────────────────────────── if [[ "$power_state" == "D3cold" ]]; then @@ -1016,35 +1084,78 @@ check_switch_mode() { pci_slot="${pci_slot%.*}" # 01:00 # ── LXC conflict check ──────────────────────────────── + LXC_AFFECTED_CTIDS=() + LXC_AFFECTED_NAMES=() + LXC_AFFECTED_RUNNING=() + LXC_AFFECTED_ONBOOT=() + LXC_SWITCH_ACTION="" + local lxc_affected=() + local running_count=0 + local onboot_count=0 + for conf in /etc/pve/lxc/*.conf; do [[ -f "$conf" ]] || continue - if grep -qE "dev[0-9]+:.*(/dev/dri|/dev/nvidia|/dev/kfd)" "$conf"; then - local ctid ct_name - ctid=$(basename "$conf" .conf) - ct_name=$(pct config "$ctid" 2>/dev/null | grep "^hostname:" | awk '{print $2}') - lxc_affected+=("CT ${ctid} (${ct_name:-CT-${ctid}})") - fi + _lxc_conf_uses_selected_gpu "$conf" || continue + + local ctid ct_name running_flag onboot_flag + ctid=$(basename "$conf" .conf) + ct_name=$(pct config "$ctid" 2>/dev/null | awk '/^hostname:/ {print $2}') + [[ -z "$ct_name" ]] && ct_name="CT-${ctid}" + + running_flag=0 + onboot_flag=0 + _ct_is_running "$ctid" && running_flag=1 + _ct_onboot_enabled "$ctid" && onboot_flag=1 + + LXC_AFFECTED_CTIDS+=("$ctid") + LXC_AFFECTED_NAMES+=("$ct_name") + LXC_AFFECTED_RUNNING+=("$running_flag") + LXC_AFFECTED_ONBOOT+=("$onboot_flag") + + lxc_affected+=("CT ${ctid} (${ct_name})") + [[ "$running_flag" == "1" ]] && running_count=$((running_count + 1)) + [[ "$onboot_flag" == "1" ]] && onboot_count=$((onboot_count + 1)) done if [[ ${#lxc_affected[@]} -gt 0 ]]; then SWITCH_FROM_LXC=true SWITCH_LXC_LIST=$(IFS=', '; echo "${lxc_affected[*]}") - local msg - msg="\n$(translate 'The selected GPU is currently shared with the following LXC containers via device passthrough:')\n\n" - for ct in "${lxc_affected[@]}"; do - msg+=" • ${ct}\n" + local msg action_choice + msg="\n$(translate 'The selected GPU is currently used by the following LXC container(s):')\n\n" + local i + for i in "${!LXC_AFFECTED_CTIDS[@]}"; do + local status_txt onboot_txt + status_txt="$(translate 'stopped')" + onboot_txt="onboot=0" + [[ "${LXC_AFFECTED_RUNNING[$i]}" == "1" ]] && status_txt="$(translate 'running')" + [[ "${LXC_AFFECTED_ONBOOT[$i]}" == "1" ]] && onboot_txt="onboot=1" + msg+=" • CT ${LXC_AFFECTED_CTIDS[$i]} (${LXC_AFFECTED_NAMES[$i]}) [${status_txt}, ${onboot_txt}]\n" done msg+="\n$(translate 'VM passthrough requires exclusive VFIO binding of the GPU.')\n" - msg+="$(translate 'GPU device access will be removed from those LXC containers.')\n\n" - msg+="\Z3$(translate 'After this LXC → VM switch, reboot the host so the new binding state is applied cleanly.')\Zn\n\n" - msg+="$(translate 'Do you want to continue?')" + msg+="$(translate 'Choose how to handle affected LXC containers before switching to VM mode.')\n\n" + [[ "$running_count" -gt 0 ]] && \ + msg+="\Z3$(translate 'Running containers detected'): ${running_count}\Zn\n" + [[ "$onboot_count" -gt 0 ]] && \ + msg+="\Z1\Zb$(translate 'Start on boot enabled (onboot=1)'): ${onboot_count}\Zn\n" + msg+="\n\Z3$(translate 'After this LXC → VM switch, reboot the host so the new binding state is applied cleanly.')\Zn" - dialog --backtitle "ProxMenux" \ + action_choice=$(dialog --backtitle "ProxMenux" --colors \ --title "$(translate 'GPU Used in LXC Containers')" \ - --yesno "$msg" 18 76 - [[ $? -ne 0 ]] && exit 0 + --default-item "2" \ + --menu "$msg" 25 96 8 \ + "1" "$(translate 'Keep GPU in LXC config (disable Start on boot)')" \ + "2" "$(translate 'Remove GPU from LXC config (keep Start on boot)')" \ + 2>&1 >/dev/tty) || exit 0 + + case "$action_choice" in + 1) LXC_SWITCH_ACTION="keep_gpu_disable_onboot" ;; + 2) LXC_SWITCH_ACTION="remove_gpu_keep_onboot" ;; + *) exit 0 ;; + esac + else + SWITCH_FROM_LXC=false fi # ── VM conflict check (different VM than selected) ──── @@ -1155,8 +1266,13 @@ confirm_summary() { msg+=" • $(translate 'Additional GPU audio function will be added'): ${EXTRA_AUDIO_DEVICES[*]}\n" [[ "$SELECTED_GPU" == "nvidia" ]] && \ msg+=" • $(translate 'NVIDIA KVM hiding (cpu hidden=1)')\n" - [[ "$SWITCH_FROM_LXC" == "true" ]] && \ - msg+="\n \Z3• $(translate 'GPU will be removed from LXC containers'): ${SWITCH_LXC_LIST}\Zn\n" + if [[ "$SWITCH_FROM_LXC" == "true" ]]; then + msg+="\n \Z3• $(translate 'Affected LXC containers'): ${SWITCH_LXC_LIST}\Zn\n" + msg+=" \Z3• $(translate 'Selected LXC action'): $(_lxc_switch_action_label)\Zn\n" + if [[ "$LXC_SWITCH_ACTION" == "remove_gpu_keep_onboot" ]]; then + msg+=" \Z3• $(translate 'To use the GPU again in LXC, run Add GPU to LXC from GPUs and Coral-TPU Menu')\Zn\n" + fi + fi [[ "$SWITCH_FROM_VM" == "true" ]] && \ msg+="\n \Z3• $(translate 'GPU will be removed from VM') ${SWITCH_VM_SRC}\Zn\n" msg+="\n$(translate 'Do you want to proceed?')" @@ -1350,24 +1466,76 @@ dump_amd_rom() { } -# ── Remove GPU from LXC configs (switch mode) ──────────── -cleanup_lxc_configs() { - [[ "$SWITCH_FROM_LXC" != "true" ]] && return 0 - - msg_info "$(translate 'Removing GPU device access from LXC containers...')" - for conf in /etc/pve/lxc/*.conf; do - [[ -f "$conf" ]] || continue - if grep -qE "dev[0-9]+:.*(/dev/dri|/dev/nvidia|/dev/kfd)" "$conf"; then - sed -i '/dev[0-9]\+:.*\/dev\/dri/d' "$conf" - sed -i '/dev[0-9]\+:.*\/dev\/nvidia/d' "$conf" - sed -i '/dev[0-9]\+:.*\/dev\/kfd/d' "$conf" +_remove_selected_gpu_from_lxc_conf() { + local conf="$1" + case "$SELECTED_GPU" in + nvidia) + sed -i '/dev[0-9]\+:.*\/dev\/nvidia/d' "$conf" + ;; + amd) + sed -i '/dev[0-9]\+:.*\/dev\/dri/d' "$conf" + sed -i '/dev[0-9]\+:.*\/dev\/kfd/d' "$conf" sed -i '/lxc\.mount\.entry:.*dev\/dri/d' "$conf" sed -i '/lxc\.cgroup2\.devices\.allow:.*226/d' "$conf" - local ctid - ctid=$(basename "$conf" .conf) - msg_ok "$(translate 'GPU removed from LXC') ${ctid}" | tee -a "$screen_capture" + ;; + intel) + sed -i '/dev[0-9]\+:.*\/dev\/dri/d' "$conf" + sed -i '/lxc\.mount\.entry:.*dev\/dri/d' "$conf" + sed -i '/lxc\.cgroup2\.devices\.allow:.*226/d' "$conf" + ;; + *) + sed -i '/dev[0-9]\+:.*\/dev\/dri/d' "$conf" + sed -i '/dev[0-9]\+:.*\/dev\/nvidia/d' "$conf" + sed -i '/dev[0-9]\+:.*\/dev\/kfd/d' "$conf" + sed -i '/lxc\.mount\.entry:.*dev\/dri/d' "$conf" + sed -i '/lxc\.cgroup2\.devices\.allow:.*226/d' "$conf" + ;; + esac +} + +# ── Apply selected action for affected LXC (switch mode) ─ +cleanup_lxc_configs() { + [[ "$SWITCH_FROM_LXC" != "true" ]] && return 0 + [[ ${#LXC_AFFECTED_CTIDS[@]} -eq 0 ]] && return 0 + + msg_info "$(translate 'Applying selected LXC switch action...')" + + local i + for i in "${!LXC_AFFECTED_CTIDS[@]}"; do + local ctid conf + ctid="${LXC_AFFECTED_CTIDS[$i]}" + conf="/etc/pve/lxc/${ctid}.conf" + + if [[ "${LXC_AFFECTED_RUNNING[$i]}" == "1" ]]; then + msg_info "$(translate 'Stopping LXC') ${ctid}..." + if pct stop "$ctid" >>"$LOG_FILE" 2>&1; then + msg_ok "$(translate 'LXC stopped') ${ctid}" | tee -a "$screen_capture" + else + msg_warn "$(translate 'Could not stop LXC') ${ctid}" | tee -a "$screen_capture" + fi + else + msg_ok "$(translate 'LXC already stopped') ${ctid}" | tee -a "$screen_capture" + fi + + if [[ "$LXC_SWITCH_ACTION" == "keep_gpu_disable_onboot" ]]; then + if [[ "${LXC_AFFECTED_ONBOOT[$i]}" == "1" ]]; then + if pct set "$ctid" -onboot 0 >>"$LOG_FILE" 2>&1; then + msg_warn "$(translate 'Start on boot disabled for LXC') ${ctid}" | tee -a "$screen_capture" + else + msg_error "$(translate 'Failed to disable Start on boot for LXC') ${ctid}" | tee -a "$screen_capture" + fi + fi + fi + + if [[ "$LXC_SWITCH_ACTION" == "remove_gpu_keep_onboot" && -f "$conf" ]]; then + _remove_selected_gpu_from_lxc_conf "$conf" + msg_ok "$(translate 'GPU access removed from LXC') ${ctid}" | tee -a "$screen_capture" fi done + + if [[ "$LXC_SWITCH_ACTION" == "remove_gpu_keep_onboot" ]]; then + msg_warn "$(translate 'If needed again, re-add GPU to LXC from GPUs and Coral-TPU Menu → Add GPU to LXC.')" | tee -a "$screen_capture" + fi } @@ -1562,6 +1730,11 @@ main() { cleanup_vm_config ensure_vm_display_std configure_vm + if declare -F attach_proxmenux_gpu_guard_to_vm >/dev/null 2>&1; then + ensure_proxmenux_gpu_guard_hookscript + attach_proxmenux_gpu_guard_to_vm "$SELECTED_VMID" + sync_proxmenux_gpu_guard_hooks + fi [[ "$HOST_CONFIG_CHANGED" == "true" ]] && update_initramfs_host # ── Phase 3: summary ───────────────────────────────── diff --git a/scripts/gpu_tpu/switch_gpu_mode.sh b/scripts/gpu_tpu/switch_gpu_mode.sh new file mode 100755 index 00000000..02a0f7b5 --- /dev/null +++ b/scripts/gpu_tpu/switch_gpu_mode.sh @@ -0,0 +1,945 @@ +#!/bin/bash +# ========================================================== +# ProxMenux - GPU Switch Mode (VM <-> LXC) +# ========================================================== +# Author : MacRimi +# Copyright : (c) 2024 MacRimi +# License : GPL-3.0 +# Version : 1.0 +# Last Updated: 05/04/2026 +# ========================================================== + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LOCAL_SCRIPTS_LOCAL="$(cd "$SCRIPT_DIR/.." && pwd)" +LOCAL_SCRIPTS_DEFAULT="/usr/local/share/proxmenux/scripts" +LOCAL_SCRIPTS="$LOCAL_SCRIPTS_DEFAULT" +BASE_DIR="/usr/local/share/proxmenux" +UTILS_FILE="$LOCAL_SCRIPTS/utils.sh" +if [[ -f "$LOCAL_SCRIPTS_LOCAL/utils.sh" ]]; then + LOCAL_SCRIPTS="$LOCAL_SCRIPTS_LOCAL" + UTILS_FILE="$LOCAL_SCRIPTS/utils.sh" +elif [[ ! -f "$UTILS_FILE" ]]; then + UTILS_FILE="$BASE_DIR/utils.sh" +fi + +LOG_FILE="/tmp/proxmenux_gpu_switch_mode.log" +screen_capture="/tmp/proxmenux_gpu_switch_mode_screen_$$.txt" + +if [[ -f "$UTILS_FILE" ]]; then + source "$UTILS_FILE" +fi +if [[ -f "$LOCAL_SCRIPTS_LOCAL/global/pci_passthrough_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_LOCAL/global/pci_passthrough_helpers.sh" +elif [[ -f "$LOCAL_SCRIPTS_DEFAULT/global/pci_passthrough_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_DEFAULT/global/pci_passthrough_helpers.sh" +fi +if [[ -f "$LOCAL_SCRIPTS_LOCAL/global/gpu_hook_guard_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_LOCAL/global/gpu_hook_guard_helpers.sh" +elif [[ -f "$LOCAL_SCRIPTS_DEFAULT/global/gpu_hook_guard_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_DEFAULT/global/gpu_hook_guard_helpers.sh" +fi + +load_language +initialize_cache + +declare -a ALL_GPU_PCIS=() +declare -a ALL_GPU_TYPES=() +declare -a ALL_GPU_NAMES=() +declare -a ALL_GPU_DRIVERS=() +declare -a ALL_GPU_VIDDID=() +declare -a SELECTED_GPU_IDX=() + +declare -a SELECTED_IOMMU_IDS=() +declare -a SELECTED_PCI_SLOTS=() + +declare -a LXC_AFFECTED_CTIDS=() +declare -a LXC_AFFECTED_NAMES=() +declare -a LXC_AFFECTED_RUNNING=() +declare -a LXC_AFFECTED_ONBOOT=() + +declare -a VM_AFFECTED_IDS=() +declare -a VM_AFFECTED_NAMES=() +declare -a VM_AFFECTED_RUNNING=() +declare -a VM_AFFECTED_ONBOOT=() + +TARGET_MODE="" # vm | lxc +CURRENT_MODE="" # vm | lxc | mixed +LXC_ACTION="" # keep_gpu_disable_onboot | remove_gpu_keep_onboot +VM_ACTION="" # keep_gpu_disable_onboot | remove_gpu_keep_onboot +GPU_COUNT=0 +HOST_CONFIG_CHANGED=false + +_set_title() { + show_proxmenux_logo + case "$TARGET_MODE" in + vm) msg_title "GPU Switch Mode (GPU -> VM)" ;; + lxc) msg_title "GPU Switch Mode (GPU -> LXC)" ;; + *) msg_title "GPU Switch Mode (VM <-> LXC)" ;; + esac +} + +_add_line_if_missing() { + local line="$1" + local file="$2" + touch "$file" + if ! grep -qFx "$line" "$file" 2>/dev/null; then + echo "$line" >>"$file" + HOST_CONFIG_CHANGED=true + fi +} + +_get_pci_driver() { + local pci_full="$1" + local driver_link="/sys/bus/pci/devices/${pci_full}/driver" + if [[ -L "$driver_link" ]]; then + basename "$(readlink "$driver_link")" + else + echo "none" + fi +} + +_ct_is_running() { + local ctid="$1" + pct status "$ctid" 2>/dev/null | grep -q "status: running" +} + +_ct_onboot_enabled() { + local ctid="$1" + pct config "$ctid" 2>/dev/null | grep -qE "^onboot:\s*1" +} + +_vm_is_running() { + local vmid="$1" + qm status "$vmid" 2>/dev/null | grep -q "status: running" +} + +_vm_onboot_enabled() { + local vmid="$1" + qm config "$vmid" 2>/dev/null | grep -qE "^onboot:\s*1" +} + +_get_iommu_group_ids() { + local pci_full="$1" + local group_link="/sys/bus/pci/devices/${pci_full}/iommu_group" + [[ ! -L "$group_link" ]] && return + + local group_dir + group_dir="/sys/kernel/iommu_groups/$(basename "$(readlink "$group_link")")/devices" + for dev_path in "${group_dir}/"*; do + [[ -e "$dev_path" ]] || continue + local dev dev_class vid did + dev=$(basename "$dev_path") + dev_class=$(cat "/sys/bus/pci/devices/${dev}/class" 2>/dev/null) + [[ "$dev_class" == "0x0604" || "$dev_class" == "0x0600" ]] && continue + vid=$(cat "/sys/bus/pci/devices/${dev}/vendor" 2>/dev/null | sed 's/0x//') + did=$(cat "/sys/bus/pci/devices/${dev}/device" 2>/dev/null | sed 's/0x//') + [[ -n "$vid" && -n "$did" ]] && echo "${vid}:${did}" + done +} + +_read_vfio_ids() { + local vfio_conf="/etc/modprobe.d/vfio.conf" + local ids_line ids_part + ids_line=$(grep "^options vfio-pci ids=" "$vfio_conf" 2>/dev/null | head -1) + [[ -z "$ids_line" ]] && return + ids_part=$(echo "$ids_line" | grep -oE 'ids=[^[:space:]]+' | sed 's/ids=//') + [[ -z "$ids_part" ]] && return + tr ',' '\n' <<< "$ids_part" | sed '/^$/d' +} + +_write_vfio_ids() { + local -a ids=("$@") + local vfio_conf="/etc/modprobe.d/vfio.conf" + touch "$vfio_conf" + + local current_line new_line ids_str + current_line=$(grep "^options vfio-pci ids=" "$vfio_conf" 2>/dev/null | head -1) + sed -i '/^options vfio-pci ids=/d' "$vfio_conf" + + if [[ ${#ids[@]} -gt 0 ]]; then + ids_str=$(IFS=','; echo "${ids[*]}") + new_line="options vfio-pci ids=${ids_str} disable_vga=1" + echo "$new_line" >>"$vfio_conf" + [[ "$current_line" != "$new_line" ]] && HOST_CONFIG_CHANGED=true + else + [[ -n "$current_line" ]] && HOST_CONFIG_CHANGED=true + fi +} + +_contains_in_array() { + local needle="$1" + shift + local x + for x in "$@"; do + [[ "$x" == "$needle" ]] && return 0 + done + return 1 +} + +_remove_gpu_blacklist() { + local gpu_type="$1" + local blacklist_file="/etc/modprobe.d/blacklist.conf" + [[ ! -f "$blacklist_file" ]] && return + local changed=false + case "$gpu_type" in + nvidia) + grep -qE '^blacklist (nouveau|nvidia|nvidiafb|lbm-nouveau)$|^options nouveau modeset=0$' "$blacklist_file" 2>/dev/null && changed=true + sed -i '/^blacklist nouveau$/d' "$blacklist_file" + sed -i '/^blacklist nvidia$/d' "$blacklist_file" + sed -i '/^blacklist nvidiafb$/d' "$blacklist_file" + sed -i '/^blacklist lbm-nouveau$/d' "$blacklist_file" + sed -i '/^options nouveau modeset=0$/d' "$blacklist_file" + ;; + amd) + grep -qE '^blacklist (radeon|amdgpu)$' "$blacklist_file" 2>/dev/null && changed=true + sed -i '/^blacklist radeon$/d' "$blacklist_file" + sed -i '/^blacklist amdgpu$/d' "$blacklist_file" + ;; + intel) + grep -qE '^blacklist i915$' "$blacklist_file" 2>/dev/null && changed=true + sed -i '/^blacklist i915$/d' "$blacklist_file" + ;; + esac + $changed && HOST_CONFIG_CHANGED=true + $changed +} + +_add_gpu_blacklist() { + local gpu_type="$1" + local blacklist_file="/etc/modprobe.d/blacklist.conf" + touch "$blacklist_file" + case "$gpu_type" in + nvidia) + _add_line_if_missing "blacklist nouveau" "$blacklist_file" + _add_line_if_missing "blacklist nvidia" "$blacklist_file" + _add_line_if_missing "blacklist nvidiafb" "$blacklist_file" + _add_line_if_missing "blacklist lbm-nouveau" "$blacklist_file" + _add_line_if_missing "options nouveau modeset=0" "$blacklist_file" + ;; + amd) + _add_line_if_missing "blacklist radeon" "$blacklist_file" + _add_line_if_missing "blacklist amdgpu" "$blacklist_file" + ;; + intel) + _add_line_if_missing "blacklist i915" "$blacklist_file" + ;; + esac +} + +_add_amd_softdep() { + local vfio_conf="/etc/modprobe.d/vfio.conf" + _add_line_if_missing "softdep radeon pre: vfio-pci" "$vfio_conf" + _add_line_if_missing "softdep amdgpu pre: vfio-pci" "$vfio_conf" + _add_line_if_missing "softdep snd_hda_intel pre: vfio-pci" "$vfio_conf" +} + +_remove_amd_softdep() { + local vfio_conf="/etc/modprobe.d/vfio.conf" + [[ ! -f "$vfio_conf" ]] && return + local changed=false + grep -qE '^softdep (radeon|amdgpu|snd_hda_intel) pre: vfio-pci$' "$vfio_conf" 2>/dev/null && changed=true + sed -i '/^softdep radeon pre: vfio-pci$/d' "$vfio_conf" + sed -i '/^softdep amdgpu pre: vfio-pci$/d' "$vfio_conf" + sed -i '/^softdep snd_hda_intel pre: vfio-pci$/d' "$vfio_conf" + $changed && HOST_CONFIG_CHANGED=true + $changed +} + +_add_vfio_modules() { + local modules=("vfio" "vfio_iommu_type1" "vfio_pci") + local kernel_major kernel_minor + kernel_major=$(uname -r | cut -d. -f1) + kernel_minor=$(uname -r | cut -d. -f2) + if (( kernel_major < 6 || ( kernel_major == 6 && kernel_minor < 2 ) )); then + modules+=("vfio_virqfd") + fi + local mod + for mod in "${modules[@]}"; do + _add_line_if_missing "$mod" /etc/modules + done +} + +_remove_vfio_modules_if_unused() { + local vfio_count + vfio_count=$(_read_vfio_ids | wc -l | tr -d '[:space:]') + [[ "$vfio_count" != "0" ]] && return 1 + local modules_file="/etc/modules" + [[ ! -f "$modules_file" ]] && return 1 + local had_any=false + grep -qE '^vfio$|^vfio_iommu_type1$|^vfio_pci$|^vfio_virqfd$' "$modules_file" 2>/dev/null && had_any=true + sed -i '/^vfio$/d' "$modules_file" + sed -i '/^vfio_iommu_type1$/d' "$modules_file" + sed -i '/^vfio_pci$/d' "$modules_file" + sed -i '/^vfio_virqfd$/d' "$modules_file" + if $had_any; then + HOST_CONFIG_CHANGED=true + return 0 + fi + return 1 +} + +_configure_iommu_options() { + _add_line_if_missing "options vfio_iommu_type1 allow_unsafe_interrupts=1" /etc/modprobe.d/iommu_unsafe_interrupts.conf + _add_line_if_missing "options kvm ignore_msrs=1" /etc/modprobe.d/kvm.conf +} + +_selected_types_unique() { + local -a out=() + local idx t + for idx in "${SELECTED_GPU_IDX[@]}"; do + t="${ALL_GPU_TYPES[$idx]}" + _contains_in_array "$t" "${out[@]}" || out+=("$t") + done + printf '%s\n' "${out[@]}" +} + +detect_host_gpus() { + ALL_GPU_PCIS=() + ALL_GPU_TYPES=() + ALL_GPU_NAMES=() + ALL_GPU_DRIVERS=() + ALL_GPU_VIDDID=() + + while IFS= read -r line; do + local pci_short pci_full name type driver viddid + pci_short=$(echo "$line" | awk '{print $1}') + pci_full="0000:${pci_short}" + name=$(echo "$line" | sed 's/^[^:]*[^:]: //' | sed 's/ \[.*//' | cut -c1-62) + if echo "$line" | grep -qi "Intel"; then + type="intel" + elif echo "$line" | grep -qiE "AMD|Advanced Micro|Radeon"; then + type="amd" + elif echo "$line" | grep -qi "NVIDIA"; then + type="nvidia" + else + continue + fi + driver=$(_get_pci_driver "$pci_full") + viddid=$(echo "$line" | grep -oE '\[[0-9a-f]{4}:[0-9a-f]{4}\]' | tr -d '[]') + ALL_GPU_PCIS+=("$pci_full") + ALL_GPU_TYPES+=("$type") + ALL_GPU_NAMES+=("$name") + ALL_GPU_DRIVERS+=("$driver") + ALL_GPU_VIDDID+=("$viddid") + done < <(lspci -nn | grep -iE "VGA compatible controller|3D controller|Display controller" | grep -iv "Ethernet\|Network\|Audio") + + GPU_COUNT=${#ALL_GPU_PCIS[@]} + if [[ "$GPU_COUNT" -eq 0 ]]; then + dialog --backtitle "ProxMenux" \ + --title "$(translate 'GPU Switch Mode')" \ + --msgbox "\n$(translate 'No compatible GPUs were detected on this host.')" 8 64 + exit 0 + fi +} + +_selected_gpu_current_mode() { + local mode="" + local idx drv cur + for idx in "${SELECTED_GPU_IDX[@]}"; do + drv="${ALL_GPU_DRIVERS[$idx]}" + if [[ "$drv" == "vfio-pci" ]]; then + cur="vm" + else + cur="lxc" + fi + + if [[ -z "$mode" ]]; then + mode="$cur" + elif [[ "$mode" != "$cur" ]]; then + echo "mixed" + return + fi + done + [[ -z "$mode" ]] && mode="lxc" + echo "$mode" +} + +select_target_mode() { + CURRENT_MODE=$(_selected_gpu_current_mode) + + if [[ "$CURRENT_MODE" == "mixed" ]]; then + local msg idx mode_label + msg="\n$(translate 'Mixed current mode detected in selected GPU(s).')\n\n" + msg+="$(translate 'Please select GPU(s) that are currently in the same mode and try again.')\n\n" + msg+="$(translate 'Selected GPU(s):')\n" + for idx in "${SELECTED_GPU_IDX[@]}"; do + if [[ "${ALL_GPU_DRIVERS[$idx]}" == "vfio-pci" ]]; then + mode_label="GPU -> VM" + else + mode_label="GPU -> LXC" + fi + msg+=" • ${ALL_GPU_NAMES[$idx]} (${ALL_GPU_PCIS[$idx]}) [${mode_label}]\n" + done + dialog --backtitle "ProxMenux" \ + --title "$(translate 'Mixed GPU Modes')" \ + --msgbox "$msg" 20 94 + return 2 + fi + + local menu_title menu_option_tag menu_option_desc current_mode_label current_mode_highlight + if [[ "$CURRENT_MODE" == "vm" ]]; then + TARGET_MODE="lxc" + current_mode_label="GPU -> VM (VFIO passthrough mode)" + menu_option_tag="lxc" + menu_option_desc="$(translate 'Switch to GPU -> LXC (native driver mode)')" + else + TARGET_MODE="vm" + current_mode_label="GPU -> LXC (native driver mode)" + menu_option_tag="vm" + menu_option_desc="$(translate 'Switch to GPU -> VM (VFIO passthrough mode)')" + fi + + current_mode_highlight="\\Zb\\Z4${current_mode_label}\\Zn" + menu_title="\n$(translate 'Select target mode for selected GPU(s):')\n\n$(translate 'Current mode'): ${current_mode_highlight}\n\n$(translate 'Available action'):" + local selected + selected=$(dialog --backtitle "ProxMenux" --colors \ + --title "$(translate 'GPU Switch Mode')" \ + --menu "$menu_title" 16 80 6 \ + "$menu_option_tag" "$menu_option_desc" \ + 2>&1 >/dev/tty) || exit 0 + + [[ "$selected" != "$menu_option_tag" ]] && exit 0 + return 0 +} + +# Return codes: +# 0 = compatible +# 1 = blocked and should exit +# 2 = blocked but user can reselect GPUs +validate_vm_mode_blocked_ids() { + [[ "$TARGET_MODE" != "vm" ]] && return 0 + + local -a blocked_lines=() + local idx viddid name pci + for idx in "${SELECTED_GPU_IDX[@]}"; do + viddid="${ALL_GPU_VIDDID[$idx]}" + name="${ALL_GPU_NAMES[$idx]}" + pci="${ALL_GPU_PCIS[$idx]}" + + case "$viddid" in + 8086:5a84|8086:5a85) + blocked_lines+=(" • ${name} (${pci}) [ID: ${viddid}]") + ;; + esac + done + + [[ ${#blocked_lines[@]} -eq 0 ]] && return 0 + + local msg + msg="\n\Zb\Z1$(translate 'Blocked GPU ID for VM Mode')\Zn\n\n" + msg+="$(translate 'At least one selected GPU is blocked by policy for GPU -> VM mode due to passthrough instability risk.')\n\n" + msg+="$(translate 'Blocked device(s):')\n" + local line + for line in "${blocked_lines[@]}"; do + msg+="${line}\n" + done + msg+="\n$(translate 'Recommended: use GPU -> LXC mode for these devices.')\n" + + if [[ "$GPU_COUNT" -gt 1 ]]; then + msg+="\n$(translate 'Please reselect GPU(s) and choose only compatible devices for VM mode.')" + dialog --backtitle "ProxMenux" --colors \ + --title "$(translate 'GPU Switch Mode Blocked')" \ + --msgbox "$msg" 20 88 + return 2 + fi + + dialog --backtitle "ProxMenux" --colors \ + --title "$(translate 'GPU Switch Mode Blocked')" \ + --msgbox "$msg" 19 84 + return 1 +} + +select_gpus() { + SELECTED_GPU_IDX=() + if [[ "$GPU_COUNT" -eq 1 ]]; then + SELECTED_GPU_IDX=(0) + return 0 + fi + + local -a menu_items=() + local i + for i in "${!ALL_GPU_PCIS[@]}"; do + menu_items+=("$i" "${ALL_GPU_NAMES[$i]} [${ALL_GPU_DRIVERS[$i]}] — ${ALL_GPU_PCIS[$i]}" "off") + done + + local raw sel + raw=$(dialog --backtitle "ProxMenux" \ + --title "$(translate 'Select GPU(s)')" \ + --checklist "\n$(translate 'Select one or more GPU(s) to switch mode:')" 20 96 12 \ + "${menu_items[@]}" \ + 2>&1 >/dev/tty) || exit 0 + + sel=$(echo "$raw" | tr -d '"') + if [[ -z "$sel" ]]; then + dialog --backtitle "ProxMenux" \ + --title "$(translate 'Select GPU(s)')" \ + --msgbox "\n$(translate 'No GPU selected.')" 7 52 + exit 0 + fi + read -ra SELECTED_GPU_IDX <<< "$sel" +} + +collect_selected_iommu_ids() { + SELECTED_IOMMU_IDS=() + SELECTED_PCI_SLOTS=() + + local idx pci viddid slot + for idx in "${SELECTED_GPU_IDX[@]}"; do + pci="${ALL_GPU_PCIS[$idx]}" + viddid="${ALL_GPU_VIDDID[$idx]}" + slot="${pci#0000:}" + slot="${slot%.*}" + SELECTED_PCI_SLOTS+=("$slot") + + local -a group_ids=() + mapfile -t group_ids < <(_get_iommu_group_ids "$pci") + if [[ ${#group_ids[@]} -gt 0 ]]; then + local gid + for gid in "${group_ids[@]}"; do + _contains_in_array "$gid" "${SELECTED_IOMMU_IDS[@]}" || SELECTED_IOMMU_IDS+=("$gid") + done + elif [[ -n "$viddid" ]]; then + _contains_in_array "$viddid" "${SELECTED_IOMMU_IDS[@]}" || SELECTED_IOMMU_IDS+=("$viddid") + fi + done +} + +_lxc_conf_uses_type() { + local conf="$1" + local gpu_type="$2" + case "$gpu_type" in + nvidia) grep -qE "dev[0-9]+:.*(/dev/nvidia|/dev/nvidia-caps)" "$conf" 2>/dev/null ;; + amd) grep -qE "dev[0-9]+:.*(/dev/dri|/dev/kfd)|lxc\.mount\.entry:.*dev/dri" "$conf" 2>/dev/null ;; + intel) grep -qE "dev[0-9]+:.*(/dev/dri)|lxc\.mount\.entry:.*dev/dri" "$conf" 2>/dev/null ;; + *) return 1 ;; + esac +} + +detect_affected_lxc_for_selected() { + LXC_AFFECTED_CTIDS=() + LXC_AFFECTED_NAMES=() + LXC_AFFECTED_RUNNING=() + LXC_AFFECTED_ONBOOT=() + + local -a types=() + mapfile -t types < <(_selected_types_unique) + + local conf + for conf in /etc/pve/lxc/*.conf; do + [[ -f "$conf" ]] || continue + local matched=false + local t + for t in "${types[@]}"; do + _lxc_conf_uses_type "$conf" "$t" && matched=true && break + done + $matched || continue + + local ctid ct_name run onb + ctid=$(basename "$conf" .conf) + ct_name=$(pct config "$ctid" 2>/dev/null | awk '/^hostname:/ {print $2}') + [[ -z "$ct_name" ]] && ct_name="CT-${ctid}" + run=0; onb=0 + _ct_is_running "$ctid" && run=1 + _ct_onboot_enabled "$ctid" && onb=1 + + LXC_AFFECTED_CTIDS+=("$ctid") + LXC_AFFECTED_NAMES+=("$ct_name") + LXC_AFFECTED_RUNNING+=("$run") + LXC_AFFECTED_ONBOOT+=("$onb") + done +} + +prompt_lxc_action_for_vm_mode() { + [[ ${#LXC_AFFECTED_CTIDS[@]} -eq 0 ]] && return 0 + + local running_count=0 onboot_count=0 i + for i in "${!LXC_AFFECTED_CTIDS[@]}"; do + [[ "${LXC_AFFECTED_RUNNING[$i]}" == "1" ]] && running_count=$((running_count + 1)) + [[ "${LXC_AFFECTED_ONBOOT[$i]}" == "1" ]] && onboot_count=$((onboot_count + 1)) + done + + local msg choice + msg="\n$(translate 'The selected GPU(s) are used in these LXC container(s):')\n\n" + for i in "${!LXC_AFFECTED_CTIDS[@]}"; do + local st ob + st="$(translate 'stopped')"; ob="onboot=0" + [[ "${LXC_AFFECTED_RUNNING[$i]}" == "1" ]] && st="$(translate 'running')" + [[ "${LXC_AFFECTED_ONBOOT[$i]}" == "1" ]] && ob="onboot=1" + msg+=" • CT ${LXC_AFFECTED_CTIDS[$i]} (${LXC_AFFECTED_NAMES[$i]}) [${st}, ${ob}]\n" + done + msg+="\n$(translate 'Switching to GPU -> VM mode requires exclusive VFIO binding.')\n" + [[ "$running_count" -gt 0 ]] && msg+="\Z3$(translate 'Running containers detected'): ${running_count}\Zn\n" + [[ "$onboot_count" -gt 0 ]] && msg+="\Z1\Zb$(translate 'Start on boot enabled'): ${onboot_count}\Zn\n" + msg+="\n$(translate 'Choose conflict policy:')" + + choice=$(dialog --backtitle "ProxMenux" --colors \ + --title "$(translate 'LXC Conflict Policy')" \ + --default-item "2" \ + --menu "$msg" 24 80 8 \ + "1" "$(translate 'Keep GPU in LXC config (disable Start on boot)')" \ + "2" "$(translate 'Remove GPU from LXC config (keep Start on boot)')" \ + 2>&1 >/dev/tty) || exit 0 + + case "$choice" in + 1) LXC_ACTION="keep_gpu_disable_onboot" ;; + 2) LXC_ACTION="remove_gpu_keep_onboot" ;; + *) exit 0 ;; + esac +} + +_remove_type_from_lxc_conf() { + local conf="$1" + local gpu_type="$2" + case "$gpu_type" in + nvidia) + sed -i '/dev[0-9]\+:.*\/dev\/nvidia/d' "$conf" + ;; + amd) + sed -i '/dev[0-9]\+:.*\/dev\/dri/d' "$conf" + sed -i '/dev[0-9]\+:.*\/dev\/kfd/d' "$conf" + sed -i '/lxc\.mount\.entry:.*dev\/dri/d' "$conf" + sed -i '/lxc\.cgroup2\.devices\.allow:.*226/d' "$conf" + ;; + intel) + sed -i '/dev[0-9]\+:.*\/dev\/dri/d' "$conf" + sed -i '/lxc\.mount\.entry:.*dev\/dri/d' "$conf" + sed -i '/lxc\.cgroup2\.devices\.allow:.*226/d' "$conf" + ;; + esac +} + +apply_lxc_action_for_vm_mode() { + [[ ${#LXC_AFFECTED_CTIDS[@]} -eq 0 ]] && return 0 + local -a types=() + mapfile -t types < <(_selected_types_unique) + + local i + for i in "${!LXC_AFFECTED_CTIDS[@]}"; do + local ctid conf + ctid="${LXC_AFFECTED_CTIDS[$i]}" + conf="/etc/pve/lxc/${ctid}.conf" + + if [[ "${LXC_AFFECTED_RUNNING[$i]}" == "1" ]]; then + msg_info "$(translate 'Stopping LXC') ${ctid}..." + pct stop "$ctid" >>"$LOG_FILE" 2>&1 || true + msg_ok "$(translate 'LXC stopped') ${ctid}" | tee -a "$screen_capture" + fi + + if [[ "$LXC_ACTION" == "keep_gpu_disable_onboot" && "${LXC_AFFECTED_ONBOOT[$i]}" == "1" ]]; then + if pct set "$ctid" -onboot 0 >>"$LOG_FILE" 2>&1; then + msg_warn "$(translate 'Start on boot disabled for LXC') ${ctid}" | tee -a "$screen_capture" + fi + fi + + if [[ "$LXC_ACTION" == "remove_gpu_keep_onboot" && -f "$conf" ]]; then + local t + for t in "${types[@]}"; do + _remove_type_from_lxc_conf "$conf" "$t" + done + msg_ok "$(translate 'GPU access removed from LXC') ${ctid}" | tee -a "$screen_capture" + fi + done +} + +detect_affected_vms_for_selected() { + VM_AFFECTED_IDS=() + VM_AFFECTED_NAMES=() + VM_AFFECTED_RUNNING=() + VM_AFFECTED_ONBOOT=() + + local conf + for conf in /etc/pve/qemu-server/*.conf; do + [[ -f "$conf" ]] || continue + local matched=false slot + for slot in "${SELECTED_PCI_SLOTS[@]}"; do + if grep -qE "hostpci[0-9]+:.*(0000:)?${slot}(\\.[0-7])?([,[:space:]]|$)" "$conf"; then + matched=true + break + fi + done + $matched || continue + + local vmid vm_name run onb + vmid=$(basename "$conf" .conf) + vm_name=$(grep "^name:" "$conf" 2>/dev/null | awk '{print $2}') + [[ -z "$vm_name" ]] && vm_name="VM-${vmid}" + run=0; onb=0 + _vm_is_running "$vmid" && run=1 + _vm_onboot_enabled "$vmid" && onb=1 + + VM_AFFECTED_IDS+=("$vmid") + VM_AFFECTED_NAMES+=("$vm_name") + VM_AFFECTED_RUNNING+=("$run") + VM_AFFECTED_ONBOOT+=("$onb") + done +} + +prompt_vm_action_for_lxc_mode() { + [[ ${#VM_AFFECTED_IDS[@]} -eq 0 ]] && return 0 + + local running_count=0 onboot_count=0 i + for i in "${!VM_AFFECTED_IDS[@]}"; do + [[ "${VM_AFFECTED_RUNNING[$i]}" == "1" ]] && running_count=$((running_count + 1)) + [[ "${VM_AFFECTED_ONBOOT[$i]}" == "1" ]] && onboot_count=$((onboot_count + 1)) + done + + local msg choice + msg="\n$(translate 'The selected GPU(s) are configured in these VM(s):')\n\n" + for i in "${!VM_AFFECTED_IDS[@]}"; do + local st ob + st="$(translate 'stopped')"; ob="onboot=0" + [[ "${VM_AFFECTED_RUNNING[$i]}" == "1" ]] && st="$(translate 'running')" + [[ "${VM_AFFECTED_ONBOOT[$i]}" == "1" ]] && ob="onboot=1" + msg+=" • VM ${VM_AFFECTED_IDS[$i]} (${VM_AFFECTED_NAMES[$i]}) [${st}, ${ob}]\n" + done + msg+="\n$(translate 'Switching to GPU -> LXC mode removes VFIO exclusivity.')\n" + [[ "$running_count" -gt 0 ]] && msg+="\Z3$(translate 'Running VM detected'): ${running_count}\Zn\n" + [[ "$onboot_count" -gt 0 ]] && msg+="\Z1\Zb$(translate 'Start on boot enabled'): ${onboot_count}\Zn\n" + msg+="\n$(translate 'Choose conflict policy:')" + + choice=$(dialog --backtitle "ProxMenux" --colors \ + --title "$(translate 'VM Conflict Policy')" \ + --default-item "1" \ + --menu "$msg" 24 80 8 \ + "1" "$(translate 'Keep GPU in VM config (disable Start on boot)')" \ + "2" "$(translate 'Remove GPU from VM config (keep Start on boot)')" \ + 2>&1 >/dev/tty) || exit 0 + + case "$choice" in + 1) VM_ACTION="keep_gpu_disable_onboot" ;; + 2) VM_ACTION="remove_gpu_keep_onboot" ;; + *) exit 0 ;; + esac +} + +apply_vm_action_for_lxc_mode() { + [[ ${#VM_AFFECTED_IDS[@]} -eq 0 ]] && return 0 + + local i + for i in "${!VM_AFFECTED_IDS[@]}"; do + local vmid conf + vmid="${VM_AFFECTED_IDS[$i]}" + conf="/etc/pve/qemu-server/${vmid}.conf" + + if [[ "${VM_AFFECTED_RUNNING[$i]}" == "1" ]]; then + msg_info "$(translate 'Stopping VM') ${vmid}..." + qm stop "$vmid" >>"$LOG_FILE" 2>&1 || true + msg_ok "$(translate 'VM stopped') ${vmid}" | tee -a "$screen_capture" + fi + + if [[ "$VM_ACTION" == "keep_gpu_disable_onboot" && "${VM_AFFECTED_ONBOOT[$i]}" == "1" ]]; then + if qm set "$vmid" -onboot 0 >>"$LOG_FILE" 2>&1; then + msg_warn "$(translate 'Start on boot disabled for VM') ${vmid}" | tee -a "$screen_capture" + fi + fi + + if [[ "$VM_ACTION" == "remove_gpu_keep_onboot" && -f "$conf" ]]; then + local slot + for slot in "${SELECTED_PCI_SLOTS[@]}"; do + sed -i "/^hostpci[0-9]\+:.*${slot}/d" "$conf" + done + msg_ok "$(translate 'GPU removed from VM config') ${vmid}" | tee -a "$screen_capture" + fi + done +} + +switch_to_vm_mode() { + detect_affected_lxc_for_selected + prompt_lxc_action_for_vm_mode + + _set_title + collect_selected_iommu_ids + apply_lxc_action_for_vm_mode + + msg_info "$(translate 'Configuring host for GPU -> VM mode...')" + _add_vfio_modules + msg_ok "$(translate 'VFIO modules configured in /etc/modules')" | tee -a "$screen_capture" + _configure_iommu_options + msg_ok "$(translate 'IOMMU interrupt remapping configured')" | tee -a "$screen_capture" + + local -a current_ids=() + mapfile -t current_ids < <(_read_vfio_ids) + local id + for id in "${SELECTED_IOMMU_IDS[@]}"; do + _contains_in_array "$id" "${current_ids[@]}" || current_ids+=("$id") + done + _write_vfio_ids "${current_ids[@]}" + if [[ ${#SELECTED_IOMMU_IDS[@]} -gt 0 ]]; then + local ids_label + ids_label=$(IFS=','; echo "${SELECTED_IOMMU_IDS[*]}") + msg_ok "$(translate 'vfio-pci IDs configured') (${ids_label})" | tee -a "$screen_capture" + fi + + local -a selected_types=() + mapfile -t selected_types < <(_selected_types_unique) + local t + for t in "${selected_types[@]}"; do + _add_gpu_blacklist "$t" + done + msg_ok "$(translate 'GPU host driver blacklisted in /etc/modprobe.d/blacklist.conf')" | tee -a "$screen_capture" + _contains_in_array "amd" "${selected_types[@]}" && _add_amd_softdep + + if [[ "$HOST_CONFIG_CHANGED" == "true" ]]; then + msg_info "$(translate 'Updating initramfs (this may take a minute)...')" + update-initramfs -u -k all >>"$LOG_FILE" 2>&1 + msg_ok "$(translate 'initramfs updated')" | tee -a "$screen_capture" + fi + + if declare -F sync_proxmenux_gpu_guard_hooks >/dev/null 2>&1; then + sync_proxmenux_gpu_guard_hooks + fi +} + +_type_has_remaining_vfio_ids() { + local gpu_type="$1" + local -a remaining_ids=("$@") + remaining_ids=("${remaining_ids[@]:1}") + local idx viddid + for idx in "${!ALL_GPU_TYPES[@]}"; do + [[ "${ALL_GPU_TYPES[$idx]}" != "$gpu_type" ]] && continue + viddid="${ALL_GPU_VIDDID[$idx]}" + _contains_in_array "$viddid" "${remaining_ids[@]}" && return 0 + done + return 1 +} + +switch_to_lxc_mode() { + collect_selected_iommu_ids + detect_affected_vms_for_selected + prompt_vm_action_for_lxc_mode + + _set_title + apply_vm_action_for_lxc_mode + + msg_info "$(translate 'Removing VFIO ownership for selected GPU(s)...')" + + local -a current_ids=() remaining_ids=() removed_ids=() + mapfile -t current_ids < <(_read_vfio_ids) + local id remove + for id in "${current_ids[@]}"; do + remove=false + _contains_in_array "$id" "${SELECTED_IOMMU_IDS[@]}" && remove=true + if $remove; then + removed_ids+=("$id") + else + remaining_ids+=("$id") + fi + done + _write_vfio_ids "${remaining_ids[@]}" + if [[ ${#removed_ids[@]} -gt 0 ]]; then + local ids_label + ids_label=$(IFS=','; echo "${removed_ids[*]}") + msg_ok "$(translate 'VFIO device IDs removed from /etc/modprobe.d/vfio.conf') (${ids_label})" | tee -a "$screen_capture" + fi + + local -a selected_types=() + mapfile -t selected_types < <(_selected_types_unique) + local t + for t in "${selected_types[@]}"; do + if ! _type_has_remaining_vfio_ids "$t" "${remaining_ids[@]}"; then + if _remove_gpu_blacklist "$t"; then + msg_ok "$(translate 'Driver blacklist removed for') ${t}" | tee -a "$screen_capture" + fi + fi + done + + if ! _type_has_remaining_vfio_ids "amd" "${remaining_ids[@]}"; then + _remove_amd_softdep || true + fi + + if _remove_vfio_modules_if_unused; then + msg_ok "$(translate 'VFIO modules removed from /etc/modules')" | tee -a "$screen_capture" + fi + + if [[ "$HOST_CONFIG_CHANGED" == "true" ]]; then + msg_info "$(translate 'Updating initramfs (this may take a minute)...')" + update-initramfs -u -k all >>"$LOG_FILE" 2>&1 + msg_ok "$(translate 'initramfs updated')" | tee -a "$screen_capture" + fi + + if declare -F sync_proxmenux_gpu_guard_hooks >/dev/null 2>&1; then + sync_proxmenux_gpu_guard_hooks + fi +} + +confirm_plan() { + local msg mode_line + if [[ "$TARGET_MODE" == "vm" ]]; then + mode_line="$(translate 'Target mode'): GPU -> VM (VFIO)" + else + mode_line="$(translate 'Target mode'): GPU -> LXC (native driver)" + fi + + msg="\n${mode_line}\n\n$(translate 'Selected GPU(s)'):\n" + local idx + for idx in "${SELECTED_GPU_IDX[@]}"; do + msg+=" • ${ALL_GPU_NAMES[$idx]} (${ALL_GPU_PCIS[$idx]}) [${ALL_GPU_DRIVERS[$idx]}]\n" + done + msg+="\n$(translate 'Do you want to proceed?')" + + dialog --backtitle "ProxMenux" --colors \ + --title "$(translate 'Confirm GPU Switch Mode')" \ + --yesno "$msg" 18 88 + [[ $? -ne 0 ]] && exit 0 +} + +final_summary() { + _set_title + cat "$screen_capture" + echo + echo -e "${TAB}${BL}Log: ${LOG_FILE}${CL}" + + if [[ "$HOST_CONFIG_CHANGED" == "true" ]]; then + echo -e "${TAB}${DGN}- $(translate 'Host GPU binding changed — reboot required.')${CL}" + whiptail --title "$(translate 'Reboot Required')" \ + --yesno "$(translate 'A reboot is required to apply the new GPU mode. Do you want to restart now?')" 10 74 + if [[ $? -eq 0 ]]; then + msg_warn "$(translate 'Rebooting the system...')" + reboot + else + msg_info2 "$(translate 'Please reboot manually to complete the switch.')" + msg_success "$(translate 'Press Enter to continue...')" + read -r + fi + else + echo -e "${TAB}${DGN}- $(translate 'No host VFIO/native binding changes were required.')${CL}" + msg_success "$(translate 'Press Enter to continue...')" + read -r + fi +} + +main() { + : >"$LOG_FILE" + : >"$screen_capture" + + detect_host_gpus + while true; do + select_gpus + select_target_mode + [[ $? -eq 2 ]] && continue + validate_vm_mode_blocked_ids + case $? in + 2) continue ;; + 1) exit 0 ;; + esac + break + done + confirm_plan + + clear + _set_title + echo + + if [[ "$TARGET_MODE" == "vm" ]]; then + switch_to_vm_mode + msg_success "$(translate 'GPU switch complete: VM mode prepared.')" + else + switch_to_lxc_mode + msg_success "$(translate 'GPU switch complete: LXC mode prepared.')" + fi + + final_summary + rm -f "$screen_capture" +} + +main "$@" diff --git a/scripts/menus/hw_grafics_menu.sh b/scripts/menus/hw_grafics_menu.sh index f687eefc..b0b9976c 100644 --- a/scripts/menus/hw_grafics_menu.sh +++ b/scripts/menus/hw_grafics_menu.sh @@ -38,6 +38,9 @@ while true; do "" "\Z4──────────────────────── VM ───────────────────────────\Zn" \ "6" "$(translate "Add GPU to VM (Intel | AMD | NVIDIA)") \Zb\Z4Switch Mode\Zn" \ "" "" \ + "" "\Z4──────────────────── SWICHT MODE ───────────────────────\Zn" \ + "7" "$(translate "Switch GPU Mode (VM <-> LXC)")" \ + "" "" \ "0" "$(translate "Return to Main Menu")" \ 2>&1 >/dev/tty ) || { exec bash "$LOCAL_SCRIPTS/menus/main_menu.sh"; } @@ -61,6 +64,9 @@ while true; do 6) bash "$LOCAL_SCRIPTS/gpu_tpu/add_gpu_vm.sh" ;; + 7) + bash "$LOCAL_SCRIPTS/gpu_tpu/switch_gpu_mode.sh" + ;; 0) exec bash "$LOCAL_SCRIPTS/menus/main_menu.sh" ;; diff --git a/scripts/menus/storage_menu.sh b/scripts/menus/storage_menu.sh index c1a54cb3..39339a27 100644 --- a/scripts/menus/storage_menu.sh +++ b/scripts/menus/storage_menu.sh @@ -1,50 +1,62 @@ #!/bin/bash - # ========================================================== -# ProxMenux - A menu-driven script for Proxmox VE management +# ProxMenux - Storage Menu # ========================================================== # Author : MacRimi # Copyright : (c) 2024 MacRimi -# License : (GPL-3.0) (https://github.com/MacRimi/ProxMenux/blob/main/LICENSE) -# Version : 1.1 -# Last Updated: 15/04/2025 +# License : GPL-3.0 +# Version : 2.0 +# Last Updated: 06/04/2026 # ========================================================== - -# Configuration ============================================ -LOCAL_SCRIPTS="/usr/local/share/proxmenux/scripts" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LOCAL_SCRIPTS_LOCAL="$(cd "$SCRIPT_DIR/.." && pwd)" +LOCAL_SCRIPTS_DEFAULT="/usr/local/share/proxmenux/scripts" +LOCAL_SCRIPTS="$LOCAL_SCRIPTS_DEFAULT" BASE_DIR="/usr/local/share/proxmenux" -UTILS_FILE="$BASE_DIR/utils.sh" -VENV_PATH="/opt/googletrans-env" +UTILS_FILE="$LOCAL_SCRIPTS/utils.sh" +if [[ -f "$LOCAL_SCRIPTS_LOCAL/utils.sh" ]]; then + LOCAL_SCRIPTS="$LOCAL_SCRIPTS_LOCAL" + UTILS_FILE="$LOCAL_SCRIPTS/utils.sh" +elif [[ ! -f "$UTILS_FILE" ]]; then + UTILS_FILE="$BASE_DIR/utils.sh" +fi if [[ -f "$UTILS_FILE" ]]; then source "$UTILS_FILE" fi load_language initialize_cache -# ========================================================== - while true; do - OPTION=$(dialog --clear --backtitle "ProxMenux" --title "$(translate "Disk and Storage Manager Menu")" \ - --menu "\n$(translate "Select an option:")" 20 70 10 \ - "1" "$(translate "Add Disk") Passthrough $(translate "to a VM")" \ - "2" "$(translate "Add Disk") Passthrough $(translate "to a LXC")" \ - "3" "$(translate "Import Disk Image to a VM")" \ - "4" "$(translate "Return to Main Menu")" \ - 2>&1 >/dev/tty) + OPTION=$(dialog --colors --backtitle "ProxMenux" \ + --title "$(translate "Disk and Storage Manager Menu")" \ + --menu "\n$(translate "Select an option:")" 24 84 14 \ + "" "\Z4──────────────────────── VM ───────────────────────────\Zn" \ + "1" "$(translate "Import Disk to VM")" \ + "2" "$(translate "Import Disk Image to VM")" \ + "3" "$(translate "Add Controller or NVMe PCIe to VM")" \ + "" "\Z4──────────────────────── LXC ──────────────────────────\Zn" \ + "4" "$(translate "Import Disk to LXC")" \ + "" "" \ + "0" "$(translate "Return to Main Menu")" \ + 2>&1 >/dev/tty + ) || { exec bash "$LOCAL_SCRIPTS/menus/main_menu.sh"; } - case $OPTION in - 1) + case "$OPTION" in + 1) bash "$LOCAL_SCRIPTS/storage/disk-passthrough.sh" ;; 2) - bash "$LOCAL_SCRIPTS/storage/disk-passthrough_ct.sh" - ;; - 3) bash "$LOCAL_SCRIPTS/storage/import-disk-image.sh" ;; + 3) + bash "$LOCAL_SCRIPTS/storage/add_controller_nvme_vm.sh" + ;; 4) + bash "$LOCAL_SCRIPTS/storage/disk-passthrough_ct.sh" + ;; + 0) exec bash "$LOCAL_SCRIPTS/menus/main_menu.sh" ;; *) diff --git a/scripts/storage/add_controller_nvme_vm.sh b/scripts/storage/add_controller_nvme_vm.sh new file mode 100755 index 00000000..bb899f75 --- /dev/null +++ b/scripts/storage/add_controller_nvme_vm.sh @@ -0,0 +1,401 @@ +#!/bin/bash +# ========================================================== +# ProxMenux - Add Controller or NVMe PCIe to VM +# ========================================================== +# Author : MacRimi +# Copyright : (c) 2024 MacRimi +# License : GPL-3.0 +# Version : 1.0 +# Last Updated: 06/04/2026 +# ========================================================== + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +LOCAL_SCRIPTS_LOCAL="$(cd "$SCRIPT_DIR/.." && pwd)" +LOCAL_SCRIPTS_DEFAULT="/usr/local/share/proxmenux/scripts" +LOCAL_SCRIPTS="$LOCAL_SCRIPTS_DEFAULT" +BASE_DIR="/usr/local/share/proxmenux" +UTILS_FILE="$LOCAL_SCRIPTS/utils.sh" +if [[ -f "$LOCAL_SCRIPTS_LOCAL/utils.sh" ]]; then + LOCAL_SCRIPTS="$LOCAL_SCRIPTS_LOCAL" + UTILS_FILE="$LOCAL_SCRIPTS/utils.sh" +elif [[ ! -f "$UTILS_FILE" ]]; then + UTILS_FILE="$BASE_DIR/utils.sh" +fi + +LOG_FILE="/tmp/proxmenux_add_controller_nvme_vm.log" +screen_capture="/tmp/proxmenux_add_controller_nvme_vm_screen_$$.txt" + +if [[ -f "$UTILS_FILE" ]]; then + source "$UTILS_FILE" +fi +if [[ -f "$LOCAL_SCRIPTS_LOCAL/global/vm_storage_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_LOCAL/global/vm_storage_helpers.sh" +elif [[ -f "$LOCAL_SCRIPTS_DEFAULT/global/vm_storage_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_DEFAULT/global/vm_storage_helpers.sh" +fi +if [[ -f "$LOCAL_SCRIPTS_LOCAL/global/pci_passthrough_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_LOCAL/global/pci_passthrough_helpers.sh" +elif [[ -f "$LOCAL_SCRIPTS_DEFAULT/global/pci_passthrough_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_DEFAULT/global/pci_passthrough_helpers.sh" +fi +if [[ -f "$LOCAL_SCRIPTS_LOCAL/global/gpu_hook_guard_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_LOCAL/global/gpu_hook_guard_helpers.sh" +elif [[ -f "$LOCAL_SCRIPTS_DEFAULT/global/gpu_hook_guard_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_DEFAULT/global/gpu_hook_guard_helpers.sh" +fi + +load_language +initialize_cache + +SELECTED_VMID="" +SELECTED_VM_NAME="" +declare -a SELECTED_CONTROLLER_PCIS=() + +set_title() { + show_proxmenux_logo + msg_title "$(translate "Add Controller or NVMe PCIe to VM")" +} + +select_target_vm() { + local -a vm_menu=() + local line vmid vmname vmstatus vm_machine status_label + + while IFS= read -r line; do + vmid=$(awk '{print $1}' <<< "$line") + vmname=$(awk '{print $2}' <<< "$line") + vmstatus=$(awk '{print $3}' <<< "$line") + [[ -z "$vmid" || "$vmid" == "VMID" ]] && continue + + vm_machine=$(qm config "$vmid" 2>/dev/null | awk -F': ' '/^machine:/ {print $2}') + [[ -z "$vm_machine" ]] && vm_machine="unknown" + status_label="${vmstatus}, ${vm_machine}" + vm_menu+=("$vmid" "${vmname} [${status_label}]") + done < <(qm list 2>/dev/null) + + if [[ ${#vm_menu[@]} -eq 0 ]]; then + dialog --backtitle "ProxMenux" \ + --title "$(translate "Add Controller or NVMe PCIe to VM")" \ + --msgbox "\n$(translate "No VMs available on this host.")" 8 64 + return 1 + fi + + SELECTED_VMID=$(dialog --backtitle "ProxMenux" \ + --title "$(translate "Select VM")" \ + --menu "\n$(translate "Select the target VM for PCI passthrough:")" 20 82 12 \ + "${vm_menu[@]}" \ + 2>&1 >/dev/tty) || return 1 + + SELECTED_VM_NAME=$(qm config "$SELECTED_VMID" 2>/dev/null | awk '/^name:/ {print $2}') + [[ -z "$SELECTED_VM_NAME" ]] && SELECTED_VM_NAME="VM-${SELECTED_VMID}" + return 0 +} + +validate_vm_requirements() { + local status + status=$(qm status "$SELECTED_VMID" 2>/dev/null | awk '{print $2}') + if [[ "$status" == "running" ]]; then + dialog --backtitle "ProxMenux" \ + --title "$(translate "VM Must Be Stopped")" \ + --msgbox "\n$(translate "The selected VM is running.")\n\n$(translate "Stop it first and run this option again.")" 10 72 + return 1 + fi + + if ! _vm_is_q35 "$SELECTED_VMID"; then + dialog --backtitle "ProxMenux" --colors \ + --title "$(translate "Incompatible Machine Type")" \ + --msgbox "\n\Zb\Z1$(translate "Controller/NVMe passthrough requires machine type q35.")\Zn\n\n$(translate "Selected VM"): ${SELECTED_VM_NAME} (${SELECTED_VMID})\n\n$(translate "Edit the VM machine type to q35 and try again.")" 12 80 + return 1 + fi + + if declare -F _pci_is_iommu_active >/dev/null 2>&1; then + if ! _pci_is_iommu_active; then + dialog --backtitle "ProxMenux" --colors \ + --title "$(translate "IOMMU Required")" \ + --msgbox "\n\Zb\Z1$(translate "IOMMU is not active on this host.")\Zn\n\n$(translate "PCIe passthrough requires IOMMU enabled in kernel and firmware.")\n\n$(translate "Enable IOMMU, reboot the host, and run again.")" 12 80 + return 1 + fi + fi + + return 0 +} + +select_controller_nvme() { + _refresh_host_storage_cache + + local -a menu_items=() + local blocked_report="" + local pci_path pci_full class_hex name controller_desc disk state slot_base + local -a controller_disks=() + local safe_count=0 blocked_count=0 hidden_target_count=0 + + while IFS= read -r pci_path; do + pci_full=$(basename "$pci_path") + class_hex=$(cat "$pci_path/class" 2>/dev/null | sed 's/^0x//') + [[ -z "$class_hex" ]] && continue + [[ "${class_hex:0:2}" != "01" ]] && continue + slot_base=$(_pci_slot_base "$pci_full") + + # Already attached to target VM: hide from selection. + if _vm_has_pci_slot "$SELECTED_VMID" "$slot_base"; then + hidden_target_count=$((hidden_target_count + 1)) + continue + fi + + name=$(lspci -nn -s "${pci_full#0000:}" 2>/dev/null | sed 's/^[^ ]* //') + [[ -z "$name" ]] && name="$(translate "Unknown storage controller")" + + controller_disks=() + while IFS= read -r disk; do + [[ -z "$disk" ]] && continue + _array_contains "$disk" "${controller_disks[@]}" || controller_disks+=("$disk") + done < <(_controller_block_devices "$pci_full") + + local -a blocked_reasons=() + for disk in "${controller_disks[@]}"; do + if _disk_is_host_system_used "$disk"; then + blocked_reasons+=("${disk} (${DISK_USAGE_REASON})") + elif _disk_used_in_guest_configs "$disk"; then + blocked_reasons+=("${disk} ($(translate "In use by VM/LXC config"))") + fi + done + + if [[ ${#blocked_reasons[@]} -gt 0 ]]; then + blocked_count=$((blocked_count + 1)) + blocked_report+="------------------------------------------------------------\n" + blocked_report+="PCI: ${pci_full}\n" + blocked_report+="Name: ${name}\n" + blocked_report+="$(translate "Blocked because protected or in-use disks are attached"):\n" + local reason + for reason in "${blocked_reasons[@]}"; do + blocked_report+=" - ${reason}\n" + done + blocked_report+="\n" + continue + fi + + local short_name + short_name=$(_shorten_text "$name" 42) + + local assigned_suffix="" + if [[ -n "$(_pci_assigned_vm_ids "$pci_full" "$SELECTED_VMID" 2>/dev/null | head -1)" ]]; then + assigned_suffix=" | $(translate "Assigned to VM")" + fi + + if [[ ${#controller_disks[@]} -gt 0 ]]; then + controller_desc="$(printf "%-42s [%s: %d]" "$short_name" "$(translate "attached disks")" "${#controller_disks[@]}")" + else + controller_desc="$(printf "%-42s [%s]" "$short_name" "$(translate "No attached disks")")" + fi + controller_desc+="${assigned_suffix}" + + state="off" + menu_items+=("$pci_full" "$controller_desc" "$state") + safe_count=$((safe_count + 1)) + done < <(ls -d /sys/bus/pci/devices/* 2>/dev/null | sort) + + if [[ "$safe_count" -eq 0 ]]; then + local msg + if [[ "$hidden_target_count" -gt 0 && "$blocked_count" -eq 0 ]]; then + msg="$(translate "All detected controllers/NVMe are already present in the selected VM.")\n\n$(translate "No additional device needs to be added.")" + else + msg="$(translate "No safe controllers/NVMe devices are available for passthrough.")\n\n" + fi + if [[ "$blocked_count" -gt 0 ]]; then + msg+="$(translate "Detected controllers blocked for safety:")\n\n${blocked_report}" + fi + dialog --backtitle "ProxMenux" \ + --title "$(translate "Controller + NVMe")" \ + --msgbox "$msg" 22 100 + return 1 + fi + + if [[ "$blocked_count" -gt 0 ]]; then + dialog --backtitle "ProxMenux" \ + --title "$(translate "Controller + NVMe")" \ + --msgbox "$(translate "Some controllers were hidden because they have host system disks attached.")\n\n${blocked_report}" 22 100 + fi + + local raw selected + raw=$(dialog --backtitle "ProxMenux" \ + --title "$(translate "Controller + NVMe")" \ + --checklist "\n$(translate "Select controllers/NVMe to passthrough (safe devices only):")\n\n$(translate "Only safe devices are shown in this list.")" 20 96 12 \ + "${menu_items[@]}" \ + 2>&1 >/dev/tty) || return 1 + + selected=$(echo "$raw" | tr -d '"') + SELECTED_CONTROLLER_PCIS=() + local pci + for pci in $selected; do + _array_contains "$pci" "${SELECTED_CONTROLLER_PCIS[@]}" || SELECTED_CONTROLLER_PCIS+=("$pci") + done + + if [[ ${#SELECTED_CONTROLLER_PCIS[@]} -eq 0 ]]; then + dialog --backtitle "ProxMenux" \ + --title "$(translate "Controller + NVMe")" \ + --msgbox "\n$(translate "No controller/NVMe selected.")" 8 62 + return 1 + fi + + return 0 +} + +confirm_summary() { + local msg + msg="\n$(translate "The following devices will be added to VM") ${SELECTED_VMID} (${SELECTED_VM_NAME}):\n\n" + local pci info + for pci in "${SELECTED_CONTROLLER_PCIS[@]}"; do + info=$(lspci -nn -s "${pci#0000:}" 2>/dev/null | sed 's/^[^ ]* //') + msg+=" - ${pci}${info:+ (${info})}\n" + done + msg+="\n$(translate "Do you want to continue?")" + + dialog --backtitle "ProxMenux" --colors \ + --title "$(translate "Confirm Controller + NVMe Assignment")" \ + --yesno "$msg" 18 90 + [[ $? -ne 0 ]] && return 1 + return 0 +} + +prompt_controller_conflict_policy() { + local pci="$1" + shift + local -a source_vms=("$@") + local msg vmid vm_name st ob + msg="$(translate "Selected device is already assigned to other VM(s):")\n\n" + for vmid in "${source_vms[@]}"; do + vm_name=$(_vm_name_by_id "$vmid") + st="stopped"; _vm_status_is_running "$vmid" && st="running" + ob="0"; _vm_onboot_is_enabled "$vmid" && ob="1" + msg+=" - VM ${vmid} (${vm_name}) [${st}, onboot=${ob}]\n" + done + msg+="\n$(translate "Choose action for this controller/NVMe:")" + + local choice + choice=$(whiptail --title "$(translate "Controller/NVMe Conflict Policy")" --menu "$msg" 22 96 10 \ + "1" "$(translate "Keep in source VM(s) + disable onboot + add to target VM")" \ + "2" "$(translate "Move to target VM (remove from source VM config)")" \ + "3" "$(translate "Skip this device")" \ + 3>&1 1>&2 2>&3) || { echo "skip"; return; } + + case "$choice" in + 1) echo "keep_disable_onboot" ;; + 2) echo "move_remove_source" ;; + *) echo "skip" ;; + esac +} + +apply_assignment() { + : >"$LOG_FILE" + set_title + echo + + msg_info "$(translate "Applying Controller/NVMe passthrough to VM") ${SELECTED_VMID}..." + msg_ok "$(translate "Target VM validated") (${SELECTED_VM_NAME} / ${SELECTED_VMID})" + msg_ok "$(translate "Selected devices"): ${#SELECTED_CONTROLLER_PCIS[@]}" + + local hostpci_idx=0 + msg_info "$(translate "Calculating next available hostpci slot...")" + if declare -F _pci_next_hostpci_index >/dev/null 2>&1; then + hostpci_idx=$(_pci_next_hostpci_index "$SELECTED_VMID" 2>/dev/null || echo 0) + else + local hostpci_existing + hostpci_existing=$(qm config "$SELECTED_VMID" 2>/dev/null) + while grep -q "^hostpci${hostpci_idx}:" <<< "$hostpci_existing"; do + hostpci_idx=$((hostpci_idx + 1)) + done + fi + msg_ok "$(translate "Next available hostpci slot"): hostpci${hostpci_idx}" + + local pci bdf assigned_count=0 + local need_hook_sync=false + for pci in "${SELECTED_CONTROLLER_PCIS[@]}"; do + bdf="${pci#0000:}" + if declare -F _pci_function_assigned_to_vm >/dev/null 2>&1; then + if _pci_function_assigned_to_vm "$pci" "$SELECTED_VMID"; then + msg_warn "$(translate "Controller/NVMe already present in VM config") ($pci)" + continue + fi + elif qm config "$SELECTED_VMID" 2>/dev/null | grep -qE "^hostpci[0-9]+:.*(0000:)?${bdf}([,[:space:]]|$)"; then + msg_warn "$(translate "Controller/NVMe already present in VM config") ($pci)" + continue + fi + + local -a source_vms=() + mapfile -t source_vms < <(_pci_assigned_vm_ids "$pci" "$SELECTED_VMID" 2>/dev/null) + if [[ ${#source_vms[@]} -gt 0 ]]; then + local has_running=false vmid action slot_base + for vmid in "${source_vms[@]}"; do + if _vm_status_is_running "$vmid"; then + has_running=true + msg_warn "$(translate "Controller/NVMe is in use by running VM") ${vmid} ($(translate "stop source VM first"))" + fi + done + + if $has_running; then + continue + fi + + action=$(prompt_controller_conflict_policy "$pci" "${source_vms[@]}") + case "$action" in + keep_disable_onboot) + for vmid in "${source_vms[@]}"; do + if _vm_onboot_is_enabled "$vmid"; then + if qm set "$vmid" -onboot 0 >>"$LOG_FILE" 2>&1; then + msg_warn "$(translate "Start on boot disabled for VM") ${vmid}" + fi + fi + done + need_hook_sync=true + ;; + move_remove_source) + slot_base=$(_pci_slot_base "$pci") + for vmid in "${source_vms[@]}"; do + if _remove_pci_slot_from_vm_config "$vmid" "$slot_base"; then + msg_ok "$(translate "Controller/NVMe removed from source VM") ${vmid} (${pci})" + fi + done + ;; + *) + msg_info2 "$(translate "Skipped device"): ${pci}" + continue + ;; + esac + fi + + if qm set "$SELECTED_VMID" --hostpci${hostpci_idx} "${pci},pcie=1" >>"$LOG_FILE" 2>&1; then + msg_ok "$(translate "Controller/NVMe assigned") (hostpci${hostpci_idx} -> ${pci})" + assigned_count=$((assigned_count + 1)) + hostpci_idx=$((hostpci_idx + 1)) + else + msg_error "$(translate "Failed to assign Controller/NVMe") (${pci})" + fi + done + + if $need_hook_sync && declare -F sync_proxmenux_gpu_guard_hooks >/dev/null 2>&1; then + ensure_proxmenux_gpu_guard_hookscript + sync_proxmenux_gpu_guard_hooks + msg_ok "$(translate "VM hook guard synced for shared controller/NVMe protection")" + fi + + echo + echo -e "${TAB}${BL}Log: ${LOG_FILE}${CL}" + + if [[ "$assigned_count" -gt 0 ]]; then + msg_success "$(translate "Completed. Controller/NVMe passthrough configured for VM") ${SELECTED_VMID}." + else + msg_warn "$(translate "No new Controller/NVMe entries were added.")" + fi + msg_success "$(translate "Press Enter to continue...")" + read -r +} + +main() { + select_target_vm || exit 0 + validate_vm_requirements || exit 0 + select_controller_nvme || exit 0 + confirm_summary || exit 0 + clear + apply_assignment +} + +main "$@" diff --git a/scripts/storage/disk-passthrough.sh b/scripts/storage/disk-passthrough.sh index 12224028..78b6c59f 100644 --- a/scripts/storage/disk-passthrough.sh +++ b/scripts/storage/disk-passthrough.sh @@ -66,6 +66,11 @@ fi VMID=$(echo "$VMID" | tr -d '"') +clear +show_proxmenux_logo +echo -e +msg_title "$(translate "Import Disk to VM")" +echo -e msg_ok "$(translate "VM selected successfully.")" diff --git a/scripts/storage/disk-passthrough_ct.sh b/scripts/storage/disk-passthrough_ct.sh index 16cd3652..d73e3d0d 100644 --- a/scripts/storage/disk-passthrough_ct.sh +++ b/scripts/storage/disk-passthrough_ct.sh @@ -120,7 +120,7 @@ CTID=$(echo "$CTID" | tr -d '"') clear show_proxmenux_logo echo -e -msg_title "$(translate "Add Disk") Passthrough $(translate "to a LXC")" +msg_title "$(translate "Import Disk to LXC")" echo -e msg_ok "$(translate "CT selected successfully.")" diff --git a/scripts/vm/disk_selector.sh b/scripts/vm/disk_selector.sh index ae0ecc30..68daa72d 100644 --- a/scripts/vm/disk_selector.sh +++ b/scripts/vm/disk_selector.sh @@ -271,15 +271,24 @@ function select_controller_nvme() { local menu_items=() local blocked_report="" - local pci_path pci_full class_hex name controller_disks controller_desc disk reason safe_count blocked_count state + local pci_path pci_full class_hex name controller_disks controller_desc disk safe_count blocked_count state slot_base hidden_target_count safe_count=0 blocked_count=0 + hidden_target_count=0 + local target_vmid="${VMID:-}" while IFS= read -r pci_path; do pci_full=$(basename "$pci_path") class_hex=$(cat "$pci_path/class" 2>/dev/null | sed 's/^0x//') [[ -z "$class_hex" ]] && continue [[ "${class_hex:0:2}" != "01" ]] && continue + slot_base=$(_pci_slot_base "$pci_full") + + # If target VM already has this slot assigned, hide it. + if [[ -n "$target_vmid" ]] && _vm_has_pci_slot "$target_vmid" "$slot_base"; then + hidden_target_count=$((hidden_target_count + 1)) + continue + fi name=$(lspci -nn -s "${pci_full#0000:}" 2>/dev/null | sed 's/^[^ ]* //') [[ -z "$name" ]] && name="$(translate "Unknown storage controller")" @@ -290,27 +299,44 @@ function select_controller_nvme() { _array_contains "$disk" "${controller_disks[@]}" || controller_disks+=("$disk") done < <(_controller_block_devices "$pci_full") - reason="" + local -a blocked_reasons=() for disk in "${controller_disks[@]}"; do if _disk_is_host_system_used "$disk"; then - reason+="${disk} (${DISK_USAGE_REASON}); " + blocked_reasons+=("${disk} (${DISK_USAGE_REASON})") elif _disk_used_in_guest_configs "$disk"; then - reason+="${disk} ($(translate "In use by VM/LXC config")); " + blocked_reasons+=("${disk} ($(translate "In use by VM/LXC config"))") fi done - if [[ -n "$reason" ]]; then + if [[ ${#blocked_reasons[@]} -gt 0 ]]; then blocked_count=$((blocked_count + 1)) - blocked_report+=" • ${pci_full} — ${name}\n $(translate "Blocked because protected/in-use disks are attached"): ${reason}\n" + blocked_report+="------------------------------------------------------------\n" + blocked_report+="PCI: ${pci_full}\n" + blocked_report+="Name: ${name}\n" + blocked_report+="$(translate "Blocked because protected/in-use disks are attached"):\n" + local reason + for reason in "${blocked_reasons[@]}"; do + blocked_report+=" - ${reason}\n" + done + blocked_report+="\n" continue fi - if [[ ${#controller_disks[@]} -gt 0 ]]; then - controller_desc="$(printf "%-50s [%s]" "$name" "$(IFS=,; echo "${controller_disks[*]}")")" - else - controller_desc="$(printf "%-50s [%s]" "$name" "$(translate "No attached disks detected")")" + local short_name + short_name=$(_shorten_text "$name" 42) + + local assigned_suffix="" + if [[ -n "$(_pci_assigned_vm_ids "$pci_full" "$target_vmid" 2>/dev/null | head -1)" ]]; then + assigned_suffix=" | $(translate "Assigned to VM")" fi + if [[ ${#controller_disks[@]} -gt 0 ]]; then + controller_desc="$(printf "%-42s [%s: %d]" "$short_name" "$(translate "attached disks")" "${#controller_disks[@]}")" + else + controller_desc="$(printf "%-42s [%s]" "$short_name" "$(translate "No attached disks")")" + fi + controller_desc+="${assigned_suffix}" + if _array_contains "$pci_full" "${CONTROLLER_NVME_PCIS[@]}"; then state="ON" else @@ -324,21 +350,25 @@ function select_controller_nvme() { stop_spinner if [[ $safe_count -eq 0 ]]; then local msg - msg="$(translate "No safe controllers/NVMe devices are available for passthrough.")\n\n" + if [[ "$hidden_target_count" -gt 0 && "$blocked_count" -eq 0 ]]; then + msg="$(translate "All detected controllers/NVMe are already present in the selected VM.")\n\n$(translate "No additional device needs to be added.")" + else + msg="$(translate "No safe controllers/NVMe devices are available for passthrough.")\n\n" + fi if [[ $blocked_count -gt 0 ]]; then msg+="$(translate "Detected controllers blocked for safety:")\n\n${blocked_report}" fi - whiptail --title "Controller + NVMe" --msgbox "$msg" 20 90 + whiptail --title "Controller + NVMe" --msgbox "$msg" 22 100 return 1 fi if [[ $blocked_count -gt 0 ]]; then - whiptail --title "Controller + NVMe" --msgbox "$(translate "Some controllers were hidden because they have host system disks attached.")\n\n${blocked_report}" 20 90 + whiptail --title "Controller + NVMe" --msgbox "$(translate "Some controllers were hidden because they have host system disks attached.")\n\n${blocked_report}" 22 100 fi local selected selected=$(whiptail --title "Controller + NVMe" --checklist \ - "$(translate "Select controllers/NVMe to passthrough (safe devices only):")" 20 90 10 \ + "$(translate "Select controllers/NVMe to passthrough (safe devices only):")\n\n$(translate "Only safe devices are shown in this list.")" 20 96 10 \ "${menu_items[@]}" 3>&1 1>&2 2>&3) [[ $? -ne 0 ]] && return 1 diff --git a/scripts/vm/synology.sh b/scripts/vm/synology.sh index 3d0bd0db..75f57983 100644 --- a/scripts/vm/synology.sh +++ b/scripts/vm/synology.sh @@ -56,6 +56,11 @@ if [[ -f "$LOCAL_SCRIPTS_LOCAL/global/pci_passthrough_helpers.sh" ]]; then elif [[ -f "$LOCAL_SCRIPTS_DEFAULT/global/pci_passthrough_helpers.sh" ]]; then source "$LOCAL_SCRIPTS_DEFAULT/global/pci_passthrough_helpers.sh" fi +if [[ -f "$LOCAL_SCRIPTS_LOCAL/global/gpu_hook_guard_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_LOCAL/global/gpu_hook_guard_helpers.sh" +elif [[ -f "$LOCAL_SCRIPTS_DEFAULT/global/gpu_hook_guard_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_DEFAULT/global/gpu_hook_guard_helpers.sh" +fi load_language initialize_cache # ========================================================== @@ -618,13 +623,20 @@ function select_controller_nvme() { local menu_items=() local blocked_report="" - local safe_count=0 blocked_count=0 - local pci_path pci_full class_hex name controller_disks disk reason state controller_desc + local safe_count=0 blocked_count=0 hidden_target_count=0 + local pci_path pci_full class_hex name controller_disks disk state controller_desc slot_base + local target_vmid="${VMID:-}" while IFS= read -r pci_path; do pci_full=$(basename "$pci_path") class_hex=$(cat "$pci_path/class" 2>/dev/null | sed 's/^0x//') [[ -z "$class_hex" || "${class_hex:0:2}" != "01" ]] && continue + slot_base=$(_pci_slot_base "$pci_full") + + if [[ -n "$target_vmid" ]] && _vm_has_pci_slot "$target_vmid" "$slot_base"; then + hidden_target_count=$((hidden_target_count + 1)) + continue + fi name=$(lspci -nn -s "${pci_full#0000:}" 2>/dev/null | sed 's/^[^ ]* //') [[ -z "$name" ]] && name="$(translate "Unknown storage controller")" @@ -635,27 +647,44 @@ function select_controller_nvme() { _array_contains "$disk" "${controller_disks[@]}" || controller_disks+=("$disk") done < <(_controller_block_devices "$pci_full") - reason="" + local -a blocked_reasons=() for disk in "${controller_disks[@]}"; do if _disk_is_host_system_used "$disk"; then - reason+="${disk} (${DISK_USAGE_REASON}); " + blocked_reasons+=("${disk} (${DISK_USAGE_REASON})") elif _disk_used_in_guest_configs "$disk"; then - reason+="${disk} ($(translate "In use by VM/LXC config")); " + blocked_reasons+=("${disk} ($(translate "In use by VM/LXC config"))") fi done - if [[ -n "$reason" ]]; then + if [[ ${#blocked_reasons[@]} -gt 0 ]]; then blocked_count=$((blocked_count + 1)) - blocked_report+=" • ${pci_full} — ${name}\n $(translate "Blocked because protected/in-use disks are attached"): ${reason}\n" + blocked_report+="------------------------------------------------------------\n" + blocked_report+="PCI: ${pci_full}\n" + blocked_report+="Name: ${name}\n" + blocked_report+="$(translate "Blocked because protected/in-use disks are attached"):\n" + local reason + for reason in "${blocked_reasons[@]}"; do + blocked_report+=" - ${reason}\n" + done + blocked_report+="\n" continue fi - if [[ ${#controller_disks[@]} -gt 0 ]]; then - controller_desc="$(printf "%-48s [%s]" "$name" "$(IFS=,; echo "${controller_disks[*]}")")" - else - controller_desc="$(printf "%-48s [%s]" "$name" "$(translate "No attached disks detected")")" + local short_name + short_name=$(_shorten_text "$name" 42) + + local assigned_suffix="" + if [[ -n "$(_pci_assigned_vm_ids "$pci_full" "$target_vmid" 2>/dev/null | head -1)" ]]; then + assigned_suffix=" | $(translate "Assigned to VM")" fi + if [[ ${#controller_disks[@]} -gt 0 ]]; then + controller_desc="$(printf "%-42s [%s: %d]" "$short_name" "$(translate "attached disks")" "${#controller_disks[@]}")" + else + controller_desc="$(printf "%-42s [%s]" "$short_name" "$(translate "No attached disks")")" + fi + controller_desc+="${assigned_suffix}" + if _array_contains "$pci_full" "${CONTROLLER_NVME_PCIS[@]}"; then state="ON" else @@ -668,17 +697,23 @@ function select_controller_nvme() { stop_spinner if [[ $safe_count -eq 0 ]]; then - whiptail --title "Controller + NVMe" --msgbox "$(translate "No safe controllers/NVMe devices are available for passthrough.")\n\n${blocked_report}" 20 90 + local msg + if [[ "$hidden_target_count" -gt 0 && "$blocked_count" -eq 0 ]]; then + msg="$(translate "All detected controllers/NVMe are already present in the selected VM.")\n\n$(translate "No additional device needs to be added.")" + else + msg="$(translate "No safe controllers/NVMe devices are available for passthrough.")\n\n${blocked_report}" + fi + whiptail --title "Controller + NVMe" --msgbox "$msg" 22 100 return 1 fi if [[ $blocked_count -gt 0 ]]; then - whiptail --title "Controller + NVMe" --msgbox "$(translate "Some controllers were hidden because they have host system disks attached.")\n\n${blocked_report}" 20 90 + whiptail --title "Controller + NVMe" --msgbox "$(translate "Some controllers were hidden because they have host system disks attached.")\n\n${blocked_report}" 22 100 fi local selected selected=$(whiptail --title "Controller + NVMe" --checklist \ - "$(translate "Select controllers/NVMe to passthrough (safe devices only):")" 20 90 10 \ + "$(translate "Select controllers/NVMe to passthrough (safe devices only):")\n\n$(translate "Only safe devices are shown in this list.")" 20 96 10 \ "${menu_items[@]}" 3>&1 1>&2 2>&3) || return 1 CONTROLLER_NVME_PCIS=() @@ -694,6 +729,34 @@ function select_passthrough_disk() { } # ========================================================== +function prompt_controller_conflict_policy() { + local pci="$1" + shift + local -a source_vms=("$@") + local msg vmid vm_name st ob + msg="$(translate "Selected controller/NVMe is already assigned to other VM(s):")\n\n" + for vmid in "${source_vms[@]}"; do + vm_name=$(_vm_name_by_id "$vmid") + st="stopped"; _vm_status_is_running "$vmid" && st="running" + ob="0"; _vm_onboot_is_enabled "$vmid" && ob="1" + msg+=" - VM ${vmid} (${vm_name}) [${st}, onboot=${ob}]\n" + done + msg+="\n$(translate "Choose action for this controller/NVMe:")" + + local choice + choice=$(whiptail --title "$(translate "Controller/NVMe Conflict Policy")" --menu "$msg" 22 96 10 \ + "1" "$(translate "Keep in source VM(s) + disable onboot + add to target VM")" \ + "2" "$(translate "Move to target VM (remove from source VM config)")" \ + "3" "$(translate "Skip this device")" \ + 3>&1 1>&2 2>&3) || { echo "skip"; return; } + + case "$choice" in + 1) echo "keep_disable_onboot" ;; + 2) echo "move_remove_source" ;; + *) echo "skip" ;; + esac +} + @@ -1220,6 +1283,7 @@ if [[ ${#CONTROLLER_NVME_PCIS[@]} -gt 0 ]]; then msg_error "$(translate "Controller + NVMe passthrough requires machine type q35. Skipping controller assignment.")" ERROR_FLAG=true else + NEED_HOOK_SYNC=false HOSTPCI_INDEX=0 if declare -F _pci_next_hostpci_index >/dev/null 2>&1; then HOSTPCI_INDEX=$(_pci_next_hostpci_index "$VMID" 2>/dev/null || echo 0) @@ -1244,6 +1308,48 @@ if [[ ${#CONTROLLER_NVME_PCIS[@]} -gt 0 ]]; then fi fi + SOURCE_VMS=() + mapfile -t SOURCE_VMS < <(_pci_assigned_vm_ids "$PCI_DEV" "$VMID" 2>/dev/null) + if [[ ${#SOURCE_VMS[@]} -gt 0 ]]; then + HAS_RUNNING=false + for SRC_VMID in "${SOURCE_VMS[@]}"; do + if _vm_status_is_running "$SRC_VMID"; then + HAS_RUNNING=true + msg_warn "$(translate "Controller/NVMe is in use by running VM") ${SRC_VMID} ($(translate "stop source VM first"))" + fi + done + + if [[ "$HAS_RUNNING" == "true" ]]; then + continue + fi + + CONFLICT_ACTION=$(prompt_controller_conflict_policy "$PCI_DEV" "${SOURCE_VMS[@]}") + case "$CONFLICT_ACTION" in + keep_disable_onboot) + for SRC_VMID in "${SOURCE_VMS[@]}"; do + if _vm_onboot_is_enabled "$SRC_VMID"; then + if qm set "$SRC_VMID" -onboot 0 >/dev/null 2>&1; then + msg_warn "$(translate "Start on boot disabled for VM") ${SRC_VMID}" + fi + fi + done + NEED_HOOK_SYNC=true + ;; + move_remove_source) + SLOT_BASE=$(_pci_slot_base "$PCI_DEV") + for SRC_VMID in "${SOURCE_VMS[@]}"; do + if _remove_pci_slot_from_vm_config "$SRC_VMID" "$SLOT_BASE"; then + msg_ok "$(translate "Controller/NVMe removed from source VM") ${SRC_VMID} (${PCI_DEV})" + fi + done + ;; + *) + msg_info2 "$(translate "Skipped device"): ${PCI_DEV}" + continue + ;; + esac + fi + if qm set "$VMID" --hostpci${HOSTPCI_INDEX} "${PCI_DEV},pcie=1" >/dev/null 2>&1; then msg_ok "Configured controller/NVMe as hostpci${HOSTPCI_INDEX}: ${PCI_DEV}" DISK_INFO="${DISK_INFO}

Controller/NVMe: ${PCI_DEV}

" @@ -1254,6 +1360,12 @@ if [[ ${#CONTROLLER_NVME_PCIS[@]} -gt 0 ]]; then ERROR_FLAG=true fi done + + if [[ "$NEED_HOOK_SYNC" == "true" ]] && declare -F sync_proxmenux_gpu_guard_hooks >/dev/null 2>&1; then + ensure_proxmenux_gpu_guard_hookscript + sync_proxmenux_gpu_guard_hooks + msg_ok "$(translate "VM hook guard synced for shared controller/NVMe protection")" + fi fi fi diff --git a/scripts/vm/vm_creator.sh b/scripts/vm/vm_creator.sh index a65864a4..464ade43 100644 --- a/scripts/vm/vm_creator.sh +++ b/scripts/vm/vm_creator.sh @@ -52,6 +52,11 @@ if [[ -f "$LOCAL_SCRIPTS_LOCAL/global/pci_passthrough_helpers.sh" ]]; then elif [[ -f "$LOCAL_SCRIPTS_DEFAULT/global/pci_passthrough_helpers.sh" ]]; then source "$LOCAL_SCRIPTS_DEFAULT/global/pci_passthrough_helpers.sh" fi +if [[ -f "$LOCAL_SCRIPTS_LOCAL/global/gpu_hook_guard_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_LOCAL/global/gpu_hook_guard_helpers.sh" +elif [[ -f "$LOCAL_SCRIPTS_DEFAULT/global/gpu_hook_guard_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_DEFAULT/global/gpu_hook_guard_helpers.sh" +fi load_language initialize_cache @@ -107,6 +112,34 @@ function select_interface_type() { msg_ok "$(translate "Disk interface selected:") $INTERFACE_TYPE" } +function prompt_controller_conflict_policy() { + local pci="$1" + shift + local -a source_vms=("$@") + local msg vmid vm_name st ob + msg="$(translate "Selected controller/NVMe is already assigned to other VM(s):")\n\n" + for vmid in "${source_vms[@]}"; do + vm_name=$(_vm_name_by_id "$vmid") + st="stopped"; _vm_status_is_running "$vmid" && st="running" + ob="0"; _vm_onboot_is_enabled "$vmid" && ob="1" + msg+=" - VM ${vmid} (${vm_name}) [${st}, onboot=${ob}]\n" + done + msg+="\n$(translate "Choose action for this controller/NVMe:")" + + local choice + choice=$(whiptail --title "$(translate "Controller/NVMe Conflict Policy")" --menu "$msg" 22 96 10 \ + "1" "$(translate "Keep in source VM(s) + disable onboot + add to target VM")" \ + "2" "$(translate "Move to target VM (remove from source VM config)")" \ + "3" "$(translate "Skip this device")" \ + 3>&1 1>&2 2>&3) || { echo "skip"; return; } + + case "$choice" in + 1) echo "keep_disable_onboot" ;; + 2) echo "move_remove_source" ;; + *) echo "skip" ;; + esac +} + # ========================================================== # EFI/TPM @@ -436,6 +469,7 @@ fi msg_error "$(translate "Controller + NVMe passthrough requires machine type q35. Skipping controller assignment.")" else local hostpci_idx=0 + local need_hook_sync=false if declare -F _pci_next_hostpci_index >/dev/null 2>&1; then hostpci_idx=$(_pci_next_hostpci_index "$VMID" 2>/dev/null || echo 0) else @@ -459,6 +493,47 @@ fi continue fi + local -a source_vms=() + mapfile -t source_vms < <(_pci_assigned_vm_ids "$pci" "$VMID" 2>/dev/null) + if [[ ${#source_vms[@]} -gt 0 ]]; then + local has_running=false vmid action slot_base + for vmid in "${source_vms[@]}"; do + if _vm_status_is_running "$vmid"; then + has_running=true + msg_warn "$(translate "Controller/NVMe is in use by running VM") ${vmid} ($(translate "stop source VM first"))" + fi + done + if $has_running; then + continue + fi + + action=$(prompt_controller_conflict_policy "$pci" "${source_vms[@]}") + case "$action" in + keep_disable_onboot) + for vmid in "${source_vms[@]}"; do + if _vm_onboot_is_enabled "$vmid"; then + if qm set "$vmid" -onboot 0 >/dev/null 2>&1; then + msg_warn "$(translate "Start on boot disabled for VM") ${vmid}" + fi + fi + done + need_hook_sync=true + ;; + move_remove_source) + slot_base=$(_pci_slot_base "$pci") + for vmid in "${source_vms[@]}"; do + if _remove_pci_slot_from_vm_config "$vmid" "$slot_base"; then + msg_ok "$(translate "Controller/NVMe removed from source VM") ${vmid} (${pci})" + fi + done + ;; + *) + msg_info2 "$(translate "Skipped device"): ${pci}" + continue + ;; + esac + fi + if qm set "$VMID" --hostpci${hostpci_idx} "${pci},pcie=1" >/dev/null 2>&1; then msg_ok "$(translate "Controller/NVMe assigned") (hostpci${hostpci_idx} → ${pci})" DISK_INFO+="

Controller/NVMe: ${pci}

" @@ -467,6 +542,12 @@ fi msg_error "$(translate "Failed to assign Controller/NVMe") (${pci})" fi done + + if $need_hook_sync && declare -F sync_proxmenux_gpu_guard_hooks >/dev/null 2>&1; then + ensure_proxmenux_gpu_guard_hookscript + sync_proxmenux_gpu_guard_hooks + msg_ok "$(translate "VM hook guard synced for shared controller/NVMe protection")" + fi fi fi diff --git a/scripts/vm/zimaos.sh b/scripts/vm/zimaos.sh index c4c2c380..30ae5bb4 100644 --- a/scripts/vm/zimaos.sh +++ b/scripts/vm/zimaos.sh @@ -49,6 +49,11 @@ if [[ -f "$LOCAL_SCRIPTS_LOCAL/global/pci_passthrough_helpers.sh" ]]; then elif [[ -f "$LOCAL_SCRIPTS_DEFAULT/global/pci_passthrough_helpers.sh" ]]; then source "$LOCAL_SCRIPTS_DEFAULT/global/pci_passthrough_helpers.sh" fi +if [[ -f "$LOCAL_SCRIPTS_LOCAL/global/gpu_hook_guard_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_LOCAL/global/gpu_hook_guard_helpers.sh" +elif [[ -f "$LOCAL_SCRIPTS_DEFAULT/global/gpu_hook_guard_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS_DEFAULT/global/gpu_hook_guard_helpers.sh" +fi load_language initialize_cache # ========================================================== @@ -633,13 +638,20 @@ function select_controller_nvme() { local menu_items=() local blocked_report="" - local safe_count=0 blocked_count=0 - local pci_path pci_full class_hex name controller_disks disk reason state controller_desc + local safe_count=0 blocked_count=0 hidden_target_count=0 + local pci_path pci_full class_hex name controller_disks disk state controller_desc slot_base + local target_vmid="${VMID:-}" while IFS= read -r pci_path; do pci_full=$(basename "$pci_path") class_hex=$(cat "$pci_path/class" 2>/dev/null | sed 's/^0x//') [[ -z "$class_hex" || "${class_hex:0:2}" != "01" ]] && continue + slot_base=$(_pci_slot_base "$pci_full") + + if [[ -n "$target_vmid" ]] && _vm_has_pci_slot "$target_vmid" "$slot_base"; then + hidden_target_count=$((hidden_target_count + 1)) + continue + fi name=$(lspci -nn -s "${pci_full#0000:}" 2>/dev/null | sed 's/^[^ ]* //') [[ -z "$name" ]] && name="$(translate "Unknown storage controller")" @@ -650,27 +662,44 @@ function select_controller_nvme() { _array_contains "$disk" "${controller_disks[@]}" || controller_disks+=("$disk") done < <(_controller_block_devices "$pci_full") - reason="" + local -a blocked_reasons=() for disk in "${controller_disks[@]}"; do if _disk_is_host_system_used "$disk"; then - reason+="${disk} (${DISK_USAGE_REASON}); " + blocked_reasons+=("${disk} (${DISK_USAGE_REASON})") elif _disk_used_in_guest_configs "$disk"; then - reason+="${disk} ($(translate "In use by VM/LXC config")); " + blocked_reasons+=("${disk} ($(translate "In use by VM/LXC config"))") fi done - if [[ -n "$reason" ]]; then + if [[ ${#blocked_reasons[@]} -gt 0 ]]; then blocked_count=$((blocked_count + 1)) - blocked_report+=" • ${pci_full} — ${name}\n $(translate "Blocked because protected/in-use disks are attached"): ${reason}\n" + blocked_report+="------------------------------------------------------------\n" + blocked_report+="PCI: ${pci_full}\n" + blocked_report+="Name: ${name}\n" + blocked_report+="$(translate "Blocked because protected/in-use disks are attached"):\n" + local reason + for reason in "${blocked_reasons[@]}"; do + blocked_report+=" - ${reason}\n" + done + blocked_report+="\n" continue fi - if [[ ${#controller_disks[@]} -gt 0 ]]; then - controller_desc="$(printf "%-48s [%s]" "$name" "$(IFS=,; echo "${controller_disks[*]}")")" - else - controller_desc="$(printf "%-48s [%s]" "$name" "$(translate "No attached disks detected")")" + local short_name + short_name=$(_shorten_text "$name" 42) + + local assigned_suffix="" + if [[ -n "$(_pci_assigned_vm_ids "$pci_full" "$target_vmid" 2>/dev/null | head -1)" ]]; then + assigned_suffix=" | $(translate "Assigned to VM")" fi + if [[ ${#controller_disks[@]} -gt 0 ]]; then + controller_desc="$(printf "%-42s [%s: %d]" "$short_name" "$(translate "attached disks")" "${#controller_disks[@]}")" + else + controller_desc="$(printf "%-42s [%s]" "$short_name" "$(translate "No attached disks")")" + fi + controller_desc+="${assigned_suffix}" + if _array_contains "$pci_full" "${CONTROLLER_NVME_PCIS[@]}"; then state="ON" else @@ -683,17 +712,23 @@ function select_controller_nvme() { stop_spinner if [[ $safe_count -eq 0 ]]; then - whiptail --title "Controller + NVMe" --msgbox "$(translate "No safe controllers/NVMe devices are available for passthrough.")\n\n${blocked_report}" 20 90 + local msg + if [[ "$hidden_target_count" -gt 0 && "$blocked_count" -eq 0 ]]; then + msg="$(translate "All detected controllers/NVMe are already present in the selected VM.")\n\n$(translate "No additional device needs to be added.")" + else + msg="$(translate "No safe controllers/NVMe devices are available for passthrough.")\n\n${blocked_report}" + fi + whiptail --title "Controller + NVMe" --msgbox "$msg" 22 100 return 1 fi if [[ $blocked_count -gt 0 ]]; then - whiptail --title "Controller + NVMe" --msgbox "$(translate "Some controllers were hidden because they have host system disks attached.")\n\n${blocked_report}" 20 90 + whiptail --title "Controller + NVMe" --msgbox "$(translate "Some controllers were hidden because they have host system disks attached.")\n\n${blocked_report}" 22 100 fi local selected selected=$(whiptail --title "Controller + NVMe" --checklist \ - "$(translate "Select controllers/NVMe to passthrough (safe devices only):")" 20 90 10 \ + "$(translate "Select controllers/NVMe to passthrough (safe devices only):")\n\n$(translate "Only safe devices are shown in this list.")" 20 96 10 \ "${menu_items[@]}" 3>&1 1>&2 2>&3) || return 1 CONTROLLER_NVME_PCIS=() @@ -709,6 +744,34 @@ function select_passthrough_disk() { } # ========================================================== +function prompt_controller_conflict_policy() { + local pci="$1" + shift + local -a source_vms=("$@") + local msg vmid vm_name st ob + msg="$(translate "Selected controller/NVMe is already assigned to other VM(s):")\n\n" + for vmid in "${source_vms[@]}"; do + vm_name=$(_vm_name_by_id "$vmid") + st="stopped"; _vm_status_is_running "$vmid" && st="running" + ob="0"; _vm_onboot_is_enabled "$vmid" && ob="1" + msg+=" - VM ${vmid} (${vm_name}) [${st}, onboot=${ob}]\n" + done + msg+="\n$(translate "Choose action for this controller/NVMe:")" + + local choice + choice=$(whiptail --title "$(translate "Controller/NVMe Conflict Policy")" --menu "$msg" 22 96 10 \ + "1" "$(translate "Keep in source VM(s) + disable onboot + add to target VM")" \ + "2" "$(translate "Move to target VM (remove from source VM config)")" \ + "3" "$(translate "Skip this device")" \ + 3>&1 1>&2 2>&3) || { echo "skip"; return; } + + case "$choice" in + 1) echo "keep_disable_onboot" ;; + 2) echo "move_remove_source" ;; + *) echo "skip" ;; + esac +} + @@ -1235,6 +1298,7 @@ function create_vm() { msg_error "$(translate "Controller + NVMe passthrough requires machine type q35. Skipping controller assignment.")" ERROR_FLAG=true else + NEED_HOOK_SYNC=false HOSTPCI_INDEX=0 if declare -F _pci_next_hostpci_index >/dev/null 2>&1; then HOSTPCI_INDEX=$(_pci_next_hostpci_index "$VMID" 2>/dev/null || echo 0) @@ -1259,6 +1323,48 @@ function create_vm() { fi fi + SOURCE_VMS=() + mapfile -t SOURCE_VMS < <(_pci_assigned_vm_ids "$PCI_DEV" "$VMID" 2>/dev/null) + if [[ ${#SOURCE_VMS[@]} -gt 0 ]]; then + HAS_RUNNING=false + for SRC_VMID in "${SOURCE_VMS[@]}"; do + if _vm_status_is_running "$SRC_VMID"; then + HAS_RUNNING=true + msg_warn "$(translate "Controller/NVMe is in use by running VM") ${SRC_VMID} ($(translate "stop source VM first"))" + fi + done + + if [[ "$HAS_RUNNING" == "true" ]]; then + continue + fi + + CONFLICT_ACTION=$(prompt_controller_conflict_policy "$PCI_DEV" "${SOURCE_VMS[@]}") + case "$CONFLICT_ACTION" in + keep_disable_onboot) + for SRC_VMID in "${SOURCE_VMS[@]}"; do + if _vm_onboot_is_enabled "$SRC_VMID"; then + if qm set "$SRC_VMID" -onboot 0 >/dev/null 2>&1; then + msg_warn "$(translate "Start on boot disabled for VM") ${SRC_VMID}" + fi + fi + done + NEED_HOOK_SYNC=true + ;; + move_remove_source) + SLOT_BASE=$(_pci_slot_base "$PCI_DEV") + for SRC_VMID in "${SOURCE_VMS[@]}"; do + if _remove_pci_slot_from_vm_config "$SRC_VMID" "$SLOT_BASE"; then + msg_ok "$(translate "Controller/NVMe removed from source VM") ${SRC_VMID} (${PCI_DEV})" + fi + done + ;; + *) + msg_info2 "$(translate "Skipped device"): ${PCI_DEV}" + continue + ;; + esac + fi + if qm set "$VMID" --hostpci${HOSTPCI_INDEX} "${PCI_DEV},pcie=1" >/dev/null 2>&1; then msg_ok "Configured controller/NVMe as hostpci${HOSTPCI_INDEX}: ${PCI_DEV}" DISK_INFO="${DISK_INFO}

Controller/NVMe: ${PCI_DEV}

" @@ -1269,6 +1375,12 @@ function create_vm() { ERROR_FLAG=true fi done + + if [[ "$NEED_HOOK_SYNC" == "true" ]] && declare -F sync_proxmenux_gpu_guard_hooks >/dev/null 2>&1; then + ensure_proxmenux_gpu_guard_hookscript + sync_proxmenux_gpu_guard_hooks + msg_ok "$(translate "VM hook guard synced for shared controller/NVMe protection")" + fi fi fi