Merge pull request #185 from MacRimi/develop

new version v1.2.1
This commit is contained in:
MacRimi
2026-04-21 21:14:12 +02:00
committed by GitHub
21 changed files with 2075 additions and 209 deletions

View File

@@ -28,6 +28,11 @@ NVIDIA_VID_DID=""
if [[ -f "$UTILS_FILE" ]]; then
source "$UTILS_FILE"
fi
# Load the shared PCI passthrough helper library, if it can be found.
# Lookup order:
#   1. the copy under $LOCAL_SCRIPTS/global/
#   2. ../global/ relative to the directory of this script (BASH_SOURCE[0])
# When neither file exists nothing is sourced and the script carries on.
# NOTE(review): presumably this library provides _pci_sriov_role, which the
# SR-IOV guard probes with `declare -F` before using — confirm.
if [[ -f "$LOCAL_SCRIPTS/global/pci_passthrough_helpers.sh" ]]; then
source "$LOCAL_SCRIPTS/global/pci_passthrough_helpers.sh"
elif [[ -f "$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)/global/pci_passthrough_helpers.sh" ]]; then
source "$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)/global/pci_passthrough_helpers.sh"
fi
if [[ -f "$LOCAL_SCRIPTS/global/gpu_hook_guard_helpers.sh" ]]; then
source "$LOCAL_SCRIPTS/global/gpu_hook_guard_helpers.sh"
elif [[ -f "$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)/global/gpu_hook_guard_helpers.sh" ]]; then
@@ -259,6 +264,67 @@ select_container() {
# ============================================================
# GPU checklist selection
# ============================================================
# ============================================================
# SR-IOV guard — refuse to pass an SR-IOV GPU to an LXC via ProxMenux.
# Although the LXC flow does not rewrite vfio.conf/blacklist (so it is
# not destructive like add_gpu_vm.sh), it blindly globs /dev/dri/card*
# and /dev/dri/renderD* without mapping each node to its BDF. With 7
# VFs the container may end up holding any/all of them, which is not
# the behavior a user asking for "one VF to this LXC" expects. Until a
# VF-aware LXC flow exists, stop and point to manual configuration —
# matching the policy used in switch_gpu_mode.sh and add_gpu_vm.sh.
# ============================================================
# Stops the LXC flow when any selected GPU takes part in SR-IOV.
# Globals read: SELECTED_GPUS, INTEL_PCI, AMD_PCI, NVIDIA_PCI.
# Returns 0 when the _pci_sriov_role helper is not loaded or when no
# selected GPU is a VF / PF with active VFs. Otherwise shows a dialog
# listing the offending devices and terminates the script with exit 0.
check_sriov_and_block_if_needed() {
    # Without the helper nothing can be classified; let the flow continue.
    if ! declare -F _pci_sriov_role >/dev/null 2>&1; then
        return 0
    fi

    local vendor bdf sriov_role role_kind
    local -a blocked=()

    for vendor in "${SELECTED_GPUS[@]}"; do
        # Map the selected vendor tag to the PCI address detected for it.
        if [[ "$vendor" == "intel" ]]; then
            bdf="$INTEL_PCI"
        elif [[ "$vendor" == "amd" ]]; then
            bdf="$AMD_PCI"
        elif [[ "$vendor" == "nvidia" ]]; then
            bdf="$NVIDIA_PCI"
        else
            continue
        fi
        [[ -z "$bdf" ]] && continue

        # The helper prints "<kind> <detail>"; only the first word selects
        # the branch, the remainder is kept for the message.
        sriov_role=$(_pci_sriov_role "$bdf")
        role_kind="${sriov_role%% *}"
        if [[ "$role_kind" == "vf" ]]; then
            blocked+=("${bdf}|vf|${sriov_role#vf }")
        elif [[ "$role_kind" == "pf-active" ]]; then
            blocked+=("${bdf}|pf-active|${sriov_role#pf-active }")
        fi
    done

    if (( ${#blocked[@]} == 0 )); then
        return 0
    fi

    # Each record is "bdf|kind|detail"; unpack and render one bullet per GPU.
    local text record dev rest tag extra
    text="\n\Zb\Z6$(translate 'SR-IOV Configuration Detected')\Zn\n\n"
    for record in "${blocked[@]}"; do
        dev="${record%%|*}"
        rest="${record#*|}"
        tag="${rest%%|*}"
        extra="${record##*|}"
        case "$tag" in
            vf)
                text+=" • \Zb${dev}\Zn — $(translate 'Virtual Function (parent PF:') ${extra})\n"
                ;;
            *)
                text+=" • \Zb${dev}\Zn — $(translate 'Physical Function with') ${extra} $(translate 'active VFs')\n"
                ;;
        esac
    done
    text+="\n$(translate 'To pass SR-IOV Virtual Functions to a container, edit the LXC configuration manually via the Proxmox web interface. The Physical Function will remain bound to the native driver.')"

    dialog --backtitle "ProxMenux" --colors \
        --title "$(translate 'SR-IOV Configuration Detected')" \
        --msgbox "$text" 16 82
    exit 0
}
select_gpus() {
local gpu_items=()
$HAS_INTEL && gpu_items+=("intel" "${INTEL_NAME:-Intel iGPU}" "off")
@@ -927,6 +993,7 @@ main() {
detect_host_gpus
select_container
select_gpus
check_sriov_and_block_if_needed
check_vfio_switch_mode
precheck_existing_lxc_gpu_config

View File

@@ -71,6 +71,7 @@ SELECTED_GPU_NAME=""
declare -a IOMMU_DEVICES=() # all PCI addrs in IOMMU group (endpoint devices)
declare -a IOMMU_VFIO_IDS=() # vendor:device for vfio-pci ids=
declare -a EXTRA_AUDIO_DEVICES=() # sibling audio function(s), typically *.1
declare -a EXTRA_AUDIO_INFO=() # parallel to EXTRA_AUDIO_DEVICES — "BDF|current_driver" pairs for the summary dialog
IOMMU_GROUP=""
IOMMU_PENDING_REBOOT=false
@@ -212,28 +213,32 @@ _strip_colors() {
printf '%s' "$1" | sed 's/\\Z[0-9a-zA-Z]//g'
}
# Msgbox: dialog in standalone mode, whiptail in wizard mode.
# I/O pinned to /dev/tty so the dialog renders reliably regardless of
# how the caller redirected stdin/stdout, and immune to the SIGTTOU
# trap that fires when this script is resumed as a background job.
#
# Arguments: $1 title, $2 message (may contain dialog \Z colour codes),
#            $3 height (default 10), $4 width (default 72).
# Returns:   exit status of the underlying whiptail/dialog call.
#
# Exactly one --msgbox option line is kept per branch. A second, un-pinned
# copy after the line continuation would be run by the shell as a separate
# command literally named "--msgbox".
_pmx_msgbox() {
    local title="$1" msg="$2" h="${3:-10}" w="${4:-72}"
    if [[ "$WIZARD_CALL" == "true" ]]; then
        # whiptail does not understand \Z colour codes, so strip them first.
        whiptail --backtitle "ProxMenux" --title "$title" \
            --msgbox "$(_strip_colors "$msg")" "$h" "$w" < /dev/tty > /dev/tty
    else
        dialog --backtitle "ProxMenux" --colors \
            --title "$title" --msgbox "$msg" "$h" "$w" < /dev/tty > /dev/tty
    fi
}
# Yesno: dialog in standalone mode, whiptail in wizard mode.
# Returns 0 for yes, 1 for no (same as dialog/whiptail).
# I/O is pinned to /dev/tty so the prompt renders regardless of how the
# caller redirected stdin/stdout.
#
# Arguments: $1 title, $2 message (may contain dialog \Z colour codes),
#            $3 height (default 10), $4 width (default 72).
#
# Exactly one --yesno option line is kept per branch. A second, un-pinned
# copy after the line continuation would run as a separate command and the
# function would return that command's "not found" status instead of the
# user's answer.
_pmx_yesno() {
    local title="$1" msg="$2" h="${3:-10}" w="${4:-72}"
    if [[ "$WIZARD_CALL" == "true" ]]; then
        # whiptail does not understand \Z colour codes, so strip them first.
        whiptail --backtitle "ProxMenux" --title "$title" \
            --yesno "$(_strip_colors "$msg")" "$h" "$w" < /dev/tty > /dev/tty
    else
        dialog --backtitle "ProxMenux" --colors \
            --title "$title" --yesno "$msg" "$h" "$w" < /dev/tty > /dev/tty
    fi
    return $?
}
@@ -265,6 +270,27 @@ _pmx_menu() {
return $?
}
# Checklist: dialog in standalone mode, whiptail in wizard mode.
# Usage: _pmx_checklist title msg h w list_h tag1 desc1 state1 tag2 desc2 state2 ...
# state is "on" or "off". Returns the space-separated list of selected
# tags on stdout (one line). Returns non-zero if the user cancels.
# Multi-select checklist wrapper.
# Arguments: $1 title, $2 prompt text, $3 height, $4 width, $5 list height,
#            followed by tag/description/state triplets.
# Output:    the selection produced by the backend, on stdout.
# Returns:   the backend's exit status (non-zero when the user cancels).
_pmx_checklist() {
    local heading="$1" body="$2" height="$3" width="$4" rows="$5"
    shift 5

    local rc
    case "$WIZARD_CALL" in
        true)
            # whiptail writes the selection to stderr; swap stdout and
            # stderr through fd 3 so the caller can capture it with $(...).
            whiptail --backtitle "ProxMenux" \
                --title "$heading" \
                --checklist "$(_strip_colors "$body")" "$height" "$width" "$rows" \
                "$@" 3>&1 1>&2 2>&3
            rc=$?
            ;;
        *)
            # dialog: selection (stderr) goes to the caller's stdout, the UI
            # itself is drawn on the terminal.
            dialog --backtitle "ProxMenux" --colors \
                --title "$heading" \
                --checklist "$body" "$height" "$width" "$rows" \
                "$@" 2>&1 >/dev/tty
            rc=$?
            ;;
    esac
    return "$rc"
}
_file_has_exact_line() {
local line="$1"
local file="$2"
@@ -718,6 +744,48 @@ select_gpu() {
}
# ==========================================================
# SR-IOV guard — refuse to assign a Virtual Function or a Physical
# Function with active VFs. Matches the policy in switch_gpu_mode.sh:
# writing this GPU's vendor:device to /etc/modprobe.d/vfio.conf would
# let vfio-pci claim the PF at next boot and destroy the whole VF
# tree. ProxMenux does not yet manage SR-IOV lifecycle, so we stop
# before touching vfio.conf / blacklist.conf.
# ==========================================================
# Aborts the VM passthrough flow when the selected GPU is an SR-IOV
# Virtual Function or a Physical Function with active VFs.
# Globals read: SELECTED_GPU_PCI, WIZARD_CALL.
# Returns 0 when the _pci_sriov_role helper is missing, no GPU is selected,
# or the device is not involved in SR-IOV. Otherwise shows a message box,
# records "cancelled" for the wizard when WIZARD_CALL is "true", and
# terminates the script with exit 0.
check_sriov_and_block_if_needed() {
    if ! declare -F _pci_sriov_role >/dev/null 2>&1; then
        return 0
    fi
    if [[ -z "$SELECTED_GPU_PCI" ]]; then
        return 0
    fi

    local sriov_role role_kind explanation=""
    sriov_role=$(_pci_sriov_role "$SELECTED_GPU_PCI")
    # First word of the helper output is the role; the rest is its detail.
    role_kind="${sriov_role%% *}"

    if [[ "$role_kind" == "vf" ]]; then
        local pf_bdf="${sriov_role#vf }"
        explanation="$(translate 'The selected device') \Zb${SELECTED_GPU_PCI}\Zn $(translate 'is an SR-IOV Virtual Function (VF). Its parent Physical Function is') \Zb${pf_bdf}\Zn."
    elif [[ "$role_kind" == "pf-active" ]]; then
        local vf_count="${sriov_role#pf-active }"
        explanation="$(translate 'The selected device') \Zb${SELECTED_GPU_PCI}\Zn $(translate 'is a Physical Function with') \Zb${vf_count}\Zn $(translate 'active Virtual Functions. Changing its driver binding would destroy every VF.')"
    else
        return 0
    fi

    local body
    body="\n\Zb\Z6$(translate 'SR-IOV Configuration Detected')\Zn\n\n"
    body+="${explanation}\n\n"
    body+="$(translate 'To assign VFs to VMs or LXCs, edit the configuration manually via the Proxmox web interface. The Physical Function will remain bound to the native driver.')"

    _pmx_msgbox "$(translate 'SR-IOV Configuration Detected')" "$body" 16 82
    if [[ "$WIZARD_CALL" == "true" ]]; then
        _set_wizard_result "cancelled"
    fi
    exit 0
}
# ==========================================================
# Phase 1 — Step 4: Single-GPU warning
# ==========================================================
@@ -1067,30 +1135,39 @@ analyze_iommu_group() {
}
detect_optional_gpu_audio() {
EXTRA_AUDIO_DEVICES=()
local sibling_audio="${SELECTED_GPU_PCI%.*}.1"
local dev_path="/sys/bus/pci/devices/${sibling_audio}"
[[ -d "$dev_path" ]] || return 0
# Returns 0 if the BDF at $1 is a PCI device whose class code starts with
# 04 (the class range this script treats as audio), 1 otherwise.
# Arguments: $1 - full PCI address as it appears under /sys/bus/pci/devices
# Returns:   1 for an empty argument, a BDF that does not exist in sysfs,
#            an unreadable class file, or any class outside 04xx.
_pci_is_audio_device() {
    local bdf="$1"
    [[ -n "$bdf" ]] || return 1
    local dev_path="/sys/bus/pci/devices/${bdf}"
    [[ -d "$dev_path" ]] || return 1
    local class_hex
    # sysfs exposes the class as e.g. "0x040300"; drop the 0x prefix.
    class_hex=$(cat "${dev_path}/class" 2>/dev/null | sed 's/^0x//')
    # The comparison must be the last command so its status is the result.
    # A preceding `|| return 0` would report every non-audio device as audio.
    [[ "${class_hex:0:2}" == "04" ]]
}
local already_in_group=false dev
# Registers an audio BDF for passthrough alongside the GPU.
# Idempotent: skips if the BDF was already recorded by analyze_iommu_group
# (IOMMU_DEVICES) or by a previous call here (EXTRA_AUDIO_DEVICES).
# Updates EXTRA_AUDIO_DEVICES, EXTRA_AUDIO_INFO, and IOMMU_VFIO_IDS.
_register_gpu_audio_device() {
local bdf="$1"
[[ -n "$bdf" ]] || return 1
local dev_path="/sys/bus/pci/devices/${bdf}"
[[ -d "$dev_path" ]] || return 1
local dev
for dev in "${IOMMU_DEVICES[@]}"; do
if [[ "$dev" == "$sibling_audio" ]]; then
already_in_group=true
break
fi
[[ "$dev" == "$bdf" ]] && return 0
done
for dev in "${EXTRA_AUDIO_DEVICES[@]}"; do
[[ "$dev" == "$bdf" ]] && return 0
done
if [[ "$already_in_group" == "true" ]]; then
return 0
fi
EXTRA_AUDIO_DEVICES+=("$sibling_audio")
EXTRA_AUDIO_DEVICES+=("$bdf")
local drv
drv=$(_get_pci_driver "$bdf")
EXTRA_AUDIO_INFO+=("${bdf}|${drv}")
local vid did new_id
vid=$(cat "${dev_path}/vendor" 2>/dev/null | sed 's/0x//')
@@ -1101,6 +1178,98 @@ detect_optional_gpu_audio() {
IOMMU_VFIO_IDS+=("$new_id")
fi
fi
return 0
}
# Scans the host for all class-04 PCI audio devices and lets the user
# pick which ones to pass to the VM. Only invoked when the selected GPU
# has no .1 sibling audio function — the dGPU fast path continues to
# auto-include that sibling without prompting.
#
# Devices already in the GPU's IOMMU group are excluded from the list
# (analyze_iommu_group has already queued them). The checklist defaults
# to all-OFF so nothing gets passed through silently.
# Offers every host audio device that is not already queued through the
# GPU's IOMMU group in a checklist (all entries default to "off") and
# registers whichever ones the user ticks.
# Globals read: IOMMU_DEVICES.
# Returns 0 when there are no candidates, the user cancels, or nothing is
# selected; otherwise the status of the last registration call.
_prompt_user_for_audio_devices() {
    local -a audio_bdfs=()
    local sys_entry bdf member already_queued

    # Pass 1: walk sysfs and keep audio devices not yet in IOMMU_DEVICES.
    for sys_entry in /sys/bus/pci/devices/*; do
        [[ -d "$sys_entry" ]] || continue
        bdf="${sys_entry##*/}"
        _pci_is_audio_device "$bdf" || continue

        already_queued=false
        for member in "${IOMMU_DEVICES[@]}"; do
            if [[ "$member" == "$bdf" ]]; then
                already_queued=true
                break
            fi
        done
        [[ "$already_queued" == "true" ]] && continue

        audio_bdfs+=("$bdf")
    done

    if (( ${#audio_bdfs[@]} == 0 )); then
        return 0
    fi

    # Pass 2: one checklist triplet per candidate, tag = BDF,
    # description = "<lspci name> (driver: X)", state = off.
    local -a entries=()
    local pretty driver
    for bdf in "${audio_bdfs[@]}"; do
        pretty=$(lspci -nn -s "${bdf#0000:}" 2>/dev/null \
            | sed -e 's/^[^ ]* //' \
                  -e 's/ \[0401\]//; s/ \[0403\]//; s/ \[0400\]//' \
            | cut -c1-52)
        if [[ -z "$pretty" ]]; then
            pretty="PCI audio"
        fi
        driver=$(_get_pci_driver "$bdf")
        entries+=("$bdf" "${pretty} (driver: ${driver})" "off")
    done

    local text
    text="$(translate 'The selected GPU has no dedicated .1 audio sibling function.')\n"
    text+="$(translate 'If you want HDMI/analog audio inside the VM, select the audio controller(s) to pass through along with the GPU.')\n\n"
    text+="$(translate 'Default is none (video-only passthrough). Use SPACE to toggle selections.')"

    # The list area never shrinks below 4 rows; the dialog adds 14 rows on
    # top of that for the prompt, borders and buttons.
    local rows=${#audio_bdfs[@]} total_h
    if (( rows < 4 )); then
        rows=4
    fi
    total_h=$(( rows + 14 ))

    local chosen
    chosen=$(_pmx_checklist \
        "$(translate 'Add Audio Passthrough')" \
        "$text" \
        "$total_h" 82 "$rows" \
        "${entries[@]}") || return 0

    # dialog quotes the selected tags, whiptail does not; drop the quotes.
    chosen="${chosen//\"/}"
    if [[ -z "$chosen" ]]; then
        return 0
    fi

    # Intentionally unquoted: the selection is a whitespace-separated list.
    local tag
    for tag in $chosen; do
        _register_gpu_audio_device "$tag"
    done
}
# Decides which audio device(s) accompany the selected GPU.
# Globals read:    SELECTED_GPU_PCI.
# Globals written: EXTRA_AUDIO_DEVICES and EXTRA_AUDIO_INFO are reset here.
# Returns 0 after auto-registering a .1 sibling, otherwise the status of
# the interactive prompt.
detect_optional_gpu_audio() {
    EXTRA_AUDIO_DEVICES=()
    EXTRA_AUDIO_INFO=()

    # Candidate companion: function .1 of the GPU's own slot.
    local companion="${SELECTED_GPU_PCI%.*}.1"

    if ! _pci_is_audio_device "$companion"; then
        # No sibling audio function: leave the choice to the user. Nothing
        # is passed through unless it is explicitly ticked in the checklist.
        _prompt_user_for_audio_devices
        return
    fi

    # Sibling audio function present: include it without prompting.
    _register_gpu_audio_device "$companion"
    return 0
}
@@ -1375,8 +1544,19 @@ confirm_summary() {
else
msg+="$(translate 'hostpci entries for all IOMMU group devices')\n"
fi
[[ ${#EXTRA_AUDIO_DEVICES[@]} -gt 0 ]] && \
msg+="$(translate 'Additional GPU audio function will be added'): ${EXTRA_AUDIO_DEVICES[*]}\n"
if [[ ${#EXTRA_AUDIO_DEVICES[@]} -gt 0 ]]; then
msg+="$(translate 'Additional audio function(s) to be added'):\n"
local _audio_info _audio_bdf _audio_drv
for _audio_info in "${EXTRA_AUDIO_INFO[@]}"; do
_audio_bdf="${_audio_info%%|*}"
_audio_drv="${_audio_info#*|}"
if [[ -n "$_audio_drv" && "$_audio_drv" != "none" && "$_audio_drv" != "vfio-pci" ]]; then
msg+="${_audio_bdf} \Zb(${_audio_drv})\Zn\n"
else
msg+="${_audio_bdf}\n"
fi
done
fi
[[ "$SELECTED_GPU" == "nvidia" ]] && \
msg+="$(translate 'NVIDIA KVM hiding (cpu hidden=1)')\n"
if [[ "$SWITCH_FROM_LXC" == "true" ]]; then
@@ -1698,7 +1878,7 @@ cleanup_lxc_configs() {
[[ "$SWITCH_FROM_LXC" != "true" ]] && return 0
[[ ${#LXC_AFFECTED_CTIDS[@]} -eq 0 ]] && return 0
msg_info "$(translate 'Applying selected LXC switch action...')"
msg_info2 "$(translate 'Applying selected LXC switch action')"
local i
for i in "${!LXC_AFFECTED_CTIDS[@]}"; do
@@ -1708,7 +1888,11 @@ cleanup_lxc_configs() {
if [[ "${LXC_AFFECTED_RUNNING[$i]}" == "1" ]]; then
msg_info "$(translate 'Stopping LXC') ${ctid}..."
if pct stop "$ctid" >>"$LOG_FILE" 2>&1; then
# _pmx_stop_lxc: graceful shutdown with forceStop+timeout, then
# fallback to pct stop. Avoids the indefinite hang that raw
# `pct stop` produces when the container is locked or has
# unresponsive processes (Plex, databases, etc.).
if _pmx_stop_lxc "$ctid" "$LOG_FILE"; then
msg_ok "$(translate 'LXC stopped') ${ctid}" | tee -a "$screen_capture"
else
msg_warn "$(translate 'Could not stop LXC') ${ctid}" | tee -a "$screen_capture"
@@ -1765,8 +1949,73 @@ cleanup_vm_config() {
local src_conf="/etc/pve/qemu-server/${SWITCH_VM_SRC}.conf"
if [[ -f "$src_conf" ]]; then
msg_info "$(translate 'Removing GPU from VM') ${SWITCH_VM_SRC}..."
sed -i "/^hostpci[0-9]\+:.*${pci_slot}/d" "$src_conf"
# Precise regex: slot must be followed by ".<function>" and a
# delimiter. Kept in sync with switch_gpu_mode.sh. A looser
# ".*${pci_slot}" would match the slot as a substring and wipe
# unrelated hostpci entries (e.g. slot "00:02" matching inside
# a dGPU BDF 0000:02:00.0).
sed -E -i "/^hostpci[0-9]+:[[:space:]]*(0000:)?${pci_slot}\.[0-7]([,[:space:]]|$)/d" "$src_conf"
msg_ok "$(translate 'GPU removed from VM') ${SWITCH_VM_SRC}" | tee -a "$screen_capture"
# Cascade cleanup: detect audio companions orphaned in the
# source VM after the GPU slot is removed. Typical case: the
# source VM had an Intel iGPU at 00:02.0 paired with chipset
# audio at 00:1f.3 via the Part 1 checklist — the sed above
# only strips 00:02.* entries, leaving the chipset audio
# hostpci pointing at a device the source VM no longer uses.
#
# Unlike switch_gpu_mode (detach flow), we deliberately do NOT
# touch /etc/modprobe.d/vfio.conf here. The GPU is being moved
# to the current target VM, which may select the same audio
# companion in its own Part 1 checklist. Any vendor:device
# orphaned in vfio.conf after this move is inert — the user
# can clean it up later via switch_gpu_mode if they want.
if declare -F _vm_list_orphan_audio_hostpci >/dev/null 2>&1; then
local _orphan_audio
_orphan_audio=$(_vm_list_orphan_audio_hostpci "$SWITCH_VM_SRC" "$pci_slot")
if [[ -n "$_orphan_audio" ]]; then
local -a _orph_items=()
local _oline _o_idx _o_bdf _o_name
while IFS= read -r _oline; do
[[ -z "$_oline" ]] && continue
_o_idx="${_oline%%|*}"
_oline="${_oline#*|}"
_o_bdf="${_oline%%|*}"
_o_name="${_oline#*|}"
_orph_items+=("$_o_idx" "${_o_bdf} ${_o_name}" "on")
done <<< "$_orphan_audio"
local _prompt
_prompt="\n$(translate 'The GPU has been moved out of VM') \Zb${SWITCH_VM_SRC}\Zn.\n\n"
_prompt+="$(translate 'The source VM also has these audio devices, likely added together with the GPU. Remove them too?')\n\n"
_prompt+="$(translate '(Checked entries will be removed. Uncheck to keep in VM.)')"
local _selected
_selected=$(_pmx_checklist \
"$(translate 'Associated Audio Devices')" \
"$_prompt" \
20 84 "$(( ${#_orph_items[@]} / 3 ))" \
"${_orph_items[@]}") || _selected=""
_selected=$(echo "$_selected" | tr -d '"')
local _sel _removed=""
for _sel in $_selected; do
if declare -F _vm_remove_hostpci_index >/dev/null 2>&1; then
_vm_remove_hostpci_index "$SWITCH_VM_SRC" "$_sel" "$LOG_FILE" \
&& _removed+=" hostpci${_sel}"
else
qm set "$SWITCH_VM_SRC" --delete "hostpci${_sel}" >>"$LOG_FILE" 2>&1 \
&& _removed+=" hostpci${_sel}"
fi
done
if [[ -n "$_removed" ]]; then
show_proxmenux_logo
msg_title "${run_title}"
msg_ok "$(translate 'Associated audio removed from VM'): ${SWITCH_VM_SRC}${_removed}" \
| tee -a "$screen_capture"
fi
fi
fi
fi
}
@@ -1922,6 +2171,7 @@ main() {
detect_host_gpus
check_iommu_enabled
select_gpu
check_sriov_and_block_if_needed
warn_single_gpu
select_vm
ensure_selected_gpu_not_already_in_target_vm
@@ -2025,10 +2275,23 @@ main() {
rm -f "$screen_capture"
# Final reboot prompt. Whiptail is invoked directly (not through
# the _pmx_yesno helper) because the ProxMenux menu chain
# (menu → main_menu → hw_grafics_menu → add_gpu_vm) has been
# verified to work reliably with a bare whiptail here, while the
# dialog-based helper path hits process-group / TTY edge cases in
# that exact chain.
#
# The extra `Press Enter to continue ... read -r` between whiptail
# and `reboot` is deliberate — it gives the user a visible pause
# after the dialog closes so an accidental Enter on the yes button
# cannot trigger an immediate reboot.
if [[ "$HOST_CONFIG_CHANGED" == "true" ]]; then
whiptail --title "$(translate 'Reboot Required')" \
--yesno "$(translate 'A reboot is required for VFIO binding to take effect. Do you want to restart now?')" 10 68
if [[ $? -eq 0 ]]; then
msg_success "$(translate 'Press Enter to continue...')"
read -r
msg_warn "$(translate 'Rebooting the system...')"
reboot
else

View File

@@ -624,6 +624,75 @@ select_gpus() {
read -ra SELECTED_GPU_IDX <<< "$sel"
}
# ==========================================================
# SR-IOV guard — abort mode switch when SR-IOV is active
# ==========================================================
# Intel i915-sriov-dkms and AMD MxGPU split a Physical Function (PF) into
# multiple Virtual Functions (VFs). Switching the PF's driver destroys
# every VF; switching a VF's driver affects only that VF. ProxMenux does
# not yet manage the SR-IOV lifecycle (create/destroy VFs, track per-VF
# ownership), so operating on a PF with active VFs — or on a VF itself —
# would leave the user's virtualization stack in an inconsistent state.
# We detect the situation early and hand the user back to the Proxmox
# web UI, which understands VFs as first-class PCI devices.
# Aborts the mode switch when any selected GPU is an SR-IOV Virtual
# Function or a Physical Function with active VFs.
# Globals read: SELECTED_GPU_IDX (indices into ALL_GPU_PCIS), ALL_GPU_PCIS.
# Returns 0 when the _pci_sriov_role helper is not loaded or when no
# selected device is involved in SR-IOV. Otherwise shows a dialog listing
# the offending devices and terminates the script with exit 0.
check_sriov_and_block_if_needed() {
    declare -F _pci_sriov_role >/dev/null 2>&1 || return 0

    local idx pci role first_word pf_bdf active_count
    local -a vf_list=()
    local -a pf_list=()

    for idx in "${SELECTED_GPU_IDX[@]}"; do
        pci="${ALL_GPU_PCIS[$idx]}"
        # Helper output is "<kind> <detail>": "vf <parent PF>" or
        # "pf-active <VF count>". Anything else is not blocked.
        role=$(_pci_sriov_role "$pci")
        first_word="${role%% *}"
        case "$first_word" in
            vf)
                pf_bdf="${role#vf }"
                vf_list+=("${pci}|${pf_bdf}")
                ;;
            pf-active)
                active_count="${role#pf-active }"
                pf_list+=("${pci}|${active_count}")
                ;;
        esac
    done

    [[ ${#vf_list[@]} -eq 0 && ${#pf_list[@]} -eq 0 ]] && return 0

    local title msg entry bdf parent cnt
    title="$(translate 'SR-IOV Configuration Detected')"
    msg="\n"

    if [[ ${#vf_list[@]} -gt 0 ]]; then
        msg+="$(translate 'The following selected device(s) are SR-IOV Virtual Functions (VFs):')\n\n"
        for entry in "${vf_list[@]}"; do
            bdf="${entry%%|*}"
            parent="${entry#*|}"
            msg+="${bdf} $(translate '(parent PF:') ${parent})\n"
        done
        msg+="\n"
    fi

    if [[ ${#pf_list[@]} -gt 0 ]]; then
        msg+="$(translate 'The following selected device(s) are Physical Functions with active Virtual Functions:')\n\n"
        for entry in "${pf_list[@]}"; do
            bdf="${entry%%|*}"
            cnt="${entry#*|}"
            # Keep the address and the count apart: gluing them together
            # renders "0000:00:02.0" + "7" as the misleading "0000:00:02.07".
            msg+="${bdf} — ${cnt} $(translate 'active VF(s)')\n"
        done
        msg+="\n"
    fi

    msg+="$(translate 'To assign VFs to VMs or LXCs, edit the configuration manually via the Proxmox web interface. The Physical Function will remain bound to the native driver.')"

    dialog --backtitle "ProxMenux" \
        --title "$title" \
        --msgbox "$msg" 20 80
    exit 0
}
collect_selected_iommu_ids() {
SELECTED_IOMMU_IDS=()
SELECTED_PCI_SLOTS=()
@@ -766,8 +835,14 @@ apply_lxc_action_for_vm_mode() {
if [[ "${LXC_AFFECTED_RUNNING[$i]}" == "1" ]]; then
msg_info "$(translate 'Stopping LXC') ${ctid}..."
pct stop "$ctid" >>"$LOG_FILE" 2>&1 || true
msg_ok "$(translate 'LXC stopped') ${ctid}" | tee -a "$screen_capture"
# _pmx_stop_lxc: unlock + graceful shutdown with forceStop+timeout,
# fallback to pct stop. Prevents the indefinite hang that raw
# `pct stop` triggers on locked / stuck containers.
if _pmx_stop_lxc "$ctid" "$LOG_FILE"; then
msg_ok "$(translate 'LXC stopped') ${ctid}" | tee -a "$screen_capture"
else
msg_warn "$(translate 'Could not stop LXC') ${ctid}" | tee -a "$screen_capture"
fi
fi
if [[ "$LXC_ACTION" == "keep_gpu_disable_onboot" && "${LXC_AFFECTED_ONBOOT[$i]}" == "1" ]]; then
@@ -879,11 +954,102 @@ apply_vm_action_for_lxc_mode() {
fi
if [[ "$VM_ACTION" == "remove_gpu_keep_onboot" && -f "$conf" ]]; then
# Primary cleanup: strip hostpci lines whose BDF matches any of
# the GPU's selected slots. Matches both the PF function (.0) and
# any sibling audio or HDMI codec that shares the slot (typical
# for discrete NVIDIA/AMD cards where .1 is the HDMI audio).
#
# Precise regex: the slot must be followed by ".<function>" and
# either a delimiter or end-of-line. A looser ".*${slot}" would
# match by pure substring and delete unrelated hostpci entries —
# e.g. slot "00:02" would match inside "0000:02:00.0" (a dGPU at
# 02:00) and wipe both the iGPU and the unrelated dGPU.
local slot
for slot in "${SELECTED_PCI_SLOTS[@]}"; do
sed -i "/^hostpci[0-9]\+:.*${slot}/d" "$conf"
sed -E -i "/^hostpci[0-9]+:[[:space:]]*(0000:)?${slot}\.[0-7]([,[:space:]]|$)/d" "$conf"
done
msg_ok "$(translate 'GPU removed from VM config') ${vmid}" | tee -a "$screen_capture"
# Cascade cleanup: Intel iGPU passthrough typically pairs the GPU
# at 00:02.0 with chipset audio at 00:1f.3, which lives at a
# different slot and therefore survives the sed above. If it
# stays in the VM config after the GPU is gone, the VM either
# fails to start (vfio-pci no longer claims 8086:51c8 after the
# switch-back) or it steals host audio unnecessarily. Enumerate
# orphan audio hostpci entries and ask the user what to do.
if declare -F _vm_list_orphan_audio_hostpci >/dev/null 2>&1; then
local _orphan_audio
_orphan_audio=$(_vm_list_orphan_audio_hostpci "$vmid" "${SELECTED_PCI_SLOTS[0]}")
if [[ -n "$_orphan_audio" ]]; then
local -a _orph_items=()
local _line _o_idx _o_bdf _o_name
while IFS= read -r _line; do
[[ -z "$_line" ]] && continue
_o_idx="${_line%%|*}"
_line="${_line#*|}"
_o_bdf="${_line%%|*}"
_o_name="${_line#*|}"
_orph_items+=("$_o_idx" "${_o_bdf} ${_o_name}" "on")
done <<< "$_orphan_audio"
local _prompt _selected
_prompt="\n$(translate 'The GPU is being detached from VM') \Zb${vmid}\Zn.\n\n"
_prompt+="$(translate 'The VM also has these audio devices assigned via PCI passthrough — typically added together with the GPU. Remove them too?')\n\n"
_prompt+="$(translate '(Checked entries will be removed. Uncheck to keep in VM.)')"
_selected=$(dialog --backtitle "ProxMenux" --colors \
--title "$(translate 'Associated Audio Devices')" \
--checklist "$_prompt" 20 84 "$(( ${#_orph_items[@]} / 3 ))" \
"${_orph_items[@]}" \
2>&1 >/dev/tty) || _selected=""
_selected=$(echo "$_selected" | tr -d '"')
# Cross-reference table so we can recover each selected idx's
# original BDF (we need it for vendor:device lookup below).
declare -A _orphan_bdf_by_idx=()
local _o_line _o_i _o_b
while IFS= read -r _o_line; do
[[ -z "$_o_line" ]] && continue
_o_i="${_o_line%%|*}"
_o_line="${_o_line#*|}"
_o_b="${_o_line%%|*}"
_orphan_bdf_by_idx["$_o_i"]="$_o_b"
done <<< "$_orphan_audio"
local _sel _removed_audio="" _rem_bdf _vd_hex _dd_hex _vd_id
for _sel in $_selected; do
_rem_bdf="${_orphan_bdf_by_idx[$_sel]:-}"
if _vm_remove_hostpci_index "$vmid" "$_sel" "$LOG_FILE"; then
_removed_audio+=" hostpci${_sel}"
# Fix B: if the removed audio BDF is not referenced by any
# OTHER VM, its vendor:device can safely come out of
# /etc/modprobe.d/vfio.conf too. Without this step,
# SELECTED_IOMMU_IDS only held the GPU's own IOMMU group
# (e.g. 8086:46a3 for Intel iGPU) and the companion audio
# id (e.g. 8086:51c8 for chipset audio) survived in
# vfio.conf, so vfio-pci kept claiming it at next boot
# even though nothing used it.
[[ -z "$_rem_bdf" ]] && continue
if ! _pci_bdf_in_any_vm "$_rem_bdf" "${VM_AFFECTED_IDS[@]}"; then
_vd_hex=$(cat "/sys/bus/pci/devices/${_rem_bdf}/vendor" 2>/dev/null | sed 's/^0x//')
_dd_hex=$(cat "/sys/bus/pci/devices/${_rem_bdf}/device" 2>/dev/null | sed 's/^0x//')
if [[ -n "$_vd_hex" && -n "$_dd_hex" ]]; then
_vd_id="${_vd_hex}:${_dd_hex}"
if ! _contains_in_array "$_vd_id" "${SELECTED_IOMMU_IDS[@]}"; then
SELECTED_IOMMU_IDS+=("$_vd_id")
fi
fi
fi
fi
done
unset _orphan_bdf_by_idx
if [[ -n "$_removed_audio" ]]; then
msg_ok "$(translate 'Associated audio removed from VM'): ${_removed_audio# }" \
| tee -a "$screen_capture"
fi
fi
fi
fi
done
}
@@ -1164,6 +1330,7 @@ main() {
detect_host_gpus
while true; do
select_gpus
check_sriov_and_block_if_needed
select_target_mode
[[ $? -eq 2 ]] && continue
validate_vm_mode_blocked_ids

View File

@@ -507,6 +507,67 @@ find_gpu_by_slot() {
return 1
}
# ==========================================================
# SR-IOV guard — abort mode switch when SR-IOV is active
# ==========================================================
# Same policy as the interactive switch_gpu_mode.sh: refuse to operate on
# a Virtual Function or on a Physical Function that already has active
# VFs, since flipping drivers in that state collapses the VF tree and
# breaks every guest that was consuming a VF.
# SR-IOV guard for the web runner.
# Globals read: SELECTED_GPU_IDX (indices into ALL_GPU_PCIS), ALL_GPU_PCIS.
# Returns 0 when the _pci_sriov_role helper is not loaded or no selected
# device is a VF / PF with active VFs. Otherwise reports the offending
# devices as HTML through hybrid_msgbox and returns 1 so that the caller
# can abort.
check_sriov_and_block_if_needed() {
    if ! declare -F _pci_sriov_role >/dev/null 2>&1; then
        return 0
    fi

    # Render the <li> rows while classifying; an empty string means that
    # category has no offenders.
    local gpu_idx gpu_bdf sriov_role role_kind
    local vf_rows="" pf_rows=""
    for gpu_idx in "${SELECTED_GPU_IDX[@]}"; do
        gpu_bdf="${ALL_GPU_PCIS[$gpu_idx]}"
        sriov_role=$(_pci_sriov_role "$gpu_bdf")
        role_kind="${sriov_role%% *}"
        if [[ "$role_kind" == "vf" ]]; then
            vf_rows+="<li><code>${gpu_bdf}</code> &mdash; $(translate 'parent PF:') <code>${sriov_role#vf }</code></li>"
        elif [[ "$role_kind" == "pf-active" ]]; then
            pf_rows+="<li><code>${gpu_bdf}</code> &mdash; ${sriov_role#pf-active } $(translate 'active VF(s)')</li>"
        fi
    done

    if [[ -z "$vf_rows" && -z "$pf_rows" ]]; then
        return 0
    fi

    local html
    html="<div style='color:#f0ad4e;font-weight:bold;margin-bottom:10px;'>$(translate 'SR-IOV Configuration Detected')</div>"
    if [[ -n "$vf_rows" ]]; then
        html+="<p>$(translate 'The following selected device(s) are SR-IOV Virtual Functions (VFs):')</p><ul>"
        html+="${vf_rows}</ul>"
    fi
    if [[ -n "$pf_rows" ]]; then
        html+="<p>$(translate 'The following selected device(s) are Physical Functions with active Virtual Functions:')</p><ul>"
        html+="${pf_rows}</ul>"
    fi
    html+="<p>$(translate 'To assign VFs to VMs or LXCs, edit the configuration manually via the Proxmox web interface. The Physical Function will remain bound to the native driver.')</p>"

    hybrid_msgbox "$(translate 'SR-IOV Configuration Detected')" "$html"
    return 1
}
validate_vm_mode_blocked_ids() {
[[ "$TARGET_MODE" != "vm" ]] && return 0
@@ -687,8 +748,14 @@ apply_lxc_action_for_vm_mode() {
if [[ "${LXC_AFFECTED_RUNNING[$i]}" == "1" ]]; then
msg_info "$(translate 'Stopping LXC') ${ctid}..."
pct stop "$ctid" >>"$LOG_FILE" 2>&1 || true
msg_ok "$(translate 'LXC stopped') ${ctid}" | tee -a "$screen_capture"
# _pmx_stop_lxc: unlock + graceful shutdown with forceStop+timeout,
# fallback to pct stop. Prevents the indefinite hang that raw
# `pct stop` triggers on locked / stuck containers.
if _pmx_stop_lxc "$ctid" "$LOG_FILE"; then
msg_ok "$(translate 'LXC stopped') ${ctid}" | tee -a "$screen_capture"
else
msg_warn "$(translate 'Could not stop LXC') ${ctid}" | tee -a "$screen_capture"
fi
fi
if [[ "$LXC_ACTION" == "keep_gpu_disable_onboot" && "${LXC_AFFECTED_ONBOOT[$i]}" == "1" ]]; then
@@ -804,11 +871,67 @@ apply_vm_action_for_lxc_mode() {
fi
if [[ "$VM_ACTION" == "remove_gpu_keep_onboot" && -f "$conf" ]]; then
# Primary cleanup: strip hostpci lines whose BDF matches any of
# the GPU's selected slots. Matches both the PF function (.0) and
# sibling audio/HDMI codecs (.1, typical for discrete cards).
#
# Precise regex: the slot must be followed by ".<function>" and a
# delimiter. Kept in sync with switch_gpu_mode.sh — a looser
# substring match would wipe unrelated hostpci entries (e.g. slot
# "00:02" matching as a substring inside a dGPU BDF 0000:02:00.0).
local slot
for slot in "${SELECTED_PCI_SLOTS[@]}"; do
sed -i "/^hostpci[0-9]\+:.*${slot}/d" "$conf"
sed -E -i "/^hostpci[0-9]+:[[:space:]]*(0000:)?${slot}\.[0-7]([,[:space:]]|$)/d" "$conf"
done
msg_ok "$(translate 'GPU removed from VM config') ${vmid}" | tee -a "$screen_capture"
# Cascade cleanup for the web flow: auto-remove any PCI audio
# hostpci entries at a slot DIFFERENT from the GPU (typical Intel
# iGPU case where 00:1f.3 chipset audio was paired with the iGPU
# at 00:02.0). The helper skips audio devices whose slot already
# has a display sibling in the same VM (HDMI codec of another
# still-present dGPU), so those are not touched. The web runner
# has no good way to render a multi-select checklist, so the
# eligible ones are auto-removed and reported verbatim in the log.
if declare -F _vm_list_orphan_audio_hostpci >/dev/null 2>&1; then
local _orphan_audio _line _o_idx _o_bdf _o_name _removed=""
local _vd_hex _dd_hex _vd_id
_orphan_audio=$(_vm_list_orphan_audio_hostpci "$vmid" "${SELECTED_PCI_SLOTS[0]}")
if [[ -n "$_orphan_audio" ]]; then
while IFS= read -r _line; do
[[ -z "$_line" ]] && continue
_o_idx="${_line%%|*}"
_line="${_line#*|}"
_o_bdf="${_line%%|*}"
_o_name="${_line#*|}"
if _vm_remove_hostpci_index "$vmid" "$_o_idx" "$LOG_FILE"; then
_removed+=" • hostpci${_o_idx}: ${_o_bdf} ${_o_name}\n"
# Fix B: also surface the audio's vendor:device to the
# upcoming vfio.conf cleanup if no other VM still uses
# this BDF. Ensures e.g. 8086:51c8 (Intel chipset audio)
# is stripped from /etc/modprobe.d/vfio.conf when the
# iGPU it was paired with leaves VM mode.
if declare -F _pci_bdf_in_any_vm >/dev/null 2>&1 \
&& ! _pci_bdf_in_any_vm "$_o_bdf" "${VM_AFFECTED_IDS[@]}"; then
_vd_hex=$(cat "/sys/bus/pci/devices/${_o_bdf}/vendor" 2>/dev/null | sed 's/^0x//')
_dd_hex=$(cat "/sys/bus/pci/devices/${_o_bdf}/device" 2>/dev/null | sed 's/^0x//')
if [[ -n "$_vd_hex" && -n "$_dd_hex" ]]; then
_vd_id="${_vd_hex}:${_dd_hex}"
if ! _contains_in_array "$_vd_id" "${SELECTED_IOMMU_IDS[@]}"; then
SELECTED_IOMMU_IDS+=("$_vd_id")
fi
fi
fi
fi
done <<< "$_orphan_audio"
if [[ -n "$_removed" ]]; then
msg_ok "$(translate 'Associated audio removed from VM'): ${vmid}" \
| tee -a "$screen_capture"
echo -e "$_removed" | tee -a "$screen_capture"
fi
fi
fi
fi
done
}
@@ -1147,6 +1270,12 @@ main() {
exit 1
fi
# SR-IOV guard: refuse to toggle the driver on a VF or on a PF with
# active VFs. Manual handling via Proxmox web UI is required.
if ! check_sriov_and_block_if_needed; then
exit 1
fi
# Validate if GPU is blocked for VM mode (certain Intel GPUs)
if ! validate_vm_mode_blocked_ids; then
exit 1