Update pci_passthrough_helpers.sh

This commit is contained in:
MacRimi
2026-06-12 20:57:45 +02:00
parent 6e1e47d9fd
commit 6ccb54e64a

View File

@@ -380,6 +380,9 @@ function _pci_sriov_role() {
PROXMENUX_VFIO_BIND_STATE="/etc/proxmenux/vfio-bind.bdfs" PROXMENUX_VFIO_BIND_STATE="/etc/proxmenux/vfio-bind.bdfs"
PROXMENUX_VFIO_BIND_UDEV_RULE="/etc/udev/rules.d/10-proxmenux-vfio-bind.rules" PROXMENUX_VFIO_BIND_UDEV_RULE="/etc/udev/rules.d/10-proxmenux-vfio-bind.rules"
# Auto-managed modprobe blacklist for the nvidia kernel modules. It is
# written by _proxmenux_nvidia_vfio_blacklist_sync only when *every* NVIDIA
# GPU on the host is in passthrough, and removed again as soon as any
# NVIDIA GPU goes back to the host.
PROXMENUX_NVIDIA_VFIO_BLACKLIST="/etc/modprobe.d/proxmenux-nvidia-vfio-blacklist.conf"
# Legacy artifact paths from a previous attempt — kept here so we can # Legacy artifact paths from a previous attempt — kept here so we can
# remove them when migrating a host that ran the older init-top hook. # remove them when migrating a host that ran the older init-top hook.
PROXMENUX_VFIO_BIND_LEGACY_HOOK="/etc/initramfs-tools/scripts/init-top/proxmenux-vfio-bind" PROXMENUX_VFIO_BIND_LEGACY_HOOK="/etc/initramfs-tools/scripts/init-top/proxmenux-vfio-bind"
@@ -447,6 +450,7 @@ _proxmenux_vfio_bind_add_bdfs() {
done done
if $changed; then if $changed; then
_proxmenux_vfio_bind_write_udev_rule _proxmenux_vfio_bind_write_udev_rule
_proxmenux_nvidia_vfio_blacklist_sync || true
[[ -n "${HOST_CONFIG_CHANGED+x}" ]] && HOST_CONFIG_CHANGED=true [[ -n "${HOST_CONFIG_CHANGED+x}" ]] && HOST_CONFIG_CHANGED=true
fi fi
} }
@@ -471,6 +475,7 @@ _proxmenux_vfio_bind_remove_bdfs() {
if ! cmp -s "$tmp" "$PROXMENUX_VFIO_BIND_STATE"; then if ! cmp -s "$tmp" "$PROXMENUX_VFIO_BIND_STATE"; then
mv "$tmp" "$PROXMENUX_VFIO_BIND_STATE" mv "$tmp" "$PROXMENUX_VFIO_BIND_STATE"
_proxmenux_vfio_bind_write_udev_rule _proxmenux_vfio_bind_write_udev_rule
_proxmenux_nvidia_vfio_blacklist_sync || true
[[ -n "${HOST_CONFIG_CHANGED+x}" ]] && HOST_CONFIG_CHANGED=true [[ -n "${HOST_CONFIG_CHANGED+x}" ]] && HOST_CONFIG_CHANGED=true
# If empty, remove state file too (keeps host clean) # If empty, remove state file too (keeps host clean)
[[ ! -s "$PROXMENUX_VFIO_BIND_STATE" ]] && rm -f "$PROXMENUX_VFIO_BIND_STATE" [[ ! -s "$PROXMENUX_VFIO_BIND_STATE" ]] && rm -f "$PROXMENUX_VFIO_BIND_STATE"
@@ -479,6 +484,70 @@ _proxmenux_vfio_bind_remove_bdfs() {
fi fi
} }
# Returns 0 if every NVIDIA GPU on this host is registered for VFIO
# passthrough, 1 otherwise (including when the host has no NVIDIA GPU or
# no state file exists). Used to decide whether a global nvidia module
# blacklist is safe (mono-GPU host or all-passthrough case) or whether
# the host still needs the nvidia driver loaded for at least one GPU
# (multi-GPU mixed case).
#
# Globals:
#   PROXMENUX_VFIO_BIND_STATE (read) - file with one BDF per line; blank
#       lines and '#' comments are ignored, short "bb:dd.f" entries are
#       treated as PCI domain 0000.
#   PROXMENUX_PCI_SYSFS_DIR (read, optional) - overrides the sysfs PCI
#       device directory; defaults to /sys/bus/pci/devices.
# A GPU is a PCI device with vendor 0x10de whose class starts with
# 0x0300 (VGA controller) or 0x0302 (3D controller).
_proxmenux_all_nvidia_in_vfio() {
    local sysfs_dir="${PROXMENUX_PCI_SYSFS_DIR:-/sys/bus/pci/devices}"
    local bdf_re='^[0-9a-f]{4,}:[0-9a-f]{2}:[0-9a-f]{2}\.[0-7]$'
    local -a host_nvidia=()
    local -A registered=()
    local dev vendor cls bdf gpu

    # Collect every NVIDIA display-class function the kernel enumerated.
    for dev in "$sysfs_dir"/*; do
        vendor=$(cat "$dev/vendor" 2>/dev/null)
        [[ "$vendor" == "0x10de" ]] || continue
        cls=$(cat "$dev/class" 2>/dev/null)
        case "$cls" in
            0x0300*|0x0302*) host_nvidia+=("${dev##*/}") ;;
        esac
    done
    # No NVIDIA GPU at all: there is nothing to protect with a blacklist.
    (( ${#host_nvidia[@]} == 0 )) && return 1

    [[ -f "$PROXMENUX_VFIO_BIND_STATE" ]] || return 1

    # Build the set of registered BDFs. The "|| [[ -n ... ]]" keeps a last
    # line that has no trailing newline.
    while IFS= read -r bdf || [[ -n "$bdf" ]]; do
        bdf="${bdf%%#*}"              # drop full-line and inline comments
        bdf="${bdf//[[:space:]]/}"    # drop stray blanks / CR
        [[ -z "$bdf" ]] && continue
        # Only a short "bb:dd.f" entry lacks a domain; never prefix an
        # entry that already carries one (e.g. 0001:01:00.0).
        [[ "$bdf" == *:*:* ]] || bdf="0000:${bdf}"
        bdf="${bdf,,}"                # sysfs names use lowercase hex
        [[ "$bdf" =~ $bdf_re ]] || continue
        registered["$bdf"]=1
    done < "$PROXMENUX_VFIO_BIND_STATE"

    # Membership, not counting: a duplicated entry for one GPU must not
    # stand in for a different GPU that is still owned by the host.
    for gpu in "${host_nvidia[@]}"; do
        [[ -n "${registered[${gpu,,}]+x}" ]] || return 1
    done
    return 0
}
# Apply or remove the auto-managed nvidia blacklist based on whether
# every host NVIDIA GPU is in VFIO passthrough (as reported by
# _proxmenux_all_nvidia_in_vfio).
#
# Globals:
#   PROXMENUX_NVIDIA_VFIO_BLACKLIST (read) - path of the managed file.
# Returns:
#   0 if the blacklist file was created or removed (caller may want to
#     rebuild initramfs);
#   1 if nothing changed, or if the file could not be written/removed.
_proxmenux_nvidia_vfio_blacklist_sync() {
    local conf="$PROXMENUX_NVIDIA_VFIO_BLACKLIST"

    if ! _proxmenux_all_nvidia_in_vfio; then
        # At least one NVIDIA GPU belongs to the host: the blacklist must go.
        [[ -f "$conf" ]] || return 1
        rm -f -- "$conf" || return 1
        return 0
    fi

    # Already in place: nothing to do.
    [[ -f "$conf" ]] && return 1

    if ! cat > "$conf" <<'EOF'
# ProxMenux: every NVIDIA GPU on this host is in VFIO passthrough.
# Block the nvidia module so it doesn't loop trying to claim devices
# already owned by vfio-pci. Removed automatically when any GPU
# returns to the host.
blacklist nvidia
blacklist nvidia_drm
blacklist nvidia_modeset
blacklist nvidia_uvm
EOF
    then
        # Do not report a change that did not happen, and do not leave a
        # truncated file behind: a later sync would treat it as present.
        rm -f -- "$conf"
        return 1
    fi
    return 0
}
# Returns the BDF of a PCI bridge sharing the IOMMU group of $1, if any. # Returns the BDF of a PCI bridge sharing the IOMMU group of $1, if any.
# The kernel refuses to bind vfio-pci to root ports, so when a GPU shares # The kernel refuses to bind vfio-pci to root ports, so when a GPU shares
# its IOMMU group with the upstream root port the VFIO setup silently # its IOMMU group with the upstream root port the VFIO setup silently