diff --git a/AppImage/components/gpu-switch-mode-indicator.tsx b/AppImage/components/gpu-switch-mode-indicator.tsx index d268a9c4..792a7734 100644 --- a/AppImage/components/gpu-switch-mode-indicator.tsx +++ b/AppImage/components/gpu-switch-mode-indicator.tsx @@ -2,12 +2,20 @@ import { cn } from "@/lib/utils" +interface SriovInfo { + role: "vf" | "pf-active" | "pf-idle" + physfn?: string // VF only: parent PF BDF + vfCount?: number // PF only: active VF count + totalvfs?: number // PF only: maximum VFs +} + interface GpuSwitchModeIndicatorProps { - mode: "lxc" | "vm" | "unknown" + mode: "lxc" | "vm" | "sriov" | "unknown" isEditing?: boolean pendingMode?: "lxc" | "vm" | null onToggle?: (e: React.MouseEvent) => void className?: string + sriovInfo?: SriovInfo } export function GpuSwitchModeIndicator({ @@ -16,20 +24,38 @@ export function GpuSwitchModeIndicator({ pendingMode = null, onToggle, className, + sriovInfo, }: GpuSwitchModeIndicatorProps) { - const displayMode = pendingMode ?? mode + // SR-IOV is a non-editable hardware state. Pending toggles don't apply here. + const displayMode = mode === "sriov" ? "sriov" : (pendingMode ?? mode) const isLxcActive = displayMode === "lxc" const isVmActive = displayMode === "vm" - const hasChanged = pendingMode !== null && pendingMode !== mode + const isSriovActive = displayMode === "sriov" + const hasChanged = + mode !== "sriov" && pendingMode !== null && pendingMode !== mode // Colors - const activeColor = isLxcActive ? "#3b82f6" : isVmActive ? "#a855f7" : "#6b7280" + const sriovColor = "#14b8a6" // teal-500 + const activeColor = isSriovActive + ? sriovColor + : isLxcActive + ? "#3b82f6" + : isVmActive + ? "#a855f7" + : "#6b7280" const inactiveColor = "#374151" // gray-700 for dark theme + const dimmedColor = "#4b5563" // gray-600 for dashed SR-IOV branches const lxcColor = isLxcActive ? "#3b82f6" : inactiveColor const vmColor = isVmActive ? "#a855f7" : inactiveColor const handleClick = (e: React.MouseEvent) => { - // Only stop propagation and handle toggle when in editing mode + // SR-IOV state can't be toggled — swallow the click so it doesn't reach + // the card (which would open the detail modal unexpectedly from this + // area). For lxc/vm, preserve the original behavior. + if (isSriovActive) { + e.stopPropagation() + return + } if (isEditing) { e.stopPropagation() if (onToggle) { @@ -39,11 +65,20 @@ export function GpuSwitchModeIndicator({ // When not editing, let the click propagate to the card to open the modal } + // Build the VF count label shown in the SR-IOV badge. For PFs we know + // exactly how many VFs are active; for a VF we show its parent PF. + const sriovBadgeText = (() => { + if (!isSriovActive) return "" + if (sriovInfo?.role === "vf") return "SR-IOV VF" + if (sriovInfo?.vfCount && sriovInfo.vfCount > 0) return `SR-IOV ×${sriovInfo.vfCount}` + return "SR-IOV" + })() + return ( -
{/* GPU text */} - @@ -115,112 +150,198 @@ export function GpuSwitchModeIndicator({ cx="95" cy="50" r="6" - fill={isEditing ? "#f59e0b" : activeColor} + fill={isEditing && !isSriovActive ? "#f59e0b" : activeColor} className="transition-all duration-300" /> - {/* LXC Branch Line - going up-right */} + {/* LXC Branch Line - going up-right. + In SR-IOV mode the branch is dashed + dimmed to show that the + target is theoretically reachable via a VF but not controlled + by ProxMenux. */} - {/* VM Branch Line - going down-right */} + {/* VM Branch Line - going down-right (dashed/dimmed in SR-IOV). */} - {/* LXC Container Icon - Server/Stack icon */} - - {/* Container box */} - - {/* Container layers/lines */} - - - {/* Status dots */} - - - - + {/* SR-IOV in-line connector + badge (only when mode === 'sriov'). + A horizontal line from the switch node leads to a pill-shaped + badge carrying the "SR-IOV ×N" label. Placed on the GPU's + baseline to visually read as an in-line extension, not as a + third branch. */} + {isSriovActive && ( + <> + + + + {sriovBadgeText} + + + )} + + {/* LXC Container Icon - dimmed/smaller in SR-IOV mode. */} + {!isSriovActive && ( + + + + + + + + + )} + {/* SR-IOV: compact dimmed LXC glyph so the geometry stays recognizable + but it's clearly not the active target. */} + {isSriovActive && ( + + + + + + )} {/* LXC Label */} - - LXC - + {!isSriovActive && ( + + LXC + + )} + {isSriovActive && ( + + LXC + + )} - {/* VM Monitor Icon */} - - {/* Monitor screen */} - - {/* Screen inner/shine */} - - {/* Monitor stand */} - - {/* Monitor base */} - - + {/* VM Monitor Icon - active view */} + {!isSriovActive && ( + + + + + + + )} + {/* SR-IOV: compact dimmed VM monitor glyph, mirror of the LXC glyph. */} + {isSriovActive && ( + + + + + + )} {/* VM Label */} - - VM - + {!isSriovActive && ( + + VM + + )} + {isSriovActive && ( + + VM + + )} {/* Status Text - Large like GPU name */} @@ -228,22 +349,41 @@ export function GpuSwitchModeIndicator({ - {isLxcActive - ? "Ready for LXC containers" - : isVmActive - ? "Ready for VM passthrough" - : "Mode unknown"} + {isSriovActive + ? "SR-IOV active" + : isLxcActive + ? "Ready for LXC containers" + : isVmActive + ? "Ready for VM passthrough" + : "Mode unknown"} - {isLxcActive - ? "Native driver active" - : isVmActive - ? "VFIO-PCI driver active" - : "No driver detected"} + {isSriovActive + ? "Virtual Functions managed externally" + : isLxcActive + ? "Native driver active" + : isVmActive + ? "VFIO-PCI driver active" + : "No driver detected"} + {isSriovActive && sriovInfo && ( + + {sriovInfo.role === "vf" + ? `Virtual Function${sriovInfo.physfn ? ` · parent PF ${sriovInfo.physfn}` : ""}` + : sriovInfo.vfCount !== undefined + ? `1 PF + ${sriovInfo.vfCount} VF${sriovInfo.vfCount === 1 ? "" : "s"}${sriovInfo.totalvfs ? ` / ${sriovInfo.totalvfs} max` : ""}` + : null} + + )} {hasChanged && ( Change pending... diff --git a/AppImage/components/hardware.tsx b/AppImage/components/hardware.tsx index 88c2e26a..21959c37 100644 --- a/AppImage/components/hardware.tsx +++ b/AppImage/components/hardware.tsx @@ -293,11 +293,16 @@ export default function Hardware() { const [showSwitchModeModal, setShowSwitchModeModal] = useState(false) const [switchModeParams, setSwitchModeParams] = useState<{ gpuSlot: string; targetMode: "lxc" | "vm" } | null>(null) - // Determine GPU mode based on driver (vfio-pci = VM, native driver = LXC) - const getGpuSwitchMode = (gpu: GPU): "lxc" | "vm" | "unknown" => { + // Determine GPU mode based on driver (vfio-pci = VM, native driver = LXC). + // SR-IOV short-circuits the driver check: if the GPU is either a VF or a + // PF with active VFs, the slot is in a hardware-partitioned state that + // ProxMenux does not manage from the UI, so it's surfaced as its own mode. + const getGpuSwitchMode = (gpu: GPU): "lxc" | "vm" | "sriov" | "unknown" => { + if (gpu.sriov_role === "vf" || gpu.sriov_role === "pf-active") return "sriov" + const driver = gpu.pci_driver?.toLowerCase() || "" const kernelModule = gpu.pci_kernel_module?.toLowerCase() || "" - + // Check driver first if (driver === "vfio-pci") return "vm" if (driver === "nvidia" || driver === "amdgpu" || driver === "radeon" || driver === "i915" || driver === "xe" || driver === "nouveau" || driver === "mgag200") return "lxc" @@ -940,7 +945,11 @@ return ( Switch Mode
- {editingSwitchModeGpu === fullSlot ? ( + {getGpuSwitchMode(gpu) === "sriov" ? ( + // SR-IOV: edit controls hidden — the state is + // hardware-managed and not togglable from here. + null + ) : editingSwitchModeGpu === fullSlot ? ( <>
)} @@ -1053,8 +1072,104 @@ return (

Loading real-time data...

+ ) : selectedGPU.sriov_role === "vf" ? ( + // SR-IOV Virtual Function: per-VF telemetry is not exposed + // by the kernel, so we skip the metrics panel and show + // identity + consumer + a link back to the parent PF. +
+
+
+
+ + + +
+
+

SR-IOV Virtual Function

+

+ This device is a Virtual Function spawned by a Physical Function. Per-VF + telemetry (temperature, utilization, memory) is not exposed by the kernel — + open the parent PF to see aggregate GPU metrics. +

+
+
+
+ +
+

+ Virtual Function Detail +

+
+ Parent Physical Function + {selectedGPU.sriov_physfn ? ( + + ) : ( + unknown + )} +
+
+ Current Driver + + {selectedGPU.pci_driver || "none"} + +
+
+ Consumer +
+ {realtimeGPUData?.sriov_consumer ? ( + + + {realtimeGPUData.sriov_consumer.type.toUpperCase()} {realtimeGPUData.sriov_consumer.id} + {realtimeGPUData.sriov_consumer.name && ` · ${realtimeGPUData.sriov_consumer.name}`} + {` · ${realtimeGPUData.sriov_consumer.running ? "running" : "stopped"}`} + + ) : ( + unused + )} +
+
+
+
) : realtimeGPUData?.has_monitoring_tool === true ? ( <> + {selectedGPU.sriov_role === "pf-active" && ( + // SR-IOV Physical Function: metrics below are the + // aggregate of the whole GPU (PF + all active VFs). + // Flag it explicitly so the reader interprets numbers + // correctly. +
+
+ + + SR-IOV active + + + Metrics below reflect the Physical Function (aggregate across + {" "} + + {realtimeGPUData?.sriov_vf_count ?? selectedGPU.sriov_vf_count ?? "N"} + + {" "}VFs). + +
+
+ )}
Updating every 3 seconds @@ -1285,6 +1400,67 @@ return (
)} + {selectedGPU.sriov_role === "pf-active" && + Array.isArray(realtimeGPUData?.sriov_vfs) && + realtimeGPUData.sriov_vfs.length > 0 && ( + // Per-VF table: one row per virtfn* under the PF. + // Driver is color-coded (teal native / purple vfio-pci + // / muted fallback) and consumer pills go green when + // the guest is currently running, muted otherwise. +
+

+ Virtual Functions +

+
+ {realtimeGPUData.sriov_vfs.map((vf: any) => ( +
+ {vf.bdf} +
+ + {vf.driver || "unbound"} + + {vf.consumer ? ( + + + {vf.consumer.type.toUpperCase()} {vf.consumer.id} + {vf.consumer.name && ( + · {vf.consumer.name} + )} + + ) : ( + + unused + + )} +
+
+ ))} +
+
+ )} ) : (findPCIDeviceForGPU(selectedGPU)?.driver === 'vfio-pci' || selectedGPU.pci_driver === 'vfio-pci') ? (
diff --git a/AppImage/scripts/flask_server.py b/AppImage/scripts/flask_server.py index 83e01204..7effb6c6 100644 --- a/AppImage/scripts/flask_server.py +++ b/AppImage/scripts/flask_server.py @@ -6151,6 +6151,211 @@ def get_network_hardware_info(pci_slot): return net_info +def _get_sriov_info(slot): + """Return SR-IOV role for a PCI slot via sysfs. + + Reads /sys/bus/pci/devices// for: + - physfn symlink → slot is a Virtual Function; link target is its PF + - sriov_numvfs → active VF count if slot is a Physical Function + - sriov_totalvfs → maximum VFs this PF can spawn + + Returns a dict ready to merge into the GPU object, or {} on any error. + The 'role' key uses the same vocabulary as _pci_sriov_role in the + bash helpers (pci_passthrough_helpers.sh): vf | pf-active | pf-idle | none. + """ + try: + bdf = slot if slot.startswith('0000:') else f'0000:{slot}' + base = f'/sys/bus/pci/devices/{bdf}' + if not os.path.isdir(base): + return {} + + physfn = os.path.join(base, 'physfn') + if os.path.islink(physfn): + parent = os.path.basename(os.path.realpath(physfn)) + return { + 'sriov_role': 'vf', + 'sriov_physfn': parent, + } + + totalvfs_path = os.path.join(base, 'sriov_totalvfs') + if not os.path.isfile(totalvfs_path): + return {'sriov_role': 'none'} + + try: + totalvfs = int((open(totalvfs_path).read() or '0').strip() or 0) + except (ValueError, OSError): + totalvfs = 0 + if totalvfs <= 0: + return {'sriov_role': 'none'} + + try: + numvfs = int((open(os.path.join(base, 'sriov_numvfs')).read() or '0').strip() or 0) + except (ValueError, OSError): + numvfs = 0 + + return { + 'sriov_role': 'pf-active' if numvfs > 0 else 'pf-idle', + 'sriov_vf_count': numvfs, + 'sriov_totalvfs': totalvfs, + } + except Exception: + return {} + + +def _sriov_list_vfs_of_pf(pf_bdf): + """Return sorted list of VF BDFs that belong to a Physical Function. + Reads /sys/bus/pci/devices//virtfn symlinks (one per VF). + """ + try: + pf_full = pf_bdf if pf_bdf.startswith('0000:') else f'0000:{pf_bdf}' + base = f'/sys/bus/pci/devices/{pf_full}' + if not os.path.isdir(base): + return [] + # virtfn links are numbered (virtfn0, virtfn1, ...) and point to the VF. + entries = sorted(glob.glob(f'{base}/virtfn*'), + key=lambda p: int(re.search(r'virtfn(\d+)', p).group(1)) + if re.search(r'virtfn(\d+)', p) else 0) + return [os.path.basename(os.path.realpath(p)) for p in entries] + except Exception: + return [] + + +def _sriov_pci_driver(bdf): + """Return the current driver bound to a PCI BDF, '' if unbound.""" + try: + link = f'/sys/bus/pci/devices/{bdf}/driver' + if os.path.islink(link): + return os.path.basename(os.path.realpath(link)) + except Exception: + pass + return '' + + +def _sriov_pci_render_node(bdf): + """If the device exposes a DRM render node, return '/dev/dri/renderDX'. + LXC containers consume GPUs through these nodes, so this lets us + cross-reference an LXC's `dev: /dev/dri/renderD` config line + back to a specific VF. + """ + try: + drm_dir = f'/sys/bus/pci/devices/{bdf}/drm' + if not os.path.isdir(drm_dir): + return '' + for name in sorted(os.listdir(drm_dir)): + if name.startswith('renderD'): + return f'/dev/dri/{name}' + except Exception: + pass + return '' + + +def _sriov_guest_running(guest_type, gid): + """Best-effort status check. Returns True if running, False otherwise.""" + try: + cmd = ['qm' if guest_type == 'vm' else 'pct', 'status', str(gid)] + r = subprocess.run(cmd, capture_output=True, text=True, timeout=3) + return 'running' in (r.stdout or '').lower() + except Exception: + return False + + +def _sriov_find_guest_consumer(bdf): + """Find the VM or LXC that consumes a given VF (or PF) on the host. + + VMs: scan /etc/pve/qemu-server/*.conf for a `hostpci: ` line that + references the BDF (short or full form, possibly alongside other + ids separated by ';' and trailing options after ','). + LXCs: resolve the BDF to its DRM render node (if any) and scan + /etc/pve/lxc/*.conf for `dev:` or `lxc.mount.entry:` lines that + reference that node. + + Returns {type, id, name, running} or None. + """ + short_bdf = bdf[5:] if bdf.startswith('0000:') else bdf + full_bdf = bdf if bdf.startswith('0000:') else f'0000:{bdf}' + + # ── VM scan ── + try: + for conf in sorted(glob.glob('/etc/pve/qemu-server/*.conf')): + try: + with open(conf, 'r') as f: + text = f.read() + except OSError: + continue + if re.search( + rf'^hostpci\d+:\s*[^\n]*(?:0000:)?{re.escape(short_bdf)}(?:[,;\s]|$)', + text, re.MULTILINE, + ): + vmid = os.path.basename(conf)[:-5] # strip '.conf' + nm = re.search(r'^name:\s*(\S+)', text, re.MULTILINE) + name = nm.group(1) if nm else '' + return { + 'type': 'vm', + 'id': vmid, + 'name': name, + 'running': _sriov_guest_running('vm', vmid), + } + except Exception: + pass + + # ── LXC scan (via render node) ── + render_node = _sriov_pci_render_node(full_bdf) + if render_node: + try: + for conf in sorted(glob.glob('/etc/pve/lxc/*.conf')): + try: + with open(conf, 'r') as f: + text = f.read() + except OSError: + continue + if re.search( + rf'^(?:dev\d+|lxc\.mount\.entry):\s*[^\n]*{re.escape(render_node)}(?:[,;\s]|$)', + text, re.MULTILINE, + ): + ctid = os.path.basename(conf)[:-5] + nm = re.search(r'^hostname:\s*(\S+)', text, re.MULTILINE) + name = nm.group(1) if nm else '' + return { + 'type': 'lxc', + 'id': ctid, + 'name': name, + 'running': _sriov_guest_running('lxc', ctid), + } + except Exception: + pass + + return None + + +def _sriov_enrich_detail(gpu): + """On-demand enrichment for the GPU detail modal. + + For a PF with active VFs, populates gpu['sriov_vfs'] with per-VF driver + and consumer info. For a VF, populates gpu['sriov_consumer'] with the + guest (if any) currently referencing it. Heavier than _get_sriov_info() + because it scans guest configs, so it is NOT called from the hardware + snapshot path — only from the realtime endpoint. + """ + role = gpu.get('sriov_role') + slot = gpu.get('slot', '') + if not slot: + return + full_bdf = slot if slot.startswith('0000:') else f'0000:{slot}' + + if role == 'pf-active': + vf_list = [] + for vf_bdf in _sriov_list_vfs_of_pf(full_bdf): + vf_list.append({ + 'bdf': vf_bdf, + 'driver': _sriov_pci_driver(vf_bdf) or '', + 'render_node': _sriov_pci_render_node(vf_bdf) or '', + 'consumer': _sriov_find_guest_consumer(vf_bdf), + }) + gpu['sriov_vfs'] = vf_list + elif role == 'vf': + gpu['sriov_consumer'] = _sriov_find_guest_consumer(full_bdf) + + def get_gpu_info(): """Detect and return information about GPUs in the system""" gpus = [] @@ -6196,7 +6401,11 @@ def get_gpu_info(): gpu['pci_class'] = pci_info.get('class', '') gpu['pci_driver'] = pci_info.get('driver', '') gpu['pci_kernel_module'] = pci_info.get('kernel_module', '') - + + sriov_fields = _get_sriov_info(slot) + if sriov_fields: + gpu.update(sriov_fields) + # detailed_info = get_detailed_gpu_info(gpu) # Removed this call here # gpu.update(detailed_info) # It will be called later in api_gpu_realtime @@ -10010,7 +10219,12 @@ def api_gpu_realtime(slot): pass detailed_info = get_detailed_gpu_info(gpu) gpu.update(detailed_info) - + + # SR-IOV detail is only relevant when the modal is actually open, + # so we build it on demand here (not in get_gpu_info) to avoid + # scanning every guest config on the hardware snapshot path. + _sriov_enrich_detail(gpu) + # Extract only the monitoring-related fields realtime_data = { 'has_monitoring_tool': gpu.get('has_monitoring_tool', False), @@ -10035,9 +10249,17 @@ def api_gpu_realtime(slot): # Added for NVIDIA/AMD specific engine info if available 'engine_encoder': gpu.get('engine_encoder'), 'engine_decoder': gpu.get('engine_decoder'), - 'driver_version': gpu.get('driver_version') # Added driver_version + 'driver_version': gpu.get('driver_version'), # Added driver_version + # SR-IOV modal detail (populated only when the GPU is an SR-IOV + # Physical Function with active VFs, or a Virtual Function). + 'sriov_role': gpu.get('sriov_role'), + 'sriov_physfn': gpu.get('sriov_physfn'), + 'sriov_vf_count': gpu.get('sriov_vf_count'), + 'sriov_totalvfs': gpu.get('sriov_totalvfs'), + 'sriov_vfs': gpu.get('sriov_vfs'), + 'sriov_consumer': gpu.get('sriov_consumer'), } - + return jsonify(realtime_data) except Exception as e: # print(f"[v0] Error getting real-time GPU data: {e}") diff --git a/AppImage/types/hardware.ts b/AppImage/types/hardware.ts index 751cfe4c..092b1dd1 100644 --- a/AppImage/types/hardware.ts +++ b/AppImage/types/hardware.ts @@ -190,6 +190,34 @@ export interface GPU { }> has_monitoring_tool?: boolean note?: string + // SR-IOV state — populated from sysfs (physfn symlink + sriov_{num,total}vfs). + // "vf" — this slot is a Virtual Function; sriov_physfn is its PF. + // "pf-active" — this slot is a Physical Function with sriov_vf_count > 0. + // "pf-idle" — SR-IOV capable PF but no VFs currently active. + // "none" — not involved in SR-IOV. + sriov_role?: "vf" | "pf-active" | "pf-idle" | "none" + sriov_physfn?: string + sriov_vf_count?: number + sriov_totalvfs?: number + // SR-IOV detail — only populated by the /api/gpu//realtime endpoint + // when the modal is open (scanning guest configs is too expensive for the + // hardware snapshot path). + sriov_vfs?: SriovVfDetail[] // filled when role === "pf-active" + sriov_consumer?: SriovConsumer | null // filled when role === "vf" +} + +export interface SriovVfDetail { + bdf: string // e.g. "0000:00:02.1" + driver: string // current kernel driver (i915, vfio-pci, ...) + render_node: string // "" when the VF does not expose a DRM node + consumer: SriovConsumer | null // which guest is using this VF, if any +} + +export interface SriovConsumer { + type: "vm" | "lxc" + id: string // VMID or CTID + name: string // VM name / LXC hostname + running: boolean } export interface DiskHardwareInfo { diff --git a/scripts/global/gpu_hook_guard_helpers.sh b/scripts/global/gpu_hook_guard_helpers.sh index 22dc36ab..3c54f681 100644 --- a/scripts/global/gpu_hook_guard_helpers.sh +++ b/scripts/global/gpu_hook_guard_helpers.sh @@ -138,6 +138,12 @@ if [[ -f "$vm_conf" ]]; then slot_has_gpu=false for dev in /sys/bus/pci/devices/0000:${slot}.*; do [[ -e "$dev" ]] || continue + # SR-IOV: skip Virtual Functions when iterating a whole slot. + # VFs share the slot with their PF but carry their own driver + # state; their vfio-pci rebind is handled by Proxmox at VM + # start. Pre-flighting them would falsely block SR-IOV setups + # where the PF legitimately stays on the native driver. + [[ -L "${dev}/physfn" ]] && continue class_hex="$(cat "$dev/class" 2>/dev/null | sed 's/^0x//')" [[ "${class_hex:0:2}" != "03" ]] && continue slot_has_gpu=true @@ -159,6 +165,14 @@ if [[ -f "$vm_conf" ]]; then details+=$'\n'"- ${id}: PCI device not found" continue fi + # SR-IOV VF: do not pre-flight the driver. Proxmox rebinds the VF + # to vfio-pci as part of VM start; at pre-start time the VF may + # still be on its native driver (i915, etc.) — that is normal, + # not an error. Blocking here would prevent every SR-IOV VF + # passthrough from starting. + if [[ -L "${dev_path}/physfn" ]]; then + continue + fi class_hex="$(cat "$dev_path/class" 2>/dev/null | sed 's/^0x//')" # Enforce vfio only for display/3D devices (PCI class 03xx). [[ "${class_hex:0:2}" == "03" ]] || continue diff --git a/scripts/global/pci_passthrough_helpers.sh b/scripts/global/pci_passthrough_helpers.sh index fa29ace6..101f0660 100644 --- a/scripts/global/pci_passthrough_helpers.sh +++ b/scripts/global/pci_passthrough_helpers.sh @@ -50,3 +50,109 @@ function _pci_function_assigned_to_vm() { qm config "$vmid" 2>/dev/null | grep -qE "$pattern" } + +# ========================================================== +# SR-IOV detection helpers +# ========================================================== +# A PCI device participates in SR-IOV when either: +# - It is a Physical Function (PF) with one or more active VFs +# → /sys/bus/pci/devices//sriov_numvfs > 0 +# - It is a Virtual Function (VF) spawned by a PF +# → /sys/bus/pci/devices//physfn is a symlink to the PF +# +# These helpers accept a BDF in either "0000:00:02.0" or "00:02.0" form. +# Return 0 on match, non-zero otherwise (shell convention). + +function _pci_normalize_bdf() { + local id="$1" + [[ -z "$id" ]] && return 1 + [[ "$id" =~ ^0000: ]] || id="0000:${id}" + printf '%s\n' "$id" +} + +function _pci_is_vf() { + local id + id=$(_pci_normalize_bdf "$1") || return 1 + [[ -L "/sys/bus/pci/devices/${id}/physfn" ]] +} + +function _pci_get_pf_of_vf() { + local id + id=$(_pci_normalize_bdf "$1") || return 1 + local link="/sys/bus/pci/devices/${id}/physfn" + [[ -L "$link" ]] || return 1 + basename "$(readlink -f "$link")" +} + +function _pci_is_sriov_capable() { + local id total + id=$(_pci_normalize_bdf "$1") || return 1 + total=$(cat "/sys/bus/pci/devices/${id}/sriov_totalvfs" 2>/dev/null) + [[ -n "$total" && "$total" -gt 0 ]] +} + +function _pci_active_vf_count() { + local id num + id=$(_pci_normalize_bdf "$1") || { echo 0; return 1; } + num=$(cat "/sys/bus/pci/devices/${id}/sriov_numvfs" 2>/dev/null) + [[ -n "$num" ]] || num=0 + echo "$num" +} + +function _pci_has_active_vfs() { + local n + n=$(_pci_active_vf_count "$1") + [[ "$n" -gt 0 ]] +} + +# Filter an array (by name) of PCI BDFs in place, removing entries that +# are SR-IOV Virtual Functions or Physical Functions with active VFs — +# i.e. the configurations ProxMenux refuses to operate on today. +# +# Usage: _pci_sriov_filter_array +# Output: one line per removed entry, formatted "BDF|role" where role is +# whatever _pci_sriov_role prints (e.g. "vf 0000:00:02.0" or +# "pf-active 7"). The caller decides how to surface the removals. +# Returns: 0 if the caller should continue (even if some entries were +# filtered); the array mutation happens either way. +function _pci_sriov_filter_array() { + local -n _arr_ref="$1" + local -a _kept=() + local bdf role first + for bdf in "${_arr_ref[@]}"; do + role=$(_pci_sriov_role "$bdf" 2>/dev/null) + first="${role%% *}" + if [[ "$first" == "vf" || "$first" == "pf-active" ]]; then + echo "${bdf}|${role}" + else + _kept+=("$bdf") + fi + done + _arr_ref=("${_kept[@]}") +} + +# Emits a one-line SR-IOV role description for diagnostics/messages. +# Prints one of: +# "pf-active " — PF with N>0 active VFs +# "pf-idle" — SR-IOV capable PF with 0 VFs (benign) +# "vf " — VF (names its parent PF) +# "none" — device not involved in SR-IOV +function _pci_sriov_role() { + local id + id=$(_pci_normalize_bdf "$1") || { echo "none"; return 0; } + if _pci_is_vf "$id"; then + echo "vf $(_pci_get_pf_of_vf "$id")" + return 0 + fi + if _pci_is_sriov_capable "$id"; then + local n + n=$(_pci_active_vf_count "$id") + if [[ "$n" -gt 0 ]]; then + echo "pf-active ${n}" + else + echo "pf-idle" + fi + return 0 + fi + echo "none" +} diff --git a/scripts/gpu_tpu/add_gpu_lxc.sh b/scripts/gpu_tpu/add_gpu_lxc.sh index c765da6b..762c44ae 100644 --- a/scripts/gpu_tpu/add_gpu_lxc.sh +++ b/scripts/gpu_tpu/add_gpu_lxc.sh @@ -28,6 +28,11 @@ NVIDIA_VID_DID="" if [[ -f "$UTILS_FILE" ]]; then source "$UTILS_FILE" fi +if [[ -f "$LOCAL_SCRIPTS/global/pci_passthrough_helpers.sh" ]]; then + source "$LOCAL_SCRIPTS/global/pci_passthrough_helpers.sh" +elif [[ -f "$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)/global/pci_passthrough_helpers.sh" ]]; then + source "$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)/global/pci_passthrough_helpers.sh" +fi if [[ -f "$LOCAL_SCRIPTS/global/gpu_hook_guard_helpers.sh" ]]; then source "$LOCAL_SCRIPTS/global/gpu_hook_guard_helpers.sh" elif [[ -f "$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. && pwd)/global/gpu_hook_guard_helpers.sh" ]]; then @@ -259,6 +264,67 @@ select_container() { # ============================================================ # GPU checklist selection # ============================================================ +# ============================================================ +# SR-IOV guard — refuse to pass an SR-IOV GPU to an LXC via ProxMenux. +# Although the LXC flow does not rewrite vfio.conf/blacklist (so it is +# not destructive like add_gpu_vm.sh), it blindly globs /dev/dri/card* +# and /dev/dri/renderD* without mapping each node to its BDF. With 7 +# VFs the container may end up holding any/all of them, which is not +# the behavior a user asking for "one VF to this LXC" expects. Until a +# VF-aware LXC flow exists, stop and point to manual configuration — +# matching the policy used in switch_gpu_mode.sh and add_gpu_vm.sh. +# ============================================================ +check_sriov_and_block_if_needed() { + declare -F _pci_sriov_role >/dev/null 2>&1 || return 0 + + local gpu_type pci role first_word + local -a offenders=() + + for gpu_type in "${SELECTED_GPUS[@]}"; do + case "$gpu_type" in + intel) pci="$INTEL_PCI" ;; + amd) pci="$AMD_PCI" ;; + nvidia) pci="$NVIDIA_PCI" ;; + *) continue ;; + esac + [[ -n "$pci" ]] || continue + + role=$(_pci_sriov_role "$pci") + first_word="${role%% *}" + case "$first_word" in + vf) + offenders+=("${pci}|vf|${role#vf }") + ;; + pf-active) + offenders+=("${pci}|pf-active|${role#pf-active }") + ;; + esac + done + + [[ ${#offenders[@]} -eq 0 ]] && return 0 + + local msg entry bdf kind info + msg="\n\Zb\Z6$(translate 'SR-IOV Configuration Detected')\Zn\n\n" + for entry in "${offenders[@]}"; do + bdf="${entry%%|*}" + kind="${entry#*|}"; kind="${kind%%|*}" + info="${entry##*|}" + if [[ "$kind" == "vf" ]]; then + msg+=" • \Zb${bdf}\Zn — $(translate 'Virtual Function (parent PF:') ${info})\n" + else + msg+=" • \Zb${bdf}\Zn — $(translate 'Physical Function with') ${info} $(translate 'active VFs')\n" + fi + done + msg+="\n$(translate 'To pass SR-IOV Virtual Functions to a container, edit the LXC configuration manually via the Proxmox web interface. The Physical Function will remain bound to the native driver.')" + + dialog --backtitle "ProxMenux" --colors \ + --title "$(translate 'SR-IOV Configuration Detected')" \ + --msgbox "$msg" 16 82 + + exit 0 +} + + select_gpus() { local gpu_items=() $HAS_INTEL && gpu_items+=("intel" "${INTEL_NAME:-Intel iGPU}" "off") @@ -927,6 +993,7 @@ main() { detect_host_gpus select_container select_gpus + check_sriov_and_block_if_needed check_vfio_switch_mode precheck_existing_lxc_gpu_config diff --git a/scripts/gpu_tpu/add_gpu_vm.sh b/scripts/gpu_tpu/add_gpu_vm.sh index f57360e8..16608435 100644 --- a/scripts/gpu_tpu/add_gpu_vm.sh +++ b/scripts/gpu_tpu/add_gpu_vm.sh @@ -718,6 +718,48 @@ select_gpu() { } +# ========================================================== +# SR-IOV guard — refuse to assign a Virtual Function or a Physical +# Function with active VFs. Matches the policy in switch_gpu_mode.sh: +# writing this GPU's vendor:device to /etc/modprobe.d/vfio.conf would +# let vfio-pci claim the PF at next boot and destroy the whole VF +# tree. ProxMenux does not yet manage SR-IOV lifecycle, so we stop +# before touching vfio.conf / blacklist.conf. +# ========================================================== +check_sriov_and_block_if_needed() { + declare -F _pci_sriov_role >/dev/null 2>&1 || return 0 + [[ -n "$SELECTED_GPU_PCI" ]] || return 0 + + local role first_word detail="" + role=$(_pci_sriov_role "$SELECTED_GPU_PCI") + first_word="${role%% *}" + + case "$first_word" in + vf) + local parent="${role#vf }" + detail="$(translate 'The selected device') \Zb${SELECTED_GPU_PCI}\Zn $(translate 'is an SR-IOV Virtual Function (VF). Its parent Physical Function is') \Zb${parent}\Zn." + ;; + pf-active) + local n="${role#pf-active }" + detail="$(translate 'The selected device') \Zb${SELECTED_GPU_PCI}\Zn $(translate 'is a Physical Function with') \Zb${n}\Zn $(translate 'active Virtual Functions. Changing its driver binding would destroy every VF.')" + ;; + *) + return 0 + ;; + esac + + local msg + msg="\n\Zb\Z6$(translate 'SR-IOV Configuration Detected')\Zn\n\n" + msg+="${detail}\n\n" + msg+="$(translate 'To assign VFs to VMs or LXCs, edit the configuration manually via the Proxmox web interface. The Physical Function will remain bound to the native driver.')" + + _pmx_msgbox "$(translate 'SR-IOV Configuration Detected')" "$msg" 16 82 + + [[ "$WIZARD_CALL" == "true" ]] && _set_wizard_result "cancelled" + exit 0 +} + + # ========================================================== # Phase 1 — Step 4: Single-GPU warning # ========================================================== @@ -1922,6 +1964,7 @@ main() { detect_host_gpus check_iommu_enabled select_gpu + check_sriov_and_block_if_needed warn_single_gpu select_vm ensure_selected_gpu_not_already_in_target_vm diff --git a/scripts/gpu_tpu/switch_gpu_mode.sh b/scripts/gpu_tpu/switch_gpu_mode.sh index 4ac95e23..44ea8928 100644 --- a/scripts/gpu_tpu/switch_gpu_mode.sh +++ b/scripts/gpu_tpu/switch_gpu_mode.sh @@ -624,6 +624,75 @@ select_gpus() { read -ra SELECTED_GPU_IDX <<< "$sel" } +# ========================================================== +# SR-IOV guard — abort mode switch when SR-IOV is active +# ========================================================== +# Intel i915-sriov-dkms and AMD MxGPU split a Physical Function (PF) into +# multiple Virtual Functions (VFs). Switching the PF's driver destroys +# every VF; switching a VF's driver affects only that VF. ProxMenux does +# not yet manage the SR-IOV lifecycle (create/destroy VFs, track per-VF +# ownership), so operating on a PF with active VFs — or on a VF itself — +# would leave the user's virtualization stack in an inconsistent state. +# We detect the situation early and hand the user back to the Proxmox +# web UI, which understands VFs as first-class PCI devices. +check_sriov_and_block_if_needed() { + declare -F _pci_sriov_role >/dev/null 2>&1 || return 0 + + local idx pci role first_word pf_bdf active_count + local -a vf_list=() + local -a pf_list=() + + for idx in "${SELECTED_GPU_IDX[@]}"; do + pci="${ALL_GPU_PCIS[$idx]}" + role=$(_pci_sriov_role "$pci") + first_word="${role%% *}" + case "$first_word" in + vf) + pf_bdf="${role#vf }" + vf_list+=("${pci}|${pf_bdf}") + ;; + pf-active) + active_count="${role#pf-active }" + pf_list+=("${pci}|${active_count}") + ;; + esac + done + + [[ ${#vf_list[@]} -eq 0 && ${#pf_list[@]} -eq 0 ]] && return 0 + + local title msg entry bdf parent cnt + title="$(translate 'SR-IOV Configuration Detected')" + msg="\n" + + if [[ ${#vf_list[@]} -gt 0 ]]; then + msg+="$(translate 'The following selected device(s) are SR-IOV Virtual Functions (VFs):')\n\n" + for entry in "${vf_list[@]}"; do + bdf="${entry%%|*}" + parent="${entry#*|}" + msg+=" • ${bdf} $(translate '(parent PF:') ${parent})\n" + done + msg+="\n" + fi + + if [[ ${#pf_list[@]} -gt 0 ]]; then + msg+="$(translate 'The following selected device(s) are Physical Functions with active Virtual Functions:')\n\n" + for entry in "${pf_list[@]}"; do + bdf="${entry%%|*}" + cnt="${entry#*|}" + msg+=" • ${bdf} — ${cnt} $(translate 'active VF(s)')\n" + done + msg+="\n" + fi + + msg+="$(translate 'To assign VFs to VMs or LXCs, edit the configuration manually via the Proxmox web interface. The Physical Function will remain bound to the native driver.')" + + dialog --backtitle "ProxMenux" \ + --title "$title" \ + --msgbox "$msg" 20 80 + + exit 0 +} + collect_selected_iommu_ids() { SELECTED_IOMMU_IDS=() SELECTED_PCI_SLOTS=() @@ -1164,6 +1233,7 @@ main() { detect_host_gpus while true; do select_gpus + check_sriov_and_block_if_needed select_target_mode [[ $? -eq 2 ]] && continue validate_vm_mode_blocked_ids diff --git a/scripts/gpu_tpu/switch_gpu_mode_direct.sh b/scripts/gpu_tpu/switch_gpu_mode_direct.sh index dfe5536e..abd6456c 100644 --- a/scripts/gpu_tpu/switch_gpu_mode_direct.sh +++ b/scripts/gpu_tpu/switch_gpu_mode_direct.sh @@ -507,6 +507,67 @@ find_gpu_by_slot() { return 1 } +# ========================================================== +# SR-IOV guard — abort mode switch when SR-IOV is active +# ========================================================== +# Same policy as the interactive switch_gpu_mode.sh: refuse to operate on +# a Virtual Function or on a Physical Function that already has active +# VFs, since flipping drivers in that state collapses the VF tree and +# breaks every guest that was consuming a VF. +check_sriov_and_block_if_needed() { + declare -F _pci_sriov_role >/dev/null 2>&1 || return 0 + + local idx pci role first_word pf_bdf active_count + local -a vf_list=() + local -a pf_list=() + + for idx in "${SELECTED_GPU_IDX[@]}"; do + pci="${ALL_GPU_PCIS[$idx]}" + role=$(_pci_sriov_role "$pci") + first_word="${role%% *}" + case "$first_word" in + vf) + pf_bdf="${role#vf }" + vf_list+=("${pci}|${pf_bdf}") + ;; + pf-active) + active_count="${role#pf-active }" + pf_list+=("${pci}|${active_count}") + ;; + esac + done + + [[ ${#vf_list[@]} -eq 0 && ${#pf_list[@]} -eq 0 ]] && return 0 + + local msg entry bdf parent cnt + msg="
$(translate 'SR-IOV Configuration Detected')
" + + if [[ ${#vf_list[@]} -gt 0 ]]; then + msg+="

$(translate 'The following selected device(s) are SR-IOV Virtual Functions (VFs):')

    " + for entry in "${vf_list[@]}"; do + bdf="${entry%%|*}" + parent="${entry#*|}" + msg+="
  • ${bdf} — $(translate 'parent PF:') ${parent}
  • " + done + msg+="
" + fi + + if [[ ${#pf_list[@]} -gt 0 ]]; then + msg+="

$(translate 'The following selected device(s) are Physical Functions with active Virtual Functions:')

    " + for entry in "${pf_list[@]}"; do + bdf="${entry%%|*}" + cnt="${entry#*|}" + msg+="
  • ${bdf} — ${cnt} $(translate 'active VF(s)')
  • " + done + msg+="
" + fi + + msg+="

$(translate 'To assign VFs to VMs or LXCs, edit the configuration manually via the Proxmox web interface. The Physical Function will remain bound to the native driver.')

" + + hybrid_msgbox "$(translate 'SR-IOV Configuration Detected')" "$msg" + return 1 +} + validate_vm_mode_blocked_ids() { [[ "$TARGET_MODE" != "vm" ]] && return 0 @@ -1147,6 +1208,12 @@ main() { exit 1 fi + # SR-IOV guard: refuse to toggle the driver on a VF or on a PF with + # active VFs. Manual handling via Proxmox web UI is required. + if ! check_sriov_and_block_if_needed; then + exit 1 + fi + # Validate if GPU is blocked for VM mode (certain Intel GPUs) if ! validate_vm_mode_blocked_ids; then exit 1 diff --git a/scripts/menus/menu_Helper_Scripts.sh b/scripts/menus/menu_Helper_Scripts.sh index 50ace89e..3bf267d3 100644 --- a/scripts/menus/menu_Helper_Scripts.sh +++ b/scripts/menus/menu_Helper_Scripts.sh @@ -173,13 +173,28 @@ run_script_by_slug() { credentials=$(format_credentials "$first") # Build info message - local msg="\Zb\Z4$(translate "Description"):\Zn\n$desc" - [[ -n "$notes_dialog" ]] && msg+="\n\n\Zb\Z4$(translate "Notes"):\Zn\n$notes_dialog" +local msg="\Zb\Z4$(translate "Description"):\Zn\n$desc" + if [[ -n "$notes" ]]; then + local notes_short="" + local char_count=0 + local max_chars=400 + while IFS= read -r line; do + [[ -z "$line" ]] && continue + char_count=$(( char_count + ${#line} )) + if [[ $char_count -lt $max_chars ]]; then + notes_short+="• $line\n" + else + notes_short+="...\n" + break + fi + done <<< "$notes" + msg+="\n\n\Zb\Z4$(translate "Notes"):\Zn\n$notes_short" + fi [[ -n "$credentials" ]] && msg+="\n\n\Zb\Z4$(translate "Default Credentials"):\Zn\n$credentials" [[ "$port" -gt 0 ]] && msg+="\n\n\Zb\Z4$(translate "Default Port"):\Zn $port" [[ -n "$website" ]] && msg+="\n\Zb\Z4$(translate "Website"):\Zn $website" - msg+="\n\n$(translate "Choose how to run the script:"):" + msg+="\n\n$(translate "Choose how to run the script:")" # Build menu: one or two entries per script_info (GH + optional Mirror) declare -a MENU_OPTS=() @@ -383,7 +398,7 @@ while true; do SELECTED_IDX=$(dialog --backtitle "ProxMenux" \ --title "Proxmox VE Helper-Scripts" \ --menu "$(translate "Select a category or search for scripts:"):" \ - 20 70 14 "${MENU_ITEMS[@]}" 3>&1 1>&2 2>&3) || { + 22 75 15 "${MENU_ITEMS[@]}" 3>&1 1>&2 2>&3) || { dialog --clear --title "ProxMenux" \ --msgbox "\n\n$(translate "Visit the website to discover more scripts, stay updated with the latest updates, and support the project:")\n\nhttps://community-scripts.github.io/ProxmoxVE" 15 70 exec bash "$LOCAL_SCRIPTS/menus/main_menu.sh" @@ -425,7 +440,7 @@ while true; do SCRIPT_INDEX=$(dialog --colors --backtitle "ProxMenux" \ --title "$(translate "Scripts in") ${CATEGORY_NAMES[$SELECTED]}" \ --menu "$(translate "Choose a script to execute:"):" \ - 20 70 14 "${SCRIPTS[@]}" 3>&1 1>&2 2>&3) || break + 22 75 15 "${SCRIPTS[@]}" 3>&1 1>&2 2>&3) || break SCRIPT_SELECTED="${INDEX_TO_SLUG[$SCRIPT_INDEX]}" run_script_by_slug "$SCRIPT_SELECTED" diff --git a/scripts/storage/add_controller_nvme_vm.sh b/scripts/storage/add_controller_nvme_vm.sh index 3b4c6644..3ddf1dfd 100644 --- a/scripts/storage/add_controller_nvme_vm.sh +++ b/scripts/storage/add_controller_nvme_vm.sh @@ -364,6 +364,41 @@ select_controller_nvme() { return 1 fi + # SR-IOV guard: drop VFs / active PFs and inform the user. Same policy + # as add_gpu_vm.sh and the VM creators — refuse to rewrite host VFIO + # config for an SR-IOV device since it would collapse the VF tree. + if declare -F _pci_sriov_filter_array >/dev/null 2>&1; then + local sriov_removed="" + sriov_removed=$(_pci_sriov_filter_array SELECTED_CONTROLLER_PCIS) + if [[ -n "$sriov_removed" ]]; then + local sriov_msg="" + sriov_msg="\n$(translate "The following devices were excluded because they are part of an SR-IOV configuration:")\n" + local entry bdf role first + while IFS= read -r entry; do + [[ -z "$entry" ]] && continue + bdf="${entry%%|*}" + role="${entry#*|}" + first="${role%% *}" + if [[ "$first" == "vf" ]]; then + sriov_msg+="\n • ${bdf} — $(translate "Virtual Function")" + else + sriov_msg+="\n • ${bdf} — $(translate "Physical Function with") ${role#pf-active } $(translate "active VFs")" + fi + done <<< "$sriov_removed" + sriov_msg+="\n\n$(translate "To pass SR-IOV Virtual Functions to a VM, edit the VM configuration manually via the Proxmox web interface.")" + dialog --backtitle "ProxMenux" --colors \ + --title "$(translate "SR-IOV Configuration Detected")" \ + --msgbox "$sriov_msg" 18 82 + fi + + if [[ ${#SELECTED_CONTROLLER_PCIS[@]} -eq 0 ]]; then + dialog --backtitle "ProxMenux" \ + --title "$(translate "Controller + NVMe")" \ + --msgbox "\n$(translate "No eligible controllers remain after SR-IOV filtering.")" 8 70 + return 1 + fi + fi + return 0 } diff --git a/scripts/vm/synology.sh b/scripts/vm/synology.sh index 19984774..10756735 100644 --- a/scripts/vm/synology.sh +++ b/scripts/vm/synology.sh @@ -1255,6 +1255,48 @@ if [[ ${#EFFECTIVE_IMPORT_DISKS[@]} -gt 0 ]]; then done fi +if [[ ${#CONTROLLER_NVME_PCIS[@]} -gt 0 ]]; then + # SR-IOV guard: exclude VFs / active PFs before staging. Mid-flow + # phase-2 output; a whiptail msgbox stops the scrolling so the user + # actually sees which devices were dropped. After the ack, each + # skipped BDF is logged via msg_warn so the action is visible in the + # captured log as well. + if declare -F _pci_sriov_filter_array >/dev/null 2>&1; then + SRIOV_REMOVED=$(_pci_sriov_filter_array CONTROLLER_NVME_PCIS) + if [[ -n "$SRIOV_REMOVED" ]]; then + SRIOV_MSG="" + SRIOV_BDFS=() + SRIOV_NL=$'\n' + SRIOV_MSG="$(translate "The following devices were excluded from Controller/NVMe passthrough because they are part of an SR-IOV configuration:")" + while IFS= read -r SRIOV_ENTRY; do + [[ -z "$SRIOV_ENTRY" ]] && continue + SRIOV_BDF="${SRIOV_ENTRY%%|*}" + SRIOV_ROLE="${SRIOV_ENTRY#*|}" + SRIOV_FIRST="${SRIOV_ROLE%% *}" + SRIOV_BDFS+=("$SRIOV_BDF") + if [[ "$SRIOV_FIRST" == "vf" ]]; then + SRIOV_MSG+="${SRIOV_NL} • ${SRIOV_BDF} — $(translate "Virtual Function")" + else + SRIOV_MSG+="${SRIOV_NL} • ${SRIOV_BDF} — $(translate "Physical Function with") ${SRIOV_ROLE#pf-active } $(translate "active VFs")" + fi + done <<< "$SRIOV_REMOVED" + SRIOV_MSG+="${SRIOV_NL}${SRIOV_NL}$(translate "To pass SR-IOV Virtual Functions to a VM, edit the VM configuration manually via the Proxmox web interface.")" + + whiptail --backtitle "ProxMenux" \ + --title "$(translate "SR-IOV Configuration Detected")" \ + --msgbox "$SRIOV_MSG" 18 82 + + for SRIOV_SKIPPED in "${SRIOV_BDFS[@]}"; do + msg_warn "$(translate "Skipping SR-IOV device"): ${SRIOV_SKIPPED}" + done + fi + fi + + if [[ ${#CONTROLLER_NVME_PCIS[@]} -eq 0 ]]; then + msg_warn "$(translate "No eligible Controller/NVMe devices remain after SR-IOV filtering. Skipping.")" + fi +fi + if [[ ${#CONTROLLER_NVME_PCIS[@]} -gt 0 ]]; then local CONTROLLER_CAN_STAGE=true if declare -F _pci_is_iommu_active >/dev/null 2>&1 && ! _pci_is_iommu_active; then diff --git a/scripts/vm/vm_creator.sh b/scripts/vm/vm_creator.sh index da035e65..3218995a 100644 --- a/scripts/vm/vm_creator.sh +++ b/scripts/vm/vm_creator.sh @@ -468,6 +468,55 @@ fi done fi + if [[ ${#CONTROLLER_NVME_PCIS[@]} -gt 0 ]]; then + # SR-IOV guard: drop Virtual Functions / active-PFs before staging. + # Proxmox's VFIO rebind via qm hostpci would trigger the same VF-tree + # collapse described in the GPU flows, so we exclude them and tell + # the user to manage those passthroughs manually. + # + # UI choice: this runs mid-flow (phase 2 of the wizard, interleaved + # with msg_info/msg_ok output), so a whiptail msgbox is used to force + # the user to acknowledge the exclusion instead of letting the notice + # scroll by with the rest of the processing output. After the user + # clicks OK, a per-device msg_warn is emitted so the skipped BDFs + # remain visible in the captured log. + if declare -F _pci_sriov_filter_array >/dev/null 2>&1; then + local _sriov_removed="" + _sriov_removed=$(_pci_sriov_filter_array CONTROLLER_NVME_PCIS) + if [[ -n "$_sriov_removed" ]]; then + local _sriov_msg="" _entry _bdf _role _first _sb + local -a _sriov_bdfs=() + local _nl=$'\n' + _sriov_msg="$(translate "The following devices were excluded from Controller/NVMe passthrough because they are part of an SR-IOV configuration:")" + while IFS= read -r _entry; do + [[ -z "$_entry" ]] && continue + _bdf="${_entry%%|*}" + _role="${_entry#*|}" + _first="${_role%% *}" + _sriov_bdfs+=("$_bdf") + if [[ "$_first" == "vf" ]]; then + _sriov_msg+="${_nl} • ${_bdf} — $(translate "Virtual Function")" + else + _sriov_msg+="${_nl} • ${_bdf} — $(translate "Physical Function with") ${_role#pf-active } $(translate "active VFs")" + fi + done <<< "$_sriov_removed" + _sriov_msg+="${_nl}${_nl}$(translate "To pass SR-IOV Virtual Functions to a VM, edit the VM configuration manually via the Proxmox web interface.")" + + whiptail --backtitle "ProxMenux" \ + --title "$(translate "SR-IOV Configuration Detected")" \ + --msgbox "$_sriov_msg" 18 82 + + for _sb in "${_sriov_bdfs[@]}"; do + msg_warn "$(translate "Skipping SR-IOV device"): ${_sb}" + done + fi + fi + + if [[ ${#CONTROLLER_NVME_PCIS[@]} -eq 0 ]]; then + msg_warn "$(translate "No eligible Controller/NVMe devices remain after SR-IOV filtering. Skipping.")" + fi + fi + if [[ ${#CONTROLLER_NVME_PCIS[@]} -gt 0 ]]; then local CONTROLLER_CAN_STAGE=true if declare -F _pci_is_iommu_active >/dev/null 2>&1 && ! _pci_is_iommu_active; then diff --git a/scripts/vm/zimaos.sh b/scripts/vm/zimaos.sh index 7ec02c54..ef609381 100644 --- a/scripts/vm/zimaos.sh +++ b/scripts/vm/zimaos.sh @@ -1270,6 +1270,48 @@ function create_vm() { done fi + if [[ ${#CONTROLLER_NVME_PCIS[@]} -gt 0 ]]; then + # SR-IOV guard: mirror of the synology.sh/vm_creator.sh block — + # drop VFs and active-PF devices before staging so Proxmox does + # not collapse the VF tree at VM start. Mid-flow, so the notice + # goes through whiptail (blocking acknowledgment) and each + # skipped BDF is then echoed via msg_warn for the log trail. + if declare -F _pci_sriov_filter_array >/dev/null 2>&1; then + SRIOV_REMOVED=$(_pci_sriov_filter_array CONTROLLER_NVME_PCIS) + if [[ -n "$SRIOV_REMOVED" ]]; then + SRIOV_MSG="" + SRIOV_BDFS=() + SRIOV_NL=$'\n' + SRIOV_MSG="$(translate "The following devices were excluded from Controller/NVMe passthrough because they are part of an SR-IOV configuration:")" + while IFS= read -r SRIOV_ENTRY; do + [[ -z "$SRIOV_ENTRY" ]] && continue + SRIOV_BDF="${SRIOV_ENTRY%%|*}" + SRIOV_ROLE="${SRIOV_ENTRY#*|}" + SRIOV_FIRST="${SRIOV_ROLE%% *}" + SRIOV_BDFS+=("$SRIOV_BDF") + if [[ "$SRIOV_FIRST" == "vf" ]]; then + SRIOV_MSG+="${SRIOV_NL} • ${SRIOV_BDF} — $(translate "Virtual Function")" + else + SRIOV_MSG+="${SRIOV_NL} • ${SRIOV_BDF} — $(translate "Physical Function with") ${SRIOV_ROLE#pf-active } $(translate "active VFs")" + fi + done <<< "$SRIOV_REMOVED" + SRIOV_MSG+="${SRIOV_NL}${SRIOV_NL}$(translate "To pass SR-IOV Virtual Functions to a VM, edit the VM configuration manually via the Proxmox web interface.")" + + whiptail --backtitle "ProxMenux" \ + --title "$(translate "SR-IOV Configuration Detected")" \ + --msgbox "$SRIOV_MSG" 18 82 + + for SRIOV_SKIPPED in "${SRIOV_BDFS[@]}"; do + msg_warn "$(translate "Skipping SR-IOV device"): ${SRIOV_SKIPPED}" + done + fi + fi + + if [[ ${#CONTROLLER_NVME_PCIS[@]} -eq 0 ]]; then + msg_warn "$(translate "No eligible Controller/NVMe devices remain after SR-IOV filtering. Skipping.")" + fi + fi + if [[ ${#CONTROLLER_NVME_PCIS[@]} -gt 0 ]]; then local CONTROLLER_CAN_STAGE=true if declare -F _pci_is_iommu_active >/dev/null 2>&1 && ! _pci_is_iommu_active; then