From 4fa4bbb08b671fda3bb127cd326f4d45b964fe9c Mon Sep 17 00:00:00 2001 From: MacRimi Date: Fri, 10 Apr 2026 09:51:03 +0200 Subject: [PATCH] update switch_gpu_mode.sh --- scripts/gpu_tpu/add_gpu_lxc.sh | 14 +++++++++ scripts/gpu_tpu/add_gpu_vm.sh | 22 +++++++++++++ scripts/gpu_tpu/switch_gpu_mode.sh | 38 +++++++++++++++++++++++ scripts/gpu_tpu/switch_gpu_mode_direct.sh | 38 +++++++++++++++++++++++ 4 files changed, 112 insertions(+) diff --git a/scripts/gpu_tpu/add_gpu_lxc.sh b/scripts/gpu_tpu/add_gpu_lxc.sh index 278d6ae6..c765da6b 100644 --- a/scripts/gpu_tpu/add_gpu_lxc.sh +++ b/scripts/gpu_tpu/add_gpu_lxc.sh @@ -804,6 +804,20 @@ _remove_gpu_blacklist() { sed -i '/^blacklist nvidia_uvm$/d' "$blacklist_file" sed -i '/^blacklist lbm-nouveau$/d' "$blacklist_file" sed -i '/^options nouveau modeset=0$/d' "$blacklist_file" + + # Remove hard blacklist file created for VFIO mode + local nvidia_blacklist="/etc/modprobe.d/nvidia-blacklist.conf" + if [[ -f "$nvidia_blacklist" ]]; then + rm -f "$nvidia_blacklist" + fi + + # Restore NVIDIA udev rules if they were disabled for VFIO mode + local udev_disabled="/etc/udev/rules.d/70-nvidia.rules.proxmenux-disabled" + local udev_rules="/etc/udev/rules.d/70-nvidia.rules" + if [[ -f "$udev_disabled" ]]; then + mv "$udev_disabled" "$udev_rules" + udevadm control --reload-rules >/dev/null 2>&1 || true + fi ;; amd) sed -i '/^blacklist radeon$/d' "$blacklist_file" diff --git a/scripts/gpu_tpu/add_gpu_vm.sh b/scripts/gpu_tpu/add_gpu_vm.sh index d13bfcbe..8c895b97 100644 --- a/scripts/gpu_tpu/add_gpu_vm.sh +++ b/scripts/gpu_tpu/add_gpu_vm.sh @@ -1543,6 +1543,28 @@ sanitize_nvidia_host_stack_for_vfio() { changed=true fi + # Disable NVIDIA udev rules that trigger nvidia-smi (causes conflict with vfio-pci) + local udev_rules="/etc/udev/rules.d/70-nvidia.rules" + if [[ -f "$udev_rules" ]]; then + mv "$udev_rules" "${udev_rules}.proxmenux-disabled" >>"$LOG_FILE" 2>&1 || true + udevadm control --reload-rules >>"$LOG_FILE" 2>&1 || true + changed=true + fi + + # Create hard blacklist to prevent ANY nvidia module loading (even via modprobe/nvidia-smi) + local nvidia_blacklist="/etc/modprobe.d/nvidia-blacklist.conf" + if [[ ! -f "$nvidia_blacklist" ]]; then + cat > "$nvidia_blacklist" <<'EOF' +# ProxMenux: Hard blacklist to prevent ANY nvidia module loading in VFIO mode +# This prevents nvidia-smi and other tools from triggering module load attempts +install nvidia /bin/false +install nvidia_uvm /bin/false +install nvidia_drm /bin/false +install nvidia_modeset /bin/false +EOF + changed=true + fi + if $changed; then HOST_CONFIG_CHANGED=true msg_ok "$(translate 'NVIDIA host services/autoload disabled for VFIO mode')" | tee -a "$screen_capture" diff --git a/scripts/gpu_tpu/switch_gpu_mode.sh b/scripts/gpu_tpu/switch_gpu_mode.sh index 5d0ec58e..1c869c7c 100644 --- a/scripts/gpu_tpu/switch_gpu_mode.sh +++ b/scripts/gpu_tpu/switch_gpu_mode.sh @@ -280,6 +280,28 @@ _sanitize_nvidia_host_stack_for_vfio() { changed=true fi + # Disable NVIDIA udev rules that trigger nvidia-smi (causes conflict with vfio-pci) + local udev_rules="/etc/udev/rules.d/70-nvidia.rules" + if [[ -f "$udev_rules" ]]; then + mv "$udev_rules" "${udev_rules}.proxmenux-disabled" >>"$LOG_FILE" 2>&1 || true + udevadm control --reload-rules >>"$LOG_FILE" 2>&1 || true + changed=true + fi + + # Create hard blacklist to prevent ANY nvidia module loading (even via modprobe/nvidia-smi) + local nvidia_blacklist="/etc/modprobe.d/nvidia-blacklist.conf" + if [[ ! -f "$nvidia_blacklist" ]]; then + cat > "$nvidia_blacklist" <<'EOF' +# ProxMenux: Hard blacklist to prevent ANY nvidia module loading in VFIO mode +# This prevents nvidia-smi and other tools from triggering module load attempts +install nvidia /bin/false +install nvidia_uvm /bin/false +install nvidia_drm /bin/false +install nvidia_modeset /bin/false +EOF + changed=true + fi + if $changed; then HOST_CONFIG_CHANGED=true msg_ok "$(translate 'NVIDIA host services/autoload disabled for VFIO mode')" | tee -a "$screen_capture" @@ -294,6 +316,22 @@ _restore_nvidia_host_stack_for_lxc() { local disabled_file="/etc/modules-load.d/nvidia-vfio.conf.proxmenux-disabled-vfio" local active_file="/etc/modules-load.d/nvidia-vfio.conf" + # Remove hard blacklist that was preventing nvidia module loading + local nvidia_blacklist="/etc/modprobe.d/nvidia-blacklist.conf" + if [[ -f "$nvidia_blacklist" ]]; then + rm -f "$nvidia_blacklist" >>"$LOG_FILE" 2>&1 || true + changed=true + fi + + # Restore NVIDIA udev rules if they were disabled + local udev_disabled="/etc/udev/rules.d/70-nvidia.rules.proxmenux-disabled" + local udev_rules="/etc/udev/rules.d/70-nvidia.rules" + if [[ -f "$udev_disabled" ]]; then + mv "$udev_disabled" "$udev_rules" >>"$LOG_FILE" 2>&1 || true + udevadm control --reload-rules >>"$LOG_FILE" 2>&1 || true + changed=true + fi + # Restore previous modules-load policy if ProxMenux disabled it in VM mode. if [[ -f "$disabled_file" ]]; then mv "$disabled_file" "$active_file" >>"$LOG_FILE" 2>&1 || true diff --git a/scripts/gpu_tpu/switch_gpu_mode_direct.sh b/scripts/gpu_tpu/switch_gpu_mode_direct.sh index 9f9e773c..36196bc6 100644 --- a/scripts/gpu_tpu/switch_gpu_mode_direct.sh +++ b/scripts/gpu_tpu/switch_gpu_mode_direct.sh @@ -294,6 +294,28 @@ _sanitize_nvidia_host_stack_for_vfio() { changed=true fi + # Disable NVIDIA udev rules that trigger nvidia-smi (causes conflict with vfio-pci) + local udev_rules="/etc/udev/rules.d/70-nvidia.rules" + if [[ -f "$udev_rules" ]]; then + mv "$udev_rules" "${udev_rules}.proxmenux-disabled" >>"$LOG_FILE" 2>&1 || true + udevadm control --reload-rules >>"$LOG_FILE" 2>&1 || true + changed=true + fi + + # Create hard blacklist to prevent ANY nvidia module loading (even via modprobe/nvidia-smi) + local nvidia_blacklist="/etc/modprobe.d/nvidia-blacklist.conf" + if [[ ! -f "$nvidia_blacklist" ]]; then + cat > "$nvidia_blacklist" <<'EOF' +# ProxMenux: Hard blacklist to prevent ANY nvidia module loading in VFIO mode +# This prevents nvidia-smi and other tools from triggering module load attempts +install nvidia /bin/false +install nvidia_uvm /bin/false +install nvidia_drm /bin/false +install nvidia_modeset /bin/false +EOF + changed=true + fi + if $changed; then HOST_CONFIG_CHANGED=true msg_ok "$(translate 'NVIDIA host services/autoload disabled for VFIO mode')" | tee -a "$screen_capture" @@ -308,6 +330,22 @@ _restore_nvidia_host_stack_for_lxc() { local disabled_file="/etc/modules-load.d/nvidia-vfio.conf.proxmenux-disabled-vfio" local active_file="/etc/modules-load.d/nvidia-vfio.conf" + # Remove hard blacklist that was preventing nvidia module loading + local nvidia_blacklist="/etc/modprobe.d/nvidia-blacklist.conf" + if [[ -f "$nvidia_blacklist" ]]; then + rm -f "$nvidia_blacklist" >>"$LOG_FILE" 2>&1 || true + changed=true + fi + + # Restore NVIDIA udev rules if they were disabled + local udev_disabled="/etc/udev/rules.d/70-nvidia.rules.proxmenux-disabled" + local udev_rules="/etc/udev/rules.d/70-nvidia.rules" + if [[ -f "$udev_disabled" ]]; then + mv "$udev_disabled" "$udev_rules" >>"$LOG_FILE" 2>&1 || true + udevadm control --reload-rules >>"$LOG_FILE" 2>&1 || true + changed=true + fi + if [[ -f "$disabled_file" ]]; then mv "$disabled_file" "$active_file" >>"$LOG_FILE" 2>&1 || true changed=true