Update flask_server.py

2026-02-21 18:06:25 +00:00 · 2025-10-13 17:53:34 +02:00
parent 9f2d15e590
commit 602291736f
1 changed files with 73 additions and 3 deletions
--- a/AppImage/scripts/flask_server.py
+++ b/AppImage/scripts/flask_server.py
@@ -1820,7 +1820,7 @@ def get_detailed_gpu_info(gpu):
                # Terminate process
                try:
                    process.terminate()
-                    _, stderr_output = process.communicate(timeout=1) 
+                    _, stderr_output = process.communicate(timeout=0.5) # Use communicate with a smaller timeout
                    if stderr_output:
                        print(f"[v0] intel_gpu_top stderr: {stderr_output}", flush=True)
                except subprocess.TimeoutExpired:
@@ -4042,7 +4042,7 @@ def api_prometheus():
                    metrics.append(f'# TYPE proxmox_vm_memory_max_bytes gauge')
                    metrics.append(f'proxmox_vm_memory_max_bytes{{node="{node}",vmid="{vmid}",name="{vm_name}"}} {vm.get("maxmem", 0)} {timestamp}')
-        # Hardware metrics (temperature, fans, UPS)
+        # Hardware metrics (temperature, fans, UPS, GPU)
        try:
            hardware_info = get_hardware_info()
@@ -4064,6 +4064,76 @@ def api_prometheus():
                    metrics.append(f'# TYPE proxmox_fan_speed_rpm gauge')
                    metrics.append(f'proxmox_fan_speed_rpm{{node="{node}",fan="{fan_name}"}} {fan["speed"]} {timestamp}')
            # GPU metrics
            pci_devices = hardware_info.get('pci_devices', [])
            for device in pci_devices:
                if device.get('type') == 'GPU':
                    gpu_name = device.get('device', 'unknown').replace(' ', '_')
                    gpu_vendor = device.get('vendor', 'unknown')
                    # GPU Temperature
                    if device.get('gpu_temperature') is not None:
                        metrics.append(f'# HELP proxmox_gpu_temperature_celsius GPU temperature in Celsius')
                        metrics.append(f'# TYPE proxmox_gpu_temperature_celsius gauge')
                        metrics.append(f'proxmox_gpu_temperature_celsius{{node="{node}",gpu="{gpu_name}",vendor="{gpu_vendor}"}} {device["gpu_temperature"]} {timestamp}')
                    # GPU Utilization
                    if device.get('gpu_utilization') is not None:
                        metrics.append(f'# HELP proxmox_gpu_utilization_percent GPU utilization percentage')
                        metrics.append(f'# TYPE proxmox_gpu_utilization_percent gauge')
                        metrics.append(f'proxmox_gpu_utilization_percent{{node="{node}",gpu="{gpu_name}",vendor="{gpu_vendor}"}} {device["gpu_utilization"]} {timestamp}')
                    # GPU Memory
                    if device.get('gpu_memory_used') and device.get('gpu_memory_total'):
                        try:
                            # Extract numeric values from strings like "1024 MiB"
                            mem_used = float(device['gpu_memory_used'].split()[0])
                            mem_total = float(device['gpu_memory_total'].split()[0])
                            mem_used_bytes = mem_used * 1024 * 1024  # Convert MiB to bytes
                            mem_total_bytes = mem_total * 1024 * 1024
                            metrics.append(f'# HELP proxmox_gpu_memory_used_bytes GPU memory used in bytes')
                            metrics.append(f'# TYPE proxmox_gpu_memory_used_bytes gauge')
                            metrics.append(f'proxmox_gpu_memory_used_bytes{{node="{node}",gpu="{gpu_name}",vendor="{gpu_vendor}"}} {mem_used_bytes} {timestamp}')
                            metrics.append(f'# HELP proxmox_gpu_memory_total_bytes GPU memory total in bytes')
                            metrics.append(f'# TYPE proxmox_gpu_memory_total_bytes gauge')
                            metrics.append(f'proxmox_gpu_memory_total_bytes{{node="{node}",gpu="{gpu_name}",vendor="{gpu_vendor}"}} {mem_total_bytes} {timestamp}')
                        except (ValueError, IndexError):
                            pass
                    # GPU Power Draw (NVIDIA only)
                    if device.get('gpu_power_draw'):
                        try:
                            # Extract numeric value from string like "75.5 W"
                            power_draw = float(device['gpu_power_draw'].split()[0])
                            metrics.append(f'# HELP proxmox_gpu_power_draw_watts GPU power draw in watts')
                            metrics.append(f'# TYPE proxmox_gpu_power_draw_watts gauge')
                            metrics.append(f'proxmox_gpu_power_draw_watts{{node="{node}",gpu="{gpu_name}",vendor="{gpu_vendor}"}} {power_draw} {timestamp}')
                        except (ValueError, IndexError):
                            pass
                    # GPU Clock Speeds (NVIDIA only)
                    if device.get('gpu_clock_speed'):
                        try:
                            # Extract numeric value from string like "1500 MHz"
                            clock_speed = float(device['gpu_clock_speed'].split()[0])
                            metrics.append(f'# HELP proxmox_gpu_clock_speed_mhz GPU clock speed in MHz')
                            metrics.append(f'# TYPE proxmox_gpu_clock_speed_mhz gauge')
                            metrics.append(f'proxmox_gpu_clock_speed_mhz{{node="{node}",gpu="{gpu_name}",vendor="{gpu_vendor}"}} {clock_speed} {timestamp}')
                        except (ValueError, IndexError):
                            pass
                    if device.get('gpu_memory_clock'):
                        try:
                            # Extract numeric value from string like "5001 MHz"
                            mem_clock = float(device['gpu_memory_clock'].split()[0])
                            metrics.append(f'# HELP proxmox_gpu_memory_clock_mhz GPU memory clock speed in MHz')
                            metrics.append(f'# TYPE proxmox_gpu_memory_clock_mhz gauge')
                            metrics.append(f'proxmox_gpu_memory_clock_mhz{{node="{node}",gpu="{gpu_name}",vendor="{gpu_vendor}"}} {mem_clock} {timestamp}')
                        except (ValueError, IndexError):
                            pass
            # UPS metrics
            ups = hardware_info.get('ups')
            if ups:
@@ -4292,7 +4362,7 @@ def api_gpu_realtime(slot):
 def api_vm_details(vmid):
    """Get detailed information for a specific VM/LXC"""
    try:
-        result = subprocess.run(['pvesh', 'get', f'/cluster/resources', '--type', 'vm', '--output-format', 'json'], 
+        result = subprocess.run(['pvesh', 'get', '/cluster/resources', '--type', 'vm', '--output-format', 'json'], 
                              capture_output=True, text=True, timeout=10)
        if result.returncode == 0: