From c1614e82410f7f475aab304e28918d29110a448b Mon Sep 17 00:00:00 2001 From: MacRimi Date: Mon, 13 Oct 2025 23:50:31 +0200 Subject: [PATCH] Update AppImage --- AppImage/components/storage-metrics.tsx | 276 +++++++++++++++++++++++- AppImage/scripts/flask_server.py | 257 +++++++++++++++------- 2 files changed, 447 insertions(+), 86 deletions(-) diff --git a/AppImage/components/storage-metrics.tsx b/AppImage/components/storage-metrics.tsx index 6cda43c..50d1c11 100644 --- a/AppImage/components/storage-metrics.tsx +++ b/AppImage/components/storage-metrics.tsx @@ -4,7 +4,19 @@ import { useState, useEffect } from "react" import { Card, CardContent, CardHeader, CardTitle } from "./ui/card" import { Progress } from "./ui/progress" import { Badge } from "./ui/badge" -import { HardDrive, Database, Archive, AlertTriangle, CheckCircle, Activity, AlertCircle } from "lucide-react" +import { Button } from "./ui/button" +import { Dialog, DialogContent, DialogDescription, DialogHeader, DialogTitle, DialogTrigger } from "./ui/dialog" +import { + HardDrive, + Database, + Archive, + AlertTriangle, + CheckCircle, + Activity, + AlertCircle, + Info, + Thermometer, +} from "lucide-react" interface StorageData { total: number @@ -23,11 +35,22 @@ interface DiskInfo { usage_percent: number health: string temperature: number + disk_type?: string + percentage_used?: number + ssd_life_left?: number + wear_leveling_count?: number + media_wearout_indicator?: number +} + +interface DiskGroup { + type: string + disks: DiskInfo[] + avgTemp: number + status: "safe" | "warning" | "critical" } const fetchStorageData = async (): Promise => { try { - console.log("[v0] Fetching storage data from Flask server...") const response = await fetch("/api/storage", { method: "GET", headers: { @@ -41,7 +64,6 @@ const fetchStorageData = async (): Promise => { } const data = await response.json() - console.log("[v0] Successfully fetched storage data from Flask:", data) return data } catch (error) { console.error("[v0] Failed to fetch storage data from Flask server:", error) @@ -49,6 +71,146 @@ const fetchStorageData = async (): Promise => { } } +const getTempStatus = (temp: number, diskType: string): "safe" | "warning" | "critical" => { + if (diskType === "HDD") { + if (temp > 55) return "critical" + if (temp > 45) return "warning" + return "safe" + } else if (diskType === "SSD") { + if (temp > 65) return "critical" + if (temp > 55) return "warning" + return "safe" + } else if (diskType === "NVMe") { + if (temp > 70) return "critical" + if (temp > 60) return "warning" + return "safe" + } + // Umbral genérico + if (temp > 70) return "critical" + if (temp > 60) return "warning" + return "safe" +} + +const groupDisksByType = (disks: DiskInfo[]): DiskGroup[] => { + const groups: { [key: string]: DiskInfo[] } = {} + + disks.forEach((disk) => { + const type = disk.disk_type || "Unknown" + if (!groups[type]) { + groups[type] = [] + } + groups[type].push(disk) + }) + + return Object.entries(groups).map(([type, disks]) => { + const temps = disks.map((d) => d.temperature).filter((t) => t > 0) + const avgTemp = temps.length > 0 ? 
Math.round(temps.reduce((a, b) => a + b, 0) / temps.length) : 0
+
+ // Determine the most critical status within the group
+ let status: "safe" | "warning" | "critical" = "safe"
+ disks.forEach((disk) => {
+ const diskStatus = getTempStatus(disk.temperature, type)
+ if (diskStatus === "critical") status = "critical"
+ else if (diskStatus === "warning" && status !== "critical") status = "warning"
+ })
+
+ return { type, disks, avgTemp, status }
+ })
+}
+
+function TemperatureThresholdsModal() {
+ return (
+
+
+
+
+
+
+ Temperature thresholds by disk type
+
+ Recommended temperature ranges for each type of storage device
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Disk type | Operating temperature | Safe zone | Warning zone | Critical zone
HDD | 0°C – 60°C (typical: 5–55°C) | ≤ 45°C | 46 – 55°C | > 55°C
SSD | 0°C – 70°C | ≤ 55°C | 56 – 65°C | > 65°C
NVMe | 0°C – 70°C | ≤ 60°C | 61 – 70°C | > 70°C
+
+
+
+ ) +} + export function StorageMetrics() { const [storageData, setStorageData] = useState(null) const [loading, setLoading] = useState(true) @@ -105,6 +267,7 @@ export function StorageMetrics() { } const usagePercent = storageData.total > 0 ? (storageData.used / storageData.total) * 100 : 0 + const diskGroups = groupDisksByType(storageData.disks) return (
@@ -173,6 +336,75 @@ export function StorageMetrics() {
+ {diskGroups.length > 0 && ( +
+
+

Temperature by disk type

+ +
+
+ {diskGroups.map((group) => ( + + + +
+ + {group.type} Temperature +
+
+ {group.status === "safe" ? "Safe" : group.status === "warning" ? "Warning" : "Critical"}
+
+
+ +
+ {group.avgTemp}°C +
+

+ Average of {group.disks.length} disk{group.disks.length > 1 ? "s" : ""}

+
+ {group.disks.map((disk, idx) => ( +
+ {disk.name} + + {disk.temperature}°C + +
+ ))} +
+
+
+ ))} +
+
+ )} + {/* Disk Details */} @@ -191,7 +423,14 @@ export function StorageMetrics() {
-
{disk.name}
+
+ {disk.name} + {disk.disk_type && ( + + {disk.disk_type} + + )} +
{disk.fstype} • {disk.mountpoint}
@@ -208,9 +447,36 @@ export function StorageMetrics() {
Temp
-
{disk.temperature}°C
+
+ {disk.temperature}°C +
+ {(disk.disk_type === "SSD" || disk.disk_type === "NVMe") && disk.ssd_life_left !== undefined && ( +
+
Life left
+
= 80 + ? "text-green-500" + : disk.ssd_life_left >= 50 + ? "text-yellow-500" + : "text-red-500" + }`} + > + {disk.ssd_life_left}% +
+
+ )} + 0: + smart_data['disk_type'] = 'HDD' + print(f"[v0] Detected HDD (rotation_rate: {rotation} RPM)") - # Extract SMART status if 'smart_status' in data and 'passed' in data['smart_status']: smart_data['smart_status'] = 'passed' if data['smart_status']['passed'] else 'failed' smart_data['health'] = 'healthy' if data['smart_status']['passed'] else 'critical' print(f"[v0] SMART status: {smart_data['smart_status']}, health: {smart_data['health']}") - # Extract temperature if 'temperature' in data and 'current' in data['temperature']: smart_data['temperature'] = data['temperature']['current'] print(f"[v0] Temperature: {smart_data['temperature']}°C") - # Parse ATA SMART attributes if 'ata_smart_attributes' in data and 'table' in data['ata_smart_attributes']: print(f"[v0] Parsing ATA SMART attributes...") for attr in data['ata_smart_attributes']['table']: attr_id = attr.get('id') + attr_name = attr.get('name', '') raw_value = attr.get('raw', {}).get('value', 0) + normalized_value = attr.get('value', 0) - if attr_id == 9: # Power_On_Hours + if attr_id == 9: smart_data['power_on_hours'] = raw_value print(f"[v0] Power On Hours (ID 9): {raw_value}") - elif attr_id == 12: # Power_Cycle_Count + elif attr_id == 12: smart_data['power_cycles'] = raw_value print(f"[v0] Power Cycles (ID 12): {raw_value}") - elif attr_id == 194: # Temperature_Celsius + elif attr_id == 194: if smart_data['temperature'] == 0: smart_data['temperature'] = raw_value print(f"[v0] Temperature (ID 194): {raw_value}°C") - elif attr_id == 190: # Airflow_Temperature_Cel + elif attr_id == 190: if smart_data['temperature'] == 0: smart_data['temperature'] = raw_value print(f"[v0] Airflow Temperature (ID 190): {raw_value}°C") - elif attr_id == 5: # Reallocated_Sector_Ct + elif attr_id == 5: smart_data['reallocated_sectors'] = raw_value print(f"[v0] Reallocated Sectors (ID 5): {raw_value}") - elif attr_id == 197: # Current_Pending_Sector + elif attr_id == 197: smart_data['pending_sectors'] = raw_value print(f"[v0] Pending Sectors (ID 197): {raw_value}") - elif attr_id == 199: # UDMA_CRC_Error_Count + elif attr_id == 199: smart_data['crc_errors'] = raw_value print(f"[v0] CRC Errors (ID 199): {raw_value}") + elif attr_id == 177: # Wear_Leveling_Count + smart_data['wear_leveling_count'] = normalized_value + smart_data['ssd_life_left'] = normalized_value + print(f"[v0] Wear Leveling Count (ID 177): {normalized_value}%") + elif attr_id == 231: # SSD_Life_Left or Temperature + if 'Life' in attr_name or 'life' in attr_name: + smart_data['ssd_life_left'] = normalized_value + print(f"[v0] SSD Life Left (ID 231): {normalized_value}%") + elif attr_id == 233: # Media_Wearout_Indicator (Intel) + smart_data['media_wearout_indicator'] = normalized_value + smart_data['ssd_life_left'] = normalized_value + print(f"[v0] Media Wearout Indicator (ID 233): {normalized_value}%") + elif attr_id == 202: # Percent_Lifetime_Remain + smart_data['ssd_life_left'] = normalized_value + print(f"[v0] Percent Lifetime Remain (ID 202): {normalized_value}%") - # Parse NVMe SMART data if 'nvme_smart_health_information_log' in data: print(f"[v0] Parsing NVMe SMART data...") nvme_data = data['nvme_smart_health_information_log'] + smart_data['disk_type'] = 'NVMe' + if 'temperature' in nvme_data: smart_data['temperature'] = nvme_data['temperature'] print(f"[v0] NVMe Temperature: {smart_data['temperature']}°C") @@ -776,8 +808,12 @@ def get_smart_data(disk_name): if 'power_cycles' in nvme_data: smart_data['power_cycles'] = nvme_data['power_cycles'] print(f"[v0] NVMe Power 
Cycles: {smart_data['power_cycles']}") + if 'percentage_used' in nvme_data: + smart_data['percentage_used'] = nvme_data['percentage_used'] + smart_data['ssd_life_left'] = 100 - nvme_data['percentage_used'] + print(f"[v0] NVMe Percentage Used: {smart_data['percentage_used']}%") + print(f"[v0] NVMe Life Left: {smart_data['ssd_life_left']}%") - # If we got good data, break out of the loop if smart_data['model'] != 'Unknown' and smart_data['serial'] != 'Unknown': print(f"[v0] Successfully extracted complete data from JSON (attempt {cmd_index + 1})") break @@ -786,14 +822,12 @@ def get_smart_data(disk_name): print(f"[v0] JSON parse failed: {e}, trying text parsing...") if smart_data['model'] == 'Unknown' or smart_data['serial'] == 'Unknown' or smart_data['temperature'] == 0: - print(f"[v0] Parsing text output (model={smart_data['model']}, serial={smart_data['serial']}, temp={smart_data['temperature']})...") + print(f"[v0] Parsing text output...") output = stdout - # Get basic info for line in output.split('\n'): line = line.strip() - # Model detection if (line.startswith('Device Model:') or line.startswith('Model Number:')) and smart_data['model'] == 'Unknown': smart_data['model'] = line.split(':', 1)[1].strip() print(f"[v0] Found model: {smart_data['model']}") @@ -801,7 +835,6 @@ def get_smart_data(disk_name): smart_data['model'] = line.split(':', 1)[1].strip() print(f"[v0] Found model family: {smart_data['model']}") - # Serial detection elif line.startswith('Serial Number:') and smart_data['serial'] == 'Unknown': smart_data['serial'] = line.split(':', 1)[1].strip() print(f"[v0] Found serial: {smart_data['serial']}") @@ -811,14 +844,15 @@ def get_smart_data(disk_name): if 'rpm' in rate_str.lower(): try: smart_data['rotation_rate'] = int(rate_str.split()[0]) - print(f"[v0] Found rotation rate: {smart_data['rotation_rate']} RPM") + smart_data['disk_type'] = 'HDD' + print(f"[v0] Found HDD rotation rate: {smart_data['rotation_rate']} RPM") except (ValueError, IndexError): pass elif 'Solid State Device' in rate_str: - smart_data['rotation_rate'] = 0 # SSD + smart_data['rotation_rate'] = 0 + smart_data['disk_type'] = 'SSD' print(f"[v0] Found SSD (no rotation)") - # SMART status detection elif 'SMART overall-health self-assessment test result:' in line: if 'PASSED' in line: smart_data['smart_status'] = 'passed' @@ -829,14 +863,12 @@ def get_smart_data(disk_name): smart_data['health'] = 'critical' print(f"[v0] SMART status: FAILED") - # NVMe health elif 'SMART Health Status:' in line: if 'OK' in line: smart_data['smart_status'] = 'passed' smart_data['health'] = 'healthy' print(f"[v0] NVMe Health: OK") - # Temperature detection (various formats) elif 'Current Temperature:' in line and smart_data['temperature'] == 0: try: temp_str = line.split(':')[1].strip().split()[0] @@ -844,8 +876,16 @@ def get_smart_data(disk_name): print(f"[v0] Found temperature: {smart_data['temperature']}°C") except (ValueError, IndexError): pass + + elif 'Percentage Used:' in line: + try: + percentage_str = line.split(':')[1].strip().rstrip('%') + smart_data['percentage_used'] = int(percentage_str) + smart_data['ssd_life_left'] = 100 - smart_data['percentage_used'] + print(f"[v0] NVMe Percentage Used: {smart_data['percentage_used']}%") + except (ValueError, IndexError): + pass - # Parse SMART attributes table in_attributes = False for line in output.split('\n'): line = line.strip() @@ -856,7 +896,6 @@ def get_smart_data(disk_name): continue if in_attributes: - # Stop at empty line or next section if not line or 
line.startswith('SMART') or line.startswith('==='): in_attributes = False continue @@ -865,40 +904,56 @@ def get_smart_data(disk_name): if len(parts) >= 10: try: attr_id = parts[0] - # Raw value is typically the last column + attr_name = parts[1] + normalized_value = int(parts[3]) raw_value = parts[-1] - # Parse based on attribute ID - if attr_id == '9': # Power On Hours + if attr_id == '9': raw_clean = raw_value.split()[0].replace('h', '').replace(',', '') smart_data['power_on_hours'] = int(raw_clean) print(f"[v0] Power On Hours: {smart_data['power_on_hours']}") - elif attr_id == '12': # Power Cycle Count + elif attr_id == '12': raw_clean = raw_value.split()[0].replace(',', '') smart_data['power_cycles'] = int(raw_clean) print(f"[v0] Power Cycles: {smart_data['power_cycles']}") - elif attr_id == '194' and smart_data['temperature'] == 0: # Temperature + elif attr_id == '194' and smart_data['temperature'] == 0: temp_str = raw_value.split()[0] smart_data['temperature'] = int(temp_str) print(f"[v0] Temperature (attr 194): {smart_data['temperature']}°C") - elif attr_id == '190' and smart_data['temperature'] == 0: # Airflow Temperature + elif attr_id == '190' and smart_data['temperature'] == 0: temp_str = raw_value.split()[0] smart_data['temperature'] = int(temp_str) print(f"[v0] Airflow Temperature (attr 190): {smart_data['temperature']}°C") - elif attr_id == '5': # Reallocated Sectors + elif attr_id == '5': smart_data['reallocated_sectors'] = int(raw_value) print(f"[v0] Reallocated Sectors: {smart_data['reallocated_sectors']}") - elif attr_id == '197': # Pending Sectors + elif attr_id == '197': smart_data['pending_sectors'] = int(raw_value) print(f"[v0] Pending Sectors: {smart_data['pending_sectors']}") - elif attr_id == '199': # CRC Errors + elif attr_id == '199': smart_data['crc_errors'] = int(raw_value) print(f"[v0] CRC Errors: {smart_data['crc_errors']}") + # Parsear atributos de desgaste SSD + elif attr_id == '177': # Wear_Leveling_Count + smart_data['wear_leveling_count'] = normalized_value + smart_data['ssd_life_left'] = normalized_value + print(f"[v0] Wear Leveling Count: {normalized_value}%") + elif attr_id == '231': + if 'Life' in attr_name or 'life' in attr_name: + smart_data['ssd_life_left'] = normalized_value + print(f"[v0] SSD Life Left: {normalized_value}%") + elif attr_id == '233': # Media_Wearout_Indicator + smart_data['media_wearout_indicator'] = normalized_value + smart_data['ssd_life_left'] = normalized_value + print(f"[v0] Media Wearout Indicator: {normalized_value}%") + elif attr_id == '202': # Percent_Lifetime_Remain + smart_data['ssd_life_left'] = normalized_value + print(f"[v0] Percent Lifetime Remain: {normalized_value}%") except (ValueError, IndexError) as e: print(f"[v0] Error parsing attribute line '{line}': {e}") continue - # If we got complete data, break + if smart_data['model'] != 'Unknown' and smart_data['serial'] != 'Unknown': print(f"[v0] Successfully extracted complete data from text output (attempt {cmd_index + 1})") break @@ -919,7 +974,6 @@ def get_smart_data(disk_name): process.kill() continue finally: - # Ensure the process is terminated if it's still running if process and process.poll() is None: try: process.kill() @@ -927,7 +981,6 @@ def get_smart_data(disk_name): except Exception as kill_err: print(f"[v0] Error killing process: {kill_err}") - if smart_data['reallocated_sectors'] > 0 or smart_data['pending_sectors'] > 0: if smart_data['health'] == 'healthy': smart_data['health'] = 'warning' @@ -939,14 +992,39 @@ def get_smart_data(disk_name): 
smart_data['health'] = 'critical'
 print(f"[v0] Health: CRITICAL (SMART failed)")
 
- # Temperature-based health (only if we have a valid temperature)
 if smart_data['health'] == 'healthy' and smart_data['temperature'] > 0:
- if smart_data['temperature'] >= 70:
- smart_data['health'] = 'critical'
- print(f"[v0] Health: CRITICAL (temperature {smart_data['temperature']}°C)")
- elif smart_data['temperature'] >= 60:
- smart_data['health'] = 'warning'
- print(f"[v0] Health: WARNING (temperature {smart_data['temperature']}°C)")
+ disk_type = smart_data['disk_type']
+ temp = smart_data['temperature']
+
+ if disk_type == 'HDD':
+ if temp > 55:
+ smart_data['health'] = 'critical'
+ print(f"[v0] Health: CRITICAL (HDD temperature {temp}°C > 55°C)")
+ elif temp > 45:
+ smart_data['health'] = 'warning'
+ print(f"[v0] Health: WARNING (HDD temperature {temp}°C > 45°C)")
+ elif disk_type == 'SSD':
+ if temp > 65:
+ smart_data['health'] = 'critical'
+ print(f"[v0] Health: CRITICAL (SSD temperature {temp}°C > 65°C)")
+ elif temp > 55:
+ smart_data['health'] = 'warning'
+ print(f"[v0] Health: WARNING (SSD temperature {temp}°C > 55°C)")
+ elif disk_type == 'NVMe':
+ if temp > 70:
+ smart_data['health'] = 'critical'
+ print(f"[v0] Health: CRITICAL (NVMe temperature {temp}°C > 70°C)")
+ elif temp > 60:
+ smart_data['health'] = 'warning'
+ print(f"[v0] Health: WARNING (NVMe temperature {temp}°C > 60°C)")
+ else:
+ # Generic thresholds if the disk type could not be detected
+ if temp >= 70:
+ smart_data['health'] = 'critical'
+ print(f"[v0] Health: CRITICAL (temperature {temp}°C)")
+ elif temp >= 60:
+ smart_data['health'] = 'warning'
+ print(f"[v0] Health: WARNING (temperature {temp}°C)")
 
 except FileNotFoundError:
 print(f"[v0] ERROR: smartctl not found - install smartmontools for disk monitoring.")
@@ -3419,7 +3497,7 @@ def api_logs():
 'level': level,
 'service': log_entry.get('_SYSTEMD_UNIT', log_entry.get('SYSLOG_IDENTIFIER', 'system')),
 'message': log_entry.get('MESSAGE', ''),
- 'source': 'journalctl',
+ 'source': 'journal',
 'pid': log_entry.get('_PID', ''),
 'hostname': log_entry.get('_HOSTNAME', '')
 })
@@ -3975,6 +4053,23 @@ def api_prometheus():
 metrics.append(f'# HELP proxmox_disk_usage_percent Disk usage percentage')
 metrics.append(f'# TYPE proxmox_disk_usage_percent gauge')
 metrics.append(f'proxmox_disk_usage_percent{{node="{node}",disk="{disk_name}"}} {disk.get("percent", 0)} {timestamp}')
+
+ # SMART metrics for SSD/NVMe disks
+ if disk.get('disk_type') in ['SSD', 'NVMe']:
+ if disk.get('ssd_life_left') is not None and disk.get('ssd_life_left') != 100: # Only export when it differs from the default (100)
+ metrics.append(f'# HELP proxmox_disk_ssd_life_left SSD remaining life percentage')
+ metrics.append(f'# TYPE proxmox_disk_ssd_life_left gauge')
+ metrics.append(f'proxmox_disk_ssd_life_left{{node="{node}",disk="{disk_name}"}} {disk["ssd_life_left"]} {timestamp}')
+
+ if disk.get('percentage_used') is not None and disk.get('percentage_used') > 0:
+ metrics.append(f'# HELP proxmox_disk_nvme_percentage_used NVMe disk percentage used')
+ metrics.append(f'# TYPE proxmox_disk_nvme_percentage_used gauge')
+ metrics.append(f'proxmox_disk_nvme_percentage_used{{node="{node}",disk="{disk_name}"}} {disk["percentage_used"]} {timestamp}')
+
+ if disk.get('wear_leveling_count') is not None and disk.get('wear_leveling_count') > 0:
+ metrics.append(f'# HELP proxmox_disk_ssd_wear_leveling_count SSD wear leveling count percentage')
+ metrics.append(f'# TYPE proxmox_disk_ssd_wear_leveling_count gauge')
+ 
metrics.append(f'proxmox_disk_ssd_wear_leveling_count{{node="{node}",disk="{disk_name}"}} {disk["wear_leveling_count"]} {timestamp}') # Network metrics network_info = get_network_info() @@ -4067,28 +4162,28 @@ def api_prometheus(): # GPU metrics pci_devices = hardware_info.get('pci_devices', []) for device in pci_devices: - if device.get('type') == 'GPU': + if device.get('type') == 'Graphics Card': # Check for Graphics Card type specifically gpu_name = device.get('device', 'unknown').replace(' ', '_') gpu_vendor = device.get('vendor', 'unknown') # GPU Temperature - if device.get('gpu_temperature') is not None: + if device.get('temperature') is not None: metrics.append(f'# HELP proxmox_gpu_temperature_celsius GPU temperature in Celsius') metrics.append(f'# TYPE proxmox_gpu_temperature_celsius gauge') - metrics.append(f'proxmox_gpu_temperature_celsius{{node="{node}",gpu="{gpu_name}",vendor="{gpu_vendor}"}} {device["gpu_temperature"]} {timestamp}') + metrics.append(f'proxmox_gpu_temperature_celsius{{node="{node}",gpu="{gpu_name}",vendor="{gpu_vendor}"}} {device["temperature"]} {timestamp}') # GPU Utilization - if device.get('gpu_utilization') is not None: + if device.get('utilization_gpu') is not None: metrics.append(f'# HELP proxmox_gpu_utilization_percent GPU utilization percentage') metrics.append(f'# TYPE proxmox_gpu_utilization_percent gauge') - metrics.append(f'proxmox_gpu_utilization_percent{{node="{node}",gpu="{gpu_name}",vendor="{gpu_vendor}"}} {device["gpu_utilization"]} {timestamp}') + metrics.append(f'proxmox_gpu_utilization_percent{{node="{node}",gpu="{gpu_name}",vendor="{gpu_vendor}"}} {device["utilization_gpu"]} {timestamp}') # GPU Memory - if device.get('gpu_memory_used') and device.get('gpu_memory_total'): + if device.get('memory_used') and device.get('memory_total'): try: # Extract numeric values from strings like "1024 MiB" - mem_used = float(device['gpu_memory_used'].split()[0]) - mem_total = float(device['gpu_memory_total'].split()[0]) + mem_used = float(device['memory_used'].split()[0]) + mem_total = float(device['memory_total'].split()[0]) mem_used_bytes = mem_used * 1024 * 1024 # Convert MiB to bytes mem_total_bytes = mem_total * 1024 * 1024 @@ -4103,10 +4198,10 @@ def api_prometheus(): pass # GPU Power Draw (NVIDIA only) - if device.get('gpu_power_draw'): + if device.get('power_draw'): try: # Extract numeric value from string like "75.5 W" - power_draw = float(device['gpu_power_draw'].split()[0]) + power_draw = float(device['power_draw'].split()[0]) metrics.append(f'# HELP proxmox_gpu_power_draw_watts GPU power draw in watts') metrics.append(f'# TYPE proxmox_gpu_power_draw_watts gauge') metrics.append(f'proxmox_gpu_power_draw_watts{{node="{node}",gpu="{gpu_name}",vendor="{gpu_vendor}"}} {power_draw} {timestamp}') @@ -4114,20 +4209,20 @@ def api_prometheus(): pass # GPU Clock Speeds (NVIDIA only) - if device.get('gpu_clock_speed'): + if device.get('clock_graphics'): try: # Extract numeric value from string like "1500 MHz" - clock_speed = float(device['gpu_clock_speed'].split()[0]) + clock_speed = float(device['clock_graphics'].split()[0]) metrics.append(f'# HELP proxmox_gpu_clock_speed_mhz GPU clock speed in MHz') metrics.append(f'# TYPE proxmox_gpu_clock_speed_mhz gauge') metrics.append(f'proxmox_gpu_clock_speed_mhz{{node="{node}",gpu="{gpu_name}",vendor="{gpu_vendor}"}} {clock_speed} {timestamp}') except (ValueError, IndexError): pass - if device.get('gpu_memory_clock'): + if device.get('clock_memory'): try: # Extract numeric value from string like "5001 MHz" - 
mem_clock = float(device['gpu_memory_clock'].split()[0]) + mem_clock = float(device['clock_memory'].split()[0]) metrics.append(f'# HELP proxmox_gpu_memory_clock_mhz GPU memory clock speed in MHz') metrics.append(f'# TYPE proxmox_gpu_memory_clock_mhz gauge') metrics.append(f'proxmox_gpu_memory_clock_mhz{{node="{node}",gpu="{gpu_name}",vendor="{gpu_vendor}"}} {mem_clock} {timestamp}')
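
Taken together, the server-side changes normalize several vendor wear indicators into a single ssd_life_left percentage and export it through /api/prometheus. The sketch below condenses that mapping; it is an illustrative approximation with hypothetical helper names and sample values, mirroring but not replacing the parsing logic in get_smart_data().

from typing import Optional

# ATA wear attributes are read from the normalized VALUE column (not the raw value):
#   177 Wear_Leveling_Count, 231 SSD_Life_Left (only when the attribute name mentions "Life",
#   since ID 231 is a temperature on some drives), 233 Media_Wearout_Indicator (Intel),
#   202 Percent_Lifetime_Remain.
WEAR_ATTRIBUTE_IDS = {177, 231, 233, 202}

def ata_life_left(attr_id: int, attr_name: str, normalized_value: int) -> Optional[int]:
    """Remaining life in percent for a wear-related ATA attribute, else None."""
    if attr_id == 231 and 'life' not in attr_name.lower():
        return None
    if attr_id in WEAR_ATTRIBUTE_IDS:
        return normalized_value
    return None

def nvme_life_left(percentage_used: int) -> int:
    """NVMe reports wear as 'Percentage Used'; the patch stores 100 - used as ssd_life_left.
    Note that the NVMe spec allows Percentage Used to exceed 100 on heavily worn drives."""
    return 100 - percentage_used

if __name__ == '__main__':
    print(ata_life_left(233, 'Media_Wearout_Indicator', 97))  # 97
    print(nvme_life_left(3))                                  # 97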