Update flask_server.py

This commit is contained in:
MacRimi
2025-10-10 12:29:47 +02:00
parent fa34e081cc
commit b249d37bab

View File

@@ -1701,6 +1701,16 @@ def get_detailed_gpu_info(gpu):
'engine_video_enhance': None 'engine_video_enhance': None
} }
if 'amd' in vendor or 'ati' in vendor:
print(f"[v0] AMD GPU detected, checking for amdgpu_top...", flush=True)
amd_data = get_amd_gpu_realtime_data(slot)
if amd_data:
detailed_info.update(amd_data)
return detailed_info
else:
print(f"[v0] AMD GPU monitoring failed or amdgpu_top not available", flush=True)
return detailed_info
# Intel GPU monitoring with intel_gpu_top # Intel GPU monitoring with intel_gpu_top
if 'intel' in vendor: if 'intel' in vendor:
print(f"[v0] Intel GPU detected, checking for intel_gpu_top...", flush=True) print(f"[v0] Intel GPU detected, checking for intel_gpu_top...", flush=True)
@@ -2082,12 +2092,17 @@ def get_detailed_gpu_info(gpu):
# AMD GPU monitoring (placeholder, requires radeontop or similar) # AMD GPU monitoring (placeholder, requires radeontop or similar)
elif 'amd' in vendor: elif 'amd' in vendor:
print(f"[v0] AMD GPU detected. Monitoring tools like radeontop are needed for detailed info.", flush=True) print(f"[v0] AMD GPU detected. Monitoring tools like radeontop are needed for detailed info.", flush=True)
if shutil.which('radeontop'): if shutil.which('amdgpu_top'):
print(f"[v0] radeontop found, but integration is not yet implemented.", flush=True) print(f"[v0] amdgpu_top found, attempting to get real-time data...", flush=True)
# Attempt to get data using the dedicated function
amd_data = get_amd_gpu_realtime_data(slot)
if amd_data:
detailed_info.update(amd_data) # Update detailed_info with AMD data
detailed_info['has_monitoring_tool'] = True # Mark as tool found and used
else: else:
print(f"[v0] radeontop not found in PATH.", flush=True) print(f"[v0] amdgpu_top not found in PATH.", flush=True)
# Placeholder: return basic info if available from lspci or sensors # Placeholder: return basic info if available from lspci or sensors
# No detailed monitoring implemented yet for AMD # No detailed monitoring implemented yet for AMD (beyond amdgpu_top)
else: else:
print(f"[v0] Unsupported GPU vendor: {vendor}", flush=True) print(f"[v0] Unsupported GPU vendor: {vendor}", flush=True)
@@ -2095,7 +2110,6 @@ def get_detailed_gpu_info(gpu):
print(f"[v0] ===== Exiting get_detailed_gpu_info for GPU {slot} =====", flush=True) print(f"[v0] ===== Exiting get_detailed_gpu_info for GPU {slot} =====", flush=True)
return detailed_info return detailed_info
def get_pci_device_info(pci_slot): def get_pci_device_info(pci_slot):
"""Get detailed PCI device information for a given slot""" """Get detailed PCI device information for a given slot"""
pci_info = {} pci_info = {}
@@ -2195,112 +2209,6 @@ def get_network_hardware_info(pci_slot):
return net_info return net_info
# Kernel driver names that identify a GPU hwmon chip in `sensors` output,
# keyed by the vendor label assigned in get_gpu_info().
_GPU_SENSOR_DRIVERS = {
    'NVIDIA': ('nouveau',),
    'AMD': ('amdgpu', 'radeon'),
    'Intel': ('i915',),
}


def get_gpu_info():
    """Detect and return information about GPUs in the system.

    GPUs are discovered from ``lspci`` (VGA / 3D / Display controllers) and
    then enriched, best effort, with temperature and fan readings from
    ``sensors``.

    Returns:
        list[dict]: one dict per GPU with the keys ``slot``, ``name``,
        ``vendor`` and ``type``; ``pci_class``, ``pci_driver`` and
        ``pci_kernel_module`` when ``get_pci_device_info`` returns data;
        ``temperature`` (float, degrees C) and ``fan_speed`` / ``fan_unit``
        when a matching sensor chip is found. Failures of either external
        command are logged and never raised.
    """
    gpus = []

    try:
        result = subprocess.run(['lspci'], capture_output=True, text=True, timeout=5)
        if result.returncode == 0:
            controller_classes = ['VGA compatible controller', '3D controller', 'Display controller']
            for line in result.stdout.split('\n'):
                # Match VGA, 3D, Display controllers
                if not any(keyword in line for keyword in controller_classes):
                    continue

                parts = line.split(' ', 1)
                if len(parts) < 2:
                    continue

                slot = parts[0].strip()
                remaining = parts[1]
                # "<class>: <device name>" -> keep only the device name
                if ':' in remaining:
                    gpu_name = remaining.split(':', 1)[1].strip()
                else:
                    gpu_name = remaining.strip()

                # Determine vendor
                vendor = 'Unknown'
                if 'NVIDIA' in gpu_name or 'nVidia' in gpu_name:
                    vendor = 'NVIDIA'
                elif 'AMD' in gpu_name or 'ATI' in gpu_name or 'Radeon' in gpu_name:
                    vendor = 'AMD'
                elif 'Intel' in gpu_name:
                    vendor = 'Intel'

                gpu = {
                    'slot': slot,
                    'name': gpu_name,
                    'vendor': vendor,
                    'type': 'Discrete' if vendor in ['NVIDIA', 'AMD'] else 'Integrated'
                }

                pci_info = get_pci_device_info(slot)
                if pci_info:
                    gpu['pci_class'] = pci_info.get('class', '')
                    gpu['pci_driver'] = pci_info.get('driver', '')
                    gpu['pci_kernel_module'] = pci_info.get('kernel_module', '')

                # Detailed (real-time) data is deliberately not collected here;
                # it is fetched later by api_gpu_realtime.
                gpus.append(gpu)
                print(f"[v0] Found GPU: {gpu_name} ({vendor}) at slot {slot}")
    except Exception as e:
        print(f"[v0] Error detecting GPUs from lspci: {e}")

    try:
        result = subprocess.run(['sensors'], capture_output=True, text=True, timeout=5)
        if result.returncode == 0:
            current_chip = None
            current_adapter = None
            # `sensors` prints one blank-line separated section per chip. The
            # first line of a section is the chip name (e.g. "amdgpu-pci-0300"),
            # which carries the driver name; the "Adapter:" line that follows
            # only names the bus (e.g. "PCI adapter").
            expect_chip_header = True

            for raw_line in result.stdout.split('\n'):
                line = raw_line.strip()
                if not line:
                    expect_chip_header = True
                    continue

                if expect_chip_header:
                    expect_chip_header = False
                    if ':' not in line:
                        current_chip = line
                        current_adapter = None
                        continue

                # Detect adapter line
                if line.startswith('Adapter:'):
                    current_adapter = line.replace('Adapter:', '').strip()
                    continue

                if ':' not in line:
                    continue
                value_part = line.split(':', 1)[1].strip()

                # Driver keywords are looked up in both the chip name and the
                # adapter text, so adapter-only matches keep working.
                source = f"{current_chip or ''} {current_adapter or ''}".lower()

                for gpu in gpus:
                    # Match nouveau to NVIDIA, amdgpu/radeon to AMD, i915 to Intel
                    drivers = _GPU_SENSOR_DRIVERS.get(gpu['vendor'], ())
                    if not any(driver in source for driver in drivers):
                        continue

                    # Fan speed is parsed independently of the temperature
                    # state, so a fan line listed before the first temperature
                    # line is not lost.
                    if 'RPM' in value_part:
                        rpm_match = re.search(r'([\d.]+)\s*RPM', value_part)
                        if rpm_match:
                            try:
                                gpu['fan_speed'] = int(float(rpm_match.group(1)))
                            except ValueError:
                                continue
                            gpu['fan_unit'] = 'RPM'
                            print(f"[v0] GPU {gpu['name']}: Fan = {gpu['fan_speed']} RPM")
                    # Parse temperature (only if not already set by nvidia-smi)
                    elif gpu.get('temperature') is None:
                        temp_match = re.search(r'([+-]?[\d.]+)\s*°?C', value_part)
                        if temp_match:
                            try:
                                gpu['temperature'] = float(temp_match.group(1))
                            except ValueError:
                                continue
                            print(f"[v0] GPU {gpu['name']}: Temperature = {gpu['temperature']}°C")
    except Exception as e:
        print(f"[v0] Error enriching GPU data from sensors: {e}")

    return gpus
def get_disk_hardware_info(disk_name): def get_disk_hardware_info(disk_name):
"""Get detailed hardware information for a disk""" """Get detailed hardware information for a disk"""
disk_info = {} disk_info = {}
@@ -2798,6 +2706,273 @@ def get_hardware_info():
traceback.print_exc() traceback.print_exc()
return {} return {}
# Define the AMD specific function before it's called in get_detailed_gpu_info

# Map AMD engine names to Intel-like names for consistency.
# Several AMD engines fold into one label; their percentages are summed.
_AMD_ENGINE_MAP = {
    'GFX': 'Render/3D',
    'Compute': 'Render/3D',  # Compute also maps to Render/3D
    'DMA': 'Blitter',
    'DEC': 'Video',  # Decode
    'ENC': 'Video',  # Encode
    'VCN': 'Video',  # Video Core Next
    'UVD': 'Video',  # Unified Video Decoder
    'VCE': 'Video',  # Video Compression Engine
}


def _amd_unwrap(raw):
    """Return the payload of a reading, unwrapping a {'value': ...} object."""
    # NOTE(review): some amdgpu_top builds appear to emit {"value": .., "unit": ..}
    # objects instead of "49 C" strings - confirm against the installed version.
    if isinstance(raw, dict):
        return raw.get('value')
    return raw


def _amd_first_token(raw):
    """Return the first whitespace-separated token of a reading, or None."""
    raw = _amd_unwrap(raw)
    if raw is None or isinstance(raw, (bool, dict, list)):
        return None
    tokens = str(raw).split()
    return tokens[0] if tokens else None


def _amd_pick(section, *keys):
    """Return the leading token of the first key from *keys* present in *section*."""
    for key in keys:
        if key in section:
            return _amd_first_token(section[key])
    return None


def _amd_to_float(text):
    """Convert *text* to float, returning None when it is not numeric."""
    try:
        return float(text)
    except (TypeError, ValueError):
        return None


def _amd_percent_of(label):
    """Return the numeric part of a "12.5%" label, or None."""
    return _amd_to_float(label.rstrip('%')) if label else None


def _amd_mib_to_kb(raw):
    """Convert a "354M" / "354 MiB" reading to an integer number of KB, or None."""
    raw = _amd_unwrap(raw)
    if raw is None or isinstance(raw, bool):
        return None
    text = str(raw).strip()
    # Strip the longest suffix first: removing 'M' before 'MiB' would leave "iB".
    for suffix in ('MiB', 'M'):
        if text.endswith(suffix):
            text = text[:-len(suffix)].strip()
            break
    value = _amd_to_float(text)
    if value is None:
        return None
    try:
        return int(value * 1024)
    except (ValueError, OverflowError):
        return None


def _amd_parse_process(proc_data):
    """Build the per-process record (memory in KB, engine usage) from one fdinfo entry."""
    process_info = {
        'name': proc_data.get('name', 'Unknown'),
        'pid': str(proc_data.get('pid', 'Unknown')),
        'memory': {
            'total': 0,
            'shared': 0,
            'resident': 0
        },
        'engines': {}
    }

    # Parse memory usage (VRAM and GTT), converted to KB for consistency with Intel
    for source_key, target_key in (('VRAM', 'total'), ('GTT', 'resident')):
        if source_key in proc_data:
            kilobytes = _amd_mib_to_kb(proc_data[source_key])
            if kilobytes is not None:
                process_info['memory'][target_key] = kilobytes

    # Parse engine utilization for this process
    engines = process_info['engines']
    for amd_engine, intel_engine in _AMD_ENGINE_MAP.items():
        if amd_engine not in proc_data:
            continue
        raw = _amd_unwrap(proc_data[amd_engine])
        if raw is None or isinstance(raw, bool):
            continue
        # Format: "15%" or "15 %"
        value = _amd_to_float(str(raw).replace('%', '').strip())
        if value is None or not value > 0:
            continue
        # If the label already exists, add to it (for DEC+ENC -> Video)
        existing = _amd_percent_of(engines.get(intel_engine)) or 0.0
        engines[intel_engine] = f"{existing + value:.1f}%"

    return process_info


def get_amd_gpu_realtime_data(slot):
    """Get real-time monitoring data for AMD GPU using amdgpu_top.

    Runs ``amdgpu_top --json -n 1`` once and maps its ``sensors``, ``VRAM``,
    ``Activity``, ``GRBM``, ``GRBM2`` and ``fdinfo`` sections onto the keys
    used by the other GPU back ends. A malformed individual field is left as
    ``None`` instead of discarding the whole snapshot.

    Args:
        slot: PCI slot of the GPU. It is only used in log output; the command
            is not restricted to that device.

    Returns:
        dict | None: the monitoring dict (``has_monitoring_tool`` is True,
        unavailable metrics are ``None``, ``processes`` lists only processes
        with engine activity), or ``None`` when amdgpu_top is missing, fails,
        times out or prints invalid JSON.
    """
    try:
        print(f"[v0] Getting AMD GPU data for slot {slot}...", flush=True)

        # Check if amdgpu_top is available
        amdgpu_top_path = shutil.which('amdgpu_top')
        if not amdgpu_top_path:
            print(f"[v0] amdgpu_top not found in PATH", flush=True)
            return None

        print(f"[v0] Found amdgpu_top at: {amdgpu_top_path}", flush=True)

        # Execute amdgpu_top --json -n 1 for a single snapshot
        cmd = [amdgpu_top_path, '--json', '-n', '1']
        print(f"[v0] Executing: {' '.join(cmd)}", flush=True)

        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=5
        )

        if result.returncode != 0:
            print(f"[v0] amdgpu_top failed with code {result.returncode}", flush=True)
            print(f"[v0] stderr: {result.stderr}", flush=True)
            return None

        # Parse JSON output
        try:
            data = json.loads(result.stdout)
        except json.JSONDecodeError as e:
            print(f"[v0] Failed to parse amdgpu_top JSON: {e}", flush=True)
            return None
        print(f"[v0] Successfully parsed amdgpu_top JSON output", flush=True)

        # Anything but an object has none of the expected sections.
        if not isinstance(data, dict):
            data = {}

        detailed_info = {
            'has_monitoring_tool': True,
            'temperature': None,
            'fan_speed': None,
            'fan_unit': None,
            'utilization_gpu': None,
            'utilization_memory': None,
            'memory_used': None,
            'memory_total': None,
            'memory_free': None,
            'power_draw': None,
            'power_limit': None,
            'clock_graphics': None,
            'clock_memory': None,
            'processes': [],
            'engine_render': None,
            'engine_blitter': None,
            'engine_video': None,
            'engine_video_enhance': None
        }

        # Parse sensors data
        sensors = data.get('sensors')
        if isinstance(sensors, dict):
            # Temperature (Edge Temperature or GFX), format: "49 C" or similar
            temp_value = _amd_pick(sensors, 'Edge Temperature', 'GFX')
            if temp_value is not None:
                detailed_info['temperature'] = f"{temp_value}°C"

            # Power (GPU Power or GFX Power), format: "18 W (Input)" or similar
            power_value = _amd_pick(sensors, 'GPU Power', 'GFX Power')
            if power_value is not None:
                detailed_info['power_draw'] = f"{power_value} W"

            # Clock frequencies, format: "200 MHz" or similar
            sclk_value = _amd_pick(sensors, 'GFX_SCLK')
            if sclk_value is not None:
                detailed_info['clock_graphics'] = f"{sclk_value} MHz"
            mclk_value = _amd_pick(sensors, 'GFX_MCLK')
            if mclk_value is not None:
                detailed_info['clock_memory'] = f"{mclk_value} MHz"

        # Parse VRAM usage
        vram = data.get('VRAM')
        if isinstance(vram, dict):
            usage_str = vram.get('Total VRAM Usage')
            # Format: "375 / 512 MiB" or similar
            parts = usage_str.split('/') if isinstance(usage_str, str) else []
            if len(parts) == 2:
                used = _amd_first_token(parts[0])
                total = _amd_first_token(parts[1])
                if used is not None and total is not None:
                    detailed_info['memory_used'] = f"{used} MiB"
                    detailed_info['memory_total'] = f"{total} MiB"

                    # Calculate memory utilization percentage
                    try:
                        mem_util = (float(used) / float(total)) * 100
                        detailed_info['utilization_memory'] = f"{mem_util:.1f}%"
                    except (ValueError, ZeroDivisionError):
                        # Non-numeric or zero total: leave utilization unset.
                        pass

        # Parse activity (GPU utilization)
        activity = data.get('Activity')
        if isinstance(activity, dict):
            # GFX activity (main GPU utilization), format: "15 %" or similar
            gfx_value = _amd_pick(activity, 'GFX')
            if gfx_value is not None:
                detailed_info['utilization_gpu'] = f"{gfx_value}%"
                detailed_info['engine_render'] = f"{gfx_value}%"

            # Media activity (video encoding/decoding)
            media_value = _amd_pick(activity, 'Media')
            if media_value is not None:
                detailed_info['engine_video'] = f"{media_value}%"

        # Parse GRBM (Graphics Register Bus Manager) for more detailed engine utilization
        grbm = data.get('GRBM')
        if isinstance(grbm, dict):
            # Graphics Pipe -> Render/3D
            pipe_value = _amd_pick(grbm, 'Graphics Pipe')
            if pipe_value is not None:
                # Use the higher value between Activity.GFX and GRBM.Graphics Pipe
                current_val = _amd_percent_of(detailed_info['engine_render'])
                new_val = _amd_to_float(pipe_value)
                if current_val is None or (new_val is not None and new_val > current_val):
                    detailed_info['engine_render'] = f"{pipe_value}%"

            # Texture Pipe could be mapped to Blitter (texture operations)
            texture_value = _amd_pick(grbm, 'Texture Pipe')
            if texture_value is not None:
                detailed_info['engine_blitter'] = f"{texture_value}%"

        # Parse GRBM2 for additional engine data
        grbm2 = data.get('GRBM2')
        if isinstance(grbm2, dict):
            # Command Processor - Graphics could enhance Render/3D data
            cp_value = _amd_pick(grbm2, 'Command Processor - Graphics')
            if cp_value is not None:
                # Use as additional indicator for render activity. The check is
                # numeric so that "0%", "0.0%" and similar all count as idle.
                render_label = detailed_info['engine_render']
                if not render_label or _amd_percent_of(render_label) == 0:
                    detailed_info['engine_render'] = f"{cp_value}%"

        # Parse fdinfo (process information)
        if 'fdinfo' in data:
            fdinfo = data['fdinfo']
            if isinstance(fdinfo, dict):
                entries = list(fdinfo.values())
            elif isinstance(fdinfo, list):
                entries = fdinfo
            else:
                entries = []

            processes = []
            for proc_data in entries:
                if not isinstance(proc_data, dict):
                    continue
                process_info = _amd_parse_process(proc_data)

                # Only add process if it has some GPU activity
                if process_info['engines']:
                    processes.append(process_info)
                    print(f"[v0] Found AMD process: {process_info['name']} (PID: {process_info['pid']}) with {len(process_info['engines'])} active engines", flush=True)

            detailed_info['processes'] = processes
            print(f"[v0] Total AMD processes found: {len(processes)}", flush=True)

        print(f"[v0] AMD GPU monitoring successful", flush=True)
        print(f"[v0] - Temperature: {detailed_info['temperature']}", flush=True)
        print(f"[v0] - Power: {detailed_info['power_draw']}", flush=True)
        print(f"[v0] - Utilization: {detailed_info['utilization_gpu']}", flush=True)
        print(f"[v0] - Memory: {detailed_info['memory_used']} / {detailed_info['memory_total']}", flush=True)
        print(f"[v0] - Processes: {len(detailed_info['processes'])}", flush=True)

        return detailed_info

    except subprocess.TimeoutExpired:
        print(f"[v0] amdgpu_top command timed out", flush=True)
        return None
    except Exception as e:
        print(f"[v0] Error getting AMD GPU data: {e}", flush=True)
        import traceback
        traceback.print_exc()
        return None
@app.route('/api/system', methods=['GET']) @app.route('/api/system', methods=['GET'])
def api_system(): def api_system():
@@ -3177,7 +3352,7 @@ def api_vm_control(vmid):
'error': control_result.stderr 'error': control_result.stderr
}), 500 }), 500
else: else:
return jsonify({'error': 'Failed to control VM'}), 500 return jsonify({'error': 'Failed to get VM details'}), 500
except Exception as e: except Exception as e:
print(f"Error controlling VM: {e}") print(f"Error controlling VM: {e}")
return jsonify({'error': str(e)}), 500 return jsonify({'error': str(e)}), 500