mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2025-11-18 11:36:17 +00:00
Update flask_server.py
This commit is contained in:
@@ -18,6 +18,7 @@ from datetime import datetime, timedelta
|
|||||||
import re # Added for regex matching
|
import re # Added for regex matching
|
||||||
import select # Added for non-blocking read
|
import select # Added for non-blocking read
|
||||||
import shutil # Added for shutil.which
|
import shutil # Added for shutil.which
|
||||||
|
import xml.etree.ElementTree as ET # Added for XML parsing
|
||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
CORS(app) # Enable CORS for Next.js frontend
|
CORS(app) # Enable CORS for Next.js frontend
|
||||||
@@ -1698,7 +1699,10 @@ def get_detailed_gpu_info(gpu):
|
|||||||
'engine_render': None,
|
'engine_render': None,
|
||||||
'engine_blitter': None,
|
'engine_blitter': None,
|
||||||
'engine_video': None,
|
'engine_video': None,
|
||||||
'engine_video_enhance': None
|
'engine_video_enhance': None,
|
||||||
|
# Added for NVIDIA/AMD specific engine info if available
|
||||||
|
'engine_encoder': None,
|
||||||
|
'engine_decoder': None
|
||||||
}
|
}
|
||||||
|
|
||||||
# Intel GPU monitoring with intel_gpu_top
|
# Intel GPU monitoring with intel_gpu_top
|
||||||
@@ -1998,77 +2002,227 @@ def get_detailed_gpu_info(gpu):
|
|||||||
elif 'nvidia' in vendor:
|
elif 'nvidia' in vendor:
|
||||||
print(f"[v0] NVIDIA GPU detected, checking for nvidia-smi...", flush=True)
|
print(f"[v0] NVIDIA GPU detected, checking for nvidia-smi...", flush=True)
|
||||||
if shutil.which('nvidia-smi'):
|
if shutil.which('nvidia-smi'):
|
||||||
print(f"[v0] nvidia-smi found, executing...", flush=True)
|
print(f"[v0] nvidia-smi found, executing with XML output...", flush=True)
|
||||||
try:
|
try:
|
||||||
# Basic GPU stats
|
cmd = ['nvidia-smi', '-q', '-x']
|
||||||
query_gpu = 'index,name,memory.total,memory.used,memory.free,temperature.gpu,power.draw,power.limit,utilization.gpu,utilization.memory,clocks.gr,clocks.mem,pcie.link.gen.current,pcie.link.width.current'
|
print(f"[v0] Executing command: {' '.join(cmd)}", flush=True)
|
||||||
cmd_gpu = ['nvidia-smi', f'--query-gpu={query_gpu}', '--format=csv,noheader,nounits']
|
result = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
|
||||||
print(f"[v0] Executing command: {' '.join(cmd_gpu)}", flush=True)
|
|
||||||
result_gpu = subprocess.run(cmd_gpu, capture_output=True, text=True, timeout=5)
|
|
||||||
|
|
||||||
if result_gpu.returncode == 0 and result_gpu.stdout.strip():
|
if result.returncode == 0 and result.stdout.strip():
|
||||||
output_lines = result_gpu.stdout.strip().split('\n')
|
print(f"[v0] nvidia-smi XML output received, parsing...", flush=True)
|
||||||
# Assuming only one GPU, or taking the first one if multiple are returned
|
|
||||||
gpu_data_line = output_lines[0]
|
|
||||||
parts = [p.strip() for p in gpu_data_line.split(',')]
|
|
||||||
|
|
||||||
if len(parts) >= 14: # Check if we have enough parts
|
|
||||||
try:
|
try:
|
||||||
detailed_info['temperature'] = int(parts[5]) if parts[5].isdigit() else None
|
# Parse XML
|
||||||
detailed_info['power_draw'] = float(parts[6]) if parts[6].replace('.', '', 1).isdigit() else None
|
root = ET.fromstring(result.stdout)
|
||||||
detailed_info['power_limit'] = float(parts[7]) if parts[7].replace('.', '', 1).isdigit() else None
|
|
||||||
detailed_info['utilization_gpu'] = int(parts[8]) if parts[8].isdigit() else None
|
|
||||||
detailed_info['utilization_memory'] = int(parts[9]) if parts[9].isdigit() else None
|
|
||||||
detailed_info['clock_graphics'] = int(parts[10]) if parts[10].isdigit() else None
|
|
||||||
detailed_info['clock_memory'] = int(parts[11]) if parts[11].isdigit() else None
|
|
||||||
detailed_info['memory_total'] = int(parts[2]) if parts[2].isdigit() else None
|
|
||||||
detailed_info['memory_used'] = int(parts[3]) if parts[3].isdigit() else None
|
|
||||||
detailed_info['memory_free'] = int(parts[4]) if parts[4].isdigit() else None
|
|
||||||
|
|
||||||
print(f"[v0] NVIDIA GPU Basic Stats: Temp={detailed_info['temperature']}C, Power={detailed_info['power_draw']}W, Util={detailed_info['utilization_gpu']}%", flush=True)
|
# Get first GPU (assuming single GPU or taking first one)
|
||||||
detailed_info['has_monitoring_tool'] = True
|
gpu_elem = root.find('gpu')
|
||||||
except (ValueError, IndexError) as e:
|
|
||||||
print(f"[v0] Error parsing NVIDIA GPU stats: {e}", flush=True)
|
|
||||||
detailed_info['has_monitoring_tool'] = False
|
|
||||||
|
|
||||||
# Compute processes using GPU
|
if gpu_elem is not None:
|
||||||
query_apps = 'pid,process_name,used_memory'
|
print(f"[v0] Processing NVIDIA GPU XML data...", flush=True)
|
||||||
cmd_apps = ['nvidia-smi', f'--query-compute-apps={query_apps}', '--format=csv,noheader']
|
data_retrieved = False
|
||||||
print(f"[v0] Executing command: {' '.join(cmd_apps)}", flush=True)
|
|
||||||
result_apps = subprocess.run(cmd_apps, capture_output=True, text=True, timeout=5)
|
|
||||||
|
|
||||||
if result_apps.returncode == 0 and result_apps.stdout.strip():
|
# Parse temperature
|
||||||
|
temp_elem = gpu_elem.find('.//temperature/gpu_temp')
|
||||||
|
if temp_elem is not None and temp_elem.text:
|
||||||
|
try:
|
||||||
|
# Remove ' C' suffix and convert to int
|
||||||
|
temp_str = temp_elem.text.replace(' C', '').strip()
|
||||||
|
detailed_info['temperature'] = int(temp_str)
|
||||||
|
print(f"[v0] Temperature: {detailed_info['temperature']}°C", flush=True)
|
||||||
|
data_retrieved = True
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Parse fan speed
|
||||||
|
fan_elem = gpu_elem.find('.//fan_speed')
|
||||||
|
if fan_elem is not None and fan_elem.text and fan_elem.text != 'N/A':
|
||||||
|
try:
|
||||||
|
# Remove ' %' suffix and convert to int
|
||||||
|
fan_str = fan_elem.text.replace(' %', '').strip()
|
||||||
|
detailed_info['fan_speed'] = int(fan_str)
|
||||||
|
detailed_info['fan_unit'] = '%'
|
||||||
|
print(f"[v0] Fan Speed: {detailed_info['fan_speed']}%", flush=True)
|
||||||
|
data_retrieved = True
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Parse power draw
|
||||||
|
power_elem = gpu_elem.find('.//gpu_power_readings/power_state')
|
||||||
|
instant_power_elem = gpu_elem.find('.//gpu_power_readings/instant_power_draw')
|
||||||
|
if instant_power_elem is not None and instant_power_elem.text and instant_power_elem.text != 'N/A':
|
||||||
|
try:
|
||||||
|
# Remove ' W' suffix and convert to float
|
||||||
|
power_str = instant_power_elem.text.replace(' W', '').strip()
|
||||||
|
detailed_info['power_draw'] = float(power_str)
|
||||||
|
print(f"[v0] Power Draw: {detailed_info['power_draw']} W", flush=True)
|
||||||
|
data_retrieved = True
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Parse power limit
|
||||||
|
power_limit_elem = gpu_elem.find('.//gpu_power_readings/current_power_limit')
|
||||||
|
if power_limit_elem is not None and power_limit_elem.text and power_limit_elem.text != 'N/A':
|
||||||
|
try:
|
||||||
|
power_limit_str = power_limit_elem.text.replace(' W', '').strip()
|
||||||
|
detailed_info['power_limit'] = float(power_limit_str)
|
||||||
|
print(f"[v0] Power Limit: {detailed_info['power_limit']} W", flush=True)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Parse GPU utilization
|
||||||
|
gpu_util_elem = gpu_elem.find('.//utilization/gpu_util')
|
||||||
|
if gpu_util_elem is not None and gpu_util_elem.text:
|
||||||
|
try:
|
||||||
|
util_str = gpu_util_elem.text.replace(' %', '').strip()
|
||||||
|
detailed_info['utilization_gpu'] = int(util_str)
|
||||||
|
print(f"[v0] GPU Utilization: {detailed_info['utilization_gpu']}%", flush=True)
|
||||||
|
data_retrieved = True
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Parse memory utilization
|
||||||
|
mem_util_elem = gpu_elem.find('.//utilization/memory_util')
|
||||||
|
if mem_util_elem is not None and mem_util_elem.text:
|
||||||
|
try:
|
||||||
|
mem_util_str = mem_util_elem.text.replace(' %', '').strip()
|
||||||
|
detailed_info['utilization_memory'] = int(mem_util_str)
|
||||||
|
print(f"[v0] Memory Utilization: {detailed_info['utilization_memory']}%", flush=True)
|
||||||
|
data_retrieved = True
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Parse encoder utilization
|
||||||
|
encoder_util_elem = gpu_elem.find('.//utilization/encoder_util')
|
||||||
|
if encoder_util_elem is not None and encoder_util_elem.text and encoder_util_elem.text != 'N/A':
|
||||||
|
try:
|
||||||
|
encoder_str = encoder_util_elem.text.replace(' %', '').strip()
|
||||||
|
detailed_info['engine_encoder'] = int(encoder_str)
|
||||||
|
print(f"[v0] Encoder Utilization: {detailed_info['engine_encoder']}%", flush=True)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Parse decoder utilization
|
||||||
|
decoder_util_elem = gpu_elem.find('.//utilization/decoder_util')
|
||||||
|
if decoder_util_elem is not None and decoder_util_elem.text and decoder_util_elem.text != 'N/A':
|
||||||
|
try:
|
||||||
|
decoder_str = decoder_util_elem.text.replace(' %', '').strip()
|
||||||
|
detailed_info['engine_decoder'] = int(decoder_str)
|
||||||
|
print(f"[v0] Decoder Utilization: {detailed_info['engine_decoder']}%", flush=True)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Parse clocks
|
||||||
|
graphics_clock_elem = gpu_elem.find('.//clocks/graphics_clock')
|
||||||
|
if graphics_clock_elem is not None and graphics_clock_elem.text:
|
||||||
|
try:
|
||||||
|
clock_str = graphics_clock_elem.text.replace(' MHz', '').strip()
|
||||||
|
detailed_info['clock_graphics'] = int(clock_str)
|
||||||
|
print(f"[v0] Graphics Clock: {detailed_info['clock_graphics']} MHz", flush=True)
|
||||||
|
data_retrieved = True
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
mem_clock_elem = gpu_elem.find('.//clocks/mem_clock')
|
||||||
|
if mem_clock_elem is not None and mem_clock_elem.text:
|
||||||
|
try:
|
||||||
|
mem_clock_str = mem_clock_elem.text.replace(' MHz', '').strip()
|
||||||
|
detailed_info['clock_memory'] = int(mem_clock_str)
|
||||||
|
print(f"[v0] Memory Clock: {detailed_info['clock_memory']} MHz", flush=True)
|
||||||
|
data_retrieved = True
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Parse memory usage
|
||||||
|
mem_total_elem = gpu_elem.find('.//fb_memory_usage/total')
|
||||||
|
if mem_total_elem is not None and mem_total_elem.text:
|
||||||
|
try:
|
||||||
|
mem_total_str = mem_total_elem.text.replace(' MiB', '').strip()
|
||||||
|
detailed_info['memory_total'] = int(mem_total_str)
|
||||||
|
print(f"[v0] Memory Total: {detailed_info['memory_total']} MB", flush=True)
|
||||||
|
data_retrieved = True
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
mem_used_elem = gpu_elem.find('.//fb_memory_usage/used')
|
||||||
|
if mem_used_elem is not None and mem_used_elem.text:
|
||||||
|
try:
|
||||||
|
mem_used_str = mem_used_elem.text.replace(' MiB', '').strip()
|
||||||
|
detailed_info['memory_used'] = int(mem_used_str)
|
||||||
|
print(f"[v0] Memory Used: {detailed_info['memory_used']} MB", flush=True)
|
||||||
|
data_retrieved = True
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
mem_free_elem = gpu_elem.find('.//fb_memory_usage/free')
|
||||||
|
if mem_free_elem is not None and mem_free_elem.text:
|
||||||
|
try:
|
||||||
|
mem_free_str = mem_free_elem.text.replace(' MiB', '').strip()
|
||||||
|
detailed_info['memory_free'] = int(mem_free_str)
|
||||||
|
print(f"[v0] Memory Free: {detailed_info['memory_free']} MB", flush=True)
|
||||||
|
except ValueError:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Parse processes
|
||||||
|
processes_elem = gpu_elem.find('.//processes')
|
||||||
|
if processes_elem is not None:
|
||||||
processes = []
|
processes = []
|
||||||
for line in result_apps.stdout.strip().split('\n'):
|
for process_elem in processes_elem.findall('process_info'):
|
||||||
if line:
|
|
||||||
parts = [p.strip() for p in line.split(',')]
|
|
||||||
if len(parts) >= 3:
|
|
||||||
# Convert memory to MB
|
|
||||||
mem_str = parts[2]
|
|
||||||
mem_mb = 0
|
|
||||||
if 'MiB' in mem_str:
|
|
||||||
try:
|
try:
|
||||||
mem_mb = int(float(mem_str.replace('MiB', '').strip()))
|
pid_elem = process_elem.find('pid')
|
||||||
except ValueError:
|
name_elem = process_elem.find('process_name')
|
||||||
pass
|
mem_elem = process_elem.find('used_memory')
|
||||||
elif 'GiB' in mem_str:
|
type_elem = process_elem.find('type')
|
||||||
try:
|
|
||||||
mem_mb = int(float(mem_str.replace('GiB', '').strip()) * 1024)
|
if pid_elem is not None and name_elem is not None and mem_elem is not None:
|
||||||
except ValueError:
|
pid = pid_elem.text.strip()
|
||||||
pass
|
name = name_elem.text.strip()
|
||||||
|
|
||||||
|
# Parse memory (format: "362 MiB")
|
||||||
|
mem_str = mem_elem.text.replace(' MiB', '').strip()
|
||||||
|
memory_mb = int(mem_str)
|
||||||
|
|
||||||
|
# Get process type (C=Compute, G=Graphics)
|
||||||
|
proc_type = type_elem.text.strip() if type_elem is not None else 'C'
|
||||||
|
|
||||||
|
process_info = {
|
||||||
|
'pid': pid,
|
||||||
|
'name': name,
|
||||||
|
'memory_used_mb': memory_mb,
|
||||||
|
'type': proc_type,
|
||||||
|
'engines': {}
|
||||||
|
}
|
||||||
|
|
||||||
|
# For NVIDIA, we don't have per-process engine utilization
|
||||||
|
# But we can indicate the type of workload
|
||||||
|
if proc_type == 'G':
|
||||||
|
process_info['engines']['Graphics'] = 'Active'
|
||||||
|
elif proc_type == 'C':
|
||||||
|
process_info['engines']['Compute'] = 'Active'
|
||||||
|
|
||||||
|
processes.append(process_info)
|
||||||
|
print(f"[v0] Process: {name} (PID: {pid}) - {memory_mb} MB - Type: {proc_type}", flush=True)
|
||||||
|
except (ValueError, AttributeError) as e:
|
||||||
|
print(f"[v0] Error parsing process: {e}", flush=True)
|
||||||
|
continue
|
||||||
|
|
||||||
processes.append({
|
|
||||||
'pid': parts[0],
|
|
||||||
'name': parts[1],
|
|
||||||
'memory_used_mb': mem_mb
|
|
||||||
})
|
|
||||||
detailed_info['processes'] = processes
|
detailed_info['processes'] = processes
|
||||||
print(f"[v0] Found {len(processes)} NVIDIA GPU processes", flush=True)
|
print(f"[v0] Found {len(processes)} NVIDIA GPU processes", flush=True)
|
||||||
|
|
||||||
if detailed_info['has_monitoring_tool']:
|
if data_retrieved:
|
||||||
|
detailed_info['has_monitoring_tool'] = True
|
||||||
print(f"[v0] NVIDIA GPU monitoring successful", flush=True)
|
print(f"[v0] NVIDIA GPU monitoring successful", flush=True)
|
||||||
else:
|
else:
|
||||||
print(f"[v0] NVIDIA GPU monitoring failed - essential data not retrieved", flush=True)
|
print(f"[v0] NVIDIA GPU monitoring failed - no data retrieved", flush=True)
|
||||||
|
else:
|
||||||
|
print(f"[v0] No GPU element found in XML", flush=True)
|
||||||
|
|
||||||
|
except ET.ParseError as e:
|
||||||
|
print(f"[v0] Error parsing nvidia-smi XML: {e}", flush=True)
|
||||||
|
import traceback
|
||||||
|
traceback.print_exc()
|
||||||
|
else:
|
||||||
|
print(f"[v0] nvidia-smi returned error or empty output", flush=True)
|
||||||
|
|
||||||
except subprocess.TimeoutExpired:
|
except subprocess.TimeoutExpired:
|
||||||
print(f"[v0] nvidia-smi timed out - marking tool as unavailable", flush=True)
|
print(f"[v0] nvidia-smi timed out - marking tool as unavailable", flush=True)
|
||||||
@@ -2151,7 +2305,7 @@ def get_detailed_gpu_info(gpu):
|
|||||||
mem_clock = clocks['GFX_MCLK']
|
mem_clock = clocks['GFX_MCLK']
|
||||||
if 'value' in mem_clock:
|
if 'value' in mem_clock:
|
||||||
detailed_info['clock_memory'] = f"{mem_clock['value']} MHz"
|
detailed_info['clock_memory'] = f"{mem_clock['value']} MHz"
|
||||||
print(f"[v0] Memory Clock: {detailed_info['clock_memory']}", flush=True)
|
print(f"[v0] Memory Clock: {detailed_info['clock_memory']} MHz", flush=True)
|
||||||
data_retrieved = True
|
data_retrieved = True
|
||||||
|
|
||||||
# Parse GPU activity (gpu_activity.GFX)
|
# Parse GPU activity (gpu_activity.GFX)
|
||||||
@@ -3335,7 +3489,10 @@ def api_gpu_realtime(slot):
|
|||||||
'engine_render': gpu.get('engine_render'),
|
'engine_render': gpu.get('engine_render'),
|
||||||
'engine_blitter': gpu.get('engine_blitter'),
|
'engine_blitter': gpu.get('engine_blitter'),
|
||||||
'engine_video': gpu.get('engine_video'),
|
'engine_video': gpu.get('engine_video'),
|
||||||
'engine_video_enhance': gpu.get('engine_video_enhance')
|
'engine_video_enhance': gpu.get('engine_video_enhance'),
|
||||||
|
# Added for NVIDIA/AMD specific engine info if available
|
||||||
|
'engine_encoder': gpu.get('engine_encoder'),
|
||||||
|
'engine_decoder': gpu.get('engine_decoder')
|
||||||
}
|
}
|
||||||
|
|
||||||
print(f"[v0] /api/gpu/{slot}/realtime returning data")
|
print(f"[v0] /api/gpu/{slot}/realtime returning data")
|
||||||
|
|||||||
Reference in New Issue
Block a user