Update flask_server.py

2025-12-14 08:06:22 +00:00 · 2025-10-09 17:03:01 +02:00
parent 765b2b1d69
commit 28b29ed086
1 changed file with 362 additions and 423 deletions
--- a/AppImage/scripts/flask_server.py
+++ b/AppImage/scripts/flask_server.py
@@ -17,6 +17,7 @@ import socket
 from datetime import datetime, timedelta
 import re # Added for regex matching
 import select # Added for non-blocking read
 import shutil # Added for shutil.which
 app = Flask(__name__)
 CORS(app)  # Enable CORS for Next.js frontend
@@ -583,26 +584,30 @@ def get_smart_data(disk_name):
            ['smartctl', '-a', '-d', 'sat,16', f'/dev/{disk_name}'],  # Text SAT with 16-byte commands
        ]
        process = None # Initialize process to None
        for cmd_index, cmd in enumerate(commands_to_try):
            print(f"[v0] Attempt {cmd_index + 1}/{len(commands_to_try)}: Running command: {' '.join(cmd)}")
            try:
-                result = subprocess.run(cmd, capture_output=True, text=True, timeout=15)
+                process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
-                print(f"[v0] Command return code: {result.returncode}")
+                stdout, stderr = process.communicate(timeout=15)
                result_code = process.returncode
-                if result.stderr:
+                print(f"[v0] Command return code: {result_code}")
-                    stderr_preview = result.stderr[:200].replace('\n', ' ')
+                
                if stderr:
                    stderr_preview = stderr[:200].replace('\n', ' ')
                    print(f"[v0] stderr: {stderr_preview}")
-                has_output = result.stdout and len(result.stdout.strip()) > 50
+                has_output = stdout and len(stdout.strip()) > 50
                if has_output:
-                    print(f"[v0] Got output ({len(result.stdout)} bytes), attempting to parse...")
+                    print(f"[v0] Got output ({len(stdout)} bytes), attempting to parse...")
                    # Try JSON parsing first (if -j flag was used)
                    if '-j' in cmd:
                        try:
                            print(f"[v0] Attempting JSON parse...")
-                            data = json.loads(result.stdout)
+                            data = json.loads(stdout)
                            print(f"[v0] JSON parse successful!")
                            # Extract model
@@ -688,7 +693,7 @@ def get_smart_data(disk_name):
                    if smart_data['model'] == 'Unknown' or smart_data['serial'] == 'Unknown' or smart_data['temperature'] == 0:
                        print(f"[v0] Parsing text output (model={smart_data['model']}, serial={smart_data['serial']}, temp={smart_data['temperature']})...")
-                        output = result.stdout
+                        output = stdout
                        # Get basic info
                        for line in output.split('\n'):
@@ -808,7 +813,7 @@ def get_smart_data(disk_name):
                            print(f"[v0] Extracted partial data from text output, continuing to next attempt...")
                else:
-                    print(f"[v0] No usable output (return code {result.returncode}), trying next command...")
+                    print(f"[v0] No usable output (return code {result_code}), trying next command...")
            except subprocess.TimeoutExpired:
                print(f"[v0] Command timeout for attempt {cmd_index + 1}, trying next...")
@@ -816,7 +821,15 @@ def get_smart_data(disk_name):
            except Exception as e:
                print(f"[v0] Error in attempt {cmd_index + 1}: {type(e).__name__}: {e}")
                continue
-        
+            finally:
                if process and process.returncode is None: # If process is still running after error
                    try:
                        process.kill()
                        print(f"[v0] Process killed for command: {' '.join(cmd)}")
                    except Exception as kill_err:
                        print(f"[v0] Error killing process: {kill_err}")
        if smart_data['reallocated_sectors'] > 0 or smart_data['pending_sectors'] > 0:
            if smart_data['health'] == 'healthy':
                smart_data['health'] = 'warning'
@@ -1573,434 +1586,359 @@ def identify_fan(fan_name, adapter):
    return fan_name
 # --- GPU Monitoring Functions ---
 def get_detailed_gpu_info(gpu):
-    """Get detailed GPU information using nvidia-smi, intel_gpu_top, or radeontop"""
+    """Get detailed monitoring information for a GPU"""
-    detailed_info = {}
+    vendor = gpu.get('vendor', '').lower()
    slot = gpu.get('slot', '')
-    vendor = gpu.get('vendor', '').upper()
+    print(f"[v0] ===== get_detailed_gpu_info called for GPU {slot} (vendor: {vendor}) =====", flush=True)
-    if vendor == 'INTEL':
+    detailed_info = {
-        try:
+        'has_monitoring_tool': False,
-            check_result = subprocess.run(['which', 'intel_gpu_top'], capture_output=True, timeout=1)
+        'temperature': None,
-            if check_result.returncode != 0:
+        'fan_speed': None,
-                detailed_info['has_monitoring_tool'] = False
+        'fan_unit': None,
-                print(f"[v0] intel_gpu_top not found for Intel GPU")
+        'utilization_gpu': None,
-                return detailed_info
+        'utilization_memory': None,
-            else:
+        'memory_used': None,
-                print(f"[v0] intel_gpu_top found for Intel GPU")
+        'memory_total': None,
-            
+        'memory_free': None,
-            gpu_device = '/dev/dri/card0'
+        'power_draw': None,
-            if not os.path.exists(gpu_device):
+        'power_limit': None,
-                print(f"[v0] GPU device {gpu_device} not found - marking tool as unavailable")
+        'clock_graphics': None,
-                detailed_info['has_monitoring_tool'] = False
+        'clock_memory': None,
-                return detailed_info
+        'processes': [],
-            
+        'engine_render': None,
-            if not os.access(gpu_device, os.R_OK):
+        'engine_blitter': None,
-                print(f"[v0] GPU device {gpu_device} not accessible - marking tool as unavailable")
+        'engine_video': None,
-                detailed_info['has_monitoring_tool'] = False
+        'engine_video_enhance': None
-                return detailed_info
+    }
-            
+    
-            print(f"[v0] GPU device {gpu_device} is accessible")
+    # Intel GPU monitoring with intel_gpu_top
-        
+    if 'intel' in vendor:
-        except Exception as e:
+        print(f"[v0] Intel GPU detected, checking for intel_gpu_top...", flush=True)
-            print(f"[v0] Error checking for intel_gpu_top: {e}")
+        if shutil.which('intel_gpu_top'):
-            detailed_info['has_monitoring_tool'] = False
+            print(f"[v0] intel_gpu_top found, executing...", flush=True)
-            return detailed_info
+            try:
-        
+                import os
-        data_retrieved = False
+                print(f"[v0] Current user: {os.getenv('USER', 'unknown')}", flush=True)
-        process = None
+                print(f"[v0] Current working directory: {os.getcwd()}", flush=True)
        try:
            cmd = ['intel_gpu_top', '-J']
            print(f"[v0] Starting intel_gpu_top with command: {' '.join(cmd)}")
            process = subprocess.Popen(
                cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                bufsize=1
            )
            print(f"[v0] intel_gpu_top process started, reading output...")
            output_lines = []
            start_time = time.time()
            timeout_seconds = 5.0
            json_objects_found = 0
            valid_data_found = False
            best_json_data = None  # Store the best JSON object found (one with clients if available)
            while time.time() - start_time < timeout_seconds:
                if process.poll() is not None:
                    break
                try:
                    # Use select for non-blocking read to avoid hanging
                    ready_fds, _, _ = select.select([process.stdout], [], [], 0.1)
                    if ready_fds:
                        line = process.stdout.readline()
                        if line:
                            output_lines.append(line)
                            output = ''.join(output_lines)
                            if len(output_lines) <= 20:
                                print(f"[v0] Received line {len(output_lines)}: {line.strip()[:150]}")
                            # Find all complete JSON objects
                            search_start = 0
                            while True:
                                object_start = -1
                                for i in range(search_start, len(output)):
                                    if output[i] == '{':
                                        object_start = i
                                        break
                                    elif output[i] not in [',', '\n', '\r', ' ', '\t']:
                                        break
                                if object_start == -1:
                                    break
                                # Count braces to find complete object
                                brace_count = 0
                                object_end = -1
                                for i in range(object_start, len(output)):
                                    if output[i] == '{':
                                        brace_count += 1
                                    elif output[i] == '}':
                                        brace_count -= 1
                                        if brace_count == 0:
                                            object_end = i + 1
                                            break
                                if object_end > object_start:
                                    json_objects_found += 1
                                    json_str = output[object_start:object_end]
                                    if json_objects_found == 1:
                                        print(f"[v0] Found first JSON object ({len(json_str)} chars) - skipping (baseline)")
                                        search_start = object_end
                                        while search_start < len(output) and output[search_start] in [',', '\n', '\r', ' ', '\t']:
                                            search_start += 1
                                        continue
                                    print(f"[v0] Found JSON object #{json_objects_found} ({len(json_str)} chars)")
                                    try:
                                        json_data = json.loads(json_str)
                                        print(f"[v0] Successfully parsed JSON object #{json_objects_found}")
                                        print(f"[v0] JSON keys: {list(json_data.keys())}")
                                        has_clients = 'clients' in json_data and json_data['clients']
                                        if has_clients:
                                            client_names = [client.get('name', 'Unknown') for client in json_data['clients'].values()]
                                            print(f"[v0] ✓ JSON object #{json_objects_found} has clients data with {len(json_data['clients'])} client(s): {client_names}")
                                            best_json_data = json_data
                                            valid_data_found = True
                                            break
                                        elif best_json_data is None:
                                            best_json_data = json_data
                                            print(f"[v0] ✗ JSON object #{json_objects_found} has no clients data, storing as fallback")
                                        else:
                                            print(f"[v0] ✗ JSON object #{json_objects_found} has no clients data")
                                        if json_objects_found >= 5:
                                            print(f"[v0] Read 5 JSON objects without clients, using fallback")
                                            valid_data_found = True
                                            break
                                    except json.JSONDecodeError as e:
                                        print(f"[v0] Failed to parse JSON object #{json_objects_found}: {e}")
                                    search_start = object_end
                                    while search_start < len(output) and output[search_start] in [',', '\n', '\r', ' ', '\t']:
                                        search_start += 1
                                else:
                                    break
                            if valid_data_found:
                                break
                except Exception as e:
                    print(f"[v0] Error reading intel_gpu_top output: {e}")
                    break
            if best_json_data:
                json_data = best_json_data
                has_clients = 'clients' in json_data and json_data['clients']
                print(f"[v0] Using JSON data with keys: {list(json_data.keys())}")
                print(f"[v0] Has clients: {has_clients}")
                if has_clients:
                    print(f"[v0] Number of clients: {len(json_data['clients'])}")
                else:
                    print(f"[v0] WARNING: No clients found in any of the {json_objects_found} JSON objects read")
                # Parse frequency data
                if 'frequency' in json_data:
                    freq = json_data['frequency']
                    if 'actual' in freq:
                        detailed_info['clock_graphics'] = f"{freq['actual']:.0f} MHz"
                        data_retrieved = True
                    if 'requested' in freq:
                        detailed_info['clock_max'] = f"{freq['requested']:.0f} MHz"
                        data_retrieved = True
-                # Parse power data
+                cmd = ['intel_gpu_top', '-J']
-                if 'power' in json_data:
+                print(f"[v0] Executing command: {' '.join(cmd)}", flush=True)
                    power = json_data['power']
                    if 'GPU' in power:
                        detailed_info['power_draw'] = f"{power['GPU']:.2f} W"
                        data_retrieved = True
                    if 'Package' in power:
                        detailed_info['power_limit'] = f"{power['Package']:.2f} W"
                        data_retrieved = True
-                # Parse RC6 state
+                process = subprocess.Popen(
-                if 'rc6' in json_data:
+                    cmd,
-                    rc6_value = json_data['rc6'].get('value', 0)
+                    stdout=subprocess.PIPE,
-                    detailed_info['power_state'] = f"RC6: {rc6_value:.1f}%"
+                    stderr=subprocess.PIPE,
-                    data_retrieved = True
+                    text=True,
                    bufsize=1
                )
-                # Parse interrupts
+                print(f"[v0] Process started with PID: {process.pid}", flush=True)
                if 'interrupts' in json_data:
                    irq_count = json_data['interrupts'].get('count', 0)
                    detailed_info['irq_rate'] = int(irq_count)
                    data_retrieved = True
-                engine_totals = {
+                print(f"[v0] Waiting 1 second for intel_gpu_top to initialize...", flush=True)
-                    'engine_render': 0.0,
+                time.sleep(1)
-                    'engine_blitter': 0.0,
+                print(f"[v0] Starting to read JSON objects...", flush=True)
                    'engine_video': 0.0,
                    'engine_video_enhance': 0.0
                }
-                client_engine_totals = {
+                start_time = time.time()
-                    'engine_render': 0.0,
+                timeout = 8  # Increased timeout from 5 to 8 seconds
-                    'engine_blitter': 0.0,
+                json_objects = []
-                    'engine_video': 0.0,
+                buffer = ""
-                    'engine_video_enhance': 0.0
+                brace_count = 0
-                }
+                in_json = False
-                if 'engines' in json_data:
+                print(f"[v0] Reading output from intel_gpu_top...", flush=True)
-                    engines_data = json_data['engines']
+                
-                    print(f"[v0] Parsing engines data: {list(engines_data.keys())}")
+                while time.time() - start_time < timeout:
                    if process.poll() is not None:
                        print(f"[v0] Process terminated early with code: {process.poll()}", flush=True)
                        break
-                    engine_map = {
+                    try:
-                        'Render/3D': 'engine_render',
+                        line = process.stdout.readline()
-                        'Blitter': 'engine_blitter',
+                        if not line:
-                        'Video': 'engine_video',
+                            time.sleep(0.1)
-                        'VideoEnhance': 'engine_video_enhance'
+                            continue
                        for char in line:
                            if char == '{':
                                if brace_count == 0:
                                    in_json = True
                                    buffer = char
                                else:
                                    buffer += char
                                brace_count += 1
                            elif char == '}':
                                buffer += char
                                brace_count -= 1
                                if brace_count == 0 and in_json:
                                    try:
                                        json_data = json.loads(buffer)
                                        json_objects.append(json_data)
                                        print(f"[v0] Found JSON object #{len(json_objects)} ({len(buffer)} chars)", flush=True)
                                        print(f"[v0] JSON keys: {list(json_data.keys())}", flush=True)
                                        if 'clients' in json_data:
                                            client_count = len(json_data['clients'])
                                            print(f"[v0] *** FOUND CLIENTS SECTION with {client_count} client(s) ***", flush=True)
                                            for client_id, client_data in json_data['clients'].items():
                                                client_name = client_data.get('name', 'Unknown')
                                                client_pid = client_data.get('pid', 'Unknown')
                                                print(f"[v0]   - Client: {client_name} (PID: {client_pid})", flush=True)
                                        else:
                                            print(f"[v0] No 'clients' key in this JSON object", flush=True)
                                        if len(json_objects) >= 10:
                                            print(f"[v0] Collected 10 JSON objects, stopping...", flush=True)
                                            break
                                    except json.JSONDecodeError:
                                        pass
                                    buffer = ""
                                    in_json = False
                            elif in_json:
                                buffer += char
                    except Exception as e:
                        print(f"[v0] Error reading line: {e}", flush=True)
                        break
                process.terminate()
                process.wait(timeout=1)
                print(f"[v0] Collected {len(json_objects)} JSON objects total", flush=True)
                best_json = None
                for json_obj in json_objects:
                    if 'clients' in json_obj and len(json_obj['clients']) > 0:
                        best_json = json_obj
                        print(f"[v0] Using JSON object with {len(json_obj['clients'])} client(s)", flush=True)
                        break
                if not best_json and json_objects:
                    best_json = json_objects[0]
                    print(f"[v0] No JSON with clients found, using first JSON object as fallback", flush=True)
                if best_json:
                    print(f"[v0] Parsing selected JSON object...", flush=True)
                    data_retrieved = False
                    # Initialize engine totals
                    engine_totals = {
                        'Render/3D': 0.0,
                        'Blitter': 0.0,
                        'Video': 0.0,
                        'VideoEnhance': 0.0
                    }
                    client_engine_totals = {
                        'Render/3D': 0.0,
                        'Blitter': 0.0,
                        'Video': 0.0,
                        'VideoEnhance': 0.0
                    }
-                    for engine_key in engines_data.keys():
+                    # Parse clients section (processes using GPU)
-                        engine_base = engine_key.rsplit('/', 1)[0] if '/' in engine_key and engine_key[-1].isdigit() else engine_key
+                    if 'clients' in best_json:
-                        
+                        print(f"[v0] Parsing clients section...", flush=True)
-                        if engine_base in engine_map:
+                        clients = best_json['clients']
-                            busy_value = engines_data[engine_key].get('busy', 0)
+                        processes = []
-                            key = engine_map[engine_base]
+
-                            engine_totals[key] = busy_value
+                        for client_id, client_data in clients.items():
-                            print(f"[v0] Global engine {engine_key}: {busy_value}%")
+                            process_info = {
-                
+                                'name': client_data.get('name', 'Unknown'),
-                if 'clients' in json_data:
+                                'pid': client_data.get('pid', 'Unknown'),
-                    clients_data = json_data['clients']
+                                'memory': {
-                    processes = []
+                                    'total': client_data.get('memory', {}).get('system', {}).get('total', 0),
-                    print(f"[v0] Found {len(clients_data)} clients in intel_gpu_top output")
+                                    'shared': client_data.get('memory', {}).get('system', {}).get('shared', 0),
-                    
+                                    'resident': client_data.get('memory', {}).get('system', {}).get('resident', 0)
-                    for client_id, client_info in clients_data.items():
+                                },
-                        process_info = {
+                                'engines': {}
                            'name': client_info.get('name', 'Unknown'),
                            'pid': str(client_info.get('pid', 'N/A'))
                        }
                        # Extract memory usage
                        if 'memory' in client_info:
                            if 'system' in client_info['memory'] and 'resident' in client_info['memory']['system']:
                                mem_bytes = int(client_info['memory']['system']['resident'])
                                mem_mb = mem_bytes / (1024 * 1024)
                                process_info['memory_used'] = f"{mem_mb:.0f} MB"
                            elif 'resident' in client_info['memory']:
                                mem_bytes = int(client_info['memory']['resident'])
                                mem_mb = mem_bytes / (1024 * 1024)
                                process_info['memory_used'] = f"{mem_mb:.0f} MB"
                        # Extract GPU utilization from engine classes
                        if 'engine-classes' in client_info:
                            engine_classes = client_info['engine-classes']
                            engine_map_client = {
                                'Render/3D': 'engine_render',
                                'Blitter': 'engine_blitter',
                                'Video': 'engine_video',
                                'VideoEnhance': 'engine_video_enhance'
                            }
-                            max_utilization = 0.0
+                            # Parse engine utilization for this process
-                            for engine_name, engine_key in engine_map_client.items():
+                            engine_classes = client_data.get('engine-classes', {})
-                                if engine_name in engine_classes:
+                            for engine_name, engine_data in engine_classes.items():
-                                    busy = float(engine_classes[engine_name].get('busy', 0))
+                                busy_value = float(engine_data.get('busy', 0))
-                                    # For individual process, use Render/3D as primary indicator if available
+                                process_info['engines'][engine_name] = f"{busy_value:.1f}%"
-                                    if engine_name == 'Render/3D' and busy > 0:
+                                
-                                        process_info['gpu_utilization'] = f"{busy:.1f}%"
+                                # Sum up engine utilization across all processes
-                                        max_utilization = max(max_utilization, busy)
+                                if engine_name in client_engine_totals:
-                                    elif engine_name != 'Render/3D': # For other engines, update if higher and no Render/3D found
+                                    client_engine_totals[engine_name] += busy_value
-                                        if 'gpu_utilization' not in process_info and busy > 0:
+                            
-                                            process_info['gpu_utilization'] = f"{busy:.1f}%"
+                            processes.append(process_info)
-                                            max_utilization = max(max_utilization, busy)
+                            print(f"[v0] Added process: {process_info['name']} (PID: {process_info['pid']})", flush=True)
                                    client_engine_totals[engine_key] = max(client_engine_totals[engine_key], busy) # Track for overall GPU util
                        processes.append(process_info)
                        print(f"[v0] Process: {process_info['name']} (PID: {process_info['pid']})")
                    if processes:
                        detailed_info['processes'] = processes
                        print(f"[v0] Total processes found: {len(processes)}", flush=True)
                    else:
                        print(f"[v0] WARNING: No 'clients' section in selected JSON", flush=True)
                    # Parse global engines section
                    if 'engines' in best_json:
                        print(f"[v0] Parsing engines section...", flush=True)
                        engines = best_json['engines']
                        for engine_name, engine_data in engines.items():
                            # Remove the /0 suffix if present
                            clean_name = engine_name.replace('/0', '')
                            busy_value = float(engine_data.get('busy', 0))
                            if clean_name in engine_totals:
                                engine_totals[clean_name] = busy_value
                    # Use client engine totals if available, otherwise use global engines
                    final_engines = client_engine_totals if any(v > 0 for v in client_engine_totals.values()) else engine_totals
                    detailed_info['engine_render'] = f"{final_engines['Render/3D']:.1f}%"
                    detailed_info['engine_blitter'] = f"{final_engines['Blitter']:.1f}%"
                    detailed_info['engine_video'] = f"{final_engines['Video']:.1f}%"
                    detailed_info['engine_video_enhance'] = f"{final_engines['VideoEnhance']:.1f}%"
                    # Calculate overall GPU utilization (max of all engines)
                    max_utilization = max(final_engines.values())
                    detailed_info['utilization_gpu'] = f"{max_utilization:.1f}%"
                    # Parse frequency
                    if 'frequency' in best_json:
                        freq_data = best_json['frequency']
                        actual_freq = freq_data.get('actual', 0)
                        detailed_info['clock_graphics'] = f"{actual_freq} MHz"
                        data_retrieved = True
-                
+                    
-                for key in engine_totals:
+                    # Parse power
-                    if client_engine_totals[key] > engine_totals[key]:
+                    if 'power' in best_json:
-                        engine_totals[key] = client_engine_totals[key]
+                        power_data = best_json['power']
-                
+                        gpu_power = power_data.get('GPU', 0)
-                if 'engines' in json_data or 'clients' in json_data:
+                        package_power = power_data.get('Package', 0)
-                    detailed_info['engine_render'] = f"{engine_totals['engine_render']:.1f}%"
+                        detailed_info['power_draw'] = f"{gpu_power:.2f} W"
-                    detailed_info['engine_blitter'] = f"{engine_totals['engine_blitter']:.1f}%"
+                        detailed_info['power_limit'] = f"{package_power:.2f} W"
-                    detailed_info['engine_video'] = f"{engine_totals['engine_video']:.1f}%"
+                        data_retrieved = True
-                    detailed_info['engine_video_enhance'] = f"{engine_totals['engine_video_enhance']:.1f}%"
+                    
-
+                    if data_retrieved:
-                if 'utilization_gpu' not in detailed_info:
+                        detailed_info['has_monitoring_tool'] = True
-                    print(f"[v0] No engines or clients data found, setting utilization to 0")
+                        print(f"[v0] Intel GPU monitoring successful", flush=True)
-                    detailed_info['utilization_gpu'] = "0.0%"
+                        print(f"[v0] - Utilization: {detailed_info['utilization_gpu']}", flush=True)
-                    detailed_info['engine_render'] = "0.0%"
+                        print(f"[v0] - Engines: R={detailed_info['engine_render']}, B={detailed_info['engine_blitter']}, V={detailed_info['engine_video']}, VE={detailed_info['engine_video_enhance']}", flush=True)
-                    detailed_info['engine_blitter'] = "0.0%"
+                        print(f"[v0] - Processes: {len(detailed_info['processes'])}", flush=True)
-                    detailed_info['engine_video'] = "0.0%"
+                    else:
-                    detailed_info['engine_video_enhance'] = "0.0%"
+                        print(f"[v0] WARNING: No data retrieved from intel_gpu_top", flush=True)
-                    data_retrieved = True
+                else:
-
+                    print(f"[v0] WARNING: No valid JSON objects found", flush=True)
-            else: # If no JSON data was successfully parsed
+                    # Check stderr for errors
-                print(f"[v0] No valid JSON data found from intel_gpu_top.")
+                    stderr_output = process.stderr.read()
-                detailed_info['has_monitoring_tool'] = False
+                    if stderr_output:
-        
+                        print(f"[v0] intel_gpu_top stderr: {stderr_output}", flush=True)
            if data_retrieved:
                detailed_info['has_monitoring_tool'] = True
                print(f"[v0] Intel GPU monitoring successful")
            else:
                # If data_retrieved is still False, it means even with a parsed JSON, essential data wasn't found
                detailed_info['has_monitoring_tool'] = False
                print(f"[v0] Intel GPU monitoring failed - essential data not retrieved")
        except Exception as e:
            print(f"[v0] Error getting Intel GPU details: {e}")
            import traceback
            traceback.print_exc()
            detailed_info['has_monitoring_tool'] = False
        finally:
            if process and process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=1)
                except subprocess.TimeoutExpired:
                    process.kill()
                print(f"[v0] intel_gpu_top process terminated")
    elif vendor == 'NVIDIA':
        try:
            check_result = subprocess.run(['which', 'nvidia-smi'], capture_output=True, timeout=1)
            if check_result.returncode != 0:
                detailed_info['has_monitoring_tool'] = False
                print(f"[v0] nvidia-smi not found for NVIDIA GPU")
                return detailed_info
            else:
                print(f"[v0] nvidia-smi found for NVIDIA GPU")
        except Exception as e:
            print(f"[v0] Error checking for nvidia-smi: {e}")
            detailed_info['has_monitoring_tool'] = False
            return detailed_info
        data_retrieved = False
        try:
            # nvidia-smi query for real-time data
            print(f"[v0] Executing nvidia-smi to get GPU data...")
            result = subprocess.run(
                ['nvidia-smi', '--query-gpu=index,name,driver_version,memory.total,memory.used,memory.free,temperature.gpu,power.draw,power.limit,utilization.gpu,utilization.memory,clocks.gr,clocks.mem,pcie.link.gen.current,pcie.link.width.current', 
                 '--format=csv,noheader,nounits'],
                capture_output=True, text=True, timeout=5
            )
            print(f"[v0] nvidia-smi return code: {result.returncode}")
            print(f"[v0] nvidia-smi output: {result.stdout[:200] if result.stdout else 'No output'}")
-            if result.returncode == 0 and result.stdout.strip():
+            except Exception as e:
-                for line in result.stdout.strip().split('\n'):
+                print(f"[v0] Error running intel_gpu_top: {e}", flush=True)
-                    if line:
+                import traceback
-                        parts = [p.strip() for p in line.split(',')]
+                traceback.print_exc()
-                        if len(parts) >= 15:
+        else:
-                            detailed_info['driver_version'] = parts[2] if parts[2] != '[N/A]' else None
+            print(f"[v0] intel_gpu_top not found in PATH", flush=True)
                            detailed_info['memory_total'] = int(float(parts[3])) if parts[3] != '[N/A]' else None
                            detailed_info['memory_used'] = int(float(parts[4])) if parts[4] != '[N/A]' else None
                            detailed_info['memory_free'] = int(float(parts[5])) if parts[5] != '[N/A]' else None # Added memory_free
                            detailed_info['temperature'] = int(float(parts[6])) if parts[6] != '[N/A]' else None
                            detailed_info['power_draw'] = float(parts[7]) if parts[7] != '[N/A]' else None
                            detailed_info['power_limit'] = float(parts[8]) if parts[8] != '[N/A]' else None
                            detailed_info['utilization_gpu'] = int(float(parts[9])) if parts[9] != '[N/A]' else None
                            detailed_info['utilization_memory'] = int(float(parts[10])) if parts[10] != '[N/A]' else None
                            detailed_info['clock_graphics'] = int(float(parts[11])) if parts[11] != '[N/A]' else None
                            detailed_info['clock_memory'] = int(float(parts[12])) if parts[12] != '[N/A]' else None
                            detailed_info['pcie_gen'] = parts[13] if parts[13] != '[N/A]' else None
                            detailed_info['pcie_width'] = f"x{parts[14]}" if parts[14] != '[N/A]' else None
                            data_retrieved = True
                            print(f"[v0] NVIDIA GPU data retrieved successfully: {detailed_info}")
                            break
            # Get running processes
            result = subprocess.run(
                ['nvidia-smi', '--query-compute-apps=pid,process_name,used_memory', '--format=csv,noheader'],
                capture_output=True, text=True, timeout=5
            )
            if result.returncode == 0 and result.stdout.strip():
                processes = []
                for line in result.stdout.strip().split('\n'):
                    if line:
                        parts = [p.strip() for p in line.split(',')]
                        if len(parts) >= 3:
                            processes.append({
                                'pid': parts[0],
                                'name': parts[1],
                                'memory': parts[2]
                            })
                detailed_info['processes'] = processes
                print(f"[v0] NVIDIA GPU processes: {len(processes)} found")
            if data_retrieved:
                detailed_info['has_monitoring_tool'] = True
                print(f"[v0] NVIDIA GPU monitoring successful")
            else:
                detailed_info['has_monitoring_tool'] = False
                print(f"[v0] NVIDIA GPU monitoring failed - no data retrieved")
        except subprocess.TimeoutExpired:
            print(f"[v0] nvidia-smi timed out - marking tool as unavailable")
            detailed_info['has_monitoring_tool'] = False
        except Exception as e:
            print(f"[v0] Error getting NVIDIA GPU details: {e}")
            import traceback
            traceback.print_exc()
            detailed_info['has_monitoring_tool'] = False
    elif vendor == 'AMD':
        try:
            check_result = subprocess.run(['which', 'radeontop'], capture_output=True, timeout=1)
            if check_result.returncode != 0:
                detailed_info['has_monitoring_tool'] = False
                print(f"[v0] radeontop not found for AMD GPU")
                return detailed_info
            else:
                print(f"[v0] radeontop found for AMD GPU")
                detailed_info['has_monitoring_tool'] = True
        except Exception as e:
            print(f"[v0] Error checking for radeontop: {e}")
            detailed_info['has_monitoring_tool'] = False
            return detailed_info
    # NVIDIA GPU monitoring with nvidia-smi
    elif 'nvidia' in vendor:
        print(f"[v0] NVIDIA GPU detected, checking for nvidia-smi...", flush=True)
        if shutil.which('nvidia-smi'):
            print(f"[v0] nvidia-smi found, executing...", flush=True)
            try:
                # Basic GPU stats
                query_gpu = 'index,name,memory.total,memory.used,memory.free,temperature.gpu,power.draw,power.limit,utilization.gpu,utilization.memory,clocks.gr,clocks.mem,pcie.link.gen.current,pcie.link.width.current'
                cmd_gpu = ['nvidia-smi', f'--query-gpu={query_gpu}', '--format=csv,noheader,nounits']
                print(f"[v0] Executing command: {' '.join(cmd_gpu)}", flush=True)
                result_gpu = subprocess.run(cmd_gpu, capture_output=True, text=True, timeout=5)
                if result_gpu.returncode == 0 and result_gpu.stdout.strip():
                    output_lines = result_gpu.stdout.strip().split('\n')
                    # Assuming only one GPU, or taking the first one if multiple are returned
                    gpu_data_line = output_lines[0] 
                    parts = [p.strip() for p in gpu_data_line.split(',')]
                    if len(parts) >= 14: # Check if we have enough parts
                        try:
                            detailed_info['temperature'] = int(parts[5]) if parts[5].isdigit() else None
                            detailed_info['power_draw'] = float(parts[6]) if parts[6].replace('.', '', 1).isdigit() else None
                            detailed_info['power_limit'] = float(parts[7]) if parts[7].replace('.', '', 1).isdigit() else None
                            detailed_info['utilization_gpu'] = int(parts[8]) if parts[8].isdigit() else None
                            detailed_info['utilization_memory'] = int(parts[9]) if parts[9].isdigit() else None
                            detailed_info['clock_graphics'] = int(parts[10]) if parts[10].isdigit() else None
                            detailed_info['clock_memory'] = int(parts[11]) if parts[11].isdigit() else None
                            detailed_info['memory_total'] = int(parts[2]) if parts[2].isdigit() else None
                            detailed_info['memory_used'] = int(parts[3]) if parts[3].isdigit() else None
                            detailed_info['memory_free'] = int(parts[4]) if parts[4].isdigit() else None
                            print(f"[v0] NVIDIA GPU Basic Stats: Temp={detailed_info['temperature']}C, Power={detailed_info['power_draw']}W, Util={detailed_info['utilization_gpu']}%", flush=True)
                            detailed_info['has_monitoring_tool'] = True
                        except (ValueError, IndexError) as e:
                            print(f"[v0] Error parsing NVIDIA GPU stats: {e}", flush=True)
                            detailed_info['has_monitoring_tool'] = False
                # Compute processes using GPU
                query_apps = 'pid,process_name,used_memory'
                cmd_apps = ['nvidia-smi', f'--query-compute-apps={query_apps}', '--format=csv,noheader']
                print(f"[v0] Executing command: {' '.join(cmd_apps)}", flush=True)
                result_apps = subprocess.run(cmd_apps, capture_output=True, text=True, timeout=5)
                if result_apps.returncode == 0 and result_apps.stdout.strip():
                    processes = []
                    for line in result_apps.stdout.strip().split('\n'):
                        if line:
                            parts = [p.strip() for p in line.split(',')]
                            if len(parts) >= 3:
                                # Convert memory to MB
                                mem_str = parts[2]
                                mem_mb = 0
                                if 'MiB' in mem_str:
                                    try:
                                        mem_mb = int(float(mem_str.replace('MiB', '').strip()))
                                    except ValueError:
                                        pass
                                elif 'GiB' in mem_str:
                                    try:
                                        mem_mb = int(float(mem_str.replace('GiB', '').strip()) * 1024)
                                    except ValueError:
                                        pass
                                processes.append({
                                    'pid': parts[0],
                                    'name': parts[1],
                                    'memory_used_mb': mem_mb
                                })
                    detailed_info['processes'] = processes
                    print(f"[v0] Found {len(processes)} NVIDIA GPU processes", flush=True)
                if detailed_info['has_monitoring_tool']:
                    print(f"[v0] NVIDIA GPU monitoring successful", flush=True)
                else:
                    print(f"[v0] NVIDIA GPU monitoring failed - essential data not retrieved", flush=True)
            except subprocess.TimeoutExpired:
                print(f"[v0] nvidia-smi timed out - marking tool as unavailable", flush=True)
            except Exception as e:
                print(f"[v0] Error running nvidia-smi: {e}", flush=True)
                import traceback
                traceback.print_exc()
        else:
            print(f"[v0] nvidia-smi not found in PATH", flush=True)
    # AMD GPU monitoring (placeholder, requires radeontop or similar)
    elif 'amd' in vendor:
        print(f"[v0] AMD GPU detected. Monitoring tools like radeontop are needed for detailed info.", flush=True)
        if shutil.which('radeontop'):
            print(f"[v0] radeontop found, but integration is not yet implemented.", flush=True)
        else:
            print(f"[v0] radeontop not found in PATH.", flush=True)
        # Placeholder: return basic info if available from lspci or sensors
        # No detailed monitoring implemented yet for AMD
    else:
        print(f"[v0] Unsupported GPU vendor: {vendor}", flush=True)
    print(f"[v0] ===== Exiting get_detailed_gpu_info for GPU {slot} =====", flush=True)
    return detailed_info
 def get_pci_device_info(pci_slot):
    """Get detailed PCI device information for a given slot"""
    pci_info = {}
@@ -2144,8 +2082,8 @@ def get_gpu_info():
                            gpu['pci_driver'] = pci_info.get('driver', '')
                            gpu['pci_kernel_module'] = pci_info.get('kernel_module', '')
-                        # detailed_info = get_detailed_gpu_info(gpu)
+                        # detailed_info = get_detailed_gpu_info(gpu) # Removed this call here
-                        # gpu.update(detailed_info)
+                        # gpu.update(detailed_info)             # It will be called later in api_gpu_realtime
                        gpus.append(gpu)
                        print(f"[v0] Found GPU: {gpu_name} ({vendor}) at slot {slot}")
@@ -2859,7 +2797,7 @@ def api_hardware():
            'motherboard': hardware_info.get('motherboard', {}),
            'bios': hardware_info.get('motherboard', {}).get('bios', {}), # Extract BIOS info
            'memory_modules': hardware_info.get('memory_modules', []),
-            'storage_devices': hardware_info.get('storage_devices', []), # Fixed: use hardware_data
+            'storage_devices': hardware_info.get('storage_devices', []), # Fixed: use hardware_info
            'pci_devices': hardware_info.get('pci_devices', []),
            'temperatures': hardware_info.get('sensors', {}).get('temperatures', []),
            'fans': all_fans, # Return combined fans (sensors + IPMI)
@@ -2895,15 +2833,16 @@ def api_gpu_realtime(slot):
        gpu = None
        for g in gpus:
-            if g.get('slot') == slot or g.get('slot', '').startswith(slot):
+            # Match on an exact slot, or when the requested slot is a substring of the GPU's slot (e.g., a request for '00:01' matches GPU slot '00:01.0')
            if g.get('slot') == slot or slot in g.get('slot', ''):
                gpu = g
                break
        if not gpu:
-            print(f"[v0] GPU with slot {slot} not found")
+            print(f"[v0] GPU with slot matching '{slot}' not found")
            return jsonify({'error': 'GPU not found'}), 404
-        print(f"[v0] Getting detailed monitoring data for GPU {slot}...")
+        print(f"[v0] Getting detailed monitoring data for GPU at slot {gpu.get('slot')}...")
        detailed_info = get_detailed_gpu_info(gpu)
        gpu.update(detailed_info)