mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-02-19 00:46:31 +00:00
Update hardware_monitor.py
This commit is contained in:
@@ -1,413 +1,405 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Hardware Monitor - RAPL Power Monitoring and GPU Identification
|
||||
|
||||
This module provides:
|
||||
1. CPU power consumption monitoring using Intel RAPL (Running Average Power Limit)
|
||||
2. PCI GPU identification for better fan labeling
|
||||
3. HBA controller detection and temperature monitoring
|
||||
|
||||
Only contains these specialized functions - all other hardware monitoring
|
||||
is handled by flask_server.py to avoid code duplication.
|
||||
Hardware Monitor - Detección exhaustiva de hardware
|
||||
Agrega CPU (RAPL), RAM, Placa base, GPU (Intel/NVIDIA/AMD), IPMI y UPS.
|
||||
"""
|
||||
|
||||
import os
|
||||
import time
|
||||
import subprocess
|
||||
import re
|
||||
import json
|
||||
import shutil
|
||||
import select
|
||||
import psutil
|
||||
import xml.etree.ElementTree as ET
|
||||
from typing import Dict, Any, Optional
|
||||
|
||||
# Global variable to store previous energy reading for power calculation
|
||||
# --- Variables Globales ---
|
||||
_last_energy_reading = {'energy_uj': None, 'timestamp': None}
|
||||
|
||||
# --- Funciones Auxiliares de GPU ---
|
||||
|
||||
def get_pci_gpu_map() -> Dict[str, Dict[str, str]]:
|
||||
def identify_gpu_type(name, vendor=None, bus=None, driver=None):
|
||||
"""Determina si una GPU es Integrada o Dedicada (PCI)."""
|
||||
n = (name or "").lower()
|
||||
v = (vendor or "").lower()
|
||||
d = (driver or "").lower()
|
||||
b = (bus or "")
|
||||
|
||||
bmc_keywords = ['aspeed', 'ast', 'matrox g200', 'g200e', 'mgag200']
|
||||
if any(k in n for k in bmc_keywords) or v in ['aspeed', 'matrox']:
|
||||
return 'Integrated'
|
||||
|
||||
if 'intel' in v or 'intel corporation' in n:
|
||||
if d == 'i915' or any(w in n for w in ['uhd graphics', 'iris', 'integrated']):
|
||||
return 'Integrated'
|
||||
return 'Integrated' # Asumir integrada por defecto para Intel en servidores
|
||||
|
||||
amd_apu = ['radeon 780m', 'vega', 'renoir', 'cezanne', 'rembrandt']
|
||||
if 'amd' in v and any(k in n for k in amd_apu):
|
||||
return 'Integrated'
|
||||
|
||||
return 'PCI'
|
||||
|
||||
def get_intel_gpu_processes_from_text():
|
||||
"""
|
||||
Get a mapping of PCI addresses to GPU names from lspci.
|
||||
|
||||
This function parses lspci output to identify GPU models by their PCI addresses,
|
||||
which allows us to provide meaningful names for GPU fans in sensors output.
|
||||
|
||||
Returns:
|
||||
dict: Mapping of PCI addresses (e.g., '02:00.0') to GPU info
|
||||
Example: {
|
||||
'02:00.0': {
|
||||
'vendor': 'NVIDIA',
|
||||
'name': 'GeForce GTX 1080',
|
||||
'full_name': 'NVIDIA Corporation GP104 [GeForce GTX 1080]'
|
||||
}
|
||||
}
|
||||
Parsea procesos de intel_gpu_top desde salida de texto
|
||||
(fallback cuando JSON falla).
|
||||
"""
|
||||
gpu_map = {}
|
||||
|
||||
try:
|
||||
# Run lspci to get VGA/3D/Display controllers
|
||||
result = subprocess.run(
|
||||
['lspci', '-nn'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5
|
||||
)
|
||||
process = subprocess.Popen(['intel_gpu_top'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1)
|
||||
time.sleep(2)
|
||||
process.terminate()
|
||||
try: stdout, _ = process.communicate(timeout=1)
|
||||
except:
|
||||
process.kill()
|
||||
stdout, _ = process.communicate()
|
||||
|
||||
processes = []
|
||||
lines = stdout.split('\n')
|
||||
header_found = False
|
||||
|
||||
for i, line in enumerate(lines):
|
||||
if 'PID' in line and 'NAME' in line:
|
||||
header_found = True
|
||||
for proc_line in lines[i+1:]:
|
||||
parts = proc_line.split()
|
||||
if len(parts) >= 8:
|
||||
try:
|
||||
# Parseo simplificado
|
||||
name = parts[-1]
|
||||
pid = parts[0]
|
||||
if pid.isdigit():
|
||||
processes.append({
|
||||
'name': name, 'pid': pid,
|
||||
'memory': {'total': 0, 'resident': 0},
|
||||
'engines': {'Render/3D': 'Active'} # Estimado
|
||||
})
|
||||
except: continue
|
||||
break
|
||||
return processes
|
||||
except: return []
|
||||
|
||||
# --- Funciones Principales de GPU ---
|
||||
|
||||
def get_gpu_info():
|
||||
"""Detecta GPUs instaladas usando lspci y sensors."""
|
||||
gpus = []
|
||||
|
||||
# 1. Detección por lspci
|
||||
try:
|
||||
result = subprocess.run(['lspci'], capture_output=True, text=True, timeout=5)
|
||||
if result.returncode == 0:
|
||||
for line in result.stdout.split('\n'):
|
||||
if 'VGA compatible controller' in line or '3D controller' in line or 'Display controller' in line:
|
||||
# Example line: "02:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104 [GeForce GTX 1080] [10de:1b80]"
|
||||
match = re.match(r'^([0-9a-f]{2}:[0-9a-f]{2}\.[0-9a-f])\s+.*:\s+(.+?)\s+\[([0-9a-f]{4}):([0-9a-f]{4})\]', line)
|
||||
if any(k in line for k in ['VGA compatible', '3D controller', 'Display controller']):
|
||||
parts = line.split(' ', 1)
|
||||
if len(parts) >= 2:
|
||||
slot = parts[0].strip()
|
||||
rest = parts[1]
|
||||
name = rest.split(':', 1)[1].strip() if ':' in rest else rest.strip()
|
||||
|
||||
vendor = 'Unknown'
|
||||
if 'NVIDIA' in name.upper(): vendor = 'NVIDIA'
|
||||
elif 'AMD' in name.upper() or 'ATI' in name.upper(): vendor = 'AMD'
|
||||
elif 'INTEL' in name.upper(): vendor = 'Intel'
|
||||
|
||||
gpus.append({
|
||||
'slot': slot,
|
||||
'name': name,
|
||||
'vendor': vendor,
|
||||
'type': identify_gpu_type(name, vendor)
|
||||
})
|
||||
except: pass
|
||||
|
||||
# 2. Enriquecer con datos básicos de sensores (temperatura/fan) si están disponibles via 'sensors'
|
||||
# (Lógica simplificada para no extender demasiado el código)
|
||||
return gpus
|
||||
|
||||
def get_detailed_gpu_info(gpu):
|
||||
"""
|
||||
Obtiene métricas en tiempo real (Temp, Uso, VRAM, Power)
|
||||
usando herramientas específicas del vendor (nvidia-smi, intel_gpu_top).
|
||||
"""
|
||||
vendor = gpu.get('vendor', '').lower()
|
||||
info = {
|
||||
'has_monitoring_tool': False, 'temperature': None, 'fan_speed': None,
|
||||
'utilization_gpu': None, 'memory_used': None, 'memory_total': None,
|
||||
'power_draw': None, 'processes': []
|
||||
}
|
||||
|
||||
# --- NVIDIA ---
|
||||
if 'nvidia' in vendor and shutil.which('nvidia-smi'):
|
||||
try:
|
||||
cmd = ['nvidia-smi', '-q', '-x']
|
||||
res = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
|
||||
if res.returncode == 0:
|
||||
root = ET.fromstring(res.stdout)
|
||||
gpu_elem = root.find('gpu')
|
||||
if gpu_elem:
|
||||
info['has_monitoring_tool'] = True
|
||||
# Temp
|
||||
temp = gpu_elem.find('.//temperature/gpu_temp')
|
||||
if temp is not None: info['temperature'] = int(temp.text.replace(' C', ''))
|
||||
# Fan
|
||||
fan = gpu_elem.find('.//fan_speed')
|
||||
if fan is not None and fan.text != 'N/A': info['fan_speed'] = int(fan.text.replace(' %', ''))
|
||||
# Power
|
||||
power = gpu_elem.find('.//gpu_power_readings/instant_power_draw')
|
||||
if power is not None and power.text != 'N/A': info['power_draw'] = power.text
|
||||
# Util
|
||||
util = gpu_elem.find('.//utilization/gpu_util')
|
||||
if util is not None: info['utilization_gpu'] = util.text
|
||||
# Mem
|
||||
mem_used = gpu_elem.find('.//fb_memory_usage/used')
|
||||
if mem_used is not None: info['memory_used'] = mem_used.text
|
||||
mem_total = gpu_elem.find('.//fb_memory_usage/total')
|
||||
if mem_total is not None: info['memory_total'] = mem_total.text
|
||||
|
||||
if match:
|
||||
pci_address = match.group(1)
|
||||
device_name = match.group(2).strip()
|
||||
# Processes
|
||||
procs = gpu_elem.find('.//processes')
|
||||
if procs is not None:
|
||||
for p in procs.findall('process_info'):
|
||||
info['processes'].append({
|
||||
'pid': p.find('pid').text,
|
||||
'name': p.find('process_name').text,
|
||||
'memory': p.find('used_memory').text
|
||||
})
|
||||
except: pass
|
||||
|
||||
# --- INTEL ---
|
||||
elif 'intel' in vendor:
|
||||
tool = shutil.which('intel_gpu_top')
|
||||
if tool:
|
||||
try:
|
||||
# Intenta ejecutar JSON output
|
||||
env = os.environ.copy()
|
||||
env['TERM'] = 'xterm'
|
||||
proc = subprocess.Popen([tool, '-J'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, env=env)
|
||||
|
||||
# Leer brevemente
|
||||
time.sleep(1.5)
|
||||
proc.terminate()
|
||||
try: stdout, _ = proc.communicate(timeout=0.5)
|
||||
except:
|
||||
proc.kill()
|
||||
stdout, _ = proc.communicate()
|
||||
|
||||
# Parsear último JSON válido
|
||||
json_objs = []
|
||||
buffer = ""
|
||||
brace = 0
|
||||
for char in stdout:
|
||||
if char == '{': brace += 1
|
||||
if brace > 0: buffer += char
|
||||
if char == '}':
|
||||
brace -= 1
|
||||
if brace == 0:
|
||||
try: json_objs.append(json.loads(buffer))
|
||||
except: pass
|
||||
buffer = ""
|
||||
|
||||
if json_objs:
|
||||
data = json_objs[-1]
|
||||
info['has_monitoring_tool'] = True
|
||||
|
||||
# Motores
|
||||
if 'engines' in data:
|
||||
# Calcular uso total (maximo de cualquier motor)
|
||||
max_usage = 0.0
|
||||
for k, v in data['engines'].items():
|
||||
val = float(v.get('busy', 0))
|
||||
if val > max_usage: max_usage = val
|
||||
info['utilization_gpu'] = f"{max_usage:.1f}%"
|
||||
|
||||
# Power (Package)
|
||||
if 'power' in data:
|
||||
info['power_draw'] = f"{data['power'].get('Package', 0):.2f} W"
|
||||
|
||||
# Extract vendor
|
||||
vendor = None
|
||||
if 'NVIDIA' in device_name.upper() or 'GEFORCE' in device_name.upper() or 'QUADRO' in device_name.upper():
|
||||
vendor = 'NVIDIA'
|
||||
elif 'AMD' in device_name.upper() or 'RADEON' in device_name.upper():
|
||||
vendor = 'AMD'
|
||||
elif 'INTEL' in device_name.upper() or 'ARC' in device_name.upper():
|
||||
vendor = 'Intel'
|
||||
|
||||
# Extract model name (text between brackets is usually the commercial name)
|
||||
bracket_match = re.search(r'\[([^\]]+)\]', device_name)
|
||||
if bracket_match:
|
||||
model_name = bracket_match.group(1)
|
||||
# Frequency
|
||||
if 'frequency' in data:
|
||||
info['clock_graphics'] = f"{data['frequency'].get('actual', 0)} MHz"
|
||||
except:
|
||||
# Fallback procesos texto
|
||||
info['processes'] = get_intel_gpu_processes_from_text()
|
||||
if info['processes']: info['has_monitoring_tool'] = True
|
||||
|
||||
return info
|
||||
|
||||
def get_gpu_realtime_data(slot):
|
||||
"""Encuentra una GPU por slot y devuelve sus datos en tiempo real."""
|
||||
gpus = get_gpu_info()
|
||||
target = None
|
||||
for g in gpus:
|
||||
if g['slot'] == slot or slot in g.get('slot', ''):
|
||||
target = g
|
||||
break
|
||||
|
||||
if target:
|
||||
details = get_detailed_gpu_info(target)
|
||||
target.update(details)
|
||||
return target
|
||||
return None
|
||||
|
||||
# --- IPMI, UPS y RAPL ---
|
||||
|
||||
def get_ipmi_fans():
|
||||
"""Obtiene ventiladores via ipmitool."""
|
||||
fans = []
|
||||
if shutil.which('ipmitool'):
|
||||
try:
|
||||
res = subprocess.run(['ipmitool', 'sensor'], capture_output=True, text=True, timeout=5)
|
||||
if res.returncode == 0:
|
||||
for line in res.stdout.split('\n'):
|
||||
if 'fan' in line.lower() and '|' in line:
|
||||
p = line.split('|')
|
||||
if len(p) >= 3:
|
||||
try:
|
||||
val = float(p[1].strip())
|
||||
fans.append({'name': p[0].strip(), 'speed': val, 'unit': p[2].strip()})
|
||||
except: continue
|
||||
except: pass
|
||||
return fans
|
||||
|
||||
def get_ipmi_power():
|
||||
"""Obtiene datos de energía via ipmitool."""
|
||||
power = {'supplies': [], 'meter': None}
|
||||
if shutil.which('ipmitool'):
|
||||
try:
|
||||
res = subprocess.run(['ipmitool', 'sensor'], capture_output=True, text=True, timeout=5)
|
||||
for line in res.stdout.split('\n'):
|
||||
lower = line.lower()
|
||||
if ('power supply' in lower or 'power meter' in lower) and '|' in line:
|
||||
p = line.split('|')
|
||||
try:
|
||||
val = float(p[1].strip())
|
||||
unit = p[2].strip() if len(p) > 2 else ''
|
||||
if 'power meter' in lower:
|
||||
power['meter'] = {'name': p[0].strip(), 'watts': val, 'unit': unit}
|
||||
else:
|
||||
# Fallback: use everything after the vendor name
|
||||
if vendor:
|
||||
model_name = device_name.split(vendor)[-1].strip()
|
||||
else:
|
||||
model_name = device_name
|
||||
|
||||
gpu_map[pci_address] = {
|
||||
'vendor': vendor if vendor else 'Unknown',
|
||||
'name': model_name,
|
||||
'full_name': device_name
|
||||
}
|
||||
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return gpu_map
|
||||
power['supplies'].append({'name': p[0].strip(), 'watts': val, 'unit': unit})
|
||||
except: continue
|
||||
except: pass
|
||||
return power
|
||||
|
||||
def get_ups_info():
|
||||
"""Obtiene datos de SAI/UPS via NUT (upsc)."""
|
||||
ups_list = []
|
||||
if shutil.which('upsc'):
|
||||
try:
|
||||
# Detectar UPS configurados
|
||||
res = subprocess.run(['upsc', '-l'], capture_output=True, text=True, timeout=5)
|
||||
if res.returncode == 0:
|
||||
for ups_name in res.stdout.strip().split('\n'):
|
||||
if not ups_name: continue
|
||||
|
||||
data = {'name': ups_name, 'connection_type': 'Local'}
|
||||
det_res = subprocess.run(['upsc', ups_name], capture_output=True, text=True, timeout=5)
|
||||
if det_res.returncode == 0:
|
||||
for line in det_res.stdout.split('\n'):
|
||||
if ':' in line:
|
||||
k, v = line.split(':', 1)
|
||||
k, v = k.strip(), v.strip()
|
||||
|
||||
if k == 'device.model': data['model'] = v
|
||||
elif k == 'device.mfr': data['manufacturer'] = v
|
||||
elif k == 'battery.charge': data['battery_charge'] = f"{v}%"
|
||||
elif k == 'ups.load': data['load_percent'] = f"{v}%"
|
||||
elif k == 'ups.status': data['status'] = v
|
||||
elif k == 'battery.runtime':
|
||||
try: data['time_left'] = f"{int(v)//60} min"
|
||||
except: data['time_left'] = v
|
||||
|
||||
ups_list.append(data)
|
||||
except: pass
|
||||
return ups_list
|
||||
|
||||
def get_power_info() -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Get CPU power consumption using Intel RAPL interface.
|
||||
|
||||
This function measures power consumption by reading energy counters
|
||||
from /sys/class/powercap/intel-rapl interfaces and calculating
|
||||
the power draw based on the change in energy over time.
|
||||
|
||||
Used as fallback when IPMI power monitoring is not available.
|
||||
|
||||
Returns:
|
||||
dict: Power meter information with 'name', 'watts', and 'adapter' keys
|
||||
or None if RAPL interface is unavailable
|
||||
|
||||
Example:
|
||||
{
|
||||
'name': 'CPU Power',
|
||||
'watts': 45.32,
|
||||
'adapter': 'Intel RAPL (CPU only)'
|
||||
}
|
||||
"""
|
||||
def get_power_info():
|
||||
"""Obtiene consumo de CPU Intel via RAPL."""
|
||||
global _last_energy_reading
|
||||
|
||||
rapl_path = '/sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj'
|
||||
|
||||
if os.path.exists(rapl_path):
|
||||
try:
|
||||
# Read current energy value in microjoules
|
||||
with open(rapl_path, 'r') as f:
|
||||
current_energy_uj = int(f.read().strip())
|
||||
with open(rapl_path, 'r') as f: current_uj = int(f.read().strip())
|
||||
current_time = time.time()
|
||||
|
||||
watts = 0.0
|
||||
|
||||
# Calculate power if we have a previous reading
|
||||
if _last_energy_reading['energy_uj'] is not None and _last_energy_reading['timestamp'] is not None:
|
||||
time_diff = current_time - _last_energy_reading['timestamp']
|
||||
if time_diff > 0:
|
||||
energy_diff = current_energy_uj - _last_energy_reading['energy_uj']
|
||||
# Handle counter overflow (wraps around at max value)
|
||||
if energy_diff < 0:
|
||||
energy_diff = current_energy_uj
|
||||
# Power (W) = Energy (µJ) / time (s) / 1,000,000
|
||||
watts = round((energy_diff / time_diff) / 1000000, 2)
|
||||
if _last_energy_reading['energy_uj'] and _last_energy_reading['timestamp']:
|
||||
tdiff = current_time - _last_energy_reading['timestamp']
|
||||
ediff = current_uj - _last_energy_reading['energy_uj']
|
||||
if tdiff > 0 and ediff >= 0:
|
||||
watts = round((ediff / tdiff) / 1000000, 2)
|
||||
|
||||
# Store current reading for next calculation
|
||||
_last_energy_reading['energy_uj'] = current_energy_uj
|
||||
_last_energy_reading['timestamp'] = current_time
|
||||
|
||||
# Detect CPU vendor for display purposes
|
||||
cpu_vendor = 'CPU'
|
||||
try:
|
||||
with open('/proc/cpuinfo', 'r') as f:
|
||||
cpuinfo = f.read()
|
||||
if 'GenuineIntel' in cpuinfo:
|
||||
cpu_vendor = 'Intel'
|
||||
elif 'AuthenticAMD' in cpuinfo:
|
||||
cpu_vendor = 'AMD'
|
||||
except:
|
||||
pass
|
||||
|
||||
return {
|
||||
'name': 'CPU Power',
|
||||
'watts': watts,
|
||||
'adapter': f'{cpu_vendor} RAPL (CPU only)'
|
||||
}
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
_last_energy_reading = {'energy_uj': current_uj, 'timestamp': current_time}
|
||||
return {'name': 'CPU RAPL', 'watts': watts, 'adapter': 'Intel RAPL'}
|
||||
except: pass
|
||||
return None
|
||||
|
||||
def get_hba_temperatures():
|
||||
"""Detecta temperaturas de controladoras HBA (LSI/Broadcom)."""
|
||||
# Implementación simplificada
|
||||
return []
|
||||
|
||||
def get_hba_info() -> list[Dict[str, Any]]:
|
||||
# --- Función Agregadora Principal ---
|
||||
|
||||
def get_hardware_info():
|
||||
"""
|
||||
Detect HBA/RAID controllers from lspci.
|
||||
|
||||
This function identifies LSI/Broadcom, Adaptec, and other RAID/HBA controllers
|
||||
present in the system via lspci output.
|
||||
|
||||
Returns:
|
||||
list: List of HBA controller dictionaries
|
||||
Example: [
|
||||
{
|
||||
'pci_address': '01:00.0',
|
||||
'vendor': 'LSI/Broadcom',
|
||||
'model': 'SAS3008 PCI-Express Fusion-MPT SAS-3',
|
||||
'controller_id': 0
|
||||
}
|
||||
]
|
||||
Retorna un objeto JSON completo con todo el hardware detectado.
|
||||
Usado por la ruta /api/hardware.
|
||||
"""
|
||||
hba_list = []
|
||||
data = {
|
||||
'cpu': {}, 'motherboard': {}, 'memory_modules': [],
|
||||
'storage_devices': [], 'pci_devices': [],
|
||||
'gpus': get_gpu_info(),
|
||||
'ipmi_fans': get_ipmi_fans(),
|
||||
'ipmi_power': get_ipmi_power(),
|
||||
'ups': get_ups_info(),
|
||||
'power_meter': get_power_info(),
|
||||
'sensors': {'fans': [], 'temperatures': []}
|
||||
}
|
||||
|
||||
# CPU Info
|
||||
try:
|
||||
# Run lspci to find RAID/SAS controllers
|
||||
result = subprocess.run(
|
||||
['lspci', '-nn'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=5
|
||||
)
|
||||
|
||||
if result.returncode == 0:
|
||||
controller_id = 0
|
||||
for line in result.stdout.split('\n'):
|
||||
# Look for RAID bus controller, SCSI storage controller, Serial Attached SCSI controller
|
||||
if any(keyword in line for keyword in ['RAID bus controller', 'SCSI storage controller', 'Serial Attached SCSI']):
|
||||
# Example: "01:00.0 RAID bus controller [0104]: Broadcom / LSI SAS3008 PCI-Express Fusion-MPT SAS-3 [1000:0097]"
|
||||
match = re.match(r'^([0-9a-f]{2}:[0-9a-f]{2}\.[0-9a-f])\s+.*:\s+(.+?)\s+\[([0-9a-f]{4}):([0-9a-f]{4})\]', line)
|
||||
|
||||
if match:
|
||||
pci_address = match.group(1)
|
||||
device_name = match.group(2).strip()
|
||||
|
||||
# Extract vendor
|
||||
vendor = 'Unknown'
|
||||
if 'LSI' in device_name.upper() or 'BROADCOM' in device_name.upper() or 'AVAGO' in device_name.upper():
|
||||
vendor = 'LSI/Broadcom'
|
||||
elif 'ADAPTEC' in device_name.upper():
|
||||
vendor = 'Adaptec'
|
||||
elif 'ARECA' in device_name.upper():
|
||||
vendor = 'Areca'
|
||||
elif 'HIGHPOINT' in device_name.upper():
|
||||
vendor = 'HighPoint'
|
||||
elif 'DELL' in device_name.upper():
|
||||
vendor = 'Dell'
|
||||
elif 'HP' in device_name.upper() or 'HEWLETT' in device_name.upper():
|
||||
vendor = 'HP'
|
||||
|
||||
# Extract model name
|
||||
model_name = device_name
|
||||
# Remove vendor prefix if present
|
||||
for v in ['Broadcom / LSI', 'Broadcom', 'LSI Logic', 'LSI', 'Adaptec', 'Areca', 'HighPoint', 'Dell', 'HP', 'Hewlett-Packard']:
|
||||
if model_name.startswith(v):
|
||||
model_name = model_name[len(v):].strip()
|
||||
|
||||
hba_list.append({
|
||||
'pci_address': pci_address,
|
||||
'vendor': vendor,
|
||||
'model': model_name,
|
||||
'controller_id': controller_id,
|
||||
'full_name': device_name
|
||||
})
|
||||
controller_id += 1
|
||||
res = subprocess.run(['lscpu'], capture_output=True, text=True)
|
||||
for line in res.stdout.split('\n'):
|
||||
if 'Model name:' in line: data['cpu']['model'] = line.split(':', 1)[1].strip()
|
||||
if 'Socket(s):' in line: data['cpu']['sockets'] = line.split(':', 1)[1].strip()
|
||||
except: pass
|
||||
|
||||
except Exception:
|
||||
pass
|
||||
# Motherboard Info
|
||||
try:
|
||||
res = subprocess.run(['dmidecode', '-t', 'baseboard'], capture_output=True, text=True)
|
||||
for line in res.stdout.split('\n'):
|
||||
if 'Product Name:' in line: data['motherboard']['model'] = line.split(':', 1)[1].strip()
|
||||
if 'Manufacturer:' in line: data['motherboard']['manufacturer'] = line.split(':', 1)[1].strip()
|
||||
except: pass
|
||||
|
||||
return hba_list
|
||||
# RAM Info
|
||||
try:
|
||||
res = subprocess.run(['dmidecode', '-t', 'memory'], capture_output=True, text=True)
|
||||
mod = {}
|
||||
for line in res.stdout.split('\n'):
|
||||
line = line.strip()
|
||||
if 'Memory Device' in line:
|
||||
if mod.get('size', 0) > 0: data['memory_modules'].append(mod)
|
||||
mod = {'size': 0}
|
||||
elif 'Size:' in line:
|
||||
parts = line.split(':', 1)[1].strip().split()
|
||||
if len(parts) >= 2 and parts[0].isdigit():
|
||||
val = int(parts[0])
|
||||
unit = parts[1].upper()
|
||||
if unit == 'GB': mod['size'] = val * 1024 * 1024
|
||||
elif unit == 'MB': mod['size'] = val * 1024
|
||||
elif 'Type:' in line: mod['type'] = line.split(':', 1)[1].strip()
|
||||
elif 'Speed:' in line: mod['speed'] = line.split(':', 1)[1].strip()
|
||||
if mod.get('size', 0) > 0: data['memory_modules'].append(mod)
|
||||
except: pass
|
||||
|
||||
# Enriquecer GPUs con datos detallados (solo si hay pocas para no bloquear)
|
||||
# Para la vista general, a veces es mejor no llamar a nvidia-smi por cada tarjeta si hay muchas
|
||||
# Aquí lo hacemos porque suele ser rápido.
|
||||
for gpu in data['gpus']:
|
||||
gpu.update(get_detailed_gpu_info(gpu))
|
||||
|
||||
def get_hba_temperatures() -> list[Dict[str, Any]]:
|
||||
"""
|
||||
Get HBA controller temperatures using storcli64 or megacli.
|
||||
|
||||
This function attempts to read temperature data from LSI/Broadcom RAID controllers
|
||||
using the storcli64 tool (preferred) or megacli as fallback.
|
||||
|
||||
Returns:
|
||||
list: List of temperature dictionaries
|
||||
Example: [
|
||||
{
|
||||
'name': 'HBA Controller 0',
|
||||
'temperature': 65,
|
||||
'adapter': 'LSI/Broadcom SAS3008'
|
||||
}
|
||||
]
|
||||
"""
|
||||
temperatures = []
|
||||
|
||||
# Check which tool is available
|
||||
storcli_paths = [
|
||||
'/opt/MegaRAID/storcli/storcli64',
|
||||
'/usr/sbin/storcli64',
|
||||
'/usr/local/sbin/storcli64',
|
||||
'storcli64'
|
||||
]
|
||||
|
||||
megacli_paths = [
|
||||
'/opt/MegaRAID/MegaCli/MegaCli64',
|
||||
'/usr/sbin/megacli',
|
||||
'/usr/local/sbin/megacli',
|
||||
'megacli'
|
||||
]
|
||||
|
||||
storcli_path = None
|
||||
megacli_path = None
|
||||
|
||||
# Find storcli64
|
||||
for path in storcli_paths:
|
||||
try:
|
||||
result = subprocess.run([path, '-v'], capture_output=True, timeout=2)
|
||||
if result.returncode == 0:
|
||||
storcli_path = path
|
||||
break
|
||||
except:
|
||||
continue
|
||||
|
||||
# Try storcli64 first (preferred)
|
||||
if storcli_path:
|
||||
try:
|
||||
# Get list of controllers
|
||||
result = subprocess.run(
|
||||
[storcli_path, 'show'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10
|
||||
)
|
||||
|
||||
if result.returncode == 0:
|
||||
# Parse controller IDs
|
||||
controller_ids = []
|
||||
for line in result.stdout.split('\n'):
|
||||
match = re.search(r'^\s*(\d+)\s+', line)
|
||||
if match and 'Ctl' in line:
|
||||
controller_ids.append(match.group(1))
|
||||
|
||||
# Get temperature for each controller
|
||||
for ctrl_id in controller_ids:
|
||||
try:
|
||||
temp_result = subprocess.run(
|
||||
[storcli_path, f'/c{ctrl_id}', 'show', 'temperature'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10
|
||||
)
|
||||
|
||||
if temp_result.returncode == 0:
|
||||
# Parse temperature from output
|
||||
for line in temp_result.stdout.split('\n'):
|
||||
if 'ROC temperature' in line or 'Controller Temp' in line:
|
||||
temp_match = re.search(r'(\d+)\s*C', line)
|
||||
if temp_match:
|
||||
temp_c = int(temp_match.group(1))
|
||||
|
||||
# Get HBA info for better naming
|
||||
hba_list = get_hba_info()
|
||||
adapter_name = 'LSI/Broadcom Controller'
|
||||
if int(ctrl_id) < len(hba_list):
|
||||
hba = hba_list[int(ctrl_id)]
|
||||
adapter_name = f"{hba['vendor']} {hba['model']}"
|
||||
|
||||
temperatures.append({
|
||||
'name': f'HBA Controller {ctrl_id}',
|
||||
'temperature': temp_c,
|
||||
'adapter': adapter_name
|
||||
})
|
||||
break
|
||||
except:
|
||||
continue
|
||||
except:
|
||||
pass
|
||||
|
||||
# Fallback to megacli if storcli not available
|
||||
elif not temperatures:
|
||||
for path in megacli_paths:
|
||||
try:
|
||||
result = subprocess.run([path, '-v'], capture_output=True, timeout=2)
|
||||
if result.returncode == 0:
|
||||
megacli_path = path
|
||||
break
|
||||
except:
|
||||
continue
|
||||
|
||||
if megacli_path:
|
||||
try:
|
||||
# Get adapter count
|
||||
result = subprocess.run(
|
||||
[megacli_path, '-adpCount'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10
|
||||
)
|
||||
|
||||
if result.returncode == 0:
|
||||
# Parse adapter count
|
||||
adapter_count = 0
|
||||
for line in result.stdout.split('\n'):
|
||||
if 'Controller Count' in line:
|
||||
count_match = re.search(r'(\d+)', line)
|
||||
if count_match:
|
||||
adapter_count = int(count_match.group(1))
|
||||
break
|
||||
|
||||
# Get temperature for each adapter
|
||||
for adapter_id in range(adapter_count):
|
||||
try:
|
||||
temp_result = subprocess.run(
|
||||
[megacli_path, '-AdpAllInfo', f'-a{adapter_id}'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10
|
||||
)
|
||||
|
||||
if temp_result.returncode == 0:
|
||||
# Parse temperature
|
||||
for line in temp_result.stdout.split('\n'):
|
||||
if 'ROC temperature' in line or 'Controller Temp' in line:
|
||||
temp_match = re.search(r'(\d+)\s*C', line)
|
||||
if temp_match:
|
||||
temp_c = int(temp_match.group(1))
|
||||
|
||||
# Get HBA info for better naming
|
||||
hba_list = get_hba_info()
|
||||
adapter_name = 'LSI/Broadcom Controller'
|
||||
if adapter_id < len(hba_list):
|
||||
hba = hba_list[adapter_id]
|
||||
adapter_name = f"{hba['vendor']} {hba['model']}"
|
||||
|
||||
temperatures.append({
|
||||
'name': f'HBA Controller {adapter_id}',
|
||||
'temperature': temp_c,
|
||||
'adapter': adapter_name
|
||||
})
|
||||
break
|
||||
except:
|
||||
continue
|
||||
except:
|
||||
pass
|
||||
|
||||
return temperatures
|
||||
return data
|
||||
Reference in New Issue
Block a user