Update backend monitor

This commit is contained in:
MacRimi
2026-01-29 18:27:36 +01:00
parent 1657a7dbe3
commit ca13d18d7d
13 changed files with 6714 additions and 2106 deletions

View File

@@ -89,16 +89,6 @@ cp "$SCRIPT_DIR/flask_terminal_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || ech
cp "$SCRIPT_DIR/hardware_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ hardware_monitor.py not found" cp "$SCRIPT_DIR/hardware_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ hardware_monitor.py not found"
cp "$SCRIPT_DIR/proxmox_storage_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ proxmox_storage_monitor.py not found" cp "$SCRIPT_DIR/proxmox_storage_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ proxmox_storage_monitor.py not found"
cp "$SCRIPT_DIR/flask_script_runner.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_script_runner.py not found" cp "$SCRIPT_DIR/flask_script_runner.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_script_runner.py not found"
cp "$SCRIPT_DIR/system_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ system_monitor.py not found"
cp "$SCRIPT_DIR/flask_system_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_system_routes.py not found"
cp "$SCRIPT_DIR/storage_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ storage_monitor.py not found"
cp "$SCRIPT_DIR/flask_storage_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_storage_routes.py not found"
cp "$SCRIPT_DIR/network_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ network_monitor.py not found"
cp "$SCRIPT_DIR/flask_network_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_network_routes.py not found"
cp "$SCRIPT_DIR/vm_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ vm_monitor.py not found"
cp "$SCRIPT_DIR/flask_vm_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_vm_routes.py not found"
cp "$SCRIPT_DIR/flask_hardware_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_hardware_routes.py not found"
cp "$SCRIPT_DIR/flask_script_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_script_routes.py not found"
echo "📋 Adding translation support..." echo "📋 Adding translation support..."
cat > "$APP_DIR/usr/bin/translate_cli.py" << 'PYEOF' cat > "$APP_DIR/usr/bin/translate_cli.py" << 'PYEOF'

View File

@@ -1,37 +0,0 @@
from flask import Blueprint, jsonify
from jwt_middleware import require_auth
import hardware_monitor
# Definimos el Blueprint
hardware_bp = Blueprint('hardware', __name__)
@hardware_bp.route('/api/hardware', methods=['GET'])
@require_auth
def api_hardware():
    """Return the full aggregated hardware inventory.

    Covers CPU, motherboard, RAM, disks, GPUs, IPMI and UPS data as
    reported by hardware_monitor.get_hardware_info().
    """
    try:
        return jsonify(hardware_monitor.get_hardware_info())
    except Exception as exc:
        # On a critical collector failure, return a descriptive 500.
        return jsonify({'error': str(exc)}), 500
@hardware_bp.route('/api/gpu/<slot>/realtime', methods=['GET'])
@require_auth
def api_gpu_realtime(slot):
    """Return live metrics (usage, temperature, memory) for one GPU.

    'slot' is the PCI address of the card (e.g. '01:00.0').
    Responds 404 when no GPU matches the given slot.
    """
    try:
        metrics = hardware_monitor.get_gpu_realtime_data(slot)
        if metrics:
            return jsonify(metrics)
        return jsonify({'error': 'GPU not found'}), 404
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500

View File

@@ -1,50 +0,0 @@
from flask import Blueprint, jsonify, request
from jwt_middleware import require_auth
import network_monitor
# Definimos el Blueprint para las rutas de red
network_bp = Blueprint('network', __name__)
@network_bp.route('/api/network', methods=['GET'])
@require_auth
def api_network():
    """Return full details for every network interface.

    Covers physical and virtual interfaces, bridges, bonds and the
    current traffic counters.
    """
    try:
        return jsonify(network_monitor.get_network_info())
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@network_bp.route('/api/network/summary', methods=['GET'])
@require_auth
def api_network_summary():
    """Return a lightweight network summary.

    Intended for dashboards that do not need per-interface deep detail.
    """
    try:
        return jsonify(network_monitor.get_network_summary())
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@network_bp.route('/api/network/<interface_name>/metrics', methods=['GET'])
@require_auth
def api_network_interface_metrics(interface_name):
    """Return historical (RRD) metrics for a single interface.

    The optional 'timeframe' query parameter accepts hour, day, week,
    month or year (default: day).
    """
    try:
        timeframe = request.args.get('timeframe', 'day')
        # Reject unknown timeframes up front so pvesh never sees them.
        if timeframe not in ('hour', 'day', 'week', 'month', 'year'):
            return jsonify({'error': 'Invalid timeframe'}), 400
        return jsonify(network_monitor.get_interface_metrics(interface_name, timeframe))
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500

View File

@@ -1,91 +0,0 @@
from flask import Blueprint, jsonify, request, Response
from flask_script_runner import script_runner
import threading
import os
# Definimos el Blueprint
script_bp = Blueprint('script', __name__)
@script_bp.route('/api/scripts/execute', methods=['POST'])
def execute_script():
    """Launch a bash script asynchronously with real-time log streaming.

    Expects a JSON body with 'script_relative_path' (required) plus
    optional 'script_name' and 'params'. The script must resolve inside
    the trusted scripts directory; anything outside it is rejected (403).
    Returns the session id used to poll status and stream logs.
    """
    try:
        # silent=True keeps a missing/invalid JSON body from raising;
        # the explicit 400 below covers that case.
        data = request.get_json(silent=True) or {}
        script_name = data.get('script_name')
        script_params = data.get('params', {})
        script_relative_path = data.get('script_relative_path')
        if not script_relative_path:
            return jsonify({'error': 'script_relative_path is required'}), 400

        # Trusted base directory for runnable scripts.
        BASE_SCRIPTS_DIR = '/usr/local/share/proxmenux/scripts'
        script_path = os.path.abspath(os.path.join(BASE_SCRIPTS_DIR, script_relative_path))

        # Path-traversal guard: the resolved path must be strictly inside
        # the base dir. Comparing against BASE + os.sep also rejects
        # sibling directories such as '.../scripts_evil', which a bare
        # startswith(BASE_SCRIPTS_DIR) would wrongly accept.
        if not script_path.startswith(BASE_SCRIPTS_DIR + os.sep):
            return jsonify({'error': 'Invalid script path'}), 403
        if not os.path.exists(script_path):
            return jsonify({'success': False, 'error': 'Script file not found'}), 404

        # Create the session, then run the script on a daemon thread so
        # this HTTP request returns immediately.
        session_id = script_runner.create_session(script_name)

        def run_script():
            script_runner.execute_script(script_path, session_id, script_params)

        threading.Thread(target=run_script, daemon=True).start()

        return jsonify({
            'success': True,
            'session_id': session_id
        })
    except Exception as e:
        return jsonify({'success': False, 'error': str(e)}), 500
@script_bp.route('/api/scripts/status/<session_id>', methods=['GET'])
def get_script_status(session_id):
    """Return the current state of a running script session."""
    try:
        return jsonify(script_runner.get_session_status(session_id))
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
@script_bp.route('/api/scripts/respond', methods=['POST'])
def respond_to_script():
    """Forward a user-supplied answer to an interactive script.

    The target script session is blocked waiting on this input.
    """
    try:
        payload = request.json
        outcome = script_runner.respond_to_interaction(
            payload.get('session_id'),
            payload.get('interaction_id'),
            payload.get('value'),
        )
        return jsonify(outcome)
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500
@script_bp.route('/api/scripts/logs/<session_id>', methods=['GET'])
def stream_script_logs(session_id):
    """Stream a session's log lines in real time via Server-Sent Events."""
    try:
        def generate():
            # Each log entry becomes one SSE 'data:' frame.
            for entry in script_runner.stream_logs(session_id):
                yield f"data: {entry}\n\n"

        return Response(generate(), mimetype='text/event-stream')
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)}), 500

File diff suppressed because it is too large Load Diff

View File

@@ -1,41 +0,0 @@
from flask import Blueprint, jsonify
from jwt_middleware import require_auth
import storage_monitor
storage_bp = Blueprint('storage', __name__)
@storage_bp.route('/api/storage', methods=['GET'])
@require_auth
def api_storage():
    """Return detailed information about local storage devices."""
    try:
        return jsonify(storage_monitor.get_storage_info())
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@storage_bp.route('/api/storage/summary', methods=['GET'])
@require_auth
def api_storage_summary():
    """Return a condensed storage overview for dashboard use."""
    try:
        return jsonify(storage_monitor.get_storage_summary())
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@storage_bp.route('/api/proxmox-storage', methods=['GET'])
@require_auth
def api_proxmox_storage():
    """Return the storage pools as configured in Proxmox."""
    try:
        return jsonify(storage_monitor.get_proxmox_storage())
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@storage_bp.route('/api/backups', methods=['GET'])
@require_auth
def api_backups():
    """Return the list of known backups."""
    try:
        return jsonify(storage_monitor.get_backups())
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500

View File

@@ -1,98 +0,0 @@
from flask import Blueprint, jsonify, request, send_file
from jwt_middleware import require_auth
import system_monitor
import os
system_bp = Blueprint('system', __name__)
@system_bp.route('/api/system', methods=['GET'])
@require_auth
def api_system():
    """Return general system information for the node."""
    try:
        return jsonify(system_monitor.get_system_info())
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@system_bp.route('/api/logs', methods=['GET'])
@require_auth
def api_logs():
    """Return log entries filtered by limit, priority, service and age."""
    try:
        params = request.args
        entries = system_monitor.get_logs(
            params.get('limit', '200'),
            params.get('priority'),
            params.get('service'),
            params.get('since_days'),
        )
        return jsonify(entries)
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@system_bp.route('/api/logs/download', methods=['GET'])
@require_auth
def api_logs_download():
    """Generate a log file on disk and stream it back as an attachment.

    Query parameters: type (default 'system'), hours (default 48),
    level, service, since_days. Returns 400 for a non-numeric 'hours'
    and 500 when the file cannot be generated.
    """
    try:
        log_type = request.args.get('type', 'system')
        try:
            hours = int(request.args.get('hours', '48'))
        except ValueError:
            # Bad client input is a 400, not an internal server error.
            return jsonify({'error': 'Invalid hours parameter'}), 400
        level = request.args.get('level', 'all')
        service = request.args.get('service', 'all')
        since_days = request.args.get('since_days', None)
        file_path = system_monitor.generate_log_file(log_type, hours, level, service, since_days)
        if file_path and os.path.exists(file_path):
            return send_file(
                file_path,
                mimetype='text/plain',
                as_attachment=True,
                download_name=f'proxmox_{log_type}.log'
            )
        else:
            return jsonify({'error': 'Failed to generate log file'}), 500
    except Exception as e:
        return jsonify({'error': str(e)}), 500
@system_bp.route('/api/events', methods=['GET'])
@require_auth
def api_events():
    """Return recent cluster/node task events (default limit 50)."""
    try:
        limit = request.args.get('limit', '50')
        return jsonify(system_monitor.get_events(limit))
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@system_bp.route('/api/notifications', methods=['GET'])
@require_auth
def api_notifications():
    """Return pending system notifications."""
    try:
        return jsonify(system_monitor.get_notifications())
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@system_bp.route('/api/notifications/download', methods=['GET'])
@require_auth
def api_notifications_download():
    """Placeholder: notification download is not yet ported to this backend."""
    return jsonify({'error': 'Not implemented in modular version yet'}), 501
@system_bp.route('/api/node/metrics', methods=['GET'])
@require_auth
def api_node_metrics():
    """Return node-level RRD metrics for the requested timeframe (default week)."""
    try:
        timeframe = request.args.get('timeframe', 'week')
        data = system_monitor.get_node_metrics(timeframe)
        # The monitor reports failures in-band via an 'error' key.
        status = 500 if 'error' in data else 200
        return jsonify(data), status
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@system_bp.route('/api/prometheus', methods=['GET'])
@require_auth
def api_prometheus():
    """Expose node metrics in Prometheus exposition format."""
    try:
        body, content_type = system_monitor.get_prometheus_metrics()
        return body, 200, content_type
    except Exception as exc:
        return f'# Error generating metrics: {str(exc)}\n', 500, {'Content-Type': 'text/plain'}

View File

@@ -1,122 +0,0 @@
from flask import Blueprint, jsonify, request
from jwt_middleware import require_auth
import vm_monitor
# Definimos el Blueprint para las rutas de VM
vm_bp = Blueprint('vm', __name__)
@vm_bp.route('/api/vms', methods=['GET'])
@require_auth
def api_vms():
    """List every virtual machine and LXC container on the node."""
    try:
        return jsonify(vm_monitor.get_proxmox_vms())
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@vm_bp.route('/api/vms/<int:vmid>', methods=['GET'])
@require_auth
def get_vm_config(vmid):
    """Return the detailed configuration of one VM or LXC.

    Includes hardware layout, run state and network data; 404 when the
    guest id is unknown.
    """
    try:
        config = vm_monitor.get_vm_config(vmid)
        if config:
            return jsonify(config)
        return jsonify({'error': 'VM/LXC not found'}), 404
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@vm_bp.route('/api/vms/<int:vmid>/control', methods=['POST'])
@require_auth
def api_vm_control(vmid):
    """Apply a lifecycle action (start, stop, shutdown, reboot) to a guest."""
    try:
        action = request.get_json().get('action')
        result = vm_monitor.control_vm(vmid, action)
        if result.get('success'):
            return jsonify(result)
        # Monitor-level errors map to 500; anything else is a bad request.
        status = 500 if 'error' in result else 400
        return jsonify(result), status
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@vm_bp.route('/api/vms/<int:vmid>/config', methods=['PUT'])
@require_auth
def api_vm_config_update(vmid):
    """Update a guest's configuration (currently the notes/description field)."""
    try:
        body = request.get_json()
        result = vm_monitor.update_vm_config(vmid, body.get('description', ''))
        if result.get('success'):
            return jsonify(result)
        return jsonify(result), 500
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@vm_bp.route('/api/vms/<int:vmid>/metrics', methods=['GET'])
@require_auth
def api_vm_metrics(vmid):
    """Return historical (RRD) CPU, memory and network metrics for a guest.

    The 'timeframe' query parameter accepts hour/day/week/month/year.
    """
    try:
        timeframe = request.args.get('timeframe', 'week')
        if timeframe not in ('hour', 'day', 'week', 'month', 'year'):
            return jsonify({'error': 'Invalid timeframe'}), 400
        data = vm_monitor.get_vm_metrics(vmid, timeframe)
        if 'error' in data:
            # A 'Failed ...' message marks an internal failure; otherwise
            # the guest is simply unknown.
            return jsonify(data), 500 if 'Failed' in data['error'] else 404
        return jsonify(data)
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@vm_bp.route('/api/vms/<int:vmid>/logs', methods=['GET'])
@require_auth
def api_vm_logs(vmid):
    """Return the internal (console/serial) logs of a VM or LXC."""
    try:
        logs = vm_monitor.get_vm_logs(vmid)
        if 'error' in logs:
            return jsonify(logs), 404
        return jsonify(logs)
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
@vm_bp.route('/api/task-log/<path:upid>', methods=['GET'])
@require_auth
def get_task_log(upid):
    """Return the full plain-text log of a Proxmox task.

    The UPID path segment is the task's unique identifier (e.g. from a
    backup or VM start operation).
    """
    try:
        text = vm_monitor.get_task_log(upid)
        # The monitor signals failures in-band with these message prefixes.
        if text.startswith(("Error", "Log file not found")):
            return jsonify({'error': text}), 404
        return text, 200, {'Content-Type': 'text/plain; charset=utf-8'}
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500

View File

@@ -1,456 +1,413 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
""" """
Hardware Monitor - Detección exhaustiva de hardware Hardware Monitor - RAPL Power Monitoring and GPU Identification
Fusiona:
1. Consumo de CPU (RAPL) This module provides:
2. Detección de GPU (Intel/NVIDIA/AMD) y métricas detalladas 1. CPU power consumption monitoring using Intel RAPL (Running Average Power Limit)
3. Controladoras HBA/RAID y sus temperaturas 2. PCI GPU identification for better fan labeling
4. Sensores IPMI (Ventiladores/Energía) y UPS (NUT) 3. HBA controller detection and temperature monitoring
5. Información base (CPU, RAM, Placa base)
Only contains these specialized functions - all other hardware monitoring
is handled by flask_server.py to avoid code duplication.
""" """
import os import os
import time import time
import subprocess import subprocess
import re import re
import json
import shutil
import select
import psutil
import xml.etree.ElementTree as ET
from typing import Dict, Any, Optional from typing import Dict, Any, Optional
# --- Variables Globales --- # Global variable to store previous energy reading for power calculation
_last_energy_reading = {'energy_uj': None, 'timestamp': None} _last_energy_reading = {'energy_uj': None, 'timestamp': None}
# --- Funciones Auxiliares de GPU ---
def identify_gpu_type(name, vendor=None, bus=None, driver=None):
"""Determina si una GPU es Integrada o Dedicada (PCI)."""
n = (name or "").lower()
v = (vendor or "").lower()
d = (driver or "").lower()
bmc_keywords = ['aspeed', 'ast', 'matrox g200', 'g200e', 'mgag200']
if any(k in n for k in bmc_keywords) or v in ['aspeed', 'matrox']:
return 'Integrated'
if 'intel' in v or 'intel corporation' in n:
if d == 'i915' or any(w in n for w in ['uhd graphics', 'iris', 'integrated']):
return 'Integrated'
return 'Integrated' # Asumir integrada por defecto para Intel en servidores
amd_apu = ['radeon 780m', 'vega', 'renoir', 'cezanne', 'rembrandt']
if 'amd' in v and any(k in n for k in amd_apu):
return 'Integrated'
return 'PCI'
def get_intel_gpu_processes_from_text():
"""
Parsea procesos de intel_gpu_top desde salida de texto
(fallback cuando JSON falla).
"""
try:
process = subprocess.Popen(['intel_gpu_top'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, bufsize=1)
time.sleep(2)
process.terminate()
try: stdout, _ = process.communicate(timeout=1)
except:
process.kill()
stdout, _ = process.communicate()
processes = []
lines = stdout.split('\n')
header_found = False
for i, line in enumerate(lines):
if 'PID' in line and 'NAME' in line:
header_found = True
for proc_line in lines[i+1:]:
parts = proc_line.split()
if len(parts) >= 8:
try:
# Parseo simplificado
name = parts[-1]
pid = parts[0]
if pid.isdigit():
processes.append({
'name': name, 'pid': pid,
'memory': {'total': 0, 'resident': 0},
'engines': {'Render/3D': 'Active'} # Estimado
})
except: continue
break
return processes
except: return []
# --- Funciones Principales de GPU ---
def get_pci_gpu_map() -> Dict[str, Dict[str, str]]: def get_pci_gpu_map() -> Dict[str, Dict[str, str]]:
""" """
Obtiene un mapa detallado de GPUs desde lspci. Get a mapping of PCI addresses to GPU names from lspci.
Útil para enriquecer datos con nombres completos de dispositivos.
This function parses lspci output to identify GPU models by their PCI addresses,
which allows us to provide meaningful names for GPU fans in sensors output.
Returns:
dict: Mapping of PCI addresses (e.g., '02:00.0') to GPU info
Example: {
'02:00.0': {
'vendor': 'NVIDIA',
'name': 'GeForce GTX 1080',
'full_name': 'NVIDIA Corporation GP104 [GeForce GTX 1080]'
}
}
""" """
gpu_map = {} gpu_map = {}
try: try:
result = subprocess.run(['lspci', '-nn'], capture_output=True, text=True, timeout=5) # Run lspci to get VGA/3D/Display controllers
result = subprocess.run(
['lspci', '-nn'],
capture_output=True,
text=True,
timeout=5
)
if result.returncode == 0: if result.returncode == 0:
for line in result.stdout.split('\n'): for line in result.stdout.split('\n'):
if any(k in line for k in ['VGA compatible', '3D controller', 'Display controller']): if 'VGA compatible controller' in line or '3D controller' in line or 'Display controller' in line:
# Example line: "02:00.0 VGA compatible controller [0300]: NVIDIA Corporation GP104 [GeForce GTX 1080] [10de:1b80]"
match = re.match(r'^([0-9a-f]{2}:[0-9a-f]{2}\.[0-9a-f])\s+.*:\s+(.+?)\s+\[([0-9a-f]{4}):([0-9a-f]{4})\]', line) match = re.match(r'^([0-9a-f]{2}:[0-9a-f]{2}\.[0-9a-f])\s+.*:\s+(.+?)\s+\[([0-9a-f]{4}):([0-9a-f]{4})\]', line)
if match:
pci = match.group(1)
name = match.group(2).strip()
vendor = 'Unknown'
if 'NVIDIA' in name.upper(): vendor = 'NVIDIA'
elif 'AMD' in name.upper() or 'ATI' in name.upper(): vendor = 'AMD'
elif 'INTEL' in name.upper(): vendor = 'Intel'
gpu_map[pci] = {'vendor': vendor, 'name': name, 'full_name': line} if match:
except Exception: pass pci_address = match.group(1)
device_name = match.group(2).strip()
# Extract vendor
vendor = None
if 'NVIDIA' in device_name.upper() or 'GEFORCE' in device_name.upper() or 'QUADRO' in device_name.upper():
vendor = 'NVIDIA'
elif 'AMD' in device_name.upper() or 'RADEON' in device_name.upper():
vendor = 'AMD'
elif 'INTEL' in device_name.upper() or 'ARC' in device_name.upper():
vendor = 'Intel'
# Extract model name (text between brackets is usually the commercial name)
bracket_match = re.search(r'\[([^\]]+)\]', device_name)
if bracket_match:
model_name = bracket_match.group(1)
else:
# Fallback: use everything after the vendor name
if vendor:
model_name = device_name.split(vendor)[-1].strip()
else:
model_name = device_name
gpu_map[pci_address] = {
'vendor': vendor if vendor else 'Unknown',
'name': model_name,
'full_name': device_name
}
except Exception:
pass
return gpu_map return gpu_map
def get_gpu_info():
"""Detecta GPUs instaladas para la API."""
gpus = []
try:
res = subprocess.run(['lspci'], capture_output=True, text=True)
for line in res.stdout.split('\n'):
if any(x in line for x in ['VGA', '3D', 'Display']):
parts = line.split(' ', 1)
if len(parts) >= 2:
slot = parts[0]
rest = parts[1]
name = rest.split(':', 1)[1].strip() if ':' in rest else rest.strip()
vendor = 'Unknown'
if 'NVIDIA' in name.upper(): vendor = 'NVIDIA'
elif 'AMD' in name.upper(): vendor = 'AMD'
elif 'INTEL' in name.upper(): vendor = 'Intel'
gpus.append({
'slot': slot,
'name': name,
'vendor': vendor,
'type': identify_gpu_type(name, vendor)
})
except: pass
return gpus
def get_detailed_gpu_info(gpu):
"""
Obtiene métricas en tiempo real (Temp, Uso, VRAM, Power)
usando herramientas específicas del vendor (nvidia-smi, intel_gpu_top).
"""
vendor = gpu.get('vendor', '').lower()
info = {
'has_monitoring_tool': False, 'temperature': None, 'fan_speed': None,
'utilization_gpu': None, 'memory_used': None, 'memory_total': None,
'power_draw': None, 'processes': []
}
# --- NVIDIA ---
if 'nvidia' in vendor and shutil.which('nvidia-smi'):
try:
cmd = ['nvidia-smi', '-q', '-x']
res = subprocess.run(cmd, capture_output=True, text=True, timeout=5)
if res.returncode == 0:
root = ET.fromstring(res.stdout)
gpu_elem = root.find('gpu')
if gpu_elem:
info['has_monitoring_tool'] = True
temp = gpu_elem.find('.//temperature/gpu_temp')
if temp is not None: info['temperature'] = int(temp.text.replace(' C', ''))
fan = gpu_elem.find('.//fan_speed')
if fan is not None and fan.text != 'N/A': info['fan_speed'] = int(fan.text.replace(' %', ''))
power = gpu_elem.find('.//gpu_power_readings/instant_power_draw')
if power is not None and power.text != 'N/A': info['power_draw'] = power.text
util = gpu_elem.find('.//utilization/gpu_util')
if util is not None: info['utilization_gpu'] = util.text
mem_used = gpu_elem.find('.//fb_memory_usage/used')
if mem_used is not None: info['memory_used'] = mem_used.text
mem_total = gpu_elem.find('.//fb_memory_usage/total')
if mem_total is not None: info['memory_total'] = mem_total.text
procs = gpu_elem.find('.//processes')
if procs is not None:
for p in procs.findall('process_info'):
info['processes'].append({
'pid': p.find('pid').text,
'name': p.find('process_name').text,
'memory': p.find('used_memory').text
})
except: pass
# --- INTEL ---
elif 'intel' in vendor:
tool = shutil.which('intel_gpu_top')
if tool:
try:
# Intenta ejecutar JSON output
env = os.environ.copy()
env['TERM'] = 'xterm'
proc = subprocess.Popen([tool, '-J'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, env=env)
# Leer brevemente
time.sleep(1.5)
proc.terminate()
try: stdout, _ = proc.communicate(timeout=0.5)
except:
proc.kill()
stdout, _ = proc.communicate()
# Parsear último JSON válido
json_objs = []
buffer = ""
brace = 0
for char in stdout:
if char == '{': brace += 1
if brace > 0: buffer += char
if char == '}':
brace -= 1
if brace == 0:
try: json_objs.append(json.loads(buffer))
except: pass
buffer = ""
if json_objs:
data = json_objs[-1]
info['has_monitoring_tool'] = True
if 'engines' in data:
max_usage = 0.0
for k, v in data['engines'].items():
val = float(v.get('busy', 0))
if val > max_usage: max_usage = val
info['utilization_gpu'] = f"{max_usage:.1f}%"
if 'power' in data:
info['power_draw'] = f"{data['power'].get('Package', 0):.2f} W"
if 'frequency' in data:
info['clock_graphics'] = f"{data['frequency'].get('actual', 0)} MHz"
except:
# Fallback procesos texto
info['processes'] = get_intel_gpu_processes_from_text()
if info['processes']: info['has_monitoring_tool'] = True
return info
def get_gpu_realtime_data(slot):
"""Encuentra una GPU por slot y devuelve sus datos en tiempo real."""
gpus = get_gpu_info()
target = None
for g in gpus:
if g['slot'] == slot or slot in g.get('slot', ''):
target = g
break
if target:
details = get_detailed_gpu_info(target)
target.update(details)
return target
return None
# --- RAPL Power (CPU) ---
def get_power_info() -> Optional[Dict[str, Any]]: def get_power_info() -> Optional[Dict[str, Any]]:
"""Obtiene consumo de CPU Intel via RAPL.""" """
Get CPU power consumption using Intel RAPL interface.
This function measures power consumption by reading energy counters
from /sys/class/powercap/intel-rapl interfaces and calculating
the power draw based on the change in energy over time.
Used as fallback when IPMI power monitoring is not available.
Returns:
dict: Power meter information with 'name', 'watts', and 'adapter' keys
or None if RAPL interface is unavailable
Example:
{
'name': 'CPU Power',
'watts': 45.32,
'adapter': 'Intel RAPL (CPU only)'
}
"""
global _last_energy_reading global _last_energy_reading
rapl_path = '/sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj' rapl_path = '/sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj'
if os.path.exists(rapl_path): if os.path.exists(rapl_path):
try: try:
with open(rapl_path, 'r') as f: current_uj = int(f.read().strip()) # Read current energy value in microjoules
with open(rapl_path, 'r') as f:
current_energy_uj = int(f.read().strip())
current_time = time.time() current_time = time.time()
watts = 0.0 watts = 0.0
if _last_energy_reading['energy_uj'] and _last_energy_reading['timestamp']: # Calculate power if we have a previous reading
tdiff = current_time - _last_energy_reading['timestamp'] if _last_energy_reading['energy_uj'] is not None and _last_energy_reading['timestamp'] is not None:
ediff = current_uj - _last_energy_reading['energy_uj'] time_diff = current_time - _last_energy_reading['timestamp']
if tdiff > 0: if time_diff > 0:
if ediff < 0: ediff = current_uj # Overflow handling energy_diff = current_energy_uj - _last_energy_reading['energy_uj']
watts = round((ediff / tdiff) / 1000000, 2) # Handle counter overflow (wraps around at max value)
if energy_diff < 0:
energy_diff = current_energy_uj
# Power (W) = Energy (µJ) / time (s) / 1,000,000
watts = round((energy_diff / time_diff) / 1000000, 2)
_last_energy_reading = {'energy_uj': current_uj, 'timestamp': current_time} # Store current reading for next calculation
_last_energy_reading['energy_uj'] = current_energy_uj
_last_energy_reading['timestamp'] = current_time
# Detect CPU vendor for display purposes
cpu_vendor = 'CPU' cpu_vendor = 'CPU'
try: try:
with open('/proc/cpuinfo', 'r') as f: with open('/proc/cpuinfo', 'r') as f:
if 'GenuineIntel' in f.read(): cpu_vendor = 'Intel' cpuinfo = f.read()
else: cpu_vendor = 'AMD' if 'GenuineIntel' in cpuinfo:
except: pass cpu_vendor = 'Intel'
elif 'AuthenticAMD' in cpuinfo:
cpu_vendor = 'AMD'
except:
pass
return {
'name': 'CPU Power',
'watts': watts,
'adapter': f'{cpu_vendor} RAPL (CPU only)'
}
except Exception:
pass
return {'name': 'CPU Power', 'watts': watts, 'adapter': f'{cpu_vendor} RAPL'}
except: pass
return None return None
# --- HBA / RAID Logic ---
def get_hba_info() -> list[Dict[str, Any]]: def get_hba_info() -> list[Dict[str, Any]]:
"""Detecta controladoras HBA/RAID.""" """
Detect HBA/RAID controllers from lspci.
This function identifies LSI/Broadcom, Adaptec, and other RAID/HBA controllers
present in the system via lspci output.
Returns:
list: List of HBA controller dictionaries
Example: [
{
'pci_address': '01:00.0',
'vendor': 'LSI/Broadcom',
'model': 'SAS3008 PCI-Express Fusion-MPT SAS-3',
'controller_id': 0
}
]
"""
hba_list = [] hba_list = []
try: try:
result = subprocess.run(['lspci', '-nn'], capture_output=True, text=True, timeout=5) # Run lspci to find RAID/SAS controllers
result = subprocess.run(
['lspci', '-nn'],
capture_output=True,
text=True,
timeout=5
)
if result.returncode == 0: if result.returncode == 0:
controller_id = 0 controller_id = 0
for line in result.stdout.split('\n'): for line in result.stdout.split('\n'):
if any(k in line for k in ['RAID bus controller', 'SCSI storage controller', 'Serial Attached SCSI']): # Look for RAID bus controller, SCSI storage controller, Serial Attached SCSI controller
if any(keyword in line for keyword in ['RAID bus controller', 'SCSI storage controller', 'Serial Attached SCSI']):
# Example: "01:00.0 RAID bus controller [0104]: Broadcom / LSI SAS3008 PCI-Express Fusion-MPT SAS-3 [1000:0097]"
match = re.match(r'^([0-9a-f]{2}:[0-9a-f]{2}\.[0-9a-f])\s+.*:\s+(.+?)\s+\[([0-9a-f]{4}):([0-9a-f]{4})\]', line) match = re.match(r'^([0-9a-f]{2}:[0-9a-f]{2}\.[0-9a-f])\s+.*:\s+(.+?)\s+\[([0-9a-f]{4}):([0-9a-f]{4})\]', line)
if match:
pci = match.group(1)
name = match.group(2).strip()
vendor = 'Unknown'
if 'LSI' in name.upper() or 'BROADCOM' in name.upper() or 'AVAGO' in name.upper(): vendor = 'LSI/Broadcom'
elif 'ADAPTEC' in name.upper(): vendor = 'Adaptec'
elif 'HP' in name.upper(): vendor = 'HP'
elif 'DELL' in name.upper(): vendor = 'Dell'
model = name if match:
for v in ['Broadcom / LSI', 'Broadcom', 'LSI Logic', 'LSI']: pci_address = match.group(1)
if model.startswith(v): model = model[len(v):].strip() device_name = match.group(2).strip()
# Extract vendor
vendor = 'Unknown'
if 'LSI' in device_name.upper() or 'BROADCOM' in device_name.upper() or 'AVAGO' in device_name.upper():
vendor = 'LSI/Broadcom'
elif 'ADAPTEC' in device_name.upper():
vendor = 'Adaptec'
elif 'ARECA' in device_name.upper():
vendor = 'Areca'
elif 'HIGHPOINT' in device_name.upper():
vendor = 'HighPoint'
elif 'DELL' in device_name.upper():
vendor = 'Dell'
elif 'HP' in device_name.upper() or 'HEWLETT' in device_name.upper():
vendor = 'HP'
# Extract model name
model_name = device_name
# Remove vendor prefix if present
for v in ['Broadcom / LSI', 'Broadcom', 'LSI Logic', 'LSI', 'Adaptec', 'Areca', 'HighPoint', 'Dell', 'HP', 'Hewlett-Packard']:
if model_name.startswith(v):
model_name = model_name[len(v):].strip()
hba_list.append({ hba_list.append({
'pci_address': pci, 'vendor': vendor, 'model': model, 'pci_address': pci_address,
'controller_id': controller_id, 'full_name': name 'vendor': vendor,
'model': model_name,
'controller_id': controller_id,
'full_name': device_name
}) })
controller_id += 1 controller_id += 1
except: pass
except Exception:
pass
return hba_list return hba_list
def get_hba_temperatures() -> list[Dict[str, Any]]: def get_hba_temperatures() -> list[Dict[str, Any]]:
"""Obtiene temperaturas de HBA (storcli/megacli).""" """
Get HBA controller temperatures using storcli64 or megacli.
This function attempts to read temperature data from LSI/Broadcom RAID controllers
using the storcli64 tool (preferred) or megacli as fallback.
Returns:
list: List of temperature dictionaries
Example: [
{
'name': 'HBA Controller 0',
'temperature': 65,
'adapter': 'LSI/Broadcom SAS3008'
}
]
"""
temperatures = [] temperatures = []
storcli_paths = ['/usr/sbin/storcli64', '/opt/MegaRAID/storcli/storcli64', 'storcli64']
storcli = next((p for p in storcli_paths if shutil.which(p) or os.path.exists(p)), None)
if storcli: # Check which tool is available
storcli_paths = [
'/opt/MegaRAID/storcli/storcli64',
'/usr/sbin/storcli64',
'/usr/local/sbin/storcli64',
'storcli64'
]
megacli_paths = [
'/opt/MegaRAID/MegaCli/MegaCli64',
'/usr/sbin/megacli',
'/usr/local/sbin/megacli',
'megacli'
]
storcli_path = None
megacli_path = None
# Find storcli64
for path in storcli_paths:
try: try:
# Intenta leer el controlador 0 como ejemplo básico result = subprocess.run([path, '-v'], capture_output=True, timeout=2)
res = subprocess.run([storcli, '/c0', 'show', 'temperature'], capture_output=True, text=True, timeout=5) if result.returncode == 0:
for line in res.stdout.split('\n'): storcli_path = path
if 'ROC temperature' in line or 'Controller Temp' in line: break
match = re.search(r'(\d+)\s*C', line) except:
if match: continue
temperatures.append({
'name': 'HBA Controller 0',
'temperature': int(match.group(1)),
'adapter': 'LSI/Broadcom'
})
except: pass
return temperatures
# --- IPMI & UPS --- # Try storcli64 first (preferred)
if storcli_path:
def get_ipmi_fans():
"""Obtiene ventiladores via ipmitool."""
fans = []
if shutil.which('ipmitool'):
try: try:
res = subprocess.run(['ipmitool', 'sensor'], capture_output=True, text=True, timeout=5) # Get list of controllers
for line in res.stdout.split('\n'): result = subprocess.run(
if 'fan' in line.lower() and '|' in line: [storcli_path, 'show'],
p = line.split('|') capture_output=True,
try: fans.append({'name': p[0].strip(), 'speed': float(p[1].strip()), 'unit': p[2].strip()}) text=True,
except: continue timeout=10
except: pass )
return fans
def get_ipmi_power(): if result.returncode == 0:
"""Obtiene datos de energía IPMI.""" # Parse controller IDs
power = {'supplies': [], 'meter': None} controller_ids = []
if shutil.which('ipmitool'): for line in result.stdout.split('\n'):
try: match = re.search(r'^\s*(\d+)\s+', line)
res = subprocess.run(['ipmitool', 'sensor'], capture_output=True, text=True, timeout=5) if match and 'Ctl' in line:
for line in res.stdout.split('\n'): controller_ids.append(match.group(1))
lower = line.lower()
if ('power supply' in lower or 'power meter' in lower) and '|' in line: # Get temperature for each controller
p = line.split('|') for ctrl_id in controller_ids:
try: try:
val = float(p[1].strip()) temp_result = subprocess.run(
unit = p[2].strip() [storcli_path, f'/c{ctrl_id}', 'show', 'temperature'],
if 'power meter' in lower: capture_output=True,
power['meter'] = {'name': p[0].strip(), 'watts': val, 'unit': unit} text=True,
else: timeout=10
power['supplies'].append({'name': p[0].strip(), 'watts': val, 'unit': unit}) )
except: continue
except: pass
return power
def get_ups_info(): if temp_result.returncode == 0:
"""Obtiene datos de UPS via NUT.""" # Parse temperature from output
ups_list = [] for line in temp_result.stdout.split('\n'):
if shutil.which('upsc'): if 'ROC temperature' in line or 'Controller Temp' in line:
try: temp_match = re.search(r'(\d+)\s*C', line)
res = subprocess.run(['upsc', '-l'], capture_output=True, text=True, timeout=5) if temp_match:
for ups in res.stdout.strip().split('\n'): temp_c = int(temp_match.group(1))
if ups:
data = {'name': ups, 'connection_type': 'Local'}
d_res = subprocess.run(['upsc', ups], capture_output=True, text=True, timeout=5)
for line in d_res.stdout.split('\n'):
if ':' in line:
k, v = line.split(':', 1)
data[k.strip()] = v.strip()
ups_list.append(data)
except: pass
return ups_list
# --- Main Hardware Aggregator --- # Get HBA info for better naming
hba_list = get_hba_info()
adapter_name = 'LSI/Broadcom Controller'
if int(ctrl_id) < len(hba_list):
hba = hba_list[int(ctrl_id)]
adapter_name = f"{hba['vendor']} {hba['model']}"
def get_hardware_info(): temperatures.append({
"""Agrega toda la información de hardware para la API.""" 'name': f'HBA Controller {ctrl_id}',
data = { 'temperature': temp_c,
'cpu': {}, 'motherboard': {}, 'memory_modules': [], 'adapter': adapter_name
'storage_devices': [], 'pci_devices': [], })
'gpus': get_gpu_info(), break
'ipmi_fans': get_ipmi_fans(), except:
'ipmi_power': get_ipmi_power(), continue
'ups': get_ups_info(), except:
'power_meter': get_power_info(), pass
'hba': get_hba_info(),
'sensors': {'fans': [], 'temperatures': get_hba_temperatures()}
}
# CPU Info # Fallback to megacli if storcli not available
try: elif not temperatures:
res = subprocess.run(['lscpu'], capture_output=True, text=True) for path in megacli_paths:
for line in res.stdout.split('\n'): try:
if 'Model name:' in line: data['cpu']['model'] = line.split(':', 1)[1].strip() result = subprocess.run([path, '-v'], capture_output=True, timeout=2)
if 'Socket(s):' in line: data['cpu']['sockets'] = line.split(':', 1)[1].strip() if result.returncode == 0:
except: pass megacli_path = path
break
except:
continue
# Motherboard if megacli_path:
try: try:
res = subprocess.run(['dmidecode', '-t', 'baseboard'], capture_output=True, text=True) # Get adapter count
for line in res.stdout.split('\n'): result = subprocess.run(
if 'Product Name:' in line: data['motherboard']['model'] = line.split(':', 1)[1].strip() [megacli_path, '-adpCount'],
if 'Manufacturer:' in line: data['motherboard']['manufacturer'] = line.split(':', 1)[1].strip() capture_output=True,
except: pass text=True,
timeout=10
)
# RAM if result.returncode == 0:
try: # Parse adapter count
res = subprocess.run(['dmidecode', '-t', 'memory'], capture_output=True, text=True) adapter_count = 0
mod = {} for line in result.stdout.split('\n'):
for line in res.stdout.split('\n'): if 'Controller Count' in line:
line = line.strip() count_match = re.search(r'(\d+)', line)
if 'Memory Device' in line: if count_match:
if mod.get('size', 0) > 0: data['memory_modules'].append(mod) adapter_count = int(count_match.group(1))
mod = {'size': 0} break
elif 'Size:' in line:
parts = line.split(':', 1)[1].strip().split()
if len(parts) >= 2 and parts[0].isdigit():
val = int(parts[0])
unit = parts[1].upper()
if unit == 'GB': mod['size'] = val * 1024 * 1024
elif unit == 'MB': mod['size'] = val * 1024
elif 'Type:' in line: mod['type'] = line.split(':', 1)[1].strip()
elif 'Speed:' in line: mod['speed'] = line.split(':', 1)[1].strip()
if mod.get('size', 0) > 0: data['memory_modules'].append(mod)
except: pass
# Enrich GPUs with details # Get temperature for each adapter
for gpu in data['gpus']: for adapter_id in range(adapter_count):
gpu.update(get_detailed_gpu_info(gpu)) try:
temp_result = subprocess.run(
[megacli_path, '-AdpAllInfo', f'-a{adapter_id}'],
capture_output=True,
text=True,
timeout=10
)
return data if temp_result.returncode == 0:
# Parse temperature
for line in temp_result.stdout.split('\n'):
if 'ROC temperature' in line or 'Controller Temp' in line:
temp_match = re.search(r'(\d+)\s*C', line)
if temp_match:
temp_c = int(temp_match.group(1))
# Get HBA info for better naming
hba_list = get_hba_info()
adapter_name = 'LSI/Broadcom Controller'
if adapter_id < len(hba_list):
hba = hba_list[adapter_id]
adapter_name = f"{hba['vendor']} {hba['model']}"
temperatures.append({
'name': f'HBA Controller {adapter_id}',
'temperature': temp_c,
'adapter': adapter_name
})
break
except:
continue
except:
pass
return temperatures

View File

@@ -1,311 +0,0 @@
import os
import re
import json
import socket
import psutil
import subprocess
from system_monitor import get_proxmox_node_name
def extract_vmid_from_interface(interface_name):
    """Derive the guest ID from a Proxmox network interface name.

    Examples: 'veth100i0' -> (100, 'lxc'), 'tap105i0' -> (105, 'vm').
    Returns (None, None) when the name does not follow that pattern.
    """
    try:
        parsed = re.match(r'(veth|tap)(\d+)i\d+', interface_name)
        if parsed is None:
            return None, None
        prefix, raw_id = parsed.group(1), parsed.group(2)
        return int(raw_id), ('lxc' if prefix == 'veth' else 'vm')
    except Exception:
        return None, None
def get_vm_lxc_names():
    """Map VMIDs on the local node to guest metadata.

    Queries `pvesh get /cluster/resources` and keeps only the guests
    that live on this node. Best effort: returns {} on any failure.

    Returns:
        dict: {vmid: {'name', 'type' ('lxc'|'vm'), 'status'}}
    """
    mapping = {}
    try:
        node = get_proxmox_node_name()
        proc = subprocess.run(
            ['pvesh', 'get', '/cluster/resources', '--type', 'vm', '--output-format', 'json'],
            capture_output=True, text=True, timeout=10)
        if proc.returncode != 0:
            return mapping
        for res in json.loads(proc.stdout):
            if res.get('node') != node:
                continue
            vmid = res.get('vmid')
            if not vmid:
                continue
            mapping[vmid] = {
                'name': res.get('name', f'VM-{vmid}'),
                'type': 'lxc' if res.get('type') == 'lxc' else 'vm',
                'status': res.get('status', 'unknown'),
            }
    except Exception:
        pass
    return mapping
def get_interface_type(interface_name):
    """Classify a network interface name into a handful of categories.

    Returns one of: 'skip', 'vm_lxc', 'bond', 'bridge', 'vlan', 'physical'.
    """
    name = interface_name
    if name == 'lo':
        return 'skip'
    if name.startswith(('veth', 'tap')):
        return 'vm_lxc'
    if name.startswith(('tun', 'vnet', 'docker', 'virbr')):
        return 'skip'
    if name.startswith('bond'):
        return 'bond'
    if name.startswith(('vmbr', 'br')):
        return 'bridge'
    if '.' in name:
        return 'vlan'
    # A real physical NIC exposes a backing device node in sysfs.
    if os.path.exists(f'/sys/class/net/{name}/device'):
        return 'physical'
    # Fall back to common physical-interface name prefixes.
    if name.startswith(('enp', 'eth', 'eno', 'ens', 'enx', 'wlan', 'wlp', 'wlo', 'usb')):
        return 'physical'
    return 'skip'
def get_bond_info(bond_name):
    """Read bonding details (mode, slaves, active slave) from /proc.

    Returns defaults ({'mode': 'unknown', 'slaves': [], 'active_slave':
    None}) when the bond does not exist or /proc is unreadable.
    """
    details = {'mode': 'unknown', 'slaves': [], 'active_slave': None}
    proc_path = f'/proc/net/bonding/{bond_name}'
    try:
        if not os.path.exists(proc_path):
            return details
        with open(proc_path, 'r') as fh:
            lines = fh.read().split('\n')
        for entry in lines:
            if 'Bonding Mode:' in entry:
                details['mode'] = entry.split(':', 1)[1].strip()
            elif 'Slave Interface:' in entry:
                details['slaves'].append(entry.split(':', 1)[1].strip())
            elif 'Currently Active Slave:' in entry:
                details['active_slave'] = entry.split(':', 1)[1].strip()
    except Exception:
        pass
    return details
def get_bridge_info(bridge_name):
    """List bridge member ports and identify the physical uplink.

    When the bridge sits on a bond, the bond and its slaves are
    reported and the duplex is read from the bond's active slave;
    for a direct NIC member, the NIC's duplex is used.
    """
    result = {'members': [], 'physical_interface': None,
              'physical_duplex': 'unknown', 'bond_slaves': []}
    try:
        brif_dir = f'/sys/class/net/{bridge_name}/brif'
        if not os.path.exists(brif_dir):
            return result
        ports = os.listdir(brif_dir)
        result['members'] = ports
        for port in ports:
            if port.startswith('bond'):
                # Bridge over a bond: expose the bond and its slaves.
                result['physical_interface'] = port
                bond = get_bond_info(port)
                result['bond_slaves'] = bond['slaves']
                active = bond['active_slave']
                if active:
                    try:
                        st = psutil.net_if_stats().get(active)
                        if st:
                            result['physical_duplex'] = ('full' if st.duplex == 2
                                                         else 'half' if st.duplex == 1
                                                         else 'unknown')
                    except Exception:
                        pass
                break
            elif port.startswith(('enp', 'eth', 'eno', 'ens', 'wlan')):
                # Bridge directly on a physical NIC.
                result['physical_interface'] = port
                try:
                    st = psutil.net_if_stats().get(port)
                    if st:
                        result['physical_duplex'] = ('full' if st.duplex == 2
                                                     else 'half' if st.duplex == 1
                                                     else 'unknown')
                except Exception:
                    pass
                break
    except Exception:
        pass
    return result
def get_network_info():
    """
    Collect a full, detailed picture of the host's networking.

    Returns a dict with per-category interface lists (physical, bridge,
    VM/LXC, plus a combined 'interfaces' list), per-interface traffic
    counters, global traffic totals with packet-loss percentages, DNS
    configuration and active/total counts per category.
    """
    data = {
        'interfaces': [], 'physical_interfaces': [], 'bridge_interfaces': [], 'vm_lxc_interfaces': [],
        'traffic': {}, 'hostname': get_proxmox_node_name(), 'domain': None, 'dns_servers': []
    }
    # Read DNS configuration from resolv.conf.
    try:
        with open('/etc/resolv.conf', 'r') as f:
            for line in f:
                if line.startswith('nameserver'): data['dns_servers'].append(line.split()[1])
                elif line.startswith('domain'): data['domain'] = line.split()[1]
                elif line.startswith('search') and not data['domain']:
                    parts = line.split()
                    if len(parts) > 1: data['domain'] = parts[1]
    except: pass
    vm_map = get_vm_lxc_names()
    stats = psutil.net_if_stats()
    addrs = psutil.net_if_addrs()
    io_counters = psutil.net_io_counters(pernic=True)
    # Active/total counters per interface category.
    counts = {'physical': {'active':0, 'total':0}, 'bridge': {'active':0, 'total':0}, 'vm': {'active':0, 'total':0}}
    for name, stat in stats.items():
        itype = get_interface_type(name)
        if itype == 'skip': continue
        info = {
            'name': name, 'type': itype, 'status': 'up' if stat.isup else 'down',
            'speed': stat.speed, 'mtu': stat.mtu,
            'duplex': 'full' if stat.duplex == 2 else 'half' if stat.duplex == 1 else 'unknown',
            'addresses': []
        }
        # IPv4 and MAC addresses.
        if name in addrs:
            for addr in addrs[name]:
                if addr.family == socket.AF_INET:  # IPv4
                    info['addresses'].append({'ip': addr.address, 'netmask': addr.netmask})
                elif addr.family == 17:  # AF_PACKET on Linux -> MAC address
                    info['mac_address'] = addr.address
        # Traffic counters.
        if name in io_counters:
            io = io_counters[name]
            # For guest interfaces, swap direction: host TX is guest RX.
            if itype == 'vm_lxc':
                info.update({'bytes_sent': io.bytes_recv, 'bytes_recv': io.bytes_sent,
                             'packets_sent': io.packets_recv, 'packets_recv': io.packets_sent})
            else:
                info.update({'bytes_sent': io.bytes_sent, 'bytes_recv': io.bytes_recv,
                             'packets_sent': io.packets_sent, 'packets_recv': io.packets_recv})
            info.update({'errors_in': io.errin, 'errors_out': io.errout,
                         'drops_in': io.dropin, 'drops_out': io.dropout})
        # Classify into the per-category result lists.
        if itype == 'vm_lxc':
            counts['vm']['total'] += 1
            if stat.isup: counts['vm']['active'] += 1
            vmid, _ = extract_vmid_from_interface(name)
            if vmid and vmid in vm_map:
                info.update({'vmid': vmid, 'vm_name': vm_map[vmid]['name'],
                             'vm_type': vm_map[vmid]['type'], 'vm_status': vm_map[vmid]['status']})
            elif vmid:
                info.update({'vmid': vmid, 'vm_name': f'VM/LXC {vmid}', 'vm_status': 'unknown'})
            data['vm_lxc_interfaces'].append(info)
        elif itype == 'physical':
            counts['physical']['total'] += 1
            if stat.isup: counts['physical']['active'] += 1
            data['physical_interfaces'].append(info)
        elif itype == 'bridge':
            counts['bridge']['total'] += 1
            if stat.isup: counts['bridge']['active'] += 1
            b_info = get_bridge_info(name)
            info['bridge_members'] = b_info['members']
            info['bridge_physical_interface'] = b_info['physical_interface']
            if b_info['physical_duplex'] != 'unknown':
                info['duplex'] = b_info['physical_duplex']
            data['bridge_interfaces'].append(info)
        elif itype == 'bond':
            bond_info = get_bond_info(name)
            info.update({'bond_mode': bond_info['mode'], 'bond_slaves': bond_info['slaves'],
                         'bond_active_slave': bond_info['active_slave']})
        # NOTE(review): indentation was lost in extraction; this append
        # is placed at loop level so every non-skipped interface is also
        # collected in 'interfaces' — confirm against the original file.
        data['interfaces'].append(info)
    # Host-wide traffic totals and derived packet-loss percentages.
    g_io = psutil.net_io_counters()
    data['traffic'] = {
        'bytes_sent': g_io.bytes_sent, 'bytes_recv': g_io.bytes_recv,
        'packets_sent': g_io.packets_sent, 'packets_recv': g_io.packets_recv,
        'packet_loss_in': 0, 'packet_loss_out': 0
    }
    tin = g_io.packets_recv + g_io.dropin
    if tin > 0: data['traffic']['packet_loss_in'] = round((g_io.dropin / tin) * 100, 2)
    tout = g_io.packets_sent + g_io.dropout
    if tout > 0: data['traffic']['packet_loss_out'] = round((g_io.dropout / tout) * 100, 2)
    data.update({
        'physical_active_count': counts['physical']['active'], 'physical_total_count': counts['physical']['total'],
        'bridge_active_count': counts['bridge']['active'], 'bridge_total_count': counts['bridge']['total'],
        'vm_lxc_active_count': counts['vm']['active'], 'vm_lxc_total_count': counts['vm']['total']
    })
    return data
def get_network_summary():
    """Quick network summary: physical/bridge interfaces plus traffic totals."""
    global_io = psutil.net_io_counters()
    if_stats = psutil.net_if_stats()
    if_addrs = psutil.net_if_addrs()
    physical = []
    bridges = []
    phys_active = phys_total = br_active = br_total = 0
    for ifname, st in if_stats.items():
        # Ignore loopback, docker and guest/firewall interfaces.
        if ifname in ['lo', 'docker0'] or ifname.startswith(('veth', 'tap', 'fw')):
            continue
        up = st.isup
        ipv4 = [{'ip': a.address, 'netmask': a.netmask}
                for a in if_addrs.get(ifname, [])
                if a.family == socket.AF_INET]
        entry = {'name': ifname, 'status': 'up' if up else 'down', 'addresses': ipv4}
        if ifname.startswith(('enp', 'eth', 'eno', 'ens', 'wlan')):
            phys_total += 1
            if up:
                phys_active += 1
            physical.append(entry)
        elif ifname.startswith(('vmbr', 'br')):
            br_total += 1
            if up:
                br_active += 1
            bridges.append(entry)
    return {
        'physical_active_count': phys_active, 'physical_total_count': phys_total,
        'bridge_active_count': br_active, 'bridge_total_count': br_total,
        'physical_interfaces': physical, 'bridge_interfaces': bridges,
        'traffic': {'bytes_sent': global_io.bytes_sent, 'bytes_recv': global_io.bytes_recv,
                    'packets_sent': global_io.packets_sent, 'packets_recv': global_io.packets_recv}
    }
def get_interface_metrics(interface_name, timeframe='day'):
    """Return historical RRD network metrics for an interface.

    For guest interfaces (veth*/tap*) the per-guest RRD endpoint is
    queried; for anything else the node-level RRD data is used (the
    node RRD only exposes aggregate traffic, not per-interface data).

    Args:
        interface_name: interface name (e.g. 'vmbr0', 'tap105i0').
        timeframe: pvesh RRD timeframe ('hour', 'day', 'week', ...).

    Returns:
        dict with 'interface', 'type', 'timeframe' and 'data' (list of
        {'time', 'netin', 'netout'} points), or {'error': ...} on failure.
    """
    local_node = get_proxmox_node_name()
    itype = get_interface_type(interface_name)

    def _fetch_rrd(endpoint):
        # Shared pvesh RRD query + netin/netout extraction; the original
        # duplicated this parsing verbatim in both branches.
        points = []
        res = subprocess.run(['pvesh', 'get', endpoint,
                              '--timeframe', timeframe, '--output-format', 'json'],
                             capture_output=True, text=True, timeout=10)
        if res.returncode == 0:
            for point in json.loads(res.stdout):
                item = {'time': point.get('time')}
                if 'netin' in point: item['netin'] = point['netin']
                if 'netout' in point: item['netout'] = point['netout']
                points.append(item)
        return points

    rrd_data = []
    try:
        if itype == 'vm_lxc':
            vmid, vm_type = extract_vmid_from_interface(interface_name)
            if vmid:
                rrd_data = _fetch_rrd(f'/nodes/{local_node}/{vm_type}/{vmid}/rrddata')
        else:
            # Physical/bridge: node-level (whole-node) traffic history.
            rrd_data = _fetch_rrd(f'/nodes/{local_node}/rrddata')
        return {'interface': interface_name, 'type': itype, 'timeframe': timeframe, 'data': rrd_data}
    except Exception as e:
        return {'error': str(e)}

View File

@@ -1,261 +0,0 @@
import os
import json
import math
import subprocess
import re
import psutil
from system_monitor import get_proxmox_node_name
# Intentar importar el monitor de storage externo si existe
try:
from proxmox_storage_monitor import proxmox_storage_monitor
except ImportError:
proxmox_storage_monitor = None
def format_bytes(size_in_bytes):
    """Human-readable byte size (e.g. 1536 -> '1.5 KB').

    Returns 'N/A' for None and '0 B' for zero. Fixes from review:
    negative values previously raised ValueError in math.log (now
    rendered as '0 B'), and sizes beyond EB previously raised
    IndexError (the unit index is now clamped to the table).
    """
    if size_in_bytes is None: return "N/A"
    if size_in_bytes <= 0: return "0 B"
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB")
    # Clamp the magnitude so fractional (<1 B) or huge sizes stay in range.
    i = max(0, min(int(math.floor(math.log(size_in_bytes, 1024))), len(size_name) - 1))
    p = math.pow(1024, i)
    s = round(size_in_bytes / p, 2)
    return f"{s} {size_name[i]}"
def get_pcie_link_speed(disk_name):
    """Report PCIe generation and lane width for an NVMe block device.

    Non-NVMe names, or anything that cannot be resolved via sysfs and
    lspci, yield {'pcie_gen': None, 'pcie_width': None}.
    """
    result = {'pcie_gen': None, 'pcie_width': None}
    try:
        if not disk_name.startswith('nvme'):
            return result
        ctrl_match = re.match(r'(nvme\d+)n\d+', disk_name)
        if not ctrl_match:
            return result
        controller = ctrl_match.group(1)
        # Resolve the controller's PCI address through sysfs symlinks.
        pci_address = None
        primary = f'/sys/class/nvme/{controller}/device'
        fallback = f'/sys/block/{disk_name}/device/device'
        if os.path.exists(primary):
            pci_address = os.path.basename(os.readlink(primary))
        elif os.path.exists(fallback):
            pci_address = os.path.basename(os.readlink(fallback))
        if not pci_address:
            return result
        lspci = subprocess.run(['lspci', '-vvv', '-s', pci_address],
                               capture_output=True, text=True, timeout=5)
        if lspci.returncode != 0:
            return result
        for line in lspci.stdout.split('\n'):
            if 'LnkSta:' not in line:
                continue
            speed_m = re.search(r'Speed\s+([\d.]+)GT/s', line)
            if speed_m:
                gt = float(speed_m.group(1))
                # Map link rate to PCIe generation (8 GT/s = 3.0, 16 GT/s = 4.0).
                if gt <= 8.0:
                    result['pcie_gen'] = '3.0'
                elif gt <= 16.0:
                    result['pcie_gen'] = '4.0'
                else:
                    result['pcie_gen'] = '5.0'
            width_m = re.search(r'Width\s+x(\d+)', line)
            if width_m:
                result['pcie_width'] = f'x{width_m.group(1)}'
    except Exception:
        pass
    return result
def get_smart_data(disk_name):
    """Collect SMART health data for /dev/<disk_name> via smartctl.

    Tries JSON output first (plain, then forced ATA / NVMe device
    types) and falls back to parsing plain-text output. A missing tool
    or disk yields the defaults ('Unknown' model, health 'unknown').

    Fix from review: the final health heuristics used to overwrite a
    'critical' SMART verdict with 'warning' when the disk was merely
    hot or had reallocated sectors; 'critical' is now preserved.

    Returns:
        dict with temperature (°C), health ('healthy'/'warning'/
        'critical'/'unknown'), power_on_hours, model, serial,
        reallocated_sectors, ssd_life_left (percent or None) and
        rotation_rate (0 for SSDs).
    """
    smart_data = {
        'temperature': 0, 'health': 'unknown', 'power_on_hours': 0, 'smart_status': 'unknown',
        'model': 'Unknown', 'serial': 'Unknown', 'reallocated_sectors': 0,
        'ssd_life_left': None, 'rotation_rate': 0
    }
    # Several invocations: some controllers need an explicit -d type.
    cmds = [
        ['smartctl', '-a', '-j', f'/dev/{disk_name}'],
        ['smartctl', '-a', '-j', '-d', 'ata', f'/dev/{disk_name}'],
        ['smartctl', '-a', '-j', '-d', 'nvme', f'/dev/{disk_name}'],
        ['smartctl', '-a', f'/dev/{disk_name}']
    ]
    for cmd in cmds:
        try:
            res = subprocess.run(cmd, capture_output=True, text=True, timeout=8)
            if not res.stdout: continue
            if '-j' in cmd:
                try:
                    data = json.loads(res.stdout)
                    if 'model_name' in data: smart_data['model'] = data['model_name']
                    elif 'model_family' in data: smart_data['model'] = data['model_family']
                    if 'serial_number' in data: smart_data['serial'] = data['serial_number']
                    if 'rotation_rate' in data: smart_data['rotation_rate'] = data['rotation_rate']
                    if 'temperature' in data and 'current' in data['temperature']:
                        smart_data['temperature'] = data['temperature']['current']
                    if 'smart_status' in data:
                        smart_data['health'] = 'healthy' if data['smart_status'].get('passed') else 'critical'
                    # NVMe-specific health log.
                    if 'nvme_smart_health_information_log' in data:
                        nvme = data['nvme_smart_health_information_log']
                        if 'temperature' in nvme: smart_data['temperature'] = nvme['temperature']
                        if 'power_on_hours' in nvme: smart_data['power_on_hours'] = nvme['power_on_hours']
                        if 'percentage_used' in nvme: smart_data['ssd_life_left'] = 100 - nvme['percentage_used']
                    # ATA attribute table.
                    if 'ata_smart_attributes' in data:
                        for attr in data['ata_smart_attributes'].get('table', []):
                            aid = attr.get('id')
                            raw = attr.get('raw', {}).get('value', 0)
                            norm = attr.get('value', 0)
                            if aid == 9: smart_data['power_on_hours'] = raw           # Power-On Hours
                            elif aid == 5: smart_data['reallocated_sectors'] = raw    # Reallocated Sectors
                            elif aid == 194 and smart_data['temperature'] == 0: smart_data['temperature'] = raw
                            elif str(aid) in ['231', '202']: smart_data['ssd_life_left'] = norm  # SSD life left
                    if smart_data['model'] != 'Unknown': break
                except json.JSONDecodeError: pass
            # Plain-text fallback parsing when JSON gave no model.
            if smart_data['model'] == 'Unknown':
                for line in res.stdout.split('\n'):
                    if 'Device Model:' in line: smart_data['model'] = line.split(':', 1)[1].strip()
                    elif 'Serial Number:' in line: smart_data['serial'] = line.split(':', 1)[1].strip()
                    elif 'Current Temperature:' in line:
                        try: smart_data['temperature'] = int(line.split(':')[1].strip().split()[0])
                        except Exception: pass
            if smart_data['model'] != 'Unknown': break
        except Exception: continue
    # Health heuristics: escalate to 'warning' on reallocated sectors or
    # high temperature, but never downgrade a 'critical' SMART verdict.
    if smart_data['health'] != 'critical':
        if smart_data['reallocated_sectors'] > 0 or smart_data['temperature'] >= 60:
            smart_data['health'] = 'warning'
    return smart_data
def get_storage_info():
    """Full storage overview: physical disks (with SMART), ZFS pools
    and aggregate usage.

    Returns:
        dict with 'total' (TB), 'used'/'available' (GB), 'disks',
        'zfs_pools' and 'disk_count'.
    """
    data = {'total': 0, 'used': 0, 'available': 0, 'disks': [], 'zfs_pools': [], 'disk_count': 0}
    # 1. Physical disks via lsblk (zvols, prefixed 'zd', are skipped).
    try:
        res = subprocess.run(['lsblk', '-b', '-d', '-n', '-o', 'NAME,SIZE,TYPE'], capture_output=True, text=True, timeout=5)
        for line in res.stdout.strip().split('\n'):
            p = line.split()
            if len(p) >= 3 and p[2] == 'disk':
                name = p[0]
                if name.startswith('zd'): continue
                size = int(p[1])
                smart = get_smart_data(name)
                size_tb = size / (1024**4)
                size_str = f"{size_tb:.1f}T" if size_tb >= 1 else f"{size / (1024**3):.1f}G"
                data['disks'].append({
                    'name': name,
                    'size': size / 1024,  # KB
                    'size_formatted': size_str,
                    'size_bytes': size,
                    'model': smart['model'],
                    'serial': smart['serial'],
                    'temperature': smart['temperature'],
                    'health': smart['health'],
                    'ssd_life_left': smart['ssd_life_left']
                })
                data['total'] += size
                data['disk_count'] += 1
    except: pass
    data['total'] = round(data['total'] / (1024**4), 1)  # bytes -> TB
    # 2. Usage: mounted partitions (excluding tmpfs/overlay/zfs mounts,
    # which would double-count) plus ZFS pool allocation.
    used = 0
    avail = 0
    try:
        for part in psutil.disk_partitions():
            if part.fstype not in ['tmpfs', 'overlay', 'zfs']:
                try:
                    u = psutil.disk_usage(part.mountpoint)
                    used += u.used
                    avail += u.free
                except: pass
        res = subprocess.run(['zpool', 'list', '-H', '-p', '-o', 'name,size,alloc,free,health'], capture_output=True, text=True)
        if res.returncode == 0:
            for line in res.stdout.strip().split('\n'):
                if line:
                    p = line.split('\t')
                    used += int(p[2])
                    avail += int(p[3])
                    data['zfs_pools'].append({
                        'name': p[0], 'size': format_bytes(int(p[1])),
                        'allocated': format_bytes(int(p[2])), 'free': format_bytes(int(p[3])),
                        'health': p[4]
                    })
    except: pass
    data['used'] = round(used / (1024**3), 1)       # bytes -> GB
    data['available'] = round(avail / (1024**3), 1)  # bytes -> GB
    return data
def get_storage_summary():
    """Quick storage summary; currently delegates to get_storage_info()."""
    return get_storage_info()  # Could be optimized by skipping SMART queries
def get_proxmox_storage():
    """Proxmox storage pools on the local node.

    Queries /cluster/resources and, when the optional
    proxmox_storage_monitor module was importable, appends any storages
    it reports as unavailable that are not already in the list.

    Returns:
        {'storage': [ {name, type, status, total, used, percent} ]}
        with total/used in GB.
    """
    node = get_proxmox_node_name()
    storage = []
    try:
        res = subprocess.run(['pvesh', 'get', '/cluster/resources', '--type', 'storage', '--output-format', 'json'], capture_output=True, text=True, timeout=10)
        if res.returncode == 0:
            for r in json.loads(res.stdout):
                if r.get('node') == node:
                    tot = int(r.get('maxdisk', 0))
                    usd = int(r.get('disk', 0))
                    storage.append({
                        'name': r.get('storage'),
                        'type': r.get('plugintype'),
                        'status': 'active' if r.get('status')=='available' else 'error',
                        'total': round(tot/(1024**3), 2),  # GB
                        'used': round(usd/(1024**3), 2),   # GB
                        'percent': round((usd/tot)*100, 1) if tot>0 else 0
                    })
    except: pass
    # Merge in unavailable storages seen by the external monitor.
    if proxmox_storage_monitor:
        u = proxmox_storage_monitor.get_storage_status().get('unavailable', [])
        exist = {x['name'] for x in storage}
        for x in u:
            if x['name'] not in exist: storage.append(x)
    return {'storage': storage}
def get_backups():
    """List vzdump backup volumes across all backup-capable storages.

    Walks every 'dir'/'nfs'/'cifs'/'pbs' storage, lists its 'backup'
    content and extracts the VMID from volume ids like
    'vzdump-qemu-105-...' / 'vzdump-lxc-100-...'. Best effort: any
    pvesh failure yields an empty list.

    Fix from review: `from datetime import datetime` was executed
    inside the innermost per-item loop; it is now hoisted.

    Returns:
        {'backups': [...], 'total': n} sorted newest-first.
    """
    from datetime import datetime  # hoisted out of the per-item loop

    backups = []
    try:
        res = subprocess.run(['pvesh', 'get', '/storage', '--output-format', 'json'], capture_output=True, text=True)
        if res.returncode == 0:
            for s in json.loads(res.stdout):
                sid = s.get('storage')
                if s.get('type') in ['dir', 'nfs', 'cifs', 'pbs']:
                    c_res = subprocess.run(['pvesh', 'get', f'/nodes/localhost/storage/{sid}/content', '--output-format', 'json'], capture_output=True, text=True)
                    if c_res.returncode == 0:
                        for item in json.loads(c_res.stdout):
                            if item.get('content') != 'backup':
                                continue
                            volid = item.get('volid', '')
                            # Derive the VMID from the vzdump volume name.
                            vmid = None
                            for marker in ('vzdump-qemu-', 'vzdump-lxc-'):
                                if marker in volid:
                                    try:
                                        vmid = volid.split(marker)[1].split('-')[0]
                                    except Exception:
                                        pass
                                    break
                            backups.append({
                                'volid': volid, 'storage': sid, 'vmid': vmid,
                                'size': item.get('size', 0),
                                'size_human': format_bytes(item.get('size', 0)),
                                'created': datetime.fromtimestamp(item.get('ctime', 0)).strftime('%Y-%m-%d %H:%M:%S'),
                                'timestamp': item.get('ctime', 0)
                            })
    except Exception:
        pass
    backups.sort(key=lambda x: x['timestamp'], reverse=True)
    return {'backups': backups, 'total': len(backups)}

View File

@@ -1,337 +0,0 @@
import os
import sys
import time
import socket
import subprocess
import json
import psutil
import platform
from datetime import datetime, timedelta
# Cache to avoid hammering the Proxmox API on every call.
_PROXMOX_NODE_CACHE = {"name": None, "timestamp": 0.0}
_PROXMOX_NODE_CACHE_TTL = 300  # seconds (5 minutes)
def get_proxmox_node_name() -> str:
    """Return the real Proxmox node name, cached for 5 minutes.

    Falls back to the short hostname when pvesh is unavailable.
    """
    now = time.time()
    cached = _PROXMOX_NODE_CACHE.get("name")
    stamp = float(_PROXMOX_NODE_CACHE.get("timestamp", 0.0))
    if cached and (now - stamp) < _PROXMOX_NODE_CACHE_TTL:
        return str(cached)
    try:
        proc = subprocess.run(
            ["pvesh", "get", "/nodes", "--output-format", "json"],
            capture_output=True, text=True, timeout=5, check=False,
        )
        if proc.returncode == 0 and proc.stdout:
            nodes = json.loads(proc.stdout)
            if isinstance(nodes, list) and nodes:
                node_name = nodes[0].get("node")
                if node_name:
                    _PROXMOX_NODE_CACHE["name"] = node_name
                    _PROXMOX_NODE_CACHE["timestamp"] = now
                    return node_name
    except Exception:
        pass
    # Fallback: short hostname (strip any domain part).
    return socket.gethostname().split(".", 1)[0]
def get_uptime():
    """System uptime as a 'H:MM:SS' / 'N days, H:MM:SS' string, or 'N/A'."""
    try:
        seconds_up = time.time() - psutil.boot_time()
        return str(timedelta(seconds=int(seconds_up)))
    except Exception:
        return "N/A"
def get_cpu_temperature():
    """Best-effort CPU temperature via psutil sensors (0 when unknown).

    Prefers well-known CPU sensor drivers; otherwise takes the first
    sensor reading available.
    """
    try:
        if not hasattr(psutil, "sensors_temperatures"):
            return 0
        readings = psutil.sensors_temperatures()
        if not readings:
            return 0
        # Known CPU sensor drivers, in order of preference.
        for chip in ('coretemp', 'k10temp', 'cpu_thermal', 'zenpower', 'acpitz'):
            entries = readings.get(chip)
            if entries:
                return entries[0].current
        # Fallback: first chip that reports anything.
        for entries in readings.values():
            if entries:
                return entries[0].current
    except Exception:
        pass
    return 0
def get_proxmox_version():
    """Proxmox VE version string (e.g. '8.2.4'), or None when unavailable."""
    try:
        proc = subprocess.run(['pveversion'], capture_output=True, text=True, timeout=5)
        if proc.returncode == 0:
            first_line = proc.stdout.strip().split('\n')[0]
            # pveversion prints 'pve-manager/X.Y.Z/...'; keep the version part.
            if '/' in first_line:
                return first_line.split('/')[1]
    except Exception:
        pass
    return None
def get_available_updates():
    """Number of upgradable APT packages (0 on any failure)."""
    try:
        proc = subprocess.run(['apt', 'list', '--upgradable'], capture_output=True, text=True, timeout=10)
        if proc.returncode == 0:
            # The first line is the 'Listing...' header, so subtract it.
            return max(0, len(proc.stdout.strip().split('\n')) - 1)
    except Exception:
        pass
    return 0
def get_system_info():
    """Aggregate host metrics (CPU, memory, temperature, versions) for the API."""
    cpu_pct = psutil.cpu_percent(interval=0.5)
    mem = psutil.virtual_memory()
    load_values = list(os.getloadavg())
    host = socket.gethostname()
    return {
        'cpu_usage': round(cpu_pct, 1),
        'memory_usage': round(mem.percent, 1),
        'memory_total': round(mem.total / (1024 ** 3), 1),  # GiB
        'memory_used': round(mem.used / (1024 ** 3), 1),    # GiB
        'temperature': get_cpu_temperature(),
        'uptime': get_uptime(),
        'load_average': load_values,
        'hostname': host,
        'proxmox_node': get_proxmox_node_name(),
        'node_id': host,
        'timestamp': datetime.now().isoformat(),
        'cpu_cores': psutil.cpu_count(logical=False),
        'cpu_threads': psutil.cpu_count(logical=True),
        'proxmox_version': get_proxmox_version(),
        'kernel_version': platform.release(),
        'available_updates': get_available_updates()
    }
def get_node_metrics(timeframe='week'):
    """Node-level RRD metrics via pvesh, with ZFS ARC size backfilled.

    pvesh RRD data often reports 'zfsarc' as 0 or missing; the current
    ARC size from /proc/spl/kstat/zfs/arcstats is substituted in that
    case so graphs stay meaningful.
    """
    node = get_proxmox_node_name()

    # Current ZFS ARC size in bytes (0 when ZFS is absent).
    arc_size = 0
    try:
        with open('/proc/spl/kstat/zfs/arcstats', 'r') as fh:
            for row in fh:
                if row.startswith('size'):
                    cols = row.split()
                    if len(cols) >= 3:
                        arc_size = int(cols[2])
                        break
    except Exception:
        pass

    try:
        proc = subprocess.run(
            ['pvesh', 'get', f'/nodes/{node}/rrddata', '--timeframe', timeframe, '--output-format', 'json'],
            capture_output=True, text=True, timeout=10
        )
        if proc.returncode != 0:
            return {'error': f"Failed to get RRD data: {proc.stderr}"}
        rrd_data = json.loads(proc.stdout)
        if arc_size > 0:
            for sample in rrd_data:
                if 'zfsarc' not in sample or sample.get('zfsarc', 0) == 0:
                    sample['zfsarc'] = arc_size
        return {'node': node, 'timeframe': timeframe, 'data': rrd_data}
    except Exception as e:
        return {'error': str(e)}
def get_logs(limit='200', priority=None, service=None, since_days=None):
    """System logs from journalctl as structured dicts.

    Args:
        limit: max number of entries (string, passed to `-n`); used
            only when since_days is absent or not an integer.
        priority: optional journalctl priority filter (`-p`).
        service: optional systemd unit filter (`-u`).
        since_days: optional day span; overrides `limit` when valid.

    Returns:
        {'logs': [...], 'total': n}, plus an 'error' key on failure.
    """
    cmd = ['journalctl', '--output', 'json', '--no-pager']
    if since_days:
        try:
            days = int(since_days)
            cmd.extend(['--since', f'{days} days ago'])
        except ValueError:
            # Invalid day count: fall back to the entry limit.
            cmd.extend(['-n', limit])
    else:
        cmd.extend(['-n', limit])
    if priority:
        cmd.extend(['-p', priority])
    if service:
        cmd.extend(['-u', service])
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        if result.returncode == 0:
            logs = []
            for line in result.stdout.strip().split('\n'):
                if line:
                    try:
                        entry = json.loads(line)
                        # __REALTIME_TIMESTAMP is microseconds since the epoch.
                        ts_us = int(entry.get('__REALTIME_TIMESTAMP', '0'))
                        timestamp = datetime.fromtimestamp(ts_us / 1000000).strftime('%Y-%m-%d %H:%M:%S')
                        # Map syslog numeric priorities to readable levels.
                        priority_map = {'0': 'emerg', '1': 'alert', '2': 'crit', '3': 'err', '4': 'warning', '5': 'notice', '6': 'info', '7': 'debug'}
                        p_num = str(entry.get('PRIORITY', '6'))
                        logs.append({
                            'timestamp': timestamp,
                            'level': priority_map.get(p_num, 'info'),
                            'service': entry.get('_SYSTEMD_UNIT', entry.get('SYSLOG_IDENTIFIER', 'system')),
                            'message': entry.get('MESSAGE', ''),
                            'source': 'journal',
                            'pid': entry.get('_PID', ''),
                            'hostname': entry.get('_HOSTNAME', '')
                        })
                    except Exception:
                        # Skip malformed journal lines rather than failing the batch.
                        continue
            return {'logs': logs, 'total': len(logs)}
    except Exception as e:
        return {'logs': [], 'total': 0, 'error': str(e)}
    return {'logs': [], 'total': 0, 'error': 'journalctl failed'}
def generate_log_file(log_type, hours, level, service, since_days):
    """Dump filtered journalctl output to a temp file; return its path.

    Returns None when journalctl or the temp-file write fails.
    """
    import tempfile
    args = ['journalctl', '--no-pager']
    # Time window: an explicit day span wins over the hour window.
    if since_days:
        args.extend(['--since', f'{since_days} days ago'])
    else:
        args.extend(['--since', f'{hours} hours ago'])
    # Log source selection.
    if log_type == 'kernel':
        args.append('-k')
    elif log_type == 'auth':
        args.extend(['-u', 'ssh', '-u', 'sshd'])
    if level != 'all':
        args.extend(['-p', level])
    if service != 'all':
        args.extend(['-u', service])
    try:
        proc = subprocess.run(args, capture_output=True, text=True, timeout=30)
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.log') as out:
            out.write(f"ProxMenux Log ({log_type}) - Generated: {datetime.now().isoformat()}\n")
            out.write("=" * 80 + "\n\n")
            out.write(proc.stdout if proc.returncode == 0 else "Error retrieving logs")
        return out.name
    except Exception:
        return None
def get_events(limit='50'):
    """Return the most recent Proxmox cluster task events.

    Queries ``pvesh get /cluster/tasks`` and maps each task to a dict
    with UI-friendly fields.  On any failure an empty event list is
    returned instead of raising.
    """
    collected = []
    try:
        proc = subprocess.run(
            ['pvesh', 'get', '/cluster/tasks', '--output-format', 'json'],
            capture_output=True, text=True, timeout=10)
        if proc.returncode == 0:
            for task in json.loads(proc.stdout)[:int(limit)]:
                start = task.get('starttime', 0)
                end = task.get('endtime', 0)

                # Human-readable duration, only for finished tasks.
                duration = ''
                if end and start:
                    elapsed = end - start
                    if elapsed < 60:
                        duration = f"{elapsed}s"
                    elif elapsed < 3600:
                        duration = f"{elapsed // 60}m {elapsed % 60}s"
                    else:
                        duration = f"{elapsed // 3600}h {(elapsed % 3600) // 60}m"

                status = task.get('status', 'unknown')
                # Map Proxmox task status onto a severity level for the UI.
                if status in ('stopped', 'error'):
                    level = 'error'
                elif status == 'running':
                    level = 'warning'
                else:
                    level = 'info'

                collected.append({
                    'upid': task.get('upid', ''),
                    'type': task.get('type', 'unknown'),
                    'status': status,
                    'level': level,
                    'user': task.get('user', 'unknown'),
                    'node': task.get('node', 'unknown'),
                    'vmid': str(task.get('id', '')) if task.get('id') else '',
                    'starttime': datetime.fromtimestamp(start).strftime('%Y-%m-%d %H:%M:%S') if start else '',
                    'endtime': datetime.fromtimestamp(end).strftime('%Y-%m-%d %H:%M:%S') if end else 'Running',
                    'duration': duration
                })
    except Exception:
        pass
    return {'events': collected, 'total': len(collected)}
def get_notifications():
    """Collect recent Proxmox notification activity.

    Combines two best-effort sources:
      * journal entries from the core PVE services whose message mentions
        notification/email/webhook/alert keywords, and
      * recent backup (vzdump) tasks from the cluster task list.

    Returns a dict with the newest 100 notifications (sorted by timestamp,
    descending) and the total number collected.  Never raises; failures
    simply yield fewer (or zero) notifications.
    """
    notifications = []
    try:
        cmd = [
            'journalctl', '-u', 'pve-ha-lrm', '-u', 'pve-ha-crm', '-u', 'pvedaemon',
            '-u', 'pveproxy', '-u', 'pvestatd', '--grep', 'notification|email|webhook|alert|notify',
            '-n', '100', '--output', 'json', '--no-pager'
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
        if result.returncode == 0:
            for line in result.stdout.strip().split('\n'):
                if line:
                    try:
                        entry = json.loads(line)
                        # __REALTIME_TIMESTAMP is microseconds since the epoch.
                        ts = int(entry.get('__REALTIME_TIMESTAMP', '0'))
                        timestamp = datetime.fromtimestamp(ts / 1000000).strftime('%Y-%m-%d %H:%M:%S')
                        msg = entry.get('MESSAGE', '')
                        # Classify by keywords in the message text.
                        ntype = 'info'
                        if 'email' in msg.lower(): ntype = 'email'
                        elif 'webhook' in msg.lower(): ntype = 'webhook'
                        elif 'error' in msg.lower() or 'fail' in msg.lower(): ntype = 'error'
                        elif 'alert' in msg.lower() or 'warning' in msg.lower(): ntype = 'alert'
                        notifications.append({
                            'timestamp': timestamp,
                            'type': ntype,
                            'service': entry.get('_SYSTEMD_UNIT', 'proxmox'),
                            'message': msg,
                            'source': 'journal'
                        })
                    except Exception:
                        # Skip malformed journal lines instead of aborting the scan.
                        continue
        # Backup-related notifications from the cluster task list.
        task_res = subprocess.run(['pvesh', 'get', '/cluster/tasks', '--output-format', 'json'], capture_output=True, text=True, timeout=5)
        if task_res.returncode == 0:
            tasks = json.loads(task_res.stdout)
            for task in tasks[:50]:
                if task.get('type') in ['vzdump', 'backup']:
                    status = task.get('status', 'unknown')
                    ntype = 'success' if status == 'OK' else 'error' if status == 'stopped' else 'info'
                    notifications.append({
                        'timestamp': datetime.fromtimestamp(task.get('starttime', 0)).strftime('%Y-%m-%d %H:%M:%S'),
                        'type': ntype,
                        'service': 'backup',
                        'message': f"Backup task {task.get('upid', 'unknown')}: {status}",
                        'source': 'task-log'
                    })
    except Exception:
        # Best-effort: return whatever was collected before the failure.
        pass
    notifications.sort(key=lambda x: x['timestamp'], reverse=True)
    return {'notifications': notifications[:100], 'total': len(notifications)}
def get_prometheus_metrics():
    """Render node metrics in the Prometheus text exposition format.

    Returns a tuple of (metrics body, HTTP headers dict) suitable for a
    Flask route return value.  CPU temperature is only emitted when a
    reading is available.
    """
    hostname = socket.gethostname()
    ts_ms = int(datetime.now().timestamp() * 1000)

    cpu_pct = psutil.cpu_percent(interval=0.5)
    mem_pct = psutil.virtual_memory().percent
    load_1m = os.getloadavg()[0]
    uptime_s = time.time() - psutil.boot_time()

    metrics = [
        f'proxmox_cpu_usage{{node="{hostname}"}} {cpu_pct} {ts_ms}',
        f'proxmox_memory_usage_percent{{node="{hostname}"}} {mem_pct} {ts_ms}',
        f'proxmox_load_average{{node="{hostname}",period="1m"}} {load_1m} {ts_ms}',
        f'proxmox_uptime_seconds{{node="{hostname}"}} {uptime_s} {ts_ms}',
    ]
    # Optional metric: skipped when no temperature sensor reading exists.
    cpu_temp = get_cpu_temperature()
    if cpu_temp:
        metrics.append(f'proxmox_cpu_temperature_celsius{{node="{hostname}"}} {cpu_temp} {ts_ms}')
    return '\n'.join(metrics) + '\n', {'Content-Type': 'text/plain; version=0.0.4; charset=utf-8'}

View File

@@ -1,267 +0,0 @@
import json
import subprocess
import os
import re
from system_monitor import get_proxmox_node_name
def parse_lxc_hardware_config(vmid, node):
    """Inspect an LXC container's config file for hardware passthrough.

    Detects GPU passthrough (Intel/AMD via /dev/dri, NVIDIA device nodes),
    common passthrough devices (Coral TPU, USB, audio, framebuffer, ...) and
    whether the container appears to run privileged.  Returns a dict with
    keys 'privileged', 'gpu_passthrough' and 'devices'; defaults are
    returned when the config file is missing or unreadable.
    """
    info = {
        'privileged': None,
        'gpu_passthrough': [],
        'devices': []
    }
    try:
        cfg_file = f'/etc/pve/lxc/{vmid}.conf'
        if not os.path.exists(cfg_file):
            return info
        with open(cfg_file, 'r') as fh:
            content = fh.read()

        # Privileged status: an explicit "unprivileged" flag wins; otherwise
        # fall back to heuristics on capability / cgroup device lines.
        if 'unprivileged: 1' in content:
            info['privileged'] = False
        elif 'unprivileged: 0' in content:
            info['privileged'] = True
        elif 'lxc.cap.drop:' in content and 'lxc.cap.drop: \n' in content:
            info['privileged'] = True
        elif 'lxc.cgroup2.devices.allow: a' in content:
            info['privileged'] = True

        # GPU passthrough detection.
        gpus = []
        if '/dev/dri' in content or 'renderD128' in content:
            gpus.append('Intel/AMD GPU')
        if 'nvidia' in content.lower() and any(
                token in content for token in ('nvidia0', 'nvidiactl', 'nvidia-uvm')):
            gpus.append('NVIDIA GPU')
        info['gpu_passthrough'] = gpus

        # Other passthrough devices, keyed by tell-tale config fragments.
        lowered = content.lower()
        detected = []
        if 'apex' in lowered or 'coral' in lowered:
            detected.append('Coral TPU')
        if 'ttyUSB' in content or 'ttyACM' in content:
            detected.append('USB Serial Devices')
        if '/dev/bus/usb' in content:
            detected.append('USB Passthrough')
        if '/dev/fb0' in content:
            detected.append('Framebuffer')
        if '/dev/snd' in content:
            detected.append('Audio Devices')
        if '/dev/input' in content:
            detected.append('Input Devices')
        if 'tty7' in content:
            detected.append('TTY Console')
        info['devices'] = detected
    except Exception:
        pass
    return info
def get_lxc_ip_from_lxc_info(vmid):
    """Query container IPs via 'lxc-info' (useful for DHCP-assigned addresses).

    Returns a dict separating Docker-internal addresses (172.x) from the
    rest, plus a best-guess primary IP, or None when lxc-info fails or
    reports no address.
    """
    try:
        proc = subprocess.run(['lxc-info', '-n', str(vmid), '-iH'],
                              capture_output=True, text=True, timeout=5)
        if proc.returncode == 0 and proc.stdout.strip():
            addresses = proc.stdout.strip().split()
            # 172.x addresses are usually Docker bridge networks inside the CT.
            internal = [a for a in addresses if a.startswith('172.')]
            external = [a for a in addresses if not a.startswith('172.')]
            if external:
                primary = external[0]
            elif internal:
                primary = internal[0]
            else:
                primary = addresses[0]
            return {
                'all_ips': addresses,
                'real_ips': external,
                'docker_ips': internal,
                'primary_ip': primary
            }
    except Exception:
        pass
    return None
def get_proxmox_vms():
    """Return all VMs and LXC containers hosted on the local node.

    Queries the cluster resources endpoint and filters to guests whose
    'node' matches this host.  Returns an empty list on any failure.
    """
    local_node = get_proxmox_node_name()
    guests = []
    try:
        proc = subprocess.run(
            ['pvesh', 'get', '/cluster/resources', '--type', 'vm', '--output-format', 'json'],
            capture_output=True, text=True, timeout=10)
        if proc.returncode == 0:
            # The cluster endpoint lists guests from every node; keep only ours.
            for entry in json.loads(proc.stdout):
                if entry.get('node') != local_node:
                    continue
                guests.append({
                    'vmid': entry.get('vmid'),
                    'name': entry.get('name', f"VM-{entry.get('vmid')}"),
                    'status': entry.get('status', 'unknown'),
                    'type': 'lxc' if entry.get('type') == 'lxc' else 'qemu',
                    'cpu': entry.get('cpu', 0),
                    'mem': entry.get('mem', 0),
                    'maxmem': entry.get('maxmem', 0),
                    'disk': entry.get('disk', 0),
                    'maxdisk': entry.get('maxdisk', 0),
                    'uptime': entry.get('uptime', 0),
                    'netin': entry.get('netin', 0),
                    'netout': entry.get('netout', 0),
                    'diskread': entry.get('diskread', 0),
                    'diskwrite': entry.get('diskwrite', 0)
                })
    except Exception:
        pass
    return guests
def get_vm_config(vmid):
    """Fetch the detailed configuration of a VM or LXC container.

    Tries the QEMU endpoint first and falls back to LXC.  For containers
    the result is enriched with hardware-passthrough details, IP info and
    (when running) the guest OS identification.

    Returns a dict with keys vmid/config/node/vm_type/status, plus the
    optional LXC extras, or None when the guest does not exist.
    """
    node = get_proxmox_node_name()
    # Try QEMU (full VM) first.
    res = subprocess.run(['pvesh', 'get', f'/nodes/{node}/qemu/{vmid}/config', '--output-format', 'json'],
                         capture_output=True, text=True, timeout=5)
    vm_type = 'qemu'
    if res.returncode != 0:
        # Not a VM: fall back to LXC (container).
        res = subprocess.run(['pvesh', 'get', f'/nodes/{node}/lxc/{vmid}/config', '--output-format', 'json'],
                             capture_output=True, text=True, timeout=5)
        vm_type = 'lxc'
    if res.returncode == 0:
        config = json.loads(res.stdout)
        # Current power state; default to 'stopped' when the query fails.
        status_res = subprocess.run(['pvesh', 'get', f'/nodes/{node}/{vm_type}/{vmid}/status/current', '--output-format', 'json'],
                                    capture_output=True, text=True, timeout=5)
        status = 'stopped'
        if status_res.returncode == 0:
            status = json.loads(status_res.stdout).get('status', 'stopped')
        response = {
            'vmid': vmid,
            'config': config,
            'node': node,
            'vm_type': vm_type,
            'status': status
        }
        # LXC-specific enrichment.
        if vm_type == 'lxc':
            response['hardware_info'] = parse_lxc_hardware_config(vmid, node)
            if status == 'running':
                ip_info = get_lxc_ip_from_lxc_info(vmid)
                if ip_info: response['lxc_ip_info'] = ip_info
                # Best-effort OS identification from inside the container.
                try:
                    os_res = subprocess.run(['pct', 'exec', str(vmid), '--', 'cat', '/etc/os-release'],
                                            capture_output=True, text=True, timeout=5)
                    if os_res.returncode == 0:
                        os_info = {}
                        for line in os_res.stdout.split('\n'):
                            if line.startswith('ID='): os_info['id'] = line.split('=', 1)[1].strip('"\'')
                            elif line.startswith('PRETTY_NAME='): os_info['pretty_name'] = line.split('=', 1)[1].strip('"\'')
                        if os_info: response['os_info'] = os_info
                except Exception:
                    # OS detection is optional; ignore pct/exec failures.
                    pass
        return response
    return None
def control_vm(vmid, action):
    """Run a power action (start, stop, shutdown, reboot) on a VM/LXC.

    Returns a result dict with a 'success' flag; unknown actions and
    unknown guests are rejected before any pvesh call is made.
    """
    if action not in ('start', 'stop', 'shutdown', 'reboot'):
        return {'success': False, 'message': 'Invalid action'}
    info = get_vm_config(vmid)
    if not info:
        return {'success': False, 'message': 'VM/LXC not found'}
    endpoint = f"/nodes/{info['node']}/{info['vm_type']}/{vmid}/status/{action}"
    proc = subprocess.run(['pvesh', 'create', endpoint],
                          capture_output=True, text=True, timeout=30)
    if proc.returncode != 0:
        return {'success': False, 'error': proc.stderr}
    return {'success': True, 'vmid': vmid, 'action': action, 'message': f'Successfully executed {action}'}
def update_vm_config(vmid, description):
    """Update the notes/description field of a VM or container."""
    info = get_vm_config(vmid)
    if not info:
        return {'success': False, 'message': 'VM not found'}
    proc = subprocess.run(
        ['pvesh', 'set', f'/nodes/{info["node"]}/{info["vm_type"]}/{vmid}/config', '-description', description],
        capture_output=True, text=True, timeout=30)
    if proc.returncode != 0:
        return {'success': False, 'error': proc.stderr}
    return {'success': True, 'message': 'Configuration updated'}
def get_vm_metrics(vmid, timeframe='week'):
    """Return historical RRD metrics for a VM or container.

    ``timeframe`` is passed straight through to the Proxmox rrddata
    endpoint (e.g. 'hour', 'day', 'week', 'month', 'year').
    """
    info = get_vm_config(vmid)
    if not info:
        return {'error': 'VM not found'}
    proc = subprocess.run(
        ['pvesh', 'get', f'/nodes/{info["node"]}/{info["vm_type"]}/{vmid}/rrddata',
         '--timeframe', timeframe, '--output-format', 'json'],
        capture_output=True, text=True, timeout=10)
    if proc.returncode != 0:
        return {'error': f'Failed to get metrics: {proc.stderr}'}
    return {'vmid': vmid, 'type': info['vm_type'], 'timeframe': timeframe, 'data': json.loads(proc.stdout)}
def get_vm_logs(vmid):
    """Return the internal (console/task) log lines of a VM or LXC.

    Each non-blank output line becomes a dict {'n': index, 't': text}.
    """
    info = get_vm_config(vmid)
    if not info:
        return {'error': 'VM not found'}
    proc = subprocess.run(
        ['pvesh', 'get', f'/nodes/{info["node"]}/{info["vm_type"]}/{vmid}/log', '--start', '0', '--limit', '1000'],
        capture_output=True, text=True, timeout=10)
    entries = []
    if proc.returncode == 0:
        for idx, text in enumerate(proc.stdout.split('\n')):
            if text.strip():
                entries.append({'n': idx, 't': text})
    return {'vmid': vmid, 'name': info['config'].get('name'), 'logs': entries}
def get_task_log(upid):
    """Read the on-disk log file for a Proxmox task identified by its UPID.

    Task logs live under /var/log/pve/tasks/<X>/ where <X> is the last hex
    character of the task's start-time field.  Returns the file contents,
    or a short diagnostic string when the UPID is malformed or no matching
    log file exists.
    """
    try:
        trimmed = upid.rstrip(':')
        fields = trimmed.split(':')
        if len(fields) < 5:
            return "Invalid UPID format"
        # The directory name is the final hex character of the start time.
        subdir = fields[4][-1].lower()
        candidates = (
            f"/var/log/pve/tasks/{subdir}/{trimmed}",
            f"/var/log/pve/tasks/{subdir.upper()}/{trimmed}",
            f"/var/log/pve/tasks/{subdir}/{trimmed}:",
        )
        for candidate in candidates:
            if os.path.exists(candidate):
                with open(candidate, 'r', encoding='utf-8', errors='ignore') as fh:
                    return fh.read()
        return "Log file not found on disk"
    except Exception as e:
        return f"Error reading log: {str(e)}"