Update backend monitor

MacRimi
2026-01-29 17:47:10 +01:00
parent 7eaa692712
commit a20d61037e
12 changed files with 1720 additions and 6361 deletions


@@ -89,6 +89,16 @@ cp "$SCRIPT_DIR/flask_terminal_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || ech
cp "$SCRIPT_DIR/hardware_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ hardware_monitor.py not found"
cp "$SCRIPT_DIR/proxmox_storage_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ proxmox_storage_monitor.py not found"
cp "$SCRIPT_DIR/flask_script_runner.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_script_runner.py not found"
cp "$SCRIPT_DIR/system_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ system_monitor.py not found"
cp "$SCRIPT_DIR/flask_system_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_system_routes.py not found"
cp "$SCRIPT_DIR/storage_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ storage_monitor.py not found"
cp "$SCRIPT_DIR/flask_storage_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_storage_routes.py not found"
cp "$SCRIPT_DIR/network_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ network_monitor.py not found"
cp "$SCRIPT_DIR/flask_network_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_network_routes.py not found"
cp "$SCRIPT_DIR/vm_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ vm_monitor.py not found"
cp "$SCRIPT_DIR/flask_vm_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_vm_routes.py not found"
cp "$SCRIPT_DIR/flask_hardware_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_hardware_routes.py not found"
cp "$SCRIPT_DIR/flask_script_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_script_routes.py not found"
echo "📋 Adding translation support..."
cat > "$APP_DIR/usr/bin/translate_cli.py" << 'PYEOF'

flask_hardware_routes.py (new file)

@@ -0,0 +1,37 @@
from flask import Blueprint, jsonify
from jwt_middleware import require_auth
import hardware_monitor
# Define the Blueprint
hardware_bp = Blueprint('hardware', __name__)
@hardware_bp.route('/api/hardware', methods=['GET'])
@require_auth
def api_hardware():
"""
Return complete, aggregated information for all hardware.
Includes CPU, motherboard, RAM, disks, GPUs, IPMI and UPS.
"""
try:
data = hardware_monitor.get_hardware_info()
return jsonify(data)
except Exception as e:
# On a critical error, return a 500 but try to stay descriptive
return jsonify({'error': str(e)}), 500
@hardware_bp.route('/api/gpu/<slot>/realtime', methods=['GET'])
@require_auth
def api_gpu_realtime(slot):
"""
Return real-time metrics (usage, temperature, memory) for a specific GPU.
The 'slot' is the PCI address (e.g. '01:00.0').
"""
try:
data = hardware_monitor.get_gpu_realtime_data(slot)
if not data:
return jsonify({'error': 'GPU not found'}), 404
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
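A minimal sketch of how this blueprint would plug into the Flask app. The main application module is not part of this diff, so the module and app names here are assumptions:

from flask import Flask
from flask_hardware_routes import hardware_bp

app = Flask(__name__)  # hypothetical main app; the real wiring lives in the server entry point
app.register_blueprint(hardware_bp)  # exposes /api/hardware and /api/gpu/<slot>/realtime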

flask_network_routes.py (new file)

@@ -0,0 +1,50 @@
from flask import Blueprint, jsonify, request
from jwt_middleware import require_auth
import network_monitor
# Define the Blueprint for the network routes
network_bp = Blueprint('network', __name__)
@network_bp.route('/api/network', methods=['GET'])
@require_auth
def api_network():
"""
Return complete information about all network interfaces.
Includes physical and virtual interfaces, bridges, bonds and current traffic.
"""
try:
data = network_monitor.get_network_info()
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
@network_bp.route('/api/network/summary', methods=['GET'])
@require_auth
def api_network_summary():
"""
Return an optimized network summary.
Ideal for dashboards that do not need deep detail on every configuration.
"""
try:
data = network_monitor.get_network_summary()
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
@network_bp.route('/api/network/<interface_name>/metrics', methods=['GET'])
@require_auth
def api_network_interface_metrics(interface_name):
"""
Return historical (RRD) metrics for a specific interface.
Supports several time periods (hour, day, week, month, year).
"""
try:
timeframe = request.args.get('timeframe', 'day')
# Basic timeframe validation to avoid errors in pvesh
if timeframe not in ['hour', 'day', 'week', 'month', 'year']:
return jsonify({'error': 'Invalid timeframe'}), 400
data = network_monitor.get_interface_metrics(interface_name, timeframe)
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
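A hedged client-side sketch of the metrics endpoint above; the base URL and token are placeholders, and the timeframe must pass the validation shown in api_network_interface_metrics:

import requests

BASE = 'http://localhost:8008'  # assumption: wherever this Flask app listens
headers = {'Authorization': 'Bearer <jwt>'}  # require_auth expects a valid token

# Any value outside hour/day/week/month/year gets a 400 back
r = requests.get(f'{BASE}/api/network/vmbr0/metrics',
                 params={'timeframe': 'hour'}, headers=headers)
print(r.json())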

flask_script_routes.py (new file)

@@ -0,0 +1,91 @@
from flask import Blueprint, jsonify, request, Response
from flask_script_runner import script_runner
import threading
import os
# Define the Blueprint
script_bp = Blueprint('script', __name__)
@script_bp.route('/api/scripts/execute', methods=['POST'])
def execute_script():
"""
Run a bash script with real-time logs.
Validates that the script lives inside the allowed directory.
"""
try:
data = request.json
script_name = data.get('script_name')
script_params = data.get('params', {})
script_relative_path = data.get('script_relative_path')
if not script_relative_path:
return jsonify({'error': 'script_relative_path is required'}), 400
# Safe base directory
BASE_SCRIPTS_DIR = '/usr/local/share/proxmenux/scripts'
script_path = os.path.join(BASE_SCRIPTS_DIR, script_relative_path)
# Basic security validation (prevent path traversal); compare against the
# prefix including the separator so sibling paths like ".../scripts-evil" are rejected too
script_path = os.path.abspath(script_path)
if not script_path.startswith(BASE_SCRIPTS_DIR + os.sep):
return jsonify({'error': 'Invalid script path'}), 403
if not os.path.exists(script_path):
return jsonify({'success': False, 'error': 'Script file not found'}), 404
# Create a session and run it in a separate thread
session_id = script_runner.create_session(script_name)
def run_script():
script_runner.execute_script(script_path, session_id, script_params)
thread = threading.Thread(target=run_script, daemon=True)
thread.start()
return jsonify({
'success': True,
'session_id': session_id
})
except Exception as e:
return jsonify({'success': False, 'error': str(e)}), 500
@script_bp.route('/api/scripts/status/<session_id>', methods=['GET'])
def get_script_status(session_id):
"""Obtiene el estado actual de una sesión de script."""
try:
status = script_runner.get_session_status(session_id)
return jsonify(status)
except Exception as e:
return jsonify({'success': False, 'error': str(e)}), 500
@script_bp.route('/api/scripts/respond', methods=['POST'])
def respond_to_script():
"""
Send a response (user input) to an interactive script
that is waiting for data.
"""
try:
data = request.json
session_id = data.get('session_id')
interaction_id = data.get('interaction_id')
value = data.get('value')
result = script_runner.respond_to_interaction(session_id, interaction_id, value)
return jsonify(result)
except Exception as e:
return jsonify({'success': False, 'error': str(e)}), 500
@script_bp.route('/api/scripts/logs/<session_id>', methods=['GET'])
def stream_script_logs(session_id):
"""
Stream the script's logs in real time using Server-Sent Events (SSE).
"""
try:
def generate():
for log_entry in script_runner.stream_logs(session_id):
yield f"data: {log_entry}\n\n"
return Response(generate(), mimetype='text/event-stream')
except Exception as e:
return jsonify({'success': False, 'error': str(e)}), 500
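Driving this API end to end takes two steps: POST to /api/scripts/execute, then read the SSE stream. A hedged sketch (the base URL and script name are placeholders; streaming with `requests` is one simple way to consume SSE):

import requests

BASE = 'http://localhost:8008'  # placeholder
resp = requests.post(f'{BASE}/api/scripts/execute', json={
    'script_name': 'demo',
    'script_relative_path': 'demo.sh',  # resolved under /usr/local/share/proxmenux/scripts
}).json()

with requests.get(f"{BASE}/api/scripts/logs/{resp['session_id']}", stream=True) as r:
    for line in r.iter_lines(decode_unicode=True):
        if line and line.startswith('data: '):
            print(line[6:])  # each SSE event carries one log entry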

File diff suppressed because it is too large.

flask_storage_routes.py (new file)

@@ -0,0 +1,41 @@
from flask import Blueprint, jsonify
from jwt_middleware import require_auth
import storage_monitor
storage_bp = Blueprint('storage', __name__)
@storage_bp.route('/api/storage', methods=['GET'])
@require_auth
def api_storage():
try:
data = storage_monitor.get_storage_info()
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
@storage_bp.route('/api/storage/summary', methods=['GET'])
@require_auth
def api_storage_summary():
try:
data = storage_monitor.get_storage_summary()
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
@storage_bp.route('/api/proxmox-storage', methods=['GET'])
@require_auth
def api_proxmox_storage():
try:
data = storage_monitor.get_proxmox_storage()
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
@storage_bp.route('/api/backups', methods=['GET'])
@require_auth
def api_backups():
try:
data = storage_monitor.get_backups()
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500

flask_system_routes.py (new file)

@@ -0,0 +1,98 @@
from flask import Blueprint, jsonify, request, send_file
from jwt_middleware import require_auth
import system_monitor
import os
system_bp = Blueprint('system', __name__)
@system_bp.route('/api/system', methods=['GET'])
@require_auth
def api_system():
try:
data = system_monitor.get_system_info()
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
@system_bp.route('/api/logs', methods=['GET'])
@require_auth
def api_logs():
try:
limit = request.args.get('limit', '200')
priority = request.args.get('priority')
service = request.args.get('service')
since_days = request.args.get('since_days')
data = system_monitor.get_logs(limit, priority, service, since_days)
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
@system_bp.route('/api/logs/download', methods=['GET'])
@require_auth
def api_logs_download():
try:
log_type = request.args.get('type', 'system')
hours = int(request.args.get('hours', '48'))
level = request.args.get('level', 'all')
service = request.args.get('service', 'all')
since_days = request.args.get('since_days', None)
file_path = system_monitor.generate_log_file(log_type, hours, level, service, since_days)
if file_path and os.path.exists(file_path):
return send_file(
file_path,
mimetype='text/plain',
as_attachment=True,
download_name=f'proxmox_{log_type}.log'
)
else:
return jsonify({'error': 'Failed to generate log file'}), 500
except Exception as e:
return jsonify({'error': str(e)}), 500
@system_bp.route('/api/events', methods=['GET'])
@require_auth
def api_events():
try:
limit = request.args.get('limit', '50')
data = system_monitor.get_events(limit)
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
@system_bp.route('/api/notifications', methods=['GET'])
@require_auth
def api_notifications():
try:
data = system_monitor.get_notifications()
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
@system_bp.route('/api/notifications/download', methods=['GET'])
@require_auth
def api_notifications_download():
return jsonify({'error': 'Not implemented in modular version yet'}), 501
@system_bp.route('/api/node/metrics', methods=['GET'])
@require_auth
def api_node_metrics():
try:
timeframe = request.args.get('timeframe', 'week')
data = system_monitor.get_node_metrics(timeframe)
if 'error' in data:
return jsonify(data), 500
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
@system_bp.route('/api/prometheus', methods=['GET'])
@require_auth
def api_prometheus():
try:
metrics, content_type = system_monitor.get_prometheus_metrics()
return metrics, 200, content_type
except Exception as e:
return f'# Error generating metrics: {str(e)}\n', 500, {'Content-Type': 'text/plain'}
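The /api/prometheus handler relies on Flask's tuple return convention: returning (body, status, headers) builds a full response, which is why get_prometheus_metrics hands back the body together with a Content-Type header dict. A standalone illustration:

from flask import Flask

app = Flask(__name__)

@app.route('/metrics')
def metrics():
    body = 'demo_metric{node="pve"} 1\n'
    # Flask expands (body, status, headers) into a Response object
    return body, 200, {'Content-Type': 'text/plain; version=0.0.4; charset=utf-8'}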

flask_vm_routes.py (new file)

@@ -0,0 +1,122 @@
from flask import Blueprint, jsonify, request
from jwt_middleware import require_auth
import vm_monitor
# Define the Blueprint for the VM routes
vm_bp = Blueprint('vm', __name__)
@vm_bp.route('/api/vms', methods=['GET'])
@require_auth
def api_vms():
"""
Return the list of all virtual machines and LXC containers.
"""
try:
data = vm_monitor.get_proxmox_vms()
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
@vm_bp.route('/api/vms/<int:vmid>', methods=['GET'])
@require_auth
def get_vm_config(vmid):
"""
Return the detailed configuration of a specific VM.
Includes hardware, status and network data.
"""
try:
data = vm_monitor.get_vm_config(vmid)
if not data:
return jsonify({'error': 'VM/LXC not found'}), 404
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
@vm_bp.route('/api/vms/<int:vmid>/control', methods=['POST'])
@require_auth
def api_vm_control(vmid):
"""
Control a VM's state (start, stop, shutdown, reboot).
"""
try:
data = request.get_json()
action = data.get('action')
result = vm_monitor.control_vm(vmid, action)
if result.get('success'):
return jsonify(result)
else:
return jsonify(result), 500 if 'error' in result else 400
except Exception as e:
return jsonify({'error': str(e)}), 500
@vm_bp.route('/api/vms/<int:vmid>/config', methods=['PUT'])
@require_auth
def api_vm_config_update(vmid):
"""
Update a VM's configuration (for example, the notes/description).
"""
try:
data = request.get_json()
description = data.get('description', '')
result = vm_monitor.update_vm_config(vmid, description)
if result.get('success'):
return jsonify(result)
else:
return jsonify(result), 500
except Exception as e:
return jsonify({'error': str(e)}), 500
@vm_bp.route('/api/vms/<int:vmid>/metrics', methods=['GET'])
@require_auth
def api_vm_metrics(vmid):
"""
Return historical (RRD) CPU, memory and network metrics for a VM.
"""
try:
timeframe = request.args.get('timeframe', 'week')
if timeframe not in ['hour', 'day', 'week', 'month', 'year']:
return jsonify({'error': 'Invalid timeframe'}), 400
data = vm_monitor.get_vm_metrics(vmid, timeframe)
if 'error' in data:
return jsonify(data), 500 if 'Failed' in data['error'] else 404
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
@vm_bp.route('/api/vms/<int:vmid>/logs', methods=['GET'])
@require_auth
def api_vm_logs(vmid):
"""
Return the VM/LXC's internal (console/serial) logs.
"""
try:
data = vm_monitor.get_vm_logs(vmid)
if 'error' in data:
return jsonify(data), 404
return jsonify(data)
except Exception as e:
return jsonify({'error': str(e)}), 500
@vm_bp.route('/api/task-log/<path:upid>', methods=['GET'])
@require_auth
def get_task_log(upid):
"""
Return the full log of a Proxmox task (e.g. a backup or a VM start).
The UPID is the task's unique identifier.
"""
try:
log_text = vm_monitor.get_task_log(upid)
if log_text.startswith("Error") or log_text.startswith("Log file not found"):
return jsonify({'error': log_text}), 404
return log_text, 200, {'Content-Type': 'text/plain; charset=utf-8'}
except Exception as e:
return jsonify({'error': str(e)}), 500
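A hedged usage sketch for the control route above; the base URL and VMID are placeholders:

import requests

r = requests.post('http://localhost:8008/api/vms/100/control',
                  json={'action': 'shutdown'},  # one of start/stop/shutdown/reboot
                  headers={'Authorization': 'Bearer <jwt>'})
print(r.status_code, r.json())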

network_monitor.py (new file)

@@ -0,0 +1,311 @@
import os
import re
import json
import socket
import psutil
import subprocess
from system_monitor import get_proxmox_node_name
def extract_vmid_from_interface(interface_name):
"""
Extract the VM ID from the interface name.
Example: veth100i0 -> 100 (LXC), tap105i0 -> 105 (VM)
"""
try:
match = re.match(r'(veth|tap)(\d+)i\d+', interface_name)
if match:
vmid = int(match.group(2))
interface_type = 'lxc' if match.group(1) == 'veth' else 'vm'
return vmid, interface_type
return None, None
except Exception:
return None, None
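# For reference, the mapping this regex produces:
#   extract_vmid_from_interface('veth100i0') -> (100, 'lxc')
#   extract_vmid_from_interface('tap105i0')  -> (105, 'vm')
#   extract_vmid_from_interface('vmbr0')     -> (None, None)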
def get_vm_lxc_names():
"""
Build a map of VMIDs to names (e.g. 100 -> 'Web-Server').
Helps identify which interface belongs to which machine.
"""
vm_lxc_map = {}
try:
local_node = get_proxmox_node_name()
# Query pvesh for the list of VMs
result = subprocess.run(['pvesh', 'get', '/cluster/resources', '--type', 'vm', '--output-format', 'json'],
capture_output=True, text=True, timeout=10)
if result.returncode == 0:
resources = json.loads(result.stdout)
for resource in resources:
if resource.get('node') == local_node:
vmid = resource.get('vmid')
if vmid:
vm_lxc_map[vmid] = {
'name': resource.get('name', f'VM-{vmid}'),
'type': 'lxc' if resource.get('type') == 'lxc' else 'vm',
'status': resource.get('status', 'unknown')
}
except Exception:
pass
return vm_lxc_map
def get_interface_type(interface_name):
"""
Classify the network interface into manageable types.
"""
if interface_name == 'lo': return 'skip'
if interface_name.startswith(('veth', 'tap')): return 'vm_lxc'
if interface_name.startswith(('tun', 'vnet', 'docker', 'virbr')): return 'skip'
if interface_name.startswith('bond'): return 'bond'
if interface_name.startswith(('vmbr', 'br')): return 'bridge'
if '.' in interface_name: return 'vlan'
# Check whether this is a real physical interface
if os.path.exists(f'/sys/class/net/{interface_name}/device'): return 'physical'
# Fall back to common name prefixes
if interface_name.startswith(('enp', 'eth', 'eno', 'ens', 'enx', 'wlan', 'wlp', 'wlo', 'usb')): return 'physical'
return 'skip'
def get_bond_info(bond_name):
"""Obtiene detalles de una interfaz Bond (agregación de enlaces)."""
info = {'mode': 'unknown', 'slaves': [], 'active_slave': None}
try:
path = f'/proc/net/bonding/{bond_name}'
if os.path.exists(path):
with open(path, 'r') as f:
content = f.read()
for line in content.split('\n'):
if 'Bonding Mode:' in line: info['mode'] = line.split(':', 1)[1].strip()
elif 'Slave Interface:' in line: info['slaves'].append(line.split(':', 1)[1].strip())
elif 'Currently Active Slave:' in line: info['active_slave'] = line.split(':', 1)[1].strip()
except Exception: pass
return info
def get_bridge_info(bridge_name):
"""
Get a bridge's member interfaces.
Tries to identify the real physical interface behind the bridge.
"""
info = {'members': [], 'physical_interface': None, 'physical_duplex': 'unknown', 'bond_slaves': []}
try:
brif_path = f'/sys/class/net/{bridge_name}/brif'
if os.path.exists(brif_path):
members = os.listdir(brif_path)
info['members'] = members
for member in members:
# If the bridge sits on a bond
if member.startswith('bond'):
info['physical_interface'] = member
bond_info = get_bond_info(member)
info['bond_slaves'] = bond_info['slaves']
if bond_info['active_slave']:
try:
stats = psutil.net_if_stats().get(bond_info['active_slave'])
if stats:
info['physical_duplex'] = 'full' if stats.duplex == 2 else 'half' if stats.duplex == 1 else 'unknown'
except: pass
break
# If the bridge uses a physical interface directly
elif member.startswith(('enp', 'eth', 'eno', 'ens', 'wlan')):
info['physical_interface'] = member
try:
stats = psutil.net_if_stats().get(member)
if stats:
info['physical_duplex'] = 'full' if stats.duplex == 2 else 'half' if stats.duplex == 1 else 'unknown'
except: pass
break
except Exception: pass
return info
def get_network_info():
"""
Gather complete, detailed information about the ENTIRE network.
"""
data = {
'interfaces': [], 'physical_interfaces': [], 'bridge_interfaces': [], 'vm_lxc_interfaces': [],
'traffic': {}, 'hostname': get_proxmox_node_name(), 'domain': None, 'dns_servers': []
}
# Read DNS configuration
try:
with open('/etc/resolv.conf', 'r') as f:
for line in f:
if line.startswith('nameserver'): data['dns_servers'].append(line.split()[1])
elif line.startswith('domain'): data['domain'] = line.split()[1]
elif line.startswith('search') and not data['domain']:
parts = line.split()
if len(parts) > 1: data['domain'] = parts[1]
except: pass
vm_map = get_vm_lxc_names()
stats = psutil.net_if_stats()
addrs = psutil.net_if_addrs()
io_counters = psutil.net_io_counters(pernic=True)
# Counters
counts = {'physical': {'active':0, 'total':0}, 'bridge': {'active':0, 'total':0}, 'vm': {'active':0, 'total':0}}
for name, stat in stats.items():
itype = get_interface_type(name)
if itype == 'skip': continue
info = {
'name': name, 'type': itype, 'status': 'up' if stat.isup else 'down',
'speed': stat.speed, 'mtu': stat.mtu,
'duplex': 'full' if stat.duplex == 2 else 'half' if stat.duplex == 1 else 'unknown',
'addresses': []
}
# IPs
if name in addrs:
for addr in addrs[name]:
if addr.family == socket.AF_INET: # IPv4
info['addresses'].append({'ip': addr.address, 'netmask': addr.netmask})
elif addr.family == 17: # MAC
info['mac_address'] = addr.address
# Traffic
if name in io_counters:
io = io_counters[name]
# For VM interfaces, swap perspective (host tx = guest rx)
if itype == 'vm_lxc':
info.update({'bytes_sent': io.bytes_recv, 'bytes_recv': io.bytes_sent,
'packets_sent': io.packets_recv, 'packets_recv': io.packets_sent})
else:
info.update({'bytes_sent': io.bytes_sent, 'bytes_recv': io.bytes_recv,
'packets_sent': io.packets_sent, 'packets_recv': io.packets_recv})
info.update({'errors_in': io.errin, 'errors_out': io.errout,
'drops_in': io.dropin, 'drops_out': io.dropout})
# Classification
if itype == 'vm_lxc':
counts['vm']['total'] += 1
if stat.isup: counts['vm']['active'] += 1
vmid, _ = extract_vmid_from_interface(name)
if vmid and vmid in vm_map:
info.update({'vmid': vmid, 'vm_name': vm_map[vmid]['name'],
'vm_type': vm_map[vmid]['type'], 'vm_status': vm_map[vmid]['status']})
elif vmid:
info.update({'vmid': vmid, 'vm_name': f'VM/LXC {vmid}', 'vm_status': 'unknown'})
data['vm_lxc_interfaces'].append(info)
elif itype == 'physical':
counts['physical']['total'] += 1
if stat.isup: counts['physical']['active'] += 1
data['physical_interfaces'].append(info)
elif itype == 'bridge':
counts['bridge']['total'] += 1
if stat.isup: counts['bridge']['active'] += 1
b_info = get_bridge_info(name)
info['bridge_members'] = b_info['members']
info['bridge_physical_interface'] = b_info['physical_interface']
if b_info['physical_duplex'] != 'unknown':
info['duplex'] = b_info['physical_duplex']
data['bridge_interfaces'].append(info)
elif itype == 'bond':
bond_info = get_bond_info(name)
info.update({'bond_mode': bond_info['mode'], 'bond_slaves': bond_info['slaves'],
'bond_active_slave': bond_info['active_slave']})
data['interfaces'].append(info)
# Global traffic
g_io = psutil.net_io_counters()
data['traffic'] = {
'bytes_sent': g_io.bytes_sent, 'bytes_recv': g_io.bytes_recv,
'packets_sent': g_io.packets_sent, 'packets_recv': g_io.packets_recv,
'packet_loss_in': 0, 'packet_loss_out': 0
}
tin = g_io.packets_recv + g_io.dropin
if tin > 0: data['traffic']['packet_loss_in'] = round((g_io.dropin / tin) * 100, 2)
tout = g_io.packets_sent + g_io.dropout
if tout > 0: data['traffic']['packet_loss_out'] = round((g_io.dropout / tout) * 100, 2)
data.update({
'physical_active_count': counts['physical']['active'], 'physical_total_count': counts['physical']['total'],
'bridge_active_count': counts['bridge']['active'], 'bridge_total_count': counts['bridge']['total'],
'vm_lxc_active_count': counts['vm']['active'], 'vm_lxc_total_count': counts['vm']['total']
})
return data
def get_network_summary():
"""Resumen rápido de red."""
net_io = psutil.net_io_counters()
stats = psutil.net_if_stats()
addrs = psutil.net_if_addrs()
phys_ifaces = []
bridge_ifaces = []
counts = {'phys_active':0, 'phys_total':0, 'br_active':0, 'br_total':0}
for name, stat in stats.items():
if name in ['lo', 'docker0'] or name.startswith(('veth', 'tap', 'fw')): continue
is_up = stat.isup
addresses = []
if name in addrs:
for addr in addrs[name]:
if addr.family == socket.AF_INET:
addresses.append({'ip': addr.address, 'netmask': addr.netmask})
info = {'name': name, 'status': 'up' if is_up else 'down', 'addresses': addresses}
if name.startswith(('enp', 'eth', 'eno', 'ens', 'wlan')):
counts['phys_total'] += 1
if is_up: counts['phys_active'] += 1
phys_ifaces.append(info)
elif name.startswith(('vmbr', 'br')):
counts['br_total'] += 1
if is_up: counts['br_active'] += 1
bridge_ifaces.append(info)
return {
'physical_active_count': counts['phys_active'], 'physical_total_count': counts['phys_total'],
'bridge_active_count': counts['br_active'], 'bridge_total_count': counts['br_total'],
'physical_interfaces': phys_ifaces, 'bridge_interfaces': bridge_ifaces,
'traffic': {'bytes_sent': net_io.bytes_sent, 'bytes_recv': net_io.bytes_recv,
'packets_sent': net_io.packets_sent, 'packets_recv': net_io.packets_recv}
}
def get_interface_metrics(interface_name, timeframe='day'):
"""Obtiene métricas RRD históricas para una interfaz."""
local_node = get_proxmox_node_name()
itype = get_interface_type(interface_name)
rrd_data = []
try:
# For VM/LXC interfaces, pull data from the guest itself
if itype == 'vm_lxc':
vmid, vm_type = extract_vmid_from_interface(interface_name)
if vmid:
res = subprocess.run(['pvesh', 'get', f'/nodes/{local_node}/{vm_type}/{vmid}/rrddata',
'--timeframe', timeframe, '--output-format', 'json'],
capture_output=True, text=True, timeout=10)
if res.returncode == 0:
data = json.loads(res.stdout)
for point in data:
item = {'time': point.get('time')}
if 'netin' in point: item['netin'] = point['netin']
if 'netout' in point: item['netout'] = point['netout']
rrd_data.append(item)
else:
# For physical/bridge interfaces, pull node-level data (total node traffic)
res = subprocess.run(['pvesh', 'get', f'/nodes/{local_node}/rrddata',
'--timeframe', timeframe, '--output-format', 'json'],
capture_output=True, text=True, timeout=10)
if res.returncode == 0:
data = json.loads(res.stdout)
for point in data:
item = {'time': point.get('time')}
if 'netin' in point: item['netin'] = point['netin']
if 'netout' in point: item['netout'] = point['netout']
rrd_data.append(item)
return {'interface': interface_name, 'type': itype, 'timeframe': timeframe, 'data': rrd_data}
except Exception as e:
return {'error': str(e)}

storage_monitor.py (new file)

@@ -0,0 +1,261 @@
import os
import json
import math
import subprocess
import re
import psutil
from datetime import datetime
from system_monitor import get_proxmox_node_name
# Try to import the external storage monitor if it exists
try:
from proxmox_storage_monitor import proxmox_storage_monitor
except ImportError:
proxmox_storage_monitor = None
def format_bytes(size_in_bytes):
if size_in_bytes is None: return "N/A"
if size_in_bytes == 0: return "0 B"
size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB")
i = int(math.floor(math.log(size_in_bytes, 1024)))
p = math.pow(1024, i)
s = round(size_in_bytes / p, 2)
return f"{s} {size_name[i]}"
def get_pcie_link_speed(disk_name):
"""Obtiene info PCIe para NVMe."""
pcie_info = {'pcie_gen': None, 'pcie_width': None}
try:
if disk_name.startswith('nvme'):
match = re.match(r'(nvme\d+)n\d+', disk_name)
if match:
controller = match.group(1)
sys_path = f'/sys/class/nvme/{controller}/device'
pci_address = None
if os.path.exists(sys_path):
pci_address = os.path.basename(os.readlink(sys_path))
else:
alt_path = f'/sys/block/{disk_name}/device/device'
if os.path.exists(alt_path):
pci_address = os.path.basename(os.readlink(alt_path))
if pci_address:
res = subprocess.run(['lspci', '-vvv', '-s', pci_address], capture_output=True, text=True, timeout=5)
if res.returncode == 0:
for line in res.stdout.split('\n'):
if 'LnkSta:' in line:
if 'Speed' in line:
m = re.search(r'Speed\s+([\d.]+)GT/s', line)
if m:
gt = float(m.group(1))
if gt <= 8.0: pcie_info['pcie_gen'] = '3.0'
elif gt <= 16.0: pcie_info['pcie_gen'] = '4.0'
else: pcie_info['pcie_gen'] = '5.0'
if 'Width' in line:
m = re.search(r'Width\s+x(\d+)', line)
if m: pcie_info['pcie_width'] = f'x{m.group(1)}'
except Exception: pass
return pcie_info
def get_smart_data(disk_name):
"""Obtiene datos SMART detallados."""
smart_data = {
'temperature': 0, 'health': 'unknown', 'power_on_hours': 0, 'smart_status': 'unknown',
'model': 'Unknown', 'serial': 'Unknown', 'reallocated_sectors': 0,
'ssd_life_left': None, 'rotation_rate': 0
}
cmds = [
['smartctl', '-a', '-j', f'/dev/{disk_name}'],
['smartctl', '-a', '-j', '-d', 'ata', f'/dev/{disk_name}'],
['smartctl', '-a', '-j', '-d', 'nvme', f'/dev/{disk_name}'],
['smartctl', '-a', f'/dev/{disk_name}']
]
for cmd in cmds:
try:
res = subprocess.run(cmd, capture_output=True, text=True, timeout=8)
if not res.stdout: continue
if '-j' in cmd:
try:
data = json.loads(res.stdout)
if 'model_name' in data: smart_data['model'] = data['model_name']
elif 'model_family' in data: smart_data['model'] = data['model_family']
if 'serial_number' in data: smart_data['serial'] = data['serial_number']
if 'rotation_rate' in data: smart_data['rotation_rate'] = data['rotation_rate']
if 'temperature' in data and 'current' in data['temperature']:
smart_data['temperature'] = data['temperature']['current']
if 'smart_status' in data:
smart_data['health'] = 'healthy' if data['smart_status'].get('passed') else 'critical'
# NVMe
if 'nvme_smart_health_information_log' in data:
nvme = data['nvme_smart_health_information_log']
if 'temperature' in nvme: smart_data['temperature'] = nvme['temperature']
if 'power_on_hours' in nvme: smart_data['power_on_hours'] = nvme['power_on_hours']
if 'percentage_used' in nvme: smart_data['ssd_life_left'] = 100 - nvme['percentage_used']
# ATA
if 'ata_smart_attributes' in data:
for attr in data['ata_smart_attributes'].get('table', []):
aid = attr.get('id')
raw = attr.get('raw', {}).get('value', 0)
norm = attr.get('value', 0)
if aid == 9: smart_data['power_on_hours'] = raw
elif aid == 5: smart_data['reallocated_sectors'] = raw
elif aid == 194 and smart_data['temperature'] == 0: smart_data['temperature'] = raw
elif str(aid) in ['231', '202']: smart_data['ssd_life_left'] = norm
if smart_data['model'] != 'Unknown': break
except json.JSONDecodeError: pass
# Plain-text fallback
if smart_data['model'] == 'Unknown':
for line in res.stdout.split('\n'):
if 'Device Model:' in line: smart_data['model'] = line.split(':', 1)[1].strip()
elif 'Serial Number:' in line: smart_data['serial'] = line.split(':', 1)[1].strip()
elif 'Current Temperature:' in line:
try: smart_data['temperature'] = int(line.split(':')[1].strip().split()[0])
except: pass
if smart_data['model'] != 'Unknown': break
except: continue
# Health evaluation
if smart_data['reallocated_sectors'] > 0: smart_data['health'] = 'warning'
if smart_data['temperature'] >= 60: smart_data['health'] = 'warning'
return smart_data
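# The JSON branch above consumes fields from `smartctl -a -j`; a trimmed,
# illustrative example of that output for an NVMe drive (values are made up):
# {
#   "model_name": "ExampleDisk NVMe 1TB",
#   "serial_number": "S0MENUMB3R",
#   "temperature": {"current": 38},
#   "smart_status": {"passed": true},
#   "nvme_smart_health_information_log": {
#     "temperature": 38, "power_on_hours": 1234, "percentage_used": 3
#   }
# }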
def get_storage_info():
"""Info completa de almacenamiento."""
data = {'total': 0, 'used': 0, 'available': 0, 'disks': [], 'zfs_pools': [], 'disk_count': 0}
# 1. Physical disks
try:
res = subprocess.run(['lsblk', '-b', '-d', '-n', '-o', 'NAME,SIZE,TYPE'], capture_output=True, text=True, timeout=5)
for line in res.stdout.strip().split('\n'):
p = line.split()
if len(p) >= 3 and p[2] == 'disk':
name = p[0]
if name.startswith('zd'): continue
size = int(p[1])
smart = get_smart_data(name)
size_tb = size / (1024**4)
size_str = f"{size_tb:.1f}T" if size_tb >= 1 else f"{size / (1024**3):.1f}G"
data['disks'].append({
'name': name,
'size': size / 1024, # KB
'size_formatted': size_str,
'size_bytes': size,
'model': smart['model'],
'serial': smart['serial'],
'temperature': smart['temperature'],
'health': smart['health'],
'ssd_life_left': smart['ssd_life_left']
})
data['total'] += size
data['disk_count'] += 1
except: pass
data['total'] = round(data['total'] / (1024**4), 1) # TB
# 2. Usage (partitions + ZFS)
used = 0
avail = 0
try:
for part in psutil.disk_partitions():
if part.fstype not in ['tmpfs', 'overlay', 'zfs']:
try:
u = psutil.disk_usage(part.mountpoint)
used += u.used
avail += u.free
except: pass
res = subprocess.run(['zpool', 'list', '-H', '-p', '-o', 'name,size,alloc,free,health'], capture_output=True, text=True)
if res.returncode == 0:
for line in res.stdout.strip().split('\n'):
if line:
p = line.split('\t')
used += int(p[2])
avail += int(p[3])
data['zfs_pools'].append({
'name': p[0], 'size': format_bytes(int(p[1])),
'allocated': format_bytes(int(p[2])), 'free': format_bytes(int(p[3])),
'health': p[4]
})
except: pass
data['used'] = round(used / (1024**3), 1)
data['available'] = round(avail / (1024**3), 1)
return data
def get_storage_summary():
"""Resumen rápido."""
return get_storage_info() # Se puede optimizar quitando SMART
def get_proxmox_storage():
"""Storage de Proxmox."""
node = get_proxmox_node_name()
storage = []
try:
res = subprocess.run(['pvesh', 'get', '/cluster/resources', '--type', 'storage', '--output-format', 'json'], capture_output=True, text=True, timeout=10)
if res.returncode == 0:
for r in json.loads(res.stdout):
if r.get('node') == node:
tot = int(r.get('maxdisk', 0))
usd = int(r.get('disk', 0))
storage.append({
'name': r.get('storage'),
'type': r.get('plugintype'),
'status': 'active' if r.get('status')=='available' else 'error',
'total': round(tot/(1024**3), 2),
'used': round(usd/(1024**3), 2),
'percent': round((usd/tot)*100, 1) if tot>0 else 0
})
except: pass
if proxmox_storage_monitor:
u = proxmox_storage_monitor.get_storage_status().get('unavailable', [])
exist = {x['name'] for x in storage}
for x in u:
if x['name'] not in exist: storage.append(x)
return {'storage': storage}
def get_backups():
"""Lista backups."""
backups = []
try:
res = subprocess.run(['pvesh', 'get', '/storage', '--output-format', 'json'], capture_output=True, text=True)
if res.returncode == 0:
for s in json.loads(res.stdout):
sid = s.get('storage')
if s.get('type') in ['dir', 'nfs', 'cifs', 'pbs']:
c_res = subprocess.run(['pvesh', 'get', f'/nodes/localhost/storage/{sid}/content', '--output-format', 'json'], capture_output=True, text=True)
if c_res.returncode == 0:
for item in json.loads(c_res.stdout):
if item.get('content') == 'backup':
volid = item.get('volid', '')
vmid = None
if 'vzdump-qemu-' in volid:
try: vmid = volid.split('vzdump-qemu-')[1].split('-')[0]
except: pass
elif 'vzdump-lxc-' in volid:
try: vmid = volid.split('vzdump-lxc-')[1].split('-')[0]
except: pass
backups.append({
'volid': volid, 'storage': sid, 'vmid': vmid,
'size': item.get('size', 0),
'size_human': format_bytes(item.get('size', 0)),
'created': datetime.fromtimestamp(item.get('ctime', 0)).strftime('%Y-%m-%d %H:%M:%S'),
'timestamp': item.get('ctime', 0)
})
except: pass
backups.sort(key=lambda x: x['timestamp'], reverse=True)
return {'backups': backups, 'total': len(backups)}

system_monitor.py (new file)

@@ -0,0 +1,337 @@
import os
import sys
import time
import socket
import subprocess
import json
import psutil
import platform
from datetime import datetime, timedelta
# Cache to avoid excessive calls to the Proxmox API
_PROXMOX_NODE_CACHE = {"name": None, "timestamp": 0.0}
_PROXMOX_NODE_CACHE_TTL = 300 # 5 minutes
def get_proxmox_node_name() -> str:
"""Recupera el nombre real del nodo Proxmox con caché."""
now = time.time()
cached_name = _PROXMOX_NODE_CACHE.get("name")
cached_ts = _PROXMOX_NODE_CACHE.get("timestamp", 0.0)
if cached_name and (now - float(cached_ts)) < _PROXMOX_NODE_CACHE_TTL:
return str(cached_name)
try:
result = subprocess.run(
["pvesh", "get", "/nodes", "--output-format", "json"],
capture_output=True, text=True, timeout=5, check=False,
)
if result.returncode == 0 and result.stdout:
nodes = json.loads(result.stdout)
if isinstance(nodes, list) and nodes:
node_name = nodes[0].get("node")
if node_name:
_PROXMOX_NODE_CACHE["name"] = node_name
_PROXMOX_NODE_CACHE["timestamp"] = now
return node_name
except Exception:
pass
hostname = socket.gethostname()
return hostname.split(".", 1)[0]
def get_uptime():
"""Obtiene el tiempo de actividad del sistema."""
try:
boot_time = psutil.boot_time()
uptime_seconds = time.time() - boot_time
return str(timedelta(seconds=int(uptime_seconds)))
except Exception:
return "N/A"
def get_cpu_temperature():
"""Obtiene la temperatura de la CPU usando psutil."""
temp = 0
try:
if hasattr(psutil, "sensors_temperatures"):
temps = psutil.sensors_temperatures()
if temps:
sensor_priority = ['coretemp', 'k10temp', 'cpu_thermal', 'zenpower', 'acpitz']
for sensor_name in sensor_priority:
if sensor_name in temps and temps[sensor_name]:
temp = temps[sensor_name][0].current
break
if temp == 0:
for name, entries in temps.items():
if entries:
temp = entries[0].current
break
except Exception:
pass
return temp
def get_proxmox_version():
"""Obtiene la versión de Proxmox."""
try:
result = subprocess.run(['pveversion'], capture_output=True, text=True, timeout=5)
if result.returncode == 0:
version_line = result.stdout.strip().split('\n')[0]
if '/' in version_line:
return version_line.split('/')[1]
except Exception:
pass
return None
def get_available_updates():
"""Cuenta actualizaciones pendientes."""
try:
result = subprocess.run(['apt', 'list', '--upgradable'], capture_output=True, text=True, timeout=10)
if result.returncode == 0:
lines = result.stdout.strip().split('\n')
return max(0, len(lines) - 1)
except Exception:
pass
return 0
def get_system_info():
"""Agrega toda la información del sistema."""
cpu_usage = psutil.cpu_percent(interval=0.5)
memory = psutil.virtual_memory()
load_avg = os.getloadavg()
return {
'cpu_usage': round(cpu_usage, 1),
'memory_usage': round(memory.percent, 1),
'memory_total': round(memory.total / (1024 ** 3), 1),
'memory_used': round(memory.used / (1024 ** 3), 1),
'temperature': get_cpu_temperature(),
'uptime': get_uptime(),
'load_average': list(load_avg),
'hostname': socket.gethostname(),
'proxmox_node': get_proxmox_node_name(),
'node_id': socket.gethostname(),
'timestamp': datetime.now().isoformat(),
'cpu_cores': psutil.cpu_count(logical=False),
'cpu_threads': psutil.cpu_count(logical=True),
'proxmox_version': get_proxmox_version(),
'kernel_version': platform.release(),
'available_updates': get_available_updates()
}
def get_node_metrics(timeframe='week'):
"""Obtiene métricas RRD del nodo."""
local_node = get_proxmox_node_name()
zfs_arc_size = 0
try:
with open('/proc/spl/kstat/zfs/arcstats', 'r') as f:
for line in f:
if line.startswith('size'):
parts = line.split()
if len(parts) >= 3:
zfs_arc_size = int(parts[2])
break
except Exception:
pass
try:
result = subprocess.run(
['pvesh', 'get', f'/nodes/{local_node}/rrddata', '--timeframe', timeframe, '--output-format', 'json'],
capture_output=True, text=True, timeout=10
)
if result.returncode == 0:
rrd_data = json.loads(result.stdout)
if zfs_arc_size > 0:
for item in rrd_data:
if 'zfsarc' not in item or item.get('zfsarc', 0) == 0:
item['zfsarc'] = zfs_arc_size
return {'node': local_node, 'timeframe': timeframe, 'data': rrd_data}
else:
return {'error': f"Failed to get RRD data: {result.stderr}"}
except Exception as e:
return {'error': str(e)}
def get_logs(limit='200', priority=None, service=None, since_days=None):
"""Obtiene logs del sistema (journalctl)."""
cmd = ['journalctl', '--output', 'json', '--no-pager']
if since_days:
try:
days = int(since_days)
cmd.extend(['--since', f'{days} days ago'])
except ValueError:
cmd.extend(['-n', limit])
else:
cmd.extend(['-n', limit])
if priority:
cmd.extend(['-p', priority])
if service:
cmd.extend(['-u', service])
try:
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
if result.returncode == 0:
logs = []
for line in result.stdout.strip().split('\n'):
if line:
try:
entry = json.loads(line)
ts_us = int(entry.get('__REALTIME_TIMESTAMP', '0'))
timestamp = datetime.fromtimestamp(ts_us / 1000000).strftime('%Y-%m-%d %H:%M:%S')
priority_map = {'0': 'emerg', '1': 'alert', '2': 'crit', '3': 'err', '4': 'warning', '5': 'notice', '6': 'info', '7': 'debug'}
p_num = str(entry.get('PRIORITY', '6'))
logs.append({
'timestamp': timestamp,
'level': priority_map.get(p_num, 'info'),
'service': entry.get('_SYSTEMD_UNIT', entry.get('SYSLOG_IDENTIFIER', 'system')),
'message': entry.get('MESSAGE', ''),
'source': 'journal',
'pid': entry.get('_PID', ''),
'hostname': entry.get('_HOSTNAME', '')
})
except Exception:
continue
return {'logs': logs, 'total': len(logs)}
except Exception as e:
return {'logs': [], 'total': 0, 'error': str(e)}
return {'logs': [], 'total': 0, 'error': 'journalctl failed'}
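# The parser above reads `journalctl --output json`, which emits one JSON
# object per line; an illustrative (abbreviated) entry:
# {"__REALTIME_TIMESTAMP": "1706543210000000", "PRIORITY": "6",
#  "_SYSTEMD_UNIT": "pvedaemon.service", "_PID": "1234",
#  "_HOSTNAME": "pve", "MESSAGE": "worker started"}
# __REALTIME_TIMESTAMP is in microseconds, hence the division by 1,000,000.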
def generate_log_file(log_type, hours, level, service, since_days):
"""Genera archivo de logs temporal."""
import tempfile
cmd = ['journalctl', '--no-pager']
if since_days: cmd.extend(['--since', f'{since_days} days ago'])
else: cmd.extend(['--since', f'{hours} hours ago'])
if log_type == 'kernel': cmd.append('-k')
elif log_type == 'auth': cmd.extend(['-u', 'ssh', '-u', 'sshd'])
if level != 'all': cmd.extend(['-p', level])
if service != 'all': cmd.extend(['-u', service])
try:
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.log') as f:
f.write(f"ProxMenux Log ({log_type}) - Generated: {datetime.now().isoformat()}\n")
f.write("=" * 80 + "\n\n")
f.write(result.stdout if result.returncode == 0 else "Error retrieving logs")
return f.name
except Exception:
return None
def get_events(limit='50'):
"""Obtiene eventos de Proxmox."""
events = []
try:
result = subprocess.run(['pvesh', 'get', '/cluster/tasks', '--output-format', 'json'], capture_output=True, text=True, timeout=10)
if result.returncode == 0:
tasks = json.loads(result.stdout)
for task in tasks[:int(limit)]:
starttime = task.get('starttime', 0)
endtime = task.get('endtime', 0)
duration = ''
if endtime and starttime:
d_sec = endtime - starttime
if d_sec < 60: duration = f"{d_sec}s"
elif d_sec < 3600: duration = f"{d_sec // 60}m {d_sec % 60}s"
else: duration = f"{d_sec // 3600}h {(d_sec % 3600) // 60}m"
status = task.get('status', 'unknown')
level = 'info'
if status == 'OK': level = 'info'
elif status in ['stopped', 'error']: level = 'error'
elif status == 'running': level = 'warning'
events.append({
'upid': task.get('upid', ''),
'type': task.get('type', 'unknown'),
'status': status,
'level': level,
'user': task.get('user', 'unknown'),
'node': task.get('node', 'unknown'),
'vmid': str(task.get('id', '')) if task.get('id') else '',
'starttime': datetime.fromtimestamp(starttime).strftime('%Y-%m-%d %H:%M:%S') if starttime else '',
'endtime': datetime.fromtimestamp(endtime).strftime('%Y-%m-%d %H:%M:%S') if endtime else 'Running',
'duration': duration
})
except Exception:
pass
return {'events': events, 'total': len(events)}
def get_notifications():
"""Obtiene notificaciones de Proxmox."""
notifications = []
try:
cmd = [
'journalctl', '-u', 'pve-ha-lrm', '-u', 'pve-ha-crm', '-u', 'pvedaemon',
'-u', 'pveproxy', '-u', 'pvestatd', '--grep', 'notification|email|webhook|alert|notify',
'-n', '100', '--output', 'json', '--no-pager'
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
if result.returncode == 0:
for line in result.stdout.strip().split('\n'):
if line:
try:
entry = json.loads(line)
ts = int(entry.get('__REALTIME_TIMESTAMP', '0'))
timestamp = datetime.fromtimestamp(ts / 1000000).strftime('%Y-%m-%d %H:%M:%S')
msg = entry.get('MESSAGE', '')
ntype = 'info'
if 'email' in msg.lower(): ntype = 'email'
elif 'webhook' in msg.lower(): ntype = 'webhook'
elif 'error' in msg.lower() or 'fail' in msg.lower(): ntype = 'error'
elif 'alert' in msg.lower() or 'warning' in msg.lower(): ntype = 'alert'
notifications.append({
'timestamp': timestamp,
'type': ntype,
'service': entry.get('_SYSTEMD_UNIT', 'proxmox'),
'message': msg,
'source': 'journal'
})
except: continue
# Backups from the task list
task_res = subprocess.run(['pvesh', 'get', '/cluster/tasks', '--output-format', 'json'], capture_output=True, text=True, timeout=5)
if task_res.returncode == 0:
tasks = json.loads(task_res.stdout)
for task in tasks[:50]:
if task.get('type') in ['vzdump', 'backup']:
status = task.get('status', 'unknown')
ntype = 'success' if status == 'OK' else 'error' if status == 'stopped' else 'info'
notifications.append({
'timestamp': datetime.fromtimestamp(task.get('starttime', 0)).strftime('%Y-%m-%d %H:%M:%S'),
'type': ntype,
'service': 'backup',
'message': f"Backup task {task.get('upid', 'unknown')}: {status}",
'source': 'task-log'
})
except: pass
notifications.sort(key=lambda x: x['timestamp'], reverse=True)
return {'notifications': notifications[:100], 'total': len(notifications)}
def get_prometheus_metrics():
"""Genera métricas Prometheus."""
node = socket.gethostname()
timestamp = int(datetime.now().timestamp() * 1000)
lines = []
cpu = psutil.cpu_percent(interval=0.5)
mem = psutil.virtual_memory()
load = os.getloadavg()
uptime = time.time() - psutil.boot_time()
lines.append(f'proxmox_cpu_usage{{node="{node}"}} {cpu} {timestamp}')
lines.append(f'proxmox_memory_usage_percent{{node="{node}"}} {mem.percent} {timestamp}')
lines.append(f'proxmox_load_average{{node="{node}",period="1m"}} {load[0]} {timestamp}')
lines.append(f'proxmox_uptime_seconds{{node="{node}"}} {uptime} {timestamp}')
temp = get_cpu_temperature()
if temp:
lines.append(f'proxmox_cpu_temperature_celsius{{node="{node}"}} {temp} {timestamp}')
return '\n'.join(lines) + '\n', {'Content-Type': 'text/plain; version=0.0.4; charset=utf-8'}
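For reference, the exposition format produced by get_prometheus_metrics looks like this (node name and values are illustrative):

proxmox_cpu_usage{node="pve"} 12.3 1706543210000
proxmox_memory_usage_percent{node="pve"} 41.7 1706543210000
proxmox_load_average{node="pve",period="1m"} 0.52 1706543210000
proxmox_uptime_seconds{node="pve"} 864000.0 1706543210000
proxmox_cpu_temperature_celsius{node="pve"} 48.0 1706543210000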

vm_monitor.py (new file)

@@ -0,0 +1,267 @@
import json
import subprocess
import os
import re
from system_monitor import get_proxmox_node_name
def parse_lxc_hardware_config(vmid, node):
"""
Parse an LXC's configuration to detect hardware passthrough.
Detects GPUs, TPUs (Coral), USB devices and privileged status.
"""
hardware_info = {
'privileged': None,
'gpu_passthrough': [],
'devices': []
}
try:
config_path = f'/etc/pve/lxc/{vmid}.conf'
if not os.path.exists(config_path):
return hardware_info
with open(config_path, 'r') as f:
config_content = f.read()
# Check privileged status
if 'unprivileged: 1' in config_content:
hardware_info['privileged'] = False
elif 'unprivileged: 0' in config_content:
hardware_info['privileged'] = True
else:
# Additional checks
if 'lxc.cap.drop:' in config_content and 'lxc.cap.drop: \n' in config_content:
hardware_info['privileged'] = True
elif 'lxc.cgroup2.devices.allow: a' in config_content:
hardware_info['privileged'] = True
# GPU passthrough detection
gpu_types = []
if '/dev/dri' in config_content or 'renderD128' in config_content:
if 'Intel/AMD GPU' not in gpu_types: gpu_types.append('Intel/AMD GPU')
if 'nvidia' in config_content.lower():
if any(x in config_content for x in ['nvidia0', 'nvidiactl', 'nvidia-uvm']):
if 'NVIDIA GPU' not in gpu_types: gpu_types.append('NVIDIA GPU')
hardware_info['gpu_passthrough'] = gpu_types
# Detection of other devices
devices = []
if 'apex' in config_content.lower() or 'coral' in config_content.lower(): devices.append('Coral TPU')
if 'ttyUSB' in config_content or 'ttyACM' in config_content: devices.append('USB Serial Devices')
if '/dev/bus/usb' in config_content: devices.append('USB Passthrough')
if '/dev/fb0' in config_content: devices.append('Framebuffer')
if '/dev/snd' in config_content: devices.append('Audio Devices')
if '/dev/input' in config_content: devices.append('Input Devices')
if 'tty7' in config_content: devices.append('TTY Console')
hardware_info['devices'] = devices
except Exception:
pass
return hardware_info
def get_lxc_ip_from_lxc_info(vmid):
"""
Get an LXC container's IPs using 'lxc-info' (useful with DHCP).
"""
try:
result = subprocess.run(['lxc-info', '-n', str(vmid), '-iH'], capture_output=True, text=True, timeout=5)
if result.returncode == 0 and result.stdout.strip():
ips = result.stdout.strip().split()
real_ips = [ip for ip in ips if not ip.startswith('172.')] # Usually filters out Docker-internal IPs
docker_ips = [ip for ip in ips if ip.startswith('172.')]
return {
'all_ips': ips,
'real_ips': real_ips,
'docker_ips': docker_ips,
'primary_ip': real_ips[0] if real_ips else (docker_ips[0] if docker_ips else ips[0])
}
except Exception:
pass
return None
def get_proxmox_vms():
"""
Get the list of all VMs and containers on the local node.
"""
local_node = get_proxmox_node_name()
vms = []
try:
result = subprocess.run(['pvesh', 'get', '/cluster/resources', '--type', 'vm', '--output-format', 'json'],
capture_output=True, text=True, timeout=10)
if result.returncode == 0:
for item in json.loads(result.stdout):
if item.get('node') == local_node:
vms.append({
'vmid': item.get('vmid'),
'name': item.get('name', f"VM-{item.get('vmid')}"),
'status': item.get('status', 'unknown'),
'type': 'lxc' if item.get('type') == 'lxc' else 'qemu',
'cpu': item.get('cpu', 0),
'mem': item.get('mem', 0),
'maxmem': item.get('maxmem', 0),
'disk': item.get('disk', 0),
'maxdisk': item.get('maxdisk', 0),
'uptime': item.get('uptime', 0),
'netin': item.get('netin', 0),
'netout': item.get('netout', 0),
'diskread': item.get('diskread', 0),
'diskwrite': item.get('diskwrite', 0)
})
except Exception:
pass
return vms
def get_vm_config(vmid):
"""
Return the detailed configuration of a specific VM.
Includes hardware and OS detection for LXC.
"""
node = get_proxmox_node_name()
# Try to fetch the config as QEMU (VM)
res = subprocess.run(['pvesh', 'get', f'/nodes/{node}/qemu/{vmid}/config', '--output-format', 'json'],
capture_output=True, text=True, timeout=5)
vm_type = 'qemu'
if res.returncode != 0:
# If that fails, try as LXC (container)
res = subprocess.run(['pvesh', 'get', f'/nodes/{node}/lxc/{vmid}/config', '--output-format', 'json'],
capture_output=True, text=True, timeout=5)
vm_type = 'lxc'
if res.returncode == 0:
config = json.loads(res.stdout)
# Fetch the status
status_res = subprocess.run(['pvesh', 'get', f'/nodes/{node}/{vm_type}/{vmid}/status/current', '--output-format', 'json'],
capture_output=True, text=True, timeout=5)
status = 'stopped'
if status_res.returncode == 0:
status = json.loads(status_res.stdout).get('status', 'stopped')
response = {
'vmid': vmid,
'config': config,
'node': node,
'vm_type': vm_type,
'status': status
}
# LXC-specific enrichment
if vm_type == 'lxc':
response['hardware_info'] = parse_lxc_hardware_config(vmid, node)
if status == 'running':
ip_info = get_lxc_ip_from_lxc_info(vmid)
if ip_info: response['lxc_ip_info'] = ip_info
# Try to read OS info
try:
os_res = subprocess.run(['pct', 'exec', str(vmid), '--', 'cat', '/etc/os-release'],
capture_output=True, text=True, timeout=5)
if os_res.returncode == 0:
os_info = {}
for line in os_res.stdout.split('\n'):
if line.startswith('ID='): os_info['id'] = line.split('=', 1)[1].strip('"\'')
elif line.startswith('PRETTY_NAME='): os_info['pretty_name'] = line.split('=', 1)[1].strip('"\'')
if os_info: response['os_info'] = os_info
except: pass
return response
return None
def control_vm(vmid, action):
"""
Execute control actions: start, stop, shutdown, reboot.
"""
if action not in ['start', 'stop', 'shutdown', 'reboot']:
return {'success': False, 'message': 'Invalid action'}
info = get_vm_config(vmid)
if not info:
return {'success': False, 'message': 'VM/LXC not found'}
node = info['node']
vm_type = info['vm_type']
res = subprocess.run(['pvesh', 'create', f'/nodes/{node}/{vm_type}/{vmid}/status/{action}'],
capture_output=True, text=True, timeout=30)
if res.returncode == 0:
return {'success': True, 'vmid': vmid, 'action': action, 'message': f'Successfully executed {action}'}
else:
return {'success': False, 'error': res.stderr}
def update_vm_config(vmid, description):
"""Actualiza la descripción/notas de la VM."""
info = get_vm_config(vmid)
if not info: return {'success': False, 'message': 'VM not found'}
res = subprocess.run(['pvesh', 'set', f'/nodes/{info["node"]}/{info["vm_type"]}/{vmid}/config', '-description', description],
capture_output=True, text=True, timeout=30)
if res.returncode == 0:
return {'success': True, 'message': 'Configuration updated'}
return {'success': False, 'error': res.stderr}
def get_vm_metrics(vmid, timeframe='week'):
"""Obtiene métricas RRD históricas."""
info = get_vm_config(vmid)
if not info: return {'error': 'VM not found'}
res = subprocess.run(['pvesh', 'get', f'/nodes/{info["node"]}/{info["vm_type"]}/{vmid}/rrddata',
'--timeframe', timeframe, '--output-format', 'json'],
capture_output=True, text=True, timeout=10)
if res.returncode == 0:
return {'vmid': vmid, 'type': info['vm_type'], 'timeframe': timeframe, 'data': json.loads(res.stdout)}
return {'error': f'Failed to get metrics: {res.stderr}'}
def get_vm_logs(vmid):
"""Obtiene logs internos (consola) de la VM/LXC."""
info = get_vm_config(vmid)
if not info: return {'error': 'VM not found'}
res = subprocess.run(['pvesh', 'get', f'/nodes/{info["node"]}/{info["vm_type"]}/{vmid}/log', '--start', '0', '--limit', '1000'],
capture_output=True, text=True, timeout=10)
logs = []
if res.returncode == 0:
for i, line in enumerate(res.stdout.split('\n')):
if line.strip(): logs.append({'n': i, 't': line})
return {'vmid': vmid, 'name': info['config'].get('name'), 'logs': logs}
def get_task_log(upid):
"""Lee un archivo de log de tarea específico de Proxmox."""
try:
upid_clean = upid.rstrip(':')
parts = upid_clean.split(':')
if len(parts) < 5: return "Invalid UPID format"
starttime = parts[4]
index = starttime[-1].lower() # The directory is the last hex character
# Search the possible paths
paths = [
f"/var/log/pve/tasks/{index}/{upid_clean}",
f"/var/log/pve/tasks/{index.upper()}/{upid_clean}",
f"/var/log/pve/tasks/{index}/{upid_clean}:"
]
for p in paths:
if os.path.exists(p):
with open(p, 'r', encoding='utf-8', errors='ignore') as f:
return f.read()
return "Log file not found on disk"
except Exception as e:
return f"Error reading log: {str(e)}"