mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2026-02-19 08:56:23 +00:00
Update backend monitor
This commit is contained in:
337
AppImage/scripts/system_monitor.py
Normal file
337
AppImage/scripts/system_monitor.py
Normal file
@@ -0,0 +1,337 @@
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import socket
|
||||
import subprocess
|
||||
import json
|
||||
import psutil
|
||||
import platform
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
# Cache to avoid excessive calls to the Proxmox API.
_PROXMOX_NODE_CACHE = {"name": None, "timestamp": 0.0}
_PROXMOX_NODE_CACHE_TTL = 300  # 5 minutes


def get_proxmox_node_name() -> str:
    """Return the Proxmox node name for this host, using a short-lived cache.

    The name is resolved by running ``pvesh get /nodes`` and is cached in
    ``_PROXMOX_NODE_CACHE`` for ``_PROXMOX_NODE_CACHE_TTL`` seconds.  When the
    listing contains an entry whose ``node`` field equals the local short
    hostname, that entry is used; otherwise the first entry is used.

    If ``pvesh`` is missing, fails, times out or returns unusable output, the
    short hostname (text before the first dot) is returned instead.  Fallback
    results are not cached, so the next call retries ``pvesh``.
    """
    now = time.time()
    cached_name = _PROXMOX_NODE_CACHE.get("name")
    cached_ts = _PROXMOX_NODE_CACHE.get("timestamp", 0.0)
    if cached_name and (now - float(cached_ts)) < _PROXMOX_NODE_CACHE_TTL:
        return str(cached_name)

    short_hostname = socket.gethostname().split(".", 1)[0]

    try:
        result = subprocess.run(
            ["pvesh", "get", "/nodes", "--output-format", "json"],
            capture_output=True, text=True, timeout=5, check=False,
        )
        if result.returncode == 0 and result.stdout:
            nodes = json.loads(result.stdout)
            if isinstance(nodes, list) and nodes:
                # /nodes can list several nodes when the host is part of a
                # cluster, so prefer the entry that matches this machine.
                listed = [n.get("node") for n in nodes if isinstance(n, dict)]
                if short_hostname in listed:
                    node_name = short_hostname
                else:
                    node_name = nodes[0].get("node")
                if node_name:
                    _PROXMOX_NODE_CACHE["name"] = node_name
                    _PROXMOX_NODE_CACHE["timestamp"] = now
                    return node_name
    except Exception:
        # Deliberately best-effort: any pvesh/JSON problem falls through to
        # the hostname-based answer below.
        pass

    return short_hostname
|
||||
|
||||
def get_uptime():
    """Return the time elapsed since boot as a ``str(timedelta)`` string.

    The elapsed time is measured from ``psutil.boot_time()`` and truncated to
    whole seconds.  Any failure yields the literal string ``"N/A"``.
    """
    try:
        booted_at = psutil.boot_time()
        elapsed = timedelta(seconds=int(time.time() - booted_at))
        return str(elapsed)
    except Exception:
        return "N/A"
|
||||
|
||||
def get_cpu_temperature():
    """Return a CPU temperature reading obtained through psutil.

    Known CPU sensor names are tried in a fixed priority order and the first
    entry of the first populated one is used.  If that leaves the reading at
    0, the first entry of any populated sensor is used instead.  Returns 0
    when psutil exposes no temperature data or when any error occurs.
    """
    preferred = ('coretemp', 'k10temp', 'cpu_thermal', 'zenpower', 'acpitz')
    reading = 0
    try:
        if not hasattr(psutil, "sensors_temperatures"):
            return reading
        sensors = psutil.sensors_temperatures()
        if not sensors:
            return reading

        for chip in preferred:
            if chip in sensors and sensors[chip]:
                reading = sensors[chip][0].current
                break

        if reading == 0:
            # No preferred sensor produced a non-zero value: take whatever
            # populated sensor comes first.
            for entries in sensors.values():
                if entries:
                    reading = entries[0].current
                    break
    except Exception:
        pass
    return reading
|
||||
|
||||
def get_proxmox_version():
    """Return the version component reported by ``pveversion``, or None.

    The first line of the command output is split on ``/`` and the second
    field is returned.  None is returned when the command fails, the line
    contains no ``/``, or any error occurs.
    """
    try:
        proc = subprocess.run(['pveversion'], capture_output=True, text=True, timeout=5)
        if proc.returncode != 0:
            return None
        first_line = proc.stdout.strip().split('\n')[0]
        if '/' in first_line:
            return first_line.split('/')[1]
    except Exception:
        pass
    return None
|
||||
|
||||
def get_available_updates():
    """Return the number of packages ``apt list --upgradable`` reports.

    Only lines that look like package entries (they contain a ``/`` between
    the package name and its suite) are counted, so header, blank or notice
    lines on stdout do not skew the result.  Returns 0 when apt is missing,
    fails, times out, or produces undecodable output.
    """
    try:
        result = subprocess.run(
            ['apt', 'list', '--upgradable'],
            capture_output=True, text=True, timeout=10,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # apt not installed, timed out, or output could not be decoded.
        return 0

    if result.returncode != 0:
        return 0

    return sum(1 for line in result.stdout.split('\n') if '/' in line)
|
||||
|
||||
def get_system_info():
    """Build a dictionary describing the current state of the host.

    Combines psutil readings (CPU, memory, core counts), the load average,
    host and node names, kernel release, and the results of the other
    helpers in this module (temperature, uptime, Proxmox version, pending
    updates).  Memory sizes are expressed in GiB rounded to one decimal.
    CPU usage is sampled over a 0.5 second interval.
    """
    cpu_pct = psutil.cpu_percent(interval=0.5)
    vmem = psutil.virtual_memory()
    loads = os.getloadavg()
    gib = 1024 ** 3

    info = {}
    info['cpu_usage'] = round(cpu_pct, 1)
    info['memory_usage'] = round(vmem.percent, 1)
    info['memory_total'] = round(vmem.total / gib, 1)
    info['memory_used'] = round(vmem.used / gib, 1)
    info['temperature'] = get_cpu_temperature()
    info['uptime'] = get_uptime()
    info['load_average'] = list(loads)
    info['hostname'] = socket.gethostname()
    info['proxmox_node'] = get_proxmox_node_name()
    info['node_id'] = socket.gethostname()
    info['timestamp'] = datetime.now().isoformat()
    info['cpu_cores'] = psutil.cpu_count(logical=False)
    info['cpu_threads'] = psutil.cpu_count(logical=True)
    info['proxmox_version'] = get_proxmox_version()
    info['kernel_version'] = platform.release()
    info['available_updates'] = get_available_updates()
    return info
|
||||
|
||||
def get_node_metrics(timeframe='week'):
    """Fetch the node's RRD metrics through ``pvesh``.

    Args:
        timeframe: Value passed to ``--timeframe`` (default ``'week'``).

    Returns:
        ``{'node', 'timeframe', 'data'}`` on success, or ``{'error': ...}``
        when pvesh fails or any exception is raised.  If the current ARC
        size can be read from ``/proc/spl/kstat/zfs/arcstats`` and is
        positive, every sample whose ``zfsarc`` is missing or 0 is filled
        with that value.
    """
    node = get_proxmox_node_name()

    # Current ZFS ARC size; stays 0 when the stats file is absent/unreadable.
    arc_bytes = 0
    try:
        with open('/proc/spl/kstat/zfs/arcstats', 'r') as stats:
            for row in stats:
                if not row.startswith('size'):
                    continue
                fields = row.split()
                if len(fields) < 3:
                    continue
                arc_bytes = int(fields[2])
                break
    except Exception:
        pass

    command = [
        'pvesh', 'get', f'/nodes/{node}/rrddata',
        '--timeframe', timeframe, '--output-format', 'json',
    ]
    try:
        proc = subprocess.run(command, capture_output=True, text=True, timeout=10)
        if proc.returncode != 0:
            return {'error': f"Failed to get RRD data: {proc.stderr}"}

        samples = json.loads(proc.stdout)
        if arc_bytes > 0:
            for sample in samples:
                if 'zfsarc' not in sample or sample.get('zfsarc', 0) == 0:
                    sample['zfsarc'] = arc_bytes
        return {'node': node, 'timeframe': timeframe, 'data': samples}
    except Exception as exc:
        return {'error': str(exc)}
|
||||
|
||||
def get_logs(limit='200', priority=None, service=None, since_days=None):
    """Read system log entries from ``journalctl`` as a list of dicts.

    Args:
        limit: Maximum number of entries (``-n``); str or int.  Only applied
            when ``since_days`` is not used.
        priority: Optional value for ``-p``.
        service: Optional unit name for ``-u``.
        since_days: Optional number of days for ``--since``.  A truthy value
            that cannot be converted to int is ignored and ``limit`` is used
            instead.

    Returns:
        ``{'logs': [...], 'total': n}`` on success.  On failure the same keys
        are returned with an empty list plus an ``'error'`` message.
    """
    priority_names = {
        '0': 'emerg', '1': 'alert', '2': 'crit', '3': 'err',
        '4': 'warning', '5': 'notice', '6': 'info', '7': 'debug',
    }
    # subprocess requires every argv element to be a string.
    tail_args = ['-n', str(limit)]

    cmd = ['journalctl', '--output', 'json', '--no-pager']
    if since_days:
        try:
            cmd.extend(['--since', f'{int(since_days)} days ago'])
        except (TypeError, ValueError):
            cmd.extend(tail_args)
    else:
        cmd.extend(tail_args)

    if priority:
        cmd.extend(['-p', str(priority)])
    if service:
        cmd.extend(['-u', str(service)])

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
    except Exception as e:
        return {'logs': [], 'total': 0, 'error': str(e)}

    if result.returncode != 0:
        return {'logs': [], 'total': 0, 'error': 'journalctl failed'}

    logs = []
    for line in result.stdout.strip().split('\n'):
        if not line:
            continue
        try:
            entry = json.loads(line)
            # __REALTIME_TIMESTAMP is divided by 1e6 to obtain seconds.
            ts_us = int(entry.get('__REALTIME_TIMESTAMP', '0'))
            timestamp = datetime.fromtimestamp(ts_us / 1000000).strftime('%Y-%m-%d %H:%M:%S')
            p_num = str(entry.get('PRIORITY', '6'))
            logs.append({
                'timestamp': timestamp,
                'level': priority_names.get(p_num, 'info'),
                'service': entry.get('_SYSTEMD_UNIT', entry.get('SYSLOG_IDENTIFIER', 'system')),
                'message': entry.get('MESSAGE', ''),
                'source': 'journal',
                'pid': entry.get('_PID', ''),
                'hostname': entry.get('_HOSTNAME', ''),
            })
        except Exception:
            # Best-effort: a malformed line is skipped, not fatal.
            continue

    return {'logs': logs, 'total': len(logs)}
|
||||
|
||||
def generate_log_file(log_type, hours, level, service, since_days):
    """Dump journalctl output into a temporary ``.log`` file.

    Args:
        log_type: ``'kernel'`` adds ``-k``; ``'auth'`` restricts to the
            ``ssh``/``sshd`` units; other values add nothing.
        hours: Look-back window in hours, used when ``since_days`` is falsy.
        level: Value for ``-p``, or ``'all'`` for no priority filter.
        service: Unit for ``-u``, or ``'all'`` for no unit filter.
        since_days: Look-back window in days; takes precedence over ``hours``.

    Returns:
        Path of the created file (not deleted automatically), or None if
        anything raised.  When journalctl exits non-zero the file contains
        "Error retrieving logs" instead of log text.
    """
    import tempfile

    window = f'{since_days} days ago' if since_days else f'{hours} hours ago'
    cmd = ['journalctl', '--no-pager', '--since', window]

    if log_type == 'kernel':
        cmd.append('-k')
    elif log_type == 'auth':
        cmd += ['-u', 'ssh', '-u', 'sshd']

    if level != 'all':
        cmd += ['-p', level]
    if service != 'all':
        cmd += ['-u', service]

    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        body = proc.stdout if proc.returncode == 0 else "Error retrieving logs"
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.log') as out:
            out.write(f"ProxMenux Log ({log_type}) - Generated: {datetime.now().isoformat()}\n")
            out.write("=" * 80 + "\n\n")
            out.write(body)
            return out.name
    except Exception:
        return None
|
||||
|
||||
def get_events(limit='50'):
    """List recent tasks from ``pvesh get /cluster/tasks`` as event dicts.

    Args:
        limit: Maximum number of tasks to convert (converted with ``int``).

    Returns:
        ``{'events': [...], 'total': n}``.  Errors are swallowed; whatever
        events were collected before the error are still returned.
    """
    stamp = '%Y-%m-%d %H:%M:%S'
    events = []
    try:
        proc = subprocess.run(
            ['pvesh', 'get', '/cluster/tasks', '--output-format', 'json'],
            capture_output=True, text=True, timeout=10,
        )
        if proc.returncode == 0:
            tasks = json.loads(proc.stdout)
            for task in tasks[:int(limit)]:
                started = task.get('starttime', 0)
                ended = task.get('endtime', 0)

                # Human-readable duration; empty while either bound is missing.
                duration = ''
                if ended and started:
                    secs = ended - started
                    if secs < 60:
                        duration = f"{secs}s"
                    elif secs < 3600:
                        minutes, rest = divmod(secs, 60)
                        duration = f"{minutes}m {rest}s"
                    else:
                        hours_part, rest = divmod(secs, 3600)
                        duration = f"{hours_part}h {rest // 60}m"

                status = task.get('status', 'unknown')
                if status in ['stopped', 'error']:
                    level = 'error'
                elif status == 'running':
                    level = 'warning'
                else:
                    level = 'info'

                task_id = task.get('id')
                start_text = datetime.fromtimestamp(started).strftime(stamp) if started else ''
                end_text = datetime.fromtimestamp(ended).strftime(stamp) if ended else 'Running'

                events.append({
                    'upid': task.get('upid', ''),
                    'type': task.get('type', 'unknown'),
                    'status': status,
                    'level': level,
                    'user': task.get('user', 'unknown'),
                    'node': task.get('node', 'unknown'),
                    'vmid': str(task_id) if task_id else '',
                    'starttime': start_text,
                    'endtime': end_text,
                    'duration': duration,
                })
    except Exception:
        pass
    return {'events': events, 'total': len(events)}
|
||||
|
||||
def _collect_journal_notifications(notifications):
    """Append notification-like journal entries of the Proxmox daemons."""
    cmd = [
        'journalctl', '-u', 'pve-ha-lrm', '-u', 'pve-ha-crm', '-u', 'pvedaemon',
        '-u', 'pveproxy', '-u', 'pvestatd', '--grep', 'notification|email|webhook|alert|notify',
        '-n', '100', '--output', 'json', '--no-pager'
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
    if result.returncode != 0:
        return

    for line in result.stdout.strip().split('\n'):
        if not line:
            continue
        try:
            entry = json.loads(line)
            ts = int(entry.get('__REALTIME_TIMESTAMP', '0'))
            timestamp = datetime.fromtimestamp(ts / 1000000).strftime('%Y-%m-%d %H:%M:%S')
            msg = entry.get('MESSAGE', '')
            lowered = msg.lower()

            # First matching keyword wins, in this order.
            ntype = 'info'
            if 'email' in lowered:
                ntype = 'email'
            elif 'webhook' in lowered:
                ntype = 'webhook'
            elif 'error' in lowered or 'fail' in lowered:
                ntype = 'error'
            elif 'alert' in lowered or 'warning' in lowered:
                ntype = 'alert'

            notifications.append({
                'timestamp': timestamp,
                'type': ntype,
                'service': entry.get('_SYSTEMD_UNIT', 'proxmox'),
                'message': msg,
                'source': 'journal'
            })
        except Exception:
            # Best-effort: skip entries that cannot be parsed.
            continue


def _collect_backup_notifications(notifications):
    """Append one notification per backup task among the first 50 tasks."""
    task_res = subprocess.run(
        ['pvesh', 'get', '/cluster/tasks', '--output-format', 'json'],
        capture_output=True, text=True, timeout=5,
    )
    if task_res.returncode != 0:
        return

    tasks = json.loads(task_res.stdout)
    for task in tasks[:50]:
        if task.get('type') in ['vzdump', 'backup']:
            status = task.get('status', 'unknown')
            ntype = 'success' if status == 'OK' else 'error' if status == 'stopped' else 'info'
            notifications.append({
                'timestamp': datetime.fromtimestamp(task.get('starttime', 0)).strftime('%Y-%m-%d %H:%M:%S'),
                'type': ntype,
                'service': 'backup',
                'message': f"Backup task {task.get('upid', 'unknown')}: {status}",
                'source': 'task-log'
            })


def get_notifications():
    """Collect Proxmox notifications from the journal and the task list.

    Journal entries and backup tasks are gathered independently, so a failure
    in one source does not discard the other.  Failures are swallowed
    (best-effort), but only ``Exception`` subclasses: ``KeyboardInterrupt``
    and ``SystemExit`` propagate.

    Returns:
        ``{'notifications': [...], 'total': n}`` where the list is sorted by
        timestamp, newest first, and capped at 100 items, while ``total`` is
        the number collected before capping.
    """
    notifications = []
    for collect in (_collect_journal_notifications, _collect_backup_notifications):
        try:
            collect(notifications)
        except Exception:
            # Keep whatever this source appended before failing.
            continue

    notifications.sort(key=lambda x: x['timestamp'], reverse=True)
    return {'notifications': notifications[:100], 'total': len(notifications)}
|
||||
|
||||
def get_prometheus_metrics():
    """Render host metrics as Prometheus text-format sample lines.

    Emits CPU usage, memory usage percent, 1-minute load average and uptime,
    plus the CPU temperature when ``get_cpu_temperature()`` returns a truthy
    value.  Every sample is labelled with the hostname and carries the same
    millisecond timestamp.

    Returns:
        A ``(body, headers)`` tuple: the newline-terminated metrics text and
        a dict holding the ``Content-Type`` header.
    """
    node = socket.gethostname()
    timestamp = int(datetime.now().timestamp() * 1000)

    cpu = psutil.cpu_percent(interval=0.5)
    mem = psutil.virtual_memory()
    load = os.getloadavg()
    uptime = time.time() - psutil.boot_time()

    samples = [
        (f'proxmox_cpu_usage{{node="{node}"}}', cpu),
        (f'proxmox_memory_usage_percent{{node="{node}"}}', mem.percent),
        (f'proxmox_load_average{{node="{node}",period="1m"}}', load[0]),
        (f'proxmox_uptime_seconds{{node="{node}"}}', uptime),
    ]

    temp = get_cpu_temperature()
    if temp:
        samples.append((f'proxmox_cpu_temperature_celsius{{node="{node}"}}', temp))

    body = ''.join(f'{series} {value} {timestamp}\n' for series, value in samples)
    return body, {'Content-Type': 'text/plain; version=0.0.4; charset=utf-8'}
|
||||
Reference in New Issue
Block a user