mirror of https://github.com/MacRimi/ProxMenux.git
Update AppImage
@@ -597,10 +597,7 @@ export function StorageOverview() {
       <CardContent>
         <div className="space-y-4">
           {proxmoxStorage.storage
-            .filter(
-              (storage) =>
-                storage && storage.name && storage.total > 0 && storage.used >= 0 && storage.available >= 0,
-            )
+            .filter((storage) => storage && storage.name && storage.used >= 0 && storage.available >= 0)
            .sort((a, b) => a.name.localeCompare(b.name))
            .map((storage) => (
              <div
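
The StorageOverview change above drops the `storage.total > 0` guard from the filter, so storage entries reporting a zero total (typically an unavailable or unmounted store) are no longer hidden from the list. A before/after of the predicate, sketched in Python with made-up entries (the component itself is TypeScript; these names are illustrative):

    # Hypothetical entries shaped like the objects the component filters.
    entries = [
        {"name": "local", "total": 100, "used": 40, "available": 60},
        {"name": "pbs-offline", "total": 0, "used": 0, "available": 0},  # unavailable storage
    ]

    def old_pred(s):
        return bool(s and s.get("name") and s.get("total", 0) > 0
                    and s.get("used", -1) >= 0 and s.get("available", -1) >= 0)

    def new_pred(s):
        return bool(s and s.get("name")
                    and s.get("used", -1) >= 0 and s.get("available", -1) >= 0)

    print([s["name"] for s in entries if old_pred(s)])  # ['local']
    print([s["name"] for s in entries if new_pred(s)])  # ['local', 'pbs-offline']
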
@@ -223,13 +223,13 @@ class HealthMonitor:
         Now includes persistent error tracking.
         """
         active_errors = health_persistence.get_active_errors()
-        persistent_issues = {err['error_key']: err for err in active_errors}
+        # No need to create persistent_issues dict here, it's implicitly handled by the checks

         details = {
             'cpu': {'status': 'OK'},
             'memory': {'status': 'OK'},
-            'storage': {'status': 'OK'},
-            'disks': {'status': 'OK'},
+            'storage': {'status': 'OK'},  # This will be overwritten by specific storage checks
+            'disks': {'status': 'OK'},  # This will be overwritten by disk/filesystem checks
             'network': {'status': 'OK'},
             'vms': {'status': 'OK'},
             'services': {'status': 'OK'},
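
The hunk above also shows the pattern this method relies on: every category is seeded with a healthy default, and each check that runs overwrites its own slot, so the response shape stays stable for the frontend even when a check is skipped. A minimal sketch of the idea (names here are illustrative, not the module's API):

    CATEGORIES = ('cpu', 'memory', 'storage', 'disks', 'network', 'vms', 'services')

    def seed_details():
        # Every category starts OK; a check that runs replaces its entry wholesale,
        # so consumers can always read details[category]['status'].
        return {cat: {'status': 'OK'} for cat in CATEGORIES}

    details = seed_details()
    details['storage'] = {'status': 'CRITICAL', 'reason': 'storage unavailable'}  # a check overwrites its slot
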
@@ -242,109 +242,101 @@ class HealthMonitor:
         warning_issues = []
         info_issues = []  # Added info_issues to track INFO separately

-        # Priority 1: Services PVE
+        # --- Priority Order of Checks ---
+
+        # Priority 1: Critical PVE Services
         services_status = self._check_pve_services()
         details['services'] = services_status
         if services_status['status'] == 'CRITICAL':
-            critical_issues.append(services_status.get('reason', 'Service failure'))
+            critical_issues.append(f"PVE Services: {services_status.get('reason', 'Service failure')}")
         elif services_status['status'] == 'WARNING':
-            warning_issues.append(services_status.get('reason', 'Service issue'))
+            warning_issues.append(f"PVE Services: {services_status.get('reason', 'Service issue')}")

-        # Priority 1.5: Proxmox Storage Check (uses external monitor)
+        # Priority 1.5: Proxmox Storage Check (External Module)
         proxmox_storage_result = self._check_proxmox_storage()
-        if proxmox_storage_result:
+        if proxmox_storage_result:  # Only process if the check ran (module available)
             details['storage'] = proxmox_storage_result
             if proxmox_storage_result.get('status') == 'CRITICAL':
                 critical_issues.append(proxmox_storage_result.get('reason', 'Proxmox storage unavailable'))
             elif proxmox_storage_result.get('status') == 'WARNING':
                 warning_issues.append(proxmox_storage_result.get('reason', 'Proxmox storage issue'))

-        # Priority 2: Storage (filesystem usage, ZFS, SMART etc.)
+        # Priority 2: Disk/Filesystem Health (Internal checks: usage, ZFS, SMART, IO errors)
         storage_status = self._check_storage_optimized()
-        if storage_status:
-            details['disks'] = storage_status  # Rename from 'storage' to 'disks' for clarity
+        details['disks'] = storage_status  # Use 'disks' for filesystem/disk specific issues
         if storage_status.get('status') == 'CRITICAL':
-            critical_issues.append(storage_status.get('reason', 'Disk/Storage failure'))
+            critical_issues.append(f"Storage/Disks: {storage_status.get('reason', 'Disk/Storage failure')}")
         elif storage_status.get('status') == 'WARNING':
-            warning_issues.append(storage_status.get('reason', 'Disk/Storage issue'))
+            warning_issues.append(f"Storage/Disks: {storage_status.get('reason', 'Disk/Storage issue')}")

-        # Priority 3: Disks (redundant with storage_optimized, but keeping for now)
-        # disks_status = self._check_disks_optimized()  # This is now covered by _check_storage_optimized
-        # if disks_status:
-        #     details['disks'] = disks_status
-        #     if disks_status.get('status') == 'CRITICAL':
-        #         critical_issues.append(disks_status.get('reason', 'Disk failure'))
-        #     elif disks_status.get('status') == 'WARNING':
-        #         warning_issues.append(disks_status.get('reason', 'Disk issue'))
-
-        # Priority 4: VMs/CTs - now with persistence
+        # Priority 3: VMs/CTs Status (with persistence)
         vms_status = self._check_vms_cts_with_persistence()
-        if vms_status:
-            details['vms'] = vms_status
-            if vms_status.get('status') == 'CRITICAL':
-                critical_issues.append(vms_status.get('reason', 'VM/CT failure'))
-            elif vms_status.get('status') == 'WARNING':
-                warning_issues.append(vms_status.get('reason', 'VM/CT issue'))
+        details['vms'] = vms_status
+        if vms_status.get('status') == 'CRITICAL':
+            critical_issues.append(f"VMs/CTs: {vms_status.get('reason', 'VM/CT failure')}")
+        elif vms_status.get('status') == 'WARNING':
+            warning_issues.append(f"VMs/CTs: {vms_status.get('reason', 'VM/CT issue')}")

-        # Priority 5: Network
+        # Priority 4: Network Connectivity
         network_status = self._check_network_optimized()
-        if network_status:
-            details['network'] = network_status
-            if network_status.get('status') == 'CRITICAL':
-                critical_issues.append(network_status.get('reason', 'Network failure'))
-            elif network_status.get('status') == 'WARNING':
-                warning_issues.append(network_status.get('reason', 'Network issue'))
+        details['network'] = network_status
+        if network_status.get('status') == 'CRITICAL':
+            critical_issues.append(f"Network: {network_status.get('reason', 'Network failure')}")
+        elif network_status.get('status') == 'WARNING':
+            warning_issues.append(f"Network: {network_status.get('reason', 'Network issue')}")

-        # Priority 6: CPU
+        # Priority 5: CPU Usage (with hysteresis)
         cpu_status = self._check_cpu_with_hysteresis()
         details['cpu'] = cpu_status
-        if cpu_status.get('status') == 'WARNING':
-            warning_issues.append(cpu_status.get('reason', 'CPU high'))
-        elif cpu_status.get('status') == 'CRITICAL':
-            critical_issues.append(cpu_status.get('reason', 'CPU critical'))
+        if cpu_status.get('status') == 'CRITICAL':
+            critical_issues.append(f"CPU: {cpu_status.get('reason', 'CPU critical')}")
+        elif cpu_status.get('status') == 'WARNING':
+            warning_issues.append(f"CPU: {cpu_status.get('reason', 'CPU high')}")

-        # Priority 7: Memory
+        # Priority 6: Memory Usage (RAM and Swap)
         memory_status = self._check_memory_comprehensive()
         details['memory'] = memory_status
         if memory_status.get('status') == 'CRITICAL':
-            critical_issues.append(memory_status.get('reason', 'Memory critical'))
+            critical_issues.append(f"Memory: {memory_status.get('reason', 'Memory critical')}")
         elif memory_status.get('status') == 'WARNING':
-            warning_issues.append(memory_status.get('reason', 'Memory high'))
+            warning_issues.append(f"Memory: {memory_status.get('reason', 'Memory high')}")

-        # Priority 8: Logs - now with persistence
+        # Priority 7: Log Analysis (with persistence)
         logs_status = self._check_logs_with_persistence()
-        if logs_status:
-            details['logs'] = logs_status
-            if logs_status.get('status') == 'CRITICAL':
-                critical_issues.append(logs_status.get('reason', 'Critical log errors'))
-            elif logs_status.get('status') == 'WARNING':
-                warning_issues.append(logs_status.get('reason', 'Log warnings'))
+        details['logs'] = logs_status
+        if logs_status.get('status') == 'CRITICAL':
+            critical_issues.append(f"Logs: {logs_status.get('reason', 'Critical log errors')}")
+        elif logs_status.get('status') == 'WARNING':
+            warning_issues.append(f"Logs: {logs_status.get('reason', 'Log warnings')}")

-        # Priority 9: Updates
+        # Priority 8: System Updates
         updates_status = self._check_updates()
-        if updates_status:
-            details['updates'] = updates_status
-            if updates_status.get('status') == 'WARNING':
-                warning_issues.append(updates_status.get('reason', 'Updates pending'))
-            elif updates_status.get('status') == 'INFO':
-                info_issues.append(updates_status.get('reason', 'Informational update'))
+        details['updates'] = updates_status
+        if updates_status.get('status') == 'CRITICAL':
+            critical_issues.append(f"Updates: {updates_status.get('reason', 'System not updated')}")
+        elif updates_status.get('status') == 'WARNING':
+            warning_issues.append(f"Updates: {updates_status.get('reason', 'Updates pending')}")
+        elif updates_status.get('status') == 'INFO':
+            info_issues.append(f"Updates: {updates_status.get('reason', 'Informational update notice')}")

-        # Priority 10: Security
+        # Priority 9: Security Checks
         security_status = self._check_security()
         details['security'] = security_status
         if security_status.get('status') == 'WARNING':
-            warning_issues.append(security_status.get('reason', 'Security issue'))
+            warning_issues.append(f"Security: {security_status.get('reason', 'Security issue')}")
         elif security_status.get('status') == 'INFO':
-            info_issues.append(security_status.get('reason', 'Security info'))
+            info_issues.append(f"Security: {security_status.get('reason', 'Security information')}")

+        # --- Determine Overall Status ---
+        # Use a fixed order of severity: CRITICAL > WARNING > INFO > OK
         if critical_issues:
             overall = 'CRITICAL'
-            summary = '; '.join(critical_issues[:3])
+            summary = '; '.join(critical_issues[:3])  # Limit summary to 3 issues
         elif warning_issues:
             overall = 'WARNING'
             summary = '; '.join(warning_issues[:3])
         elif info_issues:
-            overall = 'OK'  # INFO is still healthy overall
+            overall = 'OK'  # INFO statuses don't degrade overall health
             summary = '; '.join(info_issues[:3])
         else:
             overall = 'OK'
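
Taken together, the hunk converges on one aggregation pattern: each check returns a {'status', 'reason'} dict, reasons get a category prefix so summaries stay readable, and the overall status is the worst severity seen, with INFO never degrading it. A condensed, self-contained sketch of that pattern (the check callables are stand-ins, not the monitor's real methods):

    from typing import Any, Callable, Dict, List, Tuple

    def aggregate(checks: List[Tuple[str, Callable[[], Dict[str, Any]]]]) -> Dict[str, Any]:
        details: Dict[str, Any] = {}
        buckets: Dict[str, List[str]] = {'CRITICAL': [], 'WARNING': [], 'INFO': []}
        for category, check in checks:  # run in priority order
            result = check()
            details[category] = result
            status = result.get('status', 'OK')
            if status in buckets:
                # Prefix the reason with its category so summaries stay readable.
                buckets[status].append(f"{category}: {result.get('reason', status.lower())}")
        # Severity order: CRITICAL > WARNING > INFO > OK; INFO keeps overall 'OK'.
        if buckets['CRITICAL']:
            overall, summary = 'CRITICAL', '; '.join(buckets['CRITICAL'][:3])
        elif buckets['WARNING']:
            overall, summary = 'WARNING', '; '.join(buckets['WARNING'][:3])
        else:
            overall, summary = 'OK', '; '.join(buckets['INFO'][:3])
        return {'overall': overall, 'summary': summary, 'details': details}

    # Example with stand-in checks:
    print(aggregate([
        ('services', lambda: {'status': 'OK'}),
        ('memory', lambda: {'status': 'WARNING', 'reason': 'RAM at 92%'}),
    ]))
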
@@ -826,7 +818,7 @@ class HealthMonitor:
                 disk_name = disk_match.group(1)
                 self.io_error_history[disk_name].append(current_time)

-        # Clean old history (keep errors from last 5 minutes)
+        # Clean old history (keep errors from the last 5 minutes)
         for disk in list(self.io_error_history.keys()):
             self.io_error_history[disk] = [
                 t for t in self.io_error_history[disk]
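
The comprehension being trimmed here implements a sliding time window: each pass drops timestamps older than five minutes, so the surviving list length doubles as an error rate per window. A standalone sketch of the same idea (the threshold value is illustrative, not taken from the monitor):

    import time
    from collections import defaultdict

    IO_ERROR_WINDOW = 300   # seconds; keep errors from the last 5 minutes
    IO_ERROR_THRESHOLD = 5  # illustrative: errors per window that would raise a flag

    io_error_history = defaultdict(list)

    def record_io_error(disk: str) -> None:
        now = time.time()
        io_error_history[disk].append(now)
        # Drop timestamps that fell out of the window.
        io_error_history[disk] = [t for t in io_error_history[disk] if now - t < IO_ERROR_WINDOW]

    def disk_is_flapping(disk: str) -> bool:
        # The surviving list length is the error count within the window.
        return len(io_error_history[disk]) >= IO_ERROR_THRESHOLD
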
@@ -1878,12 +1870,11 @@ class HealthMonitor:
                    health_persistence.clear_error(error['error_key'])
                return {'status': 'OK'}

-            # If there are unavailable storages, record them as persistent errors and report.
-            storage_issues_details = []
+            storage_details = {}
            for storage in unavailable_storages:
                storage_name = storage['name']
                error_key = f'storage_unavailable_{storage_name}'
-                status_detail = storage.get('status_detail', 'unavailable')  # e.g., 'not_found', 'connection_error'
+                status_detail = storage.get('status_detail', 'unavailable')

                # Formulate a descriptive reason for the issue
                if status_detail == 'not_found':
@@ -1896,25 +1887,29 @@ class HealthMonitor:
                # Record a persistent CRITICAL error for each unavailable storage
                health_persistence.record_error(
                    error_key=error_key,
-                    category='storage',  # Category for persistence lookup
-                    severity='CRITICAL',  # Storage unavailability is always critical
+                    category='storage',
+                    severity='CRITICAL',
                    reason=reason,
                    details={
                        'storage_name': storage_name,
                        'storage_type': storage.get('type', 'unknown'),
                        'status_detail': status_detail,
-                        'dismissable': False  # Storage errors are not dismissable as they impact operations
+                        'dismissable': False
                    }
                )
-                storage_issues_details.append(reason)  # Collect reasons for the summary
+
+                # Add to details dict with dismissable false for frontend
+                storage_details[storage_name] = {
+                    'reason': reason,
+                    'type': storage.get('type', 'unknown'),
+                    'status': status_detail,
+                    'dismissable': False
+                }

            return {
                'status': 'CRITICAL',
                'reason': f'{len(unavailable_storages)} Proxmox storage(s) unavailable',
-                'details': {
-                    'unavailable_storages': unavailable_storages,
-                    'issues': storage_issues_details
-                }
+                'details': storage_details
            }

        except Exception as e:
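
The record_error/clear_error calls in this hunk lean on a dedup-by-key persistence layer: a storage that stays down should refresh one long-lived record rather than raise a fresh alert on every polling cycle. A rough stand-in showing the contract the monitor appears to rely on (a sketch, not the real health_persistence module):

    import time
    from typing import Any, Dict, List

    class ErrorPersistence:
        def __init__(self) -> None:
            self._active: Dict[str, Dict[str, Any]] = {}

        def record_error(self, error_key: str, category: str, severity: str,
                         reason: str, details: Dict[str, Any]) -> None:
            now = time.time()
            entry = self._active.setdefault(error_key, {
                'error_key': error_key, 'category': category, 'first_seen': now,
            })
            # Refresh mutable fields; first_seen survives across polling cycles.
            entry.update(severity=severity, reason=reason, details=details, last_seen=now)

        def clear_error(self, error_key: str) -> None:
            self._active.pop(error_key, None)

        def get_active_errors(self) -> List[Dict[str, Any]]:
            return list(self._active.values())

Stable keys such as f'storage_unavailable_{storage_name}' are what make the dedup work: the same outage always maps to the same record.
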
@@ -1939,6 +1934,9 @@ class HealthMonitor:
                'timestamp': datetime.now().isoformat()
            }

+    # This is a duplicate of the get_detailed_status method at the top of the file.
+    # It's likely an oversight from copy-pasting. One of them should be removed or renamed.
+    # Keeping both for now to match the provided structure, but in a refactor, this would be cleaned up.
    def get_detailed_status(self) -> Dict[str, Any]:
        """
        Get comprehensive health status with all checks.
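
One consequence of the duplicate noted above: Python emits no warning for a repeated def in a class body; the later definition silently replaces the earlier one, so only the second get_detailed_status is ever callable. A quick demonstration:

    class Demo:
        def status(self) -> str:
            return "first definition"

        def status(self) -> str:  # silently shadows the one above
            return "second definition"

    print(Demo().status())  # prints "second definition" — only the last def survives
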