mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2025-11-17 19:16:25 +00:00
Update AppImage
This commit is contained in:
@@ -3,12 +3,28 @@
|
||||
import { useState, useEffect } from "react"
|
||||
import { Dialog, DialogContent, DialogDescription, DialogHeader, DialogTitle } from "@/components/ui/dialog"
|
||||
import { Badge } from "@/components/ui/badge"
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"
|
||||
import { Loader2, CheckCircle2, AlertTriangle, XCircle, Activity } from "lucide-react"
|
||||
import {
|
||||
Loader2,
|
||||
CheckCircle2,
|
||||
AlertTriangle,
|
||||
XCircle,
|
||||
Activity,
|
||||
Cpu,
|
||||
MemoryStick,
|
||||
HardDrive,
|
||||
Disc,
|
||||
Network,
|
||||
Box,
|
||||
Settings,
|
||||
FileText,
|
||||
RefreshCw,
|
||||
Shield,
|
||||
} from "lucide-react"
|
||||
|
||||
interface HealthDetail {
|
||||
interface CategoryCheck {
|
||||
status: string
|
||||
reason?: string
|
||||
details?: any
|
||||
[key: string]: any
|
||||
}
|
||||
|
||||
@@ -16,7 +32,16 @@ interface HealthDetails {
|
||||
overall: string
|
||||
summary: string
|
||||
details: {
|
||||
[category: string]: HealthDetail | { [key: string]: HealthDetail }
|
||||
cpu: CategoryCheck
|
||||
memory: CategoryCheck
|
||||
storage: CategoryCheck
|
||||
disks: CategoryCheck
|
||||
network: CategoryCheck
|
||||
vms: CategoryCheck
|
||||
services: CategoryCheck
|
||||
logs: CategoryCheck
|
||||
updates: CategoryCheck
|
||||
security: CategoryCheck
|
||||
}
|
||||
timestamp: string
|
||||
}
|
||||
@@ -27,6 +52,19 @@ interface HealthStatusModalProps {
|
||||
getApiUrl: (path: string) => string
|
||||
}
|
||||
|
||||
/**
 * Health categories shown by the modal, in display order.
 *
 * - `key`   — property name looked up on `healthData.details`
 * - `label` — human-readable row title
 * - `Icon`  — icon component rendered next to the row
 *
 * `as const` keeps every `key` as a string literal (instead of widening it to
 * `string`), so lookups by key can be type-checked without an assertion.
 */
const CATEGORIES = [
  { key: "cpu", label: "CPU Usage & Temperature", Icon: Cpu },
  { key: "memory", label: "Memory & Swap", Icon: MemoryStick },
  { key: "storage", label: "Storage Mounts & Space", Icon: HardDrive },
  { key: "disks", label: "Disk I/O & Errors", Icon: Disc },
  { key: "network", label: "Network Interfaces", Icon: Network },
  { key: "vms", label: "VMs & Containers", Icon: Box },
  { key: "services", label: "PVE Services", Icon: Settings },
  { key: "logs", label: "System Logs", Icon: FileText },
  { key: "updates", label: "System Updates", Icon: RefreshCw },
  { key: "security", label: "Security & Certificates", Icon: Shield },
] as const
|
||||
|
||||
export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatusModalProps) {
|
||||
const [loading, setLoading] = useState(true)
|
||||
const [healthData, setHealthData] = useState<HealthDetails | null>(null)
|
||||
@@ -58,74 +96,6 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Tally every health check found in `healthData.details` by status.
 *
 * Each category is either a single check (an object carrying `status`) or a
 * map of named checks; both shapes are counted. Status matching is
 * case-insensitive. A check whose status is not OK / WARNING / CRITICAL still
 * counts towards `total` but lands in no bucket.
 *
 * @returns `{ total, healthy, warnings, critical }`; all zeros when no details
 *          are loaded.
 */
const getHealthStats = () => {
  const stats = { total: 0, healthy: 0, warnings: 0, critical: 0 }

  const details = healthData?.details
  if (!details) {
    return stats
  }

  const countStatus = (detail: any) => {
    if (!detail || typeof detail !== "object" || !detail.status) return

    stats.total++
    // The payload is unvalidated JSON: a non-string status must not throw and
    // take the whole modal down, so it is treated as an unrecognised status.
    const status = typeof detail.status === "string" ? detail.status.toUpperCase() : ""
    if (status === "OK") stats.healthy++
    else if (status === "WARNING") stats.warnings++
    else if (status === "CRITICAL") stats.critical++
  }

  for (const categoryData of Object.values(details)) {
    if (!categoryData || typeof categoryData !== "object") continue

    if ("status" in categoryData) {
      countStatus(categoryData)
    } else {
      // No status at this level: the category is a map of named sub-checks.
      Object.values(categoryData).forEach((entry) => countStatus(entry))
    }
  }

  return stats
}
|
||||
|
||||
/**
 * Build the per-category list of checks from `healthData.details`.
 *
 * The result is keyed by the category name with its first letter upper-cased.
 * A category that carries `status` itself yields a single row named after the
 * category; otherwise each nested object that carries `status` yields a row
 * named after its key. Every row keeps the source object under `details`.
 *
 * @returns the grouped rows, or an empty object when no details are loaded.
 */
const getGroupedChecks = () => {
  type CheckRow = { name: string; status: string; reason?: string; details?: any }

  const grouped: { [key: string]: CheckRow[] } = {}
  const allDetails = healthData?.details
  if (!allDetails) return grouped

  const toRow = (name: string, source: any): CheckRow => ({
    name,
    status: source.status,
    reason: source.reason,
    details: source,
  })

  for (const [category, categoryData] of Object.entries(allDetails)) {
    if (!categoryData || typeof categoryData !== "object") continue

    const title = category.charAt(0).toUpperCase() + category.slice(1)

    grouped[title] =
      "status" in categoryData
        ? [toRow(title, categoryData)]
        : Object.entries(categoryData)
            .filter(([, nested]: [string, any]) => nested && typeof nested === "object" && "status" in nested)
            .map(([subKey, nested]: [string, any]) => toRow(subKey, nested))
  }

  return grouped
}
|
||||
|
||||
const getStatusIcon = (status: string) => {
|
||||
const statusUpper = status?.toUpperCase()
|
||||
switch (statusUpper) {
|
||||
@@ -144,28 +114,52 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
|
||||
const statusUpper = status?.toUpperCase()
|
||||
switch (statusUpper) {
|
||||
case "OK":
|
||||
return <Badge className="bg-green-500">Healthy</Badge>
|
||||
return <Badge className="bg-green-500 text-white">Healthy</Badge>
|
||||
case "WARNING":
|
||||
return <Badge className="bg-yellow-500">Warning</Badge>
|
||||
return <Badge className="bg-yellow-500 text-white">Warning</Badge>
|
||||
case "CRITICAL":
|
||||
return <Badge className="bg-red-500">Critical</Badge>
|
||||
return <Badge className="bg-red-500 text-white">Critical</Badge>
|
||||
default:
|
||||
return <Badge>Unknown</Badge>
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Count how many of the fixed `CATEGORIES` are healthy, in warning, or
 * critical, based on `healthData.details`.
 *
 * Status matching is case-insensitive. `total` is always the number of
 * categories, so a category that is missing or reports any other status is
 * included in `total` but in none of the three buckets.
 *
 * @returns `{ total, healthy, warnings, critical }`; all zeros when no details
 *          are loaded.
 */
const getHealthStats = () => {
  const details = healthData?.details
  if (!details) {
    return { total: 0, healthy: 0, warnings: 0, critical: 0 }
  }

  let healthy = 0
  let warnings = 0
  let critical = 0

  for (const { key } of CATEGORIES) {
    const rawStatus = details[key as keyof typeof details]?.status
    // The payload is unvalidated JSON: only call toUpperCase() on real strings
    // so a malformed status cannot throw while rendering.
    const status = typeof rawStatus === "string" ? rawStatus.toUpperCase() : undefined

    if (status === "OK") healthy++
    else if (status === "WARNING") warnings++
    else if (status === "CRITICAL") critical++
  }

  return { total: CATEGORIES.length, healthy, warnings, critical }
}
|
||||
|
||||
const stats = getHealthStats()
|
||||
const groupedChecks = getGroupedChecks()
|
||||
|
||||
return (
|
||||
<Dialog open={open} onOpenChange={onOpenChange}>
|
||||
<DialogContent className="max-w-4xl max-h-[80vh] overflow-y-auto">
|
||||
<DialogContent className="max-w-3xl max-h-[85vh] overflow-y-auto">
|
||||
<DialogHeader>
|
||||
<DialogTitle className="flex items-center gap-2">
|
||||
<Activity className="h-6 w-6" />
|
||||
System Health Status
|
||||
<DialogTitle className="flex items-center justify-between">
|
||||
<div className="flex items-center gap-2">
|
||||
<Activity className="h-6 w-6" />
|
||||
System Health Status
|
||||
</div>
|
||||
{healthData && getStatusBadge(healthData.overall)}
|
||||
</DialogTitle>
|
||||
<DialogDescription>Detailed health checks for all system components</DialogDescription>
|
||||
<DialogDescription>Comprehensive health checks for all system components</DialogDescription>
|
||||
</DialogHeader>
|
||||
|
||||
{loading && (
|
||||
@@ -182,82 +176,101 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
|
||||
)}
|
||||
|
||||
{healthData && !loading && (
|
||||
<div className="space-y-6">
|
||||
{/* Overall Status Summary */}
|
||||
<Card>
|
||||
<CardHeader>
|
||||
<CardTitle className="flex items-center justify-between">
|
||||
<span>Overall Status</span>
|
||||
{getStatusBadge(healthData.overall)}
|
||||
</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
{healthData.summary && <p className="text-sm text-muted-foreground mb-4">{healthData.summary}</p>}
|
||||
<div className="grid grid-cols-4 gap-4 text-center">
|
||||
<div>
|
||||
<div className="text-2xl font-bold">{stats.total}</div>
|
||||
<div className="text-sm text-muted-foreground">Total Checks</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-2xl font-bold text-green-500">{stats.healthy}</div>
|
||||
<div className="text-sm text-muted-foreground">Healthy</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-2xl font-bold text-yellow-500">{stats.warnings}</div>
|
||||
<div className="text-sm text-muted-foreground">Warnings</div>
|
||||
</div>
|
||||
<div>
|
||||
<div className="text-2xl font-bold text-red-500">{stats.critical}</div>
|
||||
<div className="text-sm text-muted-foreground">Critical</div>
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
<div className="space-y-4">
|
||||
{/* Overall Stats Summary */}
|
||||
<div className="grid grid-cols-4 gap-3 p-4 rounded-lg bg-muted/30 border">
|
||||
<div className="text-center">
|
||||
<div className="text-2xl font-bold">{stats.total}</div>
|
||||
<div className="text-xs text-muted-foreground">Total Checks</div>
|
||||
</div>
|
||||
<div className="text-center">
|
||||
<div className="text-2xl font-bold text-green-500">{stats.healthy}</div>
|
||||
<div className="text-xs text-muted-foreground">Healthy</div>
|
||||
</div>
|
||||
<div className="text-center">
|
||||
<div className="text-2xl font-bold text-yellow-500">{stats.warnings}</div>
|
||||
<div className="text-xs text-muted-foreground">Warnings</div>
|
||||
</div>
|
||||
<div className="text-center">
|
||||
<div className="text-2xl font-bold text-red-500">{stats.critical}</div>
|
||||
<div className="text-xs text-muted-foreground">Critical</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Grouped Health Checks */}
|
||||
{Object.entries(groupedChecks).map(([category, checks]) => (
|
||||
<Card key={category}>
|
||||
<CardHeader>
|
||||
<CardTitle className="text-lg">{category}</CardTitle>
|
||||
</CardHeader>
|
||||
<CardContent>
|
||||
<div className="space-y-3">
|
||||
{checks.map((check, index) => (
|
||||
<div
|
||||
key={`${category}-${index}`}
|
||||
className="flex items-start gap-3 rounded-lg border p-3 hover:bg-muted/50 transition-colors"
|
||||
>
|
||||
<div className="mt-0.5">{getStatusIcon(check.status)}</div>
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="flex items-center justify-between gap-2">
|
||||
<p className="font-medium">{check.name}</p>
|
||||
<Badge variant="outline" className="shrink-0">
|
||||
{check.status}
|
||||
</Badge>
|
||||
</div>
|
||||
{check.reason && <p className="text-sm text-muted-foreground mt-1">{check.reason}</p>}
|
||||
{check.details && (
|
||||
<div className="text-xs text-muted-foreground mt-2 space-y-0.5">
|
||||
{Object.entries(check.details).map(([key, value]) => {
|
||||
if (key === "status" || key === "reason" || typeof value === "object") return null
|
||||
return (
|
||||
<div key={key} className="font-mono">
|
||||
{key}: {String(value)}
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
{healthData.summary && (
|
||||
<div className="text-sm text-muted-foreground p-3 rounded-lg bg-muted/20 border">
|
||||
{healthData.summary}
|
||||
</div>
|
||||
)}
|
||||
|
||||
<div className="space-y-2">
|
||||
{CATEGORIES.map(({ key, label, Icon }) => {
|
||||
const categoryData = healthData.details[key as keyof typeof healthData.details]
|
||||
const status = categoryData?.status || "UNKNOWN"
|
||||
const reason = categoryData?.reason
|
||||
const details = categoryData?.details
|
||||
|
||||
return (
|
||||
<div
|
||||
key={key}
|
||||
className={`flex items-start gap-3 p-3 rounded-lg border transition-colors ${
|
||||
status === "OK"
|
||||
? "bg-green-500/5 border-green-500/20 hover:bg-green-500/10"
|
||||
: status === "WARNING"
|
||||
? "bg-yellow-500/5 border-yellow-500/20 hover:bg-yellow-500/10"
|
||||
: status === "CRITICAL"
|
||||
? "bg-red-500/5 border-red-500/20 hover:bg-red-500/10"
|
||||
: "bg-muted/30 hover:bg-muted/50"
|
||||
}`}
|
||||
>
|
||||
<div className="mt-0.5 flex-shrink-0 flex items-center gap-2">
|
||||
<Icon className="h-4 w-4 text-muted-foreground" />
|
||||
{getStatusIcon(status)}
|
||||
</div>
|
||||
<div className="flex-1 min-w-0">
|
||||
<div className="flex items-center justify-between gap-2 mb-1">
|
||||
<p className="font-medium text-sm">{label}</p>
|
||||
<Badge
|
||||
variant="outline"
|
||||
className={`shrink-0 text-xs ${
|
||||
status === "OK"
|
||||
? "border-green-500 text-green-500"
|
||||
: status === "WARNING"
|
||||
? "border-yellow-500 text-yellow-500"
|
||||
: status === "CRITICAL"
|
||||
? "border-red-500 text-red-500"
|
||||
: ""
|
||||
}`}
|
||||
>
|
||||
{status}
|
||||
</Badge>
|
||||
</div>
|
||||
))}
|
||||
{reason && <p className="text-xs text-muted-foreground mt-1">{reason}</p>}
|
||||
{details && typeof details === "object" && (
|
||||
<div className="mt-2 space-y-1">
|
||||
{Object.entries(details).map(([detailKey, detailValue]: [string, any]) => {
|
||||
if (typeof detailValue === "object" && detailValue !== null) {
|
||||
return (
|
||||
<div key={detailKey} className="text-xs pl-3 border-l-2 border-muted">
|
||||
<span className="font-medium">{detailKey}:</span>
|
||||
{detailValue.reason && (
|
||||
<span className="ml-1 text-muted-foreground">{detailValue.reason}</span>
|
||||
)}
|
||||
</div>
|
||||
)
|
||||
}
|
||||
return null
|
||||
})}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</CardContent>
|
||||
</Card>
|
||||
))}
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
|
||||
{healthData.timestamp && (
|
||||
<div className="text-xs text-muted-foreground text-center">
|
||||
<div className="text-xs text-muted-foreground text-center pt-2">
|
||||
Last updated: {new Date(healthData.timestamp).toLocaleString()}
|
||||
</div>
|
||||
)}
|
||||
|
||||
@@ -55,7 +55,9 @@ interface FlaskSystemInfo {
|
||||
hostname: string
|
||||
node_id: string
|
||||
uptime: string
|
||||
health_status: "healthy" | "warning" | "critical"
|
||||
health: {
|
||||
status: "healthy" | "warning" | "critical"
|
||||
}
|
||||
}
|
||||
|
||||
export function ProxmoxDashboard() {
|
||||
@@ -96,8 +98,10 @@ export function ProxmoxDashboard() {
|
||||
const uptimeValue =
|
||||
data.uptime && typeof data.uptime === "string" && data.uptime.trim() !== "" ? data.uptime : "N/A"
|
||||
|
||||
const healthStatus = data.health?.status?.toLowerCase() || "healthy"
|
||||
|
||||
setSystemStatus({
|
||||
status: data.health_status || "healthy",
|
||||
status: healthStatus as "healthy" | "warning" | "critical",
|
||||
uptime: uptimeValue,
|
||||
lastUpdate: new Date().toLocaleTimeString("en-US", { hour12: false }),
|
||||
serverName: data.hostname || "Unknown",
|
||||
|
||||
@@ -29,11 +29,21 @@ def get_health_details():
|
||||
def get_system_info():
    """
    Get lightweight system info for header display.

    The payload comes from ``health_monitor.get_system_info()``. When it has a
    ``health`` entry, that entry's ``status`` is rewritten in place from
    OK / WARNING / CRITICAL / UNKNOWN to healthy / warning / critical / warning.
    A missing status is treated as OK, and any unrecognised value becomes
    healthy.

    Returns: the JSON payload, or a JSON ``error`` body with HTTP 500 if
    anything raises.
    """
    try:
        payload = health_monitor.get_system_info()

        if 'health' in payload:
            health = payload['health']
            raw_status = health.get('status', 'OK').upper()
            # Translate the monitor's status names into the values the header expects.
            health['status'] = {
                'OK': 'healthy',
                'WARNING': 'warning',
                'CRITICAL': 'critical',
                'UNKNOWN': 'warning',
            }.get(raw_status, 'healthy')

        return jsonify(payload)
    except Exception as exc:
        return jsonify({'error': str(exc)}), 500
|
||||
|
||||
@@ -4,7 +4,7 @@ Provides comprehensive, lightweight health checks for Proxmox systems.
|
||||
Optimized for minimal system impact with intelligent thresholds and hysteresis.
|
||||
|
||||
Author: MacRimi
|
||||
Version: 1.1 (Optimized for minimal overhead)
|
||||
Version: 1.2 (Always returns all 10 categories)
|
||||
"""
|
||||
|
||||
import psutil
|
||||
@@ -15,12 +15,13 @@ import os
|
||||
from typing import Dict, List, Any, Tuple, Optional
|
||||
from datetime import datetime, timedelta
|
||||
from collections import defaultdict
|
||||
import re
|
||||
|
||||
class HealthMonitor:
|
||||
"""
|
||||
Monitors system health across multiple components with minimal impact.
|
||||
Implements hysteresis, intelligent caching, and progressive escalation.
|
||||
Only reports problems, not verbose OK statuses.
|
||||
Always returns all 10 health categories.
|
||||
"""
|
||||
|
||||
# CPU Thresholds
|
||||
@@ -186,92 +187,104 @@ class HealthMonitor:
|
||||
def get_detailed_status(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Get comprehensive health status with all checks.
|
||||
Returns JSON structure matching the specification.
|
||||
OPTIMIZED: Only shows problems, not verbose OK messages.
|
||||
Returns JSON structure with ALL 10 categories always present.
|
||||
"""
|
||||
details = {}
|
||||
details = {
|
||||
'cpu': {'status': 'OK'},
|
||||
'memory': {'status': 'OK'},
|
||||
'storage': {'status': 'OK'},
|
||||
'disks': {'status': 'OK'},
|
||||
'network': {'status': 'OK'},
|
||||
'vms': {'status': 'OK'},
|
||||
'services': {'status': 'OK'},
|
||||
'logs': {'status': 'OK'},
|
||||
'updates': {'status': 'OK'},
|
||||
'security': {'status': 'OK'}
|
||||
}
|
||||
|
||||
critical_issues = []
|
||||
warning_issues = []
|
||||
|
||||
# Priority 1: Services PVE
|
||||
services_status = self._check_pve_services()
|
||||
if services_status['status'] != 'OK':
|
||||
details['services'] = services_status
|
||||
if services_status['status'] == 'CRITICAL':
|
||||
critical_issues.append(services_status.get('reason', 'Service failure'))
|
||||
elif services_status['status'] == 'WARNING':
|
||||
warning_issues.append(services_status.get('reason', 'Service issue'))
|
||||
details['services'] = services_status
|
||||
if services_status['status'] == 'CRITICAL':
|
||||
critical_issues.append(services_status.get('reason', 'Service failure'))
|
||||
elif services_status['status'] == 'WARNING':
|
||||
warning_issues.append(services_status.get('reason', 'Service issue'))
|
||||
|
||||
# Priority 2: Storage
|
||||
storage_status = self._check_storage_optimized()
|
||||
if storage_status and storage_status.get('status') != 'OK':
|
||||
if storage_status:
|
||||
details['storage'] = storage_status
|
||||
if storage_status.get('status') == 'CRITICAL':
|
||||
critical_issues.append(storage_status.get('reason', 'Storage failure'))
|
||||
elif storage_status.get('status') == 'WARNING':
|
||||
warning_issues.append(storage_status.get('reason', 'Storage issue'))
|
||||
|
||||
# Priority 3: Disks
|
||||
disks_status = self._check_disks_optimized()
|
||||
if disks_status and disks_status.get('status') != 'OK':
|
||||
if disks_status:
|
||||
details['disks'] = disks_status
|
||||
if disks_status.get('status') == 'CRITICAL':
|
||||
critical_issues.append(disks_status.get('reason', 'Disk failure'))
|
||||
elif disks_status.get('status') == 'WARNING':
|
||||
warning_issues.append(disks_status.get('reason', 'Disk issue'))
|
||||
|
||||
# Priority 4: VMs/CTs - now detects qmp errors from logs
|
||||
vms_status = self._check_vms_cts_optimized()
|
||||
if vms_status and vms_status.get('status') != 'OK':
|
||||
if vms_status:
|
||||
details['vms'] = vms_status
|
||||
if vms_status.get('status') == 'CRITICAL':
|
||||
critical_issues.append(vms_status.get('reason', 'VM/CT failure'))
|
||||
elif vms_status.get('status') == 'WARNING':
|
||||
warning_issues.append(vms_status.get('reason', 'VM/CT issue'))
|
||||
|
||||
# Priority 5: Network
|
||||
network_status = self._check_network_optimized()
|
||||
if network_status and network_status.get('status') != 'OK':
|
||||
if network_status:
|
||||
details['network'] = network_status
|
||||
if network_status.get('status') == 'CRITICAL':
|
||||
critical_issues.append(network_status.get('reason', 'Network failure'))
|
||||
elif network_status.get('status') == 'WARNING':
|
||||
warning_issues.append(network_status.get('reason', 'Network issue'))
|
||||
|
||||
# Priority 5: CPU/RAM (solo si hay problemas)
|
||||
# Priority 6: CPU
|
||||
cpu_status = self._check_cpu_with_hysteresis()
|
||||
if cpu_status.get('status') != 'OK':
|
||||
details['cpu'] = cpu_status
|
||||
if cpu_status.get('status') == 'WARNING':
|
||||
warning_issues.append(cpu_status.get('reason', 'CPU high'))
|
||||
elif cpu_status.get('status') == 'CRITICAL':
|
||||
critical_issues.append(cpu_status.get('reason', 'CPU critical'))
|
||||
details['cpu'] = cpu_status
|
||||
if cpu_status.get('status') == 'WARNING':
|
||||
warning_issues.append(cpu_status.get('reason', 'CPU high'))
|
||||
elif cpu_status.get('status') == 'CRITICAL':
|
||||
critical_issues.append(cpu_status.get('reason', 'CPU critical'))
|
||||
|
||||
# Priority 7: Memory
|
||||
memory_status = self._check_memory_comprehensive()
|
||||
if memory_status.get('status') != 'OK':
|
||||
details['memory'] = memory_status
|
||||
if memory_status.get('status') == 'CRITICAL':
|
||||
critical_issues.append(memory_status.get('reason', 'Memory critical'))
|
||||
elif memory_status.get('status') == 'WARNING':
|
||||
warning_issues.append(memory_status.get('reason', 'Memory high'))
|
||||
details['memory'] = memory_status
|
||||
if memory_status.get('status') == 'CRITICAL':
|
||||
critical_issues.append(memory_status.get('reason', 'Memory critical'))
|
||||
elif memory_status.get('status') == 'WARNING':
|
||||
warning_issues.append(memory_status.get('reason', 'Memory high'))
|
||||
|
||||
# Priority 6: Logs (solo errores críticos)
|
||||
# Priority 8: Logs
|
||||
logs_status = self._check_logs_lightweight()
|
||||
if logs_status.get('status') != 'OK':
|
||||
details['logs'] = logs_status
|
||||
if logs_status.get('status') == 'CRITICAL':
|
||||
critical_issues.append(logs_status.get('reason', 'Critical log errors'))
|
||||
elif logs_status.get('status') == 'WARNING':
|
||||
warning_issues.append(logs_status.get('reason', 'Log warnings'))
|
||||
details['logs'] = logs_status
|
||||
if logs_status.get('status') == 'CRITICAL':
|
||||
critical_issues.append(logs_status.get('reason', 'Critical log errors'))
|
||||
elif logs_status.get('status') == 'WARNING':
|
||||
warning_issues.append(logs_status.get('reason', 'Log warnings'))
|
||||
|
||||
# Priority 9: Updates
|
||||
updates_status = self._check_updates()
|
||||
if updates_status and updates_status.get('status') != 'OK':
|
||||
if updates_status:
|
||||
details['updates'] = updates_status
|
||||
if updates_status.get('status') == 'WARNING':
|
||||
warning_issues.append(updates_status.get('reason', 'Updates pending'))
|
||||
|
||||
# Priority 7: Security (solo problemas)
|
||||
# Priority 10: Security
|
||||
security_status = self._check_security()
|
||||
if security_status.get('status') != 'OK':
|
||||
details['security'] = security_status
|
||||
if security_status.get('status') == 'WARNING':
|
||||
warning_issues.append(security_status.get('reason', 'Security issue'))
|
||||
details['security'] = security_status
|
||||
if security_status.get('status') == 'WARNING':
|
||||
warning_issues.append(security_status.get('reason', 'Security issue'))
|
||||
|
||||
# Determine overall status
|
||||
if critical_issues:
|
||||
@@ -498,9 +511,9 @@ class HealthMonitor:
|
||||
except Exception as e:
|
||||
return {'status': 'UNKNOWN', 'reason': f'Memory check failed: {str(e)}'}
|
||||
|
||||
def _check_storage_optimized(self) -> Optional[Dict[str, Any]]:
|
||||
def _check_storage_optimized(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Optimized storage check - only reports problems.
|
||||
Optimized storage check - always returns status.
|
||||
Checks critical mounts, LVM, and Proxmox storages.
|
||||
"""
|
||||
issues = []
|
||||
@@ -510,12 +523,34 @@ class HealthMonitor:
|
||||
critical_mounts = ['/', '/var/lib/vz']
|
||||
|
||||
for mount_point in critical_mounts:
|
||||
if not os.path.exists(mount_point):
|
||||
issues.append(f'{mount_point} not mounted')
|
||||
storage_details[mount_point] = {
|
||||
'status': 'CRITICAL',
|
||||
'reason': 'Not mounted'
|
||||
}
|
||||
is_mounted = False
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['mountpoint', '-q', mount_point],
|
||||
capture_output=True,
|
||||
timeout=2
|
||||
)
|
||||
is_mounted = (result.returncode == 0)
|
||||
except:
|
||||
pass
|
||||
|
||||
if not is_mounted:
|
||||
# Only report as error if it's supposed to exist
|
||||
if mount_point == '/':
|
||||
issues.append(f'{mount_point}: Not mounted')
|
||||
storage_details[mount_point] = {
|
||||
'status': 'CRITICAL',
|
||||
'reason': 'Not mounted'
|
||||
}
|
||||
# For /var/lib/vz, it might not be a separate mount, check if dir exists
|
||||
elif mount_point == '/var/lib/vz':
|
||||
if os.path.exists(mount_point):
|
||||
# It exists as directory, check usage
|
||||
fs_status = self._check_filesystem(mount_point)
|
||||
if fs_status['status'] != 'OK':
|
||||
issues.append(f"{mount_point}: {fs_status['reason']}")
|
||||
storage_details[mount_point] = fs_status
|
||||
# If doesn't exist, skip silently (might use different storage)
|
||||
continue
|
||||
|
||||
fs_status = self._check_filesystem(mount_point)
|
||||
@@ -536,7 +571,6 @@ class HealthMonitor:
|
||||
issues.append(f"{storage_name}: {storage_data.get('reason', 'Storage issue')}")
|
||||
storage_details[storage_name] = storage_data
|
||||
|
||||
# If no issues, return None (optimized)
|
||||
if not issues:
|
||||
return {'status': 'OK'}
|
||||
|
||||
@@ -605,8 +639,8 @@ class HealthMonitor:
|
||||
'reason': f'Check failed: {str(e)}'
|
||||
}
|
||||
|
||||
def _check_lvm(self) -> Optional[Dict[str, Any]]:
|
||||
"""Check LVM volumes, especially local-lvm"""
|
||||
def _check_lvm(self) -> Dict[str, Any]:
|
||||
"""Check LVM volumes - improved detection"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
['lvs', '--noheadings', '--options', 'lv_name,vg_name,lv_attr'],
|
||||
@@ -616,10 +650,9 @@ class HealthMonitor:
|
||||
)
|
||||
|
||||
if result.returncode != 0:
|
||||
return None
|
||||
return {'status': 'OK'}
|
||||
|
||||
volumes = []
|
||||
local_lvm_found = False
|
||||
|
||||
for line in result.stdout.strip().split('\n'):
|
||||
if line.strip():
|
||||
@@ -628,20 +661,11 @@ class HealthMonitor:
|
||||
lv_name = parts[0].strip()
|
||||
vg_name = parts[1].strip()
|
||||
volumes.append(f'{vg_name}/{lv_name}')
|
||||
|
||||
if 'local-lvm' in lv_name or 'local-lvm' in vg_name:
|
||||
local_lvm_found = True
|
||||
|
||||
if volumes and not local_lvm_found:
|
||||
return {
|
||||
'status': 'CRITICAL',
|
||||
'reason': 'local-lvm volume not found'
|
||||
}
|
||||
|
||||
return {'status': 'OK'}
|
||||
return {'status': 'OK', 'volumes': len(volumes)}
|
||||
|
||||
except Exception:
|
||||
return None
|
||||
return {'status': 'OK'}
|
||||
|
||||
def _check_proxmox_storages(self) -> Dict[str, Any]:
|
||||
"""Check Proxmox-specific storages (only report problems)"""
|
||||
@@ -680,9 +704,9 @@ class HealthMonitor:
|
||||
|
||||
return storages
|
||||
|
||||
def _check_disks_optimized(self) -> Optional[Dict[str, Any]]:
|
||||
def _check_disks_optimized(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Optimized disk check - only reports I/O errors and SMART issues.
|
||||
Optimized disk check - always returns status.
|
||||
"""
|
||||
current_time = time.time()
|
||||
disk_issues = {}
|
||||
@@ -725,7 +749,6 @@ class HealthMonitor:
|
||||
'reason': f'{error_count} I/O error(s) in 5 minutes'
|
||||
}
|
||||
|
||||
# If no issues, return OK
|
||||
if not disk_issues:
|
||||
return {'status': 'OK'}
|
||||
|
||||
@@ -738,12 +761,11 @@ class HealthMonitor:
|
||||
}
|
||||
|
||||
except Exception:
|
||||
return None
|
||||
return {'status': 'OK'}
|
||||
|
||||
def _check_network_optimized(self) -> Optional[Dict[str, Any]]:
|
||||
def _check_network_optimized(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Optimized network check - only reports problems.
|
||||
Checks interfaces down, no connectivity.
|
||||
Optimized network check - always returns status.
|
||||
"""
|
||||
try:
|
||||
issues = []
|
||||
@@ -770,7 +792,6 @@ class HealthMonitor:
|
||||
issues.append(latency_status.get('reason', 'Network latency issue'))
|
||||
interface_details['connectivity'] = latency_status
|
||||
|
||||
# If no issues, return OK
|
||||
if not issues:
|
||||
return {'status': 'OK'}
|
||||
|
||||
@@ -783,7 +804,7 @@ class HealthMonitor:
|
||||
}
|
||||
|
||||
except Exception:
|
||||
return None
|
||||
return {'status': 'OK'}
|
||||
|
||||
def _check_network_latency(self) -> Optional[Dict[str, Any]]:
|
||||
"""Check network latency to 1.1.1.1 (cached)"""
|
||||
@@ -843,18 +864,18 @@ class HealthMonitor:
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _check_vms_cts_optimized(self) -> Optional[Dict[str, Any]]:
|
||||
def _check_vms_cts_optimized(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Optimized VM/CT check - only reports failed starts.
|
||||
Checks logs for VMs/CTs that failed to start.
|
||||
Optimized VM/CT check - detects qmp failures and other VM errors.
|
||||
Now parses logs for VM/CT specific errors like qmp command failures.
|
||||
"""
|
||||
try:
|
||||
issues = []
|
||||
vm_details = {}
|
||||
|
||||
# Check logs for failed VM/CT starts
|
||||
# Check logs for VM/CT errors
|
||||
result = subprocess.run(
|
||||
['journalctl', '--since', '10 minutes ago', '--no-pager', '-u', 'pve*'],
|
||||
['journalctl', '--since', '10 minutes ago', '--no-pager', '-u', 'pve*', '-p', 'warning'],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=3
|
||||
@@ -864,34 +885,66 @@ class HealthMonitor:
|
||||
for line in result.stdout.split('\n'):
|
||||
line_lower = line.lower()
|
||||
|
||||
# Detect VM/CT start failures
|
||||
# Pattern 1: "VM 106 qmp command failed"
|
||||
vm_qmp_match = re.search(r'vm\s+(\d+)\s+qmp\s+command', line_lower)
|
||||
if vm_qmp_match:
|
||||
vmid = vm_qmp_match.group(1)
|
||||
key = f'vm_{vmid}'
|
||||
if key not in vm_details:
|
||||
issues.append(f'VM {vmid}: QMP command error')
|
||||
vm_details[key] = {
|
||||
'status': 'WARNING',
|
||||
'reason': 'QMP command failed',
|
||||
'id': vmid,
|
||||
'type': 'VM'
|
||||
}
|
||||
continue
|
||||
|
||||
# Pattern 2: "CT 103 error" or "Container 103"
|
||||
ct_match = re.search(r'(?:ct|container)\s+(\d+)', line_lower)
|
||||
if ct_match and ('error' in line_lower or 'fail' in line_lower):
|
||||
ctid = ct_match.group(1)
|
||||
key = f'ct_{ctid}'
|
||||
if key not in vm_details:
|
||||
issues.append(f'CT {ctid}: Error detected')
|
||||
vm_details[key] = {
|
||||
'status': 'WARNING',
|
||||
'reason': 'Container error',
|
||||
'id': ctid,
|
||||
'type': 'CT'
|
||||
}
|
||||
continue
|
||||
|
||||
# Pattern 3: Generic VM/CT start failures
|
||||
if 'failed to start' in line_lower or 'error starting' in line_lower or \
|
||||
'start error' in line_lower or 'cannot start' in line_lower:
|
||||
# Extract VM/CT ID
|
||||
for word in line.split():
|
||||
if word.isdigit() and len(word) <= 4:
|
||||
vmid = word
|
||||
if vmid not in self.failed_vm_history:
|
||||
self.failed_vm_history.add(vmid)
|
||||
issues.append(f'VM/CT {vmid} failed to start')
|
||||
vm_details[f'vmct_{vmid}'] = {
|
||||
'status': 'CRITICAL',
|
||||
'reason': 'Failed to start'
|
||||
}
|
||||
break
|
||||
id_match = re.search(r'\b(\d{3,4})\b', line)
|
||||
if id_match:
|
||||
vmid = id_match.group(1)
|
||||
key = f'vmct_{vmid}'
|
||||
if key not in vm_details:
|
||||
issues.append(f'VM/CT {vmid}: Failed to start')
|
||||
vm_details[key] = {
|
||||
'status': 'CRITICAL',
|
||||
'reason': 'Failed to start',
|
||||
'id': vmid,
|
||||
'type': 'VM/CT'
|
||||
}
|
||||
|
||||
# If no issues, return OK
|
||||
if not issues:
|
||||
return {'status': 'OK'}
|
||||
|
||||
has_critical = any(d.get('status') == 'CRITICAL' for d in vm_details.values())
|
||||
|
||||
return {
|
||||
'status': 'CRITICAL',
|
||||
'status': 'CRITICAL' if has_critical else 'WARNING',
|
||||
'reason': '; '.join(issues[:3]),
|
||||
'details': vm_details
|
||||
}
|
||||
|
||||
except Exception:
|
||||
return None
|
||||
return {'status': 'OK'}
|
||||
|
||||
def _check_pve_services(self) -> Dict[str, Any]:
|
||||
"""Check critical Proxmox services"""
|
||||
|
||||
Reference in New Issue
Block a user