mirror of
https://github.com/MacRimi/ProxMenux.git
synced 2025-11-17 19:16:25 +00:00
Update AppImage
This commit is contained in:
192
AppImage/components/health-status-modal.tsx
Normal file
192
AppImage/components/health-status-modal.tsx
Normal file
@@ -0,0 +1,192 @@
|
||||
"use client"
|
||||
|
||||
import { useState, useEffect } from "react"
|
||||
import { Dialog, DialogContent, DialogDescription, DialogHeader, DialogTitle } from "@/components/ui/dialog"
|
||||
import { Badge } from "@/components/ui/badge"
|
||||
import { Card, CardContent, CardHeader, CardTitle } from "@/components/ui/card"
|
||||
import { Loader2, CheckCircle2, AlertTriangle, XCircle, Activity } from "lucide-react"
|
||||
|
||||
/** Severity levels reported by the health API. */
type HealthStatus = "healthy" | "warning" | "critical"

/** A single health check result as returned by `/api/health/details`. */
interface HealthCheck {
  /** Group the check is displayed under (e.g. "System", "Storage"). */
  category: string
  /** Human-readable name of the check. */
  name: string
  status: HealthStatus
  /** Short, pre-formatted value shown next to the name. */
  value: string
  /** Explanation of the current status. */
  message: string
  /**
   * Check-specific payload. Its shape varies per check, so it is typed as
   * `unknown` and must be narrowed before use.
   */
  details: unknown
}

/** Full payload of `/api/health/details`. */
interface HealthDetails {
  overall: {
    status: HealthStatus
    critical_count: number
    warning_count: number
    healthy_count: number
    total_checks: number
    /** Numeric timestamp sent by the backend alongside the counters. */
    timestamp?: number
  }
  checks: HealthCheck[]
}

/** Props accepted by the health status modal. */
interface HealthStatusModalProps {
  /** Whether the dialog is currently shown. */
  open: boolean
  /** Called when the dialog requests to be opened or closed. */
  onOpenChange: (open: boolean) => void
  /** Maps an API path to the full URL to request. */
  getApiUrl: (path: string) => string
}
|
||||
|
||||
/**
 * Shallow runtime check that a decoded JSON value has the two top-level
 * members the modal reads (`overall` object and `checks` array).
 * It does not validate the individual fields inside them.
 */
function isHealthDetails(data: unknown): data is HealthDetails {
  if (typeof data !== "object" || data === null) {
    return false
  }
  const candidate = data as { overall?: unknown; checks?: unknown }
  return typeof candidate.overall === "object" && candidate.overall !== null && Array.isArray(candidate.checks)
}

/**
 * Modal dialog that loads `/api/health/details` every time it is opened and
 * renders an overall summary plus the individual checks grouped by category.
 *
 * @param open - Whether the dialog is shown; a fetch starts whenever this becomes true.
 * @param onOpenChange - Forwarded to the underlying `Dialog`.
 * @param getApiUrl - Builds the request URL; it is an effect dependency, so pass a stable function.
 */
export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatusModalProps) {
  const [loading, setLoading] = useState(true)
  const [healthData, setHealthData] = useState<HealthDetails | null>(null)
  const [error, setError] = useState<string | null>(null)

  useEffect(() => {
    if (!open) {
      return
    }

    // Cancels the in-flight request when the dialog closes, re-opens or
    // unmounts, so a late response can never overwrite newer state.
    const controller = new AbortController()

    const fetchHealthDetails = async () => {
      setLoading(true)
      setError(null)

      try {
        const response = await fetch(getApiUrl("/api/health/details"), { signal: controller.signal })
        if (!response.ok) {
          throw new Error("Failed to fetch health details")
        }
        const data: unknown = await response.json()
        if (!isHealthDetails(data)) {
          throw new Error("Unexpected health details response")
        }
        setHealthData(data)
      } catch (err) {
        if (controller.signal.aborted) {
          // Request was cancelled by the cleanup below; nothing to report.
          return
        }
        // Drop any previous result so stale data is not shown next to the error.
        setHealthData(null)
        setError(err instanceof Error ? err.message : "Unknown error")
      } finally {
        if (!controller.signal.aborted) {
          setLoading(false)
        }
      }
    }

    void fetchHealthDetails()

    return () => controller.abort()
  }, [open, getApiUrl])

  /** Icon matching a check's severity; unknown values get a neutral icon. */
  const getStatusIcon = (status: string) => {
    switch (status) {
      case "healthy":
        return <CheckCircle2 className="h-5 w-5 text-green-500" />
      case "warning":
        return <AlertTriangle className="h-5 w-5 text-yellow-500" />
      case "critical":
        return <XCircle className="h-5 w-5 text-red-500" />
      default:
        return <Activity className="h-5 w-5 text-gray-500" />
    }
  }

  /** Badge matching the overall severity; unknown values render "Unknown". */
  const getStatusBadge = (status: string) => {
    switch (status) {
      case "healthy":
        return <Badge className="bg-green-500">Healthy</Badge>
      case "warning":
        return <Badge className="bg-yellow-500">Warning</Badge>
      case "critical":
        return <Badge className="bg-red-500">Critical</Badge>
      default:
        return <Badge>Unknown</Badge>
    }
  }

  // Bucket checks by category, keeping the order in which categories first appear.
  const groupedChecks = healthData?.checks.reduce(
    (acc, check) => {
      if (!acc[check.category]) {
        acc[check.category] = []
      }
      acc[check.category].push(check)
      return acc
    },
    {} as Record<string, HealthCheck[]>,
  )

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent className="max-w-4xl max-h-[80vh] overflow-y-auto">
        <DialogHeader>
          <DialogTitle className="flex items-center gap-2">
            <Activity className="h-6 w-6" />
            System Health Status
          </DialogTitle>
          <DialogDescription>Detailed health checks for all system components</DialogDescription>
        </DialogHeader>

        {loading && (
          <div className="flex items-center justify-center py-8">
            <Loader2 className="h-8 w-8 animate-spin text-primary" />
          </div>
        )}

        {error && (
          <div className="rounded-lg border border-red-200 bg-red-50 p-4 text-red-800">
            <p className="font-medium">Error loading health status</p>
            <p className="text-sm">{error}</p>
          </div>
        )}

        {healthData && !loading && (
          <div className="space-y-6">
            {/* Overall Status Summary */}
            <Card>
              <CardHeader>
                <CardTitle className="flex items-center justify-between">
                  <span>Overall Status</span>
                  {getStatusBadge(healthData.overall.status)}
                </CardTitle>
              </CardHeader>
              <CardContent>
                <div className="grid grid-cols-4 gap-4 text-center">
                  <div>
                    <div className="text-2xl font-bold">{healthData.overall.total_checks}</div>
                    <div className="text-sm text-muted-foreground">Total Checks</div>
                  </div>
                  <div>
                    <div className="text-2xl font-bold text-green-500">{healthData.overall.healthy_count}</div>
                    <div className="text-sm text-muted-foreground">Healthy</div>
                  </div>
                  <div>
                    <div className="text-2xl font-bold text-yellow-500">{healthData.overall.warning_count}</div>
                    <div className="text-sm text-muted-foreground">Warnings</div>
                  </div>
                  <div>
                    <div className="text-2xl font-bold text-red-500">{healthData.overall.critical_count}</div>
                    <div className="text-sm text-muted-foreground">Critical</div>
                  </div>
                </div>
              </CardContent>
            </Card>

            {/* Grouped Health Checks */}
            {groupedChecks &&
              Object.entries(groupedChecks).map(([category, checks]) => (
                <Card key={category}>
                  <CardHeader>
                    <CardTitle className="text-lg">{category}</CardTitle>
                  </CardHeader>
                  <CardContent>
                    <div className="space-y-3">
                      {checks.map((check, index) => (
                        <div
                          key={`${check.name}-${index}`}
                          className="flex items-start gap-3 rounded-lg border p-3 hover:bg-muted/50 transition-colors"
                        >
                          <div className="mt-0.5">{getStatusIcon(check.status)}</div>
                          <div className="flex-1 min-w-0">
                            <div className="flex items-center justify-between gap-2">
                              <p className="font-medium">{check.name}</p>
                              <span className="text-sm font-mono text-muted-foreground whitespace-nowrap">
                                {check.value}
                              </span>
                            </div>
                            <p className="text-sm text-muted-foreground mt-1">{check.message}</p>
                          </div>
                        </div>
                      ))}
                    </div>
                  </CardContent>
                </Card>
              ))}
          </div>
        )}
      </DialogContent>
    </Dialog>
  )
}
|
||||
@@ -10,13 +10,12 @@ import { NetworkMetrics } from "./network-metrics"
|
||||
import { VirtualMachines } from "./virtual-machines"
|
||||
import Hardware from "./hardware"
|
||||
import { SystemLogs } from "./system-logs"
|
||||
import { OnboardingCarousel } from "./onboarding-carousel"
|
||||
import { AuthSetup } from "./auth-setup"
|
||||
import { Login } from "./login"
|
||||
import { Settings } from "./settings"
|
||||
import { getApiUrl, getApiBaseUrl } from "../lib/api-config"
|
||||
import { HealthStatusModal } from "./health-status-modal"
|
||||
import {
|
||||
RefreshCw,
|
||||
AlertTriangle,
|
||||
CheckCircle,
|
||||
XCircle,
|
||||
@@ -30,7 +29,6 @@ import {
|
||||
FileText,
|
||||
SettingsIcon,
|
||||
} from "lucide-react"
|
||||
import Image from "next/image"
|
||||
import { ThemeToggle } from "./theme-toggle"
|
||||
import { Sheet, SheetContent, SheetTrigger } from "./ui/sheet"
|
||||
|
||||
@@ -82,6 +80,7 @@ export function ProxmoxDashboard() {
|
||||
const [authRequired, setAuthRequired] = useState(false)
|
||||
const [isAuthenticated, setIsAuthenticated] = useState(false)
|
||||
const [authDeclined, setAuthDeclined] = useState(false)
|
||||
const [showHealthModal, setShowHealthModal] = useState(false)
|
||||
|
||||
const fetchSystemData = useCallback(async () => {
|
||||
console.log("[v0] Fetching system data from Flask server...")
|
||||
@@ -390,7 +389,46 @@ export function ProxmoxDashboard() {
|
||||
|
||||
return (
|
||||
<div className="min-h-screen bg-background">
|
||||
<OnboardingCarousel />
|
||||
<HealthStatusModal open={showHealthModal} onOpenChange={setShowHealthModal} getApiUrl={getApiUrl} />
|
||||
|
||||
<header
|
||||
className="border-b bg-card cursor-pointer hover:bg-muted/50 transition-colors"
|
||||
onClick={() => setShowHealthModal(true)}
|
||||
>
|
||||
<div className="container mx-auto px-4 py-4">
|
||||
<div className="flex items-center justify-between">
|
||||
<div className="flex items-center gap-4">
|
||||
<div className="flex items-center gap-2">
|
||||
<Server className="h-6 w-6 text-primary" />
|
||||
<h1 className="text-2xl font-bold">ProxMenuX</h1>
|
||||
</div>
|
||||
<Badge
|
||||
variant={
|
||||
systemStatus.status === "healthy"
|
||||
? "default"
|
||||
: systemStatus.status === "warning"
|
||||
? "secondary"
|
||||
: "destructive"
|
||||
}
|
||||
className="cursor-pointer"
|
||||
>
|
||||
{systemStatus.status === "healthy" && "Healthy"}
|
||||
{systemStatus.status === "warning" && "Warning"}
|
||||
{systemStatus.status === "critical" && "Critical"}
|
||||
{systemStatus.serverName === "Loading..." && "Loading..."}
|
||||
</Badge>
|
||||
</div>
|
||||
<div className="flex items-center gap-4">
|
||||
<ThemeToggle />
|
||||
{isAuthenticated && (
|
||||
<Button variant="outline" size="sm" onClick={handleLogout}>
|
||||
Logout
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
{!authDeclined && !authRequired && <AuthSetup onComplete={handleAuthSetupComplete} />}
|
||||
|
||||
@@ -415,111 +453,6 @@ export function ProxmoxDashboard() {
|
||||
</div>
|
||||
)}
|
||||
|
||||
<header className="border-b border-border bg-card sticky top-0 z-50 shadow-sm">
|
||||
<div className="container mx-auto px-4 md:px-6 py-4 md:py-4">
|
||||
{/* Logo and Title */}
|
||||
<div className="flex items-start justify-between gap-3">
|
||||
{/* Logo and Title */}
|
||||
<div className="flex items-center space-x-2 md:space-x-3 min-w-0">
|
||||
<div className="w-16 h-16 md:w-10 md:h-10 relative flex items-center justify-center bg-primary/10 flex-shrink-0">
|
||||
<Image
|
||||
src="/images/proxmenux-logo.png"
|
||||
alt="ProxMenux Logo"
|
||||
width={64}
|
||||
height={64}
|
||||
className="object-contain md:w-10 md:h-10"
|
||||
priority
|
||||
onError={(e) => {
|
||||
console.log("[v0] Logo failed to load, using fallback icon")
|
||||
const target = e.target as HTMLImageElement
|
||||
target.style.display = "none"
|
||||
const fallback = target.parentElement?.querySelector(".fallback-icon")
|
||||
if (fallback) {
|
||||
fallback.classList.remove("hidden")
|
||||
}
|
||||
}}
|
||||
/>
|
||||
<Server className="h-8 w-8 md:h-6 md:w-6 text-primary absolute fallback-icon hidden" />
|
||||
</div>
|
||||
<div className="min-w-0">
|
||||
<h1 className="text-lg md:text-xl font-semibold text-foreground truncate">ProxMenux Monitor</h1>
|
||||
<p className="text-xs md:text-sm text-muted-foreground">Proxmox System Dashboard</p>
|
||||
<div className="lg:hidden flex items-center gap-1 text-xs text-muted-foreground mt-0.5">
|
||||
<Server className="h-3 w-3" />
|
||||
<span className="truncate">Node: {systemStatus.serverName}</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Desktop Actions */}
|
||||
<div className="hidden lg:flex items-center space-x-4">
|
||||
<div className="flex items-center space-x-2">
|
||||
<Server className="h-4 w-4 text-muted-foreground" />
|
||||
<div className="text-sm">
|
||||
<div className="font-medium text-foreground">Node: {systemStatus.serverName}</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<Badge variant="outline" className={statusColor}>
|
||||
{statusIcon}
|
||||
<span className="ml-1 capitalize">{systemStatus.status}</span>
|
||||
</Badge>
|
||||
|
||||
<div className="text-sm text-muted-foreground whitespace-nowrap">Uptime: {systemStatus.uptime}</div>
|
||||
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={refreshData}
|
||||
disabled={isRefreshing}
|
||||
className="border-border/50 bg-transparent hover:bg-secondary"
|
||||
>
|
||||
<RefreshCw className={`h-4 w-4 mr-2 ${isRefreshing ? "animate-spin" : ""}`} />
|
||||
Refresh
|
||||
</Button>
|
||||
|
||||
<ThemeToggle />
|
||||
|
||||
{authRequired && isAuthenticated && (
|
||||
<Button
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={handleLogout}
|
||||
className="border-border/50 bg-transparent hover:bg-secondary"
|
||||
>
|
||||
Logout
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
|
||||
{/* Mobile Actions */}
|
||||
<div className="flex lg:hidden items-center gap-2">
|
||||
<Badge variant="outline" className={`${statusColor} text-xs px-2`}>
|
||||
{statusIcon}
|
||||
<span className="ml-1 capitalize hidden sm:inline">{systemStatus.status}</span>
|
||||
</Badge>
|
||||
|
||||
<Button variant="ghost" size="sm" onClick={refreshData} disabled={isRefreshing} className="h-8 w-8 p-0">
|
||||
<RefreshCw className={`h-4 w-4 ${isRefreshing ? "animate-spin" : ""}`} />
|
||||
</Button>
|
||||
|
||||
<ThemeToggle />
|
||||
|
||||
{authRequired && isAuthenticated && (
|
||||
<Button variant="ghost" size="sm" onClick={handleLogout} className="h-8 px-2 text-xs">
|
||||
Logout
|
||||
</Button>
|
||||
)}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Mobile Server Info */}
|
||||
<div className="lg:hidden mt-2 flex items-center justify-end text-xs text-muted-foreground">
|
||||
<span className="whitespace-nowrap">Uptime: {systemStatus.uptime}</span>
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
<div
|
||||
className={`sticky z-40 bg-background
|
||||
top-[120px] md:top-[76px]
|
||||
|
||||
@@ -80,6 +80,8 @@ echo "📋 Copying Flask server..."
|
||||
cp "$SCRIPT_DIR/flask_server.py" "$APP_DIR/usr/bin/"
|
||||
cp "$SCRIPT_DIR/flask_auth_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_auth_routes.py not found"
|
||||
cp "$SCRIPT_DIR/auth_manager.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ auth_manager.py not found"
|
||||
cp "$SCRIPT_DIR/health_monitor.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ health_monitor.py not found"
|
||||
cp "$SCRIPT_DIR/flask_health_routes.py" "$APP_DIR/usr/bin/" 2>/dev/null || echo "⚠️ flask_health_routes.py not found"
|
||||
|
||||
echo "📋 Adding translation support..."
|
||||
cat > "$APP_DIR/usr/bin/translate_cli.py" << 'PYEOF'
|
||||
|
||||
26
AppImage/scripts/flask_health_routes.py
Normal file
26
AppImage/scripts/flask_health_routes.py
Normal file
@@ -0,0 +1,26 @@
|
||||
"""
|
||||
Flask routes for health monitoring
|
||||
"""
|
||||
|
||||
from flask import Blueprint, jsonify
|
||||
from health_monitor import health_monitor
|
||||
|
||||
health_bp = Blueprint('health', __name__)
|
||||
|
||||
@health_bp.route('/api/health/status', methods=['GET'])
def get_health_status():
    """Return the overall health summary as JSON.

    Any exception raised while collecting or serialising the summary is
    reported as ``{'error': <message>}`` with HTTP status 500.
    """
    try:
        return jsonify(health_monitor.get_overall_status())
    except Exception as exc:
        failure = {'error': str(exc)}
        return jsonify(failure), 500
|
||||
|
||||
@health_bp.route('/api/health/details', methods=['GET'])
def get_health_details():
    """Return the detailed health report (summary plus every check) as JSON.

    Any exception raised while collecting or serialising the report is
    reported as ``{'error': <message>}`` with HTTP status 500.
    """
    try:
        return jsonify(health_monitor.get_detailed_status())
    except Exception as exc:
        failure = {'error': str(exc)}
        return jsonify(failure), 500
|
||||
@@ -29,6 +29,8 @@ import jwt
|
||||
from functools import wraps
|
||||
from pathlib import Path
|
||||
|
||||
from flask_health_routes import health_bp
|
||||
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
|
||||
from flask_auth_routes import auth_bp
|
||||
@@ -37,6 +39,7 @@ app = Flask(__name__)
|
||||
CORS(app) # Enable CORS for Next.js frontend
|
||||
|
||||
app.register_blueprint(auth_bp)
|
||||
app.register_blueprint(health_bp)
|
||||
|
||||
|
||||
|
||||
|
||||
407
AppImage/scripts/health_monitor.py
Normal file
407
AppImage/scripts/health_monitor.py
Normal file
@@ -0,0 +1,407 @@
|
||||
"""
|
||||
Health Monitor Module
|
||||
Provides comprehensive health checks for the Proxmox system including:
|
||||
- CPU and Memory usage
|
||||
- Storage health (pools, disks, remote storage)
|
||||
- Network health (interface errors)
|
||||
- VM status
|
||||
- System events/logs errors
|
||||
"""
|
||||
|
||||
import psutil
|
||||
import subprocess
|
||||
import json
|
||||
from typing import Dict, List, Any
|
||||
|
||||
class HealthMonitor:
    """Monitors system health across multiple components.

    Every check is reported as a dict with the keys ``category``, ``name``,
    ``status`` ('healthy', 'warning' or 'critical'), ``value``, ``message``
    and ``details``.
    """

    # Thresholds (percent)
    CPU_WARNING = 75
    CPU_CRITICAL = 90
    MEMORY_WARNING = 75
    MEMORY_CRITICAL = 90

    # Pool states that are always reported as critical. Any other state than
    # ONLINE is reported as a warning instead of being treated as healthy.
    _ZFS_CRITICAL_STATES = frozenset(
        {'DEGRADED', 'FAULTED', 'OFFLINE', 'UNAVAIL', 'REMOVED', 'SUSPENDED'}
    )

    def __init__(self):
        self.checks = []

    @staticmethod
    def _summarize(checks: List[Dict[str, Any]]) -> Dict[str, Any]:
        """Build the overall summary for an already collected list of checks."""
        # Local import so this block does not depend on a module-level import.
        import time

        critical_count = sum(1 for c in checks if c['status'] == 'critical')
        warning_count = sum(1 for c in checks if c['status'] == 'warning')

        if critical_count > 0:
            overall_status = 'critical'
        elif warning_count > 0:
            overall_status = 'warning'
        else:
            overall_status = 'healthy'

        return {
            'status': overall_status,
            'critical_count': critical_count,
            'warning_count': warning_count,
            'healthy_count': len(checks) - critical_count - warning_count,
            'total_checks': len(checks),
            # Time the summary was produced (seconds since the epoch), not
            # the boot time of the host.
            'timestamp': time.time()
        }

    def get_overall_status(self) -> Dict[str, Any]:
        """Run all checks and return only the overall summary."""
        return self._summarize(self.run_all_checks())

    def get_detailed_status(self) -> Dict[str, Any]:
        """Run all checks once and return the summary together with them.

        The summary is derived from the same result list that is returned,
        so the counters always match ``checks`` and the (slow) checks are
        not executed twice.
        """
        checks = self.run_all_checks()
        return {
            'overall': self._summarize(checks),
            'checks': checks
        }

    def run_all_checks(self) -> List[Dict[str, Any]]:
        """Run all health checks and return results"""
        checks = []

        # CPU Check
        checks.append(self.check_cpu())

        # Memory Check
        checks.append(self.check_memory())

        # Storage Checks
        checks.extend(self.check_storage())

        # Network Checks
        checks.extend(self.check_network())

        # VM Checks
        checks.extend(self.check_vms())

        # Events/Logs Check
        checks.append(self.check_events())

        return checks

    def check_cpu(self) -> Dict[str, Any]:
        """Check CPU usage.

        Samples utilisation with ``psutil.cpu_percent(interval=1)``, so the
        call blocks for the length of that sampling interval.
        """
        cpu_percent = psutil.cpu_percent(interval=1)

        if cpu_percent >= self.CPU_CRITICAL:
            status = 'critical'
            message = f'CPU usage is critically high at {cpu_percent:.1f}%'
        elif cpu_percent >= self.CPU_WARNING:
            status = 'warning'
            message = f'CPU usage is elevated at {cpu_percent:.1f}%'
        else:
            status = 'healthy'
            message = f'CPU usage is normal at {cpu_percent:.1f}%'

        return {
            'category': 'System',
            'name': 'CPU Usage',
            'status': status,
            'value': f'{cpu_percent:.1f}%',
            'message': message,
            'details': {
                'usage': cpu_percent,
                'cores': psutil.cpu_count(),
                'warning_threshold': self.CPU_WARNING,
                'critical_threshold': self.CPU_CRITICAL
            }
        }

    def check_memory(self) -> Dict[str, Any]:
        """Check memory usage"""
        memory = psutil.virtual_memory()
        mem_percent = memory.percent

        if mem_percent >= self.MEMORY_CRITICAL:
            status = 'critical'
            message = f'Memory usage is critically high at {mem_percent:.1f}%'
        elif mem_percent >= self.MEMORY_WARNING:
            status = 'warning'
            message = f'Memory usage is elevated at {mem_percent:.1f}%'
        else:
            status = 'healthy'
            message = f'Memory usage is normal at {mem_percent:.1f}%'

        return {
            'category': 'System',
            'name': 'Memory Usage',
            'status': status,
            'value': f'{mem_percent:.1f}%',
            'message': message,
            'details': {
                'usage': mem_percent,
                'total': memory.total,
                'available': memory.available,
                'used': memory.used,
                'warning_threshold': self.MEMORY_WARNING,
                'critical_threshold': self.MEMORY_CRITICAL
            }
        }

    def check_storage(self) -> List[Dict[str, Any]]:
        """Check storage health including ZFS pools and disks"""
        return self._check_zfs_pools() + self._check_disk_usage()

    def _check_zfs_pools(self) -> List[Dict[str, Any]]:
        """Report one check per ZFS pool found by ``zpool status``."""
        checks = []

        try:
            result = subprocess.run(['zpool', 'status'], capture_output=True, text=True, timeout=5)
        except FileNotFoundError:
            # The zpool binary is not installed: the host does not use ZFS,
            # which is not a health problem, so no check is reported.
            return checks
        except Exception as e:
            checks.append({
                'category': 'Storage',
                'name': 'ZFS Pools',
                'status': 'warning',
                'value': 'Unknown',
                'message': f'Could not check ZFS pools: {str(e)}',
                'details': {'error': str(e)}
            })
            return checks

        if result.returncode != 0:
            return checks

        for pool in self._parse_zpool_status(result.stdout):
            state = pool['state']
            if state in self._ZFS_CRITICAL_STATES:
                status = 'critical'
                message = f"Pool '{pool['name']}' is {state.lower()}"
            elif state != 'ONLINE':
                # Unrecognised or unparsed state: never claim it is healthy.
                status = 'warning'
                message = f"Pool '{pool['name']}' is in state {state}"
            elif pool['errors'] > 0:
                status = 'warning'
                message = f"Pool '{pool['name']}' has {pool['errors']} errors"
            else:
                status = 'healthy'
                message = f"Pool '{pool['name']}' is healthy"

            checks.append({
                'category': 'Storage',
                'name': f"ZFS Pool: {pool['name']}",
                'status': status,
                'value': state,
                'message': message,
                'details': pool
            })

        return checks

    def _check_disk_usage(self) -> List[Dict[str, Any]]:
        """Report one check per mounted partition based on its fill level."""
        checks = []

        for partition in psutil.disk_partitions():
            try:
                usage = psutil.disk_usage(partition.mountpoint)
            except OSError:
                # Covers PermissionError as well as mountpoints that vanished
                # or are not ready; such partitions are skipped.
                continue

            percent = usage.percent

            if percent >= 95:
                status = 'critical'
                message = f"Disk '{partition.mountpoint}' is critically full at {percent:.1f}%"
            elif percent >= 85:
                status = 'warning'
                message = f"Disk '{partition.mountpoint}' is getting full at {percent:.1f}%"
            else:
                status = 'healthy'
                message = f"Disk '{partition.mountpoint}' has sufficient space ({percent:.1f}% used)"

            checks.append({
                'category': 'Storage',
                'name': f"Disk: {partition.mountpoint}",
                'status': status,
                'value': f'{percent:.1f}%',
                'message': message,
                'details': {
                    'device': partition.device,
                    'mountpoint': partition.mountpoint,
                    'fstype': partition.fstype,
                    'total': usage.total,
                    'used': usage.used,
                    'free': usage.free,
                    'percent': percent
                }
            })

        return checks

    def check_network(self) -> List[Dict[str, Any]]:
        """Check network interface health (errors, not inactive interfaces)"""
        checks = []

        # Get network interface stats
        net_io = psutil.net_io_counters(pernic=True)
        net_if_stats = psutil.net_if_stats()

        for interface, stats in net_io.items():
            # Skip loopback
            if interface == 'lo':
                continue

            # Only check active interfaces
            if interface not in net_if_stats or not net_if_stats[interface].isup:
                continue

            errors = stats.errin + stats.errout
            drops = stats.dropin + stats.dropout

            if errors > 100 or drops > 100:
                status = 'critical'
                message = f"Interface '{interface}' has {errors} errors and {drops} dropped packets"
            elif errors > 10 or drops > 10:
                status = 'warning'
                message = f"Interface '{interface}' has {errors} errors and {drops} dropped packets"
            else:
                status = 'healthy'
                message = f"Interface '{interface}' is operating normally"

            checks.append({
                'category': 'Network',
                'name': f"Interface: {interface}",
                'status': status,
                'value': 'Active',
                'message': message,
                'details': {
                    'errors_in': stats.errin,
                    'errors_out': stats.errout,
                    'drops_in': stats.dropin,
                    'drops_out': stats.dropout,
                    'bytes_sent': stats.bytes_sent,
                    'bytes_recv': stats.bytes_recv
                }
            })

        return checks

    def check_vms(self) -> List[Dict[str, Any]]:
        """Check VM status using the third column of ``qm list``.

        Returns a single aggregated check, or an empty list when ``qm list``
        exits with a non-zero status.
        """
        checks = []

        try:
            # Get VM list from qm
            result = subprocess.run(['qm', 'list'], capture_output=True, text=True, timeout=5)
            if result.returncode == 0:
                lines = result.stdout.strip().split('\n')[1:]  # Skip header

                running_count = 0
                stopped_count = 0
                error_count = 0

                for line in lines:
                    parts = line.split()
                    if len(parts) < 3:
                        continue
                    vm_status = parts[2]
                    if vm_status == 'running':
                        running_count += 1
                    elif vm_status == 'stopped':
                        stopped_count += 1
                    else:
                        error_count += 1

                if error_count > 0:
                    status = 'warning'
                    message = f'{error_count} VMs in unexpected state'
                else:
                    status = 'healthy'
                    message = f'{running_count} running, {stopped_count} stopped'

                # The total covers every listed VM, including those in an
                # unexpected state.
                total_count = running_count + stopped_count + error_count

                checks.append({
                    'category': 'Virtual Machines',
                    'name': 'VM Status',
                    'status': status,
                    'value': f'{total_count} total',
                    'message': message,
                    'details': {
                        'running': running_count,
                        'stopped': stopped_count,
                        'errors': error_count
                    }
                })
        except Exception as e:
            checks.append({
                'category': 'Virtual Machines',
                'name': 'VM Status',
                'status': 'warning',
                'value': 'Unknown',
                'message': f'Could not check VM status: {str(e)}',
                'details': {'error': str(e)}
            })

        return checks

    def check_events(self) -> Dict[str, Any]:
        """Check system events/logs for errors.

        Always returns a check dict: a failing or missing ``journalctl`` is
        reported as a 'warning' check instead of returning ``None``.
        """
        try:
            # Check journalctl for recent errors
            result = subprocess.run(
                ['journalctl', '-p', 'err', '-n', '100', '--no-pager'],
                capture_output=True,
                text=True,
                timeout=5
            )

            if result.returncode != 0:
                # Routed through the handler below so callers still get a dict.
                raise RuntimeError(f'journalctl exited with code {result.returncode}')

            # Lines starting with "-- " are journalctl markers (for example
            # "-- No entries --"), not log entries.
            error_lines = [
                line for line in result.stdout.split('\n')
                if line.strip() and not line.startswith('-- ')
            ]
            error_count = len(error_lines)
            # NOTE(review): '-n 100' has no time window, so the count can sit
            # at 100 on any host with enough old error entries - consider
            # adding '--since'.

            if error_count > 50:
                status = 'critical'
                message = f'{error_count} errors in recent logs'
            elif error_count > 10:
                status = 'warning'
                message = f'{error_count} errors in recent logs'
            else:
                status = 'healthy'
                message = f'{error_count} errors in recent logs (normal)'

            return {
                'category': 'System Events',
                'name': 'Error Logs',
                'status': status,
                'value': f'{error_count} errors',
                'message': message,
                'details': {
                    'error_count': error_count,
                    'recent_errors': error_lines[-5:]  # Last 5 errors
                }
            }
        except Exception as e:
            return {
                'category': 'System Events',
                'name': 'Error Logs',
                'status': 'warning',
                'value': 'Unknown',
                'message': f'Could not check system logs: {str(e)}',
                'details': {'error': str(e)}
            }

    def _parse_zpool_status(self, output: str) -> List[Dict[str, Any]]:
        """Parse ``zpool status`` output.

        Returns one dict per ``pool:`` section with the keys ``name``,
        ``state`` (``'UNKNOWN'`` if no ``state:`` line follows) and
        ``errors`` (0 unless the ``errors:`` line starts with a number).
        """
        pools = []
        current_pool = None

        for raw_line in output.split('\n'):
            line = raw_line.strip()

            if line.startswith('pool:'):
                if current_pool:
                    pools.append(current_pool)
                current_pool = {'name': line.split(':', 1)[1].strip(), 'state': 'UNKNOWN', 'errors': 0}
            elif current_pool is None:
                continue
            elif line.startswith('state:'):
                current_pool['state'] = line.split(':', 1)[1].strip()
            elif line.lower().startswith('errors:'):
                error_part = line.split(':', 1)[1].strip()
                if error_part.lower() != 'no known data errors':
                    try:
                        current_pool['errors'] = int(error_part.split()[0])
                    except (ValueError, IndexError):
                        # Non-numeric or empty error text: keep the count at 0.
                        pass

        if current_pool:
            pools.append(current_pool)

        return pools
|
||||
|
||||
# Global instance
|
||||
health_monitor = HealthMonitor()
|
||||
Reference in New Issue
Block a user