Mirror of https://github.com/MacRimi/ProxMenux.git (synced 2025-11-17 19:16:25 +00:00)

Update AppImage
@@ -114,11 +114,11 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
     const statusUpper = status?.toUpperCase()
     switch (statusUpper) {
       case "OK":
-        return <Badge className="bg-green-500 text-white">Healthy</Badge>
+        return <Badge className="bg-green-500 text-white hover:bg-green-500">OK</Badge>
       case "WARNING":
-        return <Badge className="bg-yellow-500 text-white">Warning</Badge>
+        return <Badge className="bg-yellow-500 text-white hover:bg-yellow-500">Warning</Badge>
       case "CRITICAL":
-        return <Badge className="bg-red-500 text-white">Critical</Badge>
+        return <Badge className="bg-red-500 text-white hover:bg-red-500">Critical</Badge>
       default:
         return <Badge>Unknown</Badge>
     }
@@ -159,7 +159,7 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
           </div>
           {healthData && getStatusBadge(healthData.overall)}
         </DialogTitle>
-        <DialogDescription>Comprehensive health checks for all system components</DialogDescription>
+        <DialogDescription>Detailed health checks for all system components</DialogDescription>
       </DialogHeader>

       {loading && (
@@ -197,9 +197,9 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
             </div>
           </div>

-          {healthData.summary && (
-            <div className="text-sm text-muted-foreground p-3 rounded-lg bg-muted/20 border">
-              {healthData.summary}
+          {healthData.summary && healthData.summary !== "All systems operational" && (
+            <div className="text-sm p-3 rounded-lg bg-muted/20 border">
+              <span className="font-medium text-foreground">{healthData.summary}</span>
             </div>
           )}

@@ -234,11 +234,11 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
                 variant="outline"
                 className={`shrink-0 text-xs ${
                   status === "OK"
-                    ? "border-green-500 text-green-500"
+                    ? "border-green-500 text-green-500 bg-green-500/5"
                     : status === "WARNING"
-                      ? "border-yellow-500 text-yellow-500"
+                      ? "border-yellow-500 text-yellow-500 bg-yellow-500/5"
                       : status === "CRITICAL"
-                        ? "border-red-500 text-red-500"
+                        ? "border-red-500 text-red-500 bg-red-500/5"
                         : ""
                 }`}
               >
@@ -98,7 +98,7 @@ export function ProxmoxDashboard() {
       const uptimeValue =
         data.uptime && typeof data.uptime === "string" && data.uptime.trim() !== "" ? data.uptime : "N/A"

-      const healthStatus = data.health?.status?.toLowerCase() || "healthy"
+      const healthStatus = data.health?.status || "healthy"

       setSystemStatus({
         status: healthStatus as "healthy" | "warning" | "critical",
@@ -1,85 +0,0 @@
-"use client"
-
-import { createContext, useContext, useState, useEffect, type ReactNode } from "react"
-
-export interface PollingIntervals {
-  storage: number
-  network: number
-  vms: number
-  hardware: number
-}
-
-// Default intervals in milliseconds
-const DEFAULT_INTERVALS: PollingIntervals = {
-  storage: 60000, // 60 seconds
-  network: 60000, // 60 seconds
-  vms: 30000, // 30 seconds
-  hardware: 60000, // 60 seconds
-}
-
-const STORAGE_KEY = "proxmenux_polling_intervals"
-
-interface PollingConfigContextType {
-  intervals: PollingIntervals
-  updateInterval: (key: keyof PollingIntervals, value: number) => void
-}
-
-const PollingConfigContext = createContext<PollingConfigContextType | undefined>(undefined)
-
-export function PollingConfigProvider({ children }: { children: ReactNode }) {
-  const [intervals, setIntervals] = useState<PollingIntervals>(DEFAULT_INTERVALS)
-
-  // Load from localStorage on mount
-  useEffect(() => {
-    if (typeof window === "undefined") return
-
-    const stored = localStorage.getItem(STORAGE_KEY)
-    if (stored) {
-      try {
-        const parsed = JSON.parse(stored)
-        setIntervals({ ...DEFAULT_INTERVALS, ...parsed })
-      } catch (e) {
-        console.error("[v0] Failed to parse stored polling intervals:", e)
-      }
-    }
-  }, [])
-
-  const updateInterval = (key: keyof PollingIntervals, value: number) => {
-    setIntervals((prev) => {
-      const newIntervals = { ...prev, [key]: value }
-      if (typeof window !== "undefined") {
-        localStorage.setItem(STORAGE_KEY, JSON.stringify(newIntervals))
-      }
-      return newIntervals
-    })
-  }
-
-  return <PollingConfigContext.Provider value={{ intervals, updateInterval }}>{children}</PollingConfigContext.Provider>
-}
-
-export function usePollingConfig() {
-  const context = useContext(PollingConfigContext)
-  if (!context) {
-    // During SSR or when provider is not available, return defaults
-    if (typeof window === "undefined") {
-      return {
-        intervals: DEFAULT_INTERVALS,
-        updateInterval: () => {},
-      }
-    }
-    throw new Error("usePollingConfig must be used within PollingConfigProvider")
-  }
-  return context
-}
-
-// Interval options for the UI (in milliseconds)
-export const INTERVAL_OPTIONS = [
-  { label: "10 seconds", value: 10000 },
-  { label: "30 seconds", value: 30000 },
-  { label: "1 minute", value: 60000 },
-  { label: "2 minutes", value: 120000 },
-  { label: "5 minutes", value: 300000 },
-  { label: "10 minutes", value: 600000 },
-  { label: "30 minutes", value: 1800000 },
-  { label: "1 hour", value: 3600000 },
-]
@@ -33,8 +33,8 @@ def get_system_info():
    """
    try:
        info = health_monitor.get_system_info()

        if 'health' in info:
            # Convert 'OK' to 'healthy', 'WARNING' to 'warning', 'CRITICAL' to 'critical'
            status_map = {
                'OK': 'healthy',
                'WARNING': 'warning',

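This hunk is truncated by the diff view, but together with the ProxmoxDashboard change above (the dropped .toLowerCase()) it shows the API now returning lower-case health states. Below is a minimal sketch of that normalization, not part of the commit: it assumes the truncated status_map also contains 'CRITICAL': 'critical' (implied by the comment) and that info['health'] is a dict with a 'status' field, as data.health?.status on the frontend suggests.

# Sketch only -- not part of the commit; illustrates the mapping the hunk applies.
STATUS_MAP = {
    'OK': 'healthy',
    'WARNING': 'warning',
    'CRITICAL': 'critical',  # implied by the comment in the hunk
}

def normalize_health(info):
    """Convert the monitor's upper-case status into the lower-case form the dashboard expects."""
    health = info.get('health')
    if isinstance(health, dict) and 'status' in health:
        # The 'unknown' fallback is an assumption; the real fallback is not visible in the diff.
        health['status'] = STATUS_MAP.get(health['status'], 'unknown')
    return info

print(normalize_health({'health': {'status': 'OK'}}))  # {'health': {'status': 'healthy'}}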
@@ -432,7 +432,10 @@ class HealthMonitor:
            return None

    def _check_memory_comprehensive(self) -> Dict[str, Any]:
-       """Check memory including RAM and swap with sustained thresholds"""
+       """
+       Check memory including RAM and swap with realistic thresholds.
+       Only alerts on truly problematic memory situations.
+       """
        try:
            memory = psutil.virtual_memory()
            swap = psutil.swap_memory()
@@ -457,7 +460,7 @@ class HealthMonitor:

            mem_critical = sum(
                1 for entry in self.state_history[state_key]
-               if entry['mem_percent'] >= self.MEMORY_CRITICAL and
+               if entry['mem_percent'] >= 90 and
                current_time - entry['time'] <= self.MEMORY_DURATION
            )

@@ -469,28 +472,20 @@ class HealthMonitor:

            swap_critical = sum(
                1 for entry in self.state_history[state_key]
-               if entry['swap_vs_ram'] > self.SWAP_CRITICAL_PERCENT and
+               if entry['swap_vs_ram'] > 20 and
                current_time - entry['time'] <= self.SWAP_CRITICAL_DURATION
            )

            swap_warning = sum(
                1 for entry in self.state_history[state_key]
                if entry['swap_percent'] > 0 and
                current_time - entry['time'] <= self.SWAP_WARNING_DURATION
            )

            if mem_critical >= 2:
                status = 'CRITICAL'
-               reason = f'RAM >{self.MEMORY_CRITICAL}% for {self.MEMORY_DURATION}s'
+               reason = f'RAM >90% for {self.MEMORY_DURATION}s'
            elif swap_critical >= 2:
                status = 'CRITICAL'
-               reason = f'Swap >{self.SWAP_CRITICAL_PERCENT}% of RAM for {self.SWAP_CRITICAL_DURATION}s'
+               reason = f'Swap >20% of RAM ({swap_vs_ram:.1f}%)'
            elif mem_warning >= 2:
                status = 'WARNING'
                reason = f'RAM >{self.MEMORY_WARNING}% for {self.MEMORY_DURATION}s'
            elif swap_warning >= 2:
                status = 'WARNING'
                reason = f'Swap active for >{self.SWAP_WARNING_DURATION}s'
            else:
                status = 'OK'
                reason = None
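For readers skimming the hunk: the CRITICAL/WARNING branches above only fire once at least two recent history samples breach a threshold inside a time window. The following self-contained sketch illustrates that sustained-threshold pattern; the function names and the 300-second window are illustrative, only the 90% literal and the >= 2 sample rule come from the hunk.

# Sketch only -- illustrates the sustained-threshold pattern used by
# _check_memory_comprehensive; names and the 300 s window are illustrative.
import time

MEMORY_CRITICAL = 90    # percent, the literal used in the hunk
MEMORY_DURATION = 300   # seconds; assumed window, the real constant is not shown in the diff

history = []            # entries like {'time': ..., 'mem_percent': ...}

def record_sample(mem_percent):
    history.append({'time': time.time(), 'mem_percent': mem_percent})

def memory_status():
    now = time.time()
    critical_samples = sum(
        1 for entry in history
        if entry['mem_percent'] >= MEMORY_CRITICAL and now - entry['time'] <= MEMORY_DURATION
    )
    # Two or more sustained readings trip the alarm, mirroring `if mem_critical >= 2`.
    return 'CRITICAL' if critical_samples >= 2 else 'OK'

record_sample(95.0)
record_sample(96.5)
print(memory_status())  # CRITICAL -- two samples above 90% inside the window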
@@ -513,63 +508,73 @@ class HealthMonitor:

    def _check_storage_optimized(self) -> Dict[str, Any]:
        """
        Optimized storage check - always returns status.
        Checks critical mounts, LVM, and Proxmox storages.
        Optimized storage check - monitors Proxmox storages from pvesm status.
        Checks for inactive storages and disk health from SMART/events.
        """
        issues = []
        storage_details = {}

        # Check critical filesystems
        critical_mounts = ['/', '/var/lib/vz']
        try:
            result = subprocess.run(
                ['pvesm', 'status'],
                capture_output=True,
                text=True,
                timeout=5
            )

            if result.returncode == 0:
                lines = result.stdout.strip().split('\n')[1:]  # Skip header
                for line in lines:
                    parts = line.split()
                    if len(parts) >= 4:
                        storage_name = parts[0]
                        storage_type = parts[1]
                        enabled = parts[2]
                        active = parts[3]

                        if enabled == '1' and active == '0':
                            issues.append(f'{storage_name}: Inactive')
                            storage_details[storage_name] = {
                                'status': 'CRITICAL',
                                'reason': 'Storage inactive',
                                'type': storage_type
                            }
        except Exception as e:
            # If pvesm not available, skip silently
            pass

        # Check disk health from Proxmox task log or system logs
        disk_health_issues = self._check_disk_health_from_events()
        if disk_health_issues:
            for disk, issue in disk_health_issues.items():
                issues.append(f'{disk}: {issue["reason"]}')
                storage_details[disk] = issue

        critical_mounts = ['/']

        for mount_point in critical_mounts:
            is_mounted = False
            try:
                result = subprocess.run(
                    ['mountpoint', '-q', mount_point],
                    capture_output=True,
                    timeout=2
                )
                is_mounted = (result.returncode == 0)
            except:
                pass

            if not is_mounted:
                # Only report as error if it's supposed to exist
                if mount_point == '/':

                if result.returncode != 0:
                    issues.append(f'{mount_point}: Not mounted')
                    storage_details[mount_point] = {
                        'status': 'CRITICAL',
                        'reason': 'Not mounted'
                    }
                # For /var/lib/vz, it might not be a separate mount, check if dir exists
                elif mount_point == '/var/lib/vz':
                    if os.path.exists(mount_point):
                        # It exists as directory, check usage
                        fs_status = self._check_filesystem(mount_point)
                        if fs_status['status'] != 'OK':
                            issues.append(f"{mount_point}: {fs_status['reason']}")
                            storage_details[mount_point] = fs_status
                    # If doesn't exist, skip silently (might use different storage)
                    continue

            fs_status = self._check_filesystem(mount_point)
            if fs_status['status'] != 'OK':
                issues.append(f"{mount_point}: {fs_status['reason']}")
                storage_details[mount_point] = fs_status

        # Check LVM
        lvm_status = self._check_lvm()
        if lvm_status and lvm_status.get('status') != 'OK':
            issues.append(lvm_status.get('reason', 'LVM issue'))
            storage_details['lvm'] = lvm_status

        # Check Proxmox storages (PBS, NFS, etc)
        pve_storages = self._check_proxmox_storages()
        for storage_name, storage_data in pve_storages.items():
            if storage_data.get('status') != 'OK':
                issues.append(f"{storage_name}: {storage_data.get('reason', 'Storage issue')}")
                storage_details[storage_name] = storage_data
                continue

            # Check filesystem usage
            fs_status = self._check_filesystem(mount_point)
            if fs_status['status'] != 'OK':
                issues.append(f"{mount_point}: {fs_status['reason']}")
                storage_details[mount_point] = fs_status
        except Exception:
            pass

        if not issues:
            return {'status': 'OK'}
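The pvesm-based branch above flags any storage that is enabled but not active. Here is a sketch of that column parsing run on a fabricated pvesm status sample laid out in the name/type/enabled/active format the hunk assumes; real pvesm output can differ between Proxmox releases, so treat the sample as illustrative only.

# Sketch only -- same split/column logic as the hunk, run on a fabricated sample.
SAMPLE_PVESM_OUTPUT = """
Name        Type  Enabled  Active  Total  Used  Available  %
local       dir   1        1       100    10    90         10.0
backup-nfs  nfs   1        0       0      0     0          0.0
"""

issues = []
storage_details = {}

lines = SAMPLE_PVESM_OUTPUT.strip().split('\n')[1:]  # skip header, as in the hunk
for line in lines:
    parts = line.split()
    if len(parts) >= 4:
        storage_name, storage_type, enabled, active = parts[0], parts[1], parts[2], parts[3]
        if enabled == '1' and active == '0':
            issues.append(f'{storage_name}: Inactive')
            storage_details[storage_name] = {
                'status': 'CRITICAL',
                'reason': 'Storage inactive',
                'type': storage_type,
            }

print(issues)           # ['backup-nfs: Inactive']
print(storage_details)  # {'backup-nfs': {'status': 'CRITICAL', 'reason': 'Storage inactive', 'type': 'nfs'}}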
@@ -873,7 +878,6 @@ class HealthMonitor:
        issues = []
        vm_details = {}

        # Check logs for VM/CT errors
        result = subprocess.run(
            ['journalctl', '--since', '10 minutes ago', '--no-pager', '-u', 'pve*', '-p', 'warning'],
            capture_output=True,
@@ -885,22 +889,20 @@ class HealthMonitor:
            for line in result.stdout.split('\n'):
                line_lower = line.lower()

                # Pattern 1: "VM 106 qmp command failed"
-               vm_qmp_match = re.search(r'vm\s+(\d+)\s+qmp\s+command', line_lower)
+               vm_qmp_match = re.search(r'vm\s+(\d+)\s+qmp\s+command.*(?:failed|unable|timeout)', line_lower)
                if vm_qmp_match:
                    vmid = vm_qmp_match.group(1)
                    key = f'vm_{vmid}'
                    if key not in vm_details:
-                       issues.append(f'VM {vmid}: QMP command error')
+                       issues.append(f'VM {vmid}: Communication issue')
                        vm_details[key] = {
                            'status': 'WARNING',
-                           'reason': 'QMP command failed',
+                           'reason': 'QMP command timeout',
                            'id': vmid,
                            'type': 'VM'
                        }
                    continue

                # Pattern 2: "CT 103 error" or "Container 103"
                ct_match = re.search(r'(?:ct|container)\s+(\d+)', line_lower)
                if ct_match and ('error' in line_lower or 'fail' in line_lower):
                    ctid = ct_match.group(1)
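The tightened Pattern 1 regex now requires a failure word after "qmp command". A quick comparison of the old and new expressions on made-up journal lines (the log text is invented for illustration):

# Sketch only -- the old and new Pattern 1 expressions from the hunk, on fake log lines.
import re

OLD_QMP = r'vm\s+(\d+)\s+qmp\s+command'
NEW_QMP = r'vm\s+(\d+)\s+qmp\s+command.*(?:failed|unable|timeout)'

samples = [
    "VM 106 qmp command 'guest-ping' failed - got timeout",      # a real problem
    "VM 106 qmp command 'query-status' finished successfully",   # benign chatter
]

for line in samples:
    line_lower = line.lower()
    print(bool(re.search(OLD_QMP, line_lower)), bool(re.search(NEW_QMP, line_lower)), line)

# The old pattern flags both lines; the new one only flags the line containing
# "failed"/"unable"/"timeout", so benign QMP traffic no longer raises a warning.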
@@ -915,9 +917,7 @@ class HealthMonitor:
                    }
                    continue

                # Pattern 3: Generic VM/CT start failures
-               if 'failed to start' in line_lower or 'error starting' in line_lower or \
-                  'start error' in line_lower or 'cannot start' in line_lower:
+               if any(keyword in line_lower for keyword in ['failed to start', 'cannot start', 'activation failed', 'start error']):
                    # Extract VM/CT ID
                    id_match = re.search(r'\b(\d{3,4})\b', line)
                    if id_match:
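Pattern 3 is refactored from a chained or-expression into an any() over a keyword list, which also drops 'error starting' and adds 'activation failed'. A small check on invented log lines:

# Sketch only -- the old chained condition vs. the new any()-over-keywords form.
def old_start_failure(line_lower):
    return ('failed to start' in line_lower or 'error starting' in line_lower or
            'start error' in line_lower or 'cannot start' in line_lower)

NEW_KEYWORDS = ['failed to start', 'cannot start', 'activation failed', 'start error']

samples = [
    "ct 103 failed to start: no space left on device",        # caught by both versions
    "vm 201: activation failed for volume vm-201-disk-0",     # only the new list catches this
]

for line in samples:
    line_lower = line.lower()
    print(old_start_failure(line_lower), any(k in line_lower for k in NEW_KEYWORDS), line)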
@@ -1185,6 +1185,50 @@ class HealthMonitor:

        except Exception:
            return None

+    def _check_disk_health_from_events(self) -> Dict[str, Any]:
+        """
+        Check for disk health warnings from Proxmox task log and system logs.
+        Returns dict of disk issues found.
+        """
+        disk_issues = {}
+
+        try:
+            result = subprocess.run(
+                ['journalctl', '--since', '1 hour ago', '--no-pager', '-p', 'warning'],
+                capture_output=True,
+                text=True,
+                timeout=3
+            )
+
+            if result.returncode == 0:
+                for line in result.stdout.split('\n'):
+                    line_lower = line.lower()
+
+                    # Check for SMART warnings
+                    if 'smart' in line_lower and ('warning' in line_lower or 'error' in line_lower or 'fail' in line_lower):
+                        # Extract disk name
+                        disk_match = re.search(r'/dev/(sd[a-z]|nvme\d+n\d+)', line)
+                        if disk_match:
+                            disk_name = disk_match.group(1)
+                            disk_issues[f'/dev/{disk_name}'] = {
+                                'status': 'WARNING',
+                                'reason': 'SMART warning detected'
+                            }
+
+                    # Check for disk errors
+                    if any(keyword in line_lower for keyword in ['disk error', 'ata error', 'medium error']):
+                        disk_match = re.search(r'/dev/(sd[a-z]|nvme\d+n\d+)', line)
+                        if disk_match:
+                            disk_name = disk_match.group(1)
+                            disk_issues[f'/dev/{disk_name}'] = {
+                                'status': 'CRITICAL',
+                                'reason': 'Disk error detected'
+                            }
+        except Exception:
+            pass
+
+        return disk_issues
+

# Global instance
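The new _check_disk_health_from_events helper keys its findings by device path via the /dev/(sd[a-z]|nvme\d+n\d+) pattern. The sketch below runs the same extraction and classification logic over fabricated journal lines (condensed to an if/elif, unlike the two independent ifs in the hunk):

# Sketch only -- same regex and keyword checks as _check_disk_health_from_events,
# applied to invented log lines instead of journalctl output.
import re

DISK_RE = r'/dev/(sd[a-z]|nvme\d+n\d+)'

fake_journal = [
    "smartd[812]: Device: /dev/sda, SMART error (CurrentPendingSector) detected",
    "kernel: blk_update_request: I/O error, dev /dev/nvme0n1, sector 123456 (medium error)",
]

disk_issues = {}
for line in fake_journal:
    line_lower = line.lower()
    match = re.search(DISK_RE, line)
    if not match:
        continue
    device = f'/dev/{match.group(1)}'
    if 'smart' in line_lower and ('warning' in line_lower or 'error' in line_lower or 'fail' in line_lower):
        disk_issues[device] = {'status': 'WARNING', 'reason': 'SMART warning detected'}
    elif any(keyword in line_lower for keyword in ['disk error', 'ata error', 'medium error']):
        disk_issues[device] = {'status': 'CRITICAL', 'reason': 'Disk error detected'}

print(disk_issues)
# {'/dev/sda': {'status': 'WARNING', 'reason': 'SMART warning detected'},
#  '/dev/nvme0n1': {'status': 'CRITICAL', 'reason': 'Disk error detected'}}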