diff --git a/AppImage/components/health-status-modal.tsx b/AppImage/components/health-status-modal.tsx
index caf839c..efaea4e 100644
--- a/AppImage/components/health-status-modal.tsx
+++ b/AppImage/components/health-status-modal.tsx
@@ -114,11 +114,11 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
const statusUpper = status?.toUpperCase()
switch (statusUpper) {
case "OK":
- return Healthy
+ return OK
case "WARNING":
- return Warning
+ return Warning
case "CRITICAL":
- return Critical
+ return Critical
default:
return Unknown
}
@@ -159,7 +159,7 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
{healthData && getStatusBadge(healthData.overall)}
- Comprehensive health checks for all system components
+ Detailed health checks for all system components
{loading && (
@@ -197,9 +197,9 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
- {healthData.summary && (
-
- {healthData.summary}
+ {healthData.summary && healthData.summary !== "All systems operational" && (
+
+ {healthData.summary}
)}
@@ -234,11 +234,11 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
variant="outline"
className={`shrink-0 text-xs ${
status === "OK"
- ? "border-green-500 text-green-500"
+ ? "border-green-500 text-green-500 bg-green-500/5"
: status === "WARNING"
- ? "border-yellow-500 text-yellow-500"
+ ? "border-yellow-500 text-yellow-500 bg-yellow-500/5"
: status === "CRITICAL"
- ? "border-red-500 text-red-500"
+ ? "border-red-500 text-red-500 bg-red-500/5"
: ""
}`}
>
diff --git a/AppImage/components/proxmox-dashboard.tsx b/AppImage/components/proxmox-dashboard.tsx
index 597bc00..8bc2164 100644
--- a/AppImage/components/proxmox-dashboard.tsx
+++ b/AppImage/components/proxmox-dashboard.tsx
@@ -98,7 +98,7 @@ export function ProxmoxDashboard() {
const uptimeValue =
data.uptime && typeof data.uptime === "string" && data.uptime.trim() !== "" ? data.uptime : "N/A"
- const healthStatus = data.health?.status?.toLowerCase() || "healthy"
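+ // Backend already maps health status to lowercase ("healthy" | "warning" | "critical") in flask_health_routes.py, so no toLowerCase() needed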
+ const healthStatus = data.health?.status || "healthy"
setSystemStatus({
status: healthStatus as "healthy" | "warning" | "critical",
diff --git a/AppImage/lib/polling-config.tsx b/AppImage/lib/polling-config.tsx
deleted file mode 100644
index b0becb3..0000000
--- a/AppImage/lib/polling-config.tsx
+++ /dev/null
@@ -1,85 +0,0 @@
-"use client"
-
-import { createContext, useContext, useState, useEffect, type ReactNode } from "react"
-
-export interface PollingIntervals {
- storage: number
- network: number
- vms: number
- hardware: number
-}
-
-// Default intervals in milliseconds
-const DEFAULT_INTERVALS: PollingIntervals = {
- storage: 60000, // 60 seconds
- network: 60000, // 60 seconds
- vms: 30000, // 30 seconds
- hardware: 60000, // 60 seconds
-}
-
-const STORAGE_KEY = "proxmenux_polling_intervals"
-
-interface PollingConfigContextType {
- intervals: PollingIntervals
- updateInterval: (key: keyof PollingIntervals, value: number) => void
-}
-
-const PollingConfigContext = createContext<PollingConfigContextType | undefined>(undefined)
-
-export function PollingConfigProvider({ children }: { children: ReactNode }) {
- const [intervals, setIntervals] = useState(DEFAULT_INTERVALS)
-
- // Load from localStorage on mount
- useEffect(() => {
- if (typeof window === "undefined") return
-
- const stored = localStorage.getItem(STORAGE_KEY)
- if (stored) {
- try {
- const parsed = JSON.parse(stored)
- setIntervals({ ...DEFAULT_INTERVALS, ...parsed })
- } catch (e) {
- console.error("[v0] Failed to parse stored polling intervals:", e)
- }
- }
- }, [])
-
- const updateInterval = (key: keyof PollingIntervals, value: number) => {
- setIntervals((prev) => {
- const newIntervals = { ...prev, [key]: value }
- if (typeof window !== "undefined") {
- localStorage.setItem(STORAGE_KEY, JSON.stringify(newIntervals))
- }
- return newIntervals
- })
- }
-
- return <PollingConfigContext.Provider value={{ intervals, updateInterval }}>{children}</PollingConfigContext.Provider>
-}
-
-export function usePollingConfig() {
- const context = useContext(PollingConfigContext)
- if (!context) {
- // During SSR or when provider is not available, return defaults
- if (typeof window === "undefined") {
- return {
- intervals: DEFAULT_INTERVALS,
- updateInterval: () => {},
- }
- }
- throw new Error("usePollingConfig must be used within PollingConfigProvider")
- }
- return context
-}
-
-// Interval options for the UI (in milliseconds)
-export const INTERVAL_OPTIONS = [
- { label: "10 seconds", value: 10000 },
- { label: "30 seconds", value: 30000 },
- { label: "1 minute", value: 60000 },
- { label: "2 minutes", value: 120000 },
- { label: "5 minutes", value: 300000 },
- { label: "10 minutes", value: 600000 },
- { label: "30 minutes", value: 1800000 },
- { label: "1 hour", value: 3600000 },
-]
diff --git a/AppImage/scripts/flask_health_routes.py b/AppImage/scripts/flask_health_routes.py
index 66f6a01..fb32f51 100644
--- a/AppImage/scripts/flask_health_routes.py
+++ b/AppImage/scripts/flask_health_routes.py
@@ -33,8 +33,8 @@ def get_system_info():
"""
try:
info = health_monitor.get_system_info()
+
if 'health' in info:
- # Convert 'OK' to 'healthy', 'WARNING' to 'warning', 'CRITICAL' to 'critical'
status_map = {
'OK': 'healthy',
'WARNING': 'warning',
diff --git a/AppImage/scripts/health_monitor.py b/AppImage/scripts/health_monitor.py
index 2af0824..e06ef36 100644
--- a/AppImage/scripts/health_monitor.py
+++ b/AppImage/scripts/health_monitor.py
@@ -432,7 +432,10 @@ class HealthMonitor:
return None
def _check_memory_comprehensive(self) -> Dict[str, Any]:
- """Check memory including RAM and swap with sustained thresholds"""
+ """
+ Check memory including RAM and swap with realistic thresholds.
+ Only alerts on sustained high RAM usage or heavy swap pressure.
+ """
try:
memory = psutil.virtual_memory()
swap = psutil.swap_memory()
@@ -457,7 +460,7 @@ class HealthMonitor:
mem_critical = sum(
1 for entry in self.state_history[state_key]
- if entry['mem_percent'] >= self.MEMORY_CRITICAL and
+ if entry['mem_percent'] >= 90 and
current_time - entry['time'] <= self.MEMORY_DURATION
)
@@ -469,28 +472,20 @@ class HealthMonitor:
swap_critical = sum(
1 for entry in self.state_history[state_key]
- if entry['swap_vs_ram'] > self.SWAP_CRITICAL_PERCENT and
+ if entry['swap_vs_ram'] > 20 and
current_time - entry['time'] <= self.SWAP_CRITICAL_DURATION
)
- swap_warning = sum(
- 1 for entry in self.state_history[state_key]
- if entry['swap_percent'] > 0 and
- current_time - entry['time'] <= self.SWAP_WARNING_DURATION
- )
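+ # Require at least two samples over the threshold within the window, so a single transient spike never triggers an alert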
if mem_critical >= 2:
status = 'CRITICAL'
- reason = f'RAM >{self.MEMORY_CRITICAL}% for {self.MEMORY_DURATION}s'
+ reason = f'RAM >90% for {self.MEMORY_DURATION}s'
elif swap_critical >= 2:
status = 'CRITICAL'
- reason = f'Swap >{self.SWAP_CRITICAL_PERCENT}% of RAM for {self.SWAP_CRITICAL_DURATION}s'
+ reason = f'Swap >20% of RAM ({swap_vs_ram:.1f}%)'
elif mem_warning >= 2:
status = 'WARNING'
reason = f'RAM >{self.MEMORY_WARNING}% for {self.MEMORY_DURATION}s'
- elif swap_warning >= 2:
- status = 'WARNING'
- reason = f'Swap active for >{self.SWAP_WARNING_DURATION}s'
else:
status = 'OK'
reason = None
@@ -513,63 +508,73 @@ class HealthMonitor:
def _check_storage_optimized(self) -> Dict[str, Any]:
"""
- Optimized storage check - always returns status.
- Checks critical mounts, LVM, and Proxmox storages.
+ Optimized storage check - monitors Proxmox storages from pvesm status.
+ Checks for inactive storages and disk health from SMART/events.
"""
issues = []
storage_details = {}
- # Check critical filesystems
- critical_mounts = ['/', '/var/lib/vz']
+ try:
+ result = subprocess.run(
+ ['pvesm', 'status'],
+ capture_output=True,
+ text=True,
+ timeout=5
+ )
+
+ if result.returncode == 0:
+ lines = result.stdout.strip().split('\n')[1:] # Skip header
+ for line in lines:
+ parts = line.split()
+ if len(parts) >= 4:
+ storage_name = parts[0]
+ storage_type = parts[1]
+ enabled = parts[2]
+ active = parts[3]
+
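+ # Storage enabled in the config but reported inactive: flag it as critical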
+ if enabled == '1' and active == '0':
+ issues.append(f'{storage_name}: Inactive')
+ storage_details[storage_name] = {
+ 'status': 'CRITICAL',
+ 'reason': 'Storage inactive',
+ 'type': storage_type
+ }
+ except Exception:
+ # pvesm unavailable or timed out; skip silently
+ pass
+
+ # Check disk health from Proxmox task log or system logs
+ disk_health_issues = self._check_disk_health_from_events()
+ if disk_health_issues:
+ for disk, issue in disk_health_issues.items():
+ issues.append(f'{disk}: {issue["reason"]}')
+ storage_details[disk] = issue
+
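+ # Only the root filesystem is checked directly; Proxmox-managed storages are covered by pvesm above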
+ critical_mounts = ['/']
for mount_point in critical_mounts:
- is_mounted = False
try:
result = subprocess.run(
['mountpoint', '-q', mount_point],
capture_output=True,
timeout=2
)
- is_mounted = (result.returncode == 0)
- except:
- pass
-
- if not is_mounted:
- # Only report as error if it's supposed to exist
- if mount_point == '/':
+
+ if result.returncode != 0:
issues.append(f'{mount_point}: Not mounted')
storage_details[mount_point] = {
'status': 'CRITICAL',
'reason': 'Not mounted'
}
- # For /var/lib/vz, it might not be a separate mount, check if dir exists
- elif mount_point == '/var/lib/vz':
- if os.path.exists(mount_point):
- # It exists as directory, check usage
- fs_status = self._check_filesystem(mount_point)
- if fs_status['status'] != 'OK':
- issues.append(f"{mount_point}: {fs_status['reason']}")
- storage_details[mount_point] = fs_status
- # If doesn't exist, skip silently (might use different storage)
- continue
-
- fs_status = self._check_filesystem(mount_point)
- if fs_status['status'] != 'OK':
- issues.append(f"{mount_point}: {fs_status['reason']}")
- storage_details[mount_point] = fs_status
-
- # Check LVM
- lvm_status = self._check_lvm()
- if lvm_status and lvm_status.get('status') != 'OK':
- issues.append(lvm_status.get('reason', 'LVM issue'))
- storage_details['lvm'] = lvm_status
-
- # Check Proxmox storages (PBS, NFS, etc)
- pve_storages = self._check_proxmox_storages()
- for storage_name, storage_data in pve_storages.items():
- if storage_data.get('status') != 'OK':
- issues.append(f"{storage_name}: {storage_data.get('reason', 'Storage issue')}")
- storage_details[storage_name] = storage_data
+ continue
+
+ # Check filesystem usage
+ fs_status = self._check_filesystem(mount_point)
+ if fs_status['status'] != 'OK':
+ issues.append(f"{mount_point}: {fs_status['reason']}")
+ storage_details[mount_point] = fs_status
+ except Exception:
+ pass
if not issues:
return {'status': 'OK'}
@@ -873,7 +878,6 @@ class HealthMonitor:
issues = []
vm_details = {}
- # Check logs for VM/CT errors
result = subprocess.run(
['journalctl', '--since', '10 minutes ago', '--no-pager', '-u', 'pve*', '-p', 'warning'],
capture_output=True,
@@ -885,22 +889,20 @@ class HealthMonitor:
for line in result.stdout.split('\n'):
line_lower = line.lower()
- # Pattern 1: "VM 106 qmp command failed"
- vm_qmp_match = re.search(r'vm\s+(\d+)\s+qmp\s+command', line_lower)
+ vm_qmp_match = re.search(r'vm\s+(\d+)\s+qmp\s+command.*(?:failed|unable|timeout)', line_lower)
if vm_qmp_match:
vmid = vm_qmp_match.group(1)
key = f'vm_{vmid}'
if key not in vm_details:
- issues.append(f'VM {vmid}: QMP command error')
+ issues.append(f'VM {vmid}: Communication issue')
vm_details[key] = {
'status': 'WARNING',
- 'reason': 'QMP command failed',
+ 'reason': 'QMP command failed or timed out',
'id': vmid,
'type': 'VM'
}
continue
- # Pattern 2: "CT 103 error" or "Container 103"
ct_match = re.search(r'(?:ct|container)\s+(\d+)', line_lower)
if ct_match and ('error' in line_lower or 'fail' in line_lower):
ctid = ct_match.group(1)
@@ -915,9 +917,7 @@ class HealthMonitor:
}
continue
- # Pattern 3: Generic VM/CT start failures
- if 'failed to start' in line_lower or 'error starting' in line_lower or \
- 'start error' in line_lower or 'cannot start' in line_lower:
+ if any(keyword in line_lower for keyword in ['failed to start', 'cannot start', 'activation failed', 'start error']):
# Extract VM/CT ID
id_match = re.search(r'\b(\d{3,4})\b', line)
if id_match:
@@ -1185,6 +1185,50 @@ class HealthMonitor:
except Exception:
return None
+
+ def _check_disk_health_from_events(self) -> Dict[str, Any]:
+ """
+ Check for disk health warnings from Proxmox task log and system logs.
+ Returns dict of disk issues found.
+ """
+ disk_issues = {}
+
+ try:
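+ # journalctl -p warning returns messages at priority warning and higher severity (err, crit, ...)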
+ result = subprocess.run(
+ ['journalctl', '--since', '1 hour ago', '--no-pager', '-p', 'warning'],
+ capture_output=True,
+ text=True,
+ timeout=3
+ )
+
+ if result.returncode == 0:
+ for line in result.stdout.split('\n'):
+ line_lower = line.lower()
+
+ # Check for SMART warnings
+ if 'smart' in line_lower and ('warning' in line_lower or 'error' in line_lower or 'fail' in line_lower):
+ # Extract disk name
+ disk_match = re.search(r'/dev/(sd[a-z]|nvme\d+n\d+)', line)
+ if disk_match:
+ disk_name = disk_match.group(1)
+ disk_issues[f'/dev/{disk_name}'] = {
+ 'status': 'WARNING',
+ 'reason': 'SMART warning detected'
+ }
+
+ # Check for disk errors
+ if any(keyword in line_lower for keyword in ['disk error', 'ata error', 'medium error']):
+ disk_match = re.search(r'/dev/(sd[a-z]|nvme\d+n\d+)', line)
+ if disk_match:
+ disk_name = disk_match.group(1)
+ disk_issues[f'/dev/{disk_name}'] = {
+ 'status': 'CRITICAL',
+ 'reason': 'Disk error detected'
+ }
+ except Exception:
+ pass
+
+ return disk_issues
# Global instance