Update health monitor

This commit is contained in:
MacRimi
2026-02-17 11:35:11 +01:00
parent 8004ee48c9
commit 31c5eeb6c3
5 changed files with 659 additions and 164 deletions

View File

@@ -38,15 +38,17 @@ interface CategoryCheck {
[key: string]: any
}
interface DismissedError {
interface DismissedError {
error_key: string
category: string
severity: string
reason: string
dismissed: boolean
permanent?: boolean
suppression_remaining_hours: number
suppression_hours?: number
resolved_at: string
}
}
interface HealthDetails {
overall: string
@@ -361,31 +363,33 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
return (
<div className="mt-2 space-y-0.5">
{Object.entries(checks).map(([checkKey, checkData]) => {
{Object.entries(checks)
.filter(([, checkData]) => checkData.installed !== false)
.map(([checkKey, checkData]) => {
const isDismissable = checkData.dismissable === true
const checkStatus = checkData.status?.toUpperCase() || "OK"
return (
<div
key={checkKey}
className="flex items-center justify-between gap-2 text-xs py-1.5 px-3 rounded-md hover:bg-muted/40 transition-colors"
className="flex items-center justify-between gap-1.5 sm:gap-2 text-[10px] sm:text-xs py-1.5 px-2 sm:px-3 rounded-md hover:bg-muted/40 transition-colors"
>
<div className="flex items-center gap-2 min-w-0 flex-1 overflow-hidden">
<div className="flex items-center gap-1.5 sm:gap-2 min-w-0 flex-1 overflow-hidden">
{getStatusIcon(checkData.status, "sm")}
<span className="font-medium shrink-0">{formatCheckLabel(checkKey)}</span>
<span className="text-muted-foreground truncate block">{checkData.detail}</span>
{checkData.dismissed && (
<Badge variant="outline" className="text-[9px] px-1.5 py-0 h-4 shrink-0 text-blue-400 border-blue-400/30">
<Badge variant="outline" className="text-[9px] px-1 py-0 h-4 shrink-0 text-blue-400 border-blue-400/30">
Dismissed
</Badge>
)}
</div>
<div className="flex items-center gap-1.5 shrink-0">
<div className="flex items-center gap-1 sm:gap-1.5 shrink-0">
{(checkStatus === "WARNING" || checkStatus === "CRITICAL") && isDismissable && !checkData.dismissed && (
<Button
size="sm"
variant="outline"
className="h-5 px-1.5 shrink-0 hover:bg-red-500/10 hover:border-red-500/50 bg-transparent text-[10px]"
className="h-5 px-1 sm:px-1.5 shrink-0 hover:bg-red-500/10 hover:border-red-500/50 bg-transparent text-[10px]"
disabled={dismissingKey === checkKey}
onClick={(e) => {
e.stopPropagation()
@@ -396,8 +400,8 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
<Loader2 className="h-3 w-3 animate-spin" />
) : (
<>
<X className="h-3 w-3 mr-0.5" />
Dismiss
<X className="h-3 w-3 sm:mr-0.5" />
<span className="hidden sm:inline">Dismiss</span>
</>
)}
</Button>
@@ -414,21 +418,21 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
return (
<Dialog open={open} onOpenChange={onOpenChange}>
<DialogContent className="max-w-3xl w-[95vw] max-h-[85vh] overflow-y-auto overflow-x-hidden">
<DialogContent className="max-w-3xl w-[calc(100vw-2rem)] sm:w-[95vw] max-h-[85vh] overflow-y-auto overflow-x-hidden p-4 sm:p-6">
<DialogHeader>
<div className="flex items-center justify-between gap-3">
<DialogTitle className="flex items-center gap-2 flex-1">
<Activity className="h-6 w-6" />
System Health Status
{healthData && <div className="ml-2">{getStatusBadge(healthData.overall)}</div>}
<DialogTitle className="flex items-center gap-2 flex-1 min-w-0">
<Activity className="h-5 w-5 sm:h-6 sm:w-6 shrink-0" />
<span className="truncate text-base sm:text-lg">System Health Status</span>
{healthData && <div className="shrink-0">{getStatusBadge(healthData.overall)}</div>}
</DialogTitle>
</div>
<DialogDescription className="flex items-center gap-2">
Detailed health checks for all system components
<DialogDescription className="flex flex-wrap items-center gap-x-2 gap-y-0.5 text-xs sm:text-sm">
<span>Detailed health checks for all system components</span>
{getTimeSinceCheck() && (
<span className="inline-flex items-center gap-1 text-xs text-muted-foreground">
<Clock className="h-3 w-3" />
Last check: {getTimeSinceCheck()}
{getTimeSinceCheck()}
</span>
)}
</DialogDescription>
@@ -450,28 +454,28 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
{healthData && !loading && (
<div className="space-y-4">
{/* Overall Stats Summary */}
<div className={`grid gap-3 p-4 rounded-lg bg-muted/30 border ${stats.info > 0 ? "grid-cols-5" : "grid-cols-4"}`}>
<div className={`grid gap-2 sm:gap-3 p-3 sm:p-4 rounded-lg bg-muted/30 border ${stats.info > 0 ? "grid-cols-5" : "grid-cols-4"}`}>
<div className="text-center">
<div className="text-2xl font-bold">{stats.total}</div>
<div className="text-xs text-muted-foreground">Total</div>
<div className="text-lg sm:text-2xl font-bold">{stats.total}</div>
<div className="text-[10px] sm:text-xs text-muted-foreground">Total</div>
</div>
<div className="text-center">
<div className="text-2xl font-bold text-green-500">{stats.healthy}</div>
<div className="text-xs text-muted-foreground">Healthy</div>
<div className="text-lg sm:text-2xl font-bold text-green-500">{stats.healthy}</div>
<div className="text-[10px] sm:text-xs text-muted-foreground">Healthy</div>
</div>
{stats.info > 0 && (
<div className="text-center">
<div className="text-2xl font-bold text-blue-500">{stats.info}</div>
<div className="text-xs text-muted-foreground">Info</div>
<div className="text-lg sm:text-2xl font-bold text-blue-500">{stats.info}</div>
<div className="text-[10px] sm:text-xs text-muted-foreground">Info</div>
</div>
)}
<div className="text-center">
<div className="text-2xl font-bold text-yellow-500">{stats.warnings}</div>
<div className="text-xs text-muted-foreground">Warnings</div>
<div className="text-lg sm:text-2xl font-bold text-yellow-500">{stats.warnings}</div>
<div className="text-[10px] sm:text-xs text-muted-foreground">Warn</div>
</div>
<div className="text-center">
<div className="text-2xl font-bold text-red-500">{stats.critical}</div>
<div className="text-xs text-muted-foreground">Critical</div>
<div className="text-lg sm:text-2xl font-bold text-red-500">{stats.critical}</div>
<div className="text-[10px] sm:text-xs text-muted-foreground">Critical</div>
</div>
</div>
@@ -498,32 +502,32 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
>
{/* Clickable header row */}
<div
className="flex items-center gap-3 p-3 cursor-pointer select-none overflow-hidden"
className="flex items-center gap-2 sm:gap-3 p-2 sm:p-3 cursor-pointer select-none overflow-hidden"
onClick={() => toggleCategory(key)}
>
<div className="shrink-0 flex items-center gap-2">
<Icon className="h-4 w-4 text-blue-500" />
<div className="shrink-0 flex items-center gap-1.5 sm:gap-2">
<Icon className="h-4 w-4 text-blue-500 hidden sm:block" />
{getStatusIcon(status)}
</div>
<div className="flex-1 min-w-0 overflow-hidden">
<div className="flex items-center gap-2">
<p className="font-medium text-sm truncate">{label}</p>
<div className="flex items-center gap-1.5 sm:gap-2">
<p className="font-medium text-xs sm:text-sm truncate">{label}</p>
{hasChecks && (
<span className="text-[10px] text-muted-foreground shrink-0">
({Object.keys(checks).length} checks)
({Object.values(checks).filter(c => c.installed !== false).length})
</span>
)}
</div>
{reason && !isExpanded && (
<p className="text-xs text-muted-foreground mt-0.5 truncate" title={reason}>{reason}</p>
<p className="text-[10px] sm:text-xs text-muted-foreground mt-0.5 truncate" title={reason}>{reason}</p>
)}
</div>
<div className="flex items-center gap-2 shrink-0">
<Badge variant="outline" className={`text-xs ${getOutlineBadgeStyle(status)}`}>
<div className="flex items-center gap-1 sm:gap-2 shrink-0">
<Badge variant="outline" className={`text-[10px] sm:text-xs px-1.5 sm:px-2.5 ${getOutlineBadgeStyle(status)}`}>
{status}
</Badge>
<ChevronRight
className={`h-4 w-4 text-muted-foreground transition-transform duration-200 ${
className={`h-3.5 w-3.5 sm:h-4 sm:w-4 text-muted-foreground transition-transform duration-200 ${
isExpanded ? "rotate-90" : ""
}`}
/>
@@ -532,7 +536,7 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
{/* Expandable checks section */}
{isExpanded && (
<div className="border-t border-border/50 bg-muted/5 px-2 py-1.5 overflow-hidden">
<div className="border-t border-border/50 bg-muted/5 px-1.5 sm:px-2 py-1.5 overflow-hidden">
{reason && (
<p className="text-xs text-muted-foreground px-3 py-1.5 mb-1 break-words">{reason}</p>
)}
@@ -554,41 +558,62 @@ export function HealthStatusModal({ open, onOpenChange, getApiUrl }: HealthStatu
{/* Dismissed Items Section */}
{dismissedItems.length > 0 && (
<div className="space-y-2">
<div className="flex items-center gap-2 text-sm font-medium text-muted-foreground pt-2">
<BellOff className="h-4 w-4" />
<div className="flex items-center gap-2 text-xs sm:text-sm font-medium text-muted-foreground pt-2">
<BellOff className="h-3.5 w-3.5 sm:h-4 sm:w-4" />
Dismissed Items ({dismissedItems.length})
</div>
{dismissedItems.map((item) => (
<div
key={item.error_key}
className="flex items-start gap-3 p-3 rounded-lg border bg-muted/10 border-muted opacity-75"
>
<div className="mt-0.5 flex-shrink-0 flex items-center gap-2">
<BellOff className="h-4 w-4 text-muted-foreground" />
{getStatusIcon("INFO")}
</div>
<div className="flex-1 min-w-0">
<div className="flex items-center justify-between gap-2 mb-1">
<p className="font-medium text-sm text-muted-foreground truncate">{item.reason}</p>
<div className="flex items-center gap-1.5 shrink-0">
<Badge variant="outline" className="text-xs border-blue-500/50 text-blue-500/70 bg-transparent">
Dismissed
</Badge>
<Badge variant="outline" className={`text-xs ${getOutlineBadgeStyle(item.severity)}`}>
was {item.severity}
</Badge>
</div>
{dismissedItems.map((item) => {
const catMeta = CATEGORIES.find(c => c.category === item.category || c.key === item.category)
const CatIcon = catMeta?.Icon || BellOff
const catLabel = catMeta?.label || item.category
const isPermanent = item.permanent || item.suppression_remaining_hours === -1
return (
<div
key={item.error_key}
className="flex items-start gap-2 sm:gap-3 p-2 sm:p-3 rounded-lg border bg-muted/10 border-muted opacity-75"
>
<div className="mt-0.5 shrink-0 flex items-center gap-1.5 sm:gap-2">
<CatIcon className="h-3.5 w-3.5 sm:h-4 sm:w-4 text-muted-foreground" />
</div>
<div className="flex-1 min-w-0 overflow-hidden">
<div className="flex flex-col sm:flex-row sm:items-center sm:justify-between gap-1 sm:gap-2 mb-1">
<div className="min-w-0">
<p className="font-medium text-xs sm:text-sm text-muted-foreground truncate">{catLabel}</p>
<p className="text-[10px] sm:text-xs text-muted-foreground/70 truncate">{item.reason}</p>
</div>
<div className="flex items-center gap-1.5 shrink-0">
{isPermanent ? (
<Badge variant="outline" className="text-[9px] sm:text-xs border-amber-500/50 text-amber-500/70 bg-transparent">
Permanent
</Badge>
) : (
<Badge variant="outline" className="text-[9px] sm:text-xs border-blue-500/50 text-blue-500/70 bg-transparent">
Dismissed
</Badge>
)}
<Badge variant="outline" className={`text-[9px] sm:text-xs ${getOutlineBadgeStyle(item.severity)}`}>
was {item.severity}
</Badge>
</div>
</div>
<p className="text-[10px] sm:text-xs text-muted-foreground flex items-center gap-1">
<Clock className="h-3 w-3" />
{isPermanent
? "Permanently suppressed"
: `Suppressed for ${
item.suppression_remaining_hours < 24
? `${Math.round(item.suppression_remaining_hours)}h`
: item.suppression_remaining_hours < 720
? `${Math.round(item.suppression_remaining_hours / 24)} days`
: `${Math.round(item.suppression_remaining_hours / 720)} month(s)`
} more`
}
</p>
</div>
<p className="text-xs text-muted-foreground flex items-center gap-1">
<Clock className="h-3 w-3" />
Suppressed for {item.suppression_remaining_hours < 24
? `${Math.round(item.suppression_remaining_hours)}h`
: `${Math.round(item.suppression_remaining_hours / 24)} days`
} more
</p>
</div>
</div>
))}
)
})}
</div>
)}

View File

@@ -2,11 +2,44 @@
import { useState, useEffect } from "react"
import { Card, CardContent, CardDescription, CardHeader, CardTitle } from "./ui/card"
import { Wrench, Package, Ruler } from "lucide-react"
import { Wrench, Package, Ruler, HeartPulse, Cpu, MemoryStick, HardDrive, CircleDot, Network, Server, Settings2, FileText, RefreshCw, Shield, AlertTriangle, Info, Loader2, Check } from "lucide-react"
import { Select, SelectContent, SelectItem, SelectTrigger, SelectValue } from "./ui/select"
import { Input } from "./ui/input"
import { Badge } from "./ui/badge"
import { getNetworkUnit } from "../lib/format-network"
import { fetchApi } from "../lib/api-config"
/**
 * One row of the Health Monitor suppression settings, as stored in the
 * `suppressionCategories` state and rendered in the settings card.
 */
interface SuppressionCategory {
// Setting identifier: used to match rows in state, as the React list key,
// and as the JSON property name when the duration is saved.
key: string
// Human-readable name shown in the row and in the "Permanent" warning.
label: string
// Health category name; compared against "temperature" to show the CPU safety note.
category: string
// Lookup key into CATEGORY_ICONS (the row falls back to HeartPulse when it is unknown).
icon: string
// Suppression duration in hours. -1 means permanent; -2 is a client-side
// marker set while the custom-duration input is open.
hours: number
}
// Choices offered in the per-category duration dropdown.
// Numeric values are hour counts encoded as strings (they are parsed with
// parseInt before saving). "custom" switches the row to a free-form hours
// input instead of saving, and "-1" stands for permanent suppression.
const SUPPRESSION_OPTIONS = [
{ value: "24", label: "24 hours" },
{ value: "72", label: "3 days" },
{ value: "168", label: "1 week" },
{ value: "720", label: "1 month" },
{ value: "8760", label: "1 year" },
{ value: "custom", label: "Custom" },
{ value: "-1", label: "Permanent" },
]
// Maps a category's `icon` string to the lucide-react component drawn next to
// its label. Keys missing from this table are handled at the lookup site,
// which falls back to HeartPulse.
const CATEGORY_ICONS: Record<string, React.ElementType> = {
cpu: Cpu,
memory: MemoryStick,
storage: HardDrive,
disk: CircleDot,
network: Network,
vms: Server,
services: Settings2,
logs: FileText,
updates: RefreshCw,
security: Shield,
}
interface ProxMenuxTool {
key: string
name: string
@@ -18,10 +51,18 @@ export function Settings() {
const [loadingTools, setLoadingTools] = useState(true)
const [networkUnitSettings, setNetworkUnitSettings] = useState<"Bytes" | "Bits">("Bytes")
const [loadingUnitSettings, setLoadingUnitSettings] = useState(true)
// Health Monitor suppression settings
const [suppressionCategories, setSuppressionCategories] = useState<SuppressionCategory[]>([])
const [loadingHealth, setLoadingHealth] = useState(true)
const [savingHealth, setSavingHealth] = useState<string | null>(null)
const [savedHealth, setSavedHealth] = useState<string | null>(null)
const [customValues, setCustomValues] = useState<Record<string, string>>({})
useEffect(() => {
loadProxmenuxTools()
getUnitsSettings()
loadHealthSettings()
}, [])
const loadProxmenuxTools = async () => {
@@ -57,6 +98,78 @@ export function Settings() {
setLoadingUnitSettings(false)
}
/**
 * Fetches the per-category suppression settings from `/api/health/settings`
 * and stores them in `suppressionCategories`.
 *
 * Failures are logged and leave the current state untouched; the loading
 * flag is always cleared so the card never stays stuck on the spinner.
 */
const loadHealthSettings = async () => {
  try {
    const data = await fetchApi("/api/health/settings")
    // The rows are rendered with .map(), so only accept a real array. Any
    // other payload shape (e.g. an error object) is ignored rather than
    // stored and left to break rendering.
    if (Array.isArray(data?.categories)) {
      setSuppressionCategories(data.categories)
    }
  } catch (err) {
    console.error("Failed to load health settings:", err)
  } finally {
    setLoadingHealth(false)
  }
}
/**
 * Translates a stored suppression duration into the value the duration
 * dropdown should display.
 *
 * @param hours Stored duration in hours (-1 for permanent).
 * @param key   Setting key of the row; currently unused.
 * @returns "-1" for permanent, the hour count as a string when it matches one
 *          of the preset options, otherwise "custom".
 */
const getSelectValue = (hours: number, key: string): string => {
  // Permanent suppression is stored as -1.
  if (hours === -1) {
    return "-1"
  }

  const hoursText = String(hours)
  const isPreset = SUPPRESSION_OPTIONS.some(
    (option) => option.value !== "custom" && option.value === hoursText
  )
  return isPreset ? hoursText : "custom"
}
/**
 * Handles a new selection in a category's duration dropdown.
 *
 * Choosing "custom" does not save anything: it opens the custom-hours input
 * for that row. Any other value is parsed as an hour count and saved
 * immediately.
 *
 * @param settingKey Key of the category row being edited.
 * @param value      Selected option value ("custom", "-1", or an hour count).
 */
const handleSuppressionChange = async (settingKey: string, value: string) => {
  if (value === "custom") {
    // Show custom input -- don't save yet
    const current = suppressionCategories.find(c => c.key === settingKey)
    // Pre-fill only from a real, positive duration. A permanent row holds -1,
    // which the custom save handler rejects (it requires at least 1 hour), so
    // seeding the input with it would leave the Save button silently inert.
    const seedHours = current !== undefined && current.hours > 0 ? current.hours : 48
    setCustomValues(prev => ({ ...prev, [settingKey]: String(seedHours) }))
    // Temporarily mark the row as custom (-2) in state
    setSuppressionCategories(prev =>
      prev.map(c => c.key === settingKey ? { ...c, hours: -2 } : c)
    )
    return
  }

  const hours = parseInt(value, 10)
  if (isNaN(hours)) return
  await saveSuppression(settingKey, hours)
}
/**
 * Saves the value typed into a row's custom-hours input.
 *
 * The text is parsed as a base-10 integer; anything that does not yield a
 * whole number of at least 1 hour is ignored and nothing is saved.
 *
 * @param settingKey Key of the category row whose custom value is being saved.
 */
const handleCustomSave = async (settingKey: string) => {
  const parsedHours = parseInt(customValues[settingKey], 10)
  const isUsable = !isNaN(parsedHours) && parsedHours >= 1
  if (!isUsable) {
    return
  }
  await saveSuppression(settingKey, parsedHours)
}
/**
 * Persists one category's suppression duration and reflects it in local state.
 *
 * While the request is in flight the row shows a spinner (`savingHealth`).
 * On success the row's hours are updated, any pending custom input for it is
 * discarded, and a "saved" check mark is shown for two seconds. Failures are
 * logged and leave the displayed values unchanged.
 *
 * @param settingKey Key of the category row being saved.
 * @param hours      Duration in hours, or -1 for permanent.
 */
const saveSuppression = async (settingKey: string, hours: number) => {
  setSavingHealth(settingKey)
  try {
    await fetchApi("/api/health/settings", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ [settingKey]: String(hours) }),
    })
    setSuppressionCategories(prev =>
      prev.map(c => c.key === settingKey ? { ...c, hours } : c)
    )
    // Remove from custom values
    setCustomValues(prev => {
      const next = { ...prev }
      delete next[settingKey]
      return next
    })
    setSavedHealth(settingKey)
    // Clear the check mark only if it still belongs to this row; otherwise a
    // stale timer from an earlier save would wipe the indicator of a newer
    // save made on a different row.
    setTimeout(() => setSavedHealth(prev => (prev === settingKey ? null : prev)), 2000)
  } catch (err) {
    console.error("Failed to save health setting:", err)
  } finally {
    setSavingHealth(null)
  }
}
return (
<div className="space-y-6">
<div>
@@ -95,6 +208,143 @@ export function Settings() {
</CardContent>
</Card>
{/* Health Monitor Settings */}
<Card>
<CardHeader>
<div className="flex items-center gap-2">
<HeartPulse className="h-5 w-5 text-red-500" />
<CardTitle>Health Monitor</CardTitle>
</div>
<CardDescription>
Configure how long dismissed alerts stay suppressed for each category.
When you dismiss a warning, it will not reappear until the suppression period expires.
</CardDescription>
</CardHeader>
<CardContent>
{loadingHealth ? (
<div className="flex items-center justify-center py-8">
<div className="animate-spin h-8 w-8 border-4 border-red-500 border-t-transparent rounded-full" />
</div>
) : (
<div className="space-y-1">
{/* Header */}
<div className="flex items-center justify-between mb-3 pb-2 border-b border-border">
<span className="text-sm font-medium text-muted-foreground">Category</span>
<span className="text-sm font-medium text-muted-foreground">Suppression Duration</span>
</div>
{/* Per-category rows */}
{suppressionCategories.map((cat) => {
const IconComp = CATEGORY_ICONS[cat.icon] || HeartPulse
const isCustomMode = cat.hours === -2 || (cat.key in customValues)
const isPermanent = cat.hours === -1
const isLong = cat.hours >= 720 && cat.hours !== -1
const selectVal = isCustomMode ? "custom" : getSelectValue(cat.hours, cat.key)
return (
<div key={cat.key} className="space-y-0">
<div className="flex items-center justify-between gap-3 py-2.5 px-2 rounded-lg hover:bg-muted/30 transition-colors">
<div className="flex items-center gap-2.5 min-w-0">
<IconComp className="h-4 w-4 text-muted-foreground shrink-0" />
<span className="text-sm font-medium truncate">{cat.label}</span>
{savingHealth === cat.key && (
<Loader2 className="h-3.5 w-3.5 animate-spin text-muted-foreground shrink-0" />
)}
{savedHealth === cat.key && (
<Check className="h-3.5 w-3.5 text-green-500 shrink-0" />
)}
</div>
<div className="flex items-center gap-2 shrink-0">
{isCustomMode ? (
<div className="flex items-center gap-1.5">
<Input
type="number"
min={1}
className="w-20 h-8 text-xs"
value={customValues[cat.key] || ""}
onChange={(e) => setCustomValues(prev => ({ ...prev, [cat.key]: e.target.value }))}
placeholder="Hours"
/>
<span className="text-xs text-muted-foreground">h</span>
<button
className="h-8 px-2 text-xs rounded-md border border-border bg-background hover:bg-muted transition-colors"
onClick={() => handleCustomSave(cat.key)}
disabled={savingHealth === cat.key}
>
Save
</button>
<button
className="h-8 px-2 text-xs rounded-md border border-border bg-background hover:bg-muted transition-colors text-muted-foreground"
onClick={() => {
setCustomValues(prev => {
const next = { ...prev }
delete next[cat.key]
return next
})
loadHealthSettings()
}}
>
Cancel
</button>
</div>
) : (
<Select value={selectVal} onValueChange={(v) => handleSuppressionChange(cat.key, v)}>
<SelectTrigger className="w-32 h-8 text-xs">
<SelectValue />
</SelectTrigger>
<SelectContent>
{SUPPRESSION_OPTIONS.map((opt) => (
<SelectItem key={opt.value} value={opt.value}>
{opt.label}
</SelectItem>
))}
</SelectContent>
</Select>
)}
</div>
</div>
{/* Warning for Permanent */}
{isPermanent && (
<div className="flex items-start gap-2 ml-8 mr-2 mb-2 p-2.5 rounded-md bg-amber-500/10 border border-amber-500/20">
<AlertTriangle className="h-4 w-4 text-amber-500 shrink-0 mt-0.5" />
<p className="text-xs text-amber-400/90 leading-relaxed">
Dismissed alerts for <span className="font-semibold">{cat.label}</span> will never reappear.
{cat.category === "temperature" && (
<span className="block mt-1 text-amber-300 font-medium">
Note: Critical CPU temperature alerts will still trigger for hardware safety.
</span>
)}
</p>
</div>
)}
{/* Warning for long custom duration (> 1 month) */}
{isLong && !isPermanent && (
<div className="flex items-start gap-2 ml-8 mr-2 mb-2 p-2.5 rounded-md bg-amber-500/10 border border-amber-500/20">
<Info className="h-4 w-4 text-amber-500 shrink-0 mt-0.5" />
<p className="text-xs text-amber-400/90 leading-relaxed">
Long suppression period. Dismissed alerts for this category will not reappear for an extended time.
</p>
</div>
)}
</div>
)
})}
{/* Info footer */}
<div className="flex items-start gap-2 mt-4 pt-3 border-t border-border">
<Info className="h-4 w-4 text-blue-400 shrink-0 mt-0.5" />
<p className="text-xs text-muted-foreground leading-relaxed">
These settings apply when you dismiss a warning from the Health Monitor.
Critical CPU temperature alerts always trigger regardless of settings to protect your hardware.
</p>
</div>
</div>
)}
</CardContent>
</Card>
{/* ProxMenux Optimizations */}
<Card>
<CardHeader>

View File

@@ -89,14 +89,20 @@ def acknowledge_error():
health_monitor.last_check_times.pop('overall_health', None)
health_monitor.cached_results.pop('overall_health', None)
# Determine suppression period for the response
category = result.get('category', '')
if category == 'updates':
suppression_hours = 180 * 24 # 180 days in hours
suppression_label = '6 months'
# Use the per-record suppression hours from acknowledge_error()
sup_hours = result.get('suppression_hours', 24)
if sup_hours == -1:
suppression_label = 'permanently'
elif sup_hours >= 8760:
suppression_label = f'{sup_hours // 8760} year(s)'
elif sup_hours >= 720:
suppression_label = f'{sup_hours // 720} month(s)'
elif sup_hours >= 168:
suppression_label = f'{sup_hours // 168} week(s)'
elif sup_hours >= 72:
suppression_label = f'{sup_hours // 24} day(s)'
else:
suppression_hours = 24
suppression_label = '24 hours'
suppression_label = f'{sup_hours} hours'
return jsonify({
'success': True,
@@ -104,7 +110,7 @@ def acknowledge_error():
'error_key': error_key,
'original_severity': result.get('original_severity', 'WARNING'),
'category': category,
'suppression_hours': suppression_hours,
'suppression_hours': sup_hours,
'suppression_label': suppression_label,
'acknowledged_at': result.get('acknowledged_at')
})
@@ -190,3 +196,54 @@ def mark_events_notified():
return jsonify({'success': True, 'marked_count': len(event_ids)})
except Exception as e:
return jsonify({'error': str(e)}), 500
@health_bp.route('/api/health/settings', methods=['GET'])
def get_health_settings():
    """
    Return the per-category suppression duration settings.

    Responds with ``{"categories": [...]}`` as produced by
    ``health_persistence.get_suppression_categories()``. Any exception is
    reported as ``{"error": "<message>"}`` with HTTP status 500.
    """
    try:
        payload = {'categories': health_persistence.get_suppression_categories()}
        return jsonify(payload)
    except Exception as exc:
        failure = {'error': str(exc)}
        return jsonify(failure), 500
@health_bp.route('/api/health/settings', methods=['POST'])
def save_health_settings():
    """
    Save per-category suppression duration settings.

    Expects a JSON object of key-value pairs such as
    ``{"suppress_cpu": "168", "suppress_memory": "-1"}``.
    Valid values: -1 (permanent) or any positive integer number of hours
    (24, 72, 168, 720 and 8760 are the presets offered by the UI).

    Entries with an unknown key or an invalid value are skipped, not rejected;
    the response lists the keys that were actually stored.

    Returns:
        200 with ``{"success": True, "updated": [...], "count": N}``,
        400 when the body is missing, empty, malformed, or not a JSON object,
        500 with ``{"error": "<message>"}`` on unexpected failures.
    """
    try:
        # silent=True makes a malformed or non-JSON body come back as None, so
        # it is answered with a 400 below instead of raising into the generic
        # handler and being reported as a server error.
        data = request.get_json(silent=True)
        if not data:
            return jsonify({'error': 'No settings provided'}), 400
        # A JSON array or scalar has no .items(); reject it as a client error.
        if not isinstance(data, dict):
            return jsonify({'error': 'Settings must be a JSON object'}), 400

        valid_keys = set(health_persistence.CATEGORY_SETTING_MAP.values())
        updated = []

        for key, value in data.items():
            if key not in valid_keys:
                continue
            # bool is a subclass of int (int(True) == 1), so a JSON true/false
            # would otherwise be stored as a duration.
            if isinstance(value, bool):
                continue
            try:
                hours = int(value)
                # Validate: must be -1 (permanent) or positive
                if hours != -1 and hours < 1:
                    continue
                health_persistence.set_setting(key, str(hours))
                updated.append(key)
            except (ValueError, TypeError):
                continue

        return jsonify({
            'success': True,
            'updated': updated,
            'count': len(updated)
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500

View File

@@ -2258,10 +2258,9 @@ class HealthMonitor:
try:
issues = []
checks = {
'uptime': {'status': 'OK', 'detail': ''},
'certificates': {'status': 'OK', 'detail': ''},
'login_attempts': {'status': 'OK', 'detail': ''},
'fail2ban': {'status': 'OK', 'detail': 'Not installed'}
'uptime': {'status': 'OK', 'detail': ''},
'certificates': {'status': 'OK', 'detail': ''},
'login_attempts': {'status': 'OK', 'detail': ''},
}
# Sub-check 1: Uptime for potential kernel vulnerabilities
@@ -2322,21 +2321,23 @@ class HealthMonitor:
except Exception:
checks['login_attempts'] = {'status': 'OK', 'detail': 'Unable to check login attempts'}
# Sub-check 4: Fail2Ban ban detection
# Sub-check 4: Fail2Ban ban detection (only show if installed)
try:
f2b = self._check_fail2ban_bans()
f2b_status = f2b.get('status', 'OK')
checks['fail2ban'] = {
'status': f2b_status,
'dismissable': True if f2b_status not in ['OK'] else False,
'detail': f2b.get('detail', ''),
'installed': f2b.get('installed', False),
'banned_count': f2b.get('banned_count', 0)
}
if f2b.get('status') == 'WARNING':
issues.append(f2b.get('detail', 'Fail2Ban bans detected'))
if f2b.get('installed', False):
f2b_status = f2b.get('status', 'OK')
checks['fail2ban'] = {
'status': f2b_status,
'dismissable': True if f2b_status not in ['OK'] else False,
'detail': f2b.get('detail', ''),
'installed': True,
'banned_count': f2b.get('banned_count', 0)
}
if f2b.get('status') == 'WARNING':
issues.append(f2b.get('detail', 'Fail2Ban bans detected'))
# If not installed, simply don't add it to checks
except Exception:
checks['fail2ban'] = {'status': 'OK', 'detail': 'Unable to check Fail2Ban'}
pass
# Determine overall security status
if issues:

View File

@@ -28,7 +28,23 @@ class HealthPersistence:
VM_ERROR_RETENTION = 48 * 3600 # 48 hours
LOG_ERROR_RETENTION = 24 * 3600 # 24 hours
DISK_ERROR_RETENTION = 48 * 3600 # 48 hours
UPDATES_SUPPRESSION = 180 * 24 * 3600 # 180 days (6 months)
# Default suppression: 24 hours (user can change per-category in settings)
DEFAULT_SUPPRESSION_HOURS = 24
# Mapping from error categories to settings keys
CATEGORY_SETTING_MAP = {
'temperature': 'suppress_cpu',
'memory': 'suppress_memory',
'storage': 'suppress_storage',
'disks': 'suppress_disks',
'network': 'suppress_network',
'vms': 'suppress_vms',
'pve_services': 'suppress_pve_services',
'logs': 'suppress_logs',
'updates': 'suppress_updates',
'security': 'suppress_security',
}
def __init__(self):
"""Initialize persistence with database in shared ProxMenux data directory"""
@@ -80,6 +96,21 @@ class HealthPersistence:
)
''')
# User settings table (per-category suppression durations, etc.)
cursor.execute('''
CREATE TABLE IF NOT EXISTS user_settings (
setting_key TEXT PRIMARY KEY,
setting_value TEXT NOT NULL,
updated_at TEXT NOT NULL
)
''')
# Migration: add suppression_hours column to errors if not present
cursor.execute("PRAGMA table_info(errors)")
columns = [col[1] for col in cursor.fetchall()]
if 'suppression_hours' not in columns:
cursor.execute('ALTER TABLE errors ADD COLUMN suppression_hours INTEGER DEFAULT 24')
# Indexes for performance
cursor.execute('CREATE INDEX IF NOT EXISTS idx_error_key ON errors(error_key)')
cursor.execute('CREATE INDEX IF NOT EXISTS idx_category ON errors(category)')
@@ -102,33 +133,8 @@ class HealthPersistence:
details_json = json.dumps(details) if details else None
cursor.execute('''
SELECT acknowledged, resolved_at
FROM errors
WHERE error_key = ? AND acknowledged = 1
''', (error_key,))
ack_check = cursor.fetchone()
if ack_check and ack_check[1]: # Has resolved_at timestamp
try:
resolved_dt = datetime.fromisoformat(ack_check[1])
hours_since_ack = (datetime.now() - resolved_dt).total_seconds() / 3600
if category == 'updates':
# Updates: suppress for 180 days (6 months)
suppression_hours = self.UPDATES_SUPPRESSION / 3600
else:
# Other errors: suppress for 24 hours
suppression_hours = 24
if hours_since_ack < suppression_hours:
# Skip re-adding recently acknowledged errors
conn.close()
return {'type': 'skipped_acknowledged', 'needs_notification': False}
except Exception:
pass
cursor.execute('''
SELECT id, first_seen, notification_sent, acknowledged, resolved_at
SELECT id, acknowledged, resolved_at, category, severity, first_seen,
notification_sent, suppression_hours
FROM errors WHERE error_key = ?
''', (error_key,))
existing = cursor.fetchone()
@@ -136,13 +142,64 @@ class HealthPersistence:
event_info = {'type': 'updated', 'needs_notification': False}
if existing:
error_id, first_seen, notif_sent, acknowledged, resolved_at = existing
err_id, ack, resolved_at, old_cat, old_severity, first_seen, notif_sent, stored_suppression = existing
if acknowledged == 1:
conn.close()
return {'type': 'skipped_acknowledged', 'needs_notification': False}
if ack == 1:
# SAFETY OVERRIDE: Critical CPU temperature ALWAYS re-triggers
# regardless of any dismiss/permanent setting (hardware protection)
if error_key == 'cpu_temperature' and severity == 'CRITICAL':
cursor.execute('DELETE FROM errors WHERE error_key = ?', (error_key,))
cursor.execute('''
INSERT INTO errors
(error_key, category, severity, reason, details, first_seen, last_seen)
VALUES (?, ?, ?, ?, ?, ?, ?)
''', (error_key, category, severity, reason, details_json, now, now))
event_info = {'type': 'new', 'needs_notification': True}
self._record_event(cursor, 'new', error_key,
{'severity': severity, 'reason': reason,
'note': 'CRITICAL temperature override - safety alert'})
conn.commit()
conn.close()
return event_info
# Check suppression: use per-record stored hours (set at dismiss time)
sup_hours = stored_suppression if stored_suppression is not None else self.DEFAULT_SUPPRESSION_HOURS
# Permanent dismiss (sup_hours == -1): always suppress
if sup_hours == -1:
conn.close()
return {'type': 'skipped_acknowledged', 'needs_notification': False}
# Time-limited suppression
still_suppressed = False
if resolved_at:
try:
resolved_dt = datetime.fromisoformat(resolved_at)
elapsed_hours = (datetime.now() - resolved_dt).total_seconds() / 3600
still_suppressed = elapsed_hours < sup_hours
except Exception:
pass
if still_suppressed:
conn.close()
return {'type': 'skipped_acknowledged', 'needs_notification': False}
else:
# Suppression expired - reset as a NEW event
cursor.execute('DELETE FROM errors WHERE error_key = ?', (error_key,))
cursor.execute('''
INSERT INTO errors
(error_key, category, severity, reason, details, first_seen, last_seen)
VALUES (?, ?, ?, ?, ?, ?, ?)
''', (error_key, category, severity, reason, details_json, now, now))
event_info = {'type': 'new', 'needs_notification': True}
self._record_event(cursor, 'new', error_key,
{'severity': severity, 'reason': reason,
'note': 'Re-triggered after suppression expired'})
conn.commit()
conn.close()
return event_info
# Update existing error (only if NOT acknowledged)
# Not acknowledged - update existing active error
cursor.execute('''
UPDATE errors
SET last_seen = ?, severity = ?, reason = ?, details = ?
@@ -150,13 +207,9 @@ class HealthPersistence:
''', (now, severity, reason, details_json, error_key))
# Check if severity escalated
cursor.execute('SELECT severity FROM errors WHERE error_key = ?', (error_key,))
old_severity_row = cursor.fetchone()
if old_severity_row:
old_severity = old_severity_row[0]
if old_severity == 'WARNING' and severity == 'CRITICAL':
event_info['type'] = 'escalated'
event_info['needs_notification'] = True
if old_severity == 'WARNING' and severity == 'CRITICAL':
event_info['type'] = 'escalated'
event_info['needs_notification'] = True
else:
# Insert new error
cursor.execute('''
@@ -225,21 +278,40 @@ class HealthPersistence:
"""
Remove/resolve a specific error immediately.
Used when the condition that caused the error no longer exists
(e.g., storage became available again).
(e.g., storage became available again, CPU temp recovered).
For acknowledged errors: if the condition resolved on its own,
we delete the record entirely so it can re-trigger as a fresh
event if the condition returns later.
"""
conn = sqlite3.connect(str(self.db_path))
cursor = conn.cursor()
now = datetime.now().isoformat()
# Check if this error was acknowledged (dismissed)
cursor.execute('''
UPDATE errors
SET resolved_at = ?
WHERE error_key = ? AND resolved_at IS NULL
''', (now, error_key))
SELECT acknowledged FROM errors WHERE error_key = ?
''', (error_key,))
row = cursor.fetchone()
if cursor.rowcount > 0:
self._record_event(cursor, 'cleared', error_key, {'reason': 'condition_resolved'})
if row and row[0] == 1:
# Dismissed error that naturally resolved - delete entirely
# so it can re-trigger as a new event if it happens again
cursor.execute('DELETE FROM errors WHERE error_key = ?', (error_key,))
if cursor.rowcount > 0:
self._record_event(cursor, 'cleared', error_key,
{'reason': 'condition_resolved_after_dismiss'})
else:
# Normal active error - mark as resolved
cursor.execute('''
UPDATE errors
SET resolved_at = ?
WHERE error_key = ? AND resolved_at IS NULL
''', (now, error_key))
if cursor.rowcount > 0:
self._record_event(cursor, 'cleared', error_key, {'reason': 'condition_resolved'})
conn.commit()
conn.close()
@@ -247,13 +319,9 @@ class HealthPersistence:
def acknowledge_error(self, error_key: str) -> Dict[str, Any]:
"""
Manually acknowledge an error (dismiss).
- Looks up the category's configured suppression duration from user settings
- Stores suppression_hours on the error record (snapshot at dismiss time)
- Marks as acknowledged so it won't re-appear during the suppression period
- Stores the original severity for reference
- Returns info about the acknowledged error
Suppression periods:
- updates category: 180 days (6 months)
- other categories: 24 hours
"""
conn = sqlite3.connect(str(self.db_path))
conn.row_factory = sqlite3.Row
@@ -272,15 +340,27 @@ class HealthPersistence:
original_severity = error_dict.get('severity', 'WARNING')
category = error_dict.get('category', '')
# Look up the user's configured suppression for this category
setting_key = self.CATEGORY_SETTING_MAP.get(category, '')
sup_hours = self.DEFAULT_SUPPRESSION_HOURS
if setting_key:
stored = self.get_setting(setting_key)
if stored is not None:
try:
sup_hours = int(stored)
except (ValueError, TypeError):
pass
cursor.execute('''
UPDATE errors
SET acknowledged = 1, resolved_at = ?
SET acknowledged = 1, resolved_at = ?, suppression_hours = ?
WHERE error_key = ?
''', (now, error_key))
''', (now, sup_hours, error_key))
self._record_event(cursor, 'acknowledged', error_key, {
'original_severity': original_severity,
'category': category
'category': category,
'suppression_hours': sup_hours
})
result = {
@@ -288,7 +368,8 @@ class HealthPersistence:
'error_key': error_key,
'original_severity': original_severity,
'category': category,
'acknowledged_at': now
'acknowledged_at': now,
'suppression_hours': sup_hours
}
conn.commit()
@@ -432,22 +513,30 @@ class HealthPersistence:
except (json.JSONDecodeError, TypeError):
pass
# Check if still within suppression period
# Check if still within suppression period using per-record hours
try:
resolved_dt = datetime.fromisoformat(error_dict['resolved_at'])
elapsed_seconds = (now - resolved_dt).total_seconds()
sup_hours = error_dict.get('suppression_hours')
if sup_hours is None:
sup_hours = self.DEFAULT_SUPPRESSION_HOURS
if error_dict.get('category') == 'updates':
suppression = self.UPDATES_SUPPRESSION
else:
suppression = 24 * 3600 # 24 hours
error_dict['dismissed'] = True
if elapsed_seconds < suppression:
error_dict['dismissed'] = True
error_dict['suppression_remaining_hours'] = round(
(suppression - elapsed_seconds) / 3600, 1
)
if sup_hours == -1:
# Permanent dismiss
error_dict['suppression_remaining_hours'] = -1
error_dict['permanent'] = True
dismissed.append(error_dict)
else:
elapsed_seconds = (now - resolved_dt).total_seconds()
suppression_seconds = sup_hours * 3600
if elapsed_seconds < suppression_seconds:
error_dict['suppression_remaining_hours'] = round(
(suppression_seconds - elapsed_seconds) / 3600, 1
)
error_dict['permanent'] = False
dismissed.append(error_dict)
except (ValueError, TypeError):
pass
@@ -623,6 +712,79 @@ class HealthPersistence:
# from Proxmox storage types in health_monitor.get_detailed_status()
# This avoids redundant subprocess calls and ensures immediate detection
# when the user adds new ZFS/LVM storage via Proxmox.
# ─── User Settings ──────────────────────────────────────────
def get_setting(self, key: str, default: Optional[str] = None) -> Optional[str]:
    """
    Get a user setting value by key.

    Args:
        key: The setting_key to look up in the user_settings table.
        default: Value returned when no row exists for ``key``.

    Returns:
        The stored setting_value for ``key``, or ``default`` when the key
        has no row.

    Raises:
        sqlite3.Error: If the query fails; the connection is still closed.
    """
    conn = sqlite3.connect(str(self.db_path))
    try:
        cursor = conn.cursor()
        cursor.execute(
            'SELECT setting_value FROM user_settings WHERE setting_key = ?', (key,)
        )
        row = cursor.fetchone()
    finally:
        # Close even when the query raises so the connection is not leaked.
        conn.close()
    return row[0] if row else default
def set_setting(self, key: str, value: str):
    """
    Store a user setting value.

    Writes ``key``/``value`` to the user_settings table with the current
    local time (ISO 8601) as updated_at. ``INSERT OR REPLACE`` overwrites an
    existing row only when setting_key carries a uniqueness constraint
    (the table schema is not visible here - confirm).

    Args:
        key: The setting_key to write.
        value: The setting_value to store.

    Raises:
        sqlite3.Error: If the write fails; the connection is still closed.
    """
    conn = sqlite3.connect(str(self.db_path))
    try:
        cursor = conn.cursor()
        cursor.execute('''
            INSERT OR REPLACE INTO user_settings (setting_key, setting_value, updated_at)
            VALUES (?, ?, ?)
        ''', (key, value, datetime.now().isoformat()))
        conn.commit()
    finally:
        # Close even when execute/commit raises so the connection is not leaked.
        conn.close()
def get_all_settings(self, prefix: Optional[str] = None) -> Dict[str, str]:
    """
    Get all user settings, optionally filtered by key prefix.

    Args:
        prefix: When given (and non-empty), only settings whose key starts
            with this text are returned. The prefix is matched literally:
            ``%``, ``_`` and ``\\`` are escaped so they are not treated as
            LIKE wildcards (e.g. ``suppress_`` no longer matches
            ``suppressXcpu``).

    Returns:
        A dict mapping setting_key to setting_value.

    Raises:
        sqlite3.Error: If the query fails; the connection is still closed.
    """
    conn = sqlite3.connect(str(self.db_path))
    try:
        cursor = conn.cursor()
        if prefix:
            # Escape the backslash first so the escapes added for % and _
            # are not themselves doubled.
            literal_prefix = (
                prefix.replace('\\', '\\\\')
                .replace('%', '\\%')
                .replace('_', '\\_')
            )
            cursor.execute(
                "SELECT setting_key, setting_value FROM user_settings "
                "WHERE setting_key LIKE ? ESCAPE '\\'",
                (f'{literal_prefix}%',)
            )
        else:
            cursor.execute('SELECT setting_key, setting_value FROM user_settings')
        rows = cursor.fetchall()
    finally:
        # Close even when the query raises so the connection is not leaked.
        conn.close()
    return {row[0]: row[1] for row in rows}
def get_suppression_categories(self) -> List[Dict[str, Any]]:
    """
    Get all health categories with their current suppression settings.
    Used by the settings page to render the per-category configuration.

    Returns:
        One dict per category, in the fixed order defined below, with the
        keys 'key', 'label', 'category', 'icon' and 'hours'. 'hours' is the
        stored value parsed as an int, or DEFAULT_SUPPRESSION_HOURS when no
        value is stored, the value is empty, or it cannot be parsed.
    """
    category_labels = {
        'suppress_cpu': {'label': 'CPU Usage & Temperature', 'category': 'temperature', 'icon': 'cpu'},
        'suppress_memory': {'label': 'Memory & Swap', 'category': 'memory', 'icon': 'memory'},
        'suppress_storage': {'label': 'Storage Mounts & Space', 'category': 'storage', 'icon': 'storage'},
        'suppress_disks': {'label': 'Disk I/O & Errors', 'category': 'disks', 'icon': 'disk'},
        'suppress_network': {'label': 'Network Interfaces', 'category': 'network', 'icon': 'network'},
        'suppress_vms': {'label': 'VMs & Containers', 'category': 'vms', 'icon': 'vms'},
        'suppress_pve_services': {'label': 'PVE Services', 'category': 'pve_services', 'icon': 'services'},
        'suppress_logs': {'label': 'System Logs', 'category': 'logs', 'icon': 'logs'},
        'suppress_updates': {'label': 'System Updates', 'category': 'updates', 'icon': 'updates'},
        'suppress_security': {'label': 'Security & Certificates', 'category': 'security', 'icon': 'security'},
    }

    current_settings = self.get_all_settings('suppress_')

    result = []
    for key, meta in category_labels.items():
        stored = current_settings.get(key)
        hours = self.DEFAULT_SUPPRESSION_HOURS
        if stored:
            try:
                hours = int(stored)
            except (ValueError, TypeError):
                # A malformed stored value must not break the whole listing;
                # keep the default, as acknowledge_error does.
                pass
        result.append({
            'key': key,
            'label': meta['label'],
            'category': meta['category'],
            'icon': meta['icon'],
            'hours': hours,
        })
    return result
# Global instance